1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* A network driver using virtio. |
3 | * |
4 | * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation |
5 | */ |
6 | //#define DEBUG |
7 | #include <linux/netdevice.h> |
8 | #include <linux/etherdevice.h> |
9 | #include <linux/ethtool.h> |
10 | #include <linux/module.h> |
11 | #include <linux/virtio.h> |
12 | #include <linux/virtio_net.h> |
13 | #include <linux/bpf.h> |
14 | #include <linux/bpf_trace.h> |
15 | #include <linux/scatterlist.h> |
16 | #include <linux/if_vlan.h> |
17 | #include <linux/slab.h> |
18 | #include <linux/cpu.h> |
19 | #include <linux/average.h> |
20 | #include <linux/filter.h> |
21 | #include <linux/kernel.h> |
22 | #include <linux/dim.h> |
23 | #include <net/route.h> |
24 | #include <net/xdp.h> |
25 | #include <net/net_failover.h> |
26 | #include <net/netdev_rx_queue.h> |
27 | |
28 | static int napi_weight = NAPI_POLL_WEIGHT; |
29 | module_param(napi_weight, int, 0444); |
30 | |
31 | static bool csum = true, gso = true, napi_tx = true; |
32 | module_param(csum, bool, 0444); |
33 | module_param(gso, bool, 0444); |
34 | module_param(napi_tx, bool, 0644); |
35 | |
36 | /* FIXME: MTU in config. */ |
37 | #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) |
38 | #define GOOD_COPY_LEN 128 |
39 | |
40 | #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) |
41 | |
42 | /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ |
43 | #define VIRTIO_XDP_HEADROOM 256 |
44 | |
45 | /* Separating two types of XDP xmit */ |
46 | #define VIRTIO_XDP_TX BIT(0) |
47 | #define VIRTIO_XDP_REDIR BIT(1) |
48 | |
49 | #define VIRTIO_XDP_FLAG BIT(0) |
50 | |
51 | /* RX packet size EWMA. The average packet size is used to determine the packet |
52 | * buffer size when refilling RX rings. As the entire RX ring may be refilled |
53 | * at once, the weight is chosen so that the EWMA will be insensitive to short- |
54 | * term, transient changes in packet size. |
55 | */ |
56 | DECLARE_EWMA(pkt_len, 0, 64) |
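/* DECLARE_EWMA(pkt_len, 0, 64): no extra fractional precision bits and a
 * 1/64 weight per sample, so even a burst of refills only nudges the
 * running average.
 */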
57 | |
58 | #define VIRTNET_DRIVER_VERSION "1.0.0" |
59 | |
60 | static const unsigned long guest_offloads[] = { |
61 | VIRTIO_NET_F_GUEST_TSO4, |
62 | VIRTIO_NET_F_GUEST_TSO6, |
63 | VIRTIO_NET_F_GUEST_ECN, |
64 | VIRTIO_NET_F_GUEST_UFO, |
65 | VIRTIO_NET_F_GUEST_CSUM, |
66 | VIRTIO_NET_F_GUEST_USO4, |
67 | VIRTIO_NET_F_GUEST_USO6, |
68 | VIRTIO_NET_F_GUEST_HDRLEN |
69 | }; |
70 | |
71 | #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ |
72 | (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ |
73 | (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ |
74 | (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ |
75 | (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ |
76 | (1ULL << VIRTIO_NET_F_GUEST_USO6)) |
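/* Note: VIRTIO_NET_F_GUEST_CSUM and VIRTIO_NET_F_GUEST_HDRLEN are not part
 * of this mask; it covers only the offloads typically toggled together with
 * NETIF_F_GRO_HW.
 */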
77 | |
78 | struct virtnet_stat_desc { |
79 | char desc[ETH_GSTRING_LEN]; |
80 | size_t offset; |
81 | }; |
82 | |
83 | struct virtnet_sq_free_stats { |
84 | u64 packets; |
85 | u64 bytes; |
86 | }; |
87 | |
88 | struct virtnet_sq_stats { |
89 | struct u64_stats_sync syncp; |
90 | u64_stats_t packets; |
91 | u64_stats_t bytes; |
92 | u64_stats_t xdp_tx; |
93 | u64_stats_t xdp_tx_drops; |
94 | u64_stats_t kicks; |
95 | u64_stats_t tx_timeouts; |
96 | }; |
97 | |
98 | struct virtnet_rq_stats { |
99 | struct u64_stats_sync syncp; |
100 | u64_stats_t packets; |
101 | u64_stats_t bytes; |
102 | u64_stats_t drops; |
103 | u64_stats_t xdp_packets; |
104 | u64_stats_t xdp_tx; |
105 | u64_stats_t xdp_redirects; |
106 | u64_stats_t xdp_drops; |
107 | u64_stats_t kicks; |
108 | }; |
109 | |
110 | #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) |
111 | #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) |
112 | |
113 | static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { |
114 | { "packets", VIRTNET_SQ_STAT(packets) }, |
115 | { "bytes", VIRTNET_SQ_STAT(bytes) }, |
116 | { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, |
117 | { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, |
118 | { "kicks", VIRTNET_SQ_STAT(kicks) }, |
119 | { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, |
120 | }; |
121 | |
122 | static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { |
123 | { "packets", VIRTNET_RQ_STAT(packets) }, |
124 | { "bytes", VIRTNET_RQ_STAT(bytes) }, |
125 | { "drops", VIRTNET_RQ_STAT(drops) }, |
126 | { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) }, |
127 | { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, |
128 | { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, |
129 | { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, |
130 | { "kicks", VIRTNET_RQ_STAT(kicks) }, |
131 | }; |
132 | |
133 | #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) |
134 | #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) |
135 | |
136 | struct virtnet_interrupt_coalesce { |
137 | u32 max_packets; |
138 | u32 max_usecs; |
139 | }; |
140 | |
141 | /* The dma information of pages allocated at a time. */ |
142 | struct virtnet_rq_dma { |
143 | dma_addr_t addr; |
144 | u32 ref; |
145 | u16 len; |
146 | u16 need_sync; |
147 | }; |
148 | |
149 | /* Internal representation of a send virtqueue */ |
150 | struct send_queue { |
/* Virtqueue associated with this send_queue */
152 | struct virtqueue *vq; |
153 | |
154 | /* TX: fragments + linear part + virtio header */ |
155 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; |
156 | |
157 | /* Name of the send queue: output.$index */ |
158 | char name[16]; |
159 | |
160 | struct virtnet_sq_stats stats; |
161 | |
162 | struct virtnet_interrupt_coalesce intr_coal; |
163 | |
164 | struct napi_struct napi; |
165 | |
166 | /* Record whether sq is in reset state. */ |
167 | bool reset; |
168 | }; |
169 | |
170 | /* Internal representation of a receive virtqueue */ |
171 | struct receive_queue { |
172 | /* Virtqueue associated with this receive_queue */ |
173 | struct virtqueue *vq; |
174 | |
175 | struct napi_struct napi; |
176 | |
177 | struct bpf_prog __rcu *xdp_prog; |
178 | |
179 | struct virtnet_rq_stats stats; |
180 | |
181 | /* The number of rx notifications */ |
182 | u16 calls; |
183 | |
184 | /* Is dynamic interrupt moderation enabled? */ |
185 | bool dim_enabled; |
186 | |
187 | /* Dynamic Interrupt Moderation */ |
188 | struct dim dim; |
189 | |
190 | u32 packets_in_napi; |
191 | |
192 | struct virtnet_interrupt_coalesce intr_coal; |
193 | |
194 | /* Chain pages by the private ptr. */ |
195 | struct page *pages; |
196 | |
197 | /* Average packet length for mergeable receive buffers. */ |
198 | struct ewma_pkt_len mrg_avg_pkt_len; |
199 | |
200 | /* Page frag for packet buffer allocation. */ |
201 | struct page_frag alloc_frag; |
202 | |
203 | /* RX: fragments + linear part + virtio header */ |
204 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; |
205 | |
206 | /* Min single buffer size for mergeable buffers case. */ |
207 | unsigned int min_buf_len; |
208 | |
209 | /* Name of this receive queue: input.$index */ |
210 | char name[16]; |
211 | |
212 | struct xdp_rxq_info xdp_rxq; |
213 | |
/* Record the last dma info to free after new pages are allocated. */
215 | struct virtnet_rq_dma *last_dma; |
216 | |
/* Do DMA mapping ourselves */
218 | bool do_dma; |
219 | }; |
220 | |
/* This structure can hold an RSS message sized for the maximum indirection table and key size.
 * Note that the default structure describing an RSS configuration, virtio_net_rss_config,
 * contains the same information but cannot hold the table values.
 * In any case, the structure is passed to the virtio device through an sg buffer split into parts,
 * because the table sizes may differ according to the device configuration.
 */
227 | #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 |
228 | #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 |
229 | struct virtio_net_ctrl_rss { |
230 | u32 hash_types; |
231 | u16 indirection_table_mask; |
232 | u16 unclassified_queue; |
233 | u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; |
234 | u16 max_tx_vq; |
235 | u8 hash_key_length; |
236 | u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; |
237 | }; |
238 | |
239 | /* Control VQ buffers: protected by the rtnl lock */ |
240 | struct control_buf { |
241 | struct virtio_net_ctrl_hdr hdr; |
242 | virtio_net_ctrl_ack status; |
243 | struct virtio_net_ctrl_mq mq; |
244 | u8 promisc; |
245 | u8 allmulti; |
246 | __virtio16 vid; |
247 | __virtio64 offloads; |
248 | struct virtio_net_ctrl_rss rss; |
249 | struct virtio_net_ctrl_coal_tx coal_tx; |
250 | struct virtio_net_ctrl_coal_rx coal_rx; |
251 | struct virtio_net_ctrl_coal_vq coal_vq; |
252 | }; |
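/* These live in a dedicated allocation (vi->ctrl) rather than on the stack,
 * presumably because the control virtqueue needs buffers that can be mapped
 * for DMA.
 */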
253 | |
254 | struct virtnet_info { |
255 | struct virtio_device *vdev; |
256 | struct virtqueue *cvq; |
257 | struct net_device *dev; |
258 | struct send_queue *sq; |
259 | struct receive_queue *rq; |
260 | unsigned int status; |
261 | |
262 | /* Max # of queue pairs supported by the device */ |
263 | u16 max_queue_pairs; |
264 | |
265 | /* # of queue pairs currently used by the driver */ |
266 | u16 curr_queue_pairs; |
267 | |
268 | /* # of XDP queue pairs currently used by the driver */ |
269 | u16 xdp_queue_pairs; |
270 | |
/* xdp_queue_pairs may be 0 even when an XDP program is loaded, so track XDP state here. */
272 | bool xdp_enabled; |
273 | |
274 | /* I like... big packets and I cannot lie! */ |
275 | bool big_packets; |
276 | |
277 | /* number of sg entries allocated for big packets */ |
278 | unsigned int big_packets_num_skbfrags; |
279 | |
280 | /* Host will merge rx buffers for big packets (shake it! shake it!) */ |
281 | bool mergeable_rx_bufs; |
282 | |
283 | /* Host supports rss and/or hash report */ |
284 | bool has_rss; |
285 | bool has_rss_hash_report; |
286 | u8 rss_key_size; |
287 | u16 rss_indir_table_size; |
288 | u32 rss_hash_types_supported; |
289 | u32 rss_hash_types_saved; |
290 | |
291 | /* Has control virtqueue */ |
292 | bool has_cvq; |
293 | |
294 | /* Host can handle any s/g split between our header and packet data */ |
295 | bool any_header_sg; |
296 | |
297 | /* Packet virtio header size */ |
298 | u8 hdr_len; |
299 | |
300 | /* Work struct for delayed refilling if we run low on memory. */ |
301 | struct delayed_work refill; |
302 | |
303 | /* Is delayed refill enabled? */ |
304 | bool refill_enabled; |
305 | |
306 | /* The lock to synchronize the access to refill_enabled */ |
307 | spinlock_t refill_lock; |
308 | |
309 | /* Work struct for config space updates */ |
310 | struct work_struct config_work; |
311 | |
312 | /* Work struct for setting rx mode */ |
313 | struct work_struct rx_mode_work; |
314 | |
315 | /* OK to queue work setting RX mode? */ |
316 | bool rx_mode_work_enabled; |
317 | |
/* Is the affinity hint set for the virtqueues? */
319 | bool affinity_hint_set; |
320 | |
321 | /* CPU hotplug instances for online & dead */ |
322 | struct hlist_node node; |
323 | struct hlist_node node_dead; |
324 | |
325 | struct control_buf *ctrl; |
326 | |
327 | /* Ethtool settings */ |
328 | u8 duplex; |
329 | u32 speed; |
330 | |
331 | /* Is rx dynamic interrupt moderation enabled? */ |
332 | bool rx_dim_enabled; |
333 | |
334 | /* Interrupt coalescing settings */ |
335 | struct virtnet_interrupt_coalesce intr_coal_tx; |
336 | struct virtnet_interrupt_coalesce intr_coal_rx; |
337 | |
338 | unsigned long guest_offloads; |
339 | unsigned long guest_offloads_capable; |
340 | |
341 | /* failover when STANDBY feature enabled */ |
342 | struct failover *failover; |
343 | }; |
344 | |
345 | struct padded_vnet_hdr { |
346 | struct virtio_net_hdr_v1_hash hdr; |
/*
 * hdr is in a separate sg buffer, and the data sg buffer shares the same
 * page as this header sg. This padding makes the next sg 16-byte aligned
 * after the header.
 */
352 | char padding[12]; |
353 | }; |
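/* With the 20-byte struct virtio_net_hdr_v1_hash, the 12 bytes of padding
 * above round the header area up to 32 bytes.
 */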
354 | |
355 | struct virtio_net_common_hdr { |
356 | union { |
357 | struct virtio_net_hdr hdr; |
358 | struct virtio_net_hdr_mrg_rxbuf mrg_hdr; |
359 | struct virtio_net_hdr_v1_hash hash_v1_hdr; |
360 | }; |
361 | }; |
362 | |
363 | static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); |
364 | |
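/* Completed TX buffers are either sk_buffs or xdp_frames. The low bit of the
 * token stored in the virtqueue (VIRTIO_XDP_FLAG) tags xdp_frames so the two
 * can be told apart when completions are harvested.
 */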
365 | static bool is_xdp_frame(void *ptr) |
366 | { |
367 | return (unsigned long)ptr & VIRTIO_XDP_FLAG; |
368 | } |
369 | |
370 | static void *xdp_to_ptr(struct xdp_frame *ptr) |
371 | { |
372 | return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); |
373 | } |
374 | |
375 | static struct xdp_frame *ptr_to_xdp(void *ptr) |
376 | { |
377 | return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); |
378 | } |
379 | |
380 | static void __free_old_xmit(struct send_queue *sq, bool in_napi, |
381 | struct virtnet_sq_free_stats *stats) |
382 | { |
383 | unsigned int len; |
384 | void *ptr; |
385 | |
while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
++stats->packets;

if (!is_xdp_frame(ptr)) {
struct sk_buff *skb = ptr;

pr_debug("Sent skb %p\n", skb);

stats->bytes += skb->len;
napi_consume_skb(skb, in_napi);
} else {
struct xdp_frame *frame = ptr_to_xdp(ptr);

stats->bytes += xdp_get_frame_len(frame);
xdp_return_frame(frame);
}
402 | } |
403 | } |
404 | |
405 | /* Converting between virtqueue no. and kernel tx/rx queue no. |
406 | * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq |
407 | */ |
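/* For example, vq index 4 is rx queue 2 and vq index 5 is tx queue 2. */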
408 | static int vq2txq(struct virtqueue *vq) |
409 | { |
410 | return (vq->index - 1) / 2; |
411 | } |
412 | |
413 | static int txq2vq(int txq) |
414 | { |
415 | return txq * 2 + 1; |
416 | } |
417 | |
418 | static int vq2rxq(struct virtqueue *vq) |
419 | { |
420 | return vq->index / 2; |
421 | } |
422 | |
423 | static int rxq2vq(int rxq) |
424 | { |
425 | return rxq * 2; |
426 | } |
427 | |
428 | static inline struct virtio_net_common_hdr * |
429 | skb_vnet_common_hdr(struct sk_buff *skb) |
430 | { |
431 | return (struct virtio_net_common_hdr *)skb->cb; |
432 | } |
433 | |
/*
 * page->private is used to chain pages for big packets; put the whole
 * most recently used list at the front for reuse.
 */
438 | static void give_pages(struct receive_queue *rq, struct page *page) |
439 | { |
440 | struct page *end; |
441 | |
442 | /* Find end of list, sew whole thing into vi->rq.pages. */ |
443 | for (end = page; end->private; end = (struct page *)end->private); |
444 | end->private = (unsigned long)rq->pages; |
445 | rq->pages = page; |
446 | } |
447 | |
448 | static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) |
449 | { |
450 | struct page *p = rq->pages; |
451 | |
452 | if (p) { |
453 | rq->pages = (struct page *)p->private; |
454 | /* clear private here, it is used to chain pages */ |
455 | p->private = 0; |
456 | } else |
457 | p = alloc_page(gfp_mask); |
458 | return p; |
459 | } |
460 | |
461 | static void virtnet_rq_free_buf(struct virtnet_info *vi, |
462 | struct receive_queue *rq, void *buf) |
463 | { |
if (vi->mergeable_rx_bufs)
put_page(virt_to_head_page(buf));
else if (vi->big_packets)
give_pages(rq, buf);
else
put_page(virt_to_head_page(buf));
470 | } |
471 | |
472 | static void enable_delayed_refill(struct virtnet_info *vi) |
473 | { |
spin_lock_bh(&vi->refill_lock);
vi->refill_enabled = true;
spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
spin_lock_bh(&vi->refill_lock);
vi->refill_enabled = false;
spin_unlock_bh(&vi->refill_lock);
484 | } |
485 | |
486 | static void enable_rx_mode_work(struct virtnet_info *vi) |
487 | { |
488 | rtnl_lock(); |
489 | vi->rx_mode_work_enabled = true; |
490 | rtnl_unlock(); |
491 | } |
492 | |
493 | static void disable_rx_mode_work(struct virtnet_info *vi) |
494 | { |
495 | rtnl_lock(); |
496 | vi->rx_mode_work_enabled = false; |
497 | rtnl_unlock(); |
498 | } |
499 | |
500 | static void virtqueue_napi_schedule(struct napi_struct *napi, |
501 | struct virtqueue *vq) |
502 | { |
if (napi_schedule_prep(napi)) {
virtqueue_disable_cb(vq);
__napi_schedule(napi);
506 | } |
507 | } |
508 | |
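/* Complete NAPI and re-arm the virtqueue callback; the queue is polled once
 * more after callbacks are enabled so a buffer that arrived in between is not
 * missed.
 */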
509 | static bool virtqueue_napi_complete(struct napi_struct *napi, |
510 | struct virtqueue *vq, int processed) |
511 | { |
512 | int opaque; |
513 | |
514 | opaque = virtqueue_enable_cb_prepare(vq); |
if (napi_complete_done(napi, processed)) {
516 | if (unlikely(virtqueue_poll(vq, opaque))) |
517 | virtqueue_napi_schedule(napi, vq); |
518 | else |
519 | return true; |
520 | } else { |
521 | virtqueue_disable_cb(vq); |
522 | } |
523 | |
524 | return false; |
525 | } |
526 | |
527 | static void skb_xmit_done(struct virtqueue *vq) |
528 | { |
529 | struct virtnet_info *vi = vq->vdev->priv; |
530 | struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; |
531 | |
532 | /* Suppress further interrupts. */ |
533 | virtqueue_disable_cb(vq); |
534 | |
535 | if (napi->weight) |
536 | virtqueue_napi_schedule(napi, vq); |
537 | else |
538 | /* We were probably waiting for more output buffers. */ |
netif_wake_subqueue(vi->dev, vq2txq(vq));
540 | } |
541 | |
542 | #define MRG_CTX_HEADER_SHIFT 22 |
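/* The mergeable-buffer context is not a real pointer: the headroom is packed
 * into the bits above MRG_CTX_HEADER_SHIFT and the truesize into the low 22
 * bits, as the helpers below show.
 */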
543 | static void *mergeable_len_to_ctx(unsigned int truesize, |
544 | unsigned int headroom) |
545 | { |
546 | return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); |
547 | } |
548 | |
549 | static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) |
550 | { |
551 | return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; |
552 | } |
553 | |
554 | static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) |
555 | { |
556 | return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); |
557 | } |
558 | |
559 | static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, |
560 | unsigned int headroom, |
561 | unsigned int len) |
562 | { |
563 | struct sk_buff *skb; |
564 | |
skb = build_skb(buf, buflen);
if (unlikely(!skb))
return NULL;

skb_reserve(skb, headroom);
570 | skb_put(skb, len); |
571 | |
572 | return skb; |
573 | } |
574 | |
575 | /* Called from bottom half context */ |
576 | static struct sk_buff *page_to_skb(struct virtnet_info *vi, |
577 | struct receive_queue *rq, |
578 | struct page *page, unsigned int offset, |
579 | unsigned int len, unsigned int truesize, |
580 | unsigned int headroom) |
581 | { |
582 | struct sk_buff *skb; |
583 | struct virtio_net_common_hdr *hdr; |
584 | unsigned int copy, hdr_len, hdr_padded_len; |
585 | struct page *page_to_free = NULL; |
586 | int tailroom, shinfo_size; |
587 | char *p, *hdr_p, *buf; |
588 | |
589 | p = page_address(page) + offset; |
590 | hdr_p = p; |
591 | |
592 | hdr_len = vi->hdr_len; |
593 | if (vi->mergeable_rx_bufs) |
594 | hdr_padded_len = hdr_len; |
595 | else |
596 | hdr_padded_len = sizeof(struct padded_vnet_hdr); |
597 | |
598 | buf = p - headroom; |
599 | len -= hdr_len; |
600 | offset += hdr_padded_len; |
601 | p += hdr_padded_len; |
602 | tailroom = truesize - headroom - hdr_padded_len - len; |
603 | |
604 | shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
605 | |
/* Packet is large enough and there is tailroom for shinfo: build the skb around the existing buffer instead of copying. */
607 | if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { |
608 | skb = virtnet_build_skb(buf, buflen: truesize, headroom: p - buf, len); |
609 | if (unlikely(!skb)) |
610 | return NULL; |
611 | |
612 | page = (struct page *)page->private; |
613 | if (page) |
614 | give_pages(rq, page); |
615 | goto ok; |
616 | } |
617 | |
618 | /* copy small packet so we can reuse these pages for small data */ |
619 | skb = napi_alloc_skb(napi: &rq->napi, GOOD_COPY_LEN); |
620 | if (unlikely(!skb)) |
621 | return NULL; |
622 | |
/* Copy the whole frame if it fits in skb->head; otherwise copy only the
 * Ethernet header and let virtio_net_hdr_to_skb() and GRO pull headers as needed.
 */
626 | if (len <= skb_tailroom(skb)) |
627 | copy = len; |
628 | else |
629 | copy = ETH_HLEN; |
630 | skb_put_data(skb, data: p, len: copy); |
631 | |
632 | len -= copy; |
633 | offset += copy; |
634 | |
635 | if (vi->mergeable_rx_bufs) { |
636 | if (len) |
637 | skb_add_rx_frag(skb, i: 0, page, off: offset, size: len, truesize); |
638 | else |
639 | page_to_free = page; |
640 | goto ok; |
641 | } |
642 | |
643 | /* |
644 | * Verify that we can indeed put this data into a skb. |
645 | * This is here to handle cases when the device erroneously |
646 | * tries to receive more than is possible. This is usually |
647 | * the case of a broken device. |
648 | */ |
649 | if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { |
650 | net_dbg_ratelimited("%s: too much data\n", skb->dev->name); |
651 | dev_kfree_skb(skb); |
652 | return NULL; |
653 | } |
654 | BUG_ON(offset >= PAGE_SIZE); |
655 | while (len) { |
656 | unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); |
657 | skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off: offset, |
658 | size: frag_size, truesize); |
659 | len -= frag_size; |
660 | page = (struct page *)page->private; |
661 | offset = 0; |
662 | } |
663 | |
664 | if (page) |
665 | give_pages(rq, page); |
666 | |
667 | ok: |
668 | hdr = skb_vnet_common_hdr(skb); |
669 | memcpy(hdr, hdr_p, hdr_len); |
670 | if (page_to_free) |
671 | put_page(page: page_to_free); |
672 | |
673 | return skb; |
674 | } |
675 | |
676 | static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) |
677 | { |
678 | struct page *page = virt_to_head_page(x: buf); |
679 | struct virtnet_rq_dma *dma; |
680 | void *head; |
681 | int offset; |
682 | |
683 | head = page_address(page); |
684 | |
685 | dma = head; |
686 | |
687 | --dma->ref; |
688 | |
689 | if (dma->need_sync && len) { |
690 | offset = buf - (head + sizeof(*dma)); |
691 | |
692 | virtqueue_dma_sync_single_range_for_cpu(vq: rq->vq, addr: dma->addr, |
693 | offset, size: len, |
694 | dir: DMA_FROM_DEVICE); |
695 | } |
696 | |
697 | if (dma->ref) |
698 | return; |
699 | |
700 | virtqueue_dma_unmap_single_attrs(vq: rq->vq, addr: dma->addr, size: dma->len, |
701 | dir: DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); |
702 | put_page(page); |
703 | } |
704 | |
705 | static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) |
706 | { |
707 | void *buf; |
708 | |
buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
if (buf && rq->do_dma)
virtnet_rq_unmap(rq, buf, *len);
712 | |
713 | return buf; |
714 | } |
715 | |
716 | static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) |
717 | { |
718 | struct virtnet_rq_dma *dma; |
719 | dma_addr_t addr; |
720 | u32 offset; |
721 | void *head; |
722 | |
723 | if (!rq->do_dma) { |
724 | sg_init_one(rq->sg, buf, len); |
725 | return; |
726 | } |
727 | |
728 | head = page_address(rq->alloc_frag.page); |
729 | |
730 | offset = buf - head; |
731 | |
732 | dma = head; |
733 | |
734 | addr = dma->addr - sizeof(*dma) + offset; |
735 | |
736 | sg_init_table(rq->sg, 1); |
737 | rq->sg[0].dma_address = addr; |
738 | rq->sg[0].length = len; |
739 | } |
740 | |
741 | static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) |
742 | { |
743 | struct page_frag *alloc_frag = &rq->alloc_frag; |
744 | struct virtnet_rq_dma *dma; |
745 | void *buf, *head; |
746 | dma_addr_t addr; |
747 | |
748 | if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) |
749 | return NULL; |
750 | |
751 | head = page_address(alloc_frag->page); |
752 | |
753 | if (rq->do_dma) { |
754 | dma = head; |
755 | |
756 | /* new pages */ |
757 | if (!alloc_frag->offset) { |
758 | if (rq->last_dma) { |
759 | /* Now, the new page is allocated, the last dma |
760 | * will not be used. So the dma can be unmapped |
761 | * if the ref is 0. |
762 | */ |
763 | virtnet_rq_unmap(rq, buf: rq->last_dma, len: 0); |
764 | rq->last_dma = NULL; |
765 | } |
766 | |
767 | dma->len = alloc_frag->size - sizeof(*dma); |
768 | |
769 | addr = virtqueue_dma_map_single_attrs(vq: rq->vq, ptr: dma + 1, |
770 | size: dma->len, dir: DMA_FROM_DEVICE, attrs: 0); |
771 | if (virtqueue_dma_mapping_error(vq: rq->vq, addr)) |
772 | return NULL; |
773 | |
774 | dma->addr = addr; |
775 | dma->need_sync = virtqueue_dma_need_sync(vq: rq->vq, addr); |
776 | |
777 | /* Add a reference to dma to prevent the entire dma from |
778 | * being released during error handling. This reference |
779 | * will be freed after the pages are no longer used. |
780 | */ |
781 | get_page(page: alloc_frag->page); |
782 | dma->ref = 1; |
783 | alloc_frag->offset = sizeof(*dma); |
784 | |
785 | rq->last_dma = dma; |
786 | } |
787 | |
788 | ++dma->ref; |
789 | } |
790 | |
791 | buf = head + alloc_frag->offset; |
792 | |
793 | get_page(page: alloc_frag->page); |
794 | alloc_frag->offset += size; |
795 | |
796 | return buf; |
797 | } |
798 | |
799 | static void virtnet_rq_set_premapped(struct virtnet_info *vi) |
800 | { |
801 | int i; |
802 | |
803 | /* disable for big mode */ |
804 | if (!vi->mergeable_rx_bufs && vi->big_packets) |
805 | return; |
806 | |
807 | for (i = 0; i < vi->max_queue_pairs; i++) { |
808 | if (virtqueue_set_dma_premapped(vq: vi->rq[i].vq)) |
809 | continue; |
810 | |
811 | vi->rq[i].do_dma = true; |
812 | } |
813 | } |
814 | |
815 | static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) |
816 | { |
817 | struct virtnet_info *vi = vq->vdev->priv; |
818 | struct receive_queue *rq; |
819 | int i = vq2rxq(vq); |
820 | |
821 | rq = &vi->rq[i]; |
822 | |
823 | if (rq->do_dma) |
824 | virtnet_rq_unmap(rq, buf, len: 0); |
825 | |
826 | virtnet_rq_free_buf(vi, rq, buf); |
827 | } |
828 | |
829 | static void free_old_xmit(struct send_queue *sq, bool in_napi) |
830 | { |
831 | struct virtnet_sq_free_stats stats = {0}; |
832 | |
__free_old_xmit(sq, in_napi, &stats);

/* Avoid the overhead when no packets have been processed; this
 * happens when called speculatively from start_xmit.
 */
if (!stats.packets)
return;

u64_stats_update_begin(&sq->stats.syncp);
u64_stats_add(&sq->stats.bytes, stats.bytes);
u64_stats_add(&sq->stats.packets, stats.packets);
u64_stats_update_end(&sq->stats.syncp);
845 | } |
846 | |
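/* The last xdp_queue_pairs of the active queue pairs are dedicated to XDP
 * transmission (see virtnet_xdp_get_sq()), so their send queues carry raw
 * xdp_frames rather than sk_buffs.
 */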
847 | static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) |
848 | { |
849 | if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) |
850 | return false; |
851 | else if (q < vi->curr_queue_pairs) |
852 | return true; |
853 | else |
854 | return false; |
855 | } |
856 | |
857 | static void check_sq_full_and_disable(struct virtnet_info *vi, |
858 | struct net_device *dev, |
859 | struct send_queue *sq) |
860 | { |
861 | bool use_napi = sq->napi.weight; |
862 | int qnum; |
863 | |
864 | qnum = sq - vi->sq; |
865 | |
866 | /* If running out of space, stop queue to avoid getting packets that we |
867 | * are then unable to transmit. |
868 | * An alternative would be to force queuing layer to requeue the skb by |
869 | * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be |
870 | * returned in a normal path of operation: it means that driver is not |
871 | * maintaining the TX queue stop/start state properly, and causes |
872 | * the stack to do a non-trivial amount of useless work. |
873 | * Since most packets only take 1 or 2 ring slots, stopping the queue |
874 | * early means 16 slots are typically wasted. |
875 | */ |
876 | if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { |
877 | netif_stop_subqueue(dev, queue_index: qnum); |
878 | if (use_napi) { |
879 | if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) |
880 | virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq); |
881 | } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { |
882 | /* More just got used, free them then recheck. */ |
883 | free_old_xmit(sq, in_napi: false); |
884 | if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { |
885 | netif_start_subqueue(dev, queue_index: qnum); |
886 | virtqueue_disable_cb(vq: sq->vq); |
887 | } |
888 | } |
889 | } |
890 | } |
891 | |
892 | static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, |
893 | struct send_queue *sq, |
894 | struct xdp_frame *xdpf) |
895 | { |
896 | struct virtio_net_hdr_mrg_rxbuf *hdr; |
897 | struct skb_shared_info *shinfo; |
898 | u8 nr_frags = 0; |
899 | int err, i; |
900 | |
901 | if (unlikely(xdpf->headroom < vi->hdr_len)) |
902 | return -EOVERFLOW; |
903 | |
904 | if (unlikely(xdp_frame_has_frags(xdpf))) { |
905 | shinfo = xdp_get_shared_info_from_frame(frame: xdpf); |
906 | nr_frags = shinfo->nr_frags; |
907 | } |
908 | |
/* The wrapping function virtnet_xdp_xmit() frees up the pending old
 * buffers, and the position of skb_shared_info calculated there by
 * xdp_get_frame_len() and xdp_return_frame() depends on xdpf->data and
 * xdpf->headroom. Therefore, we need to update the value of
 * headroom synchronously here.
 */
916 | xdpf->headroom -= vi->hdr_len; |
917 | xdpf->data -= vi->hdr_len; |
918 | /* Zero header and leave csum up to XDP layers */ |
919 | hdr = xdpf->data; |
920 | memset(hdr, 0, vi->hdr_len); |
921 | xdpf->len += vi->hdr_len; |
922 | |
923 | sg_init_table(sq->sg, nr_frags + 1); |
sg_set_buf(sq->sg, xdpf->data, xdpf->len);
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = &shinfo->frags[i];

sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
skb_frag_size(frag), skb_frag_off(frag));
}

err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
xdp_to_ptr(xdpf), GFP_ATOMIC);
934 | if (unlikely(err)) |
935 | return -ENOSPC; /* Caller handle free/refcnt */ |
936 | |
937 | return 0; |
938 | } |
939 | |
/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for XDP TX on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use macros instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding whether to
 * lock/unlock the txq, and 3. keeping sparse happy. It is difficult for two
 * inline functions to solve these three problems at the same time.
 */
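/* Typical usage, as in virtnet_xdp_xmit():
 *	sq = virtnet_xdp_get_sq(vi);
 *	...queue frames on sq...
 *	virtnet_xdp_put_sq(vi, sq);
 */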
948 | #define virtnet_xdp_get_sq(vi) ({ \ |
949 | int cpu = smp_processor_id(); \ |
950 | struct netdev_queue *txq; \ |
951 | typeof(vi) v = (vi); \ |
952 | unsigned int qp; \ |
953 | \ |
954 | if (v->curr_queue_pairs > nr_cpu_ids) { \ |
955 | qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ |
956 | qp += cpu; \ |
957 | txq = netdev_get_tx_queue(v->dev, qp); \ |
958 | __netif_tx_acquire(txq); \ |
959 | } else { \ |
960 | qp = cpu % v->curr_queue_pairs; \ |
961 | txq = netdev_get_tx_queue(v->dev, qp); \ |
962 | __netif_tx_lock(txq, cpu); \ |
963 | } \ |
964 | v->sq + qp; \ |
965 | }) |
966 | |
967 | #define virtnet_xdp_put_sq(vi, q) { \ |
968 | struct netdev_queue *txq; \ |
969 | typeof(vi) v = (vi); \ |
970 | \ |
971 | txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ |
972 | if (v->curr_queue_pairs > nr_cpu_ids) \ |
973 | __netif_tx_release(txq); \ |
974 | else \ |
975 | __netif_tx_unlock(txq); \ |
976 | } |
977 | |
978 | static int virtnet_xdp_xmit(struct net_device *dev, |
979 | int n, struct xdp_frame **frames, u32 flags) |
980 | { |
981 | struct virtnet_info *vi = netdev_priv(dev); |
982 | struct virtnet_sq_free_stats stats = {0}; |
983 | struct receive_queue *rq = vi->rq; |
984 | struct bpf_prog *xdp_prog; |
985 | struct send_queue *sq; |
986 | int nxmit = 0; |
987 | int kicks = 0; |
988 | int ret; |
989 | int i; |
990 | |
991 | /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this |
* indicates that XDP resources have been successfully allocated.
993 | */ |
994 | xdp_prog = rcu_access_pointer(rq->xdp_prog); |
995 | if (!xdp_prog) |
996 | return -ENXIO; |
997 | |
998 | sq = virtnet_xdp_get_sq(vi); |
999 | |
1000 | if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { |
1001 | ret = -EINVAL; |
1002 | goto out; |
1003 | } |
1004 | |
1005 | /* Free up any pending old buffers before queueing new ones. */ |
1006 | __free_old_xmit(sq, in_napi: false, stats: &stats); |
1007 | |
1008 | for (i = 0; i < n; i++) { |
1009 | struct xdp_frame *xdpf = frames[i]; |
1010 | |
1011 | if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) |
1012 | break; |
1013 | nxmit++; |
1014 | } |
1015 | ret = nxmit; |
1016 | |
1017 | if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq)) |
1018 | check_sq_full_and_disable(vi, dev, sq); |
1019 | |
1020 | if (flags & XDP_XMIT_FLUSH) { |
1021 | if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) |
1022 | kicks = 1; |
1023 | } |
1024 | out: |
1025 | u64_stats_update_begin(syncp: &sq->stats.syncp); |
1026 | u64_stats_add(p: &sq->stats.bytes, val: stats.bytes); |
1027 | u64_stats_add(p: &sq->stats.packets, val: stats.packets); |
1028 | u64_stats_add(p: &sq->stats.xdp_tx, val: n); |
1029 | u64_stats_add(p: &sq->stats.xdp_tx_drops, val: n - nxmit); |
1030 | u64_stats_add(p: &sq->stats.kicks, val: kicks); |
1031 | u64_stats_update_end(syncp: &sq->stats.syncp); |
1032 | |
1033 | virtnet_xdp_put_sq(vi, sq); |
1034 | return ret; |
1035 | } |
1036 | |
1037 | static void put_xdp_frags(struct xdp_buff *xdp) |
1038 | { |
1039 | struct skb_shared_info *shinfo; |
1040 | struct page *xdp_page; |
1041 | int i; |
1042 | |
1043 | if (xdp_buff_has_frags(xdp)) { |
1044 | shinfo = xdp_get_shared_info_from_buff(xdp); |
1045 | for (i = 0; i < shinfo->nr_frags; i++) { |
1046 | xdp_page = skb_frag_page(frag: &shinfo->frags[i]); |
1047 | put_page(page: xdp_page); |
1048 | } |
1049 | } |
1050 | } |
1051 | |
1052 | static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, |
1053 | struct net_device *dev, |
1054 | unsigned int *xdp_xmit, |
1055 | struct virtnet_rq_stats *stats) |
1056 | { |
1057 | struct xdp_frame *xdpf; |
1058 | int err; |
1059 | u32 act; |
1060 | |
1061 | act = bpf_prog_run_xdp(prog: xdp_prog, xdp); |
1062 | u64_stats_inc(p: &stats->xdp_packets); |
1063 | |
1064 | switch (act) { |
1065 | case XDP_PASS: |
1066 | return act; |
1067 | |
1068 | case XDP_TX: |
1069 | u64_stats_inc(p: &stats->xdp_tx); |
1070 | xdpf = xdp_convert_buff_to_frame(xdp); |
1071 | if (unlikely(!xdpf)) { |
1072 | netdev_dbg(dev, "convert buff to frame failed for xdp\n"); |
1073 | return XDP_DROP; |
1074 | } |
1075 | |
1076 | err = virtnet_xdp_xmit(dev, n: 1, frames: &xdpf, flags: 0); |
1077 | if (unlikely(!err)) { |
1078 | xdp_return_frame_rx_napi(xdpf); |
1079 | } else if (unlikely(err < 0)) { |
1080 | trace_xdp_exception(dev, xdp: xdp_prog, act); |
1081 | return XDP_DROP; |
1082 | } |
1083 | *xdp_xmit |= VIRTIO_XDP_TX; |
1084 | return act; |
1085 | |
1086 | case XDP_REDIRECT: |
1087 | u64_stats_inc(p: &stats->xdp_redirects); |
1088 | err = xdp_do_redirect(dev, xdp, prog: xdp_prog); |
1089 | if (err) |
1090 | return XDP_DROP; |
1091 | |
1092 | *xdp_xmit |= VIRTIO_XDP_REDIR; |
1093 | return act; |
1094 | |
1095 | default: |
1096 | bpf_warn_invalid_xdp_action(dev, prog: xdp_prog, act); |
1097 | fallthrough; |
1098 | case XDP_ABORTED: |
1099 | trace_xdp_exception(dev, xdp: xdp_prog, act); |
1100 | fallthrough; |
1101 | case XDP_DROP: |
1102 | return XDP_DROP; |
1103 | } |
1104 | } |
1105 | |
1106 | static unsigned int virtnet_get_headroom(struct virtnet_info *vi) |
1107 | { |
1108 | return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; |
1109 | } |
1110 | |
1111 | /* We copy the packet for XDP in the following cases: |
1112 | * |
1113 | * 1) Packet is scattered across multiple rx buffers. |
1114 | * 2) Headroom space is insufficient. |
1115 | * |
1116 | * This is inefficient but it's a temporary condition that |
1117 | * we hit right after XDP is enabled and until queue is refilled |
1118 | * with large buffers with sufficient headroom - so it should affect |
1119 | * at most queue size packets. |
1120 | * Afterwards, the conditions to enable |
1121 | * XDP should preclude the underlying device from sending packets |
1122 | * across multiple buffers (num_buf > 1), and we make sure buffers |
1123 | * have enough headroom. |
1124 | */ |
1125 | static struct page *xdp_linearize_page(struct receive_queue *rq, |
1126 | int *num_buf, |
1127 | struct page *p, |
1128 | int offset, |
1129 | int page_off, |
1130 | unsigned int *len) |
1131 | { |
1132 | int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1133 | struct page *page; |
1134 | |
1135 | if (page_off + *len + tailroom > PAGE_SIZE) |
1136 | return NULL; |
1137 | |
1138 | page = alloc_page(GFP_ATOMIC); |
1139 | if (!page) |
1140 | return NULL; |
1141 | |
1142 | memcpy(page_address(page) + page_off, page_address(p) + offset, *len); |
1143 | page_off += *len; |
1144 | |
1145 | while (--*num_buf) { |
1146 | unsigned int buflen; |
1147 | void *buf; |
1148 | int off; |
1149 | |
1150 | buf = virtnet_rq_get_buf(rq, len: &buflen, NULL); |
1151 | if (unlikely(!buf)) |
1152 | goto err_buf; |
1153 | |
1154 | p = virt_to_head_page(x: buf); |
1155 | off = buf - page_address(p); |
1156 | |
1157 | /* guard against a misconfigured or uncooperative backend that |
* is sending packets larger than the MTU.
1159 | */ |
1160 | if ((page_off + buflen + tailroom) > PAGE_SIZE) { |
1161 | put_page(page: p); |
1162 | goto err_buf; |
1163 | } |
1164 | |
1165 | memcpy(page_address(page) + page_off, |
1166 | page_address(p) + off, buflen); |
1167 | page_off += buflen; |
1168 | put_page(page: p); |
1169 | } |
1170 | |
1171 | /* Headroom does not contribute to packet length */ |
1172 | *len = page_off - VIRTIO_XDP_HEADROOM; |
1173 | return page; |
1174 | err_buf: |
1175 | __free_pages(page, order: 0); |
1176 | return NULL; |
1177 | } |
1178 | |
1179 | static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, |
1180 | unsigned int xdp_headroom, |
1181 | void *buf, |
1182 | unsigned int len) |
1183 | { |
1184 | unsigned int header_offset; |
1185 | unsigned int headroom; |
1186 | unsigned int buflen; |
1187 | struct sk_buff *skb; |
1188 | |
1189 | header_offset = VIRTNET_RX_PAD + xdp_headroom; |
1190 | headroom = vi->hdr_len + header_offset; |
1191 | buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + |
1192 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1193 | |
1194 | skb = virtnet_build_skb(buf, buflen, headroom, len); |
1195 | if (unlikely(!skb)) |
1196 | return NULL; |
1197 | |
1198 | buf += header_offset; |
1199 | memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); |
1200 | |
1201 | return skb; |
1202 | } |
1203 | |
1204 | static struct sk_buff *receive_small_xdp(struct net_device *dev, |
1205 | struct virtnet_info *vi, |
1206 | struct receive_queue *rq, |
1207 | struct bpf_prog *xdp_prog, |
1208 | void *buf, |
1209 | unsigned int xdp_headroom, |
1210 | unsigned int len, |
1211 | unsigned int *xdp_xmit, |
1212 | struct virtnet_rq_stats *stats) |
1213 | { |
1214 | unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; |
1215 | unsigned int headroom = vi->hdr_len + header_offset; |
1216 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; |
1217 | struct page *page = virt_to_head_page(x: buf); |
1218 | struct page *xdp_page; |
1219 | unsigned int buflen; |
1220 | struct xdp_buff xdp; |
1221 | struct sk_buff *skb; |
1222 | unsigned int metasize = 0; |
1223 | u32 act; |
1224 | |
1225 | if (unlikely(hdr->hdr.gso_type)) |
1226 | goto err_xdp; |
1227 | |
1228 | buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + |
1229 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1230 | |
1231 | if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { |
1232 | int offset = buf - page_address(page) + header_offset; |
1233 | unsigned int tlen = len + vi->hdr_len; |
1234 | int num_buf = 1; |
1235 | |
1236 | xdp_headroom = virtnet_get_headroom(vi); |
1237 | header_offset = VIRTNET_RX_PAD + xdp_headroom; |
1238 | headroom = vi->hdr_len + header_offset; |
1239 | buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + |
1240 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1241 | xdp_page = xdp_linearize_page(rq, num_buf: &num_buf, p: page, |
1242 | offset, page_off: header_offset, |
1243 | len: &tlen); |
1244 | if (!xdp_page) |
1245 | goto err_xdp; |
1246 | |
1247 | buf = page_address(xdp_page); |
1248 | put_page(page); |
1249 | page = xdp_page; |
1250 | } |
1251 | |
1252 | xdp_init_buff(xdp: &xdp, frame_sz: buflen, rxq: &rq->xdp_rxq); |
1253 | xdp_prepare_buff(xdp: &xdp, hard_start: buf + VIRTNET_RX_PAD + vi->hdr_len, |
1254 | headroom: xdp_headroom, data_len: len, meta_valid: true); |
1255 | |
1256 | act = virtnet_xdp_handler(xdp_prog, xdp: &xdp, dev, xdp_xmit, stats); |
1257 | |
1258 | switch (act) { |
1259 | case XDP_PASS: |
1260 | /* Recalculate length in case bpf program changed it */ |
1261 | len = xdp.data_end - xdp.data; |
1262 | metasize = xdp.data - xdp.data_meta; |
1263 | break; |
1264 | |
1265 | case XDP_TX: |
1266 | case XDP_REDIRECT: |
1267 | goto xdp_xmit; |
1268 | |
1269 | default: |
1270 | goto err_xdp; |
1271 | } |
1272 | |
1273 | skb = virtnet_build_skb(buf, buflen, headroom: xdp.data - buf, len); |
1274 | if (unlikely(!skb)) |
1275 | goto err; |
1276 | |
1277 | if (metasize) |
1278 | skb_metadata_set(skb, meta_len: metasize); |
1279 | |
1280 | return skb; |
1281 | |
1282 | err_xdp: |
1283 | u64_stats_inc(p: &stats->xdp_drops); |
1284 | err: |
1285 | u64_stats_inc(p: &stats->drops); |
1286 | put_page(page); |
1287 | xdp_xmit: |
1288 | return NULL; |
1289 | } |
1290 | |
1291 | static struct sk_buff *receive_small(struct net_device *dev, |
1292 | struct virtnet_info *vi, |
1293 | struct receive_queue *rq, |
1294 | void *buf, void *ctx, |
1295 | unsigned int len, |
1296 | unsigned int *xdp_xmit, |
1297 | struct virtnet_rq_stats *stats) |
1298 | { |
1299 | unsigned int xdp_headroom = (unsigned long)ctx; |
1300 | struct page *page = virt_to_head_page(x: buf); |
1301 | struct sk_buff *skb; |
1302 | |
1303 | len -= vi->hdr_len; |
1304 | u64_stats_add(p: &stats->bytes, val: len); |
1305 | |
1306 | if (unlikely(len > GOOD_PACKET_LEN)) { |
1307 | pr_debug("%s: rx error: len %u exceeds max size %d\n", |
1308 | dev->name, len, GOOD_PACKET_LEN); |
1309 | DEV_STATS_INC(dev, rx_length_errors); |
1310 | goto err; |
1311 | } |
1312 | |
1313 | if (unlikely(vi->xdp_enabled)) { |
1314 | struct bpf_prog *xdp_prog; |
1315 | |
1316 | rcu_read_lock(); |
1317 | xdp_prog = rcu_dereference(rq->xdp_prog); |
1318 | if (xdp_prog) { |
1319 | skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, |
1320 | xdp_headroom, len, xdp_xmit, |
1321 | stats); |
1322 | rcu_read_unlock(); |
1323 | return skb; |
1324 | } |
1325 | rcu_read_unlock(); |
1326 | } |
1327 | |
1328 | skb = receive_small_build_skb(vi, xdp_headroom, buf, len); |
1329 | if (likely(skb)) |
1330 | return skb; |
1331 | |
1332 | err: |
1333 | u64_stats_inc(p: &stats->drops); |
1334 | put_page(page); |
1335 | return NULL; |
1336 | } |
1337 | |
1338 | static struct sk_buff *receive_big(struct net_device *dev, |
1339 | struct virtnet_info *vi, |
1340 | struct receive_queue *rq, |
1341 | void *buf, |
1342 | unsigned int len, |
1343 | struct virtnet_rq_stats *stats) |
1344 | { |
1345 | struct page *page = buf; |
1346 | struct sk_buff *skb = |
1347 | page_to_skb(vi, rq, page, offset: 0, len, PAGE_SIZE, headroom: 0); |
1348 | |
1349 | u64_stats_add(p: &stats->bytes, val: len - vi->hdr_len); |
1350 | if (unlikely(!skb)) |
1351 | goto err; |
1352 | |
1353 | return skb; |
1354 | |
1355 | err: |
1356 | u64_stats_inc(p: &stats->drops); |
1357 | give_pages(rq, page); |
1358 | return NULL; |
1359 | } |
1360 | |
1361 | static void mergeable_buf_free(struct receive_queue *rq, int num_buf, |
1362 | struct net_device *dev, |
1363 | struct virtnet_rq_stats *stats) |
1364 | { |
1365 | struct page *page; |
1366 | void *buf; |
1367 | int len; |
1368 | |
1369 | while (num_buf-- > 1) { |
1370 | buf = virtnet_rq_get_buf(rq, len: &len, NULL); |
1371 | if (unlikely(!buf)) { |
1372 | pr_debug("%s: rx error: %d buffers missing\n", |
1373 | dev->name, num_buf); |
1374 | DEV_STATS_INC(dev, rx_length_errors); |
1375 | break; |
1376 | } |
1377 | u64_stats_add(p: &stats->bytes, val: len); |
1378 | page = virt_to_head_page(x: buf); |
1379 | put_page(page); |
1380 | } |
1381 | } |
1382 | |
1383 | /* Why not use xdp_build_skb_from_frame() ? |
1384 | * XDP core assumes that xdp frags are PAGE_SIZE in length, while in |
1385 | * virtio-net there are 2 points that do not match its requirements: |
1386 | * 1. The size of the prefilled buffer is not fixed before xdp is set. |
1387 | * 2. xdp_build_skb_from_frame() does more checks that we don't need, |
1388 | * like eth_type_trans() (which virtio-net does in receive_buf()). |
1389 | */ |
1390 | static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, |
1391 | struct virtnet_info *vi, |
1392 | struct xdp_buff *xdp, |
1393 | unsigned int xdp_frags_truesz) |
1394 | { |
1395 | struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); |
1396 | unsigned int headroom, data_len; |
1397 | struct sk_buff *skb; |
1398 | int metasize; |
1399 | u8 nr_frags; |
1400 | |
1401 | if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { |
1402 | pr_debug("Error building skb as missing reserved tailroom for xdp"); |
1403 | return NULL; |
1404 | } |
1405 | |
1406 | if (unlikely(xdp_buff_has_frags(xdp))) |
1407 | nr_frags = sinfo->nr_frags; |
1408 | |
1409 | skb = build_skb(data: xdp->data_hard_start, frag_size: xdp->frame_sz); |
1410 | if (unlikely(!skb)) |
1411 | return NULL; |
1412 | |
1413 | headroom = xdp->data - xdp->data_hard_start; |
1414 | data_len = xdp->data_end - xdp->data; |
1415 | skb_reserve(skb, len: headroom); |
1416 | __skb_put(skb, len: data_len); |
1417 | |
1418 | metasize = xdp->data - xdp->data_meta; |
1419 | metasize = metasize > 0 ? metasize : 0; |
1420 | if (metasize) |
1421 | skb_metadata_set(skb, meta_len: metasize); |
1422 | |
1423 | if (unlikely(xdp_buff_has_frags(xdp))) |
1424 | xdp_update_skb_shared_info(skb, nr_frags, |
1425 | size: sinfo->xdp_frags_size, |
1426 | truesize: xdp_frags_truesz, |
1427 | pfmemalloc: xdp_buff_is_frag_pfmemalloc(xdp)); |
1428 | |
1429 | return skb; |
1430 | } |
1431 | |
1432 | /* TODO: build xdp in big mode */ |
1433 | static int virtnet_build_xdp_buff_mrg(struct net_device *dev, |
1434 | struct virtnet_info *vi, |
1435 | struct receive_queue *rq, |
1436 | struct xdp_buff *xdp, |
1437 | void *buf, |
1438 | unsigned int len, |
1439 | unsigned int frame_sz, |
1440 | int *num_buf, |
1441 | unsigned int *xdp_frags_truesize, |
1442 | struct virtnet_rq_stats *stats) |
1443 | { |
1444 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf; |
1445 | unsigned int headroom, tailroom, room; |
1446 | unsigned int truesize, cur_frag_size; |
1447 | struct skb_shared_info *shinfo; |
1448 | unsigned int xdp_frags_truesz = 0; |
1449 | struct page *page; |
1450 | skb_frag_t *frag; |
1451 | int offset; |
1452 | void *ctx; |
1453 | |
1454 | xdp_init_buff(xdp, frame_sz, rxq: &rq->xdp_rxq); |
1455 | xdp_prepare_buff(xdp, hard_start: buf - VIRTIO_XDP_HEADROOM, |
1456 | VIRTIO_XDP_HEADROOM + vi->hdr_len, data_len: len - vi->hdr_len, meta_valid: true); |
1457 | |
1458 | if (!*num_buf) |
1459 | return 0; |
1460 | |
1461 | if (*num_buf > 1) { |
/* To build a multi-buffer xdp_buff, the
 * XDP_FLAGS_HAS_FRAGS flag must be set on the
 * xdp_buff.
 */
1466 | if (!xdp_buff_has_frags(xdp)) |
1467 | xdp_buff_set_frags_flag(xdp); |
1468 | |
1469 | shinfo = xdp_get_shared_info_from_buff(xdp); |
1470 | shinfo->nr_frags = 0; |
1471 | shinfo->xdp_frags_size = 0; |
1472 | } |
1473 | |
1474 | if (*num_buf > MAX_SKB_FRAGS + 1) |
1475 | return -EINVAL; |
1476 | |
1477 | while (--*num_buf > 0) { |
1478 | buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx); |
1479 | if (unlikely(!buf)) { |
1480 | pr_debug("%s: rx error: %d buffers out of %d missing\n", |
1481 | dev->name, *num_buf, |
1482 | virtio16_to_cpu(vi->vdev, hdr->num_buffers)); |
1483 | DEV_STATS_INC(dev, rx_length_errors); |
1484 | goto err; |
1485 | } |
1486 | |
1487 | u64_stats_add(p: &stats->bytes, val: len); |
1488 | page = virt_to_head_page(x: buf); |
1489 | offset = buf - page_address(page); |
1490 | |
1491 | truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1492 | headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1493 | tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1494 | room = SKB_DATA_ALIGN(headroom + tailroom); |
1495 | |
1496 | cur_frag_size = truesize; |
1497 | xdp_frags_truesz += cur_frag_size; |
1498 | if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { |
1499 | put_page(page); |
1500 | pr_debug("%s: rx error: len %u exceeds truesize %lu\n", |
1501 | dev->name, len, (unsigned long)(truesize - room)); |
1502 | DEV_STATS_INC(dev, rx_length_errors); |
1503 | goto err; |
1504 | } |
1505 | |
1506 | frag = &shinfo->frags[shinfo->nr_frags++]; |
1507 | skb_frag_fill_page_desc(frag, page, off: offset, size: len); |
1508 | if (page_is_pfmemalloc(page)) |
1509 | xdp_buff_set_frag_pfmemalloc(xdp); |
1510 | |
1511 | shinfo->xdp_frags_size += len; |
1512 | } |
1513 | |
1514 | *xdp_frags_truesize = xdp_frags_truesz; |
1515 | return 0; |
1516 | |
1517 | err: |
1518 | put_xdp_frags(xdp); |
1519 | return -EINVAL; |
1520 | } |
1521 | |
1522 | static void *mergeable_xdp_get_buf(struct virtnet_info *vi, |
1523 | struct receive_queue *rq, |
1524 | struct bpf_prog *xdp_prog, |
1525 | void *ctx, |
1526 | unsigned int *frame_sz, |
1527 | int *num_buf, |
1528 | struct page **page, |
1529 | int offset, |
1530 | unsigned int *len, |
1531 | struct virtio_net_hdr_mrg_rxbuf *hdr) |
1532 | { |
1533 | unsigned int truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1534 | unsigned int headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1535 | struct page *xdp_page; |
1536 | unsigned int xdp_room; |
1537 | |
1538 | /* Transient failure which in theory could occur if |
1539 | * in-flight packets from before XDP was enabled reach |
1540 | * the receive path after XDP is loaded. |
1541 | */ |
1542 | if (unlikely(hdr->hdr.gso_type)) |
1543 | return NULL; |
1544 | |
/* Now the XDP core assumes the frag size is PAGE_SIZE, but buffers
 * with headroom may add a hole to truesize, which
 * makes their length exceed PAGE_SIZE. So we disable the
 * hole mechanism for xdp. See add_recvbuf_mergeable().
 */
1550 | *frame_sz = truesize; |
1551 | |
1552 | if (likely(headroom >= virtnet_get_headroom(vi) && |
1553 | (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { |
1554 | return page_address(*page) + offset; |
1555 | } |
1556 | |
/* This happens when headroom is not enough because
 * the buffer was prefilled before XDP was set.
 * This should only happen for the first several packets.
 * In fact, a vq reset could be used here to help clean up
 * the prefilled buffers, but many existing devices do not
 * support it, and we don't want to bother users who are
 * using xdp normally.
 */
1565 | if (!xdp_prog->aux->xdp_has_frags) { |
1566 | /* linearize data for XDP */ |
1567 | xdp_page = xdp_linearize_page(rq, num_buf, |
1568 | p: *page, offset, |
1569 | VIRTIO_XDP_HEADROOM, |
1570 | len); |
1571 | if (!xdp_page) |
1572 | return NULL; |
1573 | } else { |
1574 | xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + |
1575 | sizeof(struct skb_shared_info)); |
1576 | if (*len + xdp_room > PAGE_SIZE) |
1577 | return NULL; |
1578 | |
1579 | xdp_page = alloc_page(GFP_ATOMIC); |
1580 | if (!xdp_page) |
1581 | return NULL; |
1582 | |
1583 | memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, |
1584 | page_address(*page) + offset, *len); |
1585 | } |
1586 | |
1587 | *frame_sz = PAGE_SIZE; |
1588 | |
1589 | put_page(page: *page); |
1590 | |
1591 | *page = xdp_page; |
1592 | |
1593 | return page_address(*page) + VIRTIO_XDP_HEADROOM; |
1594 | } |
1595 | |
1596 | static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, |
1597 | struct virtnet_info *vi, |
1598 | struct receive_queue *rq, |
1599 | struct bpf_prog *xdp_prog, |
1600 | void *buf, |
1601 | void *ctx, |
1602 | unsigned int len, |
1603 | unsigned int *xdp_xmit, |
1604 | struct virtnet_rq_stats *stats) |
1605 | { |
1606 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf; |
1607 | int num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers); |
1608 | struct page *page = virt_to_head_page(x: buf); |
1609 | int offset = buf - page_address(page); |
1610 | unsigned int xdp_frags_truesz = 0; |
1611 | struct sk_buff *head_skb; |
1612 | unsigned int frame_sz; |
1613 | struct xdp_buff xdp; |
1614 | void *data; |
1615 | u32 act; |
1616 | int err; |
1617 | |
1618 | data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, frame_sz: &frame_sz, num_buf: &num_buf, page: &page, |
1619 | offset, len: &len, hdr); |
1620 | if (unlikely(!data)) |
1621 | goto err_xdp; |
1622 | |
1623 | err = virtnet_build_xdp_buff_mrg(dev, vi, rq, xdp: &xdp, buf: data, len, frame_sz, |
1624 | num_buf: &num_buf, xdp_frags_truesize: &xdp_frags_truesz, stats); |
1625 | if (unlikely(err)) |
1626 | goto err_xdp; |
1627 | |
1628 | act = virtnet_xdp_handler(xdp_prog, xdp: &xdp, dev, xdp_xmit, stats); |
1629 | |
1630 | switch (act) { |
1631 | case XDP_PASS: |
1632 | head_skb = build_skb_from_xdp_buff(dev, vi, xdp: &xdp, xdp_frags_truesz); |
1633 | if (unlikely(!head_skb)) |
1634 | break; |
1635 | return head_skb; |
1636 | |
1637 | case XDP_TX: |
1638 | case XDP_REDIRECT: |
1639 | return NULL; |
1640 | |
1641 | default: |
1642 | break; |
1643 | } |
1644 | |
1645 | put_xdp_frags(xdp: &xdp); |
1646 | |
1647 | err_xdp: |
1648 | put_page(page); |
1649 | mergeable_buf_free(rq, num_buf, dev, stats); |
1650 | |
1651 | u64_stats_inc(p: &stats->xdp_drops); |
1652 | u64_stats_inc(p: &stats->drops); |
1653 | return NULL; |
1654 | } |
1655 | |
1656 | static struct sk_buff *receive_mergeable(struct net_device *dev, |
1657 | struct virtnet_info *vi, |
1658 | struct receive_queue *rq, |
1659 | void *buf, |
1660 | void *ctx, |
1661 | unsigned int len, |
1662 | unsigned int *xdp_xmit, |
1663 | struct virtnet_rq_stats *stats) |
1664 | { |
1665 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf; |
1666 | int num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers); |
1667 | struct page *page = virt_to_head_page(x: buf); |
1668 | int offset = buf - page_address(page); |
1669 | struct sk_buff *head_skb, *curr_skb; |
1670 | unsigned int truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1671 | unsigned int headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1672 | unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1673 | unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); |
1674 | |
1675 | head_skb = NULL; |
1676 | u64_stats_add(p: &stats->bytes, val: len - vi->hdr_len); |
1677 | |
1678 | if (unlikely(len > truesize - room)) { |
1679 | pr_debug("%s: rx error: len %u exceeds truesize %lu\n", |
1680 | dev->name, len, (unsigned long)(truesize - room)); |
1681 | DEV_STATS_INC(dev, rx_length_errors); |
1682 | goto err_skb; |
1683 | } |
1684 | |
1685 | if (unlikely(vi->xdp_enabled)) { |
1686 | struct bpf_prog *xdp_prog; |
1687 | |
1688 | rcu_read_lock(); |
1689 | xdp_prog = rcu_dereference(rq->xdp_prog); |
1690 | if (xdp_prog) { |
1691 | head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, |
1692 | len, xdp_xmit, stats); |
1693 | rcu_read_unlock(); |
1694 | return head_skb; |
1695 | } |
1696 | rcu_read_unlock(); |
1697 | } |
1698 | |
1699 | head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); |
1700 | curr_skb = head_skb; |
1701 | |
1702 | if (unlikely(!curr_skb)) |
1703 | goto err_skb; |
1704 | while (--num_buf) { |
1705 | int num_skb_frags; |
1706 | |
1707 | buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx); |
1708 | if (unlikely(!buf)) { |
1709 | pr_debug("%s: rx error: %d buffers out of %d missing\n", |
1710 | dev->name, num_buf, |
1711 | virtio16_to_cpu(vi->vdev, |
1712 | hdr->num_buffers)); |
1713 | DEV_STATS_INC(dev, rx_length_errors); |
1714 | goto err_buf; |
1715 | } |
1716 | |
1717 | u64_stats_add(p: &stats->bytes, val: len); |
1718 | page = virt_to_head_page(x: buf); |
1719 | |
1720 | truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1721 | headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1722 | tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1723 | room = SKB_DATA_ALIGN(headroom + tailroom); |
1724 | if (unlikely(len > truesize - room)) { |
1725 | pr_debug("%s: rx error: len %u exceeds truesize %lu\n", |
1726 | dev->name, len, (unsigned long)(truesize - room)); |
1727 | DEV_STATS_INC(dev, rx_length_errors); |
1728 | goto err_skb; |
1729 | } |
1730 | |
1731 | num_skb_frags = skb_shinfo(curr_skb)->nr_frags; |
1732 | if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { |
1733 | struct sk_buff *nskb = alloc_skb(size: 0, GFP_ATOMIC); |
1734 | |
1735 | if (unlikely(!nskb)) |
1736 | goto err_skb; |
1737 | if (curr_skb == head_skb) |
1738 | skb_shinfo(curr_skb)->frag_list = nskb; |
1739 | else |
1740 | curr_skb->next = nskb; |
1741 | curr_skb = nskb; |
1742 | head_skb->truesize += nskb->truesize; |
1743 | num_skb_frags = 0; |
1744 | } |
1745 | if (curr_skb != head_skb) { |
1746 | head_skb->data_len += len; |
1747 | head_skb->len += len; |
1748 | head_skb->truesize += truesize; |
1749 | } |
1750 | offset = buf - page_address(page); |
1751 | if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
1752 | put_page(page);
1753 | skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
1754 | len, truesize);
1755 | } else {
1756 | skb_add_rx_frag(curr_skb, num_skb_frags, page,
1757 | offset, len, truesize);
1758 | } |
1759 | } |
1760 | |
1761 | ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
1762 | return head_skb; |
1763 | |
1764 | err_skb: |
1765 | put_page(page); |
1766 | mergeable_buf_free(rq, num_buf, dev, stats); |
1767 | |
1768 | err_buf: |
1769 | u64_stats_inc(&stats->drops);
1770 | dev_kfree_skb(head_skb); |
1771 | return NULL; |
1772 | } |
1773 | |
1774 | static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, |
1775 | struct sk_buff *skb) |
1776 | { |
1777 | enum pkt_hash_types rss_hash_type; |
1778 | |
1779 | if (!hdr_hash || !skb) |
1780 | return; |
1781 | |
1782 | switch (__le16_to_cpu(hdr_hash->hash_report)) { |
1783 | case VIRTIO_NET_HASH_REPORT_TCPv4: |
1784 | case VIRTIO_NET_HASH_REPORT_UDPv4: |
1785 | case VIRTIO_NET_HASH_REPORT_TCPv6: |
1786 | case VIRTIO_NET_HASH_REPORT_UDPv6: |
1787 | case VIRTIO_NET_HASH_REPORT_TCPv6_EX: |
1788 | case VIRTIO_NET_HASH_REPORT_UDPv6_EX: |
1789 | rss_hash_type = PKT_HASH_TYPE_L4; |
1790 | break; |
1791 | case VIRTIO_NET_HASH_REPORT_IPv4: |
1792 | case VIRTIO_NET_HASH_REPORT_IPv6: |
1793 | case VIRTIO_NET_HASH_REPORT_IPv6_EX: |
1794 | rss_hash_type = PKT_HASH_TYPE_L3; |
1795 | break; |
1796 | case VIRTIO_NET_HASH_REPORT_NONE: |
1797 | default: |
1798 | rss_hash_type = PKT_HASH_TYPE_NONE; |
1799 | } |
1800 | skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
1801 | } |
1802 | |
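/* Dispatch one completed receive buffer to the matching path: mergeable
 * rx buffers, "big" (page-chained) packets, or single small buffers.
 * On success the skb is populated from the virtio-net header (hash,
 * checksum validity, GSO) and handed to GRO; short packets and
 * malformed headers are dropped and counted as rx_length_errors or
 * rx_frame_errors respectively.
 */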
1803 | static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, |
1804 | void *buf, unsigned int len, void **ctx, |
1805 | unsigned int *xdp_xmit, |
1806 | struct virtnet_rq_stats *stats) |
1807 | { |
1808 | struct net_device *dev = vi->dev; |
1809 | struct sk_buff *skb; |
1810 | struct virtio_net_common_hdr *hdr; |
1811 | |
1812 | if (unlikely(len < vi->hdr_len + ETH_HLEN)) { |
1813 | pr_debug("%s: short packet %i\n", dev->name, len); |
1814 | DEV_STATS_INC(dev, rx_length_errors); |
1815 | virtnet_rq_free_buf(vi, rq, buf); |
1816 | return; |
1817 | } |
1818 | |
1819 | if (vi->mergeable_rx_bufs) |
1820 | skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, |
1821 | stats); |
1822 | else if (vi->big_packets) |
1823 | skb = receive_big(dev, vi, rq, buf, len, stats); |
1824 | else |
1825 | skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); |
1826 | |
1827 | if (unlikely(!skb)) |
1828 | return; |
1829 | |
1830 | hdr = skb_vnet_common_hdr(skb); |
1831 | if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) |
1832 | virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
1833 | |
1834 | if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) |
1835 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1836 | |
1837 | if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
1838 | virtio_is_little_endian(vi->vdev))) {
1839 | net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", |
1840 | dev->name, hdr->hdr.gso_type, |
1841 | hdr->hdr.gso_size); |
1842 | goto frame_err; |
1843 | } |
1844 | |
1845 | skb_record_rx_queue(skb, vq2rxq(rq->vq));
1846 | skb->protocol = eth_type_trans(skb, dev); |
1847 | pr_debug("Receiving skb proto 0x%04x len %i type %i\n", |
1848 | ntohs(skb->protocol), skb->len, skb->pkt_type); |
1849 | |
1850 | napi_gro_receive(&rq->napi, skb);
1851 | return; |
1852 | |
1853 | frame_err: |
1854 | DEV_STATS_INC(dev, rx_frame_errors); |
1855 | dev_kfree_skb(skb); |
1856 | } |
1857 | |
1858 | /* Unlike mergeable buffers, all buffers are allocated to the |
1859 | * same size, except for the headroom. For this reason we do |
1860 | * not need to use mergeable_len_to_ctx here - it is enough |
1861 | * to store the headroom as the context ignoring the truesize. |
1862 | */ |
1863 | static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, |
1864 | gfp_t gfp) |
1865 | { |
1866 | char *buf; |
1867 | unsigned int xdp_headroom = virtnet_get_headroom(vi); |
1868 | void *ctx = (void *)(unsigned long)xdp_headroom; |
1869 | int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; |
1870 | int err; |
1871 | |
1872 | len = SKB_DATA_ALIGN(len) + |
1873 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1874 | |
1875 | buf = virtnet_rq_alloc(rq, len, gfp);
1876 | if (unlikely(!buf))
1877 | return -ENOMEM;
1878 | 
1879 | virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom,
1880 | vi->hdr_len + GOOD_PACKET_LEN);
1881 | 
1882 | err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1883 | if (err < 0) {
1884 | if (rq->do_dma)
1885 | virtnet_rq_unmap(rq, buf, 0);
1886 | put_page(virt_to_head_page(buf));
1887 | } |
1888 | |
1889 | return err; |
1890 | } |
1891 | |
1892 | static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, |
1893 | gfp_t gfp) |
1894 | { |
1895 | struct page *first, *list = NULL; |
1896 | char *p; |
1897 | int i, err, offset; |
1898 | |
1899 | sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); |
1900 | |
1901 | /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ |
1902 | for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { |
1903 | first = get_a_page(rq, gfp);
1904 | if (!first) {
1905 | if (list)
1906 | give_pages(rq, list);
1907 | return -ENOMEM;
1908 | }
1909 | sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
1910 | |
1911 | /* chain new page in list head to match sg */ |
1912 | first->private = (unsigned long)list; |
1913 | list = first; |
1914 | } |
1915 | |
1916 | first = get_a_page(rq, gfp);
1917 | if (!first) {
1918 | give_pages(rq, list);
1919 | return -ENOMEM; |
1920 | } |
1921 | p = page_address(first); |
1922 | |
1923 | /* rq->sg[0], rq->sg[1] share the same page */ |
1924 | /* a separated rq->sg[0] for header - required in case !any_header_sg */ |
1925 | sg_set_buf(&rq->sg[0], p, vi->hdr_len);
1926 | 
1927 | /* rq->sg[1] for data packet, from offset */
1928 | offset = sizeof(struct padded_vnet_hdr);
1929 | sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
1930 | 
1931 | /* chain first in list head */
1932 | first->private = (unsigned long)list;
1933 | err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
1934 | first, gfp);
1935 | if (err < 0)
1936 | give_pages(rq, first);
1937 | |
1938 | return err; |
1939 | } |
1940 | |
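/* Pick a receive buffer length for the mergeable path from the EWMA of
 * recent packet sizes: the average is clamped to
 * [rq->min_buf_len, PAGE_SIZE - hdr_len], the header length is added,
 * and the result is rounded up to the cache line size. When XDP
 * headroom/tailroom is reserved (room != 0) the whole remainder of the
 * page is used instead. Illustrative numbers only: with a 12-byte
 * header, 64-byte cache lines and an EWMA of ~1500 bytes, this yields
 * 12 + 1500 = 1512, aligned up to 1536.
 */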
1941 | static unsigned int get_mergeable_buf_len(struct receive_queue *rq, |
1942 | struct ewma_pkt_len *avg_pkt_len, |
1943 | unsigned int room) |
1944 | { |
1945 | struct virtnet_info *vi = rq->vq->vdev->priv; |
1946 | const size_t hdr_len = vi->hdr_len; |
1947 | unsigned int len; |
1948 | |
1949 | if (room) |
1950 | return PAGE_SIZE - room; |
1951 | |
1952 | len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), |
1953 | rq->min_buf_len, PAGE_SIZE - hdr_len); |
1954 | |
1955 | return ALIGN(len, L1_CACHE_BYTES); |
1956 | } |
1957 | |
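/* Post one mergeable receive buffer. The buffer is carved from the
 * per-queue page frag; its final truesize and any XDP headroom are
 * packed into the opaque ctx via mergeable_len_to_ctx() so the receive
 * path can recover them when the descriptor completes.
 */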
1958 | static int add_recvbuf_mergeable(struct virtnet_info *vi, |
1959 | struct receive_queue *rq, gfp_t gfp) |
1960 | { |
1961 | struct page_frag *alloc_frag = &rq->alloc_frag; |
1962 | unsigned int headroom = virtnet_get_headroom(vi); |
1963 | unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1964 | unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); |
1965 | unsigned int len, hole; |
1966 | void *ctx; |
1967 | char *buf; |
1968 | int err; |
1969 | |
1970 | /* Extra tailroom is needed to satisfy XDP's assumption. This
1971 | * means rx frag coalescing won't work, but since GSO is
1972 | * disabled while XDP is attached, it is not a big issue.
1973 | */
1974 | len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
1975 | 
1976 | buf = virtnet_rq_alloc(rq, len + room, gfp);
1977 | if (unlikely(!buf)) |
1978 | return -ENOMEM; |
1979 | |
1980 | buf += headroom; /* advance address leaving hole at front of pkt */ |
1981 | hole = alloc_frag->size - alloc_frag->offset; |
1982 | if (hole < len + room) { |
1983 | /* To avoid internal fragmentation, if there is very likely not |
1984 | * enough space for another buffer, add the remaining space to |
1985 | * the current buffer. |
1986 | * XDP core assumes that frame_size of xdp_buff and the length |
1987 | * of the frag are PAGE_SIZE, so we disable the hole mechanism. |
1988 | */ |
1989 | if (!headroom) |
1990 | len += hole; |
1991 | alloc_frag->offset += hole; |
1992 | } |
1993 | |
1994 | virtnet_rq_init_one_sg(rq, buf, len); |
1995 | |
1996 | ctx = mergeable_len_to_ctx(len + room, headroom);
1997 | err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1998 | if (err < 0) {
1999 | if (rq->do_dma)
2000 | virtnet_rq_unmap(rq, buf, 0);
2001 | put_page(virt_to_head_page(buf));
2002 | } |
2003 | |
2004 | return err; |
2005 | } |
2006 | |
2007 | /* |
2008 | * Returns false if we couldn't fill entirely (OOM). |
2009 | * |
2010 | * Normally run in the receive path, but can also be run from ndo_open |
2011 | * before we're receiving packets, or from refill_work which is |
2012 | * careful to disable receiving (using napi_disable). |
2013 | */ |
2014 | static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, |
2015 | gfp_t gfp) |
2016 | { |
2017 | int err; |
2018 | bool oom; |
2019 | |
2020 | do { |
2021 | if (vi->mergeable_rx_bufs) |
2022 | err = add_recvbuf_mergeable(vi, rq, gfp); |
2023 | else if (vi->big_packets) |
2024 | err = add_recvbuf_big(vi, rq, gfp); |
2025 | else |
2026 | err = add_recvbuf_small(vi, rq, gfp); |
2027 | |
2028 | oom = err == -ENOMEM; |
2029 | if (err) |
2030 | break; |
2031 | } while (rq->vq->num_free); |
2032 | if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
2033 | unsigned long flags;
2034 | 
2035 | flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
2036 | u64_stats_inc(&rq->stats.kicks);
2037 | u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
2038 | } |
2039 | |
2040 | return !oom; |
2041 | } |
2042 | |
2043 | static void skb_recv_done(struct virtqueue *rvq) |
2044 | { |
2045 | struct virtnet_info *vi = rvq->vdev->priv; |
2046 | struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
2047 | 
2048 | rq->calls++;
2049 | virtqueue_napi_schedule(&rq->napi, rvq);
2050 | } |
2051 | |
2052 | static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) |
2053 | { |
2054 | napi_enable(napi);
2055 | |
2056 | /* If all buffers were filled by the other side before we enabled NAPI,
2057 | * we won't get another interrupt, so process any outstanding packets now.
2058 | * Calling local_bh_enable afterwards triggers softirq processing.
2059 | */ |
2060 | local_bh_disable(); |
2061 | virtqueue_napi_schedule(napi, vq); |
2062 | local_bh_enable(); |
2063 | } |
2064 | |
2065 | static void virtnet_napi_tx_enable(struct virtnet_info *vi, |
2066 | struct virtqueue *vq, |
2067 | struct napi_struct *napi) |
2068 | { |
2069 | if (!napi->weight) |
2070 | return; |
2071 | |
2072 | /* Tx napi touches cachelines on the cpu handling tx interrupts. Only |
2073 | * enable the feature if this is likely affine with the transmit path. |
2074 | */ |
2075 | if (!vi->affinity_hint_set) { |
2076 | napi->weight = 0; |
2077 | return; |
2078 | } |
2079 | |
2080 | return virtnet_napi_enable(vq, napi); |
2081 | } |
2082 | |
2083 | static void virtnet_napi_tx_disable(struct napi_struct *napi) |
2084 | { |
2085 | if (napi->weight) |
2086 | napi_disable(napi);
2087 | } |
2088 | |
2089 | static void refill_work(struct work_struct *work) |
2090 | { |
2091 | struct virtnet_info *vi = |
2092 | container_of(work, struct virtnet_info, refill.work); |
2093 | bool still_empty; |
2094 | int i; |
2095 | |
2096 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
2097 | struct receive_queue *rq = &vi->rq[i]; |
2098 | |
2099 | napi_disable(&rq->napi);
2100 | still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
2101 | virtnet_napi_enable(rq->vq, &rq->napi);
2102 | |
2103 | /* In theory, this can happen: if we don't get any buffers in |
2104 | * we will *never* try to fill again. |
2105 | */ |
2106 | if (still_empty) |
2107 | schedule_delayed_work(&vi->refill, HZ/2);
2108 | } |
2109 | } |
2110 | |
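/* NAPI receive worker: pull up to @budget completed buffers off the
 * virtqueue, feed them to receive_buf(), and refill the ring when it
 * runs low (deferring to refill_work on atomic allocation failure).
 * Per-call counters are accumulated locally and folded into the
 * queue's u64 stats once at the end.
 */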
2111 | static int virtnet_receive(struct receive_queue *rq, int budget, |
2112 | unsigned int *xdp_xmit) |
2113 | { |
2114 | struct virtnet_info *vi = rq->vq->vdev->priv; |
2115 | struct virtnet_rq_stats stats = {}; |
2116 | unsigned int len; |
2117 | int packets = 0; |
2118 | void *buf; |
2119 | int i; |
2120 | |
2121 | if (!vi->big_packets || vi->mergeable_rx_bufs) { |
2122 | void *ctx; |
2123 | |
2124 | while (packets < budget && |
2125 | (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
2126 | receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
2127 | packets++;
2128 | }
2129 | } else {
2130 | while (packets < budget &&
2131 | (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
2132 | receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
2133 | packets++; |
2134 | } |
2135 | } |
2136 | |
2137 | if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { |
2138 | if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { |
2139 | spin_lock(&vi->refill_lock);
2140 | if (vi->refill_enabled)
2141 | schedule_delayed_work(&vi->refill, 0);
2142 | spin_unlock(&vi->refill_lock);
2143 | } |
2144 | } |
2145 | |
2146 | u64_stats_set(&stats.packets, packets);
2147 | u64_stats_update_begin(&rq->stats.syncp);
2148 | for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
2149 | size_t offset = virtnet_rq_stats_desc[i].offset;
2150 | u64_stats_t *item, *src;
2151 | 
2152 | item = (u64_stats_t *)((u8 *)&rq->stats + offset);
2153 | src = (u64_stats_t *)((u8 *)&stats + offset);
2154 | u64_stats_add(item, u64_stats_read(src));
2155 | }
2156 | u64_stats_update_end(&rq->stats.syncp);
2157 | |
2158 | return packets; |
2159 | } |
2160 | |
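/* Opportunistically reclaim completed transmit buffers for the send
 * queue paired with this receive queue while we are already in rx NAPI
 * context. Skipped when tx NAPI is disabled for that queue, when the
 * queue carries raw XDP buffers, or while the queue is being reset.
 */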
2161 | static void virtnet_poll_cleantx(struct receive_queue *rq) |
2162 | { |
2163 | struct virtnet_info *vi = rq->vq->vdev->priv; |
2164 | unsigned int index = vq2rxq(rq->vq);
2165 | struct send_queue *sq = &vi->sq[index];
2166 | struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
2167 | 
2168 | if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
2169 | return; |
2170 | |
2171 | if (__netif_tx_trylock(txq)) { |
2172 | if (sq->reset) { |
2173 | __netif_tx_unlock(txq); |
2174 | return; |
2175 | } |
2176 | |
2177 | do { |
2178 | virtqueue_disable_cb(sq->vq);
2179 | free_old_xmit(sq, true);
2180 | } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
2181 | 
2182 | if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
2183 | netif_tx_wake_queue(txq);
2184 | |
2185 | __netif_tx_unlock(txq); |
2186 | } |
2187 | } |
2188 | |
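/* Fold the interrupt count (rq->calls) and the cumulative packet/byte
 * counters into a DIM sample and feed it to net_dim(), which decides
 * whether the interrupt coalescing parameters should be adjusted (the
 * actual update runs from rq->dim.work).
 */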
2189 | static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) |
2190 | { |
2191 | struct dim_sample cur_sample = {}; |
2192 | |
2193 | if (!rq->packets_in_napi) |
2194 | return; |
2195 | |
2196 | u64_stats_update_begin(&rq->stats.syncp);
2197 | dim_update_sample(rq->calls,
2198 | u64_stats_read(&rq->stats.packets),
2199 | u64_stats_read(&rq->stats.bytes),
2200 | &cur_sample);
2201 | u64_stats_update_end(&rq->stats.syncp);
2202 | 
2203 | net_dim(&rq->dim, cur_sample);
2204 | rq->packets_in_napi = 0; |
2205 | } |
2206 | |
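/* rx NAPI poll handler: clean the paired tx queue, process received
 * buffers, flush any XDP_REDIRECT work, and re-arm virtqueue callbacks
 * once the budget is not exhausted. When adaptive coalescing (DIM) is
 * enabled, a sample is fed to net_dim() after a complete poll, and a
 * kick is issued if XDP_TX frames were queued during this poll.
 */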
2207 | static int virtnet_poll(struct napi_struct *napi, int budget) |
2208 | { |
2209 | struct receive_queue *rq = |
2210 | container_of(napi, struct receive_queue, napi); |
2211 | struct virtnet_info *vi = rq->vq->vdev->priv; |
2212 | struct send_queue *sq; |
2213 | unsigned int received; |
2214 | unsigned int xdp_xmit = 0; |
2215 | bool napi_complete; |
2216 | |
2217 | virtnet_poll_cleantx(rq); |
2218 | |
2219 | received = virtnet_receive(rq, budget, &xdp_xmit);
2220 | rq->packets_in_napi += received; |
2221 | |
2222 | if (xdp_xmit & VIRTIO_XDP_REDIR) |
2223 | xdp_do_flush(); |
2224 | |
2225 | /* Out of packets? */ |
2226 | if (received < budget) { |
2227 | napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
2228 | if (napi_complete && rq->dim_enabled) |
2229 | virtnet_rx_dim_update(vi, rq); |
2230 | } |
2231 | |
2232 | if (xdp_xmit & VIRTIO_XDP_TX) { |
2233 | sq = virtnet_xdp_get_sq(vi); |
2234 | if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
2235 | u64_stats_update_begin(&sq->stats.syncp);
2236 | u64_stats_inc(&sq->stats.kicks);
2237 | u64_stats_update_end(&sq->stats.syncp);
2238 | } |
2239 | virtnet_xdp_put_sq(vi, sq); |
2240 | } |
2241 | |
2242 | return received; |
2243 | } |
2244 | |
2245 | static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) |
2246 | { |
2247 | virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
2248 | napi_disable(&vi->rq[qp_index].napi);
2249 | xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
2250 | } |
2251 | |
2252 | static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) |
2253 | { |
2254 | struct net_device *dev = vi->dev; |
2255 | int err; |
2256 | |
2257 | err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
2258 | vi->rq[qp_index].napi.napi_id);
2259 | if (err < 0)
2260 | return err;
2261 | 
2262 | err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
2263 | MEM_TYPE_PAGE_SHARED, NULL);
2264 | if (err < 0)
2265 | goto err_xdp_reg_mem_model;
2266 | 
2267 | virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
2268 | virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
2269 | |
2270 | return 0; |
2271 | |
2272 | err_xdp_reg_mem_model: |
2273 | xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
2274 | return err; |
2275 | } |
2276 | |
2277 | static int virtnet_open(struct net_device *dev) |
2278 | { |
2279 | struct virtnet_info *vi = netdev_priv(dev); |
2280 | int i, err; |
2281 | |
2282 | enable_delayed_refill(vi); |
2283 | |
2284 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2285 | if (i < vi->curr_queue_pairs) |
2286 | /* Make sure we have some buffers: if oom use wq. */ |
2287 | if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
2288 | schedule_delayed_work(&vi->refill, 0);
2289 | 
2290 | err = virtnet_enable_queue_pair(vi, i);
2291 | if (err < 0) |
2292 | goto err_enable_qp; |
2293 | } |
2294 | |
2295 | return 0; |
2296 | |
2297 | err_enable_qp: |
2298 | disable_delayed_refill(vi); |
2299 | cancel_delayed_work_sync(&vi->refill);
2300 | 
2301 | for (i--; i >= 0; i--) {
2302 | virtnet_disable_queue_pair(vi, i);
2303 | cancel_work_sync(&vi->rq[i].dim.work);
2304 | } |
2305 | |
2306 | return err; |
2307 | } |
2308 | |
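/* tx NAPI poll handler: free completed transmit skbs under the tx
 * queue lock, wake the queue when enough descriptors are available,
 * and carefully re-enable virtqueue callbacks, re-scheduling NAPI if
 * more completions raced in after napi_complete_done().
 */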
2309 | static int virtnet_poll_tx(struct napi_struct *napi, int budget) |
2310 | { |
2311 | struct send_queue *sq = container_of(napi, struct send_queue, napi); |
2312 | struct virtnet_info *vi = sq->vq->vdev->priv; |
2313 | unsigned int index = vq2txq(sq->vq);
2314 | struct netdev_queue *txq; |
2315 | int opaque; |
2316 | bool done; |
2317 | |
2318 | if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { |
2319 | /* We don't need to enable cb for XDP */ |
2320 | napi_complete_done(napi, 0);
2321 | return 0; |
2322 | } |
2323 | |
2324 | txq = netdev_get_tx_queue(vi->dev, index);
2325 | __netif_tx_lock(txq, raw_smp_processor_id());
2326 | virtqueue_disable_cb(sq->vq);
2327 | free_old_xmit(sq, true);
2328 | 
2329 | if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
2330 | netif_tx_wake_queue(txq);
2331 | 
2332 | opaque = virtqueue_enable_cb_prepare(sq->vq);
2333 | 
2334 | done = napi_complete_done(napi, 0);
2335 | 
2336 | if (!done)
2337 | virtqueue_disable_cb(sq->vq);
2338 | |
2339 | __netif_tx_unlock(txq); |
2340 | |
2341 | if (done) { |
2342 | if (unlikely(virtqueue_poll(sq->vq, opaque))) { |
2343 | if (napi_schedule_prep(napi)) {
2344 | __netif_tx_lock(txq, raw_smp_processor_id());
2345 | virtqueue_disable_cb(sq->vq);
2346 | __netif_tx_unlock(txq);
2347 | __napi_schedule(napi);
2348 | } |
2349 | } |
2350 | } |
2351 | |
2352 | return 0; |
2353 | } |
2354 | |
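/* Map an skb onto the send virtqueue. If the device accepts arbitrary
 * header layouts (any_header_sg) and there is enough headroom, the
 * virtio-net header is pushed in front of the linear data so a single
 * sg covers header + packet; otherwise the header goes in its own sg
 * entry ahead of the packet fragments.
 */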
2355 | static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) |
2356 | { |
2357 | struct virtio_net_hdr_mrg_rxbuf *hdr; |
2358 | const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; |
2359 | struct virtnet_info *vi = sq->vq->vdev->priv; |
2360 | int num_sg; |
2361 | unsigned hdr_len = vi->hdr_len; |
2362 | bool can_push; |
2363 | |
2364 | pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); |
2365 | |
2366 | can_push = vi->any_header_sg && |
2367 | !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && |
2368 | !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; |
2369 | /* Even if we can, don't push here yet as this would skew |
2370 | * csum_start offset below. */ |
2371 | if (can_push) |
2372 | hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); |
2373 | else |
2374 | hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; |
2375 | |
2376 | if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
2377 | virtio_is_little_endian(vi->vdev), false,
2378 | 0))
2379 | return -EPROTO; |
2380 | |
2381 | if (vi->mergeable_rx_bufs) |
2382 | hdr->num_buffers = 0; |
2383 | |
2384 | sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); |
2385 | if (can_push) { |
2386 | __skb_push(skb, hdr_len);
2387 | num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
2388 | if (unlikely(num_sg < 0))
2389 | return num_sg;
2390 | /* Pull header back to avoid skew in tx bytes calculations. */
2391 | __skb_pull(skb, hdr_len);
2392 | } else {
2393 | sg_set_buf(sq->sg, hdr, hdr_len);
2394 | num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
2395 | if (unlikely(num_sg < 0)) |
2396 | return num_sg; |
2397 | num_sg++; |
2398 | } |
2399 | return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
2400 | } |
2401 | |
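/* ndo_start_xmit: reclaim completed buffers, queue the skb via
 * xmit_skb(), stop the queue if it is about to fill, and kick the
 * device unless more packets are expected (netdev_xmit_more()) and the
 * queue is still running. Errors from xmit_skb() are counted and the
 * skb is dropped rather than requeued.
 */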
2402 | static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) |
2403 | { |
2404 | struct virtnet_info *vi = netdev_priv(dev); |
2405 | int qnum = skb_get_queue_mapping(skb); |
2406 | struct send_queue *sq = &vi->sq[qnum]; |
2407 | int err; |
2408 | struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
2409 | bool kick = !netdev_xmit_more(); |
2410 | bool use_napi = sq->napi.weight; |
2411 | |
2412 | /* Free up any pending old buffers before queueing new ones. */ |
2413 | do { |
2414 | if (use_napi) |
2415 | virtqueue_disable_cb(sq->vq);
2416 | 
2417 | free_old_xmit(sq, false);
2418 | |
2419 | } while (use_napi && kick && |
2420 | unlikely(!virtqueue_enable_cb_delayed(sq->vq))); |
2421 | |
2422 | /* timestamp packet in software */ |
2423 | skb_tx_timestamp(skb); |
2424 | |
2425 | /* Try to transmit */ |
2426 | err = xmit_skb(sq, skb); |
2427 | |
2428 | /* This should not happen! */ |
2429 | if (unlikely(err)) { |
2430 | DEV_STATS_INC(dev, tx_fifo_errors); |
2431 | if (net_ratelimit()) |
2432 | dev_warn(&dev->dev, |
2433 | "Unexpected TXQ (%d) queue failure: %d\n", |
2434 | qnum, err); |
2435 | DEV_STATS_INC(dev, tx_dropped); |
2436 | dev_kfree_skb_any(skb); |
2437 | return NETDEV_TX_OK; |
2438 | } |
2439 | |
2440 | /* Don't wait up for transmitted skbs to be freed. */ |
2441 | if (!use_napi) { |
2442 | skb_orphan(skb); |
2443 | nf_reset_ct(skb); |
2444 | } |
2445 | |
2446 | check_sq_full_and_disable(vi, dev, sq); |
2447 | |
2448 | if (kick || netif_xmit_stopped(txq)) {
2449 | if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
2450 | u64_stats_update_begin(&sq->stats.syncp);
2451 | u64_stats_inc(&sq->stats.kicks);
2452 | u64_stats_update_end(&sq->stats.syncp);
2453 | } |
2454 | } |
2455 | |
2456 | return NETDEV_TX_OK; |
2457 | } |
2458 | |
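/* Resize a receive virtqueue. NAPI (and any pending DIM work) is
 * stopped around virtqueue_resize(), buffers still in the ring are
 * released through virtnet_rq_unmap_free_buf(), and the ring is
 * refilled before NAPI is re-enabled.
 */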
2459 | static int virtnet_rx_resize(struct virtnet_info *vi, |
2460 | struct receive_queue *rq, u32 ring_num) |
2461 | { |
2462 | bool running = netif_running(vi->dev);
2463 | int err, qindex; |
2464 | |
2465 | qindex = rq - vi->rq; |
2466 | |
2467 | if (running) { |
2468 | napi_disable(&rq->napi);
2469 | cancel_work_sync(&rq->dim.work);
2470 | }
2471 | 
2472 | err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);
2473 | if (err)
2474 | netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
2475 | 
2476 | if (!try_fill_recv(vi, rq, GFP_KERNEL))
2477 | schedule_delayed_work(&vi->refill, 0);
2478 | 
2479 | if (running)
2480 | virtnet_napi_enable(rq->vq, &rq->napi);
2481 | return err; |
2482 | } |
2483 | |
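/* Resize a transmit virtqueue. The queue is stopped and marked as
 * resetting (sq->reset) so that neither the stack nor the rx-side
 * cleanup path touches it while virtqueue_resize() runs; unused
 * buffers are dropped via virtnet_sq_free_unused_buf().
 */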
2484 | static int virtnet_tx_resize(struct virtnet_info *vi, |
2485 | struct send_queue *sq, u32 ring_num) |
2486 | { |
2487 | bool running = netif_running(vi->dev);
2488 | struct netdev_queue *txq; |
2489 | int err, qindex; |
2490 | |
2491 | qindex = sq - vi->sq; |
2492 | |
2493 | if (running) |
2494 | virtnet_napi_tx_disable(&sq->napi);
2495 | 
2496 | txq = netdev_get_tx_queue(vi->dev, qindex);
2497 | 
2498 | /* 1. wait for all xmit to complete
2499 | * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() |
2500 | */ |
2501 | __netif_tx_lock_bh(txq); |
2502 | |
2503 | /* Prevent rx poll from accessing sq. */ |
2504 | sq->reset = true; |
2505 | |
2506 | /* Prevent the upper layer from trying to send packets. */ |
2507 | netif_stop_subqueue(vi->dev, qindex);
2508 | 
2509 | __netif_tx_unlock_bh(txq);
2510 | 
2511 | err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
2512 | if (err)
2513 | netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
2514 | 
2515 | __netif_tx_lock_bh(txq);
2516 | sq->reset = false;
2517 | netif_tx_wake_queue(txq);
2518 | __netif_tx_unlock_bh(txq);
2519 | 
2520 | if (running)
2521 | virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
2522 | return err; |
2523 | } |
2524 | |
2525 | /* |
2526 | * Send command via the control virtqueue and check status. Commands |
2527 | * supported by the hypervisor, as indicated by feature bits, should |
2528 | * never fail unless improperly formatted. |
2529 | */ |
2530 | static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, |
2531 | struct scatterlist *out) |
2532 | { |
2533 | struct scatterlist *sgs[4], hdr, stat; |
2534 | unsigned out_num = 0, tmp; |
2535 | int ret; |
2536 | |
2537 | /* Caller should know better */ |
2538 | BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); |
2539 | |
2540 | vi->ctrl->status = ~0; |
2541 | vi->ctrl->hdr.class = class; |
2542 | vi->ctrl->hdr.cmd = cmd; |
2543 | /* Add header */ |
2544 | sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); |
2545 | sgs[out_num++] = &hdr; |
2546 | |
2547 | if (out) |
2548 | sgs[out_num++] = out; |
2549 | |
2550 | /* Add return status. */ |
2551 | sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); |
2552 | sgs[out_num] = &stat; |
2553 | |
2554 | BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); |
2555 | ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
2556 | if (ret < 0) {
2557 | dev_warn(&vi->vdev->dev,
2558 | "Failed to add sgs for command vq: %d\n", ret);
2559 | return false; |
2560 | } |
2561 | |
2562 | if (unlikely(!virtqueue_kick(vi->cvq))) |
2563 | return vi->ctrl->status == VIRTIO_NET_OK; |
2564 | |
2565 | /* Spin for a response, the kick causes an ioport write, trapping |
2566 | * into the hypervisor, so the request should be handled immediately. |
2567 | */ |
2568 | while (!virtqueue_get_buf(vi->cvq, &tmp) &&
2569 | !virtqueue_is_broken(vi->cvq)) {
2570 | cond_resched(); |
2571 | cpu_relax(); |
2572 | } |
2573 | |
2574 | return vi->ctrl->status == VIRTIO_NET_OK; |
2575 | } |
2576 | |
2577 | static int virtnet_set_mac_address(struct net_device *dev, void *p) |
2578 | { |
2579 | struct virtnet_info *vi = netdev_priv(dev); |
2580 | struct virtio_device *vdev = vi->vdev; |
2581 | int ret; |
2582 | struct sockaddr *addr; |
2583 | struct scatterlist sg; |
2584 | |
2585 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
2586 | return -EOPNOTSUPP; |
2587 | |
2588 | addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
2589 | if (!addr)
2590 | return -ENOMEM;
2591 | 
2592 | ret = eth_prepare_mac_addr_change(dev, addr);
2593 | if (ret) |
2594 | goto out; |
2595 | |
2596 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { |
2597 | sg_init_one(&sg, addr->sa_data, dev->addr_len); |
2598 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, |
2599 | VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
2600 | dev_warn(&vdev->dev, |
2601 | "Failed to set mac address by vq command.\n"); |
2602 | ret = -EINVAL; |
2603 | goto out; |
2604 | } |
2605 | } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && |
2606 | !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { |
2607 | unsigned int i; |
2608 | |
2609 | /* Naturally, this has an atomicity problem. */ |
2610 | for (i = 0; i < dev->addr_len; i++) |
2611 | virtio_cwrite8(vdev, |
2612 | offsetof(struct virtio_net_config, mac) + |
2613 | i, addr->sa_data[i]);
2614 | } |
2615 | |
2616 | eth_commit_mac_addr_change(dev, p); |
2617 | ret = 0; |
2618 | |
2619 | out: |
2620 | kfree(addr);
2621 | return ret; |
2622 | } |
2623 | |
2624 | static void virtnet_stats(struct net_device *dev, |
2625 | struct rtnl_link_stats64 *tot) |
2626 | { |
2627 | struct virtnet_info *vi = netdev_priv(dev); |
2628 | unsigned int start; |
2629 | int i; |
2630 | |
2631 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2632 | u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; |
2633 | struct receive_queue *rq = &vi->rq[i]; |
2634 | struct send_queue *sq = &vi->sq[i]; |
2635 | |
2636 | do { |
2637 | start = u64_stats_fetch_begin(&sq->stats.syncp);
2638 | tpackets = u64_stats_read(&sq->stats.packets);
2639 | tbytes = u64_stats_read(&sq->stats.bytes);
2640 | terrors = u64_stats_read(&sq->stats.tx_timeouts);
2641 | } while (u64_stats_fetch_retry(&sq->stats.syncp, start));
2642 | 
2643 | do {
2644 | start = u64_stats_fetch_begin(&rq->stats.syncp);
2645 | rpackets = u64_stats_read(&rq->stats.packets);
2646 | rbytes = u64_stats_read(&rq->stats.bytes);
2647 | rdrops = u64_stats_read(&rq->stats.drops);
2648 | } while (u64_stats_fetch_retry(&rq->stats.syncp, start));
2649 | |
2650 | tot->rx_packets += rpackets; |
2651 | tot->tx_packets += tpackets; |
2652 | tot->rx_bytes += rbytes; |
2653 | tot->tx_bytes += tbytes; |
2654 | tot->rx_dropped += rdrops; |
2655 | tot->tx_errors += terrors; |
2656 | } |
2657 | |
2658 | tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); |
2659 | tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); |
2660 | tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); |
2661 | tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); |
2662 | } |
2663 | |
2664 | static void virtnet_ack_link_announce(struct virtnet_info *vi) |
2665 | { |
2666 | rtnl_lock(); |
2667 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, |
2668 | VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) |
2669 | dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); |
2670 | rtnl_unlock(); |
2671 | } |
2672 | |
2673 | static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) |
2674 | { |
2675 | struct scatterlist sg; |
2676 | struct net_device *dev = vi->dev; |
2677 | |
2678 | if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
2679 | return 0;
2680 | 
2681 | vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
2682 | sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
2683 | 
2684 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
2685 | VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
2686 | dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
2687 | queue_pairs);
2688 | return -EINVAL;
2689 | } else {
2690 | vi->curr_queue_pairs = queue_pairs;
2691 | /* virtnet_open() will refill when the device is brought up. */
2692 | if (dev->flags & IFF_UP)
2693 | schedule_delayed_work(&vi->refill, 0);
2694 | } |
2695 | |
2696 | return 0; |
2697 | } |
2698 | |
2699 | static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) |
2700 | { |
2701 | int err; |
2702 | |
2703 | rtnl_lock(); |
2704 | err = _virtnet_set_queues(vi, queue_pairs); |
2705 | rtnl_unlock(); |
2706 | return err; |
2707 | } |
2708 | |
2709 | static int virtnet_close(struct net_device *dev) |
2710 | { |
2711 | struct virtnet_info *vi = netdev_priv(dev); |
2712 | int i; |
2713 | |
2714 | /* Make sure NAPI doesn't schedule refill work */ |
2715 | disable_delayed_refill(vi); |
2716 | /* Make sure refill_work doesn't re-enable napi! */ |
2717 | cancel_delayed_work_sync(&vi->refill);
2718 | 
2719 | for (i = 0; i < vi->max_queue_pairs; i++) {
2720 | virtnet_disable_queue_pair(vi, i);
2721 | cancel_work_sync(&vi->rq[i].dim.work);
2722 | } |
2723 | |
2724 | return 0; |
2725 | } |
2726 | |
2727 | static void virtnet_rx_mode_work(struct work_struct *work) |
2728 | { |
2729 | struct virtnet_info *vi = |
2730 | container_of(work, struct virtnet_info, rx_mode_work); |
2731 | struct net_device *dev = vi->dev; |
2732 | struct scatterlist sg[2]; |
2733 | struct virtio_net_ctrl_mac *mac_data; |
2734 | struct netdev_hw_addr *ha; |
2735 | int uc_count; |
2736 | int mc_count; |
2737 | void *buf; |
2738 | int i; |
2739 | |
2740 | /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ |
2741 | if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_CTRL_RX)) |
2742 | return; |
2743 | |
2744 | rtnl_lock(); |
2745 | |
2746 | vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); |
2747 | vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); |
2748 | |
2749 | sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); |
2750 | |
2751 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, |
2752 | VIRTIO_NET_CTRL_RX_PROMISC, out: sg)) |
2753 | dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", |
2754 | vi->ctrl->promisc ? "en": "dis"); |
2755 | |
2756 | sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); |
2757 | |
2758 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, |
2759 | VIRTIO_NET_CTRL_RX_ALLMULTI, out: sg)) |
2760 | dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", |
2761 | vi->ctrl->allmulti ? "en": "dis"); |
2762 | |
2763 | netif_addr_lock_bh(dev); |
2764 | |
2765 | uc_count = netdev_uc_count(dev); |
2766 | mc_count = netdev_mc_count(dev); |
2767 | /* MAC filter - use one buffer for both lists */ |
2768 | buf = kzalloc(size: ((uc_count + mc_count) * ETH_ALEN) + |
2769 | (2 * sizeof(mac_data->entries)), GFP_ATOMIC); |
2770 | mac_data = buf; |
2771 | if (!buf) { |
2772 | netif_addr_unlock_bh(dev); |
2773 | rtnl_unlock(); |
2774 | return; |
2775 | } |
2776 | |
2777 | sg_init_table(sg, 2); |
2778 | |
2779 | /* Store the unicast list and count in the front of the buffer */ |
2780 | mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: uc_count); |
2781 | i = 0; |
2782 | netdev_for_each_uc_addr(ha, dev) |
2783 | memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); |
2784 | |
2785 | sg_set_buf(sg: &sg[0], buf: mac_data, |
2786 | buflen: sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); |
2787 | |
2788 | /* multicast list and count fill the end */ |
2789 | mac_data = (void *)&mac_data->macs[uc_count][0]; |
2790 | |
2791 | mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: mc_count); |
2792 | i = 0; |
2793 | netdev_for_each_mc_addr(ha, dev) |
2794 | memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); |
2795 | |
2796 | netif_addr_unlock_bh(dev); |
2797 | |
2798 | sg_set_buf(sg: &sg[1], buf: mac_data, |
2799 | buflen: sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); |
2800 | |
2801 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, |
2802 | VIRTIO_NET_CTRL_MAC_TABLE_SET, out: sg)) |
2803 | dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); |
2804 | |
2805 | rtnl_unlock(); |
2806 | |
2807 | kfree(objp: buf); |
2808 | } |
2809 | |
2810 | static void virtnet_set_rx_mode(struct net_device *dev) |
2811 | { |
2812 | struct virtnet_info *vi = netdev_priv(dev); |
2813 | |
2814 | if (vi->rx_mode_work_enabled) |
2815 | schedule_work(work: &vi->rx_mode_work); |
2816 | } |
2817 | |
2818 | static int virtnet_vlan_rx_add_vid(struct net_device *dev, |
2819 | __be16 proto, u16 vid) |
2820 | { |
2821 | struct virtnet_info *vi = netdev_priv(dev); |
2822 | struct scatterlist sg; |
2823 | |
2824 | vi->ctrl->vid = cpu_to_virtio16(vdev: vi->vdev, val: vid); |
2825 | sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); |
2826 | |
2827 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, |
2828 | VIRTIO_NET_CTRL_VLAN_ADD, out: &sg)) |
2829 | dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); |
2830 | return 0; |
2831 | } |
2832 | |
2833 | static int virtnet_vlan_rx_kill_vid(struct net_device *dev, |
2834 | __be16 proto, u16 vid) |
2835 | { |
2836 | struct virtnet_info *vi = netdev_priv(dev); |
2837 | struct scatterlist sg; |
2838 | |
2839 | vi->ctrl->vid = cpu_to_virtio16(vdev: vi->vdev, val: vid); |
2840 | sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); |
2841 | |
2842 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, |
2843 | VIRTIO_NET_CTRL_VLAN_DEL, out: &sg)) |
2844 | dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); |
2845 | return 0; |
2846 | } |
2847 | |
2848 | static void virtnet_clean_affinity(struct virtnet_info *vi) |
2849 | { |
2850 | int i; |
2851 | |
2852 | if (vi->affinity_hint_set) { |
2853 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2854 | virtqueue_set_affinity(vq: vi->rq[i].vq, NULL); |
2855 | virtqueue_set_affinity(vq: vi->sq[i].vq, NULL); |
2856 | } |
2857 | |
2858 | vi->affinity_hint_set = false; |
2859 | } |
2860 | } |
2861 | |
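/* Spread queue pairs across the online CPUs: each pair gets a group of
 * roughly num_online_cpus() / curr_queue_pairs CPUs (the first few
 * groups absorb any stragglers), and the same mask is used for the
 * rx/tx virtqueue affinity hints and the XPS mapping of that queue.
 */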
2862 | static void virtnet_set_affinity(struct virtnet_info *vi) |
2863 | { |
2864 | cpumask_var_t mask; |
2865 | int stragglers; |
2866 | int group_size; |
2867 | int i, j, cpu; |
2868 | int num_cpu; |
2869 | int stride; |
2870 | |
2871 | if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) { |
2872 | virtnet_clean_affinity(vi); |
2873 | return; |
2874 | } |
2875 | |
2876 | num_cpu = num_online_cpus(); |
2877 | stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); |
2878 | stragglers = num_cpu >= vi->curr_queue_pairs ? |
2879 | num_cpu % vi->curr_queue_pairs : |
2880 | 0; |
2881 | cpu = cpumask_first(cpu_online_mask); |
2882 | |
2883 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
2884 | group_size = stride + (i < stragglers ? 1 : 0); |
2885 | |
2886 | for (j = 0; j < group_size; j++) { |
2887 | cpumask_set_cpu(cpu, dstp: mask); |
2888 | cpu = cpumask_next_wrap(n: cpu, cpu_online_mask, |
2889 | start: nr_cpu_ids, wrap: false); |
2890 | } |
2891 | virtqueue_set_affinity(vq: vi->rq[i].vq, cpu_mask: mask); |
2892 | virtqueue_set_affinity(vq: vi->sq[i].vq, cpu_mask: mask); |
2893 | __netif_set_xps_queue(dev: vi->dev, cpumask_bits(mask), index: i, type: XPS_CPUS); |
2894 | cpumask_clear(dstp: mask); |
2895 | } |
2896 | |
2897 | vi->affinity_hint_set = true; |
2898 | free_cpumask_var(mask); |
2899 | } |
2900 | |
2901 | static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) |
2902 | { |
2903 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
2904 | node); |
2905 | virtnet_set_affinity(vi); |
2906 | return 0; |
2907 | } |
2908 | |
2909 | static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) |
2910 | { |
2911 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
2912 | node_dead); |
2913 | virtnet_set_affinity(vi); |
2914 | return 0; |
2915 | } |
2916 | |
2917 | static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) |
2918 | { |
2919 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
2920 | node); |
2921 | |
2922 | virtnet_clean_affinity(vi); |
2923 | return 0; |
2924 | } |
2925 | |
2926 | static enum cpuhp_state virtionet_online; |
2927 | |
2928 | static int virtnet_cpu_notif_add(struct virtnet_info *vi) |
2929 | { |
2930 | int ret; |
2931 | |
2932 | ret = cpuhp_state_add_instance_nocalls(state: virtionet_online, node: &vi->node); |
2933 | if (ret) |
2934 | return ret; |
2935 | ret = cpuhp_state_add_instance_nocalls(state: CPUHP_VIRT_NET_DEAD, |
2936 | node: &vi->node_dead); |
2937 | if (!ret) |
2938 | return ret; |
2939 | cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node); |
2940 | return ret; |
2941 | } |
2942 | |
2943 | static void virtnet_cpu_notif_remove(struct virtnet_info *vi) |
2944 | { |
2945 | cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node); |
2946 | cpuhp_state_remove_instance_nocalls(state: CPUHP_VIRT_NET_DEAD, |
2947 | node: &vi->node_dead); |
2948 | } |
2949 | |
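/* Program per-virtqueue interrupt coalescing: the target vqn and its
 * max_usecs/max_packets limits are sent through the control virtqueue
 * using VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET.
 */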
2950 | static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, |
2951 | u16 vqn, u32 max_usecs, u32 max_packets) |
2952 | { |
2953 | struct scatterlist sgs; |
2954 | |
2955 | vi->ctrl->coal_vq.vqn = cpu_to_le16(vqn); |
2956 | vi->ctrl->coal_vq.coal.max_usecs = cpu_to_le32(max_usecs); |
2957 | vi->ctrl->coal_vq.coal.max_packets = cpu_to_le32(max_packets); |
2958 | sg_init_one(&sgs, &vi->ctrl->coal_vq, sizeof(vi->ctrl->coal_vq)); |
2959 | |
2960 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, |
2961 | VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, |
2962 | out: &sgs)) |
2963 | return -EINVAL; |
2964 | |
2965 | return 0; |
2966 | } |
2967 | |
2968 | static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, |
2969 | u16 queue, u32 max_usecs, |
2970 | u32 max_packets) |
2971 | { |
2972 | int err; |
2973 | |
2974 | err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: rxq2vq(rxq: queue), |
2975 | max_usecs, max_packets); |
2976 | if (err) |
2977 | return err; |
2978 | |
2979 | vi->rq[queue].intr_coal.max_usecs = max_usecs; |
2980 | vi->rq[queue].intr_coal.max_packets = max_packets; |
2981 | |
2982 | return 0; |
2983 | } |
2984 | |
2985 | static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, |
2986 | u16 queue, u32 max_usecs, |
2987 | u32 max_packets) |
2988 | { |
2989 | int err; |
2990 | |
2991 | err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: txq2vq(txq: queue), |
2992 | max_usecs, max_packets); |
2993 | if (err) |
2994 | return err; |
2995 | |
2996 | vi->sq[queue].intr_coal.max_usecs = max_usecs; |
2997 | vi->sq[queue].intr_coal.max_packets = max_packets; |
2998 | |
2999 | return 0; |
3000 | } |
3001 | |
3002 | static void virtnet_get_ringparam(struct net_device *dev, |
3003 | struct ethtool_ringparam *ring, |
3004 | struct kernel_ethtool_ringparam *kernel_ring, |
3005 | struct netlink_ext_ack *extack) |
3006 | { |
3007 | struct virtnet_info *vi = netdev_priv(dev); |
3008 | |
3009 | ring->rx_max_pending = vi->rq[0].vq->num_max; |
3010 | ring->tx_max_pending = vi->sq[0].vq->num_max; |
3011 | ring->rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq); |
3012 | ring->tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq); |
3013 | } |
3014 | |
3015 | static int virtnet_set_ringparam(struct net_device *dev, |
3016 | struct ethtool_ringparam *ring, |
3017 | struct kernel_ethtool_ringparam *kernel_ring, |
3018 | struct netlink_ext_ack *extack) |
3019 | { |
3020 | struct virtnet_info *vi = netdev_priv(dev); |
3021 | u32 rx_pending, tx_pending; |
3022 | struct receive_queue *rq; |
3023 | struct send_queue *sq; |
3024 | int i, err; |
3025 | |
3026 | if (ring->rx_mini_pending || ring->rx_jumbo_pending) |
3027 | return -EINVAL; |
3028 | |
3029 | rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq); |
3030 | tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq); |
3031 | |
3032 | if (ring->rx_pending == rx_pending && |
3033 | ring->tx_pending == tx_pending) |
3034 | return 0; |
3035 | |
3036 | if (ring->rx_pending > vi->rq[0].vq->num_max) |
3037 | return -EINVAL; |
3038 | |
3039 | if (ring->tx_pending > vi->sq[0].vq->num_max) |
3040 | return -EINVAL; |
3041 | |
3042 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3043 | rq = vi->rq + i; |
3044 | sq = vi->sq + i; |
3045 | |
3046 | if (ring->tx_pending != tx_pending) { |
3047 | err = virtnet_tx_resize(vi, sq, ring_num: ring->tx_pending); |
3048 | if (err) |
3049 | return err; |
3050 | |
3051 | /* Upon disabling and re-enabling a transmit virtqueue, the device must |
3052 | * set the coalescing parameters of the virtqueue to those configured |
3053 | * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver |
3054 | * did not set any TX coalescing parameters, to 0. |
3055 | */ |
3056 | err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue: i, |
3057 | max_usecs: vi->intr_coal_tx.max_usecs, |
3058 | max_packets: vi->intr_coal_tx.max_packets); |
3059 | if (err) |
3060 | return err; |
3061 | } |
3062 | |
3063 | if (ring->rx_pending != rx_pending) { |
3064 | err = virtnet_rx_resize(vi, rq, ring_num: ring->rx_pending); |
3065 | if (err) |
3066 | return err; |
3067 | |
3068 | /* The reason is the same as for the transmit virtqueue reset */
3069 | err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue: i, |
3070 | max_usecs: vi->intr_coal_rx.max_usecs, |
3071 | max_packets: vi->intr_coal_rx.max_packets); |
3072 | if (err) |
3073 | return err; |
3074 | } |
3075 | } |
3076 | |
3077 | return 0; |
3078 | } |
3079 | |
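/* Push the current RSS/hash configuration to the device. The
 * virtio_net_ctrl_rss layout is sent as four scatterlist entries: the
 * fixed header up to the indirection table, the indirection table
 * itself, the fields from max_tx_vq up to the key, and finally the
 * hash key. The command used depends on whether full RSS or only hash
 * reporting is supported.
 */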
3080 | static bool virtnet_commit_rss_command(struct virtnet_info *vi) |
3081 | { |
3082 | struct net_device *dev = vi->dev; |
3083 | struct scatterlist sgs[4]; |
3084 | unsigned int sg_buf_size; |
3085 | |
3086 | /* prepare sgs */ |
3087 | sg_init_table(sgs, 4); |
3088 | |
3089 | sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); |
3090 | sg_set_buf(sg: &sgs[0], buf: &vi->ctrl->rss, buflen: sg_buf_size); |
3091 | |
3092 | sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); |
3093 | sg_set_buf(sg: &sgs[1], buf: vi->ctrl->rss.indirection_table, buflen: sg_buf_size); |
3094 | |
3095 | sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) |
3096 | - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); |
3097 | sg_set_buf(sg: &sgs[2], buf: &vi->ctrl->rss.max_tx_vq, buflen: sg_buf_size); |
3098 | |
3099 | sg_buf_size = vi->rss_key_size; |
3100 | sg_set_buf(sg: &sgs[3], buf: vi->ctrl->rss.key, buflen: sg_buf_size); |
3101 | |
3102 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, |
3103 | cmd: vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG |
3104 | : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, out: sgs)) { |
3105 | dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); |
3106 | return false; |
3107 | } |
3108 | return true; |
3109 | } |
3110 | |
3111 | static void virtnet_init_default_rss(struct virtnet_info *vi) |
3112 | { |
3113 | u32 indir_val = 0; |
3114 | int i = 0; |
3115 | |
3116 | vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; |
3117 | vi->rss_hash_types_saved = vi->rss_hash_types_supported; |
3118 | vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size |
3119 | ? vi->rss_indir_table_size - 1 : 0; |
3120 | vi->ctrl->rss.unclassified_queue = 0; |
3121 | |
3122 | for (; i < vi->rss_indir_table_size; ++i) { |
3123 | indir_val = ethtool_rxfh_indir_default(index: i, n_rx_rings: vi->curr_queue_pairs); |
3124 | vi->ctrl->rss.indirection_table[i] = indir_val; |
3125 | } |
3126 | |
3127 | vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; |
3128 | vi->ctrl->rss.hash_key_length = vi->rss_key_size; |
3129 | |
3130 | netdev_rss_key_fill(buffer: vi->ctrl->rss.key, len: vi->rss_key_size); |
3131 | } |
3132 | |
3133 | static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) |
3134 | { |
3135 | info->data = 0; |
3136 | switch (info->flow_type) { |
3137 | case TCP_V4_FLOW: |
3138 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { |
3139 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3140 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3141 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { |
3142 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3143 | } |
3144 | break; |
3145 | case TCP_V6_FLOW: |
3146 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { |
3147 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3148 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3149 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { |
3150 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3151 | } |
3152 | break; |
3153 | case UDP_V4_FLOW: |
3154 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { |
3155 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3156 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3157 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { |
3158 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3159 | } |
3160 | break; |
3161 | case UDP_V6_FLOW: |
3162 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { |
3163 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3164 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3165 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { |
3166 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3167 | } |
3168 | break; |
3169 | case IPV4_FLOW: |
3170 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) |
3171 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3172 | |
3173 | break; |
3174 | case IPV6_FLOW: |
3175 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) |
3176 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3177 | |
3178 | break; |
3179 | default: |
3180 | info->data = 0; |
3181 | break; |
3182 | } |
3183 | } |
3184 | |
3185 | static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) |
3186 | { |
3187 | u32 new_hashtypes = vi->rss_hash_types_saved; |
3188 | bool is_disable = info->data & RXH_DISCARD; |
3189 | bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); |
3190 | |
3191 | /* supports only 'sd', 'sdfn' and 'r' */ |
3192 | if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) |
3193 | return false; |
3194 | |
3195 | switch (info->flow_type) { |
3196 | case TCP_V4_FLOW: |
3197 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); |
3198 | if (!is_disable) |
3199 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 |
3200 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); |
3201 | break; |
3202 | case UDP_V4_FLOW: |
3203 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); |
3204 | if (!is_disable) |
3205 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 |
3206 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); |
3207 | break; |
3208 | case IPV4_FLOW: |
3209 | new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; |
3210 | if (!is_disable) |
3211 | new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; |
3212 | break; |
3213 | case TCP_V6_FLOW: |
3214 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); |
3215 | if (!is_disable) |
3216 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 |
3217 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); |
3218 | break; |
3219 | case UDP_V6_FLOW: |
3220 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); |
3221 | if (!is_disable) |
3222 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 |
3223 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); |
3224 | break; |
3225 | case IPV6_FLOW: |
3226 | new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; |
3227 | if (!is_disable) |
3228 | new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; |
3229 | break; |
3230 | default: |
3231 | /* unsupported flow */ |
3232 | return false; |
3233 | } |
3234 | |
3235 | /* if unsupported hashtype was set */ |
3236 | if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) |
3237 | return false; |
3238 | |
3239 | if (new_hashtypes != vi->rss_hash_types_saved) { |
3240 | vi->rss_hash_types_saved = new_hashtypes; |
3241 | vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; |
3242 | if (vi->dev->features & NETIF_F_RXHASH) |
3243 | return virtnet_commit_rss_command(vi); |
3244 | } |
3245 | |
3246 | return true; |
3247 | } |
3248 | |
3249 | static void virtnet_get_drvinfo(struct net_device *dev, |
3250 | struct ethtool_drvinfo *info) |
3251 | { |
3252 | struct virtnet_info *vi = netdev_priv(dev); |
3253 | struct virtio_device *vdev = vi->vdev; |
3254 | |
3255 | strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); |
3256 | strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); |
3257 | strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); |
3258 | |
3259 | } |
3260 | |
3261 | /* TODO: Eliminate OOO packets during switching */ |
3262 | static int virtnet_set_channels(struct net_device *dev, |
3263 | struct ethtool_channels *channels) |
3264 | { |
3265 | struct virtnet_info *vi = netdev_priv(dev); |
3266 | u16 queue_pairs = channels->combined_count; |
3267 | int err; |
3268 | |
3269 | /* We don't support separate rx/tx channels. |
3270 | * We don't allow setting 'other' channels. |
3271 | */ |
3272 | if (channels->rx_count || channels->tx_count || channels->other_count) |
3273 | return -EINVAL; |
3274 | |
3275 | if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) |
3276 | return -EINVAL; |
3277 | |
3278 | /* For now we don't support modifying channels while XDP is loaded.
3279 | * Also, when XDP is loaded all RX queues have XDP programs, so we only
3280 | * need to check a single RX queue.
3281 | */ |
3282 | if (vi->rq[0].xdp_prog) |
3283 | return -EINVAL; |
3284 | |
3285 | cpus_read_lock(); |
3286 | err = _virtnet_set_queues(vi, queue_pairs); |
3287 | if (err) { |
3288 | cpus_read_unlock(); |
3289 | goto err; |
3290 | } |
3291 | virtnet_set_affinity(vi); |
3292 | cpus_read_unlock(); |
3293 | |
3294 | netif_set_real_num_tx_queues(dev, txq: queue_pairs); |
3295 | netif_set_real_num_rx_queues(dev, rxq: queue_pairs); |
3296 | err: |
3297 | return err; |
3298 | } |
3299 | |
3300 | static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) |
3301 | { |
3302 | struct virtnet_info *vi = netdev_priv(dev); |
3303 | unsigned int i, j; |
3304 | u8 *p = data; |
3305 | |
3306 | switch (stringset) { |
3307 | case ETH_SS_STATS: |
3308 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3309 | for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) |
3310 | ethtool_sprintf(data: &p, fmt: "rx_queue_%u_%s", i, |
3311 | virtnet_rq_stats_desc[j].desc); |
3312 | } |
3313 | |
3314 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3315 | for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) |
3316 | ethtool_sprintf(data: &p, fmt: "tx_queue_%u_%s", i, |
3317 | virtnet_sq_stats_desc[j].desc); |
3318 | } |
3319 | break; |
3320 | } |
3321 | } |
3322 | |
3323 | static int virtnet_get_sset_count(struct net_device *dev, int sset) |
3324 | { |
3325 | struct virtnet_info *vi = netdev_priv(dev); |
3326 | |
3327 | switch (sset) { |
3328 | case ETH_SS_STATS: |
3329 | return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + |
3330 | VIRTNET_SQ_STATS_LEN); |
3331 | default: |
3332 | return -EOPNOTSUPP; |
3333 | } |
3334 | } |
3335 | |
3336 | static void virtnet_get_ethtool_stats(struct net_device *dev, |
3337 | struct ethtool_stats *stats, u64 *data) |
3338 | { |
3339 | struct virtnet_info *vi = netdev_priv(dev); |
3340 | unsigned int idx = 0, start, i, j; |
3341 | const u8 *stats_base; |
3342 | const u64_stats_t *p; |
3343 | size_t offset; |
3344 | |
3345 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3346 | struct receive_queue *rq = &vi->rq[i]; |
3347 | |
3348 | stats_base = (const u8 *)&rq->stats; |
3349 | do { |
3350 | start = u64_stats_fetch_begin(&rq->stats.syncp); |
3351 | for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { |
3352 | offset = virtnet_rq_stats_desc[j].offset; |
3353 | p = (const u64_stats_t *)(stats_base + offset); |
3354 | data[idx + j] = u64_stats_read(p); |
3355 | } |
3356 | } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); |
3357 | idx += VIRTNET_RQ_STATS_LEN; |
3358 | } |
3359 | |
3360 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3361 | struct send_queue *sq = &vi->sq[i]; |
3362 | |
3363 | stats_base = (const u8 *)&sq->stats; |
3364 | do { |
3365 | start = u64_stats_fetch_begin(&sq->stats.syncp); |
3366 | for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { |
3367 | offset = virtnet_sq_stats_desc[j].offset; |
3368 | p = (const u64_stats_t *)(stats_base + offset); |
3369 | data[idx + j] = u64_stats_read(p); |
3370 | } |
3371 | } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); |
3372 | idx += VIRTNET_SQ_STATS_LEN; |
3373 | } |
3374 | } |
3375 | |
3376 | static void virtnet_get_channels(struct net_device *dev, |
3377 | struct ethtool_channels *channels) |
3378 | { |
3379 | struct virtnet_info *vi = netdev_priv(dev); |
3380 | |
3381 | channels->combined_count = vi->curr_queue_pairs; |
3382 | channels->max_combined = vi->max_queue_pairs; |
3383 | channels->max_other = 0; |
3384 | channels->rx_count = 0; |
3385 | channels->tx_count = 0; |
3386 | channels->other_count = 0; |
3387 | } |
3388 | |
3389 | static int virtnet_set_link_ksettings(struct net_device *dev, |
3390 | const struct ethtool_link_ksettings *cmd) |
3391 | { |
3392 | struct virtnet_info *vi = netdev_priv(dev); |
3393 | |
3394 | return ethtool_virtdev_set_link_ksettings(dev, cmd, |
3395 | &vi->speed, &vi->duplex); |
3396 | } |
3397 | |
3398 | static int virtnet_get_link_ksettings(struct net_device *dev, |
3399 | struct ethtool_link_ksettings *cmd) |
3400 | { |
3401 | struct virtnet_info *vi = netdev_priv(dev); |
3402 | |
3403 | cmd->base.speed = vi->speed; |
3404 | cmd->base.duplex = vi->duplex; |
3405 | cmd->base.port = PORT_OTHER; |
3406 | |
3407 | return 0; |
3408 | } |
3409 | |
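 | /* Push the global TX interrupt-coalescing parameters to the device with |
 |  * the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET control command, then mirror the |
 |  * accepted values into the global and per-queue software state. |
 |  */ |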
3410 | static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, |
3411 | struct ethtool_coalesce *ec) |
3412 | { |
3413 | struct scatterlist sgs_tx; |
3414 | int i; |
3415 | |
3416 | vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); |
3417 | vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); |
3418 | sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx)); |
3419 | |
3420 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, |
3421 | VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, |
3422 | &sgs_tx)) |
3423 | return -EINVAL; |
3424 | |
3425 | vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; |
3426 | vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; |
3427 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3428 | vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; |
3429 | vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; |
3430 | } |
3431 | |
3432 | return 0; |
3433 | } |
3434 | |
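 | /* Handle the global RX coalescing request: toggle adaptive (DIM) |
 |  * moderation for every queue and push the static usecs/frames values to |
 |  * the device with VIRTIO_NET_CTRL_NOTF_COAL_RX_SET. |
 |  */ |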
3435 | static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, |
3436 | struct ethtool_coalesce *ec) |
3437 | { |
3438 | bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; |
3439 | struct scatterlist sgs_rx; |
3440 | int i; |
3441 | |
3442 | if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) |
3443 | return -EOPNOTSUPP; |
3444 | |
3445 | if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || |
3446 | ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) |
3447 | return -EINVAL; |
3448 | |
3449 | if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { |
3450 | vi->rx_dim_enabled = true; |
3451 | for (i = 0; i < vi->max_queue_pairs; i++) |
3452 | vi->rq[i].dim_enabled = true; |
3453 | return 0; |
3454 | } |
3455 | |
3456 | if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { |
3457 | vi->rx_dim_enabled = false; |
3458 | for (i = 0; i < vi->max_queue_pairs; i++) |
3459 | vi->rq[i].dim_enabled = false; |
3460 | } |
3461 | |
3462 | /* Since the per-queue coalescing params can be set, |
3463 | * we need to apply the new global params even if they |
3464 | * are not updated. |
3465 | */ |
3466 | vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); |
3467 | vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); |
3468 | sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx)); |
3469 | |
3470 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, |
3471 | VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, |
3472 | &sgs_rx)) |
3473 | return -EINVAL; |
3474 | |
3475 | vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; |
3476 | vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; |
3477 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3478 | vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; |
3479 | vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; |
3480 | } |
3481 | |
3482 | return 0; |
3483 | } |
3484 | |
3485 | static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, |
3486 | struct ethtool_coalesce *ec) |
3487 | { |
3488 | int err; |
3489 | |
3490 | err = virtnet_send_tx_notf_coal_cmds(vi, ec); |
3491 | if (err) |
3492 | return err; |
3493 | |
3494 | err = virtnet_send_rx_notf_coal_cmds(vi, ec); |
3495 | if (err) |
3496 | return err; |
3497 | |
3498 | return 0; |
3499 | } |
3500 | |
3501 | static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, |
3502 | struct ethtool_coalesce *ec, |
3503 | u16 queue) |
3504 | { |
3505 | bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; |
3506 | bool cur_rx_dim = vi->rq[queue].dim_enabled; |
3507 | u32 max_usecs, max_packets; |
3508 | int err; |
3509 | |
3510 | max_usecs = vi->rq[queue].intr_coal.max_usecs; |
3511 | max_packets = vi->rq[queue].intr_coal.max_packets; |
3512 | |
3513 | if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || |
3514 | ec->rx_max_coalesced_frames != max_packets)) |
3515 | return -EINVAL; |
3516 | |
3517 | if (rx_ctrl_dim_on && !cur_rx_dim) { |
3518 | vi->rq[queue].dim_enabled = true; |
3519 | return 0; |
3520 | } |
3521 | |
3522 | if (!rx_ctrl_dim_on && cur_rx_dim) |
3523 | vi->rq[queue].dim_enabled = false; |
3524 | |
3525 | /* If no params are updated, userspace ethtool will |
3526 | * reject the modification. |
3527 | */ |
3528 | err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, |
3529 | ec->rx_coalesce_usecs, |
3530 | ec->rx_max_coalesced_frames); |
3531 | if (err) |
3532 | return err; |
3533 | |
3534 | return 0; |
3535 | } |
3536 | |
3537 | static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, |
3538 | struct ethtool_coalesce *ec, |
3539 | u16 queue) |
3540 | { |
3541 | int err; |
3542 | |
3543 | err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); |
3544 | if (err) |
3545 | return err; |
3546 | |
3547 | err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, |
3548 | ec->tx_coalesce_usecs, |
3549 | ec->tx_max_coalesced_frames); |
3550 | if (err) |
3551 | return err; |
3552 | |
3553 | return 0; |
3554 | } |
3555 | |
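 | /* Deferred DIM worker: runs under rtnl (via rtnl_trylock) and pushes the |
 |  * moderation profile chosen by net_dim() to the device for every RX queue |
 |  * that has adaptive coalescing enabled. |
 |  */ |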
3556 | static void virtnet_rx_dim_work(struct work_struct *work) |
3557 | { |
3558 | struct dim *dim = container_of(work, struct dim, work); |
3559 | struct receive_queue *rq = container_of(dim, |
3560 | struct receive_queue, dim); |
3561 | struct virtnet_info *vi = rq->vq->vdev->priv; |
3562 | struct net_device *dev = vi->dev; |
3563 | struct dim_cq_moder update_moder; |
3564 | int i, qnum, err; |
3565 | |
3566 | if (!rtnl_trylock()) |
3567 | return; |
3568 | |
3569 | /* Each rxq's work is queued by "net_dim()->schedule_work()" |
3570 | * in response to NAPI traffic changes. Note that dim->profile_ix |
3571 | * for each rxq is updated prior to the queuing action. |
3572 | * So we only need to traverse and update profiles for all rxqs |
3573 | * in the work which is holding rtnl_lock. |
3574 | */ |
3575 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3576 | rq = &vi->rq[i]; |
3577 | dim = &rq->dim; |
3578 | qnum = rq - vi->rq; |
3579 | |
3580 | if (!rq->dim_enabled) |
3581 | continue; |
3582 | |
3583 | update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); |
3584 | if (update_moder.usec != rq->intr_coal.max_usecs || |
3585 | update_moder.pkts != rq->intr_coal.max_packets) { |
3586 | err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, |
3587 | update_moder.usec, |
3588 | update_moder.pkts); |
3589 | if (err) |
3590 | pr_debug("%s: Failed to send dim parameters on rxq%d\n", |
3591 | dev->name, qnum); |
3592 | dim->state = DIM_START_MEASURE; |
3593 | } |
3594 | } |
3595 | |
3596 | rtnl_unlock(); |
3597 | } |
3598 | |
3599 | static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) |
3600 | { |
3601 | /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL |
3602 | * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. |
3603 | */ |
3604 | if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) |
3605 | return -EOPNOTSUPP; |
3606 | |
3607 | if (ec->tx_max_coalesced_frames > 1 || |
3608 | ec->rx_max_coalesced_frames != 1) |
3609 | return -EINVAL; |
3610 | |
3611 | return 0; |
3612 | } |
3613 | |
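 | /* Changing a queue's NAPI weight is only allowed while the interface is |
 |  * down; if the requested weight differs while the device is up, return |
 |  * -EBUSY, otherwise tell the caller whether an update is needed. |
 |  */ |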
3614 | static int virtnet_should_update_vq_weight(int dev_flags, int weight, |
3615 | int vq_weight, bool *should_update) |
3616 | { |
3617 | if (weight ^ vq_weight) { |
3618 | if (dev_flags & IFF_UP) |
3619 | return -EBUSY; |
3620 | *should_update = true; |
3621 | } |
3622 | |
3623 | return 0; |
3624 | } |
3625 | |
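 | /* ethtool -C entry point, e.g. (assuming an interface named eth0): |
 |  *   ethtool -C eth0 rx-usecs 8 rx-frames 64 adaptive-rx on |
 |  * Without VIRTIO_NET_F_NOTF_COAL only tx-frames 0/1 is meaningful and is |
 |  * used to toggle TX NAPI; with the feature, the parameters are sent to |
 |  * the device through the control virtqueue. |
 |  */ |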
3626 | static int virtnet_set_coalesce(struct net_device *dev, |
3627 | struct ethtool_coalesce *ec, |
3628 | struct kernel_ethtool_coalesce *kernel_coal, |
3629 | struct netlink_ext_ack *extack) |
3630 | { |
3631 | struct virtnet_info *vi = netdev_priv(dev); |
3632 | int ret, queue_number, napi_weight; |
3633 | bool update_napi = false; |
3634 | |
3635 | /* Can't change NAPI weight if the link is up */ |
3636 | napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; |
3637 | for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { |
3638 | ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, |
3639 | vi->sq[queue_number].napi.weight, |
3640 | &update_napi); |
3641 | if (ret) |
3642 | return ret; |
3643 | |
3644 | if (update_napi) { |
3645 | /* All queues that belong to [queue_number, vi->max_queue_pairs] will be |
3646 | * updated for the sake of simplicity, which might not be necessary |
3647 | */ |
3648 | break; |
3649 | } |
3650 | } |
3651 | |
3652 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) |
3653 | ret = virtnet_send_notf_coal_cmds(vi, ec); |
3654 | else |
3655 | ret = virtnet_coal_params_supported(ec); |
3656 | |
3657 | if (ret) |
3658 | return ret; |
3659 | |
3660 | if (update_napi) { |
3661 | for (; queue_number < vi->max_queue_pairs; queue_number++) |
3662 | vi->sq[queue_number].napi.weight = napi_weight; |
3663 | } |
3664 | |
3665 | return ret; |
3666 | } |
3667 | |
3668 | static int virtnet_get_coalesce(struct net_device *dev, |
3669 | struct ethtool_coalesce *ec, |
3670 | struct kernel_ethtool_coalesce *kernel_coal, |
3671 | struct netlink_ext_ack *extack) |
3672 | { |
3673 | struct virtnet_info *vi = netdev_priv(dev); |
3674 | |
3675 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { |
3676 | ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; |
3677 | ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; |
3678 | ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; |
3679 | ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; |
3680 | ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; |
3681 | } else { |
3682 | ec->rx_max_coalesced_frames = 1; |
3683 | |
3684 | if (vi->sq[0].napi.weight) |
3685 | ec->tx_max_coalesced_frames = 1; |
3686 | } |
3687 | |
3688 | return 0; |
3689 | } |
3690 | |
3691 | static int virtnet_set_per_queue_coalesce(struct net_device *dev, |
3692 | u32 queue, |
3693 | struct ethtool_coalesce *ec) |
3694 | { |
3695 | struct virtnet_info *vi = netdev_priv(dev); |
3696 | int ret, napi_weight; |
3697 | bool update_napi = false; |
3698 | |
3699 | if (queue >= vi->max_queue_pairs) |
3700 | return -EINVAL; |
3701 | |
3702 | /* Can't change NAPI weight if the link is up */ |
3703 | napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; |
3704 | ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, |
3705 | vi->sq[queue].napi.weight, |
3706 | &update_napi); |
3707 | if (ret) |
3708 | return ret; |
3709 | |
3710 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) |
3711 | ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); |
3712 | else |
3713 | ret = virtnet_coal_params_supported(ec); |
3714 | |
3715 | if (ret) |
3716 | return ret; |
3717 | |
3718 | if (update_napi) |
3719 | vi->sq[queue].napi.weight = napi_weight; |
3720 | |
3721 | return 0; |
3722 | } |
3723 | |
3724 | static int virtnet_get_per_queue_coalesce(struct net_device *dev, |
3725 | u32 queue, |
3726 | struct ethtool_coalesce *ec) |
3727 | { |
3728 | struct virtnet_info *vi = netdev_priv(dev); |
3729 | |
3730 | if (queue >= vi->max_queue_pairs) |
3731 | return -EINVAL; |
3732 | |
3733 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { |
3734 | ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; |
3735 | ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; |
3736 | ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; |
3737 | ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; |
3738 | ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; |
3739 | } else { |
3740 | ec->rx_max_coalesced_frames = 1; |
3741 | |
3742 | if (vi->sq[queue].napi.weight) |
3743 | ec->tx_max_coalesced_frames = 1; |
3744 | } |
3745 | |
3746 | return 0; |
3747 | } |
3748 | |
3749 | static void virtnet_init_settings(struct net_device *dev) |
3750 | { |
3751 | struct virtnet_info *vi = netdev_priv(dev); |
3752 | |
3753 | vi->speed = SPEED_UNKNOWN; |
3754 | vi->duplex = DUPLEX_UNKNOWN; |
3755 | } |
3756 | |
3757 | static void virtnet_update_settings(struct virtnet_info *vi) |
3758 | { |
3759 | u32 speed; |
3760 | u8 duplex; |
3761 | |
3762 | if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) |
3763 | return; |
3764 | |
3765 | virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); |
3766 | |
3767 | if (ethtool_validate_speed(speed)) |
3768 | vi->speed = speed; |
3769 | |
3770 | virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); |
3771 | |
3772 | if (ethtool_validate_duplex(duplex)) |
3773 | vi->duplex = duplex; |
3774 | } |
3775 | |
3776 | static u32 virtnet_get_rxfh_key_size(struct net_device *dev) |
3777 | { |
3778 | return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; |
3779 | } |
3780 | |
3781 | static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) |
3782 | { |
3783 | return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; |
3784 | } |
3785 | |
3786 | static int virtnet_get_rxfh(struct net_device *dev, |
3787 | struct ethtool_rxfh_param *rxfh) |
3788 | { |
3789 | struct virtnet_info *vi = netdev_priv(dev); |
3790 | int i; |
3791 | |
3792 | if (rxfh->indir) { |
3793 | for (i = 0; i < vi->rss_indir_table_size; ++i) |
3794 | rxfh->indir[i] = vi->ctrl->rss.indirection_table[i]; |
3795 | } |
3796 | |
3797 | if (rxfh->key) |
3798 | memcpy(rxfh->key, vi->ctrl->rss.key, vi->rss_key_size); |
3799 | |
3800 | rxfh->hfunc = ETH_RSS_HASH_TOP; |
3801 | |
3802 | return 0; |
3803 | } |
3804 | |
3805 | static int virtnet_set_rxfh(struct net_device *dev, |
3806 | struct ethtool_rxfh_param *rxfh, |
3807 | struct netlink_ext_ack *extack) |
3808 | { |
3809 | struct virtnet_info *vi = netdev_priv(dev); |
3810 | bool update = false; |
3811 | int i; |
3812 | |
3813 | if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && |
3814 | rxfh->hfunc != ETH_RSS_HASH_TOP) |
3815 | return -EOPNOTSUPP; |
3816 | |
3817 | if (rxfh->indir) { |
3818 | if (!vi->has_rss) |
3819 | return -EOPNOTSUPP; |
3820 | |
3821 | for (i = 0; i < vi->rss_indir_table_size; ++i) |
3822 | vi->ctrl->rss.indirection_table[i] = rxfh->indir[i]; |
3823 | update = true; |
3824 | } |
3825 | |
3826 | if (rxfh->key) { |
3827 | /* If either _F_HASH_REPORT or _F_RSS are negotiated, the |
3828 | * device provides hash calculation capabilities, that is, |
3829 | * hash_key is configured. |
3830 | */ |
3831 | if (!vi->has_rss && !vi->has_rss_hash_report) |
3832 | return -EOPNOTSUPP; |
3833 | |
3834 | memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size); |
3835 | update = true; |
3836 | } |
3837 | |
3838 | if (update) |
3839 | virtnet_commit_rss_command(vi); |
3840 | |
3841 | return 0; |
3842 | } |
3843 | |
3844 | static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) |
3845 | { |
3846 | struct virtnet_info *vi = netdev_priv(dev); |
3847 | int rc = 0; |
3848 | |
3849 | switch (info->cmd) { |
3850 | case ETHTOOL_GRXRINGS: |
3851 | info->data = vi->curr_queue_pairs; |
3852 | break; |
3853 | case ETHTOOL_GRXFH: |
3854 | virtnet_get_hashflow(vi, info); |
3855 | break; |
3856 | default: |
3857 | rc = -EOPNOTSUPP; |
3858 | } |
3859 | |
3860 | return rc; |
3861 | } |
3862 | |
3863 | static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) |
3864 | { |
3865 | struct virtnet_info *vi = netdev_priv(dev); |
3866 | int rc = 0; |
3867 | |
3868 | switch (info->cmd) { |
3869 | case ETHTOOL_SRXFH: |
3870 | if (!virtnet_set_hashflow(vi, info)) |
3871 | rc = -EINVAL; |
3872 | |
3873 | break; |
3874 | default: |
3875 | rc = -EOPNOTSUPP; |
3876 | } |
3877 | |
3878 | return rc; |
3879 | } |
3880 | |
3881 | static const struct ethtool_ops virtnet_ethtool_ops = { |
3882 | .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | |
3883 | ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, |
3884 | .get_drvinfo = virtnet_get_drvinfo, |
3885 | .get_link = ethtool_op_get_link, |
3886 | .get_ringparam = virtnet_get_ringparam, |
3887 | .set_ringparam = virtnet_set_ringparam, |
3888 | .get_strings = virtnet_get_strings, |
3889 | .get_sset_count = virtnet_get_sset_count, |
3890 | .get_ethtool_stats = virtnet_get_ethtool_stats, |
3891 | .set_channels = virtnet_set_channels, |
3892 | .get_channels = virtnet_get_channels, |
3893 | .get_ts_info = ethtool_op_get_ts_info, |
3894 | .get_link_ksettings = virtnet_get_link_ksettings, |
3895 | .set_link_ksettings = virtnet_set_link_ksettings, |
3896 | .set_coalesce = virtnet_set_coalesce, |
3897 | .get_coalesce = virtnet_get_coalesce, |
3898 | .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, |
3899 | .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, |
3900 | .get_rxfh_key_size = virtnet_get_rxfh_key_size, |
3901 | .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, |
3902 | .get_rxfh = virtnet_get_rxfh, |
3903 | .set_rxfh = virtnet_set_rxfh, |
3904 | .get_rxnfc = virtnet_get_rxnfc, |
3905 | .set_rxnfc = virtnet_set_rxnfc, |
3906 | }; |
3907 | |
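 | /* Quiesce the device before its virtqueues are torn down: flush pending |
 |  * work, detach the netdev so the stack stops transmitting, and close the |
 |  * queues if the interface was running. |
 |  */ |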
3908 | static void virtnet_freeze_down(struct virtio_device *vdev) |
3909 | { |
3910 | struct virtnet_info *vi = vdev->priv; |
3911 | |
3912 | /* Make sure no work handler is accessing the device */ |
3913 | flush_work(&vi->config_work); |
3914 | disable_rx_mode_work(vi); |
3915 | flush_work(&vi->rx_mode_work); |
3916 | |
3917 | netif_tx_lock_bh(vi->dev); |
3918 | netif_device_detach(vi->dev); |
3919 | netif_tx_unlock_bh(vi->dev); |
3920 | if (netif_running(vi->dev)) |
3921 | virtnet_close(vi->dev); |
3922 | } |
3923 | |
3924 | static int init_vqs(struct virtnet_info *vi); |
3925 | |
3926 | static int virtnet_restore_up(struct virtio_device *vdev) |
3927 | { |
3928 | struct virtnet_info *vi = vdev->priv; |
3929 | int err; |
3930 | |
3931 | err = init_vqs(vi); |
3932 | if (err) |
3933 | return err; |
3934 | |
3935 | virtio_device_ready(vdev); |
3936 | |
3937 | enable_delayed_refill(vi); |
3938 | enable_rx_mode_work(vi); |
3939 | |
3940 | if (netif_running(vi->dev)) { |
3941 | err = virtnet_open(vi->dev); |
3942 | if (err) |
3943 | return err; |
3944 | } |
3945 | |
3946 | netif_tx_lock_bh(vi->dev); |
3947 | netif_device_attach(vi->dev); |
3948 | netif_tx_unlock_bh(vi->dev); |
3949 | return err; |
3950 | } |
3951 | |
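 | /* Tell the device which receive offloads the guest currently accepts by |
 |  * sending the offload bitmap with VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET. |
 |  */ |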
3952 | static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) |
3953 | { |
3954 | struct scatterlist sg; |
3955 | vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); |
3956 | |
3957 | sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); |
3958 | |
3959 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, |
3960 | VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { |
3961 | dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); |
3962 | return -EINVAL; |
3963 | } |
3964 | |
3965 | return 0; |
3966 | } |
3967 | |
3968 | static int virtnet_clear_guest_offloads(struct virtnet_info *vi) |
3969 | { |
3970 | u64 offloads = 0; |
3971 | |
3972 | if (!vi->guest_offloads) |
3973 | return 0; |
3974 | |
3975 | return virtnet_set_guest_offloads(vi, offloads); |
3976 | } |
3977 | |
3978 | static int virtnet_restore_guest_offloads(struct virtnet_info *vi) |
3979 | { |
3980 | u64 offloads = vi->guest_offloads; |
3981 | |
3982 | if (!vi->guest_offloads) |
3983 | return 0; |
3984 | |
3985 | return virtnet_set_guest_offloads(vi, offloads); |
3986 | } |
3987 | |
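 | /* Attach or detach an XDP program. Validate the offload and MTU |
 |  * constraints, try to reserve nr_cpu_ids extra TX queues for XDP_TX |
 |  * (falling back to a slower locked TX mode if they are unavailable), |
 |  * swap rq->xdp_prog with NAPI disabled, and clear or restore the guest |
 |  * offloads accordingly. |
 |  */ |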
3988 | static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, |
3989 | struct netlink_ext_ack *extack) |
3990 | { |
3991 | unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + |
3992 | sizeof(struct skb_shared_info)); |
3993 | unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; |
3994 | struct virtnet_info *vi = netdev_priv(dev); |
3995 | struct bpf_prog *old_prog; |
3996 | u16 xdp_qp = 0, curr_qp; |
3997 | int i, err; |
3998 | |
3999 | if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
4000 | && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || |
4001 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || |
4002 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || |
4003 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || |
4004 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || |
4005 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || |
4006 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { |
4007 | NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); |
4008 | return -EOPNOTSUPP; |
4009 | } |
4010 | |
4011 | if (vi->mergeable_rx_bufs && !vi->any_header_sg) { |
4012 | NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); |
4013 | return -EINVAL; |
4014 | } |
4015 | |
4016 | if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { |
4017 | NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); |
4018 | netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); |
4019 | return -EINVAL; |
4020 | } |
4021 | |
4022 | curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; |
4023 | if (prog) |
4024 | xdp_qp = nr_cpu_ids; |
4025 | |
4026 | /* XDP requires extra queues for XDP_TX */ |
4027 | if (curr_qp + xdp_qp > vi->max_queue_pairs) { |
4028 | netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", |
4029 | curr_qp + xdp_qp, vi->max_queue_pairs); |
4030 | xdp_qp = 0; |
4031 | } |
4032 | |
4033 | old_prog = rtnl_dereference(vi->rq[0].xdp_prog); |
4034 | if (!prog && !old_prog) |
4035 | return 0; |
4036 | |
4037 | if (prog) |
4038 | bpf_prog_add(prog, vi->max_queue_pairs - 1); |
4039 | |
4040 | /* Make sure NAPI is not using any XDP TX queues for RX. */ |
4041 | if (netif_running(dev)) { |
4042 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4043 | napi_disable(&vi->rq[i].napi); |
4044 | virtnet_napi_tx_disable(&vi->sq[i].napi); |
4045 | } |
4046 | } |
4047 | |
4048 | if (!prog) { |
4049 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4050 | rcu_assign_pointer(vi->rq[i].xdp_prog, prog); |
4051 | if (i == 0) |
4052 | virtnet_restore_guest_offloads(vi); |
4053 | } |
4054 | synchronize_net(); |
4055 | } |
4056 | |
4057 | err = _virtnet_set_queues(vi, curr_qp + xdp_qp); |
4058 | if (err) |
4059 | goto err; |
4060 | netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); |
4061 | vi->xdp_queue_pairs = xdp_qp; |
4062 | |
4063 | if (prog) { |
4064 | vi->xdp_enabled = true; |
4065 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4066 | rcu_assign_pointer(vi->rq[i].xdp_prog, prog); |
4067 | if (i == 0 && !old_prog) |
4068 | virtnet_clear_guest_offloads(vi); |
4069 | } |
4070 | if (!old_prog) |
4071 | xdp_features_set_redirect_target(dev, true); |
4072 | } else { |
4073 | xdp_features_clear_redirect_target(dev); |
4074 | vi->xdp_enabled = false; |
4075 | } |
4076 | |
4077 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4078 | if (old_prog) |
4079 | bpf_prog_put(old_prog); |
4080 | if (netif_running(dev)) { |
4081 | virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); |
4082 | virtnet_napi_tx_enable(vi, vi->sq[i].vq, |
4083 | &vi->sq[i].napi); |
4084 | } |
4085 | } |
4086 | |
4087 | return 0; |
4088 | |
4089 | err: |
4090 | if (!prog) { |
4091 | virtnet_clear_guest_offloads(vi); |
4092 | for (i = 0; i < vi->max_queue_pairs; i++) |
4093 | rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); |
4094 | } |
4095 | |
4096 | if (netif_running(dev)) { |
4097 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4098 | virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); |
4099 | virtnet_napi_tx_enable(vi, vi->sq[i].vq, |
4100 | &vi->sq[i].napi); |
4101 | } |
4102 | } |
4103 | if (prog) |
4104 | bpf_prog_sub(prog, vi->max_queue_pairs - 1); |
4105 | return err; |
4106 | } |
4107 | |
4108 | static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) |
4109 | { |
4110 | switch (xdp->command) { |
4111 | case XDP_SETUP_PROG: |
4112 | return virtnet_xdp_set(dev, xdp->prog, xdp->extack); |
4113 | default: |
4114 | return -EINVAL; |
4115 | } |
4116 | } |
4117 | |
4118 | static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, |
4119 | size_t len) |
4120 | { |
4121 | struct virtnet_info *vi = netdev_priv(dev); |
4122 | int ret; |
4123 | |
4124 | if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) |
4125 | return -EOPNOTSUPP; |
4126 | |
4127 | ret = snprintf(buf, len, "sby"); |
4128 | if (ret >= len) |
4129 | return -EOPNOTSUPP; |
4130 | |
4131 | return 0; |
4132 | } |
4133 | |
4134 | static int virtnet_set_features(struct net_device *dev, |
4135 | netdev_features_t features) |
4136 | { |
4137 | struct virtnet_info *vi = netdev_priv(dev); |
4138 | u64 offloads; |
4139 | int err; |
4140 | |
4141 | if ((dev->features ^ features) & NETIF_F_GRO_HW) { |
4142 | if (vi->xdp_enabled) |
4143 | return -EBUSY; |
4144 | |
4145 | if (features & NETIF_F_GRO_HW) |
4146 | offloads = vi->guest_offloads_capable; |
4147 | else |
4148 | offloads = vi->guest_offloads_capable & |
4149 | ~GUEST_OFFLOAD_GRO_HW_MASK; |
4150 | |
4151 | err = virtnet_set_guest_offloads(vi, offloads); |
4152 | if (err) |
4153 | return err; |
4154 | vi->guest_offloads = offloads; |
4155 | } |
4156 | |
4157 | if ((dev->features ^ features) & NETIF_F_RXHASH) { |
4158 | if (features & NETIF_F_RXHASH) |
4159 | vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; |
4160 | else |
4161 | vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; |
4162 | |
4163 | if (!virtnet_commit_rss_command(vi)) |
4164 | return -EINVAL; |
4165 | } |
4166 | |
4167 | return 0; |
4168 | } |
4169 | |
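 | /* ndo_tx_timeout handler: account the event in the queue's tx_timeouts |
 |  * counter and log which send queue/virtqueue stalled and for how long. |
 |  */ |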
4170 | static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) |
4171 | { |
4172 | struct virtnet_info *priv = netdev_priv(dev); |
4173 | struct send_queue *sq = &priv->sq[txqueue]; |
4174 | struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); |
4175 | |
4176 | u64_stats_update_begin(&sq->stats.syncp); |
4177 | u64_stats_inc(&sq->stats.tx_timeouts); |
4178 | u64_stats_update_end(&sq->stats.syncp); |
4179 | |
4180 | netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", |
4181 | txqueue, sq->name, sq->vq->index, sq->vq->name, |
4182 | jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); |
4183 | } |
4184 | |
4185 | static const struct net_device_ops virtnet_netdev = { |
4186 | .ndo_open = virtnet_open, |
4187 | .ndo_stop = virtnet_close, |
4188 | .ndo_start_xmit = start_xmit, |
4189 | .ndo_validate_addr = eth_validate_addr, |
4190 | .ndo_set_mac_address = virtnet_set_mac_address, |
4191 | .ndo_set_rx_mode = virtnet_set_rx_mode, |
4192 | .ndo_get_stats64 = virtnet_stats, |
4193 | .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, |
4194 | .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, |
4195 | .ndo_bpf = virtnet_xdp, |
4196 | .ndo_xdp_xmit = virtnet_xdp_xmit, |
4197 | .ndo_features_check = passthru_features_check, |
4198 | .ndo_get_phys_port_name = virtnet_get_phys_port_name, |
4199 | .ndo_set_features = virtnet_set_features, |
4200 | .ndo_tx_timeout = virtnet_tx_timeout, |
4201 | }; |
4202 | |
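 | /* Config-change worker: re-read the status field, notify peers and ack |
 |  * the announce when VIRTIO_NET_S_ANNOUNCE is set, and propagate |
 |  * VIRTIO_NET_S_LINK_UP to the carrier state and TX queues. |
 |  */ |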
4203 | static void virtnet_config_changed_work(struct work_struct *work) |
4204 | { |
4205 | struct virtnet_info *vi = |
4206 | container_of(work, struct virtnet_info, config_work); |
4207 | u16 v; |
4208 | |
4209 | if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, |
4210 | struct virtio_net_config, status, &v) < 0) |
4211 | return; |
4212 | |
4213 | if (v & VIRTIO_NET_S_ANNOUNCE) { |
4214 | netdev_notify_peers(vi->dev); |
4215 | virtnet_ack_link_announce(vi); |
4216 | } |
4217 | |
4218 | /* Ignore unknown (future) status bits */ |
4219 | v &= VIRTIO_NET_S_LINK_UP; |
4220 | |
4221 | if (vi->status == v) |
4222 | return; |
4223 | |
4224 | vi->status = v; |
4225 | |
4226 | if (vi->status & VIRTIO_NET_S_LINK_UP) { |
4227 | virtnet_update_settings(vi); |
4228 | netif_carrier_on(vi->dev); |
4229 | netif_tx_wake_all_queues(vi->dev); |
4230 | } else { |
4231 | netif_carrier_off(vi->dev); |
4232 | netif_tx_stop_all_queues(vi->dev); |
4233 | } |
4234 | } |
4235 | |
4236 | static void virtnet_config_changed(struct virtio_device *vdev) |
4237 | { |
4238 | struct virtnet_info *vi = vdev->priv; |
4239 | |
4240 | schedule_work(&vi->config_work); |
4241 | } |
4242 | |
4243 | static void virtnet_free_queues(struct virtnet_info *vi) |
4244 | { |
4245 | int i; |
4246 | |
4247 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4248 | __netif_napi_del(&vi->rq[i].napi); |
4249 | __netif_napi_del(&vi->sq[i].napi); |
4250 | } |
4251 | |
4252 | /* We called __netif_napi_del(), |
4253 | * we need to respect an RCU grace period before freeing vi->rq |
4254 | */ |
4255 | synchronize_net(); |
4256 | |
4257 | kfree(vi->rq); |
4258 | kfree(vi->sq); |
4259 | kfree(vi->ctrl); |
4260 | } |
4261 | |
4262 | static void _free_receive_bufs(struct virtnet_info *vi) |
4263 | { |
4264 | struct bpf_prog *old_prog; |
4265 | int i; |
4266 | |
4267 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4268 | while (vi->rq[i].pages) |
4269 | __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); |
4270 | |
4271 | old_prog = rtnl_dereference(vi->rq[i].xdp_prog); |
4272 | RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); |
4273 | if (old_prog) |
4274 | bpf_prog_put(old_prog); |
4275 | } |
4276 | } |
4277 | |
4278 | static void free_receive_bufs(struct virtnet_info *vi) |
4279 | { |
4280 | rtnl_lock(); |
4281 | _free_receive_bufs(vi); |
4282 | rtnl_unlock(); |
4283 | } |
4284 | |
4285 | static void free_receive_page_frags(struct virtnet_info *vi) |
4286 | { |
4287 | int i; |
4288 | for (i = 0; i < vi->max_queue_pairs; i++) |
4289 | if (vi->rq[i].alloc_frag.page) { |
4290 | if (vi->rq[i].do_dma && vi->rq[i].last_dma) |
4291 | virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); |
4292 | put_page(vi->rq[i].alloc_frag.page); |
4293 | } |
4294 | } |
4295 | |
4296 | static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) |
4297 | { |
4298 | if (!is_xdp_frame(buf)) |
4299 | dev_kfree_skb(buf); |
4300 | else |
4301 | xdp_return_frame(ptr_to_xdp(buf)); |
4302 | } |
4303 | |
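 | /* Detach and free any buffers that are still queued in the send and |
 |  * receive virtqueues, e.g. after the device has been reset. |
 |  */ |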
4304 | static void free_unused_bufs(struct virtnet_info *vi) |
4305 | { |
4306 | void *buf; |
4307 | int i; |
4308 | |
4309 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4310 | struct virtqueue *vq = vi->sq[i].vq; |
4311 | while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) |
4312 | virtnet_sq_free_unused_buf(vq, buf); |
4313 | cond_resched(); |
4314 | } |
4315 | |
4316 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4317 | struct virtqueue *vq = vi->rq[i].vq; |
4318 | |
4319 | while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) |
4320 | virtnet_rq_unmap_free_buf(vq, buf); |
4321 | cond_resched(); |
4322 | } |
4323 | } |
4324 | |
4325 | static void virtnet_del_vqs(struct virtnet_info *vi) |
4326 | { |
4327 | struct virtio_device *vdev = vi->vdev; |
4328 | |
4329 | virtnet_clean_affinity(vi); |
4330 | |
4331 | vdev->config->del_vqs(vdev); |
4332 | |
4333 | virtnet_free_queues(vi); |
4334 | } |
4335 | |
4336 | /* How large should a single buffer be so a queue full of these can fit at |
4337 | * least one full packet? |
4338 | * Logic below assumes the mergeable buffer header is used. |
4339 | */ |
4340 | static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) |
4341 | { |
4342 | const unsigned int hdr_len = vi->hdr_len; |
4343 | unsigned int rq_size = virtqueue_get_vring_size(vq); |
4344 | unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; |
4345 | unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; |
4346 | unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); |
4347 | |
4348 | return max(max(min_buf_len, hdr_len) - hdr_len, |
4349 | (unsigned int)GOOD_PACKET_LEN); |
4350 | } |
4351 | |
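 | /* Ask the transport for all virtqueues: one RX/TX pair per queue pair |
 |  * plus an optional control vq at the end. The callback, name and per-vq |
 |  * context arrays built here are only needed for the find_vqs call itself |
 |  * and are freed on both the success and error paths below. |
 |  */ |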
4352 | static int virtnet_find_vqs(struct virtnet_info *vi) |
4353 | { |
4354 | vq_callback_t **callbacks; |
4355 | struct virtqueue **vqs; |
4356 | const char **names; |
4357 | int ret = -ENOMEM; |
4358 | int total_vqs; |
4359 | bool *ctx; |
4360 | u16 i; |
4361 | |
4362 | /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by |
4363 | * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by |
4364 | * possible control vq. |
4365 | */ |
4366 | total_vqs = vi->max_queue_pairs * 2 + |
4367 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); |
4368 | |
4369 | /* Allocate space for find_vqs parameters */ |
4370 | vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); |
4371 | if (!vqs) |
4372 | goto err_vq; |
4373 | callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); |
4374 | if (!callbacks) |
4375 | goto err_callback; |
4376 | names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); |
4377 | if (!names) |
4378 | goto err_names; |
4379 | if (!vi->big_packets || vi->mergeable_rx_bufs) { |
4380 | ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); |
4381 | if (!ctx) |
4382 | goto err_ctx; |
4383 | } else { |
4384 | ctx = NULL; |
4385 | } |
4386 | |
4387 | /* Parameters for control virtqueue, if any */ |
4388 | if (vi->has_cvq) { |
4389 | callbacks[total_vqs - 1] = NULL; |
4390 | names[total_vqs - 1] = "control"; |
4391 | } |
4392 | |
4393 | /* Allocate/initialize parameters for send/receive virtqueues */ |
4394 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4395 | callbacks[rxq2vq(i)] = skb_recv_done; |
4396 | callbacks[txq2vq(i)] = skb_xmit_done; |
4397 | sprintf(vi->rq[i].name, "input.%u", i); |
4398 | sprintf(vi->sq[i].name, "output.%u", i); |
4399 | names[rxq2vq(i)] = vi->rq[i].name; |
4400 | names[txq2vq(i)] = vi->sq[i].name; |
4401 | if (ctx) |
4402 | ctx[rxq2vq(i)] = true; |
4403 | } |
4404 | |
4405 | ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, |
4406 | names, ctx, NULL); |
4407 | if (ret) |
4408 | goto err_find; |
4409 | |
4410 | if (vi->has_cvq) { |
4411 | vi->cvq = vqs[total_vqs - 1]; |
4412 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) |
4413 | vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; |
4414 | } |
4415 | |
4416 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4417 | vi->rq[i].vq = vqs[rxq2vq(i)]; |
4418 | vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); |
4419 | vi->sq[i].vq = vqs[txq2vq(i)]; |
4420 | } |
4421 | |
4422 | /* Success: ret == 0; fall through to free the temporary find_vqs arrays. */ |
4423 | |
4424 | |
4425 | err_find: |
4426 | kfree(ctx); |
4427 | err_ctx: |
4428 | kfree(names); |
4429 | err_names: |
4430 | kfree(callbacks); |
4431 | err_callback: |
4432 | kfree(vqs); |
4433 | err_vq: |
4434 | return ret; |
4435 | } |
4436 | |
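 | /* Allocate the control buffer and the send/receive queue arrays, and set |
 |  * up NAPI, DIM state, scatterlists and stats for every queue pair. |
 |  */ |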
4437 | static int virtnet_alloc_queues(struct virtnet_info *vi) |
4438 | { |
4439 | int i; |
4440 | |
4441 | if (vi->has_cvq) { |
4442 | vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); |
4443 | if (!vi->ctrl) |
4444 | goto err_ctrl; |
4445 | } else { |
4446 | vi->ctrl = NULL; |
4447 | } |
4448 | vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); |
4449 | if (!vi->sq) |
4450 | goto err_sq; |
4451 | vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); |
4452 | if (!vi->rq) |
4453 | goto err_rq; |
4454 | |
4455 | INIT_DELAYED_WORK(&vi->refill, refill_work); |
4456 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4457 | vi->rq[i].pages = NULL; |
4458 | netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, |
4459 | napi_weight); |
4460 | netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, |
4461 | virtnet_poll_tx, |
4462 | napi_tx ? napi_weight : 0); |
4463 | |
4464 | INIT_WORK(&vi->rq[i].dim.work, virtnet_rx_dim_work); |
4465 | vi->rq[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; |
4466 | |
4467 | sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); |
4468 | ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); |
4469 | sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); |
4470 | |
4471 | u64_stats_init(&vi->rq[i].stats.syncp); |
4472 | u64_stats_init(&vi->sq[i].stats.syncp); |
4473 | } |
4474 | |
4475 | return 0; |
4476 | |
4477 | err_rq: |
4478 | kfree(vi->sq); |
4479 | err_sq: |
4480 | kfree(vi->ctrl); |
4481 | err_ctrl: |
4482 | return -ENOMEM; |
4483 | } |
4484 | |
4485 | static int init_vqs(struct virtnet_info *vi) |
4486 | { |
4487 | int ret; |
4488 | |
4489 | /* Allocate send & receive queues */ |
4490 | ret = virtnet_alloc_queues(vi); |
4491 | if (ret) |
4492 | goto err; |
4493 | |
4494 | ret = virtnet_find_vqs(vi); |
4495 | if (ret) |
4496 | goto err_free; |
4497 | |
4498 | virtnet_rq_set_premapped(vi); |
4499 | |
4500 | cpus_read_lock(); |
4501 | virtnet_set_affinity(vi); |
4502 | cpus_read_unlock(); |
4503 | |
4504 | return 0; |
4505 | |
4506 | err_free: |
4507 | virtnet_free_queues(vi); |
4508 | err: |
4509 | return ret; |
4510 | } |
4511 | |
4512 | #ifdef CONFIG_SYSFS |
4513 | static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, |
4514 | char *buf) |
4515 | { |
4516 | struct virtnet_info *vi = netdev_priv(queue->dev); |
4517 | unsigned int queue_index = get_netdev_rx_queue_index(queue); |
4518 | unsigned int headroom = virtnet_get_headroom(vi); |
4519 | unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
4520 | struct ewma_pkt_len *avg; |
4521 | |
4522 | BUG_ON(queue_index >= vi->max_queue_pairs); |
4523 | avg = &vi->rq[queue_index].mrg_avg_pkt_len; |
4524 | return sprintf(buf, "%u\n", |
4525 | get_mergeable_buf_len(&vi->rq[queue_index], avg, |
4526 | SKB_DATA_ALIGN(headroom + tailroom))); |
4527 | } |
4528 | |
4529 | static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = |
4530 | __ATTR_RO(mergeable_rx_buffer_size); |
4531 | |
4532 | static struct attribute *virtio_net_mrg_rx_attrs[] = { |
4533 | &mergeable_rx_buffer_size_attribute.attr, |
4534 | NULL |
4535 | }; |
4536 | |
4537 | static const struct attribute_group virtio_net_mrg_rx_group = { |
4538 | .name = "virtio_net", |
4539 | .attrs = virtio_net_mrg_rx_attrs |
4540 | }; |
4541 | #endif |
4542 | |
4543 | static bool virtnet_fail_on_feature(struct virtio_device *vdev, |
4544 | unsigned int fbit, |
4545 | const char *fname, const char *dname) |
4546 | { |
4547 | if (!virtio_has_feature(vdev, fbit)) |
4548 | return false; |
4549 | |
4550 | dev_err(&vdev->dev, "device advertises feature %s but not %s", |
4551 | fname, dname); |
4552 | |
4553 | return true; |
4554 | } |
4555 | |
4556 | #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ |
4557 | virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) |
4558 | |
4559 | static bool virtnet_validate_features(struct virtio_device *vdev) |
4560 | { |
4561 | if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && |
4562 | (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, |
4563 | "VIRTIO_NET_F_CTRL_VQ") || |
4564 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, |
4565 | "VIRTIO_NET_F_CTRL_VQ") || |
4566 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, |
4567 | "VIRTIO_NET_F_CTRL_VQ") || |
4568 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || |
4569 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, |
4570 | "VIRTIO_NET_F_CTRL_VQ") || |
4571 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, |
4572 | "VIRTIO_NET_F_CTRL_VQ") || |
4573 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, |
4574 | "VIRTIO_NET_F_CTRL_VQ") || |
4575 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, |
4576 | "VIRTIO_NET_F_CTRL_VQ") || |
4577 | VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, |
4578 | "VIRTIO_NET_F_CTRL_VQ"))) { |
4579 | return false; |
4580 | } |
4581 | |
4582 | return true; |
4583 | } |
4584 | |
4585 | #define MIN_MTU ETH_MIN_MTU |
4586 | #define MAX_MTU ETH_MAX_MTU |
4587 | |
4588 | static int virtnet_validate(struct virtio_device *vdev) |
4589 | { |
4590 | if (!vdev->config->get) { |
4591 | dev_err(&vdev->dev, "%s failure: config access disabled\n", |
4592 | __func__); |
4593 | return -EINVAL; |
4594 | } |
4595 | |
4596 | if (!virtnet_validate_features(vdev)) |
4597 | return -EINVAL; |
4598 | |
4599 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { |
4600 | int mtu = virtio_cread16(vdev, |
4601 | offsetof(struct virtio_net_config, |
4602 | mtu)); |
4603 | if (mtu < MIN_MTU) |
4604 | __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); |
4605 | } |
4606 | |
4607 | if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && |
4608 | !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { |
4609 | dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); |
4610 | __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); |
4611 | } |
4612 | |
4613 | return 0; |
4614 | } |
4615 | |
4616 | static bool virtnet_check_guest_gso(const struct virtnet_info *vi) |
4617 | { |
4618 | return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || |
4619 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || |
4620 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || |
4621 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || |
4622 | (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && |
4623 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); |
4624 | } |
4625 | |
4626 | static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) |
4627 | { |
4628 | bool guest_gso = virtnet_check_guest_gso(vi); |
4629 | |
4630 | /* If device can receive ANY guest GSO packets, regardless of mtu, |
4631 | * allocate buffers of maximum size, otherwise limit them to |
4632 | * what the MTU requires. |
4633 | */ |
4634 | if (mtu > ETH_DATA_LEN || guest_gso) { |
4635 | vi->big_packets = true; |
4636 | vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); |
4637 | } |
4638 | } |
4639 | |
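 | /* Device probe: size the queue pairs from the config space, derive the |
 |  * netdev features from the negotiated virtio features, allocate and find |
 |  * the virtqueues, then register the netdev and mark the device ready. |
 |  */ |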
4640 | static int virtnet_probe(struct virtio_device *vdev) |
4641 | { |
4642 | int i, err = -ENOMEM; |
4643 | struct net_device *dev; |
4644 | struct virtnet_info *vi; |
4645 | u16 max_queue_pairs; |
4646 | int mtu = 0; |
4647 | |
4648 | /* Find if host supports multiqueue/rss virtio_net device */ |
4649 | max_queue_pairs = 1; |
4650 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) |
4651 | max_queue_pairs = |
4652 | virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); |
4653 | |
4654 | /* We need at least 2 queues */ |
4655 | if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || |
4656 | max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || |
4657 | !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) |
4658 | max_queue_pairs = 1; |
4659 | |
4660 | /* Allocate ourselves a network device with room for our info */ |
4661 | dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); |
4662 | if (!dev) |
4663 | return -ENOMEM; |
4664 | |
4665 | /* Set up network device as normal. */ |
4666 | dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | |
4667 | IFF_TX_SKB_NO_LINEAR; |
4668 | dev->netdev_ops = &virtnet_netdev; |
4669 | dev->features = NETIF_F_HIGHDMA; |
4670 | |
4671 | dev->ethtool_ops = &virtnet_ethtool_ops; |
4672 | SET_NETDEV_DEV(dev, &vdev->dev); |
4673 | |
4674 | /* Do we support "hardware" checksums? */ |
4675 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { |
4676 | /* This opens up the world of extra features. */ |
4677 | dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; |
4678 | if (csum) |
4679 | dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; |
4680 | |
4681 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { |
4682 | dev->hw_features |= NETIF_F_TSO |
4683 | | NETIF_F_TSO_ECN | NETIF_F_TSO6; |
4684 | } |
4685 | /* Individual feature bits: what can host handle? */ |
4686 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) |
4687 | dev->hw_features |= NETIF_F_TSO; |
4688 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) |
4689 | dev->hw_features |= NETIF_F_TSO6; |
4690 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) |
4691 | dev->hw_features |= NETIF_F_TSO_ECN; |
4692 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) |
4693 | dev->hw_features |= NETIF_F_GSO_UDP_L4; |
4694 | |
4695 | dev->features |= NETIF_F_GSO_ROBUST; |
4696 | |
4697 | if (gso) |
4698 | dev->features |= dev->hw_features & NETIF_F_ALL_TSO; |
4699 | /* (!csum && gso) case will be fixed by register_netdev() */ |
4700 | } |
4701 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) |
4702 | dev->features |= NETIF_F_RXCSUM; |
4703 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || |
4704 | virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) |
4705 | dev->features |= NETIF_F_GRO_HW; |
4706 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) |
4707 | dev->hw_features |= NETIF_F_GRO_HW; |
4708 | |
4709 | dev->vlan_features = dev->features; |
4710 | dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; |
4711 | |
4712 | /* MTU range: 68 - 65535 */ |
4713 | dev->min_mtu = MIN_MTU; |
4714 | dev->max_mtu = MAX_MTU; |
4715 | |
4716 | /* Configuration may specify what MAC to use. Otherwise random. */ |
4717 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { |
4718 | u8 addr[ETH_ALEN]; |
4719 | |
4720 | virtio_cread_bytes(vdev, |
4721 | offsetof(struct virtio_net_config, mac), |
4722 | addr, ETH_ALEN); |
4723 | eth_hw_addr_set(dev, addr); |
4724 | } else { |
4725 | eth_hw_addr_random(dev); |
4726 | dev_info(&vdev->dev, "Assigned random MAC address %pM\n", |
4727 | dev->dev_addr); |
4728 | } |
4729 | |
4730 | /* Set up our device-specific information */ |
4731 | vi = netdev_priv(dev); |
4732 | vi->dev = dev; |
4733 | vi->vdev = vdev; |
4734 | vdev->priv = vi; |
4735 | |
4736 | INIT_WORK(&vi->config_work, virtnet_config_changed_work); |
4737 | INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); |
4738 | spin_lock_init(&vi->refill_lock); |
4739 | |
4740 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { |
4741 | vi->mergeable_rx_bufs = true; |
4742 | dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; |
4743 | } |
4744 | |
4745 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) |
4746 | vi->has_rss_hash_report = true; |
4747 | |
4748 | if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { |
4749 | vi->has_rss = true; |
4750 | |
4751 | vi->rss_indir_table_size = |
4752 | virtio_cread16(vdev, offsetof(struct virtio_net_config, |
4753 | rss_max_indirection_table_length)); |
4754 | } |
4755 | |
4756 | if (vi->has_rss || vi->has_rss_hash_report) { |
4757 | vi->rss_key_size = |
4758 | virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); |
4759 | |
4760 | vi->rss_hash_types_supported = |
4761 | virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); |
4762 | vi->rss_hash_types_supported &= |
4763 | ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | |
4764 | VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | |
4765 | VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); |
4766 | |
4767 | dev->hw_features |= NETIF_F_RXHASH; |
4768 | } |
4769 | |
4770 | if (vi->has_rss_hash_report) |
4771 | vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); |
4772 | else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || |
4773 | virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) |
4774 | vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); |
4775 | else |
4776 | vi->hdr_len = sizeof(struct virtio_net_hdr); |
4777 | |
4778 | if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || |
4779 | virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) |
4780 | vi->any_header_sg = true; |
4781 | |
4782 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) |
4783 | vi->has_cvq = true; |
4784 | |
4785 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { |
4786 | mtu = virtio_cread16(vdev, |
4787 | offsetof(struct virtio_net_config, |
4788 | mtu)); |
4789 | if (mtu < dev->min_mtu) { |
4790 | /* Should never trigger: MTU was previously validated |
4791 | * in virtnet_validate. |
4792 | */ |
4793 | dev_err(&vdev->dev, |
4794 | "device MTU appears to have changed it is now %d < %d", |
4795 | mtu, dev->min_mtu); |
4796 | err = -EINVAL; |
4797 | goto free; |
4798 | } |
4799 | |
4800 | dev->mtu = mtu; |
4801 | dev->max_mtu = mtu; |
4802 | } |
4803 | |
4804 | virtnet_set_big_packets(vi, mtu); |
4805 | |
4806 | if (vi->any_header_sg) |
4807 | dev->needed_headroom = vi->hdr_len; |
4808 | |
4809 | /* Enable multiqueue by default */ |
4810 | if (num_online_cpus() >= max_queue_pairs) |
4811 | vi->curr_queue_pairs = max_queue_pairs; |
4812 | else |
4813 | vi->curr_queue_pairs = num_online_cpus(); |
4814 | vi->max_queue_pairs = max_queue_pairs; |
4815 | |
4816 | /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ |
4817 | err = init_vqs(vi); |
4818 | if (err) |
4819 | goto free; |
4820 | |
4821 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { |
4822 | vi->intr_coal_rx.max_usecs = 0; |
4823 | vi->intr_coal_tx.max_usecs = 0; |
4824 | vi->intr_coal_rx.max_packets = 0; |
4825 | |
4826 | /* Keep the default values of the coalescing parameters |
4827 | * aligned with the default napi_tx state. |
4828 | */ |
4829 | if (vi->sq[0].napi.weight) |
4830 | vi->intr_coal_tx.max_packets = 1; |
4831 | else |
4832 | vi->intr_coal_tx.max_packets = 0; |
4833 | } |
4834 | |
4835 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { |
4836 | /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ |
4837 | for (i = 0; i < vi->max_queue_pairs; i++) |
4838 | if (vi->sq[i].napi.weight) |
4839 | vi->sq[i].intr_coal.max_packets = 1; |
4840 | } |
4841 | |
4842 | #ifdef CONFIG_SYSFS |
4843 | if (vi->mergeable_rx_bufs) |
4844 | dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; |
4845 | #endif |
4846 | netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); |
4847 | netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); |
4848 | |
4849 | virtnet_init_settings(dev); |
4850 | |
4851 | if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { |
4852 | vi->failover = net_failover_create(vi->dev); |
4853 | if (IS_ERR(vi->failover)) { |
4854 | err = PTR_ERR(vi->failover); |
4855 | goto free_vqs; |
4856 | } |
4857 | } |
4858 | |
4859 | if (vi->has_rss || vi->has_rss_hash_report) |
4860 | virtnet_init_default_rss(vi); |
4861 | |
4862 | enable_rx_mode_work(vi); |
4863 | |
4864 | /* serialize netdev register + virtio_device_ready() with ndo_open() */ |
4865 | rtnl_lock(); |
4866 | |
4867 | err = register_netdevice(dev); |
4868 | if (err) { |
4869 | pr_debug("virtio_net: registering device failed\n"); |
4870 | rtnl_unlock(); |
4871 | goto free_failover; |
4872 | } |
4873 | |
4874 | virtio_device_ready(vdev); |
4875 | |
4876 | _virtnet_set_queues(vi, vi->curr_queue_pairs); |
4877 | |
4878 | /* a random MAC address has been assigned, notify the device. |
4879 | * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there |
4880 | * because many devices work fine without getting MAC explicitly |
4881 | */ |
4882 | if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && |
4883 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { |
4884 | struct scatterlist sg; |
4885 | |
4886 | sg_init_one(&sg, dev->dev_addr, dev->addr_len); |
4887 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, |
4888 | VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { |
4889 | pr_debug("virtio_net: setting MAC address failed\n"); |
4890 | rtnl_unlock(); |
4891 | err = -EINVAL; |
4892 | goto free_unregister_netdev; |
4893 | } |
4894 | } |
4895 | |
4896 | rtnl_unlock(); |
4897 | |
4898 | err = virtnet_cpu_notif_add(vi); |
4899 | if (err) { |
4900 | pr_debug("virtio_net: registering cpu notifier failed\n"); |
4901 | goto free_unregister_netdev; |
4902 | } |
4903 | |
4904 | /* Assume link up if device can't report link status, |
4905 | * otherwise get link status from config. */ |
4906 | netif_carrier_off(dev); |
4907 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { |
4908 | schedule_work(&vi->config_work); |
4909 | } else { |
4910 | vi->status = VIRTIO_NET_S_LINK_UP; |
4911 | virtnet_update_settings(vi); |
4912 | netif_carrier_on(dev); |
4913 | } |
4914 | |
4915 | for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) |
4916 | if (virtio_has_feature(vi->vdev, guest_offloads[i])) |
4917 | set_bit(guest_offloads[i], &vi->guest_offloads); |
4918 | vi->guest_offloads_capable = vi->guest_offloads; |
4919 | |
4920 | pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", |
4921 | dev->name, max_queue_pairs); |
4922 | |
4923 | return 0; |
4924 | |
4925 | free_unregister_netdev: |
4926 | unregister_netdev(dev); |
4927 | free_failover: |
4928 | net_failover_destroy(vi->failover); |
4929 | free_vqs: |
4930 | virtio_reset_device(vdev); |
4931 | cancel_delayed_work_sync(&vi->refill); |
4932 | free_receive_page_frags(vi); |
4933 | virtnet_del_vqs(vi); |
4934 | free: |
4935 | free_netdev(dev); |
4936 | return err; |
4937 | } |
4938 | |
4939 | static void remove_vq_common(struct virtnet_info *vi) |
4940 | { |
4941 | virtio_reset_device(vi->vdev); |
4942 | |
4943 | /* Free unused buffers in both send and recv, if any. */ |
4944 | free_unused_bufs(vi); |
4945 | |
4946 | free_receive_bufs(vi); |
4947 | |
4948 | free_receive_page_frags(vi); |
4949 | |
4950 | virtnet_del_vqs(vi); |
4951 | } |
4952 | |
4953 | static void virtnet_remove(struct virtio_device *vdev) |
4954 | { |
4955 | struct virtnet_info *vi = vdev->priv; |
4956 | |
4957 | virtnet_cpu_notif_remove(vi); |
4958 | |
4959 | /* Make sure no work handler is accessing the device. */ |
4960 | flush_work(&vi->config_work); |
4961 | disable_rx_mode_work(vi); |
4962 | flush_work(&vi->rx_mode_work); |
4963 | |
4964 | unregister_netdev(vi->dev); |
4965 | |
4966 | net_failover_destroy(vi->failover); |
4967 | |
4968 | remove_vq_common(vi); |
4969 | |
4970 | free_netdev(vi->dev); |
4971 | } |
4972 | |
4973 | static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) |
4974 | { |
4975 | struct virtnet_info *vi = vdev->priv; |
4976 | |
4977 | virtnet_cpu_notif_remove(vi); |
4978 | virtnet_freeze_down(vdev); |
4979 | remove_vq_common(vi); |
4980 | |
4981 | return 0; |
4982 | } |
4983 | |
4984 | static __maybe_unused int virtnet_restore(struct virtio_device *vdev) |
4985 | { |
4986 | struct virtnet_info *vi = vdev->priv; |
4987 | int err; |
4988 | |
4989 | err = virtnet_restore_up(vdev); |
4990 | if (err) |
4991 | return err; |
4992 | virtnet_set_queues(vi, vi->curr_queue_pairs); |
4993 | |
4994 | err = virtnet_cpu_notif_add(vi); |
4995 | if (err) { |
4996 | virtnet_freeze_down(vdev); |
4997 | remove_vq_common(vi); |
4998 | return err; |
4999 | } |
5000 | |
5001 | return 0; |
5002 | } |
5003 | |
5004 | static struct virtio_device_id id_table[] = { |
5005 | { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, |
5006 | { 0 }, |
5007 | }; |
5008 | |
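| /* Feature bits offered to the transport. The legacy table below additionally |
|  * advertises VIRTIO_NET_F_GSO and VIRTIO_F_ANY_LAYOUT for legacy devices. */ |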
5009 | #define VIRTNET_FEATURES \ |
5010 | VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ |
5011 | VIRTIO_NET_F_MAC, \ |
5012 | VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ |
5013 | VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ |
5014 | VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ |
5015 | VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ |
5016 | VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ |
5017 | VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ |
5018 | VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ |
5019 | VIRTIO_NET_F_CTRL_MAC_ADDR, \ |
5020 | VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ |
5021 | VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ |
5022 | VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ |
5023 | VIRTIO_NET_F_VQ_NOTF_COAL, \ |
5024 | VIRTIO_NET_F_GUEST_HDRLEN |
5025 | |
5026 | static unsigned int features[] = { |
5027 | VIRTNET_FEATURES, |
5028 | }; |
5029 | |
5030 | static unsigned int features_legacy[] = { |
5031 | VIRTNET_FEATURES, |
5032 | VIRTIO_NET_F_GSO, |
5033 | VIRTIO_F_ANY_LAYOUT, |
5034 | }; |
5035 | |
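| /* Driver registration glue: the virtio core matches id_table against |
|  * discovered devices, calls .validate to vet features, then .probe. */ |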
5036 | static struct virtio_driver virtio_net_driver = { |
5037 | .feature_table = features, |
5038 | .feature_table_size = ARRAY_SIZE(features), |
5039 | .feature_table_legacy = features_legacy, |
5040 | .feature_table_size_legacy = ARRAY_SIZE(features_legacy), |
5041 | .driver.name = KBUILD_MODNAME, |
5042 | .driver.owner = THIS_MODULE, |
5043 | .id_table = id_table, |
5044 | .validate = virtnet_validate, |
5045 | .probe = virtnet_probe, |
5046 | .remove = virtnet_remove, |
5047 | .config_changed = virtnet_config_changed, |
5048 | #ifdef CONFIG_PM_SLEEP |
5049 | .freeze = virtnet_freeze, |
5050 | .restore = virtnet_restore, |
5051 | #endif |
5052 | }; |
5053 | |
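| /* Module init: set up the two CPU hotplug multi-instance states (a dynamic |
|  * online state saved in virtionet_online and the CPUHP_VIRT_NET_DEAD |
|  * callback) before registering the driver; errors unwind in reverse. */ |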
5054 | static __init int virtio_net_driver_init(void) |
5055 | { |
5056 | int ret; |
5057 | |
5058 | ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", |
5059 | virtnet_cpu_online, |
5060 | virtnet_cpu_down_prep); |
5061 | if (ret < 0) |
5062 | goto out; |
5063 | virtionet_online = ret; |
5064 | ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", |
5065 | NULL, virtnet_cpu_dead); |
5066 | if (ret) |
5067 | goto err_dead; |
5068 | ret = register_virtio_driver(&virtio_net_driver); |
5069 | if (ret) |
5070 | goto err_virtio; |
5071 | return 0; |
5072 | err_virtio: |
5073 | cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); |
5074 | err_dead: |
5075 | cpuhp_remove_multi_state(virtionet_online); |
5076 | out: |
5077 | return ret; |
5078 | } |
5079 | module_init(virtio_net_driver_init); |
5080 | |
5081 | static __exit void virtio_net_driver_exit(void) |
5082 | { |
5083 | unregister_virtio_driver(&virtio_net_driver); |
5084 | cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); |
5085 | cpuhp_remove_multi_state(virtionet_online); |
5086 | } |
5087 | module_exit(virtio_net_driver_exit); |
5088 | |
5089 | MODULE_DEVICE_TABLE(virtio, id_table); |
5090 | MODULE_DESCRIPTION("Virtio network driver"); |
5091 | MODULE_LICENSE("GPL"); |