// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
26
27static int napi_weight = NAPI_POLL_WEIGHT;
28module_param(napi_weight, int, 0444);
29
30static bool csum = true, gso = true, napi_tx = true;
31module_param(csum, bool, 0444);
32module_param(gso, bool, 0444);
33module_param(napi_tx, bool, 0644);
34
35/* FIXME: MTU in config. */
36#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
37#define GOOD_COPY_LEN 128
38
39#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
40
41/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
42#define VIRTIO_XDP_HEADROOM 256
43
44/* Separating two types of XDP xmit */
45#define VIRTIO_XDP_TX BIT(0)
46#define VIRTIO_XDP_REDIR BIT(1)
47
48#define VIRTIO_XDP_FLAG BIT(0)
49
50/* RX packet size EWMA. The average packet size is used to determine the packet
51 * buffer size when refilling RX rings. As the entire RX ring may be refilled
52 * at once, the weight is chosen so that the EWMA will be insensitive to short-
53 * term, transient changes in packet size.
54 */
55DECLARE_EWMA(pkt_len, 0, 64)
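/* DECLARE_EWMA(pkt_len, 0, 64) generates struct ewma_pkt_len together with
 * the ewma_pkt_len_init/_add/_read helpers used below (see linux/average.h);
 * precision 0 and a weight reciprocal of 64 make the average move slowly.
 * A rough sketch of how the receive path consumes it:
 *
 *	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
 *	len = ewma_pkt_len_read(&rq->mrg_avg_pkt_len);
 */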
56
57#define VIRTNET_DRIVER_VERSION "1.0.0"
58
59static const unsigned long guest_offloads[] = {
60 VIRTIO_NET_F_GUEST_TSO4,
61 VIRTIO_NET_F_GUEST_TSO6,
62 VIRTIO_NET_F_GUEST_ECN,
63 VIRTIO_NET_F_GUEST_UFO,
64 VIRTIO_NET_F_GUEST_CSUM,
65 VIRTIO_NET_F_GUEST_USO4,
66 VIRTIO_NET_F_GUEST_USO6,
67 VIRTIO_NET_F_GUEST_HDRLEN
68};
69
70#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
71 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
72 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
73 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
74 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
75 (1ULL << VIRTIO_NET_F_GUEST_USO6))
76
77struct virtnet_stat_desc {
78 char desc[ETH_GSTRING_LEN];
79 size_t offset;
80};
81
82struct virtnet_sq_stats {
83 struct u64_stats_sync syncp;
84 u64_stats_t packets;
85 u64_stats_t bytes;
86 u64_stats_t xdp_tx;
87 u64_stats_t xdp_tx_drops;
88 u64_stats_t kicks;
89 u64_stats_t tx_timeouts;
90};
91
92struct virtnet_rq_stats {
93 struct u64_stats_sync syncp;
94 u64_stats_t packets;
95 u64_stats_t bytes;
96 u64_stats_t drops;
97 u64_stats_t xdp_packets;
98 u64_stats_t xdp_tx;
99 u64_stats_t xdp_redirects;
100 u64_stats_t xdp_drops;
101 u64_stats_t kicks;
102};
103
104#define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m)
105#define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m)
106
107static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
108 { "packets", VIRTNET_SQ_STAT(packets) },
109 { "bytes", VIRTNET_SQ_STAT(bytes) },
110 { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) },
111 { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) },
112 { "kicks", VIRTNET_SQ_STAT(kicks) },
113 { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) },
114};
115
116static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
117 { "packets", VIRTNET_RQ_STAT(packets) },
118 { "bytes", VIRTNET_RQ_STAT(bytes) },
119 { "drops", VIRTNET_RQ_STAT(drops) },
120 { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) },
121 { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) },
122 { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) },
123 { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) },
124 { "kicks", VIRTNET_RQ_STAT(kicks) },
125};
126
127#define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc)
128#define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc)
129
130struct virtnet_interrupt_coalesce {
131 u32 max_packets;
132 u32 max_usecs;
133};
134
/* The DMA information of the pages allocated at a time, stored at the head of each page. */
136struct virtnet_rq_dma {
137 dma_addr_t addr;
138 u32 ref;
139 u16 len;
140 u16 need_sync;
141};
142
143/* Internal representation of a send virtqueue */
144struct send_queue {
	/* Virtqueue associated with this send_queue */
146 struct virtqueue *vq;
147
148 /* TX: fragments + linear part + virtio header */
149 struct scatterlist sg[MAX_SKB_FRAGS + 2];
150
151 /* Name of the send queue: output.$index */
152 char name[16];
153
154 struct virtnet_sq_stats stats;
155
156 struct virtnet_interrupt_coalesce intr_coal;
157
158 struct napi_struct napi;
159
160 /* Record whether sq is in reset state. */
161 bool reset;
162};
163
164/* Internal representation of a receive virtqueue */
165struct receive_queue {
166 /* Virtqueue associated with this receive_queue */
167 struct virtqueue *vq;
168
169 struct napi_struct napi;
170
171 struct bpf_prog __rcu *xdp_prog;
172
173 struct virtnet_rq_stats stats;
174
175 struct virtnet_interrupt_coalesce intr_coal;
176
177 /* Chain pages by the private ptr. */
178 struct page *pages;
179
180 /* Average packet length for mergeable receive buffers. */
181 struct ewma_pkt_len mrg_avg_pkt_len;
182
183 /* Page frag for packet buffer allocation. */
184 struct page_frag alloc_frag;
185
186 /* RX: fragments + linear part + virtio header */
187 struct scatterlist sg[MAX_SKB_FRAGS + 2];
188
189 /* Min single buffer size for mergeable buffers case. */
190 unsigned int min_buf_len;
191
192 /* Name of this receive queue: input.$index */
193 char name[16];
194
195 struct xdp_rxq_info xdp_rxq;
196
	/* Record the last dma info to free after new pages are allocated. */
198 struct virtnet_rq_dma *last_dma;
199
	/* The driver does the DMA mapping itself (premapped buffers). */
	bool do_dma;
202};
203
/* This structure can hold an RSS message with the maximum indirection table and key sizes.
 * Note that the default structure describing the RSS configuration, virtio_net_rss_config,
 * carries the same information but cannot hold a full-size table.
 * In any case, the structure is passed to the virtio device through a scatterlist split
 * into parts, because the table size may differ depending on the device configuration.
 */
210#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
211#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
212struct virtio_net_ctrl_rss {
213 u32 hash_types;
214 u16 indirection_table_mask;
215 u16 unclassified_queue;
216 u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
217 u16 max_tx_vq;
218 u8 hash_key_length;
219 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
220};
221
222/* Control VQ buffers: protected by the rtnl lock */
223struct control_buf {
224 struct virtio_net_ctrl_hdr hdr;
225 virtio_net_ctrl_ack status;
226 struct virtio_net_ctrl_mq mq;
227 u8 promisc;
228 u8 allmulti;
229 __virtio16 vid;
230 __virtio64 offloads;
231 struct virtio_net_ctrl_rss rss;
232 struct virtio_net_ctrl_coal_tx coal_tx;
233 struct virtio_net_ctrl_coal_rx coal_rx;
234 struct virtio_net_ctrl_coal_vq coal_vq;
235};
236
237struct virtnet_info {
238 struct virtio_device *vdev;
239 struct virtqueue *cvq;
240 struct net_device *dev;
241 struct send_queue *sq;
242 struct receive_queue *rq;
243 unsigned int status;
244
245 /* Max # of queue pairs supported by the device */
246 u16 max_queue_pairs;
247
248 /* # of queue pairs currently used by the driver */
249 u16 curr_queue_pairs;
250
251 /* # of XDP queue pairs currently used by the driver */
252 u16 xdp_queue_pairs;
253
	/* xdp_queue_pairs may be 0 even when an XDP program is loaded, so track this separately. */
255 bool xdp_enabled;
256
257 /* I like... big packets and I cannot lie! */
258 bool big_packets;
259
260 /* number of sg entries allocated for big packets */
261 unsigned int big_packets_num_skbfrags;
262
263 /* Host will merge rx buffers for big packets (shake it! shake it!) */
264 bool mergeable_rx_bufs;
265
266 /* Host supports rss and/or hash report */
267 bool has_rss;
268 bool has_rss_hash_report;
269 u8 rss_key_size;
270 u16 rss_indir_table_size;
271 u32 rss_hash_types_supported;
272 u32 rss_hash_types_saved;
273
274 /* Has control virtqueue */
275 bool has_cvq;
276
277 /* Host can handle any s/g split between our header and packet data */
278 bool any_header_sg;
279
280 /* Packet virtio header size */
281 u8 hdr_len;
282
283 /* Work struct for delayed refilling if we run low on memory. */
284 struct delayed_work refill;
285
286 /* Is delayed refill enabled? */
287 bool refill_enabled;
288
289 /* The lock to synchronize the access to refill_enabled */
290 spinlock_t refill_lock;
291
292 /* Work struct for config space updates */
293 struct work_struct config_work;
294
	/* Is the affinity hint set for the virtqueues? */
296 bool affinity_hint_set;
297
298 /* CPU hotplug instances for online & dead */
299 struct hlist_node node;
300 struct hlist_node node_dead;
301
302 struct control_buf *ctrl;
303
304 /* Ethtool settings */
305 u8 duplex;
306 u32 speed;
307
308 /* Interrupt coalescing settings */
309 struct virtnet_interrupt_coalesce intr_coal_tx;
310 struct virtnet_interrupt_coalesce intr_coal_rx;
311
312 unsigned long guest_offloads;
313 unsigned long guest_offloads_capable;
314
315 /* failover when STANDBY feature enabled */
316 struct failover *failover;
317};
318
319struct padded_vnet_hdr {
320 struct virtio_net_hdr_v1_hash hdr;
321 /*
322 * hdr is in a separate sg buffer, and data sg buffer shares same page
323 * with this header sg. This padding makes next sg 16 byte aligned
324 * after the header.
325 */
326 char padding[12];
327};
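/* For example, assuming struct virtio_net_hdr_v1_hash is 20 bytes, the 12 bytes
 * of padding above bring the header sg to 32 bytes, so the data sg that follows
 * it in the same page starts 16-byte aligned; see add_recvbuf_big(), which
 * points rq->sg[1] at p + sizeof(struct padded_vnet_hdr).
 */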
328
329struct virtio_net_common_hdr {
330 union {
331 struct virtio_net_hdr hdr;
332 struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
333 struct virtio_net_hdr_v1_hash hash_v1_hdr;
334 };
335};
336
337static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
338static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
339
340static bool is_xdp_frame(void *ptr)
341{
342 return (unsigned long)ptr & VIRTIO_XDP_FLAG;
343}
344
345static void *xdp_to_ptr(struct xdp_frame *ptr)
346{
347 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
348}
349
350static struct xdp_frame *ptr_to_xdp(void *ptr)
351{
352 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
353}
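/* The tagging above relies on xdp_frame pointers being at least 2-byte aligned,
 * so bit 0 (VIRTIO_XDP_FLAG) is free to mark which kind of token was queued on
 * the sq.  Roughly:
 *
 *	virtqueue_add_outbuf(sq->vq, ..., xdp_to_ptr(xdpf), GFP_ATOMIC);
 *	...
 *	if (is_xdp_frame(ptr))
 *		xdp_return_frame(ptr_to_xdp(ptr));
 *	else
 *		napi_consume_skb((struct sk_buff *)ptr, in_napi);
 */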
354
355/* Converting between virtqueue no. and kernel tx/rx queue no.
356 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
357 */
358static int vq2txq(struct virtqueue *vq)
359{
360 return (vq->index - 1) / 2;
361}
362
363static int txq2vq(int txq)
364{
365 return txq * 2 + 1;
366}
367
368static int vq2rxq(struct virtqueue *vq)
369{
370 return vq->index / 2;
371}
372
373static int rxq2vq(int rxq)
374{
375 return rxq * 2;
376}
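/* Worked example of the mapping above for a device with 2 queue pairs:
 * vq0 is rx0, vq1 is tx0, vq2 is rx1, vq3 is tx1 and vq4 is the control vq,
 * so vq2rxq(vq2) == 1, vq2txq(vq3) == 1, rxq2vq(1) == 2 and txq2vq(1) == 3.
 */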
377
378static inline struct virtio_net_common_hdr *
379skb_vnet_common_hdr(struct sk_buff *skb)
380{
381 return (struct virtio_net_common_hdr *)skb->cb;
382}
383
/*
 * page->private is used to chain pages for big packets; put the whole
 * most recently used list at the front of rq->pages for reuse.
 */
388static void give_pages(struct receive_queue *rq, struct page *page)
389{
390 struct page *end;
391
392 /* Find end of list, sew whole thing into vi->rq.pages. */
393 for (end = page; end->private; end = (struct page *)end->private);
394 end->private = (unsigned long)rq->pages;
395 rq->pages = page;
396}
397
398static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
399{
400 struct page *p = rq->pages;
401
402 if (p) {
403 rq->pages = (struct page *)p->private;
404 /* clear private here, it is used to chain pages */
405 p->private = 0;
406 } else
407 p = alloc_page(gfp_mask);
408 return p;
409}
410
static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}
424
static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static void virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
	} else {
		virtqueue_disable_cb(vq);
	}
}
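/* The opaque token returned by virtqueue_enable_cb_prepare() lets
 * virtqueue_poll() detect buffers that arrived between re-enabling callbacks
 * and completing NAPI; if that race is detected the queue is simply
 * rescheduled instead of being left unpolled.
 */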
447
static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}
462
463#define MRG_CTX_HEADER_SHIFT 22
464static void *mergeable_len_to_ctx(unsigned int truesize,
465 unsigned int headroom)
466{
467 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
468}
469
470static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
471{
472 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
473}
474
475static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
476{
477 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
478}
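/* The mergeable context packs headroom and truesize into one pointer-sized
 * value: truesize lives in the low MRG_CTX_HEADER_SHIFT (22) bits and the
 * headroom in the bits above it.  For example (values are illustrative):
 *
 *	ctx = mergeable_len_to_ctx(1536, 256);
 *	mergeable_ctx_to_truesize(ctx) == 1536;
 *	mergeable_ctx_to_headroom(ctx) == 256;
 */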
479
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}
495
496/* Called from bottom half context */
497static struct sk_buff *page_to_skb(struct virtnet_info *vi,
498 struct receive_queue *rq,
499 struct page *page, unsigned int offset,
500 unsigned int len, unsigned int truesize,
501 unsigned int headroom)
502{
503 struct sk_buff *skb;
504 struct virtio_net_common_hdr *hdr;
505 unsigned int copy, hdr_len, hdr_padded_len;
506 struct page *page_to_free = NULL;
507 int tailroom, shinfo_size;
508 char *p, *hdr_p, *buf;
509
510 p = page_address(page) + offset;
511 hdr_p = p;
512
513 hdr_len = vi->hdr_len;
514 if (vi->mergeable_rx_bufs)
515 hdr_padded_len = hdr_len;
516 else
517 hdr_padded_len = sizeof(struct padded_vnet_hdr);
518
519 buf = p - headroom;
520 len -= hdr_len;
521 offset += hdr_padded_len;
522 p += hdr_padded_len;
523 tailroom = truesize - headroom - hdr_padded_len - len;
524
525 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
526
	/* Large packets are built around the page buffer itself (no copy) when
	 * there is enough tailroom for skb_shared_info; smaller packets fall
	 * through and are copied below so the pages can be reused.
	 */
528 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
529 skb = virtnet_build_skb(buf, buflen: truesize, headroom: p - buf, len);
530 if (unlikely(!skb))
531 return NULL;
532
533 page = (struct page *)page->private;
534 if (page)
535 give_pages(rq, page);
536 goto ok;
537 }
538
539 /* copy small packet so we can reuse these pages for small data */
540 skb = napi_alloc_skb(napi: &rq->napi, GOOD_COPY_LEN);
541 if (unlikely(!skb))
542 return NULL;
543
	/* Copy the whole frame if it fits in skb->head; otherwise
	 * let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
547 if (len <= skb_tailroom(skb))
548 copy = len;
549 else
550 copy = ETH_HLEN;
551 skb_put_data(skb, data: p, len: copy);
552
553 len -= copy;
554 offset += copy;
555
556 if (vi->mergeable_rx_bufs) {
557 if (len)
558 skb_add_rx_frag(skb, i: 0, page, off: offset, size: len, truesize);
559 else
560 page_to_free = page;
561 goto ok;
562 }
563
564 /*
565 * Verify that we can indeed put this data into a skb.
566 * This is here to handle cases when the device erroneously
567 * tries to receive more than is possible. This is usually
568 * the case of a broken device.
569 */
570 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
571 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
572 dev_kfree_skb(skb);
573 return NULL;
574 }
575 BUG_ON(offset >= PAGE_SIZE);
576 while (len) {
577 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
578 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off: offset,
579 size: frag_size, truesize);
580 len -= frag_size;
581 page = (struct page *)page->private;
582 offset = 0;
583 }
584
585 if (page)
586 give_pages(rq, page);
587
588ok:
589 hdr = skb_vnet_common_hdr(skb);
590 memcpy(hdr, hdr_p, hdr_len);
591 if (page_to_free)
592 put_page(page: page_to_free);
593
594 return skb;
595}
596
static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}
625
static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	void *buf;

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf && rq->do_dma)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void *virtnet_rq_detach_unused_buf(struct receive_queue *rq)
{
	void *buf;

	buf = virtqueue_detach_unused_buf(rq->vq);
	if (buf && rq->do_dma)
		virtnet_rq_unmap(rq, buf, 0);

	return buf;
}
647
648static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
649{
650 struct virtnet_rq_dma *dma;
651 dma_addr_t addr;
652 u32 offset;
653 void *head;
654
655 if (!rq->do_dma) {
656 sg_init_one(rq->sg, buf, len);
657 return;
658 }
659
660 head = page_address(rq->alloc_frag.page);
661
662 offset = buf - head;
663
664 dma = head;
665
666 addr = dma->addr - sizeof(*dma) + offset;
667
668 sg_init_table(rq->sg, 1);
669 rq->sg[0].dma_address = addr;
670 rq->sg[0].length = len;
671}
672
673static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
674{
675 struct page_frag *alloc_frag = &rq->alloc_frag;
676 struct virtnet_rq_dma *dma;
677 void *buf, *head;
678 dma_addr_t addr;
679
680 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
681 return NULL;
682
683 head = page_address(alloc_frag->page);
684
	if (rq->do_dma) {
		dma = head;

		/* A new page has just been allocated. */
		if (!alloc_frag->offset) {
			if (rq->last_dma) {
				/* The new page is allocated, so the previous
				 * dma will not be used any more.  It can be
				 * unmapped once its refcount drops to zero.
				 */
				virtnet_rq_unmap(rq, rq->last_dma, 0);
				rq->last_dma = NULL;
			}

			dma->len = alloc_frag->size - sizeof(*dma);

			addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
							      dma->len, DMA_FROM_DEVICE, 0);
			if (virtqueue_dma_mapping_error(rq->vq, addr))
				return NULL;

			dma->addr = addr;
			dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

			/* Add a reference to dma to prevent the entire dma from
			 * being released during error handling.  This reference
			 * will be freed after the pages are no longer used.
			 */
			get_page(alloc_frag->page);
			dma->ref = 1;
			alloc_frag->offset = sizeof(*dma);

			rq->last_dma = dma;
		}

		++dma->ref;
	}
722
723 buf = head + alloc_frag->offset;
724
	get_page(alloc_frag->page);
726 alloc_frag->offset += size;
727
728 return buf;
729}
730
static void virtnet_rq_set_premapped(struct virtnet_info *vi)
{
	int i;

	/* disable for big mode */
	if (!vi->mergeable_rx_bufs && vi->big_packets)
		return;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (virtqueue_set_dma_premapped(vi->rq[i].vq))
			continue;

		vi->rq[i].do_dma = true;
	}
}
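/* When do_dma is set, each page used by the rx page_frag starts with a
 * struct virtnet_rq_dma describing a single mapping that covers the rest
 * of the page; roughly:
 *
 *	| struct virtnet_rq_dma | buf 0 | buf 1 | ... |
 *	  ^ page_address(page)    ^ dma->addr maps from here for dma->len bytes
 *
 * virtnet_rq_alloc() refcounts that mapping and virtnet_rq_unmap() tears it
 * down once the last buffer carved from the page has been returned.
 */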
746
static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
{
	unsigned int len;
	unsigned int packets = 0;
	unsigned int bytes = 0;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (likely(!is_xdp_frame(ptr))) {
			struct sk_buff *skb = ptr;

			pr_debug("Sent skb %p\n", skb);

			bytes += skb->len;
			napi_consume_skb(skb, in_napi);
		} else {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
		}
		packets++;
	}

	/* Avoid overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, bytes);
	u64_stats_add(&sq->stats.packets, packets);
	u64_stats_update_end(&sq->stats.syncp);
}
782
783static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
784{
785 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
786 return false;
787 else if (q < vi->curr_queue_pairs)
788 return true;
789 else
790 return false;
791}
792
793static void check_sq_full_and_disable(struct virtnet_info *vi,
794 struct net_device *dev,
795 struct send_queue *sq)
796{
797 bool use_napi = sq->napi.weight;
798 int qnum;
799
800 qnum = sq - vi->sq;
801
802 /* If running out of space, stop queue to avoid getting packets that we
803 * are then unable to transmit.
804 * An alternative would be to force queuing layer to requeue the skb by
805 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
806 * returned in a normal path of operation: it means that driver is not
807 * maintaining the TX queue stop/start state properly, and causes
808 * the stack to do a non-trivial amount of useless work.
809 * Since most packets only take 1 or 2 ring slots, stopping the queue
810 * early means 16 slots are typically wasted.
811 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		netif_stop_subqueue(dev, qnum);
		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit_skbs(sq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}
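/* The 2 + MAX_SKB_FRAGS threshold above is the worst-case number of
 * descriptors one skb may need: one for the virtio header, one for the
 * linear part and up to MAX_SKB_FRAGS for the page fragments.
 */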
827
828static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
829 struct send_queue *sq,
830 struct xdp_frame *xdpf)
831{
832 struct virtio_net_hdr_mrg_rxbuf *hdr;
833 struct skb_shared_info *shinfo;
834 u8 nr_frags = 0;
835 int err, i;
836
837 if (unlikely(xdpf->headroom < vi->hdr_len))
838 return -EOVERFLOW;
839
840 if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
842 nr_frags = shinfo->nr_frags;
843 }
844
	/* In the wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where xdp_get_frame_len() and
	 * xdp_return_frame() compute the position of skb_shared_info
	 * from xdpf->data and xdpf->headroom.  Therefore, the headroom
	 * value must be updated synchronously here.
	 */
852 xdpf->headroom -= vi->hdr_len;
853 xdpf->data -= vi->hdr_len;
854 /* Zero header and leave csum up to XDP layers */
855 hdr = xdpf->data;
856 memset(hdr, 0, vi->hdr_len);
857 xdpf->len += vi->hdr_len;
858
	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */
872
873 return 0;
874}
875
/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding whether to
 * lock/unlock the txq, and 3. keeping sparse happy. It is difficult for two
 * inline functions to solve all three problems at the same time.
 */
884#define virtnet_xdp_get_sq(vi) ({ \
885 int cpu = smp_processor_id(); \
886 struct netdev_queue *txq; \
887 typeof(vi) v = (vi); \
888 unsigned int qp; \
889 \
890 if (v->curr_queue_pairs > nr_cpu_ids) { \
891 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
892 qp += cpu; \
893 txq = netdev_get_tx_queue(v->dev, qp); \
894 __netif_tx_acquire(txq); \
895 } else { \
896 qp = cpu % v->curr_queue_pairs; \
897 txq = netdev_get_tx_queue(v->dev, qp); \
898 __netif_tx_lock(txq, cpu); \
899 } \
900 v->sq + qp; \
901})
902
903#define virtnet_xdp_put_sq(vi, q) { \
904 struct netdev_queue *txq; \
905 typeof(vi) v = (vi); \
906 \
907 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
908 if (v->curr_queue_pairs > nr_cpu_ids) \
909 __netif_tx_release(txq); \
910 else \
911 __netif_tx_unlock(txq); \
912}
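/* Typical pairing of the two macros above (see virtnet_xdp_xmit() below):
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	... add frames to sq->vq and optionally kick it ...
 *	virtnet_xdp_put_sq(vi, sq);
 */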
913
914static int virtnet_xdp_xmit(struct net_device *dev,
915 int n, struct xdp_frame **frames, u32 flags)
916{
917 struct virtnet_info *vi = netdev_priv(dev);
918 struct receive_queue *rq = vi->rq;
919 struct bpf_prog *xdp_prog;
920 struct send_queue *sq;
921 unsigned int len;
922 int packets = 0;
923 int bytes = 0;
924 int nxmit = 0;
925 int kicks = 0;
926 void *ptr;
927 int ret;
928 int i;
929
	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates that XDP resources have been successfully allocated.
	 */
933 xdp_prog = rcu_access_pointer(rq->xdp_prog);
934 if (!xdp_prog)
935 return -ENXIO;
936
937 sq = virtnet_xdp_get_sq(vi);
938
939 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
940 ret = -EINVAL;
941 goto out;
942 }
943
944 /* Free up any pending old buffers before queueing new ones. */
945 while ((ptr = virtqueue_get_buf(vq: sq->vq, len: &len)) != NULL) {
946 if (likely(is_xdp_frame(ptr))) {
947 struct xdp_frame *frame = ptr_to_xdp(ptr);
948
949 bytes += xdp_get_frame_len(xdpf: frame);
950 xdp_return_frame(xdpf: frame);
951 } else {
952 struct sk_buff *skb = ptr;
953
954 bytes += skb->len;
955 napi_consume_skb(skb, budget: false);
956 }
957 packets++;
958 }
959
960 for (i = 0; i < n; i++) {
961 struct xdp_frame *xdpf = frames[i];
962
963 if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
964 break;
965 nxmit++;
966 }
967 ret = nxmit;
968
969 if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq))
970 check_sq_full_and_disable(vi, dev, sq);
971
972 if (flags & XDP_XMIT_FLUSH) {
973 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq))
974 kicks = 1;
975 }
976out:
977 u64_stats_update_begin(syncp: &sq->stats.syncp);
978 u64_stats_add(p: &sq->stats.bytes, val: bytes);
979 u64_stats_add(p: &sq->stats.packets, val: packets);
980 u64_stats_add(p: &sq->stats.xdp_tx, val: n);
981 u64_stats_add(p: &sq->stats.xdp_tx_drops, val: n - nxmit);
982 u64_stats_add(p: &sq->stats.kicks, val: kicks);
983 u64_stats_update_end(syncp: &sq->stats.syncp);
984
985 virtnet_xdp_put_sq(vi, sq);
986 return ret;
987}
988
static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;
	int i;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);
			put_page(xdp_page);
		}
	}
}
1003
1004static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
1005 struct net_device *dev,
1006 unsigned int *xdp_xmit,
1007 struct virtnet_rq_stats *stats)
1008{
1009 struct xdp_frame *xdpf;
1010 int err;
1011 u32 act;
1012
1013 act = bpf_prog_run_xdp(prog: xdp_prog, xdp);
1014 u64_stats_inc(p: &stats->xdp_packets);
1015
1016 switch (act) {
1017 case XDP_PASS:
1018 return act;
1019
1020 case XDP_TX:
1021 u64_stats_inc(p: &stats->xdp_tx);
1022 xdpf = xdp_convert_buff_to_frame(xdp);
1023 if (unlikely(!xdpf)) {
1024 netdev_dbg(dev, "convert buff to frame failed for xdp\n");
1025 return XDP_DROP;
1026 }
1027
1028 err = virtnet_xdp_xmit(dev, n: 1, frames: &xdpf, flags: 0);
1029 if (unlikely(!err)) {
1030 xdp_return_frame_rx_napi(xdpf);
1031 } else if (unlikely(err < 0)) {
1032 trace_xdp_exception(dev, xdp: xdp_prog, act);
1033 return XDP_DROP;
1034 }
1035 *xdp_xmit |= VIRTIO_XDP_TX;
1036 return act;
1037
1038 case XDP_REDIRECT:
1039 u64_stats_inc(p: &stats->xdp_redirects);
1040 err = xdp_do_redirect(dev, xdp, prog: xdp_prog);
1041 if (err)
1042 return XDP_DROP;
1043
1044 *xdp_xmit |= VIRTIO_XDP_REDIR;
1045 return act;
1046
1047 default:
1048 bpf_warn_invalid_xdp_action(dev, prog: xdp_prog, act);
1049 fallthrough;
1050 case XDP_ABORTED:
1051 trace_xdp_exception(dev, xdp: xdp_prog, act);
1052 fallthrough;
1053 case XDP_DROP:
1054 return XDP_DROP;
1055 }
1056}
1057
1058static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
1059{
1060 return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
1061}
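/* With XDP enabled every receive buffer is prepended with
 * VIRTIO_XDP_HEADROOM (256) bytes so that bpf_xdp_adjust_head() has room
 * to grow the packet; without XDP no extra headroom is reserved.
 */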
1062
1063/* We copy the packet for XDP in the following cases:
1064 *
1065 * 1) Packet is scattered across multiple rx buffers.
1066 * 2) Headroom space is insufficient.
1067 *
1068 * This is inefficient but it's a temporary condition that
1069 * we hit right after XDP is enabled and until queue is refilled
1070 * with large buffers with sufficient headroom - so it should affect
1071 * at most queue size packets.
1072 * Afterwards, the conditions to enable
1073 * XDP should preclude the underlying device from sending packets
1074 * across multiple buffers (num_buf > 1), and we make sure buffers
1075 * have enough headroom.
1076 */
1077static struct page *xdp_linearize_page(struct receive_queue *rq,
1078 int *num_buf,
1079 struct page *p,
1080 int offset,
1081 int page_off,
1082 unsigned int *len)
1083{
1084 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1085 struct page *page;
1086
1087 if (page_off + *len + tailroom > PAGE_SIZE)
1088 return NULL;
1089
1090 page = alloc_page(GFP_ATOMIC);
1091 if (!page)
1092 return NULL;
1093
1094 memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
1095 page_off += *len;
1096
1097 while (--*num_buf) {
1098 unsigned int buflen;
1099 void *buf;
1100 int off;
1101
1102 buf = virtnet_rq_get_buf(rq, len: &buflen, NULL);
1103 if (unlikely(!buf))
1104 goto err_buf;
1105
1106 p = virt_to_head_page(x: buf);
1107 off = buf - page_address(p);
1108
		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
1112 if ((page_off + buflen + tailroom) > PAGE_SIZE) {
1113 put_page(page: p);
1114 goto err_buf;
1115 }
1116
1117 memcpy(page_address(page) + page_off,
1118 page_address(p) + off, buflen);
1119 page_off += buflen;
1120 put_page(page: p);
1121 }
1122
1123 /* Headroom does not contribute to packet length */
1124 *len = page_off - VIRTIO_XDP_HEADROOM;
1125 return page;
1126err_buf:
1127 __free_pages(page, order: 0);
1128 return NULL;
1129}
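/* xdp_linearize_page() only succeeds while the copied data plus the
 * skb_shared_info tailroom still fit in one page, e.g. with 4 KiB pages
 * roughly PAGE_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) -
 * VIRTIO_XDP_HEADROOM (about 3.5 KiB) of packet data; anything larger is
 * dropped by the callers.
 */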
1130
1131static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
1132 unsigned int xdp_headroom,
1133 void *buf,
1134 unsigned int len)
1135{
1136 unsigned int header_offset;
1137 unsigned int headroom;
1138 unsigned int buflen;
1139 struct sk_buff *skb;
1140
1141 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1142 headroom = vi->hdr_len + header_offset;
1143 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1144 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1145
1146 skb = virtnet_build_skb(buf, buflen, headroom, len);
1147 if (unlikely(!skb))
1148 return NULL;
1149
1150 buf += header_offset;
1151 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);
1152
1153 return skb;
1154}
1155
1156static struct sk_buff *receive_small_xdp(struct net_device *dev,
1157 struct virtnet_info *vi,
1158 struct receive_queue *rq,
1159 struct bpf_prog *xdp_prog,
1160 void *buf,
1161 unsigned int xdp_headroom,
1162 unsigned int len,
1163 unsigned int *xdp_xmit,
1164 struct virtnet_rq_stats *stats)
1165{
1166 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
1167 unsigned int headroom = vi->hdr_len + header_offset;
1168 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
1169 struct page *page = virt_to_head_page(x: buf);
1170 struct page *xdp_page;
1171 unsigned int buflen;
1172 struct xdp_buff xdp;
1173 struct sk_buff *skb;
1174 unsigned int metasize = 0;
1175 u32 act;
1176
1177 if (unlikely(hdr->hdr.gso_type))
1178 goto err_xdp;
1179
1180 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1181 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1182
1183 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
1184 int offset = buf - page_address(page) + header_offset;
1185 unsigned int tlen = len + vi->hdr_len;
1186 int num_buf = 1;
1187
1188 xdp_headroom = virtnet_get_headroom(vi);
1189 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1190 headroom = vi->hdr_len + header_offset;
1191 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1192 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1193 xdp_page = xdp_linearize_page(rq, num_buf: &num_buf, p: page,
1194 offset, page_off: header_offset,
1195 len: &tlen);
1196 if (!xdp_page)
1197 goto err_xdp;
1198
1199 buf = page_address(xdp_page);
1200 put_page(page);
1201 page = xdp_page;
1202 }
1203
1204 xdp_init_buff(xdp: &xdp, frame_sz: buflen, rxq: &rq->xdp_rxq);
1205 xdp_prepare_buff(xdp: &xdp, hard_start: buf + VIRTNET_RX_PAD + vi->hdr_len,
1206 headroom: xdp_headroom, data_len: len, meta_valid: true);
1207
1208 act = virtnet_xdp_handler(xdp_prog, xdp: &xdp, dev, xdp_xmit, stats);
1209
1210 switch (act) {
1211 case XDP_PASS:
1212 /* Recalculate length in case bpf program changed it */
1213 len = xdp.data_end - xdp.data;
1214 metasize = xdp.data - xdp.data_meta;
1215 break;
1216
1217 case XDP_TX:
1218 case XDP_REDIRECT:
1219 goto xdp_xmit;
1220
1221 default:
1222 goto err_xdp;
1223 }
1224
1225 skb = virtnet_build_skb(buf, buflen, headroom: xdp.data - buf, len);
1226 if (unlikely(!skb))
1227 goto err;
1228
1229 if (metasize)
1230 skb_metadata_set(skb, meta_len: metasize);
1231
1232 return skb;
1233
1234err_xdp:
1235 u64_stats_inc(p: &stats->xdp_drops);
1236err:
1237 u64_stats_inc(p: &stats->drops);
1238 put_page(page);
1239xdp_xmit:
1240 return NULL;
1241}
1242
1243static struct sk_buff *receive_small(struct net_device *dev,
1244 struct virtnet_info *vi,
1245 struct receive_queue *rq,
1246 void *buf, void *ctx,
1247 unsigned int len,
1248 unsigned int *xdp_xmit,
1249 struct virtnet_rq_stats *stats)
1250{
1251 unsigned int xdp_headroom = (unsigned long)ctx;
1252 struct page *page = virt_to_head_page(x: buf);
1253 struct sk_buff *skb;
1254
1255 len -= vi->hdr_len;
1256 u64_stats_add(p: &stats->bytes, val: len);
1257
1258 if (unlikely(len > GOOD_PACKET_LEN)) {
1259 pr_debug("%s: rx error: len %u exceeds max size %d\n",
1260 dev->name, len, GOOD_PACKET_LEN);
1261 DEV_STATS_INC(dev, rx_length_errors);
1262 goto err;
1263 }
1264
1265 if (unlikely(vi->xdp_enabled)) {
1266 struct bpf_prog *xdp_prog;
1267
1268 rcu_read_lock();
1269 xdp_prog = rcu_dereference(rq->xdp_prog);
1270 if (xdp_prog) {
1271 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
1272 xdp_headroom, len, xdp_xmit,
1273 stats);
1274 rcu_read_unlock();
1275 return skb;
1276 }
1277 rcu_read_unlock();
1278 }
1279
1280 skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
1281 if (likely(skb))
1282 return skb;
1283
1284err:
1285 u64_stats_inc(p: &stats->drops);
1286 put_page(page);
1287 return NULL;
1288}
1289
static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);
	if (unlikely(!skb))
		goto err;

	return skb;

err:
	u64_stats_inc(&stats->drops);
	give_pages(rq, page);
	return NULL;
}
1312
static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
			       struct net_device *dev,
			       struct virtnet_rq_stats *stats)
{
	struct page *page;
	void *buf;
	int len;

	while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		put_page(page);
	}
}
1334
/* Why not use xdp_build_skb_from_frame() ?
 * The XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are 2 points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does more checks than we need,
 *    like eth_type_trans() (which virtio-net does in receive_buf()).
 */
1342static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
1343 struct virtnet_info *vi,
1344 struct xdp_buff *xdp,
1345 unsigned int xdp_frags_truesz)
1346{
1347 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
1348 unsigned int headroom, data_len;
1349 struct sk_buff *skb;
1350 int metasize;
1351 u8 nr_frags;
1352
1353 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
1354 pr_debug("Error building skb as missing reserved tailroom for xdp");
1355 return NULL;
1356 }
1357
1358 if (unlikely(xdp_buff_has_frags(xdp)))
1359 nr_frags = sinfo->nr_frags;
1360
1361 skb = build_skb(data: xdp->data_hard_start, frag_size: xdp->frame_sz);
1362 if (unlikely(!skb))
1363 return NULL;
1364
1365 headroom = xdp->data - xdp->data_hard_start;
1366 data_len = xdp->data_end - xdp->data;
1367 skb_reserve(skb, len: headroom);
1368 __skb_put(skb, len: data_len);
1369
1370 metasize = xdp->data - xdp->data_meta;
1371 metasize = metasize > 0 ? metasize : 0;
1372 if (metasize)
1373 skb_metadata_set(skb, meta_len: metasize);
1374
1375 if (unlikely(xdp_buff_has_frags(xdp)))
1376 xdp_update_skb_shared_info(skb, nr_frags,
1377 size: sinfo->xdp_frags_size,
1378 truesize: xdp_frags_truesz,
1379 pfmemalloc: xdp_buff_is_frag_pfmemalloc(xdp));
1380
1381 return skb;
1382}
1383
1384/* TODO: build xdp in big mode */
1385static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
1386 struct virtnet_info *vi,
1387 struct receive_queue *rq,
1388 struct xdp_buff *xdp,
1389 void *buf,
1390 unsigned int len,
1391 unsigned int frame_sz,
1392 int *num_buf,
1393 unsigned int *xdp_frags_truesize,
1394 struct virtnet_rq_stats *stats)
1395{
1396 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
1397 unsigned int headroom, tailroom, room;
1398 unsigned int truesize, cur_frag_size;
1399 struct skb_shared_info *shinfo;
1400 unsigned int xdp_frags_truesz = 0;
1401 struct page *page;
1402 skb_frag_t *frag;
1403 int offset;
1404 void *ctx;
1405
1406 xdp_init_buff(xdp, frame_sz, rxq: &rq->xdp_rxq);
1407 xdp_prepare_buff(xdp, hard_start: buf - VIRTIO_XDP_HEADROOM,
1408 VIRTIO_XDP_HEADROOM + vi->hdr_len, data_len: len - vi->hdr_len, meta_valid: true);
1409
1410 if (!*num_buf)
1411 return 0;
1412
1413 if (*num_buf > 1) {
		/* To build a multi-buffer xdp_buff, the XDP_FLAGS_HAS_FRAGS
		 * bit must be set in the xdp_buff flags.
		 */
1418 if (!xdp_buff_has_frags(xdp))
1419 xdp_buff_set_frags_flag(xdp);
1420
1421 shinfo = xdp_get_shared_info_from_buff(xdp);
1422 shinfo->nr_frags = 0;
1423 shinfo->xdp_frags_size = 0;
1424 }
1425
1426 if (*num_buf > MAX_SKB_FRAGS + 1)
1427 return -EINVAL;
1428
1429 while (--*num_buf > 0) {
1430 buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx);
1431 if (unlikely(!buf)) {
1432 pr_debug("%s: rx error: %d buffers out of %d missing\n",
1433 dev->name, *num_buf,
1434 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
1435 DEV_STATS_INC(dev, rx_length_errors);
1436 goto err;
1437 }
1438
1439 u64_stats_add(p: &stats->bytes, val: len);
1440 page = virt_to_head_page(x: buf);
1441 offset = buf - page_address(page);
1442
1443 truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx);
1444 headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx);
1445 tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
1446 room = SKB_DATA_ALIGN(headroom + tailroom);
1447
1448 cur_frag_size = truesize;
1449 xdp_frags_truesz += cur_frag_size;
1450 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
1451 put_page(page);
1452 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
1453 dev->name, len, (unsigned long)(truesize - room));
1454 DEV_STATS_INC(dev, rx_length_errors);
1455 goto err;
1456 }
1457
1458 frag = &shinfo->frags[shinfo->nr_frags++];
1459 skb_frag_fill_page_desc(frag, page, off: offset, size: len);
1460 if (page_is_pfmemalloc(page))
1461 xdp_buff_set_frag_pfmemalloc(xdp);
1462
1463 shinfo->xdp_frags_size += len;
1464 }
1465
1466 *xdp_frags_truesize = xdp_frags_truesz;
1467 return 0;
1468
1469err:
1470 put_xdp_frags(xdp);
1471 return -EINVAL;
1472}
1473
1474static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
1475 struct receive_queue *rq,
1476 struct bpf_prog *xdp_prog,
1477 void *ctx,
1478 unsigned int *frame_sz,
1479 int *num_buf,
1480 struct page **page,
1481 int offset,
1482 unsigned int *len,
1483 struct virtio_net_hdr_mrg_rxbuf *hdr)
1484{
1485 unsigned int truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx);
1486 unsigned int headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx);
1487 struct page *xdp_page;
1488 unsigned int xdp_room;
1489
1490 /* Transient failure which in theory could occur if
1491 * in-flight packets from before XDP was enabled reach
1492 * the receive path after XDP is loaded.
1493 */
1494 if (unlikely(hdr->hdr.gso_type))
1495 return NULL;
1496
	/* The XDP core now assumes the frag size is PAGE_SIZE, but buffers
	 * with headroom may add a hole in truesize, which
	 * makes their length exceed PAGE_SIZE. So we disable the
	 * hole mechanism for xdp. See add_recvbuf_mergeable().
	 */
1502 *frame_sz = truesize;
1503
1504 if (likely(headroom >= virtnet_get_headroom(vi) &&
1505 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
1506 return page_address(*page) + offset;
1507 }
1508
	/* This happens when headroom is not enough because
	 * the buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset could be used here to help us clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using xdp normally.
	 */
1517 if (!xdp_prog->aux->xdp_has_frags) {
1518 /* linearize data for XDP */
1519 xdp_page = xdp_linearize_page(rq, num_buf,
1520 p: *page, offset,
1521 VIRTIO_XDP_HEADROOM,
1522 len);
1523 if (!xdp_page)
1524 return NULL;
1525 } else {
1526 xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
1527 sizeof(struct skb_shared_info));
1528 if (*len + xdp_room > PAGE_SIZE)
1529 return NULL;
1530
1531 xdp_page = alloc_page(GFP_ATOMIC);
1532 if (!xdp_page)
1533 return NULL;
1534
1535 memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
1536 page_address(*page) + offset, *len);
1537 }
1538
1539 *frame_sz = PAGE_SIZE;
1540
1541 put_page(page: *page);
1542
1543 *page = xdp_page;
1544
1545 return page_address(*page) + VIRTIO_XDP_HEADROOM;
1546}
1547
1548static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
1549 struct virtnet_info *vi,
1550 struct receive_queue *rq,
1551 struct bpf_prog *xdp_prog,
1552 void *buf,
1553 void *ctx,
1554 unsigned int len,
1555 unsigned int *xdp_xmit,
1556 struct virtnet_rq_stats *stats)
1557{
1558 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
1559 int num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers);
1560 struct page *page = virt_to_head_page(x: buf);
1561 int offset = buf - page_address(page);
1562 unsigned int xdp_frags_truesz = 0;
1563 struct sk_buff *head_skb;
1564 unsigned int frame_sz;
1565 struct xdp_buff xdp;
1566 void *data;
1567 u32 act;
1568 int err;
1569
1570 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, frame_sz: &frame_sz, num_buf: &num_buf, page: &page,
1571 offset, len: &len, hdr);
1572 if (unlikely(!data))
1573 goto err_xdp;
1574
1575 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, xdp: &xdp, buf: data, len, frame_sz,
1576 num_buf: &num_buf, xdp_frags_truesize: &xdp_frags_truesz, stats);
1577 if (unlikely(err))
1578 goto err_xdp;
1579
1580 act = virtnet_xdp_handler(xdp_prog, xdp: &xdp, dev, xdp_xmit, stats);
1581
1582 switch (act) {
1583 case XDP_PASS:
1584 head_skb = build_skb_from_xdp_buff(dev, vi, xdp: &xdp, xdp_frags_truesz);
1585 if (unlikely(!head_skb))
1586 break;
1587 return head_skb;
1588
1589 case XDP_TX:
1590 case XDP_REDIRECT:
1591 return NULL;
1592
1593 default:
1594 break;
1595 }
1596
1597 put_xdp_frags(xdp: &xdp);
1598
1599err_xdp:
1600 put_page(page);
1601 mergeable_buf_free(rq, num_buf, dev, stats);
1602
1603 u64_stats_inc(p: &stats->xdp_drops);
1604 u64_stats_inc(p: &stats->drops);
1605 return NULL;
1606}
1607
1608static struct sk_buff *receive_mergeable(struct net_device *dev,
1609 struct virtnet_info *vi,
1610 struct receive_queue *rq,
1611 void *buf,
1612 void *ctx,
1613 unsigned int len,
1614 unsigned int *xdp_xmit,
1615 struct virtnet_rq_stats *stats)
1616{
1617 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
1618 int num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers);
1619 struct page *page = virt_to_head_page(x: buf);
1620 int offset = buf - page_address(page);
1621 struct sk_buff *head_skb, *curr_skb;
1622 unsigned int truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx);
1623 unsigned int headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx);
1624 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
1625 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
1626
1627 head_skb = NULL;
1628 u64_stats_add(p: &stats->bytes, val: len - vi->hdr_len);
1629
1630 if (unlikely(len > truesize - room)) {
1631 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
1632 dev->name, len, (unsigned long)(truesize - room));
1633 DEV_STATS_INC(dev, rx_length_errors);
1634 goto err_skb;
1635 }
1636
1637 if (unlikely(vi->xdp_enabled)) {
1638 struct bpf_prog *xdp_prog;
1639
1640 rcu_read_lock();
1641 xdp_prog = rcu_dereference(rq->xdp_prog);
1642 if (xdp_prog) {
1643 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
1644 len, xdp_xmit, stats);
1645 rcu_read_unlock();
1646 return head_skb;
1647 }
1648 rcu_read_unlock();
1649 }
1650
1651 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
1652 curr_skb = head_skb;
1653
1654 if (unlikely(!curr_skb))
1655 goto err_skb;
1656 while (--num_buf) {
1657 int num_skb_frags;
1658
1659 buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx);
1660 if (unlikely(!buf)) {
1661 pr_debug("%s: rx error: %d buffers out of %d missing\n",
1662 dev->name, num_buf,
1663 virtio16_to_cpu(vi->vdev,
1664 hdr->num_buffers));
1665 DEV_STATS_INC(dev, rx_length_errors);
1666 goto err_buf;
1667 }
1668
1669 u64_stats_add(p: &stats->bytes, val: len);
1670 page = virt_to_head_page(x: buf);
1671
1672 truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx);
1673 headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx);
1674 tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
1675 room = SKB_DATA_ALIGN(headroom + tailroom);
1676 if (unlikely(len > truesize - room)) {
1677 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
1678 dev->name, len, (unsigned long)(truesize - room));
1679 DEV_STATS_INC(dev, rx_length_errors);
1680 goto err_skb;
1681 }
1682
1683 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
1684 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
1685 struct sk_buff *nskb = alloc_skb(size: 0, GFP_ATOMIC);
1686
1687 if (unlikely(!nskb))
1688 goto err_skb;
1689 if (curr_skb == head_skb)
1690 skb_shinfo(curr_skb)->frag_list = nskb;
1691 else
1692 curr_skb->next = nskb;
1693 curr_skb = nskb;
1694 head_skb->truesize += nskb->truesize;
1695 num_skb_frags = 0;
1696 }
1697 if (curr_skb != head_skb) {
1698 head_skb->data_len += len;
1699 head_skb->len += len;
1700 head_skb->truesize += truesize;
1701 }
1702 offset = buf - page_address(page);
1703 if (skb_can_coalesce(skb: curr_skb, i: num_skb_frags, page, off: offset)) {
1704 put_page(page);
1705 skb_coalesce_rx_frag(skb: curr_skb, i: num_skb_frags - 1,
1706 size: len, truesize);
1707 } else {
1708 skb_add_rx_frag(skb: curr_skb, i: num_skb_frags, page,
1709 off: offset, size: len, truesize);
1710 }
1711 }
1712
1713 ewma_pkt_len_add(e: &rq->mrg_avg_pkt_len, val: head_skb->len);
1714 return head_skb;
1715
1716err_skb:
1717 put_page(page);
1718 mergeable_buf_free(rq, num_buf, dev, stats);
1719
1720err_buf:
1721 u64_stats_inc(p: &stats->drops);
1722 dev_kfree_skb(head_skb);
1723 return NULL;
1724}
1725
1726static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
1727 struct sk_buff *skb)
1728{
1729 enum pkt_hash_types rss_hash_type;
1730
1731 if (!hdr_hash || !skb)
1732 return;
1733
1734 switch (__le16_to_cpu(hdr_hash->hash_report)) {
1735 case VIRTIO_NET_HASH_REPORT_TCPv4:
1736 case VIRTIO_NET_HASH_REPORT_UDPv4:
1737 case VIRTIO_NET_HASH_REPORT_TCPv6:
1738 case VIRTIO_NET_HASH_REPORT_UDPv6:
1739 case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
1740 case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
1741 rss_hash_type = PKT_HASH_TYPE_L4;
1742 break;
1743 case VIRTIO_NET_HASH_REPORT_IPv4:
1744 case VIRTIO_NET_HASH_REPORT_IPv6:
1745 case VIRTIO_NET_HASH_REPORT_IPv6_EX:
1746 rss_hash_type = PKT_HASH_TYPE_L3;
1747 break;
1748 case VIRTIO_NET_HASH_REPORT_NONE:
1749 default:
1750 rss_hash_type = PKT_HASH_TYPE_NONE;
1751 }
	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
1753}
1754
1755static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
1756 void *buf, unsigned int len, void **ctx,
1757 unsigned int *xdp_xmit,
1758 struct virtnet_rq_stats *stats)
1759{
1760 struct net_device *dev = vi->dev;
1761 struct sk_buff *skb;
1762 struct virtio_net_common_hdr *hdr;
1763
1764 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
1765 pr_debug("%s: short packet %i\n", dev->name, len);
1766 DEV_STATS_INC(dev, rx_length_errors);
1767 virtnet_rq_free_unused_buf(vq: rq->vq, buf);
1768 return;
1769 }
1770
1771 if (vi->mergeable_rx_bufs)
1772 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
1773 stats);
1774 else if (vi->big_packets)
1775 skb = receive_big(dev, vi, rq, buf, len, stats);
1776 else
1777 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
1778
1779 if (unlikely(!skb))
1780 return;
1781
1782 hdr = skb_vnet_common_hdr(skb);
1783 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
1784 virtio_skb_set_hash(hdr_hash: &hdr->hash_v1_hdr, skb);
1785
1786 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
1787 skb->ip_summed = CHECKSUM_UNNECESSARY;
1788
1789 if (virtio_net_hdr_to_skb(skb, hdr: &hdr->hdr,
1790 little_endian: virtio_is_little_endian(vdev: vi->vdev))) {
1791 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
1792 dev->name, hdr->hdr.gso_type,
1793 hdr->hdr.gso_size);
1794 goto frame_err;
1795 }
1796
1797 skb_record_rx_queue(skb, rx_queue: vq2rxq(vq: rq->vq));
1798 skb->protocol = eth_type_trans(skb, dev);
1799 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
1800 ntohs(skb->protocol), skb->len, skb->pkt_type);
1801
1802 napi_gro_receive(napi: &rq->napi, skb);
1803 return;
1804
1805frame_err:
1806 DEV_STATS_INC(dev, rx_frame_errors);
1807 dev_kfree_skb(skb);
1808}
1809
1810/* Unlike mergeable buffers, all buffers are allocated to the
1811 * same size, except for the headroom. For this reason we do
1812 * not need to use mergeable_len_to_ctx here - it is enough
1813 * to store the headroom as the context ignoring the truesize.
1814 */
1815static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
1816 gfp_t gfp)
1817{
1818 char *buf;
1819 unsigned int xdp_headroom = virtnet_get_headroom(vi);
1820 void *ctx = (void *)(unsigned long)xdp_headroom;
1821 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
1822 int err;
1823
1824 len = SKB_DATA_ALIGN(len) +
1825 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1826
	buf = virtnet_rq_alloc(rq, len, gfp);
	if (unlikely(!buf))
		return -ENOMEM;

	virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom,
			       vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		if (rq->do_dma)
			virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
	}
1840
1841 return err;
1842}
1843
1844static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
1845 gfp_t gfp)
1846{
1847 struct page *first, *list = NULL;
1848 char *p;
1849 int i, err, offset;
1850
1851 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
1852
1853 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
1854 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
1855 first = get_a_page(rq, gfp_mask: gfp);
1856 if (!first) {
1857 if (list)
1858 give_pages(rq, page: list);
1859 return -ENOMEM;
1860 }
1861 sg_set_buf(sg: &rq->sg[i], page_address(first), PAGE_SIZE);
1862
1863 /* chain new page in list head to match sg */
1864 first->private = (unsigned long)list;
1865 list = first;
1866 }
1867
1868 first = get_a_page(rq, gfp_mask: gfp);
1869 if (!first) {
1870 give_pages(rq, page: list);
1871 return -ENOMEM;
1872 }
1873 p = page_address(first);
1874
1875 /* rq->sg[0], rq->sg[1] share the same page */
1876 /* a separated rq->sg[0] for header - required in case !any_header_sg */
1877 sg_set_buf(sg: &rq->sg[0], buf: p, buflen: vi->hdr_len);
1878
1879 /* rq->sg[1] for data packet, from offset */
1880 offset = sizeof(struct padded_vnet_hdr);
1881 sg_set_buf(sg: &rq->sg[1], buf: p + offset, PAGE_SIZE - offset);
1882
1883 /* chain first in list head */
1884 first->private = (unsigned long)list;
1885 err = virtqueue_add_inbuf(vq: rq->vq, sg: rq->sg, num: vi->big_packets_num_skbfrags + 2,
1886 data: first, gfp);
1887 if (err < 0)
1888 give_pages(rq, page: first);
1889
1890 return err;
1891}
1892
1893static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
1894 struct ewma_pkt_len *avg_pkt_len,
1895 unsigned int room)
1896{
1897 struct virtnet_info *vi = rq->vq->vdev->priv;
1898 const size_t hdr_len = vi->hdr_len;
1899 unsigned int len;
1900
1901 if (room)
1902 return PAGE_SIZE - room;
1903
1904 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
1905 rq->min_buf_len, PAGE_SIZE - hdr_len);
1906
1907 return ALIGN(len, L1_CACHE_BYTES);
1908}
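/* Illustrative numbers, assuming 4 KiB pages and a 64-byte cache line:
 * with room == 0, an EWMA average of 1400 bytes and a 12-byte header the
 * buffer length is ALIGN(12 + 1400, 64) == 1472 bytes, clamped between
 * rq->min_buf_len and PAGE_SIZE - hdr_len; with XDP headroom (room != 0)
 * the whole remaining PAGE_SIZE - room is used instead.
 */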
1909
1910static int add_recvbuf_mergeable(struct virtnet_info *vi,
1911 struct receive_queue *rq, gfp_t gfp)
1912{
1913 struct page_frag *alloc_frag = &rq->alloc_frag;
1914 unsigned int headroom = virtnet_get_headroom(vi);
1915 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
1916 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
1917 unsigned int len, hole;
1918 void *ctx;
1919 char *buf;
1920 int err;
1921
	/* Extra tailroom is needed to satisfy XDP's assumption. This
	 * means rx frag coalescing won't work, but since we've
	 * disabled GSO for XDP, it won't be a big issue.
	 */
1926 len = get_mergeable_buf_len(rq, avg_pkt_len: &rq->mrg_avg_pkt_len, room);
1927
1928 buf = virtnet_rq_alloc(rq, size: len + room, gfp);
1929 if (unlikely(!buf))
1930 return -ENOMEM;
1931
1932 buf += headroom; /* advance address leaving hole at front of pkt */
1933 hole = alloc_frag->size - alloc_frag->offset;
1934 if (hole < len + room) {
1935 /* To avoid internal fragmentation, if there is very likely not
1936 * enough space for another buffer, add the remaining space to
1937 * the current buffer.
1938 * XDP core assumes that frame_size of xdp_buff and the length
1939 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
1940 */
1941 if (!headroom)
1942 len += hole;
1943 alloc_frag->offset += hole;
1944 }
1945
1946 virtnet_rq_init_one_sg(rq, buf, len);
1947
1948 ctx = mergeable_len_to_ctx(truesize: len + room, headroom);
1949 err = virtqueue_add_inbuf_ctx(vq: rq->vq, sg: rq->sg, num: 1, data: buf, ctx, gfp);
1950 if (err < 0) {
1951 if (rq->do_dma)
1952 virtnet_rq_unmap(rq, buf, len: 0);
1953 put_page(page: virt_to_head_page(x: buf));
1954 }
1955
1956 return err;
1957}
1958
1959/*
1960 * Returns false if we couldn't fill entirely (OOM).
1961 *
1962 * Normally run in the receive path, but can also be run from ndo_open
1963 * before we're receiving packets, or from refill_work which is
1964 * careful to disable receiving (using napi_disable).
1965 */
1966static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
1967 gfp_t gfp)
1968{
1969 int err;
1970 bool oom;
1971
1972 do {
1973 if (vi->mergeable_rx_bufs)
1974 err = add_recvbuf_mergeable(vi, rq, gfp);
1975 else if (vi->big_packets)
1976 err = add_recvbuf_big(vi, rq, gfp);
1977 else
1978 err = add_recvbuf_small(vi, rq, gfp);
1979
1980 oom = err == -ENOMEM;
1981 if (err)
1982 break;
1983 } while (rq->vq->num_free);
1984 if (virtqueue_kick_prepare(vq: rq->vq) && virtqueue_notify(vq: rq->vq)) {
1985 unsigned long flags;
1986
1987 flags = u64_stats_update_begin_irqsave(syncp: &rq->stats.syncp);
1988 u64_stats_inc(p: &rq->stats.kicks);
1989 u64_stats_update_end_irqrestore(syncp: &rq->stats.syncp, flags);
1990 }
1991
1992 return !oom;
1993}
1994
1995static void skb_recv_done(struct virtqueue *rvq)
1996{
1997 struct virtnet_info *vi = rvq->vdev->priv;
1998 struct receive_queue *rq = &vi->rq[vq2rxq(vq: rvq)];
1999
2000 virtqueue_napi_schedule(napi: &rq->napi, vq: rvq);
2001}
2002
2003static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
2004{
2005 napi_enable(n: napi);
2006
2007	/* If all buffers were filled by the other side before we enabled NAPI,
2008	 * we won't get another interrupt, so process any outstanding packets
2009	 * now. Call local_bh_enable() afterwards to trigger softirq processing.
2010	 */
2011 local_bh_disable();
2012 virtqueue_napi_schedule(napi, vq);
2013 local_bh_enable();
2014}
2015
2016static void virtnet_napi_tx_enable(struct virtnet_info *vi,
2017 struct virtqueue *vq,
2018 struct napi_struct *napi)
2019{
2020 if (!napi->weight)
2021 return;
2022
2023 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
2024 * enable the feature if this is likely affine with the transmit path.
2025 */
2026 if (!vi->affinity_hint_set) {
2027 napi->weight = 0;
2028 return;
2029 }
2030
2031 return virtnet_napi_enable(vq, napi);
2032}
2033
2034static void virtnet_napi_tx_disable(struct napi_struct *napi)
2035{
2036 if (napi->weight)
2037 napi_disable(n: napi);
2038}
2039
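/* Delayed-work fallback for rx refill: for each active queue pair, pause
 * NAPI, try to refill with GFP_KERNEL and re-enable NAPI. If a ring is
 * still empty afterwards, reschedule ourselves in half a second.
 */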
2040static void refill_work(struct work_struct *work)
2041{
2042 struct virtnet_info *vi =
2043 container_of(work, struct virtnet_info, refill.work);
2044 bool still_empty;
2045 int i;
2046
2047 for (i = 0; i < vi->curr_queue_pairs; i++) {
2048 struct receive_queue *rq = &vi->rq[i];
2049
2050 napi_disable(n: &rq->napi);
2051 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
2052 virtnet_napi_enable(vq: rq->vq, napi: &rq->napi);
2053
2054		/* In theory, this can happen: if we don't get any buffers in,
2055		 * we will *never* try to fill again.
2056		 */
2057 if (still_empty)
2058 schedule_delayed_work(dwork: &vi->refill, HZ/2);
2059 }
2060}
2061
2062static int virtnet_receive(struct receive_queue *rq, int budget,
2063 unsigned int *xdp_xmit)
2064{
2065 struct virtnet_info *vi = rq->vq->vdev->priv;
2066 struct virtnet_rq_stats stats = {};
2067 unsigned int len;
2068 int packets = 0;
2069 void *buf;
2070 int i;
2071
2072 if (!vi->big_packets || vi->mergeable_rx_bufs) {
2073 void *ctx;
2074
2075 while (packets < budget &&
2076 (buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx))) {
2077 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats: &stats);
2078 packets++;
2079 }
2080 } else {
2081 while (packets < budget &&
2082 (buf = virtnet_rq_get_buf(rq, len: &len, NULL)) != NULL) {
2083 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats: &stats);
2084 packets++;
2085 }
2086 }
2087
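	/* Opportunistically refill once more than half of the (budget-capped)
	 * ring is free again; if the atomic allocation fails, defer to
	 * refill_work.
	 */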
2088 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
2089 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
2090 spin_lock(lock: &vi->refill_lock);
2091 if (vi->refill_enabled)
2092 schedule_delayed_work(dwork: &vi->refill, delay: 0);
2093 spin_unlock(lock: &vi->refill_lock);
2094 }
2095 }
2096
2097 u64_stats_set(p: &stats.packets, val: packets);
2098 u64_stats_update_begin(syncp: &rq->stats.syncp);
2099 for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
2100 size_t offset = virtnet_rq_stats_desc[i].offset;
2101 u64_stats_t *item, *src;
2102
2103 item = (u64_stats_t *)((u8 *)&rq->stats + offset);
2104 src = (u64_stats_t *)((u8 *)&stats + offset);
2105 u64_stats_add(p: item, val: u64_stats_read(p: src));
2106 }
2107 u64_stats_update_end(syncp: &rq->stats.syncp);
2108
2109 return packets;
2110}
2111
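/* From rx NAPI context, opportunistically reclaim completed tx buffers of
 * the send queue sharing this queue pair. Skipped when tx NAPI is off,
 * when the queue carries raw XDP buffers, or while the queue is being
 * reset.
 */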
2112static void virtnet_poll_cleantx(struct receive_queue *rq)
2113{
2114 struct virtnet_info *vi = rq->vq->vdev->priv;
2115 unsigned int index = vq2rxq(vq: rq->vq);
2116 struct send_queue *sq = &vi->sq[index];
2117 struct netdev_queue *txq = netdev_get_tx_queue(dev: vi->dev, index);
2118
2119 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, q: index))
2120 return;
2121
2122 if (__netif_tx_trylock(txq)) {
2123 if (sq->reset) {
2124 __netif_tx_unlock(txq);
2125 return;
2126 }
2127
2128 do {
2129 virtqueue_disable_cb(vq: sq->vq);
2130 free_old_xmit_skbs(sq, in_napi: true);
2131 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
2132
2133 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
2134 netif_tx_wake_queue(dev_queue: txq);
2135
2136 __netif_tx_unlock(txq);
2137 }
2138}
2139
2140static int virtnet_poll(struct napi_struct *napi, int budget)
2141{
2142 struct receive_queue *rq =
2143 container_of(napi, struct receive_queue, napi);
2144 struct virtnet_info *vi = rq->vq->vdev->priv;
2145 struct send_queue *sq;
2146 unsigned int received;
2147 unsigned int xdp_xmit = 0;
2148
2149 virtnet_poll_cleantx(rq);
2150
2151 received = virtnet_receive(rq, budget, xdp_xmit: &xdp_xmit);
2152
2153 if (xdp_xmit & VIRTIO_XDP_REDIR)
2154 xdp_do_flush();
2155
2156 /* Out of packets? */
2157 if (received < budget)
2158 virtqueue_napi_complete(napi, vq: rq->vq, processed: received);
2159
2160 if (xdp_xmit & VIRTIO_XDP_TX) {
2161 sq = virtnet_xdp_get_sq(vi);
2162 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) {
2163 u64_stats_update_begin(syncp: &sq->stats.syncp);
2164 u64_stats_inc(p: &sq->stats.kicks);
2165 u64_stats_update_end(syncp: &sq->stats.syncp);
2166 }
2167 virtnet_xdp_put_sq(vi, sq);
2168 }
2169
2170 return received;
2171}
2172
2173static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
2174{
2175 virtnet_napi_tx_disable(napi: &vi->sq[qp_index].napi);
2176 napi_disable(n: &vi->rq[qp_index].napi);
2177 xdp_rxq_info_unreg(xdp_rxq: &vi->rq[qp_index].xdp_rxq);
2178}
2179
2180static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
2181{
2182 struct net_device *dev = vi->dev;
2183 int err;
2184
2185 err = xdp_rxq_info_reg(xdp_rxq: &vi->rq[qp_index].xdp_rxq, dev, queue_index: qp_index,
2186 napi_id: vi->rq[qp_index].napi.napi_id);
2187 if (err < 0)
2188 return err;
2189
2190 err = xdp_rxq_info_reg_mem_model(xdp_rxq: &vi->rq[qp_index].xdp_rxq,
2191 type: MEM_TYPE_PAGE_SHARED, NULL);
2192 if (err < 0)
2193 goto err_xdp_reg_mem_model;
2194
2195 virtnet_napi_enable(vq: vi->rq[qp_index].vq, napi: &vi->rq[qp_index].napi);
2196 virtnet_napi_tx_enable(vi, vq: vi->sq[qp_index].vq, napi: &vi->sq[qp_index].napi);
2197
2198 return 0;
2199
2200err_xdp_reg_mem_model:
2201 xdp_rxq_info_unreg(xdp_rxq: &vi->rq[qp_index].xdp_rxq);
2202 return err;
2203}
2204
2205static int virtnet_open(struct net_device *dev)
2206{
2207 struct virtnet_info *vi = netdev_priv(dev);
2208 int i, err;
2209
2210 enable_delayed_refill(vi);
2211
2212 for (i = 0; i < vi->max_queue_pairs; i++) {
2213 if (i < vi->curr_queue_pairs)
2214			/* Make sure we have some buffers: if we're OOM, use the workqueue. */
2215 if (!try_fill_recv(vi, rq: &vi->rq[i], GFP_KERNEL))
2216 schedule_delayed_work(dwork: &vi->refill, delay: 0);
2217
2218 err = virtnet_enable_queue_pair(vi, qp_index: i);
2219 if (err < 0)
2220 goto err_enable_qp;
2221 }
2222
2223 return 0;
2224
2225err_enable_qp:
2226 disable_delayed_refill(vi);
2227 cancel_delayed_work_sync(dwork: &vi->refill);
2228
2229 for (i--; i >= 0; i--)
2230 virtnet_disable_queue_pair(vi, qp_index: i);
2231 return err;
2232}
2233
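/* tx NAPI poll: under the tx queue lock, free completed tx skbs and wake
 * the queue once enough descriptors are free, then re-arm the virtqueue
 * callback, rescheduling ourselves if more completions raced in.
 */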
2234static int virtnet_poll_tx(struct napi_struct *napi, int budget)
2235{
2236 struct send_queue *sq = container_of(napi, struct send_queue, napi);
2237 struct virtnet_info *vi = sq->vq->vdev->priv;
2238 unsigned int index = vq2txq(vq: sq->vq);
2239 struct netdev_queue *txq;
2240 int opaque;
2241 bool done;
2242
2243 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
2244 /* We don't need to enable cb for XDP */
2245 napi_complete_done(n: napi, work_done: 0);
2246 return 0;
2247 }
2248
2249 txq = netdev_get_tx_queue(dev: vi->dev, index);
2250 __netif_tx_lock(txq, raw_smp_processor_id());
2251 virtqueue_disable_cb(vq: sq->vq);
2252 free_old_xmit_skbs(sq, in_napi: true);
2253
2254 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
2255 netif_tx_wake_queue(dev_queue: txq);
2256
2257 opaque = virtqueue_enable_cb_prepare(vq: sq->vq);
2258
2259 done = napi_complete_done(n: napi, work_done: 0);
2260
2261 if (!done)
2262 virtqueue_disable_cb(vq: sq->vq);
2263
2264 __netif_tx_unlock(txq);
2265
2266 if (done) {
2267 if (unlikely(virtqueue_poll(sq->vq, opaque))) {
2268 if (napi_schedule_prep(n: napi)) {
2269 __netif_tx_lock(txq, raw_smp_processor_id());
2270 virtqueue_disable_cb(vq: sq->vq);
2271 __netif_tx_unlock(txq);
2272 __napi_schedule(n: napi);
2273 }
2274 }
2275 }
2276
2277 return 0;
2278}
2279
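/* Build the virtio-net header and scatterlist for one skb. If the device
 * accepts an arbitrary header layout (any_header_sg) and the skb has
 * suitably aligned headroom, the header is pushed into the skb so header
 * and data share sg entries; otherwise the header gets its own sg[0].
 */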
2280static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
2281{
2282 struct virtio_net_hdr_mrg_rxbuf *hdr;
2283 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
2284 struct virtnet_info *vi = sq->vq->vdev->priv;
2285 int num_sg;
2286 unsigned hdr_len = vi->hdr_len;
2287 bool can_push;
2288
2289 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
2290
2291 can_push = vi->any_header_sg &&
2292 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
2293 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
2294 /* Even if we can, don't push here yet as this would skew
2295 * csum_start offset below. */
2296 if (can_push)
2297 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
2298 else
2299 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;
2300
2301 if (virtio_net_hdr_from_skb(skb, hdr: &hdr->hdr,
2302 little_endian: virtio_is_little_endian(vdev: vi->vdev), has_data_valid: false,
2303 vlan_hlen: 0))
2304 return -EPROTO;
2305
2306 if (vi->mergeable_rx_bufs)
2307 hdr->num_buffers = 0;
2308
2309 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
2310 if (can_push) {
2311 __skb_push(skb, len: hdr_len);
2312 num_sg = skb_to_sgvec(skb, sg: sq->sg, offset: 0, len: skb->len);
2313 if (unlikely(num_sg < 0))
2314 return num_sg;
2315 /* Pull header back to avoid skew in tx bytes calculations. */
2316 __skb_pull(skb, len: hdr_len);
2317 } else {
2318 sg_set_buf(sg: sq->sg, buf: hdr, buflen: hdr_len);
2319 num_sg = skb_to_sgvec(skb, sg: sq->sg + 1, offset: 0, len: skb->len);
2320 if (unlikely(num_sg < 0))
2321 return num_sg;
2322 num_sg++;
2323 }
2324 return virtqueue_add_outbuf(vq: sq->vq, sg: sq->sg, num: num_sg, data: skb, GFP_ATOMIC);
2325}
2326
2327static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
2328{
2329 struct virtnet_info *vi = netdev_priv(dev);
2330 int qnum = skb_get_queue_mapping(skb);
2331 struct send_queue *sq = &vi->sq[qnum];
2332 int err;
2333 struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum);
2334 bool kick = !netdev_xmit_more();
2335 bool use_napi = sq->napi.weight;
2336
2337 /* Free up any pending old buffers before queueing new ones. */
2338 do {
2339 if (use_napi)
2340 virtqueue_disable_cb(vq: sq->vq);
2341
2342 free_old_xmit_skbs(sq, in_napi: false);
2343
2344 } while (use_napi && kick &&
2345 unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
2346
2347 /* timestamp packet in software */
2348 skb_tx_timestamp(skb);
2349
2350 /* Try to transmit */
2351 err = xmit_skb(sq, skb);
2352
2353 /* This should not happen! */
2354 if (unlikely(err)) {
2355 DEV_STATS_INC(dev, tx_fifo_errors);
2356 if (net_ratelimit())
2357 dev_warn(&dev->dev,
2358 "Unexpected TXQ (%d) queue failure: %d\n",
2359 qnum, err);
2360 DEV_STATS_INC(dev, tx_dropped);
2361 dev_kfree_skb_any(skb);
2362 return NETDEV_TX_OK;
2363 }
2364
2365 /* Don't wait up for transmitted skbs to be freed. */
2366 if (!use_napi) {
2367 skb_orphan(skb);
2368 nf_reset_ct(skb);
2369 }
2370
2371 check_sq_full_and_disable(vi, dev, sq);
2372
2373 if (kick || netif_xmit_stopped(dev_queue: txq)) {
2374 if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) {
2375 u64_stats_update_begin(syncp: &sq->stats.syncp);
2376 u64_stats_inc(p: &sq->stats.kicks);
2377 u64_stats_update_end(syncp: &sq->stats.syncp);
2378 }
2379 }
2380
2381 return NETDEV_TX_OK;
2382}
2383
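/* Resize an rx virtqueue: NAPI is disabled around the resize so nothing
 * touches the ring, unused buffers are recycled via
 * virtnet_rq_free_unused_buf, and the ring is refilled (or refill_work
 * scheduled) before NAPI is re-enabled.
 */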
2384static int virtnet_rx_resize(struct virtnet_info *vi,
2385 struct receive_queue *rq, u32 ring_num)
2386{
2387 bool running = netif_running(dev: vi->dev);
2388 int err, qindex;
2389
2390 qindex = rq - vi->rq;
2391
2392 if (running)
2393 napi_disable(n: &rq->napi);
2394
2395 err = virtqueue_resize(vq: rq->vq, num: ring_num, recycle: virtnet_rq_free_unused_buf);
2396 if (err)
2397 netdev_err(dev: vi->dev, format: "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
2398
2399 if (!try_fill_recv(vi, rq, GFP_KERNEL))
2400 schedule_delayed_work(dwork: &vi->refill, delay: 0);
2401
2402 if (running)
2403 virtnet_napi_enable(vq: rq->vq, napi: &rq->napi);
2404 return err;
2405}
2406
2407static int virtnet_tx_resize(struct virtnet_info *vi,
2408 struct send_queue *sq, u32 ring_num)
2409{
2410 bool running = netif_running(dev: vi->dev);
2411 struct netdev_queue *txq;
2412 int err, qindex;
2413
2414 qindex = sq - vi->sq;
2415
2416 if (running)
2417 virtnet_napi_tx_disable(napi: &sq->napi);
2418
2419 txq = netdev_get_tx_queue(dev: vi->dev, index: qindex);
2420
2421	/* 1. wait for all in-flight xmit to complete
2422	 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
2423	 */
2424 __netif_tx_lock_bh(txq);
2425
2426 /* Prevent rx poll from accessing sq. */
2427 sq->reset = true;
2428
2429 /* Prevent the upper layer from trying to send packets. */
2430 netif_stop_subqueue(dev: vi->dev, queue_index: qindex);
2431
2432 __netif_tx_unlock_bh(txq);
2433
2434 err = virtqueue_resize(vq: sq->vq, num: ring_num, recycle: virtnet_sq_free_unused_buf);
2435 if (err)
2436 netdev_err(dev: vi->dev, format: "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
2437
2438 __netif_tx_lock_bh(txq);
2439 sq->reset = false;
2440 netif_tx_wake_queue(dev_queue: txq);
2441 __netif_tx_unlock_bh(txq);
2442
2443 if (running)
2444 virtnet_napi_tx_enable(vi, vq: sq->vq, napi: &sq->napi);
2445 return err;
2446}
2447
2448/*
2449 * Send command via the control virtqueue and check status. Commands
2450 * supported by the hypervisor, as indicated by feature bits, should
2451 * never fail unless improperly formatted.
2452 */
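/* A typical caller packs its payload into a scatterlist first, e.g. (as
 * done in virtnet_set_mac_address() below):
 *
 *	sg_init_one(&sg, addr->sa_data, dev->addr_len);
 *	virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 *			     VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg);
 */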
2453static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
2454 struct scatterlist *out)
2455{
2456 struct scatterlist *sgs[4], hdr, stat;
2457 unsigned out_num = 0, tmp;
2458 int ret;
2459
2460 /* Caller should know better */
2461 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
2462
2463 vi->ctrl->status = ~0;
2464 vi->ctrl->hdr.class = class;
2465 vi->ctrl->hdr.cmd = cmd;
2466 /* Add header */
2467 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
2468 sgs[out_num++] = &hdr;
2469
2470 if (out)
2471 sgs[out_num++] = out;
2472
2473 /* Add return status. */
2474 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
2475 sgs[out_num] = &stat;
2476
2477 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
2478 ret = virtqueue_add_sgs(vq: vi->cvq, sgs, out_sgs: out_num, in_sgs: 1, data: vi, GFP_ATOMIC);
2479 if (ret < 0) {
2480 dev_warn(&vi->vdev->dev,
2481 "Failed to add sgs for command vq: %d\n.", ret);
2482 return false;
2483 }
2484
2485 if (unlikely(!virtqueue_kick(vi->cvq)))
2486 return vi->ctrl->status == VIRTIO_NET_OK;
2487
2488	/* Spin for a response; the kick causes an ioport write, trapping
2489	 * into the hypervisor, so the request should be handled immediately.
2490	 */
2491 while (!virtqueue_get_buf(vq: vi->cvq, len: &tmp) &&
2492 !virtqueue_is_broken(vq: vi->cvq))
2493 cpu_relax();
2494
2495 return vi->ctrl->status == VIRTIO_NET_OK;
2496}
2497
2498static int virtnet_set_mac_address(struct net_device *dev, void *p)
2499{
2500 struct virtnet_info *vi = netdev_priv(dev);
2501 struct virtio_device *vdev = vi->vdev;
2502 int ret;
2503 struct sockaddr *addr;
2504 struct scatterlist sg;
2505
2506 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_STANDBY))
2507 return -EOPNOTSUPP;
2508
2509 addr = kmemdup(p, size: sizeof(*addr), GFP_KERNEL);
2510 if (!addr)
2511 return -ENOMEM;
2512
2513 ret = eth_prepare_mac_addr_change(dev, p: addr);
2514 if (ret)
2515 goto out;
2516
2517 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
2518 sg_init_one(&sg, addr->sa_data, dev->addr_len);
2519 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
2520 VIRTIO_NET_CTRL_MAC_ADDR_SET, out: &sg)) {
2521 dev_warn(&vdev->dev,
2522 "Failed to set mac address by vq command.\n");
2523 ret = -EINVAL;
2524 goto out;
2525 }
2526 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
2527 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2528 unsigned int i;
2529
2530 /* Naturally, this has an atomicity problem. */
2531 for (i = 0; i < dev->addr_len; i++)
2532 virtio_cwrite8(vdev,
2533 offsetof(struct virtio_net_config, mac) +
2534 i, val: addr->sa_data[i]);
2535 }
2536
2537 eth_commit_mac_addr_change(dev, p);
2538 ret = 0;
2539
2540out:
2541 kfree(objp: addr);
2542 return ret;
2543}
2544
2545static void virtnet_stats(struct net_device *dev,
2546 struct rtnl_link_stats64 *tot)
2547{
2548 struct virtnet_info *vi = netdev_priv(dev);
2549 unsigned int start;
2550 int i;
2551
2552 for (i = 0; i < vi->max_queue_pairs; i++) {
2553 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
2554 struct receive_queue *rq = &vi->rq[i];
2555 struct send_queue *sq = &vi->sq[i];
2556
2557 do {
2558 start = u64_stats_fetch_begin(syncp: &sq->stats.syncp);
2559 tpackets = u64_stats_read(p: &sq->stats.packets);
2560 tbytes = u64_stats_read(p: &sq->stats.bytes);
2561 terrors = u64_stats_read(p: &sq->stats.tx_timeouts);
2562 } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start));
2563
2564 do {
2565 start = u64_stats_fetch_begin(syncp: &rq->stats.syncp);
2566 rpackets = u64_stats_read(p: &rq->stats.packets);
2567 rbytes = u64_stats_read(p: &rq->stats.bytes);
2568 rdrops = u64_stats_read(p: &rq->stats.drops);
2569 } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start));
2570
2571 tot->rx_packets += rpackets;
2572 tot->tx_packets += tpackets;
2573 tot->rx_bytes += rbytes;
2574 tot->tx_bytes += tbytes;
2575 tot->rx_dropped += rdrops;
2576 tot->tx_errors += terrors;
2577 }
2578
2579 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
2580 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
2581 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
2582 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);
2583}
2584
2585static void virtnet_ack_link_announce(struct virtnet_info *vi)
2586{
2587 rtnl_lock();
2588 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
2589 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
2590 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
2591 rtnl_unlock();
2592}
2593
2594static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
2595{
2596 struct scatterlist sg;
2597 struct net_device *dev = vi->dev;
2598
2599 if (!vi->has_cvq || !virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_MQ))
2600 return 0;
2601
2602 vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vdev: vi->vdev, val: queue_pairs);
2603 sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
2604
2605 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
2606 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, out: &sg)) {
2607 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
2608 queue_pairs);
2609 return -EINVAL;
2610 } else {
2611 vi->curr_queue_pairs = queue_pairs;
2612		/* virtnet_open() will refill when the device is brought up. */
2613 if (dev->flags & IFF_UP)
2614 schedule_delayed_work(dwork: &vi->refill, delay: 0);
2615 }
2616
2617 return 0;
2618}
2619
2620static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
2621{
2622 int err;
2623
2624 rtnl_lock();
2625 err = _virtnet_set_queues(vi, queue_pairs);
2626 rtnl_unlock();
2627 return err;
2628}
2629
2630static int virtnet_close(struct net_device *dev)
2631{
2632 struct virtnet_info *vi = netdev_priv(dev);
2633 int i;
2634
2635 /* Make sure NAPI doesn't schedule refill work */
2636 disable_delayed_refill(vi);
2637 /* Make sure refill_work doesn't re-enable napi! */
2638 cancel_delayed_work_sync(dwork: &vi->refill);
2639
2640 for (i = 0; i < vi->max_queue_pairs; i++)
2641 virtnet_disable_queue_pair(vi, qp_index: i);
2642
2643 return 0;
2644}
2645
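/* Sync the promiscuous/allmulti flags and the unicast/multicast MAC
 * filter lists to the device over the control virtqueue. Both address
 * lists are packed into one buffer: unicast entries first, multicast
 * entries appended after them.
 */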
2646static void virtnet_set_rx_mode(struct net_device *dev)
2647{
2648 struct virtnet_info *vi = netdev_priv(dev);
2649 struct scatterlist sg[2];
2650 struct virtio_net_ctrl_mac *mac_data;
2651 struct netdev_hw_addr *ha;
2652 int uc_count;
2653 int mc_count;
2654 void *buf;
2655 int i;
2656
2657 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
2658 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_CTRL_RX))
2659 return;
2660
2661 vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
2662 vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
2663
2664 sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
2665
2666 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
2667 VIRTIO_NET_CTRL_RX_PROMISC, out: sg))
2668 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
2669 vi->ctrl->promisc ? "en" : "dis");
2670
2671 sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
2672
2673 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
2674 VIRTIO_NET_CTRL_RX_ALLMULTI, out: sg))
2675 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
2676 vi->ctrl->allmulti ? "en" : "dis");
2677
2678 uc_count = netdev_uc_count(dev);
2679 mc_count = netdev_mc_count(dev);
2680 /* MAC filter - use one buffer for both lists */
2681 buf = kzalloc(size: ((uc_count + mc_count) * ETH_ALEN) +
2682 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
2683 mac_data = buf;
2684 if (!buf)
2685 return;
2686
2687 sg_init_table(sg, 2);
2688
2689 /* Store the unicast list and count in the front of the buffer */
2690 mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: uc_count);
2691 i = 0;
2692 netdev_for_each_uc_addr(ha, dev)
2693 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
2694
2695 sg_set_buf(sg: &sg[0], buf: mac_data,
2696 buflen: sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
2697
2698 /* multicast list and count fill the end */
2699 mac_data = (void *)&mac_data->macs[uc_count][0];
2700
2701 mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: mc_count);
2702 i = 0;
2703 netdev_for_each_mc_addr(ha, dev)
2704 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
2705
2706 sg_set_buf(sg: &sg[1], buf: mac_data,
2707 buflen: sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
2708
2709 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
2710 VIRTIO_NET_CTRL_MAC_TABLE_SET, out: sg))
2711 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
2712
2713 kfree(objp: buf);
2714}
2715
2716static int virtnet_vlan_rx_add_vid(struct net_device *dev,
2717 __be16 proto, u16 vid)
2718{
2719 struct virtnet_info *vi = netdev_priv(dev);
2720 struct scatterlist sg;
2721
2722 vi->ctrl->vid = cpu_to_virtio16(vdev: vi->vdev, val: vid);
2723 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
2724
2725 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
2726 VIRTIO_NET_CTRL_VLAN_ADD, out: &sg))
2727 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
2728 return 0;
2729}
2730
2731static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
2732 __be16 proto, u16 vid)
2733{
2734 struct virtnet_info *vi = netdev_priv(dev);
2735 struct scatterlist sg;
2736
2737 vi->ctrl->vid = cpu_to_virtio16(vdev: vi->vdev, val: vid);
2738 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
2739
2740 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
2741 VIRTIO_NET_CTRL_VLAN_DEL, out: &sg))
2742 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
2743 return 0;
2744}
2745
2746static void virtnet_clean_affinity(struct virtnet_info *vi)
2747{
2748 int i;
2749
2750 if (vi->affinity_hint_set) {
2751 for (i = 0; i < vi->max_queue_pairs; i++) {
2752 virtqueue_set_affinity(vq: vi->rq[i].vq, NULL);
2753 virtqueue_set_affinity(vq: vi->sq[i].vq, NULL);
2754 }
2755
2756 vi->affinity_hint_set = false;
2757 }
2758}
2759
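/* Spread queue pairs across the online CPUs: each pair gets a contiguous
 * group of roughly num_online_cpus() / curr_queue_pairs CPUs (the first
 * pairs absorb any remainder), and the same mask is used for the rx/tx
 * virtqueue affinity hints and the XPS map.
 */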
2760static void virtnet_set_affinity(struct virtnet_info *vi)
2761{
2762 cpumask_var_t mask;
2763 int stragglers;
2764 int group_size;
2765 int i, j, cpu;
2766 int num_cpu;
2767 int stride;
2768
2769 if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) {
2770 virtnet_clean_affinity(vi);
2771 return;
2772 }
2773
2774 num_cpu = num_online_cpus();
2775 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
2776 stragglers = num_cpu >= vi->curr_queue_pairs ?
2777 num_cpu % vi->curr_queue_pairs :
2778 0;
2779 cpu = cpumask_first(cpu_online_mask);
2780
2781 for (i = 0; i < vi->curr_queue_pairs; i++) {
2782 group_size = stride + (i < stragglers ? 1 : 0);
2783
2784 for (j = 0; j < group_size; j++) {
2785 cpumask_set_cpu(cpu, dstp: mask);
2786 cpu = cpumask_next_wrap(n: cpu, cpu_online_mask,
2787 start: nr_cpu_ids, wrap: false);
2788 }
2789 virtqueue_set_affinity(vq: vi->rq[i].vq, cpu_mask: mask);
2790 virtqueue_set_affinity(vq: vi->sq[i].vq, cpu_mask: mask);
2791 __netif_set_xps_queue(dev: vi->dev, cpumask_bits(mask), index: i, type: XPS_CPUS);
2792 cpumask_clear(dstp: mask);
2793 }
2794
2795 vi->affinity_hint_set = true;
2796 free_cpumask_var(mask);
2797}
2798
2799static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
2800{
2801 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
2802 node);
2803 virtnet_set_affinity(vi);
2804 return 0;
2805}
2806
2807static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
2808{
2809 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
2810 node_dead);
2811 virtnet_set_affinity(vi);
2812 return 0;
2813}
2814
2815static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
2816{
2817 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
2818 node);
2819
2820 virtnet_clean_affinity(vi);
2821 return 0;
2822}
2823
2824static enum cpuhp_state virtionet_online;
2825
2826static int virtnet_cpu_notif_add(struct virtnet_info *vi)
2827{
2828 int ret;
2829
2830 ret = cpuhp_state_add_instance_nocalls(state: virtionet_online, node: &vi->node);
2831 if (ret)
2832 return ret;
2833 ret = cpuhp_state_add_instance_nocalls(state: CPUHP_VIRT_NET_DEAD,
2834 node: &vi->node_dead);
2835 if (!ret)
2836 return ret;
2837 cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node);
2838 return ret;
2839}
2840
2841static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
2842{
2843 cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node);
2844 cpuhp_state_remove_instance_nocalls(state: CPUHP_VIRT_NET_DEAD,
2845 node: &vi->node_dead);
2846}
2847
2848static void virtnet_get_ringparam(struct net_device *dev,
2849 struct ethtool_ringparam *ring,
2850 struct kernel_ethtool_ringparam *kernel_ring,
2851 struct netlink_ext_ack *extack)
2852{
2853 struct virtnet_info *vi = netdev_priv(dev);
2854
2855 ring->rx_max_pending = vi->rq[0].vq->num_max;
2856 ring->tx_max_pending = vi->sq[0].vq->num_max;
2857 ring->rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq);
2858 ring->tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq);
2859}
2860
2861static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
2862 u16 vqn, u32 max_usecs, u32 max_packets);
2863
2864static int virtnet_set_ringparam(struct net_device *dev,
2865 struct ethtool_ringparam *ring,
2866 struct kernel_ethtool_ringparam *kernel_ring,
2867 struct netlink_ext_ack *extack)
2868{
2869 struct virtnet_info *vi = netdev_priv(dev);
2870 u32 rx_pending, tx_pending;
2871 struct receive_queue *rq;
2872 struct send_queue *sq;
2873 int i, err;
2874
2875 if (ring->rx_mini_pending || ring->rx_jumbo_pending)
2876 return -EINVAL;
2877
2878 rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq);
2879 tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq);
2880
2881 if (ring->rx_pending == rx_pending &&
2882 ring->tx_pending == tx_pending)
2883 return 0;
2884
2885 if (ring->rx_pending > vi->rq[0].vq->num_max)
2886 return -EINVAL;
2887
2888 if (ring->tx_pending > vi->sq[0].vq->num_max)
2889 return -EINVAL;
2890
2891 for (i = 0; i < vi->max_queue_pairs; i++) {
2892 rq = vi->rq + i;
2893 sq = vi->sq + i;
2894
2895 if (ring->tx_pending != tx_pending) {
2896 err = virtnet_tx_resize(vi, sq, ring_num: ring->tx_pending);
2897 if (err)
2898 return err;
2899
2900 /* Upon disabling and re-enabling a transmit virtqueue, the device must
2901 * set the coalescing parameters of the virtqueue to those configured
2902 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
2903 * did not set any TX coalescing parameters, to 0.
2904 */
2905 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: txq2vq(txq: i),
2906 max_usecs: vi->intr_coal_tx.max_usecs,
2907 max_packets: vi->intr_coal_tx.max_packets);
2908 if (err)
2909 return err;
2910
2911 vi->sq[i].intr_coal.max_usecs = vi->intr_coal_tx.max_usecs;
2912 vi->sq[i].intr_coal.max_packets = vi->intr_coal_tx.max_packets;
2913 }
2914
2915 if (ring->rx_pending != rx_pending) {
2916 err = virtnet_rx_resize(vi, rq, ring_num: ring->rx_pending);
2917 if (err)
2918 return err;
2919
2920			/* The reason is the same as for the transmit virtqueue reset */
2921 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: rxq2vq(rxq: i),
2922 max_usecs: vi->intr_coal_rx.max_usecs,
2923 max_packets: vi->intr_coal_rx.max_packets);
2924 if (err)
2925 return err;
2926
2927 vi->rq[i].intr_coal.max_usecs = vi->intr_coal_rx.max_usecs;
2928 vi->rq[i].intr_coal.max_packets = vi->intr_coal_rx.max_packets;
2929 }
2930 }
2931
2932 return 0;
2933}
2934
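/* Push the cached RSS/hash configuration to the device. The control
 * message is split into four scatterlist entries that together follow the
 * virtio_net_ctrl_rss layout: the fixed header up to the indirection
 * table, the indirection table itself, the max_tx_vq/key-length fields,
 * and finally the hash key.
 */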
2935static bool virtnet_commit_rss_command(struct virtnet_info *vi)
2936{
2937 struct net_device *dev = vi->dev;
2938 struct scatterlist sgs[4];
2939 unsigned int sg_buf_size;
2940
2941 /* prepare sgs */
2942 sg_init_table(sgs, 4);
2943
2944 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table);
2945 sg_set_buf(sg: &sgs[0], buf: &vi->ctrl->rss, buflen: sg_buf_size);
2946
2947 sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1);
2948 sg_set_buf(sg: &sgs[1], buf: vi->ctrl->rss.indirection_table, buflen: sg_buf_size);
2949
2950 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
2951 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
2952 sg_set_buf(sg: &sgs[2], buf: &vi->ctrl->rss.max_tx_vq, buflen: sg_buf_size);
2953
2954 sg_buf_size = vi->rss_key_size;
2955 sg_set_buf(sg: &sgs[3], buf: vi->ctrl->rss.key, buflen: sg_buf_size);
2956
2957 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
2958 cmd: vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
2959 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, out: sgs)) {
2960 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
2961 return false;
2962 }
2963 return true;
2964}
2965
2966static void virtnet_init_default_rss(struct virtnet_info *vi)
2967{
2968 u32 indir_val = 0;
2969 int i = 0;
2970
2971 vi->ctrl->rss.hash_types = vi->rss_hash_types_supported;
2972 vi->rss_hash_types_saved = vi->rss_hash_types_supported;
2973 vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size
2974 ? vi->rss_indir_table_size - 1 : 0;
2975 vi->ctrl->rss.unclassified_queue = 0;
2976
2977 for (; i < vi->rss_indir_table_size; ++i) {
2978 indir_val = ethtool_rxfh_indir_default(index: i, n_rx_rings: vi->curr_queue_pairs);
2979 vi->ctrl->rss.indirection_table[i] = indir_val;
2980 }
2981
2982 vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0;
2983 vi->ctrl->rss.hash_key_length = vi->rss_key_size;
2984
2985 netdev_rss_key_fill(buffer: vi->ctrl->rss.key, len: vi->rss_key_size);
2986}
2987
2988static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)
2989{
2990 info->data = 0;
2991 switch (info->flow_type) {
2992 case TCP_V4_FLOW:
2993 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
2994 info->data = RXH_IP_SRC | RXH_IP_DST |
2995 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2996 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
2997 info->data = RXH_IP_SRC | RXH_IP_DST;
2998 }
2999 break;
3000 case TCP_V6_FLOW:
3001 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
3002 info->data = RXH_IP_SRC | RXH_IP_DST |
3003 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3004 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
3005 info->data = RXH_IP_SRC | RXH_IP_DST;
3006 }
3007 break;
3008 case UDP_V4_FLOW:
3009 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
3010 info->data = RXH_IP_SRC | RXH_IP_DST |
3011 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3012 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
3013 info->data = RXH_IP_SRC | RXH_IP_DST;
3014 }
3015 break;
3016 case UDP_V6_FLOW:
3017 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
3018 info->data = RXH_IP_SRC | RXH_IP_DST |
3019 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3020 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
3021 info->data = RXH_IP_SRC | RXH_IP_DST;
3022 }
3023 break;
3024 case IPV4_FLOW:
3025 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
3026 info->data = RXH_IP_SRC | RXH_IP_DST;
3027
3028 break;
3029 case IPV6_FLOW:
3030 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
3031 info->data = RXH_IP_SRC | RXH_IP_DST;
3032
3033 break;
3034 default:
3035 info->data = 0;
3036 break;
3037 }
3038}
3039
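/* Translate an ETHTOOL_SRXFH request into VIRTIO_NET_RSS_HASH_TYPE_* bits.
 * Only 'sd' (IP src/dst), 'sdfn' (IP plus L4 ports) and RXH_DISCARD are
 * representable; anything else, or a hash type the device did not
 * advertise, is rejected.
 */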
3040static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info)
3041{
3042 u32 new_hashtypes = vi->rss_hash_types_saved;
3043 bool is_disable = info->data & RXH_DISCARD;
3044 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);
3045
3046	/* supports only 'sd' (IP src/dst), 'sdfn' (IP plus L4 ports) and 'r' (discard) */
3047 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
3048 return false;
3049
3050 switch (info->flow_type) {
3051 case TCP_V4_FLOW:
3052 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
3053 if (!is_disable)
3054 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
3055 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
3056 break;
3057 case UDP_V4_FLOW:
3058 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
3059 if (!is_disable)
3060 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
3061 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
3062 break;
3063 case IPV4_FLOW:
3064 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
3065 if (!is_disable)
3066 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
3067 break;
3068 case TCP_V6_FLOW:
3069 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
3070 if (!is_disable)
3071 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
3072 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);
3073 break;
3074 case UDP_V6_FLOW:
3075 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
3076 if (!is_disable)
3077 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
3078 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);
3079 break;
3080 case IPV6_FLOW:
3081 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
3082 if (!is_disable)
3083 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
3084 break;
3085 default:
3086 /* unsupported flow */
3087 return false;
3088 }
3089
3090	/* reject if an unsupported hash type was requested */
3091 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
3092 return false;
3093
3094 if (new_hashtypes != vi->rss_hash_types_saved) {
3095 vi->rss_hash_types_saved = new_hashtypes;
3096 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved;
3097 if (vi->dev->features & NETIF_F_RXHASH)
3098 return virtnet_commit_rss_command(vi);
3099 }
3100
3101 return true;
3102}
3103
3104static void virtnet_get_drvinfo(struct net_device *dev,
3105 struct ethtool_drvinfo *info)
3106{
3107 struct virtnet_info *vi = netdev_priv(dev);
3108 struct virtio_device *vdev = vi->vdev;
3109
3110 strscpy(p: info->driver, KBUILD_MODNAME, size: sizeof(info->driver));
3111 strscpy(p: info->version, VIRTNET_DRIVER_VERSION, size: sizeof(info->version));
3112 strscpy(p: info->bus_info, q: virtio_bus_name(vdev), size: sizeof(info->bus_info));
3113
3114}
3115
3116/* TODO: Eliminate out-of-order (OOO) packets during switching */
3117static int virtnet_set_channels(struct net_device *dev,
3118 struct ethtool_channels *channels)
3119{
3120 struct virtnet_info *vi = netdev_priv(dev);
3121 u16 queue_pairs = channels->combined_count;
3122 int err;
3123
3124 /* We don't support separate rx/tx channels.
3125 * We don't allow setting 'other' channels.
3126 */
3127 if (channels->rx_count || channels->tx_count || channels->other_count)
3128 return -EINVAL;
3129
3130 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
3131 return -EINVAL;
3132
3133	/* For now we don't support modifying channels while XDP is loaded.
3134	 * Also, when XDP is loaded, all RX queues have XDP programs, so we
3135	 * only need to check a single RX queue.
3136	 */
3137 if (vi->rq[0].xdp_prog)
3138 return -EINVAL;
3139
3140 cpus_read_lock();
3141 err = _virtnet_set_queues(vi, queue_pairs);
3142 if (err) {
3143 cpus_read_unlock();
3144 goto err;
3145 }
3146 virtnet_set_affinity(vi);
3147 cpus_read_unlock();
3148
3149 netif_set_real_num_tx_queues(dev, txq: queue_pairs);
3150 netif_set_real_num_rx_queues(dev, rxq: queue_pairs);
3151 err:
3152 return err;
3153}
3154
3155static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
3156{
3157 struct virtnet_info *vi = netdev_priv(dev);
3158 unsigned int i, j;
3159 u8 *p = data;
3160
3161 switch (stringset) {
3162 case ETH_SS_STATS:
3163 for (i = 0; i < vi->curr_queue_pairs; i++) {
3164 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
3165 ethtool_sprintf(data: &p, fmt: "rx_queue_%u_%s", i,
3166 virtnet_rq_stats_desc[j].desc);
3167 }
3168
3169 for (i = 0; i < vi->curr_queue_pairs; i++) {
3170 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
3171 ethtool_sprintf(data: &p, fmt: "tx_queue_%u_%s", i,
3172 virtnet_sq_stats_desc[j].desc);
3173 }
3174 break;
3175 }
3176}
3177
3178static int virtnet_get_sset_count(struct net_device *dev, int sset)
3179{
3180 struct virtnet_info *vi = netdev_priv(dev);
3181
3182 switch (sset) {
3183 case ETH_SS_STATS:
3184 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
3185 VIRTNET_SQ_STATS_LEN);
3186 default:
3187 return -EOPNOTSUPP;
3188 }
3189}
3190
3191static void virtnet_get_ethtool_stats(struct net_device *dev,
3192 struct ethtool_stats *stats, u64 *data)
3193{
3194 struct virtnet_info *vi = netdev_priv(dev);
3195 unsigned int idx = 0, start, i, j;
3196 const u8 *stats_base;
3197 const u64_stats_t *p;
3198 size_t offset;
3199
3200 for (i = 0; i < vi->curr_queue_pairs; i++) {
3201 struct receive_queue *rq = &vi->rq[i];
3202
3203 stats_base = (const u8 *)&rq->stats;
3204 do {
3205 start = u64_stats_fetch_begin(syncp: &rq->stats.syncp);
3206 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
3207 offset = virtnet_rq_stats_desc[j].offset;
3208 p = (const u64_stats_t *)(stats_base + offset);
3209 data[idx + j] = u64_stats_read(p);
3210 }
3211 } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start));
3212 idx += VIRTNET_RQ_STATS_LEN;
3213 }
3214
3215 for (i = 0; i < vi->curr_queue_pairs; i++) {
3216 struct send_queue *sq = &vi->sq[i];
3217
3218 stats_base = (const u8 *)&sq->stats;
3219 do {
3220 start = u64_stats_fetch_begin(syncp: &sq->stats.syncp);
3221 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
3222 offset = virtnet_sq_stats_desc[j].offset;
3223 p = (const u64_stats_t *)(stats_base + offset);
3224 data[idx + j] = u64_stats_read(p);
3225 }
3226 } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start));
3227 idx += VIRTNET_SQ_STATS_LEN;
3228 }
3229}
3230
3231static void virtnet_get_channels(struct net_device *dev,
3232 struct ethtool_channels *channels)
3233{
3234 struct virtnet_info *vi = netdev_priv(dev);
3235
3236 channels->combined_count = vi->curr_queue_pairs;
3237 channels->max_combined = vi->max_queue_pairs;
3238 channels->max_other = 0;
3239 channels->rx_count = 0;
3240 channels->tx_count = 0;
3241 channels->other_count = 0;
3242}
3243
3244static int virtnet_set_link_ksettings(struct net_device *dev,
3245 const struct ethtool_link_ksettings *cmd)
3246{
3247 struct virtnet_info *vi = netdev_priv(dev);
3248
3249 return ethtool_virtdev_set_link_ksettings(dev, cmd,
3250 dev_speed: &vi->speed, dev_duplex: &vi->duplex);
3251}
3252
3253static int virtnet_get_link_ksettings(struct net_device *dev,
3254 struct ethtool_link_ksettings *cmd)
3255{
3256 struct virtnet_info *vi = netdev_priv(dev);
3257
3258 cmd->base.speed = vi->speed;
3259 cmd->base.duplex = vi->duplex;
3260 cmd->base.port = PORT_OTHER;
3261
3262 return 0;
3263}
3264
3265static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
3266 struct ethtool_coalesce *ec)
3267{
3268 struct scatterlist sgs_tx, sgs_rx;
3269 int i;
3270
3271 vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
3272 vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
3273 sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx));
3274
3275 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
3276 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
3277 out: &sgs_tx))
3278 return -EINVAL;
3279
3280 /* Save parameters */
3281 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
3282 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
3283 for (i = 0; i < vi->max_queue_pairs; i++) {
3284 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
3285 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
3286 }
3287
3288 vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
3289 vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
3290 sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx));
3291
3292 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
3293 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
3294 out: &sgs_rx))
3295 return -EINVAL;
3296
3297 /* Save parameters */
3298 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
3299 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
3300 for (i = 0; i < vi->max_queue_pairs; i++) {
3301 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
3302 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
3303 }
3304
3305 return 0;
3306}
3307
3308static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
3309 u16 vqn, u32 max_usecs, u32 max_packets)
3310{
3311 struct scatterlist sgs;
3312
3313 vi->ctrl->coal_vq.vqn = cpu_to_le16(vqn);
3314 vi->ctrl->coal_vq.coal.max_usecs = cpu_to_le32(max_usecs);
3315 vi->ctrl->coal_vq.coal.max_packets = cpu_to_le32(max_packets);
3316 sg_init_one(&sgs, &vi->ctrl->coal_vq, sizeof(vi->ctrl->coal_vq));
3317
3318 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
3319 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
3320 out: &sgs))
3321 return -EINVAL;
3322
3323 return 0;
3324}
3325
3326static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
3327 struct ethtool_coalesce *ec,
3328 u16 queue)
3329{
3330 int err;
3331
3332 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: rxq2vq(rxq: queue),
3333 max_usecs: ec->rx_coalesce_usecs,
3334 max_packets: ec->rx_max_coalesced_frames);
3335 if (err)
3336 return err;
3337
3338 vi->rq[queue].intr_coal.max_usecs = ec->rx_coalesce_usecs;
3339 vi->rq[queue].intr_coal.max_packets = ec->rx_max_coalesced_frames;
3340
3341 err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: txq2vq(txq: queue),
3342 max_usecs: ec->tx_coalesce_usecs,
3343 max_packets: ec->tx_max_coalesced_frames);
3344 if (err)
3345 return err;
3346
3347 vi->sq[queue].intr_coal.max_usecs = ec->tx_coalesce_usecs;
3348 vi->sq[queue].intr_coal.max_packets = ec->tx_max_coalesced_frames;
3349
3350 return 0;
3351}
3352
3353static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
3354{
3355 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
3356 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
3357 */
3358 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
3359 return -EOPNOTSUPP;
3360
3361 if (ec->tx_max_coalesced_frames > 1 ||
3362 ec->rx_max_coalesced_frames != 1)
3363 return -EINVAL;
3364
3365 return 0;
3366}
3367
3368static int virtnet_should_update_vq_weight(int dev_flags, int weight,
3369 int vq_weight, bool *should_update)
3370{
3371 if (weight ^ vq_weight) {
3372 if (dev_flags & IFF_UP)
3373 return -EBUSY;
3374 *should_update = true;
3375 }
3376
3377 return 0;
3378}
3379
3380static int virtnet_set_coalesce(struct net_device *dev,
3381 struct ethtool_coalesce *ec,
3382 struct kernel_ethtool_coalesce *kernel_coal,
3383 struct netlink_ext_ack *extack)
3384{
3385 struct virtnet_info *vi = netdev_priv(dev);
3386 int ret, queue_number, napi_weight;
3387 bool update_napi = false;
3388
3389 /* Can't change NAPI weight if the link is up */
3390 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
3391 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
3392 ret = virtnet_should_update_vq_weight(dev_flags: dev->flags, weight: napi_weight,
3393 vq_weight: vi->sq[queue_number].napi.weight,
3394 should_update: &update_napi);
3395 if (ret)
3396 return ret;
3397
3398 if (update_napi) {
3399			/* All queues in [queue_number, vi->max_queue_pairs) will be updated
3400			 * for the sake of simplicity, though this might not be necessary.
3401			 */
3402 break;
3403 }
3404 }
3405
3406 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_NOTF_COAL))
3407 ret = virtnet_send_notf_coal_cmds(vi, ec);
3408 else
3409 ret = virtnet_coal_params_supported(ec);
3410
3411 if (ret)
3412 return ret;
3413
3414 if (update_napi) {
3415 for (; queue_number < vi->max_queue_pairs; queue_number++)
3416 vi->sq[queue_number].napi.weight = napi_weight;
3417 }
3418
3419 return ret;
3420}
3421
3422static int virtnet_get_coalesce(struct net_device *dev,
3423 struct ethtool_coalesce *ec,
3424 struct kernel_ethtool_coalesce *kernel_coal,
3425 struct netlink_ext_ack *extack)
3426{
3427 struct virtnet_info *vi = netdev_priv(dev);
3428
3429 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
3430 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
3431 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
3432 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
3433 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
3434 } else {
3435 ec->rx_max_coalesced_frames = 1;
3436
3437 if (vi->sq[0].napi.weight)
3438 ec->tx_max_coalesced_frames = 1;
3439 }
3440
3441 return 0;
3442}
3443
3444static int virtnet_set_per_queue_coalesce(struct net_device *dev,
3445 u32 queue,
3446 struct ethtool_coalesce *ec)
3447{
3448 struct virtnet_info *vi = netdev_priv(dev);
3449 int ret, napi_weight;
3450 bool update_napi = false;
3451
3452 if (queue >= vi->max_queue_pairs)
3453 return -EINVAL;
3454
3455 /* Can't change NAPI weight if the link is up */
3456 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
3457 ret = virtnet_should_update_vq_weight(dev_flags: dev->flags, weight: napi_weight,
3458 vq_weight: vi->sq[queue].napi.weight,
3459 should_update: &update_napi);
3460 if (ret)
3461 return ret;
3462
3463 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
3464 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
3465 else
3466 ret = virtnet_coal_params_supported(ec);
3467
3468 if (ret)
3469 return ret;
3470
3471 if (update_napi)
3472 vi->sq[queue].napi.weight = napi_weight;
3473
3474 return 0;
3475}
3476
3477static int virtnet_get_per_queue_coalesce(struct net_device *dev,
3478 u32 queue,
3479 struct ethtool_coalesce *ec)
3480{
3481 struct virtnet_info *vi = netdev_priv(dev);
3482
3483 if (queue >= vi->max_queue_pairs)
3484 return -EINVAL;
3485
3486 if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
3487 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
3488 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
3489 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
3490 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
3491 } else {
3492 ec->rx_max_coalesced_frames = 1;
3493
3494 if (vi->sq[queue].napi.weight)
3495 ec->tx_max_coalesced_frames = 1;
3496 }
3497
3498 return 0;
3499}
3500
3501static void virtnet_init_settings(struct net_device *dev)
3502{
3503 struct virtnet_info *vi = netdev_priv(dev);
3504
3505 vi->speed = SPEED_UNKNOWN;
3506 vi->duplex = DUPLEX_UNKNOWN;
3507}
3508
3509static void virtnet_update_settings(struct virtnet_info *vi)
3510{
3511 u32 speed;
3512 u8 duplex;
3513
3514 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
3515 return;
3516
3517 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
3518
3519 if (ethtool_validate_speed(speed))
3520 vi->speed = speed;
3521
3522 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
3523
3524 if (ethtool_validate_duplex(duplex))
3525 vi->duplex = duplex;
3526}
3527
3528static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
3529{
3530 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
3531}
3532
3533static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
3534{
3535 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
3536}
3537
3538static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc)
3539{
3540 struct virtnet_info *vi = netdev_priv(dev);
3541 int i;
3542
3543 if (indir) {
3544 for (i = 0; i < vi->rss_indir_table_size; ++i)
3545 indir[i] = vi->ctrl->rss.indirection_table[i];
3546 }
3547
3548 if (key)
3549 memcpy(key, vi->ctrl->rss.key, vi->rss_key_size);
3550
3551 if (hfunc)
3552 *hfunc = ETH_RSS_HASH_TOP;
3553
3554 return 0;
3555}
3556
3557static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc)
3558{
3559 struct virtnet_info *vi = netdev_priv(dev);
3560 int i;
3561
3562 if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
3563 return -EOPNOTSUPP;
3564
3565 if (indir) {
3566 for (i = 0; i < vi->rss_indir_table_size; ++i)
3567 vi->ctrl->rss.indirection_table[i] = indir[i];
3568 }
3569 if (key)
3570 memcpy(vi->ctrl->rss.key, key, vi->rss_key_size);
3571
3572 virtnet_commit_rss_command(vi);
3573
3574 return 0;
3575}
3576
3577static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
3578{
3579 struct virtnet_info *vi = netdev_priv(dev);
3580 int rc = 0;
3581
3582 switch (info->cmd) {
3583 case ETHTOOL_GRXRINGS:
3584 info->data = vi->curr_queue_pairs;
3585 break;
3586 case ETHTOOL_GRXFH:
3587 virtnet_get_hashflow(vi, info);
3588 break;
3589 default:
3590 rc = -EOPNOTSUPP;
3591 }
3592
3593 return rc;
3594}
3595
3596static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
3597{
3598 struct virtnet_info *vi = netdev_priv(dev);
3599 int rc = 0;
3600
3601 switch (info->cmd) {
3602 case ETHTOOL_SRXFH:
3603 if (!virtnet_set_hashflow(vi, info))
3604 rc = -EINVAL;
3605
3606 break;
3607 default:
3608 rc = -EOPNOTSUPP;
3609 }
3610
3611 return rc;
3612}
3613
3614static const struct ethtool_ops virtnet_ethtool_ops = {
3615 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
3616 ETHTOOL_COALESCE_USECS,
3617 .get_drvinfo = virtnet_get_drvinfo,
3618 .get_link = ethtool_op_get_link,
3619 .get_ringparam = virtnet_get_ringparam,
3620 .set_ringparam = virtnet_set_ringparam,
3621 .get_strings = virtnet_get_strings,
3622 .get_sset_count = virtnet_get_sset_count,
3623 .get_ethtool_stats = virtnet_get_ethtool_stats,
3624 .set_channels = virtnet_set_channels,
3625 .get_channels = virtnet_get_channels,
3626 .get_ts_info = ethtool_op_get_ts_info,
3627 .get_link_ksettings = virtnet_get_link_ksettings,
3628 .set_link_ksettings = virtnet_set_link_ksettings,
3629 .set_coalesce = virtnet_set_coalesce,
3630 .get_coalesce = virtnet_get_coalesce,
3631 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
3632 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
3633 .get_rxfh_key_size = virtnet_get_rxfh_key_size,
3634 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
3635 .get_rxfh = virtnet_get_rxfh,
3636 .set_rxfh = virtnet_set_rxfh,
3637 .get_rxnfc = virtnet_get_rxnfc,
3638 .set_rxnfc = virtnet_set_rxnfc,
3639};
3640
3641static void virtnet_freeze_down(struct virtio_device *vdev)
3642{
3643 struct virtnet_info *vi = vdev->priv;
3644
3645 /* Make sure no work handler is accessing the device */
3646 flush_work(work: &vi->config_work);
3647
3648 netif_tx_lock_bh(dev: vi->dev);
3649 netif_device_detach(dev: vi->dev);
3650 netif_tx_unlock_bh(dev: vi->dev);
3651 if (netif_running(dev: vi->dev))
3652 virtnet_close(dev: vi->dev);
3653}
3654
3655static int init_vqs(struct virtnet_info *vi);
3656
3657static int virtnet_restore_up(struct virtio_device *vdev)
3658{
3659 struct virtnet_info *vi = vdev->priv;
3660 int err;
3661
3662 err = init_vqs(vi);
3663 if (err)
3664 return err;
3665
3666 virtio_device_ready(dev: vdev);
3667
3668 enable_delayed_refill(vi);
3669
3670 if (netif_running(dev: vi->dev)) {
3671 err = virtnet_open(dev: vi->dev);
3672 if (err)
3673 return err;
3674 }
3675
3676 netif_tx_lock_bh(dev: vi->dev);
3677 netif_device_attach(dev: vi->dev);
3678 netif_tx_unlock_bh(dev: vi->dev);
3679 return err;
3680}
3681
3682static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
3683{
3684 struct scatterlist sg;
3685 vi->ctrl->offloads = cpu_to_virtio64(vdev: vi->vdev, val: offloads);
3686
3687 sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
3688
3689 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
3690 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, out: &sg)) {
3691 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
3692 return -EINVAL;
3693 }
3694
3695 return 0;
3696}
3697
3698static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
3699{
3700 u64 offloads = 0;
3701
3702 if (!vi->guest_offloads)
3703 return 0;
3704
3705 return virtnet_set_guest_offloads(vi, offloads);
3706}
3707
3708static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
3709{
3710 u64 offloads = vi->guest_offloads;
3711
3712 if (!vi->guest_offloads)
3713 return 0;
3714
3715 return virtnet_set_guest_offloads(vi, offloads);
3716}
3717
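/* Attach or detach an XDP program. The device must either let the driver
 * control guest offloads or have none of the GRO_HW/CSUM offloads
 * negotiated, since receive offloads are incompatible with XDP. Extra
 * queue pairs are reserved for XDP_TX where possible; otherwise XDP_TX
 * and XDP_REDIRECT fall back to sharing (locked) tx queues.
 */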
3718static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
3719 struct netlink_ext_ack *extack)
3720{
3721 unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
3722 sizeof(struct skb_shared_info));
3723 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
3724 struct virtnet_info *vi = netdev_priv(dev);
3725 struct bpf_prog *old_prog;
3726 u16 xdp_qp = 0, curr_qp;
3727 int i, err;
3728
3729 if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
3730 && (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
3731 virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
3732 virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
3733 virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
3734 virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
3735 virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
3736 virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
3737 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
3738 return -EOPNOTSUPP;
3739 }
3740
3741 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
3742 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
3743 return -EINVAL;
3744 }
3745
3746 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
3747 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
3748 netdev_warn(dev, format: "single-buffer XDP requires MTU less than %u\n", max_sz);
3749 return -EINVAL;
3750 }
3751
3752 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
3753 if (prog)
3754 xdp_qp = nr_cpu_ids;
3755
3756 /* XDP requires extra queues for XDP_TX */
3757 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
3758 netdev_warn_once(dev, "XDP requests %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
3759 curr_qp + xdp_qp, vi->max_queue_pairs);
3760 xdp_qp = 0;
3761 }
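/* Example (illustrative numbers): on an 8-CPU guest whose device exposes
 * 16 queue pairs and currently uses 4, attaching a program asks for
 * curr_qp + nr_cpu_ids = 4 + 8 = 12 <= 16, so each CPU gets a dedicated
 * XDP_TX queue. With only 8 queue pairs total, 12 > 8, so xdp_qp falls
 * back to 0 and XDP_TX/XDP_REDIRECT share the regular TX queues under a
 * lock, as the warning above says.
 */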
3762
3763 old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
3764 if (!prog && !old_prog)
3765 return 0;
3766
3767 if (prog)
3768 bpf_prog_add(prog, vi->max_queue_pairs - 1);
3769
3770 /* Make sure NAPI is not using any XDP TX queues for RX. */
3771 if (netif_running(dev)) {
3772 for (i = 0; i < vi->max_queue_pairs; i++) {
3773 napi_disable(&vi->rq[i].napi);
3774 virtnet_napi_tx_disable(&vi->sq[i].napi);
3775 }
3776 }
3777
3778 if (!prog) {
3779 for (i = 0; i < vi->max_queue_pairs; i++) {
3780 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
3781 if (i == 0)
3782 virtnet_restore_guest_offloads(vi);
3783 }
3784 synchronize_net();
3785 }
3786
3787 err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
3788 if (err)
3789 goto err;
3790 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
3791 vi->xdp_queue_pairs = xdp_qp;
3792
3793 if (prog) {
3794 vi->xdp_enabled = true;
3795 for (i = 0; i < vi->max_queue_pairs; i++) {
3796 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
3797 if (i == 0 && !old_prog)
3798 virtnet_clear_guest_offloads(vi);
3799 }
3800 if (!old_prog)
3801 xdp_features_set_redirect_target(dev, true);
3802 } else {
3803 xdp_features_clear_redirect_target(dev);
3804 vi->xdp_enabled = false;
3805 }
3806
3807 for (i = 0; i < vi->max_queue_pairs; i++) {
3808 if (old_prog)
3809 bpf_prog_put(old_prog);
3810 if (netif_running(dev)) {
3811 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
3812 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
3813 &vi->sq[i].napi);
3814 }
3815 }
3816
3817 return 0;
3818
3819err:
3820 if (!prog) {
3821 virtnet_clear_guest_offloads(vi);
3822 for (i = 0; i < vi->max_queue_pairs; i++)
3823 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
3824 }
3825
3826 if (netif_running(dev)) {
3827 for (i = 0; i < vi->max_queue_pairs; i++) {
3828 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
3829 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
3830 &vi->sq[i].napi);
3831 }
3832 }
3833 if (prog)
3834 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
3835 return err;
3836}
3837
3838static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
3839{
3840 switch (xdp->command) {
3841 case XDP_SETUP_PROG:
3842 return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
3843 default:
3844 return -EINVAL;
3845 }
3846}
3847
3848static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
3849 size_t len)
3850{
3851 struct virtnet_info *vi = netdev_priv(dev);
3852 int ret;
3853
3854 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
3855 return -EOPNOTSUPP;
3856
3857 ret = snprintf(buf, len, "sby");
3858 if (ret >= len)
3859 return -EOPNOTSUPP;
3860
3861 return 0;
3862}
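/* "sby" is the phys_port_name that net_failover setups expect for the
 * standby (virtio) slave; it lets user space tell the standby apart from
 * the VF primary when VIRTIO_NET_F_STANDBY is negotiated.
 */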
3863
3864static int virtnet_set_features(struct net_device *dev,
3865 netdev_features_t features)
3866{
3867 struct virtnet_info *vi = netdev_priv(dev);
3868 u64 offloads;
3869 int err;
3870
3871 if ((dev->features ^ features) & NETIF_F_GRO_HW) {
3872 if (vi->xdp_enabled)
3873 return -EBUSY;
3874
3875 if (features & NETIF_F_GRO_HW)
3876 offloads = vi->guest_offloads_capable;
3877 else
3878 offloads = vi->guest_offloads_capable &
3879 ~GUEST_OFFLOAD_GRO_HW_MASK;
3880
3881 err = virtnet_set_guest_offloads(vi, offloads);
3882 if (err)
3883 return err;
3884 vi->guest_offloads = offloads;
3885 }
3886
3887 if ((dev->features ^ features) & NETIF_F_RXHASH) {
3888 if (features & NETIF_F_RXHASH)
3889 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved;
3890 else
3891 vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE;
3892
3893 if (!virtnet_commit_rss_command(vi))
3894 return -EINVAL;
3895 }
3896
3897 return 0;
3898}
3899
3900static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
3901{
3902 struct virtnet_info *priv = netdev_priv(dev);
3903 struct send_queue *sq = &priv->sq[txqueue];
3904 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
3905
3906 u64_stats_update_begin(&sq->stats.syncp);
3907 u64_stats_inc(&sq->stats.tx_timeouts);
3908 u64_stats_update_end(&sq->stats.syncp);
3909
3910 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
3911 txqueue, sq->name, sq->vq->index, sq->vq->name,
3912 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
3913}
3914
3915static const struct net_device_ops virtnet_netdev = {
3916 .ndo_open = virtnet_open,
3917 .ndo_stop = virtnet_close,
3918 .ndo_start_xmit = start_xmit,
3919 .ndo_validate_addr = eth_validate_addr,
3920 .ndo_set_mac_address = virtnet_set_mac_address,
3921 .ndo_set_rx_mode = virtnet_set_rx_mode,
3922 .ndo_get_stats64 = virtnet_stats,
3923 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
3924 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
3925 .ndo_bpf = virtnet_xdp,
3926 .ndo_xdp_xmit = virtnet_xdp_xmit,
3927 .ndo_features_check = passthru_features_check,
3928 .ndo_get_phys_port_name = virtnet_get_phys_port_name,
3929 .ndo_set_features = virtnet_set_features,
3930 .ndo_tx_timeout = virtnet_tx_timeout,
3931};
3932
3933static void virtnet_config_changed_work(struct work_struct *work)
3934{
3935 struct virtnet_info *vi =
3936 container_of(work, struct virtnet_info, config_work);
3937 u16 v;
3938
3939 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
3940 struct virtio_net_config, status, &v) < 0)
3941 return;
3942
3943 if (v & VIRTIO_NET_S_ANNOUNCE) {
3944 netdev_notify_peers(vi->dev);
3945 virtnet_ack_link_announce(vi);
3946 }
3947
3948 /* Ignore unknown (future) status bits */
3949 v &= VIRTIO_NET_S_LINK_UP;
3950
3951 if (vi->status == v)
3952 return;
3953
3954 vi->status = v;
3955
3956 if (vi->status & VIRTIO_NET_S_LINK_UP) {
3957 virtnet_update_settings(vi);
3958 netif_carrier_on(vi->dev);
3959 netif_tx_wake_all_queues(vi->dev);
3960 } else {
3961 netif_carrier_off(vi->dev);
3962 netif_tx_stop_all_queues(vi->dev);
3963 }
3964}
3965
3966static void virtnet_config_changed(struct virtio_device *vdev)
3967{
3968 struct virtnet_info *vi = vdev->priv;
3969
3970 schedule_work(&vi->config_work);
3971}
3972
3973static void virtnet_free_queues(struct virtnet_info *vi)
3974{
3975 int i;
3976
3977 for (i = 0; i < vi->max_queue_pairs; i++) {
3978 __netif_napi_del(&vi->rq[i].napi);
3979 __netif_napi_del(&vi->sq[i].napi);
3980 }
3981
3982 /* We called __netif_napi_del(),
3983 * we need to respect an RCU grace period before freeing vi->rq
3984 */
3985 synchronize_net();
3986
3987 kfree(vi->rq);
3988 kfree(vi->sq);
3989 kfree(vi->ctrl);
3990}
3991
3992static void _free_receive_bufs(struct virtnet_info *vi)
3993{
3994 struct bpf_prog *old_prog;
3995 int i;
3996
3997 for (i = 0; i < vi->max_queue_pairs; i++) {
3998 while (vi->rq[i].pages)
3999 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
4000
4001 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
4002 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
4003 if (old_prog)
4004 bpf_prog_put(old_prog);
4005 }
4006}
4007
4008static void free_receive_bufs(struct virtnet_info *vi)
4009{
4010 rtnl_lock();
4011 _free_receive_bufs(vi);
4012 rtnl_unlock();
4013}
4014
4015static void free_receive_page_frags(struct virtnet_info *vi)
4016{
4017 int i;
4018 for (i = 0; i < vi->max_queue_pairs; i++)
4019 if (vi->rq[i].alloc_frag.page) {
4020 if (vi->rq[i].do_dma && vi->rq[i].last_dma)
4021 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
4022 put_page(vi->rq[i].alloc_frag.page);
4023 }
4024}
4025
4026static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
4027{
4028 if (!is_xdp_frame(buf))
4029 dev_kfree_skb(buf);
4030 else
4031 xdp_return_frame(ptr_to_xdp(buf));
4032}
4033
4034static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
4035{
4036 struct virtnet_info *vi = vq->vdev->priv;
4037 int i = vq2rxq(vq);
4038
4039 if (vi->mergeable_rx_bufs)
4040 put_page(virt_to_head_page(buf));
4041 else if (vi->big_packets)
4042 give_pages(&vi->rq[i], buf);
4043 else
4044 put_page(virt_to_head_page(buf));
4045}
4046
4047static void free_unused_bufs(struct virtnet_info *vi)
4048{
4049 void *buf;
4050 int i;
4051
4052 for (i = 0; i < vi->max_queue_pairs; i++) {
4053 struct virtqueue *vq = vi->sq[i].vq;
4054 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
4055 virtnet_sq_free_unused_buf(vq, buf);
4056 cond_resched();
4057 }
4058
4059 for (i = 0; i < vi->max_queue_pairs; i++) {
4060 struct receive_queue *rq = &vi->rq[i];
4061
4062 while ((buf = virtnet_rq_detach_unused_buf(rq)) != NULL)
4063 virtnet_rq_free_unused_buf(rq->vq, buf);
4064 cond_resched();
4065 }
4066}
4067
4068static void virtnet_del_vqs(struct virtnet_info *vi)
4069{
4070 struct virtio_device *vdev = vi->vdev;
4071
4072 virtnet_clean_affinity(vi);
4073
4074 vdev->config->del_vqs(vdev);
4075
4076 virtnet_free_queues(vi);
4077}
4078
4079/* How large should a single buffer be so a queue full of these can fit at
4080 * least one full packet?
4081 * Logic below assumes the mergeable buffer header is used.
4082 */
4083static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
4084{
4085 const unsigned int hdr_len = vi->hdr_len;
4086 unsigned int rq_size = virtqueue_get_vring_size(vq);
4087 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
4088 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
4089 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
4090
4091 return max(max(min_buf_len, hdr_len) - hdr_len,
4092 (unsigned int)GOOD_PACKET_LEN);
4093}
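/* Worked example (assuming the 12-byte mergeable/v1 header, a 256-entry
 * RX ring and max_mtu = 1500, i.e. no big packets): buf_len =
 * 12 + 14 + 4 + 1500 = 1530, so min_buf_len = DIV_ROUND_UP(1530, 256) = 6.
 * After subtracting hdr_len, the max() with GOOD_PACKET_LEN dominates and
 * the function still reserves 1518 bytes per buffer.
 */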
4094
4095static int virtnet_find_vqs(struct virtnet_info *vi)
4096{
4097 vq_callback_t **callbacks;
4098 struct virtqueue **vqs;
4099 int ret = -ENOMEM;
4100 int i, total_vqs;
4101 const char **names;
4102 bool *ctx;
4103
4104 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
4105 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
4106 * possible control vq.
4107 */
4108 total_vqs = vi->max_queue_pairs * 2 +
4109 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
4110
4111 /* Allocate space for find_vqs parameters */
4112 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
4113 if (!vqs)
4114 goto err_vq;
4115 callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL);
4116 if (!callbacks)
4117 goto err_callback;
4118 names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL);
4119 if (!names)
4120 goto err_names;
4121 if (!vi->big_packets || vi->mergeable_rx_bufs) {
4122 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
4123 if (!ctx)
4124 goto err_ctx;
4125 } else {
4126 ctx = NULL;
4127 }
4128
4129 /* Parameters for control virtqueue, if any */
4130 if (vi->has_cvq) {
4131 callbacks[total_vqs - 1] = NULL;
4132 names[total_vqs - 1] = "control";
4133 }
4134
4135 /* Allocate/initialize parameters for send/receive virtqueues */
4136 for (i = 0; i < vi->max_queue_pairs; i++) {
4137 callbacks[rxq2vq(i)] = skb_recv_done;
4138 callbacks[txq2vq(i)] = skb_xmit_done;
4139 sprintf(vi->rq[i].name, "input.%d", i);
4140 sprintf(vi->sq[i].name, "output.%d", i);
4141 names[rxq2vq(i)] = vi->rq[i].name;
4142 names[txq2vq(i)] = vi->sq[i].name;
4143 if (ctx)
4144 ctx[rxq2vq(i)] = true;
4145 }
4146
4147 ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
4148 names, ctx, NULL);
4149 if (ret)
4150 goto err_find;
4151
4152 if (vi->has_cvq) {
4153 vi->cvq = vqs[total_vqs - 1];
4154 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
4155 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
4156 }
4157
4158 for (i = 0; i < vi->max_queue_pairs; i++) {
4159 vi->rq[i].vq = vqs[rxq2vq(i)];
4160 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
4161 vi->sq[i].vq = vqs[txq2vq(i)];
4162 }
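/* Per the layout comment above, the resulting virtqueue order is
 * interleaved: vq0 = input.0, vq1 = output.0, vq2 = input.1,
 * vq3 = output.1, ..., with the control queue, when present, in the
 * final slot.
 */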
4163
4164 /* Success: ret == 0; fall through to free the temporary arrays. */
4165
4166
4167err_find:
4168 kfree(ctx);
4169err_ctx:
4170 kfree(names);
4171err_names:
4172 kfree(callbacks);
4173err_callback:
4174 kfree(vqs);
4175err_vq:
4176 return ret;
4177}
4178
4179static int virtnet_alloc_queues(struct virtnet_info *vi)
4180{
4181 int i;
4182
4183 if (vi->has_cvq) {
4184 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
4185 if (!vi->ctrl)
4186 goto err_ctrl;
4187 } else {
4188 vi->ctrl = NULL;
4189 }
4190 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
4191 if (!vi->sq)
4192 goto err_sq;
4193 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
4194 if (!vi->rq)
4195 goto err_rq;
4196
4197 INIT_DELAYED_WORK(&vi->refill, refill_work);
4198 for (i = 0; i < vi->max_queue_pairs; i++) {
4199 vi->rq[i].pages = NULL;
4200 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
4201 napi_weight);
4202 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
4203 virtnet_poll_tx,
4204 napi_tx ? napi_weight : 0);
4205
4206 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
4207 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
4208 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
4209
4210 u64_stats_init(&vi->rq[i].stats.syncp);
4211 u64_stats_init(&vi->sq[i].stats.syncp);
4212 }
4213
4214 return 0;
4215
4216err_rq:
4217 kfree(vi->sq);
4218err_sq:
4219 kfree(vi->ctrl);
4220err_ctrl:
4221 return -ENOMEM;
4222}
4223
4224static int init_vqs(struct virtnet_info *vi)
4225{
4226 int ret;
4227
4228 /* Allocate send & receive queues */
4229 ret = virtnet_alloc_queues(vi);
4230 if (ret)
4231 goto err;
4232
4233 ret = virtnet_find_vqs(vi);
4234 if (ret)
4235 goto err_free;
4236
4237 virtnet_rq_set_premapped(vi);
4238
4239 cpus_read_lock();
4240 virtnet_set_affinity(vi);
4241 cpus_read_unlock();
4242
4243 return 0;
4244
4245err_free:
4246 virtnet_free_queues(vi);
4247err:
4248 return ret;
4249}
4250
4251#ifdef CONFIG_SYSFS
4252static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
4253 char *buf)
4254{
4255 struct virtnet_info *vi = netdev_priv(queue->dev);
4256 unsigned int queue_index = get_netdev_rx_queue_index(queue);
4257 unsigned int headroom = virtnet_get_headroom(vi);
4258 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
4259 struct ewma_pkt_len *avg;
4260
4261 BUG_ON(queue_index >= vi->max_queue_pairs);
4262 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
4263 return sprintf(buf, "%u\n",
4264 get_mergeable_buf_len(&vi->rq[queue_index], avg,
4265 SKB_DATA_ALIGN(headroom + tailroom)));
4266}
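/* This attribute is typically read from user space as, e.g.,
 * /sys/class/net/<iface>/queues/rx-0/virtio_net/mergeable_rx_buffer_size
 * (path shown for illustration); it reports the EWMA-based buffer size
 * currently used when refilling that RX queue.
 */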
4267
4268static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
4269 __ATTR_RO(mergeable_rx_buffer_size);
4270
4271static struct attribute *virtio_net_mrg_rx_attrs[] = {
4272 &mergeable_rx_buffer_size_attribute.attr,
4273 NULL
4274};
4275
4276static const struct attribute_group virtio_net_mrg_rx_group = {
4277 .name = "virtio_net",
4278 .attrs = virtio_net_mrg_rx_attrs
4279};
4280#endif
4281
4282static bool virtnet_fail_on_feature(struct virtio_device *vdev,
4283 unsigned int fbit,
4284 const char *fname, const char *dname)
4285{
4286 if (!virtio_has_feature(vdev, fbit))
4287 return false;
4288
4289 dev_err(&vdev->dev, "device advertises feature %s but not %s",
4290 fname, dname);
4291
4292 return true;
4293}
4294
4295#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
4296 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
4297
4298static bool virtnet_validate_features(struct virtio_device *vdev)
4299{
4300 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
4301 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
4302 "VIRTIO_NET_F_CTRL_VQ") ||
4303 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
4304 "VIRTIO_NET_F_CTRL_VQ") ||
4305 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
4306 "VIRTIO_NET_F_CTRL_VQ") ||
4307 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
4308 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
4309 "VIRTIO_NET_F_CTRL_VQ") ||
4310 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
4311 "VIRTIO_NET_F_CTRL_VQ") ||
4312 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
4313 "VIRTIO_NET_F_CTRL_VQ") ||
4314 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
4315 "VIRTIO_NET_F_CTRL_VQ") ||
4316 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
4317 "VIRTIO_NET_F_CTRL_VQ"))) {
4318 return false;
4319 }
4320
4321 return true;
4322}
4323
4324#define MIN_MTU ETH_MIN_MTU
4325#define MAX_MTU ETH_MAX_MTU
4326
4327static int virtnet_validate(struct virtio_device *vdev)
4328{
4329 if (!vdev->config->get) {
4330 dev_err(&vdev->dev, "%s failure: config access disabled\n",
4331 __func__);
4332 return -EINVAL;
4333 }
4334
4335 if (!virtnet_validate_features(vdev))
4336 return -EINVAL;
4337
4338 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
4339 int mtu = virtio_cread16(vdev,
4340 offsetof(struct virtio_net_config,
4341 mtu));
4342 if (mtu < MIN_MTU)
4343 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
4344 }
4345
4346 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
4347 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
4348 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
4349 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
4350 }
4351
4352 return 0;
4353}
4354
4355static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
4356{
4357 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
4358 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
4359 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
4360 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
4361 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
4362 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
4363}
4364
4365static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
4366{
4367 bool guest_gso = virtnet_check_guest_gso(vi);
4368
4369 /* If the device can receive ANY guest GSO packets, regardless of MTU,
4370 * allocate buffers large enough for maximum-size packets; otherwise
4371 * limit them to the MTU size only.
4372 */
4373 if (mtu > ETH_DATA_LEN || guest_gso) {
4374 vi->big_packets = true;
4375 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
4376 }
4377}
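/* Example (assuming 4 KiB pages): with an advertised MTU of 9000 and no
 * guest GSO, big_packets_num_skbfrags = DIV_ROUND_UP(9000, 4096) = 3,
 * i.e. three page-sized frags are chained per receive buffer; with any
 * guest GSO feature the full MAX_SKB_FRAGS is used instead.
 */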
4378
4379static int virtnet_probe(struct virtio_device *vdev)
4380{
4381 int i, err = -ENOMEM;
4382 struct net_device *dev;
4383 struct virtnet_info *vi;
4384 u16 max_queue_pairs;
4385 int mtu = 0;
4386
4387 /* Find if host supports multiqueue/rss virtio_net device */
4388 max_queue_pairs = 1;
4389 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
4390 max_queue_pairs =
4391 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
4392
4393 /* We need at least 2 queues */
4394 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
4395 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
4396 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
4397 max_queue_pairs = 1;
4398
4399 /* Allocate ourselves a network device with room for our info */
4400 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
4401 if (!dev)
4402 return -ENOMEM;
4403
4404 /* Set up network device as normal. */
4405 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
4406 IFF_TX_SKB_NO_LINEAR;
4407 dev->netdev_ops = &virtnet_netdev;
4408 dev->features = NETIF_F_HIGHDMA;
4409
4410 dev->ethtool_ops = &virtnet_ethtool_ops;
4411 SET_NETDEV_DEV(dev, &vdev->dev);
4412
4413 /* Do we support "hardware" checksums? */
4414 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
4415 /* This opens up the world of extra features. */
4416 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
4417 if (csum)
4418 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
4419
4420 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
4421 dev->hw_features |= NETIF_F_TSO
4422 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
4423 }
4424 /* Individual feature bits: what can host handle? */
4425 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
4426 dev->hw_features |= NETIF_F_TSO;
4427 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
4428 dev->hw_features |= NETIF_F_TSO6;
4429 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
4430 dev->hw_features |= NETIF_F_TSO_ECN;
4431 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
4432 dev->hw_features |= NETIF_F_GSO_UDP_L4;
4433
4434 dev->features |= NETIF_F_GSO_ROBUST;
4435
4436 if (gso)
4437 dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
4438 /* (!csum && gso) case will be fixed by register_netdev() */
4439 }
4440 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
4441 dev->features |= NETIF_F_RXCSUM;
4442 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
4443 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
4444 dev->features |= NETIF_F_GRO_HW;
4445 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
4446 dev->hw_features |= NETIF_F_GRO_HW;
4447
4448 dev->vlan_features = dev->features;
4449 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
4450
4451 /* MTU range: 68 - 65535 */
4452 dev->min_mtu = MIN_MTU;
4453 dev->max_mtu = MAX_MTU;
4454
4455 /* Configuration may specify what MAC to use. Otherwise random. */
4456 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
4457 u8 addr[ETH_ALEN];
4458
4459 virtio_cread_bytes(vdev,
4460 offsetof(struct virtio_net_config, mac),
4461 addr, ETH_ALEN);
4462 eth_hw_addr_set(dev, addr);
4463 } else {
4464 eth_hw_addr_random(dev);
4465 dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
4466 dev->dev_addr);
4467 }
4468
4469 /* Set up our device-specific information */
4470 vi = netdev_priv(dev);
4471 vi->dev = dev;
4472 vi->vdev = vdev;
4473 vdev->priv = vi;
4474
4475 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
4476 spin_lock_init(&vi->refill_lock);
4477
4478 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
4479 vi->mergeable_rx_bufs = true;
4480 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
4481 }
4482
4483 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
4484 vi->has_rss_hash_report = true;
4485
4486 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
4487 vi->has_rss = true;
4488
4489 if (vi->has_rss || vi->has_rss_hash_report) {
4490 vi->rss_indir_table_size =
4491 virtio_cread16(vdev, offsetof(struct virtio_net_config,
4492 rss_max_indirection_table_length));
4493 vi->rss_key_size =
4494 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
4495
4496 vi->rss_hash_types_supported =
4497 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
4498 vi->rss_hash_types_supported &=
4499 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
4500 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
4501 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
4502
4503 dev->hw_features |= NETIF_F_RXHASH;
4504 }
4505
4506 if (vi->has_rss_hash_report)
4507 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
4508 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
4509 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
4510 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
4511 else
4512 vi->hdr_len = sizeof(struct virtio_net_hdr);
4513
4514 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
4515 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
4516 vi->any_header_sg = true;
4517
4518 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
4519 vi->has_cvq = true;
4520
4521 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
4522 mtu = virtio_cread16(vdev,
4523 offsetof(struct virtio_net_config,
4524 mtu));
4525 if (mtu < dev->min_mtu) {
4526 /* Should never trigger: MTU was previously validated
4527 * in virtnet_validate.
4528 */
4529 dev_err(&vdev->dev,
4530 "device MTU appears to have changed it is now %d < %d",
4531 mtu, dev->min_mtu);
4532 err = -EINVAL;
4533 goto free;
4534 }
4535
4536 dev->mtu = mtu;
4537 dev->max_mtu = mtu;
4538 }
4539
4540 virtnet_set_big_packets(vi, mtu);
4541
4542 if (vi->any_header_sg)
4543 dev->needed_headroom = vi->hdr_len;
4544
4545 /* Enable multiqueue by default */
4546 if (num_online_cpus() >= max_queue_pairs)
4547 vi->curr_queue_pairs = max_queue_pairs;
4548 else
4549 vi->curr_queue_pairs = num_online_cpus();
4550 vi->max_queue_pairs = max_queue_pairs;
4551
4552 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
4553 err = init_vqs(vi);
4554 if (err)
4555 goto free;
4556
4557 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
4558 vi->intr_coal_rx.max_usecs = 0;
4559 vi->intr_coal_tx.max_usecs = 0;
4560 vi->intr_coal_rx.max_packets = 0;
4561
4562 /* Keep the default values of the coalescing parameters
4563 * aligned with the default napi_tx state.
4564 */
4565 if (vi->sq[0].napi.weight)
4566 vi->intr_coal_tx.max_packets = 1;
4567 else
4568 vi->intr_coal_tx.max_packets = 0;
4569 }
4570
4571 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
4572 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
4573 for (i = 0; i < vi->max_queue_pairs; i++)
4574 if (vi->sq[i].napi.weight)
4575 vi->sq[i].intr_coal.max_packets = 1;
4576 }
4577
4578#ifdef CONFIG_SYSFS
4579 if (vi->mergeable_rx_bufs)
4580 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
4581#endif
4582 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
4583 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
4584
4585 virtnet_init_settings(dev);
4586
4587 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
4588 vi->failover = net_failover_create(vi->dev);
4589 if (IS_ERR(vi->failover)) {
4590 err = PTR_ERR(vi->failover);
4591 goto free_vqs;
4592 }
4593 }
4594
4595 if (vi->has_rss || vi->has_rss_hash_report)
4596 virtnet_init_default_rss(vi);
4597
4598 /* serialize netdev register + virtio_device_ready() with ndo_open() */
4599 rtnl_lock();
4600
4601 err = register_netdevice(dev);
4602 if (err) {
4603 pr_debug("virtio_net: registering device failed\n");
4604 rtnl_unlock();
4605 goto free_failover;
4606 }
4607
4608 virtio_device_ready(vdev);
4609
4610 _virtnet_set_queues(vi, vi->curr_queue_pairs);
4611
4612 /* A random MAC address has been assigned; notify the device.
4613 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
4614 * because many devices work fine without the MAC being set explicitly.
4615 */
4616 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
4617 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
4618 struct scatterlist sg;
4619
4620 sg_init_one(&sg, dev->dev_addr, dev->addr_len);
4621 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
4622 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
4623 pr_debug("virtio_net: setting MAC address failed\n");
4624 rtnl_unlock();
4625 err = -EINVAL;
4626 goto free_unregister_netdev;
4627 }
4628 }
4629
4630 rtnl_unlock();
4631
4632 err = virtnet_cpu_notif_add(vi);
4633 if (err) {
4634 pr_debug("virtio_net: registering cpu notifier failed\n");
4635 goto free_unregister_netdev;
4636 }
4637
4638 /* Assume link up if device can't report link status,
4639 * otherwise get link status from config. */
4640 netif_carrier_off(dev);
4641 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
4642 schedule_work(&vi->config_work);
4643 } else {
4644 vi->status = VIRTIO_NET_S_LINK_UP;
4645 virtnet_update_settings(vi);
4646 netif_carrier_on(dev);
4647 }
4648
4649 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
4650 if (virtio_has_feature(vi->vdev, guest_offloads[i]))
4651 set_bit(guest_offloads[i], &vi->guest_offloads);
4652 vi->guest_offloads_capable = vi->guest_offloads;
4653
4654 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
4655 dev->name, max_queue_pairs);
4656
4657 return 0;
4658
4659free_unregister_netdev:
4660 unregister_netdev(dev);
4661free_failover:
4662 net_failover_destroy(vi->failover);
4663free_vqs:
4664 virtio_reset_device(vdev);
4665 cancel_delayed_work_sync(&vi->refill);
4666 free_receive_page_frags(vi);
4667 virtnet_del_vqs(vi);
4668free:
4669 free_netdev(dev);
4670 return err;
4671}
4672
4673static void remove_vq_common(struct virtnet_info *vi)
4674{
4675 virtio_reset_device(vi->vdev);
4676
4677 /* Free unused buffers in both send and recv, if any. */
4678 free_unused_bufs(vi);
4679
4680 free_receive_bufs(vi);
4681
4682 free_receive_page_frags(vi);
4683
4684 virtnet_del_vqs(vi);
4685}
4686
4687static void virtnet_remove(struct virtio_device *vdev)
4688{
4689 struct virtnet_info *vi = vdev->priv;
4690
4691 virtnet_cpu_notif_remove(vi);
4692
4693 /* Make sure no work handler is accessing the device. */
4694 flush_work(&vi->config_work);
4695
4696 unregister_netdev(vi->dev);
4697
4698 net_failover_destroy(vi->failover);
4699
4700 remove_vq_common(vi);
4701
4702 free_netdev(vi->dev);
4703}
4704
4705static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
4706{
4707 struct virtnet_info *vi = vdev->priv;
4708
4709 virtnet_cpu_notif_remove(vi);
4710 virtnet_freeze_down(vdev);
4711 remove_vq_common(vi);
4712
4713 return 0;
4714}
4715
4716static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
4717{
4718 struct virtnet_info *vi = vdev->priv;
4719 int err;
4720
4721 err = virtnet_restore_up(vdev);
4722 if (err)
4723 return err;
4724 virtnet_set_queues(vi, vi->curr_queue_pairs);
4725
4726 err = virtnet_cpu_notif_add(vi);
4727 if (err) {
4728 virtnet_freeze_down(vdev);
4729 remove_vq_common(vi);
4730 return err;
4731 }
4732
4733 return 0;
4734}
4735
4736static struct virtio_device_id id_table[] = {
4737 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
4738 { 0 },
4739};
4740
4741#define VIRTNET_FEATURES \
4742 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
4743 VIRTIO_NET_F_MAC, \
4744 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
4745 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
4746 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
4747 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
4748 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
4749 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
4750 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
4751 VIRTIO_NET_F_CTRL_MAC_ADDR, \
4752 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
4753 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
4754 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
4755 VIRTIO_NET_F_VQ_NOTF_COAL, \
4756 VIRTIO_NET_F_GUEST_HDRLEN
4757
4758static unsigned int features[] = {
4759 VIRTNET_FEATURES,
4760};
4761
4762static unsigned int features_legacy[] = {
4763 VIRTNET_FEATURES,
4764 VIRTIO_NET_F_GSO,
4765 VIRTIO_F_ANY_LAYOUT,
4766};
4767
4768static struct virtio_driver virtio_net_driver = {
4769 .feature_table = features,
4770 .feature_table_size = ARRAY_SIZE(features),
4771 .feature_table_legacy = features_legacy,
4772 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
4773 .driver.name = KBUILD_MODNAME,
4774 .driver.owner = THIS_MODULE,
4775 .id_table = id_table,
4776 .validate = virtnet_validate,
4777 .probe = virtnet_probe,
4778 .remove = virtnet_remove,
4779 .config_changed = virtnet_config_changed,
4780#ifdef CONFIG_PM_SLEEP
4781 .freeze = virtnet_freeze,
4782 .restore = virtnet_restore,
4783#endif
4784};
4785
4786static __init int virtio_net_driver_init(void)
4787{
4788 int ret;
4789
4790 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
4791 virtnet_cpu_online,
4792 virtnet_cpu_down_prep);
4793 if (ret < 0)
4794 goto out;
4795 virtionet_online = ret;
4796 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
4797 NULL, virtnet_cpu_dead);
4798 if (ret)
4799 goto err_dead;
4800 ret = register_virtio_driver(&virtio_net_driver);
4801 if (ret)
4802 goto err_virtio;
4803 return 0;
4804err_virtio:
4805 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
4806err_dead:
4807 cpuhp_remove_multi_state(virtionet_online);
4808out:
4809 return ret;
4810}
4811module_init(virtio_net_driver_init);
4812
4813static __exit void virtio_net_driver_exit(void)
4814{
4815 unregister_virtio_driver(&virtio_net_driver);
4816 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
4817 cpuhp_remove_multi_state(virtionet_online);
4818}
4819module_exit(virtio_net_driver_exit);
4820
4821MODULE_DEVICE_TABLE(virtio, id_table);
4822MODULE_DESCRIPTION("Virtio network driver");
4823MODULE_LICENSE("GPL");
4824
