1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* A network driver using virtio. |
3 | * |
4 | * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation |
5 | */ |
6 | //#define DEBUG |
7 | #include <linux/netdevice.h> |
8 | #include <linux/etherdevice.h> |
9 | #include <linux/ethtool.h> |
10 | #include <linux/module.h> |
11 | #include <linux/virtio.h> |
12 | #include <linux/virtio_net.h> |
13 | #include <linux/bpf.h> |
14 | #include <linux/bpf_trace.h> |
15 | #include <linux/scatterlist.h> |
16 | #include <linux/if_vlan.h> |
17 | #include <linux/slab.h> |
18 | #include <linux/cpu.h> |
19 | #include <linux/average.h> |
20 | #include <linux/filter.h> |
21 | #include <linux/kernel.h> |
22 | #include <net/route.h> |
23 | #include <net/xdp.h> |
24 | #include <net/net_failover.h> |
25 | #include <net/netdev_rx_queue.h> |
26 | |
27 | static int napi_weight = NAPI_POLL_WEIGHT; |
28 | module_param(napi_weight, int, 0444); |
29 | |
30 | static bool csum = true, gso = true, napi_tx = true; |
31 | module_param(csum, bool, 0444); |
32 | module_param(gso, bool, 0444); |
33 | module_param(napi_tx, bool, 0644); |
34 | |
35 | /* FIXME: MTU in config. */ |
36 | #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) |
37 | #define GOOD_COPY_LEN 128 |
38 | |
39 | #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) |
40 | |
41 | /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ |
42 | #define VIRTIO_XDP_HEADROOM 256 |
43 | |
44 | /* Separating two types of XDP xmit */ |
45 | #define VIRTIO_XDP_TX BIT(0) |
46 | #define VIRTIO_XDP_REDIR BIT(1) |
47 | |
48 | #define VIRTIO_XDP_FLAG BIT(0) |
49 | |
50 | /* RX packet size EWMA. The average packet size is used to determine the packet |
51 | * buffer size when refilling RX rings. As the entire RX ring may be refilled |
52 | * at once, the weight is chosen so that the EWMA will be insensitive to short- |
53 | * term, transient changes in packet size. |
54 | */ |
55 | DECLARE_EWMA(pkt_len, 0, 64) |
56 | |
57 | #define VIRTNET_DRIVER_VERSION "1.0.0" |
58 | |
59 | static const unsigned long guest_offloads[] = { |
60 | VIRTIO_NET_F_GUEST_TSO4, |
61 | VIRTIO_NET_F_GUEST_TSO6, |
62 | VIRTIO_NET_F_GUEST_ECN, |
63 | VIRTIO_NET_F_GUEST_UFO, |
64 | VIRTIO_NET_F_GUEST_CSUM, |
65 | VIRTIO_NET_F_GUEST_USO4, |
66 | VIRTIO_NET_F_GUEST_USO6, |
67 | VIRTIO_NET_F_GUEST_HDRLEN |
68 | }; |
69 | |
70 | #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ |
71 | (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ |
72 | (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ |
73 | (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ |
74 | (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ |
75 | (1ULL << VIRTIO_NET_F_GUEST_USO6)) |
76 | |
77 | struct virtnet_stat_desc { |
78 | char desc[ETH_GSTRING_LEN]; |
79 | size_t offset; |
80 | }; |
81 | |
82 | struct virtnet_sq_stats { |
83 | struct u64_stats_sync syncp; |
84 | u64_stats_t packets; |
85 | u64_stats_t bytes; |
86 | u64_stats_t xdp_tx; |
87 | u64_stats_t xdp_tx_drops; |
88 | u64_stats_t kicks; |
89 | u64_stats_t tx_timeouts; |
90 | }; |
91 | |
92 | struct virtnet_rq_stats { |
93 | struct u64_stats_sync syncp; |
94 | u64_stats_t packets; |
95 | u64_stats_t bytes; |
96 | u64_stats_t drops; |
97 | u64_stats_t xdp_packets; |
98 | u64_stats_t xdp_tx; |
99 | u64_stats_t xdp_redirects; |
100 | u64_stats_t xdp_drops; |
101 | u64_stats_t kicks; |
102 | }; |
103 | |
104 | #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) |
105 | #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) |
106 | |
107 | static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { |
108 | { "packets" , VIRTNET_SQ_STAT(packets) }, |
109 | { "bytes" , VIRTNET_SQ_STAT(bytes) }, |
110 | { "xdp_tx" , VIRTNET_SQ_STAT(xdp_tx) }, |
111 | { "xdp_tx_drops" , VIRTNET_SQ_STAT(xdp_tx_drops) }, |
112 | { "kicks" , VIRTNET_SQ_STAT(kicks) }, |
113 | { "tx_timeouts" , VIRTNET_SQ_STAT(tx_timeouts) }, |
114 | }; |
115 | |
116 | static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { |
117 | { "packets" , VIRTNET_RQ_STAT(packets) }, |
118 | { "bytes" , VIRTNET_RQ_STAT(bytes) }, |
119 | { "drops" , VIRTNET_RQ_STAT(drops) }, |
120 | { "xdp_packets" , VIRTNET_RQ_STAT(xdp_packets) }, |
121 | { "xdp_tx" , VIRTNET_RQ_STAT(xdp_tx) }, |
122 | { "xdp_redirects" , VIRTNET_RQ_STAT(xdp_redirects) }, |
123 | { "xdp_drops" , VIRTNET_RQ_STAT(xdp_drops) }, |
124 | { "kicks" , VIRTNET_RQ_STAT(kicks) }, |
125 | }; |
126 | |
127 | #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) |
128 | #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) |
129 | |
130 | struct virtnet_interrupt_coalesce { |
131 | u32 max_packets; |
132 | u32 max_usecs; |
133 | }; |
134 | |
135 | /* The dma information of pages allocated at a time. */ |
136 | struct virtnet_rq_dma { |
137 | dma_addr_t addr; |
138 | u32 ref; |
139 | u16 len; |
140 | u16 need_sync; |
141 | }; |
142 | |
143 | /* Internal representation of a send virtqueue */ |
144 | struct send_queue { |
/* Virtqueue associated with this send_queue */
146 | struct virtqueue *vq; |
147 | |
148 | /* TX: fragments + linear part + virtio header */ |
149 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; |
150 | |
151 | /* Name of the send queue: output.$index */ |
152 | char name[16]; |
153 | |
154 | struct virtnet_sq_stats stats; |
155 | |
156 | struct virtnet_interrupt_coalesce intr_coal; |
157 | |
158 | struct napi_struct napi; |
159 | |
160 | /* Record whether sq is in reset state. */ |
161 | bool reset; |
162 | }; |
163 | |
164 | /* Internal representation of a receive virtqueue */ |
165 | struct receive_queue { |
166 | /* Virtqueue associated with this receive_queue */ |
167 | struct virtqueue *vq; |
168 | |
169 | struct napi_struct napi; |
170 | |
171 | struct bpf_prog __rcu *xdp_prog; |
172 | |
173 | struct virtnet_rq_stats stats; |
174 | |
175 | struct virtnet_interrupt_coalesce intr_coal; |
176 | |
177 | /* Chain pages by the private ptr. */ |
178 | struct page *pages; |
179 | |
180 | /* Average packet length for mergeable receive buffers. */ |
181 | struct ewma_pkt_len mrg_avg_pkt_len; |
182 | |
183 | /* Page frag for packet buffer allocation. */ |
184 | struct page_frag alloc_frag; |
185 | |
186 | /* RX: fragments + linear part + virtio header */ |
187 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; |
188 | |
189 | /* Min single buffer size for mergeable buffers case. */ |
190 | unsigned int min_buf_len; |
191 | |
192 | /* Name of this receive queue: input.$index */ |
193 | char name[16]; |
194 | |
195 | struct xdp_rxq_info xdp_rxq; |
196 | |
/* Record the last dma info to free after new pages are allocated. */
198 | struct virtnet_rq_dma *last_dma; |
199 | |
/* Do the DMA mapping ourselves */
201 | bool do_dma; |
202 | }; |
203 | |
/* This structure can contain the RSS message with the maximum settings for the
 * indirection table and key size.
 * Note that the default structure describing the RSS configuration,
 * virtio_net_rss_config, carries the same info but cannot hold the table values.
 * In any case, the structure is passed to the virtio hw through sg_buf split
 * into parts, because the table sizes may differ according to the device
 * configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
#define VIRTIO_NET_RSS_MAX_TABLE_LEN    128
struct virtio_net_ctrl_rss {
u32 hash_types;
u16 indirection_table_mask;
u16 unclassified_queue;
u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
u16 max_tx_vq;
u8 hash_key_length;
u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
};
221 | |
222 | /* Control VQ buffers: protected by the rtnl lock */ |
223 | struct control_buf { |
224 | struct virtio_net_ctrl_hdr hdr; |
225 | virtio_net_ctrl_ack status; |
226 | struct virtio_net_ctrl_mq mq; |
227 | u8 promisc; |
228 | u8 allmulti; |
229 | __virtio16 vid; |
230 | __virtio64 offloads; |
struct virtio_net_ctrl_rss rss;
232 | struct virtio_net_ctrl_coal_tx coal_tx; |
233 | struct virtio_net_ctrl_coal_rx coal_rx; |
234 | struct virtio_net_ctrl_coal_vq coal_vq; |
235 | }; |
236 | |
237 | struct virtnet_info { |
238 | struct virtio_device *vdev; |
239 | struct virtqueue *cvq; |
240 | struct net_device *dev; |
241 | struct send_queue *sq; |
242 | struct receive_queue *rq; |
243 | unsigned int status; |
244 | |
245 | /* Max # of queue pairs supported by the device */ |
246 | u16 max_queue_pairs; |
247 | |
248 | /* # of queue pairs currently used by the driver */ |
249 | u16 curr_queue_pairs; |
250 | |
251 | /* # of XDP queue pairs currently used by the driver */ |
252 | u16 xdp_queue_pairs; |
253 | |
254 | /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ |
255 | bool xdp_enabled; |
256 | |
257 | /* I like... big packets and I cannot lie! */ |
258 | bool big_packets; |
259 | |
260 | /* number of sg entries allocated for big packets */ |
261 | unsigned int big_packets_num_skbfrags; |
262 | |
263 | /* Host will merge rx buffers for big packets (shake it! shake it!) */ |
264 | bool mergeable_rx_bufs; |
265 | |
266 | /* Host supports rss and/or hash report */ |
bool has_rss;
bool has_rss_hash_report;
u8 rss_key_size;
u16 rss_indir_table_size;
u32 rss_hash_types_supported;
u32 rss_hash_types_saved;
273 | |
274 | /* Has control virtqueue */ |
275 | bool has_cvq; |
276 | |
277 | /* Host can handle any s/g split between our header and packet data */ |
bool any_header_sg;
279 | |
280 | /* Packet virtio header size */ |
281 | u8 hdr_len; |
282 | |
283 | /* Work struct for delayed refilling if we run low on memory. */ |
284 | struct delayed_work refill; |
285 | |
286 | /* Is delayed refill enabled? */ |
287 | bool refill_enabled; |
288 | |
289 | /* The lock to synchronize the access to refill_enabled */ |
290 | spinlock_t refill_lock; |
291 | |
292 | /* Work struct for config space updates */ |
293 | struct work_struct config_work; |
294 | |
/* Is the affinity hint set for virtqueues? */
296 | bool affinity_hint_set; |
297 | |
298 | /* CPU hotplug instances for online & dead */ |
299 | struct hlist_node node; |
300 | struct hlist_node node_dead; |
301 | |
302 | struct control_buf *ctrl; |
303 | |
304 | /* Ethtool settings */ |
305 | u8 duplex; |
306 | u32 speed; |
307 | |
308 | /* Interrupt coalescing settings */ |
309 | struct virtnet_interrupt_coalesce intr_coal_tx; |
310 | struct virtnet_interrupt_coalesce intr_coal_rx; |
311 | |
312 | unsigned long guest_offloads; |
313 | unsigned long guest_offloads_capable; |
314 | |
315 | /* failover when STANDBY feature enabled */ |
316 | struct failover *failover; |
317 | }; |
318 | |
319 | struct padded_vnet_hdr { |
320 | struct virtio_net_hdr_v1_hash hdr; |
321 | /* |
322 | * hdr is in a separate sg buffer, and data sg buffer shares same page |
323 | * with this header sg. This padding makes next sg 16 byte aligned |
324 | * after the header. |
325 | */ |
326 | char padding[12]; |
327 | }; |
328 | |
329 | struct virtio_net_common_hdr { |
330 | union { |
331 | struct virtio_net_hdr hdr; |
332 | struct virtio_net_hdr_mrg_rxbuf mrg_hdr; |
333 | struct virtio_net_hdr_v1_hash hash_v1_hdr; |
334 | }; |
335 | }; |
336 | |
337 | static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf); |
338 | static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); |
339 | |
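/* The token queued on the send virtqueue is either an sk_buff or an
 * xdp_frame. Both are at least word aligned, so the low bit of the
 * pointer (VIRTIO_XDP_FLAG) is used to tell the two apart.
 */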
340 | static bool is_xdp_frame(void *ptr) |
341 | { |
342 | return (unsigned long)ptr & VIRTIO_XDP_FLAG; |
343 | } |
344 | |
345 | static void *xdp_to_ptr(struct xdp_frame *ptr) |
346 | { |
347 | return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); |
348 | } |
349 | |
350 | static struct xdp_frame *ptr_to_xdp(void *ptr) |
351 | { |
352 | return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); |
353 | } |
354 | |
355 | /* Converting between virtqueue no. and kernel tx/rx queue no. |
356 | * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq |
357 | */ |
358 | static int vq2txq(struct virtqueue *vq) |
359 | { |
360 | return (vq->index - 1) / 2; |
361 | } |
362 | |
363 | static int txq2vq(int txq) |
364 | { |
365 | return txq * 2 + 1; |
366 | } |
367 | |
368 | static int vq2rxq(struct virtqueue *vq) |
369 | { |
370 | return vq->index / 2; |
371 | } |
372 | |
373 | static int rxq2vq(int rxq) |
374 | { |
375 | return rxq * 2; |
376 | } |
377 | |
378 | static inline struct virtio_net_common_hdr * |
379 | skb_vnet_common_hdr(struct sk_buff *skb) |
380 | { |
381 | return (struct virtio_net_common_hdr *)skb->cb; |
382 | } |
383 | |
384 | /* |
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the front for reuse
387 | */ |
388 | static void give_pages(struct receive_queue *rq, struct page *page) |
389 | { |
390 | struct page *end; |
391 | |
392 | /* Find end of list, sew whole thing into vi->rq.pages. */ |
393 | for (end = page; end->private; end = (struct page *)end->private); |
394 | end->private = (unsigned long)rq->pages; |
395 | rq->pages = page; |
396 | } |
397 | |
398 | static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) |
399 | { |
400 | struct page *p = rq->pages; |
401 | |
402 | if (p) { |
403 | rq->pages = (struct page *)p->private; |
404 | /* clear private here, it is used to chain pages */ |
405 | p->private = 0; |
406 | } else |
407 | p = alloc_page(gfp_mask); |
408 | return p; |
409 | } |
410 | |
411 | static void enable_delayed_refill(struct virtnet_info *vi) |
412 | { |
413 | spin_lock_bh(lock: &vi->refill_lock); |
414 | vi->refill_enabled = true; |
415 | spin_unlock_bh(lock: &vi->refill_lock); |
416 | } |
417 | |
418 | static void disable_delayed_refill(struct virtnet_info *vi) |
419 | { |
420 | spin_lock_bh(lock: &vi->refill_lock); |
421 | vi->refill_enabled = false; |
422 | spin_unlock_bh(lock: &vi->refill_lock); |
423 | } |
424 | |
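/* NAPI helpers: virtqueue callbacks stay disabled while a queue is being
 * polled; virtqueue_napi_complete() re-arms them when polling finishes and
 * re-schedules NAPI if more buffers arrived in the meantime.
 */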
425 | static void virtqueue_napi_schedule(struct napi_struct *napi, |
426 | struct virtqueue *vq) |
427 | { |
428 | if (napi_schedule_prep(n: napi)) { |
429 | virtqueue_disable_cb(vq); |
430 | __napi_schedule(n: napi); |
431 | } |
432 | } |
433 | |
434 | static void virtqueue_napi_complete(struct napi_struct *napi, |
435 | struct virtqueue *vq, int processed) |
436 | { |
437 | int opaque; |
438 | |
439 | opaque = virtqueue_enable_cb_prepare(vq); |
440 | if (napi_complete_done(n: napi, work_done: processed)) { |
441 | if (unlikely(virtqueue_poll(vq, opaque))) |
442 | virtqueue_napi_schedule(napi, vq); |
443 | } else { |
444 | virtqueue_disable_cb(vq); |
445 | } |
446 | } |
447 | |
448 | static void skb_xmit_done(struct virtqueue *vq) |
449 | { |
450 | struct virtnet_info *vi = vq->vdev->priv; |
451 | struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; |
452 | |
453 | /* Suppress further interrupts. */ |
454 | virtqueue_disable_cb(vq); |
455 | |
456 | if (napi->weight) |
457 | virtqueue_napi_schedule(napi, vq); |
458 | else |
459 | /* We were probably waiting for more output buffers. */ |
460 | netif_wake_subqueue(dev: vi->dev, queue_index: vq2txq(vq)); |
461 | } |
462 | |
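/* For mergeable buffers the per-buffer context is not a pointer but a packed
 * value: the low MRG_CTX_HEADER_SHIFT bits carry the buffer truesize and the
 * remaining upper bits carry the headroom.
 */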
#define MRG_CTX_HEADER_SHIFT 22
464 | static void *mergeable_len_to_ctx(unsigned int truesize, |
465 | unsigned int headroom) |
466 | { |
467 | return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); |
468 | } |
469 | |
470 | static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) |
471 | { |
472 | return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; |
473 | } |
474 | |
475 | static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) |
476 | { |
477 | return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); |
478 | } |
479 | |
480 | static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, |
481 | unsigned int headroom, |
482 | unsigned int len) |
483 | { |
484 | struct sk_buff *skb; |
485 | |
486 | skb = build_skb(data: buf, frag_size: buflen); |
487 | if (unlikely(!skb)) |
488 | return NULL; |
489 | |
490 | skb_reserve(skb, len: headroom); |
491 | skb_put(skb, len); |
492 | |
493 | return skb; |
494 | } |
495 | |
496 | /* Called from bottom half context */ |
497 | static struct sk_buff *page_to_skb(struct virtnet_info *vi, |
498 | struct receive_queue *rq, |
499 | struct page *page, unsigned int offset, |
500 | unsigned int len, unsigned int truesize, |
501 | unsigned int headroom) |
502 | { |
503 | struct sk_buff *skb; |
504 | struct virtio_net_common_hdr *hdr; |
505 | unsigned int copy, hdr_len, hdr_padded_len; |
506 | struct page *page_to_free = NULL; |
507 | int tailroom, shinfo_size; |
508 | char *p, *hdr_p, *buf; |
509 | |
510 | p = page_address(page) + offset; |
511 | hdr_p = p; |
512 | |
513 | hdr_len = vi->hdr_len; |
514 | if (vi->mergeable_rx_bufs) |
515 | hdr_padded_len = hdr_len; |
516 | else |
517 | hdr_padded_len = sizeof(struct padded_vnet_hdr); |
518 | |
519 | buf = p - headroom; |
520 | len -= hdr_len; |
521 | offset += hdr_padded_len; |
522 | p += hdr_padded_len; |
523 | tailroom = truesize - headroom - hdr_padded_len - len; |
524 | |
525 | shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
526 | |
527 | /* copy small packet so we can reuse these pages */ |
528 | if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { |
529 | skb = virtnet_build_skb(buf, buflen: truesize, headroom: p - buf, len); |
530 | if (unlikely(!skb)) |
531 | return NULL; |
532 | |
533 | page = (struct page *)page->private; |
534 | if (page) |
535 | give_pages(rq, page); |
536 | goto ok; |
537 | } |
538 | |
539 | /* copy small packet so we can reuse these pages for small data */ |
540 | skb = napi_alloc_skb(napi: &rq->napi, GOOD_COPY_LEN); |
541 | if (unlikely(!skb)) |
542 | return NULL; |
543 | |
/* Copy the whole frame if it fits in skb->head, otherwise
545 | * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. |
546 | */ |
547 | if (len <= skb_tailroom(skb)) |
548 | copy = len; |
549 | else |
550 | copy = ETH_HLEN; |
551 | skb_put_data(skb, data: p, len: copy); |
552 | |
553 | len -= copy; |
554 | offset += copy; |
555 | |
556 | if (vi->mergeable_rx_bufs) { |
557 | if (len) |
558 | skb_add_rx_frag(skb, i: 0, page, off: offset, size: len, truesize); |
559 | else |
560 | page_to_free = page; |
561 | goto ok; |
562 | } |
563 | |
564 | /* |
565 | * Verify that we can indeed put this data into a skb. |
566 | * This is here to handle cases when the device erroneously |
567 | * tries to receive more than is possible. This is usually |
568 | * the case of a broken device. |
569 | */ |
570 | if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { |
571 | net_dbg_ratelimited("%s: too much data\n" , skb->dev->name); |
572 | dev_kfree_skb(skb); |
573 | return NULL; |
574 | } |
575 | BUG_ON(offset >= PAGE_SIZE); |
576 | while (len) { |
577 | unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); |
578 | skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off: offset, |
579 | size: frag_size, truesize); |
580 | len -= frag_size; |
581 | page = (struct page *)page->private; |
582 | offset = 0; |
583 | } |
584 | |
585 | if (page) |
586 | give_pages(rq, page); |
587 | |
588 | ok: |
589 | hdr = skb_vnet_common_hdr(skb); |
590 | memcpy(hdr, hdr_p, hdr_len); |
591 | if (page_to_free) |
592 | put_page(page: page_to_free); |
593 | |
594 | return skb; |
595 | } |
596 | |
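/* When a receive queue does its own DMA mapping (rq->do_dma), a struct
 * virtnet_rq_dma sits at the start of each page-frag page and reference
 * counts the buffers carved out of it; the page is only unmapped and
 * released once the last buffer has been returned.
 */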
597 | static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) |
598 | { |
599 | struct page *page = virt_to_head_page(x: buf); |
600 | struct virtnet_rq_dma *dma; |
601 | void *head; |
602 | int offset; |
603 | |
604 | head = page_address(page); |
605 | |
606 | dma = head; |
607 | |
608 | --dma->ref; |
609 | |
610 | if (dma->need_sync && len) { |
611 | offset = buf - (head + sizeof(*dma)); |
612 | |
613 | virtqueue_dma_sync_single_range_for_cpu(vq: rq->vq, addr: dma->addr, |
614 | offset, size: len, |
615 | dir: DMA_FROM_DEVICE); |
616 | } |
617 | |
618 | if (dma->ref) |
619 | return; |
620 | |
621 | virtqueue_dma_unmap_single_attrs(vq: rq->vq, addr: dma->addr, size: dma->len, |
622 | dir: DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); |
623 | put_page(page); |
624 | } |
625 | |
626 | static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) |
627 | { |
628 | void *buf; |
629 | |
630 | buf = virtqueue_get_buf_ctx(vq: rq->vq, len, ctx); |
631 | if (buf && rq->do_dma) |
632 | virtnet_rq_unmap(rq, buf, len: *len); |
633 | |
634 | return buf; |
635 | } |
636 | |
637 | static void *virtnet_rq_detach_unused_buf(struct receive_queue *rq) |
638 | { |
639 | void *buf; |
640 | |
641 | buf = virtqueue_detach_unused_buf(vq: rq->vq); |
642 | if (buf && rq->do_dma) |
643 | virtnet_rq_unmap(rq, buf, len: 0); |
644 | |
645 | return buf; |
646 | } |
647 | |
648 | static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) |
649 | { |
650 | struct virtnet_rq_dma *dma; |
651 | dma_addr_t addr; |
652 | u32 offset; |
653 | void *head; |
654 | |
655 | if (!rq->do_dma) { |
656 | sg_init_one(rq->sg, buf, len); |
657 | return; |
658 | } |
659 | |
660 | head = page_address(rq->alloc_frag.page); |
661 | |
662 | offset = buf - head; |
663 | |
664 | dma = head; |
665 | |
666 | addr = dma->addr - sizeof(*dma) + offset; |
667 | |
668 | sg_init_table(rq->sg, 1); |
669 | rq->sg[0].dma_address = addr; |
670 | rq->sg[0].length = len; |
671 | } |
672 | |
673 | static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) |
674 | { |
675 | struct page_frag *alloc_frag = &rq->alloc_frag; |
676 | struct virtnet_rq_dma *dma; |
677 | void *buf, *head; |
678 | dma_addr_t addr; |
679 | |
680 | if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) |
681 | return NULL; |
682 | |
683 | head = page_address(alloc_frag->page); |
684 | |
685 | if (rq->do_dma) { |
686 | dma = head; |
687 | |
688 | /* new pages */ |
689 | if (!alloc_frag->offset) { |
690 | if (rq->last_dma) { |
691 | /* Now, the new page is allocated, the last dma |
692 | * will not be used. So the dma can be unmapped |
693 | * if the ref is 0. |
694 | */ |
695 | virtnet_rq_unmap(rq, buf: rq->last_dma, len: 0); |
696 | rq->last_dma = NULL; |
697 | } |
698 | |
699 | dma->len = alloc_frag->size - sizeof(*dma); |
700 | |
701 | addr = virtqueue_dma_map_single_attrs(vq: rq->vq, ptr: dma + 1, |
702 | size: dma->len, dir: DMA_FROM_DEVICE, attrs: 0); |
703 | if (virtqueue_dma_mapping_error(vq: rq->vq, addr)) |
704 | return NULL; |
705 | |
706 | dma->addr = addr; |
707 | dma->need_sync = virtqueue_dma_need_sync(vq: rq->vq, addr); |
708 | |
709 | /* Add a reference to dma to prevent the entire dma from |
710 | * being released during error handling. This reference |
711 | * will be freed after the pages are no longer used. |
712 | */ |
713 | get_page(page: alloc_frag->page); |
714 | dma->ref = 1; |
715 | alloc_frag->offset = sizeof(*dma); |
716 | |
717 | rq->last_dma = dma; |
718 | } |
719 | |
720 | ++dma->ref; |
721 | } |
722 | |
723 | buf = head + alloc_frag->offset; |
724 | |
725 | get_page(page: alloc_frag->page); |
726 | alloc_frag->offset += size; |
727 | |
728 | return buf; |
729 | } |
730 | |
731 | static void virtnet_rq_set_premapped(struct virtnet_info *vi) |
732 | { |
733 | int i; |
734 | |
735 | /* disable for big mode */ |
736 | if (!vi->mergeable_rx_bufs && vi->big_packets) |
737 | return; |
738 | |
739 | for (i = 0; i < vi->max_queue_pairs; i++) { |
740 | if (virtqueue_set_dma_premapped(vq: vi->rq[i].vq)) |
741 | continue; |
742 | |
743 | vi->rq[i].do_dma = true; |
744 | } |
745 | } |
746 | |
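/* Reclaim completed transmit buffers (skbs or XDP frames) from the send
 * virtqueue and fold them into the per-queue byte/packet counters.
 */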
747 | static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) |
748 | { |
749 | unsigned int len; |
750 | unsigned int packets = 0; |
751 | unsigned int bytes = 0; |
752 | void *ptr; |
753 | |
754 | while ((ptr = virtqueue_get_buf(vq: sq->vq, len: &len)) != NULL) { |
755 | if (likely(!is_xdp_frame(ptr))) { |
756 | struct sk_buff *skb = ptr; |
757 | |
758 | pr_debug("Sent skb %p\n" , skb); |
759 | |
760 | bytes += skb->len; |
761 | napi_consume_skb(skb, budget: in_napi); |
762 | } else { |
763 | struct xdp_frame *frame = ptr_to_xdp(ptr); |
764 | |
765 | bytes += xdp_get_frame_len(xdpf: frame); |
766 | xdp_return_frame(xdpf: frame); |
767 | } |
768 | packets++; |
769 | } |
770 | |
/* Avoid overhead when no packets have been processed; this
 * happens when called speculatively from start_xmit.
773 | */ |
774 | if (!packets) |
775 | return; |
776 | |
777 | u64_stats_update_begin(syncp: &sq->stats.syncp); |
778 | u64_stats_add(p: &sq->stats.bytes, val: bytes); |
779 | u64_stats_add(p: &sq->stats.packets, val: packets); |
780 | u64_stats_update_end(syncp: &sq->stats.syncp); |
781 | } |
782 | |
783 | static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) |
784 | { |
785 | if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) |
786 | return false; |
787 | else if (q < vi->curr_queue_pairs) |
788 | return true; |
789 | else |
790 | return false; |
791 | } |
792 | |
793 | static void check_sq_full_and_disable(struct virtnet_info *vi, |
794 | struct net_device *dev, |
795 | struct send_queue *sq) |
796 | { |
797 | bool use_napi = sq->napi.weight; |
798 | int qnum; |
799 | |
800 | qnum = sq - vi->sq; |
801 | |
802 | /* If running out of space, stop queue to avoid getting packets that we |
803 | * are then unable to transmit. |
804 | * An alternative would be to force queuing layer to requeue the skb by |
805 | * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be |
806 | * returned in a normal path of operation: it means that driver is not |
807 | * maintaining the TX queue stop/start state properly, and causes |
808 | * the stack to do a non-trivial amount of useless work. |
809 | * Since most packets only take 1 or 2 ring slots, stopping the queue |
810 | * early means 16 slots are typically wasted. |
811 | */ |
812 | if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { |
813 | netif_stop_subqueue(dev, queue_index: qnum); |
814 | if (use_napi) { |
815 | if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) |
816 | virtqueue_napi_schedule(napi: &sq->napi, vq: sq->vq); |
817 | } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { |
818 | /* More just got used, free them then recheck. */ |
819 | free_old_xmit_skbs(sq, in_napi: false); |
820 | if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { |
821 | netif_start_subqueue(dev, queue_index: qnum); |
822 | virtqueue_disable_cb(vq: sq->vq); |
823 | } |
824 | } |
825 | } |
826 | } |
827 | |
828 | static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, |
829 | struct send_queue *sq, |
830 | struct xdp_frame *xdpf) |
831 | { |
832 | struct virtio_net_hdr_mrg_rxbuf *hdr; |
833 | struct skb_shared_info *shinfo; |
834 | u8 nr_frags = 0; |
835 | int err, i; |
836 | |
837 | if (unlikely(xdpf->headroom < vi->hdr_len)) |
838 | return -EOVERFLOW; |
839 | |
840 | if (unlikely(xdp_frame_has_frags(xdpf))) { |
841 | shinfo = xdp_get_shared_info_from_frame(frame: xdpf); |
842 | nr_frags = shinfo->nr_frags; |
843 | } |
844 | |
845 | /* In wrapping function virtnet_xdp_xmit(), we need to free |
846 | * up the pending old buffers, where we need to calculate the |
847 | * position of skb_shared_info in xdp_get_frame_len() and |
 * xdp_return_frame(), which involves xdpf->data and
849 | * xdpf->headroom. Therefore, we need to update the value of |
850 | * headroom synchronously here. |
851 | */ |
852 | xdpf->headroom -= vi->hdr_len; |
853 | xdpf->data -= vi->hdr_len; |
854 | /* Zero header and leave csum up to XDP layers */ |
855 | hdr = xdpf->data; |
856 | memset(hdr, 0, vi->hdr_len); |
857 | xdpf->len += vi->hdr_len; |
858 | |
859 | sg_init_table(sq->sg, nr_frags + 1); |
860 | sg_set_buf(sg: sq->sg, buf: xdpf->data, buflen: xdpf->len); |
861 | for (i = 0; i < nr_frags; i++) { |
862 | skb_frag_t *frag = &shinfo->frags[i]; |
863 | |
864 | sg_set_page(sg: &sq->sg[i + 1], page: skb_frag_page(frag), |
865 | len: skb_frag_size(frag), offset: skb_frag_off(frag)); |
866 | } |
867 | |
868 | err = virtqueue_add_outbuf(vq: sq->vq, sg: sq->sg, num: nr_frags + 1, |
869 | data: xdp_to_ptr(ptr: xdpf), GFP_ATOMIC); |
870 | if (unlikely(err)) |
871 | return -ENOSPC; /* Caller handle free/refcnt */ |
872 | |
873 | return 0; |
874 | } |
875 | |
876 | /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on |
877 | * the current cpu, so it does not need to be locked. |
878 | * |
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq; 2. deciding whether to
 * lock/unlock the txq, and doing so; 3. keeping sparse happy. It is difficult
 * for two inline functions to solve all three at the same time.
883 | */ |
884 | #define virtnet_xdp_get_sq(vi) ({ \ |
885 | int cpu = smp_processor_id(); \ |
886 | struct netdev_queue *txq; \ |
887 | typeof(vi) v = (vi); \ |
888 | unsigned int qp; \ |
889 | \ |
890 | if (v->curr_queue_pairs > nr_cpu_ids) { \ |
891 | qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ |
892 | qp += cpu; \ |
893 | txq = netdev_get_tx_queue(v->dev, qp); \ |
894 | __netif_tx_acquire(txq); \ |
895 | } else { \ |
896 | qp = cpu % v->curr_queue_pairs; \ |
897 | txq = netdev_get_tx_queue(v->dev, qp); \ |
898 | __netif_tx_lock(txq, cpu); \ |
899 | } \ |
900 | v->sq + qp; \ |
901 | }) |
902 | |
903 | #define virtnet_xdp_put_sq(vi, q) { \ |
904 | struct netdev_queue *txq; \ |
905 | typeof(vi) v = (vi); \ |
906 | \ |
907 | txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ |
908 | if (v->curr_queue_pairs > nr_cpu_ids) \ |
909 | __netif_tx_release(txq); \ |
910 | else \ |
911 | __netif_tx_unlock(txq); \ |
912 | } |
913 | |
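/* ndo_xdp_xmit() handler: reclaims completed transmissions, queues up to @n
 * XDP frames on the chosen send queue and, if XDP_XMIT_FLUSH is set, kicks
 * the device. Returns the number of frames queued or a negative errno.
 */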
914 | static int virtnet_xdp_xmit(struct net_device *dev, |
915 | int n, struct xdp_frame **frames, u32 flags) |
916 | { |
917 | struct virtnet_info *vi = netdev_priv(dev); |
918 | struct receive_queue *rq = vi->rq; |
919 | struct bpf_prog *xdp_prog; |
920 | struct send_queue *sq; |
921 | unsigned int len; |
922 | int packets = 0; |
923 | int bytes = 0; |
924 | int nxmit = 0; |
925 | int kicks = 0; |
926 | void *ptr; |
927 | int ret; |
928 | int i; |
929 | |
930 | /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this |
931 | * indicate XDP resources have been successfully allocated. |
932 | */ |
933 | xdp_prog = rcu_access_pointer(rq->xdp_prog); |
934 | if (!xdp_prog) |
935 | return -ENXIO; |
936 | |
937 | sq = virtnet_xdp_get_sq(vi); |
938 | |
939 | if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { |
940 | ret = -EINVAL; |
941 | goto out; |
942 | } |
943 | |
944 | /* Free up any pending old buffers before queueing new ones. */ |
945 | while ((ptr = virtqueue_get_buf(vq: sq->vq, len: &len)) != NULL) { |
946 | if (likely(is_xdp_frame(ptr))) { |
947 | struct xdp_frame *frame = ptr_to_xdp(ptr); |
948 | |
949 | bytes += xdp_get_frame_len(xdpf: frame); |
950 | xdp_return_frame(xdpf: frame); |
951 | } else { |
952 | struct sk_buff *skb = ptr; |
953 | |
954 | bytes += skb->len; |
955 | napi_consume_skb(skb, budget: false); |
956 | } |
957 | packets++; |
958 | } |
959 | |
960 | for (i = 0; i < n; i++) { |
961 | struct xdp_frame *xdpf = frames[i]; |
962 | |
963 | if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) |
964 | break; |
965 | nxmit++; |
966 | } |
967 | ret = nxmit; |
968 | |
969 | if (!is_xdp_raw_buffer_queue(vi, q: sq - vi->sq)) |
970 | check_sq_full_and_disable(vi, dev, sq); |
971 | |
972 | if (flags & XDP_XMIT_FLUSH) { |
973 | if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) |
974 | kicks = 1; |
975 | } |
976 | out: |
977 | u64_stats_update_begin(syncp: &sq->stats.syncp); |
978 | u64_stats_add(p: &sq->stats.bytes, val: bytes); |
979 | u64_stats_add(p: &sq->stats.packets, val: packets); |
980 | u64_stats_add(p: &sq->stats.xdp_tx, val: n); |
981 | u64_stats_add(p: &sq->stats.xdp_tx_drops, val: n - nxmit); |
982 | u64_stats_add(p: &sq->stats.kicks, val: kicks); |
983 | u64_stats_update_end(syncp: &sq->stats.syncp); |
984 | |
985 | virtnet_xdp_put_sq(vi, sq); |
986 | return ret; |
987 | } |
988 | |
989 | static void put_xdp_frags(struct xdp_buff *xdp) |
990 | { |
991 | struct skb_shared_info *shinfo; |
992 | struct page *xdp_page; |
993 | int i; |
994 | |
995 | if (xdp_buff_has_frags(xdp)) { |
996 | shinfo = xdp_get_shared_info_from_buff(xdp); |
997 | for (i = 0; i < shinfo->nr_frags; i++) { |
998 | xdp_page = skb_frag_page(frag: &shinfo->frags[i]); |
999 | put_page(page: xdp_page); |
1000 | } |
1001 | } |
1002 | } |
1003 | |
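/* Run the XDP program on one buffer and act on its verdict: XDP_TX queues
 * the frame on a local send queue, XDP_REDIRECT hands it to the XDP core,
 * and aborted/unknown actions are reported back to the caller as XDP_DROP.
 */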
1004 | static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, |
1005 | struct net_device *dev, |
1006 | unsigned int *xdp_xmit, |
1007 | struct virtnet_rq_stats *stats) |
1008 | { |
1009 | struct xdp_frame *xdpf; |
1010 | int err; |
1011 | u32 act; |
1012 | |
1013 | act = bpf_prog_run_xdp(prog: xdp_prog, xdp); |
1014 | u64_stats_inc(p: &stats->xdp_packets); |
1015 | |
1016 | switch (act) { |
1017 | case XDP_PASS: |
1018 | return act; |
1019 | |
1020 | case XDP_TX: |
1021 | u64_stats_inc(p: &stats->xdp_tx); |
1022 | xdpf = xdp_convert_buff_to_frame(xdp); |
1023 | if (unlikely(!xdpf)) { |
1024 | netdev_dbg(dev, "convert buff to frame failed for xdp\n" ); |
1025 | return XDP_DROP; |
1026 | } |
1027 | |
1028 | err = virtnet_xdp_xmit(dev, n: 1, frames: &xdpf, flags: 0); |
1029 | if (unlikely(!err)) { |
1030 | xdp_return_frame_rx_napi(xdpf); |
1031 | } else if (unlikely(err < 0)) { |
1032 | trace_xdp_exception(dev, xdp: xdp_prog, act); |
1033 | return XDP_DROP; |
1034 | } |
1035 | *xdp_xmit |= VIRTIO_XDP_TX; |
1036 | return act; |
1037 | |
1038 | case XDP_REDIRECT: |
1039 | u64_stats_inc(p: &stats->xdp_redirects); |
1040 | err = xdp_do_redirect(dev, xdp, prog: xdp_prog); |
1041 | if (err) |
1042 | return XDP_DROP; |
1043 | |
1044 | *xdp_xmit |= VIRTIO_XDP_REDIR; |
1045 | return act; |
1046 | |
1047 | default: |
1048 | bpf_warn_invalid_xdp_action(dev, prog: xdp_prog, act); |
1049 | fallthrough; |
1050 | case XDP_ABORTED: |
1051 | trace_xdp_exception(dev, xdp: xdp_prog, act); |
1052 | fallthrough; |
1053 | case XDP_DROP: |
1054 | return XDP_DROP; |
1055 | } |
1056 | } |
1057 | |
1058 | static unsigned int virtnet_get_headroom(struct virtnet_info *vi) |
1059 | { |
1060 | return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; |
1061 | } |
1062 | |
1063 | /* We copy the packet for XDP in the following cases: |
1064 | * |
1065 | * 1) Packet is scattered across multiple rx buffers. |
1066 | * 2) Headroom space is insufficient. |
1067 | * |
1068 | * This is inefficient but it's a temporary condition that |
1069 | * we hit right after XDP is enabled and until queue is refilled |
1070 | * with large buffers with sufficient headroom - so it should affect |
1071 | * at most queue size packets. |
1072 | * Afterwards, the conditions to enable |
1073 | * XDP should preclude the underlying device from sending packets |
1074 | * across multiple buffers (num_buf > 1), and we make sure buffers |
1075 | * have enough headroom. |
1076 | */ |
1077 | static struct page *xdp_linearize_page(struct receive_queue *rq, |
1078 | int *num_buf, |
1079 | struct page *p, |
1080 | int offset, |
1081 | int page_off, |
1082 | unsigned int *len) |
1083 | { |
1084 | int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1085 | struct page *page; |
1086 | |
1087 | if (page_off + *len + tailroom > PAGE_SIZE) |
1088 | return NULL; |
1089 | |
1090 | page = alloc_page(GFP_ATOMIC); |
1091 | if (!page) |
1092 | return NULL; |
1093 | |
1094 | memcpy(page_address(page) + page_off, page_address(p) + offset, *len); |
1095 | page_off += *len; |
1096 | |
1097 | while (--*num_buf) { |
1098 | unsigned int buflen; |
1099 | void *buf; |
1100 | int off; |
1101 | |
1102 | buf = virtnet_rq_get_buf(rq, len: &buflen, NULL); |
1103 | if (unlikely(!buf)) |
1104 | goto err_buf; |
1105 | |
1106 | p = virt_to_head_page(x: buf); |
1107 | off = buf - page_address(p); |
1108 | |
1109 | /* guard against a misconfigured or uncooperative backend that |
 * is sending packets larger than the MTU.
1111 | */ |
1112 | if ((page_off + buflen + tailroom) > PAGE_SIZE) { |
1113 | put_page(page: p); |
1114 | goto err_buf; |
1115 | } |
1116 | |
1117 | memcpy(page_address(page) + page_off, |
1118 | page_address(p) + off, buflen); |
1119 | page_off += buflen; |
1120 | put_page(page: p); |
1121 | } |
1122 | |
1123 | /* Headroom does not contribute to packet length */ |
1124 | *len = page_off - VIRTIO_XDP_HEADROOM; |
1125 | return page; |
1126 | err_buf: |
1127 | __free_pages(page, order: 0); |
1128 | return NULL; |
1129 | } |
1130 | |
1131 | static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, |
1132 | unsigned int xdp_headroom, |
1133 | void *buf, |
1134 | unsigned int len) |
1135 | { |
unsigned int header_offset;
1137 | unsigned int headroom; |
1138 | unsigned int buflen; |
1139 | struct sk_buff *skb; |
1140 | |
1141 | header_offset = VIRTNET_RX_PAD + xdp_headroom; |
1142 | headroom = vi->hdr_len + header_offset; |
1143 | buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + |
1144 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1145 | |
1146 | skb = virtnet_build_skb(buf, buflen, headroom, len); |
1147 | if (unlikely(!skb)) |
1148 | return NULL; |
1149 | |
1150 | buf += header_offset; |
1151 | memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); |
1152 | |
1153 | return skb; |
1154 | } |
1155 | |
1156 | static struct sk_buff *receive_small_xdp(struct net_device *dev, |
1157 | struct virtnet_info *vi, |
1158 | struct receive_queue *rq, |
1159 | struct bpf_prog *xdp_prog, |
1160 | void *buf, |
1161 | unsigned int xdp_headroom, |
1162 | unsigned int len, |
1163 | unsigned int *xdp_xmit, |
1164 | struct virtnet_rq_stats *stats) |
1165 | { |
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
1167 | unsigned int headroom = vi->hdr_len + header_offset; |
1168 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; |
1169 | struct page *page = virt_to_head_page(x: buf); |
1170 | struct page *xdp_page; |
1171 | unsigned int buflen; |
1172 | struct xdp_buff xdp; |
1173 | struct sk_buff *skb; |
1174 | unsigned int metasize = 0; |
1175 | u32 act; |
1176 | |
1177 | if (unlikely(hdr->hdr.gso_type)) |
1178 | goto err_xdp; |
1179 | |
1180 | buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + |
1181 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1182 | |
1183 | if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { |
1184 | int offset = buf - page_address(page) + header_offset; |
1185 | unsigned int tlen = len + vi->hdr_len; |
1186 | int num_buf = 1; |
1187 | |
1188 | xdp_headroom = virtnet_get_headroom(vi); |
1189 | header_offset = VIRTNET_RX_PAD + xdp_headroom; |
1190 | headroom = vi->hdr_len + header_offset; |
1191 | buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + |
1192 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1193 | xdp_page = xdp_linearize_page(rq, num_buf: &num_buf, p: page, |
1194 | offset, page_off: header_offset, |
1195 | len: &tlen); |
1196 | if (!xdp_page) |
1197 | goto err_xdp; |
1198 | |
1199 | buf = page_address(xdp_page); |
1200 | put_page(page); |
1201 | page = xdp_page; |
1202 | } |
1203 | |
1204 | xdp_init_buff(xdp: &xdp, frame_sz: buflen, rxq: &rq->xdp_rxq); |
1205 | xdp_prepare_buff(xdp: &xdp, hard_start: buf + VIRTNET_RX_PAD + vi->hdr_len, |
1206 | headroom: xdp_headroom, data_len: len, meta_valid: true); |
1207 | |
1208 | act = virtnet_xdp_handler(xdp_prog, xdp: &xdp, dev, xdp_xmit, stats); |
1209 | |
1210 | switch (act) { |
1211 | case XDP_PASS: |
1212 | /* Recalculate length in case bpf program changed it */ |
1213 | len = xdp.data_end - xdp.data; |
1214 | metasize = xdp.data - xdp.data_meta; |
1215 | break; |
1216 | |
1217 | case XDP_TX: |
1218 | case XDP_REDIRECT: |
1219 | goto xdp_xmit; |
1220 | |
1221 | default: |
1222 | goto err_xdp; |
1223 | } |
1224 | |
1225 | skb = virtnet_build_skb(buf, buflen, headroom: xdp.data - buf, len); |
1226 | if (unlikely(!skb)) |
1227 | goto err; |
1228 | |
1229 | if (metasize) |
1230 | skb_metadata_set(skb, meta_len: metasize); |
1231 | |
1232 | return skb; |
1233 | |
1234 | err_xdp: |
1235 | u64_stats_inc(p: &stats->xdp_drops); |
1236 | err: |
1237 | u64_stats_inc(p: &stats->drops); |
1238 | put_page(page); |
1239 | xdp_xmit: |
1240 | return NULL; |
1241 | } |
1242 | |
1243 | static struct sk_buff *receive_small(struct net_device *dev, |
1244 | struct virtnet_info *vi, |
1245 | struct receive_queue *rq, |
1246 | void *buf, void *ctx, |
1247 | unsigned int len, |
1248 | unsigned int *xdp_xmit, |
1249 | struct virtnet_rq_stats *stats) |
1250 | { |
1251 | unsigned int xdp_headroom = (unsigned long)ctx; |
1252 | struct page *page = virt_to_head_page(x: buf); |
1253 | struct sk_buff *skb; |
1254 | |
1255 | len -= vi->hdr_len; |
1256 | u64_stats_add(p: &stats->bytes, val: len); |
1257 | |
1258 | if (unlikely(len > GOOD_PACKET_LEN)) { |
1259 | pr_debug("%s: rx error: len %u exceeds max size %d\n" , |
1260 | dev->name, len, GOOD_PACKET_LEN); |
1261 | DEV_STATS_INC(dev, rx_length_errors); |
1262 | goto err; |
1263 | } |
1264 | |
1265 | if (unlikely(vi->xdp_enabled)) { |
1266 | struct bpf_prog *xdp_prog; |
1267 | |
1268 | rcu_read_lock(); |
1269 | xdp_prog = rcu_dereference(rq->xdp_prog); |
1270 | if (xdp_prog) { |
1271 | skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, |
1272 | xdp_headroom, len, xdp_xmit, |
1273 | stats); |
1274 | rcu_read_unlock(); |
1275 | return skb; |
1276 | } |
1277 | rcu_read_unlock(); |
1278 | } |
1279 | |
1280 | skb = receive_small_build_skb(vi, xdp_headroom, buf, len); |
1281 | if (likely(skb)) |
1282 | return skb; |
1283 | |
1284 | err: |
1285 | u64_stats_inc(p: &stats->drops); |
1286 | put_page(page); |
1287 | return NULL; |
1288 | } |
1289 | |
1290 | static struct sk_buff *receive_big(struct net_device *dev, |
1291 | struct virtnet_info *vi, |
1292 | struct receive_queue *rq, |
1293 | void *buf, |
1294 | unsigned int len, |
1295 | struct virtnet_rq_stats *stats) |
1296 | { |
1297 | struct page *page = buf; |
1298 | struct sk_buff *skb = |
1299 | page_to_skb(vi, rq, page, offset: 0, len, PAGE_SIZE, headroom: 0); |
1300 | |
1301 | u64_stats_add(p: &stats->bytes, val: len - vi->hdr_len); |
1302 | if (unlikely(!skb)) |
1303 | goto err; |
1304 | |
1305 | return skb; |
1306 | |
1307 | err: |
1308 | u64_stats_inc(p: &stats->drops); |
1309 | give_pages(rq, page); |
1310 | return NULL; |
1311 | } |
1312 | |
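/* Error-path helper for mergeable packets: pull the remaining buffers of a
 * partially received frame off the ring and drop them.
 */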
1313 | static void mergeable_buf_free(struct receive_queue *rq, int num_buf, |
1314 | struct net_device *dev, |
1315 | struct virtnet_rq_stats *stats) |
1316 | { |
1317 | struct page *page; |
1318 | void *buf; |
1319 | int len; |
1320 | |
1321 | while (num_buf-- > 1) { |
1322 | buf = virtnet_rq_get_buf(rq, len: &len, NULL); |
1323 | if (unlikely(!buf)) { |
1324 | pr_debug("%s: rx error: %d buffers missing\n" , |
1325 | dev->name, num_buf); |
1326 | DEV_STATS_INC(dev, rx_length_errors); |
1327 | break; |
1328 | } |
1329 | u64_stats_add(p: &stats->bytes, val: len); |
1330 | page = virt_to_head_page(x: buf); |
1331 | put_page(page); |
1332 | } |
1333 | } |
1334 | |
1335 | /* Why not use xdp_build_skb_from_frame() ? |
1336 | * XDP core assumes that xdp frags are PAGE_SIZE in length, while in |
1337 | * virtio-net there are 2 points that do not match its requirements: |
1338 | * 1. The size of the prefilled buffer is not fixed before xdp is set. |
1339 | * 2. xdp_build_skb_from_frame() does more checks that we don't need, |
1340 | * like eth_type_trans() (which virtio-net does in receive_buf()). |
1341 | */ |
1342 | static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, |
1343 | struct virtnet_info *vi, |
1344 | struct xdp_buff *xdp, |
1345 | unsigned int xdp_frags_truesz) |
1346 | { |
1347 | struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); |
1348 | unsigned int headroom, data_len; |
1349 | struct sk_buff *skb; |
1350 | int metasize; |
1351 | u8 nr_frags; |
1352 | |
1353 | if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { |
1354 | pr_debug("Error building skb as missing reserved tailroom for xdp" ); |
1355 | return NULL; |
1356 | } |
1357 | |
1358 | if (unlikely(xdp_buff_has_frags(xdp))) |
1359 | nr_frags = sinfo->nr_frags; |
1360 | |
1361 | skb = build_skb(data: xdp->data_hard_start, frag_size: xdp->frame_sz); |
1362 | if (unlikely(!skb)) |
1363 | return NULL; |
1364 | |
1365 | headroom = xdp->data - xdp->data_hard_start; |
1366 | data_len = xdp->data_end - xdp->data; |
1367 | skb_reserve(skb, len: headroom); |
1368 | __skb_put(skb, len: data_len); |
1369 | |
1370 | metasize = xdp->data - xdp->data_meta; |
1371 | metasize = metasize > 0 ? metasize : 0; |
1372 | if (metasize) |
1373 | skb_metadata_set(skb, meta_len: metasize); |
1374 | |
1375 | if (unlikely(xdp_buff_has_frags(xdp))) |
1376 | xdp_update_skb_shared_info(skb, nr_frags, |
1377 | size: sinfo->xdp_frags_size, |
1378 | truesize: xdp_frags_truesz, |
1379 | pfmemalloc: xdp_buff_is_frag_pfmemalloc(xdp)); |
1380 | |
1381 | return skb; |
1382 | } |
1383 | |
1384 | /* TODO: build xdp in big mode */ |
1385 | static int virtnet_build_xdp_buff_mrg(struct net_device *dev, |
1386 | struct virtnet_info *vi, |
1387 | struct receive_queue *rq, |
1388 | struct xdp_buff *xdp, |
1389 | void *buf, |
1390 | unsigned int len, |
1391 | unsigned int frame_sz, |
1392 | int *num_buf, |
1393 | unsigned int *xdp_frags_truesize, |
1394 | struct virtnet_rq_stats *stats) |
1395 | { |
1396 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf; |
1397 | unsigned int headroom, tailroom, room; |
1398 | unsigned int truesize, cur_frag_size; |
1399 | struct skb_shared_info *shinfo; |
1400 | unsigned int xdp_frags_truesz = 0; |
1401 | struct page *page; |
1402 | skb_frag_t *frag; |
1403 | int offset; |
1404 | void *ctx; |
1405 | |
1406 | xdp_init_buff(xdp, frame_sz, rxq: &rq->xdp_rxq); |
1407 | xdp_prepare_buff(xdp, hard_start: buf - VIRTIO_XDP_HEADROOM, |
1408 | VIRTIO_XDP_HEADROOM + vi->hdr_len, data_len: len - vi->hdr_len, meta_valid: true); |
1409 | |
1410 | if (!*num_buf) |
1411 | return 0; |
1412 | |
1413 | if (*num_buf > 1) { |
1414 | /* If we want to build multi-buffer xdp, we need |
1415 | * to specify that the flags of xdp_buff have the |
1416 | * XDP_FLAGS_HAS_FRAG bit. |
1417 | */ |
1418 | if (!xdp_buff_has_frags(xdp)) |
1419 | xdp_buff_set_frags_flag(xdp); |
1420 | |
1421 | shinfo = xdp_get_shared_info_from_buff(xdp); |
1422 | shinfo->nr_frags = 0; |
1423 | shinfo->xdp_frags_size = 0; |
1424 | } |
1425 | |
1426 | if (*num_buf > MAX_SKB_FRAGS + 1) |
1427 | return -EINVAL; |
1428 | |
1429 | while (--*num_buf > 0) { |
1430 | buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx); |
1431 | if (unlikely(!buf)) { |
1432 | pr_debug("%s: rx error: %d buffers out of %d missing\n" , |
1433 | dev->name, *num_buf, |
1434 | virtio16_to_cpu(vi->vdev, hdr->num_buffers)); |
1435 | DEV_STATS_INC(dev, rx_length_errors); |
1436 | goto err; |
1437 | } |
1438 | |
1439 | u64_stats_add(p: &stats->bytes, val: len); |
1440 | page = virt_to_head_page(x: buf); |
1441 | offset = buf - page_address(page); |
1442 | |
1443 | truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1444 | headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1445 | tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1446 | room = SKB_DATA_ALIGN(headroom + tailroom); |
1447 | |
1448 | cur_frag_size = truesize; |
1449 | xdp_frags_truesz += cur_frag_size; |
1450 | if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { |
1451 | put_page(page); |
1452 | pr_debug("%s: rx error: len %u exceeds truesize %lu\n" , |
1453 | dev->name, len, (unsigned long)(truesize - room)); |
1454 | DEV_STATS_INC(dev, rx_length_errors); |
1455 | goto err; |
1456 | } |
1457 | |
1458 | frag = &shinfo->frags[shinfo->nr_frags++]; |
1459 | skb_frag_fill_page_desc(frag, page, off: offset, size: len); |
1460 | if (page_is_pfmemalloc(page)) |
1461 | xdp_buff_set_frag_pfmemalloc(xdp); |
1462 | |
1463 | shinfo->xdp_frags_size += len; |
1464 | } |
1465 | |
1466 | *xdp_frags_truesize = xdp_frags_truesz; |
1467 | return 0; |
1468 | |
1469 | err: |
1470 | put_xdp_frags(xdp); |
1471 | return -EINVAL; |
1472 | } |
1473 | |
1474 | static void *mergeable_xdp_get_buf(struct virtnet_info *vi, |
1475 | struct receive_queue *rq, |
1476 | struct bpf_prog *xdp_prog, |
1477 | void *ctx, |
1478 | unsigned int *frame_sz, |
1479 | int *num_buf, |
1480 | struct page **page, |
1481 | int offset, |
1482 | unsigned int *len, |
1483 | struct virtio_net_hdr_mrg_rxbuf *hdr) |
1484 | { |
1485 | unsigned int truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1486 | unsigned int headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1487 | struct page *xdp_page; |
1488 | unsigned int xdp_room; |
1489 | |
1490 | /* Transient failure which in theory could occur if |
1491 | * in-flight packets from before XDP was enabled reach |
1492 | * the receive path after XDP is loaded. |
1493 | */ |
1494 | if (unlikely(hdr->hdr.gso_type)) |
1495 | return NULL; |
1496 | |
/* Now XDP core assumes frag size is PAGE_SIZE, but buffers
 * with headroom may add a hole in truesize, which
 * makes their length exceed PAGE_SIZE. So we disable the
 * hole mechanism for xdp. See add_recvbuf_mergeable().
1501 | */ |
1502 | *frame_sz = truesize; |
1503 | |
1504 | if (likely(headroom >= virtnet_get_headroom(vi) && |
1505 | (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { |
1506 | return page_address(*page) + offset; |
1507 | } |
1508 | |
/* This happens when headroom is not enough because
 * the buffer was prefilled before XDP was set.
1511 | * This should only happen for the first several packets. |
1512 | * In fact, vq reset can be used here to help us clean up |
1513 | * the prefilled buffers, but many existing devices do not |
1514 | * support it, and we don't want to bother users who are |
1515 | * using xdp normally. |
1516 | */ |
1517 | if (!xdp_prog->aux->xdp_has_frags) { |
1518 | /* linearize data for XDP */ |
1519 | xdp_page = xdp_linearize_page(rq, num_buf, |
1520 | p: *page, offset, |
1521 | VIRTIO_XDP_HEADROOM, |
1522 | len); |
1523 | if (!xdp_page) |
1524 | return NULL; |
1525 | } else { |
1526 | xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + |
1527 | sizeof(struct skb_shared_info)); |
1528 | if (*len + xdp_room > PAGE_SIZE) |
1529 | return NULL; |
1530 | |
1531 | xdp_page = alloc_page(GFP_ATOMIC); |
1532 | if (!xdp_page) |
1533 | return NULL; |
1534 | |
1535 | memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, |
1536 | page_address(*page) + offset, *len); |
1537 | } |
1538 | |
1539 | *frame_sz = PAGE_SIZE; |
1540 | |
1541 | put_page(page: *page); |
1542 | |
1543 | *page = xdp_page; |
1544 | |
1545 | return page_address(*page) + VIRTIO_XDP_HEADROOM; |
1546 | } |
1547 | |
1548 | static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, |
1549 | struct virtnet_info *vi, |
1550 | struct receive_queue *rq, |
1551 | struct bpf_prog *xdp_prog, |
1552 | void *buf, |
1553 | void *ctx, |
1554 | unsigned int len, |
1555 | unsigned int *xdp_xmit, |
1556 | struct virtnet_rq_stats *stats) |
1557 | { |
1558 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf; |
1559 | int num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers); |
1560 | struct page *page = virt_to_head_page(x: buf); |
1561 | int offset = buf - page_address(page); |
1562 | unsigned int xdp_frags_truesz = 0; |
1563 | struct sk_buff *head_skb; |
1564 | unsigned int frame_sz; |
1565 | struct xdp_buff xdp; |
1566 | void *data; |
1567 | u32 act; |
1568 | int err; |
1569 | |
1570 | data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, frame_sz: &frame_sz, num_buf: &num_buf, page: &page, |
1571 | offset, len: &len, hdr); |
1572 | if (unlikely(!data)) |
1573 | goto err_xdp; |
1574 | |
1575 | err = virtnet_build_xdp_buff_mrg(dev, vi, rq, xdp: &xdp, buf: data, len, frame_sz, |
1576 | num_buf: &num_buf, xdp_frags_truesize: &xdp_frags_truesz, stats); |
1577 | if (unlikely(err)) |
1578 | goto err_xdp; |
1579 | |
1580 | act = virtnet_xdp_handler(xdp_prog, xdp: &xdp, dev, xdp_xmit, stats); |
1581 | |
1582 | switch (act) { |
1583 | case XDP_PASS: |
1584 | head_skb = build_skb_from_xdp_buff(dev, vi, xdp: &xdp, xdp_frags_truesz); |
1585 | if (unlikely(!head_skb)) |
1586 | break; |
1587 | return head_skb; |
1588 | |
1589 | case XDP_TX: |
1590 | case XDP_REDIRECT: |
1591 | return NULL; |
1592 | |
1593 | default: |
1594 | break; |
1595 | } |
1596 | |
1597 | put_xdp_frags(xdp: &xdp); |
1598 | |
1599 | err_xdp: |
1600 | put_page(page); |
1601 | mergeable_buf_free(rq, num_buf, dev, stats); |
1602 | |
1603 | u64_stats_inc(p: &stats->xdp_drops); |
1604 | u64_stats_inc(p: &stats->drops); |
1605 | return NULL; |
1606 | } |
1607 | |
1608 | static struct sk_buff *receive_mergeable(struct net_device *dev, |
1609 | struct virtnet_info *vi, |
1610 | struct receive_queue *rq, |
1611 | void *buf, |
1612 | void *ctx, |
1613 | unsigned int len, |
1614 | unsigned int *xdp_xmit, |
1615 | struct virtnet_rq_stats *stats) |
1616 | { |
1617 | struct virtio_net_hdr_mrg_rxbuf *hdr = buf; |
1618 | int num_buf = virtio16_to_cpu(vdev: vi->vdev, val: hdr->num_buffers); |
1619 | struct page *page = virt_to_head_page(x: buf); |
1620 | int offset = buf - page_address(page); |
1621 | struct sk_buff *head_skb, *curr_skb; |
1622 | unsigned int truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1623 | unsigned int headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1624 | unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1625 | unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); |
1626 | |
1627 | head_skb = NULL; |
1628 | u64_stats_add(p: &stats->bytes, val: len - vi->hdr_len); |
1629 | |
1630 | if (unlikely(len > truesize - room)) { |
1631 | pr_debug("%s: rx error: len %u exceeds truesize %lu\n" , |
1632 | dev->name, len, (unsigned long)(truesize - room)); |
1633 | DEV_STATS_INC(dev, rx_length_errors); |
1634 | goto err_skb; |
1635 | } |
1636 | |
1637 | if (unlikely(vi->xdp_enabled)) { |
1638 | struct bpf_prog *xdp_prog; |
1639 | |
1640 | rcu_read_lock(); |
1641 | xdp_prog = rcu_dereference(rq->xdp_prog); |
1642 | if (xdp_prog) { |
1643 | head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, |
1644 | len, xdp_xmit, stats); |
1645 | rcu_read_unlock(); |
1646 | return head_skb; |
1647 | } |
1648 | rcu_read_unlock(); |
1649 | } |
1650 | |
1651 | head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); |
1652 | curr_skb = head_skb; |
1653 | |
1654 | if (unlikely(!curr_skb)) |
1655 | goto err_skb; |
1656 | while (--num_buf) { |
1657 | int num_skb_frags; |
1658 | |
1659 | buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx); |
1660 | if (unlikely(!buf)) { |
1661 | pr_debug("%s: rx error: %d buffers out of %d missing\n" , |
1662 | dev->name, num_buf, |
1663 | virtio16_to_cpu(vi->vdev, |
1664 | hdr->num_buffers)); |
1665 | DEV_STATS_INC(dev, rx_length_errors); |
1666 | goto err_buf; |
1667 | } |
1668 | |
1669 | u64_stats_add(p: &stats->bytes, val: len); |
1670 | page = virt_to_head_page(x: buf); |
1671 | |
1672 | truesize = mergeable_ctx_to_truesize(mrg_ctx: ctx); |
1673 | headroom = mergeable_ctx_to_headroom(mrg_ctx: ctx); |
1674 | tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1675 | room = SKB_DATA_ALIGN(headroom + tailroom); |
1676 | if (unlikely(len > truesize - room)) { |
1677 | pr_debug("%s: rx error: len %u exceeds truesize %lu\n" , |
1678 | dev->name, len, (unsigned long)(truesize - room)); |
1679 | DEV_STATS_INC(dev, rx_length_errors); |
1680 | goto err_skb; |
1681 | } |
1682 | |
1683 | num_skb_frags = skb_shinfo(curr_skb)->nr_frags; |
1684 | if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { |
1685 | struct sk_buff *nskb = alloc_skb(size: 0, GFP_ATOMIC); |
1686 | |
1687 | if (unlikely(!nskb)) |
1688 | goto err_skb; |
1689 | if (curr_skb == head_skb) |
1690 | skb_shinfo(curr_skb)->frag_list = nskb; |
1691 | else |
1692 | curr_skb->next = nskb; |
1693 | curr_skb = nskb; |
1694 | head_skb->truesize += nskb->truesize; |
1695 | num_skb_frags = 0; |
1696 | } |
1697 | if (curr_skb != head_skb) { |
1698 | head_skb->data_len += len; |
1699 | head_skb->len += len; |
1700 | head_skb->truesize += truesize; |
1701 | } |
1702 | offset = buf - page_address(page); |
1703 | if (skb_can_coalesce(skb: curr_skb, i: num_skb_frags, page, off: offset)) { |
1704 | put_page(page); |
1705 | skb_coalesce_rx_frag(skb: curr_skb, i: num_skb_frags - 1, |
1706 | size: len, truesize); |
1707 | } else { |
1708 | skb_add_rx_frag(skb: curr_skb, i: num_skb_frags, page, |
1709 | off: offset, size: len, truesize); |
1710 | } |
1711 | } |
1712 | |
1713 | ewma_pkt_len_add(e: &rq->mrg_avg_pkt_len, val: head_skb->len); |
1714 | return head_skb; |
1715 | |
1716 | err_skb: |
1717 | put_page(page); |
1718 | mergeable_buf_free(rq, num_buf, dev, stats); |
1719 | |
1720 | err_buf: |
1721 | u64_stats_inc(p: &stats->drops); |
1722 | dev_kfree_skb(head_skb); |
1723 | return NULL; |
1724 | } |
1725 | |
1726 | static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, |
1727 | struct sk_buff *skb) |
1728 | { |
enum pkt_hash_types rss_hash_type;
1730 | |
1731 | if (!hdr_hash || !skb) |
1732 | return; |
1733 | |
1734 | switch (__le16_to_cpu(hdr_hash->hash_report)) { |
1735 | case VIRTIO_NET_HASH_REPORT_TCPv4: |
1736 | case VIRTIO_NET_HASH_REPORT_UDPv4: |
1737 | case VIRTIO_NET_HASH_REPORT_TCPv6: |
1738 | case VIRTIO_NET_HASH_REPORT_UDPv6: |
1739 | case VIRTIO_NET_HASH_REPORT_TCPv6_EX: |
1740 | case VIRTIO_NET_HASH_REPORT_UDPv6_EX: |
1741 | rss_hash_type = PKT_HASH_TYPE_L4; |
1742 | break; |
1743 | case VIRTIO_NET_HASH_REPORT_IPv4: |
1744 | case VIRTIO_NET_HASH_REPORT_IPv6: |
1745 | case VIRTIO_NET_HASH_REPORT_IPv6_EX: |
1746 | rss_hash_type = PKT_HASH_TYPE_L3; |
1747 | break; |
1748 | case VIRTIO_NET_HASH_REPORT_NONE: |
1749 | default: |
1750 | rss_hash_type = PKT_HASH_TYPE_NONE; |
1751 | } |
	skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
1753 | } |
1754 | |
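/* Entry point for a completed RX buffer: validate the minimum length, hand
 * the buffer to the mergeable/big/small receive path, then fill in hash,
 * checksum and GSO metadata from the virtio-net header before passing the
 * skb to GRO.
 */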
1755 | static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, |
1756 | void *buf, unsigned int len, void **ctx, |
1757 | unsigned int *xdp_xmit, |
1758 | struct virtnet_rq_stats *stats) |
1759 | { |
1760 | struct net_device *dev = vi->dev; |
1761 | struct sk_buff *skb; |
1762 | struct virtio_net_common_hdr *hdr; |
1763 | |
1764 | if (unlikely(len < vi->hdr_len + ETH_HLEN)) { |
1765 | pr_debug("%s: short packet %i\n" , dev->name, len); |
1766 | DEV_STATS_INC(dev, rx_length_errors); |
1767 | virtnet_rq_free_unused_buf(vq: rq->vq, buf); |
1768 | return; |
1769 | } |
1770 | |
1771 | if (vi->mergeable_rx_bufs) |
1772 | skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, |
1773 | stats); |
1774 | else if (vi->big_packets) |
1775 | skb = receive_big(dev, vi, rq, buf, len, stats); |
1776 | else |
1777 | skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); |
1778 | |
1779 | if (unlikely(!skb)) |
1780 | return; |
1781 | |
1782 | hdr = skb_vnet_common_hdr(skb); |
1783 | if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) |
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
1785 | |
1786 | if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) |
1787 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1788 | |
	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
				  virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
1792 | dev->name, hdr->hdr.gso_type, |
1793 | hdr->hdr.gso_size); |
1794 | goto frame_err; |
1795 | } |
1796 | |
	skb_record_rx_queue(skb, vq2rxq(rq->vq));
	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	napi_gro_receive(&rq->napi, skb);
1803 | return; |
1804 | |
1805 | frame_err: |
1806 | DEV_STATS_INC(dev, rx_frame_errors); |
1807 | dev_kfree_skb(skb); |
1808 | } |
1809 | |
1810 | /* Unlike mergeable buffers, all buffers are allocated to the |
1811 | * same size, except for the headroom. For this reason we do |
1812 | * not need to use mergeable_len_to_ctx here - it is enough |
1813 | * to store the headroom as the context ignoring the truesize. |
1814 | */ |
1815 | static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, |
1816 | gfp_t gfp) |
1817 | { |
1818 | char *buf; |
1819 | unsigned int xdp_headroom = virtnet_get_headroom(vi); |
1820 | void *ctx = (void *)(unsigned long)xdp_headroom; |
1821 | int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; |
1822 | int err; |
1823 | |
1824 | len = SKB_DATA_ALIGN(len) + |
1825 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1826 | |
	buf = virtnet_rq_alloc(rq, len, gfp);
	if (unlikely(!buf))
		return -ENOMEM;

	virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom,
			       vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		if (rq->do_dma)
			virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
1839 | } |
1840 | |
1841 | return err; |
1842 | } |
1843 | |
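/* Post one "big packet" receive buffer: a chain of pages linked through
 * page->private, described by rq->sg, with the virtio-net header carried in
 * a separate sg entry in case the device cannot accept an arbitrary layout.
 */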
1844 | static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, |
1845 | gfp_t gfp) |
1846 | { |
1847 | struct page *first, *list = NULL; |
1848 | char *p; |
1849 | int i, err, offset; |
1850 | |
1851 | sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); |
1852 | |
1853 | /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ |
1854 | for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { |
1855 | first = get_a_page(rq, gfp_mask: gfp); |
1856 | if (!first) { |
1857 | if (list) |
1858 | give_pages(rq, page: list); |
1859 | return -ENOMEM; |
1860 | } |
1861 | sg_set_buf(sg: &rq->sg[i], page_address(first), PAGE_SIZE); |
1862 | |
1863 | /* chain new page in list head to match sg */ |
1864 | first->private = (unsigned long)list; |
1865 | list = first; |
1866 | } |
1867 | |
1868 | first = get_a_page(rq, gfp_mask: gfp); |
1869 | if (!first) { |
1870 | give_pages(rq, page: list); |
1871 | return -ENOMEM; |
1872 | } |
1873 | p = page_address(first); |
1874 | |
1875 | /* rq->sg[0], rq->sg[1] share the same page */ |
1876 | /* a separated rq->sg[0] for header - required in case !any_header_sg */ |
1877 | sg_set_buf(sg: &rq->sg[0], buf: p, buflen: vi->hdr_len); |
1878 | |
1879 | /* rq->sg[1] for data packet, from offset */ |
1880 | offset = sizeof(struct padded_vnet_hdr); |
1881 | sg_set_buf(sg: &rq->sg[1], buf: p + offset, PAGE_SIZE - offset); |
1882 | |
1883 | /* chain first in list head */ |
1884 | first->private = (unsigned long)list; |
1885 | err = virtqueue_add_inbuf(vq: rq->vq, sg: rq->sg, num: vi->big_packets_num_skbfrags + 2, |
1886 | data: first, gfp); |
1887 | if (err < 0) |
1888 | give_pages(rq, page: first); |
1889 | |
1890 | return err; |
1891 | } |
1892 | |
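/* Pick a buffer length for mergeable RX buffers: the EWMA of recent packet
 * sizes, clamped to [min_buf_len, PAGE_SIZE - hdr_len] and cache-line
 * aligned. When XDP headroom is in use (room != 0), use page-sized buffers.
 */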
1893 | static unsigned int get_mergeable_buf_len(struct receive_queue *rq, |
1894 | struct ewma_pkt_len *avg_pkt_len, |
1895 | unsigned int room) |
1896 | { |
1897 | struct virtnet_info *vi = rq->vq->vdev->priv; |
1898 | const size_t hdr_len = vi->hdr_len; |
1899 | unsigned int len; |
1900 | |
1901 | if (room) |
1902 | return PAGE_SIZE - room; |
1903 | |
1904 | len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), |
1905 | rq->min_buf_len, PAGE_SIZE - hdr_len); |
1906 | |
1907 | return ALIGN(len, L1_CACHE_BYTES); |
1908 | } |
1909 | |
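/* Post one mergeable RX buffer carved from the per-queue page_frag. The
 * buffer's truesize and headroom are encoded into the opaque ctx so the
 * receive path can validate lengths without extra bookkeeping.
 */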
1910 | static int add_recvbuf_mergeable(struct virtnet_info *vi, |
1911 | struct receive_queue *rq, gfp_t gfp) |
1912 | { |
1913 | struct page_frag *alloc_frag = &rq->alloc_frag; |
1914 | unsigned int headroom = virtnet_get_headroom(vi); |
1915 | unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
1916 | unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); |
1917 | unsigned int len, hole; |
1918 | void *ctx; |
1919 | char *buf; |
1920 | int err; |
1921 | |
1922 | /* Extra tailroom is needed to satisfy XDP's assumption. This |
	 * means rx frags coalescing won't work, but since we've disabled
	 * GSO for XDP, it won't be a big issue.
1925 | */ |
1926 | len = get_mergeable_buf_len(rq, avg_pkt_len: &rq->mrg_avg_pkt_len, room); |
1927 | |
1928 | buf = virtnet_rq_alloc(rq, size: len + room, gfp); |
1929 | if (unlikely(!buf)) |
1930 | return -ENOMEM; |
1931 | |
1932 | buf += headroom; /* advance address leaving hole at front of pkt */ |
1933 | hole = alloc_frag->size - alloc_frag->offset; |
1934 | if (hole < len + room) { |
1935 | /* To avoid internal fragmentation, if there is very likely not |
1936 | * enough space for another buffer, add the remaining space to |
1937 | * the current buffer. |
1938 | * XDP core assumes that frame_size of xdp_buff and the length |
1939 | * of the frag are PAGE_SIZE, so we disable the hole mechanism. |
1940 | */ |
1941 | if (!headroom) |
1942 | len += hole; |
1943 | alloc_frag->offset += hole; |
1944 | } |
1945 | |
1946 | virtnet_rq_init_one_sg(rq, buf, len); |
1947 | |
1948 | ctx = mergeable_len_to_ctx(truesize: len + room, headroom); |
1949 | err = virtqueue_add_inbuf_ctx(vq: rq->vq, sg: rq->sg, num: 1, data: buf, ctx, gfp); |
1950 | if (err < 0) { |
1951 | if (rq->do_dma) |
1952 | virtnet_rq_unmap(rq, buf, len: 0); |
1953 | put_page(page: virt_to_head_page(x: buf)); |
1954 | } |
1955 | |
1956 | return err; |
1957 | } |
1958 | |
1959 | /* |
1960 | * Returns false if we couldn't fill entirely (OOM). |
1961 | * |
1962 | * Normally run in the receive path, but can also be run from ndo_open |
1963 | * before we're receiving packets, or from refill_work which is |
1964 | * careful to disable receiving (using napi_disable). |
1965 | */ |
1966 | static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, |
1967 | gfp_t gfp) |
1968 | { |
1969 | int err; |
1970 | bool oom; |
1971 | |
1972 | do { |
1973 | if (vi->mergeable_rx_bufs) |
1974 | err = add_recvbuf_mergeable(vi, rq, gfp); |
1975 | else if (vi->big_packets) |
1976 | err = add_recvbuf_big(vi, rq, gfp); |
1977 | else |
1978 | err = add_recvbuf_small(vi, rq, gfp); |
1979 | |
1980 | oom = err == -ENOMEM; |
1981 | if (err) |
1982 | break; |
1983 | } while (rq->vq->num_free); |
1984 | if (virtqueue_kick_prepare(vq: rq->vq) && virtqueue_notify(vq: rq->vq)) { |
1985 | unsigned long flags; |
1986 | |
1987 | flags = u64_stats_update_begin_irqsave(syncp: &rq->stats.syncp); |
1988 | u64_stats_inc(p: &rq->stats.kicks); |
1989 | u64_stats_update_end_irqrestore(syncp: &rq->stats.syncp, flags); |
1990 | } |
1991 | |
1992 | return !oom; |
1993 | } |
1994 | |
1995 | static void skb_recv_done(struct virtqueue *rvq) |
1996 | { |
1997 | struct virtnet_info *vi = rvq->vdev->priv; |
1998 | struct receive_queue *rq = &vi->rq[vq2rxq(vq: rvq)]; |
1999 | |
2000 | virtqueue_napi_schedule(napi: &rq->napi, vq: rvq); |
2001 | } |
2002 | |
2003 | static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) |
2004 | { |
2005 | napi_enable(n: napi); |
2006 | |
2007 | /* If all buffers were filled by other side before we napi_enabled, we |
2008 | * won't get another interrupt, so process any outstanding packets now. |
2009 | * Call local_bh_enable after to trigger softIRQ processing. |
2010 | */ |
2011 | local_bh_disable(); |
2012 | virtqueue_napi_schedule(napi, vq); |
2013 | local_bh_enable(); |
2014 | } |
2015 | |
2016 | static void virtnet_napi_tx_enable(struct virtnet_info *vi, |
2017 | struct virtqueue *vq, |
2018 | struct napi_struct *napi) |
2019 | { |
2020 | if (!napi->weight) |
2021 | return; |
2022 | |
2023 | /* Tx napi touches cachelines on the cpu handling tx interrupts. Only |
2024 | * enable the feature if this is likely affine with the transmit path. |
2025 | */ |
2026 | if (!vi->affinity_hint_set) { |
2027 | napi->weight = 0; |
2028 | return; |
2029 | } |
2030 | |
2031 | return virtnet_napi_enable(vq, napi); |
2032 | } |
2033 | |
2034 | static void virtnet_napi_tx_disable(struct napi_struct *napi) |
2035 | { |
2036 | if (napi->weight) |
2037 | napi_disable(n: napi); |
2038 | } |
2039 | |
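/* Deferred refill path: temporarily disables NAPI on each RX queue so
 * try_fill_recv() cannot race with the receive path, and reschedules itself
 * if the rings are still empty (e.g. under memory pressure).
 */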
2040 | static void refill_work(struct work_struct *work) |
2041 | { |
2042 | struct virtnet_info *vi = |
2043 | container_of(work, struct virtnet_info, refill.work); |
2044 | bool still_empty; |
2045 | int i; |
2046 | |
2047 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
2048 | struct receive_queue *rq = &vi->rq[i]; |
2049 | |
2050 | napi_disable(n: &rq->napi); |
2051 | still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); |
2052 | virtnet_napi_enable(vq: rq->vq, napi: &rq->napi); |
2053 | |
		/* In theory, this can happen: if we don't get any buffers in,
		 * we will *never* try to fill again.
2056 | */ |
2057 | if (still_empty) |
2058 | schedule_delayed_work(dwork: &vi->refill, HZ/2); |
2059 | } |
2060 | } |
2061 | |
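/* Per-NAPI-poll RX processing: pull up to @budget buffers off the virtqueue,
 * refill when the ring runs low (deferring to refill_work on OOM) and fold
 * the on-stack stats into the queue's u64_stats counters.
 */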
2062 | static int virtnet_receive(struct receive_queue *rq, int budget, |
2063 | unsigned int *xdp_xmit) |
2064 | { |
2065 | struct virtnet_info *vi = rq->vq->vdev->priv; |
2066 | struct virtnet_rq_stats stats = {}; |
2067 | unsigned int len; |
2068 | int packets = 0; |
2069 | void *buf; |
2070 | int i; |
2071 | |
2072 | if (!vi->big_packets || vi->mergeable_rx_bufs) { |
2073 | void *ctx; |
2074 | |
2075 | while (packets < budget && |
2076 | (buf = virtnet_rq_get_buf(rq, len: &len, ctx: &ctx))) { |
2077 | receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats: &stats); |
2078 | packets++; |
2079 | } |
2080 | } else { |
2081 | while (packets < budget && |
2082 | (buf = virtnet_rq_get_buf(rq, len: &len, NULL)) != NULL) { |
2083 | receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats: &stats); |
2084 | packets++; |
2085 | } |
2086 | } |
2087 | |
2088 | if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { |
2089 | if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { |
2090 | spin_lock(lock: &vi->refill_lock); |
2091 | if (vi->refill_enabled) |
2092 | schedule_delayed_work(dwork: &vi->refill, delay: 0); |
2093 | spin_unlock(lock: &vi->refill_lock); |
2094 | } |
2095 | } |
2096 | |
	u64_stats_set(&stats.packets, packets);
	u64_stats_update_begin(&rq->stats.syncp);
	for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
		size_t offset = virtnet_rq_stats_desc[i].offset;
		u64_stats_t *item, *src;

		item = (u64_stats_t *)((u8 *)&rq->stats + offset);
		src = (u64_stats_t *)((u8 *)&stats + offset);
		u64_stats_add(item, u64_stats_read(src));
	}
	u64_stats_update_end(&rq->stats.syncp);
2108 | |
2109 | return packets; |
2110 | } |
2111 | |
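/* Reclaim completed TX buffers for this queue pair from the RX NAPI handler.
 * Only runs when TX NAPI is enabled and the queue is neither an XDP queue
 * nor being reset.
 */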
2112 | static void virtnet_poll_cleantx(struct receive_queue *rq) |
2113 | { |
2114 | struct virtnet_info *vi = rq->vq->vdev->priv; |
2115 | unsigned int index = vq2rxq(vq: rq->vq); |
2116 | struct send_queue *sq = &vi->sq[index]; |
2117 | struct netdev_queue *txq = netdev_get_tx_queue(dev: vi->dev, index); |
2118 | |
2119 | if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, q: index)) |
2120 | return; |
2121 | |
2122 | if (__netif_tx_trylock(txq)) { |
2123 | if (sq->reset) { |
2124 | __netif_tx_unlock(txq); |
2125 | return; |
2126 | } |
2127 | |
2128 | do { |
2129 | virtqueue_disable_cb(vq: sq->vq); |
2130 | free_old_xmit_skbs(sq, in_napi: true); |
2131 | } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); |
2132 | |
2133 | if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) |
2134 | netif_tx_wake_queue(dev_queue: txq); |
2135 | |
2136 | __netif_tx_unlock(txq); |
2137 | } |
2138 | } |
2139 | |
2140 | static int virtnet_poll(struct napi_struct *napi, int budget) |
2141 | { |
2142 | struct receive_queue *rq = |
2143 | container_of(napi, struct receive_queue, napi); |
2144 | struct virtnet_info *vi = rq->vq->vdev->priv; |
2145 | struct send_queue *sq; |
2146 | unsigned int received; |
2147 | unsigned int xdp_xmit = 0; |
2148 | |
2149 | virtnet_poll_cleantx(rq); |
2150 | |
2151 | received = virtnet_receive(rq, budget, xdp_xmit: &xdp_xmit); |
2152 | |
2153 | if (xdp_xmit & VIRTIO_XDP_REDIR) |
2154 | xdp_do_flush(); |
2155 | |
2156 | /* Out of packets? */ |
2157 | if (received < budget) |
2158 | virtqueue_napi_complete(napi, vq: rq->vq, processed: received); |
2159 | |
2160 | if (xdp_xmit & VIRTIO_XDP_TX) { |
2161 | sq = virtnet_xdp_get_sq(vi); |
2162 | if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) { |
2163 | u64_stats_update_begin(syncp: &sq->stats.syncp); |
2164 | u64_stats_inc(p: &sq->stats.kicks); |
2165 | u64_stats_update_end(syncp: &sq->stats.syncp); |
2166 | } |
2167 | virtnet_xdp_put_sq(vi, sq); |
2168 | } |
2169 | |
2170 | return received; |
2171 | } |
2172 | |
2173 | static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) |
2174 | { |
2175 | virtnet_napi_tx_disable(napi: &vi->sq[qp_index].napi); |
2176 | napi_disable(n: &vi->rq[qp_index].napi); |
2177 | xdp_rxq_info_unreg(xdp_rxq: &vi->rq[qp_index].xdp_rxq); |
2178 | } |
2179 | |
2180 | static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) |
2181 | { |
2182 | struct net_device *dev = vi->dev; |
2183 | int err; |
2184 | |
2185 | err = xdp_rxq_info_reg(xdp_rxq: &vi->rq[qp_index].xdp_rxq, dev, queue_index: qp_index, |
2186 | napi_id: vi->rq[qp_index].napi.napi_id); |
2187 | if (err < 0) |
2188 | return err; |
2189 | |
2190 | err = xdp_rxq_info_reg_mem_model(xdp_rxq: &vi->rq[qp_index].xdp_rxq, |
2191 | type: MEM_TYPE_PAGE_SHARED, NULL); |
2192 | if (err < 0) |
2193 | goto err_xdp_reg_mem_model; |
2194 | |
2195 | virtnet_napi_enable(vq: vi->rq[qp_index].vq, napi: &vi->rq[qp_index].napi); |
2196 | virtnet_napi_tx_enable(vi, vq: vi->sq[qp_index].vq, napi: &vi->sq[qp_index].napi); |
2197 | |
2198 | return 0; |
2199 | |
2200 | err_xdp_reg_mem_model: |
2201 | xdp_rxq_info_unreg(xdp_rxq: &vi->rq[qp_index].xdp_rxq); |
2202 | return err; |
2203 | } |
2204 | |
2205 | static int virtnet_open(struct net_device *dev) |
2206 | { |
2207 | struct virtnet_info *vi = netdev_priv(dev); |
2208 | int i, err; |
2209 | |
2210 | enable_delayed_refill(vi); |
2211 | |
2212 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2213 | if (i < vi->curr_queue_pairs) |
2214 | /* Make sure we have some buffers: if oom use wq. */ |
2215 | if (!try_fill_recv(vi, rq: &vi->rq[i], GFP_KERNEL)) |
2216 | schedule_delayed_work(dwork: &vi->refill, delay: 0); |
2217 | |
2218 | err = virtnet_enable_queue_pair(vi, qp_index: i); |
2219 | if (err < 0) |
2220 | goto err_enable_qp; |
2221 | } |
2222 | |
2223 | return 0; |
2224 | |
2225 | err_enable_qp: |
2226 | disable_delayed_refill(vi); |
2227 | cancel_delayed_work_sync(dwork: &vi->refill); |
2228 | |
2229 | for (i--; i >= 0; i--) |
2230 | virtnet_disable_queue_pair(vi, qp_index: i); |
2231 | return err; |
2232 | } |
2233 | |
2234 | static int virtnet_poll_tx(struct napi_struct *napi, int budget) |
2235 | { |
2236 | struct send_queue *sq = container_of(napi, struct send_queue, napi); |
2237 | struct virtnet_info *vi = sq->vq->vdev->priv; |
2238 | unsigned int index = vq2txq(vq: sq->vq); |
2239 | struct netdev_queue *txq; |
2240 | int opaque; |
2241 | bool done; |
2242 | |
2243 | if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { |
2244 | /* We don't need to enable cb for XDP */ |
2245 | napi_complete_done(n: napi, work_done: 0); |
2246 | return 0; |
2247 | } |
2248 | |
2249 | txq = netdev_get_tx_queue(dev: vi->dev, index); |
2250 | __netif_tx_lock(txq, raw_smp_processor_id()); |
2251 | virtqueue_disable_cb(vq: sq->vq); |
2252 | free_old_xmit_skbs(sq, in_napi: true); |
2253 | |
2254 | if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) |
2255 | netif_tx_wake_queue(dev_queue: txq); |
2256 | |
2257 | opaque = virtqueue_enable_cb_prepare(vq: sq->vq); |
2258 | |
2259 | done = napi_complete_done(n: napi, work_done: 0); |
2260 | |
2261 | if (!done) |
2262 | virtqueue_disable_cb(vq: sq->vq); |
2263 | |
2264 | __netif_tx_unlock(txq); |
2265 | |
2266 | if (done) { |
2267 | if (unlikely(virtqueue_poll(sq->vq, opaque))) { |
2268 | if (napi_schedule_prep(n: napi)) { |
2269 | __netif_tx_lock(txq, raw_smp_processor_id()); |
2270 | virtqueue_disable_cb(vq: sq->vq); |
2271 | __netif_tx_unlock(txq); |
2272 | __napi_schedule(n: napi); |
2273 | } |
2274 | } |
2275 | } |
2276 | |
2277 | return 0; |
2278 | } |
2279 | |
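/* Map an skb onto the TX virtqueue. The virtio-net header is either pushed
 * into the skb's headroom (when any_header_sg and alignment allow) or placed
 * in a separate sg entry ahead of the packet data.
 */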
2280 | static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) |
2281 | { |
2282 | struct virtio_net_hdr_mrg_rxbuf *hdr; |
2283 | const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; |
2284 | struct virtnet_info *vi = sq->vq->vdev->priv; |
2285 | int num_sg; |
2286 | unsigned hdr_len = vi->hdr_len; |
2287 | bool can_push; |
2288 | |
2289 | pr_debug("%s: xmit %p %pM\n" , vi->dev->name, skb, dest); |
2290 | |
2291 | can_push = vi->any_header_sg && |
2292 | !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && |
2293 | !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; |
2294 | /* Even if we can, don't push here yet as this would skew |
2295 | * csum_start offset below. */ |
2296 | if (can_push) |
2297 | hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); |
2298 | else |
2299 | hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; |
2300 | |
2301 | if (virtio_net_hdr_from_skb(skb, hdr: &hdr->hdr, |
2302 | little_endian: virtio_is_little_endian(vdev: vi->vdev), has_data_valid: false, |
2303 | vlan_hlen: 0)) |
2304 | return -EPROTO; |
2305 | |
2306 | if (vi->mergeable_rx_bufs) |
2307 | hdr->num_buffers = 0; |
2308 | |
2309 | sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); |
2310 | if (can_push) { |
		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);
	} else {
		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		num_sg++;
	}
	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
2325 | } |
2326 | |
2327 | static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) |
2328 | { |
2329 | struct virtnet_info *vi = netdev_priv(dev); |
2330 | int qnum = skb_get_queue_mapping(skb); |
2331 | struct send_queue *sq = &vi->sq[qnum]; |
2332 | int err; |
2333 | struct netdev_queue *txq = netdev_get_tx_queue(dev, index: qnum); |
2334 | bool kick = !netdev_xmit_more(); |
2335 | bool use_napi = sq->napi.weight; |
2336 | |
2337 | /* Free up any pending old buffers before queueing new ones. */ |
2338 | do { |
2339 | if (use_napi) |
2340 | virtqueue_disable_cb(vq: sq->vq); |
2341 | |
2342 | free_old_xmit_skbs(sq, in_napi: false); |
2343 | |
2344 | } while (use_napi && kick && |
2345 | unlikely(!virtqueue_enable_cb_delayed(sq->vq))); |
2346 | |
2347 | /* timestamp packet in software */ |
2348 | skb_tx_timestamp(skb); |
2349 | |
2350 | /* Try to transmit */ |
2351 | err = xmit_skb(sq, skb); |
2352 | |
2353 | /* This should not happen! */ |
2354 | if (unlikely(err)) { |
2355 | DEV_STATS_INC(dev, tx_fifo_errors); |
2356 | if (net_ratelimit()) |
2357 | dev_warn(&dev->dev, |
2358 | "Unexpected TXQ (%d) queue failure: %d\n" , |
2359 | qnum, err); |
2360 | DEV_STATS_INC(dev, tx_dropped); |
2361 | dev_kfree_skb_any(skb); |
2362 | return NETDEV_TX_OK; |
2363 | } |
2364 | |
2365 | /* Don't wait up for transmitted skbs to be freed. */ |
2366 | if (!use_napi) { |
2367 | skb_orphan(skb); |
2368 | nf_reset_ct(skb); |
2369 | } |
2370 | |
2371 | check_sq_full_and_disable(vi, dev, sq); |
2372 | |
2373 | if (kick || netif_xmit_stopped(dev_queue: txq)) { |
2374 | if (virtqueue_kick_prepare(vq: sq->vq) && virtqueue_notify(vq: sq->vq)) { |
2375 | u64_stats_update_begin(syncp: &sq->stats.syncp); |
2376 | u64_stats_inc(p: &sq->stats.kicks); |
2377 | u64_stats_update_end(syncp: &sq->stats.syncp); |
2378 | } |
2379 | } |
2380 | |
2381 | return NETDEV_TX_OK; |
2382 | } |
2383 | |
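/* Resize an RX ring: quiesce NAPI, let the virtio core resize the virtqueue
 * while recycling unused buffers, then refill and re-enable NAPI.
 */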
2384 | static int virtnet_rx_resize(struct virtnet_info *vi, |
2385 | struct receive_queue *rq, u32 ring_num) |
2386 | { |
2387 | bool running = netif_running(dev: vi->dev); |
2388 | int err, qindex; |
2389 | |
2390 | qindex = rq - vi->rq; |
2391 | |
2392 | if (running) |
2393 | napi_disable(n: &rq->napi); |
2394 | |
2395 | err = virtqueue_resize(vq: rq->vq, num: ring_num, recycle: virtnet_rq_free_unused_buf); |
2396 | if (err) |
2397 | netdev_err(dev: vi->dev, format: "resize rx fail: rx queue index: %d err: %d\n" , qindex, err); |
2398 | |
2399 | if (!try_fill_recv(vi, rq, GFP_KERNEL)) |
2400 | schedule_delayed_work(dwork: &vi->refill, delay: 0); |
2401 | |
2402 | if (running) |
2403 | virtnet_napi_enable(vq: rq->vq, napi: &rq->napi); |
2404 | return err; |
2405 | } |
2406 | |
2407 | static int virtnet_tx_resize(struct virtnet_info *vi, |
2408 | struct send_queue *sq, u32 ring_num) |
2409 | { |
2410 | bool running = netif_running(dev: vi->dev); |
2411 | struct netdev_queue *txq; |
2412 | int err, qindex; |
2413 | |
2414 | qindex = sq - vi->sq; |
2415 | |
2416 | if (running) |
2417 | virtnet_napi_tx_disable(napi: &sq->napi); |
2418 | |
2419 | txq = netdev_get_tx_queue(dev: vi->dev, index: qindex); |
2420 | |
	/* 1. wait for all xmit to complete
2422 | * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() |
2423 | */ |
2424 | __netif_tx_lock_bh(txq); |
2425 | |
2426 | /* Prevent rx poll from accessing sq. */ |
2427 | sq->reset = true; |
2428 | |
2429 | /* Prevent the upper layer from trying to send packets. */ |
2430 | netif_stop_subqueue(dev: vi->dev, queue_index: qindex); |
2431 | |
2432 | __netif_tx_unlock_bh(txq); |
2433 | |
2434 | err = virtqueue_resize(vq: sq->vq, num: ring_num, recycle: virtnet_sq_free_unused_buf); |
2435 | if (err) |
2436 | netdev_err(dev: vi->dev, format: "resize tx fail: tx queue index: %d err: %d\n" , qindex, err); |
2437 | |
2438 | __netif_tx_lock_bh(txq); |
2439 | sq->reset = false; |
2440 | netif_tx_wake_queue(dev_queue: txq); |
2441 | __netif_tx_unlock_bh(txq); |
2442 | |
2443 | if (running) |
2444 | virtnet_napi_tx_enable(vi, vq: sq->vq, napi: &sq->napi); |
2445 | return err; |
2446 | } |
2447 | |
2448 | /* |
2449 | * Send command via the control virtqueue and check status. Commands |
2450 | * supported by the hypervisor, as indicated by feature bits, should |
2451 | * never fail unless improperly formatted. |
2452 | */ |
2453 | static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, |
2454 | struct scatterlist *out) |
2455 | { |
2456 | struct scatterlist *sgs[4], hdr, stat; |
2457 | unsigned out_num = 0, tmp; |
2458 | int ret; |
2459 | |
2460 | /* Caller should know better */ |
2461 | BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); |
2462 | |
2463 | vi->ctrl->status = ~0; |
2464 | vi->ctrl->hdr.class = class; |
2465 | vi->ctrl->hdr.cmd = cmd; |
2466 | /* Add header */ |
2467 | sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); |
2468 | sgs[out_num++] = &hdr; |
2469 | |
2470 | if (out) |
2471 | sgs[out_num++] = out; |
2472 | |
2473 | /* Add return status. */ |
2474 | sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); |
2475 | sgs[out_num] = &stat; |
2476 | |
2477 | BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); |
	ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
	if (ret < 0) {
		dev_warn(&vi->vdev->dev,
			 "Failed to add sgs for command vq: %d\n", ret);
2482 | return false; |
2483 | } |
2484 | |
2485 | if (unlikely(!virtqueue_kick(vi->cvq))) |
2486 | return vi->ctrl->status == VIRTIO_NET_OK; |
2487 | |
	/* Spin for a response; the kick causes an ioport write, trapping
2489 | * into the hypervisor, so the request should be handled immediately. |
2490 | */ |
2491 | while (!virtqueue_get_buf(vq: vi->cvq, len: &tmp) && |
2492 | !virtqueue_is_broken(vq: vi->cvq)) |
2493 | cpu_relax(); |
2494 | |
2495 | return vi->ctrl->status == VIRTIO_NET_OK; |
2496 | } |
2497 | |
2498 | static int virtnet_set_mac_address(struct net_device *dev, void *p) |
2499 | { |
2500 | struct virtnet_info *vi = netdev_priv(dev); |
2501 | struct virtio_device *vdev = vi->vdev; |
2502 | int ret; |
2503 | struct sockaddr *addr; |
2504 | struct scatterlist sg; |
2505 | |
2506 | if (virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_STANDBY)) |
2507 | return -EOPNOTSUPP; |
2508 | |
2509 | addr = kmemdup(p, size: sizeof(*addr), GFP_KERNEL); |
2510 | if (!addr) |
2511 | return -ENOMEM; |
2512 | |
2513 | ret = eth_prepare_mac_addr_change(dev, p: addr); |
2514 | if (ret) |
2515 | goto out; |
2516 | |
2517 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { |
2518 | sg_init_one(&sg, addr->sa_data, dev->addr_len); |
2519 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, |
2520 | VIRTIO_NET_CTRL_MAC_ADDR_SET, out: &sg)) { |
2521 | dev_warn(&vdev->dev, |
2522 | "Failed to set mac address by vq command.\n" ); |
2523 | ret = -EINVAL; |
2524 | goto out; |
2525 | } |
2526 | } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && |
2527 | !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { |
2528 | unsigned int i; |
2529 | |
2530 | /* Naturally, this has an atomicity problem. */ |
2531 | for (i = 0; i < dev->addr_len; i++) |
2532 | virtio_cwrite8(vdev, |
2533 | offsetof(struct virtio_net_config, mac) + |
2534 | i, val: addr->sa_data[i]); |
2535 | } |
2536 | |
2537 | eth_commit_mac_addr_change(dev, p); |
2538 | ret = 0; |
2539 | |
2540 | out: |
2541 | kfree(objp: addr); |
2542 | return ret; |
2543 | } |
2544 | |
2545 | static void virtnet_stats(struct net_device *dev, |
2546 | struct rtnl_link_stats64 *tot) |
2547 | { |
2548 | struct virtnet_info *vi = netdev_priv(dev); |
2549 | unsigned int start; |
2550 | int i; |
2551 | |
2552 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2553 | u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; |
2554 | struct receive_queue *rq = &vi->rq[i]; |
2555 | struct send_queue *sq = &vi->sq[i]; |
2556 | |
2557 | do { |
2558 | start = u64_stats_fetch_begin(syncp: &sq->stats.syncp); |
2559 | tpackets = u64_stats_read(p: &sq->stats.packets); |
2560 | tbytes = u64_stats_read(p: &sq->stats.bytes); |
2561 | terrors = u64_stats_read(p: &sq->stats.tx_timeouts); |
2562 | } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start)); |
2563 | |
2564 | do { |
2565 | start = u64_stats_fetch_begin(syncp: &rq->stats.syncp); |
2566 | rpackets = u64_stats_read(p: &rq->stats.packets); |
2567 | rbytes = u64_stats_read(p: &rq->stats.bytes); |
2568 | rdrops = u64_stats_read(p: &rq->stats.drops); |
2569 | } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start)); |
2570 | |
2571 | tot->rx_packets += rpackets; |
2572 | tot->tx_packets += tpackets; |
2573 | tot->rx_bytes += rbytes; |
2574 | tot->tx_bytes += tbytes; |
2575 | tot->rx_dropped += rdrops; |
2576 | tot->tx_errors += terrors; |
2577 | } |
2578 | |
2579 | tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); |
2580 | tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); |
2581 | tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); |
2582 | tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); |
2583 | } |
2584 | |
2585 | static void virtnet_ack_link_announce(struct virtnet_info *vi) |
2586 | { |
2587 | rtnl_lock(); |
2588 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, |
2589 | VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) |
2590 | dev_warn(&vi->dev->dev, "Failed to ack link announce.\n" ); |
2591 | rtnl_unlock(); |
2592 | } |
2593 | |
2594 | static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) |
2595 | { |
2596 | struct scatterlist sg; |
2597 | struct net_device *dev = vi->dev; |
2598 | |
2599 | if (!vi->has_cvq || !virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_MQ)) |
2600 | return 0; |
2601 | |
2602 | vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vdev: vi->vdev, val: queue_pairs); |
2603 | sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); |
2604 | |
2605 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, |
2606 | VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, out: &sg)) { |
2607 | dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n" , |
2608 | queue_pairs); |
2609 | return -EINVAL; |
2610 | } else { |
2611 | vi->curr_queue_pairs = queue_pairs; |
		/* virtnet_open() will refill when the device goes up. */
2613 | if (dev->flags & IFF_UP) |
2614 | schedule_delayed_work(dwork: &vi->refill, delay: 0); |
2615 | } |
2616 | |
2617 | return 0; |
2618 | } |
2619 | |
2620 | static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) |
2621 | { |
2622 | int err; |
2623 | |
2624 | rtnl_lock(); |
2625 | err = _virtnet_set_queues(vi, queue_pairs); |
2626 | rtnl_unlock(); |
2627 | return err; |
2628 | } |
2629 | |
2630 | static int virtnet_close(struct net_device *dev) |
2631 | { |
2632 | struct virtnet_info *vi = netdev_priv(dev); |
2633 | int i; |
2634 | |
2635 | /* Make sure NAPI doesn't schedule refill work */ |
2636 | disable_delayed_refill(vi); |
2637 | /* Make sure refill_work doesn't re-enable napi! */ |
2638 | cancel_delayed_work_sync(dwork: &vi->refill); |
2639 | |
2640 | for (i = 0; i < vi->max_queue_pairs; i++) |
2641 | virtnet_disable_queue_pair(vi, qp_index: i); |
2642 | |
2643 | return 0; |
2644 | } |
2645 | |
2646 | static void virtnet_set_rx_mode(struct net_device *dev) |
2647 | { |
2648 | struct virtnet_info *vi = netdev_priv(dev); |
2649 | struct scatterlist sg[2]; |
2650 | struct virtio_net_ctrl_mac *mac_data; |
2651 | struct netdev_hw_addr *ha; |
2652 | int uc_count; |
2653 | int mc_count; |
2654 | void *buf; |
2655 | int i; |
2656 | |
2657 | /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ |
2658 | if (!virtio_has_feature(vdev: vi->vdev, VIRTIO_NET_F_CTRL_RX)) |
2659 | return; |
2660 | |
2661 | vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); |
2662 | vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); |
2663 | |
2664 | sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); |
2665 | |
2666 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, |
2667 | VIRTIO_NET_CTRL_RX_PROMISC, out: sg)) |
2668 | dev_warn(&dev->dev, "Failed to %sable promisc mode.\n" , |
2669 | vi->ctrl->promisc ? "en" : "dis" ); |
2670 | |
2671 | sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); |
2672 | |
2673 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, |
2674 | VIRTIO_NET_CTRL_RX_ALLMULTI, out: sg)) |
2675 | dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n" , |
2676 | vi->ctrl->allmulti ? "en" : "dis" ); |
2677 | |
2678 | uc_count = netdev_uc_count(dev); |
2679 | mc_count = netdev_mc_count(dev); |
2680 | /* MAC filter - use one buffer for both lists */ |
2681 | buf = kzalloc(size: ((uc_count + mc_count) * ETH_ALEN) + |
2682 | (2 * sizeof(mac_data->entries)), GFP_ATOMIC); |
2683 | mac_data = buf; |
2684 | if (!buf) |
2685 | return; |
2686 | |
2687 | sg_init_table(sg, 2); |
2688 | |
2689 | /* Store the unicast list and count in the front of the buffer */ |
2690 | mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: uc_count); |
2691 | i = 0; |
2692 | netdev_for_each_uc_addr(ha, dev) |
2693 | memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); |
2694 | |
2695 | sg_set_buf(sg: &sg[0], buf: mac_data, |
2696 | buflen: sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); |
2697 | |
2698 | /* multicast list and count fill the end */ |
2699 | mac_data = (void *)&mac_data->macs[uc_count][0]; |
2700 | |
2701 | mac_data->entries = cpu_to_virtio32(vdev: vi->vdev, val: mc_count); |
2702 | i = 0; |
2703 | netdev_for_each_mc_addr(ha, dev) |
2704 | memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); |
2705 | |
2706 | sg_set_buf(sg: &sg[1], buf: mac_data, |
2707 | buflen: sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); |
2708 | |
2709 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, |
2710 | VIRTIO_NET_CTRL_MAC_TABLE_SET, out: sg)) |
2711 | dev_warn(&dev->dev, "Failed to set MAC filter table.\n" ); |
2712 | |
2713 | kfree(objp: buf); |
2714 | } |
2715 | |
2716 | static int virtnet_vlan_rx_add_vid(struct net_device *dev, |
2717 | __be16 proto, u16 vid) |
2718 | { |
2719 | struct virtnet_info *vi = netdev_priv(dev); |
2720 | struct scatterlist sg; |
2721 | |
2722 | vi->ctrl->vid = cpu_to_virtio16(vdev: vi->vdev, val: vid); |
2723 | sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); |
2724 | |
2725 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, |
2726 | VIRTIO_NET_CTRL_VLAN_ADD, out: &sg)) |
2727 | dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n" , vid); |
2728 | return 0; |
2729 | } |
2730 | |
2731 | static int virtnet_vlan_rx_kill_vid(struct net_device *dev, |
2732 | __be16 proto, u16 vid) |
2733 | { |
2734 | struct virtnet_info *vi = netdev_priv(dev); |
2735 | struct scatterlist sg; |
2736 | |
2737 | vi->ctrl->vid = cpu_to_virtio16(vdev: vi->vdev, val: vid); |
2738 | sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); |
2739 | |
2740 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, |
2741 | VIRTIO_NET_CTRL_VLAN_DEL, out: &sg)) |
2742 | dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n" , vid); |
2743 | return 0; |
2744 | } |
2745 | |
2746 | static void virtnet_clean_affinity(struct virtnet_info *vi) |
2747 | { |
2748 | int i; |
2749 | |
2750 | if (vi->affinity_hint_set) { |
2751 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2752 | virtqueue_set_affinity(vq: vi->rq[i].vq, NULL); |
2753 | virtqueue_set_affinity(vq: vi->sq[i].vq, NULL); |
2754 | } |
2755 | |
2756 | vi->affinity_hint_set = false; |
2757 | } |
2758 | } |
2759 | |
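/* Spread online CPUs across queue pairs in contiguous groups, using the same
 * mask for the RX/TX virtqueue interrupt affinity hints and the XPS map.
 */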
2760 | static void virtnet_set_affinity(struct virtnet_info *vi) |
2761 | { |
2762 | cpumask_var_t mask; |
2763 | int stragglers; |
2764 | int group_size; |
2765 | int i, j, cpu; |
2766 | int num_cpu; |
2767 | int stride; |
2768 | |
2769 | if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) { |
2770 | virtnet_clean_affinity(vi); |
2771 | return; |
2772 | } |
2773 | |
2774 | num_cpu = num_online_cpus(); |
2775 | stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); |
2776 | stragglers = num_cpu >= vi->curr_queue_pairs ? |
2777 | num_cpu % vi->curr_queue_pairs : |
2778 | 0; |
2779 | cpu = cpumask_first(cpu_online_mask); |
2780 | |
2781 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
2782 | group_size = stride + (i < stragglers ? 1 : 0); |
2783 | |
2784 | for (j = 0; j < group_size; j++) { |
2785 | cpumask_set_cpu(cpu, dstp: mask); |
2786 | cpu = cpumask_next_wrap(n: cpu, cpu_online_mask, |
2787 | start: nr_cpu_ids, wrap: false); |
2788 | } |
2789 | virtqueue_set_affinity(vq: vi->rq[i].vq, cpu_mask: mask); |
2790 | virtqueue_set_affinity(vq: vi->sq[i].vq, cpu_mask: mask); |
2791 | __netif_set_xps_queue(dev: vi->dev, cpumask_bits(mask), index: i, type: XPS_CPUS); |
2792 | cpumask_clear(dstp: mask); |
2793 | } |
2794 | |
2795 | vi->affinity_hint_set = true; |
2796 | free_cpumask_var(mask); |
2797 | } |
2798 | |
2799 | static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) |
2800 | { |
2801 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
2802 | node); |
2803 | virtnet_set_affinity(vi); |
2804 | return 0; |
2805 | } |
2806 | |
2807 | static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) |
2808 | { |
2809 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
2810 | node_dead); |
2811 | virtnet_set_affinity(vi); |
2812 | return 0; |
2813 | } |
2814 | |
2815 | static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) |
2816 | { |
2817 | struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, |
2818 | node); |
2819 | |
2820 | virtnet_clean_affinity(vi); |
2821 | return 0; |
2822 | } |
2823 | |
2824 | static enum cpuhp_state virtionet_online; |
2825 | |
2826 | static int virtnet_cpu_notif_add(struct virtnet_info *vi) |
2827 | { |
2828 | int ret; |
2829 | |
2830 | ret = cpuhp_state_add_instance_nocalls(state: virtionet_online, node: &vi->node); |
2831 | if (ret) |
2832 | return ret; |
2833 | ret = cpuhp_state_add_instance_nocalls(state: CPUHP_VIRT_NET_DEAD, |
2834 | node: &vi->node_dead); |
2835 | if (!ret) |
2836 | return ret; |
2837 | cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node); |
2838 | return ret; |
2839 | } |
2840 | |
2841 | static void virtnet_cpu_notif_remove(struct virtnet_info *vi) |
2842 | { |
2843 | cpuhp_state_remove_instance_nocalls(state: virtionet_online, node: &vi->node); |
2844 | cpuhp_state_remove_instance_nocalls(state: CPUHP_VIRT_NET_DEAD, |
2845 | node: &vi->node_dead); |
2846 | } |
2847 | |
2848 | static void virtnet_get_ringparam(struct net_device *dev, |
2849 | struct ethtool_ringparam *ring, |
2850 | struct kernel_ethtool_ringparam *kernel_ring, |
2851 | struct netlink_ext_ack *extack) |
2852 | { |
2853 | struct virtnet_info *vi = netdev_priv(dev); |
2854 | |
2855 | ring->rx_max_pending = vi->rq[0].vq->num_max; |
2856 | ring->tx_max_pending = vi->sq[0].vq->num_max; |
2857 | ring->rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq); |
2858 | ring->tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq); |
2859 | } |
2860 | |
2861 | static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, |
2862 | u16 vqn, u32 max_usecs, u32 max_packets); |
2863 | |
2864 | static int virtnet_set_ringparam(struct net_device *dev, |
2865 | struct ethtool_ringparam *ring, |
2866 | struct kernel_ethtool_ringparam *kernel_ring, |
2867 | struct netlink_ext_ack *extack) |
2868 | { |
2869 | struct virtnet_info *vi = netdev_priv(dev); |
2870 | u32 rx_pending, tx_pending; |
2871 | struct receive_queue *rq; |
2872 | struct send_queue *sq; |
2873 | int i, err; |
2874 | |
2875 | if (ring->rx_mini_pending || ring->rx_jumbo_pending) |
2876 | return -EINVAL; |
2877 | |
2878 | rx_pending = virtqueue_get_vring_size(vq: vi->rq[0].vq); |
2879 | tx_pending = virtqueue_get_vring_size(vq: vi->sq[0].vq); |
2880 | |
2881 | if (ring->rx_pending == rx_pending && |
2882 | ring->tx_pending == tx_pending) |
2883 | return 0; |
2884 | |
2885 | if (ring->rx_pending > vi->rq[0].vq->num_max) |
2886 | return -EINVAL; |
2887 | |
2888 | if (ring->tx_pending > vi->sq[0].vq->num_max) |
2889 | return -EINVAL; |
2890 | |
2891 | for (i = 0; i < vi->max_queue_pairs; i++) { |
2892 | rq = vi->rq + i; |
2893 | sq = vi->sq + i; |
2894 | |
2895 | if (ring->tx_pending != tx_pending) { |
2896 | err = virtnet_tx_resize(vi, sq, ring_num: ring->tx_pending); |
2897 | if (err) |
2898 | return err; |
2899 | |
2900 | /* Upon disabling and re-enabling a transmit virtqueue, the device must |
2901 | * set the coalescing parameters of the virtqueue to those configured |
2902 | * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver |
2903 | * did not set any TX coalescing parameters, to 0. |
2904 | */ |
2905 | err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: txq2vq(txq: i), |
2906 | max_usecs: vi->intr_coal_tx.max_usecs, |
2907 | max_packets: vi->intr_coal_tx.max_packets); |
2908 | if (err) |
2909 | return err; |
2910 | |
2911 | vi->sq[i].intr_coal.max_usecs = vi->intr_coal_tx.max_usecs; |
2912 | vi->sq[i].intr_coal.max_packets = vi->intr_coal_tx.max_packets; |
2913 | } |
2914 | |
2915 | if (ring->rx_pending != rx_pending) { |
2916 | err = virtnet_rx_resize(vi, rq, ring_num: ring->rx_pending); |
2917 | if (err) |
2918 | return err; |
2919 | |
2920 | /* The reason is same as the transmit virtqueue reset */ |
2921 | err = virtnet_send_ctrl_coal_vq_cmd(vi, vqn: rxq2vq(rxq: i), |
2922 | max_usecs: vi->intr_coal_rx.max_usecs, |
2923 | max_packets: vi->intr_coal_rx.max_packets); |
2924 | if (err) |
2925 | return err; |
2926 | |
2927 | vi->rq[i].intr_coal.max_usecs = vi->intr_coal_rx.max_usecs; |
2928 | vi->rq[i].intr_coal.max_packets = vi->intr_coal_rx.max_packets; |
2929 | } |
2930 | } |
2931 | |
2932 | return 0; |
2933 | } |
2934 | |
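/* Push the current RSS/hash configuration to the device as four scatterlist
 * segments: the fixed header, the indirection table, the max_tx_vq and key
 * length fields, and the hash key itself.
 */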
2935 | static bool virtnet_commit_rss_command(struct virtnet_info *vi) |
2936 | { |
2937 | struct net_device *dev = vi->dev; |
2938 | struct scatterlist sgs[4]; |
2939 | unsigned int sg_buf_size; |
2940 | |
2941 | /* prepare sgs */ |
2942 | sg_init_table(sgs, 4); |
2943 | |
2944 | sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); |
2945 | sg_set_buf(sg: &sgs[0], buf: &vi->ctrl->rss, buflen: sg_buf_size); |
2946 | |
2947 | sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); |
2948 | sg_set_buf(sg: &sgs[1], buf: vi->ctrl->rss.indirection_table, buflen: sg_buf_size); |
2949 | |
2950 | sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) |
2951 | - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); |
2952 | sg_set_buf(sg: &sgs[2], buf: &vi->ctrl->rss.max_tx_vq, buflen: sg_buf_size); |
2953 | |
2954 | sg_buf_size = vi->rss_key_size; |
2955 | sg_set_buf(sg: &sgs[3], buf: vi->ctrl->rss.key, buflen: sg_buf_size); |
2956 | |
2957 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, |
2958 | cmd: vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG |
2959 | : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, out: sgs)) { |
2960 | dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n" ); |
2961 | return false; |
2962 | } |
2963 | return true; |
2964 | } |
2965 | |
static void virtnet_init_default_rss(struct virtnet_info *vi)
2967 | { |
2968 | u32 indir_val = 0; |
2969 | int i = 0; |
2970 | |
2971 | vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; |
2972 | vi->rss_hash_types_saved = vi->rss_hash_types_supported; |
2973 | vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size |
2974 | ? vi->rss_indir_table_size - 1 : 0; |
2975 | vi->ctrl->rss.unclassified_queue = 0; |
2976 | |
2977 | for (; i < vi->rss_indir_table_size; ++i) { |
2978 | indir_val = ethtool_rxfh_indir_default(index: i, n_rx_rings: vi->curr_queue_pairs); |
2979 | vi->ctrl->rss.indirection_table[i] = indir_val; |
2980 | } |
2981 | |
2982 | vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; |
2983 | vi->ctrl->rss.hash_key_length = vi->rss_key_size; |
2984 | |
2985 | netdev_rss_key_fill(buffer: vi->ctrl->rss.key, len: vi->rss_key_size); |
2986 | } |
2987 | |
2988 | static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) |
2989 | { |
2990 | info->data = 0; |
2991 | switch (info->flow_type) { |
2992 | case TCP_V4_FLOW: |
2993 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { |
2994 | info->data = RXH_IP_SRC | RXH_IP_DST | |
2995 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
2996 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { |
2997 | info->data = RXH_IP_SRC | RXH_IP_DST; |
2998 | } |
2999 | break; |
3000 | case TCP_V6_FLOW: |
3001 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { |
3002 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3003 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3004 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { |
3005 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3006 | } |
3007 | break; |
3008 | case UDP_V4_FLOW: |
3009 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { |
3010 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3011 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3012 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { |
3013 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3014 | } |
3015 | break; |
3016 | case UDP_V6_FLOW: |
3017 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { |
3018 | info->data = RXH_IP_SRC | RXH_IP_DST | |
3019 | RXH_L4_B_0_1 | RXH_L4_B_2_3; |
3020 | } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { |
3021 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3022 | } |
3023 | break; |
3024 | case IPV4_FLOW: |
3025 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) |
3026 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3027 | |
3028 | break; |
3029 | case IPV6_FLOW: |
3030 | if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) |
3031 | info->data = RXH_IP_SRC | RXH_IP_DST; |
3032 | |
3033 | break; |
3034 | default: |
3035 | info->data = 0; |
3036 | break; |
3037 | } |
3038 | } |
3039 | |
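/* Translate an ethtool RXH request into VIRTIO_NET_RSS_HASH_TYPE_* bits,
 * rejecting combinations the device cannot hash on, and commit the result
 * if RXHASH is currently enabled.
 */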
3040 | static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) |
3041 | { |
3042 | u32 new_hashtypes = vi->rss_hash_types_saved; |
3043 | bool is_disable = info->data & RXH_DISCARD; |
3044 | bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); |
3045 | |
3046 | /* supports only 'sd', 'sdfn' and 'r' */ |
3047 | if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) |
3048 | return false; |
3049 | |
3050 | switch (info->flow_type) { |
3051 | case TCP_V4_FLOW: |
3052 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); |
3053 | if (!is_disable) |
3054 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 |
3055 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); |
3056 | break; |
3057 | case UDP_V4_FLOW: |
3058 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); |
3059 | if (!is_disable) |
3060 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 |
3061 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); |
3062 | break; |
3063 | case IPV4_FLOW: |
3064 | new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; |
3065 | if (!is_disable) |
3066 | new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; |
3067 | break; |
3068 | case TCP_V6_FLOW: |
3069 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); |
3070 | if (!is_disable) |
3071 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 |
3072 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); |
3073 | break; |
3074 | case UDP_V6_FLOW: |
3075 | new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); |
3076 | if (!is_disable) |
3077 | new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 |
3078 | | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); |
3079 | break; |
3080 | case IPV6_FLOW: |
3081 | new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; |
3082 | if (!is_disable) |
3083 | new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; |
3084 | break; |
3085 | default: |
3086 | /* unsupported flow */ |
3087 | return false; |
3088 | } |
3089 | |
3090 | /* if unsupported hashtype was set */ |
3091 | if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) |
3092 | return false; |
3093 | |
3094 | if (new_hashtypes != vi->rss_hash_types_saved) { |
3095 | vi->rss_hash_types_saved = new_hashtypes; |
3096 | vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; |
3097 | if (vi->dev->features & NETIF_F_RXHASH) |
3098 | return virtnet_commit_rss_command(vi); |
3099 | } |
3100 | |
3101 | return true; |
3102 | } |
3103 | |
3104 | static void virtnet_get_drvinfo(struct net_device *dev, |
3105 | struct ethtool_drvinfo *info) |
3106 | { |
3107 | struct virtnet_info *vi = netdev_priv(dev); |
3108 | struct virtio_device *vdev = vi->vdev; |
3109 | |
	strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
	strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
3114 | } |
3115 | |
3116 | /* TODO: Eliminate OOO packets during switching */ |
3117 | static int virtnet_set_channels(struct net_device *dev, |
3118 | struct ethtool_channels *channels) |
3119 | { |
3120 | struct virtnet_info *vi = netdev_priv(dev); |
3121 | u16 queue_pairs = channels->combined_count; |
3122 | int err; |
3123 | |
3124 | /* We don't support separate rx/tx channels. |
3125 | * We don't allow setting 'other' channels. |
3126 | */ |
3127 | if (channels->rx_count || channels->tx_count || channels->other_count) |
3128 | return -EINVAL; |
3129 | |
3130 | if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) |
3131 | return -EINVAL; |
3132 | |
	/* For now we don't support modifying channels while XDP is loaded.
	 * Also, when XDP is loaded, all RX queues have XDP programs, so we
	 * only need to check a single RX queue.
3136 | */ |
3137 | if (vi->rq[0].xdp_prog) |
3138 | return -EINVAL; |
3139 | |
3140 | cpus_read_lock(); |
3141 | err = _virtnet_set_queues(vi, queue_pairs); |
3142 | if (err) { |
3143 | cpus_read_unlock(); |
3144 | goto err; |
3145 | } |
3146 | virtnet_set_affinity(vi); |
3147 | cpus_read_unlock(); |
3148 | |
3149 | netif_set_real_num_tx_queues(dev, txq: queue_pairs); |
3150 | netif_set_real_num_rx_queues(dev, rxq: queue_pairs); |
3151 | err: |
3152 | return err; |
3153 | } |
3154 | |
3155 | static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) |
3156 | { |
3157 | struct virtnet_info *vi = netdev_priv(dev); |
3158 | unsigned int i, j; |
3159 | u8 *p = data; |
3160 | |
3161 | switch (stringset) { |
3162 | case ETH_SS_STATS: |
3163 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3164 | for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) |
3165 | ethtool_sprintf(data: &p, fmt: "rx_queue_%u_%s" , i, |
3166 | virtnet_rq_stats_desc[j].desc); |
3167 | } |
3168 | |
3169 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3170 | for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) |
3171 | ethtool_sprintf(data: &p, fmt: "tx_queue_%u_%s" , i, |
3172 | virtnet_sq_stats_desc[j].desc); |
3173 | } |
3174 | break; |
3175 | } |
3176 | } |
3177 | |
3178 | static int virtnet_get_sset_count(struct net_device *dev, int sset) |
3179 | { |
3180 | struct virtnet_info *vi = netdev_priv(dev); |
3181 | |
3182 | switch (sset) { |
3183 | case ETH_SS_STATS: |
3184 | return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + |
3185 | VIRTNET_SQ_STATS_LEN); |
3186 | default: |
3187 | return -EOPNOTSUPP; |
3188 | } |
3189 | } |
3190 | |
3191 | static void virtnet_get_ethtool_stats(struct net_device *dev, |
3192 | struct ethtool_stats *stats, u64 *data) |
3193 | { |
3194 | struct virtnet_info *vi = netdev_priv(dev); |
3195 | unsigned int idx = 0, start, i, j; |
3196 | const u8 *stats_base; |
3197 | const u64_stats_t *p; |
3198 | size_t offset; |
3199 | |
3200 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3201 | struct receive_queue *rq = &vi->rq[i]; |
3202 | |
3203 | stats_base = (const u8 *)&rq->stats; |
3204 | do { |
3205 | start = u64_stats_fetch_begin(syncp: &rq->stats.syncp); |
3206 | for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { |
3207 | offset = virtnet_rq_stats_desc[j].offset; |
3208 | p = (const u64_stats_t *)(stats_base + offset); |
3209 | data[idx + j] = u64_stats_read(p); |
3210 | } |
3211 | } while (u64_stats_fetch_retry(syncp: &rq->stats.syncp, start)); |
3212 | idx += VIRTNET_RQ_STATS_LEN; |
3213 | } |
3214 | |
3215 | for (i = 0; i < vi->curr_queue_pairs; i++) { |
3216 | struct send_queue *sq = &vi->sq[i]; |
3217 | |
3218 | stats_base = (const u8 *)&sq->stats; |
3219 | do { |
3220 | start = u64_stats_fetch_begin(syncp: &sq->stats.syncp); |
3221 | for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { |
3222 | offset = virtnet_sq_stats_desc[j].offset; |
3223 | p = (const u64_stats_t *)(stats_base + offset); |
3224 | data[idx + j] = u64_stats_read(p); |
3225 | } |
3226 | } while (u64_stats_fetch_retry(syncp: &sq->stats.syncp, start)); |
3227 | idx += VIRTNET_SQ_STATS_LEN; |
3228 | } |
3229 | } |
3230 | |
3231 | static void virtnet_get_channels(struct net_device *dev, |
3232 | struct ethtool_channels *channels) |
3233 | { |
3234 | struct virtnet_info *vi = netdev_priv(dev); |
3235 | |
3236 | channels->combined_count = vi->curr_queue_pairs; |
3237 | channels->max_combined = vi->max_queue_pairs; |
3238 | channels->max_other = 0; |
3239 | channels->rx_count = 0; |
3240 | channels->tx_count = 0; |
3241 | channels->other_count = 0; |
3242 | } |
3243 | |
3244 | static int virtnet_set_link_ksettings(struct net_device *dev, |
3245 | const struct ethtool_link_ksettings *cmd) |
3246 | { |
3247 | struct virtnet_info *vi = netdev_priv(dev); |
3248 | |
3249 | return ethtool_virtdev_set_link_ksettings(dev, cmd, |
3250 | dev_speed: &vi->speed, dev_duplex: &vi->duplex); |
3251 | } |
3252 | |
3253 | static int virtnet_get_link_ksettings(struct net_device *dev, |
3254 | struct ethtool_link_ksettings *cmd) |
3255 | { |
3256 | struct virtnet_info *vi = netdev_priv(dev); |
3257 | |
3258 | cmd->base.speed = vi->speed; |
3259 | cmd->base.duplex = vi->duplex; |
3260 | cmd->base.port = PORT_OTHER; |
3261 | |
3262 | return 0; |
3263 | } |
3264 | |
3265 | static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, |
3266 | struct ethtool_coalesce *ec) |
3267 | { |
3268 | struct scatterlist sgs_tx, sgs_rx; |
3269 | int i; |
3270 | |
3271 | vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); |
3272 | vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); |
3273 | sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx)); |
3274 | |
3275 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, |
3276 | VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, |
3277 | out: &sgs_tx)) |
3278 | return -EINVAL; |
3279 | |
3280 | /* Save parameters */ |
3281 | vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; |
3282 | vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; |
3283 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3284 | vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; |
3285 | vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; |
3286 | } |
3287 | |
3288 | vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); |
3289 | vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); |
3290 | sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx)); |
3291 | |
3292 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, |
3293 | VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, |
3294 | out: &sgs_rx)) |
3295 | return -EINVAL; |
3296 | |
3297 | /* Save parameters */ |
3298 | vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; |
3299 | vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; |
3300 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3301 | vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; |
3302 | vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; |
3303 | } |
3304 | |
3305 | return 0; |
3306 | } |
3307 | |
3308 | static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, |
3309 | u16 vqn, u32 max_usecs, u32 max_packets) |
3310 | { |
3311 | struct scatterlist sgs; |
3312 | |
3313 | vi->ctrl->coal_vq.vqn = cpu_to_le16(vqn); |
3314 | vi->ctrl->coal_vq.coal.max_usecs = cpu_to_le32(max_usecs); |
3315 | vi->ctrl->coal_vq.coal.max_packets = cpu_to_le32(max_packets); |
3316 | sg_init_one(&sgs, &vi->ctrl->coal_vq, sizeof(vi->ctrl->coal_vq)); |
3317 | |
3318 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, |
3319 | VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, |
&sgs))
3321 | return -EINVAL; |
3322 | |
3323 | return 0; |
3324 | } |
3325 | |
3326 | static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, |
3327 | struct ethtool_coalesce *ec, |
3328 | u16 queue) |
3329 | { |
3330 | int err; |
3331 | |
err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue),
ec->rx_coalesce_usecs,
ec->rx_max_coalesced_frames);
3335 | if (err) |
3336 | return err; |
3337 | |
3338 | vi->rq[queue].intr_coal.max_usecs = ec->rx_coalesce_usecs; |
3339 | vi->rq[queue].intr_coal.max_packets = ec->rx_max_coalesced_frames; |
3340 | |
err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue),
ec->tx_coalesce_usecs,
ec->tx_max_coalesced_frames);
3344 | if (err) |
3345 | return err; |
3346 | |
3347 | vi->sq[queue].intr_coal.max_usecs = ec->tx_coalesce_usecs; |
3348 | vi->sq[queue].intr_coal.max_packets = ec->tx_max_coalesced_frames; |
3349 | |
3350 | return 0; |
3351 | } |
3352 | |
3353 | static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) |
3354 | { |
3355 | /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL |
3356 | * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. |
3357 | */ |
3358 | if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) |
3359 | return -EOPNOTSUPP; |
3360 | |
3361 | if (ec->tx_max_coalesced_frames > 1 || |
3362 | ec->rx_max_coalesced_frames != 1) |
3363 | return -EINVAL; |
3364 | |
3365 | return 0; |
3366 | } |
3367 | |
3368 | static int virtnet_should_update_vq_weight(int dev_flags, int weight, |
3369 | int vq_weight, bool *should_update) |
3370 | { |
3371 | if (weight ^ vq_weight) { |
3372 | if (dev_flags & IFF_UP) |
3373 | return -EBUSY; |
3374 | *should_update = true; |
3375 | } |
3376 | |
3377 | return 0; |
3378 | } |
3379 | |
3380 | static int virtnet_set_coalesce(struct net_device *dev, |
3381 | struct ethtool_coalesce *ec, |
3382 | struct kernel_ethtool_coalesce *kernel_coal, |
3383 | struct netlink_ext_ack *extack) |
3384 | { |
3385 | struct virtnet_info *vi = netdev_priv(dev); |
3386 | int ret, queue_number, napi_weight; |
3387 | bool update_napi = false; |
3388 | |
3389 | /* Can't change NAPI weight if the link is up */ |
3390 | napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; |
3391 | for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { |
ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
vi->sq[queue_number].napi.weight,
&update_napi);
3395 | if (ret) |
3396 | return ret; |
3397 | |
3398 | if (update_napi) { |
3399 | /* All queues that belong to [queue_number, vi->max_queue_pairs] will be |
3400 | * updated for the sake of simplicity, which might not be necessary |
3401 | */ |
3402 | break; |
3403 | } |
3404 | } |
3405 | |
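/* With VIRTIO_NET_F_NOTF_COAL the parameters are pushed to the device;
 * otherwise only the legacy tx-frames on/off semantics are accepted.
 */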
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
3407 | ret = virtnet_send_notf_coal_cmds(vi, ec); |
3408 | else |
3409 | ret = virtnet_coal_params_supported(ec); |
3410 | |
3411 | if (ret) |
3412 | return ret; |
3413 | |
3414 | if (update_napi) { |
3415 | for (; queue_number < vi->max_queue_pairs; queue_number++) |
3416 | vi->sq[queue_number].napi.weight = napi_weight; |
3417 | } |
3418 | |
3419 | return ret; |
3420 | } |
3421 | |
3422 | static int virtnet_get_coalesce(struct net_device *dev, |
3423 | struct ethtool_coalesce *ec, |
3424 | struct kernel_ethtool_coalesce *kernel_coal, |
3425 | struct netlink_ext_ack *extack) |
3426 | { |
3427 | struct virtnet_info *vi = netdev_priv(dev); |
3428 | |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
3430 | ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; |
3431 | ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; |
3432 | ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; |
3433 | ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; |
3434 | } else { |
3435 | ec->rx_max_coalesced_frames = 1; |
3436 | |
3437 | if (vi->sq[0].napi.weight) |
3438 | ec->tx_max_coalesced_frames = 1; |
3439 | } |
3440 | |
3441 | return 0; |
3442 | } |
3443 | |
3444 | static int virtnet_set_per_queue_coalesce(struct net_device *dev, |
3445 | u32 queue, |
3446 | struct ethtool_coalesce *ec) |
3447 | { |
3448 | struct virtnet_info *vi = netdev_priv(dev); |
3449 | int ret, napi_weight; |
3450 | bool update_napi = false; |
3451 | |
3452 | if (queue >= vi->max_queue_pairs) |
3453 | return -EINVAL; |
3454 | |
3455 | /* Can't change NAPI weight if the link is up */ |
3456 | napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; |
ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
vi->sq[queue].napi.weight,
&update_napi);
3460 | if (ret) |
3461 | return ret; |
3462 | |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
3464 | ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); |
3465 | else |
3466 | ret = virtnet_coal_params_supported(ec); |
3467 | |
3468 | if (ret) |
3469 | return ret; |
3470 | |
3471 | if (update_napi) |
3472 | vi->sq[queue].napi.weight = napi_weight; |
3473 | |
3474 | return 0; |
3475 | } |
3476 | |
3477 | static int virtnet_get_per_queue_coalesce(struct net_device *dev, |
3478 | u32 queue, |
3479 | struct ethtool_coalesce *ec) |
3480 | { |
3481 | struct virtnet_info *vi = netdev_priv(dev); |
3482 | |
3483 | if (queue >= vi->max_queue_pairs) |
3484 | return -EINVAL; |
3485 | |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
3487 | ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; |
3488 | ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; |
3489 | ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; |
3490 | ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; |
3491 | } else { |
3492 | ec->rx_max_coalesced_frames = 1; |
3493 | |
3494 | if (vi->sq[queue].napi.weight) |
3495 | ec->tx_max_coalesced_frames = 1; |
3496 | } |
3497 | |
3498 | return 0; |
3499 | } |
3500 | |
3501 | static void virtnet_init_settings(struct net_device *dev) |
3502 | { |
3503 | struct virtnet_info *vi = netdev_priv(dev); |
3504 | |
3505 | vi->speed = SPEED_UNKNOWN; |
3506 | vi->duplex = DUPLEX_UNKNOWN; |
3507 | } |
3508 | |
3509 | static void virtnet_update_settings(struct virtnet_info *vi) |
3510 | { |
3511 | u32 speed; |
3512 | u8 duplex; |
3513 | |
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
3515 | return; |
3516 | |
3517 | virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); |
3518 | |
3519 | if (ethtool_validate_speed(speed)) |
3520 | vi->speed = speed; |
3521 | |
3522 | virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); |
3523 | |
3524 | if (ethtool_validate_duplex(duplex)) |
3525 | vi->duplex = duplex; |
3526 | } |
3527 | |
3528 | static u32 virtnet_get_rxfh_key_size(struct net_device *dev) |
3529 | { |
3530 | return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; |
3531 | } |
3532 | |
3533 | static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) |
3534 | { |
3535 | return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; |
3536 | } |
3537 | |
3538 | static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) |
3539 | { |
3540 | struct virtnet_info *vi = netdev_priv(dev); |
3541 | int i; |
3542 | |
3543 | if (indir) { |
3544 | for (i = 0; i < vi->rss_indir_table_size; ++i) |
3545 | indir[i] = vi->ctrl->rss.indirection_table[i]; |
3546 | } |
3547 | |
3548 | if (key) |
3549 | memcpy(key, vi->ctrl->rss.key, vi->rss_key_size); |
3550 | |
3551 | if (hfunc) |
3552 | *hfunc = ETH_RSS_HASH_TOP; |
3553 | |
3554 | return 0; |
3555 | } |
3556 | |
3557 | static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) |
3558 | { |
3559 | struct virtnet_info *vi = netdev_priv(dev); |
3560 | int i; |
3561 | |
3562 | if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) |
3563 | return -EOPNOTSUPP; |
3564 | |
3565 | if (indir) { |
3566 | for (i = 0; i < vi->rss_indir_table_size; ++i) |
3567 | vi->ctrl->rss.indirection_table[i] = indir[i]; |
3568 | } |
3569 | if (key) |
3570 | memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); |
3571 | |
3572 | virtnet_commit_rss_command(vi); |
3573 | |
3574 | return 0; |
3575 | } |
3576 | |
3577 | static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) |
3578 | { |
3579 | struct virtnet_info *vi = netdev_priv(dev); |
3580 | int rc = 0; |
3581 | |
3582 | switch (info->cmd) { |
3583 | case ETHTOOL_GRXRINGS: |
3584 | info->data = vi->curr_queue_pairs; |
3585 | break; |
3586 | case ETHTOOL_GRXFH: |
3587 | virtnet_get_hashflow(vi, info); |
3588 | break; |
3589 | default: |
3590 | rc = -EOPNOTSUPP; |
3591 | } |
3592 | |
3593 | return rc; |
3594 | } |
3595 | |
3596 | static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) |
3597 | { |
3598 | struct virtnet_info *vi = netdev_priv(dev); |
3599 | int rc = 0; |
3600 | |
3601 | switch (info->cmd) { |
3602 | case ETHTOOL_SRXFH: |
3603 | if (!virtnet_set_hashflow(vi, info)) |
3604 | rc = -EINVAL; |
3605 | |
3606 | break; |
3607 | default: |
3608 | rc = -EOPNOTSUPP; |
3609 | } |
3610 | |
3611 | return rc; |
3612 | } |
3613 | |
3614 | static const struct ethtool_ops virtnet_ethtool_ops = { |
3615 | .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | |
3616 | ETHTOOL_COALESCE_USECS, |
3617 | .get_drvinfo = virtnet_get_drvinfo, |
3618 | .get_link = ethtool_op_get_link, |
3619 | .get_ringparam = virtnet_get_ringparam, |
3620 | .set_ringparam = virtnet_set_ringparam, |
3621 | .get_strings = virtnet_get_strings, |
3622 | .get_sset_count = virtnet_get_sset_count, |
3623 | .get_ethtool_stats = virtnet_get_ethtool_stats, |
3624 | .set_channels = virtnet_set_channels, |
3625 | .get_channels = virtnet_get_channels, |
3626 | .get_ts_info = ethtool_op_get_ts_info, |
3627 | .get_link_ksettings = virtnet_get_link_ksettings, |
3628 | .set_link_ksettings = virtnet_set_link_ksettings, |
3629 | .set_coalesce = virtnet_set_coalesce, |
3630 | .get_coalesce = virtnet_get_coalesce, |
3631 | .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, |
3632 | .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, |
3633 | .get_rxfh_key_size = virtnet_get_rxfh_key_size, |
3634 | .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, |
3635 | .get_rxfh = virtnet_get_rxfh, |
3636 | .set_rxfh = virtnet_set_rxfh, |
3637 | .get_rxnfc = virtnet_get_rxnfc, |
3638 | .set_rxnfc = virtnet_set_rxnfc, |
3639 | }; |
3640 | |
3641 | static void virtnet_freeze_down(struct virtio_device *vdev) |
3642 | { |
3643 | struct virtnet_info *vi = vdev->priv; |
3644 | |
3645 | /* Make sure no work handler is accessing the device */ |
flush_work(&vi->config_work);
3647 | |
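/* Detach under the xmit lock so concurrent transmits see the device as
 * absent before the queues are torn down.
 */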
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
if (netif_running(vi->dev))
virtnet_close(vi->dev);
3653 | } |
3654 | |
3655 | static int init_vqs(struct virtnet_info *vi); |
3656 | |
3657 | static int virtnet_restore_up(struct virtio_device *vdev) |
3658 | { |
3659 | struct virtnet_info *vi = vdev->priv; |
3660 | int err; |
3661 | |
3662 | err = init_vqs(vi); |
3663 | if (err) |
3664 | return err; |
3665 | |
virtio_device_ready(vdev);
3667 | |
3668 | enable_delayed_refill(vi); |
3669 | |
if (netif_running(vi->dev)) {
err = virtnet_open(vi->dev);
3672 | if (err) |
3673 | return err; |
3674 | } |
3675 | |
netif_tx_lock_bh(vi->dev);
netif_device_attach(vi->dev);
netif_tx_unlock_bh(vi->dev);
3679 | return err; |
3680 | } |
3681 | |
3682 | static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) |
3683 | { |
3684 | struct scatterlist sg; |
vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
3686 | |
3687 | sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); |
3688 | |
3689 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, |
VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
3692 | return -EINVAL; |
3693 | } |
3694 | |
3695 | return 0; |
3696 | } |
3697 | |
3698 | static int virtnet_clear_guest_offloads(struct virtnet_info *vi) |
3699 | { |
3700 | u64 offloads = 0; |
3701 | |
3702 | if (!vi->guest_offloads) |
3703 | return 0; |
3704 | |
3705 | return virtnet_set_guest_offloads(vi, offloads); |
3706 | } |
3707 | |
3708 | static int virtnet_restore_guest_offloads(struct virtnet_info *vi) |
3709 | { |
3710 | u64 offloads = vi->guest_offloads; |
3711 | |
3712 | if (!vi->guest_offloads) |
3713 | return 0; |
3714 | |
3715 | return virtnet_set_guest_offloads(vi, offloads); |
3716 | } |
3717 | |
3718 | static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, |
3719 | struct netlink_ext_ack *extack) |
3720 | { |
3721 | unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + |
3722 | sizeof(struct skb_shared_info)); |
3723 | unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; |
3724 | struct virtnet_info *vi = netdev_priv(dev); |
3725 | struct bpf_prog *old_prog; |
3726 | u16 xdp_qp = 0, curr_qp; |
3727 | int i, err; |
3728 | |
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
&& (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
3738 | return -EOPNOTSUPP; |
3739 | } |
3740 | |
3741 | if (vi->mergeable_rx_bufs && !vi->any_header_sg) { |
NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
3743 | return -EINVAL; |
3744 | } |
3745 | |
3746 | if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { |
NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
3749 | return -EINVAL; |
3750 | } |
3751 | |
3752 | curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; |
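/* One dedicated XDP_TX queue per possible CPU lets XDP transmit without
 * taking a queue lock; if the device cannot provide that many queues we
 * fall back to the slower locked mode below.
 */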
3753 | if (prog) |
3754 | xdp_qp = nr_cpu_ids; |
3755 | |
3756 | /* XDP requires extra queues for XDP_TX */ |
3757 | if (curr_qp + xdp_qp > vi->max_queue_pairs) { |
netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
3759 | curr_qp + xdp_qp, vi->max_queue_pairs); |
3760 | xdp_qp = 0; |
3761 | } |
3762 | |
3763 | old_prog = rtnl_dereference(vi->rq[0].xdp_prog); |
3764 | if (!prog && !old_prog) |
3765 | return 0; |
3766 | |
3767 | if (prog) |
bpf_prog_add(prog, vi->max_queue_pairs - 1);
3769 | |
3770 | /* Make sure NAPI is not using any XDP TX queues for RX. */ |
3771 | if (netif_running(dev)) { |
3772 | for (i = 0; i < vi->max_queue_pairs; i++) { |
napi_disable(&vi->rq[i].napi);
virtnet_napi_tx_disable(&vi->sq[i].napi);
3775 | } |
3776 | } |
3777 | |
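/* When removing a program, publish the NULL pointer and restore guest
 * offloads first; synchronize_net() ensures no RX path still runs the
 * old program before it is released below.
 */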
3778 | if (!prog) { |
3779 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3780 | rcu_assign_pointer(vi->rq[i].xdp_prog, prog); |
3781 | if (i == 0) |
3782 | virtnet_restore_guest_offloads(vi); |
3783 | } |
3784 | synchronize_net(); |
3785 | } |
3786 | |
err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
3788 | if (err) |
3789 | goto err; |
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
3791 | vi->xdp_queue_pairs = xdp_qp; |
3792 | |
3793 | if (prog) { |
3794 | vi->xdp_enabled = true; |
3795 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3796 | rcu_assign_pointer(vi->rq[i].xdp_prog, prog); |
3797 | if (i == 0 && !old_prog) |
3798 | virtnet_clear_guest_offloads(vi); |
3799 | } |
3800 | if (!old_prog) |
xdp_features_set_redirect_target(dev, true);
3802 | } else { |
3803 | xdp_features_clear_redirect_target(dev); |
3804 | vi->xdp_enabled = false; |
3805 | } |
3806 | |
3807 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3808 | if (old_prog) |
bpf_prog_put(old_prog);
3810 | if (netif_running(dev)) { |
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq,
&vi->sq[i].napi);
3814 | } |
3815 | } |
3816 | |
3817 | return 0; |
3818 | |
3819 | err: |
3820 | if (!prog) { |
3821 | virtnet_clear_guest_offloads(vi); |
3822 | for (i = 0; i < vi->max_queue_pairs; i++) |
3823 | rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); |
3824 | } |
3825 | |
3826 | if (netif_running(dev)) { |
3827 | for (i = 0; i < vi->max_queue_pairs; i++) { |
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq,
&vi->sq[i].napi);
3831 | } |
3832 | } |
3833 | if (prog) |
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
3835 | return err; |
3836 | } |
3837 | |
3838 | static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) |
3839 | { |
3840 | switch (xdp->command) { |
3841 | case XDP_SETUP_PROG: |
return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
3843 | default: |
3844 | return -EINVAL; |
3845 | } |
3846 | } |
3847 | |
3848 | static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, |
3849 | size_t len) |
3850 | { |
3851 | struct virtnet_info *vi = netdev_priv(dev); |
3852 | int ret; |
3853 | |
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
3855 | return -EOPNOTSUPP; |
3856 | |
ret = snprintf(buf, len, "sby");
3858 | if (ret >= len) |
3859 | return -EOPNOTSUPP; |
3860 | |
3861 | return 0; |
3862 | } |
3863 | |
3864 | static int virtnet_set_features(struct net_device *dev, |
3865 | netdev_features_t features) |
3866 | { |
3867 | struct virtnet_info *vi = netdev_priv(dev); |
3868 | u64 offloads; |
3869 | int err; |
3870 | |
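/* NETIF_F_GRO_HW is backed by the device's guest offloads, which XDP
 * also manipulates, so the two cannot be reconfigured at the same time.
 */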
3871 | if ((dev->features ^ features) & NETIF_F_GRO_HW) { |
3872 | if (vi->xdp_enabled) |
3873 | return -EBUSY; |
3874 | |
3875 | if (features & NETIF_F_GRO_HW) |
3876 | offloads = vi->guest_offloads_capable; |
3877 | else |
3878 | offloads = vi->guest_offloads_capable & |
3879 | ~GUEST_OFFLOAD_GRO_HW_MASK; |
3880 | |
3881 | err = virtnet_set_guest_offloads(vi, offloads); |
3882 | if (err) |
3883 | return err; |
3884 | vi->guest_offloads = offloads; |
3885 | } |
3886 | |
3887 | if ((dev->features ^ features) & NETIF_F_RXHASH) { |
3888 | if (features & NETIF_F_RXHASH) |
3889 | vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; |
3890 | else |
3891 | vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; |
3892 | |
3893 | if (!virtnet_commit_rss_command(vi)) |
3894 | return -EINVAL; |
3895 | } |
3896 | |
3897 | return 0; |
3898 | } |
3899 | |
3900 | static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) |
3901 | { |
3902 | struct virtnet_info *priv = netdev_priv(dev); |
3903 | struct send_queue *sq = &priv->sq[txqueue]; |
struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.tx_timeouts);
u64_stats_update_end(&sq->stats.syncp);

netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
txqueue, sq->name, sq->vq->index, sq->vq->name,
jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
3913 | } |
3914 | |
3915 | static const struct net_device_ops virtnet_netdev = { |
3916 | .ndo_open = virtnet_open, |
3917 | .ndo_stop = virtnet_close, |
3918 | .ndo_start_xmit = start_xmit, |
3919 | .ndo_validate_addr = eth_validate_addr, |
3920 | .ndo_set_mac_address = virtnet_set_mac_address, |
3921 | .ndo_set_rx_mode = virtnet_set_rx_mode, |
3922 | .ndo_get_stats64 = virtnet_stats, |
3923 | .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, |
3924 | .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, |
3925 | .ndo_bpf = virtnet_xdp, |
3926 | .ndo_xdp_xmit = virtnet_xdp_xmit, |
3927 | .ndo_features_check = passthru_features_check, |
3928 | .ndo_get_phys_port_name = virtnet_get_phys_port_name, |
3929 | .ndo_set_features = virtnet_set_features, |
3930 | .ndo_tx_timeout = virtnet_tx_timeout, |
3931 | }; |
3932 | |
3933 | static void virtnet_config_changed_work(struct work_struct *work) |
3934 | { |
3935 | struct virtnet_info *vi = |
3936 | container_of(work, struct virtnet_info, config_work); |
3937 | u16 v; |
3938 | |
3939 | if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, |
3940 | struct virtio_net_config, status, &v) < 0) |
3941 | return; |
3942 | |
3943 | if (v & VIRTIO_NET_S_ANNOUNCE) { |
netdev_notify_peers(vi->dev);
3945 | virtnet_ack_link_announce(vi); |
3946 | } |
3947 | |
3948 | /* Ignore unknown (future) status bits */ |
3949 | v &= VIRTIO_NET_S_LINK_UP; |
3950 | |
3951 | if (vi->status == v) |
3952 | return; |
3953 | |
3954 | vi->status = v; |
3955 | |
3956 | if (vi->status & VIRTIO_NET_S_LINK_UP) { |
3957 | virtnet_update_settings(vi); |
netif_carrier_on(vi->dev);
netif_tx_wake_all_queues(vi->dev);
3960 | } else { |
netif_carrier_off(vi->dev);
netif_tx_stop_all_queues(vi->dev);
3963 | } |
3964 | } |
3965 | |
3966 | static void virtnet_config_changed(struct virtio_device *vdev) |
3967 | { |
3968 | struct virtnet_info *vi = vdev->priv; |
3969 | |
schedule_work(&vi->config_work);
3971 | } |
3972 | |
3973 | static void virtnet_free_queues(struct virtnet_info *vi) |
3974 | { |
3975 | int i; |
3976 | |
3977 | for (i = 0; i < vi->max_queue_pairs; i++) { |
__netif_napi_del(&vi->rq[i].napi);
__netif_napi_del(&vi->sq[i].napi);
3980 | } |
3981 | |
3982 | /* We called __netif_napi_del(), |
3983 | * we need to respect an RCU grace period before freeing vi->rq |
3984 | */ |
3985 | synchronize_net(); |
3986 | |
kfree(vi->rq);
kfree(vi->sq);
kfree(vi->ctrl);
3990 | } |
3991 | |
3992 | static void _free_receive_bufs(struct virtnet_info *vi) |
3993 | { |
3994 | struct bpf_prog *old_prog; |
3995 | int i; |
3996 | |
3997 | for (i = 0; i < vi->max_queue_pairs; i++) { |
3998 | while (vi->rq[i].pages) |
__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
4000 | |
4001 | old_prog = rtnl_dereference(vi->rq[i].xdp_prog); |
4002 | RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); |
4003 | if (old_prog) |
bpf_prog_put(old_prog);
4005 | } |
4006 | } |
4007 | |
4008 | static void free_receive_bufs(struct virtnet_info *vi) |
4009 | { |
4010 | rtnl_lock(); |
4011 | _free_receive_bufs(vi); |
4012 | rtnl_unlock(); |
4013 | } |
4014 | |
4015 | static void free_receive_page_frags(struct virtnet_info *vi) |
4016 | { |
4017 | int i; |
4018 | for (i = 0; i < vi->max_queue_pairs; i++) |
4019 | if (vi->rq[i].alloc_frag.page) { |
4020 | if (vi->rq[i].do_dma && vi->rq[i].last_dma) |
virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
put_page(vi->rq[i].alloc_frag.page);
4023 | } |
4024 | } |
4025 | |
4026 | static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) |
4027 | { |
if (!is_xdp_frame(buf))
dev_kfree_skb(buf);
else
xdp_return_frame(ptr_to_xdp(buf));
4032 | } |
4033 | |
4034 | static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) |
4035 | { |
4036 | struct virtnet_info *vi = vq->vdev->priv; |
4037 | int i = vq2rxq(vq); |
4038 | |
4039 | if (vi->mergeable_rx_bufs) |
put_page(virt_to_head_page(buf));
else if (vi->big_packets)
give_pages(&vi->rq[i], buf);
else
put_page(virt_to_head_page(buf));
4045 | } |
4046 | |
4047 | static void free_unused_bufs(struct virtnet_info *vi) |
4048 | { |
4049 | void *buf; |
4050 | int i; |
4051 | |
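/* Draining may touch a large number of buffers per ring, so yield the
 * CPU between queues.
 */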
4052 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4053 | struct virtqueue *vq = vi->sq[i].vq; |
4054 | while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) |
4055 | virtnet_sq_free_unused_buf(vq, buf); |
4056 | cond_resched(); |
4057 | } |
4058 | |
4059 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4060 | struct receive_queue *rq = &vi->rq[i]; |
4061 | |
4062 | while ((buf = virtnet_rq_detach_unused_buf(rq)) != NULL) |
virtnet_rq_free_unused_buf(rq->vq, buf);
4064 | cond_resched(); |
4065 | } |
4066 | } |
4067 | |
4068 | static void virtnet_del_vqs(struct virtnet_info *vi) |
4069 | { |
4070 | struct virtio_device *vdev = vi->vdev; |
4071 | |
4072 | virtnet_clean_affinity(vi); |
4073 | |
4074 | vdev->config->del_vqs(vdev); |
4075 | |
4076 | virtnet_free_queues(vi); |
4077 | } |
4078 | |
4079 | /* How large should a single buffer be so a queue full of these can fit at |
4080 | * least one full packet? |
4081 | * Logic below assumes the mergeable buffer header is used. |
4082 | */ |
4083 | static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) |
4084 | { |
4085 | const unsigned int hdr_len = vi->hdr_len; |
4086 | unsigned int rq_size = virtqueue_get_vring_size(vq); |
4087 | unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; |
4088 | unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; |
4089 | unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); |
4090 | |
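/* Spread the worst-case packet across the whole ring, but never hand
 * out buffers smaller than GOOD_PACKET_LEN.
 */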
4091 | return max(max(min_buf_len, hdr_len) - hdr_len, |
4092 | (unsigned int)GOOD_PACKET_LEN); |
4093 | } |
4094 | |
4095 | static int virtnet_find_vqs(struct virtnet_info *vi) |
4096 | { |
4097 | vq_callback_t **callbacks; |
4098 | struct virtqueue **vqs; |
4099 | int ret = -ENOMEM; |
4100 | int i, total_vqs; |
4101 | const char **names; |
4102 | bool *ctx; |
4103 | |
4104 | /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by |
4105 | * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by |
4106 | * possible control vq. |
4107 | */ |
4108 | total_vqs = vi->max_queue_pairs * 2 + |
virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
4110 | |
4111 | /* Allocate space for find_vqs parameters */ |
vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
4113 | if (!vqs) |
4114 | goto err_vq; |
callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL);
4116 | if (!callbacks) |
4117 | goto err_callback; |
names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL);
4119 | if (!names) |
4120 | goto err_names; |
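/* Per-buffer context is only used on RX queues, where the driver stashes
 * buffer metadata (e.g. truesize/headroom) alongside each posted buffer;
 * big-packets mode without mergeable buffers does not need it.
 */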
4121 | if (!vi->big_packets || vi->mergeable_rx_bufs) { |
ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
4123 | if (!ctx) |
4124 | goto err_ctx; |
4125 | } else { |
4126 | ctx = NULL; |
4127 | } |
4128 | |
4129 | /* Parameters for control virtqueue, if any */ |
4130 | if (vi->has_cvq) { |
4131 | callbacks[total_vqs - 1] = NULL; |
names[total_vqs - 1] = "control";
4133 | } |
4134 | |
4135 | /* Allocate/initialize parameters for send/receive virtqueues */ |
4136 | for (i = 0; i < vi->max_queue_pairs; i++) { |
callbacks[rxq2vq(i)] = skb_recv_done;
callbacks[txq2vq(i)] = skb_xmit_done;
sprintf(vi->rq[i].name, "input.%d", i);
sprintf(vi->sq[i].name, "output.%d", i);
names[rxq2vq(i)] = vi->rq[i].name;
names[txq2vq(i)] = vi->sq[i].name;
if (ctx)
ctx[rxq2vq(i)] = true;
4145 | } |
4146 | |
ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
4148 | names, ctx, NULL); |
4149 | if (ret) |
4150 | goto err_find; |
4151 | |
4152 | if (vi->has_cvq) { |
4153 | vi->cvq = vqs[total_vqs - 1]; |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
4155 | vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; |
4156 | } |
4157 | |
4158 | for (i = 0; i < vi->max_queue_pairs; i++) { |
vi->rq[i].vq = vqs[rxq2vq(i)];
vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
vi->sq[i].vq = vqs[txq2vq(i)];
4162 | } |
4163 | |
/* run here: ret == 0. Fall through and free the temporary arrays used only by find_vqs. */
4165 | |
4166 | |
4167 | err_find: |
kfree(ctx);
err_ctx:
kfree(names);
err_names:
kfree(callbacks);
err_callback:
kfree(vqs);
4175 | err_vq: |
4176 | return ret; |
4177 | } |
4178 | |
4179 | static int virtnet_alloc_queues(struct virtnet_info *vi) |
4180 | { |
4181 | int i; |
4182 | |
4183 | if (vi->has_cvq) { |
vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
4185 | if (!vi->ctrl) |
4186 | goto err_ctrl; |
4187 | } else { |
4188 | vi->ctrl = NULL; |
4189 | } |
vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
4191 | if (!vi->sq) |
4192 | goto err_sq; |
vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
4194 | if (!vi->rq) |
4195 | goto err_rq; |
4196 | |
4197 | INIT_DELAYED_WORK(&vi->refill, refill_work); |
4198 | for (i = 0; i < vi->max_queue_pairs; i++) { |
4199 | vi->rq[i].pages = NULL; |
netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
napi_weight);
netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
virtnet_poll_tx,
napi_tx ? napi_weight : 0);
4205 | |
4206 | sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); |
ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
4208 | sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); |
4209 | |
u64_stats_init(&vi->rq[i].stats.syncp);
u64_stats_init(&vi->sq[i].stats.syncp);
4212 | } |
4213 | |
4214 | return 0; |
4215 | |
4216 | err_rq: |
kfree(vi->sq);
4218 | err_sq: |
kfree(vi->ctrl);
4220 | err_ctrl: |
4221 | return -ENOMEM; |
4222 | } |
4223 | |
4224 | static int init_vqs(struct virtnet_info *vi) |
4225 | { |
4226 | int ret; |
4227 | |
4228 | /* Allocate send & receive queues */ |
4229 | ret = virtnet_alloc_queues(vi); |
4230 | if (ret) |
4231 | goto err; |
4232 | |
4233 | ret = virtnet_find_vqs(vi); |
4234 | if (ret) |
4235 | goto err_free; |
4236 | |
4237 | virtnet_rq_set_premapped(vi); |
4238 | |
4239 | cpus_read_lock(); |
4240 | virtnet_set_affinity(vi); |
4241 | cpus_read_unlock(); |
4242 | |
4243 | return 0; |
4244 | |
4245 | err_free: |
4246 | virtnet_free_queues(vi); |
4247 | err: |
4248 | return ret; |
4249 | } |
4250 | |
4251 | #ifdef CONFIG_SYSFS |
4252 | static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, |
4253 | char *buf) |
4254 | { |
struct virtnet_info *vi = netdev_priv(queue->dev);
4256 | unsigned int queue_index = get_netdev_rx_queue_index(queue); |
4257 | unsigned int headroom = virtnet_get_headroom(vi); |
4258 | unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; |
4259 | struct ewma_pkt_len *avg; |
4260 | |
4261 | BUG_ON(queue_index >= vi->max_queue_pairs); |
4262 | avg = &vi->rq[queue_index].mrg_avg_pkt_len; |
return sprintf(buf, "%u\n",
get_mergeable_buf_len(&vi->rq[queue_index], avg,
SKB_DATA_ALIGN(headroom + tailroom)));
4266 | } |
4267 | |
4268 | static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = |
4269 | __ATTR_RO(mergeable_rx_buffer_size); |
4270 | |
4271 | static struct attribute *virtio_net_mrg_rx_attrs[] = { |
4272 | &mergeable_rx_buffer_size_attribute.attr, |
4273 | NULL |
4274 | }; |
4275 | |
4276 | static const struct attribute_group virtio_net_mrg_rx_group = { |
.name = "virtio_net",
4278 | .attrs = virtio_net_mrg_rx_attrs |
4279 | }; |
4280 | #endif |
4281 | |
4282 | static bool virtnet_fail_on_feature(struct virtio_device *vdev, |
4283 | unsigned int fbit, |
4284 | const char *fname, const char *dname) |
4285 | { |
4286 | if (!virtio_has_feature(vdev, fbit)) |
4287 | return false; |
4288 | |
dev_err(&vdev->dev, "device advertises feature %s but not %s",
4290 | fname, dname); |
4291 | |
4292 | return true; |
4293 | } |
4294 | |
4295 | #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ |
4296 | virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) |
4297 | |
4298 | static bool virtnet_validate_features(struct virtio_device *vdev) |
4299 | { |
4300 | if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && |
(VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
"VIRTIO_NET_F_CTRL_VQ"))) {
4318 | return false; |
4319 | } |
4320 | |
4321 | return true; |
4322 | } |
4323 | |
4324 | #define MIN_MTU ETH_MIN_MTU |
4325 | #define MAX_MTU ETH_MAX_MTU |
4326 | |
4327 | static int virtnet_validate(struct virtio_device *vdev) |
4328 | { |
4329 | if (!vdev->config->get) { |
dev_err(&vdev->dev, "%s failure: config access disabled\n",
4331 | __func__); |
4332 | return -EINVAL; |
4333 | } |
4334 | |
4335 | if (!virtnet_validate_features(vdev)) |
4336 | return -EINVAL; |
4337 | |
4338 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { |
4339 | int mtu = virtio_cread16(vdev, |
4340 | offsetof(struct virtio_net_config, |
4341 | mtu)); |
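/* An out-of-range MTU is not fatal: clearing the feature bit simply
 * makes probe fall back to the default MTU.
 */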
4342 | if (mtu < MIN_MTU) |
4343 | __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); |
4344 | } |
4345 | |
4346 | if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && |
4347 | !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { |
dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
4349 | __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); |
4350 | } |
4351 | |
4352 | return 0; |
4353 | } |
4354 | |
4355 | static bool virtnet_check_guest_gso(const struct virtnet_info *vi) |
4356 | { |
return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
4363 | } |
4364 | |
4365 | static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) |
4366 | { |
4367 | bool guest_gso = virtnet_check_guest_gso(vi); |
4368 | |
/* If the device can receive ANY guest GSO packets, regardless of mtu,
* allocate packets of maximum size; otherwise limit allocations to
* mtu-sized packets only.
*/
4373 | if (mtu > ETH_DATA_LEN || guest_gso) { |
4374 | vi->big_packets = true; |
4375 | vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); |
4376 | } |
4377 | } |
4378 | |
4379 | static int virtnet_probe(struct virtio_device *vdev) |
4380 | { |
4381 | int i, err = -ENOMEM; |
4382 | struct net_device *dev; |
4383 | struct virtnet_info *vi; |
4384 | u16 max_queue_pairs; |
4385 | int mtu = 0; |
4386 | |
4387 | /* Find if host supports multiqueue/rss virtio_net device */ |
4388 | max_queue_pairs = 1; |
4389 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) |
4390 | max_queue_pairs = |
4391 | virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); |
4392 | |
/* We need at least 2 queues */
4394 | if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || |
4395 | max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || |
4396 | !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) |
4397 | max_queue_pairs = 1; |
4398 | |
4399 | /* Allocate ourselves a network device with room for our info */ |
4400 | dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); |
4401 | if (!dev) |
4402 | return -ENOMEM; |
4403 | |
4404 | /* Set up network device as normal. */ |
4405 | dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | |
4406 | IFF_TX_SKB_NO_LINEAR; |
4407 | dev->netdev_ops = &virtnet_netdev; |
4408 | dev->features = NETIF_F_HIGHDMA; |
4409 | |
4410 | dev->ethtool_ops = &virtnet_ethtool_ops; |
4411 | SET_NETDEV_DEV(dev, &vdev->dev); |
4412 | |
4413 | /* Do we support "hardware" checksums? */ |
4414 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { |
4415 | /* This opens up the world of extra features. */ |
4416 | dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; |
4417 | if (csum) |
4418 | dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; |
4419 | |
4420 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { |
4421 | dev->hw_features |= NETIF_F_TSO |
4422 | | NETIF_F_TSO_ECN | NETIF_F_TSO6; |
4423 | } |
4424 | /* Individual feature bits: what can host handle? */ |
4425 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) |
4426 | dev->hw_features |= NETIF_F_TSO; |
4427 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) |
4428 | dev->hw_features |= NETIF_F_TSO6; |
4429 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) |
4430 | dev->hw_features |= NETIF_F_TSO_ECN; |
4431 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) |
4432 | dev->hw_features |= NETIF_F_GSO_UDP_L4; |
4433 | |
4434 | dev->features |= NETIF_F_GSO_ROBUST; |
4435 | |
4436 | if (gso) |
4437 | dev->features |= dev->hw_features & NETIF_F_ALL_TSO; |
4438 | /* (!csum && gso) case will be fixed by register_netdev() */ |
4439 | } |
4440 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) |
4441 | dev->features |= NETIF_F_RXCSUM; |
4442 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || |
4443 | virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) |
4444 | dev->features |= NETIF_F_GRO_HW; |
4445 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) |
4446 | dev->hw_features |= NETIF_F_GRO_HW; |
4447 | |
4448 | dev->vlan_features = dev->features; |
4449 | dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; |
4450 | |
4451 | /* MTU range: 68 - 65535 */ |
4452 | dev->min_mtu = MIN_MTU; |
4453 | dev->max_mtu = MAX_MTU; |
4454 | |
4455 | /* Configuration may specify what MAC to use. Otherwise random. */ |
4456 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { |
4457 | u8 addr[ETH_ALEN]; |
4458 | |
4459 | virtio_cread_bytes(vdev, |
4460 | offsetof(struct virtio_net_config, mac), |
addr, ETH_ALEN);
4462 | eth_hw_addr_set(dev, addr); |
4463 | } else { |
4464 | eth_hw_addr_random(dev); |
dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
4466 | dev->dev_addr); |
4467 | } |
4468 | |
4469 | /* Set up our device-specific information */ |
4470 | vi = netdev_priv(dev); |
4471 | vi->dev = dev; |
4472 | vi->vdev = vdev; |
4473 | vdev->priv = vi; |
4474 | |
4475 | INIT_WORK(&vi->config_work, virtnet_config_changed_work); |
4476 | spin_lock_init(&vi->refill_lock); |
4477 | |
4478 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { |
4479 | vi->mergeable_rx_bufs = true; |
4480 | dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; |
4481 | } |
4482 | |
4483 | if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) |
4484 | vi->has_rss_hash_report = true; |
4485 | |
4486 | if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) |
4487 | vi->has_rss = true; |
4488 | |
4489 | if (vi->has_rss || vi->has_rss_hash_report) { |
4490 | vi->rss_indir_table_size = |
4491 | virtio_cread16(vdev, offsetof(struct virtio_net_config, |
4492 | rss_max_indirection_table_length)); |
4493 | vi->rss_key_size = |
4494 | virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); |
4495 | |
4496 | vi->rss_hash_types_supported = |
4497 | virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); |
4498 | vi->rss_hash_types_supported &= |
4499 | ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | |
4500 | VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | |
4501 | VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); |
4502 | |
4503 | dev->hw_features |= NETIF_F_RXHASH; |
4504 | } |
4505 | |
4506 | if (vi->has_rss_hash_report) |
4507 | vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); |
4508 | else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || |
4509 | virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) |
4510 | vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); |
4511 | else |
4512 | vi->hdr_len = sizeof(struct virtio_net_hdr); |
4513 | |
4514 | if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || |
4515 | virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) |
4516 | vi->any_header_sg = true; |
4517 | |
4518 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) |
4519 | vi->has_cvq = true; |
4520 | |
4521 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { |
4522 | mtu = virtio_cread16(vdev, |
4523 | offsetof(struct virtio_net_config, |
4524 | mtu)); |
4525 | if (mtu < dev->min_mtu) { |
4526 | /* Should never trigger: MTU was previously validated |
4527 | * in virtnet_validate. |
4528 | */ |
dev_err(&vdev->dev,
"device MTU appears to have changed, it is now %d < %d",
4531 | mtu, dev->min_mtu); |
4532 | err = -EINVAL; |
4533 | goto free; |
4534 | } |
4535 | |
4536 | dev->mtu = mtu; |
4537 | dev->max_mtu = mtu; |
4538 | } |
4539 | |
4540 | virtnet_set_big_packets(vi, mtu); |
4541 | |
4542 | if (vi->any_header_sg) |
4543 | dev->needed_headroom = vi->hdr_len; |
4544 | |
4545 | /* Enable multiqueue by default */ |
4546 | if (num_online_cpus() >= max_queue_pairs) |
4547 | vi->curr_queue_pairs = max_queue_pairs; |
4548 | else |
4549 | vi->curr_queue_pairs = num_online_cpus(); |
4550 | vi->max_queue_pairs = max_queue_pairs; |
4551 | |
4552 | /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ |
4553 | err = init_vqs(vi); |
4554 | if (err) |
4555 | goto free; |
4556 | |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
4558 | vi->intr_coal_rx.max_usecs = 0; |
4559 | vi->intr_coal_tx.max_usecs = 0; |
4560 | vi->intr_coal_rx.max_packets = 0; |
4561 | |
4562 | /* Keep the default values of the coalescing parameters |
4563 | * aligned with the default napi_tx state. |
4564 | */ |
4565 | if (vi->sq[0].napi.weight) |
4566 | vi->intr_coal_tx.max_packets = 1; |
4567 | else |
4568 | vi->intr_coal_tx.max_packets = 0; |
4569 | } |
4570 | |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
4572 | /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ |
4573 | for (i = 0; i < vi->max_queue_pairs; i++) |
4574 | if (vi->sq[i].napi.weight) |
4575 | vi->sq[i].intr_coal.max_packets = 1; |
4576 | } |
4577 | |
4578 | #ifdef CONFIG_SYSFS |
4579 | if (vi->mergeable_rx_bufs) |
4580 | dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; |
4581 | #endif |
netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
4584 | |
4585 | virtnet_init_settings(dev); |
4586 | |
4587 | if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { |
vi->failover = net_failover_create(vi->dev);
if (IS_ERR(vi->failover)) {
err = PTR_ERR(vi->failover);
4591 | goto free_vqs; |
4592 | } |
4593 | } |
4594 | |
4595 | if (vi->has_rss || vi->has_rss_hash_report) |
4596 | virtnet_init_default_rss(vi); |
4597 | |
4598 | /* serialize netdev register + virtio_device_ready() with ndo_open() */ |
4599 | rtnl_lock(); |
4600 | |
4601 | err = register_netdevice(dev); |
4602 | if (err) { |
pr_debug("virtio_net: registering device failed\n");
4604 | rtnl_unlock(); |
4605 | goto free_failover; |
4606 | } |
4607 | |
virtio_device_ready(vdev);
4609 | |
_virtnet_set_queues(vi, vi->curr_queue_pairs);
4611 | |
4612 | /* a random MAC address has been assigned, notify the device. |
4613 | * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there |
4614 | * because many devices work fine without getting MAC explicitly |
4615 | */ |
4616 | if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && |
virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
4618 | struct scatterlist sg; |
4619 | |
4620 | sg_init_one(&sg, dev->dev_addr, dev->addr_len); |
4621 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, |
VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
pr_debug("virtio_net: setting MAC address failed\n");
4624 | rtnl_unlock(); |
4625 | err = -EINVAL; |
4626 | goto free_unregister_netdev; |
4627 | } |
4628 | } |
4629 | |
4630 | rtnl_unlock(); |
4631 | |
4632 | err = virtnet_cpu_notif_add(vi); |
4633 | if (err) { |
pr_debug("virtio_net: registering cpu notifier failed\n");
4635 | goto free_unregister_netdev; |
4636 | } |
4637 | |
4638 | /* Assume link up if device can't report link status, |
4639 | otherwise get link status from config. */ |
4640 | netif_carrier_off(dev); |
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
schedule_work(&vi->config_work);
4643 | } else { |
4644 | vi->status = VIRTIO_NET_S_LINK_UP; |
4645 | virtnet_update_settings(vi); |
4646 | netif_carrier_on(dev); |
4647 | } |
4648 | |
4649 | for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) |
if (virtio_has_feature(vi->vdev, guest_offloads[i]))
set_bit(guest_offloads[i], &vi->guest_offloads);
4652 | vi->guest_offloads_capable = vi->guest_offloads; |
4653 | |
pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
4655 | dev->name, max_queue_pairs); |
4656 | |
4657 | return 0; |
4658 | |
4659 | free_unregister_netdev: |
4660 | unregister_netdev(dev); |
4661 | free_failover: |
net_failover_destroy(vi->failover);
4663 | free_vqs: |
virtio_reset_device(vdev);
cancel_delayed_work_sync(&vi->refill);
4666 | free_receive_page_frags(vi); |
4667 | virtnet_del_vqs(vi); |
4668 | free: |
4669 | free_netdev(dev); |
4670 | return err; |
4671 | } |
4672 | |
4673 | static void remove_vq_common(struct virtnet_info *vi) |
4674 | { |
virtio_reset_device(vi->vdev);
4676 | |
4677 | /* Free unused buffers in both send and recv, if any. */ |
4678 | free_unused_bufs(vi); |
4679 | |
4680 | free_receive_bufs(vi); |
4681 | |
4682 | free_receive_page_frags(vi); |
4683 | |
4684 | virtnet_del_vqs(vi); |
4685 | } |
4686 | |
4687 | static void virtnet_remove(struct virtio_device *vdev) |
4688 | { |
4689 | struct virtnet_info *vi = vdev->priv; |
4690 | |
4691 | virtnet_cpu_notif_remove(vi); |
4692 | |
4693 | /* Make sure no work handler is accessing the device. */ |
flush_work(&vi->config_work);
4695 | |
unregister_netdev(vi->dev);
4697 | |
net_failover_destroy(vi->failover);
4699 | |
4700 | remove_vq_common(vi); |
4701 | |
free_netdev(vi->dev);
4703 | } |
4704 | |
4705 | static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) |
4706 | { |
4707 | struct virtnet_info *vi = vdev->priv; |
4708 | |
4709 | virtnet_cpu_notif_remove(vi); |
4710 | virtnet_freeze_down(vdev); |
4711 | remove_vq_common(vi); |
4712 | |
4713 | return 0; |
4714 | } |
4715 | |
4716 | static __maybe_unused int virtnet_restore(struct virtio_device *vdev) |
4717 | { |
4718 | struct virtnet_info *vi = vdev->priv; |
4719 | int err; |
4720 | |
4721 | err = virtnet_restore_up(vdev); |
4722 | if (err) |
4723 | return err; |
virtnet_set_queues(vi, vi->curr_queue_pairs);
4725 | |
4726 | err = virtnet_cpu_notif_add(vi); |
4727 | if (err) { |
4728 | virtnet_freeze_down(vdev); |
4729 | remove_vq_common(vi); |
4730 | return err; |
4731 | } |
4732 | |
4733 | return 0; |
4734 | } |
4735 | |
4736 | static struct virtio_device_id id_table[] = { |
4737 | { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, |
4738 | { 0 }, |
4739 | }; |
4740 | |
4741 | #define VIRTNET_FEATURES \ |
4742 | VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ |
4743 | VIRTIO_NET_F_MAC, \ |
4744 | VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ |
4745 | VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ |
4746 | VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ |
4747 | VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ |
4748 | VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ |
4749 | VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ |
4750 | VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ |
4751 | VIRTIO_NET_F_CTRL_MAC_ADDR, \ |
4752 | VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ |
4753 | VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ |
4754 | VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ |
4755 | VIRTIO_NET_F_VQ_NOTF_COAL, \ |
4756 | VIRTIO_NET_F_GUEST_HDRLEN |
4757 | |
4758 | static unsigned int features[] = { |
4759 | VIRTNET_FEATURES, |
4760 | }; |
4761 | |
4762 | static unsigned int features_legacy[] = { |
4763 | VIRTNET_FEATURES, |
4764 | VIRTIO_NET_F_GSO, |
4765 | VIRTIO_F_ANY_LAYOUT, |
4766 | }; |
4767 | |
4768 | static struct virtio_driver virtio_net_driver = { |
4769 | .feature_table = features, |
4770 | .feature_table_size = ARRAY_SIZE(features), |
4771 | .feature_table_legacy = features_legacy, |
4772 | .feature_table_size_legacy = ARRAY_SIZE(features_legacy), |
4773 | .driver.name = KBUILD_MODNAME, |
4774 | .driver.owner = THIS_MODULE, |
4775 | .id_table = id_table, |
4776 | .validate = virtnet_validate, |
4777 | .probe = virtnet_probe, |
4778 | .remove = virtnet_remove, |
4779 | .config_changed = virtnet_config_changed, |
4780 | #ifdef CONFIG_PM_SLEEP |
4781 | .freeze = virtnet_freeze, |
4782 | .restore = virtnet_restore, |
4783 | #endif |
4784 | }; |
4785 | |
4786 | static __init int virtio_net_driver_init(void) |
4787 | { |
4788 | int ret; |
4789 | |
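/* The CPU hotplug states registered here keep virtqueue affinity in
 * step with CPUs coming online and going away.
 */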
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
virtnet_cpu_online,
virtnet_cpu_down_prep);
4793 | if (ret < 0) |
4794 | goto out; |
4795 | virtionet_online = ret; |
ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
NULL, virtnet_cpu_dead);
4798 | if (ret) |
4799 | goto err_dead; |
ret = register_virtio_driver(&virtio_net_driver);
4801 | if (ret) |
4802 | goto err_virtio; |
4803 | return 0; |
4804 | err_virtio: |
cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
4806 | err_dead: |
cpuhp_remove_multi_state(virtionet_online);
4808 | out: |
4809 | return ret; |
4810 | } |
4811 | module_init(virtio_net_driver_init); |
4812 | |
4813 | static __exit void virtio_net_driver_exit(void) |
4814 | { |
unregister_virtio_driver(&virtio_net_driver);
cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
cpuhp_remove_multi_state(virtionet_online);
4818 | } |
4819 | module_exit(virtio_net_driver_exit); |
4820 | |
4821 | MODULE_DEVICE_TABLE(virtio, id_table); |
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");
4824 | |