// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include "i40e_txrx_common.h"
#include "i40e_xsk.h"

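/**
 * i40e_clear_rx_bi_zc - zero out the zero-copy software buffer ring
 * @rx_ring: Rx ring whose rx_bi_zc entries are cleared
 **/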
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
{
	memset(rx_ring->rx_bi_zc, 0,
	       sizeof(*rx_ring->rx_bi_zc) * rx_ring->count);
}

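/**
 * i40e_rx_bi - return the address of the zero-copy software ring entry
 * @rx_ring: Rx ring
 * @idx: index into the zero-copy software ring
 **/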
static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
{
	return &rx_ring->rx_bi_zc[idx];
}

/**
 * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer
 * @rx_ring: Current rx ring
 * @pool_present: is pool for XSK present
 *
 * Try to allocate memory and return -ENOMEM if the allocation fails.
 * If the allocation succeeds, substitute the buffer ring with the new one.
 * Returns 0 on success, negative on failure
 */
static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
{
	size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
					  sizeof(*rx_ring->rx_bi);
	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);

	if (!sw_ring)
		return -ENOMEM;

	if (pool_present) {
		kfree(rx_ring->rx_bi);
		rx_ring->rx_bi = NULL;
		rx_ring->rx_bi_zc = sw_ring;
	} else {
		kfree(rx_ring->rx_bi_zc);
		rx_ring->rx_bi_zc = NULL;
		rx_ring->rx_bi = sw_ring;
	}
	return 0;
}

/**
 * i40e_realloc_rx_bi_zc - reallocate rx SW rings
 * @vsi: Current VSI
 * @zc: is zero copy set
 *
 * Reallocate the buffer info rings for the rx_rings that might be used by XSK.
 * XDP requires more memory than rx_buf provides.
 * Returns 0 on success, negative on failure
 */
int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
{
	struct i40e_ring *rx_ring;
	unsigned long q;

	for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
		rx_ring = vsi->rx_rings[q];
		if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
			return -ENOMEM;
	}
	return 0;
}

/**
 * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
 * certain ring/qid
 * @vsi: Current VSI
 * @pool: buffer pool
 * @qid: Rx ring to associate buffer pool with
 *
 * Returns 0 on success, <0 on failure
 **/
static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
				struct xsk_buff_pool *pool,
				u16 qid)
{
	struct net_device *netdev = vsi->netdev;
	bool if_running;
	int err;

	if (vsi->type != I40E_VSI_MAIN)
		return -EINVAL;

	if (qid >= vsi->num_queue_pairs)
		return -EINVAL;

	if (qid >= netdev->real_num_rx_queues ||
	    qid >= netdev->real_num_tx_queues)
		return -EINVAL;

	err = xsk_pool_dma_map(pool, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR);
	if (err)
		return err;

	set_bit(qid, vsi->af_xdp_zc_qps);

	if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);

	if (if_running) {
		err = i40e_queue_pair_disable(vsi, qid);
		if (err)
			return err;

		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
		if (err)
			return err;

		err = i40e_queue_pair_enable(vsi, qid);
		if (err)
			return err;

		/* Kick start the NAPI context so that receiving will start */
		err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
		if (err)
			return err;
	}

	return 0;
}

/**
 * i40e_xsk_pool_disable - Disassociate an AF_XDP buffer pool from a
 * certain ring/qid
 * @vsi: Current VSI
 * @qid: Rx ring to disassociate buffer pool from
 *
 * Returns 0 on success, <0 on failure
 **/
static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
{
	struct net_device *netdev = vsi->netdev;
	struct xsk_buff_pool *pool;
	bool if_running;
	int err;

	pool = xsk_get_pool_from_qid(netdev, qid);
	if (!pool)
		return -EINVAL;

	if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);

	if (if_running) {
		err = i40e_queue_pair_disable(vsi, qid);
		if (err)
			return err;
	}

	clear_bit(qid, vsi->af_xdp_zc_qps);
	xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);

	if (if_running) {
		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
		if (err)
			return err;
		err = i40e_queue_pair_enable(vsi, qid);
		if (err)
			return err;
	}

	return 0;
}

/**
 * i40e_xsk_pool_setup - Enable/disassociate an AF_XDP buffer pool to/from
 * a ring/qid
 * @vsi: Current VSI
 * @pool: Buffer pool to enable/associate to a ring, or NULL to disable
 * @qid: Rx ring to (dis)associate buffer pool to/from
 *
 * This function enables or disables a buffer pool to a certain ring.
 *
 * Returns 0 on success, <0 on failure
 **/
int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
			u16 qid)
{
	return pool ? i40e_xsk_pool_enable(vsi, pool, qid) :
		i40e_xsk_pool_disable(vsi, qid);
}

/**
 * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
 * @rx_ring: Rx ring
 * @xdp: xdp_buff used as input to the XDP program
 * @xdp_prog: XDP program to run
 *
 * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR, EXIT}
 **/
static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp,
			   struct bpf_prog *xdp_prog)
{
	int err, result = I40E_XDP_PASS;
	struct i40e_ring *xdp_ring;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);

	if (likely(act == XDP_REDIRECT)) {
		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
		if (!err)
			return I40E_XDP_REDIR;
		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
			result = I40E_XDP_EXIT;
		else
			result = I40E_XDP_CONSUMED;
		goto out_failure;
	}

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
		if (result == I40E_XDP_CONSUMED)
			goto out_failure;
		break;
	case XDP_DROP:
		result = I40E_XDP_CONSUMED;
		break;
	default:
		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		result = I40E_XDP_CONSUMED;
out_failure:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
	}
	return result;
}

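/**
 * i40e_alloc_rx_buffers_zc - Allocate Rx buffers from the AF_XDP pool
 * @rx_ring: Rx ring
 * @count: number of buffers to allocate
 *
 * Pulls a batch of buffers from the pool and writes their DMA addresses
 * into the Rx descriptors, stopping early at the end of the ring instead
 * of wrapping within one call.
 *
 * Returns true if all @count buffers were allocated, false otherwise.
 **/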
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
{
	u16 ntu = rx_ring->next_to_use;
	union i40e_rx_desc *rx_desc;
	struct xdp_buff **xdp;
	u32 nb_buffs, i;
	dma_addr_t dma;

	rx_desc = I40E_RX_DESC(rx_ring, ntu);
	xdp = i40e_rx_bi(rx_ring, ntu);

	nb_buffs = min_t(u16, count, rx_ring->count - ntu);
	nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
	if (!nb_buffs)
		return false;

	i = nb_buffs;
	while (i--) {
		dma = xsk_buff_xdp_get_dma(*xdp);
		rx_desc->read.pkt_addr = cpu_to_le64(dma);
		rx_desc->read.hdr_addr = 0;

		rx_desc++;
		xdp++;
	}

	ntu += nb_buffs;
	if (ntu == rx_ring->count) {
		rx_desc = I40E_RX_DESC(rx_ring, 0);
		ntu = 0;
	}

	/* clear the status bits for the next_to_use descriptor */
	rx_desc->wb.qword1.status_error_len = 0;
	i40e_release_rx_desc(rx_ring, ntu);

	return count == nb_buffs;
}

/**
 * i40e_construct_skb_zc - Create skbuff from zero-copy Rx buffer
 * @rx_ring: Rx ring
 * @xdp: xdp_buff
 *
 * This function allocates a new skb from a zero-copy Rx buffer.
 *
 * Returns the skb, or NULL on failure.
 **/
static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
					     struct xdp_buff *xdp)
{
	unsigned int totalsize = xdp->data_end - xdp->data_meta;
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct skb_shared_info *sinfo = NULL;
	struct sk_buff *skb;
	u32 nr_frags = 0;

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}
	net_prefetch(xdp->data_meta);

	/* allocate a skb to store the frags */
	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		goto out;

	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
	       ALIGN(totalsize, sizeof(long)));

	if (metasize) {
		skb_metadata_set(skb, metasize);
		__skb_pull(skb, metasize);
	}

	if (likely(!xdp_buff_has_frags(xdp)))
		goto out;

	for (int i = 0; i < nr_frags; i++) {
		struct skb_shared_info *skinfo = skb_shinfo(skb);
		skb_frag_t *frag = &sinfo->frags[i];
		struct page *page;
		void *addr;

		page = dev_alloc_page();
		if (!page) {
			dev_kfree_skb(skb);
			return NULL;
		}
		addr = page_to_virt(page);

		memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));

		__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
					   addr, 0, skb_frag_size(frag));
	}

out:
	xsk_buff_free(xdp);
	return skb;
}

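/**
 * i40e_handle_xdp_result_zc - Act on the verdict returned for a frame
 * @rx_ring: Rx ring
 * @xdp_buff: zero-copy buffer the XDP program ran on
 * @rx_desc: Rx descriptor for the frame
 * @rx_packets: set to the number of packets counted for this buffer
 * @rx_bytes: set to the number of bytes counted for this buffer
 * @xdp_res: verdict returned by i40e_run_xdp_zc()
 * @failure: set to true when xdp_res is I40E_XDP_EXIT, telling the caller
 *	     to stop processing
 **/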
static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
				      struct xdp_buff *xdp_buff,
				      union i40e_rx_desc *rx_desc,
				      unsigned int *rx_packets,
				      unsigned int *rx_bytes,
				      unsigned int xdp_res,
				      bool *failure)
{
	struct sk_buff *skb;

	*rx_packets = 1;
	*rx_bytes = xdp_get_buff_len(xdp_buff);

	if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX)
		return;

	if (xdp_res == I40E_XDP_EXIT) {
		*failure = true;
		return;
	}

	if (xdp_res == I40E_XDP_CONSUMED) {
		xsk_buff_free(xdp_buff);
		return;
	}
	if (xdp_res == I40E_XDP_PASS) {
		/* NB! We are not checking for errors using
		 * i40e_test_staterr with
		 * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is because
		 * SBP is *not* set in PRT_SBPVSI (it is not set by default).
		 */
		skb = i40e_construct_skb_zc(rx_ring, xdp_buff);
		if (!skb) {
			rx_ring->rx_stats.alloc_buff_failed++;
			*rx_packets = 0;
			*rx_bytes = 0;
			return;
		}

		if (eth_skb_pad(skb)) {
			*rx_packets = 0;
			*rx_bytes = 0;
			return;
		}

		i40e_process_skb_fields(rx_ring, rx_desc, skb);
		napi_gro_receive(&rx_ring->q_vector->napi, skb);
		return;
	}

	/* Should never get here, as all valid cases have been handled already.
	 */
	WARN_ON_ONCE(1);
}

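/**
 * i40e_add_xsk_frag - Attach a buffer as a fragment of a multi-buffer frame
 * @rx_ring: Rx ring
 * @first: xdp_buff of the first buffer in the frame
 * @xdp: xdp_buff for the current fragment
 * @size: number of bytes in the fragment
 *
 * Returns 0 on success, -ENOMEM if the frame already carries MAX_SKB_FRAGS
 * fragments (in which case @first is freed).
 **/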
static int
i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
		  struct xdp_buff *xdp, const unsigned int size)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);

	if (!xdp_buff_has_frags(first)) {
		sinfo->nr_frags = 0;
		sinfo->xdp_frags_size = 0;
		xdp_buff_set_frags_flag(first);
	}

	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
		xsk_buff_free(first);
		return -ENOMEM;
	}

	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
				   virt_to_page(xdp->data_hard_start), 0, size);
	sinfo->xdp_frags_size += size;
	xsk_buff_add_frag(xdp);

	return 0;
}

/**
 * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
 * @rx_ring: Rx ring
 * @budget: NAPI budget
 *
 * Returns amount of work completed
 **/
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	u16 next_to_process = rx_ring->next_to_process;
	u16 next_to_clean = rx_ring->next_to_clean;
	unsigned int xdp_res, xdp_xmit = 0;
	struct xdp_buff *first = NULL;
	u32 count = rx_ring->count;
	struct bpf_prog *xdp_prog;
	u32 entries_to_alloc;
	bool failure = false;

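	/* next_to_process running ahead of next_to_clean means a multi-buffer
	 * frame was cut short by the previous poll; pick up its first buffer
	 * again so the remaining fragments can be chained onto it.
	 */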
	if (next_to_process != next_to_clean)
		first = *i40e_rx_bi(rx_ring, next_to_clean);

	/* NB! xdp_prog will always be !NULL, due to the fact that
	 * this path is enabled by setting an XDP program.
	 */
	xdp_prog = READ_ONCE(rx_ring->xdp_prog);

	while (likely(total_rx_packets < (unsigned int)budget)) {
		union i40e_rx_desc *rx_desc;
		unsigned int rx_packets;
		unsigned int rx_bytes;
		struct xdp_buff *bi;
		unsigned int size;
		u64 qword;

		rx_desc = I40E_RX_DESC(rx_ring, next_to_process);
		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we have
		 * verified the descriptor has been written back.
		 */
		dma_rmb();

		if (i40e_rx_is_programming_status(qword)) {
			i40e_clean_programming_status(rx_ring,
						      rx_desc->raw.qword[0],
						      qword);
			bi = *i40e_rx_bi(rx_ring, next_to_process);
			xsk_buff_free(bi);
			if (++next_to_process == count)
				next_to_process = 0;
			continue;
		}

		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		if (!size)
			break;

		bi = *i40e_rx_bi(rx_ring, next_to_process);
		xsk_buff_set_size(bi, size);
		xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);

		if (!first)
			first = bi;
		else if (i40e_add_xsk_frag(rx_ring, first, bi, size))
			break;

		if (++next_to_process == count)
			next_to_process = 0;

		if (i40e_is_non_eop(rx_ring, rx_desc))
			continue;

		xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
		i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
					  &rx_bytes, xdp_res, &failure);
		first->flags = 0;
		next_to_clean = next_to_process;
		if (failure)
			break;
		total_rx_packets += rx_packets;
		total_rx_bytes += rx_bytes;
		xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
		first = NULL;
	}

	rx_ring->next_to_clean = next_to_clean;
	rx_ring->next_to_process = next_to_process;

	entries_to_alloc = I40E_DESC_UNUSED(rx_ring);
	if (entries_to_alloc >= I40E_RX_BUFFER_WRITE)
		failure |= !i40e_alloc_rx_buffers_zc(rx_ring, entries_to_alloc);

	i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
	i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);

	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
		if (failure || next_to_clean == rx_ring->next_to_use)
			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
		else
			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);

		return (int)total_rx_packets;
	}
	return failure ? budget : (int)total_rx_packets;
}

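/**
 * i40e_xmit_pkt - Fill one Tx descriptor from an AF_XDP descriptor
 * @xdp_ring: XDP Tx ring
 * @desc: AF_XDP descriptor describing the frame in the umem
 * @total_bytes: running byte counter, incremented by the frame length
 **/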
static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
			  unsigned int *total_bytes)
{
	u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(desc);
	struct i40e_tx_desc *tx_desc;
	dma_addr_t dma;

	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);

	tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
	tx_desc->buffer_addr = cpu_to_le64(dma);
	tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc->len, 0);

	*total_bytes += desc->len;
}

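/**
 * i40e_xmit_pkt_batch - Fill PKTS_PER_BATCH Tx descriptors in one call
 * @xdp_ring: XDP Tx ring
 * @desc: array of at least PKTS_PER_BATCH AF_XDP descriptors
 * @total_bytes: running byte counter, incremented by each frame length
 *
 * The loop is unrolled (loop_unrolled_for) to cut per-descriptor overhead;
 * the caller guarantees the ring has room for the whole batch.
 **/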
static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
				unsigned int *total_bytes)
{
	u16 ntu = xdp_ring->next_to_use;
	struct i40e_tx_desc *tx_desc;
	dma_addr_t dma;
	u32 i;

	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
		u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]);

		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc[i].len);

		tx_desc = I40E_TX_DESC(xdp_ring, ntu++);
		tx_desc->buffer_addr = cpu_to_le64(dma);
		tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc[i].len, 0);

		*total_bytes += desc[i].len;
	}

	xdp_ring->next_to_use = ntu;
}

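/**
 * i40e_fill_tx_hw_ring - Fill Tx descriptors for a set of AF_XDP descriptors
 * @xdp_ring: XDP Tx ring
 * @descs: AF_XDP descriptors to transmit
 * @nb_pkts: number of descriptors in @descs
 * @total_bytes: running byte counter
 *
 * Splits the work into full PKTS_PER_BATCH chunks handled by the unrolled
 * batch routine plus a leftover tail; the masking below relies on
 * PKTS_PER_BATCH being a power of two.
 **/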
static void i40e_fill_tx_hw_ring(struct i40e_ring *xdp_ring, struct xdp_desc *descs, u32 nb_pkts,
				 unsigned int *total_bytes)
{
	u32 batched, leftover, i;

	batched = nb_pkts & ~(PKTS_PER_BATCH - 1);
	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
	for (i = 0; i < batched; i += PKTS_PER_BATCH)
		i40e_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
	for (i = batched; i < batched + leftover; i++)
		i40e_xmit_pkt(xdp_ring, &descs[i], total_bytes);
}

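/**
 * i40e_set_rs_bit - Request a completion report for the last used descriptor
 * @xdp_ring: XDP Tx ring
 *
 * Sets the RS (report status) bit on the most recently written Tx
 * descriptor so the hardware signals when the whole burst has been sent.
 **/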
static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
{
	u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
	struct i40e_tx_desc *tx_desc;

	tx_desc = I40E_TX_DESC(xdp_ring, ntu);
	tx_desc->cmd_type_offset_bsz |= cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT);
}

/**
 * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
 * @xdp_ring: XDP Tx ring
 * @budget: NAPI budget
 *
 * Returns true if the work is finished.
 **/
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
{
	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
	u32 nb_pkts, nb_processed = 0;
	unsigned int total_bytes = 0;

	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
	if (!nb_pkts)
		return true;

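	/* If the burst would run past the end of the ring, fill descriptors
	 * up to the end first, wrap next_to_use to 0, and let the second
	 * call below handle the remainder.
	 */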
	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
		i40e_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
		xdp_ring->next_to_use = 0;
	}

	i40e_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
			     &total_bytes);

	/* Request an interrupt for the last frame and bump tail ptr. */
	i40e_set_rs_bit(xdp_ring);
	i40e_xdp_ring_update_tail(xdp_ring);

	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);

	return nb_pkts < budget;
}

/**
 * i40e_clean_xdp_tx_buffer - Frees and unmaps an XDP Tx entry
 * @tx_ring: XDP Tx ring
 * @tx_bi: Tx buffer info to clean
 **/
static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
				     struct i40e_tx_buffer *tx_bi)
{
	xdp_return_frame(tx_bi->xdpf);
	tx_ring->xdp_tx_active--;
	dma_unmap_single(tx_ring->dev,
			 dma_unmap_addr(tx_bi, dma),
			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
	dma_unmap_len_set(tx_bi, len, 0);
}

/**
 * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries
 * @vsi: Current VSI
 * @tx_ring: XDP Tx ring
 *
 * Returns true if cleanup/transmission is done.
 **/
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
{
	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
	u32 i, completed_frames, xsk_frames = 0;
	u32 head_idx = i40e_get_head(tx_ring);
	struct i40e_tx_buffer *tx_bi;
	unsigned int ntc;

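	/* The head writeback index may have wrapped around the ring;
	 * normalize it so the subtraction below yields the number of
	 * descriptors the hardware has completed since the last clean.
	 */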
	if (head_idx < tx_ring->next_to_clean)
		head_idx += tx_ring->count;
	completed_frames = head_idx - tx_ring->next_to_clean;

	if (completed_frames == 0)
		goto out_xmit;

	if (likely(!tx_ring->xdp_tx_active)) {
		xsk_frames = completed_frames;
		goto skip;
	}

	ntc = tx_ring->next_to_clean;

	for (i = 0; i < completed_frames; i++) {
		tx_bi = &tx_ring->tx_bi[ntc];

		if (tx_bi->xdpf) {
			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
			tx_bi->xdpf = NULL;
		} else {
			xsk_frames++;
		}

		if (++ntc >= tx_ring->count)
			ntc = 0;
	}

skip:
	tx_ring->next_to_clean += completed_frames;
	if (unlikely(tx_ring->next_to_clean >= tx_ring->count))
		tx_ring->next_to_clean -= tx_ring->count;

	if (xsk_frames)
		xsk_tx_completed(bp, xsk_frames);

	i40e_arm_wb(tx_ring, vsi, completed_frames);

out_xmit:
	if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
		xsk_set_tx_need_wakeup(tx_ring->xsk_pool);

	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
}

/**
 * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
 * @dev: the netdevice
 * @queue_id: queue id to wake up
 * @flags: ignored in our case since we have Rx and Tx in the same NAPI.
 *
 * Returns <0 for errors, 0 otherwise.
 **/
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct i40e_netdev_priv *np = netdev_priv(dev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_pf *pf = vsi->back;
	struct i40e_ring *ring;

	if (test_bit(__I40E_CONFIG_BUSY, pf->state))
		return -EAGAIN;

	if (test_bit(__I40E_VSI_DOWN, vsi->state))
		return -ENETDOWN;

	if (!i40e_enabled_xdp_vsi(vsi))
		return -EINVAL;

	if (queue_id >= vsi->num_queue_pairs)
		return -EINVAL;

	if (!vsi->xdp_rings[queue_id]->xsk_pool)
		return -EINVAL;

	ring = vsi->xdp_rings[queue_id];

	/* The idea here is that if NAPI is running, mark a miss, so
	 * it will run again. If not, trigger an interrupt and
	 * schedule the NAPI from interrupt context. If NAPI would be
	 * scheduled here, the interrupt affinity would not be
	 * honored.
	 */
	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi))
		i40e_force_wb(vsi, ring->q_vector);

	return 0;
}

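/**
 * i40e_xsk_clean_rx_ring - Return outstanding Rx buffers to the pool
 * @rx_ring: Rx ring being torn down
 *
 * Frees every zero-copy buffer still sitting between next_to_clean and
 * next_to_use when the ring is shut down.
 **/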
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
{
	u16 ntc = rx_ring->next_to_clean;
	u16 ntu = rx_ring->next_to_use;

	while (ntc != ntu) {
		struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, ntc);

		xsk_buff_free(rx_bi);
		ntc++;
		if (ntc >= rx_ring->count)
			ntc = 0;
	}
}

/**
 * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
 * @tx_ring: XDP Tx ring
 **/
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
	struct i40e_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	while (ntc != ntu) {
		tx_bi = &tx_ring->tx_bi[ntc];

		if (tx_bi->xdpf)
			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		ntc++;
		if (ntc >= tx_ring->count)
			ntc = 0;
	}

	if (xsk_frames)
		xsk_tx_completed(bp, xsk_frames);
}

/**
 * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have an AF_XDP
 * buffer pool attached
 * @vsi: vsi
 *
 * Returns true if any of the Rx rings has an AF_XDP buffer pool attached
 **/
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
{
	struct net_device *netdev = vsi->netdev;
	int i;

	for (i = 0; i < vsi->num_queue_pairs; i++) {
		if (xsk_get_pool_from_qid(netdev, i))
			return true;
	}

	return false;
}