// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"

/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 */
static unsigned int rx_refill_threshold;
module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring refill threshold (%)");
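/* Illustrative note (not in the original source): loading the driver with
 * rx_refill_threshold=50 makes the fast-fill path top the ring up whenever
 * it drops below half of max_fill; the default of 0 uses the largest
 * trigger the ring geometry allows (see efx_init_rx_queue()).
 */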

/* RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow, plus one packet-worth
 * to allow pipelined receives.
 */
#define EFX_RXD_HEAD_ROOM	(1 + EFX_RX_MAX_FRAGS)

/* Check the RX page recycle ring for a page that can be reused. */
static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_page_state *state;
	unsigned int index;
	struct page *page;

	if (unlikely(!rx_queue->page_ring))
		return NULL;
	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
	page = rx_queue->page_ring[index];
	if (page == NULL)
		return NULL;

	rx_queue->page_ring[index] = NULL;
	/* page_remove cannot exceed page_add. */
	if (rx_queue->page_remove != rx_queue->page_add)
		++rx_queue->page_remove;

	/* If page_count is 1 then we hold the only reference to this page. */
	if (page_count(page) == 1) {
		++rx_queue->page_recycle_count;
		return page;
	} else {
		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
		++rx_queue->page_recycle_failed;
	}

	return NULL;
}

/* Attempt to recycle the page if there is an RX recycle ring; the page can
 * only be added if this is the final RX buffer, to prevent pages being used in
 * the descriptor ring and appearing in the recycle ring simultaneously.
 */
static void efx_recycle_rx_page(struct efx_channel *channel,
				struct efx_rx_buffer *rx_buf)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = rx_queue->efx;
	struct page *page = rx_buf->page;
	unsigned int index;

	/* Only recycle the page after processing the final buffer. */
	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
		return;

	index = rx_queue->page_add & rx_queue->page_ptr_mask;
	if (rx_queue->page_ring[index] == NULL) {
		unsigned int read_index = rx_queue->page_remove &
			rx_queue->page_ptr_mask;

		/* The next slot in the recycle ring is available, but
		 * increment page_remove if the read pointer currently
		 * points here.
		 */
		if (read_index == index)
			++rx_queue->page_remove;
		rx_queue->page_ring[index] = page;
		++rx_queue->page_add;
		return;
	}
	++rx_queue->page_recycle_full;
	efx_unmap_rx_buffer(efx, rx_buf);
	put_page(rx_buf->page);
}

/* Recycle the pages that are used by buffers that have just been received. */
void efx_recycle_rx_pages(struct efx_channel *channel,
			  struct efx_rx_buffer *rx_buf,
			  unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	if (unlikely(!rx_queue->page_ring))
		return;

	do {
		efx_recycle_rx_page(channel, rx_buf);
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
	} while (--n_frags);
}

void efx_discard_rx_packet(struct efx_channel *channel,
			   struct efx_rx_buffer *rx_buf,
			   unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
}

static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
{
	unsigned int bufs_in_recycle_ring, page_ring_size;
	struct efx_nic *efx = rx_queue->efx;

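	/* The recycle ring holds whole pages rather than individual buffers,
	 * so convert the buffer count into pages and round up to a power of
	 * two so the ring can be indexed with a simple mask.
	 */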
	bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
					    efx->rx_bufs_per_page);
	rx_queue->page_ring = kcalloc(page_ring_size,
				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
	if (!rx_queue->page_ring)
		rx_queue->page_ptr_mask = 0;
	else
		rx_queue->page_ptr_mask = page_ring_size - 1;
}

static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	int i;

	if (unlikely(!rx_queue->page_ring))
		return;

	/* Unmap and release the pages in the recycle ring. Remove the ring. */
	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
		struct page *page = rx_queue->page_ring[i];
		struct efx_rx_page_state *state;

		if (page == NULL)
			continue;

		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
	}
	kfree(rx_queue->page_ring);
	rx_queue->page_ring = NULL;
}

static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
			       struct efx_rx_buffer *rx_buf)
{
	/* Release the page reference we hold for the buffer. */
	if (rx_buf->page)
		put_page(rx_buf->page);

	/* If this is the last buffer in a page, unmap and free it. */
	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
	}
	rx_buf->page = NULL;
}

int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
	rx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating RX queue %d size %#x mask %#x\n",
		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
		  rx_queue->ptr_mask);

	/* Allocate RX buffers */
	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
				   GFP_KERNEL);
	if (!rx_queue->buffer)
		return -ENOMEM;

	rc = efx_nic_probe_rx(rx_queue);
	if (rc) {
		kfree(rx_queue->buffer);
		rx_queue->buffer = NULL;
	}

	return rc;
}

void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
{
	unsigned int max_fill, trigger, max_trigger;
	struct efx_nic *efx = rx_queue->efx;
	int rc = 0;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));

	/* Initialise ptr fields */
	rx_queue->added_count = 0;
	rx_queue->notified_count = 0;
	rx_queue->granted_count = 0;
	rx_queue->removed_count = 0;
	rx_queue->min_fill = -1U;
	efx_init_rx_recycle_ring(rx_queue);

	rx_queue->page_remove = 0;
	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
	rx_queue->page_recycle_count = 0;
	rx_queue->page_recycle_failed = 0;
	rx_queue->page_recycle_full = 0;

	/* Initialise limit fields */
	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
	max_trigger =
		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
	if (rx_refill_threshold != 0) {
		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
		if (trigger > max_trigger)
			trigger = max_trigger;
	} else {
		trigger = max_trigger;
	}

	rx_queue->max_fill = max_fill;
	rx_queue->fast_fill_trigger = trigger;
	rx_queue->refill_enabled = true;

	/* Initialise XDP queue information */
	rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
			      rx_queue->core_index, 0);

	if (rc) {
		netif_err(efx, rx_err, efx->net_dev,
			  "Failure to initialise XDP queue information rc=%d\n",
			  rc);
		efx->xdp_rxq_info_failed = true;
	} else {
		rx_queue->xdp_rxq_info_valid = true;
	}

	/* Set up RX descriptor ring */
	efx_nic_init_rx(rx_queue);
}

void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_rx_buffer *rx_buf;
	int i;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));

	del_timer_sync(&rx_queue->slow_fill);
	if (rx_queue->grant_credits)
		flush_work(&rx_queue->grant_work);

	/* Release RX buffers from the current read ptr to the write ptr */
	if (rx_queue->buffer) {
		for (i = rx_queue->removed_count; i < rx_queue->added_count;
		     i++) {
			unsigned int index = i & rx_queue->ptr_mask;

			rx_buf = efx_rx_buffer(rx_queue, index);
			efx_fini_rx_buffer(rx_queue, rx_buf);
		}
	}

	efx_fini_rx_recycle_ring(rx_queue);

	if (rx_queue->xdp_rxq_info_valid)
		xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);

	rx_queue->xdp_rxq_info_valid = false;
}

void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
{
	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));

	efx_nic_remove_rx(rx_queue);

	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
}

/* Unmap a DMA-mapped page. This function is only called for the final RX
 * buffer in a page.
 */
void efx_unmap_rx_buffer(struct efx_nic *efx,
			 struct efx_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;

	if (page) {
		struct efx_rx_page_state *state = page_address(page);

		dma_unmap_page(&efx->pci_dev->dev,
			       state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
	}
}

void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
			 struct efx_rx_buffer *rx_buf,
			 unsigned int num_bufs)
{
	do {
		if (rx_buf->page) {
			put_page(rx_buf->page);
			rx_buf->page = NULL;
		}
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
	} while (--num_bufs);
}

void efx_rx_slow_fill(struct timer_list *t)
{
	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);

	/* Post an event to cause NAPI to run and refill the queue */
	efx_nic_generate_fill_event(rx_queue);
	++rx_queue->slow_fill_count;
}

void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
{
	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
}

/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
 *
 * @rx_queue:		Efx RX queue
 *
 * This allocates a batch of pages, maps them for DMA, and populates
 * struct efx_rx_buffers for each one. Return a negative error code or
 * 0 on success. If a single page can be used for multiple buffers,
 * then the page will either be inserted fully, or not at all.
 */
static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
{
	unsigned int page_offset, index, count;
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_page_state *state;
	struct efx_rx_buffer *rx_buf;
	dma_addr_t dma_addr;
	struct page *page;

	count = 0;
	do {
		page = efx_reuse_page(rx_queue);
		if (page == NULL) {
			page = alloc_pages(__GFP_COMP |
					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
					   efx->rx_buffer_order);
			if (unlikely(page == NULL))
				return -ENOMEM;
			dma_addr =
				dma_map_page(&efx->pci_dev->dev, page, 0,
					     PAGE_SIZE << efx->rx_buffer_order,
					     DMA_FROM_DEVICE);
			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
						       dma_addr))) {
				__free_pages(page, efx->rx_buffer_order);
				return -EIO;
			}
			state = page_address(page);
			state->dma_addr = dma_addr;
		} else {
			state = page_address(page);
			dma_addr = state->dma_addr;
		}

		dma_addr += sizeof(struct efx_rx_page_state);
		page_offset = sizeof(struct efx_rx_page_state);

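		/* Carve the page into rx_page_buf_step-sized buffers,
		 * starting after the struct efx_rx_page_state header and
		 * reserving IP-alignment and XDP headroom ahead of the
		 * packet data in each buffer.
		 */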
		do {
			index = rx_queue->added_count & rx_queue->ptr_mask;
			rx_buf = efx_rx_buffer(rx_queue, index);
			rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
					   EFX_XDP_HEADROOM;
			rx_buf->page = page;
			rx_buf->page_offset = page_offset + efx->rx_ip_align +
					      EFX_XDP_HEADROOM;
			rx_buf->len = efx->rx_dma_len;
			rx_buf->flags = 0;
			++rx_queue->added_count;
			get_page(page);
			dma_addr += efx->rx_page_buf_step;
			page_offset += efx->rx_page_buf_step;
		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);

		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
	} while (++count < efx->rx_pages_per_batch);

	return 0;
}

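/* Work out how RX buffers are laid out within each DMA-mapped page.
 * Illustrative example (not in the original source): with 4KiB pages and a
 * standard 1500-byte MTU the step size typically allows two buffers per
 * page; the page begins with the struct efx_rx_page_state header, and each
 * buffer keeps IP-alignment plus XDP headroom in front of its data.
 */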
void efx_rx_config_page_split(struct efx_nic *efx)
{
	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
				      EFX_XDP_HEADROOM + EFX_XDP_TAILROOM,
				      EFX_RX_BUF_ALIGNMENT);
	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
		 efx->rx_page_buf_step);
	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
		efx->rx_bufs_per_page;
	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
					       efx->rx_bufs_per_page);
}

/* efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue:		RX descriptor queue
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->max_fill. If there is insufficient atomic
 * memory to do so, a slow fill will be scheduled.
 *
 * The caller must provide serialisation (none is used here). In practice,
 * this means this function must run from the NAPI handler, or be called
 * when NAPI is disabled.
 */
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int fill_level, batch_size;
	int space, rc = 0;

	if (!rx_queue->refill_enabled)
		return;

	/* Calculate current fill level, and exit if we don't need to fill */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
	if (fill_level >= rx_queue->fast_fill_trigger)
		goto out;

	/* Record minimum fill level */
	if (unlikely(fill_level < rx_queue->min_fill)) {
		if (fill_level)
			rx_queue->min_fill = fill_level;
	}

	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
	space = rx_queue->max_fill - fill_level;
	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filling descriptor ring from"
		   " level %d to level %d\n",
		   efx_rx_queue_index(rx_queue), fill_level,
		   rx_queue->max_fill);

	do {
		rc = efx_init_rx_buffers(rx_queue, atomic);
		if (unlikely(rc)) {
			/* Ensure that we don't leave the rx queue empty */
			efx_schedule_slow_fill(rx_queue);
			goto out;
		}
	} while ((space -= batch_size) >= batch_size);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filled descriptor ring "
		   "to level %d\n", efx_rx_queue_index(rx_queue),
		   rx_queue->added_count - rx_queue->removed_count);

out:
	if (rx_queue->notified_count != rx_queue->added_count)
		efx_nic_notify_rx_desc(rx_queue);
}

/* Pass a received packet up through GRO. GRO can handle pages
 * regardless of checksum state and skbs with a good checksum.
 */
void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
		  unsigned int n_frags, u8 *eh, __wsum csum)
{
	struct napi_struct *napi = &channel->napi_str;
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	skb = napi_get_frags(napi);
	if (unlikely(!skb)) {
		struct efx_rx_queue *rx_queue;

		rx_queue = efx_channel_get_rx_queue(channel);
		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
		return;
	}

	if (efx->net_dev->features & NETIF_F_RXHASH &&
	    efx_rx_buf_hash_valid(efx, eh))
		skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
			     PKT_HASH_TYPE_L3);
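	/* A non-zero hardware checksum lets the stack treat the packet as
	 * CHECKSUM_COMPLETE; otherwise fall back to the per-packet
	 * "checksum verified" flag from the RX event.
	 */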
	if (csum) {
		skb->csum = csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
	} else {
		skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
				  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
	}
	skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);

	for (;;) {
		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
				   rx_buf->page, rx_buf->page_offset,
				   rx_buf->len);
		rx_buf->page = NULL;
		skb->len += rx_buf->len;
		if (skb_shinfo(skb)->nr_frags == n_frags)
			break;

		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
	}

	skb->data_len = skb->len;
	skb->truesize += n_frags * efx->rx_buffer_truesize;

	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	napi_gro_frags(napi);
}

/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
 * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
 */
struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
{
	struct list_head *head = &efx->rss_context.list;
	struct efx_rss_context *ctx, *new;
	u32 id = 1; /* Don't use zero, that refers to the master RSS context */

	WARN_ON(!mutex_is_locked(&efx->rss_lock));

	/* Search for first gap in the numbering */
	list_for_each_entry(ctx, head, list) {
		if (ctx->user_id != id)
			break;
		id++;
		/* Check for wrap. If this happens, we have nearly 2^32
		 * allocated RSS contexts, which seems unlikely.
		 */
		if (WARN_ON_ONCE(!id))
			return NULL;
	}

	/* Create the new entry */
	new = kmalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;
	new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
	new->rx_hash_udp_4tuple = false;

	/* Insert the new entry into the gap */
	new->user_id = id;
	list_add_tail(&new->list, &ctx->list);
	return new;
}

struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
{
	struct list_head *head = &efx->rss_context.list;
	struct efx_rss_context *ctx;

	WARN_ON(!mutex_is_locked(&efx->rss_lock));

	list_for_each_entry(ctx, head, list)
		if (ctx->user_id == id)
			return ctx;
	return NULL;
}

void efx_free_rss_context_entry(struct efx_rss_context *ctx)
{
	list_del(&ctx->list);
	kfree(ctx);
}

void efx_set_default_rx_indir_table(struct efx_nic *efx,
				    struct efx_rss_context *ctx)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
		ctx->rx_indir_table[i] =
			ethtool_rxfh_indir_default(i, efx->rss_spread);
}

/**
 * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
 * @spec: Specification to test
 *
 * Return: %true if the specification is a non-drop RX filter that
 * matches a local MAC address I/G bit value of 1 or matches a local
 * IPv4 or IPv6 address value in the respective multicast address
 * range. Otherwise %false.
 */
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
{
	if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
	    spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
		return false;

	if (spec->match_flags &
	    (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
	    is_multicast_ether_addr(spec->loc_mac))
		return true;

	if ((spec->match_flags &
	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
		if (spec->ether_type == htons(ETH_P_IP) &&
		    ipv4_is_multicast(spec->loc_host[0]))
			return true;
		if (spec->ether_type == htons(ETH_P_IPV6) &&
		    ((const u8 *)spec->loc_host)[0] == 0xff)
			return true;
	}

	return false;
}

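/* Hashing and equality of filter specs cover the fields from vport_id to
 * the end of the structure; match_flags and the RX/TX flags are compared
 * separately in efx_filter_spec_equal().
 */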
bool efx_filter_spec_equal(const struct efx_filter_spec *left,
			   const struct efx_filter_spec *right)
{
	if ((left->match_flags ^ right->match_flags) |
	    ((left->flags ^ right->flags) &
	     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
		return false;

	return memcmp(&left->vport_id, &right->vport_id,
		      sizeof(struct efx_filter_spec) -
		      offsetof(struct efx_filter_spec, vport_id)) == 0;
}

u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
{
	BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3);
	return jhash2((const u32 *)&spec->vport_id,
		      (sizeof(struct efx_filter_spec) -
		       offsetof(struct efx_filter_spec, vport_id)) / 4,
		      0);
}

#ifdef CONFIG_RFS_ACCEL
bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
			bool *force)
{
	if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
		/* ARFS is currently updating this entry, leave it */
		return false;
	}
	if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
		/* ARFS tried and failed to update this, so it's probably out
		 * of date. Remove the filter and the ARFS rule entry.
		 */
		rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
		*force = true;
		return true;
	} else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
		/* ARFS has moved on, so old filter is not needed. Since we did
		 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
		 * not be removed by efx_rps_hash_del() subsequently.
		 */
		*force = true;
		return true;
	}
	/* Remove it iff ARFS wants to. */
	return true;
}

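/* ARFS state lives in a simple chained hash table of
 * EFX_ARFS_HASH_TABLE_SIZE buckets, keyed by the filter-spec hash above;
 * callers must hold efx->rps_hash_lock.
 */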
static
struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
				       const struct efx_filter_spec *spec)
{
	u32 hash = efx_filter_spec_hash(spec);

	lockdep_assert_held(&efx->rps_hash_lock);
	if (!efx->rps_hash_table)
		return NULL;
	return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
}

struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
					const struct efx_filter_spec *spec)
{
	struct efx_arfs_rule *rule;
	struct hlist_head *head;
	struct hlist_node *node;

	head = efx_rps_hash_bucket(efx, spec);
	if (!head)
		return NULL;
	hlist_for_each(node, head) {
		rule = container_of(node, struct efx_arfs_rule, node);
		if (efx_filter_spec_equal(spec, &rule->spec))
			return rule;
	}
	return NULL;
}

struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
				       const struct efx_filter_spec *spec,
				       bool *new)
{
	struct efx_arfs_rule *rule;
	struct hlist_head *head;
	struct hlist_node *node;

	head = efx_rps_hash_bucket(efx, spec);
	if (!head)
		return NULL;
	hlist_for_each(node, head) {
		rule = container_of(node, struct efx_arfs_rule, node);
		if (efx_filter_spec_equal(spec, &rule->spec)) {
			*new = false;
			return rule;
		}
	}
	rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
	*new = true;
	if (rule) {
		memcpy(&rule->spec, spec, sizeof(rule->spec));
		hlist_add_head(&rule->node, head);
	}
	return rule;
}

void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
{
	struct efx_arfs_rule *rule;
	struct hlist_head *head;
	struct hlist_node *node;

	head = efx_rps_hash_bucket(efx, spec);
	if (WARN_ON(!head))
		return;
	hlist_for_each(node, head) {
		rule = container_of(node, struct efx_arfs_rule, node);
		if (efx_filter_spec_equal(spec, &rule->spec)) {
			/* Someone already reused the entry. We know that if
			 * this check doesn't fire (i.e. filter_id == REMOVING)
			 * then the REMOVING mark was put there by our caller,
			 * because caller is holding a lock on filter table and
			 * only holders of that lock set REMOVING.
			 */
			if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
				return;
			hlist_del(node);
			kfree(rule);
			return;
		}
	}
	/* We didn't find it. */
	WARN_ON(1);
}
#endif

int efx_probe_filters(struct efx_nic *efx)
{
	int rc;

	mutex_lock(&efx->mac_lock);
	rc = efx->type->filter_table_probe(efx);
	if (rc)
		goto out_unlock;

#ifdef CONFIG_RFS_ACCEL
	if (efx->type->offload_features & NETIF_F_NTUPLE) {
		struct efx_channel *channel;
		int i, success = 1;

		efx_for_each_channel(channel, efx) {
			channel->rps_flow_id =
				kcalloc(efx->type->max_rx_ip_filters,
					sizeof(*channel->rps_flow_id),
					GFP_KERNEL);
			if (!channel->rps_flow_id)
				success = 0;
			else
				for (i = 0;
				     i < efx->type->max_rx_ip_filters;
				     ++i)
					channel->rps_flow_id[i] =
						RPS_FLOW_ID_INVALID;
			channel->rfs_expire_index = 0;
			channel->rfs_filter_count = 0;
		}

		if (!success) {
			efx_for_each_channel(channel, efx)
				kfree(channel->rps_flow_id);
			efx->type->filter_table_remove(efx);
			rc = -ENOMEM;
			goto out_unlock;
		}
	}
#endif
out_unlock:
	mutex_unlock(&efx->mac_lock);
	return rc;
}

void efx_remove_filters(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx) {
		cancel_delayed_work_sync(&channel->filter_work);
		kfree(channel->rps_flow_id);
		channel->rps_flow_id = NULL;
	}
#endif
	efx->type->filter_table_remove(efx);
}

#ifdef CONFIG_RFS_ACCEL

static void efx_filter_rfs_work(struct work_struct *data)
{
	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
							      work);
	struct efx_nic *efx = efx_netdev_priv(req->net_dev);
	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
	int slot_idx = req - efx->rps_slot;
	struct efx_arfs_rule *rule;
	u16 arfs_id = 0;
	int rc;

	rc = efx->type->filter_insert(efx, &req->spec, true);
	if (rc >= 0)
		/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
		rc %= efx->type->max_rx_ip_filters;
	if (efx->rps_hash_table) {
		spin_lock_bh(&efx->rps_hash_lock);
		rule = efx_rps_hash_find(efx, &req->spec);
		/* The rule might have already gone, if someone else's request
		 * for the same spec was already worked and then expired before
		 * we got around to our work. In that case we have nothing
		 * tying us to an arfs_id, meaning that as soon as the filter
		 * is considered for expiry it will be removed.
		 */
		if (rule) {
			if (rc < 0)
				rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
			else
				rule->filter_id = rc;
			arfs_id = rule->arfs_id;
		}
		spin_unlock_bh(&efx->rps_hash_lock);
	}
	if (rc >= 0) {
		/* Remember this so we can check whether to expire the filter
		 * later.
		 */
		mutex_lock(&efx->rps_mutex);
		if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
			channel->rfs_filter_count++;
		channel->rps_flow_id[rc] = req->flow_id;
		mutex_unlock(&efx->rps_mutex);

		if (req->spec.ether_type == htons(ETH_P_IP))
			netif_info(efx, rx_status, efx->net_dev,
				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				   req->spec.rem_host, ntohs(req->spec.rem_port),
				   req->spec.loc_host, ntohs(req->spec.loc_port),
				   req->rxq_index, req->flow_id, rc, arfs_id);
		else
			netif_info(efx, rx_status, efx->net_dev,
				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				   req->spec.rem_host, ntohs(req->spec.rem_port),
				   req->spec.loc_host, ntohs(req->spec.loc_port),
				   req->rxq_index, req->flow_id, rc, arfs_id);
		channel->n_rfs_succeeded++;
	} else {
		if (req->spec.ether_type == htons(ETH_P_IP))
			netif_dbg(efx, rx_status, efx->net_dev,
				  "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				  req->spec.rem_host, ntohs(req->spec.rem_port),
				  req->spec.loc_host, ntohs(req->spec.loc_port),
				  req->rxq_index, req->flow_id, rc, arfs_id);
		else
			netif_dbg(efx, rx_status, efx->net_dev,
				  "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				  req->spec.rem_host, ntohs(req->spec.rem_port),
				  req->spec.loc_host, ntohs(req->spec.loc_port),
				  req->rxq_index, req->flow_id, rc, arfs_id);
		channel->n_rfs_failed++;
		/* We're overloading the NIC's filter tables, so let's do a
		 * chunk of extra expiry work.
		 */
		__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
						     100u));
	}

	/* Release references */
	clear_bit(slot_idx, &efx->rps_slot_map);
	dev_put(req->net_dev);
}

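/* ndo_rx_flow_steer() handler: build a filter spec from the skb's flow keys,
 * record it in the ARFS hash table (if present) and defer the actual
 * hardware filter insertion to a work item, since filter_insert may sleep.
 */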
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
		   u16 rxq_index, u32 flow_id)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	struct efx_async_filter_insertion *req;
	struct efx_arfs_rule *rule;
	struct flow_keys fk;
	int slot_idx;
	bool new;
	int rc;

	/* find a free slot */
	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
			break;
	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
		return -EBUSY;

	if (flow_id == RPS_FLOW_ID_INVALID) {
		rc = -EINVAL;
		goto out_clear;
	}

	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
		rc = -EPROTONOSUPPORT;
		goto out_clear;
	}

	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
		rc = -EPROTONOSUPPORT;
		goto out_clear;
	}
	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
		rc = -EPROTONOSUPPORT;
		goto out_clear;
	}

	req = efx->rps_slot + slot_idx;
	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
			   rxq_index);
	req->spec.match_flags =
		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
	req->spec.ether_type = fk.basic.n_proto;
	req->spec.ip_proto = fk.basic.ip_proto;

	if (fk.basic.n_proto == htons(ETH_P_IP)) {
		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
	} else {
		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
		       sizeof(struct in6_addr));
		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
		       sizeof(struct in6_addr));
	}

	req->spec.rem_port = fk.ports.src;
	req->spec.loc_port = fk.ports.dst;

	if (efx->rps_hash_table) {
		/* Add it to ARFS hash table */
		spin_lock(&efx->rps_hash_lock);
		rule = efx_rps_hash_add(efx, &req->spec, &new);
		if (!rule) {
			rc = -ENOMEM;
			goto out_unlock;
		}
		if (new)
			rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
		rc = rule->arfs_id;
		/* Skip if existing or pending filter already does the right thing */
		if (!new && rule->rxq_index == rxq_index &&
		    rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
			goto out_unlock;
		rule->rxq_index = rxq_index;
		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
		spin_unlock(&efx->rps_hash_lock);
	} else {
		/* Without an ARFS hash table, we just use arfs_id 0 for all
		 * filters.  This means if multiple flows hash to the same
		 * flow_id, all but the most recently touched will be eligible
		 * for expiry.
		 */
		rc = 0;
	}

	/* Queue the request */
	dev_hold(req->net_dev = net_dev);
	INIT_WORK(&req->work, efx_filter_rfs_work);
	req->rxq_index = rxq_index;
	req->flow_id = flow_id;
	schedule_work(&req->work);
	return rc;
out_unlock:
	spin_unlock(&efx->rps_hash_lock);
out_clear:
	clear_bit(slot_idx, &efx->rps_slot_map);
	return rc;
}

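/* Scan up to @quota entries of the channel's flow-id table, asking the NIC
 * to expire filters that ARFS no longer needs. Returns false (and does no
 * work) if another expiry pass already holds rps_mutex.
 */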
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
{
	bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
	struct efx_nic *efx = channel->efx;
	unsigned int index, size, start;
	u32 flow_id;

	if (!mutex_trylock(&efx->rps_mutex))
		return false;
	expire_one = efx->type->filter_rfs_expire_one;
	index = channel->rfs_expire_index;
	start = index;
	size = efx->type->max_rx_ip_filters;
	while (quota) {
		flow_id = channel->rps_flow_id[index];

		if (flow_id != RPS_FLOW_ID_INVALID) {
			quota--;
			if (expire_one(efx, flow_id, index)) {
				netif_info(efx, rx_status, efx->net_dev,
					   "expired filter %d [channel %u flow %u]\n",
					   index, channel->channel, flow_id);
				channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
				channel->rfs_filter_count--;
			}
		}
		if (++index == size)
			index = 0;
		/* If we were called with a quota that exceeds the total number
		 * of filters in the table (which shouldn't happen, but could
		 * if two callers race), ensure that we don't loop forever -
		 * stop when we've examined every row of the table.
		 */
		if (index == start)
			break;
	}

	channel->rfs_expire_index = index;
	mutex_unlock(&efx->rps_mutex);
	return true;
}

#endif /* CONFIG_RFS_ACCEL */