// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/prefetch.h>
#include <linux/moduleparam.h>
#include <linux/iommu.h>
#include <net/ip.h>
#include <net/checksum.h>
#include <net/xdp.h>
#include <linux/bpf_trace.h>
#include "net_driver.h"
#include "efx.h"
#include "rx_common.h"
#include "filter.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"

/* Preferred number of descriptors to fill at once */
#define EFX_RX_PREFERRED_BATCH 8U

/* Maximum rx prefix used by any architecture. */
#define EFX_MAX_RX_PREFIX_SIZE 16

/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS 128u

/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
				      EFX_RX_USR_BUF_SIZE)
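
/* With scatter enabled, a frame larger than one RX buffer is spread
 * across consecutive descriptors, so this bounds how many buffers a
 * single completion may legitimately cover.
 */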

static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
				     struct efx_rx_buffer *rx_buf,
				     int len)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;

	if (likely(len <= max_len))
		return;

	/* The packet must be discarded, but this is only a fatal error
	 * if the caller indicated it was
	 */
	rx_buf->flags |= EFX_RX_PKT_DISCARD;

	if (net_ratelimit())
		netif_err(efx, rx_err, efx->net_dev,
			  "RX queue %d overlength RX event (%#x > %#x)\n",
			  efx_rx_queue_index(rx_queue), len, max_len);

	efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
}

/* Allocate and construct an SKB around page fragments */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
				     struct efx_rx_buffer *rx_buf,
				     unsigned int n_frags,
				     u8 *eh, int hdr_len)
{
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	/* Allocate an SKB to store the headers */
	skb = netdev_alloc_skb(efx->net_dev,
			       efx->rx_ip_align + efx->rx_prefix_size +
			       hdr_len);
	if (unlikely(skb == NULL)) {
		atomic_inc(&efx->n_rx_noskb_drops);
		return NULL;
	}

	EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len);
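
	/* Copy the headers, plus the NIC prefix that precedes them, into
	 * the skb's linear area; skb_reserve() below then skips the
	 * prefix and alignment padding so skb->data lands on the
	 * Ethernet header.
	 */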
	memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
	       efx->rx_prefix_size + hdr_len);
	skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
	__skb_put(skb, hdr_len);

	/* Append the remaining page(s) onto the frag list */
	if (rx_buf->len > hdr_len) {
		rx_buf->page_offset += hdr_len;
		rx_buf->len -= hdr_len;
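
		/* Attach each descriptor's page fragment to the skb,
		 * clearing the page pointer so the skb owns the page
		 * reference and the buffer is not also recycled.
		 */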
		for (;;) {
			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
					rx_buf->page, rx_buf->page_offset,
					rx_buf->len, efx->rx_buffer_truesize);
			rx_buf->page = NULL;

			if (skb_shinfo(skb)->nr_frags == n_frags)
				break;

			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
		}
	} else {
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
		n_frags = 0;
	}

	/* Move past the ethernet header */
	skb->protocol = eth_type_trans(skb, efx->net_dev);

	skb_mark_napi_id(skb, &channel->napi_str);

	return skb;
}
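
/* Handle a received packet.  First half: does not touch the packet
 * payload; that is deferred to __efx_rx_packet(), the second half.
 */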
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
		   unsigned int n_frags, unsigned int len, u16 flags)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
	struct efx_rx_buffer *rx_buf;

	rx_queue->rx_packets++;

	rx_buf = efx_rx_buffer(rx_queue, index);
	rx_buf->flags |= flags;

	/* Validate the number of fragments and completed length */
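	/* For a scattered packet the completed length must lie in the
	 * window ((n_frags - 1) * rx_dma_len, n_frags * rx_dma_len];
	 * anything outside it indicates a hardware or driver bug.
	 */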
	if (n_frags == 1) {
		if (!(flags & EFX_RX_PKT_PREFIX_LEN))
			efx_rx_packet__check_len(rx_queue, rx_buf, len);
	} else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
		   unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
		   unlikely(len > n_frags * efx->rx_dma_len) ||
		   unlikely(!efx->rx_scatter)) {
		/* If this isn't an explicit discard request, either
		 * the hardware or the driver is broken.
		 */
		WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
		rx_buf->flags |= EFX_RX_PKT_DISCARD;
	}

	netif_vdbg(efx, rx_status, efx->net_dev,
		   "RX queue %d received ids %x-%x len %d %s%s\n",
		   efx_rx_queue_index(rx_queue), index,
		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
		   (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
		   (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");

	/* Discard packet, if instructed to do so.  Process the
	 * previous receive first.
	 */
	if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
		efx_rx_flush_packet(channel);
		efx_discard_rx_packet(channel, rx_buf, n_frags);
		return;
	}

	if (n_frags == 1 && !(flags & EFX_RX_PKT_PREFIX_LEN))
		rx_buf->len = len;

	/* Release and/or sync the DMA mapping - assumes all RX buffers
	 * consumed in-order per RX queue.
	 */
	efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(efx_rx_buf_va(rx_buf));
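
	/* The NIC writes a small metadata prefix (completed length, flow
	 * hash and/or timestamp, depending on the NIC generation) ahead
	 * of the frame; step past it so the buffer points at the
	 * Ethernet header.
	 */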
	rx_buf->page_offset += efx->rx_prefix_size;
	rx_buf->len -= efx->rx_prefix_size;

	if (n_frags > 1) {
		/* Release/sync DMA mapping for additional fragments.
		 * Fix length for last fragment.
		 */
		unsigned int tail_frags = n_frags - 1;

		for (;;) {
			rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
			if (--tail_frags == 0)
				break;
			efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
		}
		rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
		efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
	}

	/* All fragments have been DMA-synced, so recycle pages. */
	rx_buf = efx_rx_buffer(rx_queue, index);
	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	efx_rx_flush_packet(channel);
	channel->rx_pkt_n_frags = n_frags;
	channel->rx_pkt_index = index;
}

static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
			   struct efx_rx_buffer *rx_buf,
			   unsigned int n_frags)
{
	struct sk_buff *skb;
	u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);

	skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
	if (unlikely(skb == NULL)) {
		struct efx_rx_queue *rx_queue;

		rx_queue = efx_channel_get_rx_queue(channel);
		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
		return;
	}
	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	/* Set the SKB flags */
	skb_checksum_none_assert(skb);
	if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
	}

	efx_rx_skb_attach_timestamp(channel, skb);
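
	/* Channel types that provide a receive_skb() handler (such as
	 * the PTP channel) consume the skb themselves; a true return
	 * means it must not also be passed up the stack.
	 */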
	if (channel->type->receive_skb)
		if (channel->type->receive_skb(channel, skb))
			return;

	/* Pass the packet up */
	if (channel->rx_list != NULL)
		/* Add to list, will pass up later */
		list_add_tail(&skb->list, channel->rx_list);
	else
		/* No list, so pass it up now */
		netif_receive_skb(skb);
}

/** efx_do_xdp: perform XDP processing on a received packet
 *
 * Returns true if packet should still be delivered.
 */
static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
		       struct efx_rx_buffer *rx_buf, u8 **ehp)
{
	u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
	struct efx_rx_queue *rx_queue;
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	struct xdp_buff xdp;
	u32 xdp_act;
	s16 offset;
	int err;

	xdp_prog = rcu_dereference_bh(efx->xdp_prog);
	if (!xdp_prog)
		return true;

	rx_queue = efx_channel_get_rx_queue(channel);

	if (unlikely(channel->rx_pkt_n_frags > 1)) {
		/* We can't do XDP on fragmented packets - drop. */
		efx_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  "XDP is not possible with multiple receive fragments (%d)\n",
				  channel->rx_pkt_n_frags);
		channel->n_rx_xdp_bad_drops++;
		return false;
	}

	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
				rx_buf->len, DMA_FROM_DEVICE);

	/* Save the rx prefix. */
	EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
	memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
	       efx->rx_prefix_size);
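	/* The prefix sits in the headroom that the XDP program is free
	 * to rewrite (e.g. with bpf_xdp_adjust_head()), so keep a copy
	 * to restore on XDP_PASS.
	 */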

	xdp_init_buff(&xdp, efx->rx_page_buf_step, &rx_queue->xdp_rxq_info);
	/* No support yet for XDP metadata */
	xdp_prepare_buff(&xdp, *ehp - EFX_XDP_HEADROOM, EFX_XDP_HEADROOM,
			 rx_buf->len, false);

	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);

	offset = (u8 *)xdp.data - *ehp;
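	/* A non-zero offset means the program moved the start of frame
	 * data within the headroom using bpf_xdp_adjust_head().
	 */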

	switch (xdp_act) {
	case XDP_PASS:
		/* Fix up rx prefix. */
		if (offset) {
			*ehp += offset;
			rx_buf->page_offset += offset;
			rx_buf->len -= offset;
			memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
			       efx->rx_prefix_size);
		}
		break;

	case XDP_TX:
		/* Buffer ownership passes to tx on success. */
		xdpf = xdp_convert_buff_to_frame(&xdp);
		err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
		if (unlikely(err != 1)) {
			efx_free_rx_buffers(rx_queue, rx_buf, 1);
			if (net_ratelimit())
				netif_err(efx, rx_err, efx->net_dev,
					  "XDP TX failed (%d)\n", err);
			channel->n_rx_xdp_bad_drops++;
			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		} else {
			channel->n_rx_xdp_tx++;
		}
		break;

	case XDP_REDIRECT:
		err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
		if (unlikely(err)) {
			efx_free_rx_buffers(rx_queue, rx_buf, 1);
			if (net_ratelimit())
				netif_err(efx, rx_err, efx->net_dev,
					  "XDP redirect failed (%d)\n", err);
			channel->n_rx_xdp_bad_drops++;
			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		} else {
			channel->n_rx_xdp_redirect++;
		}
		break;

	default:
		bpf_warn_invalid_xdp_action(efx->net_dev, xdp_prog, xdp_act);
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
		channel->n_rx_xdp_bad_drops++;
		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		break;

	case XDP_ABORTED:
		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		fallthrough;
	case XDP_DROP:
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
		channel->n_rx_xdp_drops++;
		break;
	}

	return xdp_act == XDP_PASS;
}

/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = channel->efx;
	struct efx_rx_buffer *rx_buf =
		efx_rx_buffer(rx_queue, channel->rx_pkt_index);
	u8 *eh = efx_rx_buf_va(rx_buf);

	/* Read length from the prefix if necessary.  This already
	 * excludes the length of the prefix itself.
	 */
	if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) {
		rx_buf->len = le16_to_cpup((__le16 *)
					   (eh + efx->rx_packet_len_offset));
		/* A known issue may prevent this being filled in;
		 * if that happens, just drop the packet.
		 * Must do that in the driver since passing a zero-length
		 * packet up to the stack may cause a crash.
		 */
		if (unlikely(!rx_buf->len)) {
			efx_free_rx_buffers(rx_queue, rx_buf,
					    channel->rx_pkt_n_frags);
			channel->n_rx_frm_trunc++;
			goto out;
		}
	}

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		efx_loopback_rx_packet(efx, eh, rx_buf->len);
		efx_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		goto out;
	}

	if (!efx_do_xdp(efx, channel, rx_buf, &eh))
		goto out;

	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
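
	/* TCP packets go through GRO unless the channel type supplies
	 * its own receive_skb() handler; everything else is delivered
	 * as an ordinary skb.
	 */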
	if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
	else
		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
	channel->rx_pkt_n_frags = 0;
}