// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

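/* Number of references the stack still holds on this buffer's page: the raw
 * page refcount minus the bias taken when the page was allocated.
 */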
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

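/* Pop a buffer state off the free list, or return NULL if the list is empty. */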
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

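/* Find a buffer that can be posted again right away: prefer the recycled
 * list, then scan a few used entries whose pages have no outstanding SKB
 * references, and as a last resort (RDA only) discard a used entry so a
 * fresh page can be allocated in its place.
 */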
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

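/* Attach a page to @buf_state: either allocate and DMA-map a fresh page (raw
 * addressing) or take the next unused page from the queue page list. The page
 * refcount is inflated up front so outstanding SKB references can be tracked
 * through the pagecnt bias.
 */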
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
				 struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_rx_alloc_rings_cfg *cfg,
				 struct gve_rx_ring *rx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots = cfg->raw_addressing ?
		priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx))
			goto err;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (!cfg->raw_addressing && !cfg->qpls) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc QPL ring before allocing QPLs\n");
		return -EINVAL;
	}

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

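/* Refill the buffer queue with as many descriptors as both the buffer and
 * completion queues have room for, writing the doorbell each time the tail
 * crosses a GVE_RX_BUF_THRESH_DQO boundary.
 */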
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);
		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

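/* Decide whether the page behind @buf_state can serve another buffer-sized
 * chunk. Pages that only fit one buffer, or that wrapped a full cycle of
 * offsets without the refcount ever dropping to one, are parked on the used
 * list instead of being recycled.
 */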
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const u16 data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

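/* Translate the completion descriptor's checksum bits into skb->ip_summed. */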
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

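/* QPL mode only: returns true once nearly all buffer states are tied up in
 * outstanding SKBs, i.e. the ring should start copying into freshly
 * allocated pages instead of handing out QPL pages.
 */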
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

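/* Copy the received fragment into a newly allocated page so the QPL buffer
 * can be recycled immediately instead of waiting for the SKB to be freed.
 */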
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple SKBs for a single RX packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

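/* NAPI poll for a DQO RX ring: drain up to @budget completed packets from the
 * completion queue, hand finished SKBs to GRO, then repost RX buffers.
 */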
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}