// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

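/* Number of page references held outside the driver (i.e. by SKB frags in
 * the stack). The driver takes a large reference count on each page up front
 * and tracks its own share in pagecnt_bias, so a result of zero means no SKB
 * still points into the page and it is safe to reuse.
 */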
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

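/* Buffer states are linked through their s16 ->next index, with -1 as the
 * list terminator. Allocation pops the head of the ring's free list; a state
 * whose ->next points at its own index is considered allocated.
 */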
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

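/* Look for a buffer that can be reposted. Preference order:
 * 1) recycled_buf_states, which are immediately usable;
 * 2) a bounded scan of used_buf_states for an entry whose page has no
 *    outstanding SKB references left;
 * 3) for RDA only, when no free buf states remain, discard one used entry
 *    (dropping the driver's page references) so its state can be reused with
 *    a newly allocated page.
 */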
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

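/* Attach a page to @buf_state. With raw addressing (RDA) a fresh page is
 * allocated and DMA mapped; with a QPL the next unused page of the
 * pre-registered queue page list is handed out. Either way the driver takes
 * INT_MAX references on the page up front and records them in pagecnt_bias,
 * so handing a buffer to an SKB only costs a bias decrement rather than an
 * atomic page_ref operation.
 */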
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
				 struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

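/* Header buffers for header-split are allocated as a single coherent block
 * containing one priv->header_buf_size slot per buffer queue entry. Posted
 * descriptors point at the slot matching their ring index and the completion
 * path copies the header out of the slot addressed by the completion index.
 */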
static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

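/* Allocate one DQO RX ring: the buffer and completion descriptor rings, the
 * buf_state table (four states per buffer queue slot, capped at S16_MAX, for
 * RDA, or one per QPL page otherwise), optional header-split buffers, and the
 * queue resources block shared with the device.
 */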
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_rx_alloc_rings_cfg *cfg,
				 struct gve_rx_ring *rx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots = cfg->raw_addressing ?
		priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx))
			goto err;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

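/* Publish the current buffer queue tail to the device by writing it to the
 * queue's doorbell register in the doorbell BAR.
 */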
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (!cfg->raw_addressing && !cfg->qpls) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc QPL ring before allocing QPLs\n");
		return -EINVAL;
	}

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

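/* Refill the buffer queue. The number of buffers posted is limited both by
 * free space in the buffer queue and by the completion queue's free slots,
 * and the doorbell is only rung when the tail crosses a GVE_RX_BUF_THRESH_DQO
 * boundary rather than once per buffer.
 */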
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);
		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

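/* Decide whether a buffer can stay in circulation after its current chunk has
 * been handed to the stack. When at least two data buffers fit in a page, the
 * buf_state walks the page one data_buffer_size chunk at a time and goes back
 * on the recycled list. last_single_ref_offset remembers where the page was
 * last seen with only one outstanding reference (i.e. every earlier chunk had
 * already been released); wrapping all the way around to that offset without
 * seeing that state again means reuse cannot be proven safe, so the buffer is
 * parked on the used list instead.
 */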
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const u16 data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

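/* The QPL page pool is fixed in size, so its pages cannot be left with the
 * stack indefinitely. Once the count of used (still referenced) buf states
 * comes within GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD of the total, received
 * data is copied into freshly allocated pages instead so the QPL buffers can
 * be recycled immediately.
 */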
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* The page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of the DMA buffer for the CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

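/* Fill in GSO metadata (gso_type and gso_size) for a hardware-coalesced (RSC)
 * packet; only TCP over IPv4 or IPv6 is handled, anything else is rejected.
 */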
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

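/* NAPI poll for a DQO RX queue. Completion descriptors are consumed until the
 * generation bit shows the next entry is not yet owned by the driver or until
 * the budget is reached, then buffers are reposted and stats updated. The
 * budget counts completed packets (end-of-packet descriptors), not individual
 * completion descriptors.
 */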
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}
