1 | // SPDX-License-Identifier: (GPL-2.0 OR MIT) |
2 | /* Google virtual Ethernet (gve) driver |
3 | * |
4 | * Copyright (C) 2015-2021 Google, Inc. |
5 | */ |
6 | |
7 | #include "gve.h" |
8 | #include "gve_adminq.h" |
9 | #include "gve_utils.h" |
10 | #include "gve_dqo.h" |
11 | #include <net/ip.h> |
12 | #include <linux/tcp.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/skbuff.h> |
15 | |
16 | /* Returns true if tx_bufs are available. */ |
17 | static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count) |
18 | { |
19 | int num_avail; |
20 | |
21 | if (!tx->dqo.qpl) |
22 | return true; |
23 | |
24 | num_avail = tx->dqo.num_tx_qpl_bufs - |
25 | (tx->dqo_tx.alloc_tx_qpl_buf_cnt - |
26 | tx->dqo_tx.free_tx_qpl_buf_cnt); |
27 | |
28 | if (count <= num_avail) |
29 | return true; |
30 | |
31 | /* Update cached value from dqo_compl. */ |
32 | tx->dqo_tx.free_tx_qpl_buf_cnt = |
33 | atomic_read_acquire(v: &tx->dqo_compl.free_tx_qpl_buf_cnt); |
34 | |
35 | num_avail = tx->dqo.num_tx_qpl_bufs - |
36 | (tx->dqo_tx.alloc_tx_qpl_buf_cnt - |
37 | tx->dqo_tx.free_tx_qpl_buf_cnt); |
38 | |
39 | return count <= num_avail; |
40 | } |
41 | |
42 | static s16 |
43 | gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx) |
44 | { |
45 | s16 index; |
46 | |
47 | index = tx->dqo_tx.free_tx_qpl_buf_head; |
48 | |
49 | /* No TX buffers available, try to steal the list from the |
50 | * completion handler. |
51 | */ |
52 | if (unlikely(index == -1)) { |
53 | tx->dqo_tx.free_tx_qpl_buf_head = |
54 | atomic_xchg(v: &tx->dqo_compl.free_tx_qpl_buf_head, new: -1); |
55 | index = tx->dqo_tx.free_tx_qpl_buf_head; |
56 | |
57 | if (unlikely(index == -1)) |
58 | return index; |
59 | } |
60 | |
61 | /* Remove TX buf from free list */ |
62 | tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index]; |
63 | |
64 | return index; |
65 | } |
66 | |
67 | static void |
68 | gve_free_tx_qpl_bufs(struct gve_tx_ring *tx, |
69 | struct gve_tx_pending_packet_dqo *pkt) |
70 | { |
71 | s16 index; |
72 | int i; |
73 | |
74 | if (!pkt->num_bufs) |
75 | return; |
76 | |
77 | index = pkt->tx_qpl_buf_ids[0]; |
78 | /* Create a linked list of buffers to be added to the free list */ |
79 | for (i = 1; i < pkt->num_bufs; i++) { |
80 | tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i]; |
81 | index = pkt->tx_qpl_buf_ids[i]; |
82 | } |
83 | |
84 | while (true) { |
85 | s16 old_head = atomic_read_acquire(v: &tx->dqo_compl.free_tx_qpl_buf_head); |
86 | |
87 | tx->dqo.tx_qpl_buf_next[index] = old_head; |
88 | if (atomic_cmpxchg(v: &tx->dqo_compl.free_tx_qpl_buf_head, |
89 | old: old_head, |
90 | new: pkt->tx_qpl_buf_ids[0]) == old_head) { |
91 | break; |
92 | } |
93 | } |
94 | |
95 | atomic_add(i: pkt->num_bufs, v: &tx->dqo_compl.free_tx_qpl_buf_cnt); |
96 | pkt->num_bufs = 0; |
97 | } |
98 | |
99 | /* Returns true if a gve_tx_pending_packet_dqo object is available. */ |
100 | static bool gve_has_pending_packet(struct gve_tx_ring *tx) |
101 | { |
102 | /* Check TX path's list. */ |
103 | if (tx->dqo_tx.free_pending_packets != -1) |
104 | return true; |
105 | |
106 | /* Check completion handler's list. */ |
107 | if (atomic_read_acquire(v: &tx->dqo_compl.free_pending_packets) != -1) |
108 | return true; |
109 | |
110 | return false; |
111 | } |
112 | |
113 | static struct gve_tx_pending_packet_dqo * |
114 | gve_alloc_pending_packet(struct gve_tx_ring *tx) |
115 | { |
116 | struct gve_tx_pending_packet_dqo *pending_packet; |
117 | s16 index; |
118 | |
119 | index = tx->dqo_tx.free_pending_packets; |
120 | |
121 | /* No pending_packets available, try to steal the list from the |
122 | * completion handler. |
123 | */ |
124 | if (unlikely(index == -1)) { |
125 | tx->dqo_tx.free_pending_packets = |
126 | atomic_xchg(v: &tx->dqo_compl.free_pending_packets, new: -1); |
127 | index = tx->dqo_tx.free_pending_packets; |
128 | |
129 | if (unlikely(index == -1)) |
130 | return NULL; |
131 | } |
132 | |
133 | pending_packet = &tx->dqo.pending_packets[index]; |
134 | |
135 | /* Remove pending_packet from free list */ |
136 | tx->dqo_tx.free_pending_packets = pending_packet->next; |
137 | pending_packet->state = GVE_PACKET_STATE_PENDING_DATA_COMPL; |
138 | |
139 | return pending_packet; |
140 | } |
141 | |
142 | static void |
143 | gve_free_pending_packet(struct gve_tx_ring *tx, |
144 | struct gve_tx_pending_packet_dqo *pending_packet) |
145 | { |
146 | s16 index = pending_packet - tx->dqo.pending_packets; |
147 | |
148 | pending_packet->state = GVE_PACKET_STATE_UNALLOCATED; |
149 | while (true) { |
150 | s16 old_head = atomic_read_acquire(v: &tx->dqo_compl.free_pending_packets); |
151 | |
152 | pending_packet->next = old_head; |
153 | if (atomic_cmpxchg(v: &tx->dqo_compl.free_pending_packets, |
154 | old: old_head, new: index) == old_head) { |
155 | break; |
156 | } |
157 | } |
158 | } |
159 | |
160 | /* gve_tx_free_desc - Cleans up all pending tx requests and buffers. |
161 | */ |
162 | static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) |
163 | { |
164 | int i; |
165 | |
166 | for (i = 0; i < tx->dqo.num_pending_packets; i++) { |
167 | struct gve_tx_pending_packet_dqo *cur_state = |
168 | &tx->dqo.pending_packets[i]; |
169 | int j; |
170 | |
171 | for (j = 0; j < cur_state->num_bufs; j++) { |
172 | if (j == 0) { |
173 | dma_unmap_single(tx->dev, |
174 | dma_unmap_addr(cur_state, dma[j]), |
175 | dma_unmap_len(cur_state, len[j]), |
176 | DMA_TO_DEVICE); |
177 | } else { |
178 | dma_unmap_page(tx->dev, |
179 | dma_unmap_addr(cur_state, dma[j]), |
180 | dma_unmap_len(cur_state, len[j]), |
181 | DMA_TO_DEVICE); |
182 | } |
183 | } |
184 | if (cur_state->skb) { |
185 | dev_consume_skb_any(skb: cur_state->skb); |
186 | cur_state->skb = NULL; |
187 | } |
188 | } |
189 | } |
190 | |
191 | void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx) |
192 | { |
193 | int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx: idx); |
194 | struct gve_tx_ring *tx = &priv->tx[idx]; |
195 | |
196 | if (!gve_tx_was_added_to_block(priv, queue_idx: idx)) |
197 | return; |
198 | |
199 | gve_remove_napi(priv, ntfy_idx); |
200 | gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); |
201 | netdev_tx_reset_queue(q: tx->netdev_txq); |
202 | gve_tx_clean_pending_packets(tx); |
203 | gve_tx_remove_from_block(priv, queue_idx: idx); |
204 | } |
205 | |
206 | static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, |
207 | struct gve_tx_alloc_rings_cfg *cfg) |
208 | { |
209 | struct device *hdev = &priv->pdev->dev; |
210 | int idx = tx->q_num; |
211 | size_t bytes; |
212 | |
213 | if (tx->q_resources) { |
214 | dma_free_coherent(dev: hdev, size: sizeof(*tx->q_resources), |
215 | cpu_addr: tx->q_resources, dma_handle: tx->q_resources_bus); |
216 | tx->q_resources = NULL; |
217 | } |
218 | |
219 | if (tx->dqo.compl_ring) { |
220 | bytes = sizeof(tx->dqo.compl_ring[0]) * |
221 | (tx->dqo.complq_mask + 1); |
222 | dma_free_coherent(dev: hdev, size: bytes, cpu_addr: tx->dqo.compl_ring, |
223 | dma_handle: tx->complq_bus_dqo); |
224 | tx->dqo.compl_ring = NULL; |
225 | } |
226 | |
227 | if (tx->dqo.tx_ring) { |
228 | bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1); |
229 | dma_free_coherent(dev: hdev, size: bytes, cpu_addr: tx->dqo.tx_ring, dma_handle: tx->bus); |
230 | tx->dqo.tx_ring = NULL; |
231 | } |
232 | |
233 | kvfree(addr: tx->dqo.pending_packets); |
234 | tx->dqo.pending_packets = NULL; |
235 | |
236 | kvfree(addr: tx->dqo.tx_qpl_buf_next); |
237 | tx->dqo.tx_qpl_buf_next = NULL; |
238 | |
239 | if (tx->dqo.qpl) { |
240 | gve_unassign_qpl(qpl_cfg: cfg->qpl_cfg, id: tx->dqo.qpl->id); |
241 | tx->dqo.qpl = NULL; |
242 | } |
243 | |
244 | netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n" , idx); |
245 | } |
246 | |
247 | static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx) |
248 | { |
249 | int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO * |
250 | tx->dqo.qpl->num_entries; |
251 | int i; |
252 | |
253 | tx->dqo.tx_qpl_buf_next = kvcalloc(n: num_tx_qpl_bufs, |
254 | size: sizeof(tx->dqo.tx_qpl_buf_next[0]), |
255 | GFP_KERNEL); |
256 | if (!tx->dqo.tx_qpl_buf_next) |
257 | return -ENOMEM; |
258 | |
259 | tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs; |
260 | |
261 | /* Generate free TX buf list */ |
262 | for (i = 0; i < num_tx_qpl_bufs - 1; i++) |
263 | tx->dqo.tx_qpl_buf_next[i] = i + 1; |
264 | tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - 1] = -1; |
265 | |
266 | atomic_set_release(v: &tx->dqo_compl.free_tx_qpl_buf_head, i: -1); |
267 | return 0; |
268 | } |
269 | |
270 | void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx) |
271 | { |
272 | int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx: idx); |
273 | struct gve_tx_ring *tx = &priv->tx[idx]; |
274 | |
275 | gve_tx_add_to_block(priv, queue_idx: idx); |
276 | |
277 | tx->netdev_txq = netdev_get_tx_queue(dev: priv->dev, index: idx); |
278 | gve_add_napi(priv, ntfy_idx, gve_poll: gve_napi_poll_dqo); |
279 | } |
280 | |
281 | static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, |
282 | struct gve_tx_alloc_rings_cfg *cfg, |
283 | struct gve_tx_ring *tx, |
284 | int idx) |
285 | { |
286 | struct device *hdev = &priv->pdev->dev; |
287 | int num_pending_packets; |
288 | size_t bytes; |
289 | int i; |
290 | |
291 | memset(tx, 0, sizeof(*tx)); |
292 | tx->q_num = idx; |
293 | tx->dev = hdev; |
294 | atomic_set_release(v: &tx->dqo_compl.hw_tx_head, i: 0); |
295 | |
296 | /* Queue sizes must be a power of 2 */ |
297 | tx->mask = cfg->ring_size - 1; |
298 | tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ? |
299 | priv->options_dqo_rda.tx_comp_ring_entries - 1 : |
300 | tx->mask; |
301 | |
302 | /* The max number of pending packets determines the maximum number of |
303 | * descriptors which maybe written to the completion queue. |
304 | * |
305 | * We must set the number small enough to make sure we never overrun the |
306 | * completion queue. |
307 | */ |
308 | num_pending_packets = tx->dqo.complq_mask + 1; |
309 | |
310 | /* Reserve space for descriptor completions, which will be reported at |
311 | * most every GVE_TX_MIN_RE_INTERVAL packets. |
312 | */ |
313 | num_pending_packets -= |
314 | (tx->dqo.complq_mask + 1) / GVE_TX_MIN_RE_INTERVAL; |
315 | |
316 | /* Each packet may have at most 2 buffer completions if it receives both |
317 | * a miss and reinjection completion. |
318 | */ |
319 | num_pending_packets /= 2; |
320 | |
321 | tx->dqo.num_pending_packets = min_t(int, num_pending_packets, S16_MAX); |
322 | tx->dqo.pending_packets = kvcalloc(n: tx->dqo.num_pending_packets, |
323 | size: sizeof(tx->dqo.pending_packets[0]), |
324 | GFP_KERNEL); |
325 | if (!tx->dqo.pending_packets) |
326 | goto err; |
327 | |
328 | /* Set up linked list of pending packets */ |
329 | for (i = 0; i < tx->dqo.num_pending_packets - 1; i++) |
330 | tx->dqo.pending_packets[i].next = i + 1; |
331 | |
332 | tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1; |
333 | atomic_set_release(v: &tx->dqo_compl.free_pending_packets, i: -1); |
334 | tx->dqo_compl.miss_completions.head = -1; |
335 | tx->dqo_compl.miss_completions.tail = -1; |
336 | tx->dqo_compl.timed_out_completions.head = -1; |
337 | tx->dqo_compl.timed_out_completions.tail = -1; |
338 | |
339 | bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1); |
340 | tx->dqo.tx_ring = dma_alloc_coherent(dev: hdev, size: bytes, dma_handle: &tx->bus, GFP_KERNEL); |
341 | if (!tx->dqo.tx_ring) |
342 | goto err; |
343 | |
344 | bytes = sizeof(tx->dqo.compl_ring[0]) * (tx->dqo.complq_mask + 1); |
345 | tx->dqo.compl_ring = dma_alloc_coherent(dev: hdev, size: bytes, |
346 | dma_handle: &tx->complq_bus_dqo, |
347 | GFP_KERNEL); |
348 | if (!tx->dqo.compl_ring) |
349 | goto err; |
350 | |
351 | tx->q_resources = dma_alloc_coherent(dev: hdev, size: sizeof(*tx->q_resources), |
352 | dma_handle: &tx->q_resources_bus, GFP_KERNEL); |
353 | if (!tx->q_resources) |
354 | goto err; |
355 | |
356 | if (!cfg->raw_addressing) { |
357 | tx->dqo.qpl = gve_assign_tx_qpl(cfg, tx_qid: idx); |
358 | if (!tx->dqo.qpl) |
359 | goto err; |
360 | |
361 | if (gve_tx_qpl_buf_init(tx)) |
362 | goto err; |
363 | } |
364 | |
365 | return 0; |
366 | |
367 | err: |
368 | gve_tx_free_ring_dqo(priv, tx, cfg); |
369 | return -ENOMEM; |
370 | } |
371 | |
372 | int gve_tx_alloc_rings_dqo(struct gve_priv *priv, |
373 | struct gve_tx_alloc_rings_cfg *cfg) |
374 | { |
375 | struct gve_tx_ring *tx = cfg->tx; |
376 | int err = 0; |
377 | int i, j; |
378 | |
379 | if (!cfg->raw_addressing && !cfg->qpls) { |
380 | netif_err(priv, drv, priv->dev, |
381 | "Cannot alloc QPL ring before allocing QPLs\n" ); |
382 | return -EINVAL; |
383 | } |
384 | |
385 | if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { |
386 | netif_err(priv, drv, priv->dev, |
387 | "Cannot alloc more than the max num of Tx rings\n" ); |
388 | return -EINVAL; |
389 | } |
390 | |
391 | if (cfg->start_idx == 0) { |
392 | tx = kvcalloc(n: cfg->qcfg->max_queues, size: sizeof(struct gve_tx_ring), |
393 | GFP_KERNEL); |
394 | if (!tx) |
395 | return -ENOMEM; |
396 | } else if (!tx) { |
397 | netif_err(priv, drv, priv->dev, |
398 | "Cannot alloc tx rings from a nonzero start idx without tx array\n" ); |
399 | return -EINVAL; |
400 | } |
401 | |
402 | for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { |
403 | err = gve_tx_alloc_ring_dqo(priv, cfg, tx: &tx[i], idx: i); |
404 | if (err) { |
405 | netif_err(priv, drv, priv->dev, |
406 | "Failed to alloc tx ring=%d: err=%d\n" , |
407 | i, err); |
408 | goto err; |
409 | } |
410 | } |
411 | |
412 | cfg->tx = tx; |
413 | return 0; |
414 | |
415 | err: |
416 | for (j = 0; j < i; j++) |
417 | gve_tx_free_ring_dqo(priv, tx: &tx[j], cfg); |
418 | if (cfg->start_idx == 0) |
419 | kvfree(addr: tx); |
420 | return err; |
421 | } |
422 | |
423 | void gve_tx_free_rings_dqo(struct gve_priv *priv, |
424 | struct gve_tx_alloc_rings_cfg *cfg) |
425 | { |
426 | struct gve_tx_ring *tx = cfg->tx; |
427 | int i; |
428 | |
429 | if (!tx) |
430 | return; |
431 | |
432 | for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) |
433 | gve_tx_free_ring_dqo(priv, tx: &tx[i], cfg); |
434 | |
435 | if (cfg->start_idx == 0) { |
436 | kvfree(addr: tx); |
437 | cfg->tx = NULL; |
438 | } |
439 | } |
440 | |
441 | /* Returns the number of slots available in the ring */ |
442 | static u32 num_avail_tx_slots(const struct gve_tx_ring *tx) |
443 | { |
444 | u32 num_used = (tx->dqo_tx.tail - tx->dqo_tx.head) & tx->mask; |
445 | |
446 | return tx->mask - num_used; |
447 | } |
448 | |
449 | static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx, |
450 | int desc_count, int buf_count) |
451 | { |
452 | return gve_has_pending_packet(tx) && |
453 | num_avail_tx_slots(tx) >= desc_count && |
454 | gve_has_free_tx_qpl_bufs(tx, count: buf_count); |
455 | } |
456 | |
457 | /* Stops the queue if available descriptors is less than 'count'. |
458 | * Return: 0 if stop is not required. |
459 | */ |
460 | static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, |
461 | int desc_count, int buf_count) |
462 | { |
463 | if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) |
464 | return 0; |
465 | |
466 | /* Update cached TX head pointer */ |
467 | tx->dqo_tx.head = atomic_read_acquire(v: &tx->dqo_compl.hw_tx_head); |
468 | |
469 | if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) |
470 | return 0; |
471 | |
472 | /* No space, so stop the queue */ |
473 | tx->stop_queue++; |
474 | netif_tx_stop_queue(dev_queue: tx->netdev_txq); |
475 | |
476 | /* Sync with restarting queue in `gve_tx_poll_dqo()` */ |
477 | mb(); |
478 | |
479 | /* After stopping queue, check if we can transmit again in order to |
480 | * avoid TOCTOU bug. |
481 | */ |
482 | tx->dqo_tx.head = atomic_read_acquire(v: &tx->dqo_compl.hw_tx_head); |
483 | |
484 | if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) |
485 | return -EBUSY; |
486 | |
487 | netif_tx_start_queue(dev_queue: tx->netdev_txq); |
488 | tx->wake_queue++; |
489 | return 0; |
490 | } |
491 | |
492 | static void (const struct sk_buff *skb, |
493 | struct gve_tx_metadata_dqo *metadata) |
494 | { |
495 | memset(metadata, 0, sizeof(*metadata)); |
496 | metadata->version = GVE_TX_METADATA_VERSION_DQO; |
497 | |
498 | if (skb->l4_hash) { |
499 | u16 path_hash = skb->hash ^ (skb->hash >> 16); |
500 | |
501 | path_hash &= (1 << 15) - 1; |
502 | if (unlikely(path_hash == 0)) |
503 | path_hash = ~path_hash; |
504 | |
505 | metadata->path_hash = path_hash; |
506 | } |
507 | } |
508 | |
509 | static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, |
510 | struct sk_buff *skb, u32 len, u64 addr, |
511 | s16 compl_tag, bool eop, bool is_gso) |
512 | { |
513 | const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL; |
514 | |
515 | while (len > 0) { |
516 | struct gve_tx_pkt_desc_dqo *desc = |
517 | &tx->dqo.tx_ring[*desc_idx].pkt; |
518 | u32 cur_len = min_t(u32, len, GVE_TX_MAX_BUF_SIZE_DQO); |
519 | bool cur_eop = eop && cur_len == len; |
520 | |
521 | *desc = (struct gve_tx_pkt_desc_dqo){ |
522 | .buf_addr = cpu_to_le64(addr), |
523 | .dtype = GVE_TX_PKT_DESC_DTYPE_DQO, |
524 | .end_of_packet = cur_eop, |
525 | .checksum_offload_enable = checksum_offload_en, |
526 | .compl_tag = cpu_to_le16(compl_tag), |
527 | .buf_size = cur_len, |
528 | }; |
529 | |
530 | addr += cur_len; |
531 | len -= cur_len; |
532 | *desc_idx = (*desc_idx + 1) & tx->mask; |
533 | } |
534 | } |
535 | |
536 | /* Validates and prepares `skb` for TSO. |
537 | * |
538 | * Returns header length, or < 0 if invalid. |
539 | */ |
540 | static int gve_prep_tso(struct sk_buff *skb) |
541 | { |
542 | struct tcphdr *tcp; |
543 | int ; |
544 | u32 paylen; |
545 | int err; |
546 | |
547 | /* Note: HW requires MSS (gso_size) to be <= 9728 and the total length |
548 | * of the TSO to be <= 262143. |
549 | * |
550 | * However, we don't validate these because: |
551 | * - Hypervisor enforces a limit of 9K MTU |
552 | * - Kernel will not produce a TSO larger than 64k |
553 | */ |
554 | |
555 | if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) |
556 | return -1; |
557 | |
558 | /* Needed because we will modify header. */ |
559 | err = skb_cow_head(skb, headroom: 0); |
560 | if (err < 0) |
561 | return err; |
562 | |
563 | tcp = tcp_hdr(skb); |
564 | |
565 | /* Remove payload length from checksum. */ |
566 | paylen = skb->len - skb_transport_offset(skb); |
567 | |
568 | switch (skb_shinfo(skb)->gso_type) { |
569 | case SKB_GSO_TCPV4: |
570 | case SKB_GSO_TCPV6: |
571 | csum_replace_by_diff(sum: &tcp->check, |
572 | diff: (__force __wsum)htonl(paylen)); |
573 | |
574 | /* Compute length of segmentation header. */ |
575 | header_len = skb_tcp_all_headers(skb); |
576 | break; |
577 | default: |
578 | return -EINVAL; |
579 | } |
580 | |
581 | if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) |
582 | return -EINVAL; |
583 | |
584 | return header_len; |
585 | } |
586 | |
587 | static void gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc, |
588 | const struct sk_buff *skb, |
589 | const struct gve_tx_metadata_dqo *metadata, |
590 | int ) |
591 | { |
592 | *desc = (struct gve_tx_tso_context_desc_dqo){ |
593 | .header_len = header_len, |
594 | .cmd_dtype = { |
595 | .dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO, |
596 | .tso = 1, |
597 | }, |
598 | .flex0 = metadata->bytes[0], |
599 | .flex5 = metadata->bytes[5], |
600 | .flex6 = metadata->bytes[6], |
601 | .flex7 = metadata->bytes[7], |
602 | .flex8 = metadata->bytes[8], |
603 | .flex9 = metadata->bytes[9], |
604 | .flex10 = metadata->bytes[10], |
605 | .flex11 = metadata->bytes[11], |
606 | }; |
607 | desc->tso_total_len = skb->len - header_len; |
608 | desc->mss = skb_shinfo(skb)->gso_size; |
609 | } |
610 | |
611 | static void |
612 | gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, |
613 | const struct gve_tx_metadata_dqo *metadata) |
614 | { |
615 | *desc = (struct gve_tx_general_context_desc_dqo){ |
616 | .flex0 = metadata->bytes[0], |
617 | .flex1 = metadata->bytes[1], |
618 | .flex2 = metadata->bytes[2], |
619 | .flex3 = metadata->bytes[3], |
620 | .flex4 = metadata->bytes[4], |
621 | .flex5 = metadata->bytes[5], |
622 | .flex6 = metadata->bytes[6], |
623 | .flex7 = metadata->bytes[7], |
624 | .flex8 = metadata->bytes[8], |
625 | .flex9 = metadata->bytes[9], |
626 | .flex10 = metadata->bytes[10], |
627 | .flex11 = metadata->bytes[11], |
628 | .cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO}, |
629 | }; |
630 | } |
631 | |
632 | static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, |
633 | struct sk_buff *skb, |
634 | struct gve_tx_pending_packet_dqo *pkt, |
635 | s16 completion_tag, |
636 | u32 *desc_idx, |
637 | bool is_gso) |
638 | { |
639 | const struct skb_shared_info *shinfo = skb_shinfo(skb); |
640 | int i; |
641 | |
642 | /* Note: HW requires that the size of a non-TSO packet be within the |
643 | * range of [17, 9728]. |
644 | * |
645 | * We don't double check because |
646 | * - We limited `netdev->min_mtu` to ETH_MIN_MTU. |
647 | * - Hypervisor won't allow MTU larger than 9216. |
648 | */ |
649 | |
650 | pkt->num_bufs = 0; |
651 | /* Map the linear portion of skb */ |
652 | { |
653 | u32 len = skb_headlen(skb); |
654 | dma_addr_t addr; |
655 | |
656 | addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE); |
657 | if (unlikely(dma_mapping_error(tx->dev, addr))) |
658 | goto err; |
659 | |
660 | dma_unmap_len_set(pkt, len[pkt->num_bufs], len); |
661 | dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); |
662 | ++pkt->num_bufs; |
663 | |
664 | gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, |
665 | compl_tag: completion_tag, |
666 | /*eop=*/shinfo->nr_frags == 0, is_gso); |
667 | } |
668 | |
669 | for (i = 0; i < shinfo->nr_frags; i++) { |
670 | const skb_frag_t *frag = &shinfo->frags[i]; |
671 | bool is_eop = i == (shinfo->nr_frags - 1); |
672 | u32 len = skb_frag_size(frag); |
673 | dma_addr_t addr; |
674 | |
675 | addr = skb_frag_dma_map(dev: tx->dev, frag, offset: 0, size: len, dir: DMA_TO_DEVICE); |
676 | if (unlikely(dma_mapping_error(tx->dev, addr))) |
677 | goto err; |
678 | |
679 | dma_unmap_len_set(pkt, len[pkt->num_bufs], len); |
680 | dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); |
681 | ++pkt->num_bufs; |
682 | |
683 | gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, |
684 | compl_tag: completion_tag, eop: is_eop, is_gso); |
685 | } |
686 | |
687 | return 0; |
688 | err: |
689 | for (i = 0; i < pkt->num_bufs; i++) { |
690 | if (i == 0) { |
691 | dma_unmap_single(tx->dev, |
692 | dma_unmap_addr(pkt, dma[i]), |
693 | dma_unmap_len(pkt, len[i]), |
694 | DMA_TO_DEVICE); |
695 | } else { |
696 | dma_unmap_page(tx->dev, |
697 | dma_unmap_addr(pkt, dma[i]), |
698 | dma_unmap_len(pkt, len[i]), |
699 | DMA_TO_DEVICE); |
700 | } |
701 | } |
702 | pkt->num_bufs = 0; |
703 | return -1; |
704 | } |
705 | |
706 | /* Tx buffer i corresponds to |
707 | * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO |
708 | * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO |
709 | */ |
710 | static void gve_tx_buf_get_addr(struct gve_tx_ring *tx, |
711 | s16 index, |
712 | void **va, dma_addr_t *dma_addr) |
713 | { |
714 | int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO); |
715 | int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO; |
716 | |
717 | *va = page_address(tx->dqo.qpl->pages[page_id]) + offset; |
718 | *dma_addr = tx->dqo.qpl->page_buses[page_id] + offset; |
719 | } |
720 | |
721 | static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx, |
722 | struct sk_buff *skb, |
723 | struct gve_tx_pending_packet_dqo *pkt, |
724 | s16 completion_tag, |
725 | u32 *desc_idx, |
726 | bool is_gso) |
727 | { |
728 | u32 copy_offset = 0; |
729 | dma_addr_t dma_addr; |
730 | u32 copy_len; |
731 | s16 index; |
732 | void *va; |
733 | |
734 | /* Break the packet into buffer size chunks */ |
735 | pkt->num_bufs = 0; |
736 | while (copy_offset < skb->len) { |
737 | index = gve_alloc_tx_qpl_buf(tx); |
738 | if (unlikely(index == -1)) |
739 | goto err; |
740 | |
741 | gve_tx_buf_get_addr(tx, index, va: &va, dma_addr: &dma_addr); |
742 | copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO, |
743 | skb->len - copy_offset); |
744 | skb_copy_bits(skb, offset: copy_offset, to: va, len: copy_len); |
745 | |
746 | copy_offset += copy_len; |
747 | dma_sync_single_for_device(dev: tx->dev, addr: dma_addr, |
748 | size: copy_len, dir: DMA_TO_DEVICE); |
749 | gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, |
750 | len: copy_len, |
751 | addr: dma_addr, |
752 | compl_tag: completion_tag, |
753 | eop: copy_offset == skb->len, |
754 | is_gso); |
755 | |
756 | pkt->tx_qpl_buf_ids[pkt->num_bufs] = index; |
757 | ++tx->dqo_tx.alloc_tx_qpl_buf_cnt; |
758 | ++pkt->num_bufs; |
759 | } |
760 | |
761 | return 0; |
762 | err: |
763 | /* Should not be here if gve_has_free_tx_qpl_bufs() check is correct */ |
764 | gve_free_tx_qpl_bufs(tx, pkt); |
765 | return -ENOMEM; |
766 | } |
767 | |
768 | /* Returns 0 on success, or < 0 on error. |
769 | * |
770 | * Before this function is called, the caller must ensure |
771 | * gve_has_pending_packet(tx) returns true. |
772 | */ |
773 | static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, |
774 | struct sk_buff *skb) |
775 | { |
776 | const bool is_gso = skb_is_gso(skb); |
777 | u32 desc_idx = tx->dqo_tx.tail; |
778 | struct gve_tx_pending_packet_dqo *pkt; |
779 | struct gve_tx_metadata_dqo metadata; |
780 | s16 completion_tag; |
781 | |
782 | pkt = gve_alloc_pending_packet(tx); |
783 | pkt->skb = skb; |
784 | completion_tag = pkt - tx->dqo.pending_packets; |
785 | |
786 | gve_extract_tx_metadata_dqo(skb, metadata: &metadata); |
787 | if (is_gso) { |
788 | int = gve_prep_tso(skb); |
789 | |
790 | if (unlikely(header_len < 0)) |
791 | goto err; |
792 | |
793 | gve_tx_fill_tso_ctx_desc(desc: &tx->dqo.tx_ring[desc_idx].tso_ctx, |
794 | skb, metadata: &metadata, header_len); |
795 | desc_idx = (desc_idx + 1) & tx->mask; |
796 | } |
797 | |
798 | gve_tx_fill_general_ctx_desc(desc: &tx->dqo.tx_ring[desc_idx].general_ctx, |
799 | metadata: &metadata); |
800 | desc_idx = (desc_idx + 1) & tx->mask; |
801 | |
802 | if (tx->dqo.qpl) { |
803 | if (gve_tx_add_skb_copy_dqo(tx, skb, pkt, |
804 | completion_tag, |
805 | desc_idx: &desc_idx, is_gso)) |
806 | goto err; |
807 | } else { |
808 | if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt, |
809 | completion_tag, |
810 | desc_idx: &desc_idx, is_gso)) |
811 | goto err; |
812 | } |
813 | |
814 | tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; |
815 | |
816 | /* Commit the changes to our state */ |
817 | tx->dqo_tx.tail = desc_idx; |
818 | |
819 | /* Request a descriptor completion on the last descriptor of the |
820 | * packet if we are allowed to by the HW enforced interval. |
821 | */ |
822 | { |
823 | u32 last_desc_idx = (desc_idx - 1) & tx->mask; |
824 | u32 last_report_event_interval = |
825 | (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask; |
826 | |
827 | if (unlikely(last_report_event_interval >= |
828 | GVE_TX_MIN_RE_INTERVAL)) { |
829 | tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true; |
830 | tx->dqo_tx.last_re_idx = last_desc_idx; |
831 | } |
832 | } |
833 | |
834 | return 0; |
835 | |
836 | err: |
837 | pkt->skb = NULL; |
838 | gve_free_pending_packet(tx, pending_packet: pkt); |
839 | |
840 | return -1; |
841 | } |
842 | |
843 | static int gve_num_descs_per_buf(size_t size) |
844 | { |
845 | return DIV_ROUND_UP(size, GVE_TX_MAX_BUF_SIZE_DQO); |
846 | } |
847 | |
848 | static int gve_num_buffer_descs_needed(const struct sk_buff *skb) |
849 | { |
850 | const struct skb_shared_info *shinfo = skb_shinfo(skb); |
851 | int num_descs; |
852 | int i; |
853 | |
854 | num_descs = gve_num_descs_per_buf(size: skb_headlen(skb)); |
855 | |
856 | for (i = 0; i < shinfo->nr_frags; i++) { |
857 | unsigned int frag_size = skb_frag_size(frag: &shinfo->frags[i]); |
858 | |
859 | num_descs += gve_num_descs_per_buf(size: frag_size); |
860 | } |
861 | |
862 | return num_descs; |
863 | } |
864 | |
865 | /* Returns true if HW is capable of sending TSO represented by `skb`. |
866 | * |
867 | * Each segment must not span more than GVE_TX_MAX_DATA_DESCS buffers. |
868 | * - The header is counted as one buffer for every single segment. |
869 | * - A buffer which is split between two segments is counted for both. |
870 | * - If a buffer contains both header and payload, it is counted as two buffers. |
871 | */ |
872 | static bool gve_can_send_tso(const struct sk_buff *skb) |
873 | { |
874 | const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1; |
875 | const struct skb_shared_info *shinfo = skb_shinfo(skb); |
876 | const int = skb_tcp_all_headers(skb); |
877 | const int gso_size = shinfo->gso_size; |
878 | int cur_seg_num_bufs; |
879 | int cur_seg_size; |
880 | int i; |
881 | |
882 | cur_seg_size = skb_headlen(skb) - header_len; |
883 | cur_seg_num_bufs = cur_seg_size > 0; |
884 | |
885 | for (i = 0; i < shinfo->nr_frags; i++) { |
886 | if (cur_seg_size >= gso_size) { |
887 | cur_seg_size %= gso_size; |
888 | cur_seg_num_bufs = cur_seg_size > 0; |
889 | } |
890 | |
891 | if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg)) |
892 | return false; |
893 | |
894 | cur_seg_size += skb_frag_size(frag: &shinfo->frags[i]); |
895 | } |
896 | |
897 | return true; |
898 | } |
899 | |
900 | netdev_features_t gve_features_check_dqo(struct sk_buff *skb, |
901 | struct net_device *dev, |
902 | netdev_features_t features) |
903 | { |
904 | if (skb_is_gso(skb) && !gve_can_send_tso(skb)) |
905 | return features & ~NETIF_F_GSO_MASK; |
906 | |
907 | return features; |
908 | } |
909 | |
910 | /* Attempt to transmit specified SKB. |
911 | * |
912 | * Returns 0 if the SKB was transmitted or dropped. |
913 | * Returns -1 if there is not currently enough space to transmit the SKB. |
914 | */ |
915 | static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, |
916 | struct sk_buff *skb) |
917 | { |
918 | int num_buffer_descs; |
919 | int total_num_descs; |
920 | |
921 | if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb))) |
922 | goto drop; |
923 | |
924 | if (tx->dqo.qpl) { |
925 | /* We do not need to verify the number of buffers used per |
926 | * packet or per segment in case of TSO as with 2K size buffers |
927 | * none of the TX packet rules would be violated. |
928 | * |
929 | * gve_can_send_tso() checks that each TCP segment of gso_size is |
930 | * not distributed over more than 9 SKB frags.. |
931 | */ |
932 | num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO); |
933 | } else { |
934 | num_buffer_descs = gve_num_buffer_descs_needed(skb); |
935 | if (!skb_is_gso(skb)) { |
936 | if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) { |
937 | if (unlikely(skb_linearize(skb) < 0)) |
938 | goto drop; |
939 | |
940 | num_buffer_descs = 1; |
941 | } |
942 | } |
943 | } |
944 | |
945 | /* Metadata + (optional TSO) + data descriptors. */ |
946 | total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs; |
947 | if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs + |
948 | GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP, |
949 | num_buffer_descs))) { |
950 | return -1; |
951 | } |
952 | |
953 | if (unlikely(gve_tx_add_skb_dqo(tx, skb) < 0)) |
954 | goto drop; |
955 | |
956 | netdev_tx_sent_queue(dev_queue: tx->netdev_txq, bytes: skb->len); |
957 | skb_tx_timestamp(skb); |
958 | return 0; |
959 | |
960 | drop: |
961 | tx->dropped_pkt++; |
962 | dev_kfree_skb_any(skb); |
963 | return 0; |
964 | } |
965 | |
966 | /* Transmit a given skb and ring the doorbell. */ |
967 | netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) |
968 | { |
969 | struct gve_priv *priv = netdev_priv(dev); |
970 | struct gve_tx_ring *tx; |
971 | |
972 | tx = &priv->tx[skb_get_queue_mapping(skb)]; |
973 | if (unlikely(gve_try_tx_skb(priv, tx, skb) < 0)) { |
974 | /* We need to ring the txq doorbell -- we have stopped the Tx |
975 | * queue for want of resources, but prior calls to gve_tx() |
976 | * may have added descriptors without ringing the doorbell. |
977 | */ |
978 | gve_tx_put_doorbell_dqo(priv, q_resources: tx->q_resources, val: tx->dqo_tx.tail); |
979 | return NETDEV_TX_BUSY; |
980 | } |
981 | |
982 | if (!netif_xmit_stopped(dev_queue: tx->netdev_txq) && netdev_xmit_more()) |
983 | return NETDEV_TX_OK; |
984 | |
985 | gve_tx_put_doorbell_dqo(priv, q_resources: tx->q_resources, val: tx->dqo_tx.tail); |
986 | return NETDEV_TX_OK; |
987 | } |
988 | |
989 | static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list, |
990 | struct gve_tx_pending_packet_dqo *pending_packet) |
991 | { |
992 | s16 old_tail, index; |
993 | |
994 | index = pending_packet - tx->dqo.pending_packets; |
995 | old_tail = list->tail; |
996 | list->tail = index; |
997 | if (old_tail == -1) |
998 | list->head = index; |
999 | else |
1000 | tx->dqo.pending_packets[old_tail].next = index; |
1001 | |
1002 | pending_packet->next = -1; |
1003 | pending_packet->prev = old_tail; |
1004 | } |
1005 | |
1006 | static void remove_from_list(struct gve_tx_ring *tx, |
1007 | struct gve_index_list *list, |
1008 | struct gve_tx_pending_packet_dqo *pkt) |
1009 | { |
1010 | s16 prev_index, next_index; |
1011 | |
1012 | prev_index = pkt->prev; |
1013 | next_index = pkt->next; |
1014 | |
1015 | if (prev_index == -1) { |
1016 | /* Node is head */ |
1017 | list->head = next_index; |
1018 | } else { |
1019 | tx->dqo.pending_packets[prev_index].next = next_index; |
1020 | } |
1021 | if (next_index == -1) { |
1022 | /* Node is tail */ |
1023 | list->tail = prev_index; |
1024 | } else { |
1025 | tx->dqo.pending_packets[next_index].prev = prev_index; |
1026 | } |
1027 | } |
1028 | |
1029 | static void gve_unmap_packet(struct device *dev, |
1030 | struct gve_tx_pending_packet_dqo *pkt) |
1031 | { |
1032 | int i; |
1033 | |
1034 | /* SKB linear portion is guaranteed to be mapped */ |
1035 | dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), |
1036 | dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); |
1037 | for (i = 1; i < pkt->num_bufs; i++) { |
1038 | dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]), |
1039 | dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE); |
1040 | } |
1041 | pkt->num_bufs = 0; |
1042 | } |
1043 | |
1044 | /* Completion types and expected behavior: |
1045 | * No Miss compl + Packet compl = Packet completed normally. |
1046 | * Miss compl + Re-inject compl = Packet completed normally. |
1047 | * No Miss compl + Re-inject compl = Skipped i.e. packet not completed. |
1048 | * Miss compl + Packet compl = Skipped i.e. packet not completed. |
1049 | */ |
1050 | static void gve_handle_packet_completion(struct gve_priv *priv, |
1051 | struct gve_tx_ring *tx, bool is_napi, |
1052 | u16 compl_tag, u64 *bytes, u64 *pkts, |
1053 | bool is_reinjection) |
1054 | { |
1055 | struct gve_tx_pending_packet_dqo *pending_packet; |
1056 | |
1057 | if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) { |
1058 | net_err_ratelimited("%s: Invalid TX completion tag: %d\n" , |
1059 | priv->dev->name, (int)compl_tag); |
1060 | return; |
1061 | } |
1062 | |
1063 | pending_packet = &tx->dqo.pending_packets[compl_tag]; |
1064 | |
1065 | if (unlikely(is_reinjection)) { |
1066 | if (unlikely(pending_packet->state == |
1067 | GVE_PACKET_STATE_TIMED_OUT_COMPL)) { |
1068 | net_err_ratelimited("%s: Re-injection completion: %d received after timeout.\n" , |
1069 | priv->dev->name, (int)compl_tag); |
1070 | /* Packet was already completed as a result of timeout, |
1071 | * so just remove from list and free pending packet. |
1072 | */ |
1073 | remove_from_list(tx, |
1074 | list: &tx->dqo_compl.timed_out_completions, |
1075 | pkt: pending_packet); |
1076 | gve_free_pending_packet(tx, pending_packet); |
1077 | return; |
1078 | } |
1079 | if (unlikely(pending_packet->state != |
1080 | GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) { |
1081 | /* No outstanding miss completion but packet allocated |
1082 | * implies packet receives a re-injection completion |
1083 | * without a prior miss completion. Return without |
1084 | * completing the packet. |
1085 | */ |
1086 | net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n" , |
1087 | priv->dev->name, (int)compl_tag); |
1088 | return; |
1089 | } |
1090 | remove_from_list(tx, list: &tx->dqo_compl.miss_completions, |
1091 | pkt: pending_packet); |
1092 | } else { |
1093 | /* Packet is allocated but not a pending data completion. */ |
1094 | if (unlikely(pending_packet->state != |
1095 | GVE_PACKET_STATE_PENDING_DATA_COMPL)) { |
1096 | net_err_ratelimited("%s: No pending data completion: %d\n" , |
1097 | priv->dev->name, (int)compl_tag); |
1098 | return; |
1099 | } |
1100 | } |
1101 | tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs; |
1102 | if (tx->dqo.qpl) |
1103 | gve_free_tx_qpl_bufs(tx, pkt: pending_packet); |
1104 | else |
1105 | gve_unmap_packet(dev: tx->dev, pkt: pending_packet); |
1106 | |
1107 | *bytes += pending_packet->skb->len; |
1108 | (*pkts)++; |
1109 | napi_consume_skb(skb: pending_packet->skb, budget: is_napi); |
1110 | pending_packet->skb = NULL; |
1111 | gve_free_pending_packet(tx, pending_packet); |
1112 | } |
1113 | |
1114 | static void gve_handle_miss_completion(struct gve_priv *priv, |
1115 | struct gve_tx_ring *tx, u16 compl_tag, |
1116 | u64 *bytes, u64 *pkts) |
1117 | { |
1118 | struct gve_tx_pending_packet_dqo *pending_packet; |
1119 | |
1120 | if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) { |
1121 | net_err_ratelimited("%s: Invalid TX completion tag: %d\n" , |
1122 | priv->dev->name, (int)compl_tag); |
1123 | return; |
1124 | } |
1125 | |
1126 | pending_packet = &tx->dqo.pending_packets[compl_tag]; |
1127 | if (unlikely(pending_packet->state != |
1128 | GVE_PACKET_STATE_PENDING_DATA_COMPL)) { |
1129 | net_err_ratelimited("%s: Unexpected packet state: %d for completion tag : %d\n" , |
1130 | priv->dev->name, (int)pending_packet->state, |
1131 | (int)compl_tag); |
1132 | return; |
1133 | } |
1134 | |
1135 | pending_packet->state = GVE_PACKET_STATE_PENDING_REINJECT_COMPL; |
1136 | /* jiffies can wraparound but time comparisons can handle overflows. */ |
1137 | pending_packet->timeout_jiffies = |
1138 | jiffies + |
1139 | msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT * |
1140 | MSEC_PER_SEC); |
1141 | add_to_list(tx, list: &tx->dqo_compl.miss_completions, pending_packet); |
1142 | |
1143 | *bytes += pending_packet->skb->len; |
1144 | (*pkts)++; |
1145 | } |
1146 | |
1147 | static void remove_miss_completions(struct gve_priv *priv, |
1148 | struct gve_tx_ring *tx) |
1149 | { |
1150 | struct gve_tx_pending_packet_dqo *pending_packet; |
1151 | s16 next_index; |
1152 | |
1153 | next_index = tx->dqo_compl.miss_completions.head; |
1154 | while (next_index != -1) { |
1155 | pending_packet = &tx->dqo.pending_packets[next_index]; |
1156 | next_index = pending_packet->next; |
1157 | /* Break early because packets should timeout in order. */ |
1158 | if (time_is_after_jiffies(pending_packet->timeout_jiffies)) |
1159 | break; |
1160 | |
1161 | remove_from_list(tx, list: &tx->dqo_compl.miss_completions, |
1162 | pkt: pending_packet); |
1163 | /* Unmap/free TX buffers and free skb but do not unallocate packet i.e. |
1164 | * the completion tag is not freed to ensure that the driver |
1165 | * can take appropriate action if a corresponding valid |
1166 | * completion is received later. |
1167 | */ |
1168 | if (tx->dqo.qpl) |
1169 | gve_free_tx_qpl_bufs(tx, pkt: pending_packet); |
1170 | else |
1171 | gve_unmap_packet(dev: tx->dev, pkt: pending_packet); |
1172 | |
1173 | /* This indicates the packet was dropped. */ |
1174 | dev_kfree_skb_any(skb: pending_packet->skb); |
1175 | pending_packet->skb = NULL; |
1176 | tx->dropped_pkt++; |
1177 | net_err_ratelimited("%s: No reinjection completion was received for: %d.\n" , |
1178 | priv->dev->name, |
1179 | (int)(pending_packet - tx->dqo.pending_packets)); |
1180 | |
1181 | pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL; |
1182 | pending_packet->timeout_jiffies = |
1183 | jiffies + |
1184 | msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT * |
1185 | MSEC_PER_SEC); |
1186 | /* Maintain pending packet in another list so the packet can be |
1187 | * unallocated at a later time. |
1188 | */ |
1189 | add_to_list(tx, list: &tx->dqo_compl.timed_out_completions, |
1190 | pending_packet); |
1191 | } |
1192 | } |
1193 | |
1194 | static void remove_timed_out_completions(struct gve_priv *priv, |
1195 | struct gve_tx_ring *tx) |
1196 | { |
1197 | struct gve_tx_pending_packet_dqo *pending_packet; |
1198 | s16 next_index; |
1199 | |
1200 | next_index = tx->dqo_compl.timed_out_completions.head; |
1201 | while (next_index != -1) { |
1202 | pending_packet = &tx->dqo.pending_packets[next_index]; |
1203 | next_index = pending_packet->next; |
1204 | /* Break early because packets should timeout in order. */ |
1205 | if (time_is_after_jiffies(pending_packet->timeout_jiffies)) |
1206 | break; |
1207 | |
1208 | remove_from_list(tx, list: &tx->dqo_compl.timed_out_completions, |
1209 | pkt: pending_packet); |
1210 | gve_free_pending_packet(tx, pending_packet); |
1211 | } |
1212 | } |
1213 | |
1214 | int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, |
1215 | struct napi_struct *napi) |
1216 | { |
1217 | u64 reinject_compl_bytes = 0; |
1218 | u64 reinject_compl_pkts = 0; |
1219 | int num_descs_cleaned = 0; |
1220 | u64 miss_compl_bytes = 0; |
1221 | u64 miss_compl_pkts = 0; |
1222 | u64 pkt_compl_bytes = 0; |
1223 | u64 pkt_compl_pkts = 0; |
1224 | |
1225 | /* Limit in order to avoid blocking for too long */ |
1226 | while (!napi || pkt_compl_pkts < napi->weight) { |
1227 | struct gve_tx_compl_desc *compl_desc = |
1228 | &tx->dqo.compl_ring[tx->dqo_compl.head]; |
1229 | u16 type; |
1230 | |
1231 | if (compl_desc->generation == tx->dqo_compl.cur_gen_bit) |
1232 | break; |
1233 | |
1234 | /* Prefetch the next descriptor. */ |
1235 | prefetch(&tx->dqo.compl_ring[(tx->dqo_compl.head + 1) & |
1236 | tx->dqo.complq_mask]); |
1237 | |
1238 | /* Do not read data until we own the descriptor */ |
1239 | dma_rmb(); |
1240 | type = compl_desc->type; |
1241 | |
1242 | if (type == GVE_COMPL_TYPE_DQO_DESC) { |
1243 | /* This is the last descriptor fetched by HW plus one */ |
1244 | u16 tx_head = le16_to_cpu(compl_desc->tx_head); |
1245 | |
1246 | atomic_set_release(v: &tx->dqo_compl.hw_tx_head, i: tx_head); |
1247 | } else if (type == GVE_COMPL_TYPE_DQO_PKT) { |
1248 | u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); |
1249 | if (compl_tag & GVE_ALT_MISS_COMPL_BIT) { |
1250 | compl_tag &= ~GVE_ALT_MISS_COMPL_BIT; |
1251 | gve_handle_miss_completion(priv, tx, compl_tag, |
1252 | bytes: &miss_compl_bytes, |
1253 | pkts: &miss_compl_pkts); |
1254 | } else { |
1255 | gve_handle_packet_completion(priv, tx, is_napi: !!napi, |
1256 | compl_tag, |
1257 | bytes: &pkt_compl_bytes, |
1258 | pkts: &pkt_compl_pkts, |
1259 | is_reinjection: false); |
1260 | } |
1261 | } else if (type == GVE_COMPL_TYPE_DQO_MISS) { |
1262 | u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); |
1263 | |
1264 | gve_handle_miss_completion(priv, tx, compl_tag, |
1265 | bytes: &miss_compl_bytes, |
1266 | pkts: &miss_compl_pkts); |
1267 | } else if (type == GVE_COMPL_TYPE_DQO_REINJECTION) { |
1268 | u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); |
1269 | |
1270 | gve_handle_packet_completion(priv, tx, is_napi: !!napi, |
1271 | compl_tag, |
1272 | bytes: &reinject_compl_bytes, |
1273 | pkts: &reinject_compl_pkts, |
1274 | is_reinjection: true); |
1275 | } |
1276 | |
1277 | tx->dqo_compl.head = |
1278 | (tx->dqo_compl.head + 1) & tx->dqo.complq_mask; |
1279 | /* Flip the generation bit when we wrap around */ |
1280 | tx->dqo_compl.cur_gen_bit ^= tx->dqo_compl.head == 0; |
1281 | num_descs_cleaned++; |
1282 | } |
1283 | |
1284 | netdev_tx_completed_queue(dev_queue: tx->netdev_txq, |
1285 | pkts: pkt_compl_pkts + miss_compl_pkts, |
1286 | bytes: pkt_compl_bytes + miss_compl_bytes); |
1287 | |
1288 | remove_miss_completions(priv, tx); |
1289 | remove_timed_out_completions(priv, tx); |
1290 | |
1291 | u64_stats_update_begin(syncp: &tx->statss); |
1292 | tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes; |
1293 | tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts; |
1294 | u64_stats_update_end(syncp: &tx->statss); |
1295 | return num_descs_cleaned; |
1296 | } |
1297 | |
1298 | bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean) |
1299 | { |
1300 | struct gve_tx_compl_desc *compl_desc; |
1301 | struct gve_tx_ring *tx = block->tx; |
1302 | struct gve_priv *priv = block->priv; |
1303 | |
1304 | if (do_clean) { |
1305 | int num_descs_cleaned = gve_clean_tx_done_dqo(priv, tx, |
1306 | napi: &block->napi); |
1307 | |
1308 | /* Sync with queue being stopped in `gve_maybe_stop_tx_dqo()` */ |
1309 | mb(); |
1310 | |
1311 | if (netif_tx_queue_stopped(dev_queue: tx->netdev_txq) && |
1312 | num_descs_cleaned > 0) { |
1313 | tx->wake_queue++; |
1314 | netif_tx_wake_queue(dev_queue: tx->netdev_txq); |
1315 | } |
1316 | } |
1317 | |
1318 | /* Return true if we still have work. */ |
1319 | compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head]; |
1320 | return compl_desc->generation != tx->dqo_compl.cur_gen_bit; |
1321 | } |
1322 | |