// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include "gve_dqo.h"
#include <net/ip.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/skbuff.h>

/* Returns true if tx_bufs are available. */
static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
{
	int num_avail;

	if (!tx->dqo.qpl)
		return true;

	num_avail = tx->dqo.num_tx_qpl_bufs -
		(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
		 tx->dqo_tx.free_tx_qpl_buf_cnt);

	if (count <= num_avail)
		return true;

	/* Update cached value from dqo_compl. */
	tx->dqo_tx.free_tx_qpl_buf_cnt =
		atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_cnt);

	num_avail = tx->dqo.num_tx_qpl_bufs -
		(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
		 tx->dqo_tx.free_tx_qpl_buf_cnt);

	return count <= num_avail;
}

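/* Allocates one TX QPL buffer index from the TX path's free list, stealing
 * the completion handler's list when the local one is empty.
 * Returns -1 if no buffer is available.
 */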
static s16
gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx)
{
	s16 index;

	index = tx->dqo_tx.free_tx_qpl_buf_head;

	/* No TX buffers available, try to steal the list from the
	 * completion handler.
	 */
	if (unlikely(index == -1)) {
		tx->dqo_tx.free_tx_qpl_buf_head =
			atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
		index = tx->dqo_tx.free_tx_qpl_buf_head;

		if (unlikely(index == -1))
			return index;
	}

	/* Remove TX buf from free list */
	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];

	return index;
}

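/* Returns a packet's TX QPL buffers to the completion handler's free list:
 * the buffer IDs are chained together and pushed onto the shared list with
 * a lock-free cmpxchg loop.
 */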
static void
gve_free_tx_qpl_bufs(struct gve_tx_ring *tx,
		     struct gve_tx_pending_packet_dqo *pkt)
{
	s16 index;
	int i;

	if (!pkt->num_bufs)
		return;

	index = pkt->tx_qpl_buf_ids[0];
	/* Create a linked list of buffers to be added to the free list */
	for (i = 1; i < pkt->num_bufs; i++) {
		tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i];
		index = pkt->tx_qpl_buf_ids[i];
	}

	while (true) {
		s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_head);

		tx->dqo.tx_qpl_buf_next[index] = old_head;
		if (atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head,
				   old_head,
				   pkt->tx_qpl_buf_ids[0]) == old_head) {
			break;
		}
	}

	atomic_add(pkt->num_bufs, &tx->dqo_compl.free_tx_qpl_buf_cnt);
	pkt->num_bufs = 0;
}

/* Returns true if a gve_tx_pending_packet_dqo object is available. */
static bool gve_has_pending_packet(struct gve_tx_ring *tx)
{
	/* Check TX path's list. */
	if (tx->dqo_tx.free_pending_packets != -1)
		return true;

	/* Check completion handler's list. */
	if (atomic_read_acquire(&tx->dqo_compl.free_pending_packets) != -1)
		return true;

	return false;
}

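/* Reserves a pending-packet slot (the completion tag) from the TX path's
 * free list, stealing the completion handler's list if needed.
 * Returns NULL if no slot is available.
 */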
static struct gve_tx_pending_packet_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pending_packet;
	s16 index;

	index = tx->dqo_tx.free_pending_packets;

	/* No pending_packets available, try to steal the list from the
	 * completion handler.
	 */
	if (unlikely(index == -1)) {
		tx->dqo_tx.free_pending_packets =
			atomic_xchg(&tx->dqo_compl.free_pending_packets, -1);
		index = tx->dqo_tx.free_pending_packets;

		if (unlikely(index == -1))
			return NULL;
	}

	pending_packet = &tx->dqo.pending_packets[index];

	/* Remove pending_packet from free list */
	tx->dqo_tx.free_pending_packets = pending_packet->next;
	pending_packet->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	return pending_packet;
}

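/* Marks a pending-packet slot as unallocated and pushes it back onto the
 * completion handler's free list with a cmpxchg loop.
 */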
static void
gve_free_pending_packet(struct gve_tx_ring *tx,
			struct gve_tx_pending_packet_dqo *pending_packet)
{
	s16 index = pending_packet - tx->dqo.pending_packets;

	pending_packet->state = GVE_PACKET_STATE_UNALLOCATED;
	while (true) {
		s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_pending_packets);

		pending_packet->next = old_head;
		if (atomic_cmpxchg(&tx->dqo_compl.free_pending_packets,
				   old_head, index) == old_head) {
			break;
		}
	}
}

/* gve_tx_clean_pending_packets - Cleans up all pending tx requests and
 * buffers.
 */
static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
{
	int i;

	for (i = 0; i < tx->dqo.num_pending_packets; i++) {
		struct gve_tx_pending_packet_dqo *cur_state =
			&tx->dqo.pending_packets[i];
		int j;

		for (j = 0; j < cur_state->num_bufs; j++) {
			if (j == 0) {
				dma_unmap_single(tx->dev,
						 dma_unmap_addr(cur_state, dma[j]),
						 dma_unmap_len(cur_state, len[j]),
						 DMA_TO_DEVICE);
			} else {
				dma_unmap_page(tx->dev,
					       dma_unmap_addr(cur_state, dma[j]),
					       dma_unmap_len(cur_state, len[j]),
					       DMA_TO_DEVICE);
			}
		}
		if (cur_state->skb) {
			dev_consume_skb_any(cur_state->skb);
			cur_state->skb = NULL;
		}
	}
}

void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
	struct gve_tx_ring *tx = &priv->tx[idx];

	if (!gve_tx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
	netdev_tx_reset_queue(tx->netdev_txq);
	gve_tx_clean_pending_packets(tx);
	gve_tx_remove_from_block(priv, idx);
}

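/* Releases everything allocated for one TX ring: queue resources, the
 * completion and descriptor rings, the pending-packet and QPL buffer
 * bookkeeping, and the queue page list itself.
 */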
static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
				 struct gve_tx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	int idx = tx->q_num;
	size_t bytes;
	u32 qpl_id;

	if (tx->q_resources) {
		dma_free_coherent(hdev, sizeof(*tx->q_resources),
				  tx->q_resources, tx->q_resources_bus);
		tx->q_resources = NULL;
	}

	if (tx->dqo.compl_ring) {
		bytes = sizeof(tx->dqo.compl_ring[0]) *
			(tx->dqo.complq_mask + 1);
		dma_free_coherent(hdev, bytes, tx->dqo.compl_ring,
				  tx->complq_bus_dqo);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.tx_ring) {
		bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1);
		dma_free_coherent(hdev, bytes, tx->dqo.tx_ring, tx->bus);
		tx->dqo.tx_ring = NULL;
	}

	kvfree(tx->dqo.pending_packets);
	tx->dqo.pending_packets = NULL;

	kvfree(tx->dqo.tx_qpl_buf_next);
	tx->dqo.tx_qpl_buf_next = NULL;

	if (tx->dqo.qpl) {
		qpl_id = gve_tx_qpl_id(priv, tx->q_num);
		gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id);
		tx->dqo.qpl = NULL;
	}

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

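/* Carves the queue page list into GVE_TX_BUFS_PER_PAGE_DQO buffers per page
 * and links all buffer indices into a free list owned by the TX path.
 */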
static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
{
	int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO *
		tx->dqo.qpl->num_entries;
	int i;

	tx->dqo.tx_qpl_buf_next = kvcalloc(num_tx_qpl_bufs,
					   sizeof(tx->dqo.tx_qpl_buf_next[0]),
					   GFP_KERNEL);
	if (!tx->dqo.tx_qpl_buf_next)
		return -ENOMEM;

	tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs;

	/* Generate free TX buf list */
	for (i = 0; i < num_tx_qpl_bufs - 1; i++)
		tx->dqo.tx_qpl_buf_next[i] = i + 1;
	tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - 1] = -1;

	atomic_set_release(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
	return 0;
}

void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
	struct gve_tx_ring *tx = &priv->tx[idx];

	gve_tx_add_to_block(priv, idx);

	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

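/* Allocates one DQO TX ring: the pending-packet array, the descriptor and
 * completion rings, the queue resources, and (in QPL mode) the queue page
 * list with its buffer free list.
 */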
static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_tx_alloc_rings_cfg *cfg,
				 struct gve_tx_ring *tx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	int num_pending_packets;
	int qpl_page_cnt;
	size_t bytes;
	u32 qpl_id;
	int i;

	memset(tx, 0, sizeof(*tx));
	tx->q_num = idx;
	tx->dev = hdev;
	atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);

	/* Queue sizes must be a power of 2 */
	tx->mask = cfg->ring_size - 1;
	tx->dqo.complq_mask = tx->mask;

	/* The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun the
	 * completion queue.
	 */
	num_pending_packets = tx->dqo.complq_mask + 1;

	/* Reserve space for descriptor completions, which will be reported at
	 * most every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_packets -=
		(tx->dqo.complq_mask + 1) / GVE_TX_MIN_RE_INTERVAL;

	/* Each packet may have at most 2 buffer completions if it receives both
	 * a miss and reinjection completion.
	 */
	num_pending_packets /= 2;

	tx->dqo.num_pending_packets = min_t(int, num_pending_packets, S16_MAX);
	tx->dqo.pending_packets = kvcalloc(tx->dqo.num_pending_packets,
					   sizeof(tx->dqo.pending_packets[0]),
					   GFP_KERNEL);
	if (!tx->dqo.pending_packets)
		goto err;

	/* Set up linked list of pending packets */
	for (i = 0; i < tx->dqo.num_pending_packets - 1; i++)
		tx->dqo.pending_packets[i].next = i + 1;

	tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1;
	atomic_set_release(&tx->dqo_compl.free_pending_packets, -1);
	tx->dqo_compl.miss_completions.head = -1;
	tx->dqo_compl.miss_completions.tail = -1;
	tx->dqo_compl.timed_out_completions.head = -1;
	tx->dqo_compl.timed_out_completions.tail = -1;

	bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1);
	tx->dqo.tx_ring = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->dqo.tx_ring)
		goto err;

	bytes = sizeof(tx->dqo.compl_ring[0]) * (tx->dqo.complq_mask + 1);
	tx->dqo.compl_ring = dma_alloc_coherent(hdev, bytes,
						&tx->complq_bus_dqo,
						GFP_KERNEL);
	if (!tx->dqo.compl_ring)
		goto err;

	tx->q_resources = dma_alloc_coherent(hdev, sizeof(*tx->q_resources),
					     &tx->q_resources_bus, GFP_KERNEL);
	if (!tx->q_resources)
		goto err;

	if (!cfg->raw_addressing) {
		qpl_id = gve_tx_qpl_id(priv, tx->q_num);
		qpl_page_cnt = priv->tx_pages_per_qpl;

		tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!tx->dqo.qpl)
			goto err;

		if (gve_tx_qpl_buf_init(tx))
			goto err;
	}

	return 0;

err:
	gve_tx_free_ring_dqo(priv, tx, cfg);
	return -ENOMEM;
}

int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_tx_alloc_rings_cfg *cfg)
{
	struct gve_tx_ring *tx = cfg->tx;
	int total_queues;
	int err = 0;
	int i, j;

	total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
	if (total_queues > cfg->qcfg->max_queues) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc more than the max num of Tx rings\n");
		return -EINVAL;
	}

	tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
		      GFP_KERNEL);
	if (!tx)
		return -ENOMEM;

	for (i = 0; i < total_queues; i++) {
		err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->tx = tx;
	return 0;

err:
	for (j = 0; j < i; j++)
		gve_tx_free_ring_dqo(priv, &tx[j], cfg);
	kvfree(tx);
	return err;
}

void gve_tx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_tx_alloc_rings_cfg *cfg)
{
	struct gve_tx_ring *tx = cfg->tx;
	int i;

	if (!tx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
		gve_tx_free_ring_dqo(priv, &tx[i], cfg);

	kvfree(tx);
	cfg->tx = NULL;
}

/* Returns the number of slots available in the ring */
static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
{
	u32 num_used = (tx->dqo_tx.tail - tx->dqo_tx.head) & tx->mask;

	return tx->mask - num_used;
}

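/* Returns true only if a completion tag, enough descriptor slots, and (in
 * QPL mode) enough copy buffers are all available for the packet.
 */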
static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx,
				       int desc_count, int buf_count)
{
	return gve_has_pending_packet(tx) &&
		num_avail_tx_slots(tx) >= desc_count &&
		gve_has_free_tx_qpl_bufs(tx, buf_count);
}

/* Stops the queue if fewer than 'desc_count' descriptors or 'buf_count'
 * TX QPL buffers are available.
 * Return: 0 if stop is not required.
 */
static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
				 int desc_count, int buf_count)
{
	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
		return 0;

	/* Update cached TX head pointer */
	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);

	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
		return 0;

	/* No space, so stop the queue */
	tx->stop_queue++;
	netif_tx_stop_queue(tx->netdev_txq);

	/* Sync with restarting queue in `gve_tx_poll_dqo()` */
	mb();

	/* After stopping queue, check if we can transmit again in order to
	 * avoid TOCTOU bug.
	 */
	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);

	if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
		return -EBUSY;

	netif_tx_start_queue(tx->netdev_txq);
	tx->wake_queue++;
	return 0;
}

static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb,
					struct gve_tx_metadata_dqo *metadata)
{
	memset(metadata, 0, sizeof(*metadata));
	metadata->version = GVE_TX_METADATA_VERSION_DQO;

	if (skb->l4_hash) {
		u16 path_hash = skb->hash ^ (skb->hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (unlikely(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}

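/* Writes data descriptors for one buffer, splitting it into chunks of at
 * most GVE_TX_MAX_BUF_SIZE_DQO bytes; only the final chunk of the last
 * buffer carries the end-of-packet flag.
 */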
static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx,
				     struct sk_buff *skb, u32 len, u64 addr,
				     s16 compl_tag, bool eop, bool is_gso)
{
	const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL;

	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
			&tx->dqo.tx_ring[*desc_idx].pkt;
		u32 cur_len = min_t(u32, len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = cpu_to_le64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = checksum_offload_en,
			.compl_tag = cpu_to_le16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->mask;
	}
}

/* Validates and prepares `skb` for TSO.
 *
 * Returns header length, or < 0 if invalid.
 */
static int gve_prep_tso(struct sk_buff *skb)
{
	struct tcphdr *tcp;
	int header_len;
	u32 paylen;
	int err;

	/* Note: HW requires MSS (gso_size) to be <= 9728 and the total length
	 * of the TSO to be <= 262143.
	 *
	 * However, we don't validate these because:
	 * - Hypervisor enforces a limit of 9K MTU
	 * - Kernel will not produce a TSO larger than 64k
	 */

	if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO))
		return -1;

	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
		return -EINVAL;

	/* Needed because we will modify header. */
	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	tcp = tcp_hdr(skb);
	paylen = skb->len - skb_transport_offset(skb);
	csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen));
	header_len = skb_tcp_all_headers(skb);

	if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO))
		return -EINVAL;

	return header_len;
}

static void gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
				     const struct sk_buff *skb,
				     const struct gve_tx_metadata_dqo *metadata,
				     int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = skb->len - header_len;
	desc->mss = skb_shinfo(skb)->gso_size;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
			     const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

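/* Raw-addressing (non-QPL) path: DMA-maps the skb's linear portion and each
 * frag, posting a data descriptor per mapping. On mapping failure every
 * mapping made so far is unwound.
 */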
static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
				      struct sk_buff *skb,
				      struct gve_tx_pending_packet_dqo *pkt,
				      s16 completion_tag,
				      u32 *desc_idx,
				      bool is_gso)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	/* Note: HW requires that the size of a non-TSO packet be within the
	 * range of [17, 9728].
	 *
	 * We don't double check because
	 * - We limited `netdev->min_mtu` to ETH_MIN_MTU.
	 * - Hypervisor won't allow MTU larger than 9216.
	 */

	pkt->num_bufs = 0;
	/* Map the linear portion of skb */
	{
		u32 len = skb_headlen(skb);
		dma_addr_t addr;

		addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr)))
			goto err;

		dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
		dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
		++pkt->num_bufs;

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
					 completion_tag,
					 /*eop=*/shinfo->nr_frags == 0, is_gso);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];
		bool is_eop = i == (shinfo->nr_frags - 1);
		u32 len = skb_frag_size(frag);
		dma_addr_t addr;

		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr)))
			goto err;

		dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
		netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt,
					  dma[pkt->num_bufs], addr);
		++pkt->num_bufs;

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
					 completion_tag, is_eop, is_gso);
	}

	return 0;
err:
	for (i = 0; i < pkt->num_bufs; i++) {
		if (i == 0) {
			dma_unmap_single(tx->dev,
					 dma_unmap_addr(pkt, dma[i]),
					 dma_unmap_len(pkt, len[i]),
					 DMA_TO_DEVICE);
		} else {
			dma_unmap_page(tx->dev,
				       dma_unmap_addr(pkt, dma[i]),
				       dma_unmap_len(pkt, len[i]),
				       DMA_TO_DEVICE);
		}
	}
	pkt->num_bufs = 0;
	return -1;
}

/* Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
static void gve_tx_buf_get_addr(struct gve_tx_ring *tx,
				s16 index,
				void **va, dma_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO;

	*va = page_address(tx->dqo.qpl->pages[page_id]) + offset;
	*dma_addr = tx->dqo.qpl->page_buses[page_id] + offset;
}

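/* QPL path: copies the skb into pre-registered queue page list buffers in
 * GVE_TX_BUF_SIZE_DQO chunks and posts a data descriptor per chunk.
 */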
static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
				   struct sk_buff *skb,
				   struct gve_tx_pending_packet_dqo *pkt,
				   s16 completion_tag,
				   u32 *desc_idx,
				   bool is_gso)
{
	u32 copy_offset = 0;
	dma_addr_t dma_addr;
	u32 copy_len;
	s16 index;
	void *va;

	/* Break the packet into buffer size chunks */
	pkt->num_bufs = 0;
	while (copy_offset < skb->len) {
		index = gve_alloc_tx_qpl_buf(tx);
		if (unlikely(index == -1))
			goto err;

		gve_tx_buf_get_addr(tx, index, &va, &dma_addr);
		copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO,
				 skb->len - copy_offset);
		skb_copy_bits(skb, copy_offset, va, copy_len);

		copy_offset += copy_len;
		dma_sync_single_for_device(tx->dev, dma_addr,
					   copy_len, DMA_TO_DEVICE);
		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb,
					 copy_len,
					 dma_addr,
					 completion_tag,
					 copy_offset == skb->len,
					 is_gso);

		pkt->tx_qpl_buf_ids[pkt->num_bufs] = index;
		++tx->dqo_tx.alloc_tx_qpl_buf_cnt;
		++pkt->num_bufs;
	}

	return 0;
err:
	/* Should not be here if gve_has_free_tx_qpl_bufs() check is correct */
	gve_free_tx_qpl_bufs(tx, pkt);
	return -ENOMEM;
}

/* Returns 0 on success, or < 0 on error.
 *
 * Before this function is called, the caller must ensure
 * gve_has_pending_packet(tx) returns true.
 */
static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
			      struct sk_buff *skb)
{
	const bool is_gso = skb_is_gso(skb);
	u32 desc_idx = tx->dqo_tx.tail;
	struct gve_tx_pending_packet_dqo *pkt;
	struct gve_tx_metadata_dqo metadata;
	s16 completion_tag;

	pkt = gve_alloc_pending_packet(tx);
	if (!pkt)
		return -ENOMEM;

	pkt->skb = skb;
	completion_tag = pkt - tx->dqo.pending_packets;

	gve_extract_tx_metadata_dqo(skb, &metadata);
	if (is_gso) {
		int header_len = gve_prep_tso(skb);

		if (unlikely(header_len < 0))
			goto err;

		gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
					 skb, &metadata, header_len);
		desc_idx = (desc_idx + 1) & tx->mask;
	}

	gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
				     &metadata);
	desc_idx = (desc_idx + 1) & tx->mask;

	if (tx->dqo.qpl) {
		if (gve_tx_add_skb_copy_dqo(tx, skb, pkt,
					    completion_tag,
					    &desc_idx, is_gso))
			goto err;
	} else {
		if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt,
					       completion_tag,
					       &desc_idx, is_gso))
			goto err;
	}

	tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;

	/* Commit the changes to our state */
	tx->dqo_tx.tail = desc_idx;

	/* Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	{
		u32 last_desc_idx = (desc_idx - 1) & tx->mask;
		u32 last_report_event_interval =
			(last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask;

		if (unlikely(last_report_event_interval >=
			     GVE_TX_MIN_RE_INTERVAL)) {
			tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true;
			tx->dqo_tx.last_re_idx = last_desc_idx;
		}
	}

	return 0;

err:
	pkt->skb = NULL;
	gve_free_pending_packet(tx, pkt);

	return -1;
}

static int gve_num_descs_per_buf(size_t size)
{
	return DIV_ROUND_UP(size, GVE_TX_MAX_BUF_SIZE_DQO);
}

static int gve_num_buffer_descs_needed(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int num_descs;
	int i;

	num_descs = gve_num_descs_per_buf(skb_headlen(skb));

	for (i = 0; i < shinfo->nr_frags; i++) {
		unsigned int frag_size = skb_frag_size(&shinfo->frags[i]);

		num_descs += gve_num_descs_per_buf(frag_size);
	}

	return num_descs;
}

/* Returns true if HW is capable of sending TSO represented by `skb`.
 *
 * Each segment must not span more than GVE_TX_MAX_DATA_DESCS buffers.
 * - The header is counted as one buffer for every single segment.
 * - A buffer which is split between two segments is counted for both.
 * - If a buffer contains both header and payload, it is counted as two buffers.
 */
static bool gve_can_send_tso(const struct sk_buff *skb)
{
	const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1;
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	const int header_len = skb_tcp_all_headers(skb);
	const int gso_size = shinfo->gso_size;
	int cur_seg_num_bufs;
	int prev_frag_size;
	int cur_seg_size;
	int i;

	cur_seg_size = skb_headlen(skb) - header_len;
	prev_frag_size = skb_headlen(skb);
	cur_seg_num_bufs = cur_seg_size > 0;

	for (i = 0; i < shinfo->nr_frags; i++) {
		if (cur_seg_size >= gso_size) {
			cur_seg_size %= gso_size;
			cur_seg_num_bufs = cur_seg_size > 0;

			if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) {
				int prev_frag_remain = prev_frag_size %
					GVE_TX_MAX_BUF_SIZE_DQO;

				/* If the last descriptor of the previous frag
				 * is less than cur_seg_size, the segment will
				 * span two descriptors in the previous frag.
				 * Since max gso size (9728) is less than
				 * GVE_TX_MAX_BUF_SIZE_DQO, it is impossible
				 * for the segment to span more than two
				 * descriptors.
				 */
				if (prev_frag_remain &&
				    cur_seg_size > prev_frag_remain)
					cur_seg_num_bufs++;
			}
		}

		if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg))
			return false;

		prev_frag_size = skb_frag_size(&shinfo->frags[i]);
		cur_seg_size += prev_frag_size;
	}

	return true;
}

netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
					 struct net_device *dev,
					 netdev_features_t features)
{
	if (skb_is_gso(skb) && !gve_can_send_tso(skb))
		return features & ~NETIF_F_GSO_MASK;

	return features;
}

/* Attempt to transmit specified SKB.
 *
 * Returns 0 if the SKB was transmitted or dropped.
 * Returns -1 if there is not currently enough space to transmit the SKB.
 */
static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
			  struct sk_buff *skb)
{
	int num_buffer_descs;
	int total_num_descs;

	if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto drop;

	if (tx->dqo.qpl) {
		/* We do not need to verify the number of buffers used per
		 * packet or per segment in case of TSO as with 2K size buffers
		 * none of the TX packet rules would be violated.
		 *
		 * gve_can_send_tso() checks that each TCP segment of gso_size is
		 * not distributed over more than 9 SKB frags.
		 */
		num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO);
	} else {
		num_buffer_descs = gve_num_buffer_descs_needed(skb);
		if (!skb_is_gso(skb)) {
			if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
				if (unlikely(skb_linearize(skb) < 0))
					goto drop;

				num_buffer_descs = 1;
			}
		}
	}

	/* Metadata + (optional TSO) + data descriptors. */
	total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs;
	if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs +
					   GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP,
					   num_buffer_descs))) {
		return -1;
	}

	if (unlikely(gve_tx_add_skb_dqo(tx, skb) < 0))
		goto drop;

	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
	skb_tx_timestamp(skb);
	return 0;

drop:
	tx->dropped_pkt++;
	dev_kfree_skb_any(skb);
	return 0;
}

/* Transmit a given skb and ring the doorbell. */
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;

	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_try_tx_skb(priv, tx, skb) < 0)) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */
		gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
		return NETDEV_TX_BUSY;
	}

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
	return NETDEV_TX_OK;
}

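/* Appends a pending packet to the tail of a miss/timeout tracking list.
 * The lists are linked by indices into tx->dqo.pending_packets.
 */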
static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
			struct gve_tx_pending_packet_dqo *pending_packet)
{
	s16 old_tail, index;

	index = pending_packet - tx->dqo.pending_packets;
	old_tail = list->tail;
	list->tail = index;
	if (old_tail == -1)
		list->head = index;
	else
		tx->dqo.pending_packets[old_tail].next = index;

	pending_packet->next = -1;
	pending_packet->prev = old_tail;
}

static void remove_from_list(struct gve_tx_ring *tx,
			     struct gve_index_list *list,
			     struct gve_tx_pending_packet_dqo *pkt)
{
	s16 prev_index, next_index;

	prev_index = pkt->prev;
	next_index = pkt->next;

	if (prev_index == -1) {
		/* Node is head */
		list->head = next_index;
	} else {
		tx->dqo.pending_packets[prev_index].next = next_index;
	}
	if (next_index == -1) {
		/* Node is tail */
		list->tail = prev_index;
	} else {
		tx->dqo.pending_packets[next_index].prev = prev_index;
	}
}

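/* Unmaps the DMA mappings of a raw-addressing packet: the linear portion is
 * always a single mapping, the remaining buffers are frag (netmem) mappings.
 */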
static void gve_unmap_packet(struct device *dev,
			     struct gve_tx_pending_packet_dqo *pkt)
{
	int i;

	/* SKB linear portion is guaranteed to be mapped */
	dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]),
			 dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE);
	for (i = 1; i < pkt->num_bufs; i++) {
		netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]),
					    dma_unmap_len(pkt, len[i]),
					    DMA_TO_DEVICE, 0);
	}
	pkt->num_bufs = 0;
}

/* Completion types and expected behavior:
 * No Miss compl + Packet compl = Packet completed normally.
 * Miss compl + Re-inject compl = Packet completed normally.
 * No Miss compl + Re-inject compl = Skipped i.e. packet not completed.
 * Miss compl + Packet compl = Skipped i.e. packet not completed.
 */
static void gve_handle_packet_completion(struct gve_priv *priv,
					 struct gve_tx_ring *tx, bool is_napi,
					 u16 compl_tag, u64 *bytes, u64 *pkts,
					 bool is_reinjection)
{
	struct gve_tx_pending_packet_dqo *pending_packet;

	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
		net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
				    priv->dev->name, (int)compl_tag);
		return;
	}

	pending_packet = &tx->dqo.pending_packets[compl_tag];

	if (unlikely(is_reinjection)) {
		if (unlikely(pending_packet->state ==
			     GVE_PACKET_STATE_TIMED_OUT_COMPL)) {
			net_err_ratelimited("%s: Re-injection completion: %d received after timeout.\n",
					    priv->dev->name, (int)compl_tag);
			/* Packet was already completed as a result of timeout,
			 * so just remove from list and free pending packet.
			 */
			remove_from_list(tx,
					 &tx->dqo_compl.timed_out_completions,
					 pending_packet);
			gve_free_pending_packet(tx, pending_packet);
			return;
		}
		if (unlikely(pending_packet->state !=
			     GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) {
			/* No outstanding miss completion but packet allocated
			 * implies packet receives a re-injection completion
			 * without a prior miss completion. Return without
			 * completing the packet.
			 */
			net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n",
					    priv->dev->name, (int)compl_tag);
			return;
		}
		remove_from_list(tx, &tx->dqo_compl.miss_completions,
				 pending_packet);
	} else {
		/* Packet is allocated but not a pending data completion. */
		if (unlikely(pending_packet->state !=
			     GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
			net_err_ratelimited("%s: No pending data completion: %d\n",
					    priv->dev->name, (int)compl_tag);
			return;
		}
	}
	tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;
	if (tx->dqo.qpl)
		gve_free_tx_qpl_bufs(tx, pending_packet);
	else
		gve_unmap_packet(tx->dev, pending_packet);

	*bytes += pending_packet->skb->len;
	(*pkts)++;
	napi_consume_skb(pending_packet->skb, is_napi);
	pending_packet->skb = NULL;
	gve_free_pending_packet(tx, pending_packet);
}

static void gve_handle_miss_completion(struct gve_priv *priv,
				       struct gve_tx_ring *tx, u16 compl_tag,
				       u64 *bytes, u64 *pkts)
{
	struct gve_tx_pending_packet_dqo *pending_packet;

	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
		net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
				    priv->dev->name, (int)compl_tag);
		return;
	}

	pending_packet = &tx->dqo.pending_packets[compl_tag];
	if (unlikely(pending_packet->state !=
		     GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		net_err_ratelimited("%s: Unexpected packet state: %d for completion tag : %d\n",
				    priv->dev->name, (int)pending_packet->state,
				    (int)compl_tag);
		return;
	}

	pending_packet->state = GVE_PACKET_STATE_PENDING_REINJECT_COMPL;
	/* jiffies can wraparound but time comparisons can handle overflows. */
	pending_packet->timeout_jiffies =
			jiffies +
			secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT);
	add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet);

	*bytes += pending_packet->skb->len;
	(*pkts)++;
}

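/* Walks the miss-completion list and drops packets whose re-injection
 * completion did not arrive within GVE_REINJECT_COMPL_TIMEOUT. Buffers and
 * skbs are freed, but the completion tag is parked on the timed-out list in
 * case a late completion still arrives.
 */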
static void remove_miss_completions(struct gve_priv *priv,
				    struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pending_packet;
	s16 next_index;

	next_index = tx->dqo_compl.miss_completions.head;
	while (next_index != -1) {
		pending_packet = &tx->dqo.pending_packets[next_index];
		next_index = pending_packet->next;
		/* Break early because packets should timeout in order. */
		if (time_is_after_jiffies(pending_packet->timeout_jiffies))
			break;

		remove_from_list(tx, &tx->dqo_compl.miss_completions,
				 pending_packet);
		/* Unmap/free TX buffers and free skb but do not unallocate packet i.e.
		 * the completion tag is not freed to ensure that the driver
		 * can take appropriate action if a corresponding valid
		 * completion is received later.
		 */
		if (tx->dqo.qpl)
			gve_free_tx_qpl_bufs(tx, pending_packet);
		else
			gve_unmap_packet(tx->dev, pending_packet);

		/* This indicates the packet was dropped. */
		dev_kfree_skb_any(pending_packet->skb);
		pending_packet->skb = NULL;
		tx->dropped_pkt++;
		net_err_ratelimited("%s: No reinjection completion was received for: %d.\n",
				    priv->dev->name,
				    (int)(pending_packet - tx->dqo.pending_packets));

		pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
		pending_packet->timeout_jiffies =
				jiffies +
				secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT);
		/* Maintain pending packet in another list so the packet can be
		 * unallocated at a later time.
		 */
		add_to_list(tx, &tx->dqo_compl.timed_out_completions,
			    pending_packet);
	}
}

static void remove_timed_out_completions(struct gve_priv *priv,
					 struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pending_packet;
	s16 next_index;

	next_index = tx->dqo_compl.timed_out_completions.head;
	while (next_index != -1) {
		pending_packet = &tx->dqo.pending_packets[next_index];
		next_index = pending_packet->next;
		/* Break early because packets should timeout in order. */
		if (time_is_after_jiffies(pending_packet->timeout_jiffies))
			break;

		remove_from_list(tx, &tx->dqo_compl.timed_out_completions,
				 pending_packet);
		gve_free_pending_packet(tx, pending_packet);
	}
}

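/* Drains the completion ring (bounded by napi->weight packet completions
 * when called from NAPI), dispatching descriptor, packet, miss and
 * re-injection completions, then reaps timed-out entries and updates stats.
 * Returns the number of completion descriptors processed.
 */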
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
			  struct napi_struct *napi)
{
	u64 reinject_compl_bytes = 0;
	u64 reinject_compl_pkts = 0;
	int num_descs_cleaned = 0;
	u64 miss_compl_bytes = 0;
	u64 miss_compl_pkts = 0;
	u64 pkt_compl_bytes = 0;
	u64 pkt_compl_pkts = 0;

	/* Limit in order to avoid blocking for too long */
	while (!napi || pkt_compl_pkts < napi->weight) {
		struct gve_tx_compl_desc *compl_desc =
			&tx->dqo.compl_ring[tx->dqo_compl.head];
		u16 type;

		if (compl_desc->generation == tx->dqo_compl.cur_gen_bit)
			break;

		/* Prefetch the next descriptor. */
		prefetch(&tx->dqo.compl_ring[(tx->dqo_compl.head + 1) &
				tx->dqo.complq_mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();
		type = compl_desc->type;

		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			u16 tx_head = le16_to_cpu(compl_desc->tx_head);

			atomic_set_release(&tx->dqo_compl.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);

			if (compl_tag & GVE_ALT_MISS_COMPL_BIT) {
				compl_tag &= ~GVE_ALT_MISS_COMPL_BIT;
				gve_handle_miss_completion(priv, tx, compl_tag,
							   &miss_compl_bytes,
							   &miss_compl_pkts);
			} else {
				gve_handle_packet_completion(priv, tx, !!napi,
							     compl_tag,
							     &pkt_compl_bytes,
							     &pkt_compl_pkts,
							     false);
			}
		} else if (type == GVE_COMPL_TYPE_DQO_MISS) {
			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);

			gve_handle_miss_completion(priv, tx, compl_tag,
						   &miss_compl_bytes,
						   &miss_compl_pkts);
		} else if (type == GVE_COMPL_TYPE_DQO_REINJECTION) {
			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);

			gve_handle_packet_completion(priv, tx, !!napi,
						     compl_tag,
						     &reinject_compl_bytes,
						     &reinject_compl_pkts,
						     true);
		}

		tx->dqo_compl.head =
			(tx->dqo_compl.head + 1) & tx->dqo.complq_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo_compl.cur_gen_bit ^= tx->dqo_compl.head == 0;
		num_descs_cleaned++;
	}

	netdev_tx_completed_queue(tx->netdev_txq,
				  pkt_compl_pkts + miss_compl_pkts,
				  pkt_compl_bytes + miss_compl_bytes);

	remove_miss_completions(priv, tx);
	remove_timed_out_completions(priv, tx);

	u64_stats_update_begin(&tx->statss);
	tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
	tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
	u64_stats_update_end(&tx->statss);
	return num_descs_cleaned;
}

bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
{
	struct gve_tx_compl_desc *compl_desc;
	struct gve_tx_ring *tx = block->tx;
	struct gve_priv *priv = block->priv;

	if (do_clean) {
		int num_descs_cleaned = gve_clean_tx_done_dqo(priv, tx,
							      &block->napi);

		/* Sync with queue being stopped in `gve_maybe_stop_tx_dqo()` */
		mb();

		if (netif_tx_queue_stopped(tx->netdev_txq) &&
		    num_descs_cleaned > 0) {
			tx->wake_queue++;
			netif_tx_wake_queue(tx->netdev_txq);
		}
	}

	/* Return true if we still have work. */
	compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
	return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
}