gve_tx_dqo.c source code [linux/drivers/net/ethernet/google/gve/gve_tx_dqo.c]

// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */
6
7	#include "gve.h"
8	#include "gve_adminq.h"
9	#include "gve_utils.h"
10	#include "gve_dqo.h"
11	#include <net/ip.h>
12	#include <linux/tcp.h>
13	#include <linux/slab.h>
14	#include <linux/skbuff.h>
15
16	/ Returns true if tx_bufs are available. /
17	static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring tx, int* count)
18	{
19	int num_avail;
20
21	if (!tx->dqo.qpl)
22	return true;
23
24	num_avail = tx->dqo.num_tx_qpl_bufs -
25	(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
26	tx->dqo_tx.free_tx_qpl_buf_cnt);
27
28	if (count <= num_avail)
29	return true;
30
31	/ Update cached value from dqo_compl. /
32	tx->dqo_tx.free_tx_qpl_buf_cnt =
33	atomic_read_acquire(v: &tx->dqo_compl.free_tx_qpl_buf_cnt);
34
35	num_avail = tx->dqo.num_tx_qpl_bufs -
36	(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
37	tx->dqo_tx.free_tx_qpl_buf_cnt);
38
39	return count <= num_avail;
40	}
41
42	static s16
43	gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx)
44	{
45	s16 index;
46
47	index = tx->dqo_tx.free_tx_qpl_buf_head;
48
49	/ No TX buffers available, try to steal the list from the*
50	* completion handler.
51	*/
52	if (unlikely(index == -`1`)) {
53	tx->dqo_tx.free_tx_qpl_buf_head =
54	atomic_xchg(v: &tx->dqo_compl.free_tx_qpl_buf_head, new: -`1`);
55	index = tx->dqo_tx.free_tx_qpl_buf_head;
56
57	if (unlikely(index == -`1`))
58	return index;
59	}
60
61	/ Remove TX buf from free list /
62	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];
63
64	return index;
65	}
66
67	static void
68	gve_free_tx_qpl_bufs(struct gve_tx_ring *tx,
69	struct gve_tx_pending_packet_dqo *pkt)
70	{
71	s16 index;
72	int i;
73
74	if (!pkt->num_bufs)
75	return;
76
77	index = pkt->tx_qpl_buf_ids[`0`];
78	/ Create a linked list of buffers to be added to the free list /
79	for (i = `1`; i < pkt->num_bufs; i++) {
80	tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i];
81	index = pkt->tx_qpl_buf_ids[i];
82	}
83
84	while (true) {
85	s16 old_head = atomic_read_acquire(v: &tx->dqo_compl.free_tx_qpl_buf_head);
86
87	tx->dqo.tx_qpl_buf_next[index] = old_head;
88	if (atomic_cmpxchg(v: &tx->dqo_compl.free_tx_qpl_buf_head,
89	old: old_head,
90	new: pkt->tx_qpl_buf_ids[`0`]) == old_head) {
91	break;
92	}
93	}
94
95	atomic_add(i: pkt->num_bufs, v: &tx->dqo_compl.free_tx_qpl_buf_cnt);
96	pkt->num_bufs = `0`;
97	}
98
99	/ Returns true if a gve_tx_pending_packet_dqo object is available. /
100	static bool gve_has_pending_packet(struct gve_tx_ring *tx)
101	{
102	/ Check TX path's list. /
103	if (tx->dqo_tx.free_pending_packets != -`1`)
104	return true;
105
106	/ Check completion handler's list. /
107	if (atomic_read_acquire(v: &tx->dqo_compl.free_pending_packets) != -`1`)
108	return true;
109
110	return false;
111	}
112
113	static struct gve_tx_pending_packet_dqo *
114	gve_alloc_pending_packet(struct gve_tx_ring *tx)
115	{
116	struct gve_tx_pending_packet_dqo *pending_packet;
117	s16 index;
118
119	index = tx->dqo_tx.free_pending_packets;
120
121	/ No pending_packets available, try to steal the list from the*
122	* completion handler.
123	*/
124	if (unlikely(index == -`1`)) {
125	tx->dqo_tx.free_pending_packets =
126	atomic_xchg(v: &tx->dqo_compl.free_pending_packets, new: -`1`);
127	index = tx->dqo_tx.free_pending_packets;
128
129	if (unlikely(index == -`1`))
130	return NULL;
131	}
132
133	pending_packet = &tx->dqo.pending_packets[index];
134
135	/ Remove pending_packet from free list /
136	tx->dqo_tx.free_pending_packets = pending_packet->next;
137	pending_packet->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;
138
139	return pending_packet;
140	}
141
142	static void
143	gve_free_pending_packet(struct gve_tx_ring *tx,
144	struct gve_tx_pending_packet_dqo *pending_packet)
145	{
146	s16 index = pending_packet - tx->dqo.pending_packets;
147
148	pending_packet->state = GVE_PACKET_STATE_UNALLOCATED;
149	while (true) {
150	s16 old_head = atomic_read_acquire(v: &tx->dqo_compl.free_pending_packets);
151
152	pending_packet->next = old_head;
153	if (atomic_cmpxchg(v: &tx->dqo_compl.free_pending_packets,
154	old: old_head, new: index) == old_head) {
155	break;
156	}
157	}
158	}
159
160	/ gve_tx_free_desc - Cleans up all pending tx requests and buffers.*
161	*/
162	static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
163	{
164	int i;
165
166	for (i = `0`; i < tx->dqo.num_pending_packets; i++) {
167	struct gve_tx_pending_packet_dqo *cur_state =
168	&tx->dqo.pending_packets[i];
169	int j;
170
171	for (j = `0`; j < cur_state->num_bufs; j++) {
172	if (j == `0`) {
173	dma_unmap_single(tx->dev,
174	dma_unmap_addr(cur_state, dma[j]),
175	dma_unmap_len(cur_state, len[j]),
176	DMA_TO_DEVICE);
177	} else {
178	dma_unmap_page(tx->dev,
179	dma_unmap_addr(cur_state, dma[j]),
180	dma_unmap_len(cur_state, len[j]),
181	DMA_TO_DEVICE);
182	}
183	}
184	if (cur_state->skb) {
185	dev_consume_skb_any(skb: cur_state->skb);
186	cur_state->skb = NULL;
187	}
188	}
189	}
190
191	void gve_tx_stop_ring_dqo(struct gve_priv priv, int* idx)
192	{
193	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx: idx);
194	struct gve_tx_ring *tx = &priv->tx[idx];
195
196	if (!gve_tx_was_added_to_block(priv, queue_idx: idx))
197	return;
198
199	gve_remove_napi(priv, ntfy_idx);
200	gve_clean_tx_done_dqo(priv, tx, /napi=/NULL);
201	netdev_tx_reset_queue(q: tx->netdev_txq);
202	gve_tx_clean_pending_packets(tx);
203	gve_tx_remove_from_block(priv, queue_idx: idx);
204	}
205
206	static void gve_tx_free_ring_dqo(struct gve_priv priv, struct* gve_tx_ring *tx,
207	struct gve_tx_alloc_rings_cfg *cfg)
208	{
209	struct device *hdev = &priv->pdev->dev;
210	int idx = tx->q_num;
211	size_t bytes;
212	u32 qpl_id;
213
214	if (tx->q_resources) {
215	dma_free_coherent(dev: hdev, size: sizeof(*tx->q_resources),
216	cpu_addr: tx->q_resources, dma_handle: tx->q_resources_bus);
217	tx->q_resources = NULL;
218	}
219
220	if (tx->dqo.compl_ring) {
221	bytes = sizeof(tx->dqo.compl_ring[`0`]) *
222	(tx->dqo.complq_mask + `1`);
223	dma_free_coherent(dev: hdev, size: bytes, cpu_addr: tx->dqo.compl_ring,
224	dma_handle: tx->complq_bus_dqo);
225	tx->dqo.compl_ring = NULL;
226	}
227
228	if (tx->dqo.tx_ring) {
229	bytes = sizeof(tx->dqo.tx_ring[`0`]) * (tx->mask + `1`);
230	dma_free_coherent(dev: hdev, size: bytes, cpu_addr: tx->dqo.tx_ring, dma_handle: tx->bus);
231	tx->dqo.tx_ring = NULL;
232	}
233
234	kvfree(addr: tx->dqo.pending_packets);
235	tx->dqo.pending_packets = NULL;
236
237	kvfree(addr: tx->dqo.tx_qpl_buf_next);
238	tx->dqo.tx_qpl_buf_next = NULL;
239
240	if (tx->dqo.qpl) {
241	qpl_id = gve_tx_qpl_id(priv, tx_qid: tx->q_num);
242	gve_free_queue_page_list(priv, qpl: tx->dqo.qpl, id: qpl_id);
243	tx->dqo.qpl = NULL;
244	}
245
246	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
247	}
248
249	static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
250	{
251	int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO *
252	tx->dqo.qpl->num_entries;
253	int i;
254
255	tx->dqo.tx_qpl_buf_next = kvcalloc(num_tx_qpl_bufs,
256	sizeof(tx->dqo.tx_qpl_buf_next[`0`]),
257	GFP_KERNEL);
258	if (!tx->dqo.tx_qpl_buf_next)
259	return -ENOMEM;
260
261	tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs;
262
263	/ Generate free TX buf list /
264	for (i = `0`; i < num_tx_qpl_bufs - `1`; i++)
265	tx->dqo.tx_qpl_buf_next[i] = i + `1`;
266	tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - `1`] = -`1`;
267
268	atomic_set_release(v: &tx->dqo_compl.free_tx_qpl_buf_head, i: -`1`);
269	return `0`;
270	}
271
272	void gve_tx_start_ring_dqo(struct gve_priv priv, int* idx)
273	{
274	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx: idx);
275	struct gve_tx_ring *tx = &priv->tx[idx];
276
277	gve_tx_add_to_block(priv, queue_idx: idx);
278
279	tx->netdev_txq = netdev_get_tx_queue(dev: priv->dev, index: idx);
280	gve_add_napi(priv, ntfy_idx, gve_poll: gve_napi_poll_dqo);
281	}
282
283	static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
284	struct gve_tx_alloc_rings_cfg *cfg,
285	struct gve_tx_ring *tx,
286	int idx)
287	{
288	struct device *hdev = &priv->pdev->dev;
289	int num_pending_packets;
290	int qpl_page_cnt;
291	size_t bytes;
292	u32 qpl_id;
293	int i;
294
295	memset(tx, `0`, sizeof(*tx));
296	tx->q_num = idx;
297	tx->dev = hdev;
298	atomic_set_release(v: &tx->dqo_compl.hw_tx_head, i: `0`);
299
300	/ Queue sizes must be a power of 2 /
301	tx->mask = cfg->ring_size - `1`;
302	tx->dqo.complq_mask = tx->mask;
303
304	/ The max number of pending packets determines the maximum number of*
305	* descriptors which maybe written to the completion queue.
306	*
307	* We must set the number small enough to make sure we never overrun the
308	* completion queue.
309	*/
310	num_pending_packets = tx->dqo.complq_mask + `1`;
311
312	/ Reserve space for descriptor completions, which will be reported at*
313	* most every GVE_TX_MIN_RE_INTERVAL packets.
314	*/
315	num_pending_packets -=
316	(tx->dqo.complq_mask + `1`) / GVE_TX_MIN_RE_INTERVAL;
317
318	/ Each packet may have at most 2 buffer completions if it receives both*
319	* a miss and reinjection completion.
320	*/
321	num_pending_packets /= `2`;
322
323	tx->dqo.num_pending_packets = min_t(int, num_pending_packets, S16_MAX);
324	tx->dqo.pending_packets = kvcalloc(tx->dqo.num_pending_packets,
325	sizeof(tx->dqo.pending_packets[`0`]),
326	GFP_KERNEL);
327	if (!tx->dqo.pending_packets)
328	goto err;
329
330	/ Set up linked list of pending packets /
331	for (i = `0`; i < tx->dqo.num_pending_packets - `1`; i++)
332	tx->dqo.pending_packets[i].next = i + `1`;
333
334	tx->dqo.pending_packets[tx->dqo.num_pending_packets - `1`].next = -`1`;
335	atomic_set_release(v: &tx->dqo_compl.free_pending_packets, i: -`1`);
336	tx->dqo_compl.miss_completions.head = -`1`;
337	tx->dqo_compl.miss_completions.tail = -`1`;
338	tx->dqo_compl.timed_out_completions.head = -`1`;
339	tx->dqo_compl.timed_out_completions.tail = -`1`;
340
341	bytes = sizeof(tx->dqo.tx_ring[`0`]) * (tx->mask + `1`);
342	tx->dqo.tx_ring = dma_alloc_coherent(dev: hdev, size: bytes, dma_handle: &tx->bus, GFP_KERNEL);
343	if (!tx->dqo.tx_ring)
344	goto err;
345
346	bytes = sizeof(tx->dqo.compl_ring[`0`]) * (tx->dqo.complq_mask + `1`);
347	tx->dqo.compl_ring = dma_alloc_coherent(dev: hdev, size: bytes,
348	dma_handle: &tx->complq_bus_dqo,
349	GFP_KERNEL);
350	if (!tx->dqo.compl_ring)
351	goto err;
352
353	tx->q_resources = dma_alloc_coherent(dev: hdev, size: sizeof(*tx->q_resources),
354	dma_handle: &tx->q_resources_bus, GFP_KERNEL);
355	if (!tx->q_resources)
356	goto err;
357
358	if (!cfg->raw_addressing) {
359	qpl_id = gve_tx_qpl_id(priv, tx_qid: tx->q_num);
360	qpl_page_cnt = priv->tx_pages_per_qpl;
361
362	tx->dqo.qpl = gve_alloc_queue_page_list(priv, id: qpl_id,
363	pages: qpl_page_cnt);
364	if (!tx->dqo.qpl)
365	goto err;
366
367	if (gve_tx_qpl_buf_init(tx))
368	goto err;
369	}
370
371	return `0`;
372
373	err:
374	gve_tx_free_ring_dqo(priv, tx, cfg);
375	return -ENOMEM;
376	}
377
378	int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
379	struct gve_tx_alloc_rings_cfg *cfg)
380	{
381	struct gve_tx_ring *tx = cfg->tx;
382	int total_queues;
383	int err = `0`;
384	int i, j;
385
386	total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
387	if (total_queues > cfg->qcfg->max_queues) {
388	netif_err(priv, drv, priv->dev,
389	"Cannot alloc more than the max num of Tx rings\n");
390	return -EINVAL;
391	}
392
393	tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
394	GFP_KERNEL);
395	if (!tx)
396	return -ENOMEM;
397
398	for (i = `0`; i < total_queues; i++) {
399	err = gve_tx_alloc_ring_dqo(priv, cfg, tx: &tx[i], idx: i);
400	if (err) {
401	netif_err(priv, drv, priv->dev,
402	"Failed to alloc tx ring=%d: err=%d\n",
403	i, err);
404	goto err;
405	}
406	}
407
408	cfg->tx = tx;
409	return `0`;
410
411	err:
412	for (j = `0`; j < i; j++)
413	gve_tx_free_ring_dqo(priv, tx: &tx[j], cfg);
414	kvfree(addr: tx);
415	return err;
416	}
417
418	void gve_tx_free_rings_dqo(struct gve_priv *priv,
419	struct gve_tx_alloc_rings_cfg *cfg)
420	{
421	struct gve_tx_ring *tx = cfg->tx;
422	int i;
423
424	if (!tx)
425	return;
426
427	for (i = `0`; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
428	gve_tx_free_ring_dqo(priv, tx: &tx[i], cfg);
429
430	kvfree(addr: tx);
431	cfg->tx = NULL;
432	}
433
434	/ Returns the number of slots available in the ring /
435	static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
436	{
437	u32 num_used = (tx->dqo_tx.tail - tx->dqo_tx.head) & tx->mask;
438
439	return tx->mask - num_used;
440	}
441
442	static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx,
443	int desc_count, int buf_count)
444	{
445	return gve_has_pending_packet(tx) &&
446	num_avail_tx_slots(tx) >= desc_count &&
447	gve_has_free_tx_qpl_bufs(tx, count: buf_count);
448	}
449
450	/ Stops the queue if available descriptors is less than 'count'.*
451	* Return: 0 if stop is not required.
452	*/
453	static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
454	int desc_count, int buf_count)
455	{
456	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
457	return `0`;
458
459	/ Update cached TX head pointer /
460	tx->dqo_tx.head = atomic_read_acquire(v: &tx->dqo_compl.hw_tx_head);
461
462	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
463	return `0`;
464
465	/ No space, so stop the queue /
466	tx->stop_queue++;
467	netif_tx_stop_queue(dev_queue: tx->netdev_txq);
468
469	/ Sync with restarting queue in `gve_tx_poll_dqo()` /
470	mb();
471
472	/ After stopping queue, check if we can transmit again in order to*
473	* avoid TOCTOU bug.
474	*/
475	tx->dqo_tx.head = atomic_read_acquire(v: &tx->dqo_compl.hw_tx_head);
476
477	if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
478	return -EBUSY;
479
480	netif_tx_start_queue(dev_queue: tx->netdev_txq);
481	tx->wake_queue++;
482	return `0`;
483	}
484
485	static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb,
486	struct gve_tx_metadata_dqo *metadata)
487	{
488	memset(metadata, `0`, sizeof(*metadata));
489	metadata->version = GVE_TX_METADATA_VERSION_DQO;
490
491	if (skb->l4_hash) {
492	u16 path_hash = skb->hash ^ (skb->hash >> `16`);
493
494	path_hash &= (`1` << `15`) - `1`;
495	if (unlikely(path_hash == `0`))
496	path_hash = ~path_hash;
497
498	metadata->path_hash = path_hash;
499	}
500	}
501
502	static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring tx, u32 desc_idx,
503	struct sk_buff *skb, u32 len, u64 addr,
504	s16 compl_tag, bool eop, bool is_gso)
505	{
506	const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL;
507
508	while (len > `0`) {
509	struct gve_tx_pkt_desc_dqo *desc =
510	&tx->dqo.tx_ring[*desc_idx].pkt;
511	u32 cur_len = min_t(u32, len, GVE_TX_MAX_BUF_SIZE_DQO);
512	bool cur_eop = eop && cur_len == len;
513
514	desc = (struct* gve_tx_pkt_desc_dqo){
515	.buf_addr = cpu_to_le64(addr),
516	.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
517	.end_of_packet = cur_eop,
518	.checksum_offload_enable = checksum_offload_en,
519	.compl_tag = cpu_to_le16(compl_tag),
520	.buf_size = cur_len,
521	};
522
523	addr += cur_len;
524	len -= cur_len;
525	desc_idx = (desc_idx + `1`) & tx->mask;
526	}
527	}
528
529	/ Validates and prepares `skb` for TSO.*
530	*
531	* Returns header length, or < 0 if invalid.
532	*/
533	static int gve_prep_tso(struct sk_buff *skb)
534	{
535	struct tcphdr *tcp;
536	int header_len;
537	u32 paylen;
538	int err;
539
540	/ Note: HW requires MSS (gso_size) to be <= 9728 and the total length*
541	* of the TSO to be <= 262143.
542	*
543	* However, we don't validate these because:
544	* - Hypervisor enforces a limit of 9K MTU
545	* - Kernel will not produce a TSO larger than 64k
546	*/
547
548	if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO))
549	return -`1`;
550
551	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 \| SKB_GSO_TCPV6)))
552	return -EINVAL;
553
554	/ Needed because we will modify header. /
555	err = skb_cow_head(skb, headroom: `0`);
556	if (err < `0`)
557	return err;
558
559	tcp = tcp_hdr(skb);
560	paylen = skb->len - skb_transport_offset(skb);
561	csum_replace_by_diff(sum: &tcp->check, diff: (__force __wsum)htonl(paylen));
562	header_len = skb_tcp_all_headers(skb);
563
564	if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO))
565	return -EINVAL;
566
567	return header_len;
568	}
569
570	static void gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
571	const struct sk_buff *skb,
572	const struct gve_tx_metadata_dqo *metadata,
573	int header_len)
574	{
575	desc = (struct* gve_tx_tso_context_desc_dqo){
576	.header_len = header_len,
577	.cmd_dtype = {
578	.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
579	.tso = `1`,
580	},
581	.flex0 = metadata->bytes[`0`],
582	.flex5 = metadata->bytes[`5`],
583	.flex6 = metadata->bytes[`6`],
584	.flex7 = metadata->bytes[`7`],
585	.flex8 = metadata->bytes[`8`],
586	.flex9 = metadata->bytes[`9`],
587	.flex10 = metadata->bytes[`10`],
588	.flex11 = metadata->bytes[`11`],
589	};
590	desc->tso_total_len = skb->len - header_len;
591	desc->mss = skb_shinfo(skb)->gso_size;
592	}
593
594	static void
595	gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
596	const struct gve_tx_metadata_dqo *metadata)
597	{
598	desc = (struct* gve_tx_general_context_desc_dqo){
599	.flex0 = metadata->bytes[`0`],
600	.flex1 = metadata->bytes[`1`],
601	.flex2 = metadata->bytes[`2`],
602	.flex3 = metadata->bytes[`3`],
603	.flex4 = metadata->bytes[`4`],
604	.flex5 = metadata->bytes[`5`],
605	.flex6 = metadata->bytes[`6`],
606	.flex7 = metadata->bytes[`7`],
607	.flex8 = metadata->bytes[`8`],
608	.flex9 = metadata->bytes[`9`],
609	.flex10 = metadata->bytes[`10`],
610	.flex11 = metadata->bytes[`11`],
611	.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
612	};
613	}
614
615	static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
616	struct sk_buff *skb,
617	struct gve_tx_pending_packet_dqo *pkt,
618	s16 completion_tag,
619	u32 *desc_idx,
620	bool is_gso)
621	{
622	const struct skb_shared_info *shinfo = skb_shinfo(skb);
623	int i;
624
625	/ Note: HW requires that the size of a non-TSO packet be within the*
626	* range of [17, 9728].
627	*
628	* We don't double check because
629	* - We limited `netdev->min_mtu` to ETH_MIN_MTU.
630	* - Hypervisor won't allow MTU larger than 9216.
631	*/
632
633	pkt->num_bufs = `0`;
634	/ Map the linear portion of skb /
635	{
636	u32 len = skb_headlen(skb);
637	dma_addr_t addr;
638
639	addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
640	if (unlikely(dma_mapping_error(tx->dev, addr)))
641	goto err;
642
643	dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
644	dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
645	++pkt->num_bufs;
646
647	gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
648	compl_tag: completion_tag,
649	/eop=/shinfo->nr_frags == `0`, is_gso);
650	}
651
652	for (i = `0`; i < shinfo->nr_frags; i++) {
653	const skb_frag_t *frag = &shinfo->frags[i];
654	bool is_eop = i == (shinfo->nr_frags - `1`);
655	u32 len = skb_frag_size(frag);
656	dma_addr_t addr;
657
658	addr = skb_frag_dma_map(tx->dev, frag, `0`, len, DMA_TO_DEVICE);
659	if (unlikely(dma_mapping_error(tx->dev, addr)))
660	goto err;
661
662	dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
663	netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt,
664	dma[pkt->num_bufs], addr);
665	++pkt->num_bufs;
666
667	gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
668	compl_tag: completion_tag, eop: is_eop, is_gso);
669	}
670
671	return `0`;
672	err:
673	for (i = `0`; i < pkt->num_bufs; i++) {
674	if (i == `0`) {
675	dma_unmap_single(tx->dev,
676	dma_unmap_addr(pkt, dma[i]),
677	dma_unmap_len(pkt, len[i]),
678	DMA_TO_DEVICE);
679	} else {
680	dma_unmap_page(tx->dev,
681	dma_unmap_addr(pkt, dma[i]),
682	dma_unmap_len(pkt, len[i]),
683	DMA_TO_DEVICE);
684	}
685	}
686	pkt->num_bufs = `0`;
687	return -`1`;
688	}
689
690	/ Tx buffer i corresponds to*
691	* qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
692	* qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
693	*/
694	static void gve_tx_buf_get_addr(struct gve_tx_ring *tx,
695	s16 index,
696	void *va, dma_addr_t dma_addr)
697	{
698	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
699	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - `1`)) << GVE_TX_BUF_SHIFT_DQO;
700
701	*va = page_address(tx->dqo.qpl->pages[page_id]) + offset;
702	*dma_addr = tx->dqo.qpl->page_buses[page_id] + offset;
703	}
704
705	static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
706	struct sk_buff *skb,
707	struct gve_tx_pending_packet_dqo *pkt,
708	s16 completion_tag,
709	u32 *desc_idx,
710	bool is_gso)
711	{
712	u32 copy_offset = `0`;
713	dma_addr_t dma_addr;
714	u32 copy_len;
715	s16 index;
716	void *va;
717
718	/ Break the packet into buffer size chunks /
719	pkt->num_bufs = `0`;
720	while (copy_offset < skb->len) {
721	index = gve_alloc_tx_qpl_buf(tx);
722	if (unlikely(index == -`1`))
723	goto err;
724
725	gve_tx_buf_get_addr(tx, index, va: &va, dma_addr: &dma_addr);
726	copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO,
727	skb->len - copy_offset);
728	skb_copy_bits(skb, offset: copy_offset, to: va, len: copy_len);
729
730	copy_offset += copy_len;
731	dma_sync_single_for_device(dev: tx->dev, addr: dma_addr,
732	size: copy_len, dir: DMA_TO_DEVICE);
733	gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb,
734	len: copy_len,
735	addr: dma_addr,
736	compl_tag: completion_tag,
737	eop: copy_offset == skb->len,
738	is_gso);
739
740	pkt->tx_qpl_buf_ids[pkt->num_bufs] = index;
741	++tx->dqo_tx.alloc_tx_qpl_buf_cnt;
742	++pkt->num_bufs;
743	}
744
745	return `0`;
746	err:
747	/ Should not be here if gve_has_free_tx_qpl_bufs() check is correct /
748	gve_free_tx_qpl_bufs(tx, pkt);
749	return -ENOMEM;
750	}
751
752	/ Returns 0 on success, or < 0 on error.*
753	*
754	* Before this function is called, the caller must ensure
755	* gve_has_pending_packet(tx) returns true.
756	*/
757	static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
758	struct sk_buff *skb)
759	{
760	const bool is_gso = skb_is_gso(skb);
761	u32 desc_idx = tx->dqo_tx.tail;
762	struct gve_tx_pending_packet_dqo *pkt;
763	struct gve_tx_metadata_dqo metadata;
764	s16 completion_tag;
765
766	pkt = gve_alloc_pending_packet(tx);
767	if (!pkt)
768	return -ENOMEM;
769
770	pkt->skb = skb;
771	completion_tag = pkt - tx->dqo.pending_packets;
772
773	gve_extract_tx_metadata_dqo(skb, metadata: &metadata);
774	if (is_gso) {
775	int header_len = gve_prep_tso(skb);
776
777	if (unlikely(header_len < `0`))
778	goto err;
779
780	gve_tx_fill_tso_ctx_desc(desc: &tx->dqo.tx_ring[desc_idx].tso_ctx,
781	skb, metadata: &metadata, header_len);
782	desc_idx = (desc_idx + `1`) & tx->mask;
783	}
784
785	gve_tx_fill_general_ctx_desc(desc: &tx->dqo.tx_ring[desc_idx].general_ctx,
786	metadata: &metadata);
787	desc_idx = (desc_idx + `1`) & tx->mask;
788
789	if (tx->dqo.qpl) {
790	if (gve_tx_add_skb_copy_dqo(tx, skb, pkt,
791	completion_tag,
792	desc_idx: &desc_idx, is_gso))
793	goto err;
794	} else {
795	if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt,
796	completion_tag,
797	desc_idx: &desc_idx, is_gso))
798	goto err;
799	}
800
801	tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
802
803	/ Commit the changes to our state /
804	tx->dqo_tx.tail = desc_idx;
805
806	/ Request a descriptor completion on the last descriptor of the*
807	* packet if we are allowed to by the HW enforced interval.
808	*/
809	{
810	u32 last_desc_idx = (desc_idx - `1`) & tx->mask;
811	u32 last_report_event_interval =
812	(last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask;
813
814	if (unlikely(last_report_event_interval >=
815	GVE_TX_MIN_RE_INTERVAL)) {
816	tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true;
817	tx->dqo_tx.last_re_idx = last_desc_idx;
818	}
819	}
820
821	return `0`;
822
823	err:
824	pkt->skb = NULL;
825	gve_free_pending_packet(tx, pending_packet: pkt);
826
827	return -`1`;
828	}
829
830	static int gve_num_descs_per_buf(size_t size)
831	{
832	return DIV_ROUND_UP(size, GVE_TX_MAX_BUF_SIZE_DQO);
833	}
834
835	static int gve_num_buffer_descs_needed(const struct sk_buff *skb)
836	{
837	const struct skb_shared_info *shinfo = skb_shinfo(skb);
838	int num_descs;
839	int i;
840
841	num_descs = gve_num_descs_per_buf(size: skb_headlen(skb));
842
843	for (i = `0`; i < shinfo->nr_frags; i++) {
844	unsigned int frag_size = skb_frag_size(frag: &shinfo->frags[i]);
845
846	num_descs += gve_num_descs_per_buf(size: frag_size);
847	}
848
849	return num_descs;
850	}
851
852	/ Returns true if HW is capable of sending TSO represented by `skb`.*
853	*
854	* Each segment must not span more than GVE_TX_MAX_DATA_DESCS buffers.
855	* - The header is counted as one buffer for every single segment.
856	* - A buffer which is split between two segments is counted for both.
857	* - If a buffer contains both header and payload, it is counted as two buffers.
858	*/
859	static bool gve_can_send_tso(const struct sk_buff *skb)
860	{
861	const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - `1`;
862	const struct skb_shared_info *shinfo = skb_shinfo(skb);
863	const int header_len = skb_tcp_all_headers(skb);
864	const int gso_size = shinfo->gso_size;
865	int cur_seg_num_bufs;
866	int prev_frag_size;
867	int cur_seg_size;
868	int i;
869
870	cur_seg_size = skb_headlen(skb) - header_len;
871	prev_frag_size = skb_headlen(skb);
872	cur_seg_num_bufs = cur_seg_size > `0`;
873
874	for (i = `0`; i < shinfo->nr_frags; i++) {
875	if (cur_seg_size >= gso_size) {
876	cur_seg_size %= gso_size;
877	cur_seg_num_bufs = cur_seg_size > `0`;
878
879	if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) {
880	int prev_frag_remain = prev_frag_size %
881	GVE_TX_MAX_BUF_SIZE_DQO;
882
883	/ If the last descriptor of the previous frag*
884	* is less than cur_seg_size, the segment will
885	* span two descriptors in the previous frag.
886	* Since max gso size (9728) is less than
887	* GVE_TX_MAX_BUF_SIZE_DQO, it is impossible
888	* for the segment to span more than two
889	* descriptors.
890	*/
891	if (prev_frag_remain &&
892	cur_seg_size > prev_frag_remain)
893	cur_seg_num_bufs++;
894	}
895	}
896
897	if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg))
898	return false;
899
900	prev_frag_size = skb_frag_size(frag: &shinfo->frags[i]);
901	cur_seg_size += prev_frag_size;
902	}
903
904	return true;
905	}
906
907	netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
908	struct net_device *dev,
909	netdev_features_t features)
910	{
911	if (skb_is_gso(skb) && !gve_can_send_tso(skb))
912	return features & ~NETIF_F_GSO_MASK;
913
914	return features;
915	}
916
917	/ Attempt to transmit specified SKB.*
918	*
919	* Returns 0 if the SKB was transmitted or dropped.
920	* Returns -1 if there is not currently enough space to transmit the SKB.
921	*/
922	static int gve_try_tx_skb(struct gve_priv priv, struct* gve_tx_ring *tx,
923	struct sk_buff *skb)
924	{
925	int num_buffer_descs;
926	int total_num_descs;
927
928	if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb)))
929	goto drop;
930
931	if (tx->dqo.qpl) {
932	/ We do not need to verify the number of buffers used per*
933	* packet or per segment in case of TSO as with 2K size buffers
934	* none of the TX packet rules would be violated.
935	*
936	* gve_can_send_tso() checks that each TCP segment of gso_size is
937	* not distributed over more than 9 SKB frags..
938	*/
939	num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO);
940	} else {
941	num_buffer_descs = gve_num_buffer_descs_needed(skb);
942	if (!skb_is_gso(skb)) {
943	if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
944	if (unlikely(skb_linearize(skb) < `0`))
945	goto drop;
946
947	num_buffer_descs = `1`;
948	}
949	}
950	}
951
952	/ Metadata + (optional TSO) + data descriptors. /
953	total_num_descs = `1` + skb_is_gso(skb) + num_buffer_descs;
954	if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs +
955	GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP,
956	num_buffer_descs))) {
957	return -`1`;
958	}
959
960	if (unlikely(gve_tx_add_skb_dqo(tx, skb) < `0`))
961	goto drop;
962
963	netdev_tx_sent_queue(dev_queue: tx->netdev_txq, bytes: skb->len);
964	skb_tx_timestamp(skb);
965	return `0`;
966
967	drop:
968	tx->dropped_pkt++;
969	dev_kfree_skb_any(skb);
970	return `0`;
971	}
972
973	/ Transmit a given skb and ring the doorbell. /
974	netdev_tx_t gve_tx_dqo(struct sk_buff skb, struct* net_device *dev)
975	{
976	struct gve_priv *priv = netdev_priv(dev);
977	struct gve_tx_ring *tx;
978
979	tx = &priv->tx[skb_get_queue_mapping(skb)];
980	if (unlikely(gve_try_tx_skb(priv, tx, skb) < `0`)) {
981	/ We need to ring the txq doorbell -- we have stopped the Tx*
982	* queue for want of resources, but prior calls to gve_tx()
983	* may have added descriptors without ringing the doorbell.
984	*/
985	gve_tx_put_doorbell_dqo(priv, q_resources: tx->q_resources, val: tx->dqo_tx.tail);
986	return NETDEV_TX_BUSY;
987	}
988
989	if (!netif_xmit_stopped(dev_queue: tx->netdev_txq) && netdev_xmit_more())
990	return NETDEV_TX_OK;
991
992	gve_tx_put_doorbell_dqo(priv, q_resources: tx->q_resources, val: tx->dqo_tx.tail);
993	return NETDEV_TX_OK;
994	}
995
996	static void add_to_list(struct gve_tx_ring tx, struct* gve_index_list *list,
997	struct gve_tx_pending_packet_dqo *pending_packet)
998	{
999	s16 old_tail, index;
1000
1001	index = pending_packet - tx->dqo.pending_packets;
1002	old_tail = list->tail;
1003	list->tail = index;
1004	if (old_tail == -`1`)
1005	list->head = index;
1006	else
1007	tx->dqo.pending_packets[old_tail].next = index;
1008
1009	pending_packet->next = -`1`;
1010	pending_packet->prev = old_tail;
1011	}
1012
1013	static void remove_from_list(struct gve_tx_ring *tx,
1014	struct gve_index_list *list,
1015	struct gve_tx_pending_packet_dqo *pkt)
1016	{
1017	s16 prev_index, next_index;
1018
1019	prev_index = pkt->prev;
1020	next_index = pkt->next;
1021
1022	if (prev_index == -`1`) {
1023	/ Node is head /
1024	list->head = next_index;
1025	} else {
1026	tx->dqo.pending_packets[prev_index].next = next_index;
1027	}
1028	if (next_index == -`1`) {
1029	/ Node is tail /
1030	list->tail = prev_index;
1031	} else {
1032	tx->dqo.pending_packets[next_index].prev = prev_index;
1033	}
1034	}
1035
1036	static void gve_unmap_packet(struct device *dev,
1037	struct gve_tx_pending_packet_dqo *pkt)
1038	{
1039	int i;
1040
1041	/ SKB linear portion is guaranteed to be mapped /
1042	dma_unmap_single(dev, dma_unmap_addr(pkt, dma[`0`]),
1043	dma_unmap_len(pkt, len[`0`]), DMA_TO_DEVICE);
1044	for (i = `1`; i < pkt->num_bufs; i++) {
1045	netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]),
1046	dma_unmap_len(pkt, len[i]),
1047	dir: DMA_TO_DEVICE, attrs: `0`);
1048	}
1049	pkt->num_bufs = `0`;
1050	}
1051
1052	/ Completion types and expected behavior:*
1053	* No Miss compl + Packet compl = Packet completed normally.
1054	* Miss compl + Re-inject compl = Packet completed normally.
1055	* No Miss compl + Re-inject compl = Skipped i.e. packet not completed.
1056	* Miss compl + Packet compl = Skipped i.e. packet not completed.
1057	*/
1058	static void gve_handle_packet_completion(struct gve_priv *priv,
1059	struct gve_tx_ring *tx, bool is_napi,
1060	u16 compl_tag, u64 bytes, u64 pkts,
1061	bool is_reinjection)
1062	{
1063	struct gve_tx_pending_packet_dqo *pending_packet;
1064
1065	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
1066	net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
1067	priv->dev->name, (int)compl_tag);
1068	return;
1069	}
1070
1071	pending_packet = &tx->dqo.pending_packets[compl_tag];
1072
1073	if (unlikely(is_reinjection)) {
1074	if (unlikely(pending_packet->state ==
1075	GVE_PACKET_STATE_TIMED_OUT_COMPL)) {
1076	net_err_ratelimited("%s: Re-injection completion: %d received after timeout.\n",
1077	priv->dev->name, (int)compl_tag);
1078	/ Packet was already completed as a result of timeout,*
1079	* so just remove from list and free pending packet.
1080	*/
1081	remove_from_list(tx,
1082	list: &tx->dqo_compl.timed_out_completions,
1083	pkt: pending_packet);
1084	gve_free_pending_packet(tx, pending_packet);
1085	return;
1086	}
1087	if (unlikely(pending_packet->state !=
1088	GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) {
1089	/ No outstanding miss completion but packet allocated*
1090	* implies packet receives a re-injection completion
1091	* without a prior miss completion. Return without
1092	* completing the packet.
1093	*/
1094	net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n",
1095	priv->dev->name, (int)compl_tag);
1096	return;
1097	}
1098	remove_from_list(tx, list: &tx->dqo_compl.miss_completions,
1099	pkt: pending_packet);
1100	} else {
1101	/ Packet is allocated but not a pending data completion. /
1102	if (unlikely(pending_packet->state !=
1103	GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
1104	net_err_ratelimited("%s: No pending data completion: %d\n",
1105	priv->dev->name, (int)compl_tag);
1106	return;
1107	}
1108	}
1109	tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;
1110	if (tx->dqo.qpl)
1111	gve_free_tx_qpl_bufs(tx, pkt: pending_packet);
1112	else
1113	gve_unmap_packet(dev: tx->dev, pkt: pending_packet);
1114
1115	*bytes += pending_packet->skb->len;
1116	(*pkts)++;
1117	napi_consume_skb(skb: pending_packet->skb, budget: is_napi);
1118	pending_packet->skb = NULL;
1119	gve_free_pending_packet(tx, pending_packet);
1120	}
1121
1122	static void gve_handle_miss_completion(struct gve_priv *priv,
1123	struct gve_tx_ring *tx, u16 compl_tag,
1124	u64 bytes, u64 pkts)
1125	{
1126	struct gve_tx_pending_packet_dqo *pending_packet;
1127
1128	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
1129	net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
1130	priv->dev->name, (int)compl_tag);
1131	return;
1132	}
1133
1134	pending_packet = &tx->dqo.pending_packets[compl_tag];
1135	if (unlikely(pending_packet->state !=
1136	GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
1137	net_err_ratelimited("%s: Unexpected packet state: %d for completion tag : %d\n",
1138	priv->dev->name, (int)pending_packet->state,
1139	(int)compl_tag);
1140	return;
1141	}
1142
1143	pending_packet->state = GVE_PACKET_STATE_PENDING_REINJECT_COMPL;
1144	/ jiffies can wraparound but time comparisons can handle overflows. /
1145	pending_packet->timeout_jiffies =
1146	jiffies +
1147	secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT);
1148	add_to_list(tx, list: &tx->dqo_compl.miss_completions, pending_packet);
1149
1150	*bytes += pending_packet->skb->len;
1151	(*pkts)++;
1152	}
1153
1154	static void remove_miss_completions(struct gve_priv *priv,
1155	struct gve_tx_ring *tx)
1156	{
1157	struct gve_tx_pending_packet_dqo *pending_packet;
1158	s16 next_index;
1159
1160	next_index = tx->dqo_compl.miss_completions.head;
1161	while (next_index != -`1`) {
1162	pending_packet = &tx->dqo.pending_packets[next_index];
1163	next_index = pending_packet->next;
1164	/ Break early because packets should timeout in order. /
1165	if (time_is_after_jiffies(pending_packet->timeout_jiffies))
1166	break;
1167
1168	remove_from_list(tx, list: &tx->dqo_compl.miss_completions,
1169	pkt: pending_packet);
1170	/ Unmap/free TX buffers and free skb but do not unallocate packet i.e.*
1171	* the completion tag is not freed to ensure that the driver
1172	* can take appropriate action if a corresponding valid
1173	* completion is received later.
1174	*/
1175	if (tx->dqo.qpl)
1176	gve_free_tx_qpl_bufs(tx, pkt: pending_packet);
1177	else
1178	gve_unmap_packet(dev: tx->dev, pkt: pending_packet);
1179
1180	/ This indicates the packet was dropped. /
1181	dev_kfree_skb_any(skb: pending_packet->skb);
1182	pending_packet->skb = NULL;
1183	tx->dropped_pkt++;
1184	net_err_ratelimited("%s: No reinjection completion was received for: %d.\n",
1185	priv->dev->name,
1186	(int)(pending_packet - tx->dqo.pending_packets));
1187
1188	pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
1189	pending_packet->timeout_jiffies =
1190	jiffies +
1191	secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT);
1192	/ Maintain pending packet in another list so the packet can be*
1193	* unallocated at a later time.
1194	*/
1195	add_to_list(tx, list: &tx->dqo_compl.timed_out_completions,
1196	pending_packet);
1197	}
1198	}
1199
1200	static void remove_timed_out_completions(struct gve_priv *priv,
1201	struct gve_tx_ring *tx)
1202	{
1203	struct gve_tx_pending_packet_dqo *pending_packet;
1204	s16 next_index;
1205
1206	next_index = tx->dqo_compl.timed_out_completions.head;
1207	while (next_index != -`1`) {
1208	pending_packet = &tx->dqo.pending_packets[next_index];
1209	next_index = pending_packet->next;
1210	/ Break early because packets should timeout in order. /
1211	if (time_is_after_jiffies(pending_packet->timeout_jiffies))
1212	break;
1213
1214	remove_from_list(tx, list: &tx->dqo_compl.timed_out_completions,
1215	pkt: pending_packet);
1216	gve_free_pending_packet(tx, pending_packet);
1217	}
1218	}
1219
1220	int gve_clean_tx_done_dqo(struct gve_priv priv, struct* gve_tx_ring *tx,
1221	struct napi_struct *napi)
1222	{
1223	u64 reinject_compl_bytes = `0`;
1224	u64 reinject_compl_pkts = `0`;
1225	int num_descs_cleaned = `0`;
1226	u64 miss_compl_bytes = `0`;
1227	u64 miss_compl_pkts = `0`;
1228	u64 pkt_compl_bytes = `0`;
1229	u64 pkt_compl_pkts = `0`;
1230
1231	/ Limit in order to avoid blocking for too long /
1232	while (!napi \|\| pkt_compl_pkts < napi->weight) {
1233	struct gve_tx_compl_desc *compl_desc =
1234	&tx->dqo.compl_ring[tx->dqo_compl.head];
1235	u16 type;
1236
1237	if (compl_desc->generation == tx->dqo_compl.cur_gen_bit)
1238	break;
1239
1240	/ Prefetch the next descriptor. /
1241	prefetch(&tx->dqo.compl_ring[(tx->dqo_compl.head + `1`) &
1242	tx->dqo.complq_mask]);
1243
1244	/ Do not read data until we own the descriptor /
1245	dma_rmb();
1246	type = compl_desc->type;
1247
1248	if (type == GVE_COMPL_TYPE_DQO_DESC) {
1249	/ This is the last descriptor fetched by HW plus one /
1250	u16 tx_head = le16_to_cpu(compl_desc->tx_head);
1251
1252	atomic_set_release(v: &tx->dqo_compl.hw_tx_head, i: tx_head);
1253	} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
1254	u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);
1255	if (compl_tag & GVE_ALT_MISS_COMPL_BIT) {
1256	compl_tag &= ~GVE_ALT_MISS_COMPL_BIT;
1257	gve_handle_miss_completion(priv, tx, compl_tag,
1258	bytes: &miss_compl_bytes,
1259	pkts: &miss_compl_pkts);
1260	} else {
1261	gve_handle_packet_completion(priv, tx, is_napi: !!napi,
1262	compl_tag,
1263	bytes: &pkt_compl_bytes,
1264	pkts: &pkt_compl_pkts,
1265	is_reinjection: false);
1266	}
1267	} else if (type == GVE_COMPL_TYPE_DQO_MISS) {
1268	u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);
1269
1270	gve_handle_miss_completion(priv, tx, compl_tag,
1271	bytes: &miss_compl_bytes,
1272	pkts: &miss_compl_pkts);
1273	} else if (type == GVE_COMPL_TYPE_DQO_REINJECTION) {
1274	u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);
1275
1276	gve_handle_packet_completion(priv, tx, is_napi: !!napi,
1277	compl_tag,
1278	bytes: &reinject_compl_bytes,
1279	pkts: &reinject_compl_pkts,
1280	is_reinjection: true);
1281	}
1282
1283	tx->dqo_compl.head =
1284	(tx->dqo_compl.head + `1`) & tx->dqo.complq_mask;
1285	/ Flip the generation bit when we wrap around /
1286	tx->dqo_compl.cur_gen_bit ^= tx->dqo_compl.head == `0`;
1287	num_descs_cleaned++;
1288	}
1289
1290	netdev_tx_completed_queue(dev_queue: tx->netdev_txq,
1291	pkts: pkt_compl_pkts + miss_compl_pkts,
1292	bytes: pkt_compl_bytes + miss_compl_bytes);
1293
1294	remove_miss_completions(priv, tx);
1295	remove_timed_out_completions(priv, tx);
1296
1297	u64_stats_update_begin(syncp: &tx->statss);
1298	tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
1299	tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
1300	u64_stats_update_end(syncp: &tx->statss);
1301	return num_descs_cleaned;
1302	}
1303
1304	bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
1305	{
1306	struct gve_tx_compl_desc *compl_desc;
1307	struct gve_tx_ring *tx = block->tx;
1308	struct gve_priv *priv = block->priv;
1309
1310	if (do_clean) {
1311	int num_descs_cleaned = gve_clean_tx_done_dqo(priv, tx,
1312	napi: &block->napi);
1313
1314	/ Sync with queue being stopped in `gve_maybe_stop_tx_dqo()` /
1315	mb();
1316
1317	if (netif_tx_queue_stopped(dev_queue: tx->netdev_txq) &&
1318	num_descs_cleaned > `0`) {
1319	tx->wake_queue++;
1320	netif_tx_wake_queue(dev_queue: tx->netdev_txq);
1321	}
1322	}
1323
1324	/ Return true if we still have work. /
1325	compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
1326	return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
1327	}
1328

Provided by KDAB

Definitions

gve_has_free_tx_qpl_bufs
gve_alloc_tx_qpl_buf
gve_free_tx_qpl_bufs
gve_has_pending_packet
gve_alloc_pending_packet
gve_free_pending_packet
gve_tx_clean_pending_packets
gve_tx_stop_ring_dqo
gve_tx_free_ring_dqo
gve_tx_qpl_buf_init
gve_tx_start_ring_dqo
gve_tx_alloc_ring_dqo
gve_tx_alloc_rings_dqo
gve_tx_free_rings_dqo
num_avail_tx_slots
gve_has_avail_slots_tx_dqo
gve_maybe_stop_tx_dqo
gve_extract_tx_metadata_dqo
gve_tx_fill_pkt_desc_dqo
gve_prep_tso
gve_tx_fill_tso_ctx_desc
gve_tx_fill_general_ctx_desc
gve_tx_add_skb_no_copy_dqo
gve_tx_buf_get_addr
gve_tx_add_skb_copy_dqo
gve_tx_add_skb_dqo
gve_num_descs_per_buf
gve_num_buffer_descs_needed
gve_can_send_tso
gve_features_check_dqo
gve_try_tx_skb
gve_tx_dqo
add_to_list
remove_from_list
gve_unmap_packet
gve_handle_packet_completion
gve_handle_miss_completion
remove_miss_completions
remove_timed_out_completions
gve_clean_tx_done_dqo

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/drivers/net/ethernet/google/gve/gve_tx_dqo.c