sge.c source code [linux/drivers/net/ethernet/chelsio/cxgb3/sge.c]

1	/*
2	* Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
3	*
4	* This software is available to you under a choice of one of two
5	* licenses. You may choose to be licensed under the terms of the GNU
6	* General Public License (GPL) Version 2, available from the file
7	* COPYING in the main directory of this source tree, or the
8	* OpenIB.org BSD license below:
9	*
10	* Redistribution and use in source and binary forms, with or
11	* without modification, are permitted provided that the following
12	* conditions are met:
13	*
14	* - Redistributions of source code must retain the above
15	* copyright notice, this list of conditions and the following
16	* disclaimer.
17	*
18	* - Redistributions in binary form must reproduce the above
19	* copyright notice, this list of conditions and the following
20	* disclaimer in the documentation and/or other materials
21	* provided with the distribution.
22	*
23	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24	* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26	* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27	* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28	* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30	* SOFTWARE.
31	*/
32	#include <linux/skbuff.h>
33	#include <linux/netdevice.h>
34	#include <linux/etherdevice.h>
35	#include <linux/if_vlan.h>
36	#include <linux/ip.h>
37	#include <linux/tcp.h>
38	#include <linux/dma-mapping.h>
39	#include <linux/slab.h>
40	#include <linux/prefetch.h>
41	#include <net/arp.h>
42	#include "common.h"
43	#include "regs.h"
44	#include "sge_defs.h"
45	#include "t3_cpl.h"
46	#include "firmware_exports.h"
47	#include "cxgb3_offload.h"
48
#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE 1536

/* Rx packets no longer than this are copied and their buffer is recycled. */
#define SGE_RX_COPY_THRES  256
/* Bytes copied into the linear area when a page chunk is attached as a frag. */
#define SGE_RX_PULL_LEN    128

/* Space kept at the end of each page allocation; it holds the chunk counter. */
#define SGE_PG_RSVD SMP_CACHE_BYTES
/*
 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
 * directly.
 */
#define FL0_PG_CHUNK_SIZE  2048
#define FL0_PG_ORDER 0
#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)

#define SGE_RX_DROP_THRES 16
#define RX_RECLAIM_PERIOD (HZ/4)

/*
 * Max number of Rx buffers we replenish at a time.
 */
#define MAX_RX_REFILL 16U
/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD (HZ / 4)
#define TX_RECLAIM_TIMER_CHUNK 64U
#define TX_RECLAIM_CHUNK 16U

/* WR size in bytes */
#define WR_LEN (WR_FLITS * 8)
86
/*
 * Types of Tx queues in each queue set.  Order here matters, do not change:
 * the values are used as indices into a queue set's txq[] array.
 */
enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
91
/* Values for sge_txq.flags */
enum {
	TXQ_RUNNING = 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,	/* last packet rang the doorbell */
};
97
98	struct tx_desc {
99	__be64 flit[TX_DESC_FLITS];
100	};
101
102	struct rx_desc {
103	__be32 addr_lo;
104	__be32 len_gen;
105	__be32 gen2;
106	__be32 addr_hi;
107	};
108
109	struct tx_sw_desc { / SW state per Tx descriptor /
110	struct sk_buff *skb;
111	u8 eop; / set if last descriptor for packet /
112	u8 addr_idx; / buffer index of first SGL entry in descriptor /
113	u8 fragidx; / first page fragment associated with descriptor /
114	s8 sflit; / start flit of first SGL entry in descriptor /
115	};
116
117	struct rx_sw_desc { / SW state per Rx descriptor /
118	union {
119	struct sk_buff *skb;
120	struct fl_pg_chunk pg_chunk;
121	};
122	DEFINE_DMA_UNMAP_ADDR(dma_addr);
123	};
124
125	struct rsp_desc { / response queue descriptor /
126	struct rss_header rss_hdr;
127	__be32 flags;
128	__be32 len_cq;
129	struct_group(immediate,
130	u8 imm_data[`47`];
131	u8 intr_gen;
132	);
133	};
134
135	/*
136	* Holds unmapping information for Tx packets that need deferred unmapping.
137	* This structure lives at skb->head and must be allocated by callers.
138	*/
139	struct deferred_unmap_info {
140	struct pci_dev *pdev;
141	dma_addr_t addr[MAX_SKB_FRAGS + `1`];
142	};
143
144	/*
145	* Maps a number of flits to the number of Tx descriptors that can hold them.
146	* The formula is
147	*
148	* desc = 1 + (flits - 2) / (WR_FLITS - 1).
149	*
150	* HW allows up to 4 descriptors to be combined into a WR.
151	*/
152	static u8 flit_desc_map[] = {
153	`0`,
154	#if SGE_NUM_GENBITS == 1
155	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
156	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
157	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
158	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`
159	#elif SGE_NUM_GENBITS == 2
160	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
161	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
162	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
163	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
164	#else
165	# error "SGE_NUM_GENBITS must be 1 or 2"
166	#endif
167	};
168
169	static inline struct sge_qset rspq_to_qset(const* struct sge_rspq *q)
170	{
171	return container_of(q, struct sge_qset, rspq);
172	}
173
174	static inline struct sge_qset txq_to_qset(const* struct sge_txq q, int* qidx)
175	{
176	return container_of(q, struct sge_qset, txq[qidx]);
177	}
178
179	/**
180	* refill_rspq - replenish an SGE response queue
181	* @adapter: the adapter
182	* @q: the response queue to replenish
183	* @credits: how many new responses to make available
184	*
185	* Replenishes a response queue by making the supplied number of responses
186	* available to HW.
187	*/
188	static inline void refill_rspq(struct adapter *adapter,
189	const struct sge_rspq q, unsigned* int credits)
190	{
191	rmb();
192	t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
193	V_RSPQ(q->cntxt_id) \| V_CREDITS(credits));
194	}
195
/**
 *	need_skb_unmap - does the platform need unmapping of sk_buffs?
 *
 *	Returns 1 if the platform needs sk_buff unmapping (i.e. the kernel is
 *	built with CONFIG_NEED_DMA_MAP_STATE) and 0 otherwise.  Because the
 *	result is a compile-time constant, the compiler can optimize away the
 *	unmapping code in callers when this returns 0.
 */
static inline int need_skb_unmap(void)
{
#ifdef CONFIG_NEED_DMA_MAP_STATE
	return 1;
#else
	return 0;
#endif
}
210
211	/**
212	* unmap_skb - unmap a packet main body and its page fragments
213	* @skb: the packet
214	* @q: the Tx queue containing Tx descriptors for the packet
215	* @cidx: index of Tx descriptor
216	* @pdev: the PCI device
217	*
218	* Unmap the main body of an sk_buff and its page fragments, if any.
219	* Because of the fairly complicated structure of our SGLs and the desire
220	* to conserve space for metadata, the information necessary to unmap an
221	* sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
222	* descriptors (the physical addresses of the various data buffers), and
223	* the SW descriptor state (assorted indices). The send functions
224	* initialize the indices for the first packet descriptor so we can unmap
225	* the buffers held in the first Tx descriptor here, and we have enough
226	* information at this point to set the state for the next Tx descriptor.
227	*
228	* Note that it is possible to clean up the first descriptor of a packet
229	* before the send routines have written the next descriptors, but this
230	* race does not cause any problem. We just end up writing the unmapping
231	* info for the descriptor first.
232	*/
233	static inline void unmap_skb(struct sk_buff skb, struct* sge_txq *q,
234	unsigned int cidx, struct pci_dev *pdev)
235	{
236	const struct sg_ent *sgp;
237	struct tx_sw_desc *d = &q->sdesc[cidx];
238	int nfrags, frag_idx, curflit, j = d->addr_idx;
239
240	sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
241	frag_idx = d->fragidx;
242
243	if (frag_idx == `0` && skb_headlen(skb)) {
244	dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[`0`]),
245	skb_headlen(skb), DMA_TO_DEVICE);
246	j = `1`;
247	}
248
249	curflit = d->sflit + `1` + j;
250	nfrags = skb_shinfo(skb)->nr_frags;
251
252	while (frag_idx < nfrags && curflit < WR_FLITS) {
253	dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]),
254	skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]),
255	DMA_TO_DEVICE);
256	j ^= `1`;
257	if (j == `0`) {
258	sgp++;
259	curflit++;
260	}
261	curflit++;
262	frag_idx++;
263	}
264
265	if (frag_idx < nfrags) { / SGL continues into next Tx descriptor /
266	d = cidx + `1` == q->size ? q->sdesc : d + `1`;
267	d->fragidx = frag_idx;
268	d->addr_idx = j;
269	d->sflit = curflit - WR_FLITS - j; / sflit can be -1 /
270	}
271	}
272
273	/**
274	* free_tx_desc - reclaims Tx descriptors and their buffers
275	* @adapter: the adapter
276	* @q: the Tx queue to reclaim descriptors from
277	* @n: the number of descriptors to reclaim
278	*
279	* Reclaims Tx descriptors from an SGE Tx queue and frees the associated
280	* Tx buffers. Called with the Tx queue lock held.
281	*/
282	static void free_tx_desc(struct adapter adapter, struct* sge_txq *q,
283	unsigned int n)
284	{
285	struct tx_sw_desc *d;
286	struct pci_dev *pdev = adapter->pdev;
287	unsigned int cidx = q->cidx;
288
289	const int need_unmap = need_skb_unmap() &&
290	q->cntxt_id >= FW_TUNNEL_SGEEC_START;
291
292	d = &q->sdesc[cidx];
293	while (n--) {
294	if (d->skb) { / an SGL is present /
295	if (need_unmap)
296	unmap_skb(skb: d->skb, q, cidx, pdev);
297	if (d->eop) {
298	dev_consume_skb_any(skb: d->skb);
299	d->skb = NULL;
300	}
301	}
302	++d;
303	if (++cidx == q->size) {
304	cidx = `0`;
305	d = q->sdesc;
306	}
307	}
308	q->cidx = cidx;
309	}
310
311	/**
312	* reclaim_completed_tx - reclaims completed Tx descriptors
313	* @adapter: the adapter
314	* @q: the Tx queue to reclaim completed descriptors from
315	* @chunk: maximum number of descriptors to reclaim
316	*
317	* Reclaims Tx descriptors that the SGE has indicated it has processed,
318	* and frees the associated buffers if possible. Called with the Tx
319	* queue's lock held.
320	*/
321	static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
322	struct sge_txq *q,
323	unsigned int chunk)
324	{
325	unsigned int reclaim = q->processed - q->cleaned;
326
327	reclaim = min(chunk, reclaim);
328	if (reclaim) {
329	free_tx_desc(adapter, q, n: reclaim);
330	q->cleaned += reclaim;
331	q->in_use -= reclaim;
332	}
333	return q->processed - q->cleaned;
334	}
335
336	/**
337	* should_restart_tx - are there enough resources to restart a Tx queue?
338	* @q: the Tx queue
339	*
340	* Checks if there are enough descriptors to restart a suspended Tx queue.
341	*/
342	static inline int should_restart_tx(const struct sge_txq *q)
343	{
344	unsigned int r = q->processed - q->cleaned;
345
346	return q->in_use - r < (q->size >> `1`);
347	}
348
349	static void clear_rx_desc(struct pci_dev pdev, const* struct sge_fl *q,
350	struct rx_sw_desc *d)
351	{
352	if (q->use_pages && d->pg_chunk.page) {
353	(*d->pg_chunk.p_cnt)--;
354	if (!*d->pg_chunk.p_cnt)
355	dma_unmap_page(&pdev->dev, d->pg_chunk.mapping,
356	q->alloc_size, DMA_FROM_DEVICE);
357
358	put_page(page: d->pg_chunk.page);
359	d->pg_chunk.page = NULL;
360	} else {
361	dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr),
362	q->buf_size, DMA_FROM_DEVICE);
363	kfree_skb(skb: d->skb);
364	d->skb = NULL;
365	}
366	}
367
368	/**
369	* free_rx_bufs - free the Rx buffers on an SGE free list
370	* @pdev: the PCI device associated with the adapter
371	* @q: the SGE free list to clean up
372	*
373	* Release the buffers on an SGE free-buffer Rx queue. HW fetching from
374	* this queue should be stopped before calling this function.
375	*/
376	static void free_rx_bufs(struct pci_dev pdev, struct* sge_fl *q)
377	{
378	unsigned int cidx = q->cidx;
379
380	while (q->credits--) {
381	struct rx_sw_desc *d = &q->sdesc[cidx];
382
383
384	clear_rx_desc(pdev, q, d);
385	if (++cidx == q->size)
386	cidx = `0`;
387	}
388
389	if (q->pg_chunk.page) {
390	__free_pages(page: q->pg_chunk.page, order: q->order);
391	q->pg_chunk.page = NULL;
392	}
393	}
394
395	/**
396	* add_one_rx_buf - add a packet buffer to a free-buffer list
397	* @va: buffer start VA
398	* @len: the buffer length
399	* @d: the HW Rx descriptor to write
400	* @sd: the SW Rx descriptor to write
401	* @gen: the generation bit value
402	* @pdev: the PCI device associated with the adapter
403	*
404	* Add a buffer of the given length to the supplied HW and SW Rx
405	* descriptors.
406	*/
407	static inline int add_one_rx_buf(void va, unsigned* int len,
408	struct rx_desc d, struct* rx_sw_desc *sd,
409	unsigned int gen, struct pci_dev *pdev)
410	{
411	dma_addr_t mapping;
412
413	mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE);
414	if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
415	return -ENOMEM;
416
417	dma_unmap_addr_set(sd, dma_addr, mapping);
418
419	d->addr_lo = cpu_to_be32(mapping);
420	d->addr_hi = cpu_to_be32((u64) mapping >> `32`);
421	dma_wmb();
422	d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
423	d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
424	return `0`;
425	}
426
427	static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
428	unsigned int gen)
429	{
430	d->addr_lo = cpu_to_be32(mapping);
431	d->addr_hi = cpu_to_be32((u64) mapping >> `32`);
432	dma_wmb();
433	d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
434	d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
435	return `0`;
436	}
437
438	static int alloc_pg_chunk(struct adapter adapter, struct* sge_fl *q,
439	struct rx_sw_desc *sd, gfp_t gfp,
440	unsigned int order)
441	{
442	if (!q->pg_chunk.page) {
443	dma_addr_t mapping;
444
445	q->pg_chunk.page = alloc_pages(gfp, order);
446	if (unlikely(!q->pg_chunk.page))
447	return -ENOMEM;
448	q->pg_chunk.va = page_address(q->pg_chunk.page);
449	q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
450	SGE_PG_RSVD;
451	q->pg_chunk.offset = `0`;
452	mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page,
453	`0`, q->alloc_size, DMA_FROM_DEVICE);
454	if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) {
455	__free_pages(page: q->pg_chunk.page, order);
456	q->pg_chunk.page = NULL;
457	return -EIO;
458	}
459	q->pg_chunk.mapping = mapping;
460	}
461	sd->pg_chunk = q->pg_chunk;
462
463	prefetch(sd->pg_chunk.p_cnt);
464
465	q->pg_chunk.offset += q->buf_size;
466	if (q->pg_chunk.offset == (PAGE_SIZE << order))
467	q->pg_chunk.page = NULL;
468	else {
469	q->pg_chunk.va += q->buf_size;
470	get_page(page: q->pg_chunk.page);
471	}
472
473	if (sd->pg_chunk.offset == `0`)
474	*sd->pg_chunk.p_cnt = `1`;
475	else
476	*sd->pg_chunk.p_cnt += `1`;
477
478	return `0`;
479	}
480
481	static inline void ring_fl_db(struct adapter adap, struct* sge_fl *q)
482	{
483	if (q->pend_cred >= q->credits / `4`) {
484	q->pend_cred = `0`;
485	wmb();
486	t3_write_reg(adapter: adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
487	}
488	}
489
490	/**
491	* refill_fl - refill an SGE free-buffer list
492	* @adap: the adapter
493	* @q: the free-list to refill
494	* @n: the number of new buffers to allocate
495	* @gfp: the gfp flags for allocating new buffers
496	*
497	* (Re)populate an SGE free-buffer list with up to @n new packet buffers,
498	* allocated with the supplied gfp flags. The caller must assure that
499	* @n does not exceed the queue's capacity.
500	*/
501	static int refill_fl(struct adapter adap, struct* sge_fl q, int* n, gfp_t gfp)
502	{
503	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
504	struct rx_desc *d = &q->desc[q->pidx];
505	unsigned int count = `0`;
506
507	while (n--) {
508	dma_addr_t mapping;
509	int err;
510
511	if (q->use_pages) {
512	if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
513	q->order))) {
514	nomem: q->alloc_failed++;
515	break;
516	}
517	mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
518	dma_unmap_addr_set(sd, dma_addr, mapping);
519
520	add_one_rx_chunk(mapping, d, gen: q->gen);
521	dma_sync_single_for_device(dev: &adap->pdev->dev, addr: mapping,
522	size: q->buf_size - SGE_PG_RSVD,
523	dir: DMA_FROM_DEVICE);
524	} else {
525	void *buf_start;
526
527	struct sk_buff *skb = alloc_skb(size: q->buf_size, priority: gfp);
528	if (!skb)
529	goto nomem;
530
531	sd->skb = skb;
532	buf_start = skb->data;
533	err = add_one_rx_buf(va: buf_start, len: q->buf_size, d, sd,
534	gen: q->gen, pdev: adap->pdev);
535	if (unlikely(err)) {
536	clear_rx_desc(pdev: adap->pdev, q, d: sd);
537	break;
538	}
539	}
540
541	d++;
542	sd++;
543	if (++q->pidx == q->size) {
544	q->pidx = `0`;
545	q->gen ^= `1`;
546	sd = q->sdesc;
547	d = q->desc;
548	}
549	count++;
550	}
551
552	q->credits += count;
553	q->pend_cred += count;
554	ring_fl_db(adap, q);
555
556	return count;
557	}
558
559	static inline void __refill_fl(struct adapter adap, struct* sge_fl *fl)
560	{
561	refill_fl(adap, q: fl, min(MAX_RX_REFILL, fl->size - fl->credits),
562	GFP_ATOMIC \| __GFP_COMP);
563	}
564
565	/**
566	* recycle_rx_buf - recycle a receive buffer
567	* @adap: the adapter
568	* @q: the SGE free list
569	* @idx: index of buffer to recycle
570	*
571	* Recycles the specified buffer on the given free list by adding it at
572	* the next available slot on the list.
573	*/
574	static void recycle_rx_buf(struct adapter adap, struct* sge_fl *q,
575	unsigned int idx)
576	{
577	struct rx_desc *from = &q->desc[idx];
578	struct rx_desc *to = &q->desc[q->pidx];
579
580	q->sdesc[q->pidx] = q->sdesc[idx];
581	to->addr_lo = from->addr_lo; / already big endian /
582	to->addr_hi = from->addr_hi; / likewise /
583	dma_wmb();
584	to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
585	to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
586
587	if (++q->pidx == q->size) {
588	q->pidx = `0`;
589	q->gen ^= `1`;
590	}
591
592	q->credits++;
593	q->pend_cred++;
594	ring_fl_db(adap, q);
595	}
596
597	/**
598	* alloc_ring - allocate resources for an SGE descriptor ring
599	* @pdev: the PCI device
600	* @nelem: the number of descriptors
601	* @elem_size: the size of each descriptor
602	* @sw_size: the size of the SW state associated with each ring element
603	* @phys: the physical address of the allocated ring
604	* @metadata: address of the array holding the SW state for the ring
605	*
606	* Allocates resources for an SGE descriptor ring, such as Tx queues,
607	* free buffer lists, or response queues. Each SGE ring requires
608	* space for its HW descriptors plus, optionally, space for the SW state
609	* associated with each HW entry (the metadata). The function returns
610	* three values: the virtual address for the HW ring (the return value
611	* of the function), the physical address of the HW ring, and the address
612	* of the SW ring.
613	*/
614	static void alloc_ring(struct* pci_dev *pdev, size_t nelem, size_t elem_size,
615	size_t sw_size, dma_addr_t * phys, void *metadata)
616	{
617	size_t len = nelem * elem_size;
618	void *s = NULL;
619	void *p = dma_alloc_coherent(dev: &pdev->dev, size: len, dma_handle: phys, GFP_KERNEL);
620
621	if (!p)
622	return NULL;
623	if (sw_size && metadata) {
624	s = kcalloc(n: nelem, size: sw_size, GFP_KERNEL);
625
626	if (!s) {
627	dma_free_coherent(dev: &pdev->dev, size: len, cpu_addr: p, dma_handle: *phys);
628	return NULL;
629	}
630	(void* **)metadata = s;
631	}
632	return p;
633	}
634
635	/**
636	* t3_reset_qset - reset a sge qset
637	* @q: the queue set
638	*
639	* Reset the qset structure.
640	* the NAPI structure is preserved in the event of
641	* the qset's reincarnation, for example during EEH recovery.
642	*/
643	static void t3_reset_qset(struct sge_qset *q)
644	{
645	if (q->adap &&
646	!(q->adap->flags & NAPI_INIT)) {
647	memset(q, `0`, sizeof(*q));
648	return;
649	}
650
651	q->adap = NULL;
652	memset(&q->rspq, `0`, sizeof(q->rspq));
653	memset(q->fl, `0`, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
654	memset(q->txq, `0`, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
655	q->txq_stopped = `0`;
656	q->tx_reclaim_timer.function = NULL; / for t3_stop_sge_timers() /
657	q->rx_reclaim_timer.function = NULL;
658	q->nomem = `0`;
659	napi_free_frags(napi: &q->napi);
660	}
661
662
663	/**
664	* t3_free_qset - free the resources of an SGE queue set
665	* @adapter: the adapter owning the queue set
666	* @q: the queue set
667	*
668	* Release the HW and SW resources associated with an SGE queue set, such
669	* as HW contexts, packet buffers, and descriptor rings. Traffic to the
670	* queue set must be quiesced prior to calling this.
671	*/
672	static void t3_free_qset(struct adapter adapter, struct* sge_qset *q)
673	{
674	int i;
675	struct pci_dev *pdev = adapter->pdev;
676
677	for (i = `0`; i < SGE_RXQ_PER_SET; ++i)
678	if (q->fl[i].desc) {
679	spin_lock_irq(lock: &adapter->sge.reg_lock);
680	t3_sge_disable_fl(adapter, id: q->fl[i].cntxt_id);
681	spin_unlock_irq(lock: &adapter->sge.reg_lock);
682	free_rx_bufs(pdev, q: &q->fl[i]);
683	kfree(objp: q->fl[i].sdesc);
684	dma_free_coherent(dev: &pdev->dev,
685	size: q->fl[i].size *
686	sizeof(struct rx_desc), cpu_addr: q->fl[i].desc,
687	dma_handle: q->fl[i].phys_addr);
688	}
689
690	for (i = `0`; i < SGE_TXQ_PER_SET; ++i)
691	if (q->txq[i].desc) {
692	spin_lock_irq(lock: &adapter->sge.reg_lock);
693	t3_sge_enable_ecntxt(adapter, id: q->txq[i].cntxt_id, enable: `0`);
694	spin_unlock_irq(lock: &adapter->sge.reg_lock);
695	if (q->txq[i].sdesc) {
696	free_tx_desc(adapter, q: &q->txq[i],
697	n: q->txq[i].in_use);
698	kfree(objp: q->txq[i].sdesc);
699	}
700	dma_free_coherent(dev: &pdev->dev,
701	size: q->txq[i].size *
702	sizeof(struct tx_desc),
703	cpu_addr: q->txq[i].desc, dma_handle: q->txq[i].phys_addr);
704	__skb_queue_purge(list: &q->txq[i].sendq);
705	}
706
707	if (q->rspq.desc) {
708	spin_lock_irq(lock: &adapter->sge.reg_lock);
709	t3_sge_disable_rspcntxt(adapter, id: q->rspq.cntxt_id);
710	spin_unlock_irq(lock: &adapter->sge.reg_lock);
711	dma_free_coherent(dev: &pdev->dev,
712	size: q->rspq.size * sizeof(struct rsp_desc),
713	cpu_addr: q->rspq.desc, dma_handle: q->rspq.phys_addr);
714	}
715
716	t3_reset_qset(q);
717	}
718
719	/**
720	* init_qset_cntxt - initialize an SGE queue set context info
721	* @qs: the queue set
722	* @id: the queue set id
723	*
724	* Initializes the TIDs and context ids for the queues of a queue set.
725	*/
726	static void init_qset_cntxt(struct sge_qset qs, unsigned* int id)
727	{
728	qs->rspq.cntxt_id = id;
729	qs->fl[`0`].cntxt_id = `2` * id;
730	qs->fl[`1`].cntxt_id = `2` * id + `1`;
731	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
732	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
733	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
734	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
735	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
736	}
737
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries: three flits per pair of entries
 *	plus two flits for an unpaired trailing entry.
 */
static inline unsigned int sgl_len(unsigned int n)
{
	/* alternatively: 3 * (n / 2) + 2 * (n & 1) */
	return (3 * n) / 2 + (n & 1);
}
750
751	/**
752	* flits_to_desc - returns the num of Tx descriptors for the given flits
753	* @n: the number of flits
754	*
755	* Calculates the number of Tx descriptors needed for the supplied number
756	* of flits.
757	*/
758	static inline unsigned int flits_to_desc(unsigned int n)
759	{
760	BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
761	return flit_desc_map[n];
762	}
763
764	/**
765	* get_packet - return the next ingress packet buffer from a free list
766	* @adap: the adapter that received the packet
767	* @fl: the SGE free list holding the packet
768	* @len: the packet length including any SGE padding
769	* @drop_thres: # of remaining buffers before we start dropping packets
770	*
771	* Get the next packet from a free list and complete setup of the
772	* sk_buff. If the packet is small we make a copy and recycle the
773	* original buffer, otherwise we use the original buffer itself. If a
774	* positive drop threshold is supplied packets are dropped and their
775	* buffers recycled if (a) the number of remaining buffers is under the
776	* threshold and the packet is too big to copy, or (b) the packet should
777	* be copied but there is no memory for the copy.
778	*/
779	static struct sk_buff get_packet(struct* adapter adap, struct* sge_fl *fl,
780	unsigned int len, unsigned int drop_thres)
781	{
782	struct sk_buff *skb = NULL;
783	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
784
785	prefetch(sd->skb->data);
786	fl->credits--;
787
788	if (len <= SGE_RX_COPY_THRES) {
789	skb = alloc_skb(size: len, GFP_ATOMIC);
790	if (likely(skb != NULL)) {
791	__skb_put(skb, len);
792	dma_sync_single_for_cpu(dev: &adap->pdev->dev,
793	dma_unmap_addr(sd, dma_addr),
794	size: len, dir: DMA_FROM_DEVICE);
795	memcpy(skb->data, sd->skb->data, len);
796	dma_sync_single_for_device(dev: &adap->pdev->dev,
797	dma_unmap_addr(sd, dma_addr),
798	size: len, dir: DMA_FROM_DEVICE);
799	} else if (!drop_thres)
800	goto use_orig_buf;
801	recycle:
802	recycle_rx_buf(adap, q: fl, idx: fl->cidx);
803	return skb;
804	}
805
806	if (unlikely(fl->credits < drop_thres) &&
807	refill_fl(adap, q: fl, min(MAX_RX_REFILL, fl->size - fl->credits - `1`),
808	GFP_ATOMIC \| __GFP_COMP) == `0`)
809	goto recycle;
810
811	use_orig_buf:
812	dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr),
813	fl->buf_size, DMA_FROM_DEVICE);
814	skb = sd->skb;
815	skb_put(skb, len);
816	__refill_fl(adap, fl);
817	return skb;
818	}
819
820	/**
821	* get_packet_pg - return the next ingress packet buffer from a free list
822	* @adap: the adapter that received the packet
823	* @fl: the SGE free list holding the packet
824	* @q: the queue
825	* @len: the packet length including any SGE padding
826	* @drop_thres: # of remaining buffers before we start dropping packets
827	*
828	* Get the next packet from a free list populated with page chunks.
829	* If the packet is small we make a copy and recycle the original buffer,
830	* otherwise we attach the original buffer as a page fragment to a fresh
831	* sk_buff. If a positive drop threshold is supplied packets are dropped
832	* and their buffers recycled if (a) the number of remaining buffers is
833	* under the threshold and the packet is too big to copy, or (b) there's
834	* no system memory.
835	*
836	* Note: this function is similar to @get_packet but deals with Rx buffers
837	* that are page chunks rather than sk_buffs.
838	*/
839	static struct sk_buff get_packet_pg(struct* adapter adap, struct* sge_fl *fl,
840	struct sge_rspq q, unsigned* int len,
841	unsigned int drop_thres)
842	{
843	struct sk_buff newskb, skb;
844	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
845
846	dma_addr_t dma_addr = dma_unmap_addr(sd, dma_addr);
847
848	newskb = skb = q->pg_skb;
849	if (!skb && (len <= SGE_RX_COPY_THRES)) {
850	newskb = alloc_skb(size: len, GFP_ATOMIC);
851	if (likely(newskb != NULL)) {
852	__skb_put(skb: newskb, len);
853	dma_sync_single_for_cpu(dev: &adap->pdev->dev, addr: dma_addr,
854	size: len, dir: DMA_FROM_DEVICE);
855	memcpy(newskb->data, sd->pg_chunk.va, len);
856	dma_sync_single_for_device(dev: &adap->pdev->dev, addr: dma_addr,
857	size: len, dir: DMA_FROM_DEVICE);
858	} else if (!drop_thres)
859	return NULL;
860	recycle:
861	fl->credits--;
862	recycle_rx_buf(adap, q: fl, idx: fl->cidx);
863	q->rx_recycle_buf++;
864	return newskb;
865	}
866
867	if (unlikely(q->rx_recycle_buf \|\| (!skb && fl->credits <= drop_thres)))
868	goto recycle;
869
870	prefetch(sd->pg_chunk.p_cnt);
871
872	if (!skb)
873	newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
874
875	if (unlikely(!newskb)) {
876	if (!drop_thres)
877	return NULL;
878	goto recycle;
879	}
880
881	dma_sync_single_for_cpu(dev: &adap->pdev->dev, addr: dma_addr, size: len,
882	dir: DMA_FROM_DEVICE);
883	(*sd->pg_chunk.p_cnt)--;
884	if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
885	dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
886	fl->alloc_size, DMA_FROM_DEVICE);
887	if (!skb) {
888	__skb_put(skb: newskb, SGE_RX_PULL_LEN);
889	memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
890	skb_fill_page_desc(skb: newskb, i: `0`, page: sd->pg_chunk.page,
891	off: sd->pg_chunk.offset + SGE_RX_PULL_LEN,
892	size: len - SGE_RX_PULL_LEN);
893	newskb->len = len;
894	newskb->data_len = len - SGE_RX_PULL_LEN;
895	newskb->truesize += newskb->data_len;
896	} else {
897	skb_fill_page_desc(skb: newskb, skb_shinfo(newskb)->nr_frags,
898	page: sd->pg_chunk.page,
899	off: sd->pg_chunk.offset, size: len);
900	newskb->len += len;
901	newskb->data_len += len;
902	newskb->truesize += len;
903	}
904
905	fl->credits--;
906	/*
907	* We do not refill FLs here, we let the caller do it to overlap a
908	* prefetch.
909	*/
910	return newskb;
911	}
912
913	/**
914	* get_imm_packet - return the next ingress packet buffer from a response
915	* @resp: the response descriptor containing the packet data
916	*
917	* Return a packet containing the immediate data of the given response.
918	*/
919	static inline struct sk_buff get_imm_packet(const* struct rsp_desc *resp)
920	{
921	struct sk_buff *skb = alloc_skb(size: IMMED_PKT_SIZE, GFP_ATOMIC);
922
923	if (skb) {
924	__skb_put(skb, len: IMMED_PKT_SIZE);
925	BUILD_BUG_ON(IMMED_PKT_SIZE != sizeof(resp->immediate));
926	skb_copy_to_linear_data(skb, from: &resp->immediate, len: IMMED_PKT_SIZE);
927	}
928	return skb;
929	}
930
931	/**
932	* calc_tx_descs - calculate the number of Tx descriptors for a packet
933	* @skb: the packet
934	*
935	* Returns the number of Tx descriptors needed for the given Ethernet
936	* packet. Ethernet packets require addition of WR and CPL headers.
937	*/
938	static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
939	{
940	unsigned int flits;
941
942	if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
943	return `1`;
944
945	flits = sgl_len(skb_shinfo(skb)->nr_frags + `1`) + `2`;
946	if (skb_shinfo(skb)->gso_size)
947	flits++;
948	return flits_to_desc(n: flits);
949	}
950
951	/ map_skb - map a packet main body and its page fragments*
952	* @pdev: the PCI device
953	* @skb: the packet
954	* @addr: placeholder to save the mapped addresses
955	*
956	* map the main body of an sk_buff and its page fragments, if any.
957	*/
958	static int map_skb(struct pci_dev pdev, const* struct sk_buff *skb,
959	dma_addr_t *addr)
960	{
961	const skb_frag_t fp, end;
962	const struct skb_shared_info *si;
963
964	if (skb_headlen(skb)) {
965	*addr = dma_map_single(&pdev->dev, skb->data,
966	skb_headlen(skb), DMA_TO_DEVICE);
967	if (dma_mapping_error(dev: &pdev->dev, dma_addr: *addr))
968	goto out_err;
969	addr++;
970	}
971
972	si = skb_shinfo(skb);
973	end = &si->frags[si->nr_frags];
974
975	for (fp = si->frags; fp < end; fp++) {
976	*addr = skb_frag_dma_map(dev: &pdev->dev, frag: fp, offset: `0`, size: skb_frag_size(frag: fp),
977	dir: DMA_TO_DEVICE);
978	if (dma_mapping_error(dev: &pdev->dev, dma_addr: *addr))
979	goto unwind;
980	addr++;
981	}
982	return `0`;
983
984	unwind:
985	while (fp-- > si->frags)
986	dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
987	DMA_TO_DEVICE);
988
989	dma_unmap_single(&pdev->dev, addr[-`1`], skb_headlen(skb),
990	DMA_TO_DEVICE);
991	out_err:
992	return -ENOMEM;
993	}
994
995	/**
996	* write_sgl - populate a scatter/gather list for a packet
997	* @skb: the packet
998	* @sgp: the SGL to populate
999	* @start: start address of skb main body data to include in the SGL
1000	* @len: length of skb main body data to include in the SGL
1001	* @addr: the list of the mapped addresses
1002	*
1003	* Copies the scatter/gather list for the buffers that make up a packet
1004	* and returns the SGL size in 8-byte words. The caller must size the SGL
1005	* appropriately.
1006	*/
1007	static inline unsigned int write_sgl(const struct sk_buff *skb,
1008	struct sg_ent sgp, unsigned* char *start,
1009	unsigned int len, const dma_addr_t *addr)
1010	{
1011	unsigned int i, j = `0`, k = `0`, nfrags;
1012
1013	if (len) {
1014	sgp->len[`0`] = cpu_to_be32(len);
1015	sgp->addr[j++] = cpu_to_be64(addr[k++]);
1016	}
1017
1018	nfrags = skb_shinfo(skb)->nr_frags;
1019	for (i = `0`; i < nfrags; i++) {
1020	const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1021
1022	sgp->len[j] = cpu_to_be32(skb_frag_size(frag));
1023	sgp->addr[j] = cpu_to_be64(addr[k++]);
1024	j ^= `1`;
1025	if (j == `0`)
1026	++sgp;
1027	}
1028	if (j)
1029	sgp->len[j] = `0`;
1030	return ((nfrags + (len != `0`)) * `3`) / `2` + j;
1031	}
1032
1033	/**
1034	* check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1035	* @adap: the adapter
1036	* @q: the Tx queue
1037	*
1038	* Ring the doorbel if a Tx queue is asleep. There is a natural race,
1039	* where the HW is going to sleep just after we checked, however,
1040	* then the interrupt handler will detect the outstanding TX packet
1041	* and ring the doorbell for us.
1042	*
1043	* When GTS is disabled we unconditionally ring the doorbell.
1044	*/
1045	static inline void check_ring_tx_db(struct adapter adap, struct* sge_txq *q)
1046	{
1047	#if USE_GTS
1048	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1049	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == `0`) {
1050	set_bit(TXQ_LAST_PKT_DB, &q->flags);
1051	t3_write_reg(adap, A_SG_KDOORBELL,
1052	F_SELEGRCNTX \| V_EGRCNTX(q->cntxt_id));
1053	}
1054	#else
1055	wmb(); / write descriptors before telling HW /
1056	t3_write_reg(adapter: adap, A_SG_KDOORBELL,
1057	F_SELEGRCNTX \| V_EGRCNTX(q->cntxt_id));
1058	#endif
1059	}
1060
/*
 * Store the generation value in the last flit of a Tx descriptor.  Only
 * compiled in when SGE_NUM_GENBITS is 2; otherwise this is a no-op.
 */
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
#endif
}
1067
1068	/**
1069	* write_wr_hdr_sgl - write a WR header and, optionally, SGL
1070	* @ndesc: number of Tx descriptors spanned by the SGL
1071	* @skb: the packet corresponding to the WR
1072	* @d: first Tx descriptor to be written
1073	* @pidx: index of above descriptors
1074	* @q: the SGE Tx queue
1075	* @sgl: the SGL
1076	* @flits: number of flits to the start of the SGL in the first descriptor
1077	* @sgl_flits: the SGL size in flits
1078	* @gen: the Tx descriptor generation
1079	* @wr_hi: top 32 bits of WR header based on WR type (big endian)
1080	* @wr_lo: low 32 bits of WR header based on WR type (big endian)
1081	*
1082	* Write a work request header and an associated SGL. If the SGL is
1083	* small enough to fit into one Tx descriptor it has already been written
1084	* and we just need to write the WR header. Otherwise we distribute the
1085	* SGL across the number of descriptors it spans.
1086	*/
1087	static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1088	struct tx_desc d, unsigned* int pidx,
1089	const struct sge_txq *q,
1090	const struct sg_ent *sgl,
1091	unsigned int flits, unsigned int sgl_flits,
1092	unsigned int gen, __be32 wr_hi,
1093	__be32 wr_lo)
1094	{
1095	struct work_request_hdr wrp = (struct* work_request_hdr *)d;
1096	struct tx_sw_desc *sd = &q->sdesc[pidx];
1097
1098	sd->skb = skb;
1099	if (need_skb_unmap()) {
1100	sd->fragidx = `0`;
1101	sd->addr_idx = `0`;
1102	sd->sflit = flits;
1103	}
1104
1105	if (likely(ndesc == `1`)) {
1106	sd->eop = `1`;
1107	wrp->wr_hi = htonl(F_WR_SOP \| F_WR_EOP \| V_WR_DATATYPE(`1`) \|
1108	V_WR_SGLSFLT(flits)) \| wr_hi;
1109	dma_wmb();
1110	wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) \|
1111	V_WR_GEN(gen)) \| wr_lo;
1112	wr_gen2(d, gen);
1113	} else {
1114	unsigned int ogen = gen;
1115	const u64 fp = (const* u64 *)sgl;
1116	struct work_request_hdr *wp = wrp;
1117
1118	wrp->wr_hi = htonl(F_WR_SOP \| V_WR_DATATYPE(`1`) \|
1119	V_WR_SGLSFLT(flits)) \| wr_hi;
1120
1121	while (sgl_flits) {
1122	unsigned int avail = WR_FLITS - flits;
1123
1124	if (avail > sgl_flits)
1125	avail = sgl_flits;
1126	memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1127	sgl_flits -= avail;
1128	ndesc--;
1129	if (!sgl_flits)
1130	break;
1131
1132	fp += avail;
1133	d++;
1134	sd->eop = `0`;
1135	sd++;
1136	if (++pidx == q->size) {
1137	pidx = `0`;
1138	gen ^= `1`;
1139	d = q->desc;
1140	sd = q->sdesc;
1141	}
1142
1143	sd->skb = skb;
1144	wrp = (struct work_request_hdr *)d;
1145	wrp->wr_hi = htonl(V_WR_DATATYPE(`1`) \|
1146	V_WR_SGLSFLT(`1`)) \| wr_hi;
1147	wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1148	sgl_flits + `1`)) \|
1149	V_WR_GEN(gen)) \| wr_lo;
1150	wr_gen2(d, gen);
1151	flits = `1`;
1152	}
1153	sd->eop = `1`;
1154	wrp->wr_hi \|= htonl(F_WR_EOP);
1155	dma_wmb();
1156	wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) \| V_WR_GEN(ogen)) \| wr_lo;
1157	wr_gen2(d: (struct tx_desc *)wp, gen: ogen);
1158	WARN_ON(ndesc != `0`);
1159	}
1160	}
1161
1162	/**
1163	* write_tx_pkt_wr - write a TX_PKT work request
1164	* @adap: the adapter
1165	* @skb: the packet to send
1166	* @pi: the egress interface
1167	* @pidx: index of the first Tx descriptor to write
1168	* @gen: the generation value to use
1169	* @q: the Tx queue
1170	* @ndesc: number of descriptors the packet will occupy
1171	* @compl: the value of the COMPL bit to use
1172	* @addr: address
1173	*
1174	* Generate a TX_PKT work request to send the supplied packet.
1175	*/
1176	static void write_tx_pkt_wr(struct adapter adap, struct* sk_buff *skb,
1177	const struct port_info *pi,
1178	unsigned int pidx, unsigned int gen,
1179	struct sge_txq q, unsigned* int ndesc,
1180	unsigned int compl, const dma_addr_t *addr)
1181	{
1182	unsigned int flits, sgl_flits, cntrl, tso_info;
1183	struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / `2` + `1`];
1184	struct tx_desc *d = &q->desc[pidx];
1185	struct cpl_tx_pkt cpl = (struct* cpl_tx_pkt *)d;
1186
1187	cpl->len = htonl(skb->len);
1188	cntrl = V_TXPKT_INTF(pi->port_id);
1189
1190	if (skb_vlan_tag_present(skb))
1191	cntrl \|= F_TXPKT_VLAN_VLD \| V_TXPKT_VLAN(skb_vlan_tag_get(skb));
1192
1193	tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1194	if (tso_info) {
1195	int eth_type;
1196	struct cpl_tx_pkt_lso hdr = (struct* cpl_tx_pkt_lso *)cpl;
1197
1198	d->flit[`2`] = `0`;
1199	cntrl \|= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1200	hdr->cntrl = htonl(cntrl);
1201	eth_type = skb_network_offset(skb) == ETH_HLEN ?
1202	CPL_ETH_II : CPL_ETH_II_VLAN;
1203	tso_info \|= V_LSO_ETH_TYPE(eth_type) \|
1204	V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) \|
1205	V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1206	hdr->lso_info = htonl(tso_info);
1207	flits = `3`;
1208	} else {
1209	cntrl \|= V_TXPKT_OPCODE(CPL_TX_PKT);
1210	cntrl \|= F_TXPKT_IPCSUM_DIS; / SW calculates IP csum /
1211	cntrl \|= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1212	cpl->cntrl = htonl(cntrl);
1213
1214	if (skb->len <= WR_LEN - sizeof(*cpl)) {
1215	q->sdesc[pidx].skb = NULL;
1216	if (!skb->data_len)
1217	skb_copy_from_linear_data(skb, to: &d->flit[`2`],
1218	len: skb->len);
1219	else
1220	skb_copy_bits(skb, offset: `0`, to: &d->flit[`2`], len: skb->len);
1221
1222	flits = (skb->len + `7`) / `8` + `2`;
1223	cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & `7`) \|
1224	V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1225	\| F_WR_SOP \| F_WR_EOP \| compl);
1226	dma_wmb();
1227	cpl->wr.wr_lo = htonl(V_WR_LEN(flits) \| V_WR_GEN(gen) \|
1228	V_WR_TID(q->token));
1229	wr_gen2(d, gen);
1230	dev_consume_skb_any(skb);
1231	return;
1232	}
1233
1234	flits = `2`;
1235	}
1236
1237	sgp = ndesc == `1` ? (struct sg_ent *)&d->flit[flits] : sgl;
1238	sgl_flits = write_sgl(skb, sgp, start: skb->data, len: skb_headlen(skb), addr);
1239
1240	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1241	htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) \| compl),
1242	htonl(V_WR_TID(q->token)));
1243	}
1244
1245	static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1246	struct sge_qset qs, struct* sge_txq *q)
1247	{
1248	netif_tx_stop_queue(dev_queue: txq);
1249	set_bit(nr: TXQ_ETH, addr: &qs->txq_stopped);
1250	q->stops++;
1251	}
1252
1253	/**
1254	* t3_eth_xmit - add a packet to the Ethernet Tx queue
1255	* @skb: the packet
1256	* @dev: the egress net device
1257	*
1258	* Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1259	*/
1260	netdev_tx_t t3_eth_xmit(struct sk_buff skb, struct* net_device *dev)
1261	{
1262	int qidx;
1263	unsigned int ndesc, pidx, credits, gen, compl;
1264	const struct port_info *pi = netdev_priv(dev);
1265	struct adapter *adap = pi->adapter;
1266	struct netdev_queue *txq;
1267	struct sge_qset *qs;
1268	struct sge_txq *q;
1269	dma_addr_t addr[MAX_SKB_FRAGS + `1`];
1270
1271	/*
1272	* The chip min packet length is 9 octets but play safe and reject
1273	* anything shorter than an Ethernet header.
1274	*/
1275	if (unlikely(skb->len < ETH_HLEN)) {
1276	dev_kfree_skb_any(skb);
1277	return NETDEV_TX_OK;
1278	}
1279
1280	qidx = skb_get_queue_mapping(skb);
1281	qs = &pi->qs[qidx];
1282	q = &qs->txq[TXQ_ETH];
1283	txq = netdev_get_tx_queue(dev, index: qidx);
1284
1285	reclaim_completed_tx(adapter: adap, q, TX_RECLAIM_CHUNK);
1286
1287	credits = q->size - q->in_use;
1288	ndesc = calc_tx_descs(skb);
1289
1290	if (unlikely(credits < ndesc)) {
1291	t3_stop_tx_queue(txq, qs, q);
1292	dev_err(&adap->pdev->dev,
1293	"%s: Tx ring %u full while queue awake!\n",
1294	dev->name, q->cntxt_id & `7`);
1295	return NETDEV_TX_BUSY;
1296	}
1297
1298	/ Check if ethernet packet can't be sent as immediate data /
1299	if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) {
1300	if (unlikely(map_skb(adap->pdev, skb, addr) < `0`)) {
1301	dev_kfree_skb(skb);
1302	return NETDEV_TX_OK;
1303	}
1304	}
1305
1306	q->in_use += ndesc;
1307	if (unlikely(credits - ndesc < q->stop_thres)) {
1308	t3_stop_tx_queue(txq, qs, q);
1309
1310	if (should_restart_tx(q) &&
1311	test_and_clear_bit(nr: TXQ_ETH, addr: &qs->txq_stopped)) {
1312	q->restarts++;
1313	netif_tx_start_queue(dev_queue: txq);
1314	}
1315	}
1316
1317	gen = q->gen;
1318	q->unacked += ndesc;
1319	compl = (q->unacked & `8`) << (S_WR_COMPL - `3`);
1320	q->unacked &= `7`;
1321	pidx = q->pidx;
1322	q->pidx += ndesc;
1323	if (q->pidx >= q->size) {
1324	q->pidx -= q->size;
1325	q->gen ^= `1`;
1326	}
1327
1328	/ update port statistics /
1329	if (skb->ip_summed == CHECKSUM_PARTIAL)
1330	qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1331	if (skb_shinfo(skb)->gso_size)
1332	qs->port_stats[SGE_PSTAT_TSO]++;
1333	if (skb_vlan_tag_present(skb))
1334	qs->port_stats[SGE_PSTAT_VLANINS]++;
1335
1336	/*
1337	* We do not use Tx completion interrupts to free DMAd Tx packets.
1338	* This is good for performance but means that we rely on new Tx
1339	* packets arriving to run the destructors of completed packets,
1340	* which open up space in their sockets' send queues. Sometimes
1341	* we do not get such new packets causing Tx to stall. A single
1342	* UDP transmitter is a good example of this situation. We have
1343	* a clean up timer that periodically reclaims completed packets
1344	* but it doesn't run often enough (nor do we want it to) to prevent
1345	* lengthy stalls. A solution to this problem is to run the
1346	* destructor early, after the packet is queued but before it's DMAd.
1347	* A cons is that we lie to socket memory accounting, but the amount
1348	* of extra memory is reasonable (limited by the number of Tx
1349	* descriptors), the packets do actually get freed quickly by new
1350	* packets almost always, and for protocols like TCP that wait for
1351	* acks to really free up the data the extra memory is even less.
1352	* On the positive side we run the destructors on the sending CPU
1353	* rather than on a potentially different completing CPU, usually a
1354	* good thing. We also run them without holding our Tx queue lock,
1355	* unlike what reclaim_completed_tx() would otherwise do.
1356	*
1357	* Run the destructor before telling the DMA engine about the packet
1358	* to make sure it doesn't complete and get freed prematurely.
1359	*/
1360	if (likely(!skb_shared(skb)))
1361	skb_orphan(skb);
1362
1363	write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr);
1364	check_ring_tx_db(adap, q);
1365	return NETDEV_TX_OK;
1366	}
1367
1368	/**
1369	* write_imm - write a packet into a Tx descriptor as immediate data
1370	* @d: the Tx descriptor to write
1371	* @skb: the packet
1372	* @len: the length of packet data to write as immediate data
1373	* @gen: the generation bit value to write
1374	*
1375	* Writes a packet as immediate data into a Tx descriptor. The packet
1376	* contains a work request at its beginning. We must write the packet
1377	* carefully so the SGE doesn't read it accidentally before it's written
1378	* in its entirety.
1379	*/
1380	static inline void write_imm(struct tx_desc d, struct* sk_buff *skb,
1381	unsigned int len, unsigned int gen)
1382	{
1383	struct work_request_hdr from = (struct* work_request_hdr *)skb->data;
1384	struct work_request_hdr to = (struct* work_request_hdr *)d;
1385
1386	if (likely(!skb->data_len))
1387	memcpy(&to[`1`], &from[`1`], len - sizeof(*from));
1388	else
1389	skb_copy_bits(skb, offset: sizeof(from), to: &to[`1`], len: len - sizeof(from));
1390
1391	to->wr_hi = from->wr_hi \| htonl(F_WR_SOP \| F_WR_EOP \|
1392	V_WR_BCNTLFLT(len & `7`));
1393	dma_wmb();
1394	to->wr_lo = from->wr_lo \| htonl(V_WR_GEN(gen) \|
1395	V_WR_LEN((len + `7`) / `8`));
1396	wr_gen2(d, gen);
1397	kfree_skb(skb);
1398	}
1399
1400	/**
1401	* check_desc_avail - check descriptor availability on a send queue
1402	* @adap: the adapter
1403	* @q: the send queue
1404	* @skb: the packet needing the descriptors
1405	* @ndesc: the number of Tx descriptors needed
1406	* @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1407	*
1408	* Checks if the requested number of Tx descriptors is available on an
1409	* SGE send queue. If the queue is already suspended or not enough
1410	* descriptors are available the packet is queued for later transmission.
1411	* Must be called with the Tx queue locked.
1412	*
1413	* Returns 0 if enough descriptors are available, 1 if there aren't
1414	* enough descriptors and the packet has been queued, and 2 if the caller
1415	* needs to retry because there weren't enough descriptors at the
1416	* beginning of the call but some freed up in the mean time.
1417	*/
1418	static inline int check_desc_avail(struct adapter adap, struct* sge_txq *q,
1419	struct sk_buff skb, unsigned* int ndesc,
1420	unsigned int qid)
1421	{
1422	if (unlikely(!skb_queue_empty(&q->sendq))) {
1423	addq_exit:__skb_queue_tail(list: &q->sendq, newsk: skb);
1424	return `1`;
1425	}
1426	if (unlikely(q->size - q->in_use < ndesc)) {
1427	struct sge_qset *qs = txq_to_qset(q, qidx: qid);
1428
1429	set_bit(nr: qid, addr: &qs->txq_stopped);
1430	smp_mb__after_atomic();
1431
1432	if (should_restart_tx(q) &&
1433	test_and_clear_bit(nr: qid, addr: &qs->txq_stopped))
1434	return `2`;
1435
1436	q->stops++;
1437	goto addq_exit;
1438	}
1439	return `0`;
1440	}
1441
1442	/**
1443	* reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1444	* @q: the SGE control Tx queue
1445	*
1446	* This is a variant of reclaim_completed_tx() that is used for Tx queues
1447	* that send only immediate data (presently just the control queues) and
1448	* thus do not have any sk_buffs to release.
1449	*/
1450	static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1451	{
1452	unsigned int reclaim = q->processed - q->cleaned;
1453
1454	q->in_use -= reclaim;
1455	q->cleaned += reclaim;
1456	}
1457
1458	static inline int immediate(const struct sk_buff *skb)
1459	{
1460	return skb->len <= WR_LEN;
1461	}
1462
1463	/**
1464	* ctrl_xmit - send a packet through an SGE control Tx queue
1465	* @adap: the adapter
1466	* @q: the control queue
1467	* @skb: the packet
1468	*
1469	* Send a packet through an SGE control Tx queue. Packets sent through
1470	* a control queue must fit entirely as immediate data in a single Tx
1471	* descriptor and have no page fragments.
1472	*/
1473	static int ctrl_xmit(struct adapter adap, struct* sge_txq *q,
1474	struct sk_buff *skb)
1475	{
1476	int ret;
1477	struct work_request_hdr wrp = (struct* work_request_hdr *)skb->data;
1478
1479	if (unlikely(!immediate(skb))) {
1480	WARN_ON(`1`);
1481	dev_kfree_skb(skb);
1482	return NET_XMIT_SUCCESS;
1483	}
1484
1485	wrp->wr_hi \|= htonl(F_WR_SOP \| F_WR_EOP);
1486	wrp->wr_lo = htonl(V_WR_TID(q->token));
1487
1488	spin_lock(lock: &q->lock);
1489	again:reclaim_completed_tx_imm(q);
1490
1491	ret = check_desc_avail(adap, q, skb, ndesc: `1`, qid: TXQ_CTRL);
1492	if (unlikely(ret)) {
1493	if (ret == `1`) {
1494	spin_unlock(lock: &q->lock);
1495	return NET_XMIT_CN;
1496	}
1497	goto again;
1498	}
1499
1500	write_imm(d: &q->desc[q->pidx], skb, len: skb->len, gen: q->gen);
1501
1502	q->in_use++;
1503	if (++q->pidx >= q->size) {
1504	q->pidx = `0`;
1505	q->gen ^= `1`;
1506	}
1507	spin_unlock(lock: &q->lock);
1508	wmb();
1509	t3_write_reg(adapter: adap, A_SG_KDOORBELL,
1510	F_SELEGRCNTX \| V_EGRCNTX(q->cntxt_id));
1511	return NET_XMIT_SUCCESS;
1512	}
1513
1514	/**
1515	* restart_ctrlq - restart a suspended control queue
1516	* @w: pointer to the work associated with this handler
1517	*
1518	* Resumes transmission on a suspended Tx control queue.
1519	*/
1520	static void restart_ctrlq(struct work_struct *w)
1521	{
1522	struct sk_buff *skb;
1523	struct sge_qset qs = container_of(w, struct* sge_qset,
1524	txq[TXQ_CTRL].qresume_task);
1525	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1526
1527	spin_lock(lock: &q->lock);
1528	again:reclaim_completed_tx_imm(q);
1529
1530	while (q->in_use < q->size &&
1531	(skb = __skb_dequeue(list: &q->sendq)) != NULL) {
1532
1533	write_imm(d: &q->desc[q->pidx], skb, len: skb->len, gen: q->gen);
1534
1535	if (++q->pidx >= q->size) {
1536	q->pidx = `0`;
1537	q->gen ^= `1`;
1538	}
1539	q->in_use++;
1540	}
1541
1542	if (!skb_queue_empty(list: &q->sendq)) {
1543	set_bit(nr: TXQ_CTRL, addr: &qs->txq_stopped);
1544	smp_mb__after_atomic();
1545
1546	if (should_restart_tx(q) &&
1547	test_and_clear_bit(nr: TXQ_CTRL, addr: &qs->txq_stopped))
1548	goto again;
1549	q->stops++;
1550	}
1551
1552	spin_unlock(lock: &q->lock);
1553	wmb();
1554	t3_write_reg(adapter: qs->adap, A_SG_KDOORBELL,
1555	F_SELEGRCNTX \| V_EGRCNTX(q->cntxt_id));
1556	}
1557
1558	/*
1559	* Send a management message through control queue 0
1560	*/
1561	int t3_mgmt_tx(struct adapter adap, struct* sk_buff *skb)
1562	{
1563	int ret;
1564	local_bh_disable();
1565	ret = ctrl_xmit(adap, q: &adap->sge.qs[`0`].txq[TXQ_CTRL], skb);
1566	local_bh_enable();
1567
1568	return ret;
1569	}
1570
1571	/**
1572	* deferred_unmap_destructor - unmap a packet when it is freed
1573	* @skb: the packet
1574	*
1575	* This is the packet destructor used for Tx packets that need to remain
1576	* mapped until they are freed rather than until their Tx descriptors are
1577	* freed.
1578	*/
1579	static void deferred_unmap_destructor(struct sk_buff *skb)
1580	{
1581	int i;
1582	const dma_addr_t *p;
1583	const struct skb_shared_info *si;
1584	const struct deferred_unmap_info *dui;
1585
1586	dui = (struct deferred_unmap_info *)skb->head;
1587	p = dui->addr;
1588
1589	if (skb_tail_pointer(skb) - skb_transport_header(skb))
1590	dma_unmap_single(&dui->pdev->dev, *p++,
1591	skb_tail_pointer(skb) - skb_transport_header(skb),
1592	DMA_TO_DEVICE);
1593
1594	si = skb_shinfo(skb);
1595	for (i = `0`; i < si->nr_frags; i++)
1596	dma_unmap_page(&dui->pdev->dev, *p++,
1597	skb_frag_size(&si->frags[i]), DMA_TO_DEVICE);
1598	}
1599
1600	static void setup_deferred_unmapping(struct sk_buff skb, struct* pci_dev *pdev,
1601	const struct sg_ent sgl, int* sgl_flits)
1602	{
1603	dma_addr_t *p;
1604	struct deferred_unmap_info *dui;
1605
1606	dui = (struct deferred_unmap_info *)skb->head;
1607	dui->pdev = pdev;
1608	for (p = dui->addr; sgl_flits >= `3`; sgl++, sgl_flits -= `3`) {
1609	*p++ = be64_to_cpu(sgl->addr[`0`]);
1610	*p++ = be64_to_cpu(sgl->addr[`1`]);
1611	}
1612	if (sgl_flits)
1613	*p = be64_to_cpu(sgl->addr[`0`]);
1614	}
1615
1616	/**
1617	* write_ofld_wr - write an offload work request
1618	* @adap: the adapter
1619	* @skb: the packet to send
1620	* @q: the Tx queue
1621	* @pidx: index of the first Tx descriptor to write
1622	* @gen: the generation value to use
1623	* @ndesc: number of descriptors the packet will occupy
1624	* @addr: the address
1625	*
1626	* Write an offload work request to send the supplied packet. The packet
1627	* data already carry the work request with most fields populated.
1628	*/
1629	static void write_ofld_wr(struct adapter adap, struct* sk_buff *skb,
1630	struct sge_txq q, unsigned* int pidx,
1631	unsigned int gen, unsigned int ndesc,
1632	const dma_addr_t *addr)
1633	{
1634	unsigned int sgl_flits, flits;
1635	struct work_request_hdr *from;
1636	struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / `2` + `1`];
1637	struct tx_desc *d = &q->desc[pidx];
1638
1639	if (immediate(skb)) {
1640	q->sdesc[pidx].skb = NULL;
1641	write_imm(d, skb, len: skb->len, gen);
1642	return;
1643	}
1644
1645	/ Only TX_DATA builds SGLs /
1646
1647	from = (struct work_request_hdr *)skb->data;
1648	memcpy(&d->flit[`1`], &from[`1`],
1649	skb_transport_offset(skb) - sizeof(*from));
1650
1651	flits = skb_transport_offset(skb) / `8`;
1652	sgp = ndesc == `1` ? (struct sg_ent *)&d->flit[flits] : sgl;
1653	sgl_flits = write_sgl(skb, sgp, start: skb_transport_header(skb),
1654	len: skb_tail_pointer(skb) - skb_transport_header(skb),
1655	addr);
1656	if (need_skb_unmap()) {
1657	setup_deferred_unmapping(skb, pdev: adap->pdev, sgl: sgp, sgl_flits);
1658	skb->destructor = deferred_unmap_destructor;
1659	}
1660
1661	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1662	gen, wr_hi: from->wr_hi, wr_lo: from->wr_lo);
1663	}
1664
1665	/**
1666	* calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1667	* @skb: the packet
1668	*
1669	* Returns the number of Tx descriptors needed for the given offload
1670	* packet. These packets are already fully constructed.
1671	*/
1672	static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1673	{
1674	unsigned int flits, cnt;
1675
1676	if (skb->len <= WR_LEN)
1677	return `1`; / packet fits as immediate data /
1678
1679	flits = skb_transport_offset(skb) / `8`; / headers /
1680	cnt = skb_shinfo(skb)->nr_frags;
1681	if (skb_tail_pointer(skb) != skb_transport_header(skb))
1682	cnt++;
1683	return flits_to_desc(n: flits + sgl_len(n: cnt));
1684	}
1685
1686	/**
1687	* ofld_xmit - send a packet through an offload queue
1688	* @adap: the adapter
1689	* @q: the Tx offload queue
1690	* @skb: the packet
1691	*
1692	* Send an offload packet through an SGE offload queue.
1693	*/
1694	static int ofld_xmit(struct adapter adap, struct* sge_txq *q,
1695	struct sk_buff *skb)
1696	{
1697	int ret;
1698	unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1699
1700	spin_lock(lock: &q->lock);
1701	again: reclaim_completed_tx(adapter: adap, q, TX_RECLAIM_CHUNK);
1702
1703	ret = check_desc_avail(adap, q, skb, ndesc, qid: TXQ_OFLD);
1704	if (unlikely(ret)) {
1705	if (ret == `1`) {
1706	skb->priority = ndesc; / save for restart /
1707	spin_unlock(lock: &q->lock);
1708	return NET_XMIT_CN;
1709	}
1710	goto again;
1711	}
1712
1713	if (!immediate(skb) &&
1714	map_skb(pdev: adap->pdev, skb, addr: (dma_addr_t *)skb->head)) {
1715	spin_unlock(lock: &q->lock);
1716	return NET_XMIT_SUCCESS;
1717	}
1718
1719	gen = q->gen;
1720	q->in_use += ndesc;
1721	pidx = q->pidx;
1722	q->pidx += ndesc;
1723	if (q->pidx >= q->size) {
1724	q->pidx -= q->size;
1725	q->gen ^= `1`;
1726	}
1727	spin_unlock(lock: &q->lock);
1728
1729	write_ofld_wr(adap, skb, q, pidx, gen, ndesc, addr: (dma_addr_t *)skb->head);
1730	check_ring_tx_db(adap, q);
1731	return NET_XMIT_SUCCESS;
1732	}
1733
1734	/**
1735	* restart_offloadq - restart a suspended offload queue
1736	* @w: pointer to the work associated with this handler
1737	*
1738	* Resumes transmission on a suspended Tx offload queue.
1739	*/
1740	static void restart_offloadq(struct work_struct *w)
1741	{
1742	struct sk_buff *skb;
1743	struct sge_qset qs = container_of(w, struct* sge_qset,
1744	txq[TXQ_OFLD].qresume_task);
1745	struct sge_txq *q = &qs->txq[TXQ_OFLD];
1746	const struct port_info *pi = netdev_priv(dev: qs->netdev);
1747	struct adapter *adap = pi->adapter;
1748	unsigned int written = `0`;
1749
1750	spin_lock(lock: &q->lock);
1751	again: reclaim_completed_tx(adapter: adap, q, TX_RECLAIM_CHUNK);
1752
1753	while ((skb = skb_peek(list_: &q->sendq)) != NULL) {
1754	unsigned int gen, pidx;
1755	unsigned int ndesc = skb->priority;
1756
1757	if (unlikely(q->size - q->in_use < ndesc)) {
1758	set_bit(nr: TXQ_OFLD, addr: &qs->txq_stopped);
1759	smp_mb__after_atomic();
1760
1761	if (should_restart_tx(q) &&
1762	test_and_clear_bit(nr: TXQ_OFLD, addr: &qs->txq_stopped))
1763	goto again;
1764	q->stops++;
1765	break;
1766	}
1767
1768	if (!immediate(skb) &&
1769	map_skb(pdev: adap->pdev, skb, addr: (dma_addr_t *)skb->head))
1770	break;
1771
1772	gen = q->gen;
1773	q->in_use += ndesc;
1774	pidx = q->pidx;
1775	q->pidx += ndesc;
1776	written += ndesc;
1777	if (q->pidx >= q->size) {
1778	q->pidx -= q->size;
1779	q->gen ^= `1`;
1780	}
1781	__skb_unlink(skb, list: &q->sendq);
1782	spin_unlock(lock: &q->lock);
1783
1784	write_ofld_wr(adap, skb, q, pidx, gen, ndesc,
1785	addr: (dma_addr_t *)skb->head);
1786	spin_lock(lock: &q->lock);
1787	}
1788	spin_unlock(lock: &q->lock);
1789
1790	#if USE_GTS
1791	set_bit(TXQ_RUNNING, &q->flags);
1792	set_bit(TXQ_LAST_PKT_DB, &q->flags);
1793	#endif
1794	wmb();
1795	if (likely(written))
1796	t3_write_reg(adapter: adap, A_SG_KDOORBELL,
1797	F_SELEGRCNTX \| V_EGRCNTX(q->cntxt_id));
1798	}
1799
1800	/**
1801	* queue_set - return the queue set a packet should use
1802	* @skb: the packet
1803	*
1804	* Maps a packet to the SGE queue set it should use. The desired queue
1805	* set is carried in bits 1-3 in the packet's priority.
1806	*/
1807	static inline int queue_set(const struct sk_buff *skb)
1808	{
1809	return skb->priority >> `1`;
1810	}
1811
1812	/**
1813	* is_ctrl_pkt - return whether an offload packet is a control packet
1814	* @skb: the packet
1815	*
1816	* Determines whether an offload packet should use an OFLD or a CTRL
1817	* Tx queue. This is indicated by bit 0 in the packet's priority.
1818	*/
1819	static inline int is_ctrl_pkt(const struct sk_buff *skb)
1820	{
1821	return skb->priority & `1`;
1822	}
1823
1824	/**
1825	* t3_offload_tx - send an offload packet
1826	* @tdev: the offload device to send to
1827	* @skb: the packet
1828	*
1829	* Sends an offload packet. We use the packet priority to select the
1830	* appropriate Tx queue as follows: bit 0 indicates whether the packet
1831	* should be sent as regular or control, bits 1-3 select the queue set.
1832	*/
1833	int t3_offload_tx(struct t3cdev tdev, struct* sk_buff *skb)
1834	{
1835	struct adapter *adap = tdev2adap(tdev);
1836	struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1837
1838	if (unlikely(is_ctrl_pkt(skb)))
1839	return ctrl_xmit(adap, q: &qs->txq[TXQ_CTRL], skb);
1840
1841	return ofld_xmit(adap, q: &qs->txq[TXQ_OFLD], skb);
1842	}
1843
1844	/**
1845	* offload_enqueue - add an offload packet to an SGE offload receive queue
1846	* @q: the SGE response queue
1847	* @skb: the packet
1848	*
1849	* Add a new offload packet to an SGE response queue's offload packet
1850	* queue. If the packet is the first on the queue it schedules the RX
1851	* softirq to process the queue.
1852	*/
1853	static inline void offload_enqueue(struct sge_rspq q, struct* sk_buff *skb)
1854	{
1855	int was_empty = skb_queue_empty(list: &q->rx_queue);
1856
1857	__skb_queue_tail(list: &q->rx_queue, newsk: skb);
1858
1859	if (was_empty) {
1860	struct sge_qset *qs = rspq_to_qset(q);
1861
1862	napi_schedule(n: &qs->napi);
1863	}
1864	}
1865
1866	/**
1867	* deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1868	* @tdev: the offload device that will be receiving the packets
1869	* @q: the SGE response queue that assembled the bundle
1870	* @skbs: the partial bundle
1871	* @n: the number of packets in the bundle
1872	*
1873	* Delivers a (partial) bundle of Rx offload packets to an offload device.
1874	*/
1875	static inline void deliver_partial_bundle(struct t3cdev *tdev,
1876	struct sge_rspq *q,
1877	struct sk_buff skbs[], int* n)
1878	{
1879	if (n) {
1880	q->offload_bundles++;
1881	tdev->recv(tdev, skbs, n);
1882	}
1883	}
1884
1885	/**
1886	* ofld_poll - NAPI handler for offload packets in interrupt mode
1887	* @napi: the network device doing the polling
1888	* @budget: polling budget
1889	*
1890	* The NAPI handler for offload packets when a response queue is serviced
1891	* by the hard interrupt handler, i.e., when it's operating in non-polling
1892	* mode. Creates small packet batches and sends them through the offload
1893	* receive handler. Batches need to be of modest size as we do prefetches
1894	* on the packets in each.
1895	*/
1896	static int ofld_poll(struct napi_struct napi, int* budget)
1897	{
1898	struct sge_qset qs = container_of(napi, struct* sge_qset, napi);
1899	struct sge_rspq *q = &qs->rspq;
1900	struct adapter *adapter = qs->adap;
1901	int work_done = `0`;
1902
1903	while (work_done < budget) {
1904	struct sk_buff skb, tmp, *skbs[RX_BUNDLE_SIZE];
1905	struct sk_buff_head queue;
1906	int ngathered;
1907
1908	spin_lock_irq(lock: &q->lock);
1909	__skb_queue_head_init(list: &queue);
1910	skb_queue_splice_init(list: &q->rx_queue, head: &queue);
1911	if (skb_queue_empty(list: &queue)) {
1912	napi_complete_done(n: napi, work_done);
1913	spin_unlock_irq(lock: &q->lock);
1914	return work_done;
1915	}
1916	spin_unlock_irq(lock: &q->lock);
1917
1918	ngathered = `0`;
1919	skb_queue_walk_safe(&queue, skb, tmp) {
1920	if (work_done >= budget)
1921	break;
1922	work_done++;
1923
1924	__skb_unlink(skb, list: &queue);
1925	prefetch(skb->data);
1926	skbs[ngathered] = skb;
1927	if (++ngathered == RX_BUNDLE_SIZE) {
1928	q->offload_bundles++;
1929	adapter->tdev.recv(&adapter->tdev, skbs,
1930	ngathered);
1931	ngathered = `0`;
1932	}
1933	}
1934	if (!skb_queue_empty(list: &queue)) {
1935	/ splice remaining packets back onto Rx queue /
1936	spin_lock_irq(lock: &q->lock);
1937	skb_queue_splice(list: &queue, head: &q->rx_queue);
1938	spin_unlock_irq(lock: &q->lock);
1939	}
1940	deliver_partial_bundle(tdev: &adapter->tdev, q, skbs, n: ngathered);
1941	}
1942
1943	return work_done;
1944	}
1945
1946	/**
1947	* rx_offload - process a received offload packet
1948	* @tdev: the offload device receiving the packet
1949	* @rq: the response queue that received the packet
1950	* @skb: the packet
1951	* @rx_gather: a gather list of packets if we are building a bundle
1952	* @gather_idx: index of the next available slot in the bundle
1953	*
1954	* Process an ingress offload packet and add it to the offload ingress
1955	* queue. Returns the index of the next available slot in the bundle.
1956	*/
1957	static inline int rx_offload(struct t3cdev tdev, struct* sge_rspq *rq,
1958	struct sk_buff skb, struct* sk_buff *rx_gather[],
1959	unsigned int gather_idx)
1960	{
1961	skb_reset_mac_header(skb);
1962	skb_reset_network_header(skb);
1963	skb_reset_transport_header(skb);
1964
1965	if (rq->polling) {
1966	rx_gather[gather_idx++] = skb;
1967	if (gather_idx == RX_BUNDLE_SIZE) {
1968	tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1969	gather_idx = `0`;
1970	rq->offload_bundles++;
1971	}
1972	} else
1973	offload_enqueue(q: rq, skb);
1974
1975	return gather_idx;
1976	}
1977
1978	/**
1979	* restart_tx - check whether to restart suspended Tx queues
1980	* @qs: the queue set to resume
1981	*
1982	* Restarts suspended Tx queues of an SGE queue set if they have enough
1983	* free resources to resume operation.
1984	*/
1985	static void restart_tx(struct sge_qset *qs)
1986	{
1987	if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1988	should_restart_tx(q: &qs->txq[TXQ_ETH]) &&
1989	test_and_clear_bit(nr: TXQ_ETH, addr: &qs->txq_stopped)) {
1990	qs->txq[TXQ_ETH].restarts++;
1991	if (netif_running(dev: qs->netdev))
1992	netif_tx_wake_queue(dev_queue: qs->tx_q);
1993	}
1994
1995	if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1996	should_restart_tx(q: &qs->txq[TXQ_OFLD]) &&
1997	test_and_clear_bit(nr: TXQ_OFLD, addr: &qs->txq_stopped)) {
1998	qs->txq[TXQ_OFLD].restarts++;
1999
2000	/ The work can be quite lengthy so we use driver's own queue /
2001	queue_work(wq: cxgb3_wq, work: &qs->txq[TXQ_OFLD].qresume_task);
2002	}
2003	if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
2004	should_restart_tx(q: &qs->txq[TXQ_CTRL]) &&
2005	test_and_clear_bit(nr: TXQ_CTRL, addr: &qs->txq_stopped)) {
2006	qs->txq[TXQ_CTRL].restarts++;
2007
2008	/ The work can be quite lengthy so we use driver's own queue /
2009	queue_work(wq: cxgb3_wq, work: &qs->txq[TXQ_CTRL].qresume_task);
2010	}
2011	}
2012
2013	/**
2014	* cxgb3_arp_process - process an ARP request probing a private IP address
2015	* @pi: the port info
2016	* @skb: the skbuff containing the ARP request
2017	*
2018	* Check if the ARP request is probing the private IP address
2019	* dedicated to iSCSI, generate an ARP reply if so.
2020	*/
2021	static void cxgb3_arp_process(struct port_info pi, struct* sk_buff *skb)
2022	{
2023	struct net_device *dev = skb->dev;
2024	struct arphdr *arp;
2025	unsigned char *arp_ptr;
2026	unsigned char *sha;
2027	__be32 sip, tip;
2028
2029	if (!dev)
2030	return;
2031
2032	skb_reset_network_header(skb);
2033	arp = arp_hdr(skb);
2034
2035	if (arp->ar_op != htons(ARPOP_REQUEST))
2036	return;
2037
2038	arp_ptr = (unsigned char *)(arp + `1`);
2039	sha = arp_ptr;
2040	arp_ptr += dev->addr_len;
2041	memcpy(&sip, arp_ptr, sizeof(sip));
2042	arp_ptr += sizeof(sip);
2043	arp_ptr += dev->addr_len;
2044	memcpy(&tip, arp_ptr, sizeof(tip));
2045
2046	if (tip != pi->iscsi_ipv4addr)
2047	return;
2048
2049	arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip: sip, dev, src_ip: tip, dest_hw: sha,
2050	src_hw: pi->iscsic.mac_addr, th: sha);
2051
2052	}
2053
2054	static inline int is_arp(struct sk_buff *skb)
2055	{
2056	return skb->protocol == htons(ETH_P_ARP);
2057	}
2058
2059	static void cxgb3_process_iscsi_prov_pack(struct port_info *pi,
2060	struct sk_buff *skb)
2061	{
2062	if (is_arp(skb)) {
2063	cxgb3_arp_process(pi, skb);
2064	return;
2065	}
2066
2067	if (pi->iscsic.recv)
2068	pi->iscsic.recv(pi, skb);
2069
2070	}
2071
2072	/**
2073	* rx_eth - process an ingress ethernet packet
2074	* @adap: the adapter
2075	* @rq: the response queue that received the packet
2076	* @skb: the packet
2077	* @pad: padding
2078	* @lro: large receive offload
2079	*
2080	* Process an ingress ethernet packet and deliver it to the stack.
2081	* The padding is 2 if the packet was delivered in an Rx buffer and 0
2082	* if it was immediate data in a response.
2083	*/
2084	static void rx_eth(struct adapter adap, struct* sge_rspq *rq,
2085	struct sk_buff skb, int* pad, int lro)
2086	{
2087	struct cpl_rx_pkt p = (struct* cpl_rx_pkt *)(skb->data + pad);
2088	struct sge_qset *qs = rspq_to_qset(q: rq);
2089	struct port_info *pi;
2090
2091	skb_pull(skb, len: sizeof(*p) + pad);
2092	skb->protocol = eth_type_trans(skb, dev: adap->port[p->iff]);
2093	pi = netdev_priv(dev: skb->dev);
2094	if ((skb->dev->features & NETIF_F_RXCSUM) && p->csum_valid &&
2095	p->csum == htons(`0xffff`) && !p->fragment) {
2096	qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2097	skb->ip_summed = CHECKSUM_UNNECESSARY;
2098	} else
2099	skb_checksum_none_assert(skb);
2100	skb_record_rx_queue(skb, rx_queue: qs - &adap->sge.qs[pi->first_qset]);
2101
2102	if (p->vlan_valid) {
2103	qs->port_stats[SGE_PSTAT_VLANEX]++;
2104	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan));
2105	}
2106	if (rq->polling) {
2107	if (lro)
2108	napi_gro_receive(napi: &qs->napi, skb);
2109	else {
2110	if (unlikely(pi->iscsic.flags))
2111	cxgb3_process_iscsi_prov_pack(pi, skb);
2112	netif_receive_skb(skb);
2113	}
2114	} else
2115	netif_rx(skb);
2116	}
2117
2118	static inline int is_eth_tcp(u32 rss)
2119	{
2120	return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2121	}
2122
2123	/**
2124	* lro_add_page - add a page chunk to an LRO session
2125	* @adap: the adapter
2126	* @qs: the associated queue set
2127	* @fl: the free list containing the page chunk to add
2128	* @len: packet length
2129	* @complete: Indicates the last fragment of a frame
2130	*
2131	* Add a received packet contained in a page chunk to an existing LRO
2132	* session.
2133	*/
2134	static void lro_add_page(struct adapter adap, struct* sge_qset *qs,
2135	struct sge_fl fl, int* len, int complete)
2136	{
2137	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2138	struct port_info *pi = netdev_priv(dev: qs->netdev);
2139	struct sk_buff *skb = NULL;
2140	struct cpl_rx_pkt *cpl;
2141	skb_frag_t *rx_frag;
2142	int nr_frags;
2143	int offset = `0`;
2144
2145	if (!qs->nomem) {
2146	skb = napi_get_frags(napi: &qs->napi);
2147	qs->nomem = !skb;
2148	}
2149
2150	fl->credits--;
2151
2152	dma_sync_single_for_cpu(dev: &adap->pdev->dev,
2153	dma_unmap_addr(sd, dma_addr),
2154	size: fl->buf_size - SGE_PG_RSVD, dir: DMA_FROM_DEVICE);
2155
2156	(*sd->pg_chunk.p_cnt)--;
2157	if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
2158	dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
2159	fl->alloc_size, DMA_FROM_DEVICE);
2160
2161	if (!skb) {
2162	put_page(page: sd->pg_chunk.page);
2163	if (complete)
2164	qs->nomem = `0`;
2165	return;
2166	}
2167
2168	rx_frag = skb_shinfo(skb)->frags;
2169	nr_frags = skb_shinfo(skb)->nr_frags;
2170
2171	if (!nr_frags) {
2172	offset = `2` + sizeof(struct cpl_rx_pkt);
2173	cpl = qs->lro_va = sd->pg_chunk.va + `2`;
2174
2175	if ((qs->netdev->features & NETIF_F_RXCSUM) &&
2176	cpl->csum_valid && cpl->csum == htons(`0xffff`)) {
2177	skb->ip_summed = CHECKSUM_UNNECESSARY;
2178	qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2179	} else
2180	skb->ip_summed = CHECKSUM_NONE;
2181	} else
2182	cpl = qs->lro_va;
2183
2184	len -= offset;
2185
2186	rx_frag += nr_frags;
2187	skb_frag_fill_page_desc(frag: rx_frag, page: sd->pg_chunk.page,
2188	off: sd->pg_chunk.offset + offset, size: len);
2189
2190	skb->len += len;
2191	skb->data_len += len;
2192	skb->truesize += len;
2193	skb_shinfo(skb)->nr_frags++;
2194
2195	if (!complete)
2196	return;
2197
2198	skb_record_rx_queue(skb, rx_queue: qs - &adap->sge.qs[pi->first_qset]);
2199
2200	if (cpl->vlan_valid) {
2201	qs->port_stats[SGE_PSTAT_VLANEX]++;
2202	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
2203	}
2204	napi_gro_frags(napi: &qs->napi);
2205	}
2206
2207	/**
2208	* handle_rsp_cntrl_info - handles control information in a response
2209	* @qs: the queue set corresponding to the response
2210	* @flags: the response control flags
2211	*
2212	* Handles the control information of an SGE response, such as GTS
2213	* indications and completion credits for the queue set's Tx queues.
2214	* HW coalesces credits, we don't do any extra SW coalescing.
2215	*/
2216	static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2217	{
2218	unsigned int credits;
2219
2220	#if USE_GTS
2221	if (flags & F_RSPD_TXQ0_GTS)
2222	clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2223	#endif
2224
2225	credits = G_RSPD_TXQ0_CR(flags);
2226	if (credits)
2227	qs->txq[TXQ_ETH].processed += credits;
2228
2229	credits = G_RSPD_TXQ2_CR(flags);
2230	if (credits)
2231	qs->txq[TXQ_CTRL].processed += credits;
2232
2233	# if USE_GTS
2234	if (flags & F_RSPD_TXQ1_GTS)
2235	clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2236	# endif
2237	credits = G_RSPD_TXQ1_CR(flags);
2238	if (credits)
2239	qs->txq[TXQ_OFLD].processed += credits;
2240	}
2241
2242	/**
2243	* check_ring_db - check if we need to ring any doorbells
2244	* @adap: the adapter
2245	* @qs: the queue set whose Tx queues are to be examined
2246	* @sleeping: indicates which Tx queue sent GTS
2247	*
2248	* Checks if some of a queue set's Tx queues need to ring their doorbells
2249	* to resume transmission after idling while they still have unprocessed
2250	* descriptors.
2251	*/
2252	static void check_ring_db(struct adapter adap, struct* sge_qset *qs,
2253	unsigned int sleeping)
2254	{
2255	if (sleeping & F_RSPD_TXQ0_GTS) {
2256	struct sge_txq *txq = &qs->txq[TXQ_ETH];
2257
2258	if (txq->cleaned + txq->in_use != txq->processed &&
2259	!test_and_set_bit(nr: TXQ_LAST_PKT_DB, addr: &txq->flags)) {
2260	set_bit(nr: TXQ_RUNNING, addr: &txq->flags);
2261	t3_write_reg(adapter: adap, A_SG_KDOORBELL, F_SELEGRCNTX \|
2262	V_EGRCNTX(txq->cntxt_id));
2263	}
2264	}
2265
2266	if (sleeping & F_RSPD_TXQ1_GTS) {
2267	struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2268
2269	if (txq->cleaned + txq->in_use != txq->processed &&
2270	!test_and_set_bit(nr: TXQ_LAST_PKT_DB, addr: &txq->flags)) {
2271	set_bit(nr: TXQ_RUNNING, addr: &txq->flags);
2272	t3_write_reg(adapter: adap, A_SG_KDOORBELL, F_SELEGRCNTX \|
2273	V_EGRCNTX(txq->cntxt_id));
2274	}
2275	}
2276	}
2277
2278	/**
2279	* is_new_response - check if a response is newly written
2280	* @r: the response descriptor
2281	* @q: the response queue
2282	*
2283	* Returns true if a response descriptor contains a yet unprocessed
2284	* response.
2285	*/
2286	static inline int is_new_response(const struct rsp_desc *r,
2287	const struct sge_rspq *q)
2288	{
2289	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2290	}
2291
2292	static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2293	{
2294	q->pg_skb = NULL;
2295	q->rx_recycle_buf = `0`;
2296	}
2297
/* Response flag bits carrying the GTS indications of Tx queues 0 and 1. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)

/* All response control bits: GTS indications plus the Tx credit fields. */
#define RSPD_CTRL_MASK \
	(RSPD_GTS_MASK | \
	 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
	 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
	 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
2306
2307	/**
2308	* process_responses - process responses from an SGE response queue
2309	* @adap: the adapter
2310	* @qs: the queue set to which the response queue belongs
2311	* @budget: how many responses can be processed in this round
2312	*
2313	* Process responses from an SGE response queue up to the supplied budget.
2314	* Responses include received packets as well as credits and other events
2315	* for the queues that belong to the response queue's queue set.
2316	* A negative budget is effectively unlimited.
2317	*
2318	* Additionally choose the interrupt holdoff time for the next interrupt
2319	* on this queue. If the system is under memory shortage use a fairly
2320	* long delay to help recovery.
2321	*/
2322	static int process_responses(struct adapter adap, struct* sge_qset *qs,
2323	int budget)
2324	{
2325	struct sge_rspq *q = &qs->rspq;
2326	struct rsp_desc *r = &q->desc[q->cidx];
2327	int budget_left = budget;
2328	unsigned int sleeping = `0`;
2329	struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2330	int ngathered = `0`;
2331
2332	q->next_holdoff = q->holdoff_tmr;
2333
2334	while (likely(budget_left && is_new_response(r, q))) {
2335	int packet_complete, eth, ethpad = `2`;
2336	int lro = !!(qs->netdev->features & NETIF_F_GRO);
2337	struct sk_buff *skb = NULL;
2338	u32 len, flags;
2339	__be32 rss_hi, rss_lo;
2340
2341	dma_rmb();
2342	eth = r->rss_hdr.opcode == CPL_RX_PKT;
2343	rss_hi = (const* __be32 *)r;
2344	rss_lo = r->rss_hdr.rss_hash_val;
2345	flags = ntohl(r->flags);
2346
2347	if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2348	skb = alloc_skb(size: AN_PKT_SIZE, GFP_ATOMIC);
2349	if (!skb)
2350	goto no_mem;
2351
2352	__skb_put_data(skb, data: r, len: AN_PKT_SIZE);
2353	skb->data[`0`] = CPL_ASYNC_NOTIF;
2354	rss_hi = htonl(CPL_ASYNC_NOTIF << `24`);
2355	q->async_notif++;
2356	} else if (flags & F_RSPD_IMM_DATA_VALID) {
2357	skb = get_imm_packet(resp: r);
2358	if (unlikely(!skb)) {
2359	no_mem:
2360	q->next_holdoff = NOMEM_INTR_DELAY;
2361	q->nomem++;
2362	/ consume one credit since we tried /
2363	budget_left--;
2364	break;
2365	}
2366	q->imm_data++;
2367	ethpad = `0`;
2368	} else if ((len = ntohl(r->len_cq)) != `0`) {
2369	struct sge_fl *fl;
2370
2371	lro &= eth && is_eth_tcp(rss: rss_hi);
2372
2373	fl = (len & F_RSPD_FLQ) ? &qs->fl[`1`] : &qs->fl[`0`];
2374	if (fl->use_pages) {
2375	void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2376
2377	net_prefetch(p: addr);
2378	__refill_fl(adap, fl);
2379	if (lro > `0`) {
2380	lro_add_page(adap, qs, fl,
2381	G_RSPD_LEN(len),
2382	complete: flags & F_RSPD_EOP);
2383	goto next_fl;
2384	}
2385
2386	skb = get_packet_pg(adap, fl, q,
2387	G_RSPD_LEN(len),
2388	drop_thres: eth ?
2389	SGE_RX_DROP_THRES : `0`);
2390	q->pg_skb = skb;
2391	} else
2392	skb = get_packet(adap, fl, G_RSPD_LEN(len),
2393	drop_thres: eth ? SGE_RX_DROP_THRES : `0`);
2394	if (unlikely(!skb)) {
2395	if (!eth)
2396	goto no_mem;
2397	q->rx_drops++;
2398	} else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2399	__skb_pull(skb, len: `2`);
2400	next_fl:
2401	if (++fl->cidx == fl->size)
2402	fl->cidx = `0`;
2403	} else
2404	q->pure_rsps++;
2405
2406	if (flags & RSPD_CTRL_MASK) {
2407	sleeping \|= flags & RSPD_GTS_MASK;
2408	handle_rsp_cntrl_info(qs, flags);
2409	}
2410
2411	r++;
2412	if (unlikely(++q->cidx == q->size)) {
2413	q->cidx = `0`;
2414	q->gen ^= `1`;
2415	r = q->desc;
2416	}
2417	prefetch(r);
2418
2419	if (++q->credits >= (q->size / `4`)) {
2420	refill_rspq(adapter: adap, q, credits: q->credits);
2421	q->credits = `0`;
2422	}
2423
2424	packet_complete = flags &
2425	(F_RSPD_EOP \| F_RSPD_IMM_DATA_VALID \|
2426	F_RSPD_ASYNC_NOTIF);
2427
2428	if (skb != NULL && packet_complete) {
2429	if (eth)
2430	rx_eth(adap, rq: q, skb, pad: ethpad, lro);
2431	else {
2432	q->offload_pkts++;
2433	/ Preserve the RSS info in csum & priority /
2434	skb->csum = rss_hi;
2435	skb->priority = rss_lo;
2436	ngathered = rx_offload(tdev: &adap->tdev, rq: q, skb,
2437	rx_gather: offload_skbs,
2438	gather_idx: ngathered);
2439	}
2440
2441	if (flags & F_RSPD_EOP)
2442	clear_rspq_bufstate(q);
2443	}
2444	--budget_left;
2445	}
2446
2447	deliver_partial_bundle(tdev: &adap->tdev, q, skbs: offload_skbs, n: ngathered);
2448
2449	if (sleeping)
2450	check_ring_db(adap, qs, sleeping);
2451
2452	smp_mb(); / commit Tx queue .processed updates /
2453	if (unlikely(qs->txq_stopped != `0`))
2454	restart_tx(qs);
2455
2456	budget -= budget_left;
2457	return budget;
2458	}
2459
2460	static inline int is_pure_response(const struct rsp_desc *r)
2461	{
2462	__be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF \| F_RSPD_IMM_DATA_VALID);
2463
2464	return (n \| r->len_cq) == `0`;
2465	}
2466
2467	/**
2468	* napi_rx_handler - the NAPI handler for Rx processing
2469	* @napi: the napi instance
2470	* @budget: how many packets we can process in this round
2471	*
2472	* Handler for new data events when using NAPI.
2473	*/
2474	static int napi_rx_handler(struct napi_struct napi, int* budget)
2475	{
2476	struct sge_qset qs = container_of(napi, struct* sge_qset, napi);
2477	struct adapter *adap = qs->adap;
2478	int work_done = process_responses(adap, qs, budget);
2479
2480	if (likely(work_done < budget)) {
2481	napi_complete_done(n: napi, work_done);
2482
2483	/*
2484	* Because we don't atomically flush the following
2485	* write it is possible that in very rare cases it can
2486	* reach the device in a way that races with a new
2487	* response being written plus an error interrupt
2488	* causing the NAPI interrupt handler below to return
2489	* unhandled status to the OS. To protect against
2490	* this would require flushing the write and doing
2491	* both the write and the flush with interrupts off.
2492	* Way too expensive and unjustifiable given the
2493	* rarity of the race.
2494	*
2495	* The race cannot happen at all with MSI-X.
2496	*/
2497	t3_write_reg(adapter: adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) \|
2498	V_NEWTIMER(qs->rspq.next_holdoff) \|
2499	V_NEWINDEX(qs->rspq.cidx));
2500	}
2501	return work_done;
2502	}
2503
2504	/**
2505	* process_pure_responses - process pure responses from a response queue
2506	* @adap: the adapter
2507	* @qs: the queue set owning the response queue
2508	* @r: the first pure response to process
2509	*
2510	* A simpler version of process_responses() that handles only pure (i.e.,
2511	* non data-carrying) responses. Such respones are too light-weight to
2512	* justify calling a softirq under NAPI, so we handle them specially in
2513	* the interrupt handler. The function is called with a pointer to a
2514	* response, which the caller must ensure is a valid pure response.
2515	*
2516	* Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2517	*/
2518	static int process_pure_responses(struct adapter adap, struct* sge_qset *qs,
2519	struct rsp_desc *r)
2520	{
2521	struct sge_rspq *q = &qs->rspq;
2522	unsigned int sleeping = `0`;
2523
2524	do {
2525	u32 flags = ntohl(r->flags);
2526
2527	r++;
2528	if (unlikely(++q->cidx == q->size)) {
2529	q->cidx = `0`;
2530	q->gen ^= `1`;
2531	r = q->desc;
2532	}
2533	prefetch(r);
2534
2535	if (flags & RSPD_CTRL_MASK) {
2536	sleeping \|= flags & RSPD_GTS_MASK;
2537	handle_rsp_cntrl_info(qs, flags);
2538	}
2539
2540	q->pure_rsps++;
2541	if (++q->credits >= (q->size / `4`)) {
2542	refill_rspq(adapter: adap, q, credits: q->credits);
2543	q->credits = `0`;
2544	}
2545	if (!is_new_response(r, q))
2546	break;
2547	dma_rmb();
2548	} while (is_pure_response(r));
2549
2550	if (sleeping)
2551	check_ring_db(adap, qs, sleeping);
2552
2553	smp_mb(); / commit Tx queue .processed updates /
2554	if (unlikely(qs->txq_stopped != `0`))
2555	restart_tx(qs);
2556
2557	return is_new_response(r, q);
2558	}
2559
2560	/**
2561	* handle_responses - decide what to do with new responses in NAPI mode
2562	* @adap: the adapter
2563	* @q: the response queue
2564	*
2565	* This is used by the NAPI interrupt handlers to decide what to do with
2566	* new SGE responses. If there are no new responses it returns -1. If
2567	* there are new responses and they are pure (i.e., non-data carrying)
2568	* it handles them straight in hard interrupt context as they are very
2569	* cheap and don't deliver any packets. Finally, if there are any data
2570	* signaling responses it schedules the NAPI handler. Returns 1 if it
2571	* schedules NAPI, 0 if all new responses were pure.
2572	*
2573	* The caller must ascertain NAPI is not already running.
2574	*/
2575	static inline int handle_responses(struct adapter adap, struct* sge_rspq *q)
2576	{
2577	struct sge_qset *qs = rspq_to_qset(q);
2578	struct rsp_desc *r = &q->desc[q->cidx];
2579
2580	if (!is_new_response(r, q))
2581	return -`1`;
2582	dma_rmb();
2583	if (is_pure_response(r) && process_pure_responses(adap, qs, r) == `0`) {
2584	t3_write_reg(adapter: adap, A_SG_GTS, V_RSPQ(q->cntxt_id) \|
2585	V_NEWTIMER(q->holdoff_tmr) \| V_NEWINDEX(q->cidx));
2586	return `0`;
2587	}
2588	napi_schedule(n: &qs->napi);
2589	return `1`;
2590	}
2591
2592	/*
2593	* The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2594	* (i.e., response queue serviced in hard interrupt).
2595	*/
2596	static irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2597	{
2598	struct sge_qset *qs = cookie;
2599	struct adapter *adap = qs->adap;
2600	struct sge_rspq *q = &qs->rspq;
2601
2602	spin_lock(lock: &q->lock);
2603	if (process_responses(adap, qs, budget: -`1`) == `0`)
2604	q->unhandled_irqs++;
2605	t3_write_reg(adapter: adap, A_SG_GTS, V_RSPQ(q->cntxt_id) \|
2606	V_NEWTIMER(q->next_holdoff) \| V_NEWINDEX(q->cidx));
2607	spin_unlock(lock: &q->lock);
2608	return IRQ_HANDLED;
2609	}
2610
2611	/*
2612	* The MSI-X interrupt handler for an SGE response queue for the NAPI case
2613	* (i.e., response queue serviced by NAPI polling).
2614	*/
2615	static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2616	{
2617	struct sge_qset *qs = cookie;
2618	struct sge_rspq *q = &qs->rspq;
2619
2620	spin_lock(lock: &q->lock);
2621
2622	if (handle_responses(adap: qs->adap, q) < `0`)
2623	q->unhandled_irqs++;
2624	spin_unlock(lock: &q->lock);
2625	return IRQ_HANDLED;
2626	}
2627
2628	/*
2629	* The non-NAPI MSI interrupt handler. This needs to handle data events from
2630	* SGE response queues as well as error and other async events as they all use
2631	* the same MSI vector. We use one SGE response queue per port in this mode
2632	* and protect all response queues with queue 0's lock.
2633	*/
2634	static irqreturn_t t3_intr_msi(int irq, void *cookie)
2635	{
2636	int new_packets = `0`;
2637	struct adapter *adap = cookie;
2638	struct sge_rspq *q = &adap->sge.qs[`0`].rspq;
2639
2640	spin_lock(lock: &q->lock);
2641
2642	if (process_responses(adap, qs: &adap->sge.qs[`0`], budget: -`1`)) {
2643	t3_write_reg(adapter: adap, A_SG_GTS, V_RSPQ(q->cntxt_id) \|
2644	V_NEWTIMER(q->next_holdoff) \| V_NEWINDEX(q->cidx));
2645	new_packets = `1`;
2646	}
2647
2648	if (adap->params.nports == `2` &&
2649	process_responses(adap, qs: &adap->sge.qs[`1`], budget: -`1`)) {
2650	struct sge_rspq *q1 = &adap->sge.qs[`1`].rspq;
2651
2652	t3_write_reg(adapter: adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) \|
2653	V_NEWTIMER(q1->next_holdoff) \|
2654	V_NEWINDEX(q1->cidx));
2655	new_packets = `1`;
2656	}
2657
2658	if (!new_packets && t3_slow_intr_handler(adapter: adap) == `0`)
2659	q->unhandled_irqs++;
2660
2661	spin_unlock(lock: &q->lock);
2662	return IRQ_HANDLED;
2663	}
2664
2665	static int rspq_check_napi(struct sge_qset *qs)
2666	{
2667	struct sge_rspq *q = &qs->rspq;
2668
2669	return is_new_response(r: &q->desc[q->cidx], q) && napi_schedule(n: &qs->napi);
2670	}
2671
2672	/*
2673	* The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2674	* by NAPI polling). Handles data events from SGE response queues as well as
2675	* error and other async events as they all use the same MSI vector. We use
2676	* one SGE response queue per port in this mode and protect all response
2677	* queues with queue 0's lock.
2678	*/
2679	static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2680	{
2681	int new_packets;
2682	struct adapter *adap = cookie;
2683	struct sge_rspq *q = &adap->sge.qs[`0`].rspq;
2684
2685	spin_lock(lock: &q->lock);
2686
2687	new_packets = rspq_check_napi(qs: &adap->sge.qs[`0`]);
2688	if (adap->params.nports == `2`)
2689	new_packets += rspq_check_napi(qs: &adap->sge.qs[`1`]);
2690	if (!new_packets && t3_slow_intr_handler(adapter: adap) == `0`)
2691	q->unhandled_irqs++;
2692
2693	spin_unlock(lock: &q->lock);
2694	return IRQ_HANDLED;
2695	}
2696
2697	/*
2698	* A helper function that processes responses and issues GTS.
2699	*/
2700	static inline int process_responses_gts(struct adapter *adap,
2701	struct sge_rspq *rq)
2702	{
2703	int work;
2704
2705	work = process_responses(adap, qs: rspq_to_qset(q: rq), budget: -`1`);
2706	t3_write_reg(adapter: adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) \|
2707	V_NEWTIMER(rq->next_holdoff) \| V_NEWINDEX(rq->cidx));
2708	return work;
2709	}
2710
2711	/*
2712	* The legacy INTx interrupt handler. This needs to handle data events from
2713	* SGE response queues as well as error and other async events as they all use
2714	* the same interrupt pin. We use one SGE response queue per port in this mode
2715	* and protect all response queues with queue 0's lock.
2716	*/
2717	static irqreturn_t t3_intr(int irq, void *cookie)
2718	{
2719	int work_done, w0, w1;
2720	struct adapter *adap = cookie;
2721	struct sge_rspq *q0 = &adap->sge.qs[`0`].rspq;
2722	struct sge_rspq *q1 = &adap->sge.qs[`1`].rspq;
2723
2724	spin_lock(lock: &q0->lock);
2725
2726	w0 = is_new_response(r: &q0->desc[q0->cidx], q: q0);
2727	w1 = adap->params.nports == `2` &&
2728	is_new_response(r: &q1->desc[q1->cidx], q: q1);
2729
2730	if (likely(w0 \| w1)) {
2731	t3_write_reg(adapter: adap, A_PL_CLI, val: `0`);
2732	t3_read_reg(adapter: adap, A_PL_CLI); / flush /
2733
2734	if (likely(w0))
2735	process_responses_gts(adap, rq: q0);
2736
2737	if (w1)
2738	process_responses_gts(adap, rq: q1);
2739
2740	work_done = w0 \| w1;
2741	} else
2742	work_done = t3_slow_intr_handler(adapter: adap);
2743
2744	spin_unlock(lock: &q0->lock);
2745	return IRQ_RETVAL(work_done != `0`);
2746	}
2747
2748	/*
2749	* Interrupt handler for legacy INTx interrupts for T3B-based cards.
2750	* Handles data events from SGE response queues as well as error and other
2751	* async events as they all use the same interrupt pin. We use one SGE
2752	* response queue per port in this mode and protect all response queues with
2753	* queue 0's lock.
2754	*/
2755	static irqreturn_t t3b_intr(int irq, void *cookie)
2756	{
2757	u32 map;
2758	struct adapter *adap = cookie;
2759	struct sge_rspq *q0 = &adap->sge.qs[`0`].rspq;
2760
2761	t3_write_reg(adapter: adap, A_PL_CLI, val: `0`);
2762	map = t3_read_reg(adapter: adap, A_SG_DATA_INTR);
2763
2764	if (unlikely(!map)) / shared interrupt, most likely /
2765	return IRQ_NONE;
2766
2767	spin_lock(lock: &q0->lock);
2768
2769	if (unlikely(map & F_ERRINTR))
2770	t3_slow_intr_handler(adapter: adap);
2771
2772	if (likely(map & `1`))
2773	process_responses_gts(adap, rq: q0);
2774
2775	if (map & `2`)
2776	process_responses_gts(adap, rq: &adap->sge.qs[`1`].rspq);
2777
2778	spin_unlock(lock: &q0->lock);
2779	return IRQ_HANDLED;
2780	}
2781
2782	/*
2783	* NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2784	* Handles data events from SGE response queues as well as error and other
2785	* async events as they all use the same interrupt pin. We use one SGE
2786	* response queue per port in this mode and protect all response queues with
2787	* queue 0's lock.
2788	*/
2789	static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2790	{
2791	u32 map;
2792	struct adapter *adap = cookie;
2793	struct sge_qset *qs0 = &adap->sge.qs[`0`];
2794	struct sge_rspq *q0 = &qs0->rspq;
2795
2796	t3_write_reg(adapter: adap, A_PL_CLI, val: `0`);
2797	map = t3_read_reg(adapter: adap, A_SG_DATA_INTR);
2798
2799	if (unlikely(!map)) / shared interrupt, most likely /
2800	return IRQ_NONE;
2801
2802	spin_lock(lock: &q0->lock);
2803
2804	if (unlikely(map & F_ERRINTR))
2805	t3_slow_intr_handler(adapter: adap);
2806
2807	if (likely(map & `1`))
2808	napi_schedule(n: &qs0->napi);
2809
2810	if (map & `2`)
2811	napi_schedule(n: &adap->sge.qs[`1`].napi);
2812
2813	spin_unlock(lock: &q0->lock);
2814	return IRQ_HANDLED;
2815	}
2816
2817	/**
2818	* t3_intr_handler - select the top-level interrupt handler
2819	* @adap: the adapter
2820	* @polling: whether using NAPI to service response queues
2821	*
2822	* Selects the top-level interrupt handler based on the type of interrupts
2823	* (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2824	* response queues.
2825	*/
2826	irq_handler_t t3_intr_handler(struct adapter adap, int* polling)
2827	{
2828	if (adap->flags & USING_MSIX)
2829	return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2830	if (adap->flags & USING_MSI)
2831	return polling ? t3_intr_msi_napi : t3_intr_msi;
2832	if (adap->params.rev > `0`)
2833	return polling ? t3b_intr_napi : t3b_intr;
2834	return t3_intr;
2835	}
2836
/* SGE interrupt cause bits that report parity errors. */
#define SGE_PARERR \
	(F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
	 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
	 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
	 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
	 F_HIRCQPARITYERROR)

/* SGE interrupt cause bits that report framing errors. */
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)

/* SGE interrupt cause bits that are treated as fatal. */
#define SGE_FATALERR \
	(SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
	 F_RSPQDISABLED)
2845
2846	/**
2847	* t3_sge_err_intr_handler - SGE async event interrupt handler
2848	* @adapter: the adapter
2849	*
2850	* Interrupt handler for SGE asynchronous (non-data) events.
2851	*/
2852	void t3_sge_err_intr_handler(struct adapter *adapter)
2853	{
2854	unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2855	~F_FLEMPTY;
2856
2857	if (status & SGE_PARERR)
2858	CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2859	status & SGE_PARERR);
2860	if (status & SGE_FRAMINGERR)
2861	CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2862	status & SGE_FRAMINGERR);
2863
2864	if (status & F_RSPQCREDITOVERFOW)
2865	CH_ALERT(adapter, "SGE response queue credit overflow\n");
2866
2867	if (status & F_RSPQDISABLED) {
2868	v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2869
2870	CH_ALERT(adapter,
2871	"packet delivered to disabled response queue "
2872	"(0x%x)\n", (v >> S_RSPQ0DISABLED) & `0xff`);
2873	}
2874
2875	if (status & (F_HIPIODRBDROPERR \| F_LOPIODRBDROPERR))
2876	queue_work(wq: cxgb3_wq, work: &adapter->db_drop_task);
2877
2878	if (status & (F_HIPRIORITYDBFULL \| F_LOPRIORITYDBFULL))
2879	queue_work(wq: cxgb3_wq, work: &adapter->db_full_task);
2880
2881	if (status & (F_HIPRIORITYDBEMPTY \| F_LOPRIORITYDBEMPTY))
2882	queue_work(wq: cxgb3_wq, work: &adapter->db_empty_task);
2883
2884	t3_write_reg(adapter, A_SG_INT_CAUSE, val: status);
2885	if (status & SGE_FATALERR)
2886	t3_fatal_err(adapter);
2887	}
2888
2889	/**
2890	* sge_timer_tx - perform periodic maintenance of an SGE qset
2891	* @t: a timer list containing the SGE queue set to maintain
2892	*
2893	* Runs periodically from a timer to perform maintenance of an SGE queue
2894	* set. It performs two tasks:
2895	*
2896	* Cleans up any completed Tx descriptors that may still be pending.
2897	* Normal descriptor cleanup happens when new packets are added to a Tx
2898	* queue so this timer is relatively infrequent and does any cleanup only
2899	* if the Tx queue has not seen any new packets in a while. We make a
2900	* best effort attempt to reclaim descriptors, in that we don't wait
2901	* around if we cannot get a queue's lock (which most likely is because
2902	* someone else is queueing new packets and so will also handle the clean
2903	* up). Since control queues use immediate data exclusively we don't
2904	* bother cleaning them up here.
2905	*
2906	*/
2907	static void sge_timer_tx(struct timer_list *t)
2908	{
2909	struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer);
2910	struct port_info *pi = netdev_priv(dev: qs->netdev);
2911	struct adapter *adap = pi->adapter;
2912	unsigned int tbd[SGE_TXQ_PER_SET] = {`0`, `0`};
2913	unsigned long next_period;
2914
2915	if (__netif_tx_trylock(txq: qs->tx_q)) {
2916	tbd[TXQ_ETH] = reclaim_completed_tx(adapter: adap, q: &qs->txq[TXQ_ETH],
2917	TX_RECLAIM_TIMER_CHUNK);
2918	__netif_tx_unlock(txq: qs->tx_q);
2919	}
2920
2921	if (spin_trylock(lock: &qs->txq[TXQ_OFLD].lock)) {
2922	tbd[TXQ_OFLD] = reclaim_completed_tx(adapter: adap, q: &qs->txq[TXQ_OFLD],
2923	TX_RECLAIM_TIMER_CHUNK);
2924	spin_unlock(lock: &qs->txq[TXQ_OFLD].lock);
2925	}
2926
2927	next_period = TX_RECLAIM_PERIOD >>
2928	(max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2929	TX_RECLAIM_TIMER_CHUNK);
2930	mod_timer(timer: &qs->tx_reclaim_timer, expires: jiffies + next_period);
2931	}
2932
2933	/**
2934	* sge_timer_rx - perform periodic maintenance of an SGE qset
2935	* @t: the timer list containing the SGE queue set to maintain
2936	*
2937	* a) Replenishes Rx queues that have run out due to memory shortage.
2938	* Normally new Rx buffers are added when existing ones are consumed but
2939	* when out of memory a queue can become empty. We try to add only a few
2940	* buffers here, the queue will be replenished fully as these new buffers
2941	* are used up if memory shortage has subsided.
2942	*
2943	* b) Return coalesced response queue credits in case a response queue is
2944	* starved.
2945	*
2946	*/
2947	static void sge_timer_rx(struct timer_list *t)
2948	{
2949	spinlock_t *lock;
2950	struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer);
2951	struct port_info *pi = netdev_priv(dev: qs->netdev);
2952	struct adapter *adap = pi->adapter;
2953	u32 status;
2954
2955	lock = adap->params.rev > `0` ?
2956	&qs->rspq.lock : &adap->sge.qs[`0`].rspq.lock;
2957
2958	if (!spin_trylock_irq(lock))
2959	goto out;
2960
2961	if (napi_is_scheduled(n: &qs->napi))
2962	goto unlock;
2963
2964	if (adap->params.rev < `4`) {
2965	status = t3_read_reg(adapter: adap, A_SG_RSPQ_FL_STATUS);
2966
2967	if (status & (`1` << qs->rspq.cntxt_id)) {
2968	qs->rspq.starved++;
2969	if (qs->rspq.credits) {
2970	qs->rspq.credits--;
2971	refill_rspq(adapter: adap, q: &qs->rspq, credits: `1`);
2972	qs->rspq.restarted++;
2973	t3_write_reg(adapter: adap, A_SG_RSPQ_FL_STATUS,
2974	val: `1` << qs->rspq.cntxt_id);
2975	}
2976	}
2977	}
2978
2979	if (qs->fl[`0`].credits < qs->fl[`0`].size)
2980	__refill_fl(adap, fl: &qs->fl[`0`]);
2981	if (qs->fl[`1`].credits < qs->fl[`1`].size)
2982	__refill_fl(adap, fl: &qs->fl[`1`]);
2983
2984	unlock:
2985	spin_unlock_irq(lock);
2986	out:
2987	mod_timer(timer: &qs->rx_reclaim_timer, expires: jiffies + RX_RECLAIM_PERIOD);
2988	}
2989
2990	/**
2991	* t3_update_qset_coalesce - update coalescing settings for a queue set
2992	* @qs: the SGE queue set
2993	* @p: new queue set parameters
2994	*
2995	* Update the coalescing settings for an SGE queue set. Nothing is done
2996	* if the queue set is not initialized yet.
2997	*/
2998	void t3_update_qset_coalesce(struct sge_qset qs, const* struct qset_params *p)
2999	{
3000	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * `10`, `1U`);/ can't be 0 /
3001	qs->rspq.polling = p->polling;
3002	qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
3003	}
3004
3005	/**
3006	* t3_sge_alloc_qset - initialize an SGE queue set
3007	* @adapter: the adapter
3008	* @id: the queue set id
3009	* @nports: how many Ethernet ports will be using this queue set
3010	* @irq_vec_idx: the IRQ vector index for response queue interrupts
3011	* @p: configuration parameters for this queue set
3012	* @ntxq: number of Tx queues for the queue set
3013	* @dev: net device associated with this queue set
3014	* @netdevq: net device TX queue associated with this queue set
3015	*
3016	* Allocate resources and initialize an SGE queue set. A queue set
3017	* comprises a response queue, two Rx free-buffer queues, and up to 3
3018	* Tx queues. The Tx queues are assigned roles in the order Ethernet
3019	* queue, offload queue, and control queue.
3020	*/
3021	int t3_sge_alloc_qset(struct adapter adapter, unsigned* int id, int nports,
3022	int irq_vec_idx, const struct qset_params *p,
3023	int ntxq, struct net_device *dev,
3024	struct netdev_queue *netdevq)
3025	{
3026	int i, avail, ret = -ENOMEM;
3027	struct sge_qset *q = &adapter->sge.qs[id];
3028
3029	init_qset_cntxt(qs: q, id);
3030	timer_setup(&q->tx_reclaim_timer, sge_timer_tx, `0`);
3031	timer_setup(&q->rx_reclaim_timer, sge_timer_rx, `0`);
3032
3033	q->fl[`0`].desc = alloc_ring(pdev: adapter->pdev, nelem: p->fl_size,
3034	elem_size: sizeof(struct rx_desc),
3035	sw_size: sizeof(struct rx_sw_desc),
3036	phys: &q->fl[`0`].phys_addr, metadata: &q->fl[`0`].sdesc);
3037	if (!q->fl[`0`].desc)
3038	goto err;
3039
3040	q->fl[`1`].desc = alloc_ring(pdev: adapter->pdev, nelem: p->jumbo_size,
3041	elem_size: sizeof(struct rx_desc),
3042	sw_size: sizeof(struct rx_sw_desc),
3043	phys: &q->fl[`1`].phys_addr, metadata: &q->fl[`1`].sdesc);
3044	if (!q->fl[`1`].desc)
3045	goto err;
3046
3047	q->rspq.desc = alloc_ring(pdev: adapter->pdev, nelem: p->rspq_size,
3048	elem_size: sizeof(struct rsp_desc), sw_size: `0`,
3049	phys: &q->rspq.phys_addr, NULL);
3050	if (!q->rspq.desc)
3051	goto err;
3052
3053	for (i = `0`; i < ntxq; ++i) {
3054	/*
3055	* The control queue always uses immediate data so does not
3056	* need to keep track of any sk_buffs.
3057	*/
3058	size_t sz = i == TXQ_CTRL ? `0` : sizeof(struct tx_sw_desc);
3059
3060	q->txq[i].desc = alloc_ring(pdev: adapter->pdev, nelem: p->txq_size[i],
3061	elem_size: sizeof(struct tx_desc), sw_size: sz,
3062	phys: &q->txq[i].phys_addr,
3063	metadata: &q->txq[i].sdesc);
3064	if (!q->txq[i].desc)
3065	goto err;
3066
3067	q->txq[i].gen = `1`;
3068	q->txq[i].size = p->txq_size[i];
3069	spin_lock_init(&q->txq[i].lock);
3070	skb_queue_head_init(list: &q->txq[i].sendq);
3071	}
3072
3073	INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq);
3074	INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq);
3075
3076	q->fl[`0`].gen = q->fl[`1`].gen = `1`;
3077	q->fl[`0`].size = p->fl_size;
3078	q->fl[`1`].size = p->jumbo_size;
3079
3080	q->rspq.gen = `1`;
3081	q->rspq.size = p->rspq_size;
3082	spin_lock_init(&q->rspq.lock);
3083	skb_queue_head_init(list: &q->rspq.rx_queue);
3084
3085	q->txq[TXQ_ETH].stop_thres = nports *
3086	flits_to_desc(n: sgl_len(MAX_SKB_FRAGS + `1`) + `3`);
3087
3088	#if FL0_PG_CHUNK_SIZE > 0
3089	q->fl[`0`].buf_size = FL0_PG_CHUNK_SIZE;
3090	#else
3091	q->fl[`0`].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3092	#endif
3093	#if FL1_PG_CHUNK_SIZE > 0
3094	q->fl[`1`].buf_size = FL1_PG_CHUNK_SIZE;
3095	#else
3096	q->fl[`1`].buf_size = is_offload(adapter) ?
3097	(`16` * `1024`) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3098	MAX_FRAME_SIZE + `2` + sizeof(struct cpl_rx_pkt);
3099	#endif
3100
3101	q->fl[`0`].use_pages = FL0_PG_CHUNK_SIZE > `0`;
3102	q->fl[`1`].use_pages = FL1_PG_CHUNK_SIZE > `0`;
3103	q->fl[`0`].order = FL0_PG_ORDER;
3104	q->fl[`1`].order = FL1_PG_ORDER;
3105	q->fl[`0`].alloc_size = FL0_PG_ALLOC_SIZE;
3106	q->fl[`1`].alloc_size = FL1_PG_ALLOC_SIZE;
3107
3108	spin_lock_irq(lock: &adapter->sge.reg_lock);
3109
3110	/ FL threshold comparison uses < /
3111	ret = t3_sge_init_rspcntxt(adapter, id: q->rspq.cntxt_id, irq_vec_idx,
3112	base_addr: q->rspq.phys_addr, size: q->rspq.size,
3113	fl_thres: q->fl[`0`].buf_size - SGE_PG_RSVD, gen: `1`, cidx: `0`);
3114	if (ret)
3115	goto err_unlock;
3116
3117	for (i = `0`; i < SGE_RXQ_PER_SET; ++i) {
3118	ret = t3_sge_init_flcntxt(adapter, id: q->fl[i].cntxt_id, gts_enable: `0`,
3119	base_addr: q->fl[i].phys_addr, size: q->fl[i].size,
3120	esize: q->fl[i].buf_size - SGE_PG_RSVD,
3121	cong_thres: p->cong_thres, gen: `1`, cidx: `0`);
3122	if (ret)
3123	goto err_unlock;
3124	}
3125
3126	ret = t3_sge_init_ecntxt(adapter, id: q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3127	type: SGE_CNTXT_ETH, respq: id, base_addr: q->txq[TXQ_ETH].phys_addr,
3128	size: q->txq[TXQ_ETH].size, token: q->txq[TXQ_ETH].token,
3129	gen: `1`, cidx: `0`);
3130	if (ret)
3131	goto err_unlock;
3132
3133	if (ntxq > `1`) {
3134	ret = t3_sge_init_ecntxt(adapter, id: q->txq[TXQ_OFLD].cntxt_id,
3135	USE_GTS, type: SGE_CNTXT_OFLD, respq: id,
3136	base_addr: q->txq[TXQ_OFLD].phys_addr,
3137	size: q->txq[TXQ_OFLD].size, token: `0`, gen: `1`, cidx: `0`);
3138	if (ret)
3139	goto err_unlock;
3140	}
3141
3142	if (ntxq > `2`) {
3143	ret = t3_sge_init_ecntxt(adapter, id: q->txq[TXQ_CTRL].cntxt_id, gts_enable: `0`,
3144	type: SGE_CNTXT_CTRL, respq: id,
3145	base_addr: q->txq[TXQ_CTRL].phys_addr,
3146	size: q->txq[TXQ_CTRL].size,
3147	token: q->txq[TXQ_CTRL].token, gen: `1`, cidx: `0`);
3148	if (ret)
3149	goto err_unlock;
3150	}
3151
3152	spin_unlock_irq(lock: &adapter->sge.reg_lock);
3153
3154	q->adap = adapter;
3155	q->netdev = dev;
3156	q->tx_q = netdevq;
3157	t3_update_qset_coalesce(qs: q, p);
3158
3159	avail = refill_fl(adap: adapter, q: &q->fl[`0`], n: q->fl[`0`].size,
3160	GFP_KERNEL \| __GFP_COMP);
3161	if (!avail) {
3162	CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3163	ret = -ENOMEM;
3164	goto err;
3165	}
3166	if (avail < q->fl[`0`].size)
3167	CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3168	avail);
3169
3170	avail = refill_fl(adap: adapter, q: &q->fl[`1`], n: q->fl[`1`].size,
3171	GFP_KERNEL \| __GFP_COMP);
3172	if (avail < q->fl[`1`].size)
3173	CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3174	avail);
3175	refill_rspq(adapter, q: &q->rspq, credits: q->rspq.size - `1`);
3176
3177	t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) \|
3178	V_NEWTIMER(q->rspq.holdoff_tmr));
3179
3180	return `0`;
3181
3182	err_unlock:
3183	spin_unlock_irq(lock: &adapter->sge.reg_lock);
3184	err:
3185	t3_free_qset(adapter, q);
3186	return ret;
3187	}
3188
3189	/**
3190	* t3_start_sge_timers - start SGE timer call backs
3191	* @adap: the adapter
3192	*
3193	* Starts each SGE queue set's timer call back
3194	*/
3195	void t3_start_sge_timers(struct adapter *adap)
3196	{
3197	int i;
3198
3199	for (i = `0`; i < SGE_QSETS; ++i) {
3200	struct sge_qset *q = &adap->sge.qs[i];
3201
3202	if (q->tx_reclaim_timer.function)
3203	mod_timer(timer: &q->tx_reclaim_timer,
3204	expires: jiffies + TX_RECLAIM_PERIOD);
3205
3206	if (q->rx_reclaim_timer.function)
3207	mod_timer(timer: &q->rx_reclaim_timer,
3208	expires: jiffies + RX_RECLAIM_PERIOD);
3209	}
3210	}
3211
3212	/**
3213	* t3_stop_sge_timers - stop SGE timer call backs
3214	* @adap: the adapter
3215	*
3216	* Stops each SGE queue set's timer call back
3217	*/
3218	void t3_stop_sge_timers(struct adapter *adap)
3219	{
3220	int i;
3221
3222	for (i = `0`; i < SGE_QSETS; ++i) {
3223	struct sge_qset *q = &adap->sge.qs[i];
3224
3225	if (q->tx_reclaim_timer.function)
3226	del_timer_sync(timer: &q->tx_reclaim_timer);
3227	if (q->rx_reclaim_timer.function)
3228	del_timer_sync(timer: &q->rx_reclaim_timer);
3229	}
3230	}
3231
3232	/**
3233	* t3_free_sge_resources - free SGE resources
3234	* @adap: the adapter
3235	*
3236	* Frees resources used by the SGE queue sets.
3237	*/
3238	void t3_free_sge_resources(struct adapter *adap)
3239	{
3240	int i;
3241
3242	for (i = `0`; i < SGE_QSETS; ++i)
3243	t3_free_qset(adapter: adap, q: &adap->sge.qs[i]);
3244	}
3245
3246	/**
3247	* t3_sge_start - enable SGE
3248	* @adap: the adapter
3249	*
3250	* Enables the SGE for DMAs. This is the last step in starting packet
3251	* transfers.
3252	*/
3253	void t3_sge_start(struct adapter *adap)
3254	{
3255	t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3256	}
3257
3258	/**
3259	* t3_sge_stop_dma - Disable SGE DMA engine operation
3260	* @adap: the adapter
3261	*
3262	* Can be invoked from interrupt context e.g. error handler.
3263	*
3264	* Note that this function cannot disable the restart of works as
3265	* it cannot wait if called from interrupt context, however the
3266	* works will have no effect since the doorbells are disabled. The
3267	* driver will call tg3_sge_stop() later from process context, at
3268	* which time the works will be stopped if they are still running.
3269	*/
3270	void t3_sge_stop_dma(struct adapter *adap)
3271	{
3272	t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, val: `0`);
3273	}
3274
3275	/**
3276	* t3_sge_stop - disable SGE operation completly
3277	* @adap: the adapter
3278	*
3279	* Called from process context. Disables the DMA engine and any
3280	* pending queue restart works.
3281	*/
3282	void t3_sge_stop(struct adapter *adap)
3283	{
3284	int i;
3285
3286	t3_sge_stop_dma(adap);
3287
3288	/ workqueues aren't initialized otherwise /
3289	if (!(adap->flags & FULL_INIT_DONE))
3290	return;
3291	for (i = `0`; i < SGE_QSETS; ++i) {
3292	struct sge_qset *qs = &adap->sge.qs[i];
3293
3294	cancel_work_sync(work: &qs->txq[TXQ_OFLD].qresume_task);
3295	cancel_work_sync(work: &qs->txq[TXQ_CTRL].qresume_task);
3296	}
3297	}
3298
3299	/**
3300	* t3_sge_init - initialize SGE
3301	* @adap: the adapter
3302	* @p: the SGE parameters
3303	*
3304	* Performs SGE initialization needed every time after a chip reset.
3305	* We do not initialize any of the queue sets here, instead the driver
3306	* top-level must request those individually. We also do not enable DMA
3307	* here, that should be done after the queues have been set up.
3308	*/
3309	void t3_sge_init(struct adapter adap, struct* sge_params *p)
3310	{
3311	unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, `2`) >> `12`);
3312
3313	ctrl = F_DROPPKT \| V_PKTSHIFT(`2`) \| F_FLMODE \| F_AVOIDCQOVFL \|
3314	F_CQCRDTCTRL \| F_CONGMODE \| F_TNLFLMODE \| F_FATLPERREN \|
3315	V_HOSTPAGESIZE(PAGE_SHIFT - `11`) \| F_BIGENDIANINGRESS \|
3316	V_USERSPACESIZE(ups ? ups - `1` : `0`) \| F_ISCSICOALESCING;
3317	#if SGE_NUM_GENBITS == 1
3318	ctrl \|= F_EGRGENCTRL;
3319	#endif
3320	if (adap->params.rev > `0`) {
3321	if (!(adap->flags & (USING_MSIX \| USING_MSI)))
3322	ctrl \|= F_ONEINTMULTQ \| F_OPTONEINTMULTQ;
3323	}
3324	t3_write_reg(adapter: adap, A_SG_CONTROL, val: ctrl);
3325	t3_write_reg(adapter: adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(`512`) \|
3326	V_LORCQDRBTHRSH(`512`));
3327	t3_write_reg(adapter: adap, A_SG_TIMER_TICK, val: core_ticks_per_usec(adap) / `10`);
3328	t3_write_reg(adapter: adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(`32`) \|
3329	V_TIMEOUT(`200` * core_ticks_per_usec(adap)));
3330	t3_write_reg(adapter: adap, A_SG_HI_DRB_HI_THRSH,
3331	val: adap->params.rev < T3_REV_C ? `1000` : `500`);
3332	t3_write_reg(adapter: adap, A_SG_HI_DRB_LO_THRSH, val: `256`);
3333	t3_write_reg(adapter: adap, A_SG_LO_DRB_HI_THRSH, val: `1000`);
3334	t3_write_reg(adapter: adap, A_SG_LO_DRB_LO_THRSH, val: `256`);
3335	t3_write_reg(adapter: adap, A_SG_OCO_BASE, V_BASE1(`0xfff`));
3336	t3_write_reg(adapter: adap, A_SG_DRB_PRI_THRESH, val: `63` * `1024`);
3337	}
3338
3339	/**
3340	* t3_sge_prep - one-time SGE initialization
3341	* @adap: the associated adapter
3342	* @p: SGE parameters
3343	*
3344	* Performs one-time initialization of SGE SW state. Includes determining
3345	* defaults for the assorted SGE parameters, which admins can change until
3346	* they are used to initialize the SGE.
3347	*/
3348	void t3_sge_prep(struct adapter adap, struct* sge_params *p)
3349	{
3350	int i;
3351
3352	p->max_pkt_size = (`16` * `1024`) - sizeof(struct cpl_rx_data) -
3353	SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3354
3355	for (i = `0`; i < SGE_QSETS; ++i) {
3356	struct qset_params *q = p->qset + i;
3357
3358	q->polling = adap->params.rev > `0`;
3359	q->coalesce_usecs = `5`;
3360	q->rspq_size = `1024`;
3361	q->fl_size = `1024`;
3362	q->jumbo_size = `512`;
3363	q->txq_size[TXQ_ETH] = `1024`;
3364	q->txq_size[TXQ_OFLD] = `1024`;
3365	q->txq_size[TXQ_CTRL] = `256`;
3366	q->cong_thres = `0`;
3367	}
3368
3369	spin_lock_init(&adap->sge.reg_lock);
3370	}
3371

/* End of linux/drivers/net/ethernet/chelsio/cxgb3/sge.c */