1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright(c) 2013 - 2019 Intel Corporation. */ |
3 | |
4 | #include <linux/types.h> |
5 | #include <linux/module.h> |
6 | #include <net/ipv6.h> |
7 | #include <net/ip.h> |
8 | #include <net/tcp.h> |
9 | #include <linux/if_macvlan.h> |
10 | #include <linux/prefetch.h> |
11 | |
12 | #include "fm10k.h" |
13 | |
14 | #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" |
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
	"Copyright(c) 2013 - 2019 Intel Corporation.";
19 | |
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_LICENSE("GPL v2");
23 | |
24 | /* single workqueue for entire fm10k driver */ |
25 | struct workqueue_struct *fm10k_workqueue; |
26 | |
27 | /** |
28 | * fm10k_init_module - Driver Registration Routine |
29 | * |
30 | * fm10k_init_module is the first routine called when the driver is |
31 | * loaded. All it does is register with the PCI subsystem. |
32 | **/ |
33 | static int __init fm10k_init_module(void) |
34 | { |
35 | int ret; |
36 | |
	pr_info("%s\n", fm10k_driver_string);
	pr_info("%s\n", fm10k_copyright);
39 | |
40 | /* create driver workqueue */ |
	fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
					  fm10k_driver_name);
43 | if (!fm10k_workqueue) |
44 | return -ENOMEM; |
45 | |
46 | fm10k_dbg_init(); |
47 | |
48 | ret = fm10k_register_pci_driver(); |
49 | if (ret) { |
50 | fm10k_dbg_exit(); |
		destroy_workqueue(fm10k_workqueue);
52 | } |
53 | |
54 | return ret; |
55 | } |
56 | module_init(fm10k_init_module); |
57 | |
58 | /** |
59 | * fm10k_exit_module - Driver Exit Cleanup Routine |
60 | * |
61 | * fm10k_exit_module is called just before the driver is removed |
62 | * from memory. |
63 | **/ |
64 | static void __exit fm10k_exit_module(void) |
65 | { |
66 | fm10k_unregister_pci_driver(); |
67 | |
68 | fm10k_dbg_exit(); |
69 | |
70 | /* destroy driver workqueue */ |
	destroy_workqueue(fm10k_workqueue);
72 | } |
73 | module_exit(fm10k_exit_module); |
74 | |
75 | static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring, |
76 | struct fm10k_rx_buffer *bi) |
77 | { |
78 | struct page *page = bi->page; |
79 | dma_addr_t dma; |
80 | |
81 | /* Only page will be NULL if buffer was consumed */ |
82 | if (likely(page)) |
83 | return true; |
84 | |
85 | /* alloc new page for storage */ |
86 | page = dev_alloc_page(); |
87 | if (unlikely(!page)) { |
88 | rx_ring->rx_stats.alloc_failed++; |
89 | return false; |
90 | } |
91 | |
92 | /* map page for use */ |
93 | dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); |
94 | |
95 | /* if mapping failed free memory back to system since |
96 | * there isn't much point in holding memory we can't use |
97 | */ |
	if (dma_mapping_error(rx_ring->dev, dma)) {
99 | __free_page(page); |
100 | |
101 | rx_ring->rx_stats.alloc_failed++; |
102 | return false; |
103 | } |
104 | |
105 | bi->dma = dma; |
106 | bi->page = page; |
107 | bi->page_offset = 0; |
108 | |
109 | return true; |
110 | } |
111 | |
112 | /** |
113 | * fm10k_alloc_rx_buffers - Replace used receive buffers |
114 | * @rx_ring: ring to place buffers on |
115 | * @cleaned_count: number of buffers to replace |
116 | **/ |
117 | void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count) |
118 | { |
119 | union fm10k_rx_desc *rx_desc; |
120 | struct fm10k_rx_buffer *bi; |
121 | u16 i = rx_ring->next_to_use; |
122 | |
123 | /* nothing to do */ |
124 | if (!cleaned_count) |
125 | return; |
126 | |
127 | rx_desc = FM10K_RX_DESC(rx_ring, i); |
128 | bi = &rx_ring->rx_buffer[i]; |
129 | i -= rx_ring->count; |
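	/* i is biased negative by the ring size here so that the wrap test
	 * inside the loop below reduces to a simple check for zero.
	 */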
130 | |
131 | do { |
132 | if (!fm10k_alloc_mapped_page(rx_ring, bi)) |
133 | break; |
134 | |
135 | /* Refresh the desc even if buffer_addrs didn't change |
136 | * because each write-back erases this info. |
137 | */ |
138 | rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); |
139 | |
140 | rx_desc++; |
141 | bi++; |
142 | i++; |
143 | if (unlikely(!i)) { |
144 | rx_desc = FM10K_RX_DESC(rx_ring, 0); |
145 | bi = rx_ring->rx_buffer; |
146 | i -= rx_ring->count; |
147 | } |
148 | |
149 | /* clear the status bits for the next_to_use descriptor */ |
150 | rx_desc->d.staterr = 0; |
151 | |
152 | cleaned_count--; |
153 | } while (cleaned_count); |
154 | |
155 | i += rx_ring->count; |
156 | |
157 | if (rx_ring->next_to_use != i) { |
158 | /* record the next descriptor to use */ |
159 | rx_ring->next_to_use = i; |
160 | |
161 | /* update next to alloc since we have filled the ring */ |
162 | rx_ring->next_to_alloc = i; |
163 | |
164 | /* Force memory writes to complete before letting h/w |
165 | * know there are new descriptors to fetch. (Only |
166 | * applicable for weak-ordered memory model archs, |
167 | * such as IA-64). |
168 | */ |
169 | wmb(); |
170 | |
171 | /* notify hardware of new descriptors */ |
		writel(i, rx_ring->tail);
173 | } |
174 | } |
175 | |
176 | /** |
177 | * fm10k_reuse_rx_page - page flip buffer and store it back on the ring |
178 | * @rx_ring: rx descriptor ring to store buffers on |
179 | * @old_buff: donor buffer to have page reused |
180 | * |
181 | * Synchronizes page for reuse by the interface |
182 | **/ |
183 | static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, |
184 | struct fm10k_rx_buffer *old_buff) |
185 | { |
186 | struct fm10k_rx_buffer *new_buff; |
187 | u16 nta = rx_ring->next_to_alloc; |
188 | |
189 | new_buff = &rx_ring->rx_buffer[nta]; |
190 | |
191 | /* update, and store next to alloc */ |
192 | nta++; |
193 | rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; |
194 | |
195 | /* transfer page from old buffer to new buffer */ |
196 | *new_buff = *old_buff; |
197 | |
198 | /* sync the buffer for use by the device */ |
	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
					 old_buff->page_offset,
					 FM10K_RX_BUFSZ,
					 DMA_FROM_DEVICE);
203 | } |
204 | |
205 | static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, |
206 | struct page *page, |
207 | unsigned int __maybe_unused truesize) |
208 | { |
209 | /* avoid re-using remote and pfmemalloc pages */ |
210 | if (!dev_page_is_reusable(page)) |
211 | return false; |
212 | |
213 | #if (PAGE_SIZE < 8192) |
214 | /* if we are only owner of page we can reuse it */ |
215 | if (unlikely(page_count(page) != 1)) |
216 | return false; |
217 | |
218 | /* flip page offset to other buffer */ |
219 | rx_buffer->page_offset ^= FM10K_RX_BUFSZ; |
220 | #else |
221 | /* move offset up to the next cache line */ |
222 | rx_buffer->page_offset += truesize; |
223 | |
224 | if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ)) |
225 | return false; |
226 | #endif |
227 | |
	/* Even if we own the page, we are not allowed to use atomic_set().
	 * This would break get_page_unless_zero() users.
230 | */ |
231 | page_ref_inc(page); |
232 | |
233 | return true; |
234 | } |
235 | |
236 | /** |
237 | * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff |
238 | * @rx_buffer: buffer containing page to add |
239 | * @size: packet size from rx_desc |
240 | * @rx_desc: descriptor containing length of buffer written by hardware |
241 | * @skb: sk_buff to place the data into |
242 | * |
243 | * This function will add the data contained in rx_buffer->page to the skb. |
244 | * This is done either through a direct copy if the data in the buffer is |
245 | * less than the skb header size, otherwise it will just attach the page as |
246 | * a frag to the skb. |
247 | * |
248 | * The function will then update the page offset if necessary and return |
249 | * true if the buffer can be reused by the interface. |
250 | **/ |
251 | static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer, |
252 | unsigned int size, |
253 | union fm10k_rx_desc *rx_desc, |
254 | struct sk_buff *skb) |
255 | { |
256 | struct page *page = rx_buffer->page; |
257 | unsigned char *va = page_address(page) + rx_buffer->page_offset; |
258 | #if (PAGE_SIZE < 8192) |
259 | unsigned int truesize = FM10K_RX_BUFSZ; |
260 | #else |
261 | unsigned int truesize = ALIGN(size, 512); |
262 | #endif |
263 | unsigned int pull_len; |
264 | |
265 | if (unlikely(skb_is_nonlinear(skb))) |
266 | goto add_tail_frag; |
267 | |
268 | if (likely(size <= FM10K_RX_HDR_LEN)) { |
269 | memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); |
270 | |
271 | /* page is reusable, we can reuse buffer as-is */ |
272 | if (dev_page_is_reusable(page)) |
273 | return true; |
274 | |
275 | /* this page cannot be reused so discard it */ |
276 | __free_page(page); |
277 | return false; |
278 | } |
279 | |
280 | /* we need the header to contain the greater of either ETH_HLEN or |
281 | * 60 bytes if the skb->len is less than 60 for skb_pad. |
282 | */ |
	pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);
284 | |
285 | /* align pull length to size of long to optimize memcpy performance */ |
286 | memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); |
287 | |
288 | /* update all of the pointers */ |
289 | va += pull_len; |
290 | size -= pull_len; |
291 | |
292 | add_tail_frag: |
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			(unsigned long)va & ~PAGE_MASK, size, truesize);
295 | |
296 | return fm10k_can_reuse_rx_page(rx_buffer, page, truesize); |
297 | } |
298 | |
299 | static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring, |
300 | union fm10k_rx_desc *rx_desc, |
301 | struct sk_buff *skb) |
302 | { |
303 | unsigned int size = le16_to_cpu(rx_desc->w.length); |
304 | struct fm10k_rx_buffer *rx_buffer; |
305 | struct page *page; |
306 | |
307 | rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean]; |
308 | page = rx_buffer->page; |
	prefetchw(page);
310 | |
311 | if (likely(!skb)) { |
312 | void *page_addr = page_address(page) + |
313 | rx_buffer->page_offset; |
314 | |
315 | /* prefetch first cache line of first page */ |
		net_prefetch(page_addr);
317 | |
318 | /* allocate a skb to store the frags */ |
		skb = napi_alloc_skb(&rx_ring->q_vector->napi,
				     FM10K_RX_HDR_LEN);
321 | if (unlikely(!skb)) { |
322 | rx_ring->rx_stats.alloc_failed++; |
323 | return NULL; |
324 | } |
325 | |
326 | /* we will be copying header into skb->data in |
327 | * pskb_may_pull so it is in our interest to prefetch |
328 | * it now to avoid a possible cache miss |
329 | */ |
		prefetchw(skb->data);
331 | } |
332 | |
333 | /* we are reusing so sync this buffer for CPU use */ |
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      size,
				      DMA_FROM_DEVICE);
339 | |
340 | /* pull page into skb */ |
341 | if (fm10k_add_rx_frag(rx_buffer, size, rx_desc, skb)) { |
342 | /* hand second half of page back to the ring */ |
		fm10k_reuse_rx_page(rx_ring, rx_buffer);
344 | } else { |
345 | /* we are not reusing the buffer so unmap it */ |
346 | dma_unmap_page(rx_ring->dev, rx_buffer->dma, |
347 | PAGE_SIZE, DMA_FROM_DEVICE); |
348 | } |
349 | |
350 | /* clear contents of rx_buffer */ |
351 | rx_buffer->page = NULL; |
352 | |
353 | return skb; |
354 | } |
355 | |
356 | static inline void fm10k_rx_checksum(struct fm10k_ring *ring, |
357 | union fm10k_rx_desc *rx_desc, |
358 | struct sk_buff *skb) |
359 | { |
360 | skb_checksum_none_assert(skb); |
361 | |
362 | /* Rx checksum disabled via ethtool */ |
363 | if (!(ring->netdev->features & NETIF_F_RXCSUM)) |
364 | return; |
365 | |
366 | /* TCP/UDP checksum error bit is set */ |
367 | if (fm10k_test_staterr(rx_desc, |
368 | FM10K_RXD_STATUS_L4E | |
369 | FM10K_RXD_STATUS_L4E2 | |
370 | FM10K_RXD_STATUS_IPE | |
371 | FM10K_RXD_STATUS_IPE2)) { |
372 | ring->rx_stats.csum_err++; |
373 | return; |
374 | } |
375 | |
376 | /* It must be a TCP or UDP packet with a valid checksum */ |
377 | if (fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS2)) |
378 | skb->encapsulation = true; |
379 | else if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS)) |
380 | return; |
381 | |
382 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
383 | |
384 | ring->rx_stats.csum_good++; |
385 | } |
386 | |
#define FM10K_RSS_L4_TYPES_MASK \
388 | (BIT(FM10K_RSSTYPE_IPV4_TCP) | \ |
389 | BIT(FM10K_RSSTYPE_IPV4_UDP) | \ |
390 | BIT(FM10K_RSSTYPE_IPV6_TCP) | \ |
391 | BIT(FM10K_RSSTYPE_IPV6_UDP)) |
392 | |
393 | static inline void fm10k_rx_hash(struct fm10k_ring *ring, |
394 | union fm10k_rx_desc *rx_desc, |
395 | struct sk_buff *skb) |
396 | { |
	u16 rss_type;
398 | |
399 | if (!(ring->netdev->features & NETIF_F_RXHASH)) |
400 | return; |
401 | |
402 | rss_type = le16_to_cpu(rx_desc->w.pkt_info) & FM10K_RXD_RSSTYPE_MASK; |
403 | if (!rss_type) |
404 | return; |
405 | |
	skb_set_hash(skb, le32_to_cpu(rx_desc->d.rss),
		     (BIT(rss_type) & FM10K_RSS_L4_TYPES_MASK) ?
		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
409 | } |
410 | |
411 | static void fm10k_type_trans(struct fm10k_ring *rx_ring, |
412 | union fm10k_rx_desc __maybe_unused *rx_desc, |
413 | struct sk_buff *skb) |
414 | { |
415 | struct net_device *dev = rx_ring->netdev; |
416 | struct fm10k_l2_accel *l2_accel = rcu_dereference_bh(rx_ring->l2_accel); |
417 | |
418 | /* check to see if DGLORT belongs to a MACVLAN */ |
419 | if (l2_accel) { |
420 | u16 idx = le16_to_cpu(FM10K_CB(skb)->fi.w.dglort) - 1; |
421 | |
422 | idx -= l2_accel->dglort; |
423 | if (idx < l2_accel->size && l2_accel->macvlan[idx]) |
424 | dev = l2_accel->macvlan[idx]; |
425 | else |
426 | l2_accel = NULL; |
427 | } |
428 | |
429 | /* Record Rx queue, or update macvlan statistics */ |
430 | if (!l2_accel) |
		skb_record_rx_queue(skb, rx_ring->queue_index);
	else
		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
				 false);
435 | |
436 | skb->protocol = eth_type_trans(skb, dev); |
437 | } |
438 | |
439 | /** |
440 | * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor |
441 | * @rx_ring: rx descriptor ring packet is being transacted on |
442 | * @rx_desc: pointer to the EOP Rx descriptor |
443 | * @skb: pointer to current skb being populated |
444 | * |
445 | * This function checks the ring, descriptor, and packet information in |
446 | * order to populate the hash, checksum, VLAN, timestamp, protocol, and |
447 | * other fields within the skb. |
448 | **/ |
449 | static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring, |
450 | union fm10k_rx_desc *rx_desc, |
451 | struct sk_buff *skb) |
452 | { |
453 | unsigned int len = skb->len; |
454 | |
	fm10k_rx_hash(rx_ring, rx_desc, skb);

	fm10k_rx_checksum(rx_ring, rx_desc, skb);
458 | |
459 | FM10K_CB(skb)->tstamp = rx_desc->q.timestamp; |
460 | |
461 | FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan; |
462 | |
463 | FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort; |
464 | |
465 | if (rx_desc->w.vlan) { |
466 | u16 vid = le16_to_cpu(rx_desc->w.vlan); |
467 | |
468 | if ((vid & VLAN_VID_MASK) != rx_ring->vid) |
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
		else if (vid & VLAN_PRIO_MASK)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       vid & VLAN_PRIO_MASK);
473 | } |
474 | |
475 | fm10k_type_trans(rx_ring, rx_desc, skb); |
476 | |
477 | return len; |
478 | } |
479 | |
480 | /** |
481 | * fm10k_is_non_eop - process handling of non-EOP buffers |
482 | * @rx_ring: Rx ring being processed |
483 | * @rx_desc: Rx descriptor for current buffer |
484 | * |
485 | * This function updates next to clean. If the buffer is an EOP buffer |
486 | * this function exits returning false, otherwise it will place the |
487 | * sk_buff in the next buffer to be chained and return true indicating |
488 | * that this is in fact a non-EOP buffer. |
489 | **/ |
490 | static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring, |
491 | union fm10k_rx_desc *rx_desc) |
492 | { |
493 | u32 ntc = rx_ring->next_to_clean + 1; |
494 | |
495 | /* fetch, update, and store next to clean */ |
496 | ntc = (ntc < rx_ring->count) ? ntc : 0; |
497 | rx_ring->next_to_clean = ntc; |
498 | |
499 | prefetch(FM10K_RX_DESC(rx_ring, ntc)); |
500 | |
501 | if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP))) |
502 | return false; |
503 | |
504 | return true; |
505 | } |
506 | |
507 | /** |
508 | * fm10k_cleanup_headers - Correct corrupted or empty headers |
509 | * @rx_ring: rx descriptor ring packet is being transacted on |
510 | * @rx_desc: pointer to the EOP Rx descriptor |
511 | * @skb: pointer to current skb being fixed |
512 | * |
513 | * Address the case where we are pulling data in on pages only |
514 | * and as such no data is present in the skb header. |
515 | * |
516 | * In addition if skb is not at least 60 bytes we need to pad it so that |
517 | * it is large enough to qualify as a valid Ethernet frame. |
518 | * |
519 | * Returns true if an error was encountered and skb was freed. |
520 | **/ |
static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
522 | union fm10k_rx_desc *rx_desc, |
523 | struct sk_buff *skb) |
524 | { |
525 | if (unlikely((fm10k_test_staterr(rx_desc, |
526 | FM10K_RXD_STATUS_RXE)))) { |
527 | #define FM10K_TEST_RXD_BIT(rxd, bit) \ |
528 | ((rxd)->w.csum_err & cpu_to_le16(bit)) |
529 | if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_ERROR)) |
530 | rx_ring->rx_stats.switch_errors++; |
531 | if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_NO_DESCRIPTOR)) |
532 | rx_ring->rx_stats.drops++; |
533 | if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_PP_ERROR)) |
534 | rx_ring->rx_stats.pp_errors++; |
535 | if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_READY)) |
536 | rx_ring->rx_stats.link_errors++; |
537 | if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_TOO_BIG)) |
538 | rx_ring->rx_stats.length_errors++; |
539 | dev_kfree_skb_any(skb); |
540 | rx_ring->rx_stats.errors++; |
541 | return true; |
542 | } |
543 | |
544 | /* if eth_skb_pad returns an error the skb was freed */ |
545 | if (eth_skb_pad(skb)) |
546 | return true; |
547 | |
548 | return false; |
549 | } |
550 | |
551 | /** |
552 | * fm10k_receive_skb - helper function to handle rx indications |
553 | * @q_vector: structure containing interrupt and ring information |
554 | * @skb: packet to send up |
555 | **/ |
556 | static void fm10k_receive_skb(struct fm10k_q_vector *q_vector, |
557 | struct sk_buff *skb) |
558 | { |
	napi_gro_receive(&q_vector->napi, skb);
560 | } |
561 | |
562 | static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, |
563 | struct fm10k_ring *rx_ring, |
564 | int budget) |
565 | { |
566 | struct sk_buff *skb = rx_ring->skb; |
567 | unsigned int total_bytes = 0, total_packets = 0; |
	u16 cleaned_count = fm10k_desc_unused(rx_ring);
569 | |
570 | while (likely(total_packets < budget)) { |
571 | union fm10k_rx_desc *rx_desc; |
572 | |
573 | /* return some buffers to hardware, one at a time is too slow */ |
574 | if (cleaned_count >= FM10K_RX_BUFFER_WRITE) { |
575 | fm10k_alloc_rx_buffers(rx_ring, cleaned_count); |
576 | cleaned_count = 0; |
577 | } |
578 | |
579 | rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean); |
580 | |
581 | if (!rx_desc->d.staterr) |
582 | break; |
583 | |
584 | /* This memory barrier is needed to keep us from reading |
585 | * any other fields out of the rx_desc until we know the |
586 | * descriptor has been written back |
587 | */ |
588 | dma_rmb(); |
589 | |
590 | /* retrieve a buffer from the ring */ |
591 | skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb); |
592 | |
593 | /* exit if we failed to retrieve a buffer */ |
594 | if (!skb) |
595 | break; |
596 | |
597 | cleaned_count++; |
598 | |
599 | /* fetch next buffer in frame if non-eop */ |
600 | if (fm10k_is_non_eop(rx_ring, rx_desc)) |
601 | continue; |
602 | |
603 | /* verify the packet layout is correct */ |
604 | if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) { |
605 | skb = NULL; |
606 | continue; |
607 | } |
608 | |
609 | /* populate checksum, timestamp, VLAN, and protocol */ |
610 | total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb); |
611 | |
612 | fm10k_receive_skb(q_vector, skb); |
613 | |
614 | /* reset skb pointer */ |
615 | skb = NULL; |
616 | |
617 | /* update budget accounting */ |
618 | total_packets++; |
619 | } |
620 | |
621 | /* place incomplete frames back on ring for completion */ |
622 | rx_ring->skb = skb; |
623 | |
	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_packets;
	rx_ring->stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->syncp);
628 | q_vector->rx.total_packets += total_packets; |
629 | q_vector->rx.total_bytes += total_bytes; |
630 | |
631 | return total_packets; |
632 | } |
633 | |
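/* A VXLAN frame carries an 8-byte UDP header followed by an 8-byte VXLAN
 * header in front of the inner Ethernet frame.
 */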
634 | #define VXLAN_HLEN (sizeof(struct udphdr) + 8) |
635 | static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb) |
636 | { |
	struct fm10k_intfc *interface = netdev_priv(skb->dev);
638 | |
639 | if (interface->vxlan_port != udp_hdr(skb)->dest) |
640 | return NULL; |
641 | |
642 | /* return offset of udp_hdr plus 8 bytes for VXLAN header */ |
643 | return (struct ethhdr *)(skb_transport_header(skb) + VXLAN_HLEN); |
644 | } |
645 | |
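/* For NVGRE the GRE flag bits covered by the mask below (checksum and
 * sequence present plus the lower reserved/version bits) must be zero, while
 * the Key Present bit (0x2000) indicates a 32-bit TNI field follows the
 * base header.
 */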
646 | #define FM10K_NVGRE_RESERVED0_FLAGS htons(0x9FFF) |
647 | #define NVGRE_TNI htons(0x2000) |
648 | struct fm10k_nvgre_hdr { |
649 | __be16 flags; |
650 | __be16 proto; |
651 | __be32 tni; |
652 | }; |
653 | |
654 | static struct ethhdr *fm10k_gre_is_nvgre(struct sk_buff *skb) |
655 | { |
656 | struct fm10k_nvgre_hdr *nvgre_hdr; |
657 | int hlen = ip_hdrlen(skb); |
658 | |
659 | /* currently only IPv4 is supported due to hlen above */ |
660 | if (vlan_get_protocol(skb) != htons(ETH_P_IP)) |
661 | return NULL; |
662 | |
663 | /* our transport header should be NVGRE */ |
664 | nvgre_hdr = (struct fm10k_nvgre_hdr *)(skb_network_header(skb) + hlen); |
665 | |
666 | /* verify all reserved flags are 0 */ |
667 | if (nvgre_hdr->flags & FM10K_NVGRE_RESERVED0_FLAGS) |
668 | return NULL; |
669 | |
670 | /* report start of ethernet header */ |
671 | if (nvgre_hdr->flags & NVGRE_TNI) |
672 | return (struct ethhdr *)(nvgre_hdr + 1); |
673 | |
674 | return (struct ethhdr *)(&nvgre_hdr->tni); |
675 | } |
676 | |
677 | __be16 fm10k_tx_encap_offload(struct sk_buff *skb) |
678 | { |
679 | u8 l4_hdr = 0, inner_l4_hdr = 0, inner_l4_hlen; |
680 | struct ethhdr *eth_hdr; |
681 | |
682 | if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || |
683 | skb->inner_protocol != htons(ETH_P_TEB)) |
684 | return 0; |
685 | |
686 | switch (vlan_get_protocol(skb)) { |
687 | case htons(ETH_P_IP): |
688 | l4_hdr = ip_hdr(skb)->protocol; |
689 | break; |
690 | case htons(ETH_P_IPV6): |
691 | l4_hdr = ipv6_hdr(skb)->nexthdr; |
692 | break; |
693 | default: |
694 | return 0; |
695 | } |
696 | |
697 | switch (l4_hdr) { |
698 | case IPPROTO_UDP: |
699 | eth_hdr = fm10k_port_is_vxlan(skb); |
700 | break; |
701 | case IPPROTO_GRE: |
702 | eth_hdr = fm10k_gre_is_nvgre(skb); |
703 | break; |
704 | default: |
705 | return 0; |
706 | } |
707 | |
708 | if (!eth_hdr) |
709 | return 0; |
710 | |
711 | switch (eth_hdr->h_proto) { |
712 | case htons(ETH_P_IP): |
713 | inner_l4_hdr = inner_ip_hdr(skb)->protocol; |
714 | break; |
715 | case htons(ETH_P_IPV6): |
716 | inner_l4_hdr = inner_ipv6_hdr(skb)->nexthdr; |
717 | break; |
718 | default: |
719 | return 0; |
720 | } |
721 | |
722 | switch (inner_l4_hdr) { |
723 | case IPPROTO_TCP: |
724 | inner_l4_hlen = inner_tcp_hdrlen(skb); |
725 | break; |
726 | case IPPROTO_UDP: |
727 | inner_l4_hlen = 8; |
728 | break; |
729 | default: |
730 | return 0; |
731 | } |
732 | |
733 | /* The hardware allows tunnel offloads only if the combined inner and |
734 | * outer header is 184 bytes or less |
735 | */ |
736 | if (skb_inner_transport_header(skb) + inner_l4_hlen - |
737 | skb_mac_header(skb) > FM10K_TUNNEL_HEADER_LENGTH) |
738 | return 0; |
739 | |
740 | return eth_hdr->h_proto; |
741 | } |
742 | |
743 | static int fm10k_tso(struct fm10k_ring *tx_ring, |
744 | struct fm10k_tx_buffer *first) |
745 | { |
746 | struct sk_buff *skb = first->skb; |
747 | struct fm10k_tx_desc *tx_desc; |
748 | unsigned char *th; |
749 | u8 hdrlen; |
750 | |
751 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
752 | return 0; |
753 | |
754 | if (!skb_is_gso(skb)) |
755 | return 0; |
756 | |
757 | /* compute header lengths */ |
758 | if (skb->encapsulation) { |
759 | if (!fm10k_tx_encap_offload(skb)) |
760 | goto err_vxlan; |
761 | th = skb_inner_transport_header(skb); |
762 | } else { |
763 | th = skb_transport_header(skb); |
764 | } |
765 | |
766 | /* compute offset from SOF to transport header and add header len */ |
767 | hdrlen = (th - skb->data) + (((struct tcphdr *)th)->doff << 2); |
768 | |
769 | first->tx_flags |= FM10K_TX_FLAGS_CSUM; |
770 | |
771 | /* update gso size and bytecount with header size */ |
772 | first->gso_segs = skb_shinfo(skb)->gso_segs; |
773 | first->bytecount += (first->gso_segs - 1) * hdrlen; |
774 | |
775 | /* populate Tx descriptor header size and mss */ |
776 | tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use); |
777 | tx_desc->hdrlen = hdrlen; |
778 | tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); |
779 | |
780 | return 1; |
781 | |
782 | err_vxlan: |
783 | tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL; |
784 | if (net_ratelimit()) |
		netdev_err(tx_ring->netdev,
			   "TSO requested for unsupported tunnel, disabling offload\n");
787 | return -1; |
788 | } |
789 | |
790 | static void fm10k_tx_csum(struct fm10k_ring *tx_ring, |
791 | struct fm10k_tx_buffer *first) |
792 | { |
793 | struct sk_buff *skb = first->skb; |
794 | struct fm10k_tx_desc *tx_desc; |
795 | union { |
796 | struct iphdr *ipv4; |
797 | struct ipv6hdr *ipv6; |
798 | u8 *raw; |
799 | } network_hdr; |
800 | u8 *transport_hdr; |
801 | __be16 frag_off; |
802 | __be16 protocol; |
803 | u8 l4_hdr = 0; |
804 | |
805 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
806 | goto no_csum; |
807 | |
808 | if (skb->encapsulation) { |
809 | protocol = fm10k_tx_encap_offload(skb); |
810 | if (!protocol) { |
811 | if (skb_checksum_help(skb)) { |
812 | dev_warn(tx_ring->dev, |
					 "failed to offload encap csum!\n");
814 | tx_ring->tx_stats.csum_err++; |
815 | } |
816 | goto no_csum; |
817 | } |
818 | network_hdr.raw = skb_inner_network_header(skb); |
819 | transport_hdr = skb_inner_transport_header(skb); |
820 | } else { |
821 | protocol = vlan_get_protocol(skb); |
822 | network_hdr.raw = skb_network_header(skb); |
823 | transport_hdr = skb_transport_header(skb); |
824 | } |
825 | |
826 | switch (protocol) { |
827 | case htons(ETH_P_IP): |
828 | l4_hdr = network_hdr.ipv4->protocol; |
829 | break; |
830 | case htons(ETH_P_IPV6): |
831 | l4_hdr = network_hdr.ipv6->nexthdr; |
832 | if (likely((transport_hdr - network_hdr.raw) == |
833 | sizeof(struct ipv6hdr))) |
834 | break; |
		ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
				      sizeof(struct ipv6hdr),
				 &l4_hdr, &frag_off);
838 | if (unlikely(frag_off)) |
839 | l4_hdr = NEXTHDR_FRAGMENT; |
840 | break; |
841 | default: |
842 | break; |
843 | } |
844 | |
845 | switch (l4_hdr) { |
846 | case IPPROTO_TCP: |
847 | case IPPROTO_UDP: |
848 | break; |
849 | case IPPROTO_GRE: |
850 | if (skb->encapsulation) |
851 | break; |
852 | fallthrough; |
853 | default: |
854 | if (unlikely(net_ratelimit())) { |
855 | dev_warn(tx_ring->dev, |
				 "partial checksum, version=%d l4 proto=%x\n",
857 | protocol, l4_hdr); |
858 | } |
859 | skb_checksum_help(skb); |
860 | tx_ring->tx_stats.csum_err++; |
861 | goto no_csum; |
862 | } |
863 | |
864 | /* update TX checksum flag */ |
865 | first->tx_flags |= FM10K_TX_FLAGS_CSUM; |
866 | tx_ring->tx_stats.csum_good++; |
867 | |
868 | no_csum: |
869 | /* populate Tx descriptor header size and mss */ |
870 | tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use); |
871 | tx_desc->hdrlen = 0; |
872 | tx_desc->mss = 0; |
873 | } |
874 | |
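/* Translate a single flag bit in _input into the corresponding _result bit
 * using only a multiply or divide by a power of two, selected at compile
 * time based on which of the two bit positions is higher.
 */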
875 | #define FM10K_SET_FLAG(_input, _flag, _result) \ |
876 | ((_flag <= _result) ? \ |
877 | ((u32)(_input & _flag) * (_result / _flag)) : \ |
878 | ((u32)(_input & _flag) / (_flag / _result))) |
879 | |
880 | static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags) |
881 | { |
882 | /* set type for advanced descriptor with frame checksum insertion */ |
883 | u32 desc_flags = 0; |
884 | |
885 | /* set checksum offload bits */ |
886 | desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM, |
887 | FM10K_TXD_FLAG_CSUM); |
888 | |
889 | return desc_flags; |
890 | } |
891 | |
892 | static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring, |
893 | struct fm10k_tx_desc *tx_desc, u16 i, |
894 | dma_addr_t dma, unsigned int size, u8 desc_flags) |
895 | { |
896 | /* set RS and INT for last frame in a cache line */ |
897 | if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0) |
898 | desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT; |
899 | |
900 | /* record values to descriptor */ |
901 | tx_desc->buffer_addr = cpu_to_le64(dma); |
902 | tx_desc->flags = desc_flags; |
903 | tx_desc->buflen = cpu_to_le16(size); |
904 | |
905 | /* return true if we just wrapped the ring */ |
906 | return i == tx_ring->count; |
907 | } |
908 | |
909 | static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size) |
910 | { |
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
912 | |
913 | /* Memory barrier before checking head and tail */ |
914 | smp_mb(); |
915 | |
916 | /* Check again in a case another CPU has just made room available */ |
917 | if (likely(fm10k_desc_unused(tx_ring) < size)) |
918 | return -EBUSY; |
919 | |
920 | /* A reprieve! - use start_queue because it doesn't call schedule */ |
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
922 | ++tx_ring->tx_stats.restart_queue; |
923 | return 0; |
924 | } |
925 | |
926 | static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size) |
927 | { |
928 | if (likely(fm10k_desc_unused(tx_ring) >= size)) |
929 | return 0; |
930 | return __fm10k_maybe_stop_tx(tx_ring, size); |
931 | } |
932 | |
933 | static void fm10k_tx_map(struct fm10k_ring *tx_ring, |
934 | struct fm10k_tx_buffer *first) |
935 | { |
936 | struct sk_buff *skb = first->skb; |
937 | struct fm10k_tx_buffer *tx_buffer; |
938 | struct fm10k_tx_desc *tx_desc; |
939 | skb_frag_t *frag; |
940 | unsigned char *data; |
941 | dma_addr_t dma; |
942 | unsigned int data_len, size; |
943 | u32 tx_flags = first->tx_flags; |
944 | u16 i = tx_ring->next_to_use; |
945 | u8 flags = fm10k_tx_desc_flags(skb, tx_flags); |
946 | |
947 | tx_desc = FM10K_TX_DESC(tx_ring, i); |
948 | |
949 | /* add HW VLAN tag */ |
950 | if (skb_vlan_tag_present(skb)) |
951 | tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); |
952 | else |
953 | tx_desc->vlan = 0; |
954 | |
955 | size = skb_headlen(skb); |
956 | data = skb->data; |
957 | |
958 | dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE); |
959 | |
960 | data_len = skb->data_len; |
961 | tx_buffer = first; |
962 | |
963 | for (frag = &skb_shinfo(skb)->frags[0];; frag++) { |
		if (dma_mapping_error(tx_ring->dev, dma))
965 | goto dma_error; |
966 | |
967 | /* record length, and DMA address */ |
968 | dma_unmap_len_set(tx_buffer, len, size); |
969 | dma_unmap_addr_set(tx_buffer, dma, dma); |
970 | |
971 | while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) { |
			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
					       FM10K_MAX_DATA_PER_TXD, flags)) {
974 | tx_desc = FM10K_TX_DESC(tx_ring, 0); |
975 | i = 0; |
976 | } |
977 | |
978 | dma += FM10K_MAX_DATA_PER_TXD; |
979 | size -= FM10K_MAX_DATA_PER_TXD; |
980 | } |
981 | |
982 | if (likely(!data_len)) |
983 | break; |
984 | |
		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
				       dma, size, flags)) {
987 | tx_desc = FM10K_TX_DESC(tx_ring, 0); |
988 | i = 0; |
989 | } |
990 | |
991 | size = skb_frag_size(frag); |
992 | data_len -= size; |
993 | |
		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);
996 | |
997 | tx_buffer = &tx_ring->tx_buffer[i]; |
998 | } |
999 | |
1000 | /* write last descriptor with LAST bit set */ |
1001 | flags |= FM10K_TXD_FLAG_LAST; |
1002 | |
	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
1004 | i = 0; |
1005 | |
1006 | /* record bytecount for BQL */ |
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1008 | |
1009 | /* record SW timestamp if HW timestamp is not available */ |
	skb_tx_timestamp(first->skb);
1011 | |
1012 | /* Force memory writes to complete before letting h/w know there |
1013 | * are new descriptors to fetch. (Only applicable for weak-ordered |
1014 | * memory model archs, such as IA-64). |
1015 | * |
1016 | * We also need this memory barrier to make certain all of the |
1017 | * status bits have been updated before next_to_watch is written. |
1018 | */ |
1019 | wmb(); |
1020 | |
1021 | /* set next_to_watch value indicating a packet is present */ |
1022 | first->next_to_watch = tx_desc; |
1023 | |
1024 | tx_ring->next_to_use = i; |
1025 | |
1026 | /* Make sure there is space in the ring for the next send. */ |
1027 | fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED); |
1028 | |
1029 | /* notify HW of packet */ |
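	/* Skip the tail write when more frames are queued behind this one so
	 * several transmits can share a single doorbell.
	 */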
	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
		writel(i, tx_ring->tail);
	}
1033 | |
1034 | return; |
1035 | dma_error: |
	dev_err(tx_ring->dev, "TX DMA map failed\n");
1037 | |
1038 | /* clear dma mappings for failed tx_buffer map */ |
1039 | for (;;) { |
1040 | tx_buffer = &tx_ring->tx_buffer[i]; |
1041 | fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer); |
1042 | if (tx_buffer == first) |
1043 | break; |
1044 | if (i == 0) |
1045 | i = tx_ring->count; |
1046 | i--; |
1047 | } |
1048 | |
1049 | tx_ring->next_to_use = i; |
1050 | } |
1051 | |
1052 | netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, |
1053 | struct fm10k_ring *tx_ring) |
1054 | { |
1055 | u16 count = TXD_USE_COUNT(skb_headlen(skb)); |
1056 | struct fm10k_tx_buffer *first; |
1057 | unsigned short f; |
1058 | u32 tx_flags = 0; |
1059 | int tso; |
1060 | |
1061 | /* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD, |
1062 | * + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD, |
1063 | * + 2 desc gap to keep tail from touching head |
1064 | * otherwise try next time |
1065 | */ |
1066 | for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { |
1067 | skb_frag_t *frag = &skb_shinfo(skb)->frags[f]; |
1068 | |
1069 | count += TXD_USE_COUNT(skb_frag_size(frag)); |
1070 | } |
1071 | |
	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
1073 | tx_ring->tx_stats.tx_busy++; |
1074 | return NETDEV_TX_BUSY; |
1075 | } |
1076 | |
1077 | /* record the location of the first descriptor for this packet */ |
1078 | first = &tx_ring->tx_buffer[tx_ring->next_to_use]; |
1079 | first->skb = skb; |
1080 | first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); |
1081 | first->gso_segs = 1; |
1082 | |
1083 | /* record initial flags and protocol */ |
1084 | first->tx_flags = tx_flags; |
1085 | |
1086 | tso = fm10k_tso(tx_ring, first); |
1087 | if (tso < 0) |
1088 | goto out_drop; |
1089 | else if (!tso) |
1090 | fm10k_tx_csum(tx_ring, first); |
1091 | |
1092 | fm10k_tx_map(tx_ring, first); |
1093 | |
1094 | return NETDEV_TX_OK; |
1095 | |
1096 | out_drop: |
	dev_kfree_skb_any(first->skb);
1098 | first->skb = NULL; |
1099 | |
1100 | return NETDEV_TX_OK; |
1101 | } |
1102 | |
1103 | static u64 fm10k_get_tx_completed(struct fm10k_ring *ring) |
1104 | { |
1105 | return ring->stats.packets; |
1106 | } |
1107 | |
1108 | /** |
1109 | * fm10k_get_tx_pending - how many Tx descriptors not processed |
1110 | * @ring: the ring structure |
1111 | * @in_sw: is tx_pending being checked in SW or in HW? |
1112 | */ |
1113 | u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw) |
1114 | { |
1115 | struct fm10k_intfc *interface = ring->q_vector->interface; |
1116 | struct fm10k_hw *hw = &interface->hw; |
1117 | u32 head, tail; |
1118 | |
1119 | if (likely(in_sw)) { |
1120 | head = ring->next_to_clean; |
1121 | tail = ring->next_to_use; |
1122 | } else { |
1123 | head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx)); |
1124 | tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx)); |
1125 | } |
1126 | |
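	/* The tail index may have wrapped around the ring and be numerically
	 * smaller than head; add the ring size in that case so the pending
	 * count stays non-negative.
	 */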
1127 | return ((head <= tail) ? tail : tail + ring->count) - head; |
1128 | } |
1129 | |
1130 | bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring) |
1131 | { |
	u32 tx_done = fm10k_get_tx_completed(tx_ring);
	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
	u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);
1135 | |
1136 | clear_check_for_tx_hang(tx_ring); |
1137 | |
1138 | /* Check for a hung queue, but be thorough. This verifies |
1139 | * that a transmit has been completed since the previous |
1140 | * check AND there is at least one packet pending. By |
1141 | * requiring this to fail twice we avoid races with |
1142 | * clearing the ARMED bit and conditions where we |
1143 | * run the check_tx_hang logic with a transmit completion |
1144 | * pending but without time to complete it yet. |
1145 | */ |
1146 | if (!tx_pending || (tx_done_old != tx_done)) { |
1147 | /* update completed stats and continue */ |
1148 | tx_ring->tx_stats.tx_done_old = tx_done; |
1149 | /* reset the countdown */ |
		clear_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
1151 | |
1152 | return false; |
1153 | } |
1154 | |
1155 | /* make sure it is true for two checks in a row */ |
	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
1157 | } |
1158 | |
1159 | /** |
1160 | * fm10k_tx_timeout_reset - initiate reset due to Tx timeout |
1161 | * @interface: driver private struct |
1162 | **/ |
1163 | void fm10k_tx_timeout_reset(struct fm10k_intfc *interface) |
1164 | { |
1165 | /* Do the reset outside of interrupt context */ |
1166 | if (!test_bit(__FM10K_DOWN, interface->state)) { |
1167 | interface->tx_timeout_count++; |
		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
1169 | fm10k_service_event_schedule(interface); |
1170 | } |
1171 | } |
1172 | |
1173 | /** |
1174 | * fm10k_clean_tx_irq - Reclaim resources after transmit completes |
1175 | * @q_vector: structure containing interrupt and ring information |
1176 | * @tx_ring: tx ring to clean |
1177 | * @napi_budget: Used to determine if we are in netpoll |
1178 | **/ |
1179 | static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector, |
1180 | struct fm10k_ring *tx_ring, int napi_budget) |
1181 | { |
1182 | struct fm10k_intfc *interface = q_vector->interface; |
1183 | struct fm10k_tx_buffer *tx_buffer; |
1184 | struct fm10k_tx_desc *tx_desc; |
1185 | unsigned int total_bytes = 0, total_packets = 0; |
1186 | unsigned int budget = q_vector->tx.work_limit; |
1187 | unsigned int i = tx_ring->next_to_clean; |
1188 | |
1189 | if (test_bit(__FM10K_DOWN, interface->state)) |
1190 | return true; |
1191 | |
1192 | tx_buffer = &tx_ring->tx_buffer[i]; |
1193 | tx_desc = FM10K_TX_DESC(tx_ring, i); |
1194 | i -= tx_ring->count; |
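	/* As in the Rx path, i is biased negative by the ring size so the
	 * wrap test in the loop below is a simple check for zero.
	 */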
1195 | |
1196 | do { |
1197 | struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch; |
1198 | |
1199 | /* if next_to_watch is not set then there is no work pending */ |
1200 | if (!eop_desc) |
1201 | break; |
1202 | |
1203 | /* prevent any other reads prior to eop_desc */ |
1204 | smp_rmb(); |
1205 | |
1206 | /* if DD is not set pending work has not been completed */ |
1207 | if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE)) |
1208 | break; |
1209 | |
1210 | /* clear next_to_watch to prevent false hangs */ |
1211 | tx_buffer->next_to_watch = NULL; |
1212 | |
1213 | /* update the statistics for this packet */ |
1214 | total_bytes += tx_buffer->bytecount; |
1215 | total_packets += tx_buffer->gso_segs; |
1216 | |
1217 | /* free the skb */ |
		napi_consume_skb(tx_buffer->skb, napi_budget);
1219 | |
1220 | /* unmap skb header data */ |
1221 | dma_unmap_single(tx_ring->dev, |
1222 | dma_unmap_addr(tx_buffer, dma), |
1223 | dma_unmap_len(tx_buffer, len), |
1224 | DMA_TO_DEVICE); |
1225 | |
1226 | /* clear tx_buffer data */ |
1227 | tx_buffer->skb = NULL; |
1228 | dma_unmap_len_set(tx_buffer, len, 0); |
1229 | |
1230 | /* unmap remaining buffers */ |
1231 | while (tx_desc != eop_desc) { |
1232 | tx_buffer++; |
1233 | tx_desc++; |
1234 | i++; |
1235 | if (unlikely(!i)) { |
1236 | i -= tx_ring->count; |
1237 | tx_buffer = tx_ring->tx_buffer; |
1238 | tx_desc = FM10K_TX_DESC(tx_ring, 0); |
1239 | } |
1240 | |
1241 | /* unmap any remaining paged data */ |
1242 | if (dma_unmap_len(tx_buffer, len)) { |
1243 | dma_unmap_page(tx_ring->dev, |
1244 | dma_unmap_addr(tx_buffer, dma), |
1245 | dma_unmap_len(tx_buffer, len), |
1246 | DMA_TO_DEVICE); |
1247 | dma_unmap_len_set(tx_buffer, len, 0); |
1248 | } |
1249 | } |
1250 | |
1251 | /* move us one more past the eop_desc for start of next pkt */ |
1252 | tx_buffer++; |
1253 | tx_desc++; |
1254 | i++; |
1255 | if (unlikely(!i)) { |
1256 | i -= tx_ring->count; |
1257 | tx_buffer = tx_ring->tx_buffer; |
1258 | tx_desc = FM10K_TX_DESC(tx_ring, 0); |
1259 | } |
1260 | |
1261 | /* issue prefetch for next Tx descriptor */ |
1262 | prefetch(tx_desc); |
1263 | |
1264 | /* update budget accounting */ |
1265 | budget--; |
1266 | } while (likely(budget)); |
1267 | |
1268 | i += tx_ring->count; |
1269 | tx_ring->next_to_clean = i; |
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
1274 | q_vector->tx.total_bytes += total_bytes; |
1275 | q_vector->tx.total_packets += total_packets; |
1276 | |
1277 | if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) { |
1278 | /* schedule immediate reset if we believe we hung */ |
1279 | struct fm10k_hw *hw = &interface->hw; |
1280 | |
1281 | netif_err(interface, drv, tx_ring->netdev, |
1282 | "Detected Tx Unit Hang\n" |
1283 | " Tx Queue <%d>\n" |
1284 | " TDH, TDT <%x>, <%x>\n" |
1285 | " next_to_use <%x>\n" |
		  " next_to_clean <%x>\n",
1287 | tx_ring->queue_index, |
1288 | fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)), |
1289 | fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)), |
1290 | tx_ring->next_to_use, i); |
1291 | |
		netif_stop_subqueue(tx_ring->netdev,
				    tx_ring->queue_index);

		netif_info(interface, probe, tx_ring->netdev,
			   "tx hang %d detected on queue %d, resetting interface\n",
			   interface->tx_timeout_count + 1,
			   tx_ring->queue_index);
1299 | |
1300 | fm10k_tx_timeout_reset(interface); |
1301 | |
1302 | /* the netdev is about to reset, no point in enabling stuff */ |
1303 | return true; |
1304 | } |
1305 | |
1306 | /* notify netdev of completed buffers */ |
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
1309 | |
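	/* Only wake the queue once there is room for roughly two more
	 * worst-case frames, capped just below the minimum ring size.
	 */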
1310 | #define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2) |
1311 | if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && |
1312 | (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { |
1313 | /* Make sure that anybody stopping the queue after this |
1314 | * sees the new next_to_clean. |
1315 | */ |
1316 | smp_mb(); |
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__FM10K_DOWN, interface->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
1322 | ++tx_ring->tx_stats.restart_queue; |
1323 | } |
1324 | } |
1325 | |
1326 | return !!budget; |
1327 | } |
1328 | |
1329 | /** |
/**
 * fm10k_update_itr - update the dynamic ITR value based on packet size
 * @ring_container: Container for rings to have ITR updated
 *
 * Stores a new ITR value based strictly on packet size. The
 * divisors and thresholds used by this function were determined based
 * on theoretical maximum wire speed and testing data, in order to
 * minimize response time while increasing bulk throughput.
 **/
1339 | static void fm10k_update_itr(struct fm10k_ring_container *ring_container) |
1340 | { |
1341 | unsigned int avg_wire_size, packets, itr_round; |
1342 | |
1343 | /* Only update ITR if we are using adaptive setting */ |
1344 | if (!ITR_IS_ADAPTIVE(ring_container->itr)) |
1345 | goto clear_counts; |
1346 | |
1347 | packets = ring_container->total_packets; |
1348 | if (!packets) |
1349 | goto clear_counts; |
1350 | |
1351 | avg_wire_size = ring_container->total_bytes / packets; |
1352 | |
1353 | /* The following is a crude approximation of: |
1354 | * wmem_default / (size + overhead) = desired_pkts_per_int |
1355 | * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate |
1356 | * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value |
1357 | * |
1358 | * Assuming wmem_default is 212992 and overhead is 640 bytes per |
1359 | * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the |
1360 | * formula down to |
1361 | * |
1362 | * (34 * (size + 24)) / (size + 640) = ITR |
1363 | * |
1364 | * We first do some math on the packet size and then finally bitshift |
1365 | * by 8 after rounding up. We also have to account for PCIe link speed |
1366 | * difference as ITR scales based on this. |
1367 | */ |
1368 | if (avg_wire_size <= 360) { |
1369 | /* Start at 250K ints/sec and gradually drop to 77K ints/sec */ |
1370 | avg_wire_size *= 8; |
1371 | avg_wire_size += 376; |
1372 | } else if (avg_wire_size <= 1152) { |
1373 | /* 77K ints/sec to 45K ints/sec */ |
1374 | avg_wire_size *= 3; |
1375 | avg_wire_size += 2176; |
1376 | } else if (avg_wire_size <= 1920) { |
1377 | /* 45K ints/sec to 38K ints/sec */ |
1378 | avg_wire_size += 4480; |
1379 | } else { |
1380 | /* plateau at a limit of 38K ints/sec */ |
1381 | avg_wire_size = 6656; |
1382 | } |
1383 | |
1384 | /* Perform final bitshift for division after rounding up to ensure |
1385 | * that the calculation will never get below a 1. The bit shift |
1386 | * accounts for changes in the ITR due to PCIe link speed. |
1387 | */ |
1388 | itr_round = READ_ONCE(ring_container->itr_scale) + 8; |
1389 | avg_wire_size += BIT(itr_round) - 1; |
1390 | avg_wire_size >>= itr_round; |
1391 | |
1392 | /* write back value and retain adaptive flag */ |
1393 | ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE; |
1394 | |
1395 | clear_counts: |
1396 | ring_container->total_bytes = 0; |
1397 | ring_container->total_packets = 0; |
1398 | } |
1399 | |
1400 | static void fm10k_qv_enable(struct fm10k_q_vector *q_vector) |
1401 | { |
1402 | /* Enable auto-mask and clear the current mask */ |
1403 | u32 itr = FM10K_ITR_ENABLE; |
1404 | |
1405 | /* Update Tx ITR */ |
	fm10k_update_itr(&q_vector->tx);

	/* Update Rx ITR */
	fm10k_update_itr(&q_vector->rx);
1410 | |
1411 | /* Store Tx itr in timer slot 0 */ |
1412 | itr |= (q_vector->tx.itr & FM10K_ITR_MAX); |
1413 | |
1414 | /* Shift Rx itr to timer slot 1 */ |
1415 | itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT; |
1416 | |
1417 | /* Write the final value to the ITR register */ |
	writel(itr, q_vector->itr);
1419 | } |
1420 | |
1421 | static int fm10k_poll(struct napi_struct *napi, int budget) |
1422 | { |
1423 | struct fm10k_q_vector *q_vector = |
1424 | container_of(napi, struct fm10k_q_vector, napi); |
1425 | struct fm10k_ring *ring; |
1426 | int per_ring_budget, work_done = 0; |
1427 | bool clean_complete = true; |
1428 | |
1429 | fm10k_for_each_ring(ring, q_vector->tx) { |
		if (!fm10k_clean_tx_irq(q_vector, ring, budget))
1431 | clean_complete = false; |
1432 | } |
1433 | |
1434 | /* Handle case where we are called by netpoll with a budget of 0 */ |
1435 | if (budget <= 0) |
1436 | return budget; |
1437 | |
1438 | /* attempt to distribute budget to each queue fairly, but don't |
1439 | * allow the budget to go below 1 because we'll exit polling |
1440 | */ |
1441 | if (q_vector->rx.count > 1) |
1442 | per_ring_budget = max(budget / q_vector->rx.count, 1); |
1443 | else |
1444 | per_ring_budget = budget; |
1445 | |
1446 | fm10k_for_each_ring(ring, q_vector->rx) { |
		int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget);
1448 | |
1449 | work_done += work; |
1450 | if (work >= per_ring_budget) |
1451 | clean_complete = false; |
1452 | } |
1453 | |
1454 | /* If all work not completed, return budget and keep polling */ |
1455 | if (!clean_complete) |
1456 | return budget; |
1457 | |
1458 | /* Exit the polling mode, but don't re-enable interrupts if stack might |
1459 | * poll us due to busy-polling |
1460 | */ |
1461 | if (likely(napi_complete_done(napi, work_done))) |
1462 | fm10k_qv_enable(q_vector); |
1463 | |
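	/* A fully completed poll must report less than the full budget so the
	 * NAPI core knows it can stop polling this vector.
	 */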
1464 | return min(work_done, budget - 1); |
1465 | } |
1466 | |
1467 | /** |
1468 | * fm10k_set_qos_queues: Allocate queues for a QOS-enabled device |
1469 | * @interface: board private structure to initialize |
1470 | * |
1471 | * When QoS (Quality of Service) is enabled, allocate queues for |
 * each traffic class. If multiqueue isn't available, then abort QoS
 * initialization.
 *
 * This function handles all combinations of QoS and RSS.
1476 | * |
1477 | **/ |
1478 | static bool fm10k_set_qos_queues(struct fm10k_intfc *interface) |
1479 | { |
1480 | struct net_device *dev = interface->netdev; |
1481 | struct fm10k_ring_feature *f; |
	int rss_i, i;
1483 | int pcs; |
1484 | |
1485 | /* Map queue offset and counts onto allocated tx queues */ |
1486 | pcs = netdev_get_num_tc(dev); |
1487 | |
1488 | if (pcs <= 1) |
1489 | return false; |
1490 | |
1491 | /* set QoS mask and indices */ |
1492 | f = &interface->ring_feature[RING_F_QOS]; |
1493 | f->indices = pcs; |
1494 | f->mask = BIT(fls(pcs - 1)) - 1; |
1495 | |
1496 | /* determine the upper limit for our current DCB mode */ |
1497 | rss_i = interface->hw.mac.max_queues / pcs; |
1498 | rss_i = BIT(fls(rss_i) - 1); |
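	/* round the per-class RSS queue count down to a power of two */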
1499 | |
1500 | /* set RSS mask and indices */ |
1501 | f = &interface->ring_feature[RING_F_RSS]; |
1502 | rss_i = min_t(u16, rss_i, f->limit); |
1503 | f->indices = rss_i; |
1504 | f->mask = BIT(fls(rss_i - 1)) - 1; |
1505 | |
1506 | /* configure pause class to queue mapping */ |
1507 | for (i = 0; i < pcs; i++) |
		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);
1509 | |
1510 | interface->num_rx_queues = rss_i * pcs; |
1511 | interface->num_tx_queues = rss_i * pcs; |
1512 | |
1513 | return true; |
1514 | } |
1515 | |
1516 | /** |
1517 | * fm10k_set_rss_queues: Allocate queues for RSS |
1518 | * @interface: board private structure to initialize |
1519 | * |
1520 | * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try |
1521 | * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. |
1522 | * |
1523 | **/ |
static bool fm10k_set_rss_queues(struct fm10k_intfc *interface)
1525 | { |
1526 | struct fm10k_ring_feature *f; |
	u16 rss_i;
1528 | |
1529 | f = &interface->ring_feature[RING_F_RSS]; |
1530 | rss_i = min_t(u16, interface->hw.mac.max_queues, f->limit); |
1531 | |
1532 | /* record indices and power of 2 mask for RSS */ |
1533 | f->indices = rss_i; |
1534 | f->mask = BIT(fls(rss_i - 1)) - 1; |
1535 | |
1536 | interface->num_rx_queues = rss_i; |
1537 | interface->num_tx_queues = rss_i; |
1538 | |
1539 | return true; |
1540 | } |
1541 | |
1542 | /** |
1543 | * fm10k_set_num_queues: Allocate queues for device, feature dependent |
1544 | * @interface: board private structure to initialize |
1545 | * |
1546 | * This is the top level queue allocation routine. The order here is very |
1547 | * important, starting with the "most" number of features turned on at once, |
1548 | * and ending with the smallest set of features. This way large combinations |
1549 | * can be allocated if they're turned on, and smaller combinations are the |
1550 | * fall through conditions. |
1551 | * |
1552 | **/ |
1553 | static void fm10k_set_num_queues(struct fm10k_intfc *interface) |
1554 | { |
1555 | /* Attempt to setup QoS and RSS first */ |
1556 | if (fm10k_set_qos_queues(interface)) |
1557 | return; |
1558 | |
1559 | /* If we don't have QoS, just fallback to only RSS. */ |
1560 | fm10k_set_rss_queues(interface); |
1561 | } |
1562 | |
1563 | /** |
1564 | * fm10k_reset_num_queues - Reset the number of queues to zero |
1565 | * @interface: board private structure |
1566 | * |
1567 | * This function should be called whenever we need to reset the number of |
1568 | * queues after an error condition. |
1569 | */ |
1570 | static void fm10k_reset_num_queues(struct fm10k_intfc *interface) |
1571 | { |
1572 | interface->num_tx_queues = 0; |
1573 | interface->num_rx_queues = 0; |
1574 | interface->num_q_vectors = 0; |
1575 | } |
1576 | |
1577 | /** |
1578 | * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector |
1579 | * @interface: board private structure to initialize |
1580 | * @v_count: q_vectors allocated on interface, used for ring interleaving |
1581 | * @v_idx: index of vector in interface struct |
1582 | * @txr_count: total number of Tx rings to allocate |
1583 | * @txr_idx: index of first Tx ring to allocate |
1584 | * @rxr_count: total number of Rx rings to allocate |
1585 | * @rxr_idx: index of first Rx ring to allocate |
1586 | * |
1587 | * We allocate one q_vector. If allocation fails we return -ENOMEM. |
1588 | **/ |
1589 | static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, |
1590 | unsigned int v_count, unsigned int v_idx, |
1591 | unsigned int txr_count, unsigned int txr_idx, |
1592 | unsigned int rxr_count, unsigned int rxr_idx) |
1593 | { |
1594 | struct fm10k_q_vector *q_vector; |
1595 | struct fm10k_ring *ring; |
1596 | int ring_count; |
1597 | |
1598 | ring_count = txr_count + rxr_count; |
1599 | |
1600 | /* allocate q_vector and rings */ |
1601 | q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL); |
1602 | if (!q_vector) |
1603 | return -ENOMEM; |
1604 | |
1605 | /* initialize NAPI */ |
	netif_napi_add(interface->netdev, &q_vector->napi, fm10k_poll);
1607 | |
1608 | /* tie q_vector and interface together */ |
1609 | interface->q_vector[v_idx] = q_vector; |
1610 | q_vector->interface = interface; |
1611 | q_vector->v_idx = v_idx; |
1612 | |
1613 | /* initialize pointer to rings */ |
1614 | ring = q_vector->ring; |
1615 | |
1616 | /* save Tx ring container info */ |
1617 | q_vector->tx.ring = ring; |
1618 | q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK; |
1619 | q_vector->tx.itr = interface->tx_itr; |
1620 | q_vector->tx.itr_scale = interface->hw.mac.itr_scale; |
1621 | q_vector->tx.count = txr_count; |
1622 | |
1623 | while (txr_count) { |
1624 | /* assign generic ring traits */ |
1625 | ring->dev = &interface->pdev->dev; |
1626 | ring->netdev = interface->netdev; |
1627 | |
1628 | /* configure backlink on ring */ |
1629 | ring->q_vector = q_vector; |
1630 | |
1631 | /* apply Tx specific ring traits */ |
1632 | ring->count = interface->tx_ring_count; |
1633 | ring->queue_index = txr_idx; |
1634 | |
1635 | /* assign ring to interface */ |
1636 | interface->tx_ring[txr_idx] = ring; |
1637 | |
1638 | /* update count and index */ |
1639 | txr_count--; |
1640 | txr_idx += v_count; |
1641 | |
1642 | /* push pointer to next ring */ |
1643 | ring++; |
1644 | } |
1645 | |
1646 | /* save Rx ring container info */ |
1647 | q_vector->rx.ring = ring; |
1648 | q_vector->rx.itr = interface->rx_itr; |
1649 | q_vector->rx.itr_scale = interface->hw.mac.itr_scale; |
1650 | q_vector->rx.count = rxr_count; |
1651 | |
1652 | while (rxr_count) { |
1653 | /* assign generic ring traits */ |
1654 | ring->dev = &interface->pdev->dev; |
1655 | ring->netdev = interface->netdev; |
1656 | rcu_assign_pointer(ring->l2_accel, interface->l2_accel); |
1657 | |
1658 | /* configure backlink on ring */ |
1659 | ring->q_vector = q_vector; |
1660 | |
1661 | /* apply Rx specific ring traits */ |
1662 | ring->count = interface->rx_ring_count; |
1663 | ring->queue_index = rxr_idx; |
1664 | |
1665 | /* assign ring to interface */ |
1666 | interface->rx_ring[rxr_idx] = ring; |
1667 | |
1668 | /* update count and index */ |
1669 | rxr_count--; |
1670 | rxr_idx += v_count; |
1671 | |
1672 | /* push pointer to next ring */ |
1673 | ring++; |
1674 | } |
1675 | |
1676 | fm10k_dbg_q_vector_init(q_vector); |
1677 | |
1678 | return 0; |
1679 | } |
1680 | |
1681 | /** |
1682 | * fm10k_free_q_vector - Free memory allocated for specific interrupt vector |
1683 | * @interface: board private structure to initialize |
1684 | * @v_idx: Index of vector to be freed |
1685 | * |
 * This function frees the memory allocated to the q_vector. In addition, if
 * NAPI is enabled, it will delete any references to the NAPI struct prior
1688 | * to freeing the q_vector. |
1689 | **/ |
1690 | static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx) |
1691 | { |
1692 | struct fm10k_q_vector *q_vector = interface->q_vector[v_idx]; |
1693 | struct fm10k_ring *ring; |
1694 | |
1695 | fm10k_dbg_q_vector_exit(q_vector); |
1696 | |
1697 | fm10k_for_each_ring(ring, q_vector->tx) |
1698 | interface->tx_ring[ring->queue_index] = NULL; |
1699 | |
1700 | fm10k_for_each_ring(ring, q_vector->rx) |
1701 | interface->rx_ring[ring->queue_index] = NULL; |
1702 | |
1703 | interface->q_vector[v_idx] = NULL; |
	netif_napi_del(&q_vector->napi);
1705 | kfree_rcu(q_vector, rcu); |
1706 | } |
1707 | |
1708 | /** |
1709 | * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors |
1710 | * @interface: board private structure to initialize |
1711 | * |
1712 | * We allocate one q_vector per queue interrupt. If allocation fails we |
1713 | * return -ENOMEM. |
1714 | **/ |
1715 | static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface) |
1716 | { |
1717 | unsigned int q_vectors = interface->num_q_vectors; |
1718 | unsigned int rxr_remaining = interface->num_rx_queues; |
1719 | unsigned int txr_remaining = interface->num_tx_queues; |
1720 | unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0; |
1721 | int err; |
1722 | |
1723 | if (q_vectors >= (rxr_remaining + txr_remaining)) { |
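		/* With at least as many vectors as queues, give every Rx
		 * queue a dedicated q_vector here; the Tx queues are then
		 * spread across the remaining vectors by the loop below.
		 */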
1724 | for (; rxr_remaining; v_idx++) { |
			err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
						   0, 0, 1, rxr_idx);
1727 | if (err) |
1728 | goto err_out; |
1729 | |
1730 | /* update counts and index */ |
1731 | rxr_remaining--; |
1732 | rxr_idx++; |
1733 | } |
1734 | } |
1735 | |
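	/* Spread whatever rings remain evenly over the remaining vectors.
	 * DIV_ROUND_UP keeps the split balanced as the counts shrink: with,
	 * for example, 8 q_vectors serving 8 Tx and 8 Rx queues (illustrative
	 * numbers), each vector ends up with one Tx and one Rx ring.
	 */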
1736 | for (; v_idx < q_vectors; v_idx++) { |
1737 | int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); |
1738 | int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); |
1739 | |
		err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
					   tqpv, txr_idx,
					   rqpv, rxr_idx);
1743 | |
1744 | if (err) |
1745 | goto err_out; |
1746 | |
1747 | /* update counts and index */ |
1748 | rxr_remaining -= rqpv; |
1749 | txr_remaining -= tqpv; |
1750 | rxr_idx++; |
1751 | txr_idx++; |
1752 | } |
1753 | |
1754 | return 0; |
1755 | |
1756 | err_out: |
1757 | fm10k_reset_num_queues(interface); |
1758 | |
1759 | while (v_idx--) |
1760 | fm10k_free_q_vector(interface, v_idx); |
1761 | |
1762 | return -ENOMEM; |
1763 | } |
1764 | |
1765 | /** |
1766 | * fm10k_free_q_vectors - Free memory allocated for interrupt vectors |
1767 | * @interface: board private structure to initialize |
1768 | * |
 * This function frees the memory allocated to the q_vectors. In addition, if
 * NAPI is enabled, it will delete any references to the NAPI struct prior
1771 | * to freeing the q_vector. |
1772 | **/ |
1773 | static void fm10k_free_q_vectors(struct fm10k_intfc *interface) |
1774 | { |
1775 | int v_idx = interface->num_q_vectors; |
1776 | |
1777 | fm10k_reset_num_queues(interface); |
1778 | |
1779 | while (v_idx--) |
1780 | fm10k_free_q_vector(interface, v_idx); |
1781 | } |
1782 | |
1783 | /** |
1784 | * fm10k_reset_msix_capability - reset MSI-X capability |
1785 | * @interface: board private structure to initialize |
1786 | * |
1787 | * Reset the MSI-X capability back to its starting state |
1788 | **/ |
1789 | static void fm10k_reset_msix_capability(struct fm10k_intfc *interface) |
1790 | { |
	pci_disable_msix(interface->pdev);
	kfree(interface->msix_entries);
1793 | interface->msix_entries = NULL; |
1794 | } |
1795 | |
1796 | /** |
1797 | * fm10k_init_msix_capability - configure MSI-X capability |
1798 | * @interface: board private structure to initialize |
1799 | * |
1800 | * Attempt to configure the interrupts using the best available |
1801 | * capabilities of the hardware and the kernel. |
1802 | **/ |
1803 | static int fm10k_init_msix_capability(struct fm10k_intfc *interface) |
1804 | { |
1805 | struct fm10k_hw *hw = &interface->hw; |
1806 | int v_budget, vector; |
1807 | |
	/* It's easy to be greedy for MSI-X vectors, but it really
	 * doesn't do us much good if we have a lot more vectors
	 * than CPUs. So let's be conservative and only ask for
	 * (roughly) the same number of vectors as there are CPUs.
	 * The default is to use pairs of vectors.
	 */
1814 | v_budget = max(interface->num_rx_queues, interface->num_tx_queues); |
1815 | v_budget = min_t(u16, v_budget, num_online_cpus()); |
1816 | |
1817 | /* account for vectors not related to queues */ |
1818 | v_budget += NON_Q_VECTORS; |
1819 | |
	/* At the same time, hardware can only support a maximum of
	 * hw->mac.max_msix_vectors vectors. With features
	 * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
	 * descriptor queues supported by our device. Thus, we cap it off in
	 * those rare cases where the CPU count also exceeds our vector limit.
	 */
1826 | v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors); |
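	/* As a purely illustrative example: 8 Tx and 8 Rx queues on a 4-CPU
	 * system would yield min(max(8, 8), 4) + NON_Q_VECTORS entries here,
	 * before the hw->mac.max_msix_vectors cap just above.
	 */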
1827 | |
1828 | /* A failure in MSI-X entry allocation is fatal. */ |
	interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
					  GFP_KERNEL);
1831 | if (!interface->msix_entries) |
1832 | return -ENOMEM; |
1833 | |
1834 | /* populate entry values */ |
1835 | for (vector = 0; vector < v_budget; vector++) |
1836 | interface->msix_entries[vector].entry = vector; |
1837 | |
1838 | /* Attempt to enable MSI-X with requested value */ |
	v_budget = pci_enable_msix_range(interface->pdev,
					 interface->msix_entries,
					 MIN_MSIX_COUNT(hw),
					 v_budget);
1843 | if (v_budget < 0) { |
		kfree(interface->msix_entries);
1845 | interface->msix_entries = NULL; |
1846 | return v_budget; |
1847 | } |
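	/* pci_enable_msix_range() may grant fewer vectors than requested
	 * (though never fewer than MIN_MSIX_COUNT(hw)), so the count it
	 * returns, not the original budget, determines num_q_vectors below.
	 */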
1848 | |
1849 | /* record the number of queues available for q_vectors */ |
1850 | interface->num_q_vectors = v_budget - NON_Q_VECTORS; |
1851 | |
1852 | return 0; |
1853 | } |
1854 | |
1855 | /** |
1856 | * fm10k_cache_ring_qos - Descriptor ring to register mapping for QoS |
 * @interface: Interface structure containing rings and devices
 *
 * Cache the descriptor ring offsets for QoS
1860 | **/ |
1861 | static bool fm10k_cache_ring_qos(struct fm10k_intfc *interface) |
1862 | { |
1863 | struct net_device *dev = interface->netdev; |
	int pc, offset, rss_i, i;
1865 | u16 pc_stride = interface->ring_feature[RING_F_QOS].mask + 1; |
1866 | u8 num_pcs = netdev_get_num_tc(dev); |
1867 | |
1868 | if (num_pcs <= 1) |
1869 | return false; |
1870 | |
1871 | rss_i = interface->ring_feature[RING_F_RSS].indices; |
1872 | |
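	/* Each traffic class (pc) gets a contiguous block of rss_i rings,
	 * while register indices stride by pc_stride. For example, with two
	 * traffic classes, rss_i = 4 and pc_stride = 4 (illustrative values),
	 * ring 0 of class 0 maps to register 0, ring 1 to register 4, and
	 * class 1 begins at register 1.
	 */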
1873 | for (pc = 0, offset = 0; pc < num_pcs; pc++, offset += rss_i) { |
1874 | int q_idx = pc; |
1875 | |
1876 | for (i = 0; i < rss_i; i++) { |
1877 | interface->tx_ring[offset + i]->reg_idx = q_idx; |
1878 | interface->tx_ring[offset + i]->qos_pc = pc; |
1879 | interface->rx_ring[offset + i]->reg_idx = q_idx; |
1880 | interface->rx_ring[offset + i]->qos_pc = pc; |
1881 | q_idx += pc_stride; |
1882 | } |
1883 | } |
1884 | |
1885 | return true; |
1886 | } |
1887 | |
1888 | /** |
1889 | * fm10k_cache_ring_rss - Descriptor ring to register mapping for RSS |
 * @interface: Interface structure containing rings and devices
1891 | * |
1892 | * Cache the descriptor ring offsets for RSS |
1893 | **/ |
static void fm10k_cache_ring_rss(struct fm10k_intfc *interface)
1895 | { |
1896 | int i; |
1897 | |
1898 | for (i = 0; i < interface->num_rx_queues; i++) |
1899 | interface->rx_ring[i]->reg_idx = i; |
1900 | |
1901 | for (i = 0; i < interface->num_tx_queues; i++) |
1902 | interface->tx_ring[i]->reg_idx = i; |
1903 | } |
1904 | |
1905 | /** |
1906 | * fm10k_assign_rings - Map rings to network devices |
1907 | * @interface: Interface structure containing rings and devices |
1908 | * |
 * This function is meant to go through and configure both the network
1910 | * devices so that they contain rings, and configure the rings so that |
1911 | * they function with their network devices. |
1912 | **/ |
1913 | static void fm10k_assign_rings(struct fm10k_intfc *interface) |
1914 | { |
1915 | if (fm10k_cache_ring_qos(interface)) |
1916 | return; |
1917 | |
1918 | fm10k_cache_ring_rss(interface); |
1919 | } |
1920 | |
1921 | static void fm10k_init_reta(struct fm10k_intfc *interface) |
1922 | { |
	u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
1924 | u32 reta; |
1925 | |
1926 | /* If the Rx flow indirection table has been configured manually, we |
1927 | * need to maintain it when possible. |
1928 | */ |
	if (netif_is_rxfh_configured(interface->netdev)) {
1930 | for (i = FM10K_RETA_SIZE; i--;) { |
1931 | reta = interface->reta[i]; |
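			/* Each 32-bit RETA entry packs four 8-bit queue
			 * indices; the shift pairs below isolate one byte at
			 * a time so it can be bounds-checked against rss_i.
			 */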
1932 | if ((((reta << 24) >> 24) < rss_i) && |
1933 | (((reta << 16) >> 24) < rss_i) && |
1934 | (((reta << 8) >> 24) < rss_i) && |
1935 | (((reta) >> 24) < rss_i)) |
1936 | continue; |
1937 | |
1938 | /* this should never happen */ |
1939 | dev_err(&interface->pdev->dev, |
1940 | "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n" ); |
1941 | goto repopulate_reta; |
1942 | } |
1943 | |
1944 | /* do nothing if all of the elements are in bounds */ |
1945 | return; |
1946 | } |
1947 | |
1948 | repopulate_reta: |
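	/* Passing a NULL indirection table lets fm10k_write_reta() fall
	 * back to a default queue layout.
	 */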
1949 | fm10k_write_reta(interface, NULL); |
1950 | } |
1951 | |
1952 | /** |
1953 | * fm10k_init_queueing_scheme - Determine proper queueing scheme |
1954 | * @interface: board private structure to initialize |
1955 | * |
1956 | * We determine which queueing scheme to use based on... |
1957 | * - Hardware queue count (num_*_queues) |
1958 | * - defined by miscellaneous hardware support/features (RSS, etc.) |
1959 | **/ |
1960 | int fm10k_init_queueing_scheme(struct fm10k_intfc *interface) |
1961 | { |
1962 | int err; |
1963 | |
1964 | /* Number of supported queues */ |
1965 | fm10k_set_num_queues(interface); |
1966 | |
1967 | /* Configure MSI-X capability */ |
1968 | err = fm10k_init_msix_capability(interface); |
1969 | if (err) { |
1970 | dev_err(&interface->pdev->dev, |
1971 | "Unable to initialize MSI-X capability\n" ); |
1972 | goto err_init_msix; |
1973 | } |
1974 | |
1975 | /* Allocate memory for queues */ |
1976 | err = fm10k_alloc_q_vectors(interface); |
1977 | if (err) { |
1978 | dev_err(&interface->pdev->dev, |
1979 | "Unable to allocate queue vectors\n" ); |
1980 | goto err_alloc_q_vectors; |
1981 | } |
1982 | |
1983 | /* Map rings to devices, and map devices to physical queues */ |
1984 | fm10k_assign_rings(interface); |
1985 | |
1986 | /* Initialize RSS redirection table */ |
1987 | fm10k_init_reta(interface); |
1988 | |
1989 | return 0; |
1990 | |
1991 | err_alloc_q_vectors: |
1992 | fm10k_reset_msix_capability(interface); |
1993 | err_init_msix: |
1994 | fm10k_reset_num_queues(interface); |
1995 | return err; |
1996 | } |
1997 | |
1998 | /** |
1999 | * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings |
2000 | * @interface: board private structure to clear queueing scheme on |
2001 | * |
2002 | * We go through and clear queueing specific resources and reset the structure |
2003 | * to pre-load conditions |
2004 | **/ |
2005 | void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface) |
2006 | { |
2007 | fm10k_free_q_vectors(interface); |
2008 | fm10k_reset_msix_capability(interface); |
2009 | } |
2010 | |