ice_main.c source code [linux/drivers/net/ethernet/intel/ice/ice_main.c]

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2023, Intel Corporation. */

/* Intel(R) Ethernet Connection E800 Series Linux Driver */
5
6	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8	#include <generated/utsrelease.h>
9	#include <linux/crash_dump.h>
10	#include "ice.h"
11	#include "ice_base.h"
12	#include "ice_lib.h"
13	#include "ice_fltr.h"
14	#include "ice_dcb_lib.h"
15	#include "ice_dcb_nl.h"
16	#include "ice_devlink.h"
/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
 * ice tracepoint functions. This must be done exactly once across the
 * ice driver.
 */
21	#define CREATE_TRACE_POINTS
22	#include "ice_trace.h"
23	#include "ice_eswitch.h"
24	#include "ice_tc_lib.h"
25	#include "ice_vsi_vlan_ops.h"
26	#include <net/xdp_sock_drv.h>
27
28	#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
29	static const char ice_driver_string[] = DRV_SUMMARY;
30	static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
31
32	/ DDP Package file located in firmware search paths (e.g. /lib/firmware/) /
33	#define ICE_DDP_PKG_PATH "intel/ice/ddp/"
34	#define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"
35
36	MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
37	MODULE_DESCRIPTION(DRV_SUMMARY);
38	MODULE_LICENSE("GPL v2");
39	MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
40
41	static int debug = -`1`;
42	module_param(debug, int, `0644`);
43	#ifndef CONFIG_DYNAMIC_DEBUG
44	MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
45	#else
46	MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
47	#endif /* !CONFIG_DYNAMIC_DEBUG */
48
49	DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
50	EXPORT_SYMBOL(ice_xdp_locking_key);
51
52	/**
53	* ice_hw_to_dev - Get device pointer from the hardware structure
54	* @hw: pointer to the device HW structure
55	*
56	* Used to access the device pointer from compilation units which can't easily
57	* include the definition of struct ice_pf without leading to circular header
58	* dependencies.
59	*/
60	struct device ice_hw_to_dev(struct* ice_hw *hw)
61	{
62	struct ice_pf pf = container_of(hw, struct* ice_pf, hw);
63
64	return &pf->pdev->dev;
65	}
66
67	static struct workqueue_struct *ice_wq;
68	struct workqueue_struct *ice_lag_wq;
69	static const struct net_device_ops ice_netdev_safe_mode_ops;
70	static const struct net_device_ops ice_netdev_ops;
71
72	static void ice_rebuild(struct ice_pf pf, enum* ice_reset_req reset_type);
73
74	static void ice_vsi_release_all(struct ice_pf *pf);
75
76	static int ice_rebuild_channels(struct ice_pf *pf);
77	static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
78
79	static int
80	ice_indr_setup_tc_cb(struct net_device netdev, struct* Qdisc *sch,
81	void cb_priv, enum* tc_setup_type type, void *type_data,
82	void *data,
83	void (cleanup)(struct* flow_block_cb *block_cb));
84
85	bool netif_is_ice(const struct net_device *dev)
86	{
87	return dev && (dev->netdev_ops == &ice_netdev_ops);
88	}
89
90	/**
91	* ice_get_tx_pending - returns number of Tx descriptors not processed
92	* @ring: the ring of descriptors
93	*/
94	static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
95	{
96	u16 head, tail;
97
98	head = ring->next_to_clean;
99	tail = ring->next_to_use;
100
101	if (head != tail)
102	return (head < tail) ?
103	tail - head : (tail + ring->count - head);
104	return `0`;
105	}
106
107	/**
108	* ice_check_for_hang_subtask - check for and recover hung queues
109	* @pf: pointer to PF struct
110	*/
111	static void ice_check_for_hang_subtask(struct ice_pf *pf)
112	{
113	struct ice_vsi *vsi = NULL;
114	struct ice_hw *hw;
115	unsigned int i;
116	int packets;
117	u32 v;
118
119	ice_for_each_vsi(pf, v)
120	if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
121	vsi = pf->vsi[v];
122	break;
123	}
124
125	if (!vsi \|\| test_bit(ICE_VSI_DOWN, vsi->state))
126	return;
127
128	if (!(vsi->netdev && netif_carrier_ok(dev: vsi->netdev)))
129	return;
130
131	hw = &vsi->back->hw;
132
133	ice_for_each_txq(vsi, i) {
134	struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
135	struct ice_ring_stats *ring_stats;
136
137	if (!tx_ring)
138	continue;
139	if (ice_ring_ch_enabled(ring: tx_ring))
140	continue;
141
142	ring_stats = tx_ring->ring_stats;
143	if (!ring_stats)
144	continue;
145
146	if (tx_ring->desc) {
147	/ If packet counter has not changed the queue is*
148	* likely stalled, so force an interrupt for this
149	* queue.
150	*
151	* prev_pkt would be negative if there was no
152	* pending work.
153	*/
154	packets = ring_stats->stats.pkts & INT_MAX;
155	if (ring_stats->tx_stats.prev_pkt == packets) {
156	/ Trigger sw interrupt to revive the queue /
157	ice_trigger_sw_intr(hw, q_vector: tx_ring->q_vector);
158	continue;
159	}
160
161	/ Memory barrier between read of packet count and call*
162	* to ice_get_tx_pending()
163	*/
164	smp_rmb();
165	ring_stats->tx_stats.prev_pkt =
166	ice_get_tx_pending(ring: tx_ring) ? packets : -`1`;
167	}
168	}
169	}
170
171	/**
172	* ice_init_mac_fltr - Set initial MAC filters
173	* @pf: board private structure
174	*
175	* Set initial set of MAC filters for PF VSI; configure filters for permanent
176	* address and broadcast address. If an error is encountered, netdevice will be
177	* unregistered.
178	*/
179	static int ice_init_mac_fltr(struct ice_pf *pf)
180	{
181	struct ice_vsi *vsi;
182	u8 *perm_addr;
183
184	vsi = ice_get_main_vsi(pf);
185	if (!vsi)
186	return -EINVAL;
187
188	perm_addr = vsi->port_info->mac.perm_addr;
189	return ice_fltr_add_mac_and_broadcast(vsi, mac: perm_addr, action: ICE_FWD_TO_VSI);
190	}
191
192	/**
193	* ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
194	* @netdev: the net device on which the sync is happening
195	* @addr: MAC address to sync
196	*
197	* This is a callback function which is called by the in kernel device sync
198	* functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
199	* populates the tmp_sync_list, which is later used by ice_add_mac to add the
200	* MAC filters from the hardware.
201	*/
202	static int ice_add_mac_to_sync_list(struct net_device netdev, const* u8 *addr)
203	{
204	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
205	struct ice_vsi *vsi = np->vsi;
206
207	if (ice_fltr_add_mac_to_list(vsi, list: &vsi->tmp_sync_list, mac: addr,
208	action: ICE_FWD_TO_VSI))
209	return -EINVAL;
210
211	return `0`;
212	}
213
214	/**
215	* ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
216	* @netdev: the net device on which the unsync is happening
217	* @addr: MAC address to unsync
218	*
219	* This is a callback function which is called by the in kernel device unsync
220	* functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
221	* populates the tmp_unsync_list, which is later used by ice_remove_mac to
222	* delete the MAC filters from the hardware.
223	*/
224	static int ice_add_mac_to_unsync_list(struct net_device netdev, const* u8 *addr)
225	{
226	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
227	struct ice_vsi *vsi = np->vsi;
228
229	/ Under some circumstances, we might receive a request to delete our*
230	* own device address from our uc list. Because we store the device
231	* address in the VSI's MAC filter list, we need to ignore such
232	* requests and not delete our device address from this list.
233	*/
234	if (ether_addr_equal(addr1: addr, addr2: netdev->dev_addr))
235	return `0`;
236
237	if (ice_fltr_add_mac_to_list(vsi, list: &vsi->tmp_unsync_list, mac: addr,
238	action: ICE_FWD_TO_VSI))
239	return -EINVAL;
240
241	return `0`;
242	}
243
244	/**
245	* ice_vsi_fltr_changed - check if filter state changed
246	* @vsi: VSI to be checked
247	*
248	* returns true if filter state has changed, false otherwise.
249	*/
250	static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
251	{
252	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) \|\|
253	test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
254	}
255
256	/**
257	* ice_set_promisc - Enable promiscuous mode for a given PF
258	* @vsi: the VSI being configured
259	* @promisc_m: mask of promiscuous config bits
260	*
261	*/
262	static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
263	{
264	int status;
265
266	if (vsi->type != ICE_VSI_PF)
267	return `0`;
268
269	if (ice_vsi_has_non_zero_vlans(vsi)) {
270	promisc_m \|= (ICE_PROMISC_VLAN_RX \| ICE_PROMISC_VLAN_TX);
271	status = ice_fltr_set_vlan_vsi_promisc(hw: &vsi->back->hw, vsi,
272	promisc_mask: promisc_m);
273	} else {
274	status = ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
275	promisc_mask: promisc_m, vid: `0`);
276	}
277	if (status && status != -EEXIST)
278	return status;
279
280	netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
281	vsi->vsi_num, promisc_m);
282	return `0`;
283	}
284
285	/**
286	* ice_clear_promisc - Disable promiscuous mode for a given PF
287	* @vsi: the VSI being configured
288	* @promisc_m: mask of promiscuous config bits
289	*
290	*/
291	static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
292	{
293	int status;
294
295	if (vsi->type != ICE_VSI_PF)
296	return `0`;
297
298	if (ice_vsi_has_non_zero_vlans(vsi)) {
299	promisc_m \|= (ICE_PROMISC_VLAN_RX \| ICE_PROMISC_VLAN_TX);
300	status = ice_fltr_clear_vlan_vsi_promisc(hw: &vsi->back->hw, vsi,
301	promisc_mask: promisc_m);
302	} else {
303	status = ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
304	promisc_mask: promisc_m, vid: `0`);
305	}
306
307	netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
308	vsi->vsi_num, promisc_m);
309	return status;
310	}
311
312	/**
313	* ice_vsi_sync_fltr - Update the VSI filter list to the HW
314	* @vsi: ptr to the VSI
315	*
316	* Push any outstanding VSI filter changes through the AdminQ.
317	*/
318	static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
319	{
320	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
321	struct device *dev = ice_pf_to_dev(vsi->back);
322	struct net_device *netdev = vsi->netdev;
323	bool promisc_forced_on = false;
324	struct ice_pf *pf = vsi->back;
325	struct ice_hw *hw = &pf->hw;
326	u32 changed_flags = `0`;
327	int err;
328
329	if (!vsi->netdev)
330	return -EINVAL;
331
332	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: vsi->state))
333	usleep_range(min: `1000`, max: `2000`);
334
335	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
336	vsi->current_netdev_flags = vsi->netdev->flags;
337
338	INIT_LIST_HEAD(list: &vsi->tmp_sync_list);
339	INIT_LIST_HEAD(list: &vsi->tmp_unsync_list);
340
341	if (ice_vsi_fltr_changed(vsi)) {
342	clear_bit(nr: ICE_VSI_UMAC_FLTR_CHANGED, addr: vsi->state);
343	clear_bit(nr: ICE_VSI_MMAC_FLTR_CHANGED, addr: vsi->state);
344
345	/ grab the netdev's addr_list_lock /
346	netif_addr_lock_bh(dev: netdev);
347	__dev_uc_sync(dev: netdev, sync: ice_add_mac_to_sync_list,
348	unsync: ice_add_mac_to_unsync_list);
349	__dev_mc_sync(dev: netdev, sync: ice_add_mac_to_sync_list,
350	unsync: ice_add_mac_to_unsync_list);
351	/ our temp lists are populated. release lock /
352	netif_addr_unlock_bh(dev: netdev);
353	}
354
355	/ Remove MAC addresses in the unsync list /
356	err = ice_fltr_remove_mac_list(vsi, list: &vsi->tmp_unsync_list);
357	ice_fltr_free_list(dev, h: &vsi->tmp_unsync_list);
358	if (err) {
359	netdev_err(dev: netdev, format: "Failed to delete MAC filters\n");
360	/ if we failed because of alloc failures, just bail /
361	if (err == -ENOMEM)
362	goto out;
363	}
364
365	/ Add MAC addresses in the sync list /
366	err = ice_fltr_add_mac_list(vsi, list: &vsi->tmp_sync_list);
367	ice_fltr_free_list(dev, h: &vsi->tmp_sync_list);
368	/ If filter is added successfully or already exists, do not go into*
369	* 'if' condition and report it as error. Instead continue processing
370	* rest of the function.
371	*/
372	if (err && err != -EEXIST) {
373	netdev_err(dev: netdev, format: "Failed to add MAC filters\n");
374	/ If there is no more space for new umac filters, VSI*
375	* should go into promiscuous mode. There should be some
376	* space reserved for promiscuous filters.
377	*/
378	if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
379	!test_and_set_bit(nr: ICE_FLTR_OVERFLOW_PROMISC,
380	addr: vsi->state)) {
381	promisc_forced_on = true;
382	netdev_warn(dev: netdev, format: "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
383	vsi->vsi_num);
384	} else {
385	goto out;
386	}
387	}
388	err = `0`;
389	/ check for changes in promiscuous modes /
390	if (changed_flags & IFF_ALLMULTI) {
391	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
392	err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
393	if (err) {
394	vsi->current_netdev_flags &= ~IFF_ALLMULTI;
395	goto out_promisc;
396	}
397	} else {
398	/ !(vsi->current_netdev_flags & IFF_ALLMULTI) /
399	err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
400	if (err) {
401	vsi->current_netdev_flags \|= IFF_ALLMULTI;
402	goto out_promisc;
403	}
404	}
405	}
406
407	if (((changed_flags & IFF_PROMISC) \|\| promisc_forced_on) \|\|
408	test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
409	clear_bit(nr: ICE_VSI_PROMISC_CHANGED, addr: vsi->state);
410	if (vsi->current_netdev_flags & IFF_PROMISC) {
411	/ Apply Rx filter rule to get traffic from wire /
412	if (!ice_is_dflt_vsi_in_use(pi: vsi->port_info)) {
413	err = ice_set_dflt_vsi(vsi);
414	if (err && err != -EEXIST) {
415	netdev_err(dev: netdev, format: "Error %d setting default VSI %i Rx rule\n",
416	err, vsi->vsi_num);
417	vsi->current_netdev_flags &=
418	~IFF_PROMISC;
419	goto out_promisc;
420	}
421	err = `0`;
422	vlan_ops->dis_rx_filtering(vsi);
423
424	/ promiscuous mode implies allmulticast so*
425	* that VSIs that are in promiscuous mode are
426	* subscribed to multicast packets coming to
427	* the port
428	*/
429	err = ice_set_promisc(vsi,
430	ICE_MCAST_PROMISC_BITS);
431	if (err)
432	goto out_promisc;
433	}
434	} else {
435	/ Clear Rx filter to remove traffic from wire /
436	if (ice_is_vsi_dflt_vsi(vsi)) {
437	err = ice_clear_dflt_vsi(vsi);
438	if (err) {
439	netdev_err(dev: netdev, format: "Error %d clearing default VSI %i Rx rule\n",
440	err, vsi->vsi_num);
441	vsi->current_netdev_flags \|=
442	IFF_PROMISC;
443	goto out_promisc;
444	}
445	if (vsi->netdev->features &
446	NETIF_F_HW_VLAN_CTAG_FILTER)
447	vlan_ops->ena_rx_filtering(vsi);
448	}
449
450	/ disable allmulti here, but only if allmulti is not*
451	* still enabled for the netdev
452	*/
453	if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
454	err = ice_clear_promisc(vsi,
455	ICE_MCAST_PROMISC_BITS);
456	if (err) {
457	netdev_err(dev: netdev, format: "Error %d clearing multicast promiscuous on VSI %i\n",
458	err, vsi->vsi_num);
459	}
460	}
461	}
462	}
463	goto exit;
464
465	out_promisc:
466	set_bit(nr: ICE_VSI_PROMISC_CHANGED, addr: vsi->state);
467	goto exit;
468	out:
469	/ if something went wrong then set the changed flag so we try again /
470	set_bit(nr: ICE_VSI_UMAC_FLTR_CHANGED, addr: vsi->state);
471	set_bit(nr: ICE_VSI_MMAC_FLTR_CHANGED, addr: vsi->state);
472	exit:
473	clear_bit(nr: ICE_CFG_BUSY, addr: vsi->state);
474	return err;
475	}
476
477	/**
478	* ice_sync_fltr_subtask - Sync the VSI filter list with HW
479	* @pf: board private structure
480	*/
481	static void ice_sync_fltr_subtask(struct ice_pf *pf)
482	{
483	int v;
484
485	if (!pf \|\| !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
486	return;
487
488	clear_bit(nr: ICE_FLAG_FLTR_SYNC, addr: pf->flags);
489
490	ice_for_each_vsi(pf, v)
491	if (pf->vsi[v] && ice_vsi_fltr_changed(vsi: pf->vsi[v]) &&
492	ice_vsi_sync_fltr(vsi: pf->vsi[v])) {
493	/ come back and try again later /
494	set_bit(nr: ICE_FLAG_FLTR_SYNC, addr: pf->flags);
495	break;
496	}
497	}
498
499	/**
500	* ice_pf_dis_all_vsi - Pause all VSIs on a PF
501	* @pf: the PF
502	* @locked: is the rtnl_lock already held
503	*/
504	static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
505	{
506	int node;
507	int v;
508
509	ice_for_each_vsi(pf, v)
510	if (pf->vsi[v])
511	ice_dis_vsi(vsi: pf->vsi[v], locked);
512
513	for (node = `0`; node < ICE_MAX_PF_AGG_NODES; node++)
514	pf->pf_agg_node[node].num_vsis = `0`;
515
516	for (node = `0`; node < ICE_MAX_VF_AGG_NODES; node++)
517	pf->vf_agg_node[node].num_vsis = `0`;
518	}
519
520	/**
521	* ice_clear_sw_switch_recipes - clear switch recipes
522	* @pf: board private structure
523	*
524	* Mark switch recipes as not created in sw structures. There are cases where
525	* rules (especially advanced rules) need to be restored, either re-read from
526	* hardware or added again. For example after the reset. 'recp_created' flag
527	* prevents from doing that and need to be cleared upfront.
528	*/
529	static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
530	{
531	struct ice_sw_recipe *recp;
532	u8 i;
533
534	recp = pf->hw.switch_info->recp_list;
535	for (i = `0`; i < ICE_MAX_NUM_RECIPES; i++)
536	recp[i].recp_created = false;
537	}
538
539	/**
540	* ice_prepare_for_reset - prep for reset
541	* @pf: board private structure
542	* @reset_type: reset type requested
543	*
544	* Inform or close all dependent features in prep for reset.
545	*/
546	static void
547	ice_prepare_for_reset(struct ice_pf pf, enum* ice_reset_req reset_type)
548	{
549	struct ice_hw *hw = &pf->hw;
550	struct ice_vsi *vsi;
551	struct ice_vf *vf;
552	unsigned int bkt;
553
554	dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
555
556	/ already prepared for reset /
557	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
558	return;
559
560	ice_unplug_aux_dev(pf);
561
562	/ Notify VFs of impending reset /
563	if (ice_check_sq_alive(hw, cq: &hw->mailboxq))
564	ice_vc_notify_reset(pf);
565
566	/ Disable VFs until reset is completed /
567	mutex_lock(&pf->vfs.table_lock);
568	ice_for_each_vf(pf, bkt, vf)
569	ice_set_vf_state_dis(vf);
570	mutex_unlock(lock: &pf->vfs.table_lock);
571
572	if (ice_is_eswitch_mode_switchdev(pf)) {
573	if (reset_type != ICE_RESET_PFR)
574	ice_clear_sw_switch_recipes(pf);
575	}
576
577	/ release ADQ specific HW and SW resources /
578	vsi = ice_get_main_vsi(pf);
579	if (!vsi)
580	goto skip;
581
582	/ to be on safe side, reset orig_rss_size so that normal flow*
583	* of deciding rss_size can take precedence
584	*/
585	vsi->orig_rss_size = `0`;
586
587	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
588	if (reset_type == ICE_RESET_PFR) {
589	vsi->old_ena_tc = vsi->all_enatc;
590	vsi->old_numtc = vsi->all_numtc;
591	} else {
592	ice_remove_q_channels(vsi, rem_adv_fltr: true);
593
594	/ for other reset type, do not support channel rebuild*
595	* hence reset needed info
596	*/
597	vsi->old_ena_tc = `0`;
598	vsi->all_enatc = `0`;
599	vsi->old_numtc = `0`;
600	vsi->all_numtc = `0`;
601	vsi->req_txq = `0`;
602	vsi->req_rxq = `0`;
603	clear_bit(nr: ICE_FLAG_TC_MQPRIO, addr: pf->flags);
604	memset(&vsi->mqprio_qopt, `0`, sizeof(vsi->mqprio_qopt));
605	}
606	}
607	skip:
608
609	/ clear SW filtering DB /
610	ice_clear_hw_tbls(hw);
611	/ disable the VSIs and their queues that are not already DOWN /
612	ice_pf_dis_all_vsi(pf, locked: false);
613
614	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
615	ice_ptp_prepare_for_reset(pf);
616
617	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
618	ice_gnss_exit(pf);
619
620	if (hw->port_info)
621	ice_sched_clear_port(pi: hw->port_info);
622
623	ice_shutdown_all_ctrlq(hw);
624
625	set_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
626	}
627
628	/**
629	* ice_do_reset - Initiate one of many types of resets
630	* @pf: board private structure
631	* @reset_type: reset type requested before this function was called.
632	*/
633	static void ice_do_reset(struct ice_pf pf, enum* ice_reset_req reset_type)
634	{
635	struct device *dev = ice_pf_to_dev(pf);
636	struct ice_hw *hw = &pf->hw;
637
638	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
639
640	if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
641	dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
642	reset_type = ICE_RESET_CORER;
643	}
644
645	ice_prepare_for_reset(pf, reset_type);
646
647	/ trigger the reset /
648	if (ice_reset(hw, req: reset_type)) {
649	dev_err(dev, "reset %d failed\n", reset_type);
650	set_bit(nr: ICE_RESET_FAILED, addr: pf->state);
651	clear_bit(nr: ICE_RESET_OICR_RECV, addr: pf->state);
652	clear_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
653	clear_bit(nr: ICE_PFR_REQ, addr: pf->state);
654	clear_bit(nr: ICE_CORER_REQ, addr: pf->state);
655	clear_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
656	wake_up(&pf->reset_wait_queue);
657	return;
658	}
659
660	/ PFR is a bit of a special case because it doesn't result in an OICR*
661	* interrupt. So for PFR, rebuild after the reset and clear the reset-
662	* associated state bits.
663	*/
664	if (reset_type == ICE_RESET_PFR) {
665	pf->pfr_count++;
666	ice_rebuild(pf, reset_type);
667	clear_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
668	clear_bit(nr: ICE_PFR_REQ, addr: pf->state);
669	wake_up(&pf->reset_wait_queue);
670	ice_reset_all_vfs(pf);
671	}
672	}
673
674	/**
675	* ice_reset_subtask - Set up for resetting the device and driver
676	* @pf: board private structure
677	*/
678	static void ice_reset_subtask(struct ice_pf *pf)
679	{
680	enum ice_reset_req reset_type = ICE_RESET_INVAL;
681
682	/ When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an*
683	* OICR interrupt. The OICR handler (ice_misc_intr) determines what type
684	* of reset is pending and sets bits in pf->state indicating the reset
685	* type and ICE_RESET_OICR_RECV. So, if the latter bit is set
686	* prepare for pending reset if not already (for PF software-initiated
687	* global resets the software should already be prepared for it as
688	* indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
689	* by firmware or software on other PFs, that bit is not set so prepare
690	* for the reset now), poll for reset done, rebuild and return.
691	*/
692	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
693	/ Perform the largest reset requested /
694	if (test_and_clear_bit(nr: ICE_CORER_RECV, addr: pf->state))
695	reset_type = ICE_RESET_CORER;
696	if (test_and_clear_bit(nr: ICE_GLOBR_RECV, addr: pf->state))
697	reset_type = ICE_RESET_GLOBR;
698	if (test_and_clear_bit(nr: ICE_EMPR_RECV, addr: pf->state))
699	reset_type = ICE_RESET_EMPR;
700	/ return if no valid reset type requested /
701	if (reset_type == ICE_RESET_INVAL)
702	return;
703	ice_prepare_for_reset(pf, reset_type);
704
705	/ make sure we are ready to rebuild /
706	if (ice_check_reset(hw: &pf->hw)) {
707	set_bit(nr: ICE_RESET_FAILED, addr: pf->state);
708	} else {
709	/ done with reset. start rebuild /
710	pf->hw.reset_ongoing = false;
711	ice_rebuild(pf, reset_type);
712	/ clear bit to resume normal operations, but*
713	* ICE_NEEDS_RESTART bit is set in case rebuild failed
714	*/
715	clear_bit(nr: ICE_RESET_OICR_RECV, addr: pf->state);
716	clear_bit(nr: ICE_PREPARED_FOR_RESET, addr: pf->state);
717	clear_bit(nr: ICE_PFR_REQ, addr: pf->state);
718	clear_bit(nr: ICE_CORER_REQ, addr: pf->state);
719	clear_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
720	wake_up(&pf->reset_wait_queue);
721	ice_reset_all_vfs(pf);
722	}
723
724	return;
725	}
726
727	/ No pending resets to finish processing. Check for new resets /
728	if (test_bit(ICE_PFR_REQ, pf->state)) {
729	reset_type = ICE_RESET_PFR;
730	if (pf->lag && pf->lag->bonded) {
731	dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
732	reset_type = ICE_RESET_CORER;
733	}
734	}
735	if (test_bit(ICE_CORER_REQ, pf->state))
736	reset_type = ICE_RESET_CORER;
737	if (test_bit(ICE_GLOBR_REQ, pf->state))
738	reset_type = ICE_RESET_GLOBR;
739	/ If no valid reset type requested just return /
740	if (reset_type == ICE_RESET_INVAL)
741	return;
742
743	/ reset if not already down or busy /
744	if (!test_bit(ICE_DOWN, pf->state) &&
745	!test_bit(ICE_CFG_BUSY, pf->state)) {
746	ice_do_reset(pf, reset_type);
747	}
748	}
749
750	/**
751	* ice_print_topo_conflict - print topology conflict message
752	* @vsi: the VSI whose topology status is being checked
753	*/
754	static void ice_print_topo_conflict(struct ice_vsi *vsi)
755	{
756	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
757	case ICE_AQ_LINK_TOPO_CONFLICT:
758	case ICE_AQ_LINK_MEDIA_CONFLICT:
759	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
760	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
761	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
762	netdev_info(dev: vsi->netdev, format: "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
763	break;
764	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
765	if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
766	netdev_warn(dev: vsi->netdev, format: "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
767	else
768	netdev_err(dev: vsi->netdev, format: "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
769	break;
770	default:
771	break;
772	}
773	}
774
775	/**
776	* ice_print_link_msg - print link up or down message
777	* @vsi: the VSI whose link status is being queried
778	* @isup: boolean for if the link is now up or down
779	*/
780	void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
781	{
782	struct ice_aqc_get_phy_caps_data *caps;
783	const char *an_advertised;
784	const char *fec_req;
785	const char *speed;
786	const char *fec;
787	const char *fc;
788	const char *an;
789	int status;
790
791	if (!vsi)
792	return;
793
794	if (vsi->current_isup == isup)
795	return;
796
797	vsi->current_isup = isup;
798
799	if (!isup) {
800	netdev_info(dev: vsi->netdev, format: "NIC Link is Down\n");
801	return;
802	}
803
804	switch (vsi->port_info->phy.link_info.link_speed) {
805	case ICE_AQ_LINK_SPEED_100GB:
806	speed = "100 G";
807	break;
808	case ICE_AQ_LINK_SPEED_50GB:
809	speed = "50 G";
810	break;
811	case ICE_AQ_LINK_SPEED_40GB:
812	speed = "40 G";
813	break;
814	case ICE_AQ_LINK_SPEED_25GB:
815	speed = "25 G";
816	break;
817	case ICE_AQ_LINK_SPEED_20GB:
818	speed = "20 G";
819	break;
820	case ICE_AQ_LINK_SPEED_10GB:
821	speed = "10 G";
822	break;
823	case ICE_AQ_LINK_SPEED_5GB:
824	speed = "5 G";
825	break;
826	case ICE_AQ_LINK_SPEED_2500MB:
827	speed = "2.5 G";
828	break;
829	case ICE_AQ_LINK_SPEED_1000MB:
830	speed = "1 G";
831	break;
832	case ICE_AQ_LINK_SPEED_100MB:
833	speed = "100 M";
834	break;
835	default:
836	speed = "Unknown ";
837	break;
838	}
839
840	switch (vsi->port_info->fc.current_mode) {
841	case ICE_FC_FULL:
842	fc = "Rx/Tx";
843	break;
844	case ICE_FC_TX_PAUSE:
845	fc = "Tx";
846	break;
847	case ICE_FC_RX_PAUSE:
848	fc = "Rx";
849	break;
850	case ICE_FC_NONE:
851	fc = "None";
852	break;
853	default:
854	fc = "Unknown";
855	break;
856	}
857
858	/ Get FEC mode based on negotiated link info /
859	switch (vsi->port_info->phy.link_info.fec_info) {
860	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
861	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
862	fec = "RS-FEC";
863	break;
864	case ICE_AQ_LINK_25G_KR_FEC_EN:
865	fec = "FC-FEC/BASE-R";
866	break;
867	default:
868	fec = "NONE";
869	break;
870	}
871
872	/ check if autoneg completed, might be false due to not supported /
873	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
874	an = "True";
875	else
876	an = "False";
877
878	/ Get FEC mode requested based on PHY caps last SW configuration /
879	caps = kzalloc(size: sizeof(*caps), GFP_KERNEL);
880	if (!caps) {
881	fec_req = "Unknown";
882	an_advertised = "Unknown";
883	goto done;
884	}
885
886	status = ice_aq_get_phy_caps(pi: vsi->port_info, qual_mods: false,
887	ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
888	if (status)
889	netdev_info(dev: vsi->netdev, format: "Get phy capability failed.\n");
890
891	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
892
893	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ \|\|
894	caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
895	fec_req = "RS-FEC";
896	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ \|\|
897	caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
898	fec_req = "FC-FEC/BASE-R";
899	else
900	fec_req = "NONE";
901
902	kfree(objp: caps);
903
904	done:
905	netdev_info(dev: vsi->netdev, format: "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
906	speed, fec_req, fec, an_advertised, an, fc);
907	ice_print_topo_conflict(vsi);
908	}
909
910	/**
911	* ice_vsi_link_event - update the VSI's netdev
912	* @vsi: the VSI on which the link event occurred
913	* @link_up: whether or not the VSI needs to be set up or down
914	*/
915	static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
916	{
917	if (!vsi)
918	return;
919
920	if (test_bit(ICE_VSI_DOWN, vsi->state) \|\| !vsi->netdev)
921	return;
922
923	if (vsi->type == ICE_VSI_PF) {
924	if (link_up == netif_carrier_ok(dev: vsi->netdev))
925	return;
926
927	if (link_up) {
928	netif_carrier_on(dev: vsi->netdev);
929	netif_tx_wake_all_queues(dev: vsi->netdev);
930	} else {
931	netif_carrier_off(dev: vsi->netdev);
932	netif_tx_stop_all_queues(dev: vsi->netdev);
933	}
934	}
935	}
936
937	/**
938	* ice_set_dflt_mib - send a default config MIB to the FW
939	* @pf: private PF struct
940	*
941	* This function sends a default configuration MIB to the FW.
942	*
943	* If this function errors out at any point, the driver is still able to
944	* function. The main impact is that LFC may not operate as expected.
945	* Therefore an error state in this function should be treated with a DBG
946	* message and continue on with driver rebuild/reenable.
947	*/
948	static void ice_set_dflt_mib(struct ice_pf *pf)
949	{
950	struct device *dev = ice_pf_to_dev(pf);
951	u8 mib_type, buf, lldpmib = NULL;
952	u16 len, typelen, offset = `0`;
953	struct ice_lldp_org_tlv *tlv;
954	struct ice_hw *hw = &pf->hw;
955	u32 ouisubtype;
956
957	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
958	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
959	if (!lldpmib) {
960	dev_dbg(dev, "%s Failed to allocate MIB memory\n",
961	__func__);
962	return;
963	}
964
965	/ Add ETS CFG TLV /
966	tlv = (struct ice_lldp_org_tlv *)lldpmib;
967	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) \|
968	ICE_IEEE_ETS_TLV_LEN);
969	tlv->typelen = htons(typelen);
970	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) \|
971	ICE_IEEE_SUBTYPE_ETS_CFG);
972	tlv->ouisubtype = htonl(ouisubtype);
973
974	buf = tlv->tlvinfo;
975	buf[`0`] = `0`;
976
977	/ ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.*
978	* Octets 5 - 12 are BW values, set octet 5 to 100% BW.
979	* Octets 13 - 20 are TSA values - leave as zeros
980	*/
981	buf[`5`] = `0x64`;
982	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
983	offset += len + `2`;
984	tlv = (struct ice_lldp_org_tlv *)
985	((char )tlv + sizeof*(tlv->typelen) + len);
986
987	/ Add ETS REC TLV /
988	buf = tlv->tlvinfo;
989	tlv->typelen = htons(typelen);
990
991	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) \|
992	ICE_IEEE_SUBTYPE_ETS_REC);
993	tlv->ouisubtype = htonl(ouisubtype);
994
995	/ First octet of buf is reserved*
996	* Octets 1 - 4 map UP to TC - all UPs map to zero
997	* Octets 5 - 12 are BW values - set TC 0 to 100%.
998	* Octets 13 - 20 are TSA value - leave as zeros
999	*/
1000	buf[`5`] = `0x64`;
1001	offset += len + `2`;
1002	tlv = (struct ice_lldp_org_tlv *)
1003	((char )tlv + sizeof*(tlv->typelen) + len);
1004
1005	/ Add PFC CFG TLV /
1006	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) \|
1007	ICE_IEEE_PFC_TLV_LEN);
1008	tlv->typelen = htons(typelen);
1009
1010	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) \|
1011	ICE_IEEE_SUBTYPE_PFC_CFG);
1012	tlv->ouisubtype = htonl(ouisubtype);
1013
1014	/ Octet 1 left as all zeros - PFC disabled /
1015	buf[`0`] = `0x08`;
1016	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
1017	offset += len + `2`;
1018
1019	if (ice_aq_set_lldp_mib(hw, mib_type, buf: (void *)lldpmib, buf_size: offset, NULL))
1020	dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
1021
1022	kfree(objp: lldpmib);
1023	}
1024
1025	/**
1026	* ice_check_phy_fw_load - check if PHY FW load failed
1027	* @pf: pointer to PF struct
1028	* @link_cfg_err: bitmap from the link info structure
1029	*
1030	* check if external PHY FW load failed and print an error message if it did
1031	*/
1032	static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
1033	{
1034	if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
1035	clear_bit(nr: ICE_FLAG_PHY_FW_LOAD_FAILED, addr: pf->flags);
1036	return;
1037	}
1038
1039	if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
1040	return;
1041
1042	if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
1043	dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
1044	set_bit(nr: ICE_FLAG_PHY_FW_LOAD_FAILED, addr: pf->flags);
1045	}
1046	}
1047
1048	/**
1049	* ice_check_module_power
1050	* @pf: pointer to PF struct
1051	* @link_cfg_err: bitmap from the link info structure
1052	*
1053	* check module power level returned by a previous call to aq_get_link_info
1054	* and print error messages if module power level is not supported
1055	*/
1056	static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
1057	{
1058	/ if module power level is supported, clear the flag /
1059	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT \|
1060	ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
1061	clear_bit(nr: ICE_FLAG_MOD_POWER_UNSUPPORTED, addr: pf->flags);
1062	return;
1063	}
1064
1065	/ if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the*
1066	* above block didn't clear this bit, there's nothing to do
1067	*/
1068	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
1069	return;
1070
1071	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
1072	dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
1073	set_bit(nr: ICE_FLAG_MOD_POWER_UNSUPPORTED, addr: pf->flags);
1074	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
1075	dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
1076	set_bit(nr: ICE_FLAG_MOD_POWER_UNSUPPORTED, addr: pf->flags);
1077	}
1078	}
1079
1080	/**
1081	* ice_check_link_cfg_err - check if link configuration failed
1082	* @pf: pointer to the PF struct
1083	* @link_cfg_err: bitmap from the link info structure
1084	*
1085	* print if any link configuration failure happens due to the value in the
1086	* link_cfg_err parameter in the link info structure
1087	*/
1088	static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
1089	{
1090	ice_check_module_power(pf, link_cfg_err);
1091	ice_check_phy_fw_load(pf, link_cfg_err);
1092	}
1093
1094	/**
1095	* ice_link_event - process the link event
1096	* @pf: PF that the link event is associated with
1097	* @pi: port_info for the port that the link event is associated with
1098	* @link_up: true if the physical link is up and false if it is down
1099	* @link_speed: current link speed received from the link event
1100	*
1101	* Returns 0 on success and negative on failure
1102	*/
1103	static int
1104	ice_link_event(struct ice_pf pf, struct* ice_port_info *pi, bool link_up,
1105	u16 link_speed)
1106	{
1107	struct device *dev = ice_pf_to_dev(pf);
1108	struct ice_phy_info *phy_info;
1109	struct ice_vsi *vsi;
1110	u16 old_link_speed;
1111	bool old_link;
1112	int status;
1113
1114	phy_info = &pi->phy;
1115	phy_info->link_info_old = phy_info->link_info;
1116
1117	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
1118	old_link_speed = phy_info->link_info_old.link_speed;
1119
1120	/ update the link info structures and re-enable link events,*
1121	* don't bail on failure due to other book keeping needed
1122	*/
1123	status = ice_update_link_info(pi);
1124	if (status)
1125	dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
1126	pi->lport, status,
1127	ice_aq_str(pi->hw->adminq.sq_last_status));
1128
1129	ice_check_link_cfg_err(pf, link_cfg_err: pi->phy.link_info.link_cfg_err);
1130
1131	/ Check if the link state is up after updating link info, and treat*
1132	* this event as an UP event since the link is actually UP now.
1133	*/
1134	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
1135	link_up = true;
1136
1137	vsi = ice_get_main_vsi(pf);
1138	if (!vsi \|\| !vsi->port_info)
1139	return -EINVAL;
1140
1141	/ turn off PHY if media was removed /
1142	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
1143	!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
1144	set_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
1145	ice_set_link(vsi, ena: false);
1146	}
1147
1148	/ if the old link up/down and speed is the same as the new /
1149	if (link_up == old_link && link_speed == old_link_speed)
1150	return `0`;
1151
1152	ice_ptp_link_change(pf, port: pf->hw.pf_id, linkup: link_up);
1153
1154	if (ice_is_dcb_active(pf)) {
1155	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
1156	ice_dcb_rebuild(pf);
1157	} else {
1158	if (link_up)
1159	ice_set_dflt_mib(pf);
1160	}
1161	ice_vsi_link_event(vsi, link_up);
1162	ice_print_link_msg(vsi, isup: link_up);
1163
1164	ice_vc_notify_link_state(pf);
1165
1166	return `0`;
1167	}
1168
1169	/**
1170	* ice_watchdog_subtask - periodic tasks not using event driven scheduling
1171	* @pf: board private structure
1172	*/
1173	static void ice_watchdog_subtask(struct ice_pf *pf)
1174	{
1175	int i;
1176
1177	/ if interface is down do nothing /
1178	if (test_bit(ICE_DOWN, pf->state) \|\|
1179	test_bit(ICE_CFG_BUSY, pf->state))
1180	return;
1181
1182	/ make sure we don't do these things too often /
1183	if (time_before(jiffies,
1184	pf->serv_tmr_prev + pf->serv_tmr_period))
1185	return;
1186
1187	pf->serv_tmr_prev = jiffies;
1188
1189	/ Update the stats for active netdevs so the network stack*
1190	* can look at updated numbers whenever it cares to
1191	*/
1192	ice_update_pf_stats(pf);
1193	ice_for_each_vsi(pf, i)
1194	if (pf->vsi[i] && pf->vsi[i]->netdev)
1195	ice_update_vsi_stats(vsi: pf->vsi[i]);
1196	}
1197
1198	/**
1199	* ice_init_link_events - enable/initialize link events
1200	* @pi: pointer to the port_info instance
1201	*
1202	* Returns -EIO on failure, 0 on success
1203	*/
1204	static int ice_init_link_events(struct ice_port_info *pi)
1205	{
1206	u16 mask;
1207
1208	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN \| ICE_AQ_LINK_EVENT_MEDIA_NA \|
1209	ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL \|
1210	ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
1211
1212	if (ice_aq_set_event_mask(hw: pi->hw, port_num: pi->lport, mask, NULL)) {
1213	dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
1214	pi->lport);
1215	return -EIO;
1216	}
1217
1218	if (ice_aq_get_link_info(pi, ena_lse: true, NULL, NULL)) {
1219	dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
1220	pi->lport);
1221	return -EIO;
1222	}
1223
1224	return `0`;
1225	}
1226
1227	/**
1228	* ice_handle_link_event - handle link event via ARQ
1229	* @pf: PF that the link event is associated with
1230	* @event: event structure containing link status info
1231	*/
1232	static int
1233	ice_handle_link_event(struct ice_pf pf, struct* ice_rq_event_info *event)
1234	{
1235	struct ice_aqc_get_link_status_data *link_data;
1236	struct ice_port_info *port_info;
1237	int status;
1238
1239	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
1240	port_info = pf->hw.port_info;
1241	if (!port_info)
1242	return -EINVAL;
1243
1244	status = ice_link_event(pf, pi: port_info,
1245	link_up: !!(link_data->link_info & ICE_AQ_LINK_UP),
1246	le16_to_cpu(link_data->link_speed));
1247	if (status)
1248	dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
1249	status);
1250
1251	return status;
1252	}
1253
1254	/**
1255	* ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
1256	* @pf: pointer to the PF private structure
1257	* @task: intermediate helper storage and identifier for waiting
1258	* @opcode: the opcode to wait for
1259	*
1260	* Prepares to wait for a specific AdminQ completion event on the ARQ for
1261	* a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
1262	*
1263	* Calls are separated to allow caller registering for event before sending
1264	* the command, which mitigates a race between registering and FW responding.
1265	*
1266	* To obtain only the descriptor contents, pass an task->event with null
1267	* msg_buf. If the complete data buffer is desired, allocate the
1268	* task->event.msg_buf with enough space ahead of time.
1269	*/
1270	void ice_aq_prep_for_event(struct ice_pf pf, struct* ice_aq_task *task,
1271	u16 opcode)
1272	{
1273	INIT_HLIST_NODE(h: &task->entry);
1274	task->opcode = opcode;
1275	task->state = ICE_AQ_TASK_WAITING;
1276
1277	spin_lock_bh(lock: &pf->aq_wait_lock);
1278	hlist_add_head(n: &task->entry, h: &pf->aq_wait_list);
1279	spin_unlock_bh(lock: &pf->aq_wait_lock);
1280	}
1281
1282	/**
1283	* ice_aq_wait_for_event - Wait for an AdminQ event from firmware
1284	* @pf: pointer to the PF private structure
1285	* @task: ptr prepared by ice_aq_prep_for_event()
1286	* @timeout: how long to wait, in jiffies
1287	*
1288	* Waits for a specific AdminQ completion event on the ARQ for a given PF. The
1289	* current thread will be put to sleep until the specified event occurs or
1290	* until the given timeout is reached.
1291	*
1292	* Returns: zero on success, or a negative error code on failure.
1293	*/
1294	int ice_aq_wait_for_event(struct ice_pf pf, struct* ice_aq_task *task,
1295	unsigned long timeout)
1296	{
1297	enum ice_aq_task_state *state = &task->state;
1298	struct device *dev = ice_pf_to_dev(pf);
1299	unsigned long start = jiffies;
1300	long ret;
1301	int err;
1302
1303	ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
1304	*state != ICE_AQ_TASK_WAITING,
1305	timeout);
1306	switch (*state) {
1307	case ICE_AQ_TASK_NOT_PREPARED:
1308	WARN(`1`, "call to %s without ice_aq_prep_for_event()", __func__);
1309	err = -EINVAL;
1310	break;
1311	case ICE_AQ_TASK_WAITING:
1312	err = ret < `0` ? ret : -ETIMEDOUT;
1313	break;
1314	case ICE_AQ_TASK_CANCELED:
1315	err = ret < `0` ? ret : -ECANCELED;
1316	break;
1317	case ICE_AQ_TASK_COMPLETE:
1318	err = ret < `0` ? ret : `0`;
1319	break;
1320	default:
1321	WARN(`1`, "Unexpected AdminQ wait task state %u", *state);
1322	err = -EINVAL;
1323	break;
1324	}
1325
1326	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
1327	jiffies_to_msecs(jiffies - start),
1328	jiffies_to_msecs(timeout),
1329	task->opcode);
1330
1331	spin_lock_bh(lock: &pf->aq_wait_lock);
1332	hlist_del(n: &task->entry);
1333	spin_unlock_bh(lock: &pf->aq_wait_lock);
1334
1335	return err;
1336	}
1337
1338	/**
1339	* ice_aq_check_events - Check if any thread is waiting for an AdminQ event
1340	* @pf: pointer to the PF private structure
1341	* @opcode: the opcode of the event
1342	* @event: the event to check
1343	*
1344	* Loops over the current list of pending threads waiting for an AdminQ event.
1345	* For each matching task, copy the contents of the event into the task
1346	* structure and wake up the thread.
1347	*
1348	* If multiple threads wait for the same opcode, they will all be woken up.
1349	*
1350	* Note that event->msg_buf will only be duplicated if the event has a buffer
1351	* with enough space already allocated. Otherwise, only the descriptor and
1352	* message length will be copied.
1353	*
1354	* Returns: true if an event was found, false otherwise
1355	*/
1356	static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
1357	struct ice_rq_event_info *event)
1358	{
1359	struct ice_rq_event_info *task_ev;
1360	struct ice_aq_task *task;
1361	bool found = false;
1362
1363	spin_lock_bh(lock: &pf->aq_wait_lock);
1364	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
1365	if (task->state != ICE_AQ_TASK_WAITING)
1366	continue;
1367	if (task->opcode != opcode)
1368	continue;
1369
1370	task_ev = &task->event;
1371	memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
1372	task_ev->msg_len = event->msg_len;
1373
1374	/ Only copy the data buffer if a destination was set /
1375	if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
1376	memcpy(task_ev->msg_buf, event->msg_buf,
1377	event->buf_len);
1378	task_ev->buf_len = event->buf_len;
1379	}
1380
1381	task->state = ICE_AQ_TASK_COMPLETE;
1382	found = true;
1383	}
1384	spin_unlock_bh(lock: &pf->aq_wait_lock);
1385
1386	if (found)
1387	wake_up(&pf->aq_wait_queue);
1388	}
1389
1390	/**
1391	* ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
1392	* @pf: the PF private structure
1393	*
1394	* Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
1395	* This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
1396	*/
1397	static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
1398	{
1399	struct ice_aq_task *task;
1400
1401	spin_lock_bh(lock: &pf->aq_wait_lock);
1402	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
1403	task->state = ICE_AQ_TASK_CANCELED;
1404	spin_unlock_bh(lock: &pf->aq_wait_lock);
1405
1406	wake_up(&pf->aq_wait_queue);
1407	}
1408
1409	#define ICE_MBX_OVERFLOW_WATERMARK 64
1410
1411	/**
1412	* __ice_clean_ctrlq - helper function to clean controlq rings
1413	* @pf: ptr to struct ice_pf
1414	* @q_type: specific Control queue type
1415	*/
1416	static int __ice_clean_ctrlq(struct ice_pf pf, enum* ice_ctl_q q_type)
1417	{
1418	struct device *dev = ice_pf_to_dev(pf);
1419	struct ice_rq_event_info event;
1420	struct ice_hw *hw = &pf->hw;
1421	struct ice_ctl_q_info *cq;
1422	u16 pending, i = `0`;
1423	const char *qtype;
1424	u32 oldval, val;
1425
1426	/ Do not clean control queue if/when PF reset fails /
1427	if (test_bit(ICE_RESET_FAILED, pf->state))
1428	return `0`;
1429
1430	switch (q_type) {
1431	case ICE_CTL_Q_ADMIN:
1432	cq = &hw->adminq;
1433	qtype = "Admin";
1434	break;
1435	case ICE_CTL_Q_SB:
1436	cq = &hw->sbq;
1437	qtype = "Sideband";
1438	break;
1439	case ICE_CTL_Q_MAILBOX:
1440	cq = &hw->mailboxq;
1441	qtype = "Mailbox";
1442	/ we are going to try to detect a malicious VF, so set the*
1443	* state to begin detection
1444	*/
1445	hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
1446	break;
1447	default:
1448	dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
1449	return `0`;
1450	}
1451
1452	/ check for error indications - PF_xx_AxQLEN register layout for*
1453	* FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
1454	*/
1455	val = rd32(hw, cq->rq.len);
1456	if (val & (PF_FW_ARQLEN_ARQVFE_M \| PF_FW_ARQLEN_ARQOVFL_M \|
1457	PF_FW_ARQLEN_ARQCRIT_M)) {
1458	oldval = val;
1459	if (val & PF_FW_ARQLEN_ARQVFE_M)
1460	dev_dbg(dev, "%s Receive Queue VF Error detected\n",
1461	qtype);
1462	if (val & PF_FW_ARQLEN_ARQOVFL_M) {
1463	dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
1464	qtype);
1465	}
1466	if (val & PF_FW_ARQLEN_ARQCRIT_M)
1467	dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
1468	qtype);
1469	val &= ~(PF_FW_ARQLEN_ARQVFE_M \| PF_FW_ARQLEN_ARQOVFL_M \|
1470	PF_FW_ARQLEN_ARQCRIT_M);
1471	if (oldval != val)
1472	wr32(hw, cq->rq.len, val);
1473	}
1474
1475	val = rd32(hw, cq->sq.len);
1476	if (val & (PF_FW_ATQLEN_ATQVFE_M \| PF_FW_ATQLEN_ATQOVFL_M \|
1477	PF_FW_ATQLEN_ATQCRIT_M)) {
1478	oldval = val;
1479	if (val & PF_FW_ATQLEN_ATQVFE_M)
1480	dev_dbg(dev, "%s Send Queue VF Error detected\n",
1481	qtype);
1482	if (val & PF_FW_ATQLEN_ATQOVFL_M) {
1483	dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
1484	qtype);
1485	}
1486	if (val & PF_FW_ATQLEN_ATQCRIT_M)
1487	dev_dbg(dev, "%s Send Queue Critical Error detected\n",
1488	qtype);
1489	val &= ~(PF_FW_ATQLEN_ATQVFE_M \| PF_FW_ATQLEN_ATQOVFL_M \|
1490	PF_FW_ATQLEN_ATQCRIT_M);
1491	if (oldval != val)
1492	wr32(hw, cq->sq.len, val);
1493	}
1494
1495	event.buf_len = cq->rq_buf_size;
1496	event.msg_buf = kzalloc(size: event.buf_len, GFP_KERNEL);
1497	if (!event.msg_buf)
1498	return `0`;
1499
1500	do {
1501	struct ice_mbx_data data = {};
1502	u16 opcode;
1503	int ret;
1504
1505	ret = ice_clean_rq_elem(hw, cq, e: &event, pending: &pending);
1506	if (ret == -EALREADY)
1507	break;
1508	if (ret) {
1509	dev_err(dev, "%s Receive Queue event error %d\n", qtype,
1510	ret);
1511	break;
1512	}
1513
1514	opcode = le16_to_cpu(event.desc.opcode);
1515
1516	/ Notify any thread that might be waiting for this event /
1517	ice_aq_check_events(pf, opcode, event: &event);
1518
1519	switch (opcode) {
1520	case ice_aqc_opc_get_link_status:
1521	if (ice_handle_link_event(pf, event: &event))
1522	dev_err(dev, "Could not handle link event\n");
1523	break;
1524	case ice_aqc_opc_event_lan_overflow:
1525	ice_vf_lan_overflow_event(pf, event: &event);
1526	break;
1527	case ice_mbx_opc_send_msg_to_pf:
1528	data.num_msg_proc = i;
1529	data.num_pending_arq = pending;
1530	data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries;
1531	data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
1532
1533	ice_vc_process_vf_msg(pf, event: &event, mbxdata: &data);
1534	break;
1535	case ice_aqc_opc_fw_logging:
1536	ice_output_fw_log(hw, desc: &event.desc, buf: event.msg_buf);
1537	break;
1538	case ice_aqc_opc_lldp_set_mib_change:
1539	ice_dcb_process_lldp_set_mib_change(pf, event: &event);
1540	break;
1541	default:
1542	dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
1543	qtype, opcode);
1544	break;
1545	}
1546	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
1547
1548	kfree(objp: event.msg_buf);
1549
1550	return pending && (i == ICE_DFLT_IRQ_WORK);
1551	}
1552
1553	/**
1554	* ice_ctrlq_pending - check if there is a difference between ntc and ntu
1555	* @hw: pointer to hardware info
1556	* @cq: control queue information
1557	*
1558	* returns true if there are pending messages in a queue, false if there aren't
1559	*/
1560	static bool ice_ctrlq_pending(struct ice_hw hw, struct* ice_ctl_q_info *cq)
1561	{
1562	u16 ntu;
1563
1564	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
1565	return cq->rq.next_to_clean != ntu;
1566	}
1567
1568	/**
1569	* ice_clean_adminq_subtask - clean the AdminQ rings
1570	* @pf: board private structure
1571	*/
1572	static void ice_clean_adminq_subtask(struct ice_pf *pf)
1573	{
1574	struct ice_hw *hw = &pf->hw;
1575
1576	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
1577	return;
1578
1579	if (__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_ADMIN))
1580	return;
1581
1582	clear_bit(nr: ICE_ADMINQ_EVENT_PENDING, addr: pf->state);
1583
1584	/ There might be a situation where new messages arrive to a control*
1585	* queue between processing the last message and clearing the
1586	* EVENT_PENDING bit. So before exiting, check queue head again (using
1587	* ice_ctrlq_pending) and process new messages if any.
1588	*/
1589	if (ice_ctrlq_pending(hw, cq: &hw->adminq))
1590	__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_ADMIN);
1591
1592	ice_flush(hw);
1593	}
1594
1595	/**
1596	* ice_clean_mailboxq_subtask - clean the MailboxQ rings
1597	* @pf: board private structure
1598	*/
1599	static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
1600	{
1601	struct ice_hw *hw = &pf->hw;
1602
1603	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
1604	return;
1605
1606	if (__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_MAILBOX))
1607	return;
1608
1609	clear_bit(nr: ICE_MAILBOXQ_EVENT_PENDING, addr: pf->state);
1610
1611	if (ice_ctrlq_pending(hw, cq: &hw->mailboxq))
1612	__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_MAILBOX);
1613
1614	ice_flush(hw);
1615	}
1616
1617	/**
1618	* ice_clean_sbq_subtask - clean the Sideband Queue rings
1619	* @pf: board private structure
1620	*/
1621	static void ice_clean_sbq_subtask(struct ice_pf *pf)
1622	{
1623	struct ice_hw *hw = &pf->hw;
1624
1625	/ Nothing to do here if sideband queue is not supported /
1626	if (!ice_is_sbq_supported(hw)) {
1627	clear_bit(nr: ICE_SIDEBANDQ_EVENT_PENDING, addr: pf->state);
1628	return;
1629	}
1630
1631	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
1632	return;
1633
1634	if (__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_SB))
1635	return;
1636
1637	clear_bit(nr: ICE_SIDEBANDQ_EVENT_PENDING, addr: pf->state);
1638
1639	if (ice_ctrlq_pending(hw, cq: &hw->sbq))
1640	__ice_clean_ctrlq(pf, q_type: ICE_CTL_Q_SB);
1641
1642	ice_flush(hw);
1643	}
1644
1645	/**
1646	* ice_service_task_schedule - schedule the service task to wake up
1647	* @pf: board private structure
1648	*
1649	* If not already scheduled, this puts the task into the work queue.
1650	*/
1651	void ice_service_task_schedule(struct ice_pf *pf)
1652	{
1653	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
1654	!test_and_set_bit(nr: ICE_SERVICE_SCHED, addr: pf->state) &&
1655	!test_bit(ICE_NEEDS_RESTART, pf->state))
1656	queue_work(wq: ice_wq, work: &pf->serv_task);
1657	}
1658
1659	/**
1660	* ice_service_task_complete - finish up the service task
1661	* @pf: board private structure
1662	*/
1663	static void ice_service_task_complete(struct ice_pf *pf)
1664	{
1665	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
1666
1667	/ force memory (pf->state) to sync before next service task /
1668	smp_mb__before_atomic();
1669	clear_bit(nr: ICE_SERVICE_SCHED, addr: pf->state);
1670	}
1671
1672	/**
1673	* ice_service_task_stop - stop service task and cancel works
1674	* @pf: board private structure
1675	*
1676	* Return 0 if the ICE_SERVICE_DIS bit was not already set,
1677	* 1 otherwise.
1678	*/
1679	static int ice_service_task_stop(struct ice_pf *pf)
1680	{
1681	int ret;
1682
1683	ret = test_and_set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
1684
1685	if (pf->serv_tmr.function)
1686	del_timer_sync(timer: &pf->serv_tmr);
1687	if (pf->serv_task.func)
1688	cancel_work_sync(work: &pf->serv_task);
1689
1690	clear_bit(nr: ICE_SERVICE_SCHED, addr: pf->state);
1691	return ret;
1692	}
1693
1694	/**
1695	* ice_service_task_restart - restart service task and schedule works
1696	* @pf: board private structure
1697	*
1698	* This function is needed for suspend and resume works (e.g WoL scenario)
1699	*/
1700	static void ice_service_task_restart(struct ice_pf *pf)
1701	{
1702	clear_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
1703	ice_service_task_schedule(pf);
1704	}
1705
1706	/**
1707	* ice_service_timer - timer callback to schedule service task
1708	* @t: pointer to timer_list
1709	*/
1710	static void ice_service_timer(struct timer_list *t)
1711	{
1712	struct ice_pf *pf = from_timer(pf, t, serv_tmr);
1713
1714	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: pf->serv_tmr_period + jiffies));
1715	ice_service_task_schedule(pf);
1716	}
1717
1718	/**
1719	* ice_handle_mdd_event - handle malicious driver detect event
1720	* @pf: pointer to the PF structure
1721	*
1722	* Called from service task. OICR interrupt handler indicates MDD event.
1723	* VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
1724	* messages are wrapped by netif_msg_[rx\|tx]_err. Since VF Rx MDD events
1725	* disable the queue, the PF can be configured to reset the VF using ethtool
1726	* private flag mdd-auto-reset-vf.
1727	*/
1728	static void ice_handle_mdd_event(struct ice_pf *pf)
1729	{
1730	struct device *dev = ice_pf_to_dev(pf);
1731	struct ice_hw *hw = &pf->hw;
1732	struct ice_vf *vf;
1733	unsigned int bkt;
1734	u32 reg;
1735
1736	if (!test_and_clear_bit(nr: ICE_MDD_EVENT_PENDING, addr: pf->state)) {
1737	/ Since the VF MDD event logging is rate limited, check if*
1738	* there are pending MDD events.
1739	*/
1740	ice_print_vfs_mdd_events(pf);
1741	return;
1742	}
1743
1744	/ find what triggered an MDD event /
1745	reg = rd32(hw, GL_MDET_TX_PQM);
1746	if (reg & GL_MDET_TX_PQM_VALID_M) {
1747	u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
1748	GL_MDET_TX_PQM_PF_NUM_S;
1749	u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
1750	GL_MDET_TX_PQM_VF_NUM_S;
1751	u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
1752	GL_MDET_TX_PQM_MAL_TYPE_S;
1753	u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
1754	GL_MDET_TX_PQM_QNUM_S);
1755
1756	if (netif_msg_tx_err(pf))
1757	dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1758	event, queue, pf_num, vf_num);
1759	wr32(hw, GL_MDET_TX_PQM, `0xffffffff`);
1760	}
1761
1762	reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw));
1763	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
1764	u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
1765	GL_MDET_TX_TCLAN_PF_NUM_S;
1766	u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
1767	GL_MDET_TX_TCLAN_VF_NUM_S;
1768	u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
1769	GL_MDET_TX_TCLAN_MAL_TYPE_S;
1770	u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
1771	GL_MDET_TX_TCLAN_QNUM_S);
1772
1773	if (netif_msg_tx_err(pf))
1774	dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1775	event, queue, pf_num, vf_num);
1776	wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
1777	}
1778
1779	reg = rd32(hw, GL_MDET_RX);
1780	if (reg & GL_MDET_RX_VALID_M) {
1781	u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
1782	GL_MDET_RX_PF_NUM_S;
1783	u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
1784	GL_MDET_RX_VF_NUM_S;
1785	u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
1786	GL_MDET_RX_MAL_TYPE_S;
1787	u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
1788	GL_MDET_RX_QNUM_S);
1789
1790	if (netif_msg_rx_err(pf))
1791	dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
1792	event, queue, pf_num, vf_num);
1793	wr32(hw, GL_MDET_RX, `0xffffffff`);
1794	}
1795
1796	/ check to see if this PF caused an MDD event /
1797	reg = rd32(hw, PF_MDET_TX_PQM);
1798	if (reg & PF_MDET_TX_PQM_VALID_M) {
1799	wr32(hw, PF_MDET_TX_PQM, `0xFFFF`);
1800	if (netif_msg_tx_err(pf))
1801	dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
1802	}
1803
1804	reg = rd32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw));
1805	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
1806	wr32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw), `0xffff`);
1807	if (netif_msg_tx_err(pf))
1808	dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
1809	}
1810
1811	reg = rd32(hw, PF_MDET_RX);
1812	if (reg & PF_MDET_RX_VALID_M) {
1813	wr32(hw, PF_MDET_RX, `0xFFFF`);
1814	if (netif_msg_rx_err(pf))
1815	dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
1816	}
1817
1818	/ Check to see if one of the VFs caused an MDD event, and then*
1819	* increment counters and set print pending
1820	*/
1821	mutex_lock(&pf->vfs.table_lock);
1822	ice_for_each_vf(pf, bkt, vf) {
1823	reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
1824	if (reg & VP_MDET_TX_PQM_VALID_M) {
1825	wr32(hw, VP_MDET_TX_PQM(vf->vf_id), `0xFFFF`);
1826	vf->mdd_tx_events.count++;
1827	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1828	if (netif_msg_tx_err(pf))
1829	dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
1830	vf->vf_id);
1831	}
1832
1833	reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
1834	if (reg & VP_MDET_TX_TCLAN_VALID_M) {
1835	wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), `0xFFFF`);
1836	vf->mdd_tx_events.count++;
1837	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1838	if (netif_msg_tx_err(pf))
1839	dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
1840	vf->vf_id);
1841	}
1842
1843	reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
1844	if (reg & VP_MDET_TX_TDPU_VALID_M) {
1845	wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), `0xFFFF`);
1846	vf->mdd_tx_events.count++;
1847	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1848	if (netif_msg_tx_err(pf))
1849	dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
1850	vf->vf_id);
1851	}
1852
1853	reg = rd32(hw, VP_MDET_RX(vf->vf_id));
1854	if (reg & VP_MDET_RX_VALID_M) {
1855	wr32(hw, VP_MDET_RX(vf->vf_id), `0xFFFF`);
1856	vf->mdd_rx_events.count++;
1857	set_bit(nr: ICE_MDD_VF_PRINT_PENDING, addr: pf->state);
1858	if (netif_msg_rx_err(pf))
1859	dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
1860	vf->vf_id);
1861
1862	/ Since the queue is disabled on VF Rx MDD events, the*
1863	* PF can be configured to reset the VF through ethtool
1864	* private flag mdd-auto-reset-vf.
1865	*/
1866	if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
1867	/ VF MDD event counters will be cleared by*
1868	* reset, so print the event prior to reset.
1869	*/
1870	ice_print_vf_rx_mdd_event(vf);
1871	ice_reset_vf(vf, flags: ICE_VF_RESET_LOCK);
1872	}
1873	}
1874	}
1875	mutex_unlock(lock: &pf->vfs.table_lock);
1876
1877	ice_print_vfs_mdd_events(pf);
1878	}
1879
1880	/**
1881	* ice_force_phys_link_state - Force the physical link state
1882	* @vsi: VSI to force the physical link state to up/down
1883	* @link_up: true/false indicates to set the physical link to up/down
1884	*
1885	* Force the physical link state by getting the current PHY capabilities from
1886	* hardware and setting the PHY config based on the determined capabilities. If
1887	* link changes a link event will be triggered because both the Enable Automatic
1888	* Link Update and LESM Enable bits are set when setting the PHY capabilities.
1889	*
1890	* Returns 0 on success, negative on failure
1891	*/
1892	static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
1893	{
1894	struct ice_aqc_get_phy_caps_data *pcaps;
1895	struct ice_aqc_set_phy_cfg_data *cfg;
1896	struct ice_port_info *pi;
1897	struct device *dev;
1898	int retcode;
1899
1900	if (!vsi \|\| !vsi->port_info \|\| !vsi->back)
1901	return -EINVAL;
1902	if (vsi->type != ICE_VSI_PF)
1903	return `0`;
1904
1905	dev = ice_pf_to_dev(vsi->back);
1906
1907	pi = vsi->port_info;
1908
1909	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
1910	if (!pcaps)
1911	return -ENOMEM;
1912
1913	retcode = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_ACTIVE_CFG, caps: pcaps,
1914	NULL);
1915	if (retcode) {
1916	dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
1917	vsi->vsi_num, retcode);
1918	retcode = -EIO;
1919	goto out;
1920	}
1921
1922	/ No change in link /
1923	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
1924	link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
1925	goto out;
1926
1927	/ Use the current user PHY configuration. The current user PHY*
1928	* configuration is initialized during probe from PHY capabilities
1929	* software mode, and updated on set PHY configuration.
1930	*/
1931	cfg = kmemdup(p: &pi->phy.curr_user_phy_cfg, size: sizeof(*cfg), GFP_KERNEL);
1932	if (!cfg) {
1933	retcode = -ENOMEM;
1934	goto out;
1935	}
1936
1937	cfg->caps \|= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
1938	if (link_up)
1939	cfg->caps \|= ICE_AQ_PHY_ENA_LINK;
1940	else
1941	cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
1942
1943	retcode = ice_aq_set_phy_cfg(hw: &vsi->back->hw, pi, cfg, NULL);
1944	if (retcode) {
1945	dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
1946	vsi->vsi_num, retcode);
1947	retcode = -EIO;
1948	}
1949
1950	kfree(objp: cfg);
1951	out:
1952	kfree(objp: pcaps);
1953	return retcode;
1954	}
1955
1956	/**
1957	* ice_init_nvm_phy_type - Initialize the NVM PHY type
1958	* @pi: port info structure
1959	*
1960	* Initialize nvm_phy_type_[low\|high] for link lenient mode support
1961	*/
1962	static int ice_init_nvm_phy_type(struct ice_port_info *pi)
1963	{
1964	struct ice_aqc_get_phy_caps_data *pcaps;
1965	struct ice_pf *pf = pi->hw->back;
1966	int err;
1967
1968	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
1969	if (!pcaps)
1970	return -ENOMEM;
1971
1972	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
1973	caps: pcaps, NULL);
1974
1975	if (err) {
1976	dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
1977	goto out;
1978	}
1979
1980	pf->nvm_phy_type_hi = pcaps->phy_type_high;
1981	pf->nvm_phy_type_lo = pcaps->phy_type_low;
1982
1983	out:
1984	kfree(objp: pcaps);
1985	return err;
1986	}
1987
1988	/**
1989	* ice_init_link_dflt_override - Initialize link default override
1990	* @pi: port info structure
1991	*
1992	* Initialize link default override and PHY total port shutdown during probe
1993	*/
1994	static void ice_init_link_dflt_override(struct ice_port_info *pi)
1995	{
1996	struct ice_link_default_override_tlv *ldo;
1997	struct ice_pf *pf = pi->hw->back;
1998
1999	ldo = &pf->link_dflt_override;
2000	if (ice_get_link_default_override(ldo, pi))
2001	return;
2002
2003	if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
2004	return;
2005
2006	/ Enable Total Port Shutdown (override/replace link-down-on-close*
2007	* ethtool private flag) for ports with Port Disable bit set.
2008	*/
2009	set_bit(nr: ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, addr: pf->flags);
2010	set_bit(nr: ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, addr: pf->flags);
2011	}
2012
2013	/**
2014	* ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
2015	* @pi: port info structure
2016	*
2017	* If default override is enabled, initialize the user PHY cfg speed and FEC
2018	* settings using the default override mask from the NVM.
2019	*
2020	* The PHY should only be configured with the default override settings the
2021	* first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
2022	* is used to indicate that the user PHY cfg default override is initialized
2023	* and the PHY has not been configured with the default override settings. The
2024	* state is set here, and cleared in ice_configure_phy the first time the PHY is
2025	* configured.
2026	*
2027	* This function should be called only if the FW doesn't support default
2028	* configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
2029	*/
2030	static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
2031	{
2032	struct ice_link_default_override_tlv *ldo;
2033	struct ice_aqc_set_phy_cfg_data *cfg;
2034	struct ice_phy_info *phy = &pi->phy;
2035	struct ice_pf *pf = pi->hw->back;
2036
2037	ldo = &pf->link_dflt_override;
2038
2039	/ If link default override is enabled, use to mask NVM PHY capabilities*
2040	* for speed and FEC default configuration.
2041	*/
2042	cfg = &phy->curr_user_phy_cfg;
2043
2044	if (ldo->phy_type_low \|\| ldo->phy_type_high) {
2045	cfg->phy_type_low = pf->nvm_phy_type_lo &
2046	cpu_to_le64(ldo->phy_type_low);
2047	cfg->phy_type_high = pf->nvm_phy_type_hi &
2048	cpu_to_le64(ldo->phy_type_high);
2049	}
2050	cfg->link_fec_opt = ldo->fec_options;
2051	phy->curr_user_fec_req = ICE_FEC_AUTO;
2052
2053	set_bit(nr: ICE_LINK_DEFAULT_OVERRIDE_PENDING, addr: pf->state);
2054	}
2055
2056	/**
2057	* ice_init_phy_user_cfg - Initialize the PHY user configuration
2058	* @pi: port info structure
2059	*
2060	* Initialize the current user PHY configuration, speed, FEC, and FC requested
2061	* mode to default. The PHY defaults are from get PHY capabilities topology
2062	* with media so call when media is first available. An error is returned if
2063	* called when media is not available. The PHY initialization completed state is
2064	* set here.
2065	*
2066	* These configurations are used when setting PHY
2067	* configuration. The user PHY configuration is updated on set PHY
2068	* configuration. Returns 0 on success, negative on failure
2069	*/
2070	static int ice_init_phy_user_cfg(struct ice_port_info *pi)
2071	{
2072	struct ice_aqc_get_phy_caps_data *pcaps;
2073	struct ice_phy_info *phy = &pi->phy;
2074	struct ice_pf *pf = pi->hw->back;
2075	int err;
2076
2077	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2078	return -EIO;
2079
2080	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
2081	if (!pcaps)
2082	return -ENOMEM;
2083
2084	if (ice_fw_supports_report_dflt_cfg(hw: pi->hw))
2085	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_DFLT_CFG,
2086	caps: pcaps, NULL);
2087	else
2088	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2089	caps: pcaps, NULL);
2090	if (err) {
2091	dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
2092	goto err_out;
2093	}
2094
2095	ice_copy_phy_caps_to_cfg(pi, caps: pcaps, cfg: &pi->phy.curr_user_phy_cfg);
2096
2097	/ check if lenient mode is supported and enabled /
2098	if (ice_fw_supports_link_override(hw: pi->hw) &&
2099	!(pcaps->module_compliance_enforcement &
2100	ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
2101	set_bit(nr: ICE_FLAG_LINK_LENIENT_MODE_ENA, addr: pf->flags);
2102
2103	/ if the FW supports default PHY configuration mode, then the driver*
2104	* does not have to apply link override settings. If not,
2105	* initialize user PHY configuration with link override values
2106	*/
2107	if (!ice_fw_supports_report_dflt_cfg(hw: pi->hw) &&
2108	(pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
2109	ice_init_phy_cfg_dflt_override(pi);
2110	goto out;
2111	}
2112	}
2113
2114	/ if link default override is not enabled, set user flow control and*
2115	* FEC settings based on what get_phy_caps returned
2116	*/
2117	phy->curr_user_fec_req = ice_caps_to_fec_mode(caps: pcaps->caps,
2118	fec_options: pcaps->link_fec_options);
2119	phy->curr_user_fc_req = ice_caps_to_fc_mode(caps: pcaps->caps);
2120
2121	out:
2122	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
2123	set_bit(nr: ICE_PHY_INIT_COMPLETE, addr: pf->state);
2124	err_out:
2125	kfree(objp: pcaps);
2126	return err;
2127	}
2128
2129	/**
2130	* ice_configure_phy - configure PHY
2131	* @vsi: VSI of PHY
2132	*
2133	* Set the PHY configuration. If the current PHY configuration is the same as
2134	* the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
2135	* configure the based get PHY capabilities for topology with media.
2136	*/
2137	static int ice_configure_phy(struct ice_vsi *vsi)
2138	{
2139	struct device *dev = ice_pf_to_dev(vsi->back);
2140	struct ice_port_info *pi = vsi->port_info;
2141	struct ice_aqc_get_phy_caps_data *pcaps;
2142	struct ice_aqc_set_phy_cfg_data *cfg;
2143	struct ice_phy_info *phy = &pi->phy;
2144	struct ice_pf *pf = vsi->back;
2145	int err;
2146
2147	/ Ensure we have media as we cannot configure a medialess port /
2148	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2149	return -EPERM;
2150
2151	ice_print_topo_conflict(vsi);
2152
2153	if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
2154	phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
2155	return -EPERM;
2156
2157	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
2158	return ice_force_phys_link_state(vsi, link_up: true);
2159
2160	pcaps = kzalloc(size: sizeof(*pcaps), GFP_KERNEL);
2161	if (!pcaps)
2162	return -ENOMEM;
2163
2164	/ Get current PHY config /
2165	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_ACTIVE_CFG, caps: pcaps,
2166	NULL);
2167	if (err) {
2168	dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
2169	vsi->vsi_num, err);
2170	goto done;
2171	}
2172
2173	/ If PHY enable link is configured and configuration has not changed,*
2174	* there's nothing to do
2175	*/
2176	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
2177	ice_phy_caps_equals_cfg(caps: pcaps, cfg: &phy->curr_user_phy_cfg))
2178	goto done;
2179
2180	/ Use PHY topology as baseline for configuration /
2181	memset(pcaps, `0`, sizeof(*pcaps));
2182	if (ice_fw_supports_report_dflt_cfg(hw: pi->hw))
2183	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_DFLT_CFG,
2184	caps: pcaps, NULL);
2185	else
2186	err = ice_aq_get_phy_caps(pi, qual_mods: false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2187	caps: pcaps, NULL);
2188	if (err) {
2189	dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
2190	vsi->vsi_num, err);
2191	goto done;
2192	}
2193
2194	cfg = kzalloc(size: sizeof(*cfg), GFP_KERNEL);
2195	if (!cfg) {
2196	err = -ENOMEM;
2197	goto done;
2198	}
2199
2200	ice_copy_phy_caps_to_cfg(pi, caps: pcaps, cfg);
2201
2202	/ Speed - If default override pending, use curr_user_phy_cfg set in*
2203	* ice_init_phy_user_cfg_ldo.
2204	*/
2205	if (test_and_clear_bit(nr: ICE_LINK_DEFAULT_OVERRIDE_PENDING,
2206	addr: vsi->back->state)) {
2207	cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
2208	cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
2209	} else {
2210	u64 phy_low = `0`, phy_high = `0`;
2211
2212	ice_update_phy_type(phy_type_low: &phy_low, phy_type_high: &phy_high,
2213	link_speeds_bitmap: pi->phy.curr_user_speed_req);
2214	cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
2215	cfg->phy_type_high = pcaps->phy_type_high &
2216	cpu_to_le64(phy_high);
2217	}
2218
2219	/ Can't provide what was requested; use PHY capabilities /
2220	if (!cfg->phy_type_low && !cfg->phy_type_high) {
2221	cfg->phy_type_low = pcaps->phy_type_low;
2222	cfg->phy_type_high = pcaps->phy_type_high;
2223	}
2224
2225	/ FEC /
2226	ice_cfg_phy_fec(pi, cfg, fec: phy->curr_user_fec_req);
2227
2228	/ Can't provide what was requested; use PHY capabilities /
2229	if (cfg->link_fec_opt !=
2230	(cfg->link_fec_opt & pcaps->link_fec_options)) {
2231	cfg->caps \|= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
2232	cfg->link_fec_opt = pcaps->link_fec_options;
2233	}
2234
2235	/ Flow Control - always supported; no need to check against*
2236	* capabilities
2237	*/
2238	ice_cfg_phy_fc(pi, cfg, req_mode: phy->curr_user_fc_req);
2239
2240	/ Enable link and link update /
2241	cfg->caps \|= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT \| ICE_AQ_PHY_ENA_LINK;
2242
2243	err = ice_aq_set_phy_cfg(hw: &pf->hw, pi, cfg, NULL);
2244	if (err)
2245	dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
2246	vsi->vsi_num, err);
2247
2248	kfree(objp: cfg);
2249	done:
2250	kfree(objp: pcaps);
2251	return err;
2252	}
2253
2254	/**
2255	* ice_check_media_subtask - Check for media
2256	* @pf: pointer to PF struct
2257	*
2258	* If media is available, then initialize PHY user configuration if it is not
2259	* been, and configure the PHY if the interface is up.
2260	*/
2261	static void ice_check_media_subtask(struct ice_pf *pf)
2262	{
2263	struct ice_port_info *pi;
2264	struct ice_vsi *vsi;
2265	int err;
2266
2267	/ No need to check for media if it's already present /
2268	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
2269	return;
2270
2271	vsi = ice_get_main_vsi(pf);
2272	if (!vsi)
2273	return;
2274
2275	/ Refresh link info and check if media is present /
2276	pi = vsi->port_info;
2277	err = ice_update_link_info(pi);
2278	if (err)
2279	return;
2280
2281	ice_check_link_cfg_err(pf, link_cfg_err: pi->phy.link_info.link_cfg_err);
2282
2283	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2284	if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
2285	ice_init_phy_user_cfg(pi);
2286
2287	/ PHY settings are reset on media insertion, reconfigure*
2288	* PHY to preserve settings.
2289	*/
2290	if (test_bit(ICE_VSI_DOWN, vsi->state) &&
2291	test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
2292	return;
2293
2294	err = ice_configure_phy(vsi);
2295	if (!err)
2296	clear_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
2297
2298	/ A Link Status Event will be generated; the event handler*
2299	* will complete bringing the interface up
2300	*/
2301	}
2302	}
2303
2304	/**
2305	* ice_service_task - manage and run subtasks
2306	* @work: pointer to work_struct contained by the PF struct
2307	*/
2308	static void ice_service_task(struct work_struct *work)
2309	{
2310	struct ice_pf pf = container_of(work, struct* ice_pf, serv_task);
2311	unsigned long start_time = jiffies;
2312
2313	/ subtasks /
2314
2315	/ process reset requests first /
2316	ice_reset_subtask(pf);
2317
2318	/ bail if a reset/recovery cycle is pending or rebuild failed /
2319	if (ice_is_reset_in_progress(state: pf->state) \|\|
2320	test_bit(ICE_SUSPENDED, pf->state) \|\|
2321	test_bit(ICE_NEEDS_RESTART, pf->state)) {
2322	ice_service_task_complete(pf);
2323	return;
2324	}
2325
2326	if (test_and_clear_bit(nr: ICE_AUX_ERR_PENDING, addr: pf->state)) {
2327	struct iidc_event *event;
2328
2329	event = kzalloc(size: sizeof(*event), GFP_KERNEL);
2330	if (event) {
2331	set_bit(nr: IIDC_EVENT_CRIT_ERR, addr: event->type);
2332	/ report the entire OICR value to AUX driver /
2333	swap(event->reg, pf->oicr_err_reg);
2334	ice_send_event_to_aux(pf, event);
2335	kfree(objp: event);
2336	}
2337	}
2338
2339	/ unplug aux dev per request, if an unplug request came in*
2340	* while processing a plug request, this will handle it
2341	*/
2342	if (test_and_clear_bit(nr: ICE_FLAG_UNPLUG_AUX_DEV, addr: pf->flags))
2343	ice_unplug_aux_dev(pf);
2344
2345	/ Plug aux device per request /
2346	if (test_and_clear_bit(nr: ICE_FLAG_PLUG_AUX_DEV, addr: pf->flags))
2347	ice_plug_aux_dev(pf);
2348
2349	if (test_and_clear_bit(nr: ICE_FLAG_MTU_CHANGED, addr: pf->flags)) {
2350	struct iidc_event *event;
2351
2352	event = kzalloc(size: sizeof(*event), GFP_KERNEL);
2353	if (event) {
2354	set_bit(nr: IIDC_EVENT_AFTER_MTU_CHANGE, addr: event->type);
2355	ice_send_event_to_aux(pf, event);
2356	kfree(objp: event);
2357	}
2358	}
2359
2360	ice_clean_adminq_subtask(pf);
2361	ice_check_media_subtask(pf);
2362	ice_check_for_hang_subtask(pf);
2363	ice_sync_fltr_subtask(pf);
2364	ice_handle_mdd_event(pf);
2365	ice_watchdog_subtask(pf);
2366
2367	if (ice_is_safe_mode(pf)) {
2368	ice_service_task_complete(pf);
2369	return;
2370	}
2371
2372	ice_process_vflr_event(pf);
2373	ice_clean_mailboxq_subtask(pf);
2374	ice_clean_sbq_subtask(pf);
2375	ice_sync_arfs_fltrs(pf);
2376	ice_flush_fdir_ctx(pf);
2377
2378	/ Clear ICE_SERVICE_SCHED flag to allow scheduling next event /
2379	ice_service_task_complete(pf);
2380
2381	/ If the tasks have taken longer than one service timer period*
2382	* or there is more work to be done, reset the service timer to
2383	* schedule the service task now.
2384	*/
2385	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) \|\|
2386	test_bit(ICE_MDD_EVENT_PENDING, pf->state) \|\|
2387	test_bit(ICE_VFLR_EVENT_PENDING, pf->state) \|\|
2388	test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) \|\|
2389	test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) \|\|
2390	test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) \|\|
2391	test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
2392	mod_timer(timer: &pf->serv_tmr, expires: jiffies);
2393	}
2394
2395	/**
2396	* ice_set_ctrlq_len - helper function to set controlq length
2397	* @hw: pointer to the HW instance
2398	*/
2399	static void ice_set_ctrlq_len(struct ice_hw *hw)
2400	{
2401	hw->adminq.num_rq_entries = ICE_AQ_LEN;
2402	hw->adminq.num_sq_entries = ICE_AQ_LEN;
2403	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
2404	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
2405	hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
2406	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
2407	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2408	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2409	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
2410	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
2411	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2412	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2413	}
2414
2415	/**
2416	* ice_schedule_reset - schedule a reset
2417	* @pf: board private structure
2418	* @reset: reset being requested
2419	*/
2420	int ice_schedule_reset(struct ice_pf pf, enum* ice_reset_req reset)
2421	{
2422	struct device *dev = ice_pf_to_dev(pf);
2423
2424	/ bail out if earlier reset has failed /
2425	if (test_bit(ICE_RESET_FAILED, pf->state)) {
2426	dev_dbg(dev, "earlier reset has failed\n");
2427	return -EIO;
2428	}
2429	/ bail if reset/recovery already in progress /
2430	if (ice_is_reset_in_progress(state: pf->state)) {
2431	dev_dbg(dev, "Reset already in progress\n");
2432	return -EBUSY;
2433	}
2434
2435	switch (reset) {
2436	case ICE_RESET_PFR:
2437	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
2438	break;
2439	case ICE_RESET_CORER:
2440	set_bit(nr: ICE_CORER_REQ, addr: pf->state);
2441	break;
2442	case ICE_RESET_GLOBR:
2443	set_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
2444	break;
2445	default:
2446	return -EINVAL;
2447	}
2448
2449	ice_service_task_schedule(pf);
2450	return `0`;
2451	}
2452
2453	/**
2454	* ice_irq_affinity_notify - Callback for affinity changes
2455	* @notify: context as to what irq was changed
2456	* @mask: the new affinity mask
2457	*
2458	* This is a callback function used by the irq_set_affinity_notifier function
2459	* so that we may register to receive changes to the irq affinity masks.
2460	*/
2461	static void
2462	ice_irq_affinity_notify(struct irq_affinity_notify *notify,
2463	const cpumask_t *mask)
2464	{
2465	struct ice_q_vector *q_vector =
2466	container_of(notify, struct ice_q_vector, affinity_notify);
2467
2468	cpumask_copy(dstp: &q_vector->affinity_mask, srcp: mask);
2469	}
2470
2471	/**
2472	* ice_irq_affinity_release - Callback for affinity notifier release
2473	* @ref: internal core kernel usage
2474	*
2475	* This is a callback function used by the irq_set_affinity_notifier function
2476	* to inform the current notification subscriber that they will no longer
2477	* receive notifications.
2478	*/
2479	static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
2480
2481	/**
2482	* ice_vsi_ena_irq - Enable IRQ for the given VSI
2483	* @vsi: the VSI being configured
2484	*/
2485	static int ice_vsi_ena_irq(struct ice_vsi *vsi)
2486	{
2487	struct ice_hw *hw = &vsi->back->hw;
2488	int i;
2489
2490	ice_for_each_q_vector(vsi, i)
2491	ice_irq_dynamic_ena(hw, vsi, q_vector: vsi->q_vectors[i]);
2492
2493	ice_flush(hw);
2494	return `0`;
2495	}
2496
2497	/**
2498	* ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
2499	* @vsi: the VSI being configured
2500	* @basename: name for the vector
2501	*/
2502	static int ice_vsi_req_irq_msix(struct ice_vsi vsi, char* *basename)
2503	{
2504	int q_vectors = vsi->num_q_vectors;
2505	struct ice_pf *pf = vsi->back;
2506	struct device *dev;
2507	int rx_int_idx = `0`;
2508	int tx_int_idx = `0`;
2509	int vector, err;
2510	int irq_num;
2511
2512	dev = ice_pf_to_dev(pf);
2513	for (vector = `0`; vector < q_vectors; vector++) {
2514	struct ice_q_vector *q_vector = vsi->q_vectors[vector];
2515
2516	irq_num = q_vector->irq.virq;
2517
2518	if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
2519	snprintf(buf: q_vector->name, size: sizeof(q_vector->name) - `1`,
2520	fmt: "%s-%s-%d", basename, "TxRx", rx_int_idx++);
2521	tx_int_idx++;
2522	} else if (q_vector->rx.rx_ring) {
2523	snprintf(buf: q_vector->name, size: sizeof(q_vector->name) - `1`,
2524	fmt: "%s-%s-%d", basename, "rx", rx_int_idx++);
2525	} else if (q_vector->tx.tx_ring) {
2526	snprintf(buf: q_vector->name, size: sizeof(q_vector->name) - `1`,
2527	fmt: "%s-%s-%d", basename, "tx", tx_int_idx++);
2528	} else {
2529	/ skip this unused q_vector /
2530	continue;
2531	}
2532	if (vsi->type == ICE_VSI_CTRL && vsi->vf)
2533	err = devm_request_irq(dev, irq: irq_num, handler: vsi->irq_handler,
2534	IRQF_SHARED, devname: q_vector->name,
2535	dev_id: q_vector);
2536	else
2537	err = devm_request_irq(dev, irq: irq_num, handler: vsi->irq_handler,
2538	irqflags: `0`, devname: q_vector->name, dev_id: q_vector);
2539	if (err) {
2540	netdev_err(dev: vsi->netdev, format: "MSIX request_irq failed, error: %d\n",
2541	err);
2542	goto free_q_irqs;
2543	}
2544
2545	/ register for affinity change notifications /
2546	if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
2547	struct irq_affinity_notify *affinity_notify;
2548
2549	affinity_notify = &q_vector->affinity_notify;
2550	affinity_notify->notify = ice_irq_affinity_notify;
2551	affinity_notify->release = ice_irq_affinity_release;
2552	irq_set_affinity_notifier(irq: irq_num, notify: affinity_notify);
2553	}
2554
2555	/ assign the mask for this irq /
2556	irq_set_affinity_hint(irq: irq_num, m: &q_vector->affinity_mask);
2557	}
2558
2559	err = ice_set_cpu_rx_rmap(vsi);
2560	if (err) {
2561	netdev_err(dev: vsi->netdev, format: "Failed to setup CPU RMAP on VSI %u: %pe\n",
2562	vsi->vsi_num, ERR_PTR(error: err));
2563	goto free_q_irqs;
2564	}
2565
2566	vsi->irqs_ready = true;
2567	return `0`;
2568
2569	free_q_irqs:
2570	while (vector--) {
2571	irq_num = vsi->q_vectors[vector]->irq.virq;
2572	if (!IS_ENABLED(CONFIG_RFS_ACCEL))
2573	irq_set_affinity_notifier(irq: irq_num, NULL);
2574	irq_set_affinity_hint(irq: irq_num, NULL);
2575	devm_free_irq(dev, irq: irq_num, dev_id: &vsi->q_vectors[vector]);
2576	}
2577	return err;
2578	}
2579
2580	/**
2581	* ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
2582	* @vsi: VSI to setup Tx rings used by XDP
2583	*
2584	* Return 0 on success and negative value on error
2585	*/
2586	static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
2587	{
2588	struct device *dev = ice_pf_to_dev(vsi->back);
2589	struct ice_tx_desc *tx_desc;
2590	int i, j;
2591
2592	ice_for_each_xdp_txq(vsi, i) {
2593	u16 xdp_q_idx = vsi->alloc_txq + i;
2594	struct ice_ring_stats *ring_stats;
2595	struct ice_tx_ring *xdp_ring;
2596
2597	xdp_ring = kzalloc(size: sizeof(*xdp_ring), GFP_KERNEL);
2598	if (!xdp_ring)
2599	goto free_xdp_rings;
2600
2601	ring_stats = kzalloc(size: sizeof(*ring_stats), GFP_KERNEL);
2602	if (!ring_stats) {
2603	ice_free_tx_ring(tx_ring: xdp_ring);
2604	goto free_xdp_rings;
2605	}
2606
2607	xdp_ring->ring_stats = ring_stats;
2608	xdp_ring->q_index = xdp_q_idx;
2609	xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
2610	xdp_ring->vsi = vsi;
2611	xdp_ring->netdev = NULL;
2612	xdp_ring->dev = dev;
2613	xdp_ring->count = vsi->num_tx_desc;
2614	WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
2615	if (ice_setup_tx_ring(tx_ring: xdp_ring))
2616	goto free_xdp_rings;
2617	ice_set_ring_xdp(ring: xdp_ring);
2618	spin_lock_init(&xdp_ring->tx_lock);
2619	for (j = `0`; j < xdp_ring->count; j++) {
2620	tx_desc = ICE_TX_DESC(xdp_ring, j);
2621	tx_desc->cmd_type_offset_bsz = `0`;
2622	}
2623	}
2624
2625	return `0`;
2626
2627	free_xdp_rings:
2628	for (; i >= `0`; i--) {
2629	if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
2630	kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2631	vsi->xdp_rings[i]->ring_stats = NULL;
2632	ice_free_tx_ring(tx_ring: vsi->xdp_rings[i]);
2633	}
2634	}
2635	return -ENOMEM;
2636	}
2637
2638	/**
2639	* ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
2640	* @vsi: VSI to set the bpf prog on
2641	* @prog: the bpf prog pointer
2642	*/
2643	static void ice_vsi_assign_bpf_prog(struct ice_vsi vsi, struct* bpf_prog *prog)
2644	{
2645	struct bpf_prog *old_prog;
2646	int i;
2647
2648	old_prog = xchg(&vsi->xdp_prog, prog);
2649	ice_for_each_rxq(vsi, i)
2650	WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
2651
2652	if (old_prog)
2653	bpf_prog_put(prog: old_prog);
2654	}
2655
2656	/**
2657	* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
2658	* @vsi: VSI to bring up Tx rings used by XDP
2659	* @prog: bpf program that will be assigned to VSI
2660	*
2661	* Return 0 on success and negative value on error
2662	*/
2663	int ice_prepare_xdp_rings(struct ice_vsi vsi, struct* bpf_prog *prog)
2664	{
2665	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { `0` };
2666	int xdp_rings_rem = vsi->num_xdp_txq;
2667	struct ice_pf *pf = vsi->back;
2668	struct ice_qs_cfg xdp_qs_cfg = {
2669	.qs_mutex = &pf->avail_q_mutex,
2670	.pf_map = pf->avail_txqs,
2671	.pf_map_size = pf->max_pf_txqs,
2672	.q_count = vsi->num_xdp_txq,
2673	.scatter_count = ICE_MAX_SCATTER_TXQS,
2674	.vsi_map = vsi->txq_map,
2675	.vsi_map_offset = vsi->alloc_txq,
2676	.mapping_mode = ICE_VSI_MAP_CONTIG
2677	};
2678	struct device *dev;
2679	int i, v_idx;
2680	int status;
2681
2682	dev = ice_pf_to_dev(pf);
2683	vsi->xdp_rings = devm_kcalloc(dev, n: vsi->num_xdp_txq,
2684	size: sizeof(*vsi->xdp_rings), GFP_KERNEL);
2685	if (!vsi->xdp_rings)
2686	return -ENOMEM;
2687
2688	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
2689	if (__ice_vsi_get_qs(qs_cfg: &xdp_qs_cfg))
2690	goto err_map_xdp;
2691
2692	if (static_key_enabled(&ice_xdp_locking_key))
2693	netdev_warn(dev: vsi->netdev,
2694	format: "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");
2695
2696	if (ice_xdp_alloc_setup_rings(vsi))
2697	goto clear_xdp_rings;
2698
2699	/ follow the logic from ice_vsi_map_rings_to_vectors /
2700	ice_for_each_q_vector(vsi, v_idx) {
2701	struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2702	int xdp_rings_per_v, q_id, q_base;
2703
2704	xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
2705	vsi->num_q_vectors - v_idx);
2706	q_base = vsi->num_xdp_txq - xdp_rings_rem;
2707
2708	for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
2709	struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
2710
2711	xdp_ring->q_vector = q_vector;
2712	xdp_ring->next = q_vector->tx.tx_ring;
2713	q_vector->tx.tx_ring = xdp_ring;
2714	}
2715	xdp_rings_rem -= xdp_rings_per_v;
2716	}
2717
2718	ice_for_each_rxq(vsi, i) {
2719	if (static_key_enabled(&ice_xdp_locking_key)) {
2720	vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
2721	} else {
2722	struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
2723	struct ice_tx_ring *ring;
2724
2725	ice_for_each_tx_ring(ring, q_vector->tx) {
2726	if (ice_ring_is_xdp(ring)) {
2727	vsi->rx_rings[i]->xdp_ring = ring;
2728	break;
2729	}
2730	}
2731	}
2732	ice_tx_xsk_pool(vsi, qid: i);
2733	}
2734
2735	/ omit the scheduler update if in reset path; XDP queues will be*
2736	* taken into account at the end of ice_vsi_rebuild, where
2737	* ice_cfg_vsi_lan is being called
2738	*/
2739	if (ice_is_reset_in_progress(state: pf->state))
2740	return `0`;
2741
2742	/ tell the Tx scheduler that right now we have*
2743	* additional queues
2744	*/
2745	for (i = `0`; i < vsi->tc_cfg.numtc; i++)
2746	max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
2747
2748	status = ice_cfg_vsi_lan(pi: vsi->port_info, vsi_handle: vsi->idx, tc_bitmap: vsi->tc_cfg.ena_tc,
2749	max_lanqs: max_txqs);
2750	if (status) {
2751	dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
2752	status);
2753	goto clear_xdp_rings;
2754	}
2755
2756	/ assign the prog only when it's not already present on VSI;*
2757	* this flow is a subject of both ethtool -L and ndo_bpf flows;
2758	* VSI rebuild that happens under ethtool -L can expose us to
2759	* the bpf_prog refcount issues as we would be swapping same
2760	* bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
2761	* on it as it would be treated as an 'old_prog'; for ndo_bpf
2762	* this is not harmful as dev_xdp_install bumps the refcount
2763	* before calling the op exposed by the driver;
2764	*/
2765	if (!ice_is_xdp_ena_vsi(vsi))
2766	ice_vsi_assign_bpf_prog(vsi, prog);
2767
2768	return `0`;
2769	clear_xdp_rings:
2770	ice_for_each_xdp_txq(vsi, i)
2771	if (vsi->xdp_rings[i]) {
2772	kfree_rcu(vsi->xdp_rings[i], rcu);
2773	vsi->xdp_rings[i] = NULL;
2774	}
2775
2776	err_map_xdp:
2777	mutex_lock(&pf->avail_q_mutex);
2778	ice_for_each_xdp_txq(vsi, i) {
2779	clear_bit(nr: vsi->txq_map[i + vsi->alloc_txq], addr: pf->avail_txqs);
2780	vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2781	}
2782	mutex_unlock(lock: &pf->avail_q_mutex);
2783
2784	devm_kfree(dev, p: vsi->xdp_rings);
2785	return -ENOMEM;
2786	}
2787
2788	/**
2789	* ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
2790	* @vsi: VSI to remove XDP rings
2791	*
2792	* Detach XDP rings from irq vectors, clean up the PF bitmap and free
2793	* resources
2794	*/
2795	int ice_destroy_xdp_rings(struct ice_vsi *vsi)
2796	{
2797	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { `0` };
2798	struct ice_pf *pf = vsi->back;
2799	int i, v_idx;
2800
2801	/ q_vectors are freed in reset path so there's no point in detaching*
2802	* rings; in case of rebuild being triggered not from reset bits
2803	* in pf->state won't be set, so additionally check first q_vector
2804	* against NULL
2805	*/
2806	if (ice_is_reset_in_progress(state: pf->state) \|\| !vsi->q_vectors[`0`])
2807	goto free_qmap;
2808
2809	ice_for_each_q_vector(vsi, v_idx) {
2810	struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2811	struct ice_tx_ring *ring;
2812
2813	ice_for_each_tx_ring(ring, q_vector->tx)
2814	if (!ring->tx_buf \|\| !ice_ring_is_xdp(ring))
2815	break;
2816
2817	/ restore the value of last node prior to XDP setup /
2818	q_vector->tx.tx_ring = ring;
2819	}
2820
2821	free_qmap:
2822	mutex_lock(&pf->avail_q_mutex);
2823	ice_for_each_xdp_txq(vsi, i) {
2824	clear_bit(nr: vsi->txq_map[i + vsi->alloc_txq], addr: pf->avail_txqs);
2825	vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2826	}
2827	mutex_unlock(lock: &pf->avail_q_mutex);
2828
2829	ice_for_each_xdp_txq(vsi, i)
2830	if (vsi->xdp_rings[i]) {
2831	if (vsi->xdp_rings[i]->desc) {
2832	synchronize_rcu();
2833	ice_free_tx_ring(tx_ring: vsi->xdp_rings[i]);
2834	}
2835	kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2836	vsi->xdp_rings[i]->ring_stats = NULL;
2837	kfree_rcu(vsi->xdp_rings[i], rcu);
2838	vsi->xdp_rings[i] = NULL;
2839	}
2840
2841	devm_kfree(ice_pf_to_dev(pf), p: vsi->xdp_rings);
2842	vsi->xdp_rings = NULL;
2843
2844	if (static_key_enabled(&ice_xdp_locking_key))
2845	static_branch_dec(&ice_xdp_locking_key);
2846
2847	if (ice_is_reset_in_progress(state: pf->state) \|\| !vsi->q_vectors[`0`])
2848	return `0`;
2849
2850	ice_vsi_assign_bpf_prog(vsi, NULL);
2851
2852	/ notify Tx scheduler that we destroyed XDP queues and bring*
2853	* back the old number of child nodes
2854	*/
2855	for (i = `0`; i < vsi->tc_cfg.numtc; i++)
2856	max_txqs[i] = vsi->num_txq;
2857
2858	/ change number of XDP Tx queues to 0 /
2859	vsi->num_xdp_txq = `0`;
2860
2861	return ice_cfg_vsi_lan(pi: vsi->port_info, vsi_handle: vsi->idx, tc_bitmap: vsi->tc_cfg.ena_tc,
2862	max_lanqs: max_txqs);
2863	}
2864
2865	/**
2866	* ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
2867	* @vsi: VSI to schedule napi on
2868	*/
2869	static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
2870	{
2871	int i;
2872
2873	ice_for_each_rxq(vsi, i) {
2874	struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
2875
2876	if (rx_ring->xsk_pool)
2877	napi_schedule(n: &rx_ring->q_vector->napi);
2878	}
2879	}
2880
2881	/**
2882	* ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have
2883	* @vsi: VSI to determine the count of XDP Tx qs
2884	*
2885	* returns 0 if Tx qs count is higher than at least half of CPU count,
2886	* -ENOMEM otherwise
2887	*/
2888	int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
2889	{
2890	u16 avail = ice_get_avail_txq_count(pf: vsi->back);
2891	u16 cpus = num_possible_cpus();
2892
2893	if (avail < cpus / `2`)
2894	return -ENOMEM;
2895
2896	vsi->num_xdp_txq = min_t(u16, avail, cpus);
2897
2898	if (vsi->num_xdp_txq < cpus)
2899	static_branch_inc(&ice_xdp_locking_key);
2900
2901	return `0`;
2902	}
2903
2904	/**
2905	* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
2906	* @vsi: Pointer to VSI structure
2907	*/
2908	static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
2909	{
2910	if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
2911	return ICE_RXBUF_1664;
2912	else
2913	return ICE_RXBUF_3072;
2914	}
2915
2916	/**
2917	* ice_xdp_setup_prog - Add or remove XDP eBPF program
2918	* @vsi: VSI to setup XDP for
2919	* @prog: XDP program
2920	* @extack: netlink extended ack
2921	*/
2922	static int
2923	ice_xdp_setup_prog(struct ice_vsi vsi, struct* bpf_prog *prog,
2924	struct netlink_ext_ack *extack)
2925	{
2926	unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
2927	bool if_running = netif_running(dev: vsi->netdev);
2928	int ret = `0`, xdp_ring_err = `0`;
2929
2930	if (prog && !prog->aux->xdp_has_frags) {
2931	if (frame_size > ice_max_xdp_frame_size(vsi)) {
2932	NL_SET_ERR_MSG_MOD(extack,
2933	"MTU is too large for linear frames and XDP prog does not support frags");
2934	return -EOPNOTSUPP;
2935	}
2936	}
2937
2938	/ hot swap progs and avoid toggling link /
2939	if (ice_is_xdp_ena_vsi(vsi) == !!prog) {
2940	ice_vsi_assign_bpf_prog(vsi, prog);
2941	return `0`;
2942	}
2943
2944	/ need to stop netdev while setting up the program for Rx rings /
2945	if (if_running && !test_and_set_bit(nr: ICE_VSI_DOWN, addr: vsi->state)) {
2946	ret = ice_down(vsi);
2947	if (ret) {
2948	NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
2949	return ret;
2950	}
2951	}
2952
2953	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
2954	xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
2955	if (xdp_ring_err) {
2956	NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
2957	} else {
2958	xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
2959	if (xdp_ring_err)
2960	NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
2961	}
2962	xdp_features_set_redirect_target(dev: vsi->netdev, support_sg: true);
2963	/ reallocate Rx queues that are used for zero-copy /
2964	xdp_ring_err = ice_realloc_zc_buf(vsi, zc: true);
2965	if (xdp_ring_err)
2966	NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
2967	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
2968	xdp_features_clear_redirect_target(dev: vsi->netdev);
2969	xdp_ring_err = ice_destroy_xdp_rings(vsi);
2970	if (xdp_ring_err)
2971	NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
2972	/ reallocate Rx queues that were used for zero-copy /
2973	xdp_ring_err = ice_realloc_zc_buf(vsi, zc: false);
2974	if (xdp_ring_err)
2975	NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
2976	}
2977
2978	if (if_running)
2979	ret = ice_up(vsi);
2980
2981	if (!ret && prog)
2982	ice_vsi_rx_napi_schedule(vsi);
2983
2984	return (ret \|\| xdp_ring_err) ? -ENOMEM : `0`;
2985	}
2986
2987	/**
2988	* ice_xdp_safe_mode - XDP handler for safe mode
2989	* @dev: netdevice
2990	* @xdp: XDP command
2991	*/
2992	static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
2993	struct netdev_bpf *xdp)
2994	{
2995	NL_SET_ERR_MSG_MOD(xdp->extack,
2996	"Please provide working DDP firmware package in order to use XDP\n"
2997	"Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
2998	return -EOPNOTSUPP;
2999	}
3000
3001	/**
3002	* ice_xdp - implements XDP handler
3003	* @dev: netdevice
3004	* @xdp: XDP command
3005	*/
3006	static int ice_xdp(struct net_device dev, struct* netdev_bpf *xdp)
3007	{
3008	struct ice_netdev_priv *np = netdev_priv(dev);
3009	struct ice_vsi *vsi = np->vsi;
3010
3011	if (vsi->type != ICE_VSI_PF) {
3012	NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
3013	return -EINVAL;
3014	}
3015
3016	switch (xdp->command) {
3017	case XDP_SETUP_PROG:
3018	return ice_xdp_setup_prog(vsi, prog: xdp->prog, extack: xdp->extack);
3019	case XDP_SETUP_XSK_POOL:
3020	return ice_xsk_pool_setup(vsi, pool: xdp->xsk.pool,
3021	qid: xdp->xsk.queue_id);
3022	default:
3023	return -EINVAL;
3024	}
3025	}
3026
3027	/**
3028	* ice_ena_misc_vector - enable the non-queue interrupts
3029	* @pf: board private structure
3030	*/
3031	static void ice_ena_misc_vector(struct ice_pf *pf)
3032	{
3033	struct ice_hw *hw = &pf->hw;
3034	u32 val;
3035
3036	/ Disable anti-spoof detection interrupt to prevent spurious event*
3037	* interrupts during a function reset. Anti-spoof functionally is
3038	* still supported.
3039	*/
3040	val = rd32(hw, GL_MDCK_TX_TDPU);
3041	val \|= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
3042	wr32(hw, GL_MDCK_TX_TDPU, val);
3043
3044	/ clear things first /
3045	wr32(hw, PFINT_OICR_ENA, `0`); / disable all /
3046	rd32(hw, PFINT_OICR); / read to clear /
3047
3048	val = (PFINT_OICR_ECC_ERR_M \|
3049	PFINT_OICR_MAL_DETECT_M \|
3050	PFINT_OICR_GRST_M \|
3051	PFINT_OICR_PCI_EXCEPTION_M \|
3052	PFINT_OICR_VFLR_M \|
3053	PFINT_OICR_HMC_ERR_M \|
3054	PFINT_OICR_PE_PUSH_M \|
3055	PFINT_OICR_PE_CRITERR_M);
3056
3057	wr32(hw, PFINT_OICR_ENA, val);
3058
3059	/ SW_ITR_IDX = 0, but don't change INTENA /
3060	wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
3061	GLINT_DYN_CTL_SW_ITR_INDX_M \| GLINT_DYN_CTL_INTENA_MSK_M);
3062	}
3063
3064	/**
3065	* ice_misc_intr - misc interrupt handler
3066	* @irq: interrupt number
3067	* @data: pointer to a q_vector
3068	*/
3069	static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
3070	{
3071	struct ice_pf pf = (struct* ice_pf *)data;
3072	struct ice_hw *hw = &pf->hw;
3073	struct device *dev;
3074	u32 oicr, ena_mask;
3075
3076	dev = ice_pf_to_dev(pf);
3077	set_bit(nr: ICE_ADMINQ_EVENT_PENDING, addr: pf->state);
3078	set_bit(nr: ICE_MAILBOXQ_EVENT_PENDING, addr: pf->state);
3079	set_bit(nr: ICE_SIDEBANDQ_EVENT_PENDING, addr: pf->state);
3080
3081	oicr = rd32(hw, PFINT_OICR);
3082	ena_mask = rd32(hw, PFINT_OICR_ENA);
3083
3084	if (oicr & PFINT_OICR_SWINT_M) {
3085	ena_mask &= ~PFINT_OICR_SWINT_M;
3086	pf->sw_int_count++;
3087	}
3088
3089	if (oicr & PFINT_OICR_MAL_DETECT_M) {
3090	ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
3091	set_bit(nr: ICE_MDD_EVENT_PENDING, addr: pf->state);
3092	}
3093	if (oicr & PFINT_OICR_VFLR_M) {
3094	/ disable any further VFLR event notifications /
3095	if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
3096	u32 reg = rd32(hw, PFINT_OICR_ENA);
3097
3098	reg &= ~PFINT_OICR_VFLR_M;
3099	wr32(hw, PFINT_OICR_ENA, reg);
3100	} else {
3101	ena_mask &= ~PFINT_OICR_VFLR_M;
3102	set_bit(nr: ICE_VFLR_EVENT_PENDING, addr: pf->state);
3103	}
3104	}
3105
3106	if (oicr & PFINT_OICR_GRST_M) {
3107	u32 reset;
3108
3109	/ we have a reset warning /
3110	ena_mask &= ~PFINT_OICR_GRST_M;
3111	reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
3112	GLGEN_RSTAT_RESET_TYPE_S;
3113
3114	if (reset == ICE_RESET_CORER)
3115	pf->corer_count++;
3116	else if (reset == ICE_RESET_GLOBR)
3117	pf->globr_count++;
3118	else if (reset == ICE_RESET_EMPR)
3119	pf->empr_count++;
3120	else
3121	dev_dbg(dev, "Invalid reset type %d\n", reset);
3122
3123	/ If a reset cycle isn't already in progress, we set a bit in*
3124	* pf->state so that the service task can start a reset/rebuild.
3125	*/
3126	if (!test_and_set_bit(nr: ICE_RESET_OICR_RECV, addr: pf->state)) {
3127	if (reset == ICE_RESET_CORER)
3128	set_bit(nr: ICE_CORER_RECV, addr: pf->state);
3129	else if (reset == ICE_RESET_GLOBR)
3130	set_bit(nr: ICE_GLOBR_RECV, addr: pf->state);
3131	else
3132	set_bit(nr: ICE_EMPR_RECV, addr: pf->state);
3133
3134	/ There are couple of different bits at play here.*
3135	* hw->reset_ongoing indicates whether the hardware is
3136	* in reset. This is set to true when a reset interrupt
3137	* is received and set back to false after the driver
3138	* has determined that the hardware is out of reset.
3139	*
3140	* ICE_RESET_OICR_RECV in pf->state indicates
3141	* that a post reset rebuild is required before the
3142	* driver is operational again. This is set above.
3143	*
3144	* As this is the start of the reset/rebuild cycle, set
3145	* both to indicate that.
3146	*/
3147	hw->reset_ongoing = true;
3148	}
3149	}
3150
3151	if (oicr & PFINT_OICR_TSYN_TX_M) {
3152	ena_mask &= ~PFINT_OICR_TSYN_TX_M;
3153	if (!hw->reset_ongoing && ice_ptp_pf_handles_tx_interrupt(pf))
3154	set_bit(nr: ICE_MISC_THREAD_TX_TSTAMP, addr: pf->misc_thread);
3155	}
3156
3157	if (oicr & PFINT_OICR_TSYN_EVNT_M) {
3158	u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
3159	u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
3160
3161	ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
3162
3163	if (ice_pf_src_tmr_owned(pf)) {
3164	/ Save EVENTs from GLTSYN register /
3165	pf->ptp.ext_ts_irq \|= gltsyn_stat &
3166	(GLTSYN_STAT_EVENT0_M \|
3167	GLTSYN_STAT_EVENT1_M \|
3168	GLTSYN_STAT_EVENT2_M);
3169
3170	set_bit(nr: ICE_MISC_THREAD_EXTTS_EVENT, addr: pf->misc_thread);
3171	}
3172	}
3173
3174	#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M \| PFINT_OICR_HMC_ERR_M \| PFINT_OICR_PE_PUSH_M)
3175	if (oicr & ICE_AUX_CRIT_ERR) {
3176	pf->oicr_err_reg \|= oicr;
3177	set_bit(nr: ICE_AUX_ERR_PENDING, addr: pf->state);
3178	ena_mask &= ~ICE_AUX_CRIT_ERR;
3179	}
3180
3181	/ Report any remaining unexpected interrupts /
3182	oicr &= ena_mask;
3183	if (oicr) {
3184	dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
3185	/ If a critical error is pending there is no choice but to*
3186	* reset the device.
3187	*/
3188	if (oicr & (PFINT_OICR_PCI_EXCEPTION_M \|
3189	PFINT_OICR_ECC_ERR_M)) {
3190	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
3191	}
3192	}
3193
3194	return IRQ_WAKE_THREAD;
3195	}
3196
3197	/**
3198	* ice_misc_intr_thread_fn - misc interrupt thread function
3199	* @irq: interrupt number
3200	* @data: pointer to a q_vector
3201	*/
3202	static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
3203	{
3204	struct ice_pf *pf = data;
3205	struct ice_hw *hw;
3206
3207	hw = &pf->hw;
3208
3209	if (ice_is_reset_in_progress(state: pf->state))
3210	return IRQ_HANDLED;
3211
3212	ice_service_task_schedule(pf);
3213
3214	if (test_and_clear_bit(nr: ICE_MISC_THREAD_EXTTS_EVENT, addr: pf->misc_thread))
3215	ice_ptp_extts_event(pf);
3216
3217	if (test_and_clear_bit(nr: ICE_MISC_THREAD_TX_TSTAMP, addr: pf->misc_thread)) {
3218	/ Process outstanding Tx timestamps. If there is more work,*
3219	* re-arm the interrupt to trigger again.
3220	*/
3221	if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) {
3222	wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
3223	ice_flush(hw);
3224	}
3225	}
3226
3227	ice_irq_dynamic_ena(hw, NULL, NULL);
3228
3229	return IRQ_HANDLED;
3230	}
3231
3232	/**
3233	* ice_dis_ctrlq_interrupts - disable control queue interrupts
3234	* @hw: pointer to HW structure
3235	*/
3236	static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
3237	{
3238	/ disable Admin queue Interrupt causes /
3239	wr32(hw, PFINT_FW_CTL,
3240	rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
3241
3242	/ disable Mailbox queue Interrupt causes /
3243	wr32(hw, PFINT_MBX_CTL,
3244	rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
3245
3246	wr32(hw, PFINT_SB_CTL,
3247	rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
3248
3249	/ disable Control queue Interrupt causes /
3250	wr32(hw, PFINT_OICR_CTL,
3251	rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
3252
3253	ice_flush(hw);
3254	}
3255
3256	/**
3257	* ice_free_irq_msix_misc - Unroll misc vector setup
3258	* @pf: board private structure
3259	*/
3260	static void ice_free_irq_msix_misc(struct ice_pf *pf)
3261	{
3262	int misc_irq_num = pf->oicr_irq.virq;
3263	struct ice_hw *hw = &pf->hw;
3264
3265	ice_dis_ctrlq_interrupts(hw);
3266
3267	/ disable OICR interrupt /
3268	wr32(hw, PFINT_OICR_ENA, `0`);
3269	ice_flush(hw);
3270
3271	synchronize_irq(irq: misc_irq_num);
3272	devm_free_irq(ice_pf_to_dev(pf), irq: misc_irq_num, dev_id: pf);
3273
3274	ice_free_irq(pf, map: pf->oicr_irq);
3275	}
3276
3277	/**
3278	* ice_ena_ctrlq_interrupts - enable control queue interrupts
3279	* @hw: pointer to HW structure
3280	* @reg_idx: HW vector index to associate the control queue interrupts with
3281	*/
3282	static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
3283	{
3284	u32 val;
3285
3286	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) \|
3287	PFINT_OICR_CTL_CAUSE_ENA_M);
3288	wr32(hw, PFINT_OICR_CTL, val);
3289
3290	/ enable Admin queue Interrupt causes /
3291	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) \|
3292	PFINT_FW_CTL_CAUSE_ENA_M);
3293	wr32(hw, PFINT_FW_CTL, val);
3294
3295	/ enable Mailbox queue Interrupt causes /
3296	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) \|
3297	PFINT_MBX_CTL_CAUSE_ENA_M);
3298	wr32(hw, PFINT_MBX_CTL, val);
3299
3300	/ This enables Sideband queue Interrupt causes /
3301	val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) \|
3302	PFINT_SB_CTL_CAUSE_ENA_M);
3303	wr32(hw, PFINT_SB_CTL, val);
3304
3305	ice_flush(hw);
3306	}
3307
3308	/**
3309	* ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
3310	* @pf: board private structure
3311	*
3312	* This sets up the handler for MSIX 0, which is used to manage the
3313	* non-queue interrupts, e.g. AdminQ and errors. This is not used
3314	* when in MSI or Legacy interrupt mode.
3315	*/
3316	static int ice_req_irq_msix_misc(struct ice_pf *pf)
3317	{
3318	struct device *dev = ice_pf_to_dev(pf);
3319	struct ice_hw *hw = &pf->hw;
3320	struct msi_map oicr_irq;
3321	int err = `0`;
3322
3323	if (!pf->int_name[`0`])
3324	snprintf(buf: pf->int_name, size: sizeof(pf->int_name) - `1`, fmt: "%s-%s:misc",
3325	dev_driver_string(dev), dev_name(dev));
3326
3327	/ Do not request IRQ but do enable OICR interrupt since settings are*
3328	* lost during reset. Note that this function is called only during
3329	* rebuild path and not while reset is in progress.
3330	*/
3331	if (ice_is_reset_in_progress(state: pf->state))
3332	goto skip_req_irq;
3333
3334	/ reserve one vector in irq_tracker for misc interrupts /
3335	oicr_irq = ice_alloc_irq(pf, dyn_only: false);
3336	if (oicr_irq.index < `0`)
3337	return oicr_irq.index;
3338
3339	pf->oicr_irq = oicr_irq;
3340	err = devm_request_threaded_irq(dev, irq: pf->oicr_irq.virq, handler: ice_misc_intr,
3341	thread_fn: ice_misc_intr_thread_fn, irqflags: `0`,
3342	devname: pf->int_name, dev_id: pf);
3343	if (err) {
3344	dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
3345	pf->int_name, err);
3346	ice_free_irq(pf, map: pf->oicr_irq);
3347	return err;
3348	}
3349
3350	skip_req_irq:
3351	ice_ena_misc_vector(pf);
3352
3353	ice_ena_ctrlq_interrupts(hw, reg_idx: pf->oicr_irq.index);
3354	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index),
3355	ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
3356
3357	ice_flush(hw);
3358	ice_irq_dynamic_ena(hw, NULL, NULL);
3359
3360	return `0`;
3361	}
3362
3363	/**
3364	* ice_napi_add - register NAPI handler for the VSI
3365	* @vsi: VSI for which NAPI handler is to be registered
3366	*
3367	* This function is only called in the driver's load path. Registering the NAPI
3368	* handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
3369	* reset/rebuild, etc.)
3370	*/
3371	static void ice_napi_add(struct ice_vsi *vsi)
3372	{
3373	int v_idx;
3374
3375	if (!vsi->netdev)
3376	return;
3377
3378	ice_for_each_q_vector(vsi, v_idx)
3379	netif_napi_add(dev: vsi->netdev, napi: &vsi->q_vectors[v_idx]->napi,
3380	poll: ice_napi_poll);
3381	}
3382
3383	/**
3384	* ice_set_ops - set netdev and ethtools ops for the given netdev
3385	* @vsi: the VSI associated with the new netdev
3386	*/
3387	static void ice_set_ops(struct ice_vsi *vsi)
3388	{
3389	struct net_device *netdev = vsi->netdev;
3390	struct ice_pf *pf = ice_netdev_to_pf(netdev);
3391
3392	if (ice_is_safe_mode(pf)) {
3393	netdev->netdev_ops = &ice_netdev_safe_mode_ops;
3394	ice_set_ethtool_safe_mode_ops(netdev);
3395	return;
3396	}
3397
3398	netdev->netdev_ops = &ice_netdev_ops;
3399	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
3400	ice_set_ethtool_ops(netdev);
3401
3402	if (vsi->type != ICE_VSI_PF)
3403	return;
3404
3405	netdev->xdp_features = NETDEV_XDP_ACT_BASIC \| NETDEV_XDP_ACT_REDIRECT \|
3406	NETDEV_XDP_ACT_XSK_ZEROCOPY \|
3407	NETDEV_XDP_ACT_RX_SG;
3408	netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
3409	}
3410
3411	/**
3412	* ice_set_netdev_features - set features for the given netdev
3413	* @netdev: netdev instance
3414	*/
3415	static void ice_set_netdev_features(struct net_device *netdev)
3416	{
3417	struct ice_pf *pf = ice_netdev_to_pf(netdev);
3418	bool is_dvm_ena = ice_is_dvm_ena(hw: &pf->hw);
3419	netdev_features_t csumo_features;
3420	netdev_features_t vlano_features;
3421	netdev_features_t dflt_features;
3422	netdev_features_t tso_features;
3423
3424	if (ice_is_safe_mode(pf)) {
3425	/ safe mode /
3426	netdev->features = NETIF_F_SG \| NETIF_F_HIGHDMA;
3427	netdev->hw_features = netdev->features;
3428	return;
3429	}
3430
3431	dflt_features = NETIF_F_SG \|
3432	NETIF_F_HIGHDMA \|
3433	NETIF_F_NTUPLE \|
3434	NETIF_F_RXHASH;
3435
3436	csumo_features = NETIF_F_RXCSUM \|
3437	NETIF_F_IP_CSUM \|
3438	NETIF_F_SCTP_CRC \|
3439	NETIF_F_IPV6_CSUM;
3440
3441	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER \|
3442	NETIF_F_HW_VLAN_CTAG_TX \|
3443	NETIF_F_HW_VLAN_CTAG_RX;
3444
3445	/ Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) /
3446	if (is_dvm_ena)
3447	vlano_features \|= NETIF_F_HW_VLAN_STAG_FILTER;
3448
3449	tso_features = NETIF_F_TSO \|
3450	NETIF_F_TSO_ECN \|
3451	NETIF_F_TSO6 \|
3452	NETIF_F_GSO_GRE \|
3453	NETIF_F_GSO_UDP_TUNNEL \|
3454	NETIF_F_GSO_GRE_CSUM \|
3455	NETIF_F_GSO_UDP_TUNNEL_CSUM \|
3456	NETIF_F_GSO_PARTIAL \|
3457	NETIF_F_GSO_IPXIP4 \|
3458	NETIF_F_GSO_IPXIP6 \|
3459	NETIF_F_GSO_UDP_L4;
3460
3461	netdev->gso_partial_features \|= NETIF_F_GSO_UDP_TUNNEL_CSUM \|
3462	NETIF_F_GSO_GRE_CSUM;
3463	/ set features that user can change /
3464	netdev->hw_features = dflt_features \| csumo_features \|
3465	vlano_features \| tso_features;
3466
3467	/ add support for HW_CSUM on packets with MPLS header /
3468	netdev->mpls_features = NETIF_F_HW_CSUM \|
3469	NETIF_F_TSO \|
3470	NETIF_F_TSO6;
3471
3472	/ enable features /
3473	netdev->features \|= netdev->hw_features;
3474
3475	netdev->hw_features \|= NETIF_F_HW_TC;
3476	netdev->hw_features \|= NETIF_F_LOOPBACK;
3477
3478	/ encap and VLAN devices inherit default, csumo and tso features /
3479	netdev->hw_enc_features \|= dflt_features \| csumo_features \|
3480	tso_features;
3481	netdev->vlan_features \|= dflt_features \| csumo_features \|
3482	tso_features;
3483
3484	/ advertise support but don't enable by default since only one type of*
3485	* VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
3486	* type turns on the other has to be turned off. This is enforced by the
3487	* ice_fix_features() ndo callback.
3488	*/
3489	if (is_dvm_ena)
3490	netdev->hw_features \|= NETIF_F_HW_VLAN_STAG_RX \|
3491	NETIF_F_HW_VLAN_STAG_TX;
3492
3493	/ Leave CRC / FCS stripping enabled by default, but allow the value to*
3494	* be changed at runtime
3495	*/
3496	netdev->hw_features \|= NETIF_F_RXFCS;
3497
3498	netif_set_tso_max_size(dev: netdev, ICE_MAX_TSO_SIZE);
3499	}
3500
/**
 * ice_fill_rss_lut - Fill the RSS lookup table with default values
 * @lut: Lookup table
 * @rss_table_size: Lookup table size
 * @rss_size: Range of queue number for hashing
 *
 * Entry i is set to i % @rss_size, spreading the table round-robin over
 * queues 0..@rss_size - 1. A @rss_size of 0 is treated as 1 (every entry
 * is 0) instead of dividing by zero.
 */
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
{
	u16 i;

	/* guard the modulo below against a zero divisor */
	if (!rss_size)
		rss_size = 1;

	for (i = 0; i < rss_table_size; i++)
		lut[i] = i % rss_size;
}
3514
3515	/**
3516	* ice_pf_vsi_setup - Set up a PF VSI
3517	* @pf: board private structure
3518	* @pi: pointer to the port_info instance
3519	*
3520	* Returns pointer to the successfully allocated VSI software struct
3521	* on success, otherwise returns NULL on failure.
3522	*/
3523	static struct ice_vsi *
3524	ice_pf_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi)
3525	{
3526	struct ice_vsi_cfg_params params = {};
3527
3528	params.type = ICE_VSI_PF;
3529	params.pi = pi;
3530	params.flags = ICE_VSI_FLAG_INIT;
3531
3532	return ice_vsi_setup(pf, params: &params);
3533	}
3534
3535	static struct ice_vsi *
3536	ice_chnl_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi,
3537	struct ice_channel *ch)
3538	{
3539	struct ice_vsi_cfg_params params = {};
3540
3541	params.type = ICE_VSI_CHNL;
3542	params.pi = pi;
3543	params.ch = ch;
3544	params.flags = ICE_VSI_FLAG_INIT;
3545
3546	return ice_vsi_setup(pf, params: &params);
3547	}
3548
3549	/**
3550	* ice_ctrl_vsi_setup - Set up a control VSI
3551	* @pf: board private structure
3552	* @pi: pointer to the port_info instance
3553	*
3554	* Returns pointer to the successfully allocated VSI software struct
3555	* on success, otherwise returns NULL on failure.
3556	*/
3557	static struct ice_vsi *
3558	ice_ctrl_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi)
3559	{
3560	struct ice_vsi_cfg_params params = {};
3561
3562	params.type = ICE_VSI_CTRL;
3563	params.pi = pi;
3564	params.flags = ICE_VSI_FLAG_INIT;
3565
3566	return ice_vsi_setup(pf, params: &params);
3567	}
3568
3569	/**
3570	* ice_lb_vsi_setup - Set up a loopback VSI
3571	* @pf: board private structure
3572	* @pi: pointer to the port_info instance
3573	*
3574	* Returns pointer to the successfully allocated VSI software struct
3575	* on success, otherwise returns NULL on failure.
3576	*/
3577	struct ice_vsi *
3578	ice_lb_vsi_setup(struct ice_pf pf, struct* ice_port_info *pi)
3579	{
3580	struct ice_vsi_cfg_params params = {};
3581
3582	params.type = ICE_VSI_LB;
3583	params.pi = pi;
3584	params.flags = ICE_VSI_FLAG_INIT;
3585
3586	return ice_vsi_setup(pf, params: &params);
3587	}
3588
3589	/**
3590	* ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
3591	* @netdev: network interface to be adjusted
3592	* @proto: VLAN TPID
3593	* @vid: VLAN ID to be added
3594	*
3595	* net_device_ops implementation for adding VLAN IDs
3596	*/
3597	static int
3598	ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3599	{
3600	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
3601	struct ice_vsi_vlan_ops *vlan_ops;
3602	struct ice_vsi *vsi = np->vsi;
3603	struct ice_vlan vlan;
3604	int ret;
3605
3606	/ VLAN 0 is added by default during load/reset /
3607	if (!vid)
3608	return `0`;
3609
3610	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: vsi->state))
3611	usleep_range(min: `1000`, max: `2000`);
3612
3613	/ Add multicast promisc rule for the VLAN ID to be added if*
3614	* all-multicast is currently enabled.
3615	*/
3616	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3617	ret = ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3618	ICE_MCAST_VLAN_PROMISC_BITS,
3619	vid);
3620	if (ret)
3621	goto finish;
3622	}
3623
3624	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3625
3626	/ Add a switch rule for this VLAN ID so its corresponding VLAN tagged*
3627	* packets aren't pruned by the device's internal switch on Rx
3628	*/
3629	vlan = ICE_VLAN(be16_to_cpu(proto), vid, `0`);
3630	ret = vlan_ops->add_vlan(vsi, &vlan);
3631	if (ret)
3632	goto finish;
3633
3634	/ If all-multicast is currently enabled and this VLAN ID is only one*
3635	* besides VLAN-0 we have to update look-up type of multicast promisc
3636	* rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
3637	*/
3638	if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
3639	ice_vsi_num_non_zero_vlans(vsi) == `1`) {
3640	ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3641	ICE_MCAST_PROMISC_BITS, vid: `0`);
3642	ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3643	ICE_MCAST_VLAN_PROMISC_BITS, vid: `0`);
3644	}
3645
3646	finish:
3647	clear_bit(nr: ICE_CFG_BUSY, addr: vsi->state);
3648
3649	return ret;
3650	}
3651
3652	/**
3653	* ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
3654	* @netdev: network interface to be adjusted
3655	* @proto: VLAN TPID
3656	* @vid: VLAN ID to be removed
3657	*
3658	* net_device_ops implementation for removing VLAN IDs
3659	*/
3660	static int
3661	ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3662	{
3663	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
3664	struct ice_vsi_vlan_ops *vlan_ops;
3665	struct ice_vsi *vsi = np->vsi;
3666	struct ice_vlan vlan;
3667	int ret;
3668
3669	/ don't allow removal of VLAN 0 /
3670	if (!vid)
3671	return `0`;
3672
3673	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: vsi->state))
3674	usleep_range(min: `1000`, max: `2000`);
3675
3676	ret = ice_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3677	ICE_MCAST_VLAN_PROMISC_BITS, vid);
3678	if (ret) {
3679	netdev_err(dev: netdev, format: "Error clearing multicast promiscuous mode on VSI %i\n",
3680	vsi->vsi_num);
3681	vsi->current_netdev_flags \|= IFF_ALLMULTI;
3682	}
3683
3684	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3685
3686	/ Make sure VLAN delete is successful before updating VLAN*
3687	* information
3688	*/
3689	vlan = ICE_VLAN(be16_to_cpu(proto), vid, `0`);
3690	ret = vlan_ops->del_vlan(vsi, &vlan);
3691	if (ret)
3692	goto finish;
3693
3694	/ Remove multicast promisc rule for the removed VLAN ID if*
3695	* all-multicast is enabled.
3696	*/
3697	if (vsi->current_netdev_flags & IFF_ALLMULTI)
3698	ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3699	ICE_MCAST_VLAN_PROMISC_BITS, vid);
3700
3701	if (!ice_vsi_has_non_zero_vlans(vsi)) {
3702	/ Update look-up type of multicast promisc rule for VLAN 0*
3703	* from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
3704	* all-multicast is enabled and VLAN 0 is the only VLAN rule.
3705	*/
3706	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3707	ice_fltr_clear_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3708	ICE_MCAST_VLAN_PROMISC_BITS,
3709	vid: `0`);
3710	ice_fltr_set_vsi_promisc(hw: &vsi->back->hw, vsi_handle: vsi->idx,
3711	ICE_MCAST_PROMISC_BITS, vid: `0`);
3712	}
3713	}
3714
3715	finish:
3716	clear_bit(nr: ICE_CFG_BUSY, addr: vsi->state);
3717
3718	return ret;
3719	}
3720
3721	/**
3722	* ice_rep_indr_tc_block_unbind
3723	* @cb_priv: indirection block private data
3724	*/
3725	static void ice_rep_indr_tc_block_unbind(void *cb_priv)
3726	{
3727	struct ice_indr_block_priv *indr_priv = cb_priv;
3728
3729	list_del(entry: &indr_priv->list);
3730	kfree(objp: indr_priv);
3731	}
3732
3733	/**
3734	* ice_tc_indir_block_unregister - Unregister TC indirect block notifications
3735	* @vsi: VSI struct which has the netdev
3736	*/
3737	static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
3738	{
3739	struct ice_netdev_priv *np = netdev_priv(dev: vsi->netdev);
3740
3741	flow_indr_dev_unregister(cb: ice_indr_setup_tc_cb, cb_priv: np,
3742	release: ice_rep_indr_tc_block_unbind);
3743	}
3744
3745	/**
3746	* ice_tc_indir_block_register - Register TC indirect block notifications
3747	* @vsi: VSI struct which has the netdev
3748	*
3749	* Returns 0 on success, negative value on failure
3750	*/
3751	static int ice_tc_indir_block_register(struct ice_vsi *vsi)
3752	{
3753	struct ice_netdev_priv *np;
3754
3755	if (!vsi \|\| !vsi->netdev)
3756	return -EINVAL;
3757
3758	np = netdev_priv(dev: vsi->netdev);
3759
3760	INIT_LIST_HEAD(list: &np->tc_indr_block_priv_list);
3761	return flow_indr_dev_register(cb: ice_indr_setup_tc_cb, cb_priv: np);
3762	}
3763
3764	/**
3765	* ice_get_avail_q_count - Get count of queues in use
3766	* @pf_qmap: bitmap to get queue use count from
3767	* @lock: pointer to a mutex that protects access to pf_qmap
3768	* @size: size of the bitmap
3769	*/
3770	static u16
3771	ice_get_avail_q_count(unsigned long pf_qmap, struct* mutex *lock, u16 size)
3772	{
3773	unsigned long bit;
3774	u16 count = `0`;
3775
3776	mutex_lock(lock);
3777	for_each_clear_bit(bit, pf_qmap, size)
3778	count++;
3779	mutex_unlock(lock);
3780
3781	return count;
3782	}
3783
3784	/**
3785	* ice_get_avail_txq_count - Get count of Tx queues in use
3786	* @pf: pointer to an ice_pf instance
3787	*/
3788	u16 ice_get_avail_txq_count(struct ice_pf *pf)
3789	{
3790	return ice_get_avail_q_count(pf_qmap: pf->avail_txqs, lock: &pf->avail_q_mutex,
3791	size: pf->max_pf_txqs);
3792	}
3793
3794	/**
3795	* ice_get_avail_rxq_count - Get count of Rx queues in use
3796	* @pf: pointer to an ice_pf instance
3797	*/
3798	u16 ice_get_avail_rxq_count(struct ice_pf *pf)
3799	{
3800	return ice_get_avail_q_count(pf_qmap: pf->avail_rxqs, lock: &pf->avail_q_mutex,
3801	size: pf->max_pf_rxqs);
3802	}
3803
3804	/**
3805	* ice_deinit_pf - Unrolls initialziations done by ice_init_pf
3806	* @pf: board private structure to initialize
3807	*/
3808	static void ice_deinit_pf(struct ice_pf *pf)
3809	{
3810	ice_service_task_stop(pf);
3811	mutex_destroy(lock: &pf->lag_mutex);
3812	mutex_destroy(lock: &pf->adev_mutex);
3813	mutex_destroy(lock: &pf->sw_mutex);
3814	mutex_destroy(lock: &pf->tc_mutex);
3815	mutex_destroy(lock: &pf->avail_q_mutex);
3816	mutex_destroy(lock: &pf->vfs.table_lock);
3817
3818	if (pf->avail_txqs) {
3819	bitmap_free(bitmap: pf->avail_txqs);
3820	pf->avail_txqs = NULL;
3821	}
3822
3823	if (pf->avail_rxqs) {
3824	bitmap_free(bitmap: pf->avail_rxqs);
3825	pf->avail_rxqs = NULL;
3826	}
3827
3828	if (pf->ptp.clock)
3829	ptp_clock_unregister(ptp: pf->ptp.clock);
3830	}
3831
3832	/**
3833	* ice_set_pf_caps - set PFs capability flags
3834	* @pf: pointer to the PF instance
3835	*/
3836	static void ice_set_pf_caps(struct ice_pf *pf)
3837	{
3838	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
3839
3840	clear_bit(nr: ICE_FLAG_RDMA_ENA, addr: pf->flags);
3841	if (func_caps->common_cap.rdma)
3842	set_bit(nr: ICE_FLAG_RDMA_ENA, addr: pf->flags);
3843	clear_bit(nr: ICE_FLAG_DCB_CAPABLE, addr: pf->flags);
3844	if (func_caps->common_cap.dcb)
3845	set_bit(nr: ICE_FLAG_DCB_CAPABLE, addr: pf->flags);
3846	clear_bit(nr: ICE_FLAG_SRIOV_CAPABLE, addr: pf->flags);
3847	if (func_caps->common_cap.sr_iov_1_1) {
3848	set_bit(nr: ICE_FLAG_SRIOV_CAPABLE, addr: pf->flags);
3849	pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
3850	ICE_MAX_SRIOV_VFS);
3851	}
3852	clear_bit(nr: ICE_FLAG_RSS_ENA, addr: pf->flags);
3853	if (func_caps->common_cap.rss_table_size)
3854	set_bit(nr: ICE_FLAG_RSS_ENA, addr: pf->flags);
3855
3856	clear_bit(nr: ICE_FLAG_FD_ENA, addr: pf->flags);
3857	if (func_caps->fd_fltr_guar > `0` \|\| func_caps->fd_fltr_best_effort > `0`) {
3858	u16 unused;
3859
3860	/ ctrl_vsi_idx will be set to a valid value when flow director*
3861	* is setup by ice_init_fdir
3862	*/
3863	pf->ctrl_vsi_idx = ICE_NO_VSI;
3864	set_bit(nr: ICE_FLAG_FD_ENA, addr: pf->flags);
3865	/ force guaranteed filter pool for PF /
3866	ice_alloc_fd_guar_item(hw: &pf->hw, cntr_id: &unused,
3867	num_fltr: func_caps->fd_fltr_guar);
3868	/ force shared filter pool for PF /
3869	ice_alloc_fd_shrd_item(hw: &pf->hw, cntr_id: &unused,
3870	num_fltr: func_caps->fd_fltr_best_effort);
3871	}
3872
3873	clear_bit(nr: ICE_FLAG_PTP_SUPPORTED, addr: pf->flags);
3874	if (func_caps->common_cap.ieee_1588 &&
3875	!(pf->hw.mac_type == ICE_MAC_E830))
3876	set_bit(nr: ICE_FLAG_PTP_SUPPORTED, addr: pf->flags);
3877
3878	pf->max_pf_txqs = func_caps->common_cap.num_txq;
3879	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
3880	}
3881
3882	/**
3883	* ice_init_pf - Initialize general software structures (struct ice_pf)
3884	* @pf: board private structure to initialize
3885	*/
3886	static int ice_init_pf(struct ice_pf *pf)
3887	{
3888	ice_set_pf_caps(pf);
3889
3890	mutex_init(&pf->sw_mutex);
3891	mutex_init(&pf->tc_mutex);
3892	mutex_init(&pf->adev_mutex);
3893	mutex_init(&pf->lag_mutex);
3894
3895	INIT_HLIST_HEAD(&pf->aq_wait_list);
3896	spin_lock_init(&pf->aq_wait_lock);
3897	init_waitqueue_head(&pf->aq_wait_queue);
3898
3899	init_waitqueue_head(&pf->reset_wait_queue);
3900
3901	/ setup service timer and periodic service task /
3902	timer_setup(&pf->serv_tmr, ice_service_timer, `0`);
3903	pf->serv_tmr_period = HZ;
3904	INIT_WORK(&pf->serv_task, ice_service_task);
3905	clear_bit(nr: ICE_SERVICE_SCHED, addr: pf->state);
3906
3907	mutex_init(&pf->avail_q_mutex);
3908	pf->avail_txqs = bitmap_zalloc(nbits: pf->max_pf_txqs, GFP_KERNEL);
3909	if (!pf->avail_txqs)
3910	return -ENOMEM;
3911
3912	pf->avail_rxqs = bitmap_zalloc(nbits: pf->max_pf_rxqs, GFP_KERNEL);
3913	if (!pf->avail_rxqs) {
3914	bitmap_free(bitmap: pf->avail_txqs);
3915	pf->avail_txqs = NULL;
3916	return -ENOMEM;
3917	}
3918
3919	mutex_init(&pf->vfs.table_lock);
3920	hash_init(pf->vfs.table);
3921	ice_mbx_init_snapshot(hw: &pf->hw);
3922
3923	return `0`;
3924	}
3925
3926	/**
3927	* ice_is_wol_supported - check if WoL is supported
3928	* @hw: pointer to hardware info
3929	*
3930	* Check if WoL is supported based on the HW configuration.
3931	* Returns true if NVM supports and enables WoL for this port, false otherwise
3932	*/
3933	bool ice_is_wol_supported(struct ice_hw *hw)
3934	{
3935	u16 wol_ctrl;
3936
3937	/ A bit set to 1 in the NVM Software Reserved Word 2 (WoL control*
3938	* word) indicates WoL is not supported on the corresponding PF ID.
3939	*/
3940	if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, data: &wol_ctrl))
3941	return false;
3942
3943	return !(BIT(hw->port_info->lport) & wol_ctrl);
3944	}
3945
3946	/**
3947	* ice_vsi_recfg_qs - Change the number of queues on a VSI
3948	* @vsi: VSI being changed
3949	* @new_rx: new number of Rx queues
3950	* @new_tx: new number of Tx queues
3951	* @locked: is adev device_lock held
3952	*
3953	* Only change the number of queues if new_tx, or new_rx is non-0.
3954	*
3955	* Returns 0 on success.
3956	*/
3957	int ice_vsi_recfg_qs(struct ice_vsi vsi, int* new_rx, int new_tx, bool locked)
3958	{
3959	struct ice_pf *pf = vsi->back;
3960	int err = `0`, timeout = `50`;
3961
3962	if (!new_rx && !new_tx)
3963	return -EINVAL;
3964
3965	while (test_and_set_bit(nr: ICE_CFG_BUSY, addr: pf->state)) {
3966	timeout--;
3967	if (!timeout)
3968	return -EBUSY;
3969	usleep_range(min: `1000`, max: `2000`);
3970	}
3971
3972	if (new_tx)
3973	vsi->req_txq = (u16)new_tx;
3974	if (new_rx)
3975	vsi->req_rxq = (u16)new_rx;
3976
3977	/ set for the next time the netdev is started /
3978	if (!netif_running(dev: vsi->netdev)) {
3979	ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
3980	dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
3981	goto done;
3982	}
3983
3984	ice_vsi_close(vsi);
3985	ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
3986	ice_pf_dcb_recfg(pf, locked);
3987	ice_vsi_open(vsi);
3988	done:
3989	clear_bit(nr: ICE_CFG_BUSY, addr: pf->state);
3990	return err;
3991	}
3992
3993	/**
3994	* ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
3995	* @pf: PF to configure
3996	*
3997	* No VLAN offloads/filtering are advertised in safe mode so make sure the PF
3998	* VSI can still Tx/Rx VLAN tagged packets.
3999	*/
4000	static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
4001	{
4002	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4003	struct ice_vsi_ctx *ctxt;
4004	struct ice_hw *hw;
4005	int status;
4006
4007	if (!vsi)
4008	return;
4009
4010	ctxt = kzalloc(size: sizeof(*ctxt), GFP_KERNEL);
4011	if (!ctxt)
4012	return;
4013
4014	hw = &pf->hw;
4015	ctxt->info = vsi->info;
4016
4017	ctxt->info.valid_sections =
4018	cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID \|
4019	ICE_AQ_VSI_PROP_SECURITY_VALID \|
4020	ICE_AQ_VSI_PROP_SW_VALID);
4021
4022	/ disable VLAN anti-spoof /
4023	ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
4024	ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
4025
4026	/ disable VLAN pruning and keep all other settings /
4027	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
4028
4029	/ allow all VLANs on Tx and don't strip on Rx /
4030	ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL \|
4031	ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
4032
4033	status = ice_update_vsi(hw, vsi_handle: vsi->idx, vsi_ctx: ctxt, NULL);
4034	if (status) {
4035	dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
4036	status, ice_aq_str(hw->adminq.sq_last_status));
4037	} else {
4038	vsi->info.sec_flags = ctxt->info.sec_flags;
4039	vsi->info.sw_flags2 = ctxt->info.sw_flags2;
4040	vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
4041	}
4042
4043	kfree(objp: ctxt);
4044	}
4045
4046	/**
4047	* ice_log_pkg_init - log result of DDP package load
4048	* @hw: pointer to hardware info
4049	* @state: state of package load
4050	*/
4051	static void ice_log_pkg_init(struct ice_hw hw, enum* ice_ddp_state state)
4052	{
4053	struct ice_pf *pf = hw->back;
4054	struct device *dev;
4055
4056	dev = ice_pf_to_dev(pf);
4057
4058	switch (state) {
4059	case ICE_DDP_PKG_SUCCESS:
4060	dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
4061	hw->active_pkg_name,
4062	hw->active_pkg_ver.major,
4063	hw->active_pkg_ver.minor,
4064	hw->active_pkg_ver.update,
4065	hw->active_pkg_ver.draft);
4066	break;
4067	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
4068	dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
4069	hw->active_pkg_name,
4070	hw->active_pkg_ver.major,
4071	hw->active_pkg_ver.minor,
4072	hw->active_pkg_ver.update,
4073	hw->active_pkg_ver.draft);
4074	break;
4075	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
4076	dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
4077	hw->active_pkg_name,
4078	hw->active_pkg_ver.major,
4079	hw->active_pkg_ver.minor,
4080	ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4081	break;
4082	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
4083	dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
4084	hw->active_pkg_name,
4085	hw->active_pkg_ver.major,
4086	hw->active_pkg_ver.minor,
4087	hw->active_pkg_ver.update,
4088	hw->active_pkg_ver.draft,
4089	hw->pkg_name,
4090	hw->pkg_ver.major,
4091	hw->pkg_ver.minor,
4092	hw->pkg_ver.update,
4093	hw->pkg_ver.draft);
4094	break;
4095	case ICE_DDP_PKG_FW_MISMATCH:
4096	dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering safe mode.\n");
4097	break;
4098	case ICE_DDP_PKG_INVALID_FILE:
4099	dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
4100	break;
4101	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
4102	dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
4103	break;
4104	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
4105	dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
4106	ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4107	break;
4108	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
4109	dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
4110	break;
4111	case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
4112	dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
4113	break;
4114	case ICE_DDP_PKG_LOAD_ERROR:
4115	dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
4116	/ poll for reset to complete /
4117	if (ice_check_reset(hw))
4118	dev_err(dev, "Error resetting device. Please reload the driver\n");
4119	break;
4120	case ICE_DDP_PKG_ERR:
4121	default:
4122	dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n");
4123	break;
4124	}
4125	}
4126
4127	/**
4128	* ice_load_pkg - load/reload the DDP Package file
4129	* @firmware: firmware structure when firmware requested or NULL for reload
4130	* @pf: pointer to the PF instance
4131	*
4132	* Called on probe and post CORER/GLOBR rebuild to load DDP Package and
4133	* initialize HW tables.
4134	*/
4135	static void
4136	ice_load_pkg(const struct firmware firmware, struct* ice_pf *pf)
4137	{
4138	enum ice_ddp_state state = ICE_DDP_PKG_ERR;
4139	struct device *dev = ice_pf_to_dev(pf);
4140	struct ice_hw *hw = &pf->hw;
4141
4142	/ Load DDP Package /
4143	if (firmware && !hw->pkg_copy) {
4144	state = ice_copy_and_init_pkg(hw, buf: firmware->data,
4145	len: firmware->size);
4146	ice_log_pkg_init(hw, state);
4147	} else if (!firmware && hw->pkg_copy) {
4148	/ Reload package during rebuild after CORER/GLOBR reset /
4149	state = ice_init_pkg(hw, buff: hw->pkg_copy, len: hw->pkg_size);
4150	ice_log_pkg_init(hw, state);
4151	} else {
4152	dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
4153	}
4154
4155	if (!ice_is_init_pkg_successful(state)) {
4156	/ Safe Mode /
4157	clear_bit(nr: ICE_FLAG_ADV_FEATURES, addr: pf->flags);
4158	return;
4159	}
4160
4161	/ Successful download package is the precondition for advanced*
4162	* features, hence setting the ICE_FLAG_ADV_FEATURES flag
4163	*/
4164	set_bit(nr: ICE_FLAG_ADV_FEATURES, addr: pf->flags);
4165	}
4166
4167	/**
4168	* ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
4169	* @pf: pointer to the PF structure
4170	*
4171	* There is no error returned here because the driver should be able to handle
4172	* 128 Byte cache lines, so we only print a warning in case issues are seen,
4173	* specifically with Tx.
4174	*/
4175	static void ice_verify_cacheline_size(struct ice_pf *pf)
4176	{
4177	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
4178	dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
4179	ICE_CACHE_LINE_BYTES);
4180	}
4181
4182	/**
4183	* ice_send_version - update firmware with driver version
4184	* @pf: PF struct
4185	*
4186	* Returns 0 on success, else error code
4187	*/
4188	static int ice_send_version(struct ice_pf *pf)
4189	{
4190	struct ice_driver_ver dv;
4191
4192	dv.major_ver = `0xff`;
4193	dv.minor_ver = `0xff`;
4194	dv.build_ver = `0xff`;
4195	dv.subbuild_ver = `0`;
4196	strscpy(p: (char *)dv.driver_string, UTS_RELEASE,
4197	size: sizeof(dv.driver_string));
4198	return ice_aq_send_driver_ver(hw: &pf->hw, dv: &dv, NULL);
4199	}
4200
4201	/**
4202	* ice_init_fdir - Initialize flow director VSI and configuration
4203	* @pf: pointer to the PF instance
4204	*
4205	* returns 0 on success, negative on error
4206	*/
4207	static int ice_init_fdir(struct ice_pf *pf)
4208	{
4209	struct device *dev = ice_pf_to_dev(pf);
4210	struct ice_vsi *ctrl_vsi;
4211	int err;
4212
4213	/ Side Band Flow Director needs to have a control VSI.*
4214	* Allocate it and store it in the PF.
4215	*/
4216	ctrl_vsi = ice_ctrl_vsi_setup(pf, pi: pf->hw.port_info);
4217	if (!ctrl_vsi) {
4218	dev_dbg(dev, "could not create control VSI\n");
4219	return -ENOMEM;
4220	}
4221
4222	err = ice_vsi_open_ctrl(vsi: ctrl_vsi);
4223	if (err) {
4224	dev_dbg(dev, "could not open control VSI\n");
4225	goto err_vsi_open;
4226	}
4227
4228	mutex_init(&pf->hw.fdir_fltr_lock);
4229
4230	err = ice_fdir_create_dflt_rules(pf);
4231	if (err)
4232	goto err_fdir_rule;
4233
4234	return `0`;
4235
4236	err_fdir_rule:
4237	ice_fdir_release_flows(hw: &pf->hw);
4238	ice_vsi_close(vsi: ctrl_vsi);
4239	err_vsi_open:
4240	ice_vsi_release(vsi: ctrl_vsi);
4241	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4242	pf->vsi[pf->ctrl_vsi_idx] = NULL;
4243	pf->ctrl_vsi_idx = ICE_NO_VSI;
4244	}
4245	return err;
4246	}
4247
4248	static void ice_deinit_fdir(struct ice_pf *pf)
4249	{
4250	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
4251
4252	if (!vsi)
4253	return;
4254
4255	ice_vsi_manage_fdir(vsi, ena: false);
4256	ice_vsi_release(vsi);
4257	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4258	pf->vsi[pf->ctrl_vsi_idx] = NULL;
4259	pf->ctrl_vsi_idx = ICE_NO_VSI;
4260	}
4261
4262	mutex_destroy(lock: &(&pf->hw)->fdir_fltr_lock);
4263	}
4264
4265	/**
4266	* ice_get_opt_fw_name - return optional firmware file name or NULL
4267	* @pf: pointer to the PF instance
4268	*/
4269	static char ice_get_opt_fw_name(struct* ice_pf *pf)
4270	{
4271	/ Optional firmware name same as default with additional dash*
4272	* followed by a EUI-64 identifier (PCIe Device Serial Number)
4273	*/
4274	struct pci_dev *pdev = pf->pdev;
4275	char *opt_fw_filename;
4276	u64 dsn;
4277
4278	/ Determine the name of the optional file using the DSN (two*
4279	* dwords following the start of the DSN Capability).
4280	*/
4281	dsn = pci_get_dsn(dev: pdev);
4282	if (!dsn)
4283	return NULL;
4284
4285	opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
4286	if (!opt_fw_filename)
4287	return NULL;
4288
4289	snprintf(buf: opt_fw_filename, NAME_MAX, fmt: "%sice-%016llx.pkg",
4290	ICE_DDP_PKG_PATH, dsn);
4291
4292	return opt_fw_filename;
4293	}
4294
4295	/**
4296	* ice_request_fw - Device initialization routine
4297	* @pf: pointer to the PF instance
4298	*/
4299	static void ice_request_fw(struct ice_pf *pf)
4300	{
4301	char *opt_fw_filename = ice_get_opt_fw_name(pf);
4302	const struct firmware *firmware = NULL;
4303	struct device *dev = ice_pf_to_dev(pf);
4304	int err = `0`;
4305
4306	/ optional device-specific DDP (if present) overrides the default DDP*
4307	* package file. kernel logs a debug message if the file doesn't exist,
4308	* and warning messages for other errors.
4309	*/
4310	if (opt_fw_filename) {
4311	err = firmware_request_nowarn(fw: &firmware, name: opt_fw_filename, device: dev);
4312	if (err) {
4313	kfree(objp: opt_fw_filename);
4314	goto dflt_pkg_load;
4315	}
4316
4317	/ request for firmware was successful. Download to device /
4318	ice_load_pkg(firmware, pf);
4319	kfree(objp: opt_fw_filename);
4320	release_firmware(fw: firmware);
4321	return;
4322	}
4323
4324	dflt_pkg_load:
4325	err = request_firmware(fw: &firmware, ICE_DDP_PKG_FILE, device: dev);
4326	if (err) {
4327	dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
4328	return;
4329	}
4330
4331	/ request for firmware was successful. Download to device /
4332	ice_load_pkg(firmware, pf);
4333	release_firmware(fw: firmware);
4334	}
4335
4336	/**
4337	* ice_print_wake_reason - show the wake up cause in the log
4338	* @pf: pointer to the PF struct
4339	*/
4340	static void ice_print_wake_reason(struct ice_pf *pf)
4341	{
4342	u32 wus = pf->wakeup_reason;
4343	const char *wake_str;
4344
4345	/ if no wake event, nothing to print /
4346	if (!wus)
4347	return;
4348
4349	if (wus & PFPM_WUS_LNKC_M)
4350	wake_str = "Link\n";
4351	else if (wus & PFPM_WUS_MAG_M)
4352	wake_str = "Magic Packet\n";
4353	else if (wus & PFPM_WUS_MNG_M)
4354	wake_str = "Management\n";
4355	else if (wus & PFPM_WUS_FW_RST_WK_M)
4356	wake_str = "Firmware Reset\n";
4357	else
4358	wake_str = "Unknown\n";
4359
4360	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
4361	}
4362
4363	/**
4364	* ice_register_netdev - register netdev
4365	* @vsi: pointer to the VSI struct
4366	*/
4367	static int ice_register_netdev(struct ice_vsi *vsi)
4368	{
4369	int err;
4370
4371	if (!vsi \|\| !vsi->netdev)
4372	return -EIO;
4373
4374	err = register_netdev(dev: vsi->netdev);
4375	if (err)
4376	return err;
4377
4378	set_bit(nr: ICE_VSI_NETDEV_REGISTERED, addr: vsi->state);
4379	netif_carrier_off(dev: vsi->netdev);
4380	netif_tx_stop_all_queues(dev: vsi->netdev);
4381
4382	return `0`;
4383	}
4384
4385	static void ice_unregister_netdev(struct ice_vsi *vsi)
4386	{
4387	if (!vsi \|\| !vsi->netdev)
4388	return;
4389
4390	unregister_netdev(dev: vsi->netdev);
4391	clear_bit(nr: ICE_VSI_NETDEV_REGISTERED, addr: vsi->state);
4392	}
4393
4394	/**
4395	* ice_cfg_netdev - Allocate, configure and register a netdev
4396	* @vsi: the VSI associated with the new netdev
4397	*
4398	* Returns 0 on success, negative value on failure
4399	*/
4400	static int ice_cfg_netdev(struct ice_vsi *vsi)
4401	{
4402	struct ice_netdev_priv *np;
4403	struct net_device *netdev;
4404	u8 mac_addr[ETH_ALEN];
4405
4406	netdev = alloc_etherdev_mqs(sizeof_priv: sizeof(*np), txqs: vsi->alloc_txq,
4407	rxqs: vsi->alloc_rxq);
4408	if (!netdev)
4409	return -ENOMEM;
4410
4411	set_bit(nr: ICE_VSI_NETDEV_ALLOCD, addr: vsi->state);
4412	vsi->netdev = netdev;
4413	np = netdev_priv(dev: netdev);
4414	np->vsi = vsi;
4415
4416	ice_set_netdev_features(netdev);
4417	ice_set_ops(vsi);
4418
4419	if (vsi->type == ICE_VSI_PF) {
4420	SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
4421	ether_addr_copy(dst: mac_addr, src: vsi->port_info->mac.perm_addr);
4422	eth_hw_addr_set(dev: netdev, addr: mac_addr);
4423	}
4424
4425	netdev->priv_flags \|= IFF_UNICAST_FLT;
4426
4427	/ Setup netdev TC information /
4428	ice_vsi_cfg_netdev_tc(vsi, ena_tc: vsi->tc_cfg.ena_tc);
4429
4430	netdev->max_mtu = ICE_MAX_MTU;
4431
4432	return `0`;
4433	}
4434
4435	static void ice_decfg_netdev(struct ice_vsi *vsi)
4436	{
4437	clear_bit(nr: ICE_VSI_NETDEV_ALLOCD, addr: vsi->state);
4438	free_netdev(dev: vsi->netdev);
4439	vsi->netdev = NULL;
4440	}
4441
4442	static int ice_start_eth(struct ice_vsi *vsi)
4443	{
4444	int err;
4445
4446	err = ice_init_mac_fltr(pf: vsi->back);
4447	if (err)
4448	return err;
4449
4450	err = ice_vsi_open(vsi);
4451	if (err)
4452	ice_fltr_remove_all(vsi);
4453
4454	return err;
4455	}
4456
/**
 * ice_stop_eth - remove all filters from the VSI and close it
 * @vsi: the VSI to stop
 */
static void ice_stop_eth(struct ice_vsi *vsi)
{
	ice_fltr_remove_all(vsi);
	ice_vsi_close(vsi);
}
4462
4463	static int ice_init_eth(struct ice_pf *pf)
4464	{
4465	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4466	int err;
4467
4468	if (!vsi)
4469	return -EINVAL;
4470
4471	/ init channel list /
4472	INIT_LIST_HEAD(list: &vsi->ch_list);
4473
4474	err = ice_cfg_netdev(vsi);
4475	if (err)
4476	return err;
4477	/ Setup DCB netlink interface /
4478	ice_dcbnl_setup(vsi);
4479
4480	err = ice_init_mac_fltr(pf);
4481	if (err)
4482	goto err_init_mac_fltr;
4483
4484	err = ice_devlink_create_pf_port(pf);
4485	if (err)
4486	goto err_devlink_create_pf_port;
4487
4488	SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
4489
4490	err = ice_register_netdev(vsi);
4491	if (err)
4492	goto err_register_netdev;
4493
4494	err = ice_tc_indir_block_register(vsi);
4495	if (err)
4496	goto err_tc_indir_block_register;
4497
4498	ice_napi_add(vsi);
4499
4500	return `0`;
4501
4502	err_tc_indir_block_register:
4503	ice_unregister_netdev(vsi);
4504	err_register_netdev:
4505	ice_devlink_destroy_pf_port(pf);
4506	err_devlink_create_pf_port:
4507	err_init_mac_fltr:
4508	ice_decfg_netdev(vsi);
4509	return err;
4510	}
4511
/**
 * ice_deinit_eth - close, unregister and free the netdev of the main VSI
 * @pf: pointer to the PF instance
 *
 * Does nothing when there is no main VSI.
 */
static void ice_deinit_eth(struct ice_pf *pf)
{
	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);

	if (!main_vsi)
		return;

	ice_vsi_close(main_vsi);
	ice_unregister_netdev(main_vsi);
	ice_devlink_destroy_pf_port(pf);
	ice_tc_indir_block_unregister(main_vsi);
	ice_decfg_netdev(main_vsi);
}
4525
4526	/**
4527	* ice_wait_for_fw - wait for full FW readiness
4528	* @hw: pointer to the hardware structure
4529	* @timeout: milliseconds that can elapse before timing out
4530	*/
4531	static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout)
4532	{
4533	int fw_loading;
4534	u32 elapsed = `0`;
4535
4536	while (elapsed <= timeout) {
4537	fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M;
4538
4539	/ firmware was not yet loaded, we have to wait more /
4540	if (fw_loading) {
4541	elapsed += `100`;
4542	msleep(msecs: `100`);
4543	continue;
4544	}
4545	return `0`;
4546	}
4547
4548	return -ETIMEDOUT;
4549	}
4550
4551	static int ice_init_dev(struct ice_pf *pf)
4552	{
4553	struct device *dev = ice_pf_to_dev(pf);
4554	struct ice_hw *hw = &pf->hw;
4555	int err;
4556
4557	err = ice_init_hw(hw);
4558	if (err) {
4559	dev_err(dev, "ice_init_hw failed: %d\n", err);
4560	return err;
4561	}
4562
4563	/ Some cards require longer initialization times*
4564	* due to necessity of loading FW from an external source.
4565	* This can take even half a minute.
4566	*/
4567	if (ice_is_pf_c827(hw)) {
4568	err = ice_wait_for_fw(hw, timeout: `30000`);
4569	if (err) {
4570	dev_err(dev, "ice_wait_for_fw timed out");
4571	return err;
4572	}
4573	}
4574
4575	ice_init_feature_support(pf);
4576
4577	ice_request_fw(pf);
4578
4579	/ if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be*
4580	* set in pf->state, which will cause ice_is_safe_mode to return
4581	* true
4582	*/
4583	if (ice_is_safe_mode(pf)) {
4584	/ we already got function/device capabilities but these don't*
4585	* reflect what the driver needs to do in safe mode. Instead of
4586	* adding conditional logic everywhere to ignore these
4587	* device/function capabilities, override them.
4588	*/
4589	ice_set_safe_mode_caps(hw);
4590	}
4591
4592	err = ice_init_pf(pf);
4593	if (err) {
4594	dev_err(dev, "ice_init_pf failed: %d\n", err);
4595	goto err_init_pf;
4596	}
4597
4598	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
4599	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
4600	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
4601	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
4602	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
4603	pf->hw.udp_tunnel_nic.tables[`0`].n_entries =
4604	pf->hw.tnl.valid_count[TNL_VXLAN];
4605	pf->hw.udp_tunnel_nic.tables[`0`].tunnel_types =
4606	UDP_TUNNEL_TYPE_VXLAN;
4607	}
4608	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
4609	pf->hw.udp_tunnel_nic.tables[`1`].n_entries =
4610	pf->hw.tnl.valid_count[TNL_GENEVE];
4611	pf->hw.udp_tunnel_nic.tables[`1`].tunnel_types =
4612	UDP_TUNNEL_TYPE_GENEVE;
4613	}
4614
4615	err = ice_init_interrupt_scheme(pf);
4616	if (err) {
4617	dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
4618	err = -EIO;
4619	goto err_init_interrupt_scheme;
4620	}
4621
4622	/ In case of MSIX we are going to setup the misc vector right here*
4623	* to handle admin queue events etc. In case of legacy and MSI
4624	* the misc functionality and queue processing is combined in
4625	* the same vector and that gets setup at open.
4626	*/
4627	err = ice_req_irq_msix_misc(pf);
4628	if (err) {
4629	dev_err(dev, "setup of misc vector failed: %d\n", err);
4630	goto err_req_irq_msix_misc;
4631	}
4632
4633	return `0`;
4634
4635	err_req_irq_msix_misc:
4636	ice_clear_interrupt_scheme(pf);
4637	err_init_interrupt_scheme:
4638	ice_deinit_pf(pf);
4639	err_init_pf:
4640	ice_deinit_hw(hw);
4641	return err;
4642	}
4643
4644	static void ice_deinit_dev(struct ice_pf *pf)
4645	{
4646	ice_free_irq_msix_misc(pf);
4647	ice_deinit_pf(pf);
4648	ice_deinit_hw(hw: &pf->hw);
4649
4650	/ Service task is already stopped, so call reset directly. /
4651	ice_reset(hw: &pf->hw, req: ICE_RESET_PFR);
4652	pci_wait_for_pending_transaction(dev: pf->pdev);
4653	ice_clear_interrupt_scheme(pf);
4654	}
4655
4656	static void ice_init_features(struct ice_pf *pf)
4657	{
4658	struct device *dev = ice_pf_to_dev(pf);
4659
4660	if (ice_is_safe_mode(pf))
4661	return;
4662
4663	/ initialize DDP driven features /
4664	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4665	ice_ptp_init(pf);
4666
4667	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
4668	ice_gnss_init(pf);
4669
4670	if (ice_is_feature_supported(pf, f: ICE_F_CGU) \|\|
4671	ice_is_feature_supported(pf, f: ICE_F_PHY_RCLK))
4672	ice_dpll_init(pf);
4673
4674	/ Note: Flow director init failure is non-fatal to load /
4675	if (ice_init_fdir(pf))
4676	dev_err(dev, "could not initialize flow director\n");
4677
4678	/ Note: DCB init failure is non-fatal to load /
4679	if (ice_init_pf_dcb(pf, locked: false)) {
4680	clear_bit(nr: ICE_FLAG_DCB_CAPABLE, addr: pf->flags);
4681	clear_bit(nr: ICE_FLAG_DCB_ENA, addr: pf->flags);
4682	} else {
4683	ice_cfg_lldp_mib_change(hw: &pf->hw, ena_mib: true);
4684	}
4685
4686	if (ice_init_lag(pf))
4687	dev_warn(dev, "Failed to init link aggregation support\n");
4688	}
4689
4690	static void ice_deinit_features(struct ice_pf *pf)
4691	{
4692	if (ice_is_safe_mode(pf))
4693	return;
4694
4695	ice_deinit_lag(pf);
4696	if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
4697	ice_cfg_lldp_mib_change(hw: &pf->hw, ena_mib: false);
4698	ice_deinit_fdir(pf);
4699	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
4700	ice_gnss_exit(pf);
4701	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4702	ice_ptp_release(pf);
4703	if (test_bit(ICE_FLAG_DPLL, pf->flags))
4704	ice_dpll_deinit(pf);
4705	}
4706
4707	static void ice_init_wakeup(struct ice_pf *pf)
4708	{
4709	/ Save wakeup reason register for later use /
4710	pf->wakeup_reason = rd32(&pf->hw, PFPM_WUS);
4711
4712	/ check for a power management event /
4713	ice_print_wake_reason(pf);
4714
4715	/ clear wake status, all bits /
4716	wr32(&pf->hw, PFPM_WUS, U32_MAX);
4717
4718	/ Disable WoL at init, wait for user to enable /
4719	device_set_wakeup_enable(ice_pf_to_dev(pf), enable: false);
4720	}
4721
4722	static int ice_init_link(struct ice_pf *pf)
4723	{
4724	struct device *dev = ice_pf_to_dev(pf);
4725	int err;
4726
4727	err = ice_init_link_events(pi: pf->hw.port_info);
4728	if (err) {
4729	dev_err(dev, "ice_init_link_events failed: %d\n", err);
4730	return err;
4731	}
4732
4733	/ not a fatal error if this fails /
4734	err = ice_init_nvm_phy_type(pi: pf->hw.port_info);
4735	if (err)
4736	dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
4737
4738	/ not a fatal error if this fails /
4739	err = ice_update_link_info(pi: pf->hw.port_info);
4740	if (err)
4741	dev_err(dev, "ice_update_link_info failed: %d\n", err);
4742
4743	ice_init_link_dflt_override(pi: pf->hw.port_info);
4744
4745	ice_check_link_cfg_err(pf,
4746	link_cfg_err: pf->hw.port_info->phy.link_info.link_cfg_err);
4747
4748	/ if media available, initialize PHY settings /
4749	if (pf->hw.port_info->phy.link_info.link_info &
4750	ICE_AQ_MEDIA_AVAILABLE) {
4751	/ not a fatal error if this fails /
4752	err = ice_init_phy_user_cfg(pi: pf->hw.port_info);
4753	if (err)
4754	dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
4755
4756	if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
4757	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4758
4759	if (vsi)
4760	ice_configure_phy(vsi);
4761	}
4762	} else {
4763	set_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
4764	}
4765
4766	return err;
4767	}
4768
4769	static int ice_init_pf_sw(struct ice_pf *pf)
4770	{
4771	bool dvm = ice_is_dvm_ena(hw: &pf->hw);
4772	struct ice_vsi *vsi;
4773	int err;
4774
4775	/ create switch struct for the switch element created by FW on boot /
4776	pf->first_sw = kzalloc(size: sizeof(*pf->first_sw), GFP_KERNEL);
4777	if (!pf->first_sw)
4778	return -ENOMEM;
4779
4780	if (pf->hw.evb_veb)
4781	pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
4782	else
4783	pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
4784
4785	pf->first_sw->pf = pf;
4786
4787	/ record the sw_id available for later use /
4788	pf->first_sw->sw_id = pf->hw.port_info->sw_id;
4789
4790	err = ice_aq_set_port_params(pi: pf->hw.port_info, double_vlan: dvm, NULL);
4791	if (err)
4792	goto err_aq_set_port_params;
4793
4794	vsi = ice_pf_vsi_setup(pf, pi: pf->hw.port_info);
4795	if (!vsi) {
4796	err = -ENOMEM;
4797	goto err_pf_vsi_setup;
4798	}
4799
4800	return `0`;
4801
4802	err_pf_vsi_setup:
4803	err_aq_set_port_params:
4804	kfree(objp: pf->first_sw);
4805	return err;
4806	}
4807
4808	static void ice_deinit_pf_sw(struct ice_pf *pf)
4809	{
4810	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4811
4812	if (!vsi)
4813	return;
4814
4815	ice_vsi_release(vsi);
4816	kfree(objp: pf->first_sw);
4817	}
4818
4819	static int ice_alloc_vsis(struct ice_pf *pf)
4820	{
4821	struct device *dev = ice_pf_to_dev(pf);
4822
4823	pf->num_alloc_vsi = pf->hw.func_caps.guar_num_vsi;
4824	if (!pf->num_alloc_vsi)
4825	return -EIO;
4826
4827	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
4828	dev_warn(dev,
4829	"limiting the VSI count due to UDP tunnel limitation %d > %d\n",
4830	pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
4831	pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
4832	}
4833
4834	pf->vsi = devm_kcalloc(dev, n: pf->num_alloc_vsi, size: sizeof(*pf->vsi),
4835	GFP_KERNEL);
4836	if (!pf->vsi)
4837	return -ENOMEM;
4838
4839	pf->vsi_stats = devm_kcalloc(dev, n: pf->num_alloc_vsi,
4840	size: sizeof(*pf->vsi_stats), GFP_KERNEL);
4841	if (!pf->vsi_stats) {
4842	devm_kfree(dev, p: pf->vsi);
4843	return -ENOMEM;
4844	}
4845
4846	return `0`;
4847	}
4848
4849	static void ice_dealloc_vsis(struct ice_pf *pf)
4850	{
4851	devm_kfree(ice_pf_to_dev(pf), p: pf->vsi_stats);
4852	pf->vsi_stats = NULL;
4853
4854	pf->num_alloc_vsi = `0`;
4855	devm_kfree(ice_pf_to_dev(pf), p: pf->vsi);
4856	pf->vsi = NULL;
4857	}
4858
/**
 * ice_init_devlink - register devlink params, regions and the devlink itself
 * @pf: pointer to the PF instance
 *
 * Return: 0 on success, or the error from registering the devlink params
 */
static int ice_init_devlink(struct ice_pf *pf)
{
	int ret = ice_devlink_register_params(pf);

	if (ret)
		return ret;

	ice_devlink_init_regions(pf);
	ice_devlink_register(pf);

	return 0;
}
4872
/**
 * ice_deinit_devlink - undo ice_init_devlink() in reverse order
 * @pf: pointer to the PF instance
 */
static void ice_deinit_devlink(struct ice_pf *pf)
{
	ice_devlink_unregister(pf);
	ice_devlink_destroy_regions(pf);
	ice_devlink_unregister_params(pf);
}
4879
4880	static int ice_init(struct ice_pf *pf)
4881	{
4882	int err;
4883
4884	err = ice_init_dev(pf);
4885	if (err)
4886	return err;
4887
4888	err = ice_alloc_vsis(pf);
4889	if (err)
4890	goto err_alloc_vsis;
4891
4892	err = ice_init_pf_sw(pf);
4893	if (err)
4894	goto err_init_pf_sw;
4895
4896	ice_init_wakeup(pf);
4897
4898	err = ice_init_link(pf);
4899	if (err)
4900	goto err_init_link;
4901
4902	err = ice_send_version(pf);
4903	if (err)
4904	goto err_init_link;
4905
4906	ice_verify_cacheline_size(pf);
4907
4908	if (ice_is_safe_mode(pf))
4909	ice_set_safe_mode_vlan_cfg(pf);
4910	else
4911	/ print PCI link speed and width /
4912	pcie_print_link_status(dev: pf->pdev);
4913
4914	/ ready to go, so clear down state bit /
4915	clear_bit(nr: ICE_DOWN, addr: pf->state);
4916	clear_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
4917
4918	/ since everything is good, start the service timer /
4919	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: jiffies + pf->serv_tmr_period));
4920
4921	return `0`;
4922
4923	err_init_link:
4924	ice_deinit_pf_sw(pf);
4925	err_init_pf_sw:
4926	ice_dealloc_vsis(pf);
4927	err_alloc_vsis:
4928	ice_deinit_dev(pf);
4929	return err;
4930	}
4931
4932	static void ice_deinit(struct ice_pf *pf)
4933	{
4934	set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
4935	set_bit(nr: ICE_DOWN, addr: pf->state);
4936
4937	ice_deinit_pf_sw(pf);
4938	ice_dealloc_vsis(pf);
4939	ice_deinit_dev(pf);
4940	}
4941
4942	/**
4943	* ice_load - load pf by init hw and starting VSI
4944	* @pf: pointer to the pf instance
4945	*/
4946	int ice_load(struct ice_pf *pf)
4947	{
4948	struct ice_vsi_cfg_params params = {};
4949	struct ice_vsi *vsi;
4950	int err;
4951
4952	err = ice_init_dev(pf);
4953	if (err)
4954	return err;
4955
4956	vsi = ice_get_main_vsi(pf);
4957
4958	params = ice_vsi_to_params(vsi);
4959	params.flags = ICE_VSI_FLAG_INIT;
4960
4961	rtnl_lock();
4962	err = ice_vsi_cfg(vsi, params: &params);
4963	if (err)
4964	goto err_vsi_cfg;
4965
4966	err = ice_start_eth(vsi: ice_get_main_vsi(pf));
4967	if (err)
4968	goto err_start_eth;
4969	rtnl_unlock();
4970
4971	err = ice_init_rdma(pf);
4972	if (err)
4973	goto err_init_rdma;
4974
4975	ice_init_features(pf);
4976	ice_service_task_restart(pf);
4977
4978	clear_bit(nr: ICE_DOWN, addr: pf->state);
4979
4980	return `0`;
4981
4982	err_init_rdma:
4983	ice_vsi_close(vsi: ice_get_main_vsi(pf));
4984	rtnl_lock();
4985	err_start_eth:
4986	ice_vsi_decfg(vsi: ice_get_main_vsi(pf));
4987	err_vsi_cfg:
4988	rtnl_unlock();
4989	ice_deinit_dev(pf);
4990	return err;
4991	}
4992
/**
 * ice_unload - unload pf by stopping VSI and deinit hw
 * @pf: pointer to the pf instance
 *
 * The main VSI is stopped and deconfigured under the RTNL lock.
 */
void ice_unload(struct ice_pf *pf)
{
	ice_deinit_features(pf);
	ice_deinit_rdma(pf);

	rtnl_lock();
	ice_stop_eth(ice_get_main_vsi(pf));
	ice_vsi_decfg(ice_get_main_vsi(pf));
	rtnl_unlock();

	ice_deinit_dev(pf);
}
5007
5008	/**
5009	* ice_probe - Device initialization routine
5010	* @pdev: PCI device information struct
5011	* @ent: entry in ice_pci_tbl
5012	*
5013	* Returns 0 on success, negative on failure
5014	*/
5015	static int
5016	ice_probe(struct pci_dev pdev, const* struct pci_device_id __always_unused *ent)
5017	{
5018	struct device *dev = &pdev->dev;
5019	struct ice_pf *pf;
5020	struct ice_hw *hw;
5021	int err;
5022
5023	if (pdev->is_virtfn) {
5024	dev_err(dev, "can't probe a virtual function\n");
5025	return -EINVAL;
5026	}
5027
5028	/ when under a kdump kernel initiate a reset before enabling the*
5029	* device in order to clear out any pending DMA transactions. These
5030	* transactions can cause some systems to machine check when doing
5031	* the pcim_enable_device() below.
5032	*/
5033	if (is_kdump_kernel()) {
5034	pci_save_state(dev: pdev);
5035	pci_clear_master(dev: pdev);
5036	err = pcie_flr(dev: pdev);
5037	if (err)
5038	return err;
5039	pci_restore_state(dev: pdev);
5040	}
5041
5042	/ this driver uses devres, see*
5043	* Documentation/driver-api/driver-model/devres.rst
5044	*/
5045	err = pcim_enable_device(pdev);
5046	if (err)
5047	return err;
5048
5049	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), name: dev_driver_string(dev));
5050	if (err) {
5051	dev_err(dev, "BAR0 I/O map error %d\n", err);
5052	return err;
5053	}
5054
5055	pf = ice_allocate_pf(dev);
5056	if (!pf)
5057	return -ENOMEM;
5058
5059	/ initialize Auxiliary index to invalid value /
5060	pf->aux_idx = -`1`;
5061
5062	/ set up for high or low DMA /
5063	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(`64`));
5064	if (err) {
5065	dev_err(dev, "DMA configuration failed: 0x%x\n", err);
5066	return err;
5067	}
5068
5069	pci_set_master(dev: pdev);
5070
5071	pf->pdev = pdev;
5072	pci_set_drvdata(pdev, data: pf);
5073	set_bit(nr: ICE_DOWN, addr: pf->state);
5074	/ Disable service task until DOWN bit is cleared /
5075	set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
5076
5077	hw = &pf->hw;
5078	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
5079	pci_save_state(dev: pdev);
5080
5081	hw->back = pf;
5082	hw->port_info = NULL;
5083	hw->vendor_id = pdev->vendor;
5084	hw->device_id = pdev->device;
5085	pci_read_config_byte(dev: pdev, PCI_REVISION_ID, val: &hw->revision_id);
5086	hw->subsystem_vendor_id = pdev->subsystem_vendor;
5087	hw->subsystem_device_id = pdev->subsystem_device;
5088	hw->bus.device = PCI_SLOT(pdev->devfn);
5089	hw->bus.func = PCI_FUNC(pdev->devfn);
5090	ice_set_ctrlq_len(hw);
5091
5092	pf->msg_enable = netif_msg_init(debug_value: debug, ICE_DFLT_NETIF_M);
5093
5094	#ifndef CONFIG_DYNAMIC_DEBUG
5095	if (debug < -`1`)
5096	hw->debug_mask = debug;
5097	#endif
5098
5099	err = ice_init(pf);
5100	if (err)
5101	goto err_init;
5102
5103	err = ice_init_eth(pf);
5104	if (err)
5105	goto err_init_eth;
5106
5107	err = ice_init_rdma(pf);
5108	if (err)
5109	goto err_init_rdma;
5110
5111	err = ice_init_devlink(pf);
5112	if (err)
5113	goto err_init_devlink;
5114
5115	ice_init_features(pf);
5116
5117	return `0`;
5118
5119	err_init_devlink:
5120	ice_deinit_rdma(pf);
5121	err_init_rdma:
5122	ice_deinit_eth(pf);
5123	err_init_eth:
5124	ice_deinit(pf);
5125	err_init:
5126	pci_disable_device(dev: pdev);
5127	return err;
5128	}
5129
5130	/**
5131	* ice_set_wake - enable or disable Wake on LAN
5132	* @pf: pointer to the PF struct
5133	*
5134	* Simple helper for WoL control
5135	*/
5136	static void ice_set_wake(struct ice_pf *pf)
5137	{
5138	struct ice_hw *hw = &pf->hw;
5139	bool wol = pf->wol_ena;
5140
5141	/ clear wake state, otherwise new wake events won't fire /
5142	wr32(hw, PFPM_WUS, U32_MAX);
5143
5144	/ enable / disable APM wake up, no RMW needed /
5145	wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : `0`);
5146
5147	/ set magic packet filter enabled /
5148	wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : `0`);
5149	}
5150
5151	/**
5152	* ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
5153	* @pf: pointer to the PF struct
5154	*
5155	* Issue firmware command to enable multicast magic wake, making
5156	* sure that any locally administered address (LAA) is used for
5157	* wake, and that PF reset doesn't undo the LAA.
5158	*/
5159	static void ice_setup_mc_magic_wake(struct ice_pf *pf)
5160	{
5161	struct device *dev = ice_pf_to_dev(pf);
5162	struct ice_hw *hw = &pf->hw;
5163	u8 mac_addr[ETH_ALEN];
5164	struct ice_vsi *vsi;
5165	int status;
5166	u8 flags;
5167
5168	if (!pf->wol_ena)
5169	return;
5170
5171	vsi = ice_get_main_vsi(pf);
5172	if (!vsi)
5173	return;
5174
5175	/ Get current MAC address in case it's an LAA /
5176	if (vsi->netdev)
5177	ether_addr_copy(dst: mac_addr, src: vsi->netdev->dev_addr);
5178	else
5179	ether_addr_copy(dst: mac_addr, src: vsi->port_info->mac.perm_addr);
5180
5181	flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN \|
5182	ICE_AQC_MAN_MAC_UPDATE_LAA_WOL \|
5183	ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
5184
5185	status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
5186	if (status)
5187	dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
5188	status, ice_aq_str(hw->adminq.sq_last_status));
5189	}
5190
5191	/**
5192	* ice_remove - Device removal routine
5193	* @pdev: PCI device information struct
5194	*/
5195	static void ice_remove(struct pci_dev *pdev)
5196	{
5197	struct ice_pf *pf = pci_get_drvdata(pdev);
5198	int i;
5199
5200	for (i = `0`; i < ICE_MAX_RESET_WAIT; i++) {
5201	if (!ice_is_reset_in_progress(state: pf->state))
5202	break;
5203	msleep(msecs: `100`);
5204	}
5205
5206	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
5207	set_bit(nr: ICE_VF_RESETS_DISABLED, addr: pf->state);
5208	ice_free_vfs(pf);
5209	}
5210
5211	ice_service_task_stop(pf);
5212	ice_aq_cancel_waiting_tasks(pf);
5213	set_bit(nr: ICE_DOWN, addr: pf->state);
5214
5215	if (!ice_is_safe_mode(pf))
5216	ice_remove_arfs(pf);
5217	ice_deinit_features(pf);
5218	ice_deinit_devlink(pf);
5219	ice_deinit_rdma(pf);
5220	ice_deinit_eth(pf);
5221	ice_deinit(pf);
5222
5223	ice_vsi_release_all(pf);
5224
5225	ice_setup_mc_magic_wake(pf);
5226	ice_set_wake(pf);
5227
5228	pci_disable_device(dev: pdev);
5229	}
5230
5231	/**
5232	* ice_shutdown - PCI callback for shutting down device
5233	* @pdev: PCI device information struct
5234	*/
5235	static void ice_shutdown(struct pci_dev *pdev)
5236	{
5237	struct ice_pf *pf = pci_get_drvdata(pdev);
5238
5239	ice_remove(pdev);
5240
5241	if (system_state == SYSTEM_POWER_OFF) {
5242	pci_wake_from_d3(dev: pdev, enable: pf->wol_ena);
5243	pci_set_power_state(dev: pdev, PCI_D3hot);
5244	}
5245	}
5246
5247	#ifdef CONFIG_PM
5248	/**
5249	* ice_prepare_for_shutdown - prep for PCI shutdown
5250	* @pf: board private structure
5251	*
5252	* Inform or close all dependent features in prep for PCI device shutdown
5253	*/
5254	static void ice_prepare_for_shutdown(struct ice_pf *pf)
5255	{
5256	struct ice_hw *hw = &pf->hw;
5257	u32 v;
5258
5259	/ Notify VFs of impending reset /
5260	if (ice_check_sq_alive(hw, cq: &hw->mailboxq))
5261	ice_vc_notify_reset(pf);
5262
5263	dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
5264
5265	/ disable the VSIs and their queues that are not already DOWN /
5266	ice_pf_dis_all_vsi(pf, locked: false);
5267
5268	ice_for_each_vsi(pf, v)
5269	if (pf->vsi[v])
5270	pf->vsi[v]->vsi_num = `0`;
5271
5272	ice_shutdown_all_ctrlq(hw);
5273	}
5274
5275	/**
5276	* ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
5277	* @pf: board private structure to reinitialize
5278	*
5279	* This routine reinitialize interrupt scheme that was cleared during
5280	* power management suspend callback.
5281	*
5282	* This should be called during resume routine to re-allocate the q_vectors
5283	* and reacquire interrupts.
5284	*/
5285	static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
5286	{
5287	struct device *dev = ice_pf_to_dev(pf);
5288	int ret, v;
5289
5290	/ Since we clear MSIX flag during suspend, we need to*
5291	* set it back during resume...
5292	*/
5293
5294	ret = ice_init_interrupt_scheme(pf);
5295	if (ret) {
5296	dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
5297	return ret;
5298	}
5299
5300	/ Remap vectors and rings, after successful re-init interrupts /
5301	ice_for_each_vsi(pf, v) {
5302	if (!pf->vsi[v])
5303	continue;
5304
5305	ret = ice_vsi_alloc_q_vectors(vsi: pf->vsi[v]);
5306	if (ret)
5307	goto err_reinit;
5308	ice_vsi_map_rings_to_vectors(vsi: pf->vsi[v]);
5309	}
5310
5311	ret = ice_req_irq_msix_misc(pf);
5312	if (ret) {
5313	dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
5314	ret);
5315	goto err_reinit;
5316	}
5317
5318	return `0`;
5319
5320	err_reinit:
5321	while (v--)
5322	if (pf->vsi[v])
5323	ice_vsi_free_q_vectors(vsi: pf->vsi[v]);
5324
5325	return ret;
5326	}
5327
5328	/**
5329	* ice_suspend
5330	* @dev: generic device information structure
5331	*
5332	* Power Management callback to quiesce the device and prepare
5333	* for D3 transition.
5334	*/
5335	static int __maybe_unused ice_suspend(struct device *dev)
5336	{
5337	struct pci_dev *pdev = to_pci_dev(dev);
5338	struct ice_pf *pf;
5339	int disabled, v;
5340
5341	pf = pci_get_drvdata(pdev);
5342
5343	if (!ice_pf_state_is_nominal(pf)) {
5344	dev_err(dev, "Device is not ready, no need to suspend it\n");
5345	return -EBUSY;
5346	}
5347
5348	/ Stop watchdog tasks until resume completion.*
5349	* Even though it is most likely that the service task is
5350	* disabled if the device is suspended or down, the service task's
5351	* state is controlled by a different state bit, and we should
5352	* store and honor whatever state that bit is in at this point.
5353	*/
5354	disabled = ice_service_task_stop(pf);
5355
5356	ice_unplug_aux_dev(pf);
5357
5358	/ Already suspended?, then there is nothing to do /
5359	if (test_and_set_bit(nr: ICE_SUSPENDED, addr: pf->state)) {
5360	if (!disabled)
5361	ice_service_task_restart(pf);
5362	return `0`;
5363	}
5364
5365	if (test_bit(ICE_DOWN, pf->state) \|\|
5366	ice_is_reset_in_progress(state: pf->state)) {
5367	dev_err(dev, "can't suspend device in reset or already down\n");
5368	if (!disabled)
5369	ice_service_task_restart(pf);
5370	return `0`;
5371	}
5372
5373	ice_setup_mc_magic_wake(pf);
5374
5375	ice_prepare_for_shutdown(pf);
5376
5377	ice_set_wake(pf);
5378
5379	/ Free vectors, clear the interrupt scheme and release IRQs*
5380	* for proper hibernation, especially with large number of CPUs.
5381	* Otherwise hibernation might fail when mapping all the vectors back
5382	* to CPU0.
5383	*/
5384	ice_free_irq_msix_misc(pf);
5385	ice_for_each_vsi(pf, v) {
5386	if (!pf->vsi[v])
5387	continue;
5388	ice_vsi_free_q_vectors(vsi: pf->vsi[v]);
5389	}
5390	ice_clear_interrupt_scheme(pf);
5391
5392	pci_save_state(dev: pdev);
5393	pci_wake_from_d3(dev: pdev, enable: pf->wol_ena);
5394	pci_set_power_state(dev: pdev, PCI_D3hot);
5395	return `0`;
5396	}
5397
5398	/**
5399	* ice_resume - PM callback for waking up from D3
5400	* @dev: generic device information structure
5401	*/
5402	static int __maybe_unused ice_resume(struct device *dev)
5403	{
5404	struct pci_dev *pdev = to_pci_dev(dev);
5405	enum ice_reset_req reset_type;
5406	struct ice_pf *pf;
5407	struct ice_hw *hw;
5408	int ret;
5409
5410	pci_set_power_state(dev: pdev, PCI_D0);
5411	pci_restore_state(dev: pdev);
5412	pci_save_state(dev: pdev);
5413
5414	if (!pci_device_is_present(pdev))
5415	return -ENODEV;
5416
5417	ret = pci_enable_device_mem(dev: pdev);
5418	if (ret) {
5419	dev_err(dev, "Cannot enable device after suspend\n");
5420	return ret;
5421	}
5422
5423	pf = pci_get_drvdata(pdev);
5424	hw = &pf->hw;
5425
5426	pf->wakeup_reason = rd32(hw, PFPM_WUS);
5427	ice_print_wake_reason(pf);
5428
5429	/ We cleared the interrupt scheme when we suspended, so we need to*
5430	* restore it now to resume device functionality.
5431	*/
5432	ret = ice_reinit_interrupt_scheme(pf);
5433	if (ret)
5434	dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
5435
5436	clear_bit(nr: ICE_DOWN, addr: pf->state);
5437	/ Now perform PF reset and rebuild /
5438	reset_type = ICE_RESET_PFR;
5439	/ re-enable service task for reset, but allow reset to schedule it /
5440	clear_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
5441
5442	if (ice_schedule_reset(pf, reset: reset_type))
5443	dev_err(dev, "Reset during resume failed.\n");
5444
5445	clear_bit(nr: ICE_SUSPENDED, addr: pf->state);
5446	ice_service_task_restart(pf);
5447
5448	/ Restart the service task /
5449	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: jiffies + pf->serv_tmr_period));
5450
5451	return `0`;
5452	}
5453	#endif /* CONFIG_PM */
5454
5455	/**
5456	* ice_pci_err_detected - warning that PCI error has been detected
5457	* @pdev: PCI device information struct
5458	* @err: the type of PCI error
5459	*
5460	* Called to warn that something happened on the PCI bus and the error handling
5461	* is in progress. Allows the driver to gracefully prepare/handle PCI errors.
5462	*/
5463	static pci_ers_result_t
5464	ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
5465	{
5466	struct ice_pf *pf = pci_get_drvdata(pdev);
5467
5468	if (!pf) {
5469	dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
5470	__func__, err);
5471	return PCI_ERS_RESULT_DISCONNECT;
5472	}
5473
5474	if (!test_bit(ICE_SUSPENDED, pf->state)) {
5475	ice_service_task_stop(pf);
5476
5477	if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5478	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
5479	ice_prepare_for_reset(pf, reset_type: ICE_RESET_PFR);
5480	}
5481	}
5482
5483	return PCI_ERS_RESULT_NEED_RESET;
5484	}
5485
5486	/**
5487	* ice_pci_err_slot_reset - a PCI slot reset has just happened
5488	* @pdev: PCI device information struct
5489	*
5490	* Called to determine if the driver can recover from the PCI slot reset by
5491	* using a register read to determine if the device is recoverable.
5492	*/
5493	static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
5494	{
5495	struct ice_pf *pf = pci_get_drvdata(pdev);
5496	pci_ers_result_t result;
5497	int err;
5498	u32 reg;
5499
5500	err = pci_enable_device_mem(dev: pdev);
5501	if (err) {
5502	dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
5503	err);
5504	result = PCI_ERS_RESULT_DISCONNECT;
5505	} else {
5506	pci_set_master(dev: pdev);
5507	pci_restore_state(dev: pdev);
5508	pci_save_state(dev: pdev);
5509	pci_wake_from_d3(dev: pdev, enable: false);
5510
5511	/ Check for life /
5512	reg = rd32(&pf->hw, GLGEN_RTRIG);
5513	if (!reg)
5514	result = PCI_ERS_RESULT_RECOVERED;
5515	else
5516	result = PCI_ERS_RESULT_DISCONNECT;
5517	}
5518
5519	return result;
5520	}
5521
5522	/**
5523	* ice_pci_err_resume - restart operations after PCI error recovery
5524	* @pdev: PCI device information struct
5525	*
5526	* Called to allow the driver to bring things back up after PCI error and/or
5527	* reset recovery have finished
5528	*/
5529	static void ice_pci_err_resume(struct pci_dev *pdev)
5530	{
5531	struct ice_pf *pf = pci_get_drvdata(pdev);
5532
5533	if (!pf) {
5534	dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
5535	__func__);
5536	return;
5537	}
5538
5539	if (test_bit(ICE_SUSPENDED, pf->state)) {
5540	dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
5541	__func__);
5542	return;
5543	}
5544
5545	ice_restore_all_vfs_msi_state(pf);
5546
5547	ice_do_reset(pf, reset_type: ICE_RESET_PFR);
5548	ice_service_task_restart(pf);
5549	mod_timer(timer: &pf->serv_tmr, expires: round_jiffies(j: jiffies + pf->serv_tmr_period));
5550	}
5551
5552	/**
5553	* ice_pci_err_reset_prepare - prepare device driver for PCI reset
5554	* @pdev: PCI device information struct
5555	*/
5556	static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
5557	{
5558	struct ice_pf *pf = pci_get_drvdata(pdev);
5559
5560	if (!test_bit(ICE_SUSPENDED, pf->state)) {
5561	ice_service_task_stop(pf);
5562
5563	if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5564	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
5565	ice_prepare_for_reset(pf, reset_type: ICE_RESET_PFR);
5566	}
5567	}
5568	}
5569
/**
 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
 * @pdev: PCI device information struct
 *
 * Recovery after a completed reset is the same as after an error, so this
 * simply forwards to ice_pci_err_resume().
 */
static void ice_pci_err_reset_done(struct pci_dev *pdev)
{
	ice_pci_err_resume(pdev);
}
5578
5579	/ ice_pci_tbl - PCI Device ID Table*
5580	*
5581	* Wildcard entries (PCI_ANY_ID) should come last
5582	* Last entry must be all 0s
5583	*
5584	* { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
5585	* Class, Class Mask, private data (not used) }
5586	*/
5587	static const struct pci_device_id ice_pci_tbl[] = {
5588	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE) },
5589	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP) },
5590	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP) },
5591	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE) },
5592	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP) },
5593	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP) },
5594	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE) },
5595	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP) },
5596	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP) },
5597	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T) },
5598	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII) },
5599	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE) },
5600	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP) },
5601	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP) },
5602	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T) },
5603	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII) },
5604	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE) },
5605	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP) },
5606	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T) },
5607	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII) },
5608	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE) },
5609	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP) },
5610	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T) },
5611	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE) },
5612	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP) },
5613	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT) },
5614	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_BACKPLANE) },
5615	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_QSFP56) },
5616	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP) },
5617	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP_DD) },
5618	/ required last entry /
5619	{}
5620	};
5621	MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
5622
5623	static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
5624
5625	static const struct pci_error_handlers ice_pci_err_handler = {
5626	.error_detected = ice_pci_err_detected,
5627	.slot_reset = ice_pci_err_slot_reset,
5628	.reset_prepare = ice_pci_err_reset_prepare,
5629	.reset_done = ice_pci_err_reset_done,
5630	.resume = ice_pci_err_resume
5631	};
5632
5633	static struct pci_driver ice_driver = {
5634	.name = KBUILD_MODNAME,
5635	.id_table = ice_pci_tbl,
5636	.probe = ice_probe,
5637	.remove = ice_remove,
5638	#ifdef CONFIG_PM
5639	.driver.pm = &ice_pm_ops,
5640	#endif /* CONFIG_PM */
5641	.shutdown = ice_shutdown,
5642	.sriov_configure = ice_sriov_configure,
5643	.sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix,
5644	.sriov_set_msix_vec_count = ice_sriov_set_msix_vec_count,
5645	.err_handler = &ice_pci_err_handler
5646	};
5647
5648	/**
5649	* ice_module_init - Driver registration routine
5650	*
5651	* ice_module_init is the first routine called when the driver is
5652	* loaded. All it does is register with the PCI subsystem.
5653	*/
5654	static int __init ice_module_init(void)
5655	{
5656	int status = -ENOMEM;
5657
5658	pr_info("%s\n", ice_driver_string);
5659	pr_info("%s\n", ice_copyright);
5660
5661	ice_adv_lnk_speed_maps_init();
5662
5663	ice_wq = alloc_workqueue(fmt: "%s", flags: `0`, max_active: `0`, KBUILD_MODNAME);
5664	if (!ice_wq) {
5665	pr_err("Failed to create workqueue\n");
5666	return status;
5667	}
5668
5669	ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", `0`);
5670	if (!ice_lag_wq) {
5671	pr_err("Failed to create LAG workqueue\n");
5672	goto err_dest_wq;
5673	}
5674
5675	status = pci_register_driver(&ice_driver);
5676	if (status) {
5677	pr_err("failed to register PCI driver, err %d\n", status);
5678	goto err_dest_lag_wq;
5679	}
5680
5681	return `0`;
5682
5683	err_dest_lag_wq:
5684	destroy_workqueue(wq: ice_lag_wq);
5685	err_dest_wq:
5686	destroy_workqueue(wq: ice_wq);
5687	return status;
5688	}
5689	module_init(ice_module_init);
5690
5691	/**
5692	* ice_module_exit - Driver exit cleanup routine
5693	*
5694	* ice_module_exit is called just before the driver is removed
5695	* from memory.
5696	*/
5697	static void __exit ice_module_exit(void)
5698	{
5699	pci_unregister_driver(dev: &ice_driver);
5700	destroy_workqueue(wq: ice_wq);
5701	destroy_workqueue(wq: ice_lag_wq);
5702	pr_info("module unloaded\n");
5703	}
5704	module_exit(ice_module_exit);
5705
5706	/**
5707	* ice_set_mac_address - NDO callback to set MAC address
5708	* @netdev: network interface device structure
5709	* @pi: pointer to an address structure
5710	*
5711	* Returns 0 on success, negative on failure
5712	*/
5713	static int ice_set_mac_address(struct net_device netdev, void* *pi)
5714	{
5715	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5716	struct ice_vsi *vsi = np->vsi;
5717	struct ice_pf *pf = vsi->back;
5718	struct ice_hw *hw = &pf->hw;
5719	struct sockaddr *addr = pi;
5720	u8 old_mac[ETH_ALEN];
5721	u8 flags = `0`;
5722	u8 *mac;
5723	int err;
5724
5725	mac = (u8 *)addr->sa_data;
5726
5727	if (!is_valid_ether_addr(addr: mac))
5728	return -EADDRNOTAVAIL;
5729
5730	if (test_bit(ICE_DOWN, pf->state) \|\|
5731	ice_is_reset_in_progress(state: pf->state)) {
5732	netdev_err(dev: netdev, format: "can't set mac %pM. device not ready\n",
5733	mac);
5734	return -EBUSY;
5735	}
5736
5737	if (ice_chnl_dmac_fltr_cnt(pf)) {
5738	netdev_err(dev: netdev, format: "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
5739	mac);
5740	return -EAGAIN;
5741	}
5742
5743	netif_addr_lock_bh(dev: netdev);
5744	ether_addr_copy(dst: old_mac, src: netdev->dev_addr);
5745	/ change the netdev's MAC address /
5746	eth_hw_addr_set(dev: netdev, addr: mac);
5747	netif_addr_unlock_bh(dev: netdev);
5748
5749	/ Clean up old MAC filter. Not an error if old filter doesn't exist /
5750	err = ice_fltr_remove_mac(vsi, mac: old_mac, action: ICE_FWD_TO_VSI);
5751	if (err && err != -ENOENT) {
5752	err = -EADDRNOTAVAIL;
5753	goto err_update_filters;
5754	}
5755
5756	/ Add filter for new MAC. If filter exists, return success /
5757	err = ice_fltr_add_mac(vsi, mac, action: ICE_FWD_TO_VSI);
5758	if (err == -EEXIST) {
5759	/ Although this MAC filter is already present in hardware it's*
5760	* possible in some cases (e.g. bonding) that dev_addr was
5761	* modified outside of the driver and needs to be restored back
5762	* to this value.
5763	*/
5764	netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
5765
5766	return `0`;
5767	} else if (err) {
5768	/ error if the new filter addition failed /
5769	err = -EADDRNOTAVAIL;
5770	}
5771
5772	err_update_filters:
5773	if (err) {
5774	netdev_err(dev: netdev, format: "can't set MAC %pM. filter update failed\n",
5775	mac);
5776	netif_addr_lock_bh(dev: netdev);
5777	eth_hw_addr_set(dev: netdev, addr: old_mac);
5778	netif_addr_unlock_bh(dev: netdev);
5779	return err;
5780	}
5781
5782	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
5783	netdev->dev_addr);
5784
5785	/ write new MAC address to the firmware /
5786	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
5787	err = ice_aq_manage_mac_write(hw, mac_addr: mac, flags, NULL);
5788	if (err) {
5789	netdev_err(dev: netdev, format: "can't set MAC %pM. write to firmware failed error %d\n",
5790	mac, err);
5791	}
5792	return `0`;
5793	}
5794
5795	/**
5796	* ice_set_rx_mode - NDO callback to set the netdev filters
5797	* @netdev: network interface device structure
5798	*/
5799	static void ice_set_rx_mode(struct net_device *netdev)
5800	{
5801	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5802	struct ice_vsi *vsi = np->vsi;
5803
5804	if (!vsi \|\| ice_is_switchdev_running(pf: vsi->back))
5805	return;
5806
5807	/ Set the flags to synchronize filters*
5808	* ndo_set_rx_mode may be triggered even without a change in netdev
5809	* flags
5810	*/
5811	set_bit(nr: ICE_VSI_UMAC_FLTR_CHANGED, addr: vsi->state);
5812	set_bit(nr: ICE_VSI_MMAC_FLTR_CHANGED, addr: vsi->state);
5813	set_bit(nr: ICE_FLAG_FLTR_SYNC, addr: vsi->back->flags);
5814
5815	/ schedule our worker thread which will take care of*
5816	* applying the new filter changes
5817	*/
5818	ice_service_task_schedule(pf: vsi->back);
5819	}
5820
5821	/**
5822	* ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
5823	* @netdev: network interface device structure
5824	* @queue_index: Queue ID
5825	* @maxrate: maximum bandwidth in Mbps
5826	*/
5827	static int
5828	ice_set_tx_maxrate(struct net_device netdev, int* queue_index, u32 maxrate)
5829	{
5830	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5831	struct ice_vsi *vsi = np->vsi;
5832	u16 q_handle;
5833	int status;
5834	u8 tc;
5835
5836	/ Validate maxrate requested is within permitted range /
5837	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / `1000`))) {
5838	netdev_err(dev: netdev, format: "Invalid max rate %d specified for the queue %d\n",
5839	maxrate, queue_index);
5840	return -EINVAL;
5841	}
5842
5843	q_handle = vsi->tx_rings[queue_index]->q_handle;
5844	tc = ice_dcb_get_tc(vsi, queue_index);
5845
5846	vsi = ice_locate_vsi_using_queue(vsi, queue: queue_index);
5847	if (!vsi) {
5848	netdev_err(dev: netdev, format: "Invalid VSI for given queue %d\n",
5849	queue_index);
5850	return -EINVAL;
5851	}
5852
5853	/ Set BW back to default, when user set maxrate to 0 /
5854	if (!maxrate)
5855	status = ice_cfg_q_bw_dflt_lmt(pi: vsi->port_info, vsi_handle: vsi->idx, tc,
5856	q_handle, rl_type: ICE_MAX_BW);
5857	else
5858	status = ice_cfg_q_bw_lmt(pi: vsi->port_info, vsi_handle: vsi->idx, tc,
5859	q_handle, rl_type: ICE_MAX_BW, bw: maxrate * `1000`);
5860	if (status)
5861	netdev_err(dev: netdev, format: "Unable to set Tx max rate, error %d\n",
5862	status);
5863
5864	return status;
5865	}
5866
5867	/**
5868	* ice_fdb_add - add an entry to the hardware database
5869	* @ndm: the input from the stack
5870	* @tb: pointer to array of nladdr (unused)
5871	* @dev: the net device pointer
5872	* @addr: the MAC address entry being added
5873	* @vid: VLAN ID
5874	* @flags: instructions from stack about fdb operation
5875	* @extack: netlink extended ack
5876	*/
5877	static int
5878	ice_fdb_add(struct ndmsg ndm, struct* nlattr __always_unused *tb[],
5879	struct net_device dev, const* unsigned char *addr, u16 vid,
5880	u16 flags, struct netlink_ext_ack __always_unused *extack)
5881	{
5882	int err;
5883
5884	if (vid) {
5885	netdev_err(dev, format: "VLANs aren't supported yet for dev_uc\|mc_add()\n");
5886	return -EINVAL;
5887	}
5888	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
5889	netdev_err(dev, format: "FDB only supports static addresses\n");
5890	return -EINVAL;
5891	}
5892
5893	if (is_unicast_ether_addr(addr) \|\| is_link_local_ether_addr(addr))
5894	err = dev_uc_add_excl(dev, addr);
5895	else if (is_multicast_ether_addr(addr))
5896	err = dev_mc_add_excl(dev, addr);
5897	else
5898	err = -EINVAL;
5899
5900	/ Only return duplicate errors if NLM_F_EXCL is set /
5901	if (err == -EEXIST && !(flags & NLM_F_EXCL))
5902	err = `0`;
5903
5904	return err;
5905	}
5906
5907	/**
5908	* ice_fdb_del - delete an entry from the hardware database
5909	* @ndm: the input from the stack
5910	* @tb: pointer to array of nladdr (unused)
5911	* @dev: the net device pointer
5912	* @addr: the MAC address entry being added
5913	* @vid: VLAN ID
5914	* @extack: netlink extended ack
5915	*/
5916	static int
5917	ice_fdb_del(struct ndmsg ndm, __always_unused struct* nlattr *tb[],
5918	struct net_device dev, const* unsigned char *addr,
5919	__always_unused u16 vid, struct netlink_ext_ack *extack)
5920	{
5921	int err;
5922
5923	if (ndm->ndm_state & NUD_PERMANENT) {
5924	netdev_err(dev, format: "FDB only supports static addresses\n");
5925	return -EINVAL;
5926	}
5927
5928	if (is_unicast_ether_addr(addr))
5929	err = dev_uc_del(dev, addr);
5930	else if (is_multicast_ether_addr(addr))
5931	err = dev_mc_del(dev, addr);
5932	else
5933	err = -EINVAL;
5934
5935	return err;
5936	}
5937
/* all VLAN stripping and insertion offloads, C-tag and S-tag */
#define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_CTAG_TX | \
					 NETIF_F_HW_VLAN_STAG_RX | \
					 NETIF_F_HW_VLAN_STAG_TX)

/* Rx VLAN stripping offloads only */
#define NETIF_VLAN_STRIPPING_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_STAG_RX)

/* Rx VLAN filtering offloads */
#define NETIF_VLAN_FILTERING_FEATURES	(NETIF_F_HW_VLAN_CTAG_FILTER | \
					 NETIF_F_HW_VLAN_STAG_FILTER)
5948
5949	/**
5950	* ice_fix_features - fix the netdev features flags based on device limitations
5951	* @netdev: ptr to the netdev that flags are being fixed on
5952	* @features: features that need to be checked and possibly fixed
5953	*
5954	* Make sure any fixups are made to features in this callback. This enables the
5955	* driver to not have to check unsupported configurations throughout the driver
5956	* because that's the responsiblity of this callback.
5957	*
5958	* Single VLAN Mode (SVM) Supported Features:
5959	* NETIF_F_HW_VLAN_CTAG_FILTER
5960	* NETIF_F_HW_VLAN_CTAG_RX
5961	* NETIF_F_HW_VLAN_CTAG_TX
5962	*
5963	* Double VLAN Mode (DVM) Supported Features:
5964	* NETIF_F_HW_VLAN_CTAG_FILTER
5965	* NETIF_F_HW_VLAN_CTAG_RX
5966	* NETIF_F_HW_VLAN_CTAG_TX
5967	*
5968	* NETIF_F_HW_VLAN_STAG_FILTER
5969	* NETIF_HW_VLAN_STAG_RX
5970	* NETIF_HW_VLAN_STAG_TX
5971	*
5972	* Features that need fixing:
5973	* Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
5974	* These are mutually exlusive as the VSI context cannot support multiple
5975	* VLAN ethertypes simultaneously for stripping and/or insertion. If this
5976	* is not done, then default to clearing the requested STAG offload
5977	* settings.
5978	*
5979	* All supported filtering has to be enabled or disabled together. For
5980	* example, in DVM, CTAG and STAG filtering have to be enabled and disabled
5981	* together. If this is not done, then default to VLAN filtering disabled.
5982	* These are mutually exclusive as there is currently no way to
5983	* enable/disable VLAN filtering based on VLAN ethertype when using VLAN
5984	* prune rules.
5985	*/
5986	static netdev_features_t
5987	ice_fix_features(struct net_device *netdev, netdev_features_t features)
5988	{
5989	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
5990	netdev_features_t req_vlan_fltr, cur_vlan_fltr;
5991	bool cur_ctag, cur_stag, req_ctag, req_stag;
5992
5993	cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
5994	cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
5995	cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
5996
5997	req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
5998	req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
5999	req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
6000
6001	if (req_vlan_fltr != cur_vlan_fltr) {
6002	if (ice_is_dvm_ena(hw: &np->vsi->back->hw)) {
6003	if (req_ctag && req_stag) {
6004	features \|= NETIF_VLAN_FILTERING_FEATURES;
6005	} else if (!req_ctag && !req_stag) {
6006	features &= ~NETIF_VLAN_FILTERING_FEATURES;
6007	} else if ((!cur_ctag && req_ctag && !cur_stag) \|\|
6008	(!cur_stag && req_stag && !cur_ctag)) {
6009	features \|= NETIF_VLAN_FILTERING_FEATURES;
6010	netdev_warn(dev: netdev, format: "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
6011	} else if ((cur_ctag && !req_ctag && cur_stag) \|\|
6012	(cur_stag && !req_stag && cur_ctag)) {
6013	features &= ~NETIF_VLAN_FILTERING_FEATURES;
6014	netdev_warn(dev: netdev, format: "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
6015	}
6016	} else {
6017	if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
6018	netdev_warn(dev: netdev, format: "cannot support requested 802.1ad filtering setting in SVM mode\n");
6019
6020	if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
6021	features \|= NETIF_F_HW_VLAN_CTAG_FILTER;
6022	}
6023	}
6024
6025	if ((features & (NETIF_F_HW_VLAN_CTAG_RX \| NETIF_F_HW_VLAN_CTAG_TX)) &&
6026	(features & (NETIF_F_HW_VLAN_STAG_RX \| NETIF_F_HW_VLAN_STAG_TX))) {
6027	netdev_warn(dev: netdev, format: "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
6028	features &= ~(NETIF_F_HW_VLAN_STAG_RX \|
6029	NETIF_F_HW_VLAN_STAG_TX);
6030	}
6031
6032	if (!(netdev->features & NETIF_F_RXFCS) &&
6033	(features & NETIF_F_RXFCS) &&
6034	(features & NETIF_VLAN_STRIPPING_FEATURES) &&
6035	!ice_vsi_has_non_zero_vlans(vsi: np->vsi)) {
6036	netdev_warn(dev: netdev, format: "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
6037	features &= ~NETIF_VLAN_STRIPPING_FEATURES;
6038	}
6039
6040	return features;
6041	}
6042
6043	/**
6044	* ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
6045	* @vsi: PF's VSI
6046	* @features: features used to determine VLAN offload settings
6047	*
6048	* First, determine the vlan_ethertype based on the VLAN offload bits in
6049	* features. Then determine if stripping and insertion should be enabled or
6050	* disabled. Finally enable or disable VLAN stripping and insertion.
6051	*/
6052	static int
6053	ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
6054	{
6055	bool enable_stripping = true, enable_insertion = true;
6056	struct ice_vsi_vlan_ops *vlan_ops;
6057	int strip_err = `0`, insert_err = `0`;
6058	u16 vlan_ethertype = `0`;
6059
6060	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
6061
6062	if (features & (NETIF_F_HW_VLAN_STAG_RX \| NETIF_F_HW_VLAN_STAG_TX))
6063	vlan_ethertype = ETH_P_8021AD;
6064	else if (features & (NETIF_F_HW_VLAN_CTAG_RX \| NETIF_F_HW_VLAN_CTAG_TX))
6065	vlan_ethertype = ETH_P_8021Q;
6066
6067	if (!(features & (NETIF_F_HW_VLAN_STAG_RX \| NETIF_F_HW_VLAN_CTAG_RX)))
6068	enable_stripping = false;
6069	if (!(features & (NETIF_F_HW_VLAN_STAG_TX \| NETIF_F_HW_VLAN_CTAG_TX)))
6070	enable_insertion = false;
6071
6072	if (enable_stripping)
6073	strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
6074	else
6075	strip_err = vlan_ops->dis_stripping(vsi);
6076
6077	if (enable_insertion)
6078	insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
6079	else
6080	insert_err = vlan_ops->dis_insertion(vsi);
6081
6082	if (strip_err \|\| insert_err)
6083	return -EIO;
6084
6085	return `0`;
6086	}
6087
6088	/**
6089	* ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
6090	* @vsi: PF's VSI
6091	* @features: features used to determine VLAN filtering settings
6092	*
6093	* Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
6094	* features.
6095	*/
6096	static int
6097	ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
6098	{
6099	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
6100	int err = `0`;
6101
6102	/ support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking*
6103	* if either bit is set
6104	*/
6105	if (features &
6106	(NETIF_F_HW_VLAN_CTAG_FILTER \| NETIF_F_HW_VLAN_STAG_FILTER))
6107	err = vlan_ops->ena_rx_filtering(vsi);
6108	else
6109	err = vlan_ops->dis_rx_filtering(vsi);
6110
6111	return err;
6112	}
6113
6114	/**
6115	* ice_set_vlan_features - set VLAN settings based on suggested feature set
6116	* @netdev: ptr to the netdev being adjusted
6117	* @features: the feature set that the stack is suggesting
6118	*
6119	* Only update VLAN settings if the requested_vlan_features are different than
6120	* the current_vlan_features.
6121	*/
6122	static int
6123	ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
6124	{
6125	netdev_features_t current_vlan_features, requested_vlan_features;
6126	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6127	struct ice_vsi *vsi = np->vsi;
6128	int err;
6129
6130	current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
6131	requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
6132	if (current_vlan_features ^ requested_vlan_features) {
6133	if ((features & NETIF_F_RXFCS) &&
6134	(features & NETIF_VLAN_STRIPPING_FEATURES)) {
6135	dev_err(ice_pf_to_dev(vsi->back),
6136	"To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
6137	return -EIO;
6138	}
6139
6140	err = ice_set_vlan_offload_features(vsi, features);
6141	if (err)
6142	return err;
6143	}
6144
6145	current_vlan_features = netdev->features &
6146	NETIF_VLAN_FILTERING_FEATURES;
6147	requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
6148	if (current_vlan_features ^ requested_vlan_features) {
6149	err = ice_set_vlan_filtering_features(vsi, features);
6150	if (err)
6151	return err;
6152	}
6153
6154	return `0`;
6155	}
6156
6157	/**
6158	* ice_set_loopback - turn on/off loopback mode on underlying PF
6159	* @vsi: ptr to VSI
6160	* @ena: flag to indicate the on/off setting
6161	*/
6162	static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
6163	{
6164	bool if_running = netif_running(dev: vsi->netdev);
6165	int ret;
6166
6167	if (if_running && !test_and_set_bit(nr: ICE_VSI_DOWN, addr: vsi->state)) {
6168	ret = ice_down(vsi);
6169	if (ret) {
6170	netdev_err(dev: vsi->netdev, format: "Preparing device to toggle loopback failed\n");
6171	return ret;
6172	}
6173	}
6174	ret = ice_aq_set_mac_loopback(hw: &vsi->back->hw, ena_lpbk: ena, NULL);
6175	if (ret)
6176	netdev_err(dev: vsi->netdev, format: "Failed to toggle loopback state\n");
6177	if (if_running)
6178	ret = ice_up(vsi);
6179
6180	return ret;
6181	}
6182
6183	/**
6184	* ice_set_features - set the netdev feature flags
6185	* @netdev: ptr to the netdev being adjusted
6186	* @features: the feature set that the stack is suggesting
6187	*/
6188	static int
6189	ice_set_features(struct net_device *netdev, netdev_features_t features)
6190	{
6191	netdev_features_t changed = netdev->features ^ features;
6192	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6193	struct ice_vsi *vsi = np->vsi;
6194	struct ice_pf *pf = vsi->back;
6195	int ret = `0`;
6196
6197	/ Don't set any netdev advanced features with device in Safe Mode /
6198	if (ice_is_safe_mode(pf)) {
6199	dev_err(ice_pf_to_dev(pf),
6200	"Device is in Safe Mode - not enabling advanced netdev features\n");
6201	return ret;
6202	}
6203
6204	/ Do not change setting during reset /
6205	if (ice_is_reset_in_progress(state: pf->state)) {
6206	dev_err(ice_pf_to_dev(pf),
6207	"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
6208	return -EBUSY;
6209	}
6210
6211	/ Multiple features can be changed in one call so keep features in*
6212	* separate if/else statements to guarantee each feature is checked
6213	*/
6214	if (changed & NETIF_F_RXHASH)
6215	ice_vsi_manage_rss_lut(vsi, ena: !!(features & NETIF_F_RXHASH));
6216
6217	ret = ice_set_vlan_features(netdev, features);
6218	if (ret)
6219	return ret;
6220
6221	/ Turn on receive of FCS aka CRC, and after setting this*
6222	* flag the packet data will have the 4 byte CRC appended
6223	*/
6224	if (changed & NETIF_F_RXFCS) {
6225	if ((features & NETIF_F_RXFCS) &&
6226	(features & NETIF_VLAN_STRIPPING_FEATURES)) {
6227	dev_err(ice_pf_to_dev(vsi->back),
6228	"To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
6229	return -EIO;
6230	}
6231
6232	ice_vsi_cfg_crc_strip(vsi, disable: !!(features & NETIF_F_RXFCS));
6233	ret = ice_down_up(vsi);
6234	if (ret)
6235	return ret;
6236	}
6237
6238	if (changed & NETIF_F_NTUPLE) {
6239	bool ena = !!(features & NETIF_F_NTUPLE);
6240
6241	ice_vsi_manage_fdir(vsi, ena);
6242	ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
6243	}
6244
6245	/ don't turn off hw_tc_offload when ADQ is already enabled /
6246	if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
6247	dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
6248	return -EACCES;
6249	}
6250
6251	if (changed & NETIF_F_HW_TC) {
6252	bool ena = !!(features & NETIF_F_HW_TC);
6253
6254	ena ? set_bit(nr: ICE_FLAG_CLS_FLOWER, addr: pf->flags) :
6255	clear_bit(nr: ICE_FLAG_CLS_FLOWER, addr: pf->flags);
6256	}
6257
6258	if (changed & NETIF_F_LOOPBACK)
6259	ret = ice_set_loopback(vsi, ena: !!(features & NETIF_F_LOOPBACK));
6260
6261	return ret;
6262	}
6263
6264	/**
6265	* ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
6266	* @vsi: VSI to setup VLAN properties for
6267	*/
6268	static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
6269	{
6270	int err;
6271
6272	err = ice_set_vlan_offload_features(vsi, features: vsi->netdev->features);
6273	if (err)
6274	return err;
6275
6276	err = ice_set_vlan_filtering_features(vsi, features: vsi->netdev->features);
6277	if (err)
6278	return err;
6279
6280	return ice_vsi_add_vlan_zero(vsi);
6281	}
6282
6283	/**
6284	* ice_vsi_cfg_lan - Setup the VSI lan related config
6285	* @vsi: the VSI being configured
6286	*
6287	* Return 0 on success and negative value on error
6288	*/
6289	int ice_vsi_cfg_lan(struct ice_vsi *vsi)
6290	{
6291	int err;
6292
6293	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
6294	ice_set_rx_mode(netdev: vsi->netdev);
6295
6296	err = ice_vsi_vlan_setup(vsi);
6297	if (err)
6298	return err;
6299	}
6300	ice_vsi_cfg_dcb_rings(vsi);
6301
6302	err = ice_vsi_cfg_lan_txqs(vsi);
6303	if (!err && ice_is_xdp_ena_vsi(vsi))
6304	err = ice_vsi_cfg_xdp_txqs(vsi);
6305	if (!err)
6306	err = ice_vsi_cfg_rxqs(vsi);
6307
6308	return err;
6309	}
6310
6311	/ THEORY OF MODERATION:*
6312	* The ice driver hardware works differently than the hardware that DIMLIB was
6313	* originally made for. ice hardware doesn't have packet count limits that
6314	* can trigger an interrupt, but it does have interrupt rate limit support,
6315	* which is hard-coded to a limit of 250,000 ints/second.
6316	* If not using dynamic moderation, the INTRL value can be modified
6317	* by ethtool rx-usecs-high.
6318	*/
6319	struct ice_dim {
6320	/ the throttle rate for interrupts, basically worst case delay before*
6321	* an initial interrupt fires, value is stored in microseconds.
6322	*/
6323	u16 itr;
6324	};
6325
6326	/ Make a different profile for Rx that doesn't allow quite so aggressive*
6327	* moderation at the high end (it maxes out at 126us or about 8k interrupts a
6328	* second.
6329	*/
6330	static const struct ice_dim rx_profile[] = {
6331	{`2`}, / 500,000 ints/s, capped at 250K by INTRL /
6332	{`8`}, / 125,000 ints/s /
6333	{`16`}, / 62,500 ints/s /
6334	{`62`}, / 16,129 ints/s /
6335	{`126`} / 7,936 ints/s /
6336	};
6337
6338	/ The transmit profile, which has the same sorts of values*
6339	* as the previous struct
6340	*/
6341	static const struct ice_dim tx_profile[] = {
6342	{`2`}, / 500,000 ints/s, capped at 250K by INTRL /
6343	{`8`}, / 125,000 ints/s /
6344	{`40`}, / 16,125 ints/s /
6345	{`128`}, / 7,812 ints/s /
6346	{`256`} / 3,906 ints/s /
6347	};
6348
6349	static void ice_tx_dim_work(struct work_struct *work)
6350	{
6351	struct ice_ring_container *rc;
6352	struct dim *dim;
6353	u16 itr;
6354
6355	dim = container_of(work, struct dim, work);
6356	rc = dim->priv;
6357
6358	WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
6359
6360	/ look up the values in our local table /
6361	itr = tx_profile[dim->profile_ix].itr;
6362
6363	ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
6364	ice_write_itr(rc, itr);
6365
6366	dim->state = DIM_START_MEASURE;
6367	}
6368
6369	static void ice_rx_dim_work(struct work_struct *work)
6370	{
6371	struct ice_ring_container *rc;
6372	struct dim *dim;
6373	u16 itr;
6374
6375	dim = container_of(work, struct dim, work);
6376	rc = dim->priv;
6377
6378	WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
6379
6380	/ look up the values in our local table /
6381	itr = rx_profile[dim->profile_ix].itr;
6382
6383	ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
6384	ice_write_itr(rc, itr);
6385
6386	dim->state = DIM_START_MEASURE;
6387	}
6388
6389	#define ICE_DIM_DEFAULT_PROFILE_IX 1
6390
6391	/**
6392	* ice_init_moderation - set up interrupt moderation
6393	* @q_vector: the vector containing rings to be configured
6394	*
6395	* Set up interrupt moderation registers, with the intent to do the right thing
6396	* when called from reset or from probe, and whether or not dynamic moderation
6397	* is enabled or not. Take special care to write all the registers in both
6398	* dynamic moderation mode or not in order to make sure hardware is in a known
6399	* state.
6400	*/
6401	static void ice_init_moderation(struct ice_q_vector *q_vector)
6402	{
6403	struct ice_ring_container *rc;
6404	bool tx_dynamic, rx_dynamic;
6405
6406	rc = &q_vector->tx;
6407	INIT_WORK(&rc->dim.work, ice_tx_dim_work);
6408	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6409	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6410	rc->dim.priv = rc;
6411	tx_dynamic = ITR_IS_DYNAMIC(rc);
6412
6413	/ set the initial TX ITR to match the above /
6414	ice_write_itr(rc, itr: tx_dynamic ?
6415	tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
6416
6417	rc = &q_vector->rx;
6418	INIT_WORK(&rc->dim.work, ice_rx_dim_work);
6419	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6420	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6421	rc->dim.priv = rc;
6422	rx_dynamic = ITR_IS_DYNAMIC(rc);
6423
6424	/ set the initial RX ITR to match the above /
6425	ice_write_itr(rc, itr: rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
6426	rc->itr_setting);
6427
6428	ice_set_q_vector_intrl(q_vector);
6429	}
6430
6431	/**
6432	* ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
6433	* @vsi: the VSI being configured
6434	*/
6435	static void ice_napi_enable_all(struct ice_vsi *vsi)
6436	{
6437	int q_idx;
6438
6439	if (!vsi->netdev)
6440	return;
6441
6442	ice_for_each_q_vector(vsi, q_idx) {
6443	struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6444
6445	ice_init_moderation(q_vector);
6446
6447	if (q_vector->rx.rx_ring \|\| q_vector->tx.tx_ring)
6448	napi_enable(n: &q_vector->napi);
6449	}
6450	}
6451
6452	/**
6453	* ice_up_complete - Finish the last steps of bringing up a connection
6454	* @vsi: The VSI being configured
6455	*
6456	* Return 0 on success and negative value on error
6457	*/
6458	static int ice_up_complete(struct ice_vsi *vsi)
6459	{
6460	struct ice_pf *pf = vsi->back;
6461	int err;
6462
6463	ice_vsi_cfg_msix(vsi);
6464
6465	/ Enable only Rx rings, Tx rings were enabled by the FW when the*
6466	* Tx queue group list was configured and the context bits were
6467	* programmed using ice_vsi_cfg_txqs
6468	*/
6469	err = ice_vsi_start_all_rx_rings(vsi);
6470	if (err)
6471	return err;
6472
6473	clear_bit(nr: ICE_VSI_DOWN, addr: vsi->state);
6474	ice_napi_enable_all(vsi);
6475	ice_vsi_ena_irq(vsi);
6476
6477	if (vsi->port_info &&
6478	(vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
6479	vsi->netdev && vsi->type == ICE_VSI_PF) {
6480	ice_print_link_msg(vsi, isup: true);
6481	netif_tx_start_all_queues(dev: vsi->netdev);
6482	netif_carrier_on(dev: vsi->netdev);
6483	ice_ptp_link_change(pf, port: pf->hw.pf_id, linkup: true);
6484	}
6485
6486	/ Perform an initial read of the statistics registers now to*
6487	* set the baseline so counters are ready when interface is up
6488	*/
6489	ice_update_eth_stats(vsi);
6490
6491	if (vsi->type == ICE_VSI_PF)
6492	ice_service_task_schedule(pf);
6493
6494	return `0`;
6495	}
6496
/**
 * ice_up - Bring the connection back up after being down
 * @vsi: VSI being configured
 *
 * Return: 0 on success, otherwise the error from LAN configuration or from
 * ice_up_complete(), whichever fails first.
 */
int ice_up(struct ice_vsi *vsi)
{
	int ret = ice_vsi_cfg_lan(vsi);

	if (ret)
		return ret;

	return ice_up_complete(vsi);
}
6511
6512	/**
6513	* ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
6514	* @syncp: pointer to u64_stats_sync
6515	* @stats: stats that pkts and bytes count will be taken from
6516	* @pkts: packets stats counter
6517	* @bytes: bytes stats counter
6518	*
6519	* This function fetches stats from the ring considering the atomic operations
6520	* that needs to be performed to read u64 values in 32 bit machine.
6521	*/
6522	void
6523	ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
6524	struct ice_q_stats stats, u64 pkts, u64 bytes)
6525	{
6526	unsigned int start;
6527
6528	do {
6529	start = u64_stats_fetch_begin(syncp);
6530	*pkts = stats.pkts;
6531	*bytes = stats.bytes;
6532	} while (u64_stats_fetch_retry(syncp, start));
6533	}
6534
6535	/**
6536	* ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
6537	* @vsi: the VSI to be updated
6538	* @vsi_stats: the stats struct to be updated
6539	* @rings: rings to work on
6540	* @count: number of rings
6541	*/
6542	static void
6543	ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
6544	struct rtnl_link_stats64 *vsi_stats,
6545	struct ice_tx_ring **rings, u16 count)
6546	{
6547	u16 i;
6548
6549	for (i = `0`; i < count; i++) {
6550	struct ice_tx_ring *ring;
6551	u64 pkts = `0`, bytes = `0`;
6552
6553	ring = READ_ONCE(rings[i]);
6554	if (!ring \|\| !ring->ring_stats)
6555	continue;
6556	ice_fetch_u64_stats_per_ring(syncp: &ring->ring_stats->syncp,
6557	stats: ring->ring_stats->stats, pkts: &pkts,
6558	bytes: &bytes);
6559	vsi_stats->tx_packets += pkts;
6560	vsi_stats->tx_bytes += bytes;
6561	vsi->tx_restart += ring->ring_stats->tx_stats.restart_q;
6562	vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy;
6563	vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize;
6564	}
6565	}
6566
6567	/**
6568	* ice_update_vsi_ring_stats - Update VSI stats counters
6569	* @vsi: the VSI to be updated
6570	*/
6571	static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
6572	{
6573	struct rtnl_link_stats64 net_stats, stats_prev;
6574	struct rtnl_link_stats64 *vsi_stats;
6575	u64 pkts, bytes;
6576	int i;
6577
6578	vsi_stats = kzalloc(size: sizeof(*vsi_stats), GFP_ATOMIC);
6579	if (!vsi_stats)
6580	return;
6581
6582	/ reset non-netdev (extended) stats /
6583	vsi->tx_restart = `0`;
6584	vsi->tx_busy = `0`;
6585	vsi->tx_linearize = `0`;
6586	vsi->rx_buf_failed = `0`;
6587	vsi->rx_page_failed = `0`;
6588
6589	rcu_read_lock();
6590
6591	/ update Tx rings counters /
6592	ice_update_vsi_tx_ring_stats(vsi, vsi_stats, rings: vsi->tx_rings,
6593	count: vsi->num_txq);
6594
6595	/ update Rx rings counters /
6596	ice_for_each_rxq(vsi, i) {
6597	struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]);
6598	struct ice_ring_stats *ring_stats;
6599
6600	ring_stats = ring->ring_stats;
6601	ice_fetch_u64_stats_per_ring(syncp: &ring_stats->syncp,
6602	stats: ring_stats->stats, pkts: &pkts,
6603	bytes: &bytes);
6604	vsi_stats->rx_packets += pkts;
6605	vsi_stats->rx_bytes += bytes;
6606	vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed;
6607	vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed;
6608	}
6609
6610	/ update XDP Tx rings counters /
6611	if (ice_is_xdp_ena_vsi(vsi))
6612	ice_update_vsi_tx_ring_stats(vsi, vsi_stats, rings: vsi->xdp_rings,
6613	count: vsi->num_xdp_txq);
6614
6615	rcu_read_unlock();
6616
6617	net_stats = &vsi->net_stats;
6618	stats_prev = &vsi->net_stats_prev;
6619
6620	/ clear prev counters after reset /
6621	if (vsi_stats->tx_packets < stats_prev->tx_packets \|\|
6622	vsi_stats->rx_packets < stats_prev->rx_packets) {
6623	stats_prev->tx_packets = `0`;
6624	stats_prev->tx_bytes = `0`;
6625	stats_prev->rx_packets = `0`;
6626	stats_prev->rx_bytes = `0`;
6627	}
6628
6629	/ update netdev counters /
6630	net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
6631	net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
6632	net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
6633	net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
6634
6635	stats_prev->tx_packets = vsi_stats->tx_packets;
6636	stats_prev->tx_bytes = vsi_stats->tx_bytes;
6637	stats_prev->rx_packets = vsi_stats->rx_packets;
6638	stats_prev->rx_bytes = vsi_stats->rx_bytes;
6639
6640	kfree(objp: vsi_stats);
6641	}
6642
6643	/**
6644	* ice_update_vsi_stats - Update VSI stats counters
6645	* @vsi: the VSI to be updated
6646	*/
6647	void ice_update_vsi_stats(struct ice_vsi *vsi)
6648	{
6649	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
6650	struct ice_eth_stats *cur_es = &vsi->eth_stats;
6651	struct ice_pf *pf = vsi->back;
6652
6653	if (test_bit(ICE_VSI_DOWN, vsi->state) \|\|
6654	test_bit(ICE_CFG_BUSY, pf->state))
6655	return;
6656
6657	/ get stats as recorded by Tx/Rx rings /
6658	ice_update_vsi_ring_stats(vsi);
6659
6660	/ get VSI stats as recorded by the hardware /
6661	ice_update_eth_stats(vsi);
6662
6663	cur_ns->tx_errors = cur_es->tx_errors;
6664	cur_ns->rx_dropped = cur_es->rx_discards;
6665	cur_ns->tx_dropped = cur_es->tx_discards;
6666	cur_ns->multicast = cur_es->rx_multicast;
6667
6668	/ update some more netdev stats if this is main VSI /
6669	if (vsi->type == ICE_VSI_PF) {
6670	cur_ns->rx_crc_errors = pf->stats.crc_errors;
6671	cur_ns->rx_errors = pf->stats.crc_errors +
6672	pf->stats.illegal_bytes +
6673	pf->stats.rx_len_errors +
6674	pf->stats.rx_undersize +
6675	pf->hw_csum_rx_error +
6676	pf->stats.rx_jabber +
6677	pf->stats.rx_fragments +
6678	pf->stats.rx_oversize;
6679	cur_ns->rx_length_errors = pf->stats.rx_len_errors;
6680	/ record drops from the port level /
6681	cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
6682	}
6683	}
6684
6685	/**
6686	* ice_update_pf_stats - Update PF port stats counters
6687	* @pf: PF whose stats needs to be updated
6688	*/
6689	void ice_update_pf_stats(struct ice_pf *pf)
6690	{
6691	struct ice_hw_port_stats prev_ps, cur_ps;
6692	struct ice_hw *hw = &pf->hw;
6693	u16 fd_ctr_base;
6694	u8 port;
6695
6696	port = hw->port_info->lport;
6697	prev_ps = &pf->stats_prev;
6698	cur_ps = &pf->stats;
6699
6700	if (ice_is_reset_in_progress(state: pf->state))
6701	pf->stat_prev_loaded = false;
6702
6703	ice_stat_update40(hw, GLPRT_GORCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6704	prev_stat: &prev_ps->eth.rx_bytes,
6705	cur_stat: &cur_ps->eth.rx_bytes);
6706
6707	ice_stat_update40(hw, GLPRT_UPRCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6708	prev_stat: &prev_ps->eth.rx_unicast,
6709	cur_stat: &cur_ps->eth.rx_unicast);
6710
6711	ice_stat_update40(hw, GLPRT_MPRCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6712	prev_stat: &prev_ps->eth.rx_multicast,
6713	cur_stat: &cur_ps->eth.rx_multicast);
6714
6715	ice_stat_update40(hw, GLPRT_BPRCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6716	prev_stat: &prev_ps->eth.rx_broadcast,
6717	cur_stat: &cur_ps->eth.rx_broadcast);
6718
6719	ice_stat_update32(hw, PRTRPB_RDPC, prev_stat_loaded: pf->stat_prev_loaded,
6720	prev_stat: &prev_ps->eth.rx_discards,
6721	cur_stat: &cur_ps->eth.rx_discards);
6722
6723	ice_stat_update40(hw, GLPRT_GOTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6724	prev_stat: &prev_ps->eth.tx_bytes,
6725	cur_stat: &cur_ps->eth.tx_bytes);
6726
6727	ice_stat_update40(hw, GLPRT_UPTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6728	prev_stat: &prev_ps->eth.tx_unicast,
6729	cur_stat: &cur_ps->eth.tx_unicast);
6730
6731	ice_stat_update40(hw, GLPRT_MPTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6732	prev_stat: &prev_ps->eth.tx_multicast,
6733	cur_stat: &cur_ps->eth.tx_multicast);
6734
6735	ice_stat_update40(hw, GLPRT_BPTCL(port), prev_stat_loaded: pf->stat_prev_loaded,
6736	prev_stat: &prev_ps->eth.tx_broadcast,
6737	cur_stat: &cur_ps->eth.tx_broadcast);
6738
6739	ice_stat_update32(hw, GLPRT_TDOLD(port), prev_stat_loaded: pf->stat_prev_loaded,
6740	prev_stat: &prev_ps->tx_dropped_link_down,
6741	cur_stat: &cur_ps->tx_dropped_link_down);
6742
6743	ice_stat_update40(hw, GLPRT_PRC64L(port), prev_stat_loaded: pf->stat_prev_loaded,
6744	prev_stat: &prev_ps->rx_size_64, cur_stat: &cur_ps->rx_size_64);
6745
6746	ice_stat_update40(hw, GLPRT_PRC127L(port), prev_stat_loaded: pf->stat_prev_loaded,
6747	prev_stat: &prev_ps->rx_size_127, cur_stat: &cur_ps->rx_size_127);
6748
6749	ice_stat_update40(hw, GLPRT_PRC255L(port), prev_stat_loaded: pf->stat_prev_loaded,
6750	prev_stat: &prev_ps->rx_size_255, cur_stat: &cur_ps->rx_size_255);
6751
6752	ice_stat_update40(hw, GLPRT_PRC511L(port), prev_stat_loaded: pf->stat_prev_loaded,
6753	prev_stat: &prev_ps->rx_size_511, cur_stat: &cur_ps->rx_size_511);
6754
6755	ice_stat_update40(hw, GLPRT_PRC1023L(port), prev_stat_loaded: pf->stat_prev_loaded,
6756	prev_stat: &prev_ps->rx_size_1023, cur_stat: &cur_ps->rx_size_1023);
6757
6758	ice_stat_update40(hw, GLPRT_PRC1522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6759	prev_stat: &prev_ps->rx_size_1522, cur_stat: &cur_ps->rx_size_1522);
6760
6761	ice_stat_update40(hw, GLPRT_PRC9522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6762	prev_stat: &prev_ps->rx_size_big, cur_stat: &cur_ps->rx_size_big);
6763
6764	ice_stat_update40(hw, GLPRT_PTC64L(port), prev_stat_loaded: pf->stat_prev_loaded,
6765	prev_stat: &prev_ps->tx_size_64, cur_stat: &cur_ps->tx_size_64);
6766
6767	ice_stat_update40(hw, GLPRT_PTC127L(port), prev_stat_loaded: pf->stat_prev_loaded,
6768	prev_stat: &prev_ps->tx_size_127, cur_stat: &cur_ps->tx_size_127);
6769
6770	ice_stat_update40(hw, GLPRT_PTC255L(port), prev_stat_loaded: pf->stat_prev_loaded,
6771	prev_stat: &prev_ps->tx_size_255, cur_stat: &cur_ps->tx_size_255);
6772
6773	ice_stat_update40(hw, GLPRT_PTC511L(port), prev_stat_loaded: pf->stat_prev_loaded,
6774	prev_stat: &prev_ps->tx_size_511, cur_stat: &cur_ps->tx_size_511);
6775
6776	ice_stat_update40(hw, GLPRT_PTC1023L(port), prev_stat_loaded: pf->stat_prev_loaded,
6777	prev_stat: &prev_ps->tx_size_1023, cur_stat: &cur_ps->tx_size_1023);
6778
6779	ice_stat_update40(hw, GLPRT_PTC1522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6780	prev_stat: &prev_ps->tx_size_1522, cur_stat: &cur_ps->tx_size_1522);
6781
6782	ice_stat_update40(hw, GLPRT_PTC9522L(port), prev_stat_loaded: pf->stat_prev_loaded,
6783	prev_stat: &prev_ps->tx_size_big, cur_stat: &cur_ps->tx_size_big);
6784
6785	fd_ctr_base = hw->fd_ctr_base;
6786
6787	ice_stat_update40(hw,
6788	GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
6789	prev_stat_loaded: pf->stat_prev_loaded, prev_stat: &prev_ps->fd_sb_match,
6790	cur_stat: &cur_ps->fd_sb_match);
6791	ice_stat_update32(hw, GLPRT_LXONRXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6792	prev_stat: &prev_ps->link_xon_rx, cur_stat: &cur_ps->link_xon_rx);
6793
6794	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6795	prev_stat: &prev_ps->link_xoff_rx, cur_stat: &cur_ps->link_xoff_rx);
6796
6797	ice_stat_update32(hw, GLPRT_LXONTXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6798	prev_stat: &prev_ps->link_xon_tx, cur_stat: &cur_ps->link_xon_tx);
6799
6800	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), prev_stat_loaded: pf->stat_prev_loaded,
6801	prev_stat: &prev_ps->link_xoff_tx, cur_stat: &cur_ps->link_xoff_tx);
6802
6803	ice_update_dcb_stats(pf);
6804
6805	ice_stat_update32(hw, GLPRT_CRCERRS(port), prev_stat_loaded: pf->stat_prev_loaded,
6806	prev_stat: &prev_ps->crc_errors, cur_stat: &cur_ps->crc_errors);
6807
6808	ice_stat_update32(hw, GLPRT_ILLERRC(port), prev_stat_loaded: pf->stat_prev_loaded,
6809	prev_stat: &prev_ps->illegal_bytes, cur_stat: &cur_ps->illegal_bytes);
6810
6811	ice_stat_update32(hw, GLPRT_MLFC(port), prev_stat_loaded: pf->stat_prev_loaded,
6812	prev_stat: &prev_ps->mac_local_faults,
6813	cur_stat: &cur_ps->mac_local_faults);
6814
6815	ice_stat_update32(hw, GLPRT_MRFC(port), prev_stat_loaded: pf->stat_prev_loaded,
6816	prev_stat: &prev_ps->mac_remote_faults,
6817	cur_stat: &cur_ps->mac_remote_faults);
6818
6819	ice_stat_update32(hw, GLPRT_RLEC(port), prev_stat_loaded: pf->stat_prev_loaded,
6820	prev_stat: &prev_ps->rx_len_errors, cur_stat: &cur_ps->rx_len_errors);
6821
6822	ice_stat_update32(hw, GLPRT_RUC(port), prev_stat_loaded: pf->stat_prev_loaded,
6823	prev_stat: &prev_ps->rx_undersize, cur_stat: &cur_ps->rx_undersize);
6824
6825	ice_stat_update32(hw, GLPRT_RFC(port), prev_stat_loaded: pf->stat_prev_loaded,
6826	prev_stat: &prev_ps->rx_fragments, cur_stat: &cur_ps->rx_fragments);
6827
6828	ice_stat_update32(hw, GLPRT_ROC(port), prev_stat_loaded: pf->stat_prev_loaded,
6829	prev_stat: &prev_ps->rx_oversize, cur_stat: &cur_ps->rx_oversize);
6830
6831	ice_stat_update32(hw, GLPRT_RJC(port), prev_stat_loaded: pf->stat_prev_loaded,
6832	prev_stat: &prev_ps->rx_jabber, cur_stat: &cur_ps->rx_jabber);
6833
6834	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? `1` : `0`;
6835
6836	pf->stat_prev_loaded = true;
6837	}
6838
6839	/**
6840	* ice_get_stats64 - get statistics for network device structure
6841	* @netdev: network interface device structure
6842	* @stats: main device statistics structure
6843	*/
6844	static
6845	void ice_get_stats64(struct net_device netdev, struct* rtnl_link_stats64 *stats)
6846	{
6847	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
6848	struct rtnl_link_stats64 *vsi_stats;
6849	struct ice_vsi *vsi = np->vsi;
6850
6851	vsi_stats = &vsi->net_stats;
6852
6853	if (!vsi->num_txq \|\| !vsi->num_rxq)
6854	return;
6855
6856	/ netdev packet/byte stats come from ring counter. These are obtained*
6857	* by summing up ring counters (done by ice_update_vsi_ring_stats).
6858	* But, only call the update routine and read the registers if VSI is
6859	* not down.
6860	*/
6861	if (!test_bit(ICE_VSI_DOWN, vsi->state))
6862	ice_update_vsi_ring_stats(vsi);
6863	stats->tx_packets = vsi_stats->tx_packets;
6864	stats->tx_bytes = vsi_stats->tx_bytes;
6865	stats->rx_packets = vsi_stats->rx_packets;
6866	stats->rx_bytes = vsi_stats->rx_bytes;
6867
6868	/ The rest of the stats can be read from the hardware but instead we*
6869	* just return values that the watchdog task has already obtained from
6870	* the hardware.
6871	*/
6872	stats->multicast = vsi_stats->multicast;
6873	stats->tx_errors = vsi_stats->tx_errors;
6874	stats->tx_dropped = vsi_stats->tx_dropped;
6875	stats->rx_errors = vsi_stats->rx_errors;
6876	stats->rx_dropped = vsi_stats->rx_dropped;
6877	stats->rx_crc_errors = vsi_stats->rx_crc_errors;
6878	stats->rx_length_errors = vsi_stats->rx_length_errors;
6879	}
6880
6881	/**
6882	* ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
6883	* @vsi: VSI having NAPI disabled
6884	*/
6885	static void ice_napi_disable_all(struct ice_vsi *vsi)
6886	{
6887	int q_idx;
6888
6889	if (!vsi->netdev)
6890	return;
6891
6892	ice_for_each_q_vector(vsi, q_idx) {
6893	struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6894
6895	if (q_vector->rx.rx_ring \|\| q_vector->tx.tx_ring)
6896	napi_disable(n: &q_vector->napi);
6897
6898	cancel_work_sync(work: &q_vector->tx.dim.work);
6899	cancel_work_sync(work: &q_vector->rx.dim.work);
6900	}
6901	}
6902
6903	/**
6904	* ice_down - Shutdown the connection
6905	* @vsi: The VSI being stopped
6906	*
6907	* Caller of this function is expected to set the vsi->state ICE_DOWN bit
6908	*/
6909	int ice_down(struct ice_vsi *vsi)
6910	{
6911	int i, tx_err, rx_err, vlan_err = `0`;
6912
6913	WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
6914
6915	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
6916	vlan_err = ice_vsi_del_vlan_zero(vsi);
6917	ice_ptp_link_change(pf: vsi->back, port: vsi->back->hw.pf_id, linkup: false);
6918	netif_carrier_off(dev: vsi->netdev);
6919	netif_tx_disable(dev: vsi->netdev);
6920	} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
6921	ice_eswitch_stop_all_tx_queues(pf: vsi->back);
6922	}
6923
6924	ice_vsi_dis_irq(vsi);
6925
6926	tx_err = ice_vsi_stop_lan_tx_rings(vsi, rst_src: ICE_NO_RESET, rel_vmvf_num: `0`);
6927	if (tx_err)
6928	netdev_err(dev: vsi->netdev, format: "Failed stop Tx rings, VSI %d error %d\n",
6929	vsi->vsi_num, tx_err);
6930	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
6931	tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
6932	if (tx_err)
6933	netdev_err(dev: vsi->netdev, format: "Failed stop XDP rings, VSI %d error %d\n",
6934	vsi->vsi_num, tx_err);
6935	}
6936
6937	rx_err = ice_vsi_stop_all_rx_rings(vsi);
6938	if (rx_err)
6939	netdev_err(dev: vsi->netdev, format: "Failed stop Rx rings, VSI %d error %d\n",
6940	vsi->vsi_num, rx_err);
6941
6942	ice_napi_disable_all(vsi);
6943
6944	ice_for_each_txq(vsi, i)
6945	ice_clean_tx_ring(tx_ring: vsi->tx_rings[i]);
6946
6947	if (ice_is_xdp_ena_vsi(vsi))
6948	ice_for_each_xdp_txq(vsi, i)
6949	ice_clean_tx_ring(tx_ring: vsi->xdp_rings[i]);
6950
6951	ice_for_each_rxq(vsi, i)
6952	ice_clean_rx_ring(rx_ring: vsi->rx_rings[i]);
6953
6954	if (tx_err \|\| rx_err \|\| vlan_err) {
6955	netdev_err(dev: vsi->netdev, format: "Failed to close VSI 0x%04X on switch 0x%04X\n",
6956	vsi->vsi_num, vsi->vsw->sw_id);
6957	return -EIO;
6958	}
6959
6960	return `0`;
6961	}
6962
6963	/**
6964	* ice_down_up - shutdown the VSI connection and bring it up
6965	* @vsi: the VSI to be reconnected
6966	*/
6967	int ice_down_up(struct ice_vsi *vsi)
6968	{
6969	int ret;
6970
6971	/ if DOWN already set, nothing to do /
6972	if (test_and_set_bit(nr: ICE_VSI_DOWN, addr: vsi->state))
6973	return `0`;
6974
6975	ret = ice_down(vsi);
6976	if (ret)
6977	return ret;
6978
6979	ret = ice_up(vsi);
6980	if (ret) {
6981	netdev_err(dev: vsi->netdev, format: "reallocating resources failed during netdev features change, may need to reload driver\n");
6982	return ret;
6983	}
6984
6985	return `0`;
6986	}
6987
6988	/**
6989	* ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
6990	* @vsi: VSI having resources allocated
6991	*
6992	* Return 0 on success, negative on failure
6993	*/
6994	int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
6995	{
6996	int i, err = `0`;
6997
6998	if (!vsi->num_txq) {
6999	dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
7000	vsi->vsi_num);
7001	return -EINVAL;
7002	}
7003
7004	ice_for_each_txq(vsi, i) {
7005	struct ice_tx_ring *ring = vsi->tx_rings[i];
7006
7007	if (!ring)
7008	return -EINVAL;
7009
7010	if (vsi->netdev)
7011	ring->netdev = vsi->netdev;
7012	err = ice_setup_tx_ring(tx_ring: ring);
7013	if (err)
7014	break;
7015	}
7016
7017	return err;
7018	}
7019
7020	/**
7021	* ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
7022	* @vsi: VSI having resources allocated
7023	*
7024	* Return 0 on success, negative on failure
7025	*/
7026	int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
7027	{
7028	int i, err = `0`;
7029
7030	if (!vsi->num_rxq) {
7031	dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
7032	vsi->vsi_num);
7033	return -EINVAL;
7034	}
7035
7036	ice_for_each_rxq(vsi, i) {
7037	struct ice_rx_ring *ring = vsi->rx_rings[i];
7038
7039	if (!ring)
7040	return -EINVAL;
7041
7042	if (vsi->netdev)
7043	ring->netdev = vsi->netdev;
7044	err = ice_setup_rx_ring(rx_ring: ring);
7045	if (err)
7046	break;
7047	}
7048
7049	return err;
7050	}
7051
7052	/**
7053	* ice_vsi_open_ctrl - open control VSI for use
7054	* @vsi: the VSI to open
7055	*
7056	* Initialization of the Control VSI
7057	*
7058	* Returns 0 on success, negative value on error
7059	*/
7060	int ice_vsi_open_ctrl(struct ice_vsi *vsi)
7061	{
7062	char int_name[ICE_INT_NAME_STR_LEN];
7063	struct ice_pf *pf = vsi->back;
7064	struct device *dev;
7065	int err;
7066
7067	dev = ice_pf_to_dev(pf);
7068	/ allocate descriptors /
7069	err = ice_vsi_setup_tx_rings(vsi);
7070	if (err)
7071	goto err_setup_tx;
7072
7073	err = ice_vsi_setup_rx_rings(vsi);
7074	if (err)
7075	goto err_setup_rx;
7076
7077	err = ice_vsi_cfg_lan(vsi);
7078	if (err)
7079	goto err_setup_rx;
7080
7081	snprintf(buf: int_name, size: sizeof(int_name) - `1`, fmt: "%s-%s:ctrl",
7082	dev_driver_string(dev), dev_name(dev));
7083	err = ice_vsi_req_irq_msix(vsi, basename: int_name);
7084	if (err)
7085	goto err_setup_rx;
7086
7087	ice_vsi_cfg_msix(vsi);
7088
7089	err = ice_vsi_start_all_rx_rings(vsi);
7090	if (err)
7091	goto err_up_complete;
7092
7093	clear_bit(nr: ICE_VSI_DOWN, addr: vsi->state);
7094	ice_vsi_ena_irq(vsi);
7095
7096	return `0`;
7097
7098	err_up_complete:
7099	ice_down(vsi);
7100	err_setup_rx:
7101	ice_vsi_free_rx_rings(vsi);
7102	err_setup_tx:
7103	ice_vsi_free_tx_rings(vsi);
7104
7105	return err;
7106	}
7107
7108	/**
7109	* ice_vsi_open - Called when a network interface is made active
7110	* @vsi: the VSI to open
7111	*
7112	* Initialization of the VSI
7113	*
7114	* Returns 0 on success, negative value on error
7115	*/
7116	int ice_vsi_open(struct ice_vsi *vsi)
7117	{
7118	char int_name[ICE_INT_NAME_STR_LEN];
7119	struct ice_pf *pf = vsi->back;
7120	int err;
7121
7122	/ allocate descriptors /
7123	err = ice_vsi_setup_tx_rings(vsi);
7124	if (err)
7125	goto err_setup_tx;
7126
7127	err = ice_vsi_setup_rx_rings(vsi);
7128	if (err)
7129	goto err_setup_rx;
7130
7131	err = ice_vsi_cfg_lan(vsi);
7132	if (err)
7133	goto err_setup_rx;
7134
7135	snprintf(buf: int_name, size: sizeof(int_name) - `1`, fmt: "%s-%s",
7136	dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
7137	err = ice_vsi_req_irq_msix(vsi, basename: int_name);
7138	if (err)
7139	goto err_setup_rx;
7140
7141	ice_vsi_cfg_netdev_tc(vsi, ena_tc: vsi->tc_cfg.ena_tc);
7142
7143	if (vsi->type == ICE_VSI_PF) {
7144	/ Notify the stack of the actual queue counts. /
7145	err = netif_set_real_num_tx_queues(dev: vsi->netdev, txq: vsi->num_txq);
7146	if (err)
7147	goto err_set_qs;
7148
7149	err = netif_set_real_num_rx_queues(dev: vsi->netdev, rxq: vsi->num_rxq);
7150	if (err)
7151	goto err_set_qs;
7152	}
7153
7154	err = ice_up_complete(vsi);
7155	if (err)
7156	goto err_up_complete;
7157
7158	return `0`;
7159
7160	err_up_complete:
7161	ice_down(vsi);
7162	err_set_qs:
7163	ice_vsi_free_irq(vsi);
7164	err_setup_rx:
7165	ice_vsi_free_rx_rings(vsi);
7166	err_setup_tx:
7167	ice_vsi_free_tx_rings(vsi);
7168
7169	return err;
7170	}
7171
7172	/**
7173	* ice_vsi_release_all - Delete all VSIs
7174	* @pf: PF from which all VSIs are being removed
7175	*/
7176	static void ice_vsi_release_all(struct ice_pf *pf)
7177	{
7178	int err, i;
7179
7180	if (!pf->vsi)
7181	return;
7182
7183	ice_for_each_vsi(pf, i) {
7184	if (!pf->vsi[i])
7185	continue;
7186
7187	if (pf->vsi[i]->type == ICE_VSI_CHNL)
7188	continue;
7189
7190	err = ice_vsi_release(vsi: pf->vsi[i]);
7191	if (err)
7192	dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
7193	i, err, pf->vsi[i]->vsi_num);
7194	}
7195	}
7196
7197	/**
7198	* ice_vsi_rebuild_by_type - Rebuild VSI of a given type
7199	* @pf: pointer to the PF instance
7200	* @type: VSI type to rebuild
7201	*
7202	* Iterates through the pf->vsi array and rebuilds VSIs of the requested type
7203	*/
7204	static int ice_vsi_rebuild_by_type(struct ice_pf pf, enum* ice_vsi_type type)
7205	{
7206	struct device *dev = ice_pf_to_dev(pf);
7207	int i, err;
7208
7209	ice_for_each_vsi(pf, i) {
7210	struct ice_vsi *vsi = pf->vsi[i];
7211
7212	if (!vsi \|\| vsi->type != type)
7213	continue;
7214
7215	/ rebuild the VSI /
7216	err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
7217	if (err) {
7218	dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
7219	err, vsi->idx, ice_vsi_type_str(type));
7220	return err;
7221	}
7222
7223	/ replay filters for the VSI /
7224	err = ice_replay_vsi(hw: &pf->hw, vsi_handle: vsi->idx);
7225	if (err) {
7226	dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n",
7227	err, vsi->idx, ice_vsi_type_str(type));
7228	return err;
7229	}
7230
7231	/ Re-map HW VSI number, using VSI handle that has been*
7232	* previously validated in ice_replay_vsi() call above
7233	*/
7234	vsi->vsi_num = ice_get_hw_vsi_num(hw: &pf->hw, vsi_handle: vsi->idx);
7235
7236	/ enable the VSI /
7237	err = ice_ena_vsi(vsi, locked: false);
7238	if (err) {
7239	dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
7240	err, vsi->idx, ice_vsi_type_str(type));
7241	return err;
7242	}
7243
7244	dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
7245	ice_vsi_type_str(type));
7246	}
7247
7248	return `0`;
7249	}
7250
7251	/**
7252	* ice_update_pf_netdev_link - Update PF netdev link status
7253	* @pf: pointer to the PF instance
7254	*/
7255	static void ice_update_pf_netdev_link(struct ice_pf *pf)
7256	{
7257	bool link_up;
7258	int i;
7259
7260	ice_for_each_vsi(pf, i) {
7261	struct ice_vsi *vsi = pf->vsi[i];
7262
7263	if (!vsi \|\| vsi->type != ICE_VSI_PF)
7264	return;
7265
7266	ice_get_link_status(pi: pf->vsi[i]->port_info, link_up: &link_up);
7267	if (link_up) {
7268	netif_carrier_on(dev: pf->vsi[i]->netdev);
7269	netif_tx_wake_all_queues(dev: pf->vsi[i]->netdev);
7270	} else {
7271	netif_carrier_off(dev: pf->vsi[i]->netdev);
7272	netif_tx_stop_all_queues(dev: pf->vsi[i]->netdev);
7273	}
7274	}
7275	}
7276
7277	/**
7278	* ice_rebuild - rebuild after reset
7279	* @pf: PF to rebuild
7280	* @reset_type: type of reset
7281	*
7282	* Do not rebuild VF VSI in this flow because that is already handled via
7283	* ice_reset_all_vfs(). This is because requirements for resetting a VF after a
7284	* PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
7285	* to reset/rebuild all the VF VSI twice.
7286	*/
7287	static void ice_rebuild(struct ice_pf pf, enum* ice_reset_req reset_type)
7288	{
7289	struct device *dev = ice_pf_to_dev(pf);
7290	struct ice_hw *hw = &pf->hw;
7291	bool dvm;
7292	int err;
7293
7294	if (test_bit(ICE_DOWN, pf->state))
7295	goto clear_recovery;
7296
7297	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
7298
7299	#define ICE_EMP_RESET_SLEEP_MS 5000
7300	if (reset_type == ICE_RESET_EMPR) {
7301	/ If an EMP reset has occurred, any previously pending flash*
7302	* update will have completed. We no longer know whether or
7303	* not the NVM update EMP reset is restricted.
7304	*/
7305	pf->fw_emp_reset_disabled = false;
7306
7307	msleep(ICE_EMP_RESET_SLEEP_MS);
7308	}
7309
7310	err = ice_init_all_ctrlq(hw);
7311	if (err) {
7312	dev_err(dev, "control queues init failed %d\n", err);
7313	goto err_init_ctrlq;
7314	}
7315
7316	/ if DDP was previously loaded successfully /
7317	if (!ice_is_safe_mode(pf)) {
7318	/ reload the SW DB of filter tables /
7319	if (reset_type == ICE_RESET_PFR)
7320	ice_fill_blk_tbls(hw);
7321	else
7322	/ Reload DDP Package after CORER/GLOBR reset /
7323	ice_load_pkg(NULL, pf);
7324	}
7325
7326	err = ice_clear_pf_cfg(hw);
7327	if (err) {
7328	dev_err(dev, "clear PF configuration failed %d\n", err);
7329	goto err_init_ctrlq;
7330	}
7331
7332	ice_clear_pxe_mode(hw);
7333
7334	err = ice_init_nvm(hw);
7335	if (err) {
7336	dev_err(dev, "ice_init_nvm failed %d\n", err);
7337	goto err_init_ctrlq;
7338	}
7339
7340	err = ice_get_caps(hw);
7341	if (err) {
7342	dev_err(dev, "ice_get_caps failed %d\n", err);
7343	goto err_init_ctrlq;
7344	}
7345
7346	err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
7347	if (err) {
7348	dev_err(dev, "set_mac_cfg failed %d\n", err);
7349	goto err_init_ctrlq;
7350	}
7351
7352	dvm = ice_is_dvm_ena(hw);
7353
7354	err = ice_aq_set_port_params(pi: pf->hw.port_info, double_vlan: dvm, NULL);
7355	if (err)
7356	goto err_init_ctrlq;
7357
7358	err = ice_sched_init_port(pi: hw->port_info);
7359	if (err)
7360	goto err_sched_init_port;
7361
7362	/ start misc vector /
7363	err = ice_req_irq_msix_misc(pf);
7364	if (err) {
7365	dev_err(dev, "misc vector setup failed: %d\n", err);
7366	goto err_sched_init_port;
7367	}
7368
7369	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
7370	wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
7371	if (!rd32(hw, PFQF_FD_SIZE)) {
7372	u16 unused, guar, b_effort;
7373
7374	guar = hw->func_caps.fd_fltr_guar;
7375	b_effort = hw->func_caps.fd_fltr_best_effort;
7376
7377	/ force guaranteed filter pool for PF /
7378	ice_alloc_fd_guar_item(hw, cntr_id: &unused, num_fltr: guar);
7379	/ force shared filter pool for PF /
7380	ice_alloc_fd_shrd_item(hw, cntr_id: &unused, num_fltr: b_effort);
7381	}
7382	}
7383
7384	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
7385	ice_dcb_rebuild(pf);
7386
7387	/ If the PF previously had enabled PTP, PTP init needs to happen before*
7388	* the VSI rebuild. If not, this causes the PTP link status events to
7389	* fail.
7390	*/
7391	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
7392	ice_ptp_reset(pf);
7393
7394	if (ice_is_feature_supported(pf, f: ICE_F_GNSS))
7395	ice_gnss_init(pf);
7396
7397	/ rebuild PF VSI /
7398	err = ice_vsi_rebuild_by_type(pf, type: ICE_VSI_PF);
7399	if (err) {
7400	dev_err(dev, "PF VSI rebuild failed: %d\n", err);
7401	goto err_vsi_rebuild;
7402	}
7403
7404	/ configure PTP timestamping after VSI rebuild /
7405	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) {
7406	if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF)
7407	ice_ptp_cfg_timestamp(pf, ena: false);
7408	else if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL)
7409	/ for E82x PHC owner always need to have interrupts /
7410	ice_ptp_cfg_timestamp(pf, ena: true);
7411	}
7412
7413	err = ice_vsi_rebuild_by_type(pf, type: ICE_VSI_SWITCHDEV_CTRL);
7414	if (err) {
7415	dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
7416	goto err_vsi_rebuild;
7417	}
7418
7419	if (reset_type == ICE_RESET_PFR) {
7420	err = ice_rebuild_channels(pf);
7421	if (err) {
7422	dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
7423	err);
7424	goto err_vsi_rebuild;
7425	}
7426	}
7427
7428	/ If Flow Director is active /
7429	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
7430	err = ice_vsi_rebuild_by_type(pf, type: ICE_VSI_CTRL);
7431	if (err) {
7432	dev_err(dev, "control VSI rebuild failed: %d\n", err);
7433	goto err_vsi_rebuild;
7434	}
7435
7436	/ replay HW Flow Director recipes /
7437	if (hw->fdir_prof)
7438	ice_fdir_replay_flows(hw);
7439
7440	/ replay Flow Director filters /
7441	ice_fdir_replay_fltrs(pf);
7442
7443	ice_rebuild_arfs(pf);
7444	}
7445
7446	ice_update_pf_netdev_link(pf);
7447
7448	/ tell the firmware we are up /
7449	err = ice_send_version(pf);
7450	if (err) {
7451	dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
7452	err);
7453	goto err_vsi_rebuild;
7454	}
7455
7456	ice_replay_post(hw);
7457
7458	/ if we get here, reset flow is successful /
7459	clear_bit(nr: ICE_RESET_FAILED, addr: pf->state);
7460
7461	ice_plug_aux_dev(pf);
7462	if (ice_is_feature_supported(pf, f: ICE_F_SRIOV_LAG))
7463	ice_lag_rebuild(pf);
7464	return;
7465
7466	err_vsi_rebuild:
7467	err_sched_init_port:
7468	ice_sched_cleanup_all(hw);
7469	err_init_ctrlq:
7470	ice_shutdown_all_ctrlq(hw);
7471	set_bit(nr: ICE_RESET_FAILED, addr: pf->state);
7472	clear_recovery:
7473	/ set this bit in PF state to control service task scheduling /
7474	set_bit(nr: ICE_NEEDS_RESTART, addr: pf->state);
7475	dev_err(dev, "Rebuild failed, unload and reload driver\n");
7476	}
7477
7478	/**
7479	* ice_change_mtu - NDO callback to change the MTU
7480	* @netdev: network interface device structure
7481	* @new_mtu: new value for maximum frame size
7482	*
7483	* Returns 0 on success, negative on failure
7484	*/
7485	static int ice_change_mtu(struct net_device netdev, int* new_mtu)
7486	{
7487	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
7488	struct ice_vsi *vsi = np->vsi;
7489	struct ice_pf *pf = vsi->back;
7490	struct bpf_prog *prog;
7491	u8 count = `0`;
7492	int err = `0`;
7493
7494	if (new_mtu == (int)netdev->mtu) {
7495	netdev_warn(dev: netdev, format: "MTU is already %u\n", netdev->mtu);
7496	return `0`;
7497	}
7498
7499	prog = vsi->xdp_prog;
7500	if (prog && !prog->aux->xdp_has_frags) {
7501	int frame_size = ice_max_xdp_frame_size(vsi);
7502
7503	if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
7504	netdev_err(dev: netdev, format: "max MTU for XDP usage is %d\n",
7505	frame_size - ICE_ETH_PKT_HDR_PAD);
7506	return -EINVAL;
7507	}
7508	} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
7509	if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
7510	netdev_err(dev: netdev, format: "Too big MTU for legacy-rx; Max is %d\n",
7511	ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
7512	return -EINVAL;
7513	}
7514	}
7515
7516	/ if a reset is in progress, wait for some time for it to complete /
7517	do {
7518	if (ice_is_reset_in_progress(state: pf->state)) {
7519	count++;
7520	usleep_range(min: `1000`, max: `2000`);
7521	} else {
7522	break;
7523	}
7524
7525	} while (count < `100`);
7526
7527	if (count == `100`) {
7528	netdev_err(dev: netdev, format: "can't change MTU. Device is busy\n");
7529	return -EBUSY;
7530	}
7531
7532	netdev->mtu = (unsigned int)new_mtu;
7533	err = ice_down_up(vsi);
7534	if (err)
7535	return err;
7536
7537	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
7538	set_bit(nr: ICE_FLAG_MTU_CHANGED, addr: pf->flags);
7539
7540	return err;
7541	}
7542
7543	/**
7544	* ice_eth_ioctl - Access the hwtstamp interface
7545	* @netdev: network interface device structure
7546	* @ifr: interface request data
7547	* @cmd: ioctl command
7548	*/
7549	static int ice_eth_ioctl(struct net_device netdev, struct* ifreq ifr, int* cmd)
7550	{
7551	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
7552	struct ice_pf *pf = np->vsi->back;
7553
7554	switch (cmd) {
7555	case SIOCGHWTSTAMP:
7556	return ice_ptp_get_ts_config(pf, ifr);
7557	case SIOCSHWTSTAMP:
7558	return ice_ptp_set_ts_config(pf, ifr);
7559	default:
7560	return -EOPNOTSUPP;
7561	}
7562	}
7563
7564	/**
7565	* ice_aq_str - convert AQ err code to a string
7566	* @aq_err: the AQ error code to convert
7567	*/
7568	const char ice_aq_str(enum* ice_aq_err aq_err)
7569	{
7570	switch (aq_err) {
7571	case ICE_AQ_RC_OK:
7572	return "OK";
7573	case ICE_AQ_RC_EPERM:
7574	return "ICE_AQ_RC_EPERM";
7575	case ICE_AQ_RC_ENOENT:
7576	return "ICE_AQ_RC_ENOENT";
7577	case ICE_AQ_RC_ENOMEM:
7578	return "ICE_AQ_RC_ENOMEM";
7579	case ICE_AQ_RC_EBUSY:
7580	return "ICE_AQ_RC_EBUSY";
7581	case ICE_AQ_RC_EEXIST:
7582	return "ICE_AQ_RC_EEXIST";
7583	case ICE_AQ_RC_EINVAL:
7584	return "ICE_AQ_RC_EINVAL";
7585	case ICE_AQ_RC_ENOSPC:
7586	return "ICE_AQ_RC_ENOSPC";
7587	case ICE_AQ_RC_ENOSYS:
7588	return "ICE_AQ_RC_ENOSYS";
7589	case ICE_AQ_RC_EMODE:
7590	return "ICE_AQ_RC_EMODE";
7591	case ICE_AQ_RC_ENOSEC:
7592	return "ICE_AQ_RC_ENOSEC";
7593	case ICE_AQ_RC_EBADSIG:
7594	return "ICE_AQ_RC_EBADSIG";
7595	case ICE_AQ_RC_ESVN:
7596	return "ICE_AQ_RC_ESVN";
7597	case ICE_AQ_RC_EBADMAN:
7598	return "ICE_AQ_RC_EBADMAN";
7599	case ICE_AQ_RC_EBADBUF:
7600	return "ICE_AQ_RC_EBADBUF";
7601	}
7602
7603	return "ICE_AQ_RC_UNKNOWN";
7604	}
7605
7606	/**
7607	* ice_set_rss_lut - Set RSS LUT
7608	* @vsi: Pointer to VSI structure
7609	* @lut: Lookup table
7610	* @lut_size: Lookup table size
7611	*
7612	* Returns 0 on success, negative on failure
7613	*/
7614	int ice_set_rss_lut(struct ice_vsi vsi, u8 lut, u16 lut_size)
7615	{
7616	struct ice_aq_get_set_rss_lut_params params = {};
7617	struct ice_hw *hw = &vsi->back->hw;
7618	int status;
7619
7620	if (!lut)
7621	return -EINVAL;
7622
7623	params.vsi_handle = vsi->idx;
7624	params.lut_size = lut_size;
7625	params.lut_type = vsi->rss_lut_type;
7626	params.lut = lut;
7627
7628	status = ice_aq_set_rss_lut(hw, set_params: &params);
7629	if (status)
7630	dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n",
7631	status, ice_aq_str(hw->adminq.sq_last_status));
7632
7633	return status;
7634	}
7635
7636	/**
7637	* ice_set_rss_key - Set RSS key
7638	* @vsi: Pointer to the VSI structure
7639	* @seed: RSS hash seed
7640	*
7641	* Returns 0 on success, negative on failure
7642	*/
7643	int ice_set_rss_key(struct ice_vsi vsi, u8 seed)
7644	{
7645	struct ice_hw *hw = &vsi->back->hw;
7646	int status;
7647
7648	if (!seed)
7649	return -EINVAL;
7650
7651	status = ice_aq_set_rss_key(hw, vsi_handle: vsi->idx, keys: (struct ice_aqc_get_set_rss_keys *)seed);
7652	if (status)
7653	dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
7654	status, ice_aq_str(hw->adminq.sq_last_status));
7655
7656	return status;
7657	}
7658
7659	/**
7660	* ice_get_rss_lut - Get RSS LUT
7661	* @vsi: Pointer to VSI structure
7662	* @lut: Buffer to store the lookup table entries
7663	* @lut_size: Size of buffer to store the lookup table entries
7664	*
7665	* Returns 0 on success, negative on failure
7666	*/
7667	int ice_get_rss_lut(struct ice_vsi vsi, u8 lut, u16 lut_size)
7668	{
7669	struct ice_aq_get_set_rss_lut_params params = {};
7670	struct ice_hw *hw = &vsi->back->hw;
7671	int status;
7672
7673	if (!lut)
7674	return -EINVAL;
7675
7676	params.vsi_handle = vsi->idx;
7677	params.lut_size = lut_size;
7678	params.lut_type = vsi->rss_lut_type;
7679	params.lut = lut;
7680
7681	status = ice_aq_get_rss_lut(hw, get_params: &params);
7682	if (status)
7683	dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n",
7684	status, ice_aq_str(hw->adminq.sq_last_status));
7685
7686	return status;
7687	}
7688
7689	/**
7690	* ice_get_rss_key - Get RSS key
7691	* @vsi: Pointer to VSI structure
7692	* @seed: Buffer to store the key in
7693	*
7694	* Returns 0 on success, negative on failure
7695	*/
7696	int ice_get_rss_key(struct ice_vsi vsi, u8 seed)
7697	{
7698	struct ice_hw *hw = &vsi->back->hw;
7699	int status;
7700
7701	if (!seed)
7702	return -EINVAL;
7703
7704	status = ice_aq_get_rss_key(hw, vsi_handle: vsi->idx, keys: (struct ice_aqc_get_set_rss_keys *)seed);
7705	if (status)
7706	dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n",
7707	status, ice_aq_str(hw->adminq.sq_last_status));
7708
7709	return status;
7710	}
7711
7712	/**
7713	* ice_bridge_getlink - Get the hardware bridge mode
7714	* @skb: skb buff
7715	* @pid: process ID
7716	* @seq: RTNL message seq
7717	* @dev: the netdev being configured
7718	* @filter_mask: filter mask passed in
7719	* @nlflags: netlink flags passed in
7720	*
7721	* Return the bridge mode (VEB/VEPA)
7722	*/
7723	static int
7724	ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
7725	struct net_device dev, u32 filter_mask, int* nlflags)
7726	{
7727	struct ice_netdev_priv *np = netdev_priv(dev);
7728	struct ice_vsi *vsi = np->vsi;
7729	struct ice_pf *pf = vsi->back;
7730	u16 bmode;
7731
7732	bmode = pf->first_sw->bridge_mode;
7733
7734	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode: bmode, flags: `0`, mask: `0`, nlflags,
7735	filter_mask, NULL);
7736	}
7737
7738	/**
7739	* ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
7740	* @vsi: Pointer to VSI structure
7741	* @bmode: Hardware bridge mode (VEB/VEPA)
7742	*
7743	* Returns 0 on success, negative on failure
7744	*/
7745	static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
7746	{
7747	struct ice_aqc_vsi_props *vsi_props;
7748	struct ice_hw *hw = &vsi->back->hw;
7749	struct ice_vsi_ctx *ctxt;
7750	int ret;
7751
7752	vsi_props = &vsi->info;
7753
7754	ctxt = kzalloc(size: sizeof(*ctxt), GFP_KERNEL);
7755	if (!ctxt)
7756	return -ENOMEM;
7757
7758	ctxt->info = vsi->info;
7759
7760	if (bmode == BRIDGE_MODE_VEB)
7761	/ change from VEPA to VEB mode /
7762	ctxt->info.sw_flags \|= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
7763	else
7764	/ change from VEB to VEPA mode /
7765	ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
7766	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
7767
7768	ret = ice_update_vsi(hw, vsi_handle: vsi->idx, vsi_ctx: ctxt, NULL);
7769	if (ret) {
7770	dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
7771	bmode, ret, ice_aq_str(hw->adminq.sq_last_status));
7772	goto out;
7773	}
7774	/ Update sw flags for book keeping /
7775	vsi_props->sw_flags = ctxt->info.sw_flags;
7776
7777	out:
7778	kfree(objp: ctxt);
7779	return ret;
7780	}
7781
7782	/**
7783	* ice_bridge_setlink - Set the hardware bridge mode
7784	* @dev: the netdev being configured
7785	* @nlh: RTNL message
7786	* @flags: bridge setlink flags
7787	* @extack: netlink extended ack
7788	*
7789	* Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
7790	* hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
7791	* not already set for all VSIs connected to this switch. And also update the
7792	* unicast switch filter rules for the corresponding switch of the netdev.
7793	*/
7794	static int
7795	ice_bridge_setlink(struct net_device dev, struct* nlmsghdr *nlh,
7796	u16 __always_unused flags,
7797	struct netlink_ext_ack __always_unused *extack)
7798	{
7799	struct ice_netdev_priv *np = netdev_priv(dev);
7800	struct ice_pf *pf = np->vsi->back;
7801	struct nlattr attr, br_spec;
7802	struct ice_hw *hw = &pf->hw;
7803	struct ice_sw *pf_sw;
7804	int rem, v, err = `0`;
7805
7806	pf_sw = pf->first_sw;
7807	/ find the attribute in the netlink message /
7808	br_spec = nlmsg_find_attr(nlh, hdrlen: sizeof(struct ifinfomsg), attrtype: IFLA_AF_SPEC);
7809
7810	nla_for_each_nested(attr, br_spec, rem) {
7811	__u16 mode;
7812
7813	if (nla_type(nla: attr) != IFLA_BRIDGE_MODE)
7814	continue;
7815	mode = nla_get_u16(nla: attr);
7816	if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
7817	return -EINVAL;
7818	/ Continue if bridge mode is not being flipped /
7819	if (mode == pf_sw->bridge_mode)
7820	continue;
7821	/ Iterates through the PF VSI list and update the loopback*
7822	* mode of the VSI
7823	*/
7824	ice_for_each_vsi(pf, v) {
7825	if (!pf->vsi[v])
7826	continue;
7827	err = ice_vsi_update_bridge_mode(vsi: pf->vsi[v], bmode: mode);
7828	if (err)
7829	return err;
7830	}
7831
7832	hw->evb_veb = (mode == BRIDGE_MODE_VEB);
7833	/ Update the unicast switch filter rules for the corresponding*
7834	* switch of the netdev
7835	*/
7836	err = ice_update_sw_rule_bridge_mode(hw);
7837	if (err) {
7838	netdev_err(dev, format: "switch rule update failed, mode = %d err %d aq_err %s\n",
7839	mode, err,
7840	ice_aq_str(aq_err: hw->adminq.sq_last_status));
7841	/ revert hw->evb_veb /
7842	hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
7843	return err;
7844	}
7845
7846	pf_sw->bridge_mode = mode;
7847	}
7848
7849	return `0`;
7850	}
7851
7852	/**
7853	* ice_tx_timeout - Respond to a Tx Hang
7854	* @netdev: network interface device structure
7855	* @txqueue: Tx queue
7856	*/
7857	static void ice_tx_timeout(struct net_device netdev, unsigned* int txqueue)
7858	{
7859	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
7860	struct ice_tx_ring *tx_ring = NULL;
7861	struct ice_vsi *vsi = np->vsi;
7862	struct ice_pf *pf = vsi->back;
7863	u32 i;
7864
7865	pf->tx_timeout_count++;
7866
7867	/ Check if PFC is enabled for the TC to which the queue belongs*
7868	* to. If yes then Tx timeout is not caused by a hung queue, no
7869	* need to reset and rebuild
7870	*/
7871	if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
7872	dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
7873	txqueue);
7874	return;
7875	}
7876
7877	/ now that we have an index, find the tx_ring struct /
7878	ice_for_each_txq(vsi, i)
7879	if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
7880	if (txqueue == vsi->tx_rings[i]->q_index) {
7881	tx_ring = vsi->tx_rings[i];
7882	break;
7883	}
7884
7885	/ Reset recovery level if enough time has elapsed after last timeout.*
7886	* Also ensure no new reset action happens before next timeout period.
7887	*/
7888	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * `20`)))
7889	pf->tx_timeout_recovery_level = `1`;
7890	else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
7891	netdev->watchdog_timeo)))
7892	return;
7893
7894	if (tx_ring) {
7895	struct ice_hw *hw = &pf->hw;
7896	u32 head, val = `0`;
7897
7898	head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) &
7899	QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
7900	/ Read interrupt register /
7901	val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
7902
7903	netdev_info(dev: netdev, format: "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
7904	vsi->vsi_num, txqueue, tx_ring->next_to_clean,
7905	head, tx_ring->next_to_use, val);
7906	}
7907
7908	pf->tx_timeout_last_recovery = jiffies;
7909	netdev_info(dev: netdev, format: "tx_timeout recovery level %d, txqueue %u\n",
7910	pf->tx_timeout_recovery_level, txqueue);
7911
7912	switch (pf->tx_timeout_recovery_level) {
7913	case `1`:
7914	set_bit(nr: ICE_PFR_REQ, addr: pf->state);
7915	break;
7916	case `2`:
7917	set_bit(nr: ICE_CORER_REQ, addr: pf->state);
7918	break;
7919	case `3`:
7920	set_bit(nr: ICE_GLOBR_REQ, addr: pf->state);
7921	break;
7922	default:
7923	netdev_err(dev: netdev, format: "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
7924	set_bit(nr: ICE_DOWN, addr: pf->state);
7925	set_bit(nr: ICE_VSI_NEEDS_RESTART, addr: vsi->state);
7926	set_bit(nr: ICE_SERVICE_DIS, addr: pf->state);
7927	break;
7928	}
7929
7930	ice_service_task_schedule(pf);
7931	pf->tx_timeout_recovery_level++;
7932	}
7933
7934	/**
7935	* ice_setup_tc_cls_flower - flower classifier offloads
7936	* @np: net device to configure
7937	* @filter_dev: device on which filter is added
7938	* @cls_flower: offload data
7939	*/
7940	static int
7941	ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
7942	struct net_device *filter_dev,
7943	struct flow_cls_offload *cls_flower)
7944	{
7945	struct ice_vsi *vsi = np->vsi;
7946
7947	if (cls_flower->common.chain_index)
7948	return -EOPNOTSUPP;
7949
7950	switch (cls_flower->command) {
7951	case FLOW_CLS_REPLACE:
7952	return ice_add_cls_flower(netdev: filter_dev, vsi, cls_flower);
7953	case FLOW_CLS_DESTROY:
7954	return ice_del_cls_flower(vsi, cls_flower);
7955	default:
7956	return -EINVAL;
7957	}
7958	}
7959
7960	/**
7961	* ice_setup_tc_block_cb - callback handler registered for TC block
7962	* @type: TC SETUP type
7963	* @type_data: TC flower offload data that contains user input
7964	* @cb_priv: netdev private data
7965	*/
7966	static int
7967	ice_setup_tc_block_cb(enum tc_setup_type type, void type_data, void* *cb_priv)
7968	{
7969	struct ice_netdev_priv *np = cb_priv;
7970
7971	switch (type) {
7972	case TC_SETUP_CLSFLOWER:
7973	return ice_setup_tc_cls_flower(np, filter_dev: np->vsi->netdev,
7974	cls_flower: type_data);
7975	default:
7976	return -EOPNOTSUPP;
7977	}
7978	}
7979
7980	/**
7981	* ice_validate_mqprio_qopt - Validate TCF input parameters
7982	* @vsi: Pointer to VSI
7983	* @mqprio_qopt: input parameters for mqprio queue configuration
7984	*
7985	* This function validates MQPRIO params, such as qcount (power of 2 wherever
7986	* needed), and make sure user doesn't specify qcount and BW rate limit
7987	* for TCs, which are more than "num_tc"
7988	*/
7989	static int
7990	ice_validate_mqprio_qopt(struct ice_vsi *vsi,
7991	struct tc_mqprio_qopt_offload *mqprio_qopt)
7992	{
7993	int non_power_of_2_qcount = `0`;
7994	struct ice_pf *pf = vsi->back;
7995	int max_rss_q_cnt = `0`;
7996	u64 sum_min_rate = `0`;
7997	struct device *dev;
7998	int i, speed;
7999	u8 num_tc;
8000
8001	if (vsi->type != ICE_VSI_PF)
8002	return -EINVAL;
8003
8004	if (mqprio_qopt->qopt.offset[`0`] != `0` \|\|
8005	mqprio_qopt->qopt.num_tc < `1` \|\|
8006	mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
8007	return -EINVAL;
8008
8009	dev = ice_pf_to_dev(pf);
8010	vsi->ch_rss_size = `0`;
8011	num_tc = mqprio_qopt->qopt.num_tc;
8012	speed = ice_get_link_speed_kbps(vsi);
8013
8014	for (i = `0`; num_tc; i++) {
8015	int qcount = mqprio_qopt->qopt.count[i];
8016	u64 max_rate, min_rate, rem;
8017
8018	if (!qcount)
8019	return -EINVAL;
8020
8021	if (is_power_of_2(n: qcount)) {
8022	if (non_power_of_2_qcount &&
8023	qcount > non_power_of_2_qcount) {
8024	dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
8025	qcount, non_power_of_2_qcount);
8026	return -EINVAL;
8027	}
8028	if (qcount > max_rss_q_cnt)
8029	max_rss_q_cnt = qcount;
8030	} else {
8031	if (non_power_of_2_qcount &&
8032	qcount != non_power_of_2_qcount) {
8033	dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
8034	qcount, non_power_of_2_qcount);
8035	return -EINVAL;
8036	}
8037	if (qcount < max_rss_q_cnt) {
8038	dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
8039	qcount, max_rss_q_cnt);
8040	return -EINVAL;
8041	}
8042	max_rss_q_cnt = qcount;
8043	non_power_of_2_qcount = qcount;
8044	}
8045
8046	/ TC command takes input in K/N/Gbps or K/M/Gbit etc but*
8047	* converts the bandwidth rate limit into Bytes/s when
8048	* passing it down to the driver. So convert input bandwidth
8049	* from Bytes/s to Kbps
8050	*/
8051	max_rate = mqprio_qopt->max_rate[i];
8052	max_rate = div_u64(dividend: max_rate, ICE_BW_KBPS_DIVISOR);
8053
8054	/ min_rate is minimum guaranteed rate and it can't be zero /
8055	min_rate = mqprio_qopt->min_rate[i];
8056	min_rate = div_u64(dividend: min_rate, ICE_BW_KBPS_DIVISOR);
8057	sum_min_rate += min_rate;
8058
8059	if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
8060	dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
8061	min_rate, ICE_MIN_BW_LIMIT);
8062	return -EINVAL;
8063	}
8064
8065	if (max_rate && max_rate > speed) {
8066	dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
8067	i, max_rate, speed);
8068	return -EINVAL;
8069	}
8070
8071	iter_div_u64_rem(dividend: min_rate, ICE_MIN_BW_LIMIT, remainder: &rem);
8072	if (rem) {
8073	dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
8074	i, ICE_MIN_BW_LIMIT);
8075	return -EINVAL;
8076	}
8077
8078	iter_div_u64_rem(dividend: max_rate, ICE_MIN_BW_LIMIT, remainder: &rem);
8079	if (rem) {
8080	dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
8081	i, ICE_MIN_BW_LIMIT);
8082	return -EINVAL;
8083	}
8084
8085	/ min_rate can't be more than max_rate, except when max_rate*
8086	* is zero (implies max_rate sought is max line rate). In such
8087	* a case min_rate can be more than max.
8088	*/
8089	if (max_rate && min_rate > max_rate) {
8090	dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
8091	min_rate, max_rate);
8092	return -EINVAL;
8093	}
8094
8095	if (i >= mqprio_qopt->qopt.num_tc - `1`)
8096	break;
8097	if (mqprio_qopt->qopt.offset[i + `1`] !=
8098	(mqprio_qopt->qopt.offset[i] + qcount))
8099	return -EINVAL;
8100	}
8101	if (vsi->num_rxq <
8102	(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
8103	return -EINVAL;
8104	if (vsi->num_txq <
8105	(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
8106	return -EINVAL;
8107
8108	if (sum_min_rate && sum_min_rate > (u64)speed) {
8109	dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
8110	sum_min_rate, speed);
8111	return -EINVAL;
8112	}
8113
8114	/ make sure vsi->ch_rss_size is set correctly based on TC's qcount /
8115	vsi->ch_rss_size = max_rss_q_cnt;
8116
8117	return `0`;
8118	}
8119
8120	/**
8121	* ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
8122	* @pf: ptr to PF device
8123	* @vsi: ptr to VSI
8124	*/
8125	static int ice_add_vsi_to_fdir(struct ice_pf pf, struct* ice_vsi *vsi)
8126	{
8127	struct device *dev = ice_pf_to_dev(pf);
8128	bool added = false;
8129	struct ice_hw *hw;
8130	int flow;
8131
8132	if (!(vsi->num_gfltr \|\| vsi->num_bfltr))
8133	return -EINVAL;
8134
8135	hw = &pf->hw;
8136	for (flow = `0`; flow < ICE_FLTR_PTYPE_MAX; flow++) {
8137	struct ice_fd_hw_prof *prof;
8138	int tun, status;
8139	u64 entry_h;
8140
8141	if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
8142	hw->fdir_prof[flow]->cnt))
8143	continue;
8144
8145	for (tun = `0`; tun < ICE_FD_HW_SEG_MAX; tun++) {
8146	enum ice_flow_priority prio;
8147	u64 prof_id;
8148
8149	/ add this VSI to FDir profile for this flow /
8150	prio = ICE_FLOW_PRIO_NORMAL;
8151	prof = hw->fdir_prof[flow];
8152	prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
8153	status = ice_flow_add_entry(hw, blk: ICE_BLK_FD, prof_id,
8154	entry_id: prof->vsi_h[`0`], vsi: vsi->idx,
8155	prio, data: prof->fdir_seg[tun],
8156	entry_h: &entry_h);
8157	if (status) {
8158	dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
8159	vsi->idx, flow);
8160	continue;
8161	}
8162
8163	prof->entry_h[prof->cnt][tun] = entry_h;
8164	}
8165
8166	/ store VSI for filter replay and delete /
8167	prof->vsi_h[prof->cnt] = vsi->idx;
8168	prof->cnt++;
8169
8170	added = true;
8171	dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
8172	flow);
8173	}
8174
8175	if (!added)
8176	dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);
8177
8178	return `0`;
8179	}
8180
8181	/**
8182	* ice_add_channel - add a channel by adding VSI
8183	* @pf: ptr to PF device
8184	* @sw_id: underlying HW switching element ID
8185	* @ch: ptr to channel structure
8186	*
8187	* Add a channel (VSI) using add_vsi and queue_map
8188	*/
8189	static int ice_add_channel(struct ice_pf pf, u16 sw_id, struct* ice_channel *ch)
8190	{
8191	struct device *dev = ice_pf_to_dev(pf);
8192	struct ice_vsi *vsi;
8193
8194	if (ch->type != ICE_VSI_CHNL) {
8195	dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
8196	return -EINVAL;
8197	}
8198
8199	vsi = ice_chnl_vsi_setup(pf, pi: pf->hw.port_info, ch);
8200	if (!vsi \|\| vsi->type != ICE_VSI_CHNL) {
8201	dev_err(dev, "create chnl VSI failure\n");
8202	return -EINVAL;
8203	}
8204
8205	ice_add_vsi_to_fdir(pf, vsi);
8206
8207	ch->sw_id = sw_id;
8208	ch->vsi_num = vsi->vsi_num;
8209	ch->info.mapping_flags = vsi->info.mapping_flags;
8210	ch->ch_vsi = vsi;
8211	/ set the back pointer of channel for newly created VSI /
8212	vsi->ch = ch;
8213
8214	memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
8215	sizeof(vsi->info.q_mapping));
8216	memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
8217	sizeof(vsi->info.tc_mapping));
8218
8219	return `0`;
8220	}
8221
8222	/**
8223	* ice_chnl_cfg_res
8224	* @vsi: the VSI being setup
8225	* @ch: ptr to channel structure
8226	*
8227	* Configure channel specific resources such as rings, vector.
8228	*/
8229	static void ice_chnl_cfg_res(struct ice_vsi vsi, struct* ice_channel *ch)
8230	{
8231	int i;
8232
8233	for (i = `0`; i < ch->num_txq; i++) {
8234	struct ice_q_vector tx_q_vector, rx_q_vector;
8235	struct ice_ring_container *rc;
8236	struct ice_tx_ring *tx_ring;
8237	struct ice_rx_ring *rx_ring;
8238
8239	tx_ring = vsi->tx_rings[ch->base_q + i];
8240	rx_ring = vsi->rx_rings[ch->base_q + i];
8241	if (!tx_ring \|\| !rx_ring)
8242	continue;
8243
8244	/ setup ring being channel enabled /
8245	tx_ring->ch = ch;
8246	rx_ring->ch = ch;
8247
8248	/ following code block sets up vector specific attributes /
8249	tx_q_vector = tx_ring->q_vector;
8250	rx_q_vector = rx_ring->q_vector;
8251	if (!tx_q_vector && !rx_q_vector)
8252	continue;
8253
8254	if (tx_q_vector) {
8255	tx_q_vector->ch = ch;
8256	/ setup Tx and Rx ITR setting if DIM is off /
8257	rc = &tx_q_vector->tx;
8258	if (!ITR_IS_DYNAMIC(rc))
8259	ice_write_itr(rc, itr: rc->itr_setting);
8260	}
8261	if (rx_q_vector) {
8262	rx_q_vector->ch = ch;
8263	/ setup Tx and Rx ITR setting if DIM is off /
8264	rc = &rx_q_vector->rx;
8265	if (!ITR_IS_DYNAMIC(rc))
8266	ice_write_itr(rc, itr: rc->itr_setting);
8267	}
8268	}
8269
8270	/ it is safe to assume that, if channel has non-zero num_t[r]xq, then*
8271	* GLINT_ITR register would have written to perform in-context
8272	* update, hence perform flush
8273	*/
8274	if (ch->num_txq \|\| ch->num_rxq)
8275	ice_flush(&vsi->back->hw);
8276	}
8277
/**
 * ice_cfg_chnl_all_res - configure channel resources
 * @vsi: ptr to main_vsi
 * @ch: ptr to channel structure
 *
 * Entry point for configuring everything a channel (aka ADQ) needs on the
 * main VSI. At present this only delegates to ice_chnl_cfg_res(), which
 * handles queues, vectors and their ITR settings.
 */
static void
ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	/* queues, vectors and ITR settings for channel specific vectors */
	ice_chnl_cfg_res(vsi, ch);
}
8294
8295	/**
8296	* ice_setup_hw_channel - setup new channel
8297	* @pf: ptr to PF device
8298	* @vsi: the VSI being setup
8299	* @ch: ptr to channel structure
8300	* @sw_id: underlying HW switching element ID
8301	* @type: type of channel to be created (VMDq2/VF)
8302	*
8303	* Setup new channel (VSI) based on specified type (VMDq2/VF)
8304	* and configures Tx rings accordingly
8305	*/
8306	static int
8307	ice_setup_hw_channel(struct ice_pf pf, struct* ice_vsi *vsi,
8308	struct ice_channel *ch, u16 sw_id, u8 type)
8309	{
8310	struct device *dev = ice_pf_to_dev(pf);
8311	int ret;
8312
8313	ch->base_q = vsi->next_base_q;
8314	ch->type = type;
8315
8316	ret = ice_add_channel(pf, sw_id, ch);
8317	if (ret) {
8318	dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
8319	return ret;
8320	}
8321
8322	/ configure/setup ADQ specific resources /
8323	ice_cfg_chnl_all_res(vsi, ch);
8324
8325	/ make sure to update the next_base_q so that subsequent channel's*
8326	* (aka ADQ) VSI queue map is correct
8327	*/
8328	vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
8329	dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
8330	ch->num_rxq);
8331
8332	return `0`;
8333	}
8334
8335	/**
8336	* ice_setup_channel - setup new channel using uplink element
8337	* @pf: ptr to PF device
8338	* @vsi: the VSI being setup
8339	* @ch: ptr to channel structure
8340	*
8341	* Setup new channel (VSI) based on specified type (VMDq2/VF)
8342	* and uplink switching element
8343	*/
8344	static bool
8345	ice_setup_channel(struct ice_pf pf, struct* ice_vsi *vsi,
8346	struct ice_channel *ch)
8347	{
8348	struct device *dev = ice_pf_to_dev(pf);
8349	u16 sw_id;
8350	int ret;
8351
8352	if (vsi->type != ICE_VSI_PF) {
8353	dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
8354	return false;
8355	}
8356
8357	sw_id = pf->first_sw->sw_id;
8358
8359	/ create channel (VSI) /
8360	ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, type: ICE_VSI_CHNL);
8361	if (ret) {
8362	dev_err(dev, "failed to setup hw_channel\n");
8363	return false;
8364	}
8365	dev_dbg(dev, "successfully created channel()\n");
8366
8367	return ch->ch_vsi ? true : false;
8368	}
8369
8370	/**
8371	* ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
8372	* @vsi: VSI to be configured
8373	* @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
8374	* @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
8375	*/
8376	static int
8377	ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
8378	{
8379	int err;
8380
8381	err = ice_set_min_bw_limit(vsi, min_tx_rate);
8382	if (err)
8383	return err;
8384
8385	return ice_set_max_bw_limit(vsi, max_tx_rate);
8386	}
8387
8388	/**
8389	* ice_create_q_channel - function to create channel
8390	* @vsi: VSI to be configured
8391	* @ch: ptr to channel (it contains channel specific params)
8392	*
8393	* This function creates channel (VSI) using num_queues specified by user,
8394	* reconfigs RSS if needed.
8395	*/
8396	static int ice_create_q_channel(struct ice_vsi vsi, struct* ice_channel *ch)
8397	{
8398	struct ice_pf *pf = vsi->back;
8399	struct device *dev;
8400
8401	if (!ch)
8402	return -EINVAL;
8403
8404	dev = ice_pf_to_dev(pf);
8405	if (!ch->num_txq \|\| !ch->num_rxq) {
8406	dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
8407	return -EINVAL;
8408	}
8409
8410	if (!vsi->cnt_q_avail \|\| vsi->cnt_q_avail < ch->num_txq) {
8411	dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
8412	vsi->cnt_q_avail, ch->num_txq);
8413	return -EINVAL;
8414	}
8415
8416	if (!ice_setup_channel(pf, vsi, ch)) {
8417	dev_info(dev, "Failed to setup channel\n");
8418	return -EINVAL;
8419	}
8420	/ configure BW rate limit /
8421	if (ch->ch_vsi && (ch->max_tx_rate \|\| ch->min_tx_rate)) {
8422	int ret;
8423
8424	ret = ice_set_bw_limit(vsi: ch->ch_vsi, max_tx_rate: ch->max_tx_rate,
8425	min_tx_rate: ch->min_tx_rate);
8426	if (ret)
8427	dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
8428	ch->max_tx_rate, ch->ch_vsi->vsi_num);
8429	else
8430	dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
8431	ch->max_tx_rate, ch->ch_vsi->vsi_num);
8432	}
8433
8434	vsi->cnt_q_avail -= ch->num_txq;
8435
8436	return `0`;
8437	}
8438
8439	/**
8440	* ice_rem_all_chnl_fltrs - removes all channel filters
8441	* @pf: ptr to PF, TC-flower based filter are tracked at PF level
8442	*
8443	* Remove all advanced switch filters only if they are channel specific
8444	* tc-flower based filter
8445	*/
8446	static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
8447	{
8448	struct ice_tc_flower_fltr *fltr;
8449	struct hlist_node *node;
8450
8451	/ to remove all channel filters, iterate an ordered list of filters /
8452	hlist_for_each_entry_safe(fltr, node,
8453	&pf->tc_flower_fltr_list,
8454	tc_flower_node) {
8455	struct ice_rule_query_data rule;
8456	int status;
8457
8458	/ for now process only channel specific filters /
8459	if (!ice_is_chnl_fltr(f: fltr))
8460	continue;
8461
8462	rule.rid = fltr->rid;
8463	rule.rule_id = fltr->rule_id;
8464	rule.vsi_handle = fltr->dest_vsi_handle;
8465	status = ice_rem_adv_rule_by_id(hw: &pf->hw, remove_entry: &rule);
8466	if (status) {
8467	if (status == -ENOENT)
8468	dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
8469	rule.rule_id);
8470	else
8471	dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
8472	status);
8473	} else if (fltr->dest_vsi) {
8474	/ update advanced switch filter count /
8475	if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
8476	u32 flags = fltr->flags;
8477
8478	fltr->dest_vsi->num_chnl_fltr--;
8479	if (flags & (ICE_TC_FLWR_FIELD_DST_MAC \|
8480	ICE_TC_FLWR_FIELD_ENC_DST_MAC))
8481	pf->num_dmac_chnl_fltrs--;
8482	}
8483	}
8484
8485	hlist_del(n: &fltr->tc_flower_node);
8486	kfree(objp: fltr);
8487	}
8488	}
8489
8490	/**
8491	* ice_remove_q_channels - Remove queue channels for the TCs
8492	* @vsi: VSI to be configured
8493	* @rem_fltr: delete advanced switch filter or not
8494	*
8495	* Remove queue channels for the TCs
8496	*/
8497	static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
8498	{
8499	struct ice_channel ch, ch_tmp;
8500	struct ice_pf *pf = vsi->back;
8501	int i;
8502
8503	/ remove all tc-flower based filter if they are channel filters only /
8504	if (rem_fltr)
8505	ice_rem_all_chnl_fltrs(pf);
8506
8507	/ remove ntuple filters since queue configuration is being changed /
8508	if (vsi->netdev->features & NETIF_F_NTUPLE) {
8509	struct ice_hw *hw = &pf->hw;
8510
8511	mutex_lock(&hw->fdir_fltr_lock);
8512	ice_fdir_del_all_fltrs(vsi);
8513	mutex_unlock(lock: &hw->fdir_fltr_lock);
8514	}
8515
8516	/ perform cleanup for channels if they exist /
8517	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
8518	struct ice_vsi *ch_vsi;
8519
8520	list_del(entry: &ch->list);
8521	ch_vsi = ch->ch_vsi;
8522	if (!ch_vsi) {
8523	kfree(objp: ch);
8524	continue;
8525	}
8526
8527	/ Reset queue contexts /
8528	for (i = `0`; i < ch->num_rxq; i++) {
8529	struct ice_tx_ring *tx_ring;
8530	struct ice_rx_ring *rx_ring;
8531
8532	tx_ring = vsi->tx_rings[ch->base_q + i];
8533	rx_ring = vsi->rx_rings[ch->base_q + i];
8534	if (tx_ring) {
8535	tx_ring->ch = NULL;
8536	if (tx_ring->q_vector)
8537	tx_ring->q_vector->ch = NULL;
8538	}
8539	if (rx_ring) {
8540	rx_ring->ch = NULL;
8541	if (rx_ring->q_vector)
8542	rx_ring->q_vector->ch = NULL;
8543	}
8544	}
8545
8546	/ Release FD resources for the channel VSI /
8547	ice_fdir_rem_adq_chnl(hw: &pf->hw, vsi_idx: ch->ch_vsi->idx);
8548
8549	/ clear the VSI from scheduler tree /
8550	ice_rm_vsi_lan_cfg(pi: ch->ch_vsi->port_info, vsi_handle: ch->ch_vsi->idx);
8551
8552	/ Delete VSI from FW, PF and HW VSI arrays /
8553	ice_vsi_delete(vsi: ch->ch_vsi);
8554
8555	/ free the channel /
8556	kfree(objp: ch);
8557	}
8558
8559	/ clear the channel VSI map which is stored in main VSI /
8560	ice_for_each_chnl_tc(i)
8561	vsi->tc_map_vsi[i] = NULL;
8562
8563	/ reset main VSI's all TC information /
8564	vsi->all_enatc = `0`;
8565	vsi->all_numtc = `0`;
8566	}
8567
8568	/**
8569	* ice_rebuild_channels - rebuild channel
8570	* @pf: ptr to PF
8571	*
8572	* Recreate channel VSIs and replay filters
8573	*/
8574	static int ice_rebuild_channels(struct ice_pf *pf)
8575	{
8576	struct device *dev = ice_pf_to_dev(pf);
8577	struct ice_vsi *main_vsi;
8578	bool rem_adv_fltr = true;
8579	struct ice_channel *ch;
8580	struct ice_vsi *vsi;
8581	int tc_idx = `1`;
8582	int i, err;
8583
8584	main_vsi = ice_get_main_vsi(pf);
8585	if (!main_vsi)
8586	return `0`;
8587
8588	if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) \|\|
8589	main_vsi->old_numtc == `1`)
8590	return `0`; / nothing to be done /
8591
8592	/ reconfigure main VSI based on old value of TC and cached values*
8593	* for MQPRIO opts
8594	*/
8595	err = ice_vsi_cfg_tc(vsi: main_vsi, ena_tc: main_vsi->old_ena_tc);
8596	if (err) {
8597	dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
8598	main_vsi->old_ena_tc, main_vsi->vsi_num);
8599	return err;
8600	}
8601
8602	/ rebuild ADQ VSIs /
8603	ice_for_each_vsi(pf, i) {
8604	enum ice_vsi_type type;
8605
8606	vsi = pf->vsi[i];
8607	if (!vsi \|\| vsi->type != ICE_VSI_CHNL)
8608	continue;
8609
8610	type = vsi->type;
8611
8612	/ rebuild ADQ VSI /
8613	err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
8614	if (err) {
8615	dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
8616	ice_vsi_type_str(type), vsi->idx, err);
8617	goto cleanup;
8618	}
8619
8620	/ Re-map HW VSI number, using VSI handle that has been*
8621	* previously validated in ice_replay_vsi() call above
8622	*/
8623	vsi->vsi_num = ice_get_hw_vsi_num(hw: &pf->hw, vsi_handle: vsi->idx);
8624
8625	/ replay filters for the VSI /
8626	err = ice_replay_vsi(hw: &pf->hw, vsi_handle: vsi->idx);
8627	if (err) {
8628	dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
8629	ice_vsi_type_str(type), err, vsi->idx);
8630	rem_adv_fltr = false;
8631	goto cleanup;
8632	}
8633	dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
8634	ice_vsi_type_str(type), vsi->idx);
8635
8636	/ store ADQ VSI at correct TC index in main VSI's*
8637	* map of TC to VSI
8638	*/
8639	main_vsi->tc_map_vsi[tc_idx++] = vsi;
8640	}
8641
8642	/ ADQ VSI(s) has been rebuilt successfully, so setup*
8643	* channel for main VSI's Tx and Rx rings
8644	*/
8645	list_for_each_entry(ch, &main_vsi->ch_list, list) {
8646	struct ice_vsi *ch_vsi;
8647
8648	ch_vsi = ch->ch_vsi;
8649	if (!ch_vsi)
8650	continue;
8651
8652	/ reconfig channel resources /
8653	ice_cfg_chnl_all_res(vsi: main_vsi, ch);
8654
8655	/ replay BW rate limit if it is non-zero /
8656	if (!ch->max_tx_rate && !ch->min_tx_rate)
8657	continue;
8658
8659	err = ice_set_bw_limit(vsi: ch_vsi, max_tx_rate: ch->max_tx_rate,
8660	min_tx_rate: ch->min_tx_rate);
8661	if (err)
8662	dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
8663	err, ch->max_tx_rate, ch->min_tx_rate,
8664	ch_vsi->vsi_num);
8665	else
8666	dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
8667	ch->max_tx_rate, ch->min_tx_rate,
8668	ch_vsi->vsi_num);
8669	}
8670
8671	/ reconfig RSS for main VSI /
8672	if (main_vsi->ch_rss_size)
8673	ice_vsi_cfg_rss_lut_key(vsi: main_vsi);
8674
8675	return `0`;
8676
8677	cleanup:
8678	ice_remove_q_channels(vsi: main_vsi, rem_fltr: rem_adv_fltr);
8679	return err;
8680	}
8681
8682	/**
8683	* ice_create_q_channels - Add queue channel for the given TCs
8684	* @vsi: VSI to be configured
8685	*
8686	* Configures queue channel mapping to the given TCs
8687	*/
8688	static int ice_create_q_channels(struct ice_vsi *vsi)
8689	{
8690	struct ice_pf *pf = vsi->back;
8691	struct ice_channel *ch;
8692	int ret = `0`, i;
8693
8694	ice_for_each_chnl_tc(i) {
8695	if (!(vsi->all_enatc & BIT(i)))
8696	continue;
8697
8698	ch = kzalloc(size: sizeof(*ch), GFP_KERNEL);
8699	if (!ch) {
8700	ret = -ENOMEM;
8701	goto err_free;
8702	}
8703	INIT_LIST_HEAD(list: &ch->list);
8704	ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
8705	ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
8706	ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
8707	ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
8708	ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
8709
8710	/ convert to Kbits/s /
8711	if (ch->max_tx_rate)
8712	ch->max_tx_rate = div_u64(dividend: ch->max_tx_rate,
8713	ICE_BW_KBPS_DIVISOR);
8714	if (ch->min_tx_rate)
8715	ch->min_tx_rate = div_u64(dividend: ch->min_tx_rate,
8716	ICE_BW_KBPS_DIVISOR);
8717
8718	ret = ice_create_q_channel(vsi, ch);
8719	if (ret) {
8720	dev_err(ice_pf_to_dev(pf),
8721	"failed creating channel TC:%d\n", i);
8722	kfree(objp: ch);
8723	goto err_free;
8724	}
8725	list_add_tail(new: &ch->list, head: &vsi->ch_list);
8726	vsi->tc_map_vsi[i] = ch->ch_vsi;
8727	dev_dbg(ice_pf_to_dev(pf),
8728	"successfully created channel: VSI %pK\n", ch->ch_vsi);
8729	}
8730	return `0`;
8731
8732	err_free:
8733	ice_remove_q_channels(vsi, rem_fltr: false);
8734
8735	return ret;
8736	}
8737
8738	/**
8739	* ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
8740	* @netdev: net device to configure
8741	* @type_data: TC offload data
8742	*/
8743	static int ice_setup_tc_mqprio_qdisc(struct net_device netdev, void* *type_data)
8744	{
8745	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
8746	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
8747	struct ice_vsi *vsi = np->vsi;
8748	struct ice_pf *pf = vsi->back;
8749	u16 mode, ena_tc_qdisc = `0`;
8750	int cur_txq, cur_rxq;
8751	u8 hw = `0`, num_tcf;
8752	struct device *dev;
8753	int ret, i;
8754
8755	dev = ice_pf_to_dev(pf);
8756	num_tcf = mqprio_qopt->qopt.num_tc;
8757	hw = mqprio_qopt->qopt.hw;
8758	mode = mqprio_qopt->mode;
8759	if (!hw) {
8760	clear_bit(nr: ICE_FLAG_TC_MQPRIO, addr: pf->flags);
8761	vsi->ch_rss_size = `0`;
8762	memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8763	goto config_tcf;
8764	}
8765
8766	/ Generate queue region map for number of TCF requested /
8767	for (i = `0`; i < num_tcf; i++)
8768	ena_tc_qdisc \|= BIT(i);
8769
8770	switch (mode) {
8771	case TC_MQPRIO_MODE_CHANNEL:
8772
8773	if (pf->hw.port_info->is_custom_tx_enabled) {
8774	dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
8775	return -EBUSY;
8776	}
8777	ice_tear_down_devlink_rate_tree(pf);
8778
8779	ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
8780	if (ret) {
8781	netdev_err(dev: netdev, format: "failed to validate_mqprio_qopt(), ret %d\n",
8782	ret);
8783	return ret;
8784	}
8785	memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8786	set_bit(nr: ICE_FLAG_TC_MQPRIO, addr: pf->flags);
8787	/ don't assume state of hw_tc_offload during driver load*
8788	* and set the flag for TC flower filter if hw_tc_offload
8789	* already ON
8790	*/
8791	if (vsi->netdev->features & NETIF_F_HW_TC)
8792	set_bit(nr: ICE_FLAG_CLS_FLOWER, addr: pf->flags);
8793	break;
8794	default:
8795	return -EINVAL;
8796	}
8797
8798	config_tcf:
8799
8800	/ Requesting same TCF configuration as already enabled /
8801	if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
8802	mode != TC_MQPRIO_MODE_CHANNEL)
8803	return `0`;
8804
8805	/ Pause VSI queues /
8806	ice_dis_vsi(vsi, locked: true);
8807
8808	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
8809	ice_remove_q_channels(vsi, rem_fltr: true);
8810
8811	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
8812	vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
8813	num_online_cpus());
8814	vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
8815	num_online_cpus());
8816	} else {
8817	/ logic to rebuild VSI, same like ethtool -L /
8818	u16 offset = `0`, qcount_tx = `0`, qcount_rx = `0`;
8819
8820	for (i = `0`; i < num_tcf; i++) {
8821	if (!(ena_tc_qdisc & BIT(i)))
8822	continue;
8823
8824	offset = vsi->mqprio_qopt.qopt.offset[i];
8825	qcount_rx = vsi->mqprio_qopt.qopt.count[i];
8826	qcount_tx = vsi->mqprio_qopt.qopt.count[i];
8827	}
8828	vsi->req_txq = offset + qcount_tx;
8829	vsi->req_rxq = offset + qcount_rx;
8830
8831	/ store away original rss_size info, so that it gets reused*
8832	* form ice_vsi_rebuild during tc-qdisc delete stage - to
8833	* determine, what should be the rss_sizefor main VSI
8834	*/
8835	vsi->orig_rss_size = vsi->rss_size;
8836	}
8837
8838	/ save current values of Tx and Rx queues before calling VSI rebuild*
8839	* for fallback option
8840	*/
8841	cur_txq = vsi->num_txq;
8842	cur_rxq = vsi->num_rxq;
8843
8844	/ proceed with rebuild main VSI using correct number of queues /
8845	ret = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
8846	if (ret) {
8847	/ fallback to current number of queues /
8848	dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
8849	vsi->req_txq = cur_txq;
8850	vsi->req_rxq = cur_rxq;
8851	clear_bit(nr: ICE_RESET_FAILED, addr: pf->state);
8852	if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT)) {
8853	dev_err(dev, "Rebuild of main VSI failed again\n");
8854	return ret;
8855	}
8856	}
8857
8858	vsi->all_numtc = num_tcf;
8859	vsi->all_enatc = ena_tc_qdisc;
8860	ret = ice_vsi_cfg_tc(vsi, ena_tc: ena_tc_qdisc);
8861	if (ret) {
8862	netdev_err(dev: netdev, format: "failed configuring TC for VSI id=%d\n",
8863	vsi->vsi_num);
8864	goto exit;
8865	}
8866
8867	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
8868	u64 max_tx_rate = vsi->mqprio_qopt.max_rate[`0`];
8869	u64 min_tx_rate = vsi->mqprio_qopt.min_rate[`0`];
8870
8871	/ set TC0 rate limit if specified /
8872	if (max_tx_rate \|\| min_tx_rate) {
8873	/ convert to Kbits/s /
8874	if (max_tx_rate)
8875	max_tx_rate = div_u64(dividend: max_tx_rate, ICE_BW_KBPS_DIVISOR);
8876	if (min_tx_rate)
8877	min_tx_rate = div_u64(dividend: min_tx_rate, ICE_BW_KBPS_DIVISOR);
8878
8879	ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
8880	if (!ret) {
8881	dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
8882	max_tx_rate, min_tx_rate, vsi->vsi_num);
8883	} else {
8884	dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
8885	max_tx_rate, min_tx_rate, vsi->vsi_num);
8886	goto exit;
8887	}
8888	}
8889	ret = ice_create_q_channels(vsi);
8890	if (ret) {
8891	netdev_err(dev: netdev, format: "failed configuring queue channels\n");
8892	goto exit;
8893	} else {
8894	netdev_dbg(netdev, "successfully configured channels\n");
8895	}
8896	}
8897
8898	if (vsi->ch_rss_size)
8899	ice_vsi_cfg_rss_lut_key(vsi);
8900
8901	exit:
8902	/ if error, reset the all_numtc and all_enatc /
8903	if (ret) {
8904	vsi->all_numtc = `0`;
8905	vsi->all_enatc = `0`;
8906	}
8907	/ resume VSI /
8908	ice_ena_vsi(vsi, locked: true);
8909
8910	return ret;
8911	}
8912
8913	static LIST_HEAD(ice_block_cb_list);
8914
8915	static int
8916	ice_setup_tc(struct net_device netdev, enum* tc_setup_type type,
8917	void *type_data)
8918	{
8919	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
8920	struct ice_pf *pf = np->vsi->back;
8921	bool locked = false;
8922	int err;
8923
8924	switch (type) {
8925	case TC_SETUP_BLOCK:
8926	return flow_block_cb_setup_simple(f: type_data,
8927	driver_list: &ice_block_cb_list,
8928	cb: ice_setup_tc_block_cb,
8929	cb_ident: np, cb_priv: np, ingress_only: true);
8930	case TC_SETUP_QDISC_MQPRIO:
8931	if (ice_is_eswitch_mode_switchdev(pf)) {
8932	netdev_err(dev: netdev, format: "TC MQPRIO offload not supported, switchdev is enabled\n");
8933	return -EOPNOTSUPP;
8934	}
8935
8936	if (pf->adev) {
8937	mutex_lock(&pf->adev_mutex);
8938	device_lock(dev: &pf->adev->dev);
8939	locked = true;
8940	if (pf->adev->dev.driver) {
8941	netdev_err(dev: netdev, format: "Cannot change qdisc when RDMA is active\n");
8942	err = -EBUSY;
8943	goto adev_unlock;
8944	}
8945	}
8946
8947	/ setup traffic classifier for receive side /
8948	mutex_lock(&pf->tc_mutex);
8949	err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
8950	mutex_unlock(lock: &pf->tc_mutex);
8951
8952	adev_unlock:
8953	if (locked) {
8954	device_unlock(dev: &pf->adev->dev);
8955	mutex_unlock(lock: &pf->adev_mutex);
8956	}
8957	return err;
8958	default:
8959	return -EOPNOTSUPP;
8960	}
8961	return -EOPNOTSUPP;
8962	}
8963
8964	static struct ice_indr_block_priv *
8965	ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
8966	struct net_device *netdev)
8967	{
8968	struct ice_indr_block_priv *cb_priv;
8969
8970	list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
8971	if (!cb_priv->netdev)
8972	return NULL;
8973	if (cb_priv->netdev == netdev)
8974	return cb_priv;
8975	}
8976	return NULL;
8977	}
8978
8979	static int
8980	ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
8981	void *indr_priv)
8982	{
8983	struct ice_indr_block_priv *priv = indr_priv;
8984	struct ice_netdev_priv *np = priv->np;
8985
8986	switch (type) {
8987	case TC_SETUP_CLSFLOWER:
8988	return ice_setup_tc_cls_flower(np, filter_dev: priv->netdev,
8989	cls_flower: (struct flow_cls_offload *)
8990	type_data);
8991	default:
8992	return -EOPNOTSUPP;
8993	}
8994	}
8995
8996	static int
8997	ice_indr_setup_tc_block(struct net_device netdev, struct* Qdisc *sch,
8998	struct ice_netdev_priv *np,
8999	struct flow_block_offload f, void* *data,
9000	void (cleanup)(struct* flow_block_cb *block_cb))
9001	{
9002	struct ice_indr_block_priv *indr_priv;
9003	struct flow_block_cb *block_cb;
9004
9005	if (!ice_is_tunnel_supported(dev: netdev) &&
9006	!(is_vlan_dev(dev: netdev) &&
9007	vlan_dev_real_dev(dev: netdev) == np->vsi->netdev))
9008	return -EOPNOTSUPP;
9009
9010	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
9011	return -EOPNOTSUPP;
9012
9013	switch (f->command) {
9014	case FLOW_BLOCK_BIND:
9015	indr_priv = ice_indr_block_priv_lookup(np, netdev);
9016	if (indr_priv)
9017	return -EEXIST;
9018
9019	indr_priv = kzalloc(size: sizeof(*indr_priv), GFP_KERNEL);
9020	if (!indr_priv)
9021	return -ENOMEM;
9022
9023	indr_priv->netdev = netdev;
9024	indr_priv->np = np;
9025	list_add(new: &indr_priv->list, head: &np->tc_indr_block_priv_list);
9026
9027	block_cb =
9028	flow_indr_block_cb_alloc(cb: ice_indr_setup_block_cb,
9029	cb_ident: indr_priv, cb_priv: indr_priv,
9030	release: ice_rep_indr_tc_block_unbind,
9031	bo: f, dev: netdev, sch, data, indr_cb_priv: np,
9032	cleanup);
9033
9034	if (IS_ERR(ptr: block_cb)) {
9035	list_del(entry: &indr_priv->list);
9036	kfree(objp: indr_priv);
9037	return PTR_ERR(ptr: block_cb);
9038	}
9039	flow_block_cb_add(block_cb, offload: f);
9040	list_add_tail(new: &block_cb->driver_list, head: &ice_block_cb_list);
9041	break;
9042	case FLOW_BLOCK_UNBIND:
9043	indr_priv = ice_indr_block_priv_lookup(np, netdev);
9044	if (!indr_priv)
9045	return -ENOENT;
9046
9047	block_cb = flow_block_cb_lookup(block: f->block,
9048	cb: ice_indr_setup_block_cb,
9049	cb_ident: indr_priv);
9050	if (!block_cb)
9051	return -ENOENT;
9052
9053	flow_indr_block_cb_remove(block_cb, offload: f);
9054
9055	list_del(entry: &block_cb->driver_list);
9056	break;
9057	default:
9058	return -EOPNOTSUPP;
9059	}
9060	return `0`;
9061	}
9062
9063	static int
9064	ice_indr_setup_tc_cb(struct net_device netdev, struct* Qdisc *sch,
9065	void cb_priv, enum* tc_setup_type type, void *type_data,
9066	void *data,
9067	void (cleanup)(struct* flow_block_cb *block_cb))
9068	{
9069	switch (type) {
9070	case TC_SETUP_BLOCK:
9071	return ice_indr_setup_tc_block(netdev, sch, np: cb_priv, f: type_data,
9072	data, cleanup);
9073
9074	default:
9075	return -EOPNOTSUPP;
9076	}
9077	}
9078
9079	/**
9080	* ice_open - Called when a network interface becomes active
9081	* @netdev: network interface device structure
9082	*
9083	* The open entry point is called when a network interface is made
9084	* active by the system (IFF_UP). At this point all resources needed
9085	* for transmit and receive operations are allocated, the interrupt
9086	* handler is registered with the OS, the netdev watchdog is enabled,
9087	* and the stack is notified that the interface is ready.
9088	*
9089	* Returns 0 on success, negative value on failure
9090	*/
9091	int ice_open(struct net_device *netdev)
9092	{
9093	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9094	struct ice_pf *pf = np->vsi->back;
9095
9096	if (ice_is_reset_in_progress(state: pf->state)) {
9097	netdev_err(dev: netdev, format: "can't open net device while reset is in progress");
9098	return -EBUSY;
9099	}
9100
9101	return ice_open_internal(netdev);
9102	}
9103
9104	/**
9105	* ice_open_internal - Called when a network interface becomes active
9106	* @netdev: network interface device structure
9107	*
9108	* Internal ice_open implementation. Should not be used directly except for ice_open and reset
9109	* handling routine
9110	*
9111	* Returns 0 on success, negative value on failure
9112	*/
9113	int ice_open_internal(struct net_device *netdev)
9114	{
9115	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9116	struct ice_vsi *vsi = np->vsi;
9117	struct ice_pf *pf = vsi->back;
9118	struct ice_port_info *pi;
9119	int err;
9120
9121	if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
9122	netdev_err(dev: netdev, format: "driver needs to be unloaded and reloaded\n");
9123	return -EIO;
9124	}
9125
9126	netif_carrier_off(dev: netdev);
9127
9128	pi = vsi->port_info;
9129	err = ice_update_link_info(pi);
9130	if (err) {
9131	netdev_err(dev: netdev, format: "Failed to get link info, error %d\n", err);
9132	return err;
9133	}
9134
9135	ice_check_link_cfg_err(pf, link_cfg_err: pi->phy.link_info.link_cfg_err);
9136
9137	/ Set PHY if there is media, otherwise, turn off PHY /
9138	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9139	clear_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
9140	if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
9141	err = ice_init_phy_user_cfg(pi);
9142	if (err) {
9143	netdev_err(dev: netdev, format: "Failed to initialize PHY settings, error %d\n",
9144	err);
9145	return err;
9146	}
9147	}
9148
9149	err = ice_configure_phy(vsi);
9150	if (err) {
9151	netdev_err(dev: netdev, format: "Failed to set physical link up, error %d\n",
9152	err);
9153	return err;
9154	}
9155	} else {
9156	set_bit(nr: ICE_FLAG_NO_MEDIA, addr: pf->flags);
9157	ice_set_link(vsi, ena: false);
9158	}
9159
9160	err = ice_vsi_open(vsi);
9161	if (err)
9162	netdev_err(dev: netdev, format: "Failed to open VSI 0x%04X on switch 0x%04X\n",
9163	vsi->vsi_num, vsi->vsw->sw_id);
9164
9165	/ Update existing tunnels information /
9166	udp_tunnel_get_rx_info(dev: netdev);
9167
9168	return err;
9169	}
9170
9171	/**
9172	* ice_stop - Disables a network interface
9173	* @netdev: network interface device structure
9174	*
9175	* The stop entry point is called when an interface is de-activated by the OS,
9176	* and the netdevice enters the DOWN state. The hardware is still under the
9177	* driver's control, but the netdev interface is disabled.
9178	*
9179	* Returns success only - not allowed to fail
9180	*/
9181	int ice_stop(struct net_device *netdev)
9182	{
9183	struct ice_netdev_priv *np = netdev_priv(dev: netdev);
9184	struct ice_vsi *vsi = np->vsi;
9185	struct ice_pf *pf = vsi->back;
9186
9187	if (ice_is_reset_in_progress(state: pf->state)) {
9188	netdev_err(dev: netdev, format: "can't stop net device while reset is in progress");
9189	return -EBUSY;
9190	}
9191
9192	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
9193	int link_err = ice_force_phys_link_state(vsi, link_up: false);
9194
9195	if (link_err) {
9196	netdev_err(dev: vsi->netdev, format: "Failed to set physical link down, VSI %d error %d\n",
9197	vsi->vsi_num, link_err);
9198	return -EIO;
9199	}
9200	}
9201
9202	ice_vsi_close(vsi);
9203
9204	return `0`;
9205	}
9206
9207	/**
9208	* ice_features_check - Validate encapsulated packet conforms to limits
9209	* @skb: skb buffer
9210	* @netdev: This port's netdev
9211	* @features: Offload features that the stack believes apply
9212	*/
9213	static netdev_features_t
9214	ice_features_check(struct sk_buff *skb,
9215	struct net_device __always_unused *netdev,
9216	netdev_features_t features)
9217	{
9218	bool gso = skb_is_gso(skb);
9219	size_t len;
9220
9221	/ No point in doing any of this if neither checksum nor GSO are*
9222	* being requested for this frame. We can rule out both by just
9223	* checking for CHECKSUM_PARTIAL
9224	*/
9225	if (skb->ip_summed != CHECKSUM_PARTIAL)
9226	return features;
9227
9228	/ We cannot support GSO if the MSS is going to be less than*
9229	* 64 bytes. If it is then we need to drop support for GSO.
9230	*/
9231	if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
9232	features &= ~NETIF_F_GSO_MASK;
9233
9234	len = skb_network_offset(skb);
9235	if (len > ICE_TXD_MACLEN_MAX \|\| len & `0x1`)
9236	goto out_rm_features;
9237
9238	len = skb_network_header_len(skb);
9239	if (len > ICE_TXD_IPLEN_MAX \|\| len & `0x1`)
9240	goto out_rm_features;
9241
9242	if (skb->encapsulation) {
9243	/ this must work for VXLAN frames AND IPIP/SIT frames, and in*
9244	* the case of IPIP frames, the transport header pointer is
9245	* after the inner header! So check to make sure that this
9246	* is a GRE or UDP_TUNNEL frame before doing that math.
9247	*/
9248	if (gso && (skb_shinfo(skb)->gso_type &
9249	(SKB_GSO_GRE \| SKB_GSO_UDP_TUNNEL))) {
9250	len = skb_inner_network_header(skb) -
9251	skb_transport_header(skb);
9252	if (len > ICE_TXD_L4LEN_MAX \|\| len & `0x1`)
9253	goto out_rm_features;
9254	}
9255
9256	len = skb_inner_network_header_len(skb);
9257	if (len > ICE_TXD_IPLEN_MAX \|\| len & `0x1`)
9258	goto out_rm_features;
9259	}
9260
9261	return features;
9262	out_rm_features:
9263	return features & ~(NETIF_F_CSUM_MASK \| NETIF_F_GSO_MASK);
9264	}
9265
9266	static const struct net_device_ops ice_netdev_safe_mode_ops = {
9267	.ndo_open = ice_open,
9268	.ndo_stop = ice_stop,
9269	.ndo_start_xmit = ice_start_xmit,
9270	.ndo_set_mac_address = ice_set_mac_address,
9271	.ndo_validate_addr = eth_validate_addr,
9272	.ndo_change_mtu = ice_change_mtu,
9273	.ndo_get_stats64 = ice_get_stats64,
9274	.ndo_tx_timeout = ice_tx_timeout,
9275	.ndo_bpf = ice_xdp_safe_mode,
9276	};
9277
9278	static const struct net_device_ops ice_netdev_ops = {
9279	.ndo_open = ice_open,
9280	.ndo_stop = ice_stop,
9281	.ndo_start_xmit = ice_start_xmit,
9282	.ndo_select_queue = ice_select_queue,
9283	.ndo_features_check = ice_features_check,
9284	.ndo_fix_features = ice_fix_features,
9285	.ndo_set_rx_mode = ice_set_rx_mode,
9286	.ndo_set_mac_address = ice_set_mac_address,
9287	.ndo_validate_addr = eth_validate_addr,
9288	.ndo_change_mtu = ice_change_mtu,
9289	.ndo_get_stats64 = ice_get_stats64,
9290	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
9291	.ndo_eth_ioctl = ice_eth_ioctl,
9292	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
9293	.ndo_set_vf_mac = ice_set_vf_mac,
9294	.ndo_get_vf_config = ice_get_vf_cfg,
9295	.ndo_set_vf_trust = ice_set_vf_trust,
9296	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
9297	.ndo_set_vf_link_state = ice_set_vf_link_state,
9298	.ndo_get_vf_stats = ice_get_vf_stats,
9299	.ndo_set_vf_rate = ice_set_vf_bw,
9300	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
9301	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
9302	.ndo_setup_tc = ice_setup_tc,
9303	.ndo_set_features = ice_set_features,
9304	.ndo_bridge_getlink = ice_bridge_getlink,
9305	.ndo_bridge_setlink = ice_bridge_setlink,
9306	.ndo_fdb_add = ice_fdb_add,
9307	.ndo_fdb_del = ice_fdb_del,
9308	#ifdef CONFIG_RFS_ACCEL
9309	.ndo_rx_flow_steer = ice_rx_flow_steer,
9310	#endif
9311	.ndo_tx_timeout = ice_tx_timeout,
9312	.ndo_bpf = ice_xdp,
9313	.ndo_xdp_xmit = ice_xdp_xmit,
9314	.ndo_xsk_wakeup = ice_xsk_wakeup,
9315	};
9316

source code of linux/drivers/net/ethernet/intel/ice/ice_main.c