1// SPDX-License-Identifier: (GPL-2.0 OR MIT)
2/* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7#include <linux/bpf.h>
8#include <linux/cpumask.h>
9#include <linux/etherdevice.h>
10#include <linux/filter.h>
11#include <linux/interrupt.h>
12#include <linux/module.h>
13#include <linux/pci.h>
14#include <linux/sched.h>
15#include <linux/timer.h>
16#include <linux/workqueue.h>
17#include <linux/utsname.h>
18#include <linux/version.h>
19#include <net/sch_generic.h>
20#include <net/xdp_sock_drv.h>
21#include "gve.h"
22#include "gve_dqo.h"
23#include "gve_adminq.h"
24#include "gve_register.h"
25#include "gve_utils.h"
26
27#define GVE_DEFAULT_RX_COPYBREAK (256)
28
29#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
30#define GVE_VERSION "1.0.0"
31#define GVE_VERSION_PREFIX "GVE-"
32
33// Minimum amount of time between queue kicks in msec (10 seconds)
34#define MIN_TX_TIMEOUT_GAP (1000 * 10)
35
36char gve_driver_name[] = "gve";
37const char gve_version_str[] = GVE_VERSION;
38static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
39
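
/* Describe the running driver and OS to the device: the OS type/version and
 * the driver capability flags are copied into a DMA-coherent gve_driver_info
 * block and passed over the admin queue. A device that does not implement
 * the command returns -EOPNOTSUPP, which is treated as success.
 */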
40static int gve_verify_driver_compatibility(struct gve_priv *priv)
41{
42 int err;
43 struct gve_driver_info *driver_info;
44 dma_addr_t driver_info_bus;
45
	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
49 if (!driver_info)
50 return -ENOMEM;
51
52 *driver_info = (struct gve_driver_info) {
53 .os_type = 1, /* Linux */
54 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
55 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
56 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
57 .driver_capability_flags = {
58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
62 },
63 };
64 strscpy(driver_info->os_version_str1, utsname()->release,
65 sizeof(driver_info->os_version_str1));
66 strscpy(driver_info->os_version_str2, utsname()->version,
67 sizeof(driver_info->os_version_str2));
68
	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);
72
73 /* It's ok if the device doesn't support this */
74 if (err == -EOPNOTSUPP)
75 err = 0;
76
	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
80 return err;
81}
82
83static netdev_features_t gve_features_check(struct sk_buff *skb,
84 struct net_device *dev,
85 netdev_features_t features)
86{
87 struct gve_priv *priv = netdev_priv(dev);
88
89 if (!gve_is_gqi(priv))
90 return gve_features_check_dqo(skb, dev, features);
91
92 return features;
93}
94
95static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
96{
97 struct gve_priv *priv = netdev_priv(dev);
98
99 if (gve_is_gqi(priv))
100 return gve_tx(skb, dev);
101 else
102 return gve_tx_dqo(skb, dev);
103}
104
105static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
106{
107 struct gve_priv *priv = netdev_priv(dev);
108 unsigned int start;
109 u64 packets, bytes;
110 int num_tx_queues;
111 int ring;
112
113 num_tx_queues = gve_num_tx_queues(priv);
114 if (priv->rx) {
115 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
123 s->rx_packets += packets;
124 s->rx_bytes += bytes;
125 }
126 }
127 if (priv->tx) {
128 for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
136 s->tx_packets += packets;
137 s->tx_bytes += bytes;
138 }
139 }
140}
141
142static int gve_alloc_counter_array(struct gve_priv *priv)
143{
144 priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
149 if (!priv->counter_array)
150 return -ENOMEM;
151
152 return 0;
153}
154
155static void gve_free_counter_array(struct gve_priv *priv)
156{
157 if (!priv->counter_array)
158 return;
159
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
164 priv->counter_array = NULL;
165}
166
167/* NIC requests to report stats */
168static void gve_stats_report_task(struct work_struct *work)
169{
170 struct gve_priv *priv = container_of(work, struct gve_priv,
171 stats_report_task);
172 if (gve_get_do_report_stats(priv)) {
173 gve_handle_report_stats(priv);
174 gve_clear_do_report_stats(priv);
175 }
176}
177
178static void gve_stats_report_schedule(struct gve_priv *priv)
179{
180 if (!gve_get_probe_in_progress(priv) &&
181 !gve_get_reset_in_progress(priv)) {
182 gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
184 }
185}
186
187static void gve_stats_report_timer(struct timer_list *t)
188{
189 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
190
	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
194 gve_stats_report_schedule(priv);
195}
196
197static int gve_alloc_stats_report(struct gve_priv *priv)
198{
199 int tx_stats_num, rx_stats_num;
200
201 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
202 gve_num_tx_queues(priv);
203 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
204 priv->rx_cfg.num_queues;
205 priv->stats_report_len = struct_size(priv->stats_report, stats,
206 size_add(tx_stats_num, rx_stats_num));
207 priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
210 if (!priv->stats_report)
211 return -ENOMEM;
212 /* Set up timer for the report-stats task */
213 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
214 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
215 return 0;
216}
217
218static void gve_free_stats_report(struct gve_priv *priv)
219{
220 if (!priv->stats_report)
221 return;
222
	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
226 priv->stats_report = NULL;
227}
228
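
/* Management interrupt: all handling is deferred to the service task on the
 * driver workqueue; the hard IRQ only schedules it.
 */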
229static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
230{
231 struct gve_priv *priv = arg;
232
	queue_work(priv->gve_wq, &priv->service_task);
234 return IRQ_HANDLED;
235}
236
237static irqreturn_t gve_intr(int irq, void *arg)
238{
239 struct gve_notify_block *block = arg;
240 struct gve_priv *priv = block->priv;
241
242 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
244 return IRQ_HANDLED;
245}
246
247static irqreturn_t gve_intr_dqo(int irq, void *arg)
248{
249 struct gve_notify_block *block = arg;
250
251 /* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
253 return IRQ_HANDLED;
254}
255
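
/* NAPI handler used by the GQI queue format. TX (or XDP TX, for queues past
 * tx_cfg.num_queues) is cleaned first, then RX is polled. Returning the full
 * budget keeps NAPI scheduled; otherwise the IRQ doorbell is ACKed and the
 * rings are re-checked so that work which raced with the ACK is not lost.
 */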
256int gve_napi_poll(struct napi_struct *napi, int budget)
257{
258 struct gve_notify_block *block;
259 __be32 __iomem *irq_doorbell;
260 bool reschedule = false;
261 struct gve_priv *priv;
262 int work_done = 0;
263
264 block = container_of(napi, struct gve_notify_block, napi);
265 priv = block->priv;
266
267 if (block->tx) {
268 if (block->tx->q_num < priv->tx_cfg.num_queues)
269 reschedule |= gve_tx_poll(block, budget);
270 else if (budget)
271 reschedule |= gve_xdp_poll(block, budget);
272 }
273
274 if (!budget)
275 return 0;
276
277 if (block->rx) {
278 work_done = gve_rx_poll(block, budget);
279 reschedule |= work_done == budget;
280 }
281
282 if (reschedule)
283 return budget;
284
285 /* Complete processing - don't unmask irq if busy polling is enabled */
286 if (likely(napi_complete_done(napi, work_done))) {
287 irq_doorbell = gve_irq_doorbell(priv, block);
288 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
289
		/* Ensure the IRQ ACK is visible before we check for pending
		 * work. If the queues had posted any updates, they will be
		 * visible after this barrier.
		 */
293 mb();
294
		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
302 }
303 return work_done;
304}
305
306int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
307{
308 struct gve_notify_block *block =
309 container_of(napi, struct gve_notify_block, napi);
310 struct gve_priv *priv = block->priv;
311 bool reschedule = false;
312 int work_done = 0;
313
314 if (block->tx)
315 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
316
317 if (!budget)
318 return 0;
319
320 if (block->rx) {
321 work_done = gve_rx_poll_dqo(block, budget);
322 reschedule |= work_done == budget;
323 }
324
325 if (reschedule)
326 return budget;
327
328 if (likely(napi_complete_done(napi, work_done))) {
329 /* Enable interrupts again.
330 *
331 * We don't need to repoll afterwards because HW supports the
332 * PCI MSI-X PBA feature.
333 *
334 * Another interrupt would be triggered if a new event came in
335 * since the last one.
336 */
337 gve_write_irq_doorbell_dqo(priv, block,
338 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
339 }
340
341 return work_done;
342}
343
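
/* Allocate one MSI-X vector per notification block plus one management
 * vector. If fewer vectors are granted, the notification blocks are trimmed
 * to an even count split between TX and RX and the queue maximums are capped
 * accordingly. IRQ affinity hints spread the block vectors over online CPUs.
 */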
344static int gve_alloc_notify_blocks(struct gve_priv *priv)
345{
346 int num_vecs_requested = priv->num_ntfy_blks + 1;
347 unsigned int active_cpus;
348 int vecs_enabled;
349 int i, j;
350 int err;
351
	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
354 if (!priv->msix_vectors)
355 return -ENOMEM;
356 for (i = 0; i < num_vecs_requested; i++)
357 priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
360 if (vecs_enabled < 0) {
361 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
362 GVE_MIN_MSIX, vecs_enabled);
363 err = vecs_enabled;
364 goto abort_with_msix_vectors;
365 }
366 if (vecs_enabled != num_vecs_requested) {
367 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
368 int vecs_per_type = new_num_ntfy_blks / 2;
369 int vecs_left = new_num_ntfy_blks % 2;
370
371 priv->num_ntfy_blks = new_num_ntfy_blks;
372 priv->mgmt_msix_idx = priv->num_ntfy_blks;
373 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
374 vecs_per_type);
375 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
376 vecs_per_type + vecs_left);
377 dev_err(&priv->pdev->dev,
378 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
379 vecs_enabled, priv->tx_cfg.max_queues,
380 priv->rx_cfg.max_queues);
381 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
382 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
383 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
384 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
385 }
386 /* Half the notification blocks go to TX and half to RX */
387 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
388
389 /* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
394 if (err) {
395 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
396 goto abort_with_msix_enabled;
397 }
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
403 if (!priv->irq_db_indices) {
404 err = -ENOMEM;
405 goto abort_with_mgmt_vector;
406 }
407
	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
410 if (!priv->ntfy_blocks) {
411 err = -ENOMEM;
412 goto abort_with_irq_db_indices;
413 }
414
415 /* Setup the other blocks - the first n-1 vectors */
416 for (i = 0; i < priv->num_ntfy_blks; i++) {
417 struct gve_notify_block *block = &priv->ntfy_blocks[i];
418 int msix_idx = i;
419
		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
426 if (err) {
427 dev_err(&priv->pdev->dev,
428 "Failed to receive msix vector %d\n", i);
429 goto abort_with_some_ntfy_blocks;
430 }
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
433 block->irq_db_index = &priv->irq_db_indices[i].index;
434 }
435 return 0;
436abort_with_some_ntfy_blocks:
437 for (j = 0; j < i; j++) {
438 struct gve_notify_block *block = &priv->ntfy_blocks[j];
439 int msix_idx = j;
440
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
443 free_irq(priv->msix_vectors[msix_idx].vector, block);
444 }
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
459 return err;
460}
461
462static void gve_free_notify_blocks(struct gve_priv *priv)
463{
464 int i;
465
466 if (!priv->msix_vectors)
467 return;
468
469 /* Free the irqs */
470 for (i = 0; i < priv->num_ntfy_blks; i++) {
471 struct gve_notify_block *block = &priv->ntfy_blocks[i];
472 int msix_idx = i;
473
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
488}
489
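
/* Allocate the event counter array, notification blocks and stats report,
 * then describe them to the device over the admin queue. DQO formats also
 * fetch the packet type lookup table here.
 */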
490static int gve_setup_device_resources(struct gve_priv *priv)
491{
492 int err;
493
494 err = gve_alloc_counter_array(priv);
495 if (err)
496 return err;
497 err = gve_alloc_notify_blocks(priv);
498 if (err)
499 goto abort_with_counter;
500 err = gve_alloc_stats_report(priv);
501 if (err)
502 goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
508 if (unlikely(err)) {
509 dev_err(&priv->pdev->dev,
510 "could not setup device_resources: err=%d\n", err);
511 err = -ENXIO;
512 goto abort_with_stats_report;
513 }
514
515 if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
518 if (!priv->ptype_lut_dqo) {
519 err = -ENOMEM;
520 goto abort_with_stats_report;
521 }
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
523 if (err) {
524 dev_err(&priv->pdev->dev,
525 "Failed to get ptype map: err=%d\n", err);
526 goto abort_with_ptype_lut;
527 }
528 }
529
	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
533 if (err)
534 dev_err(&priv->pdev->dev,
535 "Failed to report stats: err=%d\n", err);
536 gve_set_device_resources_ok(priv);
537 return 0;
538
539abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
541 priv->ptype_lut_dqo = NULL;
542abort_with_stats_report:
543 gve_free_stats_report(priv);
544abort_with_ntfy_blocks:
545 gve_free_notify_blocks(priv);
546abort_with_counter:
547 gve_free_counter_array(priv);
548
549 return err;
550}
551
552static void gve_trigger_reset(struct gve_priv *priv);
553
554static void gve_teardown_device_resources(struct gve_priv *priv)
555{
556 int err;
557
558 /* Tell device its resources are being freed */
559 if (gve_get_device_resources_ok(priv)) {
560 /* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
562 if (err) {
563 dev_err(&priv->pdev->dev,
564 "Failed to detach stats report: err=%d\n", err);
565 gve_trigger_reset(priv);
566 }
567 err = gve_adminq_deconfigure_device_resources(priv);
568 if (err) {
569 dev_err(&priv->pdev->dev,
570 "Could not deconfigure device resources: err=%d\n",
571 err);
572 gve_trigger_reset(priv);
573 }
574 }
575
	kvfree(priv->ptype_lut_dqo);
577 priv->ptype_lut_dqo = NULL;
578
579 gve_free_counter_array(priv);
580 gve_free_notify_blocks(priv);
581 gve_free_stats_report(priv);
582 gve_clear_device_resources_ok(priv);
583}
584
585static int gve_unregister_qpl(struct gve_priv *priv, u32 i)
586{
587 int err;
588
	err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
590 if (err) {
591 netif_err(priv, drv, priv->dev,
592 "Failed to unregister queue page list %d\n",
593 priv->qpls[i].id);
594 return err;
595 }
596
597 priv->num_registered_pages -= priv->qpls[i].num_entries;
598 return 0;
599}
600
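
/* Register queue page list i with the device, enforcing the device's limit
 * on total registered pages. On success the page count is added to
 * num_registered_pages.
 */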
601static int gve_register_qpl(struct gve_priv *priv, u32 i)
602{
603 int num_rx_qpls;
604 int pages;
605 int err;
606
	/* Rx QPLs follow Tx QPLs in the priv->qpls array. */
	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
	if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) {
610 netif_err(priv, drv, priv->dev,
611 "Cannot register nonexisting QPL at index %d\n", i);
612 return -EINVAL;
613 }
614
615 pages = priv->qpls[i].num_entries;
616
617 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
618 netif_err(priv, drv, priv->dev,
619 "Reached max number of registered pages %llu > %llu\n",
620 pages + priv->num_registered_pages,
621 priv->max_registered_pages);
622 return -EINVAL;
623 }
624
	err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
626 if (err) {
627 netif_err(priv, drv, priv->dev,
628 "failed to register queue page list %d\n",
629 priv->qpls[i].id);
630 /* This failure will trigger a reset - no need to clean
631 * up
632 */
633 return err;
634 }
635
636 priv->num_registered_pages += pages;
637 return 0;
638}
639
640static int gve_register_xdp_qpls(struct gve_priv *priv)
641{
642 int start_id;
643 int err;
644 int i;
645
646 start_id = gve_xdp_tx_start_queue_id(priv);
647 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
648 err = gve_register_qpl(priv, i);
649 /* This failure will trigger a reset - no need to clean up */
650 if (err)
651 return err;
652 }
653 return 0;
654}
655
656static int gve_register_qpls(struct gve_priv *priv)
657{
658 int num_tx_qpls, num_rx_qpls;
659 int start_id;
660 int err;
661 int i;
662
663 num_tx_qpls = gve_num_tx_qpls(tx_cfg: &priv->tx_cfg, num_xdp_queues: gve_num_xdp_qpls(priv),
664 is_qpl: gve_is_qpl(priv));
665 num_rx_qpls = gve_num_rx_qpls(rx_cfg: &priv->rx_cfg, is_qpl: gve_is_qpl(priv));
666
667 for (i = 0; i < num_tx_qpls; i++) {
668 err = gve_register_qpl(priv, i);
669 if (err)
670 return err;
671 }
672
673 /* there might be a gap between the tx and rx qpl ids */
674 start_id = gve_rx_start_qpl_id(tx_cfg: &priv->tx_cfg);
675 for (i = 0; i < num_rx_qpls; i++) {
676 err = gve_register_qpl(priv, i: start_id + i);
677 if (err)
678 return err;
679 }
680
681 return 0;
682}
683
684static int gve_unregister_xdp_qpls(struct gve_priv *priv)
685{
686 int start_id;
687 int err;
688 int i;
689
690 start_id = gve_xdp_tx_start_queue_id(priv);
691 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
692 err = gve_unregister_qpl(priv, i);
693 /* This failure will trigger a reset - no need to clean */
694 if (err)
695 return err;
696 }
697 return 0;
698}
699
700static int gve_unregister_qpls(struct gve_priv *priv)
701{
702 int num_tx_qpls, num_rx_qpls;
703 int start_id;
704 int err;
705 int i;
706
707 num_tx_qpls = gve_num_tx_qpls(tx_cfg: &priv->tx_cfg, num_xdp_queues: gve_num_xdp_qpls(priv),
708 is_qpl: gve_is_qpl(priv));
709 num_rx_qpls = gve_num_rx_qpls(rx_cfg: &priv->rx_cfg, is_qpl: gve_is_qpl(priv));
710
711 for (i = 0; i < num_tx_qpls; i++) {
712 err = gve_unregister_qpl(priv, i);
713 /* This failure will trigger a reset - no need to clean */
714 if (err)
715 return err;
716 }
717
718 start_id = gve_rx_start_qpl_id(tx_cfg: &priv->tx_cfg);
719 for (i = 0; i < num_rx_qpls; i++) {
720 err = gve_unregister_qpl(priv, i: start_id + i);
721 /* This failure will trigger a reset - no need to clean */
722 if (err)
723 return err;
724 }
725 return 0;
726}
727
728static int gve_create_xdp_rings(struct gve_priv *priv)
729{
730 int err;
731
732 err = gve_adminq_create_tx_queues(priv,
733 start_id: gve_xdp_tx_start_queue_id(priv),
734 num_queues: priv->num_xdp_queues);
735 if (err) {
736 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
737 priv->num_xdp_queues);
738 /* This failure will trigger a reset - no need to clean
739 * up
740 */
741 return err;
742 }
743 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
744 priv->num_xdp_queues);
745
746 return 0;
747}
748
749static int gve_create_rings(struct gve_priv *priv)
750{
751 int num_tx_queues = gve_num_tx_queues(priv);
752 int err;
753 int i;
754
755 err = gve_adminq_create_tx_queues(priv, start_id: 0, num_queues: num_tx_queues);
756 if (err) {
757 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
758 num_tx_queues);
759 /* This failure will trigger a reset - no need to clean
760 * up
761 */
762 return err;
763 }
764 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
765 num_tx_queues);
766
767 err = gve_adminq_create_rx_queues(priv, num_queues: priv->rx_cfg.num_queues);
768 if (err) {
769 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
770 priv->rx_cfg.num_queues);
771 /* This failure will trigger a reset - no need to clean
772 * up
773 */
774 return err;
775 }
776 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
777 priv->rx_cfg.num_queues);
778
779 if (gve_is_gqi(priv)) {
780 /* Rx data ring has been prefilled with packet buffers at queue
781 * allocation time.
782 *
783 * Write the doorbell to provide descriptor slots and packet
784 * buffers to the NIC.
785 */
786 for (i = 0; i < priv->rx_cfg.num_queues; i++)
787 gve_rx_write_doorbell(priv, rx: &priv->rx[i]);
788 } else {
789 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
790 /* Post buffers and ring doorbell. */
791 gve_rx_post_buffers_dqo(rx: &priv->rx[i]);
792 }
793 }
794
795 return 0;
796}
797
798static void init_xdp_sync_stats(struct gve_priv *priv)
799{
800 int start_id = gve_xdp_tx_start_queue_id(priv);
801 int i;
802
803 /* Init stats */
804 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
805 int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx: i);
806
807 u64_stats_init(syncp: &priv->tx[i].statss);
808 priv->tx[i].ntfy_id = ntfy_idx;
809 }
810}
811
812static void gve_init_sync_stats(struct gve_priv *priv)
813{
814 int i;
815
816 for (i = 0; i < priv->tx_cfg.num_queues; i++)
817 u64_stats_init(syncp: &priv->tx[i].statss);
818
819 /* Init stats for XDP TX queues */
820 init_xdp_sync_stats(priv);
821
822 for (i = 0; i < priv->rx_cfg.num_queues; i++)
823 u64_stats_init(syncp: &priv->rx[i].statss);
824}
825
826static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
827 struct gve_tx_alloc_rings_cfg *cfg)
828{
829 cfg->qcfg = &priv->tx_cfg;
830 cfg->raw_addressing = !gve_is_qpl(priv);
831 cfg->qpls = priv->qpls;
832 cfg->qpl_cfg = &priv->qpl_cfg;
833 cfg->ring_size = priv->tx_desc_cnt;
834 cfg->start_idx = 0;
835 cfg->num_rings = gve_num_tx_queues(priv);
836 cfg->tx = priv->tx;
837}
838
839static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
840{
841 int i;
842
843 if (!priv->tx)
844 return;
845
846 for (i = start_id; i < start_id + num_rings; i++) {
847 if (gve_is_gqi(priv))
848 gve_tx_stop_ring_gqi(priv, idx: i);
849 else
850 gve_tx_stop_ring_dqo(priv, idx: i);
851 }
852}
853
854static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
855 int num_rings)
856{
857 int i;
858
859 for (i = start_id; i < start_id + num_rings; i++) {
860 if (gve_is_gqi(priv))
861 gve_tx_start_ring_gqi(priv, idx: i);
862 else
863 gve_tx_start_ring_dqo(priv, idx: i);
864 }
865}
866
867static int gve_alloc_xdp_rings(struct gve_priv *priv)
868{
869 struct gve_tx_alloc_rings_cfg cfg = {0};
870 int err = 0;
871
872 if (!priv->num_xdp_queues)
873 return 0;
874
875 gve_tx_get_curr_alloc_cfg(priv, cfg: &cfg);
876 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
877 cfg.num_rings = priv->num_xdp_queues;
878
879 err = gve_tx_alloc_rings_gqi(priv, cfg: &cfg);
880 if (err)
881 return err;
882
883 gve_tx_start_rings(priv, start_id: cfg.start_idx, num_rings: cfg.num_rings);
884 init_xdp_sync_stats(priv);
885
886 return 0;
887}
888
889static int gve_alloc_rings(struct gve_priv *priv,
890 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
891 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
892{
893 int err;
894
895 if (gve_is_gqi(priv))
896 err = gve_tx_alloc_rings_gqi(priv, cfg: tx_alloc_cfg);
897 else
898 err = gve_tx_alloc_rings_dqo(priv, cfg: tx_alloc_cfg);
899 if (err)
900 return err;
901
902 if (gve_is_gqi(priv))
903 err = gve_rx_alloc_rings_gqi(priv, cfg: rx_alloc_cfg);
904 else
905 err = gve_rx_alloc_rings_dqo(priv, cfg: rx_alloc_cfg);
906 if (err)
907 goto free_tx;
908
909 return 0;
910
911free_tx:
912 if (gve_is_gqi(priv))
913 gve_tx_free_rings_gqi(priv, cfg: tx_alloc_cfg);
914 else
915 gve_tx_free_rings_dqo(priv, cfg: tx_alloc_cfg);
916 return err;
917}
918
919static int gve_destroy_xdp_rings(struct gve_priv *priv)
920{
921 int start_id;
922 int err;
923
924 start_id = gve_xdp_tx_start_queue_id(priv);
925 err = gve_adminq_destroy_tx_queues(priv,
926 start_id,
927 num_queues: priv->num_xdp_queues);
928 if (err) {
929 netif_err(priv, drv, priv->dev,
930 "failed to destroy XDP queues\n");
931 /* This failure will trigger a reset - no need to clean up */
932 return err;
933 }
934 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
935
936 return 0;
937}
938
939static int gve_destroy_rings(struct gve_priv *priv)
940{
941 int num_tx_queues = gve_num_tx_queues(priv);
942 int err;
943
944 err = gve_adminq_destroy_tx_queues(priv, start_id: 0, num_queues: num_tx_queues);
945 if (err) {
946 netif_err(priv, drv, priv->dev,
947 "failed to destroy tx queues\n");
948 /* This failure will trigger a reset - no need to clean up */
949 return err;
950 }
951 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
952 err = gve_adminq_destroy_rx_queues(priv, queue_id: priv->rx_cfg.num_queues);
953 if (err) {
954 netif_err(priv, drv, priv->dev,
955 "failed to destroy rx queues\n");
956 /* This failure will trigger a reset - no need to clean up */
957 return err;
958 }
959 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
960 return 0;
961}
962
963static void gve_free_xdp_rings(struct gve_priv *priv)
964{
965 struct gve_tx_alloc_rings_cfg cfg = {0};
966
967 gve_tx_get_curr_alloc_cfg(priv, cfg: &cfg);
968 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
969 cfg.num_rings = priv->num_xdp_queues;
970
971 if (priv->tx) {
972 gve_tx_stop_rings(priv, start_id: cfg.start_idx, num_rings: cfg.num_rings);
973 gve_tx_free_rings_gqi(priv, cfg: &cfg);
974 }
975}
976
977static void gve_free_rings(struct gve_priv *priv,
978 struct gve_tx_alloc_rings_cfg *tx_cfg,
979 struct gve_rx_alloc_rings_cfg *rx_cfg)
980{
981 if (gve_is_gqi(priv)) {
982 gve_tx_free_rings_gqi(priv, cfg: tx_cfg);
983 gve_rx_free_rings_gqi(priv, cfg: rx_cfg);
984 } else {
985 gve_tx_free_rings_dqo(priv, cfg: tx_cfg);
986 gve_rx_free_rings_dqo(priv, cfg: rx_cfg);
987 }
988}
989
990int gve_alloc_page(struct gve_priv *priv, struct device *dev,
991 struct page **page, dma_addr_t *dma,
992 enum dma_data_direction dir, gfp_t gfp_flags)
993{
994 *page = alloc_page(gfp_flags);
995 if (!*page) {
996 priv->page_alloc_fail++;
997 return -ENOMEM;
998 }
999 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
1003 return -ENOMEM;
1004 }
1005 return 0;
1006}
1007
1008static int gve_alloc_queue_page_list(struct gve_priv *priv,
1009 struct gve_queue_page_list *qpl,
1010 u32 id, int pages)
1011{
1012 int err;
1013 int i;
1014
1015 qpl->id = id;
1016 qpl->num_entries = 0;
1017 qpl->pages = kvcalloc(n: pages, size: sizeof(*qpl->pages), GFP_KERNEL);
1018 /* caller handles clean up */
1019 if (!qpl->pages)
1020 return -ENOMEM;
1021 qpl->page_buses = kvcalloc(n: pages, size: sizeof(*qpl->page_buses), GFP_KERNEL);
1022 /* caller handles clean up */
1023 if (!qpl->page_buses)
1024 return -ENOMEM;
1025
1026 for (i = 0; i < pages; i++) {
1027 err = gve_alloc_page(priv, dev: &priv->pdev->dev, page: &qpl->pages[i],
1028 dma: &qpl->page_buses[i],
1029 dir: gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1030 /* caller handles clean up */
1031 if (err)
1032 return -ENOMEM;
1033 qpl->num_entries++;
1034 }
1035
1036 return 0;
1037}
1038
1039void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1040 enum dma_data_direction dir)
1041{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
1046}
1047
1048static void gve_free_queue_page_list(struct gve_priv *priv,
1049 struct gve_queue_page_list *qpl,
1050 int id)
1051{
1052 int i;
1053
1054 if (!qpl->pages)
1055 return;
1056 if (!qpl->page_buses)
1057 goto free_pages;
1058
1059 for (i = 0; i < qpl->num_entries; i++)
1060 gve_free_page(dev: &priv->pdev->dev, page: qpl->pages[i],
1061 dma: qpl->page_buses[i], dir: gve_qpl_dma_dir(priv, id));
1062
1063 kvfree(addr: qpl->page_buses);
1064 qpl->page_buses = NULL;
1065free_pages:
1066 kvfree(addr: qpl->pages);
1067 qpl->pages = NULL;
1068}
1069
1070static void gve_free_n_qpls(struct gve_priv *priv,
1071 struct gve_queue_page_list *qpls,
1072 int start_id,
1073 int num_qpls)
1074{
1075 int i;
1076
1077 for (i = start_id; i < start_id + num_qpls; i++)
		gve_free_queue_page_list(priv, &qpls[i], i);
1079}
1080
1081static int gve_alloc_n_qpls(struct gve_priv *priv,
1082 struct gve_queue_page_list *qpls,
1083 int page_count,
1084 int start_id,
1085 int num_qpls)
1086{
1087 int err;
1088 int i;
1089
1090 for (i = start_id; i < start_id + num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count);
1092 if (err)
1093 goto free_qpls;
1094 }
1095
1096 return 0;
1097
1098free_qpls:
	/* Must include the failing QPL too, because gve_alloc_queue_page_list
	 * fails without cleaning up after itself.
	 */
	gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1);
1103 return err;
1104}
1105
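
/* Allocate queue page lists for all queues. This is a no-op when raw (DMA)
 * addressing is in use. TX QPLs are allocated first, followed by RX QPLs
 * starting at gve_rx_start_qpl_id(), which may leave a gap in the id space.
 */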
1106static int gve_alloc_qpls(struct gve_priv *priv,
1107 struct gve_qpls_alloc_cfg *cfg)
1108{
1109 int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
1110 int rx_start_id, tx_num_qpls, rx_num_qpls;
1111 struct gve_queue_page_list *qpls;
1112 int page_count;
1113 int err;
1114
1115 if (cfg->raw_addressing)
1116 return 0;
1117
	qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL);
	if (!qpls)
		return -ENOMEM;

	cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) *
		sizeof(unsigned long) * BITS_PER_BYTE;
	cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
					    sizeof(unsigned long), GFP_KERNEL);
	if (!cfg->qpl_cfg->qpl_id_map) {
		err = -ENOMEM;
		goto free_qpl_array;
	}

	/* Allocate TX QPLs */
	page_count = priv->tx_pages_per_qpl;
	tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues,
				      gve_is_qpl(priv));
	err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls);
	if (err)
		goto free_qpl_map;

	/* Allocate RX QPLs */
	rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg);
	/* For GQI_QPL the number of pages allocated has a 1:1 relationship
	 * with the number of descriptors. For DQO more pages than descriptors
	 * are required because completions can arrive out of order.
	 */
	page_count = cfg->is_gqi ? priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv));
	err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls);
	if (err)
		goto free_tx_qpls;

	cfg->qpls = qpls;
	return 0;

free_tx_qpls:
	gve_free_n_qpls(priv, qpls, 0, tx_num_qpls);
free_qpl_map:
	kvfree(cfg->qpl_cfg->qpl_id_map);
	cfg->qpl_cfg->qpl_id_map = NULL;
free_qpl_array:
	kvfree(qpls);
1161 return err;
1162}
1163
1164static void gve_free_qpls(struct gve_priv *priv,
1165 struct gve_qpls_alloc_cfg *cfg)
1166{
1167 int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
1168 struct gve_queue_page_list *qpls = cfg->qpls;
1169 int i;
1170
1171 if (!qpls)
1172 return;
1173
1174 kvfree(addr: cfg->qpl_cfg->qpl_id_map);
1175 cfg->qpl_cfg->qpl_id_map = NULL;
1176
1177 for (i = 0; i < max_queues; i++)
1178 gve_free_queue_page_list(priv, qpl: &qpls[i], id: i);
1179
1180 kvfree(addr: qpls);
1181 cfg->qpls = NULL;
1182}
1183
1184/* Use this to schedule a reset when the device is capable of continuing
1185 * to handle other requests in its current state. If it is not, do a reset
1186 * in thread instead.
1187 */
1188void gve_schedule_reset(struct gve_priv *priv)
1189{
1190 gve_set_do_reset(priv);
1191 queue_work(wq: priv->gve_wq, work: &priv->service_task);
1192}
1193
1194static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1195static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1196static void gve_turndown(struct gve_priv *priv);
1197static void gve_turnup(struct gve_priv *priv);
1198
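
/* Register XDP RX queue info for every RX queue: a page-shared memory model
 * for the normal xdp_rxq and, when an AF_XDP pool is bound to the queue, a
 * separate xsk_rxq backed by the XSK buffer pool. The per-queue pool pointer
 * is also cached on the matching XDP TX queue.
 */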
1199static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1200{
1201 struct napi_struct *napi;
1202 struct gve_rx_ring *rx;
1203 int err = 0;
1204 int i, j;
1205 u32 tx_qid;
1206
1207 if (!priv->num_xdp_queues)
1208 return 0;
1209
1210 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1211 rx = &priv->rx[i];
1212 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1213
		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
1234 }
1235 }
1236
1237 for (i = 0; i < priv->num_xdp_queues; i++) {
1238 tx_qid = gve_xdp_tx_queue_id(priv, queue_id: i);
1239 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, queue_id: i);
1240 }
1241 return 0;
1242
1243err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
1250 }
1251 return err;
1252}
1253
1254static void gve_unreg_xdp_info(struct gve_priv *priv)
1255{
1256 int i, tx_qid;
1257
1258 if (!priv->num_xdp_queues)
1259 return;
1260
1261 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1262 struct gve_rx_ring *rx = &priv->rx[i];
1263
		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
1274 }
1275}
1276
1277static void gve_drain_page_cache(struct gve_priv *priv)
1278{
1279 int i;
1280
1281 for (i = 0; i < priv->rx_cfg.num_queues; i++)
1282 page_frag_cache_drain(nc: &priv->rx[i].page_cache);
1283}
1284
1285static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv,
1286 struct gve_qpls_alloc_cfg *cfg)
1287{
1288 cfg->raw_addressing = !gve_is_qpl(priv);
1289 cfg->is_gqi = gve_is_gqi(priv);
1290 cfg->num_xdp_queues = priv->num_xdp_queues;
1291 cfg->qpl_cfg = &priv->qpl_cfg;
1292 cfg->tx_cfg = &priv->tx_cfg;
1293 cfg->rx_cfg = &priv->rx_cfg;
1294 cfg->qpls = priv->qpls;
1295}
1296
1297static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1298 struct gve_rx_alloc_rings_cfg *cfg)
1299{
1300 cfg->qcfg = &priv->rx_cfg;
1301 cfg->qcfg_tx = &priv->tx_cfg;
1302 cfg->raw_addressing = !gve_is_qpl(priv);
1303 cfg->enable_header_split = priv->header_split_enabled;
1304 cfg->qpls = priv->qpls;
1305 cfg->qpl_cfg = &priv->qpl_cfg;
1306 cfg->ring_size = priv->rx_desc_cnt;
1307 cfg->packet_buffer_size = gve_is_gqi(priv) ?
1308 GVE_DEFAULT_RX_BUFFER_SIZE :
1309 priv->data_buffer_size_dqo;
1310 cfg->rx = priv->rx;
1311}
1312
1313static void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
1314 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1315 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1316 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1317{
1318 gve_qpls_get_curr_alloc_cfg(priv, cfg: qpls_alloc_cfg);
1319 gve_tx_get_curr_alloc_cfg(priv, cfg: tx_alloc_cfg);
1320 gve_rx_get_curr_alloc_cfg(priv, cfg: rx_alloc_cfg);
1321}
1322
1323static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
1324{
1325 int i;
1326
1327 for (i = 0; i < num_rings; i++) {
1328 if (gve_is_gqi(priv))
1329 gve_rx_start_ring_gqi(priv, idx: i);
1330 else
1331 gve_rx_start_ring_dqo(priv, idx: i);
1332 }
1333}
1334
1335static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1336{
1337 int i;
1338
1339 if (!priv->rx)
1340 return;
1341
1342 for (i = 0; i < num_rings; i++) {
1343 if (gve_is_gqi(priv))
1344 gve_rx_stop_ring_gqi(priv, idx: i);
1345 else
1346 gve_rx_stop_ring_dqo(priv, idx: i);
1347 }
1348}
1349
1350static void gve_queues_mem_free(struct gve_priv *priv,
1351 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1352 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1353 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1354{
1355 gve_free_rings(priv, tx_cfg: tx_alloc_cfg, rx_cfg: rx_alloc_cfg);
1356 gve_free_qpls(priv, cfg: qpls_alloc_cfg);
1357}
1358
1359static int gve_queues_mem_alloc(struct gve_priv *priv,
1360 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1361 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1362 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1363{
1364 int err;
1365
1366 err = gve_alloc_qpls(priv, cfg: qpls_alloc_cfg);
1367 if (err) {
1368 netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n");
1369 return err;
1370 }
1371 tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
1372 rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
1373 err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
1374 if (err) {
1375 netif_err(priv, drv, priv->dev, "Failed to alloc rings\n");
1376 goto free_qpls;
1377 }
1378
1379 return 0;
1380
1381free_qpls:
1382 gve_free_qpls(priv, cfg: qpls_alloc_cfg);
1383 return err;
1384}
1385
1386static void gve_queues_mem_remove(struct gve_priv *priv)
1387{
1388 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1389 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1390 struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1391
1392 gve_get_curr_alloc_cfgs(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1393 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1394 gve_queues_mem_free(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1395 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1396 priv->qpls = NULL;
1397 priv->tx = NULL;
1398 priv->rx = NULL;
1399}
1400
1401/* The passed-in queue memory is stored into priv and the queues are made live.
1402 * No memory is allocated. Passed-in memory is freed on errors.
1403 */
1404static int gve_queues_start(struct gve_priv *priv,
1405 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1406 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1407 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1408{
1409 struct net_device *dev = priv->dev;
1410 int err;
1411
1412 /* Record new resources into priv */
1413 priv->qpls = qpls_alloc_cfg->qpls;
1414 priv->tx = tx_alloc_cfg->tx;
1415 priv->rx = rx_alloc_cfg->rx;
1416
1417 /* Record new configs into priv */
1418 priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg;
1419 priv->tx_cfg = *tx_alloc_cfg->qcfg;
1420 priv->rx_cfg = *rx_alloc_cfg->qcfg;
1421 priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1422 priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1423
1424 if (priv->xdp_prog)
1425 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1426 else
1427 priv->num_xdp_queues = 0;
1428
1429 gve_tx_start_rings(priv, start_id: 0, num_rings: tx_alloc_cfg->num_rings);
1430 gve_rx_start_rings(priv, num_rings: rx_alloc_cfg->qcfg->num_queues);
1431 gve_init_sync_stats(priv);
1432
1433 err = netif_set_real_num_tx_queues(dev, txq: priv->tx_cfg.num_queues);
1434 if (err)
1435 goto stop_and_free_rings;
1436 err = netif_set_real_num_rx_queues(dev, rxq: priv->rx_cfg.num_queues);
1437 if (err)
1438 goto stop_and_free_rings;
1439
1440 err = gve_reg_xdp_info(priv, dev);
1441 if (err)
1442 goto stop_and_free_rings;
1443
1444 err = gve_register_qpls(priv);
1445 if (err)
1446 goto reset;
1447
1448 priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1449 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1450
1451 err = gve_create_rings(priv);
1452 if (err)
1453 goto reset;
1454
1455 gve_set_device_rings_ok(priv);
1456
1457 if (gve_get_report_stats(priv))
1458 mod_timer(timer: &priv->stats_report_timer,
1459 expires: round_jiffies(j: jiffies +
1460 msecs_to_jiffies(m: priv->stats_report_timer_period)));
1461
1462 gve_turnup(priv);
1463 queue_work(wq: priv->gve_wq, work: &priv->service_task);
1464 priv->interface_up_cnt++;
1465 return 0;
1466
1467reset:
1468 if (gve_get_reset_in_progress(priv))
1469 goto stop_and_free_rings;
1470 gve_reset_and_teardown(priv, was_up: true);
1471 /* if this fails there is nothing we can do so just ignore the return */
1472 gve_reset_recovery(priv, was_up: false);
1473 /* return the original error */
1474 return err;
1475stop_and_free_rings:
1476 gve_tx_stop_rings(priv, start_id: 0, num_rings: gve_num_tx_queues(priv));
1477 gve_rx_stop_rings(priv, num_rings: priv->rx_cfg.num_queues);
1478 gve_queues_mem_remove(priv);
1479 return err;
1480}
1481
1482static int gve_open(struct net_device *dev)
1483{
1484 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1485 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1486 struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1487 struct gve_priv *priv = netdev_priv(dev);
1488 int err;
1489
1490 gve_get_curr_alloc_cfgs(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1491 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1492
1493 err = gve_queues_mem_alloc(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1494 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1495 if (err)
1496 return err;
1497
1498 /* No need to free on error: ownership of resources is lost after
1499 * calling gve_queues_start.
1500 */
1501 err = gve_queues_start(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1502 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1503 if (err)
1504 return err;
1505
1506 return 0;
1507}
1508
1509static int gve_queues_stop(struct gve_priv *priv)
1510{
1511 int err;
1512
1513 netif_carrier_off(dev: priv->dev);
1514 if (gve_get_device_rings_ok(priv)) {
1515 gve_turndown(priv);
1516 gve_drain_page_cache(priv);
1517 err = gve_destroy_rings(priv);
1518 if (err)
1519 goto err;
1520 err = gve_unregister_qpls(priv);
1521 if (err)
1522 goto err;
1523 gve_clear_device_rings_ok(priv);
1524 }
1525 del_timer_sync(timer: &priv->stats_report_timer);
1526
1527 gve_unreg_xdp_info(priv);
1528
1529 gve_tx_stop_rings(priv, start_id: 0, num_rings: gve_num_tx_queues(priv));
1530 gve_rx_stop_rings(priv, num_rings: priv->rx_cfg.num_queues);
1531
1532 priv->interface_down_cnt++;
1533 return 0;
1534
1535err:
1536 /* This must have been called from a reset due to the rtnl lock
1537 * so just return at this point.
1538 */
1539 if (gve_get_reset_in_progress(priv))
1540 return err;
1541 /* Otherwise reset before returning */
1542 gve_reset_and_teardown(priv, was_up: true);
1543 return gve_reset_recovery(priv, was_up: false);
1544}
1545
1546static int gve_close(struct net_device *dev)
1547{
1548 struct gve_priv *priv = netdev_priv(dev);
1549 int err;
1550
1551 err = gve_queues_stop(priv);
1552 if (err)
1553 return err;
1554
1555 gve_queues_mem_remove(priv);
1556 return 0;
1557}
1558
1559static int gve_remove_xdp_queues(struct gve_priv *priv)
1560{
1561 int qpl_start_id;
1562 int err;
1563
1564 qpl_start_id = gve_xdp_tx_start_queue_id(priv);
1565
1566 err = gve_destroy_xdp_rings(priv);
1567 if (err)
1568 return err;
1569
1570 err = gve_unregister_xdp_qpls(priv);
1571 if (err)
1572 return err;
1573
1574 gve_unreg_xdp_info(priv);
1575 gve_free_xdp_rings(priv);
1576
1577 gve_free_n_qpls(priv, qpls: priv->qpls, start_id: qpl_start_id, num_qpls: gve_num_xdp_qpls(priv));
1578 priv->num_xdp_queues = 0;
1579 return 0;
1580}
1581
1582static int gve_add_xdp_queues(struct gve_priv *priv)
1583{
1584 int start_id;
1585 int err;
1586
1587 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1588
1589 start_id = gve_xdp_tx_start_queue_id(priv);
1590 err = gve_alloc_n_qpls(priv, qpls: priv->qpls, page_count: priv->tx_pages_per_qpl,
1591 start_id, num_qpls: gve_num_xdp_qpls(priv));
1592 if (err)
1593 goto err;
1594
1595 err = gve_alloc_xdp_rings(priv);
1596 if (err)
1597 goto free_xdp_qpls;
1598
1599 err = gve_reg_xdp_info(priv, dev: priv->dev);
1600 if (err)
1601 goto free_xdp_rings;
1602
1603 err = gve_register_xdp_qpls(priv);
1604 if (err)
1605 goto free_xdp_rings;
1606
1607 err = gve_create_xdp_rings(priv);
1608 if (err)
1609 goto free_xdp_rings;
1610
1611 return 0;
1612
1613free_xdp_rings:
1614 gve_free_xdp_rings(priv);
1615free_xdp_qpls:
1616 gve_free_n_qpls(priv, qpls: priv->qpls, start_id, num_qpls: gve_num_xdp_qpls(priv));
1617err:
1618 priv->num_xdp_queues = 0;
1619 return err;
1620}
1621
1622static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1623{
1624 if (!gve_get_napi_enabled(priv))
1625 return;
1626
1627 if (link_status == netif_carrier_ok(dev: priv->dev))
1628 return;
1629
1630 if (link_status) {
1631 netdev_info(dev: priv->dev, format: "Device link is up.\n");
1632 netif_carrier_on(dev: priv->dev);
1633 } else {
1634 netdev_info(dev: priv->dev, format: "Device link is down.\n");
1635 netif_carrier_off(dev: priv->dev);
1636 }
1637}
1638
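
/* Install or remove an XDP program. If the carrier is down only the program
 * pointer is swapped; otherwise the datapath is turned down, XDP TX queues
 * are added or removed as needed, and the datapath is turned back up.
 */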
1639static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1640 struct netlink_ext_ack *extack)
1641{
1642 struct bpf_prog *old_prog;
1643 int err = 0;
1644 u32 status;
1645
1646 old_prog = READ_ONCE(priv->xdp_prog);
1647 if (!netif_carrier_ok(dev: priv->dev)) {
1648 WRITE_ONCE(priv->xdp_prog, prog);
1649 if (old_prog)
1650 bpf_prog_put(prog: old_prog);
1651 return 0;
1652 }
1653
1654 gve_turndown(priv);
1655 if (!old_prog && prog) {
1656 // Allocate XDP TX queues if an XDP program is
1657 // being installed
1658 err = gve_add_xdp_queues(priv);
1659 if (err)
1660 goto out;
1661 } else if (old_prog && !prog) {
1662 // Remove XDP TX queues if an XDP program is
1663 // being uninstalled
1664 err = gve_remove_xdp_queues(priv);
1665 if (err)
1666 goto out;
1667 }
1668 WRITE_ONCE(priv->xdp_prog, prog);
1669 if (old_prog)
1670 bpf_prog_put(prog: old_prog);
1671
1672out:
1673 gve_turnup(priv);
1674 status = ioread32be(&priv->reg_bar0->device_status);
1675 gve_handle_link_status(priv, link_status: GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1676 return err;
1677}
1678
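
/* Bind an AF_XDP buffer pool to RX queue qid: validate the qid and frame
 * size, DMA map the pool and, if an XDP program is installed, register the
 * xsk_rxq and point the RX and XDP TX queues at the pool.
 */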
1679static int gve_xsk_pool_enable(struct net_device *dev,
1680 struct xsk_buff_pool *pool,
1681 u16 qid)
1682{
1683 struct gve_priv *priv = netdev_priv(dev);
1684 struct napi_struct *napi;
1685 struct gve_rx_ring *rx;
1686 int tx_qid;
1687 int err;
1688
1689 if (qid >= priv->rx_cfg.num_queues) {
1690 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1691 return -EINVAL;
1692 }
1693 if (xsk_pool_get_rx_frame_size(pool) <
1694 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1695 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1696 return -EINVAL;
1697 }
1698
1699 err = xsk_pool_dma_map(pool, dev: &priv->pdev->dev,
1700 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1701 if (err)
1702 return err;
1703
1704 /* If XDP prog is not installed, return */
1705 if (!priv->xdp_prog)
1706 return 0;
1707
1708 rx = &priv->rx[qid];
1709 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1710 err = xdp_rxq_info_reg(xdp_rxq: &rx->xsk_rxq, dev, queue_index: qid, napi_id: napi->napi_id);
1711 if (err)
1712 goto err;
1713
1714 err = xdp_rxq_info_reg_mem_model(xdp_rxq: &rx->xsk_rxq,
1715 type: MEM_TYPE_XSK_BUFF_POOL, NULL);
1716 if (err)
1717 goto err;
1718
1719 xsk_pool_set_rxq_info(pool, rxq: &rx->xsk_rxq);
1720 rx->xsk_pool = pool;
1721
1722 tx_qid = gve_xdp_tx_queue_id(priv, queue_id: qid);
1723 priv->tx[tx_qid].xsk_pool = pool;
1724
1725 return 0;
1726err:
1727 if (xdp_rxq_info_is_reg(xdp_rxq: &rx->xsk_rxq))
1728 xdp_rxq_info_unreg(xdp_rxq: &rx->xsk_rxq);
1729
1730 xsk_pool_dma_unmap(pool,
1731 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1732 return err;
1733}
1734
1735static int gve_xsk_pool_disable(struct net_device *dev,
1736 u16 qid)
1737{
1738 struct gve_priv *priv = netdev_priv(dev);
1739 struct napi_struct *napi_rx;
1740 struct napi_struct *napi_tx;
1741 struct xsk_buff_pool *pool;
1742 int tx_qid;
1743
1744 pool = xsk_get_pool_from_qid(dev, queue_id: qid);
1745 if (!pool)
1746 return -EINVAL;
1747 if (qid >= priv->rx_cfg.num_queues)
1748 return -EINVAL;
1749
1750 /* If XDP prog is not installed, unmap DMA and return */
1751 if (!priv->xdp_prog)
1752 goto done;
1753
1754 tx_qid = gve_xdp_tx_queue_id(priv, queue_id: qid);
1755 if (!netif_running(dev)) {
1756 priv->rx[qid].xsk_pool = NULL;
1757 xdp_rxq_info_unreg(xdp_rxq: &priv->rx[qid].xsk_rxq);
1758 priv->tx[tx_qid].xsk_pool = NULL;
1759 goto done;
1760 }
1761
1762 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1763 napi_disable(n: napi_rx); /* make sure current rx poll is done */
1764
1765 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1766 napi_disable(n: napi_tx); /* make sure current tx poll is done */
1767
1768 priv->rx[qid].xsk_pool = NULL;
1769 xdp_rxq_info_unreg(xdp_rxq: &priv->rx[qid].xsk_rxq);
1770 priv->tx[tx_qid].xsk_pool = NULL;
1771 smp_mb(); /* Make sure it is visible to the workers on datapath */
1772
1773 napi_enable(n: napi_rx);
1774 if (gve_rx_work_pending(rx: &priv->rx[qid]))
1775 napi_schedule(n: napi_rx);
1776
1777 napi_enable(n: napi_tx);
1778 if (gve_tx_clean_pending(priv, tx: &priv->tx[tx_qid]))
1779 napi_schedule(n: napi_tx);
1780
1781done:
1782 xsk_pool_dma_unmap(pool,
1783 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1784 return 0;
1785}
1786
1787static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1788{
1789 struct gve_priv *priv = netdev_priv(dev);
1790 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1791
1792 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1793 return -EINVAL;
1794
1795 if (flags & XDP_WAKEUP_TX) {
1796 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1797 struct napi_struct *napi =
1798 &priv->ntfy_blocks[tx->ntfy_id].napi;
1799
1800 if (!napi_if_scheduled_mark_missed(n: napi)) {
1801 /* Call local_bh_enable to trigger SoftIRQ processing */
1802 local_bh_disable();
1803 napi_schedule(n: napi);
1804 local_bh_enable();
1805 }
1806
1807 tx->xdp_xsk_wakeup++;
1808 }
1809
1810 return 0;
1811}
1812
1813static int verify_xdp_configuration(struct net_device *dev)
1814{
1815 struct gve_priv *priv = netdev_priv(dev);
1816
1817 if (dev->features & NETIF_F_LRO) {
1818 netdev_warn(dev, format: "XDP is not supported when LRO is on.\n");
1819 return -EOPNOTSUPP;
1820 }
1821
1822 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1823 netdev_warn(dev, format: "XDP is not supported in mode %d.\n",
1824 priv->queue_format);
1825 return -EOPNOTSUPP;
1826 }
1827
1828 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1829 netdev_warn(dev, format: "XDP is not supported for mtu %d.\n",
1830 dev->mtu);
1831 return -EOPNOTSUPP;
1832 }
1833
1834 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1835 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1836 netdev_warn(dev, format: "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1837 priv->rx_cfg.num_queues,
1838 priv->tx_cfg.num_queues,
1839 priv->tx_cfg.max_queues);
1840 return -EINVAL;
1841 }
1842 return 0;
1843}
1844
1845static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1846{
1847 struct gve_priv *priv = netdev_priv(dev);
1848 int err;
1849
1850 err = verify_xdp_configuration(dev);
1851 if (err)
1852 return err;
1853 switch (xdp->command) {
1854 case XDP_SETUP_PROG:
1855 return gve_set_xdp(priv, prog: xdp->prog, extack: xdp->extack);
1856 case XDP_SETUP_XSK_POOL:
1857 if (xdp->xsk.pool)
1858 return gve_xsk_pool_enable(dev, pool: xdp->xsk.pool, qid: xdp->xsk.queue_id);
1859 else
1860 return gve_xsk_pool_disable(dev, qid: xdp->xsk.queue_id);
1861 default:
1862 return -EINVAL;
1863 }
1864}
1865
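
/* Apply a new queue configuration: allocate the new resources first, then
 * tear down the running queues and restart on the new resources. If the
 * restart fails, all queues are left turned down.
 */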
1866static int gve_adjust_config(struct gve_priv *priv,
1867 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1868 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1869 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1870{
1871 int err;
1872
	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, qpls_alloc_cfg,
				   tx_alloc_cfg, rx_alloc_cfg);
1876 if (err) {
1877 netif_err(priv, drv, priv->dev,
1878 "Adjust config failed to alloc new queues");
1879 return err;
1880 }
1881
1882 /* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
1884 if (err) {
1885 netif_err(priv, drv, priv->dev,
1886 "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, qpls_alloc_cfg,
				    tx_alloc_cfg, rx_alloc_cfg);
1889 return err;
1890 }
1891
1892 /* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, qpls_alloc_cfg,
			       tx_alloc_cfg, rx_alloc_cfg);
1895 if (err) {
1896 netif_err(priv, drv, priv->dev,
1897 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1898 /* No need to free on error: ownership of resources is lost after
1899 * calling gve_queues_start.
1900 */
1901 gve_turndown(priv);
1902 return err;
1903 }
1904
1905 return 0;
1906}
1907
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
	struct gve_qpl_config new_qpl_cfg;
	int err;

	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
				&tx_alloc_cfg, &rx_alloc_cfg);

	/* qpl_cfg is not read-only, it contains a map that gets updated as
	 * rings are allocated, which is why we cannot use the yet unreleased
	 * one in priv.
	 */
	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;

	/* Relay the new config from ethtool */
	qpls_alloc_cfg.tx_cfg = &new_tx_config;
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	qpls_alloc_cfg.rx_cfg = &new_rx_config;
	rx_alloc_cfg.qcfg = &new_rx_config;
	tx_alloc_cfg.num_rings = new_tx_config.num_queues;

	if (netif_carrier_ok(priv->dev)) {
		err = gve_adjust_config(priv, &qpls_alloc_cfg,
					&tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
}

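/* Quiesce the data path: drop the carrier, disable NAPI on every TX and RX
 * notification block and stop the TX queues.  No device resources are freed
 * here.
 */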
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

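/* Reverse of gve_turndown(): restart the TX queues, re-enable NAPI and
 * unmask interrupts.  GQI unmasks by writing 0 to the IRQ doorbell; DQO does
 * it by reprogramming the interrupt coalescing timers.
 */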
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}
	}

	gve_set_napi_enabled(priv);
}

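/* .ndo_tx_timeout handler.  If the stalled queue has missed completions and
 * has not been kicked within the last MIN_TX_TIMEOUT_GAP milliseconds, the
 * interrupt is masked and NAPI is rescheduled to reap them; otherwise a full
 * device reset is scheduled.
 */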
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_notify_block *block;
	struct gve_tx_ring *tx = NULL;
	struct gve_priv *priv;
	u32 last_nic_done;
	u32 current_time;
	u32 ntfy_idx;

	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
	priv = netdev_priv(dev);
	if (txqueue > priv->tx_cfg.num_queues)
		goto reset;

	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
	if (ntfy_idx >= priv->num_ntfy_blks)
		goto reset;

	block = &priv->ntfy_blocks[ntfy_idx];
	tx = block->tx;

	current_time = jiffies_to_msecs(jiffies);
	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
		goto reset;

	/* Check to see if there are missed completions, which will allow us to
	 * kick the queue.
	 */
	last_nic_done = gve_tx_load_event_counter(priv, tx);
	if (last_nic_done - tx->done) {
		netdev_info(dev, "Kicking queue %d", txqueue);
		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
		napi_schedule(&block->napi);
		tx->last_kick_msec = current_time;
		goto out;
	} // Else reset.

reset:
	gve_schedule_reset(priv);

out:
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

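/* Pick the RX packet buffer size: the larger GVE_MAX_RX_BUFFER_SIZE when
 * header split is requested and the device supports it, otherwise the
 * default size.
 */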
u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
{
	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
		return GVE_MAX_RX_BUFFER_SIZE;
	else
		return GVE_DEFAULT_RX_BUFFER_SIZE;
}

/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
bool gve_header_split_supported(const struct gve_priv *priv)
{
	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
}

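/* Apply an ethtool tcp-data-split request.  A change of the header-split
 * state reconfigures the RX rings (and switches the packet buffer size)
 * through gve_adjust_config() when the interface is running.
 */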
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
	bool enable_hdr_split;
	int err = 0;

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
		return 0;

	if (!gve_header_split_supported(priv)) {
		dev_err(&priv->pdev->dev, "Header-split not supported\n");
		return -EOPNOTSUPP;
	}

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
		enable_hdr_split = true;
	else
		enable_hdr_split = false;

	if (enable_hdr_split == priv->header_split_enabled)
		return 0;

	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
				&tx_alloc_cfg, &rx_alloc_cfg);

	rx_alloc_cfg.enable_header_split = enable_hdr_split;
	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);

	if (netif_running(priv->dev))
		err = gve_adjust_config(priv, &qpls_alloc_cfg,
					&tx_alloc_cfg, &rx_alloc_cfg);
	return err;
}

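/* .ndo_set_features handler.  Only NETIF_F_LRO changes require action: the
 * rings are reallocated with gve_adjust_config() while the carrier is up,
 * and the feature flip is reverted if that fails.
 */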
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	struct gve_qpl_config new_qpl_cfg;
	int err;

	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
				&tx_alloc_cfg, &rx_alloc_cfg);
	/* qpl_cfg is not read-only, it contains a map that gets updated as
	 * rings are allocated, which is why we cannot use the yet unreleased
	 * one in priv.
	 */
	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			err = gve_adjust_config(priv, &qpls_alloc_cfg,
						&tx_alloc_cfg, &rx_alloc_cfg);
			if (err) {
				/* Revert the change on error. */
				netdev->features = orig_features;
				return err;
			}
		}
	}

	return 0;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

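/* Fill the DMA stats report shared with the device: bump the written counter
 * and emit a fixed set of per-queue TX and RX statistics.  The GQI-only
 * counters (frames sent, last completion) are reported as zero on DQO.
 */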
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		priv->dev->xdp_features = 0;
	}
}

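/* One-time (and post-reset) device initialization: bring up the admin queue,
 * verify driver compatibility, and on a fresh probe query the device
 * description and size the notification-block and queue configuration.
 * Each TX and each RX queue gets its own notification block, so the maximum
 * queue counts are capped at half of the available blocks; the remaining
 * MSI-X vector is reserved for management.
 */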
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

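/* Full device reset.  attempt_teardown selects between a graceful close of
 * the interface and an immediate turndown before the admin queue is
 * released; either way the device is then reinitialized via
 * gve_reset_recovery().
 */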
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

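/* Report the driver version to the device by streaming it byte by byte into
 * the driver-version register: the gve_version_prefix string, then
 * gve_version_str, then a terminating newline.
 */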
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

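/* PCI probe: enable the device, map the register and doorbell BARs, size the
 * netdev from the advertised maximum queue counts, initialize priv and the
 * service workqueue, then register the netdev.  Every failure path unwinds
 * in reverse order through the abort_with_* labels.
 */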
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;
	priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

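/* Legacy PCI power-management hooks.  Suspend closes the interface (or falls
 * back to reset-and-teardown if close fails) and remembers whether it was
 * up; resume re-runs the reset recovery path to restore that state.
 */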
#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);