1// SPDX-License-Identifier: (GPL-2.0 OR MIT)
2/* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7#include <linux/bpf.h>
8#include <linux/cpumask.h>
9#include <linux/etherdevice.h>
10#include <linux/filter.h>
11#include <linux/interrupt.h>
12#include <linux/module.h>
13#include <linux/pci.h>
14#include <linux/sched.h>
15#include <linux/timer.h>
16#include <linux/workqueue.h>
17#include <linux/utsname.h>
18#include <linux/version.h>
19#include <net/sch_generic.h>
20#include <net/xdp_sock_drv.h>
21#include "gve.h"
22#include "gve_dqo.h"
23#include "gve_adminq.h"
24#include "gve_register.h"
25#include "gve_utils.h"
26
27#define GVE_DEFAULT_RX_COPYBREAK (256)
28
29#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
30#define GVE_VERSION "1.0.0"
31#define GVE_VERSION_PREFIX "GVE-"
32
33// Minimum amount of time between queue kicks in msec (10 seconds)
34#define MIN_TX_TIMEOUT_GAP (1000 * 10)
35
36char gve_driver_name[] = "gve";
37const char gve_version_str[] = GVE_VERSION;
38static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
39
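
/* Describe the running driver and OS to the device: the OS type/version and
 * the driver capability flags are copied into a DMA-coherent gve_driver_info
 * block and passed over the admin queue. A device that does not implement
 * the command returns -EOPNOTSUPP, which is treated as success.
 */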
40static int gve_verify_driver_compatibility(struct gve_priv *priv)
41{
42 int err;
43 struct gve_driver_info *driver_info;
44 dma_addr_t driver_info_bus;
45
	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
49 if (!driver_info)
50 return -ENOMEM;
51
52 *driver_info = (struct gve_driver_info) {
53 .os_type = 1, /* Linux */
54 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
55 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
56 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
57 .driver_capability_flags = {
58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
62 },
63 };
64 strscpy(driver_info->os_version_str1, utsname()->release,
65 sizeof(driver_info->os_version_str1));
66 strscpy(driver_info->os_version_str2, utsname()->version,
67 sizeof(driver_info->os_version_str2));
68
	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);
72
73 /* It's ok if the device doesn't support this */
74 if (err == -EOPNOTSUPP)
75 err = 0;
76
	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
80 return err;
81}
82
83static netdev_features_t gve_features_check(struct sk_buff *skb,
84 struct net_device *dev,
85 netdev_features_t features)
86{
87 struct gve_priv *priv = netdev_priv(dev);
88
89 if (!gve_is_gqi(priv))
90 return gve_features_check_dqo(skb, dev, features);
91
92 return features;
93}
94
95static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
96{
97 struct gve_priv *priv = netdev_priv(dev);
98
99 if (gve_is_gqi(priv))
100 return gve_tx(skb, dev);
101 else
102 return gve_tx_dqo(skb, dev);
103}
104
105static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
106{
107 struct gve_priv *priv = netdev_priv(dev);
108 unsigned int start;
109 u64 packets, bytes;
110 int num_tx_queues;
111 int ring;
112
113 num_tx_queues = gve_num_tx_queues(priv);
114 if (priv->rx) {
115 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
123 s->rx_packets += packets;
124 s->rx_bytes += bytes;
125 }
126 }
127 if (priv->tx) {
128 for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
136 s->tx_packets += packets;
137 s->tx_bytes += bytes;
138 }
139 }
140}
141
142static int gve_alloc_counter_array(struct gve_priv *priv)
143{
144 priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
149 if (!priv->counter_array)
150 return -ENOMEM;
151
152 return 0;
153}
154
155static void gve_free_counter_array(struct gve_priv *priv)
156{
157 if (!priv->counter_array)
158 return;
159
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
164 priv->counter_array = NULL;
165}
166
167/* NIC requests to report stats */
168static void gve_stats_report_task(struct work_struct *work)
169{
170 struct gve_priv *priv = container_of(work, struct gve_priv,
171 stats_report_task);
172 if (gve_get_do_report_stats(priv)) {
173 gve_handle_report_stats(priv);
174 gve_clear_do_report_stats(priv);
175 }
176}
177
178static void gve_stats_report_schedule(struct gve_priv *priv)
179{
180 if (!gve_get_probe_in_progress(priv) &&
181 !gve_get_reset_in_progress(priv)) {
182 gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
184 }
185}
186
187static void gve_stats_report_timer(struct timer_list *t)
188{
189 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
190
	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
194 gve_stats_report_schedule(priv);
195}
196
197static int gve_alloc_stats_report(struct gve_priv *priv)
198{
199 int tx_stats_num, rx_stats_num;
200
201 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
202 gve_num_tx_queues(priv);
203 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
204 priv->rx_cfg.num_queues;
205 priv->stats_report_len = struct_size(priv->stats_report, stats,
206 size_add(tx_stats_num, rx_stats_num));
207 priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
210 if (!priv->stats_report)
211 return -ENOMEM;
212 /* Set up timer for the report-stats task */
213 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
214 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
215 return 0;
216}
217
218static void gve_free_stats_report(struct gve_priv *priv)
219{
220 if (!priv->stats_report)
221 return;
222
	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
226 priv->stats_report = NULL;
227}
228
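
/* Management interrupt: all handling is deferred to the service task on the
 * driver workqueue; the hard IRQ only schedules it.
 */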
229static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
230{
231 struct gve_priv *priv = arg;
232
	queue_work(priv->gve_wq, &priv->service_task);
234 return IRQ_HANDLED;
235}
236
237static irqreturn_t gve_intr(int irq, void *arg)
238{
239 struct gve_notify_block *block = arg;
240 struct gve_priv *priv = block->priv;
241
242 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
244 return IRQ_HANDLED;
245}
246
247static irqreturn_t gve_intr_dqo(int irq, void *arg)
248{
249 struct gve_notify_block *block = arg;
250
251 /* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
253 return IRQ_HANDLED;
254}
255
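
/* NAPI handler used by the GQI queue format. TX (or XDP TX, for queues past
 * tx_cfg.num_queues) is cleaned first, then RX is polled. Returning the full
 * budget keeps NAPI scheduled; otherwise the IRQ doorbell is ACKed and the
 * rings are re-checked so that work which raced with the ACK is not lost.
 */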
256int gve_napi_poll(struct napi_struct *napi, int budget)
257{
258 struct gve_notify_block *block;
259 __be32 __iomem *irq_doorbell;
260 bool reschedule = false;
261 struct gve_priv *priv;
262 int work_done = 0;
263
264 block = container_of(napi, struct gve_notify_block, napi);
265 priv = block->priv;
266
267 if (block->tx) {
268 if (block->tx->q_num < priv->tx_cfg.num_queues)
269 reschedule |= gve_tx_poll(block, budget);
270 else if (budget)
271 reschedule |= gve_xdp_poll(block, budget);
272 }
273
274 if (!budget)
275 return 0;
276
277 if (block->rx) {
278 work_done = gve_rx_poll(block, budget);
279 reschedule |= work_done == budget;
280 }
281
282 if (reschedule)
283 return budget;
284
285 /* Complete processing - don't unmask irq if busy polling is enabled */
286 if (likely(napi_complete_done(napi, work_done))) {
287 irq_doorbell = gve_irq_doorbell(priv, block);
288 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
289
		/* Ensure the IRQ ACK is visible before we check for pending
		 * work. If the queues had posted any updates, they will be
		 * visible after this barrier.
		 */
293 mb();
294
		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
302 }
303 return work_done;
304}
305
306int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
307{
308 struct gve_notify_block *block =
309 container_of(napi, struct gve_notify_block, napi);
310 struct gve_priv *priv = block->priv;
311 bool reschedule = false;
312 int work_done = 0;
313
314 if (block->tx)
315 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
316
317 if (!budget)
318 return 0;
319
320 if (block->rx) {
321 work_done = gve_rx_poll_dqo(block, budget);
322 reschedule |= work_done == budget;
323 }
324
325 if (reschedule)
326 return budget;
327
328 if (likely(napi_complete_done(napi, work_done))) {
329 /* Enable interrupts again.
330 *
331 * We don't need to repoll afterwards because HW supports the
332 * PCI MSI-X PBA feature.
333 *
334 * Another interrupt would be triggered if a new event came in
335 * since the last one.
336 */
337 gve_write_irq_doorbell_dqo(priv, block,
338 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
339 }
340
341 return work_done;
342}
343
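
/* Allocate one MSI-X vector per notification block plus one management
 * vector. If fewer vectors are granted, the notification blocks are trimmed
 * to an even count split between TX and RX and the queue maximums are capped
 * accordingly. IRQ affinity hints spread the block vectors over online CPUs.
 */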
344static int gve_alloc_notify_blocks(struct gve_priv *priv)
345{
346 int num_vecs_requested = priv->num_ntfy_blks + 1;
347 unsigned int active_cpus;
348 int vecs_enabled;
349 int i, j;
350 int err;
351
	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
354 if (!priv->msix_vectors)
355 return -ENOMEM;
356 for (i = 0; i < num_vecs_requested; i++)
357 priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
360 if (vecs_enabled < 0) {
361 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
362 GVE_MIN_MSIX, vecs_enabled);
363 err = vecs_enabled;
364 goto abort_with_msix_vectors;
365 }
366 if (vecs_enabled != num_vecs_requested) {
367 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
368 int vecs_per_type = new_num_ntfy_blks / 2;
369 int vecs_left = new_num_ntfy_blks % 2;
370
371 priv->num_ntfy_blks = new_num_ntfy_blks;
372 priv->mgmt_msix_idx = priv->num_ntfy_blks;
373 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
374 vecs_per_type);
375 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
376 vecs_per_type + vecs_left);
377 dev_err(&priv->pdev->dev,
378 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
379 vecs_enabled, priv->tx_cfg.max_queues,
380 priv->rx_cfg.max_queues);
381 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
382 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
383 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
384 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
385 }
386 /* Half the notification blocks go to TX and half to RX */
387 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
388
389 /* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
394 if (err) {
395 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
396 goto abort_with_msix_enabled;
397 }
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
403 if (!priv->irq_db_indices) {
404 err = -ENOMEM;
405 goto abort_with_mgmt_vector;
406 }
407
	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
410 if (!priv->ntfy_blocks) {
411 err = -ENOMEM;
412 goto abort_with_irq_db_indices;
413 }
414
415 /* Setup the other blocks - the first n-1 vectors */
416 for (i = 0; i < priv->num_ntfy_blks; i++) {
417 struct gve_notify_block *block = &priv->ntfy_blocks[i];
418 int msix_idx = i;
419
		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
426 if (err) {
427 dev_err(&priv->pdev->dev,
428 "Failed to receive msix vector %d\n", i);
429 goto abort_with_some_ntfy_blocks;
430 }
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
433 block->irq_db_index = &priv->irq_db_indices[i].index;
434 }
435 return 0;
436abort_with_some_ntfy_blocks:
437 for (j = 0; j < i; j++) {
438 struct gve_notify_block *block = &priv->ntfy_blocks[j];
439 int msix_idx = j;
440
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
443 free_irq(priv->msix_vectors[msix_idx].vector, block);
444 }
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
459 return err;
460}
461
462static void gve_free_notify_blocks(struct gve_priv *priv)
463{
464 int i;
465
466 if (!priv->msix_vectors)
467 return;
468
469 /* Free the irqs */
470 for (i = 0; i < priv->num_ntfy_blks; i++) {
471 struct gve_notify_block *block = &priv->ntfy_blocks[i];
472 int msix_idx = i;
473
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
488}
489
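
/* Allocate the event counter array, notification blocks and stats report,
 * then describe them to the device over the admin queue. DQO formats also
 * fetch the packet type lookup table here.
 */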
490static int gve_setup_device_resources(struct gve_priv *priv)
491{
492 int err;
493
494 err = gve_alloc_counter_array(priv);
495 if (err)
496 return err;
497 err = gve_alloc_notify_blocks(priv);
498 if (err)
499 goto abort_with_counter;
500 err = gve_alloc_stats_report(priv);
501 if (err)
502 goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
508 if (unlikely(err)) {
509 dev_err(&priv->pdev->dev,
510 "could not setup device_resources: err=%d\n", err);
511 err = -ENXIO;
512 goto abort_with_stats_report;
513 }
514
515 if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
518 if (!priv->ptype_lut_dqo) {
519 err = -ENOMEM;
520 goto abort_with_stats_report;
521 }
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
523 if (err) {
524 dev_err(&priv->pdev->dev,
525 "Failed to get ptype map: err=%d\n", err);
526 goto abort_with_ptype_lut;
527 }
528 }
529
	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
533 if (err)
534 dev_err(&priv->pdev->dev,
535 "Failed to report stats: err=%d\n", err);
536 gve_set_device_resources_ok(priv);
537 return 0;
538
539abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
541 priv->ptype_lut_dqo = NULL;
542abort_with_stats_report:
543 gve_free_stats_report(priv);
544abort_with_ntfy_blocks:
545 gve_free_notify_blocks(priv);
546abort_with_counter:
547 gve_free_counter_array(priv);
548
549 return err;
550}
551
552static void gve_trigger_reset(struct gve_priv *priv);
553
554static void gve_teardown_device_resources(struct gve_priv *priv)
555{
556 int err;
557
558 /* Tell device its resources are being freed */
559 if (gve_get_device_resources_ok(priv)) {
560 /* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
562 if (err) {
563 dev_err(&priv->pdev->dev,
564 "Failed to detach stats report: err=%d\n", err);
565 gve_trigger_reset(priv);
566 }
567 err = gve_adminq_deconfigure_device_resources(priv);
568 if (err) {
569 dev_err(&priv->pdev->dev,
570 "Could not deconfigure device resources: err=%d\n",
571 err);
572 gve_trigger_reset(priv);
573 }
574 }
575
	kvfree(priv->ptype_lut_dqo);
577 priv->ptype_lut_dqo = NULL;
578
579 gve_free_counter_array(priv);
580 gve_free_notify_blocks(priv);
581 gve_free_stats_report(priv);
582 gve_clear_device_resources_ok(priv);
583}
584
585static int gve_unregister_qpl(struct gve_priv *priv, u32 i)
586{
587 int err;
588
	err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
590 if (err) {
591 netif_err(priv, drv, priv->dev,
592 "Failed to unregister queue page list %d\n",
593 priv->qpls[i].id);
594 return err;
595 }
596
597 priv->num_registered_pages -= priv->qpls[i].num_entries;
598 return 0;
599}
600
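
/* Register queue page list i with the device, enforcing the device's limit
 * on total registered pages. On success the page count is added to
 * num_registered_pages.
 */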
601static int gve_register_qpl(struct gve_priv *priv, u32 i)
602{
603 int num_rx_qpls;
604 int pages;
605 int err;
606
	/* Rx QPLs follow Tx QPLs in the priv->qpls array. */
	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
	if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) {
610 netif_err(priv, drv, priv->dev,
611 "Cannot register nonexisting QPL at index %d\n", i);
612 return -EINVAL;
613 }
614
615 pages = priv->qpls[i].num_entries;
616
617 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
618 netif_err(priv, drv, priv->dev,
619 "Reached max number of registered pages %llu > %llu\n",
620 pages + priv->num_registered_pages,
621 priv->max_registered_pages);
622 return -EINVAL;
623 }
624
	err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
626 if (err) {
627 netif_err(priv, drv, priv->dev,
628 "failed to register queue page list %d\n",
629 priv->qpls[i].id);
630 /* This failure will trigger a reset - no need to clean
631 * up
632 */
633 return err;
634 }
635
636 priv->num_registered_pages += pages;
637 return 0;
638}
639
640static int gve_register_xdp_qpls(struct gve_priv *priv)
641{
642 int start_id;
643 int err;
644 int i;
645
646 start_id = gve_xdp_tx_start_queue_id(priv);
647 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
648 err = gve_register_qpl(priv, i);
649 /* This failure will trigger a reset - no need to clean up */
650 if (err)
651 return err;
652 }
653 return 0;
654}
655
656static int gve_register_qpls(struct gve_priv *priv)
657{
658 int num_tx_qpls, num_rx_qpls;
659 int start_id;
660 int err;
661 int i;
662
663 num_tx_qpls = gve_num_tx_qpls(tx_cfg: &priv->tx_cfg, num_xdp_queues: gve_num_xdp_qpls(priv),
664 is_qpl: gve_is_qpl(priv));
665 num_rx_qpls = gve_num_rx_qpls(rx_cfg: &priv->rx_cfg, is_qpl: gve_is_qpl(priv));
666
667 for (i = 0; i < num_tx_qpls; i++) {
668 err = gve_register_qpl(priv, i);
669 if (err)
670 return err;
671 }
672
673 /* there might be a gap between the tx and rx qpl ids */
674 start_id = gve_rx_start_qpl_id(tx_cfg: &priv->tx_cfg);
675 for (i = 0; i < num_rx_qpls; i++) {
676 err = gve_register_qpl(priv, i: start_id + i);
677 if (err)
678 return err;
679 }
680
681 return 0;
682}
683
684static int gve_unregister_xdp_qpls(struct gve_priv *priv)
685{
686 int start_id;
687 int err;
688 int i;
689
690 start_id = gve_xdp_tx_start_queue_id(priv);
691 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
692 err = gve_unregister_qpl(priv, i);
693 /* This failure will trigger a reset - no need to clean */
694 if (err)
695 return err;
696 }
697 return 0;
698}
699
700static int gve_unregister_qpls(struct gve_priv *priv)
701{
702 int num_tx_qpls, num_rx_qpls;
703 int start_id;
704 int err;
705 int i;
706
707 num_tx_qpls = gve_num_tx_qpls(tx_cfg: &priv->tx_cfg, num_xdp_queues: gve_num_xdp_qpls(priv),
708 is_qpl: gve_is_qpl(priv));
709 num_rx_qpls = gve_num_rx_qpls(rx_cfg: &priv->rx_cfg, is_qpl: gve_is_qpl(priv));
710
711 for (i = 0; i < num_tx_qpls; i++) {
712 err = gve_unregister_qpl(priv, i);
713 /* This failure will trigger a reset - no need to clean */
714 if (err)
715 return err;
716 }
717
718 start_id = gve_rx_start_qpl_id(tx_cfg: &priv->tx_cfg);
719 for (i = 0; i < num_rx_qpls; i++) {
720 err = gve_unregister_qpl(priv, i: start_id + i);
721 /* This failure will trigger a reset - no need to clean */
722 if (err)
723 return err;
724 }
725 return 0;
726}
727
728static int gve_create_xdp_rings(struct gve_priv *priv)
729{
730 int err;
731
732 err = gve_adminq_create_tx_queues(priv,
733 start_id: gve_xdp_tx_start_queue_id(priv),
734 num_queues: priv->num_xdp_queues);
735 if (err) {
736 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
737 priv->num_xdp_queues);
738 /* This failure will trigger a reset - no need to clean
739 * up
740 */
741 return err;
742 }
743 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
744 priv->num_xdp_queues);
745
746 return 0;
747}
748
749static int gve_create_rings(struct gve_priv *priv)
750{
751 int num_tx_queues = gve_num_tx_queues(priv);
752 int err;
753 int i;
754
755 err = gve_adminq_create_tx_queues(priv, start_id: 0, num_queues: num_tx_queues);
756 if (err) {
757 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
758 num_tx_queues);
759 /* This failure will trigger a reset - no need to clean
760 * up
761 */
762 return err;
763 }
764 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
765 num_tx_queues);
766
767 err = gve_adminq_create_rx_queues(priv, num_queues: priv->rx_cfg.num_queues);
768 if (err) {
769 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
770 priv->rx_cfg.num_queues);
771 /* This failure will trigger a reset - no need to clean
772 * up
773 */
774 return err;
775 }
776 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
777 priv->rx_cfg.num_queues);
778
779 if (gve_is_gqi(priv)) {
780 /* Rx data ring has been prefilled with packet buffers at queue
781 * allocation time.
782 *
783 * Write the doorbell to provide descriptor slots and packet
784 * buffers to the NIC.
785 */
786 for (i = 0; i < priv->rx_cfg.num_queues; i++)
787 gve_rx_write_doorbell(priv, rx: &priv->rx[i]);
788 } else {
789 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
790 /* Post buffers and ring doorbell. */
791 gve_rx_post_buffers_dqo(rx: &priv->rx[i]);
792 }
793 }
794
795 return 0;
796}
797
798static void init_xdp_sync_stats(struct gve_priv *priv)
799{
800 int start_id = gve_xdp_tx_start_queue_id(priv);
801 int i;
802
803 /* Init stats */
804 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
805 int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx: i);
806
807 u64_stats_init(syncp: &priv->tx[i].statss);
808 priv->tx[i].ntfy_id = ntfy_idx;
809 }
810}
811
812static void gve_init_sync_stats(struct gve_priv *priv)
813{
814 int i;
815
816 for (i = 0; i < priv->tx_cfg.num_queues; i++)
817 u64_stats_init(syncp: &priv->tx[i].statss);
818
819 /* Init stats for XDP TX queues */
820 init_xdp_sync_stats(priv);
821
822 for (i = 0; i < priv->rx_cfg.num_queues; i++)
823 u64_stats_init(syncp: &priv->rx[i].statss);
824}
825
826static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
827 struct gve_tx_alloc_rings_cfg *cfg)
828{
829 cfg->qcfg = &priv->tx_cfg;
830 cfg->raw_addressing = !gve_is_qpl(priv);
831 cfg->qpls = priv->qpls;
832 cfg->qpl_cfg = &priv->qpl_cfg;
833 cfg->ring_size = priv->tx_desc_cnt;
834 cfg->start_idx = 0;
835 cfg->num_rings = gve_num_tx_queues(priv);
836 cfg->tx = priv->tx;
837}
838
839static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
840{
841 int i;
842
843 if (!priv->tx)
844 return;
845
846 for (i = start_id; i < start_id + num_rings; i++) {
847 if (gve_is_gqi(priv))
848 gve_tx_stop_ring_gqi(priv, idx: i);
849 else
850 gve_tx_stop_ring_dqo(priv, idx: i);
851 }
852}
853
854static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
855 int num_rings)
856{
857 int i;
858
859 for (i = start_id; i < start_id + num_rings; i++) {
860 if (gve_is_gqi(priv))
861 gve_tx_start_ring_gqi(priv, idx: i);
862 else
863 gve_tx_start_ring_dqo(priv, idx: i);
864 }
865}
866
867static int gve_alloc_xdp_rings(struct gve_priv *priv)
868{
869 struct gve_tx_alloc_rings_cfg cfg = {0};
870 int err = 0;
871
872 if (!priv->num_xdp_queues)
873 return 0;
874
875 gve_tx_get_curr_alloc_cfg(priv, cfg: &cfg);
876 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
877 cfg.num_rings = priv->num_xdp_queues;
878
879 err = gve_tx_alloc_rings_gqi(priv, cfg: &cfg);
880 if (err)
881 return err;
882
883 gve_tx_start_rings(priv, start_id: cfg.start_idx, num_rings: cfg.num_rings);
884 init_xdp_sync_stats(priv);
885
886 return 0;
887}
888
889static int gve_alloc_rings(struct gve_priv *priv,
890 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
891 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
892{
893 int err;
894
895 if (gve_is_gqi(priv))
896 err = gve_tx_alloc_rings_gqi(priv, cfg: tx_alloc_cfg);
897 else
898 err = gve_tx_alloc_rings_dqo(priv, cfg: tx_alloc_cfg);
899 if (err)
900 return err;
901
902 if (gve_is_gqi(priv))
903 err = gve_rx_alloc_rings_gqi(priv, cfg: rx_alloc_cfg);
904 else
905 err = gve_rx_alloc_rings_dqo(priv, cfg: rx_alloc_cfg);
906 if (err)
907 goto free_tx;
908
909 return 0;
910
911free_tx:
912 if (gve_is_gqi(priv))
913 gve_tx_free_rings_gqi(priv, cfg: tx_alloc_cfg);
914 else
915 gve_tx_free_rings_dqo(priv, cfg: tx_alloc_cfg);
916 return err;
917}
918
919static int gve_destroy_xdp_rings(struct gve_priv *priv)
920{
921 int start_id;
922 int err;
923
924 start_id = gve_xdp_tx_start_queue_id(priv);
925 err = gve_adminq_destroy_tx_queues(priv,
926 start_id,
927 num_queues: priv->num_xdp_queues);
928 if (err) {
929 netif_err(priv, drv, priv->dev,
930 "failed to destroy XDP queues\n");
931 /* This failure will trigger a reset - no need to clean up */
932 return err;
933 }
934 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
935
936 return 0;
937}
938
939static int gve_destroy_rings(struct gve_priv *priv)
940{
941 int num_tx_queues = gve_num_tx_queues(priv);
942 int err;
943
944 err = gve_adminq_destroy_tx_queues(priv, start_id: 0, num_queues: num_tx_queues);
945 if (err) {
946 netif_err(priv, drv, priv->dev,
947 "failed to destroy tx queues\n");
948 /* This failure will trigger a reset - no need to clean up */
949 return err;
950 }
951 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
952 err = gve_adminq_destroy_rx_queues(priv, queue_id: priv->rx_cfg.num_queues);
953 if (err) {
954 netif_err(priv, drv, priv->dev,
955 "failed to destroy rx queues\n");
956 /* This failure will trigger a reset - no need to clean up */
957 return err;
958 }
959 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
960 return 0;
961}
962
963static void gve_free_xdp_rings(struct gve_priv *priv)
964{
965 struct gve_tx_alloc_rings_cfg cfg = {0};
966
967 gve_tx_get_curr_alloc_cfg(priv, cfg: &cfg);
968 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
969 cfg.num_rings = priv->num_xdp_queues;
970
971 if (priv->tx) {
972 gve_tx_stop_rings(priv, start_id: cfg.start_idx, num_rings: cfg.num_rings);
973 gve_tx_free_rings_gqi(priv, cfg: &cfg);
974 }
975}
976
977static void gve_free_rings(struct gve_priv *priv,
978 struct gve_tx_alloc_rings_cfg *tx_cfg,
979 struct gve_rx_alloc_rings_cfg *rx_cfg)
980{
981 if (gve_is_gqi(priv)) {
982 gve_tx_free_rings_gqi(priv, cfg: tx_cfg);
983 gve_rx_free_rings_gqi(priv, cfg: rx_cfg);
984 } else {
985 gve_tx_free_rings_dqo(priv, cfg: tx_cfg);
986 gve_rx_free_rings_dqo(priv, cfg: rx_cfg);
987 }
988}
989
990int gve_alloc_page(struct gve_priv *priv, struct device *dev,
991 struct page **page, dma_addr_t *dma,
992 enum dma_data_direction dir, gfp_t gfp_flags)
993{
994 *page = alloc_page(gfp_flags);
995 if (!*page) {
996 priv->page_alloc_fail++;
997 return -ENOMEM;
998 }
999 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
1003 return -ENOMEM;
1004 }
1005 return 0;
1006}
1007
1008static int gve_alloc_queue_page_list(struct gve_priv *priv,
1009 struct gve_queue_page_list *qpl,
1010 u32 id, int pages)
1011{
1012 int err;
1013 int i;
1014
1015 qpl->id = id;
1016 qpl->num_entries = 0;
1017 qpl->pages = kvcalloc(n: pages, size: sizeof(*qpl->pages), GFP_KERNEL);
1018 /* caller handles clean up */
1019 if (!qpl->pages)
1020 return -ENOMEM;
1021 qpl->page_buses = kvcalloc(n: pages, size: sizeof(*qpl->page_buses), GFP_KERNEL);
1022 /* caller handles clean up */
1023 if (!qpl->page_buses)
1024 return -ENOMEM;
1025
1026 for (i = 0; i < pages; i++) {
1027 err = gve_alloc_page(priv, dev: &priv->pdev->dev, page: &qpl->pages[i],
1028 dma: &qpl->page_buses[i],
1029 dir: gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1030 /* caller handles clean up */
1031 if (err)
1032 return -ENOMEM;
1033 qpl->num_entries++;
1034 }
1035
1036 return 0;
1037}
1038
1039void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1040 enum dma_data_direction dir)
1041{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
1046}
1047
1048static void gve_free_queue_page_list(struct gve_priv *priv,
1049 struct gve_queue_page_list *qpl,
1050 int id)
1051{
1052 int i;
1053
1054 if (!qpl->pages)
1055 return;
1056 if (!qpl->page_buses)
1057 goto free_pages;
1058
1059 for (i = 0; i < qpl->num_entries; i++)
1060 gve_free_page(dev: &priv->pdev->dev, page: qpl->pages[i],
1061 dma: qpl->page_buses[i], dir: gve_qpl_dma_dir(priv, id));
1062
1063 kvfree(addr: qpl->page_buses);
1064 qpl->page_buses = NULL;
1065free_pages:
1066 kvfree(addr: qpl->pages);
1067 qpl->pages = NULL;
1068}
1069
1070static void gve_free_n_qpls(struct gve_priv *priv,
1071 struct gve_queue_page_list *qpls,
1072 int start_id,
1073 int num_qpls)
1074{
1075 int i;
1076
1077 for (i = start_id; i < start_id + num_qpls; i++)
		gve_free_queue_page_list(priv, &qpls[i], i);
1079}
1080
1081static int gve_alloc_n_qpls(struct gve_priv *priv,
1082 struct gve_queue_page_list *qpls,
1083 int page_count,
1084 int start_id,
1085 int num_qpls)
1086{
1087 int err;
1088 int i;
1089
1090 for (i = start_id; i < start_id + num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count);
1092 if (err)
1093 goto free_qpls;
1094 }
1095
1096 return 0;
1097
1098free_qpls:
	/* Must include the failing QPL too, because gve_alloc_queue_page_list
	 * fails without cleaning up after itself.
	 */
	gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1);
1103 return err;
1104}
1105
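
/* Allocate queue page lists for all queues. This is a no-op when raw (DMA)
 * addressing is in use. TX QPLs are allocated first, followed by RX QPLs
 * starting at gve_rx_start_qpl_id(), which may leave a gap in the id space.
 */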
1106static int gve_alloc_qpls(struct gve_priv *priv,
1107 struct gve_qpls_alloc_cfg *cfg)
1108{
1109 int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
1110 int rx_start_id, tx_num_qpls, rx_num_qpls;
1111 struct gve_queue_page_list *qpls;
1112 int page_count;
1113 int err;
1114
1115 if (cfg->raw_addressing)
1116 return 0;
1117
	qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL);
	if (!qpls)
		return -ENOMEM;

	cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) *
		sizeof(unsigned long) * BITS_PER_BYTE;
	cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
					    sizeof(unsigned long), GFP_KERNEL);
	if (!cfg->qpl_cfg->qpl_id_map) {
		err = -ENOMEM;
		goto free_qpl_array;
	}

	/* Allocate TX QPLs */
	page_count = priv->tx_pages_per_qpl;
	tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues,
				      gve_is_qpl(priv));
	err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls);
	if (err)
		goto free_qpl_map;

	/* Allocate RX QPLs */
	rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg);
	/* For GQI_QPL the number of pages allocated has a 1:1 relationship
	 * with the number of descriptors. For DQO more pages than descriptors
	 * are required because completions can arrive out of order.
	 */
	page_count = cfg->is_gqi ? priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv));
	err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls);
	if (err)
		goto free_tx_qpls;

	cfg->qpls = qpls;
	return 0;

free_tx_qpls:
	gve_free_n_qpls(priv, qpls, 0, tx_num_qpls);
free_qpl_map:
	kvfree(cfg->qpl_cfg->qpl_id_map);
	cfg->qpl_cfg->qpl_id_map = NULL;
free_qpl_array:
	kvfree(qpls);
1161 return err;
1162}
1163
1164static void gve_free_qpls(struct gve_priv *priv,
1165 struct gve_qpls_alloc_cfg *cfg)
1166{
1167 int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
1168 struct gve_queue_page_list *qpls = cfg->qpls;
1169 int i;
1170
1171 if (!qpls)
1172 return;
1173
1174 kvfree(addr: cfg->qpl_cfg->qpl_id_map);
1175 cfg->qpl_cfg->qpl_id_map = NULL;
1176
1177 for (i = 0; i < max_queues; i++)
1178 gve_free_queue_page_list(priv, qpl: &qpls[i], id: i);
1179
1180 kvfree(addr: qpls);
1181 cfg->qpls = NULL;
1182}
1183
1184/* Use this to schedule a reset when the device is capable of continuing
1185 * to handle other requests in its current state. If it is not, do a reset
1186 * in thread instead.
1187 */
1188void gve_schedule_reset(struct gve_priv *priv)
1189{
1190 gve_set_do_reset(priv);
1191 queue_work(wq: priv->gve_wq, work: &priv->service_task);
1192}
1193
1194static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1195static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1196static void gve_turndown(struct gve_priv *priv);
1197static void gve_turnup(struct gve_priv *priv);
1198
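
/* Register XDP RX queue info for every RX queue: a page-shared memory model
 * for the normal xdp_rxq and, when an AF_XDP pool is bound to the queue, a
 * separate xsk_rxq backed by the XSK buffer pool. The per-queue pool pointer
 * is also cached on the matching XDP TX queue.
 */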
1199static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1200{
1201 struct napi_struct *napi;
1202 struct gve_rx_ring *rx;
1203 int err = 0;
1204 int i, j;
1205 u32 tx_qid;
1206
1207 if (!priv->num_xdp_queues)
1208 return 0;
1209
1210 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1211 rx = &priv->rx[i];
1212 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1213
		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
1234 }
1235 }
1236
1237 for (i = 0; i < priv->num_xdp_queues; i++) {
1238 tx_qid = gve_xdp_tx_queue_id(priv, queue_id: i);
1239 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, queue_id: i);
1240 }
1241 return 0;
1242
1243err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
1250 }
1251 return err;
1252}
1253
1254static void gve_unreg_xdp_info(struct gve_priv *priv)
1255{
1256 int i, tx_qid;
1257
1258 if (!priv->num_xdp_queues)
1259 return;
1260
1261 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1262 struct gve_rx_ring *rx = &priv->rx[i];
1263
		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
1274 }
1275}
1276
1277static void gve_drain_page_cache(struct gve_priv *priv)
1278{
1279 int i;
1280
1281 for (i = 0; i < priv->rx_cfg.num_queues; i++)
1282 page_frag_cache_drain(nc: &priv->rx[i].page_cache);
1283}
1284
1285static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv,
1286 struct gve_qpls_alloc_cfg *cfg)
1287{
1288 cfg->raw_addressing = !gve_is_qpl(priv);
1289 cfg->is_gqi = gve_is_gqi(priv);
1290 cfg->num_xdp_queues = priv->num_xdp_queues;
1291 cfg->qpl_cfg = &priv->qpl_cfg;
1292 cfg->tx_cfg = &priv->tx_cfg;
1293 cfg->rx_cfg = &priv->rx_cfg;
1294 cfg->qpls = priv->qpls;
1295}
1296
1297static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1298 struct gve_rx_alloc_rings_cfg *cfg)
1299{
1300 cfg->qcfg = &priv->rx_cfg;
1301 cfg->qcfg_tx = &priv->tx_cfg;
1302 cfg->raw_addressing = !gve_is_qpl(priv);
1303 cfg->enable_header_split = priv->header_split_enabled;
1304 cfg->qpls = priv->qpls;
1305 cfg->qpl_cfg = &priv->qpl_cfg;
1306 cfg->ring_size = priv->rx_desc_cnt;
1307 cfg->packet_buffer_size = gve_is_gqi(priv) ?
1308 GVE_DEFAULT_RX_BUFFER_SIZE :
1309 priv->data_buffer_size_dqo;
1310 cfg->rx = priv->rx;
1311}
1312
1313static void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
1314 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1315 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1316 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1317{
1318 gve_qpls_get_curr_alloc_cfg(priv, cfg: qpls_alloc_cfg);
1319 gve_tx_get_curr_alloc_cfg(priv, cfg: tx_alloc_cfg);
1320 gve_rx_get_curr_alloc_cfg(priv, cfg: rx_alloc_cfg);
1321}
1322
1323static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
1324{
1325 int i;
1326
1327 for (i = 0; i < num_rings; i++) {
1328 if (gve_is_gqi(priv))
1329 gve_rx_start_ring_gqi(priv, idx: i);
1330 else
1331 gve_rx_start_ring_dqo(priv, idx: i);
1332 }
1333}
1334
1335static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1336{
1337 int i;
1338
1339 if (!priv->rx)
1340 return;
1341
1342 for (i = 0; i < num_rings; i++) {
1343 if (gve_is_gqi(priv))
1344 gve_rx_stop_ring_gqi(priv, idx: i);
1345 else
1346 gve_rx_stop_ring_dqo(priv, idx: i);
1347 }
1348}
1349
1350static void gve_queues_mem_free(struct gve_priv *priv,
1351 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1352 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1353 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1354{
1355 gve_free_rings(priv, tx_cfg: tx_alloc_cfg, rx_cfg: rx_alloc_cfg);
1356 gve_free_qpls(priv, cfg: qpls_alloc_cfg);
1357}
1358
1359static int gve_queues_mem_alloc(struct gve_priv *priv,
1360 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1361 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1362 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1363{
1364 int err;
1365
1366 err = gve_alloc_qpls(priv, cfg: qpls_alloc_cfg);
1367 if (err) {
1368 netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n");
1369 return err;
1370 }
1371 tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
1372 rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
1373 err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
1374 if (err) {
1375 netif_err(priv, drv, priv->dev, "Failed to alloc rings\n");
1376 goto free_qpls;
1377 }
1378
1379 return 0;
1380
1381free_qpls:
1382 gve_free_qpls(priv, cfg: qpls_alloc_cfg);
1383 return err;
1384}
1385
1386static void gve_queues_mem_remove(struct gve_priv *priv)
1387{
1388 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1389 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1390 struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1391
1392 gve_get_curr_alloc_cfgs(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1393 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1394 gve_queues_mem_free(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1395 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1396 priv->qpls = NULL;
1397 priv->tx = NULL;
1398 priv->rx = NULL;
1399}
1400
1401/* The passed-in queue memory is stored into priv and the queues are made live.
1402 * No memory is allocated. Passed-in memory is freed on errors.
1403 */
1404static int gve_queues_start(struct gve_priv *priv,
1405 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1406 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1407 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1408{
1409 struct net_device *dev = priv->dev;
1410 int err;
1411
1412 /* Record new resources into priv */
1413 priv->qpls = qpls_alloc_cfg->qpls;
1414 priv->tx = tx_alloc_cfg->tx;
1415 priv->rx = rx_alloc_cfg->rx;
1416
1417 /* Record new configs into priv */
1418 priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg;
1419 priv->tx_cfg = *tx_alloc_cfg->qcfg;
1420 priv->rx_cfg = *rx_alloc_cfg->qcfg;
1421 priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1422 priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1423
1424 if (priv->xdp_prog)
1425 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1426 else
1427 priv->num_xdp_queues = 0;
1428
1429 gve_tx_start_rings(priv, start_id: 0, num_rings: tx_alloc_cfg->num_rings);
1430 gve_rx_start_rings(priv, num_rings: rx_alloc_cfg->qcfg->num_queues);
1431 gve_init_sync_stats(priv);
1432
1433 err = netif_set_real_num_tx_queues(dev, txq: priv->tx_cfg.num_queues);
1434 if (err)
1435 goto stop_and_free_rings;
1436 err = netif_set_real_num_rx_queues(dev, rxq: priv->rx_cfg.num_queues);
1437 if (err)
1438 goto stop_and_free_rings;
1439
1440 err = gve_reg_xdp_info(priv, dev);
1441 if (err)
1442 goto stop_and_free_rings;
1443
1444 err = gve_register_qpls(priv);
1445 if (err)
1446 goto reset;
1447
1448 priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1449 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1450
1451 err = gve_create_rings(priv);
1452 if (err)
1453 goto reset;
1454
1455 gve_set_device_rings_ok(priv);
1456
1457 if (gve_get_report_stats(priv))
1458 mod_timer(timer: &priv->stats_report_timer,
1459 expires: round_jiffies(j: jiffies +
1460 msecs_to_jiffies(m: priv->stats_report_timer_period)));
1461
1462 gve_turnup(priv);
1463 queue_work(wq: priv->gve_wq, work: &priv->service_task);
1464 priv->interface_up_cnt++;
1465 return 0;
1466
1467reset:
1468 if (gve_get_reset_in_progress(priv))
1469 goto stop_and_free_rings;
1470 gve_reset_and_teardown(priv, was_up: true);
1471 /* if this fails there is nothing we can do so just ignore the return */
1472 gve_reset_recovery(priv, was_up: false);
1473 /* return the original error */
1474 return err;
1475stop_and_free_rings:
1476 gve_tx_stop_rings(priv, start_id: 0, num_rings: gve_num_tx_queues(priv));
1477 gve_rx_stop_rings(priv, num_rings: priv->rx_cfg.num_queues);
1478 gve_queues_mem_remove(priv);
1479 return err;
1480}
1481
1482static int gve_open(struct net_device *dev)
1483{
1484 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1485 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1486 struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1487 struct gve_priv *priv = netdev_priv(dev);
1488 int err;
1489
1490 gve_get_curr_alloc_cfgs(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1491 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1492
1493 err = gve_queues_mem_alloc(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1494 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1495 if (err)
1496 return err;
1497
1498 /* No need to free on error: ownership of resources is lost after
1499 * calling gve_queues_start.
1500 */
1501 err = gve_queues_start(priv, qpls_alloc_cfg: &qpls_alloc_cfg,
1502 tx_alloc_cfg: &tx_alloc_cfg, rx_alloc_cfg: &rx_alloc_cfg);
1503 if (err)
1504 return err;
1505
1506 return 0;
1507}
1508
1509static int gve_queues_stop(struct gve_priv *priv)
1510{
1511 int err;
1512
1513 netif_carrier_off(dev: priv->dev);
1514 if (gve_get_device_rings_ok(priv)) {
1515 gve_turndown(priv);
1516 gve_drain_page_cache(priv);
1517 err = gve_destroy_rings(priv);
1518 if (err)
1519 goto err;
1520 err = gve_unregister_qpls(priv);
1521 if (err)
1522 goto err;
1523 gve_clear_device_rings_ok(priv);
1524 }
1525 del_timer_sync(timer: &priv->stats_report_timer);
1526
1527 gve_unreg_xdp_info(priv);
1528
1529 gve_tx_stop_rings(priv, start_id: 0, num_rings: gve_num_tx_queues(priv));
1530 gve_rx_stop_rings(priv, num_rings: priv->rx_cfg.num_queues);
1531
1532 priv->interface_down_cnt++;
1533 return 0;
1534
1535err:
1536 /* This must have been called from a reset due to the rtnl lock
1537 * so just return at this point.
1538 */
1539 if (gve_get_reset_in_progress(priv))
1540 return err;
1541 /* Otherwise reset before returning */
1542 gve_reset_and_teardown(priv, was_up: true);
1543 return gve_reset_recovery(priv, was_up: false);
1544}
1545
1546static int gve_close(struct net_device *dev)
1547{
1548 struct gve_priv *priv = netdev_priv(dev);
1549 int err;
1550
1551 err = gve_queues_stop(priv);
1552 if (err)
1553 return err;
1554
1555 gve_queues_mem_remove(priv);
1556 return 0;
1557}
1558
1559static int gve_remove_xdp_queues(struct gve_priv *priv)
1560{
1561 int qpl_start_id;
1562 int err;
1563
1564 qpl_start_id = gve_xdp_tx_start_queue_id(priv);
1565
1566 err = gve_destroy_xdp_rings(priv);
1567 if (err)
1568 return err;
1569
1570 err = gve_unregister_xdp_qpls(priv);
1571 if (err)
1572 return err;
1573
1574 gve_unreg_xdp_info(priv);
1575 gve_free_xdp_rings(priv);
1576
1577 gve_free_n_qpls(priv, qpls: priv->qpls, start_id: qpl_start_id, num_qpls: gve_num_xdp_qpls(priv));
1578 priv->num_xdp_queues = 0;
1579 return 0;
1580}
1581
1582static int gve_add_xdp_queues(struct gve_priv *priv)
1583{
1584 int start_id;
1585 int err;
1586
1587 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1588
1589 start_id = gve_xdp_tx_start_queue_id(priv);
1590 err = gve_alloc_n_qpls(priv, qpls: priv->qpls, page_count: priv->tx_pages_per_qpl,
1591 start_id, num_qpls: gve_num_xdp_qpls(priv));
1592 if (err)
1593 goto err;
1594
1595 err = gve_alloc_xdp_rings(priv);
1596 if (err)
1597 goto free_xdp_qpls;
1598
1599 err = gve_reg_xdp_info(priv, dev: priv->dev);
1600 if (err)
1601 goto free_xdp_rings;
1602
1603 err = gve_register_xdp_qpls(priv);
1604 if (err)
1605 goto free_xdp_rings;
1606
1607 err = gve_create_xdp_rings(priv);
1608 if (err)
1609 goto free_xdp_rings;
1610
1611 return 0;
1612
1613free_xdp_rings:
1614 gve_free_xdp_rings(priv);
1615free_xdp_qpls:
1616 gve_free_n_qpls(priv, qpls: priv->qpls, start_id, num_qpls: gve_num_xdp_qpls(priv));
1617err:
1618 priv->num_xdp_queues = 0;
1619 return err;
1620}
1621
1622static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1623{
1624 if (!gve_get_napi_enabled(priv))
1625 return;
1626
1627 if (link_status == netif_carrier_ok(dev: priv->dev))
1628 return;
1629
1630 if (link_status) {
1631 netdev_info(dev: priv->dev, format: "Device link is up.\n");
1632 netif_carrier_on(dev: priv->dev);
1633 } else {
1634 netdev_info(dev: priv->dev, format: "Device link is down.\n");
1635 netif_carrier_off(dev: priv->dev);
1636 }
1637}
1638
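
/* Install or remove an XDP program. If the carrier is down only the program
 * pointer is swapped; otherwise the datapath is turned down, XDP TX queues
 * are added or removed as needed, and the datapath is turned back up.
 */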
1639static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1640 struct netlink_ext_ack *extack)
1641{
1642 struct bpf_prog *old_prog;
1643 int err = 0;
1644 u32 status;
1645
1646 old_prog = READ_ONCE(priv->xdp_prog);
1647 if (!netif_carrier_ok(dev: priv->dev)) {
1648 WRITE_ONCE(priv->xdp_prog, prog);
1649 if (old_prog)
1650 bpf_prog_put(prog: old_prog);
1651 return 0;
1652 }
1653
1654 gve_turndown(priv);
1655 if (!old_prog && prog) {
1656 // Allocate XDP TX queues if an XDP program is
1657 // being installed
1658 err = gve_add_xdp_queues(priv);
1659 if (err)
1660 goto out;
1661 } else if (old_prog && !prog) {
1662 // Remove XDP TX queues if an XDP program is
1663 // being uninstalled
1664 err = gve_remove_xdp_queues(priv);
1665 if (err)
1666 goto out;
1667 }
1668 WRITE_ONCE(priv->xdp_prog, prog);
1669 if (old_prog)
1670 bpf_prog_put(prog: old_prog);
1671
1672out:
1673 gve_turnup(priv);
1674 status = ioread32be(&priv->reg_bar0->device_status);
1675 gve_handle_link_status(priv, link_status: GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1676 return err;
1677}
1678
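
/* Bind an AF_XDP buffer pool to RX queue qid: validate the qid and frame
 * size, DMA map the pool and, if an XDP program is installed, register the
 * xsk_rxq and point the RX and XDP TX queues at the pool.
 */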
1679static int gve_xsk_pool_enable(struct net_device *dev,
1680 struct xsk_buff_pool *pool,
1681 u16 qid)
1682{
1683 struct gve_priv *priv = netdev_priv(dev);
1684 struct napi_struct *napi;
1685 struct gve_rx_ring *rx;
1686 int tx_qid;
1687 int err;
1688
1689 if (qid >= priv->rx_cfg.num_queues) {
1690 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1691 return -EINVAL;
1692 }
1693 if (xsk_pool_get_rx_frame_size(pool) <
1694 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1695 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1696 return -EINVAL;
1697 }
1698
1699 err = xsk_pool_dma_map(pool, dev: &priv->pdev->dev,
1700 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1701 if (err)
1702 return err;
1703
1704 /* If XDP prog is not installed, return */
1705 if (!priv->xdp_prog)
1706 return 0;
1707
1708 rx = &priv->rx[qid];
1709 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1710 err = xdp_rxq_info_reg(xdp_rxq: &rx->xsk_rxq, dev, queue_index: qid, napi_id: napi->napi_id);
1711 if (err)
1712 goto err;
1713
1714 err = xdp_rxq_info_reg_mem_model(xdp_rxq: &rx->xsk_rxq,
1715 type: MEM_TYPE_XSK_BUFF_POOL, NULL);
1716 if (err)
1717 goto err;
1718
1719 xsk_pool_set_rxq_info(pool, rxq: &rx->xsk_rxq);
1720 rx->xsk_pool = pool;
1721
1722 tx_qid = gve_xdp_tx_queue_id(priv, queue_id: qid);
1723 priv->tx[tx_qid].xsk_pool = pool;
1724
1725 return 0;
1726err:
1727 if (xdp_rxq_info_is_reg(xdp_rxq: &rx->xsk_rxq))
1728 xdp_rxq_info_unreg(xdp_rxq: &rx->xsk_rxq);
1729
1730 xsk_pool_dma_unmap(pool,
1731 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1732 return err;
1733}
1734
1735static int gve_xsk_pool_disable(struct net_device *dev,
1736 u16 qid)
1737{
1738 struct gve_priv *priv = netdev_priv(dev);
1739 struct napi_struct *napi_rx;
1740 struct napi_struct *napi_tx;
1741 struct xsk_buff_pool *pool;
1742 int tx_qid;
1743
1744 pool = xsk_get_pool_from_qid(dev, queue_id: qid);
1745 if (!pool)
1746 return -EINVAL;
1747 if (qid >= priv->rx_cfg.num_queues)
1748 return -EINVAL;
1749
1750 /* If XDP prog is not installed, unmap DMA and return */
1751 if (!priv->xdp_prog)
1752 goto done;
1753
1754 tx_qid = gve_xdp_tx_queue_id(priv, queue_id: qid);
1755 if (!netif_running(dev)) {
1756 priv->rx[qid].xsk_pool = NULL;
1757 xdp_rxq_info_unreg(xdp_rxq: &priv->rx[qid].xsk_rxq);
1758 priv->tx[tx_qid].xsk_pool = NULL;
1759 goto done;
1760 }
1761
1762 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1763 napi_disable(n: napi_rx); /* make sure current rx poll is done */
1764
1765 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1766 napi_disable(n: napi_tx); /* make sure current tx poll is done */
1767
1768 priv->rx[qid].xsk_pool = NULL;
1769 xdp_rxq_info_unreg(xdp_rxq: &priv->rx[qid].xsk_rxq);
1770 priv->tx[tx_qid].xsk_pool = NULL;
1771 smp_mb(); /* Make sure it is visible to the workers on datapath */
1772
1773 napi_enable(n: napi_rx);
1774 if (gve_rx_work_pending(rx: &priv->rx[qid]))
1775 napi_schedule(n: napi_rx);
1776
1777 napi_enable(n: napi_tx);
1778 if (gve_tx_clean_pending(priv, tx: &priv->tx[tx_qid]))
1779 napi_schedule(n: napi_tx);
1780
1781done:
1782 xsk_pool_dma_unmap(pool,
1783 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1784 return 0;
1785}
1786
1787static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1788{
1789 struct gve_priv *priv = netdev_priv(dev);
1790 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1791
1792 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1793 return -EINVAL;
1794
1795 if (flags & XDP_WAKEUP_TX) {
1796 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1797 struct napi_struct *napi =
1798 &priv->ntfy_blocks[tx->ntfy_id].napi;
1799
1800 if (!napi_if_scheduled_mark_missed(n: napi)) {
1801 /* Call local_bh_enable to trigger SoftIRQ processing */
1802 local_bh_disable();
1803 napi_schedule(n: napi);
1804 local_bh_enable();
1805 }
1806
1807 tx->xdp_xsk_wakeup++;
1808 }
1809
1810 return 0;
1811}
1812
1813static int verify_xdp_configuration(struct net_device *dev)
1814{
1815 struct gve_priv *priv = netdev_priv(dev);
1816
1817 if (dev->features & NETIF_F_LRO) {
1818 netdev_warn(dev, format: "XDP is not supported when LRO is on.\n");
1819 return -EOPNOTSUPP;
1820 }
1821
1822 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1823 netdev_warn(dev, format: "XDP is not supported in mode %d.\n",
1824 priv->queue_format);
1825 return -EOPNOTSUPP;
1826 }
1827
1828 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1829 netdev_warn(dev, format: "XDP is not supported for mtu %d.\n",
1830 dev->mtu);
1831 return -EOPNOTSUPP;
1832 }
1833
1834 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1835 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1836 netdev_warn(dev, format: "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1837 priv->rx_cfg.num_queues,
1838 priv->tx_cfg.num_queues,
1839 priv->tx_cfg.max_queues);
1840 return -EINVAL;
1841 }
1842 return 0;
1843}
1844
1845static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1846{
1847 struct gve_priv *priv = netdev_priv(dev);
1848 int err;
1849
1850 err = verify_xdp_configuration(dev);
1851 if (err)
1852 return err;
1853 switch (xdp->command) {
1854 case XDP_SETUP_PROG:
1855 return gve_set_xdp(priv, prog: xdp->prog, extack: xdp->extack);
1856 case XDP_SETUP_XSK_POOL:
1857 if (xdp->xsk.pool)
1858 return gve_xsk_pool_enable(dev, pool: xdp->xsk.pool, qid: xdp->xsk.queue_id);
1859 else
1860 return gve_xsk_pool_disable(dev, qid: xdp->xsk.queue_id);
1861 default:
1862 return -EINVAL;
1863 }
1864}
1865
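
/* Apply a new queue configuration: allocate the new resources first, then
 * tear down the running queues and restart on the new resources. If the
 * restart fails, all queues are left turned down.
 */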
1866static int gve_adjust_config(struct gve_priv *priv,
1867 struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1868 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1869 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1870{
1871 int err;
1872
	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, qpls_alloc_cfg,
				   tx_alloc_cfg, rx_alloc_cfg);
1876 if (err) {
1877 netif_err(priv, drv, priv->dev,
1878 "Adjust config failed to alloc new queues");
1879 return err;
1880 }
1881
1882 /* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
1884 if (err) {
1885 netif_err(priv, drv, priv->dev,
1886 "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, qpls_alloc_cfg,
				    tx_alloc_cfg, rx_alloc_cfg);
1889 return err;
1890 }
1891
1892 /* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, qpls_alloc_cfg,
			       tx_alloc_cfg, rx_alloc_cfg);
1895 if (err) {
1896 netif_err(priv, drv, priv->dev,
1897 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1898 /* No need to free on error: ownership of resources is lost after
1899 * calling gve_queues_start.
1900 */
1901 gve_turndown(priv);
1902 return err;
1903 }
1904
1905 return 0;
1906}
1907
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
	struct gve_qpl_config new_qpl_cfg;
	int err;

	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
				&tx_alloc_cfg, &rx_alloc_cfg);

	/* qpl_cfg is not read-only, it contains a map that gets updated as
	 * rings are allocated, which is why we cannot use the yet unreleased
	 * one in priv.
	 */
	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;

	/* Relay the new config from ethtool */
	qpls_alloc_cfg.tx_cfg = &new_tx_config;
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	qpls_alloc_cfg.rx_cfg = &new_rx_config;
	rx_alloc_cfg.qcfg = &new_rx_config;
	tx_alloc_cfg.num_rings = new_tx_config.num_queues;

	if (netif_carrier_ok(priv->dev)) {
		err = gve_adjust_config(priv, &qpls_alloc_cfg,
					&tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
}

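/* Quiesce the data path: drop the carrier, disable NAPI on every TX and RX
 * notification block and stop the TX queues.  No device resources are freed
 * here.
 */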
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

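/* Reverse of gve_turndown(): restart the TX queues, re-enable NAPI and
 * unmask interrupts.  GQI unmasks by writing 0 to the IRQ doorbell; DQO does
 * it by reprogramming the interrupt coalescing timers.
 */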
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}
	}

	gve_set_napi_enabled(priv);
}

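/* .ndo_tx_timeout handler.  If the stalled queue has missed completions and
 * has not been kicked within the last MIN_TX_TIMEOUT_GAP milliseconds, the
 * interrupt is masked and NAPI is rescheduled to reap them; otherwise a full
 * device reset is scheduled.
 */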
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_notify_block *block;
	struct gve_tx_ring *tx = NULL;
	struct gve_priv *priv;
	u32 last_nic_done;
	u32 current_time;
	u32 ntfy_idx;

	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
	priv = netdev_priv(dev);
	if (txqueue > priv->tx_cfg.num_queues)
		goto reset;

	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
	if (ntfy_idx >= priv->num_ntfy_blks)
		goto reset;

	block = &priv->ntfy_blocks[ntfy_idx];
	tx = block->tx;

	current_time = jiffies_to_msecs(jiffies);
	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
		goto reset;

	/* Check to see if there are missed completions, which will allow us to
	 * kick the queue.
	 */
	last_nic_done = gve_tx_load_event_counter(priv, tx);
	if (last_nic_done - tx->done) {
		netdev_info(dev, "Kicking queue %d", txqueue);
		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
		napi_schedule(&block->napi);
		tx->last_kick_msec = current_time;
		goto out;
	} // Else reset.

reset:
	gve_schedule_reset(priv);

out:
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

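/* Pick the RX packet buffer size: the larger GVE_MAX_RX_BUFFER_SIZE when
 * header split is requested and the device supports it, otherwise the
 * default size.
 */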
u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
{
	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
		return GVE_MAX_RX_BUFFER_SIZE;
	else
		return GVE_DEFAULT_RX_BUFFER_SIZE;
}

/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
bool gve_header_split_supported(const struct gve_priv *priv)
{
	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
}

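/* Apply an ethtool tcp-data-split request.  A change of the header-split
 * state reconfigures the RX rings (and switches the packet buffer size)
 * through gve_adjust_config() when the interface is running.
 */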
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
	bool enable_hdr_split;
	int err = 0;

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
		return 0;

	if (!gve_header_split_supported(priv)) {
		dev_err(&priv->pdev->dev, "Header-split not supported\n");
		return -EOPNOTSUPP;
	}

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
		enable_hdr_split = true;
	else
		enable_hdr_split = false;

	if (enable_hdr_split == priv->header_split_enabled)
		return 0;

	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
				&tx_alloc_cfg, &rx_alloc_cfg);

	rx_alloc_cfg.enable_header_split = enable_hdr_split;
	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);

	if (netif_running(priv->dev))
		err = gve_adjust_config(priv, &qpls_alloc_cfg,
					&tx_alloc_cfg, &rx_alloc_cfg);
	return err;
}

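/* .ndo_set_features handler.  Only NETIF_F_LRO changes require action: the
 * rings are reallocated with gve_adjust_config() while the carrier is up,
 * and the feature flip is reverted if that fails.
 */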
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	struct gve_qpl_config new_qpl_cfg;
	int err;

	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
				&tx_alloc_cfg, &rx_alloc_cfg);
	/* qpl_cfg is not read-only, it contains a map that gets updated as
	 * rings are allocated, which is why we cannot use the yet unreleased
	 * one in priv.
	 */
	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			err = gve_adjust_config(priv, &qpls_alloc_cfg,
						&tx_alloc_cfg, &rx_alloc_cfg);
			if (err) {
				/* Revert the change on error. */
				netdev->features = orig_features;
				return err;
			}
		}
	}

	return 0;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

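/* Fill the DMA stats report shared with the device: bump the written counter
 * and emit a fixed set of per-queue TX and RX statistics.  The GQI-only
 * counters (frames sent, last completion) are reported as zero on DQO.
 */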
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		priv->dev->xdp_features = 0;
	}
}

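/* One-time (and post-reset) device initialization: bring up the admin queue,
 * verify driver compatibility, and on a fresh probe query the device
 * description and size the notification-block and queue configuration.
 * Each TX and each RX queue gets its own notification block, so the maximum
 * queue counts are capped at half of the available blocks; the remaining
 * MSI-X vector is reserved for management.
 */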
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

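/* Full device reset.  attempt_teardown selects between a graceful close of
 * the interface and an immediate turndown before the admin queue is
 * released; either way the device is then reinitialized via
 * gve_reset_recovery().
 */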
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

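/* Report the driver version to the device by streaming it byte by byte into
 * the driver-version register: the gve_version_prefix string, then
 * gve_version_str, then a terminating newline.
 */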
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

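/* PCI probe: enable the device, map the register and doorbell BARs, size the
 * netdev from the advertised maximum queue counts, initialize priv and the
 * service workqueue, then register the netdev.  Every failure path unwinds
 * in reverse order through the abort_with_* labels.
 */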
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;
	priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

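/* Legacy PCI power-management hooks.  Suspend closes the interface (or falls
 * back to reset-and-teardown if close fails) and remembers whether it was
 * up; resume re-runs the reset recovery path to restore that state.
 */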
#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);