1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * VDPA simulator for networking device.
4 *
5 * Copyright (c) 2020, Red Hat Inc. All rights reserved.
6 * Author: Jason Wang <jasowang@redhat.com>
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/device.h>
13#include <linux/kernel.h>
14#include <linux/etherdevice.h>
15#include <linux/vringh.h>
16#include <linux/vdpa.h>
17#include <net/netlink.h>
18#include <uapi/linux/virtio_net.h>
19#include <uapi/linux/vdpa.h>
20
21#include "vdpa_sim.h"
22
/* Module metadata strings, consumed by the MODULE_*() declarations below. */
#define DRV_VERSION	"0.1"
#define DRV_AUTHOR	"Jason Wang <jasowang@redhat.com>"
#define DRV_DESC	"vDPA Device Simulator for networking device"
#define DRV_LICENSE	"GPL v2"
27
/*
 * Feature bits offered by the simulated net device: the generic simulator
 * features plus the net-specific bits this file implements (MAC and MTU in
 * config space, link status, and a control virtqueue that accepts
 * MAC_ADDR_SET).
 */
#define VDPASIM_NET_FEATURES				\
	(VDPASIM_FEATURES |				\
	 (1ULL << VIRTIO_NET_F_MAC) |			\
	 (1ULL << VIRTIO_NET_F_STATUS) |		\
	 (1ULL << VIRTIO_NET_F_MTU) |			\
	 (1ULL << VIRTIO_NET_F_CTRL_VQ) |		\
	 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR))
34
/*
 * Device topology: three virtqueues (index 0 is RX, 1 is TX and 2 is the
 * control queue in this file), two address spaces and two virtqueue groups.
 */
#define VDPASIM_NET_VQ_NUM	3
#define VDPASIM_NET_AS_NUM	2
#define VDPASIM_NET_GROUP_NUM	2
39
40struct vdpasim_dataq_stats {
41 struct u64_stats_sync syncp;
42 u64 pkts;
43 u64 bytes;
44 u64 drops;
45 u64 errors;
46 u64 overruns;
47};
48
49struct vdpasim_cq_stats {
50 struct u64_stats_sync syncp;
51 u64 requests;
52 u64 successes;
53 u64 errors;
54};
55
56struct vdpasim_net{
57 struct vdpasim vdpasim;
58 struct vdpasim_dataq_stats tx_stats;
59 struct vdpasim_dataq_stats rx_stats;
60 struct vdpasim_cq_stats cq_stats;
61 void *buffer;
62};
63
64static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim)
65{
66 return container_of(vdpasim, struct vdpasim_net, vdpasim);
67}
68
69static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len)
70{
71 /* Make sure data is wrote before advancing index */
72 smp_wmb();
73
74 vringh_complete_iotlb(vrh: &vq->vring, head: vq->head, len);
75
76 /* Make sure used is visible before rasing the interrupt. */
77 smp_wmb();
78
79 local_bh_disable();
80 if (vringh_need_notify_iotlb(vrh: &vq->vring) > 0)
81 vringh_notify(vrh: &vq->vring);
82 local_bh_enable();
83}
84
85static bool receive_filter(struct vdpasim *vdpasim, size_t len)
86{
87 bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1);
88 size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) :
89 sizeof(struct virtio_net_hdr);
90 struct virtio_net_config *vio_config = vdpasim->config;
91 struct vdpasim_net *net = sim_to_net(vdpasim);
92
93 if (len < ETH_ALEN + hdr_len)
94 return false;
95
96 if (is_broadcast_ether_addr(addr: net->buffer + hdr_len) ||
97 is_multicast_ether_addr(addr: net->buffer + hdr_len))
98 return true;
99 if (!strncmp(net->buffer + hdr_len, vio_config->mac, ETH_ALEN))
100 return true;
101
102 return false;
103}
104
105static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim,
106 u8 cmd)
107{
108 struct virtio_net_config *vio_config = vdpasim->config;
109 struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
110 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
111 size_t read;
112
113 switch (cmd) {
114 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
115 read = vringh_iov_pull_iotlb(vrh: &cvq->vring, riov: &cvq->in_iov,
116 dst: vio_config->mac, ETH_ALEN);
117 if (read == ETH_ALEN)
118 status = VIRTIO_NET_OK;
119 break;
120 default:
121 break;
122 }
123
124 return status;
125}
126
127static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
128{
129 struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
130 struct vdpasim_net *net = sim_to_net(vdpasim);
131 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
132 struct virtio_net_ctrl_hdr ctrl;
133 size_t read, write;
134 u64 requests = 0, errors = 0, successes = 0;
135 int err;
136
137 if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
138 return;
139
140 if (!cvq->ready)
141 return;
142
143 while (true) {
144 err = vringh_getdesc_iotlb(vrh: &cvq->vring, riov: &cvq->in_iov,
145 wiov: &cvq->out_iov,
146 head: &cvq->head, GFP_ATOMIC);
147 if (err <= 0)
148 break;
149
150 ++requests;
151 read = vringh_iov_pull_iotlb(vrh: &cvq->vring, riov: &cvq->in_iov, dst: &ctrl,
152 len: sizeof(ctrl));
153 if (read != sizeof(ctrl)) {
154 ++errors;
155 break;
156 }
157
158 switch (ctrl.class) {
159 case VIRTIO_NET_CTRL_MAC:
160 status = vdpasim_handle_ctrl_mac(vdpasim, cmd: ctrl.cmd);
161 break;
162 default:
163 break;
164 }
165
166 if (status == VIRTIO_NET_OK)
167 ++successes;
168 else
169 ++errors;
170
171 /* Make sure data is wrote before advancing index */
172 smp_wmb();
173
174 write = vringh_iov_push_iotlb(vrh: &cvq->vring, wiov: &cvq->out_iov,
175 src: &status, len: sizeof(status));
176 vringh_complete_iotlb(vrh: &cvq->vring, head: cvq->head, len: write);
177 vringh_kiov_cleanup(kiov: &cvq->in_iov);
178 vringh_kiov_cleanup(kiov: &cvq->out_iov);
179
180 /* Make sure used is visible before rasing the interrupt. */
181 smp_wmb();
182
183 local_bh_disable();
184 if (cvq->cb)
185 cvq->cb(cvq->private);
186 local_bh_enable();
187 }
188
189 u64_stats_update_begin(syncp: &net->cq_stats.syncp);
190 net->cq_stats.requests += requests;
191 net->cq_stats.errors += errors;
192 net->cq_stats.successes += successes;
193 u64_stats_update_end(syncp: &net->cq_stats.syncp);
194}
195
196static void vdpasim_net_work(struct vdpasim *vdpasim)
197{
198 struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
199 struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
200 struct vdpasim_net *net = sim_to_net(vdpasim);
201 ssize_t read, write;
202 u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0;
203 u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0;
204 int err;
205
206 mutex_lock(&vdpasim->mutex);
207
208 if (!vdpasim->running)
209 goto out;
210
211 if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
212 goto out;
213
214 vdpasim_handle_cvq(vdpasim);
215
216 if (!txq->ready || !rxq->ready)
217 goto out;
218
219 while (true) {
220 err = vringh_getdesc_iotlb(vrh: &txq->vring, riov: &txq->out_iov, NULL,
221 head: &txq->head, GFP_ATOMIC);
222 if (err <= 0) {
223 if (err)
224 ++tx_errors;
225 break;
226 }
227
228 ++tx_pkts;
229 read = vringh_iov_pull_iotlb(vrh: &txq->vring, riov: &txq->out_iov,
230 dst: net->buffer, PAGE_SIZE);
231
232 tx_bytes += read;
233
234 if (!receive_filter(vdpasim, len: read)) {
235 ++rx_drops;
236 vdpasim_net_complete(vq: txq, len: 0);
237 continue;
238 }
239
240 err = vringh_getdesc_iotlb(vrh: &rxq->vring, NULL, wiov: &rxq->in_iov,
241 head: &rxq->head, GFP_ATOMIC);
242 if (err <= 0) {
243 ++rx_overruns;
244 vdpasim_net_complete(vq: txq, len: 0);
245 break;
246 }
247
248 write = vringh_iov_push_iotlb(vrh: &rxq->vring, wiov: &rxq->in_iov,
249 src: net->buffer, len: read);
250 if (write <= 0) {
251 ++rx_errors;
252 break;
253 }
254
255 ++rx_pkts;
256 rx_bytes += write;
257
258 vdpasim_net_complete(vq: txq, len: 0);
259 vdpasim_net_complete(vq: rxq, len: write);
260
261 if (tx_pkts > 4) {
262 vdpasim_schedule_work(vdpasim);
263 goto out;
264 }
265 }
266
267out:
268 mutex_unlock(lock: &vdpasim->mutex);
269
270 u64_stats_update_begin(syncp: &net->tx_stats.syncp);
271 net->tx_stats.pkts += tx_pkts;
272 net->tx_stats.bytes += tx_bytes;
273 net->tx_stats.errors += tx_errors;
274 u64_stats_update_end(syncp: &net->tx_stats.syncp);
275
276 u64_stats_update_begin(syncp: &net->rx_stats.syncp);
277 net->rx_stats.pkts += rx_pkts;
278 net->rx_stats.bytes += rx_bytes;
279 net->rx_stats.drops += rx_drops;
280 net->rx_stats.errors += rx_errors;
281 net->rx_stats.overruns += rx_overruns;
282 u64_stats_update_end(syncp: &net->rx_stats.syncp);
283}
284
285static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx,
286 struct sk_buff *msg,
287 struct netlink_ext_ack *extack)
288{
289 struct vdpasim_net *net = sim_to_net(vdpasim);
290 u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops;
291 u64 tx_pkts, tx_bytes, tx_errors, tx_drops;
292 u64 cq_requests, cq_successes, cq_errors;
293 unsigned int start;
294 int err = -EMSGSIZE;
295
296 switch(idx) {
297 case 0:
298 do {
299 start = u64_stats_fetch_begin(syncp: &net->rx_stats.syncp);
300 rx_pkts = net->rx_stats.pkts;
301 rx_bytes = net->rx_stats.bytes;
302 rx_errors = net->rx_stats.errors;
303 rx_overruns = net->rx_stats.overruns;
304 rx_drops = net->rx_stats.drops;
305 } while (u64_stats_fetch_retry(syncp: &net->rx_stats.syncp, start));
306
307 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
308 str: "rx packets"))
309 break;
310 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
311 value: rx_pkts, padattr: VDPA_ATTR_PAD))
312 break;
313 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
314 str: "rx bytes"))
315 break;
316 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
317 value: rx_bytes, padattr: VDPA_ATTR_PAD))
318 break;
319 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
320 str: "rx errors"))
321 break;
322 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
323 value: rx_errors, padattr: VDPA_ATTR_PAD))
324 break;
325 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
326 str: "rx overruns"))
327 break;
328 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
329 value: rx_overruns, padattr: VDPA_ATTR_PAD))
330 break;
331 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
332 str: "rx drops"))
333 break;
334 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
335 value: rx_drops, padattr: VDPA_ATTR_PAD))
336 break;
337 err = 0;
338 break;
339 case 1:
340 do {
341 start = u64_stats_fetch_begin(syncp: &net->tx_stats.syncp);
342 tx_pkts = net->tx_stats.pkts;
343 tx_bytes = net->tx_stats.bytes;
344 tx_errors = net->tx_stats.errors;
345 tx_drops = net->tx_stats.drops;
346 } while (u64_stats_fetch_retry(syncp: &net->tx_stats.syncp, start));
347
348 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
349 str: "tx packets"))
350 break;
351 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
352 value: tx_pkts, padattr: VDPA_ATTR_PAD))
353 break;
354 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
355 str: "tx bytes"))
356 break;
357 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
358 value: tx_bytes, padattr: VDPA_ATTR_PAD))
359 break;
360 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
361 str: "tx errors"))
362 break;
363 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
364 value: tx_errors, padattr: VDPA_ATTR_PAD))
365 break;
366 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
367 str: "tx drops"))
368 break;
369 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
370 value: tx_drops, padattr: VDPA_ATTR_PAD))
371 break;
372 err = 0;
373 break;
374 case 2:
375 do {
376 start = u64_stats_fetch_begin(syncp: &net->cq_stats.syncp);
377 cq_requests = net->cq_stats.requests;
378 cq_successes = net->cq_stats.successes;
379 cq_errors = net->cq_stats.errors;
380 } while (u64_stats_fetch_retry(syncp: &net->cq_stats.syncp, start));
381
382 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
383 str: "cvq requests"))
384 break;
385 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
386 value: cq_requests, padattr: VDPA_ATTR_PAD))
387 break;
388 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
389 str: "cvq successes"))
390 break;
391 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
392 value: cq_successes, padattr: VDPA_ATTR_PAD))
393 break;
394 if (nla_put_string(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
395 str: "cvq errors"))
396 break;
397 if (nla_put_u64_64bit(skb: msg, attrtype: VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
398 value: cq_errors, padattr: VDPA_ATTR_PAD))
399 break;
400 err = 0;
401 break;
402 default:
403 err = -EINVAL;
404 break;
405 }
406
407 return err;
408}
409
410static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
411{
412 struct virtio_net_config *net_config = config;
413
414 net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
415}
416
417static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
418 const struct vdpa_dev_set_config *config)
419{
420 struct virtio_net_config *vio_config = vdpasim->config;
421
422 if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
423 memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
424 if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
425 vio_config->mtu = cpu_to_vdpasim16(vdpasim, val: config->net.mtu);
426 else
427 /* Setup default MTU to be 1500 */
428 vio_config->mtu = cpu_to_vdpasim16(vdpasim, val: 1500);
429}
430
431static void vdpasim_net_free(struct vdpasim *vdpasim)
432{
433 struct vdpasim_net *net = sim_to_net(vdpasim);
434
435 kvfree(addr: net->buffer);
436}
437
/*
 * Release callback for the management parent device. Intentionally empty:
 * the device is a statically allocated object, so there is nothing to free.
 */
static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}
441
442static struct device vdpasim_net_mgmtdev = {
443 .init_name = "vdpasim_net",
444 .release = vdpasim_net_mgmtdev_release,
445};
446
447static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
448 const struct vdpa_dev_set_config *config)
449{
450 struct vdpasim_dev_attr dev_attr = {};
451 struct vdpasim_net *net;
452 struct vdpasim *simdev;
453 int ret;
454
455 dev_attr.mgmt_dev = mdev;
456 dev_attr.name = name;
457 dev_attr.id = VIRTIO_ID_NET;
458 dev_attr.supported_features = VDPASIM_NET_FEATURES;
459 dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
460 dev_attr.ngroups = VDPASIM_NET_GROUP_NUM;
461 dev_attr.nas = VDPASIM_NET_AS_NUM;
462 dev_attr.alloc_size = sizeof(struct vdpasim_net);
463 dev_attr.config_size = sizeof(struct virtio_net_config);
464 dev_attr.get_config = vdpasim_net_get_config;
465 dev_attr.work_fn = vdpasim_net_work;
466 dev_attr.get_stats = vdpasim_net_get_stats;
467 dev_attr.free = vdpasim_net_free;
468
469 simdev = vdpasim_create(attr: &dev_attr, config);
470 if (IS_ERR(ptr: simdev))
471 return PTR_ERR(ptr: simdev);
472
473 vdpasim_net_setup_config(vdpasim: simdev, config);
474
475 net = sim_to_net(vdpasim: simdev);
476
477 u64_stats_init(syncp: &net->tx_stats.syncp);
478 u64_stats_init(syncp: &net->rx_stats.syncp);
479 u64_stats_init(syncp: &net->cq_stats.syncp);
480
481 net->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL);
482 if (!net->buffer) {
483 ret = -ENOMEM;
484 goto reg_err;
485 }
486
487 /*
488 * Initialization must be completed before this call, since it can
489 * connect the device to the vDPA bus, so requests can arrive after
490 * this call.
491 */
492 ret = _vdpa_register_device(vdev: &simdev->vdpa, VDPASIM_NET_VQ_NUM);
493 if (ret)
494 goto reg_err;
495
496 return 0;
497
498reg_err:
499 put_device(dev: &simdev->vdpa.dev);
500 return ret;
501}
502
/*
 * dev_del management op: unregister the vDPA device @dev from the bus.
 * @mdev is not used.
 */
static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	/* @dev is the vdpa member embedded in the simulator instance. */
	_vdpa_unregister_device(dev);
}
510
511static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
512 .dev_add = vdpasim_net_dev_add,
513 .dev_del = vdpasim_net_dev_del
514};
515
516static struct virtio_device_id id_table[] = {
517 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
518 { 0 },
519};
520
521static struct vdpa_mgmt_dev mgmt_dev = {
522 .device = &vdpasim_net_mgmtdev,
523 .id_table = id_table,
524 .ops = &vdpasim_net_mgmtdev_ops,
525 .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
526 1 << VDPA_ATTR_DEV_NET_CFG_MTU |
527 1 << VDPA_ATTR_DEV_FEATURES),
528 .max_supported_vqs = VDPASIM_NET_VQ_NUM,
529 .supported_features = VDPASIM_NET_FEATURES,
530};
531
532static int __init vdpasim_net_init(void)
533{
534 int ret;
535
536 ret = device_register(dev: &vdpasim_net_mgmtdev);
537 if (ret) {
538 put_device(dev: &vdpasim_net_mgmtdev);
539 return ret;
540 }
541
542 ret = vdpa_mgmtdev_register(mdev: &mgmt_dev);
543 if (ret)
544 goto parent_err;
545 return 0;
546
547parent_err:
548 device_unregister(dev: &vdpasim_net_mgmtdev);
549 return ret;
550}
551
552static void __exit vdpasim_net_exit(void)
553{
554 vdpa_mgmtdev_unregister(mdev: &mgmt_dev);
555 device_unregister(dev: &vdpasim_net_mgmtdev);
556}
557
/* Module entry and exit points. */
module_init(vdpasim_net_init);
module_exit(vdpasim_net_exit);

/* Module metadata, taken from the DRV_* strings defined at the top. */
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
565

/* Source: linux/drivers/vdpa/vdpa_sim/vdpa_sim_net.c */