1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #define _GNU_SOURCE |
3 | #include <getopt.h> |
4 | #include <limits.h> |
5 | #include <string.h> |
6 | #include <poll.h> |
7 | #include <sys/eventfd.h> |
8 | #include <stdlib.h> |
9 | #include <assert.h> |
10 | #include <unistd.h> |
11 | #include <sys/ioctl.h> |
12 | #include <sys/stat.h> |
13 | #include <sys/types.h> |
14 | #include <fcntl.h> |
15 | #include <stdbool.h> |
16 | #include <linux/vhost.h> |
17 | #include <linux/if.h> |
18 | #include <linux/if_tun.h> |
19 | #include <linux/in.h> |
20 | #include <linux/if_packet.h> |
21 | #include <linux/virtio_net.h> |
22 | #include <netinet/ether.h> |
23 | |
/* Size of the virtio-net header (mergeable-rx-buffer layout) that precedes every frame. */
#define HDR_LEN		(sizeof(struct virtio_net_hdr_mrg_rxbuf))
/* Number of frame bytes (Ethernet header plus payload) that follow the header. */
#define TEST_BUF_LEN	256
/* Ethertype stamped on test frames and used as the packet-socket protocol. */
#define TEST_PTYPE	ETH_P_LOOPBACK
/* Ring size handed to vq_info_add() for each virtqueue. */
#define DESC_NUM	256
28 | |
/*
 * Used by implementation of kmalloc() in tools/virtio/linux/kernel.h.
 * This file only defines them; no code visible here assigns to them.
 */
void *__kmalloc_fake;
void *__kfree_ignore_start;
void *__kfree_ignore_end;
31 | |
32 | struct vq_info { |
33 | int kick; |
34 | int call; |
35 | int idx; |
36 | long started; |
37 | long completed; |
38 | struct pollfd fds; |
39 | void *ring; |
40 | /* copy used for control */ |
41 | struct vring vring; |
42 | struct virtqueue *vq; |
43 | }; |
44 | |
45 | struct vdev_info { |
46 | struct virtio_device vdev; |
47 | int control; |
48 | struct vq_info vqs[2]; |
49 | int nvqs; |
50 | void *buf; |
51 | size_t buf_size; |
52 | char *test_buf; |
53 | char *res_buf; |
54 | struct vhost_memory *mem; |
55 | int sock; |
56 | int ifindex; |
57 | unsigned char mac[ETHER_ADDR_LEN]; |
58 | }; |
59 | |
60 | static int tun_alloc(struct vdev_info *dev, char *tun_name) |
61 | { |
62 | struct ifreq ifr; |
63 | int len = HDR_LEN; |
64 | int fd, e; |
65 | |
66 | fd = open("/dev/net/tun" , O_RDWR); |
67 | if (fd < 0) { |
68 | perror("Cannot open /dev/net/tun" ); |
69 | return fd; |
70 | } |
71 | |
72 | memset(&ifr, 0, sizeof(ifr)); |
73 | |
74 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; |
75 | strncpy(p: ifr.ifr_name, q: tun_name, IFNAMSIZ); |
76 | |
77 | e = ioctl(fd, TUNSETIFF, &ifr); |
78 | if (e < 0) { |
79 | perror("ioctl[TUNSETIFF]" ); |
80 | close(fd); |
81 | return e; |
82 | } |
83 | |
84 | e = ioctl(fd, TUNSETVNETHDRSZ, &len); |
85 | if (e < 0) { |
86 | perror("ioctl[TUNSETVNETHDRSZ]" ); |
87 | close(fd); |
88 | return e; |
89 | } |
90 | |
91 | e = ioctl(fd, SIOCGIFHWADDR, &ifr); |
92 | if (e < 0) { |
93 | perror("ioctl[SIOCGIFHWADDR]" ); |
94 | close(fd); |
95 | return e; |
96 | } |
97 | |
98 | memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); |
99 | return fd; |
100 | } |
101 | |
102 | static void vdev_create_socket(struct vdev_info *dev, char *tun_name) |
103 | { |
104 | struct ifreq ifr; |
105 | |
106 | dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE)); |
107 | assert(dev->sock != -1); |
108 | |
109 | strncpy(p: ifr.ifr_name, q: tun_name, IFNAMSIZ); |
110 | assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0); |
111 | |
112 | dev->ifindex = ifr.ifr_ifindex; |
113 | |
114 | /* Set the flags that bring the device up */ |
115 | assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0); |
116 | ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); |
117 | assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0); |
118 | } |
119 | |
120 | static void vdev_send_packet(struct vdev_info *dev) |
121 | { |
122 | char *sendbuf = dev->test_buf + HDR_LEN; |
123 | struct sockaddr_ll saddrll = {0}; |
124 | int sockfd = dev->sock; |
125 | int ret; |
126 | |
127 | saddrll.sll_family = PF_PACKET; |
128 | saddrll.sll_ifindex = dev->ifindex; |
129 | saddrll.sll_halen = ETH_ALEN; |
130 | saddrll.sll_protocol = htons(TEST_PTYPE); |
131 | |
132 | ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0, |
133 | (struct sockaddr *)&saddrll, |
134 | sizeof(struct sockaddr_ll)); |
135 | assert(ret >= 0); |
136 | } |
137 | |
138 | static bool vq_notify(struct virtqueue *vq) |
139 | { |
140 | struct vq_info *info = vq->priv; |
141 | unsigned long long v = 1; |
142 | int r; |
143 | |
144 | r = write(info->kick, &v, sizeof(v)); |
145 | assert(r == sizeof(v)); |
146 | |
147 | return true; |
148 | } |
149 | |
150 | static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) |
151 | { |
152 | struct vhost_vring_addr addr = { |
153 | .index = info->idx, |
154 | .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, |
155 | .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, |
156 | .used_user_addr = (uint64_t)(unsigned long)info->vring.used, |
157 | }; |
158 | struct vhost_vring_state state = { .index = info->idx }; |
159 | struct vhost_vring_file file = { .index = info->idx }; |
160 | int r; |
161 | |
162 | state.num = info->vring.num; |
163 | r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); |
164 | assert(r >= 0); |
165 | |
166 | state.num = 0; |
167 | r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); |
168 | assert(r >= 0); |
169 | |
170 | r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); |
171 | assert(r >= 0); |
172 | |
173 | file.fd = info->kick; |
174 | r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); |
175 | assert(r >= 0); |
176 | } |
177 | |
178 | static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) |
179 | { |
180 | if (info->vq) |
181 | vring_del_virtqueue(vq: info->vq); |
182 | |
183 | memset(info->ring, 0, vring_size(num, 4096)); |
184 | vring_init(vr: &info->vring, num, p: info->ring, align: 4096); |
185 | info->vq = vring_new_virtqueue(index: info->idx, num, vring_align: 4096, vdev, weak_barriers: true, ctx: false, |
186 | pages: info->ring, notify: vq_notify, NULL, name: "test" ); |
187 | assert(info->vq); |
188 | info->vq->priv = info; |
189 | } |
190 | |
191 | static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd) |
192 | { |
193 | struct vhost_vring_file backend = { .index = idx, .fd = fd }; |
194 | struct vq_info *info = &dev->vqs[idx]; |
195 | int r; |
196 | |
197 | info->idx = idx; |
198 | info->kick = eventfd(0, EFD_NONBLOCK); |
199 | r = posix_memalign(&info->ring, 4096, vring_size(num, align: 4096)); |
200 | assert(r >= 0); |
201 | vq_reset(info, num, vdev: &dev->vdev); |
202 | vhost_vq_setup(dev, info); |
203 | |
204 | r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend); |
205 | assert(!r); |
206 | } |
207 | |
208 | static void vdev_info_init(struct vdev_info *dev, unsigned long long features) |
209 | { |
210 | struct *eh; |
211 | int i, r; |
212 | |
213 | dev->vdev.features = features; |
214 | INIT_LIST_HEAD(list: &dev->vdev.vqs); |
215 | spin_lock_init(&dev->vdev.vqs_list_lock); |
216 | |
217 | dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2; |
218 | dev->buf = malloc(dev->buf_size); |
219 | assert(dev->buf); |
220 | dev->test_buf = dev->buf; |
221 | dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN; |
222 | |
223 | memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN); |
224 | eh = (struct ether_header *)(dev->test_buf + HDR_LEN); |
225 | eh->ether_type = htons(TEST_PTYPE); |
226 | memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN); |
227 | memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN); |
228 | |
229 | for (i = sizeof(*eh); i < TEST_BUF_LEN; i++) |
230 | dev->test_buf[i + HDR_LEN] = (char)i; |
231 | |
232 | dev->control = open("/dev/vhost-net" , O_RDWR); |
233 | assert(dev->control >= 0); |
234 | |
235 | r = ioctl(dev->control, VHOST_SET_OWNER, NULL); |
236 | assert(r >= 0); |
237 | |
238 | dev->mem = malloc(offsetof(struct vhost_memory, regions) + |
239 | sizeof(dev->mem->regions[0])); |
240 | assert(dev->mem); |
241 | memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + |
242 | sizeof(dev->mem->regions[0])); |
243 | dev->mem->nregions = 1; |
244 | dev->mem->regions[0].guest_phys_addr = (long)dev->buf; |
245 | dev->mem->regions[0].userspace_addr = (long)dev->buf; |
246 | dev->mem->regions[0].memory_size = dev->buf_size; |
247 | |
248 | r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); |
249 | assert(r >= 0); |
250 | |
251 | r = ioctl(dev->control, VHOST_SET_FEATURES, &features); |
252 | assert(r >= 0); |
253 | |
254 | dev->nvqs = 2; |
255 | } |
256 | |
257 | static void wait_for_interrupt(struct vq_info *vq) |
258 | { |
259 | unsigned long long val; |
260 | |
261 | poll(&vq->fds, 1, 100); |
262 | |
263 | if (vq->fds.revents & POLLIN) |
264 | read(vq->fds.fd, &val, sizeof(val)); |
265 | } |
266 | |
267 | static void verify_res_buf(char *res_buf) |
268 | { |
269 | int i; |
270 | |
271 | for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++) |
272 | assert(res_buf[i] == (char)i); |
273 | } |
274 | |
275 | static void run_tx_test(struct vdev_info *dev, struct vq_info *vq, |
276 | bool delayed, int bufs) |
277 | { |
278 | long long spurious = 0; |
279 | struct scatterlist sl; |
280 | unsigned int len; |
281 | int r; |
282 | |
283 | for (;;) { |
284 | long started_before = vq->started; |
285 | long completed_before = vq->completed; |
286 | |
287 | virtqueue_disable_cb(vq: vq->vq); |
288 | do { |
289 | while (vq->started < bufs && |
290 | (vq->started - vq->completed) < 1) { |
291 | sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN); |
292 | r = virtqueue_add_outbuf(vq: vq->vq, sg: &sl, num: 1, |
293 | data: dev->test_buf + vq->started, |
294 | GFP_ATOMIC); |
295 | if (unlikely(r != 0)) |
296 | break; |
297 | |
298 | ++vq->started; |
299 | |
300 | if (unlikely(!virtqueue_kick(vq->vq))) { |
301 | r = -1; |
302 | break; |
303 | } |
304 | } |
305 | |
306 | if (vq->started >= bufs) |
307 | r = -1; |
308 | |
309 | /* Flush out completed bufs if any */ |
310 | while (virtqueue_get_buf(vq: vq->vq, len: &len)) { |
311 | int n; |
312 | |
313 | n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL); |
314 | assert(n == TEST_BUF_LEN); |
315 | verify_res_buf(res_buf: dev->res_buf); |
316 | |
317 | ++vq->completed; |
318 | r = 0; |
319 | } |
320 | } while (r == 0); |
321 | |
322 | if (vq->completed == completed_before && vq->started == started_before) |
323 | ++spurious; |
324 | |
325 | assert(vq->completed <= bufs); |
326 | assert(vq->started <= bufs); |
327 | if (vq->completed == bufs) |
328 | break; |
329 | |
330 | if (delayed) { |
331 | if (virtqueue_enable_cb_delayed(vq: vq->vq)) |
332 | wait_for_interrupt(vq); |
333 | } else { |
334 | if (virtqueue_enable_cb(vq: vq->vq)) |
335 | wait_for_interrupt(vq); |
336 | } |
337 | } |
338 | printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n" , |
339 | spurious, vq->started, vq->completed); |
340 | } |
341 | |
342 | static void run_rx_test(struct vdev_info *dev, struct vq_info *vq, |
343 | bool delayed, int bufs) |
344 | { |
345 | long long spurious = 0; |
346 | struct scatterlist sl; |
347 | unsigned int len; |
348 | int r; |
349 | |
350 | for (;;) { |
351 | long started_before = vq->started; |
352 | long completed_before = vq->completed; |
353 | |
354 | do { |
355 | while (vq->started < bufs && |
356 | (vq->started - vq->completed) < 1) { |
357 | sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN); |
358 | |
359 | r = virtqueue_add_inbuf(vq: vq->vq, sg: &sl, num: 1, |
360 | data: dev->res_buf + vq->started, |
361 | GFP_ATOMIC); |
362 | if (unlikely(r != 0)) |
363 | break; |
364 | |
365 | ++vq->started; |
366 | |
367 | vdev_send_packet(dev); |
368 | |
369 | if (unlikely(!virtqueue_kick(vq->vq))) { |
370 | r = -1; |
371 | break; |
372 | } |
373 | } |
374 | |
375 | if (vq->started >= bufs) |
376 | r = -1; |
377 | |
378 | /* Flush out completed bufs if any */ |
379 | while (virtqueue_get_buf(vq: vq->vq, len: &len)) { |
380 | struct *eh; |
381 | |
382 | eh = (struct ether_header *)(dev->res_buf + HDR_LEN); |
383 | |
384 | /* tun netdev is up and running, only handle the |
385 | * TEST_PTYPE packet. |
386 | */ |
387 | if (eh->ether_type == htons(TEST_PTYPE)) { |
388 | assert(len == TEST_BUF_LEN + HDR_LEN); |
389 | verify_res_buf(res_buf: dev->res_buf + HDR_LEN); |
390 | } |
391 | |
392 | ++vq->completed; |
393 | r = 0; |
394 | } |
395 | } while (r == 0); |
396 | |
397 | if (vq->completed == completed_before && vq->started == started_before) |
398 | ++spurious; |
399 | |
400 | assert(vq->completed <= bufs); |
401 | assert(vq->started <= bufs); |
402 | if (vq->completed == bufs) |
403 | break; |
404 | } |
405 | |
406 | printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n" , |
407 | spurious, vq->started, vq->completed); |
408 | } |
409 | |
/* Only -h exists as a short option; everything else is reachable by long name only. */
static const char optstring[] = "h";

/*
 * Long options understood by getopt_long().  Each entry returns its .val
 * character; the final all-zero entry terminates the table.
 */
static const struct option longopts[] = {
	{ .name = "help",                 .has_arg = no_argument,       .val = 'h' },
	{ .name = "event-idx",            .has_arg = no_argument,       .val = 'E' },
	{ .name = "no-event-idx",         .has_arg = no_argument,       .val = 'e' },
	{ .name = "indirect",             .has_arg = no_argument,       .val = 'I' },
	{ .name = "no-indirect",          .has_arg = no_argument,       .val = 'i' },
	{ .name = "virtio-1",             .has_arg = no_argument,       .val = '1' },
	{ .name = "no-virtio-1",          .has_arg = no_argument,       .val = '0' },
	{ .name = "delayed-interrupt",    .has_arg = no_argument,       .val = 'D' },
	{ .name = "no-delayed-interrupt", .has_arg = no_argument,       .val = 'd' },
	{ .name = "buf-num",              .has_arg = required_argument, .val = 'n' },
	{ .name = "batch",                .has_arg = required_argument, .val = 'b' },
	{ 0 }
};
461 | |
/* Print the usage line to stderr and terminate the process with @status. */
static void help(int status)
{
	static const char usage[] =
		"Usage: vhost_net_test [--help]"
		" [--no-indirect]"
		" [--no-event-idx]"
		" [--no-virtio-1]"
		" [--delayed-interrupt]"
		" [--buf-num]"
		"\n";

	fprintf(stderr, "%s", usage);

	exit(status);
}
474 | |
475 | int main(int argc, char **argv) |
476 | { |
477 | unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | |
478 | (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); |
479 | char tun_name[IFNAMSIZ]; |
480 | long nbufs = 0x100000; |
481 | struct vdev_info dev; |
482 | bool delayed = false; |
483 | int o, fd; |
484 | |
485 | for (;;) { |
486 | o = getopt_long(argc, argv, optstring, longopts, NULL); |
487 | switch (o) { |
488 | case -1: |
489 | goto done; |
490 | case '?': |
491 | help(status: 2); |
492 | case 'e': |
493 | features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); |
494 | break; |
495 | case 'h': |
496 | help(status: 0); |
497 | case 'i': |
498 | features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); |
499 | break; |
500 | case '0': |
501 | features &= ~(1ULL << VIRTIO_F_VERSION_1); |
502 | break; |
503 | case 'D': |
504 | delayed = true; |
505 | break; |
506 | case 'n': |
507 | nbufs = strtol(optarg, NULL, 10); |
508 | assert(nbufs > 0); |
509 | break; |
510 | default: |
511 | assert(0); |
512 | break; |
513 | } |
514 | } |
515 | |
516 | done: |
517 | memset(&dev, 0, sizeof(dev)); |
518 | snprintf(buf: tun_name, IFNAMSIZ, fmt: "tun_%d" , getpid()); |
519 | |
520 | fd = tun_alloc(dev: &dev, tun_name); |
521 | assert(fd >= 0); |
522 | |
523 | vdev_info_init(dev: &dev, features); |
524 | vq_info_add(dev: &dev, idx: 0, DESC_NUM, fd); |
525 | vq_info_add(dev: &dev, idx: 1, DESC_NUM, fd); |
526 | vdev_create_socket(dev: &dev, tun_name); |
527 | |
528 | run_rx_test(dev: &dev, vq: &dev.vqs[0], delayed, bufs: nbufs); |
529 | run_tx_test(dev: &dev, vq: &dev.vqs[1], delayed, bufs: nbufs); |
530 | |
531 | return 0; |
532 | } |
533 | |