1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) |
2 | |
3 | /* |
4 | * AF_XDP user-space access library. |
5 | * |
6 | * Copyright(c) 2018 - 2019 Intel Corporation. |
7 | * |
8 | * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> |
9 | */ |
10 | |
11 | #include <errno.h> |
12 | #include <stdlib.h> |
13 | #include <string.h> |
14 | #include <unistd.h> |
15 | #include <arpa/inet.h> |
16 | #include <asm/barrier.h> |
17 | #include <linux/compiler.h> |
18 | #include <linux/ethtool.h> |
19 | #include <linux/filter.h> |
20 | #include <linux/if_ether.h> |
21 | #include <linux/if_link.h> |
22 | #include <linux/if_packet.h> |
23 | #include <linux/if_xdp.h> |
24 | #include <linux/kernel.h> |
25 | #include <linux/list.h> |
26 | #include <linux/netlink.h> |
27 | #include <linux/rtnetlink.h> |
28 | #include <linux/sockios.h> |
29 | #include <net/if.h> |
30 | #include <sys/ioctl.h> |
31 | #include <sys/mman.h> |
32 | #include <sys/socket.h> |
33 | #include <sys/types.h> |
34 | |
35 | #include <bpf/bpf.h> |
36 | #include <bpf/libbpf.h> |
37 | #include "xsk.h" |
38 | #include "bpf_util.h" |
39 | |
40 | #ifndef SOL_XDP |
41 | #define SOL_XDP 283 |
42 | #endif |
43 | |
44 | #ifndef AF_XDP |
45 | #define AF_XDP 44 |
46 | #endif |
47 | |
48 | #ifndef PF_XDP |
49 | #define PF_XDP AF_XDP |
50 | #endif |
51 | |
52 | #define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) |
53 | |
54 | #define XSKMAP_SIZE 1 |
55 | |
/* A registered UMEM area, shareable between several AF_XDP sockets. */
struct xsk_umem {
	/* Fill/completion rings passed to xsk_umem__create(); stashed here so
	 * the first socket created on this umem can reuse them.
	 */
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;		/* start of the user-provided memory area */
	struct xsk_umem_config config;
	int fd;				/* AF_XDP socket fd the umem is registered on */
	int refcount;			/* number of sockets sharing this umem */
	struct list_head ctx_list;	/* per-(ifindex, queue_id) contexts */
	bool rx_ring_setup_done;	/* XDP_RX_RING already set on umem->fd */
	bool tx_ring_setup_done;	/* XDP_TX_RING already set on umem->fd */
};
67 | |
/* Per-(interface, queue) state shared by sockets bound to the same queue. */
struct xsk_ctx {
	struct xsk_ring_prod *fill;	/* fill ring for this queue */
	struct xsk_ring_cons *comp;	/* completion ring for this queue */
	__u32 queue_id;
	struct xsk_umem *umem;		/* umem this context belongs to */
	int refcount;			/* number of sockets using this context */
	int ifindex;
	struct list_head list;		/* linkage on umem->ctx_list */
};
77 | |
/* An AF_XDP socket together with its RX/TX rings. */
struct xsk_socket {
	struct xsk_ring_cons *rx;	/* NULL when no RX ring was requested */
	struct xsk_ring_prod *tx;	/* NULL when no TX ring was requested */
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};
85 | |
/* Netlink RTM_NEWLINK request used by xsk_set_mtu(); buf provides space
 * for the trailing IFLA_MTU attribute.
 */
struct nl_mtu_req {
	struct nlmsghdr nh;
	struct ifinfomsg msg;
	char buf[512];
};
91 | |
92 | int xsk_umem__fd(const struct xsk_umem *umem) |
93 | { |
94 | return umem ? umem->fd : -EINVAL; |
95 | } |
96 | |
97 | int xsk_socket__fd(const struct xsk_socket *xsk) |
98 | { |
99 | return xsk ? xsk->fd : -EINVAL; |
100 | } |
101 | |
/* True iff @buffer starts on a page boundary. */
static bool xsk_page_aligned(void *buffer)
{
	unsigned long mask = getpagesize() - 1;

	return ((unsigned long)buffer & mask) == 0;
}
108 | |
109 | static void xsk_set_umem_config(struct xsk_umem_config *cfg, |
110 | const struct xsk_umem_config *usr_cfg) |
111 | { |
112 | if (!usr_cfg) { |
113 | cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; |
114 | cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; |
115 | cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; |
116 | cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; |
117 | cfg->flags = XSK_UMEM__DEFAULT_FLAGS; |
118 | cfg->tx_metadata_len = 0; |
119 | return; |
120 | } |
121 | |
122 | cfg->fill_size = usr_cfg->fill_size; |
123 | cfg->comp_size = usr_cfg->comp_size; |
124 | cfg->frame_size = usr_cfg->frame_size; |
125 | cfg->frame_headroom = usr_cfg->frame_headroom; |
126 | cfg->flags = usr_cfg->flags; |
127 | cfg->tx_metadata_len = usr_cfg->tx_metadata_len; |
128 | } |
129 | |
130 | static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, |
131 | const struct xsk_socket_config *usr_cfg) |
132 | { |
133 | if (!usr_cfg) { |
134 | cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; |
135 | cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; |
136 | cfg->bind_flags = 0; |
137 | return 0; |
138 | } |
139 | |
140 | cfg->rx_size = usr_cfg->rx_size; |
141 | cfg->tx_size = usr_cfg->tx_size; |
142 | cfg->bind_flags = usr_cfg->bind_flags; |
143 | |
144 | return 0; |
145 | } |
146 | |
147 | static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) |
148 | { |
149 | socklen_t optlen; |
150 | int err; |
151 | |
152 | optlen = sizeof(*off); |
153 | err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); |
154 | if (err) |
155 | return err; |
156 | |
157 | if (optlen == sizeof(*off)) |
158 | return 0; |
159 | |
160 | return -EINVAL; |
161 | } |
162 | |
163 | static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, |
164 | struct xsk_ring_prod *fill, |
165 | struct xsk_ring_cons *comp) |
166 | { |
167 | struct xdp_mmap_offsets off; |
168 | void *map; |
169 | int err; |
170 | |
171 | err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, |
172 | &umem->config.fill_size, |
173 | sizeof(umem->config.fill_size)); |
174 | if (err) |
175 | return -errno; |
176 | |
177 | err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, |
178 | &umem->config.comp_size, |
179 | sizeof(umem->config.comp_size)); |
180 | if (err) |
181 | return -errno; |
182 | |
183 | err = xsk_get_mmap_offsets(fd, off: &off); |
184 | if (err) |
185 | return -errno; |
186 | |
187 | map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), |
188 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, |
189 | XDP_UMEM_PGOFF_FILL_RING); |
190 | if (map == MAP_FAILED) |
191 | return -errno; |
192 | |
193 | fill->mask = umem->config.fill_size - 1; |
194 | fill->size = umem->config.fill_size; |
195 | fill->producer = map + off.fr.producer; |
196 | fill->consumer = map + off.fr.consumer; |
197 | fill->flags = map + off.fr.flags; |
198 | fill->ring = map + off.fr.desc; |
199 | fill->cached_cons = umem->config.fill_size; |
200 | |
201 | map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), |
202 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, |
203 | XDP_UMEM_PGOFF_COMPLETION_RING); |
204 | if (map == MAP_FAILED) { |
205 | err = -errno; |
206 | goto out_mmap; |
207 | } |
208 | |
209 | comp->mask = umem->config.comp_size - 1; |
210 | comp->size = umem->config.comp_size; |
211 | comp->producer = map + off.cr.producer; |
212 | comp->consumer = map + off.cr.consumer; |
213 | comp->flags = map + off.cr.flags; |
214 | comp->ring = map + off.cr.desc; |
215 | |
216 | return 0; |
217 | |
218 | out_mmap: |
219 | munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); |
220 | return err; |
221 | } |
222 | |
223 | int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, |
224 | __u64 size, struct xsk_ring_prod *fill, |
225 | struct xsk_ring_cons *comp, |
226 | const struct xsk_umem_config *usr_config) |
227 | { |
228 | struct xdp_umem_reg mr; |
229 | struct xsk_umem *umem; |
230 | int err; |
231 | |
232 | if (!umem_area || !umem_ptr || !fill || !comp) |
233 | return -EFAULT; |
234 | if (!size && !xsk_page_aligned(buffer: umem_area)) |
235 | return -EINVAL; |
236 | |
237 | umem = calloc(1, sizeof(*umem)); |
238 | if (!umem) |
239 | return -ENOMEM; |
240 | |
241 | umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); |
242 | if (umem->fd < 0) { |
243 | err = -errno; |
244 | goto out_umem_alloc; |
245 | } |
246 | |
247 | umem->umem_area = umem_area; |
248 | INIT_LIST_HEAD(list: &umem->ctx_list); |
249 | xsk_set_umem_config(cfg: &umem->config, usr_cfg: usr_config); |
250 | |
251 | memset(&mr, 0, sizeof(mr)); |
252 | mr.addr = (uintptr_t)umem_area; |
253 | mr.len = size; |
254 | mr.chunk_size = umem->config.frame_size; |
255 | mr.headroom = umem->config.frame_headroom; |
256 | mr.flags = umem->config.flags; |
257 | mr.tx_metadata_len = umem->config.tx_metadata_len; |
258 | |
259 | err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); |
260 | if (err) { |
261 | err = -errno; |
262 | goto out_socket; |
263 | } |
264 | |
265 | err = xsk_create_umem_rings(umem, fd: umem->fd, fill, comp); |
266 | if (err) |
267 | goto out_socket; |
268 | |
269 | umem->fill_save = fill; |
270 | umem->comp_save = comp; |
271 | *umem_ptr = umem; |
272 | return 0; |
273 | |
274 | out_socket: |
275 | close(umem->fd); |
276 | out_umem_alloc: |
277 | free(umem); |
278 | return err; |
279 | } |
280 | |
281 | bool xsk_is_in_mode(u32 ifindex, int mode) |
282 | { |
283 | LIBBPF_OPTS(bpf_xdp_query_opts, opts); |
284 | int ret; |
285 | |
286 | ret = bpf_xdp_query(ifindex, mode, &opts); |
287 | if (ret) { |
288 | printf("XDP mode query returned error %s\n" , strerror(errno)); |
289 | return false; |
290 | } |
291 | |
292 | if (mode == XDP_FLAGS_DRV_MODE) |
293 | return opts.attach_mode == XDP_ATTACHED_DRV; |
294 | else if (mode == XDP_FLAGS_SKB_MODE) |
295 | return opts.attach_mode == XDP_ATTACHED_SKB; |
296 | |
297 | return false; |
298 | } |
299 | |
300 | /* Lifted from netlink.c in tools/lib/bpf */ |
/* recvmsg() wrapper that retries on EINTR/EAGAIN.  Returns the number of
 * bytes received or -errno.
 */
static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
{
	int len;

	for (;;) {
		len = recvmsg(sock, mhdr, flags);
		if (len >= 0)
			return len;
		if (errno != EINTR && errno != EAGAIN)
			return -errno;
	}
}
313 | |
314 | /* Lifted from netlink.c in tools/lib/bpf */ |
/* Grow iov->iov_base to @len bytes, preserving its contents.  On failure
 * returns -ENOMEM and leaves the old buffer untouched.
 */
static int alloc_iov(struct iovec *iov, int len)
{
	void *grown = realloc(iov->iov_base, len);

	if (!grown)
		return -ENOMEM;

	iov->iov_base = grown;
	iov->iov_len = len;
	return 0;
}
327 | |
328 | /* Original version lifted from netlink.c in tools/lib/bpf */ |
/* Drain netlink replies on @sock until NLMSG_DONE, a message carrying an
 * error, or the end of a non-multipart reply.  Returns 0 on success or a
 * negative error (either -errno or the nlmsgerr error code).
 */
static int netlink_recv(int sock)
{
	struct iovec iov = {};
	struct msghdr mhdr = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	bool multipart = true;
	struct nlmsgerr *err;
	struct nlmsghdr *nh;
	int len, ret;

	ret = alloc_iov(&iov, 4096);
	if (ret)
		goto done;

	while (multipart) {
		multipart = false;
		/* Peek with MSG_TRUNC to learn the full pending size. */
		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
		if (len < 0) {
			ret = len;
			goto done;
		}

		/* Grow the buffer if the message does not fit. */
		if ((size_t)len > iov.iov_len) {
			ret = alloc_iov(&iov, len);
			if (ret)
				goto done;
		}

		len = netlink_recvmsg(sock, &mhdr, 0);
		if (len < 0) {
			ret = len;
			goto done;
		}

		if (len == 0)
			break;

		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
		     nh = NLMSG_NEXT(nh, len)) {
			if (nh->nlmsg_flags & NLM_F_MULTI)
				multipart = true;
			switch (nh->nlmsg_type) {
			case NLMSG_ERROR:
				err = (struct nlmsgerr *)NLMSG_DATA(nh);
				if (!err->error)
					continue;	/* ACK, not an error */
				ret = err->error;
				goto done;
			case NLMSG_DONE:
				ret = 0;
				goto done;
			default:
				break;
			}
		}
	}
	ret = 0;
done:
	free(iov.iov_base);
	return ret;
}
392 | |
393 | int xsk_set_mtu(int ifindex, int mtu) |
394 | { |
395 | struct nl_mtu_req req; |
396 | struct rtattr *rta; |
397 | int fd, ret; |
398 | |
399 | fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); |
400 | if (fd < 0) |
401 | return fd; |
402 | |
403 | memset(&req, 0, sizeof(req)); |
404 | req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); |
405 | req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; |
406 | req.nh.nlmsg_type = RTM_NEWLINK; |
407 | req.msg.ifi_family = AF_UNSPEC; |
408 | req.msg.ifi_index = ifindex; |
409 | rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); |
410 | rta->rta_type = IFLA_MTU; |
411 | rta->rta_len = RTA_LENGTH(sizeof(unsigned int)); |
412 | req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu)); |
413 | memcpy(RTA_DATA(rta), &mtu, sizeof(mtu)); |
414 | |
415 | ret = send(fd, &req, req.nh.nlmsg_len, 0); |
416 | if (ret < 0) { |
417 | close(fd); |
418 | return errno; |
419 | } |
420 | |
421 | ret = netlink_recv(sock: fd); |
422 | close(fd); |
423 | return ret; |
424 | } |
425 | |
426 | int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags) |
427 | { |
428 | int prog_fd; |
429 | |
430 | prog_fd = bpf_program__fd(prog); |
431 | return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL); |
432 | } |
433 | |
434 | void xsk_detach_xdp_program(int ifindex, u32 xdp_flags) |
435 | { |
436 | bpf_xdp_detach(ifindex, xdp_flags, NULL); |
437 | } |
438 | |
439 | void xsk_clear_xskmap(struct bpf_map *map) |
440 | { |
441 | u32 index = 0; |
442 | int map_fd; |
443 | |
444 | map_fd = bpf_map__fd(map); |
445 | bpf_map_delete_elem(map_fd, &index); |
446 | } |
447 | |
448 | int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index) |
449 | { |
450 | int map_fd, sock_fd; |
451 | |
452 | map_fd = bpf_map__fd(map); |
453 | sock_fd = xsk_socket__fd(xsk); |
454 | |
455 | return bpf_map_update_elem(map_fd, &index, &sock_fd, 0); |
456 | } |
457 | |
458 | static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, |
459 | __u32 queue_id) |
460 | { |
461 | struct xsk_ctx *ctx; |
462 | |
463 | if (list_empty(head: &umem->ctx_list)) |
464 | return NULL; |
465 | |
466 | list_for_each_entry(ctx, &umem->ctx_list, list) { |
467 | if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { |
468 | ctx->refcount++; |
469 | return ctx; |
470 | } |
471 | } |
472 | |
473 | return NULL; |
474 | } |
475 | |
476 | static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) |
477 | { |
478 | struct xsk_umem *umem = ctx->umem; |
479 | struct xdp_mmap_offsets off; |
480 | int err; |
481 | |
482 | if (--ctx->refcount) |
483 | return; |
484 | |
485 | if (!unmap) |
486 | goto out_free; |
487 | |
488 | err = xsk_get_mmap_offsets(fd: umem->fd, off: &off); |
489 | if (err) |
490 | goto out_free; |
491 | |
492 | munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * |
493 | sizeof(__u64)); |
494 | munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * |
495 | sizeof(__u64)); |
496 | |
497 | out_free: |
498 | list_del(entry: &ctx->list); |
499 | free(ctx); |
500 | } |
501 | |
502 | static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, |
503 | struct xsk_umem *umem, int ifindex, |
504 | __u32 queue_id, |
505 | struct xsk_ring_prod *fill, |
506 | struct xsk_ring_cons *comp) |
507 | { |
508 | struct xsk_ctx *ctx; |
509 | int err; |
510 | |
511 | ctx = calloc(1, sizeof(*ctx)); |
512 | if (!ctx) |
513 | return NULL; |
514 | |
515 | if (!umem->fill_save) { |
516 | err = xsk_create_umem_rings(umem, fd: xsk->fd, fill, comp); |
517 | if (err) { |
518 | free(ctx); |
519 | return NULL; |
520 | } |
521 | } else if (umem->fill_save != fill || umem->comp_save != comp) { |
522 | /* Copy over rings to new structs. */ |
523 | memcpy(fill, umem->fill_save, sizeof(*fill)); |
524 | memcpy(comp, umem->comp_save, sizeof(*comp)); |
525 | } |
526 | |
527 | ctx->ifindex = ifindex; |
528 | ctx->refcount = 1; |
529 | ctx->umem = umem; |
530 | ctx->queue_id = queue_id; |
531 | |
532 | ctx->fill = fill; |
533 | ctx->comp = comp; |
534 | list_add(new: &ctx->list, head: &umem->ctx_list); |
535 | return ctx; |
536 | } |
537 | |
538 | int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, |
539 | int ifindex, |
540 | __u32 queue_id, struct xsk_umem *umem, |
541 | struct xsk_ring_cons *rx, |
542 | struct xsk_ring_prod *tx, |
543 | struct xsk_ring_prod *fill, |
544 | struct xsk_ring_cons *comp, |
545 | const struct xsk_socket_config *usr_config) |
546 | { |
547 | bool unmap, rx_setup_done = false, tx_setup_done = false; |
548 | void *rx_map = NULL, *tx_map = NULL; |
549 | struct sockaddr_xdp sxdp = {}; |
550 | struct xdp_mmap_offsets off; |
551 | struct xsk_socket *xsk; |
552 | struct xsk_ctx *ctx; |
553 | int err; |
554 | |
555 | if (!umem || !xsk_ptr || !(rx || tx)) |
556 | return -EFAULT; |
557 | |
558 | unmap = umem->fill_save != fill; |
559 | |
560 | xsk = calloc(1, sizeof(*xsk)); |
561 | if (!xsk) |
562 | return -ENOMEM; |
563 | |
564 | err = xsk_set_xdp_socket_config(cfg: &xsk->config, usr_cfg: usr_config); |
565 | if (err) |
566 | goto out_xsk_alloc; |
567 | |
568 | if (umem->refcount++ > 0) { |
569 | xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); |
570 | if (xsk->fd < 0) { |
571 | err = -errno; |
572 | goto out_xsk_alloc; |
573 | } |
574 | } else { |
575 | xsk->fd = umem->fd; |
576 | rx_setup_done = umem->rx_ring_setup_done; |
577 | tx_setup_done = umem->tx_ring_setup_done; |
578 | } |
579 | |
580 | ctx = xsk_get_ctx(umem, ifindex, queue_id); |
581 | if (!ctx) { |
582 | if (!fill || !comp) { |
583 | err = -EFAULT; |
584 | goto out_socket; |
585 | } |
586 | |
587 | ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp); |
588 | if (!ctx) { |
589 | err = -ENOMEM; |
590 | goto out_socket; |
591 | } |
592 | } |
593 | xsk->ctx = ctx; |
594 | |
595 | if (rx && !rx_setup_done) { |
596 | err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, |
597 | &xsk->config.rx_size, |
598 | sizeof(xsk->config.rx_size)); |
599 | if (err) { |
600 | err = -errno; |
601 | goto out_put_ctx; |
602 | } |
603 | if (xsk->fd == umem->fd) |
604 | umem->rx_ring_setup_done = true; |
605 | } |
606 | if (tx && !tx_setup_done) { |
607 | err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, |
608 | &xsk->config.tx_size, |
609 | sizeof(xsk->config.tx_size)); |
610 | if (err) { |
611 | err = -errno; |
612 | goto out_put_ctx; |
613 | } |
614 | if (xsk->fd == umem->fd) |
615 | umem->tx_ring_setup_done = true; |
616 | } |
617 | |
618 | err = xsk_get_mmap_offsets(fd: xsk->fd, off: &off); |
619 | if (err) { |
620 | err = -errno; |
621 | goto out_put_ctx; |
622 | } |
623 | |
624 | if (rx) { |
625 | rx_map = mmap(NULL, off.rx.desc + |
626 | xsk->config.rx_size * sizeof(struct xdp_desc), |
627 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, |
628 | xsk->fd, XDP_PGOFF_RX_RING); |
629 | if (rx_map == MAP_FAILED) { |
630 | err = -errno; |
631 | goto out_put_ctx; |
632 | } |
633 | |
634 | rx->mask = xsk->config.rx_size - 1; |
635 | rx->size = xsk->config.rx_size; |
636 | rx->producer = rx_map + off.rx.producer; |
637 | rx->consumer = rx_map + off.rx.consumer; |
638 | rx->flags = rx_map + off.rx.flags; |
639 | rx->ring = rx_map + off.rx.desc; |
640 | rx->cached_prod = *rx->producer; |
641 | rx->cached_cons = *rx->consumer; |
642 | } |
643 | xsk->rx = rx; |
644 | |
645 | if (tx) { |
646 | tx_map = mmap(NULL, off.tx.desc + |
647 | xsk->config.tx_size * sizeof(struct xdp_desc), |
648 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, |
649 | xsk->fd, XDP_PGOFF_TX_RING); |
650 | if (tx_map == MAP_FAILED) { |
651 | err = -errno; |
652 | goto out_mmap_rx; |
653 | } |
654 | |
655 | tx->mask = xsk->config.tx_size - 1; |
656 | tx->size = xsk->config.tx_size; |
657 | tx->producer = tx_map + off.tx.producer; |
658 | tx->consumer = tx_map + off.tx.consumer; |
659 | tx->flags = tx_map + off.tx.flags; |
660 | tx->ring = tx_map + off.tx.desc; |
661 | tx->cached_prod = *tx->producer; |
662 | /* cached_cons is r->size bigger than the real consumer pointer |
663 | * See xsk_prod_nb_free |
664 | */ |
665 | tx->cached_cons = *tx->consumer + xsk->config.tx_size; |
666 | } |
667 | xsk->tx = tx; |
668 | |
669 | sxdp.sxdp_family = PF_XDP; |
670 | sxdp.sxdp_ifindex = ctx->ifindex; |
671 | sxdp.sxdp_queue_id = ctx->queue_id; |
672 | if (umem->refcount > 1) { |
673 | sxdp.sxdp_flags |= XDP_SHARED_UMEM; |
674 | sxdp.sxdp_shared_umem_fd = umem->fd; |
675 | } else { |
676 | sxdp.sxdp_flags = xsk->config.bind_flags; |
677 | } |
678 | |
679 | err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); |
680 | if (err) { |
681 | err = -errno; |
682 | goto out_mmap_tx; |
683 | } |
684 | |
685 | *xsk_ptr = xsk; |
686 | umem->fill_save = NULL; |
687 | umem->comp_save = NULL; |
688 | return 0; |
689 | |
690 | out_mmap_tx: |
691 | if (tx) |
692 | munmap(tx_map, off.tx.desc + |
693 | xsk->config.tx_size * sizeof(struct xdp_desc)); |
694 | out_mmap_rx: |
695 | if (rx) |
696 | munmap(rx_map, off.rx.desc + |
697 | xsk->config.rx_size * sizeof(struct xdp_desc)); |
698 | out_put_ctx: |
699 | xsk_put_ctx(ctx, unmap); |
700 | out_socket: |
701 | if (--umem->refcount) |
702 | close(xsk->fd); |
703 | out_xsk_alloc: |
704 | free(xsk); |
705 | return err; |
706 | } |
707 | |
708 | int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex, |
709 | __u32 queue_id, struct xsk_umem *umem, |
710 | struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, |
711 | const struct xsk_socket_config *usr_config) |
712 | { |
713 | if (!umem) |
714 | return -EFAULT; |
715 | |
716 | return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem, |
717 | rx, tx, fill: umem->fill_save, |
718 | comp: umem->comp_save, usr_config); |
719 | } |
720 | |
721 | int xsk_umem__delete(struct xsk_umem *umem) |
722 | { |
723 | struct xdp_mmap_offsets off; |
724 | int err; |
725 | |
726 | if (!umem) |
727 | return 0; |
728 | |
729 | if (umem->refcount) |
730 | return -EBUSY; |
731 | |
732 | err = xsk_get_mmap_offsets(fd: umem->fd, off: &off); |
733 | if (!err && umem->fill_save && umem->comp_save) { |
734 | munmap(umem->fill_save->ring - off.fr.desc, |
735 | off.fr.desc + umem->config.fill_size * sizeof(__u64)); |
736 | munmap(umem->comp_save->ring - off.cr.desc, |
737 | off.cr.desc + umem->config.comp_size * sizeof(__u64)); |
738 | } |
739 | |
740 | close(umem->fd); |
741 | free(umem); |
742 | |
743 | return 0; |
744 | } |
745 | |
746 | void xsk_socket__delete(struct xsk_socket *xsk) |
747 | { |
748 | size_t desc_sz = sizeof(struct xdp_desc); |
749 | struct xdp_mmap_offsets off; |
750 | struct xsk_umem *umem; |
751 | struct xsk_ctx *ctx; |
752 | int err; |
753 | |
754 | if (!xsk) |
755 | return; |
756 | |
757 | ctx = xsk->ctx; |
758 | umem = ctx->umem; |
759 | |
760 | xsk_put_ctx(ctx, unmap: true); |
761 | |
762 | err = xsk_get_mmap_offsets(fd: xsk->fd, off: &off); |
763 | if (!err) { |
764 | if (xsk->rx) { |
765 | munmap(xsk->rx->ring - off.rx.desc, |
766 | off.rx.desc + xsk->config.rx_size * desc_sz); |
767 | } |
768 | if (xsk->tx) { |
769 | munmap(xsk->tx->ring - off.tx.desc, |
770 | off.tx.desc + xsk->config.tx_size * desc_sz); |
771 | } |
772 | } |
773 | |
774 | umem->refcount--; |
775 | /* Do not close an fd that also has an associated umem connected |
776 | * to it. |
777 | */ |
778 | if (xsk->fd != umem->fd) |
779 | close(xsk->fd); |
780 | free(xsk); |
781 | } |
782 | |