// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2023 Isovalent */

#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/netfilter_netdev.h>
#include <linux/bpf_mprog.h>

#include <net/netkit.h>
#include <net/dst.h>
#include <net/tcx.h>

/* Driver name: reported via ethtool and used as the rtnl link kind. */
#define DRV_NAME "netkit"

17struct netkit {
18 /* Needed in fast-path */
19 struct net_device __rcu *peer;
20 struct bpf_mprog_entry __rcu *active;
21 enum netkit_action policy;
22 struct bpf_mprog_bundle bundle;
23
24 /* Needed in slow-path */
25 enum netkit_mode mode;
26 bool primary;
27 u32 headroom;
28};
29
30struct netkit_link {
31 struct bpf_link link;
32 struct net_device *dev;
33 u32 location;
34};
35
36static __always_inline int
37netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
38 enum netkit_action ret)
39{
40 const struct bpf_mprog_fp *fp;
41 const struct bpf_prog *prog;
42
43 bpf_mprog_foreach_prog(entry, fp, prog) {
44 bpf_compute_data_pointers(skb);
45 ret = bpf_prog_run(prog, ctx: skb);
46 if (ret != NETKIT_NEXT)
47 break;
48 }
49 return ret;
50}
51
52static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
53{
54 skb_scrub_packet(skb, xnet);
55 skb->priority = 0;
56 nf_skip_egress(skb, skip: true);
57}
58
/* Return the netkit private state embedded in @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	return netdev_priv(dev);
}

64static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
65{
66 struct netkit *nk = netkit_priv(dev);
67 enum netkit_action ret = READ_ONCE(nk->policy);
68 netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
69 const struct bpf_mprog_entry *entry;
70 struct net_device *peer;
71
72 rcu_read_lock();
73 peer = rcu_dereference(nk->peer);
74 if (unlikely(!peer || !(peer->flags & IFF_UP) ||
75 !pskb_may_pull(skb, ETH_HLEN) ||
76 skb_orphan_frags(skb, GFP_ATOMIC)))
77 goto drop;
78 netkit_prep_forward(skb, xnet: !net_eq(net1: dev_net(dev), net2: dev_net(dev: peer)));
79 skb->dev = peer;
80 entry = rcu_dereference(nk->active);
81 if (entry)
82 ret = netkit_run(entry, skb, ret);
83 switch (ret) {
84 case NETKIT_NEXT:
85 case NETKIT_PASS:
86 skb->protocol = eth_type_trans(skb, dev: skb->dev);
87 skb_postpull_rcsum(skb, start: eth_hdr(skb), ETH_HLEN);
88 __netif_rx(skb);
89 break;
90 case NETKIT_REDIRECT:
91 skb_do_redirect(skb);
92 break;
93 case NETKIT_DROP:
94 default:
95drop:
96 kfree_skb(skb);
97 dev_core_stats_tx_dropped_inc(dev);
98 ret_dev = NET_XMIT_DROP;
99 break;
100 }
101 rcu_read_unlock();
102 return ret_dev;
103}
104
105static int netkit_open(struct net_device *dev)
106{
107 struct netkit *nk = netkit_priv(dev);
108 struct net_device *peer = rtnl_dereference(nk->peer);
109
110 if (!peer)
111 return -ENOTCONN;
112 if (peer->flags & IFF_UP) {
113 netif_carrier_on(dev);
114 netif_carrier_on(dev: peer);
115 }
116 return 0;
117}
118
119static int netkit_close(struct net_device *dev)
120{
121 struct netkit *nk = netkit_priv(dev);
122 struct net_device *peer = rtnl_dereference(nk->peer);
123
124 netif_carrier_off(dev);
125 if (peer)
126 netif_carrier_off(dev: peer);
127 return 0;
128}
129
130static int netkit_get_iflink(const struct net_device *dev)
131{
132 struct netkit *nk = netkit_priv(dev);
133 struct net_device *peer;
134 int iflink = 0;
135
136 rcu_read_lock();
137 peer = rcu_dereference(nk->peer);
138 if (peer)
139 iflink = peer->ifindex;
140 rcu_read_unlock();
141 return iflink;
142}
143
/* ndo_set_rx_mode: intentionally empty, no filtering is configured. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* Nothing to do, we receive whatever gets pushed to us! */
}

149static void netkit_set_headroom(struct net_device *dev, int headroom)
150{
151 struct netkit *nk = netkit_priv(dev), *nk2;
152 struct net_device *peer;
153
154 if (headroom < 0)
155 headroom = NET_SKB_PAD;
156
157 rcu_read_lock();
158 peer = rcu_dereference(nk->peer);
159 if (unlikely(!peer))
160 goto out;
161
162 nk2 = netkit_priv(dev: peer);
163 nk->headroom = headroom;
164 headroom = max(nk->headroom, nk2->headroom);
165
166 peer->needed_headroom = headroom;
167 dev->needed_headroom = headroom;
168out:
169 rcu_read_unlock();
170}
171
172static struct net_device *netkit_peer_dev(struct net_device *dev)
173{
174 return rcu_dereference(netkit_priv(dev)->peer);
175}
176
177static void netkit_uninit(struct net_device *dev);
178
179static const struct net_device_ops netkit_netdev_ops = {
180 .ndo_open = netkit_open,
181 .ndo_stop = netkit_close,
182 .ndo_start_xmit = netkit_xmit,
183 .ndo_set_rx_mode = netkit_set_multicast,
184 .ndo_set_rx_headroom = netkit_set_headroom,
185 .ndo_get_iflink = netkit_get_iflink,
186 .ndo_get_peer_dev = netkit_peer_dev,
187 .ndo_uninit = netkit_uninit,
188 .ndo_features_check = passthru_features_check,
189};
190
191static void netkit_get_drvinfo(struct net_device *dev,
192 struct ethtool_drvinfo *info)
193{
194 strscpy(p: info->driver, DRV_NAME, size: sizeof(info->driver));
195}
196
197static const struct ethtool_ops netkit_ethtool_ops = {
198 .get_drvinfo = netkit_get_drvinfo,
199};
200
201static void netkit_setup(struct net_device *dev)
202{
203 static const netdev_features_t netkit_features_hw_vlan =
204 NETIF_F_HW_VLAN_CTAG_TX |
205 NETIF_F_HW_VLAN_CTAG_RX |
206 NETIF_F_HW_VLAN_STAG_TX |
207 NETIF_F_HW_VLAN_STAG_RX;
208 static const netdev_features_t netkit_features =
209 netkit_features_hw_vlan |
210 NETIF_F_SG |
211 NETIF_F_FRAGLIST |
212 NETIF_F_HW_CSUM |
213 NETIF_F_RXCSUM |
214 NETIF_F_SCTP_CRC |
215 NETIF_F_HIGHDMA |
216 NETIF_F_GSO_SOFTWARE |
217 NETIF_F_GSO_ENCAP_ALL;
218
219 ether_setup(dev);
220 dev->max_mtu = ETH_MAX_MTU;
221
222 dev->flags |= IFF_NOARP;
223 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
224 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
225 dev->priv_flags |= IFF_PHONY_HEADROOM;
226 dev->priv_flags |= IFF_NO_QUEUE;
227
228 dev->ethtool_ops = &netkit_ethtool_ops;
229 dev->netdev_ops = &netkit_netdev_ops;
230
231 dev->features |= netkit_features | NETIF_F_LLTX;
232 dev->hw_features = netkit_features;
233 dev->hw_enc_features = netkit_features;
234 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
235 dev->vlan_features = dev->features & ~netkit_features_hw_vlan;
236
237 dev->needs_free_netdev = true;
238
239 netif_set_tso_max_size(dev, GSO_MAX_SIZE);
240}
241
242static struct net *netkit_get_link_net(const struct net_device *dev)
243{
244 struct netkit *nk = netkit_priv(dev);
245 struct net_device *peer = rtnl_dereference(nk->peer);
246
247 return peer ? dev_net(dev: peer) : dev_net(dev);
248}
249
250static int netkit_check_policy(int policy, struct nlattr *tb,
251 struct netlink_ext_ack *extack)
252{
253 switch (policy) {
254 case NETKIT_PASS:
255 case NETKIT_DROP:
256 return 0;
257 default:
258 NL_SET_ERR_MSG_ATTR(extack, tb,
259 "Provided default xmit policy not supported");
260 return -EINVAL;
261 }
262}
263
264static int netkit_check_mode(int mode, struct nlattr *tb,
265 struct netlink_ext_ack *extack)
266{
267 switch (mode) {
268 case NETKIT_L2:
269 case NETKIT_L3:
270 return 0;
271 default:
272 NL_SET_ERR_MSG_ATTR(extack, tb,
273 "Provided device mode can only be L2 or L3");
274 return -EINVAL;
275 }
276}
277
278static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
279 struct netlink_ext_ack *extack)
280{
281 struct nlattr *attr = tb[IFLA_ADDRESS];
282
283 if (!attr)
284 return 0;
285 NL_SET_ERR_MSG_ATTR(extack, attr,
286 "Setting Ethernet address is not supported");
287 return -EOPNOTSUPP;
288}
289
290static struct rtnl_link_ops netkit_link_ops;
291
292static int netkit_new_link(struct net *src_net, struct net_device *dev,
293 struct nlattr *tb[], struct nlattr *data[],
294 struct netlink_ext_ack *extack)
295{
296 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
297 enum netkit_action default_prim = NETKIT_PASS;
298 enum netkit_action default_peer = NETKIT_PASS;
299 enum netkit_mode mode = NETKIT_L3;
300 unsigned char ifname_assign_type;
301 struct ifinfomsg *ifmp = NULL;
302 struct net_device *peer;
303 char ifname[IFNAMSIZ];
304 struct netkit *nk;
305 struct net *net;
306 int err;
307
308 if (data) {
309 if (data[IFLA_NETKIT_MODE]) {
310 attr = data[IFLA_NETKIT_MODE];
311 mode = nla_get_u32(nla: attr);
312 err = netkit_check_mode(mode, tb: attr, extack);
313 if (err < 0)
314 return err;
315 }
316 if (data[IFLA_NETKIT_PEER_INFO]) {
317 attr = data[IFLA_NETKIT_PEER_INFO];
318 ifmp = nla_data(nla: attr);
319 err = rtnl_nla_parse_ifinfomsg(tb: peer_tb, nla_peer: attr, exterr: extack);
320 if (err < 0)
321 return err;
322 err = netkit_validate(tb: peer_tb, NULL, extack);
323 if (err < 0)
324 return err;
325 tbp = peer_tb;
326 }
327 if (data[IFLA_NETKIT_POLICY]) {
328 attr = data[IFLA_NETKIT_POLICY];
329 default_prim = nla_get_u32(nla: attr);
330 err = netkit_check_policy(policy: default_prim, tb: attr, extack);
331 if (err < 0)
332 return err;
333 }
334 if (data[IFLA_NETKIT_PEER_POLICY]) {
335 attr = data[IFLA_NETKIT_PEER_POLICY];
336 default_peer = nla_get_u32(nla: attr);
337 err = netkit_check_policy(policy: default_peer, tb: attr, extack);
338 if (err < 0)
339 return err;
340 }
341 }
342
343 if (ifmp && tbp[IFLA_IFNAME]) {
344 nla_strscpy(dst: ifname, nla: tbp[IFLA_IFNAME], IFNAMSIZ);
345 ifname_assign_type = NET_NAME_USER;
346 } else {
347 strscpy(p: ifname, q: "nk%d", IFNAMSIZ);
348 ifname_assign_type = NET_NAME_ENUM;
349 }
350
351 net = rtnl_link_get_net(src_net, tb: tbp);
352 if (IS_ERR(ptr: net))
353 return PTR_ERR(ptr: net);
354
355 peer = rtnl_create_link(net, ifname, name_assign_type: ifname_assign_type,
356 ops: &netkit_link_ops, tb: tbp, extack);
357 if (IS_ERR(ptr: peer)) {
358 put_net(net);
359 return PTR_ERR(ptr: peer);
360 }
361
362 netif_inherit_tso_max(to: peer, from: dev);
363
364 if (mode == NETKIT_L2)
365 eth_hw_addr_random(dev: peer);
366 if (ifmp && dev->ifindex)
367 peer->ifindex = ifmp->ifi_index;
368
369 nk = netkit_priv(dev: peer);
370 nk->primary = false;
371 nk->policy = default_peer;
372 nk->mode = mode;
373 bpf_mprog_bundle_init(bundle: &nk->bundle);
374
375 err = register_netdevice(dev: peer);
376 put_net(net);
377 if (err < 0)
378 goto err_register_peer;
379 netif_carrier_off(dev: peer);
380 if (mode == NETKIT_L2)
381 dev_change_flags(dev: peer, flags: peer->flags & ~IFF_NOARP, NULL);
382
383 err = rtnl_configure_link(dev: peer, NULL, portid: 0, NULL);
384 if (err < 0)
385 goto err_configure_peer;
386
387 if (mode == NETKIT_L2)
388 eth_hw_addr_random(dev);
389 if (tb[IFLA_IFNAME])
390 nla_strscpy(dst: dev->name, nla: tb[IFLA_IFNAME], IFNAMSIZ);
391 else
392 strscpy(p: dev->name, q: "nk%d", IFNAMSIZ);
393
394 nk = netkit_priv(dev);
395 nk->primary = true;
396 nk->policy = default_prim;
397 nk->mode = mode;
398 bpf_mprog_bundle_init(bundle: &nk->bundle);
399
400 err = register_netdevice(dev);
401 if (err < 0)
402 goto err_configure_peer;
403 netif_carrier_off(dev);
404 if (mode == NETKIT_L2)
405 dev_change_flags(dev, flags: dev->flags & ~IFF_NOARP, NULL);
406
407 rcu_assign_pointer(netkit_priv(dev)->peer, peer);
408 rcu_assign_pointer(netkit_priv(peer)->peer, dev);
409 return 0;
410err_configure_peer:
411 unregister_netdevice(dev: peer);
412 return err;
413err_register_peer:
414 free_netdev(dev: peer);
415 return err;
416}
417
418static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev,
419 bool bundle_fallback)
420{
421 struct netkit *nk = netkit_priv(dev);
422 struct bpf_mprog_entry *entry;
423
424 ASSERT_RTNL();
425 entry = rcu_dereference_rtnl(nk->active);
426 if (entry)
427 return entry;
428 if (bundle_fallback)
429 return &nk->bundle.a;
430 return NULL;
431}
432
433static void netkit_entry_update(struct net_device *dev,
434 struct bpf_mprog_entry *entry)
435{
436 struct netkit *nk = netkit_priv(dev);
437
438 ASSERT_RTNL();
439 rcu_assign_pointer(nk->active, entry);
440}
441
/* Wait for an RCU grace period after the active entry was replaced. */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}

447static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which)
448{
449 struct net_device *dev;
450 struct netkit *nk;
451
452 ASSERT_RTNL();
453
454 switch (which) {
455 case BPF_NETKIT_PRIMARY:
456 case BPF_NETKIT_PEER:
457 break;
458 default:
459 return ERR_PTR(error: -EINVAL);
460 }
461
462 dev = __dev_get_by_index(net, ifindex);
463 if (!dev)
464 return ERR_PTR(error: -ENODEV);
465 if (dev->netdev_ops != &netkit_netdev_ops)
466 return ERR_PTR(error: -ENXIO);
467
468 nk = netkit_priv(dev);
469 if (!nk->primary)
470 return ERR_PTR(error: -EACCES);
471 if (which == BPF_NETKIT_PEER) {
472 dev = rcu_dereference_rtnl(nk->peer);
473 if (!dev)
474 return ERR_PTR(error: -ENODEV);
475 }
476 return dev;
477}
478
479int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
480{
481 struct bpf_mprog_entry *entry, *entry_new;
482 struct bpf_prog *replace_prog = NULL;
483 struct net_device *dev;
484 int ret;
485
486 rtnl_lock();
487 dev = netkit_dev_fetch(current->nsproxy->net_ns, ifindex: attr->target_ifindex,
488 which: attr->attach_type);
489 if (IS_ERR(ptr: dev)) {
490 ret = PTR_ERR(ptr: dev);
491 goto out;
492 }
493 entry = netkit_entry_fetch(dev, bundle_fallback: true);
494 if (attr->attach_flags & BPF_F_REPLACE) {
495 replace_prog = bpf_prog_get_type(ufd: attr->replace_bpf_fd,
496 type: prog->type);
497 if (IS_ERR(ptr: replace_prog)) {
498 ret = PTR_ERR(ptr: replace_prog);
499 replace_prog = NULL;
500 goto out;
501 }
502 }
503 ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: prog, NULL, prog_old: replace_prog,
504 flags: attr->attach_flags, id_or_fd: attr->relative_fd,
505 revision: attr->expected_revision);
506 if (!ret) {
507 if (entry != entry_new) {
508 netkit_entry_update(dev, entry: entry_new);
509 netkit_entry_sync();
510 }
511 bpf_mprog_commit(entry);
512 }
513out:
514 if (replace_prog)
515 bpf_prog_put(prog: replace_prog);
516 rtnl_unlock();
517 return ret;
518}
519
520int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
521{
522 struct bpf_mprog_entry *entry, *entry_new;
523 struct net_device *dev;
524 int ret;
525
526 rtnl_lock();
527 dev = netkit_dev_fetch(current->nsproxy->net_ns, ifindex: attr->target_ifindex,
528 which: attr->attach_type);
529 if (IS_ERR(ptr: dev)) {
530 ret = PTR_ERR(ptr: dev);
531 goto out;
532 }
533 entry = netkit_entry_fetch(dev, bundle_fallback: false);
534 if (!entry) {
535 ret = -ENOENT;
536 goto out;
537 }
538 ret = bpf_mprog_detach(entry, entry_new: &entry_new, prog, NULL, flags: attr->attach_flags,
539 id_or_fd: attr->relative_fd, revision: attr->expected_revision);
540 if (!ret) {
541 if (!bpf_mprog_total(entry: entry_new))
542 entry_new = NULL;
543 netkit_entry_update(dev, entry: entry_new);
544 netkit_entry_sync();
545 bpf_mprog_commit(entry);
546 }
547out:
548 rtnl_unlock();
549 return ret;
550}
551
552int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
553{
554 struct net_device *dev;
555 int ret;
556
557 rtnl_lock();
558 dev = netkit_dev_fetch(current->nsproxy->net_ns,
559 ifindex: attr->query.target_ifindex,
560 which: attr->query.attach_type);
561 if (IS_ERR(ptr: dev)) {
562 ret = PTR_ERR(ptr: dev);
563 goto out;
564 }
565 ret = bpf_mprog_query(attr, uattr, entry: netkit_entry_fetch(dev, bundle_fallback: false));
566out:
567 rtnl_unlock();
568 return ret;
569}
570
571static struct netkit_link *netkit_link(const struct bpf_link *link)
572{
573 return container_of(link, struct netkit_link, link);
574}
575
576static int netkit_link_prog_attach(struct bpf_link *link, u32 flags,
577 u32 id_or_fd, u64 revision)
578{
579 struct netkit_link *nkl = netkit_link(link);
580 struct bpf_mprog_entry *entry, *entry_new;
581 struct net_device *dev = nkl->dev;
582 int ret;
583
584 ASSERT_RTNL();
585 entry = netkit_entry_fetch(dev, bundle_fallback: true);
586 ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: link->prog, link, NULL, flags,
587 id_or_fd, revision);
588 if (!ret) {
589 if (entry != entry_new) {
590 netkit_entry_update(dev, entry: entry_new);
591 netkit_entry_sync();
592 }
593 bpf_mprog_commit(entry);
594 }
595 return ret;
596}
597
598static void netkit_link_release(struct bpf_link *link)
599{
600 struct netkit_link *nkl = netkit_link(link);
601 struct bpf_mprog_entry *entry, *entry_new;
602 struct net_device *dev;
603 int ret = 0;
604
605 rtnl_lock();
606 dev = nkl->dev;
607 if (!dev)
608 goto out;
609 entry = netkit_entry_fetch(dev, bundle_fallback: false);
610 if (!entry) {
611 ret = -ENOENT;
612 goto out;
613 }
614 ret = bpf_mprog_detach(entry, entry_new: &entry_new, prog: link->prog, link, flags: 0, id_or_fd: 0, revision: 0);
615 if (!ret) {
616 if (!bpf_mprog_total(entry: entry_new))
617 entry_new = NULL;
618 netkit_entry_update(dev, entry: entry_new);
619 netkit_entry_sync();
620 bpf_mprog_commit(entry);
621 nkl->dev = NULL;
622 }
623out:
624 WARN_ON_ONCE(ret);
625 rtnl_unlock();
626}
627
628static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog,
629 struct bpf_prog *oprog)
630{
631 struct netkit_link *nkl = netkit_link(link);
632 struct bpf_mprog_entry *entry, *entry_new;
633 struct net_device *dev;
634 int ret = 0;
635
636 rtnl_lock();
637 dev = nkl->dev;
638 if (!dev) {
639 ret = -ENOLINK;
640 goto out;
641 }
642 if (oprog && link->prog != oprog) {
643 ret = -EPERM;
644 goto out;
645 }
646 oprog = link->prog;
647 if (oprog == nprog) {
648 bpf_prog_put(prog: nprog);
649 goto out;
650 }
651 entry = netkit_entry_fetch(dev, bundle_fallback: false);
652 if (!entry) {
653 ret = -ENOENT;
654 goto out;
655 }
656 ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: nprog, link, prog_old: oprog,
657 BPF_F_REPLACE | BPF_F_ID,
658 id_or_fd: link->prog->aux->id, revision: 0);
659 if (!ret) {
660 WARN_ON_ONCE(entry != entry_new);
661 oprog = xchg(&link->prog, nprog);
662 bpf_prog_put(prog: oprog);
663 bpf_mprog_commit(entry);
664 }
665out:
666 rtnl_unlock();
667 return ret;
668}
669
/* bpf_link_ops dealloc: free the netkit_link containing @link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	kfree(netkit_link(link));
}

675static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
676{
677 const struct netkit_link *nkl = netkit_link(link);
678 u32 ifindex = 0;
679
680 rtnl_lock();
681 if (nkl->dev)
682 ifindex = nkl->dev->ifindex;
683 rtnl_unlock();
684
685 seq_printf(m: seq, fmt: "ifindex:\t%u\n", ifindex);
686 seq_printf(m: seq, fmt: "attach_type:\t%u (%s)\n",
687 nkl->location,
688 nkl->location == BPF_NETKIT_PRIMARY ? "primary" : "peer");
689}
690
691static int netkit_link_fill_info(const struct bpf_link *link,
692 struct bpf_link_info *info)
693{
694 const struct netkit_link *nkl = netkit_link(link);
695 u32 ifindex = 0;
696
697 rtnl_lock();
698 if (nkl->dev)
699 ifindex = nkl->dev->ifindex;
700 rtnl_unlock();
701
702 info->netkit.ifindex = ifindex;
703 info->netkit.attach_type = nkl->location;
704 return 0;
705}
706
/* bpf_link_ops detach: same as release, but always reports success. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);
	return 0;
}

713static const struct bpf_link_ops netkit_link_lops = {
714 .release = netkit_link_release,
715 .detach = netkit_link_detach,
716 .dealloc = netkit_link_dealloc,
717 .update_prog = netkit_link_update,
718 .show_fdinfo = netkit_link_fdinfo,
719 .fill_link_info = netkit_link_fill_info,
720};
721
722static int netkit_link_init(struct netkit_link *nkl,
723 struct bpf_link_primer *link_primer,
724 const union bpf_attr *attr,
725 struct net_device *dev,
726 struct bpf_prog *prog)
727{
728 bpf_link_init(link: &nkl->link, type: BPF_LINK_TYPE_NETKIT,
729 ops: &netkit_link_lops, prog);
730 nkl->location = attr->link_create.attach_type;
731 nkl->dev = dev;
732 return bpf_link_prime(link: &nkl->link, primer: link_primer);
733}
734
735int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
736{
737 struct bpf_link_primer link_primer;
738 struct netkit_link *nkl;
739 struct net_device *dev;
740 int ret;
741
742 rtnl_lock();
743 dev = netkit_dev_fetch(current->nsproxy->net_ns,
744 ifindex: attr->link_create.target_ifindex,
745 which: attr->link_create.attach_type);
746 if (IS_ERR(ptr: dev)) {
747 ret = PTR_ERR(ptr: dev);
748 goto out;
749 }
750 nkl = kzalloc(size: sizeof(*nkl), GFP_KERNEL_ACCOUNT);
751 if (!nkl) {
752 ret = -ENOMEM;
753 goto out;
754 }
755 ret = netkit_link_init(nkl, link_primer: &link_primer, attr, dev, prog);
756 if (ret) {
757 kfree(objp: nkl);
758 goto out;
759 }
760 ret = netkit_link_prog_attach(link: &nkl->link,
761 flags: attr->link_create.flags,
762 id_or_fd: attr->link_create.netkit.relative_fd,
763 revision: attr->link_create.netkit.expected_revision);
764 if (ret) {
765 nkl->dev = NULL;
766 bpf_link_cleanup(primer: &link_primer);
767 goto out;
768 }
769 ret = bpf_link_settle(primer: &link_primer);
770out:
771 rtnl_unlock();
772 return ret;
773}
774
775static void netkit_release_all(struct net_device *dev)
776{
777 struct bpf_mprog_entry *entry;
778 struct bpf_tuple tuple = {};
779 struct bpf_mprog_fp *fp;
780 struct bpf_mprog_cp *cp;
781
782 entry = netkit_entry_fetch(dev, bundle_fallback: false);
783 if (!entry)
784 return;
785 netkit_entry_update(dev, NULL);
786 netkit_entry_sync();
787 bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
788 if (tuple.link)
789 netkit_link(link: tuple.link)->dev = NULL;
790 else
791 bpf_prog_put(prog: tuple.prog);
792 }
793}
794
/* ndo_uninit: release all programs and links attached to @dev. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
}

800static void netkit_del_link(struct net_device *dev, struct list_head *head)
801{
802 struct netkit *nk = netkit_priv(dev);
803 struct net_device *peer = rtnl_dereference(nk->peer);
804
805 RCU_INIT_POINTER(nk->peer, NULL);
806 unregister_netdevice_queue(dev, head);
807 if (peer) {
808 nk = netkit_priv(dev: peer);
809 RCU_INIT_POINTER(nk->peer, NULL);
810 unregister_netdevice_queue(dev: peer, head);
811 }
812}
813
814static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
815 struct nlattr *data[],
816 struct netlink_ext_ack *extack)
817{
818 struct netkit *nk = netkit_priv(dev);
819 struct net_device *peer = rtnl_dereference(nk->peer);
820 enum netkit_action policy;
821 struct nlattr *attr;
822 int err;
823
824 if (!nk->primary) {
825 NL_SET_ERR_MSG(extack,
826 "netkit link settings can be changed only through the primary device");
827 return -EACCES;
828 }
829
830 if (data[IFLA_NETKIT_MODE]) {
831 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE],
832 "netkit link operating mode cannot be changed after device creation");
833 return -EACCES;
834 }
835
836 if (data[IFLA_NETKIT_POLICY]) {
837 attr = data[IFLA_NETKIT_POLICY];
838 policy = nla_get_u32(nla: attr);
839 err = netkit_check_policy(policy, tb: attr, extack);
840 if (err)
841 return err;
842 WRITE_ONCE(nk->policy, policy);
843 }
844
845 if (data[IFLA_NETKIT_PEER_POLICY]) {
846 err = -EOPNOTSUPP;
847 attr = data[IFLA_NETKIT_PEER_POLICY];
848 policy = nla_get_u32(nla: attr);
849 if (peer)
850 err = netkit_check_policy(policy, tb: attr, extack);
851 if (err)
852 return err;
853 nk = netkit_priv(dev: peer);
854 WRITE_ONCE(nk->policy, policy);
855 }
856
857 return 0;
858}
859
860static size_t netkit_get_size(const struct net_device *dev)
861{
862 return nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_POLICY */
863 nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
864 nla_total_size(payload: sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
865 nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_MODE */
866 0;
867}
868
869static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
870{
871 struct netkit *nk = netkit_priv(dev);
872 struct net_device *peer = rtnl_dereference(nk->peer);
873
874 if (nla_put_u8(skb, attrtype: IFLA_NETKIT_PRIMARY, value: nk->primary))
875 return -EMSGSIZE;
876 if (nla_put_u32(skb, attrtype: IFLA_NETKIT_POLICY, value: nk->policy))
877 return -EMSGSIZE;
878 if (nla_put_u32(skb, attrtype: IFLA_NETKIT_MODE, value: nk->mode))
879 return -EMSGSIZE;
880
881 if (peer) {
882 nk = netkit_priv(dev: peer);
883 if (nla_put_u32(skb, attrtype: IFLA_NETKIT_PEER_POLICY, value: nk->policy))
884 return -EMSGSIZE;
885 }
886
887 return 0;
888}
889
890static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
891 [IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) },
892 [IFLA_NETKIT_POLICY] = { .type = NLA_U32 },
893 [IFLA_NETKIT_MODE] = { .type = NLA_U32 },
894 [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 },
895 [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
896 .reject_message = "Primary attribute is read-only" },
897};
898
899static struct rtnl_link_ops netkit_link_ops = {
900 .kind = DRV_NAME,
901 .priv_size = sizeof(struct netkit),
902 .setup = netkit_setup,
903 .newlink = netkit_new_link,
904 .dellink = netkit_del_link,
905 .changelink = netkit_change_link,
906 .get_link_net = netkit_get_link_net,
907 .get_size = netkit_get_size,
908 .fill_info = netkit_fill_info,
909 .policy = netkit_policy,
910 .validate = netkit_validate,
911 .maxtype = IFLA_NETKIT_MAX,
912};
913
914static __init int netkit_init(void)
915{
916 BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
917 (int)NETKIT_PASS != (int)TCX_PASS ||
918 (int)NETKIT_DROP != (int)TCX_DROP ||
919 (int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
920
921 return rtnl_link_register(ops: &netkit_link_ops);
922}
923
924static __exit void netkit_exit(void)
925{
926 rtnl_link_unregister(ops: &netkit_link_ops);
927}
928
929module_init(netkit_init);
930module_exit(netkit_exit);
931
932MODULE_DESCRIPTION("BPF-programmable network device");
933MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
934MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
935MODULE_LICENSE("GPL");
936MODULE_ALIAS_RTNL_LINK(DRV_NAME);
937

/* Source: linux/drivers/net/netkit.c */