// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2023 Isovalent */
3 | |
4 | #include <linux/netdevice.h> |
5 | #include <linux/ethtool.h> |
6 | #include <linux/etherdevice.h> |
7 | #include <linux/filter.h> |
8 | #include <linux/netfilter_netdev.h> |
9 | #include <linux/bpf_mprog.h> |
10 | |
11 | #include <net/netkit.h> |
12 | #include <net/dst.h> |
13 | #include <net/tcx.h> |
14 | |
/* Driver name: reported via ethtool drvinfo and used as the rtnl link kind. */
#define DRV_NAME "netkit"
16 | |
17 | struct netkit { |
18 | /* Needed in fast-path */ |
19 | struct net_device __rcu *peer; |
20 | struct bpf_mprog_entry __rcu *active; |
21 | enum netkit_action policy; |
22 | struct bpf_mprog_bundle bundle; |
23 | |
24 | /* Needed in slow-path */ |
25 | enum netkit_mode mode; |
26 | bool primary; |
27 | u32 headroom; |
28 | }; |
29 | |
30 | struct netkit_link { |
31 | struct bpf_link link; |
32 | struct net_device *dev; |
33 | u32 location; |
34 | }; |
35 | |
36 | static __always_inline int |
37 | netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, |
38 | enum netkit_action ret) |
39 | { |
40 | const struct bpf_mprog_fp *fp; |
41 | const struct bpf_prog *prog; |
42 | |
43 | bpf_mprog_foreach_prog(entry, fp, prog) { |
44 | bpf_compute_data_pointers(skb); |
45 | ret = bpf_prog_run(prog, ctx: skb); |
46 | if (ret != NETKIT_NEXT) |
47 | break; |
48 | } |
49 | return ret; |
50 | } |
51 | |
52 | static void netkit_prep_forward(struct sk_buff *skb, bool xnet) |
53 | { |
54 | skb_scrub_packet(skb, xnet); |
55 | skb->priority = 0; |
56 | nf_skip_egress(skb, skip: true); |
57 | } |
58 | |
/* Typed accessor for the netkit private area of @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	struct netkit *nk = netdev_priv(dev);

	return nk;
}
63 | |
64 | static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) |
65 | { |
66 | struct netkit *nk = netkit_priv(dev); |
67 | enum netkit_action ret = READ_ONCE(nk->policy); |
68 | netdev_tx_t ret_dev = NET_XMIT_SUCCESS; |
69 | const struct bpf_mprog_entry *entry; |
70 | struct net_device *peer; |
71 | |
72 | rcu_read_lock(); |
73 | peer = rcu_dereference(nk->peer); |
74 | if (unlikely(!peer || !(peer->flags & IFF_UP) || |
75 | !pskb_may_pull(skb, ETH_HLEN) || |
76 | skb_orphan_frags(skb, GFP_ATOMIC))) |
77 | goto drop; |
78 | netkit_prep_forward(skb, xnet: !net_eq(net1: dev_net(dev), net2: dev_net(dev: peer))); |
79 | skb->dev = peer; |
80 | entry = rcu_dereference(nk->active); |
81 | if (entry) |
82 | ret = netkit_run(entry, skb, ret); |
83 | switch (ret) { |
84 | case NETKIT_NEXT: |
85 | case NETKIT_PASS: |
86 | skb->protocol = eth_type_trans(skb, dev: skb->dev); |
87 | skb_postpull_rcsum(skb, start: eth_hdr(skb), ETH_HLEN); |
88 | __netif_rx(skb); |
89 | break; |
90 | case NETKIT_REDIRECT: |
91 | skb_do_redirect(skb); |
92 | break; |
93 | case NETKIT_DROP: |
94 | default: |
95 | drop: |
96 | kfree_skb(skb); |
97 | dev_core_stats_tx_dropped_inc(dev); |
98 | ret_dev = NET_XMIT_DROP; |
99 | break; |
100 | } |
101 | rcu_read_unlock(); |
102 | return ret_dev; |
103 | } |
104 | |
105 | static int netkit_open(struct net_device *dev) |
106 | { |
107 | struct netkit *nk = netkit_priv(dev); |
108 | struct net_device *peer = rtnl_dereference(nk->peer); |
109 | |
110 | if (!peer) |
111 | return -ENOTCONN; |
112 | if (peer->flags & IFF_UP) { |
113 | netif_carrier_on(dev); |
114 | netif_carrier_on(dev: peer); |
115 | } |
116 | return 0; |
117 | } |
118 | |
119 | static int netkit_close(struct net_device *dev) |
120 | { |
121 | struct netkit *nk = netkit_priv(dev); |
122 | struct net_device *peer = rtnl_dereference(nk->peer); |
123 | |
124 | netif_carrier_off(dev); |
125 | if (peer) |
126 | netif_carrier_off(dev: peer); |
127 | return 0; |
128 | } |
129 | |
130 | static int netkit_get_iflink(const struct net_device *dev) |
131 | { |
132 | struct netkit *nk = netkit_priv(dev); |
133 | struct net_device *peer; |
134 | int iflink = 0; |
135 | |
136 | rcu_read_lock(); |
137 | peer = rcu_dereference(nk->peer); |
138 | if (peer) |
139 | iflink = peer->ifindex; |
140 | rcu_read_unlock(); |
141 | return iflink; |
142 | } |
143 | |
/* ndo_set_rx_mode: intentionally empty. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* No filtering to program: whatever is pushed to us is received. */
}
148 | |
149 | static void netkit_set_headroom(struct net_device *dev, int headroom) |
150 | { |
151 | struct netkit *nk = netkit_priv(dev), *nk2; |
152 | struct net_device *peer; |
153 | |
154 | if (headroom < 0) |
155 | headroom = NET_SKB_PAD; |
156 | |
157 | rcu_read_lock(); |
158 | peer = rcu_dereference(nk->peer); |
159 | if (unlikely(!peer)) |
160 | goto out; |
161 | |
162 | nk2 = netkit_priv(dev: peer); |
163 | nk->headroom = headroom; |
164 | headroom = max(nk->headroom, nk2->headroom); |
165 | |
166 | peer->needed_headroom = headroom; |
167 | dev->needed_headroom = headroom; |
168 | out: |
169 | rcu_read_unlock(); |
170 | } |
171 | |
172 | static struct net_device *netkit_peer_dev(struct net_device *dev) |
173 | { |
174 | return rcu_dereference(netkit_priv(dev)->peer); |
175 | } |
176 | |
177 | static void netkit_uninit(struct net_device *dev); |
178 | |
179 | static const struct net_device_ops netkit_netdev_ops = { |
180 | .ndo_open = netkit_open, |
181 | .ndo_stop = netkit_close, |
182 | .ndo_start_xmit = netkit_xmit, |
183 | .ndo_set_rx_mode = netkit_set_multicast, |
184 | .ndo_set_rx_headroom = netkit_set_headroom, |
185 | .ndo_get_iflink = netkit_get_iflink, |
186 | .ndo_get_peer_dev = netkit_peer_dev, |
187 | .ndo_uninit = netkit_uninit, |
188 | .ndo_features_check = passthru_features_check, |
189 | }; |
190 | |
191 | static void netkit_get_drvinfo(struct net_device *dev, |
192 | struct ethtool_drvinfo *info) |
193 | { |
194 | strscpy(p: info->driver, DRV_NAME, size: sizeof(info->driver)); |
195 | } |
196 | |
197 | static const struct ethtool_ops netkit_ethtool_ops = { |
198 | .get_drvinfo = netkit_get_drvinfo, |
199 | }; |
200 | |
201 | static void netkit_setup(struct net_device *dev) |
202 | { |
203 | static const netdev_features_t netkit_features_hw_vlan = |
204 | NETIF_F_HW_VLAN_CTAG_TX | |
205 | NETIF_F_HW_VLAN_CTAG_RX | |
206 | NETIF_F_HW_VLAN_STAG_TX | |
207 | NETIF_F_HW_VLAN_STAG_RX; |
208 | static const netdev_features_t netkit_features = |
209 | netkit_features_hw_vlan | |
210 | NETIF_F_SG | |
211 | NETIF_F_FRAGLIST | |
212 | NETIF_F_HW_CSUM | |
213 | NETIF_F_RXCSUM | |
214 | NETIF_F_SCTP_CRC | |
215 | NETIF_F_HIGHDMA | |
216 | NETIF_F_GSO_SOFTWARE | |
217 | NETIF_F_GSO_ENCAP_ALL; |
218 | |
219 | ether_setup(dev); |
220 | dev->max_mtu = ETH_MAX_MTU; |
221 | |
222 | dev->flags |= IFF_NOARP; |
223 | dev->priv_flags &= ~IFF_TX_SKB_SHARING; |
224 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; |
225 | dev->priv_flags |= IFF_PHONY_HEADROOM; |
226 | dev->priv_flags |= IFF_NO_QUEUE; |
227 | |
228 | dev->ethtool_ops = &netkit_ethtool_ops; |
229 | dev->netdev_ops = &netkit_netdev_ops; |
230 | |
231 | dev->features |= netkit_features | NETIF_F_LLTX; |
232 | dev->hw_features = netkit_features; |
233 | dev->hw_enc_features = netkit_features; |
234 | dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; |
235 | dev->vlan_features = dev->features & ~netkit_features_hw_vlan; |
236 | |
237 | dev->needs_free_netdev = true; |
238 | |
239 | netif_set_tso_max_size(dev, GSO_MAX_SIZE); |
240 | } |
241 | |
242 | static struct net *netkit_get_link_net(const struct net_device *dev) |
243 | { |
244 | struct netkit *nk = netkit_priv(dev); |
245 | struct net_device *peer = rtnl_dereference(nk->peer); |
246 | |
247 | return peer ? dev_net(dev: peer) : dev_net(dev); |
248 | } |
249 | |
250 | static int netkit_check_policy(int policy, struct nlattr *tb, |
251 | struct netlink_ext_ack *extack) |
252 | { |
253 | switch (policy) { |
254 | case NETKIT_PASS: |
255 | case NETKIT_DROP: |
256 | return 0; |
257 | default: |
258 | NL_SET_ERR_MSG_ATTR(extack, tb, |
259 | "Provided default xmit policy not supported" ); |
260 | return -EINVAL; |
261 | } |
262 | } |
263 | |
264 | static int netkit_check_mode(int mode, struct nlattr *tb, |
265 | struct netlink_ext_ack *extack) |
266 | { |
267 | switch (mode) { |
268 | case NETKIT_L2: |
269 | case NETKIT_L3: |
270 | return 0; |
271 | default: |
272 | NL_SET_ERR_MSG_ATTR(extack, tb, |
273 | "Provided device mode can only be L2 or L3" ); |
274 | return -EINVAL; |
275 | } |
276 | } |
277 | |
278 | static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], |
279 | struct netlink_ext_ack *extack) |
280 | { |
281 | struct nlattr *attr = tb[IFLA_ADDRESS]; |
282 | |
283 | if (!attr) |
284 | return 0; |
285 | NL_SET_ERR_MSG_ATTR(extack, attr, |
286 | "Setting Ethernet address is not supported" ); |
287 | return -EOPNOTSUPP; |
288 | } |
289 | |
290 | static struct rtnl_link_ops netkit_link_ops; |
291 | |
292 | static int netkit_new_link(struct net *src_net, struct net_device *dev, |
293 | struct nlattr *tb[], struct nlattr *data[], |
294 | struct netlink_ext_ack *extack) |
295 | { |
296 | struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr; |
297 | enum netkit_action default_prim = NETKIT_PASS; |
298 | enum netkit_action default_peer = NETKIT_PASS; |
299 | enum netkit_mode mode = NETKIT_L3; |
300 | unsigned char ifname_assign_type; |
301 | struct ifinfomsg *ifmp = NULL; |
302 | struct net_device *peer; |
303 | char ifname[IFNAMSIZ]; |
304 | struct netkit *nk; |
305 | struct net *net; |
306 | int err; |
307 | |
308 | if (data) { |
309 | if (data[IFLA_NETKIT_MODE]) { |
310 | attr = data[IFLA_NETKIT_MODE]; |
311 | mode = nla_get_u32(nla: attr); |
312 | err = netkit_check_mode(mode, tb: attr, extack); |
313 | if (err < 0) |
314 | return err; |
315 | } |
316 | if (data[IFLA_NETKIT_PEER_INFO]) { |
317 | attr = data[IFLA_NETKIT_PEER_INFO]; |
318 | ifmp = nla_data(nla: attr); |
319 | err = rtnl_nla_parse_ifinfomsg(tb: peer_tb, nla_peer: attr, exterr: extack); |
320 | if (err < 0) |
321 | return err; |
322 | err = netkit_validate(tb: peer_tb, NULL, extack); |
323 | if (err < 0) |
324 | return err; |
325 | tbp = peer_tb; |
326 | } |
327 | if (data[IFLA_NETKIT_POLICY]) { |
328 | attr = data[IFLA_NETKIT_POLICY]; |
329 | default_prim = nla_get_u32(nla: attr); |
330 | err = netkit_check_policy(policy: default_prim, tb: attr, extack); |
331 | if (err < 0) |
332 | return err; |
333 | } |
334 | if (data[IFLA_NETKIT_PEER_POLICY]) { |
335 | attr = data[IFLA_NETKIT_PEER_POLICY]; |
336 | default_peer = nla_get_u32(nla: attr); |
337 | err = netkit_check_policy(policy: default_peer, tb: attr, extack); |
338 | if (err < 0) |
339 | return err; |
340 | } |
341 | } |
342 | |
343 | if (ifmp && tbp[IFLA_IFNAME]) { |
344 | nla_strscpy(dst: ifname, nla: tbp[IFLA_IFNAME], IFNAMSIZ); |
345 | ifname_assign_type = NET_NAME_USER; |
346 | } else { |
347 | strscpy(p: ifname, q: "nk%d" , IFNAMSIZ); |
348 | ifname_assign_type = NET_NAME_ENUM; |
349 | } |
350 | |
351 | net = rtnl_link_get_net(src_net, tb: tbp); |
352 | if (IS_ERR(ptr: net)) |
353 | return PTR_ERR(ptr: net); |
354 | |
355 | peer = rtnl_create_link(net, ifname, name_assign_type: ifname_assign_type, |
356 | ops: &netkit_link_ops, tb: tbp, extack); |
357 | if (IS_ERR(ptr: peer)) { |
358 | put_net(net); |
359 | return PTR_ERR(ptr: peer); |
360 | } |
361 | |
362 | netif_inherit_tso_max(to: peer, from: dev); |
363 | |
364 | if (mode == NETKIT_L2) |
365 | eth_hw_addr_random(dev: peer); |
366 | if (ifmp && dev->ifindex) |
367 | peer->ifindex = ifmp->ifi_index; |
368 | |
369 | nk = netkit_priv(dev: peer); |
370 | nk->primary = false; |
371 | nk->policy = default_peer; |
372 | nk->mode = mode; |
373 | bpf_mprog_bundle_init(bundle: &nk->bundle); |
374 | |
375 | err = register_netdevice(dev: peer); |
376 | put_net(net); |
377 | if (err < 0) |
378 | goto err_register_peer; |
379 | netif_carrier_off(dev: peer); |
380 | if (mode == NETKIT_L2) |
381 | dev_change_flags(dev: peer, flags: peer->flags & ~IFF_NOARP, NULL); |
382 | |
383 | err = rtnl_configure_link(dev: peer, NULL, portid: 0, NULL); |
384 | if (err < 0) |
385 | goto err_configure_peer; |
386 | |
387 | if (mode == NETKIT_L2) |
388 | eth_hw_addr_random(dev); |
389 | if (tb[IFLA_IFNAME]) |
390 | nla_strscpy(dst: dev->name, nla: tb[IFLA_IFNAME], IFNAMSIZ); |
391 | else |
392 | strscpy(p: dev->name, q: "nk%d" , IFNAMSIZ); |
393 | |
394 | nk = netkit_priv(dev); |
395 | nk->primary = true; |
396 | nk->policy = default_prim; |
397 | nk->mode = mode; |
398 | bpf_mprog_bundle_init(bundle: &nk->bundle); |
399 | |
400 | err = register_netdevice(dev); |
401 | if (err < 0) |
402 | goto err_configure_peer; |
403 | netif_carrier_off(dev); |
404 | if (mode == NETKIT_L2) |
405 | dev_change_flags(dev, flags: dev->flags & ~IFF_NOARP, NULL); |
406 | |
407 | rcu_assign_pointer(netkit_priv(dev)->peer, peer); |
408 | rcu_assign_pointer(netkit_priv(peer)->peer, dev); |
409 | return 0; |
410 | err_configure_peer: |
411 | unregister_netdevice(dev: peer); |
412 | return err; |
413 | err_register_peer: |
414 | free_netdev(dev: peer); |
415 | return err; |
416 | } |
417 | |
418 | static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev, |
419 | bool bundle_fallback) |
420 | { |
421 | struct netkit *nk = netkit_priv(dev); |
422 | struct bpf_mprog_entry *entry; |
423 | |
424 | ASSERT_RTNL(); |
425 | entry = rcu_dereference_rtnl(nk->active); |
426 | if (entry) |
427 | return entry; |
428 | if (bundle_fallback) |
429 | return &nk->bundle.a; |
430 | return NULL; |
431 | } |
432 | |
433 | static void netkit_entry_update(struct net_device *dev, |
434 | struct bpf_mprog_entry *entry) |
435 | { |
436 | struct netkit *nk = netkit_priv(dev); |
437 | |
438 | ASSERT_RTNL(); |
439 | rcu_assign_pointer(nk->active, entry); |
440 | } |
441 | |
/* Wait for an RCU grace period, i.e. for readers of the previous entry. */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
446 | |
447 | static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which) |
448 | { |
449 | struct net_device *dev; |
450 | struct netkit *nk; |
451 | |
452 | ASSERT_RTNL(); |
453 | |
454 | switch (which) { |
455 | case BPF_NETKIT_PRIMARY: |
456 | case BPF_NETKIT_PEER: |
457 | break; |
458 | default: |
459 | return ERR_PTR(error: -EINVAL); |
460 | } |
461 | |
462 | dev = __dev_get_by_index(net, ifindex); |
463 | if (!dev) |
464 | return ERR_PTR(error: -ENODEV); |
465 | if (dev->netdev_ops != &netkit_netdev_ops) |
466 | return ERR_PTR(error: -ENXIO); |
467 | |
468 | nk = netkit_priv(dev); |
469 | if (!nk->primary) |
470 | return ERR_PTR(error: -EACCES); |
471 | if (which == BPF_NETKIT_PEER) { |
472 | dev = rcu_dereference_rtnl(nk->peer); |
473 | if (!dev) |
474 | return ERR_PTR(error: -ENODEV); |
475 | } |
476 | return dev; |
477 | } |
478 | |
479 | int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
480 | { |
481 | struct bpf_mprog_entry *entry, *entry_new; |
482 | struct bpf_prog *replace_prog = NULL; |
483 | struct net_device *dev; |
484 | int ret; |
485 | |
486 | rtnl_lock(); |
487 | dev = netkit_dev_fetch(current->nsproxy->net_ns, ifindex: attr->target_ifindex, |
488 | which: attr->attach_type); |
489 | if (IS_ERR(ptr: dev)) { |
490 | ret = PTR_ERR(ptr: dev); |
491 | goto out; |
492 | } |
493 | entry = netkit_entry_fetch(dev, bundle_fallback: true); |
494 | if (attr->attach_flags & BPF_F_REPLACE) { |
495 | replace_prog = bpf_prog_get_type(ufd: attr->replace_bpf_fd, |
496 | type: prog->type); |
497 | if (IS_ERR(ptr: replace_prog)) { |
498 | ret = PTR_ERR(ptr: replace_prog); |
499 | replace_prog = NULL; |
500 | goto out; |
501 | } |
502 | } |
503 | ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: prog, NULL, prog_old: replace_prog, |
504 | flags: attr->attach_flags, id_or_fd: attr->relative_fd, |
505 | revision: attr->expected_revision); |
506 | if (!ret) { |
507 | if (entry != entry_new) { |
508 | netkit_entry_update(dev, entry: entry_new); |
509 | netkit_entry_sync(); |
510 | } |
511 | bpf_mprog_commit(entry); |
512 | } |
513 | out: |
514 | if (replace_prog) |
515 | bpf_prog_put(prog: replace_prog); |
516 | rtnl_unlock(); |
517 | return ret; |
518 | } |
519 | |
520 | int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) |
521 | { |
522 | struct bpf_mprog_entry *entry, *entry_new; |
523 | struct net_device *dev; |
524 | int ret; |
525 | |
526 | rtnl_lock(); |
527 | dev = netkit_dev_fetch(current->nsproxy->net_ns, ifindex: attr->target_ifindex, |
528 | which: attr->attach_type); |
529 | if (IS_ERR(ptr: dev)) { |
530 | ret = PTR_ERR(ptr: dev); |
531 | goto out; |
532 | } |
533 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
534 | if (!entry) { |
535 | ret = -ENOENT; |
536 | goto out; |
537 | } |
538 | ret = bpf_mprog_detach(entry, entry_new: &entry_new, prog, NULL, flags: attr->attach_flags, |
539 | id_or_fd: attr->relative_fd, revision: attr->expected_revision); |
540 | if (!ret) { |
541 | if (!bpf_mprog_total(entry: entry_new)) |
542 | entry_new = NULL; |
543 | netkit_entry_update(dev, entry: entry_new); |
544 | netkit_entry_sync(); |
545 | bpf_mprog_commit(entry); |
546 | } |
547 | out: |
548 | rtnl_unlock(); |
549 | return ret; |
550 | } |
551 | |
552 | int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) |
553 | { |
554 | struct net_device *dev; |
555 | int ret; |
556 | |
557 | rtnl_lock(); |
558 | dev = netkit_dev_fetch(current->nsproxy->net_ns, |
559 | ifindex: attr->query.target_ifindex, |
560 | which: attr->query.attach_type); |
561 | if (IS_ERR(ptr: dev)) { |
562 | ret = PTR_ERR(ptr: dev); |
563 | goto out; |
564 | } |
565 | ret = bpf_mprog_query(attr, uattr, entry: netkit_entry_fetch(dev, bundle_fallback: false)); |
566 | out: |
567 | rtnl_unlock(); |
568 | return ret; |
569 | } |
570 | |
571 | static struct netkit_link *netkit_link(const struct bpf_link *link) |
572 | { |
573 | return container_of(link, struct netkit_link, link); |
574 | } |
575 | |
576 | static int netkit_link_prog_attach(struct bpf_link *link, u32 flags, |
577 | u32 id_or_fd, u64 revision) |
578 | { |
579 | struct netkit_link *nkl = netkit_link(link); |
580 | struct bpf_mprog_entry *entry, *entry_new; |
581 | struct net_device *dev = nkl->dev; |
582 | int ret; |
583 | |
584 | ASSERT_RTNL(); |
585 | entry = netkit_entry_fetch(dev, bundle_fallback: true); |
586 | ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: link->prog, link, NULL, flags, |
587 | id_or_fd, revision); |
588 | if (!ret) { |
589 | if (entry != entry_new) { |
590 | netkit_entry_update(dev, entry: entry_new); |
591 | netkit_entry_sync(); |
592 | } |
593 | bpf_mprog_commit(entry); |
594 | } |
595 | return ret; |
596 | } |
597 | |
598 | static void netkit_link_release(struct bpf_link *link) |
599 | { |
600 | struct netkit_link *nkl = netkit_link(link); |
601 | struct bpf_mprog_entry *entry, *entry_new; |
602 | struct net_device *dev; |
603 | int ret = 0; |
604 | |
605 | rtnl_lock(); |
606 | dev = nkl->dev; |
607 | if (!dev) |
608 | goto out; |
609 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
610 | if (!entry) { |
611 | ret = -ENOENT; |
612 | goto out; |
613 | } |
614 | ret = bpf_mprog_detach(entry, entry_new: &entry_new, prog: link->prog, link, flags: 0, id_or_fd: 0, revision: 0); |
615 | if (!ret) { |
616 | if (!bpf_mprog_total(entry: entry_new)) |
617 | entry_new = NULL; |
618 | netkit_entry_update(dev, entry: entry_new); |
619 | netkit_entry_sync(); |
620 | bpf_mprog_commit(entry); |
621 | nkl->dev = NULL; |
622 | } |
623 | out: |
624 | WARN_ON_ONCE(ret); |
625 | rtnl_unlock(); |
626 | } |
627 | |
628 | static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog, |
629 | struct bpf_prog *oprog) |
630 | { |
631 | struct netkit_link *nkl = netkit_link(link); |
632 | struct bpf_mprog_entry *entry, *entry_new; |
633 | struct net_device *dev; |
634 | int ret = 0; |
635 | |
636 | rtnl_lock(); |
637 | dev = nkl->dev; |
638 | if (!dev) { |
639 | ret = -ENOLINK; |
640 | goto out; |
641 | } |
642 | if (oprog && link->prog != oprog) { |
643 | ret = -EPERM; |
644 | goto out; |
645 | } |
646 | oprog = link->prog; |
647 | if (oprog == nprog) { |
648 | bpf_prog_put(prog: nprog); |
649 | goto out; |
650 | } |
651 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
652 | if (!entry) { |
653 | ret = -ENOENT; |
654 | goto out; |
655 | } |
656 | ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: nprog, link, prog_old: oprog, |
657 | BPF_F_REPLACE | BPF_F_ID, |
658 | id_or_fd: link->prog->aux->id, revision: 0); |
659 | if (!ret) { |
660 | WARN_ON_ONCE(entry != entry_new); |
661 | oprog = xchg(&link->prog, nprog); |
662 | bpf_prog_put(prog: oprog); |
663 | bpf_mprog_commit(entry); |
664 | } |
665 | out: |
666 | rtnl_unlock(); |
667 | return ret; |
668 | } |
669 | |
/* bpf_link dealloc: free the containing netkit_link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);

	kfree(nkl);
}
674 | |
675 | static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq) |
676 | { |
677 | const struct netkit_link *nkl = netkit_link(link); |
678 | u32 ifindex = 0; |
679 | |
680 | rtnl_lock(); |
681 | if (nkl->dev) |
682 | ifindex = nkl->dev->ifindex; |
683 | rtnl_unlock(); |
684 | |
685 | seq_printf(m: seq, fmt: "ifindex:\t%u\n" , ifindex); |
686 | seq_printf(m: seq, fmt: "attach_type:\t%u (%s)\n" , |
687 | nkl->location, |
688 | nkl->location == BPF_NETKIT_PRIMARY ? "primary" : "peer" ); |
689 | } |
690 | |
691 | static int netkit_link_fill_info(const struct bpf_link *link, |
692 | struct bpf_link_info *info) |
693 | { |
694 | const struct netkit_link *nkl = netkit_link(link); |
695 | u32 ifindex = 0; |
696 | |
697 | rtnl_lock(); |
698 | if (nkl->dev) |
699 | ifindex = nkl->dev->ifindex; |
700 | rtnl_unlock(); |
701 | |
702 | info->netkit.ifindex = ifindex; |
703 | info->netkit.attach_type = nkl->location; |
704 | return 0; |
705 | } |
706 | |
/* bpf_link detach: same work as release; always reports success. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);

	return 0;
}
712 | |
713 | static const struct bpf_link_ops netkit_link_lops = { |
714 | .release = netkit_link_release, |
715 | .detach = netkit_link_detach, |
716 | .dealloc = netkit_link_dealloc, |
717 | .update_prog = netkit_link_update, |
718 | .show_fdinfo = netkit_link_fdinfo, |
719 | .fill_link_info = netkit_link_fill_info, |
720 | }; |
721 | |
722 | static int netkit_link_init(struct netkit_link *nkl, |
723 | struct bpf_link_primer *link_primer, |
724 | const union bpf_attr *attr, |
725 | struct net_device *dev, |
726 | struct bpf_prog *prog) |
727 | { |
728 | bpf_link_init(link: &nkl->link, type: BPF_LINK_TYPE_NETKIT, |
729 | ops: &netkit_link_lops, prog); |
730 | nkl->location = attr->link_create.attach_type; |
731 | nkl->dev = dev; |
732 | return bpf_link_prime(link: &nkl->link, primer: link_primer); |
733 | } |
734 | |
735 | int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
736 | { |
737 | struct bpf_link_primer link_primer; |
738 | struct netkit_link *nkl; |
739 | struct net_device *dev; |
740 | int ret; |
741 | |
742 | rtnl_lock(); |
743 | dev = netkit_dev_fetch(current->nsproxy->net_ns, |
744 | ifindex: attr->link_create.target_ifindex, |
745 | which: attr->link_create.attach_type); |
746 | if (IS_ERR(ptr: dev)) { |
747 | ret = PTR_ERR(ptr: dev); |
748 | goto out; |
749 | } |
750 | nkl = kzalloc(size: sizeof(*nkl), GFP_KERNEL_ACCOUNT); |
751 | if (!nkl) { |
752 | ret = -ENOMEM; |
753 | goto out; |
754 | } |
755 | ret = netkit_link_init(nkl, link_primer: &link_primer, attr, dev, prog); |
756 | if (ret) { |
757 | kfree(objp: nkl); |
758 | goto out; |
759 | } |
760 | ret = netkit_link_prog_attach(link: &nkl->link, |
761 | flags: attr->link_create.flags, |
762 | id_or_fd: attr->link_create.netkit.relative_fd, |
763 | revision: attr->link_create.netkit.expected_revision); |
764 | if (ret) { |
765 | nkl->dev = NULL; |
766 | bpf_link_cleanup(primer: &link_primer); |
767 | goto out; |
768 | } |
769 | ret = bpf_link_settle(primer: &link_primer); |
770 | out: |
771 | rtnl_unlock(); |
772 | return ret; |
773 | } |
774 | |
775 | static void netkit_release_all(struct net_device *dev) |
776 | { |
777 | struct bpf_mprog_entry *entry; |
778 | struct bpf_tuple tuple = {}; |
779 | struct bpf_mprog_fp *fp; |
780 | struct bpf_mprog_cp *cp; |
781 | |
782 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
783 | if (!entry) |
784 | return; |
785 | netkit_entry_update(dev, NULL); |
786 | netkit_entry_sync(); |
787 | bpf_mprog_foreach_tuple(entry, fp, cp, tuple) { |
788 | if (tuple.link) |
789 | netkit_link(link: tuple.link)->dev = NULL; |
790 | else |
791 | bpf_prog_put(prog: tuple.prog); |
792 | } |
793 | } |
794 | |
/* ndo_uninit: release all BPF attachments of @dev. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
}
799 | |
800 | static void netkit_del_link(struct net_device *dev, struct list_head *head) |
801 | { |
802 | struct netkit *nk = netkit_priv(dev); |
803 | struct net_device *peer = rtnl_dereference(nk->peer); |
804 | |
805 | RCU_INIT_POINTER(nk->peer, NULL); |
806 | unregister_netdevice_queue(dev, head); |
807 | if (peer) { |
808 | nk = netkit_priv(dev: peer); |
809 | RCU_INIT_POINTER(nk->peer, NULL); |
810 | unregister_netdevice_queue(dev: peer, head); |
811 | } |
812 | } |
813 | |
814 | static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], |
815 | struct nlattr *data[], |
816 | struct netlink_ext_ack *extack) |
817 | { |
818 | struct netkit *nk = netkit_priv(dev); |
819 | struct net_device *peer = rtnl_dereference(nk->peer); |
820 | enum netkit_action policy; |
821 | struct nlattr *attr; |
822 | int err; |
823 | |
824 | if (!nk->primary) { |
825 | NL_SET_ERR_MSG(extack, |
826 | "netkit link settings can be changed only through the primary device" ); |
827 | return -EACCES; |
828 | } |
829 | |
830 | if (data[IFLA_NETKIT_MODE]) { |
831 | NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE], |
832 | "netkit link operating mode cannot be changed after device creation" ); |
833 | return -EACCES; |
834 | } |
835 | |
836 | if (data[IFLA_NETKIT_POLICY]) { |
837 | attr = data[IFLA_NETKIT_POLICY]; |
838 | policy = nla_get_u32(nla: attr); |
839 | err = netkit_check_policy(policy, tb: attr, extack); |
840 | if (err) |
841 | return err; |
842 | WRITE_ONCE(nk->policy, policy); |
843 | } |
844 | |
845 | if (data[IFLA_NETKIT_PEER_POLICY]) { |
846 | err = -EOPNOTSUPP; |
847 | attr = data[IFLA_NETKIT_PEER_POLICY]; |
848 | policy = nla_get_u32(nla: attr); |
849 | if (peer) |
850 | err = netkit_check_policy(policy, tb: attr, extack); |
851 | if (err) |
852 | return err; |
853 | nk = netkit_priv(dev: peer); |
854 | WRITE_ONCE(nk->policy, policy); |
855 | } |
856 | |
857 | return 0; |
858 | } |
859 | |
860 | static size_t netkit_get_size(const struct net_device *dev) |
861 | { |
862 | return nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_POLICY */ |
863 | nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */ |
864 | nla_total_size(payload: sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ |
865 | nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_MODE */ |
866 | 0; |
867 | } |
868 | |
869 | static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) |
870 | { |
871 | struct netkit *nk = netkit_priv(dev); |
872 | struct net_device *peer = rtnl_dereference(nk->peer); |
873 | |
874 | if (nla_put_u8(skb, attrtype: IFLA_NETKIT_PRIMARY, value: nk->primary)) |
875 | return -EMSGSIZE; |
876 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_POLICY, value: nk->policy)) |
877 | return -EMSGSIZE; |
878 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_MODE, value: nk->mode)) |
879 | return -EMSGSIZE; |
880 | |
881 | if (peer) { |
882 | nk = netkit_priv(dev: peer); |
883 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_PEER_POLICY, value: nk->policy)) |
884 | return -EMSGSIZE; |
885 | } |
886 | |
887 | return 0; |
888 | } |
889 | |
890 | static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { |
891 | [IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) }, |
892 | [IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, |
893 | [IFLA_NETKIT_MODE] = { .type = NLA_U32 }, |
894 | [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 }, |
895 | [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, |
896 | .reject_message = "Primary attribute is read-only" }, |
897 | }; |
898 | |
899 | static struct rtnl_link_ops netkit_link_ops = { |
900 | .kind = DRV_NAME, |
901 | .priv_size = sizeof(struct netkit), |
902 | .setup = netkit_setup, |
903 | .newlink = netkit_new_link, |
904 | .dellink = netkit_del_link, |
905 | .changelink = netkit_change_link, |
906 | .get_link_net = netkit_get_link_net, |
907 | .get_size = netkit_get_size, |
908 | .fill_info = netkit_fill_info, |
909 | .policy = netkit_policy, |
910 | .validate = netkit_validate, |
911 | .maxtype = IFLA_NETKIT_MAX, |
912 | }; |
913 | |
914 | static __init int netkit_init(void) |
915 | { |
916 | BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || |
917 | (int)NETKIT_PASS != (int)TCX_PASS || |
918 | (int)NETKIT_DROP != (int)TCX_DROP || |
919 | (int)NETKIT_REDIRECT != (int)TCX_REDIRECT); |
920 | |
921 | return rtnl_link_register(ops: &netkit_link_ops); |
922 | } |
923 | |
924 | static __exit void netkit_exit(void) |
925 | { |
926 | rtnl_link_unregister(ops: &netkit_link_ops); |
927 | } |
928 | |
929 | module_init(netkit_init); |
930 | module_exit(netkit_exit); |
931 | |
932 | MODULE_DESCRIPTION("BPF-programmable network device" ); |
933 | MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>" ); |
934 | MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>" ); |
935 | MODULE_LICENSE("GPL" ); |
936 | MODULE_ALIAS_RTNL_LINK(DRV_NAME); |
937 | |