1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2023 Isovalent */ |
3 | |
4 | #include <linux/netdevice.h> |
5 | #include <linux/ethtool.h> |
6 | #include <linux/etherdevice.h> |
7 | #include <linux/filter.h> |
8 | #include <linux/netfilter_netdev.h> |
9 | #include <linux/bpf_mprog.h> |
10 | #include <linux/indirect_call_wrapper.h> |
11 | |
12 | #include <net/netkit.h> |
13 | #include <net/dst.h> |
14 | #include <net/tcx.h> |
15 | |
16 | #define DRV_NAME "netkit" |
17 | |
18 | struct netkit { |
19 | /* Needed in fast-path */ |
20 | struct net_device __rcu *peer; |
21 | struct bpf_mprog_entry __rcu *active; |
22 | enum netkit_action policy; |
23 | enum netkit_scrub scrub; |
24 | struct bpf_mprog_bundle bundle; |
25 | |
26 | /* Needed in slow-path */ |
27 | enum netkit_mode mode; |
28 | bool primary; |
29 | u32 headroom; |
30 | }; |
31 | |
32 | struct netkit_link { |
33 | struct bpf_link link; |
34 | struct net_device *dev; |
35 | u32 location; |
36 | }; |
37 | |
38 | static __always_inline int |
39 | netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, |
40 | enum netkit_action ret) |
41 | { |
42 | const struct bpf_mprog_fp *fp; |
43 | const struct bpf_prog *prog; |
44 | |
45 | bpf_mprog_foreach_prog(entry, fp, prog) { |
46 | bpf_compute_data_pointers(skb); |
47 | ret = bpf_prog_run(prog, ctx: skb); |
48 | if (ret != NETKIT_NEXT) |
49 | break; |
50 | } |
51 | return ret; |
52 | } |
53 | |
54 | static void netkit_xnet(struct sk_buff *skb) |
55 | { |
56 | skb->priority = 0; |
57 | skb->mark = 0; |
58 | } |
59 | |
60 | static void netkit_prep_forward(struct sk_buff *skb, |
61 | bool xnet, bool xnet_scrub) |
62 | { |
63 | skb_scrub_packet(skb, xnet: false); |
64 | nf_skip_egress(skb, skip: true); |
65 | skb_reset_mac_header(skb); |
66 | if (!xnet) |
67 | return; |
68 | skb_clear_tstamp(skb); |
69 | if (xnet_scrub) |
70 | netkit_xnet(skb); |
71 | } |
72 | |
/* Typed accessor for the netkit private area of @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	struct netkit *nk = netdev_priv(dev);

	return nk;
}
77 | |
78 | static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) |
79 | { |
80 | struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; |
81 | struct netkit *nk = netkit_priv(dev); |
82 | enum netkit_action ret = READ_ONCE(nk->policy); |
83 | netdev_tx_t ret_dev = NET_XMIT_SUCCESS; |
84 | const struct bpf_mprog_entry *entry; |
85 | struct net_device *peer; |
86 | int len = skb->len; |
87 | |
88 | bpf_net_ctx = bpf_net_ctx_set(bpf_net_ctx: &__bpf_net_ctx); |
89 | rcu_read_lock(); |
90 | peer = rcu_dereference(nk->peer); |
91 | if (unlikely(!peer || !(peer->flags & IFF_UP) || |
92 | !pskb_may_pull(skb, ETH_HLEN) || |
93 | skb_orphan_frags(skb, GFP_ATOMIC))) |
94 | goto drop; |
95 | netkit_prep_forward(skb, xnet: !net_eq(net1: dev_net(dev), net2: dev_net(dev: peer)), |
96 | xnet_scrub: nk->scrub); |
97 | eth_skb_pkt_type(skb, dev: peer); |
98 | skb->dev = peer; |
99 | entry = rcu_dereference(nk->active); |
100 | if (entry) |
101 | ret = netkit_run(entry, skb, ret); |
102 | switch (ret) { |
103 | case NETKIT_NEXT: |
104 | case NETKIT_PASS: |
105 | eth_skb_pull_mac(skb); |
106 | skb_postpull_rcsum(skb, start: eth_hdr(skb), ETH_HLEN); |
107 | if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { |
108 | dev_sw_netstats_tx_add(dev, packets: 1, len); |
109 | dev_sw_netstats_rx_add(dev: peer, len); |
110 | } else { |
111 | goto drop_stats; |
112 | } |
113 | break; |
114 | case NETKIT_REDIRECT: |
115 | dev_sw_netstats_tx_add(dev, packets: 1, len); |
116 | skb_do_redirect(skb); |
117 | break; |
118 | case NETKIT_DROP: |
119 | default: |
120 | drop: |
121 | kfree_skb(skb); |
122 | drop_stats: |
123 | dev_core_stats_tx_dropped_inc(dev); |
124 | ret_dev = NET_XMIT_DROP; |
125 | break; |
126 | } |
127 | rcu_read_unlock(); |
128 | bpf_net_ctx_clear(bpf_net_ctx); |
129 | return ret_dev; |
130 | } |
131 | |
132 | static int netkit_open(struct net_device *dev) |
133 | { |
134 | struct netkit *nk = netkit_priv(dev); |
135 | struct net_device *peer = rtnl_dereference(nk->peer); |
136 | |
137 | if (!peer) |
138 | return -ENOTCONN; |
139 | if (peer->flags & IFF_UP) { |
140 | netif_carrier_on(dev); |
141 | netif_carrier_on(dev: peer); |
142 | } |
143 | return 0; |
144 | } |
145 | |
146 | static int netkit_close(struct net_device *dev) |
147 | { |
148 | struct netkit *nk = netkit_priv(dev); |
149 | struct net_device *peer = rtnl_dereference(nk->peer); |
150 | |
151 | netif_carrier_off(dev); |
152 | if (peer) |
153 | netif_carrier_off(dev: peer); |
154 | return 0; |
155 | } |
156 | |
157 | static int netkit_get_iflink(const struct net_device *dev) |
158 | { |
159 | struct netkit *nk = netkit_priv(dev); |
160 | struct net_device *peer; |
161 | int iflink = 0; |
162 | |
163 | rcu_read_lock(); |
164 | peer = rcu_dereference(nk->peer); |
165 | if (peer) |
166 | iflink = READ_ONCE(peer->ifindex); |
167 | rcu_read_unlock(); |
168 | return iflink; |
169 | } |
170 | |
/* ndo_set_rx_mode: intentionally a no-op. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* Nothing to do, we receive whatever gets pushed to us! */
}
175 | |
176 | static int netkit_set_macaddr(struct net_device *dev, void *sa) |
177 | { |
178 | struct netkit *nk = netkit_priv(dev); |
179 | |
180 | if (nk->mode != NETKIT_L2) |
181 | return -EOPNOTSUPP; |
182 | |
183 | return eth_mac_addr(dev, p: sa); |
184 | } |
185 | |
186 | static void netkit_set_headroom(struct net_device *dev, int headroom) |
187 | { |
188 | struct netkit *nk = netkit_priv(dev), *nk2; |
189 | struct net_device *peer; |
190 | |
191 | if (headroom < 0) |
192 | headroom = NET_SKB_PAD; |
193 | |
194 | rcu_read_lock(); |
195 | peer = rcu_dereference(nk->peer); |
196 | if (unlikely(!peer)) |
197 | goto out; |
198 | |
199 | nk2 = netkit_priv(dev: peer); |
200 | nk->headroom = headroom; |
201 | headroom = max(nk->headroom, nk2->headroom); |
202 | |
203 | peer->needed_headroom = headroom; |
204 | dev->needed_headroom = headroom; |
205 | out: |
206 | rcu_read_unlock(); |
207 | } |
208 | |
209 | INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev) |
210 | { |
211 | return rcu_dereference(netkit_priv(dev)->peer); |
212 | } |
213 | |
214 | static void netkit_get_stats(struct net_device *dev, |
215 | struct rtnl_link_stats64 *stats) |
216 | { |
217 | dev_fetch_sw_netstats(s: stats, netstats: dev->tstats); |
218 | stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); |
219 | } |
220 | |
221 | static void netkit_uninit(struct net_device *dev); |
222 | |
223 | static const struct net_device_ops netkit_netdev_ops = { |
224 | .ndo_open = netkit_open, |
225 | .ndo_stop = netkit_close, |
226 | .ndo_start_xmit = netkit_xmit, |
227 | .ndo_set_rx_mode = netkit_set_multicast, |
228 | .ndo_set_rx_headroom = netkit_set_headroom, |
229 | .ndo_set_mac_address = netkit_set_macaddr, |
230 | .ndo_get_iflink = netkit_get_iflink, |
231 | .ndo_get_peer_dev = netkit_peer_dev, |
232 | .ndo_get_stats64 = netkit_get_stats, |
233 | .ndo_uninit = netkit_uninit, |
234 | .ndo_features_check = passthru_features_check, |
235 | }; |
236 | |
237 | static void netkit_get_drvinfo(struct net_device *dev, |
238 | struct ethtool_drvinfo *info) |
239 | { |
240 | strscpy(info->driver, DRV_NAME, sizeof(info->driver)); |
241 | } |
242 | |
243 | static const struct ethtool_ops netkit_ethtool_ops = { |
244 | .get_drvinfo = netkit_get_drvinfo, |
245 | }; |
246 | |
247 | static void netkit_setup(struct net_device *dev) |
248 | { |
249 | static const netdev_features_t netkit_features_hw_vlan = |
250 | NETIF_F_HW_VLAN_CTAG_TX | |
251 | NETIF_F_HW_VLAN_CTAG_RX | |
252 | NETIF_F_HW_VLAN_STAG_TX | |
253 | NETIF_F_HW_VLAN_STAG_RX; |
254 | static const netdev_features_t netkit_features = |
255 | netkit_features_hw_vlan | |
256 | NETIF_F_SG | |
257 | NETIF_F_FRAGLIST | |
258 | NETIF_F_HW_CSUM | |
259 | NETIF_F_RXCSUM | |
260 | NETIF_F_SCTP_CRC | |
261 | NETIF_F_HIGHDMA | |
262 | NETIF_F_GSO_SOFTWARE | |
263 | NETIF_F_GSO_ENCAP_ALL; |
264 | |
265 | ether_setup(dev); |
266 | dev->max_mtu = ETH_MAX_MTU; |
267 | dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; |
268 | |
269 | dev->flags |= IFF_NOARP; |
270 | dev->priv_flags &= ~IFF_TX_SKB_SHARING; |
271 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; |
272 | dev->priv_flags |= IFF_PHONY_HEADROOM; |
273 | dev->priv_flags |= IFF_NO_QUEUE; |
274 | dev->priv_flags |= IFF_DISABLE_NETPOLL; |
275 | dev->lltx = true; |
276 | |
277 | dev->ethtool_ops = &netkit_ethtool_ops; |
278 | dev->netdev_ops = &netkit_netdev_ops; |
279 | |
280 | dev->features |= netkit_features; |
281 | dev->hw_features = netkit_features; |
282 | dev->hw_enc_features = netkit_features; |
283 | dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; |
284 | dev->vlan_features = dev->features & ~netkit_features_hw_vlan; |
285 | |
286 | dev->needs_free_netdev = true; |
287 | |
288 | netif_set_tso_max_size(dev, GSO_MAX_SIZE); |
289 | } |
290 | |
291 | static struct net *netkit_get_link_net(const struct net_device *dev) |
292 | { |
293 | struct netkit *nk = netkit_priv(dev); |
294 | struct net_device *peer = rtnl_dereference(nk->peer); |
295 | |
296 | return peer ? dev_net(dev: peer) : dev_net(dev); |
297 | } |
298 | |
299 | static int netkit_check_policy(int policy, struct nlattr *tb, |
300 | struct netlink_ext_ack *extack) |
301 | { |
302 | switch (policy) { |
303 | case NETKIT_PASS: |
304 | case NETKIT_DROP: |
305 | return 0; |
306 | default: |
307 | NL_SET_ERR_MSG_ATTR(extack, tb, |
308 | "Provided default xmit policy not supported" ); |
309 | return -EINVAL; |
310 | } |
311 | } |
312 | |
313 | static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], |
314 | struct netlink_ext_ack *extack) |
315 | { |
316 | struct nlattr *attr = tb[IFLA_ADDRESS]; |
317 | |
318 | if (!attr) |
319 | return 0; |
320 | if (nla_len(nla: attr) != ETH_ALEN) |
321 | return -EINVAL; |
322 | if (!is_valid_ether_addr(addr: nla_data(nla: attr))) |
323 | return -EADDRNOTAVAIL; |
324 | return 0; |
325 | } |
326 | |
327 | static struct rtnl_link_ops netkit_link_ops; |
328 | |
329 | static int netkit_new_link(struct net_device *dev, |
330 | struct rtnl_newlink_params *params, |
331 | struct netlink_ext_ack *extack) |
332 | { |
333 | struct net *peer_net = rtnl_newlink_peer_net(p: params); |
334 | enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; |
335 | enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; |
336 | struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr; |
337 | enum netkit_action policy_prim = NETKIT_PASS; |
338 | enum netkit_action policy_peer = NETKIT_PASS; |
339 | struct nlattr **data = params->data; |
340 | enum netkit_mode mode = NETKIT_L3; |
341 | unsigned char ifname_assign_type; |
342 | struct nlattr **tb = params->tb; |
343 | u16 headroom = 0, tailroom = 0; |
344 | struct ifinfomsg *ifmp = NULL; |
345 | struct net_device *peer; |
346 | char ifname[IFNAMSIZ]; |
347 | struct netkit *nk; |
348 | int err; |
349 | |
350 | tbp = tb; |
351 | if (data) { |
352 | if (data[IFLA_NETKIT_MODE]) |
353 | mode = nla_get_u32(nla: data[IFLA_NETKIT_MODE]); |
354 | if (data[IFLA_NETKIT_PEER_INFO]) { |
355 | attr = data[IFLA_NETKIT_PEER_INFO]; |
356 | ifmp = nla_data(nla: attr); |
357 | rtnl_nla_parse_ifinfomsg(tb: peer_tb, nla_peer: attr, exterr: extack); |
358 | tbp = peer_tb; |
359 | } |
360 | if (data[IFLA_NETKIT_SCRUB]) |
361 | scrub_prim = nla_get_u32(nla: data[IFLA_NETKIT_SCRUB]); |
362 | if (data[IFLA_NETKIT_PEER_SCRUB]) |
363 | scrub_peer = nla_get_u32(nla: data[IFLA_NETKIT_PEER_SCRUB]); |
364 | if (data[IFLA_NETKIT_POLICY]) { |
365 | attr = data[IFLA_NETKIT_POLICY]; |
366 | policy_prim = nla_get_u32(nla: attr); |
367 | err = netkit_check_policy(policy: policy_prim, tb: attr, extack); |
368 | if (err < 0) |
369 | return err; |
370 | } |
371 | if (data[IFLA_NETKIT_PEER_POLICY]) { |
372 | attr = data[IFLA_NETKIT_PEER_POLICY]; |
373 | policy_peer = nla_get_u32(nla: attr); |
374 | err = netkit_check_policy(policy: policy_peer, tb: attr, extack); |
375 | if (err < 0) |
376 | return err; |
377 | } |
378 | if (data[IFLA_NETKIT_HEADROOM]) |
379 | headroom = nla_get_u16(nla: data[IFLA_NETKIT_HEADROOM]); |
380 | if (data[IFLA_NETKIT_TAILROOM]) |
381 | tailroom = nla_get_u16(nla: data[IFLA_NETKIT_TAILROOM]); |
382 | } |
383 | |
384 | if (ifmp && tbp[IFLA_IFNAME]) { |
385 | nla_strscpy(dst: ifname, nla: tbp[IFLA_IFNAME], IFNAMSIZ); |
386 | ifname_assign_type = NET_NAME_USER; |
387 | } else { |
388 | strscpy(ifname, "nk%d" , IFNAMSIZ); |
389 | ifname_assign_type = NET_NAME_ENUM; |
390 | } |
391 | if (mode != NETKIT_L2 && |
392 | (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) |
393 | return -EOPNOTSUPP; |
394 | |
395 | peer = rtnl_create_link(net: peer_net, ifname, name_assign_type: ifname_assign_type, |
396 | ops: &netkit_link_ops, tb: tbp, extack); |
397 | if (IS_ERR(ptr: peer)) |
398 | return PTR_ERR(ptr: peer); |
399 | |
400 | netif_inherit_tso_max(to: peer, from: dev); |
401 | if (headroom) { |
402 | peer->needed_headroom = headroom; |
403 | dev->needed_headroom = headroom; |
404 | } |
405 | if (tailroom) { |
406 | peer->needed_tailroom = tailroom; |
407 | dev->needed_tailroom = tailroom; |
408 | } |
409 | |
410 | if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) |
411 | eth_hw_addr_random(dev: peer); |
412 | if (ifmp && dev->ifindex) |
413 | peer->ifindex = ifmp->ifi_index; |
414 | |
415 | nk = netkit_priv(dev: peer); |
416 | nk->primary = false; |
417 | nk->policy = policy_peer; |
418 | nk->scrub = scrub_peer; |
419 | nk->mode = mode; |
420 | nk->headroom = headroom; |
421 | bpf_mprog_bundle_init(bundle: &nk->bundle); |
422 | |
423 | err = register_netdevice(dev: peer); |
424 | if (err < 0) |
425 | goto err_register_peer; |
426 | netif_carrier_off(dev: peer); |
427 | if (mode == NETKIT_L2) |
428 | dev_change_flags(dev: peer, flags: peer->flags & ~IFF_NOARP, NULL); |
429 | |
430 | err = rtnl_configure_link(dev: peer, NULL, portid: 0, NULL); |
431 | if (err < 0) |
432 | goto err_configure_peer; |
433 | |
434 | if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS]) |
435 | eth_hw_addr_random(dev); |
436 | if (tb[IFLA_IFNAME]) |
437 | nla_strscpy(dst: dev->name, nla: tb[IFLA_IFNAME], IFNAMSIZ); |
438 | else |
439 | strscpy(dev->name, "nk%d" , IFNAMSIZ); |
440 | |
441 | nk = netkit_priv(dev); |
442 | nk->primary = true; |
443 | nk->policy = policy_prim; |
444 | nk->scrub = scrub_prim; |
445 | nk->mode = mode; |
446 | nk->headroom = headroom; |
447 | bpf_mprog_bundle_init(bundle: &nk->bundle); |
448 | |
449 | err = register_netdevice(dev); |
450 | if (err < 0) |
451 | goto err_configure_peer; |
452 | netif_carrier_off(dev); |
453 | if (mode == NETKIT_L2) |
454 | dev_change_flags(dev, flags: dev->flags & ~IFF_NOARP, NULL); |
455 | |
456 | rcu_assign_pointer(netkit_priv(dev)->peer, peer); |
457 | rcu_assign_pointer(netkit_priv(peer)->peer, dev); |
458 | return 0; |
459 | err_configure_peer: |
460 | unregister_netdevice(dev: peer); |
461 | return err; |
462 | err_register_peer: |
463 | free_netdev(dev: peer); |
464 | return err; |
465 | } |
466 | |
467 | static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev, |
468 | bool bundle_fallback) |
469 | { |
470 | struct netkit *nk = netkit_priv(dev); |
471 | struct bpf_mprog_entry *entry; |
472 | |
473 | ASSERT_RTNL(); |
474 | entry = rcu_dereference_rtnl(nk->active); |
475 | if (entry) |
476 | return entry; |
477 | if (bundle_fallback) |
478 | return &nk->bundle.a; |
479 | return NULL; |
480 | } |
481 | |
482 | static void netkit_entry_update(struct net_device *dev, |
483 | struct bpf_mprog_entry *entry) |
484 | { |
485 | struct netkit *nk = netkit_priv(dev); |
486 | |
487 | ASSERT_RTNL(); |
488 | rcu_assign_pointer(nk->active, entry); |
489 | } |
490 | |
/* Wait for an RCU grace period after swapping the active entry. */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
495 | |
496 | static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which) |
497 | { |
498 | struct net_device *dev; |
499 | struct netkit *nk; |
500 | |
501 | ASSERT_RTNL(); |
502 | |
503 | switch (which) { |
504 | case BPF_NETKIT_PRIMARY: |
505 | case BPF_NETKIT_PEER: |
506 | break; |
507 | default: |
508 | return ERR_PTR(error: -EINVAL); |
509 | } |
510 | |
511 | dev = __dev_get_by_index(net, ifindex); |
512 | if (!dev) |
513 | return ERR_PTR(error: -ENODEV); |
514 | if (dev->netdev_ops != &netkit_netdev_ops) |
515 | return ERR_PTR(error: -ENXIO); |
516 | |
517 | nk = netkit_priv(dev); |
518 | if (!nk->primary) |
519 | return ERR_PTR(error: -EACCES); |
520 | if (which == BPF_NETKIT_PEER) { |
521 | dev = rcu_dereference_rtnl(nk->peer); |
522 | if (!dev) |
523 | return ERR_PTR(error: -ENODEV); |
524 | } |
525 | return dev; |
526 | } |
527 | |
528 | int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
529 | { |
530 | struct bpf_mprog_entry *entry, *entry_new; |
531 | struct bpf_prog *replace_prog = NULL; |
532 | struct net_device *dev; |
533 | int ret; |
534 | |
535 | rtnl_lock(); |
536 | dev = netkit_dev_fetch(current->nsproxy->net_ns, ifindex: attr->target_ifindex, |
537 | which: attr->attach_type); |
538 | if (IS_ERR(ptr: dev)) { |
539 | ret = PTR_ERR(ptr: dev); |
540 | goto out; |
541 | } |
542 | entry = netkit_entry_fetch(dev, bundle_fallback: true); |
543 | if (attr->attach_flags & BPF_F_REPLACE) { |
544 | replace_prog = bpf_prog_get_type(ufd: attr->replace_bpf_fd, |
545 | type: prog->type); |
546 | if (IS_ERR(ptr: replace_prog)) { |
547 | ret = PTR_ERR(ptr: replace_prog); |
548 | replace_prog = NULL; |
549 | goto out; |
550 | } |
551 | } |
552 | ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: prog, NULL, prog_old: replace_prog, |
553 | flags: attr->attach_flags, id_or_fd: attr->relative_fd, |
554 | revision: attr->expected_revision); |
555 | if (!ret) { |
556 | if (entry != entry_new) { |
557 | netkit_entry_update(dev, entry: entry_new); |
558 | netkit_entry_sync(); |
559 | } |
560 | bpf_mprog_commit(entry); |
561 | } |
562 | out: |
563 | if (replace_prog) |
564 | bpf_prog_put(prog: replace_prog); |
565 | rtnl_unlock(); |
566 | return ret; |
567 | } |
568 | |
569 | int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) |
570 | { |
571 | struct bpf_mprog_entry *entry, *entry_new; |
572 | struct net_device *dev; |
573 | int ret; |
574 | |
575 | rtnl_lock(); |
576 | dev = netkit_dev_fetch(current->nsproxy->net_ns, ifindex: attr->target_ifindex, |
577 | which: attr->attach_type); |
578 | if (IS_ERR(ptr: dev)) { |
579 | ret = PTR_ERR(ptr: dev); |
580 | goto out; |
581 | } |
582 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
583 | if (!entry) { |
584 | ret = -ENOENT; |
585 | goto out; |
586 | } |
587 | ret = bpf_mprog_detach(entry, entry_new: &entry_new, prog, NULL, flags: attr->attach_flags, |
588 | id_or_fd: attr->relative_fd, revision: attr->expected_revision); |
589 | if (!ret) { |
590 | if (!bpf_mprog_total(entry: entry_new)) |
591 | entry_new = NULL; |
592 | netkit_entry_update(dev, entry: entry_new); |
593 | netkit_entry_sync(); |
594 | bpf_mprog_commit(entry); |
595 | } |
596 | out: |
597 | rtnl_unlock(); |
598 | return ret; |
599 | } |
600 | |
601 | int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) |
602 | { |
603 | struct net_device *dev; |
604 | int ret; |
605 | |
606 | rtnl_lock(); |
607 | dev = netkit_dev_fetch(current->nsproxy->net_ns, |
608 | ifindex: attr->query.target_ifindex, |
609 | which: attr->query.attach_type); |
610 | if (IS_ERR(ptr: dev)) { |
611 | ret = PTR_ERR(ptr: dev); |
612 | goto out; |
613 | } |
614 | ret = bpf_mprog_query(attr, uattr, entry: netkit_entry_fetch(dev, bundle_fallback: false)); |
615 | out: |
616 | rtnl_unlock(); |
617 | return ret; |
618 | } |
619 | |
620 | static struct netkit_link *netkit_link(const struct bpf_link *link) |
621 | { |
622 | return container_of(link, struct netkit_link, link); |
623 | } |
624 | |
625 | static int netkit_link_prog_attach(struct bpf_link *link, u32 flags, |
626 | u32 id_or_fd, u64 revision) |
627 | { |
628 | struct netkit_link *nkl = netkit_link(link); |
629 | struct bpf_mprog_entry *entry, *entry_new; |
630 | struct net_device *dev = nkl->dev; |
631 | int ret; |
632 | |
633 | ASSERT_RTNL(); |
634 | entry = netkit_entry_fetch(dev, bundle_fallback: true); |
635 | ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: link->prog, link, NULL, flags, |
636 | id_or_fd, revision); |
637 | if (!ret) { |
638 | if (entry != entry_new) { |
639 | netkit_entry_update(dev, entry: entry_new); |
640 | netkit_entry_sync(); |
641 | } |
642 | bpf_mprog_commit(entry); |
643 | } |
644 | return ret; |
645 | } |
646 | |
647 | static void netkit_link_release(struct bpf_link *link) |
648 | { |
649 | struct netkit_link *nkl = netkit_link(link); |
650 | struct bpf_mprog_entry *entry, *entry_new; |
651 | struct net_device *dev; |
652 | int ret = 0; |
653 | |
654 | rtnl_lock(); |
655 | dev = nkl->dev; |
656 | if (!dev) |
657 | goto out; |
658 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
659 | if (!entry) { |
660 | ret = -ENOENT; |
661 | goto out; |
662 | } |
663 | ret = bpf_mprog_detach(entry, entry_new: &entry_new, prog: link->prog, link, flags: 0, id_or_fd: 0, revision: 0); |
664 | if (!ret) { |
665 | if (!bpf_mprog_total(entry: entry_new)) |
666 | entry_new = NULL; |
667 | netkit_entry_update(dev, entry: entry_new); |
668 | netkit_entry_sync(); |
669 | bpf_mprog_commit(entry); |
670 | nkl->dev = NULL; |
671 | } |
672 | out: |
673 | WARN_ON_ONCE(ret); |
674 | rtnl_unlock(); |
675 | } |
676 | |
677 | static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog, |
678 | struct bpf_prog *oprog) |
679 | { |
680 | struct netkit_link *nkl = netkit_link(link); |
681 | struct bpf_mprog_entry *entry, *entry_new; |
682 | struct net_device *dev; |
683 | int ret = 0; |
684 | |
685 | rtnl_lock(); |
686 | dev = nkl->dev; |
687 | if (!dev) { |
688 | ret = -ENOLINK; |
689 | goto out; |
690 | } |
691 | if (oprog && link->prog != oprog) { |
692 | ret = -EPERM; |
693 | goto out; |
694 | } |
695 | oprog = link->prog; |
696 | if (oprog == nprog) { |
697 | bpf_prog_put(prog: nprog); |
698 | goto out; |
699 | } |
700 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
701 | if (!entry) { |
702 | ret = -ENOENT; |
703 | goto out; |
704 | } |
705 | ret = bpf_mprog_attach(entry, entry_new: &entry_new, prog_new: nprog, link, prog_old: oprog, |
706 | BPF_F_REPLACE | BPF_F_ID, |
707 | id_or_fd: link->prog->aux->id, revision: 0); |
708 | if (!ret) { |
709 | WARN_ON_ONCE(entry != entry_new); |
710 | oprog = xchg(&link->prog, nprog); |
711 | bpf_prog_put(prog: oprog); |
712 | bpf_mprog_commit(entry); |
713 | } |
714 | out: |
715 | rtnl_unlock(); |
716 | return ret; |
717 | } |
718 | |
/* bpf_link dealloc: free the containing netkit_link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);

	kfree(nkl);
}
723 | |
724 | static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq) |
725 | { |
726 | const struct netkit_link *nkl = netkit_link(link); |
727 | u32 ifindex = 0; |
728 | |
729 | rtnl_lock(); |
730 | if (nkl->dev) |
731 | ifindex = nkl->dev->ifindex; |
732 | rtnl_unlock(); |
733 | |
734 | seq_printf(m: seq, fmt: "ifindex:\t%u\n" , ifindex); |
735 | seq_printf(m: seq, fmt: "attach_type:\t%u (%s)\n" , |
736 | nkl->location, |
737 | nkl->location == BPF_NETKIT_PRIMARY ? "primary" : "peer" ); |
738 | } |
739 | |
740 | static int netkit_link_fill_info(const struct bpf_link *link, |
741 | struct bpf_link_info *info) |
742 | { |
743 | const struct netkit_link *nkl = netkit_link(link); |
744 | u32 ifindex = 0; |
745 | |
746 | rtnl_lock(); |
747 | if (nkl->dev) |
748 | ifindex = nkl->dev->ifindex; |
749 | rtnl_unlock(); |
750 | |
751 | info->netkit.ifindex = ifindex; |
752 | info->netkit.attach_type = nkl->location; |
753 | return 0; |
754 | } |
755 | |
/* bpf_link detach: same work as release; always reports success. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);

	return 0;
}
761 | |
762 | static const struct bpf_link_ops netkit_link_lops = { |
763 | .release = netkit_link_release, |
764 | .detach = netkit_link_detach, |
765 | .dealloc = netkit_link_dealloc, |
766 | .update_prog = netkit_link_update, |
767 | .show_fdinfo = netkit_link_fdinfo, |
768 | .fill_link_info = netkit_link_fill_info, |
769 | }; |
770 | |
771 | static int netkit_link_init(struct netkit_link *nkl, |
772 | struct bpf_link_primer *link_primer, |
773 | const union bpf_attr *attr, |
774 | struct net_device *dev, |
775 | struct bpf_prog *prog) |
776 | { |
777 | bpf_link_init(link: &nkl->link, type: BPF_LINK_TYPE_NETKIT, |
778 | ops: &netkit_link_lops, prog); |
779 | nkl->location = attr->link_create.attach_type; |
780 | nkl->dev = dev; |
781 | return bpf_link_prime(link: &nkl->link, primer: link_primer); |
782 | } |
783 | |
784 | int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
785 | { |
786 | struct bpf_link_primer link_primer; |
787 | struct netkit_link *nkl; |
788 | struct net_device *dev; |
789 | int ret; |
790 | |
791 | rtnl_lock(); |
792 | dev = netkit_dev_fetch(current->nsproxy->net_ns, |
793 | ifindex: attr->link_create.target_ifindex, |
794 | which: attr->link_create.attach_type); |
795 | if (IS_ERR(ptr: dev)) { |
796 | ret = PTR_ERR(ptr: dev); |
797 | goto out; |
798 | } |
799 | nkl = kzalloc(sizeof(*nkl), GFP_KERNEL_ACCOUNT); |
800 | if (!nkl) { |
801 | ret = -ENOMEM; |
802 | goto out; |
803 | } |
804 | ret = netkit_link_init(nkl, link_primer: &link_primer, attr, dev, prog); |
805 | if (ret) { |
806 | kfree(objp: nkl); |
807 | goto out; |
808 | } |
809 | ret = netkit_link_prog_attach(link: &nkl->link, |
810 | flags: attr->link_create.flags, |
811 | id_or_fd: attr->link_create.netkit.relative_fd, |
812 | revision: attr->link_create.netkit.expected_revision); |
813 | if (ret) { |
814 | nkl->dev = NULL; |
815 | bpf_link_cleanup(primer: &link_primer); |
816 | goto out; |
817 | } |
818 | ret = bpf_link_settle(primer: &link_primer); |
819 | out: |
820 | rtnl_unlock(); |
821 | return ret; |
822 | } |
823 | |
824 | static void netkit_release_all(struct net_device *dev) |
825 | { |
826 | struct bpf_mprog_entry *entry; |
827 | struct bpf_tuple tuple = {}; |
828 | struct bpf_mprog_fp *fp; |
829 | struct bpf_mprog_cp *cp; |
830 | |
831 | entry = netkit_entry_fetch(dev, bundle_fallback: false); |
832 | if (!entry) |
833 | return; |
834 | netkit_entry_update(dev, NULL); |
835 | netkit_entry_sync(); |
836 | bpf_mprog_foreach_tuple(entry, fp, cp, tuple) { |
837 | if (tuple.link) |
838 | netkit_link(link: tuple.link)->dev = NULL; |
839 | else |
840 | bpf_prog_put(prog: tuple.prog); |
841 | } |
842 | } |
843 | |
/* ndo_uninit: release all BPF programs and links attached to @dev. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
}
848 | |
849 | static void netkit_del_link(struct net_device *dev, struct list_head *head) |
850 | { |
851 | struct netkit *nk = netkit_priv(dev); |
852 | struct net_device *peer = rtnl_dereference(nk->peer); |
853 | |
854 | RCU_INIT_POINTER(nk->peer, NULL); |
855 | unregister_netdevice_queue(dev, head); |
856 | if (peer) { |
857 | nk = netkit_priv(dev: peer); |
858 | RCU_INIT_POINTER(nk->peer, NULL); |
859 | unregister_netdevice_queue(dev: peer, head); |
860 | } |
861 | } |
862 | |
863 | static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], |
864 | struct nlattr *data[], |
865 | struct netlink_ext_ack *extack) |
866 | { |
867 | struct netkit *nk = netkit_priv(dev); |
868 | struct net_device *peer = rtnl_dereference(nk->peer); |
869 | enum netkit_action policy; |
870 | struct nlattr *attr; |
871 | int err, i; |
872 | static const struct { |
873 | u32 attr; |
874 | char *name; |
875 | } fixed_params[] = { |
876 | { IFLA_NETKIT_MODE, "operating mode" }, |
877 | { IFLA_NETKIT_SCRUB, "scrubbing" }, |
878 | { IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" }, |
879 | { IFLA_NETKIT_PEER_INFO, "peer info" }, |
880 | { IFLA_NETKIT_HEADROOM, "headroom" }, |
881 | { IFLA_NETKIT_TAILROOM, "tailroom" }, |
882 | }; |
883 | |
884 | if (!nk->primary) { |
885 | NL_SET_ERR_MSG(extack, |
886 | "netkit link settings can be changed only through the primary device" ); |
887 | return -EACCES; |
888 | } |
889 | |
890 | for (i = 0; i < ARRAY_SIZE(fixed_params); i++) { |
891 | attr = data[fixed_params[i].attr]; |
892 | if (attr) { |
893 | NL_SET_ERR_MSG_ATTR_FMT(extack, attr, |
894 | "netkit link %s cannot be changed after device creation" , |
895 | fixed_params[i].name); |
896 | return -EACCES; |
897 | } |
898 | } |
899 | |
900 | if (data[IFLA_NETKIT_POLICY]) { |
901 | attr = data[IFLA_NETKIT_POLICY]; |
902 | policy = nla_get_u32(nla: attr); |
903 | err = netkit_check_policy(policy, tb: attr, extack); |
904 | if (err) |
905 | return err; |
906 | WRITE_ONCE(nk->policy, policy); |
907 | } |
908 | |
909 | if (data[IFLA_NETKIT_PEER_POLICY]) { |
910 | err = -EOPNOTSUPP; |
911 | attr = data[IFLA_NETKIT_PEER_POLICY]; |
912 | policy = nla_get_u32(nla: attr); |
913 | if (peer) |
914 | err = netkit_check_policy(policy, tb: attr, extack); |
915 | if (err) |
916 | return err; |
917 | nk = netkit_priv(dev: peer); |
918 | WRITE_ONCE(nk->policy, policy); |
919 | } |
920 | |
921 | return 0; |
922 | } |
923 | |
924 | static size_t netkit_get_size(const struct net_device *dev) |
925 | { |
926 | return nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_POLICY */ |
927 | nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */ |
928 | nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_SCRUB */ |
929 | nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */ |
930 | nla_total_size(payload: sizeof(u32)) + /* IFLA_NETKIT_MODE */ |
931 | nla_total_size(payload: sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ |
932 | nla_total_size(payload: sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */ |
933 | nla_total_size(payload: sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */ |
934 | 0; |
935 | } |
936 | |
937 | static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) |
938 | { |
939 | struct netkit *nk = netkit_priv(dev); |
940 | struct net_device *peer = rtnl_dereference(nk->peer); |
941 | |
942 | if (nla_put_u8(skb, attrtype: IFLA_NETKIT_PRIMARY, value: nk->primary)) |
943 | return -EMSGSIZE; |
944 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_POLICY, value: nk->policy)) |
945 | return -EMSGSIZE; |
946 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_MODE, value: nk->mode)) |
947 | return -EMSGSIZE; |
948 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_SCRUB, value: nk->scrub)) |
949 | return -EMSGSIZE; |
950 | if (nla_put_u16(skb, attrtype: IFLA_NETKIT_HEADROOM, value: dev->needed_headroom)) |
951 | return -EMSGSIZE; |
952 | if (nla_put_u16(skb, attrtype: IFLA_NETKIT_TAILROOM, value: dev->needed_tailroom)) |
953 | return -EMSGSIZE; |
954 | |
955 | if (peer) { |
956 | nk = netkit_priv(dev: peer); |
957 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_PEER_POLICY, value: nk->policy)) |
958 | return -EMSGSIZE; |
959 | if (nla_put_u32(skb, attrtype: IFLA_NETKIT_PEER_SCRUB, value: nk->scrub)) |
960 | return -EMSGSIZE; |
961 | } |
962 | |
963 | return 0; |
964 | } |
965 | |
966 | static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { |
967 | [IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) }, |
968 | [IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3), |
969 | [IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, |
970 | [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 }, |
971 | [IFLA_NETKIT_HEADROOM] = { .type = NLA_U16 }, |
972 | [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 }, |
973 | [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), |
974 | [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), |
975 | [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, |
976 | .reject_message = "Primary attribute is read-only" }, |
977 | }; |
978 | |
979 | static struct rtnl_link_ops netkit_link_ops = { |
980 | .kind = DRV_NAME, |
981 | .priv_size = sizeof(struct netkit), |
982 | .setup = netkit_setup, |
983 | .newlink = netkit_new_link, |
984 | .dellink = netkit_del_link, |
985 | .changelink = netkit_change_link, |
986 | .get_link_net = netkit_get_link_net, |
987 | .get_size = netkit_get_size, |
988 | .fill_info = netkit_fill_info, |
989 | .policy = netkit_policy, |
990 | .validate = netkit_validate, |
991 | .peer_type = IFLA_NETKIT_PEER_INFO, |
992 | .maxtype = IFLA_NETKIT_MAX, |
993 | }; |
994 | |
995 | static __init int netkit_init(void) |
996 | { |
997 | BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || |
998 | (int)NETKIT_PASS != (int)TCX_PASS || |
999 | (int)NETKIT_DROP != (int)TCX_DROP || |
1000 | (int)NETKIT_REDIRECT != (int)TCX_REDIRECT); |
1001 | |
1002 | return rtnl_link_register(ops: &netkit_link_ops); |
1003 | } |
1004 | |
1005 | static __exit void netkit_exit(void) |
1006 | { |
1007 | rtnl_link_unregister(ops: &netkit_link_ops); |
1008 | } |
1009 | |
1010 | module_init(netkit_init); |
1011 | module_exit(netkit_exit); |
1012 | |
1013 | MODULE_DESCRIPTION("BPF-programmable network device" ); |
1014 | MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>" ); |
1015 | MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>" ); |
1016 | MODULE_LICENSE("GPL" ); |
1017 | MODULE_ALIAS_RTNL_LINK(DRV_NAME); |
1018 | |