/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Operations on the network namespace
 */
#ifndef __NET_NET_NAMESPACE_H
#define __NET_NET_NAMESPACE_H

#include <linux/atomic.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/sysctl.h>
#include <linux/uidgid.h>

#include <net/flow.h>
#include <net/netns/core.h>
#include <net/netns/mib.h>
#include <net/netns/unix.h>
#include <net/netns/packet.h>
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
#include <net/netns/nexthop.h>
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
#include <net/netns/netfilter.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
#endif
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
#include <net/netns/flow_table.h>
#endif
#include <net/netns/nftables.h>
#include <net/netns/xfrm.h>
#include <net/netns/mpls.h>
#include <net/netns/can.h>
#include <net/netns/xdp.h>
#include <net/netns/smc.h>
#include <net/netns/bpf.h>
#include <net/netns/mctp.h>
#include <net/net_trackers.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/xarray.h>

struct user_namespace;
struct proc_dir_entry;
struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
struct uevent_sock;
struct netns_ipvs;
struct bpf_prog;


#define NETDEV_HASHBITS    8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

struct net {
	/* First cache line can be often dirtied.
	 * Do not place here read-mostly fields.
	 */
	refcount_t		passive;	/* To decide when the network
						 * namespace should be freed.
						 */
	spinlock_t		rules_mod_lock;

	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
	u32			ifindex;

	spinlock_t		nsid_lock;
	atomic_t		fnhe_genid;

	struct list_head	list;		/* list of network namespaces */
	struct list_head	exit_list;	/* Used to link dead nets for
						 * calling the pernet exit methods
						 * (pernet_ops_rwsem read locked),
						 * or to unregister pernet ops
						 * (pernet_ops_rwsem write locked).
						 */
	struct llist_node	cleanup_list;	/* namespaces on death row */

#ifdef CONFIG_KEYS
	struct key_tag		*key_domain;	/* Key domain of operation tag */
#endif
	struct user_namespace	*user_ns;	/* Owning user namespace */
	struct ucounts		*ucounts;
	struct idr		netns_ids;

	struct ns_common	ns;
	struct ref_tracker_dir	refcnt_tracker;
	struct ref_tracker_dir	notrefcnt_tracker; /* tracker for objects not
						    * refcounted against netns
						    */
	struct list_head	dev_base_head;
	struct proc_dir_entry	*proc_net;
	struct proc_dir_entry	*proc_net_stat;

#ifdef CONFIG_SYSCTL
	struct ctl_table_set	sysctls;
#endif

	struct sock		*rtnl;			/* rtnetlink socket */
	struct sock		*genl_sock;

	struct uevent_sock	*uevent_sock;		/* uevent socket */

	struct hlist_head	*dev_name_head;
	struct hlist_head	*dev_index_head;
	struct xarray		dev_by_index;
	struct raw_notifier_head	netdev_chain;

	/* Note that @hash_mix can be read millions of times per second,
	 * so it is critical that it is on a read_mostly cache line.
	 */
	u32			hash_mix;

	struct net_device	*loopback_dev;		/* The loopback */

	/* core fib_rules */
	struct list_head	rules_ops;

	struct netns_core	core;
	struct netns_mib	mib;
	struct netns_packet	packet;
#if IS_ENABLED(CONFIG_UNIX)
	struct netns_unix	unx;
#endif
	struct netns_nexthop	nexthop;
	struct netns_ipv4	ipv4;
#if IS_ENABLED(CONFIG_IPV6)
	struct netns_ipv6	ipv6;
#endif
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
	struct netns_ieee802154_lowpan	ieee802154_lowpan;
#endif
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
	struct netns_sctp	sctp;
#endif
#ifdef CONFIG_NETFILTER
	struct netns_nf		nf;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct netns_ct		ct;
#endif
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
	struct netns_nftables	nft;
#endif
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
	struct netns_ft		ft;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
	struct sk_buff_head	wext_nlevents;
#endif
	struct net_generic __rcu	*gen;

	/* Used to store attached BPF programs */
	struct netns_bpf	bpf;

	/* Note: the following structs are cache line aligned */
#ifdef CONFIG_XFRM
	struct netns_xfrm	xfrm;
#endif

	u64			net_cookie;	/* written once */

#if IS_ENABLED(CONFIG_IP_VS)
	struct netns_ipvs	*ipvs;
#endif
#if IS_ENABLED(CONFIG_MPLS)
	struct netns_mpls	mpls;
#endif
#if IS_ENABLED(CONFIG_CAN)
	struct netns_can	can;
#endif
#ifdef CONFIG_XDP_SOCKETS
	struct netns_xdp	xdp;
#endif
#if IS_ENABLED(CONFIG_MCTP)
	struct netns_mctp	mctp;
#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
	struct sock		*crypto_nlsk;
#endif
	struct sock		*diag_nlsk;
#if IS_ENABLED(CONFIG_SMC)
	struct netns_smc	smc;
#endif
} __randomize_layout;

#include <linux/seq_file_net.h>

/* Init's network namespace */
extern struct net init_net;

#ifdef CONFIG_NET_NS
struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
			struct net *old_net);

void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);

void net_ns_barrier(void);

struct ns_common *get_net_ns(struct ns_common *ns);
struct net *get_net_ns_by_fd(int fd);
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
static inline struct net *copy_net_ns(unsigned long flags,
	struct user_namespace *user_ns, struct net *old_net)
{
	if (flags & CLONE_NEWNET)
		return ERR_PTR(-EINVAL);
	return old_net;
}

static inline void net_ns_get_ownership(const struct net *net,
					kuid_t *uid, kgid_t *gid)
{
	*uid = GLOBAL_ROOT_UID;
	*gid = GLOBAL_ROOT_GID;
}

static inline void net_ns_barrier(void) {}

static inline struct ns_common *get_net_ns(struct ns_common *ns)
{
	return ERR_PTR(-EINVAL);
}

static inline struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif /* CONFIG_NET_NS */


extern struct list_head net_namespace_list;

struct net *get_net_ns_by_pid(pid_t pid);

#ifdef CONFIG_SYSCTL
void ipx_register_sysctl(void);
void ipx_unregister_sysctl(void);
#else
#define ipx_register_sysctl()
#define ipx_unregister_sysctl()
#endif

#ifdef CONFIG_NET_NS
void __put_net(struct net *net);

/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
	refcount_inc(&net->ns.count);
	return net;
}

static inline struct net *maybe_get_net(struct net *net)
{
	/* Used when we know struct net exists but we
	 * aren't guaranteed a previous reference count
	 * exists. If the reference count is zero this
	 * function fails and returns NULL.
	 */
	if (!refcount_inc_not_zero(&net->ns.count))
		net = NULL;
	return net;
}

/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
	if (refcount_dec_and_test(&net->ns.count))
		__put_net(net);
}

static inline
int net_eq(const struct net *net1, const struct net *net2)
{
	return net1 == net2;
}

static inline int check_net(const struct net *net)
{
	return refcount_read(&net->ns.count) != 0;
}

void net_drop_ns(void *);

#else

static inline struct net *get_net(struct net *net)
{
	return net;
}

static inline void put_net(struct net *net)
{
}

static inline struct net *maybe_get_net(struct net *net)
{
	return net;
}

static inline
int net_eq(const struct net *net1, const struct net *net2)
{
	return 1;
}

static inline int check_net(const struct net *net)
{
	return 1;
}

#define net_drop_ns NULL
#endif
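
/*
 * Usage sketch (illustrative only, not part of this header): a
 * hypothetical "foo" object pins the namespace it was created in with
 * get_net() and releases it with put_net(). maybe_get_net() is the
 * right call when only the existence of the struct net is known (for
 * example after an RCU lookup) and its refcount may already be zero.
 *
 *	struct foo {
 *		struct net	*net;
 *	};
 *
 *	static struct foo *foo_create(struct net *net)
 *	{
 *		struct foo *foo = kzalloc(sizeof(*foo), GFP_KERNEL);
 *
 *		if (!foo)
 *			return NULL;
 *		foo->net = get_net(net);
 *		return foo;
 *	}
 *
 *	static void foo_destroy(struct foo *foo)
 *	{
 *		put_net(foo->net);
 *		kfree(foo);
 *	}
 */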

static inline void __netns_tracker_alloc(struct net *net,
					 netns_tracker *tracker,
					 bool refcounted,
					 gfp_t gfp)
{
#ifdef CONFIG_NET_NS_REFCNT_TRACKER
	ref_tracker_alloc(refcounted ? &net->refcnt_tracker :
				       &net->notrefcnt_tracker,
			  tracker, gfp);
#endif
}

static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker,
					gfp_t gfp)
{
	__netns_tracker_alloc(net, tracker, true, gfp);
}

static inline void __netns_tracker_free(struct net *net,
					netns_tracker *tracker,
					bool refcounted)
{
#ifdef CONFIG_NET_NS_REFCNT_TRACKER
	ref_tracker_free(refcounted ? &net->refcnt_tracker :
				      &net->notrefcnt_tracker, tracker);
#endif
}

static inline struct net *get_net_track(struct net *net,
					netns_tracker *tracker, gfp_t gfp)
{
	get_net(net);
	netns_tracker_alloc(net, tracker, gfp);
	return net;
}

static inline void put_net_track(struct net *net, netns_tracker *tracker)
{
	__netns_tracker_free(net, tracker, true);
	put_net(net);
}
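
/*
 * Usage sketch (illustrative only): the tracked variants pair a netns
 * reference with a ref_tracker entry so reference leaks can be
 * attributed to their owner. A hypothetical "bar" object keeps the
 * tracker next to the pointer it covers:
 *
 *	struct bar {
 *		struct net	*net;
 *		netns_tracker	ns_tracker;
 *	};
 *
 *	bar->net = get_net_track(net, &bar->ns_tracker, GFP_KERNEL);
 *	...
 *	put_net_track(bar->net, &bar->ns_tracker);
 */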

typedef struct {
#ifdef CONFIG_NET_NS
	struct net __rcu *net;
#endif
} possible_net_t;

static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
	rcu_assign_pointer(pnet->net, net);
#endif
}

static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
	return rcu_dereference_protected(pnet->net, true);
#else
	return &init_net;
#endif
}

static inline struct net *read_pnet_rcu(possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
	return rcu_dereference(pnet->net);
#else
	return &init_net;
#endif
}
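
/*
 * Usage sketch (illustrative only): possible_net_t lets a structure
 * remember its namespace without growing when CONFIG_NET_NS is off.
 * Writers use write_pnet(); readers use read_pnet(), or read_pnet_rcu()
 * under rcu_read_lock(). The "baz" structure below is made up for the
 * example.
 *
 *	struct baz {
 *		possible_net_t	pnet;
 *	};
 *
 *	write_pnet(&baz->pnet, net);
 *	...
 *	if (net_eq(read_pnet(&baz->pnet), &init_net))
 *		...
 */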

/* Protected by net_rwsem */
#define for_each_net(VAR)				\
	list_for_each_entry(VAR, &net_namespace_list, list)
#define for_each_net_continue_reverse(VAR)		\
	list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
#define for_each_net_rcu(VAR)				\
	list_for_each_entry_rcu(VAR, &net_namespace_list, list)
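
/*
 * Usage sketch (illustrative only): walking all namespaces. The plain
 * iterators rely on net_rwsem as noted above, while for_each_net_rcu()
 * may be used under rcu_read_lock():
 *
 *	struct net *net;
 *
 *	rcu_read_lock();
 *	for_each_net_rcu(net) {
 *		... inspect net, no sleeping ...
 *	}
 *	rcu_read_unlock();
 */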

#ifdef CONFIG_NET_NS
#define __net_init
#define __net_exit
#define __net_initdata
#define __net_initconst
#else
#define __net_init	__init
#define __net_exit	__ref
#define __net_initdata	__initdata
#define __net_initconst	__initconst
#endif

int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
int peernet2id(const struct net *net, struct net *peer);
bool peernet_has_id(const struct net *net, struct net *peer);
struct net *get_net_ns_by_id(const struct net *net, int id);

struct pernet_operations {
	struct list_head list;
	/*
	 * The methods below are called without any exclusive locks.
	 * More than one net may be constructed and destructed
	 * in parallel on several CPUs. Every pernet_operations
	 * must therefore take all other pernet_operations into
	 * account and introduce locking if they share common resources.
	 *
	 * The only time they are called with an exclusive lock is
	 * from register_pernet_subsys(), unregister_pernet_subsys(),
	 * register_pernet_device() and unregister_pernet_device().
	 *
	 * Exit methods using blocking RCU primitives, such as
	 * synchronize_rcu(), should be implemented via exit_batch.
	 * Then destruction of a group of nets requires a single
	 * synchronize_rcu() related to these pernet_operations,
	 * instead of a separate synchronize_rcu() for every net.
	 * Please avoid synchronize_rcu() altogether where possible.
	 *
	 * Note that a combination of pre_exit() and exit() can
	 * be used, since a synchronize_rcu() is guaranteed between
	 * the calls.
	 */
	int (*init)(struct net *net);
	void (*pre_exit)(struct net *net);
	void (*exit)(struct net *net);
	void (*exit_batch)(struct list_head *net_exit_list);
	/* Following method is called with RTNL held. */
	void (*exit_batch_rtnl)(struct list_head *net_exit_list,
				struct list_head *dev_kill_list);
	unsigned int *id;
	size_t size;
};

/*
 * Use these carefully. If you implement a network device and it
 * needs per network namespace operations, use device pernet operations;
 * otherwise use pernet subsys operations.
 *
 * Network interfaces need to be removed from a dying netns _before_
 * subsys notifiers can be called, as most of the network code cleanup
 * (which is done from subsys notifiers) runs with the assumption that
 * dev_remove_pack has been called, so no new packets will arrive during
 * and after the cleanup functions have been called. dev_remove_pack
 * is not per namespace, so instead the guarantee of no more packets
 * arriving in a network namespace is provided by ensuring that all
 * network devices and all sockets have left the network namespace
 * before the cleanup methods are called.
 *
 * For the longest time the ipv4 icmp code was registered as a pernet
 * device, which caused kernel oopses and panics during network
 * namespace cleanup. So please don't get this wrong.
 */
int register_pernet_subsys(struct pernet_operations *);
void unregister_pernet_subsys(struct pernet_operations *);
int register_pernet_device(struct pernet_operations *);
void unregister_pernet_device(struct pernet_operations *);
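
/*
 * Usage sketch (illustrative only, hypothetical "foo" subsystem): a
 * typical pernet subsystem lets the core allocate its per-namespace
 * state through the ->id and ->size members, and looks it up with the
 * net_generic() helper from <net/netns/generic.h>:
 *
 *	static unsigned int foo_net_id __read_mostly;
 *
 *	struct foo_net {
 *		int some_setting;
 *	};
 *
 *	static int __net_init foo_net_init(struct net *net)
 *	{
 *		struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *		fn->some_setting = 0;
 *		return 0;
 *	}
 *
 *	static void __net_exit foo_net_exit(struct net *net)
 *	{
 *		... undo foo_net_init() for this net ...
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init	= foo_net_init,
 *		.exit	= foo_net_exit,
 *		.id	= &foo_net_id,
 *		.size	= sizeof(struct foo_net),
 *	};
 *
 * register_pernet_subsys(&foo_net_ops) is then called from the module
 * or subsystem init path, and unregister_pernet_subsys(&foo_net_ops)
 * on teardown.
 */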

struct ctl_table;

#define register_net_sysctl(net, path, table)	\
	register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table))
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path,
						struct ctl_table *table, size_t table_size);
void unregister_net_sysctl_table(struct ctl_table_header *);
#else
static inline int net_sysctl_init(void) { return 0; }
static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net,
	const char *path, struct ctl_table *table, size_t table_size)
{
	return NULL;
}
static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
{
}
#endif
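
/*
 * Usage sketch (illustrative only): registering a per-namespace sysctl
 * table, typically from a pernet ->init() method. The "foo" path and
 * table below are hypothetical; .data would normally be pointed at the
 * per-net field before registration, and the returned header is kept
 * so the table can be unregistered on namespace exit.
 *
 *	static struct ctl_table foo_sysctl_table[] = {
 *		{
 *			.procname	= "foo_enabled",
 *			.maxlen		= sizeof(int),
 *			.mode		= 0644,
 *			.proc_handler	= proc_dointvec,
 *		},
 *	};
 *
 *	hdr = register_net_sysctl(net, "net/foo", foo_sysctl_table);
 *	if (!hdr)
 *		return -ENOMEM;
 *	...
 *	unregister_net_sysctl_table(hdr);
 */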

static inline int rt_genid_ipv4(const struct net *net)
{
	return atomic_read(&net->ipv4.rt_genid);
}

#if IS_ENABLED(CONFIG_IPV6)
static inline int rt_genid_ipv6(const struct net *net)
{
	return atomic_read(&net->ipv6.fib6_sernum);
}
#endif

static inline void rt_genid_bump_ipv4(struct net *net)
{
	atomic_inc(&net->ipv4.rt_genid);
}

extern void (*__fib6_flush_trees)(struct net *net);
static inline void rt_genid_bump_ipv6(struct net *net)
{
	if (__fib6_flush_trees)
		__fib6_flush_trees(net);
}

#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
static inline struct netns_ieee802154_lowpan *
net_ieee802154_lowpan(struct net *net)
{
	return &net->ieee802154_lowpan;
}
#endif

/* For callers who don't really care about whether it's IPv4 or IPv6 */
static inline void rt_genid_bump_all(struct net *net)
{
	rt_genid_bump_ipv4(net);
	rt_genid_bump_ipv6(net);
}

static inline int fnhe_genid(const struct net *net)
{
	return atomic_read(&net->fnhe_genid);
}

static inline void fnhe_genid_bump(struct net *net)
{
	atomic_inc(&net->fnhe_genid);
}

#ifdef CONFIG_NET
void net_ns_init(void);
#else
static inline void net_ns_init(void) {}
#endif

#endif /* __NET_NET_NAMESPACE_H */