1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Operations on the network namespace |
4 | */ |
5 | #ifndef __NET_NET_NAMESPACE_H |
6 | #define __NET_NET_NAMESPACE_H |
7 | |
8 | #include <linux/atomic.h> |
9 | #include <linux/refcount.h> |
10 | #include <linux/workqueue.h> |
11 | #include <linux/list.h> |
12 | #include <linux/sysctl.h> |
13 | #include <linux/uidgid.h> |
14 | |
15 | #include <net/flow.h> |
16 | #include <net/netns/core.h> |
17 | #include <net/netns/mib.h> |
18 | #include <net/netns/unix.h> |
19 | #include <net/netns/packet.h> |
20 | #include <net/netns/ipv4.h> |
21 | #include <net/netns/ipv6.h> |
22 | #include <net/netns/nexthop.h> |
23 | #include <net/netns/ieee802154_6lowpan.h> |
24 | #include <net/netns/sctp.h> |
25 | #include <net/netns/netfilter.h> |
26 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
27 | #include <net/netns/conntrack.h> |
28 | #endif |
29 | #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
30 | #include <net/netns/flow_table.h> |
31 | #endif |
32 | #include <net/netns/nftables.h> |
33 | #include <net/netns/xfrm.h> |
34 | #include <net/netns/mpls.h> |
35 | #include <net/netns/can.h> |
36 | #include <net/netns/xdp.h> |
37 | #include <net/netns/smc.h> |
38 | #include <net/netns/bpf.h> |
39 | #include <net/netns/mctp.h> |
40 | #include <net/net_trackers.h> |
41 | #include <linux/ns_common.h> |
42 | #include <linux/idr.h> |
43 | #include <linux/skbuff.h> |
44 | #include <linux/notifier.h> |
45 | #include <linux/xarray.h> |
46 | |
struct user_namespace;
struct proc_dir_entry;
struct net_device;
struct sock;
struct ctl_table_header;	/* needed by unregister_net_sysctl_table() below */
struct net_generic;
struct uevent_sock;
struct netns_ipvs;
struct bpf_prog;


/* Size of the per-netns device name/ifindex hash tables. */
#define NETDEV_HASHBITS    8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
60 | |
/* One struct net per network namespace: holds all per-netns networking
 * state. Fields are grouped by write frequency: often-dirtied fields come
 * first, read-mostly fields (e.g. @hash_mix) live on later cache lines.
 */
struct net {
	/* First cache line can be often dirtied.
	 * Do not place here read-mostly fields.
	 */
	refcount_t passive;		/* To decide when the network
					 * namespace should be freed.
					 */
	spinlock_t rules_mod_lock;

	atomic_t dev_unreg_count;

	unsigned int dev_base_seq;	/* protected by rtnl_mutex */
	u32 ifindex;

	spinlock_t nsid_lock;
	atomic_t fnhe_genid;		/* bumped via fnhe_genid_bump() */

	struct list_head list;		/* list of network namespaces */
	struct list_head exit_list;	/* Linked to call pernet exit
					 * methods on a dead net (with
					 * pernet_ops_rwsem read locked),
					 * or to unregister pernet ops
					 * (pernet_ops_rwsem write locked).
					 */
	struct llist_node cleanup_list;	/* namespaces on death row */

#ifdef CONFIG_KEYS
	struct key_tag *key_domain;	/* Key domain of operation tag */
#endif
	struct user_namespace *user_ns;	/* Owning user namespace */
	struct ucounts *ucounts;
	struct idr netns_ids;		/* peer netns -> id, under @nsid_lock */

	struct ns_common ns;
	struct ref_tracker_dir refcnt_tracker;
	struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not
						   * refcounted against netns
						   */
	struct list_head dev_base_head;
	struct proc_dir_entry *proc_net;
	struct proc_dir_entry *proc_net_stat;

#ifdef CONFIG_SYSCTL
	struct ctl_table_set sysctls;
#endif

	struct sock *rtnl;		/* rtnetlink socket */
	struct sock *genl_sock;

	struct uevent_sock *uevent_sock; /* uevent socket */

	struct hlist_head *dev_name_head;
	struct hlist_head *dev_index_head;
	struct xarray dev_by_index;
	struct raw_notifier_head netdev_chain;

	/* Note that @hash_mix can be read millions times per second,
	 * it is critical that it is on a read_mostly cache line.
	 */
	u32 hash_mix;

	struct net_device *loopback_dev; /* The loopback */

	/* core fib_rules */
	struct list_head rules_ops;

	/* Per-protocol netns state below; each entry is present only when
	 * the corresponding subsystem is configured in.
	 */
	struct netns_core core;
	struct netns_mib mib;
	struct netns_packet packet;
#if IS_ENABLED(CONFIG_UNIX)
	struct netns_unix unx;
#endif
	struct netns_nexthop nexthop;
	struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
	struct netns_ipv6 ipv6;
#endif
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
	struct netns_ieee802154_lowpan ieee802154_lowpan;
#endif
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
	struct netns_sctp sctp;
#endif
#ifdef CONFIG_NETFILTER
	struct netns_nf nf;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct netns_ct ct;
#endif
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
	struct netns_nftables nft;
#endif
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
	struct netns_ft ft;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
	struct sk_buff_head wext_nlevents;
#endif
	struct net_generic __rcu *gen;

	/* Used to store attached BPF programs */
	struct netns_bpf bpf;

	/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
	struct netns_xfrm xfrm;
#endif

	u64 net_cookie;			/* written once */

#if IS_ENABLED(CONFIG_IP_VS)
	struct netns_ipvs *ipvs;
#endif
#if IS_ENABLED(CONFIG_MPLS)
	struct netns_mpls mpls;
#endif
#if IS_ENABLED(CONFIG_CAN)
	struct netns_can can;
#endif
#ifdef CONFIG_XDP_SOCKETS
	struct netns_xdp xdp;
#endif
#if IS_ENABLED(CONFIG_MCTP)
	struct netns_mctp mctp;
#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
	struct sock *crypto_nlsk;
#endif
	struct sock *diag_nlsk;
#if IS_ENABLED(CONFIG_SMC)
	struct netns_smc smc;
#endif
} __randomize_layout;
194 | |
195 | #include <linux/seq_file_net.h> |
196 | |
197 | /* Init's network namespace */ |
198 | extern struct net init_net; |
199 | |
#ifdef CONFIG_NET_NS
/* Create a child netns of @old_net (behaviour depends on CLONE_NEWNET in
 * @flags — see the !CONFIG_NET_NS stub below for the degenerate case).
 */
struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
	struct net *old_net);

/* Report the uid/gid owning @net as mapped through its user namespace. */
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);

void net_ns_barrier(void);

struct ns_common *get_net_ns(struct ns_common *ns);
struct net *get_net_ns_by_fd(int fd);
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
static inline struct net *copy_net_ns(unsigned long flags,
	struct user_namespace *user_ns, struct net *old_net)
{
	/* Without CONFIG_NET_NS a new namespace can never be created. */
	if (flags & CLONE_NEWNET)
		return ERR_PTR(-EINVAL);
	return old_net;
}

static inline void net_ns_get_ownership(const struct net *net,
					kuid_t *uid, kgid_t *gid)
{
	/* Only the init netns exists; it is owned by global root. */
	*uid = GLOBAL_ROOT_UID;
	*gid = GLOBAL_ROOT_GID;
}

static inline void net_ns_barrier(void) {}

static inline struct ns_common *get_net_ns(struct ns_common *ns)
{
	return ERR_PTR(-EINVAL);
}

static inline struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif /* CONFIG_NET_NS */
240 | |
241 | |
/* All live network namespaces; protected by net_rwsem (see for_each_net()). */
extern struct list_head net_namespace_list;

struct net *get_net_ns_by_pid(pid_t pid);

#ifdef CONFIG_SYSCTL
void ipx_register_sysctl(void);
void ipx_unregister_sysctl(void);
#else
/* Sysctl disabled: registration is a no-op. */
#define ipx_register_sysctl()
#define ipx_unregister_sysctl()
#endif
253 | |
254 | #ifdef CONFIG_NET_NS |
255 | void __put_net(struct net *net); |
256 | |
257 | /* Try using get_net_track() instead */ |
258 | static inline struct net *get_net(struct net *net) |
259 | { |
260 | refcount_inc(r: &net->ns.count); |
261 | return net; |
262 | } |
263 | |
264 | static inline struct net *maybe_get_net(struct net *net) |
265 | { |
266 | /* Used when we know struct net exists but we |
267 | * aren't guaranteed a previous reference count |
268 | * exists. If the reference count is zero this |
269 | * function fails and returns NULL. |
270 | */ |
271 | if (!refcount_inc_not_zero(r: &net->ns.count)) |
272 | net = NULL; |
273 | return net; |
274 | } |
275 | |
276 | /* Try using put_net_track() instead */ |
277 | static inline void put_net(struct net *net) |
278 | { |
279 | if (refcount_dec_and_test(r: &net->ns.count)) |
280 | __put_net(net); |
281 | } |
282 | |
283 | static inline |
284 | int net_eq(const struct net *net1, const struct net *net2) |
285 | { |
286 | return net1 == net2; |
287 | } |
288 | |
289 | static inline int check_net(const struct net *net) |
290 | { |
291 | return refcount_read(r: &net->ns.count) != 0; |
292 | } |
293 | |
294 | void net_drop_ns(void *); |
295 | |
296 | #else |
297 | |
298 | static inline struct net *get_net(struct net *net) |
299 | { |
300 | return net; |
301 | } |
302 | |
303 | static inline void put_net(struct net *net) |
304 | { |
305 | } |
306 | |
307 | static inline struct net *maybe_get_net(struct net *net) |
308 | { |
309 | return net; |
310 | } |
311 | |
312 | static inline |
313 | int net_eq(const struct net *net1, const struct net *net2) |
314 | { |
315 | return 1; |
316 | } |
317 | |
318 | static inline int check_net(const struct net *net) |
319 | { |
320 | return 1; |
321 | } |
322 | |
323 | #define net_drop_ns NULL |
324 | #endif |
325 | |
326 | |
327 | static inline void __netns_tracker_alloc(struct net *net, |
328 | netns_tracker *tracker, |
329 | bool refcounted, |
330 | gfp_t gfp) |
331 | { |
332 | #ifdef CONFIG_NET_NS_REFCNT_TRACKER |
333 | ref_tracker_alloc(dir: refcounted ? &net->refcnt_tracker : |
334 | &net->notrefcnt_tracker, |
335 | trackerp: tracker, gfp); |
336 | #endif |
337 | } |
338 | |
339 | static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, |
340 | gfp_t gfp) |
341 | { |
342 | __netns_tracker_alloc(net, tracker, refcounted: true, gfp); |
343 | } |
344 | |
345 | static inline void __netns_tracker_free(struct net *net, |
346 | netns_tracker *tracker, |
347 | bool refcounted) |
348 | { |
349 | #ifdef CONFIG_NET_NS_REFCNT_TRACKER |
350 | ref_tracker_free(dir: refcounted ? &net->refcnt_tracker : |
351 | &net->notrefcnt_tracker, trackerp: tracker); |
352 | #endif |
353 | } |
354 | |
355 | static inline struct net *get_net_track(struct net *net, |
356 | netns_tracker *tracker, gfp_t gfp) |
357 | { |
358 | get_net(net); |
359 | netns_tracker_alloc(net, tracker, gfp); |
360 | return net; |
361 | } |
362 | |
363 | static inline void put_net_track(struct net *net, netns_tracker *tracker) |
364 | { |
365 | __netns_tracker_free(net, tracker, refcounted: true); |
366 | put_net(net); |
367 | } |
368 | |
/* Storage for a netns pointer that occupies no space when !CONFIG_NET_NS. */
typedef struct {
#ifdef CONFIG_NET_NS
	struct net __rcu *net;
#endif
} possible_net_t;

static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
	rcu_assign_pointer(pnet->net, net);
#endif
}

/* Read the pnet when the caller guarantees it cannot change underneath
 * (rcu_dereference_protected() with an unconditional check).
 */
static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
	return rcu_dereference_protected(pnet->net, true);
#else
	return &init_net;
#endif
}

/* RCU variant: caller must be inside an RCU read-side critical section. */
static inline struct net *read_pnet_rcu(possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
	return rcu_dereference(pnet->net);
#else
	return &init_net;
#endif
}
399 | |
/* Protected by net_rwsem */
#define for_each_net(VAR)				\
	list_for_each_entry(VAR, &net_namespace_list, list)
#define for_each_net_continue_reverse(VAR)		\
	list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
/* RCU walk: caller must be in an RCU read-side critical section. */
#define for_each_net_rcu(VAR)				\
	list_for_each_entry_rcu(VAR, &net_namespace_list, list)

#ifdef CONFIG_NET_NS
#define __net_init
#define __net_exit
#define __net_initdata
#define __net_initconst
#else
/* Without CONFIG_NET_NS pernet setup runs exactly once, so it can live in
 * init sections and be discarded after boot.
 */
#define __net_init	__init
#define __net_exit	__ref
#define __net_initdata	__initdata
#define __net_initconst	__initconst
#endif
419 | |
/* Translate between a peer netns and its id inside @net (netns_ids idr). */
int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
int peernet2id(const struct net *net, struct net *peer);
bool peernet_has_id(const struct net *net, struct net *peer);
struct net *get_net_ns_by_id(const struct net *net, int id);

/* One instance per subsystem that keeps per-netns state. */
struct pernet_operations {
	struct list_head list;
	/*
	 * Below methods are called without any exclusive locks.
	 * More than one net may be constructed and destructed
	 * in parallel on several cpus. Every pernet_operations
	 * have to keep in mind all other pernet_operations and
	 * to introduce a locking, if they share common resources.
	 *
	 * The only time they are called with exclusive lock is
	 * from register_pernet_subsys(), unregister_pernet_subsys()
	 * register_pernet_device() and unregister_pernet_device().
	 *
	 * Exit methods using blocking RCU primitives, such as
	 * synchronize_rcu(), should be implemented via exit_batch.
	 * Then, destruction of a group of net requires single
	 * synchronize_rcu() related to these pernet_operations,
	 * instead of separate synchronize_rcu() for every net.
	 * Please, avoid synchronize_rcu() at all, where it's possible.
	 *
	 * Note that a combination of pre_exit() and exit() can
	 * be used, since a synchronize_rcu() is guaranteed between
	 * the calls.
	 */
	int (*init)(struct net *net);
	void (*pre_exit)(struct net *net);
	void (*exit)(struct net *net);
	void (*exit_batch)(struct list_head *net_exit_list);
	/* NOTE(review): @id/@size appear to describe this subsystem's
	 * net_generic() slot — verify against register_pernet_subsys()
	 * callers before relying on this.
	 */
	unsigned int *id;
	size_t size;
};
456 | |
457 | /* |
458 | * Use these carefully. If you implement a network device and it |
459 | * needs per network namespace operations use device pernet operations, |
460 | * otherwise use pernet subsys operations. |
461 | * |
462 | * Network interfaces need to be removed from a dying netns _before_ |
463 | * subsys notifiers can be called, as most of the network code cleanup |
464 | * (which is done from subsys notifiers) runs with the assumption that |
465 | * dev_remove_pack has been called so no new packets will arrive during |
466 | * and after the cleanup functions have been called. dev_remove_pack |
467 | * is not per namespace so instead the guarantee of no more packets |
468 | * arriving in a network namespace is provided by ensuring that all |
469 | * network devices and all sockets have left the network namespace |
470 | * before the cleanup methods are called. |
471 | * |
472 | * For the longest time the ipv4 icmp code was registered as a pernet |
473 | * device which caused kernel oops, and panics during network |
474 | * namespace cleanup. So please don't get this wrong. |
475 | */ |
476 | int register_pernet_subsys(struct pernet_operations *); |
477 | void unregister_pernet_subsys(struct pernet_operations *); |
478 | int register_pernet_device(struct pernet_operations *); |
479 | void unregister_pernet_device(struct pernet_operations *); |
480 | |
struct ctl_table;

/* Register @table under @path; the table size is taken from the array,
 * so @table must be an actual array, not a pointer.
 */
#define register_net_sysctl(net, path, table)	\
	register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table))
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path,
	struct ctl_table *table, size_t table_size);
void unregister_net_sysctl_table(struct ctl_table_header *);
#else
/* Sysctl disabled: registration succeeds trivially with a NULL header. */
static inline int net_sysctl_init(void) { return 0; }
static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net,
	const char *path, struct ctl_table *table, size_t table_size)
{
	return NULL;
}
static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
{
}
#endif
501 | |
502 | static inline int rt_genid_ipv4(const struct net *net) |
503 | { |
504 | return atomic_read(v: &net->ipv4.rt_genid); |
505 | } |
506 | |
507 | #if IS_ENABLED(CONFIG_IPV6) |
508 | static inline int rt_genid_ipv6(const struct net *net) |
509 | { |
510 | return atomic_read(v: &net->ipv6.fib6_sernum); |
511 | } |
512 | #endif |
513 | |
514 | static inline void rt_genid_bump_ipv4(struct net *net) |
515 | { |
516 | atomic_inc(v: &net->ipv4.rt_genid); |
517 | } |
518 | |
519 | extern void (*__fib6_flush_trees)(struct net *net); |
520 | static inline void rt_genid_bump_ipv6(struct net *net) |
521 | { |
522 | if (__fib6_flush_trees) |
523 | __fib6_flush_trees(net); |
524 | } |
525 | |
526 | #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) |
527 | static inline struct netns_ieee802154_lowpan * |
528 | net_ieee802154_lowpan(struct net *net) |
529 | { |
530 | return &net->ieee802154_lowpan; |
531 | } |
532 | #endif |
533 | |
534 | /* For callers who don't really care about whether it's IPv4 or IPv6 */ |
535 | static inline void rt_genid_bump_all(struct net *net) |
536 | { |
537 | rt_genid_bump_ipv4(net); |
538 | rt_genid_bump_ipv6(net); |
539 | } |
540 | |
541 | static inline int fnhe_genid(const struct net *net) |
542 | { |
543 | return atomic_read(v: &net->fnhe_genid); |
544 | } |
545 | |
546 | static inline void fnhe_genid_bump(struct net *net) |
547 | { |
548 | atomic_inc(v: &net->fnhe_genid); |
549 | } |
550 | |
/* Initialize the network-namespace subsystem (no-op when CONFIG_NET=n). */
#ifdef CONFIG_NET
void net_ns_init(void);
#else
static inline void net_ns_init(void) {}
#endif
556 | |
557 | #endif /* __NET_NET_NAMESPACE_H */ |
558 | |