// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>
#include <linux/cookie.h>
#include <linux/proc_fs.h>

#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 * Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* Protects net_namespace_list. Nests inside rtnl_lock() */
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);

#ifdef CONFIG_KEYS
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif

struct net init_net;
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;
/*
 * pernet_ops_rwsem: protects pernet_list, net_generic_ids,
 * init_net_initialized and the first_device pointer.
 * This is an internal net-namespace object; please don't use it
 * outside.
 */
DECLARE_RWSEM(pernet_ops_rwsem);
EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
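
/* The first id handed out below: ids smaller than this mirror the space
 * taken by the struct net_generic header itself, measured in pointer-sized
 * slots (cf. the "+1 for len +2 for rcu_head" note on INITIAL_NET_GEN_PTRS).
 */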
#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

DEFINE_COOKIE(net_cookie);

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->s.len = max_gen_ptrs;

	return ng;
}

static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(id < MIN_PERNET_OPS_ID);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&pernet_ops_rwsem));
	if (old_ng->s.len > id) {
		old_ng->ptr[id] = data;
		return 0;
	}

	ng = net_alloc_generic();
	if (!ng)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * net_generic() explores the net->gen array inside an RCU
	 * read-side section. Besides, once set, the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
	       (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
	ng->ptr[id] = data;

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, s.rcu);
	return 0;
}
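
/* For reference, the lockless reader side lives in net_generic()
 * (include/net/netns/generic.h) and is roughly:
 *
 *	rcu_read_lock();
 *	ng = rcu_dereference(net->gen);
 *	ptr = ng->ptr[id];
 *	rcu_read_unlock();
 *
 * which is why the writer above publishes the new array with
 * rcu_assign_pointer() and frees the old one only after a grace period.
 */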

static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	struct net_generic *ng;
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

	if (ops->id && ops->size) {
		ng = rcu_dereference_protected(net->gen,
					       lockdep_is_held(&pernet_ops_rwsem));
		ng->ptr[*ops->id] = NULL;
	}

cleanup:
	kfree(data);

out:
	return err;
}
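
/* A minimal sketch (hypothetical subsystem "foo") of the ->id/->size
 * contract handled above: ops_init() kzalloc()s ops->size bytes and hangs
 * them off net->gen at *ops->id, so the subsystem can later do:
 *
 *	static unsigned int foo_net_id __read_mostly;
 *
 *	struct foo_net {
 *		int some_state;
 *	};
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.id   = &foo_net_id,
 *		.size = sizeof(struct foo_net),
 *	};
 *
 *	// anywhere, once registered:
 *	struct foo_net *fn = net_generic(net, foo_net_id);
 */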

static void ops_pre_exit_list(const struct pernet_operations *ops,
			      struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->pre_exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->pre_exit(net);
	}
}

static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list) {
			ops->exit(net);
			cond_resched();
		}
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->size && ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			kfree(net_generic(net, *ops->id));
	}
}

/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int min = 0, max = 0;

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() would not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}

/* Must be called from RCU-critical section or with nsid_lock held */
static int __peernet2id(const struct net *net, struct net *peer)
{
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	return NETNSA_NSID_NOT_ASSIGNED;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
			      struct nlmsghdr *nlh, gfp_t gfp);
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
	int id;

	if (refcount_read(&net->ns.count) == 0)
		return NETNSA_NSID_NOT_ASSIGNED;

	spin_lock_bh(&net->nsid_lock);
	id = __peernet2id(net, peer);
	if (id >= 0) {
		spin_unlock_bh(&net->nsid_lock);
		return id;
	}

	/* When peer is obtained from RCU lists, we may race with
	 * its cleanup. Check whether it's alive, and this guarantees
	 * we never hash a peer back to net->netns_ids, after it has
	 * just been idr_remove()'d from there in cleanup_net().
	 */
	if (!maybe_get_net(peer)) {
		spin_unlock_bh(&net->nsid_lock);
		return NETNSA_NSID_NOT_ASSIGNED;
	}

	id = alloc_netid(net, peer, -1);
	spin_unlock_bh(&net->nsid_lock);

	put_net(peer);
	if (id < 0)
		return NETNSA_NSID_NOT_ASSIGNED;

	rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);

	return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(const struct net *net, struct net *peer)
{
	int id;

	rcu_read_lock();
	id = __peernet2id(net, peer);
	rcu_read_unlock();

	return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(const struct net *net, struct net *peer)
{
	return peernet2id(net, peer) >= 0;
}

struct net *get_net_ns_by_id(const struct net *net, int id)
{
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		peer = maybe_get_net(peer);
	rcu_read_unlock();

	return peer;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);

/* init code that must occur even if setup_net() is not called. */
static __net_init void preinit_net(struct net *net)
{
	ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
}

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with pernet_ops_rwsem held */
	const struct pernet_operations *ops, *saved_ops;
	LIST_HEAD(net_exit_list);
	LIST_HEAD(dev_kill_list);
	int error = 0;

	refcount_set(&net->ns.count, 1);
	ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");

	refcount_set(&net->passive, 1);
	get_random_bytes(&net->hash_mix, sizeof(u32));
	preempt_disable();
	net->net_cookie = gen_cookie_next(&net_cookie);
	preempt_enable();
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);
	mutex_init(&net->ipv4.ra_mutex);

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
	down_write(&net_rwsem);
	list_add_tail_rcu(&net->list, &net_namespace_list);
	up_write(&net_rwsem);
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_pre_exit_list(ops, &net_exit_list);

	synchronize_rcu();

	ops = saved_ops;
	rtnl_lock();
	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
		if (ops->exit_batch_rtnl)
			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	rcu_barrier();
	goto out;
}

static int __net_init net_defaults_init_net(struct net *net)
{
	net->core.sysctl_somaxconn = SOMAXCONN;
	/* Limits per socket sk_omem_alloc usage.
	 * TCP zerocopy regular usage needs 128 KB.
	 */
	net->core.sysctl_optmem_max = 128 * 1024;
	net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;

	return 0;
}

static struct pernet_operations net_defaults_ops = {
	.init = net_defaults_init_net,
};

static __init int net_defaults_init(void)
{
	if (register_pernet_subsys(&net_defaults_ops))
		panic("Cannot initialize net default settings");

	return 0;
}

core_initcall(net_defaults_init);

#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

#ifdef CONFIG_KEYS
	net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
	if (!net->key_domain)
		goto out_free_2;
	refcount_set(&net->key_domain->usage, 1);
#endif

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

#ifdef CONFIG_KEYS
out_free_2:
	kmem_cache_free(net_cachep, net);
	net = NULL;
#endif
out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
	if (refcount_dec_and_test(&net->passive)) {
		kfree(rcu_access_pointer(net->gen));

		/* There should not be any trackers left there. */
		ref_tracker_dir_exit(&net->notrefcnt_tracker);

		kmem_cache_free(net_cachep, net);
	}
}

void net_drop_ns(void *p)
{
	struct net *net = (struct net *)p;

	if (net)
		net_free(net);
}

struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		rv = -ENOMEM;
		goto dec_ucounts;
	}

	preinit_net(net);
	refcount_set(&net->passive, 1);
	net->ucounts = ucounts;
	get_user_ns(user_ns);

	rv = down_read_killable(&pernet_ops_rwsem);
	if (rv < 0)
		goto put_userns;

	rv = setup_net(net, user_ns);

	up_read(&pernet_ops_rwsem);

	if (rv < 0) {
put_userns:
#ifdef CONFIG_KEYS
		key_remove_domain(net->key_domain);
#endif
		put_user_ns(user_ns);
		net_free(net);
dec_ucounts:
		dec_net_namespaces(ucounts);
		return ERR_PTR(rv);
	}
	return net;
}

/**
 * net_ns_get_ownership - get sysfs ownership data for @net
 * @net: network namespace in question (can be NULL)
 * @uid: kernel user ID for sysfs objects
 * @gid: kernel group ID for sysfs objects
 *
 * Returns the uid/gid pair of root in the user namespace associated with the
 * given network namespace.
 */
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
{
	if (net) {
		kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
		kgid_t ns_root_gid = make_kgid(net->user_ns, 0);

		if (uid_valid(ns_root_uid))
			*uid = ns_root_uid;

		if (gid_valid(ns_root_gid))
			*gid = ns_root_gid;
	} else {
		*uid = GLOBAL_ROOT_UID;
		*gid = GLOBAL_ROOT_GID;
	}
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
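
/* Callers typically use this when creating per-netns sysfs objects, along
 * these lines (a sketch, not a specific call site). Note the function only
 * overwrites *uid/*gid when a valid mapping exists, so callers should
 * pre-initialize both:
 *
 *	kuid_t uid = GLOBAL_ROOT_UID;
 *	kgid_t gid = GLOBAL_ROOT_GID;
 *
 *	net_ns_get_ownership(net, &uid, &gid);
 *	// ...create the kobject/sysfs entry owned by uid:gid...
 */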

static void unhash_nsid(struct net *net, struct net *last)
{
	struct net *tmp;
	/* This function is only called from cleanup_net() work,
	 * and this work is the only one that may delete
	 * a net from net_namespace_list. So, while the below
	 * is executing, the list may only grow. Thus, we do not
	 * use for_each_net_rcu() or net_rwsem.
	 */
	for_each_net(tmp) {
		int id;

		spin_lock_bh(&tmp->nsid_lock);
		id = __peernet2id(tmp, net);
		if (id >= 0)
			idr_remove(&tmp->netns_ids, id);
		spin_unlock_bh(&tmp->nsid_lock);
		if (id >= 0)
			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
					  GFP_KERNEL);
		if (tmp == last)
			break;
	}
	spin_lock_bh(&net->nsid_lock);
	idr_destroy(&net->netns_ids);
	spin_unlock_bh(&net->nsid_lock);
}

static LLIST_HEAD(cleanup_list);

static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp, *last;
	struct llist_node *net_kill_list;
	LIST_HEAD(net_exit_list);
	LIST_HEAD(dev_kill_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	net_kill_list = llist_del_all(&cleanup_list);

	down_read(&pernet_ops_rwsem);

	/* Don't let anyone else find us. */
	down_write(&net_rwsem);
	llist_for_each_entry(net, net_kill_list, cleanup_list)
		list_del_rcu(&net->list);
	/* Cache the last net. After we unlock rtnl, no new net
	 * added to net_namespace_list can assign an nsid
	 * to a net from net_kill_list (see peernet2id_alloc()).
	 * So, we skip them in unhash_nsid().
	 *
	 * Note that unhash_nsid() does not delete nsid links
	 * between net_kill_list's nets, as they've already been
	 * deleted from net_namespace_list. But this would be
	 * useless anyway, as netns_ids are destroyed there.
	 */
	last = list_last_entry(&net_namespace_list, struct net, list);
	up_write(&net_rwsem);

	llist_for_each_entry(net, net_kill_list, cleanup_list) {
		unhash_nsid(net, last);
		list_add_tail(&net->exit_list, &net_exit_list);
	}

	/* Run all of the network namespace pre_exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_pre_exit_list(ops, &net_exit_list);

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 * Also the pre_exit() and exit() methods need this barrier.
	 */
	synchronize_rcu_expedited();

	rtnl_lock();
	list_for_each_entry_reverse(ops, &pernet_list, list) {
		if (ops->exit_batch_rtnl)
			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	up_read(&pernet_ops_rwsem);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
#ifdef CONFIG_KEYS
		key_remove_domain(net->key_domain);
#endif
		put_user_ns(net->user_ns);
		net_free(net);
	}
}

/**
 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 * cleanup_net runs from work queue and will first remove namespaces
 * from the global list, then run net exit functions.
 *
 * Call this in module exit path to make sure that all netns
 * ->exit ops have been invoked before the function is removed.
 */
void net_ns_barrier(void)
{
	down_write(&pernet_ops_rwsem);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
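
/* A sketch of the intended usage in a module exit path (hypothetical
 * module "foo"):
 *
 *	static void __exit foo_exit(void)
 *	{
 *		// ...tear down hooks that schedule per-netns work...
 *		// Wait for any in-flight cleanup_net() to finish running
 *		// ->exit ops before this module's text goes away.
 *		net_ns_barrier();
 *	}
 */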

static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
	ref_tracker_dir_exit(&net->refcnt_tracker);
	/* Cleanup the network namespace in process context */
	if (llist_add(&net->cleanup_list, &cleanup_list))
		queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

/**
 * get_net_ns - increment the refcount of the network namespace
 * @ns: common namespace (net)
 *
 * Returns the net's common namespace.
 */
struct ns_common *get_net_ns(struct ns_common *ns)
{
	return &get_net(container_of(ns, struct net, ns))->ns;
}
EXPORT_SYMBOL_GPL(get_net_ns);

struct net *get_net_ns_by_fd(int fd)
{
	struct fd f = fdget(fd);
	struct net *net = ERR_PTR(-EINVAL);

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (proc_ns_file(f.file)) {
		struct ns_common *ns = get_proc_ns(file_inode(f.file));

		if (ns->ops == &netns_operations)
			net = get_net(container_of(ns, struct net, ns));
	}
	fdput(f);

	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
#endif

struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;

		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
	[NETNSA_TARGET_NSID]	= { .type = NLA_S32 },
};

static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct nlattr *nla;
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
				     NETNSA_MAX, rtnl_net_policy, extack);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID]) {
		NL_SET_ERR_MSG(extack, "nsid is missing");
		return -EINVAL;
	}
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}
	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	spin_lock_bh(&net->nsid_lock);
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_bh(&net->nsid_lock);
		err = -EEXIST;
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack,
			       "Peer netns already has a nsid assigned");
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_bh(&net->nsid_lock);
	if (err >= 0) {
		rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
				  nlh, GFP_KERNEL);
		err = 0;
	} else if (err == -ENOSPC && nsid >= 0) {
		err = -EEXIST;
		NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
		NL_SET_ERR_MSG(extack, "The specified nsid is already used");
	}
out:
	put_net(peer);
	return err;
}

static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
	       ;
}

struct net_fill_args {
	u32 portid;
	u32 seq;
	int flags;
	int cmd;
	int nsid;
	bool add_ref;
	int ref_nsid;
};

static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
			args->flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
		goto nla_put_failure;

	if (args->add_ref &&
	    nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int rtnl_net_valid_getid_req(struct sk_buff *skb,
				    const struct nlmsghdr *nlh,
				    struct nlattr **tb,
				    struct netlink_ext_ack *extack)
{
	int i, err;

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
					      tb, NETNSA_MAX, rtnl_net_policy,
					      extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
					    NETNSA_MAX, rtnl_net_policy,
					    extack);
	if (err)
		return err;

	for (i = 0; i <= NETNSA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETNSA_PID:
		case NETNSA_FD:
		case NETNSA_NSID:
		case NETNSA_TARGET_NSID:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
			return -EINVAL;
		}
	}

	return 0;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net_fill_args fillargs = {
		.portid = NETLINK_CB(skb).portid,
		.seq = nlh->nlmsg_seq,
		.cmd = RTM_NEWNSID,
	};
	struct net *peer, *target = net;
	struct nlattr *nla;
	struct sk_buff *msg;
	int err;

	err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else if (tb[NETNSA_NSID]) {
		peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
		if (!peer)
			peer = ERR_PTR(-ENOENT);
		nla = tb[NETNSA_NSID];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}

	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	if (tb[NETNSA_TARGET_NSID]) {
		int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);

		target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
		if (IS_ERR(target)) {
			NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
			NL_SET_ERR_MSG(extack,
				       "Target netns reference is invalid");
			err = PTR_ERR(target);
			goto out;
		}
		fillargs.add_ref = true;
		fillargs.ref_nsid = peernet2id(net, peer);
	}

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	fillargs.nsid = peernet2id(target, peer);
	err = rtnl_net_fill(msg, &fillargs);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	if (fillargs.add_ref)
		put_net(target);
	put_net(peer);
	return err;
}

struct rtnl_net_dump_cb {
	struct net *tgt_net;
	struct net *ref_net;
	struct sk_buff *skb;
	struct net_fill_args fillargs;
	int idx;
	int s_idx;
};

/* Runs in RCU-critical section. */
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	net_cb->fillargs.nsid = id;
	if (net_cb->fillargs.add_ref)
		net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
	ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}

static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
				   struct rtnl_net_dump_cb *net_cb,
				   struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[NETNSA_MAX + 1];
	int err, i;

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
					    NETNSA_MAX, rtnl_net_policy,
					    extack);
	if (err < 0)
		return err;

	for (i = 0; i <= NETNSA_MAX; i++) {
		if (!tb[i])
			continue;

		if (i == NETNSA_TARGET_NSID) {
			struct net *net;

			net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
			if (IS_ERR(net)) {
				NL_SET_BAD_ATTR(extack, tb[i]);
				NL_SET_ERR_MSG(extack,
					       "Invalid target network namespace id");
				return PTR_ERR(net);
			}
			net_cb->fillargs.add_ref = true;
			net_cb->ref_net = net_cb->tgt_net;
			net_cb->tgt_net = net;
		} else {
			NL_SET_BAD_ATTR(extack, tb[i]);
			NL_SET_ERR_MSG(extack,
				       "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rtnl_net_dump_cb net_cb = {
		.tgt_net = sock_net(skb->sk),
		.skb = skb,
		.fillargs = {
			.portid = NETLINK_CB(cb->skb).portid,
			.seq = cb->nlh->nlmsg_seq,
			.flags = NLM_F_MULTI,
			.cmd = RTM_NEWNSID,
		},
		.idx = 0,
		.s_idx = cb->args[0],
	};
	int err = 0;

	if (cb->strict_check) {
		err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
		if (err < 0)
			goto end;
	}

	rcu_read_lock();
	idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	rcu_read_unlock();

	cb->args[0] = net_cb.idx;
end:
	if (net_cb.fillargs.add_ref)
		put_net(net_cb.tgt_net);
	return err < 0 ? err : skb->len;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
			      struct nlmsghdr *nlh, gfp_t gfp)
{
	struct net_fill_args fillargs = {
		.portid = portid,
		.seq = nlh ? nlh->nlmsg_seq : 0,
		.cmd = cmd,
		.nsid = id,
	};
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), gfp);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, &fillargs);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
	return;

err_out:
	nlmsg_free(msg);
out:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

#ifdef CONFIG_NET_NS
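/* Compile-time layout checks: assert that the read-mostly netns_ipv4
 * sysctls used on the TX/RX hot paths stay packed inside the cacheline
 * groups declared in struct netns_ipv4; the group-size asserts below must
 * be kept in sync with that structure.
 */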
static void __init netns_ipv4_struct_check(void)
{
	/* TX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_early_retrans);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_win_divisor);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_rtt_log);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_autocorking);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_snd_mss);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_notsent_lowat);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_limit_output_bytes);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_rtt_wlen);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_wmem);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_ip_fwd_use_pmtu);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);

	/* TXRX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
				      sysctl_tcp_moderate_rcvbuf);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);

	/* RX readonly hotpath cache line */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_ip_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_reordering);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_rmem);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
}
#endif

void __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	netns_ipv4_struct_check();
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
				       SMP_CACHE_BYTES,
				       SLAB_PANIC|SLAB_ACCOUNT, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

#ifdef CONFIG_KEYS
	init_net.key_domain = &init_net_key_domain;
#endif
	down_write(&pernet_ops_rwsem);
	preinit_net(&init_net);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	init_net_initialized = true;
	up_write(&pernet_ops_rwsem);

	if (register_pernet_subsys(&net_ns_ops))
		panic("Could not register network namespace subsystems");

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      RTNL_FLAG_DOIT_UNLOCKED);
}

static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
	ops_pre_exit_list(ops, net_exit_list);
	synchronize_rcu();

	if (ops->exit_batch_rtnl) {
		LIST_HEAD(dev_kill_list);

		rtnl_lock();
		ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		rtnl_unlock();
	}
	ops_exit_list(ops, net_exit_list);

	ops_free_list(ops, net_exit_list);
}

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		/* We hold pernet_ops_rwsem write-locked, so parallel
		 * setup_net() and cleanup_net() are not possible.
		 */
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	free_exit_list(ops, &net_exit_list);
	return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	/* See comment in __register_pernet_operations() */
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);

	free_exit_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_add_tail(&ops->list, list);
		return 0;
	}

	return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_del(&ops->list);
	} else {
		LIST_HEAD(net_exit_list);

		list_add(&init_net.exit_list, &net_exit_list);
		free_exit_list(ops, &net_exit_list);
	}
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
		error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
				      GFP_KERNEL);
		if (error < 0)
			return error;
		*ops->id = error;
		max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_free(&net_generic_ids, *ops->id);
	}

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	__unregister_pernet_operations(ops);
	rcu_barrier();
	if (ops->id)
		ida_free(&net_generic_ids, *ops->id);
}

/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order with which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;

	down_write(&pernet_ops_rwsem);
	error = register_pernet_operations(first_device, ops);
	up_write(&pernet_ops_rwsem);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
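
/* A minimal registration sketch for the hypothetical subsystem "foo",
 * combining ->init/->exit with the ->id/->size per-net data described
 * near ops_init() above:
 *
 *	static int __net_init foo_init_net(struct net *net)
 *	{
 *		struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *		fn->some_state = 0;	// per-netns initialization
 *		return 0;
 *	}
 *
 *	static void __net_exit foo_exit_net(struct net *net)
 *	{
 *		// release anything foo_init_net() set up
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_init_net,
 *		.exit = foo_exit_net,
 *		.id   = &foo_net_id,
 *		.size = sizeof(struct foo_net),
 *	};
 *
 *	err = register_pernet_subsys(&foo_net_ops);
 */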

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	down_write(&pernet_ops_rwsem);
	unregister_pernet_operations(ops);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order with which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;

	down_write(&pernet_ops_rwsem);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	up_write(&pernet_ops_rwsem);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 * unregister_pernet_device - unregister a network namespace device
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	down_write(&pernet_ops_rwsem);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

static int netns_install(struct nsset *nsset, struct ns_common *ns)
{
	struct nsproxy *nsproxy = nsset->nsproxy;
	struct net *net = to_net_ns(ns);

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}

static struct user_namespace *netns_owner(struct ns_common *ns)
{
	return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
#endif
