// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 * Author : Ram Pai (linuxram@us.ibm.com)
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"

/* return the next shared peer mount of @p */
static inline struct mount *next_peer(struct mount *p)
{
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
}

static inline struct mount *first_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}

static inline struct mount *last_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
}

static inline struct mount *next_slave(struct mount *p)
{
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
}
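
/*
 * Illustrative note (editor's addition, not from the original source):
 * the peer and slave relationships are circular lists.  A peer group of
 * A, B and C is chained A->mnt_share -> B->mnt_share -> C->mnt_share ->
 * back to A, so next_peer() just steps to the next entry.  A mount's
 * slaves hang off its mnt_slave_list head and are linked through their
 * own mnt_slave fields, which is why first_slave()/last_slave() start
 * from mnt_slave_list while next_slave() follows the sibling linkage.
 */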

static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
{
	struct mount *m = mnt;

	do {
		/* Check the namespace first for optimization */
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
			return m;

		m = next_peer(m);
	} while (m != mnt);

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
int get_dominating_id(struct mount *mnt, const struct path *root)
{
	struct mount *m;

	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
		if (d)
			return d->mnt_group_id;
	}

	return 0;
}
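
/*
 * Example (editor's sketch): this is what backs the "propagate_from:"
 * tag in /proc/self/mountinfo.  For a slave mount whose immediate
 * master is not reachable under the process root (e.g. after chroot or
 * pivot_root), the id reported is that of the nearest dominating peer
 * group that is still visible under @root, rather than the invisible
 * direct master's group id.
 */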

static int do_make_slave(struct mount *mnt)
{
	struct mount *master, *slave_mnt;

	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
	} else {
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
		}
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
	}
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	mnt->mnt_master = master;
	return 0;
}
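
/*
 * Worked example (editor's addition): suppose A, B and C are peers and
 * D is a slave of B.  do_make_slave(B) removes B from the peer list
 * (leaving A and C as the surviving group), picks B's new master among
 * its former peers, preferring one with the same root dentry, and
 * re-parents B's slaves onto that master.  Afterwards both B and D sit
 * side by side on the chosen master's mnt_slave_list.
 */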

/*
 * vfsmount lock must be held for write
 */
void change_mnt_propagation(struct mount *mnt, int type)
{
	if (type == MS_SHARED) {
		set_mnt_shared(mnt);
		return;
	}
	do_make_slave(mnt);
	if (type != MS_SLAVE) {
		list_del_init(&mnt->mnt_slave);
		mnt->mnt_master = NULL;
		if (type == MS_UNBINDABLE)
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
		else
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
	}
}
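
/*
 * Example (editor's sketch): these states map onto the mount(8)
 * propagation commands:
 *
 *	mount --make-shared     /mnt	-> MS_SHARED
 *	mount --make-slave      /mnt	-> MS_SLAVE
 *	mount --make-private    /mnt	-> MS_PRIVATE
 *	mount --make-unbindable /mnt	-> MS_UNBINDABLE
 *
 * MS_PRIVATE and MS_UNBINDABLE both pass through do_make_slave() and
 * then sever the slave linkage; MS_UNBINDABLE additionally flags the
 * mount so it cannot be the source of a bind mount.
 */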

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * a vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 */
static struct mount *propagation_next(struct mount *m,
				      struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			return next_slave(m);

		/* back at master */
		m = master;
	}
}
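
/*
 * Illustrative walk (editor's addition): take a peer group (A, B) where
 * B has slaves S1 and S2.  Starting from origin A the iteration yields
 * B, then descends to B's slaves S1 and S2, then climbs back to B's
 * peer list and terminates when it would return to A.  In other words,
 * the walk is depth-first, visiting a mount's slaves before moving on
 * to its remaining peers.
 */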

static struct mount *skip_propagation_subtree(struct mount *m,
					      struct mount *origin)
{
	/*
	 * Advance m such that propagation_next will not return
	 * the slaves of m.
	 */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		m = last_slave(m);

	return m;
}

static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}

/* all accesses are serialized by namespace_sem */
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct hlist_head *list;

static inline bool peers(const struct mount *m1, const struct mount *m2)
{
	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}

static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (peers(m, last_dest)) {
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		bool done;
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p))
				break;
		}
		do {
			struct mount *parent = last_source->mnt_parent;
			if (peers(last_source, first_source))
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
	}

	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	read_seqlock_excl(&mount_lock);
	mnt_set_mountpoint(m, dest_mp, child);
	if (m->mnt_master != dest_master)
		SET_MNT_MARK(m->mnt_master);
	read_sequnlock_excl(&mount_lock);
	last_dest = m;
	last_source = child;
	hlist_add_head(&child->mnt_hash, list);
	return count_mounts(m->mnt_ns, child);
}
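
/*
 * Editor's note (interpretation, not from the original source): the
 * SET_MNT_MARK() on a copy's master records that a mount slaved to
 * that master has already received a copy.  The walk up the
 * ->mnt_master chain at the top of the else-branch then stops at the
 * nearest already-processed (marked) group, which is what allows
 * last_source to be rewound to the copy at the matching depth before
 * cloning with CL_SLAVE.  propagate_mnt() clears the marks once the
 * whole propagation walk is finished.
 */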

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * mountpoint 'dest_mp'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_mp: destination mountpoint.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		  struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	last_dest = dest_mnt;
	first_source = source_mnt;
	last_source = source_mnt;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n, dest_mp);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
	     m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n, dest_mp);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}
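
/*
 * Example scenario (editor's sketch, shell notation):
 *
 *	mount --make-shared /A
 *	mount --bind /A /B		# /A and /B are now peers
 *	mount /dev/sda1 /A/mnt		# should also appear at /B/mnt
 *
 * The third command reaches this function with dest_mnt = /A; the walk
 * above creates a copy of the new mount for the peer /B (and for any
 * slaves of the group), links the copies into @tree_list, and the
 * caller then splices each copy into place.
 */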

static struct mount *find_topper(struct mount *mnt)
{
	/* If there is exactly one mount covering mnt completely return it. */
	struct mount *child;

	if (!list_is_singular(&mnt->mnt_mounts))
		return NULL;

	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
		return NULL;

	return child;
}
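
/*
 * Example (editor's addition): a "topper" arises from overmounting the
 * same mountpoint, e.g.
 *
 *	mount -t tmpfs tmp /mnt
 *	mount -t tmpfs tmp /mnt		# covers the first mount entirely
 *
 * The first mount now has exactly one child whose mountpoint is the
 * first mount's own root, so find_topper() returns that child.
 */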

/*
 * return true if the refcount is greater than count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return mnt_get_count(mnt) > count;
}

/**
 * propagation_would_overmount - check whether propagation from @from
 *                               would overmount @to
 * @from: shared mount
 * @to: mount to check
 * @mp: future mountpoint of @to on @from
 *
 * If @from propagates mounts to @to, @from and @to must either be peers
 * or one of the masters in the hierarchy of masters of @to must be a
 * peer of @from.
 *
 * If the root of the @to mount is equal to the future mountpoint @mp of
 * the @to mount on @from then @to will be overmounted by whatever is
 * propagated to it.
 *
 * Context: This function expects namespace_lock() to be held and that
 *          @mp is stable.
 * Return: If @from overmounts @to, true is returned, false if not.
 */
bool propagation_would_overmount(const struct mount *from,
				 const struct mount *to,
				 const struct mountpoint *mp)
{
	if (!IS_MNT_SHARED(from))
		return false;

	if (IS_MNT_NEW(to))
		return false;

	if (to->mnt.mnt_root != mp->m_dentry)
		return false;

	for (const struct mount *m = to; m; m = m->mnt_master) {
		if (peers(from, m))
			return true;
	}

	return false;
}
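
/*
 * Usage note (editor's addition, hedged): at the time of writing the
 * move-mount-beneath logic (e.g. can_move_mount_beneath() in
 * fs/namespace.c) relies on this check to reject requests where
 * propagation would immediately overmount the root of one of the
 * receiving mounts, which would create a confusing mount stack.
 */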

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
 *
 * vfsmount lock must be held for write
 */
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
	struct mount *m, *child, *topper;
	struct mount *parent = mnt->mnt_parent;

	if (mnt == parent)
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
		return 1;

	for (m = propagation_next(parent, parent); m;
	     m = propagation_next(m, parent)) {
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
	}
	return 0;
}
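
/*
 * Example (editor's sketch): if /A and /B are peers and a device is
 * mounted at /A/mnt, a copy appears at /B/mnt.  "umount /A/mnt" must
 * also detach /B/mnt, so if some process has /B/mnt as its cwd the
 * extra reference makes the check above report busy and the umount
 * fails with EBUSY, even though nothing is using /A/mnt itself.
 */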

/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
	     m = propagation_next(m, parent)) {
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
{
	bool progress = false;
	struct mount *child;

	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;

	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
			continue;
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}

	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;

	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}
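
/*
 * Editor's note: mounts that can go away are marked and queued on
 * @to_umount; mounts that must survive (locked, or still carrying
 * children other than a topper) are queued on @to_restore so that
 * restore_mounts() can put them back into a clean state after the
 * speculative pass.
 */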

static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *tmp;
	list_for_each_entry(mnt, to_umount, mnt_list) {
		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
			/* topper? */
			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
				list_move_tail(&child->mnt_umounting, to_restore);
			else
				umount_one(child, to_umount);
		}
	}
}

static void restore_mounts(struct list_head *to_restore)
{
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
		struct mount *mnt, *parent;
		struct mountpoint *mp;

		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);

		/* Should this mount be reparented? */
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
	}
}

static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}

/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
	LIST_HEAD(visited);

	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;

		/*
		 * If this mount has already been visited it is known that its
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint have already been visited and there
		 * is no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * known that its entire peer group and all of
				 * their slaves in the propagation tree for the
				 * mountpoint have already been visited and
				 * there is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come across a partially unmounted
				 * mount in @list that has not been visited yet.
				 * Remember it has been visited and continue
				 * on our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}

	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
	cleanup_umount_visitations(&visited);
	list_splice_tail(&to_umount, list);

	return 0;
}
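
/*
 * Example (editor's sketch): with peers /A and /B and a mount at /A/mnt
 * propagated to /B/mnt, "umount /A/mnt" places the /A/mnt mount on
 * @list; the walk above finds the propagated copy at /B/mnt, and if it
 * is unused and unlocked it is appended to @list so that both copies
 * are torn down together.
 */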