// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 * Author : Ram Pai (linuxram@us.ibm.com)
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"

/* return the next shared peer mount of @p */
static inline struct mount *next_peer(struct mount *p)
{
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
}

static inline struct mount *first_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}

static inline struct mount *last_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
}

static inline struct mount *next_slave(struct mount *p)
{
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
}
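
/*
 * Illustrative note (editor's addition, not from the original source):
 * the peer and slave relationships are circular lists.  A peer group of
 * A, B and C is chained A->mnt_share -> B->mnt_share -> C->mnt_share ->
 * back to A, so next_peer() just steps to the next entry.  A mount's
 * slaves hang off its mnt_slave_list head and are linked through their
 * own mnt_slave fields, which is why first_slave()/last_slave() start
 * from mnt_slave_list while next_slave() follows the sibling linkage.
 */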

static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
{
	struct mount *m = mnt;

	do {
		/* Check the namespace first for optimization */
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
			return m;

		m = next_peer(m);
	} while (m != mnt);

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
int get_dominating_id(struct mount *mnt, const struct path *root)
{
	struct mount *m;

	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
		if (d)
			return d->mnt_group_id;
	}

	return 0;
}
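
/*
 * Example (editor's sketch): this is what backs the "propagate_from:"
 * tag in /proc/self/mountinfo.  For a slave mount whose immediate
 * master is not reachable under the process root (e.g. after chroot or
 * pivot_root), the id reported is that of the nearest dominating peer
 * group that is still visible under @root, rather than the invisible
 * direct master's group id.
 */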

static int do_make_slave(struct mount *mnt)
{
	struct mount *master, *slave_mnt;

	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
	} else {
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
		}
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
	}
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	mnt->mnt_master = master;
	return 0;
}
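
/*
 * Worked example (editor's addition): suppose A, B and C are peers and
 * D is a slave of B.  do_make_slave(B) removes B from the peer list
 * (leaving A and C as the surviving group), picks B's new master among
 * its former peers, preferring one with the same root dentry, and
 * re-parents B's slaves onto that master.  Afterwards both B and D sit
 * side by side on the chosen master's mnt_slave_list.
 */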

/*
 * vfsmount lock must be held for write
 */
void change_mnt_propagation(struct mount *mnt, int type)
{
	if (type == MS_SHARED) {
		set_mnt_shared(mnt);
		return;
	}
	do_make_slave(mnt);
	if (type != MS_SLAVE) {
		list_del_init(&mnt->mnt_slave);
		mnt->mnt_master = NULL;
		if (type == MS_UNBINDABLE)
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
		else
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
	}
}
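
/*
 * Example (editor's sketch): these states map onto the mount(8)
 * propagation commands:
 *
 *	mount --make-shared     /mnt	-> MS_SHARED
 *	mount --make-slave      /mnt	-> MS_SLAVE
 *	mount --make-private    /mnt	-> MS_PRIVATE
 *	mount --make-unbindable /mnt	-> MS_UNBINDABLE
 *
 * MS_PRIVATE and MS_UNBINDABLE both pass through do_make_slave() and
 * then sever the slave linkage; MS_UNBINDABLE additionally flags the
 * mount so it cannot be the source of a bind mount.
 */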

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * a vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 */
static struct mount *propagation_next(struct mount *m,
				      struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			return next_slave(m);

		/* back at master */
		m = master;
	}
}
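
/*
 * Illustrative walk (editor's addition): take a peer group (A, B) where
 * B has slaves S1 and S2.  Starting from origin A the iteration yields
 * B, then descends to B's slaves S1 and S2, then climbs back to B's
 * peer list and terminates when it would return to A.  In other words,
 * the walk is depth-first, visiting a mount's slaves before moving on
 * to its remaining peers.
 */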

static struct mount *skip_propagation_subtree(struct mount *m,
					      struct mount *origin)
{
	/*
	 * Advance m such that propagation_next will not return
	 * the slaves of m.
	 */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		m = last_slave(m);

	return m;
}

static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}

/* all accesses are serialized by namespace_sem */
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct hlist_head *list;

static inline bool peers(const struct mount *m1, const struct mount *m2)
{
	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}

static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (peers(m, last_dest)) {
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		bool done;
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p))
				break;
		}
		do {
			struct mount *parent = last_source->mnt_parent;
			if (peers(last_source, first_source))
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
	}

	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	read_seqlock_excl(&mount_lock);
	mnt_set_mountpoint(m, dest_mp, child);
	if (m->mnt_master != dest_master)
		SET_MNT_MARK(m->mnt_master);
	read_sequnlock_excl(&mount_lock);
	last_dest = m;
	last_source = child;
	hlist_add_head(&child->mnt_hash, list);
	return count_mounts(m->mnt_ns, child);
}
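
/*
 * Editor's note (interpretation, not from the original source): the
 * SET_MNT_MARK() on a copy's master records that a mount slaved to
 * that master has already received a copy.  The walk up the
 * ->mnt_master chain at the top of the else-branch then stops at the
 * nearest already-processed (marked) group, which is what allows
 * last_source to be rewound to the copy at the matching depth before
 * cloning with CL_SLAVE.  propagate_mnt() clears the marks once the
 * whole propagation walk is finished.
 */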

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * mountpoint 'dest_mp'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_mp: destination mountpoint.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		  struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	last_dest = dest_mnt;
	first_source = source_mnt;
	last_source = source_mnt;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n, dest_mp);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
	     m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n, dest_mp);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}
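
/*
 * Example scenario (editor's sketch, shell notation):
 *
 *	mount --make-shared /A
 *	mount --bind /A /B		# /A and /B are now peers
 *	mount /dev/sda1 /A/mnt		# should also appear at /B/mnt
 *
 * The third command reaches this function with dest_mnt = /A; the walk
 * above creates a copy of the new mount for the peer /B (and for any
 * slaves of the group), links the copies into @tree_list, and the
 * caller then splices each copy into place.
 */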

static struct mount *find_topper(struct mount *mnt)
{
	/* If there is exactly one mount covering mnt completely return it. */
	struct mount *child;

	if (!list_is_singular(&mnt->mnt_mounts))
		return NULL;

	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
		return NULL;

	return child;
}
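
/*
 * Example (editor's addition): a "topper" arises from overmounting the
 * same mountpoint, e.g.
 *
 *	mount -t tmpfs tmp /mnt
 *	mount -t tmpfs tmp /mnt		# covers the first mount entirely
 *
 * The first mount now has exactly one child whose mountpoint is the
 * first mount's own root, so find_topper() returns that child.
 */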

/*
 * return true if the refcount is greater than count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return mnt_get_count(mnt) > count;
}

/**
 * propagation_would_overmount - check whether propagation from @from
 *                               would overmount @to
 * @from: shared mount
 * @to: mount to check
 * @mp: future mountpoint of @to on @from
 *
 * If @from propagates mounts to @to, @from and @to must either be peers
 * or one of the masters in the hierarchy of masters of @to must be a
 * peer of @from.
 *
 * If the root of the @to mount is equal to the future mountpoint @mp of
 * the @to mount on @from then @to will be overmounted by whatever is
 * propagated to it.
 *
 * Context: This function expects namespace_lock() to be held and that
 *          @mp is stable.
 * Return: If @from overmounts @to, true is returned, false if not.
 */
bool propagation_would_overmount(const struct mount *from,
				 const struct mount *to,
				 const struct mountpoint *mp)
{
	if (!IS_MNT_SHARED(from))
		return false;

	if (IS_MNT_NEW(to))
		return false;

	if (to->mnt.mnt_root != mp->m_dentry)
		return false;

	for (const struct mount *m = to; m; m = m->mnt_master) {
		if (peers(from, m))
			return true;
	}

	return false;
}
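
/*
 * Usage note (editor's addition, hedged): at the time of writing the
 * move-mount-beneath logic (e.g. can_move_mount_beneath() in
 * fs/namespace.c) relies on this check to reject requests where
 * propagation would immediately overmount the root of one of the
 * receiving mounts, which would create a confusing mount stack.
 */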

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
 *
 * vfsmount lock must be held for write
 */
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
	struct mount *m, *child, *topper;
	struct mount *parent = mnt->mnt_parent;

	if (mnt == parent)
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
		return 1;

	for (m = propagation_next(parent, parent); m;
	     m = propagation_next(m, parent)) {
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
	}
	return 0;
}
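
/*
 * Example (editor's sketch): if /A and /B are peers and a device is
 * mounted at /A/mnt, a copy appears at /B/mnt.  "umount /A/mnt" must
 * also detach /B/mnt, so if some process has /B/mnt as its cwd the
 * extra reference makes the check above report busy and the umount
 * fails with EBUSY, even though nothing is using /A/mnt itself.
 */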

/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
	     m = propagation_next(m, parent)) {
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
{
	bool progress = false;
	struct mount *child;

	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;

	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
			continue;
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}

	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;

	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}
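
/*
 * Editor's note: mounts that can go away are marked and queued on
 * @to_umount; mounts that must survive (locked, or still carrying
 * children other than a topper) are queued on @to_restore so that
 * restore_mounts() can put them back into a clean state after the
 * speculative pass.
 */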

static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *tmp;
	list_for_each_entry(mnt, to_umount, mnt_list) {
		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
			/* topper? */
			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
				list_move_tail(&child->mnt_umounting, to_restore);
			else
				umount_one(child, to_umount);
		}
	}
}

static void restore_mounts(struct list_head *to_restore)
{
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
		struct mount *mnt, *parent;
		struct mountpoint *mp;

		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);

		/* Should this mount be reparented? */
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
	}
}

static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}

/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
	LIST_HEAD(visited);

	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;

		/*
		 * If this mount has already been visited it is known that its
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint have already been visited and there
		 * is no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * known that its entire peer group and all of
				 * their slaves in the propagation tree for the
				 * mountpoint have already been visited and
				 * there is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come across a partially unmounted
				 * mount in @list that has not been visited yet.
				 * Remember it has been visited and continue
				 * on our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}

	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
	cleanup_umount_visitations(&visited);
	list_splice_tail(&to_umount, list);

	return 0;
}
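
/*
 * Example (editor's sketch): with peers /A and /B and a mount at /A/mnt
 * propagated to /B/mnt, "umount /A/mnt" places the /A/mnt mount on
 * @list; the walk above finds the propagated copy at /B/mnt, and if it
 * is unused and unlocked it is appended to @list so that both copies
 * are torn down together.
 */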