// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/xattr.h>
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"

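/*
 * A merged overlay directory cannot simply forward iterate_dir() to one
 * real directory: entries from all layers must be combined, duplicates
 * suppressed and whiteouts hidden.  The structures below cache the
 * merged listing per directory inode, so repeated getdents(2) calls and
 * seeks do not have to re-read every layer.
 */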
struct ovl_cache_entry {
	unsigned int len;
	unsigned int type;
	u64 real_ino;
	u64 ino;
	struct list_head l_node;
	struct rb_node node;
	struct ovl_cache_entry *next_maybe_whiteout;
	bool is_upper;
	bool is_whiteout;
	bool check_xwhiteout;
	char name[];
};

struct ovl_dir_cache {
	long refcount;
	u64 version;
	struct list_head entries;
	struct rb_root root;
};

struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;
	bool is_lowest;
	struct rb_root *root;
	struct list_head *list;
	struct list_head middle;
	struct ovl_cache_entry *first_maybe_whiteout;
	int count;
	int err;
	bool is_upper;
	bool d_type_supported;
	bool in_xwhiteouts_dir;
};

struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;
	struct file *realfile;
	struct file *upperfile;
};

static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return rb_entry(n, struct ovl_cache_entry, node);
}

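/*
 * The rb-tree is keyed by name.  strncmp() against the NUL-terminated
 * cached name compares at most @len bytes, so a probe name that extends
 * a shorter cached name sees the cached NUL and compares greater, while
 * a probe that is a strict prefix of a longer cached name descends left
 * via the explicit length check.
 */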
static bool ovl_cache_entry_find_link(const char *name, int len,
				      struct rb_node ***link,
				      struct rb_node **parent)
{
	bool found = false;
	struct rb_node **newp = *link;

	while (!found && *newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		*parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->len)
			newp = &tmp->node.rb_left;
		else
			found = true;
	}
	*link = newp;

	return found;
}

static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}

static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
			   struct ovl_cache_entry *p)
{
	/* Don't care if not doing ovl_iterate() */
	if (!rdd->dentry)
		return false;

	/* Always recalc d_ino when remapping lower inode numbers */
	if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
		return true;

	/* Always recalc d_ino for parent */
	if (strcmp(p->name, "..") == 0)
		return true;

	/* If this is lower, then native d_ino will do */
	if (!rdd->is_upper)
		return false;

	/*
	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
	 * copied up entries)
	 */
	if ((p->name[0] == '.' && p->len == 1) ||
	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
		return true;

	return false;
}

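/*
 * A whiteout is a character device, so any DT_CHR entry seen while
 * filling the cache may be hiding a lower entry.  That cannot be
 * verified from inside the filldir callback (it needs a lookup), so
 * candidates are chained on first_maybe_whiteout and resolved later by
 * ovl_check_whiteouts().
 */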
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;
	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);

	p = kmalloc(size, GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->real_ino = ino;
	p->ino = ino;
	/* Defer setting d_ino for upper entry to ovl_iterate() */
	if (ovl_calc_d_ino(rdd, p))
		p->ino = 0;
	p->is_upper = rdd->is_upper;
	p->is_whiteout = false;
	/* Defer check for overlay.whiteout to ovl_iterate() */
	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}

static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				   const char *name, int len, u64 ino,
				   unsigned int d_type)
{
	struct rb_node **newp = &rdd->root->rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
		return true;

	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return false;
	}

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, rdd->root);

	return true;
}

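/*
 * Fill callback used for the lowest layer: a name already present in the
 * rb-tree (seen in an upper layer) is moved into the temporary "middle"
 * list rather than duplicated, and a genuinely new name is created
 * there, keeping lowest-layer entries grouped ahead of upper-only ones.
 */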
static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
			    const char *name, int namelen,
			    loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(rdd->root, name, namelen);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err == 0;
}

void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		kfree(p);

	INIT_LIST_HEAD(list);
}

void ovl_dir_cache_free(struct inode *inode)
{
	struct ovl_dir_cache *cache = ovl_dir_cache(inode);

	if (cache) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(inode) == cache)
			ovl_set_dir_cache(inode, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	if (!rdd->is_lowest)
		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
	else
		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}

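/*
 * Resolve the whiteout checks deferred by ovl_cache_entry_new(): with
 * the directory locked and the mounter's credentials in force, look up
 * each DT_CHR candidate and flag the entries that really are whiteouts.
 */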
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
	int err;
	struct ovl_cache_entry *p;
	struct dentry *dentry, *dir = path->dentry;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(rdd->dentry->d_sb);

	err = down_write_killable(&dir->d_inode->i_rwsem);
	if (!err) {
		while (rdd->first_maybe_whiteout) {
			p = rdd->first_maybe_whiteout;
			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
			dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
			if (!IS_ERR(dentry)) {
				p->is_whiteout = ovl_is_whiteout(dentry);
				dput(dentry);
			}
		}
		inode_unlock(dir->d_inode);
	}
	revert_creds(old_cred);

	return err;
}

static inline int ovl_dir_read(const struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath, rdd);

	fput(realfile);

	return err;
}

static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct inode *inode = file_inode(file);
	bool is_real;

	if (cache && ovl_inode_version_get(inode) != cache->version) {
		ovl_cache_put(od, inode);
		od->cache = NULL;
		od->cursor = NULL;
	}
	is_real = ovl_dir_is_real(inode);
	if (od->is_real != is_real) {
		/* is_real can only become false when dir is copied up */
		if (WARN_ON(is_real))
			return;
		od->is_real = false;
	}
}

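/*
 * Read all layers of a merge directory, uppermost layer first.  Only the
 * last (lowest) layer is read with is_lowest set, so that its unique
 * entries are collected in the "middle" list spliced in ahead of the
 * entries contributed by upper layers.
 */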
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.dentry = dentry,
		.list = list,
		.root = root,
		.is_lowest = false,
	};
	int idx, next;
	const struct ovl_layer *layer;

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath, &layer);
		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
					ovl_dentry_has_xwhiteouts(dentry);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}

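/*
 * In the merged view f_pos is simply an index into the cached entry
 * list, so seeking means walking the list from its head.
 */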
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *p;
	loff_t off = 0;

	list_for_each(p, &od->cache->entries) {
		if (off >= pos)
			break;
		off++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = p;
}

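/*
 * Get a reference on the merge cache attached to the directory inode,
 * rebuilding it when the inode version (bumped when the overlay
 * modifies the directory) no longer matches the cached one.
 */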
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;
	struct inode *inode = d_inode(dentry);

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version) {
		WARN_ON(!cache->refcount);
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(d_inode(dentry), NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);
	cache->root = RB_ROOT;

	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
			       const char *name, int namelen, bool warn)
{
	unsigned int xinoshift = 64 - xinobits;

	if (unlikely(ino >> xinoshift)) {
		if (warn) {
			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
					    namelen, name, ino, xinobits);
		}
		return ino;
	}

	/*
	 * The lowest xinobit is reserved for mapping the non-persistent inode
	 * numbers range, but this range is only exposed via st_ino, not here.
	 */
	return ino | ((u64)fsid) << (xinoshift + 1);
}
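
/*
 * For example, with xinobits == 8 (so xinoshift == 56), a lower inode
 * number 42 on fsid 3 does not overflow and is reported as
 * 42 | (3ULL << 57), i.e. the fsid packed above the reserved low bit.
 */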

/*
 * Set d_ino for upper entries if needed. Non-upper entries should always
 * report the uppermost real inode ino and should not call this function.
 *
 * When not all layers are on the same fs, report the real ino also for upper.
 *
 * When all layers are on the same fs, and upper has a reference to
 * copy up origin, call vfs_getattr() on the overlay entry to make
 * sure that d_ino will be consistent with st_ino from stat(2).
 *
 * Also checks the overlay.whiteout xattr by doing a full lookup, which
 * returns a negative dentry in that case.
 */
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
	struct dentry *dir = path->dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	struct dentry *this = NULL;
	enum ovl_path_type type;
	u64 ino = p->real_ino;
	int xinobits = ovl_xino_bits(ofs);
	int err = 0;

	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
		goto out;

	if (p->name[0] == '.') {
		if (p->len == 1) {
			this = dget(dir);
			goto get;
		}
		if (p->len == 2 && p->name[1] == '.') {
			/* we shall not be moved */
			this = dget(dir->d_parent);
			goto get;
		}
	}
	/* This checks also for xwhiteouts */
	this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
		/* Mark a stale entry */
		p->is_whiteout = true;
		if (IS_ERR(this)) {
			err = PTR_ERR(this);
			this = NULL;
			goto fail;
		}
		goto out;
	}

get:
	if (!ovl_same_dev(ofs) || !update_ino)
		goto out;

	type = ovl_path_type(this);
	if (OVL_TYPE_ORIGIN(type)) {
		struct kstat stat;
		struct path statpath = *path;

		statpath.dentry = this;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			goto fail;

		/*
		 * Directory inode is always on overlay st_dev.
		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
		 * of xino bits overflow.
		 */
		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
			     dir->d_sb->s_dev != stat.dev);
		ino = stat.ino;
	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
		ino = ovl_remap_lower_ino(ino, xinobits,
					  ovl_layer_lower(this)->fsid,
					  p->name, p->len,
					  ovl_xino_warn(ofs));
	}

out:
	p->ino = ino;
	dput(this);
	return err;

fail:
	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
			    p->name, err);
	goto out;
}

static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
			   int namelen, loff_t offset, u64 ino,
			   unsigned int d_type)
{
	struct ovl_cache_entry *p;
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
	if (p == NULL) {
		rdd->err = -ENOMEM;
		return false;
	}
	list_add_tail(&p->l_node, rdd->list);

	return true;
}

static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_cache_entry *p, *n;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = list,
		.root = root,
	};

	INIT_LIST_HEAD(list);
	*root = RB_ROOT;
	ovl_path_upper(path->dentry, &realpath);

	err = ovl_dir_read(&realpath, &rdd);
	if (err)
		return err;

	list_for_each_entry_safe(p, n, list, l_node) {
		if (strcmp(p->name, ".") != 0 &&
		    strcmp(p->name, "..") != 0) {
			err = ovl_cache_update(path, p, true);
			if (err)
				return err;
		}
		if (p->ino == p->real_ino) {
			list_del(&p->l_node);
			kfree(p);
		} else {
			struct rb_node **newp = &root->rb_node;
			struct rb_node *parent = NULL;

			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
							      &newp, &parent)))
				return -EIO;

			rb_link_node(&p->node, parent, newp);
			rb_insert_color(&p->node, root);
		}
	}
	return 0;
}

static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
	int res;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version)
		return cache;

	/* Impure cache is not refcounted, free it here */
	ovl_dir_cache_free(inode);
	ovl_set_dir_cache(inode, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}
	if (list_empty(&cache->entries)) {
		/*
		 * A good opportunity to get rid of an unneeded "impure" flag.
		 * Removing the "impure" xattr is best effort.
		 */
		if (!ovl_want_write(dentry)) {
			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
					OVL_XATTR_IMPURE);
			ovl_drop_write(dentry);
		}
		ovl_clear_flag(OVL_IMPURE, inode);
		kfree(cache);
		return NULL;
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}

struct ovl_readdir_translate {
	struct dir_context *orig_ctx;
	struct ovl_dir_cache *cache;
	struct dir_context ctx;
	u64 parent_ino;
	int fsid;
	int xinobits;
	bool xinowarn;
};

static bool ovl_fill_real(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_translate *rdt =
		container_of(ctx, struct ovl_readdir_translate, ctx);
	struct dir_context *orig_ctx = rdt->orig_ctx;

	if (rdt->parent_ino && strcmp(name, "..") == 0) {
		ino = rdt->parent_ino;
	} else if (rdt->cache) {
		struct ovl_cache_entry *p;

		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
		if (p)
			ino = p->ino;
	} else if (rdt->xinobits) {
		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
					  name, namelen, rdt->xinowarn);
	}

	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
}

static bool ovl_is_impure_dir(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *dir = file_inode(file);

	/*
	 * Only upper dir can be impure, but if we are in the middle of
	 * iterating a lower real dir, dir could be copied up and marked
	 * impure. We only want the impure cache if we started iterating
	 * a real upper dir to begin with.
	 */
	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
}

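/*
 * Iterate a real directory, translating d_ino on the fly: ".." of a
 * merge parent via parent_ino, copied up entries of an impure dir via
 * the impure cache, and plain lower entries via xino remapping.
 */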
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
	int err;
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dir = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
	struct ovl_readdir_translate rdt = {
		.ctx.actor = ovl_fill_real,
		.orig_ctx = ctx,
		.xinobits = ovl_xino_bits(ofs),
		.xinowarn = ovl_xino_warn(ofs),
	};

	if (rdt.xinobits && lower_layer)
		rdt.fsid = lower_layer->fsid;

	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
		struct kstat stat;
		struct path statpath = file->f_path;

		statpath.dentry = dir->d_parent;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			return err;

		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
		rdt.parent_ino = stat.ino;
	}

	if (ovl_is_impure_dir(file)) {
		rdt.cache = ovl_cache_get_impure(&file->f_path);
		if (IS_ERR(rdt.cache))
			return PTR_ERR(rdt.cache);
	}

	err = iterate_dir(od->realfile, &rdt.ctx);
	ctx->pos = rdt.ctx.pos;

	return err;
}

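/*
 * Iterate a merge directory from its cache, lazily resolving the d_ino
 * values and xwhiteout checks that were deferred while filling it.
 */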
static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_cache_entry *p;
	const struct cred *old_cred;
	int err;

	old_cred = ovl_override_creds(dentry->d_sb);
	if (!ctx->pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		/*
		 * If parent is merge, then need to adjust d_ino for '..', if
		 * dir is impure then need to adjust d_ino for copied up
		 * entries.
		 */
		if (ovl_xino_bits(ofs) ||
		    (ovl_same_fs(ofs) &&
		     (ovl_is_impure_dir(file) ||
		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
			err = ovl_iterate_real(file, ctx);
		} else {
			err = iterate_dir(od->realfile, ctx);
		}
		goto out;
	}

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		err = PTR_ERR(cache);
		if (IS_ERR(cache))
			goto out;

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout) {
			if (!p->ino || p->check_xwhiteout) {
				err = ovl_cache_update(&file->f_path, p, !p->ino);
				if (err)
					goto out;
			}
		}
		/* ovl_cache_update() sets is_whiteout on stale entry */
		if (!p->is_whiteout) {
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		}
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	err = 0;
out:
	revert_creds(old_cred);
	return err;
}

static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}

static struct file *ovl_dir_open_realfile(const struct file *file,
					  const struct path *realpath)
{
	struct file *res;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
	revert_creds(old_cred);

	return res;
}

/*
 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
 *
 * TODO: use same abstract type for file->private_data of dir and file so
 * upperfile could also be cached for files as well.
 */
struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *old, *realfile = od->realfile;

	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return want_upper ? NULL : realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper) {
		realfile = READ_ONCE(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_dir_open_realfile(file, &upperpath);
			if (IS_ERR(realfile))
				return realfile;

			old = cmpxchg_release(&od->upperfile, NULL, realfile);
			if (old) {
				fput(realfile);
				realfile = old;
			}
		}
	}

	return realfile;
}

static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct file *realfile;
	int err;

	err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
	if (err <= 0)
		return err;

	realfile = ovl_dir_real_file(file, true);
	err = PTR_ERR_OR_ZERO(realfile);

	/* Nothing to sync for lower */
	if (!realfile || err)
		return err;

	return vfs_fsync_range(realfile, start, end, datasync);
}

static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		inode_lock(inode);
		ovl_cache_put(od, inode);
		inode_unlock(inode);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_dir_open_realfile(file, &realpath);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = ovl_dir_is_real(inode);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}

WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate_shared	= shared_ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
};

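/*
 * Check whether a merge directory is logically empty: after reading all
 * layers only ".", ".." and whiteouts may remain.  Whiteouts found in
 * the upper layer are left on @list so that the caller can clear them
 * before removing the directory.
 */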
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p, *n;
	struct rb_root root = RB_ROOT;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(dentry->d_sb);
	err = ovl_dir_read_merged(dentry, list, &root);
	revert_creds(old_cred);
	if (err)
		return err;

	err = 0;

	list_for_each_entry_safe(p, n, list, l_node) {
		/*
		 * Select whiteouts in upperdir, they should
		 * be cleared when deleting this directory.
		 */
		if (p->is_whiteout) {
			if (p->is_upper)
				continue;
			goto del_entry;
		}

		if (p->name[0] == '.') {
			if (p->len == 1)
				goto del_entry;
			if (p->len == 2 && p->name[1] == '.')
				goto del_entry;
		}
		err = -ENOTEMPTY;
		break;

del_entry:
		list_del(&p->l_node);
		kfree(p);
	}

	return err;
}

void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
			   struct list_head *list)
{
	struct ovl_cache_entry *p;

	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (WARN_ON(!p->is_whiteout || !p->is_upper))
			continue;

		dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(ofs, upper->d_inode, dentry);
		dput(dentry);
	}
	inode_unlock(upper->d_inode);
}

static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
			     int namelen, loff_t offset, u64 ino,
			     unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
		return true;

	if (d_type != DT_UNKNOWN)
		rdd->d_type_supported = true;

	return true;
}

/*
 * Returns 1 if d_type is supported, 0 if not supported/unknown, and a
 * negative value if an error is encountered.
 */
int ovl_check_d_type_supported(const struct path *realpath)
{
	int err;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_check_d_type,
		.d_type_supported = false,
	};

	err = ovl_dir_read(realpath, &rdd);
	if (err)
		return err;

	return rdd.d_type_supported;
}

#define OVL_INCOMPATDIR_NAME "incompat"

static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
				       int level)
{
	int err;
	struct inode *dir = path->dentry->d_inode;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = &list,
	};
	bool incompat = false;

	/*
	 * The "work/incompat" directory is treated specially - if it is not
	 * empty, instead of printing a generic error and mounting read-only,
	 * we will error about incompat features and fail the mount.
	 *
	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
	 * starts with '#'.
	 */
	if (level == 2 &&
	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
		incompat = true;

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		} else if (incompat) {
			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
			       p->name);
			err = -EINVAL;
			break;
		}
		dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
		dput(dentry);
		if (err)
			break;
	}
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
	return err;
}

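/*
 * Remove a leftover from the workdir: non-directories and anything below
 * the first level are deleted directly, while a first-level directory
 * that rmdir refuses to remove is emptied recursively first.
 */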
int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
			struct vfsmount *mnt, struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1)
		return ovl_cleanup(ofs, dir, dentry);

	err = ovl_do_rmdir(ofs, dir, dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		inode_unlock(dir);
		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
		inode_lock_nested(dir, I_MUTEX_PARENT);
		if (!err)
			err = ovl_cleanup(ofs, dir, dentry);
	}

	return err;
}

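/*
 * Walk the index dir at mount time: verify each index entry, clean up
 * stale and orphan entries (whiting out orphans when nfs_export must
 * block future open-by-handle), and abort the mount on any unexpected
 * verification failure.
 */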
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
	int err;
	struct dentry *indexdir = ofs->workdir;
	struct dentry *index = NULL;
	struct inode *dir = indexdir->d_inode;
	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = &list,
	};

	err = ovl_dir_read(&path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			break;
		}
		/* Cleanup leftover from index create/cleanup attempt */
		if (index->d_name.name[0] == '#') {
			err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
			if (err)
				break;
			goto next;
		}
		err = ovl_verify_index(ofs, index);
		if (!err) {
			goto next;
		} else if (err == -ESTALE) {
			/* Cleanup stale index entries */
			err = ovl_cleanup(ofs, dir, index);
		} else if (err != -ENOENT) {
			/*
			 * Abort mount to avoid corrupting the index if
			 * an incompatible index entry was found or on out
			 * of memory.
			 */
			break;
		} else if (ofs->config.nfs_export) {
			/*
			 * Whiteout orphan index to block future open by
			 * handle after overlay nlink dropped to zero.
			 */
			err = ovl_cleanup_and_whiteout(ofs, dir, index);
		} else {
			/* Cleanup orphan index entries */
			err = ovl_cleanup(ofs, dir, index);
		}

		if (err)
			break;

next:
		dput(index);
		index = NULL;
	}
	dput(index);
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
	if (err)
		pr_err("failed index dir cleanup (%i)\n", err);
	return err;
}