1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2017 Red Hat, Inc. |
4 | */ |
5 | |
6 | #include <linux/cred.h> |
7 | #include <linux/file.h> |
8 | #include <linux/mount.h> |
9 | #include <linux/xattr.h> |
10 | #include <linux/uio.h> |
11 | #include <linux/uaccess.h> |
12 | #include <linux/security.h> |
13 | #include <linux/fs.h> |
14 | #include <linux/backing-file.h> |
15 | #include "overlayfs.h" |
16 | |
/*
 * Single-character tag describing @realinode relative to overlay @inode,
 * used in debug output:
 *   'l' - @realinode is not the upper inode of @inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
26 | |
27 | /* No atime modification on underlying */ |
28 | #define OVL_OPEN_FLAGS (O_NOATIME) |
29 | |
30 | static struct file *ovl_open_realfile(const struct file *file, |
31 | const struct path *realpath) |
32 | { |
33 | struct inode *realinode = d_inode(dentry: realpath->dentry); |
34 | struct inode *inode = file_inode(f: file); |
35 | struct mnt_idmap *real_idmap; |
36 | struct file *realfile; |
37 | const struct cred *old_cred; |
38 | int flags = file->f_flags | OVL_OPEN_FLAGS; |
39 | int acc_mode = ACC_MODE(flags); |
40 | int err; |
41 | |
42 | if (flags & O_APPEND) |
43 | acc_mode |= MAY_APPEND; |
44 | |
45 | old_cred = ovl_override_creds(sb: inode->i_sb); |
46 | real_idmap = mnt_idmap(mnt: realpath->mnt); |
47 | err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); |
48 | if (err) { |
49 | realfile = ERR_PTR(error: err); |
50 | } else { |
51 | if (!inode_owner_or_capable(idmap: real_idmap, inode: realinode)) |
52 | flags &= ~O_NOATIME; |
53 | |
54 | realfile = backing_file_open(user_path: &file->f_path, flags, real_path: realpath, |
55 | current_cred()); |
56 | } |
57 | revert_creds(old_cred); |
58 | |
59 | pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n" , |
60 | file, file, ovl_whatisit(inode, realinode), file->f_flags, |
61 | realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); |
62 | |
63 | return realfile; |
64 | } |
65 | |
66 | #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) |
67 | |
68 | static int ovl_change_flags(struct file *file, unsigned int flags) |
69 | { |
70 | struct inode *inode = file_inode(f: file); |
71 | int err; |
72 | |
73 | flags &= OVL_SETFL_MASK; |
74 | |
75 | if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) |
76 | return -EPERM; |
77 | |
78 | if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) |
79 | return -EINVAL; |
80 | |
81 | if (file->f_op->check_flags) { |
82 | err = file->f_op->check_flags(flags); |
83 | if (err) |
84 | return err; |
85 | } |
86 | |
87 | spin_lock(lock: &file->f_lock); |
88 | file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; |
89 | file->f_iocb_flags = iocb_flags(file); |
90 | spin_unlock(lock: &file->f_lock); |
91 | |
92 | return 0; |
93 | } |
94 | |
95 | static int ovl_real_fdget_meta(const struct file *file, struct fd *real, |
96 | bool allow_meta) |
97 | { |
98 | struct dentry *dentry = file_dentry(file); |
99 | struct path realpath; |
100 | int err; |
101 | |
102 | real->flags = 0; |
103 | real->file = file->private_data; |
104 | |
105 | if (allow_meta) { |
106 | ovl_path_real(dentry, path: &realpath); |
107 | } else { |
108 | /* lazy lookup and verify of lowerdata */ |
109 | err = ovl_verify_lowerdata(dentry); |
110 | if (err) |
111 | return err; |
112 | |
113 | ovl_path_realdata(dentry, path: &realpath); |
114 | } |
115 | if (!realpath.dentry) |
116 | return -EIO; |
117 | |
118 | /* Has it been copied up since we'd opened it? */ |
119 | if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) { |
120 | real->flags = FDPUT_FPUT; |
121 | real->file = ovl_open_realfile(file, realpath: &realpath); |
122 | |
123 | return PTR_ERR_OR_ZERO(ptr: real->file); |
124 | } |
125 | |
126 | /* Did the flags change since open? */ |
127 | if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS)) |
128 | return ovl_change_flags(file: real->file, flags: file->f_flags); |
129 | |
130 | return 0; |
131 | } |
132 | |
133 | static int ovl_real_fdget(const struct file *file, struct fd *real) |
134 | { |
135 | if (d_is_dir(dentry: file_dentry(file))) { |
136 | real->flags = 0; |
137 | real->file = ovl_dir_real_file(file, want_upper: false); |
138 | |
139 | return PTR_ERR_OR_ZERO(ptr: real->file); |
140 | } |
141 | |
142 | return ovl_real_fdget_meta(file, real, allow_meta: false); |
143 | } |
144 | |
145 | static int ovl_open(struct inode *inode, struct file *file) |
146 | { |
147 | struct dentry *dentry = file_dentry(file); |
148 | struct file *realfile; |
149 | struct path realpath; |
150 | int err; |
151 | |
152 | /* lazy lookup and verify lowerdata */ |
153 | err = ovl_verify_lowerdata(dentry); |
154 | if (err) |
155 | return err; |
156 | |
157 | err = ovl_maybe_copy_up(dentry, flags: file->f_flags); |
158 | if (err) |
159 | return err; |
160 | |
161 | /* No longer need these flags, so don't pass them on to underlying fs */ |
162 | file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
163 | |
164 | ovl_path_realdata(dentry, path: &realpath); |
165 | if (!realpath.dentry) |
166 | return -EIO; |
167 | |
168 | realfile = ovl_open_realfile(file, realpath: &realpath); |
169 | if (IS_ERR(ptr: realfile)) |
170 | return PTR_ERR(ptr: realfile); |
171 | |
172 | file->private_data = realfile; |
173 | |
174 | return 0; |
175 | } |
176 | |
177 | static int ovl_release(struct inode *inode, struct file *file) |
178 | { |
179 | fput(file->private_data); |
180 | |
181 | return 0; |
182 | } |
183 | |
184 | static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) |
185 | { |
186 | struct inode *inode = file_inode(f: file); |
187 | struct fd real; |
188 | const struct cred *old_cred; |
189 | loff_t ret; |
190 | |
191 | /* |
192 | * The two special cases below do not need to involve real fs, |
193 | * so we can optimizing concurrent callers. |
194 | */ |
195 | if (offset == 0) { |
196 | if (whence == SEEK_CUR) |
197 | return file->f_pos; |
198 | |
199 | if (whence == SEEK_SET) |
200 | return vfs_setpos(file, offset: 0, maxsize: 0); |
201 | } |
202 | |
203 | ret = ovl_real_fdget(file, real: &real); |
204 | if (ret) |
205 | return ret; |
206 | |
207 | /* |
208 | * Overlay file f_pos is the master copy that is preserved |
209 | * through copy up and modified on read/write, but only real |
210 | * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose |
211 | * limitations that are more strict than ->s_maxbytes for specific |
212 | * files, so we use the real file to perform seeks. |
213 | */ |
214 | ovl_inode_lock(inode); |
215 | real.file->f_pos = file->f_pos; |
216 | |
217 | old_cred = ovl_override_creds(sb: inode->i_sb); |
218 | ret = vfs_llseek(file: real.file, offset, whence); |
219 | revert_creds(old_cred); |
220 | |
221 | file->f_pos = real.file->f_pos; |
222 | ovl_inode_unlock(inode); |
223 | |
224 | fdput(fd: real); |
225 | |
226 | return ret; |
227 | } |
228 | |
/*
 * Called after the real file was modified: refresh the overlay inode's
 * attributes (size/mtime) via ovl_copyattr().
 */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	/* Update size/mtime */
	ovl_copyattr(inode);
}
234 | |
235 | static void ovl_file_accessed(struct file *file) |
236 | { |
237 | struct inode *inode, *upperinode; |
238 | struct timespec64 ctime, uctime; |
239 | struct timespec64 mtime, umtime; |
240 | |
241 | if (file->f_flags & O_NOATIME) |
242 | return; |
243 | |
244 | inode = file_inode(f: file); |
245 | upperinode = ovl_inode_upper(inode); |
246 | |
247 | if (!upperinode) |
248 | return; |
249 | |
250 | ctime = inode_get_ctime(inode); |
251 | uctime = inode_get_ctime(inode: upperinode); |
252 | mtime = inode_get_mtime(inode); |
253 | umtime = inode_get_mtime(inode: upperinode); |
254 | if ((!timespec64_equal(a: &mtime, b: &umtime)) || |
255 | !timespec64_equal(a: &ctime, b: &uctime)) { |
256 | inode_set_mtime_to_ts(inode, ts: inode_get_mtime(inode: upperinode)); |
257 | inode_set_ctime_to_ts(inode, ts: uctime); |
258 | } |
259 | |
260 | touch_atime(&file->f_path); |
261 | } |
262 | |
263 | static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
264 | { |
265 | struct file *file = iocb->ki_filp; |
266 | struct fd real; |
267 | ssize_t ret; |
268 | struct backing_file_ctx ctx = { |
269 | .cred = ovl_creds(sb: file_inode(f: file)->i_sb), |
270 | .user_file = file, |
271 | .accessed = ovl_file_accessed, |
272 | }; |
273 | |
274 | if (!iov_iter_count(i: iter)) |
275 | return 0; |
276 | |
277 | ret = ovl_real_fdget(file, real: &real); |
278 | if (ret) |
279 | return ret; |
280 | |
281 | ret = backing_file_read_iter(file: real.file, iter, iocb, flags: iocb->ki_flags, |
282 | ctx: &ctx); |
283 | fdput(fd: real); |
284 | |
285 | return ret; |
286 | } |
287 | |
288 | static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) |
289 | { |
290 | struct file *file = iocb->ki_filp; |
291 | struct inode *inode = file_inode(f: file); |
292 | struct fd real; |
293 | ssize_t ret; |
294 | int ifl = iocb->ki_flags; |
295 | struct backing_file_ctx ctx = { |
296 | .cred = ovl_creds(sb: inode->i_sb), |
297 | .user_file = file, |
298 | .end_write = ovl_file_modified, |
299 | }; |
300 | |
301 | if (!iov_iter_count(i: iter)) |
302 | return 0; |
303 | |
304 | inode_lock(inode); |
305 | /* Update mode */ |
306 | ovl_copyattr(to: inode); |
307 | |
308 | ret = ovl_real_fdget(file, real: &real); |
309 | if (ret) |
310 | goto out_unlock; |
311 | |
312 | if (!ovl_should_sync(ofs: OVL_FS(sb: inode->i_sb))) |
313 | ifl &= ~(IOCB_DSYNC | IOCB_SYNC); |
314 | |
315 | /* |
316 | * Overlayfs doesn't support deferred completions, don't copy |
317 | * this property in case it is set by the issuer. |
318 | */ |
319 | ifl &= ~IOCB_DIO_CALLER_COMP; |
320 | ret = backing_file_write_iter(file: real.file, iter, iocb, flags: ifl, ctx: &ctx); |
321 | fdput(fd: real); |
322 | |
323 | out_unlock: |
324 | inode_unlock(inode); |
325 | |
326 | return ret; |
327 | } |
328 | |
329 | static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, |
330 | struct pipe_inode_info *pipe, size_t len, |
331 | unsigned int flags) |
332 | { |
333 | struct fd real; |
334 | ssize_t ret; |
335 | struct backing_file_ctx ctx = { |
336 | .cred = ovl_creds(sb: file_inode(f: in)->i_sb), |
337 | .user_file = in, |
338 | .accessed = ovl_file_accessed, |
339 | }; |
340 | |
341 | ret = ovl_real_fdget(file: in, real: &real); |
342 | if (ret) |
343 | return ret; |
344 | |
345 | ret = backing_file_splice_read(in: real.file, ppos, pipe, len, flags, ctx: &ctx); |
346 | fdput(fd: real); |
347 | |
348 | return ret; |
349 | } |
350 | |
351 | /* |
352 | * Calling iter_file_splice_write() directly from overlay's f_op may deadlock |
353 | * due to lock order inversion between pipe->mutex in iter_file_splice_write() |
354 | * and file_start_write(real.file) in ovl_write_iter(). |
355 | * |
356 | * So do everything ovl_write_iter() does and call iter_file_splice_write() on |
357 | * the real file. |
358 | */ |
359 | static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, |
360 | loff_t *ppos, size_t len, unsigned int flags) |
361 | { |
362 | struct fd real; |
363 | struct inode *inode = file_inode(f: out); |
364 | ssize_t ret; |
365 | struct backing_file_ctx ctx = { |
366 | .cred = ovl_creds(sb: inode->i_sb), |
367 | .user_file = out, |
368 | .end_write = ovl_file_modified, |
369 | }; |
370 | |
371 | inode_lock(inode); |
372 | /* Update mode */ |
373 | ovl_copyattr(to: inode); |
374 | |
375 | ret = ovl_real_fdget(file: out, real: &real); |
376 | if (ret) |
377 | goto out_unlock; |
378 | |
379 | ret = backing_file_splice_write(pipe, out: real.file, ppos, len, flags, ctx: &ctx); |
380 | fdput(fd: real); |
381 | |
382 | out_unlock: |
383 | inode_unlock(inode); |
384 | |
385 | return ret; |
386 | } |
387 | |
388 | static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
389 | { |
390 | struct fd real; |
391 | const struct cred *old_cred; |
392 | int ret; |
393 | |
394 | ret = ovl_sync_status(ofs: OVL_FS(sb: file_inode(f: file)->i_sb)); |
395 | if (ret <= 0) |
396 | return ret; |
397 | |
398 | ret = ovl_real_fdget_meta(file, real: &real, allow_meta: !datasync); |
399 | if (ret) |
400 | return ret; |
401 | |
402 | /* Don't sync lower file for fear of receiving EROFS error */ |
403 | if (file_inode(f: real.file) == ovl_inode_upper(inode: file_inode(f: file))) { |
404 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
405 | ret = vfs_fsync_range(file: real.file, start, end, datasync); |
406 | revert_creds(old_cred); |
407 | } |
408 | |
409 | fdput(fd: real); |
410 | |
411 | return ret; |
412 | } |
413 | |
414 | static int ovl_mmap(struct file *file, struct vm_area_struct *vma) |
415 | { |
416 | struct file *realfile = file->private_data; |
417 | struct backing_file_ctx ctx = { |
418 | .cred = ovl_creds(sb: file_inode(f: file)->i_sb), |
419 | .user_file = file, |
420 | .accessed = ovl_file_accessed, |
421 | }; |
422 | |
423 | return backing_file_mmap(file: realfile, vma, ctx: &ctx); |
424 | } |
425 | |
426 | static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
427 | { |
428 | struct inode *inode = file_inode(f: file); |
429 | struct fd real; |
430 | const struct cred *old_cred; |
431 | int ret; |
432 | |
433 | inode_lock(inode); |
434 | /* Update mode */ |
435 | ovl_copyattr(to: inode); |
436 | ret = file_remove_privs(file); |
437 | if (ret) |
438 | goto out_unlock; |
439 | |
440 | ret = ovl_real_fdget(file, real: &real); |
441 | if (ret) |
442 | goto out_unlock; |
443 | |
444 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
445 | ret = vfs_fallocate(file: real.file, mode, offset, len); |
446 | revert_creds(old_cred); |
447 | |
448 | /* Update size */ |
449 | ovl_file_modified(file); |
450 | |
451 | fdput(fd: real); |
452 | |
453 | out_unlock: |
454 | inode_unlock(inode); |
455 | |
456 | return ret; |
457 | } |
458 | |
459 | static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) |
460 | { |
461 | struct fd real; |
462 | const struct cred *old_cred; |
463 | int ret; |
464 | |
465 | ret = ovl_real_fdget(file, real: &real); |
466 | if (ret) |
467 | return ret; |
468 | |
469 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
470 | ret = vfs_fadvise(file: real.file, offset, len, advice); |
471 | revert_creds(old_cred); |
472 | |
473 | fdput(fd: real); |
474 | |
475 | return ret; |
476 | } |
477 | |
478 | enum ovl_copyop { |
479 | OVL_COPY, |
480 | OVL_CLONE, |
481 | OVL_DEDUPE, |
482 | }; |
483 | |
484 | static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, |
485 | struct file *file_out, loff_t pos_out, |
486 | loff_t len, unsigned int flags, enum ovl_copyop op) |
487 | { |
488 | struct inode *inode_out = file_inode(f: file_out); |
489 | struct fd real_in, real_out; |
490 | const struct cred *old_cred; |
491 | loff_t ret; |
492 | |
493 | inode_lock(inode: inode_out); |
494 | if (op != OVL_DEDUPE) { |
495 | /* Update mode */ |
496 | ovl_copyattr(to: inode_out); |
497 | ret = file_remove_privs(file_out); |
498 | if (ret) |
499 | goto out_unlock; |
500 | } |
501 | |
502 | ret = ovl_real_fdget(file: file_out, real: &real_out); |
503 | if (ret) |
504 | goto out_unlock; |
505 | |
506 | ret = ovl_real_fdget(file: file_in, real: &real_in); |
507 | if (ret) { |
508 | fdput(fd: real_out); |
509 | goto out_unlock; |
510 | } |
511 | |
512 | old_cred = ovl_override_creds(sb: file_inode(f: file_out)->i_sb); |
513 | switch (op) { |
514 | case OVL_COPY: |
515 | ret = vfs_copy_file_range(real_in.file, pos_in, |
516 | real_out.file, pos_out, len, flags); |
517 | break; |
518 | |
519 | case OVL_CLONE: |
520 | ret = vfs_clone_file_range(file_in: real_in.file, pos_in, |
521 | file_out: real_out.file, pos_out, len, remap_flags: flags); |
522 | break; |
523 | |
524 | case OVL_DEDUPE: |
525 | ret = vfs_dedupe_file_range_one(src_file: real_in.file, src_pos: pos_in, |
526 | dst_file: real_out.file, dst_pos: pos_out, len, |
527 | remap_flags: flags); |
528 | break; |
529 | } |
530 | revert_creds(old_cred); |
531 | |
532 | /* Update size */ |
533 | ovl_file_modified(file: file_out); |
534 | |
535 | fdput(fd: real_in); |
536 | fdput(fd: real_out); |
537 | |
538 | out_unlock: |
539 | inode_unlock(inode: inode_out); |
540 | |
541 | return ret; |
542 | } |
543 | |
544 | static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, |
545 | struct file *file_out, loff_t pos_out, |
546 | size_t len, unsigned int flags) |
547 | { |
548 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, |
549 | op: OVL_COPY); |
550 | } |
551 | |
552 | static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, |
553 | struct file *file_out, loff_t pos_out, |
554 | loff_t len, unsigned int remap_flags) |
555 | { |
556 | enum ovl_copyop op; |
557 | |
558 | if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) |
559 | return -EINVAL; |
560 | |
561 | if (remap_flags & REMAP_FILE_DEDUP) |
562 | op = OVL_DEDUPE; |
563 | else |
564 | op = OVL_CLONE; |
565 | |
566 | /* |
567 | * Don't copy up because of a dedupe request, this wouldn't make sense |
568 | * most of the time (data would be duplicated instead of deduplicated). |
569 | */ |
570 | if (op == OVL_DEDUPE && |
571 | (!ovl_inode_upper(inode: file_inode(f: file_in)) || |
572 | !ovl_inode_upper(inode: file_inode(f: file_out)))) |
573 | return -EPERM; |
574 | |
575 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, |
576 | flags: remap_flags, op); |
577 | } |
578 | |
579 | static int ovl_flush(struct file *file, fl_owner_t id) |
580 | { |
581 | struct fd real; |
582 | const struct cred *old_cred; |
583 | int err; |
584 | |
585 | err = ovl_real_fdget(file, real: &real); |
586 | if (err) |
587 | return err; |
588 | |
589 | if (real.file->f_op->flush) { |
590 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
591 | err = real.file->f_op->flush(real.file, id); |
592 | revert_creds(old_cred); |
593 | } |
594 | fdput(fd: real); |
595 | |
596 | return err; |
597 | } |
598 | |
599 | const struct file_operations ovl_file_operations = { |
600 | .open = ovl_open, |
601 | .release = ovl_release, |
602 | .llseek = ovl_llseek, |
603 | .read_iter = ovl_read_iter, |
604 | .write_iter = ovl_write_iter, |
605 | .fsync = ovl_fsync, |
606 | .mmap = ovl_mmap, |
607 | .fallocate = ovl_fallocate, |
608 | .fadvise = ovl_fadvise, |
609 | .flush = ovl_flush, |
610 | .splice_read = ovl_splice_read, |
611 | .splice_write = ovl_splice_write, |
612 | |
613 | .copy_file_range = ovl_copy_file_range, |
614 | .remap_file_range = ovl_remap_file_range, |
615 | }; |
616 | |