1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
4 | * All Rights Reserved. |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_inode.h" |
14 | #include "xfs_acl.h" |
15 | #include "xfs_quota.h" |
16 | #include "xfs_da_format.h" |
17 | #include "xfs_da_btree.h" |
18 | #include "xfs_attr.h" |
19 | #include "xfs_trans.h" |
20 | #include "xfs_trace.h" |
21 | #include "xfs_icache.h" |
22 | #include "xfs_symlink.h" |
23 | #include "xfs_dir2.h" |
24 | #include "xfs_iomap.h" |
25 | #include "xfs_error.h" |
26 | #include "xfs_ioctl.h" |
27 | #include "xfs_xattr.h" |
28 | |
29 | #include <linux/posix_acl.h> |
30 | #include <linux/security.h> |
31 | #include <linux/iversion.h> |
32 | #include <linux/fiemap.h> |
33 | |
34 | /* |
35 | * Directories have different lock order w.r.t. mmap_lock compared to regular |
36 | * files. This is due to readdir potentially triggering page faults on a user |
37 | * buffer inside filldir(), and this happens with the ilock on the directory |
38 | * held. For regular files, the lock order is the other way around - the |
39 | * mmap_lock is taken during the page fault, and then we lock the ilock to do |
40 | * block mapping. Hence we need a different class for the directory ilock so |
41 | * that lockdep can tell them apart. |
42 | */ |
43 | static struct lock_class_key xfs_nondir_ilock_class; |
44 | static struct lock_class_key xfs_dir_ilock_class; |
45 | |
46 | static int |
47 | xfs_initxattrs( |
48 | struct inode *inode, |
49 | const struct xattr *xattr_array, |
50 | void *fs_info) |
51 | { |
52 | const struct xattr *xattr; |
53 | struct xfs_inode *ip = XFS_I(inode); |
54 | int error = 0; |
55 | |
56 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
57 | struct xfs_da_args args = { |
58 | .dp = ip, |
59 | .attr_filter = XFS_ATTR_SECURE, |
60 | .name = xattr->name, |
61 | .namelen = strlen(xattr->name), |
62 | .value = xattr->value, |
63 | .valuelen = xattr->value_len, |
64 | }; |
65 | error = xfs_attr_change(args: &args); |
66 | if (error < 0) |
67 | break; |
68 | } |
69 | return error; |
70 | } |
71 | |
72 | /* |
73 | * Hook in SELinux. This is not quite correct yet, what we really need |
74 | * here (as we do for default ACLs) is a mechanism by which creation of |
75 | * these attrs can be journalled at inode creation time (along with the |
76 | * inode, of course, such that log replay can't cause these to be lost). |
77 | */ |
78 | int |
79 | xfs_inode_init_security( |
80 | struct inode *inode, |
81 | struct inode *dir, |
82 | const struct qstr *qstr) |
83 | { |
84 | return security_inode_init_security(inode, dir, qstr, |
85 | initxattrs: &xfs_initxattrs, NULL); |
86 | } |
87 | |
88 | static void |
89 | xfs_dentry_to_name( |
90 | struct xfs_name *namep, |
91 | struct dentry *dentry) |
92 | { |
93 | namep->name = dentry->d_name.name; |
94 | namep->len = dentry->d_name.len; |
95 | namep->type = XFS_DIR3_FT_UNKNOWN; |
96 | } |
97 | |
98 | static int |
99 | xfs_dentry_mode_to_name( |
100 | struct xfs_name *namep, |
101 | struct dentry *dentry, |
102 | int mode) |
103 | { |
104 | namep->name = dentry->d_name.name; |
105 | namep->len = dentry->d_name.len; |
106 | namep->type = xfs_mode_to_ftype(mode); |
107 | |
108 | if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN)) |
109 | return -EFSCORRUPTED; |
110 | |
111 | return 0; |
112 | } |
113 | |
114 | STATIC void |
115 | xfs_cleanup_inode( |
116 | struct inode *dir, |
117 | struct inode *inode, |
118 | struct dentry *dentry) |
119 | { |
120 | struct xfs_name teardown; |
121 | |
122 | /* Oh, the horror. |
123 | * If we can't add the ACL or we fail in |
124 | * xfs_inode_init_security we must back out. |
125 | * ENOSPC can hit here, among other things. |
126 | */ |
127 | xfs_dentry_to_name(namep: &teardown, dentry); |
128 | |
129 | xfs_remove(dp: XFS_I(inode: dir), name: &teardown, ip: XFS_I(inode)); |
130 | } |
131 | |
132 | /* |
133 | * Check to see if we are likely to need an extended attribute to be added to |
134 | * the inode we are about to allocate. This allows the attribute fork to be |
135 | * created during the inode allocation, reducing the number of transactions we |
136 | * need to do in this fast path. |
137 | * |
138 | * The security checks are optimistic, but not guaranteed. The two LSMs that |
139 | * require xattrs to be added here (selinux and smack) are also the only two |
140 | * LSMs that add a sb->s_security structure to the superblock. Hence if security |
141 | * is enabled and sb->s_security is set, we have a pretty good idea that we are |
142 | * going to be asked to add a security xattr immediately after allocating the |
143 | * xfs inode and instantiating the VFS inode. |
144 | */ |
145 | static inline bool |
146 | xfs_create_need_xattr( |
147 | struct inode *dir, |
148 | struct posix_acl *default_acl, |
149 | struct posix_acl *acl) |
150 | { |
151 | if (acl) |
152 | return true; |
153 | if (default_acl) |
154 | return true; |
155 | #if IS_ENABLED(CONFIG_SECURITY) |
156 | if (dir->i_sb->s_security) |
157 | return true; |
158 | #endif |
159 | return false; |
160 | } |
161 | |
162 | |
163 | STATIC int |
164 | xfs_generic_create( |
165 | struct mnt_idmap *idmap, |
166 | struct inode *dir, |
167 | struct dentry *dentry, |
168 | umode_t mode, |
169 | dev_t rdev, |
170 | struct file *tmpfile) /* unnamed file */ |
171 | { |
172 | struct inode *inode; |
173 | struct xfs_inode *ip = NULL; |
174 | struct posix_acl *default_acl, *acl; |
175 | struct xfs_name name; |
176 | int error; |
177 | |
178 | /* |
179 | * Irix uses Missed'em'V split, but doesn't want to see |
180 | * the upper 5 bits of (14bit) major. |
181 | */ |
182 | if (S_ISCHR(mode) || S_ISBLK(mode)) { |
183 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) |
184 | return -EINVAL; |
185 | } else { |
186 | rdev = 0; |
187 | } |
188 | |
189 | error = posix_acl_create(dir, &mode, &default_acl, &acl); |
190 | if (error) |
191 | return error; |
192 | |
193 | /* Verify mode is valid also for tmpfile case */ |
194 | error = xfs_dentry_mode_to_name(namep: &name, dentry, mode); |
195 | if (unlikely(error)) |
196 | goto out_free_acl; |
197 | |
198 | if (!tmpfile) { |
199 | error = xfs_create(idmap, dp: XFS_I(inode: dir), name: &name, mode, rdev, |
200 | need_xattr: xfs_create_need_xattr(dir, default_acl, acl), |
201 | ipp: &ip); |
202 | } else { |
203 | error = xfs_create_tmpfile(idmap, dp: XFS_I(inode: dir), mode, ipp: &ip); |
204 | } |
205 | if (unlikely(error)) |
206 | goto out_free_acl; |
207 | |
208 | inode = VFS_I(ip); |
209 | |
210 | error = xfs_inode_init_security(inode, dir, qstr: &dentry->d_name); |
211 | if (unlikely(error)) |
212 | goto out_cleanup_inode; |
213 | |
214 | if (default_acl) { |
215 | error = __xfs_set_acl(inode, acl: default_acl, ACL_TYPE_DEFAULT); |
216 | if (error) |
217 | goto out_cleanup_inode; |
218 | } |
219 | if (acl) { |
220 | error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); |
221 | if (error) |
222 | goto out_cleanup_inode; |
223 | } |
224 | |
225 | xfs_setup_iops(ip); |
226 | |
227 | if (tmpfile) { |
228 | /* |
229 | * The VFS requires that any inode fed to d_tmpfile must have |
230 | * nlink == 1 so that it can decrement the nlink in d_tmpfile. |
231 | * However, we created the temp file with nlink == 0 because |
232 | * we're not allowed to put an inode with nlink > 0 on the |
233 | * unlinked list. Therefore we have to set nlink to 1 so that |
234 | * d_tmpfile can immediately set it back to zero. |
235 | */ |
236 | set_nlink(inode, nlink: 1); |
237 | d_tmpfile(tmpfile, inode); |
238 | } else |
239 | d_instantiate(dentry, inode); |
240 | |
241 | xfs_finish_inode_setup(ip); |
242 | |
243 | out_free_acl: |
244 | posix_acl_release(acl: default_acl); |
245 | posix_acl_release(acl); |
246 | return error; |
247 | |
248 | out_cleanup_inode: |
249 | xfs_finish_inode_setup(ip); |
250 | if (!tmpfile) |
251 | xfs_cleanup_inode(dir, inode, dentry); |
252 | xfs_irele(ip); |
253 | goto out_free_acl; |
254 | } |
255 | |
256 | STATIC int |
257 | xfs_vn_mknod( |
258 | struct mnt_idmap *idmap, |
259 | struct inode *dir, |
260 | struct dentry *dentry, |
261 | umode_t mode, |
262 | dev_t rdev) |
263 | { |
264 | return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL); |
265 | } |
266 | |
267 | STATIC int |
268 | xfs_vn_create( |
269 | struct mnt_idmap *idmap, |
270 | struct inode *dir, |
271 | struct dentry *dentry, |
272 | umode_t mode, |
273 | bool flags) |
274 | { |
275 | return xfs_generic_create(idmap, dir, dentry, mode, rdev: 0, NULL); |
276 | } |
277 | |
278 | STATIC int |
279 | xfs_vn_mkdir( |
280 | struct mnt_idmap *idmap, |
281 | struct inode *dir, |
282 | struct dentry *dentry, |
283 | umode_t mode) |
284 | { |
285 | return xfs_generic_create(idmap, dir, dentry, mode: mode | S_IFDIR, rdev: 0, NULL); |
286 | } |
287 | |
288 | STATIC struct dentry * |
289 | xfs_vn_lookup( |
290 | struct inode *dir, |
291 | struct dentry *dentry, |
292 | unsigned int flags) |
293 | { |
294 | struct inode *inode; |
295 | struct xfs_inode *cip; |
296 | struct xfs_name name; |
297 | int error; |
298 | |
299 | if (dentry->d_name.len >= MAXNAMELEN) |
300 | return ERR_PTR(error: -ENAMETOOLONG); |
301 | |
302 | xfs_dentry_to_name(namep: &name, dentry); |
303 | error = xfs_lookup(dp: XFS_I(inode: dir), name: &name, ipp: &cip, NULL); |
304 | if (likely(!error)) |
305 | inode = VFS_I(ip: cip); |
306 | else if (likely(error == -ENOENT)) |
307 | inode = NULL; |
308 | else |
309 | inode = ERR_PTR(error); |
310 | return d_splice_alias(inode, dentry); |
311 | } |
312 | |
313 | STATIC struct dentry * |
314 | xfs_vn_ci_lookup( |
315 | struct inode *dir, |
316 | struct dentry *dentry, |
317 | unsigned int flags) |
318 | { |
319 | struct xfs_inode *ip; |
320 | struct xfs_name xname; |
321 | struct xfs_name ci_name; |
322 | struct qstr dname; |
323 | int error; |
324 | |
325 | if (dentry->d_name.len >= MAXNAMELEN) |
326 | return ERR_PTR(error: -ENAMETOOLONG); |
327 | |
328 | xfs_dentry_to_name(namep: &xname, dentry); |
329 | error = xfs_lookup(dp: XFS_I(inode: dir), name: &xname, ipp: &ip, ci_name: &ci_name); |
330 | if (unlikely(error)) { |
331 | if (unlikely(error != -ENOENT)) |
332 | return ERR_PTR(error); |
333 | /* |
334 | * call d_add(dentry, NULL) here when d_drop_negative_children |
335 | * is called in xfs_vn_mknod (ie. allow negative dentries |
336 | * with CI filesystems). |
337 | */ |
338 | return NULL; |
339 | } |
340 | |
341 | /* if exact match, just splice and exit */ |
342 | if (!ci_name.name) |
343 | return d_splice_alias(VFS_I(ip), dentry); |
344 | |
345 | /* else case-insensitive match... */ |
346 | dname.name = ci_name.name; |
347 | dname.len = ci_name.len; |
348 | dentry = d_add_ci(dentry, VFS_I(ip), &dname); |
349 | kfree(objp: ci_name.name); |
350 | return dentry; |
351 | } |
352 | |
353 | STATIC int |
354 | xfs_vn_link( |
355 | struct dentry *old_dentry, |
356 | struct inode *dir, |
357 | struct dentry *dentry) |
358 | { |
359 | struct inode *inode = d_inode(dentry: old_dentry); |
360 | struct xfs_name name; |
361 | int error; |
362 | |
363 | error = xfs_dentry_mode_to_name(namep: &name, dentry, mode: inode->i_mode); |
364 | if (unlikely(error)) |
365 | return error; |
366 | |
367 | error = xfs_link(tdp: XFS_I(inode: dir), sip: XFS_I(inode), target_name: &name); |
368 | if (unlikely(error)) |
369 | return error; |
370 | |
371 | ihold(inode); |
372 | d_instantiate(dentry, inode); |
373 | return 0; |
374 | } |
375 | |
376 | STATIC int |
377 | xfs_vn_unlink( |
378 | struct inode *dir, |
379 | struct dentry *dentry) |
380 | { |
381 | struct xfs_name name; |
382 | int error; |
383 | |
384 | xfs_dentry_to_name(namep: &name, dentry); |
385 | |
386 | error = xfs_remove(dp: XFS_I(inode: dir), name: &name, ip: XFS_I(inode: d_inode(dentry))); |
387 | if (error) |
388 | return error; |
389 | |
390 | /* |
391 | * With unlink, the VFS makes the dentry "negative": no inode, |
392 | * but still hashed. This is incompatible with case-insensitive |
393 | * mode, so invalidate (unhash) the dentry in CI-mode. |
394 | */ |
395 | if (xfs_has_asciici(XFS_M(dir->i_sb))) |
396 | d_invalidate(dentry); |
397 | return 0; |
398 | } |
399 | |
400 | STATIC int |
401 | xfs_vn_symlink( |
402 | struct mnt_idmap *idmap, |
403 | struct inode *dir, |
404 | struct dentry *dentry, |
405 | const char *symname) |
406 | { |
407 | struct inode *inode; |
408 | struct xfs_inode *cip = NULL; |
409 | struct xfs_name name; |
410 | int error; |
411 | umode_t mode; |
412 | |
413 | mode = S_IFLNK | |
414 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); |
415 | error = xfs_dentry_mode_to_name(namep: &name, dentry, mode); |
416 | if (unlikely(error)) |
417 | goto out; |
418 | |
419 | error = xfs_symlink(idmap, dp: XFS_I(inode: dir), link_name: &name, target_path: symname, mode, ipp: &cip); |
420 | if (unlikely(error)) |
421 | goto out; |
422 | |
423 | inode = VFS_I(ip: cip); |
424 | |
425 | error = xfs_inode_init_security(inode, dir, qstr: &dentry->d_name); |
426 | if (unlikely(error)) |
427 | goto out_cleanup_inode; |
428 | |
429 | xfs_setup_iops(ip: cip); |
430 | |
431 | d_instantiate(dentry, inode); |
432 | xfs_finish_inode_setup(ip: cip); |
433 | return 0; |
434 | |
435 | out_cleanup_inode: |
436 | xfs_finish_inode_setup(ip: cip); |
437 | xfs_cleanup_inode(dir, inode, dentry); |
438 | xfs_irele(ip: cip); |
439 | out: |
440 | return error; |
441 | } |
442 | |
443 | STATIC int |
444 | xfs_vn_rename( |
445 | struct mnt_idmap *idmap, |
446 | struct inode *odir, |
447 | struct dentry *odentry, |
448 | struct inode *ndir, |
449 | struct dentry *ndentry, |
450 | unsigned int flags) |
451 | { |
452 | struct inode *new_inode = d_inode(dentry: ndentry); |
453 | int omode = 0; |
454 | int error; |
455 | struct xfs_name oname; |
456 | struct xfs_name nname; |
457 | |
458 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
459 | return -EINVAL; |
460 | |
461 | /* if we are exchanging files, we need to set i_mode of both files */ |
462 | if (flags & RENAME_EXCHANGE) |
463 | omode = d_inode(dentry: ndentry)->i_mode; |
464 | |
465 | error = xfs_dentry_mode_to_name(namep: &oname, dentry: odentry, mode: omode); |
466 | if (omode && unlikely(error)) |
467 | return error; |
468 | |
469 | error = xfs_dentry_mode_to_name(namep: &nname, dentry: ndentry, |
470 | mode: d_inode(dentry: odentry)->i_mode); |
471 | if (unlikely(error)) |
472 | return error; |
473 | |
474 | return xfs_rename(idmap, src_dp: XFS_I(inode: odir), src_name: &oname, |
475 | src_ip: XFS_I(inode: d_inode(dentry: odentry)), target_dp: XFS_I(inode: ndir), target_name: &nname, |
476 | target_ip: new_inode ? XFS_I(inode: new_inode) : NULL, flags); |
477 | } |
478 | |
479 | /* |
480 | * careful here - this function can get called recursively, so |
481 | * we need to be very careful about how much stack we use. |
482 | * uio is kmalloced for this reason... |
483 | */ |
484 | STATIC const char * |
485 | xfs_vn_get_link( |
486 | struct dentry *dentry, |
487 | struct inode *inode, |
488 | struct delayed_call *done) |
489 | { |
490 | char *link; |
491 | int error = -ENOMEM; |
492 | |
493 | if (!dentry) |
494 | return ERR_PTR(error: -ECHILD); |
495 | |
496 | link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL); |
497 | if (!link) |
498 | goto out_err; |
499 | |
500 | error = xfs_readlink(ip: XFS_I(inode: d_inode(dentry)), link); |
501 | if (unlikely(error)) |
502 | goto out_kfree; |
503 | |
504 | set_delayed_call(call: done, fn: kfree_link, arg: link); |
505 | return link; |
506 | |
507 | out_kfree: |
508 | kfree(objp: link); |
509 | out_err: |
510 | return ERR_PTR(error); |
511 | } |
512 | |
513 | static uint32_t |
514 | xfs_stat_blksize( |
515 | struct xfs_inode *ip) |
516 | { |
517 | struct xfs_mount *mp = ip->i_mount; |
518 | |
519 | /* |
520 | * If the file blocks are being allocated from a realtime volume, then |
521 | * always return the realtime extent size. |
522 | */ |
523 | if (XFS_IS_REALTIME_INODE(ip)) |
524 | return XFS_FSB_TO_B(mp, xfs_get_extsz_hint(ip)); |
525 | |
526 | /* |
527 | * Allow large block sizes to be reported to userspace programs if the |
528 | * "largeio" mount option is used. |
529 | * |
530 | * If compatibility mode is specified, simply return the basic unit of |
531 | * caching so that we don't get inefficient read/modify/write I/O from |
532 | * user apps. Otherwise.... |
533 | * |
534 | * If the underlying volume is a stripe, then return the stripe width in |
535 | * bytes as the recommended I/O size. It is not a stripe and we've set a |
536 | * default buffered I/O size, return that, otherwise return the compat |
537 | * default. |
538 | */ |
539 | if (xfs_has_large_iosize(mp)) { |
540 | if (mp->m_swidth) |
541 | return XFS_FSB_TO_B(mp, mp->m_swidth); |
542 | if (xfs_has_allocsize(mp)) |
543 | return 1U << mp->m_allocsize_log; |
544 | } |
545 | |
546 | return PAGE_SIZE; |
547 | } |
548 | |
549 | STATIC int |
550 | xfs_vn_getattr( |
551 | struct mnt_idmap *idmap, |
552 | const struct path *path, |
553 | struct kstat *stat, |
554 | u32 request_mask, |
555 | unsigned int query_flags) |
556 | { |
557 | struct inode *inode = d_inode(dentry: path->dentry); |
558 | struct xfs_inode *ip = XFS_I(inode); |
559 | struct xfs_mount *mp = ip->i_mount; |
560 | vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); |
561 | vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); |
562 | |
563 | trace_xfs_getattr(ip); |
564 | |
565 | if (xfs_is_shutdown(mp)) |
566 | return -EIO; |
567 | |
568 | stat->size = XFS_ISIZE(ip); |
569 | stat->dev = inode->i_sb->s_dev; |
570 | stat->mode = inode->i_mode; |
571 | stat->nlink = inode->i_nlink; |
572 | stat->uid = vfsuid_into_kuid(vfsuid); |
573 | stat->gid = vfsgid_into_kgid(vfsgid); |
574 | stat->ino = ip->i_ino; |
575 | stat->atime = inode_get_atime(inode); |
576 | stat->mtime = inode_get_mtime(inode); |
577 | stat->ctime = inode_get_ctime(inode); |
578 | stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); |
579 | |
580 | if (xfs_has_v3inodes(mp)) { |
581 | if (request_mask & STATX_BTIME) { |
582 | stat->result_mask |= STATX_BTIME; |
583 | stat->btime = ip->i_crtime; |
584 | } |
585 | } |
586 | |
587 | if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { |
588 | stat->change_cookie = inode_query_iversion(inode); |
589 | stat->result_mask |= STATX_CHANGE_COOKIE; |
590 | } |
591 | |
592 | /* |
593 | * Note: If you add another clause to set an attribute flag, please |
594 | * update attributes_mask below. |
595 | */ |
596 | if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) |
597 | stat->attributes |= STATX_ATTR_IMMUTABLE; |
598 | if (ip->i_diflags & XFS_DIFLAG_APPEND) |
599 | stat->attributes |= STATX_ATTR_APPEND; |
600 | if (ip->i_diflags & XFS_DIFLAG_NODUMP) |
601 | stat->attributes |= STATX_ATTR_NODUMP; |
602 | |
603 | stat->attributes_mask |= (STATX_ATTR_IMMUTABLE | |
604 | STATX_ATTR_APPEND | |
605 | STATX_ATTR_NODUMP); |
606 | |
607 | switch (inode->i_mode & S_IFMT) { |
608 | case S_IFBLK: |
609 | case S_IFCHR: |
610 | stat->blksize = BLKDEV_IOSIZE; |
611 | stat->rdev = inode->i_rdev; |
612 | break; |
613 | case S_IFREG: |
614 | if (request_mask & STATX_DIOALIGN) { |
615 | struct xfs_buftarg *target = xfs_inode_buftarg(ip); |
616 | struct block_device *bdev = target->bt_bdev; |
617 | |
618 | stat->result_mask |= STATX_DIOALIGN; |
619 | stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; |
620 | stat->dio_offset_align = bdev_logical_block_size(bdev); |
621 | } |
622 | fallthrough; |
623 | default: |
624 | stat->blksize = xfs_stat_blksize(ip); |
625 | stat->rdev = 0; |
626 | break; |
627 | } |
628 | |
629 | return 0; |
630 | } |
631 | |
632 | static int |
633 | xfs_vn_change_ok( |
634 | struct mnt_idmap *idmap, |
635 | struct dentry *dentry, |
636 | struct iattr *iattr) |
637 | { |
638 | struct xfs_mount *mp = XFS_I(inode: d_inode(dentry))->i_mount; |
639 | |
640 | if (xfs_is_readonly(mp)) |
641 | return -EROFS; |
642 | |
643 | if (xfs_is_shutdown(mp)) |
644 | return -EIO; |
645 | |
646 | return setattr_prepare(idmap, dentry, iattr); |
647 | } |
648 | |
649 | /* |
650 | * Set non-size attributes of an inode. |
651 | * |
652 | * Caution: The caller of this function is responsible for calling |
653 | * setattr_prepare() or otherwise verifying the change is fine. |
654 | */ |
655 | static int |
656 | xfs_setattr_nonsize( |
657 | struct mnt_idmap *idmap, |
658 | struct dentry *dentry, |
659 | struct xfs_inode *ip, |
660 | struct iattr *iattr) |
661 | { |
662 | xfs_mount_t *mp = ip->i_mount; |
663 | struct inode *inode = VFS_I(ip); |
664 | int mask = iattr->ia_valid; |
665 | xfs_trans_t *tp; |
666 | int error; |
667 | kuid_t uid = GLOBAL_ROOT_UID; |
668 | kgid_t gid = GLOBAL_ROOT_GID; |
669 | struct xfs_dquot *udqp = NULL, *gdqp = NULL; |
670 | struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL; |
671 | |
672 | ASSERT((mask & ATTR_SIZE) == 0); |
673 | |
674 | /* |
675 | * If disk quotas is on, we make sure that the dquots do exist on disk, |
676 | * before we start any other transactions. Trying to do this later |
677 | * is messy. We don't care to take a readlock to look at the ids |
678 | * in inode here, because we can't hold it across the trans_reserve. |
679 | * If the IDs do change before we take the ilock, we're covered |
680 | * because the i_*dquot fields will get updated anyway. |
681 | */ |
682 | if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { |
683 | uint qflags = 0; |
684 | |
685 | if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { |
686 | uid = from_vfsuid(idmap, fs_userns: i_user_ns(inode), |
687 | vfsuid: iattr->ia_vfsuid); |
688 | qflags |= XFS_QMOPT_UQUOTA; |
689 | } else { |
690 | uid = inode->i_uid; |
691 | } |
692 | if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { |
693 | gid = from_vfsgid(idmap, fs_userns: i_user_ns(inode), |
694 | vfsgid: iattr->ia_vfsgid); |
695 | qflags |= XFS_QMOPT_GQUOTA; |
696 | } else { |
697 | gid = inode->i_gid; |
698 | } |
699 | |
700 | /* |
701 | * We take a reference when we initialize udqp and gdqp, |
702 | * so it is important that we never blindly double trip on |
703 | * the same variable. See xfs_create() for an example. |
704 | */ |
705 | ASSERT(udqp == NULL); |
706 | ASSERT(gdqp == NULL); |
707 | error = xfs_qm_vop_dqalloc(ip, uid, gid, prid_t: ip->i_projid, |
708 | qflags, &udqp, &gdqp, NULL); |
709 | if (error) |
710 | return error; |
711 | } |
712 | |
713 | error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL, |
714 | force: has_capability_noaudit(current, CAP_FOWNER), tpp: &tp); |
715 | if (error) |
716 | goto out_dqrele; |
717 | |
718 | /* |
719 | * Register quota modifications in the transaction. Must be the owner |
720 | * or privileged. These IDs could have changed since we last looked at |
721 | * them. But, we're assured that if the ownership did change while we |
722 | * didn't have the inode locked, inode's dquot(s) would have changed |
723 | * also. |
724 | */ |
725 | if (XFS_IS_UQUOTA_ON(mp) && |
726 | i_uid_needs_update(idmap, attr: iattr, inode)) { |
727 | ASSERT(udqp); |
728 | old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); |
729 | } |
730 | if (XFS_IS_GQUOTA_ON(mp) && |
731 | i_gid_needs_update(idmap, attr: iattr, inode)) { |
732 | ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp)); |
733 | ASSERT(gdqp); |
734 | old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); |
735 | } |
736 | |
737 | setattr_copy(idmap, inode, attr: iattr); |
738 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
739 | |
740 | XFS_STATS_INC(mp, xs_ig_attrchg); |
741 | |
742 | if (xfs_has_wsync(mp)) |
743 | xfs_trans_set_sync(tp); |
744 | error = xfs_trans_commit(tp); |
745 | |
746 | /* |
747 | * Release any dquot(s) the inode had kept before chown. |
748 | */ |
749 | xfs_qm_dqrele(old_udqp); |
750 | xfs_qm_dqrele(old_gdqp); |
751 | xfs_qm_dqrele(udqp); |
752 | xfs_qm_dqrele(gdqp); |
753 | |
754 | if (error) |
755 | return error; |
756 | |
757 | /* |
758 | * XXX(hch): Updating the ACL entries is not atomic vs the i_mode |
759 | * update. We could avoid this with linked transactions |
760 | * and passing down the transaction pointer all the way |
761 | * to attr_set. No previous user of the generic |
762 | * Posix ACL code seems to care about this issue either. |
763 | */ |
764 | if (mask & ATTR_MODE) { |
765 | error = posix_acl_chmod(idmap, dentry, inode->i_mode); |
766 | if (error) |
767 | return error; |
768 | } |
769 | |
770 | return 0; |
771 | |
772 | out_dqrele: |
773 | xfs_qm_dqrele(udqp); |
774 | xfs_qm_dqrele(gdqp); |
775 | return error; |
776 | } |
777 | |
778 | /* |
779 | * Truncate file. Must have write permission and not be a directory. |
780 | * |
781 | * Caution: The caller of this function is responsible for calling |
782 | * setattr_prepare() or otherwise verifying the change is fine. |
783 | */ |
784 | STATIC int |
785 | xfs_setattr_size( |
786 | struct mnt_idmap *idmap, |
787 | struct dentry *dentry, |
788 | struct xfs_inode *ip, |
789 | struct iattr *iattr) |
790 | { |
791 | struct xfs_mount *mp = ip->i_mount; |
792 | struct inode *inode = VFS_I(ip); |
793 | xfs_off_t oldsize, newsize; |
794 | struct xfs_trans *tp; |
795 | int error; |
796 | uint lock_flags = 0; |
797 | bool did_zeroing = false; |
798 | |
799 | xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); |
800 | ASSERT(S_ISREG(inode->i_mode)); |
801 | ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| |
802 | ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0); |
803 | |
804 | oldsize = inode->i_size; |
805 | newsize = iattr->ia_size; |
806 | |
807 | /* |
808 | * Short circuit the truncate case for zero length files. |
809 | */ |
810 | if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) { |
811 | if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) |
812 | return 0; |
813 | |
814 | /* |
815 | * Use the regular setattr path to update the timestamps. |
816 | */ |
817 | iattr->ia_valid &= ~ATTR_SIZE; |
818 | return xfs_setattr_nonsize(idmap, dentry, ip, iattr); |
819 | } |
820 | |
821 | /* |
822 | * Make sure that the dquots are attached to the inode. |
823 | */ |
824 | error = xfs_qm_dqattach(ip); |
825 | if (error) |
826 | return error; |
827 | |
828 | /* |
829 | * Wait for all direct I/O to complete. |
830 | */ |
831 | inode_dio_wait(inode); |
832 | |
833 | /* |
834 | * File data changes must be complete before we start the transaction to |
835 | * modify the inode. This needs to be done before joining the inode to |
836 | * the transaction because the inode cannot be unlocked once it is a |
837 | * part of the transaction. |
838 | * |
839 | * Start with zeroing any data beyond EOF that we may expose on file |
840 | * extension, or zeroing out the rest of the block on a downward |
841 | * truncate. |
842 | */ |
843 | if (newsize > oldsize) { |
844 | trace_xfs_zero_eof(ip, offset: oldsize, count: newsize - oldsize); |
845 | error = xfs_zero_range(ip, pos: oldsize, len: newsize - oldsize, |
846 | did_zero: &did_zeroing); |
847 | } else { |
848 | /* |
849 | * iomap won't detect a dirty page over an unwritten block (or a |
850 | * cow block over a hole) and subsequently skips zeroing the |
851 | * newly post-EOF portion of the page. Flush the new EOF to |
852 | * convert the block before the pagecache truncate. |
853 | */ |
854 | error = filemap_write_and_wait_range(mapping: inode->i_mapping, lstart: newsize, |
855 | lend: newsize); |
856 | if (error) |
857 | return error; |
858 | error = xfs_truncate_page(ip, pos: newsize, did_zero: &did_zeroing); |
859 | } |
860 | |
861 | if (error) |
862 | return error; |
863 | |
864 | /* |
865 | * We've already locked out new page faults, so now we can safely remove |
866 | * pages from the page cache knowing they won't get refaulted until we |
867 | * drop the XFS_MMAP_EXCL lock after the extent manipulations are |
868 | * complete. The truncate_setsize() call also cleans partial EOF page |
869 | * PTEs on extending truncates and hence ensures sub-page block size |
870 | * filesystems are correctly handled, too. |
871 | * |
872 | * We have to do all the page cache truncate work outside the |
873 | * transaction context as the "lock" order is page lock->log space |
874 | * reservation as defined by extent allocation in the writeback path. |
875 | * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but |
876 | * having already truncated the in-memory version of the file (i.e. made |
877 | * user visible changes). There's not much we can do about this, except |
878 | * to hope that the caller sees ENOMEM and retries the truncate |
879 | * operation. |
880 | * |
881 | * And we update in-core i_size and truncate page cache beyond newsize |
882 | * before writeback the [i_disk_size, newsize] range, so we're |
883 | * guaranteed not to write stale data past the new EOF on truncate down. |
884 | */ |
885 | truncate_setsize(inode, newsize); |
886 | |
887 | /* |
888 | * We are going to log the inode size change in this transaction so |
889 | * any previous writes that are beyond the on disk EOF and the new |
890 | * EOF that have not been written out need to be written here. If we |
891 | * do not write the data out, we expose ourselves to the null files |
892 | * problem. Note that this includes any block zeroing we did above; |
893 | * otherwise those blocks may not be zeroed after a crash. |
894 | */ |
895 | if (did_zeroing || |
896 | (newsize > ip->i_disk_size && oldsize != ip->i_disk_size)) { |
897 | error = filemap_write_and_wait_range(mapping: VFS_I(ip)->i_mapping, |
898 | lstart: ip->i_disk_size, lend: newsize - 1); |
899 | if (error) |
900 | return error; |
901 | } |
902 | |
903 | error = xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_itruncate, blocks: 0, rtextents: 0, flags: 0, tpp: &tp); |
904 | if (error) |
905 | return error; |
906 | |
907 | lock_flags |= XFS_ILOCK_EXCL; |
908 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
909 | xfs_trans_ijoin(tp, ip, 0); |
910 | |
911 | /* |
912 | * Only change the c/mtime if we are changing the size or we are |
913 | * explicitly asked to change it. This handles the semantic difference |
914 | * between truncate() and ftruncate() as implemented in the VFS. |
915 | * |
916 | * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a |
917 | * special case where we need to update the times despite not having |
918 | * these flags set. For all other operations the VFS set these flags |
919 | * explicitly if it wants a timestamp update. |
920 | */ |
921 | if (newsize != oldsize && |
922 | !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { |
923 | iattr->ia_ctime = iattr->ia_mtime = |
924 | current_time(inode); |
925 | iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; |
926 | } |
927 | |
928 | /* |
929 | * The first thing we do is set the size to new_size permanently on |
930 | * disk. This way we don't have to worry about anyone ever being able |
931 | * to look at the data being freed even in the face of a crash. |
932 | * What we're getting around here is the case where we free a block, it |
933 | * is allocated to another file, it is written to, and then we crash. |
934 | * If the new data gets written to the file but the log buffers |
935 | * containing the free and reallocation don't, then we'd end up with |
936 | * garbage in the blocks being freed. As long as we make the new size |
937 | * permanent before actually freeing any blocks it doesn't matter if |
938 | * they get written to. |
939 | */ |
940 | ip->i_disk_size = newsize; |
941 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
942 | |
943 | if (newsize <= oldsize) { |
944 | error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); |
945 | if (error) |
946 | goto out_trans_cancel; |
947 | |
948 | /* |
949 | * Truncated "down", so we're removing references to old data |
950 | * here - if we delay flushing for a long time, we expose |
951 | * ourselves unduly to the notorious NULL files problem. So, |
952 | * we mark this inode and flush it when the file is closed, |
953 | * and do not wait the usual (long) time for writeout. |
954 | */ |
955 | xfs_iflags_set(ip, XFS_ITRUNCATED); |
956 | |
957 | /* A truncate down always removes post-EOF blocks. */ |
958 | xfs_inode_clear_eofblocks_tag(ip); |
959 | } |
960 | |
961 | ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); |
962 | setattr_copy(idmap, inode, attr: iattr); |
963 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
964 | |
965 | XFS_STATS_INC(mp, xs_ig_attrchg); |
966 | |
967 | if (xfs_has_wsync(mp)) |
968 | xfs_trans_set_sync(tp); |
969 | |
970 | error = xfs_trans_commit(tp); |
971 | out_unlock: |
972 | if (lock_flags) |
973 | xfs_iunlock(ip, lock_flags); |
974 | return error; |
975 | |
976 | out_trans_cancel: |
977 | xfs_trans_cancel(tp); |
978 | goto out_unlock; |
979 | } |
980 | |
/*
 * Check and then apply a size-changing attribute update for the inode behind
 * @dentry.
 *
 * The xfs_setattr tracepoint is emitted first.  xfs_vn_change_ok() is then
 * asked whether the change described by @iattr is acceptable; a non-zero
 * answer is returned to the caller unchanged.  Otherwise the request is
 * passed on to xfs_setattr_size() and its result is returned.
 *
 * xfs_vn_setattr() in this file holds XFS_MMAPLOCK_EXCL across this call.
 */
int
xfs_vn_setattr_size(
	struct mnt_idmap	*idmap,
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	int			error;

	trace_xfs_setattr(ip);

	error = xfs_vn_change_ok(idmap, dentry, iattr);
	if (error)
		return error;
	return xfs_setattr_size(idmap, dentry, ip, iattr);
}
997 | |
998 | STATIC int |
999 | xfs_vn_setattr( |
1000 | struct mnt_idmap *idmap, |
1001 | struct dentry *dentry, |
1002 | struct iattr *iattr) |
1003 | { |
1004 | struct inode *inode = d_inode(dentry); |
1005 | struct xfs_inode *ip = XFS_I(inode); |
1006 | int error; |
1007 | |
1008 | if (iattr->ia_valid & ATTR_SIZE) { |
1009 | uint iolock; |
1010 | |
1011 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); |
1012 | iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; |
1013 | |
1014 | error = xfs_break_layouts(inode, iolock: &iolock, reason: BREAK_UNMAP); |
1015 | if (error) { |
1016 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
1017 | return error; |
1018 | } |
1019 | |
1020 | error = xfs_vn_setattr_size(idmap, dentry, iattr); |
1021 | xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); |
1022 | } else { |
1023 | trace_xfs_setattr(ip); |
1024 | |
1025 | error = xfs_vn_change_ok(idmap, dentry, iattr); |
1026 | if (!error) |
1027 | error = xfs_setattr_nonsize(idmap, dentry, ip, iattr); |
1028 | } |
1029 | |
1030 | return error; |
1031 | } |
1032 | |
1033 | STATIC int |
1034 | xfs_vn_update_time( |
1035 | struct inode *inode, |
1036 | int flags) |
1037 | { |
1038 | struct xfs_inode *ip = XFS_I(inode); |
1039 | struct xfs_mount *mp = ip->i_mount; |
1040 | int log_flags = XFS_ILOG_TIMESTAMP; |
1041 | struct xfs_trans *tp; |
1042 | int error; |
1043 | struct timespec64 now; |
1044 | |
1045 | trace_xfs_update_time(ip); |
1046 | |
1047 | if (inode->i_sb->s_flags & SB_LAZYTIME) { |
1048 | if (!((flags & S_VERSION) && |
1049 | inode_maybe_inc_iversion(inode, force: false))) { |
1050 | generic_update_time(inode, flags); |
1051 | return 0; |
1052 | } |
1053 | |
1054 | /* Capture the iversion update that just occurred */ |
1055 | log_flags |= XFS_ILOG_CORE; |
1056 | } |
1057 | |
1058 | error = xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_fsyncts, blocks: 0, rtextents: 0, flags: 0, tpp: &tp); |
1059 | if (error) |
1060 | return error; |
1061 | |
1062 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1063 | if (flags & (S_CTIME|S_MTIME)) |
1064 | now = inode_set_ctime_current(inode); |
1065 | else |
1066 | now = current_time(inode); |
1067 | |
1068 | if (flags & S_MTIME) |
1069 | inode_set_mtime_to_ts(inode, ts: now); |
1070 | if (flags & S_ATIME) |
1071 | inode_set_atime_to_ts(inode, ts: now); |
1072 | |
1073 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
1074 | xfs_trans_log_inode(tp, ip, log_flags); |
1075 | return xfs_trans_commit(tp); |
1076 | } |
1077 | |
1078 | STATIC int |
1079 | xfs_vn_fiemap( |
1080 | struct inode *inode, |
1081 | struct fiemap_extent_info *fieinfo, |
1082 | u64 start, |
1083 | u64 length) |
1084 | { |
1085 | int error; |
1086 | |
1087 | xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED); |
1088 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { |
1089 | fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; |
1090 | error = iomap_fiemap(inode, fieinfo, start, len: length, |
1091 | ops: &xfs_xattr_iomap_ops); |
1092 | } else { |
1093 | error = iomap_fiemap(inode, fieinfo, start, len: length, |
1094 | ops: &xfs_read_iomap_ops); |
1095 | } |
1096 | xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); |
1097 | |
1098 | return error; |
1099 | } |
1100 | |
1101 | STATIC int |
1102 | xfs_vn_tmpfile( |
1103 | struct mnt_idmap *idmap, |
1104 | struct inode *dir, |
1105 | struct file *file, |
1106 | umode_t mode) |
1107 | { |
1108 | int err = xfs_generic_create(idmap, dir, dentry: file->f_path.dentry, mode, rdev: 0, tmpfile: file); |
1109 | |
1110 | return finish_open_simple(file, error: err); |
1111 | } |
1112 | |
1113 | static const struct inode_operations xfs_inode_operations = { |
1114 | .get_inode_acl = xfs_get_acl, |
1115 | .set_acl = xfs_set_acl, |
1116 | .getattr = xfs_vn_getattr, |
1117 | .setattr = xfs_vn_setattr, |
1118 | .listxattr = xfs_vn_listxattr, |
1119 | .fiemap = xfs_vn_fiemap, |
1120 | .update_time = xfs_vn_update_time, |
1121 | .fileattr_get = xfs_fileattr_get, |
1122 | .fileattr_set = xfs_fileattr_set, |
1123 | }; |
1124 | |
1125 | static const struct inode_operations xfs_dir_inode_operations = { |
1126 | .create = xfs_vn_create, |
1127 | .lookup = xfs_vn_lookup, |
1128 | .link = xfs_vn_link, |
1129 | .unlink = xfs_vn_unlink, |
1130 | .symlink = xfs_vn_symlink, |
1131 | .mkdir = xfs_vn_mkdir, |
1132 | /* |
1133 | * Yes, XFS uses the same method for rmdir and unlink. |
1134 | * |
1135 | * There are some subtile differences deeper in the code, |
1136 | * but we use S_ISDIR to check for those. |
1137 | */ |
1138 | .rmdir = xfs_vn_unlink, |
1139 | .mknod = xfs_vn_mknod, |
1140 | .rename = xfs_vn_rename, |
1141 | .get_inode_acl = xfs_get_acl, |
1142 | .set_acl = xfs_set_acl, |
1143 | .getattr = xfs_vn_getattr, |
1144 | .setattr = xfs_vn_setattr, |
1145 | .listxattr = xfs_vn_listxattr, |
1146 | .update_time = xfs_vn_update_time, |
1147 | .tmpfile = xfs_vn_tmpfile, |
1148 | .fileattr_get = xfs_fileattr_get, |
1149 | .fileattr_set = xfs_fileattr_set, |
1150 | }; |
1151 | |
1152 | static const struct inode_operations xfs_dir_ci_inode_operations = { |
1153 | .create = xfs_vn_create, |
1154 | .lookup = xfs_vn_ci_lookup, |
1155 | .link = xfs_vn_link, |
1156 | .unlink = xfs_vn_unlink, |
1157 | .symlink = xfs_vn_symlink, |
1158 | .mkdir = xfs_vn_mkdir, |
1159 | /* |
1160 | * Yes, XFS uses the same method for rmdir and unlink. |
1161 | * |
1162 | * There are some subtile differences deeper in the code, |
1163 | * but we use S_ISDIR to check for those. |
1164 | */ |
1165 | .rmdir = xfs_vn_unlink, |
1166 | .mknod = xfs_vn_mknod, |
1167 | .rename = xfs_vn_rename, |
1168 | .get_inode_acl = xfs_get_acl, |
1169 | .set_acl = xfs_set_acl, |
1170 | .getattr = xfs_vn_getattr, |
1171 | .setattr = xfs_vn_setattr, |
1172 | .listxattr = xfs_vn_listxattr, |
1173 | .update_time = xfs_vn_update_time, |
1174 | .tmpfile = xfs_vn_tmpfile, |
1175 | .fileattr_get = xfs_fileattr_get, |
1176 | .fileattr_set = xfs_fileattr_set, |
1177 | }; |
1178 | |
1179 | static const struct inode_operations xfs_symlink_inode_operations = { |
1180 | .get_link = xfs_vn_get_link, |
1181 | .getattr = xfs_vn_getattr, |
1182 | .setattr = xfs_vn_setattr, |
1183 | .listxattr = xfs_vn_listxattr, |
1184 | .update_time = xfs_vn_update_time, |
1185 | }; |
1186 | |
1187 | /* Figure out if this file actually supports DAX. */ |
1188 | static bool |
1189 | xfs_inode_supports_dax( |
1190 | struct xfs_inode *ip) |
1191 | { |
1192 | struct xfs_mount *mp = ip->i_mount; |
1193 | |
1194 | /* Only supported on regular files. */ |
1195 | if (!S_ISREG(VFS_I(ip)->i_mode)) |
1196 | return false; |
1197 | |
1198 | /* Block size must match page size */ |
1199 | if (mp->m_sb.sb_blocksize != PAGE_SIZE) |
1200 | return false; |
1201 | |
1202 | /* Device has to support DAX too. */ |
1203 | return xfs_inode_buftarg(ip)->bt_daxdev != NULL; |
1204 | } |
1205 | |
1206 | static bool |
1207 | xfs_inode_should_enable_dax( |
1208 | struct xfs_inode *ip) |
1209 | { |
1210 | if (!IS_ENABLED(CONFIG_FS_DAX)) |
1211 | return false; |
1212 | if (xfs_has_dax_never(mp: ip->i_mount)) |
1213 | return false; |
1214 | if (!xfs_inode_supports_dax(ip)) |
1215 | return false; |
1216 | if (xfs_has_dax_always(mp: ip->i_mount)) |
1217 | return true; |
1218 | if (ip->i_diflags2 & XFS_DIFLAG2_DAX) |
1219 | return true; |
1220 | return false; |
1221 | } |
1222 | |
1223 | void |
1224 | xfs_diflags_to_iflags( |
1225 | struct xfs_inode *ip, |
1226 | bool init) |
1227 | { |
1228 | struct inode *inode = VFS_I(ip); |
1229 | unsigned int xflags = xfs_ip2xflags(ip); |
1230 | unsigned int flags = 0; |
1231 | |
1232 | ASSERT(!(IS_DAX(inode) && init)); |
1233 | |
1234 | if (xflags & FS_XFLAG_IMMUTABLE) |
1235 | flags |= S_IMMUTABLE; |
1236 | if (xflags & FS_XFLAG_APPEND) |
1237 | flags |= S_APPEND; |
1238 | if (xflags & FS_XFLAG_SYNC) |
1239 | flags |= S_SYNC; |
1240 | if (xflags & FS_XFLAG_NOATIME) |
1241 | flags |= S_NOATIME; |
1242 | if (init && xfs_inode_should_enable_dax(ip)) |
1243 | flags |= S_DAX; |
1244 | |
1245 | /* |
1246 | * S_DAX can only be set during inode initialization and is never set by |
1247 | * the VFS, so we cannot mask off S_DAX in i_flags. |
1248 | */ |
1249 | inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME); |
1250 | inode->i_flags |= flags; |
1251 | } |
1252 | |
1253 | /* |
1254 | * Initialize the Linux inode. |
1255 | * |
1256 | * When reading existing inodes from disk this is called directly from xfs_iget, |
1257 | * when creating a new inode it is called from xfs_init_new_inode after setting |
1258 | * up the inode. These callers have different criteria for clearing XFS_INEW, so |
1259 | * leave it up to the caller to deal with unlocking the inode appropriately. |
1260 | */ |
1261 | void |
1262 | xfs_setup_inode( |
1263 | struct xfs_inode *ip) |
1264 | { |
1265 | struct inode *inode = &ip->i_vnode; |
1266 | gfp_t gfp_mask; |
1267 | |
1268 | inode->i_ino = ip->i_ino; |
1269 | inode->i_state |= I_NEW; |
1270 | |
1271 | inode_sb_list_add(inode); |
1272 | /* make the inode look hashed for the writeback code */ |
1273 | inode_fake_hash(inode); |
1274 | |
1275 | i_size_write(inode, i_size: ip->i_disk_size); |
1276 | xfs_diflags_to_iflags(ip, init: true); |
1277 | |
1278 | if (S_ISDIR(inode->i_mode)) { |
1279 | /* |
1280 | * We set the i_rwsem class here to avoid potential races with |
1281 | * lockdep_annotate_inode_mutex_key() reinitialising the lock |
1282 | * after a filehandle lookup has already found the inode in |
1283 | * cache before it has been unlocked via unlock_new_inode(). |
1284 | */ |
1285 | lockdep_set_class(&inode->i_rwsem, |
1286 | &inode->i_sb->s_type->i_mutex_dir_key); |
1287 | lockdep_set_class(&ip->i_lock, &xfs_dir_ilock_class); |
1288 | } else { |
1289 | lockdep_set_class(&ip->i_lock, &xfs_nondir_ilock_class); |
1290 | } |
1291 | |
1292 | /* |
1293 | * Ensure all page cache allocations are done from GFP_NOFS context to |
1294 | * prevent direct reclaim recursion back into the filesystem and blowing |
1295 | * stacks or deadlocking. |
1296 | */ |
1297 | gfp_mask = mapping_gfp_mask(mapping: inode->i_mapping); |
1298 | mapping_set_gfp_mask(m: inode->i_mapping, mask: (gfp_mask & ~(__GFP_FS))); |
1299 | |
1300 | /* |
1301 | * For real-time inodes update the stable write flags to that of the RT |
1302 | * device instead of the data device. |
1303 | */ |
1304 | if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip)) |
1305 | xfs_update_stable_writes(ip); |
1306 | |
1307 | /* |
1308 | * If there is no attribute fork no ACL can exist on this inode, |
1309 | * and it can't have any file capabilities attached to it either. |
1310 | */ |
1311 | if (!xfs_inode_has_attr_fork(ip)) { |
1312 | inode_has_no_xattr(inode); |
1313 | cache_no_acl(inode); |
1314 | } |
1315 | } |
1316 | |
1317 | void |
1318 | xfs_setup_iops( |
1319 | struct xfs_inode *ip) |
1320 | { |
1321 | struct inode *inode = &ip->i_vnode; |
1322 | |
1323 | switch (inode->i_mode & S_IFMT) { |
1324 | case S_IFREG: |
1325 | inode->i_op = &xfs_inode_operations; |
1326 | inode->i_fop = &xfs_file_operations; |
1327 | if (IS_DAX(inode)) |
1328 | inode->i_mapping->a_ops = &xfs_dax_aops; |
1329 | else |
1330 | inode->i_mapping->a_ops = &xfs_address_space_operations; |
1331 | break; |
1332 | case S_IFDIR: |
1333 | if (xfs_has_asciici(XFS_M(inode->i_sb))) |
1334 | inode->i_op = &xfs_dir_ci_inode_operations; |
1335 | else |
1336 | inode->i_op = &xfs_dir_inode_operations; |
1337 | inode->i_fop = &xfs_dir_file_operations; |
1338 | break; |
1339 | case S_IFLNK: |
1340 | inode->i_op = &xfs_symlink_inode_operations; |
1341 | break; |
1342 | default: |
1343 | inode->i_op = &xfs_inode_operations; |
1344 | init_special_inode(inode, inode->i_mode, inode->i_rdev); |
1345 | break; |
1346 | } |
1347 | } |
1348 | |