1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
4 | * All Rights Reserved. |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_ag.h" |
14 | #include "xfs_inode.h" |
15 | #include "xfs_errortag.h" |
16 | #include "xfs_error.h" |
17 | #include "xfs_icache.h" |
18 | #include "xfs_trans.h" |
19 | #include "xfs_ialloc.h" |
20 | #include "xfs_dir2.h" |
21 | #include "xfs_health.h" |
22 | |
23 | #include <linux/iversion.h> |
24 | |
25 | /* |
26 | * If we are doing readahead on an inode buffer, we might be in log recovery |
27 | * reading an inode allocation buffer that hasn't yet been replayed, and hence |
28 | * has not had the inode cores stamped into it. Hence for readahead, the buffer |
29 | * may be potentially invalid. |
30 | * |
31 | * If the readahead buffer is invalid, we need to mark it with an error and |
32 | * clear the DONE status of the buffer so that a followup read will re-read it |
33 | * from disk. We don't report the error otherwise to avoid warnings during log |
34 | * recovery and we don't get unnecessary panics on debug kernels. We use EIO here |
35 | * because all we want to do is say readahead failed; there is no-one to report |
36 | * the error to, so this will distinguish it from a non-ra verifier failure. |
37 | * Changes to this readahead error behaviour also need to be reflected in |
38 | * xfs_dquot_buf_readahead_verify(). |
39 | */ |
40 | static void |
41 | xfs_inode_buf_verify( |
42 | struct xfs_buf *bp, |
43 | bool readahead) |
44 | { |
45 | struct xfs_mount *mp = bp->b_mount; |
46 | int i; |
47 | int ni; |
48 | |
49 | /* |
50 | * Validate the magic number and version of every inode in the buffer |
51 | */ |
52 | ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; |
53 | for (i = 0; i < ni; i++) { |
54 | struct xfs_dinode *dip; |
55 | xfs_agino_t unlinked_ino; |
56 | int di_ok; |
57 | |
58 | dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); |
59 | unlinked_ino = be32_to_cpu(dip->di_next_unlinked); |
60 | di_ok = xfs_verify_magic16(bp, dip->di_magic) && |
61 | xfs_dinode_good_version(mp, dip->di_version) && |
62 | xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); |
63 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, |
64 | XFS_ERRTAG_ITOBP_INOTOBP))) { |
65 | if (readahead) { |
66 | bp->b_flags &= ~XBF_DONE; |
67 | xfs_buf_ioerror(bp, -EIO); |
68 | return; |
69 | } |
70 | |
71 | #ifdef DEBUG |
72 | xfs_alert(mp, |
73 | "bad inode magic/vsn daddr %lld #%d (magic=%x)" , |
74 | (unsigned long long)xfs_buf_daddr(bp), i, |
75 | be16_to_cpu(dip->di_magic)); |
76 | #endif |
77 | xfs_buf_verifier_error(bp, -EFSCORRUPTED, |
78 | __func__, dip, sizeof(*dip), |
79 | NULL); |
80 | return; |
81 | } |
82 | } |
83 | } |
84 | |
85 | |
86 | static void |
87 | xfs_inode_buf_read_verify( |
88 | struct xfs_buf *bp) |
89 | { |
90 | xfs_inode_buf_verify(bp, readahead: false); |
91 | } |
92 | |
93 | static void |
94 | xfs_inode_buf_readahead_verify( |
95 | struct xfs_buf *bp) |
96 | { |
97 | xfs_inode_buf_verify(bp, readahead: true); |
98 | } |
99 | |
100 | static void |
101 | xfs_inode_buf_write_verify( |
102 | struct xfs_buf *bp) |
103 | { |
104 | xfs_inode_buf_verify(bp, readahead: false); |
105 | } |
106 | |
107 | const struct xfs_buf_ops xfs_inode_buf_ops = { |
108 | .name = "xfs_inode" , |
109 | .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), |
110 | cpu_to_be16(XFS_DINODE_MAGIC) }, |
111 | .verify_read = xfs_inode_buf_read_verify, |
112 | .verify_write = xfs_inode_buf_write_verify, |
113 | }; |
114 | |
115 | const struct xfs_buf_ops xfs_inode_buf_ra_ops = { |
116 | .name = "xfs_inode_ra" , |
117 | .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), |
118 | cpu_to_be16(XFS_DINODE_MAGIC) }, |
119 | .verify_read = xfs_inode_buf_readahead_verify, |
120 | .verify_write = xfs_inode_buf_write_verify, |
121 | }; |
122 | |
123 | |
124 | /* |
125 | * This routine is called to map an inode to the buffer containing the on-disk |
126 | * version of the inode. It returns a pointer to the buffer containing the |
127 | * on-disk inode in the bpp parameter. |
128 | */ |
129 | int |
130 | xfs_imap_to_bp( |
131 | struct xfs_mount *mp, |
132 | struct xfs_trans *tp, |
133 | struct xfs_imap *imap, |
134 | struct xfs_buf **bpp) |
135 | { |
136 | int error; |
137 | |
138 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, |
139 | imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); |
140 | if (xfs_metadata_is_sick(error)) |
141 | xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), |
142 | XFS_SICK_AG_INODES); |
143 | return error; |
144 | } |
145 | |
146 | static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) |
147 | { |
148 | struct timespec64 tv; |
149 | uint32_t n; |
150 | |
151 | tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(dividend: ts, NSEC_PER_SEC, remainder: &n)); |
152 | tv.tv_nsec = n; |
153 | |
154 | return tv; |
155 | } |
156 | |
157 | /* Convert an ondisk timestamp to an incore timestamp. */ |
158 | struct timespec64 |
159 | xfs_inode_from_disk_ts( |
160 | struct xfs_dinode *dip, |
161 | const xfs_timestamp_t ts) |
162 | { |
163 | struct timespec64 tv; |
164 | struct xfs_legacy_timestamp *lts; |
165 | |
166 | if (xfs_dinode_has_bigtime(dip)) |
167 | return xfs_inode_decode_bigtime(be64_to_cpu(ts)); |
168 | |
169 | lts = (struct xfs_legacy_timestamp *)&ts; |
170 | tv.tv_sec = (int)be32_to_cpu(lts->t_sec); |
171 | tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); |
172 | |
173 | return tv; |
174 | } |
175 | |
176 | int |
177 | xfs_inode_from_disk( |
178 | struct xfs_inode *ip, |
179 | struct xfs_dinode *from) |
180 | { |
181 | struct inode *inode = VFS_I(ip); |
182 | int error; |
183 | xfs_failaddr_t fa; |
184 | |
185 | ASSERT(ip->i_cowfp == NULL); |
186 | |
187 | fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); |
188 | if (fa) { |
189 | xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode" , from, |
190 | sizeof(*from), fa); |
191 | return -EFSCORRUPTED; |
192 | } |
193 | |
194 | /* |
195 | * First get the permanent information that is needed to allocate an |
196 | * inode. If the inode is unused, mode is zero and we shouldn't mess |
197 | * with the uninitialized part of it. |
198 | */ |
199 | if (!xfs_has_v3inodes(ip->i_mount)) |
200 | ip->i_flushiter = be16_to_cpu(from->di_flushiter); |
201 | inode->i_generation = be32_to_cpu(from->di_gen); |
202 | inode->i_mode = be16_to_cpu(from->di_mode); |
203 | if (!inode->i_mode) |
204 | return 0; |
205 | |
206 | /* |
207 | * Convert v1 inodes immediately to v2 inode format as this is the |
208 | * minimum inode version format we support in the rest of the code. |
209 | * They will also be unconditionally written back to disk as v2 inodes. |
210 | */ |
211 | if (unlikely(from->di_version == 1)) { |
212 | set_nlink(inode, be16_to_cpu(from->di_onlink)); |
213 | ip->i_projid = 0; |
214 | } else { |
215 | set_nlink(inode, be32_to_cpu(from->di_nlink)); |
216 | ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | |
217 | be16_to_cpu(from->di_projid_lo); |
218 | } |
219 | |
220 | i_uid_write(inode, be32_to_cpu(from->di_uid)); |
221 | i_gid_write(inode, be32_to_cpu(from->di_gid)); |
222 | |
223 | /* |
224 | * Time is signed, so need to convert to signed 32 bit before |
225 | * storing in inode timestamp which may be 64 bit. Otherwise |
226 | * a time before epoch is converted to a time long after epoch |
227 | * on 64 bit systems. |
228 | */ |
229 | inode_set_atime_to_ts(inode, |
230 | ts: xfs_inode_from_disk_ts(dip: from, ts: from->di_atime)); |
231 | inode_set_mtime_to_ts(inode, |
232 | ts: xfs_inode_from_disk_ts(dip: from, ts: from->di_mtime)); |
233 | inode_set_ctime_to_ts(inode, |
234 | ts: xfs_inode_from_disk_ts(dip: from, ts: from->di_ctime)); |
235 | |
236 | ip->i_disk_size = be64_to_cpu(from->di_size); |
237 | ip->i_nblocks = be64_to_cpu(from->di_nblocks); |
238 | ip->i_extsize = be32_to_cpu(from->di_extsize); |
239 | ip->i_forkoff = from->di_forkoff; |
240 | ip->i_diflags = be16_to_cpu(from->di_flags); |
241 | ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); |
242 | |
243 | if (from->di_dmevmask || from->di_dmstate) |
244 | xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); |
245 | |
246 | if (xfs_has_v3inodes(ip->i_mount)) { |
247 | inode_set_iversion_queried(inode, |
248 | be64_to_cpu(from->di_changecount)); |
249 | ip->i_crtime = xfs_inode_from_disk_ts(dip: from, ts: from->di_crtime); |
250 | ip->i_diflags2 = be64_to_cpu(from->di_flags2); |
251 | ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); |
252 | } |
253 | |
254 | error = xfs_iformat_data_fork(ip, from); |
255 | if (error) |
256 | return error; |
257 | if (from->di_forkoff) { |
258 | error = xfs_iformat_attr_fork(ip, from); |
259 | if (error) |
260 | goto out_destroy_data_fork; |
261 | } |
262 | if (xfs_is_reflink_inode(ip)) |
263 | xfs_ifork_init_cow(ip); |
264 | return 0; |
265 | |
266 | out_destroy_data_fork: |
267 | xfs_idestroy_fork(&ip->i_df); |
268 | return error; |
269 | } |
270 | |
271 | /* Convert an incore timestamp to an ondisk timestamp. */ |
272 | static inline xfs_timestamp_t |
273 | xfs_inode_to_disk_ts( |
274 | struct xfs_inode *ip, |
275 | const struct timespec64 tv) |
276 | { |
277 | struct xfs_legacy_timestamp *lts; |
278 | xfs_timestamp_t ts; |
279 | |
280 | if (xfs_inode_has_bigtime(ip)) |
281 | return cpu_to_be64(xfs_inode_encode_bigtime(tv)); |
282 | |
283 | lts = (struct xfs_legacy_timestamp *)&ts; |
284 | lts->t_sec = cpu_to_be32(tv.tv_sec); |
285 | lts->t_nsec = cpu_to_be32(tv.tv_nsec); |
286 | |
287 | return ts; |
288 | } |
289 | |
290 | static inline void |
291 | xfs_inode_to_disk_iext_counters( |
292 | struct xfs_inode *ip, |
293 | struct xfs_dinode *to) |
294 | { |
295 | if (xfs_inode_has_large_extent_counts(ip)) { |
296 | to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); |
297 | to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af)); |
298 | /* |
299 | * We might be upgrading the inode to use larger extent counters |
300 | * than was previously used. Hence zero the unused field. |
301 | */ |
302 | to->di_nrext64_pad = cpu_to_be16(0); |
303 | } else { |
304 | to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); |
305 | to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af)); |
306 | } |
307 | } |
308 | |
309 | void |
310 | xfs_inode_to_disk( |
311 | struct xfs_inode *ip, |
312 | struct xfs_dinode *to, |
313 | xfs_lsn_t lsn) |
314 | { |
315 | struct inode *inode = VFS_I(ip); |
316 | |
317 | to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); |
318 | to->di_onlink = 0; |
319 | |
320 | to->di_format = xfs_ifork_format(&ip->i_df); |
321 | to->di_uid = cpu_to_be32(i_uid_read(inode)); |
322 | to->di_gid = cpu_to_be32(i_gid_read(inode)); |
323 | to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); |
324 | to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); |
325 | |
326 | to->di_atime = xfs_inode_to_disk_ts(ip, tv: inode_get_atime(inode)); |
327 | to->di_mtime = xfs_inode_to_disk_ts(ip, tv: inode_get_mtime(inode)); |
328 | to->di_ctime = xfs_inode_to_disk_ts(ip, tv: inode_get_ctime(inode)); |
329 | to->di_nlink = cpu_to_be32(inode->i_nlink); |
330 | to->di_gen = cpu_to_be32(inode->i_generation); |
331 | to->di_mode = cpu_to_be16(inode->i_mode); |
332 | |
333 | to->di_size = cpu_to_be64(ip->i_disk_size); |
334 | to->di_nblocks = cpu_to_be64(ip->i_nblocks); |
335 | to->di_extsize = cpu_to_be32(ip->i_extsize); |
336 | to->di_forkoff = ip->i_forkoff; |
337 | to->di_aformat = xfs_ifork_format(&ip->i_af); |
338 | to->di_flags = cpu_to_be16(ip->i_diflags); |
339 | |
340 | if (xfs_has_v3inodes(ip->i_mount)) { |
341 | to->di_version = 3; |
342 | to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); |
343 | to->di_crtime = xfs_inode_to_disk_ts(ip, tv: ip->i_crtime); |
344 | to->di_flags2 = cpu_to_be64(ip->i_diflags2); |
345 | to->di_cowextsize = cpu_to_be32(ip->i_cowextsize); |
346 | to->di_ino = cpu_to_be64(ip->i_ino); |
347 | to->di_lsn = cpu_to_be64(lsn); |
348 | memset(to->di_pad2, 0, sizeof(to->di_pad2)); |
349 | uuid_copy(dst: &to->di_uuid, src: &ip->i_mount->m_sb.sb_meta_uuid); |
350 | to->di_v3_pad = 0; |
351 | } else { |
352 | to->di_version = 2; |
353 | to->di_flushiter = cpu_to_be16(ip->i_flushiter); |
354 | memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); |
355 | } |
356 | |
357 | xfs_inode_to_disk_iext_counters(ip, to); |
358 | } |
359 | |
360 | static xfs_failaddr_t |
361 | xfs_dinode_verify_fork( |
362 | struct xfs_dinode *dip, |
363 | struct xfs_mount *mp, |
364 | int whichfork) |
365 | { |
366 | xfs_extnum_t di_nextents; |
367 | xfs_extnum_t max_extents; |
368 | mode_t mode = be16_to_cpu(dip->di_mode); |
369 | uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); |
370 | uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); |
371 | |
372 | di_nextents = xfs_dfork_nextents(dip, whichfork); |
373 | |
374 | /* |
375 | * For fork types that can contain local data, check that the fork |
376 | * format matches the size of local data contained within the fork. |
377 | * |
378 | * For all types, check that when the size says the should be in extent |
379 | * or btree format, the inode isn't claiming it is in local format. |
380 | */ |
381 | if (whichfork == XFS_DATA_FORK) { |
382 | if (S_ISDIR(mode) || S_ISLNK(mode)) { |
383 | if (be64_to_cpu(dip->di_size) <= fork_size && |
384 | fork_format != XFS_DINODE_FMT_LOCAL) |
385 | return __this_address; |
386 | } |
387 | |
388 | if (be64_to_cpu(dip->di_size) > fork_size && |
389 | fork_format == XFS_DINODE_FMT_LOCAL) |
390 | return __this_address; |
391 | } |
392 | |
393 | switch (fork_format) { |
394 | case XFS_DINODE_FMT_LOCAL: |
395 | /* |
396 | * No local regular files yet. |
397 | */ |
398 | if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) |
399 | return __this_address; |
400 | if (di_nextents) |
401 | return __this_address; |
402 | break; |
403 | case XFS_DINODE_FMT_EXTENTS: |
404 | if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) |
405 | return __this_address; |
406 | break; |
407 | case XFS_DINODE_FMT_BTREE: |
408 | max_extents = xfs_iext_max_nextents( |
409 | xfs_dinode_has_large_extent_counts(dip), |
410 | whichfork); |
411 | if (di_nextents > max_extents) |
412 | return __this_address; |
413 | break; |
414 | default: |
415 | return __this_address; |
416 | } |
417 | return NULL; |
418 | } |
419 | |
420 | static xfs_failaddr_t |
421 | xfs_dinode_verify_forkoff( |
422 | struct xfs_dinode *dip, |
423 | struct xfs_mount *mp) |
424 | { |
425 | if (!dip->di_forkoff) |
426 | return NULL; |
427 | |
428 | switch (dip->di_format) { |
429 | case XFS_DINODE_FMT_DEV: |
430 | if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) |
431 | return __this_address; |
432 | break; |
433 | case XFS_DINODE_FMT_LOCAL: /* fall through ... */ |
434 | case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ |
435 | case XFS_DINODE_FMT_BTREE: |
436 | if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) |
437 | return __this_address; |
438 | break; |
439 | default: |
440 | return __this_address; |
441 | } |
442 | return NULL; |
443 | } |
444 | |
445 | static xfs_failaddr_t |
446 | xfs_dinode_verify_nrext64( |
447 | struct xfs_mount *mp, |
448 | struct xfs_dinode *dip) |
449 | { |
450 | if (xfs_dinode_has_large_extent_counts(dip)) { |
451 | if (!xfs_has_large_extent_counts(mp)) |
452 | return __this_address; |
453 | if (dip->di_nrext64_pad != 0) |
454 | return __this_address; |
455 | } else if (dip->di_version >= 3) { |
456 | if (dip->di_v3_pad != 0) |
457 | return __this_address; |
458 | } |
459 | |
460 | return NULL; |
461 | } |
462 | |
463 | xfs_failaddr_t |
464 | xfs_dinode_verify( |
465 | struct xfs_mount *mp, |
466 | xfs_ino_t ino, |
467 | struct xfs_dinode *dip) |
468 | { |
469 | xfs_failaddr_t fa; |
470 | uint16_t mode; |
471 | uint16_t flags; |
472 | uint64_t flags2; |
473 | uint64_t di_size; |
474 | xfs_extnum_t nextents; |
475 | xfs_extnum_t naextents; |
476 | xfs_filblks_t nblocks; |
477 | |
478 | if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) |
479 | return __this_address; |
480 | |
481 | /* Verify v3 integrity information first */ |
482 | if (dip->di_version >= 3) { |
483 | if (!xfs_has_v3inodes(mp)) |
484 | return __this_address; |
485 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, |
486 | XFS_DINODE_CRC_OFF)) |
487 | return __this_address; |
488 | if (be64_to_cpu(dip->di_ino) != ino) |
489 | return __this_address; |
490 | if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) |
491 | return __this_address; |
492 | } |
493 | |
494 | /* don't allow invalid i_size */ |
495 | di_size = be64_to_cpu(dip->di_size); |
496 | if (di_size & (1ULL << 63)) |
497 | return __this_address; |
498 | |
499 | mode = be16_to_cpu(dip->di_mode); |
500 | if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) |
501 | return __this_address; |
502 | |
503 | /* No zero-length symlinks/dirs. */ |
504 | if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) |
505 | return __this_address; |
506 | |
507 | fa = xfs_dinode_verify_nrext64(mp, dip); |
508 | if (fa) |
509 | return fa; |
510 | |
511 | nextents = xfs_dfork_data_extents(dip); |
512 | naextents = xfs_dfork_attr_extents(dip); |
513 | nblocks = be64_to_cpu(dip->di_nblocks); |
514 | |
515 | /* Fork checks carried over from xfs_iformat_fork */ |
516 | if (mode && nextents + naextents > nblocks) |
517 | return __this_address; |
518 | |
519 | if (nextents + naextents == 0 && nblocks != 0) |
520 | return __this_address; |
521 | |
522 | if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) |
523 | return __this_address; |
524 | |
525 | if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) |
526 | return __this_address; |
527 | |
528 | flags = be16_to_cpu(dip->di_flags); |
529 | |
530 | if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) |
531 | return __this_address; |
532 | |
533 | /* check for illegal values of forkoff */ |
534 | fa = xfs_dinode_verify_forkoff(dip, mp); |
535 | if (fa) |
536 | return fa; |
537 | |
538 | /* Do we have appropriate data fork formats for the mode? */ |
539 | switch (mode & S_IFMT) { |
540 | case S_IFIFO: |
541 | case S_IFCHR: |
542 | case S_IFBLK: |
543 | case S_IFSOCK: |
544 | if (dip->di_format != XFS_DINODE_FMT_DEV) |
545 | return __this_address; |
546 | break; |
547 | case S_IFREG: |
548 | case S_IFLNK: |
549 | case S_IFDIR: |
550 | fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); |
551 | if (fa) |
552 | return fa; |
553 | break; |
554 | case 0: |
555 | /* Uninitialized inode ok. */ |
556 | break; |
557 | default: |
558 | return __this_address; |
559 | } |
560 | |
561 | if (dip->di_forkoff) { |
562 | fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); |
563 | if (fa) |
564 | return fa; |
565 | } else { |
566 | /* |
567 | * If there is no fork offset, this may be a freshly-made inode |
568 | * in a new disk cluster, in which case di_aformat is zeroed. |
569 | * Otherwise, such an inode must be in EXTENTS format; this goes |
570 | * for freed inodes as well. |
571 | */ |
572 | switch (dip->di_aformat) { |
573 | case 0: |
574 | case XFS_DINODE_FMT_EXTENTS: |
575 | break; |
576 | default: |
577 | return __this_address; |
578 | } |
579 | if (naextents) |
580 | return __this_address; |
581 | } |
582 | |
583 | /* extent size hint validation */ |
584 | fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), |
585 | mode, flags); |
586 | if (fa) |
587 | return fa; |
588 | |
589 | /* only version 3 or greater inodes are extensively verified here */ |
590 | if (dip->di_version < 3) |
591 | return NULL; |
592 | |
593 | flags2 = be64_to_cpu(dip->di_flags2); |
594 | |
595 | /* don't allow reflink/cowextsize if we don't have reflink */ |
596 | if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && |
597 | !xfs_has_reflink(mp)) |
598 | return __this_address; |
599 | |
600 | /* only regular files get reflink */ |
601 | if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) |
602 | return __this_address; |
603 | |
604 | /* don't let reflink and realtime mix */ |
605 | if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) |
606 | return __this_address; |
607 | |
608 | /* COW extent size hint validation */ |
609 | fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), |
610 | mode, flags, flags2); |
611 | if (fa) |
612 | return fa; |
613 | |
614 | /* bigtime iflag can only happen on bigtime filesystems */ |
615 | if (xfs_dinode_has_bigtime(dip) && |
616 | !xfs_has_bigtime(mp)) |
617 | return __this_address; |
618 | |
619 | return NULL; |
620 | } |
621 | |
622 | void |
623 | xfs_dinode_calc_crc( |
624 | struct xfs_mount *mp, |
625 | struct xfs_dinode *dip) |
626 | { |
627 | uint32_t crc; |
628 | |
629 | if (dip->di_version < 3) |
630 | return; |
631 | |
632 | ASSERT(xfs_has_crc(mp)); |
633 | crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, |
634 | XFS_DINODE_CRC_OFF); |
635 | dip->di_crc = xfs_end_cksum(crc); |
636 | } |
637 | |
638 | /* |
639 | * Validate di_extsize hint. |
640 | * |
641 | * 1. Extent size hint is only valid for directories and regular files. |
642 | * 2. FS_XFLAG_EXTSIZE is only valid for regular files. |
643 | * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. |
644 | * 4. Hint cannot be larger than MAXTEXTLEN. |
645 | * 5. Can be changed on directories at any time. |
646 | * 6. Hint value of 0 turns off hints, clears inode flags. |
647 | * 7. Extent size must be a multiple of the appropriate block size. |
648 | * For realtime files, this is the rt extent size. |
649 | * 8. For non-realtime files, the extent size hint must be limited |
650 | * to half the AG size to avoid alignment extending the extent beyond the |
651 | * limits of the AG. |
652 | */ |
653 | xfs_failaddr_t |
654 | xfs_inode_validate_extsize( |
655 | struct xfs_mount *mp, |
656 | uint32_t extsize, |
657 | uint16_t mode, |
658 | uint16_t flags) |
659 | { |
660 | bool rt_flag; |
661 | bool hint_flag; |
662 | bool inherit_flag; |
663 | uint32_t extsize_bytes; |
664 | uint32_t blocksize_bytes; |
665 | |
666 | rt_flag = (flags & XFS_DIFLAG_REALTIME); |
667 | hint_flag = (flags & XFS_DIFLAG_EXTSIZE); |
668 | inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); |
669 | extsize_bytes = XFS_FSB_TO_B(mp, extsize); |
670 | |
671 | /* |
672 | * This comment describes a historic gap in this verifier function. |
673 | * |
674 | * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this |
675 | * function has never checked that the extent size hint is an integer |
676 | * multiple of the realtime extent size. Since we allow users to set |
677 | * this combination on non-rt filesystems /and/ to change the rt |
678 | * extent size when adding a rt device to a filesystem, the net effect |
679 | * is that users can configure a filesystem anticipating one rt |
680 | * geometry and change their minds later. Directories do not use the |
681 | * extent size hint, so this is harmless for them. |
682 | * |
683 | * If a directory with a misaligned extent size hint is allowed to |
684 | * propagate that hint into a new regular realtime file, the result |
685 | * is that the inode cluster buffer verifier will trigger a corruption |
686 | * shutdown the next time it is run, because the verifier has always |
687 | * enforced the alignment rule for regular files. |
688 | * |
689 | * Because we allow administrators to set a new rt extent size when |
690 | * adding a rt section, we cannot add a check to this verifier because |
691 | * that will result a new source of directory corruption errors when |
692 | * reading an existing filesystem. Instead, we rely on callers to |
693 | * decide when alignment checks are appropriate, and fix things up as |
694 | * needed. |
695 | */ |
696 | |
697 | if (rt_flag) |
698 | blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); |
699 | else |
700 | blocksize_bytes = mp->m_sb.sb_blocksize; |
701 | |
702 | if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) |
703 | return __this_address; |
704 | |
705 | if (hint_flag && !S_ISREG(mode)) |
706 | return __this_address; |
707 | |
708 | if (inherit_flag && !S_ISDIR(mode)) |
709 | return __this_address; |
710 | |
711 | if ((hint_flag || inherit_flag) && extsize == 0) |
712 | return __this_address; |
713 | |
714 | /* free inodes get flags set to zero but extsize remains */ |
715 | if (mode && !(hint_flag || inherit_flag) && extsize != 0) |
716 | return __this_address; |
717 | |
718 | if (extsize_bytes % blocksize_bytes) |
719 | return __this_address; |
720 | |
721 | if (extsize > XFS_MAX_BMBT_EXTLEN) |
722 | return __this_address; |
723 | |
724 | if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) |
725 | return __this_address; |
726 | |
727 | return NULL; |
728 | } |
729 | |
730 | /* |
731 | * Validate di_cowextsize hint. |
732 | * |
733 | * 1. CoW extent size hint can only be set if reflink is enabled on the fs. |
734 | * The inode does not have to have any shared blocks, but it must be a v3. |
735 | * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; |
736 | * for a directory, the hint is propagated to new files. |
737 | * 3. Can be changed on files & directories at any time. |
738 | * 4. Hint value of 0 turns off hints, clears inode flags. |
739 | * 5. Extent size must be a multiple of the appropriate block size. |
740 | * 6. The extent size hint must be limited to half the AG size to avoid |
741 | * alignment extending the extent beyond the limits of the AG. |
742 | */ |
743 | xfs_failaddr_t |
744 | xfs_inode_validate_cowextsize( |
745 | struct xfs_mount *mp, |
746 | uint32_t cowextsize, |
747 | uint16_t mode, |
748 | uint16_t flags, |
749 | uint64_t flags2) |
750 | { |
751 | bool rt_flag; |
752 | bool hint_flag; |
753 | uint32_t cowextsize_bytes; |
754 | |
755 | rt_flag = (flags & XFS_DIFLAG_REALTIME); |
756 | hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); |
757 | cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); |
758 | |
759 | if (hint_flag && !xfs_has_reflink(mp)) |
760 | return __this_address; |
761 | |
762 | if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) |
763 | return __this_address; |
764 | |
765 | if (hint_flag && cowextsize == 0) |
766 | return __this_address; |
767 | |
768 | /* free inodes get flags set to zero but cowextsize remains */ |
769 | if (mode && !hint_flag && cowextsize != 0) |
770 | return __this_address; |
771 | |
772 | if (hint_flag && rt_flag) |
773 | return __this_address; |
774 | |
775 | if (cowextsize_bytes % mp->m_sb.sb_blocksize) |
776 | return __this_address; |
777 | |
778 | if (cowextsize > XFS_MAX_BMBT_EXTLEN) |
779 | return __this_address; |
780 | |
781 | if (cowextsize > mp->m_sb.sb_agblocks / 2) |
782 | return __this_address; |
783 | |
784 | return NULL; |
785 | } |
786 | |