1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * linux/fs/jbd2/recovery.c |
4 | * |
5 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 |
6 | * |
7 | * Copyright 1999-2000 Red Hat Software --- All Rights Reserved |
8 | * |
9 | * Journal recovery routines for the generic filesystem journaling code; |
10 | * part of the ext2fs journaling system. |
11 | */ |
12 | |
13 | #ifndef __KERNEL__ |
14 | #include "jfs_user.h" |
15 | #else |
16 | #include <linux/time.h> |
17 | #include <linux/fs.h> |
18 | #include <linux/jbd2.h> |
19 | #include <linux/errno.h> |
20 | #include <linux/crc32.h> |
21 | #include <linux/blkdev.h> |
22 | #endif |
23 | |
24 | /* |
25 | * Maintain information about the progress of the recovery job, so that |
26 | * the different passes can carry information between them. |
27 | */ |
28 | struct recovery_info |
29 | { |
30 | tid_t start_transaction; |
31 | tid_t end_transaction; |
32 | unsigned long head_block; |
33 | |
34 | int nr_replays; |
35 | int nr_revokes; |
36 | int nr_revoke_hits; |
37 | }; |
38 | |
39 | static int do_one_pass(journal_t *journal, |
40 | struct recovery_info *info, enum passtype pass); |
41 | static int scan_revoke_records(journal_t *, struct buffer_head *, |
42 | tid_t, struct recovery_info *); |
43 | |
44 | #ifdef __KERNEL__ |
45 | |
/*
 * Release readahead buffers after use.
 *
 * Calls brelse() on the first @n entries of @b, from the last entry
 * down to the first.  Nothing happens when @n is zero or negative.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	while (--n >= 0)
		brelse(b[n]);
}
52 | |
53 | |
54 | /* |
55 | * When reading from the journal, we are going through the block device |
56 | * layer directly and so there is no readahead being done for us. We |
57 | * need to implement any readahead ourselves if we want it to happen at |
58 | * all. Recovery is basically one long sequential read, so make sure we |
59 | * do the IO in reasonably large chunks. |
60 | * |
61 | * This is not so critical that we need to be enormously clever about |
62 | * the readahead size, though. 128K is a purely arbitrary, good-enough |
63 | * fixed value. |
64 | */ |
65 | |
66 | #define MAXBUF 8 |
67 | static int do_readahead(journal_t *journal, unsigned int start) |
68 | { |
69 | int err; |
70 | unsigned int max, nbufs, next; |
71 | unsigned long long blocknr; |
72 | struct buffer_head *bh; |
73 | |
74 | struct buffer_head * bufs[MAXBUF]; |
75 | |
76 | /* Do up to 128K of readahead */ |
77 | max = start + (128 * 1024 / journal->j_blocksize); |
78 | if (max > journal->j_total_len) |
79 | max = journal->j_total_len; |
80 | |
81 | /* Do the readahead itself. We'll submit MAXBUF buffer_heads at |
82 | * a time to the block device IO layer. */ |
83 | |
84 | nbufs = 0; |
85 | |
86 | for (next = start; next < max; next++) { |
87 | err = jbd2_journal_bmap(journal, next, &blocknr); |
88 | |
89 | if (err) { |
90 | printk(KERN_ERR "JBD2: bad block at offset %u\n" , |
91 | next); |
92 | goto failed; |
93 | } |
94 | |
95 | bh = __getblk(bdev: journal->j_dev, block: blocknr, size: journal->j_blocksize); |
96 | if (!bh) { |
97 | err = -ENOMEM; |
98 | goto failed; |
99 | } |
100 | |
101 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) { |
102 | bufs[nbufs++] = bh; |
103 | if (nbufs == MAXBUF) { |
104 | bh_readahead_batch(nr: nbufs, bhs: bufs, op_flags: 0); |
105 | journal_brelse_array(b: bufs, n: nbufs); |
106 | nbufs = 0; |
107 | } |
108 | } else |
109 | brelse(bh); |
110 | } |
111 | |
112 | if (nbufs) |
113 | bh_readahead_batch(nr: nbufs, bhs: bufs, op_flags: 0); |
114 | err = 0; |
115 | |
116 | failed: |
117 | if (nbufs) |
118 | journal_brelse_array(b: bufs, n: nbufs); |
119 | return err; |
120 | } |
121 | |
122 | #endif /* __KERNEL__ */ |
123 | |
124 | |
125 | /* |
126 | * Read a block from the journal |
127 | */ |
128 | |
129 | static int jread(struct buffer_head **bhp, journal_t *journal, |
130 | unsigned int offset) |
131 | { |
132 | int err; |
133 | unsigned long long blocknr; |
134 | struct buffer_head *bh; |
135 | |
136 | *bhp = NULL; |
137 | |
138 | if (offset >= journal->j_total_len) { |
139 | printk(KERN_ERR "JBD2: corrupted journal superblock\n" ); |
140 | return -EFSCORRUPTED; |
141 | } |
142 | |
143 | err = jbd2_journal_bmap(journal, offset, &blocknr); |
144 | |
145 | if (err) { |
146 | printk(KERN_ERR "JBD2: bad block at offset %u\n" , |
147 | offset); |
148 | return err; |
149 | } |
150 | |
151 | bh = __getblk(bdev: journal->j_dev, block: blocknr, size: journal->j_blocksize); |
152 | if (!bh) |
153 | return -ENOMEM; |
154 | |
155 | if (!buffer_uptodate(bh)) { |
156 | /* |
157 | * If this is a brand new buffer, start readahead. |
158 | * Otherwise, we assume we are already reading it. |
159 | */ |
160 | bool need_readahead = !buffer_req(bh); |
161 | |
162 | bh_read_nowait(bh, op_flags: 0); |
163 | if (need_readahead) |
164 | do_readahead(journal, start: offset); |
165 | wait_on_buffer(bh); |
166 | } |
167 | |
168 | if (!buffer_uptodate(bh)) { |
169 | printk(KERN_ERR "JBD2: Failed to read block at offset %u\n" , |
170 | offset); |
171 | brelse(bh); |
172 | return -EIO; |
173 | } |
174 | |
175 | *bhp = bh; |
176 | return 0; |
177 | } |
178 | |
179 | static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) |
180 | { |
181 | struct jbd2_journal_block_tail *tail; |
182 | __be32 provided; |
183 | __u32 calculated; |
184 | |
185 | if (!jbd2_journal_has_csum_v2or3(journal: j)) |
186 | return 1; |
187 | |
188 | tail = (struct jbd2_journal_block_tail *)((char *)buf + |
189 | j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); |
190 | provided = tail->t_checksum; |
191 | tail->t_checksum = 0; |
192 | calculated = jbd2_chksum(journal: j, crc: j->j_csum_seed, address: buf, length: j->j_blocksize); |
193 | tail->t_checksum = provided; |
194 | |
195 | return provided == cpu_to_be32(calculated); |
196 | } |
197 | |
198 | /* |
199 | * Count the number of in-use tags in a journal descriptor block. |
200 | */ |
201 | |
202 | static int count_tags(journal_t *journal, struct buffer_head *bh) |
203 | { |
204 | char * tagp; |
205 | journal_block_tag_t tag; |
206 | int nr = 0, size = journal->j_blocksize; |
207 | int tag_bytes = journal_tag_bytes(journal); |
208 | |
209 | if (jbd2_journal_has_csum_v2or3(journal)) |
210 | size -= sizeof(struct jbd2_journal_block_tail); |
211 | |
212 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
213 | |
214 | while ((tagp - bh->b_data + tag_bytes) <= size) { |
215 | memcpy(&tag, tagp, sizeof(tag)); |
216 | |
217 | nr++; |
218 | tagp += tag_bytes; |
219 | if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) |
220 | tagp += 16; |
221 | |
222 | if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) |
223 | break; |
224 | } |
225 | |
226 | return nr; |
227 | } |
228 | |
229 | |
/*
 * Make sure we wrap around the log correctly!
 *
 * If @var has reached or passed (journal)->j_last, pull it back by the
 * distance between j_last and j_first.  @var must be an lvalue; it is
 * evaluated more than once.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
236 | |
237 | static int fc_do_one_pass(journal_t *journal, |
238 | struct recovery_info *info, enum passtype pass) |
239 | { |
240 | unsigned int expected_commit_id = info->end_transaction; |
241 | unsigned long next_fc_block; |
242 | struct buffer_head *bh; |
243 | int err = 0; |
244 | |
245 | next_fc_block = journal->j_fc_first; |
246 | if (!journal->j_fc_replay_callback) |
247 | return 0; |
248 | |
249 | while (next_fc_block <= journal->j_fc_last) { |
250 | jbd2_debug(3, "Fast commit replay: next block %ld\n" , |
251 | next_fc_block); |
252 | err = jread(bhp: &bh, journal, offset: next_fc_block); |
253 | if (err) { |
254 | jbd2_debug(3, "Fast commit replay: read error\n" ); |
255 | break; |
256 | } |
257 | |
258 | err = journal->j_fc_replay_callback(journal, bh, pass, |
259 | next_fc_block - journal->j_fc_first, |
260 | expected_commit_id); |
261 | brelse(bh); |
262 | next_fc_block++; |
263 | if (err < 0 || err == JBD2_FC_REPLAY_STOP) |
264 | break; |
265 | err = 0; |
266 | } |
267 | |
268 | if (err) |
269 | jbd2_debug(3, "Fast commit replay failed, err = %d\n" , err); |
270 | |
271 | return err; |
272 | } |
273 | |
274 | /** |
275 | * jbd2_journal_recover - recovers a on-disk journal |
276 | * @journal: the journal to recover |
277 | * |
278 | * The primary function for recovering the log contents when mounting a |
279 | * journaled device. |
280 | * |
281 | * Recovery is done in three passes. In the first pass, we look for the |
282 | * end of the log. In the second, we assemble the list of revoke |
283 | * blocks. In the third and final pass, we replay any un-revoked blocks |
284 | * in the log. |
285 | */ |
286 | int jbd2_journal_recover(journal_t *journal) |
287 | { |
288 | int err, err2; |
289 | journal_superblock_t * sb; |
290 | |
291 | struct recovery_info info; |
292 | |
293 | memset(&info, 0, sizeof(info)); |
294 | sb = journal->j_superblock; |
295 | |
296 | /* |
297 | * The journal superblock's s_start field (the current log head) |
298 | * is always zero if, and only if, the journal was cleanly |
299 | * unmounted. |
300 | */ |
301 | if (!sb->s_start) { |
302 | jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n" , |
303 | be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head)); |
304 | journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; |
305 | journal->j_head = be32_to_cpu(sb->s_head); |
306 | return 0; |
307 | } |
308 | |
309 | err = do_one_pass(journal, info: &info, pass: PASS_SCAN); |
310 | if (!err) |
311 | err = do_one_pass(journal, info: &info, pass: PASS_REVOKE); |
312 | if (!err) |
313 | err = do_one_pass(journal, info: &info, pass: PASS_REPLAY); |
314 | |
315 | jbd2_debug(1, "JBD2: recovery, exit status %d, " |
316 | "recovered transactions %u to %u\n" , |
317 | err, info.start_transaction, info.end_transaction); |
318 | jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n" , |
319 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); |
320 | |
321 | /* Restart the log at the next transaction ID, thus invalidating |
322 | * any existing commit records in the log. */ |
323 | journal->j_transaction_sequence = ++info.end_transaction; |
324 | journal->j_head = info.head_block; |
325 | jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n" , |
326 | journal->j_transaction_sequence, journal->j_head); |
327 | |
328 | jbd2_journal_clear_revoke(journal); |
329 | err2 = sync_blockdev(bdev: journal->j_fs_dev); |
330 | if (!err) |
331 | err = err2; |
332 | err2 = jbd2_check_fs_dev_write_error(journal); |
333 | if (!err) |
334 | err = err2; |
335 | /* Make sure all replayed data is on permanent storage */ |
336 | if (journal->j_flags & JBD2_BARRIER) { |
337 | err2 = blkdev_issue_flush(bdev: journal->j_fs_dev); |
338 | if (!err) |
339 | err = err2; |
340 | } |
341 | return err; |
342 | } |
343 | |
344 | /** |
345 | * jbd2_journal_skip_recovery - Start journal and wipe exiting records |
346 | * @journal: journal to startup |
347 | * |
348 | * Locate any valid recovery information from the journal and set up the |
349 | * journal structures in memory to ignore it (presumably because the |
350 | * caller has evidence that it is out of date). |
351 | * This function doesn't appear to be exported.. |
352 | * |
353 | * We perform one pass over the journal to allow us to tell the user how |
354 | * much recovery information is being erased, and to let us initialise |
355 | * the journal transaction sequence numbers to the next unused ID. |
356 | */ |
357 | int jbd2_journal_skip_recovery(journal_t *journal) |
358 | { |
359 | int err; |
360 | |
361 | struct recovery_info info; |
362 | |
363 | memset (&info, 0, sizeof(info)); |
364 | |
365 | err = do_one_pass(journal, info: &info, pass: PASS_SCAN); |
366 | |
367 | if (err) { |
368 | printk(KERN_ERR "JBD2: error %d scanning journal\n" , err); |
369 | ++journal->j_transaction_sequence; |
370 | journal->j_head = journal->j_first; |
371 | } else { |
372 | #ifdef CONFIG_JBD2_DEBUG |
373 | int dropped = info.end_transaction - |
374 | be32_to_cpu(journal->j_superblock->s_sequence); |
375 | jbd2_debug(1, |
376 | "JBD2: ignoring %d transaction%s from the journal.\n" , |
377 | dropped, (dropped == 1) ? "" : "s" ); |
378 | #endif |
379 | journal->j_transaction_sequence = ++info.end_transaction; |
380 | journal->j_head = info.head_block; |
381 | } |
382 | |
383 | journal->j_tail = 0; |
384 | return err; |
385 | } |
386 | |
387 | static inline unsigned long long read_tag_block(journal_t *journal, |
388 | journal_block_tag_t *tag) |
389 | { |
390 | unsigned long long block = be32_to_cpu(tag->t_blocknr); |
391 | if (jbd2_has_feature_64bit(j: journal)) |
392 | block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; |
393 | return block; |
394 | } |
395 | |
396 | /* |
397 | * calc_chksums calculates the checksums for the blocks described in the |
398 | * descriptor block. |
399 | */ |
400 | static int calc_chksums(journal_t *journal, struct buffer_head *bh, |
401 | unsigned long *next_log_block, __u32 *crc32_sum) |
402 | { |
403 | int i, num_blks, err; |
404 | unsigned long io_block; |
405 | struct buffer_head *obh; |
406 | |
407 | num_blks = count_tags(journal, bh); |
408 | /* Calculate checksum of the descriptor block. */ |
409 | *crc32_sum = crc32_be(crc: *crc32_sum, p: (void *)bh->b_data, len: bh->b_size); |
410 | |
411 | for (i = 0; i < num_blks; i++) { |
412 | io_block = (*next_log_block)++; |
413 | wrap(journal, *next_log_block); |
414 | err = jread(bhp: &obh, journal, offset: io_block); |
415 | if (err) { |
416 | printk(KERN_ERR "JBD2: IO error %d recovering block " |
417 | "%lu in log\n" , err, io_block); |
418 | return 1; |
419 | } else { |
420 | *crc32_sum = crc32_be(crc: *crc32_sum, p: (void *)obh->b_data, |
421 | len: obh->b_size); |
422 | } |
423 | put_bh(bh: obh); |
424 | } |
425 | return 0; |
426 | } |
427 | |
428 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) |
429 | { |
430 | struct commit_header *h; |
431 | __be32 provided; |
432 | __u32 calculated; |
433 | |
434 | if (!jbd2_journal_has_csum_v2or3(journal: j)) |
435 | return 1; |
436 | |
437 | h = buf; |
438 | provided = h->h_chksum[0]; |
439 | h->h_chksum[0] = 0; |
440 | calculated = jbd2_chksum(journal: j, crc: j->j_csum_seed, address: buf, length: j->j_blocksize); |
441 | h->h_chksum[0] = provided; |
442 | |
443 | return provided == cpu_to_be32(calculated); |
444 | } |
445 | |
446 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
447 | journal_block_tag3_t *tag3, |
448 | void *buf, __u32 sequence) |
449 | { |
450 | __u32 csum32; |
451 | __be32 seq; |
452 | |
453 | if (!jbd2_journal_has_csum_v2or3(journal: j)) |
454 | return 1; |
455 | |
456 | seq = cpu_to_be32(sequence); |
457 | csum32 = jbd2_chksum(journal: j, crc: j->j_csum_seed, address: (__u8 *)&seq, length: sizeof(seq)); |
458 | csum32 = jbd2_chksum(journal: j, crc: csum32, address: buf, length: j->j_blocksize); |
459 | |
460 | if (jbd2_has_feature_csum3(j)) |
461 | return tag3->t_checksum == cpu_to_be32(csum32); |
462 | else |
463 | return tag->t_checksum == cpu_to_be16(csum32); |
464 | } |
465 | |
466 | static int do_one_pass(journal_t *journal, |
467 | struct recovery_info *info, enum passtype pass) |
468 | { |
469 | unsigned int first_commit_ID, next_commit_ID; |
470 | unsigned long next_log_block, head_block; |
471 | int err, success = 0; |
472 | journal_superblock_t * sb; |
473 | journal_header_t * tmp; |
474 | struct buffer_head * bh; |
475 | unsigned int sequence; |
476 | int blocktype; |
477 | int tag_bytes = journal_tag_bytes(journal); |
478 | __u32 crc32_sum = ~0; /* Transactional Checksums */ |
479 | int descr_csum_size = 0; |
480 | int block_error = 0; |
481 | bool need_check_commit_time = false; |
482 | __u64 last_trans_commit_time = 0, commit_time; |
483 | |
484 | /* |
485 | * First thing is to establish what we expect to find in the log |
486 | * (in terms of transaction IDs), and where (in terms of log |
487 | * block offsets): query the superblock. |
488 | */ |
489 | |
490 | sb = journal->j_superblock; |
491 | next_commit_ID = be32_to_cpu(sb->s_sequence); |
492 | next_log_block = be32_to_cpu(sb->s_start); |
493 | head_block = next_log_block; |
494 | |
495 | first_commit_ID = next_commit_ID; |
496 | if (pass == PASS_SCAN) |
497 | info->start_transaction = first_commit_ID; |
498 | |
499 | jbd2_debug(1, "Starting recovery pass %d\n" , pass); |
500 | |
501 | /* |
502 | * Now we walk through the log, transaction by transaction, |
503 | * making sure that each transaction has a commit block in the |
504 | * expected place. Each complete transaction gets replayed back |
505 | * into the main filesystem. |
506 | */ |
507 | |
508 | while (1) { |
509 | int flags; |
510 | char * tagp; |
511 | journal_block_tag_t tag; |
512 | struct buffer_head * obh; |
513 | struct buffer_head * nbh; |
514 | |
515 | cond_resched(); |
516 | |
517 | /* If we already know where to stop the log traversal, |
518 | * check right now that we haven't gone past the end of |
519 | * the log. */ |
520 | |
521 | if (pass != PASS_SCAN) |
522 | if (tid_geq(x: next_commit_ID, y: info->end_transaction)) |
523 | break; |
524 | |
525 | jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n" , |
526 | next_commit_ID, next_log_block, journal->j_last); |
527 | |
528 | /* Skip over each chunk of the transaction looking |
529 | * either the next descriptor block or the final commit |
530 | * record. */ |
531 | |
532 | jbd2_debug(3, "JBD2: checking block %ld\n" , next_log_block); |
533 | err = jread(bhp: &bh, journal, offset: next_log_block); |
534 | if (err) |
535 | goto failed; |
536 | |
537 | next_log_block++; |
538 | wrap(journal, next_log_block); |
539 | |
540 | /* What kind of buffer is it? |
541 | * |
542 | * If it is a descriptor block, check that it has the |
543 | * expected sequence number. Otherwise, we're all done |
544 | * here. */ |
545 | |
546 | tmp = (journal_header_t *)bh->b_data; |
547 | |
548 | if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { |
549 | brelse(bh); |
550 | break; |
551 | } |
552 | |
553 | blocktype = be32_to_cpu(tmp->h_blocktype); |
554 | sequence = be32_to_cpu(tmp->h_sequence); |
555 | jbd2_debug(3, "Found magic %d, sequence %d\n" , |
556 | blocktype, sequence); |
557 | |
558 | if (sequence != next_commit_ID) { |
559 | brelse(bh); |
560 | break; |
561 | } |
562 | |
563 | /* OK, we have a valid descriptor block which matches |
564 | * all of the sequence number checks. What are we going |
565 | * to do with it? That depends on the pass... */ |
566 | |
567 | switch(blocktype) { |
568 | case JBD2_DESCRIPTOR_BLOCK: |
569 | /* Verify checksum first */ |
570 | if (jbd2_journal_has_csum_v2or3(journal)) |
571 | descr_csum_size = |
572 | sizeof(struct jbd2_journal_block_tail); |
573 | if (descr_csum_size > 0 && |
574 | !jbd2_descriptor_block_csum_verify(j: journal, |
575 | buf: bh->b_data)) { |
576 | /* |
577 | * PASS_SCAN can see stale blocks due to lazy |
578 | * journal init. Don't error out on those yet. |
579 | */ |
580 | if (pass != PASS_SCAN) { |
581 | pr_err("JBD2: Invalid checksum recovering block %lu in log\n" , |
582 | next_log_block); |
583 | err = -EFSBADCRC; |
584 | brelse(bh); |
585 | goto failed; |
586 | } |
587 | need_check_commit_time = true; |
588 | jbd2_debug(1, |
589 | "invalid descriptor block found in %lu\n" , |
590 | next_log_block); |
591 | } |
592 | |
593 | /* If it is a valid descriptor block, replay it |
594 | * in pass REPLAY; if journal_checksums enabled, then |
595 | * calculate checksums in PASS_SCAN, otherwise, |
596 | * just skip over the blocks it describes. */ |
597 | if (pass != PASS_REPLAY) { |
598 | if (pass == PASS_SCAN && |
599 | jbd2_has_feature_checksum(j: journal) && |
600 | !need_check_commit_time && |
601 | !info->end_transaction) { |
602 | if (calc_chksums(journal, bh, |
603 | next_log_block: &next_log_block, |
604 | crc32_sum: &crc32_sum)) { |
605 | put_bh(bh); |
606 | break; |
607 | } |
608 | put_bh(bh); |
609 | continue; |
610 | } |
611 | next_log_block += count_tags(journal, bh); |
612 | wrap(journal, next_log_block); |
613 | put_bh(bh); |
614 | continue; |
615 | } |
616 | |
617 | /* A descriptor block: we can now write all of |
618 | * the data blocks. Yay, useful work is finally |
619 | * getting done here! */ |
620 | |
621 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
622 | while ((tagp - bh->b_data + tag_bytes) |
623 | <= journal->j_blocksize - descr_csum_size) { |
624 | unsigned long io_block; |
625 | |
626 | memcpy(&tag, tagp, sizeof(tag)); |
627 | flags = be16_to_cpu(tag.t_flags); |
628 | |
629 | io_block = next_log_block++; |
630 | wrap(journal, next_log_block); |
631 | err = jread(bhp: &obh, journal, offset: io_block); |
632 | if (err) { |
633 | /* Recover what we can, but |
634 | * report failure at the end. */ |
635 | success = err; |
636 | printk(KERN_ERR |
637 | "JBD2: IO error %d recovering " |
638 | "block %lu in log\n" , |
639 | err, io_block); |
640 | } else { |
641 | unsigned long long blocknr; |
642 | |
643 | J_ASSERT(obh != NULL); |
644 | blocknr = read_tag_block(journal, |
645 | tag: &tag); |
646 | |
647 | /* If the block has been |
648 | * revoked, then we're all done |
649 | * here. */ |
650 | if (jbd2_journal_test_revoke |
651 | (journal, blocknr, |
652 | next_commit_ID)) { |
653 | brelse(bh: obh); |
654 | ++info->nr_revoke_hits; |
655 | goto skip_write; |
656 | } |
657 | |
658 | /* Look for block corruption */ |
659 | if (!jbd2_block_tag_csum_verify( |
660 | j: journal, tag: &tag, tag3: (journal_block_tag3_t *)tagp, |
661 | buf: obh->b_data, be32_to_cpu(tmp->h_sequence))) { |
662 | brelse(bh: obh); |
663 | success = -EFSBADCRC; |
664 | printk(KERN_ERR "JBD2: Invalid " |
665 | "checksum recovering " |
666 | "data block %llu in " |
667 | "journal block %lu\n" , |
668 | blocknr, io_block); |
669 | block_error = 1; |
670 | goto skip_write; |
671 | } |
672 | |
673 | /* Find a buffer for the new |
674 | * data being restored */ |
675 | nbh = __getblk(bdev: journal->j_fs_dev, |
676 | block: blocknr, |
677 | size: journal->j_blocksize); |
678 | if (nbh == NULL) { |
679 | printk(KERN_ERR |
680 | "JBD2: Out of memory " |
681 | "during recovery.\n" ); |
682 | err = -ENOMEM; |
683 | brelse(bh); |
684 | brelse(bh: obh); |
685 | goto failed; |
686 | } |
687 | |
688 | lock_buffer(bh: nbh); |
689 | memcpy(nbh->b_data, obh->b_data, |
690 | journal->j_blocksize); |
691 | if (flags & JBD2_FLAG_ESCAPE) { |
692 | *((__be32 *)nbh->b_data) = |
693 | cpu_to_be32(JBD2_MAGIC_NUMBER); |
694 | } |
695 | |
696 | BUFFER_TRACE(nbh, "marking dirty" ); |
697 | set_buffer_uptodate(nbh); |
698 | mark_buffer_dirty(bh: nbh); |
699 | BUFFER_TRACE(nbh, "marking uptodate" ); |
700 | ++info->nr_replays; |
701 | unlock_buffer(bh: nbh); |
702 | brelse(bh: obh); |
703 | brelse(bh: nbh); |
704 | } |
705 | |
706 | skip_write: |
707 | tagp += tag_bytes; |
708 | if (!(flags & JBD2_FLAG_SAME_UUID)) |
709 | tagp += 16; |
710 | |
711 | if (flags & JBD2_FLAG_LAST_TAG) |
712 | break; |
713 | } |
714 | |
715 | brelse(bh); |
716 | continue; |
717 | |
718 | case JBD2_COMMIT_BLOCK: |
719 | /* How to differentiate between interrupted commit |
720 | * and journal corruption ? |
721 | * |
722 | * {nth transaction} |
723 | * Checksum Verification Failed |
724 | * | |
725 | * ____________________ |
726 | * | | |
727 | * async_commit sync_commit |
728 | * | | |
729 | * | GO TO NEXT "Journal Corruption" |
730 | * | TRANSACTION |
731 | * | |
732 | * {(n+1)th transanction} |
733 | * | |
734 | * _______|______________ |
735 | * | | |
736 | * Commit block found Commit block not found |
737 | * | | |
738 | * "Journal Corruption" | |
739 | * _____________|_________ |
740 | * | | |
741 | * nth trans corrupt OR nth trans |
742 | * and (n+1)th interrupted interrupted |
743 | * before commit block |
744 | * could reach the disk. |
745 | * (Cannot find the difference in above |
746 | * mentioned conditions. Hence assume |
747 | * "Interrupted Commit".) |
748 | */ |
749 | commit_time = be64_to_cpu( |
750 | ((struct commit_header *)bh->b_data)->h_commit_sec); |
751 | /* |
752 | * If need_check_commit_time is set, it means we are in |
753 | * PASS_SCAN and csum verify failed before. If |
754 | * commit_time is increasing, it's the same journal, |
755 | * otherwise it is stale journal block, just end this |
756 | * recovery. |
757 | */ |
758 | if (need_check_commit_time) { |
759 | if (commit_time >= last_trans_commit_time) { |
760 | pr_err("JBD2: Invalid checksum found in transaction %u\n" , |
761 | next_commit_ID); |
762 | err = -EFSBADCRC; |
763 | brelse(bh); |
764 | goto failed; |
765 | } |
766 | ignore_crc_mismatch: |
767 | /* |
768 | * It likely does not belong to same journal, |
769 | * just end this recovery with success. |
770 | */ |
771 | jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n" , |
772 | next_commit_ID); |
773 | brelse(bh); |
774 | goto done; |
775 | } |
776 | |
777 | /* |
778 | * Found an expected commit block: if checksums |
779 | * are present, verify them in PASS_SCAN; else not |
780 | * much to do other than move on to the next sequence |
781 | * number. |
782 | */ |
783 | if (pass == PASS_SCAN && |
784 | jbd2_has_feature_checksum(j: journal)) { |
785 | struct commit_header *cbh = |
786 | (struct commit_header *)bh->b_data; |
787 | unsigned found_chksum = |
788 | be32_to_cpu(cbh->h_chksum[0]); |
789 | |
790 | if (info->end_transaction) { |
791 | journal->j_failed_commit = |
792 | info->end_transaction; |
793 | brelse(bh); |
794 | break; |
795 | } |
796 | |
797 | /* Neither checksum match nor unused? */ |
798 | if (!((crc32_sum == found_chksum && |
799 | cbh->h_chksum_type == |
800 | JBD2_CRC32_CHKSUM && |
801 | cbh->h_chksum_size == |
802 | JBD2_CRC32_CHKSUM_SIZE) || |
803 | (cbh->h_chksum_type == 0 && |
804 | cbh->h_chksum_size == 0 && |
805 | found_chksum == 0))) |
806 | goto chksum_error; |
807 | |
808 | crc32_sum = ~0; |
809 | } |
810 | if (pass == PASS_SCAN && |
811 | !jbd2_commit_block_csum_verify(j: journal, |
812 | buf: bh->b_data)) { |
813 | chksum_error: |
814 | if (commit_time < last_trans_commit_time) |
815 | goto ignore_crc_mismatch; |
816 | info->end_transaction = next_commit_ID; |
817 | info->head_block = head_block; |
818 | |
819 | if (!jbd2_has_feature_async_commit(j: journal)) { |
820 | journal->j_failed_commit = |
821 | next_commit_ID; |
822 | brelse(bh); |
823 | break; |
824 | } |
825 | } |
826 | if (pass == PASS_SCAN) { |
827 | last_trans_commit_time = commit_time; |
828 | head_block = next_log_block; |
829 | } |
830 | brelse(bh); |
831 | next_commit_ID++; |
832 | continue; |
833 | |
834 | case JBD2_REVOKE_BLOCK: |
835 | /* |
836 | * Check revoke block crc in pass_scan, if csum verify |
837 | * failed, check commit block time later. |
838 | */ |
839 | if (pass == PASS_SCAN && |
840 | !jbd2_descriptor_block_csum_verify(j: journal, |
841 | buf: bh->b_data)) { |
842 | jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n" , |
843 | next_log_block); |
844 | need_check_commit_time = true; |
845 | } |
846 | /* If we aren't in the REVOKE pass, then we can |
847 | * just skip over this block. */ |
848 | if (pass != PASS_REVOKE) { |
849 | brelse(bh); |
850 | continue; |
851 | } |
852 | |
853 | err = scan_revoke_records(journal, bh, |
854 | next_commit_ID, info); |
855 | brelse(bh); |
856 | if (err) |
857 | goto failed; |
858 | continue; |
859 | |
860 | default: |
861 | jbd2_debug(3, "Unrecognised magic %d, end of scan.\n" , |
862 | blocktype); |
863 | brelse(bh); |
864 | goto done; |
865 | } |
866 | } |
867 | |
868 | done: |
869 | /* |
870 | * We broke out of the log scan loop: either we came to the |
871 | * known end of the log or we found an unexpected block in the |
872 | * log. If the latter happened, then we know that the "current" |
873 | * transaction marks the end of the valid log. |
874 | */ |
875 | |
876 | if (pass == PASS_SCAN) { |
877 | if (!info->end_transaction) |
878 | info->end_transaction = next_commit_ID; |
879 | if (!info->head_block) |
880 | info->head_block = head_block; |
881 | } else { |
882 | /* It's really bad news if different passes end up at |
883 | * different places (but possible due to IO errors). */ |
884 | if (info->end_transaction != next_commit_ID) { |
885 | printk(KERN_ERR "JBD2: recovery pass %d ended at " |
886 | "transaction %u, expected %u\n" , |
887 | pass, next_commit_ID, info->end_transaction); |
888 | if (!success) |
889 | success = -EIO; |
890 | } |
891 | } |
892 | |
893 | if (jbd2_has_feature_fast_commit(j: journal) && pass != PASS_REVOKE) { |
894 | err = fc_do_one_pass(journal, info, pass); |
895 | if (err) |
896 | success = err; |
897 | } |
898 | |
899 | if (block_error && success == 0) |
900 | success = -EIO; |
901 | return success; |
902 | |
903 | failed: |
904 | return err; |
905 | } |
906 | |
907 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ |
908 | |
909 | static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, |
910 | tid_t sequence, struct recovery_info *info) |
911 | { |
912 | jbd2_journal_revoke_header_t *; |
913 | int offset, max; |
914 | unsigned csum_size = 0; |
915 | __u32 rcount; |
916 | int record_len = 4; |
917 | |
918 | header = (jbd2_journal_revoke_header_t *) bh->b_data; |
919 | offset = sizeof(jbd2_journal_revoke_header_t); |
920 | rcount = be32_to_cpu(header->r_count); |
921 | |
922 | if (jbd2_journal_has_csum_v2or3(journal)) |
923 | csum_size = sizeof(struct jbd2_journal_block_tail); |
924 | if (rcount > journal->j_blocksize - csum_size) |
925 | return -EINVAL; |
926 | max = rcount; |
927 | |
928 | if (jbd2_has_feature_64bit(j: journal)) |
929 | record_len = 8; |
930 | |
931 | while (offset + record_len <= max) { |
932 | unsigned long long blocknr; |
933 | int err; |
934 | |
935 | if (record_len == 4) |
936 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); |
937 | else |
938 | blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); |
939 | offset += record_len; |
940 | err = jbd2_journal_set_revoke(journal, blocknr, sequence); |
941 | if (err) |
942 | return err; |
943 | ++info->nr_revokes; |
944 | } |
945 | return 0; |
946 | } |
947 | |