1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Assorted bcachefs debug code |
4 | * |
5 | * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> |
6 | * Copyright 2012 Google, Inc. |
7 | */ |
8 | |
9 | #include "bcachefs.h" |
10 | #include "bkey_methods.h" |
11 | #include "btree_cache.h" |
12 | #include "btree_io.h" |
13 | #include "btree_iter.h" |
14 | #include "btree_locking.h" |
15 | #include "btree_update.h" |
16 | #include "btree_update_interior.h" |
17 | #include "buckets.h" |
18 | #include "debug.h" |
19 | #include "error.h" |
20 | #include "extents.h" |
21 | #include "fsck.h" |
22 | #include "inode.h" |
23 | #include "super.h" |
24 | |
25 | #include <linux/console.h> |
26 | #include <linux/debugfs.h> |
27 | #include <linux/module.h> |
28 | #include <linux/random.h> |
29 | #include <linux/seq_file.h> |
30 | |
/*
 * Top-level "bcachefs" debugfs directory: created by bch2_debug_init(),
 * torn down by bch2_debug_exit(), and used as the parent of every
 * per-filesystem directory created in bch2_fs_debug_init().
 */
static struct dentry *bch_debug;
32 | |
33 | static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, |
34 | struct extent_ptr_decoded pick) |
35 | { |
36 | struct btree *v = c->verify_data; |
37 | struct btree_node *n_ondisk = c->verify_ondisk; |
38 | struct btree_node *n_sorted = c->verify_data->data; |
39 | struct bset *sorted, *inmemory = &b->data->keys; |
40 | struct bch_dev *ca = bch_dev_bkey_exists(c, idx: pick.ptr.dev); |
41 | struct bio *bio; |
42 | bool failed = false, saw_error = false; |
43 | |
44 | if (!bch2_dev_get_ioref(ca, READ)) |
45 | return false; |
46 | |
47 | bio = bio_alloc_bioset(bdev: ca->disk_sb.bdev, |
48 | nr_vecs: buf_pages(p: n_sorted, len: btree_buf_bytes(b)), |
49 | opf: REQ_OP_READ|REQ_META, |
50 | GFP_NOFS, |
51 | bs: &c->btree_bio); |
52 | bio->bi_iter.bi_sector = pick.ptr.offset; |
53 | bch2_bio_map(bio, base: n_sorted, btree_buf_bytes(b)); |
54 | |
55 | submit_bio_wait(bio); |
56 | |
57 | bio_put(bio); |
58 | percpu_ref_put(ref: &ca->io_ref); |
59 | |
60 | memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); |
61 | |
62 | v->written = 0; |
63 | if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) |
64 | return false; |
65 | |
66 | n_sorted = c->verify_data->data; |
67 | sorted = &n_sorted->keys; |
68 | |
69 | if (inmemory->u64s != sorted->u64s || |
70 | memcmp(p: inmemory->start, |
71 | q: sorted->start, |
72 | vstruct_end(inmemory) - (void *) inmemory->start)) { |
73 | unsigned offset = 0, sectors; |
74 | struct bset *i; |
75 | unsigned j; |
76 | |
77 | console_lock(); |
78 | |
79 | printk(KERN_ERR "*** in memory:\n" ); |
80 | bch2_dump_bset(c, b, inmemory, 0); |
81 | |
82 | printk(KERN_ERR "*** read back in:\n" ); |
83 | bch2_dump_bset(c, v, sorted, 0); |
84 | |
85 | while (offset < v->written) { |
86 | if (!offset) { |
87 | i = &n_ondisk->keys; |
88 | sectors = vstruct_blocks(n_ondisk, c->block_bits) << |
89 | c->block_bits; |
90 | } else { |
91 | struct btree_node_entry *bne = |
92 | (void *) n_ondisk + (offset << 9); |
93 | i = &bne->keys; |
94 | |
95 | sectors = vstruct_blocks(bne, c->block_bits) << |
96 | c->block_bits; |
97 | } |
98 | |
99 | printk(KERN_ERR "*** on disk block %u:\n" , offset); |
100 | bch2_dump_bset(c, b, i, offset); |
101 | |
102 | offset += sectors; |
103 | } |
104 | |
105 | for (j = 0; j < le16_to_cpu(inmemory->u64s); j++) |
106 | if (inmemory->_data[j] != sorted->_data[j]) |
107 | break; |
108 | |
109 | console_unlock(); |
110 | bch_err(c, "verify failed at key %u" , j); |
111 | |
112 | failed = true; |
113 | } |
114 | |
115 | if (v->written != b->written) { |
116 | bch_err(c, "written wrong: expected %u, got %u" , |
117 | b->written, v->written); |
118 | failed = true; |
119 | } |
120 | |
121 | return failed; |
122 | } |
123 | |
124 | void __bch2_btree_verify(struct bch_fs *c, struct btree *b) |
125 | { |
126 | struct bkey_ptrs_c ptrs; |
127 | struct extent_ptr_decoded p; |
128 | const union bch_extent_entry *entry; |
129 | struct btree *v; |
130 | struct bset *inmemory = &b->data->keys; |
131 | struct bkey_packed *k; |
132 | bool failed = false; |
133 | |
134 | if (c->opts.nochanges) |
135 | return; |
136 | |
137 | bch2_btree_node_io_lock(b); |
138 | mutex_lock(&c->verify_lock); |
139 | |
140 | if (!c->verify_ondisk) { |
141 | c->verify_ondisk = kvmalloc(size: btree_buf_bytes(b), GFP_KERNEL); |
142 | if (!c->verify_ondisk) |
143 | goto out; |
144 | } |
145 | |
146 | if (!c->verify_data) { |
147 | c->verify_data = __bch2_btree_node_mem_alloc(c); |
148 | if (!c->verify_data) |
149 | goto out; |
150 | |
151 | list_del_init(entry: &c->verify_data->list); |
152 | } |
153 | |
154 | BUG_ON(b->nsets != 1); |
155 | |
156 | for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k)) |
157 | if (k->type == KEY_TYPE_btree_ptr_v2) |
158 | ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0; |
159 | |
160 | v = c->verify_data; |
161 | bkey_copy(dst: &v->key, src: &b->key); |
162 | v->c.level = b->c.level; |
163 | v->c.btree_id = b->c.btree_id; |
164 | bch2_btree_keys_init(v); |
165 | |
166 | ptrs = bch2_bkey_ptrs_c(k: bkey_i_to_s_c(k: &b->key)); |
167 | bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry) |
168 | failed |= bch2_btree_verify_replica(c, b, pick: p); |
169 | |
170 | if (failed) { |
171 | struct printbuf buf = PRINTBUF; |
172 | |
173 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k: &b->key)); |
174 | bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n" , buf.buf); |
175 | printbuf_exit(&buf); |
176 | } |
177 | out: |
178 | mutex_unlock(lock: &c->verify_lock); |
179 | bch2_btree_node_io_unlock(b); |
180 | } |
181 | |
182 | void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, |
183 | const struct btree *b) |
184 | { |
185 | struct btree_node *n_ondisk = NULL; |
186 | struct extent_ptr_decoded pick; |
187 | struct bch_dev *ca; |
188 | struct bio *bio = NULL; |
189 | unsigned offset = 0; |
190 | int ret; |
191 | |
192 | if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(k: &b->key), NULL, &pick) <= 0) { |
193 | prt_printf(out, "error getting device to read from: invalid device\n" ); |
194 | return; |
195 | } |
196 | |
197 | ca = bch_dev_bkey_exists(c, idx: pick.ptr.dev); |
198 | if (!bch2_dev_get_ioref(ca, READ)) { |
199 | prt_printf(out, "error getting device to read from: not online\n" ); |
200 | return; |
201 | } |
202 | |
203 | n_ondisk = kvmalloc(size: btree_buf_bytes(b), GFP_KERNEL); |
204 | if (!n_ondisk) { |
205 | prt_printf(out, "memory allocation failure\n" ); |
206 | goto out; |
207 | } |
208 | |
209 | bio = bio_alloc_bioset(bdev: ca->disk_sb.bdev, |
210 | nr_vecs: buf_pages(p: n_ondisk, len: btree_buf_bytes(b)), |
211 | opf: REQ_OP_READ|REQ_META, |
212 | GFP_NOFS, |
213 | bs: &c->btree_bio); |
214 | bio->bi_iter.bi_sector = pick.ptr.offset; |
215 | bch2_bio_map(bio, base: n_ondisk, btree_buf_bytes(b)); |
216 | |
217 | ret = submit_bio_wait(bio); |
218 | if (ret) { |
219 | prt_printf(out, "IO error reading btree node: %s\n" , bch2_err_str(ret)); |
220 | goto out; |
221 | } |
222 | |
223 | while (offset < btree_sectors(c)) { |
224 | struct bset *i; |
225 | struct nonce nonce; |
226 | struct bch_csum csum; |
227 | struct bkey_packed *k; |
228 | unsigned sectors; |
229 | |
230 | if (!offset) { |
231 | i = &n_ondisk->keys; |
232 | |
233 | if (!bch2_checksum_type_valid(c, type: BSET_CSUM_TYPE(k: i))) { |
234 | prt_printf(out, "unknown checksum type at offset %u: %llu\n" , |
235 | offset, BSET_CSUM_TYPE(i)); |
236 | goto out; |
237 | } |
238 | |
239 | nonce = btree_nonce(i, offset: offset << 9); |
240 | csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk); |
241 | |
242 | if (bch2_crc_cmp(l: csum, r: n_ondisk->csum)) { |
243 | prt_printf(out, "invalid checksum\n" ); |
244 | goto out; |
245 | } |
246 | |
247 | bset_encrypt(c, i, offset: offset << 9); |
248 | |
249 | sectors = vstruct_sectors(n_ondisk, c->block_bits); |
250 | } else { |
251 | struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9); |
252 | |
253 | i = &bne->keys; |
254 | |
255 | if (i->seq != n_ondisk->keys.seq) |
256 | break; |
257 | |
258 | if (!bch2_checksum_type_valid(c, type: BSET_CSUM_TYPE(k: i))) { |
259 | prt_printf(out, "unknown checksum type at offset %u: %llu\n" , |
260 | offset, BSET_CSUM_TYPE(i)); |
261 | goto out; |
262 | } |
263 | |
264 | nonce = btree_nonce(i, offset: offset << 9); |
265 | csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); |
266 | |
267 | if (bch2_crc_cmp(l: csum, r: bne->csum)) { |
268 | prt_printf(out, "invalid checksum" ); |
269 | goto out; |
270 | } |
271 | |
272 | bset_encrypt(c, i, offset: offset << 9); |
273 | |
274 | sectors = vstruct_sectors(bne, c->block_bits); |
275 | } |
276 | |
277 | prt_printf(out, " offset %u version %u, journal seq %llu\n" , |
278 | offset, |
279 | le16_to_cpu(i->version), |
280 | le64_to_cpu(i->journal_seq)); |
281 | offset += sectors; |
282 | |
283 | printbuf_indent_add(out, 4); |
284 | |
285 | for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { |
286 | struct bkey u; |
287 | |
288 | bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, u: &u)); |
289 | prt_newline(out); |
290 | } |
291 | |
292 | printbuf_indent_sub(out, 4); |
293 | } |
294 | out: |
295 | if (bio) |
296 | bio_put(bio); |
297 | kvfree(addr: n_ondisk); |
298 | percpu_ref_put(ref: &ca->io_ref); |
299 | } |
300 | |
301 | #ifdef CONFIG_DEBUG_FS |
302 | |
303 | /* XXX: bch_fs refcounting */ |
304 | |
305 | struct dump_iter { |
306 | struct bch_fs *c; |
307 | enum btree_id id; |
308 | struct bpos from; |
309 | struct bpos prev_node; |
310 | u64 iter; |
311 | |
312 | struct printbuf buf; |
313 | |
314 | char __user *ubuf; /* destination user buffer */ |
315 | size_t size; /* size of requested read */ |
316 | ssize_t ret; /* bytes read so far */ |
317 | }; |
318 | |
319 | static ssize_t flush_buf(struct dump_iter *i) |
320 | { |
321 | if (i->buf.pos) { |
322 | size_t bytes = min_t(size_t, i->buf.pos, i->size); |
323 | int copied = bytes - copy_to_user(to: i->ubuf, from: i->buf.buf, n: bytes); |
324 | |
325 | i->ret += copied; |
326 | i->ubuf += copied; |
327 | i->size -= copied; |
328 | i->buf.pos -= copied; |
329 | memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos); |
330 | |
331 | if (copied != bytes) |
332 | return -EFAULT; |
333 | } |
334 | |
335 | return i->size ? 0 : i->ret; |
336 | } |
337 | |
338 | static int bch2_dump_open(struct inode *inode, struct file *file) |
339 | { |
340 | struct btree_debug *bd = inode->i_private; |
341 | struct dump_iter *i; |
342 | |
343 | i = kzalloc(size: sizeof(struct dump_iter), GFP_KERNEL); |
344 | if (!i) |
345 | return -ENOMEM; |
346 | |
347 | file->private_data = i; |
348 | i->from = POS_MIN; |
349 | i->iter = 0; |
350 | i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); |
351 | i->id = bd->id; |
352 | i->buf = PRINTBUF; |
353 | |
354 | return 0; |
355 | } |
356 | |
357 | static int bch2_dump_release(struct inode *inode, struct file *file) |
358 | { |
359 | struct dump_iter *i = file->private_data; |
360 | |
361 | printbuf_exit(&i->buf); |
362 | kfree(objp: i); |
363 | return 0; |
364 | } |
365 | |
366 | static ssize_t bch2_read_btree(struct file *file, char __user *buf, |
367 | size_t size, loff_t *ppos) |
368 | { |
369 | struct dump_iter *i = file->private_data; |
370 | |
371 | i->ubuf = buf; |
372 | i->size = size; |
373 | i->ret = 0; |
374 | |
375 | return flush_buf(i) ?: |
376 | bch2_trans_run(i->c, |
377 | for_each_btree_key(trans, iter, i->id, i->from, |
378 | BTREE_ITER_PREFETCH| |
379 | BTREE_ITER_ALL_SNAPSHOTS, k, ({ |
380 | bch2_bkey_val_to_text(&i->buf, i->c, k); |
381 | prt_newline(&i->buf); |
382 | bch2_trans_unlock(trans); |
383 | i->from = bpos_successor(iter.pos); |
384 | flush_buf(i); |
385 | }))) ?: |
386 | i->ret; |
387 | } |
388 | |
389 | static const struct file_operations btree_debug_ops = { |
390 | .owner = THIS_MODULE, |
391 | .open = bch2_dump_open, |
392 | .release = bch2_dump_release, |
393 | .read = bch2_read_btree, |
394 | }; |
395 | |
396 | static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, |
397 | size_t size, loff_t *ppos) |
398 | { |
399 | struct dump_iter *i = file->private_data; |
400 | struct btree_trans *trans; |
401 | struct btree_iter iter; |
402 | struct btree *b; |
403 | ssize_t ret; |
404 | |
405 | i->ubuf = buf; |
406 | i->size = size; |
407 | i->ret = 0; |
408 | |
409 | ret = flush_buf(i); |
410 | if (ret) |
411 | return ret; |
412 | |
413 | if (bpos_eq(SPOS_MAX, r: i->from)) |
414 | return i->ret; |
415 | |
416 | trans = bch2_trans_get(i->c); |
417 | retry: |
418 | bch2_trans_begin(trans); |
419 | |
420 | for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { |
421 | bch2_btree_node_to_text(&i->buf, i->c, b); |
422 | i->from = !bpos_eq(SPOS_MAX, r: b->key.k.p) |
423 | ? bpos_successor(p: b->key.k.p) |
424 | : b->key.k.p; |
425 | |
426 | ret = drop_locks_do(trans, flush_buf(i)); |
427 | if (ret) |
428 | break; |
429 | } |
430 | bch2_trans_iter_exit(trans, &iter); |
431 | |
432 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
433 | goto retry; |
434 | |
435 | bch2_trans_put(trans); |
436 | |
437 | if (!ret) |
438 | ret = flush_buf(i); |
439 | |
440 | return ret ?: i->ret; |
441 | } |
442 | |
443 | static const struct file_operations btree_format_debug_ops = { |
444 | .owner = THIS_MODULE, |
445 | .open = bch2_dump_open, |
446 | .release = bch2_dump_release, |
447 | .read = bch2_read_btree_formats, |
448 | }; |
449 | |
450 | static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, |
451 | size_t size, loff_t *ppos) |
452 | { |
453 | struct dump_iter *i = file->private_data; |
454 | |
455 | i->ubuf = buf; |
456 | i->size = size; |
457 | i->ret = 0; |
458 | |
459 | return flush_buf(i) ?: |
460 | bch2_trans_run(i->c, |
461 | for_each_btree_key(trans, iter, i->id, i->from, |
462 | BTREE_ITER_PREFETCH| |
463 | BTREE_ITER_ALL_SNAPSHOTS, k, ({ |
464 | struct btree_path_level *l = |
465 | &btree_iter_path(trans, &iter)->l[0]; |
466 | struct bkey_packed *_k = |
467 | bch2_btree_node_iter_peek(&l->iter, l->b); |
468 | |
469 | if (bpos_gt(l->b->key.k.p, i->prev_node)) { |
470 | bch2_btree_node_to_text(&i->buf, i->c, l->b); |
471 | i->prev_node = l->b->key.k.p; |
472 | } |
473 | |
474 | bch2_bfloat_to_text(&i->buf, l->b, _k); |
475 | bch2_trans_unlock(trans); |
476 | i->from = bpos_successor(iter.pos); |
477 | flush_buf(i); |
478 | }))) ?: |
479 | i->ret; |
480 | } |
481 | |
482 | static const struct file_operations bfloat_failed_debug_ops = { |
483 | .owner = THIS_MODULE, |
484 | .open = bch2_dump_open, |
485 | .release = bch2_dump_release, |
486 | .read = bch2_read_bfloat_failed, |
487 | }; |
488 | |
489 | static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, |
490 | struct btree *b) |
491 | { |
492 | if (!out->nr_tabstops) |
493 | printbuf_tabstop_push(out, 32); |
494 | |
495 | prt_printf(out, "%px btree=%s l=%u " , |
496 | b, |
497 | bch2_btree_id_str(b->c.btree_id), |
498 | b->c.level); |
499 | prt_newline(out); |
500 | |
501 | printbuf_indent_add(out, 2); |
502 | |
503 | bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k: &b->key)); |
504 | prt_newline(out); |
505 | |
506 | prt_printf(out, "flags: " ); |
507 | prt_tab(out); |
508 | prt_bitflags(out, bch2_btree_node_flags, b->flags); |
509 | prt_newline(out); |
510 | |
511 | prt_printf(out, "pcpu read locks: " ); |
512 | prt_tab(out); |
513 | prt_printf(out, "%u" , b->c.lock.readers != NULL); |
514 | prt_newline(out); |
515 | |
516 | prt_printf(out, "written:" ); |
517 | prt_tab(out); |
518 | prt_printf(out, "%u" , b->written); |
519 | prt_newline(out); |
520 | |
521 | prt_printf(out, "writes blocked:" ); |
522 | prt_tab(out); |
523 | prt_printf(out, "%u" , !list_empty_careful(&b->write_blocked)); |
524 | prt_newline(out); |
525 | |
526 | prt_printf(out, "will make reachable:" ); |
527 | prt_tab(out); |
528 | prt_printf(out, "%lx" , b->will_make_reachable); |
529 | prt_newline(out); |
530 | |
531 | prt_printf(out, "journal pin %px:" , &b->writes[0].journal); |
532 | prt_tab(out); |
533 | prt_printf(out, "%llu" , b->writes[0].journal.seq); |
534 | prt_newline(out); |
535 | |
536 | prt_printf(out, "journal pin %px:" , &b->writes[1].journal); |
537 | prt_tab(out); |
538 | prt_printf(out, "%llu" , b->writes[1].journal.seq); |
539 | prt_newline(out); |
540 | |
541 | printbuf_indent_sub(out, 2); |
542 | } |
543 | |
544 | static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, |
545 | size_t size, loff_t *ppos) |
546 | { |
547 | struct dump_iter *i = file->private_data; |
548 | struct bch_fs *c = i->c; |
549 | bool done = false; |
550 | ssize_t ret = 0; |
551 | |
552 | i->ubuf = buf; |
553 | i->size = size; |
554 | i->ret = 0; |
555 | |
556 | do { |
557 | struct bucket_table *tbl; |
558 | struct rhash_head *pos; |
559 | struct btree *b; |
560 | |
561 | ret = flush_buf(i); |
562 | if (ret) |
563 | return ret; |
564 | |
565 | rcu_read_lock(); |
566 | i->buf.atomic++; |
567 | tbl = rht_dereference_rcu(c->btree_cache.table.tbl, |
568 | &c->btree_cache.table); |
569 | if (i->iter < tbl->size) { |
570 | rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) |
571 | bch2_cached_btree_node_to_text(out: &i->buf, c, b); |
572 | i->iter++; |
573 | } else { |
574 | done = true; |
575 | } |
576 | --i->buf.atomic; |
577 | rcu_read_unlock(); |
578 | } while (!done); |
579 | |
580 | if (i->buf.allocation_failure) |
581 | ret = -ENOMEM; |
582 | |
583 | if (!ret) |
584 | ret = flush_buf(i); |
585 | |
586 | return ret ?: i->ret; |
587 | } |
588 | |
589 | static const struct file_operations cached_btree_nodes_ops = { |
590 | .owner = THIS_MODULE, |
591 | .open = bch2_dump_open, |
592 | .release = bch2_dump_release, |
593 | .read = bch2_cached_btree_nodes_read, |
594 | }; |
595 | |
596 | static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, |
597 | size_t size, loff_t *ppos) |
598 | { |
599 | struct dump_iter *i = file->private_data; |
600 | struct bch_fs *c = i->c; |
601 | struct btree_trans *trans; |
602 | ssize_t ret = 0; |
603 | u32 seq; |
604 | |
605 | i->ubuf = buf; |
606 | i->size = size; |
607 | i->ret = 0; |
608 | restart: |
609 | seqmutex_lock(lock: &c->btree_trans_lock); |
610 | list_for_each_entry(trans, &c->btree_trans_list, list) { |
611 | struct task_struct *task = READ_ONCE(trans->locking_wait.task); |
612 | |
613 | if (!task || task->pid <= i->iter) |
614 | continue; |
615 | |
616 | closure_get(cl: &trans->ref); |
617 | seq = seqmutex_seq(lock: &c->btree_trans_lock); |
618 | seqmutex_unlock(lock: &c->btree_trans_lock); |
619 | |
620 | ret = flush_buf(i); |
621 | if (ret) { |
622 | closure_put(cl: &trans->ref); |
623 | goto unlocked; |
624 | } |
625 | |
626 | bch2_btree_trans_to_text(&i->buf, trans); |
627 | |
628 | prt_printf(&i->buf, "backtrace:" ); |
629 | prt_newline(&i->buf); |
630 | printbuf_indent_add(&i->buf, 2); |
631 | bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); |
632 | printbuf_indent_sub(&i->buf, 2); |
633 | prt_newline(&i->buf); |
634 | |
635 | i->iter = task->pid; |
636 | |
637 | closure_put(cl: &trans->ref); |
638 | |
639 | if (!seqmutex_relock(lock: &c->btree_trans_lock, seq)) |
640 | goto restart; |
641 | } |
642 | seqmutex_unlock(lock: &c->btree_trans_lock); |
643 | unlocked: |
644 | if (i->buf.allocation_failure) |
645 | ret = -ENOMEM; |
646 | |
647 | if (!ret) |
648 | ret = flush_buf(i); |
649 | |
650 | return ret ?: i->ret; |
651 | } |
652 | |
653 | static const struct file_operations btree_transactions_ops = { |
654 | .owner = THIS_MODULE, |
655 | .open = bch2_dump_open, |
656 | .release = bch2_dump_release, |
657 | .read = bch2_btree_transactions_read, |
658 | }; |
659 | |
660 | static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, |
661 | size_t size, loff_t *ppos) |
662 | { |
663 | struct dump_iter *i = file->private_data; |
664 | struct bch_fs *c = i->c; |
665 | bool done = false; |
666 | int err; |
667 | |
668 | i->ubuf = buf; |
669 | i->size = size; |
670 | i->ret = 0; |
671 | |
672 | while (1) { |
673 | err = flush_buf(i); |
674 | if (err) |
675 | return err; |
676 | |
677 | if (!i->size) |
678 | break; |
679 | |
680 | if (done) |
681 | break; |
682 | |
683 | done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter); |
684 | i->iter++; |
685 | } |
686 | |
687 | if (i->buf.allocation_failure) |
688 | return -ENOMEM; |
689 | |
690 | return i->ret; |
691 | } |
692 | |
693 | static const struct file_operations journal_pins_ops = { |
694 | .owner = THIS_MODULE, |
695 | .open = bch2_dump_open, |
696 | .release = bch2_dump_release, |
697 | .read = bch2_journal_pins_read, |
698 | }; |
699 | |
700 | static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf, |
701 | size_t size, loff_t *ppos) |
702 | { |
703 | struct dump_iter *i = file->private_data; |
704 | struct bch_fs *c = i->c; |
705 | int err; |
706 | |
707 | i->ubuf = buf; |
708 | i->size = size; |
709 | i->ret = 0; |
710 | |
711 | if (!i->iter) { |
712 | bch2_btree_updates_to_text(&i->buf, c); |
713 | i->iter++; |
714 | } |
715 | |
716 | err = flush_buf(i); |
717 | if (err) |
718 | return err; |
719 | |
720 | if (i->buf.allocation_failure) |
721 | return -ENOMEM; |
722 | |
723 | return i->ret; |
724 | } |
725 | |
726 | static const struct file_operations btree_updates_ops = { |
727 | .owner = THIS_MODULE, |
728 | .open = bch2_dump_open, |
729 | .release = bch2_dump_release, |
730 | .read = bch2_btree_updates_read, |
731 | }; |
732 | |
733 | static int btree_transaction_stats_open(struct inode *inode, struct file *file) |
734 | { |
735 | struct bch_fs *c = inode->i_private; |
736 | struct dump_iter *i; |
737 | |
738 | i = kzalloc(size: sizeof(struct dump_iter), GFP_KERNEL); |
739 | if (!i) |
740 | return -ENOMEM; |
741 | |
742 | i->iter = 1; |
743 | i->c = c; |
744 | i->buf = PRINTBUF; |
745 | file->private_data = i; |
746 | |
747 | return 0; |
748 | } |
749 | |
750 | static int btree_transaction_stats_release(struct inode *inode, struct file *file) |
751 | { |
752 | struct dump_iter *i = file->private_data; |
753 | |
754 | printbuf_exit(&i->buf); |
755 | kfree(objp: i); |
756 | |
757 | return 0; |
758 | } |
759 | |
760 | static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, |
761 | size_t size, loff_t *ppos) |
762 | { |
763 | struct dump_iter *i = file->private_data; |
764 | struct bch_fs *c = i->c; |
765 | int err; |
766 | |
767 | i->ubuf = buf; |
768 | i->size = size; |
769 | i->ret = 0; |
770 | |
771 | while (1) { |
772 | struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; |
773 | |
774 | err = flush_buf(i); |
775 | if (err) |
776 | return err; |
777 | |
778 | if (!i->size) |
779 | break; |
780 | |
781 | if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) || |
782 | !bch2_btree_transaction_fns[i->iter]) |
783 | break; |
784 | |
785 | prt_printf(&i->buf, "%s: " , bch2_btree_transaction_fns[i->iter]); |
786 | prt_newline(&i->buf); |
787 | printbuf_indent_add(&i->buf, 2); |
788 | |
789 | mutex_lock(&s->lock); |
790 | |
791 | prt_printf(&i->buf, "Max mem used: %u" , s->max_mem); |
792 | prt_newline(&i->buf); |
793 | |
794 | prt_printf(&i->buf, "Transaction duration:" ); |
795 | prt_newline(&i->buf); |
796 | |
797 | printbuf_indent_add(&i->buf, 2); |
798 | bch2_time_stats_to_text(&i->buf, &s->duration); |
799 | printbuf_indent_sub(&i->buf, 2); |
800 | |
801 | if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { |
802 | prt_printf(&i->buf, "Lock hold times:" ); |
803 | prt_newline(&i->buf); |
804 | |
805 | printbuf_indent_add(&i->buf, 2); |
806 | bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); |
807 | printbuf_indent_sub(&i->buf, 2); |
808 | } |
809 | |
810 | if (s->max_paths_text) { |
811 | prt_printf(&i->buf, "Maximum allocated btree paths (%u):" , s->nr_max_paths); |
812 | prt_newline(&i->buf); |
813 | |
814 | printbuf_indent_add(&i->buf, 2); |
815 | prt_str_indented(out: &i->buf, str: s->max_paths_text); |
816 | printbuf_indent_sub(&i->buf, 2); |
817 | } |
818 | |
819 | mutex_unlock(lock: &s->lock); |
820 | |
821 | printbuf_indent_sub(&i->buf, 2); |
822 | prt_newline(&i->buf); |
823 | i->iter++; |
824 | } |
825 | |
826 | if (i->buf.allocation_failure) |
827 | return -ENOMEM; |
828 | |
829 | return i->ret; |
830 | } |
831 | |
832 | static const struct file_operations btree_transaction_stats_op = { |
833 | .owner = THIS_MODULE, |
834 | .open = btree_transaction_stats_open, |
835 | .release = btree_transaction_stats_release, |
836 | .read = btree_transaction_stats_read, |
837 | }; |
838 | |
839 | static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, |
840 | size_t size, loff_t *ppos) |
841 | { |
842 | struct dump_iter *i = file->private_data; |
843 | struct bch_fs *c = i->c; |
844 | struct btree_trans *trans; |
845 | ssize_t ret = 0; |
846 | u32 seq; |
847 | |
848 | i->ubuf = buf; |
849 | i->size = size; |
850 | i->ret = 0; |
851 | |
852 | if (i->iter) |
853 | goto out; |
854 | restart: |
855 | seqmutex_lock(lock: &c->btree_trans_lock); |
856 | list_for_each_entry(trans, &c->btree_trans_list, list) { |
857 | struct task_struct *task = READ_ONCE(trans->locking_wait.task); |
858 | |
859 | if (!task || task->pid <= i->iter) |
860 | continue; |
861 | |
862 | closure_get(cl: &trans->ref); |
863 | seq = seqmutex_seq(lock: &c->btree_trans_lock); |
864 | seqmutex_unlock(lock: &c->btree_trans_lock); |
865 | |
866 | ret = flush_buf(i); |
867 | if (ret) { |
868 | closure_put(cl: &trans->ref); |
869 | goto out; |
870 | } |
871 | |
872 | bch2_check_for_deadlock(trans, &i->buf); |
873 | |
874 | i->iter = task->pid; |
875 | |
876 | closure_put(cl: &trans->ref); |
877 | |
878 | if (!seqmutex_relock(lock: &c->btree_trans_lock, seq)) |
879 | goto restart; |
880 | } |
881 | seqmutex_unlock(lock: &c->btree_trans_lock); |
882 | out: |
883 | if (i->buf.allocation_failure) |
884 | ret = -ENOMEM; |
885 | |
886 | if (!ret) |
887 | ret = flush_buf(i); |
888 | |
889 | return ret ?: i->ret; |
890 | } |
891 | |
892 | static const struct file_operations btree_deadlock_ops = { |
893 | .owner = THIS_MODULE, |
894 | .open = bch2_dump_open, |
895 | .release = bch2_dump_release, |
896 | .read = bch2_btree_deadlock_read, |
897 | }; |
898 | |
899 | void bch2_fs_debug_exit(struct bch_fs *c) |
900 | { |
901 | if (!IS_ERR_OR_NULL(ptr: c->fs_debug_dir)) |
902 | debugfs_remove_recursive(dentry: c->fs_debug_dir); |
903 | } |
904 | |
905 | static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd) |
906 | { |
907 | struct dentry *d; |
908 | |
909 | d = debugfs_create_dir(name: bch2_btree_id_str(bd->id), parent: c->btree_debug_dir); |
910 | |
911 | debugfs_create_file(name: "keys" , mode: 0400, parent: d, data: bd, fops: &btree_debug_ops); |
912 | |
913 | debugfs_create_file(name: "formats" , mode: 0400, parent: d, data: bd, fops: &btree_format_debug_ops); |
914 | |
915 | debugfs_create_file(name: "bfloat-failed" , mode: 0400, parent: d, data: bd, |
916 | fops: &bfloat_failed_debug_ops); |
917 | } |
918 | |
919 | void bch2_fs_debug_init(struct bch_fs *c) |
920 | { |
921 | struct btree_debug *bd; |
922 | char name[100]; |
923 | |
924 | if (IS_ERR_OR_NULL(ptr: bch_debug)) |
925 | return; |
926 | |
927 | snprintf(buf: name, size: sizeof(name), fmt: "%pU" , c->sb.user_uuid.b); |
928 | c->fs_debug_dir = debugfs_create_dir(name, parent: bch_debug); |
929 | if (IS_ERR_OR_NULL(ptr: c->fs_debug_dir)) |
930 | return; |
931 | |
932 | debugfs_create_file(name: "cached_btree_nodes" , mode: 0400, parent: c->fs_debug_dir, |
933 | data: c->btree_debug, fops: &cached_btree_nodes_ops); |
934 | |
935 | debugfs_create_file(name: "btree_transactions" , mode: 0400, parent: c->fs_debug_dir, |
936 | data: c->btree_debug, fops: &btree_transactions_ops); |
937 | |
938 | debugfs_create_file(name: "journal_pins" , mode: 0400, parent: c->fs_debug_dir, |
939 | data: c->btree_debug, fops: &journal_pins_ops); |
940 | |
941 | debugfs_create_file(name: "btree_updates" , mode: 0400, parent: c->fs_debug_dir, |
942 | data: c->btree_debug, fops: &btree_updates_ops); |
943 | |
944 | debugfs_create_file(name: "btree_transaction_stats" , mode: 0400, parent: c->fs_debug_dir, |
945 | data: c, fops: &btree_transaction_stats_op); |
946 | |
947 | debugfs_create_file(name: "btree_deadlock" , mode: 0400, parent: c->fs_debug_dir, |
948 | data: c->btree_debug, fops: &btree_deadlock_ops); |
949 | |
950 | c->btree_debug_dir = debugfs_create_dir(name: "btrees" , parent: c->fs_debug_dir); |
951 | if (IS_ERR_OR_NULL(ptr: c->btree_debug_dir)) |
952 | return; |
953 | |
954 | for (bd = c->btree_debug; |
955 | bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); |
956 | bd++) { |
957 | bd->id = bd - c->btree_debug; |
958 | bch2_fs_debug_btree_init(c, bd); |
959 | } |
960 | } |
961 | |
962 | #endif |
963 | |
964 | void bch2_debug_exit(void) |
965 | { |
966 | if (!IS_ERR_OR_NULL(ptr: bch_debug)) |
967 | debugfs_remove_recursive(dentry: bch_debug); |
968 | } |
969 | |
970 | int __init bch2_debug_init(void) |
971 | { |
972 | bch_debug = debugfs_create_dir(name: "bcachefs" , NULL); |
973 | return 0; |
974 | } |
975 | |