1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include "bcachefs.h" |
3 | #include "error.h" |
4 | #include "journal.h" |
5 | #include "recovery_passes.h" |
6 | #include "super.h" |
7 | #include "thread_with_file.h" |
8 | |
/*
 * Per-format-string error count at which bch2_fsck_err() starts ratelimiting
 * (only when the ratelimit_errors option is set and the caller did not pass
 * FSCK_NO_RATELIMIT): the instance seen when the count equals this value is
 * still printed together with a ratelimit notice, later ones are not printed.
 */
9 | #define FSCK_ERR_RATELIMIT_NR 10 |
10 | |
11 | bool bch2_inconsistent_error(struct bch_fs *c) |
12 | { |
13 | set_bit(nr: BCH_FS_error, addr: &c->flags); |
14 | |
15 | switch (c->opts.errors) { |
16 | case BCH_ON_ERROR_continue: |
17 | return false; |
18 | case BCH_ON_ERROR_ro: |
19 | if (bch2_fs_emergency_read_only(c)) |
20 | bch_err(c, "inconsistency detected - emergency read only at journal seq %llu" , |
21 | journal_cur_seq(&c->journal)); |
22 | return true; |
23 | case BCH_ON_ERROR_panic: |
24 | panic(bch2_fmt(c, "panic after error" )); |
25 | return true; |
26 | default: |
27 | BUG(); |
28 | } |
29 | } |
30 | |
31 | int bch2_topology_error(struct bch_fs *c) |
32 | { |
33 | set_bit(nr: BCH_FS_topology_error, addr: &c->flags); |
34 | if (!test_bit(BCH_FS_fsck_running, &c->flags)) { |
35 | bch2_inconsistent_error(c); |
36 | return -BCH_ERR_btree_need_topology_repair; |
37 | } else { |
38 | return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: |
39 | -BCH_ERR_btree_node_read_validate_error; |
40 | } |
41 | } |
42 | |
/*
 * Force the filesystem into emergency read-only mode; a message is logged
 * only when bch2_fs_emergency_read_only() returns true.
 */
void bch2_fatal_error(struct bch_fs *c)
{
	if (!bch2_fs_emergency_read_only(c))
		return;

	bch_err(c, "fatal error - emergency read only");
}
48 | |
49 | void bch2_io_error_work(struct work_struct *work) |
50 | { |
51 | struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); |
52 | struct bch_fs *c = ca->fs; |
53 | bool dev; |
54 | |
55 | down_write(sem: &c->state_lock); |
56 | dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, |
57 | BCH_FORCE_IF_DEGRADED); |
58 | if (dev |
59 | ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, |
60 | BCH_FORCE_IF_DEGRADED) |
61 | : bch2_fs_emergency_read_only(c)) |
62 | bch_err(ca, |
63 | "too many IO errors, setting %s RO" , |
64 | dev ? "device" : "filesystem" ); |
65 | up_write(sem: &c->state_lock); |
66 | } |
67 | |
68 | void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) |
69 | { |
70 | atomic64_inc(v: &ca->errors[type]); |
71 | //queue_work(system_long_wq, &ca->io_error_work); |
72 | } |
73 | |
/*
 * Answers to the interactive "fix?" prompt.
 *
 * The numeric layout is relied upon by bch2_fsck_err(): bit 0 set means
 * "yes", and values >= YN_ALLNO mean the answer applies to all further
 * errors of the same type.
 */
enum ask_yn {
	YN_NO		= 0,	/* 'n': do not fix this one */
	YN_YES		= 1,	/* 'y': fix this one */
	YN_ALLNO	= 2,	/* 'N': do not fix any of this type */
	YN_ALLYES	= 3,	/* 'Y': fix all of this type */
};
80 | |
81 | static enum ask_yn parse_yn_response(char *buf) |
82 | { |
83 | buf = strim(buf); |
84 | |
85 | if (strlen(buf) == 1) |
86 | switch (buf[0]) { |
87 | case 'n': |
88 | return YN_NO; |
89 | case 'y': |
90 | return YN_YES; |
91 | case 'N': |
92 | return YN_ALLNO; |
93 | case 'Y': |
94 | return YN_ALLYES; |
95 | } |
96 | return -1; |
97 | } |
98 | |
99 | #ifdef __KERNEL__ |
100 | static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) |
101 | { |
102 | struct stdio_redirect *stdio = c->stdio; |
103 | |
104 | if (c->stdio_filter && c->stdio_filter != current) |
105 | stdio = NULL; |
106 | |
107 | if (!stdio) |
108 | return YN_NO; |
109 | |
110 | char buf[100]; |
111 | int ret; |
112 | |
113 | do { |
114 | bch2_print(c, " (y,n, or Y,N for all errors of this type) " ); |
115 | |
116 | int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); |
117 | if (r < 0) |
118 | return YN_NO; |
119 | buf[r] = '\0'; |
120 | } while ((ret = parse_yn_response(buf)) < 0); |
121 | |
122 | return ret; |
123 | } |
124 | #else |
125 | |
126 | #include "tools-util.h" |
127 | |
128 | static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) |
129 | { |
130 | char *buf = NULL; |
131 | size_t buflen = 0; |
132 | int ret; |
133 | |
134 | do { |
135 | fputs(" (y,n, or Y,N for all errors of this type) " , stdout); |
136 | fflush(stdout); |
137 | |
138 | if (getline(&buf, &buflen, stdin) < 0) |
139 | die("error reading from standard input" ); |
140 | } while ((ret = parse_yn_response(buf)) < 0); |
141 | |
142 | free(buf); |
143 | return ret; |
144 | } |
145 | |
146 | #endif |
147 | |
148 | static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) |
149 | { |
150 | struct fsck_err_state *s; |
151 | |
152 | if (!test_bit(BCH_FS_fsck_running, &c->flags)) |
153 | return NULL; |
154 | |
155 | list_for_each_entry(s, &c->fsck_error_msgs, list) |
156 | if (s->fmt == fmt) { |
157 | /* |
158 | * move it to the head of the list: repeated fsck errors |
159 | * are common |
160 | */ |
161 | list_move(list: &s->list, head: &c->fsck_error_msgs); |
162 | return s; |
163 | } |
164 | |
165 | s = kzalloc(size: sizeof(*s), GFP_NOFS); |
166 | if (!s) { |
167 | if (!c->fsck_alloc_msgs_err) |
168 | bch_err(c, "kmalloc err, cannot ratelimit fsck errs" ); |
169 | c->fsck_alloc_msgs_err = true; |
170 | return NULL; |
171 | } |
172 | |
173 | INIT_LIST_HEAD(list: &s->list); |
174 | s->fmt = fmt; |
175 | list_add(new: &s->list, head: &c->fsck_error_msgs); |
176 | return s; |
177 | } |
178 | |
179 | int bch2_fsck_err(struct bch_fs *c, |
180 | enum bch_fsck_flags flags, |
181 | enum bch_sb_error_id err, |
182 | const char *fmt, ...) |
183 | { |
184 | struct fsck_err_state *s = NULL; |
185 | va_list args; |
186 | bool print = true, suppressing = false, inconsistent = false; |
187 | struct printbuf buf = PRINTBUF, *out = &buf; |
188 | int ret = -BCH_ERR_fsck_ignore; |
189 | |
190 | if ((flags & FSCK_CAN_FIX) && |
191 | test_bit(err, c->sb.errors_silent)) |
192 | return -BCH_ERR_fsck_fix; |
193 | |
194 | bch2_sb_error_count(c, err); |
195 | |
196 | va_start(args, fmt); |
197 | prt_vprintf(out, fmt, args); |
198 | va_end(args); |
199 | |
200 | mutex_lock(&c->fsck_error_msgs_lock); |
201 | s = fsck_err_get(c, fmt); |
202 | if (s) { |
203 | /* |
204 | * We may be called multiple times for the same error on |
205 | * transaction restart - this memoizes instead of asking the user |
206 | * multiple times for the same error: |
207 | */ |
208 | if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { |
209 | ret = s->ret; |
210 | mutex_unlock(lock: &c->fsck_error_msgs_lock); |
211 | printbuf_exit(&buf); |
212 | return ret; |
213 | } |
214 | |
215 | kfree(objp: s->last_msg); |
216 | s->last_msg = kstrdup(s: buf.buf, GFP_KERNEL); |
217 | |
218 | if (c->opts.ratelimit_errors && |
219 | !(flags & FSCK_NO_RATELIMIT) && |
220 | s->nr >= FSCK_ERR_RATELIMIT_NR) { |
221 | if (s->nr == FSCK_ERR_RATELIMIT_NR) |
222 | suppressing = true; |
223 | else |
224 | print = false; |
225 | } |
226 | |
227 | s->nr++; |
228 | } |
229 | |
230 | #ifdef BCACHEFS_LOG_PREFIX |
231 | if (!strncmp(fmt, "bcachefs:" , 9)) |
232 | prt_printf(out, bch2_log_msg(c, "" )); |
233 | #endif |
234 | |
235 | if (!test_bit(BCH_FS_fsck_running, &c->flags)) { |
236 | if (c->opts.errors != BCH_ON_ERROR_continue || |
237 | !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { |
238 | prt_str(out, str: ", shutting down" ); |
239 | inconsistent = true; |
240 | ret = -BCH_ERR_fsck_errors_not_fixed; |
241 | } else if (flags & FSCK_CAN_FIX) { |
242 | prt_str(out, str: ", fixing" ); |
243 | ret = -BCH_ERR_fsck_fix; |
244 | } else { |
245 | prt_str(out, str: ", continuing" ); |
246 | ret = -BCH_ERR_fsck_ignore; |
247 | } |
248 | } else if (c->opts.fix_errors == FSCK_FIX_exit) { |
249 | prt_str(out, str: ", exiting" ); |
250 | ret = -BCH_ERR_fsck_errors_not_fixed; |
251 | } else if (flags & FSCK_CAN_FIX) { |
252 | int fix = s && s->fix |
253 | ? s->fix |
254 | : c->opts.fix_errors; |
255 | |
256 | if (fix == FSCK_FIX_ask) { |
257 | int ask; |
258 | |
259 | prt_str(out, str: ": fix?" ); |
260 | if (bch2_fs_stdio_redirect(c)) |
261 | bch2_print(c, "%s" , out->buf); |
262 | else |
263 | bch2_print_string_as_lines(KERN_ERR, lines: out->buf); |
264 | print = false; |
265 | |
266 | ask = bch2_fsck_ask_yn(c); |
267 | |
268 | if (ask >= YN_ALLNO && s) |
269 | s->fix = ask == YN_ALLNO |
270 | ? FSCK_FIX_no |
271 | : FSCK_FIX_yes; |
272 | |
273 | ret = ask & 1 |
274 | ? -BCH_ERR_fsck_fix |
275 | : -BCH_ERR_fsck_ignore; |
276 | } else if (fix == FSCK_FIX_yes || |
277 | (c->opts.nochanges && |
278 | !(flags & FSCK_CAN_IGNORE))) { |
279 | prt_str(out, str: ", fixing" ); |
280 | ret = -BCH_ERR_fsck_fix; |
281 | } else { |
282 | prt_str(out, str: ", not fixing" ); |
283 | } |
284 | } else if (flags & FSCK_NEED_FSCK) { |
285 | prt_str(out, str: " (run fsck to correct)" ); |
286 | } else { |
287 | prt_str(out, str: " (repair unimplemented)" ); |
288 | } |
289 | |
290 | if (ret == -BCH_ERR_fsck_ignore && |
291 | (c->opts.fix_errors == FSCK_FIX_exit || |
292 | !(flags & FSCK_CAN_IGNORE))) |
293 | ret = -BCH_ERR_fsck_errors_not_fixed; |
294 | |
295 | if (print) { |
296 | if (bch2_fs_stdio_redirect(c)) |
297 | bch2_print(c, "%s\n" , out->buf); |
298 | else |
299 | bch2_print_string_as_lines(KERN_ERR, lines: out->buf); |
300 | } |
301 | |
302 | if (test_bit(BCH_FS_fsck_running, &c->flags) && |
303 | (ret != -BCH_ERR_fsck_fix && |
304 | ret != -BCH_ERR_fsck_ignore)) |
305 | bch_err(c, "Unable to continue, halting" ); |
306 | else if (suppressing) |
307 | bch_err(c, "Ratelimiting new instances of previous error" ); |
308 | |
309 | if (s) |
310 | s->ret = ret; |
311 | |
312 | mutex_unlock(lock: &c->fsck_error_msgs_lock); |
313 | |
314 | printbuf_exit(&buf); |
315 | |
316 | if (inconsistent) |
317 | bch2_inconsistent_error(c); |
318 | |
319 | if (ret == -BCH_ERR_fsck_fix) { |
320 | set_bit(nr: BCH_FS_errors_fixed, addr: &c->flags); |
321 | } else { |
322 | set_bit(nr: BCH_FS_errors_not_fixed, addr: &c->flags); |
323 | set_bit(nr: BCH_FS_error, addr: &c->flags); |
324 | } |
325 | |
326 | return ret; |
327 | } |
328 | |
329 | void bch2_flush_fsck_errs(struct bch_fs *c) |
330 | { |
331 | struct fsck_err_state *s, *n; |
332 | |
333 | mutex_lock(&c->fsck_error_msgs_lock); |
334 | |
335 | list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { |
336 | if (s->ratelimited && s->last_msg) |
337 | bch_err(c, "Saw %llu errors like:\n %s" , s->nr, s->last_msg); |
338 | |
339 | list_del(entry: &s->list); |
340 | kfree(objp: s->last_msg); |
341 | kfree(objp: s); |
342 | } |
343 | |
344 | mutex_unlock(lock: &c->fsck_error_msgs_lock); |
345 | } |
346 | |