1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
4 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
5 | */ |
6 | |
7 | #include <linux/sched.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/spinlock.h> |
10 | #include <linux/completion.h> |
11 | #include <linux/buffer_head.h> |
12 | #include <linux/gfs2_ondisk.h> |
13 | #include <linux/crc32.h> |
14 | #include <linux/crc32c.h> |
15 | #include <linux/delay.h> |
16 | #include <linux/kthread.h> |
17 | #include <linux/freezer.h> |
18 | #include <linux/bio.h> |
19 | #include <linux/blkdev.h> |
20 | #include <linux/writeback.h> |
21 | #include <linux/list_sort.h> |
22 | |
23 | #include "gfs2.h" |
24 | #include "incore.h" |
25 | #include "bmap.h" |
26 | #include "glock.h" |
27 | #include "log.h" |
28 | #include "lops.h" |
29 | #include "meta_io.h" |
30 | #include "util.h" |
31 | #include "dir.h" |
32 | #include "trace_gfs2.h" |
33 | #include "trans.h" |
34 | |
35 | static void gfs2_log_shutdown(struct gfs2_sbd *sdp); |
36 | |
37 | /** |
38 | * gfs2_struct2blk - compute stuff |
39 | * @sdp: the filesystem |
40 | * @nstruct: the number of structures |
41 | * |
42 | * Compute the number of log descriptor blocks needed to hold a certain number |
43 | * of structures of a certain size. |
44 | * |
45 | * Returns: the number of blocks needed (minimum is always 1) |
46 | */ |
47 | |
48 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct) |
49 | { |
50 | unsigned int blks; |
51 | unsigned int first, second; |
52 | |
53 | /* The initial struct gfs2_log_descriptor block */ |
54 | blks = 1; |
55 | first = sdp->sd_ldptrs; |
56 | |
57 | if (nstruct > first) { |
58 | /* Subsequent struct gfs2_meta_header blocks */ |
59 | second = sdp->sd_inptrs; |
60 | blks += DIV_ROUND_UP(nstruct - first, second); |
61 | } |
62 | |
63 | return blks; |
64 | } |
65 | |
66 | /** |
67 | * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters |
68 | * @bd: The gfs2_bufdata to remove |
69 | * |
70 | * The ail lock _must_ be held when calling this function |
71 | * |
72 | */ |
73 | |
74 | void gfs2_remove_from_ail(struct gfs2_bufdata *bd) |
75 | { |
76 | bd->bd_tr = NULL; |
77 | list_del_init(entry: &bd->bd_ail_st_list); |
78 | list_del_init(entry: &bd->bd_ail_gl_list); |
79 | atomic_dec(v: &bd->bd_gl->gl_ail_count); |
80 | brelse(bh: bd->bd_bh); |
81 | } |
82 | |
83 | static int __gfs2_writepage(struct folio *folio, struct writeback_control *wbc, |
84 | void *data) |
85 | { |
86 | struct address_space *mapping = data; |
87 | int ret = mapping->a_ops->writepage(&folio->page, wbc); |
88 | mapping_set_error(mapping, error: ret); |
89 | return ret; |
90 | } |
91 | |
92 | /** |
93 | * gfs2_ail1_start_one - Start I/O on a transaction |
94 | * @sdp: The superblock |
95 | * @wbc: The writeback control structure |
96 | * @tr: The transaction to start I/O on |
97 | * @plug: The block plug currently active |
98 | */ |
99 | |
100 | static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, |
101 | struct writeback_control *wbc, |
102 | struct gfs2_trans *tr, struct blk_plug *plug) |
103 | __releases(&sdp->sd_ail_lock) |
104 | __acquires(&sdp->sd_ail_lock) |
105 | { |
106 | struct gfs2_glock *gl = NULL; |
107 | struct address_space *mapping; |
108 | struct gfs2_bufdata *bd, *s; |
109 | struct buffer_head *bh; |
110 | int ret = 0; |
111 | |
112 | list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { |
113 | bh = bd->bd_bh; |
114 | |
115 | gfs2_assert(sdp, bd->bd_tr == tr); |
116 | |
117 | if (!buffer_busy(bh)) { |
118 | if (buffer_uptodate(bh)) { |
119 | list_move(list: &bd->bd_ail_st_list, |
120 | head: &tr->tr_ail2_list); |
121 | continue; |
122 | } |
123 | if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { |
124 | gfs2_io_error_bh(sdp, bh); |
125 | gfs2_withdraw_delayed(sdp); |
126 | } |
127 | } |
128 | |
129 | if (gfs2_withdrawing_or_withdrawn(sdp)) { |
130 | gfs2_remove_from_ail(bd); |
131 | continue; |
132 | } |
133 | if (!buffer_dirty(bh)) |
134 | continue; |
135 | if (gl == bd->bd_gl) |
136 | continue; |
137 | gl = bd->bd_gl; |
138 | list_move(list: &bd->bd_ail_st_list, head: &tr->tr_ail1_list); |
139 | mapping = bh->b_folio->mapping; |
140 | if (!mapping) |
141 | continue; |
142 | spin_unlock(lock: &sdp->sd_ail_lock); |
143 | ret = write_cache_pages(mapping, wbc, writepage: __gfs2_writepage, data: mapping); |
144 | if (need_resched()) { |
145 | blk_finish_plug(plug); |
146 | cond_resched(); |
147 | blk_start_plug(plug); |
148 | } |
149 | spin_lock(lock: &sdp->sd_ail_lock); |
150 | if (ret == -ENODATA) /* if a jdata write into a new hole */ |
151 | ret = 0; /* ignore it */ |
152 | if (ret || wbc->nr_to_write <= 0) |
153 | break; |
154 | return -EBUSY; |
155 | } |
156 | |
157 | return ret; |
158 | } |
159 | |
160 | static void dump_ail_list(struct gfs2_sbd *sdp) |
161 | { |
162 | struct gfs2_trans *tr; |
163 | struct gfs2_bufdata *bd; |
164 | struct buffer_head *bh; |
165 | |
166 | list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { |
167 | list_for_each_entry_reverse(bd, &tr->tr_ail1_list, |
168 | bd_ail_st_list) { |
169 | bh = bd->bd_bh; |
170 | fs_err(sdp, "bd %p: blk:0x%llx bh=%p " , bd, |
171 | (unsigned long long)bd->bd_blkno, bh); |
172 | if (!bh) { |
173 | fs_err(sdp, "\n" ); |
174 | continue; |
175 | } |
176 | fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d " |
177 | "map:%d new:%d ar:%d aw:%d delay:%d " |
178 | "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n" , |
179 | (unsigned long long)bh->b_blocknr, |
180 | buffer_uptodate(bh), buffer_dirty(bh), |
181 | buffer_locked(bh), buffer_req(bh), |
182 | buffer_mapped(bh), buffer_new(bh), |
183 | buffer_async_read(bh), buffer_async_write(bh), |
184 | buffer_delay(bh), buffer_write_io_error(bh), |
185 | buffer_unwritten(bh), |
186 | buffer_defer_completion(bh), |
187 | buffer_pinned(bh), buffer_escaped(bh)); |
188 | } |
189 | } |
190 | } |
191 | |
192 | /** |
193 | * gfs2_ail1_flush - start writeback of some ail1 entries |
194 | * @sdp: The super block |
195 | * @wbc: The writeback control structure |
196 | * |
197 | * Writes back some ail1 entries, according to the limits in the |
198 | * writeback control structure |
199 | */ |
200 | |
201 | void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) |
202 | { |
203 | struct list_head *head = &sdp->sd_ail1_list; |
204 | struct gfs2_trans *tr; |
205 | struct blk_plug plug; |
206 | int ret; |
207 | unsigned long flush_start = jiffies; |
208 | |
209 | trace_gfs2_ail_flush(sdp, wbc, start: 1); |
210 | blk_start_plug(&plug); |
211 | spin_lock(lock: &sdp->sd_ail_lock); |
212 | restart: |
213 | ret = 0; |
214 | if (time_after(jiffies, flush_start + (HZ * 600))) { |
215 | fs_err(sdp, "Error: In %s for ten minutes! t=%d\n" , |
216 | __func__, current->journal_info ? 1 : 0); |
217 | dump_ail_list(sdp); |
218 | goto out; |
219 | } |
220 | list_for_each_entry_reverse(tr, head, tr_list) { |
221 | if (wbc->nr_to_write <= 0) |
222 | break; |
223 | ret = gfs2_ail1_start_one(sdp, wbc, tr, plug: &plug); |
224 | if (ret) { |
225 | if (ret == -EBUSY) |
226 | goto restart; |
227 | break; |
228 | } |
229 | } |
230 | out: |
231 | spin_unlock(lock: &sdp->sd_ail_lock); |
232 | blk_finish_plug(&plug); |
233 | if (ret) { |
234 | gfs2_lm(sdp, fmt: "gfs2_ail1_start_one returned: %d\n" , ret); |
235 | gfs2_withdraw(sdp); |
236 | } |
237 | trace_gfs2_ail_flush(sdp, wbc, start: 0); |
238 | } |
239 | |
240 | /** |
241 | * gfs2_ail1_start - start writeback of all ail1 entries |
242 | * @sdp: The superblock |
243 | */ |
244 | |
245 | static void gfs2_ail1_start(struct gfs2_sbd *sdp) |
246 | { |
247 | struct writeback_control wbc = { |
248 | .sync_mode = WB_SYNC_NONE, |
249 | .nr_to_write = LONG_MAX, |
250 | .range_start = 0, |
251 | .range_end = LLONG_MAX, |
252 | }; |
253 | |
254 | return gfs2_ail1_flush(sdp, wbc: &wbc); |
255 | } |
256 | |
257 | static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp) |
258 | { |
259 | unsigned int new_flush_tail = sdp->sd_log_head; |
260 | struct gfs2_trans *tr; |
261 | |
262 | if (!list_empty(head: &sdp->sd_ail1_list)) { |
263 | tr = list_last_entry(&sdp->sd_ail1_list, |
264 | struct gfs2_trans, tr_list); |
265 | new_flush_tail = tr->tr_first; |
266 | } |
267 | sdp->sd_log_flush_tail = new_flush_tail; |
268 | } |
269 | |
270 | static void gfs2_log_update_head(struct gfs2_sbd *sdp) |
271 | { |
272 | unsigned int new_head = sdp->sd_log_flush_head; |
273 | |
274 | if (sdp->sd_log_flush_tail == sdp->sd_log_head) |
275 | sdp->sd_log_flush_tail = new_head; |
276 | sdp->sd_log_head = new_head; |
277 | } |
278 | |
279 | /* |
280 | * gfs2_ail_empty_tr - empty one of the ail lists of a transaction |
281 | */ |
282 | |
283 | static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, |
284 | struct list_head *head) |
285 | { |
286 | struct gfs2_bufdata *bd; |
287 | |
288 | while (!list_empty(head)) { |
289 | bd = list_first_entry(head, struct gfs2_bufdata, |
290 | bd_ail_st_list); |
291 | gfs2_assert(sdp, bd->bd_tr == tr); |
292 | gfs2_remove_from_ail(bd); |
293 | } |
294 | } |
295 | |
296 | /** |
297 | * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced |
298 | * @sdp: the filesystem |
299 | * @tr: the transaction |
300 | * @max_revokes: If nonzero, issue revokes for the bd items for written buffers |
301 | * |
302 | * returns: the transaction's count of remaining active items |
303 | */ |
304 | |
305 | static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, |
306 | int *max_revokes) |
307 | { |
308 | struct gfs2_bufdata *bd, *s; |
309 | struct buffer_head *bh; |
310 | int active_count = 0; |
311 | |
312 | list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, |
313 | bd_ail_st_list) { |
314 | bh = bd->bd_bh; |
315 | gfs2_assert(sdp, bd->bd_tr == tr); |
316 | /* |
317 | * If another process flagged an io error, e.g. writing to the |
318 | * journal, error all other bhs and move them off the ail1 to |
319 | * prevent a tight loop when unmount tries to flush ail1, |
320 | * regardless of whether they're still busy. If no outside |
321 | * errors were found and the buffer is busy, move to the next. |
322 | * If the ail buffer is not busy and caught an error, flag it |
323 | * for others. |
324 | */ |
325 | if (!sdp->sd_log_error && buffer_busy(bh)) { |
326 | active_count++; |
327 | continue; |
328 | } |
329 | if (!buffer_uptodate(bh) && |
330 | !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { |
331 | gfs2_io_error_bh(sdp, bh); |
332 | gfs2_withdraw_delayed(sdp); |
333 | } |
334 | /* |
335 | * If we have space for revokes and the bd is no longer on any |
336 | * buf list, we can just add a revoke for it immediately and |
337 | * avoid having to put it on the ail2 list, where it would need |
338 | * to be revoked later. |
339 | */ |
340 | if (*max_revokes && list_empty(head: &bd->bd_list)) { |
341 | gfs2_add_revoke(sdp, bd); |
342 | (*max_revokes)--; |
343 | continue; |
344 | } |
345 | list_move(list: &bd->bd_ail_st_list, head: &tr->tr_ail2_list); |
346 | } |
347 | return active_count; |
348 | } |
349 | |
350 | /** |
351 | * gfs2_ail1_empty - Try to empty the ail1 lists |
352 | * @sdp: The superblock |
353 | * @max_revokes: If non-zero, add revokes where appropriate |
354 | * |
355 | * Tries to empty the ail1 lists, starting with the oldest first. |
356 | * Returns %true if the ail1 list is now empty. |
357 | */ |
358 | |
359 | static bool gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) |
360 | { |
361 | struct gfs2_trans *tr, *s; |
362 | int oldest_tr = 1; |
363 | bool empty; |
364 | |
365 | spin_lock(lock: &sdp->sd_ail_lock); |
366 | list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { |
367 | if (!gfs2_ail1_empty_one(sdp, tr, max_revokes: &max_revokes) && oldest_tr) |
368 | list_move(list: &tr->tr_list, head: &sdp->sd_ail2_list); |
369 | else |
370 | oldest_tr = 0; |
371 | } |
372 | gfs2_log_update_flush_tail(sdp); |
373 | empty = list_empty(head: &sdp->sd_ail1_list); |
374 | spin_unlock(lock: &sdp->sd_ail_lock); |
375 | |
376 | return empty; |
377 | } |
378 | |
379 | static void gfs2_ail1_wait(struct gfs2_sbd *sdp) |
380 | { |
381 | struct gfs2_trans *tr; |
382 | struct gfs2_bufdata *bd; |
383 | struct buffer_head *bh; |
384 | |
385 | spin_lock(lock: &sdp->sd_ail_lock); |
386 | list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { |
387 | list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) { |
388 | bh = bd->bd_bh; |
389 | if (!buffer_locked(bh)) |
390 | continue; |
391 | get_bh(bh); |
392 | spin_unlock(lock: &sdp->sd_ail_lock); |
393 | wait_on_buffer(bh); |
394 | brelse(bh); |
395 | return; |
396 | } |
397 | } |
398 | spin_unlock(lock: &sdp->sd_ail_lock); |
399 | } |
400 | |
401 | static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
402 | { |
403 | gfs2_ail_empty_tr(sdp, tr, head: &tr->tr_ail2_list); |
404 | list_del(entry: &tr->tr_list); |
405 | gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); |
406 | gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); |
407 | gfs2_trans_free(sdp, tr); |
408 | } |
409 | |
410 | static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) |
411 | { |
412 | struct list_head *ail2_list = &sdp->sd_ail2_list; |
413 | unsigned int old_tail = sdp->sd_log_tail; |
414 | struct gfs2_trans *tr, *safe; |
415 | |
416 | spin_lock(lock: &sdp->sd_ail_lock); |
417 | if (old_tail <= new_tail) { |
418 | list_for_each_entry_safe(tr, safe, ail2_list, tr_list) { |
419 | if (old_tail <= tr->tr_first && tr->tr_first < new_tail) |
420 | __ail2_empty(sdp, tr); |
421 | } |
422 | } else { |
423 | list_for_each_entry_safe(tr, safe, ail2_list, tr_list) { |
424 | if (old_tail <= tr->tr_first || tr->tr_first < new_tail) |
425 | __ail2_empty(sdp, tr); |
426 | } |
427 | } |
428 | spin_unlock(lock: &sdp->sd_ail_lock); |
429 | } |
430 | |
431 | /** |
432 | * gfs2_log_is_empty - Check if the log is empty |
433 | * @sdp: The GFS2 superblock |
434 | */ |
435 | |
436 | bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { |
437 | return atomic_read(v: &sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks; |
438 | } |
439 | |
440 | static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes) |
441 | { |
442 | unsigned int available; |
443 | |
444 | available = atomic_read(v: &sdp->sd_log_revokes_available); |
445 | while (available >= revokes) { |
446 | if (atomic_try_cmpxchg(v: &sdp->sd_log_revokes_available, |
447 | old: &available, new: available - revokes)) |
448 | return true; |
449 | } |
450 | return false; |
451 | } |
452 | |
453 | /** |
454 | * gfs2_log_release_revokes - Release a given number of revokes |
455 | * @sdp: The GFS2 superblock |
456 | * @revokes: The number of revokes to release |
457 | * |
458 | * sdp->sd_log_flush_lock must be held. |
459 | */ |
460 | void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes) |
461 | { |
462 | if (revokes) |
463 | atomic_add(i: revokes, v: &sdp->sd_log_revokes_available); |
464 | } |
465 | |
466 | /** |
467 | * gfs2_log_release - Release a given number of log blocks |
468 | * @sdp: The GFS2 superblock |
469 | * @blks: The number of blocks |
470 | * |
471 | */ |
472 | |
473 | void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) |
474 | { |
475 | atomic_add(i: blks, v: &sdp->sd_log_blks_free); |
476 | trace_gfs2_log_blocks(sdp, blocks: blks); |
477 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= |
478 | sdp->sd_jdesc->jd_blocks); |
479 | if (atomic_read(v: &sdp->sd_log_blks_needed)) |
480 | wake_up(&sdp->sd_log_waitq); |
481 | } |
482 | |
483 | /** |
484 | * __gfs2_log_try_reserve - Try to make a log reservation |
485 | * @sdp: The GFS2 superblock |
486 | * @blks: The number of blocks to reserve |
487 | * @taboo_blks: The number of blocks to leave free |
488 | * |
489 | * Try to do the same as __gfs2_log_reserve(), but fail if no more log |
490 | * space is immediately available. |
491 | */ |
492 | static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks, |
493 | unsigned int taboo_blks) |
494 | { |
495 | unsigned wanted = blks + taboo_blks; |
496 | unsigned int free_blocks; |
497 | |
498 | free_blocks = atomic_read(v: &sdp->sd_log_blks_free); |
499 | while (free_blocks >= wanted) { |
500 | if (atomic_try_cmpxchg(v: &sdp->sd_log_blks_free, old: &free_blocks, |
501 | new: free_blocks - blks)) { |
502 | trace_gfs2_log_blocks(sdp, blocks: -blks); |
503 | return true; |
504 | } |
505 | } |
506 | return false; |
507 | } |
508 | |
509 | /** |
510 | * __gfs2_log_reserve - Make a log reservation |
511 | * @sdp: The GFS2 superblock |
512 | * @blks: The number of blocks to reserve |
513 | * @taboo_blks: The number of blocks to leave free |
514 | * |
515 | * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS |
516 | * for all other processes. This ensures that when the log is almost full, |
517 | * logd will still be able to call gfs2_log_flush one more time without |
518 | * blocking, which will advance the tail and make some more log space |
519 | * available. |
520 | * |
521 | * We no longer flush the log here, instead we wake up logd to do that |
522 | * for us. To avoid the thundering herd and to ensure that we deal fairly |
523 | * with queued waiters, we use an exclusive wait. This means that when we |
524 | * get woken with enough journal space to get our reservation, we need to |
525 | * wake the next waiter on the list. |
526 | */ |
527 | |
528 | static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks, |
529 | unsigned int taboo_blks) |
530 | { |
531 | unsigned wanted = blks + taboo_blks; |
532 | unsigned int free_blocks; |
533 | |
534 | atomic_add(i: blks, v: &sdp->sd_log_blks_needed); |
535 | for (;;) { |
536 | if (current != sdp->sd_logd_process) |
537 | wake_up(&sdp->sd_logd_waitq); |
538 | io_wait_event(sdp->sd_log_waitq, |
539 | (free_blocks = atomic_read(&sdp->sd_log_blks_free), |
540 | free_blocks >= wanted)); |
541 | do { |
542 | if (atomic_try_cmpxchg(v: &sdp->sd_log_blks_free, |
543 | old: &free_blocks, |
544 | new: free_blocks - blks)) |
545 | goto reserved; |
546 | } while (free_blocks >= wanted); |
547 | } |
548 | |
549 | reserved: |
550 | trace_gfs2_log_blocks(sdp, blocks: -blks); |
551 | if (atomic_sub_return(i: blks, v: &sdp->sd_log_blks_needed)) |
552 | wake_up(&sdp->sd_log_waitq); |
553 | } |
554 | |
555 | /** |
556 | * gfs2_log_try_reserve - Try to make a log reservation |
557 | * @sdp: The GFS2 superblock |
558 | * @tr: The transaction |
559 | * @extra_revokes: The number of additional revokes reserved (output) |
560 | * |
561 | * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be |
562 | * held for correct revoke accounting. |
563 | */ |
564 | |
565 | bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, |
566 | unsigned int *) |
567 | { |
568 | unsigned int blks = tr->tr_reserved; |
569 | unsigned int revokes = tr->tr_revokes; |
570 | unsigned int revoke_blks = 0; |
571 | |
572 | *extra_revokes = 0; |
573 | if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) { |
574 | revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); |
575 | *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; |
576 | blks += revoke_blks; |
577 | } |
578 | if (!blks) |
579 | return true; |
580 | if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS)) |
581 | return true; |
582 | if (!revoke_blks) |
583 | gfs2_log_release_revokes(sdp, revokes); |
584 | return false; |
585 | } |
586 | |
587 | /** |
588 | * gfs2_log_reserve - Make a log reservation |
589 | * @sdp: The GFS2 superblock |
590 | * @tr: The transaction |
591 | * @extra_revokes: The number of additional revokes reserved (output) |
592 | * |
593 | * sdp->sd_log_flush_lock must not be held. |
594 | */ |
595 | |
596 | void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, |
597 | unsigned int *) |
598 | { |
599 | unsigned int blks = tr->tr_reserved; |
600 | unsigned int revokes = tr->tr_revokes; |
601 | unsigned int revoke_blks; |
602 | |
603 | *extra_revokes = 0; |
604 | if (revokes) { |
605 | revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); |
606 | *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; |
607 | blks += revoke_blks; |
608 | } |
609 | __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS); |
610 | } |
611 | |
612 | /** |
613 | * log_distance - Compute distance between two journal blocks |
614 | * @sdp: The GFS2 superblock |
615 | * @newer: The most recent journal block of the pair |
616 | * @older: The older journal block of the pair |
617 | * |
618 | * Compute the distance (in the journal direction) between two |
619 | * blocks in the journal |
620 | * |
621 | * Returns: the distance in blocks |
622 | */ |
623 | |
624 | static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer, |
625 | unsigned int older) |
626 | { |
627 | int dist; |
628 | |
629 | dist = newer - older; |
630 | if (dist < 0) |
631 | dist += sdp->sd_jdesc->jd_blocks; |
632 | |
633 | return dist; |
634 | } |
635 | |
636 | /** |
637 | * calc_reserved - Calculate the number of blocks to keep reserved |
638 | * @sdp: The GFS2 superblock |
639 | * |
640 | * This is complex. We need to reserve room for all our currently used |
641 | * metadata blocks (e.g. normal file I/O rewriting file time stamps) and |
642 | * all our journaled data blocks for journaled files (e.g. files in the |
643 | * meta_fs like rindex, or files for which chattr +j was done.) |
644 | * If we don't reserve enough space, corruption will follow. |
645 | * |
646 | * We can have metadata blocks and jdata blocks in the same journal. Each |
647 | * type gets its own log descriptor, for which we need to reserve a block. |
648 | * In fact, each type has the potential for needing more than one log descriptor |
649 | * in cases where we have more blocks than will fit in a log descriptor. |
650 | * Metadata journal entries take up half the space of journaled buffer entries. |
651 | * |
652 | * Also, we need to reserve blocks for revoke journal entries and one for an |
653 | * overall header for the lot. |
654 | * |
655 | * Returns: the number of blocks reserved |
656 | */ |
657 | static unsigned int calc_reserved(struct gfs2_sbd *sdp) |
658 | { |
659 | unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS; |
660 | unsigned int blocks; |
661 | struct gfs2_trans *tr = sdp->sd_log_tr; |
662 | |
663 | if (tr) { |
664 | blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm; |
665 | reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp)); |
666 | blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; |
667 | reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp)); |
668 | } |
669 | return reserved; |
670 | } |
671 | |
672 | static void log_pull_tail(struct gfs2_sbd *sdp) |
673 | { |
674 | unsigned int new_tail = sdp->sd_log_flush_tail; |
675 | unsigned int dist; |
676 | |
677 | if (new_tail == sdp->sd_log_tail) |
678 | return; |
679 | dist = log_distance(sdp, newer: new_tail, older: sdp->sd_log_tail); |
680 | ail2_empty(sdp, new_tail); |
681 | gfs2_log_release(sdp, blks: dist); |
682 | sdp->sd_log_tail = new_tail; |
683 | } |
684 | |
685 | |
686 | void log_flush_wait(struct gfs2_sbd *sdp) |
687 | { |
688 | DEFINE_WAIT(wait); |
689 | |
690 | if (atomic_read(v: &sdp->sd_log_in_flight)) { |
691 | do { |
692 | prepare_to_wait(wq_head: &sdp->sd_log_flush_wait, wq_entry: &wait, |
693 | TASK_UNINTERRUPTIBLE); |
694 | if (atomic_read(v: &sdp->sd_log_in_flight)) |
695 | io_schedule(); |
696 | } while(atomic_read(v: &sdp->sd_log_in_flight)); |
697 | finish_wait(wq_head: &sdp->sd_log_flush_wait, wq_entry: &wait); |
698 | } |
699 | } |
700 | |
701 | static int ip_cmp(void *priv, const struct list_head *a, const struct list_head *b) |
702 | { |
703 | struct gfs2_inode *ipa, *ipb; |
704 | |
705 | ipa = list_entry(a, struct gfs2_inode, i_ordered); |
706 | ipb = list_entry(b, struct gfs2_inode, i_ordered); |
707 | |
708 | if (ipa->i_no_addr < ipb->i_no_addr) |
709 | return -1; |
710 | if (ipa->i_no_addr > ipb->i_no_addr) |
711 | return 1; |
712 | return 0; |
713 | } |
714 | |
715 | static void __ordered_del_inode(struct gfs2_inode *ip) |
716 | { |
717 | if (!list_empty(head: &ip->i_ordered)) |
718 | list_del_init(entry: &ip->i_ordered); |
719 | } |
720 | |
721 | static void gfs2_ordered_write(struct gfs2_sbd *sdp) |
722 | { |
723 | struct gfs2_inode *ip; |
724 | LIST_HEAD(written); |
725 | |
726 | spin_lock(lock: &sdp->sd_ordered_lock); |
727 | list_sort(NULL, head: &sdp->sd_log_ordered, cmp: &ip_cmp); |
728 | while (!list_empty(head: &sdp->sd_log_ordered)) { |
729 | ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); |
730 | if (ip->i_inode.i_mapping->nrpages == 0) { |
731 | __ordered_del_inode(ip); |
732 | continue; |
733 | } |
734 | list_move(list: &ip->i_ordered, head: &written); |
735 | spin_unlock(lock: &sdp->sd_ordered_lock); |
736 | filemap_fdatawrite(ip->i_inode.i_mapping); |
737 | spin_lock(lock: &sdp->sd_ordered_lock); |
738 | } |
739 | list_splice(list: &written, head: &sdp->sd_log_ordered); |
740 | spin_unlock(lock: &sdp->sd_ordered_lock); |
741 | } |
742 | |
743 | static void gfs2_ordered_wait(struct gfs2_sbd *sdp) |
744 | { |
745 | struct gfs2_inode *ip; |
746 | |
747 | spin_lock(lock: &sdp->sd_ordered_lock); |
748 | while (!list_empty(head: &sdp->sd_log_ordered)) { |
749 | ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); |
750 | __ordered_del_inode(ip); |
751 | if (ip->i_inode.i_mapping->nrpages == 0) |
752 | continue; |
753 | spin_unlock(lock: &sdp->sd_ordered_lock); |
754 | filemap_fdatawait(mapping: ip->i_inode.i_mapping); |
755 | spin_lock(lock: &sdp->sd_ordered_lock); |
756 | } |
757 | spin_unlock(lock: &sdp->sd_ordered_lock); |
758 | } |
759 | |
760 | void gfs2_ordered_del_inode(struct gfs2_inode *ip) |
761 | { |
762 | struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode); |
763 | |
764 | spin_lock(lock: &sdp->sd_ordered_lock); |
765 | __ordered_del_inode(ip); |
766 | spin_unlock(lock: &sdp->sd_ordered_lock); |
767 | } |
768 | |
769 | void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) |
770 | { |
771 | struct buffer_head *bh = bd->bd_bh; |
772 | struct gfs2_glock *gl = bd->bd_gl; |
773 | |
774 | sdp->sd_log_num_revoke++; |
775 | if (atomic_inc_return(v: &gl->gl_revokes) == 1) |
776 | gfs2_glock_hold(gl); |
777 | bh->b_private = NULL; |
778 | bd->bd_blkno = bh->b_blocknr; |
779 | gfs2_remove_from_ail(bd); /* drops ref on bh */ |
780 | bd->bd_bh = NULL; |
781 | set_bit(nr: GLF_LFLUSH, addr: &gl->gl_flags); |
782 | list_add(new: &bd->bd_list, head: &sdp->sd_log_revokes); |
783 | } |
784 | |
785 | void gfs2_glock_remove_revoke(struct gfs2_glock *gl) |
786 | { |
787 | if (atomic_dec_return(v: &gl->gl_revokes) == 0) { |
788 | clear_bit(nr: GLF_LFLUSH, addr: &gl->gl_flags); |
789 | gfs2_glock_queue_put(gl); |
790 | } |
791 | } |
792 | |
793 | /** |
794 | * gfs2_flush_revokes - Add as many revokes to the system transaction as we can |
795 | * @sdp: The GFS2 superblock |
796 | * |
797 | * Our usual strategy is to defer writing revokes as much as we can in the hope |
798 | * that we'll eventually overwrite the journal, which will make those revokes |
799 | * go away. This changes when we flush the log: at that point, there will |
800 | * likely be some left-over space in the last revoke block of that transaction. |
801 | * We can fill that space with additional revokes for blocks that have already |
802 | * been written back. This will basically come at no cost now, and will save |
803 | * us from having to keep track of those blocks on the AIL2 list later. |
804 | */ |
805 | void gfs2_flush_revokes(struct gfs2_sbd *sdp) |
806 | { |
807 | /* number of revokes we still have room for */ |
808 | unsigned int max_revokes = atomic_read(v: &sdp->sd_log_revokes_available); |
809 | |
810 | gfs2_log_lock(sdp); |
811 | gfs2_ail1_empty(sdp, max_revokes); |
812 | gfs2_log_unlock(sdp); |
813 | |
814 | if (gfs2_withdrawing(sdp)) |
815 | gfs2_withdraw(sdp); |
816 | } |
817 | |
818 | /** |
819 | * gfs2_write_log_header - Write a journal log header buffer at lblock |
820 | * @sdp: The GFS2 superblock |
821 | * @jd: journal descriptor of the journal to which we are writing |
822 | * @seq: sequence number |
823 | * @tail: tail of the log |
824 | * @lblock: value for lh_blkno (block number relative to start of journal) |
825 | * @flags: log header flags GFS2_LOG_HEAD_* |
826 | * @op_flags: flags to pass to the bio |
827 | * |
828 | * Returns: the initialized log buffer descriptor |
829 | */ |
830 | |
831 | void (struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, |
832 | u64 seq, u32 tail, u32 lblock, u32 flags, |
833 | blk_opf_t op_flags) |
834 | { |
835 | struct gfs2_log_header *lh; |
836 | u32 hash, crc; |
837 | struct page *page; |
838 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
839 | struct timespec64 tv; |
840 | struct super_block *sb = sdp->sd_vfs; |
841 | u64 dblock; |
842 | |
843 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
844 | return; |
845 | |
846 | page = mempool_alloc(pool: gfs2_page_pool, GFP_NOIO); |
847 | lh = page_address(page); |
848 | clear_page(page: lh); |
849 | |
850 | lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); |
851 | lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); |
852 | lh->lh_header.__pad0 = cpu_to_be64(0); |
853 | lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); |
854 | lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); |
855 | lh->lh_sequence = cpu_to_be64(seq); |
856 | lh->lh_flags = cpu_to_be32(flags); |
857 | lh->lh_tail = cpu_to_be32(tail); |
858 | lh->lh_blkno = cpu_to_be32(lblock); |
859 | hash = ~crc32(~0, lh, LH_V1_SIZE); |
860 | lh->lh_hash = cpu_to_be32(hash); |
861 | |
862 | ktime_get_coarse_real_ts64(ts: &tv); |
863 | lh->lh_nsec = cpu_to_be32(tv.tv_nsec); |
864 | lh->lh_sec = cpu_to_be64(tv.tv_sec); |
865 | if (!list_empty(head: &jd->extent_list)) |
866 | dblock = gfs2_log_bmap(jd, lbn: lblock); |
867 | else { |
868 | unsigned int extlen; |
869 | int ret; |
870 | |
871 | extlen = 1; |
872 | ret = gfs2_get_extent(inode: jd->jd_inode, lblock, dblock: &dblock, extlen: &extlen); |
873 | if (gfs2_assert_withdraw(sdp, ret == 0)) |
874 | return; |
875 | } |
876 | lh->lh_addr = cpu_to_be64(dblock); |
877 | lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr); |
878 | |
879 | /* We may only write local statfs, quota, etc., when writing to our |
880 | own journal. The values are left 0 when recovering a journal |
881 | different from our own. */ |
882 | if (!(flags & GFS2_LOG_HEAD_RECOVERY)) { |
883 | lh->lh_statfs_addr = |
884 | cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr); |
885 | lh->lh_quota_addr = |
886 | cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr); |
887 | |
888 | spin_lock(lock: &sdp->sd_statfs_spin); |
889 | lh->lh_local_total = cpu_to_be64(l_sc->sc_total); |
890 | lh->lh_local_free = cpu_to_be64(l_sc->sc_free); |
891 | lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes); |
892 | spin_unlock(lock: &sdp->sd_statfs_spin); |
893 | } |
894 | |
895 | BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE); |
896 | |
897 | crc = crc32c(crc: ~0, address: (void *)lh + LH_V1_SIZE + 4, |
898 | length: sb->s_blocksize - LH_V1_SIZE - 4); |
899 | lh->lh_crc = cpu_to_be32(crc); |
900 | |
901 | gfs2_log_write(sdp, jd, page, size: sb->s_blocksize, offset: 0, blkno: dblock); |
902 | gfs2_log_submit_bio(biop: &jd->jd_log_bio, opf: REQ_OP_WRITE | op_flags); |
903 | } |
904 | |
905 | /** |
906 | * log_write_header - Get and initialize a journal header buffer |
907 | * @sdp: The GFS2 superblock |
908 | * @flags: The log header flags, including log header origin |
909 | * |
910 | * Returns: the initialized log buffer descriptor |
911 | */ |
912 | |
913 | static void (struct gfs2_sbd *sdp, u32 flags) |
914 | { |
915 | blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; |
916 | struct super_block *sb = sdp->sd_vfs; |
917 | |
918 | gfs2_assert_withdraw(sdp, sb->s_writers.frozen != SB_FREEZE_COMPLETE); |
919 | |
920 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { |
921 | gfs2_ordered_wait(sdp); |
922 | log_flush_wait(sdp); |
923 | op_flags = REQ_SYNC | REQ_META | REQ_PRIO; |
924 | } |
925 | sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head); |
926 | gfs2_write_log_header(sdp, jd: sdp->sd_jdesc, seq: sdp->sd_log_sequence++, |
927 | tail: sdp->sd_log_flush_tail, lblock: sdp->sd_log_flush_head, |
928 | flags, op_flags); |
929 | gfs2_log_incr_head(sdp); |
930 | log_flush_wait(sdp); |
931 | log_pull_tail(sdp); |
932 | gfs2_log_update_head(sdp); |
933 | } |
934 | |
935 | /** |
936 | * gfs2_ail_drain - drain the ail lists after a withdraw |
937 | * @sdp: Pointer to GFS2 superblock |
938 | */ |
939 | void gfs2_ail_drain(struct gfs2_sbd *sdp) |
940 | { |
941 | struct gfs2_trans *tr; |
942 | |
943 | spin_lock(lock: &sdp->sd_ail_lock); |
944 | /* |
945 | * For transactions on the sd_ail1_list we need to drain both the |
946 | * ail1 and ail2 lists. That's because function gfs2_ail1_start_one |
947 | * (temporarily) moves items from its tr_ail1 list to tr_ail2 list |
948 | * before revokes are sent for that block. Items on the sd_ail2_list |
949 | * should have already gotten beyond that point, so no need. |
950 | */ |
951 | while (!list_empty(head: &sdp->sd_ail1_list)) { |
952 | tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans, |
953 | tr_list); |
954 | gfs2_ail_empty_tr(sdp, tr, head: &tr->tr_ail1_list); |
955 | gfs2_ail_empty_tr(sdp, tr, head: &tr->tr_ail2_list); |
956 | list_del(entry: &tr->tr_list); |
957 | gfs2_trans_free(sdp, tr); |
958 | } |
959 | while (!list_empty(head: &sdp->sd_ail2_list)) { |
960 | tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans, |
961 | tr_list); |
962 | gfs2_ail_empty_tr(sdp, tr, head: &tr->tr_ail2_list); |
963 | list_del(entry: &tr->tr_list); |
964 | gfs2_trans_free(sdp, tr); |
965 | } |
966 | gfs2_drain_revokes(sdp); |
967 | spin_unlock(lock: &sdp->sd_ail_lock); |
968 | } |
969 | |
970 | /** |
971 | * empty_ail1_list - try to start IO and empty the ail1 list |
972 | * @sdp: Pointer to GFS2 superblock |
973 | */ |
974 | static void empty_ail1_list(struct gfs2_sbd *sdp) |
975 | { |
976 | unsigned long start = jiffies; |
977 | bool empty = false; |
978 | |
979 | while (!empty) { |
980 | if (time_after(jiffies, start + (HZ * 600))) { |
981 | fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n" , |
982 | __func__, current->journal_info ? 1 : 0); |
983 | dump_ail_list(sdp); |
984 | return; |
985 | } |
986 | gfs2_ail1_start(sdp); |
987 | gfs2_ail1_wait(sdp); |
988 | empty = gfs2_ail1_empty(sdp, max_revokes: 0); |
989 | |
990 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
991 | break; |
992 | } |
993 | |
994 | if (gfs2_withdrawing(sdp)) |
995 | gfs2_withdraw(sdp); |
996 | } |
997 | |
998 | /** |
999 | * trans_drain - drain the buf and databuf queue for a failed transaction |
1000 | * @tr: the transaction to drain |
1001 | * |
1002 | * When this is called, we're taking an error exit for a log write that failed |
1003 | * but since we bypassed the after_commit functions, we need to remove the |
1004 | * items from the buf and databuf queue. |
1005 | */ |
1006 | static void trans_drain(struct gfs2_trans *tr) |
1007 | { |
1008 | struct gfs2_bufdata *bd; |
1009 | struct list_head *head; |
1010 | |
1011 | if (!tr) |
1012 | return; |
1013 | |
1014 | head = &tr->tr_buf; |
1015 | while (!list_empty(head)) { |
1016 | bd = list_first_entry(head, struct gfs2_bufdata, bd_list); |
1017 | list_del_init(entry: &bd->bd_list); |
1018 | if (!list_empty(head: &bd->bd_ail_st_list)) |
1019 | gfs2_remove_from_ail(bd); |
1020 | kmem_cache_free(s: gfs2_bufdata_cachep, objp: bd); |
1021 | } |
1022 | head = &tr->tr_databuf; |
1023 | while (!list_empty(head)) { |
1024 | bd = list_first_entry(head, struct gfs2_bufdata, bd_list); |
1025 | list_del_init(entry: &bd->bd_list); |
1026 | if (!list_empty(head: &bd->bd_ail_st_list)) |
1027 | gfs2_remove_from_ail(bd); |
1028 | kmem_cache_free(s: gfs2_bufdata_cachep, objp: bd); |
1029 | } |
1030 | } |
1031 | |
1032 | /** |
1033 | * gfs2_log_flush - flush incore transaction(s) |
1034 | * @sdp: The filesystem |
1035 | * @gl: The glock structure to flush. If NULL, flush the whole incore log |
1036 | * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags |
1037 | * |
1038 | */ |
1039 | |
1040 | void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) |
1041 | { |
1042 | struct gfs2_trans *tr = NULL; |
1043 | unsigned int reserved_blocks = 0, used_blocks = 0; |
1044 | bool frozen = test_bit(SDF_FROZEN, &sdp->sd_flags); |
1045 | unsigned int first_log_head; |
1046 | unsigned int reserved_revokes = 0; |
1047 | |
1048 | down_write(sem: &sdp->sd_log_flush_lock); |
1049 | trace_gfs2_log_flush(sdp, start: 1, flags); |
1050 | |
1051 | repeat: |
1052 | /* |
1053 | * Do this check while holding the log_flush_lock to prevent new |
1054 | * buffers from being added to the ail via gfs2_pin() |
1055 | */ |
1056 | if (gfs2_withdrawing_or_withdrawn(sdp) || |
1057 | !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) |
1058 | goto out; |
1059 | |
1060 | /* Log might have been flushed while we waited for the flush lock */ |
1061 | if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) |
1062 | goto out; |
1063 | |
1064 | first_log_head = sdp->sd_log_head; |
1065 | sdp->sd_log_flush_head = first_log_head; |
1066 | |
1067 | tr = sdp->sd_log_tr; |
1068 | if (tr || sdp->sd_log_num_revoke) { |
1069 | if (reserved_blocks) |
1070 | gfs2_log_release(sdp, blks: reserved_blocks); |
1071 | reserved_blocks = sdp->sd_log_blks_reserved; |
1072 | reserved_revokes = sdp->sd_log_num_revoke; |
1073 | if (tr) { |
1074 | sdp->sd_log_tr = NULL; |
1075 | tr->tr_first = first_log_head; |
1076 | if (unlikely(frozen)) { |
1077 | if (gfs2_assert_withdraw_delayed(sdp, |
1078 | !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) |
1079 | goto out_withdraw; |
1080 | } |
1081 | } |
1082 | } else if (!reserved_blocks) { |
1083 | unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; |
1084 | |
1085 | reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; |
1086 | if (current == sdp->sd_logd_process) |
1087 | taboo_blocks = 0; |
1088 | |
1089 | if (!__gfs2_log_try_reserve(sdp, blks: reserved_blocks, taboo_blks: taboo_blocks)) { |
1090 | up_write(sem: &sdp->sd_log_flush_lock); |
1091 | __gfs2_log_reserve(sdp, blks: reserved_blocks, taboo_blks: taboo_blocks); |
1092 | down_write(sem: &sdp->sd_log_flush_lock); |
1093 | goto repeat; |
1094 | } |
1095 | BUG_ON(sdp->sd_log_num_revoke); |
1096 | } |
1097 | |
1098 | if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) |
1099 | clear_bit(nr: SDF_JOURNAL_LIVE, addr: &sdp->sd_flags); |
1100 | |
1101 | if (unlikely(frozen)) |
1102 | if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) |
1103 | goto out_withdraw; |
1104 | |
1105 | gfs2_ordered_write(sdp); |
1106 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
1107 | goto out_withdraw; |
1108 | lops_before_commit(sdp, tr); |
1109 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
1110 | goto out_withdraw; |
1111 | gfs2_log_submit_bio(biop: &sdp->sd_jdesc->jd_log_bio, opf: REQ_OP_WRITE); |
1112 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
1113 | goto out_withdraw; |
1114 | |
1115 | if (sdp->sd_log_head != sdp->sd_log_flush_head) { |
1116 | log_write_header(sdp, flags); |
1117 | } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { |
1118 | log_write_header(sdp, flags); |
1119 | } |
1120 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
1121 | goto out_withdraw; |
1122 | lops_after_commit(sdp, tr); |
1123 | |
1124 | gfs2_log_lock(sdp); |
1125 | sdp->sd_log_blks_reserved = 0; |
1126 | |
1127 | spin_lock(lock: &sdp->sd_ail_lock); |
1128 | if (tr && !list_empty(head: &tr->tr_ail1_list)) { |
1129 | list_add(new: &tr->tr_list, head: &sdp->sd_ail1_list); |
1130 | tr = NULL; |
1131 | } |
1132 | spin_unlock(lock: &sdp->sd_ail_lock); |
1133 | gfs2_log_unlock(sdp); |
1134 | |
1135 | if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { |
1136 | if (!sdp->sd_log_idle) { |
1137 | empty_ail1_list(sdp); |
1138 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
1139 | goto out_withdraw; |
1140 | log_write_header(sdp, flags); |
1141 | } |
1142 | if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | |
1143 | GFS2_LOG_HEAD_FLUSH_FREEZE)) |
1144 | gfs2_log_shutdown(sdp); |
1145 | } |
1146 | |
1147 | out_end: |
1148 | used_blocks = log_distance(sdp, newer: sdp->sd_log_flush_head, older: first_log_head); |
1149 | reserved_revokes += atomic_read(v: &sdp->sd_log_revokes_available); |
1150 | atomic_set(v: &sdp->sd_log_revokes_available, i: sdp->sd_ldptrs); |
1151 | gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs); |
1152 | if (reserved_revokes > sdp->sd_ldptrs) |
1153 | reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; |
1154 | out: |
1155 | if (used_blocks != reserved_blocks) { |
1156 | gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); |
1157 | gfs2_log_release(sdp, blks: reserved_blocks - used_blocks); |
1158 | } |
1159 | up_write(sem: &sdp->sd_log_flush_lock); |
1160 | gfs2_trans_free(sdp, tr); |
1161 | if (gfs2_withdrawing(sdp)) |
1162 | gfs2_withdraw(sdp); |
1163 | trace_gfs2_log_flush(sdp, start: 0, flags); |
1164 | return; |
1165 | |
1166 | out_withdraw: |
1167 | trans_drain(tr); |
1168 | /** |
1169 | * If the tr_list is empty, we're withdrawing during a log |
1170 | * flush that targets a transaction, but the transaction was |
1171 | * never queued onto any of the ail lists. Here we add it to |
1172 | * ail1 just so that ail_drain() will find and free it. |
1173 | */ |
1174 | spin_lock(lock: &sdp->sd_ail_lock); |
1175 | if (tr && list_empty(head: &tr->tr_list)) |
1176 | list_add(new: &tr->tr_list, head: &sdp->sd_ail1_list); |
1177 | spin_unlock(lock: &sdp->sd_ail_lock); |
1178 | tr = NULL; |
1179 | goto out_end; |
1180 | } |
1181 | |
1182 | /** |
1183 | * gfs2_merge_trans - Merge a new transaction into a cached transaction |
1184 | * @sdp: the filesystem |
1185 | * @new: New transaction to be merged |
1186 | */ |
1187 | |
1188 | static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) |
1189 | { |
1190 | struct gfs2_trans *old = sdp->sd_log_tr; |
1191 | |
1192 | WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags)); |
1193 | |
1194 | old->tr_num_buf_new += new->tr_num_buf_new; |
1195 | old->tr_num_databuf_new += new->tr_num_databuf_new; |
1196 | old->tr_num_buf_rm += new->tr_num_buf_rm; |
1197 | old->tr_num_databuf_rm += new->tr_num_databuf_rm; |
1198 | old->tr_revokes += new->tr_revokes; |
1199 | old->tr_num_revoke += new->tr_num_revoke; |
1200 | |
1201 | list_splice_tail_init(list: &new->tr_databuf, head: &old->tr_databuf); |
1202 | list_splice_tail_init(list: &new->tr_buf, head: &old->tr_buf); |
1203 | |
1204 | spin_lock(lock: &sdp->sd_ail_lock); |
1205 | list_splice_tail_init(list: &new->tr_ail1_list, head: &old->tr_ail1_list); |
1206 | list_splice_tail_init(list: &new->tr_ail2_list, head: &old->tr_ail2_list); |
1207 | spin_unlock(lock: &sdp->sd_ail_lock); |
1208 | } |
1209 | |
1210 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
1211 | { |
1212 | unsigned int reserved; |
1213 | unsigned int unused; |
1214 | unsigned int maxres; |
1215 | |
1216 | gfs2_log_lock(sdp); |
1217 | |
1218 | if (sdp->sd_log_tr) { |
1219 | gfs2_merge_trans(sdp, new: tr); |
1220 | } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { |
1221 | gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags)); |
1222 | sdp->sd_log_tr = tr; |
1223 | set_bit(nr: TR_ATTACHED, addr: &tr->tr_flags); |
1224 | } |
1225 | |
1226 | reserved = calc_reserved(sdp); |
1227 | maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; |
1228 | gfs2_assert_withdraw(sdp, maxres >= reserved); |
1229 | unused = maxres - reserved; |
1230 | if (unused) |
1231 | gfs2_log_release(sdp, blks: unused); |
1232 | sdp->sd_log_blks_reserved = reserved; |
1233 | |
1234 | gfs2_log_unlock(sdp); |
1235 | } |
1236 | |
1237 | static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) |
1238 | { |
1239 | return atomic_read(v: &sdp->sd_log_pinned) + |
1240 | atomic_read(v: &sdp->sd_log_blks_needed) >= |
1241 | atomic_read(v: &sdp->sd_log_thresh1); |
1242 | } |
1243 | |
1244 | static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) |
1245 | { |
1246 | return sdp->sd_jdesc->jd_blocks - |
1247 | atomic_read(v: &sdp->sd_log_blks_free) + |
1248 | atomic_read(v: &sdp->sd_log_blks_needed) >= |
1249 | atomic_read(v: &sdp->sd_log_thresh2); |
1250 | } |
1251 | |
1252 | /** |
1253 | * gfs2_log_commit - Commit a transaction to the log |
1254 | * @sdp: the filesystem |
1255 | * @tr: the transaction |
1256 | * |
1257 | * We wake up gfs2_logd if the number of pinned blocks exceed thresh1 |
1258 | * or the total number of used blocks (pinned blocks plus AIL blocks) |
1259 | * is greater than thresh2. |
1260 | * |
1261 | * At mount time thresh1 is 2/5ths of journal size, thresh2 is 4/5ths of |
1262 | * journal size. |
1263 | * |
1264 | * Returns: errno |
1265 | */ |
1266 | |
1267 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
1268 | { |
1269 | log_refund(sdp, tr); |
1270 | |
1271 | if (gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp)) |
1272 | wake_up(&sdp->sd_logd_waitq); |
1273 | } |
1274 | |
1275 | /** |
1276 | * gfs2_log_shutdown - write a shutdown header into a journal |
1277 | * @sdp: the filesystem |
1278 | * |
1279 | */ |
1280 | |
1281 | static void gfs2_log_shutdown(struct gfs2_sbd *sdp) |
1282 | { |
1283 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); |
1284 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
1285 | gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); |
1286 | |
1287 | log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN); |
1288 | log_pull_tail(sdp); |
1289 | |
1290 | gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); |
1291 | gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); |
1292 | } |
1293 | |
1294 | /** |
1295 | * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks |
1296 | * @data: Pointer to GFS2 superblock |
1297 | * |
1298 | * Also, periodically check to make sure that we're using the most recent |
1299 | * journal index. |
1300 | */ |
1301 | |
1302 | int gfs2_logd(void *data) |
1303 | { |
1304 | struct gfs2_sbd *sdp = data; |
1305 | unsigned long t = 1; |
1306 | |
1307 | set_freezable(); |
1308 | while (!kthread_should_stop()) { |
1309 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
1310 | break; |
1311 | |
1312 | /* Check for errors writing to the journal */ |
1313 | if (sdp->sd_log_error) { |
1314 | gfs2_lm(sdp, |
1315 | fmt: "GFS2: fsid=%s: error %d: " |
1316 | "withdrawing the file system to " |
1317 | "prevent further damage.\n" , |
1318 | sdp->sd_fsname, sdp->sd_log_error); |
1319 | gfs2_withdraw(sdp); |
1320 | break; |
1321 | } |
1322 | |
1323 | if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { |
1324 | gfs2_ail1_empty(sdp, max_revokes: 0); |
1325 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | |
1326 | GFS2_LFC_LOGD_JFLUSH_REQD); |
1327 | } |
1328 | |
1329 | if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || |
1330 | gfs2_ail_flush_reqd(sdp)) { |
1331 | clear_bit(nr: SDF_FORCE_AIL_FLUSH, addr: &sdp->sd_flags); |
1332 | gfs2_ail1_start(sdp); |
1333 | gfs2_ail1_wait(sdp); |
1334 | gfs2_ail1_empty(sdp, max_revokes: 0); |
1335 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | |
1336 | GFS2_LFC_LOGD_AIL_FLUSH_REQD); |
1337 | } |
1338 | |
1339 | t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; |
1340 | |
1341 | t = wait_event_freezable_timeout(sdp->sd_logd_waitq, |
1342 | test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || |
1343 | gfs2_ail_flush_reqd(sdp) || |
1344 | gfs2_jrnl_flush_reqd(sdp) || |
1345 | sdp->sd_log_error || |
1346 | gfs2_withdrawing_or_withdrawn(sdp) || |
1347 | kthread_should_stop(), |
1348 | t); |
1349 | } |
1350 | |
1351 | if (gfs2_withdrawing(sdp)) |
1352 | gfs2_withdraw(sdp); |
1353 | |
1354 | return 0; |
1355 | } |
1356 | |
1357 | |