1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
4 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
5 | */ |
6 | |
7 | #include <linux/sched.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/spinlock.h> |
10 | #include <linux/completion.h> |
11 | #include <linux/buffer_head.h> |
12 | #include <linux/mm.h> |
13 | #include <linux/pagemap.h> |
14 | #include <linux/writeback.h> |
15 | #include <linux/swap.h> |
16 | #include <linux/delay.h> |
17 | #include <linux/bio.h> |
18 | #include <linux/gfs2_ondisk.h> |
19 | |
20 | #include "gfs2.h" |
21 | #include "incore.h" |
22 | #include "glock.h" |
23 | #include "glops.h" |
24 | #include "inode.h" |
25 | #include "log.h" |
26 | #include "lops.h" |
27 | #include "meta_io.h" |
28 | #include "rgrp.h" |
29 | #include "trans.h" |
30 | #include "util.h" |
31 | #include "trace_gfs2.h" |
32 | |
33 | static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) |
34 | { |
35 | struct buffer_head *bh, *head; |
36 | int nr_underway = 0; |
37 | blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); |
38 | |
39 | BUG_ON(!PageLocked(page)); |
40 | BUG_ON(!page_has_buffers(page)); |
41 | |
42 | head = page_buffers(page); |
43 | bh = head; |
44 | |
45 | do { |
46 | if (!buffer_mapped(bh)) |
47 | continue; |
48 | /* |
49 | * If it's a fully non-blocking write attempt and we cannot |
50 | * lock the buffer then redirty the page. Note that this can |
51 | * potentially cause a busy-wait loop from flusher thread and kswapd |
52 | * activity, but those code paths have their own higher-level |
53 | * throttling. |
54 | */ |
55 | if (wbc->sync_mode != WB_SYNC_NONE) { |
56 | lock_buffer(bh); |
57 | } else if (!trylock_buffer(bh)) { |
58 | redirty_page_for_writepage(wbc, page); |
59 | continue; |
60 | } |
61 | if (test_clear_buffer_dirty(bh)) { |
62 | mark_buffer_async_write(bh); |
63 | } else { |
64 | unlock_buffer(bh); |
65 | } |
66 | } while ((bh = bh->b_this_page) != head); |
67 | |
68 | /* |
69 | * The page and its buffers are protected by PageWriteback(), so we can |
70 | * drop the bh refcounts early. |
71 | */ |
72 | BUG_ON(PageWriteback(page)); |
73 | set_page_writeback(page); |
74 | |
75 | do { |
76 | struct buffer_head *next = bh->b_this_page; |
77 | if (buffer_async_write(bh)) { |
78 | submit_bh(REQ_OP_WRITE | write_flags, bh); |
79 | nr_underway++; |
80 | } |
81 | bh = next; |
82 | } while (bh != head); |
83 | unlock_page(page); |
84 | |
85 | if (nr_underway == 0) |
86 | end_page_writeback(page); |
87 | |
88 | return 0; |
89 | } |
90 | |
91 | const struct address_space_operations gfs2_meta_aops = { |
92 | .dirty_folio = block_dirty_folio, |
93 | .invalidate_folio = block_invalidate_folio, |
94 | .writepage = gfs2_aspace_writepage, |
95 | .release_folio = gfs2_release_folio, |
96 | }; |
97 | |
98 | const struct address_space_operations gfs2_rgrp_aops = { |
99 | .dirty_folio = block_dirty_folio, |
100 | .invalidate_folio = block_invalidate_folio, |
101 | .writepage = gfs2_aspace_writepage, |
102 | .release_folio = gfs2_release_folio, |
103 | }; |
104 | |
105 | /** |
106 | * gfs2_getbuf - Get a buffer with a given address space |
107 | * @gl: the glock |
108 | * @blkno: the block number (filesystem scope) |
109 | * @create: 1 if the buffer should be created |
110 | * |
111 | * Returns: the buffer |
112 | */ |
113 | |
114 | struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) |
115 | { |
116 | struct address_space *mapping = gfs2_glock2aspace(gl); |
117 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
118 | struct folio *folio; |
119 | struct buffer_head *bh; |
120 | unsigned int shift; |
121 | unsigned long index; |
122 | unsigned int bufnum; |
123 | |
124 | if (mapping == NULL) |
125 | mapping = &sdp->sd_aspace; |
126 | |
127 | shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; |
128 | index = blkno >> shift; /* convert block to page */ |
129 | bufnum = blkno - (index << shift); /* block buf index within page */ |
130 | |
131 | if (create) { |
132 | folio = __filemap_get_folio(mapping, index, |
133 | FGP_LOCK | FGP_ACCESSED | FGP_CREAT, |
134 | gfp: mapping_gfp_mask(mapping) | __GFP_NOFAIL); |
135 | bh = folio_buffers(folio); |
136 | if (!bh) |
137 | bh = create_empty_buffers(folio, |
138 | blocksize: sdp->sd_sb.sb_bsize, b_state: 0); |
139 | } else { |
140 | folio = __filemap_get_folio(mapping, index, |
141 | FGP_LOCK | FGP_ACCESSED, gfp: 0); |
142 | if (IS_ERR(ptr: folio)) |
143 | return NULL; |
144 | bh = folio_buffers(folio); |
145 | } |
146 | |
147 | if (!bh) |
148 | goto out_unlock; |
149 | |
150 | bh = get_nth_bh(bh, count: bufnum); |
151 | if (!buffer_mapped(bh)) |
152 | map_bh(bh, sb: sdp->sd_vfs, block: blkno); |
153 | |
154 | out_unlock: |
155 | folio_unlock(folio); |
156 | folio_put(folio); |
157 | |
158 | return bh; |
159 | } |
160 | |
161 | static void meta_prep_new(struct buffer_head *bh) |
162 | { |
163 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; |
164 | |
165 | lock_buffer(bh); |
166 | clear_buffer_dirty(bh); |
167 | set_buffer_uptodate(bh); |
168 | unlock_buffer(bh); |
169 | |
170 | mh->mh_magic = cpu_to_be32(GFS2_MAGIC); |
171 | } |
172 | |
173 | /** |
174 | * gfs2_meta_new - Get a block |
175 | * @gl: The glock associated with this block |
176 | * @blkno: The block number |
177 | * |
178 | * Returns: The buffer |
179 | */ |
180 | |
181 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) |
182 | { |
183 | struct buffer_head *bh; |
184 | bh = gfs2_getbuf(gl, blkno, create: CREATE); |
185 | meta_prep_new(bh); |
186 | return bh; |
187 | } |
188 | |
189 | static void gfs2_meta_read_endio(struct bio *bio) |
190 | { |
191 | struct bio_vec *bvec; |
192 | struct bvec_iter_all iter_all; |
193 | |
194 | bio_for_each_segment_all(bvec, bio, iter_all) { |
195 | struct page *page = bvec->bv_page; |
196 | struct buffer_head *bh = page_buffers(page); |
197 | unsigned int len = bvec->bv_len; |
198 | |
199 | while (bh_offset(bh) < bvec->bv_offset) |
200 | bh = bh->b_this_page; |
201 | do { |
202 | struct buffer_head *next = bh->b_this_page; |
203 | len -= bh->b_size; |
204 | bh->b_end_io(bh, !bio->bi_status); |
205 | bh = next; |
206 | } while (bh && len); |
207 | } |
208 | bio_put(bio); |
209 | } |
210 | |
211 | /* |
212 | * Submit several consecutive buffer head I/O requests as a single bio I/O |
213 | * request. (See submit_bh_wbc.) |
214 | */ |
215 | static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) |
216 | { |
217 | while (num > 0) { |
218 | struct buffer_head *bh = *bhs; |
219 | struct bio *bio; |
220 | |
221 | bio = bio_alloc(bdev: bh->b_bdev, nr_vecs: num, opf, GFP_NOIO); |
222 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
223 | while (num > 0) { |
224 | bh = *bhs; |
225 | if (!bio_add_page(bio, page: bh->b_page, len: bh->b_size, off: bh_offset(bh))) { |
226 | BUG_ON(bio->bi_iter.bi_size == 0); |
227 | break; |
228 | } |
229 | bhs++; |
230 | num--; |
231 | } |
232 | bio->bi_end_io = gfs2_meta_read_endio; |
233 | submit_bio(bio); |
234 | } |
235 | } |
236 | |
237 | /** |
238 | * gfs2_meta_read - Read a block from disk |
239 | * @gl: The glock covering the block |
240 | * @blkno: The block number |
241 | * @flags: flags |
242 | * @rahead: Do read-ahead |
243 | * @bhp: the place where the buffer is returned (NULL on failure) |
244 | * |
245 | * Returns: errno |
246 | */ |
247 | |
248 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, |
249 | int rahead, struct buffer_head **bhp) |
250 | { |
251 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
252 | struct buffer_head *bh, *bhs[2]; |
253 | int num = 0; |
254 | |
255 | if (gfs2_withdrawing_or_withdrawn(sdp) && |
256 | !gfs2_withdraw_in_prog(sdp)) { |
257 | *bhp = NULL; |
258 | return -EIO; |
259 | } |
260 | |
261 | *bhp = bh = gfs2_getbuf(gl, blkno, create: CREATE); |
262 | |
263 | lock_buffer(bh); |
264 | if (buffer_uptodate(bh)) { |
265 | unlock_buffer(bh); |
266 | flags &= ~DIO_WAIT; |
267 | } else { |
268 | bh->b_end_io = end_buffer_read_sync; |
269 | get_bh(bh); |
270 | bhs[num++] = bh; |
271 | } |
272 | |
273 | if (rahead) { |
274 | bh = gfs2_getbuf(gl, blkno: blkno + 1, create: CREATE); |
275 | |
276 | lock_buffer(bh); |
277 | if (buffer_uptodate(bh)) { |
278 | unlock_buffer(bh); |
279 | brelse(bh); |
280 | } else { |
281 | bh->b_end_io = end_buffer_read_sync; |
282 | bhs[num++] = bh; |
283 | } |
284 | } |
285 | |
286 | gfs2_submit_bhs(opf: REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num); |
287 | if (!(flags & DIO_WAIT)) |
288 | return 0; |
289 | |
290 | bh = *bhp; |
291 | wait_on_buffer(bh); |
292 | if (unlikely(!buffer_uptodate(bh))) { |
293 | struct gfs2_trans *tr = current->journal_info; |
294 | if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) |
295 | gfs2_io_error_bh_wd(sdp, bh); |
296 | brelse(bh); |
297 | *bhp = NULL; |
298 | return -EIO; |
299 | } |
300 | |
301 | return 0; |
302 | } |
303 | |
304 | /** |
305 | * gfs2_meta_wait - Reread a block from disk |
306 | * @sdp: the filesystem |
307 | * @bh: The block to wait for |
308 | * |
309 | * Returns: errno |
310 | */ |
311 | |
312 | int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) |
313 | { |
314 | if (gfs2_withdrawing_or_withdrawn(sdp) && |
315 | !gfs2_withdraw_in_prog(sdp)) |
316 | return -EIO; |
317 | |
318 | wait_on_buffer(bh); |
319 | |
320 | if (!buffer_uptodate(bh)) { |
321 | struct gfs2_trans *tr = current->journal_info; |
322 | if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) |
323 | gfs2_io_error_bh_wd(sdp, bh); |
324 | return -EIO; |
325 | } |
326 | if (gfs2_withdrawing_or_withdrawn(sdp) && |
327 | !gfs2_withdraw_in_prog(sdp)) |
328 | return -EIO; |
329 | |
330 | return 0; |
331 | } |
332 | |
333 | void gfs2_remove_from_journal(struct buffer_head *bh, int meta) |
334 | { |
335 | struct address_space *mapping = bh->b_folio->mapping; |
336 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); |
337 | struct gfs2_bufdata *bd = bh->b_private; |
338 | struct gfs2_trans *tr = current->journal_info; |
339 | int was_pinned = 0; |
340 | |
341 | if (test_clear_buffer_pinned(bh)) { |
342 | trace_gfs2_pin(bd, pin: 0); |
343 | atomic_dec(v: &sdp->sd_log_pinned); |
344 | list_del_init(entry: &bd->bd_list); |
345 | if (meta == REMOVE_META) |
346 | tr->tr_num_buf_rm++; |
347 | else |
348 | tr->tr_num_databuf_rm++; |
349 | set_bit(nr: TR_TOUCHED, addr: &tr->tr_flags); |
350 | was_pinned = 1; |
351 | brelse(bh); |
352 | } |
353 | if (bd) { |
354 | if (bd->bd_tr) { |
355 | gfs2_trans_add_revoke(sdp, bd); |
356 | } else if (was_pinned) { |
357 | bh->b_private = NULL; |
358 | kmem_cache_free(s: gfs2_bufdata_cachep, objp: bd); |
359 | } else if (!list_empty(head: &bd->bd_ail_st_list) && |
360 | !list_empty(head: &bd->bd_ail_gl_list)) { |
361 | gfs2_remove_from_ail(bd); |
362 | } |
363 | } |
364 | clear_buffer_dirty(bh); |
365 | clear_buffer_uptodate(bh); |
366 | } |
367 | |
368 | /** |
369 | * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list |
370 | * @sdp: superblock |
371 | * @bstart: starting block address of buffers to remove |
372 | * @blen: length of buffers to be removed |
373 | * |
374 | * This function is called from gfs2_journal wipe, whose job is to remove |
375 | * buffers, corresponding to deleted blocks, from the journal. If we find any |
376 | * bufdata elements on the system ail1 list, they haven't been written to |
377 | * the journal yet. So we remove them. |
378 | */ |
379 | static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen) |
380 | { |
381 | struct gfs2_trans *tr, *s; |
382 | struct gfs2_bufdata *bd, *bs; |
383 | struct buffer_head *bh; |
384 | u64 end = bstart + blen; |
385 | |
386 | gfs2_log_lock(sdp); |
387 | spin_lock(lock: &sdp->sd_ail_lock); |
388 | list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) { |
389 | list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list, |
390 | bd_ail_st_list) { |
391 | bh = bd->bd_bh; |
392 | if (bh->b_blocknr < bstart || bh->b_blocknr >= end) |
393 | continue; |
394 | |
395 | gfs2_remove_from_journal(bh, meta: REMOVE_JDATA); |
396 | } |
397 | } |
398 | spin_unlock(lock: &sdp->sd_ail_lock); |
399 | gfs2_log_unlock(sdp); |
400 | } |
401 | |
402 | static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno) |
403 | { |
404 | struct address_space *mapping = ip->i_inode.i_mapping; |
405 | struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode); |
406 | struct folio *folio; |
407 | struct buffer_head *bh; |
408 | unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; |
409 | unsigned long index = blkno >> shift; /* convert block to page */ |
410 | unsigned int bufnum = blkno - (index << shift); |
411 | |
412 | folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, gfp: 0); |
413 | if (IS_ERR(ptr: folio)) |
414 | return NULL; |
415 | bh = folio_buffers(folio); |
416 | if (bh) |
417 | bh = get_nth_bh(bh, count: bufnum); |
418 | folio_unlock(folio); |
419 | folio_put(folio); |
420 | return bh; |
421 | } |
422 | |
423 | /** |
424 | * gfs2_journal_wipe - make inode's buffers so they aren't dirty/pinned anymore |
425 | * @ip: the inode who owns the buffers |
426 | * @bstart: the first buffer in the run |
427 | * @blen: the number of buffers in the run |
428 | * |
429 | */ |
430 | |
431 | void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) |
432 | { |
433 | struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode); |
434 | struct buffer_head *bh; |
435 | int ty; |
436 | |
437 | if (!ip->i_gl) { |
438 | /* This can only happen during incomplete inode creation. */ |
439 | BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); |
440 | return; |
441 | } |
442 | |
443 | gfs2_ail1_wipe(sdp, bstart, blen); |
444 | while (blen) { |
445 | ty = REMOVE_META; |
446 | bh = gfs2_getbuf(gl: ip->i_gl, blkno: bstart, create: NO_CREATE); |
447 | if (!bh && gfs2_is_jdata(ip)) { |
448 | bh = gfs2_getjdatabuf(ip, blkno: bstart); |
449 | ty = REMOVE_JDATA; |
450 | } |
451 | if (bh) { |
452 | lock_buffer(bh); |
453 | gfs2_log_lock(sdp); |
454 | spin_lock(lock: &sdp->sd_ail_lock); |
455 | gfs2_remove_from_journal(bh, meta: ty); |
456 | spin_unlock(lock: &sdp->sd_ail_lock); |
457 | gfs2_log_unlock(sdp); |
458 | unlock_buffer(bh); |
459 | brelse(bh); |
460 | } |
461 | |
462 | bstart++; |
463 | blen--; |
464 | } |
465 | } |
466 | |
467 | /** |
468 | * gfs2_meta_buffer - Get a metadata buffer |
469 | * @ip: The GFS2 inode |
470 | * @mtype: The block type (GFS2_METATYPE_*) |
471 | * @num: The block number (device relative) of the buffer |
472 | * @bhp: the buffer is returned here |
473 | * |
474 | * Returns: errno |
475 | */ |
476 | |
477 | int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, |
478 | struct buffer_head **bhp) |
479 | { |
480 | struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode); |
481 | struct gfs2_glock *gl = ip->i_gl; |
482 | struct buffer_head *bh; |
483 | int ret = 0; |
484 | int rahead = 0; |
485 | |
486 | if (num == ip->i_no_addr) |
487 | rahead = ip->i_rahead; |
488 | |
489 | ret = gfs2_meta_read(gl, blkno: num, DIO_WAIT, rahead, bhp: &bh); |
490 | if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { |
491 | brelse(bh); |
492 | ret = -EIO; |
493 | } else { |
494 | *bhp = bh; |
495 | } |
496 | return ret; |
497 | } |
498 | |
499 | /** |
500 | * gfs2_meta_ra - start readahead on an extent of a file |
501 | * @gl: the glock the blocks belong to |
502 | * @dblock: the starting disk block |
503 | * @extlen: the number of blocks in the extent |
504 | * |
505 | * returns: the first buffer in the extent |
506 | */ |
507 | |
508 | struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) |
509 | { |
510 | struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; |
511 | struct buffer_head *first_bh, *bh; |
512 | u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> |
513 | sdp->sd_sb.sb_bsize_shift; |
514 | |
515 | BUG_ON(!extlen); |
516 | |
517 | if (max_ra < 1) |
518 | max_ra = 1; |
519 | if (extlen > max_ra) |
520 | extlen = max_ra; |
521 | |
522 | first_bh = gfs2_getbuf(gl, blkno: dblock, create: CREATE); |
523 | |
524 | if (buffer_uptodate(bh: first_bh)) |
525 | goto out; |
526 | bh_read_nowait(bh: first_bh, REQ_META | REQ_PRIO); |
527 | |
528 | dblock++; |
529 | extlen--; |
530 | |
531 | while (extlen) { |
532 | bh = gfs2_getbuf(gl, blkno: dblock, create: CREATE); |
533 | |
534 | bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO); |
535 | brelse(bh); |
536 | dblock++; |
537 | extlen--; |
538 | if (!buffer_locked(bh: first_bh) && buffer_uptodate(bh: first_bh)) |
539 | goto out; |
540 | } |
541 | |
542 | wait_on_buffer(bh: first_bh); |
543 | out: |
544 | return first_bh; |
545 | } |
546 | |
547 | |