// SPDX-License-Identifier: GPL-2.0+
/*
 * Buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi and Seiji Kihara.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS					\
	(BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) |	\
	 BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))

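/*
 * __nilfs_get_folio_block() - return the buffer head for the block at
 * @block on @folio, attaching empty buffers to the folio first if it has
 * none.  The returned buffer head carries an extra reference.
 */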
static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
						   unsigned long block,
						   pgoff_t index, int blkbits,
						   unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh = folio_buffers(folio);

	if (!bh)
		bh = create_empty_buffers(folio, 1 << blkbits, b_state);

	first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
	bh = get_nth_bh(bh, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

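/**
 * nilfs_grab_buffer - return a buffer head covering the given block
 * @inode: inode owning the block
 * @mapping: page cache in which the block is looked up or created
 * @blkoff: block offset within @mapping
 * @b_state: initial buffer state bits used if buffers must be created
 *
 * Returns the buffer head for the block at @blkoff with its folio locked
 * and an extra reference held, or NULL if the folio cannot be grabbed.
 */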
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
	struct folio *folio;
	struct buffer_head *bh;

	folio = filemap_grab_folio(mapping, index);
	if (IS_ERR(folio))
		return NULL;

	bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}
	return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct folio *folio = bh->b_folio;
	const unsigned long clear_bits =
		(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
		 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
		 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

	lock_buffer(bh);
	set_mask_bits(&bh->b_state, clear_bits, 0);
	if (nilfs_folio_buffers_clean(folio))
		__nilfs_clear_folio_dirty(folio);

	bh->b_blocknr = -1;
	folio_clear_uptodate(folio);
	folio_clear_mappedtodisk(folio);
	unlock_buffer(bh);
	brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_local_page(spage);
	kaddr1 = kmap_local_page(dpage);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_local(kaddr1);
	kunmap_local(kaddr0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

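	/*
	 * A page may be marked Uptodate or MappedToDisk only if every
	 * buffer on it has the corresponding bit set, so AND the copied
	 * buffer's state with that of all other buffers on the page.
	 */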
	bh = dbh;
	bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & BIT(BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & BIT(BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_folio_buffers_clean - Check whether a folio has dirty buffers.
 * @folio: Folio to be checked.
 *
 * nilfs_folio_buffers_clean() returns false if the folio has dirty buffers.
 * Otherwise, it returns true.
 */
bool nilfs_folio_buffers_clean(struct folio *folio)
{
	struct buffer_head *bh, *head;

	bh = head = folio_buffers(folio);
	do {
		if (buffer_dirty(bh))
			return false;
		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}

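/**
 * nilfs_folio_bug - dump diagnostic state of a broken folio
 * @folio: folio to be dumped (may be NULL)
 *
 * Prints the reference count, index, flags, and owner of @folio, plus the
 * state of every buffer head attached to it, to the kernel log.
 */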
void nilfs_folio_bug(struct folio *folio)
{
	struct buffer_head *bh, *head;
	struct address_space *m;
	unsigned long ino;

	if (unlikely(!folio)) {
		printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n");
		return;
	}

	m = folio->mapping;
	ino = m ? m->host->i_ino : 0;

	printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       folio, folio_ref_count(folio),
	       (unsigned long long)folio->index, folio->flags, m, ino);

	head = folio_buffers(folio);
	if (head) {
		int i = 0;

		bh = head;
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}

/**
 * nilfs_copy_folio -- copy the folio with buffers
 * @dst: destination folio
 * @src: source folio
 * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads.
 *
 * This function is for both data folios and btnode folios.  The dirty flag
 * must be handled by the caller.  The folio must not be under I/O.
 * Both src and dst folios must be locked.
 */
static void nilfs_copy_folio(struct folio *dst, struct folio *src,
			     bool copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(folio_test_writeback(dst));

	sbh = folio_buffers(src);
	dbh = folio_buffers(dst);
	if (!dbh)
		dbh = create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= BIT(BH_Dirty);

	dbufs = dbh;
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	folio_copy(dst, src);

	if (folio_test_uptodate(src) && !folio_test_uptodate(dst))
		folio_mark_uptodate(dst);
	else if (!folio_test_uptodate(src) && folio_test_uptodate(dst))
		folio_clear_uptodate(dst);
	if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst))
		folio_set_mappedtodisk(dst);
	else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst))
		folio_clear_mappedtodisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}

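/**
 * nilfs_copy_dirty_pages - copy dirty pages from one cache to another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Copies every folio tagged dirty in @smap into a matching folio in
 * @dmap, marking the copy dirty as well.
 *
 * Returns 0 on success, or a negative error code (typically -ENOMEM)
 * if a destination folio cannot be grabbed.
 */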
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct folio_batch fbatch;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	folio_batch_init(&fbatch);
repeat:
	if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1,
				    PAGECACHE_TAG_DIRTY, &fbatch))
		return 0;

	for (i = 0; i < folio_batch_count(&fbatch); i++) {
		struct folio *folio = fbatch.folios[i], *dfolio;

		folio_lock(folio);
		if (unlikely(!folio_test_dirty(folio)))
			NILFS_FOLIO_BUG(folio, "inconsistent dirty state");

		dfolio = filemap_grab_folio(dmap, folio->index);
		if (unlikely(IS_ERR(dfolio))) {
			/* No empty page is added to the page cache */
			folio_unlock(folio);
			err = PTR_ERR(dfolio);
			break;
		}
		if (unlikely(!folio_buffers(folio)))
			NILFS_FOLIO_BUG(folio,
					"found empty page in dat page cache");

		nilfs_copy_folio(dfolio, folio, true);
		filemap_dirty_folio(folio_mapping(dfolio), dfolio);

		folio_unlock(dfolio);
		folio_put(dfolio);
		folio_unlock(folio);
	}
	folio_batch_release(&fbatch);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct folio_batch fbatch;
	unsigned int i, n;
	pgoff_t start = 0;

	folio_batch_init(&fbatch);
repeat:
	n = filemap_get_folios(smap, &start, ~0UL, &fbatch);
	if (!n)
		return;

	for (i = 0; i < folio_batch_count(&fbatch); i++) {
		struct folio *folio = fbatch.folios[i], *dfolio;
		pgoff_t index = folio->index;

		folio_lock(folio);
		dfolio = filemap_lock_folio(dmap, index);
		if (!IS_ERR(dfolio)) {
			/* overwrite existing folio in the destination cache */
			WARN_ON(folio_test_dirty(dfolio));
			nilfs_copy_folio(dfolio, folio, false);
			folio_unlock(dfolio);
			folio_put(dfolio);
			/* Do we not need to remove folio from smap here? */
		} else {
			struct folio *f;

			/* move the folio to the destination cache */
			xa_lock_irq(&smap->i_pages);
			f = __xa_erase(&smap->i_pages, index);
			WARN_ON(folio != f);
			smap->nrpages--;
			xa_unlock_irq(&smap->i_pages);

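			/*
			 * The folio batch still holds a reference on the
			 * folio here, so it cannot be freed between the
			 * erase from smap and the store into dmap.
			 */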
			xa_lock_irq(&dmap->i_pages);
			f = __xa_store(&dmap->i_pages, index, folio, GFP_NOFS);
			if (unlikely(f)) {
				/* Probably -ENOMEM */
				folio->mapping = NULL;
				folio_put(folio);
			} else {
				folio->mapping = dmap;
				dmap->nrpages++;
				if (folio_test_dirty(folio))
					__xa_set_mark(&dmap->i_pages, index,
						      PAGECACHE_TAG_DIRTY);
			}
			xa_unlock_irq(&dmap->i_pages);
		}
		folio_unlock(folio);
	}
	folio_batch_release(&fbatch);
	cond_resched();

	goto repeat;
}

/**
 * nilfs_clear_dirty_pages - discard dirty pages in address space
 * @mapping: address space with dirty pages for discarding
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
	struct folio_batch fbatch;
	unsigned int i;
	pgoff_t index = 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1,
				      PAGECACHE_TAG_DIRTY, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			folio_lock(folio);

			/*
			 * This folio may have been removed from the address
			 * space by truncation or invalidation when the lock
			 * was acquired.  Skip processing in that case.
			 */
			if (likely(folio->mapping == mapping))
				nilfs_clear_folio_dirty(folio, silent);

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

/**
 * nilfs_clear_folio_dirty - discard dirty folio
 * @folio: dirty folio that will be discarded
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_folio_dirty(struct folio *folio, bool silent)
{
	struct inode *inode = folio->mapping->host;
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bh, *head;

	BUG_ON(!folio_test_locked(folio));

	if (!silent)
		nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu",
			   folio_pos(folio), inode->i_ino);

	folio_clear_uptodate(folio);
	folio_clear_mappedtodisk(folio);

	head = folio_buffers(folio);
	if (head) {
		const unsigned long clear_bits =
			(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
			 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
			 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

		bh = head;
		do {
			lock_buffer(bh);
			if (!silent)
				nilfs_warn(sb,
					   "discard dirty block: blocknr=%llu, size=%zu",
					   (u64)bh->b_blocknr, bh->b_size);

			set_mask_bits(&bh->b_state, clear_bits, 0);
			unlock_buffer(bh);
		} while (bh = bh->b_this_page, bh != head);
	}

	__nilfs_clear_folio_dirty(folio);
}

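/**
 * nilfs_page_count_clean_buffers - count clean buffers in a byte range
 * @page: page whose buffers are scanned
 * @from: start offset within the page (inclusive)
 * @to: end offset within the page (exclusive)
 *
 * Returns the number of non-dirty buffers overlapping the byte range
 * [@from, @to) of @page.
 */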
unsigned int nilfs_page_count_clean_buffers(struct page *page,
					    unsigned int from, unsigned int to)
{
	unsigned int block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned int nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty
 *    flag of pages when it copies back pages from shadow cache to the
 *    original cache.
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their
 *    pages.
 */
void __nilfs_clear_folio_dirty(struct folio *folio)
{
	struct address_space *mapping = folio->mapping;

	if (mapping) {
		xa_lock_irq(&mapping->i_pages);
		if (folio_test_dirty(folio)) {
			__xa_clear_mark(&mapping->i_pages, folio->index,
					PAGECACHE_TAG_DIRTY);
			xa_unlock_irq(&mapping->i_pages);
			folio_clear_dirty_for_io(folio);
			return;
		}
		xa_unlock_irq(&mapping->i_pages);
		return;
	}
	folio_clear_dirty(folio);
}

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent was found, this will store the start offset in
 * @blkoff and return its length in blocks.  Otherwise, zero is
 * returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
					    sector_t start_blk,
					    sector_t *blkoff)
{
	unsigned int i, nr_folios;
	pgoff_t index;
	unsigned long length = 0;
	struct folio_batch fbatch;
	struct folio *folio;

	if (inode->i_mapping->nrpages == 0)
		return 0;

	index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);

	folio_batch_init(&fbatch);

repeat:
	nr_folios = filemap_get_folios_contig(inode->i_mapping, &index,
					      ULONG_MAX, &fbatch);
	if (nr_folios == 0)
		return length;

	i = 0;
	do {
		folio = fbatch.folios[i];

		folio_lock(folio);
		if (folio_buffers(folio)) {
			struct buffer_head *bh, *head;
			sector_t b;

			b = folio->index << (PAGE_SHIFT - inode->i_blkbits);
			bh = head = folio_buffers(folio);
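			/*
			 * Walk the buffers in block order: extend the extent
			 * while buffers stay "delayed", and stop at the first
			 * non-delayed buffer once the extent has started.
			 */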
			do {
				if (b < start_blk)
					continue;
				if (buffer_delay(bh)) {
					if (length == 0)
						*blkoff = b;
					length++;
				} else if (length > 0) {
					goto out_locked;
				}
			} while (++b, bh = bh->b_this_page, bh != head);
		} else {
			if (length > 0)
				goto out_locked;
		}
		folio_unlock(folio);
	} while (++i < nr_folios);

	folio_batch_release(&fbatch);
	cond_resched();
	goto repeat;

out_locked:
	folio_unlock(folio);
	folio_batch_release(&fbatch);
	return length;
}