// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "subpage.h"
#include "btrfs_inode.h"

/*
 * Subpage (block size < folio size) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as a 64K page ensures that
 *   every valid nodesize fits inside one page, thus we don't need to handle
 *   cases where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and the kernel have ensured this for a while, thus only
 *   ancient filesystems could have such a problem. For such a case, do a
 *   graceful rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported.
 *   Meaning that reading one tree block will only trigger the read for the
 *   needed range; other unrelated ranges in the same page will not be touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit
 *   the dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *     0         16K         32K         48K        64K
 *     |/////////|           |///////////|
 *          \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback, which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_subpage, to
 *   record the status of each sector inside a page. This provides the extra
 *   granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on page
 *   locking anymore, or we would have greatly reduced concurrency or even
 *   deadlocks (holding one tree lock while trying to lock another tree block
 *   in the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking only.
 *   This means a slightly higher tree locking latency.
 */
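
/*
 * Layout sketch of btrfs_subpage::bitmaps, for illustration only (assuming a
 * 4K block size and a 64K folio, i.e. blocks_per_folio == 16):
 *
 *      [ state 0: 16 bits ][ state 1: 16 bits ] ... one run per tracked state
 *
 * Each tracked state (uptodate, dirty, writeback, ordered, checked, locked)
 * owns one consecutive run of blocks_per_folio bits, and the run for state
 * @name starts at bit (blocks_per_folio * btrfs_bitmap_nr_##name); see
 * subpage_calc_start_bit() below.
 */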

int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
                         struct folio *folio, enum btrfs_subpage_type type)
{
        struct btrfs_subpage *subpage;

        /* For metadata we don't support large folio yet. */
        if (type == BTRFS_SUBPAGE_METADATA)
                ASSERT(!folio_test_large(folio));

        /*
         * We have cases like a dummy extent buffer page, which is not mapped
         * and doesn't need to be locked.
         */
        if (folio->mapping)
                ASSERT(folio_test_locked(folio));

        /* Either not subpage, or the folio already has private attached. */
        if (folio_test_private(folio))
                return 0;
        if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
                return 0;
        if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
                return 0;

        subpage = btrfs_alloc_subpage(fs_info, folio_size(folio), type);
        if (IS_ERR(subpage))
                return PTR_ERR(subpage);

        folio_attach_private(folio, subpage);
        return 0;
}

void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio,
                          enum btrfs_subpage_type type)
{
        struct btrfs_subpage *subpage;

        /* Either not subpage, or the folio has no private attached. */
        if (!folio_test_private(folio))
                return;
        if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
                return;
        if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
                return;

        subpage = folio_detach_private(folio);
        ASSERT(subpage);
        btrfs_free_subpage(subpage);
}
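
/*
 * For illustration only, the usual lifetime of the private data managed by
 * the two helpers above (the exact call sites live outside this file):
 *
 *      btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
 *      ... per-block state tracking and IO on the folio ...
 *      btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
 */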

struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
                                          size_t fsize, enum btrfs_subpage_type type)
{
        struct btrfs_subpage *ret;
        unsigned int real_size;

        ASSERT(fs_info->sectorsize < fsize);

        real_size = struct_size(ret, bitmaps,
                        BITS_TO_LONGS(btrfs_bitmap_nr_max *
                                      (fsize >> fs_info->sectorsize_bits)));
        ret = kzalloc(real_size, GFP_NOFS);
        if (!ret)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&ret->lock);
        if (type == BTRFS_SUBPAGE_METADATA)
                atomic_set(&ret->eb_refs, 0);
        else
                atomic_set(&ret->nr_locked, 0);
        return ret;
}
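
/*
 * Worked example of the allocation size above, for illustration only
 * (assuming a 4K block size, a 64K folio and six per-state bitmaps, i.e.
 * btrfs_bitmap_nr_max == 6):
 *
 *      blocks per folio = 64K >> 12         = 16
 *      total bits       = 6 * 16            = 96
 *      bitmap longs     = BITS_TO_LONGS(96) = 2 on a 64-bit machine
 *      real_size        = struct_size(ret, bitmaps, 2)
 */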

void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
        kfree(subpage);
}

/*
 * Increase the eb_refs of the current subpage.
 *
 * This is important for eb allocation, to prevent a race with the last eb
 * freeing of the same page.
 * With the eb_refs increased before the eb is inserted into the radix tree,
 * detach_extent_buffer_page() won't detach the folio private while we're still
 * allocating the extent buffer.
 */
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
        struct btrfs_subpage *subpage;

        if (!btrfs_meta_is_subpage(fs_info))
                return;

        ASSERT(folio_test_private(folio) && folio->mapping);
        lockdep_assert_held(&folio->mapping->i_private_lock);

        subpage = folio_get_private(folio);
        atomic_inc(&subpage->eb_refs);
}

void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
        struct btrfs_subpage *subpage;

        if (!btrfs_meta_is_subpage(fs_info))
                return;

        ASSERT(folio_test_private(folio) && folio->mapping);
        lockdep_assert_held(&folio->mapping->i_private_lock);

        subpage = folio_get_private(folio);
        ASSERT(atomic_read(&subpage->eb_refs));
        atomic_dec(&subpage->eb_refs);
}

static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        /* Basic checks */
        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
               IS_ALIGNED(len, fs_info->sectorsize));
        /*
         * The range check only works for a mapped folio; we can still have
         * unmapped folios like dummy extent buffer pages.
         */
        if (folio->mapping)
                ASSERT(folio_pos(folio) <= start &&
                       start + len <= folio_pos(folio) + folio_size(folio));
}

#define subpage_calc_start_bit(fs_info, folio, name, start, len)          \
({                                                                         \
        unsigned int __start_bit;                                          \
        const unsigned int blocks_per_folio =                              \
                           btrfs_blocks_per_folio(fs_info, folio);         \
                                                                           \
        btrfs_subpage_assert(fs_info, folio, start, len);                  \
        __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \
        __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name;          \
        __start_bit;                                                       \
})
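
/*
 * Worked example of the start bit calculation above, for illustration only
 * (assuming a 4K block size, a 64K folio at file offset 128K, a range
 * starting at byte 144K and the dirty bitmap as @name):
 *
 *      offset_in_folio(folio, start) = 144K - 128K = 16K
 *      __start_bit                   = 16K >> 12   = 4
 *      __start_bit                  += 16 * btrfs_bitmap_nr_dirty
 */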

static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
{
        u64 orig_start = *start;
        u32 orig_len = *len;

        *start = max_t(u64, folio_pos(folio), orig_start);
        /*
         * For certain call sites like btrfs_drop_pages(), we may have pages
         * beyond the target range. In that case, just set @len to 0, subpage
         * helpers can handle @len == 0 without any problem.
         */
        if (folio_pos(folio) >= orig_start + orig_len)
                *len = 0;
        else
                *len = min_t(u64, folio_pos(folio) + folio_size(folio),
                             orig_start + orig_len) - *start;
}
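
/*
 * Clamping example, for illustration only: given a 64K folio at file offset
 * 64K and a range [96K, 160K), the helper above clamps it to start = 96K and
 * len = 32K, i.e. only the part of the range that overlaps this folio.
 */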

static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
                                            struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
        const int nbits = (len >> fs_info->sectorsize_bits);
        unsigned long flags;
        unsigned int cleared = 0;
        int bit = start_bit;
        bool last;

        btrfs_subpage_assert(fs_info, folio, start, len);

        spin_lock_irqsave(&subpage->lock, flags);
        /*
         * We have call sites passing @locked_page into
         * extent_clear_unlock_delalloc() for the compression path.
         *
         * This @locked_page is locked by plain lock_page(), thus its
         * subpage::locked is 0. Handle them in a special way.
         */
        if (atomic_read(&subpage->nr_locked) == 0) {
                spin_unlock_irqrestore(&subpage->lock, flags);
                return true;
        }

        for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) {
                clear_bit(bit, subpage->bitmaps);
                cleared++;
        }
        ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
        last = atomic_sub_and_test(cleared, &subpage->nr_locked);
        spin_unlock_irqrestore(&subpage->lock, flags);
        return last;
}

/*
 * Handle different locked folios:
 *
 * - Non-subpage folio
 *   Just unlock it.
 *
 * - folio locked but without any subpage locked
 *   This happens either before writepage_delalloc() or the delalloc range is
 *   already handled by a previous folio.
 *   We can simply unlock it.
 *
 * - folio locked with subpage range locked.
 *   We go through the locked sectors inside the range and clear their locked
 *   bitmap, reduce the writer lock number, and unlock the page if that's
 *   the last locked range.
 */
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);

        ASSERT(folio_test_locked(folio));

        if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) {
                folio_unlock(folio);
                return;
        }

        /*
         * For the subpage case, there are two types of locked folios: with
         * or without a locked number.
         *
         * Since we own the page lock, no one else could touch subpage::locked
         * and we are safe to do several atomic operations without spinlock.
         */
        if (atomic_read(&subpage->nr_locked) == 0) {
                /* No subpage lock, locked by plain lock_page(). */
                folio_unlock(folio);
                return;
        }

        btrfs_subpage_clamp_range(folio, &start, &len);
        if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
                folio_unlock(folio);
}

void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, unsigned long bitmap)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
        const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked;
        unsigned long flags;
        bool last = false;
        int cleared = 0;
        int bit;

        if (!btrfs_is_subpage(fs_info, folio)) {
                folio_unlock(folio);
                return;
        }

        if (atomic_read(&subpage->nr_locked) == 0) {
                /* No subpage lock, locked by plain lock_page(). */
                folio_unlock(folio);
                return;
        }

        spin_lock_irqsave(&subpage->lock, flags);
        for_each_set_bit(bit, &bitmap, blocks_per_folio) {
                if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
                        cleared++;
        }
        ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
        last = atomic_sub_and_test(cleared, &subpage->nr_locked);
        spin_unlock_irqrestore(&subpage->lock, flags);
        if (last)
                folio_unlock(folio);
}

#define subpage_test_bitmap_all_set(fs_info, folio, name)                  \
({                                                                         \
        struct btrfs_subpage *subpage = folio_get_private(folio);          \
        const unsigned int blocks_per_folio =                              \
                           btrfs_blocks_per_folio(fs_info, folio);         \
                                                                           \
        bitmap_test_range_all_set(subpage->bitmaps,                        \
                        blocks_per_folio * btrfs_bitmap_nr_##name,         \
                        blocks_per_folio);                                 \
})

#define subpage_test_bitmap_all_zero(fs_info, folio, name)                 \
({                                                                         \
        struct btrfs_subpage *subpage = folio_get_private(folio);          \
        const unsigned int blocks_per_folio =                              \
                           btrfs_blocks_per_folio(fs_info, folio);         \
                                                                           \
        bitmap_test_range_all_zero(subpage->bitmaps,                       \
                        blocks_per_folio * btrfs_bitmap_nr_##name,         \
                        blocks_per_folio);                                 \
})

void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
                                struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        uptodate, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_set(fs_info, folio, uptodate))
                folio_mark_uptodate(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
                                  struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        uptodate, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        folio_clear_uptodate(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
                             struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        dirty, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        spin_unlock_irqrestore(&subpage->lock, flags);
        folio_mark_dirty(folio);
}

/*
 * Extra clear_and_test function for the subpage dirty bitmap.
 *
 * Return true if the cleared bits were the last dirty bits in the bitmap,
 * i.e. the whole dirty bitmap is now zero.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear the folio dirty flag for the true case,
 * as we have extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
                                        struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        dirty, start, len);
        unsigned long flags;
        bool last = false;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
                last = true;
        spin_unlock_irqrestore(&subpage->lock, flags);
        return last;
}

void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
                               struct folio *folio, u64 start, u32 len)
{
        bool last;

        last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
        if (last)
                folio_clear_dirty_for_io(folio);
}

void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        writeback, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (!folio_test_writeback(folio))
                folio_start_writeback(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
                                   struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        writeback, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
                ASSERT(folio_test_writeback(folio));
                folio_end_writeback(folio);
        }
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
                               struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        ordered, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        folio_set_ordered(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        ordered, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_zero(fs_info, folio, ordered))
                folio_clear_ordered(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
                               struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        checked, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_set(fs_info, folio, checked))
                folio_set_checked(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        checked, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        folio_clear_checked(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Unlike set/clear, which depends on each folio's status, for the test
 * operations all bits are tested in the same way.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)                              \
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,        \
                               struct folio *folio, u64 start, u32 len)    \
{                                                                          \
        struct btrfs_subpage *subpage = folio_get_private(folio);          \
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,    \
                                                        name, start, len); \
        unsigned long flags;                                               \
        bool ret;                                                          \
                                                                           \
        spin_lock_irqsave(&subpage->lock, flags);                          \
        ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit,       \
                                        len >> fs_info->sectorsize_bits);  \
        spin_unlock_irqrestore(&subpage->lock, flags);                     \
        return ret;                                                        \
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);

/*
 * Note that, in selftests (extent-io-tests), we can have a NULL fs_info
 * passed in. We only test sectorsize == PAGE_SIZE cases so far, thus we can
 * fall back to the regular sectorsize branch.
 */
#define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func,                     \
                                 folio_clear_func, folio_test_func)        \
void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info,           \
                            struct folio *folio, u64 start, u32 len)       \
{                                                                          \
        if (unlikely(!fs_info) ||                                          \
            !btrfs_is_subpage(fs_info, folio)) {                           \
                folio_set_func(folio);                                     \
                return;                                                    \
        }                                                                  \
        btrfs_subpage_set_##name(fs_info, folio, start, len);              \
}                                                                          \
void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info,         \
                              struct folio *folio, u64 start, u32 len)     \
{                                                                          \
        if (unlikely(!fs_info) ||                                          \
            !btrfs_is_subpage(fs_info, folio)) {                           \
                folio_clear_func(folio);                                   \
                return;                                                    \
        }                                                                  \
        btrfs_subpage_clear_##name(fs_info, folio, start, len);            \
}                                                                          \
bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info,          \
                             struct folio *folio, u64 start, u32 len)      \
{                                                                          \
        if (unlikely(!fs_info) ||                                          \
            !btrfs_is_subpage(fs_info, folio))                             \
                return folio_test_func(folio);                             \
        return btrfs_subpage_test_##name(fs_info, folio, start, len);      \
}                                                                          \
void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info,     \
                                  struct folio *folio, u64 start, u32 len) \
{                                                                          \
        if (unlikely(!fs_info) ||                                          \
            !btrfs_is_subpage(fs_info, folio)) {                           \
                folio_set_func(folio);                                     \
                return;                                                    \
        }                                                                  \
        btrfs_subpage_clamp_range(folio, &start, &len);                    \
        btrfs_subpage_set_##name(fs_info, folio, start, len);              \
}                                                                          \
void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info,   \
                                    struct folio *folio, u64 start, u32 len) \
{                                                                          \
        if (unlikely(!fs_info) ||                                          \
            !btrfs_is_subpage(fs_info, folio)) {                           \
                folio_clear_func(folio);                                   \
                return;                                                    \
        }                                                                  \
        btrfs_subpage_clamp_range(folio, &start, &len);                    \
        btrfs_subpage_clear_##name(fs_info, folio, start, len);            \
}                                                                          \
bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info,    \
                                   struct folio *folio, u64 start, u32 len) \
{                                                                          \
        if (unlikely(!fs_info) ||                                          \
            !btrfs_is_subpage(fs_info, folio))                             \
                return folio_test_func(folio);                             \
        btrfs_subpage_clamp_range(folio, &start, &len);                    \
        return btrfs_subpage_test_##name(fs_info, folio, start, len);      \
}                                                                          \
void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \
{                                                                          \
        if (!btrfs_meta_is_subpage(eb->fs_info)) {                         \
                folio_set_func(folio);                                     \
                return;                                                    \
        }                                                                  \
        btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len);  \
}                                                                          \
void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb) \
{                                                                          \
        if (!btrfs_meta_is_subpage(eb->fs_info)) {                         \
                folio_clear_func(folio);                                   \
                return;                                                    \
        }                                                                  \
        btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \
}                                                                          \
bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \
{                                                                          \
        if (!btrfs_meta_is_subpage(eb->fs_info))                           \
                return folio_test_func(folio);                             \
        return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
                         folio_test_uptodate);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
                         folio_test_dirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
                         folio_test_writeback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
                         folio_test_ordered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
                         folio_test_checked);
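
/*
 * For illustration only: each instantiation above generates a full family of
 * helpers, e.g. for "dirty" we get btrfs_folio_set_dirty(),
 * btrfs_folio_clamp_clear_dirty(), btrfs_meta_folio_test_dirty() and so on.
 * A hypothetical caller that finished handling a byte range could do:
 *
 *      btrfs_folio_clamp_clear_dirty(fs_info, folio, range_start, range_len);
 *
 * which degrades to a plain folio_clear_dirty_for_io() call on non-subpage
 * filesystems, and clears only the covered blocks otherwise.  The names
 * @range_start and @range_len are placeholders, not a specific call site.
 */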

#define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst)                      \
{                                                                          \
        const unsigned int blocks_per_folio =                              \
                           btrfs_blocks_per_folio(fs_info, folio);         \
        const struct btrfs_subpage *subpage = folio_get_private(folio);    \
                                                                           \
        ASSERT(blocks_per_folio <= BITS_PER_LONG);                         \
        *dst = bitmap_read(subpage->bitmaps,                               \
                           blocks_per_folio * btrfs_bitmap_nr_##name,      \
                           blocks_per_folio);                              \
}

#define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len)              \
{                                                                          \
        unsigned long bitmap;                                              \
        const unsigned int blocks_per_folio =                              \
                           btrfs_blocks_per_folio(fs_info, folio);         \
                                                                           \
        GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap);                 \
        btrfs_warn(fs_info,                                                \
        "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
                   start, len, folio_pos(folio),                           \
                   blocks_per_folio, &bitmap);                             \
}

/*
 * Make sure that not only the folio dirty flag is cleared, but also the
 * subpage dirty bits are cleared.
 */
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
                                  struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage;
        unsigned int start_bit;
        unsigned int nbits;
        unsigned long flags;

        if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
                return;

        if (!btrfs_is_subpage(fs_info, folio)) {
                ASSERT(!folio_test_dirty(folio));
                return;
        }

        start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
        nbits = len >> fs_info->sectorsize_bits;
        subpage = folio_get_private(folio);
        ASSERT(subpage);
        spin_lock_irqsave(&subpage->lock, flags);
        if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
                SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
                ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
        }
        ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
        spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * This is for a folio already locked by plain lock_page()/folio_lock(), which
 * doesn't have any subpage awareness.
 *
 * This populates the involved subpage ranges so that subpage helpers can
 * properly unlock them.
 */
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage;
        unsigned long flags;
        unsigned int start_bit;
        unsigned int nbits;
        int ret;

        ASSERT(folio_test_locked(folio));
        if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
                return;

        subpage = folio_get_private(folio);
        start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
        nbits = len >> fs_info->sectorsize_bits;
        spin_lock_irqsave(&subpage->lock, flags);
        /* Target range should not yet be locked. */
        if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
                SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
                ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
        }
        bitmap_set(subpage->bitmaps, start_bit, nbits);
        ret = atomic_add_return(nbits, &subpage->nr_locked);
        ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
        spin_unlock_irqrestore(&subpage->lock, flags);
}
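
/*
 * Usage sketch, for illustration only: a caller that locked the folio with a
 * plain folio_lock() can hand a range over to the subpage machinery so that
 * later per-range unlocks work as expected:
 *
 *      folio_lock(folio);
 *      btrfs_folio_set_lock(fs_info, folio, start, len);
 *      ...
 *      btrfs_folio_end_lock(fs_info, folio, start, len);
 *
 * The final btrfs_folio_end_lock() call also unlocks the folio itself once
 * the last locked range in it is ended.
 */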

/*
 * Clear the dirty flag for the folio.
 *
 * If the affected folio is no longer dirty, return true. Otherwise return false.
 */
bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb)
{
        bool last;

        if (!btrfs_meta_is_subpage(eb->fs_info)) {
                folio_clear_dirty_for_io(folio);
                return true;
        }

        last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len);
        if (last) {
                folio_clear_dirty_for_io(folio);
                return true;
        }
        return false;
}

void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
                                      struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage;
        const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
        unsigned long uptodate_bitmap;
        unsigned long dirty_bitmap;
        unsigned long writeback_bitmap;
        unsigned long ordered_bitmap;
        unsigned long checked_bitmap;
        unsigned long locked_bitmap;
        unsigned long flags;

        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(blocks_per_folio > 1);
        subpage = folio_get_private(folio);

        spin_lock_irqsave(&subpage->lock, flags);
        GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap);
        GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap);
        GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap);
        GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap);
        GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap);
        GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap);
        spin_unlock_irqrestore(&subpage->lock, flags);

        dump_page(folio_page(folio, 0), "btrfs subpage dump");
        btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
                   start, len, folio_pos(folio),
                   blocks_per_folio, &uptodate_bitmap,
                   blocks_per_folio, &dirty_bitmap,
                   blocks_per_folio, &locked_bitmap,
                   blocks_per_folio, &writeback_bitmap,
                   blocks_per_folio, &ordered_bitmap,
                   blocks_per_folio, &checked_bitmap);
}

void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
                                    struct folio *folio,
                                    unsigned long *ret_bitmap)
{
        struct btrfs_subpage *subpage;
        unsigned long flags;

        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1);
        subpage = folio_get_private(folio);

        spin_lock_irqsave(&subpage->lock, flags);
        GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap);
        spin_unlock_irqrestore(&subpage->lock, flags);
}