/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/buffer_head.h
 *
 * Everything to do with buffer_heads.
 */

#ifndef _LINUX_BUFFER_HEAD_H
#define _LINUX_BUFFER_HEAD_H

#include <linux/types.h>
#include <linux/blk_types.h>
#include <linux/fs.h>
#include <linux/linkage.h>
#include <linux/pagemap.h>
#include <linux/wait.h>
#include <linux/atomic.h>

enum bh_state_bits {
	BH_Uptodate,	/* Contains valid data */
	BH_Dirty,	/* Is dirty */
	BH_Lock,	/* Is locked */
	BH_Req,		/* Has been submitted for I/O */

	BH_Mapped,	/* Has a disk mapping */
	BH_New,		/* Disk mapping was newly created by get_block */
	BH_Async_Read,	/* Is under end_buffer_async_read I/O */
	BH_Async_Write,	/* Is under end_buffer_async_write I/O */
	BH_Delay,	/* Buffer is not yet allocated on disk */
	BH_Boundary,	/* Block is followed by a discontiguity */
	BH_Write_EIO,	/* I/O error on write */
	BH_Unwritten,	/* Buffer is allocated on disk but not written */
	BH_Quiet,	/* Buffer error printks to be quiet */
	BH_Meta,	/* Buffer contains metadata */
	BH_Prio,	/* Buffer should be submitted with REQ_PRIO */
	BH_Defer_Completion, /* Defer AIO completion to workqueue */

	BH_PrivateStart,/* not a state bit, but the first bit available
			 * for private allocation by other entities
			 */
};

#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)

struct page;
struct buffer_head;
struct address_space;
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);

/*
 * Historically, a buffer_head was used to map a single block
 * within a page, and of course as the unit of I/O through the
 * filesystem and block layers.  Nowadays the basic I/O unit
 * is the bio, and buffer_heads are used for extracting block
 * mappings (via a get_block_t call), for tracking state within
 * a page (via a page_mapping) and for wrapping bio submission
 * for backward compatibility reasons (e.g. submit_bh).
 */
struct buffer_head {
	unsigned long b_state;		/* buffer state bitmap (see above) */
	struct buffer_head *b_this_page;/* circular list of page's buffers */
	union {
		struct page *b_page;	/* the page this bh is mapped to */
		struct folio *b_folio;	/* the folio this bh is mapped to */
	};

	sector_t b_blocknr;		/* start block number */
	size_t b_size;			/* size of mapping */
	char *b_data;			/* pointer to data within the page */

	struct block_device *b_bdev;
	bh_end_io_t *b_end_io;		/* I/O completion */
	void *b_private;		/* reserved for b_end_io */
	struct list_head b_assoc_buffers; /* associated with another mapping */
	struct address_space *b_assoc_map;	/* mapping this buffer is
						   associated with */
	atomic_t b_count;		/* users using this buffer_head */
	spinlock_t b_uptodate_lock;	/* Used by the first bh in a page, to
					 * serialise IO completion of other
					 * buffers in the page */
};
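
/*
 * Illustrative sketch, not part of the original header: the buffers
 * attached to a folio form a circular list through b_this_page, so a
 * walk starts at the head buffer (as returned by folio_buffers(),
 * defined below) and stops once it wraps back around.  The function
 * name is hypothetical, and touch_buffer() stands in for whatever
 * per-buffer work the caller needs.
 *
 *	static void example_walk_buffers(struct folio *folio)
 *	{
 *		struct buffer_head *head = folio_buffers(folio);
 *		struct buffer_head *bh = head;
 *
 *		if (!head)
 *			return;
 *		do {
 *			touch_buffer(bh);
 *			bh = bh->b_this_page;
 *		} while (bh != head);
 *	}
 */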

/*
 * Macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
 * and buffer_foo() functions.
 * To avoid re-setting buffer flags that are already set, check the
 * flag first: a redundant set_bit() causes a costly cache line
 * transition.
 */
#define BUFFER_FNS(bit, name) \
static __always_inline void set_buffer_##name(struct buffer_head *bh) \
{ \
	if (!test_bit(BH_##bit, &(bh)->b_state)) \
		set_bit(BH_##bit, &(bh)->b_state); \
} \
static __always_inline void clear_buffer_##name(struct buffer_head *bh) \
{ \
	clear_bit(BH_##bit, &(bh)->b_state); \
} \
static __always_inline int buffer_##name(const struct buffer_head *bh) \
{ \
	return test_bit(BH_##bit, &(bh)->b_state); \
}
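
/*
 * For example, BUFFER_FNS(Dirty, dirty) below emits set_buffer_dirty(),
 * clear_buffer_dirty() and buffer_dirty(), all operating on the
 * BH_Dirty bit in bh->b_state:
 *
 *	if (!buffer_dirty(bh))		tests BH_Dirty
 *		set_buffer_dirty(bh);	sets BH_Dirty (skipped if set)
 */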

/*
 * test_set_buffer_foo() and test_clear_buffer_foo()
 */
#define TAS_BUFFER_FNS(bit, name) \
static __always_inline int test_set_buffer_##name(struct buffer_head *bh) \
{ \
	return test_and_set_bit(BH_##bit, &(bh)->b_state); \
} \
static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \
{ \
	return test_and_clear_bit(BH_##bit, &(bh)->b_state); \
}

/*
 * Emit the buffer bitops functions.  Note that there are also functions
 * of the form "mark_buffer_foo()".  These are higher-level functions which
 * do something in addition to setting a b_state bit.
 */
BUFFER_FNS(Dirty, dirty)
TAS_BUFFER_FNS(Dirty, dirty)
BUFFER_FNS(Lock, locked)
BUFFER_FNS(Req, req)
TAS_BUFFER_FNS(Req, req)
BUFFER_FNS(Mapped, mapped)
BUFFER_FNS(New, new)
BUFFER_FNS(Async_Read, async_read)
BUFFER_FNS(Async_Write, async_write)
BUFFER_FNS(Delay, delay)
BUFFER_FNS(Boundary, boundary)
BUFFER_FNS(Write_EIO, write_io_error)
BUFFER_FNS(Unwritten, unwritten)
BUFFER_FNS(Meta, meta)
BUFFER_FNS(Prio, prio)
BUFFER_FNS(Defer_Completion, defer_completion)

static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
{
	/*
	 * If somebody else already set this uptodate, they will
	 * have done the memory barrier, and a reader will thus
	 * see *some* valid buffer state.
	 *
	 * Any other serialization (with IO errors or whatever that
	 * might clear the bit) has to come from other state (eg BH_Lock).
	 */
	if (test_bit(BH_Uptodate, &bh->b_state))
		return;

	/*
	 * make it consistent with folio_mark_uptodate
	 * pairs with smp_load_acquire in buffer_uptodate
	 */
	smp_mb__before_atomic();
	set_bit(BH_Uptodate, &bh->b_state);
}

static __always_inline void clear_buffer_uptodate(struct buffer_head *bh)
{
	clear_bit(BH_Uptodate, &bh->b_state);
}

static __always_inline int buffer_uptodate(const struct buffer_head *bh)
{
	/*
	 * make it consistent with folio_test_uptodate
	 * pairs with smp_mb__before_atomic in set_buffer_uptodate
	 */
	return test_bit_acquire(BH_Uptodate, &bh->b_state);
}

static inline unsigned long bh_offset(const struct buffer_head *bh)
{
	return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
}

/* If we *know* page->private refers to buffer_heads */
#define page_buffers(page)					\
	({							\
		BUG_ON(!PagePrivate(page));			\
		((struct buffer_head *)page_private(page));	\
	})
#define page_has_buffers(page)	PagePrivate(page)
#define folio_buffers(folio)	folio_get_private(folio)

void buffer_check_dirty_writeback(struct folio *folio,
				  bool *dirty, bool *writeback);

/*
 * Declarations
 */

void mark_buffer_dirty(struct buffer_head *bh);
void mark_buffer_write_io_error(struct buffer_head *bh);
void touch_buffer(struct buffer_head *bh);
void folio_set_bh(struct buffer_head *bh, struct folio *folio,
		  unsigned long offset);
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
					gfp_t gfp);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
				       bool retry);
struct buffer_head *create_empty_buffers(struct folio *folio,
		unsigned long blocksize, unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
void end_buffer_async_write(struct buffer_head *bh, int uptodate);
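
/*
 * Illustrative sketch, not part of the original header: a private I/O
 * completion handler must match bh_end_io_t.  Like the stock
 * end_buffer_read_sync(), it would typically record the result in
 * b_state, unlock the buffer and drop the submitter's reference.  The
 * function name is hypothetical.
 *
 *	static void example_end_read(struct buffer_head *bh, int uptodate)
 *	{
 *		if (uptodate)
 *			set_buffer_uptodate(bh);
 *		else
 *			clear_buffer_uptodate(bh);
 *		unlock_buffer(bh);
 *		put_bh(bh);
 *	}
 *
 * installed with "bh->b_end_io = example_end_read;" before submit_bh().
 */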

/* Things to do with buffers at mapping->private_list */
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
				  bool datasync);
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
			  bool datasync);
void clean_bdev_aliases(struct block_device *bdev, sector_t block,
			sector_t len);
static inline void clean_bdev_bh_alias(struct buffer_head *bh)
{
	clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1);
}

void mark_buffer_async_write(struct buffer_head *bh);
void __wait_on_buffer(struct buffer_head *);
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
			unsigned size);
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
		unsigned size, gfp_t gfp);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
struct buffer_head *__bread_gfp(struct block_device *,
				sector_t block, unsigned size, gfp_t gfp);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
void __lock_buffer(struct buffer_head *bh);
int sync_dirty_buffer(struct buffer_head *bh);
int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
void submit_bh(blk_opf_t, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize);
int bh_uptodate_or_lock(struct buffer_head *bh);
int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
void __bh_read_batch(int nr, struct buffer_head *bhs[],
		     blk_opf_t op_flags, bool force_lock);

/*
 * Generic address_space_operations implementations for buffer_head-backed
 * address_spaces.
 */
void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
int block_write_full_page(struct page *page, get_block_t *get_block,
				struct writeback_control *wbc);
int __block_write_full_folio(struct inode *inode, struct folio *folio,
			get_block_t *get_block, struct writeback_control *wbc,
			bh_end_io_t *handler);
int block_read_full_folio(struct folio *, get_block_t *);
bool block_is_partially_uptodate(struct folio *, size_t from, size_t count);
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
		struct page **pagep, get_block_t *get_block);
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block);
int block_write_end(struct file *, struct address_space *,
				loff_t, unsigned, unsigned,
				struct page *, void *);
int generic_write_end(struct file *, struct address_space *,
				loff_t, unsigned, unsigned,
				struct page *, void *);
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
void clean_page_buffers(struct page *page);
int cont_write_begin(struct file *, struct address_space *, loff_t,
			unsigned, struct page **, void **,
			get_block_t *, loff_t *);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
void block_commit_write(struct page *page, unsigned int from, unsigned int to);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
				get_block_t get_block);
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);

#ifdef CONFIG_MIGRATION
extern int buffer_migrate_folio(struct address_space *,
		struct folio *dst, struct folio *src, enum migrate_mode);
extern int buffer_migrate_folio_norefs(struct address_space *,
		struct folio *dst, struct folio *src, enum migrate_mode);
#else
#define buffer_migrate_folio NULL
#define buffer_migrate_folio_norefs NULL
#endif

/*
 * inline definitions
 */

static inline void get_bh(struct buffer_head *bh)
{
	atomic_inc(&bh->b_count);
}

static inline void put_bh(struct buffer_head *bh)
{
	smp_mb__before_atomic();
	atomic_dec(&bh->b_count);
}

static inline void brelse(struct buffer_head *bh)
{
	if (bh)
		__brelse(bh);
}

static inline void bforget(struct buffer_head *bh)
{
	if (bh)
		__bforget(bh);
}

static inline struct buffer_head *
sb_bread(struct super_block *sb, sector_t block)
{
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
}
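
/*
 * Illustrative usage, not part of the original header: sb_bread()
 * returns NULL if the block could not be read, and the reference it
 * takes must be dropped with brelse() once the data has been used.
 * "out" is a hypothetical caller-supplied destination buffer.
 *
 *	struct buffer_head *bh = sb_bread(sb, block);
 *
 *	if (!bh)
 *		return -EIO;
 *	memcpy(out, bh->b_data, sb->s_blocksize);
 *	brelse(bh);
 */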

static inline struct buffer_head *
sb_bread_unmovable(struct super_block *sb, sector_t block)
{
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
}

static inline void
sb_breadahead(struct super_block *sb, sector_t block)
{
	__breadahead(sb->s_bdev, block, sb->s_blocksize);
}

static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
		sector_t block, unsigned size)
{
	gfp_t gfp;

	gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
	gfp |= __GFP_NOFAIL;

	return bdev_getblk(bdev, block, size, gfp);
}

static inline struct buffer_head *__getblk(struct block_device *bdev,
		sector_t block, unsigned size)
{
	gfp_t gfp;

	gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
	gfp |= __GFP_MOVABLE | __GFP_NOFAIL;

	return bdev_getblk(bdev, block, size, gfp);
}

static inline struct buffer_head *sb_getblk(struct super_block *sb,
		sector_t block)
{
	return __getblk(sb->s_bdev, block, sb->s_blocksize);
}

static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb,
		sector_t block, gfp_t gfp)
{
	return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp);
}

static inline struct buffer_head *
sb_find_get_block(struct super_block *sb, sector_t block)
{
	return __find_get_block(sb->s_bdev, block, sb->s_blocksize);
}

static inline void
map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block)
{
	set_buffer_mapped(bh);
	bh->b_bdev = sb->s_bdev;
	bh->b_blocknr = block;
	bh->b_size = sb->s_blocksize;
}
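
/*
 * Illustrative sketch, not part of the original header: a filesystem's
 * get_block_t implementation resolves a logical block to a physical one
 * and publishes the result through map_bh().  The function name and the
 * lookup helper example_lookup() are hypothetical.
 *
 *	static int example_get_block(struct inode *inode, sector_t iblock,
 *				     struct buffer_head *bh, int create)
 *	{
 *		sector_t phys = example_lookup(inode, iblock, create);
 *
 *		if (!phys)
 *			return -EIO;
 *		map_bh(bh, inode->i_sb, phys);
 *		return 0;
 *	}
 */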

static inline void wait_on_buffer(struct buffer_head *bh)
{
	might_sleep();
	if (buffer_locked(bh))
		__wait_on_buffer(bh);
}

static inline int trylock_buffer(struct buffer_head *bh)
{
	return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
}

static inline void lock_buffer(struct buffer_head *bh)
{
	might_sleep();
	if (!trylock_buffer(bh))
		__lock_buffer(bh);
}
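
/*
 * Illustrative pattern, not part of the original header: an update to a
 * buffer's contents is typically serialised against I/O with the buffer
 * lock, then handed to writeback via mark_buffer_dirty().  memset()
 * stands in for whatever modification the caller makes.
 *
 *	lock_buffer(bh);
 *	memset(bh->b_data, 0, bh->b_size);
 *	set_buffer_uptodate(bh);
 *	unlock_buffer(bh);
 *	mark_buffer_dirty(bh);
 */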

static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (!buffer_uptodate(bh) && trylock_buffer(bh)) {
		if (!buffer_uptodate(bh))
			__bh_read(bh, op_flags, false);
		else
			unlock_buffer(bh);
	}
}

static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (!bh_uptodate_or_lock(bh))
		__bh_read(bh, op_flags, false);
}
/* Returns 1 if the buffer is already uptodate, 0 on successful read, -EIO on error. */
static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (bh_uptodate_or_lock(bh))
		return 1;
	return __bh_read(bh, op_flags, true);
}
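
/*
 * Illustrative usage, not part of the original header: bh_read()
 * distinguishes "was already uptodate" (1) from "read performed" (0),
 * so callers that only care about validity just test for an error:
 *
 *	if (bh_read(bh, 0) < 0)
 *		return -EIO;
 *	(bh->b_data is now valid)
 */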

static inline void bh_read_batch(int nr, struct buffer_head *bhs[])
{
	__bh_read_batch(nr, bhs, 0, true);
}

static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[],
				      blk_opf_t op_flags)
{
	__bh_read_batch(nr, bhs, op_flags, false);
}

/**
 * __bread() - read a specified block and return the buffer head
 * @bdev: the block_device to read from
 * @block: number of the block
 * @size: size (in bytes) to read
 *
 * Reads the specified block and returns the buffer head that contains it.
 * The page cache is allocated from the movable area so that it can be
 * migrated.  Returns NULL if the block was unreadable.
 */
static inline struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
{
	return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}

/**
 * get_nth_bh - Get a reference on the n'th buffer after this one.
 * @bh: The buffer to start counting from.
 * @count: How many buffers to skip.
 *
 * This is primarily useful for finding the nth buffer in a folio; in
 * that case you pass the head buffer and the byte offset in the folio
 * divided by the block size.  It can be used for other purposes, but
 * it will wrap at the end of the folio rather than returning NULL or
 * proceeding to the next folio for you.
 *
 * Return: The requested buffer with an elevated refcount.
 */
static inline __must_check
struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count)
{
	while (count--)
		bh = bh->b_this_page;
	get_bh(bh);
	return bh;
}
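
/*
 * Illustrative usage, not part of the original header: as described
 * above, finding the buffer covering a given byte offset in a folio
 * means skipping offset / blocksize buffers from the head.  The
 * reference taken by get_nth_bh() is dropped with put_bh().
 *
 *	struct buffer_head *head = folio_buffers(folio);
 *	struct buffer_head *bh = get_nth_bh(head, offset / blocksize);
 *
 *	(use bh->b_data here)
 *	put_bh(bh);
 */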

bool block_dirty_folio(struct address_space *mapping, struct folio *folio);

#ifdef CONFIG_BUFFER_HEAD

void buffer_init(void);
bool try_to_free_buffers(struct folio *folio);
int inode_has_buffers(struct inode *inode);
void invalidate_inode_buffers(struct inode *inode);
int remove_inode_buffers(struct inode *inode);
int sync_mapping_buffers(struct address_space *mapping);
void invalidate_bh_lrus(void);
void invalidate_bh_lrus_cpu(void);
bool has_bh_in_lru(int cpu, void *dummy);
extern int buffer_heads_over_limit;

#else /* CONFIG_BUFFER_HEAD */

static inline void buffer_init(void) {}
static inline bool try_to_free_buffers(struct folio *folio) { return true; }
static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
static inline void invalidate_bh_lrus(void) {}
static inline void invalidate_bh_lrus_cpu(void) {}
static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
#define buffer_heads_over_limit 0

#endif /* CONFIG_BUFFER_HEAD */
#endif /* _LINUX_BUFFER_HEAD_H */