1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | |
3 | #ifndef BTRFS_EXTENT_IO_H |
4 | #define BTRFS_EXTENT_IO_H |
5 | |
6 | #include <linux/rbtree.h> |
7 | #include <linux/refcount.h> |
8 | #include <linux/fiemap.h> |
9 | #include <linux/btrfs_tree.h> |
10 | #include <linux/spinlock.h> |
11 | #include <linux/atomic.h> |
12 | #include <linux/rwsem.h> |
13 | #include <linux/list.h> |
14 | #include <linux/slab.h> |
15 | #include "compression.h" |
16 | #include "messages.h" |
17 | #include "ulist.h" |
18 | #include "misc.h" |
19 | |
20 | struct page; |
21 | struct file; |
22 | struct folio; |
23 | struct inode; |
24 | struct fiemap_extent_info; |
25 | struct readahead_control; |
26 | struct address_space; |
27 | struct writeback_control; |
28 | struct extent_io_tree; |
29 | struct extent_map_tree; |
30 | struct btrfs_block_group; |
31 | struct btrfs_fs_info; |
32 | struct btrfs_inode; |
33 | struct btrfs_root; |
34 | struct btrfs_trans_handle; |
35 | struct btrfs_tree_parent_check; |
36 | |
/*
 * Bit numbers for extent_buffer::bflags, e.g. EXTENT_BUFFER_UPTODATE is
 * tested with test_bit() in extent_buffer_uptodate().
 */
enum {
	EXTENT_BUFFER_UPTODATE,
	EXTENT_BUFFER_DIRTY,
	EXTENT_BUFFER_CORRUPT,
	/* This got triggered by readahead */
	EXTENT_BUFFER_READAHEAD,
	EXTENT_BUFFER_TREE_REF,
	EXTENT_BUFFER_STALE,
	EXTENT_BUFFER_WRITEBACK,
	/* Read IO error */
	EXTENT_BUFFER_READ_ERR,
	EXTENT_BUFFER_UNMAPPED,
	EXTENT_BUFFER_IN_TREE,
	/* Write IO error */
	EXTENT_BUFFER_WRITE_ERR,
	/* Indicate the extent buffer is written zeroed out (for zoned) */
	EXTENT_BUFFER_ZONED_ZEROOUT,
	/* Indicate that extent buffer pages are being read */
	EXTENT_BUFFER_READING,
};
57 | |
58 | /* these are flags for __process_pages_contig */ |
59 | enum { |
60 | ENUM_BIT(PAGE_UNLOCK), |
61 | /* Page starts writeback, clear dirty bit and set writeback bit */ |
62 | ENUM_BIT(PAGE_START_WRITEBACK), |
63 | ENUM_BIT(PAGE_END_WRITEBACK), |
64 | ENUM_BIT(PAGE_SET_ORDERED), |
65 | }; |
66 | |
/*
 * Folio private values.  Every page that is controlled by the extent map has
 * folio private set to this value.
 */
#define EXTENT_FOLIO_PRIVATE			1
72 | |
/*
 * The extent buffer bitmap operations are done with byte granularity instead of
 * word granularity for two reasons:
 * 1. The bitmaps must be little-endian on disk.
 * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
 *    single word in a bitmap may straddle two pages in the extent buffer.
 */

/* Index of the byte that holds bit number @nr. */
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
/* All bits of a single byte set. */
#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
/*
 * Mask for the first byte of a bit range: every bit from position
 * (@start % BITS_PER_BYTE) up to the top of the byte is set.
 */
#define BITMAP_FIRST_BYTE_MASK(start) \
	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
/*
 * Mask for the last byte of a bit range: the low (@nbits % BITS_PER_BYTE)
 * bits are set, or the whole byte when @nbits is a multiple of BITS_PER_BYTE.
 */
#define BITMAP_LAST_BYTE_MASK(nbits) \
	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
86 | |
87 | |
88 | int __init extent_buffer_init_cachep(void); |
89 | void __cold extent_buffer_free_cachep(void); |
90 | |
91 | #define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE) |
92 | struct extent_buffer { |
93 | u64 start; |
94 | u32 len; |
95 | u32 folio_size; |
96 | unsigned long bflags; |
97 | struct btrfs_fs_info *fs_info; |
98 | |
99 | /* |
100 | * The address where the eb can be accessed without any cross-page handling. |
101 | * This can be NULL if not possible. |
102 | */ |
103 | void *addr; |
104 | |
105 | spinlock_t refs_lock; |
106 | atomic_t refs; |
107 | int read_mirror; |
108 | /* >= 0 if eb belongs to a log tree, -1 otherwise */ |
109 | s8 log_index; |
110 | u8 folio_shift; |
111 | struct rcu_head rcu_head; |
112 | |
113 | struct rw_semaphore lock; |
114 | |
115 | /* |
116 | * Pointers to all the folios of the extent buffer. |
117 | * |
118 | * For now the folio is always order 0 (aka, a single page). |
119 | */ |
120 | struct folio *folios[INLINE_EXTENT_BUFFER_PAGES]; |
121 | #ifdef CONFIG_BTRFS_DEBUG |
122 | struct list_head leak_list; |
123 | pid_t lock_owner; |
124 | #endif |
125 | }; |
126 | |
/*
 * Groups a writeback control, an extent buffer and, for zoned mode, the block
 * group that extent buffer resides in.
 *
 * NOTE(review): the users of this structure are outside this header.
 */
struct btrfs_eb_write_context {
	struct writeback_control *wbc;
	struct extent_buffer *eb;
	/* Block group @eb resides in. Only used for zoned mode. */
	struct btrfs_block_group *zoned_bg;
};
133 | |
134 | static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb, |
135 | u64 start) |
136 | { |
137 | ASSERT(eb->folio_size); |
138 | return start & (eb->folio_size - 1); |
139 | } |
140 | |
141 | /* |
142 | * Get the correct offset inside the page of extent buffer. |
143 | * |
144 | * @eb: target extent buffer |
145 | * @start: offset inside the extent buffer |
146 | * |
147 | * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases. |
148 | */ |
149 | static inline size_t get_eb_offset_in_folio(const struct extent_buffer *eb, |
150 | unsigned long offset) |
151 | { |
152 | /* |
153 | * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case |
154 | * 1.1) One large folio covering the whole eb |
155 | * The eb->start is aligned to folio size, thus adding it |
156 | * won't cause any difference. |
157 | * 1.2) Several page sized folios |
158 | * The eb->start is aligned to folio (page) size, thus |
159 | * adding it won't cause any difference. |
160 | * |
161 | * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case |
162 | * In this case there would only be one page sized folio, and there |
163 | * may be several different extent buffers in the page/folio. |
164 | * We need to add eb->start to properly access the offset inside |
165 | * that eb. |
166 | */ |
167 | return offset_in_folio(eb->folios[0], offset + eb->start); |
168 | } |
169 | |
170 | static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb, |
171 | unsigned long offset) |
172 | { |
173 | /* |
174 | * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case |
175 | * 1.1) One large folio covering the whole eb. |
176 | * the folio_shift would be large enough to always make us |
177 | * return 0 as index. |
178 | * 1.2) Several page sized folios |
179 | * The folio_shift would be PAGE_SHIFT, giving us the correct |
180 | * index. |
181 | * |
182 | * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case |
183 | * The folio would only be page sized, and always give us 0 as index. |
184 | */ |
185 | return offset >> eb->folio_shift; |
186 | } |
187 | |
188 | /* |
189 | * Structure to record how many bytes and which ranges are set/cleared |
190 | */ |
191 | struct extent_changeset { |
192 | /* How many bytes are set/cleared in this operation */ |
193 | u64 bytes_changed; |
194 | |
195 | /* Changed ranges */ |
196 | struct ulist range_changed; |
197 | }; |
198 | |
199 | static inline void extent_changeset_init(struct extent_changeset *changeset) |
200 | { |
201 | changeset->bytes_changed = 0; |
202 | ulist_init(ulist: &changeset->range_changed); |
203 | } |
204 | |
205 | static inline struct extent_changeset *extent_changeset_alloc(void) |
206 | { |
207 | struct extent_changeset *ret; |
208 | |
209 | ret = kmalloc(size: sizeof(*ret), GFP_KERNEL); |
210 | if (!ret) |
211 | return NULL; |
212 | |
213 | extent_changeset_init(changeset: ret); |
214 | return ret; |
215 | } |
216 | |
217 | static inline void extent_changeset_release(struct extent_changeset *changeset) |
218 | { |
219 | if (!changeset) |
220 | return; |
221 | changeset->bytes_changed = 0; |
222 | ulist_release(ulist: &changeset->range_changed); |
223 | } |
224 | |
/*
 * Release the contents of a changeset and kfree() the structure itself.
 *
 * @changeset:	changeset to free, NULL is accepted and ignored
 */
static inline void extent_changeset_free(struct extent_changeset *changeset)
{
	if (!changeset)
		return;
	extent_changeset_release(changeset);
	kfree(changeset);
}
232 | |
233 | int try_release_extent_mapping(struct page *page, gfp_t mask); |
234 | int try_release_extent_buffer(struct page *page); |
235 | |
236 | int btrfs_read_folio(struct file *file, struct folio *folio); |
237 | void extent_write_locked_range(struct inode *inode, struct page *locked_page, |
238 | u64 start, u64 end, struct writeback_control *wbc, |
239 | bool pages_dirty); |
240 | int extent_writepages(struct address_space *mapping, |
241 | struct writeback_control *wbc); |
242 | int btree_write_cache_pages(struct address_space *mapping, |
243 | struct writeback_control *wbc); |
244 | void extent_readahead(struct readahead_control *rac); |
245 | int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, |
246 | u64 start, u64 len); |
247 | int set_folio_extent_mapped(struct folio *folio); |
248 | int set_page_extent_mapped(struct page *page); |
249 | void clear_page_extent_mapped(struct page *page); |
250 | |
251 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
252 | u64 start, u64 owner_root, int level); |
253 | struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, |
254 | u64 start, unsigned long len); |
255 | struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, |
256 | u64 start); |
257 | struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src); |
258 | struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, |
259 | u64 start); |
260 | void free_extent_buffer(struct extent_buffer *eb); |
261 | void free_extent_buffer_stale(struct extent_buffer *eb); |
262 | #define WAIT_NONE 0 |
263 | #define WAIT_COMPLETE 1 |
264 | #define WAIT_PAGE_LOCK 2 |
265 | int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, |
266 | struct btrfs_tree_parent_check *parent_check); |
267 | void wait_on_extent_buffer_writeback(struct extent_buffer *eb); |
268 | void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, |
269 | u64 bytenr, u64 owner_root, u64 gen, int level); |
270 | void btrfs_readahead_node_child(struct extent_buffer *node, int slot); |
271 | |
272 | static inline int num_extent_pages(const struct extent_buffer *eb) |
273 | { |
274 | /* |
275 | * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to |
276 | * sectorsize, it's just eb->len >> PAGE_SHIFT. |
277 | * |
278 | * For sectorsize < PAGE_SIZE case, we could have nodesize < PAGE_SIZE, |
279 | * thus have to ensure we get at least one page. |
280 | */ |
281 | return (eb->len >> PAGE_SHIFT) ?: 1; |
282 | } |
283 | |
284 | /* |
285 | * This can only be determined at runtime by checking eb::folios[0]. |
286 | * |
287 | * As we can have either one large folio covering the whole eb |
288 | * (either nodesize <= PAGE_SIZE, or high order folio), or multiple |
289 | * single-paged folios. |
290 | */ |
291 | static inline int num_extent_folios(const struct extent_buffer *eb) |
292 | { |
293 | if (folio_order(folio: eb->folios[0])) |
294 | return 1; |
295 | return num_extent_pages(eb); |
296 | } |
297 | |
298 | static inline int extent_buffer_uptodate(const struct extent_buffer *eb) |
299 | { |
300 | return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
301 | } |
302 | |
303 | int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, |
304 | unsigned long start, unsigned long len); |
305 | void read_extent_buffer(const struct extent_buffer *eb, void *dst, |
306 | unsigned long start, |
307 | unsigned long len); |
308 | int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, |
309 | void __user *dst, unsigned long start, |
310 | unsigned long len); |
311 | void write_extent_buffer(const struct extent_buffer *eb, const void *src, |
312 | unsigned long start, unsigned long len); |
313 | |
314 | static inline void write_extent_buffer_chunk_tree_uuid( |
315 | const struct extent_buffer *eb, const void *chunk_tree_uuid) |
316 | { |
317 | write_extent_buffer(eb, src: chunk_tree_uuid, |
318 | offsetof(struct btrfs_header, chunk_tree_uuid), |
319 | BTRFS_FSID_SIZE); |
320 | } |
321 | |
322 | static inline void write_extent_buffer_fsid(const struct extent_buffer *eb, |
323 | const void *fsid) |
324 | { |
325 | write_extent_buffer(eb, src: fsid, offsetof(struct btrfs_header, fsid), |
326 | BTRFS_FSID_SIZE); |
327 | } |
328 | |
329 | void copy_extent_buffer_full(const struct extent_buffer *dst, |
330 | const struct extent_buffer *src); |
331 | void copy_extent_buffer(const struct extent_buffer *dst, |
332 | const struct extent_buffer *src, |
333 | unsigned long dst_offset, unsigned long src_offset, |
334 | unsigned long len); |
335 | void memcpy_extent_buffer(const struct extent_buffer *dst, |
336 | unsigned long dst_offset, unsigned long src_offset, |
337 | unsigned long len); |
338 | void memmove_extent_buffer(const struct extent_buffer *dst, |
339 | unsigned long dst_offset, unsigned long src_offset, |
340 | unsigned long len); |
341 | void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start, |
342 | unsigned long len); |
343 | int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, |
344 | unsigned long pos); |
345 | void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start, |
346 | unsigned long pos, unsigned long len); |
347 | void extent_buffer_bitmap_clear(const struct extent_buffer *eb, |
348 | unsigned long start, unsigned long pos, |
349 | unsigned long len); |
350 | void set_extent_buffer_dirty(struct extent_buffer *eb); |
351 | void set_extent_buffer_uptodate(struct extent_buffer *eb); |
352 | void clear_extent_buffer_uptodate(struct extent_buffer *eb); |
353 | void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); |
354 | void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, |
355 | struct page *locked_page, |
356 | u32 bits_to_clear, unsigned long page_ops); |
357 | int extent_invalidate_folio(struct extent_io_tree *tree, |
358 | struct folio *folio, size_t offset); |
359 | void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, |
360 | struct extent_buffer *buf); |
361 | |
362 | int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, |
363 | gfp_t ); |
364 | |
365 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
366 | bool find_lock_delalloc_range(struct inode *inode, |
367 | struct page *locked_page, u64 *start, |
368 | u64 *end); |
369 | #endif |
370 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
371 | u64 start); |
372 | |
373 | #ifdef CONFIG_BTRFS_DEBUG |
374 | void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info); |
375 | #else |
376 | #define btrfs_extent_buffer_leak_debug_check(fs_info) do {} while (0) |
377 | #endif |
378 | |
379 | #endif |
380 | |