1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef LINUX_IOMAP_H |
3 | #define LINUX_IOMAP_H 1 |
4 | |
5 | #include <linux/atomic.h> |
6 | #include <linux/bitmap.h> |
7 | #include <linux/blk_types.h> |
8 | #include <linux/mm.h> |
9 | #include <linux/types.h> |
10 | #include <linux/mm_types.h> |
11 | #include <linux/blkdev.h> |
12 | |
13 | struct address_space; |
14 | struct fiemap_extent_info; |
15 | struct inode; |
16 | struct iomap_iter; |
17 | struct iomap_dio; |
18 | struct iomap_writepage_ctx; |
19 | struct iov_iter; |
20 | struct kiocb; |
21 | struct page; |
22 | struct vm_area_struct; |
23 | struct vm_fault; |
24 | |
/*
 * Types of block ranges for iomap mappings.
 *
 * One of these values is stored in the 'type' member of struct iomap.
 */
#define IOMAP_HOLE	0	/* no blocks allocated, need allocation */
#define IOMAP_DELALLOC	1	/* delayed allocation blocks */
#define IOMAP_MAPPED	2	/* blocks allocated at @addr */
#define IOMAP_UNWRITTEN	3	/* blocks allocated at @addr in unwritten state */
#define IOMAP_INLINE	4	/* data inline in the inode */
33 | |
/*
 * Flags reported by the file system from iomap_begin:
 *
 * IOMAP_F_NEW indicates that the blocks have been newly allocated and need
 * zeroing for areas that no data is copied to.
 *
 * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
 * written data and requires fdatasync to commit them to persistent storage.
 * This needs to take into account metadata changes that *may* be made at IO
 * completion, such as file size updates from direct IO.
 *
 * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be
 * unshared as part of a write.
 *
 * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block
 * mappings.
 *
 * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
 * buffer heads for this mapping.  When CONFIG_BUFFER_HEAD is not set the flag
 * is defined as 0, so masking with it always yields 0.
 *
 * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
 * rather than a file data extent.
 */
#define IOMAP_F_NEW		(1U << 0)
#define IOMAP_F_DIRTY		(1U << 1)
#define IOMAP_F_SHARED		(1U << 2)
#define IOMAP_F_MERGED		(1U << 3)
#ifdef CONFIG_BUFFER_HEAD
#define IOMAP_F_BUFFER_HEAD	(1U << 4)
#else
#define IOMAP_F_BUFFER_HEAD	0
#endif /* CONFIG_BUFFER_HEAD */
#define IOMAP_F_XATTR		(1U << 5)
67 | |
/*
 * Flags set by the core iomap code during operations:
 *
 * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
 * has changed as the result of this write operation.
 *
 * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file
 * range it covers needs to be remapped by the higher-level code before the
 * operation can proceed.
 */
#define IOMAP_F_SIZE_CHANGED	(1U << 8)
#define IOMAP_F_STALE		(1U << 9)

/*
 * Flags from 0x1000 up are for file system specific usage.
 *
 * IOMAP_F_PRIVATE is the first such bit (bit 12).  The 'flags' member of
 * struct iomap is a u16, which leaves bits 12 through 15 for this purpose.
 */
#define IOMAP_F_PRIVATE		(1U << 12)
85 | |
86 | |
/*
 * Magic value for addr: marks the disk address of a mapping as not valid.
 *
 * The expansion is parenthesized so that it remains a single operand
 * regardless of the expression the macro is used in.
 */
#define IOMAP_NULL_ADDR	(-1ULL)	/* addr is not valid */
91 | |
struct iomap_folio_ops;

/*
 * One mapping of a file range: the file offset and length it covers, its
 * type and flags, and where the data lives.
 */
struct iomap {
	u64			addr; /* disk offset of mapping, bytes */
	loff_t			offset;	/* file offset of mapping, bytes */
	u64			length;	/* length of mapping, bytes */
	u16			type;	/* type of mapping */
	u16			flags;	/* flags for mapping */
	struct block_device	*bdev;	/* block device for I/O */
	struct dax_device	*dax_dev; /* dax_dev for dax operations */
	void			*inline_data; /* inline data for file offset @offset, see iomap_inline_data() */
	void			*private; /* filesystem private */
	const struct iomap_folio_ops *folio_ops; /* buffered write folio hooks, if set by the filesystem */
	u64			validity_cookie; /* used with .iomap_valid() */
};
107 | |
108 | static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) |
109 | { |
110 | return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; |
111 | } |
112 | |
113 | /* |
114 | * Returns the inline data pointer for logical offset @pos. |
115 | */ |
116 | static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos) |
117 | { |
118 | return iomap->inline_data + pos - iomap->offset; |
119 | } |
120 | |
121 | /* |
122 | * Check if the mapping's length is within the valid range for inline data. |
123 | * This is used to guard against accessing data beyond the page inline_data |
124 | * points at. |
125 | */ |
126 | static inline bool iomap_inline_data_valid(const struct iomap *iomap) |
127 | { |
128 | return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); |
129 | } |
130 | |
131 | /* |
132 | * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio |
133 | * and put_folio will be called for each folio written to. This only applies |
134 | * to buffered writes as unbuffered writes will not typically have folios |
135 | * associated with them. |
136 | * |
137 | * When get_folio succeeds, put_folio will always be called to do any |
138 | * cleanup work necessary. put_folio is responsible for unlocking and putting |
139 | * @folio. |
140 | */ |
141 | struct iomap_folio_ops { |
142 | struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, |
143 | unsigned len); |
144 | void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, |
145 | struct folio *folio); |
146 | |
147 | /* |
148 | * Check that the cached iomap still maps correctly to the filesystem's |
149 | * internal extent map. FS internal extent maps can change while iomap |
150 | * is iterating a cached iomap, so this hook allows iomap to detect that |
151 | * the iomap needs to be refreshed during a long running write |
152 | * operation. |
153 | * |
154 | * The filesystem can store internal state (e.g. a sequence number) in |
155 | * iomap->validity_cookie when the iomap is first mapped to be able to |
156 | * detect changes between mapping time and whenever .iomap_valid() is |
157 | * called. |
158 | * |
159 | * This is called with the folio over the specified file position held |
160 | * locked by the iomap code. |
161 | */ |
162 | bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); |
163 | }; |
164 | |
/*
 * Flags for iomap_begin / iomap_end.  No flag implies a read.
 *
 * IOMAP_DAX is defined as 0 when CONFIG_FS_DAX is not set, so masking with it
 * always yields 0 in that configuration.
 */
#define IOMAP_WRITE		(1 << 0) /* writing, must allocate blocks */
#define IOMAP_ZERO		(1 << 1) /* zeroing operation, may skip holes */
#define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT		(1 << 4) /* direct I/O */
#define IOMAP_NOWAIT		(1 << 5) /* do not block */
#define IOMAP_OVERWRITE_ONLY	(1 << 6) /* only pure overwrites allowed */
#define IOMAP_UNSHARE		(1 << 7) /* unshare_file_range */
#ifdef CONFIG_FS_DAX
#define IOMAP_DAX		(1 << 8) /* DAX mapping */
#else
#define IOMAP_DAX		0
#endif /* CONFIG_FS_DAX */
181 | |
182 | struct iomap_ops { |
183 | /* |
184 | * Return the existing mapping at pos, or reserve space starting at |
185 | * pos for up to length, as long as we can do it as a single mapping. |
186 | * The actual length is returned in iomap->length. |
187 | */ |
188 | int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, |
189 | unsigned flags, struct iomap *iomap, |
190 | struct iomap *srcmap); |
191 | |
192 | /* |
193 | * Commit and/or unreserve space previous allocated using iomap_begin. |
194 | * Written indicates the length of the successful write operation which |
195 | * needs to be commited, while the rest needs to be unreserved. |
196 | * Written might be zero if no data was written. |
197 | */ |
198 | int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length, |
199 | ssize_t written, unsigned flags, struct iomap *iomap); |
200 | }; |
201 | |
202 | /** |
203 | * struct iomap_iter - Iterate through a range of a file |
204 | * @inode: Set at the start of the iteration and should not change. |
205 | * @pos: The current file position we are operating on. It is updated by |
206 | * calls to iomap_iter(). Treat as read-only in the body. |
207 | * @len: The remaining length of the file segment we're operating on. |
208 | * It is updated at the same time as @pos. |
209 | * @processed: The number of bytes processed by the body in the most recent |
210 | * iteration, or a negative errno. 0 causes the iteration to stop. |
211 | * @flags: Zero or more of the iomap_begin flags above. |
212 | * @iomap: Map describing the I/O iteration |
213 | * @srcmap: Source map for COW operations |
214 | */ |
215 | struct iomap_iter { |
216 | struct inode *inode; |
217 | loff_t pos; |
218 | u64 len; |
219 | s64 processed; |
220 | unsigned flags; |
221 | struct iomap iomap; |
222 | struct iomap srcmap; |
223 | void *private; |
224 | }; |
225 | |
/*
 * Advance the iteration described by @iter, using the filesystem's mapping
 * callbacks in @ops.  Per the struct iomap_iter documentation, calls to this
 * function are what update iter->pos and iter->len.  Only the declaration is
 * visible in this header; see the implementation for the return value
 * contract.
 */
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
227 | |
228 | /** |
229 | * iomap_length - length of the current iomap iteration |
230 | * @iter: iteration structure |
231 | * |
232 | * Returns the length that the operation applies to for the current iteration. |
233 | */ |
234 | static inline u64 iomap_length(const struct iomap_iter *iter) |
235 | { |
236 | u64 end = iter->iomap.offset + iter->iomap.length; |
237 | |
238 | if (iter->srcmap.type != IOMAP_HOLE) |
239 | end = min(end, iter->srcmap.offset + iter->srcmap.length); |
240 | return min(iter->len, end - iter->pos); |
241 | } |
242 | |
243 | /** |
244 | * iomap_iter_srcmap - return the source map for the current iomap iteration |
245 | * @i: iteration structure |
246 | * |
247 | * Write operations on file systems with reflink support might require a |
248 | * source and a destination map. This function retourns the source map |
249 | * for a given operation, which may or may no be identical to the destination |
250 | * map in &i->iomap. |
251 | */ |
252 | static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) |
253 | { |
254 | if (i->srcmap.type != IOMAP_HOLE) |
255 | return &i->srcmap; |
256 | return &i->iomap; |
257 | } |
258 | |
/*
 * Buffered write entry points; only the declarations are visible in this
 * header.  @ops supplies the filesystem's iomap_begin/iomap_end callbacks.
 * @punch is a filesystem-provided callback taking (inode, pos, length) and
 * returning an int.
 */
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
		const struct iomap_ops *ops);
int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
		struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
		int (*punch)(struct inode *inode, loff_t pos, loff_t length));
264 | |
265 | int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); |
266 | void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); |
267 | bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); |
268 | struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); |
269 | bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); |
270 | void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); |
271 | bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); |
/*
 * Further iomap-based file operations; only the declarations are visible in
 * this header.  Each takes the filesystem's mapping callbacks in @ops.
 * @did_zero, where present, is an output flag written through the pointer.
 */
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
		const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
		bool *did_zero, const struct iomap_ops *ops);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
		const struct iomap_ops *ops);
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
		const struct iomap_ops *ops);
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len, const struct iomap_ops *ops);
loff_t iomap_seek_hole(struct inode *inode, loff_t offset,
		const struct iomap_ops *ops);
loff_t iomap_seek_data(struct inode *inode, loff_t offset,
		const struct iomap_ops *ops);
sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
		const struct iomap_ops *ops);
288 | |
/*
 * Structure for writeback I/O completions.
 *
 * io_inline_bio is embedded by value and must stay the final member of the
 * structure; do not add fields after it.
 */
struct iomap_ioend {
	struct list_head	io_list;	/* next ioend in chain */
	u16			io_type;	/* NOTE(review): presumably an IOMAP_* mapping type - confirm */
	u16			io_flags;	/* IOMAP_F_* */
	u32			io_folios;	/* folios added to ioend */
	struct inode		*io_inode;	/* file being written to */
	size_t			io_size;	/* size of the extent */
	loff_t			io_offset;	/* offset in the file */
	sector_t		io_sector;	/* start sector of ioend */
	struct bio		*io_bio;	/* bio being built */
	struct bio		io_inline_bio;	/* MUST BE LAST! */
};
304 | |
/*
 * Writeback callbacks supplied by the filesystem; a pointer to this structure
 * is passed to iomap_writepages().  Only ->map_blocks is required.
 */
struct iomap_writeback_ops {
	/*
	 * Required, maps the blocks so that writeback can be performed on
	 * the range starting at offset.
	 */
	int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
			loff_t offset);

	/*
	 * Optional, allows the file systems to perform actions just before
	 * submitting the bio and/or override the bio end_io handler for complex
	 * operations like copy on write extent manipulation or unwritten extent
	 * conversions.
	 */
	int (*prepare_ioend)(struct iomap_ioend *ioend, int status);

	/*
	 * Optional, allows the file system to discard state on a page where
	 * we failed to submit any I/O.
	 */
	void (*discard_folio)(struct folio *folio, loff_t pos);
};
327 | |
/*
 * Writeback context.  A pointer to it is passed to iomap_writepages() and to
 * the ->map_blocks callback of struct iomap_writeback_ops.
 */
struct iomap_writepage_ctx {
	struct iomap		iomap;	/* NOTE(review): presumably the mapping provided by ->map_blocks - confirm */
	struct iomap_ioend	*ioend;	/* NOTE(review): presumably the ioend currently being built - confirm */
	const struct iomap_writeback_ops *ops;	/* writeback callbacks */
};
333 | |
/*
 * ioend handling and the writeback entry point; only the declarations are
 * visible in this header.  iomap_writepages() takes the writeback context in
 * @wpc and the filesystem's writeback callbacks in @ops.
 */
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
		struct list_head *more_ioends);
void iomap_sort_ioends(struct list_head *ioend_list);
int iomap_writepages(struct address_space *mapping,
		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
		const struct iomap_writeback_ops *ops);
341 | |
/*
 * Flags for direct I/O ->end_io.  Each is a distinct single bit, so both can
 * be set in the same flags word.
 */
#define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
#define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
347 | |
348 | struct iomap_dio_ops { |
349 | int (*end_io)(struct kiocb *iocb, ssize_t size, int error, |
350 | unsigned flags); |
351 | void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, |
352 | loff_t file_offset); |
353 | |
354 | /* |
355 | * Filesystems wishing to attach private information to a direct io bio |
356 | * must provide a ->submit_io method that attaches the additional |
357 | * information to the bio and changes the ->bi_end_io callback to a |
358 | * custom function. This function should, at a minimum, perform any |
359 | * relevant post-processing of the bio and end with a call to |
360 | * iomap_dio_bio_end_io. |
361 | */ |
362 | struct bio_set *bio_set; |
363 | }; |
364 | |
/*
 * NOTE(review): the three flags below look like the values accepted in the
 * dio_flags argument of iomap_dio_rw() / __iomap_dio_rw() - confirm.
 */

/*
 * Wait for the I/O to complete in iomap_dio_rw even if the kiocb is not
 * synchronous.
 */
#define IOMAP_DIO_FORCE_WAIT	(1 << 0)

/*
 * Do not allocate blocks or zero partial blocks, but instead fall back to
 * the caller by returning -EAGAIN.  Used to optimize direct I/O writes that
 * are not aligned to the file system block size.
 */
#define IOMAP_DIO_OVERWRITE_ONLY	(1 << 1)

/*
 * When a page fault occurs, return a partial synchronous result and allow
 * the caller to retry the rest of the operation after dealing with the page
 * fault.
 */
#define IOMAP_DIO_PARTIAL		(1 << 2)
384 | |
/*
 * Direct I/O entry points; only the declarations are visible in this header.
 * iomap_dio_rw() and __iomap_dio_rw() take identical arguments and differ in
 * return type: the former returns an ssize_t, the latter a struct iomap_dio
 * pointer of the kind iomap_dio_complete() accepts.
 * iomap_dio_bio_end_io() is the function a custom ->bi_end_io handler
 * installed by ->submit_io is expected to end with.
 */
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
		unsigned int dio_flags, void *private, size_t done_before);
struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
		unsigned int dio_flags, void *private, size_t done_before);
ssize_t iomap_dio_complete(struct iomap_dio *dio);
void iomap_dio_bio_end_io(struct bio *bio);
393 | |
/*
 * Swap file activation.  With CONFIG_SWAP the real function is declared
 * here.  Without it, iomap_swapfile_activate() is a macro that does not
 * evaluate its arguments and expands to the constant -EIO.
 */
#ifdef CONFIG_SWAP
struct file;
struct swap_info_struct;

int iomap_swapfile_activate(struct swap_info_struct *sis,
		struct file *swap_file, sector_t *pagespan,
		const struct iomap_ops *ops);
#else
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops)	(-EIO)
#endif /* CONFIG_SWAP */
404 | |
405 | #endif /* LINUX_IOMAP_H */ |
406 | |