1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * Berkeley style UIO structures - Alan Cox 1994. |
4 | */ |
5 | #ifndef __LINUX_UIO_H |
6 | #define __LINUX_UIO_H |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/thread_info.h> |
10 | #include <linux/mm_types.h> |
11 | #include <uapi/linux/uio.h> |
12 | |
13 | struct page; |
14 | |
typedef unsigned int __bitwise iov_iter_extraction_t;
16 | |
17 | struct kvec { |
18 | void *iov_base; /* and that should *never* hold a userland pointer */ |
19 | size_t iov_len; |
20 | }; |
21 | |
22 | enum iter_type { |
23 | /* iter types */ |
24 | ITER_UBUF, |
25 | ITER_IOVEC, |
26 | ITER_BVEC, |
27 | ITER_KVEC, |
28 | ITER_XARRAY, |
29 | ITER_DISCARD, |
30 | }; |
31 | |
32 | #define ITER_SOURCE 1 // == WRITE |
33 | #define ITER_DEST 0 // == READ |
34 | |
35 | struct iov_iter_state { |
36 | size_t iov_offset; |
37 | size_t count; |
38 | unsigned long nr_segs; |
39 | }; |
40 | |
41 | struct iov_iter { |
42 | u8 iter_type; |
43 | bool nofault; |
44 | bool data_source; |
45 | size_t iov_offset; |
46 | /* |
47 | * Hack alert: overlay ubuf_iovec with iovec + count, so |
48 | * that the members resolve correctly regardless of the type |
49 | * of iterator used. This means that you can use: |
50 | * |
51 | * &iter->__ubuf_iovec or iter->__iov |
52 | * |
 * interchangeably for the user_backed cases, hence simplifying
54 | * some of the cases that need to deal with both. |
55 | */ |
56 | union { |
57 | /* |
58 | * This really should be a const, but we cannot do that without |
59 | * also modifying any of the zero-filling iter init functions. |
60 | * Leave it non-const for now, but it should be treated as such. |
61 | */ |
62 | struct iovec __ubuf_iovec; |
63 | struct { |
64 | union { |
65 | /* use iter_iov() to get the current vec */ |
66 | const struct iovec *__iov; |
67 | const struct kvec *kvec; |
68 | const struct bio_vec *bvec; |
69 | struct xarray *xarray; |
70 | void __user *ubuf; |
71 | }; |
72 | size_t count; |
73 | }; |
74 | }; |
75 | union { |
76 | unsigned long nr_segs; |
77 | loff_t xarray_start; |
78 | }; |
79 | }; |
80 | |
81 | static inline const struct iovec *iter_iov(const struct iov_iter *iter) |
82 | { |
83 | if (iter->iter_type == ITER_UBUF) |
84 | return (const struct iovec *) &iter->__ubuf_iovec; |
85 | return iter->__iov; |
86 | } |
87 | |
88 | #define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) |
89 | #define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset) |
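
/*
 * Illustrative sketch, not an existing kernel helper: report the user address
 * and remaining length of the segment the iterator is currently walking.
 * Only meaningful for the user-backed (ITER_UBUF/ITER_IOVEC) cases that
 * iter_iov() covers.
 */
static inline size_t uio_example_cur_user_seg(const struct iov_iter *iter,
					      void __user **addr)
{
	*addr = iter_iov_addr(iter);	/* segment base + current offset */
	return iter_iov_len(iter);	/* bytes left in this segment */
}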
90 | |
91 | static inline enum iter_type iov_iter_type(const struct iov_iter *i) |
92 | { |
93 | return i->iter_type; |
94 | } |
95 | |
96 | static inline void iov_iter_save_state(struct iov_iter *iter, |
97 | struct iov_iter_state *state) |
98 | { |
99 | state->iov_offset = iter->iov_offset; |
100 | state->count = iter->count; |
101 | state->nr_segs = iter->nr_segs; |
102 | } |
103 | |
104 | static inline bool iter_is_ubuf(const struct iov_iter *i) |
105 | { |
106 | return iov_iter_type(i) == ITER_UBUF; |
107 | } |
108 | |
109 | static inline bool iter_is_iovec(const struct iov_iter *i) |
110 | { |
111 | return iov_iter_type(i) == ITER_IOVEC; |
112 | } |
113 | |
114 | static inline bool iov_iter_is_kvec(const struct iov_iter *i) |
115 | { |
116 | return iov_iter_type(i) == ITER_KVEC; |
117 | } |
118 | |
119 | static inline bool iov_iter_is_bvec(const struct iov_iter *i) |
120 | { |
121 | return iov_iter_type(i) == ITER_BVEC; |
122 | } |
123 | |
124 | static inline bool iov_iter_is_discard(const struct iov_iter *i) |
125 | { |
126 | return iov_iter_type(i) == ITER_DISCARD; |
127 | } |
128 | |
129 | static inline bool iov_iter_is_xarray(const struct iov_iter *i) |
130 | { |
131 | return iov_iter_type(i) == ITER_XARRAY; |
132 | } |
133 | |
134 | static inline unsigned char iov_iter_rw(const struct iov_iter *i) |
135 | { |
136 | return i->data_source ? WRITE : READ; |
137 | } |
138 | |
139 | static inline bool user_backed_iter(const struct iov_iter *i) |
140 | { |
141 | return iter_is_ubuf(i) || iter_is_iovec(i); |
142 | } |
143 | |
144 | /* |
145 | * Total number of bytes covered by an iovec. |
146 | * |
147 | * NOTE that it is not safe to use this function until all the iovec's |
 * segment lengths have been validated, because the individual lengths can
149 | * overflow a size_t when added together. |
150 | */ |
151 | static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) |
152 | { |
153 | unsigned long seg; |
154 | size_t ret = 0; |
155 | |
156 | for (seg = 0; seg < nr_segs; seg++) |
157 | ret += iov[seg].iov_len; |
158 | return ret; |
159 | } |
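
/*
 * Illustrative sketch, hypothetical helper: validate every segment length
 * against a caller-chosen cap before trusting iov_length(), since the
 * unchecked sum can overflow.  Real callers rely on the checks made when the
 * vector is imported (e.g. by import_iovec()); the name and @max_seg
 * parameter here are assumptions for the example.
 */
static inline bool uio_example_iov_lengths_ok(const struct iovec *iov,
					      unsigned long nr_segs,
					      size_t max_seg)
{
	unsigned long seg;
	size_t total = 0;

	for (seg = 0; seg < nr_segs; seg++) {
		if (iov[seg].iov_len > max_seg)
			return false;
		if (total + iov[seg].iov_len < total)	/* size_t overflow */
			return false;
		total += iov[seg].iov_len;
	}
	return true;
}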
160 | |
161 | size_t copy_page_from_iter_atomic(struct page *page, size_t offset, |
162 | size_t bytes, struct iov_iter *i); |
163 | void iov_iter_advance(struct iov_iter *i, size_t bytes); |
164 | void iov_iter_revert(struct iov_iter *i, size_t bytes); |
165 | size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); |
166 | size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); |
167 | size_t iov_iter_single_seg_count(const struct iov_iter *i); |
168 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, |
169 | struct iov_iter *i); |
170 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, |
171 | struct iov_iter *i); |
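
/*
 * Illustrative sketch, hypothetical helper: prefault the user memory behind
 * @i before copying from it in a context that must not take page faults
 * (e.g. while holding a lock).  fault_in_iov_iter_readable() returns the
 * number of bytes it could *not* fault in, so a result equal to @bytes means
 * none of the buffer is accessible.
 */
static inline bool uio_example_prefault_readable(const struct iov_iter *i,
						 size_t bytes)
{
	if (!user_backed_iter(i))
		return true;	/* kernel-backed iterators cannot fault */
	return fault_in_iov_iter_readable(i, bytes) != bytes;
}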
172 | |
173 | size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); |
174 | size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); |
175 | size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); |
176 | |
177 | static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, |
178 | size_t bytes, struct iov_iter *i) |
179 | { |
	return copy_page_to_iter(&folio->page, offset, bytes, i);
181 | } |
182 | |
183 | static inline size_t copy_folio_from_iter_atomic(struct folio *folio, |
184 | size_t offset, size_t bytes, struct iov_iter *i) |
185 | { |
	return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
187 | } |
188 | |
189 | size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, |
190 | size_t bytes, struct iov_iter *i); |
191 | |
192 | static __always_inline __must_check |
193 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
194 | { |
	if (check_copy_size(addr, bytes, true))
196 | return _copy_to_iter(addr, bytes, i); |
197 | return 0; |
198 | } |
199 | |
200 | static __always_inline __must_check |
201 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
202 | { |
	if (check_copy_size(addr, bytes, false))
204 | return _copy_from_iter(addr, bytes, i); |
205 | return 0; |
206 | } |
207 | |
208 | static __always_inline __must_check |
209 | bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) |
210 | { |
211 | size_t copied = copy_from_iter(addr, bytes, i); |
212 | if (likely(copied == bytes)) |
213 | return true; |
	iov_iter_revert(i, copied);
215 | return false; |
216 | } |
217 | |
218 | static __always_inline __must_check |
219 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
220 | { |
	if (check_copy_size(addr, bytes, false))
222 | return _copy_from_iter_nocache(addr, bytes, i); |
223 | return 0; |
224 | } |
225 | |
226 | static __always_inline __must_check |
227 | bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) |
228 | { |
229 | size_t copied = copy_from_iter_nocache(addr, bytes, i); |
230 | if (likely(copied == bytes)) |
231 | return true; |
	iov_iter_revert(i, copied);
233 | return false; |
234 | } |
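
/*
 * Illustrative sketch, hypothetical helper mirroring copy_from_iter_full()
 * for the opposite direction: treat a short copy_to_iter() as failure and
 * rewind the iterator so the caller sees an all-or-nothing result.
 */
static __always_inline __must_check
bool uio_example_copy_to_iter_full(const void *addr, size_t bytes,
				   struct iov_iter *i)
{
	size_t copied = copy_to_iter(addr, bytes, i);

	if (likely(copied == bytes))
		return true;
	iov_iter_revert(i, copied);
	return false;
}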
235 | |
236 | #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE |
237 | /* |
238 | * Note, users like pmem that depend on the stricter semantics of |
239 | * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for |
240 | * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the |
241 | * destination is flushed from the cache on return. |
242 | */ |
243 | size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); |
244 | #else |
245 | #define _copy_from_iter_flushcache _copy_from_iter_nocache |
246 | #endif |
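
/*
 * Illustrative sketch, hypothetical caller: as the note above says, users
 * such as pmem that need the flush-on-return guarantee must check the
 * architecture capability explicitly rather than relying on the fallback
 * alias to _copy_from_iter_nocache().
 */
static inline size_t uio_example_copy_flushcache(void *addr, size_t bytes,
						 struct iov_iter *i)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE))
		return 0;	/* caller must fall back to another strategy */
	return _copy_from_iter_flushcache(addr, bytes, i);
}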
247 | |
248 | #ifdef CONFIG_ARCH_HAS_COPY_MC |
249 | size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); |
250 | #else |
251 | #define _copy_mc_to_iter _copy_to_iter |
252 | #endif |
253 | |
254 | size_t iov_iter_zero(size_t bytes, struct iov_iter *); |
255 | bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, |
256 | unsigned len_mask); |
257 | unsigned long iov_iter_alignment(const struct iov_iter *i); |
258 | unsigned long iov_iter_gap_alignment(const struct iov_iter *i); |
259 | void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, |
260 | unsigned long nr_segs, size_t count); |
261 | void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, |
262 | unsigned long nr_segs, size_t count); |
263 | void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, |
264 | unsigned long nr_segs, size_t count); |
265 | void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); |
266 | void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, |
267 | loff_t start, size_t count); |
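
/*
 * Illustrative sketch, hypothetical helper: describe one kernel buffer as the
 * source of a transfer via a single-element kvec.  Note that iov_iter_kvec()
 * (like iov_iter_init() and iov_iter_bvec()) keeps a pointer to the array it
 * is given, so @kv must stay alive for as long as the iterator is in use.
 */
static inline void uio_example_kvec_source(struct iov_iter *iter,
					   struct kvec *kv,
					   void *buf, size_t len)
{
	kv->iov_base = buf;
	kv->iov_len = len;
	iov_iter_kvec(iter, ITER_SOURCE, kv, 1, len);
}
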
268 | ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, |
269 | size_t maxsize, unsigned maxpages, size_t *start); |
270 | ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, |
271 | size_t maxsize, size_t *start); |
272 | int iov_iter_npages(const struct iov_iter *i, int maxpages); |
273 | void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); |
274 | |
275 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); |
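
/*
 * Illustrative sketch, hypothetical helper: snapshot the iterator position,
 * attempt a copy, and roll back to the snapshot if only part of the data
 * could be transferred, so the caller can retry from the same place.
 */
static inline size_t uio_example_copy_or_rewind(void *dst, size_t bytes,
						struct iov_iter *iter)
{
	struct iov_iter_state state;
	size_t copied;

	iov_iter_save_state(iter, &state);
	copied = copy_from_iter(dst, bytes, iter);
	if (copied != bytes)
		iov_iter_restore(iter, &state);	/* undo the partial advance */
	return copied;
}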
276 | |
277 | static inline size_t iov_iter_count(const struct iov_iter *i) |
278 | { |
279 | return i->count; |
280 | } |
281 | |
282 | /* |
283 | * Cap the iov_iter by given limit; note that the second argument is |
 * *not* the new size - it is an upper bound on it. Passing it a value
285 | * greater than the amount of data in iov_iter is fine - it'll just do |
286 | * nothing in that case. |
287 | */ |
288 | static inline void iov_iter_truncate(struct iov_iter *i, u64 count) |
289 | { |
290 | /* |
291 | * count doesn't have to fit in size_t - comparison extends both |
292 | * operands to u64 here and any value that would be truncated by |
 * conversion in assignment is by definition greater than all
294 | * values of size_t, including old i->count. |
295 | */ |
296 | if (i->count > count) |
297 | i->count = count; |
298 | } |
299 | |
300 | /* |
 * reexpand a previously truncated iterator; the new count must be no more
 * than the iterator's count before it was truncated.
303 | */ |
304 | static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) |
305 | { |
306 | i->count = count; |
307 | } |
308 | |
309 | static inline int |
310 | iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) |
311 | { |
312 | size_t shorted = 0; |
313 | int npages; |
314 | |
315 | if (iov_iter_count(i) > max_bytes) { |
316 | shorted = iov_iter_count(i) - max_bytes; |
		iov_iter_truncate(i, max_bytes);
318 | } |
319 | npages = iov_iter_npages(i, maxpages); |
320 | if (shorted) |
		iov_iter_reexpand(i, iov_iter_count(i) + shorted);
322 | |
323 | return npages; |
324 | } |
325 | |
326 | struct iovec *iovec_from_user(const struct iovec __user *uvector, |
327 | unsigned long nr_segs, unsigned long fast_segs, |
328 | struct iovec *fast_iov, bool compat); |
329 | ssize_t import_iovec(int type, const struct iovec __user *uvec, |
330 | unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, |
331 | struct iov_iter *i); |
332 | ssize_t __import_iovec(int type, const struct iovec __user *uvec, |
333 | unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, |
334 | struct iov_iter *i, bool compat); |
335 | int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); |
336 | |
337 | static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, |
338 | void __user *buf, size_t count) |
339 | { |
340 | WARN_ON(direction & ~(READ | WRITE)); |
341 | *i = (struct iov_iter) { |
342 | .iter_type = ITER_UBUF, |
343 | .data_source = direction, |
344 | .ubuf = buf, |
345 | .count = count, |
346 | .nr_segs = 1 |
347 | }; |
348 | } |
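
/*
 * Illustrative sketch, hypothetical helper: build a single-segment iterator
 * over a user buffer that will act as the source of a transfer.  Unlike
 * calling iov_iter_ubuf() directly, import_ubuf() also validates the user
 * address range and fails with -EFAULT if it is not accessible.
 */
static inline ssize_t uio_example_import_source(void __user *buf, size_t len,
						struct iov_iter *iter)
{
	int ret = import_ubuf(ITER_SOURCE, buf, len, iter);

	if (ret)
		return ret;
	return iov_iter_count(iter);
}
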
349 | /* Flags for iov_iter_get/extract_pages*() */ |
350 | /* Allow P2PDMA on the extracted pages */ |
351 | #define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) |
352 | |
ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages,
			       size_t maxsize, unsigned int maxpages,
			       iov_iter_extraction_t extraction_flags,
			       size_t *offset0);
357 | |
358 | /** |
359 | * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained |
360 | * @iter: The iterator |
361 | * |
362 | * Examine the iterator and indicate by returning true or false as to how, if |
363 | * at all, pages extracted from the iterator will be retained by the extraction |
364 | * function. |
365 | * |
366 | * %true indicates that the pages will have a pin placed in them that the |
 * caller must unpin.  This must be done for DMA/async DIO to force fork()
368 | * to forcibly copy a page for the child (the parent must retain the original |
369 | * page). |
370 | * |
371 | * %false indicates that no measures are taken and that it's up to the caller |
372 | * to retain the pages. |
373 | */ |
static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter)
375 | { |
	return user_backed_iter(iter);
377 | } |
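
/*
 * Illustrative sketch, hypothetical caller: extract part of the iterator into
 * a caller-supplied page array for DMA, recording whether the pages were
 * pinned and must therefore be released later with unpin_user_pages()
 * (declared in linux/mm.h, which this header does not include - hence only a
 * comment below).
 */
static inline ssize_t uio_example_extract(struct iov_iter *iter,
					  struct page **pages, size_t maxsize,
					  unsigned int maxpages,
					  size_t *offset0, bool *need_unpin)
{
	*need_unpin = iov_iter_extract_will_pin(iter);
	return iov_iter_extract_pages(iter, &pages, maxsize, maxpages,
				      0, offset0);
	/* when done: if (*need_unpin) unpin_user_pages(pages, npages) */
}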
378 | |
379 | struct sg_table; |
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
			   struct sg_table *sgtable, unsigned int sg_max,
			   iov_iter_extraction_t extraction_flags);
383 | |
384 | #endif |
385 | |