1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/fs/read_write.c |
4 | * |
5 | * Copyright (C) 1991, 1992 Linus Torvalds |
6 | */ |
7 | |
8 | #include <linux/slab.h> |
9 | #include <linux/stat.h> |
10 | #include <linux/sched/xacct.h> |
11 | #include <linux/fcntl.h> |
12 | #include <linux/file.h> |
13 | #include <linux/uio.h> |
14 | #include <linux/fsnotify.h> |
15 | #include <linux/security.h> |
16 | #include <linux/export.h> |
17 | #include <linux/syscalls.h> |
18 | #include <linux/pagemap.h> |
19 | #include <linux/splice.h> |
20 | #include <linux/compat.h> |
21 | #include <linux/mount.h> |
22 | #include <linux/fs.h> |
23 | #include "internal.h" |
24 | |
25 | #include <linux/uaccess.h> |
26 | #include <asm/unistd.h> |
27 | |
28 | const struct file_operations generic_ro_fops = { |
29 | .llseek = generic_file_llseek, |
30 | .read_iter = generic_file_read_iter, |
31 | .mmap = generic_file_readonly_mmap, |
32 | .splice_read = generic_file_splice_read, |
33 | }; |
34 | |
35 | EXPORT_SYMBOL(generic_ro_fops); |
36 | |
37 | static inline bool unsigned_offsets(struct file *file) |
38 | { |
39 | return file->f_mode & FMODE_UNSIGNED_OFFSET; |
40 | } |
41 | |
42 | /** |
43 | * vfs_setpos - update the file offset for lseek |
44 | * @file: file structure in question |
45 | * @offset: file offset to seek to |
46 | * @maxsize: maximum file size |
47 | * |
48 | * This is a low-level filesystem helper for updating the file offset to |
49 | * the value specified by @offset if the given offset is valid and it is |
50 | * not equal to the current file offset. |
51 | * |
52 | * Return the specified offset on success and -EINVAL on invalid offset. |
53 | */ |
54 | loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) |
55 | { |
56 | if (offset < 0 && !unsigned_offsets(file)) |
57 | return -EINVAL; |
58 | if (offset > maxsize) |
59 | return -EINVAL; |
60 | |
61 | if (offset != file->f_pos) { |
62 | file->f_pos = offset; |
63 | file->f_version = 0; |
64 | } |
65 | return offset; |
66 | } |
67 | EXPORT_SYMBOL(vfs_setpos); |
68 | |
69 | /** |
70 | * generic_file_llseek_size - generic llseek implementation for regular files |
71 | * @file: file structure to seek on |
72 | * @offset: file offset to seek to |
73 | * @whence: type of seek |
74 | * @size: max size of this file in file system |
75 | * @eof: offset used for SEEK_END position |
76 | * |
77 | * This is a variant of generic_file_llseek that allows passing in a custom |
78 | * maximum file size and a custom EOF position, for e.g. hashed directories |
79 | * |
80 | * Synchronization: |
81 | * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) |
82 | * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. |
83 | * read/writes behave like SEEK_SET against seeks. |
84 | */ |
85 | loff_t |
86 | generic_file_llseek_size(struct file *file, loff_t offset, int whence, |
87 | loff_t maxsize, loff_t eof) |
88 | { |
89 | switch (whence) { |
90 | case SEEK_END: |
91 | offset += eof; |
92 | break; |
93 | case SEEK_CUR: |
94 | /* |
95 | * Here we special-case the lseek(fd, 0, SEEK_CUR) |
96 | * position-querying operation. Avoid rewriting the "same" |
97 | * f_pos value back to the file because a concurrent read(), |
98 | * write() or lseek() might have altered it |
99 | */ |
100 | if (offset == 0) |
101 | return file->f_pos; |
102 | /* |
103 | * f_lock protects against read/modify/write race with other |
104 | * SEEK_CURs. Note that parallel writes and reads behave |
105 | * like SEEK_SET. |
106 | */ |
107 | spin_lock(lock: &file->f_lock); |
108 | offset = vfs_setpos(file, offset: file->f_pos + offset, maxsize); |
109 | spin_unlock(lock: &file->f_lock); |
110 | return offset; |
111 | case SEEK_DATA: |
112 | /* |
113 | * In the generic case the entire file is data, so as long as |
114 | * offset isn't at the end of the file then the offset is data. |
115 | */ |
116 | if ((unsigned long long)offset >= eof) |
117 | return -ENXIO; |
118 | break; |
119 | case SEEK_HOLE: |
120 | /* |
121 | * There is a virtual hole at the end of the file, so as long as |
122 | * offset isn't i_size or larger, return i_size. |
123 | */ |
124 | if ((unsigned long long)offset >= eof) |
125 | return -ENXIO; |
126 | offset = eof; |
127 | break; |
128 | } |
129 | |
130 | return vfs_setpos(file, offset, maxsize); |
131 | } |
132 | EXPORT_SYMBOL(generic_file_llseek_size); |
133 | |
134 | /** |
135 | * generic_file_llseek - generic llseek implementation for regular files |
136 | * @file: file structure to seek on |
137 | * @offset: file offset to seek to |
138 | * @whence: type of seek |
139 | * |
140 | * This is a generic implemenation of ->llseek useable for all normal local |
141 | * filesystems. It just updates the file offset to the value specified by |
142 | * @offset and @whence. |
143 | */ |
144 | loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) |
145 | { |
146 | struct inode *inode = file->f_mapping->host; |
147 | |
148 | return generic_file_llseek_size(file, offset, whence, |
149 | maxsize: inode->i_sb->s_maxbytes, |
150 | eof: i_size_read(inode)); |
151 | } |
152 | EXPORT_SYMBOL(generic_file_llseek); |
153 | |
154 | /** |
155 | * fixed_size_llseek - llseek implementation for fixed-sized devices |
156 | * @file: file structure to seek on |
157 | * @offset: file offset to seek to |
158 | * @whence: type of seek |
159 | * @size: size of the file |
160 | * |
161 | */ |
162 | loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) |
163 | { |
164 | switch (whence) { |
165 | case SEEK_SET: case SEEK_CUR: case SEEK_END: |
166 | return generic_file_llseek_size(file, offset, whence, |
167 | maxsize: size, eof: size); |
168 | default: |
169 | return -EINVAL; |
170 | } |
171 | } |
172 | EXPORT_SYMBOL(fixed_size_llseek); |
173 | |
174 | /** |
175 | * no_seek_end_llseek - llseek implementation for fixed-sized devices |
176 | * @file: file structure to seek on |
177 | * @offset: file offset to seek to |
178 | * @whence: type of seek |
179 | * |
180 | */ |
181 | loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) |
182 | { |
183 | switch (whence) { |
184 | case SEEK_SET: case SEEK_CUR: |
185 | return generic_file_llseek_size(file, offset, whence, |
186 | OFFSET_MAX, eof: 0); |
187 | default: |
188 | return -EINVAL; |
189 | } |
190 | } |
191 | EXPORT_SYMBOL(no_seek_end_llseek); |
192 | |
193 | /** |
194 | * no_seek_end_llseek_size - llseek implementation for fixed-sized devices |
195 | * @file: file structure to seek on |
196 | * @offset: file offset to seek to |
197 | * @whence: type of seek |
198 | * @size: maximal offset allowed |
199 | * |
200 | */ |
201 | loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) |
202 | { |
203 | switch (whence) { |
204 | case SEEK_SET: case SEEK_CUR: |
205 | return generic_file_llseek_size(file, offset, whence, |
206 | maxsize: size, eof: 0); |
207 | default: |
208 | return -EINVAL; |
209 | } |
210 | } |
211 | EXPORT_SYMBOL(no_seek_end_llseek_size); |
212 | |
213 | /** |
214 | * noop_llseek - No Operation Performed llseek implementation |
215 | * @file: file structure to seek on |
216 | * @offset: file offset to seek to |
217 | * @whence: type of seek |
218 | * |
219 | * This is an implementation of ->llseek useable for the rare special case when |
220 | * userspace expects the seek to succeed but the (device) file is actually not |
221 | * able to perform the seek. In this case you use noop_llseek() instead of |
222 | * falling back to the default implementation of ->llseek. |
223 | */ |
224 | loff_t noop_llseek(struct file *file, loff_t offset, int whence) |
225 | { |
226 | return file->f_pos; |
227 | } |
228 | EXPORT_SYMBOL(noop_llseek); |
229 | |
230 | loff_t default_llseek(struct file *file, loff_t offset, int whence) |
231 | { |
232 | struct inode *inode = file_inode(f: file); |
233 | loff_t retval; |
234 | |
235 | inode_lock(inode); |
236 | switch (whence) { |
237 | case SEEK_END: |
238 | offset += i_size_read(inode); |
239 | break; |
240 | case SEEK_CUR: |
241 | if (offset == 0) { |
242 | retval = file->f_pos; |
243 | goto out; |
244 | } |
245 | offset += file->f_pos; |
246 | break; |
247 | case SEEK_DATA: |
248 | /* |
249 | * In the generic case the entire file is data, so as |
250 | * long as offset isn't at the end of the file then the |
251 | * offset is data. |
252 | */ |
253 | if (offset >= inode->i_size) { |
254 | retval = -ENXIO; |
255 | goto out; |
256 | } |
257 | break; |
258 | case SEEK_HOLE: |
259 | /* |
260 | * There is a virtual hole at the end of the file, so |
261 | * as long as offset isn't i_size or larger, return |
262 | * i_size. |
263 | */ |
264 | if (offset >= inode->i_size) { |
265 | retval = -ENXIO; |
266 | goto out; |
267 | } |
268 | offset = inode->i_size; |
269 | break; |
270 | } |
271 | retval = -EINVAL; |
272 | if (offset >= 0 || unsigned_offsets(file)) { |
273 | if (offset != file->f_pos) { |
274 | file->f_pos = offset; |
275 | file->f_version = 0; |
276 | } |
277 | retval = offset; |
278 | } |
279 | out: |
280 | inode_unlock(inode); |
281 | return retval; |
282 | } |
283 | EXPORT_SYMBOL(default_llseek); |
284 | |
285 | loff_t vfs_llseek(struct file *file, loff_t offset, int whence) |
286 | { |
287 | if (!(file->f_mode & FMODE_LSEEK)) |
288 | return -ESPIPE; |
289 | return file->f_op->llseek(file, offset, whence); |
290 | } |
291 | EXPORT_SYMBOL(vfs_llseek); |
292 | |
293 | static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) |
294 | { |
295 | off_t retval; |
296 | struct fd f = fdget_pos(fd); |
297 | if (!f.file) |
298 | return -EBADF; |
299 | |
300 | retval = -EINVAL; |
301 | if (whence <= SEEK_MAX) { |
302 | loff_t res = vfs_llseek(file: f.file, offset, whence); |
303 | retval = res; |
304 | if (res != (loff_t)retval) |
305 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ |
306 | } |
307 | fdput_pos(f); |
308 | return retval; |
309 | } |
310 | |
311 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) |
312 | { |
313 | return ksys_lseek(fd, offset, whence); |
314 | } |
315 | |
#ifdef CONFIG_COMPAT
/* Compat lseek: converts the compat offset to off_t and forwards it. */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	off_t native_offset = offset;

	return ksys_lseek(fd, native_offset, whence);
}
#endif
322 | |
323 | #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ |
324 | defined(__ARCH_WANT_SYS_LLSEEK) |
325 | SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, |
326 | unsigned long, offset_low, loff_t __user *, result, |
327 | unsigned int, whence) |
328 | { |
329 | int retval; |
330 | struct fd f = fdget_pos(fd); |
331 | loff_t offset; |
332 | |
333 | if (!f.file) |
334 | return -EBADF; |
335 | |
336 | retval = -EINVAL; |
337 | if (whence > SEEK_MAX) |
338 | goto out_putf; |
339 | |
340 | offset = vfs_llseek(file: f.file, offset: ((loff_t) offset_high << 32) | offset_low, |
341 | whence); |
342 | |
343 | retval = (int)offset; |
344 | if (offset >= 0) { |
345 | retval = -EFAULT; |
346 | if (!copy_to_user(to: result, from: &offset, n: sizeof(offset))) |
347 | retval = 0; |
348 | } |
349 | out_putf: |
350 | fdput_pos(f); |
351 | return retval; |
352 | } |
353 | #endif |
354 | |
355 | int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) |
356 | { |
357 | if (unlikely((ssize_t) count < 0)) |
358 | return -EINVAL; |
359 | |
360 | if (ppos) { |
361 | loff_t pos = *ppos; |
362 | |
363 | if (unlikely(pos < 0)) { |
364 | if (!unsigned_offsets(file)) |
365 | return -EINVAL; |
366 | if (count >= -pos) /* both values are in 0..LLONG_MAX */ |
367 | return -EOVERFLOW; |
368 | } else if (unlikely((loff_t) (pos + count) < 0)) { |
369 | if (!unsigned_offsets(file)) |
370 | return -EINVAL; |
371 | } |
372 | } |
373 | |
374 | return security_file_permission(file, |
375 | mask: read_write == READ ? MAY_READ : MAY_WRITE); |
376 | } |
377 | EXPORT_SYMBOL(rw_verify_area); |
378 | |
379 | static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) |
380 | { |
381 | struct kiocb kiocb; |
382 | struct iov_iter iter; |
383 | ssize_t ret; |
384 | |
385 | init_sync_kiocb(kiocb: &kiocb, filp); |
386 | kiocb.ki_pos = (ppos ? *ppos : 0); |
387 | iov_iter_ubuf(i: &iter, ITER_DEST, buf, count: len); |
388 | |
389 | ret = call_read_iter(file: filp, kio: &kiocb, iter: &iter); |
390 | BUG_ON(ret == -EIOCBQUEUED); |
391 | if (ppos) |
392 | *ppos = kiocb.ki_pos; |
393 | return ret; |
394 | } |
395 | |
396 | static int warn_unsupported(struct file *file, const char *op) |
397 | { |
398 | pr_warn_ratelimited( |
399 | "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n" , |
400 | op, file, current->pid, current->comm); |
401 | return -EINVAL; |
402 | } |
403 | |
404 | ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) |
405 | { |
406 | struct kvec iov = { |
407 | .iov_base = buf, |
408 | .iov_len = min_t(size_t, count, MAX_RW_COUNT), |
409 | }; |
410 | struct kiocb kiocb; |
411 | struct iov_iter iter; |
412 | ssize_t ret; |
413 | |
414 | if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) |
415 | return -EINVAL; |
416 | if (!(file->f_mode & FMODE_CAN_READ)) |
417 | return -EINVAL; |
418 | /* |
419 | * Also fail if ->read_iter and ->read are both wired up as that |
420 | * implies very convoluted semantics. |
421 | */ |
422 | if (unlikely(!file->f_op->read_iter || file->f_op->read)) |
423 | return warn_unsupported(file, op: "read" ); |
424 | |
425 | init_sync_kiocb(kiocb: &kiocb, filp: file); |
426 | kiocb.ki_pos = pos ? *pos : 0; |
427 | iov_iter_kvec(i: &iter, ITER_DEST, kvec: &iov, nr_segs: 1, count: iov.iov_len); |
428 | ret = file->f_op->read_iter(&kiocb, &iter); |
429 | if (ret > 0) { |
430 | if (pos) |
431 | *pos = kiocb.ki_pos; |
432 | fsnotify_access(file); |
433 | add_rchar(current, amt: ret); |
434 | } |
435 | inc_syscr(current); |
436 | return ret; |
437 | } |
438 | |
439 | ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) |
440 | { |
441 | ssize_t ret; |
442 | |
443 | ret = rw_verify_area(READ, file, ppos: pos, count); |
444 | if (ret) |
445 | return ret; |
446 | return __kernel_read(file, buf, count, pos); |
447 | } |
448 | EXPORT_SYMBOL(kernel_read); |
449 | |
450 | ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) |
451 | { |
452 | ssize_t ret; |
453 | |
454 | if (!(file->f_mode & FMODE_READ)) |
455 | return -EBADF; |
456 | if (!(file->f_mode & FMODE_CAN_READ)) |
457 | return -EINVAL; |
458 | if (unlikely(!access_ok(buf, count))) |
459 | return -EFAULT; |
460 | |
461 | ret = rw_verify_area(READ, file, ppos: pos, count); |
462 | if (ret) |
463 | return ret; |
464 | if (count > MAX_RW_COUNT) |
465 | count = MAX_RW_COUNT; |
466 | |
467 | if (file->f_op->read) |
468 | ret = file->f_op->read(file, buf, count, pos); |
469 | else if (file->f_op->read_iter) |
470 | ret = new_sync_read(filp: file, buf, len: count, ppos: pos); |
471 | else |
472 | ret = -EINVAL; |
473 | if (ret > 0) { |
474 | fsnotify_access(file); |
475 | add_rchar(current, amt: ret); |
476 | } |
477 | inc_syscr(current); |
478 | return ret; |
479 | } |
480 | |
481 | static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) |
482 | { |
483 | struct kiocb kiocb; |
484 | struct iov_iter iter; |
485 | ssize_t ret; |
486 | |
487 | init_sync_kiocb(kiocb: &kiocb, filp); |
488 | kiocb.ki_pos = (ppos ? *ppos : 0); |
489 | iov_iter_ubuf(i: &iter, ITER_SOURCE, buf: (void __user *)buf, count: len); |
490 | |
491 | ret = call_write_iter(file: filp, kio: &kiocb, iter: &iter); |
492 | BUG_ON(ret == -EIOCBQUEUED); |
493 | if (ret > 0 && ppos) |
494 | *ppos = kiocb.ki_pos; |
495 | return ret; |
496 | } |
497 | |
498 | /* caller is responsible for file_start_write/file_end_write */ |
499 | ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos) |
500 | { |
501 | struct kiocb kiocb; |
502 | ssize_t ret; |
503 | |
504 | if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) |
505 | return -EBADF; |
506 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
507 | return -EINVAL; |
508 | /* |
509 | * Also fail if ->write_iter and ->write are both wired up as that |
510 | * implies very convoluted semantics. |
511 | */ |
512 | if (unlikely(!file->f_op->write_iter || file->f_op->write)) |
513 | return warn_unsupported(file, op: "write" ); |
514 | |
515 | init_sync_kiocb(kiocb: &kiocb, filp: file); |
516 | kiocb.ki_pos = pos ? *pos : 0; |
517 | ret = file->f_op->write_iter(&kiocb, from); |
518 | if (ret > 0) { |
519 | if (pos) |
520 | *pos = kiocb.ki_pos; |
521 | fsnotify_modify(file); |
522 | add_wchar(current, amt: ret); |
523 | } |
524 | inc_syscw(current); |
525 | return ret; |
526 | } |
527 | |
528 | /* caller is responsible for file_start_write/file_end_write */ |
529 | ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) |
530 | { |
531 | struct kvec iov = { |
532 | .iov_base = (void *)buf, |
533 | .iov_len = min_t(size_t, count, MAX_RW_COUNT), |
534 | }; |
535 | struct iov_iter iter; |
536 | iov_iter_kvec(i: &iter, ITER_SOURCE, kvec: &iov, nr_segs: 1, count: iov.iov_len); |
537 | return __kernel_write_iter(file, from: &iter, pos); |
538 | } |
539 | /* |
540 | * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", |
541 | * but autofs is one of the few internal kernel users that actually |
542 | * wants this _and_ can be built as a module. So we need to export |
543 | * this symbol for autofs, even though it really isn't appropriate |
544 | * for any other kernel modules. |
545 | */ |
546 | EXPORT_SYMBOL_GPL(__kernel_write); |
547 | |
548 | ssize_t kernel_write(struct file *file, const void *buf, size_t count, |
549 | loff_t *pos) |
550 | { |
551 | ssize_t ret; |
552 | |
553 | ret = rw_verify_area(WRITE, file, ppos: pos, count); |
554 | if (ret) |
555 | return ret; |
556 | |
557 | file_start_write(file); |
558 | ret = __kernel_write(file, buf, count, pos); |
559 | file_end_write(file); |
560 | return ret; |
561 | } |
562 | EXPORT_SYMBOL(kernel_write); |
563 | |
564 | ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) |
565 | { |
566 | ssize_t ret; |
567 | |
568 | if (!(file->f_mode & FMODE_WRITE)) |
569 | return -EBADF; |
570 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
571 | return -EINVAL; |
572 | if (unlikely(!access_ok(buf, count))) |
573 | return -EFAULT; |
574 | |
575 | ret = rw_verify_area(WRITE, file, ppos: pos, count); |
576 | if (ret) |
577 | return ret; |
578 | if (count > MAX_RW_COUNT) |
579 | count = MAX_RW_COUNT; |
580 | file_start_write(file); |
581 | if (file->f_op->write) |
582 | ret = file->f_op->write(file, buf, count, pos); |
583 | else if (file->f_op->write_iter) |
584 | ret = new_sync_write(filp: file, buf, len: count, ppos: pos); |
585 | else |
586 | ret = -EINVAL; |
587 | if (ret > 0) { |
588 | fsnotify_modify(file); |
589 | add_wchar(current, amt: ret); |
590 | } |
591 | inc_syscw(current); |
592 | file_end_write(file); |
593 | return ret; |
594 | } |
595 | |
596 | /* file_ppos returns &file->f_pos or NULL if file is stream */ |
597 | static inline loff_t *file_ppos(struct file *file) |
598 | { |
599 | return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos; |
600 | } |
601 | |
602 | ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) |
603 | { |
604 | struct fd f = fdget_pos(fd); |
605 | ssize_t ret = -EBADF; |
606 | |
607 | if (f.file) { |
608 | loff_t pos, *ppos = file_ppos(file: f.file); |
609 | if (ppos) { |
610 | pos = *ppos; |
611 | ppos = &pos; |
612 | } |
613 | ret = vfs_read(file: f.file, buf, count, pos: ppos); |
614 | if (ret >= 0 && ppos) |
615 | f.file->f_pos = pos; |
616 | fdput_pos(f); |
617 | } |
618 | return ret; |
619 | } |
620 | |
621 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) |
622 | { |
623 | return ksys_read(fd, buf, count); |
624 | } |
625 | |
626 | ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) |
627 | { |
628 | struct fd f = fdget_pos(fd); |
629 | ssize_t ret = -EBADF; |
630 | |
631 | if (f.file) { |
632 | loff_t pos, *ppos = file_ppos(file: f.file); |
633 | if (ppos) { |
634 | pos = *ppos; |
635 | ppos = &pos; |
636 | } |
637 | ret = vfs_write(file: f.file, buf, count, pos: ppos); |
638 | if (ret >= 0 && ppos) |
639 | f.file->f_pos = pos; |
640 | fdput_pos(f); |
641 | } |
642 | |
643 | return ret; |
644 | } |
645 | |
646 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, |
647 | size_t, count) |
648 | { |
649 | return ksys_write(fd, buf, count); |
650 | } |
651 | |
652 | ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, |
653 | loff_t pos) |
654 | { |
655 | struct fd f; |
656 | ssize_t ret = -EBADF; |
657 | |
658 | if (pos < 0) |
659 | return -EINVAL; |
660 | |
661 | f = fdget(fd); |
662 | if (f.file) { |
663 | ret = -ESPIPE; |
664 | if (f.file->f_mode & FMODE_PREAD) |
665 | ret = vfs_read(file: f.file, buf, count, pos: &pos); |
666 | fdput(fd: f); |
667 | } |
668 | |
669 | return ret; |
670 | } |
671 | |
672 | SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, |
673 | size_t, count, loff_t, pos) |
674 | { |
675 | return ksys_pread64(fd, buf, count, pos); |
676 | } |
677 | |
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64)
/* Compat pread64: glues the split 64-bit position back together. */
COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
		       size_t, count, compat_arg_u64_dual(pos))
{
	loff_t offset = compat_arg_u64_glue(pos);

	return ksys_pread64(fd, buf, count, offset);
}
#endif
685 | |
686 | ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, |
687 | size_t count, loff_t pos) |
688 | { |
689 | struct fd f; |
690 | ssize_t ret = -EBADF; |
691 | |
692 | if (pos < 0) |
693 | return -EINVAL; |
694 | |
695 | f = fdget(fd); |
696 | if (f.file) { |
697 | ret = -ESPIPE; |
698 | if (f.file->f_mode & FMODE_PWRITE) |
699 | ret = vfs_write(file: f.file, buf, count, pos: &pos); |
700 | fdput(fd: f); |
701 | } |
702 | |
703 | return ret; |
704 | } |
705 | |
706 | SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, |
707 | size_t, count, loff_t, pos) |
708 | { |
709 | return ksys_pwrite64(fd, buf, count, pos); |
710 | } |
711 | |
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64)
/* Compat pwrite64: glues the split 64-bit position back together. */
COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf,
		       size_t, count, compat_arg_u64_dual(pos))
{
	loff_t offset = compat_arg_u64_glue(pos);

	return ksys_pwrite64(fd, buf, count, offset);
}
#endif
719 | |
720 | static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, |
721 | loff_t *ppos, int type, rwf_t flags) |
722 | { |
723 | struct kiocb kiocb; |
724 | ssize_t ret; |
725 | |
726 | init_sync_kiocb(kiocb: &kiocb, filp); |
727 | ret = kiocb_set_rw_flags(ki: &kiocb, flags); |
728 | if (ret) |
729 | return ret; |
730 | kiocb.ki_pos = (ppos ? *ppos : 0); |
731 | |
732 | if (type == READ) |
733 | ret = call_read_iter(file: filp, kio: &kiocb, iter); |
734 | else |
735 | ret = call_write_iter(file: filp, kio: &kiocb, iter); |
736 | BUG_ON(ret == -EIOCBQUEUED); |
737 | if (ppos) |
738 | *ppos = kiocb.ki_pos; |
739 | return ret; |
740 | } |
741 | |
742 | /* Do it by hand, with file-ops */ |
743 | static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, |
744 | loff_t *ppos, int type, rwf_t flags) |
745 | { |
746 | ssize_t ret = 0; |
747 | |
748 | if (flags & ~RWF_HIPRI) |
749 | return -EOPNOTSUPP; |
750 | |
751 | while (iov_iter_count(i: iter)) { |
752 | ssize_t nr; |
753 | |
754 | if (type == READ) { |
755 | nr = filp->f_op->read(filp, iter_iov_addr(iter), |
756 | iter_iov_len(iter), ppos); |
757 | } else { |
758 | nr = filp->f_op->write(filp, iter_iov_addr(iter), |
759 | iter_iov_len(iter), ppos); |
760 | } |
761 | |
762 | if (nr < 0) { |
763 | if (!ret) |
764 | ret = nr; |
765 | break; |
766 | } |
767 | ret += nr; |
768 | if (nr != iter_iov_len(iter)) |
769 | break; |
770 | iov_iter_advance(i: iter, bytes: nr); |
771 | } |
772 | |
773 | return ret; |
774 | } |
775 | |
776 | static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, |
777 | loff_t *pos, rwf_t flags) |
778 | { |
779 | size_t tot_len; |
780 | ssize_t ret = 0; |
781 | |
782 | if (!(file->f_mode & FMODE_READ)) |
783 | return -EBADF; |
784 | if (!(file->f_mode & FMODE_CAN_READ)) |
785 | return -EINVAL; |
786 | |
787 | tot_len = iov_iter_count(i: iter); |
788 | if (!tot_len) |
789 | goto out; |
790 | ret = rw_verify_area(READ, file, ppos: pos, count: tot_len); |
791 | if (ret < 0) |
792 | return ret; |
793 | |
794 | if (file->f_op->read_iter) |
795 | ret = do_iter_readv_writev(filp: file, iter, ppos: pos, READ, flags); |
796 | else |
797 | ret = do_loop_readv_writev(filp: file, iter, ppos: pos, READ, flags); |
798 | out: |
799 | if (ret >= 0) |
800 | fsnotify_access(file); |
801 | return ret; |
802 | } |
803 | |
804 | ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, |
805 | struct iov_iter *iter) |
806 | { |
807 | size_t tot_len; |
808 | ssize_t ret = 0; |
809 | |
810 | if (!file->f_op->read_iter) |
811 | return -EINVAL; |
812 | if (!(file->f_mode & FMODE_READ)) |
813 | return -EBADF; |
814 | if (!(file->f_mode & FMODE_CAN_READ)) |
815 | return -EINVAL; |
816 | |
817 | tot_len = iov_iter_count(i: iter); |
818 | if (!tot_len) |
819 | goto out; |
820 | ret = rw_verify_area(READ, file, ppos: &iocb->ki_pos, count: tot_len); |
821 | if (ret < 0) |
822 | return ret; |
823 | |
824 | ret = call_read_iter(file, kio: iocb, iter); |
825 | out: |
826 | if (ret >= 0) |
827 | fsnotify_access(file); |
828 | return ret; |
829 | } |
830 | EXPORT_SYMBOL(vfs_iocb_iter_read); |
831 | |
832 | ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, |
833 | rwf_t flags) |
834 | { |
835 | if (!file->f_op->read_iter) |
836 | return -EINVAL; |
837 | return do_iter_read(file, iter, pos: ppos, flags); |
838 | } |
839 | EXPORT_SYMBOL(vfs_iter_read); |
840 | |
841 | static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, |
842 | loff_t *pos, rwf_t flags) |
843 | { |
844 | size_t tot_len; |
845 | ssize_t ret = 0; |
846 | |
847 | if (!(file->f_mode & FMODE_WRITE)) |
848 | return -EBADF; |
849 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
850 | return -EINVAL; |
851 | |
852 | tot_len = iov_iter_count(i: iter); |
853 | if (!tot_len) |
854 | return 0; |
855 | ret = rw_verify_area(WRITE, file, ppos: pos, count: tot_len); |
856 | if (ret < 0) |
857 | return ret; |
858 | |
859 | if (file->f_op->write_iter) |
860 | ret = do_iter_readv_writev(filp: file, iter, ppos: pos, WRITE, flags); |
861 | else |
862 | ret = do_loop_readv_writev(filp: file, iter, ppos: pos, WRITE, flags); |
863 | if (ret > 0) |
864 | fsnotify_modify(file); |
865 | return ret; |
866 | } |
867 | |
868 | ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, |
869 | struct iov_iter *iter) |
870 | { |
871 | size_t tot_len; |
872 | ssize_t ret = 0; |
873 | |
874 | if (!file->f_op->write_iter) |
875 | return -EINVAL; |
876 | if (!(file->f_mode & FMODE_WRITE)) |
877 | return -EBADF; |
878 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
879 | return -EINVAL; |
880 | |
881 | tot_len = iov_iter_count(i: iter); |
882 | if (!tot_len) |
883 | return 0; |
884 | ret = rw_verify_area(WRITE, file, ppos: &iocb->ki_pos, count: tot_len); |
885 | if (ret < 0) |
886 | return ret; |
887 | |
888 | ret = call_write_iter(file, kio: iocb, iter); |
889 | if (ret > 0) |
890 | fsnotify_modify(file); |
891 | |
892 | return ret; |
893 | } |
894 | EXPORT_SYMBOL(vfs_iocb_iter_write); |
895 | |
896 | ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, |
897 | rwf_t flags) |
898 | { |
899 | if (!file->f_op->write_iter) |
900 | return -EINVAL; |
901 | return do_iter_write(file, iter, pos: ppos, flags); |
902 | } |
903 | EXPORT_SYMBOL(vfs_iter_write); |
904 | |
905 | static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, |
906 | unsigned long vlen, loff_t *pos, rwf_t flags) |
907 | { |
908 | struct iovec iovstack[UIO_FASTIOV]; |
909 | struct iovec *iov = iovstack; |
910 | struct iov_iter iter; |
911 | ssize_t ret; |
912 | |
913 | ret = import_iovec(ITER_DEST, uvec: vec, nr_segs: vlen, ARRAY_SIZE(iovstack), iovp: &iov, i: &iter); |
914 | if (ret >= 0) { |
915 | ret = do_iter_read(file, iter: &iter, pos, flags); |
916 | kfree(objp: iov); |
917 | } |
918 | |
919 | return ret; |
920 | } |
921 | |
922 | static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, |
923 | unsigned long vlen, loff_t *pos, rwf_t flags) |
924 | { |
925 | struct iovec iovstack[UIO_FASTIOV]; |
926 | struct iovec *iov = iovstack; |
927 | struct iov_iter iter; |
928 | ssize_t ret; |
929 | |
930 | ret = import_iovec(ITER_SOURCE, uvec: vec, nr_segs: vlen, ARRAY_SIZE(iovstack), iovp: &iov, i: &iter); |
931 | if (ret >= 0) { |
932 | file_start_write(file); |
933 | ret = do_iter_write(file, iter: &iter, pos, flags); |
934 | file_end_write(file); |
935 | kfree(objp: iov); |
936 | } |
937 | return ret; |
938 | } |
939 | |
940 | static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, |
941 | unsigned long vlen, rwf_t flags) |
942 | { |
943 | struct fd f = fdget_pos(fd); |
944 | ssize_t ret = -EBADF; |
945 | |
946 | if (f.file) { |
947 | loff_t pos, *ppos = file_ppos(file: f.file); |
948 | if (ppos) { |
949 | pos = *ppos; |
950 | ppos = &pos; |
951 | } |
952 | ret = vfs_readv(file: f.file, vec, vlen, pos: ppos, flags); |
953 | if (ret >= 0 && ppos) |
954 | f.file->f_pos = pos; |
955 | fdput_pos(f); |
956 | } |
957 | |
958 | if (ret > 0) |
959 | add_rchar(current, amt: ret); |
960 | inc_syscr(current); |
961 | return ret; |
962 | } |
963 | |
964 | static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, |
965 | unsigned long vlen, rwf_t flags) |
966 | { |
967 | struct fd f = fdget_pos(fd); |
968 | ssize_t ret = -EBADF; |
969 | |
970 | if (f.file) { |
971 | loff_t pos, *ppos = file_ppos(file: f.file); |
972 | if (ppos) { |
973 | pos = *ppos; |
974 | ppos = &pos; |
975 | } |
976 | ret = vfs_writev(file: f.file, vec, vlen, pos: ppos, flags); |
977 | if (ret >= 0 && ppos) |
978 | f.file->f_pos = pos; |
979 | fdput_pos(f); |
980 | } |
981 | |
982 | if (ret > 0) |
983 | add_wchar(current, amt: ret); |
984 | inc_syscw(current); |
985 | return ret; |
986 | } |
987 | |
988 | static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) |
989 | { |
990 | #define HALF_LONG_BITS (BITS_PER_LONG / 2) |
991 | return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; |
992 | } |
993 | |
994 | static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, |
995 | unsigned long vlen, loff_t pos, rwf_t flags) |
996 | { |
997 | struct fd f; |
998 | ssize_t ret = -EBADF; |
999 | |
1000 | if (pos < 0) |
1001 | return -EINVAL; |
1002 | |
1003 | f = fdget(fd); |
1004 | if (f.file) { |
1005 | ret = -ESPIPE; |
1006 | if (f.file->f_mode & FMODE_PREAD) |
1007 | ret = vfs_readv(file: f.file, vec, vlen, pos: &pos, flags); |
1008 | fdput(fd: f); |
1009 | } |
1010 | |
1011 | if (ret > 0) |
1012 | add_rchar(current, amt: ret); |
1013 | inc_syscr(current); |
1014 | return ret; |
1015 | } |
1016 | |
1017 | static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, |
1018 | unsigned long vlen, loff_t pos, rwf_t flags) |
1019 | { |
1020 | struct fd f; |
1021 | ssize_t ret = -EBADF; |
1022 | |
1023 | if (pos < 0) |
1024 | return -EINVAL; |
1025 | |
1026 | f = fdget(fd); |
1027 | if (f.file) { |
1028 | ret = -ESPIPE; |
1029 | if (f.file->f_mode & FMODE_PWRITE) |
1030 | ret = vfs_writev(file: f.file, vec, vlen, pos: &pos, flags); |
1031 | fdput(fd: f); |
1032 | } |
1033 | |
1034 | if (ret > 0) |
1035 | add_wchar(current, amt: ret); |
1036 | inc_syscw(current); |
1037 | return ret; |
1038 | } |
1039 | |
1040 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, |
1041 | unsigned long, vlen) |
1042 | { |
1043 | return do_readv(fd, vec, vlen, flags: 0); |
1044 | } |
1045 | |
1046 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, |
1047 | unsigned long, vlen) |
1048 | { |
1049 | return do_writev(fd, vec, vlen, flags: 0); |
1050 | } |
1051 | |
1052 | SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, |
1053 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
1054 | { |
1055 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1056 | |
1057 | return do_preadv(fd, vec, vlen, pos, flags: 0); |
1058 | } |
1059 | |
1060 | SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, |
1061 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, |
1062 | rwf_t, flags) |
1063 | { |
1064 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1065 | |
1066 | if (pos == -1) |
1067 | return do_readv(fd, vec, vlen, flags); |
1068 | |
1069 | return do_preadv(fd, vec, vlen, pos, flags); |
1070 | } |
1071 | |
1072 | SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, |
1073 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
1074 | { |
1075 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1076 | |
1077 | return do_pwritev(fd, vec, vlen, pos, flags: 0); |
1078 | } |
1079 | |
1080 | SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, |
1081 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, |
1082 | rwf_t, flags) |
1083 | { |
1084 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1085 | |
1086 | if (pos == -1) |
1087 | return do_writev(fd, vec, vlen, flags); |
1088 | |
1089 | return do_pwritev(fd, vec, vlen, pos, flags); |
1090 | } |
1091 | |
1092 | /* |
1093 | * Various compat syscalls. Note that they all pretend to take a native |
1094 | * iovec - import_iovec will properly treat those as compat_iovecs based on |
1095 | * in_compat_syscall(). |
1096 | */ |
1097 | #ifdef CONFIG_COMPAT |
1098 | #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 |
1099 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, |
1100 | const struct iovec __user *, vec, |
1101 | unsigned long, vlen, loff_t, pos) |
1102 | { |
1103 | return do_preadv(fd, vec, vlen, pos, 0); |
1104 | } |
1105 | #endif |
1106 | |
1107 | COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, |
1108 | const struct iovec __user *, vec, |
1109 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
1110 | { |
1111 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1112 | |
1113 | return do_preadv(fd, vec, vlen, pos, 0); |
1114 | } |
1115 | |
1116 | #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 |
1117 | COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, |
1118 | const struct iovec __user *, vec, |
1119 | unsigned long, vlen, loff_t, pos, rwf_t, flags) |
1120 | { |
1121 | if (pos == -1) |
1122 | return do_readv(fd, vec, vlen, flags); |
1123 | return do_preadv(fd, vec, vlen, pos, flags); |
1124 | } |
1125 | #endif |
1126 | |
1127 | COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, |
1128 | const struct iovec __user *, vec, |
1129 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high, |
1130 | rwf_t, flags) |
1131 | { |
1132 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1133 | |
1134 | if (pos == -1) |
1135 | return do_readv(fd, vec, vlen, flags); |
1136 | return do_preadv(fd, vec, vlen, pos, flags); |
1137 | } |
1138 | |
1139 | #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 |
1140 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, |
1141 | const struct iovec __user *, vec, |
1142 | unsigned long, vlen, loff_t, pos) |
1143 | { |
1144 | return do_pwritev(fd, vec, vlen, pos, 0); |
1145 | } |
1146 | #endif |
1147 | |
1148 | COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, |
1149 | const struct iovec __user *,vec, |
1150 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
1151 | { |
1152 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1153 | |
1154 | return do_pwritev(fd, vec, vlen, pos, 0); |
1155 | } |
1156 | |
1157 | #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 |
1158 | COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, |
1159 | const struct iovec __user *, vec, |
1160 | unsigned long, vlen, loff_t, pos, rwf_t, flags) |
1161 | { |
1162 | if (pos == -1) |
1163 | return do_writev(fd, vec, vlen, flags); |
1164 | return do_pwritev(fd, vec, vlen, pos, flags); |
1165 | } |
1166 | #endif |
1167 | |
1168 | COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, |
1169 | const struct iovec __user *,vec, |
1170 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) |
1171 | { |
1172 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1173 | |
1174 | if (pos == -1) |
1175 | return do_writev(fd, vec, vlen, flags); |
1176 | return do_pwritev(fd, vec, vlen, pos, flags); |
1177 | } |
1178 | #endif /* CONFIG_COMPAT */ |
1179 | |
1180 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, |
1181 | size_t count, loff_t max) |
1182 | { |
1183 | struct fd in, out; |
1184 | struct inode *in_inode, *out_inode; |
1185 | struct pipe_inode_info *opipe; |
1186 | loff_t pos; |
1187 | loff_t out_pos; |
1188 | ssize_t retval; |
1189 | int fl; |
1190 | |
1191 | /* |
1192 | * Get input file, and verify that it is ok.. |
1193 | */ |
1194 | retval = -EBADF; |
1195 | in = fdget(fd: in_fd); |
1196 | if (!in.file) |
1197 | goto out; |
1198 | if (!(in.file->f_mode & FMODE_READ)) |
1199 | goto fput_in; |
1200 | retval = -ESPIPE; |
1201 | if (!ppos) { |
1202 | pos = in.file->f_pos; |
1203 | } else { |
1204 | pos = *ppos; |
1205 | if (!(in.file->f_mode & FMODE_PREAD)) |
1206 | goto fput_in; |
1207 | } |
1208 | retval = rw_verify_area(READ, file: in.file, ppos: &pos, count); |
1209 | if (retval < 0) |
1210 | goto fput_in; |
1211 | if (count > MAX_RW_COUNT) |
1212 | count = MAX_RW_COUNT; |
1213 | |
1214 | /* |
1215 | * Get output file, and verify that it is ok.. |
1216 | */ |
1217 | retval = -EBADF; |
1218 | out = fdget(fd: out_fd); |
1219 | if (!out.file) |
1220 | goto fput_in; |
1221 | if (!(out.file->f_mode & FMODE_WRITE)) |
1222 | goto fput_out; |
1223 | in_inode = file_inode(f: in.file); |
1224 | out_inode = file_inode(f: out.file); |
1225 | out_pos = out.file->f_pos; |
1226 | |
1227 | if (!max) |
1228 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); |
1229 | |
1230 | if (unlikely(pos + count > max)) { |
1231 | retval = -EOVERFLOW; |
1232 | if (pos >= max) |
1233 | goto fput_out; |
1234 | count = max - pos; |
1235 | } |
1236 | |
1237 | fl = 0; |
1238 | #if 0 |
1239 | /* |
1240 | * We need to debate whether we can enable this or not. The |
1241 | * man page documents EAGAIN return for the output at least, |
1242 | * and the application is arguably buggy if it doesn't expect |
1243 | * EAGAIN on a non-blocking file descriptor. |
1244 | */ |
1245 | if (in.file->f_flags & O_NONBLOCK) |
1246 | fl = SPLICE_F_NONBLOCK; |
1247 | #endif |
1248 | opipe = get_pipe_info(file: out.file, for_splice: true); |
1249 | if (!opipe) { |
1250 | retval = rw_verify_area(WRITE, file: out.file, ppos: &out_pos, count); |
1251 | if (retval < 0) |
1252 | goto fput_out; |
1253 | file_start_write(file: out.file); |
1254 | retval = do_splice_direct(in: in.file, ppos: &pos, out: out.file, opos: &out_pos, |
1255 | len: count, flags: fl); |
1256 | file_end_write(file: out.file); |
1257 | } else { |
1258 | if (out.file->f_flags & O_NONBLOCK) |
1259 | fl |= SPLICE_F_NONBLOCK; |
1260 | |
1261 | retval = splice_file_to_pipe(in: in.file, opipe, offset: &pos, len: count, flags: fl); |
1262 | } |
1263 | |
1264 | if (retval > 0) { |
1265 | add_rchar(current, amt: retval); |
1266 | add_wchar(current, amt: retval); |
1267 | fsnotify_access(file: in.file); |
1268 | fsnotify_modify(file: out.file); |
1269 | out.file->f_pos = out_pos; |
1270 | if (ppos) |
1271 | *ppos = pos; |
1272 | else |
1273 | in.file->f_pos = pos; |
1274 | } |
1275 | |
1276 | inc_syscr(current); |
1277 | inc_syscw(current); |
1278 | if (pos > max) |
1279 | retval = -EOVERFLOW; |
1280 | |
1281 | fput_out: |
1282 | fdput(fd: out); |
1283 | fput_in: |
1284 | fdput(fd: in); |
1285 | out: |
1286 | return retval; |
1287 | } |
1288 | |
1289 | SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) |
1290 | { |
1291 | loff_t pos; |
1292 | off_t off; |
1293 | ssize_t ret; |
1294 | |
1295 | if (offset) { |
1296 | if (unlikely(get_user(off, offset))) |
1297 | return -EFAULT; |
1298 | pos = off; |
1299 | ret = do_sendfile(out_fd, in_fd, ppos: &pos, count, MAX_NON_LFS); |
1300 | if (unlikely(put_user(pos, offset))) |
1301 | return -EFAULT; |
1302 | return ret; |
1303 | } |
1304 | |
1305 | return do_sendfile(out_fd, in_fd, NULL, count, max: 0); |
1306 | } |
1307 | |
1308 | SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) |
1309 | { |
1310 | loff_t pos; |
1311 | ssize_t ret; |
1312 | |
1313 | if (offset) { |
1314 | if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) |
1315 | return -EFAULT; |
1316 | ret = do_sendfile(out_fd, in_fd, ppos: &pos, count, max: 0); |
1317 | if (unlikely(put_user(pos, offset))) |
1318 | return -EFAULT; |
1319 | return ret; |
1320 | } |
1321 | |
1322 | return do_sendfile(out_fd, in_fd, NULL, count, max: 0); |
1323 | } |
1324 | |
1325 | #ifdef CONFIG_COMPAT |
1326 | COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, |
1327 | compat_off_t __user *, offset, compat_size_t, count) |
1328 | { |
1329 | loff_t pos; |
1330 | off_t off; |
1331 | ssize_t ret; |
1332 | |
1333 | if (offset) { |
1334 | if (unlikely(get_user(off, offset))) |
1335 | return -EFAULT; |
1336 | pos = off; |
1337 | ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); |
1338 | if (unlikely(put_user(pos, offset))) |
1339 | return -EFAULT; |
1340 | return ret; |
1341 | } |
1342 | |
1343 | return do_sendfile(out_fd, in_fd, NULL, count, 0); |
1344 | } |
1345 | |
1346 | COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, |
1347 | compat_loff_t __user *, offset, compat_size_t, count) |
1348 | { |
1349 | loff_t pos; |
1350 | ssize_t ret; |
1351 | |
1352 | if (offset) { |
1353 | if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) |
1354 | return -EFAULT; |
1355 | ret = do_sendfile(out_fd, in_fd, &pos, count, 0); |
1356 | if (unlikely(put_user(pos, offset))) |
1357 | return -EFAULT; |
1358 | return ret; |
1359 | } |
1360 | |
1361 | return do_sendfile(out_fd, in_fd, NULL, count, 0); |
1362 | } |
1363 | #endif |
1364 | |
1365 | /** |
1366 | * generic_copy_file_range - copy data between two files |
1367 | * @file_in: file structure to read from |
1368 | * @pos_in: file offset to read from |
1369 | * @file_out: file structure to write data to |
1370 | * @pos_out: file offset to write data to |
1371 | * @len: amount of data to copy |
1372 | * @flags: copy flags |
1373 | * |
1374 | * This is a generic filesystem helper to copy data from one file to another. |
1375 | * It has no constraints on the source or destination file owners - the files |
1376 | * can belong to different superblocks and different filesystem types. Short |
1377 | * copies are allowed. |
1378 | * |
1379 | * This should be called from the @file_out filesystem, as per the |
1380 | * ->copy_file_range() method. |
1381 | * |
1382 | * Returns the number of bytes copied or a negative error indicating the |
1383 | * failure. |
1384 | */ |
1385 | |
1386 | ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, |
1387 | struct file *file_out, loff_t pos_out, |
1388 | size_t len, unsigned int flags) |
1389 | { |
1390 | lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); |
1391 | |
1392 | return do_splice_direct(in: file_in, ppos: &pos_in, out: file_out, opos: &pos_out, |
1393 | len: len > MAX_RW_COUNT ? MAX_RW_COUNT : len, flags: 0); |
1394 | } |
1395 | EXPORT_SYMBOL(generic_copy_file_range); |
1396 | |
1397 | /* |
1398 | * Performs necessary checks before doing a file copy |
1399 | * |
1400 | * Can adjust amount of bytes to copy via @req_count argument. |
1401 | * Returns appropriate error code that caller should return or |
1402 | * zero in case the copy should be allowed. |
1403 | */ |
1404 | static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, |
1405 | struct file *file_out, loff_t pos_out, |
1406 | size_t *req_count, unsigned int flags) |
1407 | { |
1408 | struct inode *inode_in = file_inode(f: file_in); |
1409 | struct inode *inode_out = file_inode(f: file_out); |
1410 | uint64_t count = *req_count; |
1411 | loff_t size_in; |
1412 | int ret; |
1413 | |
1414 | ret = generic_file_rw_checks(file_in, file_out); |
1415 | if (ret) |
1416 | return ret; |
1417 | |
1418 | /* |
1419 | * We allow some filesystems to handle cross sb copy, but passing |
1420 | * a file of the wrong filesystem type to filesystem driver can result |
1421 | * in an attempt to dereference the wrong type of ->private_data, so |
1422 | * avoid doing that until we really have a good reason. |
1423 | * |
1424 | * nfs and cifs define several different file_system_type structures |
1425 | * and several different sets of file_operations, but they all end up |
1426 | * using the same ->copy_file_range() function pointer. |
1427 | */ |
1428 | if (flags & COPY_FILE_SPLICE) { |
1429 | /* cross sb splice is allowed */ |
1430 | } else if (file_out->f_op->copy_file_range) { |
1431 | if (file_in->f_op->copy_file_range != |
1432 | file_out->f_op->copy_file_range) |
1433 | return -EXDEV; |
1434 | } else if (file_inode(f: file_in)->i_sb != file_inode(f: file_out)->i_sb) { |
1435 | return -EXDEV; |
1436 | } |
1437 | |
1438 | /* Don't touch certain kinds of inodes */ |
1439 | if (IS_IMMUTABLE(inode_out)) |
1440 | return -EPERM; |
1441 | |
1442 | if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) |
1443 | return -ETXTBSY; |
1444 | |
1445 | /* Ensure offsets don't wrap. */ |
1446 | if (pos_in + count < pos_in || pos_out + count < pos_out) |
1447 | return -EOVERFLOW; |
1448 | |
1449 | /* Shorten the copy to EOF */ |
1450 | size_in = i_size_read(inode: inode_in); |
1451 | if (pos_in >= size_in) |
1452 | count = 0; |
1453 | else |
1454 | count = min(count, size_in - (uint64_t)pos_in); |
1455 | |
1456 | ret = generic_write_check_limits(file: file_out, pos: pos_out, count: &count); |
1457 | if (ret) |
1458 | return ret; |
1459 | |
1460 | /* Don't allow overlapped copying within the same file. */ |
1461 | if (inode_in == inode_out && |
1462 | pos_out + count > pos_in && |
1463 | pos_out < pos_in + count) |
1464 | return -EINVAL; |
1465 | |
1466 | *req_count = count; |
1467 | return 0; |
1468 | } |
1469 | |
1470 | /* |
1471 | * copy_file_range() differs from regular file read and write in that it |
1472 | * specifically allows return partial success. When it does so is up to |
1473 | * the copy_file_range method. |
1474 | */ |
1475 | ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, |
1476 | struct file *file_out, loff_t pos_out, |
1477 | size_t len, unsigned int flags) |
1478 | { |
1479 | ssize_t ret; |
1480 | bool splice = flags & COPY_FILE_SPLICE; |
1481 | |
1482 | if (flags & ~COPY_FILE_SPLICE) |
1483 | return -EINVAL; |
1484 | |
1485 | ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, req_count: &len, |
1486 | flags); |
1487 | if (unlikely(ret)) |
1488 | return ret; |
1489 | |
1490 | ret = rw_verify_area(READ, file: file_in, ppos: &pos_in, count: len); |
1491 | if (unlikely(ret)) |
1492 | return ret; |
1493 | |
1494 | ret = rw_verify_area(WRITE, file: file_out, ppos: &pos_out, count: len); |
1495 | if (unlikely(ret)) |
1496 | return ret; |
1497 | |
1498 | if (len == 0) |
1499 | return 0; |
1500 | |
1501 | file_start_write(file: file_out); |
1502 | |
1503 | /* |
1504 | * Cloning is supported by more file systems, so we implement copy on |
1505 | * same sb using clone, but for filesystems where both clone and copy |
1506 | * are supported (e.g. nfs,cifs), we only call the copy method. |
1507 | */ |
1508 | if (!splice && file_out->f_op->copy_file_range) { |
1509 | ret = file_out->f_op->copy_file_range(file_in, pos_in, |
1510 | file_out, pos_out, |
1511 | len, flags); |
1512 | goto done; |
1513 | } |
1514 | |
1515 | if (!splice && file_in->f_op->remap_file_range && |
1516 | file_inode(f: file_in)->i_sb == file_inode(f: file_out)->i_sb) { |
1517 | ret = file_in->f_op->remap_file_range(file_in, pos_in, |
1518 | file_out, pos_out, |
1519 | min_t(loff_t, MAX_RW_COUNT, len), |
1520 | REMAP_FILE_CAN_SHORTEN); |
1521 | if (ret > 0) |
1522 | goto done; |
1523 | } |
1524 | |
1525 | /* |
1526 | * We can get here for same sb copy of filesystems that do not implement |
1527 | * ->copy_file_range() in case filesystem does not support clone or in |
1528 | * case filesystem supports clone but rejected the clone request (e.g. |
1529 | * because it was not block aligned). |
1530 | * |
1531 | * In both cases, fall back to kernel copy so we are able to maintain a |
1532 | * consistent story about which filesystems support copy_file_range() |
1533 | * and which filesystems do not, that will allow userspace tools to |
1534 | * make consistent desicions w.r.t using copy_file_range(). |
1535 | * |
1536 | * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. |
1537 | */ |
1538 | ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, |
1539 | flags); |
1540 | |
1541 | done: |
1542 | if (ret > 0) { |
1543 | fsnotify_access(file: file_in); |
1544 | add_rchar(current, amt: ret); |
1545 | fsnotify_modify(file: file_out); |
1546 | add_wchar(current, amt: ret); |
1547 | } |
1548 | |
1549 | inc_syscr(current); |
1550 | inc_syscw(current); |
1551 | |
1552 | file_end_write(file: file_out); |
1553 | |
1554 | return ret; |
1555 | } |
1556 | EXPORT_SYMBOL(vfs_copy_file_range); |
1557 | |
1558 | SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, |
1559 | int, fd_out, loff_t __user *, off_out, |
1560 | size_t, len, unsigned int, flags) |
1561 | { |
1562 | loff_t pos_in; |
1563 | loff_t pos_out; |
1564 | struct fd f_in; |
1565 | struct fd f_out; |
1566 | ssize_t ret = -EBADF; |
1567 | |
1568 | f_in = fdget(fd: fd_in); |
1569 | if (!f_in.file) |
1570 | goto out2; |
1571 | |
1572 | f_out = fdget(fd: fd_out); |
1573 | if (!f_out.file) |
1574 | goto out1; |
1575 | |
1576 | ret = -EFAULT; |
1577 | if (off_in) { |
1578 | if (copy_from_user(to: &pos_in, from: off_in, n: sizeof(loff_t))) |
1579 | goto out; |
1580 | } else { |
1581 | pos_in = f_in.file->f_pos; |
1582 | } |
1583 | |
1584 | if (off_out) { |
1585 | if (copy_from_user(to: &pos_out, from: off_out, n: sizeof(loff_t))) |
1586 | goto out; |
1587 | } else { |
1588 | pos_out = f_out.file->f_pos; |
1589 | } |
1590 | |
1591 | ret = -EINVAL; |
1592 | if (flags != 0) |
1593 | goto out; |
1594 | |
1595 | ret = vfs_copy_file_range(file_in: f_in.file, pos_in, file_out: f_out.file, pos_out, len, |
1596 | flags); |
1597 | if (ret > 0) { |
1598 | pos_in += ret; |
1599 | pos_out += ret; |
1600 | |
1601 | if (off_in) { |
1602 | if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) |
1603 | ret = -EFAULT; |
1604 | } else { |
1605 | f_in.file->f_pos = pos_in; |
1606 | } |
1607 | |
1608 | if (off_out) { |
1609 | if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) |
1610 | ret = -EFAULT; |
1611 | } else { |
1612 | f_out.file->f_pos = pos_out; |
1613 | } |
1614 | } |
1615 | |
1616 | out: |
1617 | fdput(fd: f_out); |
1618 | out1: |
1619 | fdput(fd: f_in); |
1620 | out2: |
1621 | return ret; |
1622 | } |
1623 | |
1624 | /* |
1625 | * Don't operate on ranges the page cache doesn't support, and don't exceed the |
1626 | * LFS limits. If pos is under the limit it becomes a short access. If it |
1627 | * exceeds the limit we return -EFBIG. |
1628 | */ |
1629 | int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) |
1630 | { |
1631 | struct inode *inode = file->f_mapping->host; |
1632 | loff_t max_size = inode->i_sb->s_maxbytes; |
1633 | loff_t limit = rlimit(RLIMIT_FSIZE); |
1634 | |
1635 | if (limit != RLIM_INFINITY) { |
1636 | if (pos >= limit) { |
1637 | send_sig(SIGXFSZ, current, 0); |
1638 | return -EFBIG; |
1639 | } |
1640 | *count = min(*count, limit - pos); |
1641 | } |
1642 | |
1643 | if (!(file->f_flags & O_LARGEFILE)) |
1644 | max_size = MAX_NON_LFS; |
1645 | |
1646 | if (unlikely(pos >= max_size)) |
1647 | return -EFBIG; |
1648 | |
1649 | *count = min(*count, max_size - pos); |
1650 | |
1651 | return 0; |
1652 | } |
1653 | |
1654 | /* Like generic_write_checks(), but takes size of write instead of iter. */ |
1655 | int generic_write_checks_count(struct kiocb *iocb, loff_t *count) |
1656 | { |
1657 | struct file *file = iocb->ki_filp; |
1658 | struct inode *inode = file->f_mapping->host; |
1659 | |
1660 | if (IS_SWAPFILE(inode)) |
1661 | return -ETXTBSY; |
1662 | |
1663 | if (!*count) |
1664 | return 0; |
1665 | |
1666 | if (iocb->ki_flags & IOCB_APPEND) |
1667 | iocb->ki_pos = i_size_read(inode); |
1668 | |
1669 | if ((iocb->ki_flags & IOCB_NOWAIT) && |
1670 | !((iocb->ki_flags & IOCB_DIRECT) || |
1671 | (file->f_mode & FMODE_BUF_WASYNC))) |
1672 | return -EINVAL; |
1673 | |
1674 | return generic_write_check_limits(file: iocb->ki_filp, pos: iocb->ki_pos, count); |
1675 | } |
1676 | EXPORT_SYMBOL(generic_write_checks_count); |
1677 | |
1678 | /* |
1679 | * Performs necessary checks before doing a write |
1680 | * |
1681 | * Can adjust writing position or amount of bytes to write. |
1682 | * Returns appropriate error code that caller should return or |
1683 | * zero in case that write should be allowed. |
1684 | */ |
1685 | ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) |
1686 | { |
1687 | loff_t count = iov_iter_count(i: from); |
1688 | int ret; |
1689 | |
1690 | ret = generic_write_checks_count(iocb, count: &count); |
1691 | if (ret) |
1692 | return ret; |
1693 | |
1694 | iov_iter_truncate(from, count); |
1695 | return iov_iter_count(i: from); |
1696 | } |
1697 | EXPORT_SYMBOL(generic_write_checks); |
1698 | |
1699 | /* |
1700 | * Performs common checks before doing a file copy/clone |
1701 | * from @file_in to @file_out. |
1702 | */ |
1703 | int generic_file_rw_checks(struct file *file_in, struct file *file_out) |
1704 | { |
1705 | struct inode *inode_in = file_inode(f: file_in); |
1706 | struct inode *inode_out = file_inode(f: file_out); |
1707 | |
1708 | /* Don't copy dirs, pipes, sockets... */ |
1709 | if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) |
1710 | return -EISDIR; |
1711 | if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) |
1712 | return -EINVAL; |
1713 | |
1714 | if (!(file_in->f_mode & FMODE_READ) || |
1715 | !(file_out->f_mode & FMODE_WRITE) || |
1716 | (file_out->f_flags & O_APPEND)) |
1717 | return -EBADF; |
1718 | |
1719 | return 0; |
1720 | } |
1721 | |