1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/fs/read_write.c |
4 | * |
5 | * Copyright (C) 1991, 1992 Linus Torvalds |
6 | */ |
7 | |
8 | #include <linux/slab.h> |
9 | #include <linux/stat.h> |
10 | #include <linux/sched/xacct.h> |
11 | #include <linux/fcntl.h> |
12 | #include <linux/file.h> |
13 | #include <linux/uio.h> |
14 | #include <linux/fsnotify.h> |
15 | #include <linux/security.h> |
16 | #include <linux/export.h> |
17 | #include <linux/syscalls.h> |
18 | #include <linux/pagemap.h> |
19 | #include <linux/splice.h> |
20 | #include <linux/compat.h> |
21 | #include <linux/mount.h> |
22 | #include <linux/fs.h> |
23 | #include "internal.h" |
24 | |
25 | #include <linux/uaccess.h> |
26 | #include <asm/unistd.h> |
27 | |
28 | const struct file_operations generic_ro_fops = { |
29 | .llseek = generic_file_llseek, |
30 | .read_iter = generic_file_read_iter, |
31 | .mmap = generic_file_readonly_mmap, |
32 | .splice_read = filemap_splice_read, |
33 | }; |
34 | |
35 | EXPORT_SYMBOL(generic_ro_fops); |
36 | |
37 | static inline bool unsigned_offsets(struct file *file) |
38 | { |
39 | return file->f_mode & FMODE_UNSIGNED_OFFSET; |
40 | } |
41 | |
42 | /** |
43 | * vfs_setpos - update the file offset for lseek |
44 | * @file: file structure in question |
45 | * @offset: file offset to seek to |
46 | * @maxsize: maximum file size |
47 | * |
48 | * This is a low-level filesystem helper for updating the file offset to |
49 | * the value specified by @offset if the given offset is valid and it is |
50 | * not equal to the current file offset. |
51 | * |
52 | * Return the specified offset on success and -EINVAL on invalid offset. |
53 | */ |
54 | loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) |
55 | { |
56 | if (offset < 0 && !unsigned_offsets(file)) |
57 | return -EINVAL; |
58 | if (offset > maxsize) |
59 | return -EINVAL; |
60 | |
61 | if (offset != file->f_pos) { |
62 | file->f_pos = offset; |
63 | file->f_version = 0; |
64 | } |
65 | return offset; |
66 | } |
67 | EXPORT_SYMBOL(vfs_setpos); |
68 | |
69 | /** |
70 | * generic_file_llseek_size - generic llseek implementation for regular files |
71 | * @file: file structure to seek on |
72 | * @offset: file offset to seek to |
73 | * @whence: type of seek |
74 | * @maxsize: max size of this file in file system |
75 | * @eof: offset used for SEEK_END position |
76 | * |
77 | * This is a variant of generic_file_llseek that allows passing in a custom |
78 | * maximum file size and a custom EOF position, for e.g. hashed directories |
79 | * |
80 | * Synchronization: |
81 | * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) |
82 | * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. |
83 | * read/writes behave like SEEK_SET against seeks. |
84 | */ |
85 | loff_t |
86 | generic_file_llseek_size(struct file *file, loff_t offset, int whence, |
87 | loff_t maxsize, loff_t eof) |
88 | { |
89 | switch (whence) { |
90 | case SEEK_END: |
91 | offset += eof; |
92 | break; |
93 | case SEEK_CUR: |
94 | /* |
95 | * Here we special-case the lseek(fd, 0, SEEK_CUR) |
96 | * position-querying operation. Avoid rewriting the "same" |
97 | * f_pos value back to the file because a concurrent read(), |
98 | * write() or lseek() might have altered it |
99 | */ |
100 | if (offset == 0) |
101 | return file->f_pos; |
102 | /* |
103 | * f_lock protects against read/modify/write race with other |
104 | * SEEK_CURs. Note that parallel writes and reads behave |
105 | * like SEEK_SET. |
106 | */ |
107 | spin_lock(lock: &file->f_lock); |
108 | offset = vfs_setpos(file, file->f_pos + offset, maxsize); |
109 | spin_unlock(lock: &file->f_lock); |
110 | return offset; |
111 | case SEEK_DATA: |
112 | /* |
113 | * In the generic case the entire file is data, so as long as |
114 | * offset isn't at the end of the file then the offset is data. |
115 | */ |
116 | if ((unsigned long long)offset >= eof) |
117 | return -ENXIO; |
118 | break; |
119 | case SEEK_HOLE: |
120 | /* |
121 | * There is a virtual hole at the end of the file, so as long as |
122 | * offset isn't i_size or larger, return i_size. |
123 | */ |
124 | if ((unsigned long long)offset >= eof) |
125 | return -ENXIO; |
126 | offset = eof; |
127 | break; |
128 | } |
129 | |
130 | return vfs_setpos(file, offset, maxsize); |
131 | } |
132 | EXPORT_SYMBOL(generic_file_llseek_size); |
133 | |
134 | /** |
135 | * generic_file_llseek - generic llseek implementation for regular files |
136 | * @file: file structure to seek on |
137 | * @offset: file offset to seek to |
138 | * @whence: type of seek |
139 | * |
140 | * This is a generic implemenation of ->llseek useable for all normal local |
141 | * filesystems. It just updates the file offset to the value specified by |
142 | * @offset and @whence. |
143 | */ |
144 | loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) |
145 | { |
146 | struct inode *inode = file->f_mapping->host; |
147 | |
148 | return generic_file_llseek_size(file, offset, whence, |
149 | inode->i_sb->s_maxbytes, |
150 | i_size_read(inode)); |
151 | } |
152 | EXPORT_SYMBOL(generic_file_llseek); |
153 | |
154 | /** |
155 | * fixed_size_llseek - llseek implementation for fixed-sized devices |
156 | * @file: file structure to seek on |
157 | * @offset: file offset to seek to |
158 | * @whence: type of seek |
159 | * @size: size of the file |
160 | * |
161 | */ |
162 | loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) |
163 | { |
164 | switch (whence) { |
165 | case SEEK_SET: case SEEK_CUR: case SEEK_END: |
166 | return generic_file_llseek_size(file, offset, whence, |
167 | size, size); |
168 | default: |
169 | return -EINVAL; |
170 | } |
171 | } |
172 | EXPORT_SYMBOL(fixed_size_llseek); |
173 | |
174 | /** |
175 | * no_seek_end_llseek - llseek implementation for fixed-sized devices |
176 | * @file: file structure to seek on |
177 | * @offset: file offset to seek to |
178 | * @whence: type of seek |
179 | * |
180 | */ |
181 | loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) |
182 | { |
183 | switch (whence) { |
184 | case SEEK_SET: case SEEK_CUR: |
185 | return generic_file_llseek_size(file, offset, whence, |
186 | OFFSET_MAX, 0); |
187 | default: |
188 | return -EINVAL; |
189 | } |
190 | } |
191 | EXPORT_SYMBOL(no_seek_end_llseek); |
192 | |
193 | /** |
194 | * no_seek_end_llseek_size - llseek implementation for fixed-sized devices |
195 | * @file: file structure to seek on |
196 | * @offset: file offset to seek to |
197 | * @whence: type of seek |
198 | * @size: maximal offset allowed |
199 | * |
200 | */ |
201 | loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) |
202 | { |
203 | switch (whence) { |
204 | case SEEK_SET: case SEEK_CUR: |
205 | return generic_file_llseek_size(file, offset, whence, |
206 | size, 0); |
207 | default: |
208 | return -EINVAL; |
209 | } |
210 | } |
211 | EXPORT_SYMBOL(no_seek_end_llseek_size); |
212 | |
213 | /** |
214 | * noop_llseek - No Operation Performed llseek implementation |
215 | * @file: file structure to seek on |
216 | * @offset: file offset to seek to |
217 | * @whence: type of seek |
218 | * |
219 | * This is an implementation of ->llseek useable for the rare special case when |
220 | * userspace expects the seek to succeed but the (device) file is actually not |
221 | * able to perform the seek. In this case you use noop_llseek() instead of |
222 | * falling back to the default implementation of ->llseek. |
223 | */ |
224 | loff_t noop_llseek(struct file *file, loff_t offset, int whence) |
225 | { |
226 | return file->f_pos; |
227 | } |
228 | EXPORT_SYMBOL(noop_llseek); |
229 | |
230 | loff_t default_llseek(struct file *file, loff_t offset, int whence) |
231 | { |
232 | struct inode *inode = file_inode(f: file); |
233 | loff_t retval; |
234 | |
235 | inode_lock(inode); |
236 | switch (whence) { |
237 | case SEEK_END: |
238 | offset += i_size_read(inode); |
239 | break; |
240 | case SEEK_CUR: |
241 | if (offset == 0) { |
242 | retval = file->f_pos; |
243 | goto out; |
244 | } |
245 | offset += file->f_pos; |
246 | break; |
247 | case SEEK_DATA: |
248 | /* |
249 | * In the generic case the entire file is data, so as |
250 | * long as offset isn't at the end of the file then the |
251 | * offset is data. |
252 | */ |
253 | if (offset >= inode->i_size) { |
254 | retval = -ENXIO; |
255 | goto out; |
256 | } |
257 | break; |
258 | case SEEK_HOLE: |
259 | /* |
260 | * There is a virtual hole at the end of the file, so |
261 | * as long as offset isn't i_size or larger, return |
262 | * i_size. |
263 | */ |
264 | if (offset >= inode->i_size) { |
265 | retval = -ENXIO; |
266 | goto out; |
267 | } |
268 | offset = inode->i_size; |
269 | break; |
270 | } |
271 | retval = -EINVAL; |
272 | if (offset >= 0 || unsigned_offsets(file)) { |
273 | if (offset != file->f_pos) { |
274 | file->f_pos = offset; |
275 | file->f_version = 0; |
276 | } |
277 | retval = offset; |
278 | } |
279 | out: |
280 | inode_unlock(inode); |
281 | return retval; |
282 | } |
283 | EXPORT_SYMBOL(default_llseek); |
284 | |
285 | loff_t vfs_llseek(struct file *file, loff_t offset, int whence) |
286 | { |
287 | if (!(file->f_mode & FMODE_LSEEK)) |
288 | return -ESPIPE; |
289 | return file->f_op->llseek(file, offset, whence); |
290 | } |
291 | EXPORT_SYMBOL(vfs_llseek); |
292 | |
293 | static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) |
294 | { |
295 | off_t retval; |
296 | struct fd f = fdget_pos(fd); |
297 | if (!f.file) |
298 | return -EBADF; |
299 | |
300 | retval = -EINVAL; |
301 | if (whence <= SEEK_MAX) { |
302 | loff_t res = vfs_llseek(f.file, offset, whence); |
303 | retval = res; |
304 | if (res != (loff_t)retval) |
305 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ |
306 | } |
307 | fdput_pos(f); |
308 | return retval; |
309 | } |
310 | |
311 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) |
312 | { |
313 | return ksys_lseek(fd, offset, whence); |
314 | } |
315 | |
316 | #ifdef CONFIG_COMPAT |
317 | COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) |
318 | { |
319 | return ksys_lseek(fd, offset, whence); |
320 | } |
321 | #endif |
322 | |
323 | #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ |
324 | defined(__ARCH_WANT_SYS_LLSEEK) |
325 | SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, |
326 | unsigned long, offset_low, loff_t __user *, result, |
327 | unsigned int, whence) |
328 | { |
329 | int retval; |
330 | struct fd f = fdget_pos(fd); |
331 | loff_t offset; |
332 | |
333 | if (!f.file) |
334 | return -EBADF; |
335 | |
336 | retval = -EINVAL; |
337 | if (whence > SEEK_MAX) |
338 | goto out_putf; |
339 | |
340 | offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, |
341 | whence); |
342 | |
343 | retval = (int)offset; |
344 | if (offset >= 0) { |
345 | retval = -EFAULT; |
346 | if (!copy_to_user(to: result, from: &offset, n: sizeof(offset))) |
347 | retval = 0; |
348 | } |
349 | out_putf: |
350 | fdput_pos(f); |
351 | return retval; |
352 | } |
353 | #endif |
354 | |
355 | int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) |
356 | { |
357 | int mask = read_write == READ ? MAY_READ : MAY_WRITE; |
358 | int ret; |
359 | |
360 | if (unlikely((ssize_t) count < 0)) |
361 | return -EINVAL; |
362 | |
363 | if (ppos) { |
364 | loff_t pos = *ppos; |
365 | |
366 | if (unlikely(pos < 0)) { |
367 | if (!unsigned_offsets(file)) |
368 | return -EINVAL; |
369 | if (count >= -pos) /* both values are in 0..LLONG_MAX */ |
370 | return -EOVERFLOW; |
371 | } else if (unlikely((loff_t) (pos + count) < 0)) { |
372 | if (!unsigned_offsets(file)) |
373 | return -EINVAL; |
374 | } |
375 | } |
376 | |
377 | ret = security_file_permission(file, mask); |
378 | if (ret) |
379 | return ret; |
380 | |
381 | return fsnotify_file_area_perm(file, perm_mask: mask, ppos, count); |
382 | } |
383 | EXPORT_SYMBOL(rw_verify_area); |
384 | |
385 | static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) |
386 | { |
387 | struct kiocb kiocb; |
388 | struct iov_iter iter; |
389 | ssize_t ret; |
390 | |
391 | init_sync_kiocb(kiocb: &kiocb, filp); |
392 | kiocb.ki_pos = (ppos ? *ppos : 0); |
393 | iov_iter_ubuf(i: &iter, ITER_DEST, buf, count: len); |
394 | |
395 | ret = call_read_iter(file: filp, kio: &kiocb, iter: &iter); |
396 | BUG_ON(ret == -EIOCBQUEUED); |
397 | if (ppos) |
398 | *ppos = kiocb.ki_pos; |
399 | return ret; |
400 | } |
401 | |
402 | static int warn_unsupported(struct file *file, const char *op) |
403 | { |
404 | pr_warn_ratelimited( |
405 | "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n" , |
406 | op, file, current->pid, current->comm); |
407 | return -EINVAL; |
408 | } |
409 | |
410 | ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) |
411 | { |
412 | struct kvec iov = { |
413 | .iov_base = buf, |
414 | .iov_len = min_t(size_t, count, MAX_RW_COUNT), |
415 | }; |
416 | struct kiocb kiocb; |
417 | struct iov_iter iter; |
418 | ssize_t ret; |
419 | |
420 | if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) |
421 | return -EINVAL; |
422 | if (!(file->f_mode & FMODE_CAN_READ)) |
423 | return -EINVAL; |
424 | /* |
425 | * Also fail if ->read_iter and ->read are both wired up as that |
426 | * implies very convoluted semantics. |
427 | */ |
428 | if (unlikely(!file->f_op->read_iter || file->f_op->read)) |
429 | return warn_unsupported(file, op: "read" ); |
430 | |
431 | init_sync_kiocb(kiocb: &kiocb, filp: file); |
432 | kiocb.ki_pos = pos ? *pos : 0; |
433 | iov_iter_kvec(i: &iter, ITER_DEST, kvec: &iov, nr_segs: 1, count: iov.iov_len); |
434 | ret = file->f_op->read_iter(&kiocb, &iter); |
435 | if (ret > 0) { |
436 | if (pos) |
437 | *pos = kiocb.ki_pos; |
438 | fsnotify_access(file); |
439 | add_rchar(current, amt: ret); |
440 | } |
441 | inc_syscr(current); |
442 | return ret; |
443 | } |
444 | |
445 | ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) |
446 | { |
447 | ssize_t ret; |
448 | |
449 | ret = rw_verify_area(READ, file, pos, count); |
450 | if (ret) |
451 | return ret; |
452 | return __kernel_read(file, buf, count, pos); |
453 | } |
454 | EXPORT_SYMBOL(kernel_read); |
455 | |
456 | ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) |
457 | { |
458 | ssize_t ret; |
459 | |
460 | if (!(file->f_mode & FMODE_READ)) |
461 | return -EBADF; |
462 | if (!(file->f_mode & FMODE_CAN_READ)) |
463 | return -EINVAL; |
464 | if (unlikely(!access_ok(buf, count))) |
465 | return -EFAULT; |
466 | |
467 | ret = rw_verify_area(READ, file, pos, count); |
468 | if (ret) |
469 | return ret; |
470 | if (count > MAX_RW_COUNT) |
471 | count = MAX_RW_COUNT; |
472 | |
473 | if (file->f_op->read) |
474 | ret = file->f_op->read(file, buf, count, pos); |
475 | else if (file->f_op->read_iter) |
476 | ret = new_sync_read(filp: file, buf, len: count, ppos: pos); |
477 | else |
478 | ret = -EINVAL; |
479 | if (ret > 0) { |
480 | fsnotify_access(file); |
481 | add_rchar(current, amt: ret); |
482 | } |
483 | inc_syscr(current); |
484 | return ret; |
485 | } |
486 | |
487 | static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) |
488 | { |
489 | struct kiocb kiocb; |
490 | struct iov_iter iter; |
491 | ssize_t ret; |
492 | |
493 | init_sync_kiocb(kiocb: &kiocb, filp); |
494 | kiocb.ki_pos = (ppos ? *ppos : 0); |
495 | iov_iter_ubuf(i: &iter, ITER_SOURCE, buf: (void __user *)buf, count: len); |
496 | |
497 | ret = call_write_iter(file: filp, kio: &kiocb, iter: &iter); |
498 | BUG_ON(ret == -EIOCBQUEUED); |
499 | if (ret > 0 && ppos) |
500 | *ppos = kiocb.ki_pos; |
501 | return ret; |
502 | } |
503 | |
504 | /* caller is responsible for file_start_write/file_end_write */ |
505 | ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos) |
506 | { |
507 | struct kiocb kiocb; |
508 | ssize_t ret; |
509 | |
510 | if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) |
511 | return -EBADF; |
512 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
513 | return -EINVAL; |
514 | /* |
515 | * Also fail if ->write_iter and ->write are both wired up as that |
516 | * implies very convoluted semantics. |
517 | */ |
518 | if (unlikely(!file->f_op->write_iter || file->f_op->write)) |
519 | return warn_unsupported(file, op: "write" ); |
520 | |
521 | init_sync_kiocb(kiocb: &kiocb, filp: file); |
522 | kiocb.ki_pos = pos ? *pos : 0; |
523 | ret = file->f_op->write_iter(&kiocb, from); |
524 | if (ret > 0) { |
525 | if (pos) |
526 | *pos = kiocb.ki_pos; |
527 | fsnotify_modify(file); |
528 | add_wchar(current, amt: ret); |
529 | } |
530 | inc_syscw(current); |
531 | return ret; |
532 | } |
533 | |
534 | /* caller is responsible for file_start_write/file_end_write */ |
535 | ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) |
536 | { |
537 | struct kvec iov = { |
538 | .iov_base = (void *)buf, |
539 | .iov_len = min_t(size_t, count, MAX_RW_COUNT), |
540 | }; |
541 | struct iov_iter iter; |
542 | iov_iter_kvec(i: &iter, ITER_SOURCE, kvec: &iov, nr_segs: 1, count: iov.iov_len); |
543 | return __kernel_write_iter(file, from: &iter, pos); |
544 | } |
545 | /* |
546 | * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", |
547 | * but autofs is one of the few internal kernel users that actually |
548 | * wants this _and_ can be built as a module. So we need to export |
549 | * this symbol for autofs, even though it really isn't appropriate |
550 | * for any other kernel modules. |
551 | */ |
552 | EXPORT_SYMBOL_GPL(__kernel_write); |
553 | |
554 | ssize_t kernel_write(struct file *file, const void *buf, size_t count, |
555 | loff_t *pos) |
556 | { |
557 | ssize_t ret; |
558 | |
559 | ret = rw_verify_area(WRITE, file, pos, count); |
560 | if (ret) |
561 | return ret; |
562 | |
563 | file_start_write(file); |
564 | ret = __kernel_write(file, buf, count, pos); |
565 | file_end_write(file); |
566 | return ret; |
567 | } |
568 | EXPORT_SYMBOL(kernel_write); |
569 | |
570 | ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) |
571 | { |
572 | ssize_t ret; |
573 | |
574 | if (!(file->f_mode & FMODE_WRITE)) |
575 | return -EBADF; |
576 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
577 | return -EINVAL; |
578 | if (unlikely(!access_ok(buf, count))) |
579 | return -EFAULT; |
580 | |
581 | ret = rw_verify_area(WRITE, file, pos, count); |
582 | if (ret) |
583 | return ret; |
584 | if (count > MAX_RW_COUNT) |
585 | count = MAX_RW_COUNT; |
586 | file_start_write(file); |
587 | if (file->f_op->write) |
588 | ret = file->f_op->write(file, buf, count, pos); |
589 | else if (file->f_op->write_iter) |
590 | ret = new_sync_write(filp: file, buf, len: count, ppos: pos); |
591 | else |
592 | ret = -EINVAL; |
593 | if (ret > 0) { |
594 | fsnotify_modify(file); |
595 | add_wchar(current, amt: ret); |
596 | } |
597 | inc_syscw(current); |
598 | file_end_write(file); |
599 | return ret; |
600 | } |
601 | |
602 | /* file_ppos returns &file->f_pos or NULL if file is stream */ |
603 | static inline loff_t *file_ppos(struct file *file) |
604 | { |
605 | return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos; |
606 | } |
607 | |
608 | ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) |
609 | { |
610 | struct fd f = fdget_pos(fd); |
611 | ssize_t ret = -EBADF; |
612 | |
613 | if (f.file) { |
614 | loff_t pos, *ppos = file_ppos(file: f.file); |
615 | if (ppos) { |
616 | pos = *ppos; |
617 | ppos = &pos; |
618 | } |
619 | ret = vfs_read(file: f.file, buf, count, pos: ppos); |
620 | if (ret >= 0 && ppos) |
621 | f.file->f_pos = pos; |
622 | fdput_pos(f); |
623 | } |
624 | return ret; |
625 | } |
626 | |
627 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) |
628 | { |
629 | return ksys_read(fd, buf, count); |
630 | } |
631 | |
632 | ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) |
633 | { |
634 | struct fd f = fdget_pos(fd); |
635 | ssize_t ret = -EBADF; |
636 | |
637 | if (f.file) { |
638 | loff_t pos, *ppos = file_ppos(file: f.file); |
639 | if (ppos) { |
640 | pos = *ppos; |
641 | ppos = &pos; |
642 | } |
643 | ret = vfs_write(file: f.file, buf, count, pos: ppos); |
644 | if (ret >= 0 && ppos) |
645 | f.file->f_pos = pos; |
646 | fdput_pos(f); |
647 | } |
648 | |
649 | return ret; |
650 | } |
651 | |
652 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, |
653 | size_t, count) |
654 | { |
655 | return ksys_write(fd, buf, count); |
656 | } |
657 | |
658 | ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, |
659 | loff_t pos) |
660 | { |
661 | struct fd f; |
662 | ssize_t ret = -EBADF; |
663 | |
664 | if (pos < 0) |
665 | return -EINVAL; |
666 | |
667 | f = fdget(fd); |
668 | if (f.file) { |
669 | ret = -ESPIPE; |
670 | if (f.file->f_mode & FMODE_PREAD) |
671 | ret = vfs_read(file: f.file, buf, count, pos: &pos); |
672 | fdput(fd: f); |
673 | } |
674 | |
675 | return ret; |
676 | } |
677 | |
678 | SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, |
679 | size_t, count, loff_t, pos) |
680 | { |
681 | return ksys_pread64(fd, buf, count, pos); |
682 | } |
683 | |
684 | #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64) |
685 | COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf, |
686 | size_t, count, compat_arg_u64_dual(pos)) |
687 | { |
688 | return ksys_pread64(fd, buf, count, compat_arg_u64_glue(pos)); |
689 | } |
690 | #endif |
691 | |
692 | ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, |
693 | size_t count, loff_t pos) |
694 | { |
695 | struct fd f; |
696 | ssize_t ret = -EBADF; |
697 | |
698 | if (pos < 0) |
699 | return -EINVAL; |
700 | |
701 | f = fdget(fd); |
702 | if (f.file) { |
703 | ret = -ESPIPE; |
704 | if (f.file->f_mode & FMODE_PWRITE) |
705 | ret = vfs_write(file: f.file, buf, count, pos: &pos); |
706 | fdput(fd: f); |
707 | } |
708 | |
709 | return ret; |
710 | } |
711 | |
712 | SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, |
713 | size_t, count, loff_t, pos) |
714 | { |
715 | return ksys_pwrite64(fd, buf, count, pos); |
716 | } |
717 | |
718 | #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64) |
719 | COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf, |
720 | size_t, count, compat_arg_u64_dual(pos)) |
721 | { |
722 | return ksys_pwrite64(fd, buf, count, compat_arg_u64_glue(pos)); |
723 | } |
724 | #endif |
725 | |
726 | static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, |
727 | loff_t *ppos, int type, rwf_t flags) |
728 | { |
729 | struct kiocb kiocb; |
730 | ssize_t ret; |
731 | |
732 | init_sync_kiocb(kiocb: &kiocb, filp); |
733 | ret = kiocb_set_rw_flags(ki: &kiocb, flags); |
734 | if (ret) |
735 | return ret; |
736 | kiocb.ki_pos = (ppos ? *ppos : 0); |
737 | |
738 | if (type == READ) |
739 | ret = call_read_iter(file: filp, kio: &kiocb, iter); |
740 | else |
741 | ret = call_write_iter(file: filp, kio: &kiocb, iter); |
742 | BUG_ON(ret == -EIOCBQUEUED); |
743 | if (ppos) |
744 | *ppos = kiocb.ki_pos; |
745 | return ret; |
746 | } |
747 | |
748 | /* Do it by hand, with file-ops */ |
749 | static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, |
750 | loff_t *ppos, int type, rwf_t flags) |
751 | { |
752 | ssize_t ret = 0; |
753 | |
754 | if (flags & ~RWF_HIPRI) |
755 | return -EOPNOTSUPP; |
756 | |
757 | while (iov_iter_count(i: iter)) { |
758 | ssize_t nr; |
759 | |
760 | if (type == READ) { |
761 | nr = filp->f_op->read(filp, iter_iov_addr(iter), |
762 | iter_iov_len(iter), ppos); |
763 | } else { |
764 | nr = filp->f_op->write(filp, iter_iov_addr(iter), |
765 | iter_iov_len(iter), ppos); |
766 | } |
767 | |
768 | if (nr < 0) { |
769 | if (!ret) |
770 | ret = nr; |
771 | break; |
772 | } |
773 | ret += nr; |
774 | if (nr != iter_iov_len(iter)) |
775 | break; |
776 | iov_iter_advance(i: iter, bytes: nr); |
777 | } |
778 | |
779 | return ret; |
780 | } |
781 | |
782 | ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, |
783 | struct iov_iter *iter) |
784 | { |
785 | size_t tot_len; |
786 | ssize_t ret = 0; |
787 | |
788 | if (!file->f_op->read_iter) |
789 | return -EINVAL; |
790 | if (!(file->f_mode & FMODE_READ)) |
791 | return -EBADF; |
792 | if (!(file->f_mode & FMODE_CAN_READ)) |
793 | return -EINVAL; |
794 | |
795 | tot_len = iov_iter_count(i: iter); |
796 | if (!tot_len) |
797 | goto out; |
798 | ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); |
799 | if (ret < 0) |
800 | return ret; |
801 | |
802 | ret = call_read_iter(file, kio: iocb, iter); |
803 | out: |
804 | if (ret >= 0) |
805 | fsnotify_access(file); |
806 | return ret; |
807 | } |
808 | EXPORT_SYMBOL(vfs_iocb_iter_read); |
809 | |
810 | ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, |
811 | rwf_t flags) |
812 | { |
813 | size_t tot_len; |
814 | ssize_t ret = 0; |
815 | |
816 | if (!file->f_op->read_iter) |
817 | return -EINVAL; |
818 | if (!(file->f_mode & FMODE_READ)) |
819 | return -EBADF; |
820 | if (!(file->f_mode & FMODE_CAN_READ)) |
821 | return -EINVAL; |
822 | |
823 | tot_len = iov_iter_count(i: iter); |
824 | if (!tot_len) |
825 | goto out; |
826 | ret = rw_verify_area(READ, file, ppos, tot_len); |
827 | if (ret < 0) |
828 | return ret; |
829 | |
830 | ret = do_iter_readv_writev(filp: file, iter, ppos, READ, flags); |
831 | out: |
832 | if (ret >= 0) |
833 | fsnotify_access(file); |
834 | return ret; |
835 | } |
836 | EXPORT_SYMBOL(vfs_iter_read); |
837 | |
838 | /* |
839 | * Caller is responsible for calling kiocb_end_write() on completion |
840 | * if async iocb was queued. |
841 | */ |
842 | ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, |
843 | struct iov_iter *iter) |
844 | { |
845 | size_t tot_len; |
846 | ssize_t ret = 0; |
847 | |
848 | if (!file->f_op->write_iter) |
849 | return -EINVAL; |
850 | if (!(file->f_mode & FMODE_WRITE)) |
851 | return -EBADF; |
852 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
853 | return -EINVAL; |
854 | |
855 | tot_len = iov_iter_count(i: iter); |
856 | if (!tot_len) |
857 | return 0; |
858 | ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); |
859 | if (ret < 0) |
860 | return ret; |
861 | |
862 | kiocb_start_write(iocb); |
863 | ret = call_write_iter(file, kio: iocb, iter); |
864 | if (ret != -EIOCBQUEUED) |
865 | kiocb_end_write(iocb); |
866 | if (ret > 0) |
867 | fsnotify_modify(file); |
868 | |
869 | return ret; |
870 | } |
871 | EXPORT_SYMBOL(vfs_iocb_iter_write); |
872 | |
873 | ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, |
874 | rwf_t flags) |
875 | { |
876 | size_t tot_len; |
877 | ssize_t ret; |
878 | |
879 | if (!(file->f_mode & FMODE_WRITE)) |
880 | return -EBADF; |
881 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
882 | return -EINVAL; |
883 | if (!file->f_op->write_iter) |
884 | return -EINVAL; |
885 | |
886 | tot_len = iov_iter_count(i: iter); |
887 | if (!tot_len) |
888 | return 0; |
889 | |
890 | ret = rw_verify_area(WRITE, file, ppos, tot_len); |
891 | if (ret < 0) |
892 | return ret; |
893 | |
894 | file_start_write(file); |
895 | ret = do_iter_readv_writev(filp: file, iter, ppos, WRITE, flags); |
896 | if (ret > 0) |
897 | fsnotify_modify(file); |
898 | file_end_write(file); |
899 | |
900 | return ret; |
901 | } |
902 | EXPORT_SYMBOL(vfs_iter_write); |
903 | |
904 | static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, |
905 | unsigned long vlen, loff_t *pos, rwf_t flags) |
906 | { |
907 | struct iovec iovstack[UIO_FASTIOV]; |
908 | struct iovec *iov = iovstack; |
909 | struct iov_iter iter; |
910 | size_t tot_len; |
911 | ssize_t ret = 0; |
912 | |
913 | if (!(file->f_mode & FMODE_READ)) |
914 | return -EBADF; |
915 | if (!(file->f_mode & FMODE_CAN_READ)) |
916 | return -EINVAL; |
917 | |
918 | ret = import_iovec(ITER_DEST, uvec: vec, nr_segs: vlen, ARRAY_SIZE(iovstack), iovp: &iov, |
919 | i: &iter); |
920 | if (ret < 0) |
921 | return ret; |
922 | |
923 | tot_len = iov_iter_count(i: &iter); |
924 | if (!tot_len) |
925 | goto out; |
926 | |
927 | ret = rw_verify_area(READ, file, pos, tot_len); |
928 | if (ret < 0) |
929 | goto out; |
930 | |
931 | if (file->f_op->read_iter) |
932 | ret = do_iter_readv_writev(filp: file, iter: &iter, ppos: pos, READ, flags); |
933 | else |
934 | ret = do_loop_readv_writev(filp: file, iter: &iter, ppos: pos, READ, flags); |
935 | out: |
936 | if (ret >= 0) |
937 | fsnotify_access(file); |
938 | kfree(objp: iov); |
939 | return ret; |
940 | } |
941 | |
942 | static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, |
943 | unsigned long vlen, loff_t *pos, rwf_t flags) |
944 | { |
945 | struct iovec iovstack[UIO_FASTIOV]; |
946 | struct iovec *iov = iovstack; |
947 | struct iov_iter iter; |
948 | size_t tot_len; |
949 | ssize_t ret = 0; |
950 | |
951 | if (!(file->f_mode & FMODE_WRITE)) |
952 | return -EBADF; |
953 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
954 | return -EINVAL; |
955 | |
956 | ret = import_iovec(ITER_SOURCE, uvec: vec, nr_segs: vlen, ARRAY_SIZE(iovstack), iovp: &iov, |
957 | i: &iter); |
958 | if (ret < 0) |
959 | return ret; |
960 | |
961 | tot_len = iov_iter_count(i: &iter); |
962 | if (!tot_len) |
963 | goto out; |
964 | |
965 | ret = rw_verify_area(WRITE, file, pos, tot_len); |
966 | if (ret < 0) |
967 | goto out; |
968 | |
969 | file_start_write(file); |
970 | if (file->f_op->write_iter) |
971 | ret = do_iter_readv_writev(filp: file, iter: &iter, ppos: pos, WRITE, flags); |
972 | else |
973 | ret = do_loop_readv_writev(filp: file, iter: &iter, ppos: pos, WRITE, flags); |
974 | if (ret > 0) |
975 | fsnotify_modify(file); |
976 | file_end_write(file); |
977 | out: |
978 | kfree(objp: iov); |
979 | return ret; |
980 | } |
981 | |
982 | static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, |
983 | unsigned long vlen, rwf_t flags) |
984 | { |
985 | struct fd f = fdget_pos(fd); |
986 | ssize_t ret = -EBADF; |
987 | |
988 | if (f.file) { |
989 | loff_t pos, *ppos = file_ppos(file: f.file); |
990 | if (ppos) { |
991 | pos = *ppos; |
992 | ppos = &pos; |
993 | } |
994 | ret = vfs_readv(file: f.file, vec, vlen, pos: ppos, flags); |
995 | if (ret >= 0 && ppos) |
996 | f.file->f_pos = pos; |
997 | fdput_pos(f); |
998 | } |
999 | |
1000 | if (ret > 0) |
1001 | add_rchar(current, amt: ret); |
1002 | inc_syscr(current); |
1003 | return ret; |
1004 | } |
1005 | |
1006 | static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, |
1007 | unsigned long vlen, rwf_t flags) |
1008 | { |
1009 | struct fd f = fdget_pos(fd); |
1010 | ssize_t ret = -EBADF; |
1011 | |
1012 | if (f.file) { |
1013 | loff_t pos, *ppos = file_ppos(file: f.file); |
1014 | if (ppos) { |
1015 | pos = *ppos; |
1016 | ppos = &pos; |
1017 | } |
1018 | ret = vfs_writev(file: f.file, vec, vlen, pos: ppos, flags); |
1019 | if (ret >= 0 && ppos) |
1020 | f.file->f_pos = pos; |
1021 | fdput_pos(f); |
1022 | } |
1023 | |
1024 | if (ret > 0) |
1025 | add_wchar(current, amt: ret); |
1026 | inc_syscw(current); |
1027 | return ret; |
1028 | } |
1029 | |
1030 | static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) |
1031 | { |
1032 | #define HALF_LONG_BITS (BITS_PER_LONG / 2) |
1033 | return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; |
1034 | } |
1035 | |
1036 | static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, |
1037 | unsigned long vlen, loff_t pos, rwf_t flags) |
1038 | { |
1039 | struct fd f; |
1040 | ssize_t ret = -EBADF; |
1041 | |
1042 | if (pos < 0) |
1043 | return -EINVAL; |
1044 | |
1045 | f = fdget(fd); |
1046 | if (f.file) { |
1047 | ret = -ESPIPE; |
1048 | if (f.file->f_mode & FMODE_PREAD) |
1049 | ret = vfs_readv(file: f.file, vec, vlen, pos: &pos, flags); |
1050 | fdput(fd: f); |
1051 | } |
1052 | |
1053 | if (ret > 0) |
1054 | add_rchar(current, amt: ret); |
1055 | inc_syscr(current); |
1056 | return ret; |
1057 | } |
1058 | |
1059 | static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, |
1060 | unsigned long vlen, loff_t pos, rwf_t flags) |
1061 | { |
1062 | struct fd f; |
1063 | ssize_t ret = -EBADF; |
1064 | |
1065 | if (pos < 0) |
1066 | return -EINVAL; |
1067 | |
1068 | f = fdget(fd); |
1069 | if (f.file) { |
1070 | ret = -ESPIPE; |
1071 | if (f.file->f_mode & FMODE_PWRITE) |
1072 | ret = vfs_writev(file: f.file, vec, vlen, pos: &pos, flags); |
1073 | fdput(fd: f); |
1074 | } |
1075 | |
1076 | if (ret > 0) |
1077 | add_wchar(current, amt: ret); |
1078 | inc_syscw(current); |
1079 | return ret; |
1080 | } |
1081 | |
1082 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, |
1083 | unsigned long, vlen) |
1084 | { |
1085 | return do_readv(fd, vec, vlen, flags: 0); |
1086 | } |
1087 | |
1088 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, |
1089 | unsigned long, vlen) |
1090 | { |
1091 | return do_writev(fd, vec, vlen, flags: 0); |
1092 | } |
1093 | |
1094 | SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, |
1095 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
1096 | { |
1097 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1098 | |
1099 | return do_preadv(fd, vec, vlen, pos, flags: 0); |
1100 | } |
1101 | |
1102 | SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, |
1103 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, |
1104 | rwf_t, flags) |
1105 | { |
1106 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1107 | |
1108 | if (pos == -1) |
1109 | return do_readv(fd, vec, vlen, flags); |
1110 | |
1111 | return do_preadv(fd, vec, vlen, pos, flags); |
1112 | } |
1113 | |
1114 | SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, |
1115 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
1116 | { |
1117 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1118 | |
1119 | return do_pwritev(fd, vec, vlen, pos, flags: 0); |
1120 | } |
1121 | |
1122 | SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, |
1123 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, |
1124 | rwf_t, flags) |
1125 | { |
1126 | loff_t pos = pos_from_hilo(high: pos_h, low: pos_l); |
1127 | |
1128 | if (pos == -1) |
1129 | return do_writev(fd, vec, vlen, flags); |
1130 | |
1131 | return do_pwritev(fd, vec, vlen, pos, flags); |
1132 | } |
1133 | |
1134 | /* |
1135 | * Various compat syscalls. Note that they all pretend to take a native |
1136 | * iovec - import_iovec will properly treat those as compat_iovecs based on |
1137 | * in_compat_syscall(). |
1138 | */ |
1139 | #ifdef CONFIG_COMPAT |
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
/*
 * Compat preadv64: the offset arrives as a single loff_t argument, so it
 * is passed straight to do_preadv() with no flags set.
 */
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos)
{
	ssize_t nread = do_preadv(fd, vec, vlen, pos, 0);

	return nread;
}
#endif
1148 | |
1149 | COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, |
1150 | const struct iovec __user *, vec, |
1151 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
1152 | { |
1153 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1154 | |
1155 | return do_preadv(fd, vec, vlen, pos, flags: 0); |
1156 | } |
1157 | |
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
/*
 * Compat preadv64v2: single loff_t offset plus @flags.  An offset of -1
 * selects the file's own position (do_readv()); anything else is a
 * positional read (do_preadv()).
 */
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos != -1)
		return do_preadv(fd, vec, vlen, pos, flags);

	return do_readv(fd, vec, vlen, flags);
}
#endif
1168 | |
1169 | COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, |
1170 | const struct iovec __user *, vec, |
1171 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high, |
1172 | rwf_t, flags) |
1173 | { |
1174 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1175 | |
1176 | if (pos == -1) |
1177 | return do_readv(fd, vec, vlen, flags); |
1178 | return do_preadv(fd, vec, vlen, pos, flags); |
1179 | } |
1180 | |
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
/*
 * Compat pwritev64: the offset arrives as a single loff_t argument, so it
 * is passed straight to do_pwritev() with no flags set.
 */
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos)
{
	ssize_t nwritten = do_pwritev(fd, vec, vlen, pos, 0);

	return nwritten;
}
#endif
1189 | |
1190 | COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, |
1191 | const struct iovec __user *,vec, |
1192 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
1193 | { |
1194 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1195 | |
1196 | return do_pwritev(fd, vec, vlen, pos, flags: 0); |
1197 | } |
1198 | |
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/*
 * Compat pwritev64v2: single loff_t offset plus @flags.  An offset of -1
 * selects the file's own position (do_writev()); anything else is a
 * positional write (do_pwritev()).
 */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos != -1)
		return do_pwritev(fd, vec, vlen, pos, flags);

	return do_writev(fd, vec, vlen, flags);
}
#endif
1209 | |
1210 | COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, |
1211 | const struct iovec __user *,vec, |
1212 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) |
1213 | { |
1214 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1215 | |
1216 | if (pos == -1) |
1217 | return do_writev(fd, vec, vlen, flags); |
1218 | return do_pwritev(fd, vec, vlen, pos, flags); |
1219 | } |
1220 | #endif /* CONFIG_COMPAT */ |
1221 | |
1222 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, |
1223 | size_t count, loff_t max) |
1224 | { |
1225 | struct fd in, out; |
1226 | struct inode *in_inode, *out_inode; |
1227 | struct pipe_inode_info *opipe; |
1228 | loff_t pos; |
1229 | loff_t out_pos; |
1230 | ssize_t retval; |
1231 | int fl; |
1232 | |
1233 | /* |
1234 | * Get input file, and verify that it is ok.. |
1235 | */ |
1236 | retval = -EBADF; |
1237 | in = fdget(fd: in_fd); |
1238 | if (!in.file) |
1239 | goto out; |
1240 | if (!(in.file->f_mode & FMODE_READ)) |
1241 | goto fput_in; |
1242 | retval = -ESPIPE; |
1243 | if (!ppos) { |
1244 | pos = in.file->f_pos; |
1245 | } else { |
1246 | pos = *ppos; |
1247 | if (!(in.file->f_mode & FMODE_PREAD)) |
1248 | goto fput_in; |
1249 | } |
1250 | retval = rw_verify_area(READ, in.file, &pos, count); |
1251 | if (retval < 0) |
1252 | goto fput_in; |
1253 | if (count > MAX_RW_COUNT) |
1254 | count = MAX_RW_COUNT; |
1255 | |
1256 | /* |
1257 | * Get output file, and verify that it is ok.. |
1258 | */ |
1259 | retval = -EBADF; |
1260 | out = fdget(fd: out_fd); |
1261 | if (!out.file) |
1262 | goto fput_in; |
1263 | if (!(out.file->f_mode & FMODE_WRITE)) |
1264 | goto fput_out; |
1265 | in_inode = file_inode(f: in.file); |
1266 | out_inode = file_inode(f: out.file); |
1267 | out_pos = out.file->f_pos; |
1268 | |
1269 | if (!max) |
1270 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); |
1271 | |
1272 | if (unlikely(pos + count > max)) { |
1273 | retval = -EOVERFLOW; |
1274 | if (pos >= max) |
1275 | goto fput_out; |
1276 | count = max - pos; |
1277 | } |
1278 | |
1279 | fl = 0; |
1280 | #if 0 |
1281 | /* |
1282 | * We need to debate whether we can enable this or not. The |
1283 | * man page documents EAGAIN return for the output at least, |
1284 | * and the application is arguably buggy if it doesn't expect |
1285 | * EAGAIN on a non-blocking file descriptor. |
1286 | */ |
1287 | if (in.file->f_flags & O_NONBLOCK) |
1288 | fl = SPLICE_F_NONBLOCK; |
1289 | #endif |
1290 | opipe = get_pipe_info(file: out.file, for_splice: true); |
1291 | if (!opipe) { |
1292 | retval = rw_verify_area(WRITE, out.file, &out_pos, count); |
1293 | if (retval < 0) |
1294 | goto fput_out; |
1295 | retval = do_splice_direct(in: in.file, ppos: &pos, out: out.file, opos: &out_pos, |
1296 | len: count, flags: fl); |
1297 | } else { |
1298 | if (out.file->f_flags & O_NONBLOCK) |
1299 | fl |= SPLICE_F_NONBLOCK; |
1300 | |
1301 | retval = splice_file_to_pipe(in: in.file, opipe, offset: &pos, len: count, flags: fl); |
1302 | } |
1303 | |
1304 | if (retval > 0) { |
1305 | add_rchar(current, amt: retval); |
1306 | add_wchar(current, amt: retval); |
1307 | fsnotify_access(file: in.file); |
1308 | fsnotify_modify(file: out.file); |
1309 | out.file->f_pos = out_pos; |
1310 | if (ppos) |
1311 | *ppos = pos; |
1312 | else |
1313 | in.file->f_pos = pos; |
1314 | } |
1315 | |
1316 | inc_syscr(current); |
1317 | inc_syscw(current); |
1318 | if (pos > max) |
1319 | retval = -EOVERFLOW; |
1320 | |
1321 | fput_out: |
1322 | fdput(fd: out); |
1323 | fput_in: |
1324 | fdput(fd: in); |
1325 | out: |
1326 | return retval; |
1327 | } |
1328 | |
1329 | SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) |
1330 | { |
1331 | loff_t pos; |
1332 | off_t off; |
1333 | ssize_t ret; |
1334 | |
1335 | if (offset) { |
1336 | if (unlikely(get_user(off, offset))) |
1337 | return -EFAULT; |
1338 | pos = off; |
1339 | ret = do_sendfile(out_fd, in_fd, ppos: &pos, count, MAX_NON_LFS); |
1340 | if (unlikely(put_user(pos, offset))) |
1341 | return -EFAULT; |
1342 | return ret; |
1343 | } |
1344 | |
1345 | return do_sendfile(out_fd, in_fd, NULL, count, max: 0); |
1346 | } |
1347 | |
1348 | SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) |
1349 | { |
1350 | loff_t pos; |
1351 | ssize_t ret; |
1352 | |
1353 | if (offset) { |
1354 | if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) |
1355 | return -EFAULT; |
1356 | ret = do_sendfile(out_fd, in_fd, ppos: &pos, count, max: 0); |
1357 | if (unlikely(put_user(pos, offset))) |
1358 | return -EFAULT; |
1359 | return ret; |
1360 | } |
1361 | |
1362 | return do_sendfile(out_fd, in_fd, NULL, count, max: 0); |
1363 | } |
1364 | |
1365 | #ifdef CONFIG_COMPAT |
1366 | COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, |
1367 | compat_off_t __user *, offset, compat_size_t, count) |
1368 | { |
1369 | loff_t pos; |
1370 | off_t off; |
1371 | ssize_t ret; |
1372 | |
1373 | if (offset) { |
1374 | if (unlikely(get_user(off, offset))) |
1375 | return -EFAULT; |
1376 | pos = off; |
1377 | ret = do_sendfile(out_fd, in_fd, ppos: &pos, count, MAX_NON_LFS); |
1378 | if (unlikely(put_user(pos, offset))) |
1379 | return -EFAULT; |
1380 | return ret; |
1381 | } |
1382 | |
1383 | return do_sendfile(out_fd, in_fd, NULL, count, max: 0); |
1384 | } |
1385 | |
1386 | COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, |
1387 | compat_loff_t __user *, offset, compat_size_t, count) |
1388 | { |
1389 | loff_t pos; |
1390 | ssize_t ret; |
1391 | |
1392 | if (offset) { |
1393 | if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) |
1394 | return -EFAULT; |
1395 | ret = do_sendfile(out_fd, in_fd, ppos: &pos, count, max: 0); |
1396 | if (unlikely(put_user(pos, offset))) |
1397 | return -EFAULT; |
1398 | return ret; |
1399 | } |
1400 | |
1401 | return do_sendfile(out_fd, in_fd, NULL, count, max: 0); |
1402 | } |
1403 | #endif |
1404 | |
1405 | /* |
1406 | * Performs necessary checks before doing a file copy |
1407 | * |
1408 | * Can adjust amount of bytes to copy via @req_count argument. |
1409 | * Returns appropriate error code that caller should return or |
1410 | * zero in case the copy should be allowed. |
1411 | */ |
1412 | static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, |
1413 | struct file *file_out, loff_t pos_out, |
1414 | size_t *req_count, unsigned int flags) |
1415 | { |
1416 | struct inode *inode_in = file_inode(f: file_in); |
1417 | struct inode *inode_out = file_inode(f: file_out); |
1418 | uint64_t count = *req_count; |
1419 | loff_t size_in; |
1420 | int ret; |
1421 | |
1422 | ret = generic_file_rw_checks(file_in, file_out); |
1423 | if (ret) |
1424 | return ret; |
1425 | |
1426 | /* |
1427 | * We allow some filesystems to handle cross sb copy, but passing |
1428 | * a file of the wrong filesystem type to filesystem driver can result |
1429 | * in an attempt to dereference the wrong type of ->private_data, so |
1430 | * avoid doing that until we really have a good reason. |
1431 | * |
1432 | * nfs and cifs define several different file_system_type structures |
1433 | * and several different sets of file_operations, but they all end up |
1434 | * using the same ->copy_file_range() function pointer. |
1435 | */ |
1436 | if (flags & COPY_FILE_SPLICE) { |
1437 | /* cross sb splice is allowed */ |
1438 | } else if (file_out->f_op->copy_file_range) { |
1439 | if (file_in->f_op->copy_file_range != |
1440 | file_out->f_op->copy_file_range) |
1441 | return -EXDEV; |
1442 | } else if (file_inode(f: file_in)->i_sb != file_inode(f: file_out)->i_sb) { |
1443 | return -EXDEV; |
1444 | } |
1445 | |
1446 | /* Don't touch certain kinds of inodes */ |
1447 | if (IS_IMMUTABLE(inode_out)) |
1448 | return -EPERM; |
1449 | |
1450 | if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) |
1451 | return -ETXTBSY; |
1452 | |
1453 | /* Ensure offsets don't wrap. */ |
1454 | if (pos_in + count < pos_in || pos_out + count < pos_out) |
1455 | return -EOVERFLOW; |
1456 | |
1457 | /* Shorten the copy to EOF */ |
1458 | size_in = i_size_read(inode: inode_in); |
1459 | if (pos_in >= size_in) |
1460 | count = 0; |
1461 | else |
1462 | count = min(count, size_in - (uint64_t)pos_in); |
1463 | |
1464 | ret = generic_write_check_limits(file: file_out, pos: pos_out, count: &count); |
1465 | if (ret) |
1466 | return ret; |
1467 | |
1468 | /* Don't allow overlapped copying within the same file. */ |
1469 | if (inode_in == inode_out && |
1470 | pos_out + count > pos_in && |
1471 | pos_out < pos_in + count) |
1472 | return -EINVAL; |
1473 | |
1474 | *req_count = count; |
1475 | return 0; |
1476 | } |
1477 | |
1478 | /* |
1479 | * copy_file_range() differs from regular file read and write in that it |
1480 | * specifically allows return partial success. When it does so is up to |
1481 | * the copy_file_range method. |
1482 | */ |
1483 | ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, |
1484 | struct file *file_out, loff_t pos_out, |
1485 | size_t len, unsigned int flags) |
1486 | { |
1487 | ssize_t ret; |
1488 | bool splice = flags & COPY_FILE_SPLICE; |
1489 | bool samesb = file_inode(f: file_in)->i_sb == file_inode(f: file_out)->i_sb; |
1490 | |
1491 | if (flags & ~COPY_FILE_SPLICE) |
1492 | return -EINVAL; |
1493 | |
1494 | ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, req_count: &len, |
1495 | flags); |
1496 | if (unlikely(ret)) |
1497 | return ret; |
1498 | |
1499 | ret = rw_verify_area(READ, file_in, &pos_in, len); |
1500 | if (unlikely(ret)) |
1501 | return ret; |
1502 | |
1503 | ret = rw_verify_area(WRITE, file_out, &pos_out, len); |
1504 | if (unlikely(ret)) |
1505 | return ret; |
1506 | |
1507 | if (len == 0) |
1508 | return 0; |
1509 | |
1510 | file_start_write(file: file_out); |
1511 | |
1512 | /* |
1513 | * Cloning is supported by more file systems, so we implement copy on |
1514 | * same sb using clone, but for filesystems where both clone and copy |
1515 | * are supported (e.g. nfs,cifs), we only call the copy method. |
1516 | */ |
1517 | if (!splice && file_out->f_op->copy_file_range) { |
1518 | ret = file_out->f_op->copy_file_range(file_in, pos_in, |
1519 | file_out, pos_out, |
1520 | len, flags); |
1521 | } else if (!splice && file_in->f_op->remap_file_range && samesb) { |
1522 | ret = file_in->f_op->remap_file_range(file_in, pos_in, |
1523 | file_out, pos_out, |
1524 | min_t(loff_t, MAX_RW_COUNT, len), |
1525 | REMAP_FILE_CAN_SHORTEN); |
1526 | /* fallback to splice */ |
1527 | if (ret <= 0) |
1528 | splice = true; |
1529 | } else if (samesb) { |
1530 | /* Fallback to splice for same sb copy for backward compat */ |
1531 | splice = true; |
1532 | } |
1533 | |
1534 | file_end_write(file: file_out); |
1535 | |
1536 | if (!splice) |
1537 | goto done; |
1538 | |
1539 | /* |
1540 | * We can get here for same sb copy of filesystems that do not implement |
1541 | * ->copy_file_range() in case filesystem does not support clone or in |
1542 | * case filesystem supports clone but rejected the clone request (e.g. |
1543 | * because it was not block aligned). |
1544 | * |
1545 | * In both cases, fall back to kernel copy so we are able to maintain a |
1546 | * consistent story about which filesystems support copy_file_range() |
1547 | * and which filesystems do not, that will allow userspace tools to |
1548 | * make consistent desicions w.r.t using copy_file_range(). |
1549 | * |
1550 | * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE |
1551 | * for server-side-copy between any two sb. |
1552 | * |
1553 | * In any case, we call do_splice_direct() and not splice_file_range(), |
1554 | * without file_start_write() held, to avoid possible deadlocks related |
1555 | * to splicing from input file, while file_start_write() is held on |
1556 | * the output file on a different sb. |
1557 | */ |
1558 | ret = do_splice_direct(in: file_in, ppos: &pos_in, out: file_out, opos: &pos_out, |
1559 | min_t(size_t, len, MAX_RW_COUNT), flags: 0); |
1560 | done: |
1561 | if (ret > 0) { |
1562 | fsnotify_access(file: file_in); |
1563 | add_rchar(current, amt: ret); |
1564 | fsnotify_modify(file: file_out); |
1565 | add_wchar(current, amt: ret); |
1566 | } |
1567 | |
1568 | inc_syscr(current); |
1569 | inc_syscw(current); |
1570 | |
1571 | return ret; |
1572 | } |
1573 | EXPORT_SYMBOL(vfs_copy_file_range); |
1574 | |
1575 | SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, |
1576 | int, fd_out, loff_t __user *, off_out, |
1577 | size_t, len, unsigned int, flags) |
1578 | { |
1579 | loff_t pos_in; |
1580 | loff_t pos_out; |
1581 | struct fd f_in; |
1582 | struct fd f_out; |
1583 | ssize_t ret = -EBADF; |
1584 | |
1585 | f_in = fdget(fd: fd_in); |
1586 | if (!f_in.file) |
1587 | goto out2; |
1588 | |
1589 | f_out = fdget(fd: fd_out); |
1590 | if (!f_out.file) |
1591 | goto out1; |
1592 | |
1593 | ret = -EFAULT; |
1594 | if (off_in) { |
1595 | if (copy_from_user(to: &pos_in, from: off_in, n: sizeof(loff_t))) |
1596 | goto out; |
1597 | } else { |
1598 | pos_in = f_in.file->f_pos; |
1599 | } |
1600 | |
1601 | if (off_out) { |
1602 | if (copy_from_user(to: &pos_out, from: off_out, n: sizeof(loff_t))) |
1603 | goto out; |
1604 | } else { |
1605 | pos_out = f_out.file->f_pos; |
1606 | } |
1607 | |
1608 | ret = -EINVAL; |
1609 | if (flags != 0) |
1610 | goto out; |
1611 | |
1612 | ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, |
1613 | flags); |
1614 | if (ret > 0) { |
1615 | pos_in += ret; |
1616 | pos_out += ret; |
1617 | |
1618 | if (off_in) { |
1619 | if (copy_to_user(to: off_in, from: &pos_in, n: sizeof(loff_t))) |
1620 | ret = -EFAULT; |
1621 | } else { |
1622 | f_in.file->f_pos = pos_in; |
1623 | } |
1624 | |
1625 | if (off_out) { |
1626 | if (copy_to_user(to: off_out, from: &pos_out, n: sizeof(loff_t))) |
1627 | ret = -EFAULT; |
1628 | } else { |
1629 | f_out.file->f_pos = pos_out; |
1630 | } |
1631 | } |
1632 | |
1633 | out: |
1634 | fdput(fd: f_out); |
1635 | out1: |
1636 | fdput(fd: f_in); |
1637 | out2: |
1638 | return ret; |
1639 | } |
1640 | |
1641 | /* |
1642 | * Don't operate on ranges the page cache doesn't support, and don't exceed the |
1643 | * LFS limits. If pos is under the limit it becomes a short access. If it |
1644 | * exceeds the limit we return -EFBIG. |
1645 | */ |
1646 | int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) |
1647 | { |
1648 | struct inode *inode = file->f_mapping->host; |
1649 | loff_t max_size = inode->i_sb->s_maxbytes; |
1650 | loff_t limit = rlimit(RLIMIT_FSIZE); |
1651 | |
1652 | if (limit != RLIM_INFINITY) { |
1653 | if (pos >= limit) { |
1654 | send_sig(SIGXFSZ, current, 0); |
1655 | return -EFBIG; |
1656 | } |
1657 | *count = min(*count, limit - pos); |
1658 | } |
1659 | |
1660 | if (!(file->f_flags & O_LARGEFILE)) |
1661 | max_size = MAX_NON_LFS; |
1662 | |
1663 | if (unlikely(pos >= max_size)) |
1664 | return -EFBIG; |
1665 | |
1666 | *count = min(*count, max_size - pos); |
1667 | |
1668 | return 0; |
1669 | } |
1670 | |
1671 | /* Like generic_write_checks(), but takes size of write instead of iter. */ |
1672 | int generic_write_checks_count(struct kiocb *iocb, loff_t *count) |
1673 | { |
1674 | struct file *file = iocb->ki_filp; |
1675 | struct inode *inode = file->f_mapping->host; |
1676 | |
1677 | if (IS_SWAPFILE(inode)) |
1678 | return -ETXTBSY; |
1679 | |
1680 | if (!*count) |
1681 | return 0; |
1682 | |
1683 | if (iocb->ki_flags & IOCB_APPEND) |
1684 | iocb->ki_pos = i_size_read(inode); |
1685 | |
1686 | if ((iocb->ki_flags & IOCB_NOWAIT) && |
1687 | !((iocb->ki_flags & IOCB_DIRECT) || |
1688 | (file->f_mode & FMODE_BUF_WASYNC))) |
1689 | return -EINVAL; |
1690 | |
1691 | return generic_write_check_limits(file: iocb->ki_filp, pos: iocb->ki_pos, count); |
1692 | } |
1693 | EXPORT_SYMBOL(generic_write_checks_count); |
1694 | |
1695 | /* |
1696 | * Performs necessary checks before doing a write |
1697 | * |
1698 | * Can adjust writing position or amount of bytes to write. |
1699 | * Returns appropriate error code that caller should return or |
1700 | * zero in case that write should be allowed. |
1701 | */ |
1702 | ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) |
1703 | { |
1704 | loff_t count = iov_iter_count(i: from); |
1705 | int ret; |
1706 | |
1707 | ret = generic_write_checks_count(iocb, &count); |
1708 | if (ret) |
1709 | return ret; |
1710 | |
1711 | iov_iter_truncate(i: from, count); |
1712 | return iov_iter_count(i: from); |
1713 | } |
1714 | EXPORT_SYMBOL(generic_write_checks); |
1715 | |
1716 | /* |
1717 | * Performs common checks before doing a file copy/clone |
1718 | * from @file_in to @file_out. |
1719 | */ |
1720 | int generic_file_rw_checks(struct file *file_in, struct file *file_out) |
1721 | { |
1722 | struct inode *inode_in = file_inode(f: file_in); |
1723 | struct inode *inode_out = file_inode(f: file_out); |
1724 | |
1725 | /* Don't copy dirs, pipes, sockets... */ |
1726 | if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) |
1727 | return -EISDIR; |
1728 | if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) |
1729 | return -EINVAL; |
1730 | |
1731 | if (!(file_in->f_mode & FMODE_READ) || |
1732 | !(file_out->f_mode & FMODE_WRITE) || |
1733 | (file_out->f_flags & O_APPEND)) |
1734 | return -EBADF; |
1735 | |
1736 | return 0; |
1737 | } |
1738 | |