// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/file.c
 *
 * Copyright (C) 1992 Rick Sladkey
 *
 * Changes Copyright (C) 1994 by Florian La Roche
 *  - Do not copy data too often around in the kernel.
 *  - In nfs_file_read the return value of kmalloc wasn't checked.
 *  - Put in a better version of read look-ahead buffering. Original idea
 *    and implementation by Wai S Kok elekokws@ee.nus.sg.
 *
 * Expire cache on write to a file by Wai S Kok (Oct 1994).
 *
 * Total rewrite of read side for new NFS buffer cache.. Linus.
 *
 * nfs regular file handling functions
 */

#include <linux/module.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/swap.h>

#include <linux/uaccess.h>
#include <linux/filelock.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"

#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_FILE

static const struct vm_operations_struct nfs_file_vm_ops;

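/*
 * Sanity-check open flags.  O_APPEND combined with O_DIRECT is rejected:
 * an uncached append cannot be performed atomically over NFS, since the
 * file size on the server may change between the client's size probe
 * and the actual write.
 */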
int nfs_check_flags(int flags)
{
	if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(nfs_check_flags);

/*
 * Open file
 */
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
	int res;

	dprintk("NFS: open file(%pD2)\n", filp);

	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
	res = nfs_check_flags(filp->f_flags);
	if (res)
		return res;

	res = nfs_open(inode, filp);
	if (res == 0)
		filp->f_mode |= FMODE_CAN_ODIRECT;
	return res;
}

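/*
 * Release the file: drop the open context, and let fscache release
 * whatever it holds for this file.
 */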
int
nfs_file_release(struct inode *inode, struct file *filp)
{
	dprintk("NFS: release(%pD2)\n", filp);

	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
	nfs_file_clear_open_context(filp);
	nfs_fscache_release_file(inode, filp);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_file_release);

/**
 * nfs_revalidate_file_size - Revalidate the file size
 * @inode: pointer to inode struct
 * @filp: pointer to struct file
 *
 * Revalidates the file length. This is basically a wrapper around
 * nfs_revalidate_inode() that takes into account the fact that we may
 * have cached writes (in which case we don't care about the server's
 * idea of what the file length is), or O_DIRECT (in which case we
 * shouldn't trust the cache).
 */
static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
{
	struct nfs_server *server = NFS_SERVER(inode);

	if (filp->f_flags & O_DIRECT)
		goto force_reval;
	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
		goto force_reval;
	return 0;
force_reval:
	return __nfs_revalidate_inode(server, inode);
}

loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
{
	dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
			filp, offset, whence);

	/*
	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
	 * the cached file length
	 */
	if (whence != SEEK_SET && whence != SEEK_CUR) {
		struct inode *inode = filp->f_mapping->host;

		int retval = nfs_revalidate_file_size(inode, filp);
		if (retval < 0)
			return (loff_t)retval;
	}

	return generic_file_llseek(filp, offset, whence);
}
EXPORT_SYMBOL_GPL(nfs_file_llseek);

/*
 * Flush all dirty pages, and check for write errors.
 */
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	errseq_t since;

	dprintk("NFS: flush(%pD2)\n", file);

	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
	if ((file->f_mode & FMODE_WRITE) == 0)
		return 0;

	/* Flush writes to the server and return any errors */
	since = filemap_sample_wb_err(file->f_mapping);
	nfs_wb_all(inode);
	return filemap_check_wb_err(file->f_mapping, since);
}

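/*
 * Buffered read.  O_DIRECT reads bypass the page cache entirely; the
 * cached path first revalidates the mapping, so that stale cached data
 * is dropped before falling through to the generic page-cache read.
 */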
ssize_t
nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t result;

	if (iocb->ki_flags & IOCB_DIRECT)
		return nfs_file_direct_read(iocb, to, false);

	dprintk("NFS: read(%pD2, %zu@%lu)\n",
		iocb->ki_filp,
		iov_iter_count(to), (unsigned long) iocb->ki_pos);

	nfs_start_io_read(inode);
	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	if (!result) {
		result = generic_file_read_iter(iocb, to);
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	nfs_end_io_read(inode);
	return result;
}
EXPORT_SYMBOL_GPL(nfs_file_read);

ssize_t
nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
		     size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	ssize_t result;

	dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);

	nfs_start_io_read(inode);
	result = nfs_revalidate_mapping(inode, in->f_mapping);
	if (!result) {
		result = filemap_splice_read(in, ppos, pipe, len, flags);
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	nfs_end_io_read(inode);
	return result;
}
EXPORT_SYMBOL_GPL(nfs_file_splice_read);

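/*
 * Set up a shared mapping.  We install nfs_file_vm_ops so that writes
 * through the mapping are trapped by ->page_mkwrite(), giving NFS a
 * chance to flush incompatible requests and set up a write request
 * before the page is dirtied.
 */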
int
nfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);
	int status;

	dprintk("NFS: mmap(%pD2)\n", file);

	/* Note: generic_file_mmap() returns ENOSYS on nommu systems
	 *	 so we call that before revalidating the mapping
	 */
	status = generic_file_mmap(file, vma);
	if (!status) {
		vma->vm_ops = &nfs_file_vm_ops;
		status = nfs_revalidate_mapping(inode, file->f_mapping);
	}
	return status;
}
EXPORT_SYMBOL_GPL(nfs_file_mmap);

/*
 * Flush any dirty pages for this process, and check for write errors.
 * The return status from this call provides a reliable indication of
 * whether any write errors occurred for this process.
 */
static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
	struct inode *inode = file_inode(file);
	int ret, ret2;

	dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);

	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
	ret = nfs_commit_inode(inode, FLUSH_SYNC);
	ret2 = file_check_and_advance_wb_err(file);
	if (ret2 < 0)
		return ret2;
	return ret;
}

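/*
 * fsync: flush and commit, then retry as long as pages were redirtied
 * while we waited.  Pages may be redirtied when an unstable write has
 * to be resent (for instance, a server reboot changes the commit
 * verifier), so a single flush+commit pass does not always guarantee
 * the data is on stable storage.
 */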
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file_inode(file);
	struct nfs_inode *nfsi = NFS_I(inode);
	long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
	long nredirtied;
	int ret;

	trace_nfs_fsync_enter(inode);

	for (;;) {
		ret = file_write_and_wait_range(file, start, end);
		if (ret != 0)
			break;
		ret = nfs_file_fsync_commit(file, datasync);
		if (ret != 0)
			break;
		ret = pnfs_sync_inode(inode, !!datasync);
		if (ret != 0)
			break;
		nredirtied = atomic_long_read(&nfsi->redirtied_pages);
		if (nredirtied == save_nredirtied)
			break;
		save_nredirtied = nredirtied;
	}

	trace_nfs_fsync_exit(inode, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_file_fsync);

/*
 * Decide whether a read/modify/write cycle may be more efficient
 * than a modify/write/read cycle when writing to a page in the
 * page cache.
 *
 * Some pNFS layout drivers can only read/write at a certain block
 * granularity like all block devices and therefore we must perform
 * read/modify/write whenever a page hasn't been read yet and the data
 * to be written there is not aligned to a block boundary and/or
 * smaller than the block size.
 *
 * The modify/write/read cycle may occur if a page is read before
 * being completely filled by the writer.  In this situation, the
 * page must be completely written to stable storage on the server
 * before it can be refilled by reading in the page from the server.
 * This can lead to expensive, small, FILE_SYNC mode writes being
 * done.
 *
 * It may be more efficient to read the page first if the file is
 * open for reading in addition to writing, the page is not marked
 * as Uptodate, it is not dirty or waiting to be committed,
 * indicating that it was previously allocated and then modified,
 * that there were valid bytes of data in that range of the file,
 * and that the new data won't completely replace the old data in
 * that range of the file.
 */
static bool nfs_folio_is_full_write(struct folio *folio, loff_t pos,
				    unsigned int len)
{
	unsigned int pglen = nfs_folio_length(folio);
	unsigned int offset = offset_in_folio(folio, pos);
	unsigned int end = offset + len;

	return !pglen || (end >= pglen && !offset);
}

static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
				       loff_t pos, unsigned int len)
{
	/*
	 * Up-to-date pages, those with ongoing or full-page write
	 * don't need read/modify/write
	 */
	if (folio_test_uptodate(folio) || folio_test_private(folio) ||
	    nfs_folio_is_full_write(folio, pos, len))
		return false;

	if (pnfs_ld_read_whole_page(file_inode(file)))
		return true;
	/* Open for reading too? */
	if (file->f_mode & FMODE_READ)
		return true;
	return false;
}

/*
 * This does the "real" work of the write. We must allocate and lock the
 * page to be sent back to the generic routine, which then copies the
 * data from user space.
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 */
static int nfs_write_begin(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, struct page **pagep,
			   void **fsdata)
{
	struct folio *folio;
	int once_thru = 0;
	int ret;

	dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
		 file, mapping->host->i_ino, len, (long long) pos);

start:
	folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	*pagep = &folio->page;

	ret = nfs_flush_incompatible(file, folio);
	if (ret) {
		folio_unlock(folio);
		folio_put(folio);
	} else if (!once_thru &&
		   nfs_want_read_modify_write(file, folio, pos, len)) {
		once_thru = 1;
		ret = nfs_read_folio(file, folio);
		folio_put(folio);
		if (!ret)
			goto start;
	}
	return ret;
}

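/*
 * Complete a buffered write.  The generic code has already copied
 * @copied bytes into the folio; zero any part of a non-uptodate folio
 * that the copy did not fill, then hand the dirty range to
 * nfs_update_folio() to create or extend the matching write request.
 */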
static int nfs_write_end(struct file *file, struct address_space *mapping,
			 loff_t pos, unsigned len, unsigned copied,
			 struct page *page, void *fsdata)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct folio *folio = page_folio(page);
	unsigned offset = offset_in_folio(folio, pos);
	int status;

	dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
		 file, mapping->host->i_ino, len, (long long) pos);

	/*
	 * Zero any uninitialised parts of the page, and then mark the page
	 * as up to date if it turns out that we're extending the file.
	 */
	if (!folio_test_uptodate(folio)) {
		size_t fsize = folio_size(folio);
		unsigned pglen = nfs_folio_length(folio);
		unsigned end = offset + copied;

		if (pglen == 0) {
			folio_zero_segments(folio, 0, offset, end, fsize);
			folio_mark_uptodate(folio);
		} else if (end >= pglen) {
			folio_zero_segment(folio, end, fsize);
			if (offset == 0)
				folio_mark_uptodate(folio);
		} else
			folio_zero_segment(folio, pglen, fsize);
	}

	status = nfs_update_folio(file, folio, offset, copied);

	folio_unlock(folio);
	folio_put(folio);

	if (status < 0)
		return status;
	NFS_I(mapping->host)->write_io += copied;

	if (nfs_ctx_key_to_expire(ctx, mapping->host))
		nfs_wb_all(mapping->host);

	return copied;
}

/*
 * Partially or wholly invalidate a page
 * - Release the private state associated with a page if undergoing complete
 *   page invalidation
 * - Called if either PG_private or PG_fscache is set on the page
 * - Caller holds page lock
 */
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
				 size_t length)
{
	struct inode *inode = folio_file_mapping(folio)->host;

	dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
		 folio->index, offset, length);

	if (offset != 0 || length < folio_size(folio))
		return;
	/* Cancel any unstarted writes on this page */
	nfs_wb_folio_cancel(inode, folio);
	folio_wait_fscache(folio);
	trace_nfs_invalidate_folio(inode, folio);
}

/*
 * Attempt to release the private state associated with a folio
 * - Called if either private or fscache flags are set on the folio
 * - Caller holds folio lock
 * - Return true (may release folio) or false (may not)
 */
static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
{
	dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio);

	/* If the private flag is set, then the folio is not freeable */
	if (folio_test_private(folio)) {
		if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
		    current_is_kswapd())
			return false;
		if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
			return false;
	}
	return nfs_fscache_release_folio(folio, gfp);
}

static void nfs_check_dirty_writeback(struct folio *folio,
				      bool *dirty, bool *writeback)
{
	struct nfs_inode *nfsi;
	struct address_space *mapping = folio->mapping;

	/*
	 * Check if an unstable folio is currently being committed and
	 * if so, have the VM treat it as if the folio is under writeback
	 * so it will not block due to folios that will shortly be freeable.
	 */
	nfsi = NFS_I(mapping->host);
	if (atomic_read(&nfsi->commit_info.rpcs_out)) {
		*writeback = true;
		return;
	}

	/*
	 * If the private flag is set, then the folio is not freeable
	 * and as the inode is not being committed, it's not going to
	 * be cleaned in the near future so treat it as dirty
	 */
	if (folio_test_private(folio))
		*dirty = true;
}

/*
 * Attempt to clear the private state associated with a page when an error
 * occurs that requires the cached contents of an inode to be written back or
 * destroyed
 * - Called if either PG_private or fscache is set on the page
 * - Caller holds page lock
 * - Return 0 if successful, -error otherwise
 */
static int nfs_launder_folio(struct folio *folio)
{
	struct inode *inode = folio->mapping->host;
	int ret;

	dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
		 inode->i_ino, folio_pos(folio));

	folio_wait_fscache(folio);
	ret = nfs_wb_folio(inode, folio);
	trace_nfs_launder_folio_done(inode, folio, ret);
	return ret;
}

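/*
 * Enable swap-over-NFS on this file.  The file must be fully allocated
 * (no holes); the RPC transport is switched into swap mode so memory
 * reclaim can always make progress, and the whole file is registered
 * as a single swap extent, with ->swap_rw() performing the I/O.
 */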
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			     sector_t *span)
{
	unsigned long blocks;
	long long isize;
	int ret;
	struct inode *inode = file_inode(file);
	struct rpc_clnt *clnt = NFS_CLIENT(inode);
	struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}

	ret = rpc_clnt_swap_activate(clnt);
	if (ret)
		return ret;
	ret = add_swap_extent(sis, 0, sis->max, 0);
	if (ret < 0) {
		rpc_clnt_swap_deactivate(clnt);
		return ret;
	}

	*span = sis->pages;

	if (cl->rpc_ops->enable_swap)
		cl->rpc_ops->enable_swap(inode);

	sis->flags |= SWP_FS_OPS;
	return ret;
}

static void nfs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct rpc_clnt *clnt = NFS_CLIENT(inode);
	struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;

	rpc_clnt_swap_deactivate(clnt);
	if (cl->rpc_ops->disable_swap)
		cl->rpc_ops->disable_swap(file_inode(file));
}

const struct address_space_operations nfs_file_aops = {
	.read_folio = nfs_read_folio,
	.readahead = nfs_readahead,
	.dirty_folio = filemap_dirty_folio,
	.writepage = nfs_writepage,
	.writepages = nfs_writepages,
	.write_begin = nfs_write_begin,
	.write_end = nfs_write_end,
	.invalidate_folio = nfs_invalidate_folio,
	.release_folio = nfs_release_folio,
	.migrate_folio = nfs_migrate_folio,
	.launder_folio = nfs_launder_folio,
	.is_dirty_writeback = nfs_check_dirty_writeback,
	.error_remove_page = generic_error_remove_page,
	.swap_activate = nfs_swap_activate,
	.swap_deactivate = nfs_swap_deactivate,
	.swap_rw = nfs_swap_rw,
};

/*
 * Notification that a PTE pointing to an NFS page is about to be made
 * writable, implying that someone is about to modify the page through a
 * shared-writable mapping
 */
static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
	struct file *filp = vmf->vma->vm_file;
	struct inode *inode = file_inode(filp);
	unsigned pagelen;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	struct address_space *mapping;
	struct folio *folio = page_folio(vmf->page);

	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
		 filp, filp->f_mapping->host->i_ino,
		 (long long)folio_file_pos(folio));

	sb_start_pagefault(inode->i_sb);

	/* make sure the cache has finished storing the page */
	if (folio_test_fscache(folio) &&
	    folio_wait_fscache_killable(folio) < 0) {
		ret = VM_FAULT_RETRY;
		goto out;
	}

	wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
			   nfs_wait_bit_killable,
			   TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);

	folio_lock(folio);
	mapping = folio_file_mapping(folio);
	if (mapping != inode->i_mapping)
		goto out_unlock;

	folio_wait_writeback(folio);

	pagelen = nfs_folio_length(folio);
	if (pagelen == 0)
		goto out_unlock;

	ret = VM_FAULT_LOCKED;
	if (nfs_flush_incompatible(filp, folio) == 0 &&
	    nfs_update_folio(filp, folio, 0, pagelen) == 0)
		goto out;

	ret = VM_FAULT_SIGBUS;
out_unlock:
	folio_unlock(folio);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}

static const struct vm_operations_struct nfs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = nfs_vm_page_mkwrite,
};

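/*
 * Buffered write.  After the generic page-cache copy: a mount with
 * NFS_MOUNT_WRITE_EAGER set starts writeback immediately, one with
 * NFS_MOUNT_WRITE_WAIT also waits for it to complete, and O_SYNC /
 * O_DSYNC semantics are handled by generic_write_sync().  If the
 * sampled writeback error shows ENOSPC, EDQUOT or EFBIG, everything is
 * flushed and the error re-checked so this writer sees it reliably.
 */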
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	unsigned int mntflags = NFS_SERVER(inode)->flags;
	ssize_t result, written;
	errseq_t since;
	int error;

	result = nfs_key_timeout_notify(file, inode);
	if (result)
		return result;

	if (iocb->ki_flags & IOCB_DIRECT)
		return nfs_file_direct_write(iocb, from, false);

	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
		file, iov_iter_count(from), (long long) iocb->ki_pos);

	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
		result = nfs_revalidate_file_size(inode, file);
		if (result)
			return result;
	}

	nfs_clear_invalid_mapping(file->f_mapping);

	since = filemap_sample_wb_err(file->f_mapping);
	nfs_start_io_write(inode);
	result = generic_write_checks(iocb, from);
	if (result > 0)
		result = generic_perform_write(iocb, from);
	nfs_end_io_write(inode);
	if (result <= 0)
		goto out;

	written = result;
	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);

	if (mntflags & NFS_MOUNT_WRITE_EAGER) {
		result = filemap_fdatawrite_range(file->f_mapping,
						  iocb->ki_pos - written,
						  iocb->ki_pos - 1);
		if (result < 0)
			goto out;
	}
	if (mntflags & NFS_MOUNT_WRITE_WAIT) {
		filemap_fdatawait_range(file->f_mapping,
					iocb->ki_pos - written,
					iocb->ki_pos - 1);
	}
	result = generic_write_sync(iocb, written);
	if (result < 0)
		return result;

out:
	/* Return error values */
	error = filemap_check_wb_err(file->f_mapping, since);
	switch (error) {
	default:
		break;
	case -EDQUOT:
	case -EFBIG:
	case -ENOSPC:
		nfs_wb_all(inode);
		error = file_check_and_advance_wb_err(file);
		if (error < 0)
			result = error;
	}
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	return -ETXTBSY;
}
EXPORT_SYMBOL_GPL(nfs_file_write);

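/*
 * Test for a conflicting lock (F_GETLK).  Check the local lock table
 * first; if that finds no conflict and we either hold a read delegation
 * or are using local locking, the answer is authoritative without a
 * server round trip.  Otherwise ask the server.
 */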
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status = 0;
	unsigned int saved_type = fl->fl_type;

	/* Try local locking first */
	posix_test_lock(filp, fl);
	if (fl->fl_type != F_UNLCK) {
		/* found a conflict */
		goto out;
	}
	fl->fl_type = saved_type;

	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
		goto out_noconflict;

	if (is_local)
		goto out_noconflict;

	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
out:
	return status;
out_noconflict:
	fl->fl_type = F_UNLCK;
	goto out;
}

static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	struct nfs_lock_context *l_ctx;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	nfs_wb_all(inode);

	l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
	if (!IS_ERR(l_ctx)) {
		status = nfs_iocounter_wait(l_ctx);
		nfs_put_lock_context(l_ctx);
		/* NOTE: special case
		 *	If we're signalled while cleaning up locks on process exit, we
		 *	still need to complete the unlock.
		 */
		if (status < 0 && !(fl->fl_flags & FL_CLOSE))
			return status;
	}

	/*
	 * Use local locking if mounted with "-onolock" or with appropriate
	 * "-olocal_lock="
	 */
	if (!is_local)
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = locks_lock_file_wait(filp, fl);
	return status;
}

static int
do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	status = nfs_sync_mapping(filp->f_mapping);
	if (status != 0)
		goto out;

	/*
	 * Use local locking if mounted with "-onolock" or with appropriate
	 * "-olocal_lock="
	 */
	if (!is_local)
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = locks_lock_file_wait(filp, fl);
	if (status < 0)
		goto out;

	/*
	 * Invalidate cache to prevent missing any changes.  If
	 * the file is mapped, clear the page cache as well so
	 * those mappings will be loaded.
	 *
	 * This makes locking act as a cache coherency point.
	 */
	nfs_sync_mapping(filp->f_mapping);
	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
		nfs_zap_caches(inode);
		if (mapping_mapped(filp->f_mapping))
			nfs_revalidate_mapping(inode, filp->f_mapping);
	}
out:
	return status;
}

/*
 * Lock a (portion of) a file
 */
int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int ret = -ENOLCK;
	int is_local = 0;

	dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
			filp, fl->fl_type, fl->fl_flags,
			(long long)fl->fl_start, (long long)fl->fl_end);

	nfs_inc_stats(inode, NFSIOS_VFSLOCK);

	if (fl->fl_flags & FL_RECLAIM)
		return -ENOGRACE;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
		is_local = 1;

	if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
		ret = NFS_PROTO(inode)->lock_check_bounds(fl);
		if (ret < 0)
			goto out_err;
	}

	if (IS_GETLK(cmd))
		ret = do_getlk(filp, cmd, fl, is_local);
	else if (fl->fl_type == F_UNLCK)
		ret = do_unlk(filp, cmd, fl, is_local);
	else
		ret = do_setlk(filp, cmd, fl, is_local);
out_err:
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_lock);

/*
 * Lock a (portion of) a file
 */
int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int is_local = 0;

	dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
			filp, fl->fl_type, fl->fl_flags);

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
		is_local = 1;

	/* We're simulating flock() locks using posix locks on the server */
	if (fl->fl_type == F_UNLCK)
		return do_unlk(filp, cmd, fl, is_local);
	return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);

const struct file_operations nfs_file_operations = {
	.llseek = nfs_file_llseek,
	.read_iter = nfs_file_read,
	.write_iter = nfs_file_write,
	.mmap = nfs_file_mmap,
	.open = nfs_file_open,
	.flush = nfs_file_flush,
	.release = nfs_file_release,
	.fsync = nfs_file_fsync,
	.lock = nfs_lock,
	.flock = nfs_flock,
	.splice_read = nfs_file_splice_read,
	.splice_write = iter_file_splice_write,
	.check_flags = nfs_check_flags,
	.setlease = simple_nosetlease,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);