// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>
#include <linux/freezer.h>
#include <linux/wait.h>
#include <linux/iversion.h>
#include <linux/filelock.h>

#include <linux/uaccess.h>
#include <linux/sched/mm.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"

#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)
struct nfs_io_completion {
	void (*complete)(void *data);
	void *data;
	struct kref refcount;
};

/*
 * Local function declarations
 */
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_inode_remove_request(struct nfs_page *req);
static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
				     struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode);
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct folio *folio);

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;

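/*
 * Allocate commit data from the slab cache; if that fails under memory
 * pressure, fall back to the dedicated mempool with GFP_NOWAIT so that
 * writeback can still make forward progress.
 */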
struct nfs_commit_data *nfs_commitdata_alloc(void)
{
	struct nfs_commit_data *p;

	p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask());
	if (!p) {
		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
		if (!p)
			return NULL;
		memset(p, 0, sizeof(*p));
	}
	INIT_LIST_HEAD(&p->pages);
	return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);

void nfs_commit_free(struct nfs_commit_data *p)
{
	mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
	struct nfs_pgio_header *p;

	p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask());
	if (!p) {
		p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT);
		if (!p)
			return NULL;
		memset(p, 0, sizeof(*p));
	}
	p->rw_mode = FMODE_WRITE;
	return p;
}

static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	mempool_free(hdr, nfs_wdata_mempool);
}

static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
{
	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
}

static void nfs_io_completion_init(struct nfs_io_completion *ioc,
				   void (*complete)(void *), void *data)
{
	ioc->complete = complete;
	ioc->data = data;
	kref_init(&ioc->refcount);
}

static void nfs_io_completion_release(struct kref *kref)
{
	struct nfs_io_completion *ioc = container_of(kref,
			struct nfs_io_completion, refcount);
	ioc->complete(ioc->data);
	kfree(ioc);
}

static void nfs_io_completion_get(struct nfs_io_completion *ioc)
{
	if (ioc != NULL)
		kref_get(&ioc->refcount);
}

static void nfs_io_completion_put(struct nfs_io_completion *ioc)
{
	if (ioc != NULL)
		kref_put(&ioc->refcount, nfs_io_completion_release);
}

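/*
 * Give the inode a reference to this request: take an extra kref and
 * bump the inode's request count, unless PG_INODE_REF was already set.
 */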
static void
nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
{
	if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) {
		kref_get(&req->wb_kref);
		atomic_long_inc(&NFS_I(inode)->nrequests);
	}
}

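/*
 * If a removal of this request is pending (PG_REMOVE is set), cancel it
 * under the page group lock and re-take the inode reference instead.
 * Returns 0 on success, or the error from nfs_page_group_lock().
 */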
static int
nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
{
	int ret;

	if (!test_bit(PG_REMOVE, &req->wb_flags))
		return 0;
	ret = nfs_page_group_lock(req);
	if (ret)
		return ret;
	if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
		nfs_page_set_inode_ref(req, inode);
	nfs_page_group_unlock(req);
	return 0;
}

static struct nfs_page *nfs_folio_private_request(struct folio *folio)
{
	return folio_get_private(folio);
}

/**
 * nfs_folio_find_private_request - find head request associated with a folio
 * @folio: pointer to folio
 *
 * must be called while holding the inode lock.
 *
 * returns matching head request with reference held, or NULL if not found.
 */
static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
{
	struct address_space *mapping = folio_file_mapping(folio);
	struct nfs_page *req;

	if (!folio_test_private(folio))
		return NULL;
	spin_lock(&mapping->private_lock);
	req = nfs_folio_private_request(folio);
	if (req) {
		WARN_ON_ONCE(req->wb_head != req);
		kref_get(&req->wb_kref);
	}
	spin_unlock(&mapping->private_lock);
	return req;
}

static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio)
{
	struct inode *inode = folio_file_mapping(folio)->host;
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req = NULL;

	if (!folio_test_swapcache(folio))
		return NULL;
	mutex_lock(&nfsi->commit_mutex);
	if (folio_test_swapcache(folio)) {
		req = nfs_page_search_commits_for_head_request_locked(nfsi,
								      folio);
		if (req) {
			WARN_ON_ONCE(req->wb_head != req);
			kref_get(&req->wb_kref);
		}
	}
	mutex_unlock(&nfsi->commit_mutex);
	return req;
}

/**
 * nfs_folio_find_head_request - find head request associated with a folio
 * @folio: pointer to folio
 *
 * returns matching head request with reference held, or NULL if not found.
 */
static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
	struct nfs_page *req;

	req = nfs_folio_find_private_request(folio);
	if (!req)
		req = nfs_folio_find_swap_request(folio);
	return req;
}

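/*
 * Find the head request for @folio and lock the page group, retrying if
 * the request was removed from the folio (or the group head changed)
 * while we waited for the lock.  Returns the locked head with a
 * reference held, NULL if no request exists, or an ERR_PTR on failure.
 */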
static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio)
{
	struct inode *inode = folio_file_mapping(folio)->host;
	struct nfs_page *req, *head;
	int ret;

	for (;;) {
		req = nfs_folio_find_head_request(folio);
		if (!req)
			return req;
		head = nfs_page_group_lock_head(req);
		if (head != req)
			nfs_release_request(req);
		if (IS_ERR(head))
			return head;
		ret = nfs_cancel_remove_inode(head, inode);
		if (ret < 0) {
			nfs_unlock_and_release_request(head);
			return ERR_PTR(ret);
		}
		/* Ensure that nobody removed the request before we locked it */
		if (head == nfs_folio_private_request(folio))
			break;
		if (folio_test_swapcache(folio))
			break;
		nfs_unlock_and_release_request(head);
	}
	return head;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct folio *folio, unsigned int offset,
			  unsigned int count)
{
	struct inode *inode = folio_file_mapping(folio)->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
	if (i_size > 0 && folio_index(folio) < end_index)
		goto out;
	end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
	if (i_size >= end)
		goto out;
	trace_nfs_size_grow(inode, end);
	i_size_write(inode, end);
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
	nfs_fscache_invalidate(inode, 0);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	nfs_zap_mapping(mapping->host, mapping);
	/* Force file size revalidation */
	spin_lock(&inode->i_lock);
	nfs_set_cache_invalid(inode, NFS_INO_REVAL_FORCED |
				     NFS_INO_INVALID_CHANGE |
				     NFS_INO_INVALID_SIZE);
	spin_unlock(&inode->i_lock);
}

static void nfs_mapping_set_error(struct folio *folio, int error)
{
	struct address_space *mapping = folio_file_mapping(folio);

	folio_set_error(folio);
	filemap_set_wb_err(mapping, error);
	if (mapping->host)
		errseq_set(&mapping->host->i_sb->s_wb_err,
			   error == -ENOSPC ? -ENOSPC : -EIO);
	nfs_set_pageerror(mapping);
}

/*
 * nfs_page_group_search_locked
 * @head - head request of page group
 * @page_offset - offset into page
 *
 * Search page group with head @head to find a request that contains the
 * page offset @page_offset.
 *
 * Returns a pointer to the first matching nfs request, or NULL if no
 * match is found.
 *
 * Must be called with the page group lock held
 */
static struct nfs_page *
nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
	struct nfs_page *req;

	req = head;
	do {
		if (page_offset >= req->wb_pgbase &&
		    page_offset < (req->wb_pgbase + req->wb_bytes))
			return req;

		req = req->wb_this_page;
	} while (req != head);

	return NULL;
}

/*
 * nfs_page_group_covers_page
 * @head - head request of page group
 *
 * Return true if the page group with head @head covers the whole page,
 * returns false otherwise
 */
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
	unsigned int len = nfs_folio_length(nfs_page_to_folio(req));
	struct nfs_page *tmp;
	unsigned int pos = 0;

	nfs_page_group_lock(req);

	for (;;) {
		tmp = nfs_page_group_search_locked(req->wb_head, pos);
		if (!tmp)
			break;
		pos = tmp->wb_pgbase + tmp->wb_bytes;
	}

	nfs_page_group_unlock(req);
	return pos >= len;
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct nfs_page *req)
{
	struct folio *folio = nfs_page_to_folio(req);

	if (folio_test_uptodate(folio))
		return;
	if (!nfs_page_group_covers_page(req))
		return;
	folio_mark_uptodate(folio);
}

static int wb_priority(struct writeback_control *wbc)
{
	int ret = 0;

	if (wbc->sync_mode == WB_SYNC_ALL)
		ret = FLUSH_COND_STABLE;
	return ret;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

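/*
 * NFS_CONGESTION_ON_THRESH converts nfs_congestion_kb into pages; the
 * OFF threshold is three quarters of that, providing hysteresis.  For
 * example, with 4 KiB pages and nfs_congestion_kb = 65536 (64 MiB), the
 * server is marked congested above 16384 pages of outstanding writeback
 * and uncongested again once the count drops below 12288.
 */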
static void nfs_folio_set_writeback(struct folio *folio)
{
	struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);

	folio_start_writeback(folio);
	if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH)
		nfss->write_congested = 1;
}

static void nfs_folio_end_writeback(struct folio *folio)
{
	struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);

	folio_end_writeback(folio);
	if (atomic_long_dec_return(&nfss->writeback) <
	    NFS_CONGESTION_OFF_THRESH)
		nfss->write_congested = 0;
}

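/*
 * Unlock the request and, if this was the last subrequest in the group
 * to set PG_WB_END, end folio writeback as well.
 */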
static void nfs_page_end_writeback(struct nfs_page *req)
{
	if (nfs_page_group_sync_on_bit(req, PG_WB_END)) {
		nfs_unlock_request(req);
		nfs_folio_end_writeback(nfs_page_to_folio(req));
	} else
		nfs_unlock_request(req);
}

/*
 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
 *
 * @destroy_list - request list (using wb_this_page) terminated by @old_head
 * @old_head - the old head of the list
 *
 * All subrequests must be locked and removed from all lists, so at this point
 * they are only "active" in this function, and possibly in nfs_wait_on_request
 * with a reference held by some other context.
 */
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
				 struct nfs_page *old_head,
				 struct inode *inode)
{
	while (destroy_list) {
		struct nfs_page *subreq = destroy_list;

		destroy_list = (subreq->wb_this_page == old_head) ?
				   NULL : subreq->wb_this_page;

		/* Note: lock subreq in order to change subreq->wb_head */
		nfs_page_set_headlock(subreq);
		WARN_ON_ONCE(old_head != subreq->wb_head);

		/* make sure old group is not used */
		subreq->wb_this_page = subreq;
		subreq->wb_head = subreq;

		clear_bit(PG_REMOVE, &subreq->wb_flags);

		/* Note: races with nfs_page_group_destroy() */
		if (!kref_read(&subreq->wb_kref)) {
			/* Check if we raced with nfs_page_group_destroy() */
			if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) {
				nfs_page_clear_headlock(subreq);
				nfs_free_request(subreq);
			} else
				nfs_page_clear_headlock(subreq);
			continue;
		}
		nfs_page_clear_headlock(subreq);

		nfs_release_request(old_head);

		if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
			nfs_release_request(subreq);
			atomic_long_dec(&NFS_I(inode)->nrequests);
		}

		/* subreq is now totally disconnected from page group or any
		 * write / commit lists. last chance to wake any waiters */
		nfs_unlock_and_release_request(subreq);
	}
}

/*
 * nfs_join_page_group - destroy subrequests of the head req
 * @head: the page used to lookup the "page group" of nfs_page structures
 * @cinfo: commit info for the inode
 * @inode: Inode to which the request belongs.
 *
 * This function joins all sub requests to the head request by first
 * locking all requests in the group, cancelling any pending operations
 * and finally updating the head request to cover the whole range covered by
 * the (former) group. All subrequests are removed from any write or commit
 * lists, unlinked from the group and destroyed.
 */
void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
			 struct inode *inode)
{
	struct nfs_page *subreq;
	struct nfs_page *destroy_list = NULL;
	unsigned int pgbase, off, bytes;

	pgbase = head->wb_pgbase;
	bytes = head->wb_bytes;
	off = head->wb_offset;
	for (subreq = head->wb_this_page; subreq != head;
			subreq = subreq->wb_this_page) {
		/* Subrequests should always form a contiguous range */
		if (pgbase > subreq->wb_pgbase) {
			off -= pgbase - subreq->wb_pgbase;
			bytes += pgbase - subreq->wb_pgbase;
			pgbase = subreq->wb_pgbase;
		}
		bytes = max(subreq->wb_pgbase + subreq->wb_bytes
				- pgbase, bytes);
	}

	/* Set the head request's range to cover the former page group */
	head->wb_pgbase = pgbase;
	head->wb_bytes = bytes;
	head->wb_offset = off;

	/* Now that all requests are locked, make sure they aren't on any list.
	 * Commit list removal accounting is done after locks are dropped */
	subreq = head;
	do {
		nfs_clear_request_commit(cinfo, subreq);
		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* unlink subrequests from head, destroy them later */
	if (head->wb_this_page != head) {
		/* destroy list will be terminated by head */
		destroy_list = head->wb_this_page;
		head->wb_this_page = head;
	}

	nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
}

/*
 * nfs_lock_and_join_requests - join all subreqs to the head req
 * @folio: the folio used to lookup the "page group" of nfs_page structures
 *
 * This function joins all sub requests to the head request by first
 * locking all requests in the group, cancelling any pending operations
 * and finally updating the head request to cover the whole range covered by
 * the (former) group. All subrequests are removed from any write or commit
 * lists, unlinked from the group and destroyed.
 *
 * Returns a locked, referenced pointer to the head request - which after
 * this call is guaranteed to be the only request associated with the page.
 * Returns NULL if no requests are found for @folio, or an ERR_PTR if an
 * error was encountered.
 */
static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
{
	struct inode *inode = folio_file_mapping(folio)->host;
	struct nfs_page *head;
	struct nfs_commit_info cinfo;
	int ret;

	nfs_init_cinfo_from_inode(&cinfo, inode);
	/*
	 * A reference is taken only on the head request which acts as a
	 * reference to the whole page group - the group will not be destroyed
	 * until the head reference is released.
	 */
	head = nfs_folio_find_and_lock_request(folio);
	if (IS_ERR_OR_NULL(head))
		return head;

	/* lock each request in the page group */
	ret = nfs_page_group_lock_subrequests(head);
	if (ret < 0) {
		nfs_unlock_and_release_request(head);
		return ERR_PTR(ret);
	}

	nfs_join_page_group(head, &cinfo, inode);

	return head;
}

static void nfs_write_error(struct nfs_page *req, int error)
{
	trace_nfs_write_error(nfs_page_to_inode(req), req, error);
	nfs_mapping_set_error(nfs_page_to_folio(req), error);
	nfs_inode_remove_request(req);
	nfs_page_end_writeback(req);
	nfs_release_request(req);
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct folio *folio,
				struct writeback_control *wbc,
				struct nfs_pageio_descriptor *pgio)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_lock_and_join_requests(folio);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	nfs_folio_set_writeback(folio);
	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));

	/* If there is a fatal error that covers this write, just exit */
	ret = pgio->pg_error;
	if (nfs_error_is_fatal_on_server(ret))
		goto out_launder;

	ret = 0;
	if (!nfs_pageio_add_request(pgio, req)) {
		ret = pgio->pg_error;
		/*
		 * Remove the problematic req upon fatal errors on the server
		 */
		if (nfs_error_is_fatal_on_server(ret))
			goto out_launder;
		if (wbc->sync_mode == WB_SYNC_NONE)
			ret = AOP_WRITEPAGE_ACTIVATE;
		folio_redirty_for_writepage(wbc, folio);
		nfs_redirty_request(req);
		pgio->pg_error = 0;
	} else
		nfs_add_stats(folio_file_mapping(folio)->host,
			      NFSIOS_WRITEPAGES, 1);
out:
	return ret;
out_launder:
	nfs_write_error(req, ret);
	return 0;
}

static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
			    struct nfs_pageio_descriptor *pgio)
{
	nfs_pageio_cond_complete(pgio, folio_index(folio));
	return nfs_page_async_flush(folio, wbc, pgio);
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct folio *folio,
				struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	struct inode *inode = folio_file_mapping(folio)->host;
	int err;

	if (wbc->sync_mode == WB_SYNC_NONE &&
	    NFS_SERVER(inode)->write_congested)
		return AOP_WRITEPAGE_ACTIVATE;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_pageio_init_write(&pgio, inode, 0, false,
			      &nfs_async_write_completion_ops);
	err = nfs_do_writepage(folio, wbc, &pgio);
	pgio.pg_error = 0;
	nfs_pageio_complete(&pgio);
	return err;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	int ret;

	ret = nfs_writepage_locked(folio, wbc);
	if (ret != AOP_WRITEPAGE_ACTIVATE)
		unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct folio *folio,
				   struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(folio, wbc, data);
	if (ret != AOP_WRITEPAGE_ACTIVATE)
		folio_unlock(folio);
	return ret;
}

static void nfs_io_completion_commit(void *inode)
{
	nfs_commit_inode(inode, 0);
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct nfs_pageio_descriptor pgio;
	struct nfs_io_completion *ioc = NULL;
	unsigned int mntflags = NFS_SERVER(inode)->flags;
	int priority = 0;
	int err;

	if (wbc->sync_mode == WB_SYNC_NONE &&
	    NFS_SERVER(inode)->write_congested)
		return 0;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
	    wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
		ioc = nfs_io_completion_alloc(GFP_KERNEL);
		if (ioc)
			nfs_io_completion_init(ioc, nfs_io_completion_commit,
					       inode);
		priority = wb_priority(wbc);
	}

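	/*
	 * Retry the pass over the mapping on transient errors; stop on
	 * success, on fatal errors, or on -EAGAIN when the mount uses
	 * the "softerr" behaviour (NFS_MOUNT_SOFTERR).
	 */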
	do {
		nfs_pageio_init_write(&pgio, inode, priority, false,
				      &nfs_async_write_completion_ops);
		pgio.pg_io_completion = ioc;
		err = write_cache_pages(mapping, wbc, nfs_writepages_callback,
					&pgio);
		pgio.pg_error = 0;
		nfs_pageio_complete(&pgio);
		if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR)
			break;
	} while (err < 0 && !nfs_error_is_fatal(err));
	nfs_io_completion_put(ioc);

	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static void nfs_inode_add_request(struct nfs_page *req)
{
	struct folio *folio = nfs_page_to_folio(req);
	struct address_space *mapping = folio_file_mapping(folio);
	struct nfs_inode *nfsi = NFS_I(mapping->host);

	WARN_ON_ONCE(req->wb_this_page != req);

	/* Lock the request! */
	nfs_lock_request(req);

	/*
	 * Swap-space should not get truncated. Hence no need to plug the race
	 * with invalidate/truncate.
	 */
	spin_lock(&mapping->private_lock);
	if (likely(!folio_test_swapcache(folio))) {
		set_bit(PG_MAPPED, &req->wb_flags);
		folio_set_private(folio);
		folio->private = req;
	}
	spin_unlock(&mapping->private_lock);
	atomic_long_inc(&nfsi->nrequests);
	/* this is a head request for a page group - mark it as having an
	 * extra reference so sub groups can follow suit.
	 * This flag also informs pgio layer when to bump nrequests when
	 * adding subrequests. */
	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
	kref_get(&req->wb_kref);
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));

	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
		struct folio *folio = nfs_page_to_folio(req->wb_head);
		struct address_space *mapping = folio_file_mapping(folio);

		spin_lock(&mapping->private_lock);
		if (likely(folio && !folio_test_swapcache(folio))) {
			folio->private = NULL;
			folio_clear_private(folio);
			clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
		}
		spin_unlock(&mapping->private_lock);
	}

	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
		atomic_long_dec(&nfsi->nrequests);
		nfs_release_request(req);
	}
}

static void nfs_mark_request_dirty(struct nfs_page *req)
{
	struct folio *folio = nfs_page_to_folio(req);
	if (folio)
		filemap_dirty_folio(folio_mapping(folio), folio);
}

/*
 * nfs_page_search_commits_for_head_request_locked
 *
 * Search through commit lists on @inode for the head request for @folio.
 * Must be called while holding the inode (which is cinfo) lock.
 *
 * Returns the head request if found, or NULL if not found.
 */
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct folio *folio)
{
	struct nfs_page *freq, *t;
	struct nfs_commit_info cinfo;
	struct inode *inode = &nfsi->vfs_inode;

	nfs_init_cinfo_from_inode(&cinfo, inode);

	/* search through pnfs commit lists */
	freq = pnfs_search_commit_reqs(inode, &cinfo, folio);
	if (freq)
		return freq->wb_head;

	/* Linearly search the commit list for the correct request */
	list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
		if (nfs_page_to_folio(freq) == folio)
			return freq->wb_head;
	}

	return NULL;
}

/**
 * nfs_request_add_commit_list_locked - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @dst: commit list head
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit, updates the cinfo count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
 * nfs_page lock.
 */
void
nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
				   struct nfs_commit_info *cinfo)
{
	set_bit(PG_CLEAN, &req->wb_flags);
	nfs_list_add_request(req, dst);
	atomic_long_inc(&cinfo->mds->ncommit);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);

/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit, updates the cinfo count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must _not_ hold the cinfo->lock, but must be
 * holding the nfs_page lock.
 */
void
nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This clears the PG_CLEAN bit, and updates the cinfo's count of
 * number of outstanding requests requiring a commit
 * It does not update the MM page stats.
 *
 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
 */
void
nfs_request_remove_commit_list(struct nfs_page *req,
			       struct nfs_commit_info *cinfo)
{
	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
	atomic_long_dec(&cinfo->mds->ncommit);
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
	cinfo->inode = inode;
	cinfo->mds = &NFS_I(inode)->commit_info;
	cinfo->ds = pnfs_get_ds_info(inode);
	cinfo->dreq = NULL;
	cinfo->completion_ops = &nfs_commit_completion_ops;
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
	if (dreq)
		nfs_init_cinfo_from_dreq(cinfo, dreq);
	else
		nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);

/*
 * Add a request to the inode's commit list.
 */
void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo, u32 ds_commit_idx)
{
	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
		return;
	nfs_request_add_commit_list(req, cinfo);
}

static void nfs_folio_clear_commit(struct folio *folio)
{
	if (folio) {
		long nr = folio_nr_pages(folio);

		node_stat_mod_folio(folio, NR_WRITEBACK, -nr);
		wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb,
			    WB_WRITEBACK, -nr);
	}
}

/* Called holding the request lock on @req */
static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
				     struct nfs_page *req)
{
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct nfs_open_context *ctx = nfs_req_openctx(req);
		struct inode *inode = d_inode(ctx->dentry);

		mutex_lock(&NFS_I(inode)->commit_mutex);
		if (!pnfs_clear_request_commit(req, cinfo)) {
			nfs_request_remove_commit_list(req, cinfo);
		}
		mutex_unlock(&NFS_I(inode)->commit_mutex);
		nfs_folio_clear_commit(nfs_page_to_folio(req));
	}
}

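/*
 * A write needs a COMMIT unless the server replied NFS_FILE_SYNC.  For
 * NFS_DATA_SYNC replies, a commit is only needed for writes that went
 * through the MDS (i.e. when no pNFS layout segment is attached).
 */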
int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
	if (hdr->verf.committed == NFS_DATA_SYNC)
		return hdr->lseg == NULL;
	return hdr->verf.committed != NFS_FILE_SYNC;
}

static void nfs_async_write_init(struct nfs_pgio_header *hdr)
{
	nfs_io_completion_get(hdr->io_completion);
}

static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
	struct nfs_commit_info cinfo;
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
		    (hdr->good_bytes < bytes)) {
			trace_nfs_comp_error(hdr->inode, req, hdr->error);
			nfs_mapping_set_error(nfs_page_to_folio(req),
					      hdr->error);
			goto remove_req;
		}
		if (nfs_write_need_commit(hdr)) {
			/* Reset wb_nio, since the write was successful. */
			req->wb_nio = 0;
			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
						hdr->pgio_mirror_idx);
			goto next;
		}
remove_req:
		nfs_inode_remove_request(req);
next:
		nfs_page_end_writeback(req);
		nfs_release_request(req);
	}
out:
	nfs_io_completion_put(hdr->io_completion);
	hdr->release(hdr);
}

unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
	return atomic_long_read(&cinfo->mds->ncommit);
}

/* NFS_I(cinfo->inode)->commit_mutex held by caller */
int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
		     struct nfs_commit_info *cinfo, int max)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		kref_get(&req->wb_kref);
		if (!nfs_lock_request(req)) {
			nfs_release_request(req);
			continue;
		}
		nfs_request_remove_commit_list(req, cinfo);
		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
		nfs_list_add_request(req, dst);
		ret++;
		if ((ret == max) && !cinfo->dreq)
			break;
		cond_resched();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_scan_commit_list);

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: mds destination list
 * @cinfo: mds and ds lists of reqs ready to commit
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
int
nfs_scan_commit(struct inode *inode, struct list_head *dst,
		struct nfs_commit_info *cinfo)
{
	int ret = 0;

	if (!atomic_long_read(&cinfo->mds->ncommit))
		return 0;
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
	if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
		const int max = INT_MAX;

		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
					   cinfo, max);
		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
	}
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	return ret;
}

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct folio *folio,
						  unsigned int offset,
						  unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	end = offset + bytes;

	req = nfs_lock_and_join_requests(folio);
	if (IS_ERR_OR_NULL(req))
		return req;

	rqend = req->wb_offset + req->wb_bytes;
	/*
	 * Tell the caller to flush out the request if
	 * the offsets are non-contiguous.
	 * Note: nfs_flush_incompatible() will already
	 * have flushed out requests having wrong owners.
	 */
	if (offset > rqend || end < req->wb_offset)
		goto out_flushme;

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
	req->wb_nio = 0;
	return req;
out_flushme:
	/*
	 * Note: we mark the request dirty here because
	 * nfs_lock_and_join_requests() cannot preserve
	 * commit flags, so we have to replay the write.
	 */
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	error = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
	return (error < 0) ? ERR_PTR(error) : NULL;
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
						struct folio *folio,
						unsigned int offset,
						unsigned int bytes)
{
	struct nfs_page *req;

	req = nfs_try_to_update_request(folio, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_page_create_from_folio(ctx, folio, offset, bytes);
	if (IS_ERR(req))
		goto out;
	nfs_inode_add_request(req);
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx,
			       struct folio *folio, unsigned int offset,
			       unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, folio, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(folio, offset, count);
	nfs_mark_uptodate(req);
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct folio *folio)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_lock_context *l_ctx;
	struct file_lock_context *flctx = locks_inode_context(file_inode(file));
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_folio_find_head_request(folio);
		if (req == NULL)
			return 0;
		l_ctx = req->wb_lock_context;
		do_flush = nfs_page_to_folio(req) != folio ||
			   !nfs_match_open_context(nfs_req_openctx(req), ctx);
		if (l_ctx && flctx &&
		    !(list_empty_careful(&flctx->flc_posix) &&
		      list_empty_careful(&flctx->flc_flock))) {
			do_flush |= l_ctx->lockowner != current->files;
		}
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
	} while (status == 0);
	return status;
}

/*
 * Avoid buffered writes when an open context credential's key would
 * expire soon.
 *
 * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
 *
 * Return 0 and set a credential flag which triggers the inode to flush
 * and performs NFS_FILE_SYNC writes if the key will expire within
 * RPC_KEY_EXPIRE_TIMEO.
 */
int
nfs_key_timeout_notify(struct file *filp, struct inode *inode)
{
	struct nfs_open_context *ctx = nfs_file_open_context(filp);

	if (nfs_ctx_key_to_expire(ctx, inode) &&
	    !rcu_access_pointer(ctx->ll_cred))
		/* Already expired! */
		return -EACCES;
	return 0;
}

/*
 * Test if the open context credential key is marked to expire soon.
 */
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
{
	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
	struct rpc_cred *cred, *new, *old = NULL;
	struct auth_cred acred = {
		.cred = ctx->cred,
	};
	bool ret = false;

	rcu_read_lock();
	cred = rcu_dereference(ctx->ll_cred);
	if (cred && !(cred->cr_ops->crkey_timeout &&
		      cred->cr_ops->crkey_timeout(cred)))
		goto out;
	rcu_read_unlock();

	new = auth->au_ops->lookup_cred(auth, &acred, 0);
	if (new == cred) {
		put_rpccred(new);
		return true;
	}
	if (IS_ERR_OR_NULL(new)) {
		new = NULL;
		ret = true;
	} else if (new->cr_ops->crkey_timeout &&
		   new->cr_ops->crkey_timeout(new))
		ret = true;

	rcu_read_lock();
	old = rcu_dereference_protected(xchg(&ctx->ll_cred,
					     RCU_INITIALIZER(new)), 1);
out:
	rcu_read_unlock();
	put_rpccred(old);
	return ret;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen)
{
	struct inode *inode = folio_file_mapping(folio)->host;
	struct nfs_inode *nfsi = NFS_I(inode);

	if (nfs_have_delegated_attributes(inode))
		goto out;
	if (nfsi->cache_validity &
	    (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
		return false;
	smp_rmb();
	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
		return false;
out:
	if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
		return false;
	return folio_test_uptodate(folio) != 0;
}

static bool
is_whole_file_wrlock(struct file_lock *fl)
{
	return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
	       fl->fl_type == F_WRLCK;
}

/* If we know the page is up to date, and we're not using byte range locks (or
 * if we have the whole file locked for writing), it may be more efficient to
 * extend the write to cover the entire page in order to avoid fragmentation
 * inefficiencies.
 *
 * If the file is opened for synchronous writes then we can just skip the rest
 * of the checks.
 */
static int nfs_can_extend_write(struct file *file, struct folio *folio,
				unsigned int pagelen)
{
	struct inode *inode = file_inode(file);
	struct file_lock_context *flctx = locks_inode_context(inode);
	struct file_lock *fl;
	int ret;

	if (file->f_flags & O_DSYNC)
		return 0;
	if (!nfs_folio_write_uptodate(folio, pagelen))
		return 0;
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		return 1;
	if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
		       list_empty_careful(&flctx->flc_posix)))
		return 1;

	/* Check to see if there are whole file write locks */
	ret = 0;
	spin_lock(&flctx->flc_lock);
	if (!list_empty(&flctx->flc_posix)) {
		fl = list_first_entry(&flctx->flc_posix, struct file_lock,
				      fl_list);
		if (is_whole_file_wrlock(fl))
			ret = 1;
	} else if (!list_empty(&flctx->flc_flock)) {
		fl = list_first_entry(&flctx->flc_flock, struct file_lock,
				      fl_list);
		if (fl->fl_type == F_WRLCK)
			ret = 1;
	}
	spin_unlock(&flctx->flc_lock);
	return ret;
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_update_folio(struct file *file, struct folio *folio,
		     unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct address_space *mapping = folio_file_mapping(folio);
	struct inode *inode = mapping->host;
	unsigned int pagelen = nfs_folio_length(folio);
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
		(long long)(folio_file_pos(folio) + offset));

	if (!count)
		goto out;

	if (nfs_can_extend_write(file, folio, pagelen)) {
		count = max(count + offset, pagelen);
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, folio, offset, count);
	if (status < 0)
		nfs_set_pageerror(mapping);
out:
	dprintk("NFS: nfs_update_folio returns %d (isize %lld)\n",
		status, (long long)i_size_read(inode));
	return status;
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
	case FLUSH_HIGHPRI:
		return RPC_PRIORITY_HIGH;
	case FLUSH_LOWPRI:
		return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

static void nfs_initiate_write(struct nfs_pgio_header *hdr,
			       struct rpc_message *msg,
			       const struct nfs_rpc_ops *rpc_ops,
			       struct rpc_task_setup *task_setup_data, int how)
{
	int priority = flush_task_priority(how);

	if (IS_SWAPFILE(hdr->inode))
		task_setup_data->flags |= RPC_TASK_SWAPPER;
	task_setup_data->priority = priority;
	rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
	trace_nfs_initiate_write(hdr);
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));

	/* Bump the transmission count */
	req->wb_nio++;
	nfs_mark_request_dirty(req);
	atomic_long_inc(&nfsi->redirtied_pages);
	nfs_page_end_writeback(req);
	nfs_release_request(req);
}

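/*
 * Error-cleanup path for a failed flush: requests whose error is fatal
 * on the server are completed with that error; all others are simply
 * redirtied so a later writeback pass can retry them.
 */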
static void nfs_async_write_error(struct list_head *head, int error)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		if (nfs_error_is_fatal_on_server(error))
			nfs_write_error(req, error);
		else
			nfs_redirty_request(req);
	}
}

static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	nfs_async_write_error(&hdr->pages, 0);
}

static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
	.init_hdr = nfs_async_write_init,
	.error_cleanup = nfs_async_write_error,
	.completion = nfs_write_completion,
	.reschedule_io = nfs_async_write_reschedule_io,
};

void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
			   struct inode *inode, int ioflags, bool force_mds,
			   const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
	if (server->pnfs_curr_ld && !force_mds)
		pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
			server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
	struct nfs_pgio_mirror *mirror;

	if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
		pgio->pg_ops->pg_cleanup(pgio);

	pgio->pg_ops = &nfs_pgio_rw_ops;

	nfs_pageio_stop_mirroring(pgio);

	mirror = &pgio->pg_mirrors[0];
	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);


void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

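/*
 * If the server did not return a post-op size, synthesize one from the
 * write offset plus bytes written.  Only trust the result when it would
 * grow the file: a size below our current i_size has its
 * NFS_ATTR_FATTR_SIZE bit cleared so a stale attribute cannot shrink
 * the inode.
 */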
1506 | static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, |
1507 | struct nfs_fattr *fattr) |
1508 | { |
1509 | struct nfs_pgio_args *argp = &hdr->args; |
1510 | struct nfs_pgio_res *resp = &hdr->res; |
1511 | u64 size = argp->offset + resp->count; |
1512 | |
1513 | if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) |
1514 | fattr->size = size; |
1515 | if (nfs_size_to_loff_t(size: fattr->size) < i_size_read(inode: hdr->inode)) { |
1516 | fattr->valid &= ~NFS_ATTR_FATTR_SIZE; |
1517 | return; |
1518 | } |
1519 | if (size != fattr->size) |
1520 | return; |
1521 | /* Set attribute barrier */ |
1522 | nfs_fattr_set_barrier(fattr); |
1523 | /* ...and update size */ |
1524 | fattr->valid |= NFS_ATTR_FATTR_SIZE; |
1525 | } |
1526 | |
1527 | void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) |
1528 | { |
1529 | struct nfs_fattr *fattr = &hdr->fattr; |
1530 | struct inode *inode = hdr->inode; |
1531 | |
1532 | spin_lock(lock: &inode->i_lock); |
1533 | nfs_writeback_check_extend(hdr, fattr); |
1534 | nfs_post_op_update_inode_force_wcc_locked(inode, fattr); |
1535 | spin_unlock(lock: &inode->i_lock); |
1536 | } |
1537 | EXPORT_SYMBOL_GPL(nfs_writeback_update_inode); |
1538 | |
1539 | /* |
1540 | * This function is called when the WRITE call is complete. |
1541 | */ |
1542 | static int nfs_writeback_done(struct rpc_task *task, |
1543 | struct nfs_pgio_header *hdr, |
1544 | struct inode *inode) |
1545 | { |
1546 | int status; |
1547 | |
1548 | /* |
1549 | * ->write_done will attempt to use post-op attributes to detect |
1550 | * conflicting writes by other clients. A strict interpretation |
1551 | * of close-to-open would allow us to continue caching even if |
1552 | * another writer had changed the file, but some applications |
1553 | * depend on tighter cache coherency when writing. |
1554 | */ |
1555 | status = NFS_PROTO(inode)->write_done(task, hdr); |
1556 | if (status != 0) |
1557 | return status; |
1558 | |
1559 | nfs_add_stats(inode, stat: NFSIOS_SERVERWRITTENBYTES, addend: hdr->res.count); |
1560 | trace_nfs_writeback_done(task, hdr); |
1561 | |
1562 | if (task->tk_status >= 0) { |
1563 | enum nfs3_stable_how committed = hdr->res.verf->committed; |
1564 | |
1565 | if (committed == NFS_UNSTABLE) { |
1566 | /* |
1567 | * We have some uncommitted data on the server at |
1568 | * this point, so ensure that we keep track of that |
1569 | * fact irrespective of what later writes do. |
1570 | */ |
1571 | set_bit(nr: NFS_IOHDR_UNSTABLE_WRITES, addr: &hdr->flags); |
1572 | } |
1573 | |
1574 | if (committed < hdr->args.stable) { |
1575 | /* We tried a write call, but the server did not |
1576 | * commit data to stable storage even though we |
1577 | * requested it. |
1578 | * Note: There is a known bug in Tru64 < 5.0 in which |
1579 | * the server reports NFS_DATA_SYNC, but performs |
1580 | * NFS_FILE_SYNC. We therefore implement this checking |
1581 | * as a dprintk() in order to avoid filling syslog. |
1582 | */ |
1583 | static unsigned long complain; |
1584 | |
1585 | /* Note this will print the MDS for a DS write */ |
1586 | if (time_before(complain, jiffies)) { |
1587 | dprintk("NFS: faulty NFS server %s:" |
1588 | " (committed = %d) != (stable = %d)\n" , |
1589 | NFS_SERVER(inode)->nfs_client->cl_hostname, |
1590 | committed, hdr->args.stable); |
1591 | complain = jiffies + 300 * HZ; |
1592 | } |
1593 | } |
1594 | } |
1595 | |
1596 | /* Deal with the suid/sgid bit corner case */ |
1597 | if (nfs_should_remove_suid(inode)) { |
1598 | spin_lock(lock: &inode->i_lock); |
1599 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); |
1600 | spin_unlock(lock: &inode->i_lock); |
1601 | } |
1602 | return 0; |
1603 | } |
1604 | |
1605 | /* |
1606 | * This function is called when the WRITE call is complete. |
1607 | */ |
1608 | static void nfs_writeback_result(struct rpc_task *task, |
1609 | struct nfs_pgio_header *hdr) |
1610 | { |
1611 | struct nfs_pgio_args *argp = &hdr->args; |
1612 | struct nfs_pgio_res *resp = &hdr->res; |
1613 | |
1614 | if (resp->count < argp->count) { |
1615 | static unsigned long complain; |
1616 | |
		/* This is a short write! */
		nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1619 | |
1620 | /* Has the server at least made some progress? */ |
1621 | if (resp->count == 0) { |
1622 | if (time_before(complain, jiffies)) { |
1623 | printk(KERN_WARNING |
1624 | "NFS: Server wrote zero bytes, expected %u.\n" , |
1625 | argp->count); |
1626 | complain = jiffies + 300 * HZ; |
1627 | } |
			nfs_set_pgio_error(hdr, -EIO, argp->offset);
1629 | task->tk_status = -EIO; |
1630 | return; |
1631 | } |
1632 | |
		/* For non-RPC-based layout drivers, retry through the MDS */
1634 | if (!task->tk_ops) { |
1635 | hdr->pnfs_error = -EAGAIN; |
1636 | return; |
1637 | } |
1638 | |
1639 | /* Was this an NFSv2 write or an NFSv3 stable write? */ |
1640 | if (resp->verf->committed != NFS_UNSTABLE) { |
1641 | /* Resend from where the server left off */ |
1642 | hdr->mds_offset += resp->count; |
1643 | argp->offset += resp->count; |
1644 | argp->pgbase += resp->count; |
1645 | argp->count -= resp->count; |
1646 | } else { |
1647 | /* Resend as a stable write in order to avoid |
1648 | * headaches in the case of a server crash. |
1649 | */ |
1650 | argp->stable = NFS_FILE_SYNC; |
1651 | } |
1652 | resp->count = 0; |
1653 | resp->verf->committed = 0; |
1654 | rpc_restart_call_prepare(task); |
1655 | } |
1656 | } |
1657 | |
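/*
 * Wait (killably) until all COMMIT RPCs outstanding against this cinfo
 * have completed, i.e. until rpcs_out drops to zero.
 */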
1658 | static int wait_on_commit(struct nfs_mds_commit_info *cinfo) |
1659 | { |
1660 | return wait_var_event_killable(&cinfo->rpcs_out, |
1661 | !atomic_read(&cinfo->rpcs_out)); |
1662 | } |
1663 | |
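/* Account for a newly launched COMMIT RPC against this cinfo. */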
1664 | static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) |
1665 | { |
	atomic_inc(&cinfo->rpcs_out);
1667 | } |
1668 | |
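/*
 * Drop one COMMIT RPC from the outstanding count. Returns true, after
 * waking any waiters, if this was the last one.
 */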
1669 | bool nfs_commit_end(struct nfs_mds_commit_info *cinfo) |
1670 | { |
	if (atomic_dec_and_test(&cinfo->rpcs_out)) {
		wake_up_var(&cinfo->rpcs_out);
1673 | return true; |
1674 | } |
1675 | return false; |
1676 | } |
1677 | |
1678 | void nfs_commitdata_release(struct nfs_commit_data *data) |
1679 | { |
	put_nfs_open_context(data->context);
1681 | nfs_commit_free(data); |
1682 | } |
1683 | EXPORT_SYMBOL_GPL(nfs_commitdata_release); |
1684 | |
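/*
 * Set up and launch an asynchronous COMMIT RPC task. If FLUSH_SYNC is
 * set in @how, also wait for the task to complete before returning.
 */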
1685 | int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, |
1686 | const struct nfs_rpc_ops *nfs_ops, |
1687 | const struct rpc_call_ops *call_ops, |
1688 | int how, int flags) |
1689 | { |
1690 | struct rpc_task *task; |
1691 | int priority = flush_task_priority(how); |
1692 | struct rpc_message msg = { |
1693 | .rpc_argp = &data->args, |
1694 | .rpc_resp = &data->res, |
1695 | .rpc_cred = data->cred, |
1696 | }; |
1697 | struct rpc_task_setup task_setup_data = { |
1698 | .task = &data->task, |
1699 | .rpc_client = clnt, |
1700 | .rpc_message = &msg, |
1701 | .callback_ops = call_ops, |
1702 | .callback_data = data, |
1703 | .workqueue = nfsiod_workqueue, |
1704 | .flags = RPC_TASK_ASYNC | flags, |
1705 | .priority = priority, |
1706 | }; |
1707 | |
	if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE))
1709 | task_setup_data.flags |= RPC_TASK_MOVEABLE; |
1710 | |
1711 | /* Set up the initial task struct. */ |
1712 | nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); |
1713 | trace_nfs_initiate_commit(data); |
1714 | |
1715 | dprintk("NFS: initiated commit call\n" ); |
1716 | |
1717 | task = rpc_run_task(&task_setup_data); |
	if (IS_ERR(task))
		return PTR_ERR(task);
1720 | if (how & FLUSH_SYNC) |
1721 | rpc_wait_for_completion_task(task); |
1722 | rpc_put_task(task); |
1723 | return 0; |
1724 | } |
1725 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); |
1726 | |
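/*
 * Return the offset of the byte just beyond the highest request in
 * @head: the "last write byte" (lwb) recorded for a pNFS commit.
 */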
1727 | static loff_t nfs_get_lwb(struct list_head *head) |
1728 | { |
1729 | loff_t lwb = 0; |
1730 | struct nfs_page *req; |
1731 | |
1732 | list_for_each_entry(req, head, wb_list) |
1733 | if (lwb < (req_offset(req) + req->wb_bytes)) |
1734 | lwb = req_offset(req) + req->wb_bytes; |
1735 | |
1736 | return lwb; |
1737 | } |
1738 | |
1739 | /* |
1740 | * Set up the argument/result storage required for the RPC call. |
1741 | */ |
1742 | void nfs_init_commit(struct nfs_commit_data *data, |
1743 | struct list_head *head, |
1744 | struct pnfs_layout_segment *lseg, |
1745 | struct nfs_commit_info *cinfo) |
1746 | { |
1747 | struct nfs_page *first; |
1748 | struct nfs_open_context *ctx; |
1749 | struct inode *inode; |
1750 | |
1751 | /* Set up the RPC argument and reply structs |
1752 | * NB: take care not to mess about with data->commit et al. */ |
1753 | |
1754 | if (head) |
		list_splice_init(head, &data->pages);

	first = nfs_list_entry(data->pages.next);
	ctx = nfs_req_openctx(first);
	inode = d_inode(ctx->dentry);
1760 | |
1761 | data->inode = inode; |
1762 | data->cred = ctx->cred; |
1763 | data->lseg = lseg; /* reference transferred */ |
1764 | /* only set lwb for pnfs commit */ |
1765 | if (lseg) |
		data->lwb = nfs_get_lwb(&data->pages);
1767 | data->mds_ops = &nfs_commit_ops; |
1768 | data->completion_ops = cinfo->completion_ops; |
1769 | data->dreq = cinfo->dreq; |
1770 | |
	data->args.fh = NFS_FH(data->inode);
1772 | /* Note: we always request a commit of the entire inode */ |
1773 | data->args.offset = 0; |
1774 | data->args.count = 0; |
1775 | data->context = get_nfs_open_context(ctx); |
1776 | data->res.fattr = &data->fattr; |
1777 | data->res.verf = &data->verf; |
	nfs_fattr_init(&data->fattr);
	nfs_commit_begin(cinfo->mds);
1780 | } |
1781 | EXPORT_SYMBOL_GPL(nfs_init_commit); |
1782 | |
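/*
 * Put each request on @page_list back on the appropriate commit list so
 * that the commit can be retried later.
 */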
1783 | void nfs_retry_commit(struct list_head *page_list, |
1784 | struct pnfs_layout_segment *lseg, |
1785 | struct nfs_commit_info *cinfo, |
1786 | u32 ds_commit_idx) |
1787 | { |
1788 | struct nfs_page *req; |
1789 | |
	while (!list_empty(page_list)) {
		req = nfs_list_entry(page_list->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
		nfs_folio_clear_commit(nfs_page_to_folio(req));
1795 | nfs_unlock_and_release_request(req); |
1796 | } |
1797 | } |
1798 | EXPORT_SYMBOL_GPL(nfs_retry_commit); |
1799 | |
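/* Reschedule a write by simply redirtying its folio. */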
1800 | static void nfs_commit_resched_write(struct nfs_commit_info *cinfo, |
1801 | struct nfs_page *req) |
1802 | { |
1803 | struct folio *folio = nfs_page_to_folio(req); |
1804 | |
	filemap_dirty_folio(folio_mapping(folio), folio);
1806 | } |
1807 | |
1808 | /* |
1809 | * Commit dirty pages |
1810 | */ |
1811 | static int |
1812 | nfs_commit_list(struct inode *inode, struct list_head *head, int how, |
1813 | struct nfs_commit_info *cinfo) |
1814 | { |
1815 | struct nfs_commit_data *data; |
1816 | unsigned short task_flags = 0; |
1817 | |
1818 | /* another commit raced with us */ |
1819 | if (list_empty(head)) |
1820 | return 0; |
1821 | |
1822 | data = nfs_commitdata_alloc(); |
1823 | if (!data) { |
1824 | nfs_retry_commit(head, NULL, cinfo, -1); |
1825 | return -ENOMEM; |
1826 | } |
1827 | |
1828 | /* Set up the argument struct */ |
1829 | nfs_init_commit(data, head, NULL, cinfo); |
1830 | if (NFS_SERVER(inode)->nfs_client->cl_minorversion) |
1831 | task_flags = RPC_TASK_MOVEABLE; |
1832 | return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), |
1833 | data->mds_ops, how, |
1834 | RPC_TASK_CRED_NOREF | task_flags); |
1835 | } |
1836 | |
1837 | /* |
1838 | * COMMIT call returned |
1839 | */ |
1840 | static void nfs_commit_done(struct rpc_task *task, void *calldata) |
1841 | { |
1842 | struct nfs_commit_data *data = calldata; |
1843 | |
1844 | /* Call the NFS version-specific code */ |
	NFS_PROTO(data->inode)->commit_done(task, data);
1846 | trace_nfs_commit_done(task, data); |
1847 | } |
1848 | |
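/*
 * Process the requests covered by a completed COMMIT: on success with a
 * matching write verifier, remove each request from the inode; on a
 * verifier mismatch, redirty the page so it gets written (and committed)
 * again; on error, fail the request.
 */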
1849 | static void nfs_commit_release_pages(struct nfs_commit_data *data) |
1850 | { |
1851 | const struct nfs_writeverf *verf = data->res.verf; |
1852 | struct nfs_page *req; |
1853 | int status = data->task.tk_status; |
1854 | struct nfs_commit_info cinfo; |
1855 | struct nfs_server *nfss; |
1856 | struct folio *folio; |
1857 | |
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
1860 | nfs_list_remove_request(req); |
1861 | folio = nfs_page_to_folio(req); |
1862 | nfs_folio_clear_commit(folio); |
1863 | |
1864 | dprintk("NFS: commit (%s/%llu %d@%lld)" , |
1865 | nfs_req_openctx(req)->dentry->d_sb->s_id, |
1866 | (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)), |
1867 | req->wb_bytes, |
1868 | (long long)req_offset(req)); |
1869 | if (status < 0) { |
1870 | if (folio) { |
				trace_nfs_commit_error(data->inode, req,
						       status);
				nfs_mapping_set_error(folio, status);
1874 | nfs_inode_remove_request(req); |
1875 | } |
1876 | dprintk_cont(", error = %d\n" , status); |
1877 | goto next; |
1878 | } |
1879 | |
1880 | /* Okay, COMMIT succeeded, apparently. Check the verifier |
1881 | * returned by the server against all stored verfs. */ |
1882 | if (nfs_write_match_verf(verf, req)) { |
1883 | /* We have a match */ |
1884 | if (folio) |
1885 | nfs_inode_remove_request(req); |
1886 | dprintk_cont(" OK\n" ); |
1887 | goto next; |
1888 | } |
1889 | /* We have a mismatch. Write the page again */ |
1890 | dprintk_cont(" mismatch\n" ); |
1891 | nfs_mark_request_dirty(req); |
1892 | atomic_long_inc(v: &NFS_I(inode: data->inode)->redirtied_pages); |
1893 | next: |
1894 | nfs_unlock_and_release_request(req); |
1895 | /* Latency breaker */ |
1896 | cond_resched(); |
1897 | } |
	nfss = NFS_SERVER(data->inode);
	if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
1900 | nfss->write_congested = 0; |
1901 | |
1902 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); |
	nfs_commit_end(cinfo.mds);
1904 | } |
1905 | |
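/* rpc_release callback: run the completion handler, then free the commit data. */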
1906 | static void nfs_commit_release(void *calldata) |
1907 | { |
1908 | struct nfs_commit_data *data = calldata; |
1909 | |
1910 | data->completion_ops->completion(data); |
1911 | nfs_commitdata_release(calldata); |
1912 | } |
1913 | |
1914 | static const struct rpc_call_ops nfs_commit_ops = { |
1915 | .rpc_call_prepare = nfs_commit_prepare, |
1916 | .rpc_call_done = nfs_commit_done, |
1917 | .rpc_release = nfs_commit_release, |
1918 | }; |
1919 | |
1920 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { |
1921 | .completion = nfs_commit_release_pages, |
1922 | .resched_write = nfs_commit_resched_write, |
1923 | }; |
1924 | |
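/*
 * Try to commit the list through the pNFS layout driver first; fall
 * back to a commit against the MDS if the driver did not attempt it.
 */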
1925 | int nfs_generic_commit_list(struct inode *inode, struct list_head *head, |
1926 | int how, struct nfs_commit_info *cinfo) |
1927 | { |
1928 | int status; |
1929 | |
	status = pnfs_commit_list(inode, head, how, cinfo);
1931 | if (status == PNFS_NOT_ATTEMPTED) |
1932 | status = nfs_commit_list(inode, head, how, cinfo); |
1933 | return status; |
1934 | } |
1935 | |
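/*
 * Scan the inode for requests awaiting commit and send COMMIT RPCs for
 * them, optionally (FLUSH_SYNC) waiting for all outstanding commits to
 * finish. A non-blocking @wbc has its nr_to_write budget charged for
 * the requests scanned.
 */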
1936 | static int __nfs_commit_inode(struct inode *inode, int how, |
1937 | struct writeback_control *wbc) |
1938 | { |
1939 | LIST_HEAD(head); |
1940 | struct nfs_commit_info cinfo; |
1941 | int may_wait = how & FLUSH_SYNC; |
1942 | int ret, nscan; |
1943 | |
1944 | how &= ~FLUSH_SYNC; |
	nfs_init_cinfo_from_inode(&cinfo, inode);
	nfs_commit_begin(cinfo.mds);
1947 | for (;;) { |
		ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
1949 | if (ret <= 0) |
1950 | break; |
		ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
1952 | if (ret < 0) |
1953 | break; |
1954 | ret = 0; |
1955 | if (wbc && wbc->sync_mode == WB_SYNC_NONE) { |
1956 | if (nscan < wbc->nr_to_write) |
1957 | wbc->nr_to_write -= nscan; |
1958 | else |
1959 | wbc->nr_to_write = 0; |
1960 | } |
1961 | if (nscan < INT_MAX) |
1962 | break; |
1963 | cond_resched(); |
1964 | } |
	nfs_commit_end(cinfo.mds);
1966 | if (ret || !may_wait) |
1967 | return ret; |
	return wait_on_commit(cinfo.mds);
1969 | } |
1970 | |
1971 | int nfs_commit_inode(struct inode *inode, int how) |
1972 | { |
1973 | return __nfs_commit_inode(inode, how, NULL); |
1974 | } |
1975 | EXPORT_SYMBOL_GPL(nfs_commit_inode); |
1976 | |
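/*
 * ->write_inode callback: commit outstanding data for the inode, or mark
 * the inode dirty again if requests or commits remain outstanding.
 */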
1977 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1978 | { |
1979 | struct nfs_inode *nfsi = NFS_I(inode); |
1980 | int flags = FLUSH_SYNC; |
1981 | int ret = 0; |
1982 | |
1983 | if (wbc->sync_mode == WB_SYNC_NONE) { |
1984 | /* no commits means nothing needs to be done */ |
		if (!atomic_long_read(&nfsi->commit_info.ncommit))
1986 | goto check_requests_outstanding; |
1987 | |
1988 | /* Don't commit yet if this is a non-blocking flush and there |
1989 | * are a lot of outstanding writes for this mapping. |
1990 | */ |
		if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1992 | goto out_mark_dirty; |
1993 | |
1994 | /* don't wait for the COMMIT response */ |
1995 | flags = 0; |
1996 | } |
1997 | |
	ret = __nfs_commit_inode(inode, flags, wbc);
1999 | if (!ret) { |
2000 | if (flags & FLUSH_SYNC) |
2001 | return 0; |
	} else if (atomic_long_read(&nfsi->commit_info.ncommit))
2003 | goto out_mark_dirty; |
2004 | |
2005 | check_requests_outstanding: |
	if (!atomic_read(&nfsi->commit_info.rpcs_out))
2007 | return ret; |
2008 | out_mark_dirty: |
2009 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
2010 | return ret; |
2011 | } |
2012 | EXPORT_SYMBOL_GPL(nfs_write_inode); |
2013 | |
2014 | /* |
2015 | * Wrapper for filemap_write_and_wait_range() |
2016 | * |
2017 | * Needed for pNFS in order to ensure data becomes visible to the |
2018 | * client. |
2019 | */ |
2020 | int nfs_filemap_write_and_wait_range(struct address_space *mapping, |
2021 | loff_t lstart, loff_t lend) |
2022 | { |
2023 | int ret; |
2024 | |
2025 | ret = filemap_write_and_wait_range(mapping, lstart, lend); |
2026 | if (ret == 0) |
		ret = pnfs_sync_inode(mapping->host, true);
2028 | return ret; |
2029 | } |
2030 | EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range); |
2031 | |
2032 | /* |
2033 | * flush the inode to disk. |
2034 | */ |
2035 | int nfs_wb_all(struct inode *inode) |
2036 | { |
2037 | int ret; |
2038 | |
2039 | trace_nfs_writeback_inode_enter(inode); |
2040 | |
	ret = filemap_write_and_wait(inode->i_mapping);
2042 | if (ret) |
2043 | goto out; |
2044 | ret = nfs_commit_inode(inode, FLUSH_SYNC); |
2045 | if (ret < 0) |
2046 | goto out; |
	pnfs_sync_inode(inode, true);
2048 | ret = 0; |
2049 | |
2050 | out: |
	trace_nfs_writeback_inode_exit(inode, ret);
2052 | return ret; |
2053 | } |
2054 | EXPORT_SYMBOL_GPL(nfs_wb_all); |
2055 | |
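/*
 * Cancel all dirty requests attached to @folio, e.g. ahead of truncation
 * or invalidation of the folio's contents.
 */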
2056 | int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) |
2057 | { |
2058 | struct nfs_page *req; |
2059 | int ret = 0; |
2060 | |
2061 | folio_wait_writeback(folio); |
2062 | |
2063 | /* blocking call to cancel all requests and join to a single (head) |
2064 | * request */ |
2065 | req = nfs_lock_and_join_requests(folio); |
2066 | |
	if (IS_ERR(req)) {
		ret = PTR_ERR(req);
2069 | } else if (req) { |
2070 | /* all requests from this folio have been cancelled by |
2071 | * nfs_lock_and_join_requests, so just remove the head |
2072 | * request from the inode / page_private pointer and |
2073 | * release it */ |
2074 | nfs_inode_remove_request(req); |
2075 | nfs_unlock_and_release_request(req); |
2076 | } |
2077 | |
2078 | return ret; |
2079 | } |
2080 | |
2081 | /** |
 * nfs_wb_folio - Write back all requests on one folio
 * @inode: pointer to the inode
2084 | * @folio: pointer to folio |
2085 | * |
2086 | * Assumes that the folio has been locked by the caller, and will |
2087 | * not unlock it. |
2088 | */ |
2089 | int nfs_wb_folio(struct inode *inode, struct folio *folio) |
2090 | { |
2091 | loff_t range_start = folio_file_pos(folio); |
2092 | loff_t range_end = range_start + (loff_t)folio_size(folio) - 1; |
2093 | struct writeback_control wbc = { |
2094 | .sync_mode = WB_SYNC_ALL, |
2095 | .nr_to_write = 0, |
2096 | .range_start = range_start, |
2097 | .range_end = range_end, |
2098 | }; |
2099 | int ret; |
2100 | |
2101 | trace_nfs_writeback_folio(inode, folio); |
2102 | |
2103 | for (;;) { |
2104 | folio_wait_writeback(folio); |
2105 | if (folio_clear_dirty_for_io(folio)) { |
			ret = nfs_writepage_locked(folio, &wbc);
2107 | if (ret < 0) |
2108 | goto out_error; |
2109 | continue; |
2110 | } |
2111 | ret = 0; |
2112 | if (!folio_test_private(folio)) |
2113 | break; |
2114 | ret = nfs_commit_inode(inode, FLUSH_SYNC); |
2115 | if (ret < 0) |
2116 | goto out_error; |
2117 | } |
2118 | out_error: |
2119 | trace_nfs_writeback_folio_done(inode, folio, ret); |
2120 | return ret; |
2121 | } |
2122 | |
2123 | #ifdef CONFIG_MIGRATION |
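/*
 * Migrate the page cache folio @src to @dst, refusing the migration
 * while the folio is tied to in-flight NFS requests or fscache I/O.
 */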
2124 | int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, |
2125 | struct folio *src, enum migrate_mode mode) |
2126 | { |
2127 | /* |
2128 | * If the private flag is set, the folio is currently associated with |
2129 | * an in-progress read or write request. Don't try to migrate it. |
2130 | * |
2131 | * FIXME: we could do this in principle, but we'll need a way to ensure |
2132 | * that we can safely release the inode reference while holding |
2133 | * the folio lock. |
2134 | */ |
	if (folio_test_private(src))
2136 | return -EBUSY; |
2137 | |
2138 | if (folio_test_fscache(src)) { |
2139 | if (mode == MIGRATE_ASYNC) |
2140 | return -EBUSY; |
		folio_wait_fscache(src);
2142 | } |
2143 | |
2144 | return migrate_folio(mapping, dst, src, mode); |
2145 | } |
2146 | #endif |
2147 | |
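/*
 * Create the slab caches and mempools used for write and commit data,
 * and size the writeback congestion limit from available memory.
 */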
2148 | int __init nfs_init_writepagecache(void) |
2149 | { |
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_pgio_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
2154 | if (nfs_wdata_cachep == NULL) |
2155 | return -ENOMEM; |
2156 | |
	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
2159 | if (nfs_wdata_mempool == NULL) |
2160 | goto out_destroy_write_cache; |
2161 | |
	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
					     sizeof(struct nfs_commit_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
2166 | if (nfs_cdata_cachep == NULL) |
2167 | goto out_destroy_write_mempool; |
2168 | |
	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_cdata_cachep);
2171 | if (nfs_commit_mempool == NULL) |
2172 | goto out_destroy_commit_cache; |
2173 | |
2174 | /* |
2175 | * NFS congestion size, scale with available memory. |
2176 | * |
2177 | * 64MB: 8192k |
2178 | * 128MB: 11585k |
2179 | * 256MB: 16384k |
2180 | * 512MB: 23170k |
2181 | * 1GB: 32768k |
2182 | * 2GB: 46340k |
2183 | * 4GB: 65536k |
2184 | * 8GB: 92681k |
2185 | * 16GB: 131072k |
2186 | * |
2187 | * This allows larger machines to have larger/more transfers. |
2188 | * Limit the default to 256M |
2189 | */ |
2190 | nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10); |
2191 | if (nfs_congestion_kb > 256*1024) |
2192 | nfs_congestion_kb = 256*1024; |
2193 | |
2194 | return 0; |
2195 | |
2196 | out_destroy_commit_cache: |
	kmem_cache_destroy(nfs_cdata_cachep);
out_destroy_write_mempool:
	mempool_destroy(nfs_wdata_mempool);
out_destroy_write_cache:
	kmem_cache_destroy(nfs_wdata_cachep);
2202 | return -ENOMEM; |
2203 | } |
2204 | |
2205 | void nfs_destroy_writepagecache(void) |
2206 | { |
	mempool_destroy(nfs_commit_mempool);
	kmem_cache_destroy(nfs_cdata_cachep);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
2211 | } |
2212 | |
2213 | static const struct nfs_rw_ops nfs_rw_write_ops = { |
2214 | .rw_alloc_header = nfs_writehdr_alloc, |
2215 | .rw_free_header = nfs_writehdr_free, |
2216 | .rw_done = nfs_writeback_done, |
2217 | .rw_result = nfs_writeback_result, |
2218 | .rw_initiate = nfs_initiate_write, |
2219 | }; |
2220 | |