filecache.c source code [linux/fs/nfsd/filecache.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* The NFSD open file cache.
4	*
5	* (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
6	*
7	* An nfsd_file object is a per-file collection of open state that binds
8	* together:
9	* - a struct file *
10	* - a user credential
11	* - a network namespace
12	* - a read-ahead context
13	* - monitoring for writeback errors
14	*
15	* nfsd_file objects are reference-counted. Consumers acquire a new
16	* object via the nfsd_file_acquire API. They manage their interest in
17	* the acquired object, and hence the object's reference count, via
18	* nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
19	* object:
20	*
21	* * non-garbage-collected: When a consumer wants to precisely control
22	* the lifetime of a file's open state, it acquires a non-garbage-
23	* collected nfsd_file. The final nfsd_file_put releases the open
24	* state immediately.
25	*
26	* * garbage-collected: When a consumer does not control the lifetime
27	* of open state, it acquires a garbage-collected nfsd_file. The
28	* final nfsd_file_put allows the open state to linger for a period
29	* during which it may be re-used.
30	*/
31
32	#include <linux/hash.h>
33	#include <linux/slab.h>
34	#include <linux/file.h>
35	#include <linux/pagemap.h>
36	#include <linux/sched.h>
37	#include <linux/list_lru.h>
38	#include <linux/fsnotify_backend.h>
39	#include <linux/fsnotify.h>
40	#include <linux/seq_file.h>
41	#include <linux/rhashtable.h>
42
43	#include "vfs.h"
44	#include "nfsd.h"
45	#include "nfsfh.h"
46	#include "netns.h"
47	#include "filecache.h"
48	#include "trace.h"
49
50	#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
51
52	#define NFSD_FILE_CACHE_UP (0)
53
54	/ We only care about NFSD_MAY_READ/WRITE for this cache /
55	#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ\|NFSD_MAY_WRITE)
56
57	static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
58	static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
59	static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
60	static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
61	static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
62
63	struct nfsd_fcache_disposal {
64	struct work_struct work;
65	spinlock_t lock;
66	struct list_head freeme;
67	};
68
69	static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
70
71	static struct kmem_cache *nfsd_file_slab;
72	static struct kmem_cache *nfsd_file_mark_slab;
73	static struct list_lru nfsd_file_lru;
74	static unsigned long nfsd_file_flags;
75	static struct fsnotify_group *nfsd_file_fsnotify_group;
76	static struct delayed_work nfsd_filecache_laundrette;
77	static struct rhltable nfsd_file_rhltable
78	____cacheline_aligned_in_smp;
79
80	static bool
81	nfsd_match_cred(const struct cred c1, const* struct cred *c2)
82	{
83	int i;
84
85	if (!uid_eq(left: c1->fsuid, right: c2->fsuid))
86	return false;
87	if (!gid_eq(left: c1->fsgid, right: c2->fsgid))
88	return false;
89	if (c1->group_info == NULL \|\| c2->group_info == NULL)
90	return c1->group_info == c2->group_info;
91	if (c1->group_info->ngroups != c2->group_info->ngroups)
92	return false;
93	for (i = `0`; i < c1->group_info->ngroups; i++) {
94	if (!gid_eq(left: c1->group_info->gid[i], right: c2->group_info->gid[i]))
95	return false;
96	}
97	return true;
98	}
99
100	static const struct rhashtable_params nfsd_file_rhash_params = {
101	.key_len = sizeof_field(struct nfsd_file, nf_inode),
102	.key_offset = offsetof(struct nfsd_file, nf_inode),
103	.head_offset = offsetof(struct nfsd_file, nf_rlist),
104
105	/*
106	* Start with a single page hash table to reduce resizing churn
107	* on light workloads.
108	*/
109	.min_size = `256`,
110	.automatic_shrinking = true,
111	};
112
113	static void
114	nfsd_file_schedule_laundrette(void)
115	{
116	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
117	queue_delayed_work(wq: system_wq, dwork: &nfsd_filecache_laundrette,
118	NFSD_LAUNDRETTE_DELAY);
119	}
120
121	static void
122	nfsd_file_slab_free(struct rcu_head *rcu)
123	{
124	struct nfsd_file nf = container_of(rcu, struct* nfsd_file, nf_rcu);
125
126	put_cred(cred: nf->nf_cred);
127	kmem_cache_free(s: nfsd_file_slab, objp: nf);
128	}
129
130	static void
131	nfsd_file_mark_free(struct fsnotify_mark *mark)
132	{
133	struct nfsd_file_mark nfm = container_of(mark, struct* nfsd_file_mark,
134	nfm_mark);
135
136	kmem_cache_free(s: nfsd_file_mark_slab, objp: nfm);
137	}
138
139	static struct nfsd_file_mark *
140	nfsd_file_mark_get(struct nfsd_file_mark *nfm)
141	{
142	if (!refcount_inc_not_zero(r: &nfm->nfm_ref))
143	return NULL;
144	return nfm;
145	}
146
147	static void
148	nfsd_file_mark_put(struct nfsd_file_mark *nfm)
149	{
150	if (refcount_dec_and_test(r: &nfm->nfm_ref)) {
151	fsnotify_destroy_mark(mark: &nfm->nfm_mark, group: nfsd_file_fsnotify_group);
152	fsnotify_put_mark(mark: &nfm->nfm_mark);
153	}
154	}
155
156	static struct nfsd_file_mark *
157	nfsd_file_mark_find_or_create(struct nfsd_file nf, struct* inode *inode)
158	{
159	int err;
160	struct fsnotify_mark *mark;
161	struct nfsd_file_mark nfm = NULL, new;
162
163	do {
164	fsnotify_group_lock(group: nfsd_file_fsnotify_group);
165	mark = fsnotify_find_mark(connp: &inode->i_fsnotify_marks,
166	group: nfsd_file_fsnotify_group);
167	if (mark) {
168	nfm = nfsd_file_mark_get(container_of(mark,
169	struct nfsd_file_mark,
170	nfm_mark));
171	fsnotify_group_unlock(group: nfsd_file_fsnotify_group);
172	if (nfm) {
173	fsnotify_put_mark(mark);
174	break;
175	}
176	/ Avoid soft lockup race with nfsd_file_mark_put() /
177	fsnotify_destroy_mark(mark, group: nfsd_file_fsnotify_group);
178	fsnotify_put_mark(mark);
179	} else {
180	fsnotify_group_unlock(group: nfsd_file_fsnotify_group);
181	}
182
183	/ allocate a new nfm /
184	new = kmem_cache_alloc(cachep: nfsd_file_mark_slab, GFP_KERNEL);
185	if (!new)
186	return NULL;
187	fsnotify_init_mark(mark: &new->nfm_mark, group: nfsd_file_fsnotify_group);
188	new->nfm_mark.mask = FS_ATTRIB\|FS_DELETE_SELF;
189	refcount_set(r: &new->nfm_ref, n: `1`);
190
191	err = fsnotify_add_inode_mark(mark: &new->nfm_mark, inode, add_flags: `0`);
192
193	/*
194	* If the add was successful, then return the object.
195	* Otherwise, we need to put the reference we hold on the
196	* nfm_mark. The fsnotify code will take a reference and put
197	* it on failure, so we can't just free it directly. It's also
198	* not safe to call fsnotify_destroy_mark on it as the
199	* mark->group will be NULL. Thus, we can't let the nfm_ref
200	* counter drive the destruction at this point.
201	*/
202	if (likely(!err))
203	nfm = new;
204	else
205	fsnotify_put_mark(mark: &new->nfm_mark);
206	} while (unlikely(err == -EEXIST));
207
208	return nfm;
209	}
210
211	static struct nfsd_file *
212	nfsd_file_alloc(struct net net, struct* inode inode, unsigned* char need,
213	bool want_gc)
214	{
215	struct nfsd_file *nf;
216
217	nf = kmem_cache_alloc(cachep: nfsd_file_slab, GFP_KERNEL);
218	if (unlikely(!nf))
219	return NULL;
220
221	INIT_LIST_HEAD(list: &nf->nf_lru);
222	nf->nf_birthtime = ktime_get();
223	nf->nf_file = NULL;
224	nf->nf_cred = get_current_cred();
225	nf->nf_net = net;
226	nf->nf_flags = want_gc ?
227	BIT(NFSD_FILE_HASHED) \| BIT(NFSD_FILE_PENDING) \| BIT(NFSD_FILE_GC) :
228	BIT(NFSD_FILE_HASHED) \| BIT(NFSD_FILE_PENDING);
229	nf->nf_inode = inode;
230	refcount_set(r: &nf->nf_ref, n: `1`);
231	nf->nf_may = need;
232	nf->nf_mark = NULL;
233	return nf;
234	}
235
236	/**
237	* nfsd_file_check_write_error - check for writeback errors on a file
238	* @nf: nfsd_file to check for writeback errors
239	*
240	* Check whether a nfsd_file has an unseen error. Reset the write
241	* verifier if so.
242	*/
243	static void
244	nfsd_file_check_write_error(struct nfsd_file *nf)
245	{
246	struct file *file = nf->nf_file;
247
248	if ((file->f_mode & FMODE_WRITE) &&
249	filemap_check_wb_err(mapping: file->f_mapping, READ_ONCE(file->f_wb_err)))
250	nfsd_reset_write_verifier(nn: net_generic(net: nf->nf_net, id: nfsd_net_id));
251	}
252
253	static void
254	nfsd_file_hash_remove(struct nfsd_file *nf)
255	{
256	trace_nfsd_file_unhash(nf);
257	rhltable_remove(hlt: &nfsd_file_rhltable, list: &nf->nf_rlist,
258	params: nfsd_file_rhash_params);
259	}
260
261	static bool
262	nfsd_file_unhash(struct nfsd_file *nf)
263	{
264	if (test_and_clear_bit(NFSD_FILE_HASHED, addr: &nf->nf_flags)) {
265	nfsd_file_hash_remove(nf);
266	return true;
267	}
268	return false;
269	}
270
271	static void
272	nfsd_file_free(struct nfsd_file *nf)
273	{
274	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
275
276	trace_nfsd_file_free(nf);
277
278	this_cpu_inc(nfsd_file_releases);
279	this_cpu_add(nfsd_file_total_age, age);
280
281	nfsd_file_unhash(nf);
282	if (nf->nf_mark)
283	nfsd_file_mark_put(nfm: nf->nf_mark);
284	if (nf->nf_file) {
285	nfsd_file_check_write_error(nf);
286	filp_close(nf->nf_file, NULL);
287	}
288
289	/*
290	* If this item is still linked via nf_lru, that's a bug.
291	* WARN and leak it to preserve system stability.
292	*/
293	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
294	return;
295
296	call_rcu(head: &nf->nf_rcu, func: nfsd_file_slab_free);
297	}
298
299	static bool
300	nfsd_file_check_writeback(struct nfsd_file *nf)
301	{
302	struct file *file = nf->nf_file;
303	struct address_space *mapping;
304
305	/ File not open for write? /
306	if (!(file->f_mode & FMODE_WRITE))
307	return false;
308
309	/*
310	* Some filesystems (e.g. NFS) flush all dirty data on close.
311	* On others, there is no need to wait for writeback.
312	*/
313	if (!(file_inode(f: file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
314	return false;
315
316	mapping = file->f_mapping;
317	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) \|\|
318	mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
319	}
320
321
322	static bool nfsd_file_lru_add(struct nfsd_file *nf)
323	{
324	set_bit(NFSD_FILE_REFERENCED, addr: &nf->nf_flags);
325	if (list_lru_add(lru: &nfsd_file_lru, item: &nf->nf_lru)) {
326	trace_nfsd_file_lru_add(nf);
327	return true;
328	}
329	return false;
330	}
331
332	static bool nfsd_file_lru_remove(struct nfsd_file *nf)
333	{
334	if (list_lru_del(lru: &nfsd_file_lru, item: &nf->nf_lru)) {
335	trace_nfsd_file_lru_del(nf);
336	return true;
337	}
338	return false;
339	}
340
341	struct nfsd_file *
342	nfsd_file_get(struct nfsd_file *nf)
343	{
344	if (nf && refcount_inc_not_zero(r: &nf->nf_ref))
345	return nf;
346	return NULL;
347	}
348
349	/**
350	* nfsd_file_put - put the reference to a nfsd_file
351	* @nf: nfsd_file of which to put the reference
352	*
353	* Put a reference to a nfsd_file. In the non-GC case, we just put the
354	* reference immediately. In the GC case, if the reference would be
355	* the last one, the put it on the LRU instead to be cleaned up later.
356	*/
357	void
358	nfsd_file_put(struct nfsd_file *nf)
359	{
360	might_sleep();
361	trace_nfsd_file_put(nf);
362
363	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
364	test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
365	/*
366	* If this is the last reference (nf_ref == 1), then try to
367	* transfer it to the LRU.
368	*/
369	if (refcount_dec_not_one(r: &nf->nf_ref))
370	return;
371
372	/ Try to add it to the LRU. If that fails, decrement. /
373	if (nfsd_file_lru_add(nf)) {
374	/ If it's still hashed, we're done /
375	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
376	nfsd_file_schedule_laundrette();
377	return;
378	}
379
380	/*
381	* We're racing with unhashing, so try to remove it from
382	* the LRU. If removal fails, then someone else already
383	* has our reference.
384	*/
385	if (!nfsd_file_lru_remove(nf))
386	return;
387	}
388	}
389	if (refcount_dec_and_test(r: &nf->nf_ref))
390	nfsd_file_free(nf);
391	}
392
393	static void
394	nfsd_file_dispose_list(struct list_head *dispose)
395	{
396	struct nfsd_file *nf;
397
398	while (!list_empty(head: dispose)) {
399	nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
400	list_del_init(entry: &nf->nf_lru);
401	nfsd_file_free(nf);
402	}
403	}
404
405	/**
406	* nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
407	* @dispose: list of nfsd_files to be disposed
408	*
409	* Transfers each file to the "freeme" list for its nfsd_net, to eventually
410	* be disposed of by the per-net garbage collector.
411	*/
412	static void
413	nfsd_file_dispose_list_delayed(struct list_head *dispose)
414	{
415	while(!list_empty(head: dispose)) {
416	struct nfsd_file *nf = list_first_entry(dispose,
417	struct nfsd_file, nf_lru);
418	struct nfsd_net *nn = net_generic(net: nf->nf_net, id: nfsd_net_id);
419	struct nfsd_fcache_disposal *l = nn->fcache_disposal;
420
421	spin_lock(lock: &l->lock);
422	list_move_tail(list: &nf->nf_lru, head: &l->freeme);
423	spin_unlock(lock: &l->lock);
424	queue_work(wq: nfsd_filecache_wq, work: &l->work);
425	}
426	}
427
428	/**
429	* nfsd_file_lru_cb - Examine an entry on the LRU list
430	* @item: LRU entry to examine
431	* @lru: controlling LRU
432	* @lock: LRU list lock (unused)
433	* @arg: dispose list
434	*
435	* Return values:
436	* %LRU_REMOVED: @item was removed from the LRU
437	* %LRU_ROTATE: @item is to be moved to the LRU tail
438	* %LRU_SKIP: @item cannot be evicted
439	*/
440	static enum lru_status
441	nfsd_file_lru_cb(struct list_head item, struct* list_lru_one *lru,
442	spinlock_t lock, void* *arg)
443	__releases(lock)
444	__acquires(lock)
445	{
446	struct list_head *head = arg;
447	struct nfsd_file nf = list_entry(item, struct* nfsd_file, nf_lru);
448
449	/ We should only be dealing with GC entries here /
450	WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
451
452	/*
453	* Don't throw out files that are still undergoing I/O or
454	* that have uncleared errors pending.
455	*/
456	if (nfsd_file_check_writeback(nf)) {
457	trace_nfsd_file_gc_writeback(nf);
458	return LRU_SKIP;
459	}
460
461	/ If it was recently added to the list, skip it /
462	if (test_and_clear_bit(NFSD_FILE_REFERENCED, addr: &nf->nf_flags)) {
463	trace_nfsd_file_gc_referenced(nf);
464	return LRU_ROTATE;
465	}
466
467	/*
468	* Put the reference held on behalf of the LRU. If it wasn't the last
469	* one, then just remove it from the LRU and ignore it.
470	*/
471	if (!refcount_dec_and_test(r: &nf->nf_ref)) {
472	trace_nfsd_file_gc_in_use(nf);
473	list_lru_isolate(list: lru, item: &nf->nf_lru);
474	return LRU_REMOVED;
475	}
476
477	/ Refcount went to zero. Unhash it and queue it to the dispose list /
478	nfsd_file_unhash(nf);
479	list_lru_isolate_move(list: lru, item: &nf->nf_lru, head);
480	this_cpu_inc(nfsd_file_evictions);
481	trace_nfsd_file_gc_disposed(nf);
482	return LRU_REMOVED;
483	}
484
485	static void
486	nfsd_file_gc(void)
487	{
488	LIST_HEAD(dispose);
489	unsigned long ret;
490
491	ret = list_lru_walk(lru: &nfsd_file_lru, isolate: nfsd_file_lru_cb,
492	cb_arg: &dispose, nr_to_walk: list_lru_count(lru: &nfsd_file_lru));
493	trace_nfsd_file_gc_removed(removed: ret, remaining: list_lru_count(lru: &nfsd_file_lru));
494	nfsd_file_dispose_list_delayed(dispose: &dispose);
495	}
496
497	static void
498	nfsd_file_gc_worker(struct work_struct *work)
499	{
500	nfsd_file_gc();
501	if (list_lru_count(lru: &nfsd_file_lru))
502	nfsd_file_schedule_laundrette();
503	}
504
505	static unsigned long
506	nfsd_file_lru_count(struct shrinker s, struct* shrink_control *sc)
507	{
508	return list_lru_count(lru: &nfsd_file_lru);
509	}
510
511	static unsigned long
512	nfsd_file_lru_scan(struct shrinker s, struct* shrink_control *sc)
513	{
514	LIST_HEAD(dispose);
515	unsigned long ret;
516
517	ret = list_lru_shrink_walk(lru: &nfsd_file_lru, sc,
518	isolate: nfsd_file_lru_cb, cb_arg: &dispose);
519	trace_nfsd_file_shrinker_removed(removed: ret, remaining: list_lru_count(lru: &nfsd_file_lru));
520	nfsd_file_dispose_list_delayed(dispose: &dispose);
521	return ret;
522	}
523
/* Allocated and registered in nfsd_file_cache_init() */
static struct shrinker *nfsd_file_shrinker;
525
526	/**
527	* nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
528	* @nf: nfsd_file to attempt to queue
529	* @dispose: private list to queue successfully-put objects
530	*
531	* Unhash an nfsd_file, try to get a reference to it, and then put that
532	* reference. If it's the last reference, queue it to the dispose list.
533	*/
534	static void
535	nfsd_file_cond_queue(struct nfsd_file nf, struct* list_head *dispose)
536	__must_hold(RCU)
537	{
538	int decrement = `1`;
539
540	/ If we raced with someone else unhashing, ignore it /
541	if (!nfsd_file_unhash(nf))
542	return;
543
544	/ If we can't get a reference, ignore it /
545	if (!nfsd_file_get(nf))
546	return;
547
548	/ Extra decrement if we remove from the LRU /
549	if (nfsd_file_lru_remove(nf))
550	++decrement;
551
552	/ If refcount goes to 0, then put on the dispose list /
553	if (refcount_sub_and_test(i: decrement, r: &nf->nf_ref)) {
554	list_add(new: &nf->nf_lru, head: dispose);
555	trace_nfsd_file_closing(nf);
556	}
557	}
558
559	/**
560	* nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
561	* @inode: inode on which to close out nfsd_files
562	* @dispose: list on which to gather nfsd_files to close out
563	*
564	* An nfsd_file represents a struct file being held open on behalf of nfsd.
565	* An open file however can block other activity (such as leases), or cause
566	* undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
567	*
568	* This function is intended to find open nfsd_files when this sort of
569	* conflicting access occurs and then attempt to close those files out.
570	*
571	* Populates the dispose list with entries that have already had their
572	* refcounts go to zero. The actual free of an nfsd_file can be expensive,
573	* so we leave it up to the caller whether it wants to wait or not.
574	*/
575	static void
576	nfsd_file_queue_for_close(struct inode inode, struct* list_head *dispose)
577	{
578	struct rhlist_head tmp, list;
579	struct nfsd_file *nf;
580
581	rcu_read_lock();
582	list = rhltable_lookup(hlt: &nfsd_file_rhltable, key: &inode,
583	params: nfsd_file_rhash_params);
584	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
585	if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
586	continue;
587	nfsd_file_cond_queue(nf, dispose);
588	}
589	rcu_read_unlock();
590	}
591
592	/**
593	* nfsd_file_close_inode - attempt a delayed close of a nfsd_file
594	* @inode: inode of the file to attempt to remove
595	*
596	* Close out any open nfsd_files that can be reaped for @inode. The
597	* actual freeing is deferred to the dispose_list_delayed infrastructure.
598	*
599	* This is used by the fsnotify callbacks and setlease notifier.
600	*/
601	static void
602	nfsd_file_close_inode(struct inode *inode)
603	{
604	LIST_HEAD(dispose);
605
606	nfsd_file_queue_for_close(inode, dispose: &dispose);
607	nfsd_file_dispose_list_delayed(dispose: &dispose);
608	}
609
610	/**
611	* nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
612	* @inode: inode of the file to attempt to remove
613	*
614	* Close out any open nfsd_files that can be reaped for @inode. The
615	* nfsd_files are closed out synchronously.
616	*
617	* This is called from nfsd_rename and nfsd_unlink to avoid silly-renames
618	* when reexporting NFS.
619	*/
620	void
621	nfsd_file_close_inode_sync(struct inode *inode)
622	{
623	struct nfsd_file *nf;
624	LIST_HEAD(dispose);
625
626	trace_nfsd_file_close(inode);
627
628	nfsd_file_queue_for_close(inode, dispose: &dispose);
629	while (!list_empty(head: &dispose)) {
630	nf = list_first_entry(&dispose, struct nfsd_file, nf_lru);
631	list_del_init(entry: &nf->nf_lru);
632	nfsd_file_free(nf);
633	}
634	flush_delayed_fput();
635	}
636
637	/**
638	* nfsd_file_delayed_close - close unused nfsd_files
639	* @work: dummy
640	*
641	* Scrape the freeme list for this nfsd_net, and then dispose of them
642	* all.
643	*/
644	static void
645	nfsd_file_delayed_close(struct work_struct *work)
646	{
647	LIST_HEAD(head);
648	struct nfsd_fcache_disposal *l = container_of(work,
649	struct nfsd_fcache_disposal, work);
650
651	spin_lock(lock: &l->lock);
652	list_splice_init(list: &l->freeme, head: &head);
653	spin_unlock(lock: &l->lock);
654
655	nfsd_file_dispose_list(dispose: &head);
656	}
657
658	static int
659	nfsd_file_lease_notifier_call(struct notifier_block nb, unsigned* long arg,
660	void *data)
661	{
662	struct file_lock *fl = data;
663
664	/ Only close files for F_SETLEASE leases /
665	if (fl->fl_flags & FL_LEASE)
666	nfsd_file_close_inode(inode: file_inode(f: fl->fl_file));
667	return `0`;
668	}
669
670	static struct notifier_block nfsd_file_lease_notifier = {
671	.notifier_call = nfsd_file_lease_notifier_call,
672	};
673
674	static int
675	nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
676	struct inode inode, struct* inode *dir,
677	const struct qstr *name, u32 cookie)
678	{
679	if (WARN_ON_ONCE(!inode))
680	return `0`;
681
682	trace_nfsd_file_fsnotify_handle_event(inode, mask);
683
684	/ Should be no marks on non-regular files /
685	if (!S_ISREG(inode->i_mode)) {
686	WARN_ON_ONCE(`1`);
687	return `0`;
688	}
689
690	/ don't close files if this was not the last link /
691	if (mask & FS_ATTRIB) {
692	if (inode->i_nlink)
693	return `0`;
694	}
695
696	nfsd_file_close_inode(inode);
697	return `0`;
698	}
699
700
701	static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
702	.handle_inode_event = nfsd_file_fsnotify_handle_event,
703	.free_mark = nfsd_file_mark_free,
704	};
705
706	int
707	nfsd_file_cache_init(void)
708	{
709	int ret;
710
711	lockdep_assert_held(&nfsd_mutex);
712	if (test_and_set_bit(NFSD_FILE_CACHE_UP, addr: &nfsd_file_flags) == `1`)
713	return `0`;
714
715	ret = rhltable_init(hlt: &nfsd_file_rhltable, params: &nfsd_file_rhash_params);
716	if (ret)
717	return ret;
718
719	ret = -ENOMEM;
720	nfsd_filecache_wq = alloc_workqueue(fmt: "nfsd_filecache", flags: `0`, max_active: `0`);
721	if (!nfsd_filecache_wq)
722	goto out;
723
724	nfsd_file_slab = kmem_cache_create(name: "nfsd_file",
725	size: sizeof(struct nfsd_file), align: `0`, flags: `0`, NULL);
726	if (!nfsd_file_slab) {
727	pr_err("nfsd: unable to create nfsd_file_slab\n");
728	goto out_err;
729	}
730
731	nfsd_file_mark_slab = kmem_cache_create(name: "nfsd_file_mark",
732	size: sizeof(struct nfsd_file_mark), align: `0`, flags: `0`, NULL);
733	if (!nfsd_file_mark_slab) {
734	pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
735	goto out_err;
736	}
737
738
739	ret = list_lru_init(&nfsd_file_lru);
740	if (ret) {
741	pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
742	goto out_err;
743	}
744
745	nfsd_file_shrinker = shrinker_alloc(flags: `0`, fmt: "nfsd-filecache");
746	if (!nfsd_file_shrinker) {
747	ret = -ENOMEM;
748	pr_err("nfsd: failed to allocate nfsd_file_shrinker\n");
749	goto out_lru;
750	}
751
752	nfsd_file_shrinker->count_objects = nfsd_file_lru_count;
753	nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan;
754	nfsd_file_shrinker->seeks = `1`;
755
756	shrinker_register(shrinker: nfsd_file_shrinker);
757
758	ret = lease_register_notifier(&nfsd_file_lease_notifier);
759	if (ret) {
760	pr_err("nfsd: unable to register lease notifier: %d\n", ret);
761	goto out_shrinker;
762	}
763
764	nfsd_file_fsnotify_group = fsnotify_alloc_group(ops: &nfsd_file_fsnotify_ops,
765	FSNOTIFY_GROUP_NOFS);
766	if (IS_ERR(ptr: nfsd_file_fsnotify_group)) {
767	pr_err("nfsd: unable to create fsnotify group: %ld\n",
768	PTR_ERR(nfsd_file_fsnotify_group));
769	ret = PTR_ERR(ptr: nfsd_file_fsnotify_group);
770	nfsd_file_fsnotify_group = NULL;
771	goto out_notifier;
772	}
773
774	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
775	out:
776	return ret;
777	out_notifier:
778	lease_unregister_notifier(&nfsd_file_lease_notifier);
779	out_shrinker:
780	shrinker_free(shrinker: nfsd_file_shrinker);
781	out_lru:
782	list_lru_destroy(lru: &nfsd_file_lru);
783	out_err:
784	kmem_cache_destroy(s: nfsd_file_slab);
785	nfsd_file_slab = NULL;
786	kmem_cache_destroy(s: nfsd_file_mark_slab);
787	nfsd_file_mark_slab = NULL;
788	destroy_workqueue(wq: nfsd_filecache_wq);
789	nfsd_filecache_wq = NULL;
790	rhltable_destroy(hlt: &nfsd_file_rhltable);
791	goto out;
792	}
793
794	/**
795	* __nfsd_file_cache_purge: clean out the cache for shutdown
796	* @net: net-namespace to shut down the cache (may be NULL)
797	*
798	* Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
799	* then close out everything. Called when an nfsd instance is being shut down,
800	* and when the exports table is flushed.
801	*/
802	static void
803	__nfsd_file_cache_purge(struct net *net)
804	{
805	struct rhashtable_iter iter;
806	struct nfsd_file *nf;
807	LIST_HEAD(dispose);
808
809	rhltable_walk_enter(hlt: &nfsd_file_rhltable, iter: &iter);
810	do {
811	rhashtable_walk_start(iter: &iter);
812
813	nf = rhashtable_walk_next(iter: &iter);
814	while (!IS_ERR_OR_NULL(ptr: nf)) {
815	if (!net \|\| nf->nf_net == net)
816	nfsd_file_cond_queue(nf, dispose: &dispose);
817	nf = rhashtable_walk_next(iter: &iter);
818	}
819
820	rhashtable_walk_stop(iter: &iter);
821	} while (nf == ERR_PTR(error: -EAGAIN));
822	rhashtable_walk_exit(iter: &iter);
823
824	nfsd_file_dispose_list(dispose: &dispose);
825	}
826
827	static struct nfsd_fcache_disposal *
828	nfsd_alloc_fcache_disposal(void)
829	{
830	struct nfsd_fcache_disposal *l;
831
832	l = kmalloc(size: sizeof(*l), GFP_KERNEL);
833	if (!l)
834	return NULL;
835	INIT_WORK(&l->work, nfsd_file_delayed_close);
836	spin_lock_init(&l->lock);
837	INIT_LIST_HEAD(list: &l->freeme);
838	return l;
839	}
840
841	static void
842	nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
843	{
844	cancel_work_sync(work: &l->work);
845	nfsd_file_dispose_list(dispose: &l->freeme);
846	kfree(objp: l);
847	}
848
849	static void
850	nfsd_free_fcache_disposal_net(struct net *net)
851	{
852	struct nfsd_net *nn = net_generic(net, id: nfsd_net_id);
853	struct nfsd_fcache_disposal *l = nn->fcache_disposal;
854
855	nfsd_free_fcache_disposal(l);
856	}
857
858	int
859	nfsd_file_cache_start_net(struct net *net)
860	{
861	struct nfsd_net *nn = net_generic(net, id: nfsd_net_id);
862
863	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
864	return nn->fcache_disposal ? `0` : -ENOMEM;
865	}
866
867	/**
868	* nfsd_file_cache_purge - Remove all cache items associated with @net
869	* @net: target net namespace
870	*
871	*/
872	void
873	nfsd_file_cache_purge(struct net *net)
874	{
875	lockdep_assert_held(&nfsd_mutex);
876	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == `1`)
877	__nfsd_file_cache_purge(net);
878	}
879
/*
 * Per-net shutdown: purge @net's entries from the cache, then release
 * its disposal context.
 */
void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}
886
887	void
888	nfsd_file_cache_shutdown(void)
889	{
890	int i;
891
892	lockdep_assert_held(&nfsd_mutex);
893	if (test_and_clear_bit(NFSD_FILE_CACHE_UP, addr: &nfsd_file_flags) == `0`)
894	return;
895
896	lease_unregister_notifier(&nfsd_file_lease_notifier);
897	shrinker_free(shrinker: nfsd_file_shrinker);
898	/*
899	* make sure all callers of nfsd_file_lru_cb are done before
900	* calling nfsd_file_cache_purge
901	*/
902	cancel_delayed_work_sync(dwork: &nfsd_filecache_laundrette);
903	__nfsd_file_cache_purge(NULL);
904	list_lru_destroy(lru: &nfsd_file_lru);
905	rcu_barrier();
906	fsnotify_put_group(group: nfsd_file_fsnotify_group);
907	nfsd_file_fsnotify_group = NULL;
908	kmem_cache_destroy(s: nfsd_file_slab);
909	nfsd_file_slab = NULL;
910	fsnotify_wait_marks_destroyed();
911	kmem_cache_destroy(s: nfsd_file_mark_slab);
912	nfsd_file_mark_slab = NULL;
913	destroy_workqueue(wq: nfsd_filecache_wq);
914	nfsd_filecache_wq = NULL;
915	rhltable_destroy(hlt: &nfsd_file_rhltable);
916
917	for_each_possible_cpu(i) {
918	per_cpu(nfsd_file_cache_hits, i) = `0`;
919	per_cpu(nfsd_file_acquisitions, i) = `0`;
920	per_cpu(nfsd_file_releases, i) = `0`;
921	per_cpu(nfsd_file_total_age, i) = `0`;
922	per_cpu(nfsd_file_evictions, i) = `0`;
923	}
924	}
925
926	static struct nfsd_file *
927	nfsd_file_lookup_locked(const struct net net, const* struct cred *cred,
928	struct inode inode, unsigned* char need,
929	bool want_gc)
930	{
931	struct rhlist_head tmp, list;
932	struct nfsd_file *nf;
933
934	list = rhltable_lookup(hlt: &nfsd_file_rhltable, key: &inode,
935	params: nfsd_file_rhash_params);
936	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
937	if (nf->nf_may != need)
938	continue;
939	if (nf->nf_net != net)
940	continue;
941	if (!nfsd_match_cred(c1: nf->nf_cred, c2: cred))
942	continue;
943	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
944	continue;
945	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == `0`)
946	continue;
947
948	if (!nfsd_file_get(nf))
949	continue;
950	return nf;
951	}
952	return NULL;
953	}
954
955	/**
956	* nfsd_file_is_cached - are there any cached open files for this inode?
957	* @inode: inode to check
958	*
959	* The lookup matches inodes in all net namespaces and is atomic wrt
960	* nfsd_file_acquire().
961	*
962	* Return values:
963	* %true: filecache contains at least one file matching this inode
964	* %false: filecache contains no files matching this inode
965	*/
966	bool
967	nfsd_file_is_cached(struct inode *inode)
968	{
969	struct rhlist_head tmp, list;
970	struct nfsd_file *nf;
971	bool ret = false;
972
973	rcu_read_lock();
974	list = rhltable_lookup(hlt: &nfsd_file_rhltable, key: &inode,
975	params: nfsd_file_rhash_params);
976	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
977	if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
978	ret = true;
979	break;
980	}
981	rcu_read_unlock();
982
983	trace_nfsd_file_is_cached(inode, found: (int)ret);
984	return ret;
985	}
986
987	static __be32
988	nfsd_file_do_acquire(struct svc_rqst rqstp, struct* svc_fh *fhp,
989	unsigned int may_flags, struct file *file,
990	struct nfsd_file **pnf, bool want_gc)
991	{
992	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
993	struct net *net = SVC_NET(rqstp);
994	struct nfsd_file new, nf;
995	bool stale_retry = true;
996	bool open_retry = true;
997	struct inode *inode;
998	__be32 status;
999	int ret;
1000
1001	retry:
1002	status = fh_verify(rqstp, fhp, S_IFREG,
1003	may_flags\|NFSD_MAY_OWNER_OVERRIDE);
1004	if (status != nfs_ok)
1005	return status;
1006	inode = d_inode(dentry: fhp->fh_dentry);
1007
1008	rcu_read_lock();
1009	nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
1010	rcu_read_unlock();
1011
1012	if (nf) {
1013	/*
1014	* If the nf is on the LRU then it holds an extra reference
1015	* that must be put if it's removed. It had better not be
1016	* the last one however, since we should hold another.
1017	*/
1018	if (nfsd_file_lru_remove(nf))
1019	WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
1020	goto wait_for_construction;
1021	}
1022
1023	new = nfsd_file_alloc(net, inode, need, want_gc);
1024	if (!new) {
1025	status = nfserr_jukebox;
1026	goto out;
1027	}
1028
1029	rcu_read_lock();
1030	spin_lock(lock: &inode->i_lock);
1031	nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
1032	if (unlikely(nf)) {
1033	spin_unlock(lock: &inode->i_lock);
1034	rcu_read_unlock();
1035	nfsd_file_slab_free(rcu: &new->nf_rcu);
1036	goto wait_for_construction;
1037	}
1038	nf = new;
1039	ret = rhltable_insert(hlt: &nfsd_file_rhltable, list: &nf->nf_rlist,
1040	params: nfsd_file_rhash_params);
1041	spin_unlock(lock: &inode->i_lock);
1042	rcu_read_unlock();
1043	if (likely(ret == `0`))
1044	goto open_file;
1045
1046	if (ret == -EEXIST)
1047	goto retry;
1048	trace_nfsd_file_insert_err(rqstp, inode, may_flags, error: ret);
1049	status = nfserr_jukebox;
1050	goto construction_err;
1051
1052	wait_for_construction:
1053	wait_on_bit(word: &nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
1054
1055	/ Did construction of this file fail? /
1056	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
1057	trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
1058	if (!open_retry) {
1059	status = nfserr_jukebox;
1060	goto construction_err;
1061	}
1062	open_retry = false;
1063	fh_put(fhp);
1064	goto retry;
1065	}
1066	this_cpu_inc(nfsd_file_cache_hits);
1067
1068	status = nfserrno(errno: nfsd_open_break_lease(file_inode(f: nf->nf_file), may_flags));
1069	if (status != nfs_ok) {
1070	nfsd_file_put(nf);
1071	nf = NULL;
1072	}
1073
1074	out:
1075	if (status == nfs_ok) {
1076	this_cpu_inc(nfsd_file_acquisitions);
1077	nfsd_file_check_write_error(nf);
1078	*pnf = nf;
1079	}
1080	trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
1081	return status;
1082
1083	open_file:
1084	trace_nfsd_file_alloc(nf);
1085	nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
1086	if (nf->nf_mark) {
1087	if (file) {
1088	get_file(f: file);
1089	nf->nf_file = file;
1090	status = nfs_ok;
1091	trace_nfsd_file_opened(nf, status);
1092	} else {
1093	ret = nfsd_open_verified(rqstp, fhp, may_flags,
1094	filp: &nf->nf_file);
1095	if (ret == -EOPENSTALE && stale_retry) {
1096	stale_retry = false;
1097	nfsd_file_unhash(nf);
1098	clear_and_wake_up_bit(NFSD_FILE_PENDING,
1099	word: &nf->nf_flags);
1100	if (refcount_dec_and_test(r: &nf->nf_ref))
1101	nfsd_file_free(nf);
1102	nf = NULL;
1103	fh_put(fhp);
1104	goto retry;
1105	}
1106	status = nfserrno(errno: ret);
1107	trace_nfsd_file_open(nf, status);
1108	}
1109	} else
1110	status = nfserr_jukebox;
1111	/*
1112	* If construction failed, or we raced with a call to unlink()
1113	* then unhash.
1114	*/
1115	if (status != nfs_ok \|\| inode->i_nlink == `0`)
1116	nfsd_file_unhash(nf);
1117	clear_and_wake_up_bit(NFSD_FILE_PENDING, word: &nf->nf_flags);
1118	if (status == nfs_ok)
1119	goto out;
1120
1121	construction_err:
1122	if (refcount_dec_and_test(r: &nf->nf_ref))
1123	nfsd_file_free(nf);
1124	nf = NULL;
1125	goto out;
1126	}
1127
1128	/**
1129	* nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
1130	* @rqstp: the RPC transaction being executed
1131	* @fhp: the NFS filehandle of the file to be opened
1132	* @may_flags: NFSD_MAY_ settings for the file
1133	* @pnf: OUT: new or found "struct nfsd_file" object
1134	*
1135	* The nfsd_file object returned by this API is reference-counted
1136	* and garbage-collected. The object is retained for a few
1137	* seconds after the final nfsd_file_put() in case the caller
1138	* wants to re-use it.
1139	*
1140	* Return values:
1141	* %nfs_ok - @pnf points to an nfsd_file with its reference
1142	* count boosted.
1143	*
1144	* On error, an nfsstat value in network byte order is returned.
1145	*/
1146	__be32
1147	nfsd_file_acquire_gc(struct svc_rqst rqstp, struct* svc_fh *fhp,
1148	unsigned int may_flags, struct nfsd_file **pnf)
1149	{
1150	return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, want_gc: true);
1151	}
1152
1153	/**
1154	* nfsd_file_acquire - Get a struct nfsd_file with an open file
1155	* @rqstp: the RPC transaction being executed
1156	* @fhp: the NFS filehandle of the file to be opened
1157	* @may_flags: NFSD_MAY_ settings for the file
1158	* @pnf: OUT: new or found "struct nfsd_file" object
1159	*
1160	* The nfsd_file_object returned by this API is reference-counted
1161	* but not garbage-collected. The object is unhashed after the
1162	* final nfsd_file_put().
1163	*
1164	* Return values:
1165	* %nfs_ok - @pnf points to an nfsd_file with its reference
1166	* count boosted.
1167	*
1168	* On error, an nfsstat value in network byte order is returned.
1169	*/
1170	__be32
1171	nfsd_file_acquire(struct svc_rqst rqstp, struct* svc_fh *fhp,
1172	unsigned int may_flags, struct nfsd_file **pnf)
1173	{
1174	return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, want_gc: false);
1175	}
1176
1177	/**
1178	* nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
1179	* @rqstp: the RPC transaction being executed
1180	* @fhp: the NFS filehandle of the file just created
1181	* @may_flags: NFSD_MAY_ settings for the file
1182	* @file: cached, already-open file (may be NULL)
1183	* @pnf: OUT: new or found "struct nfsd_file" object
1184	*
1185	* Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
1186	* and @file is non-NULL, use it to instantiate a new nfsd_file instead of
1187	* opening a new one.
1188	*
1189	* Return values:
1190	* %nfs_ok - @pnf points to an nfsd_file with its reference
1191	* count boosted.
1192	*
1193	* On error, an nfsstat value in network byte order is returned.
1194	*/
1195	__be32
1196	nfsd_file_acquire_opened(struct svc_rqst rqstp, struct* svc_fh *fhp,
1197	unsigned int may_flags, struct file *file,
1198	struct nfsd_file **pnf)
1199	{
1200	return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, want_gc: false);
1201	}
1202
1203	/*
1204	* Note that fields may be added, removed or reordered in the future. Programs
1205	* scraping this file for info should test the labels to ensure they're
1206	* getting the correct field.
1207	*/
1208	int nfsd_file_cache_stats_show(struct seq_file m, void* *v)
1209	{
1210	unsigned long releases = `0`, evictions = `0`;
1211	unsigned long hits = `0`, acquisitions = `0`;
1212	unsigned int i, count = `0`, buckets = `0`;
1213	unsigned long lru = `0`, total_age = `0`;
1214
1215	/ Serialize with server shutdown /
1216	mutex_lock(&nfsd_mutex);
1217	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == `1`) {
1218	struct bucket_table *tbl;
1219	struct rhashtable *ht;
1220
1221	lru = list_lru_count(lru: &nfsd_file_lru);
1222
1223	rcu_read_lock();
1224	ht = &nfsd_file_rhltable.ht;
1225	count = atomic_read(v: &ht->nelems);
1226	tbl = rht_dereference_rcu(ht->tbl, ht);
1227	buckets = tbl->size;
1228	rcu_read_unlock();
1229	}
1230	mutex_unlock(lock: &nfsd_mutex);
1231
1232	for_each_possible_cpu(i) {
1233	hits += per_cpu(nfsd_file_cache_hits, i);
1234	acquisitions += per_cpu(nfsd_file_acquisitions, i);
1235	releases += per_cpu(nfsd_file_releases, i);
1236	total_age += per_cpu(nfsd_file_total_age, i);
1237	evictions += per_cpu(nfsd_file_evictions, i);
1238	}
1239
1240	seq_printf(m, fmt: "total inodes: %u\n", count);
1241	seq_printf(m, fmt: "hash buckets: %u\n", buckets);
1242	seq_printf(m, fmt: "lru entries: %lu\n", lru);
1243	seq_printf(m, fmt: "cache hits: %lu\n", hits);
1244	seq_printf(m, fmt: "acquisitions: %lu\n", acquisitions);
1245	seq_printf(m, fmt: "releases: %lu\n", releases);
1246	seq_printf(m, fmt: "evictions: %lu\n", evictions);
1247	if (releases)
1248	seq_printf(m, fmt: "mean age (ms): %ld\n", total_age / releases);
1249	else
1250	seq_printf(m, fmt: "mean age (ms): -\n");
1251	return `0`;
1252	}
1253

source code of linux/fs/nfsd/filecache.c