locks.c source code [linux/fs/locks.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* linux/fs/locks.c
4	*
5	* We implement four types of file locks: BSD locks, posix locks, open
6	* file description locks, and leases. For details about BSD locks,
7	* see the flock(2) man page; for details about the other three, see
8	* fcntl(2).
9	*
10	*
11	* Locking conflicts and dependencies:
12	* If multiple threads attempt to lock the same byte (or flock the same file)
13	* only one can be granted the lock, and other must wait their turn.
14	* The first lock has been "applied" or "granted", the others are "waiting"
15	* and are "blocked" by the "applied" lock..
16	*
17	* Waiting and applied locks are all kept in trees whose properties are:
18	*
19	* - the root of a tree may be an applied or waiting lock.
20	* - every other node in the tree is a waiting lock that
21	* conflicts with every ancestor of that node.
22	*
23	* Every such tree begins life as a waiting singleton which obviously
24	* satisfies the above properties.
25	*
26	* The only ways we modify trees preserve these properties:
27	*
28	* 1. We may add a new leaf node, but only after first verifying that it
29	* conflicts with all of its ancestors.
30	* 2. We may remove the root of a tree, creating a new singleton
31	* tree from the root and N new trees rooted in the immediate
32	* children.
33	* 3. If the root of a tree is not currently an applied lock, we may
34	* apply it (if possible).
35	* 4. We may upgrade the root of the tree (either extend its range,
36	* or upgrade its entire range from read to write).
37	*
38	* When an applied lock is modified in a way that reduces or downgrades any
39	* part of its range, we remove all its children (2 above). This particularly
40	* happens when a lock is unlocked.
41	*
42	* For each of those child trees we "wake up" the thread which is
43	* waiting for the lock so it can continue handling as follows: if the
44	* root of the tree applies, we do so (3). If it doesn't, it must
45	* conflict with some applied lock. We remove (wake up) all of its children
46	* (2), and add it is a new leaf to the tree rooted in the applied
47	* lock (1). We then repeat the process recursively with those
48	* children.
49	*
50	*/
51	#include <linux/capability.h>
52	#include <linux/file.h>
53	#include <linux/fdtable.h>
54	#include <linux/filelock.h>
55	#include <linux/fs.h>
56	#include <linux/init.h>
57	#include <linux/security.h>
58	#include <linux/slab.h>
59	#include <linux/syscalls.h>
60	#include <linux/time.h>
61	#include <linux/rcupdate.h>
62	#include <linux/pid_namespace.h>
63	#include <linux/hashtable.h>
64	#include <linux/percpu.h>
65	#include <linux/sysctl.h>
66
67	#define CREATE_TRACE_POINTS
68	#include <trace/events/filelock.h>
69
70	#include <linux/uaccess.h>
71
72	static struct file_lock file_lock(struct* file_lock_core *flc)
73	{
74	return container_of(flc, struct file_lock, c);
75	}
76
77	static struct file_lease file_lease(struct* file_lock_core *flc)
78	{
79	return container_of(flc, struct file_lease, c);
80	}
81
82	static bool lease_breaking(struct file_lease *fl)
83	{
84	return fl->c.flc_flags & (FL_UNLOCK_PENDING \| FL_DOWNGRADE_PENDING);
85	}
86
87	static int target_leasetype(struct file_lease *fl)
88	{
89	if (fl->c.flc_flags & FL_UNLOCK_PENDING)
90	return F_UNLCK;
91	if (fl->c.flc_flags & FL_DOWNGRADE_PENDING)
92	return F_RDLCK;
93	return fl->c.flc_type;
94	}
95
96	static int leases_enable = `1`;
97	static int lease_break_time = `45`;
98
99	#ifdef CONFIG_SYSCTL
100	static const struct ctl_table locks_sysctls[] = {
101	{
102	.procname = "leases-enable",
103	.data = &leases_enable,
104	.maxlen = sizeof(int),
105	.mode = `0644`,
106	.proc_handler = proc_dointvec,
107	},
108	#ifdef CONFIG_MMU
109	{
110	.procname = "lease-break-time",
111	.data = &lease_break_time,
112	.maxlen = sizeof(int),
113	.mode = `0644`,
114	.proc_handler = proc_dointvec,
115	},
116	#endif /* CONFIG_MMU */
117	};
118
119	static int __init init_fs_locks_sysctls(void)
120	{
121	register_sysctl_init("fs", locks_sysctls);
122	return `0`;
123	}
124	early_initcall(init_fs_locks_sysctls);
125	#endif /* CONFIG_SYSCTL */
126
127	/*
128	* The global file_lock_list is only used for displaying /proc/locks, so we
129	* keep a list on each CPU, with each list protected by its own spinlock.
130	* Global serialization is done using file_rwsem.
131	*
132	* Note that alterations to the list also require that the relevant flc_lock is
133	* held.
134	*/
135	struct file_lock_list_struct {
136	spinlock_t lock;
137	struct hlist_head hlist;
138	};
139	static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
140	DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
141
142
143	/*
144	* The blocked_hash is used to find POSIX lock loops for deadlock detection.
145	* It is protected by blocked_lock_lock.
146	*
147	* We hash locks by lockowner in order to optimize searching for the lock a
148	* particular lockowner is waiting on.
149	*
150	* FIXME: make this value scale via some heuristic? We generally will want more
151	* buckets when we have more lockowners holding locks, but that's a little
152	* difficult to determine without knowing what the workload will look like.
153	*/
154	#define BLOCKED_HASH_BITS 7
155	static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
156
157	/*
158	* This lock protects the blocked_hash. Generally, if you're accessing it, you
159	* want to be holding this lock.
160	*
161	* In addition, it also protects the fl->fl_blocked_requests list, and the
162	* fl->fl_blocker pointer for file_lock structures that are acting as lock
163	* requests (in contrast to those that are acting as records of acquired locks).
164	*
165	* Note that when we acquire this lock in order to change the above fields,
166	* we often hold the flc_lock as well. In certain cases, when reading the fields
167	* protected by this lock, we can skip acquiring it iff we already hold the
168	* flc_lock.
169	*/
170	static DEFINE_SPINLOCK(blocked_lock_lock);
171
172	static struct kmem_cache *flctx_cache __ro_after_init;
173	static struct kmem_cache *filelock_cache __ro_after_init;
174	static struct kmem_cache *filelease_cache __ro_after_init;
175
176	static struct file_lock_context *
177	locks_get_lock_context(struct inode inode, int* type)
178	{
179	struct file_lock_context *ctx;
180
181	/ paired with cmpxchg() below /
182	ctx = locks_inode_context(inode);
183	if (likely(ctx) \|\| type == F_UNLCK)
184	goto out;
185
186	ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
187	if (!ctx)
188	goto out;
189
190	spin_lock_init(&ctx->flc_lock);
191	INIT_LIST_HEAD(list: &ctx->flc_flock);
192	INIT_LIST_HEAD(list: &ctx->flc_posix);
193	INIT_LIST_HEAD(list: &ctx->flc_lease);
194
195	/*
196	* Assign the pointer if it's not already assigned. If it is, then
197	* free the context we just allocated.
198	*/
199	if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
200	kmem_cache_free(s: flctx_cache, objp: ctx);
201	ctx = locks_inode_context(inode);
202	}
203	out:
204	trace_locks_get_lock_context(inode, type, ctx);
205	return ctx;
206	}
207
208	static void
209	locks_dump_ctx_list(struct list_head list, char* *list_type)
210	{
211	struct file_lock_core *flc;
212
213	list_for_each_entry(flc, list, flc_list)
214	pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
215	list_type, flc->flc_owner, flc->flc_flags,
216	flc->flc_type, flc->flc_pid);
217	}
218
219	static void
220	locks_check_ctx_lists(struct inode *inode)
221	{
222	struct file_lock_context *ctx = inode->i_flctx;
223
224	if (unlikely(!list_empty(&ctx->flc_flock) \|\|
225	!list_empty(&ctx->flc_posix) \|\|
226	!list_empty(&ctx->flc_lease))) {
227	pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
228	MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
229	inode->i_ino);
230	locks_dump_ctx_list(list: &ctx->flc_flock, list_type: "FLOCK");
231	locks_dump_ctx_list(list: &ctx->flc_posix, list_type: "POSIX");
232	locks_dump_ctx_list(list: &ctx->flc_lease, list_type: "LEASE");
233	}
234	}
235
236	static void
237	locks_check_ctx_file_list(struct file filp, struct* list_head list, char* *list_type)
238	{
239	struct file_lock_core *flc;
240	struct inode *inode = file_inode(f: filp);
241
242	list_for_each_entry(flc, list, flc_list)
243	if (flc->flc_file == filp)
244	pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
245	" fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
246	list_type, MAJOR(inode->i_sb->s_dev),
247	MINOR(inode->i_sb->s_dev), inode->i_ino,
248	flc->flc_owner, flc->flc_flags,
249	flc->flc_type, flc->flc_pid);
250	}
251
252	void
253	locks_free_lock_context(struct inode *inode)
254	{
255	struct file_lock_context *ctx = locks_inode_context(inode);
256
257	if (unlikely(ctx)) {
258	locks_check_ctx_lists(inode);
259	kmem_cache_free(s: flctx_cache, objp: ctx);
260	}
261	}
262
263	static void locks_init_lock_heads(struct file_lock_core *flc)
264	{
265	INIT_HLIST_NODE(h: &flc->flc_link);
266	INIT_LIST_HEAD(list: &flc->flc_list);
267	INIT_LIST_HEAD(list: &flc->flc_blocked_requests);
268	INIT_LIST_HEAD(list: &flc->flc_blocked_member);
269	init_waitqueue_head(&flc->flc_wait);
270	}
271
272	/ Allocate an empty lock structure. /
273	struct file_lock locks_alloc_lock(void*)
274	{
275	struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
276
277	if (fl)
278	locks_init_lock_heads(flc: &fl->c);
279
280	return fl;
281	}
282	EXPORT_SYMBOL_GPL(locks_alloc_lock);
283
284	/ Allocate an empty lock structure. /
285	struct file_lease locks_alloc_lease(void*)
286	{
287	struct file_lease *fl = kmem_cache_zalloc(filelease_cache, GFP_KERNEL);
288
289	if (fl)
290	locks_init_lock_heads(flc: &fl->c);
291
292	return fl;
293	}
294	EXPORT_SYMBOL_GPL(locks_alloc_lease);
295
296	void locks_release_private(struct file_lock *fl)
297	{
298	struct file_lock_core *flc = &fl->c;
299
300	BUG_ON(waitqueue_active(&flc->flc_wait));
301	BUG_ON(!list_empty(&flc->flc_list));
302	BUG_ON(!list_empty(&flc->flc_blocked_requests));
303	BUG_ON(!list_empty(&flc->flc_blocked_member));
304	BUG_ON(!hlist_unhashed(&flc->flc_link));
305
306	if (fl->fl_ops) {
307	if (fl->fl_ops->fl_release_private)
308	fl->fl_ops->fl_release_private(fl);
309	fl->fl_ops = NULL;
310	}
311
312	if (fl->fl_lmops) {
313	if (fl->fl_lmops->lm_put_owner) {
314	fl->fl_lmops->lm_put_owner(flc->flc_owner);
315	flc->flc_owner = NULL;
316	}
317	fl->fl_lmops = NULL;
318	}
319	}
320	EXPORT_SYMBOL_GPL(locks_release_private);
321
322	/**
323	* locks_owner_has_blockers - Check for blocking lock requests
324	* @flctx: file lock context
325	* @owner: lock owner
326	*
327	* Return values:
328	* %true: @owner has at least one blocker
329	* %false: @owner has no blockers
330	*/
331	bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner)
332	{
333	struct file_lock_core *flc;
334
335	spin_lock(lock: &flctx->flc_lock);
336	list_for_each_entry(flc, &flctx->flc_posix, flc_list) {
337	if (flc->flc_owner != owner)
338	continue;
339	if (!list_empty(head: &flc->flc_blocked_requests)) {
340	spin_unlock(lock: &flctx->flc_lock);
341	return true;
342	}
343	}
344	spin_unlock(lock: &flctx->flc_lock);
345	return false;
346	}
347	EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
348
349	/ Free a lock which is not in use. /
350	void locks_free_lock(struct file_lock *fl)
351	{
352	locks_release_private(fl);
353	kmem_cache_free(s: filelock_cache, objp: fl);
354	}
355	EXPORT_SYMBOL(locks_free_lock);
356
357	/ Free a lease which is not in use. /
358	void locks_free_lease(struct file_lease *fl)
359	{
360	kmem_cache_free(s: filelease_cache, objp: fl);
361	}
362	EXPORT_SYMBOL(locks_free_lease);
363
364	static void
365	locks_dispose_list(struct list_head *dispose)
366	{
367	struct file_lock_core *flc;
368
369	while (!list_empty(head: dispose)) {
370	flc = list_first_entry(dispose, struct file_lock_core, flc_list);
371	list_del_init(entry: &flc->flc_list);
372	if (flc->flc_flags & (FL_LEASE\|FL_DELEG\|FL_LAYOUT))
373	locks_free_lease(file_lease(flc));
374	else
375	locks_free_lock(file_lock(flc));
376	}
377	}
378
379	void locks_init_lock(struct file_lock *fl)
380	{
381	memset(fl, `0`, sizeof(struct file_lock));
382	locks_init_lock_heads(flc: &fl->c);
383	}
384	EXPORT_SYMBOL(locks_init_lock);
385
386	void locks_init_lease(struct file_lease *fl)
387	{
388	memset(fl, `0`, sizeof(*fl));
389	locks_init_lock_heads(flc: &fl->c);
390	}
391	EXPORT_SYMBOL(locks_init_lease);
392
393	/*
394	* Initialize a new lock from an existing file_lock structure.
395	*/
396	void locks_copy_conflock(struct file_lock new, struct* file_lock *fl)
397	{
398	new->c.flc_owner = fl->c.flc_owner;
399	new->c.flc_pid = fl->c.flc_pid;
400	new->c.flc_file = NULL;
401	new->c.flc_flags = fl->c.flc_flags;
402	new->c.flc_type = fl->c.flc_type;
403	new->fl_start = fl->fl_start;
404	new->fl_end = fl->fl_end;
405	new->fl_lmops = fl->fl_lmops;
406	new->fl_ops = NULL;
407
408	if (fl->fl_lmops) {
409	if (fl->fl_lmops->lm_get_owner)
410	fl->fl_lmops->lm_get_owner(fl->c.flc_owner);
411	}
412	}
413	EXPORT_SYMBOL(locks_copy_conflock);
414
415	void locks_copy_lock(struct file_lock new, struct* file_lock *fl)
416	{
417	/ "new" must be a freshly-initialized lock /
418	WARN_ON_ONCE(new->fl_ops);
419
420	locks_copy_conflock(new, fl);
421
422	new->c.flc_file = fl->c.flc_file;
423	new->fl_ops = fl->fl_ops;
424
425	if (fl->fl_ops) {
426	if (fl->fl_ops->fl_copy_lock)
427	fl->fl_ops->fl_copy_lock(new, fl);
428	}
429	}
430	EXPORT_SYMBOL(locks_copy_lock);
431
432	static void locks_move_blocks(struct file_lock new, struct* file_lock *fl)
433	{
434	struct file_lock *f;
435
436	/*
437	* As ctx->flc_lock is held, new requests cannot be added to
438	* ->flc_blocked_requests, so we don't need a lock to check if it
439	* is empty.
440	*/
441	if (list_empty(head: &fl->c.flc_blocked_requests))
442	return;
443	spin_lock(lock: &blocked_lock_lock);
444	list_splice_init(list: &fl->c.flc_blocked_requests,
445	head: &new->c.flc_blocked_requests);
446	list_for_each_entry(f, &new->c.flc_blocked_requests,
447	c.flc_blocked_member)
448	f->c.flc_blocker = &new->c;
449	spin_unlock(lock: &blocked_lock_lock);
450	}
451
452	static inline int flock_translate_cmd(int cmd) {
453	switch (cmd) {
454	case LOCK_SH:
455	return F_RDLCK;
456	case LOCK_EX:
457	return F_WRLCK;
458	case LOCK_UN:
459	return F_UNLCK;
460	}
461	return -EINVAL;
462	}
463
464	/ Fill in a file_lock structure with an appropriate FLOCK lock. /
465	static void flock_make_lock(struct file filp, struct* file_lock fl, int* type)
466	{
467	locks_init_lock(fl);
468
469	fl->c.flc_file = filp;
470	fl->c.flc_owner = filp;
471	fl->c.flc_pid = current->tgid;
472	fl->c.flc_flags = FL_FLOCK;
473	fl->c.flc_type = type;
474	fl->fl_end = OFFSET_MAX;
475	}
476
477	static int assign_type(struct file_lock_core flc, int* type)
478	{
479	switch (type) {
480	case F_RDLCK:
481	case F_WRLCK:
482	case F_UNLCK:
483	flc->flc_type = type;
484	break;
485	default:
486	return -EINVAL;
487	}
488	return `0`;
489	}
490
491	static int flock64_to_posix_lock(struct file filp, struct* file_lock *fl,
492	struct flock64 *l)
493	{
494	switch (l->l_whence) {
495	case SEEK_SET:
496	fl->fl_start = `0`;
497	break;
498	case SEEK_CUR:
499	fl->fl_start = filp->f_pos;
500	break;
501	case SEEK_END:
502	fl->fl_start = i_size_read(inode: file_inode(f: filp));
503	break;
504	default:
505	return -EINVAL;
506	}
507	if (l->l_start > OFFSET_MAX - fl->fl_start)
508	return -EOVERFLOW;
509	fl->fl_start += l->l_start;
510	if (fl->fl_start < `0`)
511	return -EINVAL;
512
513	/ POSIX-1996 leaves the case l->l_len < 0 undefined;*
514	POSIX-2001 defines it. /*
515	if (l->l_len > `0`) {
516	if (l->l_len - `1` > OFFSET_MAX - fl->fl_start)
517	return -EOVERFLOW;
518	fl->fl_end = fl->fl_start + (l->l_len - `1`);
519
520	} else if (l->l_len < `0`) {
521	if (fl->fl_start + l->l_len < `0`)
522	return -EINVAL;
523	fl->fl_end = fl->fl_start - `1`;
524	fl->fl_start += l->l_len;
525	} else
526	fl->fl_end = OFFSET_MAX;
527
528	fl->c.flc_owner = current->files;
529	fl->c.flc_pid = current->tgid;
530	fl->c.flc_file = filp;
531	fl->c.flc_flags = FL_POSIX;
532	fl->fl_ops = NULL;
533	fl->fl_lmops = NULL;
534
535	return assign_type(flc: &fl->c, type: l->l_type);
536	}
537
538	/ Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX*
539	* style lock.
540	*/
541	static int flock_to_posix_lock(struct file filp, struct* file_lock *fl,
542	struct flock *l)
543	{
544	struct flock64 ll = {
545	.l_type = l->l_type,
546	.l_whence = l->l_whence,
547	.l_start = l->l_start,
548	.l_len = l->l_len,
549	};
550
551	return flock64_to_posix_lock(filp, fl, l: &ll);
552	}
553
554	/ default lease lock manager operations /
555	static bool
556	lease_break_callback(struct file_lease *fl)
557	{
558	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
559	return false;
560	}
561
562	static void
563	lease_setup(struct file_lease fl, void* **priv)
564	{
565	struct file *filp = fl->c.flc_file;
566	struct fasync_struct fa = priv;
567
568	/*
569	* fasync_insert_entry() returns the old entry if any. If there was no
570	* old entry, then it used "priv" and inserted it into the fasync list.
571	* Clear the pointer to indicate that it shouldn't be freed.
572	*/
573	if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
574	*priv = NULL;
575
576	__f_setown(filp, task_pid(current), PIDTYPE_TGID, force: `0`);
577	}
578
579	static const struct lease_manager_operations lease_manager_ops = {
580	.lm_break = lease_break_callback,
581	.lm_change = lease_modify,
582	.lm_setup = lease_setup,
583	};
584
585	/*
586	* Initialize a lease, use the default lock manager operations
587	*/
588	static int lease_init(struct file filp, int* type, struct file_lease *fl)
589	{
590	if (assign_type(flc: &fl->c, type) != `0`)
591	return -EINVAL;
592
593	fl->c.flc_owner = filp;
594	fl->c.flc_pid = current->tgid;
595
596	fl->c.flc_file = filp;
597	fl->c.flc_flags = FL_LEASE;
598	fl->fl_lmops = &lease_manager_ops;
599	return `0`;
600	}
601
602	/ Allocate a file_lock initialised to this type of lease /
603	static struct file_lease lease_alloc(struct* file filp, int* type)
604	{
605	struct file_lease *fl = locks_alloc_lease();
606	int error = -ENOMEM;
607
608	if (fl == NULL)
609	return ERR_PTR(error);
610
611	error = lease_init(filp, type, fl);
612	if (error) {
613	locks_free_lease(fl);
614	return ERR_PTR(error);
615	}
616	return fl;
617	}
618
619	/ Check if two locks overlap each other.*
620	*/
621	static inline int locks_overlap(struct file_lock fl1, struct* file_lock *fl2)
622	{
623	return ((fl1->fl_end >= fl2->fl_start) &&
624	(fl2->fl_end >= fl1->fl_start));
625	}
626
627	/*
628	* Check whether two locks have the same owner.
629	*/
630	static int posix_same_owner(struct file_lock_core fl1, struct* file_lock_core *fl2)
631	{
632	return fl1->flc_owner == fl2->flc_owner;
633	}
634
635	/ Must be called with the flc_lock held! /
636	static void locks_insert_global_locks(struct file_lock_core *flc)
637	{
638	struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
639
640	percpu_rwsem_assert_held(&file_rwsem);
641
642	spin_lock(lock: &fll->lock);
643	flc->flc_link_cpu = smp_processor_id();
644	hlist_add_head(n: &flc->flc_link, h: &fll->hlist);
645	spin_unlock(lock: &fll->lock);
646	}
647
648	/ Must be called with the flc_lock held! /
649	static void locks_delete_global_locks(struct file_lock_core *flc)
650	{
651	struct file_lock_list_struct *fll;
652
653	percpu_rwsem_assert_held(&file_rwsem);
654
655	/*
656	* Avoid taking lock if already unhashed. This is safe since this check
657	* is done while holding the flc_lock, and new insertions into the list
658	* also require that it be held.
659	*/
660	if (hlist_unhashed(h: &flc->flc_link))
661	return;
662
663	fll = per_cpu_ptr(&file_lock_list, flc->flc_link_cpu);
664	spin_lock(lock: &fll->lock);
665	hlist_del_init(n: &flc->flc_link);
666	spin_unlock(lock: &fll->lock);
667	}
668
669	static unsigned long
670	posix_owner_key(struct file_lock_core *flc)
671	{
672	return (unsigned long) flc->flc_owner;
673	}
674
675	static void locks_insert_global_blocked(struct file_lock_core *waiter)
676	{
677	lockdep_assert_held(&blocked_lock_lock);
678
679	hash_add(blocked_hash, &waiter->flc_link, posix_owner_key(waiter));
680	}
681
682	static void locks_delete_global_blocked(struct file_lock_core *waiter)
683	{
684	lockdep_assert_held(&blocked_lock_lock);
685
686	hash_del(node: &waiter->flc_link);
687	}
688
689	/ Remove waiter from blocker's block list.*
690	* When blocker ends up pointing to itself then the list is empty.
691	*
692	* Must be called with blocked_lock_lock held.
693	*/
694	static void __locks_unlink_block(struct file_lock_core *waiter)
695	{
696	locks_delete_global_blocked(waiter);
697	list_del_init(entry: &waiter->flc_blocked_member);
698	}
699
700	static void __locks_wake_up_blocks(struct file_lock_core *blocker)
701	{
702	while (!list_empty(head: &blocker->flc_blocked_requests)) {
703	struct file_lock_core *waiter;
704	struct file_lock *fl;
705
706	waiter = list_first_entry(&blocker->flc_blocked_requests,
707	struct file_lock_core, flc_blocked_member);
708
709	fl = file_lock(flc: waiter);
710	__locks_unlink_block(waiter);
711	if ((waiter->flc_flags & (FL_POSIX \| FL_FLOCK)) &&
712	fl->fl_lmops && fl->fl_lmops->lm_notify)
713	fl->fl_lmops->lm_notify(fl);
714	else
715	locks_wake_up(fl);
716
717	/*
718	* The setting of flc_blocker to NULL marks the "done"
719	* point in deleting a block. Paired with acquire at the top
720	* of locks_delete_block().
721	*/
722	smp_store_release(&waiter->flc_blocker, NULL);
723	}
724	}
725
726	static int __locks_delete_block(struct file_lock_core *waiter)
727	{
728	int status = -ENOENT;
729
730	/*
731	* If fl_blocker is NULL, it won't be set again as this thread "owns"
732	* the lock and is the only one that might try to claim the lock.
733	*
734	* We use acquire/release to manage fl_blocker so that we can
735	* optimize away taking the blocked_lock_lock in many cases.
736	*
737	* The smp_load_acquire guarantees two things:
738	*
739	* 1/ that fl_blocked_requests can be tested locklessly. If something
740	* was recently added to that list it must have been in a locked region
741	* before the locked region when fl_blocker was set to NULL.
742	*
743	* 2/ that no other thread is accessing 'waiter', so it is safe to free
744	* it. __locks_wake_up_blocks is careful not to touch waiter after
745	* fl_blocker is released.
746	*
747	* If a lockless check of fl_blocker shows it to be NULL, we know that
748	* no new locks can be inserted into its fl_blocked_requests list, and
749	* can avoid doing anything further if the list is empty.
750	*/
751	if (!smp_load_acquire(&waiter->flc_blocker) &&
752	list_empty(head: &waiter->flc_blocked_requests))
753	return status;
754
755	spin_lock(lock: &blocked_lock_lock);
756	if (waiter->flc_blocker)
757	status = `0`;
758	__locks_wake_up_blocks(blocker: waiter);
759	__locks_unlink_block(waiter);
760
761	/*
762	* The setting of fl_blocker to NULL marks the "done" point in deleting
763	* a block. Paired with acquire at the top of this function.
764	*/
765	smp_store_release(&waiter->flc_blocker, NULL);
766	spin_unlock(lock: &blocked_lock_lock);
767	return status;
768	}
769
770	/**
771	* locks_delete_block - stop waiting for a file lock
772	* @waiter: the lock which was waiting
773	*
774	* lockd/nfsd need to disconnect the lock while working on it.
775	*/
776	int locks_delete_block(struct file_lock *waiter)
777	{
778	return __locks_delete_block(waiter: &waiter->c);
779	}
780	EXPORT_SYMBOL(locks_delete_block);
781
782	/ Insert waiter into blocker's block list.*
783	* We use a circular list so that processes can be easily woken up in
784	* the order they blocked. The documentation doesn't require this but
785	* it seems like the reasonable thing to do.
786	*
787	* Must be called with both the flc_lock and blocked_lock_lock held. The
788	* fl_blocked_requests list itself is protected by the blocked_lock_lock,
789	* but by ensuring that the flc_lock is also held on insertions we can avoid
790	* taking the blocked_lock_lock in some cases when we see that the
791	* fl_blocked_requests list is empty.
792	*
793	* Rather than just adding to the list, we check for conflicts with any existing
794	* waiters, and add beneath any waiter that blocks the new waiter.
795	* Thus wakeups don't happen until needed.
796	*/
797	static void __locks_insert_block(struct file_lock_core *blocker,
798	struct file_lock_core *waiter,
799	bool conflict(struct file_lock_core *,
800	struct file_lock_core *))
801	{
802	struct file_lock_core *flc;
803
804	BUG_ON(!list_empty(&waiter->flc_blocked_member));
805	new_blocker:
806	list_for_each_entry(flc, &blocker->flc_blocked_requests, flc_blocked_member)
807	if (conflict(flc, waiter)) {
808	blocker = flc;
809	goto new_blocker;
810	}
811	waiter->flc_blocker = blocker;
812	list_add_tail(new: &waiter->flc_blocked_member,
813	head: &blocker->flc_blocked_requests);
814
815	if ((blocker->flc_flags & (FL_POSIX\|FL_OFDLCK)) == FL_POSIX)
816	locks_insert_global_blocked(waiter);
817
818	/ The requests in waiter->flc_blocked are known to conflict with*
819	* waiter, but might not conflict with blocker, or the requests
820	* and lock which block it. So they all need to be woken.
821	*/
822	__locks_wake_up_blocks(blocker: waiter);
823	}
824
825	/ Must be called with flc_lock held. /
826	static void locks_insert_block(struct file_lock_core *blocker,
827	struct file_lock_core *waiter,
828	bool conflict(struct file_lock_core *,
829	struct file_lock_core *))
830	{
831	spin_lock(lock: &blocked_lock_lock);
832	__locks_insert_block(blocker, waiter, conflict);
833	spin_unlock(lock: &blocked_lock_lock);
834	}
835
836	/*
837	* Wake up processes blocked waiting for blocker.
838	*
839	* Must be called with the inode->flc_lock held!
840	*/
841	static void locks_wake_up_blocks(struct file_lock_core *blocker)
842	{
843	/*
844	* Avoid taking global lock if list is empty. This is safe since new
845	* blocked requests are only added to the list under the flc_lock, and
846	* the flc_lock is always held here. Note that removal from the
847	* fl_blocked_requests list does not require the flc_lock, so we must
848	* recheck list_empty() after acquiring the blocked_lock_lock.
849	*/
850	if (list_empty(head: &blocker->flc_blocked_requests))
851	return;
852
853	spin_lock(lock: &blocked_lock_lock);
854	__locks_wake_up_blocks(blocker);
855	spin_unlock(lock: &blocked_lock_lock);
856	}
857
858	static void
859	locks_insert_lock_ctx(struct file_lock_core fl, struct* list_head *before)
860	{
861	list_add_tail(new: &fl->flc_list, head: before);
862	locks_insert_global_locks(flc: fl);
863	}
864
865	static void
866	locks_unlink_lock_ctx(struct file_lock_core *fl)
867	{
868	locks_delete_global_locks(flc: fl);
869	list_del_init(entry: &fl->flc_list);
870	locks_wake_up_blocks(blocker: fl);
871	}
872
873	static void
874	locks_delete_lock_ctx(struct file_lock_core fl, struct* list_head *dispose)
875	{
876	locks_unlink_lock_ctx(fl);
877	if (dispose)
878	list_add(new: &fl->flc_list, head: dispose);
879	else
880	locks_free_lock(file_lock(flc: fl));
881	}
882
883	/ Determine if lock sys_fl blocks lock caller_fl. Common functionality*
884	* checks for shared/exclusive status of overlapping locks.
885	*/
886	static bool locks_conflict(struct file_lock_core *caller_flc,
887	struct file_lock_core *sys_flc)
888	{
889	if (sys_flc->flc_type == F_WRLCK)
890	return true;
891	if (caller_flc->flc_type == F_WRLCK)
892	return true;
893	return false;
894	}
895
896	/ Determine if lock sys_fl blocks lock caller_fl. POSIX specific*
897	* checking before calling the locks_conflict().
898	*/
899	static bool posix_locks_conflict(struct file_lock_core *caller_flc,
900	struct file_lock_core *sys_flc)
901	{
902	struct file_lock *caller_fl = file_lock(flc: caller_flc);
903	struct file_lock *sys_fl = file_lock(flc: sys_flc);
904
905	/ POSIX locks owned by the same process do not conflict with*
906	* each other.
907	*/
908	if (posix_same_owner(fl1: caller_flc, fl2: sys_flc))
909	return false;
910
911	/ Check whether they overlap /
912	if (!locks_overlap(fl1: caller_fl, fl2: sys_fl))
913	return false;
914
915	return locks_conflict(caller_flc, sys_flc);
916	}
917
918	/ Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK*
919	* path so checks for additional GETLK-specific things like F_UNLCK.
920	*/
921	static bool posix_test_locks_conflict(struct file_lock *caller_fl,
922	struct file_lock *sys_fl)
923	{
924	struct file_lock_core *caller = &caller_fl->c;
925	struct file_lock_core *sys = &sys_fl->c;
926
927	/ F_UNLCK checks any locks on the same fd. /
928	if (lock_is_unlock(fl: caller_fl)) {
929	if (!posix_same_owner(fl1: caller, fl2: sys))
930	return false;
931	return locks_overlap(fl1: caller_fl, fl2: sys_fl);
932	}
933	return posix_locks_conflict(caller_flc: caller, sys_flc: sys);
934	}
935
936	/ Determine if lock sys_fl blocks lock caller_fl. FLOCK specific*
937	* checking before calling the locks_conflict().
938	*/
939	static bool flock_locks_conflict(struct file_lock_core *caller_flc,
940	struct file_lock_core *sys_flc)
941	{
942	/ FLOCK locks referring to the same filp do not conflict with*
943	* each other.
944	*/
945	if (caller_flc->flc_file == sys_flc->flc_file)
946	return false;
947
948	return locks_conflict(caller_flc, sys_flc);
949	}
950
951	void
952	posix_test_lock(struct file filp, struct* file_lock *fl)
953	{
954	struct file_lock *cfl;
955	struct file_lock_context *ctx;
956	struct inode *inode = file_inode(f: filp);
957	void *owner;
958	void (func)(void*);
959
960	ctx = locks_inode_context(inode);
961	if (!ctx \|\| list_empty_careful(head: &ctx->flc_posix)) {
962	fl->c.flc_type = F_UNLCK;
963	return;
964	}
965
966	retry:
967	spin_lock(lock: &ctx->flc_lock);
968	list_for_each_entry(cfl, &ctx->flc_posix, c.flc_list) {
969	if (!posix_test_locks_conflict(caller_fl: fl, sys_fl: cfl))
970	continue;
971	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
972	&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
973	owner = cfl->fl_lmops->lm_mod_owner;
974	func = cfl->fl_lmops->lm_expire_lock;
975	__module_get(module: owner);
976	spin_unlock(lock: &ctx->flc_lock);
977	(*func)();
978	module_put(module: owner);
979	goto retry;
980	}
981	locks_copy_conflock(fl, cfl);
982	goto out;
983	}
984	fl->c.flc_type = F_UNLCK;
985	out:
986	spin_unlock(lock: &ctx->flc_lock);
987	return;
988	}
989	EXPORT_SYMBOL(posix_test_lock);
990
991	/*
992	* Deadlock detection:
993	*
994	* We attempt to detect deadlocks that are due purely to posix file
995	* locks.
996	*
997	* We assume that a task can be waiting for at most one lock at a time.
998	* So for any acquired lock, the process holding that lock may be
999	* waiting on at most one other lock. That lock in turns may be held by
1000	* someone waiting for at most one other lock. Given a requested lock
1001	* caller_fl which is about to wait for a conflicting lock block_fl, we
1002	* follow this chain of waiters to ensure we are not about to create a
1003	* cycle.
1004	*
1005	* Since we do this before we ever put a process to sleep on a lock, we
1006	* are ensured that there is never a cycle; that is what guarantees that
1007	* the while() loop in posix_locks_deadlock() eventually completes.
1008	*
1009	* Note: the above assumption may not be true when handling lock
1010	* requests from a broken NFS client. It may also fail in the presence
1011	* of tasks (such as posix threads) sharing the same open file table.
1012	* To handle those cases, we just bail out after a few iterations.
1013	*
1014	* For FL_OFDLCK locks, the owner is the filp, not the files_struct.
1015	* Because the owner is not even nominally tied to a thread of
1016	* execution, the deadlock detection below can't reasonably work well. Just
1017	* skip it for those.
1018	*
1019	* In principle, we could do a more limited deadlock detection on FL_OFDLCK
1020	* locks that just checks for the case where two tasks are attempting to
1021	* upgrade from read to write locks on the same inode.
1022	*/
1023
1024	#define MAX_DEADLK_ITERATIONS 10
1025
1026	/ Find a lock that the owner of the given @blocker is blocking on. /
1027	static struct file_lock_core what_owner_is_waiting_for(struct* file_lock_core *blocker)
1028	{
1029	struct file_lock_core *flc;
1030
1031	hash_for_each_possible(blocked_hash, flc, flc_link, posix_owner_key(blocker)) {
1032	if (posix_same_owner(fl1: flc, fl2: blocker)) {
1033	while (flc->flc_blocker)
1034	flc = flc->flc_blocker;
1035	return flc;
1036	}
1037	}
1038	return NULL;
1039	}
1040
1041	/ Must be called with the blocked_lock_lock held! /
1042	static bool posix_locks_deadlock(struct file_lock *caller_fl,
1043	struct file_lock *block_fl)
1044	{
1045	struct file_lock_core *caller = &caller_fl->c;
1046	struct file_lock_core *blocker = &block_fl->c;
1047	int i = `0`;
1048
1049	lockdep_assert_held(&blocked_lock_lock);
1050
1051	/*
1052	* This deadlock detector can't reasonably detect deadlocks with
1053	* FL_OFDLCK locks, since they aren't owned by a process, per-se.
1054	*/
1055	if (caller->flc_flags & FL_OFDLCK)
1056	return false;
1057
1058	while ((blocker = what_owner_is_waiting_for(blocker))) {
1059	if (i++ > MAX_DEADLK_ITERATIONS)
1060	return false;
1061	if (posix_same_owner(fl1: caller, fl2: blocker))
1062	return true;
1063	}
1064	return false;
1065	}
1066
1067	/ Try to create a FLOCK lock on filp. We always insert new FLOCK locks*
1068	* after any leases, but before any posix locks.
1069	*
1070	* Note that if called with an FL_EXISTS argument, the caller may determine
1071	* whether or not a lock was successfully freed by testing the return
1072	* value for -ENOENT.
1073	*/
1074	static int flock_lock_inode(struct inode inode, struct* file_lock *request)
1075	{
1076	struct file_lock *new_fl = NULL;
1077	struct file_lock *fl;
1078	struct file_lock_context *ctx;
1079	int error = `0`;
1080	bool found = false;
1081	LIST_HEAD(dispose);
1082
1083	ctx = locks_get_lock_context(inode, type: request->c.flc_type);
1084	if (!ctx) {
1085	if (request->c.flc_type != F_UNLCK)
1086	return -ENOMEM;
1087	return (request->c.flc_flags & FL_EXISTS) ? -ENOENT : `0`;
1088	}
1089
1090	if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK)) {
1091	new_fl = locks_alloc_lock();
1092	if (!new_fl)
1093	return -ENOMEM;
1094	}
1095
1096	percpu_down_read(sem: &file_rwsem);
1097	spin_lock(lock: &ctx->flc_lock);
1098	if (request->c.flc_flags & FL_ACCESS)
1099	goto find_conflict;
1100
1101	list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) {
1102	if (request->c.flc_file != fl->c.flc_file)
1103	continue;
1104	if (request->c.flc_type == fl->c.flc_type)
1105	goto out;
1106	found = true;
1107	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1108	break;
1109	}
1110
1111	if (lock_is_unlock(fl: request)) {
1112	if ((request->c.flc_flags & FL_EXISTS) && !found)
1113	error = -ENOENT;
1114	goto out;
1115	}
1116
1117	find_conflict:
1118	list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) {
1119	if (!flock_locks_conflict(caller_flc: &request->c, sys_flc: &fl->c))
1120	continue;
1121	error = -EAGAIN;
1122	if (!(request->c.flc_flags & FL_SLEEP))
1123	goto out;
1124	error = FILE_LOCK_DEFERRED;
1125	locks_insert_block(blocker: &fl->c, waiter: &request->c, conflict: flock_locks_conflict);
1126	goto out;
1127	}
1128	if (request->c.flc_flags & FL_ACCESS)
1129	goto out;
1130	locks_copy_lock(new_fl, request);
1131	locks_move_blocks(new: new_fl, fl: request);
1132	locks_insert_lock_ctx(fl: &new_fl->c, before: &ctx->flc_flock);
1133	new_fl = NULL;
1134	error = `0`;
1135
1136	out:
1137	spin_unlock(lock: &ctx->flc_lock);
1138	percpu_up_read(sem: &file_rwsem);
1139	if (new_fl)
1140	locks_free_lock(new_fl);
1141	locks_dispose_list(dispose: &dispose);
1142	trace_flock_lock_inode(inode, fl: request, ret: error);
1143	return error;
1144	}
1145
1146	static int posix_lock_inode(struct inode inode, struct* file_lock *request,
1147	struct file_lock *conflock)
1148	{
1149	struct file_lock fl, tmp;
1150	struct file_lock *new_fl = NULL;
1151	struct file_lock *new_fl2 = NULL;
1152	struct file_lock *left = NULL;
1153	struct file_lock *right = NULL;
1154	struct file_lock_context *ctx;
1155	int error;
1156	bool added = false;
1157	LIST_HEAD(dispose);
1158	void *owner;
1159	void (func)(void*);
1160
1161	ctx = locks_get_lock_context(inode, type: request->c.flc_type);
1162	if (!ctx)
1163	return lock_is_unlock(fl: request) ? `0` : -ENOMEM;
1164
1165	/*
1166	* We may need two file_lock structures for this operation,
1167	* so we get them in advance to avoid races.
1168	*
1169	* In some cases we can be sure, that no new locks will be needed
1170	*/
1171	if (!(request->c.flc_flags & FL_ACCESS) &&
1172	(request->c.flc_type != F_UNLCK \|\|
1173	request->fl_start != `0` \|\| request->fl_end != OFFSET_MAX)) {
1174	new_fl = locks_alloc_lock();
1175	new_fl2 = locks_alloc_lock();
1176	}
1177
1178	retry:
1179	percpu_down_read(sem: &file_rwsem);
1180	spin_lock(lock: &ctx->flc_lock);
1181	/*
1182	* New lock request. Walk all POSIX locks and look for conflicts. If
1183	* there are any, either return error or put the request on the
1184	* blocker's list of waiters and the global blocked_hash.
1185	*/
1186	if (request->c.flc_type != F_UNLCK) {
1187	list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) {
1188	if (!posix_locks_conflict(caller_flc: &request->c, sys_flc: &fl->c))
1189	continue;
1190	if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
1191	&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
1192	owner = fl->fl_lmops->lm_mod_owner;
1193	func = fl->fl_lmops->lm_expire_lock;
1194	__module_get(module: owner);
1195	spin_unlock(lock: &ctx->flc_lock);
1196	percpu_up_read(sem: &file_rwsem);
1197	(*func)();
1198	module_put(module: owner);
1199	goto retry;
1200	}
1201	if (conflock)
1202	locks_copy_conflock(conflock, fl);
1203	error = -EAGAIN;
1204	if (!(request->c.flc_flags & FL_SLEEP))
1205	goto out;
1206	/*
1207	* Deadlock detection and insertion into the blocked
1208	* locks list must be done while holding the same lock!
1209	*/
1210	error = -EDEADLK;
1211	spin_lock(lock: &blocked_lock_lock);
1212	/*
1213	* Ensure that we don't find any locks blocked on this
1214	* request during deadlock detection.
1215	*/
1216	__locks_wake_up_blocks(blocker: &request->c);
1217	if (likely(!posix_locks_deadlock(request, fl))) {
1218	error = FILE_LOCK_DEFERRED;
1219	__locks_insert_block(blocker: &fl->c, waiter: &request->c,
1220	conflict: posix_locks_conflict);
1221	}
1222	spin_unlock(lock: &blocked_lock_lock);
1223	goto out;
1224	}
1225	}
1226
1227	/ If we're just looking for a conflict, we're done. /
1228	error = `0`;
1229	if (request->c.flc_flags & FL_ACCESS)
1230	goto out;
1231
1232	/ Find the first old lock with the same owner as the new lock /
1233	list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) {
1234	if (posix_same_owner(fl1: &request->c, fl2: &fl->c))
1235	break;
1236	}
1237
1238	/ Process locks with this owner. /
1239	list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) {
1240	if (!posix_same_owner(fl1: &request->c, fl2: &fl->c))
1241	break;
1242
1243	/ Detect adjacent or overlapping regions (if same lock type) /
1244	if (request->c.flc_type == fl->c.flc_type) {
1245	/ In all comparisons of start vs end, use*
1246	* "start - 1" rather than "end + 1". If end
1247	* is OFFSET_MAX, end + 1 will become negative.
1248	*/
1249	if (fl->fl_end < request->fl_start - `1`)
1250	continue;
1251	/ If the next lock in the list has entirely bigger*
1252	* addresses than the new one, insert the lock here.
1253	*/
1254	if (fl->fl_start - `1` > request->fl_end)
1255	break;
1256
1257	/ If we come here, the new and old lock are of the*
1258	* same type and adjacent or overlapping. Make one
1259	* lock yielding from the lower start address of both
1260	* locks to the higher end address.
1261	*/
1262	if (fl->fl_start > request->fl_start)
1263	fl->fl_start = request->fl_start;
1264	else
1265	request->fl_start = fl->fl_start;
1266	if (fl->fl_end < request->fl_end)
1267	fl->fl_end = request->fl_end;
1268	else
1269	request->fl_end = fl->fl_end;
1270	if (added) {
1271	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1272	continue;
1273	}
1274	request = fl;
1275	added = true;
1276	} else {
1277	/ Processing for different lock types is a bit*
1278	* more complex.
1279	*/
1280	if (fl->fl_end < request->fl_start)
1281	continue;
1282	if (fl->fl_start > request->fl_end)
1283	break;
1284	if (lock_is_unlock(fl: request))
1285	added = true;
1286	if (fl->fl_start < request->fl_start)
1287	left = fl;
1288	/ If the next lock in the list has a higher end*
1289	* address than the new one, insert the new one here.
1290	*/
1291	if (fl->fl_end > request->fl_end) {
1292	right = fl;
1293	break;
1294	}
1295	if (fl->fl_start >= request->fl_start) {
1296	/ The new lock completely replaces an old*
1297	* one (This may happen several times).
1298	*/
1299	if (added) {
1300	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1301	continue;
1302	}
1303	/*
1304	* Replace the old lock with new_fl, and
1305	* remove the old one. It's safe to do the
1306	* insert here since we know that we won't be
1307	* using new_fl later, and that the lock is
1308	* just replacing an existing lock.
1309	*/
1310	error = -ENOLCK;
1311	if (!new_fl)
1312	goto out;
1313	locks_copy_lock(new_fl, request);
1314	locks_move_blocks(new: new_fl, fl: request);
1315	request = new_fl;
1316	new_fl = NULL;
1317	locks_insert_lock_ctx(fl: &request->c,
1318	before: &fl->c.flc_list);
1319	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1320	added = true;
1321	}
1322	}
1323	}
1324
1325	/*
1326	* The above code only modifies existing locks in case of merging or
1327	* replacing. If new lock(s) need to be inserted all modifications are
1328	* done below this, so it's safe yet to bail out.
1329	*/
1330	error = -ENOLCK; / "no luck" /
1331	if (right && left == right && !new_fl2)
1332	goto out;
1333
1334	error = `0`;
1335	if (!added) {
1336	if (lock_is_unlock(fl: request)) {
1337	if (request->c.flc_flags & FL_EXISTS)
1338	error = -ENOENT;
1339	goto out;
1340	}
1341
1342	if (!new_fl) {
1343	error = -ENOLCK;
1344	goto out;
1345	}
1346	locks_copy_lock(new_fl, request);
1347	locks_move_blocks(new: new_fl, fl: request);
1348	locks_insert_lock_ctx(fl: &new_fl->c, before: &fl->c.flc_list);
1349	fl = new_fl;
1350	new_fl = NULL;
1351	}
1352	if (right) {
1353	if (left == right) {
1354	/ The new lock breaks the old one in two pieces,*
1355	* so we have to use the second new lock.
1356	*/
1357	left = new_fl2;
1358	new_fl2 = NULL;
1359	locks_copy_lock(left, right);
1360	locks_insert_lock_ctx(fl: &left->c, before: &fl->c.flc_list);
1361	}
1362	right->fl_start = request->fl_end + `1`;
1363	locks_wake_up_blocks(blocker: &right->c);
1364	}
1365	if (left) {
1366	left->fl_end = request->fl_start - `1`;
1367	locks_wake_up_blocks(blocker: &left->c);
1368	}
1369	out:
1370	trace_posix_lock_inode(inode, fl: request, ret: error);
1371	spin_unlock(lock: &ctx->flc_lock);
1372	percpu_up_read(sem: &file_rwsem);
1373	/*
1374	* Free any unused locks.
1375	*/
1376	if (new_fl)
1377	locks_free_lock(new_fl);
1378	if (new_fl2)
1379	locks_free_lock(new_fl2);
1380	locks_dispose_list(dispose: &dispose);
1381
1382	return error;
1383	}
1384
1385	/**
1386	* posix_lock_file - Apply a POSIX-style lock to a file
1387	* @filp: The file to apply the lock to
1388	* @fl: The lock to be applied
1389	* @conflock: Place to return a copy of the conflicting lock, if found.
1390	*
1391	* Add a POSIX style lock to a file.
1392	* We merge adjacent & overlapping locks whenever possible.
1393	* POSIX locks are sorted by owner task, then by starting address
1394	*
1395	* Note that if called with an FL_EXISTS argument, the caller may determine
1396	* whether or not a lock was successfully freed by testing the return
1397	* value for -ENOENT.
1398	*/
1399	int posix_lock_file(struct file filp, struct* file_lock *fl,
1400	struct file_lock *conflock)
1401	{
1402	return posix_lock_inode(inode: file_inode(f: filp), request: fl, conflock);
1403	}
1404	EXPORT_SYMBOL(posix_lock_file);
1405
1406	/**
1407	* posix_lock_inode_wait - Apply a POSIX-style lock to a file
1408	* @inode: inode of file to which lock request should be applied
1409	* @fl: The lock to be applied
1410	*
1411	* Apply a POSIX style lock request to an inode.
1412	*/
1413	static int posix_lock_inode_wait(struct inode inode, struct* file_lock *fl)
1414	{
1415	int error;
1416	might_sleep ();
1417	for (;;) {
1418	error = posix_lock_inode(inode, request: fl, NULL);
1419	if (error != FILE_LOCK_DEFERRED)
1420	break;
1421	error = wait_event_interruptible(fl->c.flc_wait,
1422	list_empty(&fl->c.flc_blocked_member));
1423	if (error)
1424	break;
1425	}
1426	locks_delete_block(fl);
1427	return error;
1428	}
1429
1430	static void lease_clear_pending(struct file_lease fl, int* arg)
1431	{
1432	switch (arg) {
1433	case F_UNLCK:
1434	fl->c.flc_flags &= ~FL_UNLOCK_PENDING;
1435	fallthrough;
1436	case F_RDLCK:
1437	fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING;
1438	}
1439	}
1440
1441	/ We already had a lease on this file; just change its type /
1442	int lease_modify(struct file_lease fl, int* arg, struct list_head *dispose)
1443	{
1444	int error = assign_type(flc: &fl->c, type: arg);
1445
1446	if (error)
1447	return error;
1448	lease_clear_pending(fl, arg);
1449	locks_wake_up_blocks(blocker: &fl->c);
1450	if (arg == F_UNLCK) {
1451	struct file *filp = fl->c.flc_file;
1452
1453	f_delown(filp);
1454	file_f_owner(file: filp)->signum = `0`;
1455	fasync_helper(`0`, fl->c.flc_file, `0`, &fl->fl_fasync);
1456	if (fl->fl_fasync != NULL) {
1457	printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1458	fl->fl_fasync = NULL;
1459	}
1460	locks_delete_lock_ctx(fl: &fl->c, dispose);
1461	}
1462	return `0`;
1463	}
1464	EXPORT_SYMBOL(lease_modify);
1465
1466	static bool past_time(unsigned long then)
1467	{
1468	if (!then)
1469	/ 0 is a special value meaning "this never expires": /
1470	return false;
1471	return time_after(jiffies, then);
1472	}
1473
1474	static void time_out_leases(struct inode inode, struct* list_head *dispose)
1475	{
1476	struct file_lock_context *ctx = inode->i_flctx;
1477	struct file_lease fl, tmp;
1478
1479	lockdep_assert_held(&ctx->flc_lock);
1480
1481	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
1482	trace_time_out_leases(inode, fl);
1483	if (past_time(then: fl->fl_downgrade_time))
1484	lease_modify(fl, F_RDLCK, dispose);
1485	if (past_time(then: fl->fl_break_time))
1486	lease_modify(fl, F_UNLCK, dispose);
1487	}
1488	}
1489
1490	static bool leases_conflict(struct file_lock_core lc, struct* file_lock_core *bc)
1491	{
1492	bool rc;
1493	struct file_lease *lease = file_lease(flc: lc);
1494	struct file_lease *breaker = file_lease(flc: bc);
1495
1496	if (lease->fl_lmops->lm_breaker_owns_lease
1497	&& lease->fl_lmops->lm_breaker_owns_lease(lease))
1498	return false;
1499	if ((bc->flc_flags & FL_LAYOUT) != (lc->flc_flags & FL_LAYOUT)) {
1500	rc = false;
1501	goto trace;
1502	}
1503	if ((bc->flc_flags & FL_DELEG) && (lc->flc_flags & FL_LEASE)) {
1504	rc = false;
1505	goto trace;
1506	}
1507
1508	rc = locks_conflict(caller_flc: bc, sys_flc: lc);
1509	trace:
1510	trace_leases_conflict(conflict: rc, lease, breaker);
1511	return rc;
1512	}
1513
1514	static bool
1515	any_leases_conflict(struct inode inode, struct* file_lease *breaker)
1516	{
1517	struct file_lock_context *ctx = inode->i_flctx;
1518	struct file_lock_core *flc;
1519
1520	lockdep_assert_held(&ctx->flc_lock);
1521
1522	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
1523	if (leases_conflict(lc: flc, bc: &breaker->c))
1524	return true;
1525	}
1526	return false;
1527	}
1528
1529	/**
1530	* __break_lease - revoke all outstanding leases on file
1531	* @inode: the inode of the file to return
1532	* @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1533	* break all leases
1534	* @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1535	* only delegations
1536	*
1537	* break_lease (inlined for speed) has checked there already is at least
1538	* some kind of lock (maybe a lease) on this file. Leases are broken on
1539	* a call to open() or truncate(). This function can sleep unless you
1540	* specified %O_NONBLOCK to your open().
1541	*/
1542	int __break_lease(struct inode inode, unsigned* int mode, unsigned int type)
1543	{
1544	int error = `0`;
1545	struct file_lock_context *ctx;
1546	struct file_lease new_fl, fl, *tmp;
1547	unsigned long break_time;
1548	int want_write = (mode & O_ACCMODE) != O_RDONLY;
1549	LIST_HEAD(dispose);
1550
1551	new_fl = lease_alloc(NULL, type: want_write ? F_WRLCK : F_RDLCK);
1552	if (IS_ERR(ptr: new_fl))
1553	return PTR_ERR(ptr: new_fl);
1554	new_fl->c.flc_flags = type;
1555
1556	/ typically we will check that ctx is non-NULL before calling /
1557	ctx = locks_inode_context(inode);
1558	if (!ctx) {
1559	WARN_ON_ONCE(`1`);
1560	goto free_lock;
1561	}
1562
1563	percpu_down_read(sem: &file_rwsem);
1564	spin_lock(lock: &ctx->flc_lock);
1565
1566	time_out_leases(inode, dispose: &dispose);
1567
1568	if (!any_leases_conflict(inode, breaker: new_fl))
1569	goto out;
1570
1571	break_time = `0`;
1572	if (lease_break_time > `0`) {
1573	break_time = jiffies + lease_break_time * HZ;
1574	if (break_time == `0`)
1575	break_time++; / so that 0 means no break time /
1576	}
1577
1578	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
1579	if (!leases_conflict(lc: &fl->c, bc: &new_fl->c))
1580	continue;
1581	if (want_write) {
1582	if (fl->c.flc_flags & FL_UNLOCK_PENDING)
1583	continue;
1584	fl->c.flc_flags \|= FL_UNLOCK_PENDING;
1585	fl->fl_break_time = break_time;
1586	} else {
1587	if (lease_breaking(fl))
1588	continue;
1589	fl->c.flc_flags \|= FL_DOWNGRADE_PENDING;
1590	fl->fl_downgrade_time = break_time;
1591	}
1592	if (fl->fl_lmops->lm_break(fl))
1593	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1594	}
1595
1596	if (list_empty(head: &ctx->flc_lease))
1597	goto out;
1598
1599	if (mode & O_NONBLOCK) {
1600	trace_break_lease_noblock(inode, fl: new_fl);
1601	error = -EWOULDBLOCK;
1602	goto out;
1603	}
1604
1605	restart:
1606	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
1607	break_time = fl->fl_break_time;
1608	if (break_time != `0`)
1609	break_time -= jiffies;
1610	if (break_time == `0`)
1611	break_time++;
1612	locks_insert_block(blocker: &fl->c, waiter: &new_fl->c, conflict: leases_conflict);
1613	trace_break_lease_block(inode, fl: new_fl);
1614	spin_unlock(lock: &ctx->flc_lock);
1615	percpu_up_read(sem: &file_rwsem);
1616
1617	locks_dispose_list(dispose: &dispose);
1618	error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
1619	list_empty(&new_fl->c.flc_blocked_member),
1620	break_time);
1621
1622	percpu_down_read(sem: &file_rwsem);
1623	spin_lock(lock: &ctx->flc_lock);
1624	trace_break_lease_unblock(inode, fl: new_fl);
1625	__locks_delete_block(waiter: &new_fl->c);
1626	if (error >= `0`) {
1627	/*
1628	* Wait for the next conflicting lease that has not been
1629	* broken yet
1630	*/
1631	if (error == `0`)
1632	time_out_leases(inode, dispose: &dispose);
1633	if (any_leases_conflict(inode, breaker: new_fl))
1634	goto restart;
1635	error = `0`;
1636	}
1637	out:
1638	spin_unlock(lock: &ctx->flc_lock);
1639	percpu_up_read(sem: &file_rwsem);
1640	locks_dispose_list(dispose: &dispose);
1641	free_lock:
1642	locks_free_lease(new_fl);
1643	return error;
1644	}
1645	EXPORT_SYMBOL(__break_lease);
1646
1647	/**
1648	* lease_get_mtime - update modified time of an inode with exclusive lease
1649	* @inode: the inode
1650	* @time: pointer to a timespec which contains the last modified time
1651	*
1652	* This is to force NFS clients to flush their caches for files with
1653	* exclusive leases. The justification is that if someone has an
1654	* exclusive lease, then they could be modifying it.
1655	*/
1656	void lease_get_mtime(struct inode inode, struct* timespec64 *time)
1657	{
1658	bool has_lease = false;
1659	struct file_lock_context *ctx;
1660	struct file_lock_core *flc;
1661
1662	ctx = locks_inode_context(inode);
1663	if (ctx && !list_empty_careful(head: &ctx->flc_lease)) {
1664	spin_lock(lock: &ctx->flc_lock);
1665	flc = list_first_entry_or_null(&ctx->flc_lease,
1666	struct file_lock_core, flc_list);
1667	if (flc && flc->flc_type == F_WRLCK)
1668	has_lease = true;
1669	spin_unlock(lock: &ctx->flc_lock);
1670	}
1671
1672	if (has_lease)
1673	*time = current_time(inode);
1674	}
1675	EXPORT_SYMBOL(lease_get_mtime);
1676
1677	/**
1678	* fcntl_getlease - Enquire what lease is currently active
1679	* @filp: the file
1680	*
1681	* The value returned by this function will be one of
1682	* (if no lease break is pending):
1683	*
1684	* %F_RDLCK to indicate a shared lease is held.
1685	*
1686	* %F_WRLCK to indicate an exclusive lease is held.
1687	*
1688	* %F_UNLCK to indicate no lease is held.
1689	*
1690	* (if a lease break is pending):
1691	*
1692	* %F_RDLCK to indicate an exclusive lease needs to be
1693	* changed to a shared lease (or removed).
1694	*
1695	* %F_UNLCK to indicate the lease needs to be removed.
1696	*
1697	* XXX: sfr & willy disagree over whether F_INPROGRESS
1698	* should be returned to userspace.
1699	*/
1700	int fcntl_getlease(struct file *filp)
1701	{
1702	struct file_lease *fl;
1703	struct inode *inode = file_inode(f: filp);
1704	struct file_lock_context *ctx;
1705	int type = F_UNLCK;
1706	LIST_HEAD(dispose);
1707
1708	ctx = locks_inode_context(inode);
1709	if (ctx && !list_empty_careful(head: &ctx->flc_lease)) {
1710	percpu_down_read(sem: &file_rwsem);
1711	spin_lock(lock: &ctx->flc_lock);
1712	time_out_leases(inode, dispose: &dispose);
1713	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
1714	if (fl->c.flc_file != filp)
1715	continue;
1716	type = target_leasetype(fl);
1717	break;
1718	}
1719	spin_unlock(lock: &ctx->flc_lock);
1720	percpu_up_read(sem: &file_rwsem);
1721
1722	locks_dispose_list(dispose: &dispose);
1723	}
1724	return type;
1725	}
1726
1727	/**
1728	* check_conflicting_open - see if the given file points to an inode that has
1729	* an existing open that would conflict with the
1730	* desired lease.
1731	* @filp: file to check
1732	* @arg: type of lease that we're trying to acquire
1733	* @flags: current lock flags
1734	*
1735	* Check to see if there's an existing open fd on this file that would
1736	* conflict with the lease we're trying to set.
1737	*/
1738	static int
1739	check_conflicting_open(struct file filp, const* int arg, int flags)
1740	{
1741	struct inode *inode = file_inode(f: filp);
1742	int self_wcount = `0`, self_rcount = `0`;
1743
1744	if (flags & FL_LAYOUT)
1745	return `0`;
1746	if (flags & FL_DELEG)
1747	/ We leave these checks to the caller /
1748	return `0`;
1749
1750	if (arg == F_RDLCK)
1751	return inode_is_open_for_write(inode) ? -EAGAIN : `0`;
1752	else if (arg != F_WRLCK)
1753	return `0`;
1754
1755	/*
1756	* Make sure that only read/write count is from lease requestor.
1757	* Note that this will result in denying write leases when i_writecount
1758	* is negative, which is what we want. (We shouldn't grant write leases
1759	* on files open for execution.)
1760	*/
1761	if (filp->f_mode & FMODE_WRITE)
1762	self_wcount = `1`;
1763	else if (filp->f_mode & FMODE_READ)
1764	self_rcount = `1`;
1765
1766	if (atomic_read(v: &inode->i_writecount) != self_wcount \|\|
1767	atomic_read(v: &inode->i_readcount) != self_rcount)
1768	return -EAGAIN;
1769
1770	return `0`;
1771	}
1772
1773	static int
1774	generic_add_lease(struct file filp, int* arg, struct file_lease *flp, void* **priv)
1775	{
1776	struct file_lease fl, my_fl = NULL, *lease;
1777	struct inode *inode = file_inode(f: filp);
1778	struct file_lock_context *ctx;
1779	bool is_deleg = (*flp)->c.flc_flags & FL_DELEG;
1780	int error;
1781	LIST_HEAD(dispose);
1782
1783	lease = *flp;
1784	trace_generic_add_lease(inode, fl: lease);
1785
1786	error = file_f_owner_allocate(file: filp);
1787	if (error)
1788	return error;
1789
1790	/ Note that arg is never F_UNLCK here /
1791	ctx = locks_get_lock_context(inode, type: arg);
1792	if (!ctx)
1793	return -ENOMEM;
1794
1795	/*
1796	* In the delegation case we need mutual exclusion with
1797	* a number of operations that take the i_mutex. We trylock
1798	* because delegations are an optional optimization, and if
1799	* there's some chance of a conflict--we'd rather not
1800	* bother, maybe that's a sign this just isn't a good file to
1801	* hand out a delegation on.
1802	*/
1803	if (is_deleg && !inode_trylock(inode))
1804	return -EAGAIN;
1805
1806	percpu_down_read(sem: &file_rwsem);
1807	spin_lock(lock: &ctx->flc_lock);
1808	time_out_leases(inode, dispose: &dispose);
1809	error = check_conflicting_open(filp, arg, flags: lease->c.flc_flags);
1810	if (error)
1811	goto out;
1812
1813	/*
1814	* At this point, we know that if there is an exclusive
1815	* lease on this file, then we hold it on this filp
1816	* (otherwise our open of this file would have blocked).
1817	* And if we are trying to acquire an exclusive lease,
1818	* then the file is not open by anyone (including us)
1819	* except for this filp.
1820	*/
1821	error = -EAGAIN;
1822	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
1823	if (fl->c.flc_file == filp &&
1824	fl->c.flc_owner == lease->c.flc_owner) {
1825	my_fl = fl;
1826	continue;
1827	}
1828
1829	/*
1830	* No exclusive leases if someone else has a lease on
1831	* this file:
1832	*/
1833	if (arg == F_WRLCK)
1834	goto out;
1835	/*
1836	* Modifying our existing lease is OK, but no getting a
1837	* new lease if someone else is opening for write:
1838	*/
1839	if (fl->c.flc_flags & FL_UNLOCK_PENDING)
1840	goto out;
1841	}
1842
1843	if (my_fl != NULL) {
1844	lease = my_fl;
1845	error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1846	if (error)
1847	goto out;
1848	goto out_setup;
1849	}
1850
1851	error = -EINVAL;
1852	if (!leases_enable)
1853	goto out;
1854
1855	locks_insert_lock_ctx(fl: &lease->c, before: &ctx->flc_lease);
1856	/*
1857	* The check in break_lease() is lockless. It's possible for another
1858	* open to race in after we did the earlier check for a conflicting
1859	* open but before the lease was inserted. Check again for a
1860	* conflicting open and cancel the lease if there is one.
1861	*
1862	* We also add a barrier here to ensure that the insertion of the lock
1863	* precedes these checks.
1864	*/
1865	smp_mb();
1866	error = check_conflicting_open(filp, arg, flags: lease->c.flc_flags);
1867	if (error) {
1868	locks_unlink_lock_ctx(fl: &lease->c);
1869	goto out;
1870	}
1871
1872	out_setup:
1873	if (lease->fl_lmops->lm_setup)
1874	lease->fl_lmops->lm_setup(lease, priv);
1875	out:
1876	spin_unlock(lock: &ctx->flc_lock);
1877	percpu_up_read(sem: &file_rwsem);
1878	locks_dispose_list(dispose: &dispose);
1879	if (is_deleg)
1880	inode_unlock(inode);
1881	if (!error && !my_fl)
1882	*flp = NULL;
1883	return error;
1884	}
1885
1886	static int generic_delete_lease(struct file filp, void* *owner)
1887	{
1888	int error = -EAGAIN;
1889	struct file_lease fl, victim = NULL;
1890	struct inode *inode = file_inode(f: filp);
1891	struct file_lock_context *ctx;
1892	LIST_HEAD(dispose);
1893
1894	ctx = locks_inode_context(inode);
1895	if (!ctx) {
1896	trace_generic_delete_lease(inode, NULL);
1897	return error;
1898	}
1899
1900	percpu_down_read(sem: &file_rwsem);
1901	spin_lock(lock: &ctx->flc_lock);
1902	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
1903	if (fl->c.flc_file == filp &&
1904	fl->c.flc_owner == owner) {
1905	victim = fl;
1906	break;
1907	}
1908	}
1909	trace_generic_delete_lease(inode, fl: victim);
1910	if (victim)
1911	error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1912	spin_unlock(lock: &ctx->flc_lock);
1913	percpu_up_read(sem: &file_rwsem);
1914	locks_dispose_list(dispose: &dispose);
1915	return error;
1916	}
1917
1918	/**
1919	* generic_setlease - sets a lease on an open file
1920	* @filp: file pointer
1921	* @arg: type of lease to obtain
1922	* @flp: input - file_lock to use, output - file_lock inserted
1923	* @priv: private data for lm_setup (may be NULL if lm_setup
1924	* doesn't require it)
1925	*
1926	* The (input) flp->fl_lmops->lm_break function is required
1927	* by break_lease().
1928	*/
1929	int generic_setlease(struct file filp, int* arg, struct file_lease **flp,
1930	void **priv)
1931	{
1932	switch (arg) {
1933	case F_UNLCK:
1934	return generic_delete_lease(filp, owner: *priv);
1935	case F_RDLCK:
1936	case F_WRLCK:
1937	if (!(*flp)->fl_lmops->lm_break) {
1938	WARN_ON_ONCE(`1`);
1939	return -ENOLCK;
1940	}
1941
1942	return generic_add_lease(filp, arg, flp, priv);
1943	default:
1944	return -EINVAL;
1945	}
1946	}
1947	EXPORT_SYMBOL(generic_setlease);
1948
1949	/*
1950	* Kernel subsystems can register to be notified on any attempt to set
1951	* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1952	* to close files that it may have cached when there is an attempt to set a
1953	* conflicting lease.
1954	*/
1955	static struct srcu_notifier_head lease_notifier_chain;
1956
1957	static inline void
1958	lease_notifier_chain_init(void)
1959	{
1960	srcu_init_notifier_head(nh: &lease_notifier_chain);
1961	}
1962
1963	static inline void
1964	setlease_notifier(int arg, struct file_lease *lease)
1965	{
1966	if (arg != F_UNLCK)
1967	srcu_notifier_call_chain(nh: &lease_notifier_chain, val: arg, v: lease);
1968	}
1969
1970	int lease_register_notifier(struct notifier_block *nb)
1971	{
1972	return srcu_notifier_chain_register(nh: &lease_notifier_chain, nb);
1973	}
1974	EXPORT_SYMBOL_GPL(lease_register_notifier);
1975
1976	void lease_unregister_notifier(struct notifier_block *nb)
1977	{
1978	srcu_notifier_chain_unregister(nh: &lease_notifier_chain, nb);
1979	}
1980	EXPORT_SYMBOL_GPL(lease_unregister_notifier);
1981
1982
1983	int
1984	kernel_setlease(struct file filp, int* arg, struct file_lease *lease, void* **priv)
1985	{
1986	if (lease)
1987	setlease_notifier(arg, lease: *lease);
1988	if (filp->f_op->setlease)
1989	return filp->f_op->setlease(filp, arg, lease, priv);
1990	else
1991	return generic_setlease(filp, arg, lease, priv);
1992	}
1993	EXPORT_SYMBOL_GPL(kernel_setlease);
1994
1995	/**
1996	* vfs_setlease - sets a lease on an open file
1997	* @filp: file pointer
1998	* @arg: type of lease to obtain
1999	* @lease: file_lock to use when adding a lease
2000	* @priv: private info for lm_setup when adding a lease (may be
2001	* NULL if lm_setup doesn't require it)
2002	*
2003	* Call this to establish a lease on the file. The "lease" argument is not
2004	* used for F_UNLCK requests and may be NULL. For commands that set or alter
2005	* an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
2006	* set; if not, this function will return -ENOLCK (and generate a scary-looking
2007	* stack trace).
2008	*
2009	* The "priv" pointer is passed directly to the lm_setup function as-is. It
2010	* may be NULL if the lm_setup operation doesn't require it.
2011	*/
2012	int
2013	vfs_setlease(struct file filp, int* arg, struct file_lease *lease, void* **priv)
2014	{
2015	struct inode *inode = file_inode(f: filp);
2016	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap: file_mnt_idmap(file: filp), inode);
2017	int error;
2018
2019	if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
2020	return -EACCES;
2021	if (!S_ISREG(inode->i_mode))
2022	return -EINVAL;
2023	error = security_file_lock(file: filp, cmd: arg);
2024	if (error)
2025	return error;
2026	return kernel_setlease(filp, arg, lease, priv);
2027	}
2028	EXPORT_SYMBOL_GPL(vfs_setlease);
2029
2030	static int do_fcntl_add_lease(unsigned int fd, struct file filp, int* arg)
2031	{
2032	struct file_lease *fl;
2033	struct fasync_struct *new;
2034	int error;
2035
2036	fl = lease_alloc(filp, type: arg);
2037	if (IS_ERR(ptr: fl))
2038	return PTR_ERR(ptr: fl);
2039
2040	new = fasync_alloc();
2041	if (!new) {
2042	locks_free_lease(fl);
2043	return -ENOMEM;
2044	}
2045	new->fa_fd = fd;
2046
2047	error = vfs_setlease(filp, arg, &fl, (void **)&new);
2048	if (fl)
2049	locks_free_lease(fl);
2050	if (new)
2051	fasync_free(new);
2052	return error;
2053	}
2054
2055	/**
2056	* fcntl_setlease - sets a lease on an open file
2057	* @fd: open file descriptor
2058	* @filp: file pointer
2059	* @arg: type of lease to obtain
2060	*
2061	* Call this fcntl to establish a lease on the file.
2062	* Note that you also need to call %F_SETSIG to
2063	* receive a signal when the lease is broken.
2064	*/
2065	int fcntl_setlease(unsigned int fd, struct file filp, int* arg)
2066	{
2067	if (arg == F_UNLCK)
2068	return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
2069	return do_fcntl_add_lease(fd, filp, arg);
2070	}
2071
2072	/**
2073	* flock_lock_inode_wait - Apply a FLOCK-style lock to a file
2074	* @inode: inode of the file to apply to
2075	* @fl: The lock to be applied
2076	*
2077	* Apply a FLOCK style lock request to an inode.
2078	*/
2079	static int flock_lock_inode_wait(struct inode inode, struct* file_lock *fl)
2080	{
2081	int error;
2082	might_sleep();
2083	for (;;) {
2084	error = flock_lock_inode(inode, request: fl);
2085	if (error != FILE_LOCK_DEFERRED)
2086	break;
2087	error = wait_event_interruptible(fl->c.flc_wait,
2088	list_empty(&fl->c.flc_blocked_member));
2089	if (error)
2090	break;
2091	}
2092	locks_delete_block(fl);
2093	return error;
2094	}
2095
2096	/**
2097	* locks_lock_inode_wait - Apply a lock to an inode
2098	* @inode: inode of the file to apply to
2099	* @fl: The lock to be applied
2100	*
2101	* Apply a POSIX or FLOCK style lock request to an inode.
2102	*/
2103	int locks_lock_inode_wait(struct inode inode, struct* file_lock *fl)
2104	{
2105	int res = `0`;
2106	switch (fl->c.flc_flags & (FL_POSIX\|FL_FLOCK)) {
2107	case FL_POSIX:
2108	res = posix_lock_inode_wait(inode, fl);
2109	break;
2110	case FL_FLOCK:
2111	res = flock_lock_inode_wait(inode, fl);
2112	break;
2113	default:
2114	BUG();
2115	}
2116	return res;
2117	}
2118	EXPORT_SYMBOL(locks_lock_inode_wait);
2119
2120	/**
2121	* sys_flock: - flock() system call.
2122	* @fd: the file descriptor to lock.
2123	* @cmd: the type of lock to apply.
2124	*
2125	* Apply a %FL_FLOCK style lock to an open file descriptor.
2126	* The @cmd can be one of:
2127	*
2128	* - %LOCK_SH -- a shared lock.
2129	* - %LOCK_EX -- an exclusive lock.
2130	* - %LOCK_UN -- remove an existing lock.
2131	* - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
2132	*
2133	* %LOCK_MAND support has been removed from the kernel.
2134	*/
2135	SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2136	{
2137	int can_sleep, error, type;
2138	struct file_lock fl;
2139
2140	/*
2141	* LOCK_MAND locks were broken for a long time in that they never
2142	* conflicted with one another and didn't prevent any sort of open,
2143	* read or write activity.
2144	*
2145	* Just ignore these requests now, to preserve legacy behavior, but
2146	* throw a warning to let people know that they don't actually work.
2147	*/
2148	if (cmd & LOCK_MAND) {
2149	pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid);
2150	return `0`;
2151	}
2152
2153	type = flock_translate_cmd(cmd: cmd & ~LOCK_NB);
2154	if (type < `0`)
2155	return type;
2156
2157	CLASS(fd, f)(fd);
2158	if (fd_empty(f))
2159	return -EBADF;
2160
2161	if (type != F_UNLCK && !(fd_file(f)->f_mode & (FMODE_READ \| FMODE_WRITE)))
2162	return -EBADF;
2163
2164	flock_make_lock(fd_file(f), fl: &fl, type);
2165
2166	error = security_file_lock(fd_file(f), cmd: fl.c.flc_type);
2167	if (error)
2168	return error;
2169
2170	can_sleep = !(cmd & LOCK_NB);
2171	if (can_sleep)
2172	fl.c.flc_flags \|= FL_SLEEP;
2173
2174	if (fd_file(f)->f_op->flock)
2175	error = fd_file(f)->f_op->flock(fd_file(f),
2176	(can_sleep) ? F_SETLKW : F_SETLK,
2177	&fl);
2178	else
2179	error = locks_lock_file_wait(fd_file(f), fl: &fl);
2180
2181	locks_release_private(&fl);
2182	return error;
2183	}
2184
2185	/**
2186	* vfs_test_lock - test file byte range lock
2187	* @filp: The file to test lock for
2188	* @fl: The lock to test; also used to hold result
2189	*
2190	* Returns -ERRNO on failure. Indicates presence of conflicting lock by
2191	* setting conf->fl_type to something other than F_UNLCK.
2192	*/
2193	int vfs_test_lock(struct file filp, struct* file_lock *fl)
2194	{
2195	WARN_ON_ONCE(filp != fl->c.flc_file);
2196	if (filp->f_op->lock)
2197	return filp->f_op->lock(filp, F_GETLK, fl);
2198	posix_test_lock(filp, fl);
2199	return `0`;
2200	}
2201	EXPORT_SYMBOL_GPL(vfs_test_lock);
2202
2203	/**
2204	* locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2205	* @fl: The file_lock who's fl_pid should be translated
2206	* @ns: The namespace into which the pid should be translated
2207	*
2208	* Used to translate a fl_pid into a namespace virtual pid number
2209	*/
2210	static pid_t locks_translate_pid(struct file_lock_core fl, struct* pid_namespace *ns)
2211	{
2212	pid_t vnr;
2213	struct pid *pid;
2214
2215	if (fl->flc_flags & FL_OFDLCK)
2216	return -`1`;
2217
2218	/ Remote locks report a negative pid value /
2219	if (fl->flc_pid <= `0`)
2220	return fl->flc_pid;
2221
2222	/*
2223	* If the flock owner process is dead and its pid has been already
2224	* freed, the translation below won't work, but we still want to show
2225	* flock owner pid number in init pidns.
2226	*/
2227	if (ns == &init_pid_ns)
2228	return (pid_t) fl->flc_pid;
2229
2230	rcu_read_lock();
2231	pid = find_pid_ns(nr: fl->flc_pid, ns: &init_pid_ns);
2232	vnr = pid_nr_ns(pid, ns);
2233	rcu_read_unlock();
2234	return vnr;
2235	}
2236
2237	static int posix_lock_to_flock(struct flock flock, struct* file_lock *fl)
2238	{
2239	flock->l_pid = locks_translate_pid(fl: &fl->c, ns: task_active_pid_ns(current));
2240	#if BITS_PER_LONG == 32
2241	/*
2242	* Make sure we can represent the posix lock via
2243	* legacy 32bit flock.
2244	*/
2245	if (fl->fl_start > OFFT_OFFSET_MAX)
2246	return -EOVERFLOW;
2247	if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
2248	return -EOVERFLOW;
2249	#endif
2250	flock->l_start = fl->fl_start;
2251	flock->l_len = fl->fl_end == OFFSET_MAX ? `0` :
2252	fl->fl_end - fl->fl_start + `1`;
2253	flock->l_whence = `0`;
2254	flock->l_type = fl->c.flc_type;
2255	return `0`;
2256	}
2257
2258	#if BITS_PER_LONG == 32
2259	static void posix_lock_to_flock64(struct flock64 flock, struct* file_lock *fl)
2260	{
2261	flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current));
2262	flock->l_start = fl->fl_start;
2263	flock->l_len = fl->fl_end == OFFSET_MAX ? `0` :
2264	fl->fl_end - fl->fl_start + `1`;
2265	flock->l_whence = `0`;
2266	flock->l_type = fl->c.flc_type;
2267	}
2268	#endif
2269
2270	/ Report the first existing lock that would conflict with l.*
2271	* This implements the F_GETLK command of fcntl().
2272	*/
2273	int fcntl_getlk(struct file filp, unsigned* int cmd, struct flock *flock)
2274	{
2275	struct file_lock *fl;
2276	int error;
2277
2278	fl = locks_alloc_lock();
2279	if (fl == NULL)
2280	return -ENOMEM;
2281	error = -EINVAL;
2282	if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
2283	&& flock->l_type != F_WRLCK)
2284	goto out;
2285
2286	error = flock_to_posix_lock(filp, fl, l: flock);
2287	if (error)
2288	goto out;
2289
2290	if (cmd == F_OFD_GETLK) {
2291	error = -EINVAL;
2292	if (flock->l_pid != `0`)
2293	goto out;
2294
2295	fl->c.flc_flags \|= FL_OFDLCK;
2296	fl->c.flc_owner = filp;
2297	}
2298
2299	error = vfs_test_lock(filp, fl);
2300	if (error)
2301	goto out;
2302
2303	flock->l_type = fl->c.flc_type;
2304	if (fl->c.flc_type != F_UNLCK) {
2305	error = posix_lock_to_flock(flock, fl);
2306	if (error)
2307	goto out;
2308	}
2309	out:
2310	locks_free_lock(fl);
2311	return error;
2312	}
2313
2314	/**
2315	* vfs_lock_file - file byte range lock
2316	* @filp: The file to apply the lock to
2317	* @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
2318	* @fl: The lock to be applied
2319	* @conf: Place to return a copy of the conflicting lock, if found.
2320	*
2321	* A caller that doesn't care about the conflicting lock may pass NULL
2322	* as the final argument.
2323	*
2324	* If the filesystem defines a private ->lock() method, then @conf will
2325	* be left unchanged; so a caller that cares should initialize it to
2326	* some acceptable default.
2327	*
2328	* To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
2329	* locks, the ->lock() interface may return asynchronously, before the lock has
2330	* been granted or denied by the underlying filesystem, if (and only if)
2331	* lm_grant is set. Additionally EXPORT_OP_ASYNC_LOCK in export_operations
2332	* flags need to be set.
2333	*
2334	* Callers expecting ->lock() to return asynchronously will only use F_SETLK,
2335	* not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a
2336	* blocking lock. When ->lock() does return asynchronously, it must return
2337	* FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes.
2338	* If the request is for non-blocking lock the file system should return
2339	* FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
2340	* with the result. If the request timed out the callback routine will return a
2341	* nonzero return code and the file system should release the lock. The file
2342	* system is also responsible to keep a corresponding posix lock when it
2343	* grants a lock so the VFS can find out which locks are locally held and do
2344	* the correct lock cleanup when required.
2345	* The underlying filesystem must not drop the kernel lock or call
2346	* ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2347	* return code.
2348	*/
2349	int vfs_lock_file(struct file filp, unsigned* int cmd, struct file_lock fl, struct* file_lock *conf)
2350	{
2351	WARN_ON_ONCE(filp != fl->c.flc_file);
2352	if (filp->f_op->lock)
2353	return filp->f_op->lock(filp, cmd, fl);
2354	else
2355	return posix_lock_file(filp, fl, conf);
2356	}
2357	EXPORT_SYMBOL_GPL(vfs_lock_file);
2358
2359	static int do_lock_file_wait(struct file filp, unsigned* int cmd,
2360	struct file_lock *fl)
2361	{
2362	int error;
2363
2364	error = security_file_lock(file: filp, cmd: fl->c.flc_type);
2365	if (error)
2366	return error;
2367
2368	for (;;) {
2369	error = vfs_lock_file(filp, cmd, fl, NULL);
2370	if (error != FILE_LOCK_DEFERRED)
2371	break;
2372	error = wait_event_interruptible(fl->c.flc_wait,
2373	list_empty(&fl->c.flc_blocked_member));
2374	if (error)
2375	break;
2376	}
2377	locks_delete_block(fl);
2378
2379	return error;
2380	}
2381
2382	/ Ensure that fl->fl_file has compatible f_mode for F_SETLK calls /
2383	static int
2384	check_fmode_for_setlk(struct file_lock *fl)
2385	{
2386	switch (fl->c.flc_type) {
2387	case F_RDLCK:
2388	if (!(fl->c.flc_file->f_mode & FMODE_READ))
2389	return -EBADF;
2390	break;
2391	case F_WRLCK:
2392	if (!(fl->c.flc_file->f_mode & FMODE_WRITE))
2393	return -EBADF;
2394	}
2395	return `0`;
2396	}
2397
2398	/ Apply the lock described by l to an open file descriptor.*
2399	* This implements both the F_SETLK and F_SETLKW commands of fcntl().
2400	*/
2401	int fcntl_setlk(unsigned int fd, struct file filp, unsigned* int cmd,
2402	struct flock *flock)
2403	{
2404	struct file_lock *file_lock = locks_alloc_lock();
2405	struct inode *inode = file_inode(f: filp);
2406	struct file *f;
2407	int error;
2408
2409	if (file_lock == NULL)
2410	return -ENOLCK;
2411
2412	error = flock_to_posix_lock(filp, fl: file_lock, l: flock);
2413	if (error)
2414	goto out;
2415
2416	error = check_fmode_for_setlk(fl: file_lock);
2417	if (error)
2418	goto out;
2419
2420	/*
2421	* If the cmd is requesting file-private locks, then set the
2422	* FL_OFDLCK flag and override the owner.
2423	*/
2424	switch (cmd) {
2425	case F_OFD_SETLK:
2426	error = -EINVAL;
2427	if (flock->l_pid != `0`)
2428	goto out;
2429
2430	cmd = F_SETLK;
2431	file_lock->c.flc_flags \|= FL_OFDLCK;
2432	file_lock->c.flc_owner = filp;
2433	break;
2434	case F_OFD_SETLKW:
2435	error = -EINVAL;
2436	if (flock->l_pid != `0`)
2437	goto out;
2438
2439	cmd = F_SETLKW;
2440	file_lock->c.flc_flags \|= FL_OFDLCK;
2441	file_lock->c.flc_owner = filp;
2442	fallthrough;
2443	case F_SETLKW:
2444	file_lock->c.flc_flags \|= FL_SLEEP;
2445	}
2446
2447	error = do_lock_file_wait(filp, cmd, fl: file_lock);
2448
2449	/*
2450	* Detect close/fcntl races and recover by zapping all POSIX locks
2451	* associated with this file and our files_struct, just like on
2452	* filp_flush(). There is no need to do that when we're
2453	* unlocking though, or for OFD locks.
2454	*/
2455	if (!error && file_lock->c.flc_type != F_UNLCK &&
2456	!(file_lock->c.flc_flags & FL_OFDLCK)) {
2457	struct files_struct *files = current->files;
2458	/*
2459	* We need that spin_lock here - it prevents reordering between
2460	* update of i_flctx->flc_posix and check for it done in
2461	* close(). rcu_read_lock() wouldn't do.
2462	*/
2463	spin_lock(lock: &files->file_lock);
2464	f = files_lookup_fd_locked(files, fd);
2465	spin_unlock(lock: &files->file_lock);
2466	if (f != filp) {
2467	locks_remove_posix(filp, files);
2468	error = -EBADF;
2469	}
2470	}
2471	out:
2472	trace_fcntl_setlk(inode, fl: file_lock, ret: error);
2473	locks_free_lock(file_lock);
2474	return error;
2475	}
2476
2477	#if BITS_PER_LONG == 32
2478	/ Report the first existing lock that would conflict with l.*
2479	* This implements the F_GETLK command of fcntl().
2480	*/
2481	int fcntl_getlk64(struct file filp, unsigned* int cmd, struct flock64 *flock)
2482	{
2483	struct file_lock *fl;
2484	int error;
2485
2486	fl = locks_alloc_lock();
2487	if (fl == NULL)
2488	return -ENOMEM;
2489
2490	error = -EINVAL;
2491	if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
2492	&& flock->l_type != F_WRLCK)
2493	goto out;
2494
2495	error = flock64_to_posix_lock(filp, fl, flock);
2496	if (error)
2497	goto out;
2498
2499	if (cmd == F_OFD_GETLK) {
2500	error = -EINVAL;
2501	if (flock->l_pid != `0`)
2502	goto out;
2503
2504	fl->c.flc_flags \|= FL_OFDLCK;
2505	fl->c.flc_owner = filp;
2506	}
2507
2508	error = vfs_test_lock(filp, fl);
2509	if (error)
2510	goto out;
2511
2512	flock->l_type = fl->c.flc_type;
2513	if (fl->c.flc_type != F_UNLCK)
2514	posix_lock_to_flock64(flock, fl);
2515
2516	out:
2517	locks_free_lock(fl);
2518	return error;
2519	}
2520
2521	/ Apply the lock described by l to an open file descriptor.*
2522	* This implements both the F_SETLK and F_SETLKW commands of fcntl().
2523	*/
2524	int fcntl_setlk64(unsigned int fd, struct file filp, unsigned* int cmd,
2525	struct flock64 *flock)
2526	{
2527	struct file_lock *file_lock = locks_alloc_lock();
2528	struct file *f;
2529	int error;
2530
2531	if (file_lock == NULL)
2532	return -ENOLCK;
2533
2534	error = flock64_to_posix_lock(filp, file_lock, flock);
2535	if (error)
2536	goto out;
2537
2538	error = check_fmode_for_setlk(file_lock);
2539	if (error)
2540	goto out;
2541
2542	/*
2543	* If the cmd is requesting file-private locks, then set the
2544	* FL_OFDLCK flag and override the owner.
2545	*/
2546	switch (cmd) {
2547	case F_OFD_SETLK:
2548	error = -EINVAL;
2549	if (flock->l_pid != `0`)
2550	goto out;
2551
2552	cmd = F_SETLK64;
2553	file_lock->c.flc_flags \|= FL_OFDLCK;
2554	file_lock->c.flc_owner = filp;
2555	break;
2556	case F_OFD_SETLKW:
2557	error = -EINVAL;
2558	if (flock->l_pid != `0`)
2559	goto out;
2560
2561	cmd = F_SETLKW64;
2562	file_lock->c.flc_flags \|= FL_OFDLCK;
2563	file_lock->c.flc_owner = filp;
2564	fallthrough;
2565	case F_SETLKW64:
2566	file_lock->c.flc_flags \|= FL_SLEEP;
2567	}
2568
2569	error = do_lock_file_wait(filp, cmd, file_lock);
2570
2571	/*
2572	* Detect close/fcntl races and recover by zapping all POSIX locks
2573	* associated with this file and our files_struct, just like on
2574	* filp_flush(). There is no need to do that when we're
2575	* unlocking though, or for OFD locks.
2576	*/
2577	if (!error && file_lock->c.flc_type != F_UNLCK &&
2578	!(file_lock->c.flc_flags & FL_OFDLCK)) {
2579	struct files_struct *files = current->files;
2580	/*
2581	* We need that spin_lock here - it prevents reordering between
2582	* update of i_flctx->flc_posix and check for it done in
2583	* close(). rcu_read_lock() wouldn't do.
2584	*/
2585	spin_lock(&files->file_lock);
2586	f = files_lookup_fd_locked(files, fd);
2587	spin_unlock(&files->file_lock);
2588	if (f != filp) {
2589	locks_remove_posix(filp, files);
2590	error = -EBADF;
2591	}
2592	}
2593	out:
2594	locks_free_lock(file_lock);
2595	return error;
2596	}
2597	#endif /* BITS_PER_LONG == 32 */
2598
2599	/*
2600	* This function is called when the file is being removed
2601	* from the task's fd array. POSIX locks belonging to this task
2602	* are deleted at this time.
2603	*/
2604	void locks_remove_posix(struct file *filp, fl_owner_t owner)
2605	{
2606	int error;
2607	struct inode *inode = file_inode(f: filp);
2608	struct file_lock lock;
2609	struct file_lock_context *ctx;
2610
2611	/*
2612	* If there are no locks held on this file, we don't need to call
2613	* posix_lock_file(). Another process could be setting a lock on this
2614	* file at the same time, but we wouldn't remove that lock anyway.
2615	*/
2616	ctx = locks_inode_context(inode);
2617	if (!ctx \|\| list_empty(head: &ctx->flc_posix))
2618	return;
2619
2620	locks_init_lock(&lock);
2621	lock.c.flc_type = F_UNLCK;
2622	lock.c.flc_flags = FL_POSIX \| FL_CLOSE;
2623	lock.fl_start = `0`;
2624	lock.fl_end = OFFSET_MAX;
2625	lock.c.flc_owner = owner;
2626	lock.c.flc_pid = current->tgid;
2627	lock.c.flc_file = filp;
2628	lock.fl_ops = NULL;
2629	lock.fl_lmops = NULL;
2630
2631	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
2632
2633	if (lock.fl_ops && lock.fl_ops->fl_release_private)
2634	lock.fl_ops->fl_release_private(&lock);
2635	trace_locks_remove_posix(inode, fl: &lock, ret: error);
2636	}
2637	EXPORT_SYMBOL(locks_remove_posix);
2638
2639	/ The i_flctx must be valid when calling into here /
2640	static void
2641	locks_remove_flock(struct file filp, struct* file_lock_context *flctx)
2642	{
2643	struct file_lock fl;
2644	struct inode *inode = file_inode(f: filp);
2645
2646	if (list_empty(head: &flctx->flc_flock))
2647	return;
2648
2649	flock_make_lock(filp, fl: &fl, F_UNLCK);
2650	fl.c.flc_flags \|= FL_CLOSE;
2651
2652	if (filp->f_op->flock)
2653	filp->f_op->flock(filp, F_SETLKW, &fl);
2654	else
2655	flock_lock_inode(inode, request: &fl);
2656
2657	if (fl.fl_ops && fl.fl_ops->fl_release_private)
2658	fl.fl_ops->fl_release_private(&fl);
2659	}
2660
2661	/ The i_flctx must be valid when calling into here /
2662	static void
2663	locks_remove_lease(struct file filp, struct* file_lock_context *ctx)
2664	{
2665	struct file_lease fl, tmp;
2666	LIST_HEAD(dispose);
2667
2668	if (list_empty(head: &ctx->flc_lease))
2669	return;
2670
2671	percpu_down_read(sem: &file_rwsem);
2672	spin_lock(lock: &ctx->flc_lock);
2673	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list)
2674	if (filp == fl->c.flc_file)
2675	lease_modify(fl, F_UNLCK, &dispose);
2676	spin_unlock(lock: &ctx->flc_lock);
2677	percpu_up_read(sem: &file_rwsem);
2678
2679	locks_dispose_list(dispose: &dispose);
2680	}
2681
2682	/*
2683	* This function is called on the last close of an open file.
2684	*/
2685	void locks_remove_file(struct file *filp)
2686	{
2687	struct file_lock_context *ctx;
2688
2689	ctx = locks_inode_context(inode: file_inode(f: filp));
2690	if (!ctx)
2691	return;
2692
2693	/ remove any OFD locks /
2694	locks_remove_posix(filp, filp);
2695
2696	/ remove flock locks /
2697	locks_remove_flock(filp, flctx: ctx);
2698
2699	/ remove any leases /
2700	locks_remove_lease(filp, ctx);
2701
2702	spin_lock(lock: &ctx->flc_lock);
2703	locks_check_ctx_file_list(filp, list: &ctx->flc_posix, list_type: "POSIX");
2704	locks_check_ctx_file_list(filp, list: &ctx->flc_flock, list_type: "FLOCK");
2705	locks_check_ctx_file_list(filp, list: &ctx->flc_lease, list_type: "LEASE");
2706	spin_unlock(lock: &ctx->flc_lock);
2707	}
2708
2709	/**
2710	* vfs_cancel_lock - file byte range unblock lock
2711	* @filp: The file to apply the unblock to
2712	* @fl: The lock to be unblocked
2713	*
2714	* Used by lock managers to cancel blocked requests
2715	*/
2716	int vfs_cancel_lock(struct file filp, struct* file_lock *fl)
2717	{
2718	WARN_ON_ONCE(filp != fl->c.flc_file);
2719	if (filp->f_op->lock)
2720	return filp->f_op->lock(filp, F_CANCELLK, fl);
2721	return `0`;
2722	}
2723	EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2724
2725	/**
2726	* vfs_inode_has_locks - are any file locks held on @inode?
2727	* @inode: inode to check for locks
2728	*
2729	* Return true if there are any FL_POSIX or FL_FLOCK locks currently
2730	* set on @inode.
2731	*/
2732	bool vfs_inode_has_locks(struct inode *inode)
2733	{
2734	struct file_lock_context *ctx;
2735	bool ret;
2736
2737	ctx = locks_inode_context(inode);
2738	if (!ctx)
2739	return false;
2740
2741	spin_lock(lock: &ctx->flc_lock);
2742	ret = !list_empty(head: &ctx->flc_posix) \|\| !list_empty(head: &ctx->flc_flock);
2743	spin_unlock(lock: &ctx->flc_lock);
2744	return ret;
2745	}
2746	EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
2747
2748	#ifdef CONFIG_PROC_FS
2749	#include <linux/proc_fs.h>
2750	#include <linux/seq_file.h>
2751
2752	struct locks_iterator {
2753	int li_cpu;
2754	loff_t li_pos;
2755	};
2756
2757	static void lock_get_status(struct seq_file f, struct* file_lock_core *flc,
2758	loff_t id, char pfx, int* repeat)
2759	{
2760	struct inode *inode = NULL;
2761	unsigned int pid;
2762	struct pid_namespace *proc_pidns = proc_pid_ns(sb: file_inode(f: f->file)->i_sb);
2763	int type = flc->flc_type;
2764	struct file_lock *fl = file_lock(flc);
2765
2766	pid = locks_translate_pid(fl: flc, ns: proc_pidns);
2767
2768	/*
2769	* If lock owner is dead (and pid is freed) or not visible in current
2770	* pidns, zero is shown as a pid value. Check lock info from
2771	* init_pid_ns to get saved lock pid value.
2772	*/
2773	if (flc->flc_file != NULL)
2774	inode = file_inode(f: flc->flc_file);
2775
2776	seq_printf(m: f, fmt: "%lld: ", id);
2777
2778	if (repeat)
2779	seq_printf(m: f, fmt: "%s", repeat - `1` + (int*)strlen(pfx), pfx);
2780
2781	if (flc->flc_flags & FL_POSIX) {
2782	if (flc->flc_flags & FL_ACCESS)
2783	seq_puts(m: f, s: "ACCESS");
2784	else if (flc->flc_flags & FL_OFDLCK)
2785	seq_puts(m: f, s: "OFDLCK");
2786	else
2787	seq_puts(m: f, s: "POSIX ");
2788
2789	seq_printf(m: f, fmt: " %s ",
2790	(inode == NULL) ? "NOINODE" : "ADVISORY ");
2791	} else if (flc->flc_flags & FL_FLOCK) {
2792	seq_puts(m: f, s: "FLOCK ADVISORY ");
2793	} else if (flc->flc_flags & (FL_LEASE\|FL_DELEG\|FL_LAYOUT)) {
2794	struct file_lease *lease = file_lease(flc);
2795
2796	type = target_leasetype(fl: lease);
2797
2798	if (flc->flc_flags & FL_DELEG)
2799	seq_puts(m: f, s: "DELEG ");
2800	else
2801	seq_puts(m: f, s: "LEASE ");
2802
2803	if (lease_breaking(fl: lease))
2804	seq_puts(m: f, s: "BREAKING ");
2805	else if (flc->flc_file)
2806	seq_puts(m: f, s: "ACTIVE ");
2807	else
2808	seq_puts(m: f, s: "BREAKER ");
2809	} else {
2810	seq_puts(m: f, s: "UNKNOWN UNKNOWN ");
2811	}
2812
2813	seq_printf(m: f, fmt: "%s ", (type == F_WRLCK) ? "WRITE" :
2814	(type == F_RDLCK) ? "READ" : "UNLCK");
2815	if (inode) {
2816	/ userspace relies on this representation of dev_t /
2817	seq_printf(m: f, fmt: "%d %02x:%02x:%lu ", pid,
2818	MAJOR(inode->i_sb->s_dev),
2819	MINOR(inode->i_sb->s_dev), inode->i_ino);
2820	} else {
2821	seq_printf(m: f, fmt: "%d <none>:0 ", pid);
2822	}
2823	if (flc->flc_flags & FL_POSIX) {
2824	if (fl->fl_end == OFFSET_MAX)
2825	seq_printf(m: f, fmt: "%Ld EOF\n", fl->fl_start);
2826	else
2827	seq_printf(m: f, fmt: "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2828	} else {
2829	seq_puts(m: f, s: "0 EOF\n");
2830	}
2831	}
2832
2833	static struct file_lock_core get_next_blocked_member(struct* file_lock_core *node)
2834	{
2835	struct file_lock_core *tmp;
2836
2837	/ NULL node or root node /
2838	if (node == NULL \|\| node->flc_blocker == NULL)
2839	return NULL;
2840
2841	/ Next member in the linked list could be itself /
2842	tmp = list_next_entry(node, flc_blocked_member);
2843	if (list_entry_is_head(tmp, &node->flc_blocker->flc_blocked_requests,
2844	flc_blocked_member)
2845	\|\| tmp == node) {
2846	return NULL;
2847	}
2848
2849	return tmp;
2850	}
2851
2852	static int locks_show(struct seq_file f, void* *v)
2853	{
2854	struct locks_iterator *iter = f->private;
2855	struct file_lock_core cur, tmp;
2856	struct pid_namespace *proc_pidns = proc_pid_ns(sb: file_inode(f: f->file)->i_sb);
2857	int level = `0`;
2858
2859	cur = hlist_entry(v, struct file_lock_core, flc_link);
2860
2861	if (locks_translate_pid(fl: cur, ns: proc_pidns) == `0`)
2862	return `0`;
2863
2864	/ View this crossed linked list as a binary tree, the first member of flc_blocked_requests*
2865	* is the left child of current node, the next silibing in flc_blocked_member is the
2866	* right child, we can alse get the parent of current node from flc_blocker, so this
2867	* question becomes traversal of a binary tree
2868	*/
2869	while (cur != NULL) {
2870	if (level)
2871	lock_get_status(f, flc: cur, id: iter->li_pos, pfx: "-> ", repeat: level);
2872	else
2873	lock_get_status(f, flc: cur, id: iter->li_pos, pfx: "", repeat: level);
2874
2875	if (!list_empty(head: &cur->flc_blocked_requests)) {
2876	/ Turn left /
2877	cur = list_first_entry_or_null(&cur->flc_blocked_requests,
2878	struct file_lock_core,
2879	flc_blocked_member);
2880	level++;
2881	} else {
2882	/ Turn right /
2883	tmp = get_next_blocked_member(node: cur);
2884	/ Fall back to parent node /
2885	while (tmp == NULL && cur->flc_blocker != NULL) {
2886	cur = cur->flc_blocker;
2887	level--;
2888	tmp = get_next_blocked_member(node: cur);
2889	}
2890	cur = tmp;
2891	}
2892	}
2893
2894	return `0`;
2895	}
2896
2897	static void __show_fd_locks(struct seq_file *f,
2898	struct list_head head, int* *id,
2899	struct file filp, struct* files_struct *files)
2900	{
2901	struct file_lock_core *fl;
2902
2903	list_for_each_entry(fl, head, flc_list) {
2904
2905	if (filp != fl->flc_file)
2906	continue;
2907	if (fl->flc_owner != files && fl->flc_owner != filp)
2908	continue;
2909
2910	(*id)++;
2911	seq_puts(m: f, s: "lock:\t");
2912	lock_get_status(f, flc: fl, id: *id, pfx: "", repeat: `0`);
2913	}
2914	}
2915
2916	void show_fd_locks(struct seq_file *f,
2917	struct file filp, struct* files_struct *files)
2918	{
2919	struct inode *inode = file_inode(f: filp);
2920	struct file_lock_context *ctx;
2921	int id = `0`;
2922
2923	ctx = locks_inode_context(inode);
2924	if (!ctx)
2925	return;
2926
2927	spin_lock(lock: &ctx->flc_lock);
2928	__show_fd_locks(f, head: &ctx->flc_flock, id: &id, filp, files);
2929	__show_fd_locks(f, head: &ctx->flc_posix, id: &id, filp, files);
2930	__show_fd_locks(f, head: &ctx->flc_lease, id: &id, filp, files);
2931	spin_unlock(lock: &ctx->flc_lock);
2932	}
2933
2934	static void locks_start(struct* seq_file f, loff_t pos)
2935	__acquires(&blocked_lock_lock)
2936	{
2937	struct locks_iterator *iter = f->private;
2938
2939	iter->li_pos = *pos + `1`;
2940	percpu_down_write(&file_rwsem);
2941	spin_lock(lock: &blocked_lock_lock);
2942	return seq_hlist_start_percpu(head: &file_lock_list.hlist, cpu: &iter->li_cpu, pos: *pos);
2943	}
2944
2945	static void locks_next(struct* seq_file f, void* v, loff_t pos)
2946	{
2947	struct locks_iterator *iter = f->private;
2948
2949	++iter->li_pos;
2950	return seq_hlist_next_percpu(v, head: &file_lock_list.hlist, cpu: &iter->li_cpu, pos);
2951	}
2952
2953	static void locks_stop(struct seq_file f, void* *v)
2954	__releases(&blocked_lock_lock)
2955	{
2956	spin_unlock(lock: &blocked_lock_lock);
2957	percpu_up_write(&file_rwsem);
2958	}
2959
2960	static const struct seq_operations locks_seq_operations = {
2961	.start = locks_start,
2962	.next = locks_next,
2963	.stop = locks_stop,
2964	.show = locks_show,
2965	};
2966
2967	static int __init proc_locks_init(void)
2968	{
2969	proc_create_seq_private(name: "locks", mode: `0`, NULL, ops: &locks_seq_operations,
2970	state_size: sizeof(struct locks_iterator), NULL);
2971	return `0`;
2972	}
2973	fs_initcall(proc_locks_init);
2974	#endif
2975
2976	static int __init filelock_init(void)
2977	{
2978	int i;
2979
2980	flctx_cache = kmem_cache_create("file_lock_ctx",
2981	sizeof(struct file_lock_context), `0`, SLAB_PANIC, NULL);
2982
2983	filelock_cache = kmem_cache_create("file_lock_cache",
2984	sizeof(struct file_lock), `0`, SLAB_PANIC, NULL);
2985
2986	filelease_cache = kmem_cache_create("file_lease_cache",
2987	sizeof(struct file_lease), `0`, SLAB_PANIC, NULL);
2988
2989	for_each_possible_cpu(i) {
2990	struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
2991
2992	spin_lock_init(&fll->lock);
2993	INIT_HLIST_HEAD(&fll->hlist);
2994	}
2995
2996	lease_notifier_chain_init();
2997	return `0`;
2998	}
2999	core_initcall(filelock_init);
3000

source code of linux/fs/locks.c