locking.c source code [linux/fs/btrfs/locking.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (C) 2008 Oracle. All rights reserved.
4	*/
5
6	#include <linux/sched.h>
7	#include <linux/pagemap.h>
8	#include <linux/spinlock.h>
9	#include <linux/page-flags.h>
10	#include <asm/bug.h>
11	#include <trace/events/btrfs.h>
12	#include "misc.h"
13	#include "ctree.h"
14	#include "extent_io.h"
15	#include "locking.h"
16
/*
 * Lockdep class keys for extent_buffer->lock's in this root.  For a given
 * eb, the lockdep key is determined by the btrfs_root it belongs to and
 * the level the eb occupies in the tree.
 *
 * Different roots are used for different purposes and may nest inside each
 * other and they require separate keysets.  As lockdep keys should be
 * static, assign keysets according to the purpose of the root as indicated
 * by btrfs_root->root_key.objectid.  This ensures that all special purpose
 * roots have separate keysets.
 *
 * Lock-nesting across peer nodes is always done with the immediate parent
 * node locked thus preventing deadlock.  As lockdep doesn't know this, use
 * subclass to avoid triggering lockdep warning in such cases.
 *
 * The key is set by the readpage_end_io_hook after the buffer has passed
 * csum validation but before the pages are unlocked.  It is also set by
 * btrfs_init_new_buffer on freshly allocated blocks.
 *
 * We also add a check to make sure the highest level of the tree is the
 * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
 * needs to be updated as well.
 */
40	#ifdef CONFIG_DEBUG_LOCK_ALLOC
41	#if BTRFS_MAX_LEVEL != 8
42	#error
43	#endif
44
45	#define DEFINE_LEVEL(stem, level) \
46	.names[level] = "btrfs-" stem "-0" #level,
47
48	#define DEFINE_NAME(stem) \
49	DEFINE_LEVEL(stem, 0) \
50	DEFINE_LEVEL(stem, 1) \
51	DEFINE_LEVEL(stem, 2) \
52	DEFINE_LEVEL(stem, 3) \
53	DEFINE_LEVEL(stem, 4) \
54	DEFINE_LEVEL(stem, 5) \
55	DEFINE_LEVEL(stem, 6) \
56	DEFINE_LEVEL(stem, 7)
57
58	static struct btrfs_lockdep_keyset {
59	u64 id; / root objectid /
60	/ Longest entry: btrfs-block-group-00 /
61	char names[BTRFS_MAX_LEVEL][`24`];
62	struct lock_class_key keys[BTRFS_MAX_LEVEL];
63	} btrfs_lockdep_keysets[] = {
64	{ .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
65	{ .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
66	{ .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
67	{ .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
68	{ .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
69	{ .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
70	{ .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
71	{ .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
72	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
73	{ .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
74	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
75	{ .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
76	{ .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
77	{ .id = `0`, DEFINE_NAME("tree") },
78	};
79
80	#undef DEFINE_LEVEL
81	#undef DEFINE_NAME
82
83	void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer eb, int* level)
84	{
85	struct btrfs_lockdep_keyset *ks;
86
87	ASSERT(level < ARRAY_SIZE(ks->keys));
88
89	/ Find the matching keyset, id 0 is the default entry /
90	for (ks = btrfs_lockdep_keysets; ks->id; ks++)
91	if (ks->id == objectid)
92	break;
93
94	lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
95	}
96
97	void btrfs_maybe_reset_lockdep_class(struct btrfs_root root, struct* extent_buffer *eb)
98	{
99	if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
100	btrfs_set_buffer_lockdep_class(objectid: root->root_key.objectid,
101	eb, level: btrfs_header_level(eb));
102	}
103
104	#endif
105
106	#ifdef CONFIG_BTRFS_DEBUG
107	static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
108	{
109	eb->lock_owner = owner;
110	}
111	#else
112	static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
113	#endif
114
/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning which reduces the
 * number of times the locking task needs to sleep.
 */
130
131	/*
132	* __btrfs_tree_read_lock - lock extent buffer for read
133	* @eb: the eb to be locked
134	* @nest: the nesting level to be used for lockdep
135	*
136	* This takes the read lock on the extent buffer, using the specified nesting
137	* level for lockdep purposes.
138	*/
139	void __btrfs_tree_read_lock(struct extent_buffer eb, enum* btrfs_lock_nesting nest)
140	{
141	u64 start_ns = `0`;
142
143	if (trace_btrfs_tree_read_lock_enabled())
144	start_ns = ktime_get_ns();
145
146	down_read_nested(sem: &eb->lock, subclass: nest);
147	trace_btrfs_tree_read_lock(eb, start_ns);
148	}
149
150	void btrfs_tree_read_lock(struct extent_buffer *eb)
151	{
152	__btrfs_tree_read_lock(eb, nest: BTRFS_NESTING_NORMAL);
153	}
154
155	/*
156	* Try-lock for read.
157	*
158	* Return 1 if the rwlock has been taken, 0 otherwise
159	*/
160	int btrfs_try_tree_read_lock(struct extent_buffer *eb)
161	{
162	if (down_read_trylock(sem: &eb->lock)) {
163	trace_btrfs_try_tree_read_lock(eb);
164	return `1`;
165	}
166	return `0`;
167	}
168
169	/*
170	* Try-lock for write.
171	*
172	* Return 1 if the rwlock has been taken, 0 otherwise
173	*/
174	int btrfs_try_tree_write_lock(struct extent_buffer *eb)
175	{
176	if (down_write_trylock(sem: &eb->lock)) {
177	btrfs_set_eb_lock_owner(eb, current->pid);
178	trace_btrfs_try_tree_write_lock(eb);
179	return `1`;
180	}
181	return `0`;
182	}
183
184	/*
185	* Release read lock.
186	*/
187	void btrfs_tree_read_unlock(struct extent_buffer *eb)
188	{
189	trace_btrfs_tree_read_unlock(eb);
190	up_read(sem: &eb->lock);
191	}
192
193	/*
194	* Lock eb for write.
195	*
196	* @eb: the eb to lock
197	* @nest: the nesting to use for the lock
198	*
199	* Returns with the eb->lock write locked.
200	*/
201	void __btrfs_tree_lock(struct extent_buffer eb, enum* btrfs_lock_nesting nest)
202	__acquires(&eb->lock)
203	{
204	u64 start_ns = `0`;
205
206	if (trace_btrfs_tree_lock_enabled())
207	start_ns = ktime_get_ns();
208
209	down_write_nested(sem: &eb->lock, subclass: nest);
210	btrfs_set_eb_lock_owner(eb, current->pid);
211	trace_btrfs_tree_lock(eb, start_ns);
212	}
213
214	void btrfs_tree_lock(struct extent_buffer *eb)
215	{
216	__btrfs_tree_lock(eb, nest: BTRFS_NESTING_NORMAL);
217	}
218
219	/*
220	* Release the write lock.
221	*/
222	void btrfs_tree_unlock(struct extent_buffer *eb)
223	{
224	trace_btrfs_tree_unlock(eb);
225	btrfs_set_eb_lock_owner(eb, owner: `0`);
226	up_write(sem: &eb->lock);
227	}
228
229	/*
230	* This releases any locks held in the path starting at level and going all the
231	* way up to the root.
232	*
233	* btrfs_search_slot will keep the lock held on higher nodes in a few corner
234	* cases, such as COW of the block at slot zero in the node. This ignores
235	* those rules, and it should only be called when there are no more updates to
236	* be done higher up in the tree.
237	*/
238	void btrfs_unlock_up_safe(struct btrfs_path path, int* level)
239	{
240	int i;
241
242	if (path->keep_locks)
243	return;
244
245	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
246	if (!path->nodes[i])
247	continue;
248	if (!path->locks[i])
249	continue;
250	btrfs_tree_unlock_rw(eb: path->nodes[i], rw: path->locks[i]);
251	path->locks[i] = `0`;
252	}
253	}
254
255	/*
256	* Loop around taking references on and locking the root node of the tree until
257	* we end up with a lock on the root node.
258	*
259	* Return: root extent buffer with write lock held
260	*/
261	struct extent_buffer btrfs_lock_root_node(struct* btrfs_root *root)
262	{
263	struct extent_buffer *eb;
264
265	while (`1`) {
266	eb = btrfs_root_node(root);
267
268	btrfs_maybe_reset_lockdep_class(root, eb);
269	btrfs_tree_lock(eb);
270	if (eb == root->node)
271	break;
272	btrfs_tree_unlock(eb);
273	free_extent_buffer(eb);
274	}
275	return eb;
276	}
277
278	/*
279	* Loop around taking references on and locking the root node of the tree until
280	* we end up with a lock on the root node.
281	*
282	* Return: root extent buffer with read lock held
283	*/
284	struct extent_buffer btrfs_read_lock_root_node(struct* btrfs_root *root)
285	{
286	struct extent_buffer *eb;
287
288	while (`1`) {
289	eb = btrfs_root_node(root);
290
291	btrfs_maybe_reset_lockdep_class(root, eb);
292	btrfs_tree_read_lock(eb);
293	if (eb == root->node)
294	break;
295	btrfs_tree_read_unlock(eb);
296	free_extent_buffer(eb);
297	}
298	return eb;
299	}
300
301	/*
302	* Loop around taking references on and locking the root node of the tree in
303	* nowait mode until we end up with a lock on the root node or returning to
304	* avoid blocking.
305	*
306	* Return: root extent buffer with read lock held or -EAGAIN.
307	*/
308	struct extent_buffer btrfs_try_read_lock_root_node(struct* btrfs_root *root)
309	{
310	struct extent_buffer *eb;
311
312	while (`1`) {
313	eb = btrfs_root_node(root);
314	if (!btrfs_try_tree_read_lock(eb)) {
315	free_extent_buffer(eb);
316	return ERR_PTR(error: -EAGAIN);
317	}
318	if (eb == root->node)
319	break;
320	btrfs_tree_read_unlock(eb);
321	free_extent_buffer(eb);
322	}
323	return eb;
324	}
325
/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock.  It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers.  If a reader and
 * a writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader.  Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */
339
340	void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
341	{
342	atomic_set(v: &lock->readers, i: `0`);
343	atomic_set(v: &lock->writers, i: `0`);
344	init_waitqueue_head(&lock->pending_readers);
345	init_waitqueue_head(&lock->pending_writers);
346	}
347
348	/ Return true if acquisition is successful, false otherwise /
349	bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
350	{
351	if (atomic_read(v: &lock->readers))
352	return false;
353
354	atomic_inc(v: &lock->writers);
355
356	/ Ensure writers count is updated before we check for pending readers /
357	smp_mb__after_atomic();
358	if (atomic_read(v: &lock->readers)) {
359	btrfs_drew_write_unlock(lock);
360	return false;
361	}
362
363	return true;
364	}
365
366	void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
367	{
368	while (true) {
369	if (btrfs_drew_try_write_lock(lock))
370	return;
371	wait_event(lock->pending_writers, !atomic_read(&lock->readers));
372	}
373	}
374
375	void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
376	{
377	atomic_dec(v: &lock->writers);
378	cond_wake_up(wq: &lock->pending_readers);
379	}
380
381	void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
382	{
383	atomic_inc(v: &lock->readers);
384
385	/*
386	* Ensure the pending reader count is perceieved BEFORE this reader
387	* goes to sleep in case of active writers. This guarantees new writers
388	* won't be allowed and that the current reader will be woken up when
389	* the last active writer finishes its jobs.
390	*/
391	smp_mb__after_atomic();
392
393	wait_event(lock->pending_readers, atomic_read(&lock->writers) == `0`);
394	}
395
396	void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
397	{
398	/*
399	* atomic_dec_and_test implies a full barrier, so woken up writers
400	* are guaranteed to see the decrement
401	*/
402	if (atomic_dec_and_test(v: &lock->readers))
403	wake_up(&lock->pending_writers);
404	}
405

source code of linux/fs/btrfs/locking.c