1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include <linux/jiffies.h> |
4 | #include <linux/kernel.h> |
5 | #include <linux/ktime.h> |
6 | #include <linux/list.h> |
7 | #include <linux/math64.h> |
8 | #include <linux/sizes.h> |
9 | #include <linux/workqueue.h> |
10 | #include "ctree.h" |
11 | #include "block-group.h" |
12 | #include "discard.h" |
13 | #include "free-space-cache.h" |
14 | #include "fs.h" |
15 | |
16 | /* |
17 | * This contains the logic to handle async discard. |
18 | * |
19 | * Async discard manages trimming of free space outside of transaction commit. |
 * Discarding is done by managing the block_groups on an LRU list based on
 * free space recency. Two passes are used: the first prioritizes discarding
 * extents, and the second trims the bitmaps, which gives the bitmap free
 * space the best opportunity to coalesce.
23 | * The block_groups are maintained on multiple lists to allow for multiple |
24 | * passes with different discard filter requirements. A delayed work item is |
25 | * used to manage discarding with timeout determined by a max of the delay |
26 | * incurred by the iops rate limit, the byte rate limit, and the max delay of |
27 | * BTRFS_DISCARD_MAX_DELAY. |
28 | * |
29 | * Note, this only keeps track of block_groups that are explicitly for data. |
30 | * Mixed block_groups are not supported. |
31 | * |
32 | * The first list is special to manage discarding of fully free block groups. |
33 | * This is necessary because we issue a final trim for a full free block group |
34 | * after forgetting it. When a block group becomes unused, instead of directly |
35 | * being added to the unused_bgs list, we add it to this first list. Then |
36 | * from there, if it becomes fully discarded, we place it onto the unused_bgs |
37 | * list. |
38 | * |
39 | * The in-memory free space cache serves as the backing state for discard. |
40 | * Consequently this means there is no persistence. We opt to load all the |
41 | * block groups in as not discarded, so the mount case degenerates to the |
42 | * crashing case. |
43 | * |
44 | * As the free space cache uses bitmaps, there exists a tradeoff between |
45 | * ease/efficiency for find_free_extent() and the accuracy of discard state. |
46 | * Here we opt to let untrimmed regions merge with everything while only letting |
47 | * trimmed regions merge with other trimmed regions. This can cause |
48 | * overtrimming, but the coalescing benefit seems to be worth it. Additionally, |
49 | * bitmap state is tracked as a whole. If we're able to fully trim a bitmap, |
50 | * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in, |
51 | * this resets the state and we will retry trimming the whole bitmap. This is a |
52 | * tradeoff between discard state accuracy and the cost of accounting. |
53 | */ |
54 | |
55 | /* This is an initial delay to give some chance for block reuse */ |
56 | #define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC) |
57 | #define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC) |
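/*
 * A fully unused block group only waits BTRFS_DISCARD_UNUSED_DELAY before it
 * becomes eligible for discard, see add_to_discard_unused_list().
 */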
58 | |
59 | #define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL) |
60 | #define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL) |
61 | #define BTRFS_DISCARD_MAX_IOPS (1000U) |
62 | |
63 | /* Monotonically decreasing minimum length filters after index 0 */ |
64 | static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = { |
65 | 0, |
66 | BTRFS_ASYNC_DISCARD_MAX_FILTER, |
67 | BTRFS_ASYNC_DISCARD_MIN_FILTER |
68 | }; |
69 | |
70 | static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl, |
71 | const struct btrfs_block_group *block_group) |
72 | { |
73 | return &discard_ctl->discard_list[block_group->discard_index]; |
74 | } |
75 | |
76 | /* |
77 | * Determine if async discard should be running. |
78 | * |
79 | * @discard_ctl: discard control |
80 | * |
81 | * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. |
82 | */ |
83 | static bool btrfs_run_discard_work(const struct btrfs_discard_ctl *discard_ctl) |
84 | { |
85 | struct btrfs_fs_info *fs_info = container_of(discard_ctl, |
86 | struct btrfs_fs_info, |
87 | discard_ctl); |
88 | |
89 | return (!(fs_info->sb->s_flags & SB_RDONLY) && |
90 | test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); |
91 | } |
92 | |
93 | static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, |
94 | struct btrfs_block_group *block_group) |
95 | { |
96 | lockdep_assert_held(&discard_ctl->lock); |
97 | |
	if (list_empty(&block_group->discard_list) ||
99 | block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { |
100 | if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) |
101 | block_group->discard_index = BTRFS_DISCARD_INDEX_START; |
102 | block_group->discard_eligible_time = (ktime_get_ns() + |
103 | BTRFS_DISCARD_DELAY); |
104 | block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; |
105 | } |
	if (list_empty(&block_group->discard_list))
		btrfs_get_block_group(block_group);

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
111 | } |
112 | |
113 | static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, |
114 | struct btrfs_block_group *block_group) |
115 | { |
116 | if (!btrfs_is_block_group_data_only(block_group)) |
117 | return; |
118 | |
119 | if (!btrfs_run_discard_work(discard_ctl)) |
120 | return; |
121 | |
	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
125 | } |
126 | |
127 | static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl, |
128 | struct btrfs_block_group *block_group) |
129 | { |
130 | bool queued; |
131 | |
	spin_lock(&discard_ctl->lock);

	queued = !list_empty(&block_group->discard_list);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	if (!queued)
		btrfs_get_block_group(block_group);
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
153 | } |
154 | |
155 | static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, |
156 | struct btrfs_block_group *block_group) |
157 | { |
158 | bool running = false; |
159 | bool queued = false; |
160 | |
	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	queued = !list_empty(&block_group->discard_list);
	list_del_init(&block_group->discard_list);
	if (queued)
		btrfs_put_block_group(block_group);

	spin_unlock(&discard_ctl->lock);
175 | |
176 | return running; |
177 | } |
178 | |
179 | /* |
180 | * Find block_group that's up next for discarding. |
181 | * |
182 | * @discard_ctl: discard control |
183 | * @now: current time |
184 | * |
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of the block_group.
187 | */ |
188 | static struct btrfs_block_group *find_next_block_group( |
189 | struct btrfs_discard_ctl *discard_ctl, |
190 | u64 now) |
191 | { |
192 | struct btrfs_block_group *ret_block_group = NULL, *block_group; |
193 | int i; |
194 | |
195 | for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { |
196 | struct list_head *discard_list = &discard_ctl->discard_list[i]; |
197 | |
		if (!list_empty(discard_list)) {
199 | block_group = list_first_entry(discard_list, |
200 | struct btrfs_block_group, |
201 | discard_list); |
202 | |
203 | if (!ret_block_group) |
204 | ret_block_group = block_group; |
205 | |
206 | if (ret_block_group->discard_eligible_time < now) |
207 | break; |
208 | |
209 | if (ret_block_group->discard_eligible_time > |
210 | block_group->discard_eligible_time) |
211 | ret_block_group = block_group; |
212 | } |
213 | } |
214 | |
215 | return ret_block_group; |
216 | } |
217 | |
218 | /* |
219 | * Look up next block group and set it for use. |
220 | * |
221 | * @discard_ctl: discard control |
222 | * @discard_state: the discard_state of the block_group after state management |
223 | * @discard_index: the discard_index of the block_group after state management |
224 | * @now: time when discard was invoked, in ns |
225 | * |
226 | * Wrap find_next_block_group() and set the block_group to be in use. |
227 | * @discard_state's control flow is managed here. Variables related to |
 * @discard_state are reset here as needed (e.g. @discard_cursor). @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
230 | * but we want the discard to execute in the context determined here. |
231 | */ |
232 | static struct btrfs_block_group *peek_discard_list( |
233 | struct btrfs_discard_ctl *discard_ctl, |
234 | enum btrfs_discard_state *discard_state, |
235 | int *discard_index, u64 now) |
236 | { |
237 | struct btrfs_block_group *block_group; |
238 | |
	spin_lock(&discard_ctl->lock);
240 | again: |
241 | block_group = find_next_block_group(discard_ctl, now); |
242 | |
243 | if (block_group && now >= block_group->discard_eligible_time) { |
244 | if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && |
245 | block_group->used != 0) { |
246 | if (btrfs_is_block_group_data_only(block_group)) { |
247 | __add_to_discard_list(discard_ctl, block_group); |
248 | /* |
249 | * The block group must have been moved to other |
250 | * discard list even if discard was disabled in |
251 | * the meantime or a transaction abort happened, |
252 | * otherwise we can end up in an infinite loop, |
253 | * always jumping into the 'again' label and |
254 | * keep getting this block group over and over |
255 | * in case there are no other block groups in |
256 | * the discard lists. |
257 | */ |
258 | ASSERT(block_group->discard_index != |
259 | BTRFS_DISCARD_INDEX_UNUSED, |
260 | "discard_index=%d" , |
261 | block_group->discard_index); |
262 | } else { |
				list_del_init(&block_group->discard_list);
				btrfs_put_block_group(block_group);
265 | } |
266 | goto again; |
267 | } |
268 | if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) { |
269 | block_group->discard_cursor = block_group->start; |
270 | block_group->discard_state = BTRFS_DISCARD_EXTENTS; |
271 | } |
272 | } |
	if (block_group) {
		btrfs_get_block_group(block_group);
		discard_ctl->block_group = block_group;
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);
280 | |
281 | return block_group; |
282 | } |
283 | |
284 | /* |
285 | * Update a block group's filters. |
286 | * |
287 | * @block_group: block group of interest |
288 | * @bytes: recently freed region size after coalescing |
289 | * |
290 | * Async discard maintains multiple lists with progressively smaller filters |
291 | * to prioritize discarding based on size. Should a free space that matches |
292 | * a larger filter be returned to the free_space_cache, prioritize that discard |
293 | * by moving @block_group to the proper filter. |
294 | */ |
295 | void btrfs_discard_check_filter(struct btrfs_block_group *block_group, |
296 | u64 bytes) |
297 | { |
298 | struct btrfs_discard_ctl *discard_ctl; |
299 | |
300 | if (!block_group || |
301 | !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) |
302 | return; |
303 | |
304 | discard_ctl = &block_group->fs_info->discard_ctl; |
305 | |
306 | if (block_group->discard_index > BTRFS_DISCARD_INDEX_START && |
307 | bytes >= discard_minlen[block_group->discard_index - 1]) { |
308 | int i; |
309 | |
310 | remove_from_discard_list(discard_ctl, block_group); |
311 | |
312 | for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS; |
313 | i++) { |
314 | if (bytes >= discard_minlen[i]) { |
315 | block_group->discard_index = i; |
316 | add_to_discard_list(discard_ctl, block_group); |
317 | break; |
318 | } |
319 | } |
320 | } |
321 | } |
322 | |
323 | /* |
324 | * Move a block group along the discard lists. |
325 | * |
326 | * @discard_ctl: discard control |
327 | * @block_group: block_group of interest |
328 | * |
 * Increment @block_group's discard_index. If it falls off the list, let it be.
330 | * Otherwise add it back to the appropriate list. |
331 | */ |
332 | static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl, |
333 | struct btrfs_block_group *block_group) |
334 | { |
335 | block_group->discard_index++; |
336 | if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) { |
337 | block_group->discard_index = 1; |
338 | return; |
339 | } |
340 | |
341 | add_to_discard_list(discard_ctl, block_group); |
342 | } |
343 | |
344 | /* |
345 | * Remove a block_group from the discard lists. |
346 | * |
347 | * @discard_ctl: discard control |
348 | * @block_group: block_group of interest |
349 | * |
350 | * Remove @block_group from the discard lists. If necessary, wait on the |
351 | * current work and then reschedule the delayed work. |
352 | */ |
353 | void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, |
354 | struct btrfs_block_group *block_group) |
355 | { |
356 | if (remove_from_discard_list(discard_ctl, block_group)) { |
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
359 | } |
360 | } |
361 | |
362 | /* |
 * Handle queuing of the block_groups.
364 | * |
365 | * @discard_ctl: discard control |
366 | * @block_group: block_group of interest |
367 | * |
368 | * Maintain the LRU order of the discard lists. |
369 | */ |
370 | void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, |
371 | struct btrfs_block_group *block_group) |
372 | { |
373 | if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) |
374 | return; |
375 | |
376 | if (block_group->used == 0) |
377 | add_to_discard_unused_list(discard_ctl, block_group); |
378 | else |
379 | add_to_discard_list(discard_ctl, block_group); |
380 | |
381 | if (!delayed_work_pending(&discard_ctl->work)) |
		btrfs_discard_schedule_work(discard_ctl, false);
383 | } |
384 | |
385 | static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, |
386 | u64 now, bool override) |
387 | { |
388 | struct btrfs_block_group *block_group; |
389 | |
390 | if (!btrfs_run_discard_work(discard_ctl)) |
391 | return; |
392 | if (!override && delayed_work_pending(&discard_ctl->work)) |
393 | return; |
394 | |
395 | block_group = find_next_block_group(discard_ctl, now); |
396 | if (block_group) { |
397 | u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC; |
398 | u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit); |
399 | |
400 | /* |
401 | * A single delayed workqueue item is responsible for |
402 | * discarding, so we can manage the bytes rate limit by keeping |
403 | * track of the previous discard. |
404 | */ |
405 | if (kbps_limit && discard_ctl->prev_discard) { |
406 | u64 bps_limit = ((u64)kbps_limit) * SZ_1K; |
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);
409 | |
410 | delay = max(delay, bps_delay); |
411 | } |
412 | |
413 | /* |
414 | * This timeout is to hopefully prevent immediate discarding |
415 | * in a recently allocated block group. |
416 | */ |
417 | if (now < block_group->discard_eligible_time) { |
418 | u64 bg_timeout = block_group->discard_eligible_time - now; |
419 | |
420 | delay = max(delay, bg_timeout); |
421 | } |
422 | |
423 | if (override && discard_ctl->prev_discard) { |
424 | u64 elapsed = now - discard_ctl->prev_discard_time; |
425 | |
426 | if (delay > elapsed) |
427 | delay -= elapsed; |
428 | else |
429 | delay = 0; |
430 | } |
431 | |
		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
434 | } |
435 | } |
436 | |
437 | /* |
438 | * Responsible for scheduling the discard work. |
439 | * |
440 | * @discard_ctl: discard control |
441 | * @override: override the current timer |
442 | * |
443 | * Discards are issued by a delayed workqueue item. @override is used to |
444 | * update the current delay as the baseline delay interval is reevaluated on |
445 | * transaction commit. This is also maxed with any other rate limit. |
446 | */ |
447 | void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, |
448 | bool override) |
449 | { |
450 | const u64 now = ktime_get_ns(); |
451 | |
	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
455 | } |
456 | |
457 | /* |
458 | * Determine next step of a block_group. |
459 | * |
460 | * @discard_ctl: discard control |
461 | * @block_group: block_group of interest |
462 | * |
463 | * Determine the next step for a block group after it's finished going through |
464 | * a pass on a discard list. If it is unused and fully trimmed, we can mark it |
465 | * unused and send it to the unused_bgs path. Otherwise, pass it onto the |
466 | * appropriate filter list or let it fall off. |
467 | */ |
468 | static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, |
469 | struct btrfs_block_group *block_group) |
470 | { |
471 | remove_from_discard_list(discard_ctl, block_group); |
472 | |
473 | if (block_group->used == 0) { |
474 | if (btrfs_is_free_space_trimmed(block_group)) |
			btrfs_mark_bg_unused(block_group);
476 | else |
477 | add_to_discard_unused_list(discard_ctl, block_group); |
478 | } else { |
479 | btrfs_update_discard_index(discard_ctl, block_group); |
480 | } |
481 | } |
482 | |
483 | /* |
484 | * Discard work queue callback |
485 | * |
486 | * @work: work |
487 | * |
488 | * Find the next block_group to start discarding and then discard a single |
489 | * region. It does this in a two-pass fashion: first extents and second |
490 | * bitmaps. Completely discarded block groups are sent to the unused_bgs path. |
491 | */ |
492 | static void btrfs_discard_workfn(struct work_struct *work) |
493 | { |
494 | struct btrfs_discard_ctl *discard_ctl; |
495 | struct btrfs_block_group *block_group; |
496 | enum btrfs_discard_state discard_state; |
497 | int discard_index = 0; |
498 | u64 trimmed = 0; |
499 | u64 minlen = 0; |
500 | u64 now = ktime_get_ns(); |
501 | |
502 | discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); |
503 | |
	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
506 | if (!block_group) |
507 | return; |
	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_lock(&discard_ctl->lock);
		btrfs_put_block_group(block_group);
		discard_ctl->block_group = NULL;
		spin_unlock(&discard_ctl->lock);
		return;
	}
	if (now < block_group->discard_eligible_time) {
		spin_lock(&discard_ctl->lock);
		btrfs_put_block_group(block_group);
		discard_ctl->block_group = NULL;
		spin_unlock(&discard_ctl->lock);
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}
523 | |
524 | /* Perform discarding */ |
525 | minlen = discard_minlen[discard_index]; |
526 | |
527 | if (discard_state == BTRFS_DISCARD_BITMAPS) { |
528 | u64 maxlen = 0; |
529 | |
530 | /* |
531 | * Use the previous levels minimum discard length as the max |
532 | * length filter. In the case something is added to make a |
533 | * region go beyond the max filter, the entire bitmap is set |
534 | * back to BTRFS_TRIM_STATE_UNTRIMMED. |
535 | */ |
536 | if (discard_index != BTRFS_DISCARD_INDEX_UNUSED) |
537 | maxlen = discard_minlen[discard_index - 1]; |
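		/*
		 * E.g. with the discard_minlen table above, the
		 * BTRFS_DISCARD_INDEX_START pass trims bitmap regions of at
		 * least BTRFS_ASYNC_DISCARD_MAX_FILTER with no upper bound
		 * (maxlen stays 0), while the last pass only trims regions
		 * between BTRFS_ASYNC_DISCARD_MIN_FILTER and
		 * BTRFS_ASYNC_DISCARD_MAX_FILTER.
		 */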
538 | |
		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}
551 | |
552 | /* Determine next steps for a block_group */ |
553 | if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { |
554 | if (discard_state == BTRFS_DISCARD_BITMAPS) { |
555 | btrfs_finish_discard_pass(discard_ctl, block_group); |
556 | } else { |
557 | block_group->discard_cursor = block_group->start; |
			spin_lock(&discard_ctl->lock);
559 | if (block_group->discard_state != |
560 | BTRFS_DISCARD_RESET_CURSOR) |
561 | block_group->discard_state = |
562 | BTRFS_DISCARD_BITMAPS; |
			spin_unlock(&discard_ctl->lock);
564 | } |
565 | } |
566 | |
567 | now = ktime_get_ns(); |
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
575 | } |
576 | |
577 | /* |
578 | * Recalculate the base delay. |
579 | * |
580 | * @discard_ctl: discard control |
581 | * |
 * Recalculate the base delay, which is derived from the iops_limit
 * (MSEC_PER_SEC / iops_limit, or 0 when no iops_limit is set). Clamp this
 * between the lower limit (BTRFS_DISCARD_MIN_DELAY_MSEC, or 0 when unset)
 * and the upper limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
585 | */ |
586 | void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl) |
587 | { |
588 | s32 discardable_extents; |
589 | s64 discardable_bytes; |
590 | u32 iops_limit; |
591 | unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC; |
592 | unsigned long delay; |
593 | |
	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
595 | if (!discardable_extents) |
596 | return; |
597 | |
	spin_lock(&discard_ctl->lock);
599 | |
600 | /* |
601 | * The following is to fix a potential -1 discrepancy that we're not |
602 | * sure how to reproduce. But given that this is the only place that |
 * utilizes these numbers and this is only called from
604 | * btrfs_finish_extent_commit() which is synchronized, we can correct |
605 | * here. |
606 | */ |
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);
615 | |
616 | if (discardable_extents <= 0) { |
		spin_unlock(&discard_ctl->lock);
618 | return; |
619 | } |
620 | |
621 | iops_limit = READ_ONCE(discard_ctl->iops_limit); |
622 | |
623 | if (iops_limit) { |
624 | delay = MSEC_PER_SEC / iops_limit; |
625 | } else { |
626 | /* |
627 | * Unset iops_limit means go as fast as possible, so allow a |
628 | * delay of 0. |
629 | */ |
630 | delay = 0; |
631 | min_delay = 0; |
632 | } |
633 | |
634 | delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC); |
635 | discard_ctl->delay_ms = delay; |
636 | |
	spin_unlock(&discard_ctl->lock);
638 | } |
639 | |
640 | /* |
641 | * Propagate discard counters. |
642 | * |
643 | * @block_group: block_group of interest |
644 | * |
645 | * Propagate deltas of counters up to the discard_ctl. It maintains a current |
646 | * counter and a previous counter passing the delta up to the global stat. |
647 | * Then the current counter value becomes the previous counter value. |
648 | */ |
649 | void btrfs_discard_update_discardable(struct btrfs_block_group *block_group) |
650 | { |
651 | struct btrfs_free_space_ctl *ctl; |
652 | struct btrfs_discard_ctl *discard_ctl; |
653 | s32 extents_delta; |
654 | s64 bytes_delta; |
655 | |
656 | if (!block_group || |
657 | !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) || |
658 | !btrfs_is_block_group_data_only(block_group)) |
659 | return; |
660 | |
661 | ctl = block_group->free_space_ctl; |
662 | discard_ctl = &block_group->fs_info->discard_ctl; |
663 | |
664 | lockdep_assert_held(&ctl->tree_lock); |
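	/*
	 * For example, if this block group's CURR extent count moved from 5
	 * to 8 since the last update, a delta of 3 is added to the global
	 * counter and PREV is advanced to 8.
	 */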
665 | extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] - |
666 | ctl->discardable_extents[BTRFS_STAT_PREV]; |
667 | if (extents_delta) { |
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
669 | ctl->discardable_extents[BTRFS_STAT_PREV] = |
670 | ctl->discardable_extents[BTRFS_STAT_CURR]; |
671 | } |
672 | |
673 | bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] - |
674 | ctl->discardable_bytes[BTRFS_STAT_PREV]; |
675 | if (bytes_delta) { |
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
677 | ctl->discardable_bytes[BTRFS_STAT_PREV] = |
678 | ctl->discardable_bytes[BTRFS_STAT_CURR]; |
679 | } |
680 | } |
681 | |
682 | /* |
683 | * Punt unused_bgs list to discard lists. |
684 | * |
685 | * @fs_info: fs_info of interest |
686 | * |
687 | * The unused_bgs list needs to be punted to the discard lists because the |
688 | * order of operations is changed. In the normal synchronous discard path, the |
689 | * block groups are trimmed via a single large trim in transaction commit. This |
690 | * is ultimately what we are trying to avoid with asynchronous discard. Thus, |
691 | * it must be done before going down the unused_bgs path. |
692 | */ |
693 | void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) |
694 | { |
695 | struct btrfs_block_group *block_group, *next; |
696 | |
	spin_lock(&fs_info->unused_bgs_lock);
698 | /* We enabled async discard, so punt all to the queue */ |
699 | list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, |
700 | bg_list) { |
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * This put is for the get done by btrfs_mark_bg_unused.
		 * Queueing discard incremented it for discard's reference.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
710 | } |
711 | |
712 | /* |
713 | * Purge discard lists. |
714 | * |
715 | * @discard_ctl: discard control |
716 | * |
717 | * If we are disabling async discard, we may have intercepted block groups that |
718 | * are completely free and ready for the unused_bgs path. As discarding will |
719 | * now happen in transaction commit or not at all, we can safely mark the |
720 | * corresponding block groups as unused and they will be sent on their merry |
721 | * way to the unused_bgs list. |
722 | */ |
723 | static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl) |
724 | { |
725 | struct btrfs_block_group *block_group, *next; |
726 | int i; |
727 | |
	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
			btrfs_put_block_group(block_group);
		}
	}
	spin_unlock(&discard_ctl->lock);
742 | } |
743 | |
744 | void btrfs_discard_resume(struct btrfs_fs_info *fs_info) |
745 | { |
746 | if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) { |
747 | btrfs_discard_cleanup(fs_info); |
748 | return; |
749 | } |
750 | |
751 | btrfs_discard_punt_unused_bgs_list(fs_info); |
752 | |
	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
754 | } |
755 | |
756 | void btrfs_discard_stop(struct btrfs_fs_info *fs_info) |
757 | { |
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
759 | } |
760 | |
761 | void btrfs_discard_init(struct btrfs_fs_info *fs_info) |
762 | { |
763 | struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl; |
764 | int i; |
765 | |
766 | spin_lock_init(&discard_ctl->lock); |
767 | INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn); |
768 | |
769 | for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) |
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
771 | |
772 | discard_ctl->prev_discard = 0; |
773 | discard_ctl->prev_discard_time = 0; |
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
783 | } |
784 | |
785 | void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info) |
786 | { |
787 | btrfs_discard_stop(fs_info); |
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
790 | } |
791 | |