block_validity.c source code [linux/fs/ext4/block_validity.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* linux/fs/ext4/block_validity.c
4	*
5	* Copyright (C) 2009
6	* Theodore Ts'o (tytso@mit.edu)
7	*
8	* Track which blocks in the filesystem are metadata blocks that
9	* should never be used as data blocks by files or directories.
10	*/
11
12	#include <linux/time.h>
13	#include <linux/fs.h>
14	#include <linux/namei.h>
15	#include <linux/quotaops.h>
16	#include <linux/buffer_head.h>
17	#include <linux/swap.h>
18	#include <linux/pagemap.h>
19	#include <linux/blkdev.h>
20	#include <linux/slab.h>
21	#include "ext4.h"
22
23	struct ext4_system_zone {
24	struct rb_node node;
25	ext4_fsblk_t start_blk;
26	unsigned int count;
27	u32 ino;
28	};
29
30	static struct kmem_cache *ext4_system_zone_cachep;
31
32	int __init ext4_init_system_zone(void)
33	{
34	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, `0`);
35	if (ext4_system_zone_cachep == NULL)
36	return -ENOMEM;
37	return `0`;
38	}
39
40	void ext4_exit_system_zone(void)
41	{
42	rcu_barrier();
43	kmem_cache_destroy(s: ext4_system_zone_cachep);
44	}
45
46	static inline int can_merge(struct ext4_system_zone *entry1,
47	struct ext4_system_zone *entry2)
48	{
49	if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
50	entry1->ino == entry2->ino)
51	return `1`;
52	return `0`;
53	}
54
55	static void release_system_zone(struct ext4_system_blocks *system_blks)
56	{
57	struct ext4_system_zone entry, n;
58
59	rbtree_postorder_for_each_entry_safe(entry, n,
60	&system_blks->root, node)
61	kmem_cache_free(s: ext4_system_zone_cachep, objp: entry);
62	}
63
64	/*
65	* Mark a range of blocks as belonging to the "system zone" --- that
66	* is, filesystem metadata blocks which should never be used by
67	* inodes.
68	*/
69	static int add_system_zone(struct ext4_system_blocks *system_blks,
70	ext4_fsblk_t start_blk,
71	unsigned int count, u32 ino)
72	{
73	struct ext4_system_zone new_entry, entry;
74	struct rb_node *n = &system_blks->root.rb_node, node;
75	struct rb_node parent = NULL, new_node = NULL;
76
77	while (*n) {
78	parent = *n;
79	entry = rb_entry(parent, struct ext4_system_zone, node);
80	if (start_blk < entry->start_blk)
81	n = &(*n)->rb_left;
82	else if (start_blk >= (entry->start_blk + entry->count))
83	n = &(*n)->rb_right;
84	else / Unexpected overlap of system zones. /
85	return -EFSCORRUPTED;
86	}
87
88	new_entry = kmem_cache_alloc(cachep: ext4_system_zone_cachep,
89	GFP_KERNEL);
90	if (!new_entry)
91	return -ENOMEM;
92	new_entry->start_blk = start_blk;
93	new_entry->count = count;
94	new_entry->ino = ino;
95	new_node = &new_entry->node;
96
97	rb_link_node(node: new_node, parent, rb_link: n);
98	rb_insert_color(new_node, &system_blks->root);
99
100	/ Can we merge to the left? /
101	node = rb_prev(new_node);
102	if (node) {
103	entry = rb_entry(node, struct ext4_system_zone, node);
104	if (can_merge(entry1: entry, entry2: new_entry)) {
105	new_entry->start_blk = entry->start_blk;
106	new_entry->count += entry->count;
107	rb_erase(node, &system_blks->root);
108	kmem_cache_free(s: ext4_system_zone_cachep, objp: entry);
109	}
110	}
111
112	/ Can we merge to the right? /
113	node = rb_next(new_node);
114	if (node) {
115	entry = rb_entry(node, struct ext4_system_zone, node);
116	if (can_merge(entry1: new_entry, entry2: entry)) {
117	new_entry->count += entry->count;
118	rb_erase(node, &system_blks->root);
119	kmem_cache_free(s: ext4_system_zone_cachep, objp: entry);
120	}
121	}
122	return `0`;
123	}
124
125	static void debug_print_tree(struct ext4_sb_info *sbi)
126	{
127	struct rb_node *node;
128	struct ext4_system_zone *entry;
129	struct ext4_system_blocks *system_blks;
130	int first = `1`;
131
132	printk(KERN_INFO "System zones: ");
133	rcu_read_lock();
134	system_blks = rcu_dereference(sbi->s_system_blks);
135	node = rb_first(&system_blks->root);
136	while (node) {
137	entry = rb_entry(node, struct ext4_system_zone, node);
138	printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
139	entry->start_blk, entry->start_blk + entry->count - `1`);
140	first = `0`;
141	node = rb_next(node);
142	}
143	rcu_read_unlock();
144	printk(KERN_CONT "\n");
145	}
146
147	static int ext4_protect_reserved_inode(struct super_block *sb,
148	struct ext4_system_blocks *system_blks,
149	u32 ino)
150	{
151	struct inode *inode;
152	struct ext4_sb_info *sbi = EXT4_SB(sb);
153	struct ext4_map_blocks map;
154	u32 i = `0`, num;
155	int err = `0`, n;
156
157	if ((ino < EXT4_ROOT_INO) \|\|
158	(ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
159	return -EINVAL;
160	inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
161	if (IS_ERR(ptr: inode))
162	return PTR_ERR(ptr: inode);
163	num = (inode->i_size + sb->s_blocksize - `1`) >> sb->s_blocksize_bits;
164	while (i < num) {
165	cond_resched();
166	map.m_lblk = i;
167	map.m_len = num - i;
168	n = ext4_map_blocks(NULL, inode, map: &map, flags: `0`);
169	if (n < `0`) {
170	err = n;
171	break;
172	}
173	if (n == `0`) {
174	i++;
175	} else {
176	err = add_system_zone(system_blks, start_blk: map.m_pblk, count: n, ino);
177	if (err < `0`) {
178	if (err == -EFSCORRUPTED) {
179	EXT4_ERROR_INODE_ERR(inode, -err,
180	"blocks %llu-%llu from inode overlap system zone",
181	map.m_pblk,
182	map.m_pblk + map.m_len - `1`);
183	}
184	break;
185	}
186	i += n;
187	}
188	}
189	iput(inode);
190	return err;
191	}
192
193	static void ext4_destroy_system_zone(struct rcu_head *rcu)
194	{
195	struct ext4_system_blocks *system_blks;
196
197	system_blks = container_of(rcu, struct ext4_system_blocks, rcu);
198	release_system_zone(system_blks);
199	kfree(objp: system_blks);
200	}
201
202	/*
203	* Build system zone rbtree which is used for block validity checking.
204	*
205	* The update of system_blks pointer in this function is protected by
206	* sb->s_umount semaphore. However we have to be careful as we can be
207	* racing with ext4_inode_block_valid() calls reading system_blks rbtree
208	* protected only by RCU. That's why we first build the rbtree and then
209	* swap it in place.
210	*/
211	int ext4_setup_system_zone(struct super_block *sb)
212	{
213	ext4_group_t ngroups = ext4_get_groups_count(sb);
214	struct ext4_sb_info *sbi = EXT4_SB(sb);
215	struct ext4_system_blocks *system_blks;
216	struct ext4_group_desc *gdp;
217	ext4_group_t i;
218	int ret;
219
220	system_blks = kzalloc(size: sizeof(*system_blks), GFP_KERNEL);
221	if (!system_blks)
222	return -ENOMEM;
223
224	for (i=`0`; i < ngroups; i++) {
225	unsigned int meta_blks = ext4_num_base_meta_blocks(sb, block_group: i);
226
227	cond_resched();
228	if (meta_blks != `0`) {
229	ret = add_system_zone(system_blks,
230	start_blk: ext4_group_first_block_no(sb, group_no: i),
231	count: meta_blks, ino: `0`);
232	if (ret)
233	goto err;
234	}
235	gdp = ext4_get_group_desc(sb, block_group: i, NULL);
236	ret = add_system_zone(system_blks,
237	start_blk: ext4_block_bitmap(sb, bg: gdp), count: `1`, ino: `0`);
238	if (ret)
239	goto err;
240	ret = add_system_zone(system_blks,
241	start_blk: ext4_inode_bitmap(sb, bg: gdp), count: `1`, ino: `0`);
242	if (ret)
243	goto err;
244	ret = add_system_zone(system_blks,
245	start_blk: ext4_inode_table(sb, bg: gdp),
246	count: sbi->s_itb_per_group, ino: `0`);
247	if (ret)
248	goto err;
249	}
250	if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
251	ret = ext4_protect_reserved_inode(sb, system_blks,
252	le32_to_cpu(sbi->s_es->s_journal_inum));
253	if (ret)
254	goto err;
255	}
256
257	/*
258	* System blks rbtree complete, announce it once to prevent racing
259	* with ext4_inode_block_valid() accessing the rbtree at the same
260	* time.
261	*/
262	rcu_assign_pointer(sbi->s_system_blks, system_blks);
263
264	if (test_opt(sb, DEBUG))
265	debug_print_tree(sbi);
266	return `0`;
267	err:
268	release_system_zone(system_blks);
269	kfree(objp: system_blks);
270	return ret;
271	}
272
273	/*
274	* Called when the filesystem is unmounted or when remounting it with
275	* noblock_validity specified.
276	*
277	* The update of system_blks pointer in this function is protected by
278	* sb->s_umount semaphore. However we have to be careful as we can be
279	* racing with ext4_inode_block_valid() calls reading system_blks rbtree
280	* protected only by RCU. So we first clear the system_blks pointer and
281	* then free the rbtree only after RCU grace period expires.
282	*/
283	void ext4_release_system_zone(struct super_block *sb)
284	{
285	struct ext4_system_blocks *system_blks;
286
287	system_blks = rcu_dereference_protected(EXT4_SB(sb)->s_system_blks,
288	lockdep_is_held(&sb->s_umount));
289	rcu_assign_pointer(EXT4_SB(sb)->s_system_blks, NULL);
290
291	if (system_blks)
292	call_rcu(head: &system_blks->rcu, func: ext4_destroy_system_zone);
293	}
294
295	int ext4_sb_block_valid(struct super_block sb, struct* inode *inode,
296	ext4_fsblk_t start_blk, unsigned int count)
297	{
298	struct ext4_sb_info *sbi = EXT4_SB(sb);
299	struct ext4_system_blocks *system_blks;
300	struct ext4_system_zone *entry;
301	struct rb_node *n;
302	int ret = `1`;
303
304	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) \|\|
305	(start_blk + count < start_blk) \|\|
306	(start_blk + count > ext4_blocks_count(es: sbi->s_es)))
307	return `0`;
308
309	/*
310	* Lock the system zone to prevent it being released concurrently
311	* when doing a remount which inverse current "[no]block_validity"
312	* mount option.
313	*/
314	rcu_read_lock();
315	system_blks = rcu_dereference(sbi->s_system_blks);
316	if (system_blks == NULL)
317	goto out_rcu;
318
319	n = system_blks->root.rb_node;
320	while (n) {
321	entry = rb_entry(n, struct ext4_system_zone, node);
322	if (start_blk + count - `1` < entry->start_blk)
323	n = n->rb_left;
324	else if (start_blk >= (entry->start_blk + entry->count))
325	n = n->rb_right;
326	else {
327	ret = `0`;
328	if (inode)
329	ret = (entry->ino == inode->i_ino);
330	break;
331	}
332	}
333	out_rcu:
334	rcu_read_unlock();
335	return ret;
336	}
337
338	/*
339	* Returns 1 if the passed-in block region (start_blk,
340	* start_blk+count) is valid; 0 if some part of the block region
341	* overlaps with some other filesystem metadata blocks.
342	*/
343	int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
344	unsigned int count)
345	{
346	return ext4_sb_block_valid(sb: inode->i_sb, inode, start_blk, count);
347	}
348
349	int ext4_check_blockref(const char function, unsigned* int line,
350	struct inode inode, __le32 p, unsigned int max)
351	{
352	__le32 *bref = p;
353	unsigned int blk;
354
355	if (ext4_has_feature_journal(sb: inode->i_sb) &&
356	(inode->i_ino ==
357	le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
358	return `0`;
359
360	while (bref < p+max) {
361	blk = le32_to_cpu(*bref++);
362	if (blk &&
363	unlikely(!ext4_inode_block_valid(inode, blk, `1`))) {
364	ext4_error_inode(inode, function, line, blk,
365	"invalid block");
366	return -EFSCORRUPTED;
367	}
368	}
369	return `0`;
370	}
371
372

source code of linux/fs/ext4/block_validity.c