bmap.c source code [linux/fs/gfs2/bmap.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4	* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
5	*/
6
7	#include <linux/spinlock.h>
8	#include <linux/completion.h>
9	#include <linux/buffer_head.h>
10	#include <linux/blkdev.h>
11	#include <linux/gfs2_ondisk.h>
12	#include <linux/crc32.h>
13	#include <linux/iomap.h>
14	#include <linux/ktime.h>
15
16	#include "gfs2.h"
17	#include "incore.h"
18	#include "bmap.h"
19	#include "glock.h"
20	#include "inode.h"
21	#include "meta_io.h"
22	#include "quota.h"
23	#include "rgrp.h"
24	#include "log.h"
25	#include "super.h"
26	#include "trans.h"
27	#include "dir.h"
28	#include "util.h"
29	#include "aops.h"
30	#include "trace_gfs2.h"
31
32	/ This doesn't need to be that large as max 64 bit pointers in a 4k*
33	* block is 512, so __u16 is fine for that. It saves stack space to
34	* keep it small.
35	*/
36	struct metapath {
37	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
38	__u16 mp_list[GFS2_MAX_META_HEIGHT];
39	int mp_fheight; / find_metapath height /
40	int mp_aheight; / actual height (lookup height) /
41	};
42
43	static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
44
45	/**
46	* gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
47	* @ip: the inode
48	* @dibh: the dinode buffer
49	* @block: the block number that was allocated
50	* @folio: The folio.
51	*
52	* Returns: errno
53	*/
54	static int gfs2_unstuffer_folio(struct gfs2_inode ip, struct* buffer_head *dibh,
55	u64 block, struct folio *folio)
56	{
57	struct inode *inode = &ip->i_inode;
58
59	if (!folio_test_uptodate(folio)) {
60	void *kaddr = kmap_local_folio(folio, offset: `0`);
61	u64 dsize = i_size_read(inode);
62
63	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
64	memset(kaddr + dsize, `0`, folio_size(folio) - dsize);
65	kunmap_local(kaddr);
66
67	folio_mark_uptodate(folio);
68	}
69
70	if (gfs2_is_jdata(ip)) {
71	struct buffer_head *bh = folio_buffers(folio);
72
73	if (!bh)
74	bh = create_empty_buffers(folio,
75	BIT(inode->i_blkbits), BIT(BH_Uptodate));
76
77	if (!buffer_mapped(bh))
78	map_bh(bh, sb: inode->i_sb, block);
79
80	set_buffer_uptodate(bh);
81	gfs2_trans_add_data(gl: ip->i_gl, bh);
82	} else {
83	folio_mark_dirty(folio);
84	gfs2_ordered_add_inode(ip);
85	}
86
87	return `0`;
88	}
89
90	static int __gfs2_unstuff_inode(struct gfs2_inode ip, struct* folio *folio)
91	{
92	struct buffer_head bh, dibh;
93	struct gfs2_dinode *di;
94	u64 block = `0`;
95	int isdir = gfs2_is_dir(ip);
96	int error;
97
98	error = gfs2_meta_inode_buffer(ip, bhp: &dibh);
99	if (error)
100	return error;
101
102	if (i_size_read(inode: &ip->i_inode)) {
103	/ Get a free block, fill it with the stuffed data,*
104	and write it out to disk /*
105
106	unsigned int n = `1`;
107	error = gfs2_alloc_blocks(ip, bn: &block, n: &n, dinode: `0`);
108	if (error)
109	goto out_brelse;
110	if (isdir) {
111	gfs2_trans_remove_revoke(sdp: GFS2_SB(inode: &ip->i_inode), blkno: block, len: `1`);
112	error = gfs2_dir_get_new_buffer(ip, block, bhp: &bh);
113	if (error)
114	goto out_brelse;
115	gfs2_buffer_copy_tail(to_bh: bh, to_head: sizeof(struct gfs2_meta_header),
116	from_bh: dibh, from_head: sizeof(struct gfs2_dinode));
117	brelse(bh);
118	} else {
119	error = gfs2_unstuffer_folio(ip, dibh, block, folio);
120	if (error)
121	goto out_brelse;
122	}
123	}
124
125	/ Set up the pointer to the new block /
126
127	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
128	di = (struct gfs2_dinode *)dibh->b_data;
129	gfs2_buffer_clear_tail(bh: dibh, head: sizeof(struct gfs2_dinode));
130
131	if (i_size_read(inode: &ip->i_inode)) {
132	(__be64 )(di + `1`) = cpu_to_be64(block);
133	gfs2_add_inode_blocks(inode: &ip->i_inode, change: `1`);
134	di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
135	}
136
137	ip->i_height = `1`;
138	di->di_height = cpu_to_be16(`1`);
139
140	out_brelse:
141	brelse(bh: dibh);
142	return error;
143	}
144
145	/**
146	* gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
147	* @ip: The GFS2 inode to unstuff
148	*
149	* This routine unstuffs a dinode and returns it to a "normal" state such
150	* that the height can be grown in the traditional way.
151	*
152	* Returns: errno
153	*/
154
155	int gfs2_unstuff_dinode(struct gfs2_inode *ip)
156	{
157	struct inode *inode = &ip->i_inode;
158	struct folio *folio;
159	int error;
160
161	down_write(sem: &ip->i_rw_mutex);
162	folio = filemap_grab_folio(mapping: inode->i_mapping, index: `0`);
163	error = PTR_ERR(ptr: folio);
164	if (IS_ERR(ptr: folio))
165	goto out;
166	error = __gfs2_unstuff_inode(ip, folio);
167	folio_unlock(folio);
168	folio_put(folio);
169	out:
170	up_write(sem: &ip->i_rw_mutex);
171	return error;
172	}
173
174	/**
175	* find_metapath - Find path through the metadata tree
176	* @sdp: The superblock
177	* @block: The disk block to look up
178	* @mp: The metapath to return the result in
179	* @height: The pre-calculated height of the metadata tree
180	*
181	* This routine returns a struct metapath structure that defines a path
182	* through the metadata of inode "ip" to get to block "block".
183	*
184	* Example:
185	* Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
186	* filesystem with a blocksize of 4096.
187	*
188	* find_metapath() would return a struct metapath structure set to:
189	* mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
190	*
191	* That means that in order to get to the block containing the byte at
192	* offset 101342453, we would load the indirect block pointed to by pointer
193	* 0 in the dinode. We would then load the indirect block pointed to by
194	* pointer 48 in that indirect block. We would then load the data block
195	* pointed to by pointer 165 in that indirect block.
196	*
197	* ----------------------------------------
198	* \| Dinode \| \|
199	* \| \| 4\|
200	* \| \|0 1 2 3 4 5 9\|
201	* \| \| 6\|
202	* ----------------------------------------
203	* \|
204	* \|
205	* V
206	* ----------------------------------------
207	* \| Indirect Block \|
208	* \| 5\|
209	* \| 4 4 4 4 4 5 5 1\|
210	* \|0 5 6 7 8 9 0 1 2\|
211	* ----------------------------------------
212	* \|
213	* \|
214	* V
215	* ----------------------------------------
216	* \| Indirect Block \|
217	* \| 1 1 1 1 1 5\|
218	* \| 6 6 6 6 6 1\|
219	* \|0 3 4 5 6 7 2\|
220	* ----------------------------------------
221	* \|
222	* \|
223	* V
224	* ----------------------------------------
225	* \| Data block containing offset \|
226	* \| 101342453 \|
227	* \| \|
228	* \| \|
229	* ----------------------------------------
230	*
231	*/
232
233	static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
234	struct metapath mp, unsigned* int height)
235	{
236	unsigned int i;
237
238	mp->mp_fheight = height;
239	for (i = height; i--;)
240	mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
241	}
242
243	static inline unsigned int metapath_branch_start(const struct metapath *mp)
244	{
245	if (mp->mp_list[`0`] == `0`)
246	return `2`;
247	return `1`;
248	}
249
250	/**
251	* metaptr1 - Return the first possible metadata pointer in a metapath buffer
252	* @height: The metadata height (0 = dinode)
253	* @mp: The metapath
254	*/
255	static inline __be64 metaptr1(unsigned* int height, const struct metapath *mp)
256	{
257	struct buffer_head *bh = mp->mp_bh[height];
258	if (height == `0`)
259	return ((__be64 )(bh->b_data + sizeof(struct* gfs2_dinode)));
260	return ((__be64 )(bh->b_data + sizeof(struct* gfs2_meta_header)));
261	}
262
263	/**
264	* metapointer - Return pointer to start of metadata in a buffer
265	* @height: The metadata height (0 = dinode)
266	* @mp: The metapath
267	*
268	* Return a pointer to the block number of the next height of the metadata
269	* tree given a buffer containing the pointer to the current height of the
270	* metadata tree.
271	*/
272
273	static inline __be64 metapointer(unsigned* int height, const struct metapath *mp)
274	{
275	__be64 *p = metaptr1(height, mp);
276	return p + mp->mp_list[height];
277	}
278
279	static inline const __be64 metaend(unsigned* int height, const struct metapath *mp)
280	{
281	const struct buffer_head *bh = mp->mp_bh[height];
282	return (const __be64 *)(bh->b_data + bh->b_size);
283	}
284
285	static void clone_metapath(struct metapath clone, struct* metapath *mp)
286	{
287	unsigned int hgt;
288
289	clone = mp;
290	for (hgt = `0`; hgt < mp->mp_aheight; hgt++)
291	get_bh(bh: clone->mp_bh[hgt]);
292	}
293
294	static void gfs2_metapath_ra(struct gfs2_glock gl, __be64 start, __be64 *end)
295	{
296	const __be64 *t;
297
298	for (t = start; t < end; t++) {
299	struct buffer_head *rabh;
300
301	if (!*t)
302	continue;
303
304	rabh = gfs2_getbuf(gl, be64_to_cpu(*t), create: CREATE);
305	if (trylock_buffer(bh: rabh)) {
306	if (!buffer_uptodate(bh: rabh)) {
307	rabh->b_end_io = end_buffer_read_sync;
308	submit_bh(REQ_OP_READ \| REQ_RAHEAD \| REQ_META \|
309	REQ_PRIO, rabh);
310	continue;
311	}
312	unlock_buffer(bh: rabh);
313	}
314	brelse(bh: rabh);
315	}
316	}
317
318	static inline struct buffer_head *
319	metapath_dibh(struct metapath *mp)
320	{
321	return mp->mp_bh[`0`];
322	}
323
324	static int __fillup_metapath(struct gfs2_inode ip, struct* metapath *mp,
325	unsigned int x, unsigned int h)
326	{
327	for (; x < h; x++) {
328	__be64 *ptr = metapointer(height: x, mp);
329	u64 dblock = be64_to_cpu(*ptr);
330	int ret;
331
332	if (!dblock)
333	break;
334	ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, num: dblock, bhp: &mp->mp_bh[x + `1`]);
335	if (ret)
336	return ret;
337	}
338	mp->mp_aheight = x + `1`;
339	return `0`;
340	}
341
342	/**
343	* lookup_metapath - Walk the metadata tree to a specific point
344	* @ip: The inode
345	* @mp: The metapath
346	*
347	* Assumes that the inode's buffer has already been looked up and
348	* hooked onto mp->mp_bh[0] and that the metapath has been initialised
349	* by find_metapath().
350	*
351	* If this function encounters part of the tree which has not been
352	* allocated, it returns the current height of the tree at the point
353	* at which it found the unallocated block. Blocks which are found are
354	* added to the mp->mp_bh[] list.
355	*
356	* Returns: error
357	*/
358
359	static int lookup_metapath(struct gfs2_inode ip, struct* metapath *mp)
360	{
361	return __fillup_metapath(ip, mp, x: `0`, h: ip->i_height - `1`);
362	}
363
364	/**
365	* fillup_metapath - fill up buffers for the metadata path to a specific height
366	* @ip: The inode
367	* @mp: The metapath
368	* @h: The height to which it should be mapped
369	*
370	* Similar to lookup_metapath, but does lookups for a range of heights
371	*
372	* Returns: error or the number of buffers filled
373	*/
374
375	static int fillup_metapath(struct gfs2_inode ip, struct* metapath mp, int* h)
376	{
377	unsigned int x = `0`;
378	int ret;
379
380	if (h) {
381	/ find the first buffer we need to look up. /
382	for (x = h - `1`; x > `0`; x--) {
383	if (mp->mp_bh[x])
384	break;
385	}
386	}
387	ret = __fillup_metapath(ip, mp, x, h);
388	if (ret)
389	return ret;
390	return mp->mp_aheight - x - `1`;
391	}
392
393	static sector_t metapath_to_block(struct gfs2_sbd sdp, struct* metapath *mp)
394	{
395	sector_t factor = `1`, block = `0`;
396	int hgt;
397
398	for (hgt = mp->mp_fheight - `1`; hgt >= `0`; hgt--) {
399	if (hgt < mp->mp_aheight)
400	block += mp->mp_list[hgt] * factor;
401	factor *= sdp->sd_inptrs;
402	}
403	return block;
404	}
405
406	static void release_metapath(struct metapath *mp)
407	{
408	int i;
409
410	for (i = `0`; i < GFS2_MAX_META_HEIGHT; i++) {
411	if (mp->mp_bh[i] == NULL)
412	break;
413	brelse(bh: mp->mp_bh[i]);
414	mp->mp_bh[i] = NULL;
415	}
416	}
417
418	/**
419	* gfs2_extent_length - Returns length of an extent of blocks
420	* @bh: The metadata block
421	* @ptr: Current position in @bh
422	* @eob: Set to 1 if we hit "end of block"
423	*
424	* Returns: The length of the extent (minimum of one block)
425	*/
426
427	static inline unsigned int gfs2_extent_length(struct buffer_head bh, __be64 ptr, int *eob)
428	{
429	const __be64 end = (__be64 )(bh->b_data + bh->b_size);
430	const __be64 *first = ptr;
431	u64 d = be64_to_cpu(*ptr);
432
433	*eob = `0`;
434	do {
435	ptr++;
436	if (ptr >= end)
437	break;
438	d++;
439	} while(be64_to_cpu(*ptr) == d);
440	if (ptr >= end)
441	*eob = `1`;
442	return ptr - first;
443	}
444
/* Result of a gfs2_metadata_walker callback; see gfs2_walk_metadata(). */
enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);
457
458	/*
459	* gfs2_walk_metadata - walk a tree of indirect blocks
460	* @inode: The inode
461	* @mp: Starting point of walk
462	* @max_len: Maximum number of blocks to walk
463	* @walker: Called during the walk
464	*
465	* Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
466	* past the end of metadata, and a negative error code otherwise.
467	*/
468
469	static int gfs2_walk_metadata(struct inode inode, struct* metapath *mp,
470	u64 max_len, gfs2_metadata_walker walker)
471	{
472	struct gfs2_inode *ip = GFS2_I(inode);
473	struct gfs2_sbd *sdp = GFS2_SB(inode);
474	u64 factor = `1`;
475	unsigned int hgt;
476	int ret;
477
478	/*
479	* The walk starts in the lowest allocated indirect block, which may be
480	* before the position indicated by @mp. Adjust @max_len accordingly
481	* to avoid a short walk.
482	*/
483	for (hgt = mp->mp_fheight - `1`; hgt >= mp->mp_aheight; hgt--) {
484	max_len += mp->mp_list[hgt] * factor;
485	mp->mp_list[hgt] = `0`;
486	factor *= sdp->sd_inptrs;
487	}
488
489	for (;;) {
490	u16 start = mp->mp_list[hgt];
491	enum walker_status status;
492	unsigned int ptrs;
493	u64 len;
494
495	/ Walk indirect block. /
496	ptrs = (hgt >= `1` ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
497	len = ptrs * factor;
498	if (len > max_len)
499	ptrs = DIV_ROUND_UP_ULL(max_len, factor);
500	status = walker(mp, ptrs);
501	switch (status) {
502	case WALK_STOP:
503	return `1`;
504	case WALK_FOLLOW:
505	BUG_ON(mp->mp_aheight == mp->mp_fheight);
506	ptrs = mp->mp_list[hgt] - start;
507	len = ptrs * factor;
508	break;
509	case WALK_CONTINUE:
510	break;
511	}
512	if (len >= max_len)
513	break;
514	max_len -= len;
515	if (status == WALK_FOLLOW)
516	goto fill_up_metapath;
517
518	lower_metapath:
519	/ Decrease height of metapath. /
520	brelse(bh: mp->mp_bh[hgt]);
521	mp->mp_bh[hgt] = NULL;
522	mp->mp_list[hgt] = `0`;
523	if (!hgt)
524	break;
525	hgt--;
526	factor *= sdp->sd_inptrs;
527
528	/ Advance in metadata tree. /
529	(mp->mp_list[hgt])++;
530	if (hgt) {
531	if (mp->mp_list[hgt] >= sdp->sd_inptrs)
532	goto lower_metapath;
533	} else {
534	if (mp->mp_list[hgt] >= sdp->sd_diptrs)
535	break;
536	}
537
538	fill_up_metapath:
539	/ Increase height of metapath. /
540	ret = fillup_metapath(ip, mp, h: ip->i_height - `1`);
541	if (ret < `0`)
542	return ret;
543	hgt += ret;
544	for (; ret; ret--)
545	do_div(factor, sdp->sd_inptrs);
546	mp->mp_aheight = hgt + `1`;
547	}
548	return `0`;
549	}
550
551	static enum walker_status gfs2_hole_walker(struct metapath *mp,
552	unsigned int ptrs)
553	{
554	const __be64 start, ptr, *end;
555	unsigned int hgt;
556
557	hgt = mp->mp_aheight - `1`;
558	start = metapointer(height: hgt, mp);
559	end = start + ptrs;
560
561	for (ptr = start; ptr < end; ptr++) {
562	if (*ptr) {
563	mp->mp_list[hgt] += ptr - start;
564	if (mp->mp_aheight == mp->mp_fheight)
565	return WALK_STOP;
566	return WALK_FOLLOW;
567	}
568	}
569	return WALK_CONTINUE;
570	}
571
572	/**
573	* gfs2_hole_size - figure out the size of a hole
574	* @inode: The inode
575	* @lblock: The logical starting block number
576	* @len: How far to look (in blocks)
577	* @mp: The metapath at lblock
578	* @iomap: The iomap to store the hole size in
579	*
580	* This function modifies @mp.
581	*
582	* Returns: errno on error
583	*/
584	static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
585	struct metapath mp, struct* iomap *iomap)
586	{
587	struct metapath clone;
588	u64 hole_size;
589	int ret;
590
591	clone_metapath(clone: &clone, mp);
592	ret = gfs2_walk_metadata(inode, mp: &clone, max_len: len, walker: gfs2_hole_walker);
593	if (ret < `0`)
594	goto out;
595
596	if (ret == `1`)
597	hole_size = metapath_to_block(sdp: GFS2_SB(inode), mp: &clone) - lblock;
598	else
599	hole_size = len;
600	iomap->length = hole_size << inode->i_blkbits;
601	ret = `0`;
602
603	out:
604	release_metapath(mp: &clone);
605	return ret;
606	}
607
608	static inline void gfs2_indirect_init(struct metapath *mp,
609	struct gfs2_glock gl, unsigned* int i,
610	unsigned offset, u64 bn)
611	{
612	__be64 ptr = (__be64 )(mp->mp_bh[i - `1`]->b_data +
613	((i > `1`) ? sizeof(struct gfs2_meta_header) :
614	sizeof(struct gfs2_dinode)));
615	BUG_ON(i < `1`);
616	BUG_ON(mp->mp_bh[i] != NULL);
617	mp->mp_bh[i] = gfs2_meta_new(gl, blkno: bn);
618	gfs2_trans_add_meta(gl, bh: mp->mp_bh[i]);
619	gfs2_metatype_set(bh: mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
620	gfs2_buffer_clear_tail(bh: mp->mp_bh[i], head: sizeof(struct gfs2_meta_header));
621	ptr += offset;
622	*ptr = cpu_to_be64(bn);
623	}
624
/* States of the allocation state machine in __gfs2_iomap_alloc(). */
enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
631
632	/**
633	* __gfs2_iomap_alloc - Build a metadata tree of the requested height
634	* @inode: The GFS2 inode
635	* @iomap: The iomap structure
636	* @mp: The metapath, with proper height information calculated
637	*
638	* In this routine we may have to alloc:
639	* i) Indirect blocks to grow the metadata tree height
640	* ii) Indirect blocks to fill in lower part of the metadata tree
641	* iii) Data blocks
642	*
643	* This function is called after __gfs2_iomap_get, which works out the
644	* total number of blocks which we need via gfs2_alloc_size.
645	*
646	* We then do the actual allocation asking for an extent at a time (if
647	* enough contiguous free blocks are available, there will only be one
648	* allocation request per call) and uses the state machine to initialise
649	* the blocks in order.
650	*
651	* Right now, this function will allocate at most one indirect block
652	* worth of data -- with a default block size of 4K, that's slightly
653	* less than 2M. If this limitation is ever removed to allow huge
654	* allocations, we would probably still want to limit the iomap size we
655	* return to avoid stalling other tasks during huge writes; the next
656	* iomap iteration would then find the blocks already allocated.
657	*
658	* Returns: errno on error
659	*/
660
661	static int __gfs2_iomap_alloc(struct inode inode, struct* iomap *iomap,
662	struct metapath *mp)
663	{
664	struct gfs2_inode *ip = GFS2_I(inode);
665	struct gfs2_sbd *sdp = GFS2_SB(inode);
666	struct buffer_head *dibh = metapath_dibh(mp);
667	u64 bn;
668	unsigned n, i, blks, alloced = `0`, iblks = `0`, branch_start = `0`;
669	size_t dblks = iomap->length >> inode->i_blkbits;
670	const unsigned end_of_metadata = mp->mp_fheight - `1`;
671	int ret;
672	enum alloc_state state;
673	__be64 *ptr;
674	__be64 zero_bn = `0`;
675
676	BUG_ON(mp->mp_aheight < `1`);
677	BUG_ON(dibh == NULL);
678	BUG_ON(dblks < `1`);
679
680	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
681
682	down_write(sem: &ip->i_rw_mutex);
683
684	if (mp->mp_fheight == mp->mp_aheight) {
685	/ Bottom indirect block exists /
686	state = ALLOC_DATA;
687	} else {
688	/ Need to allocate indirect blocks /
689	if (mp->mp_fheight == ip->i_height) {
690	/ Writing into existing tree, extend tree down /
691	iblks = mp->mp_fheight - mp->mp_aheight;
692	state = ALLOC_GROW_DEPTH;
693	} else {
694	/ Building up tree height /
695	state = ALLOC_GROW_HEIGHT;
696	iblks = mp->mp_fheight - ip->i_height;
697	branch_start = metapath_branch_start(mp);
698	iblks += (mp->mp_fheight - branch_start);
699	}
700	}
701
702	/ start of the second part of the function (state machine) /
703
704	blks = dblks + iblks;
705	i = mp->mp_aheight;
706	do {
707	n = blks - alloced;
708	ret = gfs2_alloc_blocks(ip, bn: &bn, n: &n, dinode: `0`);
709	if (ret)
710	goto out;
711	alloced += n;
712	if (state != ALLOC_DATA \|\| gfs2_is_jdata(ip))
713	gfs2_trans_remove_revoke(sdp, blkno: bn, len: n);
714	switch (state) {
715	/ Growing height of tree /
716	case ALLOC_GROW_HEIGHT:
717	if (i == `1`) {
718	ptr = (__be64 *)(dibh->b_data +
719	sizeof(struct gfs2_dinode));
720	zero_bn = *ptr;
721	}
722	for (; i - `1` < mp->mp_fheight - ip->i_height && n > `0`;
723	i++, n--)
724	gfs2_indirect_init(mp, gl: ip->i_gl, i, offset: `0`, bn: bn++);
725	if (i - `1` == mp->mp_fheight - ip->i_height) {
726	i--;
727	gfs2_buffer_copy_tail(to_bh: mp->mp_bh[i],
728	to_head: sizeof(struct gfs2_meta_header),
729	from_bh: dibh, from_head: sizeof(struct gfs2_dinode));
730	gfs2_buffer_clear_tail(bh: dibh,
731	head: sizeof(struct gfs2_dinode) +
732	sizeof(__be64));
733	ptr = (__be64 *)(mp->mp_bh[i]->b_data +
734	sizeof(struct gfs2_meta_header));
735	*ptr = zero_bn;
736	state = ALLOC_GROW_DEPTH;
737	for(i = branch_start; i < mp->mp_fheight; i++) {
738	if (mp->mp_bh[i] == NULL)
739	break;
740	brelse(bh: mp->mp_bh[i]);
741	mp->mp_bh[i] = NULL;
742	}
743	i = branch_start;
744	}
745	if (n == `0`)
746	break;
747	fallthrough; / To branching from existing tree /
748	case ALLOC_GROW_DEPTH:
749	if (i > `1` && i < mp->mp_fheight)
750	gfs2_trans_add_meta(gl: ip->i_gl, bh: mp->mp_bh[i-`1`]);
751	for (; i < mp->mp_fheight && n > `0`; i++, n--)
752	gfs2_indirect_init(mp, gl: ip->i_gl, i,
753	offset: mp->mp_list[i-`1`], bn: bn++);
754	if (i == mp->mp_fheight)
755	state = ALLOC_DATA;
756	if (n == `0`)
757	break;
758	fallthrough; / To tree complete, adding data blocks /
759	case ALLOC_DATA:
760	BUG_ON(n > dblks);
761	BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
762	gfs2_trans_add_meta(gl: ip->i_gl, bh: mp->mp_bh[end_of_metadata]);
763	dblks = n;
764	ptr = metapointer(height: end_of_metadata, mp);
765	iomap->addr = bn << inode->i_blkbits;
766	iomap->flags \|= IOMAP_F_MERGED \| IOMAP_F_NEW;
767	while (n-- > `0`)
768	*ptr++ = cpu_to_be64(bn++);
769	break;
770	}
771	} while (iomap->addr == IOMAP_NULL_ADDR);
772
773	iomap->type = IOMAP_MAPPED;
774	iomap->length = (u64)dblks << inode->i_blkbits;
775	ip->i_height = mp->mp_fheight;
776	gfs2_add_inode_blocks(inode: &ip->i_inode, change: alloced);
777	gfs2_dinode_out(ip, buf: dibh->b_data);
778	out:
779	up_write(sem: &ip->i_rw_mutex);
780	return ret;
781	}
782
783	#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
784
785	/**
786	* gfs2_alloc_size - Compute the maximum allocation size
787	* @inode: The inode
788	* @mp: The metapath
789	* @size: Requested size in blocks
790	*
791	* Compute the maximum size of the next allocation at @mp.
792	*
793	* Returns: size in blocks
794	*/
795	static u64 gfs2_alloc_size(struct inode inode, struct* metapath *mp, u64 size)
796	{
797	struct gfs2_inode *ip = GFS2_I(inode);
798	struct gfs2_sbd *sdp = GFS2_SB(inode);
799	const __be64 first, ptr, *end;
800
801	/*
802	* For writes to stuffed files, this function is called twice via
803	* __gfs2_iomap_get, before and after unstuffing. The size we return the
804	* first time needs to be large enough to get the reservation and
805	* allocation sizes right. The size we return the second time must
806	* be exact or else __gfs2_iomap_alloc won't do the right thing.
807	*/
808
809	if (gfs2_is_stuffed(ip) \|\| mp->mp_fheight != mp->mp_aheight) {
810	unsigned int maxsize = mp->mp_fheight > `1` ?
811	sdp->sd_inptrs : sdp->sd_diptrs;
812	maxsize -= mp->mp_list[mp->mp_fheight - `1`];
813	if (size > maxsize)
814	size = maxsize;
815	return size;
816	}
817
818	first = metapointer(height: ip->i_height - `1`, mp);
819	end = metaend(height: ip->i_height - `1`, mp);
820	if (end - first > size)
821	end = first + size;
822	for (ptr = first; ptr < end; ptr++) {
823	if (*ptr)
824	break;
825	}
826	return ptr - first;
827	}
828
829	/**
830	* __gfs2_iomap_get - Map blocks from an inode to disk blocks
831	* @inode: The inode
832	* @pos: Starting position in bytes
833	* @length: Length to map, in bytes
834	* @flags: iomap flags
835	* @iomap: The iomap structure
836	* @mp: The metapath
837	*
838	* Returns: errno
839	*/
840	static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
841	unsigned flags, struct iomap *iomap,
842	struct metapath *mp)
843	{
844	struct gfs2_inode *ip = GFS2_I(inode);
845	struct gfs2_sbd *sdp = GFS2_SB(inode);
846	loff_t size = i_size_read(inode);
847	__be64 *ptr;
848	sector_t lblock;
849	sector_t lblock_stop;
850	int ret;
851	int eob;
852	u64 len;
853	struct buffer_head dibh = NULL, bh;
854	u8 height;
855
856	if (!length)
857	return -EINVAL;
858
859	down_read(sem: &ip->i_rw_mutex);
860
861	ret = gfs2_meta_inode_buffer(ip, bhp: &dibh);
862	if (ret)
863	goto unlock;
864	mp->mp_bh[`0`] = dibh;
865
866	if (gfs2_is_stuffed(ip)) {
867	if (flags & IOMAP_WRITE) {
868	loff_t max_size = gfs2_max_stuffed_size(ip);
869
870	if (pos + length > max_size)
871	goto unstuff;
872	iomap->length = max_size;
873	} else {
874	if (pos >= size) {
875	if (flags & IOMAP_REPORT) {
876	ret = -ENOENT;
877	goto unlock;
878	} else {
879	iomap->offset = pos;
880	iomap->length = length;
881	goto hole_found;
882	}
883	}
884	iomap->length = size;
885	}
886	iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
887	sizeof(struct gfs2_dinode);
888	iomap->type = IOMAP_INLINE;
889	iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
890	goto out;
891	}
892
893	unstuff:
894	lblock = pos >> inode->i_blkbits;
895	iomap->offset = lblock << inode->i_blkbits;
896	lblock_stop = (pos + length - `1`) >> inode->i_blkbits;
897	len = lblock_stop - lblock + `1`;
898	iomap->length = len << inode->i_blkbits;
899
900	height = ip->i_height;
901	while ((lblock + `1`) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
902	height++;
903	find_metapath(sdp, block: lblock, mp, height);
904	if (height > ip->i_height \|\| gfs2_is_stuffed(ip))
905	goto do_alloc;
906
907	ret = lookup_metapath(ip, mp);
908	if (ret)
909	goto unlock;
910
911	if (mp->mp_aheight != ip->i_height)
912	goto do_alloc;
913
914	ptr = metapointer(height: ip->i_height - `1`, mp);
915	if (*ptr == `0`)
916	goto do_alloc;
917
918	bh = mp->mp_bh[ip->i_height - `1`];
919	len = gfs2_extent_length(bh, ptr, eob: &eob);
920
921	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
922	iomap->length = len << inode->i_blkbits;
923	iomap->type = IOMAP_MAPPED;
924	iomap->flags \|= IOMAP_F_MERGED;
925	if (eob)
926	iomap->flags \|= IOMAP_F_GFS2_BOUNDARY;
927
928	out:
929	iomap->bdev = inode->i_sb->s_bdev;
930	unlock:
931	up_read(sem: &ip->i_rw_mutex);
932	return ret;
933
934	do_alloc:
935	if (flags & IOMAP_REPORT) {
936	if (pos >= size)
937	ret = -ENOENT;
938	else if (height == ip->i_height)
939	ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
940	else
941	iomap->length = size - iomap->offset;
942	} else if (flags & IOMAP_WRITE) {
943	u64 alloc_size;
944
945	if (flags & IOMAP_DIRECT)
946	goto out; / (see gfs2_file_direct_write) /
947
948	len = gfs2_alloc_size(inode, mp, size: len);
949	alloc_size = len << inode->i_blkbits;
950	if (alloc_size < iomap->length)
951	iomap->length = alloc_size;
952	} else {
953	if (pos < size && height == ip->i_height)
954	ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
955	}
956	hole_found:
957	iomap->addr = IOMAP_NULL_ADDR;
958	iomap->type = IOMAP_HOLE;
959	goto out;
960	}
961
962	static struct folio *
963	gfs2_iomap_get_folio(struct iomap_iter iter, loff_t pos, unsigned* len)
964	{
965	struct inode *inode = iter->inode;
966	unsigned int blockmask = i_blocksize(node: inode) - `1`;
967	struct gfs2_sbd *sdp = GFS2_SB(inode);
968	unsigned int blocks;
969	struct folio *folio;
970	int status;
971
972	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
973	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, revokes: `0`);
974	if (status)
975	return ERR_PTR(error: status);
976
977	folio = iomap_get_folio(iter, pos, len);
978	if (IS_ERR(ptr: folio))
979	gfs2_trans_end(sdp);
980	return folio;
981	}
982
983	static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
984	unsigned copied, struct folio *folio)
985	{
986	struct gfs2_trans *tr = current->journal_info;
987	struct gfs2_inode *ip = GFS2_I(inode);
988	struct gfs2_sbd *sdp = GFS2_SB(inode);
989
990	if (!gfs2_is_stuffed(ip))
991	gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
992	len: copied);
993
994	folio_unlock(folio);
995	folio_put(folio);
996
997	if (tr->tr_num_buf_new)
998	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
999
1000	gfs2_trans_end(sdp);
1001	}
1002
1003	static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
1004	.get_folio = gfs2_iomap_get_folio,
1005	.put_folio = gfs2_iomap_put_folio,
1006	};
1007
1008	static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
1009	loff_t length, unsigned flags,
1010	struct iomap *iomap,
1011	struct metapath *mp)
1012	{
1013	struct gfs2_inode *ip = GFS2_I(inode);
1014	struct gfs2_sbd *sdp = GFS2_SB(inode);
1015	bool unstuff;
1016	int ret;
1017
1018	unstuff = gfs2_is_stuffed(ip) &&
1019	pos + length > gfs2_max_stuffed_size(ip);
1020
1021	if (unstuff \|\| iomap->type == IOMAP_HOLE) {
1022	unsigned int data_blocks, ind_blocks;
1023	struct gfs2_alloc_parms ap = {};
1024	unsigned int rblocks;
1025	struct gfs2_trans *tr;
1026
1027	gfs2_write_calc_reserv(ip, len: iomap->length, data_blocks: &data_blocks,
1028	ind_blocks: &ind_blocks);
1029	ap.target = data_blocks + ind_blocks;
1030	ret = gfs2_quota_lock_check(ip, ap: &ap);
1031	if (ret)
1032	return ret;
1033
1034	ret = gfs2_inplace_reserve(ip, ap: &ap);
1035	if (ret)
1036	goto out_qunlock;
1037
1038	rblocks = RES_DINODE + ind_blocks;
1039	if (gfs2_is_jdata(ip))
1040	rblocks += data_blocks;
1041	if (ind_blocks \|\| data_blocks)
1042	rblocks += RES_STATFS + RES_QUOTA;
1043	if (inode == sdp->sd_rindex)
1044	rblocks += `2` * RES_STATFS;
1045	rblocks += gfs2_rg_blocks(ip, requested: data_blocks + ind_blocks);
1046
1047	ret = gfs2_trans_begin(sdp, blocks: rblocks,
1048	revokes: iomap->length >> inode->i_blkbits);
1049	if (ret)
1050	goto out_trans_fail;
1051
1052	if (unstuff) {
1053	ret = gfs2_unstuff_dinode(ip);
1054	if (ret)
1055	goto out_trans_end;
1056	release_metapath(mp);
1057	ret = __gfs2_iomap_get(inode, pos: iomap->offset,
1058	length: iomap->length, flags, iomap, mp);
1059	if (ret)
1060	goto out_trans_end;
1061	}
1062
1063	if (iomap->type == IOMAP_HOLE) {
1064	ret = __gfs2_iomap_alloc(inode, iomap, mp);
1065	if (ret) {
1066	gfs2_trans_end(sdp);
1067	gfs2_inplace_release(ip);
1068	punch_hole(ip, offset: iomap->offset, length: iomap->length);
1069	goto out_qunlock;
1070	}
1071	}
1072
1073	tr = current->journal_info;
1074	if (tr->tr_num_buf_new)
1075	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1076
1077	gfs2_trans_end(sdp);
1078	}
1079
1080	if (gfs2_is_stuffed(ip) \|\| gfs2_is_jdata(ip))
1081	iomap->folio_ops = &gfs2_iomap_folio_ops;
1082	return `0`;
1083
1084	out_trans_end:
1085	gfs2_trans_end(sdp);
1086	out_trans_fail:
1087	gfs2_inplace_release(ip);
1088	out_qunlock:
1089	gfs2_quota_unlock(ip);
1090	return ret;
1091	}
1092
1093	static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
1094	unsigned flags, struct iomap *iomap,
1095	struct iomap *srcmap)
1096	{
1097	struct gfs2_inode *ip = GFS2_I(inode);
1098	struct metapath mp = { .mp_aheight = `1`, };
1099	int ret;
1100
1101	if (gfs2_is_jdata(ip))
1102	iomap->flags \|= IOMAP_F_BUFFER_HEAD;
1103
1104	trace_gfs2_iomap_start(ip, pos, length, flags);
1105	ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, mp: &mp);
1106	if (ret)
1107	goto out_unlock;
1108
1109	switch(flags & (IOMAP_WRITE \| IOMAP_ZERO)) {
1110	case IOMAP_WRITE:
1111	if (flags & IOMAP_DIRECT) {
1112	/*
1113	* Silently fall back to buffered I/O for stuffed files
1114	* or if we've got a hole (see gfs2_file_direct_write).
1115	*/
1116	if (iomap->type != IOMAP_MAPPED)
1117	ret = -ENOTBLK;
1118	goto out_unlock;
1119	}
1120	break;
1121	case IOMAP_ZERO:
1122	if (iomap->type == IOMAP_HOLE)
1123	goto out_unlock;
1124	break;
1125	default:
1126	goto out_unlock;
1127	}
1128
1129	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, mp: &mp);
1130
1131	out_unlock:
1132	release_metapath(mp: &mp);
1133	trace_gfs2_iomap_end(ip, iomap, ret);
1134	return ret;
1135	}
1136
1137	static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
1138	ssize_t written, unsigned flags, struct iomap *iomap)
1139	{
1140	struct gfs2_inode *ip = GFS2_I(inode);
1141	struct gfs2_sbd *sdp = GFS2_SB(inode);
1142
1143	switch (flags & (IOMAP_WRITE \| IOMAP_ZERO)) {
1144	case IOMAP_WRITE:
1145	if (flags & IOMAP_DIRECT)
1146	return `0`;
1147	break;
1148	case IOMAP_ZERO:
1149	if (iomap->type == IOMAP_HOLE)
1150	return `0`;
1151	break;
1152	default:
1153	return `0`;
1154	}
1155
1156	if (!gfs2_is_stuffed(ip))
1157	gfs2_ordered_add_inode(ip);
1158
1159	if (inode == sdp->sd_rindex)
1160	adjust_fs_space(inode);
1161
1162	gfs2_inplace_release(ip);
1163
1164	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
1165	gfs2_quota_unlock(ip);
1166
1167	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
1168	/ Deallocate blocks that were just allocated. /
1169	loff_t hstart = round_up(pos + written, i_blocksize(inode));
1170	loff_t hend = iomap->offset + iomap->length;
1171
1172	if (hstart < hend) {
1173	truncate_pagecache_range(inode, offset: hstart, end: hend - `1`);
1174	punch_hole(ip, offset: hstart, length: hend - hstart);
1175	}
1176	}
1177
1178	if (unlikely(!written))
1179	return `0`;
1180
1181	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
1182	mark_inode_dirty(inode);
1183	set_bit(nr: GLF_DIRTY, addr: &ip->i_gl->gl_flags);
1184	return `0`;
1185	}
1186
1187	const struct iomap_ops gfs2_iomap_ops = {
1188	.iomap_begin = gfs2_iomap_begin,
1189	.iomap_end = gfs2_iomap_end,
1190	};
1191
1192	/**
1193	* gfs2_block_map - Map one or more blocks of an inode to a disk block
1194	* @inode: The inode
1195	* @lblock: The logical block number
1196	* @bh_map: The bh to be mapped
1197	* @create: True if its ok to alloc blocks to satify the request
1198	*
1199	* The size of the requested mapping is defined in bh_map->b_size.
1200	*
1201	* Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
1202	* when @lblock is not mapped. Sets buffer_mapped(bh_map) and
1203	* bh_map->b_size to indicate the size of the mapping when @lblock and
1204	* successive blocks are mapped, up to the requested size.
1205	*
1206	* Sets buffer_boundary() if a read of metadata will be required
1207	* before the next block can be mapped. Sets buffer_new() if new
1208	* blocks were allocated.
1209	*
1210	* Returns: errno
1211	*/
1212
1213	int gfs2_block_map(struct inode *inode, sector_t lblock,
1214	struct buffer_head bh_map, int* create)
1215	{
1216	struct gfs2_inode *ip = GFS2_I(inode);
1217	loff_t pos = (loff_t)lblock << inode->i_blkbits;
1218	loff_t length = bh_map->b_size;
1219	struct iomap iomap = { };
1220	int ret;
1221
1222	clear_buffer_mapped(bh: bh_map);
1223	clear_buffer_new(bh: bh_map);
1224	clear_buffer_boundary(bh: bh_map);
1225	trace_gfs2_bmap(ip, bh: bh_map, lblock, create, errno: `1`);
1226
1227	if (!create)
1228	ret = gfs2_iomap_get(inode, pos, length, iomap: &iomap);
1229	else
1230	ret = gfs2_iomap_alloc(inode, pos, length, iomap: &iomap);
1231	if (ret)
1232	goto out;
1233
1234	if (iomap.length > bh_map->b_size) {
1235	iomap.length = bh_map->b_size;
1236	iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
1237	}
1238	if (iomap.addr != IOMAP_NULL_ADDR)
1239	map_bh(bh: bh_map, sb: inode->i_sb, block: iomap.addr >> inode->i_blkbits);
1240	bh_map->b_size = iomap.length;
1241	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
1242	set_buffer_boundary(bh_map);
1243	if (iomap.flags & IOMAP_F_NEW)
1244	set_buffer_new(bh_map);
1245
1246	out:
1247	trace_gfs2_bmap(ip, bh: bh_map, lblock, create, errno: ret);
1248	return ret;
1249	}
1250
1251	int gfs2_get_extent(struct inode inode, u64 lblock, u64 dblock,
1252	unsigned int *extlen)
1253	{
1254	unsigned int blkbits = inode->i_blkbits;
1255	struct iomap iomap = { };
1256	unsigned int len;
1257	int ret;
1258
1259	ret = gfs2_iomap_get(inode, pos: lblock << blkbits, length: *extlen << blkbits,
1260	iomap: &iomap);
1261	if (ret)
1262	return ret;
1263	if (iomap.type != IOMAP_MAPPED)
1264	return -EIO;
1265	*dblock = iomap.addr >> blkbits;
1266	len = iomap.length >> blkbits;
1267	if (len < *extlen)
1268	*extlen = len;
1269	return `0`;
1270	}
1271
1272	int gfs2_alloc_extent(struct inode inode, u64 lblock, u64 dblock,
1273	unsigned int extlen, bool new)
1274	{
1275	unsigned int blkbits = inode->i_blkbits;
1276	struct iomap iomap = { };
1277	unsigned int len;
1278	int ret;
1279
1280	ret = gfs2_iomap_alloc(inode, pos: lblock << blkbits, length: *extlen << blkbits,
1281	iomap: &iomap);
1282	if (ret)
1283	return ret;
1284	if (iomap.type != IOMAP_MAPPED)
1285	return -EIO;
1286	*dblock = iomap.addr >> blkbits;
1287	len = iomap.length >> blkbits;
1288	if (len < *extlen)
1289	*extlen = len;
1290	*new = iomap.flags & IOMAP_F_NEW;
1291	return `0`;
1292	}
1293
1294	/*
1295	* NOTE: Never call gfs2_block_zero_range with an open transaction because it
1296	* uses iomap write to perform its actions, which begin their own transactions
1297	* (iomap_begin, get_folio, etc.)
1298	*/
1299	static int gfs2_block_zero_range(struct inode *inode, loff_t from,
1300	unsigned int length)
1301	{
1302	BUG_ON(current->journal_info);
1303	return iomap_zero_range(inode, pos: from, len: length, NULL, ops: &gfs2_iomap_ops);
1304	}
1305
1306	#define GFS2_JTRUNC_REVOKES 8192
1307
1308	/**
1309	* gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1310	* @inode: The inode being truncated
1311	* @oldsize: The original (larger) size
1312	* @newsize: The new smaller size
1313	*
1314	* With jdata files, we have to journal a revoke for each block which is
1315	* truncated. As a result, we need to split this into separate transactions
1316	* if the number of pages being truncated gets too large.
1317	*/
1318
1319	static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1320	{
1321	struct gfs2_sbd *sdp = GFS2_SB(inode);
1322	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1323	u64 chunk;
1324	int error;
1325
1326	while (oldsize != newsize) {
1327	struct gfs2_trans *tr;
1328	unsigned int offs;
1329
1330	chunk = oldsize - newsize;
1331	if (chunk > max_chunk)
1332	chunk = max_chunk;
1333
1334	offs = oldsize & ~PAGE_MASK;
1335	if (offs && chunk > PAGE_SIZE)
1336	chunk = offs + ((chunk - offs) & PAGE_MASK);
1337
1338	truncate_pagecache(inode, new: oldsize - chunk);
1339	oldsize -= chunk;
1340
1341	tr = current->journal_info;
1342	if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1343	continue;
1344
1345	gfs2_trans_end(sdp);
1346	error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1347	if (error)
1348	return error;
1349	}
1350
1351	return `0`;
1352	}
1353
1354	static int trunc_start(struct inode *inode, u64 newsize)
1355	{
1356	struct gfs2_inode *ip = GFS2_I(inode);
1357	struct gfs2_sbd *sdp = GFS2_SB(inode);
1358	struct buffer_head *dibh = NULL;
1359	int journaled = gfs2_is_jdata(ip);
1360	u64 oldsize = inode->i_size;
1361	int error;
1362
1363	if (!gfs2_is_stuffed(ip)) {
1364	unsigned int blocksize = i_blocksize(node: inode);
1365	unsigned int offs = newsize & (blocksize - `1`);
1366	if (offs) {
1367	error = gfs2_block_zero_range(inode, from: newsize,
1368	length: blocksize - offs);
1369	if (error)
1370	return error;
1371	}
1372	}
1373	if (journaled)
1374	error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1375	else
1376	error = gfs2_trans_begin(sdp, RES_DINODE, revokes: `0`);
1377	if (error)
1378	return error;
1379
1380	error = gfs2_meta_inode_buffer(ip, bhp: &dibh);
1381	if (error)
1382	goto out;
1383
1384	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
1385
1386	if (gfs2_is_stuffed(ip))
1387	gfs2_buffer_clear_tail(bh: dibh, head: sizeof(struct gfs2_dinode) + newsize);
1388	else
1389	ip->i_diskflags \|= GFS2_DIF_TRUNC_IN_PROG;
1390
1391	i_size_write(inode, i_size: newsize);
1392	inode_set_mtime_to_ts(inode: &ip->i_inode, ts: inode_set_ctime_current(inode: &ip->i_inode));
1393	gfs2_dinode_out(ip, buf: dibh->b_data);
1394
1395	if (journaled)
1396	error = gfs2_journaled_truncate(inode, oldsize, newsize);
1397	else
1398	truncate_pagecache(inode, new: newsize);
1399
1400	out:
1401	brelse(bh: dibh);
1402	if (current->journal_info)
1403	gfs2_trans_end(sdp);
1404	return error;
1405	}
1406
1407	int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
1408	struct iomap *iomap)
1409	{
1410	struct metapath mp = { .mp_aheight = `1`, };
1411	int ret;
1412
1413	ret = __gfs2_iomap_get(inode, pos, length, flags: `0`, iomap, mp: &mp);
1414	release_metapath(mp: &mp);
1415	return ret;
1416	}
1417
1418	int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
1419	struct iomap *iomap)
1420	{
1421	struct metapath mp = { .mp_aheight = `1`, };
1422	int ret;
1423
1424	ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, mp: &mp);
1425	if (!ret && iomap->type == IOMAP_HOLE)
1426	ret = __gfs2_iomap_alloc(inode, iomap, mp: &mp);
1427	release_metapath(mp: &mp);
1428	return ret;
1429	}
1430
1431	/**
1432	* sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1433	* @ip: inode
1434	* @rd_gh: holder of resource group glock
1435	* @bh: buffer head to sweep
1436	* @start: starting point in bh
1437	* @end: end point in bh
1438	* @meta: true if bh points to metadata (rather than data)
1439	* @btotal: place to keep count of total blocks freed
1440	*
1441	* We sweep a metadata buffer (provided by the metapath) for blocks we need to
1442	* free, and free them all. However, we do it one rgrp at a time. If this
1443	* block has references to multiple rgrps, we break it into individual
1444	* transactions. This allows other processes to use the rgrps while we're
1445	* focused on a single one, for better concurrency / performance.
1446	* At every transaction boundary, we rewrite the inode into the journal.
1447	* That way the bitmaps are kept consistent with the inode and we can recover
1448	* if we're interrupted by power-outages.
1449	*
1450	* Returns: 0, or return code if an error occurred.
1451	* *btotal has the total number of blocks freed
1452	*/
/*
 * Reviewer notes:
 * - On return a transaction may still be open and @rd_gh may still be
 *   held; punch_hole() ends the transaction and dequeues @rd_gh at its
 *   "out" label.
 * - ip->i_rw_mutex is taken for writing right after each transaction is
 *   begun here and dropped together with that transaction.
 */
1453	static int sweep_bh_for_rgrps(struct gfs2_inode ip, struct* gfs2_holder *rd_gh,
1454	struct buffer_head bh, __be64 start, __be64 *end,
1455	bool meta, u32 *btotal)
1456	{
1457	struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode);
1458	struct gfs2_rgrpd *rgd;
1459	struct gfs2_trans *tr;
1460	__be64 *p;
1461	int blks_outside_rgrp;
1462	u64 bn, bstart, isize_blks;
1463	s64 blen; / needs to be s64 or gfs2_add_inode_blocks breaks /
1464	int ret = `0`;
1465	bool buf_in_tr = false; / buffer was added to transaction /
1466
/* Each pass from this label frees the pointers that belong to one rgrp. */
1467	more_rgrps:
1468	rgd = NULL;
1469	if (gfs2_holder_initialized(gh: rd_gh)) {
1470	rgd = gfs2_glock2rgrp(gl: rd_gh->gh_gl);
1471	gfs2_assert_withdraw(sdp,
1472	gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1473	}
1474	blks_outside_rgrp = `0`;
1475	bstart = `0`;
1476	blen = `0`;
1477
1478	for (p = start; p < end; p++) {
1479	if (!*p)
1480	continue;
1481	bn = be64_to_cpu(*p);
1482
/*
 * With an rgrp already chosen, pointers outside it are only counted so
 * that another pass is made for them; otherwise the first non-zero
 * pointer selects the rgrp, whose glock is then taken exclusively.
 */
1483	if (rgd) {
1484	if (!rgrp_contains_block(rgd, block: bn)) {
1485	blks_outside_rgrp++;
1486	continue;
1487	}
1488	} else {
1489	rgd = gfs2_blk2rgrpd(sdp, blk: bn, exact: true);
1490	if (unlikely(!rgd)) {
1491	ret = -EIO;
1492	goto out;
1493	}
1494	ret = gfs2_glock_nq_init(gl: rgd->rd_gl, LM_ST_EXCLUSIVE,
1495	LM_FLAG_NODE_SCOPE, gh: rd_gh);
1496	if (ret)
1497	goto out;
1498
1499	/ Must be done with the rgrp glock held: /
1500	if (gfs2_rs_active(rs: &ip->i_res) &&
1501	rgd == ip->i_res.rs_rgd)
1502	gfs2_rs_deltree(rs: &ip->i_res);
1503	}
1504
1505	/ The size of our transactions will be unknown until we*
1506	actually process all the metadata blocks that relate to
1507	the rgrp. So we estimate. We know it can't be more than
1508	the dinode's i_blocks and we don't want to exceed the
1509	journal flush threshold, sd_log_thresh2. /*
1510	if (current->journal_info == NULL) {
1511	unsigned int jblocks_rqsted, revokes;
1512
1513	jblocks_rqsted = rgd->rd_length + RES_DINODE +
1514	RES_INDIRECT;
1515	isize_blks = gfs2_get_inode_blocks(inode: &ip->i_inode);
1516	if (isize_blks > atomic_read(v: &sdp->sd_log_thresh2))
1517	jblocks_rqsted +=
1518	atomic_read(v: &sdp->sd_log_thresh2);
1519	else
1520	jblocks_rqsted += isize_blks;
1521	revokes = jblocks_rqsted;
1522	if (meta)
1523	revokes += end - start;
1524	else if (ip->i_depth)
1525	revokes += sdp->sd_inptrs;
1526	ret = gfs2_trans_begin(sdp, blocks: jblocks_rqsted, revokes);
1527	if (ret)
1528	goto out_unlock;
1529	down_write(sem: &ip->i_rw_mutex);
1530	}
1531	/ check if we will exceed the transaction blocks requested /
1532	tr = current->journal_info;
1533	if (tr->tr_num_buf_new + RES_STATFS +
1534	RES_QUOTA >= atomic_read(v: &sdp->sd_log_thresh2)) {
1535	/ We set blks_outside_rgrp to ensure the loop will*
1536	be repeated for the same rgrp, but with a new
1537	transaction. /*
1538	blks_outside_rgrp++;
1539	/ This next part is tricky. If the buffer was added*
1540	to the transaction, we've already set some block
1541	pointers to 0, so we better follow through and free
1542	them, or we will introduce corruption (so break).
1543	This may be impossible, or at least rare, but I
1544	decided to cover the case regardless.
1545
1546	If the buffer was not added to the transaction
1547	(this call), doing so would exceed our transaction
1548	size, so we need to end the transaction and start a
1549	new one (so goto). /*
1550
1551	if (buf_in_tr)
1552	break;
1553	goto out_unlock;
1554	}
1555
/*
 * Journal the buffer and clear the pointer, then coalesce physically
 * contiguous blocks into one (bstart, blen) run; the pending run is
 * freed whenever the next block does not extend it.
 */
1556	gfs2_trans_add_meta(gl: ip->i_gl, bh);
1557	buf_in_tr = true;
1558	*p = `0`;
1559	if (bstart + blen == bn) {
1560	blen++;
1561	continue;
1562	}
1563	if (bstart) {
1564	__gfs2_free_blocks(ip, rgd, bstart, blen: (u32)blen, meta);
1565	(*btotal) += blen;
1566	gfs2_add_inode_blocks(inode: &ip->i_inode, change: -blen);
1567	}
1568	bstart = bn;
1569	blen = `1`;
1570	}
/* Free the last run collected by the loop above. */
1571	if (bstart) {
1572	__gfs2_free_blocks(ip, rgd, bstart, blen: (u32)blen, meta);
1573	(*btotal) += blen;
1574	gfs2_add_inode_blocks(inode: &ip->i_inode, change: -blen);
1575	}
1576	out_unlock:
1577	if (!ret && blks_outside_rgrp) { / If buffer still has non-zero blocks*
1578	outside the rgrp we just processed,
1579	do it all over again. /*
1580	if (current->journal_info) {
1581	struct buffer_head *dibh;
1582
1583	ret = gfs2_meta_inode_buffer(ip, bhp: &dibh);
1584	if (ret)
1585	goto out;
1586
1587	/ Every transaction boundary, we rewrite the dinode*
1588	to keep its di_blocks current in case of failure. /*
1589	inode_set_mtime_to_ts(inode: &ip->i_inode, ts: inode_set_ctime_current(inode: &ip->i_inode));
1590	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
1591	gfs2_dinode_out(ip, buf: dibh->b_data);
1592	brelse(bh: dibh);
1593	up_write(sem: &ip->i_rw_mutex);
1594	gfs2_trans_end(sdp);
1595	buf_in_tr = false;
1596	}
/* Drop this rgrp's glock so the next pass can pick another rgrp. */
1597	gfs2_glock_dq_uninit(gh: rd_gh);
1598	cond_resched();
1599	goto more_rgrps;
1600	}
1601	out:
1602	return ret;
1603	}
1604
1605	static bool mp_eq_to_hgt(struct metapath mp, __u16 list, unsigned int h)
1606	{
1607	if (memcmp(p: mp->mp_list, q: list, size: h * sizeof(mp->mp_list[`0`])))
1608	return false;
1609	return true;
1610	}
1611
1612	/**
1613	* find_nonnull_ptr - find a non-null pointer given a metapath and height
1614	* @sdp: The superblock
1615	* @mp: starting metapath
1616	* @h: desired height to search
1617	* @end_list: See punch_hole().
1618	* @end_aligned: See punch_hole().
1619	*
1620	* Assumes the metapath is valid (with buffers) out to height h.
1621	* Returns: true if a non-null pointer was found in the metapath buffer
1622	* false if all remaining pointers are NULL in the buffer
1623	*/
1624	static bool find_nonnull_ptr(struct gfs2_sbd sdp, struct* metapath *mp,
1625	unsigned int h,
1626	__u16 end_list, unsigned* int end_aligned)
1627	{
1628	struct buffer_head *bh = mp->mp_bh[h];
1629	__be64 first, ptr, *end;
1630
1631	first = metaptr1(height: h, mp);
1632	ptr = first + mp->mp_list[h];
1633	end = (__be64 *)(bh->b_data + bh->b_size);
1634	if (end_list && mp_eq_to_hgt(mp, list: end_list, h)) {
1635	bool keep_end = h < end_aligned;
1636	end = first + end_list[h] + keep_end;
1637	}
1638
1639	while (ptr < end) {
1640	if (ptr) { /* if we have a non-null pointer /
1641	mp->mp_list[h] = ptr - first;
1642	h++;
1643	if (h < GFS2_MAX_META_HEIGHT)
1644	mp->mp_list[h] = `0`;
1645	return true;
1646	}
1647	ptr++;
1648	}
1649	return false;
1650	}
1651
/* States of the deallocation loop in punch_hole(). */
enum dealloc_states {
	DEALLOC_MP_FULL,	/* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER,	/* lower the metapath strip height */
	DEALLOC_FILL_MP,	/* Fill in the metapath to the given height. */
	DEALLOC_DONE,		/* process complete */
};
1658
1659	static inline void
1660	metapointer_range(struct metapath mp, int* height,
1661	__u16 start_list, unsigned* int start_aligned,
1662	__u16 end_list, unsigned* int end_aligned,
1663	__be64 start, __be64 end)
1664	{
1665	struct buffer_head *bh = mp->mp_bh[height];
1666	__be64 *first;
1667
1668	first = metaptr1(height, mp);
1669	*start = first;
1670	if (mp_eq_to_hgt(mp, list: start_list, h: height)) {
1671	bool keep_start = height < start_aligned;
1672	*start = first + start_list[height] + keep_start;
1673	}
1674	end = (__be64 )(bh->b_data + bh->b_size);
1675	if (end_list && mp_eq_to_hgt(mp, list: end_list, h: height)) {
1676	bool keep_end = height < end_aligned;
1677	*end = first + end_list[height] + keep_end;
1678	}
1679	}
1680
1681	static inline bool walk_done(struct gfs2_sbd *sdp,
1682	struct metapath mp, int* height,
1683	__u16 end_list, unsigned* int end_aligned)
1684	{
1685	__u16 end;
1686
1687	if (end_list) {
1688	bool keep_end = height < end_aligned;
1689	if (!mp_eq_to_hgt(mp, list: end_list, h: height))
1690	return false;
1691	end = end_list[height] + keep_end;
1692	} else
1693	end = (height > `0`) ? sdp->sd_inptrs : sdp->sd_diptrs;
1694	return mp->mp_list[height] >= end;
1695	}
1696
1697	/**
1698	* punch_hole - deallocate blocks in a file
1699	* @ip: inode to truncate
1700	* @offset: the start of the hole
1701	* @length: the size of the hole (or 0 for truncate)
1702	*
1703	* Punch a hole into a file or truncate a file at a given position. This
1704	* function operates in whole blocks (@offset and @length are rounded
1705	* accordingly); partially filled blocks must be cleared otherwise.
1706	*
1707	* This function works from the bottom up, and from the right to the left. In
1708	* other words, it strips off the highest layer (data) before stripping any of
1709	* the metadata. Doing it this way is best in case the operation is interrupted
1710	* by power failure, etc. The dinode is rewritten in every transaction to
1711	* guarantee integrity.
1712	*/
/*
 * Reviewer notes:
 * - Returns 0 or a negative errno from one of the helpers; 0 is also
 *   returned early when the range lies beyond the metadata tree or is
 *   empty after rounding.
 * - The work is driven by the dealloc_states state machine below; strip_h
 *   is the height currently being stripped and starts at the data level
 *   (ip->i_height - 1).
 */
1713	static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1714	{
1715	struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode);
1716	u64 maxsize = sdp->sd_heightsize[ip->i_height];
1717	struct metapath mp = {};
1718	struct buffer_head dibh, bh;
1719	struct gfs2_holder rd_gh;
1720	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1721	unsigned int bsize = `1` << bsize_shift;
1722	u64 lblock = (offset + bsize - `1`) >> bsize_shift;
1723	__u16 start_list[GFS2_MAX_META_HEIGHT];
1724	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1725	unsigned int start_aligned, end_aligned;
1726	unsigned int strip_h = ip->i_height - `1`;
1727	u32 btotal = `0`;
1728	int ret, state;
1729	int mp_h; / metapath buffers are read in to this height /
1730	u64 prev_bnr = `0`;
1731	__be64 start, end;
1732
1733	if (offset + bsize - `1` >= maxsize) {
1734	/*
1735	* The starting point lies beyond the allocated metadata;
1736	* there are no blocks to deallocate.
1737	*/
1738	return `0`;
1739	}
1740
1741	/*
1742	* The start position of the hole is defined by lblock, start_list, and
1743	* start_aligned. The end position of the hole is defined by lend,
1744	* end_list, and end_aligned.
1745	*
1746	* start_aligned and end_aligned define down to which height the start
1747	* and end positions are aligned to the metadata tree (i.e., the
1748	* position is a multiple of the metadata granularity at the height
1749	* above). This determines at which heights additional meta pointers
1750	* needs to be preserved for the remaining data.
1751	*/
1752
/*
 * With @length == 0 (truncate) end_list stays NULL and end_aligned is
 * left unset; the helpers only consult end_aligned when end_list is
 * non-NULL.
 */
1753	if (length) {
1754	u64 end_offset = offset + length;
1755	u64 lend;
1756
1757	/*
1758	* Clip the end at the maximum file size for the given height:
1759	* that's how far the metadata goes; files bigger than that
1760	* will have additional layers of indirection.
1761	*/
1762	if (end_offset > maxsize)
1763	end_offset = maxsize;
1764	lend = end_offset >> bsize_shift;
1765
1766	if (lblock >= lend)
1767	return `0`;
1768
1769	find_metapath(sdp, block: lend, mp: &mp, height: ip->i_height);
1770	end_list = __end_list;
1771	memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1772
1773	for (mp_h = ip->i_height - `1`; mp_h > `0`; mp_h--) {
1774	if (end_list[mp_h])
1775	break;
1776	}
1777	end_aligned = mp_h;
1778	}
1779
1780	find_metapath(sdp, block: lblock, mp: &mp, height: ip->i_height);
1781	memcpy(start_list, mp.mp_list, sizeof(start_list));
1782
1783	for (mp_h = ip->i_height - `1`; mp_h > `0`; mp_h--) {
1784	if (start_list[mp_h])
1785	break;
1786	}
1787	start_aligned = mp_h;
1788
1789	ret = gfs2_meta_inode_buffer(ip, bhp: &dibh);
1790	if (ret)
1791	return ret;
1792
/*
 * The dinode buffer becomes height 0 of the metapath.  NOTE(review): it is
 * never released directly in this function; presumably release_metapath()
 * at out_metapath drops it - confirm.
 */
1793	mp.mp_bh[`0`] = dibh;
1794	ret = lookup_metapath(ip, mp: &mp);
1795	if (ret)
1796	goto out_metapath;
1797
1798	/ issue read-ahead on metadata /
1799	for (mp_h = `0`; mp_h < mp.mp_aheight - `1`; mp_h++) {
1800	metapointer_range(mp: &mp, height: mp_h, start_list, start_aligned,
1801	end_list, end_aligned, start: &start, end: &end);
1802	gfs2_metapath_ra(gl: ip->i_gl, start, end);
1803	}
1804
1805	if (mp.mp_aheight == ip->i_height)
1806	state = DEALLOC_MP_FULL; / We have a complete metapath /
1807	else
1808	state = DEALLOC_FILL_MP; / deal with partial metapath /
1809
1810	ret = gfs2_rindex_update(sdp);
1811	if (ret)
1812	goto out_metapath;
1813
1814	ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1815	if (ret)
1816	goto out_metapath;
1817	gfs2_holder_mark_uninitialized(gh: &rd_gh);
1818
1819	mp_h = strip_h;
1820
1821	while (state != DEALLOC_DONE) {
1822	switch (state) {
1823	/ Truncate a full metapath at the given strip height.*
1824	* Note that strip_h == mp_h in order to be in this state. */
1825	case DEALLOC_MP_FULL:
1826	bh = mp.mp_bh[mp_h];
1827	gfs2_assert_withdraw(sdp, bh);
1828	if (gfs2_assert_withdraw(sdp,
1829	prev_bnr != bh->b_blocknr)) {
1830	fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
1831	"s_h:%u, mp_h:%u\n",
1832	(unsigned long long)ip->i_no_addr,
1833	prev_bnr, ip->i_height, strip_h, mp_h);
1834	}
1835	prev_bnr = bh->b_blocknr;
1836
1837	if (gfs2_metatype_check(sdp, bh,
1838	(mp_h ? GFS2_METATYPE_IN :
1839	GFS2_METATYPE_DI))) {
1840	ret = -EIO;
1841	goto out;
1842	}
1843
1844	/*
1845	* Below, passing end_aligned as 0 gives us the
1846	* metapointer range excluding the end point: the end
1847	* point is the first metapath we must not deallocate!
1848	*/
1849
1850	metapointer_range(mp: &mp, height: mp_h, start_list, start_aligned,
1851	end_list, end_aligned: `0` / end_aligned /,
1852	start: &start, end: &end);
1853	ret = sweep_bh_for_rgrps(ip, rd_gh: &rd_gh, bh: mp.mp_bh[mp_h],
1854	start, end,
1855	meta: mp_h != ip->i_height - `1`,
1856	btotal: &btotal);
1857
1858	/ If we hit an error or just swept dinode buffer,*
1859	just exit. /*
1860	if (ret \|\| !mp_h) {
1861	state = DEALLOC_DONE;
1862	break;
1863	}
1864	state = DEALLOC_MP_LOWER;
1865	break;
1866
1867	/ lower the metapath strip height /
1868	case DEALLOC_MP_LOWER:
1869	/ We're done with the current buffer, so release it,*
1870	unless it's the dinode buffer. Then back up to the
1871	previous pointer. /*
1872	if (mp_h) {
1873	brelse(bh: mp.mp_bh[mp_h]);
1874	mp.mp_bh[mp_h] = NULL;
1875	}
1876	/ If we can't get any lower in height, we've stripped*
1877	off all we can. Next step is to back up and start
1878	stripping the previous level of metadata. /*
1879	if (mp_h == `0`) {
1880	strip_h--;
1881	memcpy(mp.mp_list, start_list, sizeof(start_list));
1882	mp_h = strip_h;
1883	state = DEALLOC_FILL_MP;
1884	break;
1885	}
1886	mp.mp_list[mp_h] = `0`;
1887	mp_h--; / search one metadata height down /
1888	mp.mp_list[mp_h]++;
1889	if (walk_done(sdp, mp: &mp, height: mp_h, end_list, end_aligned))
1890	break;
1891	/ Here we've found a part of the metapath that is not*
1892	* allocated. We need to search at that height for the
1893	* next non-null pointer. */
1894	if (find_nonnull_ptr(sdp, mp: &mp, h: mp_h, end_list, end_aligned)) {
1895	state = DEALLOC_FILL_MP;
1896	mp_h++;
1897	}
1898	/ No more non-null pointers at this height. Back up*
1899	to the previous height and try again. /*
1900	break; / loop around in the same state /
1901
1902	/ Fill the metapath with buffers to the given height. /
1903	case DEALLOC_FILL_MP:
1904	/ Fill the buffers out to the current height. /
1905	ret = fillup_metapath(ip, mp: &mp, h: mp_h);
1906	if (ret < `0`)
1907	goto out;
1908
1909	/ On the first pass, issue read-ahead on metadata. /
1910	if (mp.mp_aheight > `1` && strip_h == ip->i_height - `1`) {
1911	unsigned int height = mp.mp_aheight - `1`;
1912
1913	/ No read-ahead for data blocks. /
1914	if (mp.mp_aheight - `1` == strip_h)
1915	height--;
1916
1917	for (; height >= mp.mp_aheight - ret; height--) {
1918	metapointer_range(mp: &mp, height,
1919	start_list, start_aligned,
1920	end_list, end_aligned,
1921	start: &start, end: &end);
1922	gfs2_metapath_ra(gl: ip->i_gl, start, end);
1923	}
1924	}
1925
1926	/ If buffers found for the entire strip height /
1927	if (mp.mp_aheight - `1` == strip_h) {
1928	state = DEALLOC_MP_FULL;
1929	break;
1930	}
1931	if (mp.mp_aheight < ip->i_height) / We have a partial height /
1932	mp_h = mp.mp_aheight - `1`;
1933
1934	/ If we find a non-null block pointer, crawl a bit*
1935	higher up in the metapath and try again, otherwise
1936	we need to look lower for a new starting point. /*
1937	if (find_nonnull_ptr(sdp, mp: &mp, h: mp_h, end_list, end_aligned))
1938	mp_h++;
1939	else
1940	state = DEALLOC_MP_LOWER;
1941	break;
1942	}
1943	}
1944
/*
 * Account for everything that was freed: update statfs and quota and
 * rewrite the dinode, beginning a transaction first if the sweep did not
 * leave one open.
 */
1945	if (btotal) {
1946	if (current->journal_info == NULL) {
1947	ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1948	RES_QUOTA, revokes: `0`);
1949	if (ret)
1950	goto out;
1951	down_write(sem: &ip->i_rw_mutex);
1952	}
1953	gfs2_statfs_change(sdp, total: `0`, free: +btotal, dinodes: `0`);
1954	gfs2_quota_change(ip, change: -(s64)btotal, uid: ip->i_inode.i_uid,
1955	gid: ip->i_inode.i_gid);
1956	inode_set_mtime_to_ts(inode: &ip->i_inode, ts: inode_set_ctime_current(inode: &ip->i_inode));
1957	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
1958	gfs2_dinode_out(ip, buf: dibh->b_data);
1959	up_write(sem: &ip->i_rw_mutex);
1960	gfs2_trans_end(sdp);
1961	}
1962
/*
 * Common cleanup: drop the rgrp glock if the sweep left it held, and close
 * any transaction that is still open together with i_rw_mutex, which this
 * code path takes alongside every gfs2_trans_begin().
 */
1963	out:
1964	if (gfs2_holder_initialized(gh: &rd_gh))
1965	gfs2_glock_dq_uninit(gh: &rd_gh);
1966	if (current->journal_info) {
1967	up_write(sem: &ip->i_rw_mutex);
1968	gfs2_trans_end(sdp);
1969	cond_resched();
1970	}
1971	gfs2_quota_unhold(ip);
1972	out_metapath:
1973	release_metapath(mp: &mp);
1974	return ret;
1975	}
1976
1977	static int trunc_end(struct gfs2_inode *ip)
1978	{
1979	struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode);
1980	struct buffer_head *dibh;
1981	int error;
1982
1983	error = gfs2_trans_begin(sdp, RES_DINODE, revokes: `0`);
1984	if (error)
1985	return error;
1986
1987	down_write(sem: &ip->i_rw_mutex);
1988
1989	error = gfs2_meta_inode_buffer(ip, bhp: &dibh);
1990	if (error)
1991	goto out;
1992
1993	if (!i_size_read(inode: &ip->i_inode)) {
1994	ip->i_height = `0`;
1995	ip->i_goal = ip->i_no_addr;
1996	gfs2_buffer_clear_tail(bh: dibh, head: sizeof(struct gfs2_dinode));
1997	gfs2_ordered_del_inode(ip);
1998	}
1999	inode_set_mtime_to_ts(inode: &ip->i_inode, ts: inode_set_ctime_current(inode: &ip->i_inode));
2000	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
2001
2002	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
2003	gfs2_dinode_out(ip, buf: dibh->b_data);
2004	brelse(bh: dibh);
2005
2006	out:
2007	up_write(sem: &ip->i_rw_mutex);
2008	gfs2_trans_end(sdp);
2009	return error;
2010	}
2011
2012	/**
2013	* do_shrink - make a file smaller
2014	* @inode: the inode
2015	* @newsize: the size to make the file
2016	*
2017	* Called with an exclusive lock on @inode. The @size must
2018	* be equal to or smaller than the current inode size.
2019	*
2020	* Returns: errno
2021	*/
2022
2023	static int do_shrink(struct inode *inode, u64 newsize)
2024	{
2025	struct gfs2_inode *ip = GFS2_I(inode);
2026	int error;
2027
2028	error = trunc_start(inode, newsize);
2029	if (error < `0`)
2030	return error;
2031	if (gfs2_is_stuffed(ip))
2032	return `0`;
2033
2034	error = punch_hole(ip, offset: newsize, length: `0`);
2035	if (error == `0`)
2036	error = trunc_end(ip);
2037
2038	return error;
2039	}
2040
2041	/**
2042	* do_grow - Touch and update inode size
2043	* @inode: The inode
2044	* @size: The new size
2045	*
2046	* This function updates the timestamps on the inode and
2047	* may also increase the size of the inode. This function
2048	* must not be called with @size any smaller than the current
2049	* inode size.
2050	*
2051	* Although it is not strictly required to unstuff files here,
2052	* earlier versions of GFS2 have a bug in the stuffed file reading
2053	* code which will result in a buffer overrun if the size is larger
2054	* than the max stuffed file size. In order to prevent this from
2055	* occurring, such files are unstuffed, but in other cases we can
2056	* just update the inode size directly.
2057	*
2058	* Returns: 0 on success, or -ve on error
2059	*/
2060
2061	static int do_grow(struct inode *inode, u64 size)
2062	{
2063	struct gfs2_inode *ip = GFS2_I(inode);
2064	struct gfs2_sbd *sdp = GFS2_SB(inode);
2065	struct gfs2_alloc_parms ap = { .target = `1`, };
2066	struct buffer_head *dibh;
2067	int error;
2068	int unstuff = `0`;
2069
2070	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
2071	error = gfs2_quota_lock_check(ip, ap: &ap);
2072	if (error)
2073	return error;
2074
2075	error = gfs2_inplace_reserve(ip, ap: &ap);
2076	if (error)
2077	goto do_grow_qunlock;
2078	unstuff = `1`;
2079	}
2080
2081	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
2082	(unstuff &&
2083	gfs2_is_jdata(ip) ? RES_JDATA : `0`) +
2084	(sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2085	`0` : RES_QUOTA), revokes: `0`);
2086	if (error)
2087	goto do_grow_release;
2088
2089	if (unstuff) {
2090	error = gfs2_unstuff_dinode(ip);
2091	if (error)
2092	goto do_end_trans;
2093	}
2094
2095	error = gfs2_meta_inode_buffer(ip, bhp: &dibh);
2096	if (error)
2097	goto do_end_trans;
2098
2099	truncate_setsize(inode, newsize: size);
2100	inode_set_mtime_to_ts(inode: &ip->i_inode, ts: inode_set_ctime_current(inode: &ip->i_inode));
2101	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
2102	gfs2_dinode_out(ip, buf: dibh->b_data);
2103	brelse(bh: dibh);
2104
2105	do_end_trans:
2106	gfs2_trans_end(sdp);
2107	do_grow_release:
2108	if (unstuff) {
2109	gfs2_inplace_release(ip);
2110	do_grow_qunlock:
2111	gfs2_quota_unlock(ip);
2112	}
2113	return error;
2114	}
2115
2116	/**
2117	* gfs2_setattr_size - make a file a given size
2118	* @inode: the inode
2119	* @newsize: the size to make the file
2120	*
2121	* The file size can grow, shrink, or stay the same size. This
2122	* is called holding i_rwsem and an exclusive glock on the inode
2123	* in question.
2124	*
2125	* Returns: errno
2126	*/
2127
2128	int gfs2_setattr_size(struct inode *inode, u64 newsize)
2129	{
2130	struct gfs2_inode *ip = GFS2_I(inode);
2131	int ret;
2132
2133	BUG_ON(!S_ISREG(inode->i_mode));
2134
2135	ret = inode_newsize_ok(inode, offset: newsize);
2136	if (ret)
2137	return ret;
2138
2139	inode_dio_wait(inode);
2140
2141	ret = gfs2_qa_get(ip);
2142	if (ret)
2143	goto out;
2144
2145	if (newsize >= inode->i_size) {
2146	ret = do_grow(inode, size: newsize);
2147	goto out;
2148	}
2149
2150	ret = do_shrink(inode, newsize);
2151	out:
2152	gfs2_rs_delete(ip);
2153	gfs2_qa_put(ip);
2154	return ret;
2155	}
2156
2157	int gfs2_truncatei_resume(struct gfs2_inode *ip)
2158	{
2159	int error;
2160	error = punch_hole(ip, offset: i_size_read(inode: &ip->i_inode), length: `0`);
2161	if (!error)
2162	error = trunc_end(ip);
2163	return error;
2164	}
2165
/* Thin wrapper: punch_hole() on @ip with offset 0 and length 0. */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	int error = punch_hole(ip, 0, 0);

	return error;
}
2170
2171	/**
2172	* gfs2_free_journal_extents - Free cached journal bmap info
2173	* @jd: The journal
2174	*
2175	*/
2176
2177	void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2178	{
2179	struct gfs2_journal_extent *jext;
2180
2181	while(!list_empty(head: &jd->extent_list)) {
2182	jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
2183	list_del(entry: &jext->list);
2184	kfree(objp: jext);
2185	}
2186	}
2187
2188	/**
2189	* gfs2_add_jextent - Add or merge a new extent to extent cache
2190	* @jd: The journal descriptor
2191	* @lblock: The logical block at start of new extent
2192	* @dblock: The physical block at start of new extent
2193	* @blocks: Size of extent in fs blocks
2194	*
2195	* Returns: 0 on success or -ENOMEM
2196	*/
2197
2198	static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2199	{
2200	struct gfs2_journal_extent *jext;
2201
2202	if (!list_empty(head: &jd->extent_list)) {
2203	jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
2204	if ((jext->dblock + jext->blocks) == dblock) {
2205	jext->blocks += blocks;
2206	return `0`;
2207	}
2208	}
2209
2210	jext = kzalloc(size: sizeof(struct gfs2_journal_extent), GFP_NOFS);
2211	if (jext == NULL)
2212	return -ENOMEM;
2213	jext->dblock = dblock;
2214	jext->lblock = lblock;
2215	jext->blocks = blocks;
2216	list_add_tail(new: &jext->list, head: &jd->extent_list);
2217	jd->nr_extents++;
2218	return `0`;
2219	}
2220
2221	/**
2222	* gfs2_map_journal_extents - Cache journal bmap info
2223	* @sdp: The super block
2224	* @jd: The journal to map
2225	*
2226	* Create a reusable "extent" mapping from all logical
2227	* blocks to all physical blocks for the given journal. This will save
2228	* us time when writing journal blocks. Most journals will have only one
2229	* extent that maps all their logical blocks. That's because gfs2.mkfs
2230	* arranges the journal blocks sequentially to maximize performance.
2231	* So the extent would map the first block for the entire file length.
2232	* However, gfs2_jadd can happen while file activity is happening, so
2233	* those journals may not be sequential. Less likely is the case where
2234	* the users created their own journals by mounting the metafs and
2235	* laying it out. But it's still possible. These journals might have
2236	* several extents.
2237	*
2238	* Returns: 0 on success, or error on failure
2239	*/
2240
2241	int gfs2_map_journal_extents(struct gfs2_sbd sdp, struct* gfs2_jdesc *jd)
2242	{
2243	u64 lblock = `0`;
2244	u64 lblock_stop;
2245	struct gfs2_inode *ip = GFS2_I(inode: jd->jd_inode);
2246	struct buffer_head bh;
2247	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2248	u64 size;
2249	int rc;
2250	ktime_t start, end;
2251
2252	start = ktime_get();
2253	lblock_stop = i_size_read(inode: jd->jd_inode) >> shift;
2254	size = (lblock_stop - lblock) << shift;
2255	jd->nr_extents = `0`;
2256	WARN_ON(!list_empty(&jd->extent_list));
2257
2258	do {
2259	bh.b_state = `0`;
2260	bh.b_blocknr = `0`;
2261	bh.b_size = size;
2262	rc = gfs2_block_map(inode: jd->jd_inode, lblock, bh_map: &bh, create: `0`);
2263	if (rc \|\| !buffer_mapped(bh: &bh))
2264	goto fail;
2265	rc = gfs2_add_jextent(jd, lblock, dblock: bh.b_blocknr, blocks: bh.b_size >> shift);
2266	if (rc)
2267	goto fail;
2268	size -= bh.b_size;
2269	lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2270	} while(size > `0`);
2271
2272	end = ktime_get();
2273	fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
2274	jd->nr_extents, ktime_ms_delta(end, start));
2275	return `0`;
2276
2277	fail:
2278	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2279	rc, jd->jd_jid,
2280	(unsigned long long)(i_size_read(jd->jd_inode) - size),
2281	jd->nr_extents);
2282	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2283	rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2284	bh.b_state, (unsigned long long)bh.b_size);
2285	gfs2_free_journal_extents(jd);
2286	return rc;
2287	}
2288
2289	/**
2290	* gfs2_write_alloc_required - figure out if a write will require an allocation
2291	* @ip: the file being written to
2292	* @offset: the offset to write to
2293	* @len: the number of bytes being written
2294	*
2295	* Returns: 1 if an alloc is required, 0 otherwise
2296	*/
2297
2298	int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
2299	unsigned int len)
2300	{
2301	struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode);
2302	struct buffer_head bh;
2303	unsigned int shift;
2304	u64 lblock, lblock_stop, size;
2305	u64 end_of_file;
2306
2307	if (!len)
2308	return `0`;
2309
2310	if (gfs2_is_stuffed(ip)) {
2311	if (offset + len > gfs2_max_stuffed_size(ip))
2312	return `1`;
2313	return `0`;
2314	}
2315
2316	shift = sdp->sd_sb.sb_bsize_shift;
2317	BUG_ON(gfs2_is_dir(ip));
2318	end_of_file = (i_size_read(inode: &ip->i_inode) + sdp->sd_sb.sb_bsize - `1`) >> shift;
2319	lblock = offset >> shift;
2320	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - `1`) >> shift;
2321	if (lblock_stop > end_of_file && ip != GFS2_I(inode: sdp->sd_rindex))
2322	return `1`;
2323
2324	size = (lblock_stop - lblock) << shift;
2325	do {
2326	bh.b_state = `0`;
2327	bh.b_size = size;
2328	gfs2_block_map(inode: &ip->i_inode, lblock, bh_map: &bh, create: `0`);
2329	if (!buffer_mapped(bh: &bh))
2330	return `1`;
2331	size -= bh.b_size;
2332	lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2333	} while(size > `0`);
2334
2335	return `0`;
2336	}
2337
2338	static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2339	{
2340	struct gfs2_inode *ip = GFS2_I(inode);
2341	struct buffer_head *dibh;
2342	int error;
2343
2344	if (offset >= inode->i_size)
2345	return `0`;
2346	if (offset + length > inode->i_size)
2347	length = inode->i_size - offset;
2348
2349	error = gfs2_meta_inode_buffer(ip, bhp: &dibh);
2350	if (error)
2351	return error;
2352	gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh);
2353	memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, `0`,
2354	length);
2355	brelse(bh: dibh);
2356	return `0`;
2357	}
2358
2359	static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2360	loff_t length)
2361	{
2362	struct gfs2_sbd *sdp = GFS2_SB(inode);
2363	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2364	int error;
2365
2366	while (length) {
2367	struct gfs2_trans *tr;
2368	loff_t chunk;
2369	unsigned int offs;
2370
2371	chunk = length;
2372	if (chunk > max_chunk)
2373	chunk = max_chunk;
2374
2375	offs = offset & ~PAGE_MASK;
2376	if (offs && chunk > PAGE_SIZE)
2377	chunk = offs + ((chunk - offs) & PAGE_MASK);
2378
2379	truncate_pagecache_range(inode, offset, end: chunk);
2380	offset += chunk;
2381	length -= chunk;
2382
2383	tr = current->journal_info;
2384	if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2385	continue;
2386
2387	gfs2_trans_end(sdp);
2388	error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2389	if (error)
2390	return error;
2391	}
2392	return `0`;
2393	}
2394
2395	int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2396	{
2397	struct inode *inode = file_inode(f: file);
2398	struct gfs2_inode *ip = GFS2_I(inode);
2399	struct gfs2_sbd *sdp = GFS2_SB(inode);
2400	unsigned int blocksize = i_blocksize(node: inode);
2401	loff_t start, end;
2402	int error;
2403
2404	if (!gfs2_is_stuffed(ip)) {
2405	unsigned int start_off, end_len;
2406
2407	start_off = offset & (blocksize - `1`);
2408	end_len = (offset + length) & (blocksize - `1`);
2409	if (start_off) {
2410	unsigned int len = length;
2411	if (length > blocksize - start_off)
2412	len = blocksize - start_off;
2413	error = gfs2_block_zero_range(inode, from: offset, length: len);
2414	if (error)
2415	goto out;
2416	if (start_off + length < blocksize)
2417	end_len = `0`;
2418	}
2419	if (end_len) {
2420	error = gfs2_block_zero_range(inode,
2421	from: offset + length - end_len, length: end_len);
2422	if (error)
2423	goto out;
2424	}
2425	}
2426
2427	start = round_down(offset, blocksize);
2428	end = round_up(offset + length, blocksize) - `1`;
2429	error = filemap_write_and_wait_range(mapping: inode->i_mapping, lstart: start, lend: end);
2430	if (error)
2431	return error;
2432
2433	if (gfs2_is_jdata(ip))
2434	error = gfs2_trans_begin(sdp, RES_DINODE + `2` * RES_JDATA,
2435	GFS2_JTRUNC_REVOKES);
2436	else
2437	error = gfs2_trans_begin(sdp, RES_DINODE, revokes: `0`);
2438	if (error)
2439	return error;
2440
2441	if (gfs2_is_stuffed(ip)) {
2442	error = stuffed_zero_range(inode, offset, length);
2443	if (error)
2444	goto out;
2445	}
2446
2447	if (gfs2_is_jdata(ip)) {
2448	BUG_ON(!current->journal_info);
2449	gfs2_journaled_truncate_range(inode, offset, length);
2450	} else
2451	truncate_pagecache_range(inode, offset, end: offset + length - `1`);
2452
2453	file_update_time(file);
2454	mark_inode_dirty(inode);
2455
2456	if (current->journal_info)
2457	gfs2_trans_end(sdp);
2458
2459	if (!gfs2_is_stuffed(ip))
2460	error = punch_hole(ip, offset, length);
2461
2462	out:
2463	if (current->journal_info)
2464	gfs2_trans_end(sdp);
2465	return error;
2466	}
2467
2468	static int gfs2_map_blocks(struct iomap_writepage_ctx wpc, struct* inode *inode,
2469	loff_t offset, unsigned int len)
2470	{
2471	int ret;
2472
2473	if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2474	return -EIO;
2475
2476	if (offset >= wpc->iomap.offset &&
2477	offset < wpc->iomap.offset + wpc->iomap.length)
2478	return `0`;
2479
2480	memset(&wpc->iomap, `0`, sizeof(wpc->iomap));
2481	ret = gfs2_iomap_get(inode, pos: offset, INT_MAX, iomap: &wpc->iomap);
2482	return ret;
2483	}
2484
2485	const struct iomap_writeback_ops gfs2_writeback_ops = {
2486	.map_blocks = gfs2_map_blocks,
2487	};
2488

source code of linux/fs/gfs2/bmap.c