dir.c source code [linux/fs/ocfs2/dir.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* dir.c
4	*
5	* Creates, reads, walks and deletes directory-nodes
6	*
7	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
8	*
9	* Portions of this code from linux/fs/ext3/dir.c
10	*
11	* Copyright (C) 1992, 1993, 1994, 1995
12	* Remy Card (card@masi.ibp.fr)
13	* Laboratoire MASI - Institut Blaise pascal
14	* Universite Pierre et Marie Curie (Paris VI)
15	*
16	* from
17	*
18	* linux/fs/minix/dir.c
19	*
20	* Copyright (C) 1991, 1992 Linus Torvalds
21	*/
22
23	#include <linux/fs.h>
24	#include <linux/types.h>
25	#include <linux/slab.h>
26	#include <linux/highmem.h>
27	#include <linux/quotaops.h>
28	#include <linux/sort.h>
29	#include <linux/iversion.h>
30
31	#include <cluster/masklog.h>
32
33	#include "ocfs2.h"
34
35	#include "alloc.h"
36	#include "blockcheck.h"
37	#include "dir.h"
38	#include "dlmglue.h"
39	#include "extent_map.h"
40	#include "file.h"
41	#include "inode.h"
42	#include "journal.h"
43	#include "namei.h"
44	#include "suballoc.h"
45	#include "super.h"
46	#include "sysfile.h"
47	#include "uptodate.h"
48	#include "ocfs2_trace.h"
49
50	#include "buffer_head_io.h"
51
/*
 * Read-ahead sizing for the unindexed directory search.
 * NAMEI_RA_SIZE is the capacity of the bh_use[] read-ahead window in
 * ocfs2_find_entry_el().
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
55
56	static int ocfs2_do_extend_dir(struct super_block *sb,
57	handle_t *handle,
58	struct inode *dir,
59	struct buffer_head *parent_fe_bh,
60	struct ocfs2_alloc_context *data_ac,
61	struct ocfs2_alloc_context *meta_ac,
62	struct buffer_head **new_bh);
63	static int ocfs2_dir_indexed(struct inode *inode);
64
65	/*
66	* These are distinct checks because future versions of the file system will
67	* want to have a trailing dirent structure independent of indexing.
68	*/
69	static int ocfs2_supports_dir_trailer(struct inode *dir)
70	{
71	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
72
73	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
74	return `0`;
75
76	return ocfs2_meta_ecc(osb) \|\| ocfs2_dir_indexed(inode: dir);
77	}
78
79	/*
80	* "new' here refers to the point at which we're creating a new
81	* directory via "mkdir()", but also when we're expanding an inline
82	* directory. In either case, we don't yet have the indexing bit set
83	* on the directory, so the standard checks will fail in when metaecc
84	* is turned off. Only directory-initialization type functions should
85	* use this then. Everything else wants ocfs2_supports_dir_trailer()
86	*/
87	static int ocfs2_new_dir_wants_trailer(struct inode *dir)
88	{
89	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
90
91	return ocfs2_meta_ecc(osb) \|\|
92	ocfs2_supports_indexed_dirs(osb);
93	}
94
95	static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
96	{
97	return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
98	}
99
/* Pointer to the trailer at the tail of the directory block held in @_bh. */
#define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
101
102	/ XXX ocfs2_block_dqtrailer() is similar but not quite - can we make*
103	* them more consistent? */
104	struct ocfs2_dir_block_trailer ocfs2_dir_trailer_from_size(int* blocksize,
105	void *data)
106	{
107	char *p = data;
108
109	p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
110	return (struct ocfs2_dir_block_trailer *)p;
111	}
112
113	/*
114	* XXX: This is executed once on every dirent. We should consider optimizing
115	* it.
116	*/
117	static int ocfs2_skip_dir_trailer(struct inode *dir,
118	struct ocfs2_dir_entry *de,
119	unsigned long offset,
120	unsigned long blklen)
121	{
122	unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
123
124	if (!ocfs2_supports_dir_trailer(dir))
125	return `0`;
126
127	if (offset != toff)
128	return `0`;
129
130	return `1`;
131	}
132
133	static void ocfs2_init_dir_trailer(struct inode *inode,
134	struct buffer_head *bh, u16 rec_len)
135	{
136	struct ocfs2_dir_block_trailer *trailer;
137
138	trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
139	strcpy(p: trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
140	trailer->db_compat_rec_len =
141	cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
142	trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
143	trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
144	trailer->db_free_rec_len = cpu_to_le16(rec_len);
145	}
146	/*
147	* Link an unindexed block with a dir trailer structure into the index free
148	* list. This function will modify dirdata_bh, but assumes you've already
149	* passed it to the journal.
150	*/
151	static int ocfs2_dx_dir_link_trailer(struct inode dir, handle_t handle,
152	struct buffer_head *dx_root_bh,
153	struct buffer_head *dirdata_bh)
154	{
155	int ret;
156	struct ocfs2_dx_root_block *dx_root;
157	struct ocfs2_dir_block_trailer *trailer;
158
159	ret = ocfs2_journal_access_dr(handle, ci: INODE_CACHE(inode: dir), bh: dx_root_bh,
160	OCFS2_JOURNAL_ACCESS_WRITE);
161	if (ret) {
162	mlog_errno(ret);
163	goto out;
164	}
165	trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
166	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
167
168	trailer->db_free_next = dx_root->dr_free_blk;
169	dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
170
171	ocfs2_journal_dirty(handle, bh: dx_root_bh);
172
173	out:
174	return ret;
175	}
176
177	static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res)
178	{
179	return res->dl_prev_leaf_bh == NULL;
180	}
181
182	void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
183	{
184	brelse(bh: res->dl_dx_root_bh);
185	brelse(bh: res->dl_leaf_bh);
186	brelse(bh: res->dl_dx_leaf_bh);
187	brelse(bh: res->dl_prev_leaf_bh);
188	}
189
190	static int ocfs2_dir_indexed(struct inode *inode)
191	{
192	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
193	return `1`;
194	return `0`;
195	}
196
197	static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
198	{
199	return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
200	}
201
/*
 * Hashing code adapted from ext3
 */
#define DELTA 0x9E3779B9

/*
 * Run 16 TEA-style mixing rounds over the first two words of @buf, keyed
 * by the four words in[0..3], and add the result back into buf[0] and
 * buf[1].  buf[2] and buf[3] are left untouched.  All arithmetic is
 * 32-bit and wraps.
 */
static void TEA_transform(__u32 buf[4], __u32 const in[])
{
	__u32	sum = 0;
	__u32	b0 = buf[0], b1 = buf[1];
	__u32	a = in[0], b = in[1], c = in[2], d = in[3];
	int	n = 16;

	do {
		sum += DELTA;
		b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
		b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
	} while (--n);

	buf[0] += b0;
	buf[1] += b1;
}
223
/*
 * Pack up to @num * 4 bytes of @msg into @num 32-bit words at @buf.
 *
 * Bytes are folded big-endian style into each word (earlier bytes end up
 * in higher positions).  A pad word built from the low byte of the
 * original @len, replicated four times, seeds every word; a trailing
 * partial word therefore keeps pad bytes in its upper positions, and
 * words beyond the message are set to the pad value.  At most @num words
 * are written.
 */
static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
{
	__u32	pad, val;
	int	i;

	pad = (__u32)len | ((__u32)len << 8);
	pad |= pad << 16;

	val = pad;
	if (len > num*4)
		len = num * 4;
	for (i = 0; i < len; i++) {
		if ((i % 4) == 0)
			val = pad;
		val = msg[i] + (val << 8);
		if ((i % 4) == 3) {
			*buf++ = val;
			val = pad;
			num--;
		}
	}
	/* Flush the partial (or all-pad) word, then pad out the rest. */
	if (--num >= 0)
		*buf++ = val;
	while (--num >= 0)
		*buf++ = pad;
}
250
251	static void ocfs2_dx_dir_name_hash(struct inode dir, const* char name, int* len,
252	struct ocfs2_dx_hinfo *hinfo)
253	{
254	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
255	const char *p;
256	__u32 in[`8`], buf[`4`];
257
258	/*
259	* XXX: Is this really necessary, if the index is never looked
260	* at by readdir? Is a hash value of '0' a bad idea?
261	*/
262	if ((len == `1` && !strncmp(".", name, `1`)) \|\|
263	(len == `2` && !strncmp("..", name, `2`))) {
264	buf[`0`] = buf[`1`] = `0`;
265	goto out;
266	}
267
268	#ifdef OCFS2_DEBUG_DX_DIRS
269	/*
270	* This makes it very easy to debug indexing problems. We
271	* should never allow this to be selected without hand editing
272	* this file though.
273	*/
274	buf[`0`] = buf[`1`] = len;
275	goto out;
276	#endif
277
278	memcpy(buf, osb->osb_dx_seed, sizeof(buf));
279
280	p = name;
281	while (len > `0`) {
282	str2hashbuf(msg: p, len, buf: in, num: `4`);
283	TEA_transform(buf, in);
284	len -= `16`;
285	p += `16`;
286	}
287
288	out:
289	hinfo->major_hash = buf[`0`];
290	hinfo->minor_hash = buf[`1`];
291	}
292
293	/*
294	* bh passed here can be an inode block or a dir data block, depending
295	* on the inode inline data flag.
296	*/
297	static int ocfs2_check_dir_entry(struct inode * dir,
298	struct ocfs2_dir_entry * de,
299	struct buffer_head * bh,
300	unsigned long offset)
301	{
302	const char *error_msg = NULL;
303	const int rlen = le16_to_cpu(de->rec_len);
304
305	if (unlikely(rlen < OCFS2_DIR_REC_LEN(`1`)))
306	error_msg = "rec_len is smaller than minimal";
307	else if (unlikely(rlen % `4` != `0`))
308	error_msg = "rec_len % 4 != 0";
309	else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
310	error_msg = "rec_len is too small for name_len";
311	else if (unlikely(
312	((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
313	error_msg = "directory entry across blocks";
314
315	if (unlikely(error_msg != NULL))
316	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
317	"offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
318	(unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
319	offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
320	de->name_len);
321
322	return error_msg == NULL ? `1` : `0`;
323	}
324
325	static inline int ocfs2_match(int len,
326	const char * const name,
327	struct ocfs2_dir_entry *de)
328	{
329	if (len != de->name_len)
330	return `0`;
331	if (!de->inode)
332	return `0`;
333	return !memcmp(p: name, q: de->name, size: len);
334	}
335
336	/*
337	* Returns 0 if not found, -1 on failure, and 1 on success
338	*/
339	static inline int ocfs2_search_dirblock(struct buffer_head *bh,
340	struct inode *dir,
341	const char name, int* namelen,
342	unsigned long offset,
343	char *first_de,
344	unsigned int bytes,
345	struct ocfs2_dir_entry **res_dir)
346	{
347	struct ocfs2_dir_entry *de;
348	char dlimit, de_buf;
349	int de_len;
350	int ret = `0`;
351
352	de_buf = first_de;
353	dlimit = de_buf + bytes;
354
355	while (de_buf < dlimit) {
356	/ this code is executed quadratically often /
357	/ do minimal checking `by hand' /
358
359	de = (struct ocfs2_dir_entry *) de_buf;
360
361	if (de_buf + namelen <= dlimit &&
362	ocfs2_match(len: namelen, name, de)) {
363	/ found a match - just to be sure, do a full check /
364	if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
365	ret = -`1`;
366	goto bail;
367	}
368	*res_dir = de;
369	ret = `1`;
370	goto bail;
371	}
372
373	/ prevent looping on a bad block /
374	de_len = le16_to_cpu(de->rec_len);
375	if (de_len <= `0`) {
376	ret = -`1`;
377	goto bail;
378	}
379
380	de_buf += de_len;
381	offset += de_len;
382	}
383
384	bail:
385	trace_ocfs2_search_dirblock(num: ret);
386	return ret;
387	}
388
389	static struct buffer_head ocfs2_find_entry_id(const* char *name,
390	int namelen,
391	struct inode *dir,
392	struct ocfs2_dir_entry **res_dir)
393	{
394	int ret, found;
395	struct buffer_head *di_bh = NULL;
396	struct ocfs2_dinode *di;
397	struct ocfs2_inline_data *data;
398
399	ret = ocfs2_read_inode_block(inode: dir, bh: &di_bh);
400	if (ret) {
401	mlog_errno(ret);
402	goto out;
403	}
404
405	di = (struct ocfs2_dinode *)di_bh->b_data;
406	data = &di->id2.i_data;
407
408	found = ocfs2_search_dirblock(bh: di_bh, dir, name, namelen, offset: `0`,
409	first_de: data->id_data, bytes: i_size_read(inode: dir), res_dir);
410	if (found == `1`)
411	return di_bh;
412
413	brelse(bh: di_bh);
414	out:
415	return NULL;
416	}
417
418	static int ocfs2_validate_dir_block(struct super_block *sb,
419	struct buffer_head *bh)
420	{
421	int rc;
422	struct ocfs2_dir_block_trailer *trailer =
423	ocfs2_trailer_from_bh(bh, sb);
424
425
426	/*
427	* We don't validate dirents here, that's handled
428	* in-place when the code walks them.
429	*/
430	trace_ocfs2_validate_dir_block(num: (unsigned long long)bh->b_blocknr);
431
432	BUG_ON(!buffer_uptodate(bh));
433
434	/*
435	* If the ecc fails, we return the error but otherwise
436	* leave the filesystem running. We know any error is
437	* local to this block.
438	*
439	* Note that we are safe to call this even if the directory
440	* doesn't have a trailer. Filesystems without metaecc will do
441	* nothing, and filesystems with it will have one.
442	*/
443	rc = ocfs2_validate_meta_ecc(sb, data: bh->b_data, bc: &trailer->db_check);
444	if (rc)
445	mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
446	(unsigned long long)bh->b_blocknr);
447
448	return rc;
449	}
450
451	/*
452	* Validate a directory trailer.
453	*
454	* We check the trailer here rather than in ocfs2_validate_dir_block()
455	* because that function doesn't have the inode to test.
456	*/
457	static int ocfs2_check_dir_trailer(struct inode dir, struct* buffer_head *bh)
458	{
459	int rc = `0`;
460	struct ocfs2_dir_block_trailer *trailer;
461
462	trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
463	if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
464	rc = ocfs2_error(dir->i_sb,
465	"Invalid dirblock #%llu: signature = %.*s\n",
466	(unsigned long long)bh->b_blocknr, `7`,
467	trailer->db_signature);
468	goto out;
469	}
470	if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
471	rc = ocfs2_error(dir->i_sb,
472	"Directory block #%llu has an invalid db_blkno of %llu\n",
473	(unsigned long long)bh->b_blocknr,
474	(unsigned long long)le64_to_cpu(trailer->db_blkno));
475	goto out;
476	}
477	if (le64_to_cpu(trailer->db_parent_dinode) !=
478	OCFS2_I(inode: dir)->ip_blkno) {
479	rc = ocfs2_error(dir->i_sb,
480	"Directory block #%llu on dinode #%llu has an invalid parent_dinode of %llu\n",
481	(unsigned long long)bh->b_blocknr,
482	(unsigned long long)OCFS2_I(dir)->ip_blkno,
483	(unsigned long long)le64_to_cpu(trailer->db_blkno));
484	goto out;
485	}
486	out:
487	return rc;
488	}
489
490	/*
491	* This function forces all errors to -EIO for consistency with its
492	* predecessor, ocfs2_bread(). We haven't audited what returning the
493	* real error codes would do to callers. We log the real codes with
494	* mlog_errno() before we squash them.
495	*/
496	static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
497	struct buffer_head *bh, int* flags)
498	{
499	int rc = `0`;
500	struct buffer_head tmp = bh;
501
502	rc = ocfs2_read_virt_blocks(inode, v_block, nr: `1`, bhs: &tmp, flags,
503	validate: ocfs2_validate_dir_block);
504	if (rc) {
505	mlog_errno(rc);
506	goto out;
507	}
508
509	if (!(flags & OCFS2_BH_READAHEAD) &&
510	ocfs2_supports_dir_trailer(dir: inode)) {
511	rc = ocfs2_check_dir_trailer(dir: inode, bh: tmp);
512	if (rc) {
513	if (!*bh)
514	brelse(bh: tmp);
515	mlog_errno(rc);
516	goto out;
517	}
518	}
519
520	/ If ocfs2_read_virt_blocks() got us a new bh, pass it up. /
521	if (!*bh)
522	*bh = tmp;
523
524	out:
525	return rc ? -EIO : `0`;
526	}
527
528	/*
529	* Read the block at 'phys' which belongs to this directory
530	* inode. This function does no virtual->physical block translation -
531	* what's passed in is assumed to be a valid directory block.
532	*/
533	static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
534	struct buffer_head **bh)
535	{
536	int ret;
537	struct buffer_head tmp = bh;
538
539	ret = ocfs2_read_block(ci: INODE_CACHE(inode: dir), off: phys, bh: &tmp,
540	validate: ocfs2_validate_dir_block);
541	if (ret) {
542	mlog_errno(ret);
543	goto out;
544	}
545
546	if (ocfs2_supports_dir_trailer(dir)) {
547	ret = ocfs2_check_dir_trailer(dir, bh: tmp);
548	if (ret) {
549	if (!*bh)
550	brelse(bh: tmp);
551	mlog_errno(ret);
552	goto out;
553	}
554	}
555
556	if (!ret && !*bh)
557	*bh = tmp;
558	out:
559	return ret;
560	}
561
562	static int ocfs2_validate_dx_root(struct super_block *sb,
563	struct buffer_head *bh)
564	{
565	int ret;
566	struct ocfs2_dx_root_block *dx_root;
567
568	BUG_ON(!buffer_uptodate(bh));
569
570	dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
571
572	ret = ocfs2_validate_meta_ecc(sb, data: bh->b_data, bc: &dx_root->dr_check);
573	if (ret) {
574	mlog(ML_ERROR,
575	"Checksum failed for dir index root block %llu\n",
576	(unsigned long long)bh->b_blocknr);
577	return ret;
578	}
579
580	if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
581	ret = ocfs2_error(sb,
582	"Dir Index Root # %llu has bad signature %.*s\n",
583	(unsigned long long)le64_to_cpu(dx_root->dr_blkno),
584	`7`, dx_root->dr_signature);
585	}
586
587	return ret;
588	}
589
590	static int ocfs2_read_dx_root(struct inode dir, struct* ocfs2_dinode *di,
591	struct buffer_head **dx_root_bh)
592	{
593	int ret;
594	u64 blkno = le64_to_cpu(di->i_dx_root);
595	struct buffer_head tmp = dx_root_bh;
596
597	ret = ocfs2_read_block(ci: INODE_CACHE(inode: dir), off: blkno, bh: &tmp,
598	validate: ocfs2_validate_dx_root);
599
600	/ If ocfs2_read_block() got us a new bh, pass it up. /
601	if (!ret && !*dx_root_bh)
602	*dx_root_bh = tmp;
603
604	return ret;
605	}
606
607	static int ocfs2_validate_dx_leaf(struct super_block *sb,
608	struct buffer_head *bh)
609	{
610	int ret;
611	struct ocfs2_dx_leaf dx_leaf = (struct* ocfs2_dx_leaf *)bh->b_data;
612
613	BUG_ON(!buffer_uptodate(bh));
614
615	ret = ocfs2_validate_meta_ecc(sb, data: bh->b_data, bc: &dx_leaf->dl_check);
616	if (ret) {
617	mlog(ML_ERROR,
618	"Checksum failed for dir index leaf block %llu\n",
619	(unsigned long long)bh->b_blocknr);
620	return ret;
621	}
622
623	if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
624	ret = ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s\n",
625	`7`, dx_leaf->dl_signature);
626	}
627
628	return ret;
629	}
630
631	static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
632	struct buffer_head **dx_leaf_bh)
633	{
634	int ret;
635	struct buffer_head tmp = dx_leaf_bh;
636
637	ret = ocfs2_read_block(ci: INODE_CACHE(inode: dir), off: blkno, bh: &tmp,
638	validate: ocfs2_validate_dx_leaf);
639
640	/ If ocfs2_read_block() got us a new bh, pass it up. /
641	if (!ret && !*dx_leaf_bh)
642	*dx_leaf_bh = tmp;
643
644	return ret;
645	}
646
647	/*
648	* Read a series of dx_leaf blocks. This expects all buffer_head
649	* pointers to be NULL on function entry.
650	*/
651	static int ocfs2_read_dx_leaves(struct inode dir, u64 start, int* num,
652	struct buffer_head **dx_leaf_bhs)
653	{
654	int ret;
655
656	ret = ocfs2_read_blocks(ci: INODE_CACHE(inode: dir), block: start, nr: num, bhs: dx_leaf_bhs, flags: `0`,
657	validate: ocfs2_validate_dx_leaf);
658	if (ret)
659	mlog_errno(ret);
660
661	return ret;
662	}
663
664	static struct buffer_head ocfs2_find_entry_el(const* char name, int* namelen,
665	struct inode *dir,
666	struct ocfs2_dir_entry **res_dir)
667	{
668	struct super_block *sb;
669	struct buffer_head *bh_use[NAMEI_RA_SIZE];
670	struct buffer_head bh, ret = NULL;
671	unsigned long start, block, b;
672	int ra_max = `0`; / Number of bh's in the readahead*
673	buffer, bh_use[] /*
674	int ra_ptr = `0`; / Current index into readahead*
675	buffer /*
676	int num = `0`;
677	int nblocks, i;
678
679	sb = dir->i_sb;
680
681	nblocks = i_size_read(inode: dir) >> sb->s_blocksize_bits;
682	start = OCFS2_I(inode: dir)->ip_dir_start_lookup;
683	if (start >= nblocks)
684	start = `0`;
685	block = start;
686
687	restart:
688	do {
689	/*
690	* We deal with the read-ahead logic here.
691	*/
692	if (ra_ptr >= ra_max) {
693	/ Refill the readahead buffer /
694	ra_ptr = `0`;
695	b = block;
696	for (ra_max = `0`; ra_max < NAMEI_RA_SIZE; ra_max++) {
697	/*
698	* Terminate if we reach the end of the
699	* directory and must wrap, or if our
700	* search has finished at this block.
701	*/
702	if (b >= nblocks \|\| (num && block == start)) {
703	bh_use[ra_max] = NULL;
704	break;
705	}
706	num++;
707
708	bh = NULL;
709	ocfs2_read_dir_block(inode: dir, v_block: b++, bh: &bh,
710	OCFS2_BH_READAHEAD);
711	bh_use[ra_max] = bh;
712	}
713	}
714	if ((bh = bh_use[ra_ptr++]) == NULL)
715	goto next;
716	if (ocfs2_read_dir_block(inode: dir, v_block: block, bh: &bh, flags: `0`)) {
717	/ read error, skip block & hope for the best.*
718	* ocfs2_read_dir_block() has released the bh. */
719	mlog(ML_ERROR, "reading directory %llu, "
720	"offset %lu\n",
721	(unsigned long long)OCFS2_I(dir)->ip_blkno,
722	block);
723	goto next;
724	}
725	i = ocfs2_search_dirblock(bh, dir, name, namelen,
726	offset: block << sb->s_blocksize_bits,
727	first_de: bh->b_data, bytes: sb->s_blocksize,
728	res_dir);
729	if (i == `1`) {
730	OCFS2_I(inode: dir)->ip_dir_start_lookup = block;
731	ret = bh;
732	goto cleanup_and_exit;
733	} else {
734	brelse(bh);
735	if (i < `0`)
736	goto cleanup_and_exit;
737	}
738	next:
739	if (++block >= nblocks)
740	block = `0`;
741	} while (block != start);
742
743	/*
744	* If the directory has grown while we were searching, then
745	* search the last part of the directory before giving up.
746	*/
747	block = nblocks;
748	nblocks = i_size_read(inode: dir) >> sb->s_blocksize_bits;
749	if (block < nblocks) {
750	start = `0`;
751	goto restart;
752	}
753
754	cleanup_and_exit:
755	/ Clean up the read-ahead blocks /
756	for (; ra_ptr < ra_max; ra_ptr++)
757	brelse(bh: bh_use[ra_ptr]);
758
759	trace_ocfs2_find_entry_el(pointer: ret);
760	return ret;
761	}
762
763	static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
764	struct ocfs2_extent_list *el,
765	u32 major_hash,
766	u32 *ret_cpos,
767	u64 *ret_phys_blkno,
768	unsigned int *ret_clen)
769	{
770	int ret = `0`, i, found;
771	struct buffer_head *eb_bh = NULL;
772	struct ocfs2_extent_block *eb;
773	struct ocfs2_extent_rec *rec = NULL;
774
775	if (el->l_tree_depth) {
776	ret = ocfs2_find_leaf(ci: INODE_CACHE(inode), root_el: el, cpos: major_hash,
777	leaf_bh: &eb_bh);
778	if (ret) {
779	mlog_errno(ret);
780	goto out;
781	}
782
783	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
784	el = &eb->h_list;
785
786	if (el->l_tree_depth) {
787	ret = ocfs2_error(inode->i_sb,
788	"Inode %lu has non zero tree depth in btree tree block %llu\n",
789	inode->i_ino,
790	(unsigned long long)eb_bh->b_blocknr);
791	goto out;
792	}
793	}
794
795	found = `0`;
796	for (i = le16_to_cpu(el->l_next_free_rec) - `1`; i >= `0`; i--) {
797	rec = &el->l_recs[i];
798
799	if (le32_to_cpu(rec->e_cpos) <= major_hash) {
800	found = `1`;
801	break;
802	}
803	}
804
805	if (!found) {
806	ret = ocfs2_error(inode->i_sb,
807	"Inode %lu has bad extent record (%u, %u, 0) in btree\n",
808	inode->i_ino,
809	le32_to_cpu(rec->e_cpos),
810	ocfs2_rec_clusters(el, rec));
811	goto out;
812	}
813
814	if (ret_phys_blkno)
815	*ret_phys_blkno = le64_to_cpu(rec->e_blkno);
816	if (ret_cpos)
817	*ret_cpos = le32_to_cpu(rec->e_cpos);
818	if (ret_clen)
819	*ret_clen = le16_to_cpu(rec->e_leaf_clusters);
820
821	out:
822	brelse(bh: eb_bh);
823	return ret;
824	}
825
826	/*
827	* Returns the block index, from the start of the cluster which this
828	* hash belongs too.
829	*/
830	static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
831	u32 minor_hash)
832	{
833	return minor_hash & osb->osb_dx_mask;
834	}
835
836	static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
837	struct ocfs2_dx_hinfo *hinfo)
838	{
839	return __ocfs2_dx_dir_hash_idx(osb, minor_hash: hinfo->minor_hash);
840	}
841
842	static int ocfs2_dx_dir_lookup(struct inode *inode,
843	struct ocfs2_extent_list *el,
844	struct ocfs2_dx_hinfo *hinfo,
845	u32 *ret_cpos,
846	u64 *ret_phys_blkno)
847	{
848	int ret = `0`;
849	unsigned int cend, clen;
850	u32 cpos;
851	u64 blkno;
852	u32 name_hash = hinfo->major_hash;
853
854	ret = ocfs2_dx_dir_lookup_rec(inode, el, major_hash: name_hash, ret_cpos: &cpos, ret_phys_blkno: &blkno,
855	ret_clen: &clen);
856	if (ret) {
857	mlog_errno(ret);
858	goto out;
859	}
860
861	cend = cpos + clen;
862	if (name_hash >= cend) {
863	/ We want the last cluster /
864	blkno += ocfs2_clusters_to_blocks(sb: inode->i_sb, clusters: clen - `1`);
865	cpos += clen - `1`;
866	} else {
867	blkno += ocfs2_clusters_to_blocks(sb: inode->i_sb,
868	clusters: name_hash - cpos);
869	cpos = name_hash;
870	}
871
872	/*
873	* We now have the cluster which should hold our entry. To
874	* find the exact block from the start of the cluster to
875	* search, we take the lower bits of the hash.
876	*/
877	blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo);
878
879	if (ret_phys_blkno)
880	*ret_phys_blkno = blkno;
881	if (ret_cpos)
882	*ret_cpos = cpos;
883
884	out:
885
886	return ret;
887	}
888
889	static int ocfs2_dx_dir_search(const char name, int* namelen,
890	struct inode *dir,
891	struct ocfs2_dx_root_block *dx_root,
892	struct ocfs2_dir_lookup_result *res)
893	{
894	int ret, i, found;
895	u64 phys;
896	struct buffer_head *dx_leaf_bh = NULL;
897	struct ocfs2_dx_leaf *dx_leaf;
898	struct ocfs2_dx_entry *dx_entry = NULL;
899	struct buffer_head *dir_ent_bh = NULL;
900	struct ocfs2_dir_entry *dir_ent = NULL;
901	struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
902	struct ocfs2_extent_list *dr_el;
903	struct ocfs2_dx_entry_list *entry_list;
904
905	ocfs2_dx_dir_name_hash(dir, name, len: namelen, hinfo: &res->dl_hinfo);
906
907	if (ocfs2_dx_root_inline(dx_root)) {
908	entry_list = &dx_root->dr_entries;
909	goto search;
910	}
911
912	dr_el = &dx_root->dr_list;
913
914	ret = ocfs2_dx_dir_lookup(inode: dir, el: dr_el, hinfo, NULL, ret_phys_blkno: &phys);
915	if (ret) {
916	mlog_errno(ret);
917	goto out;
918	}
919
920	trace_ocfs2_dx_dir_search(ino: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno,
921	namelen, name, major_hash: hinfo->major_hash,
922	minor_hash: hinfo->minor_hash, blkno: (unsigned long long)phys);
923
924	ret = ocfs2_read_dx_leaf(dir, blkno: phys, dx_leaf_bh: &dx_leaf_bh);
925	if (ret) {
926	mlog_errno(ret);
927	goto out;
928	}
929
930	dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
931
932	trace_ocfs2_dx_dir_search_leaf_info(
933	le16_to_cpu(dx_leaf->dl_list.de_num_used),
934	le16_to_cpu(dx_leaf->dl_list.de_count));
935
936	entry_list = &dx_leaf->dl_list;
937
938	search:
939	/*
940	* Empty leaf is legal, so no need to check for that.
941	*/
942	found = `0`;
943	for (i = `0`; i < le16_to_cpu(entry_list->de_num_used); i++) {
944	dx_entry = &entry_list->de_entries[i];
945
946	if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
947	\|\| hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
948	continue;
949
950	/*
951	* Search unindexed leaf block now. We're not
952	* guaranteed to find anything.
953	*/
954	ret = ocfs2_read_dir_block_direct(dir,
955	le64_to_cpu(dx_entry->dx_dirent_blk),
956	bh: &dir_ent_bh);
957	if (ret) {
958	mlog_errno(ret);
959	goto out;
960	}
961
962	/*
963	* XXX: We should check the unindexed block here,
964	* before using it.
965	*/
966
967	found = ocfs2_search_dirblock(bh: dir_ent_bh, dir, name, namelen,
968	offset: `0`, first_de: dir_ent_bh->b_data,
969	bytes: dir->i_sb->s_blocksize, res_dir: &dir_ent);
970	if (found == `1`)
971	break;
972
973	if (found == -`1`) {
974	/ This means we found a bad directory entry. /
975	ret = -EIO;
976	mlog_errno(ret);
977	goto out;
978	}
979
980	brelse(bh: dir_ent_bh);
981	dir_ent_bh = NULL;
982	}
983
984	if (found <= `0`) {
985	ret = -ENOENT;
986	goto out;
987	}
988
989	res->dl_leaf_bh = dir_ent_bh;
990	res->dl_entry = dir_ent;
991	res->dl_dx_leaf_bh = dx_leaf_bh;
992	res->dl_dx_entry = dx_entry;
993
994	ret = `0`;
995	out:
996	if (ret) {
997	brelse(bh: dx_leaf_bh);
998	brelse(bh: dir_ent_bh);
999	}
1000	return ret;
1001	}
1002
1003	static int ocfs2_find_entry_dx(const char name, int* namelen,
1004	struct inode *dir,
1005	struct ocfs2_dir_lookup_result *lookup)
1006	{
1007	int ret;
1008	struct buffer_head *di_bh = NULL;
1009	struct ocfs2_dinode *di;
1010	struct buffer_head *dx_root_bh = NULL;
1011	struct ocfs2_dx_root_block *dx_root;
1012
1013	ret = ocfs2_read_inode_block(inode: dir, bh: &di_bh);
1014	if (ret) {
1015	mlog_errno(ret);
1016	goto out;
1017	}
1018
1019	di = (struct ocfs2_dinode *)di_bh->b_data;
1020
1021	ret = ocfs2_read_dx_root(dir, di, dx_root_bh: &dx_root_bh);
1022	if (ret) {
1023	mlog_errno(ret);
1024	goto out;
1025	}
1026	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
1027
1028	ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, res: lookup);
1029	if (ret) {
1030	if (ret != -ENOENT)
1031	mlog_errno(ret);
1032	goto out;
1033	}
1034
1035	lookup->dl_dx_root_bh = dx_root_bh;
1036	dx_root_bh = NULL;
1037	out:
1038	brelse(bh: di_bh);
1039	brelse(bh: dx_root_bh);
1040	return ret;
1041	}
1042
1043	/*
1044	* Try to find an entry of the provided name within 'dir'.
1045	*
1046	* If nothing was found, -ENOENT is returned. Otherwise, zero is
1047	* returned and the struct 'res' will contain information useful to
1048	* other directory manipulation functions.
1049	*
1050	* Caller can NOT assume anything about the contents of the
1051	* buffer_heads - they are passed back only so that it can be passed
1052	* into any one of the manipulation functions (add entry, delete
1053	* entry, etc). As an example, bh in the extent directory case is a
1054	* data block, in the inline-data case it actually points to an inode,
1055	* in the indexed directory case, multiple buffers are involved.
1056	*/
1057	int ocfs2_find_entry(const char name, int* namelen,
1058	struct inode dir, struct* ocfs2_dir_lookup_result *lookup)
1059	{
1060	struct buffer_head *bh;
1061	struct ocfs2_dir_entry *res_dir = NULL;
1062
1063	if (ocfs2_dir_indexed(inode: dir))
1064	return ocfs2_find_entry_dx(name, namelen, dir, lookup);
1065
1066	/*
1067	* The unindexed dir code only uses part of the lookup
1068	* structure, so there's no reason to push it down further
1069	* than this.
1070	*/
1071	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1072	bh = ocfs2_find_entry_id(name, namelen, dir, res_dir: &res_dir);
1073	else
1074	bh = ocfs2_find_entry_el(name, namelen, dir, res_dir: &res_dir);
1075
1076	if (bh == NULL)
1077	return -ENOENT;
1078
1079	lookup->dl_leaf_bh = bh;
1080	lookup->dl_entry = res_dir;
1081	return `0`;
1082	}
1083
1084	/*
1085	* Update inode number and type of a previously found directory entry.
1086	*/
1087	int ocfs2_update_entry(struct inode dir, handle_t handle,
1088	struct ocfs2_dir_lookup_result *res,
1089	struct inode *new_entry_inode)
1090	{
1091	int ret;
1092	ocfs2_journal_access_func access = ocfs2_journal_access_db;
1093	struct ocfs2_dir_entry *de = res->dl_entry;
1094	struct buffer_head *de_bh = res->dl_leaf_bh;
1095
1096	/*
1097	* The same code works fine for both inline-data and extent
1098	* based directories, so no need to split this up. The only
1099	* difference is the journal_access function.
1100	*/
1101
1102	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1103	access = ocfs2_journal_access_di;
1104
1105	ret = access(handle, INODE_CACHE(inode: dir), de_bh,
1106	OCFS2_JOURNAL_ACCESS_WRITE);
1107	if (ret) {
1108	mlog_errno(ret);
1109	goto out;
1110	}
1111
1112	de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
1113	ocfs2_set_de_type(de, mode: new_entry_inode->i_mode);
1114
1115	ocfs2_journal_dirty(handle, bh: de_bh);
1116
1117	out:
1118	return ret;
1119	}
1120
1121	/*
1122	* __ocfs2_delete_entry deletes a directory entry by merging it with the
1123	* previous entry
1124	*/
1125	static int __ocfs2_delete_entry(handle_t handle, struct* inode *dir,
1126	struct ocfs2_dir_entry *de_del,
1127	struct buffer_head bh, char* *first_de,
1128	unsigned int bytes)
1129	{
1130	struct ocfs2_dir_entry de, pde;
1131	int i, status = -ENOENT;
1132	ocfs2_journal_access_func access = ocfs2_journal_access_db;
1133
1134	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1135	access = ocfs2_journal_access_di;
1136
1137	i = `0`;
1138	pde = NULL;
1139	de = (struct ocfs2_dir_entry *) first_de;
1140	while (i < bytes) {
1141	if (!ocfs2_check_dir_entry(dir, de, bh, offset: i)) {
1142	status = -EIO;
1143	mlog_errno(status);
1144	goto bail;
1145	}
1146	if (de == de_del) {
1147	status = access(handle, INODE_CACHE(inode: dir), bh,
1148	OCFS2_JOURNAL_ACCESS_WRITE);
1149	if (status < `0`) {
1150	status = -EIO;
1151	mlog_errno(status);
1152	goto bail;
1153	}
1154	if (pde)
1155	le16_add_cpu(var: &pde->rec_len,
1156	le16_to_cpu(de->rec_len));
1157	de->inode = `0`;
1158	inode_inc_iversion(inode: dir);
1159	ocfs2_journal_dirty(handle, bh);
1160	goto bail;
1161	}
1162	i += le16_to_cpu(de->rec_len);
1163	pde = de;
1164	de = (struct ocfs2_dir_entry )((char* *)de + le16_to_cpu(de->rec_len));
1165	}
1166	bail:
1167	return status;
1168	}
1169
1170	static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
1171	{
1172	unsigned int hole;
1173
1174	if (le64_to_cpu(de->inode) == `0`)
1175	hole = le16_to_cpu(de->rec_len);
1176	else
1177	hole = le16_to_cpu(de->rec_len) -
1178	OCFS2_DIR_REC_LEN(de->name_len);
1179
1180	return hole;
1181	}
1182
1183	static int ocfs2_find_max_rec_len(struct super_block *sb,
1184	struct buffer_head *dirblock_bh)
1185	{
1186	int size, this_hole, largest_hole = `0`;
1187	char trailer, de_buf, limit, start = dirblock_bh->b_data;
1188	struct ocfs2_dir_entry *de;
1189
1190	trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb);
1191	size = ocfs2_dir_trailer_blk_off(sb);
1192	limit = start + size;
1193	de_buf = start;
1194	de = (struct ocfs2_dir_entry *)de_buf;
1195	do {
1196	if (de_buf != trailer) {
1197	this_hole = ocfs2_figure_dirent_hole(de);
1198	if (this_hole > largest_hole)
1199	largest_hole = this_hole;
1200	}
1201
1202	de_buf += le16_to_cpu(de->rec_len);
1203	de = (struct ocfs2_dir_entry *)de_buf;
1204	} while (de_buf < limit);
1205
1206	if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
1207	return largest_hole;
1208	return `0`;
1209	}
1210
1211	static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
1212	int index)
1213	{
1214	int num_used = le16_to_cpu(entry_list->de_num_used);
1215
1216	if (num_used == `1` \|\| index == (num_used - `1`))
1217	goto clear;
1218
1219	memmove(&entry_list->de_entries[index],
1220	&entry_list->de_entries[index + `1`],
1221	(num_used - index - `1`)*sizeof(struct ocfs2_dx_entry));
1222	clear:
1223	num_used--;
1224	memset(&entry_list->de_entries[num_used], `0`,
1225	sizeof(struct ocfs2_dx_entry));
1226	entry_list->de_num_used = cpu_to_le16(num_used);
1227	}
1228
1229	static int ocfs2_delete_entry_dx(handle_t handle, struct* inode *dir,
1230	struct ocfs2_dir_lookup_result *lookup)
1231	{
1232	int ret, index, max_rec_len, add_to_free_list = `0`;
1233	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
1234	struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
1235	struct ocfs2_dx_leaf *dx_leaf;
1236	struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
1237	struct ocfs2_dir_block_trailer *trailer;
1238	struct ocfs2_dx_root_block *dx_root;
1239	struct ocfs2_dx_entry_list *entry_list;
1240
1241	/*
1242	* This function gets a bit messy because we might have to
1243	* modify the root block, regardless of whether the indexed
1244	* entries are stored inline.
1245	*/
1246
1247	/*
1248	* Only set 'entry_list' here, based on where we're looking
1249	* for the indexed entries. Later, we might still want to
1250	* journal both blocks, based on free list state.
1251	*/
1252	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
1253	if (ocfs2_dx_root_inline(dx_root)) {
1254	entry_list = &dx_root->dr_entries;
1255	} else {
1256	dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
1257	entry_list = &dx_leaf->dl_list;
1258	}
1259
1260	/ Neither of these are a disk corruption - that should have*
1261	* been caught by lookup, before we got here. */
1262	BUG_ON(le16_to_cpu(entry_list->de_count) <= `0`);
1263	BUG_ON(le16_to_cpu(entry_list->de_num_used) <= `0`);
1264
1265	index = (char )dx_entry - (char* *)entry_list->de_entries;
1266	index /= sizeof(*dx_entry);
1267
1268	if (index >= le16_to_cpu(entry_list->de_num_used)) {
1269	mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
1270	(unsigned long long)OCFS2_I(dir)->ip_blkno, index,
1271	entry_list, dx_entry);
1272	return -EIO;
1273	}
1274
1275	/*
1276	* We know that removal of this dirent will leave enough room
1277	* for a new one, so add this block to the free list if it
1278	* isn't already there.
1279	*/
1280	trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
1281	if (trailer->db_free_rec_len == `0`)
1282	add_to_free_list = `1`;
1283
1284	/*
1285	* Add the block holding our index into the journal before
1286	* removing the unindexed entry. If we get an error return
1287	* from __ocfs2_delete_entry(), then it hasn't removed the
1288	* entry yet. Likewise, successful return means we must
1289	* remove the indexed entry.
1290	*
1291	* We're also careful to journal the root tree block here as
1292	* the entry count needs to be updated. Also, we might be
1293	* adding to the start of the free list.
1294	*/
1295	ret = ocfs2_journal_access_dr(handle, ci: INODE_CACHE(inode: dir), bh: dx_root_bh,
1296	OCFS2_JOURNAL_ACCESS_WRITE);
1297	if (ret) {
1298	mlog_errno(ret);
1299	goto out;
1300	}
1301
1302	if (!ocfs2_dx_root_inline(dx_root)) {
1303	ret = ocfs2_journal_access_dl(handle, ci: INODE_CACHE(inode: dir),
1304	bh: lookup->dl_dx_leaf_bh,
1305	OCFS2_JOURNAL_ACCESS_WRITE);
1306	if (ret) {
1307	mlog_errno(ret);
1308	goto out;
1309	}
1310	}
1311
1312	trace_ocfs2_delete_entry_dx(val1: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno,
1313	val2: index);
1314
1315	ret = __ocfs2_delete_entry(handle, dir, de_del: lookup->dl_entry,
1316	bh: leaf_bh, first_de: leaf_bh->b_data, bytes: leaf_bh->b_size);
1317	if (ret) {
1318	mlog_errno(ret);
1319	goto out;
1320	}
1321
1322	max_rec_len = ocfs2_find_max_rec_len(sb: dir->i_sb, dirblock_bh: leaf_bh);
1323	trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1324	if (add_to_free_list) {
1325	trailer->db_free_next = dx_root->dr_free_blk;
1326	dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr);
1327	ocfs2_journal_dirty(handle, bh: dx_root_bh);
1328	}
1329
1330	/ leaf_bh was journal_accessed for us in __ocfs2_delete_entry /
1331	ocfs2_journal_dirty(handle, bh: leaf_bh);
1332
1333	le32_add_cpu(var: &dx_root->dr_num_entries, val: -`1`);
1334	ocfs2_journal_dirty(handle, bh: dx_root_bh);
1335
1336	ocfs2_dx_list_remove_entry(entry_list, index);
1337
1338	if (!ocfs2_dx_root_inline(dx_root))
1339	ocfs2_journal_dirty(handle, bh: lookup->dl_dx_leaf_bh);
1340
1341	out:
1342	return ret;
1343	}
1344
1345	static inline int ocfs2_delete_entry_id(handle_t *handle,
1346	struct inode *dir,
1347	struct ocfs2_dir_entry *de_del,
1348	struct buffer_head *bh)
1349	{
1350	int ret;
1351	struct buffer_head *di_bh = NULL;
1352	struct ocfs2_dinode *di;
1353	struct ocfs2_inline_data *data;
1354
1355	ret = ocfs2_read_inode_block(inode: dir, bh: &di_bh);
1356	if (ret) {
1357	mlog_errno(ret);
1358	goto out;
1359	}
1360
1361	di = (struct ocfs2_dinode *)di_bh->b_data;
1362	data = &di->id2.i_data;
1363
1364	ret = __ocfs2_delete_entry(handle, dir, de_del, bh, first_de: data->id_data,
1365	bytes: i_size_read(inode: dir));
1366
1367	brelse(bh: di_bh);
1368	out:
1369	return ret;
1370	}
1371
1372	static inline int ocfs2_delete_entry_el(handle_t *handle,
1373	struct inode *dir,
1374	struct ocfs2_dir_entry *de_del,
1375	struct buffer_head *bh)
1376	{
1377	return __ocfs2_delete_entry(handle, dir, de_del, bh, first_de: bh->b_data,
1378	bytes: bh->b_size);
1379	}
1380
1381	/*
1382	* Delete a directory entry. Hide the details of directory
1383	* implementation from the caller.
1384	*/
1385	int ocfs2_delete_entry(handle_t *handle,
1386	struct inode *dir,
1387	struct ocfs2_dir_lookup_result *res)
1388	{
1389	if (ocfs2_dir_indexed(inode: dir))
1390	return ocfs2_delete_entry_dx(handle, dir, lookup: res);
1391
1392	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1393	return ocfs2_delete_entry_id(handle, dir, de_del: res->dl_entry,
1394	bh: res->dl_leaf_bh);
1395
1396	return ocfs2_delete_entry_el(handle, dir, de_del: res->dl_entry,
1397	bh: res->dl_leaf_bh);
1398	}
1399
1400	/*
1401	* Check whether 'de' has enough room to hold an entry of
1402	* 'new_rec_len' bytes.
1403	*/
1404	static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
1405	unsigned int new_rec_len)
1406	{
1407	unsigned int de_really_used;
1408
1409	/ Check whether this is an empty record with enough space /
1410	if (le64_to_cpu(de->inode) == `0` &&
1411	le16_to_cpu(de->rec_len) >= new_rec_len)
1412	return `1`;
1413
1414	/*
1415	* Record might have free space at the end which we can
1416	* use.
1417	*/
1418	de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
1419	if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
1420	return `1`;
1421
1422	return `0`;
1423	}
1424
1425	static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
1426	struct ocfs2_dx_entry *dx_new_entry)
1427	{
1428	int i;
1429
1430	i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
1431	dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
1432
1433	le16_add_cpu(var: &dx_leaf->dl_list.de_num_used, val: `1`);
1434	}
1435
1436	static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
1437	struct ocfs2_dx_hinfo *hinfo,
1438	u64 dirent_blk)
1439	{
1440	int i;
1441	struct ocfs2_dx_entry *dx_entry;
1442
1443	i = le16_to_cpu(entry_list->de_num_used);
1444	dx_entry = &entry_list->de_entries[i];
1445
1446	memset(dx_entry, `0`, sizeof(*dx_entry));
1447	dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
1448	dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
1449	dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
1450
1451	le16_add_cpu(var: &entry_list->de_num_used, val: `1`);
1452	}
1453
1454	static int __ocfs2_dx_dir_leaf_insert(struct inode dir, handle_t handle,
1455	struct ocfs2_dx_hinfo *hinfo,
1456	u64 dirent_blk,
1457	struct buffer_head *dx_leaf_bh)
1458	{
1459	int ret;
1460	struct ocfs2_dx_leaf *dx_leaf;
1461
1462	ret = ocfs2_journal_access_dl(handle, ci: INODE_CACHE(inode: dir), bh: dx_leaf_bh,
1463	OCFS2_JOURNAL_ACCESS_WRITE);
1464	if (ret) {
1465	mlog_errno(ret);
1466	goto out;
1467	}
1468
1469	dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
1470	ocfs2_dx_entry_list_insert(entry_list: &dx_leaf->dl_list, hinfo, dirent_blk);
1471	ocfs2_journal_dirty(handle, bh: dx_leaf_bh);
1472
1473	out:
1474	return ret;
1475	}
1476
1477	static void ocfs2_dx_inline_root_insert(struct inode dir, handle_t handle,
1478	struct ocfs2_dx_hinfo *hinfo,
1479	u64 dirent_blk,
1480	struct ocfs2_dx_root_block *dx_root)
1481	{
1482	ocfs2_dx_entry_list_insert(entry_list: &dx_root->dr_entries, hinfo, dirent_blk);
1483	}
1484
1485	static int ocfs2_dx_dir_insert(struct inode dir, handle_t handle,
1486	struct ocfs2_dir_lookup_result *lookup)
1487	{
1488	int ret = `0`;
1489	struct ocfs2_dx_root_block *dx_root;
1490	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
1491
1492	ret = ocfs2_journal_access_dr(handle, ci: INODE_CACHE(inode: dir), bh: dx_root_bh,
1493	OCFS2_JOURNAL_ACCESS_WRITE);
1494	if (ret) {
1495	mlog_errno(ret);
1496	goto out;
1497	}
1498
1499	dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
1500	if (ocfs2_dx_root_inline(dx_root)) {
1501	ocfs2_dx_inline_root_insert(dir, handle,
1502	hinfo: &lookup->dl_hinfo,
1503	dirent_blk: lookup->dl_leaf_bh->b_blocknr,
1504	dx_root);
1505	} else {
1506	ret = __ocfs2_dx_dir_leaf_insert(dir, handle, hinfo: &lookup->dl_hinfo,
1507	dirent_blk: lookup->dl_leaf_bh->b_blocknr,
1508	dx_leaf_bh: lookup->dl_dx_leaf_bh);
1509	if (ret)
1510	goto out;
1511	}
1512
1513	le32_add_cpu(var: &dx_root->dr_num_entries, val: `1`);
1514	ocfs2_journal_dirty(handle, bh: dx_root_bh);
1515
1516	out:
1517	return ret;
1518	}
1519
1520	static void ocfs2_remove_block_from_free_list(struct inode *dir,
1521	handle_t *handle,
1522	struct ocfs2_dir_lookup_result *lookup)
1523	{
1524	struct ocfs2_dir_block_trailer trailer, prev;
1525	struct ocfs2_dx_root_block *dx_root;
1526	struct buffer_head *bh;
1527
1528	trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1529
1530	if (ocfs2_free_list_at_root(res: lookup)) {
1531	bh = lookup->dl_dx_root_bh;
1532	dx_root = (struct ocfs2_dx_root_block *)bh->b_data;
1533	dx_root->dr_free_blk = trailer->db_free_next;
1534	} else {
1535	bh = lookup->dl_prev_leaf_bh;
1536	prev = ocfs2_trailer_from_bh(bh, dir->i_sb);
1537	prev->db_free_next = trailer->db_free_next;
1538	}
1539
1540	trailer->db_free_rec_len = cpu_to_le16(`0`);
1541	trailer->db_free_next = cpu_to_le64(`0`);
1542
1543	ocfs2_journal_dirty(handle, bh);
1544	ocfs2_journal_dirty(handle, bh: lookup->dl_leaf_bh);
1545	}
1546
1547	/*
1548	* This expects that a journal write has been reserved on
1549	* lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh
1550	*/
1551	static void ocfs2_recalc_free_list(struct inode dir, handle_t handle,
1552	struct ocfs2_dir_lookup_result *lookup)
1553	{
1554	int max_rec_len;
1555	struct ocfs2_dir_block_trailer *trailer;
1556
1557	/ Walk dl_leaf_bh to figure out what the new free rec_len is. /
1558	max_rec_len = ocfs2_find_max_rec_len(sb: dir->i_sb, dirblock_bh: lookup->dl_leaf_bh);
1559	if (max_rec_len) {
1560	/*
1561	* There's still room in this block, so no need to remove it
1562	* from the free list. In this case, we just want to update
1563	* the rec len accounting.
1564	*/
1565	trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1566	trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1567	ocfs2_journal_dirty(handle, bh: lookup->dl_leaf_bh);
1568	} else {
1569	ocfs2_remove_block_from_free_list(dir, handle, lookup);
1570	}
1571	}
1572
1573	/ we don't always have a dentry for what we want to add, so people*
1574	* like orphan dir can call this instead.
1575	*
1576	* The lookup context must have been filled from
1577	* ocfs2_prepare_dir_for_insert.
1578	*/
1579	int __ocfs2_add_entry(handle_t *handle,
1580	struct inode *dir,
1581	const char name, int* namelen,
1582	struct inode *inode, u64 blkno,
1583	struct buffer_head *parent_fe_bh,
1584	struct ocfs2_dir_lookup_result *lookup)
1585	{
1586	unsigned long offset;
1587	unsigned short rec_len;
1588	struct ocfs2_dir_entry de, de1;
1589	struct ocfs2_dinode di = (struct* ocfs2_dinode *)parent_fe_bh->b_data;
1590	struct super_block *sb = dir->i_sb;
1591	int retval;
1592	unsigned int size = sb->s_blocksize;
1593	struct buffer_head *insert_bh = lookup->dl_leaf_bh;
1594	char *data_start = insert_bh->b_data;
1595
1596	if (ocfs2_dir_indexed(inode: dir)) {
1597	struct buffer_head *bh;
1598
1599	/*
1600	* An indexed dir may require that we update the free space
1601	* list. Reserve a write to the previous node in the list so
1602	* that we don't fail later.
1603	*
1604	* XXX: This can be either a dx_root_block, or an unindexed
1605	* directory tree leaf block.
1606	*/
1607	if (ocfs2_free_list_at_root(res: lookup)) {
1608	bh = lookup->dl_dx_root_bh;
1609	retval = ocfs2_journal_access_dr(handle,
1610	ci: INODE_CACHE(inode: dir), bh,
1611	OCFS2_JOURNAL_ACCESS_WRITE);
1612	} else {
1613	bh = lookup->dl_prev_leaf_bh;
1614	retval = ocfs2_journal_access_db(handle,
1615	ci: INODE_CACHE(inode: dir), bh,
1616	OCFS2_JOURNAL_ACCESS_WRITE);
1617	}
1618	if (retval) {
1619	mlog_errno(retval);
1620	return retval;
1621	}
1622	} else if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1623	data_start = di->id2.i_data.id_data;
1624	size = i_size_read(inode: dir);
1625
1626	BUG_ON(insert_bh != parent_fe_bh);
1627	}
1628
1629	rec_len = OCFS2_DIR_REC_LEN(namelen);
1630	offset = `0`;
1631	de = (struct ocfs2_dir_entry *) data_start;
1632	while (`1`) {
1633	BUG_ON((char *)de >= (size + data_start));
1634
1635	/ These checks should've already been passed by the*
1636	* prepare function, but I guess we can leave them
1637	* here anyway. */
1638	if (!ocfs2_check_dir_entry(dir, de, bh: insert_bh, offset)) {
1639	retval = -ENOENT;
1640	goto bail;
1641	}
1642	if (ocfs2_match(len: namelen, name, de)) {
1643	retval = -EEXIST;
1644	goto bail;
1645	}
1646
1647	/ We're guaranteed that we should have space, so we*
1648	* can't possibly have hit the trailer...right? */
1649	mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
1650	"Hit dir trailer trying to insert %.*s "
1651	"(namelen %d) into directory %llu. "
1652	"offset is %lu, trailer offset is %d\n",
1653	namelen, name, namelen,
1654	(unsigned long long)parent_fe_bh->b_blocknr,
1655	offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
1656
1657	if (ocfs2_dirent_would_fit(de, new_rec_len: rec_len)) {
1658	inode_set_mtime_to_ts(inode: dir,
1659	ts: inode_set_ctime_current(inode: dir));
1660	retval = ocfs2_mark_inode_dirty(handle, inode: dir, bh: parent_fe_bh);
1661	if (retval < `0`) {
1662	mlog_errno(retval);
1663	goto bail;
1664	}
1665
1666	if (insert_bh == parent_fe_bh)
1667	retval = ocfs2_journal_access_di(handle,
1668	ci: INODE_CACHE(inode: dir),
1669	bh: insert_bh,
1670	OCFS2_JOURNAL_ACCESS_WRITE);
1671	else {
1672	retval = ocfs2_journal_access_db(handle,
1673	ci: INODE_CACHE(inode: dir),
1674	bh: insert_bh,
1675	OCFS2_JOURNAL_ACCESS_WRITE);
1676
1677	if (!retval && ocfs2_dir_indexed(inode: dir))
1678	retval = ocfs2_dx_dir_insert(dir,
1679	handle,
1680	lookup);
1681	}
1682
1683	if (retval) {
1684	mlog_errno(retval);
1685	goto bail;
1686	}
1687
1688	/ By now the buffer is marked for journaling /
1689	offset += le16_to_cpu(de->rec_len);
1690	if (le64_to_cpu(de->inode)) {
1691	de1 = (struct ocfs2_dir_entry )((char* *) de +
1692	OCFS2_DIR_REC_LEN(de->name_len));
1693	de1->rec_len =
1694	cpu_to_le16(le16_to_cpu(de->rec_len) -
1695	OCFS2_DIR_REC_LEN(de->name_len));
1696	de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1697	de = de1;
1698	}
1699	de->file_type = FT_UNKNOWN;
1700	if (blkno) {
1701	de->inode = cpu_to_le64(blkno);
1702	ocfs2_set_de_type(de, mode: inode->i_mode);
1703	} else
1704	de->inode = `0`;
1705	de->name_len = namelen;
1706	memcpy(de->name, name, namelen);
1707
1708	if (ocfs2_dir_indexed(inode: dir))
1709	ocfs2_recalc_free_list(dir, handle, lookup);
1710
1711	inode_inc_iversion(inode: dir);
1712	ocfs2_journal_dirty(handle, bh: insert_bh);
1713	retval = `0`;
1714	goto bail;
1715	}
1716
1717	offset += le16_to_cpu(de->rec_len);
1718	de = (struct ocfs2_dir_entry ) ((char* *) de + le16_to_cpu(de->rec_len));
1719	}
1720
1721	/ when you think about it, the assert above should prevent us*
1722	* from ever getting here. */
1723	retval = -ENOSPC;
1724	bail:
1725	if (retval)
1726	mlog_errno(retval);
1727
1728	return retval;
1729	}
1730
1731	static int ocfs2_dir_foreach_blk_id(struct inode *inode,
1732	u64 *f_version,
1733	struct dir_context *ctx)
1734	{
1735	int ret, i;
1736	unsigned long offset = ctx->pos;
1737	struct buffer_head *di_bh = NULL;
1738	struct ocfs2_dinode *di;
1739	struct ocfs2_inline_data *data;
1740	struct ocfs2_dir_entry *de;
1741
1742	ret = ocfs2_read_inode_block(inode, bh: &di_bh);
1743	if (ret) {
1744	mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
1745	(unsigned long long)OCFS2_I(inode)->ip_blkno);
1746	goto out;
1747	}
1748
1749	di = (struct ocfs2_dinode *)di_bh->b_data;
1750	data = &di->id2.i_data;
1751
1752	while (ctx->pos < i_size_read(inode)) {
1753	/ If the dir block has changed since the last call to*
1754	* readdir(2), then we might be pointing to an invalid
1755	* dirent right now. Scan from the start of the block
1756	* to make sure. */
1757	if (!inode_eq_iversion(inode, old: *f_version)) {
1758	for (i = `0`; i < i_size_read(inode) && i < offset; ) {
1759	de = (struct ocfs2_dir_entry *)
1760	(data->id_data + i);
1761	/ It's too expensive to do a full*
1762	* dirent test each time round this
1763	* loop, but we do have to test at
1764	* least that it is non-zero. A
1765	* failure will be detected in the
1766	* dirent test below. */
1767	if (le16_to_cpu(de->rec_len) <
1768	OCFS2_DIR_REC_LEN(`1`))
1769	break;
1770	i += le16_to_cpu(de->rec_len);
1771	}
1772	ctx->pos = offset = i;
1773	*f_version = inode_query_iversion(inode);
1774	}
1775
1776	de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
1777	if (!ocfs2_check_dir_entry(dir: inode, de, bh: di_bh, offset: ctx->pos)) {
1778	/ On error, skip the f_pos to the end. /
1779	ctx->pos = i_size_read(inode);
1780	break;
1781	}
1782	offset += le16_to_cpu(de->rec_len);
1783	if (le64_to_cpu(de->inode)) {
1784	if (!dir_emit(ctx, name: de->name, namelen: de->name_len,
1785	le64_to_cpu(de->inode),
1786	type: fs_ftype_to_dtype(filetype: de->file_type)))
1787	goto out;
1788	}
1789	ctx->pos += le16_to_cpu(de->rec_len);
1790	}
1791	out:
1792	brelse(bh: di_bh);
1793	return `0`;
1794	}
1795
1796	/*
1797	* NOTE: This function can be called against unindexed directories,
1798	* and indexed ones.
1799	*/
1800	static int ocfs2_dir_foreach_blk_el(struct inode *inode,
1801	u64 *f_version,
1802	struct dir_context *ctx,
1803	bool persist)
1804	{
1805	unsigned long offset, blk, last_ra_blk = `0`;
1806	int i;
1807	struct buffer_head * bh, * tmp;
1808	struct ocfs2_dir_entry * de;
1809	struct super_block * sb = inode->i_sb;
1810	unsigned int ra_sectors = `16`;
1811	int stored = `0`;
1812
1813	bh = NULL;
1814
1815	offset = ctx->pos & (sb->s_blocksize - `1`);
1816
1817	while (ctx->pos < i_size_read(inode)) {
1818	blk = ctx->pos >> sb->s_blocksize_bits;
1819	if (ocfs2_read_dir_block(inode, v_block: blk, bh: &bh, flags: `0`)) {
1820	/ Skip the corrupt dirblock and keep trying /
1821	ctx->pos += sb->s_blocksize - offset;
1822	continue;
1823	}
1824
1825	/ The idea here is to begin with 8k read-ahead and to stay*
1826	* 4k ahead of our current position.
1827	*
1828	* TODO: Use the pagecache for this. We just need to
1829	* make sure it's cluster-safe... */
1830	if (!last_ra_blk
1831	\|\| (((last_ra_blk - blk) << `9`) <= (ra_sectors / `2`))) {
1832	for (i = ra_sectors >> (sb->s_blocksize_bits - `9`);
1833	i > `0`; i--) {
1834	tmp = NULL;
1835	if (!ocfs2_read_dir_block(inode, v_block: ++blk, bh: &tmp,
1836	OCFS2_BH_READAHEAD))
1837	brelse(bh: tmp);
1838	}
1839	last_ra_blk = blk;
1840	ra_sectors = `8`;
1841	}
1842
1843	/ If the dir block has changed since the last call to*
1844	* readdir(2), then we might be pointing to an invalid
1845	* dirent right now. Scan from the start of the block
1846	* to make sure. */
1847	if (!inode_eq_iversion(inode, old: *f_version)) {
1848	for (i = `0`; i < sb->s_blocksize && i < offset; ) {
1849	de = (struct ocfs2_dir_entry *) (bh->b_data + i);
1850	/ It's too expensive to do a full*
1851	* dirent test each time round this
1852	* loop, but we do have to test at
1853	* least that it is non-zero. A
1854	* failure will be detected in the
1855	* dirent test below. */
1856	if (le16_to_cpu(de->rec_len) <
1857	OCFS2_DIR_REC_LEN(`1`))
1858	break;
1859	i += le16_to_cpu(de->rec_len);
1860	}
1861	offset = i;
1862	ctx->pos = (ctx->pos & ~(sb->s_blocksize - `1`))
1863	\| offset;
1864	*f_version = inode_query_iversion(inode);
1865	}
1866
1867	while (ctx->pos < i_size_read(inode)
1868	&& offset < sb->s_blocksize) {
1869	de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
1870	if (!ocfs2_check_dir_entry(dir: inode, de, bh, offset)) {
1871	/ On error, skip the f_pos to the*
1872	next block. /*
1873	ctx->pos = (ctx->pos \| (sb->s_blocksize - `1`)) + `1`;
1874	break;
1875	}
1876	if (le64_to_cpu(de->inode)) {
1877	if (!dir_emit(ctx, name: de->name,
1878	namelen: de->name_len,
1879	le64_to_cpu(de->inode),
1880	type: fs_ftype_to_dtype(filetype: de->file_type))) {
1881	brelse(bh);
1882	return `0`;
1883	}
1884	stored++;
1885	}
1886	offset += le16_to_cpu(de->rec_len);
1887	ctx->pos += le16_to_cpu(de->rec_len);
1888	}
1889	offset = `0`;
1890	brelse(bh);
1891	bh = NULL;
1892	if (!persist && stored)
1893	break;
1894	}
1895	return `0`;
1896	}
1897
1898	static int ocfs2_dir_foreach_blk(struct inode inode, u64 f_version,
1899	struct dir_context *ctx,
1900	bool persist)
1901	{
1902	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1903	return ocfs2_dir_foreach_blk_id(inode, f_version, ctx);
1904	return ocfs2_dir_foreach_blk_el(inode, f_version, ctx, persist);
1905	}
1906
1907	/*
1908	* This is intended to be called from inside other kernel functions,
1909	* so we fake some arguments.
1910	*/
1911	int ocfs2_dir_foreach(struct inode inode, struct* dir_context *ctx)
1912	{
1913	u64 version = inode_query_iversion(inode);
1914	ocfs2_dir_foreach_blk(inode, f_version: &version, ctx, persist: true);
1915	return `0`;
1916	}
1917
1918	/*
1919	* ocfs2_readdir()
1920	*
1921	*/
1922	int ocfs2_readdir(struct file file, struct* dir_context *ctx)
1923	{
1924	int error = `0`;
1925	struct inode *inode = file_inode(f: file);
1926	int lock_level = `0`;
1927
1928	trace_ocfs2_readdir(num: (unsigned long long)OCFS2_I(inode)->ip_blkno);
1929
1930	error = ocfs2_inode_lock_atime(inode, vfsmnt: file->f_path.mnt, level: &lock_level, wait: `1`);
1931	if (lock_level && error >= `0`) {
1932	/ We release EX lock which used to update atime*
1933	* and get PR lock again to reduce contention
1934	* on commonly accessed directories. */
1935	ocfs2_inode_unlock(inode, ex: `1`);
1936	lock_level = `0`;
1937	error = ocfs2_inode_lock(inode, NULL, `0`);
1938	}
1939	if (error < `0`) {
1940	if (error != -ENOENT)
1941	mlog_errno(error);
1942	/ we haven't got any yet, so propagate the error. /
1943	goto bail_nolock;
1944	}
1945
1946	error = ocfs2_dir_foreach_blk(inode, f_version: &file->f_version, ctx, persist: false);
1947
1948	ocfs2_inode_unlock(inode, ex: lock_level);
1949	if (error)
1950	mlog_errno(error);
1951
1952	bail_nolock:
1953
1954	return error;
1955	}
1956
1957	/*
1958	* NOTE: this should always be called with parent dir i_rwsem taken.
1959	*/
1960	int ocfs2_find_files_on_disk(const char *name,
1961	int namelen,
1962	u64 *blkno,
1963	struct inode *inode,
1964	struct ocfs2_dir_lookup_result *lookup)
1965	{
1966	int status = -ENOENT;
1967
1968	trace_ocfs2_find_files_on_disk(namelen, name, blkno,
1969	dir: (unsigned long long)OCFS2_I(inode)->ip_blkno);
1970
1971	status = ocfs2_find_entry(name, namelen, dir: inode, lookup);
1972	if (status)
1973	goto leave;
1974
1975	*blkno = le64_to_cpu(lookup->dl_entry->inode);
1976
1977	status = `0`;
1978	leave:
1979
1980	return status;
1981	}
1982
1983	/*
1984	* Convenience function for callers which just want the block number
1985	* mapped to a name and don't require the full dirent info, etc.
1986	*/
1987	int ocfs2_lookup_ino_from_name(struct inode dir, const* char *name,
1988	int namelen, u64 *blkno)
1989	{
1990	int ret;
1991	struct ocfs2_dir_lookup_result lookup = { NULL, };
1992
1993	ret = ocfs2_find_files_on_disk(name, namelen, blkno, inode: dir, lookup: &lookup);
1994	ocfs2_free_dir_lookup_result(res: &lookup);
1995
1996	return ret;
1997	}
1998
1999	/ Check for a name within a directory.*
2000	*
2001	* Return 0 if the name does not exist
2002	* Return -EEXIST if the directory contains the name
2003	*
2004	* Callers should have i_rwsem + a cluster lock on dir
2005	*/
2006	int ocfs2_check_dir_for_entry(struct inode *dir,
2007	const char *name,
2008	int namelen)
2009	{
2010	int ret = `0`;
2011	struct ocfs2_dir_lookup_result lookup = { NULL, };
2012
2013	trace_ocfs2_check_dir_for_entry(
2014	dir: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno, namelen, name);
2015
2016	if (ocfs2_find_entry(name, namelen, dir, lookup: &lookup) == `0`) {
2017	ret = -EEXIST;
2018	mlog_errno(ret);
2019	}
2020
2021	ocfs2_free_dir_lookup_result(res: &lookup);
2022
2023	return ret;
2024	}
2025
2026	struct ocfs2_empty_dir_priv {
2027	struct dir_context ctx;
2028	unsigned seen_dot;
2029	unsigned seen_dot_dot;
2030	unsigned seen_other;
2031	unsigned dx_dir;
2032	};
2033	static bool ocfs2_empty_dir_filldir(struct dir_context ctx, const* char *name,
2034	int name_len, loff_t pos, u64 ino,
2035	unsigned type)
2036	{
2037	struct ocfs2_empty_dir_priv *p =
2038	container_of(ctx, struct ocfs2_empty_dir_priv, ctx);
2039
2040	/*
2041	* Check the positions of "." and ".." records to be sure
2042	* they're in the correct place.
2043	*
2044	* Indexed directories don't need to proceed past the first
2045	* two entries, so we end the scan after seeing '..'. Despite
2046	* that, we allow the scan to proceed In the event that we
2047	* have a corrupted indexed directory (no dot or dot dot
2048	* entries). This allows us to double check for existing
2049	* entries which might not have been found in the index.
2050	*/
2051	if (name_len == `1` && !strncmp(".", name, `1`) && pos == `0`) {
2052	p->seen_dot = `1`;
2053	return true;
2054	}
2055
2056	if (name_len == `2` && !strncmp("..", name, `2`) &&
2057	pos == OCFS2_DIR_REC_LEN(`1`)) {
2058	p->seen_dot_dot = `1`;
2059
2060	if (p->dx_dir && p->seen_dot)
2061	return false;
2062
2063	return true;
2064	}
2065
2066	p->seen_other = `1`;
2067	return false;
2068	}
2069
2070	static int ocfs2_empty_dir_dx(struct inode *inode,
2071	struct ocfs2_empty_dir_priv *priv)
2072	{
2073	int ret;
2074	struct buffer_head *di_bh = NULL;
2075	struct buffer_head *dx_root_bh = NULL;
2076	struct ocfs2_dinode *di;
2077	struct ocfs2_dx_root_block *dx_root;
2078
2079	priv->dx_dir = `1`;
2080
2081	ret = ocfs2_read_inode_block(inode, bh: &di_bh);
2082	if (ret) {
2083	mlog_errno(ret);
2084	goto out;
2085	}
2086	di = (struct ocfs2_dinode *)di_bh->b_data;
2087
2088	ret = ocfs2_read_dx_root(dir: inode, di, dx_root_bh: &dx_root_bh);
2089	if (ret) {
2090	mlog_errno(ret);
2091	goto out;
2092	}
2093	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2094
2095	if (le32_to_cpu(dx_root->dr_num_entries) != `2`)
2096	priv->seen_other = `1`;
2097
2098	out:
2099	brelse(bh: di_bh);
2100	brelse(bh: dx_root_bh);
2101	return ret;
2102	}
2103
2104	/*
2105	* routine to check that the specified directory is empty (for rmdir)
2106	*
2107	* Returns 1 if dir is empty, zero otherwise.
2108	*
2109	* XXX: This is a performance problem for unindexed directories.
2110	*/
2111	int ocfs2_empty_dir(struct inode *inode)
2112	{
2113	int ret;
2114	struct ocfs2_empty_dir_priv priv = {
2115	.ctx.actor = ocfs2_empty_dir_filldir,
2116	};
2117
2118	if (ocfs2_dir_indexed(inode)) {
2119	ret = ocfs2_empty_dir_dx(inode, priv: &priv);
2120	if (ret)
2121	mlog_errno(ret);
2122	/*
2123	* We still run ocfs2_dir_foreach to get the checks
2124	* for "." and "..".
2125	*/
2126	}
2127
2128	ret = ocfs2_dir_foreach(inode, ctx: &priv.ctx);
2129	if (ret)
2130	mlog_errno(ret);
2131
2132	if (!priv.seen_dot \|\| !priv.seen_dot_dot) {
2133	mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
2134	(unsigned long long)OCFS2_I(inode)->ip_blkno);
2135	/*
2136	* XXX: Is it really safe to allow an unlink to continue?
2137	*/
2138	return `1`;
2139	}
2140
2141	return !priv.seen_other;
2142	}
2143
2144	/*
2145	* Fills "." and ".." dirents in a new directory block. Returns dirent for
2146	* "..", which might be used during creation of a directory with a trailing
2147	* header. It is otherwise safe to ignore the return code.
2148	*/
2149	static struct ocfs2_dir_entry ocfs2_fill_initial_dirents(struct* inode *inode,
2150	struct inode *parent,
2151	char *start,
2152	unsigned int size)
2153	{
2154	struct ocfs2_dir_entry de = (struct* ocfs2_dir_entry *)start;
2155
2156	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
2157	de->name_len = `1`;
2158	de->rec_len =
2159	cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
2160	strcpy(p: de->name, q: ".");
2161	ocfs2_set_de_type(de, S_IFDIR);
2162
2163	de = (struct ocfs2_dir_entry ) ((char* *)de + le16_to_cpu(de->rec_len));
2164	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
2165	de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(`1`));
2166	de->name_len = `2`;
2167	strcpy(p: de->name, q: "..");
2168	ocfs2_set_de_type(de, S_IFDIR);
2169
2170	return de;
2171	}
2172
2173	/*
2174	* This works together with code in ocfs2_mknod_locked() which sets
2175	* the inline-data flag and initializes the inline-data section.
2176	*/
2177	static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2178	handle_t *handle,
2179	struct inode *parent,
2180	struct inode *inode,
2181	struct buffer_head *di_bh)
2182	{
2183	int ret;
2184	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
2185	struct ocfs2_inline_data *data = &di->id2.i_data;
2186	unsigned int size = le16_to_cpu(data->id_count);
2187
2188	ret = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode), bh: di_bh,
2189	OCFS2_JOURNAL_ACCESS_WRITE);
2190	if (ret) {
2191	mlog_errno(ret);
2192	goto out;
2193	}
2194
2195	ocfs2_fill_initial_dirents(inode, parent, start: data->id_data, size);
2196	ocfs2_journal_dirty(handle, bh: di_bh);
2197
2198	i_size_write(inode, i_size: size);
2199	set_nlink(inode, nlink: `2`);
2200	inode->i_blocks = ocfs2_inode_sector_count(inode);
2201
2202	ret = ocfs2_mark_inode_dirty(handle, inode, bh: di_bh);
2203	if (ret < `0`)
2204	mlog_errno(ret);
2205
2206	out:
2207	return ret;
2208	}
2209
2210	static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2211	handle_t *handle,
2212	struct inode *parent,
2213	struct inode *inode,
2214	struct buffer_head *fe_bh,
2215	struct ocfs2_alloc_context *data_ac,
2216	struct buffer_head **ret_new_bh)
2217	{
2218	int status;
2219	unsigned int size = osb->sb->s_blocksize;
2220	struct buffer_head *new_bh = NULL;
2221	struct ocfs2_dir_entry *de;
2222
2223	if (ocfs2_new_dir_wants_trailer(dir: inode))
2224	size = ocfs2_dir_trailer_blk_off(sb: parent->i_sb);
2225
2226	status = ocfs2_do_extend_dir(sb: osb->sb, handle, dir: inode, parent_fe_bh: fe_bh,
2227	data_ac, NULL, new_bh: &new_bh);
2228	if (status < `0`) {
2229	mlog_errno(status);
2230	goto bail;
2231	}
2232
2233	ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode), bh: new_bh);
2234
2235	status = ocfs2_journal_access_db(handle, ci: INODE_CACHE(inode), bh: new_bh,
2236	OCFS2_JOURNAL_ACCESS_CREATE);
2237	if (status < `0`) {
2238	mlog_errno(status);
2239	goto bail;
2240	}
2241	memset(new_bh->b_data, `0`, osb->sb->s_blocksize);
2242
2243	de = ocfs2_fill_initial_dirents(inode, parent, start: new_bh->b_data, size);
2244	if (ocfs2_new_dir_wants_trailer(dir: inode)) {
2245	int size = le16_to_cpu(de->rec_len);
2246
2247	/*
2248	* Figure out the size of the hole left over after
2249	* insertion of '.' and '..'. The trailer wants this
2250	* information.
2251	*/
2252	size -= OCFS2_DIR_REC_LEN(`2`);
2253	size -= sizeof(struct ocfs2_dir_block_trailer);
2254
2255	ocfs2_init_dir_trailer(inode, bh: new_bh, rec_len: size);
2256	}
2257
2258	ocfs2_journal_dirty(handle, bh: new_bh);
2259
2260	i_size_write(inode, i_size: inode->i_sb->s_blocksize);
2261	set_nlink(inode, nlink: `2`);
2262	inode->i_blocks = ocfs2_inode_sector_count(inode);
2263	status = ocfs2_mark_inode_dirty(handle, inode, bh: fe_bh);
2264	if (status < `0`) {
2265	mlog_errno(status);
2266	goto bail;
2267	}
2268
2269	status = `0`;
2270	if (ret_new_bh) {
2271	*ret_new_bh = new_bh;
2272	new_bh = NULL;
2273	}
2274	bail:
2275	brelse(bh: new_bh);
2276
2277	return status;
2278	}
2279
2280	static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2281	handle_t handle, struct* inode *dir,
2282	struct buffer_head *di_bh,
2283	struct buffer_head *dirdata_bh,
2284	struct ocfs2_alloc_context *meta_ac,
2285	int dx_inline, u32 num_entries,
2286	struct buffer_head **ret_dx_root_bh)
2287	{
2288	int ret;
2289	struct ocfs2_dinode di = (struct* ocfs2_dinode *) di_bh->b_data;
2290	u16 dr_suballoc_bit;
2291	u64 suballoc_loc, dr_blkno;
2292	unsigned int num_bits;
2293	struct buffer_head *dx_root_bh = NULL;
2294	struct ocfs2_dx_root_block *dx_root;
2295	struct ocfs2_dir_block_trailer *trailer =
2296	ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
2297
2298	ret = ocfs2_claim_metadata(handle, ac: meta_ac, bits_wanted: `1`, suballoc_loc: &suballoc_loc,
2299	suballoc_bit_start: &dr_suballoc_bit, num_bits: &num_bits, blkno_start: &dr_blkno);
2300	if (ret) {
2301	mlog_errno(ret);
2302	goto out;
2303	}
2304
2305	trace_ocfs2_dx_dir_attach_index(
2306	val1: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno,
2307	val2: (unsigned long long)dr_blkno);
2308
2309	dx_root_bh = sb_getblk(sb: osb->sb, block: dr_blkno);
2310	if (dx_root_bh == NULL) {
2311	ret = -ENOMEM;
2312	goto out;
2313	}
2314	ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode: dir), bh: dx_root_bh);
2315
2316	ret = ocfs2_journal_access_dr(handle, ci: INODE_CACHE(inode: dir), bh: dx_root_bh,
2317	OCFS2_JOURNAL_ACCESS_CREATE);
2318	if (ret < `0`) {
2319	mlog_errno(ret);
2320	goto out;
2321	}
2322
2323	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2324	memset(dx_root, `0`, osb->sb->s_blocksize);
2325	strcpy(p: dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
2326	dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2327	dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
2328	dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
2329	dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
2330	dx_root->dr_blkno = cpu_to_le64(dr_blkno);
2331	dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
2332	dx_root->dr_num_entries = cpu_to_le32(num_entries);
2333	if (le16_to_cpu(trailer->db_free_rec_len))
2334	dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
2335	else
2336	dx_root->dr_free_blk = cpu_to_le64(`0`);
2337
2338	if (dx_inline) {
2339	dx_root->dr_flags \|= OCFS2_DX_FLAG_INLINE;
2340	dx_root->dr_entries.de_count =
2341	cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
2342	} else {
2343	dx_root->dr_list.l_count =
2344	cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
2345	}
2346	ocfs2_journal_dirty(handle, bh: dx_root_bh);
2347
2348	ret = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode: dir), bh: di_bh,
2349	OCFS2_JOURNAL_ACCESS_CREATE);
2350	if (ret) {
2351	mlog_errno(ret);
2352	goto out;
2353	}
2354
2355	di->i_dx_root = cpu_to_le64(dr_blkno);
2356
2357	spin_lock(lock: &OCFS2_I(inode: dir)->ip_lock);
2358	OCFS2_I(inode: dir)->ip_dyn_features \|= OCFS2_INDEXED_DIR_FL;
2359	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2360	spin_unlock(lock: &OCFS2_I(inode: dir)->ip_lock);
2361
2362	ocfs2_journal_dirty(handle, bh: di_bh);
2363
2364	*ret_dx_root_bh = dx_root_bh;
2365	dx_root_bh = NULL;
2366
2367	out:
2368	brelse(bh: dx_root_bh);
2369	return ret;
2370	}
2371
2372	static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
2373	handle_t handle, struct* inode *dir,
2374	struct buffer_head **dx_leaves,
2375	int num_dx_leaves, u64 start_blk)
2376	{
2377	int ret, i;
2378	struct ocfs2_dx_leaf *dx_leaf;
2379	struct buffer_head *bh;
2380
2381	for (i = `0`; i < num_dx_leaves; i++) {
2382	bh = sb_getblk(sb: osb->sb, block: start_blk + i);
2383	if (bh == NULL) {
2384	ret = -ENOMEM;
2385	goto out;
2386	}
2387	dx_leaves[i] = bh;
2388
2389	ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode: dir), bh);
2390
2391	ret = ocfs2_journal_access_dl(handle, ci: INODE_CACHE(inode: dir), bh,
2392	OCFS2_JOURNAL_ACCESS_CREATE);
2393	if (ret < `0`) {
2394	mlog_errno(ret);
2395	goto out;
2396	}
2397
2398	dx_leaf = (struct ocfs2_dx_leaf *) bh->b_data;
2399
2400	memset(dx_leaf, `0`, osb->sb->s_blocksize);
2401	strcpy(p: dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE);
2402	dx_leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation);
2403	dx_leaf->dl_blkno = cpu_to_le64(bh->b_blocknr);
2404	dx_leaf->dl_list.de_count =
2405	cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));
2406
2407	trace_ocfs2_dx_dir_format_cluster(
2408	val1: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno,
2409	val2: (unsigned long long)bh->b_blocknr,
2410	le16_to_cpu(dx_leaf->dl_list.de_count));
2411
2412	ocfs2_journal_dirty(handle, bh);
2413	}
2414
2415	ret = `0`;
2416	out:
2417	return ret;
2418	}
2419
2420	/*
2421	* Allocates and formats a new cluster for use in an indexed dir
2422	* leaf. This version will not do the extent insert, so that it can be
2423	* used by operations which need careful ordering.
2424	*/
2425	static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
2426	u32 cpos, handle_t *handle,
2427	struct ocfs2_alloc_context *data_ac,
2428	struct buffer_head **dx_leaves,
2429	int num_dx_leaves, u64 *ret_phys_blkno)
2430	{
2431	int ret;
2432	u32 phys, num;
2433	u64 phys_blkno;
2434	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2435
2436	/*
2437	* XXX: For create, this should claim cluster for the index
2438	* before the unindexed insert so that we have a better
2439	* chance of contiguousness as the directory grows in number
2440	* of entries.
2441	*/
2442	ret = __ocfs2_claim_clusters(handle, ac: data_ac, min_clusters: `1`, max_clusters: `1`, cluster_start: &phys, num_clusters: &num);
2443	if (ret) {
2444	mlog_errno(ret);
2445	goto out;
2446	}
2447
2448	/*
2449	* Format the new cluster first. That way, we're inserting
2450	* valid data.
2451	*/
2452	phys_blkno = ocfs2_clusters_to_blocks(sb: osb->sb, clusters: phys);
2453	ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves,
2454	num_dx_leaves, start_blk: phys_blkno);
2455	if (ret) {
2456	mlog_errno(ret);
2457	goto out;
2458	}
2459
2460	*ret_phys_blkno = phys_blkno;
2461	out:
2462	return ret;
2463	}
2464
2465	static int ocfs2_dx_dir_new_cluster(struct inode *dir,
2466	struct ocfs2_extent_tree *et,
2467	u32 cpos, handle_t *handle,
2468	struct ocfs2_alloc_context *data_ac,
2469	struct ocfs2_alloc_context *meta_ac,
2470	struct buffer_head **dx_leaves,
2471	int num_dx_leaves)
2472	{
2473	int ret;
2474	u64 phys_blkno;
2475
2476	ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
2477	num_dx_leaves, ret_phys_blkno: &phys_blkno);
2478	if (ret) {
2479	mlog_errno(ret);
2480	goto out;
2481	}
2482
2483	ret = ocfs2_insert_extent(handle, et, cpos, start_blk: phys_blkno, new_clusters: `1`, flags: `0`,
2484	meta_ac);
2485	if (ret)
2486	mlog_errno(ret);
2487	out:
2488	return ret;
2489	}
2490
2491	static struct buffer_head ocfs2_dx_dir_kmalloc_leaves(struct** super_block *sb,
2492	int *ret_num_leaves)
2493	{
2494	int num_dx_leaves = ocfs2_clusters_to_blocks(sb, clusters: `1`);
2495	struct buffer_head **dx_leaves;
2496
2497	dx_leaves = kcalloc(n: num_dx_leaves, size: sizeof(struct buffer_head *),
2498	GFP_NOFS);
2499	if (dx_leaves && ret_num_leaves)
2500	*ret_num_leaves = num_dx_leaves;
2501
2502	return dx_leaves;
2503	}
2504
2505	static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
2506	handle_t *handle,
2507	struct inode *parent,
2508	struct inode *inode,
2509	struct buffer_head *di_bh,
2510	struct ocfs2_alloc_context *data_ac,
2511	struct ocfs2_alloc_context *meta_ac)
2512	{
2513	int ret;
2514	struct buffer_head *leaf_bh = NULL;
2515	struct buffer_head *dx_root_bh = NULL;
2516	struct ocfs2_dx_hinfo hinfo;
2517	struct ocfs2_dx_root_block *dx_root;
2518	struct ocfs2_dx_entry_list *entry_list;
2519
2520	/*
2521	* Our strategy is to create the directory as though it were
2522	* unindexed, then add the index block. This works with very
2523	* little complication since the state of a new directory is a
2524	* very well known quantity.
2525	*
2526	* Essentially, we have two dirents ("." and ".."), in the 1st
2527	* block which need indexing. These are easily inserted into
2528	* the index block.
2529	*/
2530
2531	ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh: di_bh,
2532	data_ac, ret_new_bh: &leaf_bh);
2533	if (ret) {
2534	mlog_errno(ret);
2535	goto out;
2536	}
2537
2538	ret = ocfs2_dx_dir_attach_index(osb, handle, dir: inode, di_bh, dirdata_bh: leaf_bh,
2539	meta_ac, dx_inline: `1`, num_entries: `2`, ret_dx_root_bh: &dx_root_bh);
2540	if (ret) {
2541	mlog_errno(ret);
2542	goto out;
2543	}
2544	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2545	entry_list = &dx_root->dr_entries;
2546
2547	/ Buffer has been journaled for us by ocfs2_dx_dir_attach_index /
2548	ocfs2_dx_dir_name_hash(dir: inode, name: ".", len: `1`, hinfo: &hinfo);
2549	ocfs2_dx_entry_list_insert(entry_list, hinfo: &hinfo, dirent_blk: leaf_bh->b_blocknr);
2550
2551	ocfs2_dx_dir_name_hash(dir: inode, name: "..", len: `2`, hinfo: &hinfo);
2552	ocfs2_dx_entry_list_insert(entry_list, hinfo: &hinfo, dirent_blk: leaf_bh->b_blocknr);
2553
2554	out:
2555	brelse(bh: dx_root_bh);
2556	brelse(bh: leaf_bh);
2557	return ret;
2558	}
2559
2560	int ocfs2_fill_new_dir(struct ocfs2_super *osb,
2561	handle_t *handle,
2562	struct inode *parent,
2563	struct inode *inode,
2564	struct buffer_head *fe_bh,
2565	struct ocfs2_alloc_context *data_ac,
2566	struct ocfs2_alloc_context *meta_ac)
2567
2568	{
2569	BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
2570
2571	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
2572	return ocfs2_fill_new_dir_id(osb, handle, parent, inode, di_bh: fe_bh);
2573
2574	if (ocfs2_supports_indexed_dirs(osb))
2575	return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, di_bh: fe_bh,
2576	data_ac, meta_ac);
2577
2578	return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
2579	data_ac, NULL);
2580	}
2581
2582	static int ocfs2_dx_dir_index_block(struct inode *dir,
2583	handle_t *handle,
2584	struct buffer_head **dx_leaves,
2585	int num_dx_leaves,
2586	u32 *num_dx_entries,
2587	struct buffer_head *dirent_bh)
2588	{
2589	int ret = `0`, namelen, i;
2590	char de_buf, limit;
2591	struct ocfs2_dir_entry *de;
2592	struct buffer_head *dx_leaf_bh;
2593	struct ocfs2_dx_hinfo hinfo;
2594	u64 dirent_blk = dirent_bh->b_blocknr;
2595
2596	de_buf = dirent_bh->b_data;
2597	limit = de_buf + dir->i_sb->s_blocksize;
2598
2599	while (de_buf < limit) {
2600	de = (struct ocfs2_dir_entry *)de_buf;
2601
2602	namelen = de->name_len;
2603	if (!namelen \|\| !de->inode)
2604	goto inc;
2605
2606	ocfs2_dx_dir_name_hash(dir, name: de->name, len: namelen, hinfo: &hinfo);
2607
2608	i = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), hinfo: &hinfo);
2609	dx_leaf_bh = dx_leaves[i];
2610
2611	ret = __ocfs2_dx_dir_leaf_insert(dir, handle, hinfo: &hinfo,
2612	dirent_blk, dx_leaf_bh);
2613	if (ret) {
2614	mlog_errno(ret);
2615	goto out;
2616	}
2617
2618	num_dx_entries = num_dx_entries + `1`;
2619
2620	inc:
2621	de_buf += le16_to_cpu(de->rec_len);
2622	}
2623
2624	out:
2625	return ret;
2626	}
2627
2628	/*
2629	* XXX: This expects dx_root_bh to already be part of the transaction.
2630	*/
2631	static void ocfs2_dx_dir_index_root_block(struct inode *dir,
2632	struct buffer_head *dx_root_bh,
2633	struct buffer_head *dirent_bh)
2634	{
2635	char de_buf, limit;
2636	struct ocfs2_dx_root_block *dx_root;
2637	struct ocfs2_dir_entry *de;
2638	struct ocfs2_dx_hinfo hinfo;
2639	u64 dirent_blk = dirent_bh->b_blocknr;
2640
2641	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2642
2643	de_buf = dirent_bh->b_data;
2644	limit = de_buf + dir->i_sb->s_blocksize;
2645
2646	while (de_buf < limit) {
2647	de = (struct ocfs2_dir_entry *)de_buf;
2648
2649	if (!de->name_len \|\| !de->inode)
2650	goto inc;
2651
2652	ocfs2_dx_dir_name_hash(dir, name: de->name, len: de->name_len, hinfo: &hinfo);
2653
2654	trace_ocfs2_dx_dir_index_root_block(
2655	dir: (unsigned long long)dir->i_ino,
2656	major_hash: hinfo.major_hash, minor_hash: hinfo.minor_hash,
2657	namelen: de->name_len, name: de->name,
2658	le16_to_cpu(dx_root->dr_entries.de_num_used));
2659
2660	ocfs2_dx_entry_list_insert(entry_list: &dx_root->dr_entries, hinfo: &hinfo,
2661	dirent_blk);
2662
2663	le32_add_cpu(var: &dx_root->dr_num_entries, val: `1`);
2664	inc:
2665	de_buf += le16_to_cpu(de->rec_len);
2666	}
2667	}
2668
2669	/*
2670	* Count the number of inline directory entries in di_bh and compare
2671	* them against the number of entries we can hold in an inline dx root
2672	* block.
2673	*/
2674	static int ocfs2_new_dx_should_be_inline(struct inode *dir,
2675	struct buffer_head *di_bh)
2676	{
2677	int dirent_count = `0`;
2678	char de_buf, limit;
2679	struct ocfs2_dir_entry *de;
2680	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
2681
2682	de_buf = di->id2.i_data.id_data;
2683	limit = de_buf + i_size_read(inode: dir);
2684
2685	while (de_buf < limit) {
2686	de = (struct ocfs2_dir_entry *)de_buf;
2687
2688	if (de->name_len && de->inode)
2689	dirent_count++;
2690
2691	de_buf += le16_to_cpu(de->rec_len);
2692	}
2693
2694	/ We are careful to leave room for one extra record. /
2695	return dirent_count < ocfs2_dx_entries_per_root(sb: dir->i_sb);
2696	}
2697
2698	/*
2699	* Expand rec_len of the rightmost dirent in a directory block so that it
2700	* contains the end of our valid space for dirents. We do this during
2701	* expansion from an inline directory to one with extents. The first dir block
2702	* in that case is taken from the inline data portion of the inode block.
2703	*
2704	* This will also return the largest amount of contiguous space for a dirent
2705	* in the block. That value is not necessarily the last dirent, even after
2706	* expansion. The directory indexing code wants this value for free space
2707	* accounting. We do this here since we're already walking the entire dir
2708	* block.
2709	*
2710	* We add the dir trailer if this filesystem wants it.
2711	*/
2712	static unsigned int ocfs2_expand_last_dirent(char start, unsigned* int old_size,
2713	struct inode *dir)
2714	{
2715	struct super_block *sb = dir->i_sb;
2716	struct ocfs2_dir_entry *de;
2717	struct ocfs2_dir_entry *prev_de;
2718	char de_buf, limit;
2719	unsigned int new_size = sb->s_blocksize;
2720	unsigned int bytes, this_hole;
2721	unsigned int largest_hole = `0`;
2722
2723	if (ocfs2_new_dir_wants_trailer(dir))
2724	new_size = ocfs2_dir_trailer_blk_off(sb);
2725
2726	bytes = new_size - old_size;
2727
2728	limit = start + old_size;
2729	de_buf = start;
2730	de = (struct ocfs2_dir_entry *)de_buf;
2731	do {
2732	this_hole = ocfs2_figure_dirent_hole(de);
2733	if (this_hole > largest_hole)
2734	largest_hole = this_hole;
2735
2736	prev_de = de;
2737	de_buf += le16_to_cpu(de->rec_len);
2738	de = (struct ocfs2_dir_entry *)de_buf;
2739	} while (de_buf < limit);
2740
2741	le16_add_cpu(var: &prev_de->rec_len, val: bytes);
2742
2743	/ We need to double check this after modification of the final*
2744	* dirent. */
2745	this_hole = ocfs2_figure_dirent_hole(de: prev_de);
2746	if (this_hole > largest_hole)
2747	largest_hole = this_hole;
2748
2749	if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
2750	return largest_hole;
2751	return `0`;
2752	}
2753
2754	/*
2755	* We allocate enough clusters to fulfill "blocks_wanted", but set
2756	* i_size to exactly one block. Ocfs2_extend_dir() will handle the
2757	* rest automatically for us.
2758	*
2759	* *first_block_bh is a pointer to the 1st data block allocated to the
2760	* directory.
2761	*/
2762	static int ocfs2_expand_inline_dir(struct inode dir, struct* buffer_head *di_bh,
2763	unsigned int blocks_wanted,
2764	struct ocfs2_dir_lookup_result *lookup,
2765	struct buffer_head **first_block_bh)
2766	{
2767	u32 alloc, dx_alloc, bit_off, len, num_dx_entries = `0`;
2768	struct super_block *sb = dir->i_sb;
2769	int ret, i, num_dx_leaves = `0`, dx_inline = `0`,
2770	credits = ocfs2_inline_to_extents_credits(sb);
2771	u64 dx_insert_blkno, blkno,
2772	bytes = blocks_wanted << sb->s_blocksize_bits;
2773	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2774	struct ocfs2_inode_info *oi = OCFS2_I(inode: dir);
2775	struct ocfs2_alloc_context *data_ac = NULL;
2776	struct ocfs2_alloc_context *meta_ac = NULL;
2777	struct buffer_head *dirdata_bh = NULL;
2778	struct buffer_head *dx_root_bh = NULL;
2779	struct buffer_head **dx_leaves = NULL;
2780	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
2781	handle_t *handle;
2782	struct ocfs2_extent_tree et;
2783	struct ocfs2_extent_tree dx_et;
2784	int did_quota = `0`, bytes_allocated = `0`;
2785
2786	ocfs2_init_dinode_extent_tree(et: &et, ci: INODE_CACHE(inode: dir), bh: di_bh);
2787
2788	alloc = ocfs2_clusters_for_bytes(sb, bytes);
2789	dx_alloc = `0`;
2790
2791	down_write(sem: &oi->ip_alloc_sem);
2792
2793	if (ocfs2_supports_indexed_dirs(osb)) {
2794	credits += ocfs2_add_dir_index_credits(sb);
2795
2796	dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
2797	if (!dx_inline) {
2798	/ Add one more cluster for an index leaf /
2799	dx_alloc++;
2800	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
2801	ret_num_leaves: &num_dx_leaves);
2802	if (!dx_leaves) {
2803	ret = -ENOMEM;
2804	mlog_errno(ret);
2805	goto out;
2806	}
2807	}
2808
2809	/ This gets us the dx_root /
2810	ret = ocfs2_reserve_new_metadata_blocks(osb, blocks: `1`, ac: &meta_ac);
2811	if (ret) {
2812	mlog_errno(ret);
2813	goto out;
2814	}
2815	}
2816
2817	/*
2818	* We should never need more than 2 clusters for the unindexed
2819	* tree - maximum dirent size is far less than one block. In
2820	* fact, the only time we'd need more than one cluster is if
2821	* blocksize == clustersize and the dirent won't fit in the
2822	* extra space that the expansion to a single block gives. As
2823	* of today, that only happens on 4k/4k file systems.
2824	*/
2825	BUG_ON(alloc > `2`);
2826
2827	ret = ocfs2_reserve_clusters(osb, bits_wanted: alloc + dx_alloc, ac: &data_ac);
2828	if (ret) {
2829	mlog_errno(ret);
2830	goto out;
2831	}
2832
2833	/*
2834	* Prepare for worst case allocation scenario of two separate
2835	* extents in the unindexed tree.
2836	*/
2837	if (alloc == `2`)
2838	credits += OCFS2_SUBALLOC_ALLOC;
2839
2840	handle = ocfs2_start_trans(osb, max_buffs: credits);
2841	if (IS_ERR(ptr: handle)) {
2842	ret = PTR_ERR(ptr: handle);
2843	mlog_errno(ret);
2844	goto out;
2845	}
2846
2847	ret = dquot_alloc_space_nodirty(inode: dir,
2848	nr: ocfs2_clusters_to_bytes(sb: osb->sb, clusters: alloc + dx_alloc));
2849	if (ret)
2850	goto out_commit;
2851	did_quota = `1`;
2852
2853	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
2854	/*
2855	* Allocate our index cluster first, to maximize the
2856	* possibility that unindexed leaves grow
2857	* contiguously.
2858	*/
2859	ret = __ocfs2_dx_dir_new_cluster(dir, cpos: `0`, handle, data_ac,
2860	dx_leaves, num_dx_leaves,
2861	ret_phys_blkno: &dx_insert_blkno);
2862	if (ret) {
2863	mlog_errno(ret);
2864	goto out_commit;
2865	}
2866	bytes_allocated += ocfs2_clusters_to_bytes(sb: dir->i_sb, clusters: `1`);
2867	}
2868
2869	/*
2870	* Try to claim as many clusters as the bitmap can give though
2871	* if we only get one now, that's enough to continue. The rest
2872	* will be claimed after the conversion to extents.
2873	*/
2874	if (ocfs2_dir_resv_allowed(osb))
2875	data_ac->ac_resv = &oi->ip_la_data_resv;
2876	ret = ocfs2_claim_clusters(handle, ac: data_ac, min_clusters: `1`, cluster_start: &bit_off, num_clusters: &len);
2877	if (ret) {
2878	mlog_errno(ret);
2879	goto out_commit;
2880	}
2881	bytes_allocated += ocfs2_clusters_to_bytes(sb: dir->i_sb, clusters: `1`);
2882
2883	/*
2884	* Operations are carefully ordered so that we set up the new
2885	* data block first. The conversion from inline data to
2886	* extents follows.
2887	*/
2888	blkno = ocfs2_clusters_to_blocks(sb: dir->i_sb, clusters: bit_off);
2889	dirdata_bh = sb_getblk(sb, block: blkno);
2890	if (!dirdata_bh) {
2891	ret = -ENOMEM;
2892	mlog_errno(ret);
2893	goto out_commit;
2894	}
2895
2896	ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode: dir), bh: dirdata_bh);
2897
2898	ret = ocfs2_journal_access_db(handle, ci: INODE_CACHE(inode: dir), bh: dirdata_bh,
2899	OCFS2_JOURNAL_ACCESS_CREATE);
2900	if (ret) {
2901	mlog_errno(ret);
2902	goto out_commit;
2903	}
2904
2905	memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
2906	memset(dirdata_bh->b_data + i_size_read(dir), `0`,
2907	sb->s_blocksize - i_size_read(dir));
2908	i = ocfs2_expand_last_dirent(start: dirdata_bh->b_data, old_size: i_size_read(inode: dir), dir);
2909	if (ocfs2_new_dir_wants_trailer(dir)) {
2910	/*
2911	* Prepare the dir trailer up front. It will otherwise look
2912	* like a valid dirent. Even if inserting the index fails
2913	* (unlikely), then all we'll have done is given first dir
2914	* block a small amount of fragmentation.
2915	*/
2916	ocfs2_init_dir_trailer(inode: dir, bh: dirdata_bh, rec_len: i);
2917	}
2918
2919	ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: `1`);
2920	ocfs2_journal_dirty(handle, bh: dirdata_bh);
2921
2922	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
2923	/*
2924	* Dx dirs with an external cluster need to do this up
2925	* front. Inline dx root's get handled later, after
2926	* we've allocated our root block. We get passed back
2927	* a total number of items so that dr_num_entries can
2928	* be correctly set once the dx_root has been
2929	* allocated.
2930	*/
2931	ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
2932	num_dx_leaves, num_dx_entries: &num_dx_entries,
2933	dirent_bh: dirdata_bh);
2934	if (ret) {
2935	mlog_errno(ret);
2936	goto out_commit;
2937	}
2938	}
2939
2940	/*
2941	* Set extent, i_size, etc on the directory. After this, the
2942	* inode should contain the same exact dirents as before and
2943	* be fully accessible from system calls.
2944	*
2945	* We let the later dirent insert modify c/mtime - to the user
2946	* the data hasn't changed.
2947	*/
2948	ret = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode: dir), bh: di_bh,
2949	OCFS2_JOURNAL_ACCESS_CREATE);
2950	if (ret) {
2951	mlog_errno(ret);
2952	goto out_commit;
2953	}
2954
2955	spin_lock(lock: &oi->ip_lock);
2956	oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
2957	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2958	spin_unlock(lock: &oi->ip_lock);
2959
2960	ocfs2_dinode_new_extent_list(inode: dir, di);
2961
2962	i_size_write(inode: dir, i_size: sb->s_blocksize);
2963	inode_set_mtime_to_ts(inode: dir, ts: inode_set_ctime_current(inode: dir));
2964
2965	di->i_size = cpu_to_le64(sb->s_blocksize);
2966	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime_sec(dir));
2967	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime_nsec(dir));
2968	ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: `1`);
2969
2970	/*
2971	* This should never fail as our extent list is empty and all
2972	* related blocks have been journaled already.
2973	*/
2974	ret = ocfs2_insert_extent(handle, et: &et, cpos: `0`, start_blk: blkno, new_clusters: len,
2975	flags: `0`, NULL);
2976	if (ret) {
2977	mlog_errno(ret);
2978	goto out_commit;
2979	}
2980
2981	/*
2982	* Set i_blocks after the extent insert for the most up to
2983	* date ip_clusters value.
2984	*/
2985	dir->i_blocks = ocfs2_inode_sector_count(inode: dir);
2986
2987	ocfs2_journal_dirty(handle, bh: di_bh);
2988
2989	if (ocfs2_supports_indexed_dirs(osb)) {
2990	ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
2991	dirdata_bh, meta_ac, dx_inline,
2992	num_entries: num_dx_entries, ret_dx_root_bh: &dx_root_bh);
2993	if (ret) {
2994	mlog_errno(ret);
2995	goto out_commit;
2996	}
2997
2998	if (dx_inline) {
2999	ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
3000	dirent_bh: dirdata_bh);
3001	} else {
3002	ocfs2_init_dx_root_extent_tree(et: &dx_et,
3003	ci: INODE_CACHE(inode: dir),
3004	bh: dx_root_bh);
3005	ret = ocfs2_insert_extent(handle, et: &dx_et, cpos: `0`,
3006	start_blk: dx_insert_blkno, new_clusters: `1`, flags: `0`, NULL);
3007	if (ret)
3008	mlog_errno(ret);
3009	}
3010	}
3011
3012	/*
3013	* We asked for two clusters, but only got one in the 1st
3014	* pass. Claim the 2nd cluster as a separate extent.
3015	*/
3016	if (alloc > len) {
3017	ret = ocfs2_claim_clusters(handle, ac: data_ac, min_clusters: `1`, cluster_start: &bit_off,
3018	num_clusters: &len);
3019	if (ret) {
3020	mlog_errno(ret);
3021	goto out_commit;
3022	}
3023	blkno = ocfs2_clusters_to_blocks(sb: dir->i_sb, clusters: bit_off);
3024
3025	ret = ocfs2_insert_extent(handle, et: &et, cpos: `1`,
3026	start_blk: blkno, new_clusters: len, flags: `0`, NULL);
3027	if (ret) {
3028	mlog_errno(ret);
3029	goto out_commit;
3030	}
3031	bytes_allocated += ocfs2_clusters_to_bytes(sb: dir->i_sb, clusters: `1`);
3032	}
3033
3034	*first_block_bh = dirdata_bh;
3035	dirdata_bh = NULL;
3036	if (ocfs2_supports_indexed_dirs(osb)) {
3037	unsigned int off;
3038
3039	if (!dx_inline) {
3040	/*
3041	* We need to return the correct block within the
3042	* cluster which should hold our entry.
3043	*/
3044	off = ocfs2_dx_dir_hash_idx(osb,
3045	hinfo: &lookup->dl_hinfo);
3046	get_bh(bh: dx_leaves[off]);
3047	lookup->dl_dx_leaf_bh = dx_leaves[off];
3048	}
3049	lookup->dl_dx_root_bh = dx_root_bh;
3050	dx_root_bh = NULL;
3051	}
3052
3053	out_commit:
3054	if (ret < `0` && did_quota)
3055	dquot_free_space_nodirty(inode: dir, nr: bytes_allocated);
3056
3057	ocfs2_commit_trans(osb, handle);
3058
3059	out:
3060	up_write(sem: &oi->ip_alloc_sem);
3061	if (data_ac)
3062	ocfs2_free_alloc_context(ac: data_ac);
3063	if (meta_ac)
3064	ocfs2_free_alloc_context(ac: meta_ac);
3065
3066	if (dx_leaves) {
3067	for (i = `0`; i < num_dx_leaves; i++)
3068	brelse(bh: dx_leaves[i]);
3069	kfree(objp: dx_leaves);
3070	}
3071
3072	brelse(bh: dirdata_bh);
3073	brelse(bh: dx_root_bh);
3074
3075	return ret;
3076	}
3077
3078	/ returns a bh of the 1st new block in the allocation. /
3079	static int ocfs2_do_extend_dir(struct super_block *sb,
3080	handle_t *handle,
3081	struct inode *dir,
3082	struct buffer_head *parent_fe_bh,
3083	struct ocfs2_alloc_context *data_ac,
3084	struct ocfs2_alloc_context *meta_ac,
3085	struct buffer_head **new_bh)
3086	{
3087	int status;
3088	int extend, did_quota = `0`;
3089	u64 p_blkno, v_blkno;
3090
3091	spin_lock(lock: &OCFS2_I(inode: dir)->ip_lock);
3092	extend = (i_size_read(inode: dir) == ocfs2_clusters_to_bytes(sb, clusters: OCFS2_I(inode: dir)->ip_clusters));
3093	spin_unlock(lock: &OCFS2_I(inode: dir)->ip_lock);
3094
3095	if (extend) {
3096	u32 offset = OCFS2_I(inode: dir)->ip_clusters;
3097
3098	status = dquot_alloc_space_nodirty(inode: dir,
3099	nr: ocfs2_clusters_to_bytes(sb, clusters: `1`));
3100	if (status)
3101	goto bail;
3102	did_quota = `1`;
3103
3104	status = ocfs2_add_inode_data(OCFS2_SB(sb), inode: dir, logical_offset: &offset,
3105	clusters_to_add: `1`, mark_unwritten: `0`, fe_bh: parent_fe_bh, handle,
3106	data_ac, meta_ac, NULL);
3107	BUG_ON(status == -EAGAIN);
3108	if (status < `0`) {
3109	mlog_errno(status);
3110	goto bail;
3111	}
3112	}
3113
3114	v_blkno = ocfs2_blocks_for_bytes(sb, bytes: i_size_read(inode: dir));
3115	status = ocfs2_extent_map_get_blocks(inode: dir, v_blkno, p_blkno: &p_blkno, NULL, NULL);
3116	if (status < `0`) {
3117	mlog_errno(status);
3118	goto bail;
3119	}
3120
3121	*new_bh = sb_getblk(sb, block: p_blkno);
3122	if (!*new_bh) {
3123	status = -ENOMEM;
3124	mlog_errno(status);
3125	goto bail;
3126	}
3127	status = `0`;
3128	bail:
3129	if (did_quota && status < `0`)
3130	dquot_free_space_nodirty(inode: dir, nr: ocfs2_clusters_to_bytes(sb, clusters: `1`));
3131	return status;
3132	}
3133
3134	/*
3135	* Assumes you already have a cluster lock on the directory.
3136	*
3137	* 'blocks_wanted' is only used if we have an inline directory which
3138	* is to be turned into an extent based one. The size of the dirent to
3139	* insert might be larger than the space gained by growing to just one
3140	* block, so we may have to grow the inode by two blocks in that case.
3141	*
3142	* If the directory is already indexed, dx_root_bh must be provided.
3143	*/
3144	static int ocfs2_extend_dir(struct ocfs2_super *osb,
3145	struct inode *dir,
3146	struct buffer_head *parent_fe_bh,
3147	unsigned int blocks_wanted,
3148	struct ocfs2_dir_lookup_result *lookup,
3149	struct buffer_head **new_de_bh)
3150	{
3151	int status = `0`;
3152	int credits, num_free_extents, drop_alloc_sem = `0`;
3153	loff_t dir_i_size;
3154	struct ocfs2_dinode fe = (struct* ocfs2_dinode *) parent_fe_bh->b_data;
3155	struct ocfs2_extent_list *el = &fe->id2.i_list;
3156	struct ocfs2_alloc_context *data_ac = NULL;
3157	struct ocfs2_alloc_context *meta_ac = NULL;
3158	handle_t *handle = NULL;
3159	struct buffer_head *new_bh = NULL;
3160	struct ocfs2_dir_entry * de;
3161	struct super_block *sb = osb->sb;
3162	struct ocfs2_extent_tree et;
3163	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
3164
3165	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
3166	/*
3167	* This would be a code error as an inline directory should
3168	* never have an index root.
3169	*/
3170	BUG_ON(dx_root_bh);
3171
3172	status = ocfs2_expand_inline_dir(dir, di_bh: parent_fe_bh,
3173	blocks_wanted, lookup,
3174	first_block_bh: &new_bh);
3175	if (status) {
3176	mlog_errno(status);
3177	goto bail;
3178	}
3179
3180	/ Expansion from inline to an indexed directory will*
3181	* have given us this. */
3182	dx_root_bh = lookup->dl_dx_root_bh;
3183
3184	if (blocks_wanted == `1`) {
3185	/*
3186	* If the new dirent will fit inside the space
3187	* created by pushing out to one block, then
3188	* we can complete the operation
3189	* here. Otherwise we have to expand i_size
3190	* and format the 2nd block below.
3191	*/
3192	BUG_ON(new_bh == NULL);
3193	goto bail_bh;
3194	}
3195
3196	/*
3197	* Get rid of 'new_bh' - we want to format the 2nd
3198	* data block and return that instead.
3199	*/
3200	brelse(bh: new_bh);
3201	new_bh = NULL;
3202
3203	down_write(sem: &OCFS2_I(inode: dir)->ip_alloc_sem);
3204	drop_alloc_sem = `1`;
3205	dir_i_size = i_size_read(inode: dir);
3206	credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3207	goto do_extend;
3208	}
3209
3210	down_write(sem: &OCFS2_I(inode: dir)->ip_alloc_sem);
3211	drop_alloc_sem = `1`;
3212	dir_i_size = i_size_read(inode: dir);
3213	trace_ocfs2_extend_dir(val1: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno,
3214	val2: dir_i_size);
3215
3216	/ dir->i_size is always block aligned. /
3217	spin_lock(lock: &OCFS2_I(inode: dir)->ip_lock);
3218	if (dir_i_size == ocfs2_clusters_to_bytes(sb, clusters: OCFS2_I(inode: dir)->ip_clusters)) {
3219	spin_unlock(lock: &OCFS2_I(inode: dir)->ip_lock);
3220	ocfs2_init_dinode_extent_tree(et: &et, ci: INODE_CACHE(inode: dir),
3221	bh: parent_fe_bh);
3222	num_free_extents = ocfs2_num_free_extents(et: &et);
3223	if (num_free_extents < `0`) {
3224	status = num_free_extents;
3225	mlog_errno(status);
3226	goto bail;
3227	}
3228
3229	if (!num_free_extents) {
3230	status = ocfs2_reserve_new_metadata(osb, root_el: el, ac: &meta_ac);
3231	if (status < `0`) {
3232	if (status != -ENOSPC)
3233	mlog_errno(status);
3234	goto bail;
3235	}
3236	}
3237
3238	status = ocfs2_reserve_clusters(osb, bits_wanted: `1`, ac: &data_ac);
3239	if (status < `0`) {
3240	if (status != -ENOSPC)
3241	mlog_errno(status);
3242	goto bail;
3243	}
3244
3245	if (ocfs2_dir_resv_allowed(osb))
3246	data_ac->ac_resv = &OCFS2_I(inode: dir)->ip_la_data_resv;
3247
3248	credits = ocfs2_calc_extend_credits(sb, root_el: el);
3249	} else {
3250	spin_unlock(lock: &OCFS2_I(inode: dir)->ip_lock);
3251	credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3252	}
3253
3254	do_extend:
3255	if (ocfs2_dir_indexed(inode: dir))
3256	credits++; / For attaching the new dirent block to the*
3257	* dx_root */
3258
3259	handle = ocfs2_start_trans(osb, max_buffs: credits);
3260	if (IS_ERR(ptr: handle)) {
3261	status = PTR_ERR(ptr: handle);
3262	handle = NULL;
3263	mlog_errno(status);
3264	goto bail;
3265	}
3266
3267	status = ocfs2_do_extend_dir(sb: osb->sb, handle, dir, parent_fe_bh,
3268	data_ac, meta_ac, new_bh: &new_bh);
3269	if (status < `0`) {
3270	mlog_errno(status);
3271	goto bail;
3272	}
3273
3274	ocfs2_set_new_buffer_uptodate(ci: INODE_CACHE(inode: dir), bh: new_bh);
3275
3276	status = ocfs2_journal_access_db(handle, ci: INODE_CACHE(inode: dir), bh: new_bh,
3277	OCFS2_JOURNAL_ACCESS_CREATE);
3278	if (status < `0`) {
3279	mlog_errno(status);
3280	goto bail;
3281	}
3282	memset(new_bh->b_data, `0`, sb->s_blocksize);
3283
3284	de = (struct ocfs2_dir_entry *) new_bh->b_data;
3285	de->inode = `0`;
3286	if (ocfs2_supports_dir_trailer(dir)) {
3287	de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
3288
3289	ocfs2_init_dir_trailer(inode: dir, bh: new_bh, le16_to_cpu(de->rec_len));
3290
3291	if (ocfs2_dir_indexed(inode: dir)) {
3292	status = ocfs2_dx_dir_link_trailer(dir, handle,
3293	dx_root_bh, dirdata_bh: new_bh);
3294	if (status) {
3295	mlog_errno(status);
3296	goto bail;
3297	}
3298	}
3299	} else {
3300	de->rec_len = cpu_to_le16(sb->s_blocksize);
3301	}
3302	ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: `1`);
3303	ocfs2_journal_dirty(handle, bh: new_bh);
3304
3305	dir_i_size += dir->i_sb->s_blocksize;
3306	i_size_write(inode: dir, i_size: dir_i_size);
3307	dir->i_blocks = ocfs2_inode_sector_count(inode: dir);
3308	status = ocfs2_mark_inode_dirty(handle, inode: dir, bh: parent_fe_bh);
3309	if (status < `0`) {
3310	mlog_errno(status);
3311	goto bail;
3312	}
3313
3314	bail_bh:
3315	*new_de_bh = new_bh;
3316	get_bh(bh: *new_de_bh);
3317	bail:
3318	if (handle)
3319	ocfs2_commit_trans(osb, handle);
3320	if (drop_alloc_sem)
3321	up_write(sem: &OCFS2_I(inode: dir)->ip_alloc_sem);
3322
3323	if (data_ac)
3324	ocfs2_free_alloc_context(ac: data_ac);
3325	if (meta_ac)
3326	ocfs2_free_alloc_context(ac: meta_ac);
3327
3328	brelse(bh: new_bh);
3329
3330	return status;
3331	}
3332
3333	static int ocfs2_find_dir_space_id(struct inode dir, struct* buffer_head *di_bh,
3334	const char name, int* namelen,
3335	struct buffer_head **ret_de_bh,
3336	unsigned int *blocks_wanted)
3337	{
3338	int ret;
3339	struct super_block *sb = dir->i_sb;
3340	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
3341	struct ocfs2_dir_entry de, last_de = NULL;
3342	char de_buf, limit;
3343	unsigned long offset = `0`;
3344	unsigned int rec_len, new_rec_len, free_space;
3345
3346	/*
3347	* This calculates how many free bytes we'd have in block zero, should
3348	* this function force expansion to an extent tree.
3349	*/
3350	if (ocfs2_new_dir_wants_trailer(dir))
3351	free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(inode: dir);
3352	else
3353	free_space = dir->i_sb->s_blocksize - i_size_read(inode: dir);
3354
3355	de_buf = di->id2.i_data.id_data;
3356	limit = de_buf + i_size_read(inode: dir);
3357	rec_len = OCFS2_DIR_REC_LEN(namelen);
3358
3359	while (de_buf < limit) {
3360	de = (struct ocfs2_dir_entry *)de_buf;
3361
3362	if (!ocfs2_check_dir_entry(dir, de, bh: di_bh, offset)) {
3363	ret = -ENOENT;
3364	goto out;
3365	}
3366	if (ocfs2_match(len: namelen, name, de)) {
3367	ret = -EEXIST;
3368	goto out;
3369	}
3370	/*
3371	* No need to check for a trailing dirent record here as
3372	* they're not used for inline dirs.
3373	*/
3374
3375	if (ocfs2_dirent_would_fit(de, new_rec_len: rec_len)) {
3376	/ Ok, we found a spot. Return this bh and let*
3377	* the caller actually fill it in. */
3378	*ret_de_bh = di_bh;
3379	get_bh(bh: *ret_de_bh);
3380	ret = `0`;
3381	goto out;
3382	}
3383
3384	last_de = de;
3385	de_buf += le16_to_cpu(de->rec_len);
3386	offset += le16_to_cpu(de->rec_len);
3387	}
3388
3389	/*
3390	* We're going to require expansion of the directory - figure
3391	* out how many blocks we'll need so that a place for the
3392	* dirent can be found.
3393	*/
3394	*blocks_wanted = `1`;
3395	new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
3396	if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
3397	*blocks_wanted = `2`;
3398
3399	ret = -ENOSPC;
3400	out:
3401	return ret;
3402	}
3403
3404	static int ocfs2_find_dir_space_el(struct inode dir, const* char *name,
3405	int namelen, struct buffer_head **ret_de_bh)
3406	{
3407	unsigned long offset;
3408	struct buffer_head *bh = NULL;
3409	unsigned short rec_len;
3410	struct ocfs2_dir_entry *de;
3411	struct super_block *sb = dir->i_sb;
3412	int status;
3413	int blocksize = dir->i_sb->s_blocksize;
3414
3415	status = ocfs2_read_dir_block(inode: dir, v_block: `0`, bh: &bh, flags: `0`);
3416	if (status)
3417	goto bail;
3418
3419	rec_len = OCFS2_DIR_REC_LEN(namelen);
3420	offset = `0`;
3421	de = (struct ocfs2_dir_entry *) bh->b_data;
3422	while (`1`) {
3423	if ((char *)de >= sb->s_blocksize + bh->b_data) {
3424	brelse(bh);
3425	bh = NULL;
3426
3427	if (i_size_read(inode: dir) <= offset) {
3428	/*
3429	* Caller will have to expand this
3430	* directory.
3431	*/
3432	status = -ENOSPC;
3433	goto bail;
3434	}
3435	status = ocfs2_read_dir_block(inode: dir,
3436	v_block: offset >> sb->s_blocksize_bits,
3437	bh: &bh, flags: `0`);
3438	if (status)
3439	goto bail;
3440
3441	/ move to next block /
3442	de = (struct ocfs2_dir_entry *) bh->b_data;
3443	}
3444	if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
3445	status = -ENOENT;
3446	goto bail;
3447	}
3448	if (ocfs2_match(len: namelen, name, de)) {
3449	status = -EEXIST;
3450	goto bail;
3451	}
3452
3453	if (ocfs2_skip_dir_trailer(dir, de, offset: offset % blocksize,
3454	blklen: blocksize))
3455	goto next;
3456
3457	if (ocfs2_dirent_would_fit(de, new_rec_len: rec_len)) {
3458	/ Ok, we found a spot. Return this bh and let*
3459	* the caller actually fill it in. */
3460	*ret_de_bh = bh;
3461	get_bh(bh: *ret_de_bh);
3462	status = `0`;
3463	goto bail;
3464	}
3465	next:
3466	offset += le16_to_cpu(de->rec_len);
3467	de = (struct ocfs2_dir_entry )((char* *) de + le16_to_cpu(de->rec_len));
3468	}
3469
3470	bail:
3471	brelse(bh);
3472	if (status)
3473	mlog_errno(status);
3474
3475	return status;
3476	}
3477
3478	static int dx_leaf_sort_cmp(const void a, const* void *b)
3479	{
3480	const struct ocfs2_dx_entry *entry1 = a;
3481	const struct ocfs2_dx_entry *entry2 = b;
3482	u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash);
3483	u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash);
3484	u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash);
3485	u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash);
3486
3487	if (major_hash1 > major_hash2)
3488	return `1`;
3489	if (major_hash1 < major_hash2)
3490	return -`1`;
3491
3492	/*
3493	* It is not strictly necessary to sort by minor
3494	*/
3495	if (minor_hash1 > minor_hash2)
3496	return `1`;
3497	if (minor_hash1 < minor_hash2)
3498	return -`1`;
3499	return `0`;
3500	}
3501
3502	static void dx_leaf_sort_swap(void a, void* b, int* size)
3503	{
3504	struct ocfs2_dx_entry *entry1 = a;
3505	struct ocfs2_dx_entry *entry2 = b;
3506
3507	BUG_ON(size != sizeof(*entry1));
3508
3509	swap(entry1, entry2);
3510	}
3511
3512	static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
3513	{
3514	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
3515	int i, num = le16_to_cpu(dl_list->de_num_used);
3516
3517	for (i = `0`; i < (num - `1`); i++) {
3518	if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) !=
3519	le32_to_cpu(dl_list->de_entries[i + `1`].dx_major_hash))
3520	return `0`;
3521	}
3522
3523	return `1`;
3524	}
3525
3526	/*
3527	* Find the optimal value to split this leaf on. This expects the leaf
3528	* entries to be in sorted order.
3529	*
3530	* leaf_cpos is the cpos of the leaf we're splitting. insert_hash is
3531	* the hash we want to insert.
3532	*
3533	* This function is only concerned with the major hash - that which
3534	* determines which cluster an item belongs to.
3535	*/
3536	static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf,
3537	u32 leaf_cpos, u32 insert_hash,
3538	u32 *split_hash)
3539	{
3540	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
3541	int i, num_used = le16_to_cpu(dl_list->de_num_used);
3542	int allsame;
3543
3544	/*
3545	* There's a couple rare, but nasty corner cases we have to
3546	* check for here. All of them involve a leaf where all value
3547	* have the same hash, which is what we look for first.
3548	*
3549	* Most of the time, all of the above is false, and we simply
3550	* pick the median value for a split.
3551	*/
3552	allsame = ocfs2_dx_leaf_same_major(dx_leaf);
3553	if (allsame) {
3554	u32 val = le32_to_cpu(dl_list->de_entries[`0`].dx_major_hash);
3555
3556	if (val == insert_hash) {
3557	/*
3558	* No matter where we would choose to split,
3559	* the new entry would want to occupy the same
3560	* block as these. Since there's no space left
3561	* in their existing block, we know there
3562	* won't be space after the split.
3563	*/
3564	return -ENOSPC;
3565	}
3566
3567	if (val == leaf_cpos) {
3568	/*
3569	* Because val is the same as leaf_cpos (which
3570	* is the smallest value this leaf can have),
3571	* yet is not equal to insert_hash, then we
3572	* know that insert_hash must be larger than
3573	* val (and leaf_cpos). At least cpos+1 in value.
3574	*
3575	* We also know then, that there cannot be an
3576	* adjacent extent (otherwise we'd be looking
3577	* at it). Choosing this value gives us a
3578	* chance to get some contiguousness.
3579	*/
3580	*split_hash = leaf_cpos + `1`;
3581	return `0`;
3582	}
3583
3584	if (val > insert_hash) {
3585	/*
3586	* val can not be the same as insert hash, and
3587	* also must be larger than leaf_cpos. Also,
3588	* we know that there can't be a leaf between
3589	* cpos and val, otherwise the entries with
3590	* hash 'val' would be there.
3591	*/
3592	*split_hash = val;
3593	return `0`;
3594	}
3595
3596	*split_hash = insert_hash;
3597	return `0`;
3598	}
3599
3600	/*
3601	* Since the records are sorted and the checks above
3602	* guaranteed that not all records in this block are the same,
3603	* we simple travel forward, from the median, and pick the 1st
3604	* record whose value is larger than leaf_cpos.
3605	*/
3606	for (i = (num_used / `2`); i < num_used; i++)
3607	if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) >
3608	leaf_cpos)
3609	break;
3610
3611	BUG_ON(i == num_used); / Should be impossible /
3612	*split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash);
3613	return `0`;
3614	}
3615
3616	/*
3617	* Transfer all entries in orig_dx_leaves whose major hash is equal to or
3618	* larger than split_hash into new_dx_leaves. We use a temporary
3619	* buffer (tmp_dx_leaf) to make the changes to the original leaf blocks.
3620	*
3621	* Since the block offset inside a leaf (cluster) is a constant mask
3622	* of minor_hash, we can optimize - an item at block offset X within
3623	* the original cluster, will be at offset X within the new cluster.
3624	*/
3625	static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
3626	handle_t *handle,
3627	struct ocfs2_dx_leaf *tmp_dx_leaf,
3628	struct buffer_head **orig_dx_leaves,
3629	struct buffer_head **new_dx_leaves,
3630	int num_dx_leaves)
3631	{
3632	int i, j, num_used;
3633	u32 major_hash;
3634	struct ocfs2_dx_leaf orig_dx_leaf, new_dx_leaf;
3635	struct ocfs2_dx_entry_list orig_list, tmp_list;
3636	struct ocfs2_dx_entry *dx_entry;
3637
3638	tmp_list = &tmp_dx_leaf->dl_list;
3639
3640	for (i = `0`; i < num_dx_leaves; i++) {
3641	orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
3642	orig_list = &orig_dx_leaf->dl_list;
3643	new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
3644
3645	num_used = le16_to_cpu(orig_list->de_num_used);
3646
3647	memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize);
3648	tmp_list->de_num_used = cpu_to_le16(`0`);
3649	memset(&tmp_list->de_entries, `0`, sizeof(dx_entry)num_used);
3650
3651	for (j = `0`; j < num_used; j++) {
3652	dx_entry = &orig_list->de_entries[j];
3653	major_hash = le32_to_cpu(dx_entry->dx_major_hash);
3654	if (major_hash >= split_hash)
3655	ocfs2_dx_dir_leaf_insert_tail(dx_leaf: new_dx_leaf,
3656	dx_new_entry: dx_entry);
3657	else
3658	ocfs2_dx_dir_leaf_insert_tail(dx_leaf: tmp_dx_leaf,
3659	dx_new_entry: dx_entry);
3660	}
3661	memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize);
3662
3663	ocfs2_journal_dirty(handle, bh: orig_dx_leaves[i]);
3664	ocfs2_journal_dirty(handle, bh: new_dx_leaves[i]);
3665	}
3666	}
3667
3668	static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
3669	struct ocfs2_dx_root_block *dx_root)
3670	{
3671	int credits = ocfs2_clusters_to_blocks(sb: osb->sb, clusters: `3`);
3672
3673	credits += ocfs2_calc_extend_credits(sb: osb->sb, root_el: &dx_root->dr_list);
3674	credits += ocfs2_quota_trans_credits(sb: osb->sb);
3675	return credits;
3676	}
3677
3678	/*
3679	* Find the median value in dx_leaf_bh and allocate a new leaf to move
3680	* half our entries into.
3681	*/
3682	static int ocfs2_dx_dir_rebalance(struct ocfs2_super osb, struct* inode *dir,
3683	struct buffer_head *dx_root_bh,
3684	struct buffer_head *dx_leaf_bh,
3685	struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos,
3686	u64 leaf_blkno)
3687	{
3688	struct ocfs2_dx_leaf dx_leaf = (struct* ocfs2_dx_leaf *)dx_leaf_bh->b_data;
3689	int credits, ret, i, num_used, did_quota = `0`;
3690	u32 cpos, split_hash, insert_hash = hinfo->major_hash;
3691	u64 orig_leaves_start;
3692	int num_dx_leaves;
3693	struct buffer_head **orig_dx_leaves = NULL;
3694	struct buffer_head **new_dx_leaves = NULL;
3695	struct ocfs2_alloc_context data_ac = NULL, meta_ac = NULL;
3696	struct ocfs2_extent_tree et;
3697	handle_t *handle = NULL;
3698	struct ocfs2_dx_root_block *dx_root;
3699	struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
3700
3701	trace_ocfs2_dx_dir_rebalance(val1: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno,
3702	val2: (unsigned long long)leaf_blkno,
3703	val3: insert_hash);
3704
3705	ocfs2_init_dx_root_extent_tree(et: &et, ci: INODE_CACHE(inode: dir), bh: dx_root_bh);
3706
3707	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3708	/*
3709	* XXX: This is a rather large limit. We should use a more
3710	* realistic value.
3711	*/
3712	if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX)
3713	return -ENOSPC;
3714
3715	num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used);
3716	if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) {
3717	mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: "
3718	"%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno,
3719	(unsigned long long)leaf_blkno, num_used);
3720	ret = -EIO;
3721	goto out;
3722	}
3723
3724	orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb: osb->sb, ret_num_leaves: &num_dx_leaves);
3725	if (!orig_dx_leaves) {
3726	ret = -ENOMEM;
3727	mlog_errno(ret);
3728	goto out;
3729	}
3730
3731	new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb: osb->sb, NULL);
3732	if (!new_dx_leaves) {
3733	ret = -ENOMEM;
3734	mlog_errno(ret);
3735	goto out;
3736	}
3737
3738	ret = ocfs2_lock_allocators(inode: dir, et: &et, clusters_to_add: `1`, extents_to_split: `0`, data_ac: &data_ac, meta_ac: &meta_ac);
3739	if (ret) {
3740	if (ret != -ENOSPC)
3741	mlog_errno(ret);
3742	goto out;
3743	}
3744
3745	credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root);
3746	handle = ocfs2_start_trans(osb, max_buffs: credits);
3747	if (IS_ERR(ptr: handle)) {
3748	ret = PTR_ERR(ptr: handle);
3749	handle = NULL;
3750	mlog_errno(ret);
3751	goto out;
3752	}
3753
3754	ret = dquot_alloc_space_nodirty(inode: dir,
3755	nr: ocfs2_clusters_to_bytes(sb: dir->i_sb, clusters: `1`));
3756	if (ret)
3757	goto out_commit;
3758	did_quota = `1`;
3759
3760	ret = ocfs2_journal_access_dl(handle, ci: INODE_CACHE(inode: dir), bh: dx_leaf_bh,
3761	OCFS2_JOURNAL_ACCESS_WRITE);
3762	if (ret) {
3763	mlog_errno(ret);
3764	goto out_commit;
3765	}
3766
3767	/*
3768	* This block is changing anyway, so we can sort it in place.
3769	*/
3770	sort(base: dx_leaf->dl_list.de_entries, num: num_used,
3771	size: sizeof(struct ocfs2_dx_entry), cmp_func: dx_leaf_sort_cmp,
3772	swap_func: dx_leaf_sort_swap);
3773
3774	ocfs2_journal_dirty(handle, bh: dx_leaf_bh);
3775
3776	ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
3777	split_hash: &split_hash);
3778	if (ret) {
3779	mlog_errno(ret);
3780	goto out_commit;
3781	}
3782
3783	trace_ocfs2_dx_dir_rebalance_split(value1: leaf_cpos, value2: split_hash, value3: insert_hash);
3784
3785	/*
3786	* We have to carefully order operations here. There are items
3787	* which want to be in the new cluster before insert, but in
3788	* order to put those items in the new cluster, we alter the
3789	* old cluster. A failure to insert gets nasty.
3790	*
3791	* So, start by reserving writes to the old
3792	* cluster. ocfs2_dx_dir_new_cluster will reserve writes on
3793	* the new cluster for us, before inserting it. The insert
3794	* won't happen if there's an error before that. Once the
3795	* insert is done then, we can transfer from one leaf into the
3796	* other without fear of hitting any error.
3797	*/
3798
3799	/*
3800	* The leaf transfer wants some scratch space so that we don't
3801	* wind up doing a bunch of expensive memmove().
3802	*/
3803	tmp_dx_leaf = kmalloc(size: osb->sb->s_blocksize, GFP_NOFS);
3804	if (!tmp_dx_leaf) {
3805	ret = -ENOMEM;
3806	mlog_errno(ret);
3807	goto out_commit;
3808	}
3809
3810	orig_leaves_start = ocfs2_block_to_cluster_start(sb: dir->i_sb, blocks: leaf_blkno);
3811	ret = ocfs2_read_dx_leaves(dir, start: orig_leaves_start, num: num_dx_leaves,
3812	dx_leaf_bhs: orig_dx_leaves);
3813	if (ret) {
3814	mlog_errno(ret);
3815	goto out_commit;
3816	}
3817
3818	cpos = split_hash;
3819	ret = ocfs2_dx_dir_new_cluster(dir, et: &et, cpos, handle,
3820	data_ac, meta_ac, dx_leaves: new_dx_leaves,
3821	num_dx_leaves);
3822	if (ret) {
3823	mlog_errno(ret);
3824	goto out_commit;
3825	}
3826
3827	for (i = `0`; i < num_dx_leaves; i++) {
3828	ret = ocfs2_journal_access_dl(handle, ci: INODE_CACHE(inode: dir),
3829	bh: orig_dx_leaves[i],
3830	OCFS2_JOURNAL_ACCESS_WRITE);
3831	if (ret) {
3832	mlog_errno(ret);
3833	goto out_commit;
3834	}
3835
3836	ret = ocfs2_journal_access_dl(handle, ci: INODE_CACHE(inode: dir),
3837	bh: new_dx_leaves[i],
3838	OCFS2_JOURNAL_ACCESS_WRITE);
3839	if (ret) {
3840	mlog_errno(ret);
3841	goto out_commit;
3842	}
3843	}
3844
3845	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
3846	orig_dx_leaves, new_dx_leaves, num_dx_leaves);
3847
3848	out_commit:
3849	if (ret < `0` && did_quota)
3850	dquot_free_space_nodirty(inode: dir,
3851	nr: ocfs2_clusters_to_bytes(sb: dir->i_sb, clusters: `1`));
3852
3853	ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: `1`);
3854	ocfs2_commit_trans(osb, handle);
3855
3856	out:
3857	if (orig_dx_leaves \|\| new_dx_leaves) {
3858	for (i = `0`; i < num_dx_leaves; i++) {
3859	if (orig_dx_leaves)
3860	brelse(bh: orig_dx_leaves[i]);
3861	if (new_dx_leaves)
3862	brelse(bh: new_dx_leaves[i]);
3863	}
3864	kfree(objp: orig_dx_leaves);
3865	kfree(objp: new_dx_leaves);
3866	}
3867
3868	if (meta_ac)
3869	ocfs2_free_alloc_context(ac: meta_ac);
3870	if (data_ac)
3871	ocfs2_free_alloc_context(ac: data_ac);
3872
3873	kfree(objp: tmp_dx_leaf);
3874	return ret;
3875	}
3876
3877	static int ocfs2_find_dir_space_dx(struct ocfs2_super osb, struct* inode *dir,
3878	struct buffer_head *di_bh,
3879	struct buffer_head *dx_root_bh,
3880	const char name, int* namelen,
3881	struct ocfs2_dir_lookup_result *lookup)
3882	{
3883	int ret, rebalanced = `0`;
3884	struct ocfs2_dx_root_block *dx_root;
3885	struct buffer_head *dx_leaf_bh = NULL;
3886	struct ocfs2_dx_leaf *dx_leaf;
3887	u64 blkno;
3888	u32 leaf_cpos;
3889
3890	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3891
3892	restart_search:
3893	ret = ocfs2_dx_dir_lookup(inode: dir, el: &dx_root->dr_list, hinfo: &lookup->dl_hinfo,
3894	ret_cpos: &leaf_cpos, ret_phys_blkno: &blkno);
3895	if (ret) {
3896	mlog_errno(ret);
3897	goto out;
3898	}
3899
3900	ret = ocfs2_read_dx_leaf(dir, blkno, dx_leaf_bh: &dx_leaf_bh);
3901	if (ret) {
3902	mlog_errno(ret);
3903	goto out;
3904	}
3905
3906	dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
3907
3908	if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >=
3909	le16_to_cpu(dx_leaf->dl_list.de_count)) {
3910	if (rebalanced) {
3911	/*
3912	* Rebalancing should have provided us with
3913	* space in an appropriate leaf.
3914	*
3915	* XXX: Is this an abnormal condition then?
3916	* Should we print a message here?
3917	*/
3918	ret = -ENOSPC;
3919	goto out;
3920	}
3921
3922	ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
3923	hinfo: &lookup->dl_hinfo, leaf_cpos,
3924	leaf_blkno: blkno);
3925	if (ret) {
3926	if (ret != -ENOSPC)
3927	mlog_errno(ret);
3928	goto out;
3929	}
3930
3931	/*
3932	* Restart the lookup. The rebalance might have
3933	* changed which block our item fits into. Mark our
3934	* progress, so we only execute this once.
3935	*/
3936	brelse(bh: dx_leaf_bh);
3937	dx_leaf_bh = NULL;
3938	rebalanced = `1`;
3939	goto restart_search;
3940	}
3941
3942	lookup->dl_dx_leaf_bh = dx_leaf_bh;
3943	dx_leaf_bh = NULL;
3944
3945	out:
3946	brelse(bh: dx_leaf_bh);
3947	return ret;
3948	}
3949
3950	static int ocfs2_search_dx_free_list(struct inode *dir,
3951	struct buffer_head *dx_root_bh,
3952	int namelen,
3953	struct ocfs2_dir_lookup_result *lookup)
3954	{
3955	int ret = -ENOSPC;
3956	struct buffer_head leaf_bh = NULL, prev_leaf_bh = NULL;
3957	struct ocfs2_dir_block_trailer *db;
3958	u64 next_block;
3959	int rec_len = OCFS2_DIR_REC_LEN(namelen);
3960	struct ocfs2_dx_root_block *dx_root;
3961
3962	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3963	next_block = le64_to_cpu(dx_root->dr_free_blk);
3964
3965	while (next_block) {
3966	brelse(bh: prev_leaf_bh);
3967	prev_leaf_bh = leaf_bh;
3968	leaf_bh = NULL;
3969
3970	ret = ocfs2_read_dir_block_direct(dir, phys: next_block, bh: &leaf_bh);
3971	if (ret) {
3972	mlog_errno(ret);
3973	goto out;
3974	}
3975
3976	db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
3977	if (rec_len <= le16_to_cpu(db->db_free_rec_len)) {
3978	lookup->dl_leaf_bh = leaf_bh;
3979	lookup->dl_prev_leaf_bh = prev_leaf_bh;
3980	leaf_bh = NULL;
3981	prev_leaf_bh = NULL;
3982	break;
3983	}
3984
3985	next_block = le64_to_cpu(db->db_free_next);
3986	}
3987
3988	if (!next_block)
3989	ret = -ENOSPC;
3990
3991	out:
3992
3993	brelse(bh: leaf_bh);
3994	brelse(bh: prev_leaf_bh);
3995	return ret;
3996	}
3997
3998	static int ocfs2_expand_inline_dx_root(struct inode *dir,
3999	struct buffer_head *dx_root_bh)
4000	{
4001	int ret, num_dx_leaves, i, j, did_quota = `0`;
4002	struct buffer_head **dx_leaves = NULL;
4003	struct ocfs2_extent_tree et;
4004	u64 insert_blkno;
4005	struct ocfs2_alloc_context *data_ac = NULL;
4006	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4007	handle_t *handle = NULL;
4008	struct ocfs2_dx_root_block *dx_root;
4009	struct ocfs2_dx_entry_list *entry_list;
4010	struct ocfs2_dx_entry *dx_entry;
4011	struct ocfs2_dx_leaf *target_leaf;
4012
4013	ret = ocfs2_reserve_clusters(osb, bits_wanted: `1`, ac: &data_ac);
4014	if (ret) {
4015	mlog_errno(ret);
4016	goto out;
4017	}
4018
4019	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb: osb->sb, ret_num_leaves: &num_dx_leaves);
4020	if (!dx_leaves) {
4021	ret = -ENOMEM;
4022	mlog_errno(ret);
4023	goto out;
4024	}
4025
4026	handle = ocfs2_start_trans(osb, max_buffs: ocfs2_calc_dxi_expand_credits(sb: osb->sb));
4027	if (IS_ERR(ptr: handle)) {
4028	ret = PTR_ERR(ptr: handle);
4029	mlog_errno(ret);
4030	goto out;
4031	}
4032
4033	ret = dquot_alloc_space_nodirty(inode: dir,
4034	nr: ocfs2_clusters_to_bytes(sb: osb->sb, clusters: `1`));
4035	if (ret)
4036	goto out_commit;
4037	did_quota = `1`;
4038
4039	/*
4040	* We do this up front, before the allocation, so that a
4041	* failure to add the dx_root_bh to the journal won't result
4042	* us losing clusters.
4043	*/
4044	ret = ocfs2_journal_access_dr(handle, ci: INODE_CACHE(inode: dir), bh: dx_root_bh,
4045	OCFS2_JOURNAL_ACCESS_WRITE);
4046	if (ret) {
4047	mlog_errno(ret);
4048	goto out_commit;
4049	}
4050
4051	ret = __ocfs2_dx_dir_new_cluster(dir, cpos: `0`, handle, data_ac, dx_leaves,
4052	num_dx_leaves, ret_phys_blkno: &insert_blkno);
4053	if (ret) {
4054	mlog_errno(ret);
4055	goto out_commit;
4056	}
4057
4058	/*
4059	* Transfer the entries from our dx_root into the appropriate
4060	* block
4061	*/
4062	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
4063	entry_list = &dx_root->dr_entries;
4064
4065	for (i = `0`; i < le16_to_cpu(entry_list->de_num_used); i++) {
4066	dx_entry = &entry_list->de_entries[i];
4067
4068	j = __ocfs2_dx_dir_hash_idx(osb,
4069	le32_to_cpu(dx_entry->dx_minor_hash));
4070	target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;
4071
4072	ocfs2_dx_dir_leaf_insert_tail(dx_leaf: target_leaf, dx_new_entry: dx_entry);
4073
4074	/ Each leaf has been passed to the journal already*
4075	* via __ocfs2_dx_dir_new_cluster() */
4076	}
4077
4078	dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
4079	memset(&dx_root->dr_list, `0`, osb->sb->s_blocksize -
4080	offsetof(struct ocfs2_dx_root_block, dr_list));
4081	dx_root->dr_list.l_count =
4082	cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
4083
4084	/ This should never fail considering we start with an empty*
4085	* dx_root. */
4086	ocfs2_init_dx_root_extent_tree(et: &et, ci: INODE_CACHE(inode: dir), bh: dx_root_bh);
4087	ret = ocfs2_insert_extent(handle, et: &et, cpos: `0`, start_blk: insert_blkno, new_clusters: `1`, flags: `0`, NULL);
4088	if (ret)
4089	mlog_errno(ret);
4090	did_quota = `0`;
4091
4092	ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: `1`);
4093	ocfs2_journal_dirty(handle, bh: dx_root_bh);
4094
4095	out_commit:
4096	if (ret < `0` && did_quota)
4097	dquot_free_space_nodirty(inode: dir,
4098	nr: ocfs2_clusters_to_bytes(sb: dir->i_sb, clusters: `1`));
4099
4100	ocfs2_commit_trans(osb, handle);
4101
4102	out:
4103	if (data_ac)
4104	ocfs2_free_alloc_context(ac: data_ac);
4105
4106	if (dx_leaves) {
4107	for (i = `0`; i < num_dx_leaves; i++)
4108	brelse(bh: dx_leaves[i]);
4109	kfree(objp: dx_leaves);
4110	}
4111	return ret;
4112	}
4113
4114	static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
4115	{
4116	struct ocfs2_dx_root_block *dx_root;
4117	struct ocfs2_dx_entry_list *entry_list;
4118
4119	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
4120	entry_list = &dx_root->dr_entries;
4121
4122	if (le16_to_cpu(entry_list->de_num_used) >=
4123	le16_to_cpu(entry_list->de_count))
4124	return -ENOSPC;
4125
4126	return `0`;
4127	}
4128
4129	static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir,
4130	struct buffer_head *di_bh,
4131	const char *name,
4132	int namelen,
4133	struct ocfs2_dir_lookup_result *lookup)
4134	{
4135	int ret, free_dx_root = `1`;
4136	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4137	struct buffer_head *dx_root_bh = NULL;
4138	struct buffer_head *leaf_bh = NULL;
4139	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
4140	struct ocfs2_dx_root_block *dx_root;
4141
4142	ret = ocfs2_read_dx_root(dir, di, dx_root_bh: &dx_root_bh);
4143	if (ret) {
4144	mlog_errno(ret);
4145	goto out;
4146	}
4147
4148	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4149	if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) {
4150	ret = -ENOSPC;
4151	mlog_errno(ret);
4152	goto out;
4153	}
4154
4155	if (ocfs2_dx_root_inline(dx_root)) {
4156	ret = ocfs2_inline_dx_has_space(dx_root_bh);
4157
4158	if (ret == `0`)
4159	goto search_el;
4160
4161	/*
4162	* We ran out of room in the root block. Expand it to
4163	* an extent, then allow ocfs2_find_dir_space_dx to do
4164	* the rest.
4165	*/
4166	ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
4167	if (ret) {
4168	mlog_errno(ret);
4169	goto out;
4170	}
4171	}
4172
4173	/*
4174	* Insert preparation for an indexed directory is split into two
4175	* steps. The call to find_dir_space_dx reserves room in the index for
4176	* an additional item. If we run out of space there, it's a real error
4177	* we can't continue on.
4178	*/
4179	ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name,
4180	namelen, lookup);
4181	if (ret) {
4182	mlog_errno(ret);
4183	goto out;
4184	}
4185
4186	search_el:
4187	/*
4188	* Next, we need to find space in the unindexed tree. This call
4189	* searches using the free space linked list. If the unindexed tree
4190	* lacks sufficient space, we'll expand it below. The expansion code
4191	* is smart enough to add any new blocks to the free space list.
4192	*/
4193	ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup);
4194	if (ret && ret != -ENOSPC) {
4195	mlog_errno(ret);
4196	goto out;
4197	}
4198
4199	/ Do this up here - ocfs2_extend_dir might need the dx_root /
4200	lookup->dl_dx_root_bh = dx_root_bh;
4201	free_dx_root = `0`;
4202
4203	if (ret == -ENOSPC) {
4204	ret = ocfs2_extend_dir(osb, dir, parent_fe_bh: di_bh, blocks_wanted: `1`, lookup, new_de_bh: &leaf_bh);
4205
4206	if (ret) {
4207	mlog_errno(ret);
4208	goto out;
4209	}
4210
4211	/*
4212	* We make the assumption here that new leaf blocks are added
4213	* to the front of our free list.
4214	*/
4215	lookup->dl_prev_leaf_bh = NULL;
4216	lookup->dl_leaf_bh = leaf_bh;
4217	}
4218
4219	out:
4220	if (free_dx_root)
4221	brelse(bh: dx_root_bh);
4222	return ret;
4223	}
4224
4225	/*
4226	* Get a directory ready for insert. Any directory allocation required
4227	* happens here. Success returns zero, and enough context in the dir
4228	* lookup result that ocfs2_add_entry() will be able complete the task
4229	* with minimal performance impact.
4230	*/
4231	int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
4232	struct inode *dir,
4233	struct buffer_head *parent_fe_bh,
4234	const char *name,
4235	int namelen,
4236	struct ocfs2_dir_lookup_result *lookup)
4237	{
4238	int ret;
4239	unsigned int blocks_wanted = `1`;
4240	struct buffer_head *bh = NULL;
4241
4242	trace_ocfs2_prepare_dir_for_insert(
4243	val1: (unsigned long long)OCFS2_I(inode: dir)->ip_blkno, val2: namelen);
4244
4245	/*
4246	* Do this up front to reduce confusion.
4247	*
4248	* The directory might start inline, then be turned into an
4249	* indexed one, in which case we'd need to hash deep inside
4250	* ocfs2_find_dir_space_id(). Since
4251	* ocfs2_prepare_dx_dir_for_insert() also needs this hash
4252	* done, there seems no point in spreading out the calls. We
4253	* can optimize away the case where the file system doesn't
4254	* support indexing.
4255	*/
4256	if (ocfs2_supports_indexed_dirs(osb))
4257	ocfs2_dx_dir_name_hash(dir, name, len: namelen, hinfo: &lookup->dl_hinfo);
4258
4259	if (ocfs2_dir_indexed(inode: dir)) {
4260	ret = ocfs2_prepare_dx_dir_for_insert(dir, di_bh: parent_fe_bh,
4261	name, namelen, lookup);
4262	if (ret)
4263	mlog_errno(ret);
4264	goto out;
4265	}
4266
4267	if (OCFS2_I(inode: dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
4268	ret = ocfs2_find_dir_space_id(dir, di_bh: parent_fe_bh, name,
4269	namelen, ret_de_bh: &bh, blocks_wanted: &blocks_wanted);
4270	} else
4271	ret = ocfs2_find_dir_space_el(dir, name, namelen, ret_de_bh: &bh);
4272
4273	if (ret && ret != -ENOSPC) {
4274	mlog_errno(ret);
4275	goto out;
4276	}
4277
4278	if (ret == -ENOSPC) {
4279	/*
4280	* We have to expand the directory to add this name.
4281	*/
4282	BUG_ON(bh);
4283
4284	ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
4285	lookup, new_de_bh: &bh);
4286	if (ret) {
4287	if (ret != -ENOSPC)
4288	mlog_errno(ret);
4289	goto out;
4290	}
4291
4292	BUG_ON(!bh);
4293	}
4294
4295	lookup->dl_leaf_bh = bh;
4296	bh = NULL;
4297	out:
4298	brelse(bh);
4299	return ret;
4300	}
4301
4302	static int ocfs2_dx_dir_remove_index(struct inode *dir,
4303	struct buffer_head *di_bh,
4304	struct buffer_head *dx_root_bh)
4305	{
4306	int ret;
4307	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4308	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
4309	struct ocfs2_dx_root_block *dx_root;
4310	struct inode *dx_alloc_inode = NULL;
4311	struct buffer_head *dx_alloc_bh = NULL;
4312	handle_t *handle;
4313	u64 blk;
4314	u16 bit;
4315	u64 bg_blkno;
4316
4317	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
4318
4319	dx_alloc_inode = ocfs2_get_system_file_inode(osb,
4320	type: EXTENT_ALLOC_SYSTEM_INODE,
4321	le16_to_cpu(dx_root->dr_suballoc_slot));
4322	if (!dx_alloc_inode) {
4323	ret = -ENOMEM;
4324	mlog_errno(ret);
4325	goto out;
4326	}
4327	inode_lock(inode: dx_alloc_inode);
4328
4329	ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, `1`);
4330	if (ret) {
4331	mlog_errno(ret);
4332	goto out_mutex;
4333	}
4334
4335	handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS);
4336	if (IS_ERR(ptr: handle)) {
4337	ret = PTR_ERR(ptr: handle);
4338	mlog_errno(ret);
4339	goto out_unlock;
4340	}
4341
4342	ret = ocfs2_journal_access_di(handle, ci: INODE_CACHE(inode: dir), bh: di_bh,
4343	OCFS2_JOURNAL_ACCESS_WRITE);
4344	if (ret) {
4345	mlog_errno(ret);
4346	goto out_commit;
4347	}
4348
4349	spin_lock(lock: &OCFS2_I(inode: dir)->ip_lock);
4350	OCFS2_I(inode: dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4351	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4352	spin_unlock(lock: &OCFS2_I(inode: dir)->ip_lock);
4353	di->i_dx_root = cpu_to_le64(`0ULL`);
4354	ocfs2_update_inode_fsync_trans(handle, inode: dir, datasync: `1`);
4355
4356	ocfs2_journal_dirty(handle, bh: di_bh);
4357
4358	blk = le64_to_cpu(dx_root->dr_blkno);
4359	bit = le16_to_cpu(dx_root->dr_suballoc_bit);
4360	if (dx_root->dr_suballoc_loc)
4361	bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
4362	else
4363	bg_blkno = ocfs2_which_suballoc_group(block: blk, bit);
4364	ret = ocfs2_free_suballoc_bits(handle, alloc_inode: dx_alloc_inode, alloc_bh: dx_alloc_bh,
4365	start_bit: bit, bg_blkno, count: `1`);
4366	if (ret)
4367	mlog_errno(ret);
4368
4369	out_commit:
4370	ocfs2_commit_trans(osb, handle);
4371
4372	out_unlock:
4373	ocfs2_inode_unlock(inode: dx_alloc_inode, ex: `1`);
4374
4375	out_mutex:
4376	inode_unlock(inode: dx_alloc_inode);
4377	brelse(bh: dx_alloc_bh);
4378	out:
4379	iput(dx_alloc_inode);
4380	return ret;
4381	}
4382
4383	int ocfs2_dx_dir_truncate(struct inode dir, struct* buffer_head *di_bh)
4384	{
4385	int ret;
4386	unsigned int clen;
4387	u32 major_hash = UINT_MAX, p_cpos, cpos;
4388	u64 blkno;
4389	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4390	struct buffer_head *dx_root_bh = NULL;
4391	struct ocfs2_dx_root_block *dx_root;
4392	struct ocfs2_dinode di = (struct* ocfs2_dinode *)di_bh->b_data;
4393	struct ocfs2_cached_dealloc_ctxt dealloc;
4394	struct ocfs2_extent_tree et;
4395
4396	ocfs2_init_dealloc_ctxt(c: &dealloc);
4397
4398	if (!ocfs2_dir_indexed(inode: dir))
4399	return `0`;
4400
4401	ret = ocfs2_read_dx_root(dir, di, dx_root_bh: &dx_root_bh);
4402	if (ret) {
4403	mlog_errno(ret);
4404	goto out;
4405	}
4406	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4407
4408	if (ocfs2_dx_root_inline(dx_root))
4409	goto remove_index;
4410
4411	ocfs2_init_dx_root_extent_tree(et: &et, ci: INODE_CACHE(inode: dir), bh: dx_root_bh);
4412
4413	/ XXX: What if dr_clusters is too large? /
4414	while (le32_to_cpu(dx_root->dr_clusters)) {
4415	ret = ocfs2_dx_dir_lookup_rec(inode: dir, el: &dx_root->dr_list,
4416	major_hash, ret_cpos: &cpos, ret_phys_blkno: &blkno, ret_clen: &clen);
4417	if (ret) {
4418	mlog_errno(ret);
4419	goto out;
4420	}
4421
4422	p_cpos = ocfs2_blocks_to_clusters(sb: dir->i_sb, blocks: blkno);
4423
4424	ret = ocfs2_remove_btree_range(inode: dir, et: &et, cpos, phys_cpos: p_cpos, len: clen, flags: `0`,
4425	dealloc: &dealloc, refcount_loc: `0`, refcount_tree_locked: false);
4426	if (ret) {
4427	mlog_errno(ret);
4428	goto out;
4429	}
4430
4431	if (cpos == `0`)
4432	break;
4433
4434	major_hash = cpos - `1`;
4435	}
4436
4437	remove_index:
4438	ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
4439	if (ret) {
4440	mlog_errno(ret);
4441	goto out;
4442	}
4443
4444	ocfs2_remove_from_cache(ci: INODE_CACHE(inode: dir), bh: dx_root_bh);
4445	out:
4446	ocfs2_schedule_truncate_log_flush(osb, cancel: `1`);
4447	ocfs2_run_deallocs(osb, ctxt: &dealloc);
4448
4449	brelse(bh: dx_root_bh);
4450	return ret;
4451	}
4452

source code of linux/fs/ocfs2/dir.c