util.c source code [linux/fs/gfs2/util.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4	* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
5	*/
6
7	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9	#include <linux/spinlock.h>
10	#include <linux/completion.h>
11	#include <linux/buffer_head.h>
12	#include <linux/kthread.h>
13	#include <linux/crc32.h>
14	#include <linux/gfs2_ondisk.h>
15	#include <linux/delay.h>
16	#include <linux/uaccess.h>
17
18	#include "gfs2.h"
19	#include "incore.h"
20	#include "glock.h"
21	#include "glops.h"
22	#include "log.h"
23	#include "lops.h"
24	#include "recovery.h"
25	#include "rgrp.h"
26	#include "super.h"
27	#include "util.h"
28
29	struct kmem_cache *gfs2_glock_cachep __read_mostly;
30	struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
31	struct kmem_cache *gfs2_inode_cachep __read_mostly;
32	struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
33	struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
34	struct kmem_cache *gfs2_quotad_cachep __read_mostly;
35	struct kmem_cache *gfs2_qadata_cachep __read_mostly;
36	struct kmem_cache *gfs2_trans_cachep __read_mostly;
37	mempool_t *gfs2_page_pool __read_mostly;
38
/**
 * gfs2_assert_i - Report a fatal assertion failure
 * @sdp: the filesystem the message is attributed to
 *
 * Only logs at emergency level; it does not withdraw or panic by itself.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
	fs_emerg(sdp, "fatal assertion failed\n");
}
43
44	/**
45	* check_journal_clean - Make sure a journal is clean for a spectator mount
46	* @sdp: The GFS2 superblock
47	* @jd: The journal descriptor
48	* @verbose: Show more prints in the log
49	*
50	* Returns: 0 if the journal is clean or locked, else an error
51	*/
52	int check_journal_clean(struct gfs2_sbd sdp, struct* gfs2_jdesc *jd,
53	bool verbose)
54	{
55	int error;
56	struct gfs2_holder j_gh;
57	struct gfs2_log_header_host head;
58	struct gfs2_inode *ip;
59
60	ip = GFS2_I(inode: jd->jd_inode);
61	error = gfs2_glock_nq_init(gl: ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP \|
62	GL_EXACT \| GL_NOCACHE, gh: &j_gh);
63	if (error) {
64	if (verbose)
65	fs_err(sdp, "Error %d locking journal for spectator "
66	"mount.\n", error);
67	return -EPERM;
68	}
69	error = gfs2_jdesc_check(jd);
70	if (error) {
71	if (verbose)
72	fs_err(sdp, "Error checking journal for spectator "
73	"mount.\n");
74	goto out_unlock;
75	}
76	error = gfs2_find_jhead(jd, head: &head, keep_cache: false);
77	if (error) {
78	if (verbose)
79	fs_err(sdp, "Error parsing journal for spectator "
80	"mount.\n");
81	goto out_unlock;
82	}
83	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
84	error = -EPERM;
85	if (verbose)
86	fs_err(sdp, "jid=%u: Journal is dirty, so the first "
87	"mounter must not be a spectator.\n",
88	jd->jd_jid);
89	}
90
91	out_unlock:
92	gfs2_glock_dq_uninit(gh: &j_gh);
93	return error;
94	}
95
96	/**
97	* gfs2_freeze_lock_shared - hold the freeze glock
98	* @sdp: the superblock
99	*/
100	int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp)
101	{
102	int error;
103
104	error = gfs2_glock_nq_init(gl: sdp->sd_freeze_gl, LM_ST_SHARED,
105	LM_FLAG_NOEXP \| GL_EXACT,
106	gh: &sdp->sd_freeze_gh);
107	if (error)
108	fs_err(sdp, "can't lock the freeze glock: %d\n", error);
109	return error;
110	}
111
/**
 * gfs2_freeze_unlock - release a freeze glock holder
 * @freeze_gh: the holder to release
 *
 * Dequeues and uninitializes @freeze_gh, but only if the holder is
 * currently initialized; otherwise this is a no-op.
 */
void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
{
	if (gfs2_holder_initialized(freeze_gh))
		gfs2_glock_dq_uninit(freeze_gh);
}
117
118	static void signal_our_withdraw(struct gfs2_sbd *sdp)
119	{
120	struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
121	struct inode *inode;
122	struct gfs2_inode *ip;
123	struct gfs2_glock *i_gl;
124	u64 no_formal_ino;
125	int ret = `0`;
126	int tries;
127
128	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) \|\| !sdp->sd_jdesc)
129	return;
130
131	gfs2_ail_drain(sdp); / frees all transactions /
132	inode = sdp->sd_jdesc->jd_inode;
133	ip = GFS2_I(inode);
134	i_gl = ip->i_gl;
135	no_formal_ino = ip->i_no_formal_ino;
136
137	/ Prevent any glock dq until withdraw recovery is complete /
138	set_bit(nr: SDF_WITHDRAW_RECOVERY, addr: &sdp->sd_flags);
139	/*
140	* Don't tell dlm we're bailing until we have no more buffers in the
141	* wind. If journal had an IO error, the log code should just purge
142	* the outstanding buffers rather than submitting new IO. Making the
143	* file system read-only will flush the journal, etc.
144	*
145	* During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
146	* which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
147	* any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
148	* therefore we need to clear SDF_JOURNAL_LIVE manually.
149	*/
150	clear_bit(nr: SDF_JOURNAL_LIVE, addr: &sdp->sd_flags);
151	if (!sb_rdonly(sb: sdp->sd_vfs)) {
152	bool locked = mutex_trylock(lock: &sdp->sd_freeze_mutex);
153
154	wake_up(&sdp->sd_logd_waitq);
155	wake_up(&sdp->sd_quota_wait);
156
157	wait_event_timeout(sdp->sd_log_waitq,
158	gfs2_log_is_empty(sdp),
159	HZ * `5`);
160
161	sdp->sd_vfs->s_flags \|= SB_RDONLY;
162
163	if (locked)
164	mutex_unlock(lock: &sdp->sd_freeze_mutex);
165
166	/*
167	* Dequeue any pending non-system glock holders that can no
168	* longer be granted because the file system is withdrawn.
169	*/
170	gfs2_gl_dq_holders(sdp);
171	}
172
173	if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { / lock_nolock /
174	if (!ret)
175	ret = -EIO;
176	clear_bit(nr: SDF_WITHDRAW_RECOVERY, addr: &sdp->sd_flags);
177	goto skip_recovery;
178	}
179	/*
180	* Drop the glock for our journal so another node can recover it.
181	*/
182	if (gfs2_holder_initialized(gh: &sdp->sd_journal_gh)) {
183	gfs2_glock_dq_wait(gh: &sdp->sd_journal_gh);
184	gfs2_holder_uninit(gh: &sdp->sd_journal_gh);
185	}
186	sdp->sd_jinode_gh.gh_flags \|= GL_NOCACHE;
187	gfs2_glock_dq(gh: &sdp->sd_jinode_gh);
188	gfs2_thaw_freeze_initiator(sb: sdp->sd_vfs);
189	wait_on_bit(word: &i_gl->gl_flags, bit: GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
190
191	/*
192	* holder_uninit to force glock_put, to force dlm to let go
193	*/
194	gfs2_holder_uninit(gh: &sdp->sd_jinode_gh);
195
196	/*
197	* Note: We need to be careful here:
198	* Our iput of jd_inode will evict it. The evict will dequeue its
199	* glock, but the glock dq will wait for the withdraw unless we have
200	* exception code in glock_dq.
201	*/
202	iput(inode);
203	sdp->sd_jdesc->jd_inode = NULL;
204	/*
205	* Wait until the journal inode's glock is freed. This allows try locks
206	* on other nodes to be successful, otherwise we remain the owner of
207	* the glock as far as dlm is concerned.
208	*/
209	if (i_gl->gl_ops->go_free) {
210	set_bit(nr: GLF_FREEING, addr: &i_gl->gl_flags);
211	wait_on_bit(word: &i_gl->gl_flags, bit: GLF_FREEING, TASK_UNINTERRUPTIBLE);
212	}
213
214	/*
215	* Dequeue the "live" glock, but keep a reference so it's never freed.
216	*/
217	gfs2_glock_hold(gl: live_gl);
218	gfs2_glock_dq_wait(gh: &sdp->sd_live_gh);
219	/*
220	* We enqueue the "live" glock in EX so that all other nodes
221	* get a demote request and act on it. We don't really want the
222	* lock in EX, so we send a "try" lock with 1CB to produce a callback.
223	*/
224	fs_warn(sdp, "Requesting recovery of jid %d.\n",
225	sdp->sd_lockstruct.ls_jid);
226	gfs2_holder_reinit(LM_ST_EXCLUSIVE,
227	LM_FLAG_TRY_1CB \| LM_FLAG_NOEXP \| GL_NOPID,
228	gh: &sdp->sd_live_gh);
229	msleep(GL_GLOCK_MAX_HOLD);
230	/*
231	* This will likely fail in a cluster, but succeed standalone:
232	*/
233	ret = gfs2_glock_nq(gh: &sdp->sd_live_gh);
234
235	/*
236	* If we actually got the "live" lock in EX mode, there are no other
237	* nodes available to replay our journal. So we try to replay it
238	* ourselves. We hold the "live" glock to prevent other mounters
239	* during recovery, then just dequeue it and reacquire it in our
240	* normal SH mode. Just in case the problem that caused us to
241	* withdraw prevents us from recovering our journal (e.g. io errors
242	* and such) we still check if the journal is clean before proceeding
243	* but we may wait forever until another mounter does the recovery.
244	*/
245	if (ret == `0`) {
246	fs_warn(sdp, "No other mounters found. Trying to recover our "
247	"own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
248	if (gfs2_recover_journal(gfs2_jd: sdp->sd_jdesc, wait: `1`))
249	fs_warn(sdp, "Unable to recover our journal jid %d.\n",
250	sdp->sd_lockstruct.ls_jid);
251	gfs2_glock_dq_wait(gh: &sdp->sd_live_gh);
252	gfs2_holder_reinit(LM_ST_SHARED,
253	LM_FLAG_NOEXP \| GL_EXACT \| GL_NOPID,
254	gh: &sdp->sd_live_gh);
255	gfs2_glock_nq(gh: &sdp->sd_live_gh);
256	}
257
258	gfs2_glock_queue_put(gl: live_gl); / drop extra reference we acquired /
259	clear_bit(nr: SDF_WITHDRAW_RECOVERY, addr: &sdp->sd_flags);
260
261	/*
262	* At this point our journal is evicted, so we need to get a new inode
263	* for it. Once done, we need to call gfs2_find_jhead which
264	* calls gfs2_map_journal_extents to map it for us again.
265	*
266	* Note that we don't really want it to look up a FREE block. The
267	* GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
268	* which would otherwise fail because it requires grabbing an rgrp
269	* glock, which would fail with -EIO because we're withdrawing.
270	*/
271	inode = gfs2_inode_lookup(sb: sdp->sd_vfs, DT_UNKNOWN,
272	no_addr: sdp->sd_jdesc->jd_no_addr, no_formal_ino,
273	GFS2_BLKST_FREE);
274	if (IS_ERR(ptr: inode)) {
275	fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
276	sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
277	goto skip_recovery;
278	}
279	sdp->sd_jdesc->jd_inode = inode;
280	d_mark_dontcache(inode);
281
282	/*
283	* Now wait until recovery is complete.
284	*/
285	for (tries = `0`; tries < `10`; tries++) {
286	ret = check_journal_clean(sdp, jd: sdp->sd_jdesc, verbose: false);
287	if (!ret)
288	break;
289	msleep(HZ);
290	fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
291	sdp->sd_lockstruct.ls_jid);
292	}
293	skip_recovery:
294	if (!ret)
295	fs_warn(sdp, "Journal recovery complete for jid %d.\n",
296	sdp->sd_lockstruct.ls_jid);
297	else
298	fs_warn(sdp, "Journal recovery skipped for jid %d until next "
299	"mount.\n", sdp->sd_lockstruct.ls_jid);
300	fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
301	sdp->sd_glock_dqs_held = `0`;
302	wake_up_bit(word: &sdp->sd_flags, bit: SDF_WITHDRAW_RECOVERY);
303	}
304
305	void gfs2_lm(struct gfs2_sbd sdp, const* char *fmt, ...)
306	{
307	struct va_format vaf;
308	va_list args;
309
310	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
311	test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
312	return;
313
314	va_start(args, fmt);
315	vaf.fmt = fmt;
316	vaf.va = &args;
317	fs_err(sdp, "%pV", &vaf);
318	va_end(args);
319	}
320
321	int gfs2_withdraw(struct gfs2_sbd *sdp)
322	{
323	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
324	const struct lm_lockops *lm = ls->ls_ops;
325
326	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
327	unsigned long old = READ_ONCE(sdp->sd_flags), new;
328
329	do {
330	if (old & BIT(SDF_WITHDRAWN)) {
331	wait_on_bit(word: &sdp->sd_flags,
332	bit: SDF_WITHDRAW_IN_PROG,
333	TASK_UNINTERRUPTIBLE);
334	return -`1`;
335	}
336	new = old \| BIT(SDF_WITHDRAWN) \| BIT(SDF_WITHDRAW_IN_PROG);
337	} while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));
338
339	fs_err(sdp, "about to withdraw this file system\n");
340	BUG_ON(sdp->sd_args.ar_debug);
341
342	signal_our_withdraw(sdp);
343
344	kobject_uevent(kobj: &sdp->sd_kobj, action: KOBJ_OFFLINE);
345
346	if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
347	wait_for_completion(&sdp->sd_wdack);
348
349	if (lm->lm_unmount) {
350	fs_err(sdp, "telling LM to unmount\n");
351	lm->lm_unmount(sdp);
352	}
353	set_bit(nr: SDF_SKIP_DLM_UNLOCK, addr: &sdp->sd_flags);
354	fs_err(sdp, "File system withdrawn\n");
355	dump_stack();
356	clear_bit(nr: SDF_WITHDRAW_IN_PROG, addr: &sdp->sd_flags);
357	smp_mb__after_atomic();
358	wake_up_bit(word: &sdp->sd_flags, bit: SDF_WITHDRAW_IN_PROG);
359	}
360
361	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
362	panic(fmt: "GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
363
364	return -`1`;
365	}
366
367	/*
368	* gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
369	*/
370
371	void gfs2_assert_withdraw_i(struct gfs2_sbd sdp, char* *assertion,
372	const char function, char* file, unsigned* int line,
373	bool delayed)
374	{
375	if (gfs2_withdrawing_or_withdrawn(sdp))
376	return;
377
378	fs_err(sdp,
379	"fatal: assertion \"%s\" failed\n"
380	" function = %s, file = %s, line = %u\n",
381	assertion, function, file, line);
382
383	/*
384	* If errors=panic was specified on mount, it won't help to delay the
385	* withdraw.
386	*/
387	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
388	delayed = false;
389
390	if (delayed)
391	gfs2_withdraw_delayed(sdp);
392	else
393	gfs2_withdraw(sdp);
394	dump_stack();
395	}
396
397	/*
398	* gfs2_assert_warn_i - Print a message to the console if @assertion is false
399	*/
400
401	void gfs2_assert_warn_i(struct gfs2_sbd sdp, char* *assertion,
402	const char function, char* file, unsigned* int line)
403	{
404	if (time_before(jiffies,
405	sdp->sd_last_warning +
406	gfs2_tune_get(sdp, gt_complain_secs) * HZ))
407	return;
408
409	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
410	fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
411	assertion, function, file, line);
412
413	if (sdp->sd_args.ar_debug)
414	BUG();
415	else
416	dump_stack();
417
418	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
419	panic(fmt: "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
420	"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
421	sdp->sd_fsname, assertion,
422	sdp->sd_fsname, function, file, line);
423
424	sdp->sd_last_warning = jiffies;
425	}
426
/**
 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 * @sdp: the superblock
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: source line reporting the error
 */
void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line)
{
	gfs2_lm(sdp,
		"fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
		function, file, line);
	gfs2_withdraw(sdp);
}
439
440	/*
441	* gfs2_consist_inode_i - Flag an inode consistency error and withdraw
442	*/
443
444	void gfs2_consist_inode_i(struct gfs2_inode *ip,
445	const char function, char* file, unsigned* int line)
446	{
447	struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode);
448
449	gfs2_lm(sdp,
450	fmt: "fatal: filesystem consistency error\n"
451	" inode = %llu %llu\n"
452	" function = %s, file = %s, line = %u\n",
453	(unsigned long long)ip->i_no_formal_ino,
454	(unsigned long long)ip->i_no_addr,
455	function, file, line);
456	gfs2_dump_glock(NULL, gl: ip->i_gl, fsid: `1`);
457	gfs2_withdraw(sdp);
458	}
459
460	/*
461	* gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
462	*/
463
464	void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
465	const char function, char* file, unsigned* int line)
466	{
467	struct gfs2_sbd *sdp = rgd->rd_sbd;
468	char fs_id_buf[sizeof(sdp->sd_fsname) + `7`];
469
470	sprintf(buf: fs_id_buf, fmt: "fsid=%s: ", sdp->sd_fsname);
471	gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
472	gfs2_lm(sdp,
473	fmt: "fatal: filesystem consistency error\n"
474	" RG = %llu\n"
475	" function = %s, file = %s, line = %u\n",
476	(unsigned long long)rgd->rd_addr,
477	function, file, line);
478	gfs2_dump_glock(NULL, gl: rgd->rd_gl, fsid: `1`);
479	gfs2_withdraw(sdp);
480	}
481
482	/*
483	* gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
484	* Returns: -1 if this call withdrew the machine,
485	* -2 if it was already withdrawn
486	*/
487
488	int gfs2_meta_check_ii(struct gfs2_sbd sdp, struct* buffer_head *bh,
489	const char type, const* char function, char* *file,
490	unsigned int line)
491	{
492	int me;
493
494	gfs2_lm(sdp,
495	fmt: "fatal: invalid metadata block\n"
496	" bh = %llu (%s)\n"
497	" function = %s, file = %s, line = %u\n",
498	(unsigned long long)bh->b_blocknr, type,
499	function, file, line);
500	me = gfs2_withdraw(sdp);
501	return (me) ? -`1` : -`2`;
502	}
503
504	/*
505	* gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
506	* Returns: -1 if this call withdrew the machine,
507	* -2 if it was already withdrawn
508	*/
509
510	int gfs2_metatype_check_ii(struct gfs2_sbd sdp, struct* buffer_head *bh,
511	u16 type, u16 t, const char *function,
512	char file, unsigned* int line)
513	{
514	int me;
515
516	gfs2_lm(sdp,
517	fmt: "fatal: invalid metadata block\n"
518	" bh = %llu (type: exp=%u, found=%u)\n"
519	" function = %s, file = %s, line = %u\n",
520	(unsigned long long)bh->b_blocknr, type, t,
521	function, file, line);
522	me = gfs2_withdraw(sdp);
523	return (me) ? -`1` : -`2`;
524	}
525
/**
 * gfs2_io_error_i - Flag an I/O error and withdraw
 * @sdp: the superblock
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: source line reporting the error
 *
 * Returns: the result of gfs2_withdraw(), which in this file is -1 both
 *          when this call performed the withdraw and when the filesystem
 *          was already withdrawn
 */
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
		    unsigned int line)
{
	gfs2_lm(sdp,
		"fatal: I/O error\n"
		" function = %s, file = %s, line = %u\n",
		function, file, line);
	return gfs2_withdraw(sdp);
}
541
542	/*
543	* gfs2_io_error_bh_i - Flag a buffer I/O error
544	* @withdraw: withdraw the filesystem
545	*/
546
547	void gfs2_io_error_bh_i(struct gfs2_sbd sdp, struct* buffer_head *bh,
548	const char function, char* file, unsigned* int line,
549	bool withdraw)
550	{
551	if (gfs2_withdrawing_or_withdrawn(sdp))
552	return;
553
554	fs_err(sdp, "fatal: I/O error\n"
555	" block = %llu\n"
556	" function = %s, file = %s, line = %u\n",
557	(unsigned long long)bh->b_blocknr, function, file, line);
558	if (withdraw)
559	gfs2_withdraw(sdp);
560	}
561
562

source code of linux/fs/gfs2/util.c