1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
4 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
5 | */ |
6 | |
7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
8 | |
9 | #include <linux/bio.h> |
10 | #include <linux/sched/signal.h> |
11 | #include <linux/slab.h> |
12 | #include <linux/spinlock.h> |
13 | #include <linux/completion.h> |
14 | #include <linux/buffer_head.h> |
15 | #include <linux/statfs.h> |
16 | #include <linux/seq_file.h> |
17 | #include <linux/mount.h> |
18 | #include <linux/kthread.h> |
19 | #include <linux/delay.h> |
20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/crc32.h> |
22 | #include <linux/time.h> |
23 | #include <linux/wait.h> |
24 | #include <linux/writeback.h> |
25 | #include <linux/backing-dev.h> |
26 | #include <linux/kernel.h> |
27 | |
28 | #include "gfs2.h" |
29 | #include "incore.h" |
30 | #include "bmap.h" |
31 | #include "dir.h" |
32 | #include "glock.h" |
33 | #include "glops.h" |
34 | #include "inode.h" |
35 | #include "log.h" |
36 | #include "meta_io.h" |
37 | #include "quota.h" |
38 | #include "recovery.h" |
39 | #include "rgrp.h" |
40 | #include "super.h" |
41 | #include "trans.h" |
42 | #include "util.h" |
43 | #include "sys.h" |
44 | #include "xattr.h" |
45 | #include "lops.h" |
46 | |
47 | enum dinode_demise { |
48 | SHOULD_DELETE_DINODE, |
49 | SHOULD_NOT_DELETE_DINODE, |
50 | SHOULD_DEFER_EVICTION, |
51 | }; |
52 | |
53 | /** |
54 | * gfs2_jindex_free - Clear all the journal index information |
55 | * @sdp: The GFS2 superblock |
56 | * |
57 | */ |
58 | |
59 | void gfs2_jindex_free(struct gfs2_sbd *sdp) |
60 | { |
61 | struct list_head list; |
62 | struct gfs2_jdesc *jd; |
63 | |
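	/*
	 * Splice the entire jindex list onto a local list head: inserting
	 * "list" into the chain and then unlinking sd_jindex_list leaves
	 * every journal descriptor reachable from "list" alone.
	 */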
	spin_lock(&sdp->sd_jindex_spin);
	list_add(&list, &sdp->sd_jindex_list);
	list_del_init(&sdp->sd_jindex_list);
	sdp->sd_journals = 0;
	spin_unlock(&sdp->sd_jindex_spin);
69 | |
70 | sdp->sd_jdesc = NULL; |
	while (!list_empty(&list)) {
		jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
		gfs2_free_journal_extents(jd);
		list_del(&jd->jd_list);
		iput(jd->jd_inode);
		jd->jd_inode = NULL;
		kfree(jd);
78 | } |
79 | } |
80 | |
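/* Callers are expected to hold sd_jindex_spin. */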
81 | static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) |
82 | { |
83 | struct gfs2_jdesc *jd; |
84 | |
85 | list_for_each_entry(jd, head, jd_list) { |
86 | if (jd->jd_jid == jid) |
87 | return jd; |
88 | } |
89 | return NULL; |
90 | } |
91 | |
92 | struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) |
93 | { |
94 | struct gfs2_jdesc *jd; |
95 | |
	spin_lock(&sdp->sd_jindex_spin);
	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
	spin_unlock(&sdp->sd_jindex_spin);
99 | |
100 | return jd; |
101 | } |
102 | |
103 | int gfs2_jdesc_check(struct gfs2_jdesc *jd) |
104 | { |
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	u64 size = i_size_read(jd->jd_inode);
108 | |
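	/* Journals must be between 8 MiB and 1 GiB in size. */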
	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
110 | return -EIO; |
111 | |
112 | jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; |
113 | |
	if (gfs2_write_alloc_required(ip, 0, size)) {
115 | gfs2_consist_inode(ip); |
116 | return -EIO; |
117 | } |
118 | |
119 | return 0; |
120 | } |
121 | |
122 | /** |
123 | * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one |
124 | * @sdp: the filesystem |
125 | * |
126 | * Returns: errno |
127 | */ |
128 | |
129 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp) |
130 | { |
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
132 | struct gfs2_glock *j_gl = ip->i_gl; |
133 | struct gfs2_log_header_host head; |
134 | int error; |
135 | |
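	/* Invalidate any metadata pages cached under the journal glock. */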
136 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
137 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
138 | return -EIO; |
139 | |
	error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
141 | if (error) { |
142 | gfs2_consist(sdp); |
143 | return error; |
144 | } |
145 | |
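	/*
	 * A clean journal ends with a log header that has the unmount flag
	 * set; anything else means the journal still needs recovery.
	 */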
146 | if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { |
147 | gfs2_consist(sdp); |
148 | return -EIO; |
149 | } |
150 | |
	/* Initialize the head of the log */
	sdp->sd_log_sequence = head.lh_sequence + 1;
	gfs2_log_pointers_init(sdp, head.lh_blkno);
154 | |
155 | error = gfs2_quota_init(sdp); |
156 | if (!error && gfs2_withdrawing_or_withdrawn(sdp)) |
157 | error = -EIO; |
158 | if (!error) |
		set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
160 | return error; |
161 | } |
162 | |
163 | void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
164 | { |
165 | const struct gfs2_statfs_change *str = buf; |
166 | |
167 | sc->sc_total = be64_to_cpu(str->sc_total); |
168 | sc->sc_free = be64_to_cpu(str->sc_free); |
169 | sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); |
170 | } |
171 | |
172 | void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf) |
173 | { |
174 | struct gfs2_statfs_change *str = buf; |
175 | |
176 | str->sc_total = cpu_to_be64(sc->sc_total); |
177 | str->sc_free = cpu_to_be64(sc->sc_free); |
178 | str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); |
179 | } |
180 | |
181 | int gfs2_statfs_init(struct gfs2_sbd *sdp) |
182 | { |
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
184 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; |
185 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
186 | struct buffer_head *m_bh; |
187 | struct gfs2_holder gh; |
188 | int error; |
189 | |
	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
192 | if (error) |
193 | return error; |
194 | |
	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
196 | if (error) |
197 | goto out; |
198 | |
199 | if (sdp->sd_args.ar_spectator) { |
		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);
	} else {
		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);
	}
213 | |
	brelse(m_bh);
215 | out: |
	gfs2_glock_dq_uninit(&gh);
217 | return 0; |
218 | } |
219 | |
220 | void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, |
221 | s64 dinodes) |
222 | { |
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
224 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
225 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; |
226 | s64 x, y; |
227 | int need_sync = 0; |
228 | |
	gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
230 | |
	spin_lock(&sdp->sd_statfs_spin);
232 | l_sc->sc_total += total; |
233 | l_sc->sc_free += free; |
234 | l_sc->sc_dinodes += dinodes; |
	gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data +
			       sizeof(struct gfs2_dinode));
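	/*
	 * With the statfs_percent mount option, wake up the statfs syncer
	 * once the local changes add up to more than the given percentage
	 * of the master free-space figure.
	 */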
237 | if (sdp->sd_args.ar_statfs_percent) { |
238 | x = 100 * l_sc->sc_free; |
239 | y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent; |
240 | if (x >= y || x <= -y) |
241 | need_sync = 1; |
242 | } |
	spin_unlock(&sdp->sd_statfs_spin);
244 | |
245 | if (need_sync) |
246 | gfs2_wake_up_statfs(sdp); |
247 | } |
248 | |
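/*
 * Fold the accumulated local statfs changes into the master statfs file and
 * reset the local changes to zero.  Runs inside the transaction started by
 * gfs2_statfs_sync().
 */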
249 | void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh) |
250 | { |
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
253 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; |
254 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
255 | |
	gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
	gfs2_trans_add_meta(m_ip->i_gl, m_bh);
258 | |
	spin_lock(&sdp->sd_statfs_spin);
260 | m_sc->sc_total += l_sc->sc_total; |
261 | m_sc->sc_free += l_sc->sc_free; |
262 | m_sc->sc_dinodes += l_sc->sc_dinodes; |
263 | memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); |
264 | memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode), |
265 | 0, sizeof(struct gfs2_statfs_change)); |
	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
	spin_unlock(&sdp->sd_statfs_spin);
268 | } |
269 | |
270 | int gfs2_statfs_sync(struct super_block *sb, int type) |
271 | { |
272 | struct gfs2_sbd *sdp = sb->s_fs_info; |
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
274 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; |
275 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
276 | struct gfs2_holder gh; |
277 | struct buffer_head *m_bh; |
278 | int error; |
279 | |
	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
282 | if (error) |
283 | goto out; |
284 | |
	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
286 | if (error) |
287 | goto out_unlock; |
288 | |
	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
292 | if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { |
		spin_unlock(&sdp->sd_statfs_spin);
294 | goto out_bh; |
295 | } |
	spin_unlock(&sdp->sd_statfs_spin);
297 | |
	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
299 | if (error) |
300 | goto out_bh; |
301 | |
302 | update_statfs(sdp, m_bh); |
303 | sdp->sd_statfs_force_sync = 0; |
304 | |
305 | gfs2_trans_end(sdp); |
306 | |
307 | out_bh: |
	brelse(m_bh);
309 | out_unlock: |
	gfs2_glock_dq_uninit(&gh);
311 | out: |
312 | return error; |
313 | } |
314 | |
315 | struct lfcc { |
316 | struct list_head list; |
317 | struct gfs2_holder gh; |
318 | }; |
319 | |
320 | /** |
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
323 | * @sdp: the file system |
324 | * |
325 | * Returns: errno |
326 | */ |
327 | |
328 | static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) |
329 | { |
330 | struct gfs2_inode *ip; |
331 | struct gfs2_jdesc *jd; |
332 | struct lfcc *lfcc; |
333 | LIST_HEAD(list); |
334 | struct gfs2_log_header_host lh; |
335 | int error, error2; |
336 | |
337 | /* |
338 | * Grab all the journal glocks in SH mode. We are *probably* doing |
339 | * that to prevent recovery. |
340 | */ |
341 | |
342 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
344 | if (!lfcc) { |
345 | error = -ENOMEM; |
346 | goto out; |
347 | } |
		ip = GFS2_I(jd->jd_inode);
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
		if (error) {
			kfree(lfcc);
			goto out;
		}
		list_add(&lfcc->list, &list);
355 | } |
356 | |
	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
358 | |
	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
				   LM_FLAG_NOEXP | GL_NOPID,
				   &sdp->sd_freeze_gh);
362 | if (error) |
363 | goto relock_shared; |
364 | |
365 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
366 | error = gfs2_jdesc_check(jd); |
367 | if (error) |
368 | break; |
		error = gfs2_find_jhead(jd, &lh, false);
370 | if (error) |
371 | break; |
372 | if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { |
373 | error = -EBUSY; |
374 | break; |
375 | } |
376 | } |
377 | |
378 | if (!error) |
379 | goto out; /* success */ |
380 | |
	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
382 | |
383 | relock_shared: |
384 | error2 = gfs2_freeze_lock_shared(sdp); |
385 | gfs2_assert_withdraw(sdp, !error2); |
386 | |
387 | out: |
	while (!list_empty(&list)) {
		lfcc = list_first_entry(&list, struct lfcc, list);
		list_del(&lfcc->list);
		gfs2_glock_dq_uninit(&lfcc->gh);
		kfree(lfcc);
393 | } |
394 | return error; |
395 | } |
396 | |
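/* Convert the in-core inode into its on-disk (big-endian) dinode form. */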
397 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) |
398 | { |
399 | const struct inode *inode = &ip->i_inode; |
400 | struct gfs2_dinode *str = buf; |
401 | |
402 | str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); |
403 | str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); |
404 | str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); |
405 | str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); |
406 | str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); |
407 | str->di_mode = cpu_to_be32(inode->i_mode); |
408 | str->di_uid = cpu_to_be32(i_uid_read(inode)); |
409 | str->di_gid = cpu_to_be32(i_gid_read(inode)); |
410 | str->di_nlink = cpu_to_be32(inode->i_nlink); |
411 | str->di_size = cpu_to_be64(i_size_read(inode)); |
412 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); |
413 | str->di_atime = cpu_to_be64(inode_get_atime_sec(inode)); |
414 | str->di_mtime = cpu_to_be64(inode_get_mtime_sec(inode)); |
415 | str->di_ctime = cpu_to_be64(inode_get_ctime_sec(inode)); |
416 | |
417 | str->di_goal_meta = cpu_to_be64(ip->i_goal); |
418 | str->di_goal_data = cpu_to_be64(ip->i_goal); |
419 | str->di_generation = cpu_to_be64(ip->i_generation); |
420 | |
421 | str->di_flags = cpu_to_be32(ip->i_diskflags); |
422 | str->di_height = cpu_to_be16(ip->i_height); |
423 | str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) && |
424 | !(ip->i_diskflags & GFS2_DIF_EXHASH) ? |
425 | GFS2_FORMAT_DE : 0); |
426 | str->di_depth = cpu_to_be16(ip->i_depth); |
427 | str->di_entries = cpu_to_be32(ip->i_entries); |
428 | |
429 | str->di_eattr = cpu_to_be64(ip->i_eattr); |
430 | str->di_atime_nsec = cpu_to_be32(inode_get_atime_nsec(inode)); |
431 | str->di_mtime_nsec = cpu_to_be32(inode_get_mtime_nsec(inode)); |
432 | str->di_ctime_nsec = cpu_to_be32(inode_get_ctime_nsec(inode)); |
433 | } |
434 | |
435 | /** |
436 | * gfs2_write_inode - Make sure the inode is stable on the disk |
437 | * @inode: The inode |
438 | * @wbc: The writeback control structure |
439 | * |
440 | * Returns: errno |
441 | */ |
442 | |
443 | static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) |
444 | { |
445 | struct gfs2_inode *ip = GFS2_I(inode); |
446 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
	struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
449 | int ret = 0; |
450 | bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip)); |
451 | |
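	/*
	 * Journaled data goes to disk through the log, and under WB_SYNC_ALL
	 * the dinode itself must be made stable, so flush the log first.
	 */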
452 | if (flush_all) |
		gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
454 | GFS2_LOG_HEAD_FLUSH_NORMAL | |
455 | GFS2_LFC_WRITE_INODE); |
456 | if (bdi->wb.dirty_exceeded) |
457 | gfs2_ail1_flush(sdp, wbc); |
458 | else |
459 | filemap_fdatawrite(metamapping); |
460 | if (flush_all) |
		ret = filemap_fdatawait(metamapping);
462 | if (ret) |
463 | mark_inode_dirty_sync(inode); |
464 | else { |
		spin_lock(&inode->i_lock);
466 | if (!(inode->i_flags & I_DIRTY)) |
467 | gfs2_ordered_del_inode(ip); |
		spin_unlock(&inode->i_lock);
469 | } |
470 | return ret; |
471 | } |
472 | |
473 | /** |
474 | * gfs2_dirty_inode - check for atime updates |
475 | * @inode: The inode in question |
476 | * @flags: The type of dirty |
477 | * |
478 | * Unfortunately it can be called under any combination of inode |
479 | * glock and freeze glock, so we have to check carefully. |
480 | * |
481 | * At the moment this deals only with atime - it should be possible |
482 | * to expand that role in future, once a review of the locking has |
483 | * been carried out. |
484 | */ |
485 | |
486 | static void gfs2_dirty_inode(struct inode *inode, int flags) |
487 | { |
488 | struct gfs2_inode *ip = GFS2_I(inode); |
489 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
490 | struct buffer_head *bh; |
491 | struct gfs2_holder gh; |
492 | int need_unlock = 0; |
493 | int need_endtrans = 0; |
494 | int ret; |
495 | |
496 | if (unlikely(!ip->i_gl)) { |
497 | /* This can only happen during incomplete inode creation. */ |
498 | BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); |
499 | return; |
500 | } |
501 | |
502 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
503 | return; |
	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
		if (ret) {
			fs_err(sdp, "dirty_inode: glock %d\n", ret);
			gfs2_dump_glock(NULL, ip->i_gl, true);
509 | return; |
510 | } |
511 | need_unlock = 1; |
512 | } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) |
513 | return; |
514 | |
515 | if (current->journal_info == NULL) { |
		ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
		if (ret) {
			fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
519 | goto out; |
520 | } |
521 | need_endtrans = 1; |
522 | } |
523 | |
	ret = gfs2_meta_inode_buffer(ip, &bh);
	if (ret == 0) {
		gfs2_trans_add_meta(ip->i_gl, bh);
		gfs2_dinode_out(ip, bh->b_data);
528 | brelse(bh); |
529 | } |
530 | |
531 | if (need_endtrans) |
532 | gfs2_trans_end(sdp); |
533 | out: |
534 | if (need_unlock) |
		gfs2_glock_dq_uninit(&gh);
536 | } |
537 | |
538 | /** |
539 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one |
540 | * @sdp: the filesystem |
541 | * |
542 | * Returns: errno |
543 | */ |
544 | |
545 | void gfs2_make_fs_ro(struct gfs2_sbd *sdp) |
546 | { |
547 | int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); |
548 | |
549 | if (!test_bit(SDF_KILL, &sdp->sd_flags)) |
550 | gfs2_flush_delete_work(sdp); |
551 | |
552 | gfs2_destroy_threads(sdp); |
553 | |
554 | if (log_write_allowed) { |
		gfs2_quota_sync(sdp->sd_vfs, 0);
		gfs2_statfs_sync(sdp->sd_vfs, 0);
557 | |
558 | /* We do two log flushes here. The first one commits dirty inodes |
559 | * and rgrps to the journal, but queues up revokes to the ail list. |
560 | * The second flush writes out and removes the revokes. |
561 | * |
562 | * The first must be done before the FLUSH_SHUTDOWN code |
563 | * clears the LIVE flag, otherwise it will not be able to start |
564 | * a transaction to write its revokes, and the error will cause |
565 | * a withdraw of the file system. */ |
566 | gfs2_log_flush(sdp, NULL, GFS2_LFC_MAKE_FS_RO); |
567 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | |
568 | GFS2_LFC_MAKE_FS_RO); |
569 | wait_event_timeout(sdp->sd_log_waitq, |
570 | gfs2_log_is_empty(sdp), |
571 | HZ * 5); |
572 | gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp)); |
573 | } |
574 | gfs2_quota_cleanup(sdp); |
575 | } |
576 | |
577 | /** |
578 | * gfs2_put_super - Unmount the filesystem |
579 | * @sb: The VFS superblock |
580 | * |
581 | */ |
582 | |
583 | static void gfs2_put_super(struct super_block *sb) |
584 | { |
585 | struct gfs2_sbd *sdp = sb->s_fs_info; |
586 | struct gfs2_jdesc *jd; |
587 | |
588 | /* No more recovery requests */ |
	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
590 | smp_mb(); |
591 | |
592 | /* Wait on outstanding recovery */ |
593 | restart: |
	spin_lock(&sdp->sd_jindex_spin);
595 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
596 | if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) |
597 | continue; |
		spin_unlock(&sdp->sd_jindex_spin);
		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
601 | goto restart; |
602 | } |
	spin_unlock(&sdp->sd_jindex_spin);
604 | |
605 | if (!sb_rdonly(sb)) |
606 | gfs2_make_fs_ro(sdp); |
607 | else { |
608 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
609 | gfs2_destroy_threads(sdp); |
610 | |
611 | gfs2_quota_cleanup(sdp); |
612 | } |
613 | |
614 | WARN_ON(gfs2_withdrawing(sdp)); |
615 | |
616 | /* At this point, we're through modifying the disk */ |
617 | |
618 | /* Release stuff */ |
619 | |
	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
621 | |
622 | iput(sdp->sd_jindex); |
623 | iput(sdp->sd_statfs_inode); |
624 | iput(sdp->sd_rindex); |
625 | iput(sdp->sd_quota_inode); |
626 | |
	gfs2_glock_put(sdp->sd_rename_gl);
	gfs2_glock_put(sdp->sd_freeze_gl);
629 | |
630 | if (!sdp->sd_args.ar_spectator) { |
		if (gfs2_holder_initialized(&sdp->sd_journal_gh))
			gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
		if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
			gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
		brelse(sdp->sd_sc_bh);
		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
638 | free_local_statfs_inodes(sdp); |
639 | iput(sdp->sd_qc_inode); |
640 | } |
641 | |
	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
643 | gfs2_clear_rgrpd(sdp); |
644 | gfs2_jindex_free(sdp); |
645 | /* Take apart glock structures and buffer lists */ |
646 | gfs2_gl_hash_clear(sdp); |
647 | truncate_inode_pages_final(&sdp->sd_aspace); |
648 | gfs2_delete_debugfs_file(sdp); |
649 | /* Unmount the locking protocol */ |
650 | gfs2_lm_unmount(sdp); |
651 | |
652 | /* At this point, we're through participating in the lockspace */ |
653 | gfs2_sys_fs_del(sdp); |
654 | free_sbd(sdp); |
655 | } |
656 | |
657 | /** |
658 | * gfs2_sync_fs - sync the filesystem |
659 | * @sb: the superblock |
660 | * @wait: true to wait for completion |
661 | * |
662 | * Flushes the log to disk. |
663 | */ |
664 | |
665 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
666 | { |
667 | struct gfs2_sbd *sdp = sb->s_fs_info; |
668 | |
	gfs2_quota_sync(sb, -1);
670 | if (wait) |
671 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | |
672 | GFS2_LFC_SYNC_FS); |
673 | return sdp->sd_log_error; |
674 | } |
675 | |
676 | static int gfs2_do_thaw(struct gfs2_sbd *sdp) |
677 | { |
678 | struct super_block *sb = sdp->sd_vfs; |
679 | int error; |
680 | |
681 | error = gfs2_freeze_lock_shared(sdp); |
682 | if (error) |
683 | goto fail; |
	error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
685 | if (!error) |
686 | return 0; |
687 | |
688 | fail: |
	fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error);
690 | gfs2_assert_withdraw(sdp, 0); |
691 | return error; |
692 | } |
693 | |
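/*
 * Freeze work, presumably queued when a remote node initiates a freeze:
 * freeze the VFS super block, release the freeze glock so that the
 * initiator can proceed, and thaw again once the glock can be re-acquired.
 */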
694 | void gfs2_freeze_func(struct work_struct *work) |
695 | { |
696 | struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work); |
697 | struct super_block *sb = sdp->sd_vfs; |
698 | int error; |
699 | |
700 | mutex_lock(&sdp->sd_freeze_mutex); |
701 | error = -EBUSY; |
702 | if (test_bit(SDF_FROZEN, &sdp->sd_flags)) |
703 | goto freeze_failed; |
704 | |
	error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
706 | if (error) |
707 | goto freeze_failed; |
708 | |
	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
	set_bit(SDF_FROZEN, &sdp->sd_flags);
711 | |
712 | error = gfs2_do_thaw(sdp); |
713 | if (error) |
714 | goto out; |
715 | |
	clear_bit(SDF_FROZEN, &sdp->sd_flags);
717 | goto out; |
718 | |
719 | freeze_failed: |
	fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error);
721 | |
722 | out: |
	mutex_unlock(&sdp->sd_freeze_mutex);
724 | deactivate_super(sb); |
725 | } |
726 | |
727 | /** |
728 | * gfs2_freeze_super - prevent further writes to the filesystem |
729 | * @sb: the VFS structure for the filesystem |
730 | * |
731 | */ |
732 | |
733 | static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) |
734 | { |
735 | struct gfs2_sbd *sdp = sb->s_fs_info; |
736 | int error; |
737 | |
	if (!mutex_trylock(&sdp->sd_freeze_mutex))
739 | return -EBUSY; |
740 | if (test_bit(SDF_FROZEN, &sdp->sd_flags)) { |
		mutex_unlock(&sdp->sd_freeze_mutex);
742 | return -EBUSY; |
743 | } |
744 | |
745 | for (;;) { |
		error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
		if (error) {
			fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
				error);
750 | goto out; |
751 | } |
752 | |
753 | error = gfs2_lock_fs_check_clean(sdp); |
754 | if (!error) { |
			set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
			set_bit(SDF_FROZEN, &sdp->sd_flags);
757 | break; |
758 | } |
759 | |
760 | error = gfs2_do_thaw(sdp); |
761 | if (error) |
762 | goto out; |
763 | |
		if (error == -EBUSY)
			fs_err(sdp, "waiting for recovery before freeze\n");
		else if (error == -EIO) {
			fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
			       "to recovery error.\n");
			goto out;
		} else {
			fs_err(sdp, "error freezing FS: %d\n", error);
		}
		fs_err(sdp, "retrying...\n");
		msleep(1000);
775 | } |
776 | |
777 | out: |
	mutex_unlock(&sdp->sd_freeze_mutex);
779 | return error; |
780 | } |
781 | |
782 | static int gfs2_freeze_fs(struct super_block *sb) |
783 | { |
784 | struct gfs2_sbd *sdp = sb->s_fs_info; |
785 | |
786 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
787 | gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | |
788 | GFS2_LFC_FREEZE_GO_SYNC); |
789 | if (gfs2_withdrawing_or_withdrawn(sdp)) |
790 | return -EIO; |
791 | } |
792 | return 0; |
793 | } |
794 | |
795 | /** |
796 | * gfs2_thaw_super - reallow writes to the filesystem |
797 | * @sb: the VFS structure for the filesystem |
798 | * |
799 | */ |
800 | |
801 | static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) |
802 | { |
803 | struct gfs2_sbd *sdp = sb->s_fs_info; |
804 | int error; |
805 | |
	if (!mutex_trylock(&sdp->sd_freeze_mutex))
807 | return -EBUSY; |
808 | if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) { |
		mutex_unlock(&sdp->sd_freeze_mutex);
810 | return -EINVAL; |
811 | } |
812 | |
	atomic_inc(&sb->s_active);
	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
815 | |
816 | error = gfs2_do_thaw(sdp); |
817 | |
818 | if (!error) { |
		clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
		clear_bit(SDF_FROZEN, &sdp->sd_flags);
821 | } |
	mutex_unlock(&sdp->sd_freeze_mutex);
823 | deactivate_super(sb); |
824 | return error; |
825 | } |
826 | |
827 | void gfs2_thaw_freeze_initiator(struct super_block *sb) |
828 | { |
829 | struct gfs2_sbd *sdp = sb->s_fs_info; |
830 | |
831 | mutex_lock(&sdp->sd_freeze_mutex); |
832 | if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) |
833 | goto out; |
834 | |
	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
836 | |
837 | out: |
	mutex_unlock(&sdp->sd_freeze_mutex);
839 | } |
840 | |
841 | /** |
 * statfs_slow_fill - fill in the sc for a given RG
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Returns: 0 on success
847 | */ |
848 | |
849 | static int statfs_slow_fill(struct gfs2_rgrpd *rgd, |
850 | struct gfs2_statfs_change_host *sc) |
851 | { |
852 | gfs2_rgrp_verify(rgd); |
853 | sc->sc_total += rgd->rd_data; |
854 | sc->sc_free += rgd->rd_free; |
855 | sc->sc_dinodes += rgd->rd_dinodes; |
856 | return 0; |
857 | } |
858 | |
859 | /** |
860 | * gfs2_statfs_slow - Stat a filesystem using asynchronous locking |
861 | * @sdp: the filesystem |
862 | * @sc: the sc info that will be returned |
863 | * |
864 | * Any error (other than a signal) will cause this routine to fall back |
865 | * to the synchronous version. |
866 | * |
867 | * FIXME: This really shouldn't busy wait like this. |
868 | * |
869 | * Returns: errno |
870 | */ |
871 | |
872 | static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) |
873 | { |
874 | struct gfs2_rgrpd *rgd_next; |
875 | struct gfs2_holder *gha, *gh; |
876 | unsigned int slots = 64; |
877 | unsigned int x; |
878 | int done; |
879 | int error = 0, err; |
880 | |
881 | memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); |
	gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
883 | if (!gha) |
884 | return -ENOMEM; |
885 | for (x = 0; x < slots; x++) |
		gfs2_holder_mark_uninitialized(gha + x);
887 | |
888 | rgd_next = gfs2_rgrpd_get_first(sdp); |
889 | |
890 | for (;;) { |
891 | done = 1; |
892 | |
893 | for (x = 0; x < slots; x++) { |
894 | gh = gha + x; |
895 | |
896 | if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) { |
897 | err = gfs2_glock_wait(gh); |
898 | if (err) { |
899 | gfs2_holder_uninit(gh); |
900 | error = err; |
901 | } else { |
902 | if (!error) { |
903 | struct gfs2_rgrpd *rgd = |
						gfs2_glock2rgrp(gh->gh_gl);
905 | |
906 | error = statfs_slow_fill(rgd, sc); |
907 | } |
908 | gfs2_glock_dq_uninit(gh); |
909 | } |
910 | } |
911 | |
912 | if (gfs2_holder_initialized(gh)) |
913 | done = 0; |
914 | else if (rgd_next && !error) { |
				error = gfs2_glock_nq_init(rgd_next->rd_gl,
							   LM_ST_SHARED,
							   GL_ASYNC,
							   gh);
				rgd_next = gfs2_rgrpd_get_next(rgd_next);
920 | done = 0; |
921 | } |
922 | |
923 | if (signal_pending(current)) |
924 | error = -ERESTARTSYS; |
925 | } |
926 | |
927 | if (done) |
928 | break; |
929 | |
930 | yield(); |
931 | } |
932 | |
	kfree(gha);
934 | return error; |
935 | } |
936 | |
937 | /** |
938 | * gfs2_statfs_i - Do a statfs |
939 | * @sdp: the filesystem |
940 | * @sc: the sc structure |
941 | * |
942 | * Returns: errno |
943 | */ |
944 | |
945 | static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) |
946 | { |
947 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; |
948 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; |
949 | |
	spin_lock(&sdp->sd_statfs_spin);
951 | |
952 | *sc = *m_sc; |
953 | sc->sc_total += l_sc->sc_total; |
954 | sc->sc_free += l_sc->sc_free; |
955 | sc->sc_dinodes += l_sc->sc_dinodes; |
956 | |
	spin_unlock(&sdp->sd_statfs_spin);
958 | |
959 | if (sc->sc_free < 0) |
960 | sc->sc_free = 0; |
961 | if (sc->sc_free > sc->sc_total) |
962 | sc->sc_free = sc->sc_total; |
963 | if (sc->sc_dinodes < 0) |
964 | sc->sc_dinodes = 0; |
965 | |
966 | return 0; |
967 | } |
968 | |
969 | /** |
970 | * gfs2_statfs - Gather and return stats about the filesystem |
971 | * @dentry: The name of the link |
972 | * @buf: The buffer |
973 | * |
974 | * Returns: 0 on success or error code |
975 | */ |
976 | |
977 | static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) |
978 | { |
979 | struct super_block *sb = dentry->d_sb; |
980 | struct gfs2_sbd *sdp = sb->s_fs_info; |
981 | struct gfs2_statfs_change_host sc; |
982 | int error; |
983 | |
984 | error = gfs2_rindex_update(sdp); |
985 | if (error) |
986 | return error; |
987 | |
988 | if (gfs2_tune_get(sdp, gt_statfs_slow)) |
		error = gfs2_statfs_slow(sdp, &sc);
	else
		error = gfs2_statfs_i(sdp, &sc);
992 | |
993 | if (error) |
994 | return error; |
995 | |
996 | buf->f_type = GFS2_MAGIC; |
997 | buf->f_bsize = sdp->sd_sb.sb_bsize; |
998 | buf->f_blocks = sc.sc_total; |
999 | buf->f_bfree = sc.sc_free; |
1000 | buf->f_bavail = sc.sc_free; |
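	/*
	 * GFS2 allocates dinodes from the general block pool, so any free
	 * block could still become an inode.
	 */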
1001 | buf->f_files = sc.sc_dinodes + sc.sc_free; |
1002 | buf->f_ffree = sc.sc_free; |
1003 | buf->f_namelen = GFS2_FNAMESIZE; |
	buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
1005 | |
1006 | return 0; |
1007 | } |
1008 | |
1009 | /** |
1010 | * gfs2_drop_inode - Drop an inode (test for remote unlink) |
1011 | * @inode: The inode to drop |
1012 | * |
1013 | * If we've received a callback on an iopen lock then it's because a |
1014 | * remote node tried to deallocate the inode but failed due to this node |
1015 | * still having the inode open. Here we mark the link count zero |
1016 | * since we know that it must have reached zero if the GLF_DEMOTE flag |
1017 | * is set on the iopen glock. If we didn't do a disk read since the |
1018 | * remote node removed the final link then we might otherwise miss |
1019 | * this event. This check ensures that this node will deallocate the |
1020 | * inode's blocks, or alternatively pass the baton on to another |
1021 | * node for later deallocation. |
1022 | */ |
1023 | |
1024 | static int gfs2_drop_inode(struct inode *inode) |
1025 | { |
1026 | struct gfs2_inode *ip = GFS2_I(inode); |
1027 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1028 | |
1029 | if (inode->i_nlink && |
	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
1031 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; |
1032 | if (test_bit(GLF_DEMOTE, &gl->gl_flags)) |
1033 | clear_nlink(inode); |
1034 | } |
1035 | |
1036 | /* |
1037 | * When under memory pressure when an inode's link count has dropped to |
1038 | * zero, defer deleting the inode to the delete workqueue. This avoids |
1039 | * calling into DLM under memory pressure, which can deadlock. |
1040 | */ |
1041 | if (!inode->i_nlink && |
1042 | unlikely(current->flags & PF_MEMALLOC) && |
	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
1044 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; |
1045 | |
1046 | gfs2_glock_hold(gl); |
1047 | if (!gfs2_queue_try_to_evict(gl)) |
1048 | gfs2_glock_queue_put(gl); |
1049 | return 0; |
1050 | } |
1051 | |
1052 | /* |
1053 | * No longer cache inodes when trying to evict them all. |
1054 | */ |
1055 | if (test_bit(SDF_EVICTING, &sdp->sd_flags)) |
1056 | return 1; |
1057 | |
1058 | return generic_drop_inode(inode); |
1059 | } |
1060 | |
1061 | /** |
1062 | * gfs2_show_options - Show mount options for /proc/mounts |
1063 | * @s: seq_file structure |
1064 | * @root: root of this (sub)tree |
1065 | * |
1066 | * Returns: 0 on success or error code |
1067 | */ |
1068 | |
1069 | static int gfs2_show_options(struct seq_file *s, struct dentry *root) |
1070 | { |
1071 | struct gfs2_sbd *sdp = root->d_sb->s_fs_info; |
1072 | struct gfs2_args *args = &sdp->sd_args; |
1073 | unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum; |
1074 | |
	spin_lock(&sdp->sd_tune.gt_spin);
	logd_secs = sdp->sd_tune.gt_logd_secs;
	quota_quantum = sdp->sd_tune.gt_quota_quantum;
	statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
	statfs_slow = sdp->sd_tune.gt_statfs_slow;
	spin_unlock(&sdp->sd_tune.gt_spin);
1081 | |
1082 | if (is_subdir(root, sdp->sd_master_dir)) |
		seq_puts(s, ",meta");
	if (args->ar_lockproto[0])
		seq_show_option(s, "lockproto", args->ar_lockproto);
	if (args->ar_locktable[0])
		seq_show_option(s, "locktable", args->ar_locktable);
	if (args->ar_hostdata[0])
		seq_show_option(s, "hostdata", args->ar_hostdata);
	if (args->ar_spectator)
		seq_puts(s, ",spectator");
	if (args->ar_localflocks)
		seq_puts(s, ",localflocks");
	if (args->ar_debug)
		seq_puts(s, ",debug");
	if (args->ar_posix_acl)
		seq_puts(s, ",acl");
1098 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { |
1099 | char *state; |
		switch (args->ar_quota) {
		case GFS2_QUOTA_OFF:
			state = "off";
			break;
		case GFS2_QUOTA_ACCOUNT:
			state = "account";
			break;
		case GFS2_QUOTA_ON:
			state = "on";
			break;
		case GFS2_QUOTA_QUIET:
			state = "quiet";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",quota=%s", state);
1118 | } |
	if (args->ar_suiddir)
		seq_puts(s, ",suiddir");
	if (args->ar_data != GFS2_DATA_DEFAULT) {
		char *state;
		switch (args->ar_data) {
		case GFS2_DATA_WRITEBACK:
			state = "writeback";
			break;
		case GFS2_DATA_ORDERED:
			state = "ordered";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",data=%s", state);
	}
	if (args->ar_discard)
		seq_puts(s, ",discard");
	if (logd_secs != 30)
		seq_printf(s, ",commit=%d", logd_secs);
	if (statfs_quantum != 30)
		seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
	else if (statfs_slow)
		seq_puts(s, ",statfs_quantum=0");
	if (quota_quantum != 60)
		seq_printf(s, ",quota_quantum=%d", quota_quantum);
	if (args->ar_statfs_percent)
		seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
1148 | if (args->ar_errors != GFS2_ERRORS_DEFAULT) { |
1149 | const char *state; |
1150 | |
		switch (args->ar_errors) {
		case GFS2_ERRORS_WITHDRAW:
			state = "withdraw";
			break;
		case GFS2_ERRORS_PANIC:
			state = "panic";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",errors=%s", state);
	}
	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
		seq_puts(s, ",nobarrier");
	if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
		seq_puts(s, ",demote_interface_used");
	if (args->ar_rgrplvb)
		seq_puts(s, ",rgrplvb");
	if (args->ar_loccookie)
		seq_puts(s, ",loccookie");
1172 | return 0; |
1173 | } |
1174 | |
1175 | static void gfs2_final_release_pages(struct gfs2_inode *ip) |
1176 | { |
1177 | struct inode *inode = &ip->i_inode; |
1178 | struct gfs2_glock *gl = ip->i_gl; |
1179 | |
1180 | if (unlikely(!gl)) { |
1181 | /* This can only happen during incomplete inode creation. */ |
1182 | BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); |
1183 | return; |
1184 | } |
1185 | |
1186 | truncate_inode_pages(gfs2_glock2aspace(gl), 0); |
1187 | truncate_inode_pages(&inode->i_data, 0); |
1188 | |
	if (atomic_read(&gl->gl_revokes) == 0) {
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		clear_bit(GLF_DIRTY, &gl->gl_flags);
1192 | } |
1193 | } |
1194 | |
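/*
 * Free the dinode block itself.  All of the inode's other blocks must have
 * been deallocated by now, so the inode should hold exactly one block.
 */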
1195 | static int gfs2_dinode_dealloc(struct gfs2_inode *ip) |
1196 | { |
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1198 | struct gfs2_rgrpd *rgd; |
1199 | struct gfs2_holder gh; |
1200 | int error; |
1201 | |
	if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1203 | gfs2_consist_inode(ip); |
1204 | return -EIO; |
1205 | } |
1206 | |
1207 | gfs2_rindex_update(sdp); |
1208 | |
1209 | error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); |
1210 | if (error) |
1211 | return error; |
1212 | |
	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1214 | if (!rgd) { |
1215 | gfs2_consist_inode(ip); |
1216 | error = -EIO; |
1217 | goto out_qs; |
1218 | } |
1219 | |
	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
				   LM_FLAG_NODE_SCOPE, &gh);
1222 | if (error) |
1223 | goto out_qs; |
1224 | |
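	/*
	 * Note: one revoke per journal block is a generous upper bound for
	 * the revokes this transaction may need to write.
	 */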
	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
				 sdp->sd_jdesc->jd_blocks);
1227 | if (error) |
1228 | goto out_rg_gunlock; |
1229 | |
1230 | gfs2_free_di(rgd, ip); |
1231 | |
1232 | gfs2_final_release_pages(ip); |
1233 | |
1234 | gfs2_trans_end(sdp); |
1235 | |
1236 | out_rg_gunlock: |
	gfs2_glock_dq_uninit(&gh);
1238 | out_qs: |
1239 | gfs2_quota_unhold(ip); |
1240 | return error; |
1241 | } |
1242 | |
1243 | /** |
1244 | * gfs2_glock_put_eventually |
1245 | * @gl: The glock to put |
1246 | * |
1247 | * When under memory pressure, trigger a deferred glock put to make sure we |
1248 | * won't call into DLM and deadlock. Otherwise, put the glock directly. |
1249 | */ |
1250 | |
1251 | static void gfs2_glock_put_eventually(struct gfs2_glock *gl) |
1252 | { |
1253 | if (current->flags & PF_MEMALLOC) |
1254 | gfs2_glock_queue_put(gl); |
1255 | else |
1256 | gfs2_glock_put(gl); |
1257 | } |
1258 | |
1259 | static bool gfs2_upgrade_iopen_glock(struct inode *inode) |
1260 | { |
1261 | struct gfs2_inode *ip = GFS2_I(inode); |
1262 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1263 | struct gfs2_holder *gh = &ip->i_iopen_gh; |
1264 | long timeout = 5 * HZ; |
1265 | int error; |
1266 | |
1267 | gh->gh_flags |= GL_NOCACHE; |
1268 | gfs2_glock_dq_wait(gh); |
1269 | |
1270 | /* |
1271 | * If there are no other lock holders, we will immediately get |
1272 | * exclusive access to the iopen glock here. |
1273 | * |
1274 | * Otherwise, the other nodes holding the lock will be notified about |
1275 | * our locking request. If they do not have the inode open, they are |
1276 | * expected to evict the cached inode and release the lock, allowing us |
1277 | * to proceed. |
1278 | * |
1279 | * Otherwise, if they cannot evict the inode, they are expected to poke |
1280 | * the inode glock (note: not the iopen glock). We will notice that |
1281 | * and stop waiting for the iopen glock immediately. The other node(s) |
1282 | * are then expected to take care of deleting the inode when they no |
1283 | * longer use it. |
1284 | * |
1285 | * As a last resort, if another node keeps holding the iopen glock |
1286 | * without showing any activity on the inode glock, we will eventually |
1287 | * time out and fail the iopen glock upgrade. |
1288 | */ |
1289 | |
1290 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); |
1291 | error = gfs2_glock_nq(gh); |
1292 | if (error) |
1293 | return false; |
1294 | |
1295 | timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait, |
1296 | !test_bit(HIF_WAIT, &gh->gh_iflags) || |
1297 | test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags), |
1298 | timeout); |
1299 | if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { |
1300 | gfs2_glock_dq(gh); |
1301 | return false; |
1302 | } |
1303 | return gfs2_glock_holder_ready(gh) == 0; |
1304 | } |
1305 | |
1306 | /** |
1307 | * evict_should_delete - determine whether the inode is eligible for deletion |
1308 | * @inode: The inode to evict |
1309 | * @gh: The glock holder structure |
1310 | * |
1311 | * This function determines whether the evicted inode is eligible to be deleted |
1312 | * and locks the inode glock. |
1313 | * |
1314 | * Returns: the fate of the dinode |
1315 | */ |
1316 | static enum dinode_demise evict_should_delete(struct inode *inode, |
1317 | struct gfs2_holder *gh) |
1318 | { |
1319 | struct gfs2_inode *ip = GFS2_I(inode); |
1320 | struct super_block *sb = inode->i_sb; |
1321 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1322 | int ret; |
1323 | |
1324 | if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) |
1325 | goto should_delete; |
1326 | |
1327 | if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) |
1328 | return SHOULD_DEFER_EVICTION; |
1329 | |
1330 | /* Deletes should never happen under memory pressure anymore. */ |
1331 | if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) |
1332 | return SHOULD_DEFER_EVICTION; |
1333 | |
1334 | /* Must not read inode block until block type has been verified */ |
	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
1336 | if (unlikely(ret)) { |
		glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1340 | return SHOULD_DEFER_EVICTION; |
1341 | } |
1342 | |
	if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
1344 | return SHOULD_NOT_DELETE_DINODE; |
	ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
1346 | if (ret) |
1347 | return SHOULD_NOT_DELETE_DINODE; |
1348 | |
1349 | ret = gfs2_instantiate(gh); |
1350 | if (ret) |
1351 | return SHOULD_NOT_DELETE_DINODE; |
1352 | |
1353 | /* |
1354 | * The inode may have been recreated in the meantime. |
1355 | */ |
1356 | if (inode->i_nlink) |
1357 | return SHOULD_NOT_DELETE_DINODE; |
1358 | |
1359 | should_delete: |
	if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
1361 | test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { |
1362 | if (!gfs2_upgrade_iopen_glock(inode)) { |
			gfs2_holder_uninit(&ip->i_iopen_gh);
1364 | return SHOULD_NOT_DELETE_DINODE; |
1365 | } |
1366 | } |
1367 | return SHOULD_DELETE_DINODE; |
1368 | } |
1369 | |
1370 | /** |
1371 | * evict_unlinked_inode - delete the pieces of an unlinked evicted inode |
1372 | * @inode: The inode to evict |
1373 | */ |
1374 | static int evict_unlinked_inode(struct inode *inode) |
1375 | { |
1376 | struct gfs2_inode *ip = GFS2_I(inode); |
1377 | int ret; |
1378 | |
1379 | if (S_ISDIR(inode->i_mode) && |
1380 | (ip->i_diskflags & GFS2_DIF_EXHASH)) { |
		ret = gfs2_dir_exhash_dealloc(ip);
1382 | if (ret) |
1383 | goto out; |
1384 | } |
1385 | |
1386 | if (ip->i_eattr) { |
1387 | ret = gfs2_ea_dealloc(ip); |
1388 | if (ret) |
1389 | goto out; |
1390 | } |
1391 | |
1392 | if (!gfs2_is_stuffed(ip)) { |
1393 | ret = gfs2_file_dealloc(ip); |
1394 | if (ret) |
1395 | goto out; |
1396 | } |
1397 | |
1398 | /* |
1399 | * As soon as we clear the bitmap for the dinode, gfs2_create_inode() |
1400 | * can get called to recreate it, or even gfs2_inode_lookup() if the |
1401 | * inode was recreated on another node in the meantime. |
1402 | * |
1403 | * However, inserting the new inode into the inode hash table will not |
1404 | * succeed until the old inode is removed, and that only happens after |
1405 | * ->evict_inode() returns. The new inode is attached to its inode and |
1406 | * iopen glocks after inserting it into the inode hash table, so at |
1407 | * that point we can be sure that both glocks are unused. |
1408 | */ |
1409 | |
1410 | ret = gfs2_dinode_dealloc(ip); |
1411 | if (!ret && ip->i_gl) |
		gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
1413 | |
1414 | out: |
1415 | return ret; |
1416 | } |
1417 | |
1418 | /* |
1419 | * evict_linked_inode - evict an inode whose dinode has not been unlinked |
1420 | * @inode: The inode to evict |
1421 | */ |
1422 | static int evict_linked_inode(struct inode *inode) |
1423 | { |
1424 | struct super_block *sb = inode->i_sb; |
1425 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1426 | struct gfs2_inode *ip = GFS2_I(inode); |
1427 | struct address_space *metamapping; |
1428 | int ret; |
1429 | |
	gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
1431 | GFS2_LFC_EVICT_INODE); |
	metamapping = gfs2_glock2aspace(ip->i_gl);
1433 | if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { |
1434 | filemap_fdatawrite(metamapping); |
		filemap_fdatawait(metamapping);
1436 | } |
	write_inode_now(inode, 1);
	gfs2_ail_flush(ip->i_gl, 0);
1439 | |
	ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1441 | if (ret) |
1442 | return ret; |
1443 | |
1444 | /* Needs to be done before glock release & also in a transaction */ |
1445 | truncate_inode_pages(&inode->i_data, 0); |
1446 | truncate_inode_pages(metamapping, 0); |
1447 | gfs2_trans_end(sdp); |
1448 | return 0; |
1449 | } |
1450 | |
1451 | /** |
1452 | * gfs2_evict_inode - Remove an inode from cache |
1453 | * @inode: The inode to evict |
1454 | * |
1455 | * There are three cases to consider: |
1456 | * 1. i_nlink == 0, we are final opener (and must deallocate) |
1457 | * 2. i_nlink == 0, we are not the final opener (and cannot deallocate) |
1458 | * 3. i_nlink > 0 |
1459 | * |
1460 | * If the fs is read only, then we have to treat all cases as per #3 |
1461 | * since we are unable to do any deallocation. The inode will be |
1462 | * deallocated by the next read/write node to attempt an allocation |
1463 | * in the same resource group |
1464 | * |
 * We have to (at the moment) hold the inode's main lock to cover
1466 | * the gap between unlocking the shared lock on the iopen lock and |
1467 | * taking the exclusive lock. I'd rather do a shared -> exclusive |
1468 | * conversion on the iopen lock, but we can change that later. This |
1469 | * is safe, just less efficient. |
1470 | */ |
1471 | |
1472 | static void gfs2_evict_inode(struct inode *inode) |
1473 | { |
1474 | struct super_block *sb = inode->i_sb; |
1475 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1476 | struct gfs2_inode *ip = GFS2_I(inode); |
1477 | struct gfs2_holder gh; |
1478 | int ret; |
1479 | |
1480 | if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) |
1481 | goto out; |
1482 | |
1483 | /* |
1484 | * In case of an incomplete mount, gfs2_evict_inode() may be called for |
1485 | * system files without having an active journal to write to. In that |
1486 | * case, skip the filesystem evict. |
1487 | */ |
1488 | if (!sdp->sd_jdesc) |
1489 | goto out; |
1490 | |
	gfs2_holder_mark_uninitialized(&gh);
	ret = evict_should_delete(inode, &gh);
1493 | if (ret == SHOULD_DEFER_EVICTION) |
1494 | goto out; |
1495 | if (ret == SHOULD_DELETE_DINODE) |
1496 | ret = evict_unlinked_inode(inode); |
1497 | else |
1498 | ret = evict_linked_inode(inode); |
1499 | |
	if (gfs2_rs_active(&ip->i_res))
		gfs2_rs_deltree(&ip->i_res);
1502 | |
	if (gfs2_holder_initialized(&gh))
		gfs2_glock_dq_uninit(&gh);
	if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
		fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
1507 | out: |
1508 | truncate_inode_pages_final(&inode->i_data); |
1509 | if (ip->i_qadata) |
1510 | gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); |
	gfs2_rs_deltree(&ip->i_res);
1512 | gfs2_ordered_del_inode(ip); |
1513 | clear_inode(inode); |
1514 | gfs2_dir_hash_inval(ip); |
	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
1516 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; |
1517 | |
		glock_clear_object(gl, ip);
1519 | gfs2_glock_hold(gl); |
1520 | ip->i_iopen_gh.gh_flags |= GL_NOCACHE; |
		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1522 | gfs2_glock_put_eventually(gl); |
1523 | } |
1524 | if (ip->i_gl) { |
		glock_clear_object(ip->i_gl, ip);
		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
		gfs2_glock_add_to_lru(ip->i_gl);
		gfs2_glock_put_eventually(ip->i_gl);
		rcu_assign_pointer(ip->i_gl, NULL);
1530 | } |
1531 | } |
1532 | |
1533 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
1534 | { |
1535 | struct gfs2_inode *ip; |
1536 | |
	ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
1538 | if (!ip) |
1539 | return NULL; |
1540 | ip->i_no_addr = 0; |
1541 | ip->i_flags = 0; |
1542 | ip->i_gl = NULL; |
	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
1544 | memset(&ip->i_res, 0, sizeof(ip->i_res)); |
1545 | RB_CLEAR_NODE(&ip->i_res.rs_node); |
1546 | ip->i_rahead = 0; |
1547 | return &ip->i_inode; |
1548 | } |
1549 | |
1550 | static void gfs2_free_inode(struct inode *inode) |
1551 | { |
	kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
1553 | } |
1554 | |
1555 | void free_local_statfs_inodes(struct gfs2_sbd *sdp) |
1556 | { |
1557 | struct local_statfs_inode *lsi, *safe; |
1558 | |
1559 | /* Run through the statfs inodes list to iput and free memory */ |
1560 | list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) { |
1561 | if (lsi->si_jid == sdp->sd_jdesc->jd_jid) |
1562 | sdp->sd_sc_inode = NULL; /* belongs to this node */ |
1563 | if (lsi->si_sc_inode) |
1564 | iput(lsi->si_sc_inode); |
		list_del(&lsi->si_list);
		kfree(lsi);
1567 | } |
1568 | } |
1569 | |
1570 | struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, |
1571 | unsigned int index) |
1572 | { |
1573 | struct local_statfs_inode *lsi; |
1574 | |
1575 | /* Return the local (per node) statfs inode in the |
1576 | * sdp->sd_sc_inodes_list corresponding to the 'index'. */ |
1577 | list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) { |
1578 | if (lsi->si_jid == index) |
1579 | return lsi->si_sc_inode; |
1580 | } |
1581 | return NULL; |
1582 | } |
1583 | |
1584 | const struct super_operations gfs2_super_ops = { |
1585 | .alloc_inode = gfs2_alloc_inode, |
1586 | .free_inode = gfs2_free_inode, |
1587 | .write_inode = gfs2_write_inode, |
1588 | .dirty_inode = gfs2_dirty_inode, |
1589 | .evict_inode = gfs2_evict_inode, |
1590 | .put_super = gfs2_put_super, |
1591 | .sync_fs = gfs2_sync_fs, |
1592 | .freeze_super = gfs2_freeze_super, |
1593 | .freeze_fs = gfs2_freeze_fs, |
1594 | .thaw_super = gfs2_thaw_super, |
1595 | .statfs = gfs2_statfs, |
1596 | .drop_inode = gfs2_drop_inode, |
1597 | .show_options = gfs2_show_options, |
1598 | }; |