1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
4 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. |
5 | */ |
6 | |
7 | #include <linux/slab.h> |
8 | #include <linux/spinlock.h> |
9 | #include <linux/compat.h> |
10 | #include <linux/completion.h> |
11 | #include <linux/buffer_head.h> |
12 | #include <linux/pagemap.h> |
13 | #include <linux/uio.h> |
14 | #include <linux/blkdev.h> |
15 | #include <linux/mm.h> |
16 | #include <linux/mount.h> |
17 | #include <linux/fs.h> |
18 | #include <linux/filelock.h> |
19 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/falloc.h> |
21 | #include <linux/swap.h> |
22 | #include <linux/crc32.h> |
23 | #include <linux/writeback.h> |
24 | #include <linux/uaccess.h> |
25 | #include <linux/dlm.h> |
26 | #include <linux/dlm_plock.h> |
27 | #include <linux/delay.h> |
28 | #include <linux/backing-dev.h> |
29 | #include <linux/fileattr.h> |
30 | |
31 | #include "gfs2.h" |
32 | #include "incore.h" |
33 | #include "bmap.h" |
34 | #include "aops.h" |
35 | #include "dir.h" |
36 | #include "glock.h" |
37 | #include "glops.h" |
38 | #include "inode.h" |
39 | #include "log.h" |
40 | #include "meta_io.h" |
41 | #include "quota.h" |
42 | #include "rgrp.h" |
43 | #include "trans.h" |
44 | #include "util.h" |
45 | |
46 | /** |
47 | * gfs2_llseek - seek to a location in a file |
48 | * @file: the file |
49 | * @offset: the offset |
50 | * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) |
51 | * |
52 | * SEEK_END requires the glock for the file because it references the |
53 | * file's size. |
54 | * |
55 | * Returns: The new offset, or errno |
56 | */ |
57 | |
58 | static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence) |
59 | { |
60 | struct gfs2_inode *ip = GFS2_I(inode: file->f_mapping->host); |
61 | struct gfs2_holder i_gh; |
62 | loff_t error; |
63 | |
64 | switch (whence) { |
65 | case SEEK_END: |
66 | error = gfs2_glock_nq_init(gl: ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
67 | gh: &i_gh); |
68 | if (!error) { |
69 | error = generic_file_llseek(file, offset, whence); |
70 | gfs2_glock_dq_uninit(gh: &i_gh); |
71 | } |
72 | break; |
73 | |
74 | case SEEK_DATA: |
75 | error = gfs2_seek_data(file, offset); |
76 | break; |
77 | |
78 | case SEEK_HOLE: |
79 | error = gfs2_seek_hole(file, offset); |
80 | break; |
81 | |
82 | case SEEK_CUR: |
83 | case SEEK_SET: |
84 | /* |
85 | * These don't reference inode->i_size and don't depend on the |
86 | * block mapping, so we don't need the glock. |
87 | */ |
88 | error = generic_file_llseek(file, offset, whence); |
89 | break; |
90 | default: |
91 | error = -EINVAL; |
92 | } |
93 | |
94 | return error; |
95 | } |
96 | |
97 | /** |
98 | * gfs2_readdir - Iterator for a directory |
99 | * @file: The directory to read from |
100 | * @ctx: What to feed directory entries to |
101 | * |
102 | * Returns: errno |
103 | */ |
104 | |
105 | static int gfs2_readdir(struct file *file, struct dir_context *ctx) |
106 | { |
107 | struct inode *dir = file->f_mapping->host; |
108 | struct gfs2_inode *dip = GFS2_I(inode: dir); |
109 | struct gfs2_holder d_gh; |
110 | int error; |
111 | |
112 | error = gfs2_glock_nq_init(gl: dip->i_gl, LM_ST_SHARED, flags: 0, gh: &d_gh); |
113 | if (error) |
114 | return error; |
115 | |
116 | error = gfs2_dir_read(inode: dir, ctx, f_ra: &file->f_ra); |
117 | |
118 | gfs2_glock_dq_uninit(gh: &d_gh); |
119 | |
120 | return error; |
121 | } |
122 | |
123 | /* |
124 | * struct fsflag_gfs2flag |
125 | * |
126 | * The FS_JOURNAL_DATA_FL flag maps to GFS2_DIF_INHERIT_JDATA for directories, |
127 | * and to GFS2_DIF_JDATA for non-directories. |
128 | */ |
129 | static struct { |
130 | u32 fsflag; |
131 | u32 gfsflag; |
132 | } fsflag_gfs2flag[] = { |
133 | {FS_SYNC_FL, GFS2_DIF_SYNC}, |
134 | {FS_IMMUTABLE_FL, GFS2_DIF_IMMUTABLE}, |
135 | {FS_APPEND_FL, GFS2_DIF_APPENDONLY}, |
136 | {FS_NOATIME_FL, GFS2_DIF_NOATIME}, |
137 | {FS_INDEX_FL, GFS2_DIF_EXHASH}, |
138 | {FS_TOPDIR_FL, GFS2_DIF_TOPDIR}, |
139 | {FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA}, |
140 | }; |
141 | |
142 | static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags) |
143 | { |
144 | int i; |
145 | u32 fsflags = 0; |
146 | |
147 | if (S_ISDIR(inode->i_mode)) |
148 | gfsflags &= ~GFS2_DIF_JDATA; |
149 | else |
150 | gfsflags &= ~GFS2_DIF_INHERIT_JDATA; |
151 | |
152 | for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) |
153 | if (gfsflags & fsflag_gfs2flag[i].gfsflag) |
154 | fsflags |= fsflag_gfs2flag[i].fsflag; |
155 | return fsflags; |
156 | } |
157 | |
158 | int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa) |
159 | { |
160 | struct inode *inode = d_inode(dentry); |
161 | struct gfs2_inode *ip = GFS2_I(inode); |
162 | struct gfs2_holder gh; |
163 | int error; |
164 | u32 fsflags; |
165 | |
166 | if (d_is_special(dentry)) |
167 | return -ENOTTY; |
168 | |
169 | gfs2_holder_init(gl: ip->i_gl, LM_ST_SHARED, flags: 0, gh: &gh); |
170 | error = gfs2_glock_nq(gh: &gh); |
171 | if (error) |
172 | goto out_uninit; |
173 | |
174 | fsflags = gfs2_gfsflags_to_fsflags(inode, gfsflags: ip->i_diskflags); |
175 | |
176 | fileattr_fill_flags(fa, flags: fsflags); |
177 | |
178 | gfs2_glock_dq(gh: &gh); |
179 | out_uninit: |
180 | gfs2_holder_uninit(gh: &gh); |
181 | return error; |
182 | } |
183 | |
184 | void gfs2_set_inode_flags(struct inode *inode) |
185 | { |
186 | struct gfs2_inode *ip = GFS2_I(inode); |
187 | unsigned int flags = inode->i_flags; |
188 | |
189 | flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); |
190 | if ((ip->i_eattr == 0) && !is_sxid(mode: inode->i_mode)) |
191 | flags |= S_NOSEC; |
192 | if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) |
193 | flags |= S_IMMUTABLE; |
194 | if (ip->i_diskflags & GFS2_DIF_APPENDONLY) |
195 | flags |= S_APPEND; |
196 | if (ip->i_diskflags & GFS2_DIF_NOATIME) |
197 | flags |= S_NOATIME; |
198 | if (ip->i_diskflags & GFS2_DIF_SYNC) |
199 | flags |= S_SYNC; |
200 | inode->i_flags = flags; |
201 | } |
202 | |
/* On-disk inode flags (GFS2_DIF_*) that user space is allowed to change */
#define GFS2_FLAGS_USER_SET			\
	(GFS2_DIF_SYNC |			\
	 GFS2_DIF_IMMUTABLE |			\
	 GFS2_DIF_APPENDONLY |			\
	 GFS2_DIF_NOATIME |			\
	 GFS2_DIF_TOPDIR |			\
	 GFS2_DIF_JDATA |			\
	 GFS2_DIF_INHERIT_JDATA)
211 | |
212 | /** |
213 | * do_gfs2_set_flags - set flags on an inode |
214 | * @inode: The inode |
215 | * @reqflags: The flags to set |
216 | * @mask: Indicates which flags are valid |
217 | * |
218 | */ |
219 | static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) |
220 | { |
221 | struct gfs2_inode *ip = GFS2_I(inode); |
222 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
223 | struct buffer_head *bh; |
224 | struct gfs2_holder gh; |
225 | int error; |
226 | u32 new_flags, flags; |
227 | |
228 | error = gfs2_glock_nq_init(gl: ip->i_gl, LM_ST_EXCLUSIVE, flags: 0, gh: &gh); |
229 | if (error) |
230 | return error; |
231 | |
232 | error = 0; |
233 | flags = ip->i_diskflags; |
234 | new_flags = (flags & ~mask) | (reqflags & mask); |
235 | if ((new_flags ^ flags) == 0) |
236 | goto out; |
237 | |
238 | if (!IS_IMMUTABLE(inode)) { |
239 | error = gfs2_permission(idmap: &nop_mnt_idmap, inode, MAY_WRITE); |
240 | if (error) |
241 | goto out; |
242 | } |
243 | if ((flags ^ new_flags) & GFS2_DIF_JDATA) { |
244 | if (new_flags & GFS2_DIF_JDATA) |
245 | gfs2_log_flush(sdp, gl: ip->i_gl, |
246 | GFS2_LOG_HEAD_FLUSH_NORMAL | |
247 | GFS2_LFC_SET_FLAGS); |
248 | error = filemap_fdatawrite(inode->i_mapping); |
249 | if (error) |
250 | goto out; |
251 | error = filemap_fdatawait(mapping: inode->i_mapping); |
252 | if (error) |
253 | goto out; |
254 | if (new_flags & GFS2_DIF_JDATA) |
255 | gfs2_ordered_del_inode(ip); |
256 | } |
257 | error = gfs2_trans_begin(sdp, RES_DINODE, revokes: 0); |
258 | if (error) |
259 | goto out; |
260 | error = gfs2_meta_inode_buffer(ip, bhp: &bh); |
261 | if (error) |
262 | goto out_trans_end; |
263 | inode_set_ctime_current(inode); |
264 | gfs2_trans_add_meta(gl: ip->i_gl, bh); |
265 | ip->i_diskflags = new_flags; |
266 | gfs2_dinode_out(ip, buf: bh->b_data); |
267 | brelse(bh); |
268 | gfs2_set_inode_flags(inode); |
269 | gfs2_set_aops(inode); |
270 | out_trans_end: |
271 | gfs2_trans_end(sdp); |
272 | out: |
273 | gfs2_glock_dq_uninit(gh: &gh); |
274 | return error; |
275 | } |
276 | |
277 | int gfs2_fileattr_set(struct mnt_idmap *idmap, |
278 | struct dentry *dentry, struct fileattr *fa) |
279 | { |
280 | struct inode *inode = d_inode(dentry); |
281 | u32 fsflags = fa->flags, gfsflags = 0; |
282 | u32 mask; |
283 | int i; |
284 | |
285 | if (d_is_special(dentry)) |
286 | return -ENOTTY; |
287 | |
288 | if (fileattr_has_fsx(fa)) |
289 | return -EOPNOTSUPP; |
290 | |
291 | for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) { |
292 | if (fsflags & fsflag_gfs2flag[i].fsflag) { |
293 | fsflags &= ~fsflag_gfs2flag[i].fsflag; |
294 | gfsflags |= fsflag_gfs2flag[i].gfsflag; |
295 | } |
296 | } |
297 | if (fsflags || gfsflags & ~GFS2_FLAGS_USER_SET) |
298 | return -EINVAL; |
299 | |
300 | mask = GFS2_FLAGS_USER_SET; |
301 | if (S_ISDIR(inode->i_mode)) { |
302 | mask &= ~GFS2_DIF_JDATA; |
303 | } else { |
304 | /* The GFS2_DIF_TOPDIR flag is only valid for directories. */ |
305 | if (gfsflags & GFS2_DIF_TOPDIR) |
306 | return -EINVAL; |
307 | mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA); |
308 | } |
309 | |
310 | return do_gfs2_set_flags(inode, reqflags: gfsflags, mask); |
311 | } |
312 | |
313 | static int gfs2_getlabel(struct file *filp, char __user *label) |
314 | { |
315 | struct inode *inode = file_inode(f: filp); |
316 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
317 | |
318 | if (copy_to_user(to: label, from: sdp->sd_sb.sb_locktable, GFS2_LOCKNAME_LEN)) |
319 | return -EFAULT; |
320 | |
321 | return 0; |
322 | } |
323 | |
324 | static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
325 | { |
326 | switch(cmd) { |
327 | case FITRIM: |
328 | return gfs2_fitrim(filp, argp: (void __user *)arg); |
329 | case FS_IOC_GETFSLABEL: |
330 | return gfs2_getlabel(filp, label: (char __user *)arg); |
331 | } |
332 | |
333 | return -ENOTTY; |
334 | } |
335 | |
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: converts the user pointer with compat_ptr()
 * and forwards the supported commands to gfs2_ioctl().  Unsupported
 * commands get -ENOIOCTLCMD.
 */
static long gfs2_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	/* Keep this list in sync with gfs2_ioctl */
	if (cmd != FITRIM && cmd != FS_IOC_GETFSLABEL)
		return -ENOIOCTLCMD;

	return gfs2_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
}
#else
#define gfs2_compat_ioctl NULL
#endif
353 | |
354 | /** |
355 | * gfs2_size_hint - Give a hint to the size of a write request |
356 | * @filep: The struct file |
357 | * @offset: The file offset of the write |
358 | * @size: The length of the write |
359 | * |
360 | * When we are about to do a write, this function records the total |
361 | * write size in order to provide a suitable hint to the lower layers |
362 | * about how many blocks will be required. |
363 | * |
364 | */ |
365 | |
366 | static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) |
367 | { |
368 | struct inode *inode = file_inode(f: filep); |
369 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
370 | struct gfs2_inode *ip = GFS2_I(inode); |
371 | size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; |
372 | int hint = min_t(size_t, INT_MAX, blks); |
373 | |
374 | if (hint > atomic_read(v: &ip->i_sizehint)) |
375 | atomic_set(v: &ip->i_sizehint, i: hint); |
376 | } |
377 | |
378 | /** |
379 | * gfs2_allocate_page_backing - Allocate blocks for a write fault |
380 | * @page: The (locked) page to allocate backing for |
381 | * @length: Size of the allocation |
382 | * |
383 | * We try to allocate all the blocks required for the page in one go. This |
384 | * might fail for various reasons, so we keep trying until all the blocks to |
385 | * back this page are allocated. If some of the blocks are already allocated, |
386 | * that is ok too. |
387 | */ |
388 | static int gfs2_allocate_page_backing(struct page *page, unsigned int length) |
389 | { |
390 | u64 pos = page_offset(page); |
391 | |
392 | do { |
393 | struct iomap iomap = { }; |
394 | |
395 | if (gfs2_iomap_alloc(inode: page->mapping->host, pos, length, iomap: &iomap)) |
396 | return -EIO; |
397 | |
398 | if (length < iomap.length) |
399 | iomap.length = length; |
400 | length -= iomap.length; |
401 | pos += iomap.length; |
402 | } while (length > 0); |
403 | |
404 | return 0; |
405 | } |
406 | |
407 | /** |
408 | * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable |
409 | * @vmf: The virtual memory fault containing the page to become writable |
410 | * |
411 | * When the page becomes writable, we need to ensure that we have |
412 | * blocks allocated on disk to back that page. |
413 | */ |
414 | |
415 | static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) |
416 | { |
417 | struct page *page = vmf->page; |
418 | struct inode *inode = file_inode(f: vmf->vma->vm_file); |
419 | struct gfs2_inode *ip = GFS2_I(inode); |
420 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
421 | struct gfs2_alloc_parms ap = {}; |
422 | u64 offset = page_offset(page); |
423 | unsigned int data_blocks, ind_blocks, rblocks; |
424 | vm_fault_t ret = VM_FAULT_LOCKED; |
425 | struct gfs2_holder gh; |
426 | unsigned int length; |
427 | loff_t size; |
428 | int err; |
429 | |
430 | sb_start_pagefault(sb: inode->i_sb); |
431 | |
432 | gfs2_holder_init(gl: ip->i_gl, LM_ST_EXCLUSIVE, flags: 0, gh: &gh); |
433 | err = gfs2_glock_nq(gh: &gh); |
434 | if (err) { |
435 | ret = vmf_fs_error(err); |
436 | goto out_uninit; |
437 | } |
438 | |
439 | /* Check page index against inode size */ |
440 | size = i_size_read(inode); |
441 | if (offset >= size) { |
442 | ret = VM_FAULT_SIGBUS; |
443 | goto out_unlock; |
444 | } |
445 | |
446 | /* Update file times before taking page lock */ |
447 | file_update_time(file: vmf->vma->vm_file); |
448 | |
449 | /* page is wholly or partially inside EOF */ |
450 | if (size - offset < PAGE_SIZE) |
451 | length = size - offset; |
452 | else |
453 | length = PAGE_SIZE; |
454 | |
455 | gfs2_size_hint(filep: vmf->vma->vm_file, offset, size: length); |
456 | |
457 | set_bit(nr: GLF_DIRTY, addr: &ip->i_gl->gl_flags); |
458 | set_bit(nr: GIF_SW_PAGED, addr: &ip->i_flags); |
459 | |
460 | /* |
461 | * iomap_writepage / iomap_writepages currently don't support inline |
462 | * files, so always unstuff here. |
463 | */ |
464 | |
465 | if (!gfs2_is_stuffed(ip) && |
466 | !gfs2_write_alloc_required(ip, offset, len: length)) { |
467 | lock_page(page); |
468 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { |
469 | ret = VM_FAULT_NOPAGE; |
470 | unlock_page(page); |
471 | } |
472 | goto out_unlock; |
473 | } |
474 | |
475 | err = gfs2_rindex_update(sdp); |
476 | if (err) { |
477 | ret = vmf_fs_error(err); |
478 | goto out_unlock; |
479 | } |
480 | |
481 | gfs2_write_calc_reserv(ip, len: length, data_blocks: &data_blocks, ind_blocks: &ind_blocks); |
482 | ap.target = data_blocks + ind_blocks; |
483 | err = gfs2_quota_lock_check(ip, ap: &ap); |
484 | if (err) { |
485 | ret = vmf_fs_error(err); |
486 | goto out_unlock; |
487 | } |
488 | err = gfs2_inplace_reserve(ip, ap: &ap); |
489 | if (err) { |
490 | ret = vmf_fs_error(err); |
491 | goto out_quota_unlock; |
492 | } |
493 | |
494 | rblocks = RES_DINODE + ind_blocks; |
495 | if (gfs2_is_jdata(ip)) |
496 | rblocks += data_blocks ? data_blocks : 1; |
497 | if (ind_blocks || data_blocks) { |
498 | rblocks += RES_STATFS + RES_QUOTA; |
499 | rblocks += gfs2_rg_blocks(ip, requested: data_blocks + ind_blocks); |
500 | } |
501 | err = gfs2_trans_begin(sdp, blocks: rblocks, revokes: 0); |
502 | if (err) { |
503 | ret = vmf_fs_error(err); |
504 | goto out_trans_fail; |
505 | } |
506 | |
507 | /* Unstuff, if required, and allocate backing blocks for page */ |
508 | if (gfs2_is_stuffed(ip)) { |
509 | err = gfs2_unstuff_dinode(ip); |
510 | if (err) { |
511 | ret = vmf_fs_error(err); |
512 | goto out_trans_end; |
513 | } |
514 | } |
515 | |
516 | lock_page(page); |
517 | /* If truncated, we must retry the operation, we may have raced |
518 | * with the glock demotion code. |
519 | */ |
520 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { |
521 | ret = VM_FAULT_NOPAGE; |
522 | goto out_page_locked; |
523 | } |
524 | |
525 | err = gfs2_allocate_page_backing(page, length); |
526 | if (err) |
527 | ret = vmf_fs_error(err); |
528 | |
529 | out_page_locked: |
530 | if (ret != VM_FAULT_LOCKED) |
531 | unlock_page(page); |
532 | out_trans_end: |
533 | gfs2_trans_end(sdp); |
534 | out_trans_fail: |
535 | gfs2_inplace_release(ip); |
536 | out_quota_unlock: |
537 | gfs2_quota_unlock(ip); |
538 | out_unlock: |
539 | gfs2_glock_dq(gh: &gh); |
540 | out_uninit: |
541 | gfs2_holder_uninit(gh: &gh); |
542 | if (ret == VM_FAULT_LOCKED) { |
543 | set_page_dirty(page); |
544 | wait_for_stable_page(page); |
545 | } |
546 | sb_end_pagefault(sb: inode->i_sb); |
547 | return ret; |
548 | } |
549 | |
550 | static vm_fault_t gfs2_fault(struct vm_fault *vmf) |
551 | { |
552 | struct inode *inode = file_inode(f: vmf->vma->vm_file); |
553 | struct gfs2_inode *ip = GFS2_I(inode); |
554 | struct gfs2_holder gh; |
555 | vm_fault_t ret; |
556 | int err; |
557 | |
558 | gfs2_holder_init(gl: ip->i_gl, LM_ST_SHARED, flags: 0, gh: &gh); |
559 | err = gfs2_glock_nq(gh: &gh); |
560 | if (err) { |
561 | ret = vmf_fs_error(err); |
562 | goto out_uninit; |
563 | } |
564 | ret = filemap_fault(vmf); |
565 | gfs2_glock_dq(gh: &gh); |
566 | out_uninit: |
567 | gfs2_holder_uninit(gh: &gh); |
568 | return ret; |
569 | } |
570 | |
571 | static const struct vm_operations_struct gfs2_vm_ops = { |
572 | .fault = gfs2_fault, |
573 | .map_pages = filemap_map_pages, |
574 | .page_mkwrite = gfs2_page_mkwrite, |
575 | }; |
576 | |
577 | /** |
578 | * gfs2_mmap |
579 | * @file: The file to map |
580 | * @vma: The VMA which described the mapping |
581 | * |
582 | * There is no need to get a lock here unless we should be updating |
583 | * atime. We ignore any locking errors since the only consequence is |
584 | * a missed atime update (which will just be deferred until later). |
585 | * |
586 | * Returns: 0 |
587 | */ |
588 | |
589 | static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) |
590 | { |
591 | struct gfs2_inode *ip = GFS2_I(inode: file->f_mapping->host); |
592 | |
593 | if (!(file->f_flags & O_NOATIME) && |
594 | !IS_NOATIME(&ip->i_inode)) { |
595 | struct gfs2_holder i_gh; |
596 | int error; |
597 | |
598 | error = gfs2_glock_nq_init(gl: ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
599 | gh: &i_gh); |
600 | if (error) |
601 | return error; |
602 | /* grab lock to update inode */ |
603 | gfs2_glock_dq_uninit(gh: &i_gh); |
604 | file_accessed(file); |
605 | } |
606 | vma->vm_ops = &gfs2_vm_ops; |
607 | |
608 | return 0; |
609 | } |
610 | |
611 | /** |
612 | * gfs2_open_common - This is common to open and atomic_open |
613 | * @inode: The inode being opened |
614 | * @file: The file being opened |
615 | * |
616 | * This maybe called under a glock or not depending upon how it has |
617 | * been called. We must always be called under a glock for regular |
618 | * files, however. For other file types, it does not matter whether |
619 | * we hold the glock or not. |
620 | * |
621 | * Returns: Error code or 0 for success |
622 | */ |
623 | |
624 | int gfs2_open_common(struct inode *inode, struct file *file) |
625 | { |
626 | struct gfs2_file *fp; |
627 | int ret; |
628 | |
629 | if (S_ISREG(inode->i_mode)) { |
630 | ret = generic_file_open(inode, filp: file); |
631 | if (ret) |
632 | return ret; |
633 | |
634 | if (!gfs2_is_jdata(ip: GFS2_I(inode))) |
635 | file->f_mode |= FMODE_CAN_ODIRECT; |
636 | } |
637 | |
638 | fp = kzalloc(size: sizeof(struct gfs2_file), GFP_NOFS); |
639 | if (!fp) |
640 | return -ENOMEM; |
641 | |
642 | mutex_init(&fp->f_fl_mutex); |
643 | |
644 | gfs2_assert_warn(GFS2_SB(inode), !file->private_data); |
645 | file->private_data = fp; |
646 | if (file->f_mode & FMODE_WRITE) { |
647 | ret = gfs2_qa_get(ip: GFS2_I(inode)); |
648 | if (ret) |
649 | goto fail; |
650 | } |
651 | return 0; |
652 | |
653 | fail: |
654 | kfree(objp: file->private_data); |
655 | file->private_data = NULL; |
656 | return ret; |
657 | } |
658 | |
659 | /** |
660 | * gfs2_open - open a file |
661 | * @inode: the inode to open |
662 | * @file: the struct file for this opening |
663 | * |
664 | * After atomic_open, this function is only used for opening files |
665 | * which are already cached. We must still get the glock for regular |
666 | * files to ensure that we have the file size uptodate for the large |
667 | * file check which is in the common code. That is only an issue for |
668 | * regular files though. |
669 | * |
670 | * Returns: errno |
671 | */ |
672 | |
673 | static int gfs2_open(struct inode *inode, struct file *file) |
674 | { |
675 | struct gfs2_inode *ip = GFS2_I(inode); |
676 | struct gfs2_holder i_gh; |
677 | int error; |
678 | bool need_unlock = false; |
679 | |
680 | if (S_ISREG(ip->i_inode.i_mode)) { |
681 | error = gfs2_glock_nq_init(gl: ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
682 | gh: &i_gh); |
683 | if (error) |
684 | return error; |
685 | need_unlock = true; |
686 | } |
687 | |
688 | error = gfs2_open_common(inode, file); |
689 | |
690 | if (need_unlock) |
691 | gfs2_glock_dq_uninit(gh: &i_gh); |
692 | |
693 | return error; |
694 | } |
695 | |
696 | /** |
697 | * gfs2_release - called to close a struct file |
698 | * @inode: the inode the struct file belongs to |
699 | * @file: the struct file being closed |
700 | * |
701 | * Returns: errno |
702 | */ |
703 | |
704 | static int gfs2_release(struct inode *inode, struct file *file) |
705 | { |
706 | struct gfs2_inode *ip = GFS2_I(inode); |
707 | |
708 | kfree(objp: file->private_data); |
709 | file->private_data = NULL; |
710 | |
711 | if (file->f_mode & FMODE_WRITE) { |
712 | if (gfs2_rs_active(rs: &ip->i_res)) |
713 | gfs2_rs_delete(ip); |
714 | gfs2_qa_put(ip); |
715 | } |
716 | return 0; |
717 | } |
718 | |
719 | /** |
720 | * gfs2_fsync - sync the dirty data for a file (across the cluster) |
721 | * @file: the file that points to the dentry |
722 | * @start: the start position in the file to sync |
723 | * @end: the end position in the file to sync |
724 | * @datasync: set if we can ignore timestamp changes |
725 | * |
726 | * We split the data flushing here so that we don't wait for the data |
727 | * until after we've also sent the metadata to disk. Note that for |
728 | * data=ordered, we will write & wait for the data at the log flush |
729 | * stage anyway, so this is unlikely to make much of a difference |
730 | * except in the data=writeback case. |
731 | * |
732 | * If the fdatawrite fails due to any reason except -EIO, we will |
733 | * continue the remainder of the fsync, although we'll still report |
734 | * the error at the end. This is to match filemap_write_and_wait_range() |
735 | * behaviour. |
736 | * |
737 | * Returns: errno |
738 | */ |
739 | |
740 | static int gfs2_fsync(struct file *file, loff_t start, loff_t end, |
741 | int datasync) |
742 | { |
743 | struct address_space *mapping = file->f_mapping; |
744 | struct inode *inode = mapping->host; |
745 | int sync_state = inode->i_state & I_DIRTY; |
746 | struct gfs2_inode *ip = GFS2_I(inode); |
747 | int ret = 0, ret1 = 0; |
748 | |
749 | if (mapping->nrpages) { |
750 | ret1 = filemap_fdatawrite_range(mapping, start, end); |
751 | if (ret1 == -EIO) |
752 | return ret1; |
753 | } |
754 | |
755 | if (!gfs2_is_jdata(ip)) |
756 | sync_state &= ~I_DIRTY_PAGES; |
757 | if (datasync) |
758 | sync_state &= ~I_DIRTY_SYNC; |
759 | |
760 | if (sync_state) { |
761 | ret = sync_inode_metadata(inode, wait: 1); |
762 | if (ret) |
763 | return ret; |
764 | if (gfs2_is_jdata(ip)) |
765 | ret = file_write_and_wait(file); |
766 | if (ret) |
767 | return ret; |
768 | gfs2_ail_flush(gl: ip->i_gl, fsync: 1); |
769 | } |
770 | |
771 | if (mapping->nrpages) |
772 | ret = file_fdatawait_range(file, lstart: start, lend: end); |
773 | |
774 | return ret ? ret : ret1; |
775 | } |
776 | |
777 | static inline bool should_fault_in_pages(struct iov_iter *i, |
778 | struct kiocb *iocb, |
779 | size_t *prev_count, |
780 | size_t *window_size) |
781 | { |
782 | size_t count = iov_iter_count(i); |
783 | size_t size, offs; |
784 | |
785 | if (!count) |
786 | return false; |
787 | if (!user_backed_iter(i)) |
788 | return false; |
789 | |
790 | /* |
791 | * Try to fault in multiple pages initially. When that doesn't result |
792 | * in any progress, fall back to a single page. |
793 | */ |
794 | size = PAGE_SIZE; |
795 | offs = offset_in_page(iocb->ki_pos); |
796 | if (*prev_count != count) { |
797 | size_t nr_dirtied; |
798 | |
799 | nr_dirtied = max(current->nr_dirtied_pause - |
800 | current->nr_dirtied, 8); |
801 | size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT); |
802 | } |
803 | |
804 | *prev_count = count; |
805 | *window_size = size - offs; |
806 | return true; |
807 | } |
808 | |
809 | static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, |
810 | struct gfs2_holder *gh) |
811 | { |
812 | struct file *file = iocb->ki_filp; |
813 | struct gfs2_inode *ip = GFS2_I(inode: file->f_mapping->host); |
814 | size_t prev_count = 0, window_size = 0; |
815 | size_t read = 0; |
816 | ssize_t ret; |
817 | |
818 | /* |
819 | * In this function, we disable page faults when we're holding the |
820 | * inode glock while doing I/O. If a page fault occurs, we indicate |
821 | * that the inode glock may be dropped, fault in the pages manually, |
822 | * and retry. |
823 | * |
824 | * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger |
825 | * physical as well as manual page faults, and we need to disable both |
826 | * kinds. |
827 | * |
828 | * For direct I/O, gfs2 takes the inode glock in deferred mode. This |
829 | * locking mode is compatible with other deferred holders, so multiple |
830 | * processes and nodes can do direct I/O to a file at the same time. |
831 | * There's no guarantee that reads or writes will be atomic. Any |
832 | * coordination among readers and writers needs to happen externally. |
833 | */ |
834 | |
835 | if (!iov_iter_count(i: to)) |
836 | return 0; /* skip atime */ |
837 | |
838 | gfs2_holder_init(gl: ip->i_gl, LM_ST_DEFERRED, flags: 0, gh); |
839 | retry: |
840 | ret = gfs2_glock_nq(gh); |
841 | if (ret) |
842 | goto out_uninit; |
843 | pagefault_disable(); |
844 | to->nofault = true; |
845 | ret = iomap_dio_rw(iocb, iter: to, ops: &gfs2_iomap_ops, NULL, |
846 | IOMAP_DIO_PARTIAL, NULL, done_before: read); |
847 | to->nofault = false; |
848 | pagefault_enable(); |
849 | if (ret <= 0 && ret != -EFAULT) |
850 | goto out_unlock; |
851 | /* No increment (+=) because iomap_dio_rw returns a cumulative value. */ |
852 | if (ret > 0) |
853 | read = ret; |
854 | |
855 | if (should_fault_in_pages(i: to, iocb, prev_count: &prev_count, window_size: &window_size)) { |
856 | gfs2_glock_dq(gh); |
857 | window_size -= fault_in_iov_iter_writeable(i: to, bytes: window_size); |
858 | if (window_size) |
859 | goto retry; |
860 | } |
861 | out_unlock: |
862 | if (gfs2_holder_queued(gh)) |
863 | gfs2_glock_dq(gh); |
864 | out_uninit: |
865 | gfs2_holder_uninit(gh); |
866 | /* User space doesn't expect partial success. */ |
867 | if (ret < 0) |
868 | return ret; |
869 | return read; |
870 | } |
871 | |
872 | static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, |
873 | struct gfs2_holder *gh) |
874 | { |
875 | struct file *file = iocb->ki_filp; |
876 | struct inode *inode = file->f_mapping->host; |
877 | struct gfs2_inode *ip = GFS2_I(inode); |
878 | size_t prev_count = 0, window_size = 0; |
879 | size_t written = 0; |
880 | bool enough_retries; |
881 | ssize_t ret; |
882 | |
883 | /* |
884 | * In this function, we disable page faults when we're holding the |
885 | * inode glock while doing I/O. If a page fault occurs, we indicate |
886 | * that the inode glock may be dropped, fault in the pages manually, |
887 | * and retry. |
888 | * |
889 | * For writes, iomap_dio_rw only triggers manual page faults, so we |
890 | * don't need to disable physical ones. |
891 | */ |
892 | |
893 | /* |
894 | * Deferred lock, even if its a write, since we do no allocation on |
895 | * this path. All we need to change is the atime, and this lock mode |
896 | * ensures that other nodes have flushed their buffered read caches |
897 | * (i.e. their page cache entries for this inode). We do not, |
898 | * unfortunately, have the option of only flushing a range like the |
899 | * VFS does. |
900 | */ |
901 | gfs2_holder_init(gl: ip->i_gl, LM_ST_DEFERRED, flags: 0, gh); |
902 | retry: |
903 | ret = gfs2_glock_nq(gh); |
904 | if (ret) |
905 | goto out_uninit; |
906 | /* Silently fall back to buffered I/O when writing beyond EOF */ |
907 | if (iocb->ki_pos + iov_iter_count(i: from) > i_size_read(inode: &ip->i_inode)) |
908 | goto out_unlock; |
909 | |
910 | from->nofault = true; |
911 | ret = iomap_dio_rw(iocb, iter: from, ops: &gfs2_iomap_ops, NULL, |
912 | IOMAP_DIO_PARTIAL, NULL, done_before: written); |
913 | from->nofault = false; |
914 | if (ret <= 0) { |
915 | if (ret == -ENOTBLK) |
916 | ret = 0; |
917 | if (ret != -EFAULT) |
918 | goto out_unlock; |
919 | } |
920 | /* No increment (+=) because iomap_dio_rw returns a cumulative value. */ |
921 | if (ret > 0) |
922 | written = ret; |
923 | |
924 | enough_retries = prev_count == iov_iter_count(i: from) && |
925 | window_size <= PAGE_SIZE; |
926 | if (should_fault_in_pages(i: from, iocb, prev_count: &prev_count, window_size: &window_size)) { |
927 | gfs2_glock_dq(gh); |
928 | window_size -= fault_in_iov_iter_readable(i: from, bytes: window_size); |
929 | if (window_size) { |
930 | if (!enough_retries) |
931 | goto retry; |
932 | /* fall back to buffered I/O */ |
933 | ret = 0; |
934 | } |
935 | } |
936 | out_unlock: |
937 | if (gfs2_holder_queued(gh)) |
938 | gfs2_glock_dq(gh); |
939 | out_uninit: |
940 | gfs2_holder_uninit(gh); |
941 | /* User space doesn't expect partial success. */ |
942 | if (ret < 0) |
943 | return ret; |
944 | return written; |
945 | } |
946 | |
947 | static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) |
948 | { |
949 | struct gfs2_inode *ip; |
950 | struct gfs2_holder gh; |
951 | size_t prev_count = 0, window_size = 0; |
952 | size_t read = 0; |
953 | ssize_t ret; |
954 | |
955 | /* |
956 | * In this function, we disable page faults when we're holding the |
957 | * inode glock while doing I/O. If a page fault occurs, we indicate |
958 | * that the inode glock may be dropped, fault in the pages manually, |
959 | * and retry. |
960 | */ |
961 | |
962 | if (iocb->ki_flags & IOCB_DIRECT) |
963 | return gfs2_file_direct_read(iocb, to, gh: &gh); |
964 | |
965 | pagefault_disable(); |
966 | iocb->ki_flags |= IOCB_NOIO; |
967 | ret = generic_file_read_iter(iocb, to); |
968 | iocb->ki_flags &= ~IOCB_NOIO; |
969 | pagefault_enable(); |
970 | if (ret >= 0) { |
971 | if (!iov_iter_count(i: to)) |
972 | return ret; |
973 | read = ret; |
974 | } else if (ret != -EFAULT) { |
975 | if (ret != -EAGAIN) |
976 | return ret; |
977 | if (iocb->ki_flags & IOCB_NOWAIT) |
978 | return ret; |
979 | } |
980 | ip = GFS2_I(inode: iocb->ki_filp->f_mapping->host); |
981 | gfs2_holder_init(gl: ip->i_gl, LM_ST_SHARED, flags: 0, gh: &gh); |
982 | retry: |
983 | ret = gfs2_glock_nq(gh: &gh); |
984 | if (ret) |
985 | goto out_uninit; |
986 | pagefault_disable(); |
987 | ret = generic_file_read_iter(iocb, to); |
988 | pagefault_enable(); |
989 | if (ret <= 0 && ret != -EFAULT) |
990 | goto out_unlock; |
991 | if (ret > 0) |
992 | read += ret; |
993 | |
994 | if (should_fault_in_pages(i: to, iocb, prev_count: &prev_count, window_size: &window_size)) { |
995 | gfs2_glock_dq(gh: &gh); |
996 | window_size -= fault_in_iov_iter_writeable(i: to, bytes: window_size); |
997 | if (window_size) |
998 | goto retry; |
999 | } |
1000 | out_unlock: |
1001 | if (gfs2_holder_queued(gh: &gh)) |
1002 | gfs2_glock_dq(gh: &gh); |
1003 | out_uninit: |
1004 | gfs2_holder_uninit(gh: &gh); |
1005 | return read ? read : ret; |
1006 | } |
1007 | |
1008 | static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, |
1009 | struct iov_iter *from, |
1010 | struct gfs2_holder *gh) |
1011 | { |
1012 | struct file *file = iocb->ki_filp; |
1013 | struct inode *inode = file_inode(f: file); |
1014 | struct gfs2_inode *ip = GFS2_I(inode); |
1015 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1016 | struct gfs2_holder *statfs_gh = NULL; |
1017 | size_t prev_count = 0, window_size = 0; |
1018 | size_t orig_count = iov_iter_count(i: from); |
1019 | size_t written = 0; |
1020 | ssize_t ret; |
1021 | |
1022 | /* |
1023 | * In this function, we disable page faults when we're holding the |
1024 | * inode glock while doing I/O. If a page fault occurs, we indicate |
1025 | * that the inode glock may be dropped, fault in the pages manually, |
1026 | * and retry. |
1027 | */ |
1028 | |
1029 | if (inode == sdp->sd_rindex) { |
1030 | statfs_gh = kmalloc(size: sizeof(*statfs_gh), GFP_NOFS); |
1031 | if (!statfs_gh) |
1032 | return -ENOMEM; |
1033 | } |
1034 | |
1035 | gfs2_holder_init(gl: ip->i_gl, LM_ST_EXCLUSIVE, flags: 0, gh); |
1036 | if (should_fault_in_pages(i: from, iocb, prev_count: &prev_count, window_size: &window_size)) { |
1037 | retry: |
1038 | window_size -= fault_in_iov_iter_readable(i: from, bytes: window_size); |
1039 | if (!window_size) { |
1040 | ret = -EFAULT; |
1041 | goto out_uninit; |
1042 | } |
1043 | from->count = min(from->count, window_size); |
1044 | } |
1045 | ret = gfs2_glock_nq(gh); |
1046 | if (ret) |
1047 | goto out_uninit; |
1048 | |
1049 | if (inode == sdp->sd_rindex) { |
1050 | struct gfs2_inode *m_ip = GFS2_I(inode: sdp->sd_statfs_inode); |
1051 | |
1052 | ret = gfs2_glock_nq_init(gl: m_ip->i_gl, LM_ST_EXCLUSIVE, |
1053 | GL_NOCACHE, gh: statfs_gh); |
1054 | if (ret) |
1055 | goto out_unlock; |
1056 | } |
1057 | |
1058 | pagefault_disable(); |
1059 | ret = iomap_file_buffered_write(iocb, from, ops: &gfs2_iomap_ops); |
1060 | pagefault_enable(); |
1061 | if (ret > 0) |
1062 | written += ret; |
1063 | |
1064 | if (inode == sdp->sd_rindex) |
1065 | gfs2_glock_dq_uninit(gh: statfs_gh); |
1066 | |
1067 | if (ret <= 0 && ret != -EFAULT) |
1068 | goto out_unlock; |
1069 | |
1070 | from->count = orig_count - written; |
1071 | if (should_fault_in_pages(i: from, iocb, prev_count: &prev_count, window_size: &window_size)) { |
1072 | gfs2_glock_dq(gh); |
1073 | goto retry; |
1074 | } |
1075 | out_unlock: |
1076 | if (gfs2_holder_queued(gh)) |
1077 | gfs2_glock_dq(gh); |
1078 | out_uninit: |
1079 | gfs2_holder_uninit(gh); |
1080 | kfree(objp: statfs_gh); |
1081 | from->count = orig_count - written; |
1082 | return written ? written : ret; |
1083 | } |
1084 | |
1085 | /** |
1086 | * gfs2_file_write_iter - Perform a write to a file |
1087 | * @iocb: The io context |
1088 | * @from: The data to write |
1089 | * |
1090 | * We have to do a lock/unlock here to refresh the inode size for |
1091 | * O_APPEND writes, otherwise we can land up writing at the wrong |
1092 | * offset. There is still a race, but provided the app is using its |
1093 | * own file locking, this will make O_APPEND work as expected. |
1094 | * |
1095 | */ |
1096 | |
1097 | static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
1098 | { |
1099 | struct file *file = iocb->ki_filp; |
1100 | struct inode *inode = file_inode(f: file); |
1101 | struct gfs2_inode *ip = GFS2_I(inode); |
1102 | struct gfs2_holder gh; |
1103 | ssize_t ret; |
1104 | |
1105 | gfs2_size_hint(filep: file, offset: iocb->ki_pos, size: iov_iter_count(i: from)); |
1106 | |
1107 | if (iocb->ki_flags & IOCB_APPEND) { |
1108 | ret = gfs2_glock_nq_init(gl: ip->i_gl, LM_ST_SHARED, flags: 0, gh: &gh); |
1109 | if (ret) |
1110 | return ret; |
1111 | gfs2_glock_dq_uninit(gh: &gh); |
1112 | } |
1113 | |
1114 | inode_lock(inode); |
1115 | ret = generic_write_checks(iocb, from); |
1116 | if (ret <= 0) |
1117 | goto out_unlock; |
1118 | |
1119 | ret = file_remove_privs(file); |
1120 | if (ret) |
1121 | goto out_unlock; |
1122 | |
1123 | if (iocb->ki_flags & IOCB_DIRECT) { |
1124 | struct address_space *mapping = file->f_mapping; |
1125 | ssize_t buffered, ret2; |
1126 | |
1127 | /* |
1128 | * Note that under direct I/O, we don't allow and inode |
1129 | * timestamp updates, so we're not calling file_update_time() |
1130 | * here. |
1131 | */ |
1132 | |
1133 | ret = gfs2_file_direct_write(iocb, from, gh: &gh); |
1134 | if (ret < 0 || !iov_iter_count(i: from)) |
1135 | goto out_unlock; |
1136 | |
1137 | iocb->ki_flags |= IOCB_DSYNC; |
1138 | buffered = gfs2_file_buffered_write(iocb, from, gh: &gh); |
1139 | if (unlikely(buffered <= 0)) { |
1140 | if (!ret) |
1141 | ret = buffered; |
1142 | goto out_unlock; |
1143 | } |
1144 | |
1145 | /* |
1146 | * We need to ensure that the page cache pages are written to |
1147 | * disk and invalidated to preserve the expected O_DIRECT |
1148 | * semantics. If the writeback or invalidate fails, only report |
1149 | * the direct I/O range as we don't know if the buffered pages |
1150 | * made it to disk. |
1151 | */ |
1152 | ret2 = generic_write_sync(iocb, count: buffered); |
1153 | invalidate_mapping_pages(mapping, |
1154 | start: (iocb->ki_pos - buffered) >> PAGE_SHIFT, |
1155 | end: (iocb->ki_pos - 1) >> PAGE_SHIFT); |
1156 | if (!ret || ret2 > 0) |
1157 | ret += ret2; |
1158 | } else { |
1159 | ret = file_update_time(file); |
1160 | if (ret) |
1161 | goto out_unlock; |
1162 | |
1163 | ret = gfs2_file_buffered_write(iocb, from, gh: &gh); |
1164 | if (likely(ret > 0)) |
1165 | ret = generic_write_sync(iocb, count: ret); |
1166 | } |
1167 | |
1168 | out_unlock: |
1169 | inode_unlock(inode); |
1170 | return ret; |
1171 | } |
1172 | |
1173 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, |
1174 | int mode) |
1175 | { |
1176 | struct super_block *sb = inode->i_sb; |
1177 | struct gfs2_inode *ip = GFS2_I(inode); |
1178 | loff_t end = offset + len; |
1179 | struct buffer_head *dibh; |
1180 | int error; |
1181 | |
1182 | error = gfs2_meta_inode_buffer(ip, bhp: &dibh); |
1183 | if (unlikely(error)) |
1184 | return error; |
1185 | |
1186 | gfs2_trans_add_meta(gl: ip->i_gl, bh: dibh); |
1187 | |
1188 | if (gfs2_is_stuffed(ip)) { |
1189 | error = gfs2_unstuff_dinode(ip); |
1190 | if (unlikely(error)) |
1191 | goto out; |
1192 | } |
1193 | |
1194 | while (offset < end) { |
1195 | struct iomap iomap = { }; |
1196 | |
1197 | error = gfs2_iomap_alloc(inode, pos: offset, length: end - offset, iomap: &iomap); |
1198 | if (error) |
1199 | goto out; |
1200 | offset = iomap.offset + iomap.length; |
1201 | if (!(iomap.flags & IOMAP_F_NEW)) |
1202 | continue; |
1203 | error = sb_issue_zeroout(sb, block: iomap.addr >> inode->i_blkbits, |
1204 | nr_blocks: iomap.length >> inode->i_blkbits, |
1205 | GFP_NOFS); |
1206 | if (error) { |
1207 | fs_err(GFS2_SB(inode), "Failed to zero data buffers\n" ); |
1208 | goto out; |
1209 | } |
1210 | } |
1211 | out: |
1212 | brelse(bh: dibh); |
1213 | return error; |
1214 | } |
1215 | |
1216 | /** |
1217 | * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of |
1218 | * blocks, determine how many bytes can be written. |
1219 | * @ip: The inode in question. |
1220 | * @len: Max cap of bytes. What we return in *len must be <= this. |
1221 | * @data_blocks: Compute and return the number of data blocks needed |
1222 | * @ind_blocks: Compute and return the number of indirect blocks needed |
1223 | * @max_blocks: The total blocks available to work with. |
1224 | * |
1225 | * Returns: void, but @len, @data_blocks and @ind_blocks are filled in. |
1226 | */ |
1227 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len, |
1228 | unsigned int *data_blocks, unsigned int *ind_blocks, |
1229 | unsigned int max_blocks) |
1230 | { |
1231 | loff_t max = *len; |
1232 | const struct gfs2_sbd *sdp = GFS2_SB(inode: &ip->i_inode); |
1233 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); |
1234 | |
1235 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { |
1236 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); |
1237 | max_data -= tmp; |
1238 | } |
1239 | |
1240 | *data_blocks = max_data; |
1241 | *ind_blocks = max_blocks - max_data; |
1242 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; |
1243 | if (*len > max) { |
1244 | *len = max; |
1245 | gfs2_write_calc_reserv(ip, len: max, data_blocks, ind_blocks); |
1246 | } |
1247 | } |
1248 | |
1249 | static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
1250 | { |
1251 | struct inode *inode = file_inode(f: file); |
1252 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1253 | struct gfs2_inode *ip = GFS2_I(inode); |
1254 | struct gfs2_alloc_parms ap = {}; |
1255 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
1256 | loff_t bytes, max_bytes, max_blks; |
1257 | int error; |
1258 | const loff_t pos = offset; |
1259 | const loff_t count = len; |
1260 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); |
1261 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; |
1262 | loff_t max_chunk_size = UINT_MAX & bsize_mask; |
1263 | |
1264 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; |
1265 | |
1266 | offset &= bsize_mask; |
1267 | |
1268 | len = next - offset; |
1269 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; |
1270 | if (!bytes) |
1271 | bytes = UINT_MAX; |
1272 | bytes &= bsize_mask; |
1273 | if (bytes == 0) |
1274 | bytes = sdp->sd_sb.sb_bsize; |
1275 | |
1276 | gfs2_size_hint(filep: file, offset, size: len); |
1277 | |
1278 | gfs2_write_calc_reserv(ip, PAGE_SIZE, data_blocks: &data_blocks, ind_blocks: &ind_blocks); |
1279 | ap.min_target = data_blocks + ind_blocks; |
1280 | |
1281 | while (len > 0) { |
1282 | if (len < bytes) |
1283 | bytes = len; |
1284 | if (!gfs2_write_alloc_required(ip, offset, len: bytes)) { |
1285 | len -= bytes; |
1286 | offset += bytes; |
1287 | continue; |
1288 | } |
1289 | |
1290 | /* We need to determine how many bytes we can actually |
1291 | * fallocate without exceeding quota or going over the |
1292 | * end of the fs. We start off optimistically by assuming |
1293 | * we can write max_bytes */ |
1294 | max_bytes = (len > max_chunk_size) ? max_chunk_size : len; |
1295 | |
1296 | /* Since max_bytes is most likely a theoretical max, we |
1297 | * calculate a more realistic 'bytes' to serve as a good |
1298 | * starting point for the number of bytes we may be able |
1299 | * to write */ |
1300 | gfs2_write_calc_reserv(ip, len: bytes, data_blocks: &data_blocks, ind_blocks: &ind_blocks); |
1301 | ap.target = data_blocks + ind_blocks; |
1302 | |
1303 | error = gfs2_quota_lock_check(ip, ap: &ap); |
1304 | if (error) |
1305 | return error; |
1306 | /* ap.allowed tells us how many blocks quota will allow |
1307 | * us to write. Check if this reduces max_blks */ |
1308 | max_blks = UINT_MAX; |
1309 | if (ap.allowed) |
1310 | max_blks = ap.allowed; |
1311 | |
1312 | error = gfs2_inplace_reserve(ip, ap: &ap); |
1313 | if (error) |
1314 | goto out_qunlock; |
1315 | |
1316 | /* check if the selected rgrp limits our max_blks further */ |
1317 | if (ip->i_res.rs_reserved < max_blks) |
1318 | max_blks = ip->i_res.rs_reserved; |
1319 | |
1320 | /* Almost done. Calculate bytes that can be written using |
1321 | * max_blks. We also recompute max_bytes, data_blocks and |
1322 | * ind_blocks */ |
1323 | calc_max_reserv(ip, len: &max_bytes, data_blocks: &data_blocks, |
1324 | ind_blocks: &ind_blocks, max_blocks: max_blks); |
1325 | |
1326 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + |
1327 | RES_RG_HDR + gfs2_rg_blocks(ip, requested: data_blocks + ind_blocks); |
1328 | if (gfs2_is_jdata(ip)) |
1329 | rblocks += data_blocks ? data_blocks : 1; |
1330 | |
1331 | error = gfs2_trans_begin(sdp, blocks: rblocks, |
1332 | PAGE_SIZE >> inode->i_blkbits); |
1333 | if (error) |
1334 | goto out_trans_fail; |
1335 | |
1336 | error = fallocate_chunk(inode, offset, len: max_bytes, mode); |
1337 | gfs2_trans_end(sdp); |
1338 | |
1339 | if (error) |
1340 | goto out_trans_fail; |
1341 | |
1342 | len -= max_bytes; |
1343 | offset += max_bytes; |
1344 | gfs2_inplace_release(ip); |
1345 | gfs2_quota_unlock(ip); |
1346 | } |
1347 | |
1348 | if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) |
1349 | i_size_write(inode, i_size: pos + count); |
1350 | file_update_time(file); |
1351 | mark_inode_dirty(inode); |
1352 | |
1353 | if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) |
1354 | return vfs_fsync_range(file, start: pos, end: pos + count - 1, |
1355 | datasync: (file->f_flags & __O_SYNC) ? 0 : 1); |
1356 | return 0; |
1357 | |
1358 | out_trans_fail: |
1359 | gfs2_inplace_release(ip); |
1360 | out_qunlock: |
1361 | gfs2_quota_unlock(ip); |
1362 | return error; |
1363 | } |
1364 | |
1365 | static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
1366 | { |
1367 | struct inode *inode = file_inode(f: file); |
1368 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1369 | struct gfs2_inode *ip = GFS2_I(inode); |
1370 | struct gfs2_holder gh; |
1371 | int ret; |
1372 | |
1373 | if (mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) |
1374 | return -EOPNOTSUPP; |
1375 | /* fallocate is needed by gfs2_grow to reserve space in the rindex */ |
1376 | if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex) |
1377 | return -EOPNOTSUPP; |
1378 | |
1379 | inode_lock(inode); |
1380 | |
1381 | gfs2_holder_init(gl: ip->i_gl, LM_ST_EXCLUSIVE, flags: 0, gh: &gh); |
1382 | ret = gfs2_glock_nq(gh: &gh); |
1383 | if (ret) |
1384 | goto out_uninit; |
1385 | |
1386 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
1387 | (offset + len) > inode->i_size) { |
1388 | ret = inode_newsize_ok(inode, offset: offset + len); |
1389 | if (ret) |
1390 | goto out_unlock; |
1391 | } |
1392 | |
1393 | ret = get_write_access(inode); |
1394 | if (ret) |
1395 | goto out_unlock; |
1396 | |
1397 | if (mode & FALLOC_FL_PUNCH_HOLE) { |
1398 | ret = __gfs2_punch_hole(file, offset, length: len); |
1399 | } else { |
1400 | ret = __gfs2_fallocate(file, mode, offset, len); |
1401 | if (ret) |
1402 | gfs2_rs_deltree(rs: &ip->i_res); |
1403 | } |
1404 | |
1405 | put_write_access(inode); |
1406 | out_unlock: |
1407 | gfs2_glock_dq(gh: &gh); |
1408 | out_uninit: |
1409 | gfs2_holder_uninit(gh: &gh); |
1410 | inode_unlock(inode); |
1411 | return ret; |
1412 | } |
1413 | |
1414 | static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, |
1415 | struct file *out, loff_t *ppos, |
1416 | size_t len, unsigned int flags) |
1417 | { |
1418 | ssize_t ret; |
1419 | |
1420 | gfs2_size_hint(filep: out, offset: *ppos, size: len); |
1421 | |
1422 | ret = iter_file_splice_write(pipe, out, ppos, len, flags); |
1423 | return ret; |
1424 | } |
1425 | |
1426 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
1427 | |
1428 | /** |
1429 | * gfs2_lock - acquire/release a posix lock on a file |
1430 | * @file: the file pointer |
1431 | * @cmd: either modify or retrieve lock state, possibly wait |
1432 | * @fl: type and range of lock |
1433 | * |
1434 | * Returns: errno |
1435 | */ |
1436 | |
1437 | static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) |
1438 | { |
1439 | struct gfs2_inode *ip = GFS2_I(inode: file->f_mapping->host); |
1440 | struct gfs2_sbd *sdp = GFS2_SB(inode: file->f_mapping->host); |
1441 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
1442 | |
1443 | if (!(fl->c.flc_flags & FL_POSIX)) |
1444 | return -ENOLCK; |
1445 | if (gfs2_withdrawing_or_withdrawn(sdp)) { |
1446 | if (lock_is_unlock(fl)) |
1447 | locks_lock_file_wait(filp: file, fl); |
1448 | return -EIO; |
1449 | } |
1450 | if (cmd == F_CANCELLK) |
1451 | return dlm_posix_cancel(lockspace: ls->ls_dlm, number: ip->i_no_addr, file, fl); |
1452 | else if (IS_GETLK(cmd)) |
1453 | return dlm_posix_get(lockspace: ls->ls_dlm, number: ip->i_no_addr, file, fl); |
1454 | else if (lock_is_unlock(fl)) |
1455 | return dlm_posix_unlock(lockspace: ls->ls_dlm, number: ip->i_no_addr, file, fl); |
1456 | else |
1457 | return dlm_posix_lock(lockspace: ls->ls_dlm, number: ip->i_no_addr, file, cmd, fl); |
1458 | } |
1459 | |
1460 | static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) |
1461 | { |
1462 | struct gfs2_glock *gl = gfs2_glock_hold(gl: fl_gh->gh_gl); |
1463 | |
1464 | /* |
1465 | * Make sure gfs2_glock_put() won't sleep under the file->f_lock |
1466 | * spinlock. |
1467 | */ |
1468 | |
1469 | spin_lock(lock: &file->f_lock); |
1470 | gfs2_holder_uninit(gh: fl_gh); |
1471 | spin_unlock(lock: &file->f_lock); |
1472 | gfs2_glock_put(gl); |
1473 | } |
1474 | |
1475 | static int do_flock(struct file *file, int cmd, struct file_lock *fl) |
1476 | { |
1477 | struct gfs2_file *fp = file->private_data; |
1478 | struct gfs2_holder *fl_gh = &fp->f_fl_gh; |
1479 | struct gfs2_inode *ip = GFS2_I(inode: file_inode(f: file)); |
1480 | struct gfs2_glock *gl; |
1481 | unsigned int state; |
1482 | u16 flags; |
1483 | int error = 0; |
1484 | int sleeptime; |
1485 | |
1486 | state = lock_is_write(fl) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; |
1487 | flags = GL_EXACT | GL_NOPID; |
1488 | if (!IS_SETLKW(cmd)) |
1489 | flags |= LM_FLAG_TRY_1CB; |
1490 | |
1491 | mutex_lock(&fp->f_fl_mutex); |
1492 | |
1493 | if (gfs2_holder_initialized(gh: fl_gh)) { |
1494 | struct file_lock request; |
1495 | if (fl_gh->gh_state == state) |
1496 | goto out; |
1497 | locks_init_lock(&request); |
1498 | request.c.flc_type = F_UNLCK; |
1499 | request.c.flc_flags = FL_FLOCK; |
1500 | locks_lock_file_wait(filp: file, fl: &request); |
1501 | gfs2_glock_dq(gh: fl_gh); |
1502 | gfs2_holder_reinit(state, flags, gh: fl_gh); |
1503 | } else { |
1504 | error = gfs2_glock_get(sdp: GFS2_SB(inode: &ip->i_inode), number: ip->i_no_addr, |
1505 | glops: &gfs2_flock_glops, create: CREATE, glp: &gl); |
1506 | if (error) |
1507 | goto out; |
1508 | spin_lock(lock: &file->f_lock); |
1509 | gfs2_holder_init(gl, state, flags, gh: fl_gh); |
1510 | spin_unlock(lock: &file->f_lock); |
1511 | gfs2_glock_put(gl); |
1512 | } |
1513 | for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) { |
1514 | error = gfs2_glock_nq(gh: fl_gh); |
1515 | if (error != GLR_TRYFAILED) |
1516 | break; |
1517 | fl_gh->gh_flags &= ~LM_FLAG_TRY_1CB; |
1518 | fl_gh->gh_flags |= LM_FLAG_TRY; |
1519 | msleep(msecs: sleeptime); |
1520 | } |
1521 | if (error) { |
1522 | __flock_holder_uninit(file, fl_gh); |
1523 | if (error == GLR_TRYFAILED) |
1524 | error = -EAGAIN; |
1525 | } else { |
1526 | error = locks_lock_file_wait(filp: file, fl); |
1527 | gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); |
1528 | } |
1529 | |
1530 | out: |
1531 | mutex_unlock(lock: &fp->f_fl_mutex); |
1532 | return error; |
1533 | } |
1534 | |
1535 | static void do_unflock(struct file *file, struct file_lock *fl) |
1536 | { |
1537 | struct gfs2_file *fp = file->private_data; |
1538 | struct gfs2_holder *fl_gh = &fp->f_fl_gh; |
1539 | |
1540 | mutex_lock(&fp->f_fl_mutex); |
1541 | locks_lock_file_wait(filp: file, fl); |
1542 | if (gfs2_holder_initialized(gh: fl_gh)) { |
1543 | gfs2_glock_dq(gh: fl_gh); |
1544 | __flock_holder_uninit(file, fl_gh); |
1545 | } |
1546 | mutex_unlock(lock: &fp->f_fl_mutex); |
1547 | } |
1548 | |
1549 | /** |
1550 | * gfs2_flock - acquire/release a flock lock on a file |
1551 | * @file: the file pointer |
1552 | * @cmd: either modify or retrieve lock state, possibly wait |
1553 | * @fl: type and range of lock |
1554 | * |
1555 | * Returns: errno |
1556 | */ |
1557 | |
1558 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) |
1559 | { |
1560 | if (!(fl->c.flc_flags & FL_FLOCK)) |
1561 | return -ENOLCK; |
1562 | |
1563 | if (lock_is_unlock(fl)) { |
1564 | do_unflock(file, fl); |
1565 | return 0; |
1566 | } else { |
1567 | return do_flock(file, cmd, fl); |
1568 | } |
1569 | } |
1570 | |
1571 | const struct file_operations gfs2_file_fops = { |
1572 | .llseek = gfs2_llseek, |
1573 | .read_iter = gfs2_file_read_iter, |
1574 | .write_iter = gfs2_file_write_iter, |
1575 | .iopoll = iocb_bio_iopoll, |
1576 | .unlocked_ioctl = gfs2_ioctl, |
1577 | .compat_ioctl = gfs2_compat_ioctl, |
1578 | .mmap = gfs2_mmap, |
1579 | .open = gfs2_open, |
1580 | .release = gfs2_release, |
1581 | .fsync = gfs2_fsync, |
1582 | .lock = gfs2_lock, |
1583 | .flock = gfs2_flock, |
1584 | .splice_read = copy_splice_read, |
1585 | .splice_write = gfs2_file_splice_write, |
1586 | .setlease = simple_nosetlease, |
1587 | .fallocate = gfs2_fallocate, |
1588 | }; |
1589 | |
1590 | const struct file_operations gfs2_dir_fops = { |
1591 | .iterate_shared = gfs2_readdir, |
1592 | .unlocked_ioctl = gfs2_ioctl, |
1593 | .compat_ioctl = gfs2_compat_ioctl, |
1594 | .open = gfs2_open, |
1595 | .release = gfs2_release, |
1596 | .fsync = gfs2_fsync, |
1597 | .lock = gfs2_lock, |
1598 | .flock = gfs2_flock, |
1599 | .llseek = default_llseek, |
1600 | }; |
1601 | |
1602 | #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ |
1603 | |
1604 | const struct file_operations gfs2_file_fops_nolock = { |
1605 | .llseek = gfs2_llseek, |
1606 | .read_iter = gfs2_file_read_iter, |
1607 | .write_iter = gfs2_file_write_iter, |
1608 | .iopoll = iocb_bio_iopoll, |
1609 | .unlocked_ioctl = gfs2_ioctl, |
1610 | .compat_ioctl = gfs2_compat_ioctl, |
1611 | .mmap = gfs2_mmap, |
1612 | .open = gfs2_open, |
1613 | .release = gfs2_release, |
1614 | .fsync = gfs2_fsync, |
1615 | .splice_read = copy_splice_read, |
1616 | .splice_write = gfs2_file_splice_write, |
1617 | .setlease = generic_setlease, |
1618 | .fallocate = gfs2_fallocate, |
1619 | }; |
1620 | |
1621 | const struct file_operations gfs2_dir_fops_nolock = { |
1622 | .iterate_shared = gfs2_readdir, |
1623 | .unlocked_ioctl = gfs2_ioctl, |
1624 | .compat_ioctl = gfs2_compat_ioctl, |
1625 | .open = gfs2_open, |
1626 | .release = gfs2_release, |
1627 | .fsync = gfs2_fsync, |
1628 | .llseek = default_llseek, |
1629 | }; |
1630 | |
1631 | |