// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */

#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "bio.h"
#include "compression.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"

#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
				   sizeof(struct btrfs_item) * 2) / \
				  size) - 1))

#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
				       PAGE_SIZE))
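
/*
 * A rough worked example for the macros above (assuming the common 4KiB
 * sectorsize, 16KiB nodesize and 4-byte crc32c csums, so the numbers are
 * approximate): BTRFS_LEAF_DATA_SIZE() is the nodesize minus the leaf header,
 * which leaves room for roughly 4000 csums in a single item, i.e. one csum
 * item can describe on the order of 16MiB of data. MAX_CSUM_ITEMS()
 * additionally caps the csum count at PAGE_SIZE.
 */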

/*
 * Set inode's size according to filesystem options.
 *
 * @inode:      inode we want to update the disk_i_size for
 * @new_i_size: i_size we want to set to, 0 if we use i_size
 *
 * With NO_HOLES set this simply sets the disk_i_size to whatever i_size_read()
 * returns as it is perfectly fine with a file that has holes without hole file
 * extent items.
 *
 * However without NO_HOLES we need to only return the area that is contiguous
 * from the 0 offset of the file. Otherwise we could end up adjusting i_size up
 * to an extent that has a gap in between.
 *
 * Finally new_i_size should only be set in the case of truncate where we're not
 * ready to use i_size_read() as the limiter yet.
 */
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	u64 start, end, i_size;
	int ret;

	spin_lock(&inode->lock);
	i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
		inode->disk_i_size = i_size;
		goto out_unlock;
	}

	ret = find_contiguous_extent_bit(inode->file_extent_tree, 0, &start,
					 &end, EXTENT_DIRTY);
	if (!ret && start == 0)
		i_size = min(i_size, end + 1);
	else
		i_size = 0;
	inode->disk_i_size = i_size;
out_unlock:
	spin_unlock(&inode->lock);
}

/*
 * Mark range within a file as having a new extent inserted.
 *
 * @inode: inode being modified
 * @start: start file offset of the file extent we've inserted
 * @len:   logical length of the file extent item
 *
 * Call when we are inserting a new file extent where there was none before.
 * There is no need to call this when replacing an existing file extent;
 * however, if unsure, it's fine to call this multiple times.
 *
 * The start and len must match the file extent item, and thus must be
 * sectorsize aligned.
 */
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
				      u64 len)
{
	if (len == 0)
		return 0;

	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));

	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
		return 0;
	return set_extent_bit(inode->file_extent_tree, start, start + len - 1,
			      EXTENT_DIRTY, NULL);
}

/*
 * Mark an inode range as not having a backing extent.
 *
 * @inode: inode being modified
 * @start: start file offset of the file extent we've dropped
 * @len:   logical length of the file extent item
 *
 * Called when we drop a file extent, for example when we truncate. Doesn't
 * need to be called for cases where we're replacing a file extent, like when
 * we've COWed a file extent.
 *
 * The start and len must match the file extent item, and thus must be
 * sectorsize aligned.
 */
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
					u64 len)
{
	if (len == 0)
		return 0;

	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
	       len == (u64)-1);

	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
		return 0;
	return clear_extent_bit(inode->file_extent_tree, start,
				start + len - 1, EXTENT_DIRTY, NULL);
}

static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes)
{
	ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize));

	return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size;
}

static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size)
{
	ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size));

	return (csum_size / fs_info->csum_size) << fs_info->sectorsize_bits;
}
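
/*
 * A quick example of the conversions above (assuming the default 4KiB
 * sectorsize and 4-byte crc32c csums): 64KiB of data is 16 sectors, so
 * bytes_to_csum_size() returns 16 * 4 = 64 bytes of csums, and
 * csum_size_to_bytes() maps those 64 bytes of csums back to 64KiB of data.
 */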

static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info)
{
	u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum),
				       fs_info->csum_size);

	return csum_size_to_bytes(fs_info, max_csum_size);
}

/*
 * Calculate the total size needed to allocate for an ordered sum structure
 * spanning @bytes in the file.
 */
static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
{
	return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
}

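/*
 * Insert a file extent item describing a hole: a regular file extent item
 * with a zero disk_bytenr, covering @num_bytes starting at file offset @pos
 * for the inode with number @objectid.
 */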
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     u64 objectid, u64 pos, u64 num_bytes)
{
	int ret = 0;
	struct btrfs_file_extent_item *item;
	struct btrfs_key file_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	file_key.objectid = objectid;
	file_key.offset = pos;
	file_key.type = BTRFS_EXTENT_DATA_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
				      sizeof(*item));
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_set_file_extent_disk_bytenr(leaf, item, 0);
	btrfs_set_file_extent_disk_num_bytes(leaf, item, 0);
	btrfs_set_file_extent_offset(leaf, item, 0);
	btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
	btrfs_set_file_extent_generation(leaf, item, trans->transid);
	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_compression(leaf, item, 0);
	btrfs_set_file_extent_encryption(leaf, item, 0);
	btrfs_set_file_extent_other_encoding(leaf, item, 0);

	btrfs_mark_buffer_dirty(trans, leaf);
out:
	btrfs_free_path(path);
	return ret;
}

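/*
 * Find the csum item in @root covering @bytenr and return a pointer into the
 * leaf at the csum for that byte number.
 *
 * Returns ERR_PTR(-ENOENT) if no csum item covers @bytenr, or
 * ERR_PTR(-EFBIG) if a csum item ends exactly at @bytenr (so it could be
 * extended to cover it).
 */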
static struct btrfs_csum_item *
btrfs_lookup_csum(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct btrfs_path *path,
		  u64 bytenr, int cow)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	struct btrfs_key file_key;
	struct btrfs_key found_key;
	struct btrfs_csum_item *item;
	struct extent_buffer *leaf;
	u64 csum_offset = 0;
	const u32 csum_size = fs_info->csum_size;
	int csums_in_item;

	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	file_key.offset = bytenr;
	file_key.type = BTRFS_EXTENT_CSUM_KEY;
	ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
	if (ret < 0)
		goto fail;
	leaf = path->nodes[0];
	if (ret > 0) {
		ret = 1;
		if (path->slots[0] == 0)
			goto fail;
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
			goto fail;

		csum_offset = (bytenr - found_key.offset) >>
			fs_info->sectorsize_bits;
		csums_in_item = btrfs_item_size(leaf, path->slots[0]);
		csums_in_item /= csum_size;

		if (csum_offset == csums_in_item) {
			ret = -EFBIG;
			goto fail;
		} else if (csum_offset > csums_in_item) {
			goto fail;
		}
	}
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
	item = (struct btrfs_csum_item *)((unsigned char *)item +
					  csum_offset * csum_size);
	return item;
fail:
	if (ret > 0)
		ret = -ENOENT;
	return ERR_PTR(ret);
}

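/*
 * Search for the file extent item of @objectid at file @offset. A negative
 * @mod prepares the path for a deletion, and any non-zero @mod makes the
 * search COW the path.
 */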
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path, u64 objectid,
			     u64 offset, int mod)
{
	struct btrfs_key file_key;
	int ins_len = mod < 0 ? -1 : 0;
	int cow = mod != 0;

	file_key.objectid = objectid;
	file_key.offset = offset;
	file_key.type = BTRFS_EXTENT_DATA_KEY;

	return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
}

/*
 * Find checksums for the logical bytenr range [disk_bytenr, disk_bytenr + len)
 * and store the result in @dst.
 *
 * Return >0 for the number of sectors we found.
 * Return 0 if the range [disk_bytenr, disk_bytenr + sectorsize) has no csum.
 * The caller may want to try the next sector until one range is hit.
 * Return <0 for fatal errors.
 */
static int search_csum_tree(struct btrfs_fs_info *fs_info,
			    struct btrfs_path *path, u64 disk_bytenr,
			    u64 len, u8 *dst)
{
	struct btrfs_root *csum_root;
	struct btrfs_csum_item *item = NULL;
	struct btrfs_key key;
	const u32 sectorsize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	u32 itemsize;
	int ret;
	u64 csum_start;
	u64 csum_len;

	ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
	       IS_ALIGNED(len, sectorsize));

	/* Check if the current csum item covers disk_bytenr */
	if (path->nodes[0]) {
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);

		csum_start = key.offset;
		csum_len = (itemsize / csum_size) * sectorsize;

		if (in_range(disk_bytenr, csum_start, csum_len))
			goto found;
	}

	/* Current item doesn't contain the desired range, search again */
	btrfs_release_path(path);
	csum_root = btrfs_csum_root(fs_info, disk_bytenr);
	item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0);
	if (IS_ERR(item)) {
		ret = PTR_ERR(item);
		goto out;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);

	csum_start = key.offset;
	csum_len = (itemsize / csum_size) * sectorsize;
	ASSERT(in_range(disk_bytenr, csum_start, csum_len));

found:
	ret = (min(csum_start + csum_len, disk_bytenr + len) -
	       disk_bytenr) >> fs_info->sectorsize_bits;
	read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
			   ret * csum_size);
out:
	if (ret == -ENOENT || ret == -EFBIG)
		ret = 0;
	return ret;
}

/*
 * Look up the checksums for a read bio in the csum tree.
 *
 * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
 */
blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct bio *bio = &bbio->bio;
	struct btrfs_path *path;
	const u32 sectorsize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	u32 orig_len = bio->bi_iter.bi_size;
	u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
	blk_status_t ret = BLK_STS_OK;
	u32 bio_offset = 0;

	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
		return BLK_STS_OK;

	/*
	 * This function is only called for read bios.
	 *
	 * This means two things:
	 * - All our csums should only be in the csum tree
	 *   No ordered extent csums, as ordered extents are only for the write
	 *   path.
	 * - No need to bother with any other info from the bvec
	 *   Since we're looking up csums, the only important info is the
	 *   disk_bytenr and the length, which can be extracted from bi_iter
	 *   directly.
	 */
	ASSERT(bio_op(bio) == REQ_OP_READ);
	path = btrfs_alloc_path();
	if (!path)
		return BLK_STS_RESOURCE;

	if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
		bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
		if (!bbio->csum) {
			btrfs_free_path(path);
			return BLK_STS_RESOURCE;
		}
	} else {
		bbio->csum = bbio->csum_inline;
	}

	/*
	 * If requested number of sectors is larger than one leaf can contain,
	 * kick the readahead for csum tree.
	 */
	if (nblocks > fs_info->csums_per_leaf)
		path->reada = READA_FORWARD;

	/*
	 * the free space stuff is only read when it hasn't been
	 * updated in the current transaction. So, we can safely
	 * read from the commit root and sidestep a nasty deadlock
	 * between reading the free space cache and updating the csum tree.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	while (bio_offset < orig_len) {
		int count;
		u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset;
		u8 *csum_dst = bbio->csum +
			(bio_offset >> fs_info->sectorsize_bits) * csum_size;

		count = search_csum_tree(fs_info, path, cur_disk_bytenr,
					 orig_len - bio_offset, csum_dst);
		if (count < 0) {
			ret = errno_to_blk_status(count);
			if (bbio->csum != bbio->csum_inline)
				kfree(bbio->csum);
			bbio->csum = NULL;
			break;
		}

		/*
		 * We didn't find a csum for this range. We need to make sure
		 * we complain loudly about this, because we are not NODATASUM.
		 *
		 * However for the DATA_RELOC inode we could potentially be
		 * relocating data extents for a NODATASUM inode, so the inode
		 * itself won't be marked with NODATASUM, but the extent we're
		 * copying is in fact NODATASUM. If we don't find a csum we
		 * assume this is the case.
		 */
		if (count == 0) {
			memset(csum_dst, 0, csum_size);
			count = 1;

			if (inode->root->root_key.objectid ==
			    BTRFS_DATA_RELOC_TREE_OBJECTID) {
				u64 file_offset = bbio->file_offset + bio_offset;

				set_extent_bit(&inode->io_tree, file_offset,
					       file_offset + sectorsize - 1,
					       EXTENT_NODATASUM, NULL);
			} else {
				btrfs_warn_rl(fs_info,
			"csum hole found for disk bytenr range [%llu, %llu)",
				cur_disk_bytenr, cur_disk_bytenr + sectorsize);
			}
		}
		bio_offset += count * sectorsize;
	}

	btrfs_free_path(path);
	return ret;
}

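/*
 * Collect the csums covering the logical range [start, end] into
 * btrfs_ordered_sum structures appended to @list. With @search_commit set the
 * lookup is done against the commit root without taking locks.
 */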
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
			    struct list_head *list, int search_commit,
			    bool nowait)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_csum_item *item;
	LIST_HEAD(tmplist);
	int ret;

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->nowait = nowait;
	if (search_commit) {
		path->skip_locking = 1;
		path->reada = READA_FORWARD;
		path->search_commit_root = 1;
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.offset = start;
	key.type = BTRFS_EXTENT_CSUM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);

		/*
		 * There are two cases we can hit here for the previous csum
		 * item:
		 *
		 *		|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Or
		 *				|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Check if the previous csum item covers the leading part of
		 * the search range. If so we have to start from previous csum
		 * item.
		 */
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			if (bytes_to_csum_size(fs_info, start - key.offset) <
			    btrfs_item_size(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

	while (start <= end) {
		u64 csum_end;

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		if (key.offset > start)
			start = key.offset;

		csum_end = key.offset + csum_size_to_bytes(fs_info,
					btrfs_item_size(leaf, path->slots[0]));
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		while (start < csum_end) {
			unsigned long offset;
			size_t size;

			size = min_t(size_t, csum_end - start,
				     max_ordered_sum_bytes(fs_info));
			sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
				       GFP_NOFS);
			if (!sums) {
				ret = -ENOMEM;
				goto fail;
			}

			sums->logical = start;
			sums->len = size;

			offset = bytes_to_csum_size(fs_info, start - key.offset);

			read_extent_buffer(path->nodes[0],
					   sums->sums,
					   ((unsigned long)item) + offset,
					   bytes_to_csum_size(fs_info, size));

			start += size;
			list_add_tail(&sums->list, &tmplist);
		}
		path->slots[0]++;
	}
	ret = 0;
fail:
	while (ret < 0 && !list_empty(&tmplist)) {
		sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
		list_del(&sums->list);
		kfree(sums);
	}
	list_splice_tail(&tmplist, list);

	btrfs_free_path(path);
	return ret;
}

/*
 * Do the same work as btrfs_lookup_csums_list(), the difference is in how
 * we return the result.
 *
 * This version will set the corresponding bits in @csum_bitmap to represent
 * that there is a csum found.
 * Each bit represents a sector. Thus the caller should ensure @csum_buf passed
 * in is large enough to contain all csums.
 */
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
			      u64 start, u64 end, u8 *csum_buf,
			      unsigned long *csum_bitmap)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_csum_item *item;
	const u64 orig_start = start;
	bool free_path = false;
	int ret;

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		free_path = true;
	}

	/* Check if we can reuse the previous path. */
	if (path->nodes[0]) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY &&
		    key.offset <= start)
			goto search_forward;
		btrfs_release_path(path);
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = start;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);

		/*
		 * There are two cases we can hit here for the previous csum
		 * item:
		 *
		 *		|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Or
		 *				|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Check if the previous csum item covers the leading part of
		 * the search range. If so we have to start from previous csum
		 * item.
		 */
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			if (bytes_to_csum_size(fs_info, start - key.offset) <
			    btrfs_item_size(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

search_forward:
	while (start <= end) {
		u64 csum_end;

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		if (key.offset > start)
			start = key.offset;

		csum_end = key.offset + csum_size_to_bytes(fs_info,
					btrfs_item_size(leaf, path->slots[0]));
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		while (start < csum_end) {
			unsigned long offset;
			size_t size;
			u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info,
						start - orig_start);

			size = min_t(size_t, csum_end - start, end + 1 - start);

			offset = bytes_to_csum_size(fs_info, start - key.offset);

			read_extent_buffer(path->nodes[0], csum_dest,
					   ((unsigned long)item) + offset,
					   bytes_to_csum_size(fs_info, size));

			bitmap_set(csum_bitmap,
				(start - orig_start) >> fs_info->sectorsize_bits,
				size >> fs_info->sectorsize_bits);

			start += size;
		}
		path->slots[0]++;
	}
	ret = 0;
fail:
	if (free_path)
		btrfs_free_path(path);
	return ret;
}

/*
 * Calculate checksums of the data contained inside a bio.
 */
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
	struct btrfs_ordered_extent *ordered = bbio->ordered;
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	struct bio *bio = &bbio->bio;
	struct btrfs_ordered_sum *sums;
	char *data;
	struct bvec_iter iter;
	struct bio_vec bvec;
	int index;
	unsigned int blockcount;
	int i;
	unsigned nofs_flag;

	nofs_flag = memalloc_nofs_save();
	sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
			GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!sums)
		return BLK_STS_RESOURCE;

	sums->len = bio->bi_iter.bi_size;
	INIT_LIST_HEAD(&sums->list);

	sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	index = 0;

	shash->tfm = fs_info->csum_shash;

	bio_for_each_segment(bvec, bio, iter) {
		blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
						 bvec.bv_len + fs_info->sectorsize
						 - 1);

		for (i = 0; i < blockcount; i++) {
			data = bvec_kmap_local(&bvec);
			crypto_shash_digest(shash,
					    data + (i * fs_info->sectorsize),
					    fs_info->sectorsize,
					    sums->sums + index);
			kunmap_local(data);
			index += fs_info->csum_size;
		}

	}

	bbio->sums = sums;
	btrfs_add_ordered_sum(ordered, sums);
	return 0;
}

/*
 * Nodatasum I/O on zoned file systems still requires a btrfs_ordered_sum to
 * record the updated logical address on Zone Append completion.
 * Allocate just the structure with an empty sums array here for that case.
 */
blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
{
	bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
	if (!bbio->sums)
		return BLK_STS_RESOURCE;
	bbio->sums->len = bbio->bio.bi_iter.bi_size;
	bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
	btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
	return 0;
}

/*
 * Remove one checksum overlapping a range.
 *
 * This expects the key to describe the csum pointed to by the path, and it
 * expects the csum to overlap the range [bytenr, bytenr + len).
 *
 * The csum should not be entirely contained in the range and the range should
 * not be entirely contained in the csum.
 *
 * This calls btrfs_truncate_item() with the correct args based on the overlap,
 * and fixes up the key as required.
 */
static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
				       struct btrfs_path *path,
				       struct btrfs_key *key,
				       u64 bytenr, u64 len)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_buffer *leaf;
	const u32 csum_size = fs_info->csum_size;
	u64 csum_end;
	u64 end_byte = bytenr + len;
	u32 blocksize_bits = fs_info->sectorsize_bits;

	leaf = path->nodes[0];
	csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
	csum_end <<= blocksize_bits;
	csum_end += key->offset;

	if (key->offset < bytenr && csum_end <= end_byte) {
		/*
		 *        [ bytenr - len ]
		 *        [   ]
		 *  [csum     ]
		 * A simple truncate off the end of the item
		 */
		u32 new_size = (bytenr - key->offset) >> blocksize_bits;
		new_size *= csum_size;
		btrfs_truncate_item(trans, path, new_size, 1);
	} else if (key->offset >= bytenr && csum_end > end_byte &&
		   end_byte > key->offset) {
		/*
		 *        [ bytenr - len ]
		 *                [ ]
		 *                [csum     ]
		 * we need to truncate from the beginning of the csum
		 */
		u32 new_size = (csum_end - end_byte) >> blocksize_bits;
		new_size *= csum_size;

		btrfs_truncate_item(trans, path, new_size, 0);

		key->offset = end_byte;
		btrfs_set_item_key_safe(trans, path, key);
	} else {
		BUG();
	}
}

/*
 * Delete the csum items from the csum tree for a given range of bytes.
 */
int btrfs_del_csums(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, u64 bytenr, u64 len)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	u64 end_byte = bytenr + len;
	u64 csum_end;
	struct extent_buffer *leaf;
	int ret = 0;
	const u32 csum_size = fs_info->csum_size;
	u32 blocksize_bits = fs_info->sectorsize_bits;

	ASSERT(root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID ||
	       root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
		key.offset = end_byte - 1;
		key.type = BTRFS_EXTENT_CSUM_KEY;

		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		} else if (ret < 0) {
			break;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY) {
			break;
		}

		if (key.offset >= end_byte)
			break;

		csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
		csum_end <<= blocksize_bits;
		csum_end += key.offset;

		/* this csum ends before we start, we're done */
		if (csum_end <= bytenr)
			break;

		/* delete the entire item, it is inside our range */
		if (key.offset >= bytenr && csum_end <= end_byte) {
			int del_nr = 1;

			/*
			 * Check how many csum items preceding this one in this
			 * leaf correspond to our range and then delete them all
			 * at once.
			 */
			if (key.offset > bytenr && path->slots[0] > 0) {
				int slot = path->slots[0] - 1;

				while (slot >= 0) {
					struct btrfs_key pk;

					btrfs_item_key_to_cpu(leaf, &pk, slot);
					if (pk.offset < bytenr ||
					    pk.type != BTRFS_EXTENT_CSUM_KEY ||
					    pk.objectid !=
					    BTRFS_EXTENT_CSUM_OBJECTID)
						break;
					path->slots[0] = slot;
					del_nr++;
					key.offset = pk.offset;
					slot--;
				}
			}
			ret = btrfs_del_items(trans, root, path,
					      path->slots[0], del_nr);
			if (ret)
				break;
			if (key.offset == bytenr)
				break;
		} else if (key.offset < bytenr && csum_end > end_byte) {
			unsigned long offset;
			unsigned long shift_len;
			unsigned long item_offset;
			/*
			 *        [ bytenr - len ]
			 *     [csum              ]
			 *
			 * Our bytes are in the middle of the csum,
			 * we need to split this item and insert a new one.
			 *
			 * But we can't drop the path because the
			 * csum could change, get removed, extended etc.
			 *
			 * The trick here is the max size of a csum item leaves
			 * enough room in the tree block for a single
			 * item header. So, we split the item in place,
			 * adding a new header pointing to the existing
			 * bytes. Then we loop around again and we have
			 * a nicely formed csum item that we can neatly
			 * truncate.
			 */
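			/*
			 * A small worked example, assuming 4KiB sectors and
			 * 4-byte crc32c csums: if this item starts at
			 * key.offset and we delete [key.offset + 8K,
			 * key.offset + 16K), then offset = 2 * 4 = 8 and
			 * shift_len = 2 * 4 = 8, so we zero 8 bytes of csums
			 * starting 8 bytes into the item and split the item
			 * at that byte offset.
			 */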
			offset = (bytenr - key.offset) >> blocksize_bits;
			offset *= csum_size;

			shift_len = (len >> blocksize_bits) * csum_size;

			item_offset = btrfs_item_ptr_offset(leaf,
							    path->slots[0]);

			memzero_extent_buffer(leaf, item_offset + offset,
					      shift_len);
			key.offset = bytenr;

			/*
			 * btrfs_split_item returns -EAGAIN when the
			 * item changed size or key
			 */
			ret = btrfs_split_item(trans, root, path, &key, offset);
			if (ret && ret != -EAGAIN) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
			ret = 0;

			key.offset = end_byte - 1;
		} else {
			truncate_one_csum(trans, path, &key, bytenr, len);
			if (key.offset < bytenr)
				break;
		}
		btrfs_release_path(path);
	}
	btrfs_free_path(path);
	return ret;
}

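/*
 * Report the start offset of the csum item following the one the path
 * currently points at, moving to the next leaf if necessary. Sets
 * *next_offset to (u64)-1 if there is no further csum item.
 */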
static int find_next_csum_offset(struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 *next_offset)
{
	const u32 nritems = btrfs_header_nritems(path->nodes[0]);
	struct btrfs_key found_key;
	int slot = path->slots[0] + 1;
	int ret;

	if (nritems == 0 || slot >= nritems) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0) {
			return ret;
		} else if (ret > 0) {
			*next_offset = (u64)-1;
			return 0;
		}
		slot = path->slots[0];
	}

	btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);

	if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
	    found_key.type != BTRFS_EXTENT_CSUM_KEY)
		*next_offset = (u64)-1;
	else
		*next_offset = found_key.offset;

	return 0;
}

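/*
 * Insert the checksums from @sums into the csum tree of @root, extending an
 * existing csum item where possible and inserting new items otherwise,
 * looping until the whole @sums range has been written.
 */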
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_ordered_sum *sums)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key file_key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct btrfs_csum_item *item;
	struct btrfs_csum_item *item_end;
	struct extent_buffer *leaf = NULL;
	u64 next_offset;
	u64 total_bytes = 0;
	u64 csum_offset;
	u64 bytenr;
	u32 ins_size;
	int index = 0;
	int found_next;
	int ret;
	const u32 csum_size = fs_info->csum_size;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	next_offset = (u64)-1;
	found_next = 0;
	bytenr = sums->logical + total_bytes;
	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	file_key.offset = bytenr;
	file_key.type = BTRFS_EXTENT_CSUM_KEY;

	item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
	if (!IS_ERR(item)) {
		ret = 0;
		leaf = path->nodes[0];
		item_end = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_csum_item);
		item_end = (struct btrfs_csum_item *)((char *)item_end +
			   btrfs_item_size(leaf, path->slots[0]));
		goto found;
	}
	ret = PTR_ERR(item);
	if (ret != -EFBIG && ret != -ENOENT)
		goto out;

	if (ret == -EFBIG) {
		u32 item_size;
		/* we found one, but it isn't big enough yet */
		leaf = path->nodes[0];
		item_size = btrfs_item_size(leaf, path->slots[0]);
		if ((item_size / csum_size) >=
		    MAX_CSUM_ITEMS(fs_info, csum_size)) {
			/* already at max size, make a new one */
			goto insert;
		}
	} else {
		/* We didn't find a csum item, insert one. */
		ret = find_next_csum_offset(root, path, &next_offset);
		if (ret < 0)
			goto out;
		found_next = 1;
		goto insert;
	}

	/*
	 * At this point, we know the tree has a checksum item that ends at an
	 * offset matching the start of the checksum range we want to insert.
	 * We try to extend that item as much as possible and then add as many
	 * checksums to it as they fit.
	 *
	 * First check if the leaf has enough free space for at least one
	 * checksum. If it has go directly to the item extension code, otherwise
	 * release the path and do a search for insertion before the extension.
	 */
	if (btrfs_leaf_free_space(leaf) >= csum_size) {
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		csum_offset = (bytenr - found_key.offset) >>
			fs_info->sectorsize_bits;
		goto extend_csum;
	}

	btrfs_release_path(path);
	path->search_for_extension = 1;
	ret = btrfs_search_slot(trans, root, &file_key, path,
				csum_size, 1);
	path->search_for_extension = 0;
	if (ret < 0)
		goto out;

	if (ret > 0) {
		if (path->slots[0] == 0)
			goto insert;
		path->slots[0]--;
	}

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;

	if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
	    found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
	    csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) {
		goto insert;
	}

extend_csum:
	if (csum_offset == btrfs_item_size(leaf, path->slots[0]) /
	    csum_size) {
		int extend_nr;
		u64 tmp;
		u32 diff;

		tmp = sums->len - total_bytes;
		tmp >>= fs_info->sectorsize_bits;
		WARN_ON(tmp < 1);
		extend_nr = max_t(int, 1, tmp);

		/*
		 * A log tree can already have checksum items with a subset of
		 * the checksums we are trying to log. This can happen after
		 * doing a sequence of partial writes into prealloc extents and
		 * fsyncs in between, with a full fsync logging a larger subrange
		 * of an extent for which a previous fast fsync logged a smaller
		 * subrange. And this happens in particular due to merging file
		 * extent items when we complete an ordered extent for a range
		 * covered by a prealloc extent - this is done at
		 * btrfs_mark_extent_written().
		 *
		 * So if we try to extend the previous checksum item, which has
		 * a range that ends at the start of the range we want to insert,
		 * make sure we don't extend beyond the start offset of the next
		 * checksum item. If we are at the last item in the leaf, then
		 * forget the optimization of extending and add a new checksum
		 * item - it is not worth the complexity of releasing the path,
		 * getting the first key for the next leaf, repeat the btree
		 * search, etc, because log trees are temporary anyway and it
		 * would only save a few bytes of leaf space.
		 */
		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
			if (path->slots[0] + 1 >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = find_next_csum_offset(root, path, &next_offset);
				if (ret < 0)
					goto out;
				found_next = 1;
				goto insert;
			}

			ret = find_next_csum_offset(root, path, &next_offset);
			if (ret < 0)
				goto out;

			tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
			if (tmp <= INT_MAX)
				extend_nr = min_t(int, extend_nr, tmp);
		}

		diff = (csum_offset + extend_nr) * csum_size;
		diff = min(diff,
			   MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);

		diff = diff - btrfs_item_size(leaf, path->slots[0]);
		diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
		diff /= csum_size;
		diff *= csum_size;

		btrfs_extend_item(trans, path, diff);
		ret = 0;
		goto csum;
	}

insert:
	btrfs_release_path(path);
	csum_offset = 0;
	if (found_next) {
		u64 tmp;

		tmp = sums->len - total_bytes;
		tmp >>= fs_info->sectorsize_bits;
		tmp = min(tmp, (next_offset - file_key.offset) >>
					 fs_info->sectorsize_bits);

		tmp = max_t(u64, 1, tmp);
		tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
		ins_size = csum_size * tmp;
	} else {
		ins_size = csum_size;
	}
	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
				      ins_size);
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];
csum:
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
	item_end = (struct btrfs_csum_item *)((unsigned char *)item +
				      btrfs_item_size(leaf, path->slots[0]));
	item = (struct btrfs_csum_item *)((unsigned char *)item +
					  csum_offset * csum_size);
found:
	ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
	ins_size *= csum_size;
	ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
			      ins_size);
	write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
			    ins_size);

	index += ins_size;
	ins_size /= csum_size;
	total_bytes += ins_size * fs_info->sectorsize;

	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
	if (total_bytes < sums->len) {
		btrfs_release_path(path);
		cond_resched();
		goto again;
	}
out:
	btrfs_free_path(path);
	return ret;
}

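/*
 * Fill an extent map with the information from the file extent item @fi that
 * @path points at, mirroring how inode.c:btrfs_get_extent() interprets the
 * on-disk item.
 */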
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
				     const struct btrfs_path *path,
				     struct btrfs_file_extent_item *fi,
				     struct extent_map *em)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf = path->nodes[0];
	const int slot = path->slots[0];
	struct btrfs_key key;
	u64 extent_start, extent_end;
	u64 bytenr;
	u8 type = btrfs_file_extent_type(leaf, fi);
	int compress_type = btrfs_file_extent_compression(leaf, fi);

	btrfs_item_key_to_cpu(leaf, &key, slot);
	extent_start = key.offset;
	extent_end = btrfs_file_extent_end(path);
	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	em->generation = btrfs_file_extent_generation(leaf, fi);
	if (type == BTRFS_FILE_EXTENT_REG ||
	    type == BTRFS_FILE_EXTENT_PREALLOC) {
		em->start = extent_start;
		em->len = extent_end - extent_start;
		em->orig_start = extent_start -
			btrfs_file_extent_offset(leaf, fi);
		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (bytenr == 0) {
			em->block_start = EXTENT_MAP_HOLE;
			return;
		}
		if (compress_type != BTRFS_COMPRESS_NONE) {
			extent_map_set_compression(em, compress_type);
			em->block_start = bytenr;
			em->block_len = em->orig_block_len;
		} else {
			bytenr += btrfs_file_extent_offset(leaf, fi);
			em->block_start = bytenr;
			em->block_len = em->len;
			if (type == BTRFS_FILE_EXTENT_PREALLOC)
				em->flags |= EXTENT_FLAG_PREALLOC;
		}
	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
		em->block_start = EXTENT_MAP_INLINE;
		em->start = extent_start;
		em->len = extent_end - extent_start;
		/*
		 * Initialize orig_start and block_len with the same values
		 * as in inode.c:btrfs_get_extent().
		 */
		em->orig_start = EXTENT_MAP_HOLE;
		em->block_len = (u64)-1;
		extent_map_set_compression(em, compress_type);
	} else {
		btrfs_err(fs_info,
			  "unknown file extent item type %d, inode %llu, offset %llu, root %llu",
			  type, btrfs_ino(inode), extent_start,
			  root->root_key.objectid);
	}
}

/*
 * Returns the end offset (non inclusive) of the file extent item the given path
 * points to. If it points to an inline extent, the returned offset is rounded
 * up to the sector size.
 */
u64 btrfs_file_extent_end(const struct btrfs_path *path)
{
	const struct extent_buffer *leaf = path->nodes[0];
	const int slot = path->slots[0];
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 end;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		end = btrfs_file_extent_ram_bytes(leaf, fi);
		end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
	} else {
		end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	}

	return end;
}