1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Copyright (c) 2021-2024 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_trans.h" |
14 | #include "xfs_btree.h" |
15 | #include "xfs_error.h" |
16 | #include "xfs_buf_mem.h" |
17 | #include "xfs_btree_mem.h" |
18 | #include "xfs_ag.h" |
19 | #include "xfs_buf_item.h" |
20 | #include "xfs_trace.h" |
21 | |
22 | /* Set the root of an in-memory btree. */ |
23 | void |
24 | xfbtree_set_root( |
25 | struct xfs_btree_cur *cur, |
26 | const union xfs_btree_ptr *ptr, |
27 | int inc) |
28 | { |
29 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); |
30 | |
31 | cur->bc_mem.xfbtree->root = *ptr; |
32 | cur->bc_mem.xfbtree->nlevels += inc; |
33 | } |
34 | |
35 | /* Initialize a pointer from the in-memory btree header. */ |
36 | void |
37 | xfbtree_init_ptr_from_cur( |
38 | struct xfs_btree_cur *cur, |
39 | union xfs_btree_ptr *ptr) |
40 | { |
41 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); |
42 | |
43 | *ptr = cur->bc_mem.xfbtree->root; |
44 | } |
45 | |
46 | /* Duplicate an in-memory btree cursor. */ |
47 | struct xfs_btree_cur * |
48 | xfbtree_dup_cursor( |
49 | struct xfs_btree_cur *cur) |
50 | { |
51 | struct xfs_btree_cur *ncur; |
52 | |
53 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); |
54 | |
55 | ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops, |
56 | cur->bc_maxlevels, cur->bc_cache); |
57 | ncur->bc_flags = cur->bc_flags; |
58 | ncur->bc_nlevels = cur->bc_nlevels; |
59 | ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree; |
60 | |
61 | if (cur->bc_mem.pag) |
62 | ncur->bc_mem.pag = xfs_perag_hold(pag: cur->bc_mem.pag); |
63 | |
64 | return ncur; |
65 | } |
66 | |
67 | /* Close the btree xfile and release all resources. */ |
68 | void |
69 | xfbtree_destroy( |
70 | struct xfbtree *xfbt) |
71 | { |
72 | xfs_buftarg_drain(xfbt->target); |
73 | } |
74 | |
75 | /* Compute the number of bytes available for records. */ |
76 | static inline unsigned int |
77 | xfbtree_rec_bytes( |
78 | struct xfs_mount *mp, |
79 | const struct xfs_btree_ops *ops) |
80 | { |
81 | return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN; |
82 | } |
83 | |
84 | /* Initialize an empty leaf block as the btree root. */ |
85 | STATIC int |
86 | xfbtree_init_leaf_block( |
87 | struct xfs_mount *mp, |
88 | struct xfbtree *xfbt, |
89 | const struct xfs_btree_ops *ops) |
90 | { |
91 | struct xfs_buf *bp; |
92 | xfbno_t bno = xfbt->highest_bno++; |
93 | int error; |
94 | |
95 | error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE, |
96 | &bp); |
97 | if (error) |
98 | return error; |
99 | |
100 | trace_xfbtree_create_root_buf(xfbt, bp); |
101 | |
102 | bp->b_ops = ops->buf_ops; |
103 | xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner); |
104 | xfs_buf_relse(bp); |
105 | |
106 | xfbt->root.l = cpu_to_be64(bno); |
107 | return 0; |
108 | } |
109 | |
110 | /* |
111 | * Create an in-memory btree root that can be used with the given xmbuf. |
112 | * Callers must set xfbt->owner. |
113 | */ |
114 | int |
115 | xfbtree_init( |
116 | struct xfs_mount *mp, |
117 | struct xfbtree *xfbt, |
118 | struct xfs_buftarg *btp, |
119 | const struct xfs_btree_ops *ops) |
120 | { |
121 | unsigned int blocklen = xfbtree_rec_bytes(mp, ops); |
122 | unsigned int keyptr_len; |
123 | int error; |
124 | |
125 | /* Requires a long-format CRC-format btree */ |
126 | if (!xfs_has_crc(mp)) { |
127 | ASSERT(xfs_has_crc(mp)); |
128 | return -EINVAL; |
129 | } |
130 | if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) { |
131 | ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN); |
132 | return -EINVAL; |
133 | } |
134 | |
135 | memset(xfbt, 0, sizeof(*xfbt)); |
136 | xfbt->target = btp; |
137 | |
138 | /* Set up min/maxrecs for this btree. */ |
139 | keyptr_len = ops->key_len + sizeof(__be64); |
140 | xfbt->maxrecs[0] = blocklen / ops->rec_len; |
141 | xfbt->maxrecs[1] = blocklen / keyptr_len; |
142 | xfbt->minrecs[0] = xfbt->maxrecs[0] / 2; |
143 | xfbt->minrecs[1] = xfbt->maxrecs[1] / 2; |
144 | xfbt->highest_bno = 0; |
145 | xfbt->nlevels = 1; |
146 | |
147 | /* Initialize the empty btree. */ |
148 | error = xfbtree_init_leaf_block(mp, xfbt, ops); |
149 | if (error) |
150 | goto err_freesp; |
151 | |
152 | trace_xfbtree_init(mp, xfbt, ops); |
153 | |
154 | return 0; |
155 | |
156 | err_freesp: |
157 | xfs_buftarg_drain(xfbt->target); |
158 | return error; |
159 | } |
160 | |
161 | /* Allocate a block to our in-memory btree. */ |
162 | int |
163 | xfbtree_alloc_block( |
164 | struct xfs_btree_cur *cur, |
165 | const union xfs_btree_ptr *start, |
166 | union xfs_btree_ptr *new, |
167 | int *stat) |
168 | { |
169 | struct xfbtree *xfbt = cur->bc_mem.xfbtree; |
170 | xfbno_t bno = xfbt->highest_bno++; |
171 | |
172 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); |
173 | |
174 | trace_xfbtree_alloc_block(xfbt, cur, bno); |
175 | |
176 | /* Fail if the block address exceeds the maximum for the buftarg. */ |
177 | if (!xfbtree_verify_bno(xfbt, bno)) { |
178 | ASSERT(xfbtree_verify_bno(xfbt, bno)); |
179 | *stat = 0; |
180 | return 0; |
181 | } |
182 | |
183 | new->l = cpu_to_be64(bno); |
184 | *stat = 1; |
185 | return 0; |
186 | } |
187 | |
188 | /* Free a block from our in-memory btree. */ |
189 | int |
190 | xfbtree_free_block( |
191 | struct xfs_btree_cur *cur, |
192 | struct xfs_buf *bp) |
193 | { |
194 | struct xfbtree *xfbt = cur->bc_mem.xfbtree; |
195 | xfs_daddr_t daddr = xfs_buf_daddr(bp); |
196 | xfbno_t bno = xfs_daddr_to_xfbno(daddr); |
197 | |
198 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); |
199 | |
200 | trace_xfbtree_free_block(xfbt, cur, bno); |
201 | |
202 | if (bno + 1 == xfbt->highest_bno) |
203 | xfbt->highest_bno--; |
204 | |
205 | return 0; |
206 | } |
207 | |
208 | /* Return the minimum number of records for a btree block. */ |
209 | int |
210 | xfbtree_get_minrecs( |
211 | struct xfs_btree_cur *cur, |
212 | int level) |
213 | { |
214 | struct xfbtree *xfbt = cur->bc_mem.xfbtree; |
215 | |
216 | return xfbt->minrecs[level != 0]; |
217 | } |
218 | |
219 | /* Return the maximum number of records for a btree block. */ |
220 | int |
221 | xfbtree_get_maxrecs( |
222 | struct xfs_btree_cur *cur, |
223 | int level) |
224 | { |
225 | struct xfbtree *xfbt = cur->bc_mem.xfbtree; |
226 | |
227 | return xfbt->maxrecs[level != 0]; |
228 | } |
229 | |
230 | /* If this log item is a buffer item that came from the xfbtree, return it. */ |
231 | static inline struct xfs_buf * |
232 | xfbtree_buf_match( |
233 | struct xfbtree *xfbt, |
234 | const struct xfs_log_item *lip) |
235 | { |
236 | const struct xfs_buf_log_item *bli; |
237 | struct xfs_buf *bp; |
238 | |
239 | if (lip->li_type != XFS_LI_BUF) |
240 | return NULL; |
241 | |
242 | bli = container_of(lip, struct xfs_buf_log_item, bli_item); |
243 | bp = bli->bli_buf; |
244 | if (bp->b_target != xfbt->target) |
245 | return NULL; |
246 | |
247 | return bp; |
248 | } |
249 | |
250 | /* |
251 | * Commit changes to the incore btree immediately by writing all dirty xfbtree |
252 | * buffers to the backing xfile. This detaches all xfbtree buffers from the |
253 | * transaction, even on failure. The buffer locks are dropped between the |
254 | * delwri queue and submit, so the caller must synchronize btree access. |
255 | * |
256 | * Normally we'd let the buffers commit with the transaction and get written to |
257 | * the xfile via the log, but online repair stages ephemeral btrees in memory |
258 | * and uses the btree_staging functions to write new btrees to disk atomically. |
259 | * The in-memory btree (and its backing store) are discarded at the end of the |
260 | * repair phase, which means that xfbtree buffers cannot commit with the rest |
261 | * of a transaction. |
262 | * |
263 | * In other words, online repair only needs the transaction to collect buffer |
264 | * pointers and to avoid buffer deadlocks, not to guarantee consistency of |
265 | * updates. |
266 | */ |
267 | int |
268 | xfbtree_trans_commit( |
269 | struct xfbtree *xfbt, |
270 | struct xfs_trans *tp) |
271 | { |
272 | struct xfs_log_item *lip, *n; |
273 | bool tp_dirty = false; |
274 | int error = 0; |
275 | |
276 | /* |
277 | * For each xfbtree buffer attached to the transaction, write the dirty |
278 | * buffers to the xfile and release them. |
279 | */ |
280 | list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { |
281 | struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip); |
282 | |
283 | if (!bp) { |
284 | if (test_bit(XFS_LI_DIRTY, &lip->li_flags)) |
285 | tp_dirty |= true; |
286 | continue; |
287 | } |
288 | |
289 | trace_xfbtree_trans_commit_buf(xfbt, bp); |
290 | |
291 | xmbuf_trans_bdetach(tp, bp); |
292 | |
293 | /* |
294 | * If the buffer fails verification, note the failure but |
295 | * continue walking the transaction items so that we remove all |
296 | * ephemeral btree buffers. |
297 | */ |
298 | if (!error) |
299 | error = xmbuf_finalize(bp); |
300 | |
301 | xfs_buf_relse(bp); |
302 | } |
303 | |
304 | /* |
305 | * Reset the transaction's dirty flag to reflect the dirty state of the |
306 | * log items that are still attached. |
307 | */ |
308 | tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) | |
309 | (tp_dirty ? XFS_TRANS_DIRTY : 0); |
310 | |
311 | return error; |
312 | } |
313 | |
314 | /* |
315 | * Cancel changes to the incore btree by detaching all the xfbtree buffers. |
316 | * Changes are not undone, so callers must not access the btree ever again. |
317 | */ |
318 | void |
319 | xfbtree_trans_cancel( |
320 | struct xfbtree *xfbt, |
321 | struct xfs_trans *tp) |
322 | { |
323 | struct xfs_log_item *lip, *n; |
324 | bool tp_dirty = false; |
325 | |
326 | list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { |
327 | struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip); |
328 | |
329 | if (!bp) { |
330 | if (test_bit(XFS_LI_DIRTY, &lip->li_flags)) |
331 | tp_dirty |= true; |
332 | continue; |
333 | } |
334 | |
335 | trace_xfbtree_trans_cancel_buf(xfbt, bp); |
336 | |
337 | xmbuf_trans_bdetach(tp, bp); |
338 | xfs_buf_relse(bp); |
339 | } |
340 | |
341 | /* |
342 | * Reset the transaction's dirty flag to reflect the dirty state of the |
343 | * log items that are still attached. |
344 | */ |
345 | tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) | |
346 | (tp_dirty ? XFS_TRANS_DIRTY : 0); |
347 | } |
348 | |