1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _BCACHEFS_BTREE_TYPES_H |
3 | #define _BCACHEFS_BTREE_TYPES_H |
4 | |
5 | #include <linux/list.h> |
6 | #include <linux/rhashtable.h> |
7 | |
8 | #include "bbpos_types.h" |
9 | #include "btree_key_cache_types.h" |
10 | #include "buckets_types.h" |
11 | #include "darray.h" |
12 | #include "errcode.h" |
13 | #include "journal_types.h" |
14 | #include "replicas_types.h" |
15 | #include "six.h" |
16 | |
17 | struct open_bucket; |
18 | struct btree_update; |
19 | struct btree_trans; |
20 | |
21 | #define MAX_BSETS 3U |
22 | |
23 | struct btree_nr_keys { |
24 | |
25 | /* |
26 | * Amount of live metadata (i.e. size of node after a compaction) in |
27 | * units of u64s |
28 | */ |
29 | u16 live_u64s; |
30 | u16 bset_u64s[MAX_BSETS]; |
31 | |
32 | /* live keys only: */ |
33 | u16 packed_keys; |
34 | u16 unpacked_keys; |
35 | }; |
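
/*
 * Illustrative sketch only (the real accounting helpers live elsewhere in
 * bcachefs, not in this header; the name below is hypothetical): adding a
 * live key of @u64s u64s to bset @i bumps the per-bset and total live
 * counters together, plus exactly one of the packed/unpacked counters:
 */
static inline void btree_nr_keys_add_sketch(struct btree_nr_keys *nr,
					    unsigned i, unsigned u64s,
					    bool packed)
{
	nr->live_u64s		+= u64s;
	nr->bset_u64s[i]	+= u64s;
	if (packed)
		nr->packed_keys++;
	else
		nr->unpacked_keys++;
}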
36 | |
37 | struct bset_tree { |
38 | /* |
39 | * We construct a binary tree in an array as if the array |
40 | * started at 1, so that things line up on the same cachelines |
41 | * better: see comments in bset.c at cacheline_to_bkey() for |
42 | * details |
43 | */ |
44 | |
45 | /* size of the binary tree and prev array */ |
46 | u16 size; |
47 | |
48 | /* function of size - precalculated for to_inorder() */ |
u16 extra;
50 | |
51 | u16 data_offset; |
52 | u16 aux_data_offset; |
53 | u16 end_offset; |
54 | }; |
55 | |
56 | struct btree_write { |
57 | struct journal_entry_pin journal; |
58 | }; |
59 | |
60 | struct btree_alloc { |
61 | struct open_buckets ob; |
62 | __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); |
63 | }; |
64 | |
65 | struct btree_bkey_cached_common { |
66 | struct six_lock lock; |
67 | u8 level; |
68 | u8 btree_id; |
69 | bool cached; |
70 | }; |
71 | |
72 | struct btree { |
73 | struct btree_bkey_cached_common c; |
74 | |
75 | struct rhash_head hash; |
76 | u64 hash_val; |
77 | |
78 | unsigned long flags; |
79 | u16 written; |
80 | u8 nsets; |
81 | u8 nr_key_bits; |
82 | u16 version_ondisk; |
83 | |
84 | struct bkey_format format; |
85 | |
86 | struct btree_node *data; |
87 | void *aux_data; |
88 | |
89 | /* |
90 | * Sets of sorted keys - the real btree node - plus a binary search tree |
91 | * |
92 | * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point |
93 | * to the memory we have allocated for this btree node. Additionally, |
94 | * set[0]->data points to the entire btree node as it exists on disk. |
95 | */ |
96 | struct bset_tree set[MAX_BSETS]; |
97 | |
98 | struct btree_nr_keys nr; |
99 | u16 sib_u64s[2]; |
100 | u16 whiteout_u64s; |
101 | u8 byte_order; |
102 | u8 unpack_fn_len; |
103 | |
104 | struct btree_write writes[2]; |
105 | |
106 | /* Key/pointer for this btree node */ |
107 | __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); |
108 | |
109 | /* |
110 | * XXX: add a delete sequence number, so when bch2_btree_node_relock() |
111 | * fails because the lock sequence number has changed - i.e. the |
112 | * contents were modified - we can still relock the node if it's still |
113 | * the one we want, without redoing the traversal |
114 | */ |
115 | |
116 | /* |
117 | * For asynchronous splits/interior node updates: |
118 | * When we do a split, we allocate new child nodes and update the parent |
119 | * node to point to them: we update the parent in memory immediately, |
120 | * but then we must wait until the children have been written out before |
121 | * the update to the parent can be written - this is a list of the |
122 | * btree_updates that are blocking this node from being |
123 | * written: |
124 | */ |
125 | struct list_head write_blocked; |
126 | |
127 | /* |
128 | * Also for asynchronous splits/interior node updates: |
129 | * If a btree node isn't reachable yet, we don't want to kick off |
130 | * another write - because that write also won't yet be reachable and |
131 | * marking it as completed before it's reachable would be incorrect: |
132 | */ |
133 | unsigned long will_make_reachable; |
134 | |
135 | struct open_buckets ob; |
136 | |
137 | /* lru list */ |
138 | struct list_head list; |
139 | }; |
140 | |
141 | struct btree_cache { |
142 | struct rhashtable table; |
143 | bool table_init_done; |
144 | /* |
145 | * We never free a struct btree, except on shutdown - we just put it on |
146 | * the btree_cache_freed list and reuse it later. This simplifies the |
147 | * code, and it doesn't cost us much memory as the memory usage is |
148 | * dominated by buffers that hold the actual btree node data and those |
149 | * can be freed - and the number of struct btrees allocated is |
150 | * effectively bounded. |
151 | * |
* btree_cache_freeable is effectively a small cache - we use it because
153 | * high order page allocations can be rather expensive, and it's quite |
154 | * common to delete and allocate btree nodes in quick succession. It |
155 | * should never grow past ~2-3 nodes in practice. |
156 | */ |
157 | struct mutex lock; |
158 | struct list_head live; |
159 | struct list_head freeable; |
160 | struct list_head freed_pcpu; |
161 | struct list_head freed_nonpcpu; |
162 | |
163 | /* Number of elements in live + freeable lists */ |
164 | unsigned used; |
165 | unsigned reserve; |
166 | atomic_t dirty; |
167 | struct shrinker *shrink; |
168 | |
169 | /* |
170 | * If we need to allocate memory for a new btree node and that |
171 | * allocation fails, we can cannibalize another node in the btree cache |
172 | * to satisfy the allocation - lock to guarantee only one thread does |
173 | * this at a time: |
174 | */ |
175 | struct task_struct *alloc_lock; |
176 | struct closure_waitlist alloc_wait; |
177 | |
178 | struct bbpos pinned_nodes_start; |
179 | struct bbpos pinned_nodes_end; |
180 | u64 pinned_nodes_leaf_mask; |
181 | u64 pinned_nodes_interior_mask; |
182 | }; |
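
/*
 * Illustrative sketch only (the real cannibalize-lock helpers live in
 * btree_cache.c; this helper and its signature are hypothetical): alloc_lock
 * above acts as a single-owner lock by storing the owning task, so a trylock
 * is roughly a cmpxchg from NULL:
 */
static inline bool btree_cache_cannibalize_trylock_sketch(struct btree_cache *bc,
							   struct task_struct *task)
{
	/* succeeds only if no task currently owns the cannibalize lock */
	return cmpxchg(&bc->alloc_lock, NULL, task) == NULL;
}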
183 | |
184 | struct btree_node_iter { |
185 | struct btree_node_iter_set { |
186 | u16 k, end; |
187 | } data[MAX_BSETS]; |
188 | }; |
189 | |
190 | /* |
191 | * Iterate over all possible positions, synthesizing deleted keys for holes: |
192 | */ |
193 | static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0; |
194 | /* |
195 | * Indicates that intent locks should be taken on leaf nodes, because we expect |
196 | * to be doing updates: |
197 | */ |
198 | static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1; |
199 | /* |
200 | * Causes the btree iterator code to prefetch additional btree nodes from disk: |
201 | */ |
202 | static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2; |
203 | /* |
204 | * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for |
205 | * @pos or the first key strictly greater than @pos |
206 | */ |
207 | static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3; |
208 | static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4; |
209 | static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5; |
210 | static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6; |
211 | static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7; |
212 | static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8; |
213 | static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9; |
214 | static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; |
215 | static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11; |
216 | static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12; |
217 | static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13; |
218 | static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14; |
219 | #define __BTREE_ITER_FLAGS_END 15 |
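
/*
 * Illustrative only (hypothetical helper, not part of the iterator API): the
 * flags above are OR'd together by callers when an iterator is initialized;
 * e.g. a path that intends to overwrite existing keys typically combines slot
 * iteration with intent locks:
 */
static inline u16 btree_iter_example_update_flags(void)
{
	return BTREE_ITER_SLOTS|BTREE_ITER_INTENT;
}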
220 | |
221 | enum btree_path_uptodate { |
222 | BTREE_ITER_UPTODATE = 0, |
223 | BTREE_ITER_NEED_RELOCK = 1, |
224 | BTREE_ITER_NEED_TRAVERSE = 2, |
225 | }; |
226 | |
227 | #if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG) |
228 | #define TRACK_PATH_ALLOCATED |
229 | #endif |
230 | |
231 | typedef u16 btree_path_idx_t; |
232 | |
233 | struct btree_path { |
234 | btree_path_idx_t sorted_idx; |
235 | u8 ref; |
236 | u8 intent_ref; |
237 | |
238 | /* btree_iter_copy starts here: */ |
239 | struct bpos pos; |
240 | |
241 | enum btree_id btree_id:5; |
242 | bool cached:1; |
243 | bool preserve:1; |
244 | enum btree_path_uptodate uptodate:2; |
245 | /* |
246 | * When true, failing to relock this path will cause the transaction to |
247 | * restart: |
248 | */ |
249 | bool should_be_locked:1; |
250 | unsigned level:3, |
251 | locks_want:3; |
252 | u8 nodes_locked; |
253 | |
254 | struct btree_path_level { |
255 | struct btree *b; |
256 | struct btree_node_iter iter; |
257 | u32 lock_seq; |
258 | #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS |
259 | u64 lock_taken_time; |
260 | #endif |
261 | } l[BTREE_MAX_DEPTH]; |
262 | #ifdef TRACK_PATH_ALLOCATED |
263 | unsigned long ip_allocated; |
264 | #endif |
265 | }; |
266 | |
267 | static inline struct btree_path_level *path_l(struct btree_path *path) |
268 | { |
269 | return path->l + path->level; |
270 | } |
271 | |
272 | static inline unsigned long btree_path_ip_allocated(struct btree_path *path) |
273 | { |
274 | #ifdef TRACK_PATH_ALLOCATED |
275 | return path->ip_allocated; |
276 | #else |
277 | return _THIS_IP_; |
278 | #endif |
279 | } |
280 | |
281 | /* |
282 | * @pos - iterator's current position |
283 | * @level - current btree depth |
284 | * @locks_want - btree level below which we start taking intent locks |
285 | * @nodes_locked - bitmask indicating which nodes in @nodes are locked |
286 | * @nodes_intent_locked - bitmask indicating which locks are intent locks |
287 | */ |
288 | struct btree_iter { |
289 | struct btree_trans *trans; |
290 | btree_path_idx_t path; |
291 | btree_path_idx_t update_path; |
292 | btree_path_idx_t key_cache_path; |
293 | |
294 | enum btree_id btree_id:8; |
295 | u8 min_depth; |
296 | |
297 | /* btree_iter_copy starts here: */ |
298 | u16 flags; |
299 | |
300 | /* When we're filtering by snapshot, the snapshot ID we're looking for: */ |
301 | unsigned snapshot; |
302 | |
303 | struct bpos pos; |
304 | /* |
305 | * Current unpacked key - so that bch2_btree_iter_next()/ |
306 | * bch2_btree_iter_next_slot() can correctly advance pos. |
307 | */ |
308 | struct bkey k; |
309 | |
310 | /* BTREE_ITER_WITH_JOURNAL: */ |
311 | size_t journal_idx; |
312 | #ifdef TRACK_PATH_ALLOCATED |
313 | unsigned long ip_allocated; |
314 | #endif |
315 | }; |
316 | |
317 | #define BKEY_CACHED_ACCESSED 0 |
318 | #define BKEY_CACHED_DIRTY 1 |
319 | |
320 | struct bkey_cached { |
321 | struct btree_bkey_cached_common c; |
322 | |
323 | unsigned long flags; |
324 | unsigned long btree_trans_barrier_seq; |
325 | u16 u64s; |
326 | bool valid; |
327 | struct bkey_cached_key key; |
328 | |
329 | struct rhash_head hash; |
330 | struct list_head list; |
331 | |
332 | struct journal_entry_pin journal; |
333 | u64 seq; |
334 | |
335 | struct bkey_i *k; |
336 | }; |
337 | |
338 | static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b) |
339 | { |
340 | return !b->cached |
341 | ? container_of(b, struct btree, c)->key.k.p |
342 | : container_of(b, struct bkey_cached, c)->key.pos; |
343 | } |
344 | |
345 | struct btree_insert_entry { |
346 | unsigned flags; |
347 | u8 bkey_type; |
348 | enum btree_id btree_id:8; |
349 | u8 level:4; |
350 | bool cached:1; |
351 | bool insert_trigger_run:1; |
352 | bool overwrite_trigger_run:1; |
353 | bool key_cache_already_flushed:1; |
354 | /* |
355 | * @old_k may be a key from the journal; @old_btree_u64s always refers |
356 | * to the size of the key being overwritten in the btree: |
357 | */ |
358 | u8 old_btree_u64s; |
359 | btree_path_idx_t path; |
360 | struct bkey_i *k; |
361 | /* key being overwritten: */ |
362 | struct bkey old_k; |
363 | const struct bch_val *old_v; |
364 | unsigned long ip_allocated; |
365 | }; |
366 | |
367 | /* Number of btree paths we preallocate, usually enough */ |
368 | #define BTREE_ITER_INITIAL 64 |
369 | /* |
* Limit for btree_trans_too_many_iters(); this is enough that almost all code
371 | * paths should run inside this limit, and if they don't it usually indicates a |
372 | * bug (leaking/duplicated btree paths). |
373 | * |
374 | * exception: some fsck paths |
375 | * |
* bugs with excessive path usage seem to have been eliminated by now, so we
* might consider eliminating this (and btree_trans_too_many_iters()) at some
* point.
379 | */ |
380 | #define BTREE_ITER_NORMAL_LIMIT 256 |
381 | /* never exceed limit */ |
382 | #define BTREE_ITER_MAX (1U << 10) |
383 | |
384 | struct btree_trans_commit_hook; |
385 | typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); |
386 | |
387 | struct btree_trans_commit_hook { |
388 | btree_trans_commit_hook_fn *fn; |
389 | struct btree_trans_commit_hook *next; |
390 | }; |
391 | |
392 | #define BTREE_TRANS_MEM_MAX (1U << 16) |
393 | |
394 | #define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000 |
395 | |
396 | struct btree_trans_paths { |
397 | unsigned long nr_paths; |
398 | struct btree_path paths[]; |
399 | }; |
400 | |
401 | struct btree_trans { |
402 | struct bch_fs *c; |
403 | |
404 | unsigned long *paths_allocated; |
405 | struct btree_path *paths; |
406 | btree_path_idx_t *sorted; |
407 | struct btree_insert_entry *updates; |
408 | |
409 | void *mem; |
410 | unsigned mem_top; |
411 | unsigned mem_bytes; |
412 | |
413 | btree_path_idx_t nr_sorted; |
414 | btree_path_idx_t nr_paths; |
415 | btree_path_idx_t nr_paths_max; |
416 | u8 fn_idx; |
417 | u8 nr_updates; |
418 | u8 lock_must_abort; |
419 | bool lock_may_not_fail:1; |
420 | bool srcu_held:1; |
421 | bool used_mempool:1; |
422 | bool in_traverse_all:1; |
423 | bool paths_sorted:1; |
424 | bool memory_allocation_failure:1; |
425 | bool journal_transaction_names:1; |
426 | bool journal_replay_not_finished:1; |
427 | bool notrace_relock_fail:1; |
428 | bool write_locked:1; |
429 | enum bch_errcode restarted:16; |
430 | u32 restart_count; |
431 | |
432 | u64 last_begin_time; |
433 | unsigned long last_begin_ip; |
434 | unsigned long last_restarted_ip; |
435 | unsigned long srcu_lock_time; |
436 | |
437 | const char *fn; |
438 | struct btree_bkey_cached_common *locking; |
439 | struct six_lock_waiter locking_wait; |
440 | int srcu_idx; |
441 | |
442 | /* update path: */ |
443 | u16 journal_entries_u64s; |
444 | u16 journal_entries_size; |
445 | struct jset_entry *journal_entries; |
446 | |
447 | struct btree_trans_commit_hook *hooks; |
448 | struct journal_entry_pin *journal_pin; |
449 | |
450 | struct journal_res journal_res; |
451 | u64 *journal_seq; |
452 | struct disk_reservation *disk_res; |
453 | |
454 | struct bch_fs_usage_base fs_usage_delta; |
455 | |
456 | unsigned journal_u64s; |
unsigned extra_disk_res; /* XXX kill */
458 | struct replicas_delta_list *fs_usage_deltas; |
459 | |
460 | /* Entries before this are zeroed out on every bch2_trans_get() call */ |
461 | |
462 | struct list_head list; |
463 | struct closure ref; |
464 | |
465 | unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)]; |
466 | struct btree_trans_paths trans_paths; |
467 | struct btree_path _paths[BTREE_ITER_INITIAL]; |
468 | btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4]; |
469 | struct btree_insert_entry _updates[BTREE_ITER_INITIAL]; |
470 | }; |
471 | |
472 | static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) |
473 | { |
474 | return trans->paths + iter->path; |
475 | } |
476 | |
477 | static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter) |
478 | { |
479 | return iter->key_cache_path |
480 | ? trans->paths + iter->key_cache_path |
481 | : NULL; |
482 | } |
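
/*
 * Illustrative sketch only (hypothetical helper): combining the accessors
 * above with path_l(), the btree node an iterator currently points at - at
 * its path's current level - can be reached as:
 */
static inline struct btree *btree_iter_node_sketch(struct btree_trans *trans,
						   struct btree_iter *iter)
{
	return path_l(btree_iter_path(trans, iter))->b;
}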
483 | |
484 | #define BCH_BTREE_WRITE_TYPES() \ |
485 | x(initial, 0) \ |
486 | x(init_next_bset, 1) \ |
487 | x(cache_reclaim, 2) \ |
488 | x(journal_reclaim, 3) \ |
489 | x(interior, 4) |
490 | |
491 | enum btree_write_type { |
492 | #define x(t, n) BTREE_WRITE_##t, |
493 | BCH_BTREE_WRITE_TYPES() |
494 | #undef x |
495 | BTREE_WRITE_TYPE_NR, |
496 | }; |
497 | |
498 | #define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1) |
499 | #define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR)) |
500 | |
501 | #define BTREE_FLAGS() \ |
502 | x(read_in_flight) \ |
503 | x(read_error) \ |
504 | x(dirty) \ |
505 | x(need_write) \ |
506 | x(write_blocked) \ |
507 | x(will_make_reachable) \ |
508 | x(noevict) \ |
509 | x(write_idx) \ |
510 | x(accessed) \ |
511 | x(write_in_flight) \ |
512 | x(write_in_flight_inner) \ |
513 | x(just_written) \ |
514 | x(dying) \ |
515 | x(fake) \ |
516 | x(need_rewrite) \ |
517 | x(never_write) |
518 | |
519 | enum btree_flags { |
520 | /* First bits for btree node write type */ |
521 | BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1, |
522 | #define x(flag) BTREE_NODE_##flag, |
523 | BTREE_FLAGS() |
524 | #undef x |
525 | }; |
526 | |
527 | #define x(flag) \ |
528 | static inline bool btree_node_ ## flag(struct btree *b) \ |
529 | { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ |
530 | \ |
531 | static inline void set_btree_node_ ## flag(struct btree *b) \ |
532 | { set_bit(BTREE_NODE_ ## flag, &b->flags); } \ |
533 | \ |
534 | static inline void clear_btree_node_ ## flag(struct btree *b) \ |
535 | { clear_bit(BTREE_NODE_ ## flag, &b->flags); } |
536 | |
537 | BTREE_FLAGS() |
538 | #undef x |
539 | |
540 | static inline struct btree_write *btree_current_write(struct btree *b) |
541 | { |
542 | return b->writes + btree_node_write_idx(b); |
543 | } |
544 | |
545 | static inline struct btree_write *btree_prev_write(struct btree *b) |
546 | { |
547 | return b->writes + (btree_node_write_idx(b) ^ 1); |
548 | } |
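
/*
 * Illustrative sketch only (hypothetical helper): per the comment in enum
 * btree_flags, the write type occupies the low BTREE_WRITE_TYPE_BITS bits of
 * b->flags, below the per-node flag bits, so it can be read back with the
 * mask:
 */
static inline enum btree_write_type btree_node_write_type_sketch(struct btree *b)
{
	return b->flags & BTREE_WRITE_TYPE_MASK;
}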
549 | |
550 | static inline struct bset_tree *bset_tree_last(struct btree *b) |
551 | { |
552 | EBUG_ON(!b->nsets); |
553 | return b->set + b->nsets - 1; |
554 | } |
555 | |
556 | static inline void * |
557 | __btree_node_offset_to_ptr(const struct btree *b, u16 offset) |
558 | { |
559 | return (void *) ((u64 *) b->data + 1 + offset); |
560 | } |
561 | |
562 | static inline u16 |
563 | __btree_node_ptr_to_offset(const struct btree *b, const void *p) |
564 | { |
565 | u16 ret = (u64 *) p - 1 - (u64 *) b->data; |
566 | |
567 | EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); |
568 | return ret; |
569 | } |
570 | |
571 | static inline struct bset *bset(const struct btree *b, |
572 | const struct bset_tree *t) |
573 | { |
return __btree_node_offset_to_ptr(b, t->data_offset);
575 | } |
576 | |
577 | static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t) |
578 | { |
579 | t->end_offset = |
580 | __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t))); |
581 | } |
582 | |
583 | static inline void set_btree_bset(struct btree *b, struct bset_tree *t, |
584 | const struct bset *i) |
585 | { |
t->data_offset = __btree_node_ptr_to_offset(b, i);
587 | set_btree_bset_end(b, t); |
588 | } |
589 | |
590 | static inline struct bset *btree_bset_first(struct btree *b) |
591 | { |
return bset(b, b->set);
593 | } |
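
/*
 * Illustrative sketch only (hypothetical helper): set[0] is set up to describe
 * the bset embedded in the node header (see the comment in struct btree), so
 * for the first set the helper above resolves to the keys member of b->data:
 */
static inline bool btree_bset_first_is_inline_sketch(struct btree *b)
{
	return btree_bset_first(b) == &b->data->keys;
}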
594 | |
595 | static inline struct bset *btree_bset_last(struct btree *b) |
596 | { |
return bset(b, bset_tree_last(b));
598 | } |
599 | |
600 | static inline u16 |
601 | __btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k) |
602 | { |
return __btree_node_ptr_to_offset(b, k);
604 | } |
605 | |
606 | static inline struct bkey_packed * |
607 | __btree_node_offset_to_key(const struct btree *b, u16 k) |
608 | { |
return __btree_node_offset_to_ptr(b, k);
610 | } |
611 | |
612 | static inline unsigned btree_bkey_first_offset(const struct bset_tree *t) |
613 | { |
614 | return t->data_offset + offsetof(struct bset, _data) / sizeof(u64); |
615 | } |
616 | |
617 | #define btree_bkey_first(_b, _t) \ |
618 | ({ \ |
619 | EBUG_ON(bset(_b, _t)->start != \ |
620 | __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\ |
621 | \ |
622 | bset(_b, _t)->start; \ |
623 | }) |
624 | |
625 | #define btree_bkey_last(_b, _t) \ |
626 | ({ \ |
627 | EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \ |
628 | vstruct_last(bset(_b, _t))); \ |
629 | \ |
630 | __btree_node_offset_to_key(_b, (_t)->end_offset); \ |
631 | }) |
632 | |
633 | static inline unsigned bset_u64s(struct bset_tree *t) |
634 | { |
635 | return t->end_offset - t->data_offset - |
636 | sizeof(struct bset) / sizeof(u64); |
637 | } |
638 | |
639 | static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t) |
640 | { |
641 | return bset_u64s(t) - b->nr.bset_u64s[t - b->set]; |
642 | } |
643 | |
644 | static inline unsigned bset_byte_offset(struct btree *b, void *i) |
645 | { |
646 | return i - (void *) b->data; |
647 | } |
648 | |
649 | enum btree_node_type { |
650 | BKEY_TYPE_btree, |
651 | #define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1, |
652 | BCH_BTREE_IDS() |
653 | #undef x |
654 | BKEY_TYPE_NR |
655 | }; |
656 | |
657 | /* Type of a key in btree @id at level @level: */ |
658 | static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) |
659 | { |
660 | return level ? BKEY_TYPE_btree : (unsigned) id + 1; |
661 | } |
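
/*
 * Illustrative only (hypothetical helper): leaf nodes hold keys of their
 * btree's own type while every interior node holds btree pointers, e.g.:
 */
static inline bool __btree_node_type_example(void)
{
	return __btree_node_type(0, BTREE_ID_extents) == BKEY_TYPE_extents &&
	       __btree_node_type(1, BTREE_ID_extents) == BKEY_TYPE_btree;
}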
662 | |
663 | /* Type of keys @b contains: */ |
664 | static inline enum btree_node_type btree_node_type(struct btree *b) |
665 | { |
return __btree_node_type(b->c.level, b->c.btree_id);
667 | } |
668 | |
669 | const char *bch2_btree_node_type_str(enum btree_node_type); |
670 | |
671 | #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ |
672 | (BIT_ULL(BKEY_TYPE_extents)| \ |
673 | BIT_ULL(BKEY_TYPE_alloc)| \ |
674 | BIT_ULL(BKEY_TYPE_inodes)| \ |
675 | BIT_ULL(BKEY_TYPE_stripes)| \ |
676 | BIT_ULL(BKEY_TYPE_reflink)| \ |
677 | BIT_ULL(BKEY_TYPE_subvolumes)| \ |
678 | BIT_ULL(BKEY_TYPE_btree)) |
679 | |
680 | #define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \ |
681 | (BIT_ULL(BKEY_TYPE_alloc)| \ |
682 | BIT_ULL(BKEY_TYPE_inodes)| \ |
683 | BIT_ULL(BKEY_TYPE_stripes)| \ |
684 | BIT_ULL(BKEY_TYPE_snapshots)) |
685 | |
686 | #define BTREE_NODE_TYPE_HAS_TRIGGERS \ |
687 | (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ |
688 | BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS) |
689 | |
690 | static inline bool btree_node_type_needs_gc(enum btree_node_type type) |
691 | { |
692 | return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type); |
693 | } |
694 | |
695 | static inline bool btree_node_type_is_extents(enum btree_node_type type) |
696 | { |
697 | const unsigned mask = 0 |
698 | #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) |
699 | BCH_BTREE_IDS() |
700 | #undef x |
701 | ; |
702 | |
703 | return (1U << type) & mask; |
704 | } |
705 | |
706 | static inline bool btree_id_is_extents(enum btree_id btree) |
707 | { |
return btree_node_type_is_extents(__btree_node_type(0, btree));
709 | } |
710 | |
711 | static inline bool btree_type_has_snapshots(enum btree_id id) |
712 | { |
713 | const unsigned mask = 0 |
714 | #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) |
715 | BCH_BTREE_IDS() |
716 | #undef x |
717 | ; |
718 | |
719 | return (1U << id) & mask; |
720 | } |
721 | |
722 | static inline bool btree_type_has_snapshot_field(enum btree_id id) |
723 | { |
724 | const unsigned mask = 0 |
725 | #define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) |
726 | BCH_BTREE_IDS() |
727 | #undef x |
728 | ; |
729 | |
730 | return (1U << id) & mask; |
731 | } |
732 | |
733 | static inline bool btree_type_has_ptrs(enum btree_id id) |
734 | { |
735 | const unsigned mask = 0 |
736 | #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr) |
737 | BCH_BTREE_IDS() |
738 | #undef x |
739 | ; |
740 | |
741 | return (1U << id) & mask; |
742 | } |
743 | |
744 | struct btree_root { |
745 | struct btree *b; |
746 | |
747 | /* On disk root - see async splits: */ |
748 | __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); |
749 | u8 level; |
750 | u8 alive; |
751 | s16 error; |
752 | }; |
753 | |
754 | enum btree_gc_coalesce_fail_reason { |
755 | BTREE_GC_COALESCE_FAIL_RESERVE_GET, |
756 | BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, |
757 | BTREE_GC_COALESCE_FAIL_FORMAT_FITS, |
758 | }; |
759 | |
760 | enum btree_node_sibling { |
761 | btree_prev_sib, |
762 | btree_next_sib, |
763 | }; |
764 | |
765 | struct get_locks_fail { |
766 | unsigned l; |
767 | struct btree *b; |
768 | }; |
769 | |
770 | #endif /* _BCACHEFS_BTREE_TYPES_H */ |
771 | |