osdmap.h source code [linux/include/linux/ceph/osdmap.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _FS_CEPH_OSDMAP_H
3	#define _FS_CEPH_OSDMAP_H
4
5	#include <linux/rbtree.h>
6	#include <linux/ceph/types.h>
7	#include <linux/ceph/decode.h>
8	#include <linux/crush/crush.h>
9
10	/*
11	* The osd map describes the current membership of the osd cluster and
12	* specifies the mapping of objects to placement groups and placement
13	* groups to (sets of) osds. That is, it completely specifies the
14	* (desired) distribution of all data objects in the system at some
15	* point in time.
16	*
17	* Each map version is identified by an epoch, which increases monotonically.
18	*
19	* The map can be updated either via an incremental map (diff) describing
20	* the change between two successive epochs, or as a fully encoded map.
21	*/
/*
 * Placement group id: a pool plus a seed within that pool.
 */
struct ceph_pg {
	uint64_t pool;
	uint32_t seed;
};
26
/*
 * NOTE(review): presumably the ceph_spg::shard value meaning "no shard" --
 * confirm against users.  Parenthesized so the negative constant is safe
 * in any expression context.
 */
#define CEPH_SPG_NOSHARD	(-1)
28
29	struct ceph_spg {
30	struct ceph_pg pgid;
31	s8 shard;
32	};
33
/*
 * Comparison helpers for pg and spg ids (defined elsewhere).
 * NOTE(review): presumably return <0 / 0 / >0 like memcmp() -- confirm
 * against the implementation.
 */
int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
36
/* Bits for ceph_pg_pool_info::flags. */
#define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0)  /* hash pg seed and pool id
							together */
#define CEPH_POOL_FLAG_FULL		(1ULL << 1)  /* pool is full */
#define CEPH_POOL_FLAG_FULL_QUOTA	(1ULL << 10) /* pool ran out of quota,
							will set FULL too */
#define CEPH_POOL_FLAG_NEARFULL		(1ULL << 11) /* pool is nearfull */
43
44	struct ceph_pg_pool_info {
45	struct rb_node node;
46	s64 id;
47	u8 type; / CEPH_POOL_TYPE_* /
48	u8 size;
49	u8 min_size;
50	u8 crush_ruleset;
51	u8 object_hash;
52	u32 last_force_request_resend;
53	u32 pg_num, pgp_num;
54	int pg_num_mask, pgp_num_mask;
55	s64 read_tier;
56	s64 write_tier; / wins for read+write ops /
57	u64 flags; / CEPH_POOL_FLAG_* /
58	char *name;
59
60	bool was_full; / for handle_one_map() /
61	};
62
63	static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
64	{
65	switch (pool->type) {
66	case CEPH_POOL_TYPE_REP:
67	return true;
68	case CEPH_POOL_TYPE_EC:
69	return false;
70	default:
71	BUG();
72	}
73	}
74
75	struct ceph_object_locator {
76	s64 pool;
77	struct ceph_string *pool_ns;
78	};
79
80	static inline void ceph_oloc_init(struct ceph_object_locator *oloc)
81	{
82	oloc->pool = -`1`;
83	oloc->pool_ns = NULL;
84	}
85
86	static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc)
87	{
88	return oloc->pool == -`1`;
89	}
90
/* Object locator copy/teardown helpers (defined elsewhere). */
void ceph_oloc_copy(struct ceph_object_locator *dest,
		    const struct ceph_object_locator *src);
void ceph_oloc_destroy(struct ceph_object_locator *oloc);
94
/*
 * 51-char inline_name is long enough for all cephfs and all but one
 * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be
 * arbitrarily long (~PAGE_SIZE).  It's done once during rbd map; all
 * other rbd requests fit into inline_name.
 *
 * Makes ceph_object_id 64 bytes on 64-bit.
 */
#define CEPH_OID_INLINE_LEN 52
104
105	/*
106	* Both inline and external buffers have space for a NUL-terminator,
107	* which is carried around. It's not required though - RADOS object
108	* names don't have to be NUL-terminated and may contain NULs.
109	*/
110	struct ceph_object_id {
111	char *name;
112	char inline_name[CEPH_OID_INLINE_LEN];
113	int name_len;
114	};
115
/*
 * Initializer that points .name at the object's own inline buffer; all
 * other members are zero-initialized by the designated initializer.
 */
#define __CEPH_OID_INITIALIZER(oid) { .name = (oid).inline_name }

/* Declare a struct ceph_object_id named @oid, initialized as above. */
#define CEPH_DEFINE_OID_ONSTACK(oid)				\
	struct ceph_object_id oid = __CEPH_OID_INITIALIZER(oid)
120
121	static inline void ceph_oid_init(struct ceph_object_id *oid)
122	{
123	oid = (struct* ceph_object_id) __CEPH_OID_INITIALIZER(*oid);
124	}
125
126	static inline bool ceph_oid_empty(const struct ceph_object_id *oid)
127	{
128	return oid->name == oid->inline_name && !oid->name_len;
129	}
130
131	void ceph_oid_copy(struct ceph_object_id *dest,
132	const struct ceph_object_id *src);
133	__printf(`2`, `3`)
134	void ceph_oid_printf(struct ceph_object_id oid, const* char *fmt, ...);
135	__printf(`3`, `4`)
136	int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
137	const char *fmt, ...);
138	void ceph_oid_destroy(struct ceph_object_id *oid);
139
140	struct workspace_manager {
141	struct list_head idle_ws;
142	spinlock_t ws_lock;
143	/ Number of free workspaces /
144	int free_ws;
145	/ Total number of allocated workspaces /
146	atomic_t total_ws;
147	/ Waiters for a free workspace /
148	wait_queue_head_t ws_wait;
149	};
150
151	struct ceph_pg_mapping {
152	struct rb_node node;
153	struct ceph_pg pgid;
154
155	union {
156	struct {
157	int len;
158	int osds[];
159	} pg_temp, pg_upmap;
160	struct {
161	int osd;
162	} primary_temp;
163	struct {
164	int len;
165	int from_to[][`2`];
166	} pg_upmap_items;
167	};
168	};
169
170	struct ceph_osdmap {
171	struct ceph_fsid fsid;
172	u32 epoch;
173	struct ceph_timespec created, modified;
174
175	u32 flags; / CEPH_OSDMAP_* /
176
177	u32 max_osd; / size of osd_state, _offload, _addr arrays /
178	u32 osd_state; /* CEPH_OSD_* /
179	u32 osd_weight; /* 0 = failed, 0x10000 = 100% normal /
180	struct ceph_entity_addr *osd_addr;
181
182	struct rb_root pg_temp;
183	struct rb_root primary_temp;
184
185	/ remap (post-CRUSH, pre-up) /
186	struct rb_root pg_upmap; / PG := raw set /
187	struct rb_root pg_upmap_items; / from -> to within raw set /
188
189	u32 *osd_primary_affinity;
190
191	struct rb_root pg_pools;
192	u32 pool_max;
193
194	/ the CRUSH map specifies the mapping of placement groups to*
195	* the list of osds that store+replicate them. */
196	struct crush_map *crush;
197
198	struct workspace_manager crush_wsm;
199	};
200
201	static inline bool ceph_osd_exists(struct ceph_osdmap map, int* osd)
202	{
203	return osd >= `0` && osd < map->max_osd &&
204	(map->osd_state[osd] & CEPH_OSD_EXISTS);
205	}
206
207	static inline bool ceph_osd_is_up(struct ceph_osdmap map, int* osd)
208	{
209	return ceph_osd_exists(map, osd) &&
210	(map->osd_state[osd] & CEPH_OSD_UP);
211	}
212
/*
 * Negation of ceph_osd_is_up(): a nonexistent or out-of-range @osd
 * therefore counts as down.
 */
static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd)
{
	return !ceph_osd_is_up(map, osd);
}
217
218	char ceph_osdmap_state_str(char* str, int* len, u32 state);
219	extern u32 ceph_get_primary_affinity(struct ceph_osdmap map, int* osd);
220
221	static inline struct ceph_entity_addr ceph_osd_addr(struct* ceph_osdmap *map,
222	int osd)
223	{
224	if (osd >= map->max_osd)
225	return NULL;
226	return &map->osd_addr[osd];
227	}
228
/*
 * Encoded pg id size in bytes: 1 (version) + 8 (pool) + 4 (seed) +
 * 4 (deprecated preferred value), matching ceph_decode_pgid().
 */
#define CEPH_PGID_ENCODING_LEN (1 + 8 + 4 + 4)
230
231	static inline int ceph_decode_pgid(void *p, void* end, struct* ceph_pg *pgid)
232	{
233	__u8 version;
234
235	if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) {
236	pr_warn("incomplete pg encoding\n");
237	return -EINVAL;
238	}
239	version = ceph_decode_8(p);
240	if (version > `1`) {
241	pr_warn("do not understand pg encoding %d > 1\n",
242	(int)version);
243	return -EINVAL;
244	}
245
246	pgid->pool = ceph_decode_64(p);
247	pgid->seed = ceph_decode_32(p);
248	p += `4`; /* skip deprecated preferred value /
249
250	return `0`;
251	}
252
/*
 * osdmap lifecycle (defined elsewhere).
 * NOTE(review): the decode/apply functions presumably report failure
 * via ERR_PTR() -- confirm against the implementation.
 */
struct ceph_osdmap *ceph_osdmap_alloc(void);
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
					     struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
258
259	struct ceph_osds {
260	int osds[CEPH_PG_MAX_SIZE];
261	int size;
262	int primary; / id, NOT index /
263	};
264
265	static inline void ceph_osds_init(struct ceph_osds *set)
266	{
267	set->size = `0`;
268	set->primary = -`1`;
269	}
270
271	void ceph_osds_copy(struct ceph_osds dest, const* struct ceph_osds *src);
272
273	bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
274	u32 new_pg_num);
275	bool ceph_is_new_interval(const struct ceph_osds *old_acting,
276	const struct ceph_osds *new_acting,
277	const struct ceph_osds *old_up,
278	const struct ceph_osds *new_up,
279	int old_size,
280	int new_size,
281	int old_min_size,
282	int new_min_size,
283	u32 old_pg_num,
284	u32 new_pg_num,
285	bool old_sort_bitwise,
286	bool new_sort_bitwise,
287	bool old_recovery_deletes,
288	bool new_recovery_deletes,
289	const struct ceph_pg *pgid);
290	bool ceph_osds_changed(const struct ceph_osds *old_acting,
291	const struct ceph_osds *new_acting,
292	bool any_change);
293
/*
 * Object -> pg and pg -> osd mapping entry points (defined elsewhere).
 * The __-prefixed variant takes the pool info directly instead of an
 * osdmap.
 */
void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
				 const struct ceph_object_id *oid,
				 const struct ceph_object_locator *oloc,
				 struct ceph_pg *raw_pgid);
int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
			      const struct ceph_object_id *oid,
			      const struct ceph_object_locator *oloc,
			      struct ceph_pg *raw_pgid);

void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
			       struct ceph_pg_pool_info *pi,
			       const struct ceph_pg *raw_pgid,
			       struct ceph_osds *up,
			       struct ceph_osds *acting);
bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
			      struct ceph_pg_pool_info *pi,
			      const struct ceph_pg *raw_pgid,
			      struct ceph_spg *spgid);
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
			      const struct ceph_pg *raw_pgid);
314
/*
 * One crush location: a type name and a name.
 */
struct crush_loc {
	char *cl_type_name;
	char *cl_name;
};
319
320	struct crush_loc_node {
321	struct rb_node cl_node;
322	struct crush_loc cl_loc; / pointers into cl_data /
323	char cl_data[];
324	};
325
/* crush location parsing and comparison (defined elsewhere). */
int ceph_parse_crush_location(char *crush_location, struct rb_root *locs);
int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2);
void ceph_clear_crush_locs(struct rb_root *locs);

int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
			    struct rb_root *locs);
332
333	extern struct ceph_pg_pool_info ceph_pg_pool_by_id(struct* ceph_osdmap *map,
334	u64 id);
335	extern const char ceph_pg_pool_name_by_id(struct* ceph_osdmap *map, u64 id);
336	extern int ceph_pg_poolid_by_name(struct ceph_osdmap map, const* char *name);
337	u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
338
339	#endif
340

source code of linux/include/linux/ceph/osdmap.h