1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * dlmcommon.h |
4 | * |
5 | * Copyright (C) 2004 Oracle. All rights reserved. |
6 | */ |
7 | |
8 | #ifndef DLMCOMMON_H |
9 | #define DLMCOMMON_H |
10 | |
11 | #include <linux/kref.h> |
12 | |
/* NOTE(review): presumably the priorities used when registering the o2hb
 * node-down / node-up callbacks -- confirm against the registration site. */
#define DLM_HB_NODE_DOWN_PRI     (0xf000000)
#define DLM_HB_NODE_UP_PRI       (0x8000000)

/* Size, in bytes, of the lock-name buffers declared in this header
 * (dlm_master_list_entry.mname, dlm_migratable_lockres.lockname). */
#define DLM_LOCKID_NAME_MAX    32

/* Owner value meaning "unknown"; it aliases O2NM_MAX_NODES.
 * NOTE(review): presumably one past the highest valid node number -- confirm. */
#define DLM_LOCK_RES_OWNER_UNKNOWN  O2NM_MAX_NODES

/*
 * Hash table geometry.  DLM_HASH_SIZE_DEFAULT is the table size in bytes
 * (1 << 17 = 128 KiB).  The table is split into DLM_HASH_PAGES pages
 * (never fewer than one), each holding DLM_BUCKETS_PER_PAGE hlist heads,
 * for DLM_HASH_BUCKETS buckets in total.
 */
#define DLM_HASH_SIZE_DEFAULT	(1 << 17)
#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
# define DLM_HASH_PAGES		1
#else
# define DLM_HASH_PAGES		(DLM_HASH_SIZE_DEFAULT / PAGE_SIZE)
#endif
#define DLM_BUCKETS_PER_PAGE	(PAGE_SIZE / sizeof(struct hlist_head))
#define DLM_HASH_BUCKETS	(DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE)

/* Intended to make it easier for us to switch out hash functions */
#define dlm_lockid_hash(_n, _l) full_name_hash(NULL, _n, _l)
31 | |
/*
 * Kinds of master list entry (struct dlm_master_list_entry.type).
 * DLM_MLE_NUM_TYPES is the number of kinds and sizes the per-type
 * counters mle_tot_count[] / mle_cur_count[] in struct dlm_ctxt.
 */
enum dlm_mle_type {
	DLM_MLE_BLOCK = 0,
	DLM_MLE_MASTER = 1,
	DLM_MLE_MIGRATION = 2,
	DLM_MLE_NUM_TYPES = 3,
};
38 | |
/*
 * A master list entry ("mle").  Reference counted through mle_refs and
 * tagged with one of the enum dlm_mle_type kinds.
 */
struct dlm_master_list_entry {
	/* NOTE(review): presumably links into dlm->master_hash -- confirm */
	struct hlist_node master_hash_node;
	struct list_head hb_events;
	struct dlm_ctxt *dlm;		/* owning domain context */
	spinlock_t spinlock;
	wait_queue_head_t wq;
	atomic_t woken;
	struct kref mle_refs;
	int inuse;
	/* node bitmaps: one bit per node, O2NM_MAX_NODES bits each */
	unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	u8 master;
	u8 new_master;
	enum dlm_mle_type type;
	struct o2hb_callback_func mle_hb_up;
	struct o2hb_callback_func mle_hb_down;
	struct dlm_lock_resource *mleres;
	/* name buffer (up to DLM_LOCKID_NAME_MAX bytes), its length and hash */
	unsigned char mname[DLM_LOCKID_NAME_MAX];
	unsigned int mnamelen;
	unsigned int mnamehash;
};
62 | |
/*
 * AST kinds.  DLM_AST and DLM_BAST are the msg_type values that
 * dlm_send_proxy_ast() and dlm_send_proxy_bast() pass to
 * dlm_send_proxy_ast_msg().
 */
enum dlm_ast_type {
	DLM_AST = 0,
	DLM_BAST = 1,
	DLM_ASTUNLOCK = 2,
};
68 | |
69 | |
/* Bitwise OR of the LKM_* flag bits listed below.
 * NOTE(review): presumably used to reject requests carrying any other
 * flag bit -- confirm at the call sites. */
#define LKM_VALID_FLAGS (LKM_VALBLK | LKM_CONVERT | LKM_UNLOCK | \
			 LKM_CANCEL | LKM_INVVALBLK | LKM_FORCE | \
			 LKM_RECOVERY | LKM_LOCAL | LKM_NOQUEUE)
73 | |
/* Name that identifies the recovery lock, and its length in bytes
 * (terminating NUL not counted). */
#define DLM_RECOVERY_LOCK_NAME       "$RECOVERY"
#define DLM_RECOVERY_LOCK_NAME_LEN   9

/*
 * dlm_is_recovery_lock - test whether a lock name is the recovery lock name
 * @lock_name: name bytes to test; need not be NUL-terminated
 * @name_len:  number of valid bytes in @lock_name
 *
 * The length is checked first, so memcmp() only ever reads
 * DLM_RECOVERY_LOCK_NAME_LEN bytes from @lock_name.
 *
 * Returns 1 when the name matches DLM_RECOVERY_LOCK_NAME exactly,
 * 0 otherwise.
 */
static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
{
	if (name_len == DLM_RECOVERY_LOCK_NAME_LEN &&
	    memcmp(lock_name, DLM_RECOVERY_LOCK_NAME, name_len) == 0)
		return 1;
	return 0;
}
84 | |
/* NOTE(review): presumably bit flags for dlm_recovery_ctxt.state -- confirm */
#define DLM_RECO_STATE_ACTIVE    0x0001
#define DLM_RECO_STATE_FINALIZE  0x0002

/*
 * Recovery bookkeeping, embedded in struct dlm_ctxt as the "reco" member.
 */
struct dlm_recovery_ctxt
{
	/* lock resources link in here through dlm_lock_resource.recovering */
	struct list_head resources;
	struct list_head node_data;
	u8  new_master;
	u8  dead_node;
	u16 state;
	/* node bitmap: one bit per node, O2NM_MAX_NODES bits */
	unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	wait_queue_head_t event;
};
98 | |
/* Values stored in struct dlm_ctxt.dlm_state. */
enum dlm_ctxt_state {
	DLM_CTXT_NEW = 0,
	DLM_CTXT_JOINED = 1,
	DLM_CTXT_IN_SHUTDOWN = 2,
	DLM_CTXT_LEAVING = 3,
};
105 | |
/*
 * Per-domain DLM context.  Reference counted through dlm_refs.
 */
struct dlm_ctxt
{
	struct list_head list;
	/* array of page pointers; indexed through dlm_lockres_hash() */
	struct hlist_head **lockres_hash;
	struct list_head dirty_list;
	struct list_head purge_list;
	struct list_head pending_asts;
	struct list_head pending_basts;
	/* lock resources link in here through dlm_lock_resource.tracking */
	struct list_head tracking_list;
	unsigned int purge_count;
	spinlock_t spinlock;
	spinlock_t ast_lock;
	spinlock_t track_lock;
	char *name;
	u8 node_num;
	u32 key;
	/* written by __dlm_set_joining_node() with ->spinlock held */
	u8  joining_node;
	u8 migrate_done; /* set to 1 means node has migrated all lock resources */
	/* woken by __dlm_set_joining_node() */
	wait_queue_head_t dlm_join_events;
	/* node bitmaps: one bit per node, O2NM_MAX_NODES bits each */
	unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	struct dlm_recovery_ctxt reco;
	spinlock_t master_lock;
	/* array of page pointers; indexed through dlm_master_hash() */
	struct hlist_head **master_hash;
	struct list_head mle_hb_events;

	/* these give a really vague idea of the system load */
	atomic_t mle_tot_count[DLM_MLE_NUM_TYPES];
	atomic_t mle_cur_count[DLM_MLE_NUM_TYPES];
	atomic_t res_tot_count;
	atomic_t res_cur_count;

	struct dentry *dlm_debugfs_subroot;

	/* NOTE: Next three are protected by dlm_domain_lock */
	struct kref dlm_refs;
	enum dlm_ctxt_state dlm_state;
	unsigned int num_joins;

	struct o2hb_callback_func dlm_hb_up;
	struct o2hb_callback_func dlm_hb_down;
	struct task_struct *dlm_thread_task;
	struct task_struct *dlm_reco_thread_task;
	struct workqueue_struct *dlm_worker;
	wait_queue_head_t dlm_thread_wq;
	wait_queue_head_t dlm_reco_thread_wq;
	wait_queue_head_t ast_wq;
	wait_queue_head_t migration_wq;

	struct work_struct dispatched_work;
	struct list_head work_list;
	spinlock_t work_lock;
	struct list_head dlm_domain_handlers;
	struct list_head	dlm_eviction_callbacks;

	/* The filesystem specifies this at domain registration.  We
	 * cache it here to know what to tell other nodes. */
	struct dlm_protocol_version fs_locking_proto;
	/* This is the inter-dlm communication version */
	struct dlm_protocol_version dlm_locking_proto;
};
169 | |
170 | static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i) |
171 | { |
172 | return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); |
173 | } |
174 | |
175 | static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm, |
176 | unsigned i) |
177 | { |
178 | return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + |
179 | (i % DLM_BUCKETS_PER_PAGE); |
180 | } |
181 | |
/* these keventd work queue items are for less-frequently
 * called functions that cannot be directly called from the
 * net message handlers for some reason, usually because
 * they need to send net messages of their own. */
void dlm_dispatch_work(struct work_struct *work);

/* forward declarations; both structs are defined later in this header */
struct dlm_lock_resource;
struct dlm_work_item;

/*
 * Work callback type.  This is a function type, not a pointer type, so
 * users declare "dlm_workfunc_t *" (see struct dlm_work_item.func).
 * NOTE(review): the void * argument is presumably the item's ->data --
 * confirm in dlm_dispatch_work().
 */
typedef void (dlm_workfunc_t)(struct dlm_work_item *, void *);
192 | |
/* Per-item data for struct dlm_work_item (union member u.ral). */
struct dlm_request_all_locks_priv
{
	u8 reco_master;
	u8 dead_node;
};
198 | |
199 | struct dlm_mig_lockres_priv |
200 | { |
201 | struct dlm_lock_resource *lockres; |
202 | u8 real_master; |
203 | u8 ; |
204 | }; |
205 | |
/* Per-item data for struct dlm_work_item (union member u.am). */
struct dlm_assert_master_priv
{
	struct dlm_lock_resource *lockres;
	u8 request_from;
	u32 flags;
	unsigned ignore_higher:1;	/* single-bit flag */
};
213 | |
/* Per-item data for struct dlm_work_item (union member u.dl). */
struct dlm_deref_lockres_priv
{
	struct dlm_lock_resource *deref_res;
	u8 deref_node;
};
219 | |
/*
 * One deferred unit of work.  Initialise with dlm_init_work_item(), which
 * zeroes the item and then sets list, func, dlm and data.
 */
struct dlm_work_item
{
	struct list_head list;
	dlm_workfunc_t *func;		/* callback to run */
	struct dlm_ctxt *dlm;		/* caller must already hold a dlm_grab() ref */
	void *data;
	/* callback-specific data; which member is valid depends on func */
	union {
		struct dlm_request_all_locks_priv ral;
		struct dlm_mig_lockres_priv ml;
		struct dlm_assert_master_priv am;
		struct dlm_deref_lockres_priv dl;
	} u;
};
233 | |
234 | static inline void dlm_init_work_item(struct dlm_ctxt *dlm, |
235 | struct dlm_work_item *i, |
236 | dlm_workfunc_t *f, void *data) |
237 | { |
238 | memset(i, 0, sizeof(*i)); |
239 | i->func = f; |
240 | INIT_LIST_HEAD(list: &i->list); |
241 | i->data = data; |
242 | i->dlm = dlm; /* must have already done a dlm_grab on this! */ |
243 | } |
244 | |
245 | |
246 | |
/*
 * __dlm_set_joining_node - record @node as the domain's joining node
 * @dlm:  domain to update
 * @node: value stored in @dlm->joining_node
 *
 * Caller must hold @dlm->spinlock (asserted).  After the store, waiters
 * on @dlm->dlm_join_events are woken.
 */
static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
					  u8 node)
{
	assert_spin_locked(&dlm->spinlock);

	dlm->joining_node = node;
	wake_up(&dlm->dlm_join_events);
}
255 | |
/* Bit flags tested against struct dlm_lock_resource.state
 * (see __dlm_lockres_state_to_status() and __dlm_wait_on_lockres()). */
#define DLM_LOCK_RES_UNINITED             0x00000001
#define DLM_LOCK_RES_RECOVERING           0x00000002
#define DLM_LOCK_RES_READY                0x00000004
#define DLM_LOCK_RES_DIRTY                0x00000008
#define DLM_LOCK_RES_IN_PROGRESS          0x00000010
#define DLM_LOCK_RES_MIGRATING            0x00000020
#define DLM_LOCK_RES_DROPPING_REF         0x00000040
#define DLM_LOCK_RES_BLOCK_DIRTY          0x00001000
#define DLM_LOCK_RES_SETREF_INPROG        0x00002000
#define DLM_LOCK_RES_RECOVERY_WAITING     0x00004000

/* max milliseconds to wait to sync up a network failure with a node death */
#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)

/* NOTE(review): per the _MS suffix this is 8 seconds expressed in
 * milliseconds -- confirm how the purge code consumes it. */
#define DLM_PURGE_INTERVAL_MS   (8 * 1000)
271 | |
/*
 * A lock resource.  Reference counted through refs; see dlm_lockres_get()
 * and dlm_lockres_put().
 */
struct dlm_lock_resource
{
	/* WARNING: Please see the comment in dlm_init_lockres before
	 * adding fields here. */
	struct hlist_node hash_node;
	struct qstr lockname;
	struct kref refs;

	/*
	 * Please keep granted, converting, and blocked in this order,
	 * as some funcs want to iterate over all lists.
	 *
	 * All four lists are protected by the hash's reference.
	 */
	struct list_head granted;
	struct list_head converting;
	struct list_head blocked;
	struct list_head purge;

	/*
	 * These two lists require you to hold an additional reference
	 * while they are on the list.
	 */
	struct list_head dirty;
	struct list_head recovering; /* dlm_recovery_ctxt.resources list */

	/* Added during init and removed during release */
	struct list_head tracking;	/* dlm->tracking_list */

	/* unused lock resources have their last_used stamped and are
	 * put on a list for the dlm thread to run. */
	unsigned long    last_used;

	struct dlm_ctxt *dlm;

	unsigned migration_pending:1;
	atomic_t asts_reserved;
	spinlock_t spinlock;
	wait_queue_head_t wq;
	u8  owner;              /* node which owns the lock resource, or unknown */
	u16 state;		/* DLM_LOCK_RES_* bits */
	char lvb[DLM_LVB_LEN];
	unsigned int inflight_locks;
	unsigned int inflight_assert_workers;
	/* node bitmap: one bit per node, O2NM_MAX_NODES bits */
	unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
};
318 | |
/*
 * The part of a lock that is sent over the wire; embedded at the start of
 * struct dlm_lock as "ml" and used as the element type of
 * dlm_migratable_lockres.ml[].  Do not reorder: this is a wire layout.
 */
struct dlm_migratable_lock
{
	__be64 cookie;

	/* these 3 are just padding for the in-memory structure, but
	 * list and flags are actually used when sent over the wire */
	__be16 pad1;
	u8 list;  /* 0=granted, 1=converting, 2=blocked */
	u8 flags;

	s8 type;
	s8 convert_type;
	s8 highest_blocked;
	u8 node;
};  /* 16 bytes */
334 | |
/*
 * In-memory lock object: the wire-visible part (ml) followed by local
 * bookkeeping.  Reference counted through lock_refs.
 */
struct dlm_lock
{
	struct dlm_migratable_lock ml;

	struct list_head list;
	struct list_head ast_list;
	struct list_head bast_list;
	struct dlm_lock_resource *lockres;
	spinlock_t spinlock;
	struct kref lock_refs;

	/* ast and bast must be callable while holding a spinlock! */
	dlm_astlockfunc_t *ast;
	dlm_bastlockfunc_t *bast;
	void *astdata;
	struct dlm_lockstatus *lksb;
	/* single-bit state flags packed into one unsigned */
	unsigned ast_pending:1,
		 bast_pending:1,
		 convert_pending:1,
		 lock_pending:1,
		 cancel_pending:1,
		 unlock_pending:1,
		 lksb_kernel_allocated:1;
};
359 | |
/*
 * Index of one of a lock resource's three lock queues.  The values match
 * the encoding documented for dlm_migratable_lock.list
 * (0=granted, 1=converting, 2=blocked).
 */
enum dlm_lockres_list {
	DLM_GRANTED_LIST = 0,
	DLM_CONVERTING_LIST = 1,
	DLM_BLOCKED_LIST = 2,
};
365 | |
366 | static inline int dlm_lvb_is_empty(char *lvb) |
367 | { |
368 | int i; |
369 | for (i=0; i<DLM_LVB_LEN; i++) |
370 | if (lvb[i]) |
371 | return 0; |
372 | return 1; |
373 | } |
374 | |
375 | static inline char *dlm_list_in_text(enum dlm_lockres_list idx) |
376 | { |
377 | if (idx == DLM_GRANTED_LIST) |
378 | return "granted" ; |
379 | else if (idx == DLM_CONVERTING_LIST) |
380 | return "converting" ; |
381 | else if (idx == DLM_BLOCKED_LIST) |
382 | return "blocked" ; |
383 | else |
384 | return "unknown" ; |
385 | } |
386 | |
387 | static inline struct list_head * |
388 | dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) |
389 | { |
390 | struct list_head *ret = NULL; |
391 | if (idx == DLM_GRANTED_LIST) |
392 | ret = &res->granted; |
393 | else if (idx == DLM_CONVERTING_LIST) |
394 | ret = &res->converting; |
395 | else if (idx == DLM_BLOCKED_LIST) |
396 | ret = &res->blocked; |
397 | else |
398 | BUG(); |
399 | return ret; |
400 | } |
401 | |
402 | |
403 | |
404 | |
/*
 * Iteration state over a node bitmap: a private copy of the map (one bit
 * per node, O2NM_MAX_NODES bits) plus a cursor.
 * NOTE(review): how curnode is advanced is not visible in this header.
 */
struct dlm_node_iter
{
	unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	int curnode;
};
410 | |
411 | |
/*
 * DLM network message type numbers, 500 through 522.  The values are
 * assigned explicitly and must not be renumbered; 501 is kept as a
 * placeholder (DLM_UNUSED_MSG1).
 */
enum {
	DLM_MASTER_REQUEST_MSG		= 500,
	DLM_UNUSED_MSG1			= 501,
	DLM_ASSERT_MASTER_MSG		= 502,
	DLM_CREATE_LOCK_MSG		= 503,
	DLM_CONVERT_LOCK_MSG		= 504,
	DLM_PROXY_AST_MSG		= 505,
	DLM_UNLOCK_LOCK_MSG		= 506,
	DLM_DEREF_LOCKRES_MSG		= 507,
	DLM_MIGRATE_REQUEST_MSG		= 508,
	DLM_MIG_LOCKRES_MSG		= 509,
	DLM_QUERY_JOIN_MSG		= 510,
	DLM_ASSERT_JOINED_MSG		= 511,
	DLM_CANCEL_JOIN_MSG		= 512,
	DLM_EXIT_DOMAIN_MSG		= 513,
	DLM_MASTER_REQUERY_MSG		= 514,
	DLM_LOCK_REQUEST_MSG		= 515,
	DLM_RECO_DATA_DONE_MSG		= 516,
	DLM_BEGIN_RECO_MSG		= 517,
	DLM_FINALIZE_RECO_MSG		= 518,
	DLM_QUERY_REGION		= 519,
	DLM_QUERY_NODEINFO		= 520,
	DLM_BEGIN_EXIT_DOMAIN_MSG	= 521,
	DLM_DEREF_LOCKRES_DONE		= 522,
};
437 | |
/*
 * Per-node recovery record.
 * NOTE(review): presumably linked on dlm_recovery_ctxt.node_data through
 * ->list, with ->state holding one of the DLM_RECO_NODE_DATA_* values
 * below -- confirm in the recovery code.
 */
struct dlm_reco_node_data
{
	int state;
	u8 node_num;
	struct list_head list;
};

/* Recovery states; DEAD is the only negative value. */
enum {
	DLM_RECO_NODE_DATA_DEAD = -1,
	DLM_RECO_NODE_DATA_INIT = 0,
	DLM_RECO_NODE_DATA_REQUESTING = 1,
	DLM_RECO_NODE_DATA_REQUESTED = 2,
	DLM_RECO_NODE_DATA_RECEIVING = 3,
	DLM_RECO_NODE_DATA_DONE = 4,
	DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
};
454 | |
455 | |
/* Response codes.
 * NOTE(review): presumably the replies to a master request -- confirm. */
enum {
	DLM_MASTER_RESP_NO = 0,
	DLM_MASTER_RESP_YES = 1,
	DLM_MASTER_RESP_MAYBE = 2,
	DLM_MASTER_RESP_ERROR = 3,
};
462 | |
463 | |
/*
 * Message payload (NOTE(review): presumably for DLM_MASTER_REQUEST_MSG).
 * Multi-byte fields are declared big-endian; do not reorder members.
 */
struct dlm_master_request
{
	u8 node_idx;
	u8 namelen;		/* NOTE(review): presumably valid bytes in name[] */
	__be16 pad1;
	__be32 flags;

	u8 name[O2NM_MAX_NAME_LEN];
};
473 | |
/* NOTE(review): presumably bits returned in reply to an assert-master
 * message -- confirm in the handler. */
#define DLM_ASSERT_RESPONSE_REASSERT       0x00000001
#define DLM_ASSERT_RESPONSE_MASTERY_REF    0x00000002

/* NOTE(review): presumably bits for dlm_assert_master.flags -- confirm. */
#define DLM_ASSERT_MASTER_MLE_CLEANUP      0x00000001
#define DLM_ASSERT_MASTER_REQUERY          0x00000002
#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004
/*
 * Message payload (NOTE(review): presumably for DLM_ASSERT_MASTER_MSG).
 * Member layout is identical to struct dlm_master_request.
 */
struct dlm_assert_master
{
	u8 node_idx;
	u8 namelen;
	__be16 pad1;
	__be32 flags;

	u8 name[O2NM_MAX_NAME_LEN];
};
489 | |
/* NOTE(review): presumably a bit returned in reply to a migrate request. */
#define DLM_MIGRATE_RESPONSE_MASTERY_REF   0x00000001

/*
 * Message payload (NOTE(review): presumably for DLM_MIGRATE_REQUEST_MSG).
 * Do not reorder members: this is a wire layout.
 */
struct dlm_migrate_request
{
	u8 master;
	u8 new_master;
	u8 namelen;
	u8 pad1;
	__be32 pad2;
	u8 name[O2NM_MAX_NAME_LEN];
};
501 | |
/*
 * Message payload (NOTE(review): presumably for DLM_MASTER_REQUERY_MSG).
 * Note the two pad bytes come first.  Do not reorder members.
 */
struct dlm_master_requery
{
	u8 pad1;
	u8 pad2;
	u8 node_idx;
	u8 namelen;
	__be32 pad3;
	u8 name[O2NM_MAX_NAME_LEN];
};
511 | |
/* NOTE(review): presumably bits for dlm_migratable_lockres.flags -- confirm. */
#define DLM_MRES_RECOVERY   0x01
#define DLM_MRES_MIGRATION  0x02
#define DLM_MRES_ALL_DONE   0x04

/*
 * We would like to get one whole lockres into a single network
 * message whenever possible.  Generally speaking, there will be
 * at most one dlm_lock on a lockres for each node in the cluster,
 * plus (infrequently) any additional locks coming in from userdlm.
 *
 * struct _dlm_lockres_page
 * {
 * 	dlm_migratable_lockres mres;
 * 	dlm_migratable_lock ml[DLM_MAX_MIGRATABLE_LOCKS];
 * 	u8 pad[DLM_MIG_LOCKRES_RESERVED];
 * };
 *
 * from ../cluster/tcp.h
 *    O2NET_MAX_PAYLOAD_BYTES  (4096 - sizeof(net_msg))
 *    (roughly 4080 bytes)
 * and sizeof(dlm_migratable_lockres) = 112 bytes
 * and sizeof(dlm_migratable_lock) = 16 bytes
 *
 * Choosing DLM_MAX_MIGRATABLE_LOCKS=240 and
 * DLM_MIG_LOCKRES_RESERVED=128 means we have this:
 *
 *  (DLM_MAX_MIGRATABLE_LOCKS * sizeof(dlm_migratable_lock)) +
 *     sizeof(dlm_migratable_lockres) + DLM_MIG_LOCKRES_RESERVED =
 *        O2NET_MAX_PAYLOAD_BYTES
 *  (240 * 16) + 112 + 128 = 4080
 *
 * So a lockres would need more than 240 locks before it would
 * use more than one network packet to recover.  Not too bad.
 */
#define DLM_MAX_MIGRATABLE_LOCKS   240

/*
 * Fixed header followed by num_locks entries of struct
 * dlm_migratable_lock.  The running byte offsets are noted after each
 * group of members.  Do not reorder members: this is a wire layout.
 */
struct dlm_migratable_lockres
{
	u8 master;
	u8 lockname_len;
	u8 num_locks;    /* locks sent in this structure */
	u8 flags;
	__be32 total_locks; /* locks to be sent for this migration cookie */
	__be64 mig_cookie;  /* cookie for this lockres migration */
			    /* or zero if not needed */
	/* 16 bytes */
	u8 lockname[DLM_LOCKID_NAME_MAX];
	/* 48 bytes */
	u8 lvb[DLM_LVB_LEN];
	/* 112 bytes */
	struct dlm_migratable_lock ml[];  /* 16 bytes each, begins at byte 112 */
};
/* Largest message: the header plus DLM_MAX_MIGRATABLE_LOCKS lock entries. */
#define DLM_MIG_LOCKRES_MAX_LEN  \
	(sizeof(struct dlm_migratable_lockres) + \
	 (sizeof(struct dlm_migratable_lock) * \
	  DLM_MAX_MIGRATABLE_LOCKS) )

/* from above, 128 bytes
 * for some undetermined future use */
#define DLM_MIG_LOCKRES_RESERVED   (O2NET_MAX_PAYLOAD_BYTES - \
				    DLM_MIG_LOCKRES_MAX_LEN)
573 | |
/*
 * Message payload (NOTE(review): presumably for DLM_CREATE_LOCK_MSG).
 * Multi-byte fields are declared big-endian; do not reorder members.
 */
struct dlm_create_lock
{
	__be64 cookie;

	__be32 flags;
	u8 pad1;
	u8 node_idx;
	s8 requested_type;
	u8 namelen;

	u8 name[O2NM_MAX_NAME_LEN];
};
586 | |
/*
 * Message payload (NOTE(review): presumably for DLM_CONVERT_LOCK_MSG).
 * Same fixed part as struct dlm_create_lock, followed by an optional
 * trailing lock value block in the flexible array lvb[].
 */
struct dlm_convert_lock
{
	__be64 cookie;

	__be32 flags;
	u8 pad1;
	u8 node_idx;
	s8 requested_type;
	u8 namelen;

	u8 name[O2NM_MAX_NAME_LEN];

	s8 lvb[];
};
/* Size of the message when a full DLM_LVB_LEN-byte lvb is appended. */
#define DLM_CONVERT_LOCK_MAX_LEN  (sizeof(struct dlm_convert_lock)+DLM_LVB_LEN)
602 | |
/*
 * Message payload (NOTE(review): presumably for DLM_UNLOCK_LOCK_MSG).
 * Fixed part followed by an optional trailing lock value block in the
 * flexible array lvb[].  Do not reorder members.
 */
struct dlm_unlock_lock
{
	__be64 cookie;

	__be32 flags;
	__be16 pad1;
	u8 node_idx;
	u8 namelen;

	u8 name[O2NM_MAX_NAME_LEN];

	s8 lvb[];
};
/* Size of the message when a full DLM_LVB_LEN-byte lvb is appended. */
#define DLM_UNLOCK_LOCK_MAX_LEN  (sizeof(struct dlm_unlock_lock)+DLM_LVB_LEN)
617 | |
/*
 * Message payload (NOTE(review): presumably for DLM_PROXY_AST_MSG).
 * Fixed part followed by an optional trailing lock value block in the
 * flexible array lvb[].  Do not reorder members.
 */
struct dlm_proxy_ast
{
	__be64 cookie;

	__be32 flags;
	u8 node_idx;
	u8 type;
	u8 blocked_type;
	u8 namelen;

	u8 name[O2NM_MAX_NAME_LEN];

	s8 lvb[];
};
/* Size of the message when a full DLM_LVB_LEN-byte lvb is appended. */
#define DLM_PROXY_AST_MAX_LEN  (sizeof(struct dlm_proxy_ast)+DLM_LVB_LEN)
633 | |
/* NOTE(review): the bytes 0x66 0x6c 0x61 0x72 are ASCII "flar"; how the
 * key is used is not visible in this header. */
#define DLM_MOD_KEY (0x666c6172)
/* Values carried in dlm_query_join_packet.code. */
enum dlm_query_join_response_code {
	JOIN_DISALLOW = 0,
	JOIN_OK = 1,
	JOIN_OK_NO_MAP = 2,
	JOIN_PROTOCOL_MISMATCH = 3,
};
641 | |
/* Four single-byte fields making up a join query response. */
struct dlm_query_join_packet {
	u8 code;	/* Response code.  dlm_minor and fs_minor
			   are only valid if this is JOIN_OK */
	u8 dlm_minor;	/* The minor version of the protocol the
			   dlm is speaking. */
	u8 fs_minor;	/* The minor version of the protocol the
			   filesystem is speaking. */
	u8 reserved;
};

/* Overlays the four packet bytes with a single __be32 so the response
 * can also be accessed as one 32-bit value. */
union dlm_query_join_response {
	__be32 intval;
	struct dlm_query_join_packet packet;
};
656 | |
/*
 * Recovery-related message payloads.  All three start with the same
 * node_idx / dead_node / pad1 / pad2 fields.  NOTE(review): presumably
 * for DLM_LOCK_REQUEST_MSG, DLM_RECO_DATA_DONE_MSG and DLM_BEGIN_RECO_MSG
 * respectively.  Do not reorder members.
 */
struct dlm_lock_request
{
	u8 node_idx;
	u8 dead_node;
	__be16 pad1;
	__be32 pad2;
};

struct dlm_reco_data_done
{
	u8 node_idx;
	u8 dead_node;
	__be16 pad1;
	__be32 pad2;

	/* unused for now */
	/* eventually we can use this to attempt
	 * lvb recovery based on each node's info */
	u8 reco_lvb[DLM_LVB_LEN];
};

struct dlm_begin_reco
{
	u8 node_idx;
	u8 dead_node;
	__be16 pad1;
	__be32 pad2;
};
685 | |
/*
 * Message payload (NOTE(review): presumably for DLM_QUERY_JOIN_MSG).
 * Carries both protocol versions, the domain name and a node bitmap
 * stored as bytes.  Do not reorder members.
 */
struct dlm_query_join_request
{
	u8 node_idx;
	u8 pad1[2];
	u8 name_len;
	struct dlm_protocol_version dlm_proto;
	struct dlm_protocol_version fs_proto;
	u8 domain[O2NM_MAX_NAME_LEN];
	u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)];
};
696 | |
/*
 * Two message payloads with identical member layout.  NOTE(review):
 * presumably for DLM_ASSERT_JOINED_MSG and DLM_CANCEL_JOIN_MSG.
 * Do not reorder members.
 */
struct dlm_assert_joined
{
	u8 node_idx;
	u8 pad1[2];
	u8 name_len;
	u8 domain[O2NM_MAX_NAME_LEN];
};

struct dlm_cancel_join
{
	u8 node_idx;
	u8 pad1[2];
	u8 name_len;
	u8 domain[O2NM_MAX_NAME_LEN];
};
712 | |
/*
 * Message payload (NOTE(review): presumably for DLM_QUERY_REGION).
 * qr_regions has room for O2NM_MAX_REGIONS names of
 * O2HB_MAX_REGION_NAME_LEN bytes each.  Do not reorder members.
 */
struct dlm_query_region {
	u8 qr_node;
	u8 qr_numregions;
	u8 qr_namelen;
	u8 pad1;
	u8 qr_domain[O2NM_MAX_NAME_LEN];
	u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS];
};
721 | |
/* One node's number and IPv4 endpoint; port and address are declared
 * big-endian.  Element type of dlm_query_nodeinfo.qn_nodes[]. */
struct dlm_node_info {
	u8 ni_nodenum;
	u8 pad1;
	__be16 ni_ipv4_port;
	__be32 ni_ipv4_address;
};

/*
 * Message payload (NOTE(review): presumably for DLM_QUERY_NODEINFO).
 * qn_nodes has room for O2NM_MAX_NODES entries.  Do not reorder members.
 */
struct dlm_query_nodeinfo {
	u8 qn_nodenum;
	u8 qn_numnodes;
	u8 qn_namelen;
	u8 pad1;
	u8 qn_domain[O2NM_MAX_NAME_LEN];
	struct dlm_node_info qn_nodes[O2NM_MAX_NODES];
};
737 | |
/* Message payload (NOTE(review): presumably for the exit-domain
 * messages): a node index padded to four bytes. */
struct dlm_exit_domain
{
	u8 node_idx;
	u8 pad1[3];
};

/* Message payload (NOTE(review): presumably for DLM_FINALIZE_RECO_MSG).
 * Do not reorder members. */
struct dlm_finalize_reco
{
	u8 node_idx;
	u8 dead_node;
	u8 flags;
	u8 pad1;
	__be32 pad2;
};
752 | |
/*
 * Message payload (NOTE(review): presumably for DLM_DEREF_LOCKRES_MSG).
 * The pad fields are declared with plain u32/u16 rather than __be types.
 * Do not reorder members.
 */
struct dlm_deref_lockres
{
	u32 pad1;
	u16 pad2;
	u8 node_idx;
	u8 namelen;

	u8 name[O2NM_MAX_NAME_LEN];
};

/* NOTE(review): presumably the reply codes for a deref request. */
enum {
	DLM_DEREF_RESPONSE_DONE = 0,
	DLM_DEREF_RESPONSE_INPROG = 1,
};

/* Message payload (NOTE(review): presumably for DLM_DEREF_LOCKRES_DONE).
 * Member layout is identical to struct dlm_deref_lockres. */
struct dlm_deref_lockres_done {
	u32 pad1;
	u16 pad2;
	u8 node_idx;
	u8 namelen;

	u8 name[O2NM_MAX_NAME_LEN];
};
776 | |
777 | static inline enum dlm_status |
778 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) |
779 | { |
780 | enum dlm_status status = DLM_NORMAL; |
781 | |
782 | assert_spin_locked(&res->spinlock); |
783 | |
784 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
785 | DLM_LOCK_RES_RECOVERY_WAITING)) |
786 | status = DLM_RECOVERING; |
787 | else if (res->state & DLM_LOCK_RES_MIGRATING) |
788 | status = DLM_MIGRATING; |
789 | else if (res->state & DLM_LOCK_RES_IN_PROGRESS) |
790 | status = DLM_FORWARD; |
791 | |
792 | return status; |
793 | } |
794 | |
795 | static inline u8 dlm_get_lock_cookie_node(u64 cookie) |
796 | { |
797 | u8 ret; |
798 | cookie >>= 56; |
799 | ret = (u8)(cookie & 0xffULL); |
800 | return ret; |
801 | } |
802 | |
803 | static inline unsigned long long dlm_get_lock_cookie_seq(u64 cookie) |
804 | { |
805 | unsigned long long ret; |
806 | ret = ((unsigned long long)cookie) & 0x00ffffffffffffffULL; |
807 | return ret; |
808 | } |
809 | |
/*
 * Prototypes for functions implemented in other DLM source files; their
 * behaviour is not visible from this header.  Every *_handler function
 * shares one signature: (msg, len, data, ret_data).
 */
struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
			       struct dlm_lockstatus *lksb);
void dlm_lock_get(struct dlm_lock *lock);
void dlm_lock_put(struct dlm_lock *lock);

void dlm_lock_attach_lockres(struct dlm_lock *lock,
			     struct dlm_lock_resource *res);

int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
			    void **ret_data);
int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
			     void **ret_data);
int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
			  void **ret_data);

void dlm_revert_pending_convert(struct dlm_lock_resource *res,
				struct dlm_lock *lock);
void dlm_revert_pending_lock(struct dlm_lock_resource *res,
			     struct dlm_lock *lock);

int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
			    void **ret_data);
void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
			       struct dlm_lock *lock);
void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
			       struct dlm_lock *lock);

int dlm_launch_thread(struct dlm_ctxt *dlm);
void dlm_complete_thread(struct dlm_ctxt *dlm);
int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
/* NOTE(review): timeout is presumably in milliseconds, like
 * DLM_NODE_DEATH_WAIT_MAX -- confirm in the implementation. */
void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout);

/* dlm_grab() must have been called before a dlm pointer is stored in a
 * work item (see dlm_init_work_item()). */
void dlm_put(struct dlm_ctxt *dlm);
struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
int dlm_domain_fully_joined(struct dlm_ctxt *dlm);

void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
			      struct dlm_lock_resource *res);
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
			    struct dlm_lock_resource *res);
855 | static inline void dlm_lockres_get(struct dlm_lock_resource *res) |
856 | { |
857 | /* This is called on every lookup, so it might be worth |
858 | * inlining. */ |
859 | kref_get(kref: &res->refs); |
860 | } |
/*
 * Prototypes for lock resource and AST helpers implemented in other DLM
 * source files; their behaviour is not visible from this header.
 */
void dlm_lockres_put(struct dlm_lock_resource *res);
void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
						     const char *name,
						     unsigned int len,
						     unsigned int hash);
struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
						const char *name,
						unsigned int len,
						unsigned int hash);
struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
					      const char *name,
					      unsigned int len);

int dlm_is_host_down(int errno);

struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
						 const char *lockid,
						 int namelen,
						 int flags);
struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
					  const char *name,
					  unsigned int namelen);

void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res, int bit);
void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res, int bit);

void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res);
void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res);

void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
					struct dlm_lock_resource *res);

void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_do_local_ast(struct dlm_ctxt *dlm,
		      struct dlm_lock_resource *res,
		      struct dlm_lock *lock);
int dlm_do_remote_ast(struct dlm_ctxt *dlm,
		      struct dlm_lock_resource *res,
		      struct dlm_lock *lock);
void dlm_do_local_bast(struct dlm_ctxt *dlm,
		       struct dlm_lock_resource *res,
		       struct dlm_lock *lock,
		       int blocked_type);
/* The inline wrappers dlm_send_proxy_ast() and dlm_send_proxy_bast() call
 * this with msg_type DLM_AST and DLM_BAST respectively. */
int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm,
			   struct dlm_lock_resource *res,
			   struct dlm_lock *lock,
			   int msg_type,
			   int blocked_type, int flags);
917 | static inline int dlm_send_proxy_bast(struct dlm_ctxt *dlm, |
918 | struct dlm_lock_resource *res, |
919 | struct dlm_lock *lock, |
920 | int blocked_type) |
921 | { |
922 | return dlm_send_proxy_ast_msg(dlm, res, lock, msg_type: DLM_BAST, |
923 | blocked_type, flags: 0); |
924 | } |
925 | |
926 | static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm, |
927 | struct dlm_lock_resource *res, |
928 | struct dlm_lock *lock, |
929 | int flags) |
930 | { |
931 | return dlm_send_proxy_ast_msg(dlm, res, lock, msg_type: DLM_AST, |
932 | blocked_type: 0, flags); |
933 | } |
934 | |
/*
 * Prototypes for mastery, migration and recovery functions implemented in
 * other DLM source files; their behaviour is not visible from this
 * header.  Every *_handler function shares one signature:
 * (msg, len, data, ret_data).
 */
void dlm_print_one_lock_resource(struct dlm_lock_resource *res);
void __dlm_print_one_lock_resource(struct dlm_lock_resource *res);

void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);


void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data);
void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data);

int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
int dlm_finish_migration(struct dlm_ctxt *dlm,
			 struct dlm_lock_resource *res,
			 u8 old_master);
void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
			     struct dlm_lock_resource *res);
void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res);

int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
			       void **ret_data);
int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data);
void dlm_assert_master_post_handler(int status, void *data, void *ret_data);
int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data);
int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data);
int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
				void **ret_data);
int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
			    void **ret_data);
int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
			       void **ret_data);
int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
				  void **ret_data);
int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
			       void **ret_data);
int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
			   void **ret_data);
int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data);
int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
			  u8 nodenum, u8 *real_master);

void __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
		struct dlm_lock_resource *res);

int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
			       struct dlm_lock_resource *res,
			       int ignore_higher,
			       u8 request_from,
			       u32 flags);


int dlm_send_one_lockres(struct dlm_ctxt *dlm,
			 struct dlm_lock_resource *res,
			 struct dlm_migratable_lockres *mres,
			 u8 send_to,
			 u8 flags);
void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res);

/* will exit holding res->spinlock, but may drop in function */
void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags);
999 | |
1000 | /* will exit holding res->spinlock, but may drop in function */ |
1001 | static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) |
1002 | { |
1003 | __dlm_wait_on_lockres_flags(res, flags: (DLM_LOCK_RES_IN_PROGRESS| |
1004 | DLM_LOCK_RES_RECOVERING| |
1005 | DLM_LOCK_RES_RECOVERY_WAITING| |
1006 | DLM_LOCK_RES_MIGRATING)); |
1007 | } |
1008 | |
/* Master list entry (mle) unlink/insert helpers; defined outside this header. */
void __dlm_unlink_mle(struct dlm_ctxt *dlm,
		      struct dlm_master_list_entry *mle);
void __dlm_insert_mle(struct dlm_ctxt *dlm,
		      struct dlm_master_list_entry *mle);
1011 | |
/*
 * Creation and destruction of slab caches.  Each init routine returns an
 * int and has a matching void destroy routine.
 */
int dlm_init_master_caches(void);
void dlm_destroy_master_caches(void);

int dlm_init_lock_cache(void);
void dlm_destroy_lock_cache(void);

int dlm_init_mle_cache(void);
void dlm_destroy_mle_cache(void);
1021 | |
1022 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); |
1023 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, |
1024 | struct dlm_lock_resource *res); |
1025 | void dlm_clean_master_list(struct dlm_ctxt *dlm, |
1026 | u8 dead_node); |
1027 | void dlm_force_free_mles(struct dlm_ctxt *dlm); |
1028 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
1029 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); |
1030 | int __dlm_lockres_unused(struct dlm_lock_resource *res); |
1031 | |
1032 | static inline const char * dlm_lock_mode_name(int mode) |
1033 | { |
1034 | switch (mode) { |
1035 | case LKM_EXMODE: |
1036 | return "EX" ; |
1037 | case LKM_PRMODE: |
1038 | return "PR" ; |
1039 | case LKM_NLMODE: |
1040 | return "NL" ; |
1041 | } |
1042 | return "UNKNOWN" ; |
1043 | } |
1044 | |
1045 | |
1046 | static inline int dlm_lock_compatible(int existing, int request) |
1047 | { |
1048 | /* NO_LOCK compatible with all */ |
1049 | if (request == LKM_NLMODE || |
1050 | existing == LKM_NLMODE) |
1051 | return 1; |
1052 | |
1053 | /* EX incompatible with all non-NO_LOCK */ |
1054 | if (request == LKM_EXMODE) |
1055 | return 0; |
1056 | |
1057 | /* request must be PR, which is compatible with PR */ |
1058 | if (existing == LKM_PRMODE) |
1059 | return 1; |
1060 | |
1061 | return 0; |
1062 | } |
1063 | |
1064 | static inline int dlm_lock_on_list(struct list_head *head, |
1065 | struct dlm_lock *lock) |
1066 | { |
1067 | struct dlm_lock *tmplock; |
1068 | |
1069 | list_for_each_entry(tmplock, head, list) { |
1070 | if (tmplock == lock) |
1071 | return 1; |
1072 | } |
1073 | return 0; |
1074 | } |
1075 | |
1076 | |
1077 | static inline enum dlm_status dlm_err_to_dlm_status(int err) |
1078 | { |
1079 | enum dlm_status ret; |
1080 | if (err == -ENOMEM) |
1081 | ret = DLM_SYSERR; |
1082 | else if (err == -ETIMEDOUT || o2net_link_down(err, NULL)) |
1083 | ret = DLM_NOLOCKMGR; |
1084 | else if (err == -EINVAL) |
1085 | ret = DLM_BADPARAM; |
1086 | else if (err == -ENAMETOOLONG) |
1087 | ret = DLM_IVBUFLEN; |
1088 | else |
1089 | ret = DLM_BADARGS; |
1090 | return ret; |
1091 | } |
1092 | |
1093 | |
1094 | static inline void dlm_node_iter_init(unsigned long *map, |
1095 | struct dlm_node_iter *iter) |
1096 | { |
1097 | bitmap_copy(dst: iter->node_map, src: map, O2NM_MAX_NODES); |
1098 | iter->curnode = -1; |
1099 | } |
1100 | |
1101 | static inline int dlm_node_iter_next(struct dlm_node_iter *iter) |
1102 | { |
1103 | int bit; |
1104 | bit = find_next_bit(addr: iter->node_map, O2NM_MAX_NODES, offset: iter->curnode+1); |
1105 | if (bit >= O2NM_MAX_NODES) { |
1106 | iter->curnode = O2NM_MAX_NODES; |
1107 | return -ENOENT; |
1108 | } |
1109 | iter->curnode = bit; |
1110 | return bit; |
1111 | } |
1112 | |
1113 | static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm, |
1114 | struct dlm_lock_resource *res, |
1115 | u8 owner) |
1116 | { |
1117 | assert_spin_locked(&res->spinlock); |
1118 | |
1119 | res->owner = owner; |
1120 | } |
1121 | |
1122 | static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm, |
1123 | struct dlm_lock_resource *res, |
1124 | u8 owner) |
1125 | { |
1126 | assert_spin_locked(&res->spinlock); |
1127 | |
1128 | if (owner != res->owner) |
1129 | dlm_set_lockres_owner(dlm, res, owner); |
1130 | } |
1131 | |
1132 | #endif /* DLMCOMMON_H */ |
1133 | |