1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * dlmdomain.c |
4 | * |
5 | * defines domain join / leave apis |
6 | * |
7 | * Copyright (C) 2004 Oracle. All rights reserved. |
8 | */ |
9 | |
10 | #include <linux/module.h> |
11 | #include <linux/types.h> |
12 | #include <linux/slab.h> |
13 | #include <linux/highmem.h> |
14 | #include <linux/init.h> |
15 | #include <linux/spinlock.h> |
16 | #include <linux/delay.h> |
17 | #include <linux/err.h> |
18 | #include <linux/debugfs.h> |
19 | #include <linux/sched/signal.h> |
20 | |
21 | #include "../cluster/heartbeat.h" |
22 | #include "../cluster/nodemanager.h" |
23 | #include "../cluster/tcp.h" |
24 | |
25 | #include "dlmapi.h" |
26 | #include "dlmcommon.h" |
27 | #include "dlmdomain.h" |
28 | #include "dlmdebug.h" |
29 | |
30 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) |
31 | #include "../cluster/masklog.h" |
32 | |
/*
 * ocfs2 node maps are arrays of long ints, which cannot be sent freely
 * across the wire because of endianness issues. To work around this, we
 * convert the long ints to byte arrays. The following three routines are
 * helper functions to set/test/copy bits within those arrays of bytes.
 */
/*
 * Set bit @nr in the byte-array bitmap @map.  The byte is selected by
 * nr / 8 and the bit inside that byte by nr % 8.
 */
static inline void byte_set_bit(u8 nr, u8 map[])
{
	const unsigned int byte_idx = nr >> 3;
	const unsigned long mask = 1UL << (nr & 7);

	map[byte_idx] |= mask;
}
43 | |
/*
 * Return 1 if bit @nr is set in the byte-array bitmap @map, 0 otherwise.
 * Uses the same layout as byte_set_bit(): byte nr / 8, bit nr % 8.
 */
static inline int byte_test_bit(u8 nr, u8 map[])
{
	const u8 byte = map[nr >> 3];

	return (byte >> (nr & 7)) & 1;
}
48 | |
49 | static inline void byte_copymap(u8 dmap[], unsigned long smap[], |
50 | unsigned int sz) |
51 | { |
52 | unsigned int nn; |
53 | |
54 | if (!sz) |
55 | return; |
56 | |
57 | memset(dmap, 0, ((sz + 7) >> 3)); |
58 | for (nn = 0 ; nn < sz; nn++) |
59 | if (test_bit(nn, smap)) |
60 | byte_set_bit(nr: nn, map: dmap); |
61 | } |
62 | |
/*
 * Release the first @pages pages referenced by @vec (highest index first)
 * and then the vector itself.
 */
static void dlm_free_pagevec(void **vec, int pages)
{
	int left;

	for (left = pages; left != 0; left--)
		free_page((unsigned long)vec[left - 1]);

	kfree(vec);
}
69 | |
70 | static void **dlm_alloc_pagevec(int pages) |
71 | { |
72 | void **vec = kmalloc_array(n: pages, size: sizeof(void *), GFP_KERNEL); |
73 | int i; |
74 | |
75 | if (!vec) |
76 | return NULL; |
77 | |
78 | for (i = 0; i < pages; i++) |
79 | if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL))) |
80 | goto out_free; |
81 | |
82 | mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n" , |
83 | pages, (unsigned long)DLM_HASH_PAGES, |
84 | (unsigned long)DLM_BUCKETS_PER_PAGE); |
85 | return vec; |
86 | out_free: |
87 | dlm_free_pagevec(vec, pages: i); |
88 | return NULL; |
89 | } |
90 | |
91 | /* |
92 | * |
93 | * spinlock lock ordering: if multiple locks are needed, obey this ordering: |
94 | * dlm_domain_lock |
95 | * struct dlm_ctxt->spinlock |
96 | * struct dlm_lock_resource->spinlock |
97 | * struct dlm_ctxt->master_lock |
98 | * struct dlm_ctxt->ast_lock |
99 | * dlm_master_list_entry->spinlock |
100 | * dlm_lock->spinlock |
101 | * |
102 | */ |
103 | |
/* Held around walks of dlm_domains and around dlm_state / num_joins updates. */
DEFINE_SPINLOCK(dlm_domain_lock);
/* All dlm contexts, linked through dlm_ctxt->list. */
LIST_HEAD(dlm_domains);
/* Woken when a domain is unlinked from dlm_domains or is about to be freed. */
static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
107 | |
108 | /* |
109 | * The supported protocol version for DLM communication. Running domains |
110 | * will have a negotiated version with the same major number and a minor |
111 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should |
112 | * be used to determine what a running domain is actually using. |
113 | * |
114 | * New in version 1.1: |
115 | * - Message DLM_QUERY_REGION added to support global heartbeat |
116 | * - Message DLM_QUERY_NODEINFO added to allow online node removes |
117 | * New in version 1.2: |
118 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain |
119 | * New in version 1.3: |
120 | * - Message DLM_DEREF_LOCKRES_DONE added to inform non-master that the |
121 | * refmap is cleared |
122 | */ |
static const struct dlm_protocol_version dlm_protocol = {
	.pv_major = 1,	/* running domains share this major number */
	.pv_minor = 3,	/* highest minor supported; a domain may negotiate lower */
};
127 | |
/*
 * Back-off delay in milliseconds; e.g. passed to msleep() before a failed
 * domain exit message is sent again.
 */
#define DLM_DOMAIN_BACKOFF_MS 200
129 | |
/*
 * Forward declarations of static network message handlers and helpers
 * that are referenced before their definitions.
 */
static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
				  void **ret_data);
static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
				     void **ret_data);
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
				   void **ret_data);
static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
				    void *data, void **ret_data);
static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
				   void **ret_data);
static int dlm_protocol_compare(struct dlm_protocol_version *existing,
				struct dlm_protocol_version *request);

static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
144 | |
145 | void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
146 | { |
147 | if (hlist_unhashed(h: &res->hash_node)) |
148 | return; |
149 | |
150 | mlog(0, "%s: Unhash res %.*s\n" , dlm->name, res->lockname.len, |
151 | res->lockname.name); |
152 | hlist_del_init(n: &res->hash_node); |
153 | dlm_lockres_put(res); |
154 | } |
155 | |
156 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
157 | { |
158 | struct hlist_head *bucket; |
159 | |
160 | assert_spin_locked(&dlm->spinlock); |
161 | |
162 | bucket = dlm_lockres_hash(dlm, i: res->lockname.hash); |
163 | |
164 | /* get a reference for our hashtable */ |
165 | dlm_lockres_get(res); |
166 | |
167 | hlist_add_head(n: &res->hash_node, h: bucket); |
168 | |
169 | mlog(0, "%s: Hash res %.*s\n" , dlm->name, res->lockname.len, |
170 | res->lockname.name); |
171 | } |
172 | |
173 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, |
174 | const char *name, |
175 | unsigned int len, |
176 | unsigned int hash) |
177 | { |
178 | struct hlist_head *bucket; |
179 | struct dlm_lock_resource *res; |
180 | |
181 | mlog(0, "%.*s\n" , len, name); |
182 | |
183 | assert_spin_locked(&dlm->spinlock); |
184 | |
185 | bucket = dlm_lockres_hash(dlm, i: hash); |
186 | |
187 | hlist_for_each_entry(res, bucket, hash_node) { |
188 | if (res->lockname.name[0] != name[0]) |
189 | continue; |
190 | if (unlikely(res->lockname.len != len)) |
191 | continue; |
192 | if (memcmp(p: res->lockname.name + 1, q: name + 1, size: len - 1)) |
193 | continue; |
194 | dlm_lockres_get(res); |
195 | return res; |
196 | } |
197 | return NULL; |
198 | } |
199 | |
200 | /* intended to be called by functions which do not care about lock |
201 | * resources which are being purged (most net _handler functions). |
202 | * this will return NULL for any lock resource which is found but |
203 | * currently in the process of dropping its mastery reference. |
204 | * use __dlm_lookup_lockres_full when you need the lock resource |
205 | * regardless (e.g. dlm_get_lock_resource) */ |
206 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, |
207 | const char *name, |
208 | unsigned int len, |
209 | unsigned int hash) |
210 | { |
211 | struct dlm_lock_resource *res = NULL; |
212 | |
213 | mlog(0, "%.*s\n" , len, name); |
214 | |
215 | assert_spin_locked(&dlm->spinlock); |
216 | |
217 | res = __dlm_lookup_lockres_full(dlm, name, len, hash); |
218 | if (res) { |
219 | spin_lock(lock: &res->spinlock); |
220 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
221 | spin_unlock(lock: &res->spinlock); |
222 | dlm_lockres_put(res); |
223 | return NULL; |
224 | } |
225 | spin_unlock(lock: &res->spinlock); |
226 | } |
227 | |
228 | return res; |
229 | } |
230 | |
231 | struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, |
232 | const char *name, |
233 | unsigned int len) |
234 | { |
235 | struct dlm_lock_resource *res; |
236 | unsigned int hash = dlm_lockid_hash(name, len); |
237 | |
238 | spin_lock(lock: &dlm->spinlock); |
239 | res = __dlm_lookup_lockres(dlm, name, len, hash); |
240 | spin_unlock(lock: &dlm->spinlock); |
241 | return res; |
242 | } |
243 | |
244 | static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) |
245 | { |
246 | struct dlm_ctxt *tmp; |
247 | |
248 | assert_spin_locked(&dlm_domain_lock); |
249 | |
250 | /* tmp->name here is always NULL terminated, |
251 | * but domain may not be! */ |
252 | list_for_each_entry(tmp, &dlm_domains, list) { |
253 | if (strlen(tmp->name) == len && |
254 | memcmp(p: tmp->name, q: domain, size: len)==0) |
255 | return tmp; |
256 | } |
257 | |
258 | return NULL; |
259 | } |
260 | |
261 | /* For null terminated domain strings ONLY */ |
262 | static struct dlm_ctxt * __dlm_lookup_domain(const char *domain) |
263 | { |
264 | assert_spin_locked(&dlm_domain_lock); |
265 | |
266 | return __dlm_lookup_domain_full(domain, strlen(domain)); |
267 | } |
268 | |
269 | |
270 | /* returns true on one of two conditions: |
271 | * 1) the domain does not exist |
272 | * 2) the domain exists and it's state is "joined" */ |
273 | static int dlm_wait_on_domain_helper(const char *domain) |
274 | { |
275 | int ret = 0; |
276 | struct dlm_ctxt *tmp = NULL; |
277 | |
278 | spin_lock(lock: &dlm_domain_lock); |
279 | |
280 | tmp = __dlm_lookup_domain(domain); |
281 | if (!tmp) |
282 | ret = 1; |
283 | else if (tmp->dlm_state == DLM_CTXT_JOINED) |
284 | ret = 1; |
285 | |
286 | spin_unlock(lock: &dlm_domain_lock); |
287 | return ret; |
288 | } |
289 | |
290 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) |
291 | { |
292 | dlm_destroy_debugfs_subroot(dlm); |
293 | |
294 | if (dlm->lockres_hash) |
295 | dlm_free_pagevec(vec: (void **)dlm->lockres_hash, DLM_HASH_PAGES); |
296 | |
297 | if (dlm->master_hash) |
298 | dlm_free_pagevec(vec: (void **)dlm->master_hash, DLM_HASH_PAGES); |
299 | |
300 | kfree(objp: dlm->name); |
301 | kfree(objp: dlm); |
302 | } |
303 | |
304 | /* A little strange - this function will be called while holding |
305 | * dlm_domain_lock and is expected to be holding it on the way out. We |
306 | * will however drop and reacquire it multiple times */ |
307 | static void dlm_ctxt_release(struct kref *kref) |
308 | { |
309 | struct dlm_ctxt *dlm; |
310 | |
311 | dlm = container_of(kref, struct dlm_ctxt, dlm_refs); |
312 | |
313 | BUG_ON(dlm->num_joins); |
314 | BUG_ON(dlm->dlm_state == DLM_CTXT_JOINED); |
315 | |
316 | /* we may still be in the list if we hit an error during join. */ |
317 | list_del_init(entry: &dlm->list); |
318 | |
319 | spin_unlock(lock: &dlm_domain_lock); |
320 | |
321 | mlog(0, "freeing memory from domain %s\n" , dlm->name); |
322 | |
323 | wake_up(&dlm_domain_events); |
324 | |
325 | dlm_free_ctxt_mem(dlm); |
326 | |
327 | spin_lock(lock: &dlm_domain_lock); |
328 | } |
329 | |
330 | void dlm_put(struct dlm_ctxt *dlm) |
331 | { |
332 | spin_lock(lock: &dlm_domain_lock); |
333 | kref_put(kref: &dlm->dlm_refs, release: dlm_ctxt_release); |
334 | spin_unlock(lock: &dlm_domain_lock); |
335 | } |
336 | |
337 | static void __dlm_get(struct dlm_ctxt *dlm) |
338 | { |
339 | kref_get(kref: &dlm->dlm_refs); |
340 | } |
341 | |
342 | /* given a questionable reference to a dlm object, gets a reference if |
343 | * it can find it in the list, otherwise returns NULL in which case |
344 | * you shouldn't trust your pointer. */ |
345 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) |
346 | { |
347 | struct dlm_ctxt *target; |
348 | struct dlm_ctxt *ret = NULL; |
349 | |
350 | spin_lock(lock: &dlm_domain_lock); |
351 | |
352 | list_for_each_entry(target, &dlm_domains, list) { |
353 | if (target == dlm) { |
354 | __dlm_get(dlm: target); |
355 | ret = target; |
356 | break; |
357 | } |
358 | } |
359 | |
360 | spin_unlock(lock: &dlm_domain_lock); |
361 | |
362 | return ret; |
363 | } |
364 | |
365 | int dlm_domain_fully_joined(struct dlm_ctxt *dlm) |
366 | { |
367 | int ret; |
368 | |
369 | spin_lock(lock: &dlm_domain_lock); |
370 | ret = (dlm->dlm_state == DLM_CTXT_JOINED) || |
371 | (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN); |
372 | spin_unlock(lock: &dlm_domain_lock); |
373 | |
374 | return ret; |
375 | } |
376 | |
377 | static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) |
378 | { |
379 | if (dlm->dlm_worker) { |
380 | destroy_workqueue(wq: dlm->dlm_worker); |
381 | dlm->dlm_worker = NULL; |
382 | } |
383 | } |
384 | |
385 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) |
386 | { |
387 | dlm_unregister_domain_handlers(dlm); |
388 | dlm_complete_thread(dlm); |
389 | dlm_complete_recovery_thread(dlm); |
390 | dlm_destroy_dlm_worker(dlm); |
391 | |
392 | /* We've left the domain. Now we can take ourselves out of the |
393 | * list and allow the kref stuff to help us free the |
394 | * memory. */ |
395 | spin_lock(lock: &dlm_domain_lock); |
396 | list_del_init(entry: &dlm->list); |
397 | spin_unlock(lock: &dlm_domain_lock); |
398 | |
399 | /* Wake up anyone waiting for us to remove this domain */ |
400 | wake_up(&dlm_domain_events); |
401 | } |
402 | |
403 | static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) |
404 | { |
405 | int i, num, n, ret = 0; |
406 | struct dlm_lock_resource *res; |
407 | struct hlist_node *iter; |
408 | struct hlist_head *bucket; |
409 | int dropped; |
410 | |
411 | mlog(0, "Migrating locks from domain %s\n" , dlm->name); |
412 | |
413 | num = 0; |
414 | spin_lock(lock: &dlm->spinlock); |
415 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
416 | redo_bucket: |
417 | n = 0; |
418 | bucket = dlm_lockres_hash(dlm, i); |
419 | iter = bucket->first; |
420 | while (iter) { |
421 | n++; |
422 | res = hlist_entry(iter, struct dlm_lock_resource, |
423 | hash_node); |
424 | dlm_lockres_get(res); |
425 | /* migrate, if necessary. this will drop the dlm |
426 | * spinlock and retake it if it does migration. */ |
427 | dropped = dlm_empty_lockres(dlm, res); |
428 | |
429 | spin_lock(lock: &res->spinlock); |
430 | if (dropped) |
431 | __dlm_lockres_calc_usage(dlm, res); |
432 | else |
433 | iter = res->hash_node.next; |
434 | spin_unlock(lock: &res->spinlock); |
435 | |
436 | dlm_lockres_put(res); |
437 | |
438 | if (dropped) { |
439 | cond_resched_lock(&dlm->spinlock); |
440 | goto redo_bucket; |
441 | } |
442 | } |
443 | cond_resched_lock(&dlm->spinlock); |
444 | num += n; |
445 | } |
446 | |
447 | if (!num) { |
448 | if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { |
449 | mlog(0, "%s: perhaps there are more lock resources " |
450 | "need to be migrated after dlm recovery\n" , dlm->name); |
451 | ret = -EAGAIN; |
452 | } else { |
453 | mlog(0, "%s: we won't do dlm recovery after migrating " |
454 | "all lock resources\n" , dlm->name); |
455 | dlm->migrate_done = 1; |
456 | } |
457 | } |
458 | |
459 | spin_unlock(lock: &dlm->spinlock); |
460 | wake_up(&dlm->dlm_thread_wq); |
461 | |
462 | /* let the dlm thread take care of purging, keep scanning until |
463 | * nothing remains in the hash */ |
464 | if (num) { |
465 | mlog(0, "%s: %d lock resources in hash last pass\n" , |
466 | dlm->name, num); |
467 | ret = -EAGAIN; |
468 | } |
469 | mlog(0, "DONE Migrating locks from domain %s\n" , dlm->name); |
470 | return ret; |
471 | } |
472 | |
473 | static int dlm_no_joining_node(struct dlm_ctxt *dlm) |
474 | { |
475 | int ret; |
476 | |
477 | spin_lock(lock: &dlm->spinlock); |
478 | ret = dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN; |
479 | spin_unlock(lock: &dlm->spinlock); |
480 | |
481 | return ret; |
482 | } |
483 | |
484 | static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, |
485 | void *data, void **ret_data) |
486 | { |
487 | struct dlm_ctxt *dlm = data; |
488 | unsigned int node; |
489 | struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; |
490 | |
491 | if (!dlm_grab(dlm)) |
492 | return 0; |
493 | |
494 | node = exit_msg->node_idx; |
495 | mlog(0, "%s: Node %u sent a begin exit domain message\n" , dlm->name, node); |
496 | |
497 | spin_lock(lock: &dlm->spinlock); |
498 | set_bit(nr: node, addr: dlm->exit_domain_map); |
499 | spin_unlock(lock: &dlm->spinlock); |
500 | |
501 | dlm_put(dlm); |
502 | |
503 | return 0; |
504 | } |
505 | |
506 | static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) |
507 | { |
508 | /* Yikes, a double spinlock! I need domain_lock for the dlm |
509 | * state and the dlm spinlock for join state... Sorry! */ |
510 | again: |
511 | spin_lock(lock: &dlm_domain_lock); |
512 | spin_lock(lock: &dlm->spinlock); |
513 | |
514 | if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { |
515 | mlog(0, "Node %d is joining, we wait on it.\n" , |
516 | dlm->joining_node); |
517 | spin_unlock(lock: &dlm->spinlock); |
518 | spin_unlock(lock: &dlm_domain_lock); |
519 | |
520 | wait_event(dlm->dlm_join_events, dlm_no_joining_node(dlm)); |
521 | goto again; |
522 | } |
523 | |
524 | dlm->dlm_state = DLM_CTXT_LEAVING; |
525 | spin_unlock(lock: &dlm->spinlock); |
526 | spin_unlock(lock: &dlm_domain_lock); |
527 | } |
528 | |
529 | static void __dlm_print_nodes(struct dlm_ctxt *dlm) |
530 | { |
531 | int node = -1, num = 0; |
532 | |
533 | assert_spin_locked(&dlm->spinlock); |
534 | |
535 | printk("( " ); |
536 | while ((node = find_next_bit(addr: dlm->domain_map, O2NM_MAX_NODES, |
537 | offset: node + 1)) < O2NM_MAX_NODES) { |
538 | printk("%d " , node); |
539 | ++num; |
540 | } |
541 | printk(") %u nodes\n" , num); |
542 | } |
543 | |
544 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
545 | void **ret_data) |
546 | { |
547 | struct dlm_ctxt *dlm = data; |
548 | unsigned int node; |
549 | struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; |
550 | |
551 | mlog(0, "%p %u %p" , msg, len, data); |
552 | |
553 | if (!dlm_grab(dlm)) |
554 | return 0; |
555 | |
556 | node = exit_msg->node_idx; |
557 | |
558 | spin_lock(lock: &dlm->spinlock); |
559 | clear_bit(nr: node, addr: dlm->domain_map); |
560 | clear_bit(nr: node, addr: dlm->exit_domain_map); |
561 | printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s " , node, dlm->name); |
562 | __dlm_print_nodes(dlm); |
563 | |
564 | /* notify anything attached to the heartbeat events */ |
565 | dlm_hb_event_notify_attached(dlm, idx: node, node_up: 0); |
566 | |
567 | spin_unlock(lock: &dlm->spinlock); |
568 | |
569 | dlm_put(dlm); |
570 | |
571 | return 0; |
572 | } |
573 | |
574 | static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, |
575 | unsigned int node) |
576 | { |
577 | int status; |
578 | struct dlm_exit_domain leave_msg; |
579 | |
580 | mlog(0, "%s: Sending domain exit message %u to node %u\n" , dlm->name, |
581 | msg_type, node); |
582 | |
583 | memset(&leave_msg, 0, sizeof(leave_msg)); |
584 | leave_msg.node_idx = dlm->node_num; |
585 | |
586 | status = o2net_send_message(msg_type, key: dlm->key, data: &leave_msg, |
587 | len: sizeof(leave_msg), target_node: node, NULL); |
588 | if (status < 0) |
589 | mlog(ML_ERROR, "Error %d sending domain exit message %u " |
590 | "to node %u on domain %s\n" , status, msg_type, node, |
591 | dlm->name); |
592 | |
593 | return status; |
594 | } |
595 | |
596 | static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) |
597 | { |
598 | int node = -1; |
599 | |
600 | /* Support for begin exit domain was added in 1.2 */ |
601 | if (dlm->dlm_locking_proto.pv_major == 1 && |
602 | dlm->dlm_locking_proto.pv_minor < 2) |
603 | return; |
604 | |
605 | /* |
606 | * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely |
607 | * informational. Meaning if a node does not receive the message, |
608 | * so be it. |
609 | */ |
610 | spin_lock(lock: &dlm->spinlock); |
611 | while (1) { |
612 | node = find_next_bit(addr: dlm->domain_map, O2NM_MAX_NODES, offset: node + 1); |
613 | if (node >= O2NM_MAX_NODES) |
614 | break; |
615 | if (node == dlm->node_num) |
616 | continue; |
617 | |
618 | spin_unlock(lock: &dlm->spinlock); |
619 | dlm_send_one_domain_exit(dlm, msg_type: DLM_BEGIN_EXIT_DOMAIN_MSG, node); |
620 | spin_lock(lock: &dlm->spinlock); |
621 | } |
622 | spin_unlock(lock: &dlm->spinlock); |
623 | } |
624 | |
625 | static void dlm_leave_domain(struct dlm_ctxt *dlm) |
626 | { |
627 | int node, clear_node, status; |
628 | |
629 | /* At this point we've migrated away all our locks and won't |
630 | * accept mastership of new ones. The dlm is responsible for |
631 | * almost nothing now. We make sure not to confuse any joining |
632 | * nodes and then commence shutdown procedure. */ |
633 | |
634 | spin_lock(lock: &dlm->spinlock); |
635 | /* Clear ourselves from the domain map */ |
636 | clear_bit(nr: dlm->node_num, addr: dlm->domain_map); |
637 | while ((node = find_next_bit(addr: dlm->domain_map, O2NM_MAX_NODES, |
638 | offset: 0)) < O2NM_MAX_NODES) { |
639 | /* Drop the dlm spinlock. This is safe wrt the domain_map. |
640 | * -nodes cannot be added now as the |
641 | * query_join_handlers knows to respond with OK_NO_MAP |
642 | * -we catch the right network errors if a node is |
643 | * removed from the map while we're sending him the |
644 | * exit message. */ |
645 | spin_unlock(lock: &dlm->spinlock); |
646 | |
647 | clear_node = 1; |
648 | |
649 | status = dlm_send_one_domain_exit(dlm, msg_type: DLM_EXIT_DOMAIN_MSG, |
650 | node); |
651 | if (status < 0 && |
652 | status != -ENOPROTOOPT && |
653 | status != -ENOTCONN) { |
654 | mlog(ML_NOTICE, "Error %d sending domain exit message " |
655 | "to node %d\n" , status, node); |
656 | |
657 | /* Not sure what to do here but lets sleep for |
658 | * a bit in case this was a transient |
659 | * error... */ |
660 | msleep(DLM_DOMAIN_BACKOFF_MS); |
661 | clear_node = 0; |
662 | } |
663 | |
664 | spin_lock(lock: &dlm->spinlock); |
665 | /* If we're not clearing the node bit then we intend |
666 | * to loop back around to try again. */ |
667 | if (clear_node) |
668 | clear_bit(nr: node, addr: dlm->domain_map); |
669 | } |
670 | spin_unlock(lock: &dlm->spinlock); |
671 | } |
672 | |
673 | void dlm_unregister_domain(struct dlm_ctxt *dlm) |
674 | { |
675 | int leave = 0; |
676 | struct dlm_lock_resource *res; |
677 | |
678 | spin_lock(lock: &dlm_domain_lock); |
679 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); |
680 | BUG_ON(!dlm->num_joins); |
681 | |
682 | dlm->num_joins--; |
683 | if (!dlm->num_joins) { |
684 | /* We mark it "in shutdown" now so new register |
685 | * requests wait until we've completely left the |
686 | * domain. Don't use DLM_CTXT_LEAVING yet as we still |
687 | * want new domain joins to communicate with us at |
688 | * least until we've completed migration of our |
689 | * resources. */ |
690 | dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN; |
691 | leave = 1; |
692 | } |
693 | spin_unlock(lock: &dlm_domain_lock); |
694 | |
695 | if (leave) { |
696 | mlog(0, "shutting down domain %s\n" , dlm->name); |
697 | dlm_begin_exit_domain(dlm); |
698 | |
699 | /* We changed dlm state, notify the thread */ |
700 | dlm_kick_thread(dlm, NULL); |
701 | |
702 | while (dlm_migrate_all_locks(dlm)) { |
703 | /* Give dlm_thread time to purge the lockres' */ |
704 | msleep(msecs: 500); |
705 | mlog(0, "%s: more migration to do\n" , dlm->name); |
706 | } |
707 | |
708 | /* This list should be empty. If not, print remaining lockres */ |
709 | if (!list_empty(head: &dlm->tracking_list)) { |
710 | mlog(ML_ERROR, "Following lockres' are still on the " |
711 | "tracking list:\n" ); |
712 | list_for_each_entry(res, &dlm->tracking_list, tracking) |
713 | dlm_print_one_lock_resource(res); |
714 | } |
715 | |
716 | dlm_mark_domain_leaving(dlm); |
717 | dlm_leave_domain(dlm); |
718 | printk(KERN_NOTICE "o2dlm: Leaving domain %s\n" , dlm->name); |
719 | dlm_force_free_mles(dlm); |
720 | dlm_complete_dlm_shutdown(dlm); |
721 | } |
722 | dlm_put(dlm); |
723 | } |
724 | EXPORT_SYMBOL_GPL(dlm_unregister_domain); |
725 | |
726 | static int dlm_query_join_proto_check(char *proto_type, int node, |
727 | struct dlm_protocol_version *ours, |
728 | struct dlm_protocol_version *request) |
729 | { |
730 | int rc; |
731 | struct dlm_protocol_version proto = *request; |
732 | |
733 | if (!dlm_protocol_compare(existing: ours, request: &proto)) { |
734 | mlog(0, |
735 | "node %u wanted to join with %s locking protocol " |
736 | "%u.%u, we respond with %u.%u\n" , |
737 | node, proto_type, |
738 | request->pv_major, |
739 | request->pv_minor, |
740 | proto.pv_major, proto.pv_minor); |
741 | request->pv_minor = proto.pv_minor; |
742 | rc = 0; |
743 | } else { |
744 | mlog(ML_NOTICE, |
745 | "Node %u wanted to join with %s locking " |
746 | "protocol %u.%u, but we have %u.%u, disallowing\n" , |
747 | node, proto_type, |
748 | request->pv_major, |
749 | request->pv_minor, |
750 | ours->pv_major, |
751 | ours->pv_minor); |
752 | rc = 1; |
753 | } |
754 | |
755 | return rc; |
756 | } |
757 | |
758 | /* |
759 | * struct dlm_query_join_packet is made up of four one-byte fields. They |
760 | * are effectively in big-endian order already. However, little-endian |
761 | * machines swap them before putting the packet on the wire (because |
762 | * query_join's response is a status, and that status is treated as a u32 |
763 | * on the wire). Thus, a big-endian and little-endian machines will treat |
764 | * this structure differently. |
765 | * |
766 | * The solution is to have little-endian machines swap the structure when |
767 | * converting from the structure to the u32 representation. This will |
768 | * result in the structure having the correct format on the wire no matter |
769 | * the host endian format. |
770 | */ |
771 | static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, |
772 | u32 *wire) |
773 | { |
774 | union dlm_query_join_response response; |
775 | |
776 | response.packet = *packet; |
777 | *wire = be32_to_cpu(response.intval); |
778 | } |
779 | |
780 | static void dlm_query_join_wire_to_packet(u32 wire, |
781 | struct dlm_query_join_packet *packet) |
782 | { |
783 | union dlm_query_join_response response; |
784 | |
785 | response.intval = cpu_to_be32(wire); |
786 | *packet = response.packet; |
787 | } |
788 | |
789 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, |
790 | void **ret_data) |
791 | { |
792 | struct dlm_query_join_request *query; |
793 | struct dlm_query_join_packet packet = { |
794 | .code = JOIN_DISALLOW, |
795 | }; |
796 | struct dlm_ctxt *dlm = NULL; |
797 | u32 response; |
798 | u8 nodenum; |
799 | |
800 | query = (struct dlm_query_join_request *) msg->buf; |
801 | |
802 | mlog(0, "node %u wants to join domain %s\n" , query->node_idx, |
803 | query->domain); |
804 | |
805 | /* |
806 | * If heartbeat doesn't consider the node live, tell it |
807 | * to back off and try again. This gives heartbeat a chance |
808 | * to catch up. |
809 | */ |
810 | if (!o2hb_check_node_heartbeating_no_sem(node_num: query->node_idx)) { |
811 | mlog(0, "node %u is not in our live map yet\n" , |
812 | query->node_idx); |
813 | |
814 | packet.code = JOIN_DISALLOW; |
815 | goto respond; |
816 | } |
817 | |
818 | packet.code = JOIN_OK_NO_MAP; |
819 | |
820 | spin_lock(lock: &dlm_domain_lock); |
821 | dlm = __dlm_lookup_domain_full(domain: query->domain, len: query->name_len); |
822 | if (!dlm) |
823 | goto unlock_respond; |
824 | |
825 | /* |
826 | * There is a small window where the joining node may not see the |
827 | * node(s) that just left but still part of the cluster. DISALLOW |
828 | * join request if joining node has different node map. |
829 | */ |
830 | nodenum=0; |
831 | while (nodenum < O2NM_MAX_NODES) { |
832 | if (test_bit(nodenum, dlm->domain_map)) { |
833 | if (!byte_test_bit(nr: nodenum, map: query->node_map)) { |
834 | mlog(0, "disallow join as node %u does not " |
835 | "have node %u in its nodemap\n" , |
836 | query->node_idx, nodenum); |
837 | packet.code = JOIN_DISALLOW; |
838 | goto unlock_respond; |
839 | } |
840 | } |
841 | nodenum++; |
842 | } |
843 | |
844 | /* Once the dlm ctxt is marked as leaving then we don't want |
845 | * to be put in someone's domain map. |
846 | * Also, explicitly disallow joining at certain troublesome |
847 | * times (ie. during recovery). */ |
848 | if (dlm->dlm_state != DLM_CTXT_LEAVING) { |
849 | int bit = query->node_idx; |
850 | spin_lock(lock: &dlm->spinlock); |
851 | |
852 | if (dlm->dlm_state == DLM_CTXT_NEW && |
853 | dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN) { |
854 | /*If this is a brand new context and we |
855 | * haven't started our join process yet, then |
856 | * the other node won the race. */ |
857 | packet.code = JOIN_OK_NO_MAP; |
858 | } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { |
859 | /* Disallow parallel joins. */ |
860 | packet.code = JOIN_DISALLOW; |
861 | } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { |
862 | mlog(0, "node %u trying to join, but recovery " |
863 | "is ongoing.\n" , bit); |
864 | packet.code = JOIN_DISALLOW; |
865 | } else if (test_bit(bit, dlm->recovery_map)) { |
866 | mlog(0, "node %u trying to join, but it " |
867 | "still needs recovery.\n" , bit); |
868 | packet.code = JOIN_DISALLOW; |
869 | } else if (test_bit(bit, dlm->domain_map)) { |
870 | mlog(0, "node %u trying to join, but it " |
871 | "is still in the domain! needs recovery?\n" , |
872 | bit); |
873 | packet.code = JOIN_DISALLOW; |
874 | } else { |
875 | /* Alright we're fully a part of this domain |
876 | * so we keep some state as to who's joining |
877 | * and indicate to him that needs to be fixed |
878 | * up. */ |
879 | |
880 | /* Make sure we speak compatible locking protocols. */ |
881 | if (dlm_query_join_proto_check(proto_type: "DLM" , node: bit, |
882 | ours: &dlm->dlm_locking_proto, |
883 | request: &query->dlm_proto)) { |
884 | packet.code = JOIN_PROTOCOL_MISMATCH; |
885 | } else if (dlm_query_join_proto_check(proto_type: "fs" , node: bit, |
886 | ours: &dlm->fs_locking_proto, |
887 | request: &query->fs_proto)) { |
888 | packet.code = JOIN_PROTOCOL_MISMATCH; |
889 | } else { |
890 | packet.dlm_minor = query->dlm_proto.pv_minor; |
891 | packet.fs_minor = query->fs_proto.pv_minor; |
892 | packet.code = JOIN_OK; |
893 | __dlm_set_joining_node(dlm, node: query->node_idx); |
894 | } |
895 | } |
896 | |
897 | spin_unlock(lock: &dlm->spinlock); |
898 | } |
899 | unlock_respond: |
900 | spin_unlock(lock: &dlm_domain_lock); |
901 | |
902 | respond: |
903 | mlog(0, "We respond with %u\n" , packet.code); |
904 | |
905 | dlm_query_join_packet_to_wire(packet: &packet, wire: &response); |
906 | return response; |
907 | } |
908 | |
909 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, |
910 | void **ret_data) |
911 | { |
912 | struct dlm_assert_joined *assert; |
913 | struct dlm_ctxt *dlm = NULL; |
914 | |
915 | assert = (struct dlm_assert_joined *) msg->buf; |
916 | |
917 | mlog(0, "node %u asserts join on domain %s\n" , assert->node_idx, |
918 | assert->domain); |
919 | |
920 | spin_lock(lock: &dlm_domain_lock); |
921 | dlm = __dlm_lookup_domain_full(domain: assert->domain, len: assert->name_len); |
922 | /* XXX should we consider no dlm ctxt an error? */ |
923 | if (dlm) { |
924 | spin_lock(lock: &dlm->spinlock); |
925 | |
926 | /* Alright, this node has officially joined our |
927 | * domain. Set him in the map and clean up our |
928 | * leftover join state. */ |
929 | BUG_ON(dlm->joining_node != assert->node_idx); |
930 | |
931 | if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { |
932 | mlog(0, "dlm recovery is ongoing, disallow join\n" ); |
933 | spin_unlock(lock: &dlm->spinlock); |
934 | spin_unlock(lock: &dlm_domain_lock); |
935 | return -EAGAIN; |
936 | } |
937 | |
938 | set_bit(nr: assert->node_idx, addr: dlm->domain_map); |
939 | clear_bit(nr: assert->node_idx, addr: dlm->exit_domain_map); |
940 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
941 | |
942 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s " , |
943 | assert->node_idx, dlm->name); |
944 | __dlm_print_nodes(dlm); |
945 | |
946 | /* notify anything attached to the heartbeat events */ |
947 | dlm_hb_event_notify_attached(dlm, idx: assert->node_idx, node_up: 1); |
948 | |
949 | spin_unlock(lock: &dlm->spinlock); |
950 | } |
951 | spin_unlock(lock: &dlm_domain_lock); |
952 | |
953 | return 0; |
954 | } |
955 | |
956 | static int dlm_match_regions(struct dlm_ctxt *dlm, |
957 | struct dlm_query_region *qr, |
958 | char *local, int locallen) |
959 | { |
960 | char *remote = qr->qr_regions; |
961 | char *l, *r; |
962 | int localnr, i, j, foundit; |
963 | int status = 0; |
964 | |
965 | if (!o2hb_global_heartbeat_active()) { |
966 | if (qr->qr_numregions) { |
967 | mlog(ML_ERROR, "Domain %s: Joining node %d has global " |
968 | "heartbeat enabled but local node %d does not\n" , |
969 | qr->qr_domain, qr->qr_node, dlm->node_num); |
970 | status = -EINVAL; |
971 | } |
972 | goto bail; |
973 | } |
974 | |
975 | if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { |
976 | mlog(ML_ERROR, "Domain %s: Local node %d has global " |
977 | "heartbeat enabled but joining node %d does not\n" , |
978 | qr->qr_domain, dlm->node_num, qr->qr_node); |
979 | status = -EINVAL; |
980 | goto bail; |
981 | } |
982 | |
983 | r = remote; |
984 | for (i = 0; i < qr->qr_numregions; ++i) { |
985 | mlog(0, "Region %.*s\n" , O2HB_MAX_REGION_NAME_LEN, r); |
986 | r += O2HB_MAX_REGION_NAME_LEN; |
987 | } |
988 | |
989 | localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN); |
990 | localnr = o2hb_get_all_regions(region_uuids: local, numregions: (u8)localnr); |
991 | |
992 | /* compare local regions with remote */ |
993 | l = local; |
994 | for (i = 0; i < localnr; ++i) { |
995 | foundit = 0; |
996 | r = remote; |
997 | for (j = 0; j <= qr->qr_numregions; ++j) { |
998 | if (!memcmp(p: l, q: r, O2HB_MAX_REGION_NAME_LEN)) { |
999 | foundit = 1; |
1000 | break; |
1001 | } |
1002 | r += O2HB_MAX_REGION_NAME_LEN; |
1003 | } |
1004 | if (!foundit) { |
1005 | status = -EINVAL; |
1006 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " |
1007 | "in local node %d but not in joining node %d\n" , |
1008 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, |
1009 | dlm->node_num, qr->qr_node); |
1010 | goto bail; |
1011 | } |
1012 | l += O2HB_MAX_REGION_NAME_LEN; |
1013 | } |
1014 | |
1015 | /* compare remote with local regions */ |
1016 | r = remote; |
1017 | for (i = 0; i < qr->qr_numregions; ++i) { |
1018 | foundit = 0; |
1019 | l = local; |
1020 | for (j = 0; j < localnr; ++j) { |
1021 | if (!memcmp(p: r, q: l, O2HB_MAX_REGION_NAME_LEN)) { |
1022 | foundit = 1; |
1023 | break; |
1024 | } |
1025 | l += O2HB_MAX_REGION_NAME_LEN; |
1026 | } |
1027 | if (!foundit) { |
1028 | status = -EINVAL; |
1029 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " |
1030 | "in joining node %d but not in local node %d\n" , |
1031 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, |
1032 | qr->qr_node, dlm->node_num); |
1033 | goto bail; |
1034 | } |
1035 | r += O2HB_MAX_REGION_NAME_LEN; |
1036 | } |
1037 | |
1038 | bail: |
1039 | return status; |
1040 | } |
1041 | |
1042 | static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) |
1043 | { |
1044 | struct dlm_query_region *qr = NULL; |
1045 | int status, ret = 0, i; |
1046 | char *p; |
1047 | |
1048 | if (find_first_bit(addr: node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) |
1049 | goto bail; |
1050 | |
1051 | qr = kzalloc(size: sizeof(struct dlm_query_region), GFP_KERNEL); |
1052 | if (!qr) { |
1053 | ret = -ENOMEM; |
1054 | mlog_errno(ret); |
1055 | goto bail; |
1056 | } |
1057 | |
1058 | qr->qr_node = dlm->node_num; |
1059 | qr->qr_namelen = strlen(dlm->name); |
1060 | memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); |
1061 | /* if local hb, the numregions will be zero */ |
1062 | if (o2hb_global_heartbeat_active()) |
1063 | qr->qr_numregions = o2hb_get_all_regions(region_uuids: qr->qr_regions, |
1064 | O2NM_MAX_REGIONS); |
1065 | |
1066 | p = qr->qr_regions; |
1067 | for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) |
1068 | mlog(0, "Region %.*s\n" , O2HB_MAX_REGION_NAME_LEN, p); |
1069 | |
1070 | i = -1; |
1071 | while ((i = find_next_bit(addr: node_map, O2NM_MAX_NODES, |
1072 | offset: i + 1)) < O2NM_MAX_NODES) { |
1073 | if (i == dlm->node_num) |
1074 | continue; |
1075 | |
1076 | mlog(0, "Sending regions to node %d\n" , i); |
1077 | |
1078 | ret = o2net_send_message(msg_type: DLM_QUERY_REGION, DLM_MOD_KEY, data: qr, |
1079 | len: sizeof(struct dlm_query_region), |
1080 | target_node: i, status: &status); |
1081 | if (ret >= 0) |
1082 | ret = status; |
1083 | if (ret) { |
1084 | mlog(ML_ERROR, "Region mismatch %d, node %d\n" , |
1085 | ret, i); |
1086 | break; |
1087 | } |
1088 | } |
1089 | |
1090 | bail: |
1091 | kfree(objp: qr); |
1092 | return ret; |
1093 | } |
1094 | |
1095 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, |
1096 | void *data, void **ret_data) |
1097 | { |
1098 | struct dlm_query_region *qr; |
1099 | struct dlm_ctxt *dlm = NULL; |
1100 | char *local = NULL; |
1101 | int status = 0; |
1102 | |
1103 | qr = (struct dlm_query_region *) msg->buf; |
1104 | |
1105 | mlog(0, "Node %u queries hb regions on domain %s\n" , qr->qr_node, |
1106 | qr->qr_domain); |
1107 | |
1108 | /* buffer used in dlm_mast_regions() */ |
1109 | local = kmalloc(size: sizeof(qr->qr_regions), GFP_KERNEL); |
1110 | if (!local) |
1111 | return -ENOMEM; |
1112 | |
1113 | status = -EINVAL; |
1114 | |
1115 | spin_lock(lock: &dlm_domain_lock); |
1116 | dlm = __dlm_lookup_domain_full(domain: qr->qr_domain, len: qr->qr_namelen); |
1117 | if (!dlm) { |
1118 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
1119 | "before join domain\n" , qr->qr_node, qr->qr_domain); |
1120 | goto out_domain_lock; |
1121 | } |
1122 | |
1123 | spin_lock(lock: &dlm->spinlock); |
1124 | if (dlm->joining_node != qr->qr_node) { |
1125 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
1126 | "but joining node is %d\n" , qr->qr_node, qr->qr_domain, |
1127 | dlm->joining_node); |
1128 | goto out_dlm_lock; |
1129 | } |
1130 | |
1131 | /* Support for global heartbeat was added in 1.1 */ |
1132 | if (dlm->dlm_locking_proto.pv_major == 1 && |
1133 | dlm->dlm_locking_proto.pv_minor == 0) { |
1134 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
1135 | "but active dlm protocol is %d.%d\n" , qr->qr_node, |
1136 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, |
1137 | dlm->dlm_locking_proto.pv_minor); |
1138 | goto out_dlm_lock; |
1139 | } |
1140 | |
1141 | status = dlm_match_regions(dlm, qr, local, locallen: sizeof(qr->qr_regions)); |
1142 | |
1143 | out_dlm_lock: |
1144 | spin_unlock(lock: &dlm->spinlock); |
1145 | |
1146 | out_domain_lock: |
1147 | spin_unlock(lock: &dlm_domain_lock); |
1148 | |
1149 | kfree(objp: local); |
1150 | |
1151 | return status; |
1152 | } |
1153 | |
1154 | static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) |
1155 | { |
1156 | struct o2nm_node *local; |
1157 | struct dlm_node_info *remote; |
1158 | int i, j; |
1159 | int status = 0; |
1160 | |
1161 | for (j = 0; j < qn->qn_numnodes; ++j) |
1162 | mlog(0, "Node %3d, %pI4:%u\n" , qn->qn_nodes[j].ni_nodenum, |
1163 | &(qn->qn_nodes[j].ni_ipv4_address), |
1164 | ntohs(qn->qn_nodes[j].ni_ipv4_port)); |
1165 | |
1166 | for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { |
1167 | local = o2nm_get_node_by_num(node_num: i); |
1168 | remote = NULL; |
1169 | for (j = 0; j < qn->qn_numnodes; ++j) { |
1170 | if (qn->qn_nodes[j].ni_nodenum == i) { |
1171 | remote = &(qn->qn_nodes[j]); |
1172 | break; |
1173 | } |
1174 | } |
1175 | |
1176 | if (!local && !remote) |
1177 | continue; |
1178 | |
1179 | if ((local && !remote) || (!local && remote)) |
1180 | status = -EINVAL; |
1181 | |
1182 | if (!status && |
1183 | ((remote->ni_nodenum != local->nd_num) || |
1184 | (remote->ni_ipv4_port != local->nd_ipv4_port) || |
1185 | (remote->ni_ipv4_address != local->nd_ipv4_address))) |
1186 | status = -EINVAL; |
1187 | |
1188 | if (status) { |
1189 | if (remote && !local) |
1190 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " |
1191 | "registered in joining node %d but not in " |
1192 | "local node %d\n" , qn->qn_domain, |
1193 | remote->ni_nodenum, |
1194 | &(remote->ni_ipv4_address), |
1195 | ntohs(remote->ni_ipv4_port), |
1196 | qn->qn_nodenum, dlm->node_num); |
1197 | if (local && !remote) |
1198 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " |
1199 | "registered in local node %d but not in " |
1200 | "joining node %d\n" , qn->qn_domain, |
1201 | local->nd_num, &(local->nd_ipv4_address), |
1202 | ntohs(local->nd_ipv4_port), |
1203 | dlm->node_num, qn->qn_nodenum); |
1204 | BUG_ON((!local && !remote)); |
1205 | } |
1206 | |
1207 | if (local) |
1208 | o2nm_node_put(node: local); |
1209 | } |
1210 | |
1211 | return status; |
1212 | } |
1213 | |
1214 | static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) |
1215 | { |
1216 | struct dlm_query_nodeinfo *qn = NULL; |
1217 | struct o2nm_node *node; |
1218 | int ret = 0, status, count, i; |
1219 | |
1220 | if (find_first_bit(addr: node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) |
1221 | goto bail; |
1222 | |
1223 | qn = kzalloc(size: sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); |
1224 | if (!qn) { |
1225 | ret = -ENOMEM; |
1226 | mlog_errno(ret); |
1227 | goto bail; |
1228 | } |
1229 | |
1230 | for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { |
1231 | node = o2nm_get_node_by_num(node_num: i); |
1232 | if (!node) |
1233 | continue; |
1234 | qn->qn_nodes[count].ni_nodenum = node->nd_num; |
1235 | qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; |
1236 | qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; |
1237 | mlog(0, "Node %3d, %pI4:%u\n" , node->nd_num, |
1238 | &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); |
1239 | ++count; |
1240 | o2nm_node_put(node); |
1241 | } |
1242 | |
1243 | qn->qn_nodenum = dlm->node_num; |
1244 | qn->qn_numnodes = count; |
1245 | qn->qn_namelen = strlen(dlm->name); |
1246 | memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); |
1247 | |
1248 | i = -1; |
1249 | while ((i = find_next_bit(addr: node_map, O2NM_MAX_NODES, |
1250 | offset: i + 1)) < O2NM_MAX_NODES) { |
1251 | if (i == dlm->node_num) |
1252 | continue; |
1253 | |
1254 | mlog(0, "Sending nodeinfo to node %d\n" , i); |
1255 | |
1256 | ret = o2net_send_message(msg_type: DLM_QUERY_NODEINFO, DLM_MOD_KEY, |
1257 | data: qn, len: sizeof(struct dlm_query_nodeinfo), |
1258 | target_node: i, status: &status); |
1259 | if (ret >= 0) |
1260 | ret = status; |
1261 | if (ret) { |
1262 | mlog(ML_ERROR, "node mismatch %d, node %d\n" , ret, i); |
1263 | break; |
1264 | } |
1265 | } |
1266 | |
1267 | bail: |
1268 | kfree(objp: qn); |
1269 | return ret; |
1270 | } |
1271 | |
1272 | static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, |
1273 | void *data, void **ret_data) |
1274 | { |
1275 | struct dlm_query_nodeinfo *qn; |
1276 | struct dlm_ctxt *dlm = NULL; |
1277 | int locked = 0, status = -EINVAL; |
1278 | |
1279 | qn = (struct dlm_query_nodeinfo *) msg->buf; |
1280 | |
1281 | mlog(0, "Node %u queries nodes on domain %s\n" , qn->qn_nodenum, |
1282 | qn->qn_domain); |
1283 | |
1284 | spin_lock(lock: &dlm_domain_lock); |
1285 | dlm = __dlm_lookup_domain_full(domain: qn->qn_domain, len: qn->qn_namelen); |
1286 | if (!dlm) { |
1287 | mlog(ML_ERROR, "Node %d queried nodes on domain %s before " |
1288 | "join domain\n" , qn->qn_nodenum, qn->qn_domain); |
1289 | goto bail; |
1290 | } |
1291 | |
1292 | spin_lock(lock: &dlm->spinlock); |
1293 | locked = 1; |
1294 | if (dlm->joining_node != qn->qn_nodenum) { |
1295 | mlog(ML_ERROR, "Node %d queried nodes on domain %s but " |
1296 | "joining node is %d\n" , qn->qn_nodenum, qn->qn_domain, |
1297 | dlm->joining_node); |
1298 | goto bail; |
1299 | } |
1300 | |
1301 | /* Support for node query was added in 1.1 */ |
1302 | if (dlm->dlm_locking_proto.pv_major == 1 && |
1303 | dlm->dlm_locking_proto.pv_minor == 0) { |
1304 | mlog(ML_ERROR, "Node %d queried nodes on domain %s " |
1305 | "but active dlm protocol is %d.%d\n" , qn->qn_nodenum, |
1306 | qn->qn_domain, dlm->dlm_locking_proto.pv_major, |
1307 | dlm->dlm_locking_proto.pv_minor); |
1308 | goto bail; |
1309 | } |
1310 | |
1311 | status = dlm_match_nodes(dlm, qn); |
1312 | |
1313 | bail: |
1314 | if (locked) |
1315 | spin_unlock(lock: &dlm->spinlock); |
1316 | spin_unlock(lock: &dlm_domain_lock); |
1317 | |
1318 | return status; |
1319 | } |
1320 | |
1321 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
1322 | void **ret_data) |
1323 | { |
1324 | struct dlm_cancel_join *cancel; |
1325 | struct dlm_ctxt *dlm = NULL; |
1326 | |
1327 | cancel = (struct dlm_cancel_join *) msg->buf; |
1328 | |
1329 | mlog(0, "node %u cancels join on domain %s\n" , cancel->node_idx, |
1330 | cancel->domain); |
1331 | |
1332 | spin_lock(lock: &dlm_domain_lock); |
1333 | dlm = __dlm_lookup_domain_full(domain: cancel->domain, len: cancel->name_len); |
1334 | |
1335 | if (dlm) { |
1336 | spin_lock(lock: &dlm->spinlock); |
1337 | |
1338 | /* Yikes, this guy wants to cancel his join. No |
1339 | * problem, we simply cleanup our join state. */ |
1340 | BUG_ON(dlm->joining_node != cancel->node_idx); |
1341 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
1342 | |
1343 | spin_unlock(lock: &dlm->spinlock); |
1344 | } |
1345 | spin_unlock(lock: &dlm_domain_lock); |
1346 | |
1347 | return 0; |
1348 | } |
1349 | |
1350 | static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm, |
1351 | unsigned int node) |
1352 | { |
1353 | int status; |
1354 | struct dlm_cancel_join cancel_msg; |
1355 | |
1356 | memset(&cancel_msg, 0, sizeof(cancel_msg)); |
1357 | cancel_msg.node_idx = dlm->node_num; |
1358 | cancel_msg.name_len = strlen(dlm->name); |
1359 | memcpy(cancel_msg.domain, dlm->name, cancel_msg.name_len); |
1360 | |
1361 | status = o2net_send_message(msg_type: DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, |
1362 | data: &cancel_msg, len: sizeof(cancel_msg), target_node: node, |
1363 | NULL); |
1364 | if (status < 0) { |
1365 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " |
1366 | "node %u\n" , status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, |
1367 | node); |
1368 | goto bail; |
1369 | } |
1370 | |
1371 | bail: |
1372 | return status; |
1373 | } |
1374 | |
1375 | /* map_size should be in bytes. */ |
1376 | static int dlm_send_join_cancels(struct dlm_ctxt *dlm, |
1377 | unsigned long *node_map, |
1378 | unsigned int map_size) |
1379 | { |
1380 | int status, tmpstat; |
1381 | int node; |
1382 | |
1383 | if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * |
1384 | sizeof(unsigned long))) { |
1385 | mlog(ML_ERROR, |
1386 | "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n" , |
1387 | map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES)); |
1388 | return -EINVAL; |
1389 | } |
1390 | |
1391 | status = 0; |
1392 | node = -1; |
1393 | while ((node = find_next_bit(addr: node_map, O2NM_MAX_NODES, |
1394 | offset: node + 1)) < O2NM_MAX_NODES) { |
1395 | if (node == dlm->node_num) |
1396 | continue; |
1397 | |
1398 | tmpstat = dlm_send_one_join_cancel(dlm, node); |
1399 | if (tmpstat) { |
1400 | mlog(ML_ERROR, "Error return %d cancelling join on " |
1401 | "node %d\n" , tmpstat, node); |
1402 | if (!status) |
1403 | status = tmpstat; |
1404 | } |
1405 | } |
1406 | |
1407 | if (status) |
1408 | mlog_errno(status); |
1409 | return status; |
1410 | } |
1411 | |
1412 | static int dlm_request_join(struct dlm_ctxt *dlm, |
1413 | int node, |
1414 | enum dlm_query_join_response_code *response) |
1415 | { |
1416 | int status; |
1417 | struct dlm_query_join_request join_msg; |
1418 | struct dlm_query_join_packet packet; |
1419 | u32 join_resp; |
1420 | |
1421 | mlog(0, "querying node %d\n" , node); |
1422 | |
1423 | memset(&join_msg, 0, sizeof(join_msg)); |
1424 | join_msg.node_idx = dlm->node_num; |
1425 | join_msg.name_len = strlen(dlm->name); |
1426 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); |
1427 | join_msg.dlm_proto = dlm->dlm_locking_proto; |
1428 | join_msg.fs_proto = dlm->fs_locking_proto; |
1429 | |
1430 | /* copy live node map to join message */ |
1431 | byte_copymap(dmap: join_msg.node_map, smap: dlm->live_nodes_map, O2NM_MAX_NODES); |
1432 | |
1433 | status = o2net_send_message(msg_type: DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, data: &join_msg, |
1434 | len: sizeof(join_msg), target_node: node, status: &join_resp); |
1435 | if (status < 0 && status != -ENOPROTOOPT) { |
1436 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " |
1437 | "node %u\n" , status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, |
1438 | node); |
1439 | goto bail; |
1440 | } |
1441 | dlm_query_join_wire_to_packet(wire: join_resp, packet: &packet); |
1442 | |
1443 | /* -ENOPROTOOPT from the net code means the other side isn't |
1444 | listening for our message type -- that's fine, it means |
1445 | his dlm isn't up, so we can consider him a 'yes' but not |
1446 | joined into the domain. */ |
1447 | if (status == -ENOPROTOOPT) { |
1448 | status = 0; |
1449 | *response = JOIN_OK_NO_MAP; |
1450 | } else { |
1451 | *response = packet.code; |
1452 | switch (packet.code) { |
1453 | case JOIN_DISALLOW: |
1454 | case JOIN_OK_NO_MAP: |
1455 | break; |
1456 | case JOIN_PROTOCOL_MISMATCH: |
1457 | mlog(ML_NOTICE, |
1458 | "This node requested DLM locking protocol %u.%u and " |
1459 | "filesystem locking protocol %u.%u. At least one of " |
1460 | "the protocol versions on node %d is not compatible, " |
1461 | "disconnecting\n" , |
1462 | dlm->dlm_locking_proto.pv_major, |
1463 | dlm->dlm_locking_proto.pv_minor, |
1464 | dlm->fs_locking_proto.pv_major, |
1465 | dlm->fs_locking_proto.pv_minor, |
1466 | node); |
1467 | status = -EPROTO; |
1468 | break; |
1469 | case JOIN_OK: |
1470 | /* Use the same locking protocol as the remote node */ |
1471 | dlm->dlm_locking_proto.pv_minor = packet.dlm_minor; |
1472 | dlm->fs_locking_proto.pv_minor = packet.fs_minor; |
1473 | mlog(0, |
1474 | "Node %d responds JOIN_OK with DLM locking protocol " |
1475 | "%u.%u and fs locking protocol %u.%u\n" , |
1476 | node, |
1477 | dlm->dlm_locking_proto.pv_major, |
1478 | dlm->dlm_locking_proto.pv_minor, |
1479 | dlm->fs_locking_proto.pv_major, |
1480 | dlm->fs_locking_proto.pv_minor); |
1481 | break; |
1482 | default: |
1483 | status = -EINVAL; |
1484 | mlog(ML_ERROR, "invalid response %d from node %u\n" , |
1485 | packet.code, node); |
1486 | /* Reset response to JOIN_DISALLOW */ |
1487 | *response = JOIN_DISALLOW; |
1488 | break; |
1489 | } |
1490 | } |
1491 | |
1492 | mlog(0, "status %d, node %d response is %d\n" , status, node, |
1493 | *response); |
1494 | |
1495 | bail: |
1496 | return status; |
1497 | } |
1498 | |
1499 | static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, |
1500 | unsigned int node) |
1501 | { |
1502 | int status; |
1503 | int ret; |
1504 | struct dlm_assert_joined assert_msg; |
1505 | |
1506 | mlog(0, "Sending join assert to node %u\n" , node); |
1507 | |
1508 | memset(&assert_msg, 0, sizeof(assert_msg)); |
1509 | assert_msg.node_idx = dlm->node_num; |
1510 | assert_msg.name_len = strlen(dlm->name); |
1511 | memcpy(assert_msg.domain, dlm->name, assert_msg.name_len); |
1512 | |
1513 | status = o2net_send_message(msg_type: DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, |
1514 | data: &assert_msg, len: sizeof(assert_msg), target_node: node, |
1515 | status: &ret); |
1516 | if (status < 0) |
1517 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " |
1518 | "node %u\n" , status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, |
1519 | node); |
1520 | else |
1521 | status = ret; |
1522 | |
1523 | return status; |
1524 | } |
1525 | |
1526 | static void dlm_send_join_asserts(struct dlm_ctxt *dlm, |
1527 | unsigned long *node_map) |
1528 | { |
1529 | int status, node, live; |
1530 | |
1531 | status = 0; |
1532 | node = -1; |
1533 | while ((node = find_next_bit(addr: node_map, O2NM_MAX_NODES, |
1534 | offset: node + 1)) < O2NM_MAX_NODES) { |
1535 | if (node == dlm->node_num) |
1536 | continue; |
1537 | |
1538 | do { |
1539 | /* It is very important that this message be |
1540 | * received so we spin until either the node |
1541 | * has died or it gets the message. */ |
1542 | status = dlm_send_one_join_assert(dlm, node); |
1543 | |
1544 | spin_lock(lock: &dlm->spinlock); |
1545 | live = test_bit(node, dlm->live_nodes_map); |
1546 | spin_unlock(lock: &dlm->spinlock); |
1547 | |
1548 | if (status) { |
1549 | mlog(ML_ERROR, "Error return %d asserting " |
1550 | "join on node %d\n" , status, node); |
1551 | |
1552 | /* give us some time between errors... */ |
1553 | if (live) |
1554 | msleep(DLM_DOMAIN_BACKOFF_MS); |
1555 | } |
1556 | } while (status && live); |
1557 | } |
1558 | } |
1559 | |
1560 | struct domain_join_ctxt { |
1561 | unsigned long live_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
1562 | unsigned long yes_resp_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
1563 | }; |
1564 | |
1565 | static int dlm_should_restart_join(struct dlm_ctxt *dlm, |
1566 | struct domain_join_ctxt *ctxt, |
1567 | enum dlm_query_join_response_code response) |
1568 | { |
1569 | int ret; |
1570 | |
1571 | if (response == JOIN_DISALLOW) { |
1572 | mlog(0, "Latest response of disallow -- should restart\n" ); |
1573 | return 1; |
1574 | } |
1575 | |
1576 | spin_lock(lock: &dlm->spinlock); |
1577 | /* For now, we restart the process if the node maps have |
1578 | * changed at all */ |
1579 | ret = !bitmap_equal(src1: ctxt->live_map, src2: dlm->live_nodes_map, |
1580 | O2NM_MAX_NODES); |
1581 | spin_unlock(lock: &dlm->spinlock); |
1582 | |
1583 | if (ret) |
1584 | mlog(0, "Node maps changed -- should restart\n" ); |
1585 | |
1586 | return ret; |
1587 | } |
1588 | |
1589 | static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) |
1590 | { |
1591 | int status = 0, tmpstat, node; |
1592 | struct domain_join_ctxt *ctxt; |
1593 | enum dlm_query_join_response_code response = JOIN_DISALLOW; |
1594 | |
1595 | mlog(0, "%p" , dlm); |
1596 | |
1597 | ctxt = kzalloc(size: sizeof(*ctxt), GFP_KERNEL); |
1598 | if (!ctxt) { |
1599 | status = -ENOMEM; |
1600 | mlog_errno(status); |
1601 | goto bail; |
1602 | } |
1603 | |
1604 | /* group sem locking should work for us here -- we're already |
1605 | * registered for heartbeat events so filling this should be |
1606 | * atomic wrt getting those handlers called. */ |
1607 | o2hb_fill_node_map(map: dlm->live_nodes_map, O2NM_MAX_NODES); |
1608 | |
1609 | spin_lock(lock: &dlm->spinlock); |
1610 | bitmap_copy(dst: ctxt->live_map, src: dlm->live_nodes_map, O2NM_MAX_NODES); |
1611 | __dlm_set_joining_node(dlm, node: dlm->node_num); |
1612 | spin_unlock(lock: &dlm->spinlock); |
1613 | |
1614 | node = -1; |
1615 | while ((node = find_next_bit(addr: ctxt->live_map, O2NM_MAX_NODES, |
1616 | offset: node + 1)) < O2NM_MAX_NODES) { |
1617 | if (node == dlm->node_num) |
1618 | continue; |
1619 | |
1620 | status = dlm_request_join(dlm, node, response: &response); |
1621 | if (status < 0) { |
1622 | mlog_errno(status); |
1623 | goto bail; |
1624 | } |
1625 | |
1626 | /* Ok, either we got a response or the node doesn't have a |
1627 | * dlm up. */ |
1628 | if (response == JOIN_OK) |
1629 | set_bit(nr: node, addr: ctxt->yes_resp_map); |
1630 | |
1631 | if (dlm_should_restart_join(dlm, ctxt, response)) { |
1632 | status = -EAGAIN; |
1633 | goto bail; |
1634 | } |
1635 | } |
1636 | |
1637 | mlog(0, "Yay, done querying nodes!\n" ); |
1638 | |
1639 | /* Yay, everyone agree's we can join the domain. My domain is |
1640 | * comprised of all nodes who were put in the |
1641 | * yes_resp_map. Copy that into our domain map and send a join |
1642 | * assert message to clean up everyone elses state. */ |
1643 | spin_lock(lock: &dlm->spinlock); |
1644 | bitmap_copy(dst: dlm->domain_map, src: ctxt->yes_resp_map, O2NM_MAX_NODES); |
1645 | set_bit(nr: dlm->node_num, addr: dlm->domain_map); |
1646 | spin_unlock(lock: &dlm->spinlock); |
1647 | |
1648 | /* Support for global heartbeat and node info was added in 1.1 */ |
1649 | if (dlm->dlm_locking_proto.pv_major > 1 || |
1650 | dlm->dlm_locking_proto.pv_minor > 0) { |
1651 | status = dlm_send_nodeinfo(dlm, node_map: ctxt->yes_resp_map); |
1652 | if (status) { |
1653 | mlog_errno(status); |
1654 | goto bail; |
1655 | } |
1656 | status = dlm_send_regions(dlm, node_map: ctxt->yes_resp_map); |
1657 | if (status) { |
1658 | mlog_errno(status); |
1659 | goto bail; |
1660 | } |
1661 | } |
1662 | |
1663 | dlm_send_join_asserts(dlm, node_map: ctxt->yes_resp_map); |
1664 | |
1665 | /* Joined state *must* be set before the joining node |
1666 | * information, otherwise the query_join handler may read no |
1667 | * current joiner but a state of NEW and tell joining nodes |
1668 | * we're not in the domain. */ |
1669 | spin_lock(lock: &dlm_domain_lock); |
1670 | dlm->dlm_state = DLM_CTXT_JOINED; |
1671 | dlm->num_joins++; |
1672 | spin_unlock(lock: &dlm_domain_lock); |
1673 | |
1674 | bail: |
1675 | spin_lock(lock: &dlm->spinlock); |
1676 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
1677 | if (!status) { |
1678 | printk(KERN_NOTICE "o2dlm: Joining domain %s " , dlm->name); |
1679 | __dlm_print_nodes(dlm); |
1680 | } |
1681 | spin_unlock(lock: &dlm->spinlock); |
1682 | |
1683 | if (ctxt) { |
1684 | /* Do we need to send a cancel message to any nodes? */ |
1685 | if (status < 0) { |
1686 | tmpstat = dlm_send_join_cancels(dlm, |
1687 | node_map: ctxt->yes_resp_map, |
1688 | map_size: sizeof(ctxt->yes_resp_map)); |
1689 | if (tmpstat < 0) |
1690 | mlog_errno(tmpstat); |
1691 | } |
1692 | kfree(objp: ctxt); |
1693 | } |
1694 | |
1695 | mlog(0, "returning %d\n" , status); |
1696 | return status; |
1697 | } |
1698 | |
1699 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) |
1700 | { |
1701 | o2hb_unregister_callback(region_uuid: dlm->name, hc: &dlm->dlm_hb_up); |
1702 | o2hb_unregister_callback(region_uuid: dlm->name, hc: &dlm->dlm_hb_down); |
1703 | o2net_unregister_handler_list(list: &dlm->dlm_domain_handlers); |
1704 | } |
1705 | |
1706 | static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) |
1707 | { |
1708 | int status; |
1709 | |
1710 | mlog(0, "registering handlers.\n" ); |
1711 | |
1712 | o2hb_setup_callback(hc: &dlm->dlm_hb_down, type: O2HB_NODE_DOWN_CB, |
1713 | func: dlm_hb_node_down_cb, data: dlm, DLM_HB_NODE_DOWN_PRI); |
1714 | o2hb_setup_callback(hc: &dlm->dlm_hb_up, type: O2HB_NODE_UP_CB, |
1715 | func: dlm_hb_node_up_cb, data: dlm, DLM_HB_NODE_UP_PRI); |
1716 | |
1717 | status = o2hb_register_callback(region_uuid: dlm->name, hc: &dlm->dlm_hb_down); |
1718 | if (status) |
1719 | goto bail; |
1720 | |
1721 | status = o2hb_register_callback(region_uuid: dlm->name, hc: &dlm->dlm_hb_up); |
1722 | if (status) |
1723 | goto bail; |
1724 | |
1725 | status = o2net_register_handler(msg_type: DLM_MASTER_REQUEST_MSG, key: dlm->key, |
1726 | max_len: sizeof(struct dlm_master_request), |
1727 | func: dlm_master_request_handler, |
1728 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1729 | if (status) |
1730 | goto bail; |
1731 | |
1732 | status = o2net_register_handler(msg_type: DLM_ASSERT_MASTER_MSG, key: dlm->key, |
1733 | max_len: sizeof(struct dlm_assert_master), |
1734 | func: dlm_assert_master_handler, |
1735 | data: dlm, post_func: dlm_assert_master_post_handler, |
1736 | unreg_list: &dlm->dlm_domain_handlers); |
1737 | if (status) |
1738 | goto bail; |
1739 | |
1740 | status = o2net_register_handler(msg_type: DLM_CREATE_LOCK_MSG, key: dlm->key, |
1741 | max_len: sizeof(struct dlm_create_lock), |
1742 | func: dlm_create_lock_handler, |
1743 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1744 | if (status) |
1745 | goto bail; |
1746 | |
1747 | status = o2net_register_handler(msg_type: DLM_CONVERT_LOCK_MSG, key: dlm->key, |
1748 | DLM_CONVERT_LOCK_MAX_LEN, |
1749 | func: dlm_convert_lock_handler, |
1750 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1751 | if (status) |
1752 | goto bail; |
1753 | |
1754 | status = o2net_register_handler(msg_type: DLM_UNLOCK_LOCK_MSG, key: dlm->key, |
1755 | DLM_UNLOCK_LOCK_MAX_LEN, |
1756 | func: dlm_unlock_lock_handler, |
1757 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1758 | if (status) |
1759 | goto bail; |
1760 | |
1761 | status = o2net_register_handler(msg_type: DLM_PROXY_AST_MSG, key: dlm->key, |
1762 | DLM_PROXY_AST_MAX_LEN, |
1763 | func: dlm_proxy_ast_handler, |
1764 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1765 | if (status) |
1766 | goto bail; |
1767 | |
1768 | status = o2net_register_handler(msg_type: DLM_EXIT_DOMAIN_MSG, key: dlm->key, |
1769 | max_len: sizeof(struct dlm_exit_domain), |
1770 | func: dlm_exit_domain_handler, |
1771 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1772 | if (status) |
1773 | goto bail; |
1774 | |
1775 | status = o2net_register_handler(msg_type: DLM_DEREF_LOCKRES_MSG, key: dlm->key, |
1776 | max_len: sizeof(struct dlm_deref_lockres), |
1777 | func: dlm_deref_lockres_handler, |
1778 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1779 | if (status) |
1780 | goto bail; |
1781 | |
1782 | status = o2net_register_handler(msg_type: DLM_MIGRATE_REQUEST_MSG, key: dlm->key, |
1783 | max_len: sizeof(struct dlm_migrate_request), |
1784 | func: dlm_migrate_request_handler, |
1785 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1786 | if (status) |
1787 | goto bail; |
1788 | |
1789 | status = o2net_register_handler(msg_type: DLM_MIG_LOCKRES_MSG, key: dlm->key, |
1790 | DLM_MIG_LOCKRES_MAX_LEN, |
1791 | func: dlm_mig_lockres_handler, |
1792 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1793 | if (status) |
1794 | goto bail; |
1795 | |
1796 | status = o2net_register_handler(msg_type: DLM_MASTER_REQUERY_MSG, key: dlm->key, |
1797 | max_len: sizeof(struct dlm_master_requery), |
1798 | func: dlm_master_requery_handler, |
1799 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1800 | if (status) |
1801 | goto bail; |
1802 | |
1803 | status = o2net_register_handler(msg_type: DLM_LOCK_REQUEST_MSG, key: dlm->key, |
1804 | max_len: sizeof(struct dlm_lock_request), |
1805 | func: dlm_request_all_locks_handler, |
1806 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1807 | if (status) |
1808 | goto bail; |
1809 | |
1810 | status = o2net_register_handler(msg_type: DLM_RECO_DATA_DONE_MSG, key: dlm->key, |
1811 | max_len: sizeof(struct dlm_reco_data_done), |
1812 | func: dlm_reco_data_done_handler, |
1813 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1814 | if (status) |
1815 | goto bail; |
1816 | |
1817 | status = o2net_register_handler(msg_type: DLM_BEGIN_RECO_MSG, key: dlm->key, |
1818 | max_len: sizeof(struct dlm_begin_reco), |
1819 | func: dlm_begin_reco_handler, |
1820 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1821 | if (status) |
1822 | goto bail; |
1823 | |
1824 | status = o2net_register_handler(msg_type: DLM_FINALIZE_RECO_MSG, key: dlm->key, |
1825 | max_len: sizeof(struct dlm_finalize_reco), |
1826 | func: dlm_finalize_reco_handler, |
1827 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1828 | if (status) |
1829 | goto bail; |
1830 | |
1831 | status = o2net_register_handler(msg_type: DLM_BEGIN_EXIT_DOMAIN_MSG, key: dlm->key, |
1832 | max_len: sizeof(struct dlm_exit_domain), |
1833 | func: dlm_begin_exit_domain_handler, |
1834 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1835 | if (status) |
1836 | goto bail; |
1837 | |
1838 | status = o2net_register_handler(msg_type: DLM_DEREF_LOCKRES_DONE, key: dlm->key, |
1839 | max_len: sizeof(struct dlm_deref_lockres_done), |
1840 | func: dlm_deref_lockres_done_handler, |
1841 | data: dlm, NULL, unreg_list: &dlm->dlm_domain_handlers); |
1842 | bail: |
1843 | if (status) |
1844 | dlm_unregister_domain_handlers(dlm); |
1845 | |
1846 | return status; |
1847 | } |
1848 | |
1849 | static int dlm_join_domain(struct dlm_ctxt *dlm) |
1850 | { |
1851 | int status; |
1852 | unsigned int backoff; |
1853 | unsigned int total_backoff = 0; |
1854 | char wq_name[O2NM_MAX_NAME_LEN]; |
1855 | |
1856 | BUG_ON(!dlm); |
1857 | |
1858 | mlog(0, "Join domain %s\n" , dlm->name); |
1859 | |
1860 | status = dlm_register_domain_handlers(dlm); |
1861 | if (status) { |
1862 | mlog_errno(status); |
1863 | goto bail; |
1864 | } |
1865 | |
1866 | status = dlm_launch_thread(dlm); |
1867 | if (status < 0) { |
1868 | mlog_errno(status); |
1869 | goto bail; |
1870 | } |
1871 | |
1872 | status = dlm_launch_recovery_thread(dlm); |
1873 | if (status < 0) { |
1874 | mlog_errno(status); |
1875 | goto bail; |
1876 | } |
1877 | |
1878 | dlm_debug_init(dlm); |
1879 | |
1880 | snprintf(buf: wq_name, O2NM_MAX_NAME_LEN, fmt: "dlm_wq-%s" , dlm->name); |
1881 | dlm->dlm_worker = alloc_workqueue(fmt: wq_name, flags: WQ_MEM_RECLAIM, max_active: 0); |
1882 | if (!dlm->dlm_worker) { |
1883 | status = -ENOMEM; |
1884 | mlog_errno(status); |
1885 | goto bail; |
1886 | } |
1887 | |
1888 | do { |
1889 | status = dlm_try_to_join_domain(dlm); |
1890 | |
1891 | /* If we're racing another node to the join, then we |
1892 | * need to back off temporarily and let them |
1893 | * complete. */ |
1894 | #define DLM_JOIN_TIMEOUT_MSECS 90000 |
1895 | if (status == -EAGAIN) { |
1896 | if (signal_pending(current)) { |
1897 | status = -ERESTARTSYS; |
1898 | goto bail; |
1899 | } |
1900 | |
1901 | if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) { |
1902 | status = -ERESTARTSYS; |
1903 | mlog(ML_NOTICE, "Timed out joining dlm domain " |
1904 | "%s after %u msecs\n" , dlm->name, |
1905 | total_backoff); |
1906 | goto bail; |
1907 | } |
1908 | |
1909 | /* |
1910 | * <chip> After you! |
1911 | * <dale> No, after you! |
1912 | * <chip> I insist! |
1913 | * <dale> But you first! |
1914 | * ... |
1915 | */ |
1916 | backoff = (unsigned int)(jiffies & 0x3); |
1917 | backoff *= DLM_DOMAIN_BACKOFF_MS; |
1918 | total_backoff += backoff; |
1919 | mlog(0, "backoff %d\n" , backoff); |
1920 | msleep(msecs: backoff); |
1921 | } |
1922 | } while (status == -EAGAIN); |
1923 | |
1924 | if (status < 0) { |
1925 | mlog_errno(status); |
1926 | goto bail; |
1927 | } |
1928 | |
1929 | status = 0; |
1930 | bail: |
1931 | wake_up(&dlm_domain_events); |
1932 | |
1933 | if (status) { |
1934 | dlm_unregister_domain_handlers(dlm); |
1935 | dlm_complete_thread(dlm); |
1936 | dlm_complete_recovery_thread(dlm); |
1937 | dlm_destroy_dlm_worker(dlm); |
1938 | } |
1939 | |
1940 | return status; |
1941 | } |
1942 | |
1943 | static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, |
1944 | u32 key) |
1945 | { |
1946 | int i; |
1947 | int ret; |
1948 | struct dlm_ctxt *dlm = NULL; |
1949 | |
1950 | dlm = kzalloc(size: sizeof(*dlm), GFP_KERNEL); |
1951 | if (!dlm) { |
1952 | ret = -ENOMEM; |
1953 | mlog_errno(ret); |
1954 | goto leave; |
1955 | } |
1956 | |
1957 | dlm->name = kstrdup(s: domain, GFP_KERNEL); |
1958 | if (dlm->name == NULL) { |
1959 | ret = -ENOMEM; |
1960 | mlog_errno(ret); |
1961 | goto leave; |
1962 | } |
1963 | |
1964 | dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); |
1965 | if (!dlm->lockres_hash) { |
1966 | ret = -ENOMEM; |
1967 | mlog_errno(ret); |
1968 | goto leave; |
1969 | } |
1970 | |
1971 | for (i = 0; i < DLM_HASH_BUCKETS; i++) |
1972 | INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); |
1973 | |
1974 | dlm->master_hash = (struct hlist_head **) |
1975 | dlm_alloc_pagevec(DLM_HASH_PAGES); |
1976 | if (!dlm->master_hash) { |
1977 | ret = -ENOMEM; |
1978 | mlog_errno(ret); |
1979 | goto leave; |
1980 | } |
1981 | |
1982 | for (i = 0; i < DLM_HASH_BUCKETS; i++) |
1983 | INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); |
1984 | |
1985 | dlm->key = key; |
1986 | dlm->node_num = o2nm_this_node(); |
1987 | |
1988 | dlm_create_debugfs_subroot(dlm); |
1989 | |
1990 | spin_lock_init(&dlm->spinlock); |
1991 | spin_lock_init(&dlm->master_lock); |
1992 | spin_lock_init(&dlm->ast_lock); |
1993 | spin_lock_init(&dlm->track_lock); |
1994 | INIT_LIST_HEAD(list: &dlm->list); |
1995 | INIT_LIST_HEAD(list: &dlm->dirty_list); |
1996 | INIT_LIST_HEAD(list: &dlm->reco.resources); |
1997 | INIT_LIST_HEAD(list: &dlm->reco.node_data); |
1998 | INIT_LIST_HEAD(list: &dlm->purge_list); |
1999 | INIT_LIST_HEAD(list: &dlm->dlm_domain_handlers); |
2000 | INIT_LIST_HEAD(list: &dlm->tracking_list); |
2001 | dlm->reco.state = 0; |
2002 | |
2003 | INIT_LIST_HEAD(list: &dlm->pending_asts); |
2004 | INIT_LIST_HEAD(list: &dlm->pending_basts); |
2005 | |
2006 | mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n" , |
2007 | dlm->recovery_map, &(dlm->recovery_map[0])); |
2008 | |
2009 | bitmap_zero(dst: dlm->recovery_map, O2NM_MAX_NODES); |
2010 | bitmap_zero(dst: dlm->live_nodes_map, O2NM_MAX_NODES); |
2011 | bitmap_zero(dst: dlm->domain_map, O2NM_MAX_NODES); |
2012 | |
2013 | dlm->dlm_thread_task = NULL; |
2014 | dlm->dlm_reco_thread_task = NULL; |
2015 | dlm->dlm_worker = NULL; |
2016 | init_waitqueue_head(&dlm->dlm_thread_wq); |
2017 | init_waitqueue_head(&dlm->dlm_reco_thread_wq); |
2018 | init_waitqueue_head(&dlm->reco.event); |
2019 | init_waitqueue_head(&dlm->ast_wq); |
2020 | init_waitqueue_head(&dlm->migration_wq); |
2021 | INIT_LIST_HEAD(list: &dlm->mle_hb_events); |
2022 | |
2023 | dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; |
2024 | init_waitqueue_head(&dlm->dlm_join_events); |
2025 | |
2026 | dlm->migrate_done = 0; |
2027 | |
2028 | dlm->reco.new_master = O2NM_INVALID_NODE_NUM; |
2029 | dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; |
2030 | |
2031 | atomic_set(v: &dlm->res_tot_count, i: 0); |
2032 | atomic_set(v: &dlm->res_cur_count, i: 0); |
2033 | for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) { |
2034 | atomic_set(v: &dlm->mle_tot_count[i], i: 0); |
2035 | atomic_set(v: &dlm->mle_cur_count[i], i: 0); |
2036 | } |
2037 | |
2038 | spin_lock_init(&dlm->work_lock); |
2039 | INIT_LIST_HEAD(list: &dlm->work_list); |
2040 | INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work); |
2041 | |
2042 | kref_init(kref: &dlm->dlm_refs); |
2043 | dlm->dlm_state = DLM_CTXT_NEW; |
2044 | |
2045 | INIT_LIST_HEAD(list: &dlm->dlm_eviction_callbacks); |
2046 | |
2047 | mlog(0, "context init: refcount %u\n" , |
2048 | kref_read(&dlm->dlm_refs)); |
2049 | |
2050 | ret = 0; |
2051 | leave: |
2052 | if (ret < 0 && dlm) { |
2053 | if (dlm->master_hash) |
2054 | dlm_free_pagevec(vec: (void **)dlm->master_hash, |
2055 | DLM_HASH_PAGES); |
2056 | |
2057 | if (dlm->lockres_hash) |
2058 | dlm_free_pagevec(vec: (void **)dlm->lockres_hash, |
2059 | DLM_HASH_PAGES); |
2060 | |
2061 | kfree(objp: dlm->name); |
2062 | kfree(objp: dlm); |
2063 | dlm = NULL; |
2064 | } |
2065 | return dlm; |
2066 | } |
2067 | |
2068 | /* |
2069 | * Compare a requested locking protocol version against the current one. |
2070 | * |
2071 | * If the major numbers are different, they are incompatible. |
2072 | * If the current minor is greater than the request, they are incompatible. |
2073 | * If the current minor is less than or equal to the request, they are |
2074 | * compatible, and the requester should run at the current minor version. |
2075 | */ |
2076 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, |
2077 | struct dlm_protocol_version *request) |
2078 | { |
2079 | if (existing->pv_major != request->pv_major) |
2080 | return 1; |
2081 | |
2082 | if (existing->pv_minor > request->pv_minor) |
2083 | return 1; |
2084 | |
2085 | if (existing->pv_minor < request->pv_minor) |
2086 | request->pv_minor = existing->pv_minor; |
2087 | |
2088 | return 0; |
2089 | } |
2090 | |
2091 | /* |
2092 | * dlm_register_domain: one-time setup per "domain". |
2093 | * |
2094 | * The filesystem passes in the requested locking version via proto. |
2095 | * If registration was successful, proto will contain the negotiated |
2096 | * locking protocol. |
2097 | */ |
2098 | struct dlm_ctxt * dlm_register_domain(const char *domain, |
2099 | u32 key, |
2100 | struct dlm_protocol_version *fs_proto) |
2101 | { |
2102 | int ret; |
2103 | struct dlm_ctxt *dlm = NULL; |
2104 | struct dlm_ctxt *new_ctxt = NULL; |
2105 | |
2106 | if (strlen(domain) >= O2NM_MAX_NAME_LEN) { |
2107 | ret = -ENAMETOOLONG; |
2108 | mlog(ML_ERROR, "domain name length too long\n" ); |
2109 | goto leave; |
2110 | } |
2111 | |
2112 | mlog(0, "register called for domain \"%s\"\n" , domain); |
2113 | |
2114 | retry: |
2115 | dlm = NULL; |
2116 | if (signal_pending(current)) { |
2117 | ret = -ERESTARTSYS; |
2118 | mlog_errno(ret); |
2119 | goto leave; |
2120 | } |
2121 | |
2122 | spin_lock(lock: &dlm_domain_lock); |
2123 | |
2124 | dlm = __dlm_lookup_domain(domain); |
2125 | if (dlm) { |
2126 | if (dlm->dlm_state != DLM_CTXT_JOINED) { |
2127 | spin_unlock(lock: &dlm_domain_lock); |
2128 | |
2129 | mlog(0, "This ctxt is not joined yet!\n" ); |
2130 | wait_event_interruptible(dlm_domain_events, |
2131 | dlm_wait_on_domain_helper( |
2132 | domain)); |
2133 | goto retry; |
2134 | } |
2135 | |
2136 | if (dlm_protocol_compare(existing: &dlm->fs_locking_proto, request: fs_proto)) { |
2137 | spin_unlock(lock: &dlm_domain_lock); |
2138 | mlog(ML_ERROR, |
2139 | "Requested locking protocol version is not " |
2140 | "compatible with already registered domain " |
2141 | "\"%s\"\n" , domain); |
2142 | ret = -EPROTO; |
2143 | goto leave; |
2144 | } |
2145 | |
2146 | __dlm_get(dlm); |
2147 | dlm->num_joins++; |
2148 | |
2149 | spin_unlock(lock: &dlm_domain_lock); |
2150 | |
2151 | ret = 0; |
2152 | goto leave; |
2153 | } |
2154 | |
2155 | /* doesn't exist */ |
2156 | if (!new_ctxt) { |
2157 | spin_unlock(lock: &dlm_domain_lock); |
2158 | |
2159 | new_ctxt = dlm_alloc_ctxt(domain, key); |
2160 | if (new_ctxt) |
2161 | goto retry; |
2162 | |
2163 | ret = -ENOMEM; |
2164 | mlog_errno(ret); |
2165 | goto leave; |
2166 | } |
2167 | |
2168 | /* a little variable switch-a-roo here... */ |
2169 | dlm = new_ctxt; |
2170 | new_ctxt = NULL; |
2171 | |
2172 | /* add the new domain */ |
2173 | list_add_tail(new: &dlm->list, head: &dlm_domains); |
2174 | spin_unlock(lock: &dlm_domain_lock); |
2175 | |
2176 | /* |
2177 | * Pass the locking protocol version into the join. If the join |
2178 | * succeeds, it will have the negotiated protocol set. |
2179 | */ |
2180 | dlm->dlm_locking_proto = dlm_protocol; |
2181 | dlm->fs_locking_proto = *fs_proto; |
2182 | |
2183 | ret = dlm_join_domain(dlm); |
2184 | if (ret) { |
2185 | mlog_errno(ret); |
2186 | dlm_put(dlm); |
2187 | goto leave; |
2188 | } |
2189 | |
2190 | /* Tell the caller what locking protocol we negotiated */ |
2191 | *fs_proto = dlm->fs_locking_proto; |
2192 | |
2193 | ret = 0; |
2194 | leave: |
2195 | if (new_ctxt) |
2196 | dlm_free_ctxt_mem(dlm: new_ctxt); |
2197 | |
2198 | if (ret < 0) |
2199 | dlm = ERR_PTR(error: ret); |
2200 | |
2201 | return dlm; |
2202 | } |
2203 | EXPORT_SYMBOL_GPL(dlm_register_domain); |
2204 | |
2205 | static LIST_HEAD(dlm_join_handlers); |
2206 | |
2207 | static void dlm_unregister_net_handlers(void) |
2208 | { |
2209 | o2net_unregister_handler_list(list: &dlm_join_handlers); |
2210 | } |
2211 | |
2212 | static int dlm_register_net_handlers(void) |
2213 | { |
2214 | int status = 0; |
2215 | |
2216 | status = o2net_register_handler(msg_type: DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, |
2217 | max_len: sizeof(struct dlm_query_join_request), |
2218 | func: dlm_query_join_handler, |
2219 | NULL, NULL, unreg_list: &dlm_join_handlers); |
2220 | if (status) |
2221 | goto bail; |
2222 | |
2223 | status = o2net_register_handler(msg_type: DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, |
2224 | max_len: sizeof(struct dlm_assert_joined), |
2225 | func: dlm_assert_joined_handler, |
2226 | NULL, NULL, unreg_list: &dlm_join_handlers); |
2227 | if (status) |
2228 | goto bail; |
2229 | |
2230 | status = o2net_register_handler(msg_type: DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, |
2231 | max_len: sizeof(struct dlm_cancel_join), |
2232 | func: dlm_cancel_join_handler, |
2233 | NULL, NULL, unreg_list: &dlm_join_handlers); |
2234 | if (status) |
2235 | goto bail; |
2236 | |
2237 | status = o2net_register_handler(msg_type: DLM_QUERY_REGION, DLM_MOD_KEY, |
2238 | max_len: sizeof(struct dlm_query_region), |
2239 | func: dlm_query_region_handler, |
2240 | NULL, NULL, unreg_list: &dlm_join_handlers); |
2241 | |
2242 | if (status) |
2243 | goto bail; |
2244 | |
2245 | status = o2net_register_handler(msg_type: DLM_QUERY_NODEINFO, DLM_MOD_KEY, |
2246 | max_len: sizeof(struct dlm_query_nodeinfo), |
2247 | func: dlm_query_nodeinfo_handler, |
2248 | NULL, NULL, unreg_list: &dlm_join_handlers); |
2249 | bail: |
2250 | if (status < 0) |
2251 | dlm_unregister_net_handlers(); |
2252 | |
2253 | return status; |
2254 | } |
2255 | |
/* Domain eviction callback handling.
 *
 * The file system requires notification of node death *before* the
 * dlm completes its recovery work, otherwise it may be able to
 * acquire locks on resources requiring recovery. Since the dlm can
 * evict a node from its domain *before* heartbeat fires, a similar
 * mechanism is required. */
2263 | |
2264 | /* Eviction is not expected to happen often, so a per-domain lock is |
2265 | * not necessary. Eviction callbacks are allowed to sleep for short |
2266 | * periods of time. */ |
2267 | static DECLARE_RWSEM(dlm_callback_sem); |
2268 | |
2269 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, |
2270 | int node_num) |
2271 | { |
2272 | struct dlm_eviction_cb *cb; |
2273 | |
2274 | down_read(sem: &dlm_callback_sem); |
2275 | list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) { |
2276 | cb->ec_func(node_num, cb->ec_data); |
2277 | } |
2278 | up_read(sem: &dlm_callback_sem); |
2279 | } |
2280 | |
2281 | void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb, |
2282 | dlm_eviction_func *f, |
2283 | void *data) |
2284 | { |
2285 | INIT_LIST_HEAD(list: &cb->ec_item); |
2286 | cb->ec_func = f; |
2287 | cb->ec_data = data; |
2288 | } |
2289 | EXPORT_SYMBOL_GPL(dlm_setup_eviction_cb); |
2290 | |
2291 | void dlm_register_eviction_cb(struct dlm_ctxt *dlm, |
2292 | struct dlm_eviction_cb *cb) |
2293 | { |
2294 | down_write(sem: &dlm_callback_sem); |
2295 | list_add_tail(new: &cb->ec_item, head: &dlm->dlm_eviction_callbacks); |
2296 | up_write(sem: &dlm_callback_sem); |
2297 | } |
2298 | EXPORT_SYMBOL_GPL(dlm_register_eviction_cb); |
2299 | |
2300 | void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb) |
2301 | { |
2302 | down_write(sem: &dlm_callback_sem); |
2303 | list_del_init(entry: &cb->ec_item); |
2304 | up_write(sem: &dlm_callback_sem); |
2305 | } |
2306 | EXPORT_SYMBOL_GPL(dlm_unregister_eviction_cb); |
2307 | |
2308 | static int __init dlm_init(void) |
2309 | { |
2310 | int status; |
2311 | |
2312 | status = dlm_init_mle_cache(); |
2313 | if (status) { |
2314 | mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n" ); |
2315 | goto error; |
2316 | } |
2317 | |
2318 | status = dlm_init_master_caches(); |
2319 | if (status) { |
2320 | mlog(ML_ERROR, "Could not create o2dlm_lockres and " |
2321 | "o2dlm_lockname slabcaches\n" ); |
2322 | goto error; |
2323 | } |
2324 | |
2325 | status = dlm_init_lock_cache(); |
2326 | if (status) { |
2327 | mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n" ); |
2328 | goto error; |
2329 | } |
2330 | |
2331 | status = dlm_register_net_handlers(); |
2332 | if (status) { |
2333 | mlog(ML_ERROR, "Unable to register network handlers\n" ); |
2334 | goto error; |
2335 | } |
2336 | |
2337 | dlm_create_debugfs_root(); |
2338 | |
2339 | return 0; |
2340 | error: |
2341 | dlm_unregister_net_handlers(); |
2342 | dlm_destroy_lock_cache(); |
2343 | dlm_destroy_master_caches(); |
2344 | dlm_destroy_mle_cache(); |
2345 | return -1; |
2346 | } |
2347 | |
2348 | static void __exit dlm_exit (void) |
2349 | { |
2350 | dlm_destroy_debugfs_root(); |
2351 | dlm_unregister_net_handlers(); |
2352 | dlm_destroy_lock_cache(); |
2353 | dlm_destroy_master_caches(); |
2354 | dlm_destroy_mle_cache(); |
2355 | } |
2356 | |
2357 | MODULE_AUTHOR("Oracle" ); |
2358 | MODULE_LICENSE("GPL" ); |
2359 | MODULE_DESCRIPTION("OCFS2 Distributed Lock Management" ); |
2360 | |
2361 | module_init(dlm_init); |
2362 | module_exit(dlm_exit); |
2363 | |