1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* AFS volume management |
3 | * |
4 | * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. |
5 | * Written by David Howells (dhowells@redhat.com) |
6 | */ |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/slab.h> |
10 | #include "internal.h" |
11 | |
12 | static unsigned __read_mostly afs_volume_record_life = 60 * 60; |
13 | |
14 | static void afs_destroy_volume(struct work_struct *work); |
15 | |
16 | /* |
17 | * Insert a volume into a cell. If there's an existing volume record, that is |
18 | * returned instead with a ref held. |
19 | */ |
20 | static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, |
21 | struct afs_volume *volume) |
22 | { |
23 | struct afs_volume *p; |
24 | struct rb_node *parent = NULL, **pp; |
25 | |
26 | write_seqlock(sl: &cell->volume_lock); |
27 | |
28 | pp = &cell->volumes.rb_node; |
29 | while (*pp) { |
30 | parent = *pp; |
31 | p = rb_entry(parent, struct afs_volume, cell_node); |
32 | if (p->vid < volume->vid) { |
33 | pp = &(*pp)->rb_left; |
34 | } else if (p->vid > volume->vid) { |
35 | pp = &(*pp)->rb_right; |
36 | } else { |
37 | if (afs_try_get_volume(volume: p, reason: afs_volume_trace_get_cell_insert)) { |
38 | volume = p; |
39 | goto found; |
40 | } |
41 | |
42 | set_bit(AFS_VOLUME_RM_TREE, addr: &volume->flags); |
43 | rb_replace_node_rcu(victim: &p->cell_node, new: &volume->cell_node, root: &cell->volumes); |
44 | } |
45 | } |
46 | |
47 | rb_link_node_rcu(node: &volume->cell_node, parent, rb_link: pp); |
48 | rb_insert_color(&volume->cell_node, &cell->volumes); |
49 | hlist_add_head_rcu(n: &volume->proc_link, h: &cell->proc_volumes); |
50 | |
51 | found: |
52 | write_sequnlock(sl: &cell->volume_lock); |
53 | return volume; |
54 | |
55 | } |
56 | |
57 | static void afs_remove_volume_from_cell(struct afs_volume *volume) |
58 | { |
59 | struct afs_cell *cell = volume->cell; |
60 | |
61 | if (!hlist_unhashed(h: &volume->proc_link)) { |
62 | trace_afs_volume(vid: volume->vid, ref: refcount_read(r: &cell->ref), |
63 | reason: afs_volume_trace_remove); |
64 | write_seqlock(sl: &cell->volume_lock); |
65 | hlist_del_rcu(n: &volume->proc_link); |
66 | if (!test_and_set_bit(AFS_VOLUME_RM_TREE, addr: &volume->flags)) |
67 | rb_erase(&volume->cell_node, &cell->volumes); |
68 | write_sequnlock(sl: &cell->volume_lock); |
69 | } |
70 | } |
71 | |
72 | /* |
73 | * Allocate a volume record and load it up from a vldb record. |
74 | */ |
75 | static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, |
76 | struct afs_vldb_entry *vldb, |
77 | struct afs_server_list **_slist) |
78 | { |
79 | struct afs_server_list *slist; |
80 | struct afs_volume *volume; |
81 | int ret = -ENOMEM, i; |
82 | |
83 | volume = kzalloc(size: sizeof(struct afs_volume), GFP_KERNEL); |
84 | if (!volume) |
85 | goto error_0; |
86 | |
87 | volume->vid = vldb->vid[params->type]; |
88 | volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; |
89 | volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); |
90 | volume->type = params->type; |
91 | volume->type_force = params->force; |
92 | volume->name_len = vldb->name_len; |
93 | volume->creation_time = TIME64_MIN; |
94 | volume->update_time = TIME64_MIN; |
95 | |
96 | refcount_set(r: &volume->ref, n: 1); |
97 | INIT_HLIST_NODE(h: &volume->proc_link); |
98 | INIT_WORK(&volume->destructor, afs_destroy_volume); |
99 | rwlock_init(&volume->servers_lock); |
100 | mutex_init(&volume->volsync_lock); |
101 | mutex_init(&volume->cb_check_lock); |
102 | rwlock_init(&volume->cb_v_break_lock); |
103 | INIT_LIST_HEAD(list: &volume->open_mmaps); |
104 | init_rwsem(&volume->open_mmaps_lock); |
105 | memcpy(volume->name, vldb->name, vldb->name_len + 1); |
106 | |
107 | for (i = 0; i < AFS_MAXTYPES; i++) |
108 | volume->vids[i] = vldb->vid[i]; |
109 | |
110 | slist = afs_alloc_server_list(volume, key: params->key, vldb); |
111 | if (IS_ERR(ptr: slist)) { |
112 | ret = PTR_ERR(ptr: slist); |
113 | goto error_1; |
114 | } |
115 | |
116 | *_slist = slist; |
117 | rcu_assign_pointer(volume->servers, slist); |
118 | trace_afs_volume(vid: volume->vid, ref: 1, reason: afs_volume_trace_alloc); |
119 | return volume; |
120 | |
121 | error_1: |
122 | afs_put_cell(volume->cell, afs_cell_trace_put_vol); |
123 | kfree(objp: volume); |
124 | error_0: |
125 | return ERR_PTR(error: ret); |
126 | } |
127 | |
128 | /* |
129 | * Look up or allocate a volume record. |
130 | */ |
131 | static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, |
132 | struct afs_vldb_entry *vldb) |
133 | { |
134 | struct afs_server_list *slist; |
135 | struct afs_volume *candidate, *volume; |
136 | |
137 | candidate = afs_alloc_volume(params, vldb, slist: &slist); |
138 | if (IS_ERR(ptr: candidate)) |
139 | return candidate; |
140 | |
141 | volume = afs_insert_volume_into_cell(cell: params->cell, volume: candidate); |
142 | if (volume == candidate) |
143 | afs_attach_volume_to_servers(volume, slist); |
144 | else |
145 | afs_put_volume(volume: candidate, reason: afs_volume_trace_put_cell_dup); |
146 | return volume; |
147 | } |
148 | |
149 | /* |
150 | * Look up a VLDB record for a volume. |
151 | */ |
152 | static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, |
153 | struct key *key, |
154 | const char *volname, |
155 | size_t volnamesz) |
156 | { |
157 | struct afs_vldb_entry *vldb = ERR_PTR(error: -EDESTADDRREQ); |
158 | struct afs_vl_cursor vc; |
159 | int ret; |
160 | |
161 | if (!afs_begin_vlserver_operation(&vc, cell, key)) |
162 | return ERR_PTR(error: -ERESTARTSYS); |
163 | |
164 | while (afs_select_vlserver(&vc)) { |
165 | vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); |
166 | } |
167 | |
168 | ret = afs_end_vlserver_operation(&vc); |
169 | return ret < 0 ? ERR_PTR(error: ret) : vldb; |
170 | } |
171 | |
172 | /* |
173 | * Look up a volume in the VL server and create a candidate volume record for |
174 | * it. |
175 | * |
176 | * The volume name can be one of the following: |
177 | * "%[cell:]volume[.]" R/W volume |
178 | * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), |
179 | * or R/W (rwparent=1) volume |
180 | * "%[cell:]volume.readonly" R/O volume |
181 | * "#[cell:]volume.readonly" R/O volume |
182 | * "%[cell:]volume.backup" Backup volume |
183 | * "#[cell:]volume.backup" Backup volume |
184 | * |
185 | * The cell name is optional, and defaults to the current cell. |
186 | * |
187 | * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin |
188 | * Guide |
189 | * - Rule 1: Explicit type suffix forces access of that type or nothing |
190 | * (no suffix, then use Rule 2 & 3) |
191 | * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W |
192 | * if not available |
193 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless |
194 | * explicitly told otherwise |
195 | */ |
196 | struct afs_volume *afs_create_volume(struct afs_fs_context *params) |
197 | { |
198 | struct afs_vldb_entry *vldb; |
199 | struct afs_volume *volume; |
200 | unsigned long type_mask = 1UL << params->type; |
201 | |
202 | vldb = afs_vl_lookup_vldb(cell: params->cell, key: params->key, |
203 | volname: params->volname, volnamesz: params->volnamesz); |
204 | if (IS_ERR(ptr: vldb)) |
205 | return ERR_CAST(ptr: vldb); |
206 | |
207 | if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { |
208 | volume = ERR_PTR(error: vldb->error); |
209 | goto error; |
210 | } |
211 | |
212 | /* Make the final decision on the type we want */ |
213 | volume = ERR_PTR(error: -ENOMEDIUM); |
214 | if (params->force) { |
215 | if (!(vldb->flags & type_mask)) |
216 | goto error; |
217 | } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { |
218 | params->type = AFSVL_ROVOL; |
219 | } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { |
220 | params->type = AFSVL_RWVOL; |
221 | } else { |
222 | goto error; |
223 | } |
224 | |
225 | volume = afs_lookup_volume(params, vldb); |
226 | |
227 | error: |
228 | kfree(objp: vldb); |
229 | return volume; |
230 | } |
231 | |
232 | /* |
233 | * Destroy a volume record |
234 | */ |
235 | static void afs_destroy_volume(struct work_struct *work) |
236 | { |
237 | struct afs_volume *volume = container_of(work, struct afs_volume, destructor); |
238 | struct afs_server_list *slist = rcu_access_pointer(volume->servers); |
239 | |
240 | _enter("%p" , volume); |
241 | |
242 | #ifdef CONFIG_AFS_FSCACHE |
243 | ASSERTCMP(volume->cache, ==, NULL); |
244 | #endif |
245 | |
246 | afs_detach_volume_from_servers(volume, slist); |
247 | afs_remove_volume_from_cell(volume); |
248 | afs_put_serverlist(volume->cell->net, slist); |
249 | afs_put_cell(volume->cell, afs_cell_trace_put_vol); |
250 | trace_afs_volume(vid: volume->vid, ref: refcount_read(r: &volume->ref), |
251 | reason: afs_volume_trace_free); |
252 | kfree_rcu(volume, rcu); |
253 | |
254 | _leave(" [destroyed]" ); |
255 | } |
256 | |
257 | /* |
258 | * Try to get a reference on a volume record. |
259 | */ |
260 | bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason) |
261 | { |
262 | int r; |
263 | |
264 | if (__refcount_inc_not_zero(r: &volume->ref, oldp: &r)) { |
265 | trace_afs_volume(vid: volume->vid, ref: r + 1, reason); |
266 | return true; |
267 | } |
268 | return false; |
269 | } |
270 | |
271 | /* |
272 | * Get a reference on a volume record. |
273 | */ |
274 | struct afs_volume *afs_get_volume(struct afs_volume *volume, |
275 | enum afs_volume_trace reason) |
276 | { |
277 | if (volume) { |
278 | int r; |
279 | |
280 | __refcount_inc(r: &volume->ref, oldp: &r); |
281 | trace_afs_volume(vid: volume->vid, ref: r + 1, reason); |
282 | } |
283 | return volume; |
284 | } |
285 | |
286 | |
287 | /* |
288 | * Drop a reference on a volume record. |
289 | */ |
290 | void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason) |
291 | { |
292 | if (volume) { |
293 | afs_volid_t vid = volume->vid; |
294 | bool zero; |
295 | int r; |
296 | |
297 | zero = __refcount_dec_and_test(r: &volume->ref, oldp: &r); |
298 | trace_afs_volume(vid, ref: r - 1, reason); |
299 | if (zero) |
300 | schedule_work(work: &volume->destructor); |
301 | } |
302 | } |
303 | |
/*
 * Activate a volume.  With CONFIG_AFS_FSCACHE enabled this acquires an
 * fscache volume cookie keyed on "afs,<cell name>,<volume ID in hex>" and
 * stores it in the volume record; otherwise it does nothing.
 *
 * Returns 0 on success, -ENOMEM if the key could not be formatted, or the
 * error from fscache_acquire_volume().  -EBUSY from fscache is not treated as
 * fatal: it is logged and the volume carries on without a cache cookie.
 */
int afs_activate_volume(struct afs_volume *volume)
{
#ifdef CONFIG_AFS_FSCACHE
	struct fscache_volume *vcookie;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "afs,%s,%llx",
			 volume->cell->name, volume->vid);
	if (!name)
		return -ENOMEM;

	vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
	if (!IS_ERR(vcookie)) {
		volume->cache = vcookie;
	} else if (vcookie == ERR_PTR(-EBUSY)) {
		pr_err("AFS: Cache volume key already in use (%s)\n", name);
		volume->cache = NULL;
	} else {
		/* Any other failure is passed up; ->cache is left alone. */
		ret = PTR_ERR(vcookie);
	}

	kfree(name);
	return ret;
#else
	return 0;
#endif
}
332 | |
333 | /* |
334 | * Deactivate a volume. |
335 | */ |
336 | void afs_deactivate_volume(struct afs_volume *volume) |
337 | { |
338 | _enter("%s" , volume->name); |
339 | |
340 | #ifdef CONFIG_AFS_FSCACHE |
341 | fscache_relinquish_volume(volume: volume->cache, NULL, |
342 | test_bit(AFS_VOLUME_DELETED, &volume->flags)); |
343 | volume->cache = NULL; |
344 | #endif |
345 | |
346 | _leave("" ); |
347 | } |
348 | |
349 | /* |
350 | * Query the VL service to update the volume status. |
351 | */ |
352 | static int afs_update_volume_status(struct afs_volume *volume, struct key *key) |
353 | { |
354 | struct afs_server_list *new, *old, *discard; |
355 | struct afs_vldb_entry *vldb; |
356 | char idbuf[24]; |
357 | int ret, idsz; |
358 | |
359 | _enter("" ); |
360 | |
361 | /* We look up an ID by passing it as a decimal string in the |
362 | * operation's name parameter. |
363 | */ |
364 | idsz = snprintf(buf: idbuf, size: sizeof(idbuf), fmt: "%llu" , volume->vid); |
365 | |
366 | vldb = afs_vl_lookup_vldb(cell: volume->cell, key, volname: idbuf, volnamesz: idsz); |
367 | if (IS_ERR(ptr: vldb)) { |
368 | ret = PTR_ERR(ptr: vldb); |
369 | goto error; |
370 | } |
371 | |
372 | /* See if the volume got renamed. */ |
373 | if (vldb->name_len != volume->name_len || |
374 | memcmp(p: vldb->name, q: volume->name, size: vldb->name_len) != 0) { |
375 | /* TODO: Use RCU'd string. */ |
376 | memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); |
377 | volume->name_len = vldb->name_len; |
378 | } |
379 | |
380 | /* See if the volume's server list got updated. */ |
381 | new = afs_alloc_server_list(volume, key, vldb); |
382 | if (IS_ERR(ptr: new)) { |
383 | ret = PTR_ERR(ptr: new); |
384 | goto error_vldb; |
385 | } |
386 | |
387 | write_lock(&volume->servers_lock); |
388 | |
389 | discard = new; |
390 | old = rcu_dereference_protected(volume->servers, |
391 | lockdep_is_held(&volume->servers_lock)); |
392 | if (afs_annotate_server_list(new, old)) { |
393 | new->seq = volume->servers_seq + 1; |
394 | rcu_assign_pointer(volume->servers, new); |
395 | smp_wmb(); |
396 | volume->servers_seq++; |
397 | discard = old; |
398 | } |
399 | |
400 | /* Check more often if replication is ongoing. */ |
401 | if (new->ro_replicating) |
402 | volume->update_at = ktime_get_real_seconds() + 10 * 60; |
403 | else |
404 | volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; |
405 | write_unlock(&volume->servers_lock); |
406 | |
407 | if (discard == old) |
408 | afs_reattach_volume_to_servers(volume, slist: new, old); |
409 | afs_put_serverlist(volume->cell->net, discard); |
410 | ret = 0; |
411 | error_vldb: |
412 | kfree(objp: vldb); |
413 | error: |
414 | _leave(" = %d" , ret); |
415 | return ret; |
416 | } |
417 | |
418 | /* |
419 | * Make sure the volume record is up to date. |
420 | */ |
421 | int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) |
422 | { |
423 | int ret, retries = 0; |
424 | |
425 | _enter("" ); |
426 | |
427 | retry: |
428 | if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) |
429 | goto wait; |
430 | if (volume->update_at <= ktime_get_real_seconds() || |
431 | test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) |
432 | goto update; |
433 | _leave(" = 0" ); |
434 | return 0; |
435 | |
436 | update: |
437 | if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, addr: &volume->flags)) { |
438 | clear_bit(AFS_VOLUME_NEEDS_UPDATE, addr: &volume->flags); |
439 | ret = afs_update_volume_status(volume, key: op->key); |
440 | if (ret < 0) |
441 | set_bit(AFS_VOLUME_NEEDS_UPDATE, addr: &volume->flags); |
442 | clear_bit_unlock(AFS_VOLUME_WAIT, addr: &volume->flags); |
443 | clear_bit_unlock(AFS_VOLUME_UPDATING, addr: &volume->flags); |
444 | wake_up_bit(word: &volume->flags, AFS_VOLUME_WAIT); |
445 | _leave(" = %d" , ret); |
446 | return ret; |
447 | } |
448 | |
449 | wait: |
450 | if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { |
451 | _leave(" = 0 [no wait]" ); |
452 | return 0; |
453 | } |
454 | |
455 | ret = wait_on_bit(word: &volume->flags, AFS_VOLUME_WAIT, |
456 | mode: (op->flags & AFS_OPERATION_UNINTR) ? |
457 | TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); |
458 | if (ret == -ERESTARTSYS) { |
459 | _leave(" = %d" , ret); |
460 | return ret; |
461 | } |
462 | |
463 | retries++; |
464 | if (retries == 4) { |
465 | _leave(" = -ESTALE" ); |
466 | return -ESTALE; |
467 | } |
468 | goto retry; |
469 | } |
470 | |