// SPDX-License-Identifier: GPL-2.0-or-later
/* Handle fileserver selection and rotation.
 *
 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/sched/signal.h>
#include "internal.h"
#include "afs_fs.h"
#include "protocol_uae.h"

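/*
 * Release the per-server state array attached to an operation, putting the
 * endpoint-state ref that each element holds.
 */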
void afs_clear_server_states(struct afs_operation *op)
{
	unsigned int i;

	if (op->server_states) {
		for (i = 0; i < op->server_list->nr_servers; i++)
			afs_put_endpoint_state(op->server_states[i].endpoint_state,
					       afs_estate_trace_put_server_state);
		kfree(op->server_states);
	}
}

/*
 * Begin iteration through a server list, starting with the vnode's last used
 * server if possible, or the last recorded good server if not.
 */
static bool afs_start_fs_iteration(struct afs_operation *op,
				   struct afs_vnode *vnode)
{
	struct afs_server *server;
	void *cb_server;
	int i;

	trace_afs_rotate(op, afs_rotate_trace_start, 0);

	read_lock(&op->volume->servers_lock);
	op->server_list = afs_get_serverlist(
		rcu_dereference_protected(op->volume->servers,
					  lockdep_is_held(&op->volume->servers_lock)));
	read_unlock(&op->volume->servers_lock);

	op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
				    GFP_KERNEL);
	if (!op->server_states) {
		afs_op_nomem(op);
		trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
		return false;
	}

	rcu_read_lock();
	for (i = 0; i < op->server_list->nr_servers; i++) {
		struct afs_endpoint_state *estate;
		struct afs_server_state *s = &op->server_states[i];

		server = op->server_list->servers[i].server;
		estate = rcu_dereference(server->endpoint_state);
		s->endpoint_state = afs_get_endpoint_state(estate,
							   afs_estate_trace_get_server_state);
		s->probe_seq = estate->probe_seq;
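		/* Record the untried addresses as a bitmap (bit n set means
		 * address n hasn't been tried yet); e.g. nr_addrs == 3 gives
		 * an initial mask of 0b111 (0x7).  The server bitmap set up
		 * below uses the same scheme.
		 */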
		s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
		init_waitqueue_entry(&s->probe_waiter, current);
		afs_get_address_preferences(op->net, estate->addresses);
	}
	rcu_read_unlock();

	op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
	op->server_index = -1;

	cb_server = vnode->cb_server;
	if (cb_server) {
		/* See if the vnode's preferred record is still available */
		for (i = 0; i < op->server_list->nr_servers; i++) {
			server = op->server_list->servers[i].server;
			if (server == cb_server) {
				op->server_index = i;
				goto found_interest;
			}
		}

		/* If we have a lock outstanding on a server that's no longer
		 * serving this vnode, then we can't switch to another server
		 * and have to return an error.
		 */
		if (op->flags & AFS_OPERATION_CUR_ONLY) {
			afs_op_set_error(op, -ESTALE);
			trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
			return false;
		}

		/* Note that the callback promise is effectively broken */
		write_seqlock(&vnode->cb_lock);
		ASSERTCMP(cb_server, ==, vnode->cb_server);
		vnode->cb_server = NULL;
		if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
			vnode->cb_break++;
		write_sequnlock(&vnode->cb_lock);
	}

found_interest:
	return true;
}

/*
 * Post volume busy note.
 */
static void afs_busy(struct afs_operation *op, u32 abort_code)
{
	const char *m;

	switch (abort_code) {
	case VOFFLINE:		m = "offline";		break;
	case VRESTARTING:	m = "restarting";	break;
	case VSALVAGING:	m = "being salvaged";	break;
	default:		m = "busy";		break;
	}

	pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
		  op->volume->vid, op->volume->name, &op->server->uuid, m);
}

/*
 * Sleep and retry the operation to the same fileserver.
 */
static bool afs_sleep_and_retry(struct afs_operation *op)
{
	trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
	if (!(op->flags & AFS_OPERATION_UNINTR)) {
		msleep_interruptible(1000);
		if (signal_pending(current)) {
			afs_op_set_error(op, -ERESTARTSYS);
			return false;
		}
	} else {
		msleep(1000);
	}

	return true;
}

/*
 * Select the fileserver to use. May be called multiple times to rotate
 * through the fileservers. Returns true if the operation should be (re)issued
 * to the server and address now selected, or false when iteration has stopped
 * and the outcome (success or error) is recorded in the operation.
 */
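/*
 * Minimal usage sketch (cf. the operation loop in afs_wait_for_operation(),
 * which drives this function):
 *
 *	while (afs_select_fileserver(op)) {
 *		<issue the RPC to op->server via the address at
 *		 op->addr_index and record the result in the op>
 *	}
 */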
bool afs_select_fileserver(struct afs_operation *op)
{
	struct afs_addr_list *alist;
	struct afs_server *server;
	struct afs_vnode *vnode = op->file[0].vnode;
	unsigned long set, failed;
	s32 abort_code = op->call_abort_code;
	int best_prio = 0;
	int error = op->call_error, addr_index, i, j;

	op->nr_iterations++;

	_enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
	       op->debug_id, op->nr_iterations, op->volume->vid,
	       op->server_index, op->untried_servers,
	       op->addr_index, op->addr_tried,
	       error, abort_code);

	if (op->flags & AFS_OPERATION_STOP) {
		trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
		_leave(" = f [stopped]");
		return false;
	}

	if (op->nr_iterations == 0)
		goto start;

	WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
	trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);

	/* Evaluate the result of the previous operation, if there was one. */
	switch (op->call_error) {
	case 0:
		clear_bit(AFS_SE_VOLUME_OFFLINE,
			  &op->server_list->servers[op->server_index].flags);
		clear_bit(AFS_SE_VOLUME_BUSY,
			  &op->server_list->servers[op->server_index].flags);
		op->cumul_error.responded = true;

		/* We succeeded, but we may need to redo the op from another
		 * server if we're looking at a set of RO volumes where some of
		 * the servers have not yet been brought up to date lest we
		 * regress the data. We only switch to the new version once
		 * >=50% of the servers are updated.
		 */
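		/* (afs_update_volume_state() returns a negative error to fail
		 * the op, 1 if the op should be retried from the top or 0 to
		 * carry on.)
		 */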
		error = afs_update_volume_state(op);
		if (error != 0) {
			if (error == 1) {
				afs_sleep_and_retry(op);
				goto restart_from_beginning;
			}
			afs_op_set_error(op, error);
			goto failed;
		}
		fallthrough;
	default:
		/* Success or local failure. Stop. */
		afs_op_set_error(op, error);
		op->flags |= AFS_OPERATION_STOP;
		trace_afs_rotate(op, afs_rotate_trace_stop, error);
		_leave(" = f [okay/local %d]", error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds. This
		 * might involve the server being busy or the volume having been moved.
		 *
		 * Note that various V* errors should not be sent to a cache manager
		 * by a fileserver as they should be translated to more modern UAE*
		 * errors instead. IBM AFS and OpenAFS fileservers, however, do leak
		 * these abort codes.
		 */
		trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
		op->cumul_error.responded = true;
		switch (abort_code) {
		case VNOVOL:
			/* This fileserver doesn't know about the volume.
			 * - May indicate that the VL is wrong - retry once and compare
			 *   the results.
			 * - May indicate that the fileserver couldn't attach to the vol.
			 * - The volume might have been temporarily removed so that it can
			 *   be replaced by a volume restore. "vos" might have ended one
			 *   transaction and has yet to create the next.
			 * - The volume might not be blessed or might not be in-service
			 *   (administrative action).
			 */
			if (op->flags & AFS_OPERATION_VNOVOL) {
				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
				goto next_server;
			}

			write_lock(&op->volume->servers_lock);
			op->server_list->vnovol_mask |= 1 << op->server_index;
			write_unlock(&op->volume->servers_lock);

			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
			error = afs_check_volume_status(op->volume, op);
			if (error < 0) {
				afs_op_set_error(op, error);
				goto failed;
			}

			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
				afs_op_set_error(op, -ENOMEDIUM);
				goto failed;
			}

			/* If the server list didn't change, then assume that
			 * it's the fileserver having trouble.
			 */
			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
				goto next_server;
			}

			/* Try again */
			op->flags |= AFS_OPERATION_VNOVOL;
			_leave(" = t [vnovol]");
			return true;

		case VVOLEXISTS:
		case VONLINE:
			/* These should not be returned from the fileserver. */
			pr_warn("Fileserver returned unexpected abort %d\n",
				abort_code);
			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
			goto next_server;

		case VNOSERVICE:
			/* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
			 * if the volume was neither in-service nor administratively
			 * blessed. All usage was replaced by VNOVOL because AFS 3.1 and
			 * earlier cache managers did not handle VNOSERVICE and assumed
			 * it was the client OSes errno 105.
			 *
			 * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
			 * fileserver idle dead time error which was sent in place of
			 * RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
			 * fileserver took too long to send a reply to the client.
			 * RX_CALL_TIMEOUT would have caused the cache manager to mark the
			 * server down whereas VNOSERVICE since AFS 3.2 would cause the
			 * cache manager to temporarily (up to 15 minutes) mark the volume
			 * instance as unusable.
			 *
			 * The idle dead logic resulted in cache inconsistency since a
			 * state changing call that the cache manager assumed was dead
			 * could still be processed to completion by the fileserver. This
			 * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
			 * returned. However, many 1.4.8 through 1.6.24 fileservers are
			 * still in existence.
			 *
			 * AuriStorFS fileservers have never returned VNOSERVICE.
			 *
			 * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
			 */
		case RX_CALL_TIMEOUT:
			afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
			goto next_server;

		case VSALVAGING: /* This error should not be leaked to cache managers
				  * but is from OpenAFS demand attach fileservers.
				  * It should be treated as an alias for VOFFLINE.
				  */
		case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
		case VOFFLINE:
			/* The volume is in use by the volserver or another volume utility
			 * for an operation that might alter the contents. The volume is
			 * expected to come back but it might take a long time (could be
			 * days).
			 */
			if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
					      &op->server_list->servers[op->server_index].flags)) {
				afs_busy(op, abort_code);
				clear_bit(AFS_SE_VOLUME_BUSY,
					  &op->server_list->servers[op->server_index].flags);
			}
			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
				afs_op_set_error(op, -EADV);
				goto failed;
			}
			goto busy;

		case VRESTARTING: /* The fileserver is either shutting down or starting up. */
		case VBUSY:
			/* The volume is in use by the volserver or another volume
			 * utility for an operation that is not expected to alter the
			 * contents of the volume. VBUSY does not need to be returned
			 * for a ROVOL or BACKVOL bound to an ITBusy volserver
			 * transaction. The fileserver is permitted to continue serving
			 * content from ROVOLs and BACKVOLs during an ITBusy transaction
			 * because the content will not change. However, many fileserver
			 * releases do return VBUSY for ROVOL and BACKVOL instances under
			 * many circumstances.
			 *
			 * Retry after going round all the servers unless we have a file
			 * lock we need to maintain.
			 */
			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
				afs_op_set_error(op, -EBUSY);
				goto failed;
			}
			if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
					      &op->server_list->servers[op->server_index].flags)) {
				afs_busy(op, abort_code);
				clear_bit(AFS_SE_VOLUME_OFFLINE,
					  &op->server_list->servers[op->server_index].flags);
			}
		busy:
			if (op->flags & AFS_OPERATION_CUR_ONLY) {
				if (!afs_sleep_and_retry(op))
					goto failed;

				/* Retry with same server & address */
				_leave(" = t [vbusy]");
				return true;
			}

			op->flags |= AFS_OPERATION_VBUSY;
			goto next_server;

		case VMOVED:
			/* The volume migrated to another server. We consider
			 * all locks and callbacks broken and request an update
			 * from the VLDB.
			 *
			 * We also limit the number of VMOVED hops we will
			 * honour, just in case someone sets up a loop.
			 */
			if (op->flags & AFS_OPERATION_VMOVED) {
				afs_op_set_error(op, -EREMOTEIO);
				goto failed;
			}
			op->flags |= AFS_OPERATION_VMOVED;

			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
			error = afs_check_volume_status(op->volume, op);
			if (error < 0) {
				afs_op_set_error(op, error);
				goto failed;
			}

			/* If the server list didn't change, then the VLDB is
			 * out of sync with the fileservers. This is hopefully
			 * a temporary condition, however, so we don't want to
			 * permanently block access to the file.
			 *
			 * TODO: Try other fileservers if we can.
			 *
			 * TODO: Retry a few times with sleeps.
			 */
			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
				afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
				goto failed;
			}

			goto restart_from_beginning;

		case UAEIO:
		case VIO:
			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
			if (op->volume->type != AFSVL_RWVOL)
				goto next_server;
			goto failed;

		case VDISKFULL:
		case UAENOSPC:
			/* The partition is full. Only applies to RWVOLs.
			 * Translate locally and return ENOSPC.
			 * No replicas to failover to.
			 */
			afs_op_set_error(op, -ENOSPC);
			goto failed_but_online;

		case VOVERQUOTA:
		case UAEDQUOT:
			/* Volume is full. Only applies to RWVOLs.
			 * Translate locally and return EDQUOT.
			 * No replicas to failover to.
			 */
			afs_op_set_error(op, -EDQUOT);
			goto failed_but_online;

		default:
			afs_op_accumulate_error(op, error, abort_code);
		failed_but_online:
			clear_bit(AFS_SE_VOLUME_OFFLINE,
				  &op->server_list->servers[op->server_index].flags);
			clear_bit(AFS_SE_VOLUME_BUSY,
				  &op->server_list->servers[op->server_index].flags);
			goto failed;
		}

	case -ETIMEDOUT:
	case -ETIME:
		if (afs_op_error(op) != -EDESTADDRREQ)
			goto iterate_address;
		fallthrough;
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
		_debug("no conn");
		afs_op_accumulate_error(op, error, 0);
		goto iterate_address;

	case -ENETRESET:
		pr_warn("kAFS: Peer reset %s (op=%x)\n",
			op->type ? op->type->name : "???", op->debug_id);
		fallthrough;
	case -ECONNRESET:
		_debug("call reset");
		afs_op_set_error(op, error);
		goto failed;
	}

restart_from_beginning:
	trace_afs_rotate(op, afs_rotate_trace_restart, 0);
	_debug("restart");
	op->estate = NULL;
	op->server = NULL;
	afs_clear_server_states(op);
	op->server_states = NULL;
	afs_put_serverlist(op->net, op->server_list);
	op->server_list = NULL;
start:
	_debug("start");
	ASSERTCMP(op->estate, ==, NULL);
	/* See if we need to do an update of the volume record. Note that the
	 * volume may have moved or even have been deleted.
	 */
	error = afs_check_volume_status(op->volume, op);
	trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
	if (error < 0) {
		afs_op_set_error(op, error);
		goto failed;
	}

	if (!afs_start_fs_iteration(op, vnode))
		goto failed;

	_debug("__ VOL %llx __", op->volume->vid);

pick_server:
	_debug("pick [%lx]", op->untried_servers);
	ASSERTCMP(op->estate, ==, NULL);

	error = afs_wait_for_fs_probes(op, op->server_states,
				       !(op->flags & AFS_OPERATION_UNINTR));
	switch (error) {
	case 0: /* No untried responsive servers and no outstanding probes */
		trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
		goto no_more_servers;
	case 1: /* Got a response */
		trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
		break;
	case 2: /* Probe data superseded */
		trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
		goto restart_from_beginning;
	default:
		trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
		afs_op_set_error(op, error);
		goto failed;
	}

	/* Pick the untried server with the highest priority untried endpoint.
	 * If we have outstanding callbacks, we stick with the server we're
	 * already using if we can.
	 */
	if (op->server) {
		_debug("server %u", op->server_index);
		if (test_bit(op->server_index, &op->untried_servers))
			goto selected_server;
		op->server = NULL;
		_debug("no server");
	}

	rcu_read_lock();
	op->server_index = -1;
	best_prio = -1;
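	/* Scan the untried, non-excluded, responding servers; for each, find
	 * its highest-priority usable address and keep the server whose best
	 * address outranks the rest.
	 */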
	for (i = 0; i < op->server_list->nr_servers; i++) {
		struct afs_endpoint_state *es;
		struct afs_server_entry *se = &op->server_list->servers[i];
		struct afs_addr_list *sal;
		struct afs_server *s = se->server;

		if (!test_bit(i, &op->untried_servers) ||
		    test_bit(AFS_SE_EXCLUDED, &se->flags) ||
		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
			continue;
		es = op->server_states[i].endpoint_state;
		sal = es->addresses;

		afs_get_address_preferences_rcu(op->net, sal);
		for (j = 0; j < sal->nr_addrs; j++) {
			if (!sal->addrs[j].peer)
				continue;
			if (sal->addrs[j].prio > best_prio) {
				op->server_index = i;
				best_prio = sal->addrs[j].prio;
			}
		}
	}
	rcu_read_unlock();

	if (op->server_index == -1)
		goto no_more_servers;

selected_server:
	trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
	_debug("use %d prio %u", op->server_index, best_prio);
	__clear_bit(op->server_index, &op->untried_servers);

	/* We're starting on a different fileserver from the list. We need to
	 * check it, create a callback intercept, find its address list and
	 * probe its capabilities before we use it.
	 */
	ASSERTCMP(op->estate, ==, NULL);
	server = op->server_list->servers[op->server_index].server;

	if (!afs_check_server_record(op, server, op->key))
		goto failed;

	_debug("USING SERVER: %pU", &server->uuid);

	op->flags |= AFS_OPERATION_RETRY_SERVER;
	op->server = server;
	if (vnode->cb_server != server) {
		vnode->cb_server = server;
		vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
		atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
	}

retry_server:
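	/* Reset the address cursor so that the selected server's address list
	 * is walked from scratch.
	 */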
	op->addr_tried = 0;
	op->addr_index = -1;

iterate_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	op->estate = op->server_states[op->server_index].endpoint_state;
	set = READ_ONCE(op->estate->responsive_set);
	failed = READ_ONCE(op->estate->failed_set);
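	/* Compute the candidate set: responsive addresses, minus those that
	 * have failed and those already tried; e.g. rs=0x7, fs=0x1 and
	 * tried=0x2 leaves 0x4 as the only candidate.
	 */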
	_debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
	set &= ~(failed | op->addr_tried);
	trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
	if (!set)
		goto wait_for_more_probe_results;

	alist = op->estate->addresses;
	best_prio = -1;
	addr_index = 0;
	for (i = 0; i < alist->nr_addrs; i++) {
		if (alist->addrs[i].prio > best_prio) {
			addr_index = i;
			best_prio = alist->addrs[i].prio;
		}
	}

	alist->preferred = addr_index;

	op->addr_index = addr_index;
	set_bit(addr_index, &op->addr_tried);

	op->volsync.creation = TIME64_MIN;
	op->volsync.update = TIME64_MIN;
	op->call_responded = false;
	_debug("address [%u] %u/%u %pISp",
	       op->server_index, addr_index, alist->nr_addrs,
	       rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
	_leave(" = t");
	return true;

wait_for_more_probe_results:
	error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
					  !(op->flags & AFS_OPERATION_UNINTR));
	if (!error)
		goto iterate_address;

	/* We've now had a failure to respond on all of a server's addresses -
	 * immediately probe them again and consider retrying the server.
	 */
	trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
	afs_probe_fileserver(op->net, op->server);
	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
		error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
						  !(op->flags & AFS_OPERATION_UNINTR));
		switch (error) {
		case 0:
			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
			trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
			goto retry_server;
		case -ERESTARTSYS:
			afs_op_set_error(op, error);
			goto failed;
		case -ETIME:
		case -EDESTADDRREQ:
			goto next_server;
		}
	}
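	/* On any other probe outcome, fall through and move on to the next
	 * server in the rotation.
	 */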

next_server:
	trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
	_debug("next");
	op->estate = NULL;
	goto pick_server;

no_more_servers:
	/* That's all the servers poked to no good effect. Try again if some
	 * of them were busy.
	 */
	trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
	if (op->flags & AFS_OPERATION_VBUSY) {
		afs_sleep_and_retry(op);
		op->flags &= ~AFS_OPERATION_VBUSY;
		goto restart_from_beginning;
	}

	rcu_read_lock();
	for (i = 0; i < op->server_list->nr_servers; i++) {
		struct afs_endpoint_state *estate;

		estate = op->server_states[i].endpoint_state;
		error = READ_ONCE(estate->error);
		if (error < 0)
			afs_op_accumulate_error(op, error, estate->abort_code);
	}
	rcu_read_unlock();

failed:
	trace_afs_rotate(op, afs_rotate_trace_failed, 0);
	op->flags |= AFS_OPERATION_STOP;
	op->estate = NULL;
	_leave(" = f [failed %d]", afs_op_error(op));
	return false;
}

/*
 * Dump cursor state in the case of the error being EDESTADDRREQ.
 */
void afs_dump_edestaddrreq(const struct afs_operation *op)
{
	static int count;
	int i;

	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
		return;
	count++;

	rcu_read_lock();

	pr_notice("EDESTADDR occurred\n");
	pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
		  op->file[0].cb_break_before,
		  op->file[1].cb_break_before, op->flags, op->cumul_error.error);
	pr_notice("OP: ut=%lx ix=%d ni=%u\n",
		  op->untried_servers, op->server_index, op->nr_iterations);
	pr_notice("OP: call er=%d ac=%d r=%u\n",
		  op->call_error, op->call_abort_code, op->call_responded);

	if (op->server_list) {
		const struct afs_server_list *sl = op->server_list;

		pr_notice("FC: SL nr=%u vnov=%hx\n",
			  sl->nr_servers, sl->vnovol_mask);
		for (i = 0; i < sl->nr_servers; i++) {
			const struct afs_server *s = sl->servers[i].server;
			const struct afs_endpoint_state *e =
				rcu_dereference(s->endpoint_state);
			const struct afs_addr_list *a = e->addresses;

			pr_notice("FC: server fl=%lx av=%u %pU\n",
				  s->flags, s->addr_version, &s->uuid);
			pr_notice("FC: - pq=%x R=%lx F=%lx\n",
				  e->probe_seq, e->responsive_set, e->failed_set);
			if (a) {
				pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
					  a->version,
					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
					  a->preferred);
				if (a == e->addresses)
					pr_notice("FC: - current\n");
			}
		}
	}

	pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
	rcu_read_unlock();
}