1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* Handle vlserver selection and rotation. |
3 | * |
4 | * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. |
5 | * Written by David Howells (dhowells@redhat.com) |
6 | */ |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/sched.h> |
10 | #include <linux/sched/signal.h> |
11 | #include "internal.h" |
12 | #include "afs_vl.h" |
13 | |
14 | /* |
15 | * Begin an operation on a volume location server. |
16 | */ |
17 | bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, |
18 | struct key *key) |
19 | { |
20 | static atomic_t debug_ids; |
21 | |
22 | memset(vc, 0, sizeof(*vc)); |
23 | vc->cell = cell; |
24 | vc->key = key; |
25 | vc->cumul_error.error = -EDESTADDRREQ; |
26 | vc->nr_iterations = -1; |
27 | |
28 | if (signal_pending(current)) { |
29 | vc->cumul_error.error = -EINTR; |
30 | vc->flags |= AFS_VL_CURSOR_STOP; |
31 | return false; |
32 | } |
33 | |
34 | vc->debug_id = atomic_inc_return(v: &debug_ids); |
35 | return true; |
36 | } |
37 | |
38 | /* |
39 | * Begin iteration through a server list, starting with the last used server if |
40 | * possible, or the last recorded good server if not. |
41 | */ |
42 | static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) |
43 | { |
44 | struct afs_cell *cell = vc->cell; |
45 | unsigned int dns_lookup_count; |
46 | |
47 | if (cell->dns_source == DNS_RECORD_UNAVAILABLE || |
48 | cell->dns_expiry <= ktime_get_real_seconds()) { |
49 | dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); |
50 | set_bit(AFS_CELL_FL_DO_LOOKUP, addr: &cell->flags); |
51 | afs_queue_cell(cell, afs_cell_trace_get_queue_dns); |
52 | |
53 | if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { |
54 | if (wait_var_event_interruptible( |
55 | &cell->dns_lookup_count, |
56 | smp_load_acquire(&cell->dns_lookup_count) |
57 | != dns_lookup_count) < 0) { |
58 | vc->cumul_error.error = -ERESTARTSYS; |
59 | return false; |
60 | } |
61 | } |
62 | |
63 | /* Status load is ordered after lookup counter load */ |
64 | if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { |
65 | pr_warn("No record of cell %s\n" , cell->name); |
66 | vc->cumul_error.error = -ENOENT; |
67 | return false; |
68 | } |
69 | |
70 | if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { |
71 | vc->cumul_error.error = -EDESTADDRREQ; |
72 | return false; |
73 | } |
74 | } |
75 | |
76 | read_lock(&cell->vl_servers_lock); |
77 | vc->server_list = afs_get_vlserverlist( |
78 | rcu_dereference_protected(cell->vl_servers, |
79 | lockdep_is_held(&cell->vl_servers_lock))); |
80 | read_unlock(&cell->vl_servers_lock); |
81 | if (!vc->server_list->nr_servers) |
82 | return false; |
83 | |
84 | vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1; |
85 | vc->server_index = -1; |
86 | return true; |
87 | } |
88 | |
/*
 * Select the vlserver to use.  May be called multiple times to rotate
 * through the vlservers.
 *
 * On the first call for a cursor (iteration 0) the server list is obtained
 * and probes are sent.  On each later call, vc->call_error and
 * vc->call_abort_code are taken to be the outcome of the call made to the
 * address chosen last time and determine what happens next: stop, try another
 * address on the same server, move on to the next server, or restart the
 * whole rotation once.
 *
 * Returns true if another attempt should be made, with vc->server, vc->alist
 * and vc->addr_index describing the target and vc->call_responded cleared.
 * Returns false when the rotation is over; AFS_VL_CURSOR_STOP is then set and
 * the outcome is held in vc->cumul_error.
 */
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
	struct afs_addr_list *alist = vc->alist;
	struct afs_vlserver *vlserver;
	unsigned long set, failed;
	unsigned int rtt;
	s32 abort_code = vc->call_abort_code;
	int error = vc->call_error, i;

	vc->nr_iterations++;

	_enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
	       vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
	       vc->addr_index, vc->addr_tried,
	       error, abort_code);

	if (vc->flags & AFS_VL_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* Nothing has been tried yet, so there is no result to evaluate. */
	if (vc->nr_iterations == 0)
		goto start;

	/* Record the result against the address that was just used. */
	WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);

	/* Evaluate the result of the previous operation, if there was one. */
	switch (error) {
	default:
	case 0:
		/* Success or local failure.  Stop. */
		vc->cumul_error.error = error;
		vc->flags |= AFS_VL_CURSOR_STOP;
		_leave(" = f [okay/local %d]", vc->cumul_error.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 */
		switch (abort_code) {
		case AFSVL_IO:
		case AFSVL_BADVOLOPER:
		case AFSVL_NOMEM:
			/* The server went weird. */
			afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
			//write_lock(&vc->cell->vl_servers_lock);
			//vc->server_list->weird_mask |= 1 << vc->server_index;
			//write_unlock(&vc->cell->vl_servers_lock);
			goto next_server;

		default:
			afs_prioritise_error(&vc->cumul_error, error, abort_code);
			goto failed;
		}

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		/* Couldn't reach this address; try another on the same server. */
		_debug("no conn %d", error);
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto iterate_address;

	case -ECONNRESET:
		/* Note that a restart of the rotation is worth attempting. */
		_debug("call reset");
		afs_prioritise_error(&vc->cumul_error, error, 0);
		vc->flags |= AFS_VL_CURSOR_RETRY;
		goto next_server;

	case -EOPNOTSUPP:
		_debug("notsupp");
		goto next_server;
	}

restart_from_beginning:
	_debug("restart");
	/* This label is only reached from no_more_servers, by which time
	 * next_server has already dropped the address list and cleared both
	 * alist and vc->alist.  vc->call_responded may still be set from the
	 * last call, so the list must not be dereferenced unless we actually
	 * hold one.
	 */
	if (alist &&
	    vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
	alist = vc->alist = NULL;

	afs_put_vlserverlist(vc->cell->net, vc->server_list);
	vc->server_list = NULL;
	/* Only restart the rotation once per operation. */
	if (vc->flags & AFS_VL_CURSOR_RETRIED)
		goto failed;
	vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
	_debug("start");
	ASSERTCMP(alist, ==, NULL);

	if (!afs_start_vl_iteration(vc))
		goto failed;

	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

pick_server:
	_debug("pick [%lx]", vc->untried_servers);
	ASSERTCMP(alist, ==, NULL);

	error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

	/* Use the list's preferred server if it is still untried; otherwise
	 * pick the untried, responding server with the lowest RTT.
	 */
	vc->server_index = vc->server_list->preferred;
	if (test_bit(vc->server_index, &vc->untried_servers))
		goto selected_server;

	vc->server_index = -1;
	rtt = UINT_MAX;
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (!test_bit(i, &vc->untried_servers) ||
		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			continue;
		if (s->probe.rtt <= rtt) {
			vc->server_index = i;
			rtt = s->probe.rtt;
		}
	}

	if (vc->server_index == -1)
		goto no_more_servers;

selected_server:
	_debug("use %d", vc->server_index);
	__clear_bit(vc->server_index, &vc->untried_servers);

	/* We're starting on a different vlserver from the list.  We need to
	 * check it, find its address list and probe its capabilities before we
	 * use it.
	 */
	vlserver = vc->server_list->servers[vc->server_index].server;
	vc->server = vlserver;

	_debug("USING VLSERVER: %s", vlserver->name);

	read_lock(&vlserver->lock);
	alist = rcu_dereference_protected(vlserver->addresses,
					  lockdep_is_held(&vlserver->lock));
	vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
	read_unlock(&vlserver->lock);

	vc->addr_tried = 0;
	vc->addr_index = -1;

iterate_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	set = READ_ONCE(alist->responded);
	failed = READ_ONCE(alist->probe_failed);
	vc->addr_index = READ_ONCE(alist->preferred);

	_debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);

	/* Candidates are addresses that responded to a probe, have not failed
	 * a probe and have not already been tried in this pass.
	 */
	set &= ~(failed | vc->addr_tried);

	if (!set)
		goto next_server;

	/* Fall back to the lowest-numbered candidate if the preferred address
	 * is not among them.
	 */
	if (!test_bit(vc->addr_index, &set))
		vc->addr_index = __ffs(set);

	set_bit(vc->addr_index, &vc->addr_tried);
	vc->alist = alist;

	_debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);

	vc->call_responded = false;
	_leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
	return true;

next_server:
	_debug("next");
	ASSERT(alist);
	/* If the last call got a response on a non-preferred address after
	 * the preferred one had been tried, make that address the preferred
	 * one.
	 */
	if (vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
	alist = vc->alist = NULL;
	goto pick_server;

no_more_servers:
	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 */
	if (vc->flags & AFS_VL_CURSOR_RETRY)
		goto restart_from_beginning;

	/* Fold each server's probe result into the cumulative error. */
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			vc->cumul_error.responded = true;
		afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
				     s->probe.abort_code);
	}

failed:
	if (alist) {
		if (vc->call_responded &&
		    vc->addr_index != alist->preferred &&
		    test_bit(alist->preferred, &vc->addr_tried))
			WRITE_ONCE(alist->preferred, vc->addr_index);
		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
		alist = vc->alist = NULL;
	}
	vc->flags |= AFS_VL_CURSOR_STOP;
	_leave(" = f [failed %d]", vc->cumul_error.error);
	return false;
}
320 | |
321 | /* |
322 | * Dump cursor state in the case of the error being EDESTADDRREQ. |
323 | */ |
324 | static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) |
325 | { |
326 | struct afs_cell *cell = vc->cell; |
327 | static int count; |
328 | int i; |
329 | |
330 | if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) |
331 | return; |
332 | count++; |
333 | |
334 | rcu_read_lock(); |
335 | pr_notice("EDESTADDR occurred\n" ); |
336 | pr_notice("CELL: %s err=%d\n" , cell->name, cell->error); |
337 | pr_notice("DNS: src=%u st=%u lc=%x\n" , |
338 | cell->dns_source, cell->dns_status, cell->dns_lookup_count); |
339 | pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n" , |
340 | vc->untried_servers, vc->server_index, vc->nr_iterations, |
341 | vc->flags, vc->cumul_error.error); |
342 | pr_notice("VC: call er=%d ac=%d r=%u\n" , |
343 | vc->call_error, vc->call_abort_code, vc->call_responded); |
344 | |
345 | if (vc->server_list) { |
346 | const struct afs_vlserver_list *sl = vc->server_list; |
347 | pr_notice("VC: SL nr=%u ix=%u\n" , |
348 | sl->nr_servers, sl->index); |
349 | for (i = 0; i < sl->nr_servers; i++) { |
350 | const struct afs_vlserver *s = sl->servers[i].server; |
351 | pr_notice("VC: server %s+%hu fl=%lx E=%hd\n" , |
352 | s->name, s->port, s->flags, s->probe.error); |
353 | if (s->addresses) { |
354 | const struct afs_addr_list *a = |
355 | rcu_dereference(s->addresses); |
356 | pr_notice("VC: - nr=%u/%u/%u pf=%u\n" , |
357 | a->nr_ipv4, a->nr_addrs, a->max_addrs, |
358 | a->preferred); |
359 | pr_notice("VC: - R=%lx F=%lx\n" , |
360 | a->responded, a->probe_failed); |
361 | if (a == vc->alist) |
362 | pr_notice("VC: - current\n" ); |
363 | } |
364 | } |
365 | } |
366 | |
367 | pr_notice("AC: t=%lx ax=%u\n" , vc->addr_tried, vc->addr_index); |
368 | rcu_read_unlock(); |
369 | } |
370 | |
371 | /* |
372 | * Tidy up a volume location server cursor and unlock the vnode. |
373 | */ |
374 | int afs_end_vlserver_operation(struct afs_vl_cursor *vc) |
375 | { |
376 | struct afs_net *net = vc->cell->net; |
377 | |
378 | _enter("VC=%x+%x" , vc->debug_id, vc->nr_iterations); |
379 | |
380 | switch (vc->cumul_error.error) { |
381 | case -EDESTADDRREQ: |
382 | case -EADDRNOTAVAIL: |
383 | case -ENETUNREACH: |
384 | case -EHOSTUNREACH: |
385 | afs_vl_dump_edestaddrreq(vc); |
386 | break; |
387 | } |
388 | |
389 | if (vc->alist) { |
390 | if (vc->call_responded && |
391 | vc->addr_index != vc->alist->preferred && |
392 | test_bit(vc->alist->preferred, &vc->addr_tried)) |
393 | WRITE_ONCE(vc->alist->preferred, vc->addr_index); |
394 | afs_put_addrlist(alist: vc->alist, reason: afs_alist_trace_put_vlrotate_end); |
395 | vc->alist = NULL; |
396 | } |
397 | afs_put_vlserverlist(net, vc->server_list); |
398 | return vc->cumul_error.error; |
399 | } |
400 | |