// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/net/sunrpc/svc_xprt.c
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <trace/events/sunrpc.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

static unsigned int svc_rpc_per_connection_limit __read_mostly;
module_param(svc_rpc_per_connection_limit, uint, 0644);

static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static void svc_age_temp_xprts(struct timer_list *t);
static void svc_delete_xprt(struct svc_xprt *xprt);

/* apparently the "standard" is that clients close
 * idle connections after 5 minutes, servers after
 * 6 minutes
 *   http://nfsv4bat.org/Documents/ConnectAThon/1996/nfstcp.pdf
 */
static int svc_conn_age_period = 6*60;

/* List of registered transport classes */
static DEFINE_SPINLOCK(svc_xprt_class_lock);
static LIST_HEAD(svc_xprt_class_list);

/* SMP locking strategy:
 *
 *	svc_pool->sp_lock protects most of the fields of that pool.
 *	svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
 *	when both need to be taken (rare), svc_serv->sv_lock is first.
 *	The "service mutex" protects svc_serv->sv_nrthreads.
 *	svc_sock->sk_lock protects the svc_sock->sk_deferred list
 *	and the ->sk_info_authunix cache.
 *
 *	The XPT_BUSY bit in xprt->xpt_flags prevents a transport being
 *	enqueued multiply. During normal transport processing this bit
 *	is set by svc_xprt_enqueue and cleared by svc_xprt_received.
 *	Providers should not manipulate this bit directly.
 *
 *	Some flags can be set to certain values at any time
 *	providing that certain rules are followed:
 *
 *	XPT_CONN, XPT_DATA:
 *		- Can be set or cleared at any time.
 *		- After a set, svc_xprt_enqueue must be called to enqueue
 *		  the transport for processing.
 *		- After a clear, the transport must be read/accepted.
 *		  If this succeeds, it must be set again.
 *	XPT_CLOSE:
 *		- Can set at any time. It is never cleared.
 *	XPT_DEAD:
 *		- Can only be set while XPT_BUSY is held which ensures
 *		  that no other thread will be using the transport or will
 *		  try to set XPT_DEAD.
 */

/**
 * svc_reg_xprt_class - Register a server-side RPC transport class
 * @xcl: New transport class to be registered
 *
 * Returns zero on success; otherwise a negative errno is returned.
 */
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
{
	struct svc_xprt_class *cl;
	int res = -EEXIST;

	INIT_LIST_HEAD(&xcl->xcl_list);
	spin_lock(&svc_xprt_class_lock);
	/* Make sure there isn't already a class with the same name */
	list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) {
		if (strcmp(xcl->xcl_name, cl->xcl_name) == 0)
			goto out;
	}
	list_add_tail(&xcl->xcl_list, &svc_xprt_class_list);
	res = 0;
out:
	spin_unlock(&svc_xprt_class_lock);
	return res;
}
EXPORT_SYMBOL_GPL(svc_reg_xprt_class);

/**
 * svc_unreg_xprt_class - Unregister a server-side RPC transport class
 * @xcl: Transport class to be unregistered
 *
 */
void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
{
	spin_lock(&svc_xprt_class_lock);
	list_del_init(&xcl->xcl_list);
	spin_unlock(&svc_xprt_class_lock);
}
EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
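
/*
 * Example: a minimal sketch of how a transport provider uses the two
 * class calls above. "example_xprt_class" and "example_xprt_ops" are
 * hypothetical names, not part of this file:
 *
 *	static struct svc_xprt_class example_xprt_class = {
 *		.xcl_name	 = "example",
 *		.xcl_owner	 = THIS_MODULE,
 *		.xcl_ops	 = &example_xprt_ops,
 *		.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return svc_reg_xprt_class(&example_xprt_class);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		svc_unreg_xprt_class(&example_xprt_class);
 *	}
 */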

/**
 * svc_print_xprts - Format the transport list for printing
 * @buf: target buffer for formatted address
 * @maxlen: length of target buffer
 *
 * Fills in @buf with a string containing a list of transport names, each name
 * terminated with '\n'. If the buffer is too small, some entries may be
 * missing, but it is guaranteed that all lines in the output buffer are
 * complete.
 *
 * Returns positive length of the filled-in string.
 */
int svc_print_xprts(char *buf, int maxlen)
{
	struct svc_xprt_class *xcl;
	char tmpstr[80];
	int len = 0;
	buf[0] = '\0';

	spin_lock(&svc_xprt_class_lock);
	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
		int slen;

		slen = snprintf(tmpstr, sizeof(tmpstr), "%s %d\n",
				xcl->xcl_name, xcl->xcl_max_payload);
		if (slen >= sizeof(tmpstr) || len + slen >= maxlen)
			break;
		len += slen;
		strcat(buf, tmpstr);
	}
	spin_unlock(&svc_xprt_class_lock);

	return len;
}

/**
 * svc_xprt_deferred_close - Close a transport
 * @xprt: transport instance
 *
 * Used in contexts that need to defer the work of shutting down
 * the transport to an nfsd thread.
 */
void svc_xprt_deferred_close(struct svc_xprt *xprt)
{
	if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
		svc_xprt_enqueue(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_deferred_close);
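
/*
 * Example: a minimal sketch of deferring a close from atomic context.
 * "example_state_change" is hypothetical, and it assumes the provider
 * stored its svc_xprt in sk_user_data; it mirrors the way a socket
 * provider's state-change callback, which may run in softirq context,
 * hands the actual teardown to an nfsd thread:
 *
 *	static void example_state_change(struct sock *sk)
 *	{
 *		struct svc_xprt *xprt = sk->sk_user_data;
 *
 *		if (xprt && sk->sk_state == TCP_CLOSE)
 *			svc_xprt_deferred_close(xprt);
 *	}
 */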

static void svc_xprt_free(struct kref *kref)
{
	struct svc_xprt *xprt =
		container_of(kref, struct svc_xprt, xpt_ref);
	struct module *owner = xprt->xpt_class->xcl_owner;
	if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
		svcauth_unix_info_release(xprt);
	put_cred(xprt->xpt_cred);
	put_net_track(xprt->xpt_net, &xprt->ns_tracker);
	/* See comment on corresponding get in xs_setup_bc_tcp(): */
	if (xprt->xpt_bc_xprt)
		xprt_put(xprt->xpt_bc_xprt);
	if (xprt->xpt_bc_xps)
		xprt_switch_put(xprt->xpt_bc_xps);
	trace_svc_xprt_free(xprt);
	xprt->xpt_ops->xpo_free(xprt);
	module_put(owner);
}

void svc_xprt_put(struct svc_xprt *xprt)
{
	kref_put(&xprt->xpt_ref, svc_xprt_free);
}
EXPORT_SYMBOL_GPL(svc_xprt_put);

/*
 * Called by transport drivers to initialize the transport independent
 * portion of the transport instance.
 */
void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
		   struct svc_xprt *xprt, struct svc_serv *serv)
{
	memset(xprt, 0, sizeof(*xprt));
	xprt->xpt_class = xcl;
	xprt->xpt_ops = xcl->xcl_ops;
	kref_init(&xprt->xpt_ref);
	xprt->xpt_server = serv;
	INIT_LIST_HEAD(&xprt->xpt_list);
	INIT_LIST_HEAD(&xprt->xpt_deferred);
	INIT_LIST_HEAD(&xprt->xpt_users);
	mutex_init(&xprt->xpt_mutex);
	spin_lock_init(&xprt->xpt_lock);
	set_bit(XPT_BUSY, &xprt->xpt_flags);
	xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC);
	strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
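
/*
 * Example: a minimal sketch of a provider's xpo_create method, which
 * is expected to leave XPT_BUSY set (svc_xprt_init() does that) until
 * the core calls svc_xprt_received(). "example_create" and struct
 * example_xprt are hypothetical:
 *
 *	static struct svc_xprt *example_create(struct svc_serv *serv,
 *					       struct net *net,
 *					       struct sockaddr *sa,
 *					       int salen, int flags)
 *	{
 *		struct example_xprt *ex;
 *
 *		ex = kzalloc(sizeof(*ex), GFP_KERNEL);
 *		if (!ex)
 *			return ERR_PTR(-ENOMEM);
 *		svc_xprt_init(net, &example_xprt_class, &ex->ex_xprt, serv);
 *		(bind and listen on @sa here)
 *		return &ex->ex_xprt;
 *	}
 */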

static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
					 struct svc_serv *serv,
					 struct net *net,
					 const int family,
					 const unsigned short port,
					 int flags)
{
	struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};
#if IS_ENABLED(CONFIG_IPV6)
	struct sockaddr_in6 sin6 = {
		.sin6_family		= AF_INET6,
		.sin6_addr		= IN6ADDR_ANY_INIT,
		.sin6_port		= htons(port),
	};
#endif
	struct svc_xprt *xprt;
	struct sockaddr *sap;
	size_t len;

	switch (family) {
	case PF_INET:
		sap = (struct sockaddr *)&sin;
		len = sizeof(sin);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case PF_INET6:
		sap = (struct sockaddr *)&sin6;
		len = sizeof(sin6);
		break;
#endif
	default:
		return ERR_PTR(-EAFNOSUPPORT);
	}

	xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
	if (IS_ERR(xprt))
		trace_svc_xprt_create_err(serv->sv_program->pg_name,
					  xcl->xcl_name, sap, len, xprt);
	return xprt;
}

/**
 * svc_xprt_received - start next receiver thread
 * @xprt: controlling transport
 *
 * The caller must hold the XPT_BUSY bit and must
 * not thereafter touch transport data.
 *
 * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
 * insufficient) data.
 */
void svc_xprt_received(struct svc_xprt *xprt)
{
	if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) {
		WARN_ONCE(1, "xprt=0x%p already busy!", xprt);
		return;
	}

	/* As soon as we clear busy, the xprt could be closed and
	 * 'put', so we need a reference to call svc_xprt_enqueue with:
	 */
	svc_xprt_get(xprt);
	smp_mb__before_atomic();
	clear_bit(XPT_BUSY, &xprt->xpt_flags);
	svc_xprt_enqueue(xprt);
	svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_received);
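
/*
 * Example: a rough sketch of how the XPT_BUSY hand-off is used by a
 * provider's receive path ("example_recvfrom" is hypothetical; real
 * providers differ in where they call svc_xprt_received()). The core
 * sets XPT_BUSY before calling xpo_recvfrom; the method clears
 * XPT_DATA only when nothing was read, then re-arms the transport:
 *
 *	static int example_recvfrom(struct svc_rqst *rqstp)
 *	{
 *		struct svc_xprt *xprt = rqstp->rq_xprt;
 *		int len;
 *
 *		len = (pull the next complete message, or 0);
 *		if (len == 0)
 *			clear_bit(XPT_DATA, &xprt->xpt_flags);
 *		svc_xprt_received(xprt);
 *		return len;
 *	}
 */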

void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
{
	clear_bit(XPT_TEMP, &new->xpt_flags);
	spin_lock_bh(&serv->sv_lock);
	list_add(&new->xpt_list, &serv->sv_permsocks);
	spin_unlock_bh(&serv->sv_lock);
	svc_xprt_received(new);
}

static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
			    struct net *net, const int family,
			    const unsigned short port, int flags,
			    const struct cred *cred)
{
	struct svc_xprt_class *xcl;

	spin_lock(&svc_xprt_class_lock);
	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
		struct svc_xprt *newxprt;
		unsigned short newport;

		if (strcmp(xprt_name, xcl->xcl_name))
			continue;

		if (!try_module_get(xcl->xcl_owner))
			goto err;

		spin_unlock(&svc_xprt_class_lock);
		newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
		if (IS_ERR(newxprt)) {
			module_put(xcl->xcl_owner);
			return PTR_ERR(newxprt);
		}
		newxprt->xpt_cred = get_cred(cred);
		svc_add_new_perm_xprt(serv, newxprt);
		newport = svc_xprt_local_port(newxprt);
		return newport;
	}
err:
	spin_unlock(&svc_xprt_class_lock);
	/* This errno is exposed to user space. Provide a reasonable
	 * perror msg for a bad transport. */
	return -EPROTONOSUPPORT;
}

/**
 * svc_xprt_create - Add a new listener to @serv
 * @serv: target RPC service
 * @xprt_name: transport class name
 * @net: network namespace
 * @family: network address family
 * @port: listener port
 * @flags: SVC_SOCK flags
 * @cred: credential to bind to this transport
 *
 * Return values:
 *   a positive value: New listener added successfully; the value is
 *   the bound local port number
 *   %-EPROTONOSUPPORT: Requested transport type not supported
 */
int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
		    struct net *net, const int family,
		    const unsigned short port, int flags,
		    const struct cred *cred)
{
	int err;

	err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
	if (err == -EPROTONOSUPPORT) {
		request_module("svc%s", xprt_name);
		err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
	}
	return err;
}
EXPORT_SYMBOL_GPL(svc_xprt_create);
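
/*
 * Example: adding a TCP listener on the standard NFS port (2049 and
 * the serv/net/cred values are illustrative). On success the return
 * value is the bound port, so callers test for negative errors:
 *
 *	err = svc_xprt_create(serv, "tcp", net, PF_INET, 2049,
 *			      SVC_SOCK_ANONYMOUS, cred);
 *	if (err < 0)
 *		return err;
 */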

/*
 * Copy the local and remote xprt addresses to the rqstp structure
 */
void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
	rqstp->rq_addrlen = xprt->xpt_remotelen;

	/*
	 * Destination address in request is needed for binding the
	 * source address in RPC replies/callbacks later.
	 */
	memcpy(&rqstp->rq_daddr, &xprt->xpt_local, xprt->xpt_locallen);
	rqstp->rq_daddrlen = xprt->xpt_locallen;
}
EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);

/**
 * svc_print_addr - Format rq_addr field for printing
 * @rqstp: svc_rqst struct containing address to print
 * @buf: target buffer for formatted address
 * @len: length of target buffer
 *
 */
char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
{
	return __svc_print_addr(svc_addr(rqstp), buf, len);
}
EXPORT_SYMBOL_GPL(svc_print_addr);

static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
{
	unsigned int limit = svc_rpc_per_connection_limit;
	int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);

	return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
}

static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
		if (!svc_xprt_slots_in_range(xprt))
			return false;
		atomic_inc(&xprt->xpt_nr_rqsts);
		set_bit(RQ_DATA, &rqstp->rq_flags);
	}
	return true;
}

static void svc_xprt_release_slot(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
		atomic_dec(&xprt->xpt_nr_rqsts);
		smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
		svc_xprt_enqueue(xprt);
	}
}

static bool svc_xprt_ready(struct svc_xprt *xprt)
{
	unsigned long xpt_flags;

	/*
	 * If another cpu has recently updated xpt_flags,
	 * sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to
	 * know about it; otherwise it's possible that both that cpu and
	 * this one could call svc_xprt_enqueue() without either
	 * svc_xprt_enqueue() recognizing that the conditions below
	 * are satisfied, and we could stall indefinitely:
	 */
	smp_rmb();
	xpt_flags = READ_ONCE(xprt->xpt_flags);

	trace_svc_xprt_enqueue(xprt, xpt_flags);
	if (xpt_flags & BIT(XPT_BUSY))
		return false;
	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
		return true;
	if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
		    svc_xprt_slots_in_range(xprt))
			return true;
		trace_svc_xprt_no_write_space(xprt);
		return false;
	}
	return false;
}

/**
 * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
 * @xprt: transport with data pending
 *
 */
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
	struct svc_pool *pool;

	if (!svc_xprt_ready(xprt))
		return;

	/* Mark transport as busy. It will remain in this state until
	 * the provider calls svc_xprt_received. We update XPT_BUSY
	 * atomically because it also guards against trying to enqueue
	 * the transport twice.
	 */
	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
		return;

	pool = svc_pool_for_cpu(xprt->xpt_server);

	percpu_counter_inc(&pool->sp_sockets_queued);
	lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts);

	svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);

/*
 * Dequeue the first transport, if there is one.
 */
static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
{
	struct svc_xprt *xprt = NULL;

	xprt = lwq_dequeue(&pool->sp_xprts, struct svc_xprt, xpt_ready);
	if (xprt)
		svc_xprt_get(xprt);
	return xprt;
}

/**
 * svc_reserve - change the space reserved for the reply to a request.
 * @rqstp: The request in question
 * @space: new max space to reserve
 *
 * Each request reserves some space on the output queue of the transport
 * to make sure the reply fits. This function reduces that reserved
 * space to be the amount of space used already, plus @space.
 *
 */
void svc_reserve(struct svc_rqst *rqstp, int space)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;

	space += rqstp->rq_res.head[0].iov_len;

	if (xprt && space < rqstp->rq_reserved) {
		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
		rqstp->rq_reserved = space;
		smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
		svc_xprt_enqueue(xprt);
	}
}
EXPORT_SYMBOL_GPL(svc_reserve);
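
/*
 * Worked example (numbers are illustrative): suppose a request
 * initially reserved sv_max_mesg = 1 MB, the reply head currently
 * holds 200 bytes, and the handler knows the remaining reply needs at
 * most 4 KB. Calling svc_reserve(rqstp, 4096) computes
 * space = 4096 + 200 = 4296; since 4296 < 1048576, the transport's
 * xpt_reserved drops by 1048576 - 4296 and rq_reserved becomes 4296,
 * freeing write space for other requests on the same transport.
 */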

static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
{
	if (!dr)
		return;

	xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
	kfree(dr);
}

static void svc_xprt_release(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;

	xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
	rqstp->rq_xprt_ctxt = NULL;

	free_deferred(xprt, rqstp->rq_deferred);
	rqstp->rq_deferred = NULL;

	svc_rqst_release_pages(rqstp);
	rqstp->rq_res.page_len = 0;
	rqstp->rq_res.page_base = 0;

	/* Reset response buffer and release
	 * the reservation.
	 * But first, check that enough space was reserved
	 * for the reply, otherwise we have a bug!
	 */
	if ((rqstp->rq_res.len) > rqstp->rq_reserved)
		printk(KERN_ERR "RPC request reserved %d but used %d\n",
		       rqstp->rq_reserved,
		       rqstp->rq_res.len);

	rqstp->rq_res.head[0].iov_len = 0;
	svc_reserve(rqstp, 0);
	svc_xprt_release_slot(rqstp);
	rqstp->rq_xprt = NULL;
	svc_xprt_put(xprt);
}

/**
 * svc_wake_up - Wake up a service thread for non-transport work
 * @serv: RPC service
 *
 * Some svc_serv's will have occasional work to do, even when an xprt is not
 * waiting to be serviced. This function is there to "kick" a task in one of
 * those services so that it can wake up and do that work. Note that we only
 * bother with pool 0 as we don't need to wake up more than one thread for
 * this purpose.
 */
void svc_wake_up(struct svc_serv *serv)
{
	struct svc_pool *pool = &serv->sv_pools[0];

	set_bit(SP_TASK_PENDING, &pool->sp_flags);
	svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);

int svc_port_is_privileged(struct sockaddr *sin)
{
	switch (sin->sa_family) {
	case AF_INET:
		return ntohs(((struct sockaddr_in *)sin)->sin_port)
			< PROT_SOCK;
	case AF_INET6:
		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
			< PROT_SOCK;
	default:
		return 0;
	}
}

/*
 * Make sure that we don't have too many active connections. If we have,
 * something must be dropped. It's not clear what will happen if we allow
 * "too many" connections, but when dealing with network-facing software,
 * we have to code defensively. Here we do that by imposing hard limits.
 *
 * There's no point in trying to do random drop here for DoS
 * prevention. The NFS client does 1 reconnect in 15 seconds. An
 * attacker can easily beat that.
 *
 * The only somewhat efficient mechanism would be to drop old
 * connections from the same IP first. But right now we don't even
 * record the client IP in svc_sock.
 *
 * Single-threaded services that expect a lot of clients will probably
 * need to set sv_maxconn to override the default value, which is based
 * on the number of threads.
 */
static void svc_check_conn_limits(struct svc_serv *serv)
{
	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
				(serv->sv_nrthreads+3) * 20;

	if (serv->sv_tmpcnt > limit) {
		struct svc_xprt *xprt = NULL;
		spin_lock_bh(&serv->sv_lock);
		if (!list_empty(&serv->sv_tempsocks)) {
			/* Try to help the admin */
			net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
					       serv->sv_name, serv->sv_maxconn ?
					       "max number of connections" :
					       "number of threads");
			/*
			 * Always select the oldest connection. It's not fair,
			 * but neither is life.
			 */
			xprt = list_entry(serv->sv_tempsocks.prev,
					  struct svc_xprt,
					  xpt_list);
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_get(xprt);
		}
		spin_unlock_bh(&serv->sv_lock);

		if (xprt) {
			svc_xprt_enqueue(xprt);
			svc_xprt_put(xprt);
		}
	}
}

static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
	struct svc_serv *serv = rqstp->rq_server;
	struct xdr_buf *arg = &rqstp->rq_arg;
	unsigned long pages, filled, ret;

	pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
	if (pages > RPCSVC_MAXPAGES) {
		pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
			     pages, RPCSVC_MAXPAGES);
		/* use as many pages as possible */
		pages = RPCSVC_MAXPAGES;
	}

	for (filled = 0; filled < pages; filled = ret) {
		ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
					     rqstp->rq_pages);
		if (ret > filled)
			/* Made progress, don't sleep yet */
			continue;

		set_current_state(TASK_IDLE);
		if (svc_thread_should_stop(rqstp)) {
			set_current_state(TASK_RUNNING);
			return false;
		}
		trace_svc_alloc_arg_err(pages, ret);
		memalloc_retry_wait(GFP_KERNEL);
	}
	rqstp->rq_page_end = &rqstp->rq_pages[pages];
	rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */

	/* Make arg->head point to first page and arg->pages point to rest */
	arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
	arg->head[0].iov_len = PAGE_SIZE;
	arg->pages = rqstp->rq_pages + 1;
	arg->page_base = 0;
	/* save at least one page for response */
	arg->page_len = (pages-2)*PAGE_SIZE;
	arg->len = (pages-1)*PAGE_SIZE;
	arg->tail[0].iov_len = 0;

	rqstp->rq_xid = xdr_zero;
	return true;
}

static bool
svc_thread_should_sleep(struct svc_rqst *rqstp)
{
	struct svc_pool *pool = rqstp->rq_pool;

	/* did someone call svc_wake_up? */
	if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
		return false;

	/* was a socket queued? */
	if (!lwq_empty(&pool->sp_xprts))
		return false;

	/* are we shutting down? */
	if (svc_thread_should_stop(rqstp))
		return false;

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	if (svc_is_backchannel(rqstp)) {
		if (!lwq_empty(&rqstp->rq_server->sv_cb_list))
			return false;
	}
#endif

	return true;
}

static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
{
	struct svc_pool *pool = rqstp->rq_pool;

	if (svc_thread_should_sleep(rqstp)) {
		set_current_state(TASK_IDLE | TASK_FREEZABLE);
		llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
		if (likely(svc_thread_should_sleep(rqstp)))
			schedule();

		while (!llist_del_first_this(&pool->sp_idle_threads,
					     &rqstp->rq_idle)) {
			/* Work just became available. This thread can only
			 * handle it after removing rqstp from the idle
			 * list. If that attempt failed, some other thread
			 * must have queued itself after finding no
			 * work to do, so that thread has taken responsibility
			 * for this new work. This thread can safely sleep
			 * until woken again.
			 */
			schedule();
			set_current_state(TASK_IDLE | TASK_FREEZABLE);
		}
		__set_current_state(TASK_RUNNING);
	} else {
		cond_resched();
	}
	try_to_freeze();
}

static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
{
	spin_lock_bh(&serv->sv_lock);
	set_bit(XPT_TEMP, &newxpt->xpt_flags);
	list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
	serv->sv_tmpcnt++;
	if (serv->sv_temptimer.function == NULL) {
		/* setup timer to age temp transports */
		serv->sv_temptimer.function = svc_age_temp_xprts;
		mod_timer(&serv->sv_temptimer,
			  jiffies + svc_conn_age_period * HZ);
	}
	spin_unlock_bh(&serv->sv_lock);
	svc_xprt_received(newxpt);
}

static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	struct svc_serv *serv = rqstp->rq_server;
	int len = 0;

	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
		if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
			xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
		svc_delete_xprt(xprt);
		/* Leave XPT_BUSY set on the dead xprt: */
		goto out;
	}
	if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
		struct svc_xprt *newxpt;
		/*
		 * We know this module_get will succeed because the
		 * listener holds a reference too
		 */
		__module_get(xprt->xpt_class->xcl_owner);
		svc_check_conn_limits(xprt->xpt_server);
		newxpt = xprt->xpt_ops->xpo_accept(xprt);
		if (newxpt) {
			newxpt->xpt_cred = get_cred(xprt->xpt_cred);
			svc_add_new_temp_xprt(serv, newxpt);
			trace_svc_xprt_accept(newxpt, serv->sv_name);
		} else {
			module_put(xprt->xpt_class->xcl_owner);
		}
		svc_xprt_received(xprt);
	} else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
		xprt->xpt_ops->xpo_handshake(xprt);
		svc_xprt_received(xprt);
	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
		/* XPT_DATA|XPT_DEFERRED case: */
		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
		if (rqstp->rq_deferred)
			len = svc_deferred_recv(rqstp);
		else
			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
		rqstp->rq_reserved = serv->sv_max_mesg;
		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
		if (len <= 0)
			goto out;

		trace_svc_xdr_recvfrom(&rqstp->rq_arg);

		clear_bit(XPT_OLD, &xprt->xpt_flags);

		rqstp->rq_chandle.defer = svc_defer;

		if (serv->sv_stats)
			serv->sv_stats->netcnt++;
		percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
		rqstp->rq_stime = ktime_get();
		svc_process(rqstp);
	} else
		svc_xprt_received(xprt);

out:
	rqstp->rq_res.len = 0;
	svc_xprt_release(rqstp);
}

static void svc_thread_wake_next(struct svc_rqst *rqstp)
{
	if (!svc_thread_should_sleep(rqstp))
		/* More work pending after I dequeued some,
		 * wake another worker
		 */
		svc_pool_wake_idle_thread(rqstp->rq_pool);
}

/**
 * svc_recv - Receive and process the next request on any transport
 * @rqstp: an idle RPC service thread
 *
 * This code is carefully organised not to touch any cachelines in
 * the shared svc_serv structure, only cachelines in the local
 * svc_pool.
 */
void svc_recv(struct svc_rqst *rqstp)
{
	struct svc_pool *pool = rqstp->rq_pool;

	if (!svc_alloc_arg(rqstp))
		return;

	svc_thread_wait_for_work(rqstp);

	clear_bit(SP_TASK_PENDING, &pool->sp_flags);

	if (svc_thread_should_stop(rqstp)) {
		svc_thread_wake_next(rqstp);
		return;
	}

	rqstp->rq_xprt = svc_xprt_dequeue(pool);
	if (rqstp->rq_xprt) {
		struct svc_xprt *xprt = rqstp->rq_xprt;

		svc_thread_wake_next(rqstp);
		/* Normally we will wait up to 5 seconds for any required
		 * cache information to be provided. When there are no
		 * idle threads, we reduce the wait time.
		 */
		if (pool->sp_idle_threads.first)
			rqstp->rq_chandle.thread_wait = 5 * HZ;
		else
			rqstp->rq_chandle.thread_wait = 1 * HZ;

		trace_svc_xprt_dequeue(rqstp);
		svc_handle_xprt(rqstp, xprt);
	}

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	if (svc_is_backchannel(rqstp)) {
		struct svc_serv *serv = rqstp->rq_server;
		struct rpc_rqst *req;

		req = lwq_dequeue(&serv->sv_cb_list,
				  struct rpc_rqst, rq_bc_list);
		if (req) {
			svc_thread_wake_next(rqstp);
			svc_process_bc(req, rqstp);
		}
	}
#endif
}
EXPORT_SYMBOL_GPL(svc_recv);
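
/*
 * Example: a minimal sketch of a service's worker thread in the style
 * of nfsd's main loop ("example_thread" is hypothetical); each pass
 * through svc_recv() sleeps until work arrives and then handles it:
 *
 *	static int example_thread(void *data)
 *	{
 *		struct svc_rqst *rqstp = data;
 *
 *		while (!svc_thread_should_stop(rqstp))
 *			svc_recv(rqstp);
 *		svc_exit_thread(rqstp);
 *		return 0;
 *	}
 */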

/*
 * Drop request
 */
void svc_drop(struct svc_rqst *rqstp)
{
	trace_svc_drop(rqstp);
}
EXPORT_SYMBOL_GPL(svc_drop);

/**
 * svc_send - Return reply to client
 * @rqstp: RPC transaction context
 *
 */
void svc_send(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt;
	struct xdr_buf *xb;
	int status;

	xprt = rqstp->rq_xprt;

	/* calculate over-all length */
	xb = &rqstp->rq_res;
	xb->len = xb->head[0].iov_len +
		xb->page_len +
		xb->tail[0].iov_len;
	trace_svc_xdr_sendto(rqstp->rq_xid, xb);
	trace_svc_stats_latency(rqstp);

	status = xprt->xpt_ops->xpo_sendto(rqstp);

	trace_svc_send(rqstp, status);
}

/*
 * Timer function to close old temporary transports, using
 * a mark-and-sweep algorithm.
 */
static void svc_age_temp_xprts(struct timer_list *t)
{
	struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
	struct svc_xprt *xprt;
	struct list_head *le, *next;

	dprintk("svc_age_temp_xprts\n");

	if (!spin_trylock_bh(&serv->sv_lock)) {
		/* busy, try again 1 sec later */
		dprintk("svc_age_temp_xprts: busy\n");
		mod_timer(&serv->sv_temptimer, jiffies + HZ);
		return;
	}

	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		xprt = list_entry(le, struct svc_xprt, xpt_list);

		/* First time through, just mark it OLD. Second time
		 * through, close it. */
		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
			continue;
		if (kref_read(&xprt->xpt_ref) > 1 ||
		    test_bit(XPT_BUSY, &xprt->xpt_flags))
			continue;
		list_del_init(le);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		dprintk("queuing xprt %p for closing\n", xprt);

		/* a thread will dequeue and close it soon */
		svc_xprt_enqueue(xprt);
	}
	spin_unlock_bh(&serv->sv_lock);

	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}

/* Close temporary transports whose xpt_local matches server_addr immediately
 * instead of waiting for them to be picked up by the timer.
 *
 * This is meant to be called from a notifier_block that runs when an ip
 * address is deleted.
 */
void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
{
	struct svc_xprt *xprt;
	struct list_head *le, *next;
	LIST_HEAD(to_be_closed);

	spin_lock_bh(&serv->sv_lock);
	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		xprt = list_entry(le, struct svc_xprt, xpt_list);
		if (rpc_cmp_addr(server_addr, (struct sockaddr *)
				 &xprt->xpt_local)) {
			dprintk("svc_age_temp_xprts_now: found %p\n", xprt);
			list_move(le, &to_be_closed);
		}
	}
	spin_unlock_bh(&serv->sv_lock);

	while (!list_empty(&to_be_closed)) {
		le = to_be_closed.next;
		list_del_init(le);
		xprt = list_entry(le, struct svc_xprt, xpt_list);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		set_bit(XPT_KILL_TEMP, &xprt->xpt_flags);
		dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n",
			xprt);
		svc_xprt_enqueue(xprt);
	}
}
EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now);
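
/*
 * Example: a rough sketch of the intended caller, an inetaddr notifier
 * in the style of nfsd's ("example_inetaddr_event" and the serv
 * pointer are hypothetical):
 *
 *	static int example_inetaddr_event(struct notifier_block *this,
 *					  unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *		struct sockaddr_in sin;
 *
 *		if (event != NETDEV_DOWN)
 *			return NOTIFY_DONE;
 *		sin.sin_family = AF_INET;
 *		sin.sin_addr.s_addr = ifa->ifa_local;
 *		svc_age_temp_xprts_now(serv, (struct sockaddr *)&sin);
 *		return NOTIFY_DONE;
 *	}
 */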

static void call_xpt_users(struct svc_xprt *xprt)
{
	struct svc_xpt_user *u;

	spin_lock(&xprt->xpt_lock);
	while (!list_empty(&xprt->xpt_users)) {
		u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
		list_del_init(&u->list);
		u->callback(u);
	}
	spin_unlock(&xprt->xpt_lock);
}

/*
 * Remove a dead transport
 */
static void svc_delete_xprt(struct svc_xprt *xprt)
{
	struct svc_serv *serv = xprt->xpt_server;
	struct svc_deferred_req *dr;

	if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
		return;

	trace_svc_xprt_detach(xprt);
	xprt->xpt_ops->xpo_detach(xprt);
	if (xprt->xpt_bc_xprt)
		xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);

	spin_lock_bh(&serv->sv_lock);
	list_del_init(&xprt->xpt_list);
	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
		serv->sv_tmpcnt--;
	spin_unlock_bh(&serv->sv_lock);

	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
		free_deferred(xprt, dr);

	call_xpt_users(xprt);
	svc_xprt_put(xprt);
}

/**
 * svc_xprt_close - Close a client connection
 * @xprt: transport to disconnect
 *
 */
void svc_xprt_close(struct svc_xprt *xprt)
{
	trace_svc_xprt_close(xprt);
	set_bit(XPT_CLOSE, &xprt->xpt_flags);
	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
		/* someone else will have to effect the close */
		return;
	/*
	 * We expect svc_xprt_close() to work even when no threads are
	 * running (e.g., while configuring the server before starting
	 * any threads), so if the transport isn't busy, we delete
	 * it ourselves:
	 */
	svc_delete_xprt(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_close);

static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
	struct svc_xprt *xprt;
	int ret = 0;

	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(xprt, xprt_list, xpt_list) {
		if (xprt->xpt_net != net)
			continue;
		ret++;
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		svc_xprt_enqueue(xprt);
	}
	spin_unlock_bh(&serv->sv_lock);
	return ret;
}

static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
{
	struct svc_xprt *xprt;
	int i;

	for (i = 0; i < serv->sv_nrpools; i++) {
		struct svc_pool *pool = &serv->sv_pools[i];
		struct llist_node *q, **t1, *t2;

		q = lwq_dequeue_all(&pool->sp_xprts);
		lwq_for_each_safe(xprt, t1, t2, &q, xpt_ready) {
			if (xprt->xpt_net == net) {
				set_bit(XPT_CLOSE, &xprt->xpt_flags);
				svc_delete_xprt(xprt);
				xprt = NULL;
			}
		}

		if (q)
			lwq_enqueue_batch(q, &pool->sp_xprts);
	}
}

/**
 * svc_xprt_destroy_all - Destroy transports associated with @serv
 * @serv: RPC service to be shut down
 * @net: target network namespace
 *
 * Server threads may still be running (especially in the case where the
 * service is still running in other network namespaces).
 *
 * So we shut down sockets the same way we would on a running server, by
 * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do
 * the close. In the case where there are no other threads running,
 * svc_clean_up_xprts() does a simple version of a server's main event
 * loop. In the case where there are other threads, we may need to wait
 * a little while and then check again to see if they're done.
 */
void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
{
	int delay = 0;

	while (svc_close_list(serv, &serv->sv_permsocks, net) +
	       svc_close_list(serv, &serv->sv_tempsocks, net)) {

		svc_clean_up_xprts(serv, net);
		msleep(delay++);
	}
}
EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);

/*
 * Handle defer and revisit of requests
 */

static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
{
	struct svc_deferred_req *dr =
		container_of(dreq, struct svc_deferred_req, handle);
	struct svc_xprt *xprt = dr->xprt;

	spin_lock(&xprt->xpt_lock);
	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
		spin_unlock(&xprt->xpt_lock);
		trace_svc_defer_drop(dr);
		free_deferred(xprt, dr);
		svc_xprt_put(xprt);
		return;
	}
	dr->xprt = NULL;
	list_add(&dr->handle.recent, &xprt->xpt_deferred);
	spin_unlock(&xprt->xpt_lock);
	trace_svc_defer_queue(dr);
	svc_xprt_enqueue(xprt);
	svc_xprt_put(xprt);
}

/*
 * Save the request off for later processing. The request buffer looks
 * like this:
 *
 * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
 *
 * This code can only handle requests that consist of an xprt-header
 * and rpc-header.
 */
static struct cache_deferred_req *svc_defer(struct cache_req *req)
{
	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
	struct svc_deferred_req *dr;

	if (rqstp->rq_arg.page_len || !test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags))
		return NULL; /* if more than a page, give up FIXME */
	if (rqstp->rq_deferred) {
		dr = rqstp->rq_deferred;
		rqstp->rq_deferred = NULL;
	} else {
		size_t skip;
		size_t size;
		/* FIXME maybe discard if size too large */
		size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
		dr = kmalloc(size, GFP_KERNEL);
		if (dr == NULL)
			return NULL;

		dr->handle.owner = rqstp->rq_server;
		dr->prot = rqstp->rq_prot;
		memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
		dr->addrlen = rqstp->rq_addrlen;
		dr->daddr = rqstp->rq_daddr;
		dr->argslen = rqstp->rq_arg.len >> 2;

		/* back up head to the start of the buffer and copy */
		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
		       dr->argslen << 2);
	}
	dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
	rqstp->rq_xprt_ctxt = NULL;
	trace_svc_defer(rqstp);
	svc_xprt_get(rqstp->rq_xprt);
	dr->xprt = rqstp->rq_xprt;
	set_bit(RQ_DROPME, &rqstp->rq_flags);

	dr->handle.revisit = svc_revisit;
	return &dr->handle;
}

/*
 * recv data from a deferred request into an active one
 */
static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
{
	struct svc_deferred_req *dr = rqstp->rq_deferred;

	trace_svc_defer_recv(dr);

	/* setup iov_base past transport header */
	rqstp->rq_arg.head[0].iov_base = dr->args;
	/* The iov_len does not include the transport header bytes */
	rqstp->rq_arg.head[0].iov_len = dr->argslen << 2;
	rqstp->rq_arg.page_len = 0;
	/* The rq_arg.len includes the transport header bytes */
	rqstp->rq_arg.len = dr->argslen << 2;
	rqstp->rq_prot = dr->prot;
	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
	rqstp->rq_addrlen = dr->addrlen;
	/* Save off transport header len in case we get deferred again */
	rqstp->rq_daddr = dr->daddr;
	rqstp->rq_respages = rqstp->rq_pages;
	rqstp->rq_xprt_ctxt = dr->xprt_ctxt;

	dr->xprt_ctxt = NULL;
	svc_xprt_received(rqstp->rq_xprt);
	return dr->argslen << 2;
}


static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
{
	struct svc_deferred_req *dr = NULL;

	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
		return NULL;
	spin_lock(&xprt->xpt_lock);
	if (!list_empty(&xprt->xpt_deferred)) {
		dr = list_entry(xprt->xpt_deferred.next,
				struct svc_deferred_req,
				handle.recent);
		list_del_init(&dr->handle.recent);
	} else
		clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
	spin_unlock(&xprt->xpt_lock);
	return dr;
}

/**
 * svc_find_xprt - find an RPC transport instance
 * @serv: pointer to svc_serv to search
 * @xcl_name: C string containing transport's class name
 * @net: owner net pointer
 * @af: Address family of transport's local address
 * @port: transport's IP port number
 *
 * Return the transport instance pointer for the endpoint accepting
 * connections/peer traffic from the specified transport class,
 * address family and port.
 *
 * Specifying 0 for the address family or port is effectively a
 * wild-card, and will result in matching the first transport in the
 * service's list that has a matching class name.
 */
struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
			       struct net *net, const sa_family_t af,
			       const unsigned short port)
{
	struct svc_xprt *xprt;
	struct svc_xprt *found = NULL;

	/* Sanity check the args */
	if (serv == NULL || xcl_name == NULL)
		return found;

	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
		if (xprt->xpt_net != net)
			continue;
		if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
			continue;
		if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
			continue;
		if (port != 0 && port != svc_xprt_local_port(xprt))
			continue;
		found = xprt;
		svc_xprt_get(xprt);
		break;
	}
	spin_unlock_bh(&serv->sv_lock);
	return found;
}
EXPORT_SYMBOL_GPL(svc_find_xprt);
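
/*
 * Example: looking up a service's UDP listener with wildcard family
 * and port (the serv and net values are illustrative). The returned
 * transport carries an extra reference that the caller must drop:
 *
 *	xprt = svc_find_xprt(serv, "udp", net, AF_UNSPEC, 0);
 *	if (xprt) {
 *		(use xprt)
 *		svc_xprt_put(xprt);
 *	}
 */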

static int svc_one_xprt_name(const struct svc_xprt *xprt,
			     char *pos, int remaining)
{
	int len;

	len = snprintf(pos, remaining, "%s %u\n",
		       xprt->xpt_class->xcl_name,
		       svc_xprt_local_port(xprt));
	if (len >= remaining)
		return -ENAMETOOLONG;
	return len;
}

/**
 * svc_xprt_names - format a buffer with a list of transport names
 * @serv: pointer to an RPC service
 * @buf: pointer to a buffer to be filled in
 * @buflen: length of buffer to be filled in
 *
 * Fills in @buf with a string containing a list of transport names,
 * each name terminated with '\n'.
 *
 * Returns positive length of the filled-in string on success; otherwise
 * a negative errno value is returned if an error occurs.
 */
int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen)
{
	struct svc_xprt *xprt;
	int len, totlen;
	char *pos;

	/* Sanity check args */
	if (!serv)
		return 0;

	spin_lock_bh(&serv->sv_lock);

	pos = buf;
	totlen = 0;
	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
		len = svc_one_xprt_name(xprt, pos, buflen - totlen);
		if (len < 0) {
			*buf = '\0';
			totlen = len;
		}
		if (len <= 0)
			break;

		pos += len;
		totlen += len;
	}

	spin_unlock_bh(&serv->sv_lock);
	return totlen;
}
EXPORT_SYMBOL_GPL(svc_xprt_names);

/*----------------------------------------------------------------------------*/

static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
{
	unsigned int pidx = (unsigned int)*pos;
	struct svc_info *si = m->private;

	dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);

	mutex_lock(si->mutex);

	if (!pidx)
		return SEQ_START_TOKEN;
	if (!si->serv)
		return NULL;
	return pidx > si->serv->sv_nrpools ? NULL
		: &si->serv->sv_pools[pidx - 1];
}

static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct svc_pool *pool = p;
	struct svc_info *si = m->private;
	struct svc_serv *serv = si->serv;

	dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);

	if (!serv) {
		pool = NULL;
	} else if (p == SEQ_START_TOKEN) {
		pool = &serv->sv_pools[0];
	} else {
		unsigned int pidx = (pool - &serv->sv_pools[0]);
		if (pidx < serv->sv_nrpools-1)
			pool = &serv->sv_pools[pidx+1];
		else
			pool = NULL;
	}
	++*pos;
	return pool;
}

static void svc_pool_stats_stop(struct seq_file *m, void *p)
{
	struct svc_info *si = m->private;

	mutex_unlock(si->mutex);
}

static int svc_pool_stats_show(struct seq_file *m, void *p)
{
	struct svc_pool *pool = p;

	if (p == SEQ_START_TOKEN) {
		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n");
		return 0;
	}

	seq_printf(m, "%u %llu %llu %llu 0\n",
		   pool->sp_id,
		   percpu_counter_sum_positive(&pool->sp_messages_arrived),
		   percpu_counter_sum_positive(&pool->sp_sockets_queued),
		   percpu_counter_sum_positive(&pool->sp_threads_woken));

	return 0;
}

static const struct seq_operations svc_pool_stats_seq_ops = {
	.start	= svc_pool_stats_start,
	.next	= svc_pool_stats_next,
	.stop	= svc_pool_stats_stop,
	.show	= svc_pool_stats_show,
};

int svc_pool_stats_open(struct svc_info *info, struct file *file)
{
	struct seq_file *seq;
	int err;

	err = seq_open(file, &svc_pool_stats_seq_ops);
	if (err)
		return err;
	seq = file->private_data;
	seq->private = info;

	return 0;
}
EXPORT_SYMBOL(svc_pool_stats_open);
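
/*
 * Example: a minimal sketch of exposing the pool stats through procfs
 * ("example_pool_stats_info" and the file wiring are hypothetical);
 * reads and releases go through the standard seq_file helpers:
 *
 *	static int example_pool_stats_open(struct inode *inode,
 *					   struct file *file)
 *	{
 *		return svc_pool_stats_open(&example_pool_stats_info, file);
 *	}
 *
 *	static const struct proc_ops example_pool_stats_proc_ops = {
 *		.proc_open	= example_pool_stats_open,
 *		.proc_read	= seq_read,
 *		.proc_lseek	= seq_lseek,
 *		.proc_release	= seq_release,
 *	};
 */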

/*----------------------------------------------------------------------------*/