1 | // SPDX-License-Identifier: GPL-2.0 |
/*
 * linux/net/sunrpc/xprtsock.c
 *
 * Client-side transport implementation for sockets.
 *
 * TCP callback races fixes (C) 1998 Red Hat
 * TCP send fixes (C) 1998 Red Hat
 * TCP NFS related read + write fixes
 *  (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
 *
 * Rewrite of large parts of the code in order to stabilize TCP stuff.
 * Fix behaviour when socket buffer is full.
 *  (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
 *
 * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
 *   <gilles.quillard@bull.net>
 */
21 | |
22 | #include <linux/types.h> |
23 | #include <linux/string.h> |
24 | #include <linux/slab.h> |
25 | #include <linux/module.h> |
26 | #include <linux/capability.h> |
27 | #include <linux/pagemap.h> |
28 | #include <linux/errno.h> |
29 | #include <linux/socket.h> |
30 | #include <linux/in.h> |
31 | #include <linux/net.h> |
32 | #include <linux/mm.h> |
33 | #include <linux/un.h> |
34 | #include <linux/udp.h> |
35 | #include <linux/tcp.h> |
36 | #include <linux/sunrpc/clnt.h> |
37 | #include <linux/sunrpc/addr.h> |
38 | #include <linux/sunrpc/sched.h> |
39 | #include <linux/sunrpc/svcsock.h> |
40 | #include <linux/sunrpc/xprtsock.h> |
41 | #include <linux/file.h> |
42 | #ifdef CONFIG_SUNRPC_BACKCHANNEL |
43 | #include <linux/sunrpc/bc_xprt.h> |
44 | #endif |
45 | |
46 | #include <net/sock.h> |
47 | #include <net/checksum.h> |
48 | #include <net/udp.h> |
49 | #include <net/tcp.h> |
50 | #include <net/tls_prot.h> |
51 | #include <net/handshake.h> |
52 | |
53 | #include <linux/bvec.h> |
54 | #include <linux/highmem.h> |
55 | #include <linux/uio.h> |
56 | #include <linux/sched/mm.h> |
57 | |
58 | #include <trace/events/sock.h> |
59 | #include <trace/events/sunrpc.h> |
60 | |
61 | #include "socklib.h" |
62 | #include "sunrpc.h" |
63 | |
/* Prototypes of file-local helpers that are referenced before their bodies. */
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
				       struct socket *sock);
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
static void xs_reset_srcport(struct sock_xprt *transport);
static void xs_close(struct rpc_xprt *xprt);
69 | |
70 | /* |
71 | * xprtsock tunables |
72 | */ |
73 | static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; |
74 | static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; |
75 | static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; |
76 | |
77 | static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; |
78 | static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; |
79 | |
80 | #define XS_TCP_LINGER_TO (15U * HZ) |
81 | static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; |
82 | |
83 | /* |
84 | * We can register our own files under /proc/sys/sunrpc by |
85 | * calling register_sysctl() again. The files in that |
86 | * directory become the union of all files registered there. |
87 | * |
88 | * We simply need to make sure that we don't collide with |
89 | * someone else's file names! |
90 | */ |
91 | |
92 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; |
93 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; |
94 | static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; |
95 | static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; |
96 | static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; |
97 | |
98 | static struct ctl_table_header *; |
99 | |
100 | static struct xprt_class xs_local_transport; |
101 | static struct xprt_class xs_udp_transport; |
102 | static struct xprt_class xs_tcp_transport; |
103 | static struct xprt_class xs_tcp_tls_transport; |
104 | static struct xprt_class xs_bc_tcp_transport; |
105 | |
106 | /* |
107 | * FIXME: changing the UDP slot table size should also resize the UDP |
108 | * socket buffers for existing UDP transports |
109 | */ |
110 | static struct ctl_table xs_tunables_table[] = { |
111 | { |
112 | .procname = "udp_slot_table_entries" , |
113 | .data = &xprt_udp_slot_table_entries, |
114 | .maxlen = sizeof(unsigned int), |
115 | .mode = 0644, |
116 | .proc_handler = proc_dointvec_minmax, |
117 | .extra1 = &min_slot_table_size, |
118 | .extra2 = &max_slot_table_size |
119 | }, |
120 | { |
121 | .procname = "tcp_slot_table_entries" , |
122 | .data = &xprt_tcp_slot_table_entries, |
123 | .maxlen = sizeof(unsigned int), |
124 | .mode = 0644, |
125 | .proc_handler = proc_dointvec_minmax, |
126 | .extra1 = &min_slot_table_size, |
127 | .extra2 = &max_slot_table_size |
128 | }, |
129 | { |
130 | .procname = "tcp_max_slot_table_entries" , |
131 | .data = &xprt_max_tcp_slot_table_entries, |
132 | .maxlen = sizeof(unsigned int), |
133 | .mode = 0644, |
134 | .proc_handler = proc_dointvec_minmax, |
135 | .extra1 = &min_slot_table_size, |
136 | .extra2 = &max_tcp_slot_table_limit |
137 | }, |
138 | { |
139 | .procname = "min_resvport" , |
140 | .data = &xprt_min_resvport, |
141 | .maxlen = sizeof(unsigned int), |
142 | .mode = 0644, |
143 | .proc_handler = proc_dointvec_minmax, |
144 | .extra1 = &xprt_min_resvport_limit, |
145 | .extra2 = &xprt_max_resvport_limit |
146 | }, |
147 | { |
148 | .procname = "max_resvport" , |
149 | .data = &xprt_max_resvport, |
150 | .maxlen = sizeof(unsigned int), |
151 | .mode = 0644, |
152 | .proc_handler = proc_dointvec_minmax, |
153 | .extra1 = &xprt_min_resvport_limit, |
154 | .extra2 = &xprt_max_resvport_limit |
155 | }, |
156 | { |
157 | .procname = "tcp_fin_timeout" , |
158 | .data = &xs_tcp_fin_timeout, |
159 | .maxlen = sizeof(xs_tcp_fin_timeout), |
160 | .mode = 0644, |
161 | .proc_handler = proc_dointvec_jiffies, |
162 | }, |
163 | { }, |
164 | }; |
165 | |
/*
 * How long to wait for the RPC portmapper to answer.
 */
#define XS_BIND_TO		(60U * HZ)

/*
 * Back-off applied after a UDP socket connect error.  Such an error is
 * most likely caused by a resource problem on the local host.
 */
#define XS_UDP_REEST_TO		(2U * HZ)

/*
 * Initial TCP re-establish timeout.
 *
 * Clients wait a little before reconnecting to a server that has just
 * dropped the connection.  TCP reconnects use an exponential backoff:
 * some servers drop connections when overloaded, so the first delay is
 * short and grows while the server stays down or unresponsive.
 */
#define XS_TCP_INIT_REEST_TO	(3U * HZ)

/*
 * Idle timeout: the client drops the transport socket once it has been
 * idle this long.  UDP sockets are timed out as well, so that port
 * numbers are not held while there is no RPC traffic.
 */
#define XS_IDLE_DISC_TO		(5U * 60 * HZ)

/*
 * Upper bound on the duration of a TLS handshake.
 */
#define XS_TLS_HANDSHAKE_TO	(10U * HZ)
199 | |
200 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
201 | # undef RPC_DEBUG_DATA |
202 | # define RPCDBG_FACILITY RPCDBG_TRANS |
203 | #endif |
204 | |
205 | #ifdef RPC_DEBUG_DATA |
206 | static void xs_pktdump(char *msg, u32 *packet, unsigned int count) |
207 | { |
208 | u8 *buf = (u8 *) packet; |
209 | int j; |
210 | |
211 | dprintk("RPC: %s\n" , msg); |
212 | for (j = 0; j < count && j < 128; j += 4) { |
213 | if (!(j & 31)) { |
214 | if (j) |
215 | dprintk("\n" ); |
216 | dprintk("0x%04x " , j); |
217 | } |
218 | dprintk("%02x%02x%02x%02x " , |
219 | buf[j], buf[j+1], buf[j+2], buf[j+3]); |
220 | } |
221 | dprintk("\n" ); |
222 | } |
223 | #else |
224 | static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) |
225 | { |
226 | /* NOP */ |
227 | } |
228 | #endif |
229 | |
230 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) |
231 | { |
232 | return (struct rpc_xprt *) sk->sk_user_data; |
233 | } |
234 | |
235 | static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) |
236 | { |
237 | return (struct sockaddr *) &xprt->addr; |
238 | } |
239 | |
240 | static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) |
241 | { |
242 | return (struct sockaddr_un *) &xprt->addr; |
243 | } |
244 | |
245 | static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) |
246 | { |
247 | return (struct sockaddr_in *) &xprt->addr; |
248 | } |
249 | |
250 | static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) |
251 | { |
252 | return (struct sockaddr_in6 *) &xprt->addr; |
253 | } |
254 | |
255 | static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) |
256 | { |
257 | struct sockaddr *sap = xs_addr(xprt); |
258 | struct sockaddr_in6 *sin6; |
259 | struct sockaddr_in *sin; |
260 | struct sockaddr_un *sun; |
261 | char buf[128]; |
262 | |
263 | switch (sap->sa_family) { |
264 | case AF_LOCAL: |
265 | sun = xs_addr_un(xprt); |
266 | if (sun->sun_path[0]) { |
267 | strscpy(buf, sun->sun_path, sizeof(buf)); |
268 | } else { |
269 | buf[0] = '@'; |
270 | strscpy(buf+1, sun->sun_path+1, sizeof(buf)-1); |
271 | } |
272 | xprt->address_strings[RPC_DISPLAY_ADDR] = |
273 | kstrdup(s: buf, GFP_KERNEL); |
274 | break; |
275 | case AF_INET: |
276 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
277 | xprt->address_strings[RPC_DISPLAY_ADDR] = |
278 | kstrdup(s: buf, GFP_KERNEL); |
279 | sin = xs_addr_in(xprt); |
280 | snprintf(buf, size: sizeof(buf), fmt: "%08x" , ntohl(sin->sin_addr.s_addr)); |
281 | break; |
282 | case AF_INET6: |
283 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
284 | xprt->address_strings[RPC_DISPLAY_ADDR] = |
285 | kstrdup(s: buf, GFP_KERNEL); |
286 | sin6 = xs_addr_in6(xprt); |
287 | snprintf(buf, size: sizeof(buf), fmt: "%pi6" , &sin6->sin6_addr); |
288 | break; |
289 | default: |
290 | BUG(); |
291 | } |
292 | |
293 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(s: buf, GFP_KERNEL); |
294 | } |
295 | |
296 | static void xs_format_common_peer_ports(struct rpc_xprt *xprt) |
297 | { |
298 | struct sockaddr *sap = xs_addr(xprt); |
299 | char buf[128]; |
300 | |
301 | snprintf(buf, size: sizeof(buf), fmt: "%u" , rpc_get_port(sap)); |
302 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(s: buf, GFP_KERNEL); |
303 | |
304 | snprintf(buf, size: sizeof(buf), fmt: "%4hx" , rpc_get_port(sap)); |
305 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(s: buf, GFP_KERNEL); |
306 | } |
307 | |
308 | static void xs_format_peer_addresses(struct rpc_xprt *xprt, |
309 | const char *protocol, |
310 | const char *netid) |
311 | { |
312 | xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; |
313 | xprt->address_strings[RPC_DISPLAY_NETID] = netid; |
314 | xs_format_common_peer_addresses(xprt); |
315 | xs_format_common_peer_ports(xprt); |
316 | } |
317 | |
318 | static void xs_update_peer_port(struct rpc_xprt *xprt) |
319 | { |
320 | kfree(objp: xprt->address_strings[RPC_DISPLAY_HEX_PORT]); |
321 | kfree(objp: xprt->address_strings[RPC_DISPLAY_PORT]); |
322 | |
323 | xs_format_common_peer_ports(xprt); |
324 | } |
325 | |
326 | static void xs_free_peer_addresses(struct rpc_xprt *xprt) |
327 | { |
328 | unsigned int i; |
329 | |
330 | for (i = 0; i < RPC_DISPLAY_MAX; i++) |
331 | switch (i) { |
332 | case RPC_DISPLAY_PROTO: |
333 | case RPC_DISPLAY_NETID: |
334 | continue; |
335 | default: |
336 | kfree(objp: xprt->address_strings[i]); |
337 | } |
338 | } |
339 | |
340 | static size_t |
341 | xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) |
342 | { |
343 | size_t i,n; |
344 | |
345 | if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES)) |
346 | return want; |
347 | n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; |
348 | for (i = 0; i < n; i++) { |
349 | if (buf->pages[i]) |
350 | continue; |
351 | buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); |
352 | if (!buf->pages[i]) { |
353 | i *= PAGE_SIZE; |
354 | return i > buf->page_base ? i - buf->page_base : 0; |
355 | } |
356 | } |
357 | return want; |
358 | } |
359 | |
360 | static int |
361 | xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, |
362 | struct cmsghdr *cmsg, int ret) |
363 | { |
364 | u8 content_type = tls_get_record_type(sk: sock->sk, msg: cmsg); |
365 | u8 level, description; |
366 | |
367 | switch (content_type) { |
368 | case 0: |
369 | break; |
370 | case TLS_RECORD_TYPE_DATA: |
371 | /* TLS sets EOR at the end of each application data |
372 | * record, even though there might be more frames |
373 | * waiting to be decrypted. |
374 | */ |
375 | msg->msg_flags &= ~MSG_EOR; |
376 | break; |
377 | case TLS_RECORD_TYPE_ALERT: |
378 | tls_alert_recv(sk: sock->sk, msg, level: &level, description: &description); |
379 | ret = (level == TLS_ALERT_LEVEL_FATAL) ? |
380 | -EACCES : -EAGAIN; |
381 | break; |
382 | default: |
383 | /* discard this record type */ |
384 | ret = -EAGAIN; |
385 | } |
386 | return ret; |
387 | } |
388 | |
389 | static int |
390 | xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) |
391 | { |
392 | union { |
393 | struct cmsghdr cmsg; |
394 | u8 buf[CMSG_SPACE(sizeof(u8))]; |
395 | } u; |
396 | int ret; |
397 | |
398 | msg->msg_control = &u; |
399 | msg->msg_controllen = sizeof(u); |
400 | ret = sock_recvmsg(sock, msg, flags); |
401 | if (msg->msg_controllen != sizeof(u)) |
402 | ret = xs_sock_process_cmsg(sock, msg, cmsg: &u.cmsg, ret); |
403 | return ret; |
404 | } |
405 | |
406 | static ssize_t |
407 | xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) |
408 | { |
409 | ssize_t ret; |
410 | if (seek != 0) |
411 | iov_iter_advance(i: &msg->msg_iter, bytes: seek); |
412 | ret = xs_sock_recv_cmsg(sock, msg, flags); |
413 | return ret > 0 ? ret + seek : ret; |
414 | } |
415 | |
416 | static ssize_t |
417 | xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, |
418 | struct kvec *kvec, size_t count, size_t seek) |
419 | { |
420 | iov_iter_kvec(i: &msg->msg_iter, ITER_DEST, kvec, nr_segs: 1, count); |
421 | return xs_sock_recvmsg(sock, msg, flags, seek); |
422 | } |
423 | |
424 | static ssize_t |
425 | xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, |
426 | struct bio_vec *bvec, unsigned long nr, size_t count, |
427 | size_t seek) |
428 | { |
429 | iov_iter_bvec(i: &msg->msg_iter, ITER_DEST, bvec, nr_segs: nr, count); |
430 | return xs_sock_recvmsg(sock, msg, flags, seek); |
431 | } |
432 | |
433 | static ssize_t |
434 | xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, |
435 | size_t count) |
436 | { |
437 | iov_iter_discard(i: &msg->msg_iter, ITER_DEST, count); |
438 | return xs_sock_recv_cmsg(sock, msg, flags); |
439 | } |
440 | |
441 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
442 | static void |
443 | xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) |
444 | { |
445 | struct bvec_iter bi = { |
446 | .bi_size = count, |
447 | }; |
448 | struct bio_vec bv; |
449 | |
450 | bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); |
451 | for_each_bvec(bv, bvec, bi, bi) |
452 | flush_dcache_page(bv.bv_page); |
453 | } |
454 | #else |
455 | static inline void |
456 | xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) |
457 | { |
458 | } |
459 | #endif |
460 | |
461 | static ssize_t |
462 | xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, |
463 | struct xdr_buf *buf, size_t count, size_t seek, size_t *read) |
464 | { |
465 | size_t want, seek_init = seek, offset = 0; |
466 | ssize_t ret; |
467 | |
468 | want = min_t(size_t, count, buf->head[0].iov_len); |
469 | if (seek < want) { |
470 | ret = xs_read_kvec(sock, msg, flags, kvec: &buf->head[0], count: want, seek); |
471 | if (ret <= 0) |
472 | goto sock_err; |
473 | offset += ret; |
474 | if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
475 | goto out; |
476 | if (ret != want) |
477 | goto out; |
478 | seek = 0; |
479 | } else { |
480 | seek -= want; |
481 | offset += want; |
482 | } |
483 | |
484 | want = xs_alloc_sparse_pages( |
485 | buf, min_t(size_t, count - offset, buf->page_len), |
486 | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); |
487 | if (seek < want) { |
488 | ret = xs_read_bvec(sock, msg, flags, bvec: buf->bvec, |
489 | nr: xdr_buf_pagecount(buf), |
490 | count: want + buf->page_base, |
491 | seek: seek + buf->page_base); |
492 | if (ret <= 0) |
493 | goto sock_err; |
494 | xs_flush_bvec(bvec: buf->bvec, count: ret, seek: seek + buf->page_base); |
495 | ret -= buf->page_base; |
496 | offset += ret; |
497 | if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
498 | goto out; |
499 | if (ret != want) |
500 | goto out; |
501 | seek = 0; |
502 | } else { |
503 | seek -= want; |
504 | offset += want; |
505 | } |
506 | |
507 | want = min_t(size_t, count - offset, buf->tail[0].iov_len); |
508 | if (seek < want) { |
509 | ret = xs_read_kvec(sock, msg, flags, kvec: &buf->tail[0], count: want, seek); |
510 | if (ret <= 0) |
511 | goto sock_err; |
512 | offset += ret; |
513 | if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
514 | goto out; |
515 | if (ret != want) |
516 | goto out; |
517 | } else if (offset < seek_init) |
518 | offset = seek_init; |
519 | ret = -EMSGSIZE; |
520 | out: |
521 | *read = offset - seek_init; |
522 | return ret; |
523 | sock_err: |
524 | offset += seek; |
525 | goto out; |
526 | } |
527 | |
528 | static void |
529 | (struct sock_xprt *transport, struct xdr_buf *buf) |
530 | { |
531 | if (!transport->recv.copied) { |
532 | if (buf->head[0].iov_len >= transport->recv.offset) |
533 | memcpy(buf->head[0].iov_base, |
534 | &transport->recv.xid, |
535 | transport->recv.offset); |
536 | transport->recv.copied = transport->recv.offset; |
537 | } |
538 | } |
539 | |
540 | static bool |
541 | xs_read_stream_request_done(struct sock_xprt *transport) |
542 | { |
543 | return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); |
544 | } |
545 | |
546 | static void |
547 | xs_read_stream_check_eor(struct sock_xprt *transport, |
548 | struct msghdr *msg) |
549 | { |
550 | if (xs_read_stream_request_done(transport)) |
551 | msg->msg_flags |= MSG_EOR; |
552 | } |
553 | |
554 | static ssize_t |
555 | xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, |
556 | int flags, struct rpc_rqst *req) |
557 | { |
558 | struct xdr_buf *buf = &req->rq_private_buf; |
559 | size_t want, read; |
560 | ssize_t ret; |
561 | |
562 | xs_read_header(transport, buf); |
563 | |
564 | want = transport->recv.len - transport->recv.offset; |
565 | if (want != 0) { |
566 | ret = xs_read_xdr_buf(sock: transport->sock, msg, flags, buf, |
567 | count: transport->recv.copied + want, |
568 | seek: transport->recv.copied, |
569 | read: &read); |
570 | transport->recv.offset += read; |
571 | transport->recv.copied += read; |
572 | } |
573 | |
574 | if (transport->recv.offset == transport->recv.len) |
575 | xs_read_stream_check_eor(transport, msg); |
576 | |
577 | if (want == 0) |
578 | return 0; |
579 | |
580 | switch (ret) { |
581 | default: |
582 | break; |
583 | case -EFAULT: |
584 | case -EMSGSIZE: |
585 | msg->msg_flags |= MSG_TRUNC; |
586 | return read; |
587 | case 0: |
588 | return -ESHUTDOWN; |
589 | } |
590 | return ret < 0 ? ret : read; |
591 | } |
592 | |
/*
 * Number of header bytes to read at the start of a fragment.
 *
 * @isfrag: true when data for the current message has already been
 *          copied, i.e. this is a continuation fragment
 *
 * A continuation fragment needs only one 32-bit word.  Otherwise three
 * 32-bit words are read.
 */
static size_t
xs_read_stream_headersize(bool isfrag)
{
	if (isfrag)
		return sizeof(__be32);
	return 3 * sizeof(__be32);
}
600 | |
601 | static ssize_t |
602 | (struct sock_xprt *transport, struct msghdr *msg, |
603 | int flags, size_t want, size_t seek) |
604 | { |
605 | struct kvec kvec = { |
606 | .iov_base = &transport->recv.fraghdr, |
607 | .iov_len = want, |
608 | }; |
609 | return xs_read_kvec(sock: transport->sock, msg, flags, kvec: &kvec, count: want, seek); |
610 | } |
611 | |
612 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
613 | static ssize_t |
614 | xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) |
615 | { |
616 | struct rpc_xprt *xprt = &transport->xprt; |
617 | struct rpc_rqst *req; |
618 | ssize_t ret; |
619 | |
620 | /* Is this transport associated with the backchannel? */ |
621 | if (!xprt->bc_serv) |
622 | return -ESHUTDOWN; |
623 | |
624 | /* Look up and lock the request corresponding to the given XID */ |
625 | req = xprt_lookup_bc_request(xprt, xid: transport->recv.xid); |
626 | if (!req) { |
627 | printk(KERN_WARNING "Callback slot table overflowed\n" ); |
628 | return -ESHUTDOWN; |
629 | } |
630 | if (transport->recv.copied && !req->rq_private_buf.len) |
631 | return -ESHUTDOWN; |
632 | |
633 | ret = xs_read_stream_request(transport, msg, flags, req); |
634 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
635 | xprt_complete_bc_request(req, copied: transport->recv.copied); |
636 | else |
637 | req->rq_private_buf.len = transport->recv.copied; |
638 | |
639 | return ret; |
640 | } |
641 | #else /* CONFIG_SUNRPC_BACKCHANNEL */ |
642 | static ssize_t |
643 | xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) |
644 | { |
645 | return -ESHUTDOWN; |
646 | } |
647 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
648 | |
649 | static ssize_t |
650 | xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) |
651 | { |
652 | struct rpc_xprt *xprt = &transport->xprt; |
653 | struct rpc_rqst *req; |
654 | ssize_t ret = 0; |
655 | |
656 | /* Look up and lock the request corresponding to the given XID */ |
657 | spin_lock(lock: &xprt->queue_lock); |
658 | req = xprt_lookup_rqst(xprt, xid: transport->recv.xid); |
659 | if (!req || (transport->recv.copied && !req->rq_private_buf.len)) { |
660 | msg->msg_flags |= MSG_TRUNC; |
661 | goto out; |
662 | } |
663 | xprt_pin_rqst(req); |
664 | spin_unlock(lock: &xprt->queue_lock); |
665 | |
666 | ret = xs_read_stream_request(transport, msg, flags, req); |
667 | |
668 | spin_lock(lock: &xprt->queue_lock); |
669 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
670 | xprt_complete_rqst(task: req->rq_task, copied: transport->recv.copied); |
671 | else |
672 | req->rq_private_buf.len = transport->recv.copied; |
673 | xprt_unpin_rqst(req); |
674 | out: |
675 | spin_unlock(lock: &xprt->queue_lock); |
676 | return ret; |
677 | } |
678 | |
679 | static ssize_t |
680 | xs_read_stream(struct sock_xprt *transport, int flags) |
681 | { |
682 | struct msghdr msg = { 0 }; |
683 | size_t want, read = 0; |
684 | ssize_t ret = 0; |
685 | |
686 | if (transport->recv.len == 0) { |
687 | want = xs_read_stream_headersize(isfrag: transport->recv.copied != 0); |
688 | ret = xs_read_stream_header(transport, msg: &msg, flags, want, |
689 | seek: transport->recv.offset); |
690 | if (ret <= 0) |
691 | goto out_err; |
692 | transport->recv.offset = ret; |
693 | if (transport->recv.offset != want) |
694 | return transport->recv.offset; |
695 | transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & |
696 | RPC_FRAGMENT_SIZE_MASK; |
697 | transport->recv.offset -= sizeof(transport->recv.fraghdr); |
698 | read = ret; |
699 | } |
700 | |
701 | switch (be32_to_cpu(transport->recv.calldir)) { |
702 | default: |
703 | msg.msg_flags |= MSG_TRUNC; |
704 | break; |
705 | case RPC_CALL: |
706 | ret = xs_read_stream_call(transport, msg: &msg, flags); |
707 | break; |
708 | case RPC_REPLY: |
709 | ret = xs_read_stream_reply(transport, msg: &msg, flags); |
710 | } |
711 | if (msg.msg_flags & MSG_TRUNC) { |
712 | transport->recv.calldir = cpu_to_be32(-1); |
713 | transport->recv.copied = -1; |
714 | } |
715 | if (ret < 0) |
716 | goto out_err; |
717 | read += ret; |
718 | if (transport->recv.offset < transport->recv.len) { |
719 | if (!(msg.msg_flags & MSG_TRUNC)) |
720 | return read; |
721 | msg.msg_flags = 0; |
722 | ret = xs_read_discard(sock: transport->sock, msg: &msg, flags, |
723 | count: transport->recv.len - transport->recv.offset); |
724 | if (ret <= 0) |
725 | goto out_err; |
726 | transport->recv.offset += ret; |
727 | read += ret; |
728 | if (transport->recv.offset != transport->recv.len) |
729 | return read; |
730 | } |
731 | if (xs_read_stream_request_done(transport)) { |
732 | trace_xs_stream_read_request(xs: transport); |
733 | transport->recv.copied = 0; |
734 | } |
735 | transport->recv.offset = 0; |
736 | transport->recv.len = 0; |
737 | return read; |
738 | out_err: |
739 | return ret != 0 ? ret : -ESHUTDOWN; |
740 | } |
741 | |
742 | static __poll_t xs_poll_socket(struct sock_xprt *transport) |
743 | { |
744 | return transport->sock->ops->poll(transport->file, transport->sock, |
745 | NULL); |
746 | } |
747 | |
748 | static bool xs_poll_socket_readable(struct sock_xprt *transport) |
749 | { |
750 | __poll_t events = xs_poll_socket(transport); |
751 | |
752 | return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP); |
753 | } |
754 | |
755 | static void xs_poll_check_readable(struct sock_xprt *transport) |
756 | { |
757 | |
758 | clear_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state); |
759 | if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) |
760 | return; |
761 | if (!xs_poll_socket_readable(transport)) |
762 | return; |
763 | if (!test_and_set_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state)) |
764 | queue_work(wq: xprtiod_workqueue, work: &transport->recv_worker); |
765 | } |
766 | |
767 | static void xs_stream_data_receive(struct sock_xprt *transport) |
768 | { |
769 | size_t read = 0; |
770 | ssize_t ret = 0; |
771 | |
772 | mutex_lock(&transport->recv_mutex); |
773 | if (transport->sock == NULL) |
774 | goto out; |
775 | for (;;) { |
776 | ret = xs_read_stream(transport, MSG_DONTWAIT); |
777 | if (ret < 0) |
778 | break; |
779 | read += ret; |
780 | cond_resched(); |
781 | } |
782 | if (ret == -ESHUTDOWN) |
783 | kernel_sock_shutdown(sock: transport->sock, how: SHUT_RDWR); |
784 | else if (ret == -EACCES) |
785 | xprt_wake_pending_tasks(xprt: &transport->xprt, status: -EACCES); |
786 | else |
787 | xs_poll_check_readable(transport); |
788 | out: |
789 | mutex_unlock(lock: &transport->recv_mutex); |
790 | trace_xs_stream_read_data(xprt: &transport->xprt, err: ret, total: read); |
791 | } |
792 | |
793 | static void xs_stream_data_receive_workfn(struct work_struct *work) |
794 | { |
795 | struct sock_xprt *transport = |
796 | container_of(work, struct sock_xprt, recv_worker); |
797 | unsigned int pflags = memalloc_nofs_save(); |
798 | |
799 | xs_stream_data_receive(transport); |
800 | memalloc_nofs_restore(flags: pflags); |
801 | } |
802 | |
803 | static void |
804 | xs_stream_reset_connect(struct sock_xprt *transport) |
805 | { |
806 | transport->recv.offset = 0; |
807 | transport->recv.len = 0; |
808 | transport->recv.copied = 0; |
809 | transport->xmit.offset = 0; |
810 | } |
811 | |
812 | static void |
813 | xs_stream_start_connect(struct sock_xprt *transport) |
814 | { |
815 | transport->xprt.stat.connect_count++; |
816 | transport->xprt.stat.connect_start = jiffies; |
817 | } |
818 | |
/* msg_flags used for every send: MSG_DONTWAIT and MSG_NOSIGNAL */
#define XS_SENDMSG_FLAGS	(MSG_NOSIGNAL | MSG_DONTWAIT)
820 | |
821 | /** |
822 | * xs_nospace - handle transmit was incomplete |
823 | * @req: pointer to RPC request |
824 | * @transport: pointer to struct sock_xprt |
825 | * |
826 | */ |
827 | static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) |
828 | { |
829 | struct rpc_xprt *xprt = &transport->xprt; |
830 | struct sock *sk = transport->inet; |
831 | int ret = -EAGAIN; |
832 | |
833 | trace_rpc_socket_nospace(rqst: req, transport); |
834 | |
835 | /* Protect against races with write_space */ |
836 | spin_lock(lock: &xprt->transport_lock); |
837 | |
838 | /* Don't race with disconnect */ |
839 | if (xprt_connected(xprt)) { |
840 | /* wait for more buffer space */ |
841 | set_bit(XPRT_SOCK_NOSPACE, addr: &transport->sock_state); |
842 | set_bit(SOCK_NOSPACE, addr: &sk->sk_socket->flags); |
843 | sk->sk_write_pending++; |
844 | xprt_wait_for_buffer_space(xprt); |
845 | } else |
846 | ret = -ENOTCONN; |
847 | |
848 | spin_unlock(lock: &xprt->transport_lock); |
849 | return ret; |
850 | } |
851 | |
852 | static int xs_sock_nospace(struct rpc_rqst *req) |
853 | { |
854 | struct sock_xprt *transport = |
855 | container_of(req->rq_xprt, struct sock_xprt, xprt); |
856 | struct sock *sk = transport->inet; |
857 | int ret = -EAGAIN; |
858 | |
859 | lock_sock(sk); |
860 | if (!sock_writeable(sk)) |
861 | ret = xs_nospace(req, transport); |
862 | release_sock(sk); |
863 | return ret; |
864 | } |
865 | |
866 | static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) |
867 | { |
868 | struct sock_xprt *transport = |
869 | container_of(req->rq_xprt, struct sock_xprt, xprt); |
870 | struct sock *sk = transport->inet; |
871 | int ret = -EAGAIN; |
872 | |
873 | if (vm_wait) |
874 | return -ENOBUFS; |
875 | lock_sock(sk); |
876 | if (!sk_stream_memory_free(sk)) |
877 | ret = xs_nospace(req, transport); |
878 | release_sock(sk); |
879 | return ret; |
880 | } |
881 | |
/*
 * Allocate the bio_vec array for @buf using the RPC task gfp mask.
 * @req is not used.  Returns the result of xdr_alloc_bvec().
 */
static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
{
	return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
}
886 | |
887 | static void xs_stream_abort_send_request(struct rpc_rqst *req) |
888 | { |
889 | struct rpc_xprt *xprt = req->rq_xprt; |
890 | struct sock_xprt *transport = |
891 | container_of(xprt, struct sock_xprt, xprt); |
892 | |
893 | if (transport->xmit.offset != 0 && |
894 | !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) |
895 | xprt_force_disconnect(xprt); |
896 | } |
897 | |
898 | /* |
899 | * Determine if the previous message in the stream was aborted before it |
900 | * could complete transmission. |
901 | */ |
902 | static bool |
903 | xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req) |
904 | { |
905 | return transport->xmit.offset != 0 && req->rq_bytes_sent == 0; |
906 | } |
907 | |
908 | /* |
909 | * Return the stream record marker field for a record of length < 2^31-1 |
910 | */ |
911 | static rpc_fraghdr |
912 | xs_stream_record_marker(struct xdr_buf *xdr) |
913 | { |
914 | if (!xdr->len) |
915 | return 0; |
916 | return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); |
917 | } |
918 | |
919 | /** |
920 | * xs_local_send_request - write an RPC request to an AF_LOCAL socket |
921 | * @req: pointer to RPC request |
922 | * |
923 | * Return values: |
924 | * 0: The request has been sent |
925 | * EAGAIN: The socket was blocked, please call again later to |
926 | * complete the request |
927 | * ENOTCONN: Caller needs to invoke connect logic then call again |
928 | * other: Some other error occurred, the request was not sent |
929 | */ |
930 | static int xs_local_send_request(struct rpc_rqst *req) |
931 | { |
932 | struct rpc_xprt *xprt = req->rq_xprt; |
933 | struct sock_xprt *transport = |
934 | container_of(xprt, struct sock_xprt, xprt); |
935 | struct xdr_buf *xdr = &req->rq_snd_buf; |
936 | rpc_fraghdr rm = xs_stream_record_marker(xdr); |
937 | unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; |
938 | struct msghdr msg = { |
939 | .msg_flags = XS_SENDMSG_FLAGS, |
940 | }; |
941 | bool vm_wait; |
942 | unsigned int sent; |
943 | int status; |
944 | |
945 | /* Close the stream if the previous transmission was incomplete */ |
946 | if (xs_send_request_was_aborted(transport, req)) { |
947 | xprt_force_disconnect(xprt); |
948 | return -ENOTCONN; |
949 | } |
950 | |
951 | xs_pktdump(msg: "packet data:" , |
952 | packet: req->rq_svec->iov_base, count: req->rq_svec->iov_len); |
953 | |
954 | vm_wait = sk_stream_is_writeable(sk: transport->inet) ? true : false; |
955 | |
956 | req->rq_xtime = ktime_get(); |
957 | status = xprt_sock_sendmsg(sock: transport->sock, msg: &msg, xdr, |
958 | base: transport->xmit.offset, marker: rm, sent_p: &sent); |
959 | dprintk("RPC: %s(%u) = %d\n" , |
960 | __func__, xdr->len - transport->xmit.offset, status); |
961 | |
962 | if (likely(sent > 0) || status == 0) { |
963 | transport->xmit.offset += sent; |
964 | req->rq_bytes_sent = transport->xmit.offset; |
965 | if (likely(req->rq_bytes_sent >= msglen)) { |
966 | req->rq_xmit_bytes_sent += transport->xmit.offset; |
967 | transport->xmit.offset = 0; |
968 | return 0; |
969 | } |
970 | status = -EAGAIN; |
971 | vm_wait = false; |
972 | } |
973 | |
974 | switch (status) { |
975 | case -EAGAIN: |
976 | status = xs_stream_nospace(req, vm_wait); |
977 | break; |
978 | default: |
979 | dprintk("RPC: sendmsg returned unrecognized error %d\n" , |
980 | -status); |
981 | fallthrough; |
982 | case -EPIPE: |
983 | xprt_force_disconnect(xprt); |
984 | status = -ENOTCONN; |
985 | } |
986 | |
987 | return status; |
988 | } |
989 | |
/**
 * xs_udp_send_request - write an RPC request to a UDP socket
 * @req: pointer to RPC request
 *
 * Return values:
 *          0:	The request has been sent
 *  -ENOTCONN:	The transport is not bound, or sendmsg reported -ENOTSOCK;
 *		caller needs to invoke connect logic then call again
 *  -EBADSLT:	xprt_request_get_cong() did not admit the request
 *  -ENOBUFS:	sendmsg reported -EAGAIN although the socket is writeable
 *      other:	The result of xs_sock_nospace() when the send has to be
 *		retried, a failure from xdr_alloc_bvec(), or the sendmsg
 *		error passed through unchanged
 */
static int xs_udp_send_request(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
	struct xdr_buf *xdr = &req->rq_snd_buf;
	struct msghdr msg = {
		.msg_name	= xs_addr(xprt),
		.msg_namelen	= xprt->addrlen,
		.msg_flags	= XS_SENDMSG_FLAGS,
	};
	unsigned int sent;
	int status;

	xs_pktdump("packet data:",
		   req->rq_svec->iov_base,
		   req->rq_svec->iov_len);

	if (!xprt_bound(xprt))
		return -ENOTCONN;

	if (!xprt_request_get_cong(xprt, req))
		return -EBADSLT;

	status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
	if (status < 0)
		return status;

	req->rq_xtime = ktime_get();
	status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);

	dprintk("RPC: xs_udp_send_request(%u) = %d\n",
			xdr->len, status);

	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
	if (status != -EPERM) {
		if (status == -EAGAIN && sock_writeable(transport->inet))
			status = -ENOBUFS;

		if (sent > 0 || status == 0) {
			req->rq_xmit_bytes_sent += sent;
			if (sent >= req->rq_slen)
				return 0;
			/* Still some bytes left; set up for a retry later. */
			status = -EAGAIN;
		}
	}

	switch (status) {
	case -EAGAIN:
		status = xs_sock_nospace(req);
		break;
	case -ENOTSOCK:
		/* Should we call xs_close() here? */
		status = -ENOTCONN;
		break;
	case -ENETUNREACH:
	case -ENOBUFS:
	case -EPIPE:
	case -ECONNREFUSED:
	case -EPERM:
		/* When the server has died, an ICMP port unreachable message
		 * prompts ECONNREFUSED. */
		break;
	default:
		dprintk("RPC: sendmsg returned unrecognized error %d\n",
			-status);
	}

	return status;
}
1072 | |
1073 | /** |
1074 | * xs_tcp_send_request - write an RPC request to a TCP socket |
1075 | * @req: pointer to RPC request |
1076 | * |
1077 | * Return values: |
1078 | * 0: The request has been sent |
1079 | * EAGAIN: The socket was blocked, please call again later to |
1080 | * complete the request |
1081 | * ENOTCONN: Caller needs to invoke connect logic then call again |
1082 | * other: Some other error occurred, the request was not sent |
1083 | * |
1084 | * XXX: In the case of soft timeouts, should we eventually give up |
1085 | * if sendmsg is not able to make progress? |
1086 | */ |
1087 | static int xs_tcp_send_request(struct rpc_rqst *req) |
1088 | { |
1089 | struct rpc_xprt *xprt = req->rq_xprt; |
1090 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1091 | struct xdr_buf *xdr = &req->rq_snd_buf; |
1092 | rpc_fraghdr rm = xs_stream_record_marker(xdr); |
1093 | unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; |
1094 | struct msghdr msg = { |
1095 | .msg_flags = XS_SENDMSG_FLAGS, |
1096 | }; |
1097 | bool vm_wait; |
1098 | unsigned int sent; |
1099 | int status; |
1100 | |
1101 | /* Close the stream if the previous transmission was incomplete */ |
1102 | if (xs_send_request_was_aborted(transport, req)) { |
1103 | if (transport->sock != NULL) |
1104 | kernel_sock_shutdown(sock: transport->sock, how: SHUT_RDWR); |
1105 | return -ENOTCONN; |
1106 | } |
1107 | if (!transport->inet) |
1108 | return -ENOTCONN; |
1109 | |
1110 | xs_pktdump(msg: "packet data:" , |
1111 | packet: req->rq_svec->iov_base, |
1112 | count: req->rq_svec->iov_len); |
1113 | |
1114 | if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) |
1115 | xs_tcp_set_socket_timeouts(xprt, sock: transport->sock); |
1116 | |
1117 | xs_set_srcport(transport, sock: transport->sock); |
1118 | |
1119 | /* Continue transmitting the packet/record. We must be careful |
1120 | * to cope with writespace callbacks arriving _after_ we have |
1121 | * called sendmsg(). */ |
1122 | req->rq_xtime = ktime_get(); |
1123 | tcp_sock_set_cork(sk: transport->inet, on: true); |
1124 | |
1125 | vm_wait = sk_stream_is_writeable(sk: transport->inet) ? true : false; |
1126 | |
1127 | do { |
1128 | status = xprt_sock_sendmsg(sock: transport->sock, msg: &msg, xdr, |
1129 | base: transport->xmit.offset, marker: rm, sent_p: &sent); |
1130 | |
1131 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n" , |
1132 | xdr->len - transport->xmit.offset, status); |
1133 | |
1134 | /* If we've sent the entire packet, immediately |
1135 | * reset the count of bytes sent. */ |
1136 | transport->xmit.offset += sent; |
1137 | req->rq_bytes_sent = transport->xmit.offset; |
1138 | if (likely(req->rq_bytes_sent >= msglen)) { |
1139 | req->rq_xmit_bytes_sent += transport->xmit.offset; |
1140 | transport->xmit.offset = 0; |
1141 | if (atomic_long_read(v: &xprt->xmit_queuelen) == 1) |
1142 | tcp_sock_set_cork(sk: transport->inet, on: false); |
1143 | return 0; |
1144 | } |
1145 | |
1146 | WARN_ON_ONCE(sent == 0 && status == 0); |
1147 | |
1148 | if (sent > 0) |
1149 | vm_wait = false; |
1150 | |
1151 | } while (status == 0); |
1152 | |
1153 | switch (status) { |
1154 | case -ENOTSOCK: |
1155 | status = -ENOTCONN; |
1156 | /* Should we call xs_close() here? */ |
1157 | break; |
1158 | case -EAGAIN: |
1159 | status = xs_stream_nospace(req, vm_wait); |
1160 | break; |
1161 | case -ECONNRESET: |
1162 | case -ECONNREFUSED: |
1163 | case -ENOTCONN: |
1164 | case -EADDRINUSE: |
1165 | case -ENOBUFS: |
1166 | case -EPIPE: |
1167 | break; |
1168 | default: |
1169 | dprintk("RPC: sendmsg returned unrecognized error %d\n" , |
1170 | -status); |
1171 | } |
1172 | |
1173 | return status; |
1174 | } |
1175 | |
1176 | static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) |
1177 | { |
1178 | transport->old_data_ready = sk->sk_data_ready; |
1179 | transport->old_state_change = sk->sk_state_change; |
1180 | transport->old_write_space = sk->sk_write_space; |
1181 | transport->old_error_report = sk->sk_error_report; |
1182 | } |
1183 | |
1184 | static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) |
1185 | { |
1186 | sk->sk_data_ready = transport->old_data_ready; |
1187 | sk->sk_state_change = transport->old_state_change; |
1188 | sk->sk_write_space = transport->old_write_space; |
1189 | sk->sk_error_report = transport->old_error_report; |
1190 | } |
1191 | |
1192 | static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) |
1193 | { |
1194 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1195 | |
1196 | transport->xprt_err = 0; |
1197 | clear_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state); |
1198 | clear_bit(XPRT_SOCK_WAKE_ERROR, addr: &transport->sock_state); |
1199 | clear_bit(XPRT_SOCK_WAKE_WRITE, addr: &transport->sock_state); |
1200 | clear_bit(XPRT_SOCK_WAKE_DISCONNECT, addr: &transport->sock_state); |
1201 | clear_bit(XPRT_SOCK_NOSPACE, addr: &transport->sock_state); |
1202 | } |
1203 | |
1204 | static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) |
1205 | { |
1206 | set_bit(nr, addr: &transport->sock_state); |
1207 | queue_work(wq: xprtiod_workqueue, work: &transport->error_worker); |
1208 | } |
1209 | |
1210 | static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) |
1211 | { |
1212 | xprt->connect_cookie++; |
1213 | smp_mb__before_atomic(); |
1214 | clear_bit(XPRT_CLOSE_WAIT, addr: &xprt->state); |
1215 | clear_bit(XPRT_CLOSING, addr: &xprt->state); |
1216 | xs_sock_reset_state_flags(xprt); |
1217 | smp_mb__after_atomic(); |
1218 | } |
1219 | |
1220 | /** |
1221 | * xs_error_report - callback to handle TCP socket state errors |
1222 | * @sk: socket |
1223 | * |
1224 | * Note: we don't call sock_error() since there may be a rpc_task |
1225 | * using the socket, and so we don't want to clear sk->sk_err. |
1226 | */ |
1227 | static void xs_error_report(struct sock *sk) |
1228 | { |
1229 | struct sock_xprt *transport; |
1230 | struct rpc_xprt *xprt; |
1231 | |
1232 | if (!(xprt = xprt_from_sock(sk))) |
1233 | return; |
1234 | |
1235 | transport = container_of(xprt, struct sock_xprt, xprt); |
1236 | transport->xprt_err = -sk->sk_err; |
1237 | if (transport->xprt_err == 0) |
1238 | return; |
1239 | dprintk("RPC: xs_error_report client %p, error=%d...\n" , |
1240 | xprt, -transport->xprt_err); |
1241 | trace_rpc_socket_error(xprt, socket: sk->sk_socket, error: transport->xprt_err); |
1242 | |
1243 | /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ |
1244 | smp_mb__before_atomic(); |
1245 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); |
1246 | } |
1247 | |
1248 | static void xs_reset_transport(struct sock_xprt *transport) |
1249 | { |
1250 | struct socket *sock = transport->sock; |
1251 | struct sock *sk = transport->inet; |
1252 | struct rpc_xprt *xprt = &transport->xprt; |
1253 | struct file *filp = transport->file; |
1254 | |
1255 | if (sk == NULL) |
1256 | return; |
1257 | /* |
1258 | * Make sure we're calling this in a context from which it is safe |
1259 | * to call __fput_sync(). In practice that means rpciod and the |
1260 | * system workqueue. |
1261 | */ |
1262 | if (!(current->flags & PF_WQ_WORKER)) { |
1263 | WARN_ON_ONCE(1); |
1264 | set_bit(XPRT_CLOSE_WAIT, addr: &xprt->state); |
1265 | return; |
1266 | } |
1267 | |
1268 | if (atomic_read(v: &transport->xprt.swapper)) |
1269 | sk_clear_memalloc(sk); |
1270 | |
1271 | tls_handshake_cancel(sk); |
1272 | |
1273 | kernel_sock_shutdown(sock, how: SHUT_RDWR); |
1274 | |
1275 | mutex_lock(&transport->recv_mutex); |
1276 | lock_sock(sk); |
1277 | transport->inet = NULL; |
1278 | transport->sock = NULL; |
1279 | transport->file = NULL; |
1280 | |
1281 | sk->sk_user_data = NULL; |
1282 | |
1283 | xs_restore_old_callbacks(transport, sk); |
1284 | xprt_clear_connected(xprt); |
1285 | xs_sock_reset_connection_flags(xprt); |
1286 | /* Reset stream record info */ |
1287 | xs_stream_reset_connect(transport); |
1288 | release_sock(sk); |
1289 | mutex_unlock(lock: &transport->recv_mutex); |
1290 | |
1291 | trace_rpc_socket_close(xprt, socket: sock); |
1292 | __fput_sync(filp); |
1293 | |
1294 | xprt_disconnect_done(xprt); |
1295 | } |
1296 | |
1297 | /** |
1298 | * xs_close - close a socket |
1299 | * @xprt: transport |
1300 | * |
1301 | * This is used when all requests are complete; ie, no DRC state remains |
1302 | * on the server we want to save. |
1303 | * |
1304 | * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with |
1305 | * xs_reset_transport() zeroing the socket from underneath a writer. |
1306 | */ |
1307 | static void xs_close(struct rpc_xprt *xprt) |
1308 | { |
1309 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1310 | |
1311 | dprintk("RPC: xs_close xprt %p\n" , xprt); |
1312 | |
1313 | if (transport->sock) |
1314 | tls_handshake_close(sock: transport->sock); |
1315 | xs_reset_transport(transport); |
1316 | xprt->reestablish_timeout = 0; |
1317 | } |
1318 | |
/* Log the injected disconnect and signal xprt_disconnect_done(). */
static void xs_inject_disconnect(struct rpc_xprt *xprt)
{
	dprintk("RPC: injecting transport disconnect on xprt=%p\n",
		xprt);
	xprt_disconnect_done(xprt);
}
1325 | |
/* Release the peer address strings, then the transport itself. */
static void xs_xprt_free(struct rpc_xprt *xprt)
{
	xs_free_peer_addresses(xprt);
	xprt_free(xprt);
}
1331 | |
1332 | /** |
1333 | * xs_destroy - prepare to shutdown a transport |
1334 | * @xprt: doomed transport |
1335 | * |
1336 | */ |
1337 | static void xs_destroy(struct rpc_xprt *xprt) |
1338 | { |
1339 | struct sock_xprt *transport = container_of(xprt, |
1340 | struct sock_xprt, xprt); |
1341 | dprintk("RPC: xs_destroy xprt %p\n" , xprt); |
1342 | |
1343 | cancel_delayed_work_sync(dwork: &transport->connect_worker); |
1344 | xs_close(xprt); |
1345 | cancel_work_sync(work: &transport->recv_worker); |
1346 | cancel_work_sync(work: &transport->error_worker); |
1347 | xs_xprt_free(xprt); |
1348 | module_put(THIS_MODULE); |
1349 | } |
1350 | |
1351 | /** |
1352 | * xs_udp_data_read_skb - receive callback for UDP sockets |
1353 | * @xprt: transport |
1354 | * @sk: socket |
1355 | * @skb: skbuff |
1356 | * |
1357 | */ |
1358 | static void xs_udp_data_read_skb(struct rpc_xprt *xprt, |
1359 | struct sock *sk, |
1360 | struct sk_buff *skb) |
1361 | { |
1362 | struct rpc_task *task; |
1363 | struct rpc_rqst *rovr; |
1364 | int repsize, copied; |
1365 | u32 _xid; |
1366 | __be32 *xp; |
1367 | |
1368 | repsize = skb->len; |
1369 | if (repsize < 4) { |
1370 | dprintk("RPC: impossible RPC reply size %d!\n" , repsize); |
1371 | return; |
1372 | } |
1373 | |
1374 | /* Copy the XID from the skb... */ |
1375 | xp = skb_header_pointer(skb, offset: 0, len: sizeof(_xid), buffer: &_xid); |
1376 | if (xp == NULL) |
1377 | return; |
1378 | |
1379 | /* Look up and lock the request corresponding to the given XID */ |
1380 | spin_lock(lock: &xprt->queue_lock); |
1381 | rovr = xprt_lookup_rqst(xprt, xid: *xp); |
1382 | if (!rovr) |
1383 | goto out_unlock; |
1384 | xprt_pin_rqst(req: rovr); |
1385 | xprt_update_rtt(task: rovr->rq_task); |
1386 | spin_unlock(lock: &xprt->queue_lock); |
1387 | task = rovr->rq_task; |
1388 | |
1389 | if ((copied = rovr->rq_private_buf.buflen) > repsize) |
1390 | copied = repsize; |
1391 | |
1392 | /* Suck it into the iovec, verify checksum if not done by hw. */ |
1393 | if (csum_partial_copy_to_xdr(xdr: &rovr->rq_private_buf, skb)) { |
1394 | spin_lock(lock: &xprt->queue_lock); |
1395 | __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); |
1396 | goto out_unpin; |
1397 | } |
1398 | |
1399 | |
1400 | spin_lock(lock: &xprt->transport_lock); |
1401 | xprt_adjust_cwnd(xprt, task, result: copied); |
1402 | spin_unlock(lock: &xprt->transport_lock); |
1403 | spin_lock(lock: &xprt->queue_lock); |
1404 | xprt_complete_rqst(task, copied); |
1405 | __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); |
1406 | out_unpin: |
1407 | xprt_unpin_rqst(req: rovr); |
1408 | out_unlock: |
1409 | spin_unlock(lock: &xprt->queue_lock); |
1410 | } |
1411 | |
1412 | static void xs_udp_data_receive(struct sock_xprt *transport) |
1413 | { |
1414 | struct sk_buff *skb; |
1415 | struct sock *sk; |
1416 | int err; |
1417 | |
1418 | mutex_lock(&transport->recv_mutex); |
1419 | sk = transport->inet; |
1420 | if (sk == NULL) |
1421 | goto out; |
1422 | for (;;) { |
1423 | skb = skb_recv_udp(sk, MSG_DONTWAIT, err: &err); |
1424 | if (skb == NULL) |
1425 | break; |
1426 | xs_udp_data_read_skb(xprt: &transport->xprt, sk, skb); |
1427 | consume_skb(skb); |
1428 | cond_resched(); |
1429 | } |
1430 | xs_poll_check_readable(transport); |
1431 | out: |
1432 | mutex_unlock(lock: &transport->recv_mutex); |
1433 | } |
1434 | |
1435 | static void xs_udp_data_receive_workfn(struct work_struct *work) |
1436 | { |
1437 | struct sock_xprt *transport = |
1438 | container_of(work, struct sock_xprt, recv_worker); |
1439 | unsigned int pflags = memalloc_nofs_save(); |
1440 | |
1441 | xs_udp_data_receive(transport); |
1442 | memalloc_nofs_restore(flags: pflags); |
1443 | } |
1444 | |
1445 | /** |
1446 | * xs_data_ready - "data ready" callback for sockets |
1447 | * @sk: socket with data to read |
1448 | * |
1449 | */ |
1450 | static void xs_data_ready(struct sock *sk) |
1451 | { |
1452 | struct rpc_xprt *xprt; |
1453 | |
1454 | trace_sk_data_ready(sk); |
1455 | |
1456 | xprt = xprt_from_sock(sk); |
1457 | if (xprt != NULL) { |
1458 | struct sock_xprt *transport = container_of(xprt, |
1459 | struct sock_xprt, xprt); |
1460 | |
1461 | trace_xs_data_ready(xprt); |
1462 | |
1463 | transport->old_data_ready(sk); |
1464 | |
1465 | if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) |
1466 | return; |
1467 | |
1468 | /* Any data means we had a useful conversation, so |
1469 | * then we don't need to delay the next reconnect |
1470 | */ |
1471 | if (xprt->reestablish_timeout) |
1472 | xprt->reestablish_timeout = 0; |
1473 | if (!test_and_set_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state)) |
1474 | queue_work(wq: xprtiod_workqueue, work: &transport->recv_worker); |
1475 | } |
1476 | } |
1477 | |
/*
 * Helper function to force a TCP close if the server is sending
 * junk and/or it has put us in CLOSE_WAIT. Simply forwards to
 * xprt_force_disconnect().
 */
static void xs_tcp_force_close(struct rpc_xprt *xprt)
{
	xprt_force_disconnect(xprt);
}
1486 | |
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* Maximum payload reported for the TCP backchannel: one page. */
static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
{
	return PAGE_SIZE;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1493 | |
1494 | /** |
1495 | * xs_local_state_change - callback to handle AF_LOCAL socket state changes |
1496 | * @sk: socket whose state has changed |
1497 | * |
1498 | */ |
1499 | static void xs_local_state_change(struct sock *sk) |
1500 | { |
1501 | struct rpc_xprt *xprt; |
1502 | struct sock_xprt *transport; |
1503 | |
1504 | if (!(xprt = xprt_from_sock(sk))) |
1505 | return; |
1506 | transport = container_of(xprt, struct sock_xprt, xprt); |
1507 | if (sk->sk_shutdown & SHUTDOWN_MASK) { |
1508 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1509 | /* Trigger the socket release */ |
1510 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); |
1511 | } |
1512 | } |
1513 | |
1514 | /** |
1515 | * xs_tcp_state_change - callback to handle TCP socket state changes |
1516 | * @sk: socket whose state has changed |
1517 | * |
1518 | */ |
1519 | static void xs_tcp_state_change(struct sock *sk) |
1520 | { |
1521 | struct rpc_xprt *xprt; |
1522 | struct sock_xprt *transport; |
1523 | |
1524 | if (!(xprt = xprt_from_sock(sk))) |
1525 | return; |
1526 | dprintk("RPC: xs_tcp_state_change client %p...\n" , xprt); |
1527 | dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n" , |
1528 | sk->sk_state, xprt_connected(xprt), |
1529 | sock_flag(sk, SOCK_DEAD), |
1530 | sock_flag(sk, SOCK_ZAPPED), |
1531 | sk->sk_shutdown); |
1532 | |
1533 | transport = container_of(xprt, struct sock_xprt, xprt); |
1534 | trace_rpc_socket_state_change(xprt, socket: sk->sk_socket); |
1535 | switch (sk->sk_state) { |
1536 | case TCP_ESTABLISHED: |
1537 | if (!xprt_test_and_set_connected(xprt)) { |
1538 | xprt->connect_cookie++; |
1539 | clear_bit(XPRT_SOCK_CONNECTING, addr: &transport->sock_state); |
1540 | xprt_clear_connecting(xprt); |
1541 | |
1542 | xprt->stat.connect_count++; |
1543 | xprt->stat.connect_time += (long)jiffies - |
1544 | xprt->stat.connect_start; |
1545 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING); |
1546 | } |
1547 | break; |
1548 | case TCP_FIN_WAIT1: |
1549 | /* The client initiated a shutdown of the socket */ |
1550 | xprt->connect_cookie++; |
1551 | xprt->reestablish_timeout = 0; |
1552 | set_bit(XPRT_CLOSING, addr: &xprt->state); |
1553 | smp_mb__before_atomic(); |
1554 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1555 | clear_bit(XPRT_CLOSE_WAIT, addr: &xprt->state); |
1556 | smp_mb__after_atomic(); |
1557 | break; |
1558 | case TCP_CLOSE_WAIT: |
1559 | /* The server initiated a shutdown of the socket */ |
1560 | xprt->connect_cookie++; |
1561 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1562 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); |
1563 | fallthrough; |
1564 | case TCP_CLOSING: |
1565 | /* |
1566 | * If the server closed down the connection, make sure that |
1567 | * we back off before reconnecting |
1568 | */ |
1569 | if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) |
1570 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
1571 | break; |
1572 | case TCP_LAST_ACK: |
1573 | set_bit(XPRT_CLOSING, addr: &xprt->state); |
1574 | smp_mb__before_atomic(); |
1575 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1576 | smp_mb__after_atomic(); |
1577 | break; |
1578 | case TCP_CLOSE: |
1579 | if (test_and_clear_bit(XPRT_SOCK_CONNECTING, |
1580 | addr: &transport->sock_state)) { |
1581 | xs_reset_srcport(transport); |
1582 | xprt_clear_connecting(xprt); |
1583 | } |
1584 | clear_bit(XPRT_CLOSING, addr: &xprt->state); |
1585 | /* Trigger the socket release */ |
1586 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); |
1587 | } |
1588 | } |
1589 | |
1590 | static void xs_write_space(struct sock *sk) |
1591 | { |
1592 | struct sock_xprt *transport; |
1593 | struct rpc_xprt *xprt; |
1594 | |
1595 | if (!sk->sk_socket) |
1596 | return; |
1597 | clear_bit(SOCK_NOSPACE, addr: &sk->sk_socket->flags); |
1598 | |
1599 | if (unlikely(!(xprt = xprt_from_sock(sk)))) |
1600 | return; |
1601 | transport = container_of(xprt, struct sock_xprt, xprt); |
1602 | if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, addr: &transport->sock_state)) |
1603 | return; |
1604 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); |
1605 | sk->sk_write_pending--; |
1606 | } |
1607 | |
/**
 * xs_udp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests. Writers are therefore only woken once
 * sock_writeable() holds.
 */
static void xs_udp_write_space(struct sock *sk)
{
	/* from net/core/sock.c:sock_def_write_space */
	if (!sock_writeable(sk))
		return;
	xs_write_space(sk);
}
1624 | |
/**
 * xs_tcp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests. Writers are therefore only woken once
 * sk_stream_is_writeable() holds.
 */
static void xs_tcp_write_space(struct sock *sk)
{
	/* from net/core/stream.c:sk_stream_write_space */
	if (!sk_stream_is_writeable(sk))
		return;
	xs_write_space(sk);
}
1641 | |
1642 | static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) |
1643 | { |
1644 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1645 | struct sock *sk = transport->inet; |
1646 | |
1647 | if (transport->rcvsize) { |
1648 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
1649 | sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; |
1650 | } |
1651 | if (transport->sndsize) { |
1652 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
1653 | sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; |
1654 | sk->sk_write_space(sk); |
1655 | } |
1656 | } |
1657 | |
1658 | /** |
1659 | * xs_udp_set_buffer_size - set send and receive limits |
1660 | * @xprt: generic transport |
1661 | * @sndsize: requested size of send buffer, in bytes |
1662 | * @rcvsize: requested size of receive buffer, in bytes |
1663 | * |
1664 | * Set socket send and receive buffer size limits. |
1665 | */ |
1666 | static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) |
1667 | { |
1668 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1669 | |
1670 | transport->sndsize = 0; |
1671 | if (sndsize) |
1672 | transport->sndsize = sndsize + 1024; |
1673 | transport->rcvsize = 0; |
1674 | if (rcvsize) |
1675 | transport->rcvsize = rcvsize + 1024; |
1676 | |
1677 | xs_udp_do_set_buffer_size(xprt); |
1678 | } |
1679 | |
1680 | /** |
1681 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport |
1682 | * @xprt: controlling transport |
1683 | * @task: task that timed out |
1684 | * |
1685 | * Adjust the congestion window after a retransmit timeout has occurred. |
1686 | */ |
1687 | static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) |
1688 | { |
1689 | spin_lock(lock: &xprt->transport_lock); |
1690 | xprt_adjust_cwnd(xprt, task, result: -ETIMEDOUT); |
1691 | spin_unlock(lock: &xprt->transport_lock); |
1692 | } |
1693 | |
1694 | static int xs_get_random_port(void) |
1695 | { |
1696 | unsigned short min = xprt_min_resvport, max = xprt_max_resvport; |
1697 | unsigned short range; |
1698 | unsigned short rand; |
1699 | |
1700 | if (max < min) |
1701 | return -EADDRINUSE; |
1702 | range = max - min + 1; |
1703 | rand = get_random_u32_below(ceil: range); |
1704 | return rand + min; |
1705 | } |
1706 | |
1707 | static unsigned short xs_sock_getport(struct socket *sock) |
1708 | { |
1709 | struct sockaddr_storage buf; |
1710 | unsigned short port = 0; |
1711 | |
1712 | if (kernel_getsockname(sock, addr: (struct sockaddr *)&buf) < 0) |
1713 | goto out; |
1714 | switch (buf.ss_family) { |
1715 | case AF_INET6: |
1716 | port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port); |
1717 | break; |
1718 | case AF_INET: |
1719 | port = ntohs(((struct sockaddr_in *)&buf)->sin_port); |
1720 | } |
1721 | out: |
1722 | return port; |
1723 | } |
1724 | |
/**
 * xs_set_port - reset the port number in the remote endpoint address
 * @xprt: generic transport
 * @port: new port number
 *
 * Stores @port in the transport's peer address and refreshes the peer
 * port information via xs_update_peer_port().
 */
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
{
	dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);

	rpc_set_port(xs_addr(xprt), port);
	xs_update_peer_port(xprt);
}
1738 | |
1739 | static void xs_reset_srcport(struct sock_xprt *transport) |
1740 | { |
1741 | transport->srcport = 0; |
1742 | } |
1743 | |
1744 | static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) |
1745 | { |
1746 | if (transport->srcport == 0 && transport->xprt.reuseport) |
1747 | transport->srcport = xs_sock_getport(sock); |
1748 | } |
1749 | |
1750 | static int xs_get_srcport(struct sock_xprt *transport) |
1751 | { |
1752 | int port = transport->srcport; |
1753 | |
1754 | if (port == 0 && transport->xprt.resvport) |
1755 | port = xs_get_random_port(); |
1756 | return port; |
1757 | } |
1758 | |
1759 | static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) |
1760 | { |
1761 | struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); |
1762 | unsigned short ret = 0; |
1763 | mutex_lock(&sock->recv_mutex); |
1764 | if (sock->sock) |
1765 | ret = xs_sock_getport(sock: sock->sock); |
1766 | mutex_unlock(lock: &sock->recv_mutex); |
1767 | return ret; |
1768 | } |
1769 | |
1770 | static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) |
1771 | { |
1772 | struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); |
1773 | union { |
1774 | struct sockaddr sa; |
1775 | struct sockaddr_storage st; |
1776 | } saddr; |
1777 | int ret = -ENOTCONN; |
1778 | |
1779 | mutex_lock(&sock->recv_mutex); |
1780 | if (sock->sock) { |
1781 | ret = kernel_getsockname(sock: sock->sock, addr: &saddr.sa); |
1782 | if (ret >= 0) |
1783 | ret = snprintf(buf, size: buflen, fmt: "%pISc" , &saddr.sa); |
1784 | } |
1785 | mutex_unlock(lock: &sock->recv_mutex); |
1786 | return ret; |
1787 | } |
1788 | |
1789 | static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) |
1790 | { |
1791 | if (transport->srcport != 0) |
1792 | transport->srcport = 0; |
1793 | if (!transport->xprt.resvport) |
1794 | return 0; |
1795 | if (port <= xprt_min_resvport || port > xprt_max_resvport) |
1796 | return xprt_max_resvport; |
1797 | return --port; |
1798 | } |
1799 | static int xs_bind(struct sock_xprt *transport, struct socket *sock) |
1800 | { |
1801 | struct sockaddr_storage myaddr; |
1802 | int err, nloop = 0; |
1803 | int port = xs_get_srcport(transport); |
1804 | unsigned short last; |
1805 | |
1806 | /* |
1807 | * If we are asking for any ephemeral port (i.e. port == 0 && |
1808 | * transport->xprt.resvport == 0), don't bind. Let the local |
1809 | * port selection happen implicitly when the socket is used |
1810 | * (for example at connect time). |
1811 | * |
1812 | * This ensures that we can continue to establish TCP |
1813 | * connections even when all local ephemeral ports are already |
1814 | * a part of some TCP connection. This makes no difference |
1815 | * for UDP sockets, but also doesn't harm them. |
1816 | * |
1817 | * If we're asking for any reserved port (i.e. port == 0 && |
1818 | * transport->xprt.resvport == 1) xs_get_srcport above will |
1819 | * ensure that port is non-zero and we will bind as needed. |
1820 | */ |
1821 | if (port <= 0) |
1822 | return port; |
1823 | |
1824 | memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); |
1825 | do { |
1826 | rpc_set_port(sap: (struct sockaddr *)&myaddr, port); |
1827 | err = kernel_bind(sock, addr: (struct sockaddr *)&myaddr, |
1828 | addrlen: transport->xprt.addrlen); |
1829 | if (err == 0) { |
1830 | if (transport->xprt.reuseport) |
1831 | transport->srcport = port; |
1832 | break; |
1833 | } |
1834 | last = port; |
1835 | port = xs_next_srcport(transport, port); |
1836 | if (port > last) |
1837 | nloop++; |
1838 | } while (err == -EADDRINUSE && nloop != 2); |
1839 | |
1840 | if (myaddr.ss_family == AF_INET) |
1841 | dprintk("RPC: %s %pI4:%u: %s (%d)\n" , __func__, |
1842 | &((struct sockaddr_in *)&myaddr)->sin_addr, |
1843 | port, err ? "failed" : "ok" , err); |
1844 | else |
1845 | dprintk("RPC: %s %pI6:%u: %s (%d)\n" , __func__, |
1846 | &((struct sockaddr_in6 *)&myaddr)->sin6_addr, |
1847 | port, err ? "failed" : "ok" , err); |
1848 | return err; |
1849 | } |
1850 | |
1851 | /* |
1852 | * We don't support autobind on AF_LOCAL sockets |
1853 | */ |
1854 | static void xs_local_rpcbind(struct rpc_task *task) |
1855 | { |
1856 | xprt_set_bound(xprt: task->tk_xprt); |
1857 | } |
1858 | |
/* Intentionally empty: this set_port variant ignores @port. */
static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
{
}
1862 | |
1863 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
1864 | static struct lock_class_key xs_key[3]; |
1865 | static struct lock_class_key xs_slock_key[3]; |
1866 | |
1867 | static inline void xs_reclassify_socketu(struct socket *sock) |
1868 | { |
1869 | struct sock *sk = sock->sk; |
1870 | |
1871 | sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC" , |
1872 | &xs_slock_key[0], "sk_lock-AF_LOCAL-RPC" , &xs_key[0]); |
1873 | } |
1874 | |
1875 | static inline void xs_reclassify_socket4(struct socket *sock) |
1876 | { |
1877 | struct sock *sk = sock->sk; |
1878 | |
1879 | sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC" , |
1880 | &xs_slock_key[1], "sk_lock-AF_INET-RPC" , &xs_key[1]); |
1881 | } |
1882 | |
1883 | static inline void xs_reclassify_socket6(struct socket *sock) |
1884 | { |
1885 | struct sock *sk = sock->sk; |
1886 | |
1887 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC" , |
1888 | &xs_slock_key[2], "sk_lock-AF_INET6-RPC" , &xs_key[2]); |
1889 | } |
1890 | |
1891 | static inline void xs_reclassify_socket(int family, struct socket *sock) |
1892 | { |
1893 | if (WARN_ON_ONCE(!sock_allow_reclassification(sock->sk))) |
1894 | return; |
1895 | |
1896 | switch (family) { |
1897 | case AF_LOCAL: |
1898 | xs_reclassify_socketu(sock); |
1899 | break; |
1900 | case AF_INET: |
1901 | xs_reclassify_socket4(sock); |
1902 | break; |
1903 | case AF_INET6: |
1904 | xs_reclassify_socket6(sock); |
1905 | break; |
1906 | } |
1907 | } |
1908 | #else |
/* Without CONFIG_DEBUG_LOCK_ALLOC there is nothing to reclassify. */
static inline void xs_reclassify_socket(int family, struct socket *sock)
{
}
1912 | #endif |
1913 | |
/* Work handler that deliberately does nothing; @work is ignored. */
static void xs_dummy_setup_socket(struct work_struct *work)
{
}
1917 | |
1918 | static struct socket *xs_create_sock(struct rpc_xprt *xprt, |
1919 | struct sock_xprt *transport, int family, int type, |
1920 | int protocol, bool reuseport) |
1921 | { |
1922 | struct file *filp; |
1923 | struct socket *sock; |
1924 | int err; |
1925 | |
1926 | err = __sock_create(net: xprt->xprt_net, family, type, proto: protocol, res: &sock, kern: 1); |
1927 | if (err < 0) { |
1928 | dprintk("RPC: can't create %d transport socket (%d).\n" , |
1929 | protocol, -err); |
1930 | goto out; |
1931 | } |
1932 | xs_reclassify_socket(family, sock); |
1933 | |
1934 | if (reuseport) |
1935 | sock_set_reuseport(sk: sock->sk); |
1936 | |
1937 | err = xs_bind(transport, sock); |
1938 | if (err) { |
1939 | sock_release(sock); |
1940 | goto out; |
1941 | } |
1942 | |
1943 | filp = sock_alloc_file(sock, O_NONBLOCK, NULL); |
1944 | if (IS_ERR(ptr: filp)) |
1945 | return ERR_CAST(ptr: filp); |
1946 | transport->file = filp; |
1947 | |
1948 | return sock; |
1949 | out: |
1950 | return ERR_PTR(error: err); |
1951 | } |
1952 | |
1953 | static int xs_local_finish_connecting(struct rpc_xprt *xprt, |
1954 | struct socket *sock) |
1955 | { |
1956 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, |
1957 | xprt); |
1958 | |
1959 | if (!transport->inet) { |
1960 | struct sock *sk = sock->sk; |
1961 | |
1962 | lock_sock(sk); |
1963 | |
1964 | xs_save_old_callbacks(transport, sk); |
1965 | |
1966 | sk->sk_user_data = xprt; |
1967 | sk->sk_data_ready = xs_data_ready; |
1968 | sk->sk_write_space = xs_udp_write_space; |
1969 | sk->sk_state_change = xs_local_state_change; |
1970 | sk->sk_error_report = xs_error_report; |
1971 | sk->sk_use_task_frag = false; |
1972 | |
1973 | xprt_clear_connected(xprt); |
1974 | |
1975 | /* Reset to new socket */ |
1976 | transport->sock = sock; |
1977 | transport->inet = sk; |
1978 | |
1979 | release_sock(sk); |
1980 | } |
1981 | |
1982 | xs_stream_start_connect(transport); |
1983 | |
1984 | return kernel_connect(sock, addr: xs_addr(xprt), addrlen: xprt->addrlen, flags: 0); |
1985 | } |
1986 | |
/**
 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
 * @transport: socket transport to connect
 *
 * Creates an AF_LOCAL stream socket, wraps it in a file stored in
 * transport->file and connects it.  Whatever the outcome, the transport's
 * "connecting" state is cleared and pending tasks are woken with the
 * resulting status.
 *
 * Return: 0 on success, otherwise the negative errno from socket
 * creation, file allocation or the connect attempt.
 */
static int xs_local_setup_socket(struct sock_xprt *transport)
{
	struct rpc_xprt *xprt = &transport->xprt;
	const char *peer = xprt->address_strings[RPC_DISPLAY_ADDR];
	struct socket *sock;
	struct file *sock_file;
	int status;

	status = __sock_create(xprt->xprt_net, AF_LOCAL,
			       SOCK_STREAM, 0, &sock, 1);
	if (status < 0) {
		dprintk("RPC: can't create AF_LOCAL "
			"transport socket (%d).\n", -status);
		goto out;
	}
	xs_reclassify_socket(AF_LOCAL, sock);

	sock_file = sock_alloc_file(sock, O_NONBLOCK, NULL);
	if (IS_ERR(sock_file)) {
		status = PTR_ERR(sock_file);
		goto out;
	}
	transport->file = sock_file;

	dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
			xprt, peer);

	status = xs_local_finish_connecting(xprt, sock);
	trace_rpc_socket_connect(xprt, sock, status);
	switch (status) {
	case 0:
		dprintk("RPC: xprt %p connected to %s\n",
				xprt, peer);
		/* Account for the completed connect in the statistics */
		xprt->stat.connect_count++;
		xprt->stat.connect_time += (long)jiffies -
					   xprt->stat.connect_start;
		xprt_set_connected(xprt);
		break;
	case -ENOBUFS:
		/* Reported to the waiters below without a log message */
		break;
	case -ENOENT:
		dprintk("RPC: xprt %p: socket %s does not exist\n",
				xprt, peer);
		break;
	case -ECONNREFUSED:
		dprintk("RPC: xprt %p: connection refused for %s\n",
				xprt, peer);
		break;
	default:
		printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
				__func__, -status,
				peer);
	}

out:
	xprt_clear_connecting(xprt);
	xprt_wake_pending_tasks(xprt, status);
	return status;
}
2049 | |
2050 | static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
2051 | { |
2052 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2053 | int ret; |
2054 | |
2055 | if (transport->file) |
2056 | goto force_disconnect; |
2057 | |
2058 | if (RPC_IS_ASYNC(task)) { |
2059 | /* |
2060 | * We want the AF_LOCAL connect to be resolved in the |
2061 | * filesystem namespace of the process making the rpc |
2062 | * call. Thus we connect synchronously. |
2063 | * |
2064 | * If we want to support asynchronous AF_LOCAL calls, |
2065 | * we'll need to figure out how to pass a namespace to |
2066 | * connect. |
2067 | */ |
2068 | rpc_task_set_rpc_status(task, rpc_status: -ENOTCONN); |
2069 | goto out_wake; |
2070 | } |
2071 | ret = xs_local_setup_socket(transport); |
2072 | if (ret && !RPC_IS_SOFTCONN(task)) |
2073 | msleep_interruptible(msecs: 15000); |
2074 | return; |
2075 | force_disconnect: |
2076 | xprt_force_disconnect(xprt); |
2077 | out_wake: |
2078 | xprt_clear_connecting(xprt); |
2079 | xprt_wake_pending_tasks(xprt, status: -ENOTCONN); |
2080 | } |
2081 | |
2082 | #if IS_ENABLED(CONFIG_SUNRPC_SWAP) |
2083 | /* |
2084 | * Note that this should be called with XPRT_LOCKED held, or recv_mutex |
2085 | * held, or when we otherwise know that we have exclusive access to the |
2086 | * socket, to guard against races with xs_reset_transport. |
2087 | */ |
2088 | static void xs_set_memalloc(struct rpc_xprt *xprt) |
2089 | { |
2090 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, |
2091 | xprt); |
2092 | |
2093 | /* |
2094 | * If there's no sock, then we have nothing to set. The |
2095 | * reconnecting process will get it for us. |
2096 | */ |
2097 | if (!transport->inet) |
2098 | return; |
2099 | if (atomic_read(v: &xprt->swapper)) |
2100 | sk_set_memalloc(sk: transport->inet); |
2101 | } |
2102 | |
2103 | /** |
2104 | * xs_enable_swap - Tag this transport as being used for swap. |
2105 | * @xprt: transport to tag |
2106 | * |
2107 | * Take a reference to this transport on behalf of the rpc_clnt, and |
2108 | * optionally mark it for swapping if it wasn't already. |
2109 | */ |
2110 | static int |
2111 | xs_enable_swap(struct rpc_xprt *xprt) |
2112 | { |
2113 | struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); |
2114 | |
2115 | mutex_lock(&xs->recv_mutex); |
2116 | if (atomic_inc_return(v: &xprt->swapper) == 1 && |
2117 | xs->inet) |
2118 | sk_set_memalloc(sk: xs->inet); |
2119 | mutex_unlock(lock: &xs->recv_mutex); |
2120 | return 0; |
2121 | } |
2122 | |
2123 | /** |
2124 | * xs_disable_swap - Untag this transport as being used for swap. |
2125 | * @xprt: transport to tag |
2126 | * |
2127 | * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the |
2128 | * swapper refcount goes to 0, untag the socket as a memalloc socket. |
2129 | */ |
2130 | static void |
2131 | xs_disable_swap(struct rpc_xprt *xprt) |
2132 | { |
2133 | struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); |
2134 | |
2135 | mutex_lock(&xs->recv_mutex); |
2136 | if (atomic_dec_and_test(v: &xprt->swapper) && |
2137 | xs->inet) |
2138 | sk_clear_memalloc(sk: xs->inet); |
2139 | mutex_unlock(lock: &xs->recv_mutex); |
2140 | } |
2141 | #else |
/* Swap over SunRPC is compiled out: nothing to tag. */
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
}
2145 | |
2146 | static int |
2147 | xs_enable_swap(struct rpc_xprt *xprt) |
2148 | { |
2149 | return -EINVAL; |
2150 | } |
2151 | |
/* Swap over SunRPC is compiled out: nothing to undo. */
static void
xs_disable_swap(struct rpc_xprt *xprt)
{
}
2156 | #endif |
2157 | |
2158 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
2159 | { |
2160 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2161 | |
2162 | if (!transport->inet) { |
2163 | struct sock *sk = sock->sk; |
2164 | |
2165 | lock_sock(sk); |
2166 | |
2167 | xs_save_old_callbacks(transport, sk); |
2168 | |
2169 | sk->sk_user_data = xprt; |
2170 | sk->sk_data_ready = xs_data_ready; |
2171 | sk->sk_write_space = xs_udp_write_space; |
2172 | sk->sk_use_task_frag = false; |
2173 | |
2174 | xprt_set_connected(xprt); |
2175 | |
2176 | /* Reset to new socket */ |
2177 | transport->sock = sock; |
2178 | transport->inet = sk; |
2179 | |
2180 | xs_set_memalloc(xprt); |
2181 | |
2182 | release_sock(sk); |
2183 | } |
2184 | xs_udp_do_set_buffer_size(xprt); |
2185 | |
2186 | xprt->stat.connect_start = jiffies; |
2187 | } |
2188 | |
2189 | static void xs_udp_setup_socket(struct work_struct *work) |
2190 | { |
2191 | struct sock_xprt *transport = |
2192 | container_of(work, struct sock_xprt, connect_worker.work); |
2193 | struct rpc_xprt *xprt = &transport->xprt; |
2194 | struct socket *sock; |
2195 | int status = -EIO; |
2196 | unsigned int pflags = current->flags; |
2197 | |
2198 | if (atomic_read(v: &xprt->swapper)) |
2199 | current->flags |= PF_MEMALLOC; |
2200 | sock = xs_create_sock(xprt, transport, |
2201 | family: xs_addr(xprt)->sa_family, type: SOCK_DGRAM, |
2202 | IPPROTO_UDP, reuseport: false); |
2203 | if (IS_ERR(ptr: sock)) |
2204 | goto out; |
2205 | |
2206 | dprintk("RPC: worker connecting xprt %p via %s to " |
2207 | "%s (port %s)\n" , xprt, |
2208 | xprt->address_strings[RPC_DISPLAY_PROTO], |
2209 | xprt->address_strings[RPC_DISPLAY_ADDR], |
2210 | xprt->address_strings[RPC_DISPLAY_PORT]); |
2211 | |
2212 | xs_udp_finish_connecting(xprt, sock); |
2213 | trace_rpc_socket_connect(xprt, socket: sock, error: 0); |
2214 | status = 0; |
2215 | out: |
2216 | xprt_clear_connecting(xprt); |
2217 | xprt_unlock_connect(xprt, transport); |
2218 | xprt_wake_pending_tasks(xprt, status); |
2219 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
2220 | } |
2221 | |
2222 | /** |
2223 | * xs_tcp_shutdown - gracefully shut down a TCP socket |
2224 | * @xprt: transport |
2225 | * |
2226 | * Initiates a graceful shutdown of the TCP socket by calling the |
2227 | * equivalent of shutdown(SHUT_RDWR); |
2228 | */ |
2229 | static void xs_tcp_shutdown(struct rpc_xprt *xprt) |
2230 | { |
2231 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2232 | struct socket *sock = transport->sock; |
2233 | int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE; |
2234 | |
2235 | if (sock == NULL) |
2236 | return; |
2237 | if (!xprt->reuseport) { |
2238 | xs_close(xprt); |
2239 | return; |
2240 | } |
2241 | switch (skst) { |
2242 | case TCP_FIN_WAIT1: |
2243 | case TCP_FIN_WAIT2: |
2244 | case TCP_LAST_ACK: |
2245 | break; |
2246 | case TCP_ESTABLISHED: |
2247 | case TCP_CLOSE_WAIT: |
2248 | kernel_sock_shutdown(sock, how: SHUT_RDWR); |
2249 | trace_rpc_socket_shutdown(xprt, socket: sock); |
2250 | break; |
2251 | default: |
2252 | xs_reset_transport(transport); |
2253 | } |
2254 | } |
2255 | |
2256 | static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, |
2257 | struct socket *sock) |
2258 | { |
2259 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2260 | struct net *net = sock_net(sk: sock->sk); |
2261 | unsigned long connect_timeout; |
2262 | unsigned long syn_retries; |
2263 | unsigned int keepidle; |
2264 | unsigned int keepcnt; |
2265 | unsigned int timeo; |
2266 | unsigned long t; |
2267 | |
2268 | spin_lock(lock: &xprt->transport_lock); |
2269 | keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); |
2270 | keepcnt = xprt->timeout->to_retries + 1; |
2271 | timeo = jiffies_to_msecs(j: xprt->timeout->to_initval) * |
2272 | (xprt->timeout->to_retries + 1); |
2273 | clear_bit(XPRT_SOCK_UPD_TIMEOUT, addr: &transport->sock_state); |
2274 | spin_unlock(lock: &xprt->transport_lock); |
2275 | |
2276 | /* TCP Keepalive options */ |
2277 | sock_set_keepalive(sk: sock->sk); |
2278 | tcp_sock_set_keepidle(sk: sock->sk, val: keepidle); |
2279 | tcp_sock_set_keepintvl(sk: sock->sk, val: keepidle); |
2280 | tcp_sock_set_keepcnt(sk: sock->sk, val: keepcnt); |
2281 | |
2282 | /* TCP user timeout (see RFC5482) */ |
2283 | tcp_sock_set_user_timeout(sk: sock->sk, val: timeo); |
2284 | |
2285 | /* Connect timeout */ |
2286 | connect_timeout = max_t(unsigned long, |
2287 | DIV_ROUND_UP(xprt->connect_timeout, HZ), 1); |
2288 | syn_retries = max_t(unsigned long, |
2289 | READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1); |
2290 | for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++) |
2291 | ; |
2292 | if (t <= syn_retries) |
2293 | tcp_sock_set_syncnt(sk: sock->sk, val: t - 1); |
2294 | } |
2295 | |
2296 | static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt, |
2297 | unsigned long connect_timeout) |
2298 | { |
2299 | struct sock_xprt *transport = |
2300 | container_of(xprt, struct sock_xprt, xprt); |
2301 | struct rpc_timeout to; |
2302 | unsigned long initval; |
2303 | |
2304 | memcpy(&to, xprt->timeout, sizeof(to)); |
2305 | /* Arbitrary lower limit */ |
2306 | initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO); |
2307 | to.to_initval = initval; |
2308 | to.to_maxval = initval; |
2309 | to.to_retries = 0; |
2310 | memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout)); |
2311 | xprt->timeout = &transport->tcp_timeout; |
2312 | xprt->connect_timeout = connect_timeout; |
2313 | } |
2314 | |
2315 | static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, |
2316 | unsigned long connect_timeout, |
2317 | unsigned long reconnect_timeout) |
2318 | { |
2319 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2320 | |
2321 | spin_lock(lock: &xprt->transport_lock); |
2322 | if (reconnect_timeout < xprt->max_reconnect_timeout) |
2323 | xprt->max_reconnect_timeout = reconnect_timeout; |
2324 | if (connect_timeout < xprt->connect_timeout) |
2325 | xs_tcp_do_set_connect_timeout(xprt, connect_timeout); |
2326 | set_bit(XPRT_SOCK_UPD_TIMEOUT, addr: &transport->sock_state); |
2327 | spin_unlock(lock: &xprt->transport_lock); |
2328 | } |
2329 | |
2330 | static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
2331 | { |
2332 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2333 | |
2334 | if (!transport->inet) { |
2335 | struct sock *sk = sock->sk; |
2336 | |
2337 | /* Avoid temporary address, they are bad for long-lived |
2338 | * connections such as NFS mounts. |
2339 | * RFC4941, section 3.6 suggests that: |
2340 | * Individual applications, which have specific |
2341 | * knowledge about the normal duration of connections, |
2342 | * MAY override this as appropriate. |
2343 | */ |
2344 | if (xs_addr(xprt)->sa_family == PF_INET6) { |
2345 | ip6_sock_set_addr_preferences(sk, |
2346 | IPV6_PREFER_SRC_PUBLIC); |
2347 | } |
2348 | |
2349 | xs_tcp_set_socket_timeouts(xprt, sock); |
2350 | tcp_sock_set_nodelay(sk); |
2351 | |
2352 | lock_sock(sk); |
2353 | |
2354 | xs_save_old_callbacks(transport, sk); |
2355 | |
2356 | sk->sk_user_data = xprt; |
2357 | sk->sk_data_ready = xs_data_ready; |
2358 | sk->sk_state_change = xs_tcp_state_change; |
2359 | sk->sk_write_space = xs_tcp_write_space; |
2360 | sk->sk_error_report = xs_error_report; |
2361 | sk->sk_use_task_frag = false; |
2362 | |
2363 | /* socket options */ |
2364 | sock_reset_flag(sk, flag: SOCK_LINGER); |
2365 | |
2366 | xprt_clear_connected(xprt); |
2367 | |
2368 | /* Reset to new socket */ |
2369 | transport->sock = sock; |
2370 | transport->inet = sk; |
2371 | |
2372 | release_sock(sk); |
2373 | } |
2374 | |
2375 | if (!xprt_bound(xprt)) |
2376 | return -ENOTCONN; |
2377 | |
2378 | xs_set_memalloc(xprt); |
2379 | |
2380 | xs_stream_start_connect(transport); |
2381 | |
2382 | /* Tell the socket layer to start connecting... */ |
2383 | set_bit(XPRT_SOCK_CONNECTING, addr: &transport->sock_state); |
2384 | return kernel_connect(sock, addr: xs_addr(xprt), addrlen: xprt->addrlen, O_NONBLOCK); |
2385 | } |
2386 | |
2387 | /** |
2388 | * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint |
2389 | * @work: queued work item |
2390 | * |
2391 | * Invoked by a work queue tasklet. |
2392 | */ |
2393 | static void xs_tcp_setup_socket(struct work_struct *work) |
2394 | { |
2395 | struct sock_xprt *transport = |
2396 | container_of(work, struct sock_xprt, connect_worker.work); |
2397 | struct socket *sock = transport->sock; |
2398 | struct rpc_xprt *xprt = &transport->xprt; |
2399 | int status; |
2400 | unsigned int pflags = current->flags; |
2401 | |
2402 | if (atomic_read(v: &xprt->swapper)) |
2403 | current->flags |= PF_MEMALLOC; |
2404 | |
2405 | if (xprt_connected(xprt)) |
2406 | goto out; |
2407 | if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, |
2408 | addr: &transport->sock_state) || |
2409 | !sock) { |
2410 | xs_reset_transport(transport); |
2411 | sock = xs_create_sock(xprt, transport, family: xs_addr(xprt)->sa_family, |
2412 | type: SOCK_STREAM, IPPROTO_TCP, reuseport: true); |
2413 | if (IS_ERR(ptr: sock)) { |
2414 | xprt_wake_pending_tasks(xprt, status: PTR_ERR(ptr: sock)); |
2415 | goto out; |
2416 | } |
2417 | } |
2418 | |
2419 | dprintk("RPC: worker connecting xprt %p via %s to " |
2420 | "%s (port %s)\n" , xprt, |
2421 | xprt->address_strings[RPC_DISPLAY_PROTO], |
2422 | xprt->address_strings[RPC_DISPLAY_ADDR], |
2423 | xprt->address_strings[RPC_DISPLAY_PORT]); |
2424 | |
2425 | status = xs_tcp_finish_connecting(xprt, sock); |
2426 | trace_rpc_socket_connect(xprt, socket: sock, error: status); |
2427 | dprintk("RPC: %p connect status %d connected %d sock state %d\n" , |
2428 | xprt, -status, xprt_connected(xprt), |
2429 | sock->sk->sk_state); |
2430 | switch (status) { |
2431 | case 0: |
2432 | case -EINPROGRESS: |
2433 | /* SYN_SENT! */ |
2434 | set_bit(XPRT_SOCK_CONNECT_SENT, addr: &transport->sock_state); |
2435 | if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) |
2436 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
2437 | fallthrough; |
2438 | case -EALREADY: |
2439 | goto out_unlock; |
2440 | case -EADDRNOTAVAIL: |
2441 | /* Source port number is unavailable. Try a new one! */ |
2442 | transport->srcport = 0; |
2443 | status = -EAGAIN; |
2444 | break; |
2445 | case -EINVAL: |
2446 | /* Happens, for instance, if the user specified a link |
2447 | * local IPv6 address without a scope-id. |
2448 | */ |
2449 | case -ECONNREFUSED: |
2450 | case -ECONNRESET: |
2451 | case -ENETDOWN: |
2452 | case -ENETUNREACH: |
2453 | case -EHOSTUNREACH: |
2454 | case -EADDRINUSE: |
2455 | case -ENOBUFS: |
2456 | break; |
2457 | default: |
2458 | printk("%s: connect returned unhandled error %d\n" , |
2459 | __func__, status); |
2460 | status = -EAGAIN; |
2461 | } |
2462 | |
2463 | /* xs_tcp_force_close() wakes tasks with a fixed error code. |
2464 | * We need to wake them first to ensure the correct error code. |
2465 | */ |
2466 | xprt_wake_pending_tasks(xprt, status); |
2467 | xs_tcp_force_close(xprt); |
2468 | out: |
2469 | xprt_clear_connecting(xprt); |
2470 | out_unlock: |
2471 | xprt_unlock_connect(xprt, transport); |
2472 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
2473 | } |
2474 | |
2475 | /* |
2476 | * Transfer the connected socket to @upper_transport, then mark that |
2477 | * xprt CONNECTED. |
2478 | */ |
2479 | static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt, |
2480 | struct sock_xprt *upper_transport) |
2481 | { |
2482 | struct sock_xprt *lower_transport = |
2483 | container_of(lower_xprt, struct sock_xprt, xprt); |
2484 | struct rpc_xprt *upper_xprt = &upper_transport->xprt; |
2485 | |
2486 | if (!upper_transport->inet) { |
2487 | struct socket *sock = lower_transport->sock; |
2488 | struct sock *sk = sock->sk; |
2489 | |
2490 | /* Avoid temporary address, they are bad for long-lived |
2491 | * connections such as NFS mounts. |
2492 | * RFC4941, section 3.6 suggests that: |
2493 | * Individual applications, which have specific |
2494 | * knowledge about the normal duration of connections, |
2495 | * MAY override this as appropriate. |
2496 | */ |
2497 | if (xs_addr(xprt: upper_xprt)->sa_family == PF_INET6) |
2498 | ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC); |
2499 | |
2500 | xs_tcp_set_socket_timeouts(xprt: upper_xprt, sock); |
2501 | tcp_sock_set_nodelay(sk); |
2502 | |
2503 | lock_sock(sk); |
2504 | |
2505 | /* @sk is already connected, so it now has the RPC callbacks. |
2506 | * Reach into @lower_transport to save the original ones. |
2507 | */ |
2508 | upper_transport->old_data_ready = lower_transport->old_data_ready; |
2509 | upper_transport->old_state_change = lower_transport->old_state_change; |
2510 | upper_transport->old_write_space = lower_transport->old_write_space; |
2511 | upper_transport->old_error_report = lower_transport->old_error_report; |
2512 | sk->sk_user_data = upper_xprt; |
2513 | |
2514 | /* socket options */ |
2515 | sock_reset_flag(sk, flag: SOCK_LINGER); |
2516 | |
2517 | xprt_clear_connected(xprt: upper_xprt); |
2518 | |
2519 | upper_transport->sock = sock; |
2520 | upper_transport->inet = sk; |
2521 | upper_transport->file = lower_transport->file; |
2522 | |
2523 | release_sock(sk); |
2524 | |
2525 | /* Reset lower_transport before shutting down its clnt */ |
2526 | mutex_lock(&lower_transport->recv_mutex); |
2527 | lower_transport->inet = NULL; |
2528 | lower_transport->sock = NULL; |
2529 | lower_transport->file = NULL; |
2530 | |
2531 | xprt_clear_connected(xprt: lower_xprt); |
2532 | xs_sock_reset_connection_flags(xprt: lower_xprt); |
2533 | xs_stream_reset_connect(transport: lower_transport); |
2534 | mutex_unlock(lock: &lower_transport->recv_mutex); |
2535 | } |
2536 | |
2537 | if (!xprt_bound(xprt: upper_xprt)) |
2538 | return -ENOTCONN; |
2539 | |
2540 | xs_set_memalloc(xprt: upper_xprt); |
2541 | |
2542 | if (!xprt_test_and_set_connected(xprt: upper_xprt)) { |
2543 | upper_xprt->connect_cookie++; |
2544 | clear_bit(XPRT_SOCK_CONNECTING, addr: &upper_transport->sock_state); |
2545 | xprt_clear_connecting(xprt: upper_xprt); |
2546 | |
2547 | upper_xprt->stat.connect_count++; |
2548 | upper_xprt->stat.connect_time += (long)jiffies - |
2549 | upper_xprt->stat.connect_start; |
2550 | xs_run_error_worker(transport: upper_transport, XPRT_SOCK_WAKE_PENDING); |
2551 | } |
2552 | return 0; |
2553 | } |
2554 | |
2555 | /** |
2556 | * xs_tls_handshake_done - TLS handshake completion handler |
2557 | * @data: address of xprt to wake |
2558 | * @status: status of handshake |
2559 | * @peerid: serial number of key containing the remote's identity |
2560 | * |
2561 | */ |
2562 | static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) |
2563 | { |
2564 | struct rpc_xprt *lower_xprt = data; |
2565 | struct sock_xprt *lower_transport = |
2566 | container_of(lower_xprt, struct sock_xprt, xprt); |
2567 | |
2568 | lower_transport->xprt_err = status ? -EACCES : 0; |
2569 | complete(&lower_transport->handshake_done); |
2570 | xprt_put(xprt: lower_xprt); |
2571 | } |
2572 | |
2573 | static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec) |
2574 | { |
2575 | struct sock_xprt *lower_transport = |
2576 | container_of(lower_xprt, struct sock_xprt, xprt); |
2577 | struct tls_handshake_args args = { |
2578 | .ta_sock = lower_transport->sock, |
2579 | .ta_done = xs_tls_handshake_done, |
2580 | .ta_data = xprt_get(xprt: lower_xprt), |
2581 | .ta_peername = lower_xprt->servername, |
2582 | }; |
2583 | struct sock *sk = lower_transport->inet; |
2584 | int rc; |
2585 | |
2586 | init_completion(x: &lower_transport->handshake_done); |
2587 | set_bit(XPRT_SOCK_IGNORE_RECV, addr: &lower_transport->sock_state); |
2588 | lower_transport->xprt_err = -ETIMEDOUT; |
2589 | switch (xprtsec->policy) { |
2590 | case RPC_XPRTSEC_TLS_ANON: |
2591 | rc = tls_client_hello_anon(args: &args, GFP_KERNEL); |
2592 | if (rc) |
2593 | goto out_put_xprt; |
2594 | break; |
2595 | case RPC_XPRTSEC_TLS_X509: |
2596 | args.ta_my_cert = xprtsec->cert_serial; |
2597 | args.ta_my_privkey = xprtsec->privkey_serial; |
2598 | rc = tls_client_hello_x509(args: &args, GFP_KERNEL); |
2599 | if (rc) |
2600 | goto out_put_xprt; |
2601 | break; |
2602 | default: |
2603 | rc = -EACCES; |
2604 | goto out_put_xprt; |
2605 | } |
2606 | |
2607 | rc = wait_for_completion_interruptible_timeout(x: &lower_transport->handshake_done, |
2608 | XS_TLS_HANDSHAKE_TO); |
2609 | if (rc <= 0) { |
2610 | if (!tls_handshake_cancel(sk)) { |
2611 | if (rc == 0) |
2612 | rc = -ETIMEDOUT; |
2613 | goto out_put_xprt; |
2614 | } |
2615 | } |
2616 | |
2617 | rc = lower_transport->xprt_err; |
2618 | |
2619 | out: |
2620 | xs_stream_reset_connect(transport: lower_transport); |
2621 | clear_bit(XPRT_SOCK_IGNORE_RECV, addr: &lower_transport->sock_state); |
2622 | return rc; |
2623 | |
2624 | out_put_xprt: |
2625 | xprt_put(xprt: lower_xprt); |
2626 | goto out; |
2627 | } |
2628 | |
2629 | /** |
2630 | * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket |
2631 | * @work: queued work item |
2632 | * |
2633 | * Invoked by a work queue tasklet. |
2634 | * |
2635 | * For RPC-with-TLS, there is a two-stage connection process. |
2636 | * |
2637 | * The "upper-layer xprt" is visible to the RPC consumer. Once it has |
2638 | * been marked connected, the consumer knows that a TCP connection and |
2639 | * a TLS session have been established. |
2640 | * |
2641 | * A "lower-layer xprt", created in this function, handles the mechanics |
2642 | * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and |
2643 | * then driving the TLS handshake. Once all that is complete, the upper |
2644 | * layer xprt is marked connected. |
2645 | */ |
2646 | static void xs_tcp_tls_setup_socket(struct work_struct *work) |
2647 | { |
2648 | struct sock_xprt *upper_transport = |
2649 | container_of(work, struct sock_xprt, connect_worker.work); |
2650 | struct rpc_clnt *upper_clnt = upper_transport->clnt; |
2651 | struct rpc_xprt *upper_xprt = &upper_transport->xprt; |
2652 | struct rpc_create_args args = { |
2653 | .net = upper_xprt->xprt_net, |
2654 | .protocol = upper_xprt->prot, |
2655 | .address = (struct sockaddr *)&upper_xprt->addr, |
2656 | .addrsize = upper_xprt->addrlen, |
2657 | .timeout = upper_clnt->cl_timeout, |
2658 | .servername = upper_xprt->servername, |
2659 | .program = upper_clnt->cl_program, |
2660 | .prognumber = upper_clnt->cl_prog, |
2661 | .version = upper_clnt->cl_vers, |
2662 | .authflavor = RPC_AUTH_TLS, |
2663 | .cred = upper_clnt->cl_cred, |
2664 | .xprtsec = { |
2665 | .policy = RPC_XPRTSEC_NONE, |
2666 | }, |
2667 | }; |
2668 | unsigned int pflags = current->flags; |
2669 | struct rpc_clnt *lower_clnt; |
2670 | struct rpc_xprt *lower_xprt; |
2671 | int status; |
2672 | |
2673 | if (atomic_read(v: &upper_xprt->swapper)) |
2674 | current->flags |= PF_MEMALLOC; |
2675 | |
2676 | xs_stream_start_connect(transport: upper_transport); |
2677 | |
2678 | /* This implicitly sends an RPC_AUTH_TLS probe */ |
2679 | lower_clnt = rpc_create(args: &args); |
2680 | if (IS_ERR(ptr: lower_clnt)) { |
2681 | trace_rpc_tls_unavailable(clnt: upper_clnt, xprt: upper_xprt); |
2682 | clear_bit(XPRT_SOCK_CONNECTING, addr: &upper_transport->sock_state); |
2683 | xprt_clear_connecting(xprt: upper_xprt); |
2684 | xprt_wake_pending_tasks(xprt: upper_xprt, status: PTR_ERR(ptr: lower_clnt)); |
2685 | xs_run_error_worker(transport: upper_transport, XPRT_SOCK_WAKE_PENDING); |
2686 | goto out_unlock; |
2687 | } |
2688 | |
2689 | /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on |
2690 | * the lower xprt. |
2691 | */ |
2692 | rcu_read_lock(); |
2693 | lower_xprt = rcu_dereference(lower_clnt->cl_xprt); |
2694 | rcu_read_unlock(); |
2695 | |
2696 | if (wait_on_bit_lock(word: &lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE)) |
2697 | goto out_unlock; |
2698 | |
2699 | status = xs_tls_handshake_sync(lower_xprt, xprtsec: &upper_xprt->xprtsec); |
2700 | if (status) { |
2701 | trace_rpc_tls_not_started(clnt: upper_clnt, xprt: upper_xprt); |
2702 | goto out_close; |
2703 | } |
2704 | |
2705 | status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport); |
2706 | if (status) |
2707 | goto out_close; |
2708 | xprt_release_write(lower_xprt, NULL); |
2709 | |
2710 | trace_rpc_socket_connect(xprt: upper_xprt, socket: upper_transport->sock, error: 0); |
2711 | if (!xprt_test_and_set_connected(xprt: upper_xprt)) { |
2712 | upper_xprt->connect_cookie++; |
2713 | clear_bit(XPRT_SOCK_CONNECTING, addr: &upper_transport->sock_state); |
2714 | xprt_clear_connecting(xprt: upper_xprt); |
2715 | |
2716 | upper_xprt->stat.connect_count++; |
2717 | upper_xprt->stat.connect_time += (long)jiffies - |
2718 | upper_xprt->stat.connect_start; |
2719 | xs_run_error_worker(transport: upper_transport, XPRT_SOCK_WAKE_PENDING); |
2720 | } |
2721 | rpc_shutdown_client(lower_clnt); |
2722 | |
2723 | out_unlock: |
2724 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
2725 | upper_transport->clnt = NULL; |
2726 | xprt_unlock_connect(upper_xprt, upper_transport); |
2727 | return; |
2728 | |
2729 | out_close: |
2730 | xprt_release_write(lower_xprt, NULL); |
2731 | rpc_shutdown_client(lower_clnt); |
2732 | |
2733 | /* xprt_force_disconnect() wakes tasks with a fixed tk_status code. |
2734 | * Wake them first here to ensure they get our tk_status code. |
2735 | */ |
2736 | xprt_wake_pending_tasks(xprt: upper_xprt, status); |
2737 | xs_tcp_force_close(xprt: upper_xprt); |
2738 | xprt_clear_connecting(xprt: upper_xprt); |
2739 | goto out_unlock; |
2740 | } |
2741 | |
2742 | /** |
2743 | * xs_connect - connect a socket to a remote endpoint |
2744 | * @xprt: pointer to transport structure |
2745 | * @task: address of RPC task that manages state of connect request |
2746 | * |
2747 | * TCP: If the remote end dropped the connection, delay reconnecting. |
2748 | * |
2749 | * UDP socket connects are synchronous, but we use a work queue anyway |
2750 | * to guarantee that even unprivileged user processes can set up a |
2751 | * socket on a privileged port. |
2752 | * |
2753 | * If a UDP socket connect fails, the delay behavior here prevents |
2754 | * retry floods (hard mounts). |
2755 | */ |
2756 | static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
2757 | { |
2758 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2759 | unsigned long delay = 0; |
2760 | |
2761 | WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); |
2762 | |
2763 | if (transport->sock != NULL) { |
2764 | dprintk("RPC: xs_connect delayed xprt %p for %lu " |
2765 | "seconds\n" , xprt, xprt->reestablish_timeout / HZ); |
2766 | |
2767 | delay = xprt_reconnect_delay(xprt); |
2768 | xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); |
2769 | |
2770 | } else |
2771 | dprintk("RPC: xs_connect scheduled xprt %p\n" , xprt); |
2772 | |
2773 | transport->clnt = task->tk_client; |
2774 | queue_delayed_work(wq: xprtiod_workqueue, |
2775 | dwork: &transport->connect_worker, |
2776 | delay); |
2777 | } |
2778 | |
2779 | static void xs_wake_disconnect(struct sock_xprt *transport) |
2780 | { |
2781 | if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, addr: &transport->sock_state)) |
2782 | xs_tcp_force_close(xprt: &transport->xprt); |
2783 | } |
2784 | |
2785 | static void xs_wake_write(struct sock_xprt *transport) |
2786 | { |
2787 | if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, addr: &transport->sock_state)) |
2788 | xprt_write_space(xprt: &transport->xprt); |
2789 | } |
2790 | |
2791 | static void xs_wake_error(struct sock_xprt *transport) |
2792 | { |
2793 | int sockerr; |
2794 | |
2795 | if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, addr: &transport->sock_state)) |
2796 | return; |
2797 | sockerr = xchg(&transport->xprt_err, 0); |
2798 | if (sockerr < 0) { |
2799 | xprt_wake_pending_tasks(xprt: &transport->xprt, status: sockerr); |
2800 | xs_tcp_force_close(xprt: &transport->xprt); |
2801 | } |
2802 | } |
2803 | |
2804 | static void xs_wake_pending(struct sock_xprt *transport) |
2805 | { |
2806 | if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, addr: &transport->sock_state)) |
2807 | xprt_wake_pending_tasks(xprt: &transport->xprt, status: -EAGAIN); |
2808 | } |
2809 | |
2810 | static void xs_error_handle(struct work_struct *work) |
2811 | { |
2812 | struct sock_xprt *transport = container_of(work, |
2813 | struct sock_xprt, error_worker); |
2814 | |
2815 | xs_wake_disconnect(transport); |
2816 | xs_wake_write(transport); |
2817 | xs_wake_error(transport); |
2818 | xs_wake_pending(transport); |
2819 | } |
2820 | |
2821 | /** |
2822 | * xs_local_print_stats - display AF_LOCAL socket-specific stats |
2823 | * @xprt: rpc_xprt struct containing statistics |
2824 | * @seq: output file |
2825 | * |
2826 | */ |
2827 | static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
2828 | { |
2829 | long idle_time = 0; |
2830 | |
2831 | if (xprt_connected(xprt)) |
2832 | idle_time = (long)(jiffies - xprt->last_used) / HZ; |
2833 | |
2834 | seq_printf(m: seq, fmt: "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " |
2835 | "%llu %llu %lu %llu %llu\n" , |
2836 | xprt->stat.bind_count, |
2837 | xprt->stat.connect_count, |
2838 | xprt->stat.connect_time / HZ, |
2839 | idle_time, |
2840 | xprt->stat.sends, |
2841 | xprt->stat.recvs, |
2842 | xprt->stat.bad_xids, |
2843 | xprt->stat.req_u, |
2844 | xprt->stat.bklog_u, |
2845 | xprt->stat.max_slots, |
2846 | xprt->stat.sending_u, |
2847 | xprt->stat.pending_u); |
2848 | } |
2849 | |
2850 | /** |
2851 | * xs_udp_print_stats - display UDP socket-specific stats |
2852 | * @xprt: rpc_xprt struct containing statistics |
2853 | * @seq: output file |
2854 | * |
2855 | */ |
2856 | static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
2857 | { |
2858 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2859 | |
2860 | seq_printf(m: seq, fmt: "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu " |
2861 | "%lu %llu %llu\n" , |
2862 | transport->srcport, |
2863 | xprt->stat.bind_count, |
2864 | xprt->stat.sends, |
2865 | xprt->stat.recvs, |
2866 | xprt->stat.bad_xids, |
2867 | xprt->stat.req_u, |
2868 | xprt->stat.bklog_u, |
2869 | xprt->stat.max_slots, |
2870 | xprt->stat.sending_u, |
2871 | xprt->stat.pending_u); |
2872 | } |
2873 | |
2874 | /** |
2875 | * xs_tcp_print_stats - display TCP socket-specific stats |
2876 | * @xprt: rpc_xprt struct containing statistics |
2877 | * @seq: output file |
2878 | * |
2879 | */ |
2880 | static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
2881 | { |
2882 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2883 | long idle_time = 0; |
2884 | |
2885 | if (xprt_connected(xprt)) |
2886 | idle_time = (long)(jiffies - xprt->last_used) / HZ; |
2887 | |
2888 | seq_printf(m: seq, fmt: "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu " |
2889 | "%llu %llu %lu %llu %llu\n" , |
2890 | transport->srcport, |
2891 | xprt->stat.bind_count, |
2892 | xprt->stat.connect_count, |
2893 | xprt->stat.connect_time / HZ, |
2894 | idle_time, |
2895 | xprt->stat.sends, |
2896 | xprt->stat.recvs, |
2897 | xprt->stat.bad_xids, |
2898 | xprt->stat.req_u, |
2899 | xprt->stat.bklog_u, |
2900 | xprt->stat.max_slots, |
2901 | xprt->stat.sending_u, |
2902 | xprt->stat.pending_u); |
2903 | } |
2904 | |
2905 | /* |
2906 | * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason |
2907 | * we allocate pages instead doing a kmalloc like rpc_malloc is because we want |
2908 | * to use the server side send routines. |
2909 | */ |
2910 | static int bc_malloc(struct rpc_task *task) |
2911 | { |
2912 | struct rpc_rqst *rqst = task->tk_rqstp; |
2913 | size_t size = rqst->rq_callsize; |
2914 | struct page *page; |
2915 | struct rpc_buffer *buf; |
2916 | |
2917 | if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) { |
2918 | WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n" , |
2919 | size); |
2920 | return -EINVAL; |
2921 | } |
2922 | |
2923 | page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); |
2924 | if (!page) |
2925 | return -ENOMEM; |
2926 | |
2927 | buf = page_address(page); |
2928 | buf->len = PAGE_SIZE; |
2929 | |
2930 | rqst->rq_buffer = buf->data; |
2931 | rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; |
2932 | return 0; |
2933 | } |
2934 | |
2935 | /* |
2936 | * Free the space allocated in the bc_alloc routine |
2937 | */ |
2938 | static void bc_free(struct rpc_task *task) |
2939 | { |
2940 | void *buffer = task->tk_rqstp->rq_buffer; |
2941 | struct rpc_buffer *buf; |
2942 | |
2943 | buf = container_of(buffer, struct rpc_buffer, data); |
2944 | free_page((unsigned long)buf); |
2945 | } |
2946 | |
2947 | static int bc_sendto(struct rpc_rqst *req) |
2948 | { |
2949 | struct xdr_buf *xdr = &req->rq_snd_buf; |
2950 | struct sock_xprt *transport = |
2951 | container_of(req->rq_xprt, struct sock_xprt, xprt); |
2952 | struct msghdr msg = { |
2953 | .msg_flags = 0, |
2954 | }; |
2955 | rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | |
2956 | (u32)xdr->len); |
2957 | unsigned int sent = 0; |
2958 | int err; |
2959 | |
2960 | req->rq_xtime = ktime_get(); |
2961 | err = xdr_alloc_bvec(buf: xdr, gfp: rpc_task_gfp_mask()); |
2962 | if (err < 0) |
2963 | return err; |
2964 | err = xprt_sock_sendmsg(sock: transport->sock, msg: &msg, xdr, base: 0, marker, sent_p: &sent); |
2965 | xdr_free_bvec(buf: xdr); |
2966 | if (err < 0 || sent != (xdr->len + sizeof(marker))) |
2967 | return -EAGAIN; |
2968 | return sent; |
2969 | } |
2970 | |
2971 | /** |
2972 | * bc_send_request - Send a backchannel Call on a TCP socket |
2973 | * @req: rpc_rqst containing Call message to be sent |
2974 | * |
2975 | * xpt_mutex ensures @rqstp's whole message is written to the socket |
2976 | * without interruption. |
2977 | * |
2978 | * Return values: |
2979 | * %0 if the message was sent successfully |
2980 | * %ENOTCONN if the message was not sent |
2981 | */ |
2982 | static int bc_send_request(struct rpc_rqst *req) |
2983 | { |
2984 | struct svc_xprt *xprt; |
2985 | int len; |
2986 | |
2987 | /* |
2988 | * Get the server socket associated with this callback xprt |
2989 | */ |
2990 | xprt = req->rq_xprt->bc_xprt; |
2991 | |
2992 | /* |
2993 | * Grab the mutex to serialize data as the connection is shared |
2994 | * with the fore channel |
2995 | */ |
2996 | mutex_lock(&xprt->xpt_mutex); |
2997 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) |
2998 | len = -ENOTCONN; |
2999 | else |
3000 | len = bc_sendto(req); |
3001 | mutex_unlock(lock: &xprt->xpt_mutex); |
3002 | |
3003 | if (len > 0) |
3004 | len = 0; |
3005 | |
3006 | return len; |
3007 | } |
3008 | |
/*
 * ->close for the backchannel transport: only reports the disconnect
 * as done; no socket operation is performed here.
 */
static void bc_close(struct rpc_xprt *xprt)
{
	xprt_disconnect_done(xprt);
}
3013 | |
3014 | static void bc_destroy(struct rpc_xprt *xprt) |
3015 | { |
3016 | dprintk("RPC: bc_destroy xprt %p\n" , xprt); |
3017 | |
3018 | xs_xprt_free(xprt); |
3019 | module_put(THIS_MODULE); |
3020 | } |
3021 | |
3022 | static const struct rpc_xprt_ops xs_local_ops = { |
3023 | .reserve_xprt = xprt_reserve_xprt, |
3024 | .release_xprt = xprt_release_xprt, |
3025 | .alloc_slot = xprt_alloc_slot, |
3026 | .free_slot = xprt_free_slot, |
3027 | .rpcbind = xs_local_rpcbind, |
3028 | .set_port = xs_local_set_port, |
3029 | .connect = xs_local_connect, |
3030 | .buf_alloc = rpc_malloc, |
3031 | .buf_free = rpc_free, |
3032 | .prepare_request = xs_stream_prepare_request, |
3033 | .send_request = xs_local_send_request, |
3034 | .abort_send_request = xs_stream_abort_send_request, |
3035 | .wait_for_reply_request = xprt_wait_for_reply_request_def, |
3036 | .close = xs_close, |
3037 | .destroy = xs_destroy, |
3038 | .print_stats = xs_local_print_stats, |
3039 | .enable_swap = xs_enable_swap, |
3040 | .disable_swap = xs_disable_swap, |
3041 | }; |
3042 | |
3043 | static const struct rpc_xprt_ops xs_udp_ops = { |
3044 | .set_buffer_size = xs_udp_set_buffer_size, |
3045 | .reserve_xprt = xprt_reserve_xprt_cong, |
3046 | .release_xprt = xprt_release_xprt_cong, |
3047 | .alloc_slot = xprt_alloc_slot, |
3048 | .free_slot = xprt_free_slot, |
3049 | .rpcbind = rpcb_getport_async, |
3050 | .set_port = xs_set_port, |
3051 | .connect = xs_connect, |
3052 | .get_srcaddr = xs_sock_srcaddr, |
3053 | .get_srcport = xs_sock_srcport, |
3054 | .buf_alloc = rpc_malloc, |
3055 | .buf_free = rpc_free, |
3056 | .send_request = xs_udp_send_request, |
3057 | .wait_for_reply_request = xprt_wait_for_reply_request_rtt, |
3058 | .timer = xs_udp_timer, |
3059 | .release_request = xprt_release_rqst_cong, |
3060 | .close = xs_close, |
3061 | .destroy = xs_destroy, |
3062 | .print_stats = xs_udp_print_stats, |
3063 | .enable_swap = xs_enable_swap, |
3064 | .disable_swap = xs_disable_swap, |
3065 | .inject_disconnect = xs_inject_disconnect, |
3066 | }; |
3067 | |
3068 | static const struct rpc_xprt_ops xs_tcp_ops = { |
3069 | .reserve_xprt = xprt_reserve_xprt, |
3070 | .release_xprt = xprt_release_xprt, |
3071 | .alloc_slot = xprt_alloc_slot, |
3072 | .free_slot = xprt_free_slot, |
3073 | .rpcbind = rpcb_getport_async, |
3074 | .set_port = xs_set_port, |
3075 | .connect = xs_connect, |
3076 | .get_srcaddr = xs_sock_srcaddr, |
3077 | .get_srcport = xs_sock_srcport, |
3078 | .buf_alloc = rpc_malloc, |
3079 | .buf_free = rpc_free, |
3080 | .prepare_request = xs_stream_prepare_request, |
3081 | .send_request = xs_tcp_send_request, |
3082 | .abort_send_request = xs_stream_abort_send_request, |
3083 | .wait_for_reply_request = xprt_wait_for_reply_request_def, |
3084 | .close = xs_tcp_shutdown, |
3085 | .destroy = xs_destroy, |
3086 | .set_connect_timeout = xs_tcp_set_connect_timeout, |
3087 | .print_stats = xs_tcp_print_stats, |
3088 | .enable_swap = xs_enable_swap, |
3089 | .disable_swap = xs_disable_swap, |
3090 | .inject_disconnect = xs_inject_disconnect, |
3091 | #ifdef CONFIG_SUNRPC_BACKCHANNEL |
3092 | .bc_setup = xprt_setup_bc, |
3093 | .bc_maxpayload = xs_tcp_bc_maxpayload, |
3094 | .bc_num_slots = xprt_bc_max_slots, |
3095 | .bc_free_rqst = xprt_free_bc_rqst, |
3096 | .bc_destroy = xprt_destroy_bc, |
3097 | #endif |
3098 | }; |
3099 | |
3100 | /* |
3101 | * The rpc_xprt_ops for the server backchannel |
3102 | */ |
3103 | |
3104 | static const struct rpc_xprt_ops bc_tcp_ops = { |
3105 | .reserve_xprt = xprt_reserve_xprt, |
3106 | .release_xprt = xprt_release_xprt, |
3107 | .alloc_slot = xprt_alloc_slot, |
3108 | .free_slot = xprt_free_slot, |
3109 | .buf_alloc = bc_malloc, |
3110 | .buf_free = bc_free, |
3111 | .send_request = bc_send_request, |
3112 | .wait_for_reply_request = xprt_wait_for_reply_request_def, |
3113 | .close = bc_close, |
3114 | .destroy = bc_destroy, |
3115 | .print_stats = xs_tcp_print_stats, |
3116 | .enable_swap = xs_enable_swap, |
3117 | .disable_swap = xs_disable_swap, |
3118 | .inject_disconnect = xs_inject_disconnect, |
3119 | }; |
3120 | |
3121 | static int xs_init_anyaddr(const int family, struct sockaddr *sap) |
3122 | { |
3123 | static const struct sockaddr_in sin = { |
3124 | .sin_family = AF_INET, |
3125 | .sin_addr.s_addr = htonl(INADDR_ANY), |
3126 | }; |
3127 | static const struct sockaddr_in6 sin6 = { |
3128 | .sin6_family = AF_INET6, |
3129 | .sin6_addr = IN6ADDR_ANY_INIT, |
3130 | }; |
3131 | |
3132 | switch (family) { |
3133 | case AF_LOCAL: |
3134 | break; |
3135 | case AF_INET: |
3136 | memcpy(sap, &sin, sizeof(sin)); |
3137 | break; |
3138 | case AF_INET6: |
3139 | memcpy(sap, &sin6, sizeof(sin6)); |
3140 | break; |
3141 | default: |
3142 | dprintk("RPC: %s: Bad address family\n" , __func__); |
3143 | return -EAFNOSUPPORT; |
3144 | } |
3145 | return 0; |
3146 | } |
3147 | |
3148 | static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, |
3149 | unsigned int slot_table_size, |
3150 | unsigned int max_slot_table_size) |
3151 | { |
3152 | struct rpc_xprt *xprt; |
3153 | struct sock_xprt *new; |
3154 | |
3155 | if (args->addrlen > sizeof(xprt->addr)) { |
3156 | dprintk("RPC: xs_setup_xprt: address too large\n" ); |
3157 | return ERR_PTR(error: -EBADF); |
3158 | } |
3159 | |
3160 | xprt = xprt_alloc(net: args->net, size: sizeof(*new), num_prealloc: slot_table_size, |
3161 | max_req: max_slot_table_size); |
3162 | if (xprt == NULL) { |
3163 | dprintk("RPC: xs_setup_xprt: couldn't allocate " |
3164 | "rpc_xprt\n" ); |
3165 | return ERR_PTR(error: -ENOMEM); |
3166 | } |
3167 | |
3168 | new = container_of(xprt, struct sock_xprt, xprt); |
3169 | mutex_init(&new->recv_mutex); |
3170 | memcpy(&xprt->addr, args->dstaddr, args->addrlen); |
3171 | xprt->addrlen = args->addrlen; |
3172 | if (args->srcaddr) |
3173 | memcpy(&new->srcaddr, args->srcaddr, args->addrlen); |
3174 | else { |
3175 | int err; |
3176 | err = xs_init_anyaddr(family: args->dstaddr->sa_family, |
3177 | sap: (struct sockaddr *)&new->srcaddr); |
3178 | if (err != 0) { |
3179 | xprt_free(xprt); |
3180 | return ERR_PTR(error: err); |
3181 | } |
3182 | } |
3183 | |
3184 | return xprt; |
3185 | } |
3186 | |
3187 | static const struct rpc_timeout xs_local_default_timeout = { |
3188 | .to_initval = 10 * HZ, |
3189 | .to_maxval = 10 * HZ, |
3190 | .to_retries = 2, |
3191 | }; |
3192 | |
3193 | /** |
3194 | * xs_setup_local - Set up transport to use an AF_LOCAL socket |
3195 | * @args: rpc transport creation arguments |
3196 | * |
3197 | * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP |
3198 | */ |
3199 | static struct rpc_xprt *xs_setup_local(struct xprt_create *args) |
3200 | { |
3201 | struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; |
3202 | struct sock_xprt *transport; |
3203 | struct rpc_xprt *xprt; |
3204 | struct rpc_xprt *ret; |
3205 | |
3206 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3207 | max_slot_table_size: xprt_max_tcp_slot_table_entries); |
3208 | if (IS_ERR(ptr: xprt)) |
3209 | return xprt; |
3210 | transport = container_of(xprt, struct sock_xprt, xprt); |
3211 | |
3212 | xprt->prot = 0; |
3213 | xprt->xprt_class = &xs_local_transport; |
3214 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3215 | |
3216 | xprt->bind_timeout = XS_BIND_TO; |
3217 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
3218 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3219 | |
3220 | xprt->ops = &xs_local_ops; |
3221 | xprt->timeout = &xs_local_default_timeout; |
3222 | |
3223 | INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); |
3224 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3225 | INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); |
3226 | |
3227 | switch (sun->sun_family) { |
3228 | case AF_LOCAL: |
3229 | if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') { |
3230 | dprintk("RPC: bad AF_LOCAL address: %s\n" , |
3231 | sun->sun_path); |
3232 | ret = ERR_PTR(error: -EINVAL); |
3233 | goto out_err; |
3234 | } |
3235 | xprt_set_bound(xprt); |
3236 | xs_format_peer_addresses(xprt, protocol: "local" , RPCBIND_NETID_LOCAL); |
3237 | break; |
3238 | default: |
3239 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3240 | goto out_err; |
3241 | } |
3242 | |
3243 | dprintk("RPC: set up xprt to %s via AF_LOCAL\n" , |
3244 | xprt->address_strings[RPC_DISPLAY_ADDR]); |
3245 | |
3246 | if (try_module_get(THIS_MODULE)) |
3247 | return xprt; |
3248 | ret = ERR_PTR(error: -EINVAL); |
3249 | out_err: |
3250 | xs_xprt_free(xprt); |
3251 | return ret; |
3252 | } |
3253 | |
3254 | static const struct rpc_timeout xs_udp_default_timeout = { |
3255 | .to_initval = 5 * HZ, |
3256 | .to_maxval = 30 * HZ, |
3257 | .to_increment = 5 * HZ, |
3258 | .to_retries = 5, |
3259 | }; |
3260 | |
3261 | /** |
3262 | * xs_setup_udp - Set up transport to use a UDP socket |
3263 | * @args: rpc transport creation arguments |
3264 | * |
3265 | */ |
3266 | static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) |
3267 | { |
3268 | struct sockaddr *addr = args->dstaddr; |
3269 | struct rpc_xprt *xprt; |
3270 | struct sock_xprt *transport; |
3271 | struct rpc_xprt *ret; |
3272 | |
3273 | xprt = xs_setup_xprt(args, slot_table_size: xprt_udp_slot_table_entries, |
3274 | max_slot_table_size: xprt_udp_slot_table_entries); |
3275 | if (IS_ERR(ptr: xprt)) |
3276 | return xprt; |
3277 | transport = container_of(xprt, struct sock_xprt, xprt); |
3278 | |
3279 | xprt->prot = IPPROTO_UDP; |
3280 | xprt->xprt_class = &xs_udp_transport; |
3281 | /* XXX: header size can vary due to auth type, IPv6, etc. */ |
3282 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); |
3283 | |
3284 | xprt->bind_timeout = XS_BIND_TO; |
3285 | xprt->reestablish_timeout = XS_UDP_REEST_TO; |
3286 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3287 | |
3288 | xprt->ops = &xs_udp_ops; |
3289 | |
3290 | xprt->timeout = &xs_udp_default_timeout; |
3291 | |
3292 | INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn); |
3293 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3294 | INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); |
3295 | |
3296 | switch (addr->sa_family) { |
3297 | case AF_INET: |
3298 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) |
3299 | xprt_set_bound(xprt); |
3300 | |
3301 | xs_format_peer_addresses(xprt, protocol: "udp" , RPCBIND_NETID_UDP); |
3302 | break; |
3303 | case AF_INET6: |
3304 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) |
3305 | xprt_set_bound(xprt); |
3306 | |
3307 | xs_format_peer_addresses(xprt, protocol: "udp" , RPCBIND_NETID_UDP6); |
3308 | break; |
3309 | default: |
3310 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3311 | goto out_err; |
3312 | } |
3313 | |
3314 | if (xprt_bound(xprt)) |
3315 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3316 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3317 | xprt->address_strings[RPC_DISPLAY_PORT], |
3318 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3319 | else |
3320 | dprintk("RPC: set up xprt to %s (autobind) via %s\n" , |
3321 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3322 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3323 | |
3324 | if (try_module_get(THIS_MODULE)) |
3325 | return xprt; |
3326 | ret = ERR_PTR(error: -EINVAL); |
3327 | out_err: |
3328 | xs_xprt_free(xprt); |
3329 | return ret; |
3330 | } |
3331 | |
3332 | static const struct rpc_timeout xs_tcp_default_timeout = { |
3333 | .to_initval = 60 * HZ, |
3334 | .to_maxval = 60 * HZ, |
3335 | .to_retries = 2, |
3336 | }; |
3337 | |
3338 | /** |
3339 | * xs_setup_tcp - Set up transport to use a TCP socket |
3340 | * @args: rpc transport creation arguments |
3341 | * |
3342 | */ |
3343 | static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) |
3344 | { |
3345 | struct sockaddr *addr = args->dstaddr; |
3346 | struct rpc_xprt *xprt; |
3347 | struct sock_xprt *transport; |
3348 | struct rpc_xprt *ret; |
3349 | unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; |
3350 | |
3351 | if (args->flags & XPRT_CREATE_INFINITE_SLOTS) |
3352 | max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; |
3353 | |
3354 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3355 | max_slot_table_size); |
3356 | if (IS_ERR(ptr: xprt)) |
3357 | return xprt; |
3358 | transport = container_of(xprt, struct sock_xprt, xprt); |
3359 | |
3360 | xprt->prot = IPPROTO_TCP; |
3361 | xprt->xprt_class = &xs_tcp_transport; |
3362 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3363 | |
3364 | xprt->bind_timeout = XS_BIND_TO; |
3365 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
3366 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3367 | |
3368 | xprt->ops = &xs_tcp_ops; |
3369 | xprt->timeout = &xs_tcp_default_timeout; |
3370 | |
3371 | xprt->max_reconnect_timeout = xprt->timeout->to_maxval; |
3372 | if (args->reconnect_timeout) |
3373 | xprt->max_reconnect_timeout = args->reconnect_timeout; |
3374 | |
3375 | xprt->connect_timeout = xprt->timeout->to_initval * |
3376 | (xprt->timeout->to_retries + 1); |
3377 | if (args->connect_timeout) |
3378 | xs_tcp_do_set_connect_timeout(xprt, connect_timeout: args->connect_timeout); |
3379 | |
3380 | INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); |
3381 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3382 | INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); |
3383 | |
3384 | switch (addr->sa_family) { |
3385 | case AF_INET: |
3386 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) |
3387 | xprt_set_bound(xprt); |
3388 | |
3389 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP); |
3390 | break; |
3391 | case AF_INET6: |
3392 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) |
3393 | xprt_set_bound(xprt); |
3394 | |
3395 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP6); |
3396 | break; |
3397 | default: |
3398 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3399 | goto out_err; |
3400 | } |
3401 | |
3402 | if (xprt_bound(xprt)) |
3403 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3404 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3405 | xprt->address_strings[RPC_DISPLAY_PORT], |
3406 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3407 | else |
3408 | dprintk("RPC: set up xprt to %s (autobind) via %s\n" , |
3409 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3410 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3411 | |
3412 | if (try_module_get(THIS_MODULE)) |
3413 | return xprt; |
3414 | ret = ERR_PTR(error: -EINVAL); |
3415 | out_err: |
3416 | xs_xprt_free(xprt); |
3417 | return ret; |
3418 | } |
3419 | |
3420 | /** |
3421 | * xs_setup_tcp_tls - Set up transport to use a TCP with TLS |
3422 | * @args: rpc transport creation arguments |
3423 | * |
3424 | */ |
3425 | static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args) |
3426 | { |
3427 | struct sockaddr *addr = args->dstaddr; |
3428 | struct rpc_xprt *xprt; |
3429 | struct sock_xprt *transport; |
3430 | struct rpc_xprt *ret; |
3431 | unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; |
3432 | |
3433 | if (args->flags & XPRT_CREATE_INFINITE_SLOTS) |
3434 | max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; |
3435 | |
3436 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3437 | max_slot_table_size); |
3438 | if (IS_ERR(ptr: xprt)) |
3439 | return xprt; |
3440 | transport = container_of(xprt, struct sock_xprt, xprt); |
3441 | |
3442 | xprt->prot = IPPROTO_TCP; |
3443 | xprt->xprt_class = &xs_tcp_transport; |
3444 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3445 | |
3446 | xprt->bind_timeout = XS_BIND_TO; |
3447 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
3448 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3449 | |
3450 | xprt->ops = &xs_tcp_ops; |
3451 | xprt->timeout = &xs_tcp_default_timeout; |
3452 | |
3453 | xprt->max_reconnect_timeout = xprt->timeout->to_maxval; |
3454 | xprt->connect_timeout = xprt->timeout->to_initval * |
3455 | (xprt->timeout->to_retries + 1); |
3456 | |
3457 | INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); |
3458 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3459 | |
3460 | switch (args->xprtsec.policy) { |
3461 | case RPC_XPRTSEC_TLS_ANON: |
3462 | case RPC_XPRTSEC_TLS_X509: |
3463 | xprt->xprtsec = args->xprtsec; |
3464 | INIT_DELAYED_WORK(&transport->connect_worker, |
3465 | xs_tcp_tls_setup_socket); |
3466 | break; |
3467 | default: |
3468 | ret = ERR_PTR(error: -EACCES); |
3469 | goto out_err; |
3470 | } |
3471 | |
3472 | switch (addr->sa_family) { |
3473 | case AF_INET: |
3474 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) |
3475 | xprt_set_bound(xprt); |
3476 | |
3477 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP); |
3478 | break; |
3479 | case AF_INET6: |
3480 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) |
3481 | xprt_set_bound(xprt); |
3482 | |
3483 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP6); |
3484 | break; |
3485 | default: |
3486 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3487 | goto out_err; |
3488 | } |
3489 | |
3490 | if (xprt_bound(xprt)) |
3491 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3492 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3493 | xprt->address_strings[RPC_DISPLAY_PORT], |
3494 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3495 | else |
3496 | dprintk("RPC: set up xprt to %s (autobind) via %s\n" , |
3497 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3498 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3499 | |
3500 | if (try_module_get(THIS_MODULE)) |
3501 | return xprt; |
3502 | ret = ERR_PTR(error: -EINVAL); |
3503 | out_err: |
3504 | xs_xprt_free(xprt); |
3505 | return ret; |
3506 | } |
3507 | |
3508 | /** |
3509 | * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket |
3510 | * @args: rpc transport creation arguments |
3511 | * |
3512 | */ |
3513 | static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) |
3514 | { |
3515 | struct sockaddr *addr = args->dstaddr; |
3516 | struct rpc_xprt *xprt; |
3517 | struct sock_xprt *transport; |
3518 | struct svc_sock *bc_sock; |
3519 | struct rpc_xprt *ret; |
3520 | |
3521 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3522 | max_slot_table_size: xprt_tcp_slot_table_entries); |
3523 | if (IS_ERR(ptr: xprt)) |
3524 | return xprt; |
3525 | transport = container_of(xprt, struct sock_xprt, xprt); |
3526 | |
3527 | xprt->prot = IPPROTO_TCP; |
3528 | xprt->xprt_class = &xs_bc_tcp_transport; |
3529 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3530 | xprt->timeout = &xs_tcp_default_timeout; |
3531 | |
3532 | /* backchannel */ |
3533 | xprt_set_bound(xprt); |
3534 | xprt->bind_timeout = 0; |
3535 | xprt->reestablish_timeout = 0; |
3536 | xprt->idle_timeout = 0; |
3537 | |
3538 | xprt->ops = &bc_tcp_ops; |
3539 | |
3540 | switch (addr->sa_family) { |
3541 | case AF_INET: |
3542 | xs_format_peer_addresses(xprt, protocol: "tcp" , |
3543 | RPCBIND_NETID_TCP); |
3544 | break; |
3545 | case AF_INET6: |
3546 | xs_format_peer_addresses(xprt, protocol: "tcp" , |
3547 | RPCBIND_NETID_TCP6); |
3548 | break; |
3549 | default: |
3550 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3551 | goto out_err; |
3552 | } |
3553 | |
3554 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3555 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3556 | xprt->address_strings[RPC_DISPLAY_PORT], |
3557 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3558 | |
3559 | /* |
3560 | * Once we've associated a backchannel xprt with a connection, |
3561 | * we want to keep it around as long as the connection lasts, |
3562 | * in case we need to start using it for a backchannel again; |
3563 | * this reference won't be dropped until bc_xprt is destroyed. |
3564 | */ |
3565 | xprt_get(xprt); |
3566 | args->bc_xprt->xpt_bc_xprt = xprt; |
3567 | xprt->bc_xprt = args->bc_xprt; |
3568 | bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); |
3569 | transport->sock = bc_sock->sk_sock; |
3570 | transport->inet = bc_sock->sk_sk; |
3571 | |
3572 | /* |
3573 | * Since we don't want connections for the backchannel, we set |
3574 | * the xprt status to connected |
3575 | */ |
3576 | xprt_set_connected(xprt); |
3577 | |
3578 | if (try_module_get(THIS_MODULE)) |
3579 | return xprt; |
3580 | |
3581 | args->bc_xprt->xpt_bc_xprt = NULL; |
3582 | args->bc_xprt->xpt_bc_xps = NULL; |
3583 | xprt_put(xprt); |
3584 | ret = ERR_PTR(error: -EINVAL); |
3585 | out_err: |
3586 | xs_xprt_free(xprt); |
3587 | return ret; |
3588 | } |
3589 | |
3590 | static struct xprt_class xs_local_transport = { |
3591 | .list = LIST_HEAD_INIT(xs_local_transport.list), |
3592 | .name = "named UNIX socket" , |
3593 | .owner = THIS_MODULE, |
3594 | .ident = XPRT_TRANSPORT_LOCAL, |
3595 | .setup = xs_setup_local, |
3596 | .netid = { "" }, |
3597 | }; |
3598 | |
3599 | static struct xprt_class xs_udp_transport = { |
3600 | .list = LIST_HEAD_INIT(xs_udp_transport.list), |
3601 | .name = "udp" , |
3602 | .owner = THIS_MODULE, |
3603 | .ident = XPRT_TRANSPORT_UDP, |
3604 | .setup = xs_setup_udp, |
3605 | .netid = { "udp" , "udp6" , "" }, |
3606 | }; |
3607 | |
3608 | static struct xprt_class xs_tcp_transport = { |
3609 | .list = LIST_HEAD_INIT(xs_tcp_transport.list), |
3610 | .name = "tcp" , |
3611 | .owner = THIS_MODULE, |
3612 | .ident = XPRT_TRANSPORT_TCP, |
3613 | .setup = xs_setup_tcp, |
3614 | .netid = { "tcp" , "tcp6" , "" }, |
3615 | }; |
3616 | |
3617 | static struct xprt_class xs_tcp_tls_transport = { |
3618 | .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list), |
3619 | .name = "tcp-with-tls" , |
3620 | .owner = THIS_MODULE, |
3621 | .ident = XPRT_TRANSPORT_TCP_TLS, |
3622 | .setup = xs_setup_tcp_tls, |
3623 | .netid = { "tcp" , "tcp6" , "" }, |
3624 | }; |
3625 | |
3626 | static struct xprt_class xs_bc_tcp_transport = { |
3627 | .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), |
3628 | .name = "tcp NFSv4.1 backchannel" , |
3629 | .owner = THIS_MODULE, |
3630 | .ident = XPRT_TRANSPORT_BC_TCP, |
3631 | .setup = xs_setup_bc_tcp, |
3632 | .netid = { "" }, |
3633 | }; |
3634 | |
3635 | /** |
3636 | * init_socket_xprt - set up xprtsock's sysctls, register with RPC client |
3637 | * |
3638 | */ |
3639 | int init_socket_xprt(void) |
3640 | { |
3641 | if (!sunrpc_table_header) |
3642 | sunrpc_table_header = register_sysctl("sunrpc" , xs_tunables_table); |
3643 | |
3644 | xprt_register_transport(type: &xs_local_transport); |
3645 | xprt_register_transport(type: &xs_udp_transport); |
3646 | xprt_register_transport(type: &xs_tcp_transport); |
3647 | xprt_register_transport(type: &xs_tcp_tls_transport); |
3648 | xprt_register_transport(type: &xs_bc_tcp_transport); |
3649 | |
3650 | return 0; |
3651 | } |
3652 | |
3653 | /** |
3654 | * cleanup_socket_xprt - remove xprtsock's sysctls, unregister |
3655 | * |
3656 | */ |
3657 | void cleanup_socket_xprt(void) |
3658 | { |
3659 | if (sunrpc_table_header) { |
3660 | unregister_sysctl_table(table: sunrpc_table_header); |
3661 | sunrpc_table_header = NULL; |
3662 | } |
3663 | |
3664 | xprt_unregister_transport(type: &xs_local_transport); |
3665 | xprt_unregister_transport(type: &xs_udp_transport); |
3666 | xprt_unregister_transport(type: &xs_tcp_transport); |
3667 | xprt_unregister_transport(type: &xs_tcp_tls_transport); |
3668 | xprt_unregister_transport(type: &xs_bc_tcp_transport); |
3669 | } |
3670 | |
3671 | static int param_set_portnr(const char *val, const struct kernel_param *kp) |
3672 | { |
3673 | return param_set_uint_minmax(val, kp, |
3674 | RPC_MIN_RESVPORT, |
3675 | RPC_MAX_RESVPORT); |
3676 | } |
3677 | |
3678 | static const struct kernel_param_ops param_ops_portnr = { |
3679 | .set = param_set_portnr, |
3680 | .get = param_get_uint, |
3681 | }; |
3682 | |
/*
 * Type check for "portnr" parameters: the backing variable must be an
 * unsigned int.
 */
#define param_check_portnr(name, p) \
	__param_check(name, p, unsigned int);

/*
 * Expose xprt_min_resvport / xprt_max_resvport as the module parameters
 * "min_resvport" / "max_resvport" (mode 0644), using the "portnr"
 * parameter type defined above.
 */
module_param_named(min_resvport, xprt_min_resvport, portnr, 0644);
module_param_named(max_resvport, xprt_max_resvport, portnr, 0644);
3688 | |
3689 | static int param_set_slot_table_size(const char *val, |
3690 | const struct kernel_param *kp) |
3691 | { |
3692 | return param_set_uint_minmax(val, kp, |
3693 | RPC_MIN_SLOT_TABLE, |
3694 | RPC_MAX_SLOT_TABLE); |
3695 | } |
3696 | |
3697 | static const struct kernel_param_ops param_ops_slot_table_size = { |
3698 | .set = param_set_slot_table_size, |
3699 | .get = param_get_uint, |
3700 | }; |
3701 | |
/*
 * Type check for "slot_table_size" parameters: the backing variable
 * must be an unsigned int.
 */
#define param_check_slot_table_size(name, p) \
	__param_check(name, p, unsigned int);
3704 | |
3705 | static int param_set_max_slot_table_size(const char *val, |
3706 | const struct kernel_param *kp) |
3707 | { |
3708 | return param_set_uint_minmax(val, kp, |
3709 | RPC_MIN_SLOT_TABLE, |
3710 | RPC_MAX_SLOT_TABLE_LIMIT); |
3711 | } |
3712 | |
3713 | static const struct kernel_param_ops param_ops_max_slot_table_size = { |
3714 | .set = param_set_max_slot_table_size, |
3715 | .get = param_get_uint, |
3716 | }; |
3717 | |
/*
 * Type check for "max_slot_table_size" parameters: the backing variable
 * must be an unsigned int.
 */
#define param_check_max_slot_table_size(name, p) \
	__param_check(name, p, unsigned int);

/*
 * Slot table tunables exposed as module parameters (mode 0644).  The
 * TCP and UDP slot counts use the "slot_table_size" parameter type;
 * the TCP maximum uses "max_slot_table_size".
 */
module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
		   slot_table_size, 0644);
module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
		   max_slot_table_size, 0644);
module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
		   slot_table_size, 0644);
3727 | |