1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP |
3 | * |
4 | * Copyright (c) 2021, Red Hat. |
5 | */ |
6 | |
7 | #define pr_fmt(fmt) "MPTCP: " fmt |
8 | |
9 | #include <linux/kernel.h> |
10 | #include <linux/module.h> |
11 | #include <net/sock.h> |
12 | #include <net/protocol.h> |
13 | #include <net/tcp.h> |
14 | #include <net/mptcp.h> |
15 | #include "protocol.h" |
16 | |
17 | #define MIN_INFO_OPTLEN_SIZE 16 |
18 | #define MIN_FULL_INFO_OPTLEN_SIZE 40 |
19 | |
20 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
21 | { |
22 | msk_owned_by_me(msk); |
23 | |
24 | if (likely(!__mptcp_check_fallback(msk))) |
25 | return NULL; |
26 | |
27 | return msk->first; |
28 | } |
29 | |
30 | static u32 sockopt_seq_reset(const struct sock *sk) |
31 | { |
32 | sock_owned_by_me(sk); |
33 | |
34 | /* Highbits contain state. Allows to distinguish sockopt_seq |
35 | * of listener and established: |
36 | * s0 = new_listener() |
37 | * sockopt(s0) - seq is 1 |
38 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) |
39 | * sockopt(s0) - seq increments to 2 on s0 |
40 | * sockopt(s1) // seq increments to 2 on s1 (different option) |
41 | * new ssk completes join, inherits options from s0 // seq 2 |
42 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq |
43 | * |
44 | * Set High order bits to sk_state so ssk->seq == msk->seq test |
45 | * will fail. |
46 | */ |
47 | |
48 | return (u32)sk->sk_state << 24u; |
49 | } |
50 | |
51 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
52 | { |
53 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; |
54 | |
55 | msk->setsockopt_seq = sockopt_seq_reset(sk: (struct sock *)msk) + seq; |
56 | } |
57 | |
58 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, |
59 | unsigned int optlen, int *val) |
60 | { |
61 | if (optlen < sizeof(int)) |
62 | return -EINVAL; |
63 | |
64 | if (copy_from_sockptr(dst: val, src: optval, size: sizeof(*val))) |
65 | return -EFAULT; |
66 | |
67 | return 0; |
68 | } |
69 | |
70 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) |
71 | { |
72 | struct mptcp_subflow_context *subflow; |
73 | struct sock *sk = (struct sock *)msk; |
74 | |
75 | lock_sock(sk); |
76 | sockopt_seq_inc(msk); |
77 | |
78 | mptcp_for_each_subflow(msk, subflow) { |
79 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
80 | bool slow = lock_sock_fast(sk: ssk); |
81 | |
82 | switch (optname) { |
83 | case SO_DEBUG: |
84 | sock_valbool_flag(sk: ssk, bit: SOCK_DBG, valbool: !!val); |
85 | break; |
86 | case SO_KEEPALIVE: |
87 | if (ssk->sk_prot->keepalive) |
88 | ssk->sk_prot->keepalive(ssk, !!val); |
89 | sock_valbool_flag(sk: ssk, bit: SOCK_KEEPOPEN, valbool: !!val); |
90 | break; |
91 | case SO_PRIORITY: |
92 | WRITE_ONCE(ssk->sk_priority, val); |
93 | break; |
94 | case SO_SNDBUF: |
95 | case SO_SNDBUFFORCE: |
96 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
97 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
98 | mptcp_subflow_ctx(sk: ssk)->cached_sndbuf = sk->sk_sndbuf; |
99 | break; |
100 | case SO_RCVBUF: |
101 | case SO_RCVBUFFORCE: |
102 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
103 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); |
104 | break; |
105 | case SO_MARK: |
106 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { |
107 | WRITE_ONCE(ssk->sk_mark, sk->sk_mark); |
108 | sk_dst_reset(sk: ssk); |
109 | } |
110 | break; |
111 | case SO_INCOMING_CPU: |
112 | WRITE_ONCE(ssk->sk_incoming_cpu, val); |
113 | break; |
114 | } |
115 | |
116 | subflow->setsockopt_seq = msk->setsockopt_seq; |
117 | unlock_sock_fast(sk: ssk, slow); |
118 | } |
119 | |
120 | release_sock(sk); |
121 | } |
122 | |
123 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) |
124 | { |
125 | sockptr_t optval = KERNEL_SOCKPTR(p: &val); |
126 | struct sock *sk = (struct sock *)msk; |
127 | int ret; |
128 | |
129 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, |
130 | optval, optlen: sizeof(val)); |
131 | if (ret) |
132 | return ret; |
133 | |
134 | mptcp_sol_socket_sync_intval(msk, optname, val); |
135 | return 0; |
136 | } |
137 | |
138 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
139 | { |
140 | struct sock *sk = (struct sock *)msk; |
141 | |
142 | WRITE_ONCE(sk->sk_incoming_cpu, val); |
143 | |
144 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); |
145 | } |
146 | |
147 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
148 | { |
149 | sockptr_t optval = KERNEL_SOCKPTR(p: &val); |
150 | struct mptcp_subflow_context *subflow; |
151 | struct sock *sk = (struct sock *)msk; |
152 | int ret; |
153 | |
154 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, |
155 | optval, optlen: sizeof(val)); |
156 | if (ret) |
157 | return ret; |
158 | |
159 | lock_sock(sk); |
160 | mptcp_for_each_subflow(msk, subflow) { |
161 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
162 | bool slow = lock_sock_fast(sk: ssk); |
163 | |
164 | sock_set_timestamp(sk, optname, valbool: !!val); |
165 | unlock_sock_fast(sk: ssk, slow); |
166 | } |
167 | |
168 | release_sock(sk); |
169 | return 0; |
170 | } |
171 | |
172 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
173 | sockptr_t optval, |
174 | unsigned int optlen) |
175 | { |
176 | int val, ret; |
177 | |
178 | ret = mptcp_get_int_option(msk, optval, optlen, val: &val); |
179 | if (ret) |
180 | return ret; |
181 | |
182 | switch (optname) { |
183 | case SO_KEEPALIVE: |
184 | mptcp_sol_socket_sync_intval(msk, optname, val); |
185 | return 0; |
186 | case SO_DEBUG: |
187 | case SO_MARK: |
188 | case SO_PRIORITY: |
189 | case SO_SNDBUF: |
190 | case SO_SNDBUFFORCE: |
191 | case SO_RCVBUF: |
192 | case SO_RCVBUFFORCE: |
193 | return mptcp_sol_socket_intval(msk, optname, val); |
194 | case SO_INCOMING_CPU: |
195 | mptcp_so_incoming_cpu(msk, val); |
196 | return 0; |
197 | case SO_TIMESTAMP_OLD: |
198 | case SO_TIMESTAMP_NEW: |
199 | case SO_TIMESTAMPNS_OLD: |
200 | case SO_TIMESTAMPNS_NEW: |
201 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
202 | } |
203 | |
204 | return -ENOPROTOOPT; |
205 | } |
206 | |
207 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, |
208 | int optname, |
209 | sockptr_t optval, |
210 | unsigned int optlen) |
211 | { |
212 | struct mptcp_subflow_context *subflow; |
213 | struct sock *sk = (struct sock *)msk; |
214 | struct so_timestamping timestamping; |
215 | int ret; |
216 | |
217 | if (optlen == sizeof(timestamping)) { |
218 | if (copy_from_sockptr(dst: ×tamping, src: optval, |
219 | size: sizeof(timestamping))) |
220 | return -EFAULT; |
221 | } else if (optlen == sizeof(int)) { |
222 | memset(×tamping, 0, sizeof(timestamping)); |
223 | |
224 | if (copy_from_sockptr(dst: ×tamping.flags, src: optval, size: sizeof(int))) |
225 | return -EFAULT; |
226 | } else { |
227 | return -EINVAL; |
228 | } |
229 | |
230 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, |
231 | optval: KERNEL_SOCKPTR(p: ×tamping), |
232 | optlen: sizeof(timestamping)); |
233 | if (ret) |
234 | return ret; |
235 | |
236 | lock_sock(sk); |
237 | |
238 | mptcp_for_each_subflow(msk, subflow) { |
239 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
240 | bool slow = lock_sock_fast(sk: ssk); |
241 | |
242 | sock_set_timestamping(sk, optname, timestamping); |
243 | unlock_sock_fast(sk: ssk, slow); |
244 | } |
245 | |
246 | release_sock(sk); |
247 | |
248 | return 0; |
249 | } |
250 | |
251 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
252 | unsigned int optlen) |
253 | { |
254 | struct mptcp_subflow_context *subflow; |
255 | struct sock *sk = (struct sock *)msk; |
256 | struct linger ling; |
257 | sockptr_t kopt; |
258 | int ret; |
259 | |
260 | if (optlen < sizeof(ling)) |
261 | return -EINVAL; |
262 | |
263 | if (copy_from_sockptr(dst: &ling, src: optval, size: sizeof(ling))) |
264 | return -EFAULT; |
265 | |
266 | kopt = KERNEL_SOCKPTR(p: &ling); |
267 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, SO_LINGER, optval: kopt, optlen: sizeof(ling)); |
268 | if (ret) |
269 | return ret; |
270 | |
271 | lock_sock(sk); |
272 | sockopt_seq_inc(msk); |
273 | mptcp_for_each_subflow(msk, subflow) { |
274 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
275 | bool slow = lock_sock_fast(sk: ssk); |
276 | |
277 | if (!ling.l_onoff) { |
278 | sock_reset_flag(sk: ssk, flag: SOCK_LINGER); |
279 | } else { |
280 | ssk->sk_lingertime = sk->sk_lingertime; |
281 | sock_set_flag(sk: ssk, flag: SOCK_LINGER); |
282 | } |
283 | |
284 | subflow->setsockopt_seq = msk->setsockopt_seq; |
285 | unlock_sock_fast(sk: ssk, slow); |
286 | } |
287 | |
288 | release_sock(sk); |
289 | return 0; |
290 | } |
291 | |
292 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
293 | sockptr_t optval, unsigned int optlen) |
294 | { |
295 | struct sock *sk = (struct sock *)msk; |
296 | struct sock *ssk; |
297 | int ret; |
298 | |
299 | switch (optname) { |
300 | case SO_REUSEPORT: |
301 | case SO_REUSEADDR: |
302 | case SO_BINDTODEVICE: |
303 | case SO_BINDTOIFINDEX: |
304 | lock_sock(sk); |
305 | ssk = __mptcp_nmpc_sk(msk); |
306 | if (IS_ERR(ptr: ssk)) { |
307 | release_sock(sk); |
308 | return PTR_ERR(ptr: ssk); |
309 | } |
310 | |
311 | ret = sk_setsockopt(sk: ssk, SOL_SOCKET, optname, optval, optlen); |
312 | if (ret == 0) { |
313 | if (optname == SO_REUSEPORT) |
314 | sk->sk_reuseport = ssk->sk_reuseport; |
315 | else if (optname == SO_REUSEADDR) |
316 | sk->sk_reuse = ssk->sk_reuse; |
317 | else if (optname == SO_BINDTODEVICE) |
318 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
319 | else if (optname == SO_BINDTOIFINDEX) |
320 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
321 | } |
322 | release_sock(sk); |
323 | return ret; |
324 | case SO_KEEPALIVE: |
325 | case SO_PRIORITY: |
326 | case SO_SNDBUF: |
327 | case SO_SNDBUFFORCE: |
328 | case SO_RCVBUF: |
329 | case SO_RCVBUFFORCE: |
330 | case SO_MARK: |
331 | case SO_INCOMING_CPU: |
332 | case SO_DEBUG: |
333 | case SO_TIMESTAMP_OLD: |
334 | case SO_TIMESTAMP_NEW: |
335 | case SO_TIMESTAMPNS_OLD: |
336 | case SO_TIMESTAMPNS_NEW: |
337 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, |
338 | optlen); |
339 | case SO_TIMESTAMPING_OLD: |
340 | case SO_TIMESTAMPING_NEW: |
341 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, |
342 | optval, optlen); |
343 | case SO_LINGER: |
344 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); |
345 | case SO_RCVLOWAT: |
346 | case SO_RCVTIMEO_OLD: |
347 | case SO_RCVTIMEO_NEW: |
348 | case SO_SNDTIMEO_OLD: |
349 | case SO_SNDTIMEO_NEW: |
350 | case SO_BUSY_POLL: |
351 | case SO_PREFER_BUSY_POLL: |
352 | case SO_BUSY_POLL_BUDGET: |
353 | /* No need to copy: only relevant for msk */ |
354 | return sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, optval, optlen); |
355 | case SO_NO_CHECK: |
356 | case SO_DONTROUTE: |
357 | case SO_BROADCAST: |
358 | case SO_BSDCOMPAT: |
359 | case SO_PASSCRED: |
360 | case SO_PASSPIDFD: |
361 | case SO_PASSSEC: |
362 | case SO_RXQ_OVFL: |
363 | case SO_WIFI_STATUS: |
364 | case SO_NOFCS: |
365 | case SO_SELECT_ERR_QUEUE: |
366 | return 0; |
367 | } |
368 | |
369 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
370 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, |
371 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, |
372 | * we must be careful with subflows |
373 | * |
374 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks |
375 | * explicitly the sk_protocol field |
376 | * |
377 | * SO_PEEK_OFF is unsupported, as it is for plain TCP |
378 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows |
379 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, |
380 | * but likely needs careful design |
381 | * |
382 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg |
383 | * SO_TXTIME is currently unsupported |
384 | */ |
385 | |
386 | return -EOPNOTSUPP; |
387 | } |
388 | |
389 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, |
390 | sockptr_t optval, unsigned int optlen) |
391 | { |
392 | struct sock *sk = (struct sock *)msk; |
393 | int ret = -EOPNOTSUPP; |
394 | struct sock *ssk; |
395 | |
396 | switch (optname) { |
397 | case IPV6_V6ONLY: |
398 | case IPV6_TRANSPARENT: |
399 | case IPV6_FREEBIND: |
400 | lock_sock(sk); |
401 | ssk = __mptcp_nmpc_sk(msk); |
402 | if (IS_ERR(ptr: ssk)) { |
403 | release_sock(sk); |
404 | return PTR_ERR(ptr: ssk); |
405 | } |
406 | |
407 | ret = tcp_setsockopt(sk: ssk, SOL_IPV6, optname, optval, optlen); |
408 | if (ret != 0) { |
409 | release_sock(sk); |
410 | return ret; |
411 | } |
412 | |
413 | sockopt_seq_inc(msk); |
414 | |
415 | switch (optname) { |
416 | case IPV6_V6ONLY: |
417 | sk->sk_ipv6only = ssk->sk_ipv6only; |
418 | break; |
419 | case IPV6_TRANSPARENT: |
420 | inet_assign_bit(TRANSPARENT, sk, |
421 | inet_test_bit(TRANSPARENT, ssk)); |
422 | break; |
423 | case IPV6_FREEBIND: |
424 | inet_assign_bit(FREEBIND, sk, |
425 | inet_test_bit(FREEBIND, ssk)); |
426 | break; |
427 | } |
428 | |
429 | release_sock(sk); |
430 | break; |
431 | } |
432 | |
433 | return ret; |
434 | } |
435 | |
436 | static bool mptcp_supported_sockopt(int level, int optname) |
437 | { |
438 | if (level == SOL_IP) { |
439 | switch (optname) { |
440 | /* should work fine */ |
441 | case IP_FREEBIND: |
442 | case IP_TRANSPARENT: |
443 | |
444 | /* the following are control cmsg related */ |
445 | case IP_PKTINFO: |
446 | case IP_RECVTTL: |
447 | case IP_RECVTOS: |
448 | case IP_RECVOPTS: |
449 | case IP_RETOPTS: |
450 | case IP_PASSSEC: |
451 | case IP_RECVORIGDSTADDR: |
452 | case IP_CHECKSUM: |
453 | case IP_RECVFRAGSIZE: |
454 | |
455 | /* common stuff that need some love */ |
456 | case IP_TOS: |
457 | case IP_TTL: |
458 | case IP_BIND_ADDRESS_NO_PORT: |
459 | case IP_MTU_DISCOVER: |
460 | case IP_RECVERR: |
461 | |
462 | /* possibly less common may deserve some love */ |
463 | case IP_MINTTL: |
464 | |
465 | /* the following is apparently a no-op for plain TCP */ |
466 | case IP_RECVERR_RFC4884: |
467 | return true; |
468 | } |
469 | |
470 | /* IP_OPTIONS is not supported, needs subflow care */ |
471 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ |
472 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, |
473 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, |
474 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, |
475 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, |
476 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, |
477 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal |
478 | * with mcast stuff |
479 | */ |
480 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ |
481 | return false; |
482 | } |
483 | if (level == SOL_IPV6) { |
484 | switch (optname) { |
485 | case IPV6_V6ONLY: |
486 | |
487 | /* the following are control cmsg related */ |
488 | case IPV6_RECVPKTINFO: |
489 | case IPV6_2292PKTINFO: |
490 | case IPV6_RECVHOPLIMIT: |
491 | case IPV6_2292HOPLIMIT: |
492 | case IPV6_RECVRTHDR: |
493 | case IPV6_2292RTHDR: |
494 | case IPV6_RECVHOPOPTS: |
495 | case IPV6_2292HOPOPTS: |
496 | case IPV6_RECVDSTOPTS: |
497 | case IPV6_2292DSTOPTS: |
498 | case IPV6_RECVTCLASS: |
499 | case IPV6_FLOWINFO: |
500 | case IPV6_RECVPATHMTU: |
501 | case IPV6_RECVORIGDSTADDR: |
502 | case IPV6_RECVFRAGSIZE: |
503 | |
504 | /* the following ones need some love but are quite common */ |
505 | case IPV6_TCLASS: |
506 | case IPV6_TRANSPARENT: |
507 | case IPV6_FREEBIND: |
508 | case IPV6_PKTINFO: |
509 | case IPV6_2292PKTOPTIONS: |
510 | case IPV6_UNICAST_HOPS: |
511 | case IPV6_MTU_DISCOVER: |
512 | case IPV6_MTU: |
513 | case IPV6_RECVERR: |
514 | case IPV6_FLOWINFO_SEND: |
515 | case IPV6_FLOWLABEL_MGR: |
516 | case IPV6_MINHOPCOUNT: |
517 | case IPV6_DONTFRAG: |
518 | case IPV6_AUTOFLOWLABEL: |
519 | |
520 | /* the following one is a no-op for plain TCP */ |
521 | case IPV6_RECVERR_RFC4884: |
522 | return true; |
523 | } |
524 | |
525 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are |
526 | * not supported |
527 | */ |
528 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, |
529 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, |
530 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, |
531 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, |
532 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, |
533 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER |
534 | * are not supported better not deal with mcast |
535 | */ |
536 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ |
537 | |
538 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ |
539 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ |
540 | return false; |
541 | } |
542 | if (level == SOL_TCP) { |
543 | switch (optname) { |
544 | /* the following are no-op or should work just fine */ |
545 | case TCP_THIN_DUPACK: |
546 | case TCP_DEFER_ACCEPT: |
547 | |
548 | /* the following need some love */ |
549 | case TCP_MAXSEG: |
550 | case TCP_NODELAY: |
551 | case TCP_THIN_LINEAR_TIMEOUTS: |
552 | case TCP_CONGESTION: |
553 | case TCP_CORK: |
554 | case TCP_KEEPIDLE: |
555 | case TCP_KEEPINTVL: |
556 | case TCP_KEEPCNT: |
557 | case TCP_SYNCNT: |
558 | case TCP_SAVE_SYN: |
559 | case TCP_LINGER2: |
560 | case TCP_WINDOW_CLAMP: |
561 | case TCP_QUICKACK: |
562 | case TCP_USER_TIMEOUT: |
563 | case TCP_TIMESTAMP: |
564 | case TCP_NOTSENT_LOWAT: |
565 | case TCP_TX_DELAY: |
566 | case TCP_INQ: |
567 | case TCP_FASTOPEN: |
568 | case TCP_FASTOPEN_CONNECT: |
569 | case TCP_FASTOPEN_KEY: |
570 | case TCP_FASTOPEN_NO_COOKIE: |
571 | return true; |
572 | } |
573 | |
574 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ |
575 | |
576 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, |
577 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess |
578 | */ |
579 | } |
580 | return false; |
581 | } |
582 | |
583 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
584 | unsigned int optlen) |
585 | { |
586 | struct mptcp_subflow_context *subflow; |
587 | struct sock *sk = (struct sock *)msk; |
588 | char name[TCP_CA_NAME_MAX]; |
589 | bool cap_net_admin; |
590 | int ret; |
591 | |
592 | if (optlen < 1) |
593 | return -EINVAL; |
594 | |
595 | ret = strncpy_from_sockptr(dst: name, src: optval, |
596 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); |
597 | if (ret < 0) |
598 | return -EFAULT; |
599 | |
600 | name[ret] = 0; |
601 | |
602 | cap_net_admin = ns_capable(ns: sock_net(sk)->user_ns, CAP_NET_ADMIN); |
603 | |
604 | ret = 0; |
605 | lock_sock(sk); |
606 | sockopt_seq_inc(msk); |
607 | mptcp_for_each_subflow(msk, subflow) { |
608 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
609 | int err; |
610 | |
611 | lock_sock(sk: ssk); |
612 | err = tcp_set_congestion_control(sk: ssk, name, load: true, cap_net_admin); |
613 | if (err < 0 && ret == 0) |
614 | ret = err; |
615 | subflow->setsockopt_seq = msk->setsockopt_seq; |
616 | release_sock(sk: ssk); |
617 | } |
618 | |
619 | if (ret == 0) |
620 | strcpy(p: msk->ca_name, q: name); |
621 | |
622 | release_sock(sk); |
623 | return ret; |
624 | } |
625 | |
626 | static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, |
627 | unsigned int optlen) |
628 | { |
629 | struct mptcp_subflow_context *subflow; |
630 | struct sock *sk = (struct sock *)msk; |
631 | int val; |
632 | |
633 | if (optlen < sizeof(int)) |
634 | return -EINVAL; |
635 | |
636 | if (copy_from_sockptr(dst: &val, src: optval, size: sizeof(val))) |
637 | return -EFAULT; |
638 | |
639 | lock_sock(sk); |
640 | sockopt_seq_inc(msk); |
641 | msk->cork = !!val; |
642 | mptcp_for_each_subflow(msk, subflow) { |
643 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
644 | |
645 | lock_sock(sk: ssk); |
646 | __tcp_sock_set_cork(sk: ssk, on: !!val); |
647 | release_sock(sk: ssk); |
648 | } |
649 | if (!val) |
650 | mptcp_check_and_set_pending(sk); |
651 | release_sock(sk); |
652 | |
653 | return 0; |
654 | } |
655 | |
656 | static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, |
657 | unsigned int optlen) |
658 | { |
659 | struct mptcp_subflow_context *subflow; |
660 | struct sock *sk = (struct sock *)msk; |
661 | int val; |
662 | |
663 | if (optlen < sizeof(int)) |
664 | return -EINVAL; |
665 | |
666 | if (copy_from_sockptr(dst: &val, src: optval, size: sizeof(val))) |
667 | return -EFAULT; |
668 | |
669 | lock_sock(sk); |
670 | sockopt_seq_inc(msk); |
671 | msk->nodelay = !!val; |
672 | mptcp_for_each_subflow(msk, subflow) { |
673 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
674 | |
675 | lock_sock(sk: ssk); |
676 | __tcp_sock_set_nodelay(sk: ssk, on: !!val); |
677 | release_sock(sk: ssk); |
678 | } |
679 | if (val) |
680 | mptcp_check_and_set_pending(sk); |
681 | release_sock(sk); |
682 | |
683 | return 0; |
684 | } |
685 | |
686 | static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, |
687 | sockptr_t optval, unsigned int optlen) |
688 | { |
689 | struct sock *sk = (struct sock *)msk; |
690 | struct sock *ssk; |
691 | int err; |
692 | |
693 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); |
694 | if (err != 0) |
695 | return err; |
696 | |
697 | lock_sock(sk); |
698 | |
699 | ssk = __mptcp_nmpc_sk(msk); |
700 | if (IS_ERR(ptr: ssk)) { |
701 | release_sock(sk); |
702 | return PTR_ERR(ptr: ssk); |
703 | } |
704 | |
705 | switch (optname) { |
706 | case IP_FREEBIND: |
707 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
708 | break; |
709 | case IP_TRANSPARENT: |
710 | inet_assign_bit(TRANSPARENT, ssk, |
711 | inet_test_bit(TRANSPARENT, sk)); |
712 | break; |
713 | default: |
714 | release_sock(sk); |
715 | WARN_ON_ONCE(1); |
716 | return -EOPNOTSUPP; |
717 | } |
718 | |
719 | sockopt_seq_inc(msk); |
720 | release_sock(sk); |
721 | return 0; |
722 | } |
723 | |
724 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, |
725 | sockptr_t optval, unsigned int optlen) |
726 | { |
727 | struct mptcp_subflow_context *subflow; |
728 | struct sock *sk = (struct sock *)msk; |
729 | int err, val; |
730 | |
731 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); |
732 | |
733 | if (err != 0) |
734 | return err; |
735 | |
736 | lock_sock(sk); |
737 | sockopt_seq_inc(msk); |
738 | val = READ_ONCE(inet_sk(sk)->tos); |
739 | mptcp_for_each_subflow(msk, subflow) { |
740 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
741 | |
742 | __ip_sock_set_tos(sk: ssk, val); |
743 | } |
744 | release_sock(sk); |
745 | |
746 | return 0; |
747 | } |
748 | |
749 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, |
750 | sockptr_t optval, unsigned int optlen) |
751 | { |
752 | switch (optname) { |
753 | case IP_FREEBIND: |
754 | case IP_TRANSPARENT: |
755 | return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); |
756 | case IP_TOS: |
757 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); |
758 | } |
759 | |
760 | return -EOPNOTSUPP; |
761 | } |
762 | |
763 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
764 | sockptr_t optval, unsigned int optlen) |
765 | { |
766 | struct sock *sk = (struct sock *)msk; |
767 | struct sock *ssk; |
768 | int ret; |
769 | |
770 | /* Limit to first subflow, before the connection establishment */ |
771 | lock_sock(sk); |
772 | ssk = __mptcp_nmpc_sk(msk); |
773 | if (IS_ERR(ptr: ssk)) { |
774 | ret = PTR_ERR(ptr: ssk); |
775 | goto unlock; |
776 | } |
777 | |
778 | ret = tcp_setsockopt(sk: ssk, level, optname, optval, optlen); |
779 | |
780 | unlock: |
781 | release_sock(sk); |
782 | return ret; |
783 | } |
784 | |
785 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
786 | sockptr_t optval, unsigned int optlen) |
787 | { |
788 | struct sock *sk = (void *)msk; |
789 | int ret, val; |
790 | |
791 | switch (optname) { |
792 | case TCP_INQ: |
793 | ret = mptcp_get_int_option(msk, optval, optlen, val: &val); |
794 | if (ret) |
795 | return ret; |
796 | if (val < 0 || val > 1) |
797 | return -EINVAL; |
798 | |
799 | lock_sock(sk); |
800 | msk->recvmsg_inq = !!val; |
801 | release_sock(sk); |
802 | return 0; |
803 | case TCP_ULP: |
804 | return -EOPNOTSUPP; |
805 | case TCP_CONGESTION: |
806 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); |
807 | case TCP_CORK: |
808 | return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); |
809 | case TCP_NODELAY: |
810 | return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); |
811 | case TCP_DEFER_ACCEPT: |
812 | /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ |
813 | mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); |
814 | return 0; |
815 | case TCP_FASTOPEN: |
816 | case TCP_FASTOPEN_CONNECT: |
817 | case TCP_FASTOPEN_KEY: |
818 | case TCP_FASTOPEN_NO_COOKIE: |
819 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, |
820 | optval, optlen); |
821 | } |
822 | |
823 | return -EOPNOTSUPP; |
824 | } |
825 | |
826 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
827 | sockptr_t optval, unsigned int optlen) |
828 | { |
829 | struct mptcp_sock *msk = mptcp_sk(sk); |
830 | struct sock *ssk; |
831 | |
832 | pr_debug("msk=%p" , msk); |
833 | |
834 | if (level == SOL_SOCKET) |
835 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); |
836 | |
837 | if (!mptcp_supported_sockopt(level, optname)) |
838 | return -ENOPROTOOPT; |
839 | |
840 | /* @@ the meaning of setsockopt() when the socket is connected and |
841 | * there are multiple subflows is not yet defined. It is up to the |
842 | * MPTCP-level socket to configure the subflows until the subflow |
843 | * is in TCP fallback, when TCP socket options are passed through |
844 | * to the one remaining subflow. |
845 | */ |
846 | lock_sock(sk); |
847 | ssk = __mptcp_tcp_fallback(msk); |
848 | release_sock(sk); |
849 | if (ssk) |
850 | return tcp_setsockopt(sk: ssk, level, optname, optval, optlen); |
851 | |
852 | if (level == SOL_IP) |
853 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); |
854 | |
855 | if (level == SOL_IPV6) |
856 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); |
857 | |
858 | if (level == SOL_TCP) |
859 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); |
860 | |
861 | return -EOPNOTSUPP; |
862 | } |
863 | |
864 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
865 | char __user *optval, int __user *optlen) |
866 | { |
867 | struct sock *sk = (struct sock *)msk; |
868 | struct sock *ssk; |
869 | int ret; |
870 | |
871 | lock_sock(sk); |
872 | ssk = msk->first; |
873 | if (ssk) { |
874 | ret = tcp_getsockopt(sk: ssk, level, optname, optval, optlen); |
875 | goto out; |
876 | } |
877 | |
878 | ssk = __mptcp_nmpc_sk(msk); |
879 | if (IS_ERR(ptr: ssk)) { |
880 | ret = PTR_ERR(ptr: ssk); |
881 | goto out; |
882 | } |
883 | |
884 | ret = tcp_getsockopt(sk: ssk, level, optname, optval, optlen); |
885 | |
886 | out: |
887 | release_sock(sk); |
888 | return ret; |
889 | } |
890 | |
891 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) |
892 | { |
893 | struct sock *sk = (struct sock *)msk; |
894 | u32 flags = 0; |
895 | bool slow; |
896 | |
897 | memset(info, 0, sizeof(*info)); |
898 | |
899 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); |
900 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); |
901 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); |
902 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); |
903 | |
904 | if (inet_sk_state_load(sk) == TCP_LISTEN) |
905 | return; |
906 | |
907 | /* The following limits only make sense for the in-kernel PM */ |
908 | if (mptcp_pm_is_kernel(msk)) { |
909 | info->mptcpi_subflows_max = |
910 | mptcp_pm_get_subflows_max(msk); |
911 | info->mptcpi_add_addr_signal_max = |
912 | mptcp_pm_get_add_addr_signal_max(msk); |
913 | info->mptcpi_add_addr_accepted_max = |
914 | mptcp_pm_get_add_addr_accept_max(msk); |
915 | info->mptcpi_local_addr_max = |
916 | mptcp_pm_get_local_addr_max(msk); |
917 | } |
918 | |
919 | if (__mptcp_check_fallback(msk)) |
920 | flags |= MPTCP_INFO_FLAG_FALLBACK; |
921 | if (READ_ONCE(msk->can_ack)) |
922 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; |
923 | info->mptcpi_flags = flags; |
924 | mptcp_data_lock(sk); |
925 | info->mptcpi_snd_una = msk->snd_una; |
926 | info->mptcpi_rcv_nxt = msk->ack_seq; |
927 | info->mptcpi_bytes_acked = msk->bytes_acked; |
928 | mptcp_data_unlock(sk); |
929 | |
930 | slow = lock_sock_fast(sk); |
931 | info->mptcpi_csum_enabled = msk->csum_enabled; |
932 | info->mptcpi_token = msk->token; |
933 | info->mptcpi_write_seq = msk->write_seq; |
934 | info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; |
935 | info->mptcpi_bytes_sent = msk->bytes_sent; |
936 | info->mptcpi_bytes_received = msk->bytes_received; |
937 | info->mptcpi_bytes_retrans = msk->bytes_retrans; |
938 | unlock_sock_fast(sk, slow); |
939 | } |
940 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); |
941 | |
942 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) |
943 | { |
944 | struct mptcp_info m_info; |
945 | int len; |
946 | |
947 | if (get_user(len, optlen)) |
948 | return -EFAULT; |
949 | |
950 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); |
951 | |
952 | mptcp_diag_fill_info(msk, &m_info); |
953 | |
954 | if (put_user(len, optlen)) |
955 | return -EFAULT; |
956 | |
957 | if (copy_to_user(to: optval, from: &m_info, n: len)) |
958 | return -EFAULT; |
959 | |
960 | return 0; |
961 | } |
962 | |
963 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, |
964 | char __user *optval, |
965 | u32 copied, |
966 | int __user *optlen) |
967 | { |
968 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); |
969 | |
970 | if (copied) |
971 | copied += sfd->size_subflow_data; |
972 | else |
973 | copied = copylen; |
974 | |
975 | if (put_user(copied, optlen)) |
976 | return -EFAULT; |
977 | |
978 | if (copy_to_user(to: optval, from: sfd, n: copylen)) |
979 | return -EFAULT; |
980 | |
981 | return 0; |
982 | } |
983 | |
984 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, |
985 | char __user *optval, |
986 | int __user *optlen) |
987 | { |
988 | int len, copylen; |
989 | |
990 | if (get_user(len, optlen)) |
991 | return -EFAULT; |
992 | |
993 | /* if mptcp_subflow_data size is changed, need to adjust |
994 | * this function to deal with programs using old version. |
995 | */ |
996 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); |
997 | |
998 | if (len < MIN_INFO_OPTLEN_SIZE) |
999 | return -EINVAL; |
1000 | |
1001 | memset(sfd, 0, sizeof(*sfd)); |
1002 | |
1003 | copylen = min_t(unsigned int, len, sizeof(*sfd)); |
1004 | if (copy_from_user(to: sfd, from: optval, n: copylen)) |
1005 | return -EFAULT; |
1006 | |
1007 | /* size_subflow_data is u32, but len is signed */ |
1008 | if (sfd->size_subflow_data > INT_MAX || |
1009 | sfd->size_user > INT_MAX) |
1010 | return -EINVAL; |
1011 | |
1012 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || |
1013 | sfd->size_subflow_data > len) |
1014 | return -EINVAL; |
1015 | |
1016 | if (sfd->num_subflows || sfd->size_kernel) |
1017 | return -EINVAL; |
1018 | |
1019 | return len - sfd->size_subflow_data; |
1020 | } |
1021 | |
1022 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, |
1023 | int __user *optlen) |
1024 | { |
1025 | struct mptcp_subflow_context *subflow; |
1026 | struct sock *sk = (struct sock *)msk; |
1027 | unsigned int sfcount = 0, copied = 0; |
1028 | struct mptcp_subflow_data sfd; |
1029 | char __user *infoptr; |
1030 | int len; |
1031 | |
1032 | len = mptcp_get_subflow_data(sfd: &sfd, optval, optlen); |
1033 | if (len < 0) |
1034 | return len; |
1035 | |
1036 | sfd.size_kernel = sizeof(struct tcp_info); |
1037 | sfd.size_user = min_t(unsigned int, sfd.size_user, |
1038 | sizeof(struct tcp_info)); |
1039 | |
1040 | infoptr = optval + sfd.size_subflow_data; |
1041 | |
1042 | lock_sock(sk); |
1043 | |
1044 | mptcp_for_each_subflow(msk, subflow) { |
1045 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1046 | |
1047 | ++sfcount; |
1048 | |
1049 | if (len && len >= sfd.size_user) { |
1050 | struct tcp_info info; |
1051 | |
1052 | tcp_get_info(ssk, &info); |
1053 | |
1054 | if (copy_to_user(to: infoptr, from: &info, n: sfd.size_user)) { |
1055 | release_sock(sk); |
1056 | return -EFAULT; |
1057 | } |
1058 | |
1059 | infoptr += sfd.size_user; |
1060 | copied += sfd.size_user; |
1061 | len -= sfd.size_user; |
1062 | } |
1063 | } |
1064 | |
1065 | release_sock(sk); |
1066 | |
1067 | sfd.num_subflows = sfcount; |
1068 | |
1069 | if (mptcp_put_subflow_data(sfd: &sfd, optval, copied, optlen)) |
1070 | return -EFAULT; |
1071 | |
1072 | return 0; |
1073 | } |
1074 | |
1075 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) |
1076 | { |
1077 | const struct inet_sock *inet = inet_sk(sk); |
1078 | |
1079 | memset(a, 0, sizeof(*a)); |
1080 | |
1081 | if (sk->sk_family == AF_INET) { |
1082 | a->sin_local.sin_family = AF_INET; |
1083 | a->sin_local.sin_port = inet->inet_sport; |
1084 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; |
1085 | |
1086 | if (!a->sin_local.sin_addr.s_addr) |
1087 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; |
1088 | |
1089 | a->sin_remote.sin_family = AF_INET; |
1090 | a->sin_remote.sin_port = inet->inet_dport; |
1091 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; |
1092 | #if IS_ENABLED(CONFIG_IPV6) |
1093 | } else if (sk->sk_family == AF_INET6) { |
1094 | const struct ipv6_pinfo *np = inet6_sk(sk: sk); |
1095 | |
1096 | if (WARN_ON_ONCE(!np)) |
1097 | return; |
1098 | |
1099 | a->sin6_local.sin6_family = AF_INET6; |
1100 | a->sin6_local.sin6_port = inet->inet_sport; |
1101 | |
1102 | if (ipv6_addr_any(a: &sk->sk_v6_rcv_saddr)) |
1103 | a->sin6_local.sin6_addr = np->saddr; |
1104 | else |
1105 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; |
1106 | |
1107 | a->sin6_remote.sin6_family = AF_INET6; |
1108 | a->sin6_remote.sin6_port = inet->inet_dport; |
1109 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; |
1110 | #endif |
1111 | } |
1112 | } |
1113 | |
1114 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, |
1115 | int __user *optlen) |
1116 | { |
1117 | struct mptcp_subflow_context *subflow; |
1118 | struct sock *sk = (struct sock *)msk; |
1119 | unsigned int sfcount = 0, copied = 0; |
1120 | struct mptcp_subflow_data sfd; |
1121 | char __user *addrptr; |
1122 | int len; |
1123 | |
1124 | len = mptcp_get_subflow_data(sfd: &sfd, optval, optlen); |
1125 | if (len < 0) |
1126 | return len; |
1127 | |
1128 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); |
1129 | sfd.size_user = min_t(unsigned int, sfd.size_user, |
1130 | sizeof(struct mptcp_subflow_addrs)); |
1131 | |
1132 | addrptr = optval + sfd.size_subflow_data; |
1133 | |
1134 | lock_sock(sk); |
1135 | |
1136 | mptcp_for_each_subflow(msk, subflow) { |
1137 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1138 | |
1139 | ++sfcount; |
1140 | |
1141 | if (len && len >= sfd.size_user) { |
1142 | struct mptcp_subflow_addrs a; |
1143 | |
1144 | mptcp_get_sub_addrs(sk: ssk, a: &a); |
1145 | |
1146 | if (copy_to_user(to: addrptr, from: &a, n: sfd.size_user)) { |
1147 | release_sock(sk); |
1148 | return -EFAULT; |
1149 | } |
1150 | |
1151 | addrptr += sfd.size_user; |
1152 | copied += sfd.size_user; |
1153 | len -= sfd.size_user; |
1154 | } |
1155 | } |
1156 | |
1157 | release_sock(sk); |
1158 | |
1159 | sfd.num_subflows = sfcount; |
1160 | |
1161 | if (mptcp_put_subflow_data(sfd: &sfd, optval, copied, optlen)) |
1162 | return -EFAULT; |
1163 | |
1164 | return 0; |
1165 | } |
1166 | |
1167 | static int mptcp_get_full_info(struct mptcp_full_info *mfi, |
1168 | char __user *optval, |
1169 | int __user *optlen) |
1170 | { |
1171 | int len; |
1172 | |
1173 | BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != |
1174 | MIN_FULL_INFO_OPTLEN_SIZE); |
1175 | |
1176 | if (get_user(len, optlen)) |
1177 | return -EFAULT; |
1178 | |
1179 | if (len < MIN_FULL_INFO_OPTLEN_SIZE) |
1180 | return -EINVAL; |
1181 | |
1182 | memset(mfi, 0, sizeof(*mfi)); |
1183 | if (copy_from_user(to: mfi, from: optval, MIN_FULL_INFO_OPTLEN_SIZE)) |
1184 | return -EFAULT; |
1185 | |
1186 | if (mfi->size_tcpinfo_kernel || |
1187 | mfi->size_sfinfo_kernel || |
1188 | mfi->num_subflows) |
1189 | return -EINVAL; |
1190 | |
1191 | if (mfi->size_sfinfo_user > INT_MAX || |
1192 | mfi->size_tcpinfo_user > INT_MAX) |
1193 | return -EINVAL; |
1194 | |
1195 | return len - MIN_FULL_INFO_OPTLEN_SIZE; |
1196 | } |
1197 | |
1198 | static int mptcp_put_full_info(struct mptcp_full_info *mfi, |
1199 | char __user *optval, |
1200 | u32 copylen, |
1201 | int __user *optlen) |
1202 | { |
1203 | copylen += MIN_FULL_INFO_OPTLEN_SIZE; |
1204 | if (put_user(copylen, optlen)) |
1205 | return -EFAULT; |
1206 | |
1207 | if (copy_to_user(to: optval, from: mfi, n: copylen)) |
1208 | return -EFAULT; |
1209 | return 0; |
1210 | } |
1211 | |
1212 | static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, |
1213 | int __user *optlen) |
1214 | { |
1215 | unsigned int sfcount = 0, copylen = 0; |
1216 | struct mptcp_subflow_context *subflow; |
1217 | struct sock *sk = (struct sock *)msk; |
1218 | void __user *tcpinfoptr, *sfinfoptr; |
1219 | struct mptcp_full_info mfi; |
1220 | int len; |
1221 | |
1222 | len = mptcp_get_full_info(mfi: &mfi, optval, optlen); |
1223 | if (len < 0) |
1224 | return len; |
1225 | |
1226 | /* don't bother filling the mptcp info if there is not enough |
1227 | * user-space-provided storage |
1228 | */ |
1229 | if (len > 0) { |
1230 | mptcp_diag_fill_info(msk, &mfi.mptcp_info); |
1231 | copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); |
1232 | } |
1233 | |
1234 | mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); |
1235 | mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, |
1236 | sizeof(struct tcp_info)); |
1237 | sfinfoptr = u64_to_user_ptr(mfi.subflow_info); |
1238 | mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); |
1239 | mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, |
1240 | sizeof(struct mptcp_subflow_info)); |
1241 | tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); |
1242 | |
1243 | lock_sock(sk); |
1244 | mptcp_for_each_subflow(msk, subflow) { |
1245 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1246 | struct mptcp_subflow_info sfinfo; |
1247 | struct tcp_info tcp_info; |
1248 | |
1249 | if (sfcount++ >= mfi.size_arrays_user) |
1250 | continue; |
1251 | |
1252 | /* fetch addr/tcp_info only if the user space buffers |
1253 | * are wide enough |
1254 | */ |
1255 | memset(&sfinfo, 0, sizeof(sfinfo)); |
1256 | sfinfo.id = subflow->subflow_id; |
1257 | if (mfi.size_sfinfo_user > |
1258 | offsetof(struct mptcp_subflow_info, addrs)) |
1259 | mptcp_get_sub_addrs(sk: ssk, a: &sfinfo.addrs); |
1260 | if (copy_to_user(to: sfinfoptr, from: &sfinfo, n: mfi.size_sfinfo_user)) |
1261 | goto fail_release; |
1262 | |
1263 | if (mfi.size_tcpinfo_user) { |
1264 | tcp_get_info(ssk, &tcp_info); |
1265 | if (copy_to_user(to: tcpinfoptr, from: &tcp_info, |
1266 | n: mfi.size_tcpinfo_user)) |
1267 | goto fail_release; |
1268 | } |
1269 | |
1270 | tcpinfoptr += mfi.size_tcpinfo_user; |
1271 | sfinfoptr += mfi.size_sfinfo_user; |
1272 | } |
1273 | release_sock(sk); |
1274 | |
1275 | mfi.num_subflows = sfcount; |
1276 | if (mptcp_put_full_info(mfi: &mfi, optval, copylen, optlen)) |
1277 | return -EFAULT; |
1278 | |
1279 | return 0; |
1280 | |
1281 | fail_release: |
1282 | release_sock(sk); |
1283 | return -EFAULT; |
1284 | } |
1285 | |
1286 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, |
1287 | int __user *optlen, int val) |
1288 | { |
1289 | int len; |
1290 | |
1291 | if (get_user(len, optlen)) |
1292 | return -EFAULT; |
1293 | if (len < 0) |
1294 | return -EINVAL; |
1295 | |
1296 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1297 | unsigned char ucval = (unsigned char)val; |
1298 | |
1299 | len = 1; |
1300 | if (put_user(len, optlen)) |
1301 | return -EFAULT; |
1302 | if (copy_to_user(to: optval, from: &ucval, n: 1)) |
1303 | return -EFAULT; |
1304 | } else { |
1305 | len = min_t(unsigned int, len, sizeof(int)); |
1306 | if (put_user(len, optlen)) |
1307 | return -EFAULT; |
1308 | if (copy_to_user(to: optval, from: &val, n: len)) |
1309 | return -EFAULT; |
1310 | } |
1311 | |
1312 | return 0; |
1313 | } |
1314 | |
1315 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
1316 | char __user *optval, int __user *optlen) |
1317 | { |
1318 | switch (optname) { |
1319 | case TCP_ULP: |
1320 | case TCP_CONGESTION: |
1321 | case TCP_INFO: |
1322 | case TCP_CC_INFO: |
1323 | case TCP_DEFER_ACCEPT: |
1324 | case TCP_FASTOPEN: |
1325 | case TCP_FASTOPEN_CONNECT: |
1326 | case TCP_FASTOPEN_KEY: |
1327 | case TCP_FASTOPEN_NO_COOKIE: |
1328 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
1329 | optval, optlen); |
1330 | case TCP_INQ: |
1331 | return mptcp_put_int_option(msk, optval, optlen, val: msk->recvmsg_inq); |
1332 | case TCP_CORK: |
1333 | return mptcp_put_int_option(msk, optval, optlen, val: msk->cork); |
1334 | case TCP_NODELAY: |
1335 | return mptcp_put_int_option(msk, optval, optlen, val: msk->nodelay); |
1336 | } |
1337 | return -EOPNOTSUPP; |
1338 | } |
1339 | |
1340 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, |
1341 | char __user *optval, int __user *optlen) |
1342 | { |
1343 | struct sock *sk = (void *)msk; |
1344 | |
1345 | switch (optname) { |
1346 | case IP_TOS: |
1347 | return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); |
1348 | } |
1349 | |
1350 | return -EOPNOTSUPP; |
1351 | } |
1352 | |
1353 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, |
1354 | char __user *optval, int __user *optlen) |
1355 | { |
1356 | switch (optname) { |
1357 | case MPTCP_INFO: |
1358 | return mptcp_getsockopt_info(msk, optval, optlen); |
1359 | case MPTCP_FULL_INFO: |
1360 | return mptcp_getsockopt_full_info(msk, optval, optlen); |
1361 | case MPTCP_TCPINFO: |
1362 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); |
1363 | case MPTCP_SUBFLOW_ADDRS: |
1364 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); |
1365 | } |
1366 | |
1367 | return -EOPNOTSUPP; |
1368 | } |
1369 | |
1370 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
1371 | char __user *optval, int __user *option) |
1372 | { |
1373 | struct mptcp_sock *msk = mptcp_sk(sk); |
1374 | struct sock *ssk; |
1375 | |
1376 | pr_debug("msk=%p" , msk); |
1377 | |
1378 | /* @@ the meaning of setsockopt() when the socket is connected and |
1379 | * there are multiple subflows is not yet defined. It is up to the |
1380 | * MPTCP-level socket to configure the subflows until the subflow |
1381 | * is in TCP fallback, when socket options are passed through |
1382 | * to the one remaining subflow. |
1383 | */ |
1384 | lock_sock(sk); |
1385 | ssk = __mptcp_tcp_fallback(msk); |
1386 | release_sock(sk); |
1387 | if (ssk) |
1388 | return tcp_getsockopt(sk: ssk, level, optname, optval, optlen: option); |
1389 | |
1390 | if (level == SOL_IP) |
1391 | return mptcp_getsockopt_v4(msk, optname, optval, optlen: option); |
1392 | if (level == SOL_TCP) |
1393 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, optlen: option); |
1394 | if (level == SOL_MPTCP) |
1395 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, optlen: option); |
1396 | return -EOPNOTSUPP; |
1397 | } |
1398 | |
1399 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
1400 | { |
1401 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1402 | struct sock *sk = (struct sock *)msk; |
1403 | |
1404 | if (ssk->sk_prot->keepalive) { |
1405 | if (sock_flag(sk, flag: SOCK_KEEPOPEN)) |
1406 | ssk->sk_prot->keepalive(ssk, 1); |
1407 | else |
1408 | ssk->sk_prot->keepalive(ssk, 0); |
1409 | } |
1410 | |
1411 | ssk->sk_priority = sk->sk_priority; |
1412 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
1413 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; |
1414 | ssk->sk_ipv6only = sk->sk_ipv6only; |
1415 | __ip_sock_set_tos(sk: ssk, inet_sk(sk)->tos); |
1416 | |
1417 | if (sk->sk_userlocks & tx_rx_locks) { |
1418 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; |
1419 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { |
1420 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
1421 | mptcp_subflow_ctx(sk: ssk)->cached_sndbuf = sk->sk_sndbuf; |
1422 | } |
1423 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1424 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); |
1425 | } |
1426 | |
1427 | if (sock_flag(sk, flag: SOCK_LINGER)) { |
1428 | ssk->sk_lingertime = sk->sk_lingertime; |
1429 | sock_set_flag(sk: ssk, flag: SOCK_LINGER); |
1430 | } else { |
1431 | sock_reset_flag(sk: ssk, flag: SOCK_LINGER); |
1432 | } |
1433 | |
1434 | if (sk->sk_mark != ssk->sk_mark) { |
1435 | ssk->sk_mark = sk->sk_mark; |
1436 | sk_dst_reset(sk: ssk); |
1437 | } |
1438 | |
1439 | sock_valbool_flag(sk: ssk, bit: SOCK_DBG, valbool: sock_flag(sk, flag: SOCK_DBG)); |
1440 | |
1441 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(sk: ssk)->icsk_ca_ops) |
1442 | tcp_set_congestion_control(sk: ssk, name: msk->ca_name, load: false, cap_net_admin: true); |
1443 | __tcp_sock_set_cork(sk: ssk, on: !!msk->cork); |
1444 | __tcp_sock_set_nodelay(sk: ssk, on: !!msk->nodelay); |
1445 | |
1446 | inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); |
1447 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
1448 | } |
1449 | |
1450 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) |
1451 | { |
1452 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk: ssk); |
1453 | |
1454 | msk_owned_by_me(msk); |
1455 | |
1456 | ssk->sk_rcvlowat = 0; |
1457 | |
1458 | /* subflows must ignore any latency-related settings: will not affect |
1459 | * the user-space - only the msk is relevant - but will foul the |
1460 | * mptcp scheduler |
1461 | */ |
1462 | tcp_sk(ssk)->notsent_lowat = UINT_MAX; |
1463 | |
1464 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
1465 | sync_socket_options(msk, ssk); |
1466 | |
1467 | subflow->setsockopt_seq = msk->setsockopt_seq; |
1468 | } |
1469 | } |
1470 | |
1471 | /* unfortunately this is different enough from the tcp version so |
1472 | * that we can't factor it out |
1473 | */ |
1474 | int mptcp_set_rcvlowat(struct sock *sk, int val) |
1475 | { |
1476 | struct mptcp_subflow_context *subflow; |
1477 | int space, cap; |
1478 | |
1479 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1480 | cap = sk->sk_rcvbuf >> 1; |
1481 | else |
1482 | cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; |
1483 | val = min(val, cap); |
1484 | WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); |
1485 | |
1486 | /* Check if we need to signal EPOLLIN right now */ |
1487 | if (mptcp_epollin_ready(sk)) |
1488 | sk->sk_data_ready(sk); |
1489 | |
1490 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1491 | return 0; |
1492 | |
1493 | space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win: val); |
1494 | if (space <= sk->sk_rcvbuf) |
1495 | return 0; |
1496 | |
1497 | /* propagate the rcvbuf changes to all the subflows */ |
1498 | WRITE_ONCE(sk->sk_rcvbuf, space); |
1499 | mptcp_for_each_subflow(mptcp_sk(sk), subflow) { |
1500 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1501 | bool slow; |
1502 | |
1503 | slow = lock_sock_fast(sk: ssk); |
1504 | WRITE_ONCE(ssk->sk_rcvbuf, space); |
1505 | tcp_sk(ssk)->window_clamp = val; |
1506 | unlock_sock_fast(sk: ssk, slow); |
1507 | } |
1508 | return 0; |
1509 | } |
1510 | |