1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* Local endpoint object management |
3 | * |
4 | * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. |
5 | * Written by David Howells (dhowells@redhat.com) |
6 | */ |
7 | |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
9 | |
10 | #include <linux/module.h> |
11 | #include <linux/net.h> |
12 | #include <linux/skbuff.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/udp.h> |
15 | #include <linux/ip.h> |
16 | #include <linux/hashtable.h> |
17 | #include <net/sock.h> |
18 | #include <net/udp.h> |
19 | #include <net/udp_tunnel.h> |
20 | #include <net/af_rxrpc.h> |
21 | #include "ar-internal.h" |
22 | |
/* RCU callback that frees a local endpoint record; defined further down. */
static void rxrpc_local_rcu(struct rcu_head *rcu);
24 | |
25 | /* |
26 | * Handle an ICMP/ICMP6 error turning up at the tunnel. Push it through the |
27 | * usual mechanism so that it gets parsed and presented through the UDP |
28 | * socket's error_report(). |
29 | */ |
30 | static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err, |
31 | __be16 port, u32 info, u8 *payload) |
32 | { |
33 | if (ip_hdr(skb)->version == IPVERSION) |
34 | return ip_icmp_error(sk, skb, err, port, info, payload); |
35 | if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) |
36 | return ipv6_icmp_error(sk, skb, err, port, info, payload); |
37 | } |
38 | |
39 | /* |
40 | * Set or clear the Don't Fragment flag on a socket. |
41 | */ |
42 | void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set) |
43 | { |
44 | if (set) |
45 | ip_sock_set_mtu_discover(sk: local->socket->sk, IP_PMTUDISC_DO); |
46 | else |
47 | ip_sock_set_mtu_discover(sk: local->socket->sk, IP_PMTUDISC_DONT); |
48 | } |
49 | |
50 | /* |
51 | * Compare a local to an address. Return -ve, 0 or +ve to indicate less than, |
52 | * same or greater than. |
53 | * |
54 | * We explicitly don't compare the RxRPC service ID as we want to reject |
55 | * conflicting uses by differing services. Further, we don't want to share |
56 | * addresses with different options (IPv6), so we don't compare those bits |
57 | * either. |
58 | */ |
59 | static long rxrpc_local_cmp_key(const struct rxrpc_local *local, |
60 | const struct sockaddr_rxrpc *srx) |
61 | { |
62 | long diff; |
63 | |
64 | diff = ((local->srx.transport_type - srx->transport_type) ?: |
65 | (local->srx.transport_len - srx->transport_len) ?: |
66 | (local->srx.transport.family - srx->transport.family)); |
67 | if (diff != 0) |
68 | return diff; |
69 | |
70 | switch (srx->transport.family) { |
71 | case AF_INET: |
72 | /* If the choice of UDP port is left up to the transport, then |
73 | * the endpoint record doesn't match. |
74 | */ |
75 | return ((u16 __force)local->srx.transport.sin.sin_port - |
76 | (u16 __force)srx->transport.sin.sin_port) ?: |
77 | memcmp(p: &local->srx.transport.sin.sin_addr, |
78 | q: &srx->transport.sin.sin_addr, |
79 | size: sizeof(struct in_addr)); |
80 | #ifdef CONFIG_AF_RXRPC_IPV6 |
81 | case AF_INET6: |
82 | /* If the choice of UDP6 port is left up to the transport, then |
83 | * the endpoint record doesn't match. |
84 | */ |
85 | return ((u16 __force)local->srx.transport.sin6.sin6_port - |
86 | (u16 __force)srx->transport.sin6.sin6_port) ?: |
87 | memcmp(p: &local->srx.transport.sin6.sin6_addr, |
88 | q: &srx->transport.sin6.sin6_addr, |
89 | size: sizeof(struct in6_addr)); |
90 | #endif |
91 | default: |
92 | BUG(); |
93 | } |
94 | } |
95 | |
96 | static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) |
97 | { |
98 | struct rxrpc_local *local = |
99 | container_of(timer, struct rxrpc_local, client_conn_reap_timer); |
100 | |
101 | if (!local->kill_all_client_conns && |
102 | test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, addr: &local->client_conn_flags)) |
103 | rxrpc_wake_up_io_thread(local); |
104 | } |
105 | |
106 | /* |
107 | * Allocate a new local endpoint. |
108 | */ |
109 | static struct rxrpc_local *rxrpc_alloc_local(struct net *net, |
110 | const struct sockaddr_rxrpc *srx) |
111 | { |
112 | struct rxrpc_local *local; |
113 | u32 tmp; |
114 | |
115 | local = kzalloc(size: sizeof(struct rxrpc_local), GFP_KERNEL); |
116 | if (local) { |
117 | refcount_set(r: &local->ref, n: 1); |
118 | atomic_set(v: &local->active_users, i: 1); |
119 | local->net = net; |
120 | local->rxnet = rxrpc_net(net); |
121 | INIT_HLIST_NODE(h: &local->link); |
122 | init_completion(x: &local->io_thread_ready); |
123 | #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY |
124 | skb_queue_head_init(list: &local->rx_delay_queue); |
125 | #endif |
126 | skb_queue_head_init(list: &local->rx_queue); |
127 | INIT_LIST_HEAD(list: &local->conn_attend_q); |
128 | INIT_LIST_HEAD(list: &local->call_attend_q); |
129 | |
130 | local->client_bundles = RB_ROOT; |
131 | spin_lock_init(&local->client_bundles_lock); |
132 | local->kill_all_client_conns = false; |
133 | INIT_LIST_HEAD(list: &local->idle_client_conns); |
134 | timer_setup(&local->client_conn_reap_timer, |
135 | rxrpc_client_conn_reap_timeout, 0); |
136 | |
137 | spin_lock_init(&local->lock); |
138 | rwlock_init(&local->services_lock); |
139 | local->debug_id = atomic_inc_return(v: &rxrpc_debug_id); |
140 | memcpy(&local->srx, srx, sizeof(*srx)); |
141 | local->srx.srx_service = 0; |
142 | idr_init(idr: &local->conn_ids); |
143 | get_random_bytes(buf: &tmp, len: sizeof(tmp)); |
144 | tmp &= 0x3fffffff; |
145 | if (tmp == 0) |
146 | tmp = 1; |
147 | idr_set_cursor(idr: &local->conn_ids, val: tmp); |
148 | INIT_LIST_HEAD(list: &local->new_client_calls); |
149 | spin_lock_init(&local->client_call_lock); |
150 | |
151 | trace_rxrpc_local(local_debug_id: local->debug_id, op: rxrpc_local_new, ref: 1, usage: 1); |
152 | } |
153 | |
154 | _leave(" = %p" , local); |
155 | return local; |
156 | } |
157 | |
158 | /* |
159 | * create the local socket |
160 | * - must be called with rxrpc_local_mutex locked |
161 | */ |
162 | static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) |
163 | { |
164 | struct udp_tunnel_sock_cfg tuncfg = {NULL}; |
165 | struct sockaddr_rxrpc *srx = &local->srx; |
166 | struct udp_port_cfg udp_conf = {0}; |
167 | struct task_struct *io_thread; |
168 | struct sock *usk; |
169 | int ret; |
170 | |
171 | _enter("%p{%d,%d}" , |
172 | local, srx->transport_type, srx->transport.family); |
173 | |
174 | udp_conf.family = srx->transport.family; |
175 | udp_conf.use_udp_checksums = true; |
176 | if (udp_conf.family == AF_INET) { |
177 | udp_conf.local_ip = srx->transport.sin.sin_addr; |
178 | udp_conf.local_udp_port = srx->transport.sin.sin_port; |
179 | #if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) |
180 | } else { |
181 | udp_conf.local_ip6 = srx->transport.sin6.sin6_addr; |
182 | udp_conf.local_udp_port = srx->transport.sin6.sin6_port; |
183 | udp_conf.use_udp6_tx_checksums = true; |
184 | udp_conf.use_udp6_rx_checksums = true; |
185 | #endif |
186 | } |
187 | ret = udp_sock_create(net, cfg: &udp_conf, sockp: &local->socket); |
188 | if (ret < 0) { |
189 | _leave(" = %d [socket]" , ret); |
190 | return ret; |
191 | } |
192 | |
193 | tuncfg.encap_type = UDP_ENCAP_RXRPC; |
194 | tuncfg.encap_rcv = rxrpc_encap_rcv; |
195 | tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; |
196 | tuncfg.sk_user_data = local; |
197 | setup_udp_tunnel_sock(net, sock: local->socket, sock_cfg: &tuncfg); |
198 | |
199 | /* set the socket up */ |
200 | usk = local->socket->sk; |
201 | usk->sk_error_report = rxrpc_error_report; |
202 | |
203 | switch (srx->transport.family) { |
204 | case AF_INET6: |
205 | /* we want to receive ICMPv6 errors */ |
206 | ip6_sock_set_recverr(sk: usk); |
207 | |
208 | /* Fall through and set IPv4 options too otherwise we don't get |
209 | * errors from IPv4 packets sent through the IPv6 socket. |
210 | */ |
211 | fallthrough; |
212 | case AF_INET: |
213 | /* we want to receive ICMP errors */ |
214 | ip_sock_set_recverr(sk: usk); |
215 | |
216 | /* we want to set the don't fragment bit */ |
217 | rxrpc_local_dont_fragment(local, set: true); |
218 | |
219 | /* We want receive timestamps. */ |
220 | sock_enable_timestamps(sk: usk); |
221 | break; |
222 | |
223 | default: |
224 | BUG(); |
225 | } |
226 | |
227 | io_thread = kthread_run(rxrpc_io_thread, local, |
228 | "krxrpcio/%u" , ntohs(udp_conf.local_udp_port)); |
229 | if (IS_ERR(ptr: io_thread)) { |
230 | ret = PTR_ERR(ptr: io_thread); |
231 | goto error_sock; |
232 | } |
233 | |
234 | wait_for_completion(&local->io_thread_ready); |
235 | local->io_thread = io_thread; |
236 | _leave(" = 0" ); |
237 | return 0; |
238 | |
239 | error_sock: |
240 | kernel_sock_shutdown(sock: local->socket, how: SHUT_RDWR); |
241 | local->socket->sk->sk_user_data = NULL; |
242 | sock_release(sock: local->socket); |
243 | local->socket = NULL; |
244 | return ret; |
245 | } |
246 | |
247 | /* |
248 | * Look up or create a new local endpoint using the specified local address. |
249 | */ |
250 | struct rxrpc_local *rxrpc_lookup_local(struct net *net, |
251 | const struct sockaddr_rxrpc *srx) |
252 | { |
253 | struct rxrpc_local *local; |
254 | struct rxrpc_net *rxnet = rxrpc_net(net); |
255 | struct hlist_node *cursor; |
256 | long diff; |
257 | int ret; |
258 | |
259 | _enter("{%d,%d,%pISp}" , |
260 | srx->transport_type, srx->transport.family, &srx->transport); |
261 | |
262 | mutex_lock(&rxnet->local_mutex); |
263 | |
264 | hlist_for_each(cursor, &rxnet->local_endpoints) { |
265 | local = hlist_entry(cursor, struct rxrpc_local, link); |
266 | |
267 | diff = rxrpc_local_cmp_key(local, srx); |
268 | if (diff != 0) |
269 | continue; |
270 | |
271 | /* Services aren't allowed to share transport sockets, so |
272 | * reject that here. It is possible that the object is dying - |
273 | * but it may also still have the local transport address that |
274 | * we want bound. |
275 | */ |
276 | if (srx->srx_service) { |
277 | local = NULL; |
278 | goto addr_in_use; |
279 | } |
280 | |
281 | /* Found a match. We want to replace a dying object. |
282 | * Attempting to bind the transport socket may still fail if |
283 | * we're attempting to use a local address that the dying |
284 | * object is still using. |
285 | */ |
286 | if (!rxrpc_use_local(local, rxrpc_local_use_lookup)) |
287 | break; |
288 | |
289 | goto found; |
290 | } |
291 | |
292 | local = rxrpc_alloc_local(net, srx); |
293 | if (!local) |
294 | goto nomem; |
295 | |
296 | ret = rxrpc_open_socket(local, net); |
297 | if (ret < 0) |
298 | goto sock_error; |
299 | |
300 | if (cursor) { |
301 | hlist_replace_rcu(old: cursor, new: &local->link); |
302 | cursor->pprev = NULL; |
303 | } else { |
304 | hlist_add_head_rcu(n: &local->link, h: &rxnet->local_endpoints); |
305 | } |
306 | |
307 | found: |
308 | mutex_unlock(lock: &rxnet->local_mutex); |
309 | _leave(" = %p" , local); |
310 | return local; |
311 | |
312 | nomem: |
313 | ret = -ENOMEM; |
314 | sock_error: |
315 | mutex_unlock(lock: &rxnet->local_mutex); |
316 | if (local) |
317 | call_rcu(head: &local->rcu, func: rxrpc_local_rcu); |
318 | _leave(" = %d" , ret); |
319 | return ERR_PTR(error: ret); |
320 | |
321 | addr_in_use: |
322 | mutex_unlock(lock: &rxnet->local_mutex); |
323 | _leave(" = -EADDRINUSE" ); |
324 | return ERR_PTR(error: -EADDRINUSE); |
325 | } |
326 | |
327 | /* |
328 | * Get a ref on a local endpoint. |
329 | */ |
330 | struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local, |
331 | enum rxrpc_local_trace why) |
332 | { |
333 | int r, u; |
334 | |
335 | u = atomic_read(v: &local->active_users); |
336 | __refcount_inc(r: &local->ref, oldp: &r); |
337 | trace_rxrpc_local(local_debug_id: local->debug_id, op: why, ref: r + 1, usage: u); |
338 | return local; |
339 | } |
340 | |
341 | /* |
342 | * Get a ref on a local endpoint unless its usage has already reached 0. |
343 | */ |
344 | struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local, |
345 | enum rxrpc_local_trace why) |
346 | { |
347 | int r, u; |
348 | |
349 | if (local && __refcount_inc_not_zero(r: &local->ref, oldp: &r)) { |
350 | u = atomic_read(v: &local->active_users); |
351 | trace_rxrpc_local(local_debug_id: local->debug_id, op: why, ref: r + 1, usage: u); |
352 | return local; |
353 | } |
354 | |
355 | return NULL; |
356 | } |
357 | |
358 | /* |
359 | * Drop a ref on a local endpoint. |
360 | */ |
361 | void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why) |
362 | { |
363 | unsigned int debug_id; |
364 | bool dead; |
365 | int r, u; |
366 | |
367 | if (local) { |
368 | debug_id = local->debug_id; |
369 | |
370 | u = atomic_read(v: &local->active_users); |
371 | dead = __refcount_dec_and_test(r: &local->ref, oldp: &r); |
372 | trace_rxrpc_local(local_debug_id: debug_id, op: why, ref: r, usage: u); |
373 | |
374 | if (dead) |
375 | call_rcu(head: &local->rcu, func: rxrpc_local_rcu); |
376 | } |
377 | } |
378 | |
379 | /* |
380 | * Start using a local endpoint. |
381 | */ |
382 | struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local, |
383 | enum rxrpc_local_trace why) |
384 | { |
385 | local = rxrpc_get_local_maybe(local, why: rxrpc_local_get_for_use); |
386 | if (!local) |
387 | return NULL; |
388 | |
389 | if (!__rxrpc_use_local(local, why)) { |
390 | rxrpc_put_local(local, why: rxrpc_local_put_for_use); |
391 | return NULL; |
392 | } |
393 | |
394 | return local; |
395 | } |
396 | |
397 | /* |
398 | * Cease using a local endpoint. Once the number of active users reaches 0, we |
399 | * start the closure of the transport in the I/O thread.. |
400 | */ |
401 | void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why) |
402 | { |
403 | unsigned int debug_id; |
404 | int r, u; |
405 | |
406 | if (local) { |
407 | debug_id = local->debug_id; |
408 | r = refcount_read(r: &local->ref); |
409 | u = atomic_dec_return(v: &local->active_users); |
410 | trace_rxrpc_local(local_debug_id: debug_id, op: why, ref: r, usage: u); |
411 | if (u == 0) |
412 | kthread_stop(k: local->io_thread); |
413 | } |
414 | } |
415 | |
416 | /* |
417 | * Destroy a local endpoint's socket and then hand the record to RCU to dispose |
418 | * of. |
419 | * |
420 | * Closing the socket cannot be done from bottom half context or RCU callback |
421 | * context because it might sleep. |
422 | */ |
423 | void rxrpc_destroy_local(struct rxrpc_local *local) |
424 | { |
425 | struct socket *socket = local->socket; |
426 | struct rxrpc_net *rxnet = local->rxnet; |
427 | |
428 | _enter("%d" , local->debug_id); |
429 | |
430 | local->dead = true; |
431 | |
432 | mutex_lock(&rxnet->local_mutex); |
433 | hlist_del_init_rcu(n: &local->link); |
434 | mutex_unlock(lock: &rxnet->local_mutex); |
435 | |
436 | rxrpc_clean_up_local_conns(local); |
437 | rxrpc_service_connection_reaper(&rxnet->service_conn_reaper); |
438 | ASSERT(!local->service); |
439 | |
440 | if (socket) { |
441 | local->socket = NULL; |
442 | kernel_sock_shutdown(sock: socket, how: SHUT_RDWR); |
443 | socket->sk->sk_user_data = NULL; |
444 | sock_release(sock: socket); |
445 | } |
446 | |
447 | /* At this point, there should be no more packets coming in to the |
448 | * local endpoint. |
449 | */ |
450 | #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY |
451 | rxrpc_purge_queue(&local->rx_delay_queue); |
452 | #endif |
453 | rxrpc_purge_queue(&local->rx_queue); |
454 | rxrpc_purge_client_connections(local); |
455 | if (local->tx_alloc.va) |
456 | __page_frag_cache_drain(virt_to_page(local->tx_alloc.va), |
457 | count: local->tx_alloc.pagecnt_bias); |
458 | } |
459 | |
460 | /* |
461 | * Destroy a local endpoint after the RCU grace period expires. |
462 | */ |
463 | static void rxrpc_local_rcu(struct rcu_head *rcu) |
464 | { |
465 | struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu); |
466 | |
467 | rxrpc_see_local(local, why: rxrpc_local_free); |
468 | kfree(objp: local); |
469 | } |
470 | |
471 | /* |
472 | * Verify the local endpoint list is empty by this point. |
473 | */ |
474 | void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) |
475 | { |
476 | struct rxrpc_local *local; |
477 | |
478 | _enter("" ); |
479 | |
480 | flush_workqueue(rxrpc_workqueue); |
481 | |
482 | if (!hlist_empty(h: &rxnet->local_endpoints)) { |
483 | mutex_lock(&rxnet->local_mutex); |
484 | hlist_for_each_entry(local, &rxnet->local_endpoints, link) { |
485 | pr_err("AF_RXRPC: Leaked local %p {%d}\n" , |
486 | local, refcount_read(&local->ref)); |
487 | } |
488 | mutex_unlock(lock: &rxnet->local_mutex); |
489 | BUG(); |
490 | } |
491 | } |
492 | |