1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
4 | * |
5 | * Socket Closing - normal and abnormal |
6 | * |
7 | * Copyright IBM Corp. 2016 |
8 | * |
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
10 | */ |
11 | |
12 | #include <linux/workqueue.h> |
13 | #include <linux/sched/signal.h> |
14 | |
15 | #include <net/sock.h> |
16 | #include <net/tcp.h> |
17 | |
18 | #include "smc.h" |
19 | #include "smc_tx.h" |
20 | #include "smc_cdc.h" |
21 | #include "smc_close.h" |
22 | |
23 | /* release the clcsock that is assigned to the smc_sock */ |
24 | void smc_clcsock_release(struct smc_sock *smc) |
25 | { |
26 | struct socket *tcp; |
27 | |
28 | if (smc->listen_smc && current_work() != &smc->smc_listen_work) |
29 | cancel_work_sync(work: &smc->smc_listen_work); |
30 | mutex_lock(&smc->clcsock_release_lock); |
31 | if (smc->clcsock) { |
32 | tcp = smc->clcsock; |
33 | smc->clcsock = NULL; |
34 | sock_release(sock: tcp); |
35 | } |
36 | mutex_unlock(lock: &smc->clcsock_release_lock); |
37 | } |
38 | |
39 | static void smc_close_cleanup_listen(struct sock *parent) |
40 | { |
41 | struct sock *sk; |
42 | |
43 | /* Close non-accepted connections */ |
44 | while ((sk = smc_accept_dequeue(parent, NULL))) |
45 | smc_close_non_accepted(sk); |
46 | } |
47 | |
48 | /* wait for sndbuf data being transmitted */ |
49 | static void smc_close_stream_wait(struct smc_sock *smc, long timeout) |
50 | { |
51 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
52 | struct sock *sk = &smc->sk; |
53 | |
54 | if (!timeout) |
55 | return; |
56 | |
57 | if (!smc_tx_prepared_sends(conn: &smc->conn)) |
58 | return; |
59 | |
60 | /* Send out corked data remaining in sndbuf */ |
61 | smc_tx_pending(conn: &smc->conn); |
62 | |
63 | smc->wait_close_tx_prepared = 1; |
64 | add_wait_queue(wq_head: sk_sleep(sk), wq_entry: &wait); |
65 | while (!signal_pending(current) && timeout) { |
66 | int rc; |
67 | |
68 | rc = sk_wait_event(sk, &timeout, |
69 | !smc_tx_prepared_sends(&smc->conn) || |
70 | READ_ONCE(sk->sk_err) == ECONNABORTED || |
71 | READ_ONCE(sk->sk_err) == ECONNRESET || |
72 | smc->conn.killed, |
73 | &wait); |
74 | if (rc) |
75 | break; |
76 | } |
77 | remove_wait_queue(wq_head: sk_sleep(sk), wq_entry: &wait); |
78 | smc->wait_close_tx_prepared = 0; |
79 | } |
80 | |
81 | void smc_close_wake_tx_prepared(struct smc_sock *smc) |
82 | { |
83 | if (smc->wait_close_tx_prepared) |
84 | /* wake up socket closing */ |
85 | smc->sk.sk_state_change(&smc->sk); |
86 | } |
87 | |
88 | static int smc_close_wr(struct smc_connection *conn) |
89 | { |
90 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; |
91 | |
92 | return smc_cdc_get_slot_and_msg_send(conn); |
93 | } |
94 | |
95 | static int smc_close_final(struct smc_connection *conn) |
96 | { |
97 | if (atomic_read(v: &conn->bytes_to_rcv)) |
98 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
99 | else |
100 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; |
101 | if (conn->killed) |
102 | return -EPIPE; |
103 | |
104 | return smc_cdc_get_slot_and_msg_send(conn); |
105 | } |
106 | |
107 | int smc_close_abort(struct smc_connection *conn) |
108 | { |
109 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
110 | |
111 | return smc_cdc_get_slot_and_msg_send(conn); |
112 | } |
113 | |
114 | static void smc_close_cancel_work(struct smc_sock *smc) |
115 | { |
116 | struct sock *sk = &smc->sk; |
117 | |
118 | release_sock(sk); |
119 | if (cancel_work_sync(work: &smc->conn.close_work)) |
120 | sock_put(sk); |
121 | cancel_delayed_work_sync(dwork: &smc->conn.tx_work); |
122 | lock_sock(sk); |
123 | } |
124 | |
125 | /* terminate smc socket abnormally - active abort |
126 | * link group is terminated, i.e. RDMA communication no longer possible |
127 | */ |
128 | void smc_close_active_abort(struct smc_sock *smc) |
129 | { |
130 | struct sock *sk = &smc->sk; |
131 | bool release_clcsock = false; |
132 | |
133 | if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { |
134 | sk->sk_err = ECONNABORTED; |
135 | if (smc->clcsock && smc->clcsock->sk) |
136 | tcp_abort(sk: smc->clcsock->sk, ECONNABORTED); |
137 | } |
138 | switch (sk->sk_state) { |
139 | case SMC_ACTIVE: |
140 | case SMC_APPCLOSEWAIT1: |
141 | case SMC_APPCLOSEWAIT2: |
142 | sk->sk_state = SMC_PEERABORTWAIT; |
143 | smc_close_cancel_work(smc); |
144 | if (sk->sk_state != SMC_PEERABORTWAIT) |
145 | break; |
146 | sk->sk_state = SMC_CLOSED; |
147 | sock_put(sk); /* (postponed) passive closing */ |
148 | break; |
149 | case SMC_PEERCLOSEWAIT1: |
150 | case SMC_PEERCLOSEWAIT2: |
151 | case SMC_PEERFINCLOSEWAIT: |
152 | sk->sk_state = SMC_PEERABORTWAIT; |
153 | smc_close_cancel_work(smc); |
154 | if (sk->sk_state != SMC_PEERABORTWAIT) |
155 | break; |
156 | sk->sk_state = SMC_CLOSED; |
157 | smc_conn_free(conn: &smc->conn); |
158 | release_clcsock = true; |
159 | sock_put(sk); /* passive closing */ |
160 | break; |
161 | case SMC_PROCESSABORT: |
162 | case SMC_APPFINCLOSEWAIT: |
163 | sk->sk_state = SMC_PEERABORTWAIT; |
164 | smc_close_cancel_work(smc); |
165 | if (sk->sk_state != SMC_PEERABORTWAIT) |
166 | break; |
167 | sk->sk_state = SMC_CLOSED; |
168 | smc_conn_free(conn: &smc->conn); |
169 | release_clcsock = true; |
170 | break; |
171 | case SMC_INIT: |
172 | case SMC_PEERABORTWAIT: |
173 | case SMC_CLOSED: |
174 | break; |
175 | } |
176 | |
177 | smc_sock_set_flag(sk, flag: SOCK_DEAD); |
178 | sk->sk_state_change(sk); |
179 | |
180 | if (release_clcsock) { |
181 | release_sock(sk); |
182 | smc_clcsock_release(smc); |
183 | lock_sock(sk); |
184 | } |
185 | } |
186 | |
187 | static inline bool smc_close_sent_any_close(struct smc_connection *conn) |
188 | { |
189 | return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || |
190 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; |
191 | } |
192 | |
193 | int smc_close_active(struct smc_sock *smc) |
194 | { |
195 | struct smc_cdc_conn_state_flags *txflags = |
196 | &smc->conn.local_tx_ctrl.conn_state_flags; |
197 | struct smc_connection *conn = &smc->conn; |
198 | struct sock *sk = &smc->sk; |
199 | int old_state; |
200 | long timeout; |
201 | int rc = 0; |
202 | int rc1 = 0; |
203 | |
204 | timeout = current->flags & PF_EXITING ? |
205 | 0 : sock_flag(sk, flag: SOCK_LINGER) ? |
206 | sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; |
207 | |
208 | old_state = sk->sk_state; |
209 | again: |
210 | switch (sk->sk_state) { |
211 | case SMC_INIT: |
212 | sk->sk_state = SMC_CLOSED; |
213 | break; |
214 | case SMC_LISTEN: |
215 | sk->sk_state = SMC_CLOSED; |
216 | sk->sk_state_change(sk); /* wake up accept */ |
217 | if (smc->clcsock && smc->clcsock->sk) { |
218 | write_lock_bh(&smc->clcsock->sk->sk_callback_lock); |
219 | smc_clcsock_restore_cb(target_cb: &smc->clcsock->sk->sk_data_ready, |
220 | saved_cb: &smc->clcsk_data_ready); |
221 | smc->clcsock->sk->sk_user_data = NULL; |
222 | write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); |
223 | rc = kernel_sock_shutdown(sock: smc->clcsock, how: SHUT_RDWR); |
224 | } |
225 | smc_close_cleanup_listen(parent: sk); |
226 | release_sock(sk); |
227 | flush_work(work: &smc->tcp_listen_work); |
228 | lock_sock(sk); |
229 | break; |
230 | case SMC_ACTIVE: |
231 | smc_close_stream_wait(smc, timeout); |
232 | release_sock(sk); |
233 | cancel_delayed_work_sync(dwork: &conn->tx_work); |
234 | lock_sock(sk); |
235 | if (sk->sk_state == SMC_ACTIVE) { |
236 | /* send close request */ |
237 | rc = smc_close_final(conn); |
238 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
239 | |
240 | /* actively shutdown clcsock before peer close it, |
241 | * prevent peer from entering TIME_WAIT state. |
242 | */ |
243 | if (smc->clcsock && smc->clcsock->sk) { |
244 | rc1 = kernel_sock_shutdown(sock: smc->clcsock, |
245 | how: SHUT_RDWR); |
246 | rc = rc ? rc : rc1; |
247 | } |
248 | } else { |
249 | /* peer event has changed the state */ |
250 | goto again; |
251 | } |
252 | break; |
253 | case SMC_APPFINCLOSEWAIT: |
254 | /* socket already shutdown wr or both (active close) */ |
255 | if (txflags->peer_done_writing && |
256 | !smc_close_sent_any_close(conn)) { |
257 | /* just shutdown wr done, send close request */ |
258 | rc = smc_close_final(conn); |
259 | } |
260 | sk->sk_state = SMC_CLOSED; |
261 | break; |
262 | case SMC_APPCLOSEWAIT1: |
263 | case SMC_APPCLOSEWAIT2: |
264 | if (!smc_cdc_rxed_any_close(conn)) |
265 | smc_close_stream_wait(smc, timeout); |
266 | release_sock(sk); |
267 | cancel_delayed_work_sync(dwork: &conn->tx_work); |
268 | lock_sock(sk); |
269 | if (sk->sk_state != SMC_APPCLOSEWAIT1 && |
270 | sk->sk_state != SMC_APPCLOSEWAIT2) |
271 | goto again; |
272 | /* confirm close from peer */ |
273 | rc = smc_close_final(conn); |
274 | if (smc_cdc_rxed_any_close(conn)) { |
275 | /* peer has closed the socket already */ |
276 | sk->sk_state = SMC_CLOSED; |
277 | sock_put(sk); /* postponed passive closing */ |
278 | } else { |
279 | /* peer has just issued a shutdown write */ |
280 | sk->sk_state = SMC_PEERFINCLOSEWAIT; |
281 | } |
282 | break; |
283 | case SMC_PEERCLOSEWAIT1: |
284 | case SMC_PEERCLOSEWAIT2: |
285 | if (txflags->peer_done_writing && |
286 | !smc_close_sent_any_close(conn)) { |
287 | /* just shutdown wr done, send close request */ |
288 | rc = smc_close_final(conn); |
289 | } |
290 | /* peer sending PeerConnectionClosed will cause transition */ |
291 | break; |
292 | case SMC_PEERFINCLOSEWAIT: |
293 | /* peer sending PeerConnectionClosed will cause transition */ |
294 | break; |
295 | case SMC_PROCESSABORT: |
296 | rc = smc_close_abort(conn); |
297 | sk->sk_state = SMC_CLOSED; |
298 | break; |
299 | case SMC_PEERABORTWAIT: |
300 | sk->sk_state = SMC_CLOSED; |
301 | break; |
302 | case SMC_CLOSED: |
303 | /* nothing to do, add tracing in future patch */ |
304 | break; |
305 | } |
306 | |
307 | if (old_state != sk->sk_state) |
308 | sk->sk_state_change(sk); |
309 | return rc; |
310 | } |
311 | |
312 | static void smc_close_passive_abort_received(struct smc_sock *smc) |
313 | { |
314 | struct smc_cdc_conn_state_flags *txflags = |
315 | &smc->conn.local_tx_ctrl.conn_state_flags; |
316 | struct sock *sk = &smc->sk; |
317 | |
318 | switch (sk->sk_state) { |
319 | case SMC_INIT: |
320 | case SMC_ACTIVE: |
321 | case SMC_APPCLOSEWAIT1: |
322 | sk->sk_state = SMC_PROCESSABORT; |
323 | sock_put(sk); /* passive closing */ |
324 | break; |
325 | case SMC_APPFINCLOSEWAIT: |
326 | sk->sk_state = SMC_PROCESSABORT; |
327 | break; |
328 | case SMC_PEERCLOSEWAIT1: |
329 | case SMC_PEERCLOSEWAIT2: |
330 | if (txflags->peer_done_writing && |
331 | !smc_close_sent_any_close(conn: &smc->conn)) |
332 | /* just shutdown, but not yet closed locally */ |
333 | sk->sk_state = SMC_PROCESSABORT; |
334 | else |
335 | sk->sk_state = SMC_CLOSED; |
336 | sock_put(sk); /* passive closing */ |
337 | break; |
338 | case SMC_APPCLOSEWAIT2: |
339 | case SMC_PEERFINCLOSEWAIT: |
340 | sk->sk_state = SMC_CLOSED; |
341 | sock_put(sk); /* passive closing */ |
342 | break; |
343 | case SMC_PEERABORTWAIT: |
344 | sk->sk_state = SMC_CLOSED; |
345 | break; |
346 | case SMC_PROCESSABORT: |
347 | /* nothing to do, add tracing in future patch */ |
348 | break; |
349 | } |
350 | } |
351 | |
352 | /* Either some kind of closing has been received: peer_conn_closed, |
353 | * peer_conn_abort, or peer_done_writing |
354 | * or the link group of the connection terminates abnormally. |
355 | */ |
356 | static void smc_close_passive_work(struct work_struct *work) |
357 | { |
358 | struct smc_connection *conn = container_of(work, |
359 | struct smc_connection, |
360 | close_work); |
361 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
362 | struct smc_cdc_conn_state_flags *rxflags; |
363 | bool release_clcsock = false; |
364 | struct sock *sk = &smc->sk; |
365 | int old_state; |
366 | |
367 | lock_sock(sk); |
368 | old_state = sk->sk_state; |
369 | |
370 | rxflags = &conn->local_rx_ctrl.conn_state_flags; |
371 | if (rxflags->peer_conn_abort) { |
372 | /* peer has not received all data */ |
373 | smc_close_passive_abort_received(smc); |
374 | release_sock(sk); |
375 | cancel_delayed_work_sync(dwork: &conn->tx_work); |
376 | lock_sock(sk); |
377 | goto wakeup; |
378 | } |
379 | |
380 | switch (sk->sk_state) { |
381 | case SMC_INIT: |
382 | sk->sk_state = SMC_APPCLOSEWAIT1; |
383 | break; |
384 | case SMC_ACTIVE: |
385 | sk->sk_state = SMC_APPCLOSEWAIT1; |
386 | /* postpone sock_put() for passive closing to cover |
387 | * received SEND_SHUTDOWN as well |
388 | */ |
389 | break; |
390 | case SMC_PEERCLOSEWAIT1: |
391 | if (rxflags->peer_done_writing) |
392 | sk->sk_state = SMC_PEERCLOSEWAIT2; |
393 | fallthrough; |
394 | /* to check for closing */ |
395 | case SMC_PEERCLOSEWAIT2: |
396 | if (!smc_cdc_rxed_any_close(conn)) |
397 | break; |
398 | if (sock_flag(sk, flag: SOCK_DEAD) && |
399 | smc_close_sent_any_close(conn)) { |
400 | /* smc_release has already been called locally */ |
401 | sk->sk_state = SMC_CLOSED; |
402 | } else { |
403 | /* just shutdown, but not yet closed locally */ |
404 | sk->sk_state = SMC_APPFINCLOSEWAIT; |
405 | } |
406 | sock_put(sk); /* passive closing */ |
407 | break; |
408 | case SMC_PEERFINCLOSEWAIT: |
409 | if (smc_cdc_rxed_any_close(conn)) { |
410 | sk->sk_state = SMC_CLOSED; |
411 | sock_put(sk); /* passive closing */ |
412 | } |
413 | break; |
414 | case SMC_APPCLOSEWAIT1: |
415 | case SMC_APPCLOSEWAIT2: |
416 | /* postpone sock_put() for passive closing to cover |
417 | * received SEND_SHUTDOWN as well |
418 | */ |
419 | break; |
420 | case SMC_APPFINCLOSEWAIT: |
421 | case SMC_PEERABORTWAIT: |
422 | case SMC_PROCESSABORT: |
423 | case SMC_CLOSED: |
424 | /* nothing to do, add tracing in future patch */ |
425 | break; |
426 | } |
427 | |
428 | wakeup: |
429 | sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ |
430 | sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ |
431 | |
432 | if (old_state != sk->sk_state) { |
433 | sk->sk_state_change(sk); |
434 | if ((sk->sk_state == SMC_CLOSED) && |
435 | (sock_flag(sk, flag: SOCK_DEAD) || !sk->sk_socket)) { |
436 | smc_conn_free(conn); |
437 | if (smc->clcsock) |
438 | release_clcsock = true; |
439 | } |
440 | } |
441 | release_sock(sk); |
442 | if (release_clcsock) |
443 | smc_clcsock_release(smc); |
444 | sock_put(sk); /* sock_hold done by schedulers of close_work */ |
445 | } |
446 | |
447 | int smc_close_shutdown_write(struct smc_sock *smc) |
448 | { |
449 | struct smc_connection *conn = &smc->conn; |
450 | struct sock *sk = &smc->sk; |
451 | int old_state; |
452 | long timeout; |
453 | int rc = 0; |
454 | |
455 | timeout = current->flags & PF_EXITING ? |
456 | 0 : sock_flag(sk, flag: SOCK_LINGER) ? |
457 | sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; |
458 | |
459 | old_state = sk->sk_state; |
460 | again: |
461 | switch (sk->sk_state) { |
462 | case SMC_ACTIVE: |
463 | smc_close_stream_wait(smc, timeout); |
464 | release_sock(sk); |
465 | cancel_delayed_work_sync(dwork: &conn->tx_work); |
466 | lock_sock(sk); |
467 | if (sk->sk_state != SMC_ACTIVE) |
468 | goto again; |
469 | /* send close wr request */ |
470 | rc = smc_close_wr(conn); |
471 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
472 | break; |
473 | case SMC_APPCLOSEWAIT1: |
474 | /* passive close */ |
475 | if (!smc_cdc_rxed_any_close(conn)) |
476 | smc_close_stream_wait(smc, timeout); |
477 | release_sock(sk); |
478 | cancel_delayed_work_sync(dwork: &conn->tx_work); |
479 | lock_sock(sk); |
480 | if (sk->sk_state != SMC_APPCLOSEWAIT1) |
481 | goto again; |
482 | /* confirm close from peer */ |
483 | rc = smc_close_wr(conn); |
484 | sk->sk_state = SMC_APPCLOSEWAIT2; |
485 | break; |
486 | case SMC_APPCLOSEWAIT2: |
487 | case SMC_PEERFINCLOSEWAIT: |
488 | case SMC_PEERCLOSEWAIT1: |
489 | case SMC_PEERCLOSEWAIT2: |
490 | case SMC_APPFINCLOSEWAIT: |
491 | case SMC_PROCESSABORT: |
492 | case SMC_PEERABORTWAIT: |
493 | /* nothing to do, add tracing in future patch */ |
494 | break; |
495 | } |
496 | |
497 | if (old_state != sk->sk_state) |
498 | sk->sk_state_change(sk); |
499 | return rc; |
500 | } |
501 | |
502 | /* Initialize close properties on connection establishment. */ |
503 | void smc_close_init(struct smc_sock *smc) |
504 | { |
505 | INIT_WORK(&smc->conn.close_work, smc_close_passive_work); |
506 | } |
507 | |