// SPDX-License-Identifier: GPL-2.0-or-later
/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
7 | |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
9 | |
10 | #include <linux/module.h> |
11 | #include <linux/circ_buf.h> |
12 | #include <linux/net.h> |
13 | #include <linux/skbuff.h> |
14 | #include <linux/slab.h> |
15 | #include <linux/udp.h> |
16 | #include <net/sock.h> |
17 | #include <net/af_rxrpc.h> |
18 | #include "ar-internal.h" |
19 | |
20 | /* |
21 | * Propose a PING ACK be sent. |
22 | */ |
23 | void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, |
24 | enum rxrpc_propose_ack_trace why) |
25 | { |
26 | ktime_t delay = ms_to_ktime(READ_ONCE(rxrpc_idle_ack_delay)); |
27 | ktime_t now = ktime_get_real(); |
28 | ktime_t ping_at = ktime_add(now, delay); |
29 | |
30 | trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial); |
31 | if (ktime_before(cmp1: ping_at, cmp2: call->ping_at)) { |
32 | call->ping_at = ping_at; |
33 | trace_rxrpc_timer_set(call, delay, why: rxrpc_timer_trace_ping); |
34 | } |
35 | } |
36 | |
37 | /* |
38 | * Propose a DELAY ACK be sent in the future. |
39 | */ |
40 | void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, |
41 | enum rxrpc_propose_ack_trace why) |
42 | { |
43 | ktime_t now = ktime_get_real(), delay; |
44 | |
45 | trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); |
46 | |
47 | if (call->peer->srtt_us) |
48 | delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC; |
49 | else |
50 | delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay)); |
51 | ktime_add_ms(kt: delay, msec: call->tx_backoff); |
52 | |
53 | call->delay_ack_at = ktime_add(now, delay); |
54 | trace_rxrpc_timer_set(call, delay, why: rxrpc_timer_trace_delayed_ack); |
55 | } |
56 | |
57 | /* |
58 | * Handle congestion being detected by the retransmit timeout. |
59 | */ |
60 | static void rxrpc_congestion_timeout(struct rxrpc_call *call) |
61 | { |
62 | set_bit(nr: RXRPC_CALL_RETRANS_TIMEOUT, addr: &call->flags); |
63 | } |
64 | |
65 | /* |
66 | * Perform retransmission of NAK'd and unack'd packets. |
67 | */ |
68 | void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) |
69 | { |
70 | struct rxrpc_ackpacket *ack = NULL; |
71 | struct rxrpc_skb_priv *sp; |
72 | struct rxrpc_txbuf *txb; |
73 | rxrpc_seq_t transmitted = call->tx_transmitted; |
74 | ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(ns: call->peer->rto_us * NSEC_PER_USEC); |
75 | ktime_t resend_at = KTIME_MAX, now, delay; |
76 | bool unacked = false, did_send = false; |
77 | unsigned int i; |
78 | |
79 | _enter("{%d,%d}" , call->acks_hard_ack, call->tx_top); |
80 | |
81 | now = ktime_get_real(); |
82 | |
83 | if (list_empty(head: &call->tx_buffer)) |
84 | goto no_resend; |
85 | |
86 | trace_rxrpc_resend(call, ack: ack_skb); |
87 | txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link); |
88 | |
89 | /* Scan the soft ACK table without dropping the lock and resend any |
90 | * explicitly NAK'd packets. |
91 | */ |
92 | if (ack_skb) { |
93 | sp = rxrpc_skb(ack_skb); |
94 | ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); |
95 | |
96 | for (i = 0; i < sp->ack.nr_acks; i++) { |
97 | rxrpc_seq_t seq; |
98 | |
99 | if (ack->acks[i] & 1) |
100 | continue; |
101 | seq = sp->ack.first_ack + i; |
102 | if (after(seq1: txb->seq, seq2: transmitted)) |
103 | break; |
104 | if (after(seq1: txb->seq, seq2: seq)) |
105 | continue; /* A new hard ACK probably came in */ |
106 | list_for_each_entry_from(txb, &call->tx_buffer, call_link) { |
107 | if (txb->seq == seq) |
108 | goto found_txb; |
109 | } |
110 | goto no_further_resend; |
111 | |
112 | found_txb: |
113 | resend_at = ktime_add(txb->last_sent, rto); |
114 | if (after(seq1: txb->serial, seq2: call->acks_highest_serial)) { |
115 | if (ktime_after(cmp1: resend_at, cmp2: now) && |
116 | ktime_before(cmp1: resend_at, cmp2: next_resend)) |
117 | next_resend = resend_at; |
118 | continue; /* Ack point not yet reached */ |
119 | } |
120 | |
121 | rxrpc_see_txbuf(txb, what: rxrpc_txbuf_see_unacked); |
122 | |
123 | trace_rxrpc_retransmit(call, seq: txb->seq, serial: txb->serial, |
124 | ktime_sub(resend_at, now)); |
125 | |
126 | txb->flags |= RXRPC_TXBUF_RESENT; |
127 | rxrpc_transmit_one(call, txb); |
128 | did_send = true; |
129 | now = ktime_get_real(); |
130 | |
131 | if (list_is_last(list: &txb->call_link, head: &call->tx_buffer)) |
132 | goto no_further_resend; |
133 | txb = list_next_entry(txb, call_link); |
134 | } |
135 | } |
136 | |
137 | /* Fast-forward through the Tx queue to the point the peer says it has |
138 | * seen. Anything between the soft-ACK table and that point will get |
139 | * ACK'd or NACK'd in due course, so don't worry about it here; here we |
140 | * need to consider retransmitting anything beyond that point. |
141 | */ |
142 | if (after_eq(seq1: call->acks_prev_seq, seq2: call->tx_transmitted)) |
143 | goto no_further_resend; |
144 | |
145 | list_for_each_entry_from(txb, &call->tx_buffer, call_link) { |
146 | resend_at = ktime_add(txb->last_sent, rto); |
147 | |
148 | if (before_eq(seq1: txb->seq, seq2: call->acks_prev_seq)) |
149 | continue; |
150 | if (after(seq1: txb->seq, seq2: call->tx_transmitted)) |
151 | break; /* Not transmitted yet */ |
152 | |
153 | if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && |
154 | before(seq1: txb->serial, ntohl(ack->serial))) |
155 | goto do_resend; /* Wasn't accounted for by a more recent ping. */ |
156 | |
157 | if (ktime_after(cmp1: resend_at, cmp2: now)) { |
158 | if (ktime_before(cmp1: resend_at, cmp2: next_resend)) |
159 | next_resend = resend_at; |
160 | continue; |
161 | } |
162 | |
163 | do_resend: |
164 | unacked = true; |
165 | |
166 | txb->flags |= RXRPC_TXBUF_RESENT; |
167 | rxrpc_transmit_one(call, txb); |
168 | did_send = true; |
169 | rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); |
170 | now = ktime_get_real(); |
171 | } |
172 | |
173 | no_further_resend: |
174 | no_resend: |
175 | if (resend_at < KTIME_MAX) { |
176 | delay = rxrpc_get_rto_backoff(peer: call->peer, retrans: did_send); |
177 | resend_at = ktime_add(resend_at, delay); |
178 | trace_rxrpc_timer_set(call, delay: resend_at - now, why: rxrpc_timer_trace_resend_reset); |
179 | } |
180 | call->resend_at = resend_at; |
181 | |
182 | if (unacked) |
183 | rxrpc_congestion_timeout(call); |
184 | |
185 | /* If there was nothing that needed retransmission then it's likely |
186 | * that an ACK got lost somewhere. Send a ping to find out instead of |
187 | * retransmitting data. |
188 | */ |
189 | if (!did_send) { |
190 | ktime_t next_ping = ktime_add_us(kt: call->acks_latest_ts, |
191 | usec: call->peer->srtt_us >> 3); |
192 | |
193 | if (ktime_sub(next_ping, now) <= 0) |
194 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
195 | why: rxrpc_propose_ack_ping_for_0_retrans); |
196 | } |
197 | |
198 | _leave("" ); |
199 | } |
200 | |
201 | /* |
202 | * Start transmitting the reply to a service. This cancels the need to ACK the |
203 | * request if we haven't yet done so. |
204 | */ |
205 | static void rxrpc_begin_service_reply(struct rxrpc_call *call) |
206 | { |
207 | rxrpc_set_call_state(call, state: RXRPC_CALL_SERVER_SEND_REPLY); |
208 | if (call->ackr_reason == RXRPC_ACK_DELAY) |
209 | call->ackr_reason = 0; |
210 | call->delay_ack_at = KTIME_MAX; |
211 | trace_rxrpc_timer_can(call, why: rxrpc_timer_trace_delayed_ack); |
212 | } |
213 | |
214 | /* |
215 | * Close the transmission phase. After this point there is no more data to be |
216 | * transmitted in the call. |
217 | */ |
218 | static void rxrpc_close_tx_phase(struct rxrpc_call *call) |
219 | { |
220 | _debug("________awaiting reply/ACK__________" ); |
221 | |
222 | switch (__rxrpc_call_state(call)) { |
223 | case RXRPC_CALL_CLIENT_SEND_REQUEST: |
224 | rxrpc_set_call_state(call, state: RXRPC_CALL_CLIENT_AWAIT_REPLY); |
225 | break; |
226 | case RXRPC_CALL_SERVER_SEND_REPLY: |
227 | rxrpc_set_call_state(call, state: RXRPC_CALL_SERVER_AWAIT_ACK); |
228 | break; |
229 | default: |
230 | break; |
231 | } |
232 | } |
233 | |
234 | static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) |
235 | { |
236 | unsigned int winsize = min_t(unsigned int, call->tx_winsize, |
237 | call->cong_cwnd + call->cong_extra); |
238 | rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize; |
239 | rxrpc_seq_t tx_top = call->tx_top; |
240 | int space; |
241 | |
242 | space = wtop - tx_top; |
243 | return space > 0; |
244 | } |
245 | |
246 | /* |
247 | * Decant some if the sendmsg prepared queue into the transmission buffer. |
248 | */ |
249 | static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) |
250 | { |
251 | struct rxrpc_txbuf *txb; |
252 | |
253 | if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { |
254 | if (list_empty(head: &call->tx_sendmsg)) |
255 | return; |
256 | rxrpc_expose_client_call(call); |
257 | } |
258 | |
259 | while ((txb = list_first_entry_or_null(&call->tx_sendmsg, |
260 | struct rxrpc_txbuf, call_link))) { |
261 | spin_lock(lock: &call->tx_lock); |
262 | list_del(entry: &txb->call_link); |
263 | spin_unlock(lock: &call->tx_lock); |
264 | |
265 | call->tx_top = txb->seq; |
266 | list_add_tail(new: &txb->call_link, head: &call->tx_buffer); |
267 | |
268 | if (txb->flags & RXRPC_LAST_PACKET) |
269 | rxrpc_close_tx_phase(call); |
270 | |
271 | rxrpc_transmit_one(call, txb); |
272 | |
273 | if (!rxrpc_tx_window_has_space(call)) |
274 | break; |
275 | } |
276 | } |
277 | |
278 | static void rxrpc_transmit_some_data(struct rxrpc_call *call) |
279 | { |
280 | switch (__rxrpc_call_state(call)) { |
281 | case RXRPC_CALL_SERVER_ACK_REQUEST: |
282 | if (list_empty(head: &call->tx_sendmsg)) |
283 | return; |
284 | rxrpc_begin_service_reply(call); |
285 | fallthrough; |
286 | |
287 | case RXRPC_CALL_SERVER_SEND_REPLY: |
288 | case RXRPC_CALL_CLIENT_SEND_REQUEST: |
289 | if (!rxrpc_tx_window_has_space(call)) |
290 | return; |
291 | if (list_empty(head: &call->tx_sendmsg)) { |
292 | rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); |
293 | return; |
294 | } |
295 | rxrpc_decant_prepared_tx(call); |
296 | break; |
297 | default: |
298 | return; |
299 | } |
300 | } |
301 | |
302 | /* |
303 | * Ping the other end to fill our RTT cache and to retrieve the rwind |
304 | * and MTU parameters. |
305 | */ |
306 | static void rxrpc_send_initial_ping(struct rxrpc_call *call) |
307 | { |
308 | if (call->peer->rtt_count < 3 || |
309 | ktime_before(cmp1: ktime_add_ms(kt: call->peer->rtt_last_req, msec: 1000), |
310 | cmp2: ktime_get_real())) |
311 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
312 | why: rxrpc_propose_ack_ping_for_params); |
313 | } |
314 | |
315 | /* |
316 | * Handle retransmission and deferred ACK/abort generation. |
317 | */ |
318 | bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) |
319 | { |
320 | ktime_t now, t; |
321 | bool resend = false; |
322 | s32 abort_code; |
323 | |
324 | rxrpc_see_call(call, rxrpc_call_see_input); |
325 | |
326 | //printk("\n--------------------\n"); |
327 | _enter("{%d,%s,%lx}" , |
328 | call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)], |
329 | call->events); |
330 | |
331 | if (__rxrpc_call_is_complete(call)) |
332 | goto out; |
333 | |
334 | /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ |
335 | abort_code = smp_load_acquire(&call->send_abort); |
336 | if (abort_code) { |
337 | rxrpc_abort_call(call, seq: 0, abort_code: call->send_abort, error: call->send_abort_err, |
338 | why: call->send_abort_why); |
339 | goto out; |
340 | } |
341 | |
342 | if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) |
343 | goto out; |
344 | |
345 | if (skb) |
346 | rxrpc_input_call_packet(call, skb); |
347 | |
348 | /* If we see our async-event poke, check for timeout trippage. */ |
349 | now = ktime_get_real(); |
350 | t = ktime_sub(call->expect_rx_by, now); |
351 | if (t <= 0) { |
352 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_expect_rx); |
353 | goto expired; |
354 | } |
355 | |
356 | t = ktime_sub(call->expect_req_by, now); |
357 | if (t <= 0) { |
358 | call->expect_req_by = KTIME_MAX; |
359 | if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST) { |
360 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_idle); |
361 | goto expired; |
362 | } |
363 | } |
364 | |
365 | t = ktime_sub(READ_ONCE(call->expect_term_by), now); |
366 | if (t <= 0) { |
367 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_hard); |
368 | goto expired; |
369 | } |
370 | |
371 | t = ktime_sub(call->delay_ack_at, now); |
372 | if (t <= 0) { |
373 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_delayed_ack); |
374 | call->delay_ack_at = KTIME_MAX; |
375 | rxrpc_send_ACK(call, RXRPC_ACK_DELAY, serial: 0, |
376 | why: rxrpc_propose_ack_delayed_ack); |
377 | } |
378 | |
379 | t = ktime_sub(call->ack_lost_at, now); |
380 | if (t <= 0) { |
381 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_lost_ack); |
382 | call->ack_lost_at = KTIME_MAX; |
383 | set_bit(nr: RXRPC_CALL_EV_ACK_LOST, addr: &call->events); |
384 | } |
385 | |
386 | t = ktime_sub(call->ping_at, now); |
387 | if (t <= 0) { |
388 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_ping); |
389 | call->ping_at = KTIME_MAX; |
390 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
391 | why: rxrpc_propose_ack_ping_for_keepalive); |
392 | } |
393 | |
394 | t = ktime_sub(call->resend_at, now); |
395 | if (t <= 0) { |
396 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_resend); |
397 | call->resend_at = KTIME_MAX; |
398 | resend = true; |
399 | } |
400 | |
401 | rxrpc_transmit_some_data(call); |
402 | |
403 | now = ktime_get_real(); |
404 | t = ktime_sub(call->keepalive_at, now); |
405 | if (t <= 0) { |
406 | trace_rxrpc_timer_exp(call, delay: t, why: rxrpc_timer_trace_keepalive); |
407 | call->keepalive_at = KTIME_MAX; |
408 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
409 | why: rxrpc_propose_ack_ping_for_keepalive); |
410 | } |
411 | |
412 | if (skb) { |
413 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); |
414 | |
415 | if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) |
416 | rxrpc_congestion_degrade(call); |
417 | } |
418 | |
419 | if (test_and_clear_bit(nr: RXRPC_CALL_EV_INITIAL_PING, addr: &call->events)) |
420 | rxrpc_send_initial_ping(call); |
421 | |
422 | /* Process events */ |
423 | if (test_and_clear_bit(nr: RXRPC_CALL_EV_ACK_LOST, addr: &call->events)) |
424 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
425 | why: rxrpc_propose_ack_ping_for_lost_ack); |
426 | |
427 | if (resend && |
428 | __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY && |
429 | !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) |
430 | rxrpc_resend(call, NULL); |
431 | |
432 | if (test_and_clear_bit(nr: RXRPC_CALL_RX_IS_IDLE, addr: &call->flags)) |
433 | rxrpc_send_ACK(call, RXRPC_ACK_IDLE, serial: 0, |
434 | why: rxrpc_propose_ack_rx_idle); |
435 | |
436 | if (call->ackr_nr_unacked > 2) { |
437 | if (call->peer->rtt_count < 3) |
438 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
439 | why: rxrpc_propose_ack_ping_for_rtt); |
440 | else if (ktime_before(cmp1: ktime_add_ms(kt: call->peer->rtt_last_req, msec: 1000), |
441 | cmp2: ktime_get_real())) |
442 | rxrpc_send_ACK(call, RXRPC_ACK_PING, serial: 0, |
443 | why: rxrpc_propose_ack_ping_for_old_rtt); |
444 | else |
445 | rxrpc_send_ACK(call, RXRPC_ACK_IDLE, serial: 0, |
446 | why: rxrpc_propose_ack_input_data); |
447 | } |
448 | |
449 | /* Make sure the timer is restarted */ |
450 | if (!__rxrpc_call_is_complete(call)) { |
451 | ktime_t next = READ_ONCE(call->expect_term_by), delay; |
452 | |
453 | #define set(T) { ktime_t _t = (T); if (ktime_before(_t, next)) next = _t; } |
454 | |
455 | set(call->expect_req_by); |
456 | set(call->expect_rx_by); |
457 | set(call->delay_ack_at); |
458 | set(call->ack_lost_at); |
459 | set(call->resend_at); |
460 | set(call->keepalive_at); |
461 | set(call->ping_at); |
462 | |
463 | now = ktime_get_real(); |
464 | delay = ktime_sub(next, now); |
465 | if (delay <= 0) { |
466 | rxrpc_poke_call(call, what: rxrpc_call_poke_timer_now); |
467 | } else { |
468 | unsigned long nowj = jiffies, delayj, nextj; |
469 | |
470 | delayj = max(nsecs_to_jiffies(delay), 1); |
471 | nextj = nowj + delayj; |
472 | if (time_before(nextj, call->timer.expires) || |
473 | !timer_pending(timer: &call->timer)) { |
474 | trace_rxrpc_timer_restart(call, delay, delayj); |
475 | timer_reduce(timer: &call->timer, expires: nextj); |
476 | } |
477 | } |
478 | } |
479 | |
480 | out: |
481 | if (__rxrpc_call_is_complete(call)) { |
482 | del_timer_sync(timer: &call->timer); |
483 | if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) |
484 | rxrpc_disconnect_call(call); |
485 | if (call->security) |
486 | call->security->free_call_crypto(call); |
487 | } |
488 | if (call->acks_hard_ack != call->tx_bottom) |
489 | rxrpc_shrink_call_tx_buffer(call); |
490 | _leave("" ); |
491 | return true; |
492 | |
493 | expired: |
494 | if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && |
495 | (int)call->conn->hi_serial - (int)call->rx_serial > 0) { |
496 | trace_rxrpc_call_reset(call); |
497 | rxrpc_abort_call(call, seq: 0, RX_CALL_DEAD, error: -ECONNRESET, |
498 | why: rxrpc_abort_call_reset); |
499 | } else { |
500 | rxrpc_abort_call(call, seq: 0, RX_CALL_TIMEOUT, error: -ETIME, |
501 | why: rxrpc_abort_call_timeout); |
502 | } |
503 | goto out; |
504 | } |
505 | |