1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * NET Generic infrastructure for Network protocols. |
4 | * |
5 | * Definitions for request_sock |
6 | * |
7 | * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> |
8 | * |
9 | * From code originally in include/net/tcp.h |
10 | */ |
11 | #ifndef _REQUEST_SOCK_H |
12 | #define _REQUEST_SOCK_H |
13 | |
14 | #include <linux/slab.h> |
15 | #include <linux/spinlock.h> |
16 | #include <linux/types.h> |
17 | #include <linux/bug.h> |
18 | #include <linux/refcount.h> |
19 | |
20 | #include <net/sock.h> |
21 | |
22 | struct request_sock; |
23 | struct sk_buff; |
24 | struct dst_entry; |
25 | struct proto; |
26 | |
27 | struct request_sock_ops { |
28 | int family; |
29 | unsigned int obj_size; |
30 | struct kmem_cache *slab; |
31 | char *slab_name; |
32 | int (*rtx_syn_ack)(const struct sock *sk, |
33 | struct request_sock *req); |
34 | void (*send_ack)(const struct sock *sk, struct sk_buff *skb, |
35 | struct request_sock *req); |
36 | void (*send_reset)(const struct sock *sk, |
37 | struct sk_buff *skb); |
38 | void (*destructor)(struct request_sock *req); |
39 | void (*syn_ack_timeout)(const struct request_sock *req); |
40 | }; |
41 | |
42 | int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req); |
43 | |
44 | struct saved_syn { |
45 | u32 mac_hdrlen; |
46 | u32 network_hdrlen; |
47 | u32 tcp_hdrlen; |
48 | u8 data[]; |
49 | }; |
50 | |
51 | /* struct request_sock - mini sock to represent a connection request |
52 | */ |
53 | struct request_sock { |
54 | struct sock_common __req_common; |
55 | #define rsk_refcnt __req_common.skc_refcnt |
56 | #define rsk_hash __req_common.skc_hash |
57 | #define rsk_listener __req_common.skc_listener |
58 | #define rsk_window_clamp __req_common.skc_window_clamp |
59 | #define rsk_rcv_wnd __req_common.skc_rcv_wnd |
60 | |
61 | struct request_sock *dl_next; |
62 | u16 mss; |
63 | u8 num_retrans; /* number of retransmits */ |
64 | u8 syncookie:1; /* True if |
65 | * 1) tcpopts needs to be encoded in |
66 | * TS of SYN+ACK |
67 | * 2) ACK is validated by BPF kfunc. |
68 | */ |
69 | u8 num_timeout:7; /* number of timeouts */ |
70 | u32 ts_recent; |
71 | struct timer_list rsk_timer; |
72 | const struct request_sock_ops *rsk_ops; |
73 | struct sock *sk; |
74 | struct saved_syn *saved_syn; |
75 | u32 secid; |
76 | u32 peer_secid; |
77 | u32 timeout; |
78 | }; |
79 | |
80 | static inline struct request_sock *inet_reqsk(const struct sock *sk) |
81 | { |
82 | return (struct request_sock *)sk; |
83 | } |
84 | |
85 | static inline struct sock *req_to_sk(struct request_sock *req) |
86 | { |
87 | return (struct sock *)req; |
88 | } |
89 | |
90 | /** |
91 | * skb_steal_sock - steal a socket from an sk_buff |
92 | * @skb: sk_buff to steal the socket from |
93 | * @refcounted: is set to true if the socket is reference-counted |
94 | * @prefetched: is set to true if the socket was assigned from bpf |
95 | */ |
96 | static inline struct sock *skb_steal_sock(struct sk_buff *skb, |
97 | bool *refcounted, bool *prefetched) |
98 | { |
99 | struct sock *sk = skb->sk; |
100 | |
101 | if (!sk) { |
102 | *prefetched = false; |
103 | *refcounted = false; |
104 | return NULL; |
105 | } |
106 | |
107 | *prefetched = skb_sk_is_prefetched(skb); |
108 | if (*prefetched) { |
109 | #if IS_ENABLED(CONFIG_SYN_COOKIES) |
110 | if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { |
111 | struct request_sock *req = inet_reqsk(sk); |
112 | |
113 | *refcounted = false; |
114 | sk = req->rsk_listener; |
115 | req->rsk_listener = NULL; |
116 | return sk; |
117 | } |
118 | #endif |
119 | *refcounted = sk_is_refcounted(sk); |
120 | } else { |
121 | *refcounted = true; |
122 | } |
123 | |
124 | skb->destructor = NULL; |
125 | skb->sk = NULL; |
126 | return sk; |
127 | } |
128 | |
129 | static inline struct request_sock * |
130 | reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, |
131 | bool attach_listener) |
132 | { |
133 | struct request_sock *req; |
134 | |
	req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
136 | if (!req) |
137 | return NULL; |
138 | req->rsk_listener = NULL; |
139 | if (attach_listener) { |
140 | if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { |
			kmem_cache_free(ops->slab, req);
142 | return NULL; |
143 | } |
144 | req->rsk_listener = sk_listener; |
145 | } |
146 | req->rsk_ops = ops; |
147 | req_to_sk(req)->sk_prot = sk_listener->sk_prot; |
	sk_node_init(&req_to_sk(req)->sk_node);
	sk_tx_queue_clear(req_to_sk(req));
150 | req->saved_syn = NULL; |
151 | req->syncookie = 0; |
152 | req->timeout = 0; |
153 | req->num_timeout = 0; |
154 | req->num_retrans = 0; |
155 | req->sk = NULL; |
	refcount_set(&req->rsk_refcnt, 0);
157 | |
158 | return req; |
159 | } |
160 | |
161 | static inline void __reqsk_free(struct request_sock *req) |
162 | { |
163 | req->rsk_ops->destructor(req); |
164 | if (req->rsk_listener) |
		sock_put(req->rsk_listener);
	kfree(req->saved_syn);
	kmem_cache_free(req->rsk_ops->slab, req);
168 | } |
169 | |
170 | static inline void reqsk_free(struct request_sock *req) |
171 | { |
172 | WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); |
173 | __reqsk_free(req); |
174 | } |
175 | |
176 | static inline void reqsk_put(struct request_sock *req) |
177 | { |
	if (refcount_dec_and_test(&req->rsk_refcnt))
179 | reqsk_free(req); |
180 | } |
181 | |
182 | /* |
183 | * For a TCP Fast Open listener - |
184 | * lock - protects the access to all the reqsk, which is co-owned by |
185 | * the listener and the child socket. |
186 | * qlen - pending TFO requests (still in TCP_SYN_RECV). |
187 | * max_qlen - max TFO reqs allowed before TFO is disabled. |
188 | * |
189 | * XXX (TFO) - ideally these fields can be made as part of "listen_sock" |
190 | * structure above. But there is some implementation difficulty due to |
191 | * listen_sock being part of request_sock_queue hence will be freed when |
192 | * a listener is stopped. But TFO related fields may continue to be |
193 | * accessed even after a listener is closed, until its sk_refcnt drops |
194 | * to 0 implying no more outstanding TFO reqs. One solution is to keep |
195 | * listen_opt around until sk_refcnt drops to 0. But there is some other |
196 | * complexity that needs to be resolved. E.g., a listener can be disabled |
197 | * temporarily through shutdown()->tcp_disconnect(), and re-enabled later. |
198 | */ |
199 | struct fastopen_queue { |
200 | struct request_sock *rskq_rst_head; /* Keep track of past TFO */ |
201 | struct request_sock *rskq_rst_tail; /* requests that caused RST. |
202 | * This is part of the defense |
						 * against spoofing attacks.
204 | */ |
205 | spinlock_t lock; |
206 | int qlen; /* # of pending (TCP_SYN_RECV) reqs */ |
207 | int max_qlen; /* != 0 iff TFO is currently enabled */ |
208 | |
209 | struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */ |
210 | }; |
211 | |
/**
 * struct request_sock_queue - queue of request_socks
 * @rskq_accept_head: FIFO head of established children
 * @rskq_accept_tail: FIFO tail of established children
 * @rskq_defer_accept: User waits for some data after accept()
 */
219 | struct request_sock_queue { |
220 | spinlock_t rskq_lock; |
221 | u8 rskq_defer_accept; |
222 | |
223 | u32 synflood_warned; |
224 | atomic_t qlen; |
225 | atomic_t young; |
226 | |
227 | struct request_sock *rskq_accept_head; |
228 | struct request_sock *rskq_accept_tail; |
229 | struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine |
230 | * if TFO is enabled. |
231 | */ |
232 | }; |
233 | |
234 | void reqsk_queue_alloc(struct request_sock_queue *queue); |
235 | |
236 | void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, |
237 | bool reset); |
238 | |
239 | static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) |
240 | { |
241 | return READ_ONCE(queue->rskq_accept_head) == NULL; |
242 | } |
243 | |
244 | static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, |
245 | struct sock *parent) |
246 | { |
247 | struct request_sock *req; |
248 | |
	spin_lock_bh(&queue->rskq_lock);
250 | req = queue->rskq_accept_head; |
251 | if (req) { |
		sk_acceptq_removed(parent);
253 | WRITE_ONCE(queue->rskq_accept_head, req->dl_next); |
254 | if (queue->rskq_accept_head == NULL) |
255 | queue->rskq_accept_tail = NULL; |
256 | } |
	spin_unlock_bh(&queue->rskq_lock);
258 | return req; |
259 | } |
260 | |
261 | static inline void reqsk_queue_removed(struct request_sock_queue *queue, |
262 | const struct request_sock *req) |
263 | { |
264 | if (req->num_timeout == 0) |
		atomic_dec(&queue->young);
	atomic_dec(&queue->qlen);
267 | } |
268 | |
269 | static inline void reqsk_queue_added(struct request_sock_queue *queue) |
270 | { |
	atomic_inc(&queue->young);
	atomic_inc(&queue->qlen);
273 | } |
274 | |
275 | static inline int reqsk_queue_len(const struct request_sock_queue *queue) |
276 | { |
	return atomic_read(&queue->qlen);
278 | } |
279 | |
280 | static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) |
281 | { |
	return atomic_read(&queue->young);
283 | } |
284 | |
285 | #endif /* _REQUEST_SOCK_H */ |
286 | |