1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
---|---|
2 | /* |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
4 | * operating system. INET is implemented using the BSD Socket |
5 | * interface as the means of communication with the user level. |
6 | * |
7 | * Definitions for inet_sock |
8 | * |
9 | * Authors: Many, reorganised here by |
10 | * Arnaldo Carvalho de Melo <acme@mandriva.com> |
11 | */ |
12 | #ifndef _INET_SOCK_H |
13 | #define _INET_SOCK_H |
14 | |
15 | #include <linux/bitops.h> |
16 | #include <linux/string.h> |
17 | #include <linux/types.h> |
18 | #include <linux/jhash.h> |
19 | #include <linux/netdevice.h> |
20 | |
21 | #include <net/flow.h> |
22 | #include <net/inet_dscp.h> |
23 | #include <net/sock.h> |
24 | #include <net/request_sock.h> |
25 | #include <net/netns/hash.h> |
26 | #include <net/tcp_states.h> |
27 | #include <net/l3mdev.h> |
28 | |
29 | /** struct ip_options - IP Options |
30 | * |
31 | * @faddr - Saved first hop address |
32 | * @nexthop - Saved nexthop address in LSRR and SSRR |
33 | * @is_strictroute - Strict source route |
34 | * @srr_is_hit - Packet destination addr was our one |
35 | * @is_changed - IP checksum more not valid |
36 | * @rr_needaddr - Need to record addr of outgoing dev |
37 | * @ts_needtime - Need to record timestamp |
38 | * @ts_needaddr - Need to record addr of outgoing dev |
39 | */ |
40 | struct ip_options { |
41 | __be32 faddr; |
42 | __be32 nexthop; |
43 | unsigned char optlen; |
44 | unsigned char srr; |
45 | unsigned char rr; |
46 | unsigned char ts; |
47 | unsigned char is_strictroute:1, |
48 | srr_is_hit:1, |
49 | is_changed:1, |
50 | rr_needaddr:1, |
51 | ts_needtime:1, |
52 | ts_needaddr:1; |
53 | unsigned char router_alert; |
54 | unsigned char cipso; |
55 | unsigned char __pad2; |
56 | unsigned char __data[]; |
57 | }; |
58 | |
59 | struct ip_options_rcu { |
60 | struct rcu_head rcu; |
61 | struct ip_options opt; |
62 | }; |
63 | |
64 | struct ip_options_data { |
65 | struct ip_options_rcu opt; |
66 | char data[40]; |
67 | }; |
68 | |
69 | struct inet_request_sock { |
70 | struct request_sock req; |
71 | #define ir_loc_addr req.__req_common.skc_rcv_saddr |
72 | #define ir_rmt_addr req.__req_common.skc_daddr |
73 | #define ir_num req.__req_common.skc_num |
74 | #define ir_rmt_port req.__req_common.skc_dport |
75 | #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr |
76 | #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr |
77 | #define ir_iif req.__req_common.skc_bound_dev_if |
78 | #define ir_cookie req.__req_common.skc_cookie |
79 | #define ireq_net req.__req_common.skc_net |
80 | #define ireq_state req.__req_common.skc_state |
81 | #define ireq_family req.__req_common.skc_family |
82 | |
83 | u16 snd_wscale : 4, |
84 | rcv_wscale : 4, |
85 | tstamp_ok : 1, |
86 | sack_ok : 1, |
87 | wscale_ok : 1, |
88 | ecn_ok : 1, |
89 | acked : 1, |
90 | no_srccheck: 1, |
91 | smc_ok : 1; |
92 | u32 ir_mark; |
93 | union { |
94 | struct ip_options_rcu __rcu *ireq_opt; |
95 | #if IS_ENABLED(CONFIG_IPV6) |
96 | struct { |
97 | struct ipv6_txoptions *ipv6_opt; |
98 | struct sk_buff *pktopts; |
99 | }; |
100 | #endif |
101 | }; |
102 | }; |
103 | |
/*
 * inet_rsk - view a request_sock as its enclosing inet_request_sock.
 *
 * Plain pointer cast; it is valid because struct request_sock is the first
 * member of struct inet_request_sock. The const qualifier of @sk is dropped.
 */
static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
{
	struct inet_request_sock *ireq = (struct inet_request_sock *)sk;

	return ireq;
}
108 | |
109 | static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) |
110 | { |
111 | u32 mark = READ_ONCE(sk->sk_mark); |
112 | |
113 | if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) |
114 | return skb->mark; |
115 | |
116 | return mark; |
117 | } |
118 | |
119 | static inline int inet_request_bound_dev_if(const struct sock *sk, |
120 | struct sk_buff *skb) |
121 | { |
122 | int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); |
123 | #ifdef CONFIG_NET_L3_MASTER_DEV |
124 | struct net *net = sock_net(sk); |
125 | |
126 | if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) |
127 | return l3mdev_master_ifindex_by_index(net, ifindex: skb->skb_iif); |
128 | #endif |
129 | |
130 | return bound_dev_if; |
131 | } |
132 | |
/*
 * inet_sk_bound_l3mdev - l3mdev master index for the device @sk is bound to.
 * @sk: socket to inspect
 *
 * With CONFIG_NET_L3_MASTER_DEV enabled and the per-netns
 * sysctl_tcp_l3mdev_accept knob cleared, returns the result of
 * l3mdev_master_ifindex_by_index() for sk->sk_bound_dev_if.
 * Returns 0 when the knob is set or the config option is disabled.
 */
static inline int inet_sk_bound_l3mdev(const struct sock *sk)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct net *net = sock_net(sk);

	if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
		return l3mdev_master_ifindex_by_index(net,
						      sk->sk_bound_dev_if);
#endif

	return 0;
}
145 | |
/*
 * inet_bound_dev_eq - does a socket's device binding match a packet?
 * @l3mdev_accept: whether unbound sockets may match when @sdif is non-zero
 * @bound_dev_if: device index the socket is bound to, 0 when unbound
 * @dif: first device index to compare against
 * @sdif: second device index to compare against
 *
 * A bound socket matches when its index equals @dif or @sdif.
 * An unbound socket matches when @sdif is zero or @l3mdev_accept is true.
 */
static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
				     int dif, int sdif)
{
	if (bound_dev_if)
		return bound_dev_if == dif || bound_dev_if == sdif;

	return l3mdev_accept || !sdif;
}
153 | |
154 | static inline bool inet_sk_bound_dev_eq(const struct net *net, |
155 | int bound_dev_if, |
156 | int dif, int sdif) |
157 | { |
158 | #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) |
159 | return inet_bound_dev_eq(l3mdev_accept: !!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), |
160 | bound_dev_if, dif, sdif); |
161 | #else |
162 | return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); |
163 | #endif |
164 | } |
165 | |
166 | struct inet_cork { |
167 | unsigned int flags; |
168 | __be32 addr; |
169 | struct ip_options *opt; |
170 | unsigned int fragsize; |
171 | int length; /* Total length of all frames */ |
172 | struct dst_entry *dst; |
173 | u8 tx_flags; |
174 | __u8 ttl; |
175 | __s16 tos; |
176 | u32 priority; |
177 | __u16 gso_size; |
178 | u32 ts_opt_id; |
179 | u64 transmit_time; |
180 | u32 mark; |
181 | }; |
182 | |
183 | struct inet_cork_full { |
184 | struct inet_cork base; |
185 | struct flowi fl; |
186 | }; |
187 | |
188 | struct ip_mc_socklist; |
189 | struct ipv6_pinfo; |
190 | struct rtable; |
191 | |
192 | /** struct inet_sock - representation of INET sockets |
193 | * |
194 | * @sk - ancestor class |
195 | * @pinet6 - pointer to IPv6 control block |
196 | * @inet_daddr - Foreign IPv4 addr |
197 | * @inet_rcv_saddr - Bound local IPv4 addr |
198 | * @inet_dport - Destination port |
199 | * @inet_num - Local port |
200 | * @inet_flags - various atomic flags |
201 | * @inet_saddr - Sending source |
202 | * @uc_ttl - Unicast TTL |
203 | * @inet_sport - Source port |
204 | * @inet_id - ID counter for DF pkts |
205 | * @tos - TOS |
206 | * @mc_ttl - Multicasting TTL |
207 | * @uc_index - Unicast outgoing device index |
208 | * @mc_index - Multicast device index |
209 | * @mc_list - Group array |
210 | * @cork - info to build ip hdr on each ip frag while socket is corked |
211 | */ |
212 | struct inet_sock { |
213 | /* sk and pinet6 has to be the first two members of inet_sock */ |
214 | struct sock sk; |
215 | #if IS_ENABLED(CONFIG_IPV6) |
216 | struct ipv6_pinfo *pinet6; |
217 | #endif |
218 | /* Socket demultiplex comparisons on incoming packets. */ |
219 | #define inet_daddr sk.__sk_common.skc_daddr |
220 | #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr |
221 | #define inet_dport sk.__sk_common.skc_dport |
222 | #define inet_num sk.__sk_common.skc_num |
223 | |
224 | unsigned long inet_flags; |
225 | __be32 inet_saddr; |
226 | __s16 uc_ttl; |
227 | __be16 inet_sport; |
228 | struct ip_options_rcu __rcu *inet_opt; |
229 | atomic_t inet_id; |
230 | |
231 | __u8 tos; |
232 | __u8 min_ttl; |
233 | __u8 mc_ttl; |
234 | __u8 pmtudisc; |
235 | __u8 rcv_tos; |
236 | __u8 convert_csum; |
237 | int uc_index; |
238 | int mc_index; |
239 | __be32 mc_addr; |
240 | u32 local_port_range; /* high << 16 | low */ |
241 | |
242 | struct ip_mc_socklist __rcu *mc_list; |
243 | struct inet_cork_full cork; |
244 | }; |
245 | |
/* Cork flag values. */
#define IPCORK_OPT		1	/* ip-options has been held in ipcork.opt */
#define IPCORK_TS_OPT_ID	2	/* ts_opt_id field is valid, overriding sk_tskey */
248 | |
/*
 * Bit numbers within inet_sock::inet_flags; they are consumed by the
 * inet_test_bit()/inet_set_bit()/inet_clear_bit()/inet_assign_bit() helpers.
 */
enum {
	/* Bits 0-8 each have a matching IP_CMSG_* mask. */
	INET_FLAGS_PKTINFO		= 0,
	INET_FLAGS_TTL			= 1,
	INET_FLAGS_TOS			= 2,
	INET_FLAGS_RECVOPTS		= 3,
	INET_FLAGS_RETOPTS		= 4,
	INET_FLAGS_PASSSEC		= 5,
	INET_FLAGS_ORIGDSTADDR		= 6,
	INET_FLAGS_CHECKSUM		= 7,
	INET_FLAGS_RECVFRAGSIZE		= 8,

	/* Remaining bits have no IP_CMSG_* counterpart. */
	INET_FLAGS_RECVERR		= 9,
	INET_FLAGS_RECVERR_RFC4884	= 10,
	INET_FLAGS_FREEBIND		= 11,
	INET_FLAGS_HDRINCL		= 12,
	INET_FLAGS_MC_LOOP		= 13,
	INET_FLAGS_MC_ALL		= 14,
	INET_FLAGS_TRANSPARENT		= 15,
	INET_FLAGS_IS_ICSK		= 16,
	INET_FLAGS_NODEFRAG		= 17,
	INET_FLAGS_BIND_ADDRESS_NO_PORT	= 18,
	INET_FLAGS_DEFER_CONNECT	= 19,
	INET_FLAGS_MC6_LOOP		= 20,
	INET_FLAGS_RECVERR6_RFC4884	= 21,
	INET_FLAGS_MC6_ALL		= 22,
	INET_FLAGS_AUTOFLOWLABEL_SET	= 23,
	INET_FLAGS_AUTOFLOWLABEL	= 24,
	INET_FLAGS_DONTFRAG		= 25,
	INET_FLAGS_RECVERR6		= 26,
	INET_FLAGS_REPFLOW		= 27,
	INET_FLAGS_RTALERT_ISOLATE	= 28,
	INET_FLAGS_SNDFLOW		= 29,
	INET_FLAGS_RTALERT		= 30,
};
283 | |
/* cmsg flags for inet: one mask per INET_FLAGS_* bit in the range 0-8 */
#define IP_CMSG_PKTINFO		BIT(INET_FLAGS_PKTINFO)
#define IP_CMSG_TTL		BIT(INET_FLAGS_TTL)
#define IP_CMSG_TOS		BIT(INET_FLAGS_TOS)
#define IP_CMSG_RECVOPTS	BIT(INET_FLAGS_RECVOPTS)
#define IP_CMSG_RETOPTS		BIT(INET_FLAGS_RETOPTS)
#define IP_CMSG_PASSSEC		BIT(INET_FLAGS_PASSSEC)
#define IP_CMSG_ORIGDSTADDR	BIT(INET_FLAGS_ORIGDSTADDR)
#define IP_CMSG_CHECKSUM	BIT(INET_FLAGS_CHECKSUM)
#define IP_CMSG_RECVFRAGSIZE	BIT(INET_FLAGS_RECVFRAGSIZE)

/* Union of every cmsg mask defined above. */
#define IP_CMSG_ALL	(IP_CMSG_PKTINFO	| \
			 IP_CMSG_TTL		| \
			 IP_CMSG_TOS		| \
			 IP_CMSG_RECVOPTS	| \
			 IP_CMSG_RETOPTS	| \
			 IP_CMSG_PASSSEC	| \
			 IP_CMSG_ORIGDSTADDR	| \
			 IP_CMSG_CHECKSUM	| \
			 IP_CMSG_RECVFRAGSIZE)
300 | |
301 | static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) |
302 | { |
303 | return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL; |
304 | } |
305 | |
306 | static inline dscp_t inet_sk_dscp(const struct inet_sock *inet) |
307 | { |
308 | return inet_dsfield_to_dscp(READ_ONCE(inet->tos)); |
309 | } |
310 | |
/*
 * Bit helpers for inet_sock::inet_flags.
 *
 * @nr is the flag name without its INET_FLAGS_ prefix (for example
 * inet_test_bit(HDRINCL, sk)); @sk is converted with inet_sk().
 */
#define inet_test_bit(nr, sk)					\
	test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
#define inet_set_bit(nr, sk)					\
	set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
#define inet_clear_bit(nr, sk)					\
	clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
#define inet_assign_bit(nr, sk, val)				\
	assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
319 | |
/**
 * sk_to_full_sk - Access to a full socket
 * @sk: pointer to a socket
 *
 * SYNACK messages might be attached to request sockets.
 * Some places want to reach the listener in this case.
 *
 * With CONFIG_INET: a socket in TCP_NEW_SYN_RECV state is replaced by its
 * rsk_listener, and a socket in TCP_TIME_WAIT state yields NULL.
 * Without CONFIG_INET, or for a NULL @sk, the argument is returned as is.
 */
static inline struct sock *sk_to_full_sk(struct sock *sk)
{
#ifdef CONFIG_INET
	if (!sk)
		return NULL;

	if (READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) {
		sk = inet_reqsk(sk)->rsk_listener;
		if (!sk)
			return NULL;
	}

	if (READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
		return NULL;
#endif
	return sk;
}
337 | |
/*
 * sk_to_full_sk() variant with a const argument.
 *
 * Same rules: with CONFIG_INET a TCP_NEW_SYN_RECV socket is replaced by its
 * rsk_listener and a TCP_TIME_WAIT socket yields NULL.
 */
static inline const struct sock *sk_const_to_full_sk(const struct sock *sk)
{
#ifdef CONFIG_INET
	if (!sk)
		return NULL;

	if (READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) {
		sk = ((const struct request_sock *)sk)->rsk_listener;
		if (!sk)
			return NULL;
	}

	if (READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
		return NULL;
#endif
	return sk;
}
349 | |
350 | static inline struct sock *skb_to_full_sk(const struct sk_buff *skb) |
351 | { |
352 | return sk_to_full_sk(sk: skb->sk); |
353 | } |
354 | |
/* inet_sk - get the struct inet_sock whose ->sk member @ptr points to. */
#define inet_sk(ptr)	container_of_const(ptr, struct inet_sock, sk)
356 | |
357 | static inline void __inet_sk_copy_descendant(struct sock *sk_to, |
358 | const struct sock *sk_from, |
359 | const int ancestor_size) |
360 | { |
361 | memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, |
362 | sk_from->sk_prot->obj_size - ancestor_size); |
363 | } |
364 | |
365 | int inet_sk_rebuild_header(struct sock *sk); |
366 | |
367 | /** |
368 | * inet_sk_state_load - read sk->sk_state for lockless contexts |
369 | * @sk: socket pointer |
370 | * |
371 | * Paired with inet_sk_state_store(). Used in places we don't hold socket lock: |
372 | * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... |
373 | */ |
374 | static inline int inet_sk_state_load(const struct sock *sk) |
375 | { |
376 | /* state change might impact lockless readers. */ |
377 | return smp_load_acquire(&sk->sk_state); |
378 | } |
379 | |
380 | /** |
381 | * inet_sk_state_store - update sk->sk_state |
382 | * @sk: socket pointer |
383 | * @newstate: new state |
384 | * |
385 | * Paired with inet_sk_state_load(). Should be used in contexts where |
386 | * state change might impact lockless readers. |
387 | */ |
388 | void inet_sk_state_store(struct sock *sk, int newstate); |
389 | |
390 | void inet_sk_set_state(struct sock *sk, int state); |
391 | |
392 | static inline unsigned int __inet_ehashfn(const __be32 laddr, |
393 | const __u16 lport, |
394 | const __be32 faddr, |
395 | const __be16 fport, |
396 | u32 initval) |
397 | { |
398 | return jhash_3words(a: (__force __u32) laddr, |
399 | b: (__force __u32) faddr, |
400 | c: ((__u32) lport) << 16 | (__force __u32)fport, |
401 | initval); |
402 | } |
403 | |
404 | struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, |
405 | struct sock *sk_listener, |
406 | bool attach_listener); |
407 | |
408 | static inline __u8 inet_sk_flowi_flags(const struct sock *sk) |
409 | { |
410 | __u8 flags = 0; |
411 | |
412 | if (inet_test_bit(TRANSPARENT, sk) || inet_test_bit(HDRINCL, sk)) |
413 | flags |= FLOWI_FLAG_ANYSRC; |
414 | return flags; |
415 | } |
416 | |
417 | static inline void inet_inc_convert_csum(struct sock *sk) |
418 | { |
419 | inet_sk(sk)->convert_csum++; |
420 | } |
421 | |
422 | static inline void inet_dec_convert_csum(struct sock *sk) |
423 | { |
424 | if (inet_sk(sk)->convert_csum > 0) |
425 | inet_sk(sk)->convert_csum--; |
426 | } |
427 | |
428 | static inline bool inet_get_convert_csum(struct sock *sk) |
429 | { |
430 | return !!inet_sk(sk)->convert_csum; |
431 | } |
432 | |
433 | |
434 | static inline bool inet_can_nonlocal_bind(struct net *net, |
435 | struct inet_sock *inet) |
436 | { |
437 | return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || |
438 | test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || |
439 | test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); |
440 | } |
441 | |
442 | static inline bool inet_addr_valid_or_nonlocal(struct net *net, |
443 | struct inet_sock *inet, |
444 | __be32 addr, |
445 | int addr_type) |
446 | { |
447 | return inet_can_nonlocal_bind(net, inet) || |
448 | addr == htonl(INADDR_ANY) || |
449 | addr_type == RTN_LOCAL || |
450 | addr_type == RTN_MULTICAST || |
451 | addr_type == RTN_BROADCAST; |
452 | } |
453 | |
454 | #endif /* _INET_SOCK_H */ |
455 |
Definitions
- ip_options
- ip_options_rcu
- ip_options_data
- inet_request_sock
- inet_rsk
- inet_request_mark
- inet_request_bound_dev_if
- inet_sk_bound_l3mdev
- inet_bound_dev_eq
- inet_sk_bound_dev_eq
- inet_cork
- inet_cork_full
- inet_sock
- inet_cmsg_flags
- inet_sk_dscp
- sk_to_full_sk
- sk_const_to_full_sk
- skb_to_full_sk
- __inet_sk_copy_descendant
- inet_sk_state_load
- __inet_ehashfn
- inet_sk_flowi_flags
- inet_inc_convert_csum
- inet_dec_convert_csum
- inet_get_convert_csum
- inet_can_nonlocal_bind
Improve your Profiling and Debugging skills
Find out more