sock.c source code [linux/net/core/sock.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* INET An implementation of the TCP/IP protocol suite for the LINUX
4	* operating system. INET is implemented using the BSD Socket
5	* interface as the means of communication with the user level.
6	*
7	* Generic socket support routines. Memory allocators, socket lock/release
8	* handler for protocols to use and generic option handler.
9	*
10	* Authors: Ross Biro
11	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12	* Florian La Roche, <flla@stud.uni-sb.de>
13	* Alan Cox, <A.Cox@swansea.ac.uk>
14	*
15	* Fixes:
16	* Alan Cox : Numerous verify_area() problems
17	* Alan Cox : Connecting on a connecting socket
18	* now returns an error for tcp.
19	* Alan Cox : sock->protocol is set correctly.
20	* and is not sometimes left as 0.
21	* Alan Cox : connect handles icmp errors on a
22	* connect properly. Unfortunately there
23	* is a restart syscall nasty there. I
24	* can't match BSD without hacking the C
25	* library. Ideas urgently sought!
26	* Alan Cox : Disallow bind() to addresses that are
27	* not ours - especially broadcast ones!!
28	* Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29	* Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30	* instead they leave that for the DESTROY timer.
31	* Alan Cox : Clean up error flag in accept
32	* Alan Cox : TCP ack handling is buggy, the DESTROY timer
33	* was buggy. Put a remove_sock() in the handler
34	* for memory when we hit 0. Also altered the timer
35	* code. The ACK stuff can wait and needs major
36	* TCP layer surgery.
37	* Alan Cox : Fixed TCP ack bug, removed remove sock
38	* and fixed timer/inet_bh race.
39	* Alan Cox : Added zapped flag for TCP
40	* Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41	* Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42	* Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43	* Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44	* Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45	* Rick Sladkey : Relaxed UDP rules for matching packets.
46	* C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47	* Pauline Middelink : identd support
48	* Alan Cox : Fixed connect() taking signals I think.
49	* Alan Cox : SO_LINGER supported
50	* Alan Cox : Error reporting fixes
51	* Anonymous : inet_create tidied up (sk->reuse setting)
52	* Alan Cox : inet sockets don't set sk->type!
53	* Alan Cox : Split socket option code
54	* Alan Cox : Callbacks
55	* Alan Cox : Nagle flag for Charles & Johannes stuff
56	* Alex : Removed restriction on inet fioctl
57	* Alan Cox : Splitting INET from NET core
58	* Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59	* Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60	* Alan Cox : Split IP from generic code
61	* Alan Cox : New kfree_skbmem()
62	* Alan Cox : Make SO_DEBUG superuser only.
63	* Alan Cox : Allow anyone to clear SO_DEBUG
64	* (compatibility fix)
65	* Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66	* Alan Cox : Allocator for a socket is settable.
67	* Alan Cox : SO_ERROR includes soft errors.
68	* Alan Cox : Allow NULL arguments on some SO_ opts
69	* Alan Cox : Generic socket allocation to make hooks
70	* easier (suggested by Craig Metz).
71	* Michael Pall : SO_ERROR returns positive errno again
72	* Steve Whitehouse: Added default destructor to free
73	* protocol private data.
74	* Steve Whitehouse: Added various other default routines
75	* common to several socket families.
76	* Chris Evans : Call suser() check last on F_SETOWN
77	* Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78	* Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79	* Andi Kleen : Fix write_space callback
80	* Chris Evans : Security fixes - signedness again
81	* Arnaldo C. Melo : cleanups, use skb_queue_purge
82	*
83	* To Fix:
84	*/
85
86	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87
88	#include <linux/unaligned.h>
89	#include <linux/capability.h>
90	#include <linux/errno.h>
91	#include <linux/errqueue.h>
92	#include <linux/types.h>
93	#include <linux/socket.h>
94	#include <linux/in.h>
95	#include <linux/kernel.h>
96	#include <linux/module.h>
97	#include <linux/proc_fs.h>
98	#include <linux/seq_file.h>
99	#include <linux/sched.h>
100	#include <linux/sched/mm.h>
101	#include <linux/timer.h>
102	#include <linux/string.h>
103	#include <linux/sockios.h>
104	#include <linux/net.h>
105	#include <linux/mm.h>
106	#include <linux/slab.h>
107	#include <linux/interrupt.h>
108	#include <linux/poll.h>
109	#include <linux/tcp.h>
110	#include <linux/udp.h>
111	#include <linux/init.h>
112	#include <linux/highmem.h>
113	#include <linux/user_namespace.h>
114	#include <linux/static_key.h>
115	#include <linux/memcontrol.h>
116	#include <linux/prefetch.h>
117	#include <linux/compat.h>
118	#include <linux/mroute.h>
119	#include <linux/mroute6.h>
120	#include <linux/icmpv6.h>
121
122	#include <linux/uaccess.h>
123
124	#include <linux/netdevice.h>
125	#include <net/protocol.h>
126	#include <linux/skbuff.h>
127	#include <linux/skbuff_ref.h>
128	#include <net/net_namespace.h>
129	#include <net/request_sock.h>
130	#include <net/sock.h>
131	#include <net/proto_memory.h>
132	#include <linux/net_tstamp.h>
133	#include <net/xfrm.h>
134	#include <linux/ipsec.h>
135	#include <net/cls_cgroup.h>
136	#include <net/netprio_cgroup.h>
137	#include <linux/sock_diag.h>
138
139	#include <linux/filter.h>
140	#include <net/sock_reuseport.h>
141	#include <net/bpf_sk_storage.h>
142
143	#include <trace/events/sock.h>
144
145	#include <net/tcp.h>
146	#include <net/busy_poll.h>
147	#include <net/phonet/phonet.h>
148
149	#include <linux/ethtool.h>
150
151	#include <uapi/linux/pidfd.h>
152
153	#include "dev.h"
154
/* Protocol list head and its mutex; their users are outside this view. */
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/* Forward declarations of file-local write-space callbacks. */
static void sock_def_write_space_wfree(struct sock *sk);
static void sock_def_write_space(struct sock *sk);
160
161	/**
162	* sk_ns_capable - General socket capability test
163	* @sk: Socket to use a capability on or through
164	* @user_ns: The user namespace of the capability to use
165	* @cap: The capability to use
166	*
167	* Test to see if the opener of the socket had when the socket was
168	* created and the current process has the capability @cap in the user
169	* namespace @user_ns.
170	*/
171	bool sk_ns_capable(const struct sock *sk,
172	struct user_namespace user_ns, int* cap)
173	{
174	return file_ns_capable(file: sk->sk_socket->file, ns: user_ns, cap) &&
175	ns_capable(ns: user_ns, cap);
176	}
177	EXPORT_SYMBOL(sk_ns_capable);
178
179	/**
180	* sk_capable - Socket global capability test
181	* @sk: Socket to use a capability on or through
182	* @cap: The global capability to use
183	*
184	* Test to see if the opener of the socket had when the socket was
185	* created and the current process has the capability @cap in all user
186	* namespaces.
187	*/
188	bool sk_capable(const struct sock sk, int* cap)
189	{
190	return sk_ns_capable(sk, &init_user_ns, cap);
191	}
192	EXPORT_SYMBOL(sk_capable);
193
194	/**
195	* sk_net_capable - Network namespace socket capability test
196	* @sk: Socket to use a capability on or through
197	* @cap: The capability to use
198	*
199	* Test to see if the opener of the socket had when the socket was created
200	* and the current process has the capability @cap over the network namespace
201	* the socket is a member of.
202	*/
203	bool sk_net_capable(const struct sock sk, int* cap)
204	{
205	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
206	}
207	EXPORT_SYMBOL(sk_net_capable);
208
209	/*
210	* Each address family might have different locking rules, so we have
211	* one slock key per address family and separate keys for internal and
212	* userspace sockets.
213	*/
214	static struct lock_class_key af_family_keys[AF_MAX];
215	static struct lock_class_key af_family_kern_keys[AF_MAX];
216	static struct lock_class_key af_family_slock_keys[AF_MAX];
217	static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
218
219	/*
220	* Make lock validator output more readable. (we pre-construct these
221	* strings build-time, so that runtime initialization of socket
222	* locks is fast):
223	*/
224
225	#define _sock_locks(x) \
226	x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
227	x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
228	x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
229	x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
230	x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
231	x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
232	x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
233	x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
234	x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
235	x "27" , x "28" , x "AF_CAN" , \
236	x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
237	x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
238	x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
239	x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
240	x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
241	x "AF_MCTP" , \
242	x "AF_MAX"
243
244	static const char *const af_family_key_strings[AF_MAX+`1`] = {
245	_sock_locks("sk_lock-")
246	};
247	static const char *const af_family_slock_key_strings[AF_MAX+`1`] = {
248	_sock_locks("slock-")
249	};
250	static const char *const af_family_clock_key_strings[AF_MAX+`1`] = {
251	_sock_locks("clock-")
252	};
253
254	static const char *const af_family_kern_key_strings[AF_MAX+`1`] = {
255	_sock_locks("k-sk_lock-")
256	};
257	static const char *const af_family_kern_slock_key_strings[AF_MAX+`1`] = {
258	_sock_locks("k-slock-")
259	};
260	static const char *const af_family_kern_clock_key_strings[AF_MAX+`1`] = {
261	_sock_locks("k-clock-")
262	};
263	static const char *const af_family_rlock_key_strings[AF_MAX+`1`] = {
264	_sock_locks("rlock-")
265	};
266	static const char *const af_family_wlock_key_strings[AF_MAX+`1`] = {
267	_sock_locks("wlock-")
268	};
269	static const char *const af_family_elock_key_strings[AF_MAX+`1`] = {
270	_sock_locks("elock-")
271	};
272
273	/*
274	* sk_callback_lock and sk queues locking rules are per-address-family,
275	* so split the lock classes by using a per-AF key:
276	*/
277	static struct lock_class_key af_callback_keys[AF_MAX];
278	static struct lock_class_key af_rlock_keys[AF_MAX];
279	static struct lock_class_key af_wlock_keys[AF_MAX];
280	static struct lock_class_key af_elock_keys[AF_MAX];
281	static struct lock_class_key af_kern_callback_keys[AF_MAX];
282
283	/ Run time adjustable parameters. /
284	__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
285	EXPORT_SYMBOL(sysctl_wmem_max);
286	__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
287	EXPORT_SYMBOL(sysctl_rmem_max);
288	__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
289	__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
290
291	DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
292	EXPORT_SYMBOL_GPL(memalloc_socks_key);
293
294	/**
295	* sk_set_memalloc - sets %SOCK_MEMALLOC
296	* @sk: socket to set it on
297	*
298	* Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
299	* It's the responsibility of the admin to adjust min_free_kbytes
300	* to meet the requirements
301	*/
302	void sk_set_memalloc(struct sock *sk)
303	{
304	sock_set_flag(sk, flag: SOCK_MEMALLOC);
305	sk->sk_allocation \|= __GFP_MEMALLOC;
306	static_branch_inc(&memalloc_socks_key);
307	}
308	EXPORT_SYMBOL_GPL(sk_set_memalloc);
309
310	void sk_clear_memalloc(struct sock *sk)
311	{
312	sock_reset_flag(sk, flag: SOCK_MEMALLOC);
313	sk->sk_allocation &= ~__GFP_MEMALLOC;
314	static_branch_dec(&memalloc_socks_key);
315
316	/*
317	* SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
318	* progress of swapping. SOCK_MEMALLOC may be cleared while
319	* it has rmem allocations due to the last swapfile being deactivated
320	* but there is a risk that the socket is unusable due to exceeding
321	* the rmem limits. Reclaim the reserves and obey rmem limits again.
322	*/
323	sk_mem_reclaim(sk);
324	}
325	EXPORT_SYMBOL_GPL(sk_clear_memalloc);
326
327	int __sk_backlog_rcv(struct sock sk, struct* sk_buff *skb)
328	{
329	int ret;
330	unsigned int noreclaim_flag;
331
332	/ these should have been dropped before queueing /
333	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
334
335	noreclaim_flag = memalloc_noreclaim_save();
336	ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
337	tcp_v6_do_rcv,
338	tcp_v4_do_rcv,
339	sk, skb);
340	memalloc_noreclaim_restore(flags: noreclaim_flag);
341
342	return ret;
343	}
344	EXPORT_SYMBOL(__sk_backlog_rcv);
345
346	void sk_error_report(struct sock *sk)
347	{
348	sk->sk_error_report(sk);
349
350	switch (sk->sk_family) {
351	case AF_INET:
352	fallthrough;
353	case AF_INET6:
354	trace_inet_sk_error_report(sk);
355	break;
356	default:
357	break;
358	}
359	}
360	EXPORT_SYMBOL(sk_error_report);
361
362	int sock_get_timeout(long timeo, void *optval, bool old_timeval)
363	{
364	struct __kernel_sock_timeval tv;
365
366	if (timeo == MAX_SCHEDULE_TIMEOUT) {
367	tv.tv_sec = `0`;
368	tv.tv_usec = `0`;
369	} else {
370	tv.tv_sec = timeo / HZ;
371	tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
372	}
373
374	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
375	struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
376	(struct* old_timeval32 *)optval = tv32;
377	return sizeof(tv32);
378	}
379
380	if (old_timeval) {
381	struct __kernel_old_timeval old_tv;
382	old_tv.tv_sec = tv.tv_sec;
383	old_tv.tv_usec = tv.tv_usec;
384	(struct* __kernel_old_timeval *)optval = old_tv;
385	return sizeof(old_tv);
386	}
387
388	(struct* __kernel_sock_timeval *)optval = tv;
389	return sizeof(tv);
390	}
391	EXPORT_SYMBOL(sock_get_timeout);
392
393	int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
394	sockptr_t optval, int optlen, bool old_timeval)
395	{
396	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
397	struct old_timeval32 tv32;
398
399	if (optlen < sizeof(tv32))
400	return -EINVAL;
401
402	if (copy_from_sockptr(dst: &tv32, src: optval, size: sizeof(tv32)))
403	return -EFAULT;
404	tv->tv_sec = tv32.tv_sec;
405	tv->tv_usec = tv32.tv_usec;
406	} else if (old_timeval) {
407	struct __kernel_old_timeval old_tv;
408
409	if (optlen < sizeof(old_tv))
410	return -EINVAL;
411	if (copy_from_sockptr(dst: &old_tv, src: optval, size: sizeof(old_tv)))
412	return -EFAULT;
413	tv->tv_sec = old_tv.tv_sec;
414	tv->tv_usec = old_tv.tv_usec;
415	} else {
416	if (optlen < sizeof(*tv))
417	return -EINVAL;
418	if (copy_from_sockptr(dst: tv, src: optval, size: sizeof(*tv)))
419	return -EFAULT;
420	}
421
422	return `0`;
423	}
424	EXPORT_SYMBOL(sock_copy_user_timeval);
425
426	static int sock_set_timeout(long timeo_p, sockptr_t optval, int* optlen,
427	bool old_timeval)
428	{
429	struct __kernel_sock_timeval tv;
430	int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
431	long val;
432
433	if (err)
434	return err;
435
436	if (tv.tv_usec < `0` \|\| tv.tv_usec >= USEC_PER_SEC)
437	return -EDOM;
438
439	if (tv.tv_sec < `0`) {
440	static int warned __read_mostly;
441
442	WRITE_ONCE(*timeo_p, `0`);
443	if (warned < `10` && net_ratelimit()) {
444	warned++;
445	pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
446	__func__, current->comm, task_pid_nr(current));
447	}
448	return `0`;
449	}
450	val = MAX_SCHEDULE_TIMEOUT;
451	if ((tv.tv_sec \|\| tv.tv_usec) &&
452	(tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - `1`)))
453	val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
454	USEC_PER_SEC / HZ);
455	WRITE_ONCE(*timeo_p, val);
456	return `0`;
457	}
458
459	static bool sk_set_prio_allowed(const struct sock sk, int* val)
460	{
461	return ((val >= TC_PRIO_BESTEFFORT && val <= TC_PRIO_INTERACTIVE) \|\|
462	sockopt_ns_capable(ns: sock_net(sk)->user_ns, CAP_NET_RAW) \|\|
463	sockopt_ns_capable(ns: sock_net(sk)->user_ns, CAP_NET_ADMIN));
464	}
465
466	static bool sock_needs_netstamp(const struct sock *sk)
467	{
468	switch (sk->sk_family) {
469	case AF_UNSPEC:
470	case AF_UNIX:
471	return false;
472	default:
473	return true;
474	}
475	}
476
477	static void sock_disable_timestamp(struct sock sk, unsigned* long flags)
478	{
479	if (sk->sk_flags & flags) {
480	sk->sk_flags &= ~flags;
481	if (sock_needs_netstamp(sk) &&
482	!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
483	net_disable_timestamp();
484	}
485	}
486
487
488	int __sock_queue_rcv_skb(struct sock sk, struct* sk_buff *skb)
489	{
490	unsigned long flags;
491	struct sk_buff_head *list = &sk->sk_receive_queue;
492
493	if (atomic_read(v: &sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
494	atomic_inc(v: &sk->sk_drops);
495	trace_sock_rcvqueue_full(sk, skb);
496	return -ENOMEM;
497	}
498
499	if (!sk_rmem_schedule(sk, skb, size: skb->truesize)) {
500	atomic_inc(v: &sk->sk_drops);
501	return -ENOBUFS;
502	}
503
504	skb->dev = NULL;
505	skb_set_owner_r(skb, sk);
506
507	/ we escape from rcu protected region, make sure we dont leak*
508	* a norefcounted dst
509	*/
510	skb_dst_force(skb);
511
512	spin_lock_irqsave(&list->lock, flags);
513	sock_skb_set_dropcount(sk, skb);
514	__skb_queue_tail(list, newsk: skb);
515	spin_unlock_irqrestore(lock: &list->lock, flags);
516
517	if (!sock_flag(sk, flag: SOCK_DEAD))
518	sk->sk_data_ready(sk);
519	return `0`;
520	}
521	EXPORT_SYMBOL(__sock_queue_rcv_skb);
522
523	int sock_queue_rcv_skb_reason(struct sock sk, struct* sk_buff *skb,
524	enum skb_drop_reason *reason)
525	{
526	enum skb_drop_reason drop_reason;
527	int err;
528
529	err = sk_filter(sk, skb);
530	if (err) {
531	drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
532	goto out;
533	}
534	err = __sock_queue_rcv_skb(sk, skb);
535	switch (err) {
536	case -ENOMEM:
537	drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
538	break;
539	case -ENOBUFS:
540	drop_reason = SKB_DROP_REASON_PROTO_MEM;
541	break;
542	default:
543	drop_reason = SKB_NOT_DROPPED_YET;
544	break;
545	}
546	out:
547	if (reason)
548	*reason = drop_reason;
549	return err;
550	}
551	EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
552
553	int __sk_receive_skb(struct sock sk, struct* sk_buff *skb,
554	const int nested, unsigned int trim_cap, bool refcounted)
555	{
556	int rc = NET_RX_SUCCESS;
557
558	if (sk_filter_trim_cap(sk, skb, cap: trim_cap))
559	goto discard_and_relse;
560
561	skb->dev = NULL;
562
563	if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
564	atomic_inc(v: &sk->sk_drops);
565	goto discard_and_relse;
566	}
567	if (nested)
568	bh_lock_sock_nested(sk);
569	else
570	bh_lock_sock(sk);
571	if (!sock_owned_by_user(sk)) {
572	/*
573	* trylock + unlock semantics:
574	*/
575	mutex_acquire(&sk->sk_lock.dep_map, `0`, `1`, _RET_IP_);
576
577	rc = sk_backlog_rcv(sk, skb);
578
579	mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
580	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
581	bh_unlock_sock(sk);
582	atomic_inc(v: &sk->sk_drops);
583	goto discard_and_relse;
584	}
585
586	bh_unlock_sock(sk);
587	out:
588	if (refcounted)
589	sock_put(sk);
590	return rc;
591	discard_and_relse:
592	kfree_skb(skb);
593	goto out;
594	}
595	EXPORT_SYMBOL(__sk_receive_skb);
596
597	INDIRECT_CALLABLE_DECLARE(struct dst_entry ip6_dst_check(struct* dst_entry *,
598	u32));
599	INDIRECT_CALLABLE_DECLARE(struct dst_entry ipv4_dst_check(struct* dst_entry *,
600	u32));
601	struct dst_entry __sk_dst_check(struct* sock *sk, u32 cookie)
602	{
603	struct dst_entry *dst = __sk_dst_get(sk);
604
605	if (dst && dst->obsolete &&
606	INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
607	dst, cookie) == NULL) {
608	sk_tx_queue_clear(sk);
609	WRITE_ONCE(sk->sk_dst_pending_confirm, `0`);
610	RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
611	dst_release(dst);
612	return NULL;
613	}
614
615	return dst;
616	}
617	EXPORT_SYMBOL(__sk_dst_check);
618
619	struct dst_entry sk_dst_check(struct* sock *sk, u32 cookie)
620	{
621	struct dst_entry *dst = sk_dst_get(sk);
622
623	if (dst && dst->obsolete &&
624	INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
625	dst, cookie) == NULL) {
626	sk_dst_reset(sk);
627	dst_release(dst);
628	return NULL;
629	}
630
631	return dst;
632	}
633	EXPORT_SYMBOL(sk_dst_check);
634
635	static int sock_bindtoindex_locked(struct sock sk, int* ifindex)
636	{
637	int ret = -ENOPROTOOPT;
638	#ifdef CONFIG_NETDEVICES
639	struct net *net = sock_net(sk);
640
641	/ Sorry... /
642	ret = -EPERM;
643	if (sk->sk_bound_dev_if && !ns_capable(ns: net->user_ns, CAP_NET_RAW))
644	goto out;
645
646	ret = -EINVAL;
647	if (ifindex < `0`)
648	goto out;
649
650	/ Paired with all READ_ONCE() done locklessly. /
651	WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
652
653	if (sk->sk_prot->rehash)
654	sk->sk_prot->rehash(sk);
655	sk_dst_reset(sk);
656
657	ret = `0`;
658
659	out:
660	#endif
661
662	return ret;
663	}
664
665	int sock_bindtoindex(struct sock sk, int* ifindex, bool lock_sk)
666	{
667	int ret;
668
669	if (lock_sk)
670	lock_sock(sk);
671	ret = sock_bindtoindex_locked(sk, ifindex);
672	if (lock_sk)
673	release_sock(sk);
674
675	return ret;
676	}
677	EXPORT_SYMBOL(sock_bindtoindex);
678
679	static int sock_setbindtodevice(struct sock sk, sockptr_t optval, int* optlen)
680	{
681	int ret = -ENOPROTOOPT;
682	#ifdef CONFIG_NETDEVICES
683	struct net *net = sock_net(sk);
684	char devname[IFNAMSIZ];
685	int index;
686
687	ret = -EINVAL;
688	if (optlen < `0`)
689	goto out;
690
691	/ Bind this socket to a particular device like "eth0",*
692	* as specified in the passed interface name. If the
693	* name is "" or the option length is zero the socket
694	* is not bound.
695	*/
696	if (optlen > IFNAMSIZ - `1`)
697	optlen = IFNAMSIZ - `1`;
698	memset(devname, `0`, sizeof(devname));
699
700	ret = -EFAULT;
701	if (copy_from_sockptr(dst: devname, src: optval, size: optlen))
702	goto out;
703
704	index = `0`;
705	if (devname[`0`] != `'\0'`) {
706	struct net_device *dev;
707
708	rcu_read_lock();
709	dev = dev_get_by_name_rcu(net, name: devname);
710	if (dev)
711	index = dev->ifindex;
712	rcu_read_unlock();
713	ret = -ENODEV;
714	if (!dev)
715	goto out;
716	}
717
718	sockopt_lock_sock(sk);
719	ret = sock_bindtoindex_locked(sk, ifindex: index);
720	sockopt_release_sock(sk);
721	out:
722	#endif
723
724	return ret;
725	}
726
727	static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
728	sockptr_t optlen, int len)
729	{
730	int ret = -ENOPROTOOPT;
731	#ifdef CONFIG_NETDEVICES
732	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
733	struct net *net = sock_net(sk);
734	char devname[IFNAMSIZ];
735
736	if (bound_dev_if == `0`) {
737	len = `0`;
738	goto zero;
739	}
740
741	ret = -EINVAL;
742	if (len < IFNAMSIZ)
743	goto out;
744
745	ret = netdev_get_name(net, name: devname, ifindex: bound_dev_if);
746	if (ret)
747	goto out;
748
749	len = strlen(devname) + `1`;
750
751	ret = -EFAULT;
752	if (copy_to_sockptr(dst: optval, src: devname, size: len))
753	goto out;
754
755	zero:
756	ret = -EFAULT;
757	if (copy_to_sockptr(dst: optlen, src: &len, size: sizeof(int)))
758	goto out;
759
760	ret = `0`;
761
762	out:
763	#endif
764
765	return ret;
766	}
767
768	bool sk_mc_loop(const struct sock *sk)
769	{
770	if (dev_recursion_level())
771	return false;
772	if (!sk)
773	return true;
774	/ IPV6_ADDRFORM can change sk->sk_family under us. /
775	switch (READ_ONCE(sk->sk_family)) {
776	case AF_INET:
777	return inet_test_bit(MC_LOOP, sk);
778	#if IS_ENABLED(CONFIG_IPV6)
779	case AF_INET6:
780	return inet6_test_bit(MC6_LOOP, sk);
781	#endif
782	}
783	WARN_ON_ONCE(`1`);
784	return true;
785	}
786	EXPORT_SYMBOL(sk_mc_loop);
787
788	void sock_set_reuseaddr(struct sock *sk)
789	{
790	lock_sock(sk);
791	sk->sk_reuse = SK_CAN_REUSE;
792	release_sock(sk);
793	}
794	EXPORT_SYMBOL(sock_set_reuseaddr);
795
796	void sock_set_reuseport(struct sock *sk)
797	{
798	lock_sock(sk);
799	sk->sk_reuseport = true;
800	release_sock(sk);
801	}
802	EXPORT_SYMBOL(sock_set_reuseport);
803
804	void sock_no_linger(struct sock *sk)
805	{
806	lock_sock(sk);
807	WRITE_ONCE(sk->sk_lingertime, `0`);
808	sock_set_flag(sk, flag: SOCK_LINGER);
809	release_sock(sk);
810	}
811	EXPORT_SYMBOL(sock_no_linger);
812
813	void sock_set_priority(struct sock *sk, u32 priority)
814	{
815	WRITE_ONCE(sk->sk_priority, priority);
816	}
817	EXPORT_SYMBOL(sock_set_priority);
818
819	void sock_set_sndtimeo(struct sock *sk, s64 secs)
820	{
821	lock_sock(sk);
822	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - `1`)
823	WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
824	else
825	WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
826	release_sock(sk);
827	}
828	EXPORT_SYMBOL(sock_set_sndtimeo);
829
830	static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
831	{
832	sock_valbool_flag(sk, bit: SOCK_RCVTSTAMP, valbool: val);
833	sock_valbool_flag(sk, bit: SOCK_RCVTSTAMPNS, valbool: val && ns);
834	if (val) {
835	sock_valbool_flag(sk, bit: SOCK_TSTAMP_NEW, valbool: new);
836	sock_enable_timestamp(sk, flag: SOCK_TIMESTAMP);
837	}
838	}
839
840	void sock_enable_timestamps(struct sock *sk)
841	{
842	lock_sock(sk);
843	__sock_set_timestamps(sk, val: true, new: false, ns: true);
844	release_sock(sk);
845	}
846	EXPORT_SYMBOL(sock_enable_timestamps);
847
848	void sock_set_timestamp(struct sock sk, int* optname, bool valbool)
849	{
850	switch (optname) {
851	case SO_TIMESTAMP_OLD:
852	__sock_set_timestamps(sk, val: valbool, new: false, ns: false);
853	break;
854	case SO_TIMESTAMP_NEW:
855	__sock_set_timestamps(sk, val: valbool, new: true, ns: false);
856	break;
857	case SO_TIMESTAMPNS_OLD:
858	__sock_set_timestamps(sk, val: valbool, new: false, ns: true);
859	break;
860	case SO_TIMESTAMPNS_NEW:
861	__sock_set_timestamps(sk, val: valbool, new: true, ns: true);
862	break;
863	}
864	}
865
866	static int sock_timestamping_bind_phc(struct sock sk, int* phc_index)
867	{
868	struct net *net = sock_net(sk);
869	struct net_device *dev = NULL;
870	bool match = false;
871	int *vclock_index;
872	int i, num;
873
874	if (sk->sk_bound_dev_if)
875	dev = dev_get_by_index(net, ifindex: sk->sk_bound_dev_if);
876
877	if (!dev) {
878	pr_err("%s: sock not bind to device\n", __func__);
879	return -EOPNOTSUPP;
880	}
881
882	num = ethtool_get_phc_vclocks(dev, vclock_index: &vclock_index);
883	dev_put(dev);
884
885	for (i = `0`; i < num; i++) {
886	if (*(vclock_index + i) == phc_index) {
887	match = true;
888	break;
889	}
890	}
891
892	if (num > `0`)
893	kfree(objp: vclock_index);
894
895	if (!match)
896	return -EINVAL;
897
898	WRITE_ONCE(sk->sk_bind_phc, phc_index);
899
900	return `0`;
901	}
902
903	int sock_set_timestamping(struct sock sk, int* optname,
904	struct so_timestamping timestamping)
905	{
906	int val = timestamping.flags;
907	int ret;
908
909	if (val & ~SOF_TIMESTAMPING_MASK)
910	return -EINVAL;
911
912	if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
913	!(val & SOF_TIMESTAMPING_OPT_ID))
914	return -EINVAL;
915
916	if (val & SOF_TIMESTAMPING_OPT_ID &&
917	!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
918	if (sk_is_tcp(sk)) {
919	if ((`1` << sk->sk_state) &
920	(TCPF_CLOSE \| TCPF_LISTEN))
921	return -EINVAL;
922	if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
923	atomic_set(v: &sk->sk_tskey, tcp_sk(sk)->write_seq);
924	else
925	atomic_set(v: &sk->sk_tskey, tcp_sk(sk)->snd_una);
926	} else {
927	atomic_set(v: &sk->sk_tskey, i: `0`);
928	}
929	}
930
931	if (val & SOF_TIMESTAMPING_OPT_STATS &&
932	!(val & SOF_TIMESTAMPING_OPT_TSONLY))
933	return -EINVAL;
934
935	if (val & SOF_TIMESTAMPING_BIND_PHC) {
936	ret = sock_timestamping_bind_phc(sk, phc_index: timestamping.bind_phc);
937	if (ret)
938	return ret;
939	}
940
941	WRITE_ONCE(sk->sk_tsflags, val);
942	sock_valbool_flag(sk, bit: SOCK_TSTAMP_NEW, valbool: optname == SO_TIMESTAMPING_NEW);
943	sock_valbool_flag(sk, bit: SOCK_TIMESTAMPING_ANY, valbool: !!(val & TSFLAGS_ANY));
944
945	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
946	sock_enable_timestamp(sk,
947	flag: SOCK_TIMESTAMPING_RX_SOFTWARE);
948	else
949	sock_disable_timestamp(sk,
950	flags: (`1UL` << SOCK_TIMESTAMPING_RX_SOFTWARE));
951	return `0`;
952	}
953
#if defined(CONFIG_CGROUP_BPF)
/*
 * Run the cgroup sock_ops BPF programs for @sk with operation @op and
 * @skb attached to the sock_ops context.
 */
void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op)
{
	struct bpf_sock_ops_kern sock_ops;

	/* Only the fields that precede 'temp' are zeroed. */
	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = op;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	bpf_skops_init_skb(&sock_ops, skb, 0);
	__cgroup_bpf_run_filter_sock_ops(sk, &sock_ops, CGROUP_SOCK_OPS);
}
#endif
967
968	void sock_set_keepalive(struct sock *sk)
969	{
970	lock_sock(sk);
971	if (sk->sk_prot->keepalive)
972	sk->sk_prot->keepalive(sk, true);
973	sock_valbool_flag(sk, bit: SOCK_KEEPOPEN, valbool: true);
974	release_sock(sk);
975	}
976	EXPORT_SYMBOL(sock_set_keepalive);
977
978	static void __sock_set_rcvbuf(struct sock sk, int* val)
979	{
980	/ Ensure val * 2 fits into an int, to prevent max_t() from treating it*
981	* as a negative value.
982	*/
983	val = min_t(int, val, INT_MAX / `2`);
984	sk->sk_userlocks \|= SOCK_RCVBUF_LOCK;
985
986	/ We double it on the way in to account for "struct sk_buff" etc.*
987	* overhead. Applications assume that the SO_RCVBUF setting they make
988	* will allow that much actual data to be received on that socket.
989	*
990	* Applications are unaware that "struct sk_buff" and other overheads
991	* allocate from the receive buffer during socket buffer allocation.
992	*
993	* And after considering the possible alternatives, returning the value
994	* we actually used in getsockopt is the most desirable behavior.
995	*/
996	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * `2`, SOCK_MIN_RCVBUF));
997	}
998
/* Locked wrapper around __sock_set_rcvbuf(). */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);
	__sock_set_rcvbuf(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);
1006
1007	static void __sock_set_mark(struct sock *sk, u32 val)
1008	{
1009	if (val != sk->sk_mark) {
1010	WRITE_ONCE(sk->sk_mark, val);
1011	sk_dst_reset(sk);
1012	}
1013	}
1014
1015	void sock_set_mark(struct sock *sk, u32 val)
1016	{
1017	lock_sock(sk);
1018	__sock_set_mark(sk, val);
1019	release_sock(sk);
1020	}
1021	EXPORT_SYMBOL(sock_set_mark);
1022
1023	static void sock_release_reserved_memory(struct sock sk, int* bytes)
1024	{
1025	/ Round down bytes to multiple of pages /
1026	bytes = round_down(bytes, PAGE_SIZE);
1027
1028	WARN_ON(bytes > sk->sk_reserved_mem);
1029	WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
1030	sk_mem_reclaim(sk);
1031	}
1032
1033	static int sock_reserve_memory(struct sock sk, int* bytes)
1034	{
1035	long allocated;
1036	bool charged;
1037	int pages;
1038
1039	if (!mem_cgroup_sockets_enabled \|\| !sk->sk_memcg \|\| !sk_has_account(sk))
1040	return -EOPNOTSUPP;
1041
1042	if (!bytes)
1043	return `0`;
1044
1045	pages = sk_mem_pages(amt: bytes);
1046
1047	/ pre-charge to memcg /
1048	charged = mem_cgroup_charge_skmem(memcg: sk->sk_memcg, nr_pages: pages,
1049	GFP_KERNEL \| __GFP_RETRY_MAYFAIL);
1050	if (!charged)
1051	return -ENOMEM;
1052
1053	/ pre-charge to forward_alloc /
1054	sk_memory_allocated_add(sk, val: pages);
1055	allocated = sk_memory_allocated(sk);
1056	/ If the system goes into memory pressure with this*
1057	* precharge, give up and return error.
1058	*/
1059	if (allocated > sk_prot_mem_limits(sk, index: `1`)) {
1060	sk_memory_allocated_sub(sk, val: pages);
1061	mem_cgroup_uncharge_skmem(memcg: sk->sk_memcg, nr_pages: pages);
1062	return -ENOMEM;
1063	}
1064	sk_forward_alloc_add(sk, val: pages << PAGE_SHIFT);
1065
1066	WRITE_ONCE(sk->sk_reserved_mem,
1067	sk->sk_reserved_mem + (pages << PAGE_SHIFT));
1068
1069	return `0`;
1070	}
1071
#ifdef CONFIG_PAGE_POOL

/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
 * in 1 syscall. The limit exists to limit the amount of memory the kernel
 * allocates to copy these tokens, and to prevent looping over the frags for
 * too long.
 */
#define MAX_DONTNEED_TOKENS 128
#define MAX_DONTNEED_FRAGS 1024

/*
 * Release the frags named by the dmabuf tokens in @optval.
 *
 * @optval holds an array of struct dmabuf_token; @optlen is its size in
 * bytes and must be a multiple of the token size, covering at most
 * MAX_DONTNEED_TOKENS tokens. Processing stops once MAX_DONTNEED_FRAGS
 * frags have been visited.
 *
 * Returns the number of frags released, -EBADF for a non-TCP socket,
 * -EINVAL for a bad @optlen, -ENOMEM if the token copy cannot be
 * allocated, or -EFAULT if it cannot be copied in.
 */
static noinline_for_stack int
sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
{
	unsigned int num_tokens, i, j, k, netmem_num = 0;
	struct dmabuf_token *tokens;
	int ret = 0, num_frags = 0;
	netmem_ref netmems[16];

	if (!sk_is_tcp(sk))
		return -EBADF;

	if (optlen % sizeof(*tokens) ||
	    optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
		return -EINVAL;

	num_tokens = optlen / sizeof(*tokens);
	tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
	if (!tokens)
		return -ENOMEM;

	if (copy_from_sockptr(tokens, optval, optlen)) {
		kvfree(tokens);
		return -EFAULT;
	}

	xa_lock_bh(&sk->sk_user_frags);
	for (i = 0; i < num_tokens; i++) {
		for (j = 0; j < tokens[i].token_count; j++) {
			if (++num_frags > MAX_DONTNEED_FRAGS)
				goto frag_limit_reached;

			netmem_ref netmem = (__force netmem_ref)__xa_erase(
				&sk->sk_user_frags, tokens[i].token_start + j);

			if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
				continue;

			/*
			 * Collect erased entries and put them in batches,
			 * with the xarray lock dropped around the puts.
			 */
			netmems[netmem_num++] = netmem;
			if (netmem_num == ARRAY_SIZE(netmems)) {
				xa_unlock_bh(&sk->sk_user_frags);
				for (k = 0; k < netmem_num; k++)
					WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
				netmem_num = 0;
				xa_lock_bh(&sk->sk_user_frags);
			}
			ret++;
		}
	}

frag_limit_reached:
	xa_unlock_bh(&sk->sk_user_frags);
	/* Put whatever is left in the final, partial batch. */
	for (k = 0; k < netmem_num; k++)
		WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));

	kvfree(tokens);
	return ret;
}
#endif
1140
/*
 * Take the socket lock on behalf of a sockopt handler, unless the call
 * originates from a bpf program (see below).
 */
void sockopt_lock_sock(struct sock *sk)
{
	/* When current->bpf_ctx is set, the setsockopt is called from
	 * a bpf prog.  bpf has ensured the sk lock has been
	 * acquired before calling setsockopt().
	 */
	if (has_current_bpf_ctx())
		return;

	lock_sock(sk);
}
EXPORT_SYMBOL(sockopt_lock_sock);
1153
/*
 * Counterpart of sockopt_lock_sock(): release the socket lock, except when
 * has_current_bpf_ctx() is true, in which case sockopt_lock_sock() did not
 * take it either.
 */
void sockopt_release_sock(struct sock *sk)
{
	if (has_current_bpf_ctx())
		return;

	release_sock(sk);
}
EXPORT_SYMBOL(sockopt_release_sock);
1162
1163	bool sockopt_ns_capable(struct user_namespace ns, int* cap)
1164	{
1165	return has_current_bpf_ctx() \|\| ns_capable(ns, cap);
1166	}
1167	EXPORT_SYMBOL(sockopt_ns_capable);
1168
1169	bool sockopt_capable(int cap)
1170	{
1171	return has_current_bpf_ctx() \|\| capable(cap);
1172	}
1173	EXPORT_SYMBOL(sockopt_capable);
1174
1175	static int sockopt_validate_clockid(__kernel_clockid_t value)
1176	{
1177	switch (value) {
1178	case CLOCK_REALTIME:
1179	case CLOCK_MONOTONIC:
1180	case CLOCK_TAI:
1181	return `0`;
1182	}
1183	return -EINVAL;
1184	}
1185
1186	/*
1187	* This is meant for all protocols to use and covers goings on
1188	* at the socket level. Everything here is generic.
1189	*/
1190
1191	int sk_setsockopt(struct sock sk, int* level, int optname,
1192	sockptr_t optval, unsigned int optlen)
1193	{
1194	struct so_timestamping timestamping;
1195	struct socket *sock = sk->sk_socket;
1196	struct sock_txtime sk_txtime;
1197	int val;
1198	int valbool;
1199	struct linger ling;
1200	int ret = `0`;
1201
1202	/*
1203	* Options without arguments
1204	*/
1205
1206	if (optname == SO_BINDTODEVICE)
1207	return sock_setbindtodevice(sk, optval, optlen);
1208
1209	if (optlen < sizeof(int))
1210	return -EINVAL;
1211
1212	if (copy_from_sockptr(dst: &val, src: optval, size: sizeof(val)))
1213	return -EFAULT;
1214
1215	valbool = val ? `1` : `0`;
1216
1217	/ handle options which do not require locking the socket. /
1218	switch (optname) {
1219	case SO_PRIORITY:
1220	if (sk_set_prio_allowed(sk, val)) {
1221	sock_set_priority(sk, val);
1222	return `0`;
1223	}
1224	return -EPERM;
1225	case SO_TYPE:
1226	case SO_PROTOCOL:
1227	case SO_DOMAIN:
1228	case SO_ERROR:
1229	return -ENOPROTOOPT;
1230	#ifdef CONFIG_NET_RX_BUSY_POLL
1231	case SO_BUSY_POLL:
1232	if (val < `0`)
1233	return -EINVAL;
1234	WRITE_ONCE(sk->sk_ll_usec, val);
1235	return `0`;
1236	case SO_PREFER_BUSY_POLL:
1237	if (valbool && !sockopt_capable(CAP_NET_ADMIN))
1238	return -EPERM;
1239	WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
1240	return `0`;
1241	case SO_BUSY_POLL_BUDGET:
1242	if (val > READ_ONCE(sk->sk_busy_poll_budget) &&
1243	!sockopt_capable(CAP_NET_ADMIN))
1244	return -EPERM;
1245	if (val < `0` \|\| val > U16_MAX)
1246	return -EINVAL;
1247	WRITE_ONCE(sk->sk_busy_poll_budget, val);
1248	return `0`;
1249	#endif
1250	case SO_MAX_PACING_RATE:
1251	{
1252	unsigned long ulval = (val == ~`0U`) ? ~`0UL` : (unsigned int)val;
1253	unsigned long pacing_rate;
1254
1255	if (sizeof(ulval) != sizeof(val) &&
1256	optlen >= sizeof(ulval) &&
1257	copy_from_sockptr(dst: &ulval, src: optval, size: sizeof(ulval))) {
1258	return -EFAULT;
1259	}
1260	if (ulval != ~`0UL`)
1261	cmpxchg(&sk->sk_pacing_status,
1262	SK_PACING_NONE,
1263	SK_PACING_NEEDED);
1264	/ Pairs with READ_ONCE() from sk_getsockopt() /
1265	WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
1266	pacing_rate = READ_ONCE(sk->sk_pacing_rate);
1267	if (ulval < pacing_rate)
1268	WRITE_ONCE(sk->sk_pacing_rate, ulval);
1269	return `0`;
1270	}
1271	case SO_TXREHASH:
1272	if (!sk_is_tcp(sk))
1273	return -EOPNOTSUPP;
1274	if (val < -`1` \|\| val > `1`)
1275	return -EINVAL;
1276	if ((u8)val == SOCK_TXREHASH_DEFAULT)
1277	val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
1278	/ Paired with READ_ONCE() in tcp_rtx_synack()*
1279	* and sk_getsockopt().
1280	*/
1281	WRITE_ONCE(sk->sk_txrehash, (u8)val);
1282	return `0`;
1283	case SO_PEEK_OFF:
1284	{
1285	int (set_peek_off)(struct* sock sk, int* val);
1286
1287	set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
1288	if (set_peek_off)
1289	ret = set_peek_off(sk, val);
1290	else
1291	ret = -EOPNOTSUPP;
1292	return ret;
1293	}
1294	#ifdef CONFIG_PAGE_POOL
1295	case SO_DEVMEM_DONTNEED:
1296	return sock_devmem_dontneed(sk, optval, optlen);
1297	#endif
1298	}
1299
1300	sockopt_lock_sock(sk);
1301
1302	switch (optname) {
1303	case SO_DEBUG:
1304	if (val && !sockopt_capable(CAP_NET_ADMIN))
1305	ret = -EACCES;
1306	else
1307	sock_valbool_flag(sk, bit: SOCK_DBG, valbool);
1308	break;
1309	case SO_REUSEADDR:
1310	sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
1311	break;
1312	case SO_REUSEPORT:
1313	if (valbool && !sk_is_inet(sk))
1314	ret = -EOPNOTSUPP;
1315	else
1316	sk->sk_reuseport = valbool;
1317	break;
1318	case SO_DONTROUTE:
1319	sock_valbool_flag(sk, bit: SOCK_LOCALROUTE, valbool);
1320	sk_dst_reset(sk);
1321	break;
1322	case SO_BROADCAST:
1323	sock_valbool_flag(sk, bit: SOCK_BROADCAST, valbool);
1324	break;
1325	case SO_SNDBUF:
1326	/ Don't error on this BSD doesn't and if you think*
1327	* about it this is right. Otherwise apps have to
1328	* play 'guess the biggest size' games. RCVBUF/SNDBUF
1329	* are treated in BSD as hints
1330	*/
1331	val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
1332	set_sndbuf:
1333	/ Ensure val * 2 fits into an int, to prevent max_t()*
1334	* from treating it as a negative value.
1335	*/
1336	val = min_t(int, val, INT_MAX / `2`);
1337	sk->sk_userlocks \|= SOCK_SNDBUF_LOCK;
1338	WRITE_ONCE(sk->sk_sndbuf,
1339	max_t(int, val * `2`, SOCK_MIN_SNDBUF));
1340	/ Wake up sending tasks if we upped the value. /
1341	sk->sk_write_space(sk);
1342	break;
1343
1344	case SO_SNDBUFFORCE:
1345	if (!sockopt_capable(CAP_NET_ADMIN)) {
1346	ret = -EPERM;
1347	break;
1348	}
1349
1350	/ No negative values (to prevent underflow, as val will be*
1351	* multiplied by 2).
1352	*/
1353	if (val < `0`)
1354	val = `0`;
1355	goto set_sndbuf;
1356
1357	case SO_RCVBUF:
1358	/ Don't error on this BSD doesn't and if you think*
1359	* about it this is right. Otherwise apps have to
1360	* play 'guess the biggest size' games. RCVBUF/SNDBUF
1361	* are treated in BSD as hints
1362	*/
1363	__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
1364	break;
1365
1366	case SO_RCVBUFFORCE:
1367	if (!sockopt_capable(CAP_NET_ADMIN)) {
1368	ret = -EPERM;
1369	break;
1370	}
1371
1372	/ No negative values (to prevent underflow, as val will be*
1373	* multiplied by 2).
1374	*/
1375	__sock_set_rcvbuf(sk, max(val, `0`));
1376	break;
1377
1378	case SO_KEEPALIVE:
1379	if (sk->sk_prot->keepalive)
1380	sk->sk_prot->keepalive(sk, valbool);
1381	sock_valbool_flag(sk, bit: SOCK_KEEPOPEN, valbool);
1382	break;
1383
1384	case SO_OOBINLINE:
1385	sock_valbool_flag(sk, bit: SOCK_URGINLINE, valbool);
1386	break;
1387
1388	case SO_NO_CHECK:
1389	sk->sk_no_check_tx = valbool;
1390	break;
1391
1392	case SO_LINGER:
1393	if (optlen < sizeof(ling)) {
1394	ret = -EINVAL; / 1003.1g /
1395	break;
1396	}
1397	if (copy_from_sockptr(dst: &ling, src: optval, size: sizeof(ling))) {
1398	ret = -EFAULT;
1399	break;
1400	}
1401	if (!ling.l_onoff) {
1402	sock_reset_flag(sk, flag: SOCK_LINGER);
1403	} else {
1404	unsigned long t_sec = ling.l_linger;
1405
1406	if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
1407	WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
1408	else
1409	WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
1410	sock_set_flag(sk, flag: SOCK_LINGER);
1411	}
1412	break;
1413
1414	case SO_BSDCOMPAT:
1415	break;
1416
1417	case SO_TIMESTAMP_OLD:
1418	case SO_TIMESTAMP_NEW:
1419	case SO_TIMESTAMPNS_OLD:
1420	case SO_TIMESTAMPNS_NEW:
1421	sock_set_timestamp(sk, optname, valbool);
1422	break;
1423
1424	case SO_TIMESTAMPING_NEW:
1425	case SO_TIMESTAMPING_OLD:
1426	if (optlen == sizeof(timestamping)) {
1427	if (copy_from_sockptr(dst: &timestamping, src: optval,
1428	size: sizeof(timestamping))) {
1429	ret = -EFAULT;
1430	break;
1431	}
1432	} else {
1433	memset(&timestamping, `0`, sizeof(timestamping));
1434	timestamping.flags = val;
1435	}
1436	ret = sock_set_timestamping(sk, optname, timestamping);
1437	break;
1438
1439	case SO_RCVLOWAT:
1440	{
1441	int (set_rcvlowat)(struct* sock sk, int* val) = NULL;
1442
1443	if (val < `0`)
1444	val = INT_MAX;
1445	if (sock)
1446	set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
1447	if (set_rcvlowat)
1448	ret = set_rcvlowat(sk, val);
1449	else
1450	WRITE_ONCE(sk->sk_rcvlowat, val ? : `1`);
1451	break;
1452	}
1453	case SO_RCVTIMEO_OLD:
1454	case SO_RCVTIMEO_NEW:
1455	ret = sock_set_timeout(timeo_p: &sk->sk_rcvtimeo, optval,
1456	optlen, old_timeval: optname == SO_RCVTIMEO_OLD);
1457	break;
1458
1459	case SO_SNDTIMEO_OLD:
1460	case SO_SNDTIMEO_NEW:
1461	ret = sock_set_timeout(timeo_p: &sk->sk_sndtimeo, optval,
1462	optlen, old_timeval: optname == SO_SNDTIMEO_OLD);
1463	break;
1464
1465	case SO_ATTACH_FILTER: {
1466	struct sock_fprog fprog;
1467
1468	ret = copy_bpf_fprog_from_user(dst: &fprog, src: optval, len: optlen);
1469	if (!ret)
1470	ret = sk_attach_filter(fprog: &fprog, sk);
1471	break;
1472	}
1473	case SO_ATTACH_BPF:
1474	ret = -EINVAL;
1475	if (optlen == sizeof(u32)) {
1476	u32 ufd;
1477
1478	ret = -EFAULT;
1479	if (copy_from_sockptr(dst: &ufd, src: optval, size: sizeof(ufd)))
1480	break;
1481
1482	ret = sk_attach_bpf(ufd, sk);
1483	}
1484	break;
1485
1486	case SO_ATTACH_REUSEPORT_CBPF: {
1487	struct sock_fprog fprog;
1488
1489	ret = copy_bpf_fprog_from_user(dst: &fprog, src: optval, len: optlen);
1490	if (!ret)
1491	ret = sk_reuseport_attach_filter(fprog: &fprog, sk);
1492	break;
1493	}
1494	case SO_ATTACH_REUSEPORT_EBPF:
1495	ret = -EINVAL;
1496	if (optlen == sizeof(u32)) {
1497	u32 ufd;
1498
1499	ret = -EFAULT;
1500	if (copy_from_sockptr(dst: &ufd, src: optval, size: sizeof(ufd)))
1501	break;
1502
1503	ret = sk_reuseport_attach_bpf(ufd, sk);
1504	}
1505	break;
1506
1507	case SO_DETACH_REUSEPORT_BPF:
1508	ret = reuseport_detach_prog(sk);
1509	break;
1510
1511	case SO_DETACH_FILTER:
1512	ret = sk_detach_filter(sk);
1513	break;
1514
1515	case SO_LOCK_FILTER:
1516	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED) && !valbool)
1517	ret = -EPERM;
1518	else
1519	sock_valbool_flag(sk, bit: SOCK_FILTER_LOCKED, valbool);
1520	break;
1521
1522	case SO_MARK:
1523	if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1524	!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1525	ret = -EPERM;
1526	break;
1527	}
1528
1529	__sock_set_mark(sk, val);
1530	break;
1531	case SO_RCVMARK:
1532	sock_valbool_flag(sk, bit: SOCK_RCVMARK, valbool);
1533	break;
1534
1535	case SO_RCVPRIORITY:
1536	sock_valbool_flag(sk, bit: SOCK_RCVPRIORITY, valbool);
1537	break;
1538
1539	case SO_RXQ_OVFL:
1540	sock_valbool_flag(sk, bit: SOCK_RXQ_OVFL, valbool);
1541	break;
1542
1543	case SO_WIFI_STATUS:
1544	sock_valbool_flag(sk, bit: SOCK_WIFI_STATUS, valbool);
1545	break;
1546
1547	case SO_NOFCS:
1548	sock_valbool_flag(sk, bit: SOCK_NOFCS, valbool);
1549	break;
1550
1551	case SO_SELECT_ERR_QUEUE:
1552	sock_valbool_flag(sk, bit: SOCK_SELECT_ERR_QUEUE, valbool);
1553	break;
1554
1555	case SO_PASSCRED:
1556	if (sk_may_scm_recv(sk))
1557	sk->sk_scm_credentials = valbool;
1558	else
1559	ret = -EOPNOTSUPP;
1560	break;
1561
1562	case SO_PASSSEC:
1563	if (IS_ENABLED(CONFIG_SECURITY_NETWORK) && sk_may_scm_recv(sk))
1564	sk->sk_scm_security = valbool;
1565	else
1566	ret = -EOPNOTSUPP;
1567	break;
1568
1569	case SO_PASSPIDFD:
1570	if (sk_is_unix(sk))
1571	sk->sk_scm_pidfd = valbool;
1572	else
1573	ret = -EOPNOTSUPP;
1574	break;
1575
1576	case SO_PASSRIGHTS:
1577	if (sk_is_unix(sk))
1578	sk->sk_scm_rights = valbool;
1579	else
1580	ret = -EOPNOTSUPP;
1581	break;
1582
1583	case SO_INCOMING_CPU:
1584	reuseport_update_incoming_cpu(sk, val);
1585	break;
1586
1587	case SO_CNX_ADVICE:
1588	if (val == `1`)
1589	dst_negative_advice(sk);
1590	break;
1591
1592	case SO_ZEROCOPY:
1593	if (sk->sk_family == PF_INET \|\| sk->sk_family == PF_INET6) {
1594	if (!(sk_is_tcp(sk) \|\|
1595	(sk->sk_type == SOCK_DGRAM &&
1596	sk->sk_protocol == IPPROTO_UDP)))
1597	ret = -EOPNOTSUPP;
1598	} else if (sk->sk_family != PF_RDS) {
1599	ret = -EOPNOTSUPP;
1600	}
1601	if (!ret) {
1602	if (val < `0` \|\| val > `1`)
1603	ret = -EINVAL;
1604	else
1605	sock_valbool_flag(sk, bit: SOCK_ZEROCOPY, valbool);
1606	}
1607	break;
1608
1609	case SO_TXTIME:
1610	if (optlen != sizeof(struct sock_txtime)) {
1611	ret = -EINVAL;
1612	break;
1613	} else if (copy_from_sockptr(dst: &sk_txtime, src: optval,
1614	size: sizeof(struct sock_txtime))) {
1615	ret = -EFAULT;
1616	break;
1617	} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1618	ret = -EINVAL;
1619	break;
1620	}
1621	/ CLOCK_MONOTONIC is only used by sch_fq, and this packet*
1622	* scheduler has enough safe guards.
1623	*/
1624	if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1625	!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1626	ret = -EPERM;
1627	break;
1628	}
1629
1630	ret = sockopt_validate_clockid(value: sk_txtime.clockid);
1631	if (ret)
1632	break;
1633
1634	sock_valbool_flag(sk, bit: SOCK_TXTIME, valbool: true);
1635	sk->sk_clockid = sk_txtime.clockid;
1636	sk->sk_txtime_deadline_mode =
1637	!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1638	sk->sk_txtime_report_errors =
1639	!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1640	break;
1641
1642	case SO_BINDTOIFINDEX:
1643	ret = sock_bindtoindex_locked(sk, ifindex: val);
1644	break;
1645
1646	case SO_BUF_LOCK:
1647	if (val & ~SOCK_BUF_LOCK_MASK) {
1648	ret = -EINVAL;
1649	break;
1650	}
1651	sk->sk_userlocks = val \| (sk->sk_userlocks &
1652	~SOCK_BUF_LOCK_MASK);
1653	break;
1654
1655	case SO_RESERVE_MEM:
1656	{
1657	int delta;
1658
1659	if (val < `0`) {
1660	ret = -EINVAL;
1661	break;
1662	}
1663
1664	delta = val - sk->sk_reserved_mem;
1665	if (delta < `0`)
1666	sock_release_reserved_memory(sk, bytes: -delta);
1667	else
1668	ret = sock_reserve_memory(sk, bytes: delta);
1669	break;
1670	}
1671
1672	default:
1673	ret = -ENOPROTOOPT;
1674	break;
1675	}
1676	sockopt_release_sock(sk);
1677	return ret;
1678	}
1679
1680	int sock_setsockopt(struct socket sock, int* level, int optname,
1681	sockptr_t optval, unsigned int optlen)
1682	{
1683	return sk_setsockopt(sk: sock->sk, level, optname,
1684	optval, optlen);
1685	}
1686	EXPORT_SYMBOL(sock_setsockopt);
1687
1688	static const struct cred sk_get_peer_cred(struct* sock *sk)
1689	{
1690	const struct cred *cred;
1691
1692	spin_lock(lock: &sk->sk_peer_lock);
1693	cred = get_cred(cred: sk->sk_peer_cred);
1694	spin_unlock(lock: &sk->sk_peer_lock);
1695
1696	return cred;
1697	}
1698
1699	static void cred_to_ucred(struct pid pid, const* struct cred *cred,
1700	struct ucred *ucred)
1701	{
1702	ucred->pid = pid_vnr(pid);
1703	ucred->uid = ucred->gid = -`1`;
1704	if (cred) {
1705	struct user_namespace *current_ns = current_user_ns();
1706
1707	ucred->uid = from_kuid_munged(to: current_ns, uid: cred->euid);
1708	ucred->gid = from_kgid_munged(to: current_ns, gid: cred->egid);
1709	}
1710	}
1711
1712	static int groups_to_user(sockptr_t dst, const struct group_info *src)
1713	{
1714	struct user_namespace *user_ns = current_user_ns();
1715	int i;
1716
1717	for (i = `0`; i < src->ngroups; i++) {
1718	gid_t gid = from_kgid_munged(to: user_ns, gid: src->gid[i]);
1719
1720	if (copy_to_sockptr_offset(dst, offset: i * sizeof(gid), src: &gid, size: sizeof(gid)))
1721	return -EFAULT;
1722	}
1723
1724	return `0`;
1725	}
1726
1727	int sk_getsockopt(struct sock sk, int* level, int optname,
1728	sockptr_t optval, sockptr_t optlen)
1729	{
1730	struct socket *sock = sk->sk_socket;
1731
1732	union {
1733	int val;
1734	u64 val64;
1735	unsigned long ulval;
1736	struct linger ling;
1737	struct old_timeval32 tm32;
1738	struct __kernel_old_timeval tm;
1739	struct __kernel_sock_timeval stm;
1740	struct sock_txtime txtime;
1741	struct so_timestamping timestamping;
1742	} v;
1743
1744	int lv = sizeof(int);
1745	int len;
1746
1747	if (copy_from_sockptr(dst: &len, src: optlen, size: sizeof(int)))
1748	return -EFAULT;
1749	if (len < `0`)
1750	return -EINVAL;
1751
1752	memset(&v, `0`, sizeof(v));
1753
1754	switch (optname) {
1755	case SO_DEBUG:
1756	v.val = sock_flag(sk, flag: SOCK_DBG);
1757	break;
1758
1759	case SO_DONTROUTE:
1760	v.val = sock_flag(sk, flag: SOCK_LOCALROUTE);
1761	break;
1762
1763	case SO_BROADCAST:
1764	v.val = sock_flag(sk, flag: SOCK_BROADCAST);
1765	break;
1766
1767	case SO_SNDBUF:
1768	v.val = READ_ONCE(sk->sk_sndbuf);
1769	break;
1770
1771	case SO_RCVBUF:
1772	v.val = READ_ONCE(sk->sk_rcvbuf);
1773	break;
1774
1775	case SO_REUSEADDR:
1776	v.val = sk->sk_reuse;
1777	break;
1778
1779	case SO_REUSEPORT:
1780	v.val = sk->sk_reuseport;
1781	break;
1782
1783	case SO_KEEPALIVE:
1784	v.val = sock_flag(sk, flag: SOCK_KEEPOPEN);
1785	break;
1786
1787	case SO_TYPE:
1788	v.val = sk->sk_type;
1789	break;
1790
1791	case SO_PROTOCOL:
1792	v.val = sk->sk_protocol;
1793	break;
1794
1795	case SO_DOMAIN:
1796	v.val = sk->sk_family;
1797	break;
1798
1799	case SO_ERROR:
1800	v.val = -sock_error(sk);
1801	if (v.val == `0`)
1802	v.val = xchg(&sk->sk_err_soft, `0`);
1803	break;
1804
1805	case SO_OOBINLINE:
1806	v.val = sock_flag(sk, flag: SOCK_URGINLINE);
1807	break;
1808
1809	case SO_NO_CHECK:
1810	v.val = sk->sk_no_check_tx;
1811	break;
1812
1813	case SO_PRIORITY:
1814	v.val = READ_ONCE(sk->sk_priority);
1815	break;
1816
1817	case SO_LINGER:
1818	lv = sizeof(v.ling);
1819	v.ling.l_onoff = sock_flag(sk, flag: SOCK_LINGER);
1820	v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
1821	break;
1822
1823	case SO_BSDCOMPAT:
1824	break;
1825
1826	case SO_TIMESTAMP_OLD:
1827	v.val = sock_flag(sk, flag: SOCK_RCVTSTAMP) &&
1828	!sock_flag(sk, flag: SOCK_TSTAMP_NEW) &&
1829	!sock_flag(sk, flag: SOCK_RCVTSTAMPNS);
1830	break;
1831
1832	case SO_TIMESTAMPNS_OLD:
1833	v.val = sock_flag(sk, flag: SOCK_RCVTSTAMPNS) && !sock_flag(sk, flag: SOCK_TSTAMP_NEW);
1834	break;
1835
1836	case SO_TIMESTAMP_NEW:
1837	v.val = sock_flag(sk, flag: SOCK_RCVTSTAMP) && sock_flag(sk, flag: SOCK_TSTAMP_NEW);
1838	break;
1839
1840	case SO_TIMESTAMPNS_NEW:
1841	v.val = sock_flag(sk, flag: SOCK_RCVTSTAMPNS) && sock_flag(sk, flag: SOCK_TSTAMP_NEW);
1842	break;
1843
1844	case SO_TIMESTAMPING_OLD:
1845	case SO_TIMESTAMPING_NEW:
1846	lv = sizeof(v.timestamping);
1847	/ For the later-added case SO_TIMESTAMPING_NEW: Be strict about only*
1848	* returning the flags when they were set through the same option.
1849	* Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
1850	*/
1851	if (optname == SO_TIMESTAMPING_OLD \|\| sock_flag(sk, flag: SOCK_TSTAMP_NEW)) {
1852	v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
1853	v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
1854	}
1855	break;
1856
1857	case SO_RCVTIMEO_OLD:
1858	case SO_RCVTIMEO_NEW:
1859	lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
1860	SO_RCVTIMEO_OLD == optname);
1861	break;
1862
1863	case SO_SNDTIMEO_OLD:
1864	case SO_SNDTIMEO_NEW:
1865	lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
1866	SO_SNDTIMEO_OLD == optname);
1867	break;
1868
1869	case SO_RCVLOWAT:
1870	v.val = READ_ONCE(sk->sk_rcvlowat);
1871	break;
1872
1873	case SO_SNDLOWAT:
1874	v.val = `1`;
1875	break;
1876
1877	case SO_PASSCRED:
1878	if (!sk_may_scm_recv(sk))
1879	return -EOPNOTSUPP;
1880
1881	v.val = sk->sk_scm_credentials;
1882	break;
1883
1884	case SO_PASSPIDFD:
1885	if (!sk_is_unix(sk))
1886	return -EOPNOTSUPP;
1887
1888	v.val = sk->sk_scm_pidfd;
1889	break;
1890
1891	case SO_PASSRIGHTS:
1892	if (!sk_is_unix(sk))
1893	return -EOPNOTSUPP;
1894
1895	v.val = sk->sk_scm_rights;
1896	break;
1897
1898	case SO_PEERCRED:
1899	{
1900	struct ucred peercred;
1901	if (len > sizeof(peercred))
1902	len = sizeof(peercred);
1903
1904	spin_lock(lock: &sk->sk_peer_lock);
1905	cred_to_ucred(pid: sk->sk_peer_pid, cred: sk->sk_peer_cred, ucred: &peercred);
1906	spin_unlock(lock: &sk->sk_peer_lock);
1907
1908	if (copy_to_sockptr(dst: optval, src: &peercred, size: len))
1909	return -EFAULT;
1910	goto lenout;
1911	}
1912
1913	case SO_PEERPIDFD:
1914	{
1915	struct pid *peer_pid;
1916	struct file *pidfd_file = NULL;
1917	unsigned int flags = `0`;
1918	int pidfd;
1919
1920	if (len > sizeof(pidfd))
1921	len = sizeof(pidfd);
1922
1923	spin_lock(lock: &sk->sk_peer_lock);
1924	peer_pid = get_pid(pid: sk->sk_peer_pid);
1925	spin_unlock(lock: &sk->sk_peer_lock);
1926
1927	if (!peer_pid)
1928	return -ENODATA;
1929
1930	/ The use of PIDFD_STALE requires stashing of struct pid*
1931	* on pidfs with pidfs_register_pid() and only AF_UNIX
1932	* were prepared for this.
1933	*/
1934	if (sk->sk_family == AF_UNIX)
1935	flags = PIDFD_STALE;
1936
1937	pidfd = pidfd_prepare(pid: peer_pid, flags, ret_file: &pidfd_file);
1938	put_pid(pid: peer_pid);
1939	if (pidfd < `0`)
1940	return pidfd;
1941
1942	if (copy_to_sockptr(dst: optval, src: &pidfd, size: len) \|\|
1943	copy_to_sockptr(dst: optlen, src: &len, size: sizeof(int))) {
1944	put_unused_fd(fd: pidfd);
1945	fput(pidfd_file);
1946
1947	return -EFAULT;
1948	}
1949
1950	fd_install(fd: pidfd, file: pidfd_file);
1951	return `0`;
1952	}
1953
1954	case SO_PEERGROUPS:
1955	{
1956	const struct cred *cred;
1957	int ret, n;
1958
1959	cred = sk_get_peer_cred(sk);
1960	if (!cred)
1961	return -ENODATA;
1962
1963	n = cred->group_info->ngroups;
1964	if (len < n * sizeof(gid_t)) {
1965	len = n * sizeof(gid_t);
1966	put_cred(cred);
1967	return copy_to_sockptr(dst: optlen, src: &len, size: sizeof(int)) ? -EFAULT : -ERANGE;
1968	}
1969	len = n * sizeof(gid_t);
1970
1971	ret = groups_to_user(dst: optval, src: cred->group_info);
1972	put_cred(cred);
1973	if (ret)
1974	return ret;
1975	goto lenout;
1976	}
1977
1978	case SO_PEERNAME:
1979	{
1980	struct sockaddr_storage address;
1981
1982	lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, `2`);
1983	if (lv < `0`)
1984	return -ENOTCONN;
1985	if (lv < len)
1986	return -EINVAL;
1987	if (copy_to_sockptr(dst: optval, src: &address, size: len))
1988	return -EFAULT;
1989	goto lenout;
1990	}
1991
1992	/ Dubious BSD thing... Probably nobody even uses it, but*
1993	* the UNIX standard wants it for whatever reason... -DaveM
1994	*/
1995	case SO_ACCEPTCONN:
1996	v.val = sk->sk_state == TCP_LISTEN;
1997	break;
1998
1999	case SO_PASSSEC:
2000	if (!IS_ENABLED(CONFIG_SECURITY_NETWORK) \|\| !sk_may_scm_recv(sk))
2001	return -EOPNOTSUPP;
2002
2003	v.val = sk->sk_scm_security;
2004	break;
2005
2006	case SO_PEERSEC:
2007	return security_socket_getpeersec_stream(sock,
2008	optval, optlen, len);
2009
2010	case SO_MARK:
2011	v.val = READ_ONCE(sk->sk_mark);
2012	break;
2013
2014	case SO_RCVMARK:
2015	v.val = sock_flag(sk, flag: SOCK_RCVMARK);
2016	break;
2017
2018	case SO_RCVPRIORITY:
2019	v.val = sock_flag(sk, flag: SOCK_RCVPRIORITY);
2020	break;
2021
2022	case SO_RXQ_OVFL:
2023	v.val = sock_flag(sk, flag: SOCK_RXQ_OVFL);
2024	break;
2025
2026	case SO_WIFI_STATUS:
2027	v.val = sock_flag(sk, flag: SOCK_WIFI_STATUS);
2028	break;
2029
2030	case SO_PEEK_OFF:
2031	if (!READ_ONCE(sock->ops)->set_peek_off)
2032	return -EOPNOTSUPP;
2033
2034	v.val = READ_ONCE(sk->sk_peek_off);
2035	break;
2036	case SO_NOFCS:
2037	v.val = sock_flag(sk, flag: SOCK_NOFCS);
2038	break;
2039
2040	case SO_BINDTODEVICE:
2041	return sock_getbindtodevice(sk, optval, optlen, len);
2042
2043	case SO_GET_FILTER:
2044	len = sk_get_filter(sk, optval, len);
2045	if (len < `0`)
2046	return len;
2047
2048	goto lenout;
2049
2050	case SO_LOCK_FILTER:
2051	v.val = sock_flag(sk, flag: SOCK_FILTER_LOCKED);
2052	break;
2053
2054	case SO_BPF_EXTENSIONS:
2055	v.val = bpf_tell_extensions();
2056	break;
2057
2058	case SO_SELECT_ERR_QUEUE:
2059	v.val = sock_flag(sk, flag: SOCK_SELECT_ERR_QUEUE);
2060	break;
2061
2062	#ifdef CONFIG_NET_RX_BUSY_POLL
2063	case SO_BUSY_POLL:
2064	v.val = READ_ONCE(sk->sk_ll_usec);
2065	break;
2066	case SO_PREFER_BUSY_POLL:
2067	v.val = READ_ONCE(sk->sk_prefer_busy_poll);
2068	break;
2069	#endif
2070
2071	case SO_MAX_PACING_RATE:
2072	/ The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() /
2073	if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
2074	lv = sizeof(v.ulval);
2075	v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
2076	} else {
2077	/ 32bit version /
2078	v.val = min_t(unsigned long, ~`0U`,
2079	READ_ONCE(sk->sk_max_pacing_rate));
2080	}
2081	break;
2082
2083	case SO_INCOMING_CPU:
2084	v.val = READ_ONCE(sk->sk_incoming_cpu);
2085	break;
2086
2087	case SO_MEMINFO:
2088	{
2089	u32 meminfo[SK_MEMINFO_VARS];
2090
2091	sk_get_meminfo(sk, meminfo);
2092
2093	len = min_t(unsigned int, len, sizeof(meminfo));
2094	if (copy_to_sockptr(dst: optval, src: &meminfo, size: len))
2095	return -EFAULT;
2096
2097	goto lenout;
2098	}
2099
2100	#ifdef CONFIG_NET_RX_BUSY_POLL
2101	case SO_INCOMING_NAPI_ID:
2102	v.val = READ_ONCE(sk->sk_napi_id);
2103
2104	/ aggregate non-NAPI IDs down to 0 /
2105	if (!napi_id_valid(napi_id: v.val))
2106	v.val = `0`;
2107
2108	break;
2109	#endif
2110
2111	case SO_COOKIE:
2112	lv = sizeof(u64);
2113	if (len < lv)
2114	return -EINVAL;
2115	v.val64 = sock_gen_cookie(sk);
2116	break;
2117
2118	case SO_ZEROCOPY:
2119	v.val = sock_flag(sk, flag: SOCK_ZEROCOPY);
2120	break;
2121
2122	case SO_TXTIME:
2123	lv = sizeof(v.txtime);
2124	v.txtime.clockid = sk->sk_clockid;
2125	v.txtime.flags \|= sk->sk_txtime_deadline_mode ?
2126	SOF_TXTIME_DEADLINE_MODE : `0`;
2127	v.txtime.flags \|= sk->sk_txtime_report_errors ?
2128	SOF_TXTIME_REPORT_ERRORS : `0`;
2129	break;
2130
2131	case SO_BINDTOIFINDEX:
2132	v.val = READ_ONCE(sk->sk_bound_dev_if);
2133	break;
2134
2135	case SO_NETNS_COOKIE:
2136	lv = sizeof(u64);
2137	if (len != lv)
2138	return -EINVAL;
2139	v.val64 = sock_net(sk)->net_cookie;
2140	break;
2141
2142	case SO_BUF_LOCK:
2143	v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
2144	break;
2145
2146	case SO_RESERVE_MEM:
2147	v.val = READ_ONCE(sk->sk_reserved_mem);
2148	break;
2149
2150	case SO_TXREHASH:
2151	if (!sk_is_tcp(sk))
2152	return -EOPNOTSUPP;
2153
2154	/ Paired with WRITE_ONCE() in sk_setsockopt() /
2155	v.val = READ_ONCE(sk->sk_txrehash);
2156	break;
2157
2158	default:
2159	/ We implement the SO_SNDLOWAT etc to not be settable*
2160	* (1003.1g 7).
2161	*/
2162	return -ENOPROTOOPT;
2163	}
2164
2165	if (len > lv)
2166	len = lv;
2167	if (copy_to_sockptr(dst: optval, src: &v, size: len))
2168	return -EFAULT;
2169	lenout:
2170	if (copy_to_sockptr(dst: optlen, src: &len, size: sizeof(int)))
2171	return -EFAULT;
2172	return `0`;
2173	}
2174
2175	/*
2176	* Initialize an sk_lock.
2177	*
2178	* (We also register the sk_lock with the lock validator.)
2179	*/
2180	static inline void sock_lock_init(struct sock *sk)
2181	{
2182	sk_owner_clear(sk);
2183
2184	if (sk->sk_kern_sock)
2185	sock_lock_init_class_and_name(
2186	sk,
2187	af_family_kern_slock_key_strings[sk->sk_family],
2188	af_family_kern_slock_keys + sk->sk_family,
2189	af_family_kern_key_strings[sk->sk_family],
2190	af_family_kern_keys + sk->sk_family);
2191	else
2192	sock_lock_init_class_and_name(
2193	sk,
2194	af_family_slock_key_strings[sk->sk_family],
2195	af_family_slock_keys + sk->sk_family,
2196	af_family_key_strings[sk->sk_family],
2197	af_family_keys + sk->sk_family);
2198	}
2199
2200	/*
2201	* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
2202	* even temporarily, because of RCU lookups. sk_node should also be left as is.
2203	* We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
2204	*/
2205	static void sock_copy(struct sock nsk, const* struct sock *osk)
2206	{
2207	const struct proto *prot = READ_ONCE(osk->sk_prot);
2208	#ifdef CONFIG_SECURITY_NETWORK
2209	void *sptr = nsk->sk_security;
2210	#endif
2211
2212	/ If we move sk_tx_queue_mapping out of the private section,*
2213	* we must check if sk_tx_queue_clear() is called after
2214	* sock_copy() in sk_clone_lock().
2215	*/
2216	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
2217	offsetof(struct sock, sk_dontcopy_begin) \|\|
2218	offsetof(struct sock, sk_tx_queue_mapping) >=
2219	offsetof(struct sock, sk_dontcopy_end));
2220
2221	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
2222
2223	unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
2224	prot->obj_size - offsetof(struct sock, sk_dontcopy_end),
2225	/ alloc is larger than struct, see sk_prot_alloc() /);
2226
2227	#ifdef CONFIG_SECURITY_NETWORK
2228	nsk->sk_security = sptr;
2229	security_sk_clone(sk: osk, newsk: nsk);
2230	#endif
2231	}
2232
2233	static struct sock sk_prot_alloc(struct* proto *prot, gfp_t priority,
2234	int family)
2235	{
2236	struct sock *sk;
2237	struct kmem_cache *slab;
2238
2239	slab = prot->slab;
2240	if (slab != NULL) {
2241	sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
2242	if (!sk)
2243	return sk;
2244	if (want_init_on_alloc(flags: priority))
2245	sk_prot_clear_nulls(sk, size: prot->obj_size);
2246	} else
2247	sk = kmalloc(prot->obj_size, priority);
2248
2249	if (sk != NULL) {
2250	if (security_sk_alloc(sk, family, priority))
2251	goto out_free;
2252
2253	if (!try_module_get(module: prot->owner))
2254	goto out_free_sec;
2255	}
2256
2257	return sk;
2258
2259	out_free_sec:
2260	security_sk_free(sk);
2261	out_free:
2262	if (slab != NULL)
2263	kmem_cache_free(s: slab, objp: sk);
2264	else
2265	kfree(objp: sk);
2266	return NULL;
2267	}
2268
2269	static void sk_prot_free(struct proto prot, struct* sock *sk)
2270	{
2271	struct kmem_cache *slab;
2272	struct module *owner;
2273
2274	owner = prot->owner;
2275	slab = prot->slab;
2276
2277	cgroup_sk_free(skcd: &sk->sk_cgrp_data);
2278	mem_cgroup_sk_free(sk);
2279	security_sk_free(sk);
2280
2281	sk_owner_put(sk);
2282
2283	if (slab != NULL)
2284	kmem_cache_free(s: slab, objp: sk);
2285	else
2286	kfree(objp: sk);
2287	module_put(module: owner);
2288	}
2289
2290	/**
2291	* sk_alloc - All socket objects are allocated here
2292	* @net: the applicable net namespace
2293	* @family: protocol family
2294	* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2295	* @prot: struct proto associated with this new sock instance
2296	* @kern: is this to be a kernel socket?
2297	*/
2298	struct sock sk_alloc(struct* net net, int* family, gfp_t priority,
2299	struct proto prot, int* kern)
2300	{
2301	struct sock *sk;
2302
2303	sk = sk_prot_alloc(prot, priority: priority \| __GFP_ZERO, family);
2304	if (sk) {
2305	sk->sk_family = family;
2306	/*
2307	* See comment in struct sock definition to understand
2308	* why we need sk_prot_creator -acme
2309	*/
2310	sk->sk_prot = sk->sk_prot_creator = prot;
2311	sk->sk_kern_sock = kern;
2312	sock_lock_init(sk);
2313	sk->sk_net_refcnt = kern ? `0` : `1`;
2314	if (likely(sk->sk_net_refcnt)) {
2315	get_net_track(net, tracker: &sk->ns_tracker, gfp: priority);
2316	sock_inuse_add(net, val: `1`);
2317	} else {
2318	net_passive_inc(net);
2319	__netns_tracker_alloc(net, tracker: &sk->ns_tracker,
2320	refcounted: false, gfp: priority);
2321	}
2322
2323	sock_net_set(sk, net);
2324	refcount_set(r: &sk->sk_wmem_alloc, n: `1`);
2325
2326	mem_cgroup_sk_alloc(sk);
2327	cgroup_sk_alloc(skcd: &sk->sk_cgrp_data);
2328	sock_update_classid(skcd: &sk->sk_cgrp_data);
2329	sock_update_netprioidx(skcd: &sk->sk_cgrp_data);
2330	sk_tx_queue_clear(sk);
2331	}
2332
2333	return sk;
2334	}
2335	EXPORT_SYMBOL(sk_alloc);
2336
2337	/ Sockets having SOCK_RCU_FREE will call this function after one RCU*
2338	* grace period. This is the case for UDP sockets and TCP listeners.
2339	*/
2340	static void __sk_destruct(struct rcu_head *head)
2341	{
2342	struct sock sk = container_of(head, struct* sock, sk_rcu);
2343	struct net *net = sock_net(sk);
2344	struct sk_filter *filter;
2345
2346	if (sk->sk_destruct)
2347	sk->sk_destruct(sk);
2348
2349	filter = rcu_dereference_check(sk->sk_filter,
2350	refcount_read(&sk->sk_wmem_alloc) == `0`);
2351	if (filter) {
2352	sk_filter_uncharge(sk, fp: filter);
2353	RCU_INIT_POINTER(sk->sk_filter, NULL);
2354	}
2355
2356	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
2357
2358	#ifdef CONFIG_BPF_SYSCALL
2359	bpf_sk_storage_free(sk);
2360	#endif
2361
2362	if (atomic_read(v: &sk->sk_omem_alloc))
2363	pr_debug("%s: optmem leakage (%d bytes) detected\n",
2364	__func__, atomic_read(&sk->sk_omem_alloc));
2365
2366	if (sk->sk_frag.page) {
2367	put_page(page: sk->sk_frag.page);
2368	sk->sk_frag.page = NULL;
2369	}
2370
2371	/ We do not need to acquire sk->sk_peer_lock, we are the last user. /
2372	put_cred(cred: sk->sk_peer_cred);
2373	put_pid(pid: sk->sk_peer_pid);
2374
2375	if (likely(sk->sk_net_refcnt)) {
2376	put_net_track(net, tracker: &sk->ns_tracker);
2377	} else {
2378	__netns_tracker_free(net, tracker: &sk->ns_tracker, refcounted: false);
2379	net_passive_dec(net);
2380	}
2381	sk_prot_free(prot: sk->sk_prot_creator, sk);
2382	}
2383
2384	void sk_net_refcnt_upgrade(struct sock *sk)
2385	{
2386	struct net *net = sock_net(sk);
2387
2388	WARN_ON_ONCE(sk->sk_net_refcnt);
2389	__netns_tracker_free(net, tracker: &sk->ns_tracker, refcounted: false);
2390	net_passive_dec(net);
2391	sk->sk_net_refcnt = `1`;
2392	get_net_track(net, tracker: &sk->ns_tracker, GFP_KERNEL);
2393	sock_inuse_add(net, val: `1`);
2394	}
2395	EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade);
2396
2397	void sk_destruct(struct sock *sk)
2398	{
2399	bool use_call_rcu = sock_flag(sk, flag: SOCK_RCU_FREE);
2400
2401	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
2402	reuseport_detach_sock(sk);
2403	use_call_rcu = true;
2404	}
2405
2406	if (use_call_rcu)
2407	call_rcu(head: &sk->sk_rcu, func: __sk_destruct);
2408	else
2409	__sk_destruct(head: &sk->sk_rcu);
2410	}
2411
2412	static void __sk_free(struct sock *sk)
2413	{
2414	if (likely(sk->sk_net_refcnt))
2415	sock_inuse_add(net: sock_net(sk), val: -`1`);
2416
2417	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2418	sock_diag_broadcast_destroy(sk);
2419	else
2420	sk_destruct(sk);
2421	}
2422
2423	void sk_free(struct sock *sk)
2424	{
2425	/*
2426	* We subtract one from sk_wmem_alloc and can know if
2427	* some packets are still in some tx queue.
2428	* If not null, sock_wfree() will call __sk_free(sk) later
2429	*/
2430	if (refcount_dec_and_test(r: &sk->sk_wmem_alloc))
2431	__sk_free(sk);
2432	}
2433	EXPORT_SYMBOL(sk_free);
2434
2435	static void sk_init_common(struct sock *sk)
2436	{
2437	skb_queue_head_init(list: &sk->sk_receive_queue);
2438	skb_queue_head_init(list: &sk->sk_write_queue);
2439	skb_queue_head_init(list: &sk->sk_error_queue);
2440
2441	rwlock_init(&sk->sk_callback_lock);
2442	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
2443	af_rlock_keys + sk->sk_family,
2444	af_family_rlock_key_strings[sk->sk_family]);
2445	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
2446	af_wlock_keys + sk->sk_family,
2447	af_family_wlock_key_strings[sk->sk_family]);
2448	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
2449	af_elock_keys + sk->sk_family,
2450	af_family_elock_key_strings[sk->sk_family]);
2451	if (sk->sk_kern_sock)
2452	lockdep_set_class_and_name(&sk->sk_callback_lock,
2453	af_kern_callback_keys + sk->sk_family,
2454	af_family_kern_clock_key_strings[sk->sk_family]);
2455	else
2456	lockdep_set_class_and_name(&sk->sk_callback_lock,
2457	af_callback_keys + sk->sk_family,
2458	af_family_clock_key_strings[sk->sk_family]);
2459	}
2460
2461	/**
2462	* sk_clone_lock - clone a socket, and lock its clone
2463	* @sk: the socket to clone
2464	* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2465	*
2466	* Caller must unlock socket even in error path (bh_unlock_sock(newsk))
2467	*/
2468	struct sock sk_clone_lock(const* struct sock sk, const* gfp_t priority)
2469	{
2470	struct proto *prot = READ_ONCE(sk->sk_prot);
2471	struct sk_filter *filter;
2472	bool is_charged = true;
2473	struct sock *newsk;
2474
2475	newsk = sk_prot_alloc(prot, priority, family: sk->sk_family);
2476	if (!newsk)
2477	goto out;
2478
2479	sock_copy(nsk: newsk, osk: sk);
2480
2481	newsk->sk_prot_creator = prot;
2482
2483	/ SANITY /
2484	if (likely(newsk->sk_net_refcnt)) {
2485	get_net_track(net: sock_net(sk: newsk), tracker: &newsk->ns_tracker, gfp: priority);
2486	sock_inuse_add(net: sock_net(sk: newsk), val: `1`);
2487	} else {
2488	/ Kernel sockets are not elevating the struct net refcount.*
2489	* Instead, use a tracker to more easily detect if a layer
2490	* is not properly dismantling its kernel sockets at netns
2491	* destroy time.
2492	*/
2493	net_passive_inc(net: sock_net(sk: newsk));
2494	__netns_tracker_alloc(net: sock_net(sk: newsk), tracker: &newsk->ns_tracker,
2495	refcounted: false, gfp: priority);
2496	}
2497	sk_node_init(node: &newsk->sk_node);
2498	sock_lock_init(sk: newsk);
2499	bh_lock_sock(newsk);
2500	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
2501	newsk->sk_backlog.len = `0`;
2502
2503	atomic_set(v: &newsk->sk_rmem_alloc, i: `0`);
2504
2505	/ sk_wmem_alloc set to one (see sk_free() and sock_wfree()) /
2506	refcount_set(r: &newsk->sk_wmem_alloc, n: `1`);
2507
2508	atomic_set(v: &newsk->sk_omem_alloc, i: `0`);
2509	sk_init_common(sk: newsk);
2510
2511	newsk->sk_dst_cache = NULL;
2512	newsk->sk_dst_pending_confirm = `0`;
2513	newsk->sk_wmem_queued = `0`;
2514	newsk->sk_forward_alloc = `0`;
2515	newsk->sk_reserved_mem = `0`;
2516	atomic_set(v: &newsk->sk_drops, i: `0`);
2517	newsk->sk_send_head = NULL;
2518	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
2519	atomic_set(v: &newsk->sk_zckey, i: `0`);
2520
2521	sock_reset_flag(sk: newsk, flag: SOCK_DONE);
2522
2523	/ sk->sk_memcg will be populated at accept() time /
2524	newsk->sk_memcg = NULL;
2525
2526	cgroup_sk_clone(skcd: &newsk->sk_cgrp_data);
2527
2528	rcu_read_lock();
2529	filter = rcu_dereference(sk->sk_filter);
2530	if (filter != NULL)
2531	/ though it's an empty new sock, the charging may fail*
2532	* if sysctl_optmem_max was changed between creation of
2533	* original socket and cloning
2534	*/
2535	is_charged = sk_filter_charge(sk: newsk, fp: filter);
2536	RCU_INIT_POINTER(newsk->sk_filter, filter);
2537	rcu_read_unlock();
2538
2539	if (unlikely(!is_charged \|\| xfrm_sk_clone_policy(newsk, sk))) {
2540	/ We need to make sure that we don't uncharge the new*
2541	* socket if we couldn't charge it in the first place
2542	* as otherwise we uncharge the parent's filter.
2543	*/
2544	if (!is_charged)
2545	RCU_INIT_POINTER(newsk->sk_filter, NULL);
2546
2547	goto free;
2548	}
2549
2550	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
2551
2552	if (bpf_sk_storage_clone(sk, newsk))
2553	goto free;
2554
2555	/ Clear sk_user_data if parent had the pointer tagged*
2556	* as not suitable for copying when cloning.
2557	*/
2558	if (sk_user_data_is_nocopy(sk: newsk))
2559	newsk->sk_user_data = NULL;
2560
2561	newsk->sk_err = `0`;
2562	newsk->sk_err_soft = `0`;
2563	newsk->sk_priority = `0`;
2564	newsk->sk_incoming_cpu = raw_smp_processor_id();
2565
2566	/ Before updating sk_refcnt, we must commit prior changes to memory*
2567	* (Documentation/RCU/rculist_nulls.rst for details)
2568	*/
2569	smp_wmb();
2570	refcount_set(r: &newsk->sk_refcnt, n: `2`);
2571
2572	sk_set_socket(sk: newsk, NULL);
2573	sk_tx_queue_clear(sk: newsk);
2574	RCU_INIT_POINTER(newsk->sk_wq, NULL);
2575
2576	if (newsk->sk_prot->sockets_allocated)
2577	sk_sockets_allocated_inc(sk: newsk);
2578
2579	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
2580	net_enable_timestamp();
2581	out:
2582	return newsk;
2583	free:
2584	/ It is still raw copy of parent, so invalidate*
2585	* destructor and make plain sk_free()
2586	*/
2587	newsk->sk_destruct = NULL;
2588	bh_unlock_sock(newsk);
2589	sk_free(newsk);
2590	newsk = NULL;
2591	goto out;
2592	}
2593	EXPORT_SYMBOL_GPL(sk_clone_lock);
2594
2595	static u32 sk_dst_gso_max_size(struct sock sk, struct* dst_entry *dst)
2596	{
2597	bool is_ipv6 = false;
2598	u32 max_size;
2599
2600	#if IS_ENABLED(CONFIG_IPV6)
2601	is_ipv6 = (sk->sk_family == AF_INET6 &&
2602	!ipv6_addr_v4mapped(a: &sk->sk_v6_rcv_saddr));
2603	#endif
2604	/ pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() /
2605	max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2606	READ_ONCE(dst->dev->gso_ipv4_max_size);
2607	if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2608	max_size = GSO_LEGACY_MAX_SIZE;
2609
2610	return max_size - (MAX_TCP_HEADER + `1`);
2611	}
2612
2613	void sk_setup_caps(struct sock sk, struct* dst_entry *dst)
2614	{
2615	u32 max_segs = `1`;
2616
2617	sk->sk_route_caps = dst->dev->features;
2618	if (sk_is_tcp(sk)) {
2619	struct inet_connection_sock *icsk = inet_csk(sk);
2620
2621	sk->sk_route_caps \|= NETIF_F_GSO;
2622	icsk->icsk_ack.dst_quick_ack = dst_metric(dst, RTAX_QUICKACK);
2623	}
2624	if (sk->sk_route_caps & NETIF_F_GSO)
2625	sk->sk_route_caps \|= NETIF_F_GSO_SOFTWARE;
2626	if (unlikely(sk->sk_gso_disabled))
2627	sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2628	if (sk_can_gso(sk)) {
2629	if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2630	sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2631	} else {
2632	sk->sk_route_caps \|= NETIF_F_SG \| NETIF_F_HW_CSUM;
2633	sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
2634	/ pairs with the WRITE_ONCE() in netif_set_gso_max_segs() /
2635	max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), `1`);
2636	}
2637	}
2638	sk->sk_gso_max_segs = max_segs;
2639	sk_dst_set(sk, dst);
2640	}
2641	EXPORT_SYMBOL_GPL(sk_setup_caps);
2642
2643	/*
2644	* Simple resource managers for sockets.
2645	*/
2646
2647
2648	/*
2649	* Write buffer destructor automatically called from kfree_skb.
2650	*/
2651	void sock_wfree(struct sk_buff *skb)
2652	{
2653	struct sock *sk = skb->sk;
2654	unsigned int len = skb->truesize;
2655	bool free;
2656
2657	if (!sock_flag(sk, flag: SOCK_USE_WRITE_QUEUE)) {
2658	if (sock_flag(sk, flag: SOCK_RCU_FREE) &&
2659	sk->sk_write_space == sock_def_write_space) {
2660	rcu_read_lock();
2661	free = refcount_sub_and_test(i: len, r: &sk->sk_wmem_alloc);
2662	sock_def_write_space_wfree(sk);
2663	rcu_read_unlock();
2664	if (unlikely(free))
2665	__sk_free(sk);
2666	return;
2667	}
2668
2669	/*
2670	* Keep a reference on sk_wmem_alloc, this will be released
2671	* after sk_write_space() call
2672	*/
2673	WARN_ON(refcount_sub_and_test(len - `1`, &sk->sk_wmem_alloc));
2674	sk->sk_write_space(sk);
2675	len = `1`;
2676	}
2677	/*
2678	* if sk_wmem_alloc reaches 0, we must finish what sk_free()
2679	* could not do because of in-flight packets
2680	*/
2681	if (refcount_sub_and_test(i: len, r: &sk->sk_wmem_alloc))
2682	__sk_free(sk);
2683	}
2684	EXPORT_SYMBOL(sock_wfree);
2685
2686	/ This variant of sock_wfree() is used by TCP,*
2687	* since it sets SOCK_USE_WRITE_QUEUE.
2688	*/
2689	void __sock_wfree(struct sk_buff *skb)
2690	{
2691	struct sock *sk = skb->sk;
2692
2693	if (refcount_sub_and_test(i: skb->truesize, r: &sk->sk_wmem_alloc))
2694	__sk_free(sk);
2695	}
2696
2697	void skb_set_owner_w(struct sk_buff skb, struct* sock *sk)
2698	{
2699	skb_orphan(skb);
2700	#ifdef CONFIG_INET
2701	if (unlikely(!sk_fullsock(sk)))
2702	return skb_set_owner_edemux(skb, sk);
2703	#endif
2704	skb->sk = sk;
2705	skb->destructor = sock_wfree;
2706	skb_set_hash_from_sk(skb, sk);
2707	/*
2708	* We used to take a refcount on sk, but following operation
2709	* is enough to guarantee sk_free() won't free this sock until
2710	* all in-flight packets are completed
2711	*/
2712	refcount_add(i: skb->truesize, r: &sk->sk_wmem_alloc);
2713	}
2714	EXPORT_SYMBOL(skb_set_owner_w);
2715
2716	static bool can_skb_orphan_partial(const struct sk_buff *skb)
2717	{
2718	/ Drivers depend on in-order delivery for crypto offload,*
2719	* partial orphan breaks out-of-order-OK logic.
2720	*/
2721	if (skb_is_decrypted(skb))
2722	return false;
2723
2724	return (skb->destructor == sock_wfree \|\|
2725	(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2726	}
2727
2728	/ This helper is used by netem, as it can hold packets in its*
2729	* delay queue. We want to allow the owner socket to send more
2730	* packets, as if they were already TX completed by a typical driver.
2731	* But we also want to keep skb->sk set because some packet schedulers
2732	* rely on it (sch_fq for example).
2733	*/
2734	void skb_orphan_partial(struct sk_buff *skb)
2735	{
2736	if (skb_is_tcp_pure_ack(skb))
2737	return;
2738
2739	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, sk: skb->sk))
2740	return;
2741
2742	skb_orphan(skb);
2743	}
2744	EXPORT_SYMBOL(skb_orphan_partial);
2745
2746	/*
2747	* Read buffer destructor automatically called from kfree_skb.
2748	*/
2749	void sock_rfree(struct sk_buff *skb)
2750	{
2751	struct sock *sk = skb->sk;
2752	unsigned int len = skb->truesize;
2753
2754	atomic_sub(i: len, v: &sk->sk_rmem_alloc);
2755	sk_mem_uncharge(sk, size: len);
2756	}
2757	EXPORT_SYMBOL(sock_rfree);
2758
2759	/*
2760	* Buffer destructor for skbs that are not used directly in read or write
2761	* path, e.g. for error handler skbs. Automatically called from kfree_skb.
2762	*/
2763	void sock_efree(struct sk_buff *skb)
2764	{
2765	sock_put(sk: skb->sk);
2766	}
2767	EXPORT_SYMBOL(sock_efree);
2768
/* Buffer destructor for prefetch/receive path where reference count may
 * not be held, e.g. for listen sockets.
 *
 * Does nothing for sockets that are not refcounted. A syncookie request
 * socket (TCP_NEW_SYN_RECV) has its listener pointer cleared and is
 * freed with reqsk_free(); any other socket is released with
 * sock_gen_put().
 */
#ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (!sk_is_refcounted(sk))
		return;

	if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
		inet_reqsk(sk)->rsk_listener = NULL;
		reqsk_free(inet_reqsk(sk));
		return;
	}

	sock_gen_put(sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
2790
2791	kuid_t sock_i_uid(struct sock *sk)
2792	{
2793	kuid_t uid;
2794
2795	read_lock_bh(&sk->sk_callback_lock);
2796	uid = sk->sk_socket ? SOCK_INODE(socket: sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2797	read_unlock_bh(&sk->sk_callback_lock);
2798	return uid;
2799	}
2800	EXPORT_SYMBOL(sock_i_uid);
2801
2802	unsigned long __sock_i_ino(struct sock *sk)
2803	{
2804	unsigned long ino;
2805
2806	read_lock(&sk->sk_callback_lock);
2807	ino = sk->sk_socket ? SOCK_INODE(socket: sk->sk_socket)->i_ino : `0`;
2808	read_unlock(&sk->sk_callback_lock);
2809	return ino;
2810	}
2811	EXPORT_SYMBOL(__sock_i_ino);
2812
/**
 * sock_i_ino - inode number behind a socket
 * @sk: socket to query
 *
 * Wrapper around __sock_i_ino() that disables bottom halves for the
 * duration of the lookup.
 *
 * Return: the inode number, or 0 when @sk has no struct socket attached.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	local_bh_disable();
	ino = __sock_i_ino(sk);
	local_bh_enable();
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);
2823
2824	/*
2825	* Allocate a skb from the socket's send buffer.
2826	*/
2827	struct sk_buff sock_wmalloc(struct* sock sk, unsigned* long size, int force,
2828	gfp_t priority)
2829	{
2830	if (force \|\|
2831	refcount_read(r: &sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2832	struct sk_buff *skb = alloc_skb(size, priority);
2833
2834	if (skb) {
2835	skb_set_owner_w(skb, sk);
2836	return skb;
2837	}
2838	}
2839	return NULL;
2840	}
2841	EXPORT_SYMBOL(sock_wmalloc);
2842
2843	static void sock_ofree(struct sk_buff *skb)
2844	{
2845	struct sock *sk = skb->sk;
2846
2847	atomic_sub(i: skb->truesize, v: &sk->sk_omem_alloc);
2848	}
2849
2850	struct sk_buff sock_omalloc(struct* sock sk, unsigned* long size,
2851	gfp_t priority)
2852	{
2853	struct sk_buff *skb;
2854
2855	/ small safe race: SKB_TRUESIZE may differ from final skb->truesize /
2856	if (atomic_read(v: &sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2857	READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
2858	return NULL;
2859
2860	skb = alloc_skb(size, priority);
2861	if (!skb)
2862	return NULL;
2863
2864	atomic_add(i: skb->truesize, v: &sk->sk_omem_alloc);
2865	skb->sk = sk;
2866	skb->destructor = sock_ofree;
2867	return skb;
2868	}
2869
2870	/*
2871	* Allocate a memory block from the socket's option memory buffer.
2872	*/
2873	void sock_kmalloc(struct* sock sk, int* size, gfp_t priority)
2874	{
2875	int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
2876
2877	if ((unsigned int)size <= optmem_max &&
2878	atomic_read(v: &sk->sk_omem_alloc) + size < optmem_max) {
2879	void *mem;
2880	/ First do the add, to avoid the race if kmalloc*
2881	* might sleep.
2882	*/
2883	atomic_add(i: size, v: &sk->sk_omem_alloc);
2884	mem = kmalloc(size, priority);
2885	if (mem)
2886	return mem;
2887	atomic_sub(i: size, v: &sk->sk_omem_alloc);
2888	}
2889	return NULL;
2890	}
2891	EXPORT_SYMBOL(sock_kmalloc);
2892
2893	/*
2894	* Duplicate the input "src" memory block using the socket's
2895	* option memory buffer.
2896	*/
2897	void sock_kmemdup(struct* sock sk, const* void *src,
2898	int size, gfp_t priority)
2899	{
2900	void *mem;
2901
2902	mem = sock_kmalloc(sk, size, priority);
2903	if (mem)
2904	memcpy(mem, src, size);
2905	return mem;
2906	}
2907	EXPORT_SYMBOL(sock_kmemdup);
2908
2909	/ Free an option memory block. Note, we actually want the inline*
2910	* here as this allows gcc to detect the nullify and fold away the
2911	* condition entirely.
2912	*/
2913	static inline void __sock_kfree_s(struct sock sk, void* mem, int* size,
2914	const bool nullify)
2915	{
2916	if (WARN_ON_ONCE(!mem))
2917	return;
2918	if (nullify)
2919	kfree_sensitive(objp: mem);
2920	else
2921	kfree(objp: mem);
2922	atomic_sub(i: size, v: &sk->sk_omem_alloc);
2923	}
2924
2925	void sock_kfree_s(struct sock sk, void* mem, int* size)
2926	{
2927	__sock_kfree_s(sk, mem, size, nullify: false);
2928	}
2929	EXPORT_SYMBOL(sock_kfree_s);
2930
2931	void sock_kzfree_s(struct sock sk, void* mem, int* size)
2932	{
2933	__sock_kfree_s(sk, mem, size, nullify: true);
2934	}
2935	EXPORT_SYMBOL(sock_kzfree_s);
2936
2937	/ It is almost wait_for_tcp_memory minus release_sock/lock_sock.*
2938	I think, these locks should be removed for datagram sockets.
2939	*/
2940	static long sock_wait_for_wmem(struct sock sk, long* timeo)
2941	{
2942	DEFINE_WAIT(wait);
2943
2944	sk_clear_bit(nr: SOCKWQ_ASYNC_NOSPACE, sk);
2945	for (;;) {
2946	if (!timeo)
2947	break;
2948	if (signal_pending(current))
2949	break;
2950	set_bit(nr: SOCK_NOSPACE, addr: &sk->sk_socket->flags);
2951	prepare_to_wait(wq_head: sk_sleep(sk), wq_entry: &wait, TASK_INTERRUPTIBLE);
2952	if (refcount_read(r: &sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2953	break;
2954	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2955	break;
2956	if (READ_ONCE(sk->sk_err))
2957	break;
2958	timeo = schedule_timeout(timeout: timeo);
2959	}
2960	finish_wait(wq_head: sk_sleep(sk), wq_entry: &wait);
2961	return timeo;
2962	}
2963
2964
2965	/*
2966	* Generic send/receive buffer handlers
2967	*/
2968
2969	struct sk_buff sock_alloc_send_pskb(struct* sock sk, unsigned* long header_len,
2970	unsigned long data_len, int noblock,
2971	int errcode, int* max_page_order)
2972	{
2973	struct sk_buff *skb;
2974	long timeo;
2975	int err;
2976
2977	timeo = sock_sndtimeo(sk, noblock);
2978	for (;;) {
2979	err = sock_error(sk);
2980	if (err != `0`)
2981	goto failure;
2982
2983	err = -EPIPE;
2984	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2985	goto failure;
2986
2987	if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2988	break;
2989
2990	sk_set_bit(nr: SOCKWQ_ASYNC_NOSPACE, sk);
2991	set_bit(nr: SOCK_NOSPACE, addr: &sk->sk_socket->flags);
2992	err = -EAGAIN;
2993	if (!timeo)
2994	goto failure;
2995	if (signal_pending(current))
2996	goto interrupted;
2997	timeo = sock_wait_for_wmem(sk, timeo);
2998	}
2999	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
3000	errcode, gfp_mask: sk->sk_allocation);
3001	if (skb)
3002	skb_set_owner_w(skb, sk);
3003	return skb;
3004
3005	interrupted:
3006	err = sock_intr_errno(timeo);
3007	failure:
3008	*errcode = err;
3009	return NULL;
3010	}
3011	EXPORT_SYMBOL(sock_alloc_send_pskb);
3012
3013	int __sock_cmsg_send(struct sock sk, struct* cmsghdr *cmsg,
3014	struct sockcm_cookie *sockc)
3015	{
3016	u32 tsflags;
3017
3018	BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (`1` << `31`));
3019
3020	switch (cmsg->cmsg_type) {
3021	case SO_MARK:
3022	if (!ns_capable(ns: sock_net(sk)->user_ns, CAP_NET_RAW) &&
3023	!ns_capable(ns: sock_net(sk)->user_ns, CAP_NET_ADMIN))
3024	return -EPERM;
3025	if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
3026	return -EINVAL;
3027	sockc->mark = (u32 )CMSG_DATA(cmsg);
3028	break;
3029	case SO_TIMESTAMPING_OLD:
3030	case SO_TIMESTAMPING_NEW:
3031	if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
3032	return -EINVAL;
3033
3034	tsflags = (u32 )CMSG_DATA(cmsg);
3035	if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
3036	return -EINVAL;
3037
3038	sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
3039	sockc->tsflags \|= tsflags;
3040	break;
3041	case SCM_TXTIME:
3042	if (!sock_flag(sk, flag: SOCK_TXTIME))
3043	return -EINVAL;
3044	if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
3045	return -EINVAL;
3046	sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
3047	break;
3048	case SCM_TS_OPT_ID:
3049	if (sk_is_tcp(sk))
3050	return -EINVAL;
3051	tsflags = READ_ONCE(sk->sk_tsflags);
3052	if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
3053	return -EINVAL;
3054	if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
3055	return -EINVAL;
3056	sockc->ts_opt_id = (u32 )CMSG_DATA(cmsg);
3057	sockc->tsflags \|= SOCKCM_FLAG_TS_OPT_ID;
3058	break;
3059	/ SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. /
3060	case SCM_RIGHTS:
3061	case SCM_CREDENTIALS:
3062	break;
3063	case SO_PRIORITY:
3064	if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
3065	return -EINVAL;
3066	if (!sk_set_prio_allowed(sk, val: (u32 )CMSG_DATA(cmsg)))
3067	return -EPERM;
3068	sockc->priority = (u32 )CMSG_DATA(cmsg);
3069	break;
3070	case SCM_DEVMEM_DMABUF:
3071	if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
3072	return -EINVAL;
3073	sockc->dmabuf_id = (u32 )CMSG_DATA(cmsg);
3074	break;
3075	default:
3076	return -EINVAL;
3077	}
3078	return `0`;
3079	}
3080	EXPORT_SYMBOL(__sock_cmsg_send);
3081
3082	int sock_cmsg_send(struct sock sk, struct* msghdr *msg,
3083	struct sockcm_cookie *sockc)
3084	{
3085	struct cmsghdr *cmsg;
3086	int ret;
3087
3088	for_each_cmsghdr(cmsg, msg) {
3089	if (!CMSG_OK(msg, cmsg))
3090	return -EINVAL;
3091	if (cmsg->cmsg_level != SOL_SOCKET)
3092	continue;
3093	ret = __sock_cmsg_send(sk, cmsg, sockc);
3094	if (ret)
3095	return ret;
3096	}
3097	return `0`;
3098	}
3099	EXPORT_SYMBOL(sock_cmsg_send);
3100
3101	static void sk_enter_memory_pressure(struct sock *sk)
3102	{
3103	if (!sk->sk_prot->enter_memory_pressure)
3104	return;
3105
3106	sk->sk_prot->enter_memory_pressure(sk);
3107	}
3108
3109	static void sk_leave_memory_pressure(struct sock *sk)
3110	{
3111	if (sk->sk_prot->leave_memory_pressure) {
3112	INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
3113	tcp_leave_memory_pressure, sk);
3114	} else {
3115	unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
3116
3117	if (memory_pressure && READ_ONCE(*memory_pressure))
3118	WRITE_ONCE(*memory_pressure, `0`);
3119	}
3120	}
3121
/* Static key, off by default. When enabled, skb_page_frag_refill() skips
 * its high-order page attempt and allocates single pages only.
 */
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
3123
3124	/**
3125	* skb_page_frag_refill - check that a page_frag contains enough room
3126	* @sz: minimum size of the fragment we want to get
3127	* @pfrag: pointer to page_frag
3128	* @gfp: priority for memory allocation
3129	*
3130	* Note: While this allocator tries to use high order pages, there is
3131	* no guarantee that allocations succeed. Therefore, @sz MUST be
3132	* less or equal than PAGE_SIZE.
3133	*/
3134	bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
3135	{
3136	if (pfrag->page) {
3137	if (page_ref_count(page: pfrag->page) == `1`) {
3138	pfrag->offset = `0`;
3139	return true;
3140	}
3141	if (pfrag->offset + sz <= pfrag->size)
3142	return true;
3143	put_page(page: pfrag->page);
3144	}
3145
3146	pfrag->offset = `0`;
3147	if (SKB_FRAG_PAGE_ORDER &&
3148	!static_branch_unlikely(&net_high_order_alloc_disable_key)) {
3149	/ Avoid direct reclaim but allow kswapd to wake /
3150	pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) \|
3151	__GFP_COMP \| __GFP_NOWARN \|
3152	__GFP_NORETRY,
3153	SKB_FRAG_PAGE_ORDER);
3154	if (likely(pfrag->page)) {
3155	pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
3156	return true;
3157	}
3158	}
3159	pfrag->page = alloc_page(gfp);
3160	if (likely(pfrag->page)) {
3161	pfrag->size = PAGE_SIZE;
3162	return true;
3163	}
3164	return false;
3165	}
3166	EXPORT_SYMBOL(skb_page_frag_refill);
3167
3168	bool sk_page_frag_refill(struct sock sk, struct* page_frag *pfrag)
3169	{
3170	if (likely(skb_page_frag_refill(`32U`, pfrag, sk->sk_allocation)))
3171	return true;
3172
3173	sk_enter_memory_pressure(sk);
3174	sk_stream_moderate_sndbuf(sk);
3175	return false;
3176	}
3177	EXPORT_SYMBOL(sk_page_frag_refill);
3178
3179	void __lock_sock(struct sock *sk)
3180	__releases(&sk->sk_lock.slock)
3181	__acquires(&sk->sk_lock.slock)
3182	{
3183	DEFINE_WAIT(wait);
3184
3185	for (;;) {
3186	prepare_to_wait_exclusive(wq_head: &sk->sk_lock.wq, wq_entry: &wait,
3187	TASK_UNINTERRUPTIBLE);
3188	spin_unlock_bh(lock: &sk->sk_lock.slock);
3189	schedule();
3190	spin_lock_bh(lock: &sk->sk_lock.slock);
3191	if (!sock_owned_by_user(sk))
3192	break;
3193	}
3194	finish_wait(wq_head: &sk->sk_lock.wq, wq_entry: &wait);
3195	}
3196
3197	void __release_sock(struct sock *sk)
3198	__releases(&sk->sk_lock.slock)
3199	__acquires(&sk->sk_lock.slock)
3200	{
3201	struct sk_buff skb, next;
3202
3203	while ((skb = sk->sk_backlog.head) != NULL) {
3204	sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
3205
3206	spin_unlock_bh(lock: &sk->sk_lock.slock);
3207
3208	do {
3209	next = skb->next;
3210	prefetch(next);
3211	DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
3212	skb_mark_not_on_list(skb);
3213	sk_backlog_rcv(sk, skb);
3214
3215	cond_resched();
3216
3217	skb = next;
3218	} while (skb != NULL);
3219
3220	spin_lock_bh(lock: &sk->sk_lock.slock);
3221	}
3222
3223	/*
3224	* Doing the zeroing here guarantee we can not loop forever
3225	* while a wild producer attempts to flood us.
3226	*/
3227	sk->sk_backlog.len = `0`;
3228	}
3229
3230	void __sk_flush_backlog(struct sock *sk)
3231	{
3232	spin_lock_bh(lock: &sk->sk_lock.slock);
3233	__release_sock(sk);
3234
3235	if (sk->sk_prot->release_cb)
3236	INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
3237	tcp_release_cb, sk);
3238
3239	spin_unlock_bh(lock: &sk->sk_lock.slock);
3240	}
3241	EXPORT_SYMBOL_GPL(__sk_flush_backlog);
3242
3243	/**
3244	* sk_wait_data - wait for data to arrive at sk_receive_queue
3245	* @sk: sock to wait on
3246	* @timeo: for how long
3247	* @skb: last skb seen on sk_receive_queue
3248	*
3249	* Now socket state including sk->sk_err is changed only under lock,
3250	* hence we may omit checks after joining wait queue.
3251	* We check receive queue before schedule() only as optimization;
3252	* it is very likely that release_sock() added new data.
3253	*/
3254	int sk_wait_data(struct sock sk, long* timeo, const* struct sk_buff *skb)
3255	{
3256	DEFINE_WAIT_FUNC(wait, woken_wake_function);
3257	int rc;
3258
3259	add_wait_queue(wq_head: sk_sleep(sk), wq_entry: &wait);
3260	sk_set_bit(nr: SOCKWQ_ASYNC_WAITDATA, sk);
3261	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
3262	sk_clear_bit(nr: SOCKWQ_ASYNC_WAITDATA, sk);
3263	remove_wait_queue(wq_head: sk_sleep(sk), wq_entry: &wait);
3264	return rc;
3265	}
3266	EXPORT_SYMBOL(sk_wait_data);
3267
3268	/**
3269	* __sk_mem_raise_allocated - increase memory_allocated
3270	* @sk: socket
3271	* @size: memory size to allocate
3272	* @amt: pages to allocate
3273	* @kind: allocation type
3274	*
3275	* Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
3276	*
3277	* Unlike the globally shared limits among the sockets under same protocol,
3278	* consuming the budget of a memcg won't have direct effect on other ones.
3279	* So be optimistic about memcg's tolerance, and leave the callers to decide
3280	* whether or not to raise allocated through sk_under_memory_pressure() or
3281	* its variants.
3282	*/
3283	int __sk_mem_raise_allocated(struct sock sk, int* size, int amt, int kind)
3284	{
3285	struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
3286	struct proto *prot = sk->sk_prot;
3287	bool charged = true;
3288	long allocated;
3289
3290	sk_memory_allocated_add(sk, val: amt);
3291	allocated = sk_memory_allocated(sk);
3292
3293	if (memcg) {
3294	charged = mem_cgroup_charge_skmem(memcg, nr_pages: amt, gfp_mask: gfp_memcg_charge());
3295	if (!charged)
3296	goto suppress_allocation;
3297	}
3298
3299	/ Under limit. /
3300	if (allocated <= sk_prot_mem_limits(sk, index: `0`)) {
3301	sk_leave_memory_pressure(sk);
3302	return `1`;
3303	}
3304
3305	/ Under pressure. /
3306	if (allocated > sk_prot_mem_limits(sk, index: `1`))
3307	sk_enter_memory_pressure(sk);
3308
3309	/ Over hard limit. /
3310	if (allocated > sk_prot_mem_limits(sk, index: `2`))
3311	goto suppress_allocation;
3312
3313	/ Guarantee minimum buffer size under pressure (either global*
3314	* or memcg) to make sure features described in RFC 7323 (TCP
3315	* Extensions for High Performance) work properly.
3316	*
3317	* This rule does NOT stand when exceeds global or memcg's hard
3318	* limit, or else a DoS attack can be taken place by spawning
3319	* lots of sockets whose usage are under minimum buffer size.
3320	*/
3321	if (kind == SK_MEM_RECV) {
3322	if (atomic_read(v: &sk->sk_rmem_alloc) < sk_get_rmem0(sk, proto: prot))
3323	return `1`;
3324
3325	} else { / SK_MEM_SEND /
3326	int wmem0 = sk_get_wmem0(sk, proto: prot);
3327
3328	if (sk->sk_type == SOCK_STREAM) {
3329	if (sk->sk_wmem_queued < wmem0)
3330	return `1`;
3331	} else if (refcount_read(r: &sk->sk_wmem_alloc) < wmem0) {
3332	return `1`;
3333	}
3334	}
3335
3336	if (sk_has_memory_pressure(sk)) {
3337	u64 alloc;
3338
3339	/ The following 'average' heuristic is within the*
3340	* scope of global accounting, so it only makes
3341	* sense for global memory pressure.
3342	*/
3343	if (!sk_under_global_memory_pressure(sk))
3344	return `1`;
3345
3346	/ Try to be fair among all the sockets under global*
3347	* pressure by allowing the ones that below average
3348	* usage to raise.
3349	*/
3350	alloc = sk_sockets_allocated_read_positive(sk);
3351	if (sk_prot_mem_limits(sk, index: `2`) > alloc *
3352	sk_mem_pages(amt: sk->sk_wmem_queued +
3353	atomic_read(v: &sk->sk_rmem_alloc) +
3354	sk->sk_forward_alloc))
3355	return `1`;
3356	}
3357
3358	suppress_allocation:
3359
3360	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
3361	sk_stream_moderate_sndbuf(sk);
3362
3363	/ Fail only if socket is _under_ its sndbuf.*
3364	* In this case we cannot block, so that we have to fail.
3365	*/
3366	if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
3367	/ Force charge with __GFP_NOFAIL /
3368	if (memcg && !charged) {
3369	mem_cgroup_charge_skmem(memcg, nr_pages: amt,
3370	gfp_mask: gfp_memcg_charge() \| __GFP_NOFAIL);
3371	}
3372	return `1`;
3373	}
3374	}
3375
3376	if (kind == SK_MEM_SEND \|\| (kind == SK_MEM_RECV && charged))
3377	trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
3378
3379	sk_memory_allocated_sub(sk, val: amt);
3380
3381	if (memcg && charged)
3382	mem_cgroup_uncharge_skmem(memcg, nr_pages: amt);
3383
3384	return `0`;
3385	}
3386
3387	/**
3388	* __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
3389	* @sk: socket
3390	* @size: memory size to allocate
3391	* @kind: allocation type
3392	*
3393	* If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
3394	* rmem allocation. This function assumes that protocols which have
3395	* memory_pressure use sk_wmem_queued as write buffer accounting.
3396	*/
3397	int __sk_mem_schedule(struct sock sk, int* size, int kind)
3398	{
3399	int ret, amt = sk_mem_pages(amt: size);
3400
3401	sk_forward_alloc_add(sk, val: amt << PAGE_SHIFT);
3402	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
3403	if (!ret)
3404	sk_forward_alloc_add(sk, val: -(amt << PAGE_SHIFT));
3405	return ret;
3406	}
3407	EXPORT_SYMBOL(__sk_mem_schedule);
3408
3409	/**
3410	* __sk_mem_reduce_allocated - reclaim memory_allocated
3411	* @sk: socket
3412	* @amount: number of quanta
3413	*
3414	* Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
3415	*/
3416	void __sk_mem_reduce_allocated(struct sock sk, int* amount)
3417	{
3418	sk_memory_allocated_sub(sk, val: amount);
3419
3420	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3421	mem_cgroup_uncharge_skmem(memcg: sk->sk_memcg, nr_pages: amount);
3422
3423	if (sk_under_global_memory_pressure(sk) &&
3424	(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, index: `0`)))
3425	sk_leave_memory_pressure(sk);
3426	}
3427
3428	/**
3429	* __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
3430	* @sk: socket
3431	* @amount: number of bytes (rounded down to a PAGE_SIZE multiple)
3432	*/
3433	void __sk_mem_reclaim(struct sock sk, int* amount)
3434	{
3435	amount >>= PAGE_SHIFT;
3436	sk_forward_alloc_add(sk, val: -(amount << PAGE_SHIFT));
3437	__sk_mem_reduce_allocated(sk, amount);
3438	}
3439	EXPORT_SYMBOL(__sk_mem_reclaim);
3440
3441	int sk_set_peek_off(struct sock sk, int* val)
3442	{
3443	WRITE_ONCE(sk->sk_peek_off, val);
3444	return `0`;
3445	}
3446	EXPORT_SYMBOL_GPL(sk_set_peek_off);
3447
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */
3454
3455	int sock_no_bind(struct socket sock, struct* sockaddr saddr, int* len)
3456	{
3457	return -EOPNOTSUPP;
3458	}
3459	EXPORT_SYMBOL(sock_no_bind);
3460
3461	int sock_no_connect(struct socket sock, struct* sockaddr *saddr,
3462	int len, int flags)
3463	{
3464	return -EOPNOTSUPP;
3465	}
3466	EXPORT_SYMBOL(sock_no_connect);
3467
3468	int sock_no_socketpair(struct socket sock1, struct* socket *sock2)
3469	{
3470	return -EOPNOTSUPP;
3471	}
3472	EXPORT_SYMBOL(sock_no_socketpair);
3473
3474	int sock_no_accept(struct socket sock, struct* socket *newsock,
3475	struct proto_accept_arg *arg)
3476	{
3477	return -EOPNOTSUPP;
3478	}
3479	EXPORT_SYMBOL(sock_no_accept);
3480
3481	int sock_no_getname(struct socket sock, struct* sockaddr *saddr,
3482	int peer)
3483	{
3484	return -EOPNOTSUPP;
3485	}
3486	EXPORT_SYMBOL(sock_no_getname);
3487
3488	int sock_no_ioctl(struct socket sock, unsigned* int cmd, unsigned long arg)
3489	{
3490	return -EOPNOTSUPP;
3491	}
3492	EXPORT_SYMBOL(sock_no_ioctl);
3493
3494	int sock_no_listen(struct socket sock, int* backlog)
3495	{
3496	return -EOPNOTSUPP;
3497	}
3498	EXPORT_SYMBOL(sock_no_listen);
3499
3500	int sock_no_shutdown(struct socket sock, int* how)
3501	{
3502	return -EOPNOTSUPP;
3503	}
3504	EXPORT_SYMBOL(sock_no_shutdown);
3505
3506	int sock_no_sendmsg(struct socket sock, struct* msghdr *m, size_t len)
3507	{
3508	return -EOPNOTSUPP;
3509	}
3510	EXPORT_SYMBOL(sock_no_sendmsg);
3511
3512	int sock_no_sendmsg_locked(struct sock sk, struct* msghdr *m, size_t len)
3513	{
3514	return -EOPNOTSUPP;
3515	}
3516	EXPORT_SYMBOL(sock_no_sendmsg_locked);
3517
3518	int sock_no_recvmsg(struct socket sock, struct* msghdr *m, size_t len,
3519	int flags)
3520	{
3521	return -EOPNOTSUPP;
3522	}
3523	EXPORT_SYMBOL(sock_no_recvmsg);
3524
3525	int sock_no_mmap(struct file file, struct* socket sock, struct* vm_area_struct *vma)
3526	{
3527	/ Mirror missing mmap method error code /
3528	return -ENODEV;
3529	}
3530	EXPORT_SYMBOL(sock_no_mmap);
3531
3532	/*
3533	* When a file is received (via SCM_RIGHTS, etc), we must bump the
3534	* various sock-based usage counts.
3535	*/
3536	void __receive_sock(struct file *file)
3537	{
3538	struct socket *sock;
3539
3540	sock = sock_from_file(file);
3541	if (sock) {
3542	sock_update_netprioidx(skcd: &sock->sk->sk_cgrp_data);
3543	sock_update_classid(skcd: &sock->sk->sk_cgrp_data);
3544	}
3545	}
3546
/*
 *	Default Socket Callbacks
 */
3550
3551	static void sock_def_wakeup(struct sock *sk)
3552	{
3553	struct socket_wq *wq;
3554
3555	rcu_read_lock();
3556	wq = rcu_dereference(sk->sk_wq);
3557	if (skwq_has_sleeper(wq))
3558	wake_up_interruptible_all(&wq->wait);
3559	rcu_read_unlock();
3560	}
3561
3562	static void sock_def_error_report(struct sock *sk)
3563	{
3564	struct socket_wq *wq;
3565
3566	rcu_read_lock();
3567	wq = rcu_dereference(sk->sk_wq);
3568	if (skwq_has_sleeper(wq))
3569	wake_up_interruptible_poll(&wq->wait, EPOLLERR);
3570	sk_wake_async_rcu(sk, how: SOCK_WAKE_IO, POLL_ERR);
3571	rcu_read_unlock();
3572	}
3573
3574	void sock_def_readable(struct sock *sk)
3575	{
3576	struct socket_wq *wq;
3577
3578	trace_sk_data_ready(sk);
3579
3580	rcu_read_lock();
3581	wq = rcu_dereference(sk->sk_wq);
3582	if (skwq_has_sleeper(wq))
3583	wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN \| EPOLLPRI \|
3584	EPOLLRDNORM \| EPOLLRDBAND);
3585	sk_wake_async_rcu(sk, how: SOCK_WAKE_WAITD, POLL_IN);
3586	rcu_read_unlock();
3587	}
3588
3589	static void sock_def_write_space(struct sock *sk)
3590	{
3591	struct socket_wq *wq;
3592
3593	rcu_read_lock();
3594
3595	/ Do not wake up a writer until he can make "significant"*
3596	* progress. --DaveM
3597	*/
3598	if (sock_writeable(sk)) {
3599	wq = rcu_dereference(sk->sk_wq);
3600	if (skwq_has_sleeper(wq))
3601	wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT \|
3602	EPOLLWRNORM \| EPOLLWRBAND);
3603
3604	/ Should agree with poll, otherwise some programs break /
3605	sk_wake_async_rcu(sk, how: SOCK_WAKE_SPACE, POLL_OUT);
3606	}
3607
3608	rcu_read_unlock();
3609	}
3610
3611	/ An optimised version of sock_def_write_space(), should only be called*
3612	* for SOCK_RCU_FREE sockets under RCU read section and after putting
3613	* ->sk_wmem_alloc.
3614	*/
3615	static void sock_def_write_space_wfree(struct sock *sk)
3616	{
3617	/ Do not wake up a writer until he can make "significant"*
3618	* progress. --DaveM
3619	*/
3620	if (sock_writeable(sk)) {
3621	struct socket_wq *wq = rcu_dereference(sk->sk_wq);
3622
3623	/ rely on refcount_sub from sock_wfree() /
3624	smp_mb__after_atomic();
3625	if (wq && waitqueue_active(wq_head: &wq->wait))
3626	wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT \|
3627	EPOLLWRNORM \| EPOLLWRBAND);
3628
3629	/ Should agree with poll, otherwise some programs break /
3630	sk_wake_async_rcu(sk, how: SOCK_WAKE_SPACE, POLL_OUT);
3631	}
3632	}
3633
/* Default sk_destruct callback: intentionally does nothing. */
static void sock_def_destruct(struct sock *sk)
{
}
3637
3638	void sk_send_sigurg(struct sock *sk)
3639	{
3640	if (sk->sk_socket && sk->sk_socket->file)
3641	if (send_sigurg(file: sk->sk_socket->file))
3642	sk_wake_async(sk, how: SOCK_WAKE_URG, POLL_PRI);
3643	}
3644	EXPORT_SYMBOL(sk_send_sigurg);
3645
/*
 * (Re)arm @timer to fire at @expires. A reference on @sk is taken via
 * sock_hold() only when mod_timer() returns zero.
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);
3653
/*
 * Delete @timer; when timer_delete() returns non-zero, drop a reference
 * on @sk with __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_delete(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
3660
/*
 * Same as sk_stop_timer() but uses timer_delete_sync() to delete @timer.
 */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (timer_delete_sync(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer_sync);
3667
3668	void sock_init_data_uid(struct socket sock, struct* sock *sk, kuid_t uid)
3669	{
3670	sk_init_common(sk);
3671	sk->sk_send_head = NULL;
3672
3673	timer_setup(&sk->sk_timer, NULL, `0`);
3674
3675	sk->sk_allocation = GFP_KERNEL;
3676	sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
3677	sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
3678	sk->sk_state = TCP_CLOSE;
3679	sk->sk_use_task_frag = true;
3680	sk_set_socket(sk, sock);
3681
3682	sock_set_flag(sk, flag: SOCK_ZAPPED);
3683
3684	if (sock) {
3685	sk->sk_type = sock->type;
3686	RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3687	sock->sk = sk;
3688	} else {
3689	RCU_INIT_POINTER(sk->sk_wq, NULL);
3690	}
3691	sk->sk_uid = uid;
3692
3693	sk->sk_state_change = sock_def_wakeup;
3694	sk->sk_data_ready = sock_def_readable;
3695	sk->sk_write_space = sock_def_write_space;
3696	sk->sk_error_report = sock_def_error_report;
3697	sk->sk_destruct = sock_def_destruct;
3698
3699	sk->sk_frag.page = NULL;
3700	sk->sk_frag.offset = `0`;
3701	sk->sk_peek_off = -`1`;
3702
3703	sk->sk_peer_pid = NULL;
3704	sk->sk_peer_cred = NULL;
3705	spin_lock_init(&sk->sk_peer_lock);
3706
3707	sk->sk_write_pending = `0`;
3708	sk->sk_rcvlowat = `1`;
3709	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
3710	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
3711
3712	sk->sk_stamp = SK_DEFAULT_STAMP;
3713	#if BITS_PER_LONG==32
3714	seqlock_init(&sk->sk_stamp_seq);
3715	#endif
3716	atomic_set(v: &sk->sk_zckey, i: `0`);
3717
3718	#ifdef CONFIG_NET_RX_BUSY_POLL
3719	sk->sk_napi_id = `0`;
3720	sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
3721	#endif
3722
3723	sk->sk_max_pacing_rate = ~`0UL`;
3724	sk->sk_pacing_rate = ~`0UL`;
3725	WRITE_ONCE(sk->sk_pacing_shift, `10`);
3726	sk->sk_incoming_cpu = -`1`;
3727
3728	sk_rx_queue_clear(sk);
3729	/*
3730	* Before updating sk_refcnt, we must commit prior changes to memory
3731	* (Documentation/RCU/rculist_nulls.rst for details)
3732	*/
3733	smp_wmb();
3734	refcount_set(r: &sk->sk_refcnt, n: `1`);
3735	atomic_set(v: &sk->sk_drops, i: `0`);
3736	}
3737	EXPORT_SYMBOL(sock_init_data_uid);
3738
3739	void sock_init_data(struct socket sock, struct* sock *sk)
3740	{
3741	kuid_t uid = sock ?
3742	SOCK_INODE(socket: sock)->i_uid :
3743	make_kuid(from: sock_net(sk)->user_ns, uid: `0`);
3744
3745	sock_init_data_uid(sock, sk, uid);
3746	}
3747	EXPORT_SYMBOL(sock_init_data);
3748
3749	void lock_sock_nested(struct sock sk, int* subclass)
3750	{
3751	/ The sk_lock has mutex_lock() semantics here. /
3752	mutex_acquire(&sk->sk_lock.dep_map, subclass, `0`, _RET_IP_);
3753
3754	might_sleep();
3755	spin_lock_bh(lock: &sk->sk_lock.slock);
3756	if (sock_owned_by_user_nocheck(sk))
3757	__lock_sock(sk);
3758	sk->sk_lock.owned = `1`;
3759	spin_unlock_bh(lock: &sk->sk_lock.slock);
3760	}
3761	EXPORT_SYMBOL(lock_sock_nested);
3762
3763	void release_sock(struct sock *sk)
3764	{
3765	spin_lock_bh(lock: &sk->sk_lock.slock);
3766	if (sk->sk_backlog.tail)
3767	__release_sock(sk);
3768
3769	if (sk->sk_prot->release_cb)
3770	INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
3771	tcp_release_cb, sk);
3772
3773	sock_release_ownership(sk);
3774	if (waitqueue_active(wq_head: &sk->sk_lock.wq))
3775	wake_up(&sk->sk_lock.wq);
3776	spin_unlock_bh(lock: &sk->sk_lock.slock);
3777	}
3778	EXPORT_SYMBOL(release_sock);
3779
3780	bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
3781	{
3782	might_sleep();
3783	spin_lock_bh(lock: &sk->sk_lock.slock);
3784
3785	if (!sock_owned_by_user_nocheck(sk)) {
3786	/*
3787	* Fast path return with bottom halves disabled and
3788	* sock::sk_lock.slock held.
3789	*
3790	* The 'mutex' is not contended and holding
3791	* sock::sk_lock.slock prevents all other lockers to
3792	* proceed so the corresponding unlock_sock_fast() can
3793	* avoid the slow path of release_sock() completely and
3794	* just release slock.
3795	*
3796	* From a semantical POV this is equivalent to 'acquiring'
3797	* the 'mutex', hence the corresponding lockdep
3798	* mutex_release() has to happen in the fast path of
3799	* unlock_sock_fast().
3800	*/
3801	return false;
3802	}
3803
3804	__lock_sock(sk);
3805	sk->sk_lock.owned = `1`;
3806	__acquire(&sk->sk_lock.slock);
3807	spin_unlock_bh(lock: &sk->sk_lock.slock);
3808	return true;
3809	}
3810	EXPORT_SYMBOL(__lock_sock_fast);
3811
3812	int sock_gettstamp(struct socket sock, void* __user *userstamp,
3813	bool timeval, bool time32)
3814	{
3815	struct sock *sk = sock->sk;
3816	struct timespec64 ts;
3817
3818	sock_enable_timestamp(sk, flag: SOCK_TIMESTAMP);
3819	ts = ktime_to_timespec64(sock_read_timestamp(sk));
3820	if (ts.tv_sec == -`1`)
3821	return -ENOENT;
3822	if (ts.tv_sec == `0`) {
3823	ktime_t kt = ktime_get_real();
3824	sock_write_timestamp(sk, kt);
3825	ts = ktime_to_timespec64(kt);
3826	}
3827
3828	if (timeval)
3829	ts.tv_nsec /= `1000`;
3830
3831	#ifdef CONFIG_COMPAT_32BIT_TIME
3832	if (time32)
3833	return put_old_timespec32(&ts, userstamp);
3834	#endif
3835	#ifdef CONFIG_SPARC64
3836	/ beware of padding in sparc64 timeval /
3837	if (timeval && !in_compat_syscall()) {
3838	struct __kernel_old_timeval __user tv = {
3839	.tv_sec = ts.tv_sec,
3840	.tv_usec = ts.tv_nsec,
3841	};
3842	if (copy_to_user(userstamp, &tv, sizeof(tv)))
3843	return -EFAULT;
3844	return `0`;
3845	}
3846	#endif
3847	return put_timespec64(ts: &ts, uts: userstamp);
3848	}
3849	EXPORT_SYMBOL(sock_gettstamp);
3850
3851	void sock_enable_timestamp(struct sock sk, enum* sock_flags flag)
3852	{
3853	if (!sock_flag(sk, flag)) {
3854	unsigned long previous_flags = sk->sk_flags;
3855
3856	sock_set_flag(sk, flag);
3857	/*
3858	* we just set one of the two flags which require net
3859	* time stamping, but time stamping might have been on
3860	* already because of the other one
3861	*/
3862	if (sock_needs_netstamp(sk) &&
3863	!(previous_flags & SK_FLAGS_TIMESTAMP))
3864	net_enable_timestamp();
3865	}
3866	}
3867
3868	int sock_recv_errqueue(struct sock sk, struct* msghdr msg, int* len,
3869	int level, int type)
3870	{
3871	struct sock_exterr_skb *serr;
3872	struct sk_buff *skb;
3873	int copied, err;
3874
3875	err = -EAGAIN;
3876	skb = sock_dequeue_err_skb(sk);
3877	if (skb == NULL)
3878	goto out;
3879
3880	copied = skb->len;
3881	if (copied > len) {
3882	msg->msg_flags \|= MSG_TRUNC;
3883	copied = len;
3884	}
3885	err = skb_copy_datagram_msg(from: skb, offset: `0`, msg, size: copied);
3886	if (err)
3887	goto out_free_skb;
3888
3889	sock_recv_timestamp(msg, sk, skb);
3890
3891	serr = SKB_EXT_ERR(skb);
3892	put_cmsg(msg, level, type, len: sizeof(serr->ee), data: &serr->ee);
3893
3894	msg->msg_flags \|= MSG_ERRQUEUE;
3895	err = copied;
3896
3897	out_free_skb:
3898	kfree_skb(skb);
3899	out:
3900	return err;
3901	}
3902	EXPORT_SYMBOL(sock_recv_errqueue);
3903
3904	/*
3905	* Get a socket option on an socket.
3906	*
3907	* FIX: POSIX 1003.1g is very ambiguous here. It states that
3908	* asynchronous errors should be reported by getsockopt. We assume
3909	* this means if you specify SO_ERROR (otherwise what is the point of it).
3910	*/
3911	int sock_common_getsockopt(struct socket sock, int* level, int optname,
3912	char __user optval, int* __user *optlen)
3913	{
3914	struct sock *sk = sock->sk;
3915
3916	/ IPV6_ADDRFORM can change sk->sk_prot under us. /
3917	return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
3918	}
3919	EXPORT_SYMBOL(sock_common_getsockopt);
3920
3921	int sock_common_recvmsg(struct socket sock, struct* msghdr *msg, size_t size,
3922	int flags)
3923	{
3924	struct sock *sk = sock->sk;
3925	int addr_len = `0`;
3926	int err;
3927
3928	err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
3929	if (err >= `0`)
3930	msg->msg_namelen = addr_len;
3931	return err;
3932	}
3933	EXPORT_SYMBOL(sock_common_recvmsg);
3934
3935	/*
3936	* Set socket options on an inet socket.
3937	*/
3938	int sock_common_setsockopt(struct socket sock, int* level, int optname,
3939	sockptr_t optval, unsigned int optlen)
3940	{
3941	struct sock *sk = sock->sk;
3942
3943	/ IPV6_ADDRFORM can change sk->sk_prot under us. /
3944	return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
3945	}
3946	EXPORT_SYMBOL(sock_common_setsockopt);
3947
3948	void sk_common_release(struct sock *sk)
3949	{
3950	if (sk->sk_prot->destroy)
3951	sk->sk_prot->destroy(sk);
3952
3953	/*
3954	* Observation: when sk_common_release is called, processes have
3955	* no access to socket. But net still has.
3956	* Step one, detach it from networking:
3957	*
3958	* A. Remove from hash tables.
3959	*/
3960
3961	sk->sk_prot->unhash(sk);
3962
3963	/*
3964	* In this point socket cannot receive new packets, but it is possible
3965	* that some packets are in flight because some CPU runs receiver and
3966	* did hash table lookup before we unhashed socket. They will achieve
3967	* receive queue and will be purged by socket destructor.
3968	*
3969	* Also we still have packets pending on receive queue and probably,
3970	* our own packets waiting in device queues. sock_destroy will drain
3971	* receive queue, but transmitted packets will delay socket destruction
3972	* until the last reference will be released.
3973	*/
3974
3975	sock_orphan(sk);
3976
3977	xfrm_sk_free_policy(sk);
3978
3979	sock_put(sk);
3980	}
3981	EXPORT_SYMBOL(sk_common_release);
3982
3983	void sk_get_meminfo(const struct sock sk, u32 mem)
3984	{
3985	memset(mem, `0`, sizeof(mem) SK_MEMINFO_VARS);
3986
3987	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3988	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3989	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3990	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3991	mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc);
3992	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3993	mem[SK_MEMINFO_OPTMEM] = atomic_read(v: &sk->sk_omem_alloc);
3994	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3995	mem[SK_MEMINFO_DROPS] = atomic_read(v: &sk->sk_drops);
3996	}
3997
3998	#ifdef CONFIG_PROC_FS
/* Bitmap of in-use protocol slots; one bit per index below PROTO_INUSE_NR. */
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
4000
4001	int sock_prot_inuse_get(struct net net, struct* proto *prot)
4002	{
4003	int cpu, idx = prot->inuse_idx;
4004	int res = `0`;
4005
4006	for_each_possible_cpu(cpu)
4007	res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
4008
4009	return res >= `0` ? res : `0`;
4010	}
4011	EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
4012
4013	int sock_inuse_get(struct net *net)
4014	{
4015	int cpu, res = `0`;
4016
4017	for_each_possible_cpu(cpu)
4018	res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;
4019
4020	return res;
4021	}
4022
4023	EXPORT_SYMBOL_GPL(sock_inuse_get);
4024
4025	static int __net_init sock_inuse_init_net(struct net *net)
4026	{
4027	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
4028	if (net->core.prot_inuse == NULL)
4029	return -ENOMEM;
4030	return `0`;
4031	}
4032
4033	static void __net_exit sock_inuse_exit_net(struct net *net)
4034	{
4035	free_percpu(pdata: net->core.prot_inuse);
4036	}
4037
4038	static struct pernet_operations net_inuse_ops = {
4039	.init = sock_inuse_init_net,
4040	.exit = sock_inuse_exit_net,
4041	};
4042
4043	static __init int net_inuse_init(void)
4044	{
4045	if (register_pernet_subsys(&net_inuse_ops))
4046	panic(fmt: "Cannot initialize net inuse counters");
4047
4048	return `0`;
4049	}
4050
4051	core_initcall(net_inuse_init);
4052
4053	static int assign_proto_idx(struct proto *prot)
4054	{
4055	prot->inuse_idx = find_first_zero_bit(addr: proto_inuse_idx, PROTO_INUSE_NR);
4056
4057	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) {
4058	pr_err("PROTO_INUSE_NR exhausted\n");
4059	return -ENOSPC;
4060	}
4061
4062	set_bit(nr: prot->inuse_idx, addr: proto_inuse_idx);
4063	return `0`;
4064	}
4065
4066	static void release_proto_idx(struct proto *prot)
4067	{
4068	if (prot->inuse_idx != PROTO_INUSE_NR)
4069	clear_bit(nr: prot->inuse_idx, addr: proto_inuse_idx);
4070	}
4071	#else
/* Stub used when CONFIG_PROC_FS is not set: nothing to assign. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}
4076
/* Stub used when CONFIG_PROC_FS is not set: nothing to release. */
static inline void release_proto_idx(struct proto *prot)
{
}
4080
4081	#endif
4082
4083	static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
4084	{
4085	if (!twsk_prot)
4086	return;
4087	kfree(objp: twsk_prot->twsk_slab_name);
4088	twsk_prot->twsk_slab_name = NULL;
4089	kmem_cache_destroy(s: twsk_prot->twsk_slab);
4090	twsk_prot->twsk_slab = NULL;
4091	}
4092
4093	static int tw_prot_init(const struct proto *prot)
4094	{
4095	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
4096
4097	if (!twsk_prot)
4098	return `0`;
4099
4100	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, fmt: "tw_sock_%s",
4101	prot->name);
4102	if (!twsk_prot->twsk_slab_name)
4103	return -ENOMEM;
4104
4105	twsk_prot->twsk_slab =
4106	kmem_cache_create(twsk_prot->twsk_slab_name,
4107	twsk_prot->twsk_obj_size, `0`,
4108	SLAB_ACCOUNT \| prot->slab_flags,
4109	NULL);
4110	if (!twsk_prot->twsk_slab) {
4111	pr_crit("%s: Can't create timewait sock SLAB cache!\n",
4112	prot->name);
4113	return -ENOMEM;
4114	}
4115
4116	return `0`;
4117	}
4118
4119	static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
4120	{
4121	if (!rsk_prot)
4122	return;
4123	kfree(objp: rsk_prot->slab_name);
4124	rsk_prot->slab_name = NULL;
4125	kmem_cache_destroy(s: rsk_prot->slab);
4126	rsk_prot->slab = NULL;
4127	}
4128
4129	static int req_prot_init(const struct proto *prot)
4130	{
4131	struct request_sock_ops *rsk_prot = prot->rsk_prot;
4132
4133	if (!rsk_prot)
4134	return `0`;
4135
4136	rsk_prot->slab_name = kasprintf(GFP_KERNEL, fmt: "request_sock_%s",
4137	prot->name);
4138	if (!rsk_prot->slab_name)
4139	return -ENOMEM;
4140
4141	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
4142	rsk_prot->obj_size, `0`,
4143	SLAB_ACCOUNT \| prot->slab_flags,
4144	NULL);
4145
4146	if (!rsk_prot->slab) {
4147	pr_crit("%s: Can't create request sock SLAB cache!\n",
4148	prot->name);
4149	return -ENOMEM;
4150	}
4151	return `0`;
4152	}
4153
4154	int proto_register(struct proto prot, int* alloc_slab)
4155	{
4156	int ret = -ENOBUFS;
4157
4158	if (prot->memory_allocated && !prot->sysctl_mem) {
4159	pr_err("%s: missing sysctl_mem\n", prot->name);
4160	return -EINVAL;
4161	}
4162	if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
4163	pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
4164	return -EINVAL;
4165	}
4166	if (alloc_slab) {
4167	prot->slab = kmem_cache_create_usercopy(name: prot->name,
4168	size: prot->obj_size, align: `0`,
4169	SLAB_HWCACHE_ALIGN \| SLAB_ACCOUNT \|
4170	prot->slab_flags,
4171	useroffset: prot->useroffset, usersize: prot->usersize,
4172	NULL);
4173
4174	if (prot->slab == NULL) {
4175	pr_crit("%s: Can't create sock SLAB cache!\n",
4176	prot->name);
4177	goto out;
4178	}
4179
4180	if (req_prot_init(prot))
4181	goto out_free_request_sock_slab;
4182
4183	if (tw_prot_init(prot))
4184	goto out_free_timewait_sock_slab;
4185	}
4186
4187	mutex_lock(&proto_list_mutex);
4188	ret = assign_proto_idx(prot);
4189	if (ret) {
4190	mutex_unlock(lock: &proto_list_mutex);
4191	goto out_free_timewait_sock_slab;
4192	}
4193	list_add(new: &prot->node, head: &proto_list);
4194	mutex_unlock(lock: &proto_list_mutex);
4195	return ret;
4196
4197	out_free_timewait_sock_slab:
4198	if (alloc_slab)
4199	tw_prot_cleanup(twsk_prot: prot->twsk_prot);
4200	out_free_request_sock_slab:
4201	if (alloc_slab) {
4202	req_prot_cleanup(rsk_prot: prot->rsk_prot);
4203
4204	kmem_cache_destroy(s: prot->slab);
4205	prot->slab = NULL;
4206	}
4207	out:
4208	return ret;
4209	}
4210	EXPORT_SYMBOL(proto_register);
4211
4212	void proto_unregister(struct proto *prot)
4213	{
4214	mutex_lock(&proto_list_mutex);
4215	release_proto_idx(prot);
4216	list_del(entry: &prot->node);
4217	mutex_unlock(lock: &proto_list_mutex);
4218
4219	kmem_cache_destroy(s: prot->slab);
4220	prot->slab = NULL;
4221
4222	req_prot_cleanup(rsk_prot: prot->rsk_prot);
4223	tw_prot_cleanup(twsk_prot: prot->twsk_prot);
4224	}
4225	EXPORT_SYMBOL(proto_unregister);
4226
4227	int sock_load_diag_module(int family, int protocol)
4228	{
4229	if (!protocol) {
4230	if (!sock_is_registered(family))
4231	return -ENOENT;
4232
4233	return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
4234	NETLINK_SOCK_DIAG, family);
4235	}
4236
4237	#ifdef CONFIG_INET
4238	if (family == AF_INET &&
4239	protocol != IPPROTO_RAW &&
4240	protocol < MAX_INET_PROTOS &&
4241	!rcu_access_pointer(inet_protos[protocol]))
4242	return -ENOENT;
4243	#endif
4244
4245	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
4246	NETLINK_SOCK_DIAG, family, protocol);
4247	}
4248	EXPORT_SYMBOL(sock_load_diag_module);
4249
4250	#ifdef CONFIG_PROC_FS
4251	static void proto_seq_start(struct* seq_file seq, loff_t pos)
4252	__acquires(proto_list_mutex)
4253	{
4254	mutex_lock(&proto_list_mutex);
4255	return seq_list_start_head(head: &proto_list, pos: *pos);
4256	}
4257
4258	static void proto_seq_next(struct* seq_file seq, void* v, loff_t pos)
4259	{
4260	return seq_list_next(v, head: &proto_list, ppos: pos);
4261	}
4262
4263	static void proto_seq_stop(struct seq_file seq, void* *v)
4264	__releases(proto_list_mutex)
4265	{
4266	mutex_unlock(lock: &proto_list_mutex);
4267	}
4268
/* Map a method pointer to a flag character: 'n' when NULL, 'y' otherwise. */
static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}
4273	static long sock_prot_memory_allocated(struct proto *proto)
4274	{
4275	return proto->memory_allocated != NULL ? proto_memory_allocated(prot: proto) : -`1L`;
4276	}
4277
4278	static const char sock_prot_memory_pressure(struct* proto *proto)
4279	{
4280	return proto->memory_pressure != NULL ?
4281	proto_memory_pressure(prot: proto) ? "yes" : "no" : "NI";
4282	}
4283
4284	static void proto_seq_printf(struct seq_file seq, struct* proto *proto)
4285	{
4286
4287	seq_printf(m: seq, fmt: "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
4288	"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
4289	proto->name,
4290	proto->obj_size,
4291	sock_prot_inuse_get(seq_file_net(seq), proto),
4292	sock_prot_memory_allocated(proto),
4293	sock_prot_memory_pressure(proto),
4294	proto->max_header,
4295	proto->slab == NULL ? "no" : "yes",
4296	module_name(proto->owner),
4297	proto_method_implemented(method: proto->close),
4298	proto_method_implemented(method: proto->connect),
4299	proto_method_implemented(method: proto->disconnect),
4300	proto_method_implemented(method: proto->accept),
4301	proto_method_implemented(method: proto->ioctl),
4302	proto_method_implemented(method: proto->init),
4303	proto_method_implemented(method: proto->destroy),
4304	proto_method_implemented(method: proto->shutdown),
4305	proto_method_implemented(method: proto->setsockopt),
4306	proto_method_implemented(method: proto->getsockopt),
4307	proto_method_implemented(method: proto->sendmsg),
4308	proto_method_implemented(method: proto->recvmsg),
4309	proto_method_implemented(method: proto->bind),
4310	proto_method_implemented(method: proto->backlog_rcv),
4311	proto_method_implemented(method: proto->hash),
4312	proto_method_implemented(method: proto->unhash),
4313	proto_method_implemented(method: proto->get_port),
4314	proto_method_implemented(method: proto->enter_memory_pressure));
4315	}
4316
4317	static int proto_seq_show(struct seq_file seq, void* *v)
4318	{
4319	if (v == &proto_list)
4320	seq_printf(m: seq, fmt: "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
4321	"protocol",
4322	"size",
4323	"sockets",
4324	"memory",
4325	"press",
4326	"maxhdr",
4327	"slab",
4328	"module",
4329	"cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
4330	else
4331	proto_seq_printf(seq, list_entry(v, struct proto, node));
4332	return `0`;
4333	}
4334
4335	static const struct seq_operations proto_seq_ops = {
4336	.start = proto_seq_start,
4337	.next = proto_seq_next,
4338	.stop = proto_seq_stop,
4339	.show = proto_seq_show,
4340	};
4341
4342	static __net_init int proto_init_net(struct net *net)
4343	{
4344	if (!proc_create_net("protocols", `0444`, net->proc_net, &proto_seq_ops,
4345	sizeof(struct seq_net_private)))
4346	return -ENOMEM;
4347
4348	return `0`;
4349	}
4350
4351	static __net_exit void proto_exit_net(struct net *net)
4352	{
4353	remove_proc_entry("protocols", net->proc_net);
4354	}
4355
4356
4357	static __net_initdata struct pernet_operations proto_net_ops = {
4358	.init = proto_init_net,
4359	.exit = proto_exit_net,
4360	};
4361
4362	static int __init proto_init(void)
4363	{
4364	return register_pernet_subsys(&proto_net_ops);
4365	}
4366
4367	subsys_initcall(proto_init);
4368
4369	#endif /* PROC_FS */
4370
4371	#ifdef CONFIG_NET_RX_BUSY_POLL
4372	bool sk_busy_loop_end(void p, unsigned* long start_time)
4373	{
4374	struct sock *sk = p;
4375
4376	if (!skb_queue_empty_lockless(list: &sk->sk_receive_queue))
4377	return true;
4378
4379	if (sk_is_udp(sk) &&
4380	!skb_queue_empty_lockless(list: &udp_sk(sk)->reader_queue))
4381	return true;
4382
4383	return sk_busy_loop_timeout(sk, start_time);
4384	}
4385	EXPORT_SYMBOL(sk_busy_loop_end);
4386	#endif /* CONFIG_NET_RX_BUSY_POLL */
4387
4388	int sock_bind_add(struct sock sk, struct* sockaddr addr, int* addr_len)
4389	{
4390	if (!sk->sk_prot->bind_add)
4391	return -EOPNOTSUPP;
4392	return sk->sk_prot->bind_add(sk, addr, addr_len);
4393	}
4394	EXPORT_SYMBOL(sock_bind_add);
4395
4396	/ Copy 'size' bytes from userspace and return `size` back to userspace /
4397	int sock_ioctl_inout(struct sock sk, unsigned* int cmd,
4398	void __user arg, void* *karg, size_t size)
4399	{
4400	int ret;
4401
4402	if (copy_from_user(to: karg, from: arg, n: size))
4403	return -EFAULT;
4404
4405	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
4406	if (ret)
4407	return ret;
4408
4409	if (copy_to_user(to: arg, from: karg, n: size))
4410	return -EFAULT;
4411
4412	return `0`;
4413	}
4414	EXPORT_SYMBOL(sock_ioctl_inout);
4415
4416	/ This is the most common ioctl prep function, where the result (4 bytes) is*
4417	* copied back to userspace if the ioctl() returns successfully. No input is
4418	* copied from userspace as input argument.
4419	*/
4420	static int sock_ioctl_out(struct sock sk, unsigned* int cmd, void __user *arg)
4421	{
4422	int ret, karg = `0`;
4423
4424	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
4425	if (ret)
4426	return ret;
4427
4428	return put_user(karg, (int __user *)arg);
4429	}
4430
4431	/ A wrapper around sock ioctls, which copies the data from userspace*
4432	* (depending on the protocol/ioctl), and copies back the result to userspace.
4433	* The main motivation for this function is to pass kernel memory to the
4434	* protocol ioctl callbacks, instead of userspace memory.
4435	*/
4436	int sk_ioctl(struct sock sk, unsigned* int cmd, void __user *arg)
4437	{
4438	int rc = `1`;
4439
4440	if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
4441	rc = ipmr_sk_ioctl(sk, cmd, arg);
4442	else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
4443	rc = ip6mr_sk_ioctl(sk, cmd, arg);
4444	else if (sk_is_phonet(sk))
4445	rc = phonet_sk_ioctl(sk, cmd, arg);
4446
4447	/ If ioctl was processed, returns its value /
4448	if (rc <= `0`)
4449	return rc;
4450
4451	/ Otherwise call the default handler /
4452	return sock_ioctl_out(sk, cmd, arg);
4453	}
4454	EXPORT_SYMBOL(sk_ioctl);
4455
4456	static int __init sock_struct_check(void)
4457	{
4458	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
4459	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
4460	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue);
4461	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue);
4462	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog);
4463
4464	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst);
4465	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex);
4466	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie);
4467	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf);
4468	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter);
4469	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq);
4470	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready);
4471	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo);
4472	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat);
4473
4474	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
4475	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
4476	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
4477
4478	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
4479	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
4480	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc);
4481	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags);
4482
4483	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4484	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4485	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
4486	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
4487	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
4488	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
4489	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
4490	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
4491	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
4492	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
4493	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
4494	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
4495	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
4496	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
4497	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
4498	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);
4499
4500	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
4501	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
4502	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
4503	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
4504	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
4505	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
4506	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
4507	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
4508	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
4509	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
4510	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
4511	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
4512	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
4513	return `0`;
4514	}
4515
4516	core_initcall(sock_struct_check);
4517

Provided by KDAB

Definitions

proto_list_mutex
proto_list
sk_ns_capable
sk_capable
sk_net_capable
af_family_keys
af_family_kern_keys
af_family_slock_keys
af_family_kern_slock_keys
af_family_key_strings
af_family_slock_key_strings
af_family_clock_key_strings
af_family_kern_key_strings
af_family_kern_slock_key_strings
af_family_kern_clock_key_strings
af_family_rlock_key_strings
af_family_wlock_key_strings
af_family_elock_key_strings
af_callback_keys
af_rlock_keys
af_wlock_keys
af_elock_keys
af_kern_callback_keys
sysctl_wmem_max
sysctl_rmem_max
sysctl_wmem_default
sysctl_rmem_default
memalloc_socks_key
sk_set_memalloc
sk_clear_memalloc
__sk_backlog_rcv
sk_error_report
sock_get_timeout
sock_copy_user_timeval
sock_set_timeout
sk_set_prio_allowed
sock_needs_netstamp
sock_disable_timestamp
__sock_queue_rcv_skb
sock_queue_rcv_skb_reason
__sk_receive_skb
__sk_dst_check
sk_dst_check
sock_bindtoindex_locked
sock_bindtoindex
sock_setbindtodevice
sock_getbindtodevice
sk_mc_loop
sock_set_reuseaddr
sock_set_reuseport
sock_no_linger
sock_set_priority
sock_set_sndtimeo
__sock_set_timestamps
sock_enable_timestamps
sock_set_timestamp
sock_timestamping_bind_phc
sock_set_timestamping
bpf_skops_tx_timestamping
sock_set_keepalive
__sock_set_rcvbuf
sock_set_rcvbuf
__sock_set_mark
sock_set_mark
sock_release_reserved_memory
sock_reserve_memory
sock_devmem_dontneed
sockopt_lock_sock
sockopt_release_sock
sockopt_ns_capable
sockopt_capable
sockopt_validate_clockid
sk_setsockopt
sock_setsockopt
sk_get_peer_cred
cred_to_ucred
groups_to_user
sk_getsockopt
sock_lock_init
sock_copy
sk_prot_alloc
sk_prot_free
sk_alloc
__sk_destruct
sk_net_refcnt_upgrade
sk_destruct
__sk_free
sk_free
sk_init_common
sk_clone_lock
sk_dst_gso_max_size
sk_setup_caps
sock_wfree
__sock_wfree
skb_set_owner_w
can_skb_orphan_partial
skb_orphan_partial
sock_rfree
sock_efree
sock_pfree
sock_i_uid
__sock_i_ino
sock_i_ino
sock_wmalloc
sock_ofree
sock_omalloc
sock_kmalloc
sock_kmemdup
__sock_kfree_s
sock_kfree_s
sock_kzfree_s
sock_wait_for_wmem
sock_alloc_send_pskb
__sock_cmsg_send
sock_cmsg_send
sk_enter_memory_pressure
sk_leave_memory_pressure
net_high_order_alloc_disable_key
skb_page_frag_refill
sk_page_frag_refill
__lock_sock
__release_sock
__sk_flush_backlog
sk_wait_data
__sk_mem_raise_allocated
__sk_mem_schedule
__sk_mem_reduce_allocated
__sk_mem_reclaim
sk_set_peek_off
sock_no_bind
sock_no_connect
sock_no_socketpair
sock_no_accept
sock_no_getname
sock_no_ioctl
sock_no_listen
sock_no_shutdown
sock_no_sendmsg
sock_no_sendmsg_locked
sock_no_recvmsg
sock_no_mmap
__receive_sock
sock_def_wakeup
sock_def_error_report
sock_def_readable
sock_def_write_space
sock_def_write_space_wfree
sock_def_destruct
sk_send_sigurg
sk_reset_timer
sk_stop_timer
sk_stop_timer_sync
sock_init_data_uid
sock_init_data
lock_sock_nested
release_sock
__lock_sock_fast
sock_gettstamp
sock_enable_timestamp
sock_recv_errqueue
sock_common_getsockopt
sock_common_recvmsg
sock_common_setsockopt
sk_common_release
sk_get_meminfo
proto_inuse_idx
sock_prot_inuse_get
sock_inuse_get
sock_inuse_init_net
sock_inuse_exit_net
net_inuse_ops
net_inuse_init
assign_proto_idx
release_proto_idx
tw_prot_cleanup
tw_prot_init
req_prot_cleanup
req_prot_init
proto_register
proto_unregister
sock_load_diag_module
proto_seq_start
proto_seq_next
proto_seq_stop
proto_method_implemented
sock_prot_memory_allocated
sock_prot_memory_pressure
proto_seq_printf
proto_seq_show
proto_seq_ops
proto_init_net
proto_exit_net
proto_net_ops
proto_init
sk_busy_loop_end
sock_bind_add
sock_ioctl_inout
sock_ioctl_out
sk_ioctl

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/net/core/sock.c