socket.c source code [linux/net/socket.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* NET An implementation of the SOCKET network access protocol.
4	*
5	* Version: @(#)socket.c 1.1.93 18/02/95
6	*
7	* Authors: Orest Zborowski, <obz@Kodak.COM>
8	* Ross Biro
9	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10	*
11	* Fixes:
12	* Anonymous : NOTSOCK/BADF cleanup. Error fix in
13	* shutdown()
14	* Alan Cox : verify_area() fixes
15	* Alan Cox : Removed DDI
16	* Jonathan Kamens : SOCK_DGRAM reconnect bug
17	* Alan Cox : Moved a load of checks to the very
18	* top level.
19	* Alan Cox : Move address structures to/from user
20	* mode above the protocol layers.
21	* Rob Janssen : Allow 0 length sends.
22	* Alan Cox : Asynchronous I/O support (cribbed from the
23	* tty drivers).
24	* Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25	* Jeff Uphoff : Made max number of sockets command-line
26	* configurable.
27	* Matti Aarnio : Made the number of sockets dynamic,
28	* to be allocated when needed, and mr.
29	* Uphoff's max is used as max to be
30	* allowed to allocate.
31	* Linus : Argh. removed all the socket allocation
32	* altogether: it's in the inode now.
33	* Alan Cox : Made sock_alloc()/sock_release() public
34	* for NetROM and future kernel nfsd type
35	* stuff.
36	* Alan Cox : sendmsg/recvmsg basics.
37	* Tom Dyas : Export net symbols.
38	* Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39	* Alan Cox : Added thread locking to sys_* calls
40	* for sockets. May have errors at the
41	* moment.
42	* Kevin Buhr : Fixed the dumb errors in the above.
43	* Andi Kleen : Some small cleanups, optimizations,
44	* and fixed a copy_from_user() bug.
45	* Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
46	* Tigran Aivazian : Made listen(2) backlog sanity checks
47	* protocol-independent
48	*
49	* This module is effectively the top level interface to the BSD socket
50	* paradigm.
51	*
52	* Based upon Swansea University Computer Society NET3.039
53	*/
54
55	#include <linux/bpf-cgroup.h>
56	#include <linux/ethtool.h>
57	#include <linux/mm.h>
58	#include <linux/socket.h>
59	#include <linux/file.h>
60	#include <linux/splice.h>
61	#include <linux/net.h>
62	#include <linux/interrupt.h>
63	#include <linux/thread_info.h>
64	#include <linux/rcupdate.h>
65	#include <linux/netdevice.h>
66	#include <linux/proc_fs.h>
67	#include <linux/seq_file.h>
68	#include <linux/mutex.h>
69	#include <linux/if_bridge.h>
70	#include <linux/if_vlan.h>
71	#include <linux/ptp_classify.h>
72	#include <linux/init.h>
73	#include <linux/poll.h>
74	#include <linux/cache.h>
75	#include <linux/module.h>
76	#include <linux/highmem.h>
77	#include <linux/mount.h>
78	#include <linux/pseudo_fs.h>
79	#include <linux/security.h>
80	#include <linux/syscalls.h>
81	#include <linux/compat.h>
82	#include <linux/kmod.h>
83	#include <linux/audit.h>
84	#include <linux/wireless.h>
85	#include <linux/nsproxy.h>
86	#include <linux/magic.h>
87	#include <linux/slab.h>
88	#include <linux/xattr.h>
89	#include <linux/nospec.h>
90	#include <linux/indirect_call_wrapper.h>
91	#include <linux/io_uring/net.h>
92
93	#include <linux/uaccess.h>
94	#include <asm/unistd.h>
95
96	#include <net/compat.h>
97	#include <net/wext.h>
98	#include <net/cls_cgroup.h>
99
100	#include <net/sock.h>
101	#include <linux/netfilter.h>
102
103	#include <linux/if_tun.h>
104	#include <linux/ipv6_route.h>
105	#include <linux/route.h>
106	#include <linux/termios.h>
107	#include <linux/sockios.h>
108	#include <net/busy_poll.h>
109	#include <linux/errqueue.h>
110	#include <linux/ptp_clock_kernel.h>
111	#include <trace/events/sock.h>
112
113	#include "core/dev.h"
114
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll globals; only built when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
119
120	static ssize_t sock_read_iter(struct kiocb iocb, struct* iov_iter *to);
121	static ssize_t sock_write_iter(struct kiocb iocb, struct* iov_iter *from);
122	static int sock_mmap(struct file file, struct* vm_area_struct *vma);
123
124	static int sock_close(struct inode inode, struct* file *file);
125	static __poll_t sock_poll(struct file *file,
126	struct poll_table_struct *wait);
127	static long sock_ioctl(struct file file, unsigned* int cmd, unsigned long arg);
128	#ifdef CONFIG_COMPAT
129	static long compat_sock_ioctl(struct file *file,
130	unsigned int cmd, unsigned long arg);
131	#endif
132	static int sock_fasync(int fd, struct file filp, int* on);
133	static ssize_t sock_splice_read(struct file file, loff_t ppos,
134	struct pipe_inode_info *pipe, size_t len,
135	unsigned int flags);
136	static void sock_splice_eof(struct file *file);
137
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: forwards to the protocol's ->show_fdinfo()
 * when the protocol supplies one, otherwise prints nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;
	const struct proto_ops *ops = READ_ONCE(sock->ops);

	if (ops->show_fdinfo)
		ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
150
151	/*
152	* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
153	* in the operation structures but are done directly via the socketcall() multiplexor.
154	*/
155
156	static const struct file_operations socket_file_ops = {
157	.owner = THIS_MODULE,
158	.read_iter = sock_read_iter,
159	.write_iter = sock_write_iter,
160	.poll = sock_poll,
161	.unlocked_ioctl = sock_ioctl,
162	#ifdef CONFIG_COMPAT
163	.compat_ioctl = compat_sock_ioctl,
164	#endif
165	.uring_cmd = io_uring_cmd_sock,
166	.mmap = sock_mmap,
167	.release = sock_close,
168	.fasync = sock_fasync,
169	.splice_write = splice_to_socket,
170	.splice_read = sock_splice_read,
171	.splice_eof = sock_splice_eof,
172	.show_fdinfo = sock_show_fdinfo,
173	};
174
175	static const char * const pf_family_names[] = {
176	[PF_UNSPEC] = "PF_UNSPEC",
177	[PF_UNIX] = "PF_UNIX/PF_LOCAL",
178	[PF_INET] = "PF_INET",
179	[PF_AX25] = "PF_AX25",
180	[PF_IPX] = "PF_IPX",
181	[PF_APPLETALK] = "PF_APPLETALK",
182	[PF_NETROM] = "PF_NETROM",
183	[PF_BRIDGE] = "PF_BRIDGE",
184	[PF_ATMPVC] = "PF_ATMPVC",
185	[PF_X25] = "PF_X25",
186	[PF_INET6] = "PF_INET6",
187	[PF_ROSE] = "PF_ROSE",
188	[PF_DECnet] = "PF_DECnet",
189	[PF_NETBEUI] = "PF_NETBEUI",
190	[PF_SECURITY] = "PF_SECURITY",
191	[PF_KEY] = "PF_KEY",
192	[PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
193	[PF_PACKET] = "PF_PACKET",
194	[PF_ASH] = "PF_ASH",
195	[PF_ECONET] = "PF_ECONET",
196	[PF_ATMSVC] = "PF_ATMSVC",
197	[PF_RDS] = "PF_RDS",
198	[PF_SNA] = "PF_SNA",
199	[PF_IRDA] = "PF_IRDA",
200	[PF_PPPOX] = "PF_PPPOX",
201	[PF_WANPIPE] = "PF_WANPIPE",
202	[PF_LLC] = "PF_LLC",
203	[PF_IB] = "PF_IB",
204	[PF_MPLS] = "PF_MPLS",
205	[PF_CAN] = "PF_CAN",
206	[PF_TIPC] = "PF_TIPC",
207	[PF_BLUETOOTH] = "PF_BLUETOOTH",
208	[PF_IUCV] = "PF_IUCV",
209	[PF_RXRPC] = "PF_RXRPC",
210	[PF_ISDN] = "PF_ISDN",
211	[PF_PHONET] = "PF_PHONET",
212	[PF_IEEE802154] = "PF_IEEE802154",
213	[PF_CAIF] = "PF_CAIF",
214	[PF_ALG] = "PF_ALG",
215	[PF_NFC] = "PF_NFC",
216	[PF_VSOCK] = "PF_VSOCK",
217	[PF_KCM] = "PF_KCM",
218	[PF_QIPCRTR] = "PF_QIPCRTR",
219	[PF_SMC] = "PF_SMC",
220	[PF_XDP] = "PF_XDP",
221	[PF_MCTP] = "PF_MCTP",
222	};
223
224	/*
225	* The protocol list. Each protocol is registered in here.
226	*/
227
228	static DEFINE_SPINLOCK(net_family_lock);
229	static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
230
231	/*
232	* Support routines.
233	* Move socket addresses back and forth across the kernel/user
234	* divide and look after the messy bits.
235	*/
236
237	/**
238	* move_addr_to_kernel - copy a socket address into kernel space
239	* @uaddr: Address in user space
240	* @kaddr: Address in kernel space
241	* @ulen: Length in user space
242	*
243	* The address is copied into kernel space. If the provided address is
244	* too long an error code of -EINVAL is returned. If the copy gives
245	* invalid addresses -EFAULT is returned. On a success 0 is returned.
246	*/
247
248	int move_addr_to_kernel(void __user uaddr, int* ulen, struct sockaddr_storage *kaddr)
249	{
250	if (ulen < `0` \|\| ulen > sizeof(struct sockaddr_storage))
251	return -EINVAL;
252	if (ulen == `0`)
253	return `0`;
254	if (copy_from_user(to: kaddr, from: uaddr, n: ulen))
255	return -EFAULT;
256	return audit_sockaddr(len: ulen, addr: kaddr);
257	}
258
259	/**
260	* move_addr_to_user - copy an address to user space
261	* @kaddr: kernel space address
262	* @klen: length of address in kernel
263	* @uaddr: user space address
264	* @ulen: pointer to user length field
265	*
266	* The value pointed to by ulen on entry is the buffer length available.
267	* This is overwritten with the buffer space used. -EINVAL is returned
268	* if an overlong buffer is specified or a negative buffer size. -EFAULT
269	* is returned if either the buffer or the length field are not
270	* accessible.
271	* After copying the data up to the limit the user specifies, the true
272	* length of the data is written over the length limit the user
273	* specified. Zero is returned for a success.
274	*/
275
276	static int move_addr_to_user(struct sockaddr_storage kaddr, int* klen,
277	void __user uaddr, int* __user *ulen)
278	{
279	int err;
280	int len;
281
282	BUG_ON(klen > sizeof(struct sockaddr_storage));
283	err = get_user(len, ulen);
284	if (err)
285	return err;
286	if (len > klen)
287	len = klen;
288	if (len < `0`)
289	return -EINVAL;
290	if (len) {
291	if (audit_sockaddr(len: klen, addr: kaddr))
292	return -ENOMEM;
293	if (copy_to_user(to: uaddr, from: kaddr, n: len))
294	return -EFAULT;
295	}
296	/*
297	* "fromlen shall refer to the value before truncation.."
298	* 1003.1g
299	*/
300	return __put_user(klen, ulen);
301	}
302
303	static struct kmem_cache *sock_inode_cachep __ro_after_init;
304
305	static struct inode sock_alloc_inode(struct* super_block *sb)
306	{
307	struct socket_alloc *ei;
308
309	ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
310	if (!ei)
311	return NULL;
312	init_waitqueue_head(&ei->socket.wq.wait);
313	ei->socket.wq.fasync_list = NULL;
314	ei->socket.wq.flags = `0`;
315
316	ei->socket.state = SS_UNCONNECTED;
317	ei->socket.flags = `0`;
318	ei->socket.ops = NULL;
319	ei->socket.sk = NULL;
320	ei->socket.file = NULL;
321
322	return &ei->vfs_inode;
323	}
324
325	static void sock_free_inode(struct inode *inode)
326	{
327	struct socket_alloc *ei;
328
329	ei = container_of(inode, struct socket_alloc, vfs_inode);
330	kmem_cache_free(s: sock_inode_cachep, objp: ei);
331	}
332
333	static void init_once(void *foo)
334	{
335	struct socket_alloc ei = (struct* socket_alloc *)foo;
336
337	inode_init_once(&ei->vfs_inode);
338	}
339
340	static void init_inodecache(void)
341	{
342	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
343	sizeof(struct socket_alloc),
344	`0`,
345	(SLAB_HWCACHE_ALIGN \|
346	SLAB_RECLAIM_ACCOUNT \|
347	SLAB_ACCOUNT),
348	init_once);
349	BUG_ON(sock_inode_cachep == NULL);
350	}
351
352	static const struct super_operations sockfs_ops = {
353	.alloc_inode = sock_alloc_inode,
354	.free_inode = sock_free_inode,
355	.statfs = simple_statfs,
356	};
357
358	/*
359	* sockfs_dname() is called from d_path().
360	*/
361	static char sockfs_dname(struct* dentry dentry, char* buffer, int* buflen)
362	{
363	return dynamic_dname(buffer, buflen, "socket:[%lu]",
364	d_inode(dentry)->i_ino);
365	}
366
367	static const struct dentry_operations sockfs_dentry_operations = {
368	.d_dname = sockfs_dname,
369	};
370
371	static int sockfs_xattr_get(const struct xattr_handler *handler,
372	struct dentry dentry, struct* inode *inode,
373	const char suffix, void* *value, size_t size)
374	{
375	if (value) {
376	if (dentry->d_name.len + `1` > size)
377	return -ERANGE;
378	memcpy(value, dentry->d_name.name, dentry->d_name.len + `1`);
379	}
380	return dentry->d_name.len + `1`;
381	}
382
383	#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
384	#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
385	#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
386
387	static const struct xattr_handler sockfs_xattr_handler = {
388	.name = XATTR_NAME_SOCKPROTONAME,
389	.get = sockfs_xattr_get,
390	};
391
392	static int sockfs_security_xattr_set(const struct xattr_handler *handler,
393	struct mnt_idmap *idmap,
394	struct dentry dentry, struct* inode *inode,
395	const char suffix, const* void *value,
396	size_t size, int flags)
397	{
398	/ Handled by LSM. /
399	return -EAGAIN;
400	}
401
402	static const struct xattr_handler sockfs_security_xattr_handler = {
403	.prefix = XATTR_SECURITY_PREFIX,
404	.set = sockfs_security_xattr_set,
405	};
406
407	static const struct xattr_handler * const sockfs_xattr_handlers[] = {
408	&sockfs_xattr_handler,
409	&sockfs_security_xattr_handler,
410	NULL
411	};
412
413	static int sockfs_init_fs_context(struct fs_context *fc)
414	{
415	struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
416	if (!ctx)
417	return -ENOMEM;
418	ctx->ops = &sockfs_ops;
419	ctx->dops = &sockfs_dentry_operations;
420	ctx->xattr = sockfs_xattr_handlers;
421	return `0`;
422	}
423
424	static struct vfsmount *sock_mnt __read_mostly;
425
426	static struct file_system_type sock_fs_type = {
427	.name = "sockfs",
428	.init_fs_context = sockfs_init_fs_context,
429	.kill_sb = kill_anon_super,
430	};
431
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */

448
449	/**
450	* sock_alloc_file - Bind a &socket to a &file
451	* @sock: socket
452	* @flags: file status flags
453	* @dname: protocol name
454	*
455	* Returns the &file bound with @sock, implicitly storing it
456	* in sock->file. If dname is %NULL, sets to "".
457	*
458	* On failure @sock is released, and an ERR pointer is returned.
459	*
460	* This function uses GFP_KERNEL internally.
461	*/
462
463	struct file sock_alloc_file(struct* socket sock, int* flags, const char *dname)
464	{
465	struct file *file;
466
467	if (!dname)
468	dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
469
470	file = alloc_file_pseudo(SOCK_INODE(socket: sock), sock_mnt, dname,
471	O_RDWR \| (flags & O_NONBLOCK),
472	&socket_file_ops);
473	if (IS_ERR(ptr: file)) {
474	sock_release(sock);
475	return file;
476	}
477
478	file->f_mode \|= FMODE_NOWAIT;
479	sock->file = file;
480	file->private_data = sock;
481	stream_open(inode: SOCK_INODE(socket: sock), filp: file);
482	/*
483	* Disable permission and pre-content events, but enable legacy
484	* inotify events for legacy users.
485	*/
486	file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
487	return file;
488	}
489	EXPORT_SYMBOL(sock_alloc_file);
490
491	static int sock_map_fd(struct socket sock, int* flags)
492	{
493	struct file *newfile;
494	int fd = get_unused_fd_flags(flags);
495	if (unlikely(fd < `0`)) {
496	sock_release(sock);
497	return fd;
498	}
499
500	newfile = sock_alloc_file(sock, flags, NULL);
501	if (!IS_ERR(ptr: newfile)) {
502	fd_install(fd, file: newfile);
503	return fd;
504	}
505
506	put_unused_fd(fd);
507	return PTR_ERR(ptr: newfile);
508	}
509
510	/**
511	* sock_from_file - Return the &socket bounded to @file.
512	* @file: file
513	*
514	* On failure returns %NULL.
515	*/
516
517	struct socket sock_from_file(struct* file *file)
518	{
519	if (likely(file->f_op == &socket_file_ops))
520	return file->private_data; / set in sock_alloc_file /
521
522	return NULL;
523	}
524	EXPORT_SYMBOL(sock_from_file);
525
526	/**
527	* sockfd_lookup - Go from a file number to its socket slot
528	* @fd: file handle
529	* @err: pointer to an error code return
530	*
531	* The file handle passed in is locked and the socket it is bound
532	* to is returned. If an error occurs the err pointer is overwritten
533	* with a negative errno code and NULL is returned. The function checks
534	* for both invalid handles and passing a handle which is not a socket.
535	*
536	* On a success the socket object pointer is returned.
537	*/
538
539	struct socket sockfd_lookup(int* fd, int *err)
540	{
541	struct file *file;
542	struct socket *sock;
543
544	file = fget(fd);
545	if (!file) {
546	*err = -EBADF;
547	return NULL;
548	}
549
550	sock = sock_from_file(file);
551	if (!sock) {
552	*err = -ENOTSOCK;
553	fput(file);
554	}
555	return sock;
556	}
557	EXPORT_SYMBOL(sockfd_lookup);
558
559	static ssize_t sockfs_listxattr(struct dentry dentry, char* *buffer,
560	size_t size)
561	{
562	ssize_t len;
563	ssize_t used = `0`;
564
565	len = security_inode_listsecurity(inode: d_inode(dentry), buffer, buffer_size: size);
566	if (len < `0`)
567	return len;
568	used += len;
569	if (buffer) {
570	if (size < used)
571	return -ERANGE;
572	buffer += len;
573	}
574
575	len = (XATTR_NAME_SOCKPROTONAME_LEN + `1`);
576	used += len;
577	if (buffer) {
578	if (size < used)
579	return -ERANGE;
580	memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
581	buffer += len;
582	}
583
584	return used;
585	}
586
587	static int sockfs_setattr(struct mnt_idmap *idmap,
588	struct dentry dentry, struct* iattr *iattr)
589	{
590	int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
591
592	if (!err && (iattr->ia_valid & ATTR_UID)) {
593	struct socket *sock = SOCKET_I(inode: d_inode(dentry));
594
595	if (sock->sk)
596	sock->sk->sk_uid = iattr->ia_uid;
597	else
598	err = -ENOENT;
599	}
600
601	return err;
602	}
603
604	static const struct inode_operations sockfs_inode_ops = {
605	.listxattr = sockfs_listxattr,
606	.setattr = sockfs_setattr,
607	};
608
609	/**
610	* sock_alloc - allocate a socket
611	*
612	* Allocate a new inode and socket object. The two are bound together
613	* and initialised. The socket is then returned. If we are out of inodes
614	* NULL is returned. This functions uses GFP_KERNEL internally.
615	*/
616
617	struct socket sock_alloc(void*)
618	{
619	struct inode *inode;
620	struct socket *sock;
621
622	inode = new_inode_pseudo(sb: sock_mnt->mnt_sb);
623	if (!inode)
624	return NULL;
625
626	sock = SOCKET_I(inode);
627
628	inode->i_ino = get_next_ino();
629	inode->i_mode = S_IFSOCK \| S_IRWXUGO;
630	inode->i_uid = current_fsuid();
631	inode->i_gid = current_fsgid();
632	inode->i_op = &sockfs_inode_ops;
633
634	return sock;
635	}
636	EXPORT_SYMBOL(sock_alloc);
637
638	static void __sock_release(struct socket sock, struct* inode *inode)
639	{
640	const struct proto_ops *ops = READ_ONCE(sock->ops);
641
642	if (ops) {
643	struct module *owner = ops->owner;
644
645	if (inode)
646	inode_lock(inode);
647	ops->release(sock);
648	sock->sk = NULL;
649	if (inode)
650	inode_unlock(inode);
651	sock->ops = NULL;
652	module_put(module: owner);
653	}
654
655	if (sock->wq.fasync_list)
656	pr_err("%s: fasync list not empty!\n", __func__);
657
658	if (!sock->file) {
659	iput(SOCK_INODE(socket: sock));
660	return;
661	}
662	sock->file = NULL;
663	}
664
665	/**
666	* sock_release - close a socket
667	* @sock: socket to close
668	*
669	* The socket is released from the protocol stack if it has a release
670	* callback, and the inode is then released if the socket is bound to
671	* an inode not a file.
672	*/
673	void sock_release(struct socket *sock)
674	{
675	__sock_release(sock, NULL);
676	}
677	EXPORT_SYMBOL(sock_release);
678
679	void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
680	{
681	u8 flags = *tx_flags;
682
683	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
684	flags \|= SKBTX_HW_TSTAMP_NOBPF;
685
686	if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
687	flags \|= SKBTX_SW_TSTAMP;
688
689	if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
690	flags \|= SKBTX_SCHED_TSTAMP;
691
692	if (tsflags & SOF_TIMESTAMPING_TX_COMPLETION)
693	flags \|= SKBTX_COMPLETION_TSTAMP;
694
695	*tx_flags = flags;
696	}
697	EXPORT_SYMBOL(__sock_tx_timestamp);
698
699	INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket , struct* msghdr *,
700	size_t));
701	INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket , struct* msghdr *,
702	size_t));
703
704	static noinline void call_trace_sock_send_length(struct sock sk, int* ret,
705	int flags)
706	{
707	trace_sock_send_length(sk, ret, flags: `0`);
708	}
709
710	static inline int sock_sendmsg_nosec(struct socket sock, struct* msghdr *msg)
711	{
712	int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
713	inet_sendmsg, sock, msg,
714	msg_data_left(msg));
715	BUG_ON(ret == -EIOCBQUEUED);
716
717	if (trace_sock_send_length_enabled())
718	call_trace_sock_send_length(sk: sock->sk, ret, flags: `0`);
719	return ret;
720	}
721
/*
 * Run the LSM sendmsg hook and, only if it returns 0, send the message.
 * A non-zero hook result is returned as-is.
 */
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg,
					  msg_data_left(msg));

	return err ?: sock_sendmsg_nosec(sock, msg);
}
729
730	/**
731	* sock_sendmsg - send a message through @sock
732	* @sock: socket
733	* @msg: message to send
734	*
735	* Sends @msg through @sock, passing through LSM.
736	* Returns the number of bytes sent, or an error code.
737	*/
738	int sock_sendmsg(struct socket sock, struct* msghdr *msg)
739	{
740	struct sockaddr_storage save_addr = (struct* sockaddr_storage *)msg->msg_name;
741	struct sockaddr_storage address;
742	int save_len = msg->msg_namelen;
743	int ret;
744
745	if (msg->msg_name) {
746	memcpy(&address, msg->msg_name, msg->msg_namelen);
747	msg->msg_name = &address;
748	}
749
750	ret = __sock_sendmsg(sock, msg);
751	msg->msg_name = save_addr;
752	msg->msg_namelen = save_len;
753
754	return ret;
755	}
756	EXPORT_SYMBOL(sock_sendmsg);
757
758	/**
759	* kernel_sendmsg - send a message through @sock (kernel-space)
760	* @sock: socket
761	* @msg: message header
762	* @vec: kernel vec
763	* @num: vec array length
764	* @size: total message data size
765	*
766	* Builds the message data with @vec and sends it through @sock.
767	* Returns the number of bytes sent, or an error code.
768	*/
769
770	int kernel_sendmsg(struct socket sock, struct* msghdr *msg,
771	struct kvec *vec, size_t num, size_t size)
772	{
773	iov_iter_kvec(i: &msg->msg_iter, ITER_SOURCE, kvec: vec, nr_segs: num, count: size);
774	return sock_sendmsg(sock, msg);
775	}
776	EXPORT_SYMBOL(kernel_sendmsg);
777
778	static bool skb_is_err_queue(const struct sk_buff *skb)
779	{
780	/ pkt_type of skbs enqueued on the error queue are set to*
781	* PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
782	* in recvmsg, since skbs received on a local socket will never
783	* have a pkt_type of PACKET_OUTGOING.
784	*/
785	return skb->pkt_type == PACKET_OUTGOING;
786	}
787
788	/ On transmit, software and hardware timestamps are returned independently.*
789	* As the two skb clones share the hardware timestamp, which may be updated
790	* before the software timestamp is received, a hardware TX timestamp may be
791	* returned only if there is no software TX timestamp. Ignore false software
792	* timestamps, which may be made in the __sock_recv_timestamp() call when the
793	* option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
794	* hardware timestamp.
795	*/
796	static bool skb_is_swtx_tstamp(const struct sk_buff skb, int* false_tstamp)
797	{
798	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
799	}
800
801	static ktime_t get_timestamp(struct sock sk, struct* sk_buff skb, int* *if_index)
802	{
803	bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
804	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
805	struct net_device *orig_dev;
806	ktime_t hwtstamp;
807
808	rcu_read_lock();
809	orig_dev = dev_get_by_napi_id(napi_id: skb_napi_id(skb));
810	if (orig_dev) {
811	*if_index = orig_dev->ifindex;
812	hwtstamp = netdev_get_tstamp(dev: orig_dev, hwtstamps: shhwtstamps, cycles);
813	} else {
814	hwtstamp = shhwtstamps->hwtstamp;
815	}
816	rcu_read_unlock();
817
818	return hwtstamp;
819	}
820
821	static void put_ts_pktinfo(struct msghdr msg, struct* sk_buff *skb,
822	int if_index)
823	{
824	struct scm_ts_pktinfo ts_pktinfo;
825	struct net_device *orig_dev;
826
827	if (!skb_mac_header_was_set(skb))
828	return;
829
830	memset(&ts_pktinfo, `0`, sizeof(ts_pktinfo));
831
832	if (!if_index) {
833	rcu_read_lock();
834	orig_dev = dev_get_by_napi_id(napi_id: skb_napi_id(skb));
835	if (orig_dev)
836	if_index = orig_dev->ifindex;
837	rcu_read_unlock();
838	}
839	ts_pktinfo.if_index = if_index;
840
841	ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
842	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
843	len: sizeof(ts_pktinfo), data: &ts_pktinfo);
844	}
845
846	/*
847	* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
848	*/
849	void __sock_recv_timestamp(struct msghdr msg, struct* sock *sk,
850	struct sk_buff *skb)
851	{
852	int need_software_tstamp = sock_flag(sk, flag: SOCK_RCVTSTAMP);
853	int new_tstamp = sock_flag(sk, flag: SOCK_TSTAMP_NEW);
854	struct scm_timestamping_internal tss;
855	int empty = `1`, false_tstamp = `0`;
856	struct skb_shared_hwtstamps *shhwtstamps =
857	skb_hwtstamps(skb);
858	int if_index;
859	ktime_t hwtstamp;
860	u32 tsflags;
861
862	/ Race occurred between timestamp enabling and packet*
863	receiving. Fill in the current time for now. /*
864	if (need_software_tstamp && skb->tstamp == `0`) {
865	__net_timestamp(skb);
866	false_tstamp = `1`;
867	}
868
869	if (need_software_tstamp) {
870	if (!sock_flag(sk, flag: SOCK_RCVTSTAMPNS)) {
871	if (new_tstamp) {
872	struct __kernel_sock_timeval tv;
873
874	skb_get_new_timestamp(skb, stamp: &tv);
875	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
876	len: sizeof(tv), data: &tv);
877	} else {
878	struct __kernel_old_timeval tv;
879
880	skb_get_timestamp(skb, stamp: &tv);
881	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
882	len: sizeof(tv), data: &tv);
883	}
884	} else {
885	if (new_tstamp) {
886	struct __kernel_timespec ts;
887
888	skb_get_new_timestampns(skb, stamp: &ts);
889	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
890	len: sizeof(ts), data: &ts);
891	} else {
892	struct __kernel_old_timespec ts;
893
894	skb_get_timestampns(skb, stamp: &ts);
895	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
896	len: sizeof(ts), data: &ts);
897	}
898	}
899	}
900
901	memset(&tss, `0`, sizeof(tss));
902	tsflags = READ_ONCE(sk->sk_tsflags);
903	if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
904	(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE \|\|
905	skb_is_err_queue(skb) \|\|
906	!(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
907	ktime_to_timespec64_cond(kt: skb->tstamp, ts: tss.ts + `0`))
908	empty = `0`;
909	if (shhwtstamps &&
910	(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
911	(tsflags & SOF_TIMESTAMPING_RX_HARDWARE \|\|
912	skb_is_err_queue(skb) \|\|
913	!(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
914	!skb_is_swtx_tstamp(skb, false_tstamp)) {
915	if_index = `0`;
916	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
917	hwtstamp = get_timestamp(sk, skb, if_index: &if_index);
918	else
919	hwtstamp = shhwtstamps->hwtstamp;
920
921	if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
922	hwtstamp = ptp_convert_timestamp(hwtstamp: &hwtstamp,
923	READ_ONCE(sk->sk_bind_phc));
924
925	if (ktime_to_timespec64_cond(kt: hwtstamp, ts: tss.ts + `2`)) {
926	empty = `0`;
927
928	if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
929	!skb_is_err_queue(skb))
930	put_ts_pktinfo(msg, skb, if_index);
931	}
932	}
933	if (!empty) {
934	if (sock_flag(sk, flag: SOCK_TSTAMP_NEW))
935	put_cmsg_scm_timestamping64(msg, tss: &tss);
936	else
937	put_cmsg_scm_timestamping(msg, tss: &tss);
938
939	if (skb_is_err_queue(skb) && skb->len &&
940	SKB_EXT_ERR(skb)->opt_stats)
941	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
942	len: skb->len, data: skb->data);
943	}
944	}
945	EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
946
#ifdef CONFIG_WIRELESS
/*
 * Report the skb's wifi ack status as an SCM_WIFI_STATUS control message.
 * Nothing is emitted unless SOCK_WIFI_STATUS is set on @sk and the skb
 * carries a valid ack indication.
 */
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	int ack;

	if (!sock_flag(sk, SOCK_WIFI_STATUS))
		return;
	if (!skb->wifi_acked_valid)
		return;

	ack = skb->wifi_acked;

	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
#endif
964
965	static inline void sock_recv_drops(struct msghdr msg, struct* sock *sk,
966	struct sk_buff *skb)
967	{
968	if (sock_flag(sk, flag: SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
969	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
970	len: sizeof(__u32), data: &SOCK_SKB_CB(skb)->dropcount);
971	}
972
973	static void sock_recv_mark(struct msghdr msg, struct* sock *sk,
974	struct sk_buff *skb)
975	{
976	if (sock_flag(sk, flag: SOCK_RCVMARK) && skb) {
977	/ We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y /
978	__u32 mark = skb->mark;
979
980	put_cmsg(msg, SOL_SOCKET, SO_MARK, len: sizeof(__u32), data: &mark);
981	}
982	}
983
984	static void sock_recv_priority(struct msghdr msg, struct* sock *sk,
985	struct sk_buff *skb)
986	{
987	if (sock_flag(sk, flag: SOCK_RCVPRIORITY) && skb) {
988	__u32 priority = skb->priority;
989
990	put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, len: sizeof(__u32), data: &priority);
991	}
992	}
993
/*
 * Emit the generic receive control messages for @skb: timestamps, drop
 * count, mark and priority, in that order.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
	sock_recv_priority(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
1003
1004	INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket , struct* msghdr *,
1005	size_t, int));
1006	INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket , struct* msghdr *,
1007	size_t, int));
1008
1009	static noinline void call_trace_sock_recv_length(struct sock sk, int* ret, int flags)
1010	{
1011	trace_sock_recv_length(sk, ret, flags);
1012	}
1013
1014	static inline int sock_recvmsg_nosec(struct socket sock, struct* msghdr *msg,
1015	int flags)
1016	{
1017	int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1018	inet6_recvmsg,
1019	inet_recvmsg, sock, msg,
1020	msg_data_left(msg), flags);
1021	if (trace_sock_recv_length_enabled())
1022	call_trace_sock_recv_length(sk: sock->sk, ret, flags);
1023	return ret;
1024	}
1025
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. Returns the total number
 *	of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);

	return err ?: sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1042
1043	/**
1044	* kernel_recvmsg - Receive a message from a socket (kernel space)
1045	* @sock: The socket to receive the message from
1046	* @msg: Received message
1047	* @vec: Input s/g array for message data
1048	* @num: Size of input s/g array
1049	* @size: Number of bytes to read
1050	* @flags: Message flags (MSG_DONTWAIT, etc...)
1051	*
1052	* On return the msg structure contains the scatter/gather array passed in the
1053	* vec argument. The array is modified so that it consists of the unfilled
1054	* portion of the original array.
1055	*
1056	* The returned value is the total number of bytes received, or an error.
1057	*/
1058
1059	int kernel_recvmsg(struct socket sock, struct* msghdr *msg,
1060	struct kvec vec, size_t num, size_t size, int* flags)
1061	{
1062	msg->msg_control_is_user = false;
1063	iov_iter_kvec(i: &msg->msg_iter, ITER_DEST, kvec: vec, nr_segs: num, count: size);
1064	return sock_recvmsg(sock, msg, flags);
1065	}
1066	EXPORT_SYMBOL(kernel_recvmsg);
1067
1068	static ssize_t sock_splice_read(struct file file, loff_t ppos,
1069	struct pipe_inode_info *pipe, size_t len,
1070	unsigned int flags)
1071	{
1072	struct socket *sock = file->private_data;
1073	const struct proto_ops *ops;
1074
1075	ops = READ_ONCE(sock->ops);
1076	if (unlikely(!ops->splice_read))
1077	return copy_splice_read(in: file, ppos, pipe, len, flags);
1078
1079	return ops->splice_read(sock, ppos, pipe, len, flags);
1080	}
1081
1082	static void sock_splice_eof(struct file *file)
1083	{
1084	struct socket *sock = file->private_data;
1085	const struct proto_ops *ops;
1086
1087	ops = READ_ONCE(sock->ops);
1088	if (ops->splice_eof)
1089	ops->splice_eof(sock);
1090	}
1091
1092	static ssize_t sock_read_iter(struct kiocb iocb, struct* iov_iter *to)
1093	{
1094	struct file *file = iocb->ki_filp;
1095	struct socket *sock = file->private_data;
1096	struct msghdr msg = {.msg_iter = *to,
1097	.msg_iocb = iocb};
1098	ssize_t res;
1099
1100	if (file->f_flags & O_NONBLOCK \|\| (iocb->ki_flags & IOCB_NOWAIT))
1101	msg.msg_flags = MSG_DONTWAIT;
1102
1103	if (iocb->ki_pos != `0`)
1104	return -ESPIPE;
1105
1106	if (!iov_iter_count(i: to)) / Match SYS5 behaviour /
1107	return `0`;
1108
1109	res = sock_recvmsg(sock, &msg, msg.msg_flags);
1110	*to = msg.msg_iter;
1111	return res;
1112	}
1113
1114	static ssize_t sock_write_iter(struct kiocb iocb, struct* iov_iter *from)
1115	{
1116	struct file *file = iocb->ki_filp;
1117	struct socket *sock = file->private_data;
1118	struct msghdr msg = {.msg_iter = *from,
1119	.msg_iocb = iocb};
1120	ssize_t res;
1121
1122	if (iocb->ki_pos != `0`)
1123	return -ESPIPE;
1124
1125	if (file->f_flags & O_NONBLOCK \|\| (iocb->ki_flags & IOCB_NOWAIT))
1126	msg.msg_flags = MSG_DONTWAIT;
1127
1128	if (sock->type == SOCK_SEQPACKET)
1129	msg.msg_flags \|= MSG_EOR;
1130
1131	res = __sock_sendmsg(sock, msg: &msg);
1132	*from = msg.msg_iter;
1133	return res;
1134	}
1135
1136	/*
1137	* Atomic setting of ioctl hooks to avoid race
1138	* with module unload.
1139	*/
1140
1141	static DEFINE_MUTEX(br_ioctl_mutex);
1142	static int (br_ioctl_hook)(struct* net net, unsigned* int cmd,
1143	void __user *uarg);
1144
1145	void brioctl_set(int (hook)(struct* net net, unsigned* int cmd,
1146	void __user *uarg))
1147	{
1148	mutex_lock(&br_ioctl_mutex);
1149	br_ioctl_hook = hook;
1150	mutex_unlock(lock: &br_ioctl_mutex);
1151	}
1152	EXPORT_SYMBOL(brioctl_set);
1153
1154	int br_ioctl_call(struct net net, unsigned* int cmd, void __user *uarg)
1155	{
1156	int err = -ENOPKG;
1157
1158	if (!br_ioctl_hook)
1159	request_module("bridge");
1160
1161	mutex_lock(&br_ioctl_mutex);
1162	if (br_ioctl_hook)
1163	err = br_ioctl_hook(net, cmd, uarg);
1164	mutex_unlock(lock: &br_ioctl_mutex);
1165
1166	return err;
1167	}
1168
1169	static DEFINE_MUTEX(vlan_ioctl_mutex);
1170	static int (vlan_ioctl_hook) (struct* net , void* __user *arg);
1171
1172	void vlan_ioctl_set(int (hook) (struct* net , void* __user *))
1173	{
1174	mutex_lock(&vlan_ioctl_mutex);
1175	vlan_ioctl_hook = hook;
1176	mutex_unlock(lock: &vlan_ioctl_mutex);
1177	}
1178	EXPORT_SYMBOL(vlan_ioctl_set);
1179
1180	static long sock_do_ioctl(struct net net, struct* socket *sock,
1181	unsigned int cmd, unsigned long arg)
1182	{
1183	const struct proto_ops *ops = READ_ONCE(sock->ops);
1184	struct ifreq ifr;
1185	bool need_copyout;
1186	int err;
1187	void __user argp = (void* __user *)arg;
1188	void __user *data;
1189
1190	err = ops->ioctl(sock, cmd, arg);
1191
1192	/*
1193	* If this ioctl is unknown try to hand it down
1194	* to the NIC driver.
1195	*/
1196	if (err != -ENOIOCTLCMD)
1197	return err;
1198
1199	if (!is_socket_ioctl_cmd(cmd))
1200	return -ENOTTY;
1201
1202	if (get_user_ifreq(ifr: &ifr, ifrdata: &data, arg: argp))
1203	return -EFAULT;
1204	err = dev_ioctl(net, cmd, ifr: &ifr, data, need_copyout: &need_copyout);
1205	if (!err && need_copyout)
1206	if (put_user_ifreq(ifr: &ifr, arg: argp))
1207	return -EFAULT;
1208
1209	return err;
1210	}
1211
1212	/*
1213	* With an ioctl, arg may well be a user mode pointer, but we don't know
1214	* what to do with it - that's up to the protocol still.
1215	*/
1216
1217	static long sock_ioctl(struct file file, unsigned* cmd, unsigned long arg)
1218	{
1219	const struct proto_ops *ops;
1220	struct socket *sock;
1221	struct sock *sk;
1222	void __user argp = (void* __user *)arg;
1223	int pid, err;
1224	struct net *net;
1225
1226	sock = file->private_data;
1227	ops = READ_ONCE(sock->ops);
1228	sk = sock->sk;
1229	net = sock_net(sk);
1230	if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + `15`))) {
1231	struct ifreq ifr;
1232	void __user *data;
1233	bool need_copyout;
1234	if (get_user_ifreq(ifr: &ifr, ifrdata: &data, arg: argp))
1235	return -EFAULT;
1236	err = dev_ioctl(net, cmd, ifr: &ifr, data, need_copyout: &need_copyout);
1237	if (!err && need_copyout)
1238	if (put_user_ifreq(ifr: &ifr, arg: argp))
1239	return -EFAULT;
1240	} else
1241	#ifdef CONFIG_WEXT_CORE
1242	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
1243	err = wext_handle_ioctl(net, cmd, arg: argp);
1244	} else
1245	#endif
1246	switch (cmd) {
1247	case FIOSETOWN:
1248	case SIOCSPGRP:
1249	err = -EFAULT;
1250	if (get_user(pid, (int __user *)argp))
1251	break;
1252	err = f_setown(filp: sock->file, who: pid, force: `1`);
1253	break;
1254	case FIOGETOWN:
1255	case SIOCGPGRP:
1256	err = put_user(f_getown(sock->file),
1257	(int __user *)argp);
1258	break;
1259	case SIOCGIFBR:
1260	case SIOCSIFBR:
1261	case SIOCBRADDBR:
1262	case SIOCBRDELBR:
1263	case SIOCBRADDIF:
1264	case SIOCBRDELIF:
1265	err = br_ioctl_call(net, cmd, uarg: argp);
1266	break;
1267	case SIOCGIFVLAN:
1268	case SIOCSIFVLAN:
1269	err = -ENOPKG;
1270	if (!vlan_ioctl_hook)
1271	request_module("8021q");
1272
1273	mutex_lock(&vlan_ioctl_mutex);
1274	if (vlan_ioctl_hook)
1275	err = vlan_ioctl_hook(net, argp);
1276	mutex_unlock(lock: &vlan_ioctl_mutex);
1277	break;
1278	case SIOCGSKNS:
1279	err = -EPERM;
1280	if (!ns_capable(ns: net->user_ns, CAP_NET_ADMIN))
1281	break;
1282
1283	err = open_related_ns(ns: &net->ns, get_ns: get_net_ns);
1284	break;
1285	case SIOCGSTAMP_OLD:
1286	case SIOCGSTAMPNS_OLD:
1287	if (!ops->gettstamp) {
1288	err = -ENOIOCTLCMD;
1289	break;
1290	}
1291	err = ops->gettstamp(sock, argp,
1292	cmd == SIOCGSTAMP_OLD,
1293	!IS_ENABLED(CONFIG_64BIT));
1294	break;
1295	case SIOCGSTAMP_NEW:
1296	case SIOCGSTAMPNS_NEW:
1297	if (!ops->gettstamp) {
1298	err = -ENOIOCTLCMD;
1299	break;
1300	}
1301	err = ops->gettstamp(sock, argp,
1302	cmd == SIOCGSTAMP_NEW,
1303	false);
1304	break;
1305
1306	case SIOCGIFCONF:
1307	err = dev_ifconf(net, ifc: argp);
1308	break;
1309
1310	default:
1311	err = sock_do_ioctl(net, sock, cmd, arg);
1312	break;
1313	}
1314	return err;
1315	}
1316
1317	/**
1318	* sock_create_lite - creates a socket
1319	* @family: protocol family (AF_INET, ...)
1320	* @type: communication type (SOCK_STREAM, ...)
1321	* @protocol: protocol (0, ...)
1322	* @res: new socket
1323	*
1324	* Creates a new socket and assigns it to @res, passing through LSM.
1325	* The new socket initialization is not complete, see kernel_accept().
1326	* Returns 0 or an error. On failure @res is set to %NULL.
1327	* This function internally uses GFP_KERNEL.
1328	*/
1329
1330	int sock_create_lite(int family, int type, int protocol, struct socket **res)
1331	{
1332	int err;
1333	struct socket *sock = NULL;
1334
1335	err = security_socket_create(family, type, protocol, kern: `1`);
1336	if (err)
1337	goto out;
1338
1339	sock = sock_alloc();
1340	if (!sock) {
1341	err = -ENOMEM;
1342	goto out;
1343	}
1344
1345	sock->type = type;
1346	err = security_socket_post_create(sock, family, type, protocol, kern: `1`);
1347	if (err)
1348	goto out_release;
1349
1350	out:
1351	*res = sock;
1352	return err;
1353	out_release:
1354	sock_release(sock);
1355	sock = NULL;
1356	goto out;
1357	}
1358	EXPORT_SYMBOL(sock_create_lite);
1359
1360	/ No kernel lock held - perfect /
1361	static __poll_t sock_poll(struct file file, poll_table wait)
1362	{
1363	struct socket *sock = file->private_data;
1364	const struct proto_ops *ops = READ_ONCE(sock->ops);
1365	__poll_t events = poll_requested_events(p: wait), flag = `0`;
1366
1367	if (!ops->poll)
1368	return `0`;
1369
1370	if (sk_can_busy_loop(sk: sock->sk)) {
1371	/ poll once if requested by the syscall /
1372	if (events & POLL_BUSY_LOOP)
1373	sk_busy_loop(sk: sock->sk, nonblock: `1`);
1374
1375	/ if this socket can poll_ll, tell the system call /
1376	flag = POLL_BUSY_LOOP;
1377	}
1378
1379	return ops->poll(file, sock, wait) \| flag;
1380	}
1381
1382	static int sock_mmap(struct file file, struct* vm_area_struct *vma)
1383	{
1384	struct socket *sock = file->private_data;
1385
1386	return READ_ONCE(sock->ops)->mmap(file, sock, vma);
1387	}
1388
/*
 * ->release() for sockets: release the socket embedded in @inode.
 * Always returns 0; @filp is unused.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	__sock_release(SOCKET_I(inode), inode);
	return 0;
}
1394
1395	/*
1396	* Update the socket async list
1397	*
1398	* Fasync_list locking strategy.
1399	*
1400	* 1. fasync_list is modified only under process context socket lock
1401	* i.e. under semaphore.
1402	* 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1403	* or under socket lock
1404	*/
1405
1406	static int sock_fasync(int fd, struct file filp, int* on)
1407	{
1408	struct socket *sock = filp->private_data;
1409	struct sock *sk = sock->sk;
1410	struct socket_wq *wq = &sock->wq;
1411
1412	if (sk == NULL)
1413	return -EINVAL;
1414
1415	lock_sock(sk);
1416	fasync_helper(fd, filp, on, &wq->fasync_list);
1417
1418	if (!wq->fasync_list)
1419	sock_reset_flag(sk, flag: SOCK_FASYNC);
1420	else
1421	sock_set_flag(sk, flag: SOCK_FASYNC);
1422
1423	release_sock(sk);
1424	return `0`;
1425	}
1426
1427	/ This function may be called only under rcu_lock /
1428
1429	int sock_wake_async(struct socket_wq wq, int* how, int band)
1430	{
1431	if (!wq \|\| !wq->fasync_list)
1432	return -`1`;
1433
1434	switch (how) {
1435	case SOCK_WAKE_WAITD:
1436	if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1437	break;
1438	goto call_kill;
1439	case SOCK_WAKE_SPACE:
1440	if (!test_and_clear_bit(nr: SOCKWQ_ASYNC_NOSPACE, addr: &wq->flags))
1441	break;
1442	fallthrough;
1443	case SOCK_WAKE_IO:
1444	call_kill:
1445	kill_fasync(&wq->fasync_list, SIGIO, band);
1446	break;
1447	case SOCK_WAKE_URG:
1448	kill_fasync(&wq->fasync_list, SIGURG, band);
1449	}
1450
1451	return `0`;
1452	}
1453	EXPORT_SYMBOL(sock_wake_async);
1454
1455	/**
1456	* __sock_create - creates a socket
1457	* @net: net namespace
1458	* @family: protocol family (AF_INET, ...)
1459	* @type: communication type (SOCK_STREAM, ...)
1460	* @protocol: protocol (0, ...)
1461	* @res: new socket
1462	* @kern: boolean for kernel space sockets
1463	*
1464	* Creates a new socket and assigns it to @res, passing through LSM.
1465	* Returns 0 or an error. On failure @res is set to %NULL. @kern must
1466	* be set to true if the socket resides in kernel space.
1467	* This function internally uses GFP_KERNEL.
1468	*/
1469
1470	int __sock_create(struct net net, int* family, int type, int protocol,
1471	struct socket *res, int* kern)
1472	{
1473	int err;
1474	struct socket *sock;
1475	const struct net_proto_family *pf;
1476
1477	/*
1478	* Check protocol is in range
1479	*/
1480	if (family < `0` \|\| family >= NPROTO)
1481	return -EAFNOSUPPORT;
1482	if (type < `0` \|\| type >= SOCK_MAX)
1483	return -EINVAL;
1484
1485	/ Compatibility.*
1486
1487	This uglymoron is moved from INET layer to here to avoid
1488	deadlock in module load.
1489	*/
1490	if (family == PF_INET && type == SOCK_PACKET) {
1491	pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1492	current->comm);
1493	family = PF_PACKET;
1494	}
1495
1496	err = security_socket_create(family, type, protocol, kern);
1497	if (err)
1498	return err;
1499
1500	/*
1501	* Allocate the socket and allow the family to set things up. if
1502	* the protocol is 0, the family is instructed to select an appropriate
1503	* default.
1504	*/
1505	sock = sock_alloc();
1506	if (!sock) {
1507	net_warn_ratelimited("socket: no more sockets\n");
1508	return -ENFILE; / Not exactly a match, but its the*
1509	closest posix thing /*
1510	}
1511
1512	sock->type = type;
1513
1514	#ifdef CONFIG_MODULES
1515	/ Attempt to load a protocol module if the find failed.*
1516	*
1517	* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1518	* requested real, full-featured networking support upon configuration.
1519	* Otherwise module support will break!
1520	*/
1521	if (rcu_access_pointer(net_families[family]) == NULL)
1522	request_module("net-pf-%d", family);
1523	#endif
1524
1525	rcu_read_lock();
1526	pf = rcu_dereference(net_families[family]);
1527	err = -EAFNOSUPPORT;
1528	if (!pf)
1529	goto out_release;
1530
1531	/*
1532	* We will call the ->create function, that possibly is in a loadable
1533	* module, so we have to bump that loadable module refcnt first.
1534	*/
1535	if (!try_module_get(module: pf->owner))
1536	goto out_release;
1537
1538	/ Now protected by module ref count /
1539	rcu_read_unlock();
1540
1541	err = pf->create(net, sock, protocol, kern);
1542	if (err < `0`) {
1543	/ ->create should release the allocated sock->sk object on error*
1544	* and make sure sock->sk is set to NULL to avoid use-after-free
1545	*/
1546	DEBUG_NET_WARN_ONCE(sock->sk,
1547	"%ps must clear sock->sk on failure, family: %d, type: %d, protocol: %d\n",
1548	pf->create, family, type, protocol);
1549	goto out_module_put;
1550	}
1551
1552	/*
1553	* Now to bump the refcnt of the [loadable] module that owns this
1554	* socket at sock_release time we decrement its refcnt.
1555	*/
1556	if (!try_module_get(module: sock->ops->owner))
1557	goto out_module_busy;
1558
1559	/*
1560	* Now that we're done with the ->create function, the [loadable]
1561	* module can have its refcnt decremented
1562	*/
1563	module_put(module: pf->owner);
1564	err = security_socket_post_create(sock, family, type, protocol, kern);
1565	if (err)
1566	goto out_sock_release;
1567	*res = sock;
1568
1569	return `0`;
1570
1571	out_module_busy:
1572	err = -EAFNOSUPPORT;
1573	out_module_put:
1574	sock->ops = NULL;
1575	module_put(module: pf->owner);
1576	out_sock_release:
1577	sock_release(sock);
1578	return err;
1579
1580	out_release:
1581	rcu_read_unlock();
1582	goto out_sock_release;
1583	}
1584	EXPORT_SYMBOL(__sock_create);
1585
1586	/**
1587	* sock_create - creates a socket
1588	* @family: protocol family (AF_INET, ...)
1589	* @type: communication type (SOCK_STREAM, ...)
1590	* @protocol: protocol (0, ...)
1591	* @res: new socket
1592	*
1593	* A wrapper around __sock_create().
1594	* Returns 0 or an error. This function internally uses GFP_KERNEL.
1595	*/
1596
1597	int sock_create(int family, int type, int protocol, struct socket **res)
1598	{
1599	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, `0`);
1600	}
1601	EXPORT_SYMBOL(sock_create);
1602
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1620
1621	static struct socket __sys_socket_create(int* family, int type, int protocol)
1622	{
1623	struct socket *sock;
1624	int retval;
1625
1626	/ Check the SOCK_* constants for consistency. /
1627	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1628	BUILD_BUG_ON((SOCK_MAX \| SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1629	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1630	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1631
1632	if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC \| SOCK_NONBLOCK))
1633	return ERR_PTR(error: -EINVAL);
1634	type &= SOCK_TYPE_MASK;
1635
1636	retval = sock_create(family, type, protocol, &sock);
1637	if (retval < `0`)
1638	return ERR_PTR(error: retval);
1639
1640	return sock;
1641	}
1642
1643	struct file __sys_socket_file(int* family, int type, int protocol)
1644	{
1645	struct socket *sock;
1646	int flags;
1647
1648	sock = __sys_socket_create(family, type, protocol);
1649	if (IS_ERR(ptr: sock))
1650	return ERR_CAST(ptr: sock);
1651
1652	flags = type & ~SOCK_TYPE_MASK;
1653	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1654	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1655
1656	return sock_alloc_file(sock, flags, NULL);
1657	}
1658
1659	/ A hook for bpf progs to attach to and update socket protocol.*
1660	*
1661	* A static noinline declaration here could cause the compiler to
1662	* optimize away the function. A global noinline declaration will
1663	* keep the definition, but may optimize away the callsite.
1664	* Therefore, __weak is needed to ensure that the call is still
1665	* emitted, by telling the compiler that we don't know what the
1666	* function might eventually be.
1667	*/
1668
1669	__bpf_hook_start();
1670
1671	__weak noinline int update_socket_protocol(int family, int type, int protocol)
1672	{
1673	return protocol;
1674	}
1675
1676	__bpf_hook_end();
1677
1678	int __sys_socket(int family, int type, int protocol)
1679	{
1680	struct socket *sock;
1681	int flags;
1682
1683	sock = __sys_socket_create(family, type,
1684	protocol: update_socket_protocol(family, type, protocol));
1685	if (IS_ERR(ptr: sock))
1686	return PTR_ERR(ptr: sock);
1687
1688	flags = type & ~SOCK_TYPE_MASK;
1689	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1690	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1691
1692	return sock_map_fd(sock, flags: flags & (O_CLOEXEC \| O_NONBLOCK));
1693	}
1694
1695	SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1696	{
1697	return __sys_socket(family, type, protocol);
1698	}
1699
1700	/*
1701	* Create a pair of connected sockets.
1702	*/
1703
1704	int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1705	{
1706	struct socket sock1, sock2;
1707	int fd1, fd2, err;
1708	struct file newfile1, newfile2;
1709	int flags;
1710
1711	flags = type & ~SOCK_TYPE_MASK;
1712	if (flags & ~(SOCK_CLOEXEC \| SOCK_NONBLOCK))
1713	return -EINVAL;
1714	type &= SOCK_TYPE_MASK;
1715
1716	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1717	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1718
1719	/*
1720	* reserve descriptors and make sure we won't fail
1721	* to return them to userland.
1722	*/
1723	fd1 = get_unused_fd_flags(flags);
1724	if (unlikely(fd1 < `0`))
1725	return fd1;
1726
1727	fd2 = get_unused_fd_flags(flags);
1728	if (unlikely(fd2 < `0`)) {
1729	put_unused_fd(fd: fd1);
1730	return fd2;
1731	}
1732
1733	err = put_user(fd1, &usockvec[`0`]);
1734	if (err)
1735	goto out;
1736
1737	err = put_user(fd2, &usockvec[`1`]);
1738	if (err)
1739	goto out;
1740
1741	/*
1742	* Obtain the first socket and check if the underlying protocol
1743	* supports the socketpair call.
1744	*/
1745
1746	err = sock_create(family, type, protocol, &sock1);
1747	if (unlikely(err < `0`))
1748	goto out;
1749
1750	err = sock_create(family, type, protocol, &sock2);
1751	if (unlikely(err < `0`)) {
1752	sock_release(sock1);
1753	goto out;
1754	}
1755
1756	err = security_socket_socketpair(socka: sock1, sockb: sock2);
1757	if (unlikely(err)) {
1758	sock_release(sock2);
1759	sock_release(sock1);
1760	goto out;
1761	}
1762
1763	err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
1764	if (unlikely(err < `0`)) {
1765	sock_release(sock2);
1766	sock_release(sock1);
1767	goto out;
1768	}
1769
1770	newfile1 = sock_alloc_file(sock1, flags, NULL);
1771	if (IS_ERR(ptr: newfile1)) {
1772	err = PTR_ERR(ptr: newfile1);
1773	sock_release(sock2);
1774	goto out;
1775	}
1776
1777	newfile2 = sock_alloc_file(sock2, flags, NULL);
1778	if (IS_ERR(ptr: newfile2)) {
1779	err = PTR_ERR(ptr: newfile2);
1780	fput(newfile1);
1781	goto out;
1782	}
1783
1784	audit_fd_pair(fd1, fd2);
1785
1786	fd_install(fd: fd1, file: newfile1);
1787	fd_install(fd: fd2, file: newfile2);
1788	return `0`;
1789
1790	out:
1791	put_unused_fd(fd: fd2);
1792	put_unused_fd(fd: fd1);
1793	return err;
1794	}
1795
1796	SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1797	int __user *, usockvec)
1798	{
1799	return __sys_socketpair(family, type, protocol, usockvec);
1800	}
1801
1802	int __sys_bind_socket(struct socket sock, struct* sockaddr_storage *address,
1803	int addrlen)
1804	{
1805	int err;
1806
1807	err = security_socket_bind(sock, address: (struct sockaddr *)address,
1808	addrlen);
1809	if (!err)
1810	err = READ_ONCE(sock->ops)->bind(sock,
1811	(struct sockaddr *)address,
1812	addrlen);
1813	return err;
1814	}
1815
1816	/*
1817	* Bind a name to a socket. Nothing much to do here since it's
1818	* the protocol's responsibility to handle the local address.
1819	*
1820	* We move the socket address to kernel space before we call
1821	* the protocol layer (having also checked the address is ok).
1822	*/
1823
1824	int __sys_bind(int fd, struct sockaddr __user umyaddr, int* addrlen)
1825	{
1826	struct socket *sock;
1827	struct sockaddr_storage address;
1828	CLASS(fd, f)(fd);
1829	int err;
1830
1831	if (fd_empty(f))
1832	return -EBADF;
1833	sock = sock_from_file(fd_file(f));
1834	if (unlikely(!sock))
1835	return -ENOTSOCK;
1836
1837	err = move_addr_to_kernel(uaddr: umyaddr, ulen: addrlen, kaddr: &address);
1838	if (unlikely(err))
1839	return err;
1840
1841	return __sys_bind_socket(sock, address: &address, addrlen);
1842	}
1843
1844	SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user , umyaddr, int*, addrlen)
1845	{
1846	return __sys_bind(fd, umyaddr, addrlen);
1847	}
1848
1849	/*
1850	* Perform a listen. Basically, we allow the protocol to do anything
1851	* necessary for a listen, and if that works, we mark the socket as
1852	* ready for listening.
1853	*/
1854	int __sys_listen_socket(struct socket sock, int* backlog)
1855	{
1856	int somaxconn, err;
1857
1858	somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
1859	if ((unsigned int)backlog > somaxconn)
1860	backlog = somaxconn;
1861
1862	err = security_socket_listen(sock, backlog);
1863	if (!err)
1864	err = READ_ONCE(sock->ops)->listen(sock, backlog);
1865	return err;
1866	}
1867
1868	int __sys_listen(int fd, int backlog)
1869	{
1870	CLASS(fd, f)(fd);
1871	struct socket *sock;
1872
1873	if (fd_empty(f))
1874	return -EBADF;
1875	sock = sock_from_file(fd_file(f));
1876	if (unlikely(!sock))
1877	return -ENOTSOCK;
1878
1879	return __sys_listen_socket(sock, backlog);
1880	}
1881
1882	SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1883	{
1884	return __sys_listen(fd, backlog);
1885	}
1886
1887	struct file do_accept(struct* file file, struct* proto_accept_arg *arg,
1888	struct sockaddr __user *upeer_sockaddr,
1889	int __user upeer_addrlen, int* flags)
1890	{
1891	struct socket sock, newsock;
1892	struct file *newfile;
1893	int err, len;
1894	struct sockaddr_storage address;
1895	const struct proto_ops *ops;
1896
1897	sock = sock_from_file(file);
1898	if (!sock)
1899	return ERR_PTR(error: -ENOTSOCK);
1900
1901	newsock = sock_alloc();
1902	if (!newsock)
1903	return ERR_PTR(error: -ENFILE);
1904	ops = READ_ONCE(sock->ops);
1905
1906	newsock->type = sock->type;
1907	newsock->ops = ops;
1908
1909	/*
1910	* We don't need try_module_get here, as the listening socket (sock)
1911	* has the protocol module (sock->ops->owner) held.
1912	*/
1913	__module_get(module: ops->owner);
1914
1915	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1916	if (IS_ERR(ptr: newfile))
1917	return newfile;
1918
1919	err = security_socket_accept(sock, newsock);
1920	if (err)
1921	goto out_fd;
1922
1923	arg->flags \|= sock->file->f_flags;
1924	err = ops->accept(sock, newsock, arg);
1925	if (err < `0`)
1926	goto out_fd;
1927
1928	if (upeer_sockaddr) {
1929	len = ops->getname(newsock, (struct sockaddr *)&address, `2`);
1930	if (len < `0`) {
1931	err = -ECONNABORTED;
1932	goto out_fd;
1933	}
1934	err = move_addr_to_user(kaddr: &address,
1935	klen: len, uaddr: upeer_sockaddr, ulen: upeer_addrlen);
1936	if (err < `0`)
1937	goto out_fd;
1938	}
1939
1940	/ File flags are not inherited via accept() unlike another OSes. /
1941	return newfile;
1942	out_fd:
1943	fput(newfile);
1944	return ERR_PTR(error: err);
1945	}
1946
1947	static int __sys_accept4_file(struct file file, struct* sockaddr __user *upeer_sockaddr,
1948	int __user upeer_addrlen, int* flags)
1949	{
1950	struct proto_accept_arg arg = { };
1951	struct file *newfile;
1952	int newfd;
1953
1954	if (flags & ~(SOCK_CLOEXEC \| SOCK_NONBLOCK))
1955	return -EINVAL;
1956
1957	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1958	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1959
1960	newfd = get_unused_fd_flags(flags);
1961	if (unlikely(newfd < `0`))
1962	return newfd;
1963
1964	newfile = do_accept(file, arg: &arg, upeer_sockaddr, upeer_addrlen,
1965	flags);
1966	if (IS_ERR(ptr: newfile)) {
1967	put_unused_fd(fd: newfd);
1968	return PTR_ERR(ptr: newfile);
1969	}
1970	fd_install(fd: newfd, file: newfile);
1971	return newfd;
1972	}
1973
1974	/*
1975	* For accept, we attempt to create a new socket, set up the link
1976	* with the client, wake up the client, then return the new
1977	* connected fd. We collect the address of the connector in kernel
1978	* space and move it to user at the very end. This is unclean because
1979	* we open the socket then return an error.
1980	*
1981	* 1003.1g adds the ability to recvmsg() to query connection pending
1982	* status to recvmsg. We need to add that support in a way thats
1983	* clean when we restructure accept also.
1984	*/
1985
1986	int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1987	int __user upeer_addrlen, int* flags)
1988	{
1989	CLASS(fd, f)(fd);
1990
1991	if (fd_empty(f))
1992	return -EBADF;
1993	return __sys_accept4_file(fd_file(f), upeer_sockaddr,
1994	upeer_addrlen, flags);
1995	}
1996
1997	SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1998	int __user , upeer_addrlen, int*, flags)
1999	{
2000	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
2001	}
2002
2003	SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
2004	int __user *, upeer_addrlen)
2005	{
2006	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags: `0`);
2007	}
2008
2009	/*
2010	* Attempt to connect to a socket with the server address. The address
2011	* is in user space so we verify it is OK and move it to kernel space.
2012	*
2013	* For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2014	* break bindings
2015	*
2016	* NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2017	* other SEQPACKET protocols that take time to connect() as it doesn't
2018	* include the -EINPROGRESS status for such sockets.
2019	*/
2020
2021	int __sys_connect_file(struct file file, struct* sockaddr_storage *address,
2022	int addrlen, int file_flags)
2023	{
2024	struct socket *sock;
2025	int err;
2026
2027	sock = sock_from_file(file);
2028	if (!sock) {
2029	err = -ENOTSOCK;
2030	goto out;
2031	}
2032
2033	err =
2034	security_socket_connect(sock, address: (struct sockaddr *)address, addrlen);
2035	if (err)
2036	goto out;
2037
2038	err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2039	addrlen, sock->file->f_flags \| file_flags);
2040	out:
2041	return err;
2042	}
2043
2044	int __sys_connect(int fd, struct sockaddr __user uservaddr, int* addrlen)
2045	{
2046	struct sockaddr_storage address;
2047	CLASS(fd, f)(fd);
2048	int ret;
2049
2050	if (fd_empty(f))
2051	return -EBADF;
2052
2053	ret = move_addr_to_kernel(uaddr: uservaddr, ulen: addrlen, kaddr: &address);
2054	if (ret)
2055	return ret;
2056
2057	return __sys_connect_file(fd_file(f), address: &address, addrlen, file_flags: `0`);
2058	}
2059
2060	SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2061	int, addrlen)
2062	{
2063	return __sys_connect(fd, uservaddr, addrlen);
2064	}
2065
2066	/*
2067	* Get the local address ('name') of a socket object. Move the obtained
2068	* name to user space.
2069	*/
2070
2071	int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2072	int __user *usockaddr_len)
2073	{
2074	struct socket *sock;
2075	struct sockaddr_storage address;
2076	CLASS(fd, f)(fd);
2077	int err;
2078
2079	if (fd_empty(f))
2080	return -EBADF;
2081	sock = sock_from_file(fd_file(f));
2082	if (unlikely(!sock))
2083	return -ENOTSOCK;
2084
2085	err = security_socket_getsockname(sock);
2086	if (err)
2087	return err;
2088
2089	err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, `0`);
2090	if (err < `0`)
2091	return err;
2092
2093	/ "err" is actually length in this case /
2094	return move_addr_to_user(kaddr: &address, klen: err, uaddr: usockaddr, ulen: usockaddr_len);
2095	}
2096
2097	SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2098	int __user *, usockaddr_len)
2099	{
2100	return __sys_getsockname(fd, usockaddr, usockaddr_len);
2101	}
2102
2103	/*
2104	* Get the remote address ('name') of a socket object. Move the obtained
2105	* name to user space.
2106	*/
2107
2108	int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2109	int __user *usockaddr_len)
2110	{
2111	struct socket *sock;
2112	struct sockaddr_storage address;
2113	CLASS(fd, f)(fd);
2114	int err;
2115
2116	if (fd_empty(f))
2117	return -EBADF;
2118	sock = sock_from_file(fd_file(f));
2119	if (unlikely(!sock))
2120	return -ENOTSOCK;
2121
2122	err = security_socket_getpeername(sock);
2123	if (err)
2124	return err;
2125
2126	err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, `1`);
2127	if (err < `0`)
2128	return err;
2129
2130	/ "err" is actually length in this case /
2131	return move_addr_to_user(kaddr: &address, klen: err, uaddr: usockaddr, ulen: usockaddr_len);
2132	}
2133
2134	SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2135	int __user *, usockaddr_len)
2136	{
2137	return __sys_getpeername(fd, usockaddr, usockaddr_len);
2138	}
2139
2140	/*
2141	* Send a datagram to a given address. We move the address into kernel
2142	* space and check the user space data area is readable before invoking
2143	* the protocol.
2144	*/
2145	int __sys_sendto(int fd, void __user buff, size_t len, unsigned* int flags,
2146	struct sockaddr __user addr, int* addr_len)
2147	{
2148	struct socket *sock;
2149	struct sockaddr_storage address;
2150	int err;
2151	struct msghdr msg;
2152
2153	err = import_ubuf(ITER_SOURCE, buf: buff, len, i: &msg.msg_iter);
2154	if (unlikely(err))
2155	return err;
2156
2157	CLASS(fd, f)(fd);
2158	if (fd_empty(f))
2159	return -EBADF;
2160	sock = sock_from_file(fd_file(f));
2161	if (unlikely(!sock))
2162	return -ENOTSOCK;
2163
2164	msg.msg_name = NULL;
2165	msg.msg_control = NULL;
2166	msg.msg_controllen = `0`;
2167	msg.msg_namelen = `0`;
2168	msg.msg_ubuf = NULL;
2169	if (addr) {
2170	err = move_addr_to_kernel(uaddr: addr, ulen: addr_len, kaddr: &address);
2171	if (err < `0`)
2172	return err;
2173	msg.msg_name = (struct sockaddr *)&address;
2174	msg.msg_namelen = addr_len;
2175	}
2176	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
2177	if (sock->file->f_flags & O_NONBLOCK)
2178	flags \|= MSG_DONTWAIT;
2179	msg.msg_flags = flags;
2180	return __sock_sendmsg(sock, msg: &msg);
2181	}
2182
2183	SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2184	unsigned int, flags, struct sockaddr __user *, addr,
2185	int, addr_len)
2186	{
2187	return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2188	}
2189
2190	/*
2191	* Send a datagram down a socket.
2192	*/
2193
2194	SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
2195	unsigned int, flags)
2196	{
2197	return __sys_sendto(fd, buff, len, flags, NULL, addr_len: `0`);
2198	}
2199
2200	/*
2201	* Receive a frame from the socket and optionally record the address of the
2202	* sender. We verify the buffers are writable and if needed move the
2203	* sender address from kernel to user space.
2204	*/
2205	int __sys_recvfrom(int fd, void __user ubuf, size_t size, unsigned* int flags,
2206	struct sockaddr __user addr, int* __user *addr_len)
2207	{
2208	struct sockaddr_storage address;
2209	struct msghdr msg = {
2210	/ Save some cycles and don't copy the address if not needed /
2211	.msg_name = addr ? (struct sockaddr *)&address : NULL,
2212	};
2213	struct socket *sock;
2214	int err, err2;
2215
2216	err = import_ubuf(ITER_DEST, buf: ubuf, len: size, i: &msg.msg_iter);
2217	if (unlikely(err))
2218	return err;
2219
2220	CLASS(fd, f)(fd);
2221
2222	if (fd_empty(f))
2223	return -EBADF;
2224	sock = sock_from_file(fd_file(f));
2225	if (unlikely(!sock))
2226	return -ENOTSOCK;
2227
2228	if (sock->file->f_flags & O_NONBLOCK)
2229	flags \|= MSG_DONTWAIT;
2230	err = sock_recvmsg(sock, &msg, flags);
2231
2232	if (err >= `0` && addr != NULL) {
2233	err2 = move_addr_to_user(kaddr: &address,
2234	klen: msg.msg_namelen, uaddr: addr, ulen: addr_len);
2235	if (err2 < `0`)
2236	err = err2;
2237	}
2238	return err;
2239	}
2240
2241	SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2242	unsigned int, flags, struct sockaddr __user *, addr,
2243	int __user *, addr_len)
2244	{
2245	return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2246	}
2247
2248	/*
2249	* Receive a datagram from a socket.
2250	*/
2251
2252	SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2253	unsigned int, flags)
2254	{
2255	return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
2256	}
2257
2258	static bool sock_use_custom_sol_socket(const struct socket *sock)
2259	{
2260	return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
2261	}
2262
2263	int do_sock_setsockopt(struct socket sock, bool compat, int* level,
2264	int optname, sockptr_t optval, int optlen)
2265	{
2266	const struct proto_ops *ops;
2267	char *kernel_optval = NULL;
2268	int err;
2269
2270	if (optlen < `0`)
2271	return -EINVAL;
2272
2273	err = security_socket_setsockopt(sock, level, optname);
2274	if (err)
2275	goto out_put;
2276
2277	if (!compat)
2278	err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
2279	optval, &optlen,
2280	&kernel_optval);
2281	if (err < `0`)
2282	goto out_put;
2283	if (err > `0`) {
2284	err = `0`;
2285	goto out_put;
2286	}
2287
2288	if (kernel_optval)
2289	optval = KERNEL_SOCKPTR(p: kernel_optval);
2290	ops = READ_ONCE(sock->ops);
2291	if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
2292	err = sock_setsockopt(sock, level, op: optname, optval, optlen);
2293	else if (unlikely(!ops->setsockopt))
2294	err = -EOPNOTSUPP;
2295	else
2296	err = ops->setsockopt(sock, level, optname, optval,
2297	optlen);
2298	kfree(objp: kernel_optval);
2299	out_put:
2300	return err;
2301	}
2302	EXPORT_SYMBOL(do_sock_setsockopt);
2303
2304	/ Set a socket option. Because we don't know the option lengths we have*
2305	* to pass the user mode parameter for the protocols to sort out.
2306	*/
2307	int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2308	int optlen)
2309	{
2310	sockptr_t optval = USER_SOCKPTR(p: user_optval);
2311	bool compat = in_compat_syscall();
2312	struct socket *sock;
2313	CLASS(fd, f)(fd);
2314
2315	if (fd_empty(f))
2316	return -EBADF;
2317	sock = sock_from_file(fd_file(f));
2318	if (unlikely(!sock))
2319	return -ENOTSOCK;
2320
2321	return do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
2322	}
2323
2324	SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2325	char __user , optval, int*, optlen)
2326	{
2327	return __sys_setsockopt(fd, level, optname, user_optval: optval, optlen);
2328	}
2329
2330	INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2331	int optname));
2332
2333	int do_sock_getsockopt(struct socket sock, bool compat, int* level,
2334	int optname, sockptr_t optval, sockptr_t optlen)
2335	{
2336	int max_optlen __maybe_unused = `0`;
2337	const struct proto_ops *ops;
2338	int err;
2339
2340	err = security_socket_getsockopt(sock, level, optname);
2341	if (err)
2342	return err;
2343
2344	if (!compat)
2345	copy_from_sockptr(dst: &max_optlen, src: optlen, size: sizeof(int));
2346
2347	ops = READ_ONCE(sock->ops);
2348	if (level == SOL_SOCKET) {
2349	err = sk_getsockopt(sk: sock->sk, level, optname, optval, optlen);
2350	} else if (unlikely(!ops->getsockopt)) {
2351	err = -EOPNOTSUPP;
2352	} else {
2353	if (WARN_ONCE(optval.is_kernel \|\| optlen.is_kernel,
2354	"Invalid argument type"))
2355	return -EOPNOTSUPP;
2356
2357	err = ops->getsockopt(sock, level, optname, optval.user,
2358	optlen.user);
2359	}
2360
2361	if (!compat)
2362	err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2363	optval, optlen, max_optlen,
2364	err);
2365
2366	return err;
2367	}
2368	EXPORT_SYMBOL(do_sock_getsockopt);
2369
2370	/*
2371	* Get a socket option. Because we don't know the option lengths we have
2372	* to pass a user mode parameter for the protocols to sort out.
2373	*/
2374	int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2375	int __user *optlen)
2376	{
2377	struct socket *sock;
2378	CLASS(fd, f)(fd);
2379
2380	if (fd_empty(f))
2381	return -EBADF;
2382	sock = sock_from_file(fd_file(f));
2383	if (unlikely(!sock))
2384	return -ENOTSOCK;
2385
2386	return do_sock_getsockopt(sock, in_compat_syscall(), level, optname,
2387	USER_SOCKPTR(p: optval), USER_SOCKPTR(p: optlen));
2388	}
2389
2390	SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2391	char __user , optval, int* __user *, optlen)
2392	{
2393	return __sys_getsockopt(fd, level, optname, optval, optlen);
2394	}
2395
2396	/*
2397	* Shutdown a socket.
2398	*/
2399
2400	int __sys_shutdown_sock(struct socket sock, int* how)
2401	{
2402	int err;
2403
2404	err = security_socket_shutdown(sock, how);
2405	if (!err)
2406	err = READ_ONCE(sock->ops)->shutdown(sock, how);
2407
2408	return err;
2409	}
2410
2411	int __sys_shutdown(int fd, int how)
2412	{
2413	struct socket *sock;
2414	CLASS(fd, f)(fd);
2415
2416	if (fd_empty(f))
2417	return -EBADF;
2418	sock = sock_from_file(fd_file(f));
2419	if (unlikely(!sock))
2420	return -ENOTSOCK;
2421
2422	return __sys_shutdown_sock(sock, how);
2423	}
2424
2425	SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2426	{
2427	return __sys_shutdown(fd, how);
2428	}
2429
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() is not self-contained: it expands to references to a variable
 * named "flags" and to "<msg>_compat", both of which must be in scope at the
 * point of use.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2436
2437	struct used_address {
2438	struct sockaddr_storage name;
2439	unsigned int name_len;
2440	};
2441
2442	int __copy_msghdr(struct msghdr *kmsg,
2443	struct user_msghdr *msg,
2444	struct sockaddr __user **save_addr)
2445	{
2446	ssize_t err;
2447
2448	kmsg->msg_control_is_user = true;
2449	kmsg->msg_get_inq = `0`;
2450	kmsg->msg_control_user = msg->msg_control;
2451	kmsg->msg_controllen = msg->msg_controllen;
2452	kmsg->msg_flags = msg->msg_flags;
2453
2454	kmsg->msg_namelen = msg->msg_namelen;
2455	if (!msg->msg_name)
2456	kmsg->msg_namelen = `0`;
2457
2458	if (kmsg->msg_namelen < `0`)
2459	return -EINVAL;
2460
2461	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
2462	kmsg->msg_namelen = sizeof(struct sockaddr_storage);
2463
2464	if (save_addr)
2465	*save_addr = msg->msg_name;
2466
2467	if (msg->msg_name && kmsg->msg_namelen) {
2468	if (!save_addr) {
2469	err = move_addr_to_kernel(uaddr: msg->msg_name,
2470	ulen: kmsg->msg_namelen,
2471	kaddr: kmsg->msg_name);
2472	if (err < `0`)
2473	return err;
2474	}
2475	} else {
2476	kmsg->msg_name = NULL;
2477	kmsg->msg_namelen = `0`;
2478	}
2479
2480	if (msg->msg_iovlen > UIO_MAXIOV)
2481	return -EMSGSIZE;
2482
2483	kmsg->msg_iocb = NULL;
2484	kmsg->msg_ubuf = NULL;
2485	return `0`;
2486	}
2487
2488	static int copy_msghdr_from_user(struct msghdr *kmsg,
2489	struct user_msghdr __user *umsg,
2490	struct sockaddr __user **save_addr,
2491	struct iovec **iov)
2492	{
2493	struct user_msghdr msg;
2494	ssize_t err;
2495
2496	if (copy_from_user(to: &msg, from: umsg, n: sizeof(*umsg)))
2497	return -EFAULT;
2498
2499	err = __copy_msghdr(kmsg, msg: &msg, save_addr);
2500	if (err)
2501	return err;
2502
2503	err = import_iovec(type: save_addr ? ITER_DEST : ITER_SOURCE,
2504	uvec: msg.msg_iov, nr_segs: msg.msg_iovlen,
2505	UIO_FASTIOV, iovp: iov, i: &kmsg->msg_iter);
2506	return err < `0` ? err : `0`;
2507	}
2508
2509	static int ____sys_sendmsg(struct socket sock, struct* msghdr *msg_sys,
2510	unsigned int flags, struct used_address *used_address,
2511	unsigned int allowed_msghdr_flags)
2512	{
2513	unsigned char ctl[sizeof(struct cmsghdr) + `20`]
2514	__aligned(sizeof(__kernel_size_t));
2515	/ 20 is size of ipv6_pktinfo /
2516	unsigned char *ctl_buf = ctl;
2517	int ctl_len;
2518	ssize_t err;
2519
2520	err = -ENOBUFS;
2521
2522	if (msg_sys->msg_controllen > INT_MAX)
2523	goto out;
2524	flags \|= (msg_sys->msg_flags & allowed_msghdr_flags);
2525	ctl_len = msg_sys->msg_controllen;
2526	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
2527	err =
2528	cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
2529	sizeof(ctl));
2530	if (err)
2531	goto out;
2532	ctl_buf = msg_sys->msg_control;
2533	ctl_len = msg_sys->msg_controllen;
2534	} else if (ctl_len) {
2535	BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2536	CMSG_ALIGN(sizeof(struct cmsghdr)));
2537	if (ctl_len > sizeof(ctl)) {
2538	ctl_buf = sock_kmalloc(sk: sock->sk, size: ctl_len, GFP_KERNEL);
2539	if (ctl_buf == NULL)
2540	goto out;
2541	}
2542	err = -EFAULT;
2543	if (copy_from_user(to: ctl_buf, from: msg_sys->msg_control_user, n: ctl_len))
2544	goto out_freectl;
2545	msg_sys->msg_control = ctl_buf;
2546	msg_sys->msg_control_is_user = false;
2547	}
2548	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
2549	msg_sys->msg_flags = flags;
2550
2551	if (sock->file->f_flags & O_NONBLOCK)
2552	msg_sys->msg_flags \|= MSG_DONTWAIT;
2553	/*
2554	* If this is sendmmsg() and current destination address is same as
2555	* previously succeeded address, omit asking LSM's decision.
2556	* used_address->name_len is initialized to UINT_MAX so that the first
2557	* destination address never matches.
2558	*/
2559	if (used_address && msg_sys->msg_name &&
2560	used_address->name_len == msg_sys->msg_namelen &&
2561	!memcmp(p: &used_address->name, q: msg_sys->msg_name,
2562	size: used_address->name_len)) {
2563	err = sock_sendmsg_nosec(sock, msg: msg_sys);
2564	goto out_freectl;
2565	}
2566	err = __sock_sendmsg(sock, msg: msg_sys);
2567	/*
2568	* If this is sendmmsg() and sending to current destination address was
2569	* successful, remember it.
2570	*/
2571	if (used_address && err >= `0`) {
2572	used_address->name_len = msg_sys->msg_namelen;
2573	if (msg_sys->msg_name)
2574	memcpy(&used_address->name, msg_sys->msg_name,
2575	used_address->name_len);
2576	}
2577
2578	out_freectl:
2579	if (ctl_buf != ctl)
2580	sock_kfree_s(sk: sock->sk, mem: ctl_buf, size: ctl_len);
2581	out:
2582	return err;
2583	}
2584
2585	static int sendmsg_copy_msghdr(struct msghdr *msg,
2586	struct user_msghdr __user umsg, unsigned* flags,
2587	struct iovec **iov)
2588	{
2589	int err;
2590
2591	if (flags & MSG_CMSG_COMPAT) {
2592	struct compat_msghdr __user *msg_compat;
2593
2594	msg_compat = (struct compat_msghdr __user *) umsg;
2595	err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2596	} else {
2597	err = copy_msghdr_from_user(kmsg: msg, umsg, NULL, iov);
2598	}
2599	if (err < `0`)
2600	return err;
2601
2602	return `0`;
2603	}
2604
2605	static int ___sys_sendmsg(struct socket sock, struct* user_msghdr __user *msg,
2606	struct msghdr msg_sys, unsigned* int flags,
2607	struct used_address *used_address,
2608	unsigned int allowed_msghdr_flags)
2609	{
2610	struct sockaddr_storage address;
2611	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2612	ssize_t err;
2613
2614	msg_sys->msg_name = &address;
2615
2616	err = sendmsg_copy_msghdr(msg: msg_sys, umsg: msg, flags, iov: &iov);
2617	if (err < `0`)
2618	return err;
2619
2620	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2621	allowed_msghdr_flags);
2622	kfree(objp: iov);
2623	return err;
2624	}
2625
2626	/*
2627	* BSD sendmsg interface
2628	*/
2629	long __sys_sendmsg_sock(struct socket sock, struct* msghdr *msg,
2630	unsigned int flags)
2631	{
2632	return ____sys_sendmsg(sock, msg_sys: msg, flags, NULL, allowed_msghdr_flags: `0`);
2633	}
2634
2635	long __sys_sendmsg(int fd, struct user_msghdr __user msg, unsigned* int flags,
2636	bool forbid_cmsg_compat)
2637	{
2638	struct msghdr msg_sys;
2639	struct socket *sock;
2640
2641	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2642	return -EINVAL;
2643
2644	CLASS(fd, f)(fd);
2645
2646	if (fd_empty(f))
2647	return -EBADF;
2648	sock = sock_from_file(fd_file(f));
2649	if (unlikely(!sock))
2650	return -ENOTSOCK;
2651
2652	return ___sys_sendmsg(sock, msg, msg_sys: &msg_sys, flags, NULL, allowed_msghdr_flags: `0`);
2653	}
2654
2655	SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user , msg, unsigned* int, flags)
2656	{
2657	return __sys_sendmsg(fd, msg, flags, forbid_cmsg_compat: true);
2658	}
2659
2660	/*
2661	* Linux sendmmsg interface
2662	*/
2663
2664	int __sys_sendmmsg(int fd, struct mmsghdr __user mmsg, unsigned* int vlen,
2665	unsigned int flags, bool forbid_cmsg_compat)
2666	{
2667	int err, datagrams;
2668	struct socket *sock;
2669	struct mmsghdr __user *entry;
2670	struct compat_mmsghdr __user *compat_entry;
2671	struct msghdr msg_sys;
2672	struct used_address used_address;
2673	unsigned int oflags = flags;
2674
2675	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2676	return -EINVAL;
2677
2678	if (vlen > UIO_MAXIOV)
2679	vlen = UIO_MAXIOV;
2680
2681	datagrams = `0`;
2682
2683	CLASS(fd, f)(fd);
2684
2685	if (fd_empty(f))
2686	return -EBADF;
2687	sock = sock_from_file(fd_file(f));
2688	if (unlikely(!sock))
2689	return -ENOTSOCK;
2690
2691	used_address.name_len = UINT_MAX;
2692	entry = mmsg;
2693	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2694	err = `0`;
2695	flags \|= MSG_BATCH;
2696
2697	while (datagrams < vlen) {
2698	if (datagrams == vlen - `1`)
2699	flags = oflags;
2700
2701	if (MSG_CMSG_COMPAT & flags) {
2702	err = ___sys_sendmsg(sock, msg: (struct user_msghdr __user *)compat_entry,
2703	msg_sys: &msg_sys, flags, used_address: &used_address, MSG_EOR);
2704	if (err < `0`)
2705	break;
2706	err = __put_user(err, &compat_entry->msg_len);
2707	++compat_entry;
2708	} else {
2709	err = ___sys_sendmsg(sock,
2710	msg: (struct user_msghdr __user *)entry,
2711	msg_sys: &msg_sys, flags, used_address: &used_address, MSG_EOR);
2712	if (err < `0`)
2713	break;
2714	err = put_user(err, &entry->msg_len);
2715	++entry;
2716	}
2717
2718	if (err)
2719	break;
2720	++datagrams;
2721	if (msg_data_left(msg: &msg_sys))
2722	break;
2723	cond_resched();
2724	}
2725
2726	/ We only return an error if no datagrams were able to be sent /
2727	if (datagrams != `0`)
2728	return datagrams;
2729
2730	return err;
2731	}
2732
2733	SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2734	unsigned int, vlen, unsigned int, flags)
2735	{
2736	return __sys_sendmmsg(fd, mmsg, vlen, flags, forbid_cmsg_compat: true);
2737	}
2738
2739	static int recvmsg_copy_msghdr(struct msghdr *msg,
2740	struct user_msghdr __user umsg, unsigned* flags,
2741	struct sockaddr __user **uaddr,
2742	struct iovec **iov)
2743	{
2744	ssize_t err;
2745
2746	if (MSG_CMSG_COMPAT & flags) {
2747	struct compat_msghdr __user *msg_compat;
2748
2749	msg_compat = (struct compat_msghdr __user *) umsg;
2750	err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2751	} else {
2752	err = copy_msghdr_from_user(kmsg: msg, umsg, save_addr: uaddr, iov);
2753	}
2754	if (err < `0`)
2755	return err;
2756
2757	return `0`;
2758	}
2759
2760	static int ____sys_recvmsg(struct socket sock, struct* msghdr *msg_sys,
2761	struct user_msghdr __user *msg,
2762	struct sockaddr __user *uaddr,
2763	unsigned int flags, int nosec)
2764	{
2765	struct compat_msghdr __user *msg_compat =
2766	(struct compat_msghdr __user *) msg;
2767	int __user *uaddr_len = COMPAT_NAMELEN(msg);
2768	struct sockaddr_storage addr;
2769	unsigned long cmsg_ptr;
2770	int len;
2771	ssize_t err;
2772
2773	msg_sys->msg_name = &addr;
2774	cmsg_ptr = (unsigned long)msg_sys->msg_control;
2775	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC\|MSG_CMSG_COMPAT);
2776
2777	/ We assume all kernel code knows the size of sockaddr_storage /
2778	msg_sys->msg_namelen = `0`;
2779
2780	if (sock->file->f_flags & O_NONBLOCK)
2781	flags \|= MSG_DONTWAIT;
2782
2783	if (unlikely(nosec))
2784	err = sock_recvmsg_nosec(sock, msg: msg_sys, flags);
2785	else
2786	err = sock_recvmsg(sock, msg_sys, flags);
2787
2788	if (err < `0`)
2789	goto out;
2790	len = err;
2791
2792	if (uaddr != NULL) {
2793	err = move_addr_to_user(kaddr: &addr,
2794	klen: msg_sys->msg_namelen, uaddr,
2795	ulen: uaddr_len);
2796	if (err < `0`)
2797	goto out;
2798	}
2799	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2800	COMPAT_FLAGS(msg));
2801	if (err)
2802	goto out;
2803	if (MSG_CMSG_COMPAT & flags)
2804	err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2805	&msg_compat->msg_controllen);
2806	else
2807	err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2808	&msg->msg_controllen);
2809	if (err)
2810	goto out;
2811	err = len;
2812	out:
2813	return err;
2814	}
2815
2816	static int ___sys_recvmsg(struct socket sock, struct* user_msghdr __user *msg,
2817	struct msghdr msg_sys, unsigned* int flags, int nosec)
2818	{
2819	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2820	/ user mode address pointers /
2821	struct sockaddr __user *uaddr;
2822	ssize_t err;
2823
2824	err = recvmsg_copy_msghdr(msg: msg_sys, umsg: msg, flags, uaddr: &uaddr, iov: &iov);
2825	if (err < `0`)
2826	return err;
2827
2828	err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
2829	kfree(objp: iov);
2830	return err;
2831	}
2832
2833	/*
2834	* BSD recvmsg interface
2835	*/
2836
2837	long __sys_recvmsg_sock(struct socket sock, struct* msghdr *msg,
2838	struct user_msghdr __user *umsg,
2839	struct sockaddr __user uaddr, unsigned* int flags)
2840	{
2841	return ____sys_recvmsg(sock, msg_sys: msg, msg: umsg, uaddr, flags, nosec: `0`);
2842	}
2843
2844	long __sys_recvmsg(int fd, struct user_msghdr __user msg, unsigned* int flags,
2845	bool forbid_cmsg_compat)
2846	{
2847	struct msghdr msg_sys;
2848	struct socket *sock;
2849
2850	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2851	return -EINVAL;
2852
2853	CLASS(fd, f)(fd);
2854
2855	if (fd_empty(f))
2856	return -EBADF;
2857	sock = sock_from_file(fd_file(f));
2858	if (unlikely(!sock))
2859	return -ENOTSOCK;
2860
2861	return ___sys_recvmsg(sock, msg, msg_sys: &msg_sys, flags, nosec: `0`);
2862	}
2863
2864	SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2865	unsigned int, flags)
2866	{
2867	return __sys_recvmsg(fd, msg, flags, forbid_cmsg_compat: true);
2868	}
2869
2870	/*
2871	* Linux recvmmsg interface
2872	*/
2873
2874	static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2875	unsigned int vlen, unsigned int flags,
2876	struct timespec64 *timeout)
2877	{
2878	int err = `0`, datagrams;
2879	struct socket *sock;
2880	struct mmsghdr __user *entry;
2881	struct compat_mmsghdr __user *compat_entry;
2882	struct msghdr msg_sys;
2883	struct timespec64 end_time;
2884	struct timespec64 timeout64;
2885
2886	if (timeout &&
2887	poll_select_set_timeout(to: &end_time, sec: timeout->tv_sec,
2888	nsec: timeout->tv_nsec))
2889	return -EINVAL;
2890
2891	datagrams = `0`;
2892
2893	CLASS(fd, f)(fd);
2894
2895	if (fd_empty(f))
2896	return -EBADF;
2897	sock = sock_from_file(fd_file(f));
2898	if (unlikely(!sock))
2899	return -ENOTSOCK;
2900
2901	if (likely(!(flags & MSG_ERRQUEUE))) {
2902	err = sock_error(sk: sock->sk);
2903	if (err)
2904	return err;
2905	}
2906
2907	entry = mmsg;
2908	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2909
2910	while (datagrams < vlen) {
2911	/*
2912	* No need to ask LSM for more than the first datagram.
2913	*/
2914	if (MSG_CMSG_COMPAT & flags) {
2915	err = ___sys_recvmsg(sock, msg: (struct user_msghdr __user *)compat_entry,
2916	msg_sys: &msg_sys, flags: flags & ~MSG_WAITFORONE,
2917	nosec: datagrams);
2918	if (err < `0`)
2919	break;
2920	err = __put_user(err, &compat_entry->msg_len);
2921	++compat_entry;
2922	} else {
2923	err = ___sys_recvmsg(sock,
2924	msg: (struct user_msghdr __user *)entry,
2925	msg_sys: &msg_sys, flags: flags & ~MSG_WAITFORONE,
2926	nosec: datagrams);
2927	if (err < `0`)
2928	break;
2929	err = put_user(err, &entry->msg_len);
2930	++entry;
2931	}
2932
2933	if (err)
2934	break;
2935	++datagrams;
2936
2937	/ MSG_WAITFORONE turns on MSG_DONTWAIT after one packet /
2938	if (flags & MSG_WAITFORONE)
2939	flags \|= MSG_DONTWAIT;
2940
2941	if (timeout) {
2942	ktime_get_ts64(ts: &timeout64);
2943	*timeout = timespec64_sub(lhs: end_time, rhs: timeout64);
2944	if (timeout->tv_sec < `0`) {
2945	timeout->tv_sec = timeout->tv_nsec = `0`;
2946	break;
2947	}
2948
2949	/ Timeout, return less than vlen datagrams /
2950	if (timeout->tv_nsec == `0` && timeout->tv_sec == `0`)
2951	break;
2952	}
2953
2954	/ Out of band data, return right away /
2955	if (msg_sys.msg_flags & MSG_OOB)
2956	break;
2957	cond_resched();
2958	}
2959
2960	if (err == `0`)
2961	return datagrams;
2962
2963	if (datagrams == `0`)
2964	return err;
2965
2966	/*
2967	* We may return less entries than requested (vlen) if the
2968	* sock is non block and there aren't enough datagrams...
2969	*/
2970	if (err != -EAGAIN) {
2971	/*
2972	* ... or if recvmsg returns an error after we
2973	* received some datagrams, where we record the
2974	* error to return on the next call or if the
2975	* app asks about it using getsockopt(SO_ERROR).
2976	*/
2977	WRITE_ONCE(sock->sk->sk_err, -err);
2978	}
2979	return datagrams;
2980	}
2981
2982	int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2983	unsigned int vlen, unsigned int flags,
2984	struct __kernel_timespec __user *timeout,
2985	struct old_timespec32 __user *timeout32)
2986	{
2987	int datagrams;
2988	struct timespec64 timeout_sys;
2989
2990	if (timeout && get_timespec64(ts: &timeout_sys, uts: timeout))
2991	return -EFAULT;
2992
2993	if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
2994	return -EFAULT;
2995
2996	if (!timeout && !timeout32)
2997	return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2998
2999	datagrams = do_recvmmsg(fd, mmsg, vlen, flags, timeout: &timeout_sys);
3000
3001	if (datagrams <= `0`)
3002	return datagrams;
3003
3004	if (timeout && put_timespec64(ts: &timeout_sys, uts: timeout))
3005	datagrams = -EFAULT;
3006
3007	if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
3008	datagrams = -EFAULT;
3009
3010	return datagrams;
3011	}
3012
3013	SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
3014	unsigned int, vlen, unsigned int, flags,
3015	struct __kernel_timespec __user *, timeout)
3016	{
3017	if (flags & MSG_CMSG_COMPAT)
3018	return -EINVAL;
3019
3020	return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
3021	}
3022
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 *	recvmmsg(2) entry point taking the old 32-bit timespec layout.
 *	MSG_CMSG_COMPAT is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
3034
3035	#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the number of bytes of arguments to copy from user space
 * for socketcall number "call": the argument count times
 * sizeof(unsigned long). Index 0 is unused by socketcall(), which rejects
 * call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
3046
3047	/*
3048	* System call vectors.
3049	*
3050	* Argument checking cleaned up. Saved 20% in size.
3051	* This function doesn't need to set the kernel lock because
3052	* it is set by the callees.
3053	*/
3054
3055	SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
3056	{
3057	unsigned long a[AUDITSC_ARGS];
3058	unsigned long a0, a1;
3059	int err;
3060	unsigned int len;
3061
3062	if (call < `1` \|\| call > SYS_SENDMMSG)
3063	return -EINVAL;
3064	call = array_index_nospec(call, SYS_SENDMMSG + `1`);
3065
3066	len = nargs[call];
3067	if (len > sizeof(a))
3068	return -EINVAL;
3069
3070	/ copy_from_user should be SMP safe. /
3071	if (copy_from_user(to: a, from: args, n: len))
3072	return -EFAULT;
3073
3074	err = audit_socketcall(nargs: nargs[call] / sizeof(unsigned long), args: a);
3075	if (err)
3076	return err;
3077
3078	a0 = a[`0`];
3079	a1 = a[`1`];
3080
3081	switch (call) {
3082	case SYS_SOCKET:
3083	err = __sys_socket(family: a0, type: a1, protocol: a[`2`]);
3084	break;
3085	case SYS_BIND:
3086	err = __sys_bind(fd: a0, umyaddr: (struct sockaddr __user *)a1, addrlen: a[`2`]);
3087	break;
3088	case SYS_CONNECT:
3089	err = __sys_connect(fd: a0, uservaddr: (struct sockaddr __user *)a1, addrlen: a[`2`]);
3090	break;
3091	case SYS_LISTEN:
3092	err = __sys_listen(fd: a0, backlog: a1);
3093	break;
3094	case SYS_ACCEPT:
3095	err = __sys_accept4(fd: a0, upeer_sockaddr: (struct sockaddr __user *)a1,
3096	upeer_addrlen: (int __user *)a[`2`], flags: `0`);
3097	break;
3098	case SYS_GETSOCKNAME:
3099	err =
3100	__sys_getsockname(fd: a0, usockaddr: (struct sockaddr __user *)a1,
3101	usockaddr_len: (int __user *)a[`2`]);
3102	break;
3103	case SYS_GETPEERNAME:
3104	err =
3105	__sys_getpeername(fd: a0, usockaddr: (struct sockaddr __user *)a1,
3106	usockaddr_len: (int __user *)a[`2`]);
3107	break;
3108	case SYS_SOCKETPAIR:
3109	err = __sys_socketpair(family: a0, type: a1, protocol: a[`2`], usockvec: (int __user *)a[`3`]);
3110	break;
3111	case SYS_SEND:
3112	err = __sys_sendto(fd: a0, buff: (void __user *)a1, len: a[`2`], flags: a[`3`],
3113	NULL, addr_len: `0`);
3114	break;
3115	case SYS_SENDTO:
3116	err = __sys_sendto(fd: a0, buff: (void __user *)a1, len: a[`2`], flags: a[`3`],
3117	addr: (struct sockaddr __user *)a[`4`], addr_len: a[`5`]);
3118	break;
3119	case SYS_RECV:
3120	err = __sys_recvfrom(fd: a0, ubuf: (void __user *)a1, size: a[`2`], flags: a[`3`],
3121	NULL, NULL);
3122	break;
3123	case SYS_RECVFROM:
3124	err = __sys_recvfrom(fd: a0, ubuf: (void __user *)a1, size: a[`2`], flags: a[`3`],
3125	addr: (struct sockaddr __user *)a[`4`],
3126	addr_len: (int __user *)a[`5`]);
3127	break;
3128	case SYS_SHUTDOWN:
3129	err = __sys_shutdown(fd: a0, how: a1);
3130	break;
3131	case SYS_SETSOCKOPT:
3132	err = __sys_setsockopt(fd: a0, level: a1, optname: a[`2`], user_optval: (char __user *)a[`3`],
3133	optlen: a[`4`]);
3134	break;
3135	case SYS_GETSOCKOPT:
3136	err =
3137	__sys_getsockopt(fd: a0, level: a1, optname: a[`2`], optval: (char __user *)a[`3`],
3138	optlen: (int __user *)a[`4`]);
3139	break;
3140	case SYS_SENDMSG:
3141	err = __sys_sendmsg(fd: a0, msg: (struct user_msghdr __user *)a1,
3142	flags: a[`2`], forbid_cmsg_compat: true);
3143	break;
3144	case SYS_SENDMMSG:
3145	err = __sys_sendmmsg(fd: a0, mmsg: (struct mmsghdr __user *)a1, vlen: a[`2`],
3146	flags: a[`3`], forbid_cmsg_compat: true);
3147	break;
3148	case SYS_RECVMSG:
3149	err = __sys_recvmsg(fd: a0, msg: (struct user_msghdr __user *)a1,
3150	flags: a[`2`], forbid_cmsg_compat: true);
3151	break;
3152	case SYS_RECVMMSG:
3153	if (IS_ENABLED(CONFIG_64BIT))
3154	err = __sys_recvmmsg(fd: a0, mmsg: (struct mmsghdr __user *)a1,
3155	vlen: a[`2`], flags: a[`3`],
3156	timeout: (struct __kernel_timespec __user *)a[`4`],
3157	NULL);
3158	else
3159	err = __sys_recvmmsg(fd: a0, mmsg: (struct mmsghdr __user *)a1,
3160	vlen: a[`2`], flags: a[`3`], NULL,
3161	timeout32: (struct old_timespec32 __user *)a[`4`]);
3162	break;
3163	case SYS_ACCEPT4:
3164	err = __sys_accept4(fd: a0, upeer_sockaddr: (struct sockaddr __user *)a1,
3165	upeer_addrlen: (int __user *)a[`2`], flags: a[`3`]);
3166	break;
3167	default:
3168	err = -EINVAL;
3169	break;
3170	}
3171	return err;
3172	}
3173
3174	#endif /* __ARCH_WANT_SYS_SOCKETCALL */
3175
3176	/**
3177	* sock_register - add a socket protocol handler
3178	* @ops: description of protocol
3179	*
3180	* This function is called by a protocol handler that wants to
3181	* advertise its address family, and have it linked into the
3182	* socket interface. The value ops->family corresponds to the
3183	* socket system call protocol family.
3184	*/
3185	int sock_register(const struct net_proto_family *ops)
3186	{
3187	int err;
3188
3189	if (ops->family >= NPROTO) {
3190	pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
3191	return -ENOBUFS;
3192	}
3193
3194	spin_lock(lock: &net_family_lock);
3195	if (rcu_dereference_protected(net_families[ops->family],
3196	lockdep_is_held(&net_family_lock)))
3197	err = -EEXIST;
3198	else {
3199	rcu_assign_pointer(net_families[ops->family], ops);
3200	err = `0`;
3201	}
3202	spin_unlock(lock: &net_family_lock);
3203
3204	pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
3205	return err;
3206	}
3207	EXPORT_SYMBOL(sock_register);
3208
3209	/**
3210	* sock_unregister - remove a protocol handler
3211	* @family: protocol family to remove
3212	*
3213	* This function is called by a protocol handler that wants to
3214	* remove its address family, and have it unlinked from the
3215	* new socket creation.
3216	*
3217	* If protocol handler is a module, then it can use module reference
3218	* counts to protect against new references. If protocol handler is not
3219	* a module then it needs to provide its own protection in
3220	* the ops->create routine.
3221	*/
3222	void sock_unregister(int family)
3223	{
3224	BUG_ON(family < `0` \|\| family >= NPROTO);
3225
3226	spin_lock(lock: &net_family_lock);
3227	RCU_INIT_POINTER(net_families[family], NULL);
3228	spin_unlock(lock: &net_family_lock);
3229
3230	synchronize_rcu();
3231
3232	pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
3233	}
3234	EXPORT_SYMBOL(sock_unregister);
3235
3236	bool sock_is_registered(int family)
3237	{
3238	return family < NPROTO && rcu_access_pointer(net_families[family]);
3239	}
3240
3241	static int __init sock_init(void)
3242	{
3243	int err;
3244	/*
3245	* Initialize the network sysctl infrastructure.
3246	*/
3247	err = net_sysctl_init();
3248	if (err)
3249	goto out;
3250
3251	/*
3252	* Initialize skbuff SLAB cache
3253	*/
3254	skb_init();
3255
3256	/*
3257	* Initialize the protocols module.
3258	*/
3259
3260	init_inodecache();
3261
3262	err = register_filesystem(&sock_fs_type);
3263	if (err)
3264	goto out;
3265	sock_mnt = kern_mount(&sock_fs_type);
3266	if (IS_ERR(ptr: sock_mnt)) {
3267	err = PTR_ERR(ptr: sock_mnt);
3268	goto out_mount;
3269	}
3270
3271	/ The real protocol initialization is performed in later initcalls.*
3272	*/
3273
3274	#ifdef CONFIG_NETFILTER
3275	err = netfilter_init();
3276	if (err)
3277	goto out;
3278	#endif
3279
3280	ptp_classifier_init();
3281
3282	out:
3283	return err;
3284
3285	out_mount:
3286	unregister_filesystem(&sock_fs_type);
3287	goto out;
3288	}
3289
3290	core_initcall(sock_init); / early initcall /
3291
#ifdef CONFIG_PROC_FS
/*
 * Emit the "sockets: used N" line into @seq, where N is sock_inuse_get()
 * for the value stored in seq->private.
 */
void socket_seq_show(struct seq_file *seq)
{
	seq_printf(seq, "sockets: used %d\n",
		   sock_inuse_get(seq->private));
}
#endif				/* CONFIG_PROC_FS */
3299
3300	/ Handle the fact that while struct ifreq has the same layout on*
3301	* 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3302	* which are handled elsewhere, it still has different size due to
3303	* ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3304	* resulting in struct ifreq being 32 and 40 bytes respectively).
3305	* As a result, if the struct happens to be at the end of a page and
3306	* the next page isn't readable/writable, we get a fault. To prevent
3307	* that, copy back and forth to the full size.
3308	*/
3309	int get_user_ifreq(struct ifreq ifr, void* __user *ifrdata, void* __user *arg)
3310	{
3311	if (in_compat_syscall()) {
3312	struct compat_ifreq ifr32 = (struct* compat_ifreq *)ifr;
3313
3314	memset(ifr, `0`, sizeof(*ifr));
3315	if (copy_from_user(to: ifr32, from: arg, n: sizeof(*ifr32)))
3316	return -EFAULT;
3317
3318	if (ifrdata)
3319	*ifrdata = compat_ptr(uptr: ifr32->ifr_data);
3320
3321	return `0`;
3322	}
3323
3324	if (copy_from_user(to: ifr, from: arg, n: sizeof(*ifr)))
3325	return -EFAULT;
3326
3327	if (ifrdata)
3328	*ifrdata = ifr->ifr_data;
3329
3330	return `0`;
3331	}
3332	EXPORT_SYMBOL(get_user_ifreq);
3333
3334	int put_user_ifreq(struct ifreq ifr, void* __user *arg)
3335	{
3336	size_t size = sizeof(*ifr);
3337
3338	if (in_compat_syscall())
3339	size = sizeof(struct compat_ifreq);
3340
3341	if (copy_to_user(to: arg, from: ifr, n: size))
3342	return -EFAULT;
3343
3344	return `0`;
3345	}
3346	EXPORT_SYMBOL(put_user_ifreq);
3347
3348	#ifdef CONFIG_COMPAT
3349	static int compat_siocwandev(struct net net, struct* compat_ifreq __user *uifr32)
3350	{
3351	compat_uptr_t uptr32;
3352	struct ifreq ifr;
3353	void __user *saved;
3354	int err;
3355
3356	if (get_user_ifreq(&ifr, NULL, uifr32))
3357	return -EFAULT;
3358
3359	if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3360	return -EFAULT;
3361
3362	saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3363	ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr: uptr32);
3364
3365	err = dev_ioctl(net, SIOCWANDEV, ifr: &ifr, NULL, NULL);
3366	if (!err) {
3367	ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3368	if (put_user_ifreq(&ifr, uifr32))
3369	err = -EFAULT;
3370	}
3371	return err;
3372	}
3373
3374	/ Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted /
3375	static int compat_ifr_data_ioctl(struct net net, unsigned* int cmd,
3376	struct compat_ifreq __user *u_ifreq32)
3377	{
3378	struct ifreq ifreq;
3379	void __user *data;
3380
3381	if (!is_socket_ioctl_cmd(cmd))
3382	return -ENOTTY;
3383	if (get_user_ifreq(&ifreq, &data, u_ifreq32))
3384	return -EFAULT;
3385	ifreq.ifr_data = data;
3386
3387	return dev_ioctl(net, cmd, ifr: &ifreq, data, NULL);
3388	}
3389
3390	static int compat_sock_ioctl_trans(struct file file, struct* socket *sock,
3391	unsigned int cmd, unsigned long arg)
3392	{
3393	void __user *argp = compat_ptr(uptr: arg);
3394	struct sock *sk = sock->sk;
3395	struct net *net = sock_net(sk);
3396	const struct proto_ops *ops;
3397
3398	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + `15`))
3399	return sock_ioctl(file, cmd, arg: (unsigned long)argp);
3400
3401	switch (cmd) {
3402	case SIOCWANDEV:
3403	return compat_siocwandev(net, uifr32: argp);
3404	case SIOCGSTAMP_OLD:
3405	case SIOCGSTAMPNS_OLD:
3406	ops = READ_ONCE(sock->ops);
3407	if (!ops->gettstamp)
3408	return -ENOIOCTLCMD;
3409	return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3410	!COMPAT_USE_64BIT_TIME);
3411
3412	case SIOCETHTOOL:
3413	case SIOCBONDSLAVEINFOQUERY:
3414	case SIOCBONDINFOQUERY:
3415	case SIOCSHWTSTAMP:
3416	case SIOCGHWTSTAMP:
3417	return compat_ifr_data_ioctl(net, cmd, u_ifreq32: argp);
3418
3419	case FIOSETOWN:
3420	case SIOCSPGRP:
3421	case FIOGETOWN:
3422	case SIOCGPGRP:
3423	case SIOCBRADDBR:
3424	case SIOCBRDELBR:
3425	case SIOCBRADDIF:
3426	case SIOCBRDELIF:
3427	case SIOCGIFVLAN:
3428	case SIOCSIFVLAN:
3429	case SIOCGSKNS:
3430	case SIOCGSTAMP_NEW:
3431	case SIOCGSTAMPNS_NEW:
3432	case SIOCGIFCONF:
3433	case SIOCSIFBR:
3434	case SIOCGIFBR:
3435	return sock_ioctl(file, cmd, arg);
3436
3437	case SIOCGIFFLAGS:
3438	case SIOCSIFFLAGS:
3439	case SIOCGIFMAP:
3440	case SIOCSIFMAP:
3441	case SIOCGIFMETRIC:
3442	case SIOCSIFMETRIC:
3443	case SIOCGIFMTU:
3444	case SIOCSIFMTU:
3445	case SIOCGIFMEM:
3446	case SIOCSIFMEM:
3447	case SIOCGIFHWADDR:
3448	case SIOCSIFHWADDR:
3449	case SIOCADDMULTI:
3450	case SIOCDELMULTI:
3451	case SIOCGIFINDEX:
3452	case SIOCGIFADDR:
3453	case SIOCSIFADDR:
3454	case SIOCSIFHWBROADCAST:
3455	case SIOCDIFADDR:
3456	case SIOCGIFBRDADDR:
3457	case SIOCSIFBRDADDR:
3458	case SIOCGIFDSTADDR:
3459	case SIOCSIFDSTADDR:
3460	case SIOCGIFNETMASK:
3461	case SIOCSIFNETMASK:
3462	case SIOCSIFPFLAGS:
3463	case SIOCGIFPFLAGS:
3464	case SIOCGIFTXQLEN:
3465	case SIOCSIFTXQLEN:
3466	case SIOCGIFNAME:
3467	case SIOCSIFNAME:
3468	case SIOCGMIIPHY:
3469	case SIOCGMIIREG:
3470	case SIOCSMIIREG:
3471	case SIOCBONDENSLAVE:
3472	case SIOCBONDRELEASE:
3473	case SIOCBONDSETHWADDR:
3474	case SIOCBONDCHANGEACTIVE:
3475	case SIOCSARP:
3476	case SIOCGARP:
3477	case SIOCDARP:
3478	case SIOCOUTQ:
3479	case SIOCOUTQNSD:
3480	case SIOCATMARK:
3481	return sock_do_ioctl(net, sock, cmd, arg);
3482	}
3483
3484	return -ENOIOCTLCMD;
3485	}
3486
3487	static long compat_sock_ioctl(struct file file, unsigned* int cmd,
3488	unsigned long arg)
3489	{
3490	struct socket *sock = file->private_data;
3491	const struct proto_ops *ops = READ_ONCE(sock->ops);
3492	int ret = -ENOIOCTLCMD;
3493	struct sock *sk;
3494	struct net *net;
3495
3496	sk = sock->sk;
3497	net = sock_net(sk);
3498
3499	if (ops->compat_ioctl)
3500	ret = ops->compat_ioctl(sock, cmd, arg);
3501
3502	if (ret == -ENOIOCTLCMD &&
3503	(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3504	ret = compat_wext_handle_ioctl(net, cmd, arg);
3505
3506	if (ret == -ENOIOCTLCMD)
3507	ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3508
3509	return ret;
3510	}
3511	#endif
3512
3513	/**
3514	* kernel_bind - bind an address to a socket (kernel space)
3515	* @sock: socket
3516	* @addr: address
3517	* @addrlen: length of address
3518	*
3519	* Returns 0 or an error.
3520	*/
3521
3522	int kernel_bind(struct socket sock, struct* sockaddr addr, int* addrlen)
3523	{
3524	struct sockaddr_storage address;
3525
3526	memcpy(&address, addr, addrlen);
3527
3528	return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
3529	addrlen);
3530	}
3531	EXPORT_SYMBOL(kernel_bind);
3532
3533	/**
3534	* kernel_listen - move socket to listening state (kernel space)
3535	* @sock: socket
3536	* @backlog: pending connections queue size
3537	*
3538	* Returns 0 or an error.
3539	*/
3540
3541	int kernel_listen(struct socket sock, int* backlog)
3542	{
3543	return READ_ONCE(sock->ops)->listen(sock, backlog);
3544	}
3545	EXPORT_SYMBOL(kernel_listen);
3546
3547	/**
3548	* kernel_accept - accept a connection (kernel space)
3549	* @sock: listening socket
3550	* @newsock: new connected socket
3551	* @flags: flags
3552	*
3553	* @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3554	* If it fails, @newsock is guaranteed to be %NULL.
3555	* Returns 0 or an error.
3556	*/
3557
3558	int kernel_accept(struct socket sock, struct* socket *newsock, int* flags)
3559	{
3560	struct sock *sk = sock->sk;
3561	const struct proto_ops *ops = READ_ONCE(sock->ops);
3562	struct proto_accept_arg arg = {
3563	.flags = flags,
3564	.kern = true,
3565	};
3566	int err;
3567
3568	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3569	newsock);
3570	if (err < `0`)
3571	goto done;
3572
3573	err = ops->accept(sock, *newsock, &arg);
3574	if (err < `0`) {
3575	sock_release(*newsock);
3576	*newsock = NULL;
3577	goto done;
3578	}
3579
3580	(*newsock)->ops = ops;
3581	__module_get(module: ops->owner);
3582
3583	done:
3584	return err;
3585	}
3586	EXPORT_SYMBOL(kernel_accept);
3587
3588	/**
3589	* kernel_connect - connect a socket (kernel space)
3590	* @sock: socket
3591	* @addr: address
3592	* @addrlen: address length
3593	* @flags: flags (O_NONBLOCK, ...)
3594	*
3595	* For datagram sockets, @addr is the address to which datagrams are sent
3596	* by default, and the only address from which datagrams are received.
3597	* For stream sockets, attempts to connect to @addr.
3598	* Returns 0 or an error code.
3599	*/
3600
3601	int kernel_connect(struct socket sock, struct* sockaddr addr, int* addrlen,
3602	int flags)
3603	{
3604	struct sockaddr_storage address;
3605
3606	memcpy(&address, addr, addrlen);
3607
3608	return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3609	addrlen, flags);
3610	}
3611	EXPORT_SYMBOL(kernel_connect);
3612
3613	/**
3614	* kernel_getsockname - get the address which the socket is bound (kernel space)
3615	* @sock: socket
3616	* @addr: address holder
3617	*
3618	* Fills the @addr pointer with the address which the socket is bound.
3619	* Returns the length of the address in bytes or an error code.
3620	*/
3621
3622	int kernel_getsockname(struct socket sock, struct* sockaddr *addr)
3623	{
3624	return READ_ONCE(sock->ops)->getname(sock, addr, `0`);
3625	}
3626	EXPORT_SYMBOL(kernel_getsockname);
3627
3628	/**
3629	* kernel_getpeername - get the address which the socket is connected (kernel space)
3630	* @sock: socket
3631	* @addr: address holder
3632	*
3633	* Fills the @addr pointer with the address which the socket is connected.
3634	* Returns the length of the address in bytes or an error code.
3635	*/
3636
3637	int kernel_getpeername(struct socket sock, struct* sockaddr *addr)
3638	{
3639	return READ_ONCE(sock->ops)->getname(sock, addr, `1`);
3640	}
3641	EXPORT_SYMBOL(kernel_getpeername);
3642
3643	/**
3644	* kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
3645	* @sock: socket
3646	* @how: connection part
3647	*
3648	* Returns 0 or an error.
3649	*/
3650
3651	int kernel_sock_shutdown(struct socket sock, enum* sock_shutdown_cmd how)
3652	{
3653	return READ_ONCE(sock->ops)->shutdown(sock, how);
3654	}
3655	EXPORT_SYMBOL(kernel_sock_shutdown);
3656
3657	/**
3658	* kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3659	* @sk: socket
3660	*
3661	* This routine returns the IP overhead imposed by a socket i.e.
3662	* the length of the underlying IP header, depending on whether
3663	* this is an IPv4 or IPv6 socket and the length from IP options turned
3664	* on at the socket. Assumes that the caller has a lock on the socket.
3665	*/
3666
3667	u32 kernel_sock_ip_overhead(struct sock *sk)
3668	{
3669	struct inet_sock *inet;
3670	struct ip_options_rcu *opt;
3671	u32 overhead = `0`;
3672	#if IS_ENABLED(CONFIG_IPV6)
3673	struct ipv6_pinfo *np;
3674	struct ipv6_txoptions *optv6 = NULL;
3675	#endif /* IS_ENABLED(CONFIG_IPV6) */
3676
3677	if (!sk)
3678	return overhead;
3679
3680	switch (sk->sk_family) {
3681	case AF_INET:
3682	inet = inet_sk(sk);
3683	overhead += sizeof(struct iphdr);
3684	opt = rcu_dereference_protected(inet->inet_opt,
3685	sock_owned_by_user(sk));
3686	if (opt)
3687	overhead += opt->opt.optlen;
3688	return overhead;
3689	#if IS_ENABLED(CONFIG_IPV6)
3690	case AF_INET6:
3691	np = inet6_sk(sk: sk);
3692	overhead += sizeof(struct ipv6hdr);
3693	if (np)
3694	optv6 = rcu_dereference_protected(np->opt,
3695	sock_owned_by_user(sk));
3696	if (optv6)
3697	overhead += (optv6->opt_flen + optv6->opt_nflen);
3698	return overhead;
3699	#endif /* IS_ENABLED(CONFIG_IPV6) */
3700	default: / Returns 0 overhead if the socket is not ipv4 or ipv6 /
3701	return overhead;
3702	}
3703	}
3704	EXPORT_SYMBOL(kernel_sock_ip_overhead);
3705

source code of linux/net/socket.c