filter.c source code [linux/net/core/filter.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Linux Socket Filter - Kernel level socket filtering
4	*
5	* Based on the design of the Berkeley Packet Filter. The new
6	* internal format has been designed by PLUMgrid:
7	*
8	* Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
9	*
10	* Authors:
11	*
12	* Jay Schulist <jschlst@samba.org>
13	* Alexei Starovoitov <ast@plumgrid.com>
14	* Daniel Borkmann <dborkman@redhat.com>
15	*
16	* Andi Kleen - Fix a few bad bugs and races.
17	* Kris Katterjohn - Added many additional checks in bpf_check_classic()
18	*/
19
20	#include <linux/atomic.h>
21	#include <linux/bpf_verifier.h>
22	#include <linux/module.h>
23	#include <linux/types.h>
24	#include <linux/mm.h>
25	#include <linux/fcntl.h>
26	#include <linux/socket.h>
27	#include <linux/sock_diag.h>
28	#include <linux/in.h>
29	#include <linux/inet.h>
30	#include <linux/netdevice.h>
31	#include <linux/if_packet.h>
32	#include <linux/if_arp.h>
33	#include <linux/gfp.h>
34	#include <net/inet_common.h>
35	#include <net/ip.h>
36	#include <net/protocol.h>
37	#include <net/netlink.h>
38	#include <linux/skbuff.h>
39	#include <linux/skmsg.h>
40	#include <net/sock.h>
41	#include <net/flow_dissector.h>
42	#include <linux/errno.h>
43	#include <linux/timer.h>
44	#include <linux/uaccess.h>
45	#include <linux/unaligned.h>
46	#include <linux/filter.h>
47	#include <linux/ratelimit.h>
48	#include <linux/seccomp.h>
49	#include <linux/if_vlan.h>
50	#include <linux/bpf.h>
51	#include <linux/btf.h>
52	#include <net/sch_generic.h>
53	#include <net/cls_cgroup.h>
54	#include <net/dst_metadata.h>
55	#include <net/dst.h>
56	#include <net/sock_reuseport.h>
57	#include <net/busy_poll.h>
58	#include <net/tcp.h>
59	#include <net/xfrm.h>
60	#include <net/udp.h>
61	#include <linux/bpf_trace.h>
62	#include <net/xdp_sock.h>
63	#include <linux/inetdevice.h>
64	#include <net/inet_hashtables.h>
65	#include <net/inet6_hashtables.h>
66	#include <net/ip_fib.h>
67	#include <net/nexthop.h>
68	#include <net/flow.h>
69	#include <net/arp.h>
70	#include <net/ipv6.h>
71	#include <net/net_namespace.h>
72	#include <linux/seg6_local.h>
73	#include <net/seg6.h>
74	#include <net/seg6_local.h>
75	#include <net/lwtunnel.h>
76	#include <net/ipv6_stubs.h>
77	#include <net/bpf_sk_storage.h>
78	#include <net/transp_v6.h>
79	#include <linux/btf_ids.h>
80	#include <net/tls.h>
81	#include <net/xdp.h>
82	#include <net/mptcp.h>
83	#include <net/netfilter/nf_conntrack_bpf.h>
84	#include <net/netkit.h>
85	#include <linux/un.h>
86	#include <net/xdp_sock_drv.h>
87	#include <net/inet_dscp.h>
88
89	#include "dev.h"
90
91	/ Keep the struct bpf_fib_lookup small so that it fits into a cacheline /
92	static_assert(sizeof(struct bpf_fib_lookup) == `64`, "struct bpf_fib_lookup size check");
93
94	static const struct bpf_func_proto *
95	bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
96
97	int copy_bpf_fprog_from_user(struct sock_fprog dst, sockptr_t src, int* len)
98	{
99	if (in_compat_syscall()) {
100	struct compat_sock_fprog f32;
101
102	if (len != sizeof(f32))
103	return -EINVAL;
104	if (copy_from_sockptr(dst: &f32, src, size: sizeof(f32)))
105	return -EFAULT;
106	memset(dst, `0`, sizeof(*dst));
107	dst->len = f32.len;
108	dst->filter = compat_ptr(uptr: f32.filter);
109	} else {
110	if (len != sizeof(*dst))
111	return -EINVAL;
112	if (copy_from_sockptr(dst, src, size: sizeof(*dst)))
113	return -EFAULT;
114	}
115
116	return `0`;
117	}
118	EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
119
120	/**
121	* sk_filter_trim_cap - run a packet through a socket filter
122	* @sk: sock associated with &sk_buff
123	* @skb: buffer to filter
124	* @cap: limit on how short the eBPF program may trim the packet
125	*
126	* Run the eBPF program and then cut skb->data to correct size returned by
127	* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
128	* than pkt_len we keep whole skb->data. This is the socket level
129	* wrapper to bpf_prog_run. It returns 0 if the packet should
130	* be accepted or -EPERM if the packet should be tossed.
131	*
132	*/
133	int sk_filter_trim_cap(struct sock sk, struct* sk_buff skb, unsigned* int cap)
134	{
135	int err;
136	struct sk_filter *filter;
137
138	/*
139	* If the skb was allocated from pfmemalloc reserves, only
140	* allow SOCK_MEMALLOC sockets to use it as this socket is
141	* helping free memory
142	*/
143	if (skb_pfmemalloc(skb) && !sock_flag(sk, flag: SOCK_MEMALLOC)) {
144	NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
145	return -ENOMEM;
146	}
147	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
148	if (err)
149	return err;
150
151	err = security_sock_rcv_skb(sk, skb);
152	if (err)
153	return err;
154
155	rcu_read_lock();
156	filter = rcu_dereference(sk->sk_filter);
157	if (filter) {
158	struct sock *save_sk = skb->sk;
159	unsigned int pkt_len;
160
161	skb->sk = sk;
162	pkt_len = bpf_prog_run_save_cb(prog: filter->prog, skb);
163	skb->sk = save_sk;
164	err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
165	}
166	rcu_read_unlock();
167
168	return err;
169	}
170	EXPORT_SYMBOL(sk_filter_trim_cap);
171
172	BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
173	{
174	return skb_get_poff(skb);
175	}
176
177	BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
178	{
179	struct nlattr *nla;
180
181	if (skb_is_nonlinear(skb))
182	return `0`;
183
184	if (skb->len < sizeof(struct nlattr))
185	return `0`;
186
187	if (a > skb->len - sizeof(struct nlattr))
188	return `0`;
189
190	nla = nla_find(head: (struct nlattr *) &skb->data[a], len: skb->len - a, attrtype: x);
191	if (nla)
192	return (void ) nla - (void* *) skb->data;
193
194	return `0`;
195	}
196
197	BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
198	{
199	struct nlattr *nla;
200
201	if (skb_is_nonlinear(skb))
202	return `0`;
203
204	if (skb->len < sizeof(struct nlattr))
205	return `0`;
206
207	if (a > skb->len - sizeof(struct nlattr))
208	return `0`;
209
210	nla = (struct nlattr *) &skb->data[a];
211	if (!nla_ok(nla, remaining: skb->len - a))
212	return `0`;
213
214	nla = nla_find_nested(nla, attrtype: x);
215	if (nla)
216	return (void ) nla - (void* *) skb->data;
217
218	return `0`;
219	}
220
221	static int bpf_skb_load_helper_convert_offset(const struct sk_buff skb, int* offset)
222	{
223	if (likely(offset >= `0`))
224	return offset;
225
226	if (offset >= SKF_NET_OFF)
227	return offset - SKF_NET_OFF + skb_network_offset(skb);
228
229	if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb))
230	return offset - SKF_LL_OFF + skb_mac_offset(skb);
231
232	return INT_MIN;
233	}
234
235	BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff , skb, const* void *,
236	data, int, headlen, int, offset)
237	{
238	u8 tmp;
239	const int len = sizeof(tmp);
240
241	offset = bpf_skb_load_helper_convert_offset(skb, offset);
242	if (offset == INT_MIN)
243	return -EFAULT;
244
245	if (headlen - offset >= len)
246	return (u8 )(data + offset);
247	if (!skb_copy_bits(skb, offset, to: &tmp, len: sizeof(tmp)))
248	return tmp;
249	else
250	return -EFAULT;
251	}
252
253	BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
254	int, offset)
255	{
256	return ____bpf_skb_load_helper_8(skb, data: skb->data, headlen: skb->len - skb->data_len,
257	offset);
258	}
259
260	BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff , skb, const* void *,
261	data, int, headlen, int, offset)
262	{
263	__be16 tmp;
264	const int len = sizeof(tmp);
265
266	offset = bpf_skb_load_helper_convert_offset(skb, offset);
267	if (offset == INT_MIN)
268	return -EFAULT;
269
270	if (headlen - offset >= len)
271	return get_unaligned_be16(p: data + offset);
272	if (!skb_copy_bits(skb, offset, to: &tmp, len: sizeof(tmp)))
273	return be16_to_cpu(tmp);
274	else
275	return -EFAULT;
276	}
277
278	BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
279	int, offset)
280	{
281	return ____bpf_skb_load_helper_16(skb, data: skb->data, headlen: skb->len - skb->data_len,
282	offset);
283	}
284
285	BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff , skb, const* void *,
286	data, int, headlen, int, offset)
287	{
288	__be32 tmp;
289	const int len = sizeof(tmp);
290
291	offset = bpf_skb_load_helper_convert_offset(skb, offset);
292	if (offset == INT_MIN)
293	return -EFAULT;
294
295	if (headlen - offset >= len)
296	return get_unaligned_be32(p: data + offset);
297	if (!skb_copy_bits(skb, offset, to: &tmp, len: sizeof(tmp)))
298	return be32_to_cpu(tmp);
299	else
300	return -EFAULT;
301	}
302
303	BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
304	int, offset)
305	{
306	return ____bpf_skb_load_helper_32(skb, data: skb->data, headlen: skb->len - skb->data_len,
307	offset);
308	}
309
310	static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
311	struct bpf_insn *insn_buf)
312	{
313	struct bpf_insn *insn = insn_buf;
314
315	switch (skb_field) {
316	case SKF_AD_MARK:
317	BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != `4`);
318
319	*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
320	offsetof(struct sk_buff, mark));
321	break;
322
323	case SKF_AD_PKTTYPE:
324	*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET);
325	*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
326	#ifdef __BIG_ENDIAN_BITFIELD
327	*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, `5`);
328	#endif
329	break;
330
331	case SKF_AD_QUEUE:
332	BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != `2`);
333
334	*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
335	offsetof(struct sk_buff, queue_mapping));
336	break;
337
338	case SKF_AD_VLAN_TAG:
339	BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != `2`);
340
341	/ dst_reg = (u16 ) (src_reg + offsetof(vlan_tci)) /
342	*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
343	offsetof(struct sk_buff, vlan_tci));
344	break;
345	case SKF_AD_VLAN_TAG_PRESENT:
346	BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_all) != `4`);
347	*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
348	offsetof(struct sk_buff, vlan_all));
349	*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, `0`, `1`);
350	*insn++ = BPF_ALU32_IMM(BPF_MOV, dst_reg, `1`);
351	break;
352	}
353
354	return insn - insn_buf;
355	}
356
357	static bool convert_bpf_extensions(struct sock_filter *fp,
358	struct bpf_insn **insnp)
359	{
360	struct bpf_insn insn = insnp;
361	u32 cnt;
362
363	switch (fp->k) {
364	case SKF_AD_OFF + SKF_AD_PROTOCOL:
365	BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != `2`);
366
367	/ A = (u16 ) (CTX + offsetof(protocol)) /
368	*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
369	offsetof(struct sk_buff, protocol));
370	/ A = ntohs(A) [emitting a nop or swap16] /
371	*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, `16`);
372	break;
373
374	case SKF_AD_OFF + SKF_AD_PKTTYPE:
375	cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
376	insn += cnt - `1`;
377	break;
378
379	case SKF_AD_OFF + SKF_AD_IFINDEX:
380	case SKF_AD_OFF + SKF_AD_HATYPE:
381	BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != `4`);
382	BUILD_BUG_ON(sizeof_field(struct net_device, type) != `2`);
383
384	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, dev),
385	BPF_REG_TMP, BPF_REG_CTX,
386	offsetof(struct sk_buff, dev));
387	/ if (tmp != 0) goto pc + 1 /
388	*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, `0`, `1`);
389	*insn++ = BPF_EXIT_INSN();
390	if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
391	*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
392	offsetof(struct net_device, ifindex));
393	else
394	*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
395	offsetof(struct net_device, type));
396	break;
397
398	case SKF_AD_OFF + SKF_AD_MARK:
399	cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
400	insn += cnt - `1`;
401	break;
402
403	case SKF_AD_OFF + SKF_AD_RXHASH:
404	BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != `4`);
405
406	*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
407	offsetof(struct sk_buff, hash));
408	break;
409
410	case SKF_AD_OFF + SKF_AD_QUEUE:
411	cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
412	insn += cnt - `1`;
413	break;
414
415	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
416	cnt = convert_skb_access(SKF_AD_VLAN_TAG,
417	BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
418	insn += cnt - `1`;
419	break;
420
421	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
422	cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
423	BPF_REG_A, BPF_REG_CTX, insn_buf: insn);
424	insn += cnt - `1`;
425	break;
426
427	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
428	BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != `2`);
429
430	/ A = (u16 ) (CTX + offsetof(vlan_proto)) /
431	*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
432	offsetof(struct sk_buff, vlan_proto));
433	/ A = ntohs(A) [emitting a nop or swap16] /
434	*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, `16`);
435	break;
436
437	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
438	case SKF_AD_OFF + SKF_AD_NLATTR:
439	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
440	case SKF_AD_OFF + SKF_AD_CPU:
441	case SKF_AD_OFF + SKF_AD_RANDOM:
442	/ arg1 = CTX /
443	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
444	/ arg2 = A /
445	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
446	/ arg3 = X /
447	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
448	/ Emit call(arg1=CTX, arg2=A, arg3=X) /
449	switch (fp->k) {
450	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
451	*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
452	break;
453	case SKF_AD_OFF + SKF_AD_NLATTR:
454	*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
455	break;
456	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
457	*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
458	break;
459	case SKF_AD_OFF + SKF_AD_CPU:
460	*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
461	break;
462	case SKF_AD_OFF + SKF_AD_RANDOM:
463	*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
464	bpf_user_rnd_init_once();
465	break;
466	}
467	break;
468
469	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
470	/ A ^= X /
471	*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
472	break;
473
474	default:
475	/ This is just a dummy call to avoid letting the compiler*
476	* evict __bpf_call_base() as an optimization. Placed here
477	* where no-one bothers.
478	*/
479	BUG_ON(__bpf_call_base(`0`, `0`, `0`, `0`, `0`) != `0`);
480	return false;
481	}
482
483	*insnp = insn;
484	return true;
485	}
486
487	static bool convert_bpf_ld_abs(struct sock_filter fp, struct* bpf_insn **insnp)
488	{
489	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
490	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
491	bool endian = BPF_SIZE(fp->code) == BPF_H \|\|
492	BPF_SIZE(fp->code) == BPF_W;
493	bool indirect = BPF_MODE(fp->code) == BPF_IND;
494	const int ip_align = NET_IP_ALIGN;
495	struct bpf_insn insn = insnp;
496	int offset = fp->k;
497
498	if (!indirect &&
499	((unaligned_ok && offset >= `0`) \|\|
500	(!unaligned_ok && offset >= `0` &&
501	offset + ip_align >= `0` &&
502	offset + ip_align % size == `0`))) {
503	bool ldx_off_ok = offset <= S16_MAX;
504
505	*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
506	if (offset)
507	*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
508	*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
509	size, `2` + endian + (!ldx_off_ok * `2`));
510	if (ldx_off_ok) {
511	*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
512	BPF_REG_D, offset);
513	} else {
514	*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
515	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
516	*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
517	BPF_REG_TMP, `0`);
518	}
519	if (endian)
520	insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size `8`);
521	*insn++ = BPF_JMP_A(`8`);
522	}
523
524	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
525	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
526	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
527	if (!indirect) {
528	*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
529	} else {
530	*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
531	if (fp->k)
532	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
533	}
534
535	switch (BPF_SIZE(fp->code)) {
536	case BPF_B:
537	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
538	break;
539	case BPF_H:
540	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
541	break;
542	case BPF_W:
543	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
544	break;
545	default:
546	return false;
547	}
548
549	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, `0`, `2`);
550	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
551	*insn = BPF_EXIT_INSN();
552
553	*insnp = insn;
554	return true;
555	}
556
557	/**
558	* bpf_convert_filter - convert filter program
559	* @prog: the user passed filter program
560	* @len: the length of the user passed filter program
561	* @new_prog: allocated 'struct bpf_prog' or NULL
562	* @new_len: pointer to store length of converted program
563	* @seen_ld_abs: bool whether we've seen ld_abs/ind
564	*
565	* Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
566	* style extended BPF (eBPF).
567	* Conversion workflow:
568	*
569	* 1) First pass for calculating the new program length:
570	* bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
571	*
572	* 2) 2nd pass to remap in two passes: 1st pass finds new
573	* jump offsets, 2nd pass remapping:
574	* bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
575	*/
576	static int bpf_convert_filter(struct sock_filter prog, int* len,
577	struct bpf_prog new_prog, int* *new_len,
578	bool *seen_ld_abs)
579	{
580	int new_flen = `0`, pass = `0`, target, i, stack_off;
581	struct bpf_insn new_insn, first_insn = NULL;
582	struct sock_filter *fp;
583	int *addrs = NULL;
584	u8 bpf_src;
585
586	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
587	BUILD_BUG_ON(BPF_REG_FP + `1` != MAX_BPF_REG);
588
589	if (len <= `0` \|\| len > BPF_MAXINSNS)
590	return -EINVAL;
591
592	if (new_prog) {
593	first_insn = new_prog->insnsi;
594	addrs = kcalloc(len, sizeof(*addrs),
595	GFP_KERNEL \| __GFP_NOWARN);
596	if (!addrs)
597	return -ENOMEM;
598	}
599
600	do_pass:
601	new_insn = first_insn;
602	fp = prog;
603
604	/ Classic BPF related prologue emission. /
605	if (new_prog) {
606	/ Classic BPF expects A and X to be reset first. These need*
607	* to be guaranteed to be the first two instructions.
608	*/
609	*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
610	*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
611
612	/ All programs must keep CTX in callee saved BPF_REG_CTX.*
613	* In eBPF case it's done by the compiler, here we need to
614	* do this ourself. Initial CTX is present in BPF_REG_ARG1.
615	*/
616	*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
617	if (*seen_ld_abs) {
618	/ For packet access in classic BPF, cache skb->data*
619	* in callee-saved BPF R8 and skb->len - skb->data_len
620	* (headlen) in BPF R9. Since classic BPF is read-only
621	* on CTX, we only need to cache it once.
622	*/
623	new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
624	BPF_REG_D, BPF_REG_CTX,
625	offsetof(struct sk_buff, data));
626	*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
627	offsetof(struct sk_buff, len));
628	*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
629	offsetof(struct sk_buff, data_len));
630	*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
631	}
632	} else {
633	new_insn += `3`;
634	}
635
636	for (i = `0`; i < len; fp++, i++) {
637	struct bpf_insn tmp_insns[`32`] = { };
638	struct bpf_insn *insn = tmp_insns;
639
640	if (addrs)
641	addrs[i] = new_insn - first_insn;
642
643	switch (fp->code) {
644	/ All arithmetic insns and skb loads map as-is. /
645	case BPF_ALU \| BPF_ADD \| BPF_X:
646	case BPF_ALU \| BPF_ADD \| BPF_K:
647	case BPF_ALU \| BPF_SUB \| BPF_X:
648	case BPF_ALU \| BPF_SUB \| BPF_K:
649	case BPF_ALU \| BPF_AND \| BPF_X:
650	case BPF_ALU \| BPF_AND \| BPF_K:
651	case BPF_ALU \| BPF_OR \| BPF_X:
652	case BPF_ALU \| BPF_OR \| BPF_K:
653	case BPF_ALU \| BPF_LSH \| BPF_X:
654	case BPF_ALU \| BPF_LSH \| BPF_K:
655	case BPF_ALU \| BPF_RSH \| BPF_X:
656	case BPF_ALU \| BPF_RSH \| BPF_K:
657	case BPF_ALU \| BPF_XOR \| BPF_X:
658	case BPF_ALU \| BPF_XOR \| BPF_K:
659	case BPF_ALU \| BPF_MUL \| BPF_X:
660	case BPF_ALU \| BPF_MUL \| BPF_K:
661	case BPF_ALU \| BPF_DIV \| BPF_X:
662	case BPF_ALU \| BPF_DIV \| BPF_K:
663	case BPF_ALU \| BPF_MOD \| BPF_X:
664	case BPF_ALU \| BPF_MOD \| BPF_K:
665	case BPF_ALU \| BPF_NEG:
666	case BPF_LD \| BPF_ABS \| BPF_W:
667	case BPF_LD \| BPF_ABS \| BPF_H:
668	case BPF_LD \| BPF_ABS \| BPF_B:
669	case BPF_LD \| BPF_IND \| BPF_W:
670	case BPF_LD \| BPF_IND \| BPF_H:
671	case BPF_LD \| BPF_IND \| BPF_B:
672	/ Check for overloaded BPF extension and*
673	* directly convert it if found, otherwise
674	* just move on with mapping.
675	*/
676	if (BPF_CLASS(fp->code) == BPF_LD &&
677	BPF_MODE(fp->code) == BPF_ABS &&
678	convert_bpf_extensions(fp, insnp: &insn))
679	break;
680	if (BPF_CLASS(fp->code) == BPF_LD &&
681	convert_bpf_ld_abs(fp, insnp: &insn)) {
682	*seen_ld_abs = true;
683	break;
684	}
685
686	if (fp->code == (BPF_ALU \| BPF_DIV \| BPF_X) \|\|
687	fp->code == (BPF_ALU \| BPF_MOD \| BPF_X)) {
688	*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
689	/ Error with exception code on div/mod by 0.*
690	* For cBPF programs, this was always return 0.
691	*/
692	*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, `0`, `2`);
693	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
694	*insn++ = BPF_EXIT_INSN();
695	}
696
697	*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, `0`, fp->k);
698	break;
699
700	/ Jump transformation cannot use BPF block macros*
701	* everywhere as offset calculation and target updates
702	* require a bit more work than the rest, i.e. jump
703	* opcodes map as-is, but offsets need adjustment.
704	*/
705
706	#define BPF_EMIT_JMP \
707	do { \
708	const s32 off_min = S16_MIN, off_max = S16_MAX; \
709	s32 off; \
710	\
711	if (target >= len \|\| target < 0) \
712	goto err; \
713	off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
714	/* Adjust pc relative offset for 2nd or 3rd insn. */ \
715	off -= insn - tmp_insns; \
716	/* Reject anything not fitting into insn->off. */ \
717	if (off < off_min \|\| off > off_max) \
718	goto err; \
719	insn->off = off; \
720	} while (0)
721
722	case BPF_JMP \| BPF_JA:
723	target = i + fp->k + `1`;
724	insn->code = fp->code;
725	BPF_EMIT_JMP;
726	break;
727
728	case BPF_JMP \| BPF_JEQ \| BPF_K:
729	case BPF_JMP \| BPF_JEQ \| BPF_X:
730	case BPF_JMP \| BPF_JSET \| BPF_K:
731	case BPF_JMP \| BPF_JSET \| BPF_X:
732	case BPF_JMP \| BPF_JGT \| BPF_K:
733	case BPF_JMP \| BPF_JGT \| BPF_X:
734	case BPF_JMP \| BPF_JGE \| BPF_K:
735	case BPF_JMP \| BPF_JGE \| BPF_X:
736	if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < `0`) {
737	/ BPF immediates are signed, zero extend*
738	* immediate into tmp register and use it
739	* in compare insn.
740	*/
741	*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
742
743	insn->dst_reg = BPF_REG_A;
744	insn->src_reg = BPF_REG_TMP;
745	bpf_src = BPF_X;
746	} else {
747	insn->dst_reg = BPF_REG_A;
748	insn->imm = fp->k;
749	bpf_src = BPF_SRC(fp->code);
750	insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : `0`;
751	}
752
753	/ Common case where 'jump_false' is next insn. /
754	if (fp->jf == `0`) {
755	insn->code = BPF_JMP \| BPF_OP(fp->code) \| bpf_src;
756	target = i + fp->jt + `1`;
757	BPF_EMIT_JMP;
758	break;
759	}
760
761	/ Convert some jumps when 'jump_true' is next insn. /
762	if (fp->jt == `0`) {
763	switch (BPF_OP(fp->code)) {
764	case BPF_JEQ:
765	insn->code = BPF_JMP \| BPF_JNE \| bpf_src;
766	break;
767	case BPF_JGT:
768	insn->code = BPF_JMP \| BPF_JLE \| bpf_src;
769	break;
770	case BPF_JGE:
771	insn->code = BPF_JMP \| BPF_JLT \| bpf_src;
772	break;
773	default:
774	goto jmp_rest;
775	}
776
777	target = i + fp->jf + `1`;
778	BPF_EMIT_JMP;
779	break;
780	}
781	jmp_rest:
782	/ Other jumps are mapped into two insns: Jxx and JA. /
783	target = i + fp->jt + `1`;
784	insn->code = BPF_JMP \| BPF_OP(fp->code) \| bpf_src;
785	BPF_EMIT_JMP;
786	insn++;
787
788	insn->code = BPF_JMP \| BPF_JA;
789	target = i + fp->jf + `1`;
790	BPF_EMIT_JMP;
791	break;
792
793	/ ldxb 4 * ([14] & 0xf) is remapped into 6 insns. /
794	case BPF_LDX \| BPF_MSH \| BPF_B: {
795	struct sock_filter tmp = {
796	.code = BPF_LD \| BPF_ABS \| BPF_B,
797	.k = fp->k,
798	};
799
800	*seen_ld_abs = true;
801
802	/ X = A /
803	*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
804	/ A = BPF_R0 = (u8 ) (skb->data + K) /
805	convert_bpf_ld_abs(fp: &tmp, insnp: &insn);
806	insn++;
807	/ A &= 0xf /
808	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, `0xf`);
809	/ A <<= 2 /
810	*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, `2`);
811	/ tmp = X /
812	*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
813	/ X = A /
814	*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
815	/ A = tmp /
816	*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
817	break;
818	}
819	/ RET_K is remapped into 2 insns. RET_A case doesn't need an*
820	* extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
821	*/
822	case BPF_RET \| BPF_A:
823	case BPF_RET \| BPF_K:
824	if (BPF_RVAL(fp->code) == BPF_K)
825	*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
826	`0`, fp->k);
827	*insn = BPF_EXIT_INSN();
828	break;
829
830	/ Store to stack. /
831	case BPF_ST:
832	case BPF_STX:
833	stack_off = fp->k * `4` + `4`;
834	*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
835	BPF_ST ? BPF_REG_A : BPF_REG_X,
836	-stack_off);
837	/ check_load_and_stores() verifies that classic BPF can*
838	* load from stack only after write, so tracking
839	* stack_depth for ST\|STX insns is enough
840	*/
841	if (new_prog && new_prog->aux->stack_depth < stack_off)
842	new_prog->aux->stack_depth = stack_off;
843	break;
844
845	/ Load from stack. /
846	case BPF_LD \| BPF_MEM:
847	case BPF_LDX \| BPF_MEM:
848	stack_off = fp->k * `4` + `4`;
849	*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
850	BPF_REG_A : BPF_REG_X, BPF_REG_FP,
851	-stack_off);
852	break;
853
854	/ A = K or X = K /
855	case BPF_LD \| BPF_IMM:
856	case BPF_LDX \| BPF_IMM:
857	*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
858	BPF_REG_A : BPF_REG_X, fp->k);
859	break;
860
861	/ X = A /
862	case BPF_MISC \| BPF_TAX:
863	*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
864	break;
865
866	/ A = X /
867	case BPF_MISC \| BPF_TXA:
868	*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
869	break;
870
871	/ A = skb->len or X = skb->len /
872	case BPF_LD \| BPF_W \| BPF_LEN:
873	case BPF_LDX \| BPF_W \| BPF_LEN:
874	*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
875	BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
876	offsetof(struct sk_buff, len));
877	break;
878
879	/ Access seccomp_data fields. /
880	case BPF_LDX \| BPF_ABS \| BPF_W:
881	/ A = (u32 ) (ctx + K) /
882	*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
883	break;
884
885	/ Unknown instruction. /
886	default:
887	goto err;
888	}
889
890	insn++;
891	if (new_prog)
892	memcpy(new_insn, tmp_insns,
893	sizeof(insn) (insn - tmp_insns));
894	new_insn += insn - tmp_insns;
895	}
896
897	if (!new_prog) {
898	/ Only calculating new length. /
899	*new_len = new_insn - first_insn;
900	if (*seen_ld_abs)
901	new_len += `4`; /* Prologue bits. /
902	return `0`;
903	}
904
905	pass++;
906	if (new_flen != new_insn - first_insn) {
907	new_flen = new_insn - first_insn;
908	if (pass > `2`)
909	goto err;
910	goto do_pass;
911	}
912
913	kfree(objp: addrs);
914	BUG_ON(*new_len != new_flen);
915	return `0`;
916	err:
917	kfree(objp: addrs);
918	return -EINVAL;
919	}
920
921	/ Security:*
922	*
923	* As we dont want to clear mem[] array for each packet going through
924	* __bpf_prog_run(), we check that filter loaded by user never try to read
925	* a cell if not previously written, and we check all branches to be sure
926	* a malicious user doesn't try to abuse us.
927	*/
928	static int check_load_and_stores(const struct sock_filter filter, int* flen)
929	{
930	u16 masks, memvalid = `0`; /* One bit per cell, 16 cells /
931	int pc, ret = `0`;
932
933	BUILD_BUG_ON(BPF_MEMWORDS > `16`);
934
935	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
936	if (!masks)
937	return -ENOMEM;
938
939	memset(masks, `0xff`, flen * sizeof(*masks));
940
941	for (pc = `0`; pc < flen; pc++) {
942	memvalid &= masks[pc];
943
944	switch (filter[pc].code) {
945	case BPF_ST:
946	case BPF_STX:
947	memvalid \|= (`1` << filter[pc].k);
948	break;
949	case BPF_LD \| BPF_MEM:
950	case BPF_LDX \| BPF_MEM:
951	if (!(memvalid & (`1` << filter[pc].k))) {
952	ret = -EINVAL;
953	goto error;
954	}
955	break;
956	case BPF_JMP \| BPF_JA:
957	/ A jump must set masks on target /
958	masks[pc + `1` + filter[pc].k] &= memvalid;
959	memvalid = ~`0`;
960	break;
961	case BPF_JMP \| BPF_JEQ \| BPF_K:
962	case BPF_JMP \| BPF_JEQ \| BPF_X:
963	case BPF_JMP \| BPF_JGE \| BPF_K:
964	case BPF_JMP \| BPF_JGE \| BPF_X:
965	case BPF_JMP \| BPF_JGT \| BPF_K:
966	case BPF_JMP \| BPF_JGT \| BPF_X:
967	case BPF_JMP \| BPF_JSET \| BPF_K:
968	case BPF_JMP \| BPF_JSET \| BPF_X:
969	/ A jump must set masks on targets /
970	masks[pc + `1` + filter[pc].jt] &= memvalid;
971	masks[pc + `1` + filter[pc].jf] &= memvalid;
972	memvalid = ~`0`;
973	break;
974	}
975	}
976	error:
977	kfree(objp: masks);
978	return ret;
979	}
980
981	static bool chk_code_allowed(u16 code_to_probe)
982	{
983	static const bool codes[] = {
984	/ 32 bit ALU operations /
985	[BPF_ALU \| BPF_ADD \| BPF_K] = true,
986	[BPF_ALU \| BPF_ADD \| BPF_X] = true,
987	[BPF_ALU \| BPF_SUB \| BPF_K] = true,
988	[BPF_ALU \| BPF_SUB \| BPF_X] = true,
989	[BPF_ALU \| BPF_MUL \| BPF_K] = true,
990	[BPF_ALU \| BPF_MUL \| BPF_X] = true,
991	[BPF_ALU \| BPF_DIV \| BPF_K] = true,
992	[BPF_ALU \| BPF_DIV \| BPF_X] = true,
993	[BPF_ALU \| BPF_MOD \| BPF_K] = true,
994	[BPF_ALU \| BPF_MOD \| BPF_X] = true,
995	[BPF_ALU \| BPF_AND \| BPF_K] = true,
996	[BPF_ALU \| BPF_AND \| BPF_X] = true,
997	[BPF_ALU \| BPF_OR \| BPF_K] = true,
998	[BPF_ALU \| BPF_OR \| BPF_X] = true,
999	[BPF_ALU \| BPF_XOR \| BPF_K] = true,
1000	[BPF_ALU \| BPF_XOR \| BPF_X] = true,
1001	[BPF_ALU \| BPF_LSH \| BPF_K] = true,
1002	[BPF_ALU \| BPF_LSH \| BPF_X] = true,
1003	[BPF_ALU \| BPF_RSH \| BPF_K] = true,
1004	[BPF_ALU \| BPF_RSH \| BPF_X] = true,
1005	[BPF_ALU \| BPF_NEG] = true,
1006	/ Load instructions /
1007	[BPF_LD \| BPF_W \| BPF_ABS] = true,
1008	[BPF_LD \| BPF_H \| BPF_ABS] = true,
1009	[BPF_LD \| BPF_B \| BPF_ABS] = true,
1010	[BPF_LD \| BPF_W \| BPF_LEN] = true,
1011	[BPF_LD \| BPF_W \| BPF_IND] = true,
1012	[BPF_LD \| BPF_H \| BPF_IND] = true,
1013	[BPF_LD \| BPF_B \| BPF_IND] = true,
1014	[BPF_LD \| BPF_IMM] = true,
1015	[BPF_LD \| BPF_MEM] = true,
1016	[BPF_LDX \| BPF_W \| BPF_LEN] = true,
1017	[BPF_LDX \| BPF_B \| BPF_MSH] = true,
1018	[BPF_LDX \| BPF_IMM] = true,
1019	[BPF_LDX \| BPF_MEM] = true,
1020	/ Store instructions /
1021	[BPF_ST] = true,
1022	[BPF_STX] = true,
1023	/ Misc instructions /
1024	[BPF_MISC \| BPF_TAX] = true,
1025	[BPF_MISC \| BPF_TXA] = true,
1026	/ Return instructions /
1027	[BPF_RET \| BPF_K] = true,
1028	[BPF_RET \| BPF_A] = true,
1029	/ Jump instructions /
1030	[BPF_JMP \| BPF_JA] = true,
1031	[BPF_JMP \| BPF_JEQ \| BPF_K] = true,
1032	[BPF_JMP \| BPF_JEQ \| BPF_X] = true,
1033	[BPF_JMP \| BPF_JGE \| BPF_K] = true,
1034	[BPF_JMP \| BPF_JGE \| BPF_X] = true,
1035	[BPF_JMP \| BPF_JGT \| BPF_K] = true,
1036	[BPF_JMP \| BPF_JGT \| BPF_X] = true,
1037	[BPF_JMP \| BPF_JSET \| BPF_K] = true,
1038	[BPF_JMP \| BPF_JSET \| BPF_X] = true,
1039	};
1040
1041	if (code_to_probe >= ARRAY_SIZE(codes))
1042	return false;
1043
1044	return codes[code_to_probe];
1045	}
1046
1047	static bool bpf_check_basics_ok(const struct sock_filter *filter,
1048	unsigned int flen)
1049	{
1050	if (filter == NULL)
1051	return false;
1052	if (flen == `0` \|\| flen > BPF_MAXINSNS)
1053	return false;
1054
1055	return true;
1056	}
1057
1058	/**
1059	* bpf_check_classic - verify socket filter code
1060	* @filter: filter to verify
1061	* @flen: length of filter
1062	*
1063	* Check the user's filter code. If we let some ugly
1064	* filter code slip through kaboom! The filter must contain
1065	* no references or jumps that are out of range, no illegal
1066	* instructions, and must end with a RET instruction.
1067	*
1068	* All jumps are forward as they are not signed.
1069	*
1070	* Returns 0 if the rule set is legal or -EINVAL if not.
1071	*/
1072	static int bpf_check_classic(const struct sock_filter *filter,
1073	unsigned int flen)
1074	{
1075	bool anc_found;
1076	int pc;
1077
1078	/ Check the filter code now /
1079	for (pc = `0`; pc < flen; pc++) {
1080	const struct sock_filter *ftest = &filter[pc];
1081
1082	/ May we actually operate on this code? /
1083	if (!chk_code_allowed(code_to_probe: ftest->code))
1084	return -EINVAL;
1085
1086	/ Some instructions need special checks /
1087	switch (ftest->code) {
1088	case BPF_ALU \| BPF_DIV \| BPF_K:
1089	case BPF_ALU \| BPF_MOD \| BPF_K:
1090	/ Check for division by zero /
1091	if (ftest->k == `0`)
1092	return -EINVAL;
1093	break;
1094	case BPF_ALU \| BPF_LSH \| BPF_K:
1095	case BPF_ALU \| BPF_RSH \| BPF_K:
1096	if (ftest->k >= `32`)
1097	return -EINVAL;
1098	break;
1099	case BPF_LD \| BPF_MEM:
1100	case BPF_LDX \| BPF_MEM:
1101	case BPF_ST:
1102	case BPF_STX:
1103	/ Check for invalid memory addresses /
1104	if (ftest->k >= BPF_MEMWORDS)
1105	return -EINVAL;
1106	break;
1107	case BPF_JMP \| BPF_JA:
1108	/ Note, the large ftest->k might cause loops.*
1109	* Compare this with conditional jumps below,
1110	* where offsets are limited. --ANK (981016)
1111	*/
1112	if (ftest->k >= (unsigned int)(flen - pc - `1`))
1113	return -EINVAL;
1114	break;
1115	case BPF_JMP \| BPF_JEQ \| BPF_K:
1116	case BPF_JMP \| BPF_JEQ \| BPF_X:
1117	case BPF_JMP \| BPF_JGE \| BPF_K:
1118	case BPF_JMP \| BPF_JGE \| BPF_X:
1119	case BPF_JMP \| BPF_JGT \| BPF_K:
1120	case BPF_JMP \| BPF_JGT \| BPF_X:
1121	case BPF_JMP \| BPF_JSET \| BPF_K:
1122	case BPF_JMP \| BPF_JSET \| BPF_X:
1123	/ Both conditionals must be safe /
1124	if (pc + ftest->jt + `1` >= flen \|\|
1125	pc + ftest->jf + `1` >= flen)
1126	return -EINVAL;
1127	break;
1128	case BPF_LD \| BPF_W \| BPF_ABS:
1129	case BPF_LD \| BPF_H \| BPF_ABS:
1130	case BPF_LD \| BPF_B \| BPF_ABS:
1131	anc_found = false;
1132	if (bpf_anc_helper(ftest) & BPF_ANC)
1133	anc_found = true;
1134	/ Ancillary operation unknown or unsupported /
1135	if (anc_found == false && ftest->k >= SKF_AD_OFF)
1136	return -EINVAL;
1137	}
1138	}
1139
1140	/ Last instruction must be a RET code /
1141	switch (filter[flen - `1`].code) {
1142	case BPF_RET \| BPF_K:
1143	case BPF_RET \| BPF_A:
1144	return check_load_and_stores(filter, flen);
1145	}
1146
1147	return -EINVAL;
1148	}
1149
1150	static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1151	const struct sock_fprog *fprog)
1152	{
1153	unsigned int fsize = bpf_classic_proglen(fprog);
1154	struct sock_fprog_kern *fkprog;
1155
1156	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1157	if (!fp->orig_prog)
1158	return -ENOMEM;
1159
1160	fkprog = fp->orig_prog;
1161	fkprog->len = fprog->len;
1162
1163	fkprog->filter = kmemdup(fp->insns, fsize,
1164	GFP_KERNEL \| __GFP_NOWARN);
1165	if (!fkprog->filter) {
1166	kfree(objp: fp->orig_prog);
1167	return -ENOMEM;
1168	}
1169
1170	return `0`;
1171	}
1172
1173	static void bpf_release_orig_filter(struct bpf_prog *fp)
1174	{
1175	struct sock_fprog_kern *fprog = fp->orig_prog;
1176
1177	if (fprog) {
1178	kfree(objp: fprog->filter);
1179	kfree(objp: fprog);
1180	}
1181	}
1182
1183	static void __bpf_prog_release(struct bpf_prog *prog)
1184	{
1185	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
1186	bpf_prog_put(prog);
1187	} else {
1188	bpf_release_orig_filter(fp: prog);
1189	bpf_prog_free(fp: prog);
1190	}
1191	}
1192
1193	static void __sk_filter_release(struct sk_filter *fp)
1194	{
1195	__bpf_prog_release(prog: fp->prog);
1196	kfree(objp: fp);
1197	}
1198
1199	/**
1200	* sk_filter_release_rcu - Release a socket filter by rcu_head
1201	* @rcu: rcu_head that contains the sk_filter to free
1202	*/
1203	static void sk_filter_release_rcu(struct rcu_head *rcu)
1204	{
1205	struct sk_filter fp = container_of(rcu, struct* sk_filter, rcu);
1206
1207	__sk_filter_release(fp);
1208	}
1209
1210	/**
1211	* sk_filter_release - release a socket filter
1212	* @fp: filter to remove
1213	*
1214	* Remove a filter from a socket and release its resources.
1215	*/
1216	static void sk_filter_release(struct sk_filter *fp)
1217	{
1218	if (refcount_dec_and_test(r: &fp->refcnt))
1219	call_rcu(head: &fp->rcu, func: sk_filter_release_rcu);
1220	}
1221
1222	void sk_filter_uncharge(struct sock sk, struct* sk_filter *fp)
1223	{
1224	u32 filter_size = bpf_prog_size(proglen: fp->prog->len);
1225
1226	atomic_sub(i: filter_size, v: &sk->sk_omem_alloc);
1227	sk_filter_release(fp);
1228	}
1229
1230	/ try to charge the socket memory if there is space available*
1231	* return true on success
1232	*/
1233	static bool __sk_filter_charge(struct sock sk, struct* sk_filter *fp)
1234	{
1235	int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
1236	u32 filter_size = bpf_prog_size(proglen: fp->prog->len);
1237
1238	/ same check as in sock_kmalloc() /
1239	if (filter_size <= optmem_max &&
1240	atomic_read(v: &sk->sk_omem_alloc) + filter_size < optmem_max) {
1241	atomic_add(i: filter_size, v: &sk->sk_omem_alloc);
1242	return true;
1243	}
1244	return false;
1245	}
1246
1247	bool sk_filter_charge(struct sock sk, struct* sk_filter *fp)
1248	{
1249	if (!refcount_inc_not_zero(r: &fp->refcnt))
1250	return false;
1251
1252	if (!__sk_filter_charge(sk, fp)) {
1253	sk_filter_release(fp);
1254	return false;
1255	}
1256	return true;
1257	}
1258
1259	static struct bpf_prog bpf_migrate_filter(struct* bpf_prog *fp)
1260	{
1261	struct sock_filter *old_prog;
1262	struct bpf_prog *old_fp;
1263	int err, new_len, old_len = fp->len;
1264	bool seen_ld_abs = false;
1265
1266	/ We are free to overwrite insns et al right here as it won't be used at*
1267	* this point in time anymore internally after the migration to the eBPF
1268	* instruction representation.
1269	*/
1270	BUILD_BUG_ON(sizeof(struct sock_filter) !=
1271	sizeof(struct bpf_insn));
1272
1273	/ Conversion cannot happen on overlapping memory areas,*
1274	* so we need to keep the user BPF around until the 2nd
1275	* pass. At this time, the user BPF is stored in fp->insns.
1276	*/
1277	old_prog = kmemdup_array(src: fp->insns, count: old_len, element_size: sizeof(struct sock_filter),
1278	GFP_KERNEL \| __GFP_NOWARN);
1279	if (!old_prog) {
1280	err = -ENOMEM;
1281	goto out_err;
1282	}
1283
1284	/ 1st pass: calculate the new program length. /
1285	err = bpf_convert_filter(prog: old_prog, len: old_len, NULL, new_len: &new_len,
1286	seen_ld_abs: &seen_ld_abs);
1287	if (err)
1288	goto out_err_free;
1289
1290	/ Expand fp for appending the new filter representation. /
1291	old_fp = fp;
1292	fp = bpf_prog_realloc(fp_old: old_fp, size: bpf_prog_size(proglen: new_len), gfp_extra_flags: `0`);
1293	if (!fp) {
1294	/ The old_fp is still around in case we couldn't*
1295	* allocate new memory, so uncharge on that one.
1296	*/
1297	fp = old_fp;
1298	err = -ENOMEM;
1299	goto out_err_free;
1300	}
1301
1302	fp->len = new_len;
1303
1304	/ 2nd pass: remap sock_filter insns into bpf_insn insns. /
1305	err = bpf_convert_filter(prog: old_prog, len: old_len, new_prog: fp, new_len: &new_len,
1306	seen_ld_abs: &seen_ld_abs);
1307	if (err)
1308	/ 2nd bpf_convert_filter() can fail only if it fails*
1309	* to allocate memory, remapping must succeed. Note,
1310	* that at this time old_fp has already been released
1311	* by krealloc().
1312	*/
1313	goto out_err_free;
1314
1315	fp = bpf_prog_select_runtime(fp, err: &err);
1316	if (err)
1317	goto out_err_free;
1318
1319	kfree(objp: old_prog);
1320	return fp;
1321
1322	out_err_free:
1323	kfree(objp: old_prog);
1324	out_err:
1325	__bpf_prog_release(prog: fp);
1326	return ERR_PTR(error: err);
1327	}
1328
1329	static struct bpf_prog bpf_prepare_filter(struct* bpf_prog *fp,
1330	bpf_aux_classic_check_t trans)
1331	{
1332	int err;
1333
1334	fp->bpf_func = NULL;
1335	fp->jited = `0`;
1336
1337	err = bpf_check_classic(filter: fp->insns, flen: fp->len);
1338	if (err) {
1339	__bpf_prog_release(prog: fp);
1340	return ERR_PTR(error: err);
1341	}
1342
1343	/ There might be additional checks and transformations*
1344	* needed on classic filters, f.e. in case of seccomp.
1345	*/
1346	if (trans) {
1347	err = trans(fp->insns, fp->len);
1348	if (err) {
1349	__bpf_prog_release(prog: fp);
1350	return ERR_PTR(error: err);
1351	}
1352	}
1353
1354	/ Probe if we can JIT compile the filter and if so, do*
1355	* the compilation of the filter.
1356	*/
1357	bpf_jit_compile(prog: fp);
1358
1359	/ JIT compiler couldn't process this filter, so do the eBPF translation*
1360	* for the optimized interpreter.
1361	*/
1362	if (!fp->jited)
1363	fp = bpf_migrate_filter(fp);
1364
1365	return fp;
1366	}
1367
1368	/**
1369	* bpf_prog_create - create an unattached filter
1370	* @pfp: the unattached filter that is created
1371	* @fprog: the filter program
1372	*
1373	* Create a filter independent of any socket. We first run some
1374	* sanity checks on it to make sure it does not explode on us later.
1375	* If an error occurs or there is insufficient memory for the filter
1376	* a negative errno code is returned. On success the return is zero.
1377	*/
1378	int bpf_prog_create(struct bpf_prog pfp, struct** sock_fprog_kern *fprog)
1379	{
1380	unsigned int fsize = bpf_classic_proglen(fprog);
1381	struct bpf_prog *fp;
1382
1383	/ Make sure new filter is there and in the right amounts. /
1384	if (!bpf_check_basics_ok(filter: fprog->filter, flen: fprog->len))
1385	return -EINVAL;
1386
1387	fp = bpf_prog_alloc(size: bpf_prog_size(proglen: fprog->len), gfp_extra_flags: `0`);
1388	if (!fp)
1389	return -ENOMEM;
1390
1391	memcpy(fp->insns, fprog->filter, fsize);
1392
1393	fp->len = fprog->len;
1394	/ Since unattached filters are not copied back to user*
1395	* space through sk_get_filter(), we do not need to hold
1396	* a copy here, and can spare us the work.
1397	*/
1398	fp->orig_prog = NULL;
1399
1400	/ bpf_prepare_filter() already takes care of freeing*
1401	* memory in case something goes wrong.
1402	*/
1403	fp = bpf_prepare_filter(fp, NULL);
1404	if (IS_ERR(ptr: fp))
1405	return PTR_ERR(ptr: fp);
1406
1407	*pfp = fp;
1408	return `0`;
1409	}
1410	EXPORT_SYMBOL_GPL(bpf_prog_create);
1411
1412	/**
1413	* bpf_prog_create_from_user - create an unattached filter from user buffer
1414	* @pfp: the unattached filter that is created
1415	* @fprog: the filter program
1416	* @trans: post-classic verifier transformation handler
1417	* @save_orig: save classic BPF program
1418	*
1419	* This function effectively does the same as bpf_prog_create(), only
1420	* that it builds up its insns buffer from user space provided buffer.
1421	* It also allows for passing a bpf_aux_classic_check_t handler.
1422	*/
1423	int bpf_prog_create_from_user(struct bpf_prog pfp, struct** sock_fprog *fprog,
1424	bpf_aux_classic_check_t trans, bool save_orig)
1425	{
1426	unsigned int fsize = bpf_classic_proglen(fprog);
1427	struct bpf_prog *fp;
1428	int err;
1429
1430	/ Make sure new filter is there and in the right amounts. /
1431	if (!bpf_check_basics_ok(filter: fprog->filter, flen: fprog->len))
1432	return -EINVAL;
1433
1434	fp = bpf_prog_alloc(size: bpf_prog_size(proglen: fprog->len), gfp_extra_flags: `0`);
1435	if (!fp)
1436	return -ENOMEM;
1437
1438	if (copy_from_user(to: fp->insns, from: fprog->filter, n: fsize)) {
1439	__bpf_prog_free(fp);
1440	return -EFAULT;
1441	}
1442
1443	fp->len = fprog->len;
1444	fp->orig_prog = NULL;
1445
1446	if (save_orig) {
1447	err = bpf_prog_store_orig_filter(fp, fprog);
1448	if (err) {
1449	__bpf_prog_free(fp);
1450	return -ENOMEM;
1451	}
1452	}
1453
1454	/ bpf_prepare_filter() already takes care of freeing*
1455	* memory in case something goes wrong.
1456	*/
1457	fp = bpf_prepare_filter(fp, trans);
1458	if (IS_ERR(ptr: fp))
1459	return PTR_ERR(ptr: fp);
1460
1461	*pfp = fp;
1462	return `0`;
1463	}
1464	EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
1465
/* Exported counterpart of the create functions: releases @fp through
 * __bpf_prog_release().
 */
void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1471
1472	static int __sk_attach_prog(struct bpf_prog prog, struct* sock *sk)
1473	{
1474	struct sk_filter fp, old_fp;
1475
1476	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1477	if (!fp)
1478	return -ENOMEM;
1479
1480	fp->prog = prog;
1481
1482	if (!__sk_filter_charge(sk, fp)) {
1483	kfree(objp: fp);
1484	return -ENOMEM;
1485	}
1486	refcount_set(r: &fp->refcnt, n: `1`);
1487
1488	old_fp = rcu_dereference_protected(sk->sk_filter,
1489	lockdep_sock_is_held(sk));
1490	rcu_assign_pointer(sk->sk_filter, fp);
1491
1492	if (old_fp)
1493	sk_filter_uncharge(sk, fp: old_fp);
1494
1495	return `0`;
1496	}
1497
1498	static
1499	struct bpf_prog __get_filter(struct* sock_fprog fprog, struct* sock *sk)
1500	{
1501	unsigned int fsize = bpf_classic_proglen(fprog);
1502	struct bpf_prog *prog;
1503	int err;
1504
1505	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
1506	return ERR_PTR(error: -EPERM);
1507
1508	/ Make sure new filter is there and in the right amounts. /
1509	if (!bpf_check_basics_ok(filter: fprog->filter, flen: fprog->len))
1510	return ERR_PTR(error: -EINVAL);
1511
1512	prog = bpf_prog_alloc(size: bpf_prog_size(proglen: fprog->len), gfp_extra_flags: `0`);
1513	if (!prog)
1514	return ERR_PTR(error: -ENOMEM);
1515
1516	if (copy_from_user(to: prog->insns, from: fprog->filter, n: fsize)) {
1517	__bpf_prog_free(fp: prog);
1518	return ERR_PTR(error: -EFAULT);
1519	}
1520
1521	prog->len = fprog->len;
1522
1523	err = bpf_prog_store_orig_filter(fp: prog, fprog);
1524	if (err) {
1525	__bpf_prog_free(fp: prog);
1526	return ERR_PTR(error: -ENOMEM);
1527	}
1528
1529	/ bpf_prepare_filter() already takes care of freeing*
1530	* memory in case something goes wrong.
1531	*/
1532	return bpf_prepare_filter(fp: prog, NULL);
1533	}
1534
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		/* Attaching failed, so the program is still ours to release. */
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
1562
1563	int sk_reuseport_attach_filter(struct sock_fprog fprog, struct* sock *sk)
1564	{
1565	struct bpf_prog *prog = __get_filter(fprog, sk);
1566	int err, optmem_max;
1567
1568	if (IS_ERR(ptr: prog))
1569	return PTR_ERR(ptr: prog);
1570
1571	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
1572	if (bpf_prog_size(proglen: prog->len) > optmem_max)
1573	err = -ENOMEM;
1574	else
1575	err = reuseport_attach_prog(sk, prog);
1576
1577	if (err)
1578	__bpf_prog_release(prog);
1579
1580	return err;
1581	}
1582
1583	static struct bpf_prog __get_bpf(u32 ufd, struct* sock *sk)
1584	{
1585	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
1586	return ERR_PTR(error: -EPERM);
1587
1588	return bpf_prog_get_type(ufd, type: BPF_PROG_TYPE_SOCKET_FILTER);
1589	}
1590
1591	int sk_attach_bpf(u32 ufd, struct sock *sk)
1592	{
1593	struct bpf_prog *prog = __get_bpf(ufd, sk);
1594	int err;
1595
1596	if (IS_ERR(ptr: prog))
1597	return PTR_ERR(ptr: prog);
1598
1599	err = __sk_attach_prog(prog, sk);
1600	if (err < `0`) {
1601	bpf_prog_put(prog);
1602	return err;
1603	}
1604
1605	return `0`;
1606	}
1607
1608	int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1609	{
1610	struct bpf_prog *prog;
1611	int err, optmem_max;
1612
1613	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
1614	return -EPERM;
1615
1616	prog = bpf_prog_get_type(ufd, type: BPF_PROG_TYPE_SOCKET_FILTER);
1617	if (PTR_ERR(ptr: prog) == -EINVAL)
1618	prog = bpf_prog_get_type(ufd, type: BPF_PROG_TYPE_SK_REUSEPORT);
1619	if (IS_ERR(ptr: prog))
1620	return PTR_ERR(ptr: prog);
1621
1622	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
1623	/ Like other non BPF_PROG_TYPE_SOCKET_FILTER*
1624	* bpf prog (e.g. sockmap). It depends on the
1625	* limitation imposed by bpf_prog_load().
1626	* Hence, sysctl_optmem_max is not checked.
1627	*/
1628	if ((sk->sk_type != SOCK_STREAM &&
1629	sk->sk_type != SOCK_DGRAM) \|\|
1630	(sk->sk_protocol != IPPROTO_UDP &&
1631	sk->sk_protocol != IPPROTO_TCP) \|\|
1632	(sk->sk_family != AF_INET &&
1633	sk->sk_family != AF_INET6)) {
1634	err = -ENOTSUPP;
1635	goto err_prog_put;
1636	}
1637	} else {
1638	/ BPF_PROG_TYPE_SOCKET_FILTER /
1639	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
1640	if (bpf_prog_size(proglen: prog->len) > optmem_max) {
1641	err = -ENOMEM;
1642	goto err_prog_put;
1643	}
1644	}
1645
1646	err = reuseport_attach_prog(sk, prog);
1647	err_prog_put:
1648	if (err)
1649	bpf_prog_put(prog);
1650
1651	return err;
1652	}
1653
1654	void sk_reuseport_prog_free(struct bpf_prog *prog)
1655	{
1656	if (!prog)
1657	return;
1658
1659	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1660	bpf_prog_put(prog);
1661	else
1662	bpf_prog_destroy(prog);
1663	}
1664
/* Make the first @write_len bytes of @skb writable via
 * skb_ensure_writable(). With CONFIG_DEBUG_NET, lengths above INT_MAX
 * are rejected up front with -EINVAL.
 */
static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
#ifdef CONFIG_DEBUG_NET
	/* Avoid a splat in pskb_may_pull_reason() */
	if (write_len > INT_MAX)
		return -EINVAL;
#endif
	return skb_ensure_writable(skb, write_len);
}
1675
/* Same as __bpf_try_make_writable(), but additionally recomputes the
 * skb's BPF data pointers, whether or not the call succeeded.
 */
static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int ret;

	ret = __bpf_try_make_writable(skb, write_len);
	bpf_compute_data_pointers(skb);

	return ret;
}
1684
/* Make the skb's current head (skb_headlen() bytes) writable. */
static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}
1689
1690	static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1691	{
1692	if (skb_at_tc_ingress(skb))
1693	skb_postpush_rcsum(skb, start: skb_mac_header(skb), len: skb->mac_len);
1694	}
1695
1696	static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1697	{
1698	if (skb_at_tc_ingress(skb))
1699	skb_postpull_rcsum(skb, start: skb_mac_header(skb), len: skb->mac_len);
1700	}
1701
1702	BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1703	const void *, from, u32, len, u64, flags)
1704	{
1705	void *ptr;
1706
1707	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM \| BPF_F_INVALIDATE_HASH)))
1708	return -EINVAL;
1709	if (unlikely(offset > INT_MAX))
1710	return -EFAULT;
1711	if (unlikely(bpf_try_make_writable(skb, offset + len)))
1712	return -EFAULT;
1713
1714	ptr = skb->data + offset;
1715	if (flags & BPF_F_RECOMPUTE_CSUM)
1716	__skb_postpull_rcsum(skb, start: ptr, len, off: offset);
1717
1718	memcpy(ptr, from, len);
1719
1720	if (flags & BPF_F_RECOMPUTE_CSUM)
1721	__skb_postpush_rcsum(skb, start: ptr, len, off: offset);
1722	if (flags & BPF_F_INVALIDATE_HASH)
1723	skb_clear_hash(skb);
1724
1725	return `0`;
1726	}
1727
1728	static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1729	.func = bpf_skb_store_bytes,
1730	.gpl_only = false,
1731	.ret_type = RET_INTEGER,
1732	.arg1_type = ARG_PTR_TO_CTX,
1733	.arg2_type = ARG_ANYTHING,
1734	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
1735	.arg4_type = ARG_CONST_SIZE,
1736	.arg5_type = ARG_ANYTHING,
1737	};
1738
1739	int __bpf_skb_store_bytes(struct sk_buff skb, u32 offset, const* void *from,
1740	u32 len, u64 flags)
1741	{
1742	return ____bpf_skb_store_bytes(skb, offset, from, len, flags);
1743	}
1744
1745	BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1746	void *, to, u32, len)
1747	{
1748	void *ptr;
1749
1750	if (unlikely(offset > INT_MAX))
1751	goto err_clear;
1752
1753	ptr = skb_header_pointer(skb, offset, len, buffer: to);
1754	if (unlikely(!ptr))
1755	goto err_clear;
1756	if (ptr != to)
1757	memcpy(to, ptr, len);
1758
1759	return `0`;
1760	err_clear:
1761	memset(to, `0`, len);
1762	return -EFAULT;
1763	}
1764
1765	static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1766	.func = bpf_skb_load_bytes,
1767	.gpl_only = false,
1768	.ret_type = RET_INTEGER,
1769	.arg1_type = ARG_PTR_TO_CTX,
1770	.arg2_type = ARG_ANYTHING,
1771	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
1772	.arg4_type = ARG_CONST_SIZE,
1773	};
1774
1775	int __bpf_skb_load_bytes(const struct sk_buff skb, u32 offset, void* *to, u32 len)
1776	{
1777	return ____bpf_skb_load_bytes(skb, offset, to, len);
1778	}
1779
1780	BPF_CALL_4(bpf_flow_dissector_load_bytes,
1781	const struct bpf_flow_dissector *, ctx, u32, offset,
1782	void *, to, u32, len)
1783	{
1784	void *ptr;
1785
1786	if (unlikely(offset > `0xffff`))
1787	goto err_clear;
1788
1789	if (unlikely(!ctx->skb))
1790	goto err_clear;
1791
1792	ptr = skb_header_pointer(skb: ctx->skb, offset, len, buffer: to);
1793	if (unlikely(!ptr))
1794	goto err_clear;
1795	if (ptr != to)
1796	memcpy(to, ptr, len);
1797
1798	return `0`;
1799	err_clear:
1800	memset(to, `0`, len);
1801	return -EFAULT;
1802	}
1803
1804	static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1805	.func = bpf_flow_dissector_load_bytes,
1806	.gpl_only = false,
1807	.ret_type = RET_INTEGER,
1808	.arg1_type = ARG_PTR_TO_CTX,
1809	.arg2_type = ARG_ANYTHING,
1810	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
1811	.arg4_type = ARG_CONST_SIZE,
1812	};
1813
1814	BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1815	u32, offset, void *, to, u32, len, u32, start_header)
1816	{
1817	u8 *end = skb_tail_pointer(skb);
1818	u8 start, ptr;
1819
1820	if (unlikely(offset > `0xffff`))
1821	goto err_clear;
1822
1823	switch (start_header) {
1824	case BPF_HDR_START_MAC:
1825	if (unlikely(!skb_mac_header_was_set(skb)))
1826	goto err_clear;
1827	start = skb_mac_header(skb);
1828	break;
1829	case BPF_HDR_START_NET:
1830	start = skb_network_header(skb);
1831	break;
1832	default:
1833	goto err_clear;
1834	}
1835
1836	ptr = start + offset;
1837
1838	if (likely(ptr + len <= end)) {
1839	memcpy(to, ptr, len);
1840	return `0`;
1841	}
1842
1843	err_clear:
1844	memset(to, `0`, len);
1845	return -EFAULT;
1846	}
1847
1848	static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1849	.func = bpf_skb_load_bytes_relative,
1850	.gpl_only = false,
1851	.ret_type = RET_INTEGER,
1852	.arg1_type = ARG_PTR_TO_CTX,
1853	.arg2_type = ARG_ANYTHING,
1854	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
1855	.arg4_type = ARG_CONST_SIZE,
1856	.arg5_type = ARG_ANYTHING,
1857	};
1858
1859	BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1860	{
1861	/ Idea is the following: should the needed direct read/write*
1862	* test fail during runtime, we can pull in more data and redo
1863	* again, since implicitly, we invalidate previous checks here.
1864	*
1865	* Or, since we know how much we need to make read/writeable,
1866	* this can be done once at the program beginning for direct
1867	* access case. By this we overcome limitations of only current
1868	* headroom being accessible.
1869	*/
1870	return bpf_try_make_writable(skb, write_len: len ? : skb_headlen(skb));
1871	}
1872
1873	static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1874	.func = bpf_skb_pull_data,
1875	.gpl_only = false,
1876	.ret_type = RET_INTEGER,
1877	.arg1_type = ARG_PTR_TO_CTX,
1878	.arg2_type = ARG_ANYTHING,
1879	};
1880
1881	BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1882	{
1883	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1884	}
1885
1886	static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1887	.func = bpf_sk_fullsock,
1888	.gpl_only = false,
1889	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
1890	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
1891	};
1892
/* sk_skb flavour of the writability helper: forwards to
 * __bpf_try_make_writable() and, unlike bpf_try_make_writable(), does
 * not recompute the BPF data pointers.
 */
static inline int sk_skb_try_make_writable(struct sk_buff *skb,
					   unsigned int write_len)
{
	int ret = __bpf_try_make_writable(skb, write_len);

	return ret;
}
1898
1899	BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1900	{
1901	/ Idea is the following: should the needed direct read/write*
1902	* test fail during runtime, we can pull in more data and redo
1903	* again, since implicitly, we invalidate previous checks here.
1904	*
1905	* Or, since we know how much we need to make read/writeable,
1906	* this can be done once at the program beginning for direct
1907	* access case. By this we overcome limitations of only current
1908	* headroom being accessible.
1909	*/
1910	return sk_skb_try_make_writable(skb, write_len: len ? : skb_headlen(skb));
1911	}
1912
1913	static const struct bpf_func_proto sk_skb_pull_data_proto = {
1914	.func = sk_skb_pull_data,
1915	.gpl_only = false,
1916	.ret_type = RET_INTEGER,
1917	.arg1_type = ARG_PTR_TO_CTX,
1918	.arg2_type = ARG_ANYTHING,
1919	};
1920
1921	BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1922	u64, from, u64, to, u64, flags)
1923	{
1924	__sum16 *ptr;
1925
1926	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1927	return -EINVAL;
1928	if (unlikely(offset > `0xffff` \|\| offset & `1`))
1929	return -EFAULT;
1930	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1931	return -EFAULT;
1932
1933	ptr = (__sum16 *)(skb->data + offset);
1934	switch (flags & BPF_F_HDR_FIELD_MASK) {
1935	case `0`:
1936	if (unlikely(from != `0`))
1937	return -EINVAL;
1938
1939	csum_replace_by_diff(sum: ptr, diff: to);
1940	break;
1941	case `2`:
1942	csum_replace2(sum: ptr, old: from, new: to);
1943	break;
1944	case `4`:
1945	csum_replace4(sum: ptr, from, to);
1946	break;
1947	default:
1948	return -EINVAL;
1949	}
1950
1951	return `0`;
1952	}
1953
1954	static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1955	.func = bpf_l3_csum_replace,
1956	.gpl_only = false,
1957	.ret_type = RET_INTEGER,
1958	.arg1_type = ARG_PTR_TO_CTX,
1959	.arg2_type = ARG_ANYTHING,
1960	.arg3_type = ARG_ANYTHING,
1961	.arg4_type = ARG_ANYTHING,
1962	.arg5_type = ARG_ANYTHING,
1963	};
1964
1965	BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1966	u64, from, u64, to, u64, flags)
1967	{
1968	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1969	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1970	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1971	bool is_ipv6 = flags & BPF_F_IPV6;
1972	__sum16 *ptr;
1973
1974	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 \| BPF_F_MARK_ENFORCE \|
1975	BPF_F_PSEUDO_HDR \| BPF_F_HDR_FIELD_MASK \| BPF_F_IPV6)))
1976	return -EINVAL;
1977	if (unlikely(offset > `0xffff` \|\| offset & `1`))
1978	return -EFAULT;
1979	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1980	return -EFAULT;
1981
1982	ptr = (__sum16 *)(skb->data + offset);
1983	if (is_mmzero && !do_mforce && !*ptr)
1984	return `0`;
1985
1986	switch (flags & BPF_F_HDR_FIELD_MASK) {
1987	case `0`:
1988	if (unlikely(from != `0`))
1989	return -EINVAL;
1990
1991	inet_proto_csum_replace_by_diff(sum: ptr, skb, diff: to, pseudohdr: is_pseudo, ipv6: is_ipv6);
1992	break;
1993	case `2`:
1994	inet_proto_csum_replace2(sum: ptr, skb, from, to, pseudohdr: is_pseudo);
1995	break;
1996	case `4`:
1997	inet_proto_csum_replace4(sum: ptr, skb, from, to, pseudohdr: is_pseudo);
1998	break;
1999	default:
2000	return -EINVAL;
2001	}
2002
2003	if (is_mmzero && !*ptr)
2004	*ptr = CSUM_MANGLED_0;
2005	return `0`;
2006	}
2007
2008	static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
2009	.func = bpf_l4_csum_replace,
2010	.gpl_only = false,
2011	.ret_type = RET_INTEGER,
2012	.arg1_type = ARG_PTR_TO_CTX,
2013	.arg2_type = ARG_ANYTHING,
2014	.arg3_type = ARG_ANYTHING,
2015	.arg4_type = ARG_ANYTHING,
2016	.arg5_type = ARG_ANYTHING,
2017	};
2018
2019	BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
2020	__be32 *, to, u32, to_size, __wsum, seed)
2021	{
2022	/ This is quite flexible, some examples:*
2023	*
2024	* from_size == 0, to_size > 0, seed := csum --> pushing data
2025	* from_size > 0, to_size == 0, seed := csum --> pulling data
2026	* from_size > 0, to_size > 0, seed := 0 --> diffing data
2027	*
2028	* Even for diffing, from_size and to_size don't need to be equal.
2029	*/
2030
2031	__wsum ret = seed;
2032
2033	if (from_size && to_size)
2034	ret = csum_sub(csum: csum_partial(buff: to, len: to_size, sum: ret),
2035	addend: csum_partial(buff: from, len: from_size, sum: `0`));
2036	else if (to_size)
2037	ret = csum_partial(buff: to, len: to_size, sum: ret);
2038
2039	else if (from_size)
2040	ret = ~csum_partial(buff: from, len: from_size, sum: ~ret);
2041
2042	return csum_from32to16(sum: (__force unsigned int)ret);
2043	}
2044
2045	static const struct bpf_func_proto bpf_csum_diff_proto = {
2046	.func = bpf_csum_diff,
2047	.gpl_only = false,
2048	.pkt_access = true,
2049	.ret_type = RET_INTEGER,
2050	.arg1_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
2051	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
2052	.arg3_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
2053	.arg4_type = ARG_CONST_SIZE_OR_ZERO,
2054	.arg5_type = ARG_ANYTHING,
2055	};
2056
2057	BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2058	{
2059	/ The interface is to be used in combination with bpf_csum_diff()*
2060	* for direct packet writes. csum rotation for alignment as well
2061	* as emulating csum_sub() can be done from the eBPF program.
2062	*/
2063	if (skb->ip_summed == CHECKSUM_COMPLETE)
2064	return (skb->csum = csum_add(csum: skb->csum, addend: csum));
2065
2066	return -ENOTSUPP;
2067	}
2068
2069	static const struct bpf_func_proto bpf_csum_update_proto = {
2070	.func = bpf_csum_update,
2071	.gpl_only = false,
2072	.ret_type = RET_INTEGER,
2073	.arg1_type = ARG_PTR_TO_CTX,
2074	.arg2_type = ARG_ANYTHING,
2075	};
2076
2077	BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2078	{
2079	/ The interface is to be used in combination with bpf_skb_adjust_room()*
2080	* for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
2081	* is passed as flags, for example.
2082	*/
2083	switch (level) {
2084	case BPF_CSUM_LEVEL_INC:
2085	__skb_incr_checksum_unnecessary(skb);
2086	break;
2087	case BPF_CSUM_LEVEL_DEC:
2088	__skb_decr_checksum_unnecessary(skb);
2089	break;
2090	case BPF_CSUM_LEVEL_RESET:
2091	__skb_reset_checksum_unnecessary(skb);
2092	break;
2093	case BPF_CSUM_LEVEL_QUERY:
2094	return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2095	skb->csum_level : -EACCES;
2096	default:
2097	return -EINVAL;
2098	}
2099
2100	return `0`;
2101	}
2102
2103	static const struct bpf_func_proto bpf_csum_level_proto = {
2104	.func = bpf_csum_level,
2105	.gpl_only = false,
2106	.ret_type = RET_INTEGER,
2107	.arg1_type = ARG_PTR_TO_CTX,
2108	.arg2_type = ARG_ANYTHING,
2109	};
2110
/* Inject @skb into @dev via dev_forward_skb_nomtu(). */
static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
	return dev_forward_skb_nomtu(dev, skb);
}
2115
2116	static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2117	struct sk_buff *skb)
2118	{
2119	int ret = ____dev_forward_skb(dev, skb, check_mtu: false);
2120
2121	if (likely(!ret)) {
2122	skb->dev = dev;
2123	ret = netif_rx(skb);
2124	}
2125
2126	return ret;
2127	}
2128
2129	static inline int __bpf_tx_skb(struct net_device dev, struct* sk_buff *skb)
2130	{
2131	int ret;
2132
2133	if (dev_xmit_recursion()) {
2134	net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2135	kfree_skb(skb);
2136	return -ENETDOWN;
2137	}
2138
2139	skb->dev = dev;
2140	skb_set_redirected_noclear(skb, from_ingress: skb_at_tc_ingress(skb));
2141	skb_clear_tstamp(skb);
2142
2143	dev_xmit_recursion_inc();
2144	ret = dev_queue_xmit(skb);
2145	dev_xmit_recursion_dec();
2146
2147	return ret;
2148	}
2149
2150	static int __bpf_redirect_no_mac(struct sk_buff skb, struct* net_device *dev,
2151	u32 flags)
2152	{
2153	unsigned int mlen = skb_network_offset(skb);
2154
2155	if (unlikely(skb->len <= mlen)) {
2156	kfree_skb(skb);
2157	return -ERANGE;
2158	}
2159
2160	if (mlen) {
2161	__skb_pull(skb, len: mlen);
2162
2163	/ At ingress, the mac header has already been pulled once.*
2164	* At egress, skb_pospull_rcsum has to be done in case that
2165	* the skb is originated from ingress (i.e. a forwarded skb)
2166	* to ensure that rcsum starts at net header.
2167	*/
2168	if (!skb_at_tc_ingress(skb))
2169	skb_postpull_rcsum(skb, start: skb_mac_header(skb), len: mlen);
2170	}
2171	skb_pop_mac_header(skb);
2172	skb_reset_mac_len(skb);
2173	return flags & BPF_F_INGRESS ?
2174	__bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2175	}
2176
2177	static int __bpf_redirect_common(struct sk_buff skb, struct* net_device *dev,
2178	u32 flags)
2179	{
2180	/ Verify that a link layer header is carried /
2181	if (unlikely(skb->mac_header >= skb->network_header \|\| skb->len == `0`)) {
2182	kfree_skb(skb);
2183	return -ERANGE;
2184	}
2185
2186	bpf_push_mac_rcsum(skb);
2187	return flags & BPF_F_INGRESS ?
2188	__bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2189	}
2190
2191	static int __bpf_redirect(struct sk_buff skb, struct* net_device *dev,
2192	u32 flags)
2193	{
2194	if (dev_is_mac_header_xmit(dev))
2195	return __bpf_redirect_common(skb, dev, flags);
2196	else
2197	return __bpf_redirect_no_mac(skb, dev, flags);
2198	}
2199
2200	#if IS_ENABLED(CONFIG_IPV6)
2201	static int bpf_out_neigh_v6(struct net net, struct* sk_buff *skb,
2202	struct net_device dev, struct* bpf_nh_params *nh)
2203	{
2204	u32 hh_len = LL_RESERVED_SPACE(dev);
2205	const struct in6_addr *nexthop;
2206	struct dst_entry *dst = NULL;
2207	struct neighbour *neigh;
2208
2209	if (dev_xmit_recursion()) {
2210	net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2211	goto out_drop;
2212	}
2213
2214	skb->dev = dev;
2215	skb_clear_tstamp(skb);
2216
2217	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2218	skb = skb_expand_head(skb, headroom: hh_len);
2219	if (!skb)
2220	return -ENOMEM;
2221	}
2222
2223	rcu_read_lock();
2224	if (!nh) {
2225	dst = skb_dst(skb);
2226	nexthop = rt6_nexthop(dst_rt6_info(dst),
2227	daddr: &ipv6_hdr(skb)->daddr);
2228	} else {
2229	nexthop = &nh->ipv6_nh;
2230	}
2231	neigh = ip_neigh_gw6(dev, addr: nexthop);
2232	if (likely(!IS_ERR(neigh))) {
2233	int ret;
2234
2235	sock_confirm_neigh(skb, n: neigh);
2236	local_bh_disable();
2237	dev_xmit_recursion_inc();
2238	ret = neigh_output(n: neigh, skb, skip_cache: false);
2239	dev_xmit_recursion_dec();
2240	local_bh_enable();
2241	rcu_read_unlock();
2242	return ret;
2243	}
2244	rcu_read_unlock();
2245	if (dst)
2246	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
2247	out_drop:
2248	kfree_skb(skb);
2249	return -ENETDOWN;
2250	}
2251
2252	static int __bpf_redirect_neigh_v6(struct sk_buff skb, struct* net_device *dev,
2253	struct bpf_nh_params *nh)
2254	{
2255	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
2256	struct net *net = dev_net(dev);
2257	int err, ret = NET_XMIT_DROP;
2258
2259	if (!nh) {
2260	struct dst_entry *dst;
2261	struct flowi6 fl6 = {
2262	.flowi6_flags = FLOWI_FLAG_ANYSRC,
2263	.flowi6_mark = skb->mark,
2264	.flowlabel = ip6_flowinfo(hdr: ip6h),
2265	.flowi6_oif = dev->ifindex,
2266	.flowi6_proto = ip6h->nexthdr,
2267	.daddr = ip6h->daddr,
2268	.saddr = ip6h->saddr,
2269	};
2270
2271	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2272	if (IS_ERR(ptr: dst))
2273	goto out_drop;
2274
2275	skb_dst_set(skb, dst);
2276	} else if (nh->nh_family != AF_INET6) {
2277	goto out_drop;
2278	}
2279
2280	err = bpf_out_neigh_v6(net, skb, dev, nh);
2281	if (unlikely(net_xmit_eval(err)))
2282	DEV_STATS_INC(dev, tx_errors);
2283	else
2284	ret = NET_XMIT_SUCCESS;
2285	goto out_xmit;
2286	out_drop:
2287	DEV_STATS_INC(dev, tx_errors);
2288	kfree_skb(skb);
2289	out_xmit:
2290	return ret;
2291	}
2292	#else
2293	static int __bpf_redirect_neigh_v6(struct sk_buff skb, struct* net_device *dev,
2294	struct bpf_nh_params *nh)
2295	{
2296	kfree_skb(skb);
2297	return NET_XMIT_DROP;
2298	}
2299	#endif /* CONFIG_IPV6 */
2300
2301	#if IS_ENABLED(CONFIG_INET)
2302	static int bpf_out_neigh_v4(struct net net, struct* sk_buff *skb,
2303	struct net_device dev, struct* bpf_nh_params *nh)
2304	{
2305	u32 hh_len = LL_RESERVED_SPACE(dev);
2306	struct neighbour *neigh;
2307	bool is_v6gw = false;
2308
2309	if (dev_xmit_recursion()) {
2310	net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2311	goto out_drop;
2312	}
2313
2314	skb->dev = dev;
2315	skb_clear_tstamp(skb);
2316
2317	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2318	skb = skb_expand_head(skb, headroom: hh_len);
2319	if (!skb)
2320	return -ENOMEM;
2321	}
2322
2323	rcu_read_lock();
2324	if (!nh) {
2325	struct rtable *rt = skb_rtable(skb);
2326
2327	neigh = ip_neigh_for_gw(rt, skb, is_v6gw: &is_v6gw);
2328	} else if (nh->nh_family == AF_INET6) {
2329	neigh = ip_neigh_gw6(dev, addr: &nh->ipv6_nh);
2330	is_v6gw = true;
2331	} else if (nh->nh_family == AF_INET) {
2332	neigh = ip_neigh_gw4(dev, daddr: nh->ipv4_nh);
2333	} else {
2334	rcu_read_unlock();
2335	goto out_drop;
2336	}
2337
2338	if (likely(!IS_ERR(neigh))) {
2339	int ret;
2340
2341	sock_confirm_neigh(skb, n: neigh);
2342	local_bh_disable();
2343	dev_xmit_recursion_inc();
2344	ret = neigh_output(n: neigh, skb, skip_cache: is_v6gw);
2345	dev_xmit_recursion_dec();
2346	local_bh_enable();
2347	rcu_read_unlock();
2348	return ret;
2349	}
2350	rcu_read_unlock();
2351	out_drop:
2352	kfree_skb(skb);
2353	return -ENETDOWN;
2354	}
2355
2356	static int __bpf_redirect_neigh_v4(struct sk_buff skb, struct* net_device *dev,
2357	struct bpf_nh_params *nh)
2358	{
2359	const struct iphdr *ip4h = ip_hdr(skb);
2360	struct net *net = dev_net(dev);
2361	int err, ret = NET_XMIT_DROP;
2362
2363	if (!nh) {
2364	struct flowi4 fl4 = {
2365	.flowi4_flags = FLOWI_FLAG_ANYSRC,
2366	.flowi4_mark = skb->mark,
2367	.flowi4_tos = inet_dscp_to_dsfield(dscp: ip4h_dscp(ip4h)),
2368	.flowi4_oif = dev->ifindex,
2369	.flowi4_proto = ip4h->protocol,
2370	.daddr = ip4h->daddr,
2371	.saddr = ip4h->saddr,
2372	};
2373	struct rtable *rt;
2374
2375	rt = ip_route_output_flow(net, flp: &fl4, NULL);
2376	if (IS_ERR(ptr: rt))
2377	goto out_drop;
2378	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2379	ip_rt_put(rt);
2380	goto out_drop;
2381	}
2382
2383	skb_dst_set(skb, dst: &rt->dst);
2384	}
2385
2386	err = bpf_out_neigh_v4(net, skb, dev, nh);
2387	if (unlikely(net_xmit_eval(err)))
2388	DEV_STATS_INC(dev, tx_errors);
2389	else
2390	ret = NET_XMIT_SUCCESS;
2391	goto out_xmit;
2392	out_drop:
2393	DEV_STATS_INC(dev, tx_errors);
2394	kfree_skb(skb);
2395	out_xmit:
2396	return ret;
2397	}
2398	#else
2399	static int __bpf_redirect_neigh_v4(struct sk_buff skb, struct* net_device *dev,
2400	struct bpf_nh_params *nh)
2401	{
2402	kfree_skb(skb);
2403	return NET_XMIT_DROP;
2404	}
2405	#endif /* CONFIG_INET */
2406
2407	static int __bpf_redirect_neigh(struct sk_buff skb, struct* net_device *dev,
2408	struct bpf_nh_params *nh)
2409	{
2410	struct ethhdr *ethh = eth_hdr(skb);
2411
2412	if (unlikely(skb->mac_header >= skb->network_header))
2413	goto out;
2414	bpf_push_mac_rcsum(skb);
2415	if (is_multicast_ether_addr(addr: ethh->h_dest))
2416	goto out;
2417
2418	skb_pull(skb, len: sizeof(*ethh));
2419	skb_unset_mac_header(skb);
2420	skb_reset_network_header(skb);
2421
2422	if (skb->protocol == htons(ETH_P_IP))
2423	return __bpf_redirect_neigh_v4(skb, dev, nh);
2424	else if (skb->protocol == htons(ETH_P_IPV6))
2425	return __bpf_redirect_neigh_v6(skb, dev, nh);
2426	out:
2427	kfree_skb(skb);
2428	return -ENOTSUPP;
2429	}
2430
/* Internal, non-exposed redirect flags. They occupy bits 16-18 and are
 * combined into BPF_F_REDIRECT_INTERNAL so callers can reject them when
 * they show up in helper flag arguments.
 */
enum {
	BPF_F_NEIGH	= (1ULL << 16),
	BPF_F_PEER	= (1ULL << 17),
	BPF_F_NEXTHOP	= (1ULL << 18),
#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
};
2438
2439	BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
2440	{
2441	struct net_device *dev;
2442	struct sk_buff *clone;
2443	int ret;
2444
2445	BUILD_BUG_ON(BPF_F_REDIRECT_INTERNAL & BPF_F_REDIRECT_FLAGS);
2446
2447	if (unlikely(flags & (~(BPF_F_INGRESS) \| BPF_F_REDIRECT_INTERNAL)))
2448	return -EINVAL;
2449
2450	dev = dev_get_by_index_rcu(net: dev_net(dev: skb->dev), ifindex);
2451	if (unlikely(!dev))
2452	return -EINVAL;
2453
2454	clone = skb_clone(skb, GFP_ATOMIC);
2455	if (unlikely(!clone))
2456	return -ENOMEM;
2457
2458	/ For direct write, we need to keep the invariant that the skbs*
2459	* we're dealing with need to be uncloned. Should uncloning fail
2460	* here, we need to free the just generated clone to unclone once
2461	* again.
2462	*/
2463	ret = bpf_try_make_head_writable(skb);
2464	if (unlikely(ret)) {
2465	kfree_skb(skb: clone);
2466	return -ENOMEM;
2467	}
2468
2469	return __bpf_redirect(skb: clone, dev, flags);
2470	}
2471
2472	static const struct bpf_func_proto bpf_clone_redirect_proto = {
2473	.func = bpf_clone_redirect,
2474	.gpl_only = false,
2475	.ret_type = RET_INTEGER,
2476	.arg1_type = ARG_PTR_TO_CTX,
2477	.arg2_type = ARG_ANYTHING,
2478	.arg3_type = ARG_ANYTHING,
2479	};
2480
2481	static struct net_device skb_get_peer_dev(struct* net_device *dev)
2482	{
2483	const struct net_device_ops *ops = dev->netdev_ops;
2484
2485	if (likely(ops->ndo_get_peer_dev))
2486	return INDIRECT_CALL_1(ops->ndo_get_peer_dev,
2487	netkit_peer_dev, dev);
2488	return NULL;
2489	}
2490
2491	int skb_do_redirect(struct sk_buff *skb)
2492	{
2493	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
2494	struct net *net = dev_net(dev: skb->dev);
2495	struct net_device *dev;
2496	u32 flags = ri->flags;
2497
2498	dev = dev_get_by_index_rcu(net, ifindex: ri->tgt_index);
2499	ri->tgt_index = `0`;
2500	ri->flags = `0`;
2501	if (unlikely(!dev))
2502	goto out_drop;
2503	if (flags & BPF_F_PEER) {
2504	if (unlikely(!skb_at_tc_ingress(skb)))
2505	goto out_drop;
2506	dev = skb_get_peer_dev(dev);
2507	if (unlikely(!dev \|\|
2508	!(dev->flags & IFF_UP) \|\|
2509	net_eq(net, dev_net(dev))))
2510	goto out_drop;
2511	skb->dev = dev;
2512	dev_sw_netstats_rx_add(dev, len: skb->len);
2513	skb_scrub_packet(skb, xnet: false);
2514	return -EAGAIN;
2515	}
2516	return flags & BPF_F_NEIGH ?
2517	__bpf_redirect_neigh(skb, dev, nh: flags & BPF_F_NEXTHOP ?
2518	&ri->nh : NULL) :
2519	__bpf_redirect(skb, dev, flags);
2520	out_drop:
2521	kfree_skb(skb);
2522	return -EINVAL;
2523	}
2524
2525	BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2526	{
2527	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
2528
2529	if (unlikely(flags & (~(BPF_F_INGRESS) \| BPF_F_REDIRECT_INTERNAL)))
2530	return TC_ACT_SHOT;
2531
2532	ri->flags = flags;
2533	ri->tgt_index = ifindex;
2534
2535	return TC_ACT_REDIRECT;
2536	}
2537
2538	static const struct bpf_func_proto bpf_redirect_proto = {
2539	.func = bpf_redirect,
2540	.gpl_only = false,
2541	.ret_type = RET_INTEGER,
2542	.arg1_type = ARG_ANYTHING,
2543	.arg2_type = ARG_ANYTHING,
2544	};
2545
2546	BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
2547	{
2548	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
2549
2550	if (unlikely(flags))
2551	return TC_ACT_SHOT;
2552
2553	ri->flags = BPF_F_PEER;
2554	ri->tgt_index = ifindex;
2555
2556	return TC_ACT_REDIRECT;
2557	}
2558
2559	static const struct bpf_func_proto bpf_redirect_peer_proto = {
2560	.func = bpf_redirect_peer,
2561	.gpl_only = false,
2562	.ret_type = RET_INTEGER,
2563	.arg1_type = ARG_ANYTHING,
2564	.arg2_type = ARG_ANYTHING,
2565	};
2566
2567	BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2568	int, plen, u64, flags)
2569	{
2570	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
2571
2572	if (unlikely((plen && plen < sizeof(*params)) \|\| flags))
2573	return TC_ACT_SHOT;
2574
2575	ri->flags = BPF_F_NEIGH \| (plen ? BPF_F_NEXTHOP : `0`);
2576	ri->tgt_index = ifindex;
2577
2578	BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2579	if (plen)
2580	memcpy(&ri->nh, params, sizeof(ri->nh));
2581
2582	return TC_ACT_REDIRECT;
2583	}
2584
2585	static const struct bpf_func_proto bpf_redirect_neigh_proto = {
2586	.func = bpf_redirect_neigh,
2587	.gpl_only = false,
2588	.ret_type = RET_INTEGER,
2589	.arg1_type = ARG_ANYTHING,
2590	.arg2_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
2591	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
2592	.arg4_type = ARG_ANYTHING,
2593	};
2594
2595	BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2596	{
2597	msg->apply_bytes = bytes;
2598	return `0`;
2599	}
2600
2601	static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2602	.func = bpf_msg_apply_bytes,
2603	.gpl_only = false,
2604	.ret_type = RET_INTEGER,
2605	.arg1_type = ARG_PTR_TO_CTX,
2606	.arg2_type = ARG_ANYTHING,
2607	};
2608
2609	BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
2610	{
2611	msg->cork_bytes = bytes;
2612	return `0`;
2613	}
2614
2615	static void sk_msg_reset_curr(struct sk_msg *msg)
2616	{
2617	if (!msg->sg.size) {
2618	msg->sg.curr = msg->sg.start;
2619	msg->sg.copybreak = `0`;
2620	} else {
2621	u32 i = msg->sg.end;
2622
2623	sk_msg_iter_var_prev(i);
2624	msg->sg.curr = i;
2625	msg->sg.copybreak = msg->sg.data[i].length;
2626	}
2627	}
2628
2629	static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2630	.func = bpf_msg_cork_bytes,
2631	.gpl_only = false,
2632	.ret_type = RET_INTEGER,
2633	.arg1_type = ARG_PTR_TO_CTX,
2634	.arg2_type = ARG_ANYTHING,
2635	};
2636
2637	BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2638	u32, end, u64, flags)
2639	{
2640	u32 len = `0`, offset = `0`, copy = `0`, poffset = `0`, bytes = end - start;
2641	u32 first_sge, last_sge, i, shift, bytes_sg_total;
2642	struct scatterlist *sge;
2643	u8 raw, to, *from;
2644	struct page *page;
2645
2646	if (unlikely(flags \|\| end <= start))
2647	return -EINVAL;
2648
2649	/ First find the starting scatterlist element /
2650	i = msg->sg.start;
2651	do {
2652	offset += len;
2653	len = sk_msg_elem(msg, which: i)->length;
2654	if (start < offset + len)
2655	break;
2656	sk_msg_iter_var_next(i);
2657	} while (i != msg->sg.end);
2658
2659	if (unlikely(start >= offset + len))
2660	return -EINVAL;
2661
2662	first_sge = i;
2663	/ The start may point into the sg element so we need to also*
2664	* account for the headroom.
2665	*/
2666	bytes_sg_total = start - offset + bytes;
2667	if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
2668	goto out;
2669
2670	/ At this point we need to linearize multiple scatterlist*
2671	* elements or a single shared page. Either way we need to
2672	* copy into a linear buffer exclusively owned by BPF. Then
2673	* place the buffer in the scatterlist and fixup the original
2674	* entries by removing the entries now in the linear buffer
2675	* and shifting the remaining entries. For now we do not try
2676	* to copy partial entries to avoid complexity of running out
2677	* of sg_entry slots. The downside is reading a single byte
2678	* will copy the entire sg entry.
2679	*/
2680	do {
2681	copy += sk_msg_elem(msg, which: i)->length;
2682	sk_msg_iter_var_next(i);
2683	if (bytes_sg_total <= copy)
2684	break;
2685	} while (i != msg->sg.end);
2686	last_sge = i;
2687
2688	if (unlikely(bytes_sg_total > copy))
2689	return -EINVAL;
2690
2691	page = alloc_pages(__GFP_NOWARN \| GFP_ATOMIC \| __GFP_COMP,
2692	get_order(copy));
2693	if (unlikely(!page))
2694	return -ENOMEM;
2695
2696	raw = page_address(page);
2697	i = first_sge;
2698	do {
2699	sge = sk_msg_elem(msg, which: i);
2700	from = sg_virt(sg: sge);
2701	len = sge->length;
2702	to = raw + poffset;
2703
2704	memcpy(to, from, len);
2705	poffset += len;
2706	sge->length = `0`;
2707	put_page(page: sg_page(sg: sge));
2708
2709	sk_msg_iter_var_next(i);
2710	} while (i != last_sge);
2711
2712	sg_set_page(sg: &msg->sg.data[first_sge], page, len: copy, offset: `0`);
2713
2714	/ To repair sg ring we need to shift entries. If we only*
2715	* had a single entry though we can just replace it and
2716	* be done. Otherwise walk the ring and shift the entries.
2717	*/
2718	WARN_ON_ONCE(last_sge == first_sge);
2719	shift = last_sge > first_sge ?
2720	last_sge - first_sge - `1` :
2721	NR_MSG_FRAG_IDS - first_sge + last_sge - `1`;
2722	if (!shift)
2723	goto out;
2724
2725	i = first_sge;
2726	sk_msg_iter_var_next(i);
2727	do {
2728	u32 move_from;
2729
2730	if (i + shift >= NR_MSG_FRAG_IDS)
2731	move_from = i + shift - NR_MSG_FRAG_IDS;
2732	else
2733	move_from = i + shift;
2734	if (move_from == msg->sg.end)
2735	break;
2736
2737	msg->sg.data[i] = msg->sg.data[move_from];
2738	msg->sg.data[move_from].length = `0`;
2739	msg->sg.data[move_from].page_link = `0`;
2740	msg->sg.data[move_from].offset = `0`;
2741	sk_msg_iter_var_next(i);
2742	} while (`1`);
2743
2744	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
2745	msg->sg.end - shift + NR_MSG_FRAG_IDS :
2746	msg->sg.end - shift;
2747	out:
2748	sk_msg_reset_curr(msg);
2749	msg->data = sg_virt(sg: &msg->sg.data[first_sge]) + start - offset;
2750	msg->data_end = msg->data + bytes;
2751	return `0`;
2752	}
2753
2754	static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2755	.func = bpf_msg_pull_data,
2756	.gpl_only = false,
2757	.ret_type = RET_INTEGER,
2758	.arg1_type = ARG_PTR_TO_CTX,
2759	.arg2_type = ARG_ANYTHING,
2760	.arg3_type = ARG_ANYTHING,
2761	.arg4_type = ARG_ANYTHING,
2762	};
2763
2764	BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2765	u32, len, u64, flags)
2766	{
2767	struct scatterlist sge, nsge, nnsge, rsge = {`0`}, *psge;
2768	u32 new, i = `0`, l = `0`, space, copy = `0`, offset = `0`;
2769	u8 raw, to, *from;
2770	struct page *page;
2771
2772	if (unlikely(flags))
2773	return -EINVAL;
2774
2775	if (unlikely(len == `0`))
2776	return `0`;
2777
2778	/ First find the starting scatterlist element /
2779	i = msg->sg.start;
2780	do {
2781	offset += l;
2782	l = sk_msg_elem(msg, which: i)->length;
2783
2784	if (start < offset + l)
2785	break;
2786	sk_msg_iter_var_next(i);
2787	} while (i != msg->sg.end);
2788
2789	if (start > offset + l)
2790	return -EINVAL;
2791
2792	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2793
2794	/ If no space available will fallback to copy, we need at*
2795	* least one scatterlist elem available to push data into
2796	* when start aligns to the beginning of an element or two
2797	* when it falls inside an element. We handle the start equals
2798	* offset case because its the common case for inserting a
2799	* header.
2800	*/
2801	if (!space \|\| (space == `1` && start != offset))
2802	copy = msg->sg.data[i].length;
2803
2804	page = alloc_pages(__GFP_NOWARN \| GFP_ATOMIC \| __GFP_COMP,
2805	get_order(copy + len));
2806	if (unlikely(!page))
2807	return -ENOMEM;
2808
2809	if (copy) {
2810	int front, back;
2811
2812	raw = page_address(page);
2813
2814	if (i == msg->sg.end)
2815	sk_msg_iter_var_prev(i);
2816	psge = sk_msg_elem(msg, which: i);
2817	front = start - offset;
2818	back = psge->length - front;
2819	from = sg_virt(sg: psge);
2820
2821	if (front)
2822	memcpy(raw, from, front);
2823
2824	if (back) {
2825	from += front;
2826	to = raw + front + len;
2827
2828	memcpy(to, from, back);
2829	}
2830
2831	put_page(page: sg_page(sg: psge));
2832	new = i;
2833	goto place_new;
2834	}
2835
2836	if (start - offset) {
2837	if (i == msg->sg.end)
2838	sk_msg_iter_var_prev(i);
2839	psge = sk_msg_elem(msg, which: i);
2840	rsge = sk_msg_elem_cpy(msg, which: i);
2841
2842	psge->length = start - offset;
2843	rsge.length -= psge->length;
2844	rsge.offset += start;
2845
2846	sk_msg_iter_var_next(i);
2847	sg_unmark_end(sg: psge);
2848	sg_unmark_end(sg: &rsge);
2849	}
2850
2851	/ Slot(s) to place newly allocated data /
2852	sk_msg_iter_next(msg, end);
2853	new = i;
2854	sk_msg_iter_var_next(i);
2855
2856	if (i == msg->sg.end) {
2857	if (!rsge.length)
2858	goto place_new;
2859	sk_msg_iter_next(msg, end);
2860	goto place_new;
2861	}
2862
2863	/ Shift one or two slots as needed /
2864	sge = sk_msg_elem_cpy(msg, which: new);
2865	sg_unmark_end(sg: &sge);
2866
2867	nsge = sk_msg_elem_cpy(msg, which: i);
2868	if (rsge.length) {
2869	sk_msg_iter_var_next(i);
2870	nnsge = sk_msg_elem_cpy(msg, which: i);
2871	sk_msg_iter_next(msg, end);
2872	}
2873
2874	while (i != msg->sg.end) {
2875	msg->sg.data[i] = sge;
2876	sge = nsge;
2877	sk_msg_iter_var_next(i);
2878	if (rsge.length) {
2879	nsge = nnsge;
2880	nnsge = sk_msg_elem_cpy(msg, which: i);
2881	} else {
2882	nsge = sk_msg_elem_cpy(msg, which: i);
2883	}
2884	}
2885
2886	place_new:
2887	/ Place newly allocated data buffer /
2888	sk_mem_charge(sk: msg->sk, size: len);
2889	msg->sg.size += len;
2890	__clear_bit(new, msg->sg.copy);
2891	sg_set_page(sg: &msg->sg.data[new], page, len: len + copy, offset: `0`);
2892	if (rsge.length) {
2893	get_page(page: sg_page(sg: &rsge));
2894	sk_msg_iter_var_next(new);
2895	msg->sg.data[new] = rsge;
2896	}
2897
2898	sk_msg_reset_curr(msg);
2899	sk_msg_compute_data_pointers(msg);
2900	return `0`;
2901	}
2902
2903	static const struct bpf_func_proto bpf_msg_push_data_proto = {
2904	.func = bpf_msg_push_data,
2905	.gpl_only = false,
2906	.ret_type = RET_INTEGER,
2907	.arg1_type = ARG_PTR_TO_CTX,
2908	.arg2_type = ARG_ANYTHING,
2909	.arg3_type = ARG_ANYTHING,
2910	.arg4_type = ARG_ANYTHING,
2911	};
2912
2913	static void sk_msg_shift_left(struct sk_msg msg, int* i)
2914	{
2915	struct scatterlist *sge = sk_msg_elem(msg, which: i);
2916	int prev;
2917
2918	put_page(page: sg_page(sg: sge));
2919	do {
2920	prev = i;
2921	sk_msg_iter_var_next(i);
2922	msg->sg.data[prev] = msg->sg.data[i];
2923	} while (i != msg->sg.end);
2924
2925	sk_msg_iter_prev(msg, end);
2926	}
2927
2928	static void sk_msg_shift_right(struct sk_msg msg, int* i)
2929	{
2930	struct scatterlist tmp, sge;
2931
2932	sk_msg_iter_next(msg, end);
2933	sge = sk_msg_elem_cpy(msg, which: i);
2934	sk_msg_iter_var_next(i);
2935	tmp = sk_msg_elem_cpy(msg, which: i);
2936
2937	while (i != msg->sg.end) {
2938	msg->sg.data[i] = sge;
2939	sk_msg_iter_var_next(i);
2940	sge = tmp;
2941	tmp = sk_msg_elem_cpy(msg, which: i);
2942	}
2943	}
2944
2945	BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2946	u32, len, u64, flags)
2947	{
2948	u32 i = `0`, l = `0`, space, offset = `0`;
2949	u64 last = start + len;
2950	int pop;
2951
2952	if (unlikely(flags))
2953	return -EINVAL;
2954
2955	if (unlikely(len == `0`))
2956	return `0`;
2957
2958	/ First find the starting scatterlist element /
2959	i = msg->sg.start;
2960	do {
2961	offset += l;
2962	l = sk_msg_elem(msg, which: i)->length;
2963
2964	if (start < offset + l)
2965	break;
2966	sk_msg_iter_var_next(i);
2967	} while (i != msg->sg.end);
2968
2969	/ Bounds checks: start and pop must be inside message /
2970	if (start >= offset + l \|\| last > msg->sg.size)
2971	return -EINVAL;
2972
2973	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2974
2975	pop = len;
2976	/ --------------\| offset*
2977	* -\| start \|-------- len -------\|
2978	*
2979	* \|----- a ----\|-------- pop -------\|----- b ----\|
2980	* \|______________________________________________\| length
2981	*
2982	*
2983	* a: region at front of scatter element to save
2984	* b: region at back of scatter element to save when length > A + pop
2985	* pop: region to pop from element, same as input 'pop' here will be
2986	* decremented below per iteration.
2987	*
2988	* Two top-level cases to handle when start != offset, first B is non
2989	* zero and second B is zero corresponding to when a pop includes more
2990	* than one element.
2991	*
2992	* Then if B is non-zero AND there is no space allocate space and
2993	* compact A, B regions into page. If there is space shift ring to
2994	* the right free'ing the next element in ring to place B, leaving
2995	* A untouched except to reduce length.
2996	*/
2997	if (start != offset) {
2998	struct scatterlist nsge, sge = sk_msg_elem(msg, which: i);
2999	int a = start - offset;
3000	int b = sge->length - pop - a;
3001
3002	sk_msg_iter_var_next(i);
3003
3004	if (b > `0`) {
3005	if (space) {
3006	sge->length = a;
3007	sk_msg_shift_right(msg, i);
3008	nsge = sk_msg_elem(msg, which: i);
3009	get_page(page: sg_page(sg: sge));
3010	sg_set_page(sg: nsge,
3011	page: sg_page(sg: sge),
3012	len: b, offset: sge->offset + pop + a);
3013	} else {
3014	struct page page, orig;
3015	u8 to, from;
3016
3017	page = alloc_pages(__GFP_NOWARN \|
3018	__GFP_COMP \| GFP_ATOMIC,
3019	get_order(a + b));
3020	if (unlikely(!page))
3021	return -ENOMEM;
3022
3023	orig = sg_page(sg: sge);
3024	from = sg_virt(sg: sge);
3025	to = page_address(page);
3026	memcpy(to, from, a);
3027	memcpy(to + a, from + a + pop, b);
3028	sg_set_page(sg: sge, page, len: a + b, offset: `0`);
3029	put_page(page: orig);
3030	}
3031	pop = `0`;
3032	} else {
3033	pop -= (sge->length - a);
3034	sge->length = a;
3035	}
3036	}
3037
3038	/ From above the current layout _must_ be as follows,*
3039	*
3040	* -\| offset
3041	* -\| start
3042	*
3043	* \|---- pop ---\|---------------- b ------------\|
3044	* \|____________________________________________\| length
3045	*
3046	* Offset and start of the current msg elem are equal because in the
3047	* previous case we handled offset != start and either consumed the
3048	* entire element and advanced to the next element OR pop == 0.
3049	*
3050	* Two cases to handle here are first pop is less than the length
3051	* leaving some remainder b above. Simply adjust the element's layout
3052	* in this case. Or pop >= length of the element so that b = 0. In this
3053	* case advance to next element decrementing pop.
3054	*/
3055	while (pop) {
3056	struct scatterlist *sge = sk_msg_elem(msg, which: i);
3057
3058	if (pop < sge->length) {
3059	sge->length -= pop;
3060	sge->offset += pop;
3061	pop = `0`;
3062	} else {
3063	pop -= sge->length;
3064	sk_msg_shift_left(msg, i);
3065	}
3066	}
3067
3068	sk_mem_uncharge(sk: msg->sk, size: len - pop);
3069	msg->sg.size -= (len - pop);
3070	sk_msg_reset_curr(msg);
3071	sk_msg_compute_data_pointers(msg);
3072	return `0`;
3073	}
3074
3075	static const struct bpf_func_proto bpf_msg_pop_data_proto = {
3076	.func = bpf_msg_pop_data,
3077	.gpl_only = false,
3078	.ret_type = RET_INTEGER,
3079	.arg1_type = ARG_PTR_TO_CTX,
3080	.arg2_type = ARG_ANYTHING,
3081	.arg3_type = ARG_ANYTHING,
3082	.arg4_type = ARG_ANYTHING,
3083	};
3084
3085	#ifdef CONFIG_CGROUP_NET_CLASSID
3086	BPF_CALL_0(bpf_get_cgroup_classid_curr)
3087	{
3088	return __task_get_classid(current);
3089	}
3090
3091	const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
3092	.func = bpf_get_cgroup_classid_curr,
3093	.gpl_only = false,
3094	.ret_type = RET_INTEGER,
3095	};
3096
3097	BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
3098	{
3099	struct sock *sk = skb_to_full_sk(skb);
3100
3101	if (!sk \|\| !sk_fullsock(sk))
3102	return `0`;
3103
3104	return sock_cgroup_classid(skcd: &sk->sk_cgrp_data);
3105	}
3106
3107	static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
3108	.func = bpf_skb_cgroup_classid,
3109	.gpl_only = false,
3110	.ret_type = RET_INTEGER,
3111	.arg1_type = ARG_PTR_TO_CTX,
3112	};
3113	#endif
3114
3115	BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
3116	{
3117	return task_get_classid(skb);
3118	}
3119
3120	static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
3121	.func = bpf_get_cgroup_classid,
3122	.gpl_only = false,
3123	.ret_type = RET_INTEGER,
3124	.arg1_type = ARG_PTR_TO_CTX,
3125	};
3126
3127	BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
3128	{
3129	return dst_tclassid(skb);
3130	}
3131
3132	static const struct bpf_func_proto bpf_get_route_realm_proto = {
3133	.func = bpf_get_route_realm,
3134	.gpl_only = false,
3135	.ret_type = RET_INTEGER,
3136	.arg1_type = ARG_PTR_TO_CTX,
3137	};
3138
3139	BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
3140	{
3141	/ If skb_clear_hash() was called due to mangling, we can*
3142	* trigger SW recalculation here. Later access to hash
3143	* can then use the inline skb->hash via context directly
3144	* instead of calling this helper again.
3145	*/
3146	return skb_get_hash(skb);
3147	}
3148
3149	static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
3150	.func = bpf_get_hash_recalc,
3151	.gpl_only = false,
3152	.ret_type = RET_INTEGER,
3153	.arg1_type = ARG_PTR_TO_CTX,
3154	};
3155
3156	BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
3157	{
3158	/ After all direct packet write, this can be used once for*
3159	* triggering a lazy recalc on next skb_get_hash() invocation.
3160	*/
3161	skb_clear_hash(skb);
3162	return `0`;
3163	}
3164
3165	static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
3166	.func = bpf_set_hash_invalid,
3167	.gpl_only = false,
3168	.ret_type = RET_INTEGER,
3169	.arg1_type = ARG_PTR_TO_CTX,
3170	};
3171
3172	BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
3173	{
3174	/ Set user specified hash as L4(+), so that it gets returned*
3175	* on skb_get_hash() call unless BPF prog later on triggers a
3176	* skb_clear_hash().
3177	*/
3178	__skb_set_sw_hash(skb, hash, is_l4: true);
3179	return `0`;
3180	}
3181
3182	static const struct bpf_func_proto bpf_set_hash_proto = {
3183	.func = bpf_set_hash,
3184	.gpl_only = false,
3185	.ret_type = RET_INTEGER,
3186	.arg1_type = ARG_PTR_TO_CTX,
3187	.arg2_type = ARG_ANYTHING,
3188	};
3189
3190	BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
3191	u16, vlan_tci)
3192	{
3193	int ret;
3194
3195	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
3196	vlan_proto != htons(ETH_P_8021AD)))
3197	vlan_proto = htons(ETH_P_8021Q);
3198
3199	bpf_push_mac_rcsum(skb);
3200	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
3201	bpf_pull_mac_rcsum(skb);
3202	skb_reset_mac_len(skb);
3203
3204	bpf_compute_data_pointers(skb);
3205	return ret;
3206	}
3207
3208	static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
3209	.func = bpf_skb_vlan_push,
3210	.gpl_only = false,
3211	.ret_type = RET_INTEGER,
3212	.arg1_type = ARG_PTR_TO_CTX,
3213	.arg2_type = ARG_ANYTHING,
3214	.arg3_type = ARG_ANYTHING,
3215	};
3216
3217	BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
3218	{
3219	int ret;
3220
3221	bpf_push_mac_rcsum(skb);
3222	ret = skb_vlan_pop(skb);
3223	bpf_pull_mac_rcsum(skb);
3224
3225	bpf_compute_data_pointers(skb);
3226	return ret;
3227	}
3228
3229	static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
3230	.func = bpf_skb_vlan_pop,
3231	.gpl_only = false,
3232	.ret_type = RET_INTEGER,
3233	.arg1_type = ARG_PTR_TO_CTX,
3234	};
3235
3236	static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
3237	{
3238	/ Caller already did skb_cow() with len as headroom,*
3239	* so no need to do it here.
3240	*/
3241	skb_push(skb, len);
3242	memmove(skb->data, skb->data + len, off);
3243	memset(skb->data + off, `0`, len);
3244
3245	/ No skb_postpush_rcsum(skb, skb->data + off, len)*
3246	* needed here as it does not change the skb->csum
3247	* result for checksum complete when summing over
3248	* zeroed blocks.
3249	*/
3250	return `0`;
3251	}
3252
3253	static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
3254	{
3255	void *old_data;
3256
3257	/ skb_ensure_writable() is not needed here, as we're*
3258	* already working on an uncloned skb.
3259	*/
3260	if (unlikely(!pskb_may_pull(skb, off + len)))
3261	return -ENOMEM;
3262
3263	old_data = skb->data;
3264	__skb_pull(skb, len);
3265	skb_postpull_rcsum(skb, start: old_data + off, len);
3266	memmove(skb->data, old_data, off);
3267
3268	return `0`;
3269	}
3270
3271	static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
3272	{
3273	bool trans_same = skb->transport_header == skb->network_header;
3274	int ret;
3275
3276	/ There's no need for __skb_push()/__skb_pull() pair to*
3277	* get to the start of the mac header as we're guaranteed
3278	* to always start from here under eBPF.
3279	*/
3280	ret = bpf_skb_generic_push(skb, off, len);
3281	if (likely(!ret)) {
3282	skb->mac_header -= len;
3283	skb->network_header -= len;
3284	if (trans_same)
3285	skb->transport_header = skb->network_header;
3286	}
3287
3288	return ret;
3289	}
3290
3291	static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
3292	{
3293	bool trans_same = skb->transport_header == skb->network_header;
3294	int ret;
3295
3296	/ Same here, __skb_push()/__skb_pull() pair not needed. /
3297	ret = bpf_skb_generic_pop(skb, off, len);
3298	if (likely(!ret)) {
3299	skb->mac_header += len;
3300	skb->network_header += len;
3301	if (trans_same)
3302	skb->transport_header = skb->network_header;
3303	}
3304
3305	return ret;
3306	}
3307
3308	static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
3309	{
3310	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3311	u32 off = skb_mac_header_len(skb);
3312	int ret;
3313
3314	ret = skb_cow(skb, headroom: len_diff);
3315	if (unlikely(ret < `0`))
3316	return ret;
3317
3318	ret = bpf_skb_net_hdr_push(skb, off, len: len_diff);
3319	if (unlikely(ret < `0`))
3320	return ret;
3321
3322	if (skb_is_gso(skb)) {
3323	struct skb_shared_info *shinfo = skb_shinfo(skb);
3324
3325	/ SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. /
3326	if (shinfo->gso_type & SKB_GSO_TCPV4) {
3327	shinfo->gso_type &= ~SKB_GSO_TCPV4;
3328	shinfo->gso_type \|= SKB_GSO_TCPV6;
3329	}
3330	}
3331
3332	skb->protocol = htons(ETH_P_IPV6);
3333	skb_clear_hash(skb);
3334
3335	return `0`;
3336	}
3337
3338	static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
3339	{
3340	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3341	u32 off = skb_mac_header_len(skb);
3342	int ret;
3343
3344	ret = skb_unclone(skb, GFP_ATOMIC);
3345	if (unlikely(ret < `0`))
3346	return ret;
3347
3348	ret = bpf_skb_net_hdr_pop(skb, off, len: len_diff);
3349	if (unlikely(ret < `0`))
3350	return ret;
3351
3352	if (skb_is_gso(skb)) {
3353	struct skb_shared_info *shinfo = skb_shinfo(skb);
3354
3355	/ SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. /
3356	if (shinfo->gso_type & SKB_GSO_TCPV6) {
3357	shinfo->gso_type &= ~SKB_GSO_TCPV6;
3358	shinfo->gso_type \|= SKB_GSO_TCPV4;
3359	}
3360	}
3361
3362	skb->protocol = htons(ETH_P_IP);
3363	skb_clear_hash(skb);
3364
3365	return `0`;
3366	}
3367
3368	static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
3369	{
3370	__be16 from_proto = skb->protocol;
3371
3372	if (from_proto == htons(ETH_P_IP) &&
3373	to_proto == htons(ETH_P_IPV6))
3374	return bpf_skb_proto_4_to_6(skb);
3375
3376	if (from_proto == htons(ETH_P_IPV6) &&
3377	to_proto == htons(ETH_P_IP))
3378	return bpf_skb_proto_6_to_4(skb);
3379
3380	return -ENOTSUPP;
3381	}
3382
3383	BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
3384	u64, flags)
3385	{
3386	int ret;
3387
3388	if (unlikely(flags))
3389	return -EINVAL;
3390
3391	/ General idea is that this helper does the basic groundwork*
3392	* needed for changing the protocol, and eBPF program fills the
3393	* rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
3394	* and other helpers, rather than passing a raw buffer here.
3395	*
3396	* The rationale is to keep this minimal and without a need to
3397	* deal with raw packet data. F.e. even if we would pass buffers
3398	* here, the program still needs to call the bpf_lX_csum_replace()
3399	* helpers anyway. Plus, this way we keep also separation of
3400	* concerns, since f.e. bpf_skb_store_bytes() should only take
3401	* care of stores.
3402	*
3403	* Currently, additional options and extension header space are
3404	* not supported, but flags register is reserved so we can adapt
3405	* that. For offloads, we mark packet as dodgy, so that headers
3406	* need to be verified first.
3407	*/
3408	ret = bpf_skb_proto_xlat(skb, to_proto: proto);
3409	bpf_compute_data_pointers(skb);
3410	return ret;
3411	}
3412
3413	static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3414	.func = bpf_skb_change_proto,
3415	.gpl_only = false,
3416	.ret_type = RET_INTEGER,
3417	.arg1_type = ARG_PTR_TO_CTX,
3418	.arg2_type = ARG_ANYTHING,
3419	.arg3_type = ARG_ANYTHING,
3420	};
3421
3422	BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
3423	{
3424	/ We only allow a restricted subset to be changed for now. /
3425	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) \|\|
3426	!skb_pkt_type_ok(pkt_type)))
3427	return -EINVAL;
3428
3429	skb->pkt_type = pkt_type;
3430	return `0`;
3431	}
3432
3433	static const struct bpf_func_proto bpf_skb_change_type_proto = {
3434	.func = bpf_skb_change_type,
3435	.gpl_only = false,
3436	.ret_type = RET_INTEGER,
3437	.arg1_type = ARG_PTR_TO_CTX,
3438	.arg2_type = ARG_ANYTHING,
3439	};
3440
3441	static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3442	{
3443	switch (skb->protocol) {
3444	case htons(ETH_P_IP):
3445	return sizeof(struct iphdr);
3446	case htons(ETH_P_IPV6):
3447	return sizeof(struct ipv6hdr);
3448	default:
3449	return ~`0U`;
3450	}
3451	}
3452
/* Both L3 encapsulation flags accepted by bpf_skb_adjust_room(). */
#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK	(BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
					 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)

/* Both L3 decapsulation flags accepted by bpf_skb_adjust_room(). */
#define BPF_F_ADJ_ROOM_DECAP_L3_MASK	(BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
					 BPF_F_ADJ_ROOM_DECAP_L3_IPV6)

/* Union of the adjust-room flag bits listed below. */
#define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
					 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
					 BPF_F_ADJ_ROOM_ENCAP_L2( \
					  BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
					 BPF_F_ADJ_ROOM_DECAP_L3_MASK)
3467
3468	static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3469	u64 flags)
3470	{
3471	u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
3472	bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
3473	u16 mac_len = `0`, inner_net = `0`, inner_trans = `0`;
3474	unsigned int gso_type = SKB_GSO_DODGY;
3475	int ret;
3476
3477	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3478	/ udp gso_size delineates datagrams, only allow if fixed /
3479	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) \|\|
3480	!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3481	return -ENOTSUPP;
3482	}
3483
3484	ret = skb_cow_head(skb, headroom: len_diff);
3485	if (unlikely(ret < `0`))
3486	return ret;
3487
3488	if (encap) {
3489	if (skb->protocol != htons(ETH_P_IP) &&
3490	skb->protocol != htons(ETH_P_IPV6))
3491	return -ENOTSUPP;
3492
3493	if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3494	flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3495	return -EINVAL;
3496
3497	if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3498	flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3499	return -EINVAL;
3500
3501	if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
3502	inner_mac_len < ETH_HLEN)
3503	return -EINVAL;
3504
3505	if (skb->encapsulation)
3506	return -EALREADY;
3507
3508	mac_len = skb->network_header - skb->mac_header;
3509	inner_net = skb->network_header;
3510	if (inner_mac_len > len_diff)
3511	return -EINVAL;
3512	inner_trans = skb->transport_header;
3513	}
3514
3515	ret = bpf_skb_net_hdr_push(skb, off, len: len_diff);
3516	if (unlikely(ret < `0`))
3517	return ret;
3518
3519	if (encap) {
3520	skb->inner_mac_header = inner_net - inner_mac_len;
3521	skb->inner_network_header = inner_net;
3522	skb->inner_transport_header = inner_trans;
3523
3524	if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
3525	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
3526	else
3527	skb_set_inner_protocol(skb, protocol: skb->protocol);
3528
3529	skb->encapsulation = `1`;
3530	skb_set_network_header(skb, offset: mac_len);
3531
3532	if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3533	gso_type \|= SKB_GSO_UDP_TUNNEL;
3534	else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3535	gso_type \|= SKB_GSO_GRE;
3536	else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3537	gso_type \|= SKB_GSO_IPXIP6;
3538	else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3539	gso_type \|= SKB_GSO_IPXIP4;
3540
3541	if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE \|\|
3542	flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3543	int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3544	sizeof(struct ipv6hdr) :
3545	sizeof(struct iphdr);
3546
3547	skb_set_transport_header(skb, offset: mac_len + nh_len);
3548	}
3549
3550	/ Match skb->protocol to new outer l3 protocol /
3551	if (skb->protocol == htons(ETH_P_IP) &&
3552	flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3553	skb->protocol = htons(ETH_P_IPV6);
3554	else if (skb->protocol == htons(ETH_P_IPV6) &&
3555	flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3556	skb->protocol = htons(ETH_P_IP);
3557	}
3558
3559	if (skb_is_gso(skb)) {
3560	struct skb_shared_info *shinfo = skb_shinfo(skb);
3561
3562	/ Header must be checked, and gso_segs recomputed. /
3563	shinfo->gso_type \|= gso_type;
3564	shinfo->gso_segs = `0`;
3565
3566	/ Due to header growth, MSS needs to be downgraded.*
3567	* There is a BUG_ON() when segmenting the frag_list with
3568	* head_frag true, so linearize the skb after downgrading
3569	* the MSS.
3570	*/
3571	if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) {
3572	skb_decrease_gso_size(shinfo, decrement: len_diff);
3573	if (shinfo->frag_list)
3574	return skb_linearize(skb);
3575	}
3576	}
3577
3578	return `0`;
3579	}
3580
3581	static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3582	u64 flags)
3583	{
3584	int ret;
3585
3586	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO \|
3587	BPF_F_ADJ_ROOM_DECAP_L3_MASK \|
3588	BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3589	return -EINVAL;
3590
3591	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3592	/ udp gso_size delineates datagrams, only allow if fixed /
3593	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) \|\|
3594	!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3595	return -ENOTSUPP;
3596	}
3597
3598	ret = skb_unclone(skb, GFP_ATOMIC);
3599	if (unlikely(ret < `0`))
3600	return ret;
3601
3602	ret = bpf_skb_net_hdr_pop(skb, off, len: len_diff);
3603	if (unlikely(ret < `0`))
3604	return ret;
3605
3606	/ Match skb->protocol to new outer l3 protocol /
3607	if (skb->protocol == htons(ETH_P_IP) &&
3608	flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
3609	skb->protocol = htons(ETH_P_IPV6);
3610	else if (skb->protocol == htons(ETH_P_IPV6) &&
3611	flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
3612	skb->protocol = htons(ETH_P_IP);
3613
3614	if (skb_is_gso(skb)) {
3615	struct skb_shared_info *shinfo = skb_shinfo(skb);
3616
3617	/ Due to header shrink, MSS can be upgraded. /
3618	if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3619	skb_increase_gso_size(shinfo, increment: len_diff);
3620
3621	/ Header must be checked, and gso_segs recomputed. /
3622	shinfo->gso_type \|= SKB_GSO_DODGY;
3623	shinfo->gso_segs = `0`;
3624	}
3625
3626	return `0`;
3627	}
3628
/* Upper bound on skb length used by the resize helpers in this file. */
#define BPF_SKB_MAX_LEN	SKB_MAX_ALLOC
3630
3631	BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3632	u32, mode, u64, flags)
3633	{
3634	u32 len_diff_abs = abs(len_diff);
3635	bool shrink = len_diff < `0`;
3636	int ret = `0`;
3637
3638	if (unlikely(flags \|\| mode))
3639	return -EINVAL;
3640	if (unlikely(len_diff_abs > `0xfffU`))
3641	return -EFAULT;
3642
3643	if (!shrink) {
3644	ret = skb_cow(skb, headroom: len_diff);
3645	if (unlikely(ret < `0`))
3646	return ret;
3647	__skb_push(skb, len: len_diff_abs);
3648	memset(skb->data, `0`, len_diff_abs);
3649	} else {
3650	if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
3651	return -ENOMEM;
3652	__skb_pull(skb, len: len_diff_abs);
3653	}
3654	if (tls_sw_has_ctx_rx(sk: skb->sk)) {
3655	struct strp_msg *rxm = strp_msg(skb);
3656
3657	rxm->full_len += len_diff;
3658	}
3659	return ret;
3660	}
3661
3662	static const struct bpf_func_proto sk_skb_adjust_room_proto = {
3663	.func = sk_skb_adjust_room,
3664	.gpl_only = false,
3665	.ret_type = RET_INTEGER,
3666	.arg1_type = ARG_PTR_TO_CTX,
3667	.arg2_type = ARG_ANYTHING,
3668	.arg3_type = ARG_ANYTHING,
3669	.arg4_type = ARG_ANYTHING,
3670	};
3671
3672	BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3673	u32, mode, u64, flags)
3674	{
3675	u32 len_cur, len_diff_abs = abs(len_diff);
3676	u32 len_min = bpf_skb_net_base_len(skb);
3677	u32 len_max = BPF_SKB_MAX_LEN;
3678	__be16 proto = skb->protocol;
3679	bool shrink = len_diff < `0`;
3680	u32 off;
3681	int ret;
3682
3683	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK \|
3684	BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3685	return -EINVAL;
3686	if (unlikely(len_diff_abs > `0xfffU`))
3687	return -EFAULT;
3688	if (unlikely(proto != htons(ETH_P_IP) &&
3689	proto != htons(ETH_P_IPV6)))
3690	return -ENOTSUPP;
3691
3692	off = skb_mac_header_len(skb);
3693	switch (mode) {
3694	case BPF_ADJ_ROOM_NET:
3695	off += bpf_skb_net_base_len(skb);
3696	break;
3697	case BPF_ADJ_ROOM_MAC:
3698	break;
3699	default:
3700	return -ENOTSUPP;
3701	}
3702
3703	if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
3704	if (!shrink)
3705	return -EINVAL;
3706
3707	switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
3708	case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
3709	len_min = sizeof(struct iphdr);
3710	break;
3711	case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
3712	len_min = sizeof(struct ipv6hdr);
3713	break;
3714	default:
3715	return -EINVAL;
3716	}
3717	}
3718
3719	len_cur = skb->len - skb_network_offset(skb);
3720	if ((shrink && (len_diff_abs >= len_cur \|\|
3721	len_cur - len_diff_abs < len_min)) \|\|
3722	(!shrink && (skb->len + len_diff_abs > len_max &&
3723	!skb_is_gso(skb))))
3724	return -ENOTSUPP;
3725
3726	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff: len_diff_abs, flags) :
3727	bpf_skb_net_grow(skb, off, len_diff: len_diff_abs, flags);
3728	if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3729	__skb_reset_checksum_unnecessary(skb);
3730
3731	bpf_compute_data_pointers(skb);
3732	return ret;
3733	}
3734
3735	static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3736	.func = bpf_skb_adjust_room,
3737	.gpl_only = false,
3738	.ret_type = RET_INTEGER,
3739	.arg1_type = ARG_PTR_TO_CTX,
3740	.arg2_type = ARG_ANYTHING,
3741	.arg3_type = ARG_ANYTHING,
3742	.arg4_type = ARG_ANYTHING,
3743	};
3744
3745	static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3746	{
3747	int offset = skb_network_offset(skb);
3748	u32 min_len = `0`;
3749
3750	if (offset > `0`)
3751	min_len = offset;
3752	if (skb_transport_header_was_set(skb)) {
3753	offset = skb_transport_offset(skb);
3754	if (offset > `0`)
3755	min_len = offset;
3756	}
3757	if (skb->ip_summed == CHECKSUM_PARTIAL) {
3758	offset = skb_checksum_start_offset(skb) +
3759	skb->csum_offset + sizeof(__sum16);
3760	if (offset > `0`)
3761	min_len = offset;
3762	}
3763	return min_len;
3764	}
3765
3766	static int bpf_skb_grow_rcsum(struct sk_buff skb, unsigned* int new_len)
3767	{
3768	unsigned int old_len = skb->len;
3769	int ret;
3770
3771	ret = __skb_grow_rcsum(skb, len: new_len);
3772	if (!ret)
3773	memset(skb->data + old_len, `0`, new_len - old_len);
3774	return ret;
3775	}
3776
/* Trim @skb to @new_len; thin wrapper around __skb_trim_rcsum(). */
static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
{
	return __skb_trim_rcsum(skb, new_len);
}
3781
3782	static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3783	u64 flags)
3784	{
3785	u32 max_len = BPF_SKB_MAX_LEN;
3786	u32 min_len = __bpf_skb_min_len(skb);
3787	int ret;
3788
3789	if (unlikely(flags \|\| new_len > max_len \|\| new_len < min_len))
3790	return -EINVAL;
3791	if (skb->encapsulation)
3792	return -ENOTSUPP;
3793
3794	/ The basic idea of this helper is that it's performing the*
3795	* needed work to either grow or trim an skb, and eBPF program
3796	* rewrites the rest via helpers like bpf_skb_store_bytes(),
3797	* bpf_lX_csum_replace() and others rather than passing a raw
3798	* buffer here. This one is a slow path helper and intended
3799	* for replies with control messages.
3800	*
3801	* Like in bpf_skb_change_proto(), we want to keep this rather
3802	* minimal and without protocol specifics so that we are able
3803	* to separate concerns as in bpf_skb_store_bytes() should only
3804	* be the one responsible for writing buffers.
3805	*
3806	* It's really expected to be a slow path operation here for
3807	* control message replies, so we're implicitly linearizing,
3808	* uncloning and drop offloads from the skb by this.
3809	*/
3810	ret = __bpf_try_make_writable(skb, write_len: skb->len);
3811	if (!ret) {
3812	if (new_len > skb->len)
3813	ret = bpf_skb_grow_rcsum(skb, new_len);
3814	else if (new_len < skb->len)
3815	ret = bpf_skb_trim_rcsum(skb, new_len);
3816	if (!ret && skb_is_gso(skb))
3817	skb_gso_reset(skb);
3818	}
3819	return ret;
3820	}
3821
3822	BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3823	u64, flags)
3824	{
3825	int ret = __bpf_skb_change_tail(skb, new_len, flags);
3826
3827	bpf_compute_data_pointers(skb);
3828	return ret;
3829	}
3830
3831	static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3832	.func = bpf_skb_change_tail,
3833	.gpl_only = false,
3834	.ret_type = RET_INTEGER,
3835	.arg1_type = ARG_PTR_TO_CTX,
3836	.arg2_type = ARG_ANYTHING,
3837	.arg3_type = ARG_ANYTHING,
3838	};
3839
3840	BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3841	u64, flags)
3842	{
3843	return __bpf_skb_change_tail(skb, new_len, flags);
3844	}
3845
3846	static const struct bpf_func_proto sk_skb_change_tail_proto = {
3847	.func = sk_skb_change_tail,
3848	.gpl_only = false,
3849	.ret_type = RET_INTEGER,
3850	.arg1_type = ARG_PTR_TO_CTX,
3851	.arg2_type = ARG_ANYTHING,
3852	.arg3_type = ARG_ANYTHING,
3853	};
3854
3855	static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3856	u64 flags)
3857	{
3858	u32 max_len = BPF_SKB_MAX_LEN;
3859	u32 new_len = skb->len + head_room;
3860	int ret;
3861
3862	if (unlikely(flags \|\| (!skb_is_gso(skb) && new_len > max_len) \|\|
3863	new_len < skb->len))
3864	return -EINVAL;
3865
3866	ret = skb_cow(skb, headroom: head_room);
3867	if (likely(!ret)) {
3868	/ Idea for this helper is that we currently only*
3869	* allow to expand on mac header. This means that
3870	* skb->protocol network header, etc, stay as is.
3871	* Compared to bpf_skb_change_tail(), we're more
3872	* flexible due to not needing to linearize or
3873	* reset GSO. Intention for this helper is to be
3874	* used by an L3 skb that needs to push mac header
3875	* for redirection into L2 device.
3876	*/
3877	__skb_push(skb, len: head_room);
3878	memset(skb->data, `0`, head_room);
3879	skb_reset_mac_header(skb);
3880	skb_reset_mac_len(skb);
3881	}
3882
3883	return ret;
3884	}
3885
3886	BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3887	u64, flags)
3888	{
3889	int ret = __bpf_skb_change_head(skb, head_room, flags);
3890
3891	bpf_compute_data_pointers(skb);
3892	return ret;
3893	}
3894
3895	static const struct bpf_func_proto bpf_skb_change_head_proto = {
3896	.func = bpf_skb_change_head,
3897	.gpl_only = false,
3898	.ret_type = RET_INTEGER,
3899	.arg1_type = ARG_PTR_TO_CTX,
3900	.arg2_type = ARG_ANYTHING,
3901	.arg3_type = ARG_ANYTHING,
3902	};
3903
3904	BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3905	u64, flags)
3906	{
3907	return __bpf_skb_change_head(skb, head_room, flags);
3908	}
3909
3910	static const struct bpf_func_proto sk_skb_change_head_proto = {
3911	.func = sk_skb_change_head,
3912	.gpl_only = false,
3913	.ret_type = RET_INTEGER,
3914	.arg1_type = ARG_PTR_TO_CTX,
3915	.arg2_type = ARG_ANYTHING,
3916	.arg3_type = ARG_ANYTHING,
3917	};
3918
3919	BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
3920	{
3921	return xdp_get_buff_len(xdp);
3922	}
3923
3924	static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
3925	.func = bpf_xdp_get_buff_len,
3926	.gpl_only = false,
3927	.ret_type = RET_INTEGER,
3928	.arg1_type = ARG_PTR_TO_CTX,
3929	};
3930
3931	BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
3932
3933	const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
3934	.func = bpf_xdp_get_buff_len,
3935	.gpl_only = false,
3936	.arg1_type = ARG_PTR_TO_BTF_ID,
3937	.arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[`0`],
3938	};
3939
3940	static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3941	{
3942	return xdp_data_meta_unsupported(xdp) ? `0` :
3943	xdp->data - xdp->data_meta;
3944	}
3945
3946	BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff , xdp, int*, offset)
3947	{
3948	void xdp_frame_end = xdp->data_hard_start + sizeof(struct* xdp_frame);
3949	unsigned long metalen = xdp_get_metalen(xdp);
3950	void *data_start = xdp_frame_end + metalen;
3951	void *data = xdp->data + offset;
3952
3953	if (unlikely(data < data_start \|\|
3954	data > xdp->data_end - ETH_HLEN))
3955	return -EINVAL;
3956
3957	if (metalen)
3958	memmove(xdp->data_meta + offset,
3959	xdp->data_meta, metalen);
3960	xdp->data_meta += offset;
3961	xdp->data = data;
3962
3963	return `0`;
3964	}
3965
3966	static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3967	.func = bpf_xdp_adjust_head,
3968	.gpl_only = false,
3969	.ret_type = RET_INTEGER,
3970	.arg1_type = ARG_PTR_TO_CTX,
3971	.arg2_type = ARG_ANYTHING,
3972	};
3973
3974	void bpf_xdp_copy_buf(struct xdp_buff xdp, unsigned* long off,
3975	void buf, unsigned* long len, bool flush)
3976	{
3977	unsigned long ptr_len, ptr_off = `0`;
3978	skb_frag_t next_frag, end_frag;
3979	struct skb_shared_info *sinfo;
3980	void src, dst;
3981	u8 *ptr_buf;
3982
3983	if (likely(xdp->data_end - xdp->data >= off + len)) {
3984	src = flush ? buf : xdp->data + off;
3985	dst = flush ? xdp->data + off : buf;
3986	memcpy(dst, src, len);
3987	return;
3988	}
3989
3990	sinfo = xdp_get_shared_info_from_buff(xdp);
3991	end_frag = &sinfo->frags[sinfo->nr_frags];
3992	next_frag = &sinfo->frags[`0`];
3993
3994	ptr_len = xdp->data_end - xdp->data;
3995	ptr_buf = xdp->data;
3996
3997	while (true) {
3998	if (off < ptr_off + ptr_len) {
3999	unsigned long copy_off = off - ptr_off;
4000	unsigned long copy_len = min(len, ptr_len - copy_off);
4001
4002	src = flush ? buf : ptr_buf + copy_off;
4003	dst = flush ? ptr_buf + copy_off : buf;
4004	memcpy(dst, src, copy_len);
4005
4006	off += copy_len;
4007	len -= copy_len;
4008	buf += copy_len;
4009	}
4010
4011	if (!len \|\| next_frag == end_frag)
4012	break;
4013
4014	ptr_off += ptr_len;
4015	ptr_buf = skb_frag_address(frag: next_frag);
4016	ptr_len = skb_frag_size(frag: next_frag);
4017	next_frag++;
4018	}
4019	}
4020
4021	void bpf_xdp_pointer(struct* xdp_buff *xdp, u32 offset, u32 len)
4022	{
4023	u32 size = xdp->data_end - xdp->data;
4024	struct skb_shared_info *sinfo;
4025	void *addr = xdp->data;
4026	int i;
4027
4028	if (unlikely(offset > `0xffff` \|\| len > `0xffff`))
4029	return ERR_PTR(error: -EFAULT);
4030
4031	if (unlikely(offset + len > xdp_get_buff_len(xdp)))
4032	return ERR_PTR(error: -EINVAL);
4033
4034	if (likely(offset < size)) / linear area /
4035	goto out;
4036
4037	sinfo = xdp_get_shared_info_from_buff(xdp);
4038	offset -= size;
4039	for (i = `0`; i < sinfo->nr_frags; i++) { / paged area /
4040	u32 frag_size = skb_frag_size(frag: &sinfo->frags[i]);
4041
4042	if (offset < frag_size) {
4043	addr = skb_frag_address(frag: &sinfo->frags[i]);
4044	size = frag_size;
4045	break;
4046	}
4047	offset -= frag_size;
4048	}
4049	out:
4050	return offset + len <= size ? addr + offset : NULL;
4051	}
4052
4053	BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
4054	void *, buf, u32, len)
4055	{
4056	void *ptr;
4057
4058	ptr = bpf_xdp_pointer(xdp, offset, len);
4059	if (IS_ERR(ptr))
4060	return PTR_ERR(ptr);
4061
4062	if (!ptr)
4063	bpf_xdp_copy_buf(xdp, off: offset, buf, len, flush: false);
4064	else
4065	memcpy(buf, ptr, len);
4066
4067	return `0`;
4068	}
4069
4070	static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
4071	.func = bpf_xdp_load_bytes,
4072	.gpl_only = false,
4073	.ret_type = RET_INTEGER,
4074	.arg1_type = ARG_PTR_TO_CTX,
4075	.arg2_type = ARG_ANYTHING,
4076	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
4077	.arg4_type = ARG_CONST_SIZE,
4078	};
4079
4080	int __bpf_xdp_load_bytes(struct xdp_buff xdp, u32 offset, void* *buf, u32 len)
4081	{
4082	return ____bpf_xdp_load_bytes(xdp, offset, buf, len);
4083	}
4084
4085	BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
4086	void *, buf, u32, len)
4087	{
4088	void *ptr;
4089
4090	ptr = bpf_xdp_pointer(xdp, offset, len);
4091	if (IS_ERR(ptr))
4092	return PTR_ERR(ptr);
4093
4094	if (!ptr)
4095	bpf_xdp_copy_buf(xdp, off: offset, buf, len, flush: true);
4096	else
4097	memcpy(ptr, buf, len);
4098
4099	return `0`;
4100	}
4101
4102	static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
4103	.func = bpf_xdp_store_bytes,
4104	.gpl_only = false,
4105	.ret_type = RET_INTEGER,
4106	.arg1_type = ARG_PTR_TO_CTX,
4107	.arg2_type = ARG_ANYTHING,
4108	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
4109	.arg4_type = ARG_CONST_SIZE,
4110	};
4111
4112	int __bpf_xdp_store_bytes(struct xdp_buff xdp, u32 offset, void* *buf, u32 len)
4113	{
4114	return ____bpf_xdp_store_bytes(xdp, offset, buf, len);
4115	}
4116
4117	static int bpf_xdp_frags_increase_tail(struct xdp_buff xdp, int* offset)
4118	{
4119	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
4120	skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - `1`];
4121	struct xdp_rxq_info *rxq = xdp->rxq;
4122	unsigned int tailroom;
4123
4124	if (!rxq->frag_size \|\| rxq->frag_size > xdp->frame_sz)
4125	return -EOPNOTSUPP;
4126
4127	tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
4128	if (unlikely(offset > tailroom))
4129	return -EINVAL;
4130
4131	memset(skb_frag_address(frag) + skb_frag_size(frag), `0`, offset);
4132	skb_frag_size_add(frag, delta: offset);
4133	sinfo->xdp_frags_size += offset;
4134	if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
4135	xsk_buff_get_tail(first: xdp)->data_end += offset;
4136
4137	return `0`;
4138	}
4139
4140	static void bpf_xdp_shrink_data_zc(struct xdp_buff xdp, int* shrink,
4141	enum xdp_mem_type mem_type, bool release)
4142	{
4143	struct xdp_buff *zc_frag = xsk_buff_get_tail(first: xdp);
4144
4145	if (release) {
4146	xsk_buff_del_tail(tail: zc_frag);
4147	__xdp_return(netmem: `0`, mem_type, napi_direct: false, xdp: zc_frag);
4148	} else {
4149	zc_frag->data_end -= shrink;
4150	}
4151	}
4152
4153	static bool bpf_xdp_shrink_data(struct xdp_buff xdp, skb_frag_t frag,
4154	int shrink)
4155	{
4156	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
4157	bool release = skb_frag_size(frag) == shrink;
4158
4159	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
4160	bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
4161	goto out;
4162	}
4163
4164	if (release)
4165	__xdp_return(netmem: skb_frag_netmem(frag), mem_type, napi_direct: false, NULL);
4166
4167	out:
4168	return release;
4169	}
4170
4171	static int bpf_xdp_frags_shrink_tail(struct xdp_buff xdp, int* offset)
4172	{
4173	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
4174	int i, n_frags_free = `0`, len_free = `0`;
4175
4176	if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
4177	return -EINVAL;
4178
4179	for (i = sinfo->nr_frags - `1`; i >= `0` && offset > `0`; i--) {
4180	skb_frag_t *frag = &sinfo->frags[i];
4181	int shrink = min_t(int, offset, skb_frag_size(frag));
4182
4183	len_free += shrink;
4184	offset -= shrink;
4185	if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
4186	n_frags_free++;
4187	} else {
4188	skb_frag_size_sub(frag, delta: shrink);
4189	break;
4190	}
4191	}
4192	sinfo->nr_frags -= n_frags_free;
4193	sinfo->xdp_frags_size -= len_free;
4194
4195	if (unlikely(!sinfo->nr_frags)) {
4196	xdp_buff_clear_frags_flag(xdp);
4197	xdp->data_end -= offset;
4198	}
4199
4200	return `0`;
4201	}
4202
4203	BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff , xdp, int*, offset)
4204	{
4205	void data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz /
4206	void *data_end = xdp->data_end + offset;
4207
4208	if (unlikely(xdp_buff_has_frags(xdp))) { / non-linear xdp buff /
4209	if (offset < `0`)
4210	return bpf_xdp_frags_shrink_tail(xdp, offset: -offset);
4211
4212	return bpf_xdp_frags_increase_tail(xdp, offset);
4213	}
4214
4215	/ Notice that xdp_data_hard_end have reserved some tailroom /
4216	if (unlikely(data_end > data_hard_end))
4217	return -EINVAL;
4218
4219	if (unlikely(data_end < xdp->data + ETH_HLEN))
4220	return -EINVAL;
4221
4222	/ Clear memory area on grow, can contain uninit kernel memory /
4223	if (offset > `0`)
4224	memset(xdp->data_end, `0`, offset);
4225
4226	xdp->data_end = data_end;
4227
4228	return `0`;
4229	}
4230
4231	static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
4232	.func = bpf_xdp_adjust_tail,
4233	.gpl_only = false,
4234	.ret_type = RET_INTEGER,
4235	.arg1_type = ARG_PTR_TO_CTX,
4236	.arg2_type = ARG_ANYTHING,
4237	};
4238
4239	BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff , xdp, int*, offset)
4240	{
4241	void xdp_frame_end = xdp->data_hard_start + sizeof(struct* xdp_frame);
4242	void *meta = xdp->data_meta + offset;
4243	unsigned long metalen = xdp->data - meta;
4244
4245	if (xdp_data_meta_unsupported(xdp))
4246	return -ENOTSUPP;
4247	if (unlikely(meta < xdp_frame_end \|\|
4248	meta > xdp->data))
4249	return -EINVAL;
4250	if (unlikely(xdp_metalen_invalid(metalen)))
4251	return -EACCES;
4252
4253	xdp->data_meta = meta;
4254
4255	return `0`;
4256	}
4257
4258	static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
4259	.func = bpf_xdp_adjust_meta,
4260	.gpl_only = false,
4261	.ret_type = RET_INTEGER,
4262	.arg1_type = ARG_PTR_TO_CTX,
4263	.arg2_type = ARG_ANYTHING,
4264	};
4265
4266	/**
4267	* DOC: xdp redirect
4268	*
4269	* XDP_REDIRECT works by a three-step process, implemented in the functions
4270	* below:
4271	*
4272	* 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
4273	* of the redirect and store it (along with some other metadata) in a per-CPU
4274	* struct bpf_redirect_info.
4275	*
4276	* 2. When the program returns the XDP_REDIRECT return code, the driver will
4277	* call xdp_do_redirect() which will use the information in struct
4278	* bpf_redirect_info to actually enqueue the frame into a map type-specific
4279	* bulk queue structure.
4280	*
4281	* 3. Before exiting its NAPI poll loop, the driver will call
4282	* xdp_do_flush(), which will flush all the different bulk queues,
4283	* thus completing the redirect. Note that xdp_do_flush() must be
4284	* called before napi_complete_done() in the driver, as the
4285	* XDP_REDIRECT logic relies on being inside a single NAPI instance
4286	* through to the xdp_do_flush() call for RCU protection of all
4287	* in-kernel data structures.
4288	*/
4289	/*
4290	* Pointers to the map entries will be kept around for this whole sequence of
4291	* steps, protected by RCU. However, there is no top-level rcu_read_lock() in
4292	* the core code; instead, the RCU protection relies on everything happening
4293	* inside a single NAPI poll sequence, which means it's between a pair of calls
4294	* to local_bh_disable()/local_bh_enable().
4295	*
4296	* The map entries are marked as __rcu and the map code makes sure to
4297	* dereference those pointers with rcu_dereference_check() in a way that works
 * for both sections that hold an rcu_read_lock() and sections that are
4299	* called from NAPI without a separate rcu_read_lock(). The code below does not
4300	* use RCU annotations, but relies on those in the map code.
4301	*/
/*
 * Flush every XDP redirect bulk queue that was used in the current
 * context: the device, cpumap and xskmap flush lists are fetched and each
 * non-NULL list is flushed.
 */
void xdp_do_flush(void)
{
	struct list_head *lh_map, *lh_dev, *lh_xsk;

	bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
	if (lh_dev)
		__dev_flush(lh_dev);
	if (lh_map)
		__cpu_map_flush(lh_map);
	if (lh_xsk)
		__xsk_map_flush(lh_xsk);
}
EXPORT_SYMBOL_GPL(xdp_do_flush);
4315
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
/*
 * Debug check: flush any redirect bulk queue that is still pending and
 * warn once, naming @napi's poll function, if any flush list was still in
 * use.
 */
void xdp_do_check_flushed(struct napi_struct *napi)
{
	struct list_head *lh_map, *lh_dev, *lh_xsk;
	bool missed = false;

	bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
	if (lh_dev) {
		__dev_flush(lh_dev);
		missed = true;
	}
	if (lh_map) {
		__cpu_map_flush(lh_map);
		missed = true;
	}
	if (lh_xsk) {
		__xsk_map_flush(lh_xsk);
		missed = true;
	}

	WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
		  napi->poll);
}
#endif
4340
/* Static key, defined in its false state and exported to GPL modules. */
DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
4343
4344	u32 xdp_master_redirect(struct xdp_buff *xdp)
4345	{
4346	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
4347	struct net_device master, slave;
4348
4349	master = netdev_master_upper_dev_get_rcu(dev: xdp->rxq->dev);
4350	slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
4351	if (slave && slave != xdp->rxq->dev) {
4352	/ The target device is different from the receiving device, so*
4353	* redirect it to the new device.
4354	* Using XDP_REDIRECT gets the correct behaviour from XDP enabled
4355	* drivers to unmap the packet from their rx ring.
4356	*/
4357	ri->tgt_index = slave->ifindex;
4358	ri->map_id = INT_MAX;
4359	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4360	return XDP_REDIRECT;
4361	}
4362	return XDP_TX;
4363	}
4364	EXPORT_SYMBOL_GPL(xdp_master_redirect);
4365
4366	static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
4367	const struct net_device *dev,
4368	struct xdp_buff *xdp,
4369	const struct bpf_prog *xdp_prog)
4370	{
4371	enum bpf_map_type map_type = ri->map_type;
4372	void *fwd = ri->tgt_value;
4373	u32 map_id = ri->map_id;
4374	int err;
4375
4376	ri->map_id = `0`; / Valid map id idr range: [1,INT_MAX[ /
4377	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4378
4379	err = __xsk_map_redirect(xs: fwd, xdp);
4380	if (unlikely(err))
4381	goto err;
4382
4383	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4384	return `0`;
4385	err:
4386	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4387	return err;
4388	}
4389
4390	static __always_inline int
4391	__xdp_do_redirect_frame(struct bpf_redirect_info ri, struct* net_device *dev,
4392	struct xdp_frame *xdpf,
4393	const struct bpf_prog *xdp_prog)
4394	{
4395	enum bpf_map_type map_type = ri->map_type;
4396	void *fwd = ri->tgt_value;
4397	u32 map_id = ri->map_id;
4398	u32 flags = ri->flags;
4399	struct bpf_map *map;
4400	int err;
4401
4402	ri->map_id = `0`; / Valid map id idr range: [1,INT_MAX[ /
4403	ri->flags = `0`;
4404	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4405
4406	if (unlikely(!xdpf)) {
4407	err = -EOVERFLOW;
4408	goto err;
4409	}
4410
4411	switch (map_type) {
4412	case BPF_MAP_TYPE_DEVMAP:
4413	fallthrough;
4414	case BPF_MAP_TYPE_DEVMAP_HASH:
4415	if (unlikely(flags & BPF_F_BROADCAST)) {
4416	map = READ_ONCE(ri->map);
4417
4418	/ The map pointer is cleared when the map is being torn*
4419	* down by dev_map_free()
4420	*/
4421	if (unlikely(!map)) {
4422	err = -ENOENT;
4423	break;
4424	}
4425
4426	WRITE_ONCE(ri->map, NULL);
4427	err = dev_map_enqueue_multi(xdpf, dev_rx: dev, map,
4428	exclude_ingress: flags & BPF_F_EXCLUDE_INGRESS);
4429	} else {
4430	err = dev_map_enqueue(dst: fwd, xdpf, dev_rx: dev);
4431	}
4432	break;
4433	case BPF_MAP_TYPE_CPUMAP:
4434	err = cpu_map_enqueue(rcpu: fwd, xdpf, dev_rx: dev);
4435	break;
4436	case BPF_MAP_TYPE_UNSPEC:
4437	if (map_id == INT_MAX) {
4438	fwd = dev_get_by_index_rcu(net: dev_net(dev), ifindex: ri->tgt_index);
4439	if (unlikely(!fwd)) {
4440	err = -EINVAL;
4441	break;
4442	}
4443	err = dev_xdp_enqueue(dev: fwd, xdpf, dev_rx: dev);
4444	break;
4445	}
4446	fallthrough;
4447	default:
4448	err = -EBADRQC;
4449	}
4450
4451	if (unlikely(err))
4452	goto err;
4453
4454	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4455	return `0`;
4456	err:
4457	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4458	return err;
4459	}
4460
4461	int xdp_do_redirect(struct net_device dev, struct* xdp_buff *xdp,
4462	const struct bpf_prog *xdp_prog)
4463	{
4464	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
4465	enum bpf_map_type map_type = ri->map_type;
4466
4467	if (map_type == BPF_MAP_TYPE_XSKMAP)
4468	return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4469
4470	return __xdp_do_redirect_frame(ri, dev, xdpf: xdp_convert_buff_to_frame(xdp),
4471	xdp_prog);
4472	}
4473	EXPORT_SYMBOL_GPL(xdp_do_redirect);
4474
4475	int xdp_do_redirect_frame(struct net_device dev, struct* xdp_buff *xdp,
4476	struct xdp_frame *xdpf,
4477	const struct bpf_prog *xdp_prog)
4478	{
4479	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
4480	enum bpf_map_type map_type = ri->map_type;
4481
4482	if (map_type == BPF_MAP_TYPE_XSKMAP)
4483	return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4484
4485	return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
4486	}
4487	EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
4488
4489	static int xdp_do_generic_redirect_map(struct net_device *dev,
4490	struct sk_buff *skb,
4491	struct xdp_buff *xdp,
4492	const struct bpf_prog *xdp_prog,
4493	void fwd, enum* bpf_map_type map_type,
4494	u32 map_id, u32 flags)
4495	{
4496	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
4497	struct bpf_map *map;
4498	int err;
4499
4500	switch (map_type) {
4501	case BPF_MAP_TYPE_DEVMAP:
4502	fallthrough;
4503	case BPF_MAP_TYPE_DEVMAP_HASH:
4504	if (unlikely(flags & BPF_F_BROADCAST)) {
4505	map = READ_ONCE(ri->map);
4506
4507	/ The map pointer is cleared when the map is being torn*
4508	* down by dev_map_free()
4509	*/
4510	if (unlikely(!map)) {
4511	err = -ENOENT;
4512	break;
4513	}
4514
4515	WRITE_ONCE(ri->map, NULL);
4516	err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
4517	exclude_ingress: flags & BPF_F_EXCLUDE_INGRESS);
4518	} else {
4519	err = dev_map_generic_redirect(dst: fwd, skb, xdp_prog);
4520	}
4521	if (unlikely(err))
4522	goto err;
4523	break;
4524	case BPF_MAP_TYPE_XSKMAP:
4525	err = xsk_generic_rcv(xs: fwd, xdp);
4526	if (err)
4527	goto err;
4528	consume_skb(skb);
4529	break;
4530	case BPF_MAP_TYPE_CPUMAP:
4531	err = cpu_map_generic_redirect(rcpu: fwd, skb);
4532	if (unlikely(err))
4533	goto err;
4534	break;
4535	default:
4536	err = -EBADRQC;
4537	goto err;
4538	}
4539
4540	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4541	return `0`;
4542	err:
4543	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4544	return err;
4545	}
4546
4547	int xdp_do_generic_redirect(struct net_device dev, struct* sk_buff *skb,
4548	struct xdp_buff *xdp,
4549	const struct bpf_prog *xdp_prog)
4550	{
4551	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
4552	enum bpf_map_type map_type = ri->map_type;
4553	void *fwd = ri->tgt_value;
4554	u32 map_id = ri->map_id;
4555	u32 flags = ri->flags;
4556	int err;
4557
4558	ri->map_id = `0`; / Valid map id idr range: [1,INT_MAX[ /
4559	ri->flags = `0`;
4560	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4561
4562	if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4563	fwd = dev_get_by_index_rcu(net: dev_net(dev), ifindex: ri->tgt_index);
4564	if (unlikely(!fwd)) {
4565	err = -EINVAL;
4566	goto err;
4567	}
4568
4569	err = xdp_ok_fwd_dev(fwd, pktlen: skb->len);
4570	if (unlikely(err))
4571	goto err;
4572
4573	skb->dev = fwd;
4574	_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4575	generic_xdp_tx(skb, xdp_prog);
4576	return `0`;
4577	}
4578
4579	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags);
4580	err:
4581	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
4582	return err;
4583	}
4584
4585	BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
4586	{
4587	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
4588
4589	if (unlikely(flags))
4590	return XDP_ABORTED;
4591
4592	/ NB! Map type UNSPEC and map_id == INT_MAX (never generated*
4593	* by map_idr) is used for ifindex based XDP redirect.
4594	*/
4595	ri->tgt_index = ifindex;
4596	ri->map_id = INT_MAX;
4597	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4598
4599	return XDP_REDIRECT;
4600	}
4601
4602	static const struct bpf_func_proto bpf_xdp_redirect_proto = {
4603	.func = bpf_xdp_redirect,
4604	.gpl_only = false,
4605	.ret_type = RET_INTEGER,
4606	.arg1_type = ARG_ANYTHING,
4607	.arg2_type = ARG_ANYTHING,
4608	};
4609
4610	BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key,
4611	u64, flags)
4612	{
4613	return map->ops->map_redirect(map, key, flags);
4614	}
4615
4616	static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
4617	.func = bpf_xdp_redirect_map,
4618	.gpl_only = false,
4619	.ret_type = RET_INTEGER,
4620	.arg1_type = ARG_CONST_MAP_PTR,
4621	.arg2_type = ARG_ANYTHING,
4622	.arg3_type = ARG_ANYTHING,
4623	};
4624
/* Copy callback used for skb event output: place @len bytes of @skb starting
 * at @off into @dst_buff.
 *
 * Returns @len when skb_header_pointer() cannot provide the data, 0 otherwise.
 */
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
				  unsigned long off, unsigned long len)
{
	void *ptr = skb_header_pointer(skb, off, len, dst_buff);

	if (unlikely(!ptr))
		return len;
	/* The data may have been returned in place rather than in dst_buff. */
	if (ptr != dst_buff)
		memcpy(dst_buff, ptr, len);

	return 0;
}

4638	BPF_CALL_5(bpf_skb_event_output, struct sk_buff , skb, struct* bpf_map *, map,
4639	u64, flags, void *, meta, u64, meta_size)
4640	{
4641	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> `32`;
4642
4643	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK \| BPF_F_INDEX_MASK)))
4644	return -EINVAL;
4645	if (unlikely(!skb \|\| skb_size > skb->len))
4646	return -EFAULT;
4647
4648	return bpf_event_output(map, flags, meta, meta_size, ctx: skb, ctx_size: skb_size,
4649	ctx_copy: bpf_skb_copy);
4650	}
4651
4652	static const struct bpf_func_proto bpf_skb_event_output_proto = {
4653	.func = bpf_skb_event_output,
4654	.gpl_only = true,
4655	.ret_type = RET_INTEGER,
4656	.arg1_type = ARG_PTR_TO_CTX,
4657	.arg2_type = ARG_CONST_MAP_PTR,
4658	.arg3_type = ARG_ANYTHING,
4659	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4660	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
4661	};
4662
4663	BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
4664
4665	const struct bpf_func_proto bpf_skb_output_proto = {
4666	.func = bpf_skb_event_output,
4667	.gpl_only = true,
4668	.ret_type = RET_INTEGER,
4669	.arg1_type = ARG_PTR_TO_BTF_ID,
4670	.arg1_btf_id = &bpf_skb_output_btf_ids[`0`],
4671	.arg2_type = ARG_CONST_MAP_PTR,
4672	.arg3_type = ARG_ANYTHING,
4673	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4674	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
4675	};
4676
4677	static unsigned short bpf_tunnel_key_af(u64 flags)
4678	{
4679	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
4680	}
4681
4682	BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff , skb, struct* bpf_tunnel_key *, to,
4683	u32, size, u64, flags)
4684	{
4685	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4686	u8 compat[sizeof(struct bpf_tunnel_key)];
4687	void *to_orig = to;
4688	int err;
4689
4690	if (unlikely(!info \|\| (flags & ~(BPF_F_TUNINFO_IPV6 \|
4691	BPF_F_TUNINFO_FLAGS)))) {
4692	err = -EINVAL;
4693	goto err_clear;
4694	}
4695	if (ip_tunnel_info_af(tun_info: info) != bpf_tunnel_key_af(flags)) {
4696	err = -EPROTO;
4697	goto err_clear;
4698	}
4699	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4700	err = -EINVAL;
4701	switch (size) {
4702	case offsetof(struct bpf_tunnel_key, local_ipv6[`0`]):
4703	case offsetof(struct bpf_tunnel_key, tunnel_label):
4704	case offsetof(struct bpf_tunnel_key, tunnel_ext):
4705	goto set_compat;
4706	case offsetof(struct bpf_tunnel_key, remote_ipv6[`1`]):
4707	/ Fixup deprecated structure layouts here, so we have*
4708	* a common path later on.
4709	*/
4710	if (ip_tunnel_info_af(tun_info: info) != AF_INET)
4711	goto err_clear;
4712	set_compat:
4713	to = (struct bpf_tunnel_key *)compat;
4714	break;
4715	default:
4716	goto err_clear;
4717	}
4718	}
4719
4720	to->tunnel_id = be64_to_cpu(info->key.tun_id);
4721	to->tunnel_tos = info->key.tos;
4722	to->tunnel_ttl = info->key.ttl;
4723	if (flags & BPF_F_TUNINFO_FLAGS)
4724	to->tunnel_flags = ip_tunnel_flags_to_be16(flags: info->key.tun_flags);
4725	else
4726	to->tunnel_ext = `0`;
4727
4728	if (flags & BPF_F_TUNINFO_IPV6) {
4729	memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
4730	sizeof(to->remote_ipv6));
4731	memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
4732	sizeof(to->local_ipv6));
4733	to->tunnel_label = be32_to_cpu(info->key.label);
4734	} else {
4735	to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
4736	memset(&to->remote_ipv6[`1`], `0`, sizeof(__u32) * `3`);
4737	to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
4738	memset(&to->local_ipv6[`1`], `0`, sizeof(__u32) * `3`);
4739	to->tunnel_label = `0`;
4740	}
4741
4742	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
4743	memcpy(to_orig, to, size);
4744
4745	return `0`;
4746	err_clear:
4747	memset(to_orig, `0`, size);
4748	return err;
4749	}
4750
4751	static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
4752	.func = bpf_skb_get_tunnel_key,
4753	.gpl_only = false,
4754	.ret_type = RET_INTEGER,
4755	.arg1_type = ARG_PTR_TO_CTX,
4756	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
4757	.arg3_type = ARG_CONST_SIZE,
4758	.arg4_type = ARG_ANYTHING,
4759	};
4760
4761	BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff , skb, u8 , to, u32, size)
4762	{
4763	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4764	int err;
4765
4766	if (unlikely(!info \|\|
4767	!ip_tunnel_is_options_present(info->key.tun_flags))) {
4768	err = -ENOENT;
4769	goto err_clear;
4770	}
4771	if (unlikely(size < info->options_len)) {
4772	err = -ENOMEM;
4773	goto err_clear;
4774	}
4775
4776	ip_tunnel_info_opts_get(to, info);
4777	if (size > info->options_len)
4778	memset(to + info->options_len, `0`, size - info->options_len);
4779
4780	return info->options_len;
4781	err_clear:
4782	memset(to, `0`, size);
4783	return err;
4784	}
4785
4786	static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
4787	.func = bpf_skb_get_tunnel_opt,
4788	.gpl_only = false,
4789	.ret_type = RET_INTEGER,
4790	.arg1_type = ARG_PTR_TO_CTX,
4791	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
4792	.arg3_type = ARG_CONST_SIZE,
4793	};
4794
4795	static struct metadata_dst __percpu *md_dst;
4796
4797	BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
4798	const struct bpf_tunnel_key *, from, u32, size, u64, flags)
4799	{
4800	struct metadata_dst *md = this_cpu_ptr(md_dst);
4801	u8 compat[sizeof(struct bpf_tunnel_key)];
4802	struct ip_tunnel_info *info;
4803
4804	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 \| BPF_F_ZERO_CSUM_TX \|
4805	BPF_F_DONT_FRAGMENT \| BPF_F_SEQ_NUMBER \|
4806	BPF_F_NO_TUNNEL_KEY)))
4807	return -EINVAL;
4808	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4809	switch (size) {
4810	case offsetof(struct bpf_tunnel_key, local_ipv6[`0`]):
4811	case offsetof(struct bpf_tunnel_key, tunnel_label):
4812	case offsetof(struct bpf_tunnel_key, tunnel_ext):
4813	case offsetof(struct bpf_tunnel_key, remote_ipv6[`1`]):
4814	/ Fixup deprecated structure layouts here, so we have*
4815	* a common path later on.
4816	*/
4817	memcpy(compat, from, size);
4818	memset(compat + size, `0`, sizeof(compat) - size);
4819	from = (const struct bpf_tunnel_key *) compat;
4820	break;
4821	default:
4822	return -EINVAL;
4823	}
4824	}
4825	if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) \|\|
4826	from->tunnel_ext))
4827	return -EINVAL;
4828
4829	skb_dst_drop(skb);
4830	dst_hold(dst: (struct dst_entry *) md);
4831	skb_dst_set(skb, dst: (struct dst_entry *) md);
4832
4833	info = &md->u.tun_info;
4834	memset(info, `0`, sizeof(*info));
4835	info->mode = IP_TUNNEL_INFO_TX;
4836
4837	__set_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags);
4838	__assign_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags,
4839	flags & BPF_F_DONT_FRAGMENT);
4840	__assign_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags,
4841	!(flags & BPF_F_ZERO_CSUM_TX));
4842	__assign_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags,
4843	flags & BPF_F_SEQ_NUMBER);
4844	__assign_bit(IP_TUNNEL_KEY_BIT, info->key.tun_flags,
4845	!(flags & BPF_F_NO_TUNNEL_KEY));
4846
4847	info->key.tun_id = cpu_to_be64(from->tunnel_id);
4848	info->key.tos = from->tunnel_tos;
4849	info->key.ttl = from->tunnel_ttl;
4850
4851	if (flags & BPF_F_TUNINFO_IPV6) {
4852	info->mode \|= IP_TUNNEL_INFO_IPV6;
4853	memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
4854	sizeof(from->remote_ipv6));
4855	memcpy(&info->key.u.ipv6.src, from->local_ipv6,
4856	sizeof(from->local_ipv6));
4857	info->key.label = cpu_to_be32(from->tunnel_label) &
4858	IPV6_FLOWLABEL_MASK;
4859	} else {
4860	info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
4861	info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
4862	info->key.flow_flags = FLOWI_FLAG_ANYSRC;
4863	}
4864
4865	return `0`;
4866	}
4867
4868	static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
4869	.func = bpf_skb_set_tunnel_key,
4870	.gpl_only = false,
4871	.ret_type = RET_INTEGER,
4872	.arg1_type = ARG_PTR_TO_CTX,
4873	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4874	.arg3_type = ARG_CONST_SIZE,
4875	.arg4_type = ARG_ANYTHING,
4876	};
4877
4878	BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4879	const u8 *, from, u32, size)
4880	{
4881	struct ip_tunnel_info *info = skb_tunnel_info(skb);
4882	const struct metadata_dst *md = this_cpu_ptr(md_dst);
4883	IP_TUNNEL_DECLARE_FLAGS(present) = { };
4884
4885	if (unlikely(info != &md->u.tun_info \|\| (size & (sizeof(u32) - `1`))))
4886	return -EINVAL;
4887	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
4888	return -ENOMEM;
4889
4890	ip_tunnel_set_options_present(flags: present);
4891	ip_tunnel_info_opts_set(info, from, len: size, flags: present);
4892
4893	return `0`;
4894	}
4895
4896	static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4897	.func = bpf_skb_set_tunnel_opt,
4898	.gpl_only = false,
4899	.ret_type = RET_INTEGER,
4900	.arg1_type = ARG_PTR_TO_CTX,
4901	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
4902	.arg3_type = ARG_CONST_SIZE,
4903	};
4904
4905	static const struct bpf_func_proto *
4906	bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
4907	{
4908	if (!md_dst) {
4909	struct metadata_dst __percpu *tmp;
4910
4911	tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4912	type: METADATA_IP_TUNNEL,
4913	GFP_KERNEL);
4914	if (!tmp)
4915	return NULL;
4916	if (cmpxchg(&md_dst, NULL, tmp))
4917	metadata_dst_free_percpu(md_dst: tmp);
4918	}
4919
4920	switch (which) {
4921	case BPF_FUNC_skb_set_tunnel_key:
4922	return &bpf_skb_set_tunnel_key_proto;
4923	case BPF_FUNC_skb_set_tunnel_opt:
4924	return &bpf_skb_set_tunnel_opt_proto;
4925	default:
4926	return NULL;
4927	}
4928	}
4929
4930	BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff , skb, struct* bpf_map *, map,
4931	u32, idx)
4932	{
4933	struct bpf_array array = container_of(map, struct* bpf_array, map);
4934	struct cgroup *cgrp;
4935	struct sock *sk;
4936
4937	sk = skb_to_full_sk(skb);
4938	if (!sk \|\| !sk_fullsock(sk))
4939	return -ENOENT;
4940	if (unlikely(idx >= array->map.max_entries))
4941	return -E2BIG;
4942
4943	cgrp = READ_ONCE(array->ptrs[idx]);
4944	if (unlikely(!cgrp))
4945	return -EAGAIN;
4946
4947	return sk_under_cgroup_hierarchy(sk, ancestor: cgrp);
4948	}
4949
4950	static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4951	.func = bpf_skb_under_cgroup,
4952	.gpl_only = false,
4953	.ret_type = RET_INTEGER,
4954	.arg1_type = ARG_PTR_TO_CTX,
4955	.arg2_type = ARG_CONST_MAP_PTR,
4956	.arg3_type = ARG_ANYTHING,
4957	};
4958
4959	#ifdef CONFIG_SOCK_CGROUP_DATA
4960	static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4961	{
4962	struct cgroup *cgrp;
4963
4964	sk = sk_to_full_sk(sk);
4965	if (!sk \|\| !sk_fullsock(sk))
4966	return `0`;
4967
4968	cgrp = sock_cgroup_ptr(skcd: &sk->sk_cgrp_data);
4969	return cgroup_id(cgrp);
4970	}
4971
4972	BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4973	{
4974	return __bpf_sk_cgroup_id(sk: skb->sk);
4975	}
4976
4977	static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4978	.func = bpf_skb_cgroup_id,
4979	.gpl_only = false,
4980	.ret_type = RET_INTEGER,
4981	.arg1_type = ARG_PTR_TO_CTX,
4982	};
4983
4984	static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4985	int ancestor_level)
4986	{
4987	struct cgroup *ancestor;
4988	struct cgroup *cgrp;
4989
4990	sk = sk_to_full_sk(sk);
4991	if (!sk \|\| !sk_fullsock(sk))
4992	return `0`;
4993
4994	cgrp = sock_cgroup_ptr(skcd: &sk->sk_cgrp_data);
4995	ancestor = cgroup_ancestor(cgrp, ancestor_level);
4996	if (!ancestor)
4997	return `0`;
4998
4999	return cgroup_id(cgrp: ancestor);
5000	}
5001
5002	BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff , skb, int*,
5003	ancestor_level)
5004	{
5005	return __bpf_sk_ancestor_cgroup_id(sk: skb->sk, ancestor_level);
5006	}
5007
5008	static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
5009	.func = bpf_skb_ancestor_cgroup_id,
5010	.gpl_only = false,
5011	.ret_type = RET_INTEGER,
5012	.arg1_type = ARG_PTR_TO_CTX,
5013	.arg2_type = ARG_ANYTHING,
5014	};
5015
5016	BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
5017	{
5018	return __bpf_sk_cgroup_id(sk);
5019	}
5020
5021	static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
5022	.func = bpf_sk_cgroup_id,
5023	.gpl_only = false,
5024	.ret_type = RET_INTEGER,
5025	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5026	};
5027
5028	BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock , sk, int*, ancestor_level)
5029	{
5030	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
5031	}
5032
5033	static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
5034	.func = bpf_sk_ancestor_cgroup_id,
5035	.gpl_only = false,
5036	.ret_type = RET_INTEGER,
5037	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5038	.arg2_type = ARG_ANYTHING,
5039	};
5040	#endif
5041
5042	static unsigned long bpf_xdp_copy(void dst, const* void *ctx,
5043	unsigned long off, unsigned long len)
5044	{
5045	struct xdp_buff xdp = (struct* xdp_buff *)ctx;
5046
5047	bpf_xdp_copy_buf(xdp, off, buf: dst, len, flush: false);
5048	return `0`;
5049	}
5050
5051	BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff , xdp, struct* bpf_map *, map,
5052	u64, flags, void *, meta, u64, meta_size)
5053	{
5054	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> `32`;
5055
5056	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK \| BPF_F_INDEX_MASK)))
5057	return -EINVAL;
5058
5059	if (unlikely(!xdp \|\| xdp_size > xdp_get_buff_len(xdp)))
5060	return -EFAULT;
5061
5062	return bpf_event_output(map, flags, meta, meta_size, ctx: xdp,
5063	ctx_size: xdp_size, ctx_copy: bpf_xdp_copy);
5064	}
5065
5066	static const struct bpf_func_proto bpf_xdp_event_output_proto = {
5067	.func = bpf_xdp_event_output,
5068	.gpl_only = true,
5069	.ret_type = RET_INTEGER,
5070	.arg1_type = ARG_PTR_TO_CTX,
5071	.arg2_type = ARG_CONST_MAP_PTR,
5072	.arg3_type = ARG_ANYTHING,
5073	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5074	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
5075	};
5076
5077	BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
5078
5079	const struct bpf_func_proto bpf_xdp_output_proto = {
5080	.func = bpf_xdp_event_output,
5081	.gpl_only = true,
5082	.ret_type = RET_INTEGER,
5083	.arg1_type = ARG_PTR_TO_BTF_ID,
5084	.arg1_btf_id = &bpf_xdp_output_btf_ids[`0`],
5085	.arg2_type = ARG_CONST_MAP_PTR,
5086	.arg3_type = ARG_ANYTHING,
5087	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5088	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
5089	};
5090
5091	BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
5092	{
5093	return skb->sk ? __sock_gen_cookie(sk: skb->sk) : `0`;
5094	}
5095
5096	static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
5097	.func = bpf_get_socket_cookie,
5098	.gpl_only = false,
5099	.ret_type = RET_INTEGER,
5100	.arg1_type = ARG_PTR_TO_CTX,
5101	};
5102
5103	BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
5104	{
5105	return __sock_gen_cookie(sk: ctx->sk);
5106	}
5107
5108	static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
5109	.func = bpf_get_socket_cookie_sock_addr,
5110	.gpl_only = false,
5111	.ret_type = RET_INTEGER,
5112	.arg1_type = ARG_PTR_TO_CTX,
5113	};
5114
5115	BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
5116	{
5117	return __sock_gen_cookie(sk: ctx);
5118	}
5119
5120	static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
5121	.func = bpf_get_socket_cookie_sock,
5122	.gpl_only = false,
5123	.ret_type = RET_INTEGER,
5124	.arg1_type = ARG_PTR_TO_CTX,
5125	};
5126
5127	BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk)
5128	{
5129	return sk ? sock_gen_cookie(sk) : `0`;
5130	}
5131
5132	const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
5133	.func = bpf_get_socket_ptr_cookie,
5134	.gpl_only = false,
5135	.ret_type = RET_INTEGER,
5136	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON \| PTR_MAYBE_NULL,
5137	};
5138
5139	BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
5140	{
5141	return __sock_gen_cookie(sk: ctx->sk);
5142	}
5143
5144	static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
5145	.func = bpf_get_socket_cookie_sock_ops,
5146	.gpl_only = false,
5147	.ret_type = RET_INTEGER,
5148	.arg1_type = ARG_PTR_TO_CTX,
5149	};
5150
5151	static u64 __bpf_get_netns_cookie(struct sock *sk)
5152	{
5153	const struct net *net = sk ? sock_net(sk) : &init_net;
5154
5155	return net->net_cookie;
5156	}
5157
5158	BPF_CALL_1(bpf_get_netns_cookie, struct sk_buff *, skb)
5159	{
5160	return __bpf_get_netns_cookie(sk: skb && skb->sk ? skb->sk : NULL);
5161	}
5162
5163	static const struct bpf_func_proto bpf_get_netns_cookie_proto = {
5164	.func = bpf_get_netns_cookie,
5165	.ret_type = RET_INTEGER,
5166	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5167	};
5168
5169	BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
5170	{
5171	return __bpf_get_netns_cookie(sk: ctx);
5172	}
5173
5174	static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
5175	.func = bpf_get_netns_cookie_sock,
5176	.gpl_only = false,
5177	.ret_type = RET_INTEGER,
5178	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5179	};
5180
5181	BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
5182	{
5183	return __bpf_get_netns_cookie(sk: ctx ? ctx->sk : NULL);
5184	}
5185
5186	static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
5187	.func = bpf_get_netns_cookie_sock_addr,
5188	.gpl_only = false,
5189	.ret_type = RET_INTEGER,
5190	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5191	};
5192
5193	BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
5194	{
5195	return __bpf_get_netns_cookie(sk: ctx ? ctx->sk : NULL);
5196	}
5197
5198	static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
5199	.func = bpf_get_netns_cookie_sock_ops,
5200	.gpl_only = false,
5201	.ret_type = RET_INTEGER,
5202	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5203	};
5204
5205	BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
5206	{
5207	return __bpf_get_netns_cookie(sk: ctx ? ctx->sk : NULL);
5208	}
5209
5210	static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
5211	.func = bpf_get_netns_cookie_sk_msg,
5212	.gpl_only = false,
5213	.ret_type = RET_INTEGER,
5214	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5215	};
5216
5217	BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
5218	{
5219	struct sock *sk = sk_to_full_sk(sk: skb->sk);
5220	kuid_t kuid;
5221
5222	if (!sk \|\| !sk_fullsock(sk))
5223	return overflowuid;
5224	kuid = sock_net_uid(net: sock_net(sk), sk);
5225	return from_kuid_munged(to: sock_net(sk)->user_ns, uid: kuid);
5226	}
5227
5228	static const struct bpf_func_proto bpf_get_socket_uid_proto = {
5229	.func = bpf_get_socket_uid,
5230	.gpl_only = false,
5231	.ret_type = RET_INTEGER,
5232	.arg1_type = ARG_PTR_TO_CTX,
5233	};
5234
5235	static int sk_bpf_set_get_cb_flags(struct sock sk, char* *optval, bool getopt)
5236	{
5237	u32 sk_bpf_cb_flags;
5238
5239	if (getopt) {
5240	(u32 )optval = sk->sk_bpf_cb_flags;
5241	return `0`;
5242	}
5243
5244	sk_bpf_cb_flags = (u32 )optval;
5245
5246	if (sk_bpf_cb_flags & ~SK_BPF_CB_MASK)
5247	return -EINVAL;
5248
5249	sk->sk_bpf_cb_flags = sk_bpf_cb_flags;
5250
5251	return `0`;
5252	}
5253
5254	static int sol_socket_sockopt(struct sock sk, int* optname,
5255	char optval, int* *optlen,
5256	bool getopt)
5257	{
5258	switch (optname) {
5259	case SO_REUSEADDR:
5260	case SO_SNDBUF:
5261	case SO_RCVBUF:
5262	case SO_KEEPALIVE:
5263	case SO_PRIORITY:
5264	case SO_REUSEPORT:
5265	case SO_RCVLOWAT:
5266	case SO_MARK:
5267	case SO_MAX_PACING_RATE:
5268	case SO_BINDTOIFINDEX:
5269	case SO_TXREHASH:
5270	case SK_BPF_CB_FLAGS:
5271	if (optlen != sizeof(int*))
5272	return -EINVAL;
5273	break;
5274	case SO_BINDTODEVICE:
5275	break;
5276	default:
5277	return -EINVAL;
5278	}
5279
5280	if (optname == SK_BPF_CB_FLAGS)
5281	return sk_bpf_set_get_cb_flags(sk, optval, getopt);
5282
5283	if (getopt) {
5284	if (optname == SO_BINDTODEVICE)
5285	return -EINVAL;
5286	return sk_getsockopt(sk, SOL_SOCKET, optname,
5287	optval: KERNEL_SOCKPTR(p: optval),
5288	optlen: KERNEL_SOCKPTR(p: optlen));
5289	}
5290
5291	return sk_setsockopt(sk, SOL_SOCKET, optname,
5292	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5293	}
5294
5295	static int bpf_sol_tcp_getsockopt(struct sock sk, int* optname,
5296	char optval, int* optlen)
5297	{
5298	if (optlen != sizeof(int))
5299	return -EINVAL;
5300
5301	switch (optname) {
5302	case TCP_BPF_SOCK_OPS_CB_FLAGS: {
5303	int cb_flags = tcp_sk(sk)->bpf_sock_ops_cb_flags;
5304
5305	memcpy(optval, &cb_flags, optlen);
5306	break;
5307	}
5308	case TCP_BPF_RTO_MIN: {
5309	int rto_min_us = jiffies_to_usecs(inet_csk(sk)->icsk_rto_min);
5310
5311	memcpy(optval, &rto_min_us, optlen);
5312	break;
5313	}
5314	case TCP_BPF_DELACK_MAX: {
5315	int delack_max_us = jiffies_to_usecs(inet_csk(sk)->icsk_delack_max);
5316
5317	memcpy(optval, &delack_max_us, optlen);
5318	break;
5319	}
5320	default:
5321	return -EINVAL;
5322	}
5323
5324	return `0`;
5325	}
5326
5327	static int bpf_sol_tcp_setsockopt(struct sock sk, int* optname,
5328	char optval, int* optlen)
5329	{
5330	struct tcp_sock *tp = tcp_sk(sk);
5331	unsigned long timeout;
5332	int val;
5333
5334	if (optlen != sizeof(int))
5335	return -EINVAL;
5336
5337	val = (int* *)optval;
5338
5339	/ Only some options are supported /
5340	switch (optname) {
5341	case TCP_BPF_IW:
5342	if (val <= `0` \|\| tp->data_segs_out > tp->syn_data)
5343	return -EINVAL;
5344	tcp_snd_cwnd_set(tp, val);
5345	break;
5346	case TCP_BPF_SNDCWND_CLAMP:
5347	if (val <= `0`)
5348	return -EINVAL;
5349	tp->snd_cwnd_clamp = val;
5350	tp->snd_ssthresh = val;
5351	break;
5352	case TCP_BPF_DELACK_MAX:
5353	timeout = usecs_to_jiffies(u: val);
5354	if (timeout > TCP_DELACK_MAX \|\|
5355	timeout < TCP_TIMEOUT_MIN)
5356	return -EINVAL;
5357	inet_csk(sk)->icsk_delack_max = timeout;
5358	break;
5359	case TCP_BPF_RTO_MIN:
5360	timeout = usecs_to_jiffies(u: val);
5361	if (timeout > TCP_RTO_MIN \|\|
5362	timeout < TCP_TIMEOUT_MIN)
5363	return -EINVAL;
5364	inet_csk(sk)->icsk_rto_min = timeout;
5365	break;
5366	case TCP_BPF_SOCK_OPS_CB_FLAGS:
5367	if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS))
5368	return -EINVAL;
5369	tp->bpf_sock_ops_cb_flags = val;
5370	break;
5371	default:
5372	return -EINVAL;
5373	}
5374
5375	return `0`;
5376	}
5377
5378	static int sol_tcp_sockopt_congestion(struct sock sk, char* *optval,
5379	int *optlen, bool getopt)
5380	{
5381	struct tcp_sock *tp;
5382	int ret;
5383
5384	if (*optlen < `2`)
5385	return -EINVAL;
5386
5387	if (getopt) {
5388	if (!inet_csk(sk)->icsk_ca_ops)
5389	return -EINVAL;
5390	/ BPF expects NULL-terminated tcp-cc string /
5391	optval[--(*optlen)] = `'\0'`;
5392	return do_tcp_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
5393	optval: KERNEL_SOCKPTR(p: optval),
5394	optlen: KERNEL_SOCKPTR(p: optlen));
5395	}
5396
5397	/ "cdg" is the only cc that alloc a ptr*
5398	* in inet_csk_ca area. The bpf-tcp-cc may
5399	* overwrite this ptr after switching to cdg.
5400	*/
5401	if (optlen >= sizeof("cdg") - `1` && !strncmp("cdg", optval, optlen))
5402	return -ENOTSUPP;
5403
5404	/ It stops this looping*
5405	*
5406	* .init => bpf_setsockopt(tcp_cc) => .init =>
5407	* bpf_setsockopt(tcp_cc)" => .init => ....
5408	*
5409	* The second bpf_setsockopt(tcp_cc) is not allowed
5410	* in order to break the loop when both .init
5411	* are the same bpf prog.
5412	*
5413	* This applies even the second bpf_setsockopt(tcp_cc)
5414	* does not cause a loop. This limits only the first
5415	* '.init' can call bpf_setsockopt(TCP_CONGESTION) to
5416	* pick a fallback cc (eg. peer does not support ECN)
5417	* and the second '.init' cannot fallback to
5418	* another.
5419	*/
5420	tp = tcp_sk(sk);
5421	if (tp->bpf_chg_cc_inprogress)
5422	return -EBUSY;
5423
5424	tp->bpf_chg_cc_inprogress = `1`;
5425	ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
5426	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5427	tp->bpf_chg_cc_inprogress = `0`;
5428	return ret;
5429	}
5430
5431	static int sol_tcp_sockopt(struct sock sk, int* optname,
5432	char optval, int* *optlen,
5433	bool getopt)
5434	{
5435	if (sk->sk_protocol != IPPROTO_TCP)
5436	return -EINVAL;
5437
5438	switch (optname) {
5439	case TCP_NODELAY:
5440	case TCP_MAXSEG:
5441	case TCP_KEEPIDLE:
5442	case TCP_KEEPINTVL:
5443	case TCP_KEEPCNT:
5444	case TCP_SYNCNT:
5445	case TCP_WINDOW_CLAMP:
5446	case TCP_THIN_LINEAR_TIMEOUTS:
5447	case TCP_USER_TIMEOUT:
5448	case TCP_NOTSENT_LOWAT:
5449	case TCP_SAVE_SYN:
5450	case TCP_RTO_MAX_MS:
5451	if (optlen != sizeof(int*))
5452	return -EINVAL;
5453	break;
5454	case TCP_CONGESTION:
5455	return sol_tcp_sockopt_congestion(sk, optval, optlen, getopt);
5456	case TCP_SAVED_SYN:
5457	if (*optlen < `1`)
5458	return -EINVAL;
5459	break;
5460	default:
5461	if (getopt)
5462	return bpf_sol_tcp_getsockopt(sk, optname, optval, optlen: *optlen);
5463	return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen: *optlen);
5464	}
5465
5466	if (getopt) {
5467	if (optname == TCP_SAVED_SYN) {
5468	struct tcp_sock *tp = tcp_sk(sk);
5469
5470	if (!tp->saved_syn \|\|
5471	*optlen > tcp_saved_syn_len(saved_syn: tp->saved_syn))
5472	return -EINVAL;
5473	memcpy(optval, tp->saved_syn->data, *optlen);
5474	/ It cannot free tp->saved_syn here because it*
5475	* does not know if the user space still needs it.
5476	*/
5477	return `0`;
5478	}
5479
5480	return do_tcp_getsockopt(sk, SOL_TCP, optname,
5481	optval: KERNEL_SOCKPTR(p: optval),
5482	optlen: KERNEL_SOCKPTR(p: optlen));
5483	}
5484
5485	return do_tcp_setsockopt(sk, SOL_TCP, optname,
5486	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5487	}
5488
5489	static int sol_ip_sockopt(struct sock sk, int* optname,
5490	char optval, int* *optlen,
5491	bool getopt)
5492	{
5493	if (sk->sk_family != AF_INET)
5494	return -EINVAL;
5495
5496	switch (optname) {
5497	case IP_TOS:
5498	if (optlen != sizeof(int*))
5499	return -EINVAL;
5500	break;
5501	default:
5502	return -EINVAL;
5503	}
5504
5505	if (getopt)
5506	return do_ip_getsockopt(sk, SOL_IP, optname,
5507	optval: KERNEL_SOCKPTR(p: optval),
5508	optlen: KERNEL_SOCKPTR(p: optlen));
5509
5510	return do_ip_setsockopt(sk, SOL_IP, optname,
5511	optval: KERNEL_SOCKPTR(p: optval), optlen: *optlen);
5512	}
5513
5514	static int sol_ipv6_sockopt(struct sock sk, int* optname,
5515	char optval, int* *optlen,
5516	bool getopt)
5517	{
5518	if (sk->sk_family != AF_INET6)
5519	return -EINVAL;
5520
5521	switch (optname) {
5522	case IPV6_TCLASS:
5523	case IPV6_AUTOFLOWLABEL:
5524	if (optlen != sizeof(int*))
5525	return -EINVAL;
5526	break;
5527	default:
5528	return -EINVAL;
5529	}
5530
5531	if (getopt)
5532	return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname,
5533	KERNEL_SOCKPTR(p: optval),
5534	KERNEL_SOCKPTR(p: optlen));
5535
5536	return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname,
5537	KERNEL_SOCKPTR(p: optval), *optlen);
5538	}
5539
5540	static int __bpf_setsockopt(struct sock sk, int* level, int optname,
5541	char optval, int* optlen)
5542	{
5543	if (!sk_fullsock(sk))
5544	return -EINVAL;
5545
5546	if (level == SOL_SOCKET)
5547	return sol_socket_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5548	else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
5549	return sol_ip_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5550	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
5551	return sol_ipv6_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5552	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
5553	return sol_tcp_sockopt(sk, optname, optval, optlen: &optlen, getopt: false);
5554
5555	return -EINVAL;
5556	}
5557
5558	static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
5559	{
5560	return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
5561	}
5562
/* Ownership-checked wrapper around __bpf_setsockopt(): for a full
 * socket, sock_owned_by_me() is invoked before the option is set.
 */
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
			   char *optval, int optlen)
{
	if (sk_fullsock(sk))
		sock_owned_by_me(sk);
	return __bpf_setsockopt(sk, level, optname, optval, optlen);
}
5570
5571	static int __bpf_getsockopt(struct sock sk, int* level, int optname,
5572	char optval, int* optlen)
5573	{
5574	int err, saved_optlen = optlen;
5575
5576	if (!sk_fullsock(sk)) {
5577	err = -EINVAL;
5578	goto done;
5579	}
5580
5581	if (level == SOL_SOCKET)
5582	err = sol_socket_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5583	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
5584	err = sol_tcp_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5585	else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
5586	err = sol_ip_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5587	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
5588	err = sol_ipv6_sockopt(sk, optname, optval, optlen: &optlen, getopt: true);
5589	else
5590	err = -EINVAL;
5591
5592	done:
5593	if (err)
5594	optlen = `0`;
5595	if (optlen < saved_optlen)
5596	memset(optval + optlen, `0`, saved_optlen - optlen);
5597	return err;
5598	}
5599
/* Ownership-checked wrapper around __bpf_getsockopt(): for a full
 * socket, sock_owned_by_me() is invoked before the option is read.
 */
static int _bpf_getsockopt(struct sock *sk, int level, int optname,
			   char *optval, int optlen)
{
	if (sk_fullsock(sk))
		sock_owned_by_me(sk);
	return __bpf_getsockopt(sk, level, optname, optval, optlen);
}
5607
5608	BPF_CALL_5(bpf_sk_setsockopt, struct sock , sk, int*, level,
5609	int, optname, char , optval, int*, optlen)
5610	{
5611	return _bpf_setsockopt(sk, level, optname, optval, optlen);
5612	}
5613
5614	const struct bpf_func_proto bpf_sk_setsockopt_proto = {
5615	.func = bpf_sk_setsockopt,
5616	.gpl_only = false,
5617	.ret_type = RET_INTEGER,
5618	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5619	.arg2_type = ARG_ANYTHING,
5620	.arg3_type = ARG_ANYTHING,
5621	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5622	.arg5_type = ARG_CONST_SIZE,
5623	};
5624
5625	BPF_CALL_5(bpf_sk_getsockopt, struct sock , sk, int*, level,
5626	int, optname, char , optval, int*, optlen)
5627	{
5628	return _bpf_getsockopt(sk, level, optname, optval, optlen);
5629	}
5630
5631	const struct bpf_func_proto bpf_sk_getsockopt_proto = {
5632	.func = bpf_sk_getsockopt,
5633	.gpl_only = false,
5634	.ret_type = RET_INTEGER,
5635	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5636	.arg2_type = ARG_ANYTHING,
5637	.arg3_type = ARG_ANYTHING,
5638	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5639	.arg5_type = ARG_CONST_SIZE,
5640	};
5641
5642	BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock , sk, int*, level,
5643	int, optname, char , optval, int*, optlen)
5644	{
5645	return __bpf_setsockopt(sk, level, optname, optval, optlen);
5646	}
5647
5648	const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
5649	.func = bpf_unlocked_sk_setsockopt,
5650	.gpl_only = false,
5651	.ret_type = RET_INTEGER,
5652	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5653	.arg2_type = ARG_ANYTHING,
5654	.arg3_type = ARG_ANYTHING,
5655	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5656	.arg5_type = ARG_CONST_SIZE,
5657	};
5658
5659	BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock , sk, int*, level,
5660	int, optname, char , optval, int*, optlen)
5661	{
5662	return __bpf_getsockopt(sk, level, optname, optval, optlen);
5663	}
5664
5665	const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
5666	.func = bpf_unlocked_sk_getsockopt,
5667	.gpl_only = false,
5668	.ret_type = RET_INTEGER,
5669	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5670	.arg2_type = ARG_ANYTHING,
5671	.arg3_type = ARG_ANYTHING,
5672	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5673	.arg5_type = ARG_CONST_SIZE,
5674	};
5675
5676	BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
5677	int, level, int, optname, char , optval, int*, optlen)
5678	{
5679	return _bpf_setsockopt(sk: ctx->sk, level, optname, optval, optlen);
5680	}
5681
5682	static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
5683	.func = bpf_sock_addr_setsockopt,
5684	.gpl_only = false,
5685	.ret_type = RET_INTEGER,
5686	.arg1_type = ARG_PTR_TO_CTX,
5687	.arg2_type = ARG_ANYTHING,
5688	.arg3_type = ARG_ANYTHING,
5689	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5690	.arg5_type = ARG_CONST_SIZE,
5691	};
5692
5693	BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
5694	int, level, int, optname, char , optval, int*, optlen)
5695	{
5696	return _bpf_getsockopt(sk: ctx->sk, level, optname, optval, optlen);
5697	}
5698
5699	static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
5700	.func = bpf_sock_addr_getsockopt,
5701	.gpl_only = false,
5702	.ret_type = RET_INTEGER,
5703	.arg1_type = ARG_PTR_TO_CTX,
5704	.arg2_type = ARG_ANYTHING,
5705	.arg3_type = ARG_ANYTHING,
5706	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5707	.arg5_type = ARG_CONST_SIZE,
5708	};
5709
5710	BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5711	int, level, int, optname, char , optval, int*, optlen)
5712	{
5713	if (!is_locked_tcp_sock_ops(bpf_sock))
5714	return -EOPNOTSUPP;
5715
5716	return _bpf_setsockopt(sk: bpf_sock->sk, level, optname, optval, optlen);
5717	}
5718
5719	static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
5720	.func = bpf_sock_ops_setsockopt,
5721	.gpl_only = false,
5722	.ret_type = RET_INTEGER,
5723	.arg1_type = ARG_PTR_TO_CTX,
5724	.arg2_type = ARG_ANYTHING,
5725	.arg3_type = ARG_ANYTHING,
5726	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5727	.arg5_type = ARG_CONST_SIZE,
5728	};
5729
5730	static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
5731	int optname, const u8 **start)
5732	{
5733	struct sk_buff *syn_skb = bpf_sock->syn_skb;
5734	const u8 *hdr_start;
5735	int ret;
5736
5737	if (syn_skb) {
5738	/ sk is a request_sock here /
5739
5740	if (optname == TCP_BPF_SYN) {
5741	hdr_start = syn_skb->data;
5742	ret = tcp_hdrlen(skb: syn_skb);
5743	} else if (optname == TCP_BPF_SYN_IP) {
5744	hdr_start = skb_network_header(skb: syn_skb);
5745	ret = skb_network_header_len(skb: syn_skb) +
5746	tcp_hdrlen(skb: syn_skb);
5747	} else {
5748	/ optname == TCP_BPF_SYN_MAC /
5749	hdr_start = skb_mac_header(skb: syn_skb);
5750	ret = skb_mac_header_len(skb: syn_skb) +
5751	skb_network_header_len(skb: syn_skb) +
5752	tcp_hdrlen(skb: syn_skb);
5753	}
5754	} else {
5755	struct sock *sk = bpf_sock->sk;
5756	struct saved_syn *saved_syn;
5757
5758	if (sk->sk_state == TCP_NEW_SYN_RECV)
5759	/ synack retransmit. bpf_sock->syn_skb will*
5760	* not be available. It has to resort to
5761	* saved_syn (if it is saved).
5762	*/
5763	saved_syn = inet_reqsk(sk)->saved_syn;
5764	else
5765	saved_syn = tcp_sk(sk)->saved_syn;
5766
5767	if (!saved_syn)
5768	return -ENOENT;
5769
5770	if (optname == TCP_BPF_SYN) {
5771	hdr_start = saved_syn->data +
5772	saved_syn->mac_hdrlen +
5773	saved_syn->network_hdrlen;
5774	ret = saved_syn->tcp_hdrlen;
5775	} else if (optname == TCP_BPF_SYN_IP) {
5776	hdr_start = saved_syn->data +
5777	saved_syn->mac_hdrlen;
5778	ret = saved_syn->network_hdrlen +
5779	saved_syn->tcp_hdrlen;
5780	} else {
5781	/ optname == TCP_BPF_SYN_MAC /
5782
5783	/ TCP_SAVE_SYN may not have saved the mac hdr /
5784	if (!saved_syn->mac_hdrlen)
5785	return -ENOENT;
5786
5787	hdr_start = saved_syn->data;
5788	ret = saved_syn->mac_hdrlen +
5789	saved_syn->network_hdrlen +
5790	saved_syn->tcp_hdrlen;
5791	}
5792	}
5793
5794	*start = hdr_start;
5795	return ret;
5796	}
5797
5798	BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5799	int, level, int, optname, char , optval, int*, optlen)
5800	{
5801	if (!is_locked_tcp_sock_ops(bpf_sock))
5802	return -EOPNOTSUPP;
5803
5804	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
5805	optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
5806	int ret, copy_len = `0`;
5807	const u8 *start;
5808
5809	ret = bpf_sock_ops_get_syn(bpf_sock, optname, start: &start);
5810	if (ret > `0`) {
5811	copy_len = ret;
5812	if (optlen < copy_len) {
5813	copy_len = optlen;
5814	ret = -ENOSPC;
5815	}
5816
5817	memcpy(optval, start, copy_len);
5818	}
5819
5820	/ Zero out unused buffer at the end /
5821	memset(optval + copy_len, `0`, optlen - copy_len);
5822
5823	return ret;
5824	}
5825
5826	return _bpf_getsockopt(sk: bpf_sock->sk, level, optname, optval, optlen);
5827	}
5828
5829	static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
5830	.func = bpf_sock_ops_getsockopt,
5831	.gpl_only = false,
5832	.ret_type = RET_INTEGER,
5833	.arg1_type = ARG_PTR_TO_CTX,
5834	.arg2_type = ARG_ANYTHING,
5835	.arg3_type = ARG_ANYTHING,
5836	.arg4_type = ARG_PTR_TO_UNINIT_MEM,
5837	.arg5_type = ARG_CONST_SIZE,
5838	};
5839
5840	BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
5841	int, argval)
5842	{
5843	struct sock *sk = bpf_sock->sk;
5844	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
5845
5846	if (!is_locked_tcp_sock_ops(bpf_sock))
5847	return -EOPNOTSUPP;
5848
5849	if (!IS_ENABLED(CONFIG_INET) \|\| !sk_fullsock(sk))
5850	return -EINVAL;
5851
5852	tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
5853
5854	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
5855	}
5856
5857	static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
5858	.func = bpf_sock_ops_cb_flags_set,
5859	.gpl_only = false,
5860	.ret_type = RET_INTEGER,
5861	.arg1_type = ARG_PTR_TO_CTX,
5862	.arg2_type = ARG_ANYTHING,
5863	};
5864
5865	const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
5866	EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
5867
5868	BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern , ctx, struct* sockaddr *, addr,
5869	int, addr_len)
5870	{
5871	#ifdef CONFIG_INET
5872	struct sock *sk = ctx->sk;
5873	u32 flags = BIND_FROM_BPF;
5874	int err;
5875
5876	err = -EINVAL;
5877	if (addr_len < offsetofend(struct sockaddr, sa_family))
5878	return err;
5879	if (addr->sa_family == AF_INET) {
5880	if (addr_len < sizeof(struct sockaddr_in))
5881	return err;
5882	if (((struct sockaddr_in *)addr)->sin_port == htons(`0`))
5883	flags \|= BIND_FORCE_ADDRESS_NO_PORT;
5884	return __inet_bind(sk, uaddr: addr, addr_len, flags);
5885	#if IS_ENABLED(CONFIG_IPV6)
5886	} else if (addr->sa_family == AF_INET6) {
5887	if (addr_len < SIN6_LEN_RFC2133)
5888	return err;
5889	if (((struct sockaddr_in6 *)addr)->sin6_port == htons(`0`))
5890	flags \|= BIND_FORCE_ADDRESS_NO_PORT;
5891	/ ipv6_bpf_stub cannot be NULL, since it's called from*
5892	* bpf_cgroup_inet6_connect hook and ipv6 is already loaded
5893	*/
5894	return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
5895	#endif /* CONFIG_IPV6 */
5896	}
5897	#endif /* CONFIG_INET */
5898
5899	return -EAFNOSUPPORT;
5900	}
5901
5902	static const struct bpf_func_proto bpf_bind_proto = {
5903	.func = bpf_bind,
5904	.gpl_only = false,
5905	.ret_type = RET_INTEGER,
5906	.arg1_type = ARG_PTR_TO_CTX,
5907	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
5908	.arg3_type = ARG_CONST_SIZE,
5909	};
5910
5911	#ifdef CONFIG_XFRM
5912
5913	#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) \|\| \
5914	(IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
5915
5916	struct metadata_dst __percpu *xfrm_bpf_md_dst;
5917	EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
5918
5919	#endif
5920
5921	BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
5922	struct bpf_xfrm_state *, to, u32, size, u64, flags)
5923	{
5924	const struct sec_path *sp = skb_sec_path(skb);
5925	const struct xfrm_state *x;
5926
5927	if (!sp \|\| unlikely(index >= sp->len \|\| flags))
5928	goto err_clear;
5929
5930	x = sp->xvec[index];
5931
5932	if (unlikely(size != sizeof(struct bpf_xfrm_state)))
5933	goto err_clear;
5934
5935	to->reqid = x->props.reqid;
5936	to->spi = x->id.spi;
5937	to->family = x->props.family;
5938	to->ext = `0`;
5939
5940	if (to->family == AF_INET6) {
5941	memcpy(to->remote_ipv6, x->props.saddr.a6,
5942	sizeof(to->remote_ipv6));
5943	} else {
5944	to->remote_ipv4 = x->props.saddr.a4;
5945	memset(&to->remote_ipv6[`1`], `0`, sizeof(__u32) * `3`);
5946	}
5947
5948	return `0`;
5949	err_clear:
5950	memset(to, `0`, size);
5951	return -EINVAL;
5952	}
5953
5954	static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
5955	.func = bpf_skb_get_xfrm_state,
5956	.gpl_only = false,
5957	.ret_type = RET_INTEGER,
5958	.arg1_type = ARG_PTR_TO_CTX,
5959	.arg2_type = ARG_ANYTHING,
5960	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
5961	.arg4_type = ARG_CONST_SIZE,
5962	.arg5_type = ARG_ANYTHING,
5963	};
5964	#endif
5965
5966	#if IS_ENABLED(CONFIG_INET) \|\| IS_ENABLED(CONFIG_IPV6)
5967	static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
5968	{
5969	params->h_vlan_TCI = `0`;
5970	params->h_vlan_proto = `0`;
5971	if (mtu)
5972	params->mtu_result = mtu; / union with tot_len /
5973
5974	return `0`;
5975	}
5976	#endif
5977
5978	#if IS_ENABLED(CONFIG_INET)
5979	static int bpf_ipv4_fib_lookup(struct net net, struct* bpf_fib_lookup *params,
5980	u32 flags, bool check_mtu)
5981	{
5982	struct fib_nh_common *nhc;
5983	struct in_device *in_dev;
5984	struct neighbour *neigh;
5985	struct net_device *dev;
5986	struct fib_result res;
5987	struct flowi4 fl4;
5988	u32 mtu = `0`;
5989	int err;
5990
5991	dev = dev_get_by_index_rcu(net, ifindex: params->ifindex);
5992	if (unlikely(!dev))
5993	return -ENODEV;
5994
5995	/ verify forwarding is enabled on this interface /
5996	in_dev = __in_dev_get_rcu(dev);
5997	if (unlikely(!in_dev \|\| !IN_DEV_FORWARD(in_dev)))
5998	return BPF_FIB_LKUP_RET_FWD_DISABLED;
5999
6000	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
6001	fl4.flowi4_iif = `1`;
6002	fl4.flowi4_oif = params->ifindex;
6003	} else {
6004	fl4.flowi4_iif = params->ifindex;
6005	fl4.flowi4_oif = `0`;
6006	}
6007	fl4.flowi4_tos = params->tos & INET_DSCP_MASK;
6008	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
6009	fl4.flowi4_flags = `0`;
6010
6011	fl4.flowi4_proto = params->l4_protocol;
6012	fl4.daddr = params->ipv4_dst;
6013	fl4.saddr = params->ipv4_src;
6014	fl4.fl4_sport = params->sport;
6015	fl4.fl4_dport = params->dport;
6016	fl4.flowi4_multipath_hash = `0`;
6017
6018	if (flags & BPF_FIB_LOOKUP_DIRECT) {
6019	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
6020	struct fib_table *tb;
6021
6022	if (flags & BPF_FIB_LOOKUP_TBID) {
6023	tbid = params->tbid;
6024	/ zero out for vlan output /
6025	params->tbid = `0`;
6026	}
6027
6028	tb = fib_get_table(net, id: tbid);
6029	if (unlikely(!tb))
6030	return BPF_FIB_LKUP_RET_NOT_FWDED;
6031
6032	err = fib_table_lookup(tb, flp: &fl4, res: &res, FIB_LOOKUP_NOREF);
6033	} else {
6034	if (flags & BPF_FIB_LOOKUP_MARK)
6035	fl4.flowi4_mark = params->mark;
6036	else
6037	fl4.flowi4_mark = `0`;
6038	fl4.flowi4_secid = `0`;
6039	fl4.flowi4_tun_key.tun_id = `0`;
6040	fl4.flowi4_uid = sock_net_uid(net, NULL);
6041
6042	err = fib_lookup(net, flp: &fl4, res: &res, FIB_LOOKUP_NOREF);
6043	}
6044
6045	if (err) {
6046	/ map fib lookup errors to RTN_ type /
6047	if (err == -EINVAL)
6048	return BPF_FIB_LKUP_RET_BLACKHOLE;
6049	if (err == -EHOSTUNREACH)
6050	return BPF_FIB_LKUP_RET_UNREACHABLE;
6051	if (err == -EACCES)
6052	return BPF_FIB_LKUP_RET_PROHIBIT;
6053
6054	return BPF_FIB_LKUP_RET_NOT_FWDED;
6055	}
6056
6057	if (res.type != RTN_UNICAST)
6058	return BPF_FIB_LKUP_RET_NOT_FWDED;
6059
6060	if (fib_info_num_path(fi: res.fi) > `1`)
6061	fib_select_path(net, res: &res, fl4: &fl4, NULL);
6062
6063	if (check_mtu) {
6064	mtu = ip_mtu_from_fib_result(res: &res, daddr: params->ipv4_dst);
6065	if (params->tot_len > mtu) {
6066	params->mtu_result = mtu; / union with tot_len /
6067	return BPF_FIB_LKUP_RET_FRAG_NEEDED;
6068	}
6069	}
6070
6071	nhc = res.nhc;
6072
6073	/ do not handle lwt encaps right now /
6074	if (nhc->nhc_lwtstate)
6075	return BPF_FIB_LKUP_RET_UNSUPP_LWT;
6076
6077	dev = nhc->nhc_dev;
6078
6079	params->rt_metric = res.fi->fib_priority;
6080	params->ifindex = dev->ifindex;
6081
6082	if (flags & BPF_FIB_LOOKUP_SRC)
6083	params->ipv4_src = fib_result_prefsrc(net, res: &res);
6084
6085	/ xdp and cls_bpf programs are run in RCU-bh so*
6086	* rcu_read_lock_bh is not needed here
6087	*/
6088	if (likely(nhc->nhc_gw_family != AF_INET6)) {
6089	if (nhc->nhc_gw_family)
6090	params->ipv4_dst = nhc->nhc_gw.ipv4;
6091	} else {
6092	struct in6_addr dst = (struct* in6_addr *)params->ipv6_dst;
6093
6094	params->family = AF_INET6;
6095	*dst = nhc->nhc_gw.ipv6;
6096	}
6097
6098	if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
6099	goto set_fwd_params;
6100
6101	if (likely(nhc->nhc_gw_family != AF_INET6))
6102	neigh = __ipv4_neigh_lookup_noref(dev,
6103	key: (__force u32)params->ipv4_dst);
6104	else
6105	neigh = __ipv6_neigh_lookup_noref_stub(dev, pkey: params->ipv6_dst);
6106
6107	if (!neigh \|\| !(READ_ONCE(neigh->nud_state) & NUD_VALID))
6108	return BPF_FIB_LKUP_RET_NO_NEIGH;
6109	memcpy(params->dmac, neigh->ha, ETH_ALEN);
6110	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
6111
6112	set_fwd_params:
6113	return bpf_fib_set_fwd_params(params, mtu);
6114	}
6115	#endif
6116
6117	#if IS_ENABLED(CONFIG_IPV6)
6118	static int bpf_ipv6_fib_lookup(struct net net, struct* bpf_fib_lookup *params,
6119	u32 flags, bool check_mtu)
6120	{
6121	struct in6_addr src = (struct* in6_addr *) params->ipv6_src;
6122	struct in6_addr dst = (struct* in6_addr *) params->ipv6_dst;
6123	struct fib6_result res = {};
6124	struct neighbour *neigh;
6125	struct net_device *dev;
6126	struct inet6_dev *idev;
6127	struct flowi6 fl6;
6128	int strict = `0`;
6129	int oif, err;
6130	u32 mtu = `0`;
6131
6132	/ link local addresses are never forwarded /
6133	if (rt6_need_strict(daddr: dst) \|\| rt6_need_strict(daddr: src))
6134	return BPF_FIB_LKUP_RET_NOT_FWDED;
6135
6136	dev = dev_get_by_index_rcu(net, ifindex: params->ifindex);
6137	if (unlikely(!dev))
6138	return -ENODEV;
6139
6140	idev = __in6_dev_get_safely(dev);
6141	if (unlikely(!idev \|\| !READ_ONCE(idev->cnf.forwarding)))
6142	return BPF_FIB_LKUP_RET_FWD_DISABLED;
6143
6144	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
6145	fl6.flowi6_iif = `1`;
6146	oif = fl6.flowi6_oif = params->ifindex;
6147	} else {
6148	oif = fl6.flowi6_iif = params->ifindex;
6149	fl6.flowi6_oif = `0`;
6150	strict = RT6_LOOKUP_F_HAS_SADDR;
6151	}
6152	fl6.flowlabel = params->flowinfo;
6153	fl6.flowi6_scope = `0`;
6154	fl6.flowi6_flags = `0`;
6155	fl6.mp_hash = `0`;
6156
6157	fl6.flowi6_proto = params->l4_protocol;
6158	fl6.daddr = *dst;
6159	fl6.saddr = *src;
6160	fl6.fl6_sport = params->sport;
6161	fl6.fl6_dport = params->dport;
6162
6163	if (flags & BPF_FIB_LOOKUP_DIRECT) {
6164	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
6165	struct fib6_table *tb;
6166
6167	if (flags & BPF_FIB_LOOKUP_TBID) {
6168	tbid = params->tbid;
6169	/ zero out for vlan output /
6170	params->tbid = `0`;
6171	}
6172
6173	tb = ipv6_stub->fib6_get_table(net, tbid);
6174	if (unlikely(!tb))
6175	return BPF_FIB_LKUP_RET_NOT_FWDED;
6176
6177	err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
6178	strict);
6179	} else {
6180	if (flags & BPF_FIB_LOOKUP_MARK)
6181	fl6.flowi6_mark = params->mark;
6182	else
6183	fl6.flowi6_mark = `0`;
6184	fl6.flowi6_secid = `0`;
6185	fl6.flowi6_tun_key.tun_id = `0`;
6186	fl6.flowi6_uid = sock_net_uid(net, NULL);
6187
6188	err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
6189	}
6190
6191	if (unlikely(err \|\| IS_ERR_OR_NULL(res.f6i) \|\|
6192	res.f6i == net->ipv6.fib6_null_entry))
6193	return BPF_FIB_LKUP_RET_NOT_FWDED;
6194
6195	switch (res.fib6_type) {
6196	/ only unicast is forwarded /
6197	case RTN_UNICAST:
6198	break;
6199	case RTN_BLACKHOLE:
6200	return BPF_FIB_LKUP_RET_BLACKHOLE;
6201	case RTN_UNREACHABLE:
6202	return BPF_FIB_LKUP_RET_UNREACHABLE;
6203	case RTN_PROHIBIT:
6204	return BPF_FIB_LKUP_RET_PROHIBIT;
6205	default:
6206	return BPF_FIB_LKUP_RET_NOT_FWDED;
6207	}
6208
6209	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
6210	fl6.flowi6_oif != `0`, NULL, strict);
6211
6212	if (check_mtu) {
6213	mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
6214	if (params->tot_len > mtu) {
6215	params->mtu_result = mtu; / union with tot_len /
6216	return BPF_FIB_LKUP_RET_FRAG_NEEDED;
6217	}
6218	}
6219
6220	if (res.nh->fib_nh_lws)
6221	return BPF_FIB_LKUP_RET_UNSUPP_LWT;
6222
6223	if (res.nh->fib_nh_gw_family)
6224	*dst = res.nh->fib_nh_gw6;
6225
6226	dev = res.nh->fib_nh_dev;
6227	params->rt_metric = res.f6i->fib6_metric;
6228	params->ifindex = dev->ifindex;
6229
6230	if (flags & BPF_FIB_LOOKUP_SRC) {
6231	if (res.f6i->fib6_prefsrc.plen) {
6232	*src = res.f6i->fib6_prefsrc.addr;
6233	} else {
6234	err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
6235	&fl6.daddr, `0`,
6236	src);
6237	if (err)
6238	return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
6239	}
6240	}
6241
6242	if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
6243	goto set_fwd_params;
6244
6245	/ xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is*
6246	* not needed here.
6247	*/
6248	neigh = __ipv6_neigh_lookup_noref_stub(dev, pkey: dst);
6249	if (!neigh \|\| !(READ_ONCE(neigh->nud_state) & NUD_VALID))
6250	return BPF_FIB_LKUP_RET_NO_NEIGH;
6251	memcpy(params->dmac, neigh->ha, ETH_ALEN);
6252	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
6253
6254	set_fwd_params:
6255	return bpf_fib_set_fwd_params(params, mtu);
6256	}
6257	#endif
6258
/* All flag bits accepted by the fib_lookup helpers; any other bit in
 * the flags argument makes them return -EINVAL.
 */
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
			     BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
			     BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
6262
6263	BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
6264	struct bpf_fib_lookup , params, int*, plen, u32, flags)
6265	{
6266	if (plen < sizeof(*params))
6267	return -EINVAL;
6268
6269	if (flags & ~BPF_FIB_LOOKUP_MASK)
6270	return -EINVAL;
6271
6272	switch (params->family) {
6273	#if IS_ENABLED(CONFIG_INET)
6274	case AF_INET:
6275	return bpf_ipv4_fib_lookup(net: dev_net(dev: ctx->rxq->dev), params,
6276	flags, check_mtu: true);
6277	#endif
6278	#if IS_ENABLED(CONFIG_IPV6)
6279	case AF_INET6:
6280	return bpf_ipv6_fib_lookup(net: dev_net(dev: ctx->rxq->dev), params,
6281	flags, check_mtu: true);
6282	#endif
6283	}
6284	return -EAFNOSUPPORT;
6285	}
6286
6287	static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
6288	.func = bpf_xdp_fib_lookup,
6289	.gpl_only = true,
6290	.ret_type = RET_INTEGER,
6291	.arg1_type = ARG_PTR_TO_CTX,
6292	.arg2_type = ARG_PTR_TO_MEM,
6293	.arg3_type = ARG_CONST_SIZE,
6294	.arg4_type = ARG_ANYTHING,
6295	};
6296
6297	BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
6298	struct bpf_fib_lookup , params, int*, plen, u32, flags)
6299	{
6300	struct net *net = dev_net(dev: skb->dev);
6301	int rc = -EAFNOSUPPORT;
6302	bool check_mtu = false;
6303
6304	if (plen < sizeof(*params))
6305	return -EINVAL;
6306
6307	if (flags & ~BPF_FIB_LOOKUP_MASK)
6308	return -EINVAL;
6309
6310	if (params->tot_len)
6311	check_mtu = true;
6312
6313	switch (params->family) {
6314	#if IS_ENABLED(CONFIG_INET)
6315	case AF_INET:
6316	rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
6317	break;
6318	#endif
6319	#if IS_ENABLED(CONFIG_IPV6)
6320	case AF_INET6:
6321	rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
6322	break;
6323	#endif
6324	}
6325
6326	if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
6327	struct net_device *dev;
6328
6329	/ When tot_len isn't provided by user, check skb*
6330	* against MTU of FIB lookup resulting net_device
6331	*/
6332	dev = dev_get_by_index_rcu(net, ifindex: params->ifindex);
6333	if (!is_skb_forwardable(dev, skb))
6334	rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
6335
6336	params->mtu_result = dev->mtu; / union with tot_len /
6337	}
6338
6339	return rc;
6340	}
6341
6342	static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
6343	.func = bpf_skb_fib_lookup,
6344	.gpl_only = true,
6345	.ret_type = RET_INTEGER,
6346	.arg1_type = ARG_PTR_TO_CTX,
6347	.arg2_type = ARG_PTR_TO_MEM,
6348	.arg3_type = ARG_CONST_SIZE,
6349	.arg4_type = ARG_ANYTHING,
6350	};
6351
6352	static struct net_device __dev_via_ifindex(struct* net_device *dev_curr,
6353	u32 ifindex)
6354	{
6355	struct net *netns = dev_net(dev: dev_curr);
6356
6357	/ Non-redirect use-cases can use ifindex=0 and save ifindex lookup /
6358	if (ifindex == `0`)
6359	return dev_curr;
6360
6361	return dev_get_by_index_rcu(net: netns, ifindex);
6362	}
6363
6364	BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
6365	u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
6366	{
6367	int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
6368	struct net_device *dev = skb->dev;
6369	int mtu, dev_len, skb_len;
6370
6371	if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
6372	return -EINVAL;
6373	if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff \|\| *mtu_len)))
6374	return -EINVAL;
6375
6376	dev = __dev_via_ifindex(dev_curr: dev, ifindex);
6377	if (unlikely(!dev))
6378	return -ENODEV;
6379
6380	mtu = READ_ONCE(dev->mtu);
6381	dev_len = mtu + dev->hard_header_len;
6382
6383	/ If set use mtu_len as input, L3 as iph->tot_len (like fib_lookup) /*
6384	skb_len = mtu_len ? mtu_len + dev->hard_header_len : skb->len;
6385
6386	skb_len += len_diff; / minus result pass check /
6387	if (skb_len <= dev_len) {
6388	ret = BPF_MTU_CHK_RET_SUCCESS;
6389	goto out;
6390	}
6391	/ At this point, skb->len exceed MTU, but as it include length of all*
6392	* segments, it can still be below MTU. The SKB can possibly get
6393	* re-segmented in transmit path (see validate_xmit_skb). Thus, user
6394	* must choose if segs are to be MTU checked.
6395	*/
6396	if (skb_is_gso(skb)) {
6397	ret = BPF_MTU_CHK_RET_SUCCESS;
6398	if (flags & BPF_MTU_CHK_SEGS &&
6399	!skb_gso_validate_network_len(skb, mtu))
6400	ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
6401	}
6402	out:
6403	*mtu_len = mtu;
6404	return ret;
6405	}
6406
6407	BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
6408	u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
6409	{
6410	struct net_device *dev = xdp->rxq->dev;
6411	int xdp_len = xdp->data_end - xdp->data;
6412	int ret = BPF_MTU_CHK_RET_SUCCESS;
6413	int mtu, dev_len;
6414
6415	/ XDP variant doesn't support multi-buffer segment check (yet) /
6416	if (unlikely(flags))
6417	return -EINVAL;
6418
6419	dev = __dev_via_ifindex(dev_curr: dev, ifindex);
6420	if (unlikely(!dev))
6421	return -ENODEV;
6422
6423	mtu = READ_ONCE(dev->mtu);
6424	dev_len = mtu + dev->hard_header_len;
6425
6426	/ Use mtu_len as input, L3 as iph->tot_len (like fib_lookup) /*
6427	if (*mtu_len)
6428	xdp_len = *mtu_len + dev->hard_header_len;
6429
6430	xdp_len += len_diff; / minus result pass check /
6431	if (xdp_len > dev_len)
6432	ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
6433
6434	*mtu_len = mtu;
6435	return ret;
6436	}
6437
6438	static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
6439	.func = bpf_skb_check_mtu,
6440	.gpl_only = true,
6441	.ret_type = RET_INTEGER,
6442	.arg1_type = ARG_PTR_TO_CTX,
6443	.arg2_type = ARG_ANYTHING,
6444	.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM \| MEM_WRITE \| MEM_ALIGNED,
6445	.arg3_size = sizeof(u32),
6446	.arg4_type = ARG_ANYTHING,
6447	.arg5_type = ARG_ANYTHING,
6448	};
6449
6450	static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
6451	.func = bpf_xdp_check_mtu,
6452	.gpl_only = true,
6453	.ret_type = RET_INTEGER,
6454	.arg1_type = ARG_PTR_TO_CTX,
6455	.arg2_type = ARG_ANYTHING,
6456	.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM \| MEM_WRITE \| MEM_ALIGNED,
6457	.arg3_size = sizeof(u32),
6458	.arg4_type = ARG_ANYTHING,
6459	.arg5_type = ARG_ANYTHING,
6460	};
6461
6462	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6463	static int bpf_push_seg6_encap(struct sk_buff skb, u32 type, void* *hdr, u32 len)
6464	{
6465	int err;
6466	struct ipv6_sr_hdr srh = (struct* ipv6_sr_hdr *)hdr;
6467
6468	if (!seg6_validate_srh(srh, len, reduced: false))
6469	return -EINVAL;
6470
6471	switch (type) {
6472	case BPF_LWT_ENCAP_SEG6_INLINE:
6473	if (skb->protocol != htons(ETH_P_IPV6))
6474	return -EBADMSG;
6475
6476	err = seg6_do_srh_inline(skb, osrh: srh);
6477	break;
6478	case BPF_LWT_ENCAP_SEG6:
6479	skb_reset_inner_headers(skb);
6480	skb->encapsulation = `1`;
6481	err = seg6_do_srh_encap(skb, osrh: srh, IPPROTO_IPV6);
6482	break;
6483	default:
6484	return -EINVAL;
6485	}
6486
6487	bpf_compute_data_pointers(skb);
6488	if (err)
6489	return err;
6490
6491	skb_set_transport_header(skb, offset: sizeof(struct ipv6hdr));
6492
6493	return seg6_lookup_nexthop(skb, NULL, tbl_id: `0`);
6494	}
6495	#endif /* CONFIG_IPV6_SEG6_BPF */
6496
6497	#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6498	static int bpf_push_ip_encap(struct sk_buff skb, void* *hdr, u32 len,
6499	bool ingress)
6500	{
6501	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
6502	}
6503	#endif
6504
6505	BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff , skb, u32, type, void* *, hdr,
6506	u32, len)
6507	{
6508	switch (type) {
6509	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6510	case BPF_LWT_ENCAP_SEG6:
6511	case BPF_LWT_ENCAP_SEG6_INLINE:
6512	return bpf_push_seg6_encap(skb, type, hdr, len);
6513	#endif
6514	#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6515	case BPF_LWT_ENCAP_IP:
6516	return bpf_push_ip_encap(skb, hdr, len, ingress: true / ingress /);
6517	#endif
6518	default:
6519	return -EINVAL;
6520	}
6521	}
6522
6523	BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
6524	void *, hdr, u32, len)
6525	{
6526	switch (type) {
6527	#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6528	case BPF_LWT_ENCAP_IP:
6529	return bpf_push_ip_encap(skb, hdr, len, ingress: false / egress /);
6530	#endif
6531	default:
6532	return -EINVAL;
6533	}
6534	}
6535
6536	static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
6537	.func = bpf_lwt_in_push_encap,
6538	.gpl_only = false,
6539	.ret_type = RET_INTEGER,
6540	.arg1_type = ARG_PTR_TO_CTX,
6541	.arg2_type = ARG_ANYTHING,
6542	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6543	.arg4_type = ARG_CONST_SIZE
6544	};
6545
6546	static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
6547	.func = bpf_lwt_xmit_push_encap,
6548	.gpl_only = false,
6549	.ret_type = RET_INTEGER,
6550	.arg1_type = ARG_PTR_TO_CTX,
6551	.arg2_type = ARG_ANYTHING,
6552	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6553	.arg4_type = ARG_CONST_SIZE
6554	};
6555
6556	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6557	BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
6558	const void *, from, u32, len)
6559	{
6560	struct seg6_bpf_srh_state *srh_state =
6561	this_cpu_ptr(&seg6_bpf_srh_states);
6562	struct ipv6_sr_hdr *srh = srh_state->srh;
6563	void srh_tlvs, srh_end, *ptr;
6564	int srhoff = `0`;
6565
6566	lockdep_assert_held(&srh_state->bh_lock);
6567	if (srh == NULL)
6568	return -EINVAL;
6569
6570	srh_tlvs = (void )((char* *)srh + ((srh->first_segment + `1`) << `4`));
6571	srh_end = (void )((char* )srh + sizeof(srh) + srh_state->hdrlen);
6572
6573	ptr = skb->data + offset;
6574	if (ptr >= srh_tlvs && ptr + len <= srh_end)
6575	srh_state->valid = false;
6576	else if (ptr < (void *)&srh->flags \|\|
6577	ptr + len > (void *)&srh->segments)
6578	return -EFAULT;
6579
6580	if (unlikely(bpf_try_make_writable(skb, offset + len)))
6581	return -EFAULT;
6582	if (ipv6_find_hdr(skb, offset: &srhoff, IPPROTO_ROUTING, NULL, NULL) < `0`)
6583	return -EINVAL;
6584	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6585
6586	memcpy(skb->data + offset, from, len);
6587	return `0`;
6588	}
6589
6590	static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
6591	.func = bpf_lwt_seg6_store_bytes,
6592	.gpl_only = false,
6593	.ret_type = RET_INTEGER,
6594	.arg1_type = ARG_PTR_TO_CTX,
6595	.arg2_type = ARG_ANYTHING,
6596	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6597	.arg4_type = ARG_CONST_SIZE
6598	};
6599
6600	static void bpf_update_srh_state(struct sk_buff *skb)
6601	{
6602	struct seg6_bpf_srh_state *srh_state =
6603	this_cpu_ptr(&seg6_bpf_srh_states);
6604	int srhoff = `0`;
6605
6606	if (ipv6_find_hdr(skb, offset: &srhoff, IPPROTO_ROUTING, NULL, NULL) < `0`) {
6607	srh_state->srh = NULL;
6608	} else {
6609	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6610	srh_state->hdrlen = srh_state->srh->hdrlen << `3`;
6611	srh_state->valid = true;
6612	}
6613	}
6614
6615	BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
6616	u32, action, void *, param, u32, param_len)
6617	{
6618	struct seg6_bpf_srh_state *srh_state =
6619	this_cpu_ptr(&seg6_bpf_srh_states);
6620	int hdroff = `0`;
6621	int err;
6622
6623	lockdep_assert_held(&srh_state->bh_lock);
6624	switch (action) {
6625	case SEG6_LOCAL_ACTION_END_X:
6626	if (!seg6_bpf_has_valid_srh(skb))
6627	return -EBADMSG;
6628	if (param_len != sizeof(struct in6_addr))
6629	return -EINVAL;
6630	return seg6_lookup_nexthop(skb, nhaddr: (struct in6_addr *)param, tbl_id: `0`);
6631	case SEG6_LOCAL_ACTION_END_T:
6632	if (!seg6_bpf_has_valid_srh(skb))
6633	return -EBADMSG;
6634	if (param_len != sizeof(int))
6635	return -EINVAL;
6636	return seg6_lookup_nexthop(skb, NULL, tbl_id: (int* *)param);
6637	case SEG6_LOCAL_ACTION_END_DT6:
6638	if (!seg6_bpf_has_valid_srh(skb))
6639	return -EBADMSG;
6640	if (param_len != sizeof(int))
6641	return -EINVAL;
6642
6643	if (ipv6_find_hdr(skb, offset: &hdroff, IPPROTO_IPV6, NULL, NULL) < `0`)
6644	return -EBADMSG;
6645	if (!pskb_pull(skb, len: hdroff))
6646	return -EBADMSG;
6647
6648	skb_postpull_rcsum(skb, start: skb_network_header(skb), len: hdroff);
6649	skb_reset_network_header(skb);
6650	skb_reset_transport_header(skb);
6651	skb->encapsulation = `0`;
6652
6653	bpf_compute_data_pointers(skb);
6654	bpf_update_srh_state(skb);
6655	return seg6_lookup_nexthop(skb, NULL, tbl_id: (int* *)param);
6656	case SEG6_LOCAL_ACTION_END_B6:
6657	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6658	return -EBADMSG;
6659	err = bpf_push_seg6_encap(skb, type: BPF_LWT_ENCAP_SEG6_INLINE,
6660	hdr: param, len: param_len);
6661	if (!err)
6662	bpf_update_srh_state(skb);
6663
6664	return err;
6665	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
6666	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6667	return -EBADMSG;
6668	err = bpf_push_seg6_encap(skb, type: BPF_LWT_ENCAP_SEG6,
6669	hdr: param, len: param_len);
6670	if (!err)
6671	bpf_update_srh_state(skb);
6672
6673	return err;
6674	default:
6675	return -EINVAL;
6676	}
6677	}
6678
6679	static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
6680	.func = bpf_lwt_seg6_action,
6681	.gpl_only = false,
6682	.ret_type = RET_INTEGER,
6683	.arg1_type = ARG_PTR_TO_CTX,
6684	.arg2_type = ARG_ANYTHING,
6685	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6686	.arg4_type = ARG_CONST_SIZE
6687	};
6688
6689	BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
6690	s32, len)
6691	{
6692	struct seg6_bpf_srh_state *srh_state =
6693	this_cpu_ptr(&seg6_bpf_srh_states);
6694	struct ipv6_sr_hdr *srh = srh_state->srh;
6695	void srh_end, srh_tlvs, *ptr;
6696	struct ipv6hdr *hdr;
6697	int srhoff = `0`;
6698	int ret;
6699
6700	lockdep_assert_held(&srh_state->bh_lock);
6701	if (unlikely(srh == NULL))
6702	return -EINVAL;
6703
6704	srh_tlvs = (void )((unsigned* char )srh + sizeof(srh) +
6705	((srh->first_segment + `1`) << `4`));
6706	srh_end = (void )((unsigned* char )srh + sizeof(srh) +
6707	srh_state->hdrlen);
6708	ptr = skb->data + offset;
6709
6710	if (unlikely(ptr < srh_tlvs \|\| ptr > srh_end))
6711	return -EFAULT;
6712	if (unlikely(len < `0` && (void )((char* *)ptr - len) > srh_end))
6713	return -EFAULT;
6714
6715	if (len > `0`) {
6716	ret = skb_cow_head(skb, headroom: len);
6717	if (unlikely(ret < `0`))
6718	return ret;
6719
6720	ret = bpf_skb_net_hdr_push(skb, off: offset, len);
6721	} else {
6722	ret = bpf_skb_net_hdr_pop(skb, off: offset, len: -`1` * len);
6723	}
6724
6725	bpf_compute_data_pointers(skb);
6726	if (unlikely(ret < `0`))
6727	return ret;
6728
6729	hdr = (struct ipv6hdr *)skb->data;
6730	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
6731
6732	if (ipv6_find_hdr(skb, offset: &srhoff, IPPROTO_ROUTING, NULL, NULL) < `0`)
6733	return -EINVAL;
6734	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6735	srh_state->hdrlen += len;
6736	srh_state->valid = false;
6737	return `0`;
6738	}
6739
6740	static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
6741	.func = bpf_lwt_seg6_adjust_srh,
6742	.gpl_only = false,
6743	.ret_type = RET_INTEGER,
6744	.arg1_type = ARG_PTR_TO_CTX,
6745	.arg2_type = ARG_ANYTHING,
6746	.arg3_type = ARG_ANYTHING,
6747	};
6748	#endif /* CONFIG_IPV6_SEG6_BPF */
6749
6750	#ifdef CONFIG_INET
6751	static struct sock sk_lookup(struct* net net, struct* bpf_sock_tuple *tuple,
6752	int dif, int sdif, u8 family, u8 proto)
6753	{
6754	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
6755	bool refcounted = false;
6756	struct sock *sk = NULL;
6757
6758	if (family == AF_INET) {
6759	__be32 src4 = tuple->ipv4.saddr;
6760	__be32 dst4 = tuple->ipv4.daddr;
6761
6762	if (proto == IPPROTO_TCP)
6763	sk = __inet_lookup(net, hashinfo: hinfo, NULL, doff: `0`,
6764	saddr: src4, sport: tuple->ipv4.sport,
6765	daddr: dst4, dport: tuple->ipv4.dport,
6766	dif, sdif, refcounted: &refcounted);
6767	else
6768	sk = __udp4_lib_lookup(net, saddr: src4, sport: tuple->ipv4.sport,
6769	daddr: dst4, dport: tuple->ipv4.dport,
6770	dif, sdif, tbl: net->ipv4.udp_table, NULL);
6771	#if IS_ENABLED(CONFIG_IPV6)
6772	} else {
6773	struct in6_addr src6 = (struct* in6_addr *)&tuple->ipv6.saddr;
6774	struct in6_addr dst6 = (struct* in6_addr *)&tuple->ipv6.daddr;
6775
6776	if (proto == IPPROTO_TCP)
6777	sk = __inet6_lookup(net, hashinfo: hinfo, NULL, doff: `0`,
6778	saddr: src6, sport: tuple->ipv6.sport,
6779	daddr: dst6, ntohs(tuple->ipv6.dport),
6780	dif, sdif, refcounted: &refcounted);
6781	else if (likely(ipv6_bpf_stub))
6782	sk = ipv6_bpf_stub->udp6_lib_lookup(net,
6783	src6, tuple->ipv6.sport,
6784	dst6, tuple->ipv6.dport,
6785	dif, sdif,
6786	net->ipv4.udp_table, NULL);
6787	#endif
6788	}
6789
6790	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
6791	WARN_ONCE(`1`, "Found non-RCU, unreferenced socket!");
6792	sk = NULL;
6793	}
6794	return sk;
6795	}
6796
6797	/ bpf_skc_lookup performs the core lookup for different types of sockets,*
6798	* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
6799	*/
6800	static struct sock *
6801	__bpf_skc_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6802	struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6803	u64 flags, int sdif)
6804	{
6805	struct sock *sk = NULL;
6806	struct net *net;
6807	u8 family;
6808
6809	if (len == sizeof(tuple->ipv4))
6810	family = AF_INET;
6811	else if (len == sizeof(tuple->ipv6))
6812	family = AF_INET6;
6813	else
6814	return NULL;
6815
6816	if (unlikely(flags \|\| !((s32)netns_id < `0` \|\| netns_id <= S32_MAX)))
6817	goto out;
6818
6819	if (sdif < `0`) {
6820	if (family == AF_INET)
6821	sdif = inet_sdif(skb);
6822	else
6823	sdif = inet6_sdif(skb);
6824	}
6825
6826	if ((s32)netns_id < `0`) {
6827	net = caller_net;
6828	sk = sk_lookup(net, tuple, dif: ifindex, sdif, family, proto);
6829	} else {
6830	net = get_net_ns_by_id(net: caller_net, id: netns_id);
6831	if (unlikely(!net))
6832	goto out;
6833	sk = sk_lookup(net, tuple, dif: ifindex, sdif, family, proto);
6834	put_net(net);
6835	}
6836
6837	out:
6838	return sk;
6839	}
6840
6841	static struct sock *
6842	__bpf_sk_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6843	struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6844	u64 flags, int sdif)
6845	{
6846	struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
6847	ifindex, proto, netns_id, flags,
6848	sdif);
6849
6850	if (sk) {
6851	struct sock *sk2 = sk_to_full_sk(sk);
6852
6853	/ sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk*
6854	* sock refcnt is decremented to prevent a request_sock leak.
6855	*/
6856	if (sk2 != sk) {
6857	sock_gen_put(sk);
6858	/ Ensure there is no need to bump sk2 refcnt /
6859	if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
6860	WARN_ONCE(`1`, "Found non-RCU, unreferenced socket!");
6861	return NULL;
6862	}
6863	sk = sk2;
6864	}
6865	}
6866
6867	return sk;
6868	}
6869
6870	static struct sock *
6871	bpf_skc_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6872	u8 proto, u64 netns_id, u64 flags)
6873	{
6874	struct net *caller_net;
6875	int ifindex;
6876
6877	if (skb->dev) {
6878	caller_net = dev_net(dev: skb->dev);
6879	ifindex = skb->dev->ifindex;
6880	} else {
6881	caller_net = sock_net(sk: skb->sk);
6882	ifindex = `0`;
6883	}
6884
6885	return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
6886	netns_id, flags, sdif: -`1`);
6887	}
6888
6889	static struct sock *
6890	bpf_sk_lookup(struct sk_buff skb, struct* bpf_sock_tuple *tuple, u32 len,
6891	u8 proto, u64 netns_id, u64 flags)
6892	{
6893	struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
6894	flags);
6895
6896	if (sk) {
6897	struct sock *sk2 = sk_to_full_sk(sk);
6898
6899	/ sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk*
6900	* sock refcnt is decremented to prevent a request_sock leak.
6901	*/
6902	if (sk2 != sk) {
6903	sock_gen_put(sk);
6904	/ Ensure there is no need to bump sk2 refcnt /
6905	if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
6906	WARN_ONCE(`1`, "Found non-RCU, unreferenced socket!");
6907	return NULL;
6908	}
6909	sk = sk2;
6910	}
6911	}
6912
6913	return sk;
6914	}
6915
6916	BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
6917	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6918	{
6919	return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
6920	netns_id, flags);
6921	}
6922
6923	static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
6924	.func = bpf_skc_lookup_tcp,
6925	.gpl_only = false,
6926	.pkt_access = true,
6927	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6928	.arg1_type = ARG_PTR_TO_CTX,
6929	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6930	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
6931	.arg4_type = ARG_ANYTHING,
6932	.arg5_type = ARG_ANYTHING,
6933	};
6934
6935	BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
6936	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6937	{
6938	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
6939	netns_id, flags);
6940	}
6941
6942	static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
6943	.func = bpf_sk_lookup_tcp,
6944	.gpl_only = false,
6945	.pkt_access = true,
6946	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6947	.arg1_type = ARG_PTR_TO_CTX,
6948	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6949	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
6950	.arg4_type = ARG_ANYTHING,
6951	.arg5_type = ARG_ANYTHING,
6952	};
6953
6954	BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
6955	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6956	{
6957	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
6958	netns_id, flags);
6959	}
6960
6961	static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
6962	.func = bpf_sk_lookup_udp,
6963	.gpl_only = false,
6964	.pkt_access = true,
6965	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6966	.arg1_type = ARG_PTR_TO_CTX,
6967	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6968	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
6969	.arg4_type = ARG_ANYTHING,
6970	.arg5_type = ARG_ANYTHING,
6971	};
6972
6973	BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
6974	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6975	{
6976	struct net_device *dev = skb->dev;
6977	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
6978	struct net *caller_net = dev_net(dev);
6979
6980	return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
6981	ifindex, IPPROTO_TCP, netns_id,
6982	flags, sdif);
6983	}
6984
6985	static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
6986	.func = bpf_tc_skc_lookup_tcp,
6987	.gpl_only = false,
6988	.pkt_access = true,
6989	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6990	.arg1_type = ARG_PTR_TO_CTX,
6991	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
6992	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
6993	.arg4_type = ARG_ANYTHING,
6994	.arg5_type = ARG_ANYTHING,
6995	};
6996
6997	BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
6998	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6999	{
7000	struct net_device *dev = skb->dev;
7001	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
7002	struct net *caller_net = dev_net(dev);
7003
7004	return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
7005	ifindex, IPPROTO_TCP, netns_id,
7006	flags, sdif);
7007	}
7008
7009	static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
7010	.func = bpf_tc_sk_lookup_tcp,
7011	.gpl_only = false,
7012	.pkt_access = true,
7013	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7014	.arg1_type = ARG_PTR_TO_CTX,
7015	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7016	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7017	.arg4_type = ARG_ANYTHING,
7018	.arg5_type = ARG_ANYTHING,
7019	};
7020
7021	BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
7022	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
7023	{
7024	struct net_device *dev = skb->dev;
7025	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
7026	struct net *caller_net = dev_net(dev);
7027
7028	return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
7029	ifindex, IPPROTO_UDP, netns_id,
7030	flags, sdif);
7031	}
7032
7033	static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
7034	.func = bpf_tc_sk_lookup_udp,
7035	.gpl_only = false,
7036	.pkt_access = true,
7037	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7038	.arg1_type = ARG_PTR_TO_CTX,
7039	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7040	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7041	.arg4_type = ARG_ANYTHING,
7042	.arg5_type = ARG_ANYTHING,
7043	};
7044
7045	BPF_CALL_1(bpf_sk_release, struct sock *, sk)
7046	{
7047	if (sk && sk_is_refcounted(sk))
7048	sock_gen_put(sk);
7049	return `0`;
7050	}
7051
7052	static const struct bpf_func_proto bpf_sk_release_proto = {
7053	.func = bpf_sk_release,
7054	.gpl_only = false,
7055	.ret_type = RET_INTEGER,
7056	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON \| OBJ_RELEASE,
7057	};
7058
7059	BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
7060	struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
7061	{
7062	struct net_device *dev = ctx->rxq->dev;
7063	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
7064	struct net *caller_net = dev_net(dev);
7065
7066	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
7067	ifindex, IPPROTO_UDP, netns_id,
7068	flags, sdif);
7069	}
7070
7071	static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
7072	.func = bpf_xdp_sk_lookup_udp,
7073	.gpl_only = false,
7074	.pkt_access = true,
7075	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7076	.arg1_type = ARG_PTR_TO_CTX,
7077	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7078	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7079	.arg4_type = ARG_ANYTHING,
7080	.arg5_type = ARG_ANYTHING,
7081	};
7082
7083	BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
7084	struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
7085	{
7086	struct net_device *dev = ctx->rxq->dev;
7087	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
7088	struct net *caller_net = dev_net(dev);
7089
7090	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
7091	ifindex, IPPROTO_TCP, netns_id,
7092	flags, sdif);
7093	}
7094
7095	static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
7096	.func = bpf_xdp_skc_lookup_tcp,
7097	.gpl_only = false,
7098	.pkt_access = true,
7099	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
7100	.arg1_type = ARG_PTR_TO_CTX,
7101	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7102	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7103	.arg4_type = ARG_ANYTHING,
7104	.arg5_type = ARG_ANYTHING,
7105	};
7106
7107	BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
7108	struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
7109	{
7110	struct net_device *dev = ctx->rxq->dev;
7111	int ifindex = dev->ifindex, sdif = dev_sdif(dev);
7112	struct net *caller_net = dev_net(dev);
7113
7114	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
7115	ifindex, IPPROTO_TCP, netns_id,
7116	flags, sdif);
7117	}
7118
7119	static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
7120	.func = bpf_xdp_sk_lookup_tcp,
7121	.gpl_only = false,
7122	.pkt_access = true,
7123	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7124	.arg1_type = ARG_PTR_TO_CTX,
7125	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7126	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7127	.arg4_type = ARG_ANYTHING,
7128	.arg5_type = ARG_ANYTHING,
7129	};
7130
7131	BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
7132	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
7133	{
7134	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
7135	caller_net: sock_net(sk: ctx->sk), ifindex: `0`,
7136	IPPROTO_TCP, netns_id, flags,
7137	sdif: -`1`);
7138	}
7139
7140	static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
7141	.func = bpf_sock_addr_skc_lookup_tcp,
7142	.gpl_only = false,
7143	.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
7144	.arg1_type = ARG_PTR_TO_CTX,
7145	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7146	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7147	.arg4_type = ARG_ANYTHING,
7148	.arg5_type = ARG_ANYTHING,
7149	};
7150
7151	BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
7152	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
7153	{
7154	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
7155	caller_net: sock_net(sk: ctx->sk), ifindex: `0`, IPPROTO_TCP,
7156	netns_id, flags, sdif: -`1`);
7157	}
7158
7159	static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
7160	.func = bpf_sock_addr_sk_lookup_tcp,
7161	.gpl_only = false,
7162	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7163	.arg1_type = ARG_PTR_TO_CTX,
7164	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7165	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7166	.arg4_type = ARG_ANYTHING,
7167	.arg5_type = ARG_ANYTHING,
7168	};
7169
7170	BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
7171	struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
7172	{
7173	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
7174	caller_net: sock_net(sk: ctx->sk), ifindex: `0`, IPPROTO_UDP,
7175	netns_id, flags, sdif: -`1`);
7176	}
7177
7178	static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
7179	.func = bpf_sock_addr_sk_lookup_udp,
7180	.gpl_only = false,
7181	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7182	.arg1_type = ARG_PTR_TO_CTX,
7183	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7184	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7185	.arg4_type = ARG_ANYTHING,
7186	.arg5_type = ARG_ANYTHING,
7187	};
7188
7189	bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
7190	struct bpf_insn_access_aux *info)
7191	{
7192	if (off < `0` \|\| off >= offsetofend(struct bpf_tcp_sock,
7193	icsk_retransmits))
7194	return false;
7195
7196	if (off % size != `0`)
7197	return false;
7198
7199	switch (off) {
7200	case offsetof(struct bpf_tcp_sock, bytes_received):
7201	case offsetof(struct bpf_tcp_sock, bytes_acked):
7202	return size == sizeof(__u64);
7203	default:
7204	return size == sizeof(__u32);
7205	}
7206	}
7207
7208	u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
7209	const struct bpf_insn *si,
7210	struct bpf_insn *insn_buf,
7211	struct bpf_prog prog, u32 target_size)
7212	{
7213	struct bpf_insn *insn = insn_buf;
7214
7215	#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
7216	do { \
7217	BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
7218	sizeof_field(struct bpf_tcp_sock, FIELD)); \
7219	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
7220	si->dst_reg, si->src_reg, \
7221	offsetof(struct tcp_sock, FIELD)); \
7222	} while (0)
7223
7224	#define BPF_INET_SOCK_GET_COMMON(FIELD) \
7225	do { \
7226	BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
7227	FIELD) > \
7228	sizeof_field(struct bpf_tcp_sock, FIELD)); \
7229	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
7230	struct inet_connection_sock, \
7231	FIELD), \
7232	si->dst_reg, si->src_reg, \
7233	offsetof( \
7234	struct inet_connection_sock, \
7235	FIELD)); \
7236	} while (0)
7237
7238	BTF_TYPE_EMIT(struct bpf_tcp_sock);
7239
7240	switch (si->off) {
7241	case offsetof(struct bpf_tcp_sock, rtt_min):
7242	BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
7243	sizeof(struct minmax));
7244	BUILD_BUG_ON(sizeof(struct minmax) <
7245	sizeof(struct minmax_sample));
7246
7247	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
7248	offsetof(struct tcp_sock, rtt_min) +
7249	offsetof(struct minmax_sample, v));
7250	break;
7251	case offsetof(struct bpf_tcp_sock, snd_cwnd):
7252	BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
7253	break;
7254	case offsetof(struct bpf_tcp_sock, srtt_us):
7255	BPF_TCP_SOCK_GET_COMMON(srtt_us);
7256	break;
7257	case offsetof(struct bpf_tcp_sock, snd_ssthresh):
7258	BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
7259	break;
7260	case offsetof(struct bpf_tcp_sock, rcv_nxt):
7261	BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
7262	break;
7263	case offsetof(struct bpf_tcp_sock, snd_nxt):
7264	BPF_TCP_SOCK_GET_COMMON(snd_nxt);
7265	break;
7266	case offsetof(struct bpf_tcp_sock, snd_una):
7267	BPF_TCP_SOCK_GET_COMMON(snd_una);
7268	break;
7269	case offsetof(struct bpf_tcp_sock, mss_cache):
7270	BPF_TCP_SOCK_GET_COMMON(mss_cache);
7271	break;
7272	case offsetof(struct bpf_tcp_sock, ecn_flags):
7273	BPF_TCP_SOCK_GET_COMMON(ecn_flags);
7274	break;
7275	case offsetof(struct bpf_tcp_sock, rate_delivered):
7276	BPF_TCP_SOCK_GET_COMMON(rate_delivered);
7277	break;
7278	case offsetof(struct bpf_tcp_sock, rate_interval_us):
7279	BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
7280	break;
7281	case offsetof(struct bpf_tcp_sock, packets_out):
7282	BPF_TCP_SOCK_GET_COMMON(packets_out);
7283	break;
7284	case offsetof(struct bpf_tcp_sock, retrans_out):
7285	BPF_TCP_SOCK_GET_COMMON(retrans_out);
7286	break;
7287	case offsetof(struct bpf_tcp_sock, total_retrans):
7288	BPF_TCP_SOCK_GET_COMMON(total_retrans);
7289	break;
7290	case offsetof(struct bpf_tcp_sock, segs_in):
7291	BPF_TCP_SOCK_GET_COMMON(segs_in);
7292	break;
7293	case offsetof(struct bpf_tcp_sock, data_segs_in):
7294	BPF_TCP_SOCK_GET_COMMON(data_segs_in);
7295	break;
7296	case offsetof(struct bpf_tcp_sock, segs_out):
7297	BPF_TCP_SOCK_GET_COMMON(segs_out);
7298	break;
7299	case offsetof(struct bpf_tcp_sock, data_segs_out):
7300	BPF_TCP_SOCK_GET_COMMON(data_segs_out);
7301	break;
7302	case offsetof(struct bpf_tcp_sock, lost_out):
7303	BPF_TCP_SOCK_GET_COMMON(lost_out);
7304	break;
7305	case offsetof(struct bpf_tcp_sock, sacked_out):
7306	BPF_TCP_SOCK_GET_COMMON(sacked_out);
7307	break;
7308	case offsetof(struct bpf_tcp_sock, bytes_received):
7309	BPF_TCP_SOCK_GET_COMMON(bytes_received);
7310	break;
7311	case offsetof(struct bpf_tcp_sock, bytes_acked):
7312	BPF_TCP_SOCK_GET_COMMON(bytes_acked);
7313	break;
7314	case offsetof(struct bpf_tcp_sock, dsack_dups):
7315	BPF_TCP_SOCK_GET_COMMON(dsack_dups);
7316	break;
7317	case offsetof(struct bpf_tcp_sock, delivered):
7318	BPF_TCP_SOCK_GET_COMMON(delivered);
7319	break;
7320	case offsetof(struct bpf_tcp_sock, delivered_ce):
7321	BPF_TCP_SOCK_GET_COMMON(delivered_ce);
7322	break;
7323	case offsetof(struct bpf_tcp_sock, icsk_retransmits):
7324	BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
7325	break;
7326	}
7327
7328	return insn - insn_buf;
7329	}
7330
7331	BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
7332	{
7333	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
7334	return (unsigned long)sk;
7335
7336	return (unsigned long)NULL;
7337	}
7338
7339	const struct bpf_func_proto bpf_tcp_sock_proto = {
7340	.func = bpf_tcp_sock,
7341	.gpl_only = false,
7342	.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
7343	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
7344	};
7345
7346	BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
7347	{
7348	sk = sk_to_full_sk(sk);
7349
7350	if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, flag: SOCK_RCU_FREE))
7351	return (unsigned long)sk;
7352
7353	return (unsigned long)NULL;
7354	}
7355
7356	static const struct bpf_func_proto bpf_get_listener_sock_proto = {
7357	.func = bpf_get_listener_sock,
7358	.gpl_only = false,
7359	.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
7360	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
7361	};
7362
7363	BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
7364	{
7365	unsigned int iphdr_len;
7366
7367	switch (skb_protocol(skb, skip_vlan: true)) {
7368	case cpu_to_be16(ETH_P_IP):
7369	iphdr_len = sizeof(struct iphdr);
7370	break;
7371	case cpu_to_be16(ETH_P_IPV6):
7372	iphdr_len = sizeof(struct ipv6hdr);
7373	break;
7374	default:
7375	return `0`;
7376	}
7377
7378	if (skb_headlen(skb) < iphdr_len)
7379	return `0`;
7380
7381	if (skb_cloned(skb) && !skb_clone_writable(skb, len: iphdr_len))
7382	return `0`;
7383
7384	return INET_ECN_set_ce(skb);
7385	}
7386
7387	bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
7388	struct bpf_insn_access_aux *info)
7389	{
7390	if (off < `0` \|\| off >= offsetofend(struct bpf_xdp_sock, queue_id))
7391	return false;
7392
7393	if (off % size != `0`)
7394	return false;
7395
7396	switch (off) {
7397	default:
7398	return size == sizeof(__u32);
7399	}
7400	}
7401
7402	u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
7403	const struct bpf_insn *si,
7404	struct bpf_insn *insn_buf,
7405	struct bpf_prog prog, u32 target_size)
7406	{
7407	struct bpf_insn *insn = insn_buf;
7408
7409	#define BPF_XDP_SOCK_GET(FIELD) \
7410	do { \
7411	BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
7412	sizeof_field(struct bpf_xdp_sock, FIELD)); \
7413	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
7414	si->dst_reg, si->src_reg, \
7415	offsetof(struct xdp_sock, FIELD)); \
7416	} while (0)
7417
7418	switch (si->off) {
7419	case offsetof(struct bpf_xdp_sock, queue_id):
7420	BPF_XDP_SOCK_GET(queue_id);
7421	break;
7422	}
7423
7424	return insn - insn_buf;
7425	}
7426
7427	static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
7428	.func = bpf_skb_ecn_set_ce,
7429	.gpl_only = false,
7430	.ret_type = RET_INTEGER,
7431	.arg1_type = ARG_PTR_TO_CTX,
7432	};
7433
7434	BPF_CALL_5(bpf_tcp_check_syncookie, struct sock , sk, void* *, iph, u32, iph_len,
7435	struct tcphdr *, th, u32, th_len)
7436	{
7437	#ifdef CONFIG_SYN_COOKIES
7438	int ret;
7439
7440	if (unlikely(!sk \|\| th_len < sizeof(*th)))
7441	return -EINVAL;
7442
7443	/ sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. /
7444	if (sk->sk_protocol != IPPROTO_TCP \|\| sk->sk_state != TCP_LISTEN)
7445	return -EINVAL;
7446
7447	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
7448	return -EINVAL;
7449
7450	if (!th->ack \|\| th->rst \|\| th->syn)
7451	return -ENOENT;
7452
7453	if (unlikely(iph_len < sizeof(struct iphdr)))
7454	return -EINVAL;
7455
7456	if (tcp_synq_no_recent_overflow(sk))
7457	return -ENOENT;
7458
7459	/ Both struct iphdr and struct ipv6hdr have the version field at the*
7460	* same offset so we can cast to the shorter header (struct iphdr).
7461	*/
7462	switch (((struct iphdr *)iph)->version) {
7463	case `4`:
7464	if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
7465	return -EINVAL;
7466
7467	ret = __cookie_v4_check(iph: (struct iphdr *)iph, th);
7468	break;
7469
7470	#if IS_BUILTIN(CONFIG_IPV6)
7471	case `6`:
7472	if (unlikely(iph_len < sizeof(struct ipv6hdr)))
7473	return -EINVAL;
7474
7475	if (sk->sk_family != AF_INET6)
7476	return -EINVAL;
7477
7478	ret = __cookie_v6_check(iph: (struct ipv6hdr *)iph, th);
7479	break;
7480	#endif /* CONFIG_IPV6 */
7481
7482	default:
7483	return -EPROTONOSUPPORT;
7484	}
7485
7486	if (ret > `0`)
7487	return `0`;
7488
7489	return -ENOENT;
7490	#else
7491	return -ENOTSUPP;
7492	#endif
7493	}
7494
7495	static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
7496	.func = bpf_tcp_check_syncookie,
7497	.gpl_only = true,
7498	.pkt_access = true,
7499	.ret_type = RET_INTEGER,
7500	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7501	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7502	.arg3_type = ARG_CONST_SIZE,
7503	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7504	.arg5_type = ARG_CONST_SIZE,
7505	};
7506
7507	BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock , sk, void* *, iph, u32, iph_len,
7508	struct tcphdr *, th, u32, th_len)
7509	{
7510	#ifdef CONFIG_SYN_COOKIES
7511	u32 cookie;
7512	u16 mss;
7513
7514	if (unlikely(!sk \|\| th_len < sizeof(th) \|\| th_len != th->doff `4`))
7515	return -EINVAL;
7516
7517	if (sk->sk_protocol != IPPROTO_TCP \|\| sk->sk_state != TCP_LISTEN)
7518	return -EINVAL;
7519
7520	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
7521	return -ENOENT;
7522
7523	if (!th->syn \|\| th->ack \|\| th->fin \|\| th->rst)
7524	return -EINVAL;
7525
7526	if (unlikely(iph_len < sizeof(struct iphdr)))
7527	return -EINVAL;
7528
7529	/ Both struct iphdr and struct ipv6hdr have the version field at the*
7530	* same offset so we can cast to the shorter header (struct iphdr).
7531	*/
7532	switch (((struct iphdr *)iph)->version) {
7533	case `4`:
7534	if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
7535	return -EINVAL;
7536
7537	mss = tcp_v4_get_syncookie(sk, iph, th, cookie: &cookie);
7538	break;
7539
7540	#if IS_BUILTIN(CONFIG_IPV6)
7541	case `6`:
7542	if (unlikely(iph_len < sizeof(struct ipv6hdr)))
7543	return -EINVAL;
7544
7545	if (sk->sk_family != AF_INET6)
7546	return -EINVAL;
7547
7548	mss = tcp_v6_get_syncookie(sk, iph, th, cookie: &cookie);
7549	break;
7550	#endif /* CONFIG_IPV6 */
7551
7552	default:
7553	return -EPROTONOSUPPORT;
7554	}
7555	if (mss == `0`)
7556	return -ENOENT;
7557
7558	return cookie \| ((u64)mss << `32`);
7559	#else
7560	return -EOPNOTSUPP;
7561	#endif /* CONFIG_SYN_COOKIES */
7562	}
7563
7564	static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
7565	.func = bpf_tcp_gen_syncookie,
7566	.gpl_only = true, / __cookie_v_init_sequence() is GPL /*
7567	.pkt_access = true,
7568	.ret_type = RET_INTEGER,
7569	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7570	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7571	.arg3_type = ARG_CONST_SIZE,
7572	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7573	.arg5_type = ARG_CONST_SIZE,
7574	};
7575
7576	BPF_CALL_3(bpf_sk_assign, struct sk_buff , skb, struct* sock *, sk, u64, flags)
7577	{
7578	if (!sk \|\| flags != `0`)
7579	return -EINVAL;
7580	if (!skb_at_tc_ingress(skb))
7581	return -EOPNOTSUPP;
7582	if (unlikely(dev_net(skb->dev) != sock_net(sk)))
7583	return -ENETUNREACH;
7584	if (sk_unhashed(sk))
7585	return -EOPNOTSUPP;
7586	if (sk_is_refcounted(sk) &&
7587	unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
7588	return -ENOENT;
7589
7590	skb_orphan(skb);
7591	skb->sk = sk;
7592	skb->destructor = sock_pfree;
7593
7594	return `0`;
7595	}
7596
7597	static const struct bpf_func_proto bpf_sk_assign_proto = {
7598	.func = bpf_sk_assign,
7599	.gpl_only = false,
7600	.ret_type = RET_INTEGER,
7601	.arg1_type = ARG_PTR_TO_CTX,
7602	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7603	.arg3_type = ARG_ANYTHING,
7604	};
7605
7606	static const u8 bpf_search_tcp_opt(const* u8 op, const* u8 *opend,
7607	u8 search_kind, const u8 *magic,
7608	u8 magic_len, bool *eol)
7609	{
7610	u8 kind, kind_len;
7611
7612	*eol = false;
7613
7614	while (op < opend) {
7615	kind = op[`0`];
7616
7617	if (kind == TCPOPT_EOL) {
7618	*eol = true;
7619	return ERR_PTR(error: -ENOMSG);
7620	} else if (kind == TCPOPT_NOP) {
7621	op++;
7622	continue;
7623	}
7624
7625	if (opend - op < `2` \|\| opend - op < op[`1`] \|\| op[`1`] < `2`)
7626	/ Something is wrong in the received header.*
7627	* Follow the TCP stack's tcp_parse_options()
7628	* and just bail here.
7629	*/
7630	return ERR_PTR(error: -EFAULT);
7631
7632	kind_len = op[`1`];
7633	if (search_kind == kind) {
7634	if (!magic_len)
7635	return op;
7636
7637	if (magic_len > kind_len - `2`)
7638	return ERR_PTR(error: -ENOMSG);
7639
7640	if (!memcmp(p: &op[`2`], q: magic, size: magic_len))
7641	return op;
7642	}
7643
7644	op += kind_len;
7645	}
7646
7647	return ERR_PTR(error: -ENOMSG);
7648	}
7649
7650	BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7651	void *, search_res, u32, len, u64, flags)
7652	{
7653	bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
7654	const u8 op, opend, magic, search = search_res;
7655	u8 search_kind, search_len, copy_len, magic_len;
7656	int ret;
7657
7658	if (!is_locked_tcp_sock_ops(bpf_sock))
7659	return -EOPNOTSUPP;
7660
7661	/ 2 byte is the minimal option len except TCPOPT_NOP and*
7662	* TCPOPT_EOL which are useless for the bpf prog to learn
7663	* and this helper disallow loading them also.
7664	*/
7665	if (len < `2` \|\| flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
7666	return -EINVAL;
7667
7668	search_kind = search[`0`];
7669	search_len = search[`1`];
7670
7671	if (search_len > len \|\| search_kind == TCPOPT_NOP \|\|
7672	search_kind == TCPOPT_EOL)
7673	return -EINVAL;
7674
7675	if (search_kind == TCPOPT_EXP \|\| search_kind == `253`) {
7676	/ 16 or 32 bit magic. +2 for kind and kind length /
7677	if (search_len != `4` && search_len != `6`)
7678	return -EINVAL;
7679	magic = &search[`2`];
7680	magic_len = search_len - `2`;
7681	} else {
7682	if (search_len)
7683	return -EINVAL;
7684	magic = NULL;
7685	magic_len = `0`;
7686	}
7687
7688	if (load_syn) {
7689	ret = bpf_sock_ops_get_syn(bpf_sock, optname: TCP_BPF_SYN, start: &op);
7690	if (ret < `0`)
7691	return ret;
7692
7693	opend = op + ret;
7694	op += sizeof(struct tcphdr);
7695	} else {
7696	if (!bpf_sock->skb \|\|
7697	bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7698	/ This bpf_sock->op cannot call this helper /
7699	return -EPERM;
7700
7701	opend = bpf_sock->skb_data_end;
7702	op = bpf_sock->skb->data + sizeof(struct tcphdr);
7703	}
7704
7705	op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
7706	eol: &eol);
7707	if (IS_ERR(ptr: op))
7708	return PTR_ERR(ptr: op);
7709
7710	copy_len = op[`1`];
7711	ret = copy_len;
7712	if (copy_len > len) {
7713	ret = -ENOSPC;
7714	copy_len = len;
7715	}
7716
7717	memcpy(search_res, op, copy_len);
7718	return ret;
7719	}
7720
7721	static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
7722	.func = bpf_sock_ops_load_hdr_opt,
7723	.gpl_only = false,
7724	.ret_type = RET_INTEGER,
7725	.arg1_type = ARG_PTR_TO_CTX,
7726	.arg2_type = ARG_PTR_TO_MEM \| MEM_WRITE,
7727	.arg3_type = ARG_CONST_SIZE,
7728	.arg4_type = ARG_ANYTHING,
7729	};
7730
7731	BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7732	const void *, from, u32, len, u64, flags)
7733	{
7734	u8 new_kind, new_kind_len, magic_len = `0`, *opend;
7735	const u8 op, new_op, *magic = NULL;
7736	struct sk_buff *skb;
7737	bool eol;
7738
7739	if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
7740	return -EPERM;
7741
7742	if (len < `2` \|\| flags)
7743	return -EINVAL;
7744
7745	new_op = from;
7746	new_kind = new_op[`0`];
7747	new_kind_len = new_op[`1`];
7748
7749	if (new_kind_len > len \|\| new_kind == TCPOPT_NOP \|\|
7750	new_kind == TCPOPT_EOL)
7751	return -EINVAL;
7752
7753	if (new_kind_len > bpf_sock->remaining_opt_len)
7754	return -ENOSPC;
7755
7756	/ 253 is another experimental kind /
7757	if (new_kind == TCPOPT_EXP \|\| new_kind == `253`) {
7758	if (new_kind_len < `4`)
7759	return -EINVAL;
7760	/ Match for the 2 byte magic also.*
7761	* RFC 6994: the magic could be 2 or 4 bytes.
7762	* Hence, matching by 2 byte only is on the
7763	* conservative side but it is the right
7764	* thing to do for the 'search-for-duplication'
7765	* purpose.
7766	*/
7767	magic = &new_op[`2`];
7768	magic_len = `2`;
7769	}
7770
7771	/ Check for duplication /
7772	skb = bpf_sock->skb;
7773	op = skb->data + sizeof(struct tcphdr);
7774	opend = bpf_sock->skb_data_end;
7775
7776	op = bpf_search_tcp_opt(op, opend, search_kind: new_kind, magic, magic_len,
7777	eol: &eol);
7778	if (!IS_ERR(ptr: op))
7779	return -EEXIST;
7780
7781	if (PTR_ERR(ptr: op) != -ENOMSG)
7782	return PTR_ERR(ptr: op);
7783
7784	if (eol)
7785	/ The option has been ended. Treat it as no more*
7786	* header option can be written.
7787	*/
7788	return -ENOSPC;
7789
7790	/ No duplication found. Store the header option. /
7791	memcpy(opend, from, new_kind_len);
7792
7793	bpf_sock->remaining_opt_len -= new_kind_len;
7794	bpf_sock->skb_data_end += new_kind_len;
7795
7796	return `0`;
7797	}
7798
7799	static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
7800	.func = bpf_sock_ops_store_hdr_opt,
7801	.gpl_only = false,
7802	.ret_type = RET_INTEGER,
7803	.arg1_type = ARG_PTR_TO_CTX,
7804	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
7805	.arg3_type = ARG_CONST_SIZE,
7806	.arg4_type = ARG_ANYTHING,
7807	};
7808
7809	BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7810	u32, len, u64, flags)
7811	{
7812	if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7813	return -EPERM;
7814
7815	if (flags \|\| len < `2`)
7816	return -EINVAL;
7817
7818	if (len > bpf_sock->remaining_opt_len)
7819	return -ENOSPC;
7820
7821	bpf_sock->remaining_opt_len -= len;
7822
7823	return `0`;
7824	}
7825
7826	static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
7827	.func = bpf_sock_ops_reserve_hdr_opt,
7828	.gpl_only = false,
7829	.ret_type = RET_INTEGER,
7830	.arg1_type = ARG_PTR_TO_CTX,
7831	.arg2_type = ARG_ANYTHING,
7832	.arg3_type = ARG_ANYTHING,
7833	};
7834
7835	BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
7836	u64, tstamp, u32, tstamp_type)
7837	{
7838	/ skb_clear_delivery_time() is done for inet protocol /
7839	if (skb->protocol != htons(ETH_P_IP) &&
7840	skb->protocol != htons(ETH_P_IPV6))
7841	return -EOPNOTSUPP;
7842
7843	switch (tstamp_type) {
7844	case BPF_SKB_CLOCK_REALTIME:
7845	skb->tstamp = tstamp;
7846	skb->tstamp_type = SKB_CLOCK_REALTIME;
7847	break;
7848	case BPF_SKB_CLOCK_MONOTONIC:
7849	if (!tstamp)
7850	return -EINVAL;
7851	skb->tstamp = tstamp;
7852	skb->tstamp_type = SKB_CLOCK_MONOTONIC;
7853	break;
7854	case BPF_SKB_CLOCK_TAI:
7855	if (!tstamp)
7856	return -EINVAL;
7857	skb->tstamp = tstamp;
7858	skb->tstamp_type = SKB_CLOCK_TAI;
7859	break;
7860	default:
7861	return -EINVAL;
7862	}
7863
7864	return `0`;
7865	}
7866
7867	static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
7868	.func = bpf_skb_set_tstamp,
7869	.gpl_only = false,
7870	.ret_type = RET_INTEGER,
7871	.arg1_type = ARG_PTR_TO_CTX,
7872	.arg2_type = ARG_ANYTHING,
7873	.arg3_type = ARG_ANYTHING,
7874	};
7875
7876	#ifdef CONFIG_SYN_COOKIES
7877	BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
7878	struct tcphdr *, th, u32, th_len)
7879	{
7880	u32 cookie;
7881	u16 mss;
7882
7883	if (unlikely(th_len < sizeof(th) \|\| th_len != th->doff `4`))
7884	return -EINVAL;
7885
7886	mss = tcp_parse_mss_option(th, user_mss: `0`) ?: TCP_MSS_DEFAULT;
7887	cookie = __cookie_v4_init_sequence(iph, th, mssp: &mss);
7888
7889	return cookie \| ((u64)mss << `32`);
7890	}
7891
7892	static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
7893	.func = bpf_tcp_raw_gen_syncookie_ipv4,
7894	.gpl_only = true, / __cookie_v4_init_sequence() is GPL /
7895	.pkt_access = true,
7896	.ret_type = RET_INTEGER,
7897	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7898	.arg1_size = sizeof(struct iphdr),
7899	.arg2_type = ARG_PTR_TO_MEM,
7900	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7901	};
7902
7903	BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
7904	struct tcphdr *, th, u32, th_len)
7905	{
7906	#if IS_BUILTIN(CONFIG_IPV6)
7907	const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
7908	sizeof(struct ipv6hdr);
7909	u32 cookie;
7910	u16 mss;
7911
7912	if (unlikely(th_len < sizeof(th) \|\| th_len != th->doff `4`))
7913	return -EINVAL;
7914
7915	mss = tcp_parse_mss_option(th, user_mss: `0`) ?: mss_clamp;
7916	cookie = __cookie_v6_init_sequence(iph, th, mssp: &mss);
7917
7918	return cookie \| ((u64)mss << `32`);
7919	#else
7920	return -EPROTONOSUPPORT;
7921	#endif
7922	}
7923
7924	static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
7925	.func = bpf_tcp_raw_gen_syncookie_ipv6,
7926	.gpl_only = true, / __cookie_v6_init_sequence() is GPL /
7927	.pkt_access = true,
7928	.ret_type = RET_INTEGER,
7929	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7930	.arg1_size = sizeof(struct ipv6hdr),
7931	.arg2_type = ARG_PTR_TO_MEM,
7932	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
7933	};
7934
7935	BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
7936	struct tcphdr *, th)
7937	{
7938	if (__cookie_v4_check(iph, th) > `0`)
7939	return `0`;
7940
7941	return -EACCES;
7942	}
7943
7944	static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
7945	.func = bpf_tcp_raw_check_syncookie_ipv4,
7946	.gpl_only = true, / __cookie_v4_check is GPL /
7947	.pkt_access = true,
7948	.ret_type = RET_INTEGER,
7949	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7950	.arg1_size = sizeof(struct iphdr),
7951	.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7952	.arg2_size = sizeof(struct tcphdr),
7953	};
7954
7955	BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
7956	struct tcphdr *, th)
7957	{
7958	#if IS_BUILTIN(CONFIG_IPV6)
7959	if (__cookie_v6_check(iph, th) > `0`)
7960	return `0`;
7961
7962	return -EACCES;
7963	#else
7964	return -EPROTONOSUPPORT;
7965	#endif
7966	}
7967
7968	static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
7969	.func = bpf_tcp_raw_check_syncookie_ipv6,
7970	.gpl_only = true, / __cookie_v6_check is GPL /
7971	.pkt_access = true,
7972	.ret_type = RET_INTEGER,
7973	.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7974	.arg1_size = sizeof(struct ipv6hdr),
7975	.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7976	.arg2_size = sizeof(struct tcphdr),
7977	};
7978	#endif /* CONFIG_SYN_COOKIES */
7979
7980	#endif /* CONFIG_INET */
7981
7982	bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id)
7983	{
7984	switch (func_id) {
7985	case BPF_FUNC_clone_redirect:
7986	case BPF_FUNC_l3_csum_replace:
7987	case BPF_FUNC_l4_csum_replace:
7988	case BPF_FUNC_lwt_push_encap:
7989	case BPF_FUNC_lwt_seg6_action:
7990	case BPF_FUNC_lwt_seg6_adjust_srh:
7991	case BPF_FUNC_lwt_seg6_store_bytes:
7992	case BPF_FUNC_msg_pop_data:
7993	case BPF_FUNC_msg_pull_data:
7994	case BPF_FUNC_msg_push_data:
7995	case BPF_FUNC_skb_adjust_room:
7996	case BPF_FUNC_skb_change_head:
7997	case BPF_FUNC_skb_change_proto:
7998	case BPF_FUNC_skb_change_tail:
7999	case BPF_FUNC_skb_pull_data:
8000	case BPF_FUNC_skb_store_bytes:
8001	case BPF_FUNC_skb_vlan_pop:
8002	case BPF_FUNC_skb_vlan_push:
8003	case BPF_FUNC_store_hdr_opt:
8004	case BPF_FUNC_xdp_adjust_head:
8005	case BPF_FUNC_xdp_adjust_meta:
8006	case BPF_FUNC_xdp_adjust_tail:
8007	/ tail-called program could call any of the above /
8008	case BPF_FUNC_tail_call:
8009	return true;
8010	default:
8011	return false;
8012	}
8013	}
8014
8015	const struct bpf_func_proto bpf_event_output_data_proto __weak;
8016	const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
8017
8018	static const struct bpf_func_proto *
8019	sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8020	{
8021	const struct bpf_func_proto *func_proto;
8022
8023	func_proto = cgroup_common_func_proto(func_id, prog);
8024	if (func_proto)
8025	return func_proto;
8026
8027	switch (func_id) {
8028	case BPF_FUNC_get_socket_cookie:
8029	return &bpf_get_socket_cookie_sock_proto;
8030	case BPF_FUNC_get_netns_cookie:
8031	return &bpf_get_netns_cookie_sock_proto;
8032	case BPF_FUNC_perf_event_output:
8033	return &bpf_event_output_data_proto;
8034	case BPF_FUNC_sk_storage_get:
8035	return &bpf_sk_storage_get_cg_sock_proto;
8036	case BPF_FUNC_ktime_get_coarse_ns:
8037	return &bpf_ktime_get_coarse_ns_proto;
8038	default:
8039	return bpf_base_func_proto(func_id, prog);
8040	}
8041	}
8042
8043	static const struct bpf_func_proto *
8044	sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8045	{
8046	const struct bpf_func_proto *func_proto;
8047
8048	func_proto = cgroup_common_func_proto(func_id, prog);
8049	if (func_proto)
8050	return func_proto;
8051
8052	switch (func_id) {
8053	case BPF_FUNC_bind:
8054	switch (prog->expected_attach_type) {
8055	case BPF_CGROUP_INET4_CONNECT:
8056	case BPF_CGROUP_INET6_CONNECT:
8057	return &bpf_bind_proto;
8058	default:
8059	return NULL;
8060	}
8061	case BPF_FUNC_get_socket_cookie:
8062	return &bpf_get_socket_cookie_sock_addr_proto;
8063	case BPF_FUNC_get_netns_cookie:
8064	return &bpf_get_netns_cookie_sock_addr_proto;
8065	case BPF_FUNC_perf_event_output:
8066	return &bpf_event_output_data_proto;
8067	#ifdef CONFIG_INET
8068	case BPF_FUNC_sk_lookup_tcp:
8069	return &bpf_sock_addr_sk_lookup_tcp_proto;
8070	case BPF_FUNC_sk_lookup_udp:
8071	return &bpf_sock_addr_sk_lookup_udp_proto;
8072	case BPF_FUNC_sk_release:
8073	return &bpf_sk_release_proto;
8074	case BPF_FUNC_skc_lookup_tcp:
8075	return &bpf_sock_addr_skc_lookup_tcp_proto;
8076	#endif /* CONFIG_INET */
8077	case BPF_FUNC_sk_storage_get:
8078	return &bpf_sk_storage_get_proto;
8079	case BPF_FUNC_sk_storage_delete:
8080	return &bpf_sk_storage_delete_proto;
8081	case BPF_FUNC_setsockopt:
8082	switch (prog->expected_attach_type) {
8083	case BPF_CGROUP_INET4_BIND:
8084	case BPF_CGROUP_INET6_BIND:
8085	case BPF_CGROUP_INET4_CONNECT:
8086	case BPF_CGROUP_INET6_CONNECT:
8087	case BPF_CGROUP_UNIX_CONNECT:
8088	case BPF_CGROUP_UDP4_RECVMSG:
8089	case BPF_CGROUP_UDP6_RECVMSG:
8090	case BPF_CGROUP_UNIX_RECVMSG:
8091	case BPF_CGROUP_UDP4_SENDMSG:
8092	case BPF_CGROUP_UDP6_SENDMSG:
8093	case BPF_CGROUP_UNIX_SENDMSG:
8094	case BPF_CGROUP_INET4_GETPEERNAME:
8095	case BPF_CGROUP_INET6_GETPEERNAME:
8096	case BPF_CGROUP_UNIX_GETPEERNAME:
8097	case BPF_CGROUP_INET4_GETSOCKNAME:
8098	case BPF_CGROUP_INET6_GETSOCKNAME:
8099	case BPF_CGROUP_UNIX_GETSOCKNAME:
8100	return &bpf_sock_addr_setsockopt_proto;
8101	default:
8102	return NULL;
8103	}
8104	case BPF_FUNC_getsockopt:
8105	switch (prog->expected_attach_type) {
8106	case BPF_CGROUP_INET4_BIND:
8107	case BPF_CGROUP_INET6_BIND:
8108	case BPF_CGROUP_INET4_CONNECT:
8109	case BPF_CGROUP_INET6_CONNECT:
8110	case BPF_CGROUP_UNIX_CONNECT:
8111	case BPF_CGROUP_UDP4_RECVMSG:
8112	case BPF_CGROUP_UDP6_RECVMSG:
8113	case BPF_CGROUP_UNIX_RECVMSG:
8114	case BPF_CGROUP_UDP4_SENDMSG:
8115	case BPF_CGROUP_UDP6_SENDMSG:
8116	case BPF_CGROUP_UNIX_SENDMSG:
8117	case BPF_CGROUP_INET4_GETPEERNAME:
8118	case BPF_CGROUP_INET6_GETPEERNAME:
8119	case BPF_CGROUP_UNIX_GETPEERNAME:
8120	case BPF_CGROUP_INET4_GETSOCKNAME:
8121	case BPF_CGROUP_INET6_GETSOCKNAME:
8122	case BPF_CGROUP_UNIX_GETSOCKNAME:
8123	return &bpf_sock_addr_getsockopt_proto;
8124	default:
8125	return NULL;
8126	}
8127	default:
8128	return bpf_sk_base_func_proto(func_id, prog);
8129	}
8130	}
8131
8132	static const struct bpf_func_proto *
8133	sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8134	{
8135	switch (func_id) {
8136	case BPF_FUNC_skb_load_bytes:
8137	return &bpf_skb_load_bytes_proto;
8138	case BPF_FUNC_skb_load_bytes_relative:
8139	return &bpf_skb_load_bytes_relative_proto;
8140	case BPF_FUNC_get_socket_cookie:
8141	return &bpf_get_socket_cookie_proto;
8142	case BPF_FUNC_get_netns_cookie:
8143	return &bpf_get_netns_cookie_proto;
8144	case BPF_FUNC_get_socket_uid:
8145	return &bpf_get_socket_uid_proto;
8146	case BPF_FUNC_perf_event_output:
8147	return &bpf_skb_event_output_proto;
8148	default:
8149	return bpf_sk_base_func_proto(func_id, prog);
8150	}
8151	}
8152
8153	const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
8154	const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
8155
8156	static const struct bpf_func_proto *
8157	cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8158	{
8159	const struct bpf_func_proto *func_proto;
8160
8161	func_proto = cgroup_common_func_proto(func_id, prog);
8162	if (func_proto)
8163	return func_proto;
8164
8165	switch (func_id) {
8166	case BPF_FUNC_sk_fullsock:
8167	return &bpf_sk_fullsock_proto;
8168	case BPF_FUNC_sk_storage_get:
8169	return &bpf_sk_storage_get_proto;
8170	case BPF_FUNC_sk_storage_delete:
8171	return &bpf_sk_storage_delete_proto;
8172	case BPF_FUNC_perf_event_output:
8173	return &bpf_skb_event_output_proto;
8174	#ifdef CONFIG_SOCK_CGROUP_DATA
8175	case BPF_FUNC_skb_cgroup_id:
8176	return &bpf_skb_cgroup_id_proto;
8177	case BPF_FUNC_skb_ancestor_cgroup_id:
8178	return &bpf_skb_ancestor_cgroup_id_proto;
8179	case BPF_FUNC_sk_cgroup_id:
8180	return &bpf_sk_cgroup_id_proto;
8181	case BPF_FUNC_sk_ancestor_cgroup_id:
8182	return &bpf_sk_ancestor_cgroup_id_proto;
8183	#endif
8184	#ifdef CONFIG_INET
8185	case BPF_FUNC_sk_lookup_tcp:
8186	return &bpf_sk_lookup_tcp_proto;
8187	case BPF_FUNC_sk_lookup_udp:
8188	return &bpf_sk_lookup_udp_proto;
8189	case BPF_FUNC_sk_release:
8190	return &bpf_sk_release_proto;
8191	case BPF_FUNC_skc_lookup_tcp:
8192	return &bpf_skc_lookup_tcp_proto;
8193	case BPF_FUNC_tcp_sock:
8194	return &bpf_tcp_sock_proto;
8195	case BPF_FUNC_get_listener_sock:
8196	return &bpf_get_listener_sock_proto;
8197	case BPF_FUNC_skb_ecn_set_ce:
8198	return &bpf_skb_ecn_set_ce_proto;
8199	#endif
8200	default:
8201	return sk_filter_func_proto(func_id, prog);
8202	}
8203	}
8204
8205	static const struct bpf_func_proto *
8206	tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8207	{
8208	switch (func_id) {
8209	case BPF_FUNC_skb_store_bytes:
8210	return &bpf_skb_store_bytes_proto;
8211	case BPF_FUNC_skb_load_bytes:
8212	return &bpf_skb_load_bytes_proto;
8213	case BPF_FUNC_skb_load_bytes_relative:
8214	return &bpf_skb_load_bytes_relative_proto;
8215	case BPF_FUNC_skb_pull_data:
8216	return &bpf_skb_pull_data_proto;
8217	case BPF_FUNC_csum_diff:
8218	return &bpf_csum_diff_proto;
8219	case BPF_FUNC_csum_update:
8220	return &bpf_csum_update_proto;
8221	case BPF_FUNC_csum_level:
8222	return &bpf_csum_level_proto;
8223	case BPF_FUNC_l3_csum_replace:
8224	return &bpf_l3_csum_replace_proto;
8225	case BPF_FUNC_l4_csum_replace:
8226	return &bpf_l4_csum_replace_proto;
8227	case BPF_FUNC_clone_redirect:
8228	return &bpf_clone_redirect_proto;
8229	case BPF_FUNC_get_cgroup_classid:
8230	return &bpf_get_cgroup_classid_proto;
8231	case BPF_FUNC_skb_vlan_push:
8232	return &bpf_skb_vlan_push_proto;
8233	case BPF_FUNC_skb_vlan_pop:
8234	return &bpf_skb_vlan_pop_proto;
8235	case BPF_FUNC_skb_change_proto:
8236	return &bpf_skb_change_proto_proto;
8237	case BPF_FUNC_skb_change_type:
8238	return &bpf_skb_change_type_proto;
8239	case BPF_FUNC_skb_adjust_room:
8240	return &bpf_skb_adjust_room_proto;
8241	case BPF_FUNC_skb_change_tail:
8242	return &bpf_skb_change_tail_proto;
8243	case BPF_FUNC_skb_change_head:
8244	return &bpf_skb_change_head_proto;
8245	case BPF_FUNC_skb_get_tunnel_key:
8246	return &bpf_skb_get_tunnel_key_proto;
8247	case BPF_FUNC_skb_set_tunnel_key:
8248	return bpf_get_skb_set_tunnel_proto(which: func_id);
8249	case BPF_FUNC_skb_get_tunnel_opt:
8250	return &bpf_skb_get_tunnel_opt_proto;
8251	case BPF_FUNC_skb_set_tunnel_opt:
8252	return bpf_get_skb_set_tunnel_proto(which: func_id);
8253	case BPF_FUNC_redirect:
8254	return &bpf_redirect_proto;
8255	case BPF_FUNC_redirect_neigh:
8256	return &bpf_redirect_neigh_proto;
8257	case BPF_FUNC_redirect_peer:
8258	return &bpf_redirect_peer_proto;
8259	case BPF_FUNC_get_route_realm:
8260	return &bpf_get_route_realm_proto;
8261	case BPF_FUNC_get_hash_recalc:
8262	return &bpf_get_hash_recalc_proto;
8263	case BPF_FUNC_set_hash_invalid:
8264	return &bpf_set_hash_invalid_proto;
8265	case BPF_FUNC_set_hash:
8266	return &bpf_set_hash_proto;
8267	case BPF_FUNC_perf_event_output:
8268	return &bpf_skb_event_output_proto;
8269	case BPF_FUNC_get_smp_processor_id:
8270	return &bpf_get_smp_processor_id_proto;
8271	case BPF_FUNC_skb_under_cgroup:
8272	return &bpf_skb_under_cgroup_proto;
8273	case BPF_FUNC_get_socket_cookie:
8274	return &bpf_get_socket_cookie_proto;
8275	case BPF_FUNC_get_netns_cookie:
8276	return &bpf_get_netns_cookie_proto;
8277	case BPF_FUNC_get_socket_uid:
8278	return &bpf_get_socket_uid_proto;
8279	case BPF_FUNC_fib_lookup:
8280	return &bpf_skb_fib_lookup_proto;
8281	case BPF_FUNC_check_mtu:
8282	return &bpf_skb_check_mtu_proto;
8283	case BPF_FUNC_sk_fullsock:
8284	return &bpf_sk_fullsock_proto;
8285	case BPF_FUNC_sk_storage_get:
8286	return &bpf_sk_storage_get_proto;
8287	case BPF_FUNC_sk_storage_delete:
8288	return &bpf_sk_storage_delete_proto;
8289	#ifdef CONFIG_XFRM
8290	case BPF_FUNC_skb_get_xfrm_state:
8291	return &bpf_skb_get_xfrm_state_proto;
8292	#endif
8293	#ifdef CONFIG_CGROUP_NET_CLASSID
8294	case BPF_FUNC_skb_cgroup_classid:
8295	return &bpf_skb_cgroup_classid_proto;
8296	#endif
8297	#ifdef CONFIG_SOCK_CGROUP_DATA
8298	case BPF_FUNC_skb_cgroup_id:
8299	return &bpf_skb_cgroup_id_proto;
8300	case BPF_FUNC_skb_ancestor_cgroup_id:
8301	return &bpf_skb_ancestor_cgroup_id_proto;
8302	#endif
8303	#ifdef CONFIG_INET
8304	case BPF_FUNC_sk_lookup_tcp:
8305	return &bpf_tc_sk_lookup_tcp_proto;
8306	case BPF_FUNC_sk_lookup_udp:
8307	return &bpf_tc_sk_lookup_udp_proto;
8308	case BPF_FUNC_sk_release:
8309	return &bpf_sk_release_proto;
8310	case BPF_FUNC_tcp_sock:
8311	return &bpf_tcp_sock_proto;
8312	case BPF_FUNC_get_listener_sock:
8313	return &bpf_get_listener_sock_proto;
8314	case BPF_FUNC_skc_lookup_tcp:
8315	return &bpf_tc_skc_lookup_tcp_proto;
8316	case BPF_FUNC_tcp_check_syncookie:
8317	return &bpf_tcp_check_syncookie_proto;
8318	case BPF_FUNC_skb_ecn_set_ce:
8319	return &bpf_skb_ecn_set_ce_proto;
8320	case BPF_FUNC_tcp_gen_syncookie:
8321	return &bpf_tcp_gen_syncookie_proto;
8322	case BPF_FUNC_sk_assign:
8323	return &bpf_sk_assign_proto;
8324	case BPF_FUNC_skb_set_tstamp:
8325	return &bpf_skb_set_tstamp_proto;
8326	#ifdef CONFIG_SYN_COOKIES
8327	case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
8328	return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
8329	case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
8330	return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
8331	case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
8332	return &bpf_tcp_raw_check_syncookie_ipv4_proto;
8333	case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
8334	return &bpf_tcp_raw_check_syncookie_ipv6_proto;
8335	#endif
8336	#endif
8337	default:
8338	return bpf_sk_base_func_proto(func_id, prog);
8339	}
8340	}
8341
8342	static const struct bpf_func_proto *
8343	xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8344	{
8345	switch (func_id) {
8346	case BPF_FUNC_perf_event_output:
8347	return &bpf_xdp_event_output_proto;
8348	case BPF_FUNC_get_smp_processor_id:
8349	return &bpf_get_smp_processor_id_proto;
8350	case BPF_FUNC_csum_diff:
8351	return &bpf_csum_diff_proto;
8352	case BPF_FUNC_xdp_adjust_head:
8353	return &bpf_xdp_adjust_head_proto;
8354	case BPF_FUNC_xdp_adjust_meta:
8355	return &bpf_xdp_adjust_meta_proto;
8356	case BPF_FUNC_redirect:
8357	return &bpf_xdp_redirect_proto;
8358	case BPF_FUNC_redirect_map:
8359	return &bpf_xdp_redirect_map_proto;
8360	case BPF_FUNC_xdp_adjust_tail:
8361	return &bpf_xdp_adjust_tail_proto;
8362	case BPF_FUNC_xdp_get_buff_len:
8363	return &bpf_xdp_get_buff_len_proto;
8364	case BPF_FUNC_xdp_load_bytes:
8365	return &bpf_xdp_load_bytes_proto;
8366	case BPF_FUNC_xdp_store_bytes:
8367	return &bpf_xdp_store_bytes_proto;
8368	case BPF_FUNC_fib_lookup:
8369	return &bpf_xdp_fib_lookup_proto;
8370	case BPF_FUNC_check_mtu:
8371	return &bpf_xdp_check_mtu_proto;
8372	#ifdef CONFIG_INET
8373	case BPF_FUNC_sk_lookup_udp:
8374	return &bpf_xdp_sk_lookup_udp_proto;
8375	case BPF_FUNC_sk_lookup_tcp:
8376	return &bpf_xdp_sk_lookup_tcp_proto;
8377	case BPF_FUNC_sk_release:
8378	return &bpf_sk_release_proto;
8379	case BPF_FUNC_skc_lookup_tcp:
8380	return &bpf_xdp_skc_lookup_tcp_proto;
8381	case BPF_FUNC_tcp_check_syncookie:
8382	return &bpf_tcp_check_syncookie_proto;
8383	case BPF_FUNC_tcp_gen_syncookie:
8384	return &bpf_tcp_gen_syncookie_proto;
8385	#ifdef CONFIG_SYN_COOKIES
8386	case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
8387	return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
8388	case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
8389	return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
8390	case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
8391	return &bpf_tcp_raw_check_syncookie_ipv4_proto;
8392	case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
8393	return &bpf_tcp_raw_check_syncookie_ipv6_proto;
8394	#endif
8395	#endif
8396	default:
8397	return bpf_sk_base_func_proto(func_id, prog);
8398	}
8399
8400	#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
8401	/ The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The*
8402	* kfuncs are defined in two different modules, and we want to be able
8403	* to use them interchangeably with the same BTF type ID. Because modules
8404	* can't de-duplicate BTF IDs between each other, we need the type to be
8405	* referenced in the vmlinux BTF or the verifier will get confused about
8406	* the different types. So we add this dummy type reference which will
8407	* be included in vmlinux BTF, allowing both modules to refer to the
8408	* same type ID.
8409	*/
8410	BTF_TYPE_EMIT(struct nf_conn___init);
8411	#endif
8412	}
8413
8414	const struct bpf_func_proto bpf_sock_map_update_proto __weak;
8415	const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
8416
8417	static const struct bpf_func_proto *
8418	sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8419	{
8420	const struct bpf_func_proto *func_proto;
8421
8422	func_proto = cgroup_common_func_proto(func_id, prog);
8423	if (func_proto)
8424	return func_proto;
8425
8426	switch (func_id) {
8427	case BPF_FUNC_setsockopt:
8428	return &bpf_sock_ops_setsockopt_proto;
8429	case BPF_FUNC_getsockopt:
8430	return &bpf_sock_ops_getsockopt_proto;
8431	case BPF_FUNC_sock_ops_cb_flags_set:
8432	return &bpf_sock_ops_cb_flags_set_proto;
8433	case BPF_FUNC_sock_map_update:
8434	return &bpf_sock_map_update_proto;
8435	case BPF_FUNC_sock_hash_update:
8436	return &bpf_sock_hash_update_proto;
8437	case BPF_FUNC_get_socket_cookie:
8438	return &bpf_get_socket_cookie_sock_ops_proto;
8439	case BPF_FUNC_perf_event_output:
8440	return &bpf_event_output_data_proto;
8441	case BPF_FUNC_sk_storage_get:
8442	return &bpf_sk_storage_get_proto;
8443	case BPF_FUNC_sk_storage_delete:
8444	return &bpf_sk_storage_delete_proto;
8445	case BPF_FUNC_get_netns_cookie:
8446	return &bpf_get_netns_cookie_sock_ops_proto;
8447	#ifdef CONFIG_INET
8448	case BPF_FUNC_load_hdr_opt:
8449	return &bpf_sock_ops_load_hdr_opt_proto;
8450	case BPF_FUNC_store_hdr_opt:
8451	return &bpf_sock_ops_store_hdr_opt_proto;
8452	case BPF_FUNC_reserve_hdr_opt:
8453	return &bpf_sock_ops_reserve_hdr_opt_proto;
8454	case BPF_FUNC_tcp_sock:
8455	return &bpf_tcp_sock_proto;
8456	#endif /* CONFIG_INET */
8457	default:
8458	return bpf_sk_base_func_proto(func_id, prog);
8459	}
8460	}
8461
8462	const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
8463	const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
8464
8465	static const struct bpf_func_proto *
8466	sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8467	{
8468	switch (func_id) {
8469	case BPF_FUNC_msg_redirect_map:
8470	return &bpf_msg_redirect_map_proto;
8471	case BPF_FUNC_msg_redirect_hash:
8472	return &bpf_msg_redirect_hash_proto;
8473	case BPF_FUNC_msg_apply_bytes:
8474	return &bpf_msg_apply_bytes_proto;
8475	case BPF_FUNC_msg_cork_bytes:
8476	return &bpf_msg_cork_bytes_proto;
8477	case BPF_FUNC_msg_pull_data:
8478	return &bpf_msg_pull_data_proto;
8479	case BPF_FUNC_msg_push_data:
8480	return &bpf_msg_push_data_proto;
8481	case BPF_FUNC_msg_pop_data:
8482	return &bpf_msg_pop_data_proto;
8483	case BPF_FUNC_perf_event_output:
8484	return &bpf_event_output_data_proto;
8485	case BPF_FUNC_sk_storage_get:
8486	return &bpf_sk_storage_get_proto;
8487	case BPF_FUNC_sk_storage_delete:
8488	return &bpf_sk_storage_delete_proto;
8489	case BPF_FUNC_get_netns_cookie:
8490	return &bpf_get_netns_cookie_sk_msg_proto;
8491	default:
8492	return bpf_sk_base_func_proto(func_id, prog);
8493	}
8494	}
8495
8496	const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
8497	const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
8498
8499	static const struct bpf_func_proto *
8500	sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8501	{
8502	switch (func_id) {
8503	case BPF_FUNC_skb_store_bytes:
8504	return &bpf_skb_store_bytes_proto;
8505	case BPF_FUNC_skb_load_bytes:
8506	return &bpf_skb_load_bytes_proto;
8507	case BPF_FUNC_skb_pull_data:
8508	return &sk_skb_pull_data_proto;
8509	case BPF_FUNC_skb_change_tail:
8510	return &sk_skb_change_tail_proto;
8511	case BPF_FUNC_skb_change_head:
8512	return &sk_skb_change_head_proto;
8513	case BPF_FUNC_skb_adjust_room:
8514	return &sk_skb_adjust_room_proto;
8515	case BPF_FUNC_get_socket_cookie:
8516	return &bpf_get_socket_cookie_proto;
8517	case BPF_FUNC_get_socket_uid:
8518	return &bpf_get_socket_uid_proto;
8519	case BPF_FUNC_sk_redirect_map:
8520	return &bpf_sk_redirect_map_proto;
8521	case BPF_FUNC_sk_redirect_hash:
8522	return &bpf_sk_redirect_hash_proto;
8523	case BPF_FUNC_perf_event_output:
8524	return &bpf_skb_event_output_proto;
8525	#ifdef CONFIG_INET
8526	case BPF_FUNC_sk_lookup_tcp:
8527	return &bpf_sk_lookup_tcp_proto;
8528	case BPF_FUNC_sk_lookup_udp:
8529	return &bpf_sk_lookup_udp_proto;
8530	case BPF_FUNC_sk_release:
8531	return &bpf_sk_release_proto;
8532	case BPF_FUNC_skc_lookup_tcp:
8533	return &bpf_skc_lookup_tcp_proto;
8534	#endif
8535	default:
8536	return bpf_sk_base_func_proto(func_id, prog);
8537	}
8538	}
8539
8540	static const struct bpf_func_proto *
8541	flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8542	{
8543	switch (func_id) {
8544	case BPF_FUNC_skb_load_bytes:
8545	return &bpf_flow_dissector_load_bytes_proto;
8546	default:
8547	return bpf_sk_base_func_proto(func_id, prog);
8548	}
8549	}
8550
8551	static const struct bpf_func_proto *
8552	lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8553	{
8554	switch (func_id) {
8555	case BPF_FUNC_skb_load_bytes:
8556	return &bpf_skb_load_bytes_proto;
8557	case BPF_FUNC_skb_pull_data:
8558	return &bpf_skb_pull_data_proto;
8559	case BPF_FUNC_csum_diff:
8560	return &bpf_csum_diff_proto;
8561	case BPF_FUNC_get_cgroup_classid:
8562	return &bpf_get_cgroup_classid_proto;
8563	case BPF_FUNC_get_route_realm:
8564	return &bpf_get_route_realm_proto;
8565	case BPF_FUNC_get_hash_recalc:
8566	return &bpf_get_hash_recalc_proto;
8567	case BPF_FUNC_perf_event_output:
8568	return &bpf_skb_event_output_proto;
8569	case BPF_FUNC_get_smp_processor_id:
8570	return &bpf_get_smp_processor_id_proto;
8571	case BPF_FUNC_skb_under_cgroup:
8572	return &bpf_skb_under_cgroup_proto;
8573	default:
8574	return bpf_sk_base_func_proto(func_id, prog);
8575	}
8576	}
8577
8578	static const struct bpf_func_proto *
8579	lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8580	{
8581	switch (func_id) {
8582	case BPF_FUNC_lwt_push_encap:
8583	return &bpf_lwt_in_push_encap_proto;
8584	default:
8585	return lwt_out_func_proto(func_id, prog);
8586	}
8587	}
8588
8589	static const struct bpf_func_proto *
8590	lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8591	{
8592	switch (func_id) {
8593	case BPF_FUNC_skb_get_tunnel_key:
8594	return &bpf_skb_get_tunnel_key_proto;
8595	case BPF_FUNC_skb_set_tunnel_key:
8596	return bpf_get_skb_set_tunnel_proto(which: func_id);
8597	case BPF_FUNC_skb_get_tunnel_opt:
8598	return &bpf_skb_get_tunnel_opt_proto;
8599	case BPF_FUNC_skb_set_tunnel_opt:
8600	return bpf_get_skb_set_tunnel_proto(which: func_id);
8601	case BPF_FUNC_redirect:
8602	return &bpf_redirect_proto;
8603	case BPF_FUNC_clone_redirect:
8604	return &bpf_clone_redirect_proto;
8605	case BPF_FUNC_skb_change_tail:
8606	return &bpf_skb_change_tail_proto;
8607	case BPF_FUNC_skb_change_head:
8608	return &bpf_skb_change_head_proto;
8609	case BPF_FUNC_skb_store_bytes:
8610	return &bpf_skb_store_bytes_proto;
8611	case BPF_FUNC_csum_update:
8612	return &bpf_csum_update_proto;
8613	case BPF_FUNC_csum_level:
8614	return &bpf_csum_level_proto;
8615	case BPF_FUNC_l3_csum_replace:
8616	return &bpf_l3_csum_replace_proto;
8617	case BPF_FUNC_l4_csum_replace:
8618	return &bpf_l4_csum_replace_proto;
8619	case BPF_FUNC_set_hash_invalid:
8620	return &bpf_set_hash_invalid_proto;
8621	case BPF_FUNC_lwt_push_encap:
8622	return &bpf_lwt_xmit_push_encap_proto;
8623	default:
8624	return lwt_out_func_proto(func_id, prog);
8625	}
8626	}
8627
8628	static const struct bpf_func_proto *
8629	lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8630	{
8631	switch (func_id) {
8632	#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
8633	case BPF_FUNC_lwt_seg6_store_bytes:
8634	return &bpf_lwt_seg6_store_bytes_proto;
8635	case BPF_FUNC_lwt_seg6_action:
8636	return &bpf_lwt_seg6_action_proto;
8637	case BPF_FUNC_lwt_seg6_adjust_srh:
8638	return &bpf_lwt_seg6_adjust_srh_proto;
8639	#endif
8640	default:
8641	return lwt_out_func_proto(func_id, prog);
8642	}
8643	}
8644
8645	static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
8646	const struct bpf_prog *prog,
8647	struct bpf_insn_access_aux *info)
8648	{
8649	const int size_default = sizeof(__u32);
8650
8651	if (off < `0` \|\| off >= sizeof(struct __sk_buff))
8652	return false;
8653
8654	/ The verifier guarantees that size > 0. /
8655	if (off % size != `0`)
8656	return false;
8657
8658	switch (off) {
8659	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8660	if (off + size > offsetofend(struct __sk_buff, cb[`4`]))
8661	return false;
8662	break;
8663	case bpf_ctx_range(struct __sk_buff, data):
8664	case bpf_ctx_range(struct __sk_buff, data_meta):
8665	case bpf_ctx_range(struct __sk_buff, data_end):
8666	if (info->is_ldsx \|\| size != size_default)
8667	return false;
8668	break;
8669	case bpf_ctx_range_till(struct __sk_buff, remote_ip6[`0`], remote_ip6[`3`]):
8670	case bpf_ctx_range_till(struct __sk_buff, local_ip6[`0`], local_ip6[`3`]):
8671	case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
8672	case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
8673	if (size != size_default)
8674	return false;
8675	break;
8676	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
8677	return false;
8678	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8679	if (type == BPF_WRITE \|\| size != sizeof(__u64))
8680	return false;
8681	break;
8682	case bpf_ctx_range(struct __sk_buff, tstamp):
8683	if (size != sizeof(__u64))
8684	return false;
8685	break;
8686	case offsetof(struct __sk_buff, sk):
8687	if (type == BPF_WRITE \|\| size != sizeof(__u64))
8688	return false;
8689	info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
8690	break;
8691	case offsetof(struct __sk_buff, tstamp_type):
8692	return false;
8693	case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - `1`:
8694	/ Explicitly prohibit access to padding in __sk_buff. /
8695	return false;
8696	default:
8697	/ Only narrow read access allowed for now. /
8698	if (type == BPF_WRITE) {
8699	if (size != size_default)
8700	return false;
8701	} else {
8702	bpf_ctx_record_field_size(aux: info, size: size_default);
8703	if (!bpf_ctx_narrow_access_ok(off, size, size_default))
8704	return false;
8705	}
8706	}
8707
8708	return true;
8709	}
8710
8711	static bool sk_filter_is_valid_access(int off, int size,
8712	enum bpf_access_type type,
8713	const struct bpf_prog *prog,
8714	struct bpf_insn_access_aux *info)
8715	{
8716	switch (off) {
8717	case bpf_ctx_range(struct __sk_buff, tc_classid):
8718	case bpf_ctx_range(struct __sk_buff, data):
8719	case bpf_ctx_range(struct __sk_buff, data_meta):
8720	case bpf_ctx_range(struct __sk_buff, data_end):
8721	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8722	case bpf_ctx_range(struct __sk_buff, tstamp):
8723	case bpf_ctx_range(struct __sk_buff, wire_len):
8724	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8725	return false;
8726	}
8727
8728	if (type == BPF_WRITE) {
8729	switch (off) {
8730	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8731	break;
8732	default:
8733	return false;
8734	}
8735	}
8736
8737	return bpf_skb_is_valid_access(off, size, type, prog, info);
8738	}
8739
8740	static bool cg_skb_is_valid_access(int off, int size,
8741	enum bpf_access_type type,
8742	const struct bpf_prog *prog,
8743	struct bpf_insn_access_aux *info)
8744	{
8745	switch (off) {
8746	case bpf_ctx_range(struct __sk_buff, tc_classid):
8747	case bpf_ctx_range(struct __sk_buff, data_meta):
8748	case bpf_ctx_range(struct __sk_buff, wire_len):
8749	return false;
8750	case bpf_ctx_range(struct __sk_buff, data):
8751	case bpf_ctx_range(struct __sk_buff, data_end):
8752	if (!bpf_token_capable(token: prog->aux->token, CAP_BPF))
8753	return false;
8754	break;
8755	}
8756
8757	if (type == BPF_WRITE) {
8758	switch (off) {
8759	case bpf_ctx_range(struct __sk_buff, mark):
8760	case bpf_ctx_range(struct __sk_buff, priority):
8761	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8762	break;
8763	case bpf_ctx_range(struct __sk_buff, tstamp):
8764	if (!bpf_token_capable(token: prog->aux->token, CAP_BPF))
8765	return false;
8766	break;
8767	default:
8768	return false;
8769	}
8770	}
8771
8772	switch (off) {
8773	case bpf_ctx_range(struct __sk_buff, data):
8774	info->reg_type = PTR_TO_PACKET;
8775	break;
8776	case bpf_ctx_range(struct __sk_buff, data_end):
8777	info->reg_type = PTR_TO_PACKET_END;
8778	break;
8779	}
8780
8781	return bpf_skb_is_valid_access(off, size, type, prog, info);
8782	}
8783
8784	static bool lwt_is_valid_access(int off, int size,
8785	enum bpf_access_type type,
8786	const struct bpf_prog *prog,
8787	struct bpf_insn_access_aux *info)
8788	{
8789	switch (off) {
8790	case bpf_ctx_range(struct __sk_buff, tc_classid):
8791	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8792	case bpf_ctx_range(struct __sk_buff, data_meta):
8793	case bpf_ctx_range(struct __sk_buff, tstamp):
8794	case bpf_ctx_range(struct __sk_buff, wire_len):
8795	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8796	return false;
8797	}
8798
8799	if (type == BPF_WRITE) {
8800	switch (off) {
8801	case bpf_ctx_range(struct __sk_buff, mark):
8802	case bpf_ctx_range(struct __sk_buff, priority):
8803	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
8804	break;
8805	default:
8806	return false;
8807	}
8808	}
8809
8810	switch (off) {
8811	case bpf_ctx_range(struct __sk_buff, data):
8812	info->reg_type = PTR_TO_PACKET;
8813	break;
8814	case bpf_ctx_range(struct __sk_buff, data_end):
8815	info->reg_type = PTR_TO_PACKET_END;
8816	break;
8817	}
8818
8819	return bpf_skb_is_valid_access(off, size, type, prog, info);
8820	}
8821
8822	/ Attach type specific accesses /
8823	static bool __sock_filter_check_attach_type(int off,
8824	enum bpf_access_type access_type,
8825	enum bpf_attach_type attach_type)
8826	{
8827	switch (off) {
8828	case offsetof(struct bpf_sock, bound_dev_if):
8829	case offsetof(struct bpf_sock, mark):
8830	case offsetof(struct bpf_sock, priority):
8831	switch (attach_type) {
8832	case BPF_CGROUP_INET_SOCK_CREATE:
8833	case BPF_CGROUP_INET_SOCK_RELEASE:
8834	goto full_access;
8835	default:
8836	return false;
8837	}
8838	case bpf_ctx_range(struct bpf_sock, src_ip4):
8839	switch (attach_type) {
8840	case BPF_CGROUP_INET4_POST_BIND:
8841	goto read_only;
8842	default:
8843	return false;
8844	}
8845	case bpf_ctx_range_till(struct bpf_sock, src_ip6[`0`], src_ip6[`3`]):
8846	switch (attach_type) {
8847	case BPF_CGROUP_INET6_POST_BIND:
8848	goto read_only;
8849	default:
8850	return false;
8851	}
8852	case bpf_ctx_range(struct bpf_sock, src_port):
8853	switch (attach_type) {
8854	case BPF_CGROUP_INET4_POST_BIND:
8855	case BPF_CGROUP_INET6_POST_BIND:
8856	goto read_only;
8857	default:
8858	return false;
8859	}
8860	}
8861	read_only:
8862	return access_type == BPF_READ;
8863	full_access:
8864	return true;
8865	}
8866
8867	bool bpf_sock_common_is_valid_access(int off, int size,
8868	enum bpf_access_type type,
8869	struct bpf_insn_access_aux *info)
8870	{
8871	switch (off) {
8872	case bpf_ctx_range_till(struct bpf_sock, type, priority):
8873	return false;
8874	default:
8875	return bpf_sock_is_valid_access(off, size, type, info);
8876	}
8877	}
8878
8879	bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
8880	struct bpf_insn_access_aux *info)
8881	{
8882	const int size_default = sizeof(__u32);
8883	int field_size;
8884
8885	if (off < `0` \|\| off >= sizeof(struct bpf_sock))
8886	return false;
8887	if (off % size != `0`)
8888	return false;
8889
8890	switch (off) {
8891	case offsetof(struct bpf_sock, state):
8892	case offsetof(struct bpf_sock, family):
8893	case offsetof(struct bpf_sock, type):
8894	case offsetof(struct bpf_sock, protocol):
8895	case offsetof(struct bpf_sock, src_port):
8896	case offsetof(struct bpf_sock, rx_queue_mapping):
8897	case bpf_ctx_range(struct bpf_sock, src_ip4):
8898	case bpf_ctx_range_till(struct bpf_sock, src_ip6[`0`], src_ip6[`3`]):
8899	case bpf_ctx_range(struct bpf_sock, dst_ip4):
8900	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[`0`], dst_ip6[`3`]):
8901	bpf_ctx_record_field_size(aux: info, size: size_default);
8902	return bpf_ctx_narrow_access_ok(off, size, size_default);
8903	case bpf_ctx_range(struct bpf_sock, dst_port):
8904	field_size = size == size_default ?
8905	size_default : sizeof_field(struct bpf_sock, dst_port);
8906	bpf_ctx_record_field_size(aux: info, size: field_size);
8907	return bpf_ctx_narrow_access_ok(off, size, size_default: field_size);
8908	case offsetofend(struct bpf_sock, dst_port) ...
8909	offsetof(struct bpf_sock, dst_ip4) - `1`:
8910	return false;
8911	}
8912
8913	return size == size_default;
8914	}
8915
8916	static bool sock_filter_is_valid_access(int off, int size,
8917	enum bpf_access_type type,
8918	const struct bpf_prog *prog,
8919	struct bpf_insn_access_aux *info)
8920	{
8921	if (!bpf_sock_is_valid_access(off, size, type, info))
8922	return false;
8923	return __sock_filter_check_attach_type(off, access_type: type,
8924	attach_type: prog->expected_attach_type);
8925	}
8926
8927	static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
8928	const struct bpf_prog *prog)
8929	{
8930	/ Neither direct read nor direct write requires any preliminary*
8931	* action.
8932	*/
8933	return `0`;
8934	}
8935
8936	static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
8937	const struct bpf_prog prog, int* drop_verdict)
8938	{
8939	struct bpf_insn *insn = insn_buf;
8940
8941	if (!direct_write)
8942	return `0`;
8943
8944	/ if (!skb->cloned)*
8945	* goto start;
8946	*
8947	* (Fast-path, otherwise approximation that we might be
8948	* a clone, do the rest in helper.)
8949	*/
8950	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET);
8951	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
8952	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, `0`, `7`);
8953
8954	/ ret = bpf_skb_pull_data(skb, 0); /
8955	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
8956	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
8957	*insn++ = BPF_RAW_INSN(BPF_JMP \| BPF_CALL, `0`, `0`, `0`,
8958	BPF_FUNC_skb_pull_data);
8959	/ if (!ret)*
8960	* goto restore;
8961	* return TC_ACT_SHOT;
8962	*/
8963	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, `0`, `2`);
8964	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
8965	*insn++ = BPF_EXIT_INSN();
8966
8967	/ restore: /
8968	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
8969	/ start: /
8970	*insn++ = prog->insnsi[`0`];
8971
8972	return insn - insn_buf;
8973	}
8974
8975	static int bpf_gen_ld_abs(const struct bpf_insn *orig,
8976	struct bpf_insn *insn_buf)
8977	{
8978	bool indirect = BPF_MODE(orig->code) == BPF_IND;
8979	struct bpf_insn *insn = insn_buf;
8980
8981	if (!indirect) {
8982	*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
8983	} else {
8984	*insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
8985	if (orig->imm)
8986	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
8987	}
8988	/ We're guaranteed here that CTX is in R6. /
8989	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
8990
8991	switch (BPF_SIZE(orig->code)) {
8992	case BPF_B:
8993	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
8994	break;
8995	case BPF_H:
8996	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
8997	break;
8998	case BPF_W:
8999	*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
9000	break;
9001	}
9002
9003	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, `0`, `2`);
9004	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
9005	*insn++ = BPF_EXIT_INSN();
9006
9007	return insn - insn_buf;
9008	}
9009
9010	static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
9011	const struct bpf_prog *prog)
9012	{
9013	return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
9014	}
9015
9016	static bool tc_cls_act_is_valid_access(int off, int size,
9017	enum bpf_access_type type,
9018	const struct bpf_prog *prog,
9019	struct bpf_insn_access_aux *info)
9020	{
9021	if (type == BPF_WRITE) {
9022	switch (off) {
9023	case bpf_ctx_range(struct __sk_buff, mark):
9024	case bpf_ctx_range(struct __sk_buff, tc_index):
9025	case bpf_ctx_range(struct __sk_buff, priority):
9026	case bpf_ctx_range(struct __sk_buff, tc_classid):
9027	case bpf_ctx_range_till(struct __sk_buff, cb[`0`], cb[`4`]):
9028	case bpf_ctx_range(struct __sk_buff, tstamp):
9029	case bpf_ctx_range(struct __sk_buff, queue_mapping):
9030	break;
9031	default:
9032	return false;
9033	}
9034	}
9035
9036	switch (off) {
9037	case bpf_ctx_range(struct __sk_buff, data):
9038	info->reg_type = PTR_TO_PACKET;
9039	break;
9040	case bpf_ctx_range(struct __sk_buff, data_meta):
9041	info->reg_type = PTR_TO_PACKET_META;
9042	break;
9043	case bpf_ctx_range(struct __sk_buff, data_end):
9044	info->reg_type = PTR_TO_PACKET_END;
9045	break;
9046	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
9047	return false;
9048	case offsetof(struct __sk_buff, tstamp_type):
9049	/ The convert_ctx_access() on reading and writing*
9050	* __sk_buff->tstamp depends on whether the bpf prog
9051	* has used __sk_buff->tstamp_type or not.
9052	* Thus, we need to set prog->tstamp_type_access
9053	* earlier during is_valid_access() here.
9054	*/
9055	((struct bpf_prog *)prog)->tstamp_type_access = `1`;
9056	return size == sizeof(__u8);
9057	}
9058
9059	return bpf_skb_is_valid_access(off, size, type, prog, info);
9060	}
9061
/*
 * Lock taken around every use of nfct_btf_struct_access in this file.
 */
DEFINE_MUTEX(nf_conn_btf_access_lock);
EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);

/*
 * Optional BTF struct access hook.  It has no initializer here, so it
 * starts out NULL.  The callers in this file check it for NULL and call it
 * with nf_conn_btf_access_lock held.  Both symbols are exported, presumably
 * so that a module can install and remove the hook under the same lock --
 * verify against the module that sets it.
 */
int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
			      const struct bpf_reg_state *reg,
			      int off, int size);
EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
9069
9070	static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
9071	const struct bpf_reg_state *reg,
9072	int off, int size)
9073	{
9074	int ret = -EACCES;
9075
9076	mutex_lock(&nf_conn_btf_access_lock);
9077	if (nfct_btf_struct_access)
9078	ret = nfct_btf_struct_access(log, reg, off, size);
9079	mutex_unlock(lock: &nf_conn_btf_access_lock);
9080
9081	return ret;
9082	}
9083
9084	static bool __is_valid_xdp_access(int off, int size)
9085	{
9086	if (off < `0` \|\| off >= sizeof(struct xdp_md))
9087	return false;
9088	if (off % size != `0`)
9089	return false;
9090	if (size != sizeof(__u32))
9091	return false;
9092
9093	return true;
9094	}
9095
9096	static bool xdp_is_valid_access(int off, int size,
9097	enum bpf_access_type type,
9098	const struct bpf_prog *prog,
9099	struct bpf_insn_access_aux *info)
9100	{
9101	if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
9102	switch (off) {
9103	case offsetof(struct xdp_md, egress_ifindex):
9104	return false;
9105	}
9106	}
9107
9108	if (type == BPF_WRITE) {
9109	if (bpf_prog_is_offloaded(aux: prog->aux)) {
9110	switch (off) {
9111	case offsetof(struct xdp_md, rx_queue_index):
9112	return __is_valid_xdp_access(off, size);
9113	}
9114	}
9115	return false;
9116	} else {
9117	switch (off) {
9118	case offsetof(struct xdp_md, data_meta):
9119	case offsetof(struct xdp_md, data):
9120	case offsetof(struct xdp_md, data_end):
9121	if (info->is_ldsx)
9122	return false;
9123	}
9124	}
9125
9126	switch (off) {
9127	case offsetof(struct xdp_md, data):
9128	info->reg_type = PTR_TO_PACKET;
9129	break;
9130	case offsetof(struct xdp_md, data_meta):
9131	info->reg_type = PTR_TO_PACKET_META;
9132	break;
9133	case offsetof(struct xdp_md, data_end):
9134	info->reg_type = PTR_TO_PACKET_END;
9135	break;
9136	}
9137
9138	return __is_valid_xdp_access(off, size);
9139	}
9140
9141	void bpf_warn_invalid_xdp_action(const struct net_device *dev,
9142	const struct bpf_prog *prog, u32 act)
9143	{
9144	const u32 act_max = XDP_REDIRECT;
9145
9146	pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n",
9147	act > act_max ? "Illegal" : "Driver unsupported",
9148	act, prog->aux->name, prog->aux->id, dev ? dev->name : "N/A");
9149	}
9150	EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
9151
9152	static int xdp_btf_struct_access(struct bpf_verifier_log *log,
9153	const struct bpf_reg_state *reg,
9154	int off, int size)
9155	{
9156	int ret = -EACCES;
9157
9158	mutex_lock(&nf_conn_btf_access_lock);
9159	if (nfct_btf_struct_access)
9160	ret = nfct_btf_struct_access(log, reg, off, size);
9161	mutex_unlock(lock: &nf_conn_btf_access_lock);
9162
9163	return ret;
9164	}
9165
9166	static bool sock_addr_is_valid_access(int off, int size,
9167	enum bpf_access_type type,
9168	const struct bpf_prog *prog,
9169	struct bpf_insn_access_aux *info)
9170	{
9171	const int size_default = sizeof(__u32);
9172
9173	if (off < `0` \|\| off >= sizeof(struct bpf_sock_addr))
9174	return false;
9175	if (off % size != `0`)
9176	return false;
9177
9178	/ Disallow access to fields not belonging to the attach type's address*
9179	* family.
9180	*/
9181	switch (off) {
9182	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
9183	switch (prog->expected_attach_type) {
9184	case BPF_CGROUP_INET4_BIND:
9185	case BPF_CGROUP_INET4_CONNECT:
9186	case BPF_CGROUP_INET4_GETPEERNAME:
9187	case BPF_CGROUP_INET4_GETSOCKNAME:
9188	case BPF_CGROUP_UDP4_SENDMSG:
9189	case BPF_CGROUP_UDP4_RECVMSG:
9190	break;
9191	default:
9192	return false;
9193	}
9194	break;
9195	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[`0`], user_ip6[`3`]):
9196	switch (prog->expected_attach_type) {
9197	case BPF_CGROUP_INET6_BIND:
9198	case BPF_CGROUP_INET6_CONNECT:
9199	case BPF_CGROUP_INET6_GETPEERNAME:
9200	case BPF_CGROUP_INET6_GETSOCKNAME:
9201	case BPF_CGROUP_UDP6_SENDMSG:
9202	case BPF_CGROUP_UDP6_RECVMSG:
9203	break;
9204	default:
9205	return false;
9206	}
9207	break;
9208	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
9209	switch (prog->expected_attach_type) {
9210	case BPF_CGROUP_UDP4_SENDMSG:
9211	break;
9212	default:
9213	return false;
9214	}
9215	break;
9216	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[`0`],
9217	msg_src_ip6[`3`]):
9218	switch (prog->expected_attach_type) {
9219	case BPF_CGROUP_UDP6_SENDMSG:
9220	break;
9221	default:
9222	return false;
9223	}
9224	break;
9225	}
9226
9227	switch (off) {
9228	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
9229	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[`0`], user_ip6[`3`]):
9230	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
9231	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[`0`],
9232	msg_src_ip6[`3`]):
9233	case bpf_ctx_range(struct bpf_sock_addr, user_port):
9234	if (type == BPF_READ) {
9235	bpf_ctx_record_field_size(aux: info, size: size_default);
9236
9237	if (bpf_ctx_wide_access_ok(off, size,
9238	struct bpf_sock_addr,
9239	user_ip6))
9240	return true;
9241
9242	if (bpf_ctx_wide_access_ok(off, size,
9243	struct bpf_sock_addr,
9244	msg_src_ip6))
9245	return true;
9246
9247	if (!bpf_ctx_narrow_access_ok(off, size, size_default))
9248	return false;
9249	} else {
9250	if (bpf_ctx_wide_access_ok(off, size,
9251	struct bpf_sock_addr,
9252	user_ip6))
9253	return true;
9254
9255	if (bpf_ctx_wide_access_ok(off, size,
9256	struct bpf_sock_addr,
9257	msg_src_ip6))
9258	return true;
9259
9260	if (size != size_default)
9261	return false;
9262	}
9263	break;
9264	case offsetof(struct bpf_sock_addr, sk):
9265	if (type != BPF_READ)
9266	return false;
9267	if (size != sizeof(__u64))
9268	return false;
9269	info->reg_type = PTR_TO_SOCKET;
9270	break;
9271	default:
9272	if (type == BPF_READ) {
9273	if (size != size_default)
9274	return false;
9275	} else {
9276	return false;
9277	}
9278	}
9279
9280	return true;
9281	}
9282
9283	static bool sock_ops_is_valid_access(int off, int size,
9284	enum bpf_access_type type,
9285	const struct bpf_prog *prog,
9286	struct bpf_insn_access_aux *info)
9287	{
9288	const int size_default = sizeof(__u32);
9289
9290	if (off < `0` \|\| off >= sizeof(struct bpf_sock_ops))
9291	return false;
9292
9293	/ The verifier guarantees that size > 0. /
9294	if (off % size != `0`)
9295	return false;
9296
9297	if (type == BPF_WRITE) {
9298	switch (off) {
9299	case offsetof(struct bpf_sock_ops, reply):
9300	case offsetof(struct bpf_sock_ops, sk_txhash):
9301	if (size != size_default)
9302	return false;
9303	break;
9304	default:
9305	return false;
9306	}
9307	} else {
9308	switch (off) {
9309	case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
9310	bytes_acked):
9311	if (size != sizeof(__u64))
9312	return false;
9313	break;
9314	case offsetof(struct bpf_sock_ops, sk):
9315	if (size != sizeof(__u64))
9316	return false;
9317	info->reg_type = PTR_TO_SOCKET_OR_NULL;
9318	break;
9319	case offsetof(struct bpf_sock_ops, skb_data):
9320	if (size != sizeof(__u64))
9321	return false;
9322	info->reg_type = PTR_TO_PACKET;
9323	break;
9324	case offsetof(struct bpf_sock_ops, skb_data_end):
9325	if (size != sizeof(__u64))
9326	return false;
9327	info->reg_type = PTR_TO_PACKET_END;
9328	break;
9329	case offsetof(struct bpf_sock_ops, skb_tcp_flags):
9330	bpf_ctx_record_field_size(aux: info, size: size_default);
9331	return bpf_ctx_narrow_access_ok(off, size,
9332	size_default);
9333	case offsetof(struct bpf_sock_ops, skb_hwtstamp):
9334	if (size != sizeof(__u64))
9335	return false;
9336	break;
9337	default:
9338	if (size != size_default)
9339	return false;
9340	break;
9341	}
9342	}
9343
9344	return true;
9345	}
9346
9347	static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
9348	const struct bpf_prog *prog)
9349	{
9350	return bpf_unclone_prologue(insn_buf, direct_write, prog, drop_verdict: SK_DROP);
9351	}
9352
9353	static bool sk_skb_is_valid_access(int off, int size,
9354	enum bpf_access_type type,
9355	const struct bpf_prog *prog,
9356	struct bpf_insn_access_aux *info)
9357	{
9358	switch (off) {
9359	case bpf_ctx_range(struct __sk_buff, tc_classid):
9360	case bpf_ctx_range(struct __sk_buff, data_meta):
9361	case bpf_ctx_range(struct __sk_buff, tstamp):
9362	case bpf_ctx_range(struct __sk_buff, wire_len):
9363	case bpf_ctx_range(struct __sk_buff, hwtstamp):
9364	return false;
9365	}
9366
9367	if (type == BPF_WRITE) {
9368	switch (off) {
9369	case bpf_ctx_range(struct __sk_buff, tc_index):
9370	case bpf_ctx_range(struct __sk_buff, priority):
9371	break;
9372	default:
9373	return false;
9374	}
9375	}
9376
9377	switch (off) {
9378	case bpf_ctx_range(struct __sk_buff, mark):
9379	return false;
9380	case bpf_ctx_range(struct __sk_buff, data):
9381	info->reg_type = PTR_TO_PACKET;
9382	break;
9383	case bpf_ctx_range(struct __sk_buff, data_end):
9384	info->reg_type = PTR_TO_PACKET_END;
9385	break;
9386	}
9387
9388	return bpf_skb_is_valid_access(off, size, type, prog, info);
9389	}
9390
9391	static bool sk_msg_is_valid_access(int off, int size,
9392	enum bpf_access_type type,
9393	const struct bpf_prog *prog,
9394	struct bpf_insn_access_aux *info)
9395	{
9396	if (type == BPF_WRITE)
9397	return false;
9398
9399	if (off % size != `0`)
9400	return false;
9401
9402	switch (off) {
9403	case offsetof(struct sk_msg_md, data):
9404	info->reg_type = PTR_TO_PACKET;
9405	if (size != sizeof(__u64))
9406	return false;
9407	break;
9408	case offsetof(struct sk_msg_md, data_end):
9409	info->reg_type = PTR_TO_PACKET_END;
9410	if (size != sizeof(__u64))
9411	return false;
9412	break;
9413	case offsetof(struct sk_msg_md, sk):
9414	if (size != sizeof(__u64))
9415	return false;
9416	info->reg_type = PTR_TO_SOCKET;
9417	break;
9418	case bpf_ctx_range(struct sk_msg_md, family):
9419	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
9420	case bpf_ctx_range(struct sk_msg_md, local_ip4):
9421	case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[`0`], remote_ip6[`3`]):
9422	case bpf_ctx_range_till(struct sk_msg_md, local_ip6[`0`], local_ip6[`3`]):
9423	case bpf_ctx_range(struct sk_msg_md, remote_port):
9424	case bpf_ctx_range(struct sk_msg_md, local_port):
9425	case bpf_ctx_range(struct sk_msg_md, size):
9426	if (size != sizeof(__u32))
9427	return false;
9428	break;
9429	default:
9430	return false;
9431	}
9432	return true;
9433	}
9434
9435	static bool flow_dissector_is_valid_access(int off, int size,
9436	enum bpf_access_type type,
9437	const struct bpf_prog *prog,
9438	struct bpf_insn_access_aux *info)
9439	{
9440	const int size_default = sizeof(__u32);
9441
9442	if (off < `0` \|\| off >= sizeof(struct __sk_buff))
9443	return false;
9444
9445	if (type == BPF_WRITE)
9446	return false;
9447
9448	switch (off) {
9449	case bpf_ctx_range(struct __sk_buff, data):
9450	if (info->is_ldsx \|\| size != size_default)
9451	return false;
9452	info->reg_type = PTR_TO_PACKET;
9453	return true;
9454	case bpf_ctx_range(struct __sk_buff, data_end):
9455	if (info->is_ldsx \|\| size != size_default)
9456	return false;
9457	info->reg_type = PTR_TO_PACKET_END;
9458	return true;
9459	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
9460	if (size != sizeof(__u64))
9461	return false;
9462	info->reg_type = PTR_TO_FLOW_KEYS;
9463	return true;
9464	default:
9465	return false;
9466	}
9467	}
9468
9469	static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
9470	const struct bpf_insn *si,
9471	struct bpf_insn *insn_buf,
9472	struct bpf_prog *prog,
9473	u32 *target_size)
9474
9475	{
9476	struct bpf_insn *insn = insn_buf;
9477
9478	switch (si->off) {
9479	case offsetof(struct __sk_buff, data):
9480	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_flow_dissector, data),
9481	si->dst_reg, si->src_reg,
9482	offsetof(struct bpf_flow_dissector, data));
9483	break;
9484
9485	case offsetof(struct __sk_buff, data_end):
9486	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_flow_dissector, data_end),
9487	si->dst_reg, si->src_reg,
9488	offsetof(struct bpf_flow_dissector, data_end));
9489	break;
9490
9491	case offsetof(struct __sk_buff, flow_keys):
9492	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_flow_dissector, flow_keys),
9493	si->dst_reg, si->src_reg,
9494	offsetof(struct bpf_flow_dissector, flow_keys));
9495	break;
9496	}
9497
9498	return insn - insn_buf;
9499	}
9500
9501	static struct bpf_insn bpf_convert_tstamp_type_read(const* struct bpf_insn *si,
9502	struct bpf_insn *insn)
9503	{
9504	__u8 value_reg = si->dst_reg;
9505	__u8 skb_reg = si->src_reg;
9506	BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI);
9507	BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME);
9508	BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC);
9509	BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI);
9510	*insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
9511	*insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
9512	#ifdef __BIG_ENDIAN_BITFIELD
9513	*insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT);
9514	#else
9515	BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & `0x1`));
9516	#endif
9517
9518	return insn;
9519	}
9520
9521	static struct bpf_insn *bpf_convert_shinfo_access(__u8 dst_reg, __u8 skb_reg,
9522	struct bpf_insn *insn)
9523	{
9524	/ si->dst_reg = skb_shinfo(SKB); /
9525	#ifdef NET_SKBUFF_DATA_USES_OFFSET
9526	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, end),
9527	BPF_REG_AX, skb_reg,
9528	offsetof(struct sk_buff, end));
9529	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, head),
9530	dst_reg, skb_reg,
9531	offsetof(struct sk_buff, head));
9532	*insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, BPF_REG_AX);
9533	#else
9534	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, end),
9535	dst_reg, skb_reg,
9536	offsetof(struct sk_buff, end));
9537	#endif
9538
9539	return insn;
9540	}
9541
9542	static struct bpf_insn bpf_convert_tstamp_read(const* struct bpf_prog *prog,
9543	const struct bpf_insn *si,
9544	struct bpf_insn *insn)
9545	{
9546	__u8 value_reg = si->dst_reg;
9547	__u8 skb_reg = si->src_reg;
9548
9549	#ifdef CONFIG_NET_XGRESS
9550	/ If the tstamp_type is read,*
9551	* the bpf prog is aware the tstamp could have delivery time.
9552	* Thus, read skb->tstamp as is if tstamp_type_access is true.
9553	*/
9554	if (!prog->tstamp_type_access) {
9555	/ AX is needed because src_reg and dst_reg could be the same /
9556	__u8 tmp_reg = BPF_REG_AX;
9557
9558	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
9559	/ check if ingress mask bits is set /
9560	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, `1`);
9561	*insn++ = BPF_JMP_A(`4`);
9562	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, `1`);
9563	*insn++ = BPF_JMP_A(`2`);
9564	/ skb->tc_at_ingress && skb->tstamp_type,*
9565	* read 0 as the (rcv) timestamp.
9566	*/
9567	*insn++ = BPF_MOV64_IMM(value_reg, `0`);
9568	*insn++ = BPF_JMP_A(`1`);
9569	}
9570	#endif
9571
9572	*insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
9573	offsetof(struct sk_buff, tstamp));
9574	return insn;
9575	}
9576
9577	static struct bpf_insn bpf_convert_tstamp_write(const* struct bpf_prog *prog,
9578	const struct bpf_insn *si,
9579	struct bpf_insn *insn)
9580	{
9581	__u8 value_reg = si->src_reg;
9582	__u8 skb_reg = si->dst_reg;
9583
9584	#ifdef CONFIG_NET_XGRESS
9585	/ If the tstamp_type is read,*
9586	* the bpf prog is aware the tstamp could have delivery time.
9587	* Thus, write skb->tstamp as is if tstamp_type_access is true.
9588	* Otherwise, writing at ingress will have to clear the
9589	* skb->tstamp_type bit also.
9590	*/
9591	if (!prog->tstamp_type_access) {
9592	__u8 tmp_reg = BPF_REG_AX;
9593
9594	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
9595	/ Writing __sk_buff->tstamp as ingress, goto <clear> /
9596	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, `1`);
9597	/ goto <store> /
9598	*insn++ = BPF_JMP_A(`2`);
9599	/ <clear>: skb->tstamp_type /
9600	*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK);
9601	*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
9602	}
9603	#endif
9604
9605	/ <store>: skb->tstamp = tstamp /
9606	*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) \| BPF_DW \| BPF_MEM,
9607	skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
9608	return insn;
9609	}
9610
/* Build a memory store that keeps the instruction class, both registers and
 * the immediate of the original instruction @si, but uses access size @size
 * and target offset @off.
 */
#define BPF_EMIT_STORE(size, si, off)					\
	BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM,		\
		     (si)->dst_reg, (si)->src_reg, (off), (si)->imm)
9614
9615	static u32 bpf_convert_ctx_access(enum bpf_access_type type,
9616	const struct bpf_insn *si,
9617	struct bpf_insn *insn_buf,
9618	struct bpf_prog prog, u32 target_size)
9619	{
9620	struct bpf_insn *insn = insn_buf;
9621	int off;
9622
9623	switch (si->off) {
9624	case offsetof(struct __sk_buff, len):
9625	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9626	bpf_target_off(struct sk_buff, len, `4`,
9627	target_size));
9628	break;
9629
9630	case offsetof(struct __sk_buff, protocol):
9631	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9632	bpf_target_off(struct sk_buff, protocol, `2`,
9633	target_size));
9634	break;
9635
9636	case offsetof(struct __sk_buff, vlan_proto):
9637	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9638	bpf_target_off(struct sk_buff, vlan_proto, `2`,
9639	target_size));
9640	break;
9641
9642	case offsetof(struct __sk_buff, priority):
9643	if (type == BPF_WRITE)
9644	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9645	bpf_target_off(struct sk_buff, priority, `4`,
9646	target_size));
9647	else
9648	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9649	bpf_target_off(struct sk_buff, priority, `4`,
9650	target_size));
9651	break;
9652
9653	case offsetof(struct __sk_buff, ingress_ifindex):
9654	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9655	bpf_target_off(struct sk_buff, skb_iif, `4`,
9656	target_size));
9657	break;
9658
9659	case offsetof(struct __sk_buff, ifindex):
9660	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, dev),
9661	si->dst_reg, si->src_reg,
9662	offsetof(struct sk_buff, dev));
9663	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
9664	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9665	bpf_target_off(struct net_device, ifindex, `4`,
9666	target_size));
9667	break;
9668
9669	case offsetof(struct __sk_buff, hash):
9670	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9671	bpf_target_off(struct sk_buff, hash, `4`,
9672	target_size));
9673	break;
9674
9675	case offsetof(struct __sk_buff, mark):
9676	if (type == BPF_WRITE)
9677	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9678	bpf_target_off(struct sk_buff, mark, `4`,
9679	target_size));
9680	else
9681	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9682	bpf_target_off(struct sk_buff, mark, `4`,
9683	target_size));
9684	break;
9685
9686	case offsetof(struct __sk_buff, pkt_type):
9687	*target_size = `1`;
9688	*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
9689	PKT_TYPE_OFFSET);
9690	*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
9691	#ifdef __BIG_ENDIAN_BITFIELD
9692	*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, `5`);
9693	#endif
9694	break;
9695
9696	case offsetof(struct __sk_buff, queue_mapping):
9697	if (type == BPF_WRITE) {
9698	u32 offset = bpf_target_off(struct sk_buff, queue_mapping, `2`, target_size);
9699
9700	if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) {
9701	insn++ = BPF_JMP_A(`0`); /* noop /
9702	break;
9703	}
9704
9705	if (BPF_CLASS(si->code) == BPF_STX)
9706	*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, `1`);
9707	*insn++ = BPF_EMIT_STORE(BPF_H, si, offset);
9708	} else {
9709	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9710	bpf_target_off(struct sk_buff,
9711	queue_mapping,
9712	`2`, target_size));
9713	}
9714	break;
9715
9716	case offsetof(struct __sk_buff, vlan_present):
9717	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9718	bpf_target_off(struct sk_buff,
9719	vlan_all, `4`, target_size));
9720	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
9721	*insn++ = BPF_ALU32_IMM(BPF_MOV, si->dst_reg, `1`);
9722	break;
9723
9724	case offsetof(struct __sk_buff, vlan_tci):
9725	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9726	bpf_target_off(struct sk_buff, vlan_tci, `2`,
9727	target_size));
9728	break;
9729
9730	case offsetof(struct __sk_buff, cb[`0`]) ...
9731	offsetofend(struct __sk_buff, cb[`4`]) - `1`:
9732	BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < `20`);
9733	BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
9734	offsetof(struct qdisc_skb_cb, data)) %
9735	sizeof(__u64));
9736
9737	prog->cb_access = `1`;
9738	off = si->off;
9739	off -= offsetof(struct __sk_buff, cb[`0`]);
9740	off += offsetof(struct sk_buff, cb);
9741	off += offsetof(struct qdisc_skb_cb, data);
9742	if (type == BPF_WRITE)
9743	*insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
9744	else
9745	*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
9746	si->src_reg, off);
9747	break;
9748
9749	case offsetof(struct __sk_buff, tc_classid):
9750	BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != `2`);
9751
9752	off = si->off;
9753	off -= offsetof(struct __sk_buff, tc_classid);
9754	off += offsetof(struct sk_buff, cb);
9755	off += offsetof(struct qdisc_skb_cb, tc_classid);
9756	*target_size = `2`;
9757	if (type == BPF_WRITE)
9758	*insn++ = BPF_EMIT_STORE(BPF_H, si, off);
9759	else
9760	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
9761	si->src_reg, off);
9762	break;
9763
9764	case offsetof(struct __sk_buff, data):
9765	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
9766	si->dst_reg, si->src_reg,
9767	offsetof(struct sk_buff, data));
9768	break;
9769
9770	case offsetof(struct __sk_buff, data_meta):
9771	off = si->off;
9772	off -= offsetof(struct __sk_buff, data_meta);
9773	off += offsetof(struct sk_buff, cb);
9774	off += offsetof(struct bpf_skb_data_end, data_meta);
9775	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg,
9776	si->src_reg, off);
9777	break;
9778
9779	case offsetof(struct __sk_buff, data_end):
9780	off = si->off;
9781	off -= offsetof(struct __sk_buff, data_end);
9782	off += offsetof(struct sk_buff, cb);
9783	off += offsetof(struct bpf_skb_data_end, data_end);
9784	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg,
9785	si->src_reg, off);
9786	break;
9787
9788	case offsetof(struct __sk_buff, tc_index):
9789	#ifdef CONFIG_NET_SCHED
9790	if (type == BPF_WRITE)
9791	*insn++ = BPF_EMIT_STORE(BPF_H, si,
9792	bpf_target_off(struct sk_buff, tc_index, `2`,
9793	target_size));
9794	else
9795	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9796	bpf_target_off(struct sk_buff, tc_index, `2`,
9797	target_size));
9798	#else
9799	*target_size = `2`;
9800	if (type == BPF_WRITE)
9801	*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
9802	else
9803	*insn++ = BPF_MOV64_IMM(si->dst_reg, `0`);
9804	#endif
9805	break;
9806
9807	case offsetof(struct __sk_buff, napi_id):
9808	#if defined(CONFIG_NET_RX_BUSY_POLL)
9809	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9810	bpf_target_off(struct sk_buff, napi_id, `4`,
9811	target_size));
9812	*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, `1`);
9813	*insn++ = BPF_MOV64_IMM(si->dst_reg, `0`);
9814	#else
9815	*target_size = `4`;
9816	*insn++ = BPF_MOV64_IMM(si->dst_reg, `0`);
9817	#endif
9818	break;
9819	case offsetof(struct __sk_buff, family):
9820	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != `2`);
9821
9822	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9823	si->dst_reg, si->src_reg,
9824	offsetof(struct sk_buff, sk));
9825	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9826	bpf_target_off(struct sock_common,
9827	skc_family,
9828	`2`, target_size));
9829	break;
9830	case offsetof(struct __sk_buff, remote_ip4):
9831	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != `4`);
9832
9833	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9834	si->dst_reg, si->src_reg,
9835	offsetof(struct sk_buff, sk));
9836	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9837	bpf_target_off(struct sock_common,
9838	skc_daddr,
9839	`4`, target_size));
9840	break;
9841	case offsetof(struct __sk_buff, local_ip4):
9842	BUILD_BUG_ON(sizeof_field(struct sock_common,
9843	skc_rcv_saddr) != `4`);
9844
9845	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9846	si->dst_reg, si->src_reg,
9847	offsetof(struct sk_buff, sk));
9848	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9849	bpf_target_off(struct sock_common,
9850	skc_rcv_saddr,
9851	`4`, target_size));
9852	break;
9853	case offsetof(struct __sk_buff, remote_ip6[`0`]) ...
9854	offsetof(struct __sk_buff, remote_ip6[`3`]):
9855	#if IS_ENABLED(CONFIG_IPV6)
9856	BUILD_BUG_ON(sizeof_field(struct sock_common,
9857	skc_v6_daddr.s6_addr32[`0`]) != `4`);
9858
9859	off = si->off;
9860	off -= offsetof(struct __sk_buff, remote_ip6[`0`]);
9861
9862	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9863	si->dst_reg, si->src_reg,
9864	offsetof(struct sk_buff, sk));
9865	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9866	offsetof(struct sock_common,
9867	skc_v6_daddr.s6_addr32[`0`]) +
9868	off);
9869	#else
9870	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
9871	#endif
9872	break;
9873	case offsetof(struct __sk_buff, local_ip6[`0`]) ...
9874	offsetof(struct __sk_buff, local_ip6[`3`]):
9875	#if IS_ENABLED(CONFIG_IPV6)
9876	BUILD_BUG_ON(sizeof_field(struct sock_common,
9877	skc_v6_rcv_saddr.s6_addr32[`0`]) != `4`);
9878
9879	off = si->off;
9880	off -= offsetof(struct __sk_buff, local_ip6[`0`]);
9881
9882	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9883	si->dst_reg, si->src_reg,
9884	offsetof(struct sk_buff, sk));
9885	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9886	offsetof(struct sock_common,
9887	skc_v6_rcv_saddr.s6_addr32[`0`]) +
9888	off);
9889	#else
9890	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
9891	#endif
9892	break;
9893
9894	case offsetof(struct __sk_buff, remote_port):
9895	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != `2`);
9896
9897	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9898	si->dst_reg, si->src_reg,
9899	offsetof(struct sk_buff, sk));
9900	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9901	bpf_target_off(struct sock_common,
9902	skc_dport,
9903	`2`, target_size));
9904	#ifndef __BIG_ENDIAN_BITFIELD
9905	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, `16`);
9906	#endif
9907	break;
9908
9909	case offsetof(struct __sk_buff, local_port):
9910	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != `2`);
9911
9912	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9913	si->dst_reg, si->src_reg,
9914	offsetof(struct sk_buff, sk));
9915	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9916	bpf_target_off(struct sock_common,
9917	skc_num, `2`, target_size));
9918	break;
9919
9920	case offsetof(struct __sk_buff, tstamp):
9921	BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != `8`);
9922
9923	if (type == BPF_WRITE)
9924	insn = bpf_convert_tstamp_write(prog, si, insn);
9925	else
9926	insn = bpf_convert_tstamp_read(prog, si, insn);
9927	break;
9928
9929	case offsetof(struct __sk_buff, tstamp_type):
9930	insn = bpf_convert_tstamp_type_read(si, insn);
9931	break;
9932
9933	case offsetof(struct __sk_buff, gso_segs):
9934	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->src_reg, insn);
9935	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* skb_shared_info, gso_segs),
9936	si->dst_reg, si->dst_reg,
9937	bpf_target_off(struct skb_shared_info,
9938	gso_segs, `2`,
9939	target_size));
9940	break;
9941	case offsetof(struct __sk_buff, gso_size):
9942	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->src_reg, insn);
9943	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* skb_shared_info, gso_size),
9944	si->dst_reg, si->dst_reg,
9945	bpf_target_off(struct skb_shared_info,
9946	gso_size, `2`,
9947	target_size));
9948	break;
9949	case offsetof(struct __sk_buff, wire_len):
9950	BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != `4`);
9951
9952	off = si->off;
9953	off -= offsetof(struct __sk_buff, wire_len);
9954	off += offsetof(struct sk_buff, cb);
9955	off += offsetof(struct qdisc_skb_cb, pkt_len);
9956	*target_size = `4`;
9957	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
9958	break;
9959
9960	case offsetof(struct __sk_buff, sk):
9961	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, sk),
9962	si->dst_reg, si->src_reg,
9963	offsetof(struct sk_buff, sk));
9964	break;
9965	case offsetof(struct __sk_buff, hwtstamp):
9966	BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != `8`);
9967	BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != `0`);
9968
9969	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->src_reg, insn);
9970	*insn++ = BPF_LDX_MEM(BPF_DW,
9971	si->dst_reg, si->dst_reg,
9972	bpf_target_off(struct skb_shared_info,
9973	hwtstamps, `8`,
9974	target_size));
9975	break;
9976	}
9977
9978	return insn - insn_buf;
9979	}
9980
9981	u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
9982	const struct bpf_insn *si,
9983	struct bpf_insn *insn_buf,
9984	struct bpf_prog prog, u32 target_size)
9985	{
9986	struct bpf_insn *insn = insn_buf;
9987	int off;
9988
9989	switch (si->off) {
9990	case offsetof(struct bpf_sock, bound_dev_if):
9991	BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != `4`);
9992
9993	if (type == BPF_WRITE)
9994	*insn++ = BPF_EMIT_STORE(BPF_W, si,
9995	offsetof(struct sock, sk_bound_dev_if));
9996	else
9997	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9998	offsetof(struct sock, sk_bound_dev_if));
9999	break;
10000
10001	case offsetof(struct bpf_sock, mark):
10002	BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != `4`);
10003
10004	if (type == BPF_WRITE)
10005	*insn++ = BPF_EMIT_STORE(BPF_W, si,
10006	offsetof(struct sock, sk_mark));
10007	else
10008	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10009	offsetof(struct sock, sk_mark));
10010	break;
10011
10012	case offsetof(struct bpf_sock, priority):
10013	BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != `4`);
10014
10015	if (type == BPF_WRITE)
10016	*insn++ = BPF_EMIT_STORE(BPF_W, si,
10017	offsetof(struct sock, sk_priority));
10018	else
10019	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10020	offsetof(struct sock, sk_priority));
10021	break;
10022
10023	case offsetof(struct bpf_sock, family):
10024	*insn++ = BPF_LDX_MEM(
10025	BPF_FIELD_SIZEOF(struct sock_common, skc_family),
10026	si->dst_reg, si->src_reg,
10027	bpf_target_off(struct sock_common,
10028	skc_family,
10029	sizeof_field(struct sock_common,
10030	skc_family),
10031	target_size));
10032	break;
10033
10034	case offsetof(struct bpf_sock, type):
10035	*insn++ = BPF_LDX_MEM(
10036	BPF_FIELD_SIZEOF(struct sock, sk_type),
10037	si->dst_reg, si->src_reg,
10038	bpf_target_off(struct sock, sk_type,
10039	sizeof_field(struct sock, sk_type),
10040	target_size));
10041	break;
10042
10043	case offsetof(struct bpf_sock, protocol):
10044	*insn++ = BPF_LDX_MEM(
10045	BPF_FIELD_SIZEOF(struct sock, sk_protocol),
10046	si->dst_reg, si->src_reg,
10047	bpf_target_off(struct sock, sk_protocol,
10048	sizeof_field(struct sock, sk_protocol),
10049	target_size));
10050	break;
10051
10052	case offsetof(struct bpf_sock, src_ip4):
10053	*insn++ = BPF_LDX_MEM(
10054	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
10055	bpf_target_off(struct sock_common, skc_rcv_saddr,
10056	sizeof_field(struct sock_common,
10057	skc_rcv_saddr),
10058	target_size));
10059	break;
10060
10061	case offsetof(struct bpf_sock, dst_ip4):
10062	*insn++ = BPF_LDX_MEM(
10063	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
10064	bpf_target_off(struct sock_common, skc_daddr,
10065	sizeof_field(struct sock_common,
10066	skc_daddr),
10067	target_size));
10068	break;
10069
10070	case bpf_ctx_range_till(struct bpf_sock, src_ip6[`0`], src_ip6[`3`]):
10071	#if IS_ENABLED(CONFIG_IPV6)
10072	off = si->off;
10073	off -= offsetof(struct bpf_sock, src_ip6[`0`]);
10074	*insn++ = BPF_LDX_MEM(
10075	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
10076	bpf_target_off(
10077	struct sock_common,
10078	skc_v6_rcv_saddr.s6_addr32[`0`],
10079	sizeof_field(struct sock_common,
10080	skc_v6_rcv_saddr.s6_addr32[`0`]),
10081	target_size) + off);
10082	#else
10083	(void)off;
10084	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10085	#endif
10086	break;
10087
10088	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[`0`], dst_ip6[`3`]):
10089	#if IS_ENABLED(CONFIG_IPV6)
10090	off = si->off;
10091	off -= offsetof(struct bpf_sock, dst_ip6[`0`]);
10092	*insn++ = BPF_LDX_MEM(
10093	BPF_SIZE(si->code), si->dst_reg, si->src_reg,
10094	bpf_target_off(struct sock_common,
10095	skc_v6_daddr.s6_addr32[`0`],
10096	sizeof_field(struct sock_common,
10097	skc_v6_daddr.s6_addr32[`0`]),
10098	target_size) + off);
10099	#else
10100	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10101	*target_size = `4`;
10102	#endif
10103	break;
10104
10105	case offsetof(struct bpf_sock, src_port):
10106	*insn++ = BPF_LDX_MEM(
10107	BPF_FIELD_SIZEOF(struct sock_common, skc_num),
10108	si->dst_reg, si->src_reg,
10109	bpf_target_off(struct sock_common, skc_num,
10110	sizeof_field(struct sock_common,
10111	skc_num),
10112	target_size));
10113	break;
10114
10115	case offsetof(struct bpf_sock, dst_port):
10116	*insn++ = BPF_LDX_MEM(
10117	BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
10118	si->dst_reg, si->src_reg,
10119	bpf_target_off(struct sock_common, skc_dport,
10120	sizeof_field(struct sock_common,
10121	skc_dport),
10122	target_size));
10123	break;
10124
10125	case offsetof(struct bpf_sock, state):
10126	*insn++ = BPF_LDX_MEM(
10127	BPF_FIELD_SIZEOF(struct sock_common, skc_state),
10128	si->dst_reg, si->src_reg,
10129	bpf_target_off(struct sock_common, skc_state,
10130	sizeof_field(struct sock_common,
10131	skc_state),
10132	target_size));
10133	break;
10134	case offsetof(struct bpf_sock, rx_queue_mapping):
10135	#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
10136	*insn++ = BPF_LDX_MEM(
10137	BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
10138	si->dst_reg, si->src_reg,
10139	bpf_target_off(struct sock, sk_rx_queue_mapping,
10140	sizeof_field(struct sock,
10141	sk_rx_queue_mapping),
10142	target_size));
10143	*insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
10144	`1`);
10145	*insn++ = BPF_MOV64_IMM(si->dst_reg, -`1`);
10146	#else
10147	*insn++ = BPF_MOV64_IMM(si->dst_reg, -`1`);
10148	*target_size = `2`;
10149	#endif
10150	break;
10151	}
10152
10153	return insn - insn_buf;
10154	}
10155
10156	static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
10157	const struct bpf_insn *si,
10158	struct bpf_insn *insn_buf,
10159	struct bpf_prog prog, u32 target_size)
10160	{
10161	struct bpf_insn *insn = insn_buf;
10162
10163	switch (si->off) {
10164	case offsetof(struct __sk_buff, ifindex):
10165	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, dev),
10166	si->dst_reg, si->src_reg,
10167	offsetof(struct sk_buff, dev));
10168	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10169	bpf_target_off(struct net_device, ifindex, `4`,
10170	target_size));
10171	break;
10172	default:
10173	return bpf_convert_ctx_access(type, si, insn_buf, prog,
10174	target_size);
10175	}
10176
10177	return insn - insn_buf;
10178	}
10179
10180	static u32 xdp_convert_ctx_access(enum bpf_access_type type,
10181	const struct bpf_insn *si,
10182	struct bpf_insn *insn_buf,
10183	struct bpf_prog prog, u32 target_size)
10184	{
10185	struct bpf_insn *insn = insn_buf;
10186
10187	switch (si->off) {
10188	case offsetof(struct xdp_md, data):
10189	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, data),
10190	si->dst_reg, si->src_reg,
10191	offsetof(struct xdp_buff, data));
10192	break;
10193	case offsetof(struct xdp_md, data_meta):
10194	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, data_meta),
10195	si->dst_reg, si->src_reg,
10196	offsetof(struct xdp_buff, data_meta));
10197	break;
10198	case offsetof(struct xdp_md, data_end):
10199	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, data_end),
10200	si->dst_reg, si->src_reg,
10201	offsetof(struct xdp_buff, data_end));
10202	break;
10203	case offsetof(struct xdp_md, ingress_ifindex):
10204	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, rxq),
10205	si->dst_reg, si->src_reg,
10206	offsetof(struct xdp_buff, rxq));
10207	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_rxq_info, dev),
10208	si->dst_reg, si->dst_reg,
10209	offsetof(struct xdp_rxq_info, dev));
10210	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10211	offsetof(struct net_device, ifindex));
10212	break;
10213	case offsetof(struct xdp_md, rx_queue_index):
10214	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, rxq),
10215	si->dst_reg, si->src_reg,
10216	offsetof(struct xdp_buff, rxq));
10217	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10218	offsetof(struct xdp_rxq_info,
10219	queue_index));
10220	break;
10221	case offsetof(struct xdp_md, egress_ifindex):
10222	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_buff, txq),
10223	si->dst_reg, si->src_reg,
10224	offsetof(struct xdp_buff, txq));
10225	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* xdp_txq_info, dev),
10226	si->dst_reg, si->dst_reg,
10227	offsetof(struct xdp_txq_info, dev));
10228	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10229	offsetof(struct net_device, ifindex));
10230	break;
10231	}
10232
10233	return insn - insn_buf;
10234	}
10235
/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
 * context Structure, F is Field in context structure that contains a pointer
 * to Nested Structure of type NS that has the field NF.
 *
 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
 * sure that SIZE is not greater than actual size of S.F.NF.
 *
 * If offset OFF is provided, the load happens from that offset relative to
 * offset of NF.
 *
 * The macro expects `insn`, `si` and `target_size` to be in scope at the
 * expansion site and emits two instructions: one loading the pointer S.F
 * into si->dst_reg, one loading the nested field through it.
 */
#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)	       \
	do {								       \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,     \
				      si->src_reg, offsetof(S, F));	       \
		*insn++ = BPF_LDX_MEM(					       \
			(SIZE), si->dst_reg, si->dst_reg,		       \
			bpf_target_off(NS, NF, sizeof_field(NS, NF),	       \
				       target_size)			       \
				+ (OFF));				       \
	} while (0)
10256
/* Load the whole nested field S.F->NF: the load size is the size of NF and
 * the extra offset is 0.
 */
#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)			       \
	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,		       \
					     BPF_FIELD_SIZEOF(NS, NF), 0)
10260
/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
 *
 * In addition it uses Temporary Field TF (member of struct S) as the 3rd
 * "register" since two registers available in convert_ctx_access are not
 * enough: we can override neither SRC, since it contains value to store, nor
 * DST since it contains pointer to context that may be used by later
 * instructions. But we need a temporary place to save pointer to nested
 * structure whose field we want to store to.
 *
 * The scratch register is BPF_REG_9, or the next lower register that is
 * neither si->src_reg nor si->dst_reg. Its value is spilled to S.TF before
 * use and reloaded from S.TF after the store.
 */
#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF)	       \
	do {								       \
		int tmp_reg = BPF_REG_9;				       \
		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
			--tmp_reg;					       \
		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
			--tmp_reg;					       \
		/* save the scratch register in the context */		       \
		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,	       \
				      offsetof(S, TF));			       \
		/* tmp_reg = S.F (pointer to the nested structure) */	       \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,	       \
				      si->dst_reg, offsetof(S, F));	       \
		/* store the value through tmp_reg */			       \
		*insn++ = BPF_RAW_INSN((SIZE) | BPF_MEM | BPF_CLASS(si->code), \
			tmp_reg, si->src_reg,				       \
			bpf_target_off(NS, NF, sizeof_field(NS, NF),	       \
				       target_size)			       \
				+ (OFF),				       \
			si->imm);					       \
		/* restore the scratch register */			       \
		*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,	       \
				      offsetof(S, TF));			       \
	} while (0)
10291
/* Dispatch on `type` (expected in scope at the expansion site): emit the
 * store sequence for BPF_WRITE and the load sequence otherwise. TF is only
 * used by the store path.
 */
#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
						      TF)		       \
	do {								       \
		if (type == BPF_WRITE) {				       \
			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE,   \
							 OFF, TF);	       \
		} else {						       \
			SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(		       \
				S, NS, F, NF, SIZE, OFF);		       \
		}							       \
	} while (0)
10303
10304	static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
10305	const struct bpf_insn *si,
10306	struct bpf_insn *insn_buf,
10307	struct bpf_prog prog, u32 target_size)
10308	{
10309	int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
10310	struct bpf_insn *insn = insn_buf;
10311
10312	switch (si->off) {
10313	case offsetof(struct bpf_sock_addr, user_family):
10314	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10315	struct sockaddr, uaddr, sa_family);
10316	break;
10317
10318	case offsetof(struct bpf_sock_addr, user_ip4):
10319	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10320	struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
10321	sin_addr, BPF_SIZE(si->code), `0`, tmp_reg);
10322	break;
10323
10324	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[`0`], user_ip6[`3`]):
10325	off = si->off;
10326	off -= offsetof(struct bpf_sock_addr, user_ip6[`0`]);
10327	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10328	struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
10329	sin6_addr.s6_addr32[`0`], BPF_SIZE(si->code), off,
10330	tmp_reg);
10331	break;
10332
10333	case offsetof(struct bpf_sock_addr, user_port):
10334	/ To get port we need to know sa_family first and then treat*
10335	* sockaddr as either sockaddr_in or sockaddr_in6.
10336	* Though we can simplify since port field has same offset and
10337	* size in both structures.
10338	* Here we check this invariant and use just one of the
10339	* structures if it's true.
10340	*/
10341	BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
10342	offsetof(struct sockaddr_in6, sin6_port));
10343	BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
10344	sizeof_field(struct sockaddr_in6, sin6_port));
10345	/ Account for sin6_port being smaller than user_port. /
10346	port_size = min(port_size, BPF_LDST_BYTES(si));
10347	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10348	struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
10349	sin6_port, bytes_to_bpf_size(port_size), `0`, tmp_reg);
10350	break;
10351
10352	case offsetof(struct bpf_sock_addr, family):
10353	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10354	struct sock, sk, sk_family);
10355	break;
10356
10357	case offsetof(struct bpf_sock_addr, type):
10358	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10359	struct sock, sk, sk_type);
10360	break;
10361
10362	case offsetof(struct bpf_sock_addr, protocol):
10363	SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
10364	struct sock, sk, sk_protocol);
10365	break;
10366
10367	case offsetof(struct bpf_sock_addr, msg_src_ip4):
10368	/ Treat t_ctx as struct in_addr for msg_src_ip4. /
10369	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10370	struct bpf_sock_addr_kern, struct in_addr, t_ctx,
10371	s_addr, BPF_SIZE(si->code), `0`, tmp_reg);
10372	break;
10373
10374	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[`0`],
10375	msg_src_ip6[`3`]):
10376	off = si->off;
10377	off -= offsetof(struct bpf_sock_addr, msg_src_ip6[`0`]);
10378	/ Treat t_ctx as struct in6_addr for msg_src_ip6. /
10379	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
10380	struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
10381	s6_addr32[`0`], BPF_SIZE(si->code), off, tmp_reg);
10382	break;
10383	case offsetof(struct bpf_sock_addr, sk):
10384	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_addr_kern, sk),
10385	si->dst_reg, si->src_reg,
10386	offsetof(struct bpf_sock_addr_kern, sk));
10387	break;
10388	}
10389
10390	return insn - insn_buf;
10391	}
10392
10393	static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
10394	const struct bpf_insn *si,
10395	struct bpf_insn *insn_buf,
10396	struct bpf_prog *prog,
10397	u32 *target_size)
10398	{
10399	struct bpf_insn *insn = insn_buf;
10400	int off;
10401
10402	/ Helper macro for adding read access to tcp_sock or sock fields. /
10403	#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
10404	do { \
10405	int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \
10406	BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
10407	sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
10408	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10409	reg--; \
10410	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10411	reg--; \
10412	if (si->dst_reg == si->src_reg) { \
10413	*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
10414	offsetof(struct bpf_sock_ops_kern, \
10415	temp)); \
10416	fullsock_reg = reg; \
10417	jmp += 2; \
10418	} \
10419	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10420	struct bpf_sock_ops_kern, \
10421	is_locked_tcp_sock), \
10422	fullsock_reg, si->src_reg, \
10423	offsetof(struct bpf_sock_ops_kern, \
10424	is_locked_tcp_sock)); \
10425	*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
10426	if (si->dst_reg == si->src_reg) \
10427	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10428	offsetof(struct bpf_sock_ops_kern, \
10429	temp)); \
10430	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10431	struct bpf_sock_ops_kern, sk),\
10432	si->dst_reg, si->src_reg, \
10433	offsetof(struct bpf_sock_ops_kern, sk));\
10434	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
10435	OBJ_FIELD), \
10436	si->dst_reg, si->dst_reg, \
10437	offsetof(OBJ, OBJ_FIELD)); \
10438	if (si->dst_reg == si->src_reg) { \
10439	*insn++ = BPF_JMP_A(1); \
10440	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10441	offsetof(struct bpf_sock_ops_kern, \
10442	temp)); \
10443	} \
10444	} while (0)
10445
10446	#define SOCK_OPS_GET_SK() \
10447	do { \
10448	int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \
10449	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10450	reg--; \
10451	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10452	reg--; \
10453	if (si->dst_reg == si->src_reg) { \
10454	*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
10455	offsetof(struct bpf_sock_ops_kern, \
10456	temp)); \
10457	fullsock_reg = reg; \
10458	jmp += 2; \
10459	} \
10460	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10461	struct bpf_sock_ops_kern, \
10462	is_fullsock), \
10463	fullsock_reg, si->src_reg, \
10464	offsetof(struct bpf_sock_ops_kern, \
10465	is_fullsock)); \
10466	*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
10467	if (si->dst_reg == si->src_reg) \
10468	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10469	offsetof(struct bpf_sock_ops_kern, \
10470	temp)); \
10471	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10472	struct bpf_sock_ops_kern, sk),\
10473	si->dst_reg, si->src_reg, \
10474	offsetof(struct bpf_sock_ops_kern, sk));\
10475	if (si->dst_reg == si->src_reg) { \
10476	*insn++ = BPF_JMP_A(1); \
10477	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10478	offsetof(struct bpf_sock_ops_kern, \
10479	temp)); \
10480	} \
10481	} while (0)
10482
10483	#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
10484	SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
10485
10486	/ Helper macro for adding write access to tcp_sock or sock fields.*
10487	* The macro is called with two registers, dst_reg which contains a pointer
10488	* to ctx (context) and src_reg which contains the value that should be
10489	* stored. However, we need an additional register since we cannot overwrite
10490	* dst_reg because it may be used later in the program.
10491	* Instead we "borrow" one of the other register. We first save its value
10492	* into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
10493	* it at the end of the macro.
10494	*/
10495	#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
10496	do { \
10497	int reg = BPF_REG_9; \
10498	BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
10499	sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
10500	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10501	reg--; \
10502	if (si->dst_reg == reg \|\| si->src_reg == reg) \
10503	reg--; \
10504	*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
10505	offsetof(struct bpf_sock_ops_kern, \
10506	temp)); \
10507	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10508	struct bpf_sock_ops_kern, \
10509	is_locked_tcp_sock), \
10510	reg, si->dst_reg, \
10511	offsetof(struct bpf_sock_ops_kern, \
10512	is_locked_tcp_sock)); \
10513	*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
10514	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10515	struct bpf_sock_ops_kern, sk),\
10516	reg, si->dst_reg, \
10517	offsetof(struct bpf_sock_ops_kern, sk));\
10518	*insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) \| \
10519	BPF_MEM \| BPF_CLASS(si->code), \
10520	reg, si->src_reg, \
10521	offsetof(OBJ, OBJ_FIELD), \
10522	si->imm); \
10523	*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
10524	offsetof(struct bpf_sock_ops_kern, \
10525	temp)); \
10526	} while (0)
10527
10528	#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
10529	do { \
10530	if (TYPE == BPF_WRITE) \
10531	SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
10532	else \
10533	SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
10534	} while (0)
10535
10536	switch (si->off) {
10537	case offsetof(struct bpf_sock_ops, op):
10538	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10539	op),
10540	si->dst_reg, si->src_reg,
10541	offsetof(struct bpf_sock_ops_kern, op));
10542	break;
10543
10544	case offsetof(struct bpf_sock_ops, replylong[`0`]) ...
10545	offsetof(struct bpf_sock_ops, replylong[`3`]):
10546	BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
10547	sizeof_field(struct bpf_sock_ops_kern, reply));
10548	BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
10549	sizeof_field(struct bpf_sock_ops_kern, replylong));
10550	off = si->off;
10551	off -= offsetof(struct bpf_sock_ops, replylong[`0`]);
10552	off += offsetof(struct bpf_sock_ops_kern, replylong[`0`]);
10553	if (type == BPF_WRITE)
10554	*insn++ = BPF_EMIT_STORE(BPF_W, si, off);
10555	else
10556	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10557	off);
10558	break;
10559
10560	case offsetof(struct bpf_sock_ops, family):
10561	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != `2`);
10562
10563	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10564	struct bpf_sock_ops_kern, sk),
10565	si->dst_reg, si->src_reg,
10566	offsetof(struct bpf_sock_ops_kern, sk));
10567	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10568	offsetof(struct sock_common, skc_family));
10569	break;
10570
10571	case offsetof(struct bpf_sock_ops, remote_ip4):
10572	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != `4`);
10573
10574	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10575	struct bpf_sock_ops_kern, sk),
10576	si->dst_reg, si->src_reg,
10577	offsetof(struct bpf_sock_ops_kern, sk));
10578	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10579	offsetof(struct sock_common, skc_daddr));
10580	break;
10581
10582	case offsetof(struct bpf_sock_ops, local_ip4):
10583	BUILD_BUG_ON(sizeof_field(struct sock_common,
10584	skc_rcv_saddr) != `4`);
10585
10586	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10587	struct bpf_sock_ops_kern, sk),
10588	si->dst_reg, si->src_reg,
10589	offsetof(struct bpf_sock_ops_kern, sk));
10590	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10591	offsetof(struct sock_common,
10592	skc_rcv_saddr));
10593	break;
10594
10595	case offsetof(struct bpf_sock_ops, remote_ip6[`0`]) ...
10596	offsetof(struct bpf_sock_ops, remote_ip6[`3`]):
10597	#if IS_ENABLED(CONFIG_IPV6)
10598	BUILD_BUG_ON(sizeof_field(struct sock_common,
10599	skc_v6_daddr.s6_addr32[`0`]) != `4`);
10600
10601	off = si->off;
10602	off -= offsetof(struct bpf_sock_ops, remote_ip6[`0`]);
10603	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10604	struct bpf_sock_ops_kern, sk),
10605	si->dst_reg, si->src_reg,
10606	offsetof(struct bpf_sock_ops_kern, sk));
10607	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10608	offsetof(struct sock_common,
10609	skc_v6_daddr.s6_addr32[`0`]) +
10610	off);
10611	#else
10612	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10613	#endif
10614	break;
10615
10616	case offsetof(struct bpf_sock_ops, local_ip6[`0`]) ...
10617	offsetof(struct bpf_sock_ops, local_ip6[`3`]):
10618	#if IS_ENABLED(CONFIG_IPV6)
10619	BUILD_BUG_ON(sizeof_field(struct sock_common,
10620	skc_v6_rcv_saddr.s6_addr32[`0`]) != `4`);
10621
10622	off = si->off;
10623	off -= offsetof(struct bpf_sock_ops, local_ip6[`0`]);
10624	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10625	struct bpf_sock_ops_kern, sk),
10626	si->dst_reg, si->src_reg,
10627	offsetof(struct bpf_sock_ops_kern, sk));
10628	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10629	offsetof(struct sock_common,
10630	skc_v6_rcv_saddr.s6_addr32[`0`]) +
10631	off);
10632	#else
10633	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
10634	#endif
10635	break;
10636
10637	case offsetof(struct bpf_sock_ops, remote_port):
10638	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != `2`);
10639
10640	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10641	struct bpf_sock_ops_kern, sk),
10642	si->dst_reg, si->src_reg,
10643	offsetof(struct bpf_sock_ops_kern, sk));
10644	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10645	offsetof(struct sock_common, skc_dport));
10646	#ifndef __BIG_ENDIAN_BITFIELD
10647	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, `16`);
10648	#endif
10649	break;
10650
10651	case offsetof(struct bpf_sock_ops, local_port):
10652	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != `2`);
10653
10654	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10655	struct bpf_sock_ops_kern, sk),
10656	si->dst_reg, si->src_reg,
10657	offsetof(struct bpf_sock_ops_kern, sk));
10658	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10659	offsetof(struct sock_common, skc_num));
10660	break;
10661
10662	case offsetof(struct bpf_sock_ops, is_fullsock):
10663	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10664	struct bpf_sock_ops_kern,
10665	is_fullsock),
10666	si->dst_reg, si->src_reg,
10667	offsetof(struct bpf_sock_ops_kern,
10668	is_fullsock));
10669	break;
10670
10671	case offsetof(struct bpf_sock_ops, state):
10672	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != `1`);
10673
10674	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10675	struct bpf_sock_ops_kern, sk),
10676	si->dst_reg, si->src_reg,
10677	offsetof(struct bpf_sock_ops_kern, sk));
10678	*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
10679	offsetof(struct sock_common, skc_state));
10680	break;
10681
10682	case offsetof(struct bpf_sock_ops, rtt_min):
10683	BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
10684	sizeof(struct minmax));
10685	BUILD_BUG_ON(sizeof(struct minmax) <
10686	sizeof(struct minmax_sample));
10687
10688	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10689	struct bpf_sock_ops_kern, sk),
10690	si->dst_reg, si->src_reg,
10691	offsetof(struct bpf_sock_ops_kern, sk));
10692	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10693	offsetof(struct tcp_sock, rtt_min) +
10694	sizeof_field(struct minmax_sample, t));
10695	break;
10696
10697	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
10698	SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
10699	struct tcp_sock);
10700	break;
10701
10702	case offsetof(struct bpf_sock_ops, sk_txhash):
10703	SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
10704	struct sock, type);
10705	break;
10706	case offsetof(struct bpf_sock_ops, snd_cwnd):
10707	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
10708	break;
10709	case offsetof(struct bpf_sock_ops, srtt_us):
10710	SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
10711	break;
10712	case offsetof(struct bpf_sock_ops, snd_ssthresh):
10713	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
10714	break;
10715	case offsetof(struct bpf_sock_ops, rcv_nxt):
10716	SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
10717	break;
10718	case offsetof(struct bpf_sock_ops, snd_nxt):
10719	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
10720	break;
10721	case offsetof(struct bpf_sock_ops, snd_una):
10722	SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
10723	break;
10724	case offsetof(struct bpf_sock_ops, mss_cache):
10725	SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
10726	break;
10727	case offsetof(struct bpf_sock_ops, ecn_flags):
10728	SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
10729	break;
10730	case offsetof(struct bpf_sock_ops, rate_delivered):
10731	SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
10732	break;
10733	case offsetof(struct bpf_sock_ops, rate_interval_us):
10734	SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
10735	break;
10736	case offsetof(struct bpf_sock_ops, packets_out):
10737	SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
10738	break;
10739	case offsetof(struct bpf_sock_ops, retrans_out):
10740	SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
10741	break;
10742	case offsetof(struct bpf_sock_ops, total_retrans):
10743	SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
10744	break;
10745	case offsetof(struct bpf_sock_ops, segs_in):
10746	SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
10747	break;
10748	case offsetof(struct bpf_sock_ops, data_segs_in):
10749	SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
10750	break;
10751	case offsetof(struct bpf_sock_ops, segs_out):
10752	SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
10753	break;
10754	case offsetof(struct bpf_sock_ops, data_segs_out):
10755	SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
10756	break;
10757	case offsetof(struct bpf_sock_ops, lost_out):
10758	SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
10759	break;
10760	case offsetof(struct bpf_sock_ops, sacked_out):
10761	SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
10762	break;
10763	case offsetof(struct bpf_sock_ops, bytes_received):
10764	SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
10765	break;
10766	case offsetof(struct bpf_sock_ops, bytes_acked):
10767	SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
10768	break;
10769	case offsetof(struct bpf_sock_ops, sk):
10770	SOCK_OPS_GET_SK();
10771	break;
10772	case offsetof(struct bpf_sock_ops, skb_data_end):
10773	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10774	skb_data_end),
10775	si->dst_reg, si->src_reg,
10776	offsetof(struct bpf_sock_ops_kern,
10777	skb_data_end));
10778	break;
10779	case offsetof(struct bpf_sock_ops, skb_data):
10780	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10781	skb),
10782	si->dst_reg, si->src_reg,
10783	offsetof(struct bpf_sock_ops_kern,
10784	skb));
10785	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
10786	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
10787	si->dst_reg, si->dst_reg,
10788	offsetof(struct sk_buff, data));
10789	break;
10790	case offsetof(struct bpf_sock_ops, skb_len):
10791	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10792	skb),
10793	si->dst_reg, si->src_reg,
10794	offsetof(struct bpf_sock_ops_kern,
10795	skb));
10796	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
10797	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, len),
10798	si->dst_reg, si->dst_reg,
10799	offsetof(struct sk_buff, len));
10800	break;
10801	case offsetof(struct bpf_sock_ops, skb_tcp_flags):
10802	off = offsetof(struct sk_buff, cb);
10803	off += offsetof(struct tcp_skb_cb, tcp_flags);
10804	target_size = sizeof_field(struct* tcp_skb_cb, tcp_flags);
10805	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10806	skb),
10807	si->dst_reg, si->src_reg,
10808	offsetof(struct bpf_sock_ops_kern,
10809	skb));
10810	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
10811	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* tcp_skb_cb,
10812	tcp_flags),
10813	si->dst_reg, si->dst_reg, off);
10814	break;
10815	case offsetof(struct bpf_sock_ops, skb_hwtstamp): {
10816	struct bpf_insn *jmp_on_null_skb;
10817
10818	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_sock_ops_kern,
10819	skb),
10820	si->dst_reg, si->src_reg,
10821	offsetof(struct bpf_sock_ops_kern,
10822	skb));
10823	/ Reserve one insn to test skb == NULL /
10824	jmp_on_null_skb = insn++;
10825	insn = bpf_convert_shinfo_access(dst_reg: si->dst_reg, skb_reg: si->dst_reg, insn);
10826	*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
10827	bpf_target_off(struct skb_shared_info,
10828	hwtstamps, `8`,
10829	target_size));
10830	*jmp_on_null_skb = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`,
10831	insn - jmp_on_null_skb - `1`);
10832	break;
10833	}
10834	}
10835	return insn - insn_buf;
10836	}
10837
10838	/ data_end = skb->data + skb_headlen() /
10839	static struct bpf_insn bpf_convert_data_end_access(const* struct bpf_insn *si,
10840	struct bpf_insn *insn)
10841	{
10842	int reg;
10843	int temp_reg_off = offsetof(struct sk_buff, cb) +
10844	offsetof(struct sk_skb_cb, temp_reg);
10845
10846	if (si->src_reg == si->dst_reg) {
10847	/ We need an extra register, choose and save a register. /
10848	reg = BPF_REG_9;
10849	if (si->src_reg == reg \|\| si->dst_reg == reg)
10850	reg--;
10851	if (si->src_reg == reg \|\| si->dst_reg == reg)
10852	reg--;
10853	*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
10854	} else {
10855	reg = si->dst_reg;
10856	}
10857
10858	/ reg = skb->data /
10859	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data),
10860	reg, si->src_reg,
10861	offsetof(struct sk_buff, data));
10862	/ AX = skb->len /
10863	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, len),
10864	BPF_REG_AX, si->src_reg,
10865	offsetof(struct sk_buff, len));
10866	/ reg = skb->data + skb->len /
10867	*insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
10868	/ AX = skb->data_len /
10869	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_buff, data_len),
10870	BPF_REG_AX, si->src_reg,
10871	offsetof(struct sk_buff, data_len));
10872
10873	/ reg = skb->data + skb->len - skb->data_len /
10874	*insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
10875
10876	if (si->src_reg == si->dst_reg) {
10877	/ Restore the saved register /
10878	*insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
10879	*insn++ = BPF_MOV64_REG(si->dst_reg, reg);
10880	*insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
10881	}
10882
10883	return insn;
10884	}
10885
10886	static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
10887	const struct bpf_insn *si,
10888	struct bpf_insn *insn_buf,
10889	struct bpf_prog prog, u32 target_size)
10890	{
10891	struct bpf_insn *insn = insn_buf;
10892	int off;
10893
10894	switch (si->off) {
10895	case offsetof(struct __sk_buff, data_end):
10896	insn = bpf_convert_data_end_access(si, insn);
10897	break;
10898	case offsetof(struct __sk_buff, cb[`0`]) ...
10899	offsetofend(struct __sk_buff, cb[`4`]) - `1`:
10900	BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < `20`);
10901	BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
10902	offsetof(struct sk_skb_cb, data)) %
10903	sizeof(__u64));
10904
10905	prog->cb_access = `1`;
10906	off = si->off;
10907	off -= offsetof(struct __sk_buff, cb[`0`]);
10908	off += offsetof(struct sk_buff, cb);
10909	off += offsetof(struct sk_skb_cb, data);
10910	if (type == BPF_WRITE)
10911	*insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
10912	else
10913	*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
10914	si->src_reg, off);
10915	break;
10916
10917
10918	default:
10919	return bpf_convert_ctx_access(type, si, insn_buf, prog,
10920	target_size);
10921	}
10922
10923	return insn - insn_buf;
10924	}
10925
10926	static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
10927	const struct bpf_insn *si,
10928	struct bpf_insn *insn_buf,
10929	struct bpf_prog prog, u32 target_size)
10930	{
10931	struct bpf_insn *insn = insn_buf;
10932	#if IS_ENABLED(CONFIG_IPV6)
10933	int off;
10934	#endif
10935
10936	/ convert ctx uses the fact sg element is first in struct /
10937	BUILD_BUG_ON(offsetof(struct sk_msg, sg) != `0`);
10938
10939	switch (si->off) {
10940	case offsetof(struct sk_msg_md, data):
10941	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg, data),
10942	si->dst_reg, si->src_reg,
10943	offsetof(struct sk_msg, data));
10944	break;
10945	case offsetof(struct sk_msg_md, data_end):
10946	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg, data_end),
10947	si->dst_reg, si->src_reg,
10948	offsetof(struct sk_msg, data_end));
10949	break;
10950	case offsetof(struct sk_msg_md, family):
10951	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != `2`);
10952
10953	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10954	struct sk_msg, sk),
10955	si->dst_reg, si->src_reg,
10956	offsetof(struct sk_msg, sk));
10957	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10958	offsetof(struct sock_common, skc_family));
10959	break;
10960
10961	case offsetof(struct sk_msg_md, remote_ip4):
10962	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != `4`);
10963
10964	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10965	struct sk_msg, sk),
10966	si->dst_reg, si->src_reg,
10967	offsetof(struct sk_msg, sk));
10968	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10969	offsetof(struct sock_common, skc_daddr));
10970	break;
10971
10972	case offsetof(struct sk_msg_md, local_ip4):
10973	BUILD_BUG_ON(sizeof_field(struct sock_common,
10974	skc_rcv_saddr) != `4`);
10975
10976	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10977	struct sk_msg, sk),
10978	si->dst_reg, si->src_reg,
10979	offsetof(struct sk_msg, sk));
10980	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10981	offsetof(struct sock_common,
10982	skc_rcv_saddr));
10983	break;
10984
10985	case offsetof(struct sk_msg_md, remote_ip6[`0`]) ...
10986	offsetof(struct sk_msg_md, remote_ip6[`3`]):
10987	#if IS_ENABLED(CONFIG_IPV6)
10988	BUILD_BUG_ON(sizeof_field(struct sock_common,
10989	skc_v6_daddr.s6_addr32[`0`]) != `4`);
10990
10991	off = si->off;
10992	off -= offsetof(struct sk_msg_md, remote_ip6[`0`]);
10993	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10994	struct sk_msg, sk),
10995	si->dst_reg, si->src_reg,
10996	offsetof(struct sk_msg, sk));
10997	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10998	offsetof(struct sock_common,
10999	skc_v6_daddr.s6_addr32[`0`]) +
11000	off);
11001	#else
11002	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11003	#endif
11004	break;
11005
11006	case offsetof(struct sk_msg_md, local_ip6[`0`]) ...
11007	offsetof(struct sk_msg_md, local_ip6[`3`]):
11008	#if IS_ENABLED(CONFIG_IPV6)
11009	BUILD_BUG_ON(sizeof_field(struct sock_common,
11010	skc_v6_rcv_saddr.s6_addr32[`0`]) != `4`);
11011
11012	off = si->off;
11013	off -= offsetof(struct sk_msg_md, local_ip6[`0`]);
11014	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
11015	struct sk_msg, sk),
11016	si->dst_reg, si->src_reg,
11017	offsetof(struct sk_msg, sk));
11018	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
11019	offsetof(struct sock_common,
11020	skc_v6_rcv_saddr.s6_addr32[`0`]) +
11021	off);
11022	#else
11023	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11024	#endif
11025	break;
11026
11027	case offsetof(struct sk_msg_md, remote_port):
11028	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != `2`);
11029
11030	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
11031	struct sk_msg, sk),
11032	si->dst_reg, si->src_reg,
11033	offsetof(struct sk_msg, sk));
11034	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
11035	offsetof(struct sock_common, skc_dport));
11036	#ifndef __BIG_ENDIAN_BITFIELD
11037	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, `16`);
11038	#endif
11039	break;
11040
11041	case offsetof(struct sk_msg_md, local_port):
11042	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != `2`);
11043
11044	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
11045	struct sk_msg, sk),
11046	si->dst_reg, si->src_reg,
11047	offsetof(struct sk_msg, sk));
11048	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
11049	offsetof(struct sock_common, skc_num));
11050	break;
11051
11052	case offsetof(struct sk_msg_md, size):
11053	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg_sg, size),
11054	si->dst_reg, si->src_reg,
11055	offsetof(struct sk_msg_sg, size));
11056	break;
11057
11058	case offsetof(struct sk_msg_md, sk):
11059	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* sk_msg, sk),
11060	si->dst_reg, si->src_reg,
11061	offsetof(struct sk_msg, sk));
11062	break;
11063	}
11064
11065	return insn - insn_buf;
11066	}
11067
11068	const struct bpf_verifier_ops sk_filter_verifier_ops = {
11069	.get_func_proto = sk_filter_func_proto,
11070	.is_valid_access = sk_filter_is_valid_access,
11071	.convert_ctx_access = bpf_convert_ctx_access,
11072	.gen_ld_abs = bpf_gen_ld_abs,
11073	};
11074
11075	const struct bpf_prog_ops sk_filter_prog_ops = {
11076	.test_run = bpf_prog_test_run_skb,
11077	};
11078
11079	const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
11080	.get_func_proto = tc_cls_act_func_proto,
11081	.is_valid_access = tc_cls_act_is_valid_access,
11082	.convert_ctx_access = tc_cls_act_convert_ctx_access,
11083	.gen_prologue = tc_cls_act_prologue,
11084	.gen_ld_abs = bpf_gen_ld_abs,
11085	.btf_struct_access = tc_cls_act_btf_struct_access,
11086	};
11087
11088	const struct bpf_prog_ops tc_cls_act_prog_ops = {
11089	.test_run = bpf_prog_test_run_skb,
11090	};
11091
11092	const struct bpf_verifier_ops xdp_verifier_ops = {
11093	.get_func_proto = xdp_func_proto,
11094	.is_valid_access = xdp_is_valid_access,
11095	.convert_ctx_access = xdp_convert_ctx_access,
11096	.gen_prologue = bpf_noop_prologue,
11097	.btf_struct_access = xdp_btf_struct_access,
11098	};
11099
11100	const struct bpf_prog_ops xdp_prog_ops = {
11101	.test_run = bpf_prog_test_run_xdp,
11102	};
11103
11104	const struct bpf_verifier_ops cg_skb_verifier_ops = {
11105	.get_func_proto = cg_skb_func_proto,
11106	.is_valid_access = cg_skb_is_valid_access,
11107	.convert_ctx_access = bpf_convert_ctx_access,
11108	};
11109
11110	const struct bpf_prog_ops cg_skb_prog_ops = {
11111	.test_run = bpf_prog_test_run_skb,
11112	};
11113
11114	const struct bpf_verifier_ops lwt_in_verifier_ops = {
11115	.get_func_proto = lwt_in_func_proto,
11116	.is_valid_access = lwt_is_valid_access,
11117	.convert_ctx_access = bpf_convert_ctx_access,
11118	};
11119
11120	const struct bpf_prog_ops lwt_in_prog_ops = {
11121	.test_run = bpf_prog_test_run_skb,
11122	};
11123
11124	const struct bpf_verifier_ops lwt_out_verifier_ops = {
11125	.get_func_proto = lwt_out_func_proto,
11126	.is_valid_access = lwt_is_valid_access,
11127	.convert_ctx_access = bpf_convert_ctx_access,
11128	};
11129
11130	const struct bpf_prog_ops lwt_out_prog_ops = {
11131	.test_run = bpf_prog_test_run_skb,
11132	};
11133
11134	const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
11135	.get_func_proto = lwt_xmit_func_proto,
11136	.is_valid_access = lwt_is_valid_access,
11137	.convert_ctx_access = bpf_convert_ctx_access,
11138	.gen_prologue = tc_cls_act_prologue,
11139	};
11140
11141	const struct bpf_prog_ops lwt_xmit_prog_ops = {
11142	.test_run = bpf_prog_test_run_skb,
11143	};
11144
11145	const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
11146	.get_func_proto = lwt_seg6local_func_proto,
11147	.is_valid_access = lwt_is_valid_access,
11148	.convert_ctx_access = bpf_convert_ctx_access,
11149	};
11150
11151	const struct bpf_prog_ops lwt_seg6local_prog_ops = {
11152	};
11153
11154	const struct bpf_verifier_ops cg_sock_verifier_ops = {
11155	.get_func_proto = sock_filter_func_proto,
11156	.is_valid_access = sock_filter_is_valid_access,
11157	.convert_ctx_access = bpf_sock_convert_ctx_access,
11158	};
11159
11160	const struct bpf_prog_ops cg_sock_prog_ops = {
11161	};
11162
11163	const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
11164	.get_func_proto = sock_addr_func_proto,
11165	.is_valid_access = sock_addr_is_valid_access,
11166	.convert_ctx_access = sock_addr_convert_ctx_access,
11167	};
11168
11169	const struct bpf_prog_ops cg_sock_addr_prog_ops = {
11170	};
11171
11172	const struct bpf_verifier_ops sock_ops_verifier_ops = {
11173	.get_func_proto = sock_ops_func_proto,
11174	.is_valid_access = sock_ops_is_valid_access,
11175	.convert_ctx_access = sock_ops_convert_ctx_access,
11176	};
11177
11178	const struct bpf_prog_ops sock_ops_prog_ops = {
11179	};
11180
11181	const struct bpf_verifier_ops sk_skb_verifier_ops = {
11182	.get_func_proto = sk_skb_func_proto,
11183	.is_valid_access = sk_skb_is_valid_access,
11184	.convert_ctx_access = sk_skb_convert_ctx_access,
11185	.gen_prologue = sk_skb_prologue,
11186	};
11187
11188	const struct bpf_prog_ops sk_skb_prog_ops = {
11189	};
11190
11191	const struct bpf_verifier_ops sk_msg_verifier_ops = {
11192	.get_func_proto = sk_msg_func_proto,
11193	.is_valid_access = sk_msg_is_valid_access,
11194	.convert_ctx_access = sk_msg_convert_ctx_access,
11195	.gen_prologue = bpf_noop_prologue,
11196	};
11197
11198	const struct bpf_prog_ops sk_msg_prog_ops = {
11199	};
11200
11201	const struct bpf_verifier_ops flow_dissector_verifier_ops = {
11202	.get_func_proto = flow_dissector_func_proto,
11203	.is_valid_access = flow_dissector_is_valid_access,
11204	.convert_ctx_access = flow_dissector_convert_ctx_access,
11205	};
11206
11207	const struct bpf_prog_ops flow_dissector_prog_ops = {
11208	.test_run = bpf_prog_test_run_flow_dissector,
11209	};
11210
11211	int sk_detach_filter(struct sock *sk)
11212	{
11213	int ret = -ENOENT;
11214	struct sk_filter *filter;
11215
11216	if (sock_flag(sk, flag: SOCK_FILTER_LOCKED))
11217	return -EPERM;
11218
11219	filter = rcu_dereference_protected(sk->sk_filter,
11220	lockdep_sock_is_held(sk));
11221	if (filter) {
11222	RCU_INIT_POINTER(sk->sk_filter, NULL);
11223	sk_filter_uncharge(sk, fp: filter);
11224	ret = `0`;
11225	}
11226
11227	return ret;
11228	}
11229	EXPORT_SYMBOL_GPL(sk_detach_filter);
11230
11231	int sk_get_filter(struct sock sk, sockptr_t optval, unsigned* int len)
11232	{
11233	struct sock_fprog_kern *fprog;
11234	struct sk_filter *filter;
11235	int ret = `0`;
11236
11237	sockopt_lock_sock(sk);
11238	filter = rcu_dereference_protected(sk->sk_filter,
11239	lockdep_sock_is_held(sk));
11240	if (!filter)
11241	goto out;
11242
11243	/ We're copying the filter that has been originally attached,*
11244	* so no conversion/decode needed anymore. eBPF programs that
11245	* have no original program cannot be dumped through this.
11246	*/
11247	ret = -EACCES;
11248	fprog = filter->prog->orig_prog;
11249	if (!fprog)
11250	goto out;
11251
11252	ret = fprog->len;
11253	if (!len)
11254	/ User space only enquires number of filter blocks. /
11255	goto out;
11256
11257	ret = -EINVAL;
11258	if (len < fprog->len)
11259	goto out;
11260
11261	ret = -EFAULT;
11262	if (copy_to_sockptr(dst: optval, src: fprog->filter, bpf_classic_proglen(fprog)))
11263	goto out;
11264
11265	/ Instead of bytes, the API requests to return the number*
11266	* of filter blocks.
11267	*/
11268	ret = fprog->len;
11269	out:
11270	sockopt_release_sock(sk);
11271	return ret;
11272	}
11273
11274	#ifdef CONFIG_INET
11275	static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
11276	struct sock_reuseport *reuse,
11277	struct sock sk, struct* sk_buff *skb,
11278	struct sock *migrating_sk,
11279	u32 hash)
11280	{
11281	reuse_kern->skb = skb;
11282	reuse_kern->sk = sk;
11283	reuse_kern->selected_sk = NULL;
11284	reuse_kern->migrating_sk = migrating_sk;
11285	reuse_kern->data_end = skb->data + skb_headlen(skb);
11286	reuse_kern->hash = hash;
11287	reuse_kern->reuseport_id = reuse->reuseport_id;
11288	reuse_kern->bind_inany = reuse->bind_inany;
11289	}
11290
11291	struct sock bpf_run_sk_reuseport(struct* sock_reuseport reuse, struct* sock *sk,
11292	struct bpf_prog prog, struct* sk_buff *skb,
11293	struct sock *migrating_sk,
11294	u32 hash)
11295	{
11296	struct sk_reuseport_kern reuse_kern;
11297	enum sk_action action;
11298
11299	bpf_init_reuseport_kern(reuse_kern: &reuse_kern, reuse, sk, skb, migrating_sk, hash);
11300	action = bpf_prog_run(prog, ctx: &reuse_kern);
11301
11302	if (action == SK_PASS)
11303	return reuse_kern.selected_sk;
11304	else
11305	return ERR_PTR(error: -ECONNREFUSED);
11306	}
11307
11308	BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
11309	struct bpf_map , map, void* *, key, u32, flags)
11310	{
11311	bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
11312	struct sock_reuseport *reuse;
11313	struct sock *selected_sk;
11314	int err;
11315
11316	selected_sk = map->ops->map_lookup_elem(map, key);
11317	if (!selected_sk)
11318	return -ENOENT;
11319
11320	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
11321	if (!reuse) {
11322	/ reuseport_array has only sk with non NULL sk_reuseport_cb.*
11323	* The only (!reuse) case here is - the sk has already been
11324	* unhashed (e.g. by close()), so treat it as -ENOENT.
11325	*
11326	* Other maps (e.g. sock_map) do not provide this guarantee and
11327	* the sk may never be in the reuseport group to begin with.
11328	*/
11329	err = is_sockarray ? -ENOENT : -EINVAL;
11330	goto error;
11331	}
11332
11333	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
11334	struct sock *sk = reuse_kern->sk;
11335
11336	if (sk->sk_protocol != selected_sk->sk_protocol) {
11337	err = -EPROTOTYPE;
11338	} else if (sk->sk_family != selected_sk->sk_family) {
11339	err = -EAFNOSUPPORT;
11340	} else {
11341	/ Catch all. Likely bound to a different sockaddr. /
11342	err = -EBADFD;
11343	}
11344	goto error;
11345	}
11346
11347	reuse_kern->selected_sk = selected_sk;
11348
11349	return `0`;
11350	error:
11351	/ Lookup in sock_map can return TCP ESTABLISHED sockets. /
11352	if (sk_is_refcounted(sk: selected_sk))
11353	sock_put(sk: selected_sk);
11354
11355	return err;
11356	}
11357
11358	static const struct bpf_func_proto sk_select_reuseport_proto = {
11359	.func = sk_select_reuseport,
11360	.gpl_only = false,
11361	.ret_type = RET_INTEGER,
11362	.arg1_type = ARG_PTR_TO_CTX,
11363	.arg2_type = ARG_CONST_MAP_PTR,
11364	.arg3_type = ARG_PTR_TO_MAP_KEY,
11365	.arg4_type = ARG_ANYTHING,
11366	};
11367
11368	BPF_CALL_4(sk_reuseport_load_bytes,
11369	const struct sk_reuseport_kern *, reuse_kern, u32, offset,
11370	void *, to, u32, len)
11371	{
11372	return ____bpf_skb_load_bytes(skb: reuse_kern->skb, offset, to, len);
11373	}
11374
/* Verifier-facing signature of sk_reuseport_load_bytes(): not GPL-only,
 * returns an integer, and takes (program context, arbitrary offset,
 * uninitialized destination memory, constant size of that memory).
 */
static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
	.func = sk_reuseport_load_bytes,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_ANYTHING,
	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
	.arg4_type = ARG_CONST_SIZE,
};
11384
11385	BPF_CALL_5(sk_reuseport_load_bytes_relative,
11386	const struct sk_reuseport_kern *, reuse_kern, u32, offset,
11387	void *, to, u32, len, u32, start_header)
11388	{
11389	return ____bpf_skb_load_bytes_relative(skb: reuse_kern->skb, offset, to,
11390	len, start_header);
11391	}
11392
/* Verifier-facing signature of sk_reuseport_load_bytes_relative(): not
 * GPL-only, returns an integer, and takes (program context, arbitrary offset,
 * uninitialized destination memory, constant size, arbitrary start_header).
 */
static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
	.func = sk_reuseport_load_bytes_relative,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_ANYTHING,
	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
	.arg4_type = ARG_CONST_SIZE,
	.arg5_type = ARG_ANYTHING,
};
11403
11404	static const struct bpf_func_proto *
11405	sk_reuseport_func_proto(enum bpf_func_id func_id,
11406	const struct bpf_prog *prog)
11407	{
11408	switch (func_id) {
11409	case BPF_FUNC_sk_select_reuseport:
11410	return &sk_select_reuseport_proto;
11411	case BPF_FUNC_skb_load_bytes:
11412	return &sk_reuseport_load_bytes_proto;
11413	case BPF_FUNC_skb_load_bytes_relative:
11414	return &sk_reuseport_load_bytes_relative_proto;
11415	case BPF_FUNC_get_socket_cookie:
11416	return &bpf_get_socket_ptr_cookie_proto;
11417	case BPF_FUNC_ktime_get_coarse_ns:
11418	return &bpf_ktime_get_coarse_ns_proto;
11419	default:
11420	return bpf_base_func_proto(func_id, prog);
11421	}
11422	}
11423
11424	static bool
11425	sk_reuseport_is_valid_access(int off, int size,
11426	enum bpf_access_type type,
11427	const struct bpf_prog *prog,
11428	struct bpf_insn_access_aux *info)
11429	{
11430	const u32 size_default = sizeof(__u32);
11431
11432	if (off < `0` \|\| off >= sizeof(struct sk_reuseport_md) \|\|
11433	off % size \|\| type != BPF_READ)
11434	return false;
11435
11436	switch (off) {
11437	case offsetof(struct sk_reuseport_md, data):
11438	info->reg_type = PTR_TO_PACKET;
11439	return size == sizeof(__u64);
11440
11441	case offsetof(struct sk_reuseport_md, data_end):
11442	info->reg_type = PTR_TO_PACKET_END;
11443	return size == sizeof(__u64);
11444
11445	case offsetof(struct sk_reuseport_md, hash):
11446	return size == size_default;
11447
11448	case offsetof(struct sk_reuseport_md, sk):
11449	info->reg_type = PTR_TO_SOCKET;
11450	return size == sizeof(__u64);
11451
11452	case offsetof(struct sk_reuseport_md, migrating_sk):
11453	info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
11454	return size == sizeof(__u64);
11455
11456	/ Fields that allow narrowing /
11457	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
11458	if (size < sizeof_field(struct sk_buff, protocol))
11459	return false;
11460	fallthrough;
11461	case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
11462	case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
11463	case bpf_ctx_range(struct sk_reuseport_md, len):
11464	bpf_ctx_record_field_size(aux: info, size: size_default);
11465	return bpf_ctx_narrow_access_ok(off, size, size_default);
11466
11467	default:
11468	return false;
11469	}
11470	}
11471
/* Emit one BPF_LDX_MEM instruction that loads field F of
 * struct sk_reuseport_kern from si->src_reg into si->dst_reg.
 * Relies on `insn`, `si` and `target_size` being in scope at the expansion
 * site; advances `insn` by one.
 */
#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
			      si->dst_reg, si->src_reg,			\
			      bpf_target_off(struct sk_reuseport_kern, F, \
					     sizeof_field(struct sk_reuseport_kern, F), \
					     target_size));		\
	})
11479
/* Expand SOCK_ADDR_LOAD_NESTED_FIELD() for member SKB_FIELD of the
 * struct sk_buff referenced by sk_reuseport_kern::skb.
 * NOTE(review): SOCK_ADDR_LOAD_NESTED_FIELD is defined outside this chunk;
 * presumably it emits the pointer load followed by the field load.
 */
#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
				    struct sk_buff,			\
				    skb,				\
				    SKB_FIELD)
11485
/* Expand SOCK_ADDR_LOAD_NESTED_FIELD() for member SK_FIELD of the
 * struct sock referenced by sk_reuseport_kern::sk.
 * NOTE(review): SOCK_ADDR_LOAD_NESTED_FIELD is defined outside this chunk.
 */
#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD)				\
	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
				    struct sock,			\
				    sk,					\
				    SK_FIELD)
11491
/* Rewrite a load from struct sk_reuseport_md at si->off into instruction(s)
 * that read the matching data via struct sk_reuseport_kern, writing them to
 * @insn_buf.  Offsets with no case below emit nothing.
 *
 * Return: number of instructions written to @insn_buf.
 */
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
					   const struct bpf_insn *si,
					   struct bpf_insn *insn_buf,
					   struct bpf_prog *prog,
					   u32 *target_size)
{
	/* The SK_REUSEPORT_LOAD_* macros use `insn`, `si` and `target_size`
	 * by name, so these identifiers must not be renamed.
	 */
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	/* Fields read through the skb pointer. */
	case offsetof(struct sk_reuseport_md, data):
		SK_REUSEPORT_LOAD_SKB_FIELD(data);
		break;

	case offsetof(struct sk_reuseport_md, len):
		SK_REUSEPORT_LOAD_SKB_FIELD(len);
		break;

	case offsetof(struct sk_reuseport_md, eth_protocol):
		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
		break;

	/* Field read through the sk pointer. */
	case offsetof(struct sk_reuseport_md, ip_protocol):
		SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
		break;

	/* Fields stored directly in struct sk_reuseport_kern. */
	case offsetof(struct sk_reuseport_md, data_end):
		SK_REUSEPORT_LOAD_FIELD(data_end);
		break;

	case offsetof(struct sk_reuseport_md, hash):
		SK_REUSEPORT_LOAD_FIELD(hash);
		break;

	case offsetof(struct sk_reuseport_md, bind_inany):
		SK_REUSEPORT_LOAD_FIELD(bind_inany);
		break;

	case offsetof(struct sk_reuseport_md, sk):
		SK_REUSEPORT_LOAD_FIELD(sk);
		break;

	case offsetof(struct sk_reuseport_md, migrating_sk):
		SK_REUSEPORT_LOAD_FIELD(migrating_sk);
		break;
	}

	return insn - insn_buf;
}
11540
/* Verifier callbacks for reuseport programs: helper lookup, context access
 * validation and context access rewriting.
 */
const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
	.get_func_proto = sk_reuseport_func_proto,
	.is_valid_access = sk_reuseport_is_valid_access,
	.convert_ctx_access = sk_reuseport_convert_ctx_access,
};
11546
/* Program ops for reuseport programs; intentionally empty (no callbacks set). */
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
11549
/* Static key for sk_lookup, defined in the "false" state and exported so
 * code outside this file can reference it.
 */
DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
EXPORT_SYMBOL(bpf_sk_lookup_enabled);
11552
11553	BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
11554	struct sock *, sk, u64, flags)
11555	{
11556	if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE \|
11557	BPF_SK_LOOKUP_F_NO_REUSEPORT)))
11558	return -EINVAL;
11559	if (unlikely(sk && sk_is_refcounted(sk)))
11560	return -ESOCKTNOSUPPORT; / reject non-RCU freed sockets /
11561	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
11562	return -ESOCKTNOSUPPORT; / only accept TCP socket in LISTEN /
11563	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
11564	return -ESOCKTNOSUPPORT; / only accept UDP socket in CLOSE /
11565
11566	/ Check if socket is suitable for packet L3/L4 protocol /
11567	if (sk && sk->sk_protocol != ctx->protocol)
11568	return -EPROTOTYPE;
11569	if (sk && sk->sk_family != ctx->family &&
11570	(sk->sk_family == AF_INET \|\| ipv6_only_sock(sk)))
11571	return -EAFNOSUPPORT;
11572
11573	if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
11574	return -EEXIST;
11575
11576	/ Select socket as lookup result /
11577	ctx->selected_sk = sk;
11578	ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
11579	return `0`;
11580	}
11581
/* Verifier-facing signature of bpf_sk_lookup_assign(): not GPL-only, returns
 * an integer, and takes (program context, socket pointer or NULL, arbitrary
 * flags value).
 */
static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
	.func = bpf_sk_lookup_assign,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
	.arg3_type = ARG_ANYTHING,
};
11590
11591	static const struct bpf_func_proto *
11592	sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
11593	{
11594	switch (func_id) {
11595	case BPF_FUNC_perf_event_output:
11596	return &bpf_event_output_data_proto;
11597	case BPF_FUNC_sk_assign:
11598	return &bpf_sk_lookup_assign_proto;
11599	case BPF_FUNC_sk_release:
11600	return &bpf_sk_release_proto;
11601	default:
11602	return bpf_sk_base_func_proto(func_id, prog);
11603	}
11604	}
11605
11606	static bool sk_lookup_is_valid_access(int off, int size,
11607	enum bpf_access_type type,
11608	const struct bpf_prog *prog,
11609	struct bpf_insn_access_aux *info)
11610	{
11611	if (off < `0` \|\| off >= sizeof(struct bpf_sk_lookup))
11612	return false;
11613	if (off % size != `0`)
11614	return false;
11615	if (type != BPF_READ)
11616	return false;
11617
11618	switch (off) {
11619	case offsetof(struct bpf_sk_lookup, sk):
11620	info->reg_type = PTR_TO_SOCKET_OR_NULL;
11621	return size == sizeof(__u64);
11622
11623	case bpf_ctx_range(struct bpf_sk_lookup, family):
11624	case bpf_ctx_range(struct bpf_sk_lookup, protocol):
11625	case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
11626	case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
11627	case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[`0`], remote_ip6[`3`]):
11628	case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[`0`], local_ip6[`3`]):
11629	case bpf_ctx_range(struct bpf_sk_lookup, local_port):
11630	case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
11631	bpf_ctx_record_field_size(aux: info, size: sizeof(__u32));
11632	return bpf_ctx_narrow_access_ok(off, size, size_default: sizeof(__u32));
11633
11634	case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
11635	/ Allow 4-byte access to 2-byte field for backward compatibility /
11636	if (size == sizeof(__u32))
11637	return true;
11638	bpf_ctx_record_field_size(aux: info, size: sizeof(__be16));
11639	return bpf_ctx_narrow_access_ok(off, size, size_default: sizeof(__be16));
11640
11641	case offsetofend(struct bpf_sk_lookup, remote_port) ...
11642	offsetof(struct bpf_sk_lookup, local_ip4) - `1`:
11643	/ Allow access to zero padding for backward compatibility /
11644	bpf_ctx_record_field_size(aux: info, size: sizeof(__u16));
11645	return bpf_ctx_narrow_access_ok(off, size, size_default: sizeof(__u16));
11646
11647	default:
11648	return false;
11649	}
11650	}
11651
11652	static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
11653	const struct bpf_insn *si,
11654	struct bpf_insn *insn_buf,
11655	struct bpf_prog *prog,
11656	u32 *target_size)
11657	{
11658	struct bpf_insn *insn = insn_buf;
11659
11660	switch (si->off) {
11661	case offsetof(struct bpf_sk_lookup, sk):
11662	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg, si->src_reg,
11663	offsetof(struct bpf_sk_lookup_kern, selected_sk));
11664	break;
11665
11666	case offsetof(struct bpf_sk_lookup, family):
11667	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11668	bpf_target_off(struct bpf_sk_lookup_kern,
11669	family, `2`, target_size));
11670	break;
11671
11672	case offsetof(struct bpf_sk_lookup, protocol):
11673	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11674	bpf_target_off(struct bpf_sk_lookup_kern,
11675	protocol, `2`, target_size));
11676	break;
11677
11678	case offsetof(struct bpf_sk_lookup, remote_ip4):
11679	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11680	bpf_target_off(struct bpf_sk_lookup_kern,
11681	v4.saddr, `4`, target_size));
11682	break;
11683
11684	case offsetof(struct bpf_sk_lookup, local_ip4):
11685	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11686	bpf_target_off(struct bpf_sk_lookup_kern,
11687	v4.daddr, `4`, target_size));
11688	break;
11689
11690	case bpf_ctx_range_till(struct bpf_sk_lookup,
11691	remote_ip6[`0`], remote_ip6[`3`]): {
11692	#if IS_ENABLED(CONFIG_IPV6)
11693	int off = si->off;
11694
11695	off -= offsetof(struct bpf_sk_lookup, remote_ip6[`0`]);
11696	off += bpf_target_off(struct in6_addr, s6_addr32[`0`], `4`, target_size);
11697	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg, si->src_reg,
11698	offsetof(struct bpf_sk_lookup_kern, v6.saddr));
11699	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
11700	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
11701	#else
11702	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11703	#endif
11704	break;
11705	}
11706	case bpf_ctx_range_till(struct bpf_sk_lookup,
11707	local_ip6[`0`], local_ip6[`3`]): {
11708	#if IS_ENABLED(CONFIG_IPV6)
11709	int off = si->off;
11710
11711	off -= offsetof(struct bpf_sk_lookup, local_ip6[`0`]);
11712	off += bpf_target_off(struct in6_addr, s6_addr32[`0`], `4`, target_size);
11713	insn++ = BPF_LDX_MEM(BPF_SIZEOF(void* *), si->dst_reg, si->src_reg,
11714	offsetof(struct bpf_sk_lookup_kern, v6.daddr));
11715	*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, `0`, `1`);
11716	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
11717	#else
11718	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11719	#endif
11720	break;
11721	}
11722	case offsetof(struct bpf_sk_lookup, remote_port):
11723	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11724	bpf_target_off(struct bpf_sk_lookup_kern,
11725	sport, `2`, target_size));
11726	break;
11727
11728	case offsetofend(struct bpf_sk_lookup, remote_port):
11729	*target_size = `2`;
11730	*insn++ = BPF_MOV32_IMM(si->dst_reg, `0`);
11731	break;
11732
11733	case offsetof(struct bpf_sk_lookup, local_port):
11734	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11735	bpf_target_off(struct bpf_sk_lookup_kern,
11736	dport, `2`, target_size));
11737	break;
11738
11739	case offsetof(struct bpf_sk_lookup, ingress_ifindex):
11740	*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11741	bpf_target_off(struct bpf_sk_lookup_kern,
11742	ingress_ifindex, `4`, target_size));
11743	break;
11744	}
11745
11746	return insn - insn_buf;
11747	}
11748
/* Program ops for sk_lookup programs: only a test-run callback is provided. */
const struct bpf_prog_ops sk_lookup_prog_ops = {
	.test_run = bpf_prog_test_run_sk_lookup,
};
11752
/* Verifier callbacks for sk_lookup programs: helper lookup, context access
 * validation and context access rewriting.
 */
const struct bpf_verifier_ops sk_lookup_verifier_ops = {
	.get_func_proto = sk_lookup_func_proto,
	.is_valid_access = sk_lookup_is_valid_access,
	.convert_ctx_access = sk_lookup_convert_ctx_access,
};
11758
11759	#endif /* CONFIG_INET */
11760
11761	DEFINE_BPF_DISPATCHER(xdp)
11762
11763	void bpf_prog_change_xdp(struct bpf_prog prev_prog, struct* bpf_prog *prog)
11764	{
11765	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), from: prev_prog, to: prog);
11766	}
11767
/* Global BTF ID list with MAX_BTF_SOCK_TYPE entries, one per socket struct
 * named in BTF_SOCK_TYPE_xxx.  BTF_SOCK_TYPE is defined only for the
 * duration of the expansion; the list is indexed by BTF_SOCK_TYPE_* below.
 */
BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE)
#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
11772
11773	BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
11774	{
11775	/ tcp6_sock type is not generated in dwarf and hence btf,*
11776	* trigger an explicit type generation here.
11777	*/
11778	BTF_TYPE_EMIT(struct tcp6_sock);
11779	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
11780	sk->sk_family == AF_INET6)
11781	return (unsigned long)sk;
11782
11783	return (unsigned long)NULL;
11784	}
11785
/* Verifier-facing signature of bpf_skc_to_tcp6_sock(): not GPL-only, takes a
 * BTF sock_common pointer and returns a BTF pointer (or NULL) typed by
 * btf_sock_ids[BTF_SOCK_TYPE_TCP6].
 */
const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
	.func = bpf_skc_to_tcp6_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};
11793
11794	BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
11795	{
11796	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
11797	return (unsigned long)sk;
11798
11799	return (unsigned long)NULL;
11800	}
11801
/* Verifier-facing signature of bpf_skc_to_tcp_sock(): not GPL-only, takes a
 * BTF sock_common pointer and returns a BTF pointer (or NULL) typed by
 * btf_sock_ids[BTF_SOCK_TYPE_TCP].
 */
const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
	.func = bpf_skc_to_tcp_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};
11809
11810	BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
11811	{
11812	/ BTF types for tcp_timewait_sock and inet_timewait_sock are not*
11813	* generated if CONFIG_INET=n. Trigger an explicit generation here.
11814	*/
11815	BTF_TYPE_EMIT(struct inet_timewait_sock);
11816	BTF_TYPE_EMIT(struct tcp_timewait_sock);
11817
11818	#ifdef CONFIG_INET
11819	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
11820	return (unsigned long)sk;
11821	#endif
11822
11823	#if IS_BUILTIN(CONFIG_IPV6)
11824	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
11825	return (unsigned long)sk;
11826	#endif
11827
11828	return (unsigned long)NULL;
11829	}
11830
/* Verifier-facing signature of bpf_skc_to_tcp_timewait_sock(): not GPL-only,
 * takes a BTF sock_common pointer and returns a BTF pointer (or NULL) typed
 * by btf_sock_ids[BTF_SOCK_TYPE_TCP_TW].
 */
const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
	.func = bpf_skc_to_tcp_timewait_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};
11838
11839	BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
11840	{
11841	#ifdef CONFIG_INET
11842	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
11843	return (unsigned long)sk;
11844	#endif
11845
11846	#if IS_BUILTIN(CONFIG_IPV6)
11847	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
11848	return (unsigned long)sk;
11849	#endif
11850
11851	return (unsigned long)NULL;
11852	}
11853
/* Verifier-facing signature of bpf_skc_to_tcp_request_sock(): not GPL-only,
 * takes a BTF sock_common pointer and returns a BTF pointer (or NULL) typed
 * by btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ].
 */
const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
	.func = bpf_skc_to_tcp_request_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};
11861
11862	BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
11863	{
11864	/ udp6_sock type is not generated in dwarf and hence btf,*
11865	* trigger an explicit type generation here.
11866	*/
11867	BTF_TYPE_EMIT(struct udp6_sock);
11868	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
11869	sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
11870	return (unsigned long)sk;
11871
11872	return (unsigned long)NULL;
11873	}
11874
/* Verifier-facing signature of bpf_skc_to_udp6_sock(): not GPL-only, takes a
 * BTF sock_common pointer and returns a BTF pointer (or NULL) typed by
 * btf_sock_ids[BTF_SOCK_TYPE_UDP6].
 */
const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
	.func = bpf_skc_to_udp6_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
11882
11883	BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
11884	{
11885	/ unix_sock type is not generated in dwarf and hence btf,*
11886	* trigger an explicit type generation here.
11887	*/
11888	BTF_TYPE_EMIT(struct unix_sock);
11889	if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
11890	return (unsigned long)sk;
11891
11892	return (unsigned long)NULL;
11893	}
11894
/* Verifier-facing signature of bpf_skc_to_unix_sock(): not GPL-only, takes a
 * BTF sock_common pointer and returns a BTF pointer (or NULL) typed by
 * btf_sock_ids[BTF_SOCK_TYPE_UNIX].
 */
const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
	.func = bpf_skc_to_unix_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
};
11902
11903	BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk)
11904	{
11905	BTF_TYPE_EMIT(struct mptcp_sock);
11906	return (unsigned long)bpf_mptcp_sock_from_subflow(sk);
11907	}
11908
/* Verifier-facing signature of bpf_skc_to_mptcp_sock(): not GPL-only, returns
 * a BTF pointer (or NULL) typed by btf_sock_ids[BTF_SOCK_TYPE_MPTCP].
 * Unlike the other skc_to_* prototypes in this file, the argument type is
 * ARG_PTR_TO_SOCK_COMMON rather than ARG_PTR_TO_BTF_ID_SOCK_COMMON.
 */
const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
	.func = bpf_skc_to_mptcp_sock,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type = ARG_PTR_TO_SOCK_COMMON,
	.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
};
11916
11917	BPF_CALL_1(bpf_sock_from_file, struct file *, file)
11918	{
11919	return (unsigned long)sock_from_file(file);
11920	}
11921
/* BTF IDs used by bpf_sock_from_file_proto: index 0 is struct socket (the
 * return type), index 1 is struct file (the argument type).  Order matters.
 */
BTF_ID_LIST(bpf_sock_from_file_btf_ids)
BTF_ID(struct, socket)
BTF_ID(struct, file)
11925
11926	const struct bpf_func_proto bpf_sock_from_file_proto = {
11927	.func = bpf_sock_from_file,
11928	.gpl_only = false,
11929	.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11930	.ret_btf_id = &bpf_sock_from_file_btf_ids[`0`],
11931	.arg1_type = ARG_PTR_TO_BTF_ID,
11932	.arg1_btf_id = &bpf_sock_from_file_btf_ids[`1`],
11933	};
11934
11935	static const struct bpf_func_proto *
11936	bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
11937	{
11938	const struct bpf_func_proto *func;
11939
11940	switch (func_id) {
11941	case BPF_FUNC_skc_to_tcp6_sock:
11942	func = &bpf_skc_to_tcp6_sock_proto;
11943	break;
11944	case BPF_FUNC_skc_to_tcp_sock:
11945	func = &bpf_skc_to_tcp_sock_proto;
11946	break;
11947	case BPF_FUNC_skc_to_tcp_timewait_sock:
11948	func = &bpf_skc_to_tcp_timewait_sock_proto;
11949	break;
11950	case BPF_FUNC_skc_to_tcp_request_sock:
11951	func = &bpf_skc_to_tcp_request_sock_proto;
11952	break;
11953	case BPF_FUNC_skc_to_udp6_sock:
11954	func = &bpf_skc_to_udp6_sock_proto;
11955	break;
11956	case BPF_FUNC_skc_to_unix_sock:
11957	func = &bpf_skc_to_unix_sock_proto;
11958	break;
11959	case BPF_FUNC_skc_to_mptcp_sock:
11960	func = &bpf_skc_to_mptcp_sock_proto;
11961	break;
11962	case BPF_FUNC_ktime_get_coarse_ns:
11963	return &bpf_ktime_get_coarse_ns_proto;
11964	default:
11965	return bpf_base_func_proto(func_id, prog);
11966	}
11967
11968	if (!bpf_token_capable(token: prog->aux->token, CAP_PERFMON))
11969	return NULL;
11970
11971	return func;
11972	}
11973
11974	__bpf_kfunc_start_defs();
11975	__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
11976	struct bpf_dynptr *ptr__uninit)
11977	{
11978	struct bpf_dynptr_kern ptr = (struct* bpf_dynptr_kern *)ptr__uninit;
11979	struct sk_buff skb = (struct* sk_buff *)s;
11980
11981	if (flags) {
11982	bpf_dynptr_set_null(ptr);
11983	return -EINVAL;
11984	}
11985
11986	bpf_dynptr_init(ptr, data: skb, type: BPF_DYNPTR_TYPE_SKB, offset: `0`, size: skb->len);
11987
11988	return `0`;
11989	}
11990
11991	__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
11992	struct bpf_dynptr *ptr__uninit)
11993	{
11994	struct bpf_dynptr_kern ptr = (struct* bpf_dynptr_kern *)ptr__uninit;
11995	struct xdp_buff xdp = (struct* xdp_buff *)x;
11996
11997	if (flags) {
11998	bpf_dynptr_set_null(ptr);
11999	return -EINVAL;
12000	}
12001
12002	bpf_dynptr_init(ptr, data: xdp, type: BPF_DYNPTR_TYPE_XDP, offset: `0`, size: xdp_get_buff_len(xdp));
12003
12004	return `0`;
12005	}
12006
12007	__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
12008	const u8 *sun_path, u32 sun_path__sz)
12009	{
12010	struct sockaddr_un *un;
12011
12012	if (sa_kern->sk->sk_family != AF_UNIX)
12013	return -EINVAL;
12014
12015	/ We do not allow changing the address to unnamed or larger than the*
12016	* maximum allowed address size for a unix sockaddr.
12017	*/
12018	if (sun_path__sz == `0` \|\| sun_path__sz > UNIX_PATH_MAX)
12019	return -EINVAL;
12020
12021	un = (struct sockaddr_un *)sa_kern->uaddr;
12022	memcpy(un->sun_path, sun_path, sun_path__sz);
12023	sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz;
12024
12025	return `0`;
12026	}
12027
12028	__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff s, struct* sock *sk,
12029	struct bpf_tcp_req_attrs attrs, int* attrs__sz)
12030	{
12031	#if IS_ENABLED(CONFIG_SYN_COOKIES)
12032	struct sk_buff skb = (struct* sk_buff *)s;
12033	const struct request_sock_ops *ops;
12034	struct inet_request_sock *ireq;
12035	struct tcp_request_sock *treq;
12036	struct request_sock *req;
12037	struct net *net;
12038	__u16 min_mss;
12039	u32 tsoff = `0`;
12040
12041	if (attrs__sz != sizeof(*attrs) \|\|
12042	attrs->reserved[`0`] \|\| attrs->reserved[`1`] \|\| attrs->reserved[`2`])
12043	return -EINVAL;
12044
12045	if (!skb_at_tc_ingress(skb))
12046	return -EINVAL;
12047
12048	net = dev_net(dev: skb->dev);
12049	if (net != sock_net(sk))
12050	return -ENETUNREACH;
12051
12052	switch (skb->protocol) {
12053	case htons(ETH_P_IP):
12054	ops = &tcp_request_sock_ops;
12055	min_mss = `536`;
12056	break;
12057	#if IS_BUILTIN(CONFIG_IPV6)
12058	case htons(ETH_P_IPV6):
12059	ops = &tcp6_request_sock_ops;
12060	min_mss = IPV6_MIN_MTU - `60`;
12061	break;
12062	#endif
12063	default:
12064	return -EINVAL;
12065	}
12066
12067	if (sk->sk_type != SOCK_STREAM \|\| sk->sk_state != TCP_LISTEN \|\|
12068	sk_is_mptcp(sk))
12069	return -EINVAL;
12070
12071	if (attrs->mss < min_mss)
12072	return -EINVAL;
12073
12074	if (attrs->wscale_ok) {
12075	if (!READ_ONCE(net->ipv4.sysctl_tcp_window_scaling))
12076	return -EINVAL;
12077
12078	if (attrs->snd_wscale > TCP_MAX_WSCALE \|\|
12079	attrs->rcv_wscale > TCP_MAX_WSCALE)
12080	return -EINVAL;
12081	}
12082
12083	if (attrs->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
12084	return -EINVAL;
12085
12086	if (attrs->tstamp_ok) {
12087	if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
12088	return -EINVAL;
12089
12090	tsoff = attrs->rcv_tsecr - tcp_ns_to_ts(usec_ts: attrs->usec_ts_ok, val: tcp_clock_ns());
12091	}
12092
12093	req = inet_reqsk_alloc(ops, sk_listener: sk, attach_listener: false);
12094	if (!req)
12095	return -ENOMEM;
12096
12097	ireq = inet_rsk(sk: req);
12098	treq = tcp_rsk(req);
12099
12100	req->rsk_listener = sk;
12101	req->syncookie = `1`;
12102	req->mss = attrs->mss;
12103	req->ts_recent = attrs->rcv_tsval;
12104
12105	ireq->snd_wscale = attrs->snd_wscale;
12106	ireq->rcv_wscale = attrs->rcv_wscale;
12107	ireq->tstamp_ok = !!attrs->tstamp_ok;
12108	ireq->sack_ok = !!attrs->sack_ok;
12109	ireq->wscale_ok = !!attrs->wscale_ok;
12110	ireq->ecn_ok = !!attrs->ecn_ok;
12111
12112	treq->req_usec_ts = !!attrs->usec_ts_ok;
12113	treq->ts_off = tsoff;
12114
12115	skb_orphan(skb);
12116	skb->sk = req_to_sk(req);
12117	skb->destructor = sock_pfree;
12118
12119	return `0`;
12120	#else
12121	return -EOPNOTSUPP;
12122	#endif
12123	}
12124
12125	__bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
12126	u64 flags)
12127	{
12128	struct sk_buff *skb;
12129
12130	if (skops->op != BPF_SOCK_OPS_TSTAMP_SENDMSG_CB)
12131	return -EOPNOTSUPP;
12132
12133	if (flags)
12134	return -EINVAL;
12135
12136	skb = skops->skb;
12137	skb_shinfo(skb)->tx_flags \|= SKBTX_BPF;
12138	TCP_SKB_CB(skb)->txstamp_ack \|= TSTAMP_ACK_BPF;
12139	skb_shinfo(skb)->tskey = TCP_SKB_CB(skb)->seq + skb->len - `1`;
12140
12141	return `0`;
12142	}
12143
12144	__bpf_kfunc_end_defs();
12145
12146	int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
12147	struct bpf_dynptr *ptr__uninit)
12148	{
12149	struct bpf_dynptr_kern ptr = (struct* bpf_dynptr_kern *)ptr__uninit;
12150	int err;
12151
12152	err = bpf_dynptr_from_skb(s: skb, flags, ptr__uninit);
12153	if (err)
12154	return err;
12155
12156	bpf_dynptr_set_rdonly(ptr);
12157
12158	return `0`;
12159	}
12160
/* BTF ID sets naming the kfuncs defined above, one set per group of program
 * types; bpf_kfunc_init() registers them through the btf_kfunc_id_set
 * descriptors that follow.
 */

/* skb dynptr constructor; flagged KF_TRUSTED_ARGS. */
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)

/* xdp dynptr constructor; no flags. */
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)

/* unix sun_path rewrite for sock_addr programs; no flags. */
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr)

/* TCP request-socket assignment; flagged KF_TRUSTED_ARGS. */
BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk)
BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk)

/* tx timestamp enablement for sock_ops programs; flagged KF_TRUSTED_ARGS. */
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
12180
/* btf_kfunc_id_set descriptors: each pairs one of the BTF ID sets above with
 * THIS_MODULE as owner so it can be passed to register_btf_kfunc_id_set().
 */
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
	.owner = THIS_MODULE,
	.set = &bpf_kfunc_check_set_skb,
};

static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
	.owner = THIS_MODULE,
	.set = &bpf_kfunc_check_set_xdp,
};

static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
	.owner = THIS_MODULE,
	.set = &bpf_kfunc_check_set_sock_addr,
};

static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = {
	.owner = THIS_MODULE,
	.set = &bpf_kfunc_check_set_tcp_reqsk,
};

static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = {
	.owner = THIS_MODULE,
	.set = &bpf_kfunc_check_set_sock_ops,
};
12205
12206	static int __init bpf_kfunc_init(void)
12207	{
12208	int ret;
12209
12210	ret = register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SCHED_CLS, s: &bpf_kfunc_set_skb);
12211	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SCHED_ACT, s: &bpf_kfunc_set_skb);
12212	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SK_SKB, s: &bpf_kfunc_set_skb);
12213	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SOCKET_FILTER, s: &bpf_kfunc_set_skb);
12214	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_CGROUP_SKB, s: &bpf_kfunc_set_skb);
12215	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_OUT, s: &bpf_kfunc_set_skb);
12216	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_IN, s: &bpf_kfunc_set_skb);
12217	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_XMIT, s: &bpf_kfunc_set_skb);
12218	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_LWT_SEG6LOCAL, s: &bpf_kfunc_set_skb);
12219	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_NETFILTER, s: &bpf_kfunc_set_skb);
12220	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_TRACING, s: &bpf_kfunc_set_skb);
12221	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_XDP, s: &bpf_kfunc_set_xdp);
12222	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
12223	s: &bpf_kfunc_set_sock_addr);
12224	ret = ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SCHED_CLS, s: &bpf_kfunc_set_tcp_reqsk);
12225	return ret ?: register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_SOCK_OPS, s: &bpf_kfunc_set_sock_ops);
12226	}
12227	late_initcall(bpf_kfunc_init);
12228
12229	__bpf_kfunc_start_defs();
12230
12231	/ bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.*
12232	*
12233	* The function expects a non-NULL pointer to a socket, and invokes the
12234	* protocol specific socket destroy handlers.
12235	*
12236	* The helper can only be called from BPF contexts that have acquired the socket
12237	* locks.
12238	*
12239	* Parameters:
12240	* @sock: Pointer to socket to be destroyed
12241	*
12242	* Return:
12243	* On error, may return EPROTONOSUPPORT, EINVAL.
12244	* EPROTONOSUPPORT if protocol specific destroy handler is not supported.
12245	* 0 otherwise
12246	*/
12247	__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
12248	{
12249	struct sock sk = (struct* sock *)sock;
12250
12251	/ The locking semantics that allow for synchronous execution of the*
12252	* destroy handlers are only supported for TCP and UDP.
12253	* Supporting protocols will need to acquire sock lock in the BPF context
12254	* prior to invoking this kfunc.
12255	*/
12256	if (!sk->sk_prot->diag_destroy \|\| (sk->sk_protocol != IPPROTO_TCP &&
12257	sk->sk_protocol != IPPROTO_UDP))
12258	return -EOPNOTSUPP;
12259
12260	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
12261	}
12262
12263	__bpf_kfunc_end_defs();
12264
/* BTF ID set holding bpf_sock_destroy, flagged KF_TRUSTED_ARGS. */
BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids)
12268
12269	static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
12270	{
12271	if (btf_id_set8_contains(set: &bpf_sk_iter_kfunc_ids, id: kfunc_id) &&
12272	prog->expected_attach_type != BPF_TRACE_ITER)
12273	return -EACCES;
12274	return `0`;
12275	}
12276
/* kfunc id-set descriptor for bpf_sk_iter_kfunc_ids, owned by THIS_MODULE
 * and gated by tracing_iter_filter().
 */
static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &bpf_sk_iter_kfunc_ids,
	.filter = tracing_iter_filter,
};
12282
12283	static int init_subsystem(void)
12284	{
12285	return register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_TRACING, s: &bpf_sk_iter_kfunc_set);
12286	}
12287	late_initcall(init_subsystem);
12288

Provided by KDAB

Definitions

copy_bpf_fprog_from_user
sk_filter_trim_cap
bpf_skb_get_pay_offset
bpf_skb_get_nlattr
bpf_skb_get_nlattr_nest
bpf_skb_load_helper_convert_offset
bpf_skb_load_helper_8
bpf_skb_load_helper_8_no_cache
bpf_skb_load_helper_16
bpf_skb_load_helper_16_no_cache
bpf_skb_load_helper_32
bpf_skb_load_helper_32_no_cache
convert_skb_access
convert_bpf_extensions
convert_bpf_ld_abs
bpf_convert_filter
check_load_and_stores
chk_code_allowed
bpf_check_basics_ok
bpf_check_classic
bpf_prog_store_orig_filter
bpf_release_orig_filter
__bpf_prog_release
__sk_filter_release
sk_filter_release_rcu
sk_filter_release
sk_filter_uncharge
__sk_filter_charge
sk_filter_charge
bpf_migrate_filter
bpf_prepare_filter
bpf_prog_create
bpf_prog_create_from_user
bpf_prog_destroy
__sk_attach_prog
__get_filter
sk_attach_filter
sk_reuseport_attach_filter
__get_bpf
sk_attach_bpf
sk_reuseport_attach_bpf
sk_reuseport_prog_free
__bpf_try_make_writable
bpf_try_make_writable
bpf_try_make_head_writable
bpf_push_mac_rcsum
bpf_pull_mac_rcsum
bpf_skb_store_bytes
bpf_skb_store_bytes_proto
__bpf_skb_store_bytes
bpf_skb_load_bytes
bpf_skb_load_bytes_proto
__bpf_skb_load_bytes
bpf_flow_dissector_load_bytes
bpf_flow_dissector_load_bytes_proto
bpf_skb_load_bytes_relative
bpf_skb_load_bytes_relative_proto
bpf_skb_pull_data
bpf_skb_pull_data_proto
bpf_sk_fullsock
bpf_sk_fullsock_proto
sk_skb_try_make_writable
sk_skb_pull_data
sk_skb_pull_data_proto
bpf_l3_csum_replace
bpf_l3_csum_replace_proto
bpf_l4_csum_replace
bpf_l4_csum_replace_proto
bpf_csum_diff
bpf_csum_diff_proto
bpf_csum_update
bpf_csum_update_proto
bpf_csum_level
bpf_csum_level_proto
__bpf_rx_skb
__bpf_rx_skb_no_mac
__bpf_tx_skb
__bpf_redirect_no_mac
__bpf_redirect_common
__bpf_redirect
bpf_out_neigh_v6
__bpf_redirect_neigh_v6
bpf_out_neigh_v4
__bpf_redirect_neigh_v4
__bpf_redirect_neigh
bpf_clone_redirect
bpf_clone_redirect_proto
skb_get_peer_dev
skb_do_redirect
bpf_redirect
bpf_redirect_proto
bpf_redirect_peer
bpf_redirect_peer_proto
bpf_redirect_neigh
bpf_redirect_neigh_proto
bpf_msg_apply_bytes
bpf_msg_apply_bytes_proto
bpf_msg_cork_bytes
sk_msg_reset_curr
bpf_msg_cork_bytes_proto
bpf_msg_pull_data
bpf_msg_pull_data_proto
bpf_msg_push_data
bpf_msg_push_data_proto
sk_msg_shift_left
sk_msg_shift_right
bpf_msg_pop_data
bpf_msg_pop_data_proto
bpf_get_cgroup_classid_curr
bpf_get_cgroup_classid_curr_proto
bpf_skb_cgroup_classid
bpf_skb_cgroup_classid_proto
bpf_get_cgroup_classid
bpf_get_cgroup_classid_proto
bpf_get_route_realm
bpf_get_route_realm_proto
bpf_get_hash_recalc
bpf_get_hash_recalc_proto
bpf_set_hash_invalid
bpf_set_hash_invalid_proto
bpf_set_hash
bpf_set_hash_proto
bpf_skb_vlan_push
bpf_skb_vlan_push_proto
bpf_skb_vlan_pop
bpf_skb_vlan_pop_proto
bpf_skb_generic_push
bpf_skb_generic_pop
bpf_skb_net_hdr_push
bpf_skb_net_hdr_pop
bpf_skb_proto_4_to_6
bpf_skb_proto_6_to_4
bpf_skb_proto_xlat
bpf_skb_change_proto
bpf_skb_change_proto_proto
bpf_skb_change_type
bpf_skb_change_type_proto
bpf_skb_net_base_len
bpf_skb_net_grow
bpf_skb_net_shrink
sk_skb_adjust_room
sk_skb_adjust_room_proto
bpf_skb_adjust_room
bpf_skb_adjust_room_proto
__bpf_skb_min_len
bpf_skb_grow_rcsum
bpf_skb_trim_rcsum
__bpf_skb_change_tail
bpf_skb_change_tail
bpf_skb_change_tail_proto
sk_skb_change_tail
sk_skb_change_tail_proto
__bpf_skb_change_head
bpf_skb_change_head
bpf_skb_change_head_proto
sk_skb_change_head
sk_skb_change_head_proto
bpf_xdp_get_buff_len
bpf_xdp_get_buff_len_proto
bpf_xdp_get_buff_len_bpf_ids
bpf_xdp_get_buff_len_trace_proto
xdp_get_metalen
bpf_xdp_adjust_head
bpf_xdp_adjust_head_proto
bpf_xdp_copy_buf
bpf_xdp_pointer
bpf_xdp_load_bytes
bpf_xdp_load_bytes_proto
__bpf_xdp_load_bytes
bpf_xdp_store_bytes
bpf_xdp_store_bytes_proto
__bpf_xdp_store_bytes
bpf_xdp_frags_increase_tail
bpf_xdp_shrink_data_zc
bpf_xdp_shrink_data
bpf_xdp_frags_shrink_tail
bpf_xdp_adjust_tail
bpf_xdp_adjust_tail_proto
bpf_xdp_adjust_meta
bpf_xdp_adjust_meta_proto
xdp_do_flush
xdp_do_check_flushed
bpf_master_redirect_enabled_key
xdp_master_redirect
__xdp_do_redirect_xsk
__xdp_do_redirect_frame
xdp_do_redirect
xdp_do_redirect_frame
xdp_do_generic_redirect_map
xdp_do_generic_redirect
bpf_xdp_redirect
bpf_xdp_redirect_proto
bpf_xdp_redirect_map
bpf_xdp_redirect_map_proto
bpf_skb_copy
bpf_skb_event_output
bpf_skb_event_output_proto
bpf_skb_output_btf_ids
bpf_skb_output_proto
bpf_tunnel_key_af
bpf_skb_get_tunnel_key
bpf_skb_get_tunnel_key_proto
bpf_skb_get_tunnel_opt
bpf_skb_get_tunnel_opt_proto
md_dst
bpf_skb_set_tunnel_key
bpf_skb_set_tunnel_key_proto
bpf_skb_set_tunnel_opt
bpf_skb_set_tunnel_opt_proto
bpf_get_skb_set_tunnel_proto
bpf_skb_under_cgroup
bpf_skb_under_cgroup_proto
__bpf_sk_cgroup_id
bpf_skb_cgroup_id
bpf_skb_cgroup_id_proto
__bpf_sk_ancestor_cgroup_id
bpf_skb_ancestor_cgroup_id
bpf_skb_ancestor_cgroup_id_proto
bpf_sk_cgroup_id
bpf_sk_cgroup_id_proto
bpf_sk_ancestor_cgroup_id
bpf_sk_ancestor_cgroup_id_proto
bpf_xdp_copy
bpf_xdp_event_output
bpf_xdp_event_output_proto
bpf_xdp_output_btf_ids
bpf_xdp_output_proto
bpf_get_socket_cookie
bpf_get_socket_cookie_proto
bpf_get_socket_cookie_sock_addr
bpf_get_socket_cookie_sock_addr_proto
bpf_get_socket_cookie_sock
bpf_get_socket_cookie_sock_proto
bpf_get_socket_ptr_cookie
bpf_get_socket_ptr_cookie_proto
bpf_get_socket_cookie_sock_ops
bpf_get_socket_cookie_sock_ops_proto
__bpf_get_netns_cookie
bpf_get_netns_cookie
bpf_get_netns_cookie_proto
bpf_get_netns_cookie_sock
bpf_get_netns_cookie_sock_proto
bpf_get_netns_cookie_sock_addr
bpf_get_netns_cookie_sock_addr_proto
bpf_get_netns_cookie_sock_ops
bpf_get_netns_cookie_sock_ops_proto
bpf_get_netns_cookie_sk_msg
bpf_get_netns_cookie_sk_msg_proto
bpf_get_socket_uid
bpf_get_socket_uid_proto
sk_bpf_set_get_cb_flags
sol_socket_sockopt
bpf_sol_tcp_getsockopt
bpf_sol_tcp_setsockopt
sol_tcp_sockopt_congestion
sol_tcp_sockopt
sol_ip_sockopt
sol_ipv6_sockopt
__bpf_setsockopt
is_locked_tcp_sock_ops
_bpf_setsockopt
__bpf_getsockopt
_bpf_getsockopt
bpf_sk_setsockopt
bpf_sk_setsockopt_proto
bpf_sk_getsockopt
bpf_sk_getsockopt_proto
bpf_unlocked_sk_setsockopt
bpf_unlocked_sk_setsockopt_proto
bpf_unlocked_sk_getsockopt
bpf_unlocked_sk_getsockopt_proto
bpf_sock_addr_setsockopt
bpf_sock_addr_setsockopt_proto
bpf_sock_addr_getsockopt
bpf_sock_addr_getsockopt_proto
bpf_sock_ops_setsockopt
bpf_sock_ops_setsockopt_proto
bpf_sock_ops_get_syn
bpf_sock_ops_getsockopt
bpf_sock_ops_getsockopt_proto
bpf_sock_ops_cb_flags_set
bpf_sock_ops_cb_flags_set_proto
ipv6_bpf_stub
bpf_bind
bpf_bind_proto
bpf_skb_get_xfrm_state
bpf_skb_get_xfrm_state_proto
bpf_fib_set_fwd_params
bpf_ipv4_fib_lookup
bpf_ipv6_fib_lookup
bpf_xdp_fib_lookup
bpf_xdp_fib_lookup_proto
bpf_skb_fib_lookup
bpf_skb_fib_lookup_proto
__dev_via_ifindex
bpf_skb_check_mtu
bpf_xdp_check_mtu
bpf_skb_check_mtu_proto
bpf_xdp_check_mtu_proto
bpf_push_seg6_encap
bpf_push_ip_encap
bpf_lwt_in_push_encap
bpf_lwt_xmit_push_encap
bpf_lwt_in_push_encap_proto
bpf_lwt_xmit_push_encap_proto
bpf_lwt_seg6_store_bytes
bpf_lwt_seg6_store_bytes_proto
bpf_update_srh_state
bpf_lwt_seg6_action
bpf_lwt_seg6_action_proto
bpf_lwt_seg6_adjust_srh
bpf_lwt_seg6_adjust_srh_proto
sk_lookup
__bpf_skc_lookup
__bpf_sk_lookup
bpf_skc_lookup
bpf_sk_lookup
bpf_skc_lookup_tcp
bpf_skc_lookup_tcp_proto
bpf_sk_lookup_tcp
bpf_sk_lookup_tcp_proto
bpf_sk_lookup_udp
bpf_sk_lookup_udp_proto
bpf_tc_skc_lookup_tcp
bpf_tc_skc_lookup_tcp_proto
bpf_tc_sk_lookup_tcp
bpf_tc_sk_lookup_tcp_proto
bpf_tc_sk_lookup_udp
bpf_tc_sk_lookup_udp_proto
bpf_sk_release
bpf_sk_release_proto
bpf_xdp_sk_lookup_udp
bpf_xdp_sk_lookup_udp_proto
bpf_xdp_skc_lookup_tcp
bpf_xdp_skc_lookup_tcp_proto
bpf_xdp_sk_lookup_tcp
bpf_xdp_sk_lookup_tcp_proto
bpf_sock_addr_skc_lookup_tcp
bpf_sock_addr_skc_lookup_tcp_proto
bpf_sock_addr_sk_lookup_tcp
bpf_sock_addr_sk_lookup_tcp_proto
bpf_sock_addr_sk_lookup_udp
bpf_sock_addr_sk_lookup_udp_proto
bpf_tcp_sock_is_valid_access
bpf_tcp_sock_convert_ctx_access
bpf_tcp_sock
bpf_tcp_sock_proto
bpf_get_listener_sock
bpf_get_listener_sock_proto
bpf_skb_ecn_set_ce
bpf_xdp_sock_is_valid_access
bpf_xdp_sock_convert_ctx_access
bpf_skb_ecn_set_ce_proto
bpf_tcp_check_syncookie
bpf_tcp_check_syncookie_proto
bpf_tcp_gen_syncookie
bpf_tcp_gen_syncookie_proto
bpf_sk_assign
bpf_sk_assign_proto
bpf_search_tcp_opt
bpf_sock_ops_load_hdr_opt
bpf_sock_ops_load_hdr_opt_proto
bpf_sock_ops_store_hdr_opt
bpf_sock_ops_store_hdr_opt_proto
bpf_sock_ops_reserve_hdr_opt
bpf_sock_ops_reserve_hdr_opt_proto
bpf_skb_set_tstamp
bpf_skb_set_tstamp_proto
bpf_tcp_raw_gen_syncookie_ipv4
bpf_tcp_raw_gen_syncookie_ipv4_proto
bpf_tcp_raw_gen_syncookie_ipv6
bpf_tcp_raw_gen_syncookie_ipv6_proto
bpf_tcp_raw_check_syncookie_ipv4
bpf_tcp_raw_check_syncookie_ipv4_proto
bpf_tcp_raw_check_syncookie_ipv6
bpf_tcp_raw_check_syncookie_ipv6_proto
bpf_helper_changes_pkt_data
bpf_event_output_data_proto
bpf_sk_storage_get_cg_sock_proto
sock_filter_func_proto
sock_addr_func_proto
sk_filter_func_proto
bpf_sk_storage_get_proto
bpf_sk_storage_delete_proto
cg_skb_func_proto
tc_cls_act_func_proto
xdp_func_proto
bpf_sock_map_update_proto
bpf_sock_hash_update_proto
sock_ops_func_proto
bpf_msg_redirect_map_proto
bpf_msg_redirect_hash_proto
sk_msg_func_proto
bpf_sk_redirect_map_proto
bpf_sk_redirect_hash_proto
sk_skb_func_proto
flow_dissector_func_proto
lwt_out_func_proto
lwt_in_func_proto
lwt_xmit_func_proto
lwt_seg6local_func_proto
bpf_skb_is_valid_access
sk_filter_is_valid_access
cg_skb_is_valid_access
lwt_is_valid_access
__sock_filter_check_attach_type
bpf_sock_common_is_valid_access
bpf_sock_is_valid_access
sock_filter_is_valid_access
bpf_noop_prologue
bpf_unclone_prologue
bpf_gen_ld_abs
tc_cls_act_prologue
tc_cls_act_is_valid_access
nf_conn_btf_access_lock
nfct_btf_struct_access
tc_cls_act_btf_struct_access
__is_valid_xdp_access
xdp_is_valid_access
bpf_warn_invalid_xdp_action
xdp_btf_struct_access
sock_addr_is_valid_access
sock_ops_is_valid_access
sk_skb_prologue
sk_skb_is_valid_access
sk_msg_is_valid_access
flow_dissector_is_valid_access
flow_dissector_convert_ctx_access
bpf_convert_tstamp_type_read
bpf_convert_shinfo_access
bpf_convert_tstamp_read
bpf_convert_tstamp_write
bpf_convert_ctx_access
bpf_sock_convert_ctx_access
tc_cls_act_convert_ctx_access
xdp_convert_ctx_access
sock_addr_convert_ctx_access
sock_ops_convert_ctx_access
bpf_convert_data_end_access
sk_skb_convert_ctx_access
sk_msg_convert_ctx_access
sk_filter_verifier_ops
sk_filter_prog_ops
tc_cls_act_verifier_ops
tc_cls_act_prog_ops
xdp_verifier_ops
xdp_prog_ops
cg_skb_verifier_ops
cg_skb_prog_ops
lwt_in_verifier_ops
lwt_in_prog_ops
lwt_out_verifier_ops
lwt_out_prog_ops
lwt_xmit_verifier_ops
lwt_xmit_prog_ops
lwt_seg6local_verifier_ops
lwt_seg6local_prog_ops
cg_sock_verifier_ops
cg_sock_prog_ops
cg_sock_addr_verifier_ops
cg_sock_addr_prog_ops
sock_ops_verifier_ops
sock_ops_prog_ops
sk_skb_verifier_ops
sk_skb_prog_ops
sk_msg_verifier_ops
sk_msg_prog_ops
flow_dissector_verifier_ops
flow_dissector_prog_ops
sk_detach_filter
sk_get_filter
bpf_init_reuseport_kern
bpf_run_sk_reuseport
sk_select_reuseport
sk_select_reuseport_proto
sk_reuseport_load_bytes
sk_reuseport_load_bytes_proto
sk_reuseport_load_bytes_relative
sk_reuseport_load_bytes_relative_proto
sk_reuseport_func_proto
sk_reuseport_is_valid_access
sk_reuseport_convert_ctx_access
sk_reuseport_verifier_ops
sk_reuseport_prog_ops
bpf_sk_lookup_enabled
bpf_sk_lookup_assign
bpf_sk_lookup_assign_proto
sk_lookup_func_proto
sk_lookup_is_valid_access
sk_lookup_convert_ctx_access
sk_lookup_prog_ops
sk_lookup_verifier_ops
bpf_prog_change_xdp
btf_sock_ids
bpf_skc_to_tcp6_sock
bpf_skc_to_tcp6_sock_proto
bpf_skc_to_tcp_sock
bpf_skc_to_tcp_sock_proto
bpf_skc_to_tcp_timewait_sock
bpf_skc_to_tcp_timewait_sock_proto
bpf_skc_to_tcp_request_sock
bpf_skc_to_tcp_request_sock_proto
bpf_skc_to_udp6_sock
bpf_skc_to_udp6_sock_proto
bpf_skc_to_unix_sock
bpf_skc_to_unix_sock_proto
bpf_skc_to_mptcp_sock
bpf_skc_to_mptcp_sock_proto
bpf_sock_from_file
bpf_sock_from_file_btf_ids
bpf_sock_from_file_proto
bpf_sk_base_func_proto
bpf_dynptr_from_skb
bpf_dynptr_from_xdp
bpf_sock_addr_set_sun_path
bpf_sk_assign_tcp_reqsk
bpf_sock_ops_enable_tx_tstamp
bpf_dynptr_from_skb_rdonly
bpf_kfunc_check_set_skb
bpf_kfunc_check_set_xdp
bpf_kfunc_check_set_sock_addr
bpf_kfunc_check_set_tcp_reqsk
bpf_kfunc_check_set_sock_ops
bpf_kfunc_set_skb
bpf_kfunc_set_xdp
bpf_kfunc_set_sock_addr
bpf_kfunc_set_tcp_reqsk
bpf_kfunc_set_sock_ops
bpf_kfunc_init
bpf_sock_destroy
bpf_sk_iter_kfunc_ids
tracing_iter_filter
bpf_sk_iter_kfunc_set

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/net/core/filter.c