verifier.c source code [linux/kernel/bpf/verifier.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3	* Copyright (c) 2016 Facebook
4	* Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5	*/
6	#include <uapi/linux/btf.h>
7	#include <linux/bpf-cgroup.h>
8	#include <linux/kernel.h>
9	#include <linux/types.h>
10	#include <linux/slab.h>
11	#include <linux/bpf.h>
12	#include <linux/btf.h>
13	#include <linux/bpf_verifier.h>
14	#include <linux/filter.h>
15	#include <net/netlink.h>
16	#include <linux/file.h>
17	#include <linux/vmalloc.h>
18	#include <linux/stringify.h>
19	#include <linux/bsearch.h>
20	#include <linux/sort.h>
21	#include <linux/perf_event.h>
22	#include <linux/ctype.h>
23	#include <linux/error-injection.h>
24	#include <linux/bpf_lsm.h>
25	#include <linux/btf_ids.h>
26	#include <linux/poison.h>
27	#include <linux/module.h>
28	#include <linux/cpumask.h>
29	#include <linux/bpf_mem_alloc.h>
30	#include <net/xdp.h>
31
32	#include "disasm.h"
33
34	static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
35	#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
36	[_id] = & _name ## _verifier_ops,
37	#define BPF_MAP_TYPE(_id, _ops)
38	#define BPF_LINK_TYPE(_id, _name)
39	#include <linux/bpf_types.h>
40	#undef BPF_PROG_TYPE
41	#undef BPF_MAP_TYPE
42	#undef BPF_LINK_TYPE
43	};
44
45	struct bpf_mem_alloc bpf_global_percpu_ma;
46	static bool bpf_global_percpu_ma_set;
47
48	/* bpf_check() is a static code analyzer that walks eBPF program
49	* instruction by instruction and updates register/stack state.
50	* All paths of conditional branches are analyzed until 'bpf_exit' insn.
51	*
52	* The first pass is depth-first-search to check that the program is a DAG.
53	* It rejects the following programs:
54	* - larger than BPF_MAXINSNS insns
55	* - if loop is present (detected via back-edge)
56	* - unreachable insns exist (shouldn't be a forest. program = one function)
57	* - out of bounds or malformed jumps
58	* The second pass is all possible path descent from the 1st insn.
59	* Since it's analyzing all paths through the program, the length of the
60	* analysis is limited to 64k insn, which may be hit even if total number of
61	* insn is less than 4K, but there are too many branches that change stack/regs.
62	* Number of 'branches to be analyzed' is limited to 1k
63	*
64	* On entry to each instruction, each register has a type, and the instruction
65	* changes the types of the registers depending on instruction semantics.
66	* If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
67	* copied to R1.
68	*
69	* All registers are 64-bit.
70	* R0 - return register
71	* R1-R5 argument passing registers
72	* R6-R9 callee saved registers
73	* R10 - frame pointer read-only
74	*
75	* At the start of BPF program the register R1 contains a pointer to bpf_context
76	* and has type PTR_TO_CTX.
77	*
78	* Verifier tracks arithmetic operations on pointers in case:
79	* BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
80	* BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
81	* 1st insn copies R10 (which has FRAME_PTR) type into R1
82	* and 2nd arithmetic instruction is pattern matched to recognize
83	* that it wants to construct a pointer to some element within stack.
84	* So after 2nd insn, the register R1 has type PTR_TO_STACK
85	* (and -20 constant is saved for further stack bounds checking).
86	* Meaning that this reg is a pointer to stack plus known immediate constant.
87	*
88	* Most of the time the registers have SCALAR_VALUE type, which
89	* means the register has some value, but it's not a valid pointer.
90	* (like pointer plus pointer becomes SCALAR_VALUE type)
91	*
92	* When verifier sees load or store instructions the type of base register
93	* can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
94	* four pointer types recognized by check_mem_access() function.
95	*
96	* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
97	* and the range of [ptr, ptr + map's value_size) is accessible.
98	*
99	* registers used to pass values to function calls are checked against
100	* function argument constraints.
101	*
102	* ARG_PTR_TO_MAP_KEY is one of such argument constraints.
103	* It means that the register type passed to this function must be
104	* PTR_TO_STACK and it will be used inside the function as
105	* 'pointer to map element key'
106	*
107	* For example the argument constraints for bpf_map_lookup_elem():
108	* .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
109	* .arg1_type = ARG_CONST_MAP_PTR,
110	* .arg2_type = ARG_PTR_TO_MAP_KEY,
111	*
112	* ret_type says that this function returns 'pointer to map elem value or null'
113	* function expects 1st argument to be a const pointer to 'struct bpf_map' and
114	* 2nd argument should be a pointer to stack, which will be used inside
115	* the helper function as a pointer to map element key.
116	*
117	* On the kernel side the helper function looks like:
118	* u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
119	* {
120	* struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
121	* void *key = (void *) (unsigned long) r2;
122	* void *value;
123	*
124	* here kernel can access 'key' and 'map' pointers safely, knowing that
125	* [key, key + map->key_size) bytes are valid and were initialized on
126	* the stack of eBPF program.
127	* }
128	*
129	* Corresponding eBPF program may look like:
130	* BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
131	* BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
132	* BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
133	* BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
134	* here verifier looks at prototype of map_lookup_elem() and sees:
135	* .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
136	* Now verifier knows that this map has key of R1->map_ptr->key_size bytes
137	*
138	* Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
139	* Now verifier checks that [R2, R2 + map's key_size) are within stack limits
140	* and were initialized prior to this call.
141	* If it's ok, then verifier allows this BPF_CALL insn and looks at
142	* .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
143	* R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
144	* returns either pointer to map value or NULL.
145	*
146	* When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
147	* insn, the register holding that pointer in the true branch changes state to
148	* PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
149	* branch. See check_cond_jmp_op().
150	*
151	* After the call R0 is set to return type of the function and registers R1-R5
152	* are set to NOT_INIT to indicate that they are no longer readable.
153	*
154	* The following reference types represent a potential reference to a kernel
155	* resource which, after first being allocated, must be checked and freed by
156	* the BPF program:
157	* - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
158	*
159	* When the verifier sees a helper call return a reference type, it allocates a
160	* pointer id for the reference and stores it in the current function state.
161	* Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
162	* PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
163	* passes through a NULL-check conditional. For the branch wherein the state is
164	* changed to CONST_IMM, the verifier releases the reference.
165	*
166	* For each helper function that allocates a reference, such as
167	* bpf_sk_lookup_tcp(), there is a corresponding release function, such as
168	* bpf_sk_release(). When a reference type passes into the release function,
169	* the verifier also releases the reference. If any unchecked or unreleased
170	* reference remains at the end of the program, the verifier rejects it.
171	*/
172
173	/ verifier_state + insn_idx are pushed to stack when branch is encountered /
174	struct bpf_verifier_stack_elem {
175	/ verifer state is 'st'*
176	* before processing instruction 'insn_idx'
177	* and after processing instruction 'prev_insn_idx'
178	*/
179	struct bpf_verifier_state st;
180	int insn_idx;
181	int prev_insn_idx;
182	struct bpf_verifier_stack_elem *next;
183	/ length of verifier log at the time this state was pushed on stack /
184	u32 log_pos;
185	};
186
/* Complexity limits (NOTE(review): their consumers are outside this chunk). */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

/* Flag bits stored in the two top bits of bpf_insn_aux_data::map_key_state;
 * bpf_map_key_immediate() masks them off to recover the remaining value.
 */
#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

/* bpf_insn_aux_data::map_ptr_state packs a map pointer together with the
 * BPF_MAP_PTR_UNPRIV flag in bit 0; BPF_MAP_PTR() strips the flag again.
 * BPF_MAP_PTR_POISON is built so that bit 0 is clear (enforced by the
 * BUILD_BUG_ON() in bpf_map_ptr_store()).
 */
#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) + POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

/* NOTE(review): presumably a size cap related to bpf_global_percpu_ma; not
 * used within this chunk.
 */
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE	512
199
200	static int acquire_reference_state(struct bpf_verifier_env env, int* insn_idx);
201	static int release_reference(struct bpf_verifier_env env, int* ref_obj_id);
202	static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
203	static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
204	static int ref_set_non_owning(struct bpf_verifier_env *env,
205	struct bpf_reg_state *reg);
206	static void specialize_kfunc(struct bpf_verifier_env *env,
207	u32 func_id, u16 offset, unsigned long *addr);
208	static bool is_trusted_reg(const struct bpf_reg_state *reg);
209
210	static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
211	{
212	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
213	}
214
215	static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
216	{
217	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
218	}
219
220	static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
221	const struct bpf_map *map, bool unpriv)
222	{
223	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
224	unpriv \|= bpf_map_ptr_unpriv(aux);
225	aux->map_ptr_state = (unsigned long)map \|
226	(unpriv ? BPF_MAP_PTR_UNPRIV : `0UL`);
227	}
228
229	static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
230	{
231	return aux->map_key_state & BPF_MAP_KEY_POISON;
232	}
233
234	static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
235	{
236	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
237	}
238
239	static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
240	{
241	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN \| BPF_MAP_KEY_POISON);
242	}
243
244	static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
245	{
246	bool poisoned = bpf_map_key_poisoned(aux);
247
248	aux->map_key_state = state \| BPF_MAP_KEY_SEEN \|
249	(poisoned ? BPF_MAP_KEY_POISON : `0ULL`);
250	}
251
252	static bool bpf_helper_call(const struct bpf_insn *insn)
253	{
254	return insn->code == (BPF_JMP \| BPF_CALL) &&
255	insn->src_reg == `0`;
256	}
257
258	static bool bpf_pseudo_call(const struct bpf_insn *insn)
259	{
260	return insn->code == (BPF_JMP \| BPF_CALL) &&
261	insn->src_reg == BPF_PSEUDO_CALL;
262	}
263
264	static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
265	{
266	return insn->code == (BPF_JMP \| BPF_CALL) &&
267	insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
268	}
269
270	struct bpf_call_arg_meta {
271	struct bpf_map *map_ptr;
272	bool raw_mode;
273	bool pkt_access;
274	u8 release_regno;
275	int regno;
276	int access_size;
277	int mem_size;
278	u64 msize_max_value;
279	int ref_obj_id;
280	int dynptr_id;
281	int map_uid;
282	int func_id;
283	struct btf *btf;
284	u32 btf_id;
285	struct btf *ret_btf;
286	u32 ret_btf_id;
287	u32 subprogno;
288	struct btf_field *kptr_field;
289	};
290
291	struct bpf_kfunc_call_arg_meta {
292	/ In parameters /
293	struct btf *btf;
294	u32 func_id;
295	u32 kfunc_flags;
296	const struct btf_type *func_proto;
297	const char *func_name;
298	/ Out parameters /
299	u32 ref_obj_id;
300	u8 release_regno;
301	bool r0_rdonly;
302	u32 ret_btf_id;
303	u64 r0_size;
304	u32 subprogno;
305	struct {
306	u64 value;
307	bool found;
308	} arg_constant;
309
310	/ arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,*
311	* generally to pass info about user-defined local kptr types to later
312	* verification logic
313	* bpf_obj_drop/bpf_percpu_obj_drop
314	* Record the local kptr type to be drop'd
315	* bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
316	* Record the local kptr type to be refcount_incr'd and use
317	* arg_owning_ref to determine whether refcount_acquire should be
318	* fallible
319	*/
320	struct btf *arg_btf;
321	u32 arg_btf_id;
322	bool arg_owning_ref;
323
324	struct {
325	struct btf_field *field;
326	} arg_list_head;
327	struct {
328	struct btf_field *field;
329	} arg_rbtree_root;
330	struct {
331	enum bpf_dynptr_type type;
332	u32 id;
333	u32 ref_obj_id;
334	} initialized_dynptr;
335	struct {
336	u8 spi;
337	u8 frameno;
338	} iter;
339	u64 mem_size;
340	};
341
/* Global BTF object pointer. NOTE(review): by its name this is the vmlinux
 * BTF; it is assigned outside this chunk.
 */
struct btf *btf_vmlinux;
343
344	static const char btf_type_name(const* struct btf *btf, u32 id)
345	{
346	return btf_name_by_offset(btf, offset: btf_type_by_id(btf, type_id: id)->name_off);
347	}
348
/* File-local mutexes. NOTE(review): what each one serialises is decided by
 * code outside this chunk; by name, one guards the verifier and the other
 * the global per-CPU memory allocator.
 */
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
351
352	__printf(`2`, `3`) static void verbose(void private_data, const* char *fmt, ...)
353	{
354	struct bpf_verifier_env *env = private_data;
355	va_list args;
356
357	if (!bpf_verifier_log_needed(log: &env->log))
358	return;
359
360	va_start(args, fmt);
361	bpf_verifier_vlog(log: &env->log, fmt, args);
362	va_end(args);
363	}
364
365	static void verbose_invalid_scalar(struct bpf_verifier_env *env,
366	struct bpf_reg_state *reg,
367	struct bpf_retval_range range, const char *ctx,
368	const char *reg_name)
369	{
370	bool unknown = true;
371
372	verbose(private_data: env, fmt: "%s the register %s has", ctx, reg_name);
373	if (reg->smin_value > S64_MIN) {
374	verbose(private_data: env, fmt: " smin=%lld", reg->smin_value);
375	unknown = false;
376	}
377	if (reg->smax_value < S64_MAX) {
378	verbose(private_data: env, fmt: " smax=%lld", reg->smax_value);
379	unknown = false;
380	}
381	if (unknown)
382	verbose(private_data: env, fmt: " unknown scalar value");
383	verbose(private_data: env, fmt: " should have been in [%d, %d]\n", range.minval, range.maxval);
384	}
385
386	static bool type_may_be_null(u32 type)
387	{
388	return type & PTR_MAYBE_NULL;
389	}
390
391	static bool reg_not_null(const struct bpf_reg_state *reg)
392	{
393	enum bpf_reg_type type;
394
395	type = reg->type;
396	if (type_may_be_null(type))
397	return false;
398
399	type = base_type(type);
400	return type == PTR_TO_SOCKET \|\|
401	type == PTR_TO_TCP_SOCK \|\|
402	type == PTR_TO_MAP_VALUE \|\|
403	type == PTR_TO_MAP_KEY \|\|
404	type == PTR_TO_SOCK_COMMON \|\|
405	(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) \|\|
406	type == PTR_TO_MEM;
407	}
408
409	static struct btf_record reg_btf_record(const* struct bpf_reg_state *reg)
410	{
411	struct btf_record *rec = NULL;
412	struct btf_struct_meta *meta;
413
414	if (reg->type == PTR_TO_MAP_VALUE) {
415	rec = reg->map_ptr->record;
416	} else if (type_is_ptr_alloc_obj(type: reg->type)) {
417	meta = btf_find_struct_meta(btf: reg->btf, btf_id: reg->btf_id);
418	if (meta)
419	rec = meta->record;
420	}
421	return rec;
422	}
423
424	static bool subprog_is_global(const struct bpf_verifier_env env, int* subprog)
425	{
426	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
427
428	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
429	}
430
431	static const char subprog_name(const* struct bpf_verifier_env env, int* subprog)
432	{
433	struct bpf_func_info *info;
434
435	if (!env->prog->aux->func_info)
436	return "";
437
438	info = &env->prog->aux->func_info[subprog];
439	return btf_type_name(btf: env->prog->aux->btf, id: info->type_id);
440	}
441
442	static void mark_subprog_exc_cb(struct bpf_verifier_env env, int* subprog)
443	{
444	struct bpf_subprog_info *info = subprog_info(env, subprog);
445
446	info->is_cb = true;
447	info->is_async_cb = true;
448	info->is_exception_cb = true;
449	}
450
451	static bool subprog_is_exc_cb(struct bpf_verifier_env env, int* subprog)
452	{
453	return subprog_info(env, subprog)->is_exception_cb;
454	}
455
456	static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
457	{
458	return btf_record_has_field(rec: reg_btf_record(reg), type: BPF_SPIN_LOCK);
459	}
460
461	static bool type_is_rdonly_mem(u32 type)
462	{
463	return type & MEM_RDONLY;
464	}
465
466	static bool is_acquire_function(enum bpf_func_id func_id,
467	const struct bpf_map *map)
468	{
469	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
470
471	if (func_id == BPF_FUNC_sk_lookup_tcp \|\|
472	func_id == BPF_FUNC_sk_lookup_udp \|\|
473	func_id == BPF_FUNC_skc_lookup_tcp \|\|
474	func_id == BPF_FUNC_ringbuf_reserve \|\|
475	func_id == BPF_FUNC_kptr_xchg)
476	return true;
477
478	if (func_id == BPF_FUNC_map_lookup_elem &&
479	(map_type == BPF_MAP_TYPE_SOCKMAP \|\|
480	map_type == BPF_MAP_TYPE_SOCKHASH))
481	return true;
482
483	return false;
484	}
485
486	static bool is_ptr_cast_function(enum bpf_func_id func_id)
487	{
488	return func_id == BPF_FUNC_tcp_sock \|\|
489	func_id == BPF_FUNC_sk_fullsock \|\|
490	func_id == BPF_FUNC_skc_to_tcp_sock \|\|
491	func_id == BPF_FUNC_skc_to_tcp6_sock \|\|
492	func_id == BPF_FUNC_skc_to_udp6_sock \|\|
493	func_id == BPF_FUNC_skc_to_mptcp_sock \|\|
494	func_id == BPF_FUNC_skc_to_tcp_timewait_sock \|\|
495	func_id == BPF_FUNC_skc_to_tcp_request_sock;
496	}
497
498	static bool is_dynptr_ref_function(enum bpf_func_id func_id)
499	{
500	return func_id == BPF_FUNC_dynptr_data;
501	}
502
503	static bool is_sync_callback_calling_kfunc(u32 btf_id);
504	static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
505
506	static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
507	{
508	return func_id == BPF_FUNC_for_each_map_elem \|\|
509	func_id == BPF_FUNC_find_vma \|\|
510	func_id == BPF_FUNC_loop \|\|
511	func_id == BPF_FUNC_user_ringbuf_drain;
512	}
513
514	static bool is_async_callback_calling_function(enum bpf_func_id func_id)
515	{
516	return func_id == BPF_FUNC_timer_set_callback;
517	}
518
519	static bool is_callback_calling_function(enum bpf_func_id func_id)
520	{
521	return is_sync_callback_calling_function(func_id) \|\|
522	is_async_callback_calling_function(func_id);
523	}
524
525	static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
526	{
527	return (bpf_helper_call(insn) && is_sync_callback_calling_function(func_id: insn->imm)) \|\|
528	(bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(btf_id: insn->imm));
529	}
530
531	static bool is_async_callback_calling_insn(struct bpf_insn *insn)
532	{
533	return bpf_helper_call(insn) && is_async_callback_calling_function(func_id: insn->imm);
534	}
535
536	static bool is_may_goto_insn(struct bpf_insn *insn)
537	{
538	return insn->code == (BPF_JMP \| BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
539	}
540
541	static bool is_may_goto_insn_at(struct bpf_verifier_env env, int* insn_idx)
542	{
543	return is_may_goto_insn(insn: &env->prog->insnsi[insn_idx]);
544	}
545
546	static bool is_storage_get_function(enum bpf_func_id func_id)
547	{
548	return func_id == BPF_FUNC_sk_storage_get \|\|
549	func_id == BPF_FUNC_inode_storage_get \|\|
550	func_id == BPF_FUNC_task_storage_get \|\|
551	func_id == BPF_FUNC_cgrp_storage_get;
552	}
553
554	static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
555	const struct bpf_map *map)
556	{
557	int ref_obj_uses = `0`;
558
559	if (is_ptr_cast_function(func_id))
560	ref_obj_uses++;
561	if (is_acquire_function(func_id, map))
562	ref_obj_uses++;
563	if (is_dynptr_ref_function(func_id))
564	ref_obj_uses++;
565
566	return ref_obj_uses > `1`;
567	}
568
569	static bool is_cmpxchg_insn(const struct bpf_insn *insn)
570	{
571	return BPF_CLASS(insn->code) == BPF_STX &&
572	BPF_MODE(insn->code) == BPF_ATOMIC &&
573	insn->imm == BPF_CMPXCHG;
574	}
575
576	static int __get_spi(s32 off)
577	{
578	return (-off - `1`) / BPF_REG_SIZE;
579	}
580
581	static struct bpf_func_state func(struct* bpf_verifier_env *env,
582	const struct bpf_reg_state *reg)
583	{
584	struct bpf_verifier_state *cur = env->cur_state;
585
586	return cur->frame[reg->frameno];
587	}
588
589	static bool is_spi_bounds_valid(struct bpf_func_state state, int* spi, int nr_slots)
590	{
591	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
592
593	/ We need to check that slots between [spi - nr_slots + 1, spi] are*
594	* within [0, allocated_stack).
595	*
596	* Please note that the spi grows downwards. For example, a dynptr
597	* takes the size of two stack slots; the first slot will be at
598	* spi and the second slot will be at spi - 1.
599	*/
600	return spi - nr_slots + `1` >= `0` && spi < allocated_slots;
601	}
602
603	static int stack_slot_obj_get_spi(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
604	const char obj_kind, int* nr_slots)
605	{
606	int off, spi;
607
608	if (!tnum_is_const(a: reg->var_off)) {
609	verbose(private_data: env, fmt: "%s has to be at a constant offset\n", obj_kind);
610	return -EINVAL;
611	}
612
613	off = reg->off + reg->var_off.value;
614	if (off % BPF_REG_SIZE) {
615	verbose(private_data: env, fmt: "cannot pass in %s at an offset=%d\n", obj_kind, off);
616	return -EINVAL;
617	}
618
619	spi = __get_spi(off);
620	if (spi + `1` < nr_slots) {
621	verbose(private_data: env, fmt: "cannot pass in %s at an offset=%d\n", obj_kind, off);
622	return -EINVAL;
623	}
624
625	if (!is_spi_bounds_valid(state: func(env, reg), spi, nr_slots))
626	return -ERANGE;
627	return spi;
628	}
629
630	static int dynptr_get_spi(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
631	{
632	return stack_slot_obj_get_spi(env, reg, obj_kind: "dynptr", BPF_DYNPTR_NR_SLOTS);
633	}
634
/* Slot index of the @nr_slots-wide iterator that @reg points to.  Return
 * values are those of stack_slot_obj_get_spi().
 */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
}
639
640	static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
641	{
642	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
643	case DYNPTR_TYPE_LOCAL:
644	return BPF_DYNPTR_TYPE_LOCAL;
645	case DYNPTR_TYPE_RINGBUF:
646	return BPF_DYNPTR_TYPE_RINGBUF;
647	case DYNPTR_TYPE_SKB:
648	return BPF_DYNPTR_TYPE_SKB;
649	case DYNPTR_TYPE_XDP:
650	return BPF_DYNPTR_TYPE_XDP;
651	default:
652	return BPF_DYNPTR_TYPE_INVALID;
653	}
654	}
655
656	static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
657	{
658	switch (type) {
659	case BPF_DYNPTR_TYPE_LOCAL:
660	return DYNPTR_TYPE_LOCAL;
661	case BPF_DYNPTR_TYPE_RINGBUF:
662	return DYNPTR_TYPE_RINGBUF;
663	case BPF_DYNPTR_TYPE_SKB:
664	return DYNPTR_TYPE_SKB;
665	case BPF_DYNPTR_TYPE_XDP:
666	return DYNPTR_TYPE_XDP;
667	default:
668	return `0`;
669	}
670	}
671
672	static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
673	{
674	return type == BPF_DYNPTR_TYPE_RINGBUF;
675	}
676
677	static void __mark_dynptr_reg(struct bpf_reg_state *reg,
678	enum bpf_dynptr_type type,
679	bool first_slot, int dynptr_id);
680
681	static void __mark_reg_not_init(const struct bpf_verifier_env *env,
682	struct bpf_reg_state *reg);
683
684	static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
685	struct bpf_reg_state *sreg1,
686	struct bpf_reg_state *sreg2,
687	enum bpf_dynptr_type type)
688	{
689	int id = ++env->id_gen;
690
691	__mark_dynptr_reg(reg: sreg1, type, first_slot: true, dynptr_id: id);
692	__mark_dynptr_reg(reg: sreg2, type, first_slot: false, dynptr_id: id);
693	}
694
695	static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
696	struct bpf_reg_state *reg,
697	enum bpf_dynptr_type type)
698	{
699	__mark_dynptr_reg(reg, type, first_slot: true, dynptr_id: ++env->id_gen);
700	}
701
702	static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
703	struct bpf_func_state state, int* spi);
704
705	static int mark_stack_slots_dynptr(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
706	enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
707	{
708	struct bpf_func_state *state = func(env, reg);
709	enum bpf_dynptr_type type;
710	int spi, i, err;
711
712	spi = dynptr_get_spi(env, reg);
713	if (spi < `0`)
714	return spi;
715
716	/ We cannot assume both spi and spi - 1 belong to the same dynptr,*
717	* hence we need to call destroy_if_dynptr_stack_slot twice for both,
718	* to ensure that for the following example:
719	* [d1][d1][d2][d2]
720	* spi 3 2 1 0
721	* So marking spi = 2 should lead to destruction of both d1 and d2. In
722	* case they do belong to same dynptr, second call won't see slot_type
723	* as STACK_DYNPTR and will simply skip destruction.
724	*/
725	err = destroy_if_dynptr_stack_slot(env, state, spi);
726	if (err)
727	return err;
728	err = destroy_if_dynptr_stack_slot(env, state, spi: spi - `1`);
729	if (err)
730	return err;
731
732	for (i = `0`; i < BPF_REG_SIZE; i++) {
733	state->stack[spi].slot_type[i] = STACK_DYNPTR;
734	state->stack[spi - `1`].slot_type[i] = STACK_DYNPTR;
735	}
736
737	type = arg_to_dynptr_type(arg_type);
738	if (type == BPF_DYNPTR_TYPE_INVALID)
739	return -EINVAL;
740
741	mark_dynptr_stack_regs(env, sreg1: &state->stack[spi].spilled_ptr,
742	sreg2: &state->stack[spi - `1`].spilled_ptr, type);
743
744	if (dynptr_type_refcounted(type)) {
745	/ The id is used to track proper releasing /
746	int id;
747
748	if (clone_ref_obj_id)
749	id = clone_ref_obj_id;
750	else
751	id = acquire_reference_state(env, insn_idx);
752
753	if (id < `0`)
754	return id;
755
756	state->stack[spi].spilled_ptr.ref_obj_id = id;
757	state->stack[spi - `1`].spilled_ptr.ref_obj_id = id;
758	}
759
760	state->stack[spi].spilled_ptr.live \|= REG_LIVE_WRITTEN;
761	state->stack[spi - `1`].spilled_ptr.live \|= REG_LIVE_WRITTEN;
762
763	return `0`;
764	}
765
766	static void invalidate_dynptr(struct bpf_verifier_env env, struct* bpf_func_state state, int* spi)
767	{
768	int i;
769
770	for (i = `0`; i < BPF_REG_SIZE; i++) {
771	state->stack[spi].slot_type[i] = STACK_INVALID;
772	state->stack[spi - `1`].slot_type[i] = STACK_INVALID;
773	}
774
775	__mark_reg_not_init(env, reg: &state->stack[spi].spilled_ptr);
776	__mark_reg_not_init(env, reg: &state->stack[spi - `1`].spilled_ptr);
777
778	/ Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?*
779	*
780	* While we don't allow reading STACK_INVALID, it is still possible to
781	* do <8 byte writes marking some but not all slots as STACK_MISC. Then,
782	* helpers or insns can do partial read of that part without failing,
783	* but check_stack_range_initialized, check_stack_read_var_off, and
784	* check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
785	* the slot conservatively. Hence we need to prevent those liveness
786	* marking walks.
787	*
788	* This was not a problem before because STACK_INVALID is only set by
789	* default (where the default reg state has its reg->parent as NULL), or
790	* in clean_live_states after REG_LIVE_DONE (at which point
791	* mark_reg_read won't walk reg->parent chain), but not randomly during
792	* verifier state exploration (like we did above). Hence, for our case
793	* parentage chain will still be live (i.e. reg->parent may be
794	* non-NULL), while earlier reg->parent was NULL, so we need
795	* REG_LIVE_WRITTEN to screen off read marker propagation when it is
796	* done later on reads or by mark_dynptr_read as well to unnecessary
797	* mark registers in verifier state.
798	*/
799	state->stack[spi].spilled_ptr.live \|= REG_LIVE_WRITTEN;
800	state->stack[spi - `1`].spilled_ptr.live \|= REG_LIVE_WRITTEN;
801	}
802
803	static int unmark_stack_slots_dynptr(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
804	{
805	struct bpf_func_state *state = func(env, reg);
806	int spi, ref_obj_id, i;
807
808	spi = dynptr_get_spi(env, reg);
809	if (spi < `0`)
810	return spi;
811
812	if (!dynptr_type_refcounted(type: state->stack[spi].spilled_ptr.dynptr.type)) {
813	invalidate_dynptr(env, state, spi);
814	return `0`;
815	}
816
817	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
818
819	/ If the dynptr has a ref_obj_id, then we need to invalidate*
820	* two things:
821	*
822	* 1) Any dynptrs with a matching ref_obj_id (clones)
823	* 2) Any slices derived from this dynptr.
824	*/
825
826	/ Invalidate any slices associated with this dynptr /
827	WARN_ON_ONCE(release_reference(env, ref_obj_id));
828
829	/ Invalidate any dynptr clones /
830	for (i = `1`; i < state->allocated_stack / BPF_REG_SIZE; i++) {
831	if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
832	continue;
833
834	/ it should always be the case that if the ref obj id*
835	* matches then the stack slot also belongs to a
836	* dynptr
837	*/
838	if (state->stack[i].slot_type[`0`] != STACK_DYNPTR) {
839	verbose(private_data: env, fmt: "verifier internal error: misconfigured ref_obj_id\n");
840	return -EFAULT;
841	}
842	if (state->stack[i].spilled_ptr.dynptr.first_slot)
843	invalidate_dynptr(env, state, spi: i);
844	}
845
846	return `0`;
847	}
848
849	static void __mark_reg_unknown(const struct bpf_verifier_env *env,
850	struct bpf_reg_state *reg);
851
852	static void mark_reg_invalid(const struct bpf_verifier_env env, struct* bpf_reg_state *reg)
853	{
854	if (!env->allow_ptr_leaks)
855	__mark_reg_not_init(env, reg);
856	else
857	__mark_reg_unknown(env, reg);
858	}
859
860	static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
861	struct bpf_func_state state, int* spi)
862	{
863	struct bpf_func_state *fstate;
864	struct bpf_reg_state *dreg;
865	int i, dynptr_id;
866
867	/ We always ensure that STACK_DYNPTR is never set partially,*
868	* hence just checking for slot_type[0] is enough. This is
869	* different for STACK_SPILL, where it may be only set for
870	* 1 byte, so code has to use is_spilled_reg.
871	*/
872	if (state->stack[spi].slot_type[`0`] != STACK_DYNPTR)
873	return `0`;
874
875	/ Reposition spi to first slot /
876	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
877	spi = spi + `1`;
878
879	if (dynptr_type_refcounted(type: state->stack[spi].spilled_ptr.dynptr.type)) {
880	verbose(private_data: env, fmt: "cannot overwrite referenced dynptr\n");
881	return -EINVAL;
882	}
883
884	mark_stack_slot_scratched(env, spi);
885	mark_stack_slot_scratched(env, spi: spi - `1`);
886
887	/ Writing partially to one dynptr stack slot destroys both. /
888	for (i = `0`; i < BPF_REG_SIZE; i++) {
889	state->stack[spi].slot_type[i] = STACK_INVALID;
890	state->stack[spi - `1`].slot_type[i] = STACK_INVALID;
891	}
892
893	dynptr_id = state->stack[spi].spilled_ptr.id;
894	/ Invalidate any slices associated with this dynptr /
895	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
896	/ Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM /
897	if (dreg->type != (PTR_TO_MEM \| PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
898	continue;
899	if (dreg->dynptr_id == dynptr_id)
900	mark_reg_invalid(env, dreg);
901	}));
902
903	/ Do not release reference state, we are destroying dynptr on stack,*
904	* not using some helper to release it. Just reset register.
905	*/
906	__mark_reg_not_init(env, reg: &state->stack[spi].spilled_ptr);
907	__mark_reg_not_init(env, reg: &state->stack[spi - `1`].spilled_ptr);
908
909	/ Same reason as unmark_stack_slots_dynptr above /
910	state->stack[spi].spilled_ptr.live \|= REG_LIVE_WRITTEN;
911	state->stack[spi - `1`].spilled_ptr.live \|= REG_LIVE_WRITTEN;
912
913	return `0`;
914	}
915
916	static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
917	{
918	int spi;
919
920	if (reg->type == CONST_PTR_TO_DYNPTR)
921	return false;
922
923	spi = dynptr_get_spi(env, reg);
924
925	/ -ERANGE (i.e. spi not falling into allocated stack slots) isn't an*
926	* error because this just means the stack state hasn't been updated yet.
927	* We will do check_mem_access to check and update stack bounds later.
928	*/
929	if (spi < `0` && spi != -ERANGE)
930	return false;
931
932	/ We don't need to check if the stack slots are marked by previous*
933	* dynptr initializations because we allow overwriting existing unreferenced
934	* STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
935	* destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
936	* touching are completely destructed before we reinitialize them for a new
937	* one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
938	* instead of delaying it until the end where the user will get "Unreleased
939	* reference" error.
940	*/
941	return true;
942	}
943
944	static bool is_dynptr_reg_valid_init(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
945	{
946	struct bpf_func_state *state = func(env, reg);
947	int i, spi;
948
949	/ This already represents first slot of initialized bpf_dynptr.*
950	*
951	* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
952	* check_func_arg_reg_off's logic, so we don't need to check its
953	* offset and alignment.
954	*/
955	if (reg->type == CONST_PTR_TO_DYNPTR)
956	return true;
957
958	spi = dynptr_get_spi(env, reg);
959	if (spi < `0`)
960	return false;
961	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
962	return false;
963
964	for (i = `0`; i < BPF_REG_SIZE; i++) {
965	if (state->stack[spi].slot_type[i] != STACK_DYNPTR \|\|
966	state->stack[spi - `1`].slot_type[i] != STACK_DYNPTR)
967	return false;
968	}
969
970	return true;
971	}
972
973	static bool is_dynptr_type_expected(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
974	enum bpf_arg_type arg_type)
975	{
976	struct bpf_func_state *state = func(env, reg);
977	enum bpf_dynptr_type dynptr_type;
978	int spi;
979
980	/ ARG_PTR_TO_DYNPTR takes any type of dynptr /
981	if (arg_type == ARG_PTR_TO_DYNPTR)
982	return true;
983
984	dynptr_type = arg_to_dynptr_type(arg_type);
985	if (reg->type == CONST_PTR_TO_DYNPTR) {
986	return reg->dynptr.type == dynptr_type;
987	} else {
988	spi = dynptr_get_spi(env, reg);
989	if (spi < `0`)
990	return false;
991	return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
992	}
993	}
994
/* Forward declarations of helpers that are defined elsewhere in this file
 * but are already called by mark_stack_slots_iter() below.
 */
static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);

static bool in_rcu_cs(struct bpf_verifier_env *env);

static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1000
1001	static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1002	struct bpf_kfunc_call_arg_meta *meta,
1003	struct bpf_reg_state reg, int* insn_idx,
1004	struct btf btf, u32 btf_id, int* nr_slots)
1005	{
1006	struct bpf_func_state *state = func(env, reg);
1007	int spi, i, j, id;
1008
1009	spi = iter_get_spi(env, reg, nr_slots);
1010	if (spi < `0`)
1011	return spi;
1012
1013	id = acquire_reference_state(env, insn_idx);
1014	if (id < `0`)
1015	return id;
1016
1017	for (i = `0`; i < nr_slots; i++) {
1018	struct bpf_stack_state *slot = &state->stack[spi - i];
1019	struct bpf_reg_state *st = &slot->spilled_ptr;
1020
1021	__mark_reg_known_zero(reg: st);
1022	st->type = PTR_TO_STACK; / we don't have dedicated reg type /
1023	if (is_kfunc_rcu_protected(meta)) {
1024	if (in_rcu_cs(env))
1025	st->type \|= MEM_RCU;
1026	else
1027	st->type \|= PTR_UNTRUSTED;
1028	}
1029	st->live \|= REG_LIVE_WRITTEN;
1030	st->ref_obj_id = i == `0` ? id : `0`;
1031	st->iter.btf = btf;
1032	st->iter.btf_id = btf_id;
1033	st->iter.state = BPF_ITER_STATE_ACTIVE;
1034	st->iter.depth = `0`;
1035
1036	for (j = `0`; j < BPF_REG_SIZE; j++)
1037	slot->slot_type[j] = STACK_ITER;
1038
1039	mark_stack_slot_scratched(env, spi: spi - i);
1040	}
1041
1042	return `0`;
1043	}
1044
1045	static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1046	struct bpf_reg_state reg, int* nr_slots)
1047	{
1048	struct bpf_func_state *state = func(env, reg);
1049	int spi, i, j;
1050
1051	spi = iter_get_spi(env, reg, nr_slots);
1052	if (spi < `0`)
1053	return spi;
1054
1055	for (i = `0`; i < nr_slots; i++) {
1056	struct bpf_stack_state *slot = &state->stack[spi - i];
1057	struct bpf_reg_state *st = &slot->spilled_ptr;
1058
1059	if (i == `0`)
1060	WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1061
1062	__mark_reg_not_init(env, reg: st);
1063
1064	/ see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN /
1065	st->live \|= REG_LIVE_WRITTEN;
1066
1067	for (j = `0`; j < BPF_REG_SIZE; j++)
1068	slot->slot_type[j] = STACK_INVALID;
1069
1070	mark_stack_slot_scratched(env, spi: spi - i);
1071	}
1072
1073	return `0`;
1074	}
1075
1076	static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1077	struct bpf_reg_state reg, int* nr_slots)
1078	{
1079	struct bpf_func_state *state = func(env, reg);
1080	int spi, i, j;
1081
1082	/ For -ERANGE (i.e. spi not falling into allocated stack slots), we*
1083	* will do check_mem_access to check and update stack bounds later, so
1084	* return true for that case.
1085	*/
1086	spi = iter_get_spi(env, reg, nr_slots);
1087	if (spi == -ERANGE)
1088	return true;
1089	if (spi < `0`)
1090	return false;
1091
1092	for (i = `0`; i < nr_slots; i++) {
1093	struct bpf_stack_state *slot = &state->stack[spi - i];
1094
1095	for (j = `0`; j < BPF_REG_SIZE; j++)
1096	if (slot->slot_type[j] == STACK_ITER)
1097	return false;
1098	}
1099
1100	return true;
1101	}
1102
1103	static int is_iter_reg_valid_init(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
1104	struct btf btf, u32 btf_id, int* nr_slots)
1105	{
1106	struct bpf_func_state *state = func(env, reg);
1107	int spi, i, j;
1108
1109	spi = iter_get_spi(env, reg, nr_slots);
1110	if (spi < `0`)
1111	return -EINVAL;
1112
1113	for (i = `0`; i < nr_slots; i++) {
1114	struct bpf_stack_state *slot = &state->stack[spi - i];
1115	struct bpf_reg_state *st = &slot->spilled_ptr;
1116
1117	if (st->type & PTR_UNTRUSTED)
1118	return -EPROTO;
1119	/ only main (first) slot has ref_obj_id set /
1120	if (i == `0` && !st->ref_obj_id)
1121	return -EINVAL;
1122	if (i != `0` && st->ref_obj_id)
1123	return -EINVAL;
1124	if (st->iter.btf != btf \|\| st->iter.btf_id != btf_id)
1125	return -EINVAL;
1126
1127	for (j = `0`; j < BPF_REG_SIZE; j++)
1128	if (slot->slot_type[j] != STACK_ITER)
1129	return -EINVAL;
1130	}
1131
1132	return `0`;
1133	}
1134
1135	/ Check if given stack slot is "special":*
1136	* - spilled register state (STACK_SPILL);
1137	* - dynptr state (STACK_DYNPTR);
1138	* - iter state (STACK_ITER).
1139	*/
1140	static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1141	{
1142	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - `1`];
1143
1144	switch (type) {
1145	case STACK_SPILL:
1146	case STACK_DYNPTR:
1147	case STACK_ITER:
1148	return true;
1149	case STACK_INVALID:
1150	case STACK_MISC:
1151	case STACK_ZERO:
1152	return false;
1153	default:
1154	WARN_ONCE(`1`, "unknown stack slot type %d\n", type);
1155	return true;
1156	}
1157	}
1158
1159	/ The reg state of a pointer or a bounded scalar was saved when*
1160	* it was spilled to the stack.
1161	*/
1162	static bool is_spilled_reg(const struct bpf_stack_state *stack)
1163	{
1164	return stack->slot_type[BPF_REG_SIZE - `1`] == STACK_SPILL;
1165	}
1166
1167	static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1168	{
1169	return stack->slot_type[BPF_REG_SIZE - `1`] == STACK_SPILL &&
1170	stack->spilled_ptr.type == SCALAR_VALUE;
1171	}
1172
1173	static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
1174	{
1175	return stack->slot_type[`0`] == STACK_SPILL &&
1176	stack->spilled_ptr.type == SCALAR_VALUE;
1177	}
1178
1179	/ Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which*
1180	* case they are equivalent, or it's STACK_ZERO, in which case we preserve
1181	* more precise STACK_ZERO.
1182	* Note, in uprivileged mode leaving STACK_INVALID is wrong, so we take
1183	* env->allow_ptr_leaks into account and force STACK_MISC, if necessary.
1184	*/
1185	static void mark_stack_slot_misc(struct bpf_verifier_env env, u8 stype)
1186	{
1187	if (*stype == STACK_ZERO)
1188	return;
1189	if (env->allow_ptr_leaks && *stype == STACK_INVALID)
1190	return;
1191	*stype = STACK_MISC;
1192	}
1193
1194	static void scrub_spilled_slot(u8 *stype)
1195	{
1196	if (*stype != STACK_INVALID)
1197	*stype = STACK_MISC;
1198	}
1199
1200	/ copy array src of length n * size bytes to dst. dst is reallocated if it's too*
1201	* small to hold src. This is different from krealloc since we don't want to preserve
1202	* the contents of dst.
1203	*
1204	* Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1205	* not be allocated.
1206	*/
1207	static void copy_array(void* dst, const* void *src, size_t n, size_t size, gfp_t flags)
1208	{
1209	size_t alloc_bytes;
1210	void *orig = dst;
1211	size_t bytes;
1212
1213	if (ZERO_OR_NULL_PTR(src))
1214	goto out;
1215
1216	if (unlikely(check_mul_overflow(n, size, &bytes)))
1217	return NULL;
1218
1219	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1220	dst = krealloc(objp: orig, new_size: alloc_bytes, flags);
1221	if (!dst) {
1222	kfree(objp: orig);
1223	return NULL;
1224	}
1225
1226	memcpy(dst, src, bytes);
1227	out:
1228	return dst ? dst : ZERO_SIZE_PTR;
1229	}
1230
1231	/ resize an array from old_n items to new_n items. the array is reallocated if it's too*
1232	* small to hold new_n items. new items are zeroed out if the array grows.
1233	*
1234	* Contrary to krealloc_array, does not free arr if new_n is zero.
1235	*/
1236	static void realloc_array(void* *arr, size_t old_n, size_t new_n, size_t size)
1237	{
1238	size_t alloc_size;
1239	void *new_arr;
1240
1241	if (!new_n \|\| old_n == new_n)
1242	goto out;
1243
1244	alloc_size = kmalloc_size_roundup(size: size_mul(factor1: new_n, factor2: size));
1245	new_arr = krealloc(objp: arr, new_size: alloc_size, GFP_KERNEL);
1246	if (!new_arr) {
1247	kfree(objp: arr);
1248	return NULL;
1249	}
1250	arr = new_arr;
1251
1252	if (new_n > old_n)
1253	memset(arr + old_n * size, `0`, (new_n - old_n) * size);
1254
1255	out:
1256	return arr ? arr : ZERO_SIZE_PTR;
1257	}
1258
1259	static int copy_reference_state(struct bpf_func_state dst, const* struct bpf_func_state *src)
1260	{
1261	dst->refs = copy_array(dst: dst->refs, src: src->refs, n: src->acquired_refs,
1262	size: sizeof(struct bpf_reference_state), GFP_KERNEL);
1263	if (!dst->refs)
1264	return -ENOMEM;
1265
1266	dst->acquired_refs = src->acquired_refs;
1267	return `0`;
1268	}
1269
1270	static int copy_stack_state(struct bpf_func_state dst, const* struct bpf_func_state *src)
1271	{
1272	size_t n = src->allocated_stack / BPF_REG_SIZE;
1273
1274	dst->stack = copy_array(dst: dst->stack, src: src->stack, n, size: sizeof(struct bpf_stack_state),
1275	GFP_KERNEL);
1276	if (!dst->stack)
1277	return -ENOMEM;
1278
1279	dst->allocated_stack = src->allocated_stack;
1280	return `0`;
1281	}
1282
1283	static int resize_reference_state(struct bpf_func_state *state, size_t n)
1284	{
1285	state->refs = realloc_array(arr: state->refs, old_n: state->acquired_refs, new_n: n,
1286	size: sizeof(struct bpf_reference_state));
1287	if (!state->refs)
1288	return -ENOMEM;
1289
1290	state->acquired_refs = n;
1291	return `0`;
1292	}
1293
1294	/ Possibly update state->allocated_stack to be at least size bytes. Also*
1295	* possibly update the function's high-water mark in its bpf_subprog_info.
1296	*/
1297	static int grow_stack_state(struct bpf_verifier_env env, struct* bpf_func_state state, int* size)
1298	{
1299	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1300
1301	/ The stack size is always a multiple of BPF_REG_SIZE. /
1302	size = round_up(size, BPF_REG_SIZE);
1303	n = size / BPF_REG_SIZE;
1304
1305	if (old_n >= n)
1306	return `0`;
1307
1308	state->stack = realloc_array(arr: state->stack, old_n, new_n: n, size: sizeof(struct bpf_stack_state));
1309	if (!state->stack)
1310	return -ENOMEM;
1311
1312	state->allocated_stack = size;
1313
1314	/ update known max for given subprogram /
1315	if (env->subprog_info[state->subprogno].stack_depth < size)
1316	env->subprog_info[state->subprogno].stack_depth = size;
1317
1318	return `0`;
1319	}
1320
1321	/ Acquire a pointer id from the env and update the state->refs to include*
1322	* this new pointer reference.
1323	* On success, returns a valid pointer id to associate with the register
1324	* On failure, returns a negative errno.
1325	*/
1326	static int acquire_reference_state(struct bpf_verifier_env env, int* insn_idx)
1327	{
1328	struct bpf_func_state *state = cur_func(env);
1329	int new_ofs = state->acquired_refs;
1330	int id, err;
1331
1332	err = resize_reference_state(state, n: state->acquired_refs + `1`);
1333	if (err)
1334	return err;
1335	id = ++env->id_gen;
1336	state->refs[new_ofs].id = id;
1337	state->refs[new_ofs].insn_idx = insn_idx;
1338	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : `0`;
1339
1340	return id;
1341	}
1342
1343	/ release function corresponding to acquire_reference_state(). Idempotent. /
1344	static int release_reference_state(struct bpf_func_state state, int* ptr_id)
1345	{
1346	int i, last_idx;
1347
1348	last_idx = state->acquired_refs - `1`;
1349	for (i = `0`; i < state->acquired_refs; i++) {
1350	if (state->refs[i].id == ptr_id) {
1351	/ Cannot release caller references in callbacks /
1352	if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1353	return -EINVAL;
1354	if (last_idx && i != last_idx)
1355	memcpy(&state->refs[i], &state->refs[last_idx],
1356	sizeof(*state->refs));
1357	memset(&state->refs[last_idx], `0`, sizeof(*state->refs));
1358	state->acquired_refs--;
1359	return `0`;
1360	}
1361	}
1362	return -EINVAL;
1363	}
1364
1365	static void free_func_state(struct bpf_func_state *state)
1366	{
1367	if (!state)
1368	return;
1369	kfree(objp: state->refs);
1370	kfree(objp: state->stack);
1371	kfree(objp: state);
1372	}
1373
1374	static void clear_jmp_history(struct bpf_verifier_state *state)
1375	{
1376	kfree(objp: state->jmp_history);
1377	state->jmp_history = NULL;
1378	state->jmp_history_cnt = `0`;
1379	}
1380
1381	static void free_verifier_state(struct bpf_verifier_state *state,
1382	bool free_self)
1383	{
1384	int i;
1385
1386	for (i = `0`; i <= state->curframe; i++) {
1387	free_func_state(state: state->frame[i]);
1388	state->frame[i] = NULL;
1389	}
1390	clear_jmp_history(state);
1391	if (free_self)
1392	kfree(objp: state);
1393	}
1394
1395	/ copy verifier state from src to dst growing dst stack space*
1396	* when necessary to accommodate larger src stack
1397	*/
1398	static int copy_func_state(struct bpf_func_state *dst,
1399	const struct bpf_func_state *src)
1400	{
1401	int err;
1402
1403	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1404	err = copy_reference_state(dst, src);
1405	if (err)
1406	return err;
1407	return copy_stack_state(dst, src);
1408	}
1409
1410	static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1411	const struct bpf_verifier_state *src)
1412	{
1413	struct bpf_func_state *dst;
1414	int i, err;
1415
1416	dst_state->jmp_history = copy_array(dst: dst_state->jmp_history, src: src->jmp_history,
1417	n: src->jmp_history_cnt, size: sizeof(*dst_state->jmp_history),
1418	GFP_USER);
1419	if (!dst_state->jmp_history)
1420	return -ENOMEM;
1421	dst_state->jmp_history_cnt = src->jmp_history_cnt;
1422
1423	/ if dst has more stack frames then src frame, free them, this is also*
1424	* necessary in case of exceptional exits using bpf_throw.
1425	*/
1426	for (i = src->curframe + `1`; i <= dst_state->curframe; i++) {
1427	free_func_state(state: dst_state->frame[i]);
1428	dst_state->frame[i] = NULL;
1429	}
1430	dst_state->speculative = src->speculative;
1431	dst_state->active_rcu_lock = src->active_rcu_lock;
1432	dst_state->curframe = src->curframe;
1433	dst_state->active_lock.ptr = src->active_lock.ptr;
1434	dst_state->active_lock.id = src->active_lock.id;
1435	dst_state->branches = src->branches;
1436	dst_state->parent = src->parent;
1437	dst_state->first_insn_idx = src->first_insn_idx;
1438	dst_state->last_insn_idx = src->last_insn_idx;
1439	dst_state->dfs_depth = src->dfs_depth;
1440	dst_state->callback_unroll_depth = src->callback_unroll_depth;
1441	dst_state->used_as_loop_entry = src->used_as_loop_entry;
1442	dst_state->may_goto_depth = src->may_goto_depth;
1443	for (i = `0`; i <= src->curframe; i++) {
1444	dst = dst_state->frame[i];
1445	if (!dst) {
1446	dst = kzalloc(size: sizeof(*dst), GFP_KERNEL);
1447	if (!dst)
1448	return -ENOMEM;
1449	dst_state->frame[i] = dst;
1450	}
1451	err = copy_func_state(dst, src: src->frame[i]);
1452	if (err)
1453	return err;
1454	}
1455	return `0`;
1456	}
1457
1458	static u32 state_htab_size(struct bpf_verifier_env *env)
1459	{
1460	return env->prog->len;
1461	}
1462
1463	static struct bpf_verifier_state_list explored_state(struct** bpf_verifier_env env, int* idx)
1464	{
1465	struct bpf_verifier_state *cur = env->cur_state;
1466	struct bpf_func_state *state = cur->frame[cur->curframe];
1467
1468	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1469	}
1470
1471	static bool same_callsites(struct bpf_verifier_state a, struct* bpf_verifier_state *b)
1472	{
1473	int fr;
1474
1475	if (a->curframe != b->curframe)
1476	return false;
1477
1478	for (fr = a->curframe; fr >= `0`; fr--)
1479	if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1480	return false;
1481
1482	return true;
1483	}
1484
/* Open coded iterators allow back-edges in the state graph in order to
 * check unbounded loops that use iterators.
 *
 * In is_state_visited() it is necessary to know if explored states are
 * part of some loops in order to decide whether non-exact states
 * comparison could be used:
 * - non-exact states comparison establishes sub-state relation and uses
 *   read and precision marks to do so, these marks are propagated from
 *   children states and thus are not guaranteed to be final in a loop;
 * - exact states comparison just checks if current and explored states
 *   are identical (and thus form a back-edge).
 *
 * Paper "A New Algorithm for Identifying Loops in Decompilation"
 * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
 * algorithm for loop structure detection and gives an overview of
 * relevant terminology. It also has helpful illustrations.
 *
 * [1] https://api.semanticscholar.org/CorpusID:15784067
 *
 * We use a similar algorithm but because loop nested structure is
 * irrelevant for verifier ours is significantly simpler and resembles
 * strongly connected components algorithm from Sedgewick's textbook.
 *
 * Define topmost loop entry as a first node of the loop traversed in a
 * depth first search starting from initial state. The goal of the loop
 * tracking algorithm is to associate topmost loop entries with states
 * derived from these entries.
 *
 * For each step in the DFS states traversal algorithm needs to identify
 * the following situations:
 *
 *          initial                     initial                   initial
 *            |                           |                         |
 *            V                           V                         V
 *           ...                         ...           .---------> hdr
 *            |                           |            |            |
 *            V                           V            |            V
 *           cur                     .-> succ          |    .------...
 *            |                      |    |            |    |       |
 *            V                      |    V            |    V       V
 *           succ                    '-- cur           |   ...     ...
 *                                                     |    |       |
 *                                                     |    V       V
 *                                                     |   succ <- cur
 *                                                     |    |
 *                                                     |    V
 *                                                     |   ...
 *                                                     |    |
 *                                                     '----'
 *
 *  (A) successor state of cur   (B) successor state of cur or its entry
 *      not yet traversed            are in current DFS path, thus cur and succ
 *                                   are members of the same outermost loop
 *
 *                      initial                  initial
 *                        |                        |
 *                        V                        V
 *                       ...                      ...
 *                        |                        |
 *                        V                        V
 *                .------...               .------...
 *                |       |                |       |
 *                V       V                V       V
 *           .-> hdr     ...              ...     ...
 *           |    |       |                |       |
 *           |    V       V                V       V
 *           |   succ <- cur              succ <- cur
 *           |    |                        |
 *           |    V                        V
 *           |   ...                      ...
 *           |    |                        |
 *           '----'                       exit
 *
 * (C) successor state of cur is a part of some loop but this loop
 *     does not include cur or successor state is not in a loop at all.
 *
 * Algorithm could be described as the following python code:
 *
 *   traversed = set()   # Set of traversed nodes
 *   entries = {}        # Mapping from node to loop entry
 *   depths = {}         # Depth level assigned to graph node
 *   path = set()        # Current DFS path
 *
 *   # Find outermost loop entry known for n
 *   def get_loop_entry(n):
 *       h = entries.get(n, None)
 *       while h in entries and entries[h] != h:
 *           h = entries[h]
 *       return h
 *
 *   # Update n's loop entry if h's outermost entry comes
 *   # before n's outermost entry in current DFS path.
 *   def update_loop_entry(n, h):
 *       n1 = get_loop_entry(n) or n
 *       h1 = get_loop_entry(h) or h
 *       if h1 in path and depths[h1] <= depths[n1]:
 *           entries[n] = h1
 *
 *   def dfs(n, depth):
 *       traversed.add(n)
 *       path.add(n)
 *       depths[n] = depth
 *       for succ in G.successors(n):
 *           if succ not in traversed:
 *               # Case A: explore succ and update cur's loop entry
 *               #         only if succ's entry is in current DFS path.
 *               dfs(succ, depth + 1)
 *               h = get_loop_entry(succ)
 *               update_loop_entry(n, h)
 *           else:
 *               # Case B or C depending on `h1 in path` check in update_loop_entry().
 *               update_loop_entry(n, succ)
 *       path.remove(n)
 *
 * To adapt this algorithm for use with verifier:
 * - use st->branches == 0 as a signal that DFS of succ had been finished
 *   and cur's loop entry has to be updated (case A), handle this in
 *   update_branch_counts();
 * - use st->branches > 0 as a signal that st is in the current DFS path;
 * - handle cases B and C in is_state_visited();
 * - update topmost loop entry for intermediate states in get_loop_entry().
 */
1607	static struct bpf_verifier_state get_loop_entry(struct* bpf_verifier_state *st)
1608	{
1609	struct bpf_verifier_state topmost = st->loop_entry, old;
1610
1611	while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
1612	topmost = topmost->loop_entry;
1613	/ Update loop entries for intermediate states to avoid this*
1614	* traversal in future get_loop_entry() calls.
1615	*/
1616	while (st && st->loop_entry != topmost) {
1617	old = st->loop_entry;
1618	st->loop_entry = topmost;
1619	st = old;
1620	}
1621	return topmost;
1622	}
1623
1624	static void update_loop_entry(struct bpf_verifier_state cur, struct* bpf_verifier_state *hdr)
1625	{
1626	struct bpf_verifier_state cur1, hdr1;
1627
1628	cur1 = get_loop_entry(st: cur) ?: cur;
1629	hdr1 = get_loop_entry(st: hdr) ?: hdr;
1630	/ The head1->branches check decides between cases B and C in*
1631	* comment for get_loop_entry(). If hdr1->branches == 0 then
1632	* head's topmost loop entry is not in current DFS path,
1633	* hence 'cur' and 'hdr' are not in the same loop and there is
1634	* no need to update cur->loop_entry.
1635	*/
1636	if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
1637	cur->loop_entry = hdr;
1638	hdr->used_as_loop_entry = true;
1639	}
1640	}
1641
1642	static void update_branch_counts(struct bpf_verifier_env env, struct* bpf_verifier_state *st)
1643	{
1644	while (st) {
1645	u32 br = --st->branches;
1646
1647	/ br == 0 signals that DFS exploration for 'st' is finished,*
1648	* thus it is necessary to update parent's loop entry if it
1649	* turned out that st is a part of some loop.
1650	* This is a part of 'case A' in get_loop_entry() comment.
1651	*/
1652	if (br == `0` && st->parent && st->loop_entry)
1653	update_loop_entry(cur: st->parent, hdr: st->loop_entry);
1654
1655	/ WARN_ON(br > 1) technically makes sense here,*
1656	* but see comment in push_stack(), hence:
1657	*/
1658	WARN_ONCE((int)br < `0`,
1659	"BUG update_branch_counts:branches_to_explore=%d\n",
1660	br);
1661	if (br)
1662	break;
1663	st = st->parent;
1664	}
1665	}
1666
1667	static int pop_stack(struct bpf_verifier_env env, int* *prev_insn_idx,
1668	int *insn_idx, bool pop_log)
1669	{
1670	struct bpf_verifier_state *cur = env->cur_state;
1671	struct bpf_verifier_stack_elem elem, head = env->head;
1672	int err;
1673
1674	if (env->head == NULL)
1675	return -ENOENT;
1676
1677	if (cur) {
1678	err = copy_verifier_state(dst_state: cur, src: &head->st);
1679	if (err)
1680	return err;
1681	}
1682	if (pop_log)
1683	bpf_vlog_reset(log: &env->log, new_pos: head->log_pos);
1684	if (insn_idx)
1685	*insn_idx = head->insn_idx;
1686	if (prev_insn_idx)
1687	*prev_insn_idx = head->prev_insn_idx;
1688	elem = head->next;
1689	free_verifier_state(state: &head->st, free_self: false);
1690	kfree(objp: head);
1691	env->head = elem;
1692	env->stack_size--;
1693	return `0`;
1694	}
1695
1696	static struct bpf_verifier_state push_stack(struct* bpf_verifier_env *env,
1697	int insn_idx, int prev_insn_idx,
1698	bool speculative)
1699	{
1700	struct bpf_verifier_state *cur = env->cur_state;
1701	struct bpf_verifier_stack_elem *elem;
1702	int err;
1703
1704	elem = kzalloc(size: sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1705	if (!elem)
1706	goto err;
1707
1708	elem->insn_idx = insn_idx;
1709	elem->prev_insn_idx = prev_insn_idx;
1710	elem->next = env->head;
1711	elem->log_pos = env->log.end_pos;
1712	env->head = elem;
1713	env->stack_size++;
1714	err = copy_verifier_state(dst_state: &elem->st, src: cur);
1715	if (err)
1716	goto err;
1717	elem->st.speculative \|= speculative;
1718	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1719	verbose(private_data: env, fmt: "The sequence of %d jumps is too complex.\n",
1720	env->stack_size);
1721	goto err;
1722	}
1723	if (elem->st.parent) {
1724	++elem->st.parent->branches;
1725	/ WARN_ON(branches > 2) technically makes sense here,*
1726	* but
1727	* 1. speculative states will bump 'branches' for non-branch
1728	* instructions
1729	* 2. is_state_visited() heuristics may decide not to create
1730	* a new state for a sequence of branches and all such current
1731	* and cloned states will be pointing to a single parent state
1732	* which might have large 'branches' count.
1733	*/
1734	}
1735	return &elem->st;
1736	err:
1737	free_verifier_state(state: env->cur_state, free_self: true);
1738	env->cur_state = NULL;
1739	/ pop all elements and return /
1740	while (!pop_stack(env, NULL, NULL, pop_log: false));
1741	return NULL;
1742	}
1743
1744	#define CALLER_SAVED_REGS 6
1745	static const int caller_saved[CALLER_SAVED_REGS] = {
1746	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1747	};
1748
1749	/ This helper doesn't clear reg->id /
1750	static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1751	{
1752	reg->var_off = tnum_const(value: imm);
1753	reg->smin_value = (s64)imm;
1754	reg->smax_value = (s64)imm;
1755	reg->umin_value = imm;
1756	reg->umax_value = imm;
1757
1758	reg->s32_min_value = (s32)imm;
1759	reg->s32_max_value = (s32)imm;
1760	reg->u32_min_value = (u32)imm;
1761	reg->u32_max_value = (u32)imm;
1762	}
1763
1764	/ Mark the unknown part of a register (variable offset or scalar value) as*
1765	* known to have the value @imm.
1766	*/
1767	static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1768	{
1769	/ Clear off and union(map_ptr, range) /
1770	memset(((u8 )reg) + sizeof*(reg->type), `0`,
1771	offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1772	reg->id = `0`;
1773	reg->ref_obj_id = `0`;
1774	___mark_reg_known(reg, imm);
1775	}
1776
1777	static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1778	{
1779	reg->var_off = tnum_const_subreg(a: reg->var_off, value: imm);
1780	reg->s32_min_value = (s32)imm;
1781	reg->s32_max_value = (s32)imm;
1782	reg->u32_min_value = (u32)imm;
1783	reg->u32_max_value = (u32)imm;
1784	}
1785
1786	/ Mark the 'variable offset' part of a register as zero. This should be*
1787	* used only on registers holding a pointer type.
1788	*/
1789	static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1790	{
1791	__mark_reg_known(reg, imm: `0`);
1792	}
1793
1794	static void __mark_reg_const_zero(const struct bpf_verifier_env env, struct* bpf_reg_state *reg)
1795	{
1796	__mark_reg_known(reg, imm: `0`);
1797	reg->type = SCALAR_VALUE;
1798	/ all scalars are assumed imprecise initially (unless unprivileged,*
1799	* in which case everything is forced to be precise)
1800	*/
1801	reg->precise = !env->bpf_capable;
1802	}
1803
1804	static void mark_reg_known_zero(struct bpf_verifier_env *env,
1805	struct bpf_reg_state *regs, u32 regno)
1806	{
1807	if (WARN_ON(regno >= MAX_BPF_REG)) {
1808	verbose(private_data: env, fmt: "mark_reg_known_zero(regs, %u)\n", regno);
1809	/ Something bad happened, let's kill all regs /
1810	for (regno = `0`; regno < MAX_BPF_REG; regno++)
1811	__mark_reg_not_init(env, reg: regs + regno);
1812	return;
1813	}
1814	__mark_reg_known_zero(reg: regs + regno);
1815	}
1816
1817	static void __mark_dynptr_reg(struct bpf_reg_state reg, enum* bpf_dynptr_type type,
1818	bool first_slot, int dynptr_id)
1819	{
1820	/ reg->type has no meaning for STACK_DYNPTR, but when we set reg for*
1821	* callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1822	* set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1823	*/
1824	__mark_reg_known_zero(reg);
1825	reg->type = CONST_PTR_TO_DYNPTR;
1826	/ Give each dynptr a unique id to uniquely associate slices to it. /
1827	reg->id = dynptr_id;
1828	reg->dynptr.type = type;
1829	reg->dynptr.first_slot = first_slot;
1830	}
1831
1832	static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1833	{
1834	if (base_type(type: reg->type) == PTR_TO_MAP_VALUE) {
1835	const struct bpf_map *map = reg->map_ptr;
1836
1837	if (map->inner_map_meta) {
1838	reg->type = CONST_PTR_TO_MAP;
1839	reg->map_ptr = map->inner_map_meta;
1840	/ transfer reg's id which is unique for every map_lookup_elem*
1841	* as UID of the inner map.
1842	*/
1843	if (btf_record_has_field(rec: map->inner_map_meta->record, type: BPF_TIMER))
1844	reg->map_uid = reg->id;
1845	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1846	reg->type = PTR_TO_XDP_SOCK;
1847	} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP \|\|
1848	map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1849	reg->type = PTR_TO_SOCKET;
1850	} else {
1851	reg->type = PTR_TO_MAP_VALUE;
1852	}
1853	return;
1854	}
1855
1856	reg->type &= ~PTR_MAYBE_NULL;
1857	}
1858
1859	static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1860	struct btf_field_graph_root *ds_head)
1861	{
1862	__mark_reg_known_zero(reg: &regs[regno]);
1863	regs[regno].type = PTR_TO_BTF_ID \| MEM_ALLOC;
1864	regs[regno].btf = ds_head->btf;
1865	regs[regno].btf_id = ds_head->value_btf_id;
1866	regs[regno].off = ds_head->node_offset;
1867	}
1868
1869	static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1870	{
1871	return type_is_pkt_pointer(type: reg->type);
1872	}
1873
1874	static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1875	{
1876	return reg_is_pkt_pointer(reg) \|\|
1877	reg->type == PTR_TO_PACKET_END;
1878	}
1879
1880	static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1881	{
1882	return base_type(type: reg->type) == PTR_TO_MEM &&
1883	(reg->type & DYNPTR_TYPE_SKB \|\| reg->type & DYNPTR_TYPE_XDP);
1884	}
1885
1886	/ Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. /
1887	static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1888	enum bpf_reg_type which)
1889	{
1890	/ The register can already have a range from prior markings.*
1891	* This is fine as long as it hasn't been advanced from its
1892	* origin.
1893	*/
1894	return reg->type == which &&
1895	reg->id == `0` &&
1896	reg->off == `0` &&
1897	tnum_equals_const(a: reg->var_off, b: `0`);
1898	}
1899
1900	/ Reset the min/max bounds of a register /
1901	static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1902	{
1903	reg->smin_value = S64_MIN;
1904	reg->smax_value = S64_MAX;
1905	reg->umin_value = `0`;
1906	reg->umax_value = U64_MAX;
1907
1908	reg->s32_min_value = S32_MIN;
1909	reg->s32_max_value = S32_MAX;
1910	reg->u32_min_value = `0`;
1911	reg->u32_max_value = U32_MAX;
1912	}
1913
1914	static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1915	{
1916	reg->smin_value = S64_MIN;
1917	reg->smax_value = S64_MAX;
1918	reg->umin_value = `0`;
1919	reg->umax_value = U64_MAX;
1920	}
1921
1922	static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1923	{
1924	reg->s32_min_value = S32_MIN;
1925	reg->s32_max_value = S32_MAX;
1926	reg->u32_min_value = `0`;
1927	reg->u32_max_value = U32_MAX;
1928	}
1929
1930	static void __update_reg32_bounds(struct bpf_reg_state *reg)
1931	{
1932	struct tnum var32_off = tnum_subreg(a: reg->var_off);
1933
1934	/ min signed is max(sign bit) \| min(other bits) /
1935	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1936	var32_off.value \| (var32_off.mask & S32_MIN));
1937	/ max signed is min(sign bit) \| max(other bits) /
1938	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1939	var32_off.value \| (var32_off.mask & S32_MAX));
1940	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1941	reg->u32_max_value = min(reg->u32_max_value,
1942	(u32)(var32_off.value \| var32_off.mask));
1943	}
1944
1945	static void __update_reg64_bounds(struct bpf_reg_state *reg)
1946	{
1947	/ min signed is max(sign bit) \| min(other bits) /
1948	reg->smin_value = max_t(s64, reg->smin_value,
1949	reg->var_off.value \| (reg->var_off.mask & S64_MIN));
1950	/ max signed is min(sign bit) \| max(other bits) /
1951	reg->smax_value = min_t(s64, reg->smax_value,
1952	reg->var_off.value \| (reg->var_off.mask & S64_MAX));
1953	reg->umin_value = max(reg->umin_value, reg->var_off.value);
1954	reg->umax_value = min(reg->umax_value,
1955	reg->var_off.value \| reg->var_off.mask);
1956	}
1957
/* Refresh both the 32-bit and the 64-bit bounds from var_off. */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* subregister first, then the full register */
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}
1963
1964	/ Uses signed min/max values to inform unsigned, and vice-versa /
1965	static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1966	{
1967	/ If upper 32 bits of u64/s64 range don't change, we can use lower 32*
1968	* bits to improve our u32/s32 boundaries.
1969	*
1970	* E.g., the case where we have upper 32 bits as zero ([10, 20] in
1971	* u64) is pretty trivial, it's obvious that in u32 we'll also have
1972	* [10, 20] range. But this property holds for any 64-bit range as
1973	* long as upper 32 bits in that entire range of values stay the same.
1974	*
1975	* E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
1976	* in decimal) has the same upper 32 bits throughout all the values in
1977	* that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
1978	* range.
1979	*
1980	* Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
1981	* following the rules outlined below about u64/s64 correspondence
1982	* (which equally applies to u32 vs s32 correspondence). In general it
1983	* depends on actual hexadecimal values of 32-bit range. They can form
1984	* only valid u32, or only valid s32 ranges in some cases.
1985	*
1986	* So we use all these insights to derive bounds for subregisters here.
1987	*/
1988	if ((reg->umin_value >> `32`) == (reg->umax_value >> `32`)) {
1989	/ u64 to u32 casting preserves validity of low 32 bits as*
1990	* a range, if upper 32 bits are the same
1991	*/
1992	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
1993	reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);
1994
1995	if ((s32)reg->umin_value <= (s32)reg->umax_value) {
1996	reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
1997	reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
1998	}
1999	}
2000	if ((reg->smin_value >> `32`) == (reg->smax_value >> `32`)) {
2001	/ low 32 bits should form a proper u32 range /
2002	if ((u32)reg->smin_value <= (u32)reg->smax_value) {
2003	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
2004	reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
2005	}
2006	/ low 32 bits should form a proper s32 range /
2007	if ((s32)reg->smin_value <= (s32)reg->smax_value) {
2008	reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2009	reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2010	}
2011	}
2012	/ Special case where upper bits form a small sequence of two*
2013	* sequential numbers (in 32-bit unsigned space, so 0xffffffff to
2014	* 0x00000000 is also valid), while lower bits form a proper s32 range
2015	* going from negative numbers to positive numbers. E.g., let's say we
2016	* have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
2017	* Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
2018	* 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits,
2019	* we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
2020	* Note that it doesn't have to be 0xffffffff going to 0x00000000 in
2021	* upper 32 bits. As a random example, s64 range
2022	* [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
2023	* [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
2024	*/
2025	if ((u32)(reg->umin_value >> `32`) + `1` == (u32)(reg->umax_value >> `32`) &&
2026	(s32)reg->umin_value < `0` && (s32)reg->umax_value >= `0`) {
2027	reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2028	reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2029	}
2030	if ((u32)(reg->smin_value >> `32`) + `1` == (u32)(reg->smax_value >> `32`) &&
2031	(s32)reg->smin_value < `0` && (s32)reg->smax_value >= `0`) {
2032	reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2033	reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2034	}
2035	/ if u32 range forms a valid s32 range (due to matching sign bit),*
2036	* try to learn from that
2037	*/
2038	if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
2039	reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
2040	reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
2041	}
2042	/ If we cannot cross the sign boundary, then signed and unsigned bounds*
2043	* are the same, so combine. This works even in the negative case, e.g.
2044	* -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2045	*/
2046	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2047	reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
2048	reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
2049	}
2050	}
2051
2052	static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2053	{
2054	/ If u64 range forms a valid s64 range (due to matching sign bit),*
2055	* try to learn from that. Let's do a bit of ASCII art to see when
2056	* this is happening. Let's take u64 range first:
2057	*
2058	* 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2059	* \|-------------------------------\|--------------------------------\|
2060	*
2061	* Valid u64 range is formed when umin and umax are anywhere in the
2062	* range [0, U64_MAX], and umin <= umax. u64 case is simple and
2063	* straightforward. Let's see how s64 range maps onto the same range
2064	* of values, annotated below the line for comparison:
2065	*
2066	* 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2067	* \|-------------------------------\|--------------------------------\|
2068	* 0 S64_MAX S64_MIN -1
2069	*
2070	* So s64 values basically start in the middle and they are logically
2071	* contiguous to the right of it, wrapping around from -1 to 0, and
2072	* then finishing as S64_MAX (0x7fffffffffffffff) right before
2073	* S64_MIN. We can try drawing the continuity of u64 vs s64 values
2074	* more visually as mapped to sign-agnostic range of hex values.
2075	*
2076	* u64 start u64 end
2077	* _______________________________________________________________
2078	* / \
2079	* 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2080	* \|-------------------------------\|--------------------------------\|
2081	* 0 S64_MAX S64_MIN -1
2082	* / \
2083	* >------------------------------ ------------------------------->
2084	* s64 continues... s64 end s64 start s64 "midpoint"
2085	*
2086	* What this means is that, in general, we can't always derive
2087	* something new about u64 from any random s64 range, and vice versa.
2088	*
2089	* But we can do that in two particular cases. One is when entire
2090	* u64/s64 range is entirely contained within left half of the above
2091	* diagram or when it is entirely contained in the right half. I.e.:
2092	*
2093	* \|-------------------------------\|--------------------------------\|
2094	* ^ ^ ^ ^
2095	* A B C D
2096	*
2097	* [A, B] and [C, D] are contained entirely in their respective halves
2098	* and form valid contiguous ranges as both u64 and s64 values. [A, B]
2099	* will be non-negative both as u64 and s64 (and in fact it will be
2100	* identical ranges no matter the signedness). [C, D] treated as s64
2101	* will be a range of negative values, while in u64 it will be
2102	* non-negative range of values larger than 0x8000000000000000.
2103	*
2104	* Now, any other range here can't be represented in both u64 and s64
2105	* simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
2106	* contiguous u64 ranges, but they are discontinuous in s64. [B, C]
2107	* in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
2108	* for example. Similarly, valid s64 range [D, A] (going from negative
2109	* to positive values), would be two separate [D, U64_MAX] and [0, A]
2110	* ranges as u64. Currently reg_state can't represent two segments per
2111	* numeric domain, so in such situations we can only derive maximal
2112	* possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
2113	*
2114	* So we use these facts to derive umin/umax from smin/smax and vice
2115	* versa only if they stay within the same "half". This is equivalent
2116	* to checking sign bit: lower half will have sign bit as zero, upper
2117	* half have sign bit 1. Below in code we simplify this by just
2118	* casting umin/umax as smin/smax and checking if they form valid
2119	* range, and vice versa. Those are equivalent checks.
2120	*/
2121	if ((s64)reg->umin_value <= (s64)reg->umax_value) {
2122	reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
2123	reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
2124	}
2125	/ If we cannot cross the sign boundary, then signed and unsigned bounds*
2126	* are the same, so combine. This works even in the negative case, e.g.
2127	* -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2128	*/
2129	if ((u64)reg->smin_value <= (u64)reg->smax_value) {
2130	reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
2131	reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
2132	}
2133	}
2134
2135	static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
2136	{
2137	/ Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit*
2138	* values on both sides of 64-bit range in hope to have tigher range.
2139	* E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
2140	* 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
2141	* With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
2142	* (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
2143	* _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
2144	* better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff].
2145	* We just need to make sure that derived bounds we are intersecting
2146	* with are well-formed ranges in respecitve s64 or u64 domain, just
2147	* like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
2148	*/
2149	__u64 new_umin, new_umax;
2150	__s64 new_smin, new_smax;
2151
2152	/ u32 -> u64 tightening, it's always well-formed /
2153	new_umin = (reg->umin_value & ~`0xffffffffULL`) \| reg->u32_min_value;
2154	new_umax = (reg->umax_value & ~`0xffffffffULL`) \| reg->u32_max_value;
2155	reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2156	reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2157	/ u32 -> s64 tightening, u32 range embedded into s64 preserves range validity /
2158	new_smin = (reg->smin_value & ~`0xffffffffULL`) \| reg->u32_min_value;
2159	new_smax = (reg->smax_value & ~`0xffffffffULL`) \| reg->u32_max_value;
2160	reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2161	reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2162
2163	/ if s32 can be treated as valid u32 range, we can use it as well /
2164	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2165	/ s32 -> u64 tightening /
2166	new_umin = (reg->umin_value & ~`0xffffffffULL`) \| (u32)reg->s32_min_value;
2167	new_umax = (reg->umax_value & ~`0xffffffffULL`) \| (u32)reg->s32_max_value;
2168	reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2169	reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2170	/ s32 -> s64 tightening /
2171	new_smin = (reg->smin_value & ~`0xffffffffULL`) \| (u32)reg->s32_min_value;
2172	new_smax = (reg->smax_value & ~`0xffffffffULL`) \| (u32)reg->s32_max_value;
2173	reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2174	reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2175	}
2176	}
2177
/* One round of bounds deduction: 32-bit, then 64-bit, then mixed 32/64. */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	__reg32_deduce_bounds(reg);
	__reg64_deduce_bounds(reg);
	/* last: feeds the 32-bit bounds back into the 64-bit ones */
	__reg_deduce_mixed_bounds(reg);
}
2184
2185	/ Attempts to improve var_off based on unsigned min/max information /
2186	static void __reg_bound_offset(struct bpf_reg_state *reg)
2187	{
2188	struct tnum var64_off = tnum_intersect(a: reg->var_off,
2189	b: tnum_range(min: reg->umin_value,
2190	max: reg->umax_value));
2191	struct tnum var32_off = tnum_intersect(a: tnum_subreg(a: var64_off),
2192	b: tnum_range(min: reg->u32_min_value,
2193	max: reg->u32_max_value));
2194
2195	reg->var_off = tnum_or(a: tnum_clear_subreg(a: var64_off), b: var32_off);
2196	}
2197
/* Bring the range bounds and var_off of @reg into agreement. */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);
	/* We might have learned something about the sign bit.
	 * The deduction is deliberately run twice in a row.
	 * NOTE(review): presumably so that results of the later steps of
	 * the first round can feed its earlier steps -- confirm.
	 */
	__reg_deduce_bounds(reg);
	__reg_deduce_bounds(reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2213
2214	static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2215	struct bpf_reg_state reg, const* char *ctx)
2216	{
2217	const char *msg;
2218
2219	if (reg->umin_value > reg->umax_value \|\|
2220	reg->smin_value > reg->smax_value \|\|
2221	reg->u32_min_value > reg->u32_max_value \|\|
2222	reg->s32_min_value > reg->s32_max_value) {
2223	msg = "range bounds violation";
2224	goto out;
2225	}
2226
2227	if (tnum_is_const(a: reg->var_off)) {
2228	u64 uval = reg->var_off.value;
2229	s64 sval = (s64)uval;
2230
2231	if (reg->umin_value != uval \|\| reg->umax_value != uval \|\|
2232	reg->smin_value != sval \|\| reg->smax_value != sval) {
2233	msg = "const tnum out of sync with range bounds";
2234	goto out;
2235	}
2236	}
2237
2238	if (tnum_subreg_is_const(a: reg->var_off)) {
2239	u32 uval32 = tnum_subreg(a: reg->var_off).value;
2240	s32 sval32 = (s32)uval32;
2241
2242	if (reg->u32_min_value != uval32 \|\| reg->u32_max_value != uval32 \|\|
2243	reg->s32_min_value != sval32 \|\| reg->s32_max_value != sval32) {
2244	msg = "const subreg tnum out of sync with range bounds";
2245	goto out;
2246	}
2247	}
2248
2249	return `0`;
2250	out:
2251	verbose(private_data: env, fmt: "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
2252	"s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n",
2253	ctx, msg, reg->umin_value, reg->umax_value,
2254	reg->smin_value, reg->smax_value,
2255	reg->u32_min_value, reg->u32_max_value,
2256	reg->s32_min_value, reg->s32_max_value,
2257	reg->var_off.value, reg->var_off.mask);
2258	if (env->test_reg_invariants)
2259	return -EFAULT;
2260	__mark_reg_unbounded(reg);
2261	return `0`;
2262	}
2263
2264	static bool __reg32_bound_s64(s32 a)
2265	{
2266	return a >= `0` && a <= S32_MAX;
2267	}
2268
2269	static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2270	{
2271	reg->umin_value = reg->u32_min_value;
2272	reg->umax_value = reg->u32_max_value;
2273
2274	/ Attempt to pull 32-bit signed bounds into 64-bit bounds but must*
2275	* be positive otherwise set to worse case bounds and refine later
2276	* from tnum.
2277	*/
2278	if (__reg32_bound_s64(a: reg->s32_min_value) &&
2279	__reg32_bound_s64(a: reg->s32_max_value)) {
2280	reg->smin_value = reg->s32_min_value;
2281	reg->smax_value = reg->s32_max_value;
2282	} else {
2283	reg->smin_value = `0`;
2284	reg->smax_value = U32_MAX;
2285	}
2286	}
2287
2288	/ Mark a register as having a completely unknown (scalar) value. /
2289	static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2290	{
2291	/*
2292	* Clear type, off, and union(map_ptr, range) and
2293	* padding between 'type' and union
2294	*/
2295	memset(reg, `0`, offsetof(struct bpf_reg_state, var_off));
2296	reg->type = SCALAR_VALUE;
2297	reg->id = `0`;
2298	reg->ref_obj_id = `0`;
2299	reg->var_off = tnum_unknown;
2300	reg->frameno = `0`;
2301	reg->precise = false;
2302	__mark_reg_unbounded(reg);
2303	}
2304
2305	/ Mark a register as having a completely unknown (scalar) value,*
2306	* initialize .precise as true when not bpf capable.
2307	*/
2308	static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2309	struct bpf_reg_state *reg)
2310	{
2311	__mark_reg_unknown_imprecise(reg);
2312	reg->precise = !env->bpf_capable;
2313	}
2314
2315	static void mark_reg_unknown(struct bpf_verifier_env *env,
2316	struct bpf_reg_state *regs, u32 regno)
2317	{
2318	if (WARN_ON(regno >= MAX_BPF_REG)) {
2319	verbose(private_data: env, fmt: "mark_reg_unknown(regs, %u)\n", regno);
2320	/ Something bad happened, let's kill all regs except FP /
2321	for (regno = `0`; regno < BPF_REG_FP; regno++)
2322	__mark_reg_not_init(env, reg: regs + regno);
2323	return;
2324	}
2325	__mark_reg_unknown(env, reg: regs + regno);
2326	}
2327
2328	static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2329	struct bpf_reg_state *reg)
2330	{
2331	__mark_reg_unknown(env, reg);
2332	reg->type = NOT_INIT;
2333	}
2334
2335	static void mark_reg_not_init(struct bpf_verifier_env *env,
2336	struct bpf_reg_state *regs, u32 regno)
2337	{
2338	if (WARN_ON(regno >= MAX_BPF_REG)) {
2339	verbose(private_data: env, fmt: "mark_reg_not_init(regs, %u)\n", regno);
2340	/ Something bad happened, let's kill all regs except FP /
2341	for (regno = `0`; regno < BPF_REG_FP; regno++)
2342	__mark_reg_not_init(env, reg: regs + regno);
2343	return;
2344	}
2345	__mark_reg_not_init(env, reg: regs + regno);
2346	}
2347
2348	static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2349	struct bpf_reg_state *regs, u32 regno,
2350	enum bpf_reg_type reg_type,
2351	struct btf *btf, u32 btf_id,
2352	enum bpf_type_flag flag)
2353	{
2354	if (reg_type == SCALAR_VALUE) {
2355	mark_reg_unknown(env, regs, regno);
2356	return;
2357	}
2358	mark_reg_known_zero(env, regs, regno);
2359	regs[regno].type = PTR_TO_BTF_ID \| flag;
2360	regs[regno].btf = btf;
2361	regs[regno].btf_id = btf_id;
2362	}
2363
2364	#define DEF_NOT_SUBREG (0)
2365	static void init_reg_state(struct bpf_verifier_env *env,
2366	struct bpf_func_state *state)
2367	{
2368	struct bpf_reg_state *regs = state->regs;
2369	int i;
2370
2371	for (i = `0`; i < MAX_BPF_REG; i++) {
2372	mark_reg_not_init(env, regs, regno: i);
2373	regs[i].live = REG_LIVE_NONE;
2374	regs[i].parent = NULL;
2375	regs[i].subreg_def = DEF_NOT_SUBREG;
2376	}
2377
2378	/ frame pointer /
2379	regs[BPF_REG_FP].type = PTR_TO_STACK;
2380	mark_reg_known_zero(env, regs, BPF_REG_FP);
2381	regs[BPF_REG_FP].frameno = state->frameno;
2382	}
2383
2384	static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2385	{
2386	return (struct bpf_retval_range){ minval, maxval };
2387	}
2388
2389	#define BPF_MAIN_FUNC (-1)
2390	static void init_func_state(struct bpf_verifier_env *env,
2391	struct bpf_func_state *state,
2392	int callsite, int frameno, int subprogno)
2393	{
2394	state->callsite = callsite;
2395	state->frameno = frameno;
2396	state->subprogno = subprogno;
2397	state->callback_ret_range = retval_range(minval: `0`, maxval: `0`);
2398	init_reg_state(env, state);
2399	mark_verifier_state_scratched(env);
2400	}
2401
2402	/ Similar to push_stack(), but for async callbacks /
2403	static struct bpf_verifier_state push_async_cb(struct* bpf_verifier_env *env,
2404	int insn_idx, int prev_insn_idx,
2405	int subprog)
2406	{
2407	struct bpf_verifier_stack_elem *elem;
2408	struct bpf_func_state *frame;
2409
2410	elem = kzalloc(size: sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2411	if (!elem)
2412	goto err;
2413
2414	elem->insn_idx = insn_idx;
2415	elem->prev_insn_idx = prev_insn_idx;
2416	elem->next = env->head;
2417	elem->log_pos = env->log.end_pos;
2418	env->head = elem;
2419	env->stack_size++;
2420	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2421	verbose(private_data: env,
2422	fmt: "The sequence of %d jumps is too complex for async cb.\n",
2423	env->stack_size);
2424	goto err;
2425	}
2426	/ Unlike push_stack() do not copy_verifier_state().*
2427	* The caller state doesn't matter.
2428	* This is async callback. It starts in a fresh stack.
2429	* Initialize it similar to do_check_common().
2430	*/
2431	elem->st.branches = `1`;
2432	frame = kzalloc(size: sizeof(*frame), GFP_KERNEL);
2433	if (!frame)
2434	goto err;
2435	init_func_state(env, state: frame,
2436	BPF_MAIN_FUNC / callsite /,
2437	frameno: `0` / frameno within this callchain /,
2438	subprogno: subprog / subprog number within this prog /);
2439	elem->st.frame[`0`] = frame;
2440	return &elem->st;
2441	err:
2442	free_verifier_state(state: env->cur_state, free_self: true);
2443	env->cur_state = NULL;
2444	/ pop all elements and return /
2445	while (!pop_stack(env, NULL, NULL, pop_log: false));
2446	return NULL;
2447	}
2448
2449
/* How an instruction uses a register operand. */
enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};
2455
2456	static int cmp_subprogs(const void a, const* void *b)
2457	{
2458	return ((struct bpf_subprog_info *)a)->start -
2459	((struct bpf_subprog_info *)b)->start;
2460	}
2461
2462	static int find_subprog(struct bpf_verifier_env env, int* off)
2463	{
2464	struct bpf_subprog_info *p;
2465
2466	p = bsearch(key: &off, base: env->subprog_info, num: env->subprog_cnt,
2467	size: sizeof(env->subprog_info[`0`]), cmp: cmp_subprogs);
2468	if (!p)
2469	return -ENOENT;
2470	return p - env->subprog_info;
2471
2472	}
2473
2474	static int add_subprog(struct bpf_verifier_env env, int* off)
2475	{
2476	int insn_cnt = env->prog->len;
2477	int ret;
2478
2479	if (off >= insn_cnt \|\| off < `0`) {
2480	verbose(private_data: env, fmt: "call to invalid destination\n");
2481	return -EINVAL;
2482	}
2483	ret = find_subprog(env, off);
2484	if (ret >= `0`)
2485	return ret;
2486	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2487	verbose(private_data: env, fmt: "too many subprograms\n");
2488	return -E2BIG;
2489	}
2490	/ determine subprog starts. The end is one before the next starts /
2491	env->subprog_info[env->subprog_cnt++].start = off;
2492	sort(base: env->subprog_info, num: env->subprog_cnt,
2493	size: sizeof(env->subprog_info[`0`]), cmp_func: cmp_subprogs, NULL);
2494	return env->subprog_cnt - `1`;
2495	}
2496
2497	static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2498	{
2499	struct bpf_prog_aux *aux = env->prog->aux;
2500	struct btf *btf = aux->btf;
2501	const struct btf_type *t;
2502	u32 main_btf_id, id;
2503	const char *name;
2504	int ret, i;
2505
2506	/ Non-zero func_info_cnt implies valid btf /
2507	if (!aux->func_info_cnt)
2508	return `0`;
2509	main_btf_id = aux->func_info[`0`].type_id;
2510
2511	t = btf_type_by_id(btf, type_id: main_btf_id);
2512	if (!t) {
2513	verbose(private_data: env, fmt: "invalid btf id for main subprog in func_info\n");
2514	return -EINVAL;
2515	}
2516
2517	name = btf_find_decl_tag_value(btf, pt: t, comp_idx: -`1`, tag_key: "exception_callback:");
2518	if (IS_ERR(ptr: name)) {
2519	ret = PTR_ERR(ptr: name);
2520	/ If there is no tag present, there is no exception callback /
2521	if (ret == -ENOENT)
2522	ret = `0`;
2523	else if (ret == -EEXIST)
2524	verbose(private_data: env, fmt: "multiple exception callback tags for main subprog\n");
2525	return ret;
2526	}
2527
2528	ret = btf_find_by_name_kind(btf, name, kind: BTF_KIND_FUNC);
2529	if (ret < `0`) {
2530	verbose(private_data: env, fmt: "exception callback '%s' could not be found in BTF\n", name);
2531	return ret;
2532	}
2533	id = ret;
2534	t = btf_type_by_id(btf, type_id: id);
2535	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2536	verbose(private_data: env, fmt: "exception callback '%s' must have global linkage\n", name);
2537	return -EINVAL;
2538	}
2539	ret = `0`;
2540	for (i = `0`; i < aux->func_info_cnt; i++) {
2541	if (aux->func_info[i].type_id != id)
2542	continue;
2543	ret = aux->func_info[i].insn_off;
2544	/ Further func_info and subprog checks will also happen*
2545	* later, so assume this is the right insn_off for now.
2546	*/
2547	if (!ret) {
2548	verbose(private_data: env, fmt: "invalid exception callback insn_off in func_info: 0\n");
2549	ret = -EINVAL;
2550	}
2551	}
2552	if (!ret) {
2553	verbose(private_data: env, fmt: "exception callback type id not found in func_info\n");
2554	ret = -EINVAL;
2555	}
2556	return ret;
2557	}
2558
2559	#define MAX_KFUNC_DESCS 256
2560	#define MAX_KFUNC_BTFS 256
2561
2562	struct bpf_kfunc_desc {
2563	struct btf_func_model func_model;
2564	u32 func_id;
2565	s32 imm;
2566	u16 offset;
2567	unsigned long addr;
2568	};
2569
2570	struct bpf_kfunc_btf {
2571	struct btf *btf;
2572	struct module *module;
2573	u16 offset;
2574	};
2575
2576	struct bpf_kfunc_desc_tab {
2577	/ Sorted by func_id (BTF ID) and offset (fd_array offset) during*
2578	* verification. JITs do lookups by bpf_insn, where func_id may not be
2579	* available, therefore at the end of verification do_misc_fixups()
2580	* sorts this by imm and offset.
2581	*/
2582	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2583	u32 nr_descs;
2584	};
2585
2586	struct bpf_kfunc_btf_tab {
2587	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2588	u32 nr_descs;
2589	};
2590
2591	static int kfunc_desc_cmp_by_id_off(const void a, const* void *b)
2592	{
2593	const struct bpf_kfunc_desc *d0 = a;
2594	const struct bpf_kfunc_desc *d1 = b;
2595
2596	/ func_id is not greater than BTF_MAX_TYPE /
2597	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2598	}
2599
2600	static int kfunc_btf_cmp_by_off(const void a, const* void *b)
2601	{
2602	const struct bpf_kfunc_btf *d0 = a;
2603	const struct bpf_kfunc_btf *d1 = b;
2604
2605	return d0->offset - d1->offset;
2606	}
2607
2608	static const struct bpf_kfunc_desc *
2609	find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2610	{
2611	struct bpf_kfunc_desc desc = {
2612	.func_id = func_id,
2613	.offset = offset,
2614	};
2615	struct bpf_kfunc_desc_tab *tab;
2616
2617	tab = prog->aux->kfunc_tab;
2618	return bsearch(key: &desc, base: tab->descs, num: tab->nr_descs,
2619	size: sizeof(tab->descs[`0`]), cmp: kfunc_desc_cmp_by_id_off);
2620	}
2621
2622	int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2623	u16 btf_fd_idx, u8 **func_addr)
2624	{
2625	const struct bpf_kfunc_desc *desc;
2626
2627	desc = find_kfunc_desc(prog, func_id, offset: btf_fd_idx);
2628	if (!desc)
2629	return -EFAULT;
2630
2631	func_addr = (u8 )desc->addr;
2632	return `0`;
2633	}
2634
2635	static struct btf __find_kfunc_desc_btf(struct* bpf_verifier_env *env,
2636	s16 offset)
2637	{
2638	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2639	struct bpf_kfunc_btf_tab *tab;
2640	struct bpf_kfunc_btf *b;
2641	struct module *mod;
2642	struct btf *btf;
2643	int btf_fd;
2644
2645	tab = env->prog->aux->kfunc_btf_tab;
2646	b = bsearch(key: &kf_btf, base: tab->descs, num: tab->nr_descs,
2647	size: sizeof(tab->descs[`0`]), cmp: kfunc_btf_cmp_by_off);
2648	if (!b) {
2649	if (tab->nr_descs == MAX_KFUNC_BTFS) {
2650	verbose(private_data: env, fmt: "too many different module BTFs\n");
2651	return ERR_PTR(error: -E2BIG);
2652	}
2653
2654	if (bpfptr_is_null(bpfptr: env->fd_array)) {
2655	verbose(private_data: env, fmt: "kfunc offset > 0 without fd_array is invalid\n");
2656	return ERR_PTR(error: -EPROTO);
2657	}
2658
2659	if (copy_from_bpfptr_offset(dst: &btf_fd, src: env->fd_array,
2660	offset: offset * sizeof(btf_fd),
2661	size: sizeof(btf_fd)))
2662	return ERR_PTR(error: -EFAULT);
2663
2664	btf = btf_get_by_fd(fd: btf_fd);
2665	if (IS_ERR(ptr: btf)) {
2666	verbose(private_data: env, fmt: "invalid module BTF fd specified\n");
2667	return btf;
2668	}
2669
2670	if (!btf_is_module(btf)) {
2671	verbose(private_data: env, fmt: "BTF fd for kfunc is not a module BTF\n");
2672	btf_put(btf);
2673	return ERR_PTR(error: -EINVAL);
2674	}
2675
2676	mod = btf_try_get_module(btf);
2677	if (!mod) {
2678	btf_put(btf);
2679	return ERR_PTR(error: -ENXIO);
2680	}
2681
2682	b = &tab->descs[tab->nr_descs++];
2683	b->btf = btf;
2684	b->module = mod;
2685	b->offset = offset;
2686
2687	sort(base: tab->descs, num: tab->nr_descs, size: sizeof(tab->descs[`0`]),
2688	cmp_func: kfunc_btf_cmp_by_off, NULL);
2689	}
2690	return b->btf;
2691	}
2692
2693	void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2694	{
2695	if (!tab)
2696	return;
2697
2698	while (tab->nr_descs--) {
2699	module_put(module: tab->descs[tab->nr_descs].module);
2700	btf_put(btf: tab->descs[tab->nr_descs].btf);
2701	}
2702	kfree(objp: tab);
2703	}
2704
2705	static struct btf find_kfunc_desc_btf(struct* bpf_verifier_env *env, s16 offset)
2706	{
2707	if (offset) {
2708	if (offset < `0`) {
2709	/ In the future, this can be allowed to increase limit*
2710	* of fd index into fd_array, interpreted as u16.
2711	*/
2712	verbose(private_data: env, fmt: "negative offset disallowed for kernel module function call\n");
2713	return ERR_PTR(error: -EINVAL);
2714	}
2715
2716	return __find_kfunc_desc_btf(env, offset);
2717	}
2718	return btf_vmlinux ?: ERR_PTR(error: -ENOENT);
2719	}
2720
2721	static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2722	{
2723	const struct btf_type func, func_proto;
2724	struct bpf_kfunc_btf_tab *btf_tab;
2725	struct bpf_kfunc_desc_tab *tab;
2726	struct bpf_prog_aux *prog_aux;
2727	struct bpf_kfunc_desc *desc;
2728	const char *func_name;
2729	struct btf *desc_btf;
2730	unsigned long call_imm;
2731	unsigned long addr;
2732	int err;
2733
2734	prog_aux = env->prog->aux;
2735	tab = prog_aux->kfunc_tab;
2736	btf_tab = prog_aux->kfunc_btf_tab;
2737	if (!tab) {
2738	if (!btf_vmlinux) {
2739	verbose(private_data: env, fmt: "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2740	return -ENOTSUPP;
2741	}
2742
2743	if (!env->prog->jit_requested) {
2744	verbose(private_data: env, fmt: "JIT is required for calling kernel function\n");
2745	return -ENOTSUPP;
2746	}
2747
2748	if (!bpf_jit_supports_kfunc_call()) {
2749	verbose(private_data: env, fmt: "JIT does not support calling kernel function\n");
2750	return -ENOTSUPP;
2751	}
2752
2753	if (!env->prog->gpl_compatible) {
2754	verbose(private_data: env, fmt: "cannot call kernel function from non-GPL compatible program\n");
2755	return -EINVAL;
2756	}
2757
2758	tab = kzalloc(size: sizeof(*tab), GFP_KERNEL);
2759	if (!tab)
2760	return -ENOMEM;
2761	prog_aux->kfunc_tab = tab;
2762	}
2763
2764	/ func_id == 0 is always invalid, but instead of returning an error, be*
2765	* conservative and wait until the code elimination pass before returning
2766	* error, so that invalid calls that get pruned out can be in BPF programs
2767	* loaded from userspace. It is also required that offset be untouched
2768	* for such calls.
2769	*/
2770	if (!func_id && !offset)
2771	return `0`;
2772
2773	if (!btf_tab && offset) {
2774	btf_tab = kzalloc(size: sizeof(*btf_tab), GFP_KERNEL);
2775	if (!btf_tab)
2776	return -ENOMEM;
2777	prog_aux->kfunc_btf_tab = btf_tab;
2778	}
2779
2780	desc_btf = find_kfunc_desc_btf(env, offset);
2781	if (IS_ERR(ptr: desc_btf)) {
2782	verbose(private_data: env, fmt: "failed to find BTF for kernel function\n");
2783	return PTR_ERR(ptr: desc_btf);
2784	}
2785
2786	if (find_kfunc_desc(prog: env->prog, func_id, offset))
2787	return `0`;
2788
2789	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2790	verbose(private_data: env, fmt: "too many different kernel function calls\n");
2791	return -E2BIG;
2792	}
2793
2794	func = btf_type_by_id(btf: desc_btf, type_id: func_id);
2795	if (!func \|\| !btf_type_is_func(t: func)) {
2796	verbose(private_data: env, fmt: "kernel btf_id %u is not a function\n",
2797	func_id);
2798	return -EINVAL;
2799	}
2800	func_proto = btf_type_by_id(btf: desc_btf, type_id: func->type);
2801	if (!func_proto \|\| !btf_type_is_func_proto(t: func_proto)) {
2802	verbose(private_data: env, fmt: "kernel function btf_id %u does not have a valid func_proto\n",
2803	func_id);
2804	return -EINVAL;
2805	}
2806
2807	func_name = btf_name_by_offset(btf: desc_btf, offset: func->name_off);
2808	addr = kallsyms_lookup_name(name: func_name);
2809	if (!addr) {
2810	verbose(private_data: env, fmt: "cannot find address for kernel function %s\n",
2811	func_name);
2812	return -EINVAL;
2813	}
2814	specialize_kfunc(env, func_id, offset, addr: &addr);
2815
2816	if (bpf_jit_supports_far_kfunc_call()) {
2817	call_imm = func_id;
2818	} else {
2819	call_imm = BPF_CALL_IMM(addr);
2820	/ Check whether the relative offset overflows desc->imm /
2821	if ((unsigned long)(s32)call_imm != call_imm) {
2822	verbose(private_data: env, fmt: "address of kernel function %s is out of range\n",
2823	func_name);
2824	return -EINVAL;
2825	}
2826	}
2827
2828	if (bpf_dev_bound_kfunc_id(btf_id: func_id)) {
2829	err = bpf_dev_bound_kfunc_check(log: &env->log, prog_aux);
2830	if (err)
2831	return err;
2832	}
2833
2834	desc = &tab->descs[tab->nr_descs++];
2835	desc->func_id = func_id;
2836	desc->imm = call_imm;
2837	desc->offset = offset;
2838	desc->addr = addr;
2839	err = btf_distill_func_proto(log: &env->log, btf: desc_btf,
2840	func_proto, func_name,
2841	m: &desc->func_model);
2842	if (!err)
2843	sort(base: tab->descs, num: tab->nr_descs, size: sizeof(tab->descs[`0`]),
2844	cmp_func: kfunc_desc_cmp_by_id_off, NULL);
2845	return err;
2846	}
2847
2848	static int kfunc_desc_cmp_by_imm_off(const void a, const* void *b)
2849	{
2850	const struct bpf_kfunc_desc *d0 = a;
2851	const struct bpf_kfunc_desc *d1 = b;
2852
2853	if (d0->imm != d1->imm)
2854	return d0->imm < d1->imm ? -`1` : `1`;
2855	if (d0->offset != d1->offset)
2856	return d0->offset < d1->offset ? -`1` : `1`;
2857	return `0`;
2858	}
2859
2860	static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
2861	{
2862	struct bpf_kfunc_desc_tab *tab;
2863
2864	tab = prog->aux->kfunc_tab;
2865	if (!tab)
2866	return;
2867
2868	sort(base: tab->descs, num: tab->nr_descs, size: sizeof(tab->descs[`0`]),
2869	cmp_func: kfunc_desc_cmp_by_imm_off, NULL);
2870	}
2871
2872	bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2873	{
2874	return !!prog->aux->kfunc_tab;
2875	}
2876
2877	const struct btf_func_model *
2878	bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2879	const struct bpf_insn *insn)
2880	{
2881	const struct bpf_kfunc_desc desc = {
2882	.imm = insn->imm,
2883	.offset = insn->off,
2884	};
2885	const struct bpf_kfunc_desc *res;
2886	struct bpf_kfunc_desc_tab *tab;
2887
2888	tab = prog->aux->kfunc_tab;
2889	res = bsearch(key: &desc, base: tab->descs, num: tab->nr_descs,
2890	size: sizeof(tab->descs[`0`]), cmp: kfunc_desc_cmp_by_imm_off);
2891
2892	return res ? &res->func_model : NULL;
2893	}
2894
2895	static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2896	{
2897	struct bpf_subprog_info *subprog = env->subprog_info;
2898	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
2899	struct bpf_insn *insn = env->prog->insnsi;
2900
2901	/ Add entry function. /
2902	ret = add_subprog(env, off: `0`);
2903	if (ret)
2904	return ret;
2905
2906	for (i = `0`; i < insn_cnt; i++, insn++) {
2907	if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2908	!bpf_pseudo_kfunc_call(insn))
2909	continue;
2910
2911	if (!env->bpf_capable) {
2912	verbose(private_data: env, fmt: "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2913	return -EPERM;
2914	}
2915
2916	if (bpf_pseudo_func(insn) \|\| bpf_pseudo_call(insn))
2917	ret = add_subprog(env, off: i + insn->imm + `1`);
2918	else
2919	ret = add_kfunc_call(env, func_id: insn->imm, offset: insn->off);
2920
2921	if (ret < `0`)
2922	return ret;
2923	}
2924
2925	ret = bpf_find_exception_callback_insn_off(env);
2926	if (ret < `0`)
2927	return ret;
2928	ex_cb_insn = ret;
2929
2930	/ If ex_cb_insn > 0, this means that the main program has a subprog*
2931	* marked using BTF decl tag to serve as the exception callback.
2932	*/
2933	if (ex_cb_insn) {
2934	ret = add_subprog(env, off: ex_cb_insn);
2935	if (ret < `0`)
2936	return ret;
2937	for (i = `1`; i < env->subprog_cnt; i++) {
2938	if (env->subprog_info[i].start != ex_cb_insn)
2939	continue;
2940	env->exception_callback_subprog = i;
2941	mark_subprog_exc_cb(env, subprog: i);
2942	break;
2943	}
2944	}
2945
2946	/ Add a fake 'exit' subprog which could simplify subprog iteration*
2947	* logic. 'subprog_cnt' should not be increased.
2948	*/
2949	subprog[env->subprog_cnt].start = insn_cnt;
2950
2951	if (env->log.level & BPF_LOG_LEVEL2)
2952	for (i = `0`; i < env->subprog_cnt; i++)
2953	verbose(private_data: env, fmt: "func#%d @%d\n", i, subprog[i].start);
2954
2955	return `0`;
2956	}
2957
2958	static int check_subprogs(struct bpf_verifier_env *env)
2959	{
2960	int i, subprog_start, subprog_end, off, cur_subprog = `0`;
2961	struct bpf_subprog_info *subprog = env->subprog_info;
2962	struct bpf_insn *insn = env->prog->insnsi;
2963	int insn_cnt = env->prog->len;
2964
2965	/ now check that all jumps are within the same subprog /
2966	subprog_start = subprog[cur_subprog].start;
2967	subprog_end = subprog[cur_subprog + `1`].start;
2968	for (i = `0`; i < insn_cnt; i++) {
2969	u8 code = insn[i].code;
2970
2971	if (code == (BPF_JMP \| BPF_CALL) &&
2972	insn[i].src_reg == `0` &&
2973	insn[i].imm == BPF_FUNC_tail_call)
2974	subprog[cur_subprog].has_tail_call = true;
2975	if (BPF_CLASS(code) == BPF_LD &&
2976	(BPF_MODE(code) == BPF_ABS \|\| BPF_MODE(code) == BPF_IND))
2977	subprog[cur_subprog].has_ld_abs = true;
2978	if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2979	goto next;
2980	if (BPF_OP(code) == BPF_EXIT \|\| BPF_OP(code) == BPF_CALL)
2981	goto next;
2982	if (code == (BPF_JMP32 \| BPF_JA))
2983	off = i + insn[i].imm + `1`;
2984	else
2985	off = i + insn[i].off + `1`;
2986	if (off < subprog_start \|\| off >= subprog_end) {
2987	verbose(private_data: env, fmt: "jump out of range from insn %d to %d\n", i, off);
2988	return -EINVAL;
2989	}
2990	next:
2991	if (i == subprog_end - `1`) {
2992	/ to avoid fall-through from one subprog into another*
2993	* the last insn of the subprog should be either exit
2994	* or unconditional jump back or bpf_throw call
2995	*/
2996	if (code != (BPF_JMP \| BPF_EXIT) &&
2997	code != (BPF_JMP32 \| BPF_JA) &&
2998	code != (BPF_JMP \| BPF_JA)) {
2999	verbose(private_data: env, fmt: "last insn is not an exit or jmp\n");
3000	return -EINVAL;
3001	}
3002	subprog_start = subprog_end;
3003	cur_subprog++;
3004	if (cur_subprog < env->subprog_cnt)
3005	subprog_end = subprog[cur_subprog + `1`].start;
3006	}
3007	}
3008	return `0`;
3009	}
3010
3011	/ Parentage chain of this register (or stack slot) should take care of all*
3012	* issues like callee-saved registers, stack slot allocation time, etc.
3013	*/
3014	static int mark_reg_read(struct bpf_verifier_env *env,
3015	const struct bpf_reg_state *state,
3016	struct bpf_reg_state *parent, u8 flag)
3017	{
3018	bool writes = parent == state->parent; / Observe write marks /
3019	int cnt = `0`;
3020
3021	while (parent) {
3022	/ if read wasn't screened by an earlier write ... /
3023	if (writes && state->live & REG_LIVE_WRITTEN)
3024	break;
3025	if (parent->live & REG_LIVE_DONE) {
3026	verbose(private_data: env, fmt: "verifier BUG type %s var_off %lld off %d\n",
3027	reg_type_str(env, type: parent->type),
3028	parent->var_off.value, parent->off);
3029	return -EFAULT;
3030	}
3031	/ The first condition is more likely to be true than the*
3032	* second, checked it first.
3033	*/
3034	if ((parent->live & REG_LIVE_READ) == flag \|\|
3035	parent->live & REG_LIVE_READ64)
3036	/ The parentage chain never changes and*
3037	* this parent was already marked as LIVE_READ.
3038	* There is no need to keep walking the chain again and
3039	* keep re-marking all parents as LIVE_READ.
3040	* This case happens when the same register is read
3041	* multiple times without writes into it in-between.
3042	* Also, if parent has the stronger REG_LIVE_READ64 set,
3043	* then no need to set the weak REG_LIVE_READ32.
3044	*/
3045	break;
3046	/ ... then we depend on parent's value /
3047	parent->live \|= flag;
3048	/ REG_LIVE_READ64 overrides REG_LIVE_READ32. /
3049	if (flag == REG_LIVE_READ64)
3050	parent->live &= ~REG_LIVE_READ32;
3051	state = parent;
3052	parent = state->parent;
3053	writes = true;
3054	cnt++;
3055	}
3056
3057	if (env->longest_mark_read_walk < cnt)
3058	env->longest_mark_read_walk = cnt;
3059	return `0`;
3060	}
3061
3062	static int mark_dynptr_read(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
3063	{
3064	struct bpf_func_state *state = func(env, reg);
3065	int spi, ret;
3066
3067	/ For CONST_PTR_TO_DYNPTR, it must have already been done by*
3068	* check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3069	* check_kfunc_call.
3070	*/
3071	if (reg->type == CONST_PTR_TO_DYNPTR)
3072	return `0`;
3073	spi = dynptr_get_spi(env, reg);
3074	if (spi < `0`)
3075	return spi;
3076	/ Caller ensures dynptr is valid and initialized, which means spi is in*
3077	* bounds and spi is the first dynptr slot. Simply mark stack slot as
3078	* read.
3079	*/
3080	ret = mark_reg_read(env, state: &state->stack[spi].spilled_ptr,
3081	parent: state->stack[spi].spilled_ptr.parent, flag: REG_LIVE_READ64);
3082	if (ret)
3083	return ret;
3084	return mark_reg_read(env, state: &state->stack[spi - `1`].spilled_ptr,
3085	parent: state->stack[spi - `1`].spilled_ptr.parent, flag: REG_LIVE_READ64);
3086	}
3087
3088	static int mark_iter_read(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
3089	int spi, int nr_slots)
3090	{
3091	struct bpf_func_state *state = func(env, reg);
3092	int err, i;
3093
3094	for (i = `0`; i < nr_slots; i++) {
3095	struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
3096
3097	err = mark_reg_read(env, state: st, parent: st->parent, flag: REG_LIVE_READ64);
3098	if (err)
3099	return err;
3100
3101	mark_stack_slot_scratched(env, spi: spi - i);
3102	}
3103
3104	return `0`;
3105	}
3106
3107	/ This function is supposed to be used by the following 32-bit optimization*
3108	* code only. It returns TRUE if the source or destination register operates
3109	* on 64-bit, otherwise return FALSE.
3110	*/
3111	static bool is_reg64(struct bpf_verifier_env env, struct* bpf_insn *insn,
3112	u32 regno, struct bpf_reg_state reg, enum* reg_arg_type t)
3113	{
3114	u8 code, class, op;
3115
3116	code = insn->code;
3117	class = BPF_CLASS(code);
3118	op = BPF_OP(code);
3119	if (class == BPF_JMP) {
3120	/ BPF_EXIT for "main" will reach here. Return TRUE*
3121	* conservatively.
3122	*/
3123	if (op == BPF_EXIT)
3124	return true;
3125	if (op == BPF_CALL) {
3126	/ BPF to BPF call will reach here because of marking*
3127	* caller saved clobber with DST_OP_NO_MARK for which we
3128	* don't care the register def because they are anyway
3129	* marked as NOT_INIT already.
3130	*/
3131	if (insn->src_reg == BPF_PSEUDO_CALL)
3132	return false;
3133	/ Helper call will reach here because of arg type*
3134	* check, conservatively return TRUE.
3135	*/
3136	if (t == SRC_OP)
3137	return true;
3138
3139	return false;
3140	}
3141	}
3142
3143	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == `16` \|\| insn->imm == `32`))
3144	return false;
3145
3146	if (class == BPF_ALU64 \|\| class == BPF_JMP \|\|
3147	(class == BPF_ALU && op == BPF_END && insn->imm == `64`))
3148	return true;
3149
3150	if (class == BPF_ALU \|\| class == BPF_JMP32)
3151	return false;
3152
3153	if (class == BPF_LDX) {
3154	if (t != SRC_OP)
3155	return BPF_SIZE(code) == BPF_DW \|\| BPF_MODE(code) == BPF_MEMSX;
3156	/ LDX source must be ptr. /
3157	return true;
3158	}
3159
3160	if (class == BPF_STX) {
3161	/ BPF_STX (including atomic variants) has multiple source*
3162	* operands, one of which is a ptr. Check whether the caller is
3163	* asking about it.
3164	*/
3165	if (t == SRC_OP && reg->type != SCALAR_VALUE)
3166	return true;
3167	return BPF_SIZE(code) == BPF_DW;
3168	}
3169
3170	if (class == BPF_LD) {
3171	u8 mode = BPF_MODE(code);
3172
3173	/ LD_IMM64 /
3174	if (mode == BPF_IMM)
3175	return true;
3176
3177	/ Both LD_IND and LD_ABS return 32-bit data. /
3178	if (t != SRC_OP)
3179	return false;
3180
3181	/ Implicit ctx ptr. /
3182	if (regno == BPF_REG_6)
3183	return true;
3184
3185	/ Explicit source could be any width. /
3186	return true;
3187	}
3188
3189	if (class == BPF_ST)
3190	/ The only source register for BPF_ST is a ptr. /
3191	return true;
3192
3193	/ Conservatively return true at default. /
3194	return true;
3195	}
3196
3197	/ Return the regno defined by the insn, or -1. /
3198	static int insn_def_regno(const struct bpf_insn *insn)
3199	{
3200	switch (BPF_CLASS(insn->code)) {
3201	case BPF_JMP:
3202	case BPF_JMP32:
3203	case BPF_ST:
3204	return -`1`;
3205	case BPF_STX:
3206	if (BPF_MODE(insn->code) == BPF_ATOMIC &&
3207	(insn->imm & BPF_FETCH)) {
3208	if (insn->imm == BPF_CMPXCHG)
3209	return BPF_REG_0;
3210	else
3211	return insn->src_reg;
3212	} else {
3213	return -`1`;
3214	}
3215	default:
3216	return insn->dst_reg;
3217	}
3218	}
3219
3220	/ Return TRUE if INSN has defined any 32-bit value explicitly. /
3221	static bool insn_has_def32(struct bpf_verifier_env env, struct* bpf_insn *insn)
3222	{
3223	int dst_reg = insn_def_regno(insn);
3224
3225	if (dst_reg == -`1`)
3226	return false;
3227
3228	return !is_reg64(env, insn, regno: dst_reg, NULL, t: DST_OP);
3229	}
3230
3231	static void mark_insn_zext(struct bpf_verifier_env *env,
3232	struct bpf_reg_state *reg)
3233	{
3234	s32 def_idx = reg->subreg_def;
3235
3236	if (def_idx == DEF_NOT_SUBREG)
3237	return;
3238
3239	env->insn_aux_data[def_idx - `1`].zext_dst = true;
3240	/ The dst will be zero extended, so won't be sub-register anymore. /
3241	reg->subreg_def = DEF_NOT_SUBREG;
3242	}
3243
3244	static int __check_reg_arg(struct bpf_verifier_env env, struct* bpf_reg_state *regs, u32 regno,
3245	enum reg_arg_type t)
3246	{
3247	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3248	struct bpf_reg_state *reg;
3249	bool rw64;
3250
3251	if (regno >= MAX_BPF_REG) {
3252	verbose(private_data: env, fmt: "R%d is invalid\n", regno);
3253	return -EINVAL;
3254	}
3255
3256	mark_reg_scratched(env, regno);
3257
3258	reg = &regs[regno];
3259	rw64 = is_reg64(env, insn, regno, reg, t);
3260	if (t == SRC_OP) {
3261	/ check whether register used as source operand can be read /
3262	if (reg->type == NOT_INIT) {
3263	verbose(private_data: env, fmt: "R%d !read_ok\n", regno);
3264	return -EACCES;
3265	}
3266	/ We don't need to worry about FP liveness because it's read-only /
3267	if (regno == BPF_REG_FP)
3268	return `0`;
3269
3270	if (rw64)
3271	mark_insn_zext(env, reg);
3272
3273	return mark_reg_read(env, state: reg, parent: reg->parent,
3274	flag: rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3275	} else {
3276	/ check whether register used as dest operand can be written to /
3277	if (regno == BPF_REG_FP) {
3278	verbose(private_data: env, fmt: "frame pointer is read only\n");
3279	return -EACCES;
3280	}
3281	reg->live \|= REG_LIVE_WRITTEN;
3282	reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + `1`;
3283	if (t == DST_OP)
3284	mark_reg_unknown(env, regs, regno);
3285	}
3286	return `0`;
3287	}
3288
3289	static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3290	enum reg_arg_type t)
3291	{
3292	struct bpf_verifier_state *vstate = env->cur_state;
3293	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3294
3295	return __check_reg_arg(env, regs: state->regs, regno, t);
3296	}
3297
3298	static int insn_stack_access_flags(int frameno, int spi)
3299	{
3300	return INSN_F_STACK_ACCESS \| (spi << INSN_F_SPI_SHIFT) \| frameno;
3301	}
3302
3303	static int insn_stack_access_spi(int insn_flags)
3304	{
3305	return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
3306	}
3307
3308	static int insn_stack_access_frameno(int insn_flags)
3309	{
3310	return insn_flags & INSN_F_FRAMENO_MASK;
3311	}
3312
3313	static void mark_jmp_point(struct bpf_verifier_env env, int* idx)
3314	{
3315	env->insn_aux_data[idx].jmp_point = true;
3316	}
3317
3318	static bool is_jmp_point(struct bpf_verifier_env env, int* insn_idx)
3319	{
3320	return env->insn_aux_data[insn_idx].jmp_point;
3321	}
3322
3323	/ for any branch, call, exit record the history of jmps in the given state /
3324	static int push_jmp_history(struct bpf_verifier_env env, struct* bpf_verifier_state *cur,
3325	int insn_flags)
3326	{
3327	u32 cnt = cur->jmp_history_cnt;
3328	struct bpf_jmp_history_entry *p;
3329	size_t alloc_size;
3330
3331	/ combine instruction flags if we already recorded this instruction /
3332	if (env->cur_hist_ent) {
3333	/ atomic instructions push insn_flags twice, for READ and*
3334	* WRITE sides, but they should agree on stack slot
3335	*/
3336	WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
3337	(env->cur_hist_ent->flags & insn_flags) != insn_flags,
3338	"verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
3339	env->insn_idx, env->cur_hist_ent->flags, insn_flags);
3340	env->cur_hist_ent->flags \|= insn_flags;
3341	return `0`;
3342	}
3343
3344	cnt++;
3345	alloc_size = kmalloc_size_roundup(size: size_mul(factor1: cnt, factor2: sizeof(*p)));
3346	p = krealloc(objp: cur->jmp_history, new_size: alloc_size, GFP_USER);
3347	if (!p)
3348	return -ENOMEM;
3349	cur->jmp_history = p;
3350
3351	p = &cur->jmp_history[cnt - `1`];
3352	p->idx = env->insn_idx;
3353	p->prev_idx = env->prev_insn_idx;
3354	p->flags = insn_flags;
3355	cur->jmp_history_cnt = cnt;
3356	env->cur_hist_ent = p;
3357
3358	return `0`;
3359	}
3360
3361	static struct bpf_jmp_history_entry get_jmp_hist_entry(struct* bpf_verifier_state *st,
3362	u32 hist_end, int insn_idx)
3363	{
3364	if (hist_end > `0` && st->jmp_history[hist_end - `1`].idx == insn_idx)
3365	return &st->jmp_history[hist_end - `1`];
3366	return NULL;
3367	}
3368
3369	/ Backtrack one insn at a time. If idx is not at the top of recorded*
3370	* history then previous instruction came from straight line execution.
3371	* Return -ENOENT if we exhausted all instructions within given state.
3372	*
3373	* It's legal to have a bit of a looping with the same starting and ending
3374	* insn index within the same state, e.g.: 3->4->5->3, so just because current
3375	* instruction index is the same as state's first_idx doesn't mean we are
3376	* done. If there is still some jump history left, we should keep going. We
3377	* need to take into account that we might have a jump history between given
3378	* state's parent and itself, due to checkpointing. In this case, we'll have
3379	* history entry recording a jump from last instruction of parent state and
3380	* first instruction of given state.
3381	*/
3382	static int get_prev_insn_idx(struct bpf_verifier_state st, int* i,
3383	u32 *history)
3384	{
3385	u32 cnt = *history;
3386
3387	if (i == st->first_insn_idx) {
3388	if (cnt == `0`)
3389	return -ENOENT;
3390	if (cnt == `1` && st->jmp_history[`0`].idx == i)
3391	return -ENOENT;
3392	}
3393
3394	if (cnt && st->jmp_history[cnt - `1`].idx == i) {
3395	i = st->jmp_history[cnt - `1`].prev_idx;
3396	(*history)--;
3397	} else {
3398	i--;
3399	}
3400	return i;
3401	}
3402
3403	static const char disasm_kfunc_name(void* data, const* struct bpf_insn *insn)
3404	{
3405	const struct btf_type *func;
3406	struct btf *desc_btf;
3407
3408	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3409	return NULL;
3410
3411	desc_btf = find_kfunc_desc_btf(env: data, offset: insn->off);
3412	if (IS_ERR(ptr: desc_btf))
3413	return "<error>";
3414
3415	func = btf_type_by_id(btf: desc_btf, type_id: insn->imm);
3416	return btf_name_by_offset(btf: desc_btf, offset: func->name_off);
3417	}
3418
3419	static inline void bt_init(struct backtrack_state *bt, u32 frame)
3420	{
3421	bt->frame = frame;
3422	}
3423
3424	static inline void bt_reset(struct backtrack_state *bt)
3425	{
3426	struct bpf_verifier_env *env = bt->env;
3427
3428	memset(bt, `0`, sizeof(*bt));
3429	bt->env = env;
3430	}
3431
3432	static inline u32 bt_empty(struct backtrack_state *bt)
3433	{
3434	u64 mask = `0`;
3435	int i;
3436
3437	for (i = `0`; i <= bt->frame; i++)
3438	mask \|= bt->reg_masks[i] \| bt->stack_masks[i];
3439
3440	return mask == `0`;
3441	}
3442
3443	static inline int bt_subprog_enter(struct backtrack_state *bt)
3444	{
3445	if (bt->frame == MAX_CALL_FRAMES - `1`) {
3446	verbose(private_data: bt->env, fmt: "BUG subprog enter from frame %d\n", bt->frame);
3447	WARN_ONCE(`1`, "verifier backtracking bug");
3448	return -EFAULT;
3449	}
3450	bt->frame++;
3451	return `0`;
3452	}
3453
3454	static inline int bt_subprog_exit(struct backtrack_state *bt)
3455	{
3456	if (bt->frame == `0`) {
3457	verbose(private_data: bt->env, fmt: "BUG subprog exit from frame 0\n");
3458	WARN_ONCE(`1`, "verifier backtracking bug");
3459	return -EFAULT;
3460	}
3461	bt->frame--;
3462	return `0`;
3463	}
3464
3465	static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3466	{
3467	bt->reg_masks[frame] \|= `1` << reg;
3468	}
3469
3470	static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3471	{
3472	bt->reg_masks[frame] &= ~(`1` << reg);
3473	}
3474
3475	static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
3476	{
3477	bt_set_frame_reg(bt, frame: bt->frame, reg);
3478	}
3479
3480	static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
3481	{
3482	bt_clear_frame_reg(bt, frame: bt->frame, reg);
3483	}
3484
3485	static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3486	{
3487	bt->stack_masks[frame] \|= `1ull` << slot;
3488	}
3489
3490	static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3491	{
3492	bt->stack_masks[frame] &= ~(`1ull` << slot);
3493	}
3494
3495	static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
3496	{
3497	return bt->reg_masks[frame];
3498	}
3499
3500	static inline u32 bt_reg_mask(struct backtrack_state *bt)
3501	{
3502	return bt->reg_masks[bt->frame];
3503	}
3504
3505	static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
3506	{
3507	return bt->stack_masks[frame];
3508	}
3509
3510	static inline u64 bt_stack_mask(struct backtrack_state *bt)
3511	{
3512	return bt->stack_masks[bt->frame];
3513	}
3514
3515	static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
3516	{
3517	return bt->reg_masks[bt->frame] & (`1` << reg);
3518	}
3519
3520	static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
3521	{
3522	return bt->stack_masks[frame] & (`1ull` << slot);
3523	}
3524
3525	/ format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask /
3526	static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
3527	{
3528	DECLARE_BITMAP(mask, `64`);
3529	bool first = true;
3530	int i, n;
3531
3532	buf[`0`] = `'\0'`;
3533
3534	bitmap_from_u64(dst: mask, mask: reg_mask);
3535	for_each_set_bit(i, mask, `32`) {
3536	n = snprintf(buf, size: buf_sz, fmt: "%sr%d", first ? "" : ",", i);
3537	first = false;
3538	buf += n;
3539	buf_sz -= n;
3540	if (buf_sz < `0`)
3541	break;
3542	}
3543	}
3544	/ format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask /
3545	static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
3546	{
3547	DECLARE_BITMAP(mask, `64`);
3548	bool first = true;
3549	int i, n;
3550
3551	buf[`0`] = `'\0'`;
3552
3553	bitmap_from_u64(dst: mask, mask: stack_mask);
3554	for_each_set_bit(i, mask, `64`) {
3555	n = snprintf(buf, size: buf_sz, fmt: "%s%d", first ? "" : ",", -(i + `1`) * `8`);
3556	first = false;
3557	buf += n;
3558	buf_sz -= n;
3559	if (buf_sz < `0`)
3560	break;
3561	}
3562	}
3563
3564	static bool calls_callback(struct bpf_verifier_env env, int* insn_idx);
3565
3566	/ For given verifier state backtrack_insn() is called from the last insn to*
3567	* the first insn. Its purpose is to compute a bitmask of registers and
3568	* stack slots that needs precision in the parent verifier state.
3569	*
3570	* @idx is an index of the instruction we are currently processing;
3571	* @subseq_idx is an index of the subsequent instruction that:
3572	* - would be executed next, if jump history is viewed in forward order;
3573	* - was processed previously during backtracking.
3574	*/
3575	static int backtrack_insn(struct bpf_verifier_env env, int* idx, int subseq_idx,
3576	struct bpf_jmp_history_entry hist, struct* backtrack_state *bt)
3577	{
3578	const struct bpf_insn_cbs cbs = {
3579	.cb_call = disasm_kfunc_name,
3580	.cb_print = verbose,
3581	.private_data = env,
3582	};
3583	struct bpf_insn *insn = env->prog->insnsi + idx;
3584	u8 class = BPF_CLASS(insn->code);
3585	u8 opcode = BPF_OP(insn->code);
3586	u8 mode = BPF_MODE(insn->code);
3587	u32 dreg = insn->dst_reg;
3588	u32 sreg = insn->src_reg;
3589	u32 spi, i, fr;
3590
3591	if (insn->code == `0`)
3592	return `0`;
3593	if (env->log.level & BPF_LOG_LEVEL2) {
3594	fmt_reg_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN, reg_mask: bt_reg_mask(bt));
3595	verbose(private_data: env, fmt: "mark_precise: frame%d: regs=%s ",
3596	bt->frame, env->tmp_str_buf);
3597	fmt_stack_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN, stack_mask: bt_stack_mask(bt));
3598	verbose(private_data: env, fmt: "stack=%s before ", env->tmp_str_buf);
3599	verbose(private_data: env, fmt: "%d: ", idx);
3600	print_bpf_insn(cbs: &cbs, insn, allow_ptr_leaks: env->allow_ptr_leaks);
3601	}
3602
3603	if (class == BPF_ALU \|\| class == BPF_ALU64) {
3604	if (!bt_is_reg_set(bt, reg: dreg))
3605	return `0`;
3606	if (opcode == BPF_END \|\| opcode == BPF_NEG) {
3607	/ sreg is reserved and unused*
3608	* dreg still need precision before this insn
3609	*/
3610	return `0`;
3611	} else if (opcode == BPF_MOV) {
3612	if (BPF_SRC(insn->code) == BPF_X) {
3613	/ dreg = sreg or dreg = (s8, s16, s32)sreg*
3614	* dreg needs precision after this insn
3615	* sreg needs precision before this insn
3616	*/
3617	bt_clear_reg(bt, reg: dreg);
3618	bt_set_reg(bt, reg: sreg);
3619	} else {
3620	/ dreg = K*
3621	* dreg needs precision after this insn.
3622	* Corresponding register is already marked
3623	* as precise=true in this verifier state.
3624	* No further markings in parent are necessary
3625	*/
3626	bt_clear_reg(bt, reg: dreg);
3627	}
3628	} else {
3629	if (BPF_SRC(insn->code) == BPF_X) {
3630	/ dreg += sreg*
3631	* both dreg and sreg need precision
3632	* before this insn
3633	*/
3634	bt_set_reg(bt, reg: sreg);
3635	} / else dreg += K*
3636	* dreg still needs precision before this insn
3637	*/
3638	}
3639	} else if (class == BPF_LDX) {
3640	if (!bt_is_reg_set(bt, reg: dreg))
3641	return `0`;
3642	bt_clear_reg(bt, reg: dreg);
3643
3644	/ scalars can only be spilled into stack w/o losing precision.*
3645	* Load from any other memory can be zero extended.
3646	* The desire to keep that precision is already indicated
3647	* by 'precise' mark in corresponding register of this state.
3648	* No further tracking necessary.
3649	*/
3650	if (!hist \|\| !(hist->flags & INSN_F_STACK_ACCESS))
3651	return `0`;
3652	/ dreg = (u64 )[fp - off] was a fill from the stack.*
3653	* that [fp - off] slot contains scalar that needs to be
3654	* tracked with precision
3655	*/
3656	spi = insn_stack_access_spi(insn_flags: hist->flags);
3657	fr = insn_stack_access_frameno(insn_flags: hist->flags);
3658	bt_set_frame_slot(bt, frame: fr, slot: spi);
3659	} else if (class == BPF_STX \|\| class == BPF_ST) {
3660	if (bt_is_reg_set(bt, reg: dreg))
3661	/ stx & st shouldn't be using _scalar_ dst_reg*
3662	* to access memory. It means backtracking
3663	* encountered a case of pointer subtraction.
3664	*/
3665	return -ENOTSUPP;
3666	/ scalars can only be spilled into stack /
3667	if (!hist \|\| !(hist->flags & INSN_F_STACK_ACCESS))
3668	return `0`;
3669	spi = insn_stack_access_spi(insn_flags: hist->flags);
3670	fr = insn_stack_access_frameno(insn_flags: hist->flags);
3671	if (!bt_is_frame_slot_set(bt, frame: fr, slot: spi))
3672	return `0`;
3673	bt_clear_frame_slot(bt, frame: fr, slot: spi);
3674	if (class == BPF_STX)
3675	bt_set_reg(bt, reg: sreg);
3676	} else if (class == BPF_JMP \|\| class == BPF_JMP32) {
3677	if (bpf_pseudo_call(insn)) {
3678	int subprog_insn_idx, subprog;
3679
3680	subprog_insn_idx = idx + insn->imm + `1`;
3681	subprog = find_subprog(env, off: subprog_insn_idx);
3682	if (subprog < `0`)
3683	return -EFAULT;
3684
3685	if (subprog_is_global(env, subprog)) {
3686	/ check that jump history doesn't have any*
3687	* extra instructions from subprog; the next
3688	* instruction after call to global subprog
3689	* should be literally next instruction in
3690	* caller program
3691	*/
3692	WARN_ONCE(idx + `1` != subseq_idx, "verifier backtracking bug");
3693	/ r1-r5 are invalidated after subprog call,*
3694	* so for global func call it shouldn't be set
3695	* anymore
3696	*/
3697	if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3698	verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3699	WARN_ONCE(`1`, "verifier backtracking bug");
3700	return -EFAULT;
3701	}
3702	/ global subprog always sets R0 /
3703	bt_clear_reg(bt, reg: BPF_REG_0);
3704	return `0`;
3705	} else {
3706	/ static subprog call instruction, which*
3707	* means that we are exiting current subprog,
3708	* so only r1-r5 could be still requested as
3709	* precise, r0 and r6-r10 or any stack slot in
3710	* the current frame should be zero by now
3711	*/
3712	if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3713	verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3714	WARN_ONCE(`1`, "verifier backtracking bug");
3715	return -EFAULT;
3716	}
3717	/ we are now tracking register spills correctly,*
3718	* so any instance of leftover slots is a bug
3719	*/
3720	if (bt_stack_mask(bt) != `0`) {
3721	verbose(private_data: env, fmt: "BUG stack slots %llx\n", bt_stack_mask(bt));
3722	WARN_ONCE(`1`, "verifier backtracking bug (subprog leftover stack slots)");
3723	return -EFAULT;
3724	}
3725	/ propagate r1-r5 to the caller /
3726	for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
3727	if (bt_is_reg_set(bt, reg: i)) {
3728	bt_clear_reg(bt, reg: i);
3729	bt_set_frame_reg(bt, frame: bt->frame - `1`, reg: i);
3730	}
3731	}
3732	if (bt_subprog_exit(bt))
3733	return -EFAULT;
3734	return `0`;
3735	}
3736	} else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - `1`) {
3737	/ exit from callback subprog to callback-calling helper or*
3738	* kfunc call. Use idx/subseq_idx check to discern it from
3739	* straight line code backtracking.
3740	* Unlike the subprog call handling above, we shouldn't
3741	* propagate precision of r1-r5 (if any requested), as they are
3742	* not actually arguments passed directly to callback subprogs
3743	*/
3744	if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3745	verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3746	WARN_ONCE(`1`, "verifier backtracking bug");
3747	return -EFAULT;
3748	}
3749	if (bt_stack_mask(bt) != `0`) {
3750	verbose(private_data: env, fmt: "BUG stack slots %llx\n", bt_stack_mask(bt));
3751	WARN_ONCE(`1`, "verifier backtracking bug (callback leftover stack slots)");
3752	return -EFAULT;
3753	}
3754	/ clear r1-r5 in callback subprog's mask /
3755	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3756	bt_clear_reg(bt, reg: i);
3757	if (bt_subprog_exit(bt))
3758	return -EFAULT;
3759	return `0`;
3760	} else if (opcode == BPF_CALL) {
3761	/ kfunc with imm==0 is invalid and fixup_kfunc_call will*
3762	* catch this error later. Make backtracking conservative
3763	* with ENOTSUPP.
3764	*/
3765	if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == `0`)
3766	return -ENOTSUPP;
3767	/ regular helper call sets R0 /
3768	bt_clear_reg(bt, reg: BPF_REG_0);
3769	if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3770	/ if backtracing was looking for registers R1-R5*
3771	* they should have been found already.
3772	*/
3773	verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3774	WARN_ONCE(`1`, "verifier backtracking bug");
3775	return -EFAULT;
3776	}
3777	} else if (opcode == BPF_EXIT) {
3778	bool r0_precise;
3779
3780	/ Backtracking to a nested function call, 'idx' is a part of*
3781	* the inner frame 'subseq_idx' is a part of the outer frame.
3782	* In case of a regular function call, instructions giving
3783	* precision to registers R1-R5 should have been found already.
3784	* In case of a callback, it is ok to have R1-R5 marked for
3785	* backtracking, as these registers are set by the function
3786	* invoking callback.
3787	*/
3788	if (subseq_idx >= `0` && calls_callback(env, insn_idx: subseq_idx))
3789	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3790	bt_clear_reg(bt, reg: i);
3791	if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3792	verbose(private_data: env, fmt: "BUG regs %x\n", bt_reg_mask(bt));
3793	WARN_ONCE(`1`, "verifier backtracking bug");
3794	return -EFAULT;
3795	}
3796
3797	/ BPF_EXIT in subprog or callback always returns*
3798	* right after the call instruction, so by checking
3799	* whether the instruction at subseq_idx-1 is subprog
3800	* call or not we can distinguish actual exit from
3801	* subprog from exit from callback. In the former
3802	* case, we need to propagate r0 precision, if
		 * necessary. In the latter we never do that.
3804	*/
3805	r0_precise = subseq_idx - `1` >= `0` &&
3806	bpf_pseudo_call(insn: &env->prog->insnsi[subseq_idx - `1`]) &&
3807	bt_is_reg_set(bt, reg: BPF_REG_0);
3808
3809	bt_clear_reg(bt, reg: BPF_REG_0);
3810	if (bt_subprog_enter(bt))
3811	return -EFAULT;
3812
3813	if (r0_precise)
3814	bt_set_reg(bt, reg: BPF_REG_0);
3815	/ r6-r9 and stack slots will stay set in caller frame*
3816	* bitmasks until we return back from callee(s)
3817	*/
3818	return `0`;
3819	} else if (BPF_SRC(insn->code) == BPF_X) {
3820	if (!bt_is_reg_set(bt, reg: dreg) && !bt_is_reg_set(bt, reg: sreg))
3821	return `0`;
3822	/ dreg <cond> sreg*
3823	* Both dreg and sreg need precision before
3824	* this insn. If only sreg was marked precise
3825	* before it would be equally necessary to
3826	* propagate it to dreg.
3827	*/
3828	bt_set_reg(bt, reg: dreg);
3829	bt_set_reg(bt, reg: sreg);
3830	/ else dreg <cond> K*
3831	* Only dreg still needs precision before
3832	* this insn, so for the K-based conditional
3833	* there is nothing new to be marked.
3834	*/
3835	}
3836	} else if (class == BPF_LD) {
3837	if (!bt_is_reg_set(bt, reg: dreg))
3838	return `0`;
3839	bt_clear_reg(bt, reg: dreg);
3840	/ It's ld_imm64 or ld_abs or ld_ind.*
3841	* For ld_imm64 no further tracking of precision
3842	* into parent is necessary
3843	*/
3844	if (mode == BPF_IND \|\| mode == BPF_ABS)
3845	/ to be analyzed /
3846	return -ENOTSUPP;
3847	}
3848	return `0`;
3849	}
3850
3851	/ the scalar precision tracking algorithm:*
3852	* . at the start all registers have precise=false.
3853	* . scalar ranges are tracked as normal through alu and jmp insns.
3854	* . once precise value of the scalar register is used in:
3855	* . ptr + scalar alu
3856	* . if (scalar cond K\|scalar)
3857	* . helper_call(.., scalar, ...) where ARG_CONST is expected
3858	* backtrack through the verifier states and mark all registers and
 *   stack slots with spilled constants that these scalar registers
3860	* should be precise.
3861	* . during state pruning two registers (or spilled stack slots)
3862	* are equivalent if both are not precise.
3863	*
3864	* Note the verifier cannot simply walk register parentage chain,
3865	* since many different registers and stack slots could have been
3866	* used to compute single precise scalar.
3867	*
3868	* The approach of starting with precise=true for all registers and then
3869	* backtrack to mark a register as not precise when the verifier detects
3870	* that program doesn't care about specific value (e.g., when helper
3871	* takes register as ARG_ANYTHING parameter) is not safe.
3872	*
3873	* It's ok to walk single parentage chain of the verifier states.
3874	* It's possible that this backtracking will go all the way till 1st insn.
3875	* All other branches will be explored for needing precision later.
3876	*
3877	* The backtracking needs to deal with cases like:
3878	* R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3879	* r9 -= r8
3880	* r5 = r9
3881	* if r5 > 0x79f goto pc+7
3882	* R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3883	* r5 += 1
3884	* ...
3885	* call bpf_perf_event_output#25
3886	* where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3887	*
3888	* and this case:
3889	* r6 = 1
3890	* call foo // uses callee's r6 inside to compute r0
3891	* r0 += r6
3892	* if r0 == 0 goto
3893	*
3894	* to track above reg_mask/stack_mask needs to be independent for each frame.
3895	*
3896	* Also if parent's curframe > frame where backtracking started,
 * the verifier needs to mark registers in both frames, otherwise callees
3898	* may incorrectly prune callers. This is similar to
3899	* commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3900	*
3901	* For now backtracking falls back into conservative marking.
3902	*/
3903	static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3904	struct bpf_verifier_state *st)
3905	{
3906	struct bpf_func_state *func;
3907	struct bpf_reg_state *reg;
3908	int i, j;
3909
3910	if (env->log.level & BPF_LOG_LEVEL2) {
3911	verbose(private_data: env, fmt: "mark_precise: frame%d: falling back to forcing all scalars precise\n",
3912	st->curframe);
3913	}
3914
3915	/ big hammer: mark all scalars precise in this path.*
3916	* pop_stack may still get !precise scalars.
3917	* We also skip current state and go straight to first parent state,
3918	* because precision markings in current non-checkpointed state are
3919	* not needed. See why in the comment in __mark_chain_precision below.
3920	*/
3921	for (st = st->parent; st; st = st->parent) {
3922	for (i = `0`; i <= st->curframe; i++) {
3923	func = st->frame[i];
3924	for (j = `0`; j < BPF_REG_FP; j++) {
3925	reg = &func->regs[j];
3926	if (reg->type != SCALAR_VALUE \|\| reg->precise)
3927	continue;
3928	reg->precise = true;
3929	if (env->log.level & BPF_LOG_LEVEL2) {
3930	verbose(private_data: env, fmt: "force_precise: frame%d: forcing r%d to be precise\n",
3931	i, j);
3932	}
3933	}
3934	for (j = `0`; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3935	if (!is_spilled_reg(stack: &func->stack[j]))
3936	continue;
3937	reg = &func->stack[j].spilled_ptr;
3938	if (reg->type != SCALAR_VALUE \|\| reg->precise)
3939	continue;
3940	reg->precise = true;
3941	if (env->log.level & BPF_LOG_LEVEL2) {
3942	verbose(private_data: env, fmt: "force_precise: frame%d: forcing fp%d to be precise\n",
3943	i, -(j + `1`) * `8`);
3944	}
3945	}
3946	}
3947	}
3948	}
3949
3950	static void mark_all_scalars_imprecise(struct bpf_verifier_env env, struct* bpf_verifier_state *st)
3951	{
3952	struct bpf_func_state *func;
3953	struct bpf_reg_state *reg;
3954	int i, j;
3955
3956	for (i = `0`; i <= st->curframe; i++) {
3957	func = st->frame[i];
3958	for (j = `0`; j < BPF_REG_FP; j++) {
3959	reg = &func->regs[j];
3960	if (reg->type != SCALAR_VALUE)
3961	continue;
3962	reg->precise = false;
3963	}
3964	for (j = `0`; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3965	if (!is_spilled_reg(stack: &func->stack[j]))
3966	continue;
3967	reg = &func->stack[j].spilled_ptr;
3968	if (reg->type != SCALAR_VALUE)
3969	continue;
3970	reg->precise = false;
3971	}
3972	}
3973	}
3974
3975	static bool idset_contains(struct bpf_idset *s, u32 id)
3976	{
3977	u32 i;
3978
3979	for (i = `0`; i < s->count; ++i)
3980	if (s->ids[i] == id)
3981	return true;
3982
3983	return false;
3984	}
3985
3986	static int idset_push(struct bpf_idset *s, u32 id)
3987	{
3988	if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
3989	return -EFAULT;
3990	s->ids[s->count++] = id;
3991	return `0`;
3992	}
3993
3994	static void idset_reset(struct bpf_idset *s)
3995	{
3996	s->count = `0`;
3997	}
3998
3999	/ Collect a set of IDs for all registers currently marked as precise in env->bt.*
4000	* Mark all registers with these IDs as precise.
4001	*/
4002	static int mark_precise_scalar_ids(struct bpf_verifier_env env, struct* bpf_verifier_state *st)
4003	{
4004	struct bpf_idset *precise_ids = &env->idset_scratch;
4005	struct backtrack_state *bt = &env->bt;
4006	struct bpf_func_state *func;
4007	struct bpf_reg_state *reg;
4008	DECLARE_BITMAP(mask, `64`);
4009	int i, fr;
4010
4011	idset_reset(s: precise_ids);
4012
4013	for (fr = bt->frame; fr >= `0`; fr--) {
4014	func = st->frame[fr];
4015
4016	bitmap_from_u64(dst: mask, mask: bt_frame_reg_mask(bt, frame: fr));
4017	for_each_set_bit(i, mask, `32`) {
4018	reg = &func->regs[i];
4019	if (!reg->id \|\| reg->type != SCALAR_VALUE)
4020	continue;
4021	if (idset_push(s: precise_ids, id: reg->id))
4022	return -EFAULT;
4023	}
4024
4025	bitmap_from_u64(dst: mask, mask: bt_frame_stack_mask(bt, frame: fr));
4026	for_each_set_bit(i, mask, `64`) {
4027	if (i >= func->allocated_stack / BPF_REG_SIZE)
4028	break;
4029	if (!is_spilled_scalar_reg(stack: &func->stack[i]))
4030	continue;
4031	reg = &func->stack[i].spilled_ptr;
4032	if (!reg->id)
4033	continue;
4034	if (idset_push(s: precise_ids, id: reg->id))
4035	return -EFAULT;
4036	}
4037	}
4038
4039	for (fr = `0`; fr <= st->curframe; ++fr) {
4040	func = st->frame[fr];
4041
4042	for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
4043	reg = &func->regs[i];
4044	if (!reg->id)
4045	continue;
4046	if (!idset_contains(s: precise_ids, id: reg->id))
4047	continue;
4048	bt_set_frame_reg(bt, frame: fr, reg: i);
4049	}
4050	for (i = `0`; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
4051	if (!is_spilled_scalar_reg(stack: &func->stack[i]))
4052	continue;
4053	reg = &func->stack[i].spilled_ptr;
4054	if (!reg->id)
4055	continue;
4056	if (!idset_contains(s: precise_ids, id: reg->id))
4057	continue;
4058	bt_set_frame_slot(bt, frame: fr, slot: i);
4059	}
4060	}
4061
4062	return `0`;
4063	}
4064
4065	/*
4066	* __mark_chain_precision() backtracks BPF program instruction sequence and
4067	* chain of verifier states making sure that register regno (if regno >= 0)
4068	* and/or stack slot spi (if spi >= 0) are marked as precisely tracked
4069	* SCALARS, as well as any other registers and slots that contribute to
4070	* a tracked state of given registers/stack slots, depending on specific BPF
4071	* assembly instructions (see backtrack_insns() for exact instruction handling
4072	* logic). This backtracking relies on recorded jmp_history and is able to
4073	* traverse entire chain of parent states. This process ends only when all the
4074	* necessary registers/slots and their transitive dependencies are marked as
4075	* precise.
4076	*
4077	* One important and subtle aspect is that precise marks do not matter in
4078	* the currently verified state (current state). It is important to understand
4079	* why this is the case.
4080	*
4081	* First, note that current state is the state that is not yet "checkpointed",
4082	* i.e., it is not yet put into env->explored_states, and it has no children
4083	* states as well. It's ephemeral, and can end up either a) being discarded if
4084	* compatible explored state is found at some point or BPF_EXIT instruction is
4085	* reached or b) checkpointed and put into env->explored_states, branching out
4086	* into one or more children states.
4087	*
4088	* In the former case, precise markings in current state are completely
4089	* ignored by state comparison code (see regsafe() for details). Only
4090	* checkpointed ("old") state precise markings are important, and if old
4091	* state's register/slot is precise, regsafe() assumes current state's
4092	* register/slot as precise and checks value ranges exactly and precisely. If
4093	* states turn out to be compatible, current state's necessary precise
4094	* markings and any required parent states' precise markings are enforced
 * after the fact with propagate_precision() logic. But it's
4096	* important to realize that in this case, even after marking current state
4097	* registers/slots as precise, we immediately discard current state. So what
4098	* actually matters is any of the precise markings propagated into current
4099	* state's parent states, which are always checkpointed (due to b) case above).
4100	* As such, for scenario a) it doesn't matter if current state has precise
4101	* markings set or not.
4102	*
4103	* Now, for the scenario b), checkpointing and forking into child(ren)
4104	* state(s). Note that before current state gets to checkpointing step, any
4105	* processed instruction always assumes precise SCALAR register/slot
4106	* knowledge: if precise value or range is useful to prune jump branch, BPF
4107	* verifier takes this opportunity enthusiastically. Similarly, when
4108	* register's value is used to calculate offset or memory address, exact
4109	* knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
4110	* what we mentioned above about state comparison ignoring precise markings
4111	* during state comparison, BPF verifier ignores and also assumes precise
4112	* markings at will during instruction verification process. But as verifier
4113	* assumes precision, it also propagates any precision dependencies across
4114	* parent states, which are not yet finalized, so can be further restricted
4115	* based on new knowledge gained from restrictions enforced by their children
4116	* states. This is so that once those parent states are finalized, i.e., when
4117	* they have no more active children state, state comparison logic in
4118	* is_state_visited() would enforce strict and precise SCALAR ranges, if
4119	* required for correctness.
4120	*
4121	* To build a bit more intuition, note also that once a state is checkpointed,
4122	* the path we took to get to that state is not important. This is crucial
4123	* property for state pruning. When state is checkpointed and finalized at
4124	* some instruction index, it can be correctly and safely used to "short
4125	* circuit" any compatible state that reaches exactly the same instruction
4126	* index. I.e., if we jumped to that instruction from a completely different
4127	* code path than original finalized state was derived from, it doesn't
4128	* matter, current state can be discarded because from that instruction
4129	* forward having a compatible state will ensure we will safely reach the
4130	* exit. States describe preconditions for further exploration, but completely
4131	* forget the history of how we got here.
4132	*
4133	* This also means that even if we needed precise SCALAR range to get to
4134	* finalized state, but from that point forward that same SCALAR register is
 * never used in a precise context (i.e., its precise value is not needed for
4136	* correctness), it's correct and safe to mark such register as "imprecise"
4137	* (i.e., precise marking set to false). This is what we rely on when we do
4138	* not set precise marking in current state. If no child state requires
4139	* precision for any given SCALAR register, it's safe to dictate that it can
4140	* be imprecise. If any child state does require this register to be precise,
4141	* we'll mark it precise later retroactively during precise markings
4142	* propagation from child state to parent states.
4143	*
4144	* Skipping precise marking setting in current state is a mild version of
4145	* relying on the above observation. But we can utilize this property even
4146	* more aggressively by proactively forgetting any precise marking in the
4147	* current state (which we inherited from the parent state), right before we
4148	* checkpoint it and branch off into new child state. This is done by
4149	* mark_all_scalars_imprecise() to hopefully get more permissive and generic
4150	* finalized states which help in short circuiting more future states.
4151	*/
4152	static int __mark_chain_precision(struct bpf_verifier_env env, int* regno)
4153	{
4154	struct backtrack_state *bt = &env->bt;
4155	struct bpf_verifier_state *st = env->cur_state;
4156	int first_idx = st->first_insn_idx;
4157	int last_idx = env->insn_idx;
4158	int subseq_idx = -`1`;
4159	struct bpf_func_state *func;
4160	struct bpf_reg_state *reg;
4161	bool skip_first = true;
4162	int i, fr, err;
4163
4164	if (!env->bpf_capable)
4165	return `0`;
4166
4167	/ set frame number from which we are starting to backtrack /
4168	bt_init(bt, frame: env->cur_state->curframe);
4169
4170	/ Do sanity checks against current state of register and/or stack*
4171	* slot, but don't set precise flag in current state, as precision
4172	* tracking in the current state is unnecessary.
4173	*/
4174	func = st->frame[bt->frame];
4175	if (regno >= `0`) {
4176	reg = &func->regs[regno];
4177	if (reg->type != SCALAR_VALUE) {
4178	WARN_ONCE(`1`, "backtracing misuse");
4179	return -EFAULT;
4180	}
4181	bt_set_reg(bt, reg: regno);
4182	}
4183
4184	if (bt_empty(bt))
4185	return `0`;
4186
4187	for (;;) {
4188	DECLARE_BITMAP(mask, `64`);
4189	u32 history = st->jmp_history_cnt;
4190	struct bpf_jmp_history_entry *hist;
4191
4192	if (env->log.level & BPF_LOG_LEVEL2) {
4193	verbose(private_data: env, fmt: "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
4194	bt->frame, last_idx, first_idx, subseq_idx);
4195	}
4196
4197	/ If some register with scalar ID is marked as precise,*
4198	* make sure that all registers sharing this ID are also precise.
4199	* This is needed to estimate effect of find_equal_scalars().
4200	* Do this at the last instruction of each state,
4201	* bpf_reg_state::id fields are valid for these instructions.
4202	*
4203	* Allows to track precision in situation like below:
4204	*
4205	* r2 = unknown value
4206	* ...
4207	* --- state #0 ---
4208	* ...
4209	* r1 = r2 // r1 and r2 now share the same ID
4210	* ...
4211	* --- state #1 {r1.id = A, r2.id = A} ---
4212	* ...
4213	* if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
4214	* ...
4215	* --- state #2 {r1.id = A, r2.id = A} ---
4216	* r3 = r10
4217	* r3 += r1 // need to mark both r1 and r2
4218	*/
4219	if (mark_precise_scalar_ids(env, st))
4220	return -EFAULT;
4221
4222	if (last_idx < `0`) {
4223	/ we are at the entry into subprog, which*
4224	* is expected for global funcs, but only if
4225	* requested precise registers are R1-R5
4226	* (which are global func's input arguments)
4227	*/
4228	if (st->curframe == `0` &&
4229	st->frame[`0`]->subprogno > `0` &&
4230	st->frame[`0`]->callsite == BPF_MAIN_FUNC &&
4231	bt_stack_mask(bt) == `0` &&
4232	(bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == `0`) {
4233	bitmap_from_u64(dst: mask, mask: bt_reg_mask(bt));
4234	for_each_set_bit(i, mask, `32`) {
4235	reg = &st->frame[`0`]->regs[i];
4236	bt_clear_reg(bt, reg: i);
4237	if (reg->type == SCALAR_VALUE)
4238	reg->precise = true;
4239	}
4240	return `0`;
4241	}
4242
4243	verbose(private_data: env, fmt: "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4244	st->frame[`0`]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4245	WARN_ONCE(`1`, "verifier backtracking bug");
4246	return -EFAULT;
4247	}
4248
4249	for (i = last_idx;;) {
4250	if (skip_first) {
4251	err = `0`;
4252	skip_first = false;
4253	} else {
4254	hist = get_jmp_hist_entry(st, hist_end: history, insn_idx: i);
4255	err = backtrack_insn(env, idx: i, subseq_idx, hist, bt);
4256	}
4257	if (err == -ENOTSUPP) {
4258	mark_all_scalars_precise(env, st: env->cur_state);
4259	bt_reset(bt);
4260	return `0`;
4261	} else if (err) {
4262	return err;
4263	}
4264	if (bt_empty(bt))
4265	/ Found assignment(s) into tracked register in this state.*
4266	* Since this state is already marked, just return.
4267	* Nothing to be tracked further in the parent state.
4268	*/
4269	return `0`;
4270	subseq_idx = i;
4271	i = get_prev_insn_idx(st, i, history: &history);
4272	if (i == -ENOENT)
4273	break;
4274	if (i >= env->prog->len) {
4275	/ This can happen if backtracking reached insn 0*
4276	* and there are still reg_mask or stack_mask
4277	* to backtrack.
4278	* It means the backtracking missed the spot where
4279	* particular register was initialized with a constant.
4280	*/
4281	verbose(private_data: env, fmt: "BUG backtracking idx %d\n", i);
4282	WARN_ONCE(`1`, "verifier backtracking bug");
4283	return -EFAULT;
4284	}
4285	}
4286	st = st->parent;
4287	if (!st)
4288	break;
4289
4290	for (fr = bt->frame; fr >= `0`; fr--) {
4291	func = st->frame[fr];
4292	bitmap_from_u64(dst: mask, mask: bt_frame_reg_mask(bt, frame: fr));
4293	for_each_set_bit(i, mask, `32`) {
4294	reg = &func->regs[i];
4295	if (reg->type != SCALAR_VALUE) {
4296	bt_clear_frame_reg(bt, frame: fr, reg: i);
4297	continue;
4298	}
4299	if (reg->precise)
4300	bt_clear_frame_reg(bt, frame: fr, reg: i);
4301	else
4302	reg->precise = true;
4303	}
4304
4305	bitmap_from_u64(dst: mask, mask: bt_frame_stack_mask(bt, frame: fr));
4306	for_each_set_bit(i, mask, `64`) {
4307	if (i >= func->allocated_stack / BPF_REG_SIZE) {
4308	verbose(private_data: env, fmt: "BUG backtracking (stack slot %d, total slots %d)\n",
4309	i, func->allocated_stack / BPF_REG_SIZE);
4310	WARN_ONCE(`1`, "verifier backtracking bug (stack slot out of bounds)");
4311	return -EFAULT;
4312	}
4313
4314	if (!is_spilled_scalar_reg(stack: &func->stack[i])) {
4315	bt_clear_frame_slot(bt, frame: fr, slot: i);
4316	continue;
4317	}
4318	reg = &func->stack[i].spilled_ptr;
4319	if (reg->precise)
4320	bt_clear_frame_slot(bt, frame: fr, slot: i);
4321	else
4322	reg->precise = true;
4323	}
4324	if (env->log.level & BPF_LOG_LEVEL2) {
4325	fmt_reg_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN,
4326	reg_mask: bt_frame_reg_mask(bt, frame: fr));
4327	verbose(private_data: env, fmt: "mark_precise: frame%d: parent state regs=%s ",
4328	fr, env->tmp_str_buf);
4329	fmt_stack_mask(buf: env->tmp_str_buf, TMP_STR_BUF_LEN,
4330	stack_mask: bt_frame_stack_mask(bt, frame: fr));
4331	verbose(private_data: env, fmt: "stack=%s: ", env->tmp_str_buf);
4332	print_verifier_state(env, state: func, print_all: true);
4333	}
4334	}
4335
4336	if (bt_empty(bt))
4337	return `0`;
4338
4339	subseq_idx = first_idx;
4340	last_idx = st->last_insn_idx;
4341	first_idx = st->first_insn_idx;
4342	}
4343
4344	/ if we still have requested precise regs or slots, we missed*
4345	* something (e.g., stack access through non-r10 register), so
4346	* fallback to marking all precise
4347	*/
4348	if (!bt_empty(bt)) {
4349	mark_all_scalars_precise(env, st: env->cur_state);
4350	bt_reset(bt);
4351	}
4352
4353	return `0`;
4354	}
4355
/* Request precision for register @regno of the current frame and for
 * everything its value depends on. Thin wrapper that forwards to
 * __mark_chain_precision() and returns its result unchanged.
 */
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
	return __mark_chain_precision(env, regno);
}
4360
/* Batch flavour of mark_chain_precision(): the caller is expected to have
 * filled env->bt with the desired register and stack masks for all relevant
 * frames, so no single register is named here (regno is passed as -1).
 */
static int mark_chain_precision_batch(struct bpf_verifier_env *env)
{
	int regno = -1;

	return __mark_chain_precision(env, regno);
}
4368
4369	static bool is_spillable_regtype(enum bpf_reg_type type)
4370	{
4371	switch (base_type(type)) {
4372	case PTR_TO_MAP_VALUE:
4373	case PTR_TO_STACK:
4374	case PTR_TO_CTX:
4375	case PTR_TO_PACKET:
4376	case PTR_TO_PACKET_META:
4377	case PTR_TO_PACKET_END:
4378	case PTR_TO_FLOW_KEYS:
4379	case CONST_PTR_TO_MAP:
4380	case PTR_TO_SOCKET:
4381	case PTR_TO_SOCK_COMMON:
4382	case PTR_TO_TCP_SOCK:
4383	case PTR_TO_XDP_SOCK:
4384	case PTR_TO_BTF_ID:
4385	case PTR_TO_BUF:
4386	case PTR_TO_MEM:
4387	case PTR_TO_FUNC:
4388	case PTR_TO_MAP_KEY:
4389	case PTR_TO_ARENA:
4390	return true;
4391	default:
4392	return false;
4393	}
4394	}
4395
4396	/ Does this register contain a constant zero? /
4397	static bool register_is_null(struct bpf_reg_state *reg)
4398	{
4399	return reg->type == SCALAR_VALUE && tnum_equals_const(a: reg->var_off, b: `0`);
4400	}
4401
4402	/ check if register is a constant scalar value /
4403	static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
4404	{
4405	return reg->type == SCALAR_VALUE &&
4406	tnum_is_const(a: subreg32 ? tnum_subreg(a: reg->var_off) : reg->var_off);
4407	}
4408
4409	/ assuming is_reg_const() is true, return constant value of a register /
4410	static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
4411	{
4412	return subreg32 ? tnum_subreg(a: reg->var_off).value : reg->var_off.value;
4413	}
4414
4415	static bool __is_pointer_value(bool allow_ptr_leaks,
4416	const struct bpf_reg_state *reg)
4417	{
4418	if (allow_ptr_leaks)
4419	return false;
4420
4421	return reg->type != SCALAR_VALUE;
4422	}
4423
4424	static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
4425	struct bpf_reg_state *src_reg)
4426	{
4427	if (src_reg->type == SCALAR_VALUE && !src_reg->id &&
4428	!tnum_is_const(a: src_reg->var_off))
4429	/ Ensure that src_reg has a valid ID that will be copied to*
4430	* dst_reg and then will be used by find_equal_scalars() to
4431	* propagate min/max range.
4432	*/
4433	src_reg->id = ++env->id_gen;
4434	}
4435
4436	/ Copy src state preserving dst->parent and dst->live fields /
4437	static void copy_register_state(struct bpf_reg_state dst, const* struct bpf_reg_state *src)
4438	{
4439	struct bpf_reg_state *parent = dst->parent;
4440	enum bpf_reg_liveness live = dst->live;
4441
4442	dst = src;
4443	dst->parent = parent;
4444	dst->live = live;
4445	}
4446
4447	static void save_register_state(struct bpf_verifier_env *env,
4448	struct bpf_func_state *state,
4449	int spi, struct bpf_reg_state *reg,
4450	int size)
4451	{
4452	int i;
4453
4454	copy_register_state(dst: &state->stack[spi].spilled_ptr, src: reg);
4455	if (size == BPF_REG_SIZE)
4456	state->stack[spi].spilled_ptr.live \|= REG_LIVE_WRITTEN;
4457
4458	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4459	state->stack[spi].slot_type[i - `1`] = STACK_SPILL;
4460
4461	/ size < 8 bytes spill /
4462	for (; i; i--)
4463	mark_stack_slot_misc(env, stype: &state->stack[spi].slot_type[i - `1`]);
4464	}
4465
4466	static bool is_bpf_st_mem(struct bpf_insn *insn)
4467	{
4468	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4469	}
4470
4471	static int get_reg_width(struct bpf_reg_state *reg)
4472	{
4473	return fls64(x: reg->umax_value);
4474	}
4475
4476	/ check_stack_{read,write}_fixed_off functions track spill/fill of registers,*
4477	* stack boundary and alignment are checked in check_mem_access()
4478	*/
4479	static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4480	/ stack frame we're writing to /
4481	struct bpf_func_state *state,
4482	int off, int size, int value_regno,
4483	int insn_idx)
4484	{
4485	struct bpf_func_state cur; /* state of the current function /
4486	int i, slot = -off - `1`, spi = slot / BPF_REG_SIZE, err;
4487	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4488	struct bpf_reg_state *reg = NULL;
4489	int insn_flags = insn_stack_access_flags(frameno: state->frameno, spi);
4490
4491	/ caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,*
4492	* so it's aligned access and [off, off + size) are within stack limits
4493	*/
4494	if (!env->allow_ptr_leaks &&
4495	is_spilled_reg(stack: &state->stack[spi]) &&
4496	size != BPF_REG_SIZE) {
4497	verbose(private_data: env, fmt: "attempt to corrupt spilled pointer on stack\n");
4498	return -EACCES;
4499	}
4500
4501	cur = env->cur_state->frame[env->cur_state->curframe];
4502	if (value_regno >= `0`)
4503	reg = &cur->regs[value_regno];
4504	if (!env->bypass_spec_v4) {
4505	bool sanitize = reg && is_spillable_regtype(type: reg->type);
4506
4507	for (i = `0`; i < size; i++) {
4508	u8 type = state->stack[spi].slot_type[i];
4509
4510	if (type != STACK_MISC && type != STACK_ZERO) {
4511	sanitize = true;
4512	break;
4513	}
4514	}
4515
4516	if (sanitize)
4517	env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
4518	}
4519
4520	err = destroy_if_dynptr_stack_slot(env, state, spi);
4521	if (err)
4522	return err;
4523
4524	mark_stack_slot_scratched(env, spi);
4525	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
4526	bool reg_value_fits;
4527
4528	reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
4529	/ Make sure that reg had an ID to build a relation on spill. /
4530	if (reg_value_fits)
4531	assign_scalar_id_before_mov(env, src_reg: reg);
4532	save_register_state(env, state, spi, reg, size);
4533	/ Break the relation on a narrowing spill. /
4534	if (!reg_value_fits)
4535	state->stack[spi].spilled_ptr.id = `0`;
4536	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
4537	env->bpf_capable) {
4538	struct bpf_reg_state fake_reg = {};
4539
4540	__mark_reg_known(reg: &fake_reg, imm: insn->imm);
4541	fake_reg.type = SCALAR_VALUE;
4542	save_register_state(env, state, spi, reg: &fake_reg, size);
4543	} else if (reg && is_spillable_regtype(type: reg->type)) {
4544	/ register containing pointer is being spilled into stack /
4545	if (size != BPF_REG_SIZE) {
4546	verbose_linfo(env, insn_off: insn_idx, prefix_fmt: "; ");
4547	verbose(private_data: env, fmt: "invalid size of register spill\n");
4548	return -EACCES;
4549	}
4550	if (state != cur && reg->type == PTR_TO_STACK) {
4551	verbose(private_data: env, fmt: "cannot spill pointers to stack into stack frame of the caller\n");
4552	return -EINVAL;
4553	}
4554	save_register_state(env, state, spi, reg, size);
4555	} else {
4556	u8 type = STACK_MISC;
4557
4558	/ regular write of data into stack destroys any spilled ptr /
4559	state->stack[spi].spilled_ptr.type = NOT_INIT;
4560	/ Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. /
4561	if (is_stack_slot_special(stack: &state->stack[spi]))
4562	for (i = `0`; i < BPF_REG_SIZE; i++)
4563	scrub_spilled_slot(stype: &state->stack[spi].slot_type[i]);
4564
4565	/ only mark the slot as written if all 8 bytes were written*
4566	* otherwise read propagation may incorrectly stop too soon
4567	* when stack slots are partially written.
4568	* This heuristic means that read propagation will be
4569	* conservative, since it will add reg_live_read marks
4570	* to stack slots all the way to first state when programs
4571	* writes+reads less than 8 bytes
4572	*/
4573	if (size == BPF_REG_SIZE)
4574	state->stack[spi].spilled_ptr.live \|= REG_LIVE_WRITTEN;
4575
4576	/ when we zero initialize stack slots mark them as such /
4577	if ((reg && register_is_null(reg)) \|\|
4578	(!reg && is_bpf_st_mem(insn) && insn->imm == `0`)) {
4579	/ STACK_ZERO case happened because register spill*
4580	* wasn't properly aligned at the stack slot boundary,
4581	* so it's not a register spill anymore; force
4582	* originating register to be precise to make
4583	* STACK_ZERO correct for subsequent states
4584	*/
4585	err = mark_chain_precision(env, regno: value_regno);
4586	if (err)
4587	return err;
4588	type = STACK_ZERO;
4589	}
4590
4591	/ Mark slots affected by this stack write. /
4592	for (i = `0`; i < size; i++)
4593	state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
4594	insn_flags = `0`; / not a register spill /
4595	}
4596
4597	if (insn_flags)
4598	return push_jmp_history(env, cur: env->cur_state, insn_flags);
4599	return `0`;
4600	}
4601
4602	/ Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is*
4603	* known to contain a variable offset.
4604	* This function checks whether the write is permitted and conservatively
4605	* tracks the effects of the write, considering that each stack slot in the
4606	* dynamic range is potentially written to.
4607	*
4608	* 'off' includes 'regno->off'.
4609	* 'value_regno' can be -1, meaning that an unknown value is being written to
4610	* the stack.
4611	*
4612	* Spilled pointers in range are not marked as written because we don't know
4613	* what's going to be actually written. This means that read propagation for
4614	* future reads cannot be terminated by this write.
4615	*
4616	* For privileged programs, uninitialized stack slots are considered
4617	* initialized by this write (even though we don't know exactly what offsets
4618	* are going to be written to). The idea is that we don't want the verifier to
4619	* reject future reads that access slots written to through variable offsets.
4620	*/
4621	static int check_stack_write_var_off(struct bpf_verifier_env *env,
4622	/ func where register points to /
4623	struct bpf_func_state *state,
4624	int ptr_regno, int off, int size,
4625	int value_regno, int insn_idx)
4626	{
4627	struct bpf_func_state cur; /* state of the current function /
4628	int min_off, max_off;
4629	int i, err;
4630	struct bpf_reg_state ptr_reg = NULL, value_reg = NULL;
4631	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4632	bool writing_zero = false;
4633	/ set if the fact that we're writing a zero is used to let any*
4634	* stack slots remain STACK_ZERO
4635	*/
4636	bool zero_used = false;
4637
4638	cur = env->cur_state->frame[env->cur_state->curframe];
4639	ptr_reg = &cur->regs[ptr_regno];
4640	min_off = ptr_reg->smin_value + off;
4641	max_off = ptr_reg->smax_value + off + size;
4642	if (value_regno >= `0`)
4643	value_reg = &cur->regs[value_regno];
4644	if ((value_reg && register_is_null(reg: value_reg)) \|\|
4645	(!value_reg && is_bpf_st_mem(insn) && insn->imm == `0`))
4646	writing_zero = true;
4647
4648	for (i = min_off; i < max_off; i++) {
4649	int spi;
4650
4651	spi = __get_spi(off: i);
4652	err = destroy_if_dynptr_stack_slot(env, state, spi);
4653	if (err)
4654	return err;
4655	}
4656
4657	/ Variable offset writes destroy any spilled pointers in range. /
4658	for (i = min_off; i < max_off; i++) {
4659	u8 new_type, *stype;
4660	int slot, spi;
4661
4662	slot = -i - `1`;
4663	spi = slot / BPF_REG_SIZE;
4664	stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4665	mark_stack_slot_scratched(env, spi);
4666
4667	if (!env->allow_ptr_leaks && stype != STACK_MISC && stype != STACK_ZERO) {
4668	/ Reject the write if range we may write to has not*
4669	* been initialized beforehand. If we didn't reject
4670	* here, the ptr status would be erased below (even
4671	* though not all slots are actually overwritten),
4672	* possibly opening the door to leaks.
4673	*
4674	* We do however catch STACK_INVALID case below, and
4675	* only allow reading possibly uninitialized memory
4676	* later for CAP_PERFMON, as the write may not happen to
4677	* that slot.
4678	*/
4679	verbose(private_data: env, fmt: "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
4680	insn_idx, i);
4681	return -EINVAL;
4682	}
4683
4684	/ If writing_zero and the spi slot contains a spill of value 0,*
4685	* maintain the spill type.
4686	*/
4687	if (writing_zero && *stype == STACK_SPILL &&
4688	is_spilled_scalar_reg(stack: &state->stack[spi])) {
4689	struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
4690
4691	if (tnum_is_const(a: spill_reg->var_off) && spill_reg->var_off.value == `0`) {
4692	zero_used = true;
4693	continue;
4694	}
4695	}
4696
4697	/ Erase all other spilled pointers. /
4698	state->stack[spi].spilled_ptr.type = NOT_INIT;
4699
4700	/ Update the slot type. /
4701	new_type = STACK_MISC;
4702	if (writing_zero && *stype == STACK_ZERO) {
4703	new_type = STACK_ZERO;
4704	zero_used = true;
4705	}
4706	/ If the slot is STACK_INVALID, we check whether it's OK to*
4707	* pretend that it will be initialized by this write. The slot
4708	* might not actually be written to, and so if we mark it as
4709	* initialized future reads might leak uninitialized memory.
4710	* For privileged programs, we will accept such reads to slots
4711	* that may or may not be written because, if we're reject
4712	* them, the error would be too confusing.
4713	*/
4714	if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
4715	verbose(private_data: env, fmt: "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
4716	insn_idx, i);
4717	return -EINVAL;
4718	}
4719	*stype = new_type;
4720	}
4721	if (zero_used) {
4722	/ backtracking doesn't work for STACK_ZERO yet. /
4723	err = mark_chain_precision(env, regno: value_regno);
4724	if (err)
4725	return err;
4726	}
4727	return `0`;
4728	}
4729
4730	/ When register 'dst_regno' is assigned some values from stack[min_off,*
4731	* max_off), we set the register's type according to the types of the
4732	* respective stack slots. If all the stack values are known to be zeros, then
4733	* so is the destination reg. Otherwise, the register is considered to be
4734	* SCALAR. This function does not deal with register filling; the caller must
4735	* ensure that all spilled registers in the stack range have been marked as
4736	* read.
4737	*/
4738	static void mark_reg_stack_read(struct bpf_verifier_env *env,
4739	/ func where src register points to /
4740	struct bpf_func_state *ptr_state,
4741	int min_off, int max_off, int dst_regno)
4742	{
4743	struct bpf_verifier_state *vstate = env->cur_state;
4744	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4745	int i, slot, spi;
4746	u8 *stype;
4747	int zeros = `0`;
4748
4749	for (i = min_off; i < max_off; i++) {
4750	slot = -i - `1`;
4751	spi = slot / BPF_REG_SIZE;
4752	mark_stack_slot_scratched(env, spi);
4753	stype = ptr_state->stack[spi].slot_type;
4754	if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
4755	break;
4756	zeros++;
4757	}
4758	if (zeros == max_off - min_off) {
4759	/ Any access_size read into register is zero extended,*
4760	* so the whole register == const_zero.
4761	*/
4762	__mark_reg_const_zero(env, reg: &state->regs[dst_regno]);
4763	} else {
4764	/ have read misc data from the stack /
4765	mark_reg_unknown(env, regs: state->regs, regno: dst_regno);
4766	}
4767	state->regs[dst_regno].live \|= REG_LIVE_WRITTEN;
4768	}
4769
4770	/ Read the stack at 'off' and put the results into the register indicated by*
4771	* 'dst_regno'. It handles reg filling if the addressed stack slot is a
4772	* spilled reg.
4773	*
4774	* 'dst_regno' can be -1, meaning that the read value is not going to a
4775	* register.
4776	*
4777	* The access is assumed to be within the current stack bounds.
4778	*/
4779	static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
4780	/ func where src register points to /
4781	struct bpf_func_state *reg_state,
4782	int off, int size, int dst_regno)
4783	{
4784	struct bpf_verifier_state *vstate = env->cur_state;
4785	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4786	int i, slot = -off - `1`, spi = slot / BPF_REG_SIZE;
4787	struct bpf_reg_state *reg;
4788	u8 *stype, type;
4789	int insn_flags = insn_stack_access_flags(frameno: reg_state->frameno, spi);
4790
4791	stype = reg_state->stack[spi].slot_type;
4792	reg = &reg_state->stack[spi].spilled_ptr;
4793
4794	mark_stack_slot_scratched(env, spi);
4795
4796	if (is_spilled_reg(stack: &reg_state->stack[spi])) {
4797	u8 spill_size = `1`;
4798
4799	for (i = BPF_REG_SIZE - `1`; i > `0` && stype[i - `1`] == STACK_SPILL; i--)
4800	spill_size++;
4801
4802	if (size != BPF_REG_SIZE \|\| spill_size != BPF_REG_SIZE) {
4803	if (reg->type != SCALAR_VALUE) {
4804	verbose_linfo(env, insn_off: env->insn_idx, prefix_fmt: "; ");
4805	verbose(private_data: env, fmt: "invalid size of register fill\n");
4806	return -EACCES;
4807	}
4808
4809	mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
4810	if (dst_regno < `0`)
4811	return `0`;
4812
4813	if (size <= spill_size &&
4814	bpf_stack_narrow_access_ok(off, fill_size: size, spill_size)) {
4815	/ The earlier check_reg_arg() has decided the*
4816	* subreg_def for this insn. Save it first.
4817	*/
4818	s32 subreg_def = state->regs[dst_regno].subreg_def;
4819
4820	copy_register_state(dst: &state->regs[dst_regno], src: reg);
4821	state->regs[dst_regno].subreg_def = subreg_def;
4822
4823	/ Break the relation on a narrowing fill.*
4824	* coerce_reg_to_size will adjust the boundaries.
4825	*/
4826	if (get_reg_width(reg) > size * BITS_PER_BYTE)
4827	state->regs[dst_regno].id = `0`;
4828	} else {
4829	int spill_cnt = `0`, zero_cnt = `0`;
4830
4831	for (i = `0`; i < size; i++) {
4832	type = stype[(slot - i) % BPF_REG_SIZE];
4833	if (type == STACK_SPILL) {
4834	spill_cnt++;
4835	continue;
4836	}
4837	if (type == STACK_MISC)
4838	continue;
4839	if (type == STACK_ZERO) {
4840	zero_cnt++;
4841	continue;
4842	}
4843	if (type == STACK_INVALID && env->allow_uninit_stack)
4844	continue;
4845	verbose(private_data: env, fmt: "invalid read from stack off %d+%d size %d\n",
4846	off, i, size);
4847	return -EACCES;
4848	}
4849
4850	if (spill_cnt == size &&
4851	tnum_is_const(a: reg->var_off) && reg->var_off.value == `0`) {
4852	__mark_reg_const_zero(env, reg: &state->regs[dst_regno]);
4853	/ this IS register fill, so keep insn_flags /
4854	} else if (zero_cnt == size) {
4855	/ similarly to mark_reg_stack_read(), preserve zeroes /
4856	__mark_reg_const_zero(env, reg: &state->regs[dst_regno]);
4857	insn_flags = `0`; / not restoring original register state /
4858	} else {
4859	mark_reg_unknown(env, regs: state->regs, regno: dst_regno);
4860	insn_flags = `0`; / not restoring original register state /
4861	}
4862	}
4863	state->regs[dst_regno].live \|= REG_LIVE_WRITTEN;
4864	} else if (dst_regno >= `0`) {
4865	/ restore register state from stack /
4866	copy_register_state(dst: &state->regs[dst_regno], src: reg);
4867	/ mark reg as written since spilled pointer state likely*
4868	* has its liveness marks cleared by is_state_visited()
4869	* which resets stack/reg liveness for state transitions
4870	*/
4871	state->regs[dst_regno].live \|= REG_LIVE_WRITTEN;
4872	} else if (__is_pointer_value(allow_ptr_leaks: env->allow_ptr_leaks, reg)) {
4873	/ If dst_regno==-1, the caller is asking us whether*
4874	* it is acceptable to use this value as a SCALAR_VALUE
4875	* (e.g. for XADD).
4876	* We must not allow unprivileged callers to do that
4877	* with spilled pointers.
4878	*/
4879	verbose(private_data: env, fmt: "leaking pointer from stack off %d\n",
4880	off);
4881	return -EACCES;
4882	}
4883	mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
4884	} else {
4885	for (i = `0`; i < size; i++) {
4886	type = stype[(slot - i) % BPF_REG_SIZE];
4887	if (type == STACK_MISC)
4888	continue;
4889	if (type == STACK_ZERO)
4890	continue;
4891	if (type == STACK_INVALID && env->allow_uninit_stack)
4892	continue;
4893	verbose(private_data: env, fmt: "invalid read from stack off %d+%d size %d\n",
4894	off, i, size);
4895	return -EACCES;
4896	}
4897	mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
4898	if (dst_regno >= `0`)
4899	mark_reg_stack_read(env, ptr_state: reg_state, min_off: off, max_off: off + size, dst_regno);
4900	insn_flags = `0`; / we are not restoring spilled register /
4901	}
4902	if (insn_flags)
4903	return push_jmp_history(env, cur: env->cur_state, insn_flags);
4904	return `0`;
4905	}
4906
/* Who performs the memory access that is being checked. */
enum bpf_access_src {
	/* the access is performed by an instruction */
	ACCESS_DIRECT = 1,
	/* the access is performed by a helper */
	ACCESS_HELPER = 2,
};
4911
4912	static int check_stack_range_initialized(struct bpf_verifier_env *env,
4913	int regno, int off, int access_size,
4914	bool zero_size_allowed,
4915	enum bpf_access_src type,
4916	struct bpf_call_arg_meta *meta);
4917
4918	static struct bpf_reg_state reg_state(struct* bpf_verifier_env env, int* regno)
4919	{
4920	return cur_regs(env) + regno;
4921	}
4922
4923	/ Read the stack at 'ptr_regno + off' and put the result into the register*
4924	* 'dst_regno'.
4925	* 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
4926	* but not its variable offset.
4927	* 'size' is assumed to be <= reg size and the access is assumed to be aligned.
4928	*
4929	* As opposed to check_stack_read_fixed_off, this function doesn't deal with
4930	* filling registers (i.e. reads of spilled register cannot be detected when
4931	* the offset is not fixed). We conservatively mark 'dst_regno' as containing
4932	* SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
4933	* offset; for a fixed offset check_stack_read_fixed_off should be used
4934	* instead.
4935	*/
4936	static int check_stack_read_var_off(struct bpf_verifier_env *env,
4937	int ptr_regno, int off, int size, int dst_regno)
4938	{
4939	/ The state of the source register. /
4940	struct bpf_reg_state *reg = reg_state(env, regno: ptr_regno);
4941	struct bpf_func_state *ptr_state = func(env, reg);
4942	int err;
4943	int min_off, max_off;
4944
4945	/ Note that we pass a NULL meta, so raw access will not be permitted.*
4946	*/
4947	err = check_stack_range_initialized(env, regno: ptr_regno, off, access_size: size,
4948	zero_size_allowed: false, type: ACCESS_DIRECT, NULL);
4949	if (err)
4950	return err;
4951
4952	min_off = reg->smin_value + off;
4953	max_off = reg->smax_value + off;
4954	mark_reg_stack_read(env, ptr_state, min_off, max_off: max_off + size, dst_regno);
4955	return `0`;
4956	}
4957
4958	/ check_stack_read dispatches to check_stack_read_fixed_off or*
4959	* check_stack_read_var_off.
4960	*
4961	* The caller must ensure that the offset falls within the allocated stack
4962	* bounds.
4963	*
4964	* 'dst_regno' is a register which will receive the value from the stack. It
4965	* can be -1, meaning that the read value is not going to a register.
4966	*/
4967	static int check_stack_read(struct bpf_verifier_env *env,
4968	int ptr_regno, int off, int size,
4969	int dst_regno)
4970	{
4971	struct bpf_reg_state *reg = reg_state(env, regno: ptr_regno);
4972	struct bpf_func_state *state = func(env, reg);
4973	int err;
4974	/ Some accesses are only permitted with a static offset. /
4975	bool var_off = !tnum_is_const(a: reg->var_off);
4976
4977	/ The offset is required to be static when reads don't go to a*
4978	* register, in order to not leak pointers (see
4979	* check_stack_read_fixed_off).
4980	*/
4981	if (dst_regno < `0` && var_off) {
4982	char tn_buf[`48`];
4983
4984	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
4985	verbose(private_data: env, fmt: "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
4986	tn_buf, off, size);
4987	return -EACCES;
4988	}
4989	/ Variable offset is prohibited for unprivileged mode for simplicity*
4990	* since it requires corresponding support in Spectre masking for stack
4991	* ALU. See also retrieve_ptr_limit(). The check in
4992	* check_stack_access_for_ptr_arithmetic() called by
4993	* adjust_ptr_min_max_vals() prevents users from creating stack pointers
4994	* with variable offsets, therefore no check is required here. Further,
4995	* just checking it here would be insufficient as speculative stack
4996	* writes could still lead to unsafe speculative behaviour.
4997	*/
4998	if (!var_off) {
4999	off += reg->var_off.value;
5000	err = check_stack_read_fixed_off(env, reg_state: state, off, size,
5001	dst_regno);
5002	} else {
5003	/ Variable offset stack reads need more conservative handling*
5004	* than fixed offset ones. Note that dst_regno >= 0 on this
5005	* branch.
5006	*/
5007	err = check_stack_read_var_off(env, ptr_regno, off, size,
5008	dst_regno);
5009	}
5010	return err;
5011	}
5012
5013
5014	/ check_stack_write dispatches to check_stack_write_fixed_off or*
5015	* check_stack_write_var_off.
5016	*
5017	* 'ptr_regno' is the register used as a pointer into the stack.
5018	* 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5019	* 'value_regno' is the register whose value we're writing to the stack. It can
5020	* be -1, meaning that we're not writing from a register.
5021	*
5022	* The caller must ensure that the offset falls within the maximum stack size.
5023	*/
5024	static int check_stack_write(struct bpf_verifier_env *env,
5025	int ptr_regno, int off, int size,
5026	int value_regno, int insn_idx)
5027	{
5028	struct bpf_reg_state *reg = reg_state(env, regno: ptr_regno);
5029	struct bpf_func_state *state = func(env, reg);
5030	int err;
5031
5032	if (tnum_is_const(a: reg->var_off)) {
5033	off += reg->var_off.value;
5034	err = check_stack_write_fixed_off(env, state, off, size,
5035	value_regno, insn_idx);
5036	} else {
5037	/ Variable offset stack reads need more conservative handling*
5038	* than fixed offset ones.
5039	*/
5040	err = check_stack_write_var_off(env, state,
5041	ptr_regno, off, size,
5042	value_regno, insn_idx);
5043	}
5044	return err;
5045	}
5046
5047	static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
5048	int off, int size, enum bpf_access_type type)
5049	{
5050	struct bpf_reg_state *regs = cur_regs(env);
5051	struct bpf_map *map = regs[regno].map_ptr;
5052	u32 cap = bpf_map_flags_to_cap(map);
5053
5054	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
5055	verbose(private_data: env, fmt: "write into map forbidden, value_size=%d off=%d size=%d\n",
5056	map->value_size, off, size);
5057	return -EACCES;
5058	}
5059
5060	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
5061	verbose(private_data: env, fmt: "read from map forbidden, value_size=%d off=%d size=%d\n",
5062	map->value_size, off, size);
5063	return -EACCES;
5064	}
5065
5066	return `0`;
5067	}
5068
5069	/ check read/write into memory region (e.g., map value, ringbuf sample, etc) /
5070	static int __check_mem_access(struct bpf_verifier_env env, int* regno,
5071	int off, int size, u32 mem_size,
5072	bool zero_size_allowed)
5073	{
5074	bool size_ok = size > `0` \|\| (size == `0` && zero_size_allowed);
5075	struct bpf_reg_state *reg;
5076
5077	if (off >= `0` && size_ok && (u64)off + size <= mem_size)
5078	return `0`;
5079
5080	reg = &cur_regs(env)[regno];
5081	switch (reg->type) {
5082	case PTR_TO_MAP_KEY:
5083	verbose(private_data: env, fmt: "invalid access to map key, key_size=%d off=%d size=%d\n",
5084	mem_size, off, size);
5085	break;
5086	case PTR_TO_MAP_VALUE:
5087	verbose(private_data: env, fmt: "invalid access to map value, value_size=%d off=%d size=%d\n",
5088	mem_size, off, size);
5089	break;
5090	case PTR_TO_PACKET:
5091	case PTR_TO_PACKET_META:
5092	case PTR_TO_PACKET_END:
5093	verbose(private_data: env, fmt: "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
5094	off, size, regno, reg->id, off, mem_size);
5095	break;
5096	case PTR_TO_MEM:
5097	default:
5098	verbose(private_data: env, fmt: "invalid access to memory, mem_size=%u off=%d size=%d\n",
5099	mem_size, off, size);
5100	}
5101
5102	return -EACCES;
5103	}
5104
5105	/ check read/write into a memory region with possible variable offset /
5106	static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5107	int off, int size, u32 mem_size,
5108	bool zero_size_allowed)
5109	{
5110	struct bpf_verifier_state *vstate = env->cur_state;
5111	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5112	struct bpf_reg_state *reg = &state->regs[regno];
5113	int err;
5114
5115	/ We may have adjusted the register pointing to memory region, so we*
5116	* need to try adding each of min_value and max_value to off
5117	* to make sure our theoretical access will be safe.
5118	*
5119	* The minimum value is only important with signed
5120	* comparisons where we can't assume the floor of a
5121	* value is 0. If we are using signed variables for our
5122	* index'es we need to make sure that whatever we use
5123	* will have a set floor within our range.
5124	*/
5125	if (reg->smin_value < `0` &&
5126	(reg->smin_value == S64_MIN \|\|
5127	(off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) \|\|
5128	reg->smin_value + off < `0`)) {
5129	verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5130	regno);
5131	return -EACCES;
5132	}
5133	err = __check_mem_access(env, regno, off: reg->smin_value + off, size,
5134	mem_size, zero_size_allowed);
5135	if (err) {
5136	verbose(private_data: env, fmt: "R%d min value is outside of the allowed memory range\n",
5137	regno);
5138	return err;
5139	}
5140
5141	/ If we haven't set a max value then we need to bail since we can't be*
5142	* sure we won't do bad things.
5143	* If reg->umax_value + off could overflow, treat that as unbounded too.
5144	*/
5145	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
5146	verbose(private_data: env, fmt: "R%d unbounded memory access, make sure to bounds check any such access\n",
5147	regno);
5148	return -EACCES;
5149	}
5150	err = __check_mem_access(env, regno, off: reg->umax_value + off, size,
5151	mem_size, zero_size_allowed);
5152	if (err) {
5153	verbose(private_data: env, fmt: "R%d max value is outside of the allowed memory range\n",
5154	regno);
5155	return err;
5156	}
5157
5158	return `0`;
5159	}
5160
5161	static int __check_ptr_off_reg(struct bpf_verifier_env *env,
5162	const struct bpf_reg_state reg, int* regno,
5163	bool fixed_off_ok)
5164	{
5165	/ Access to this pointer-typed register or passing it to a helper*
5166	* is only allowed in its original, unmodified form.
5167	*/
5168
5169	if (reg->off < `0`) {
5170	verbose(private_data: env, fmt: "negative offset %s ptr R%d off=%d disallowed\n",
5171	reg_type_str(env, type: reg->type), regno, reg->off);
5172	return -EACCES;
5173	}
5174
5175	if (!fixed_off_ok && reg->off) {
5176	verbose(private_data: env, fmt: "dereference of modified %s ptr R%d off=%d disallowed\n",
5177	reg_type_str(env, type: reg->type), regno, reg->off);
5178	return -EACCES;
5179	}
5180
5181	if (!tnum_is_const(a: reg->var_off) \|\| reg->var_off.value) {
5182	char tn_buf[`48`];
5183
5184	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
5185	verbose(private_data: env, fmt: "variable %s access var_off=%s disallowed\n",
5186	reg_type_str(env, type: reg->type), tn_buf);
5187	return -EACCES;
5188	}
5189
5190	return `0`;
5191	}
5192
5193	static int check_ptr_off_reg(struct bpf_verifier_env *env,
5194	const struct bpf_reg_state reg, int* regno)
5195	{
5196	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok: false);
5197	}
5198
5199	static int map_kptr_match_type(struct bpf_verifier_env *env,
5200	struct btf_field *kptr_field,
5201	struct bpf_reg_state *reg, u32 regno)
5202	{
5203	const char *targ_name = btf_type_name(btf: kptr_field->kptr.btf, id: kptr_field->kptr.btf_id);
5204	int perm_flags;
5205	const char *reg_name = "";
5206
5207	if (btf_is_kernel(btf: reg->btf)) {
5208	perm_flags = PTR_MAYBE_NULL \| PTR_TRUSTED \| MEM_RCU;
5209
5210	/ Only unreferenced case accepts untrusted pointers /
5211	if (kptr_field->type == BPF_KPTR_UNREF)
5212	perm_flags \|= PTR_UNTRUSTED;
5213	} else {
5214	perm_flags = PTR_MAYBE_NULL \| MEM_ALLOC;
5215	if (kptr_field->type == BPF_KPTR_PERCPU)
5216	perm_flags \|= MEM_PERCPU;
5217	}
5218
5219	if (base_type(type: reg->type) != PTR_TO_BTF_ID \|\| (type_flag(type: reg->type) & ~perm_flags))
5220	goto bad_type;
5221
5222	/ We need to verify reg->type and reg->btf, before accessing reg->btf /
5223	reg_name = btf_type_name(btf: reg->btf, id: reg->btf_id);
5224
5225	/ For ref_ptr case, release function check should ensure we get one*
5226	* referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5227	* normal store of unreferenced kptr, we must ensure var_off is zero.
5228	* Since ref_ptr cannot be accessed directly by BPF insns, checks for
5229	* reg->off and reg->ref_obj_id are not needed here.
5230	*/
5231	if (__check_ptr_off_reg(env, reg, regno, fixed_off_ok: true))
5232	return -EACCES;
5233
5234	/ A full type match is needed, as BTF can be vmlinux, module or prog BTF, and*
5235	* we also need to take into account the reg->off.
5236	*
5237	* We want to support cases like:
5238	*
5239	* struct foo {
5240	* struct bar br;
5241	* struct baz bz;
5242	* };
5243	*
5244	* struct foo *v;
5245	* v = func(); // PTR_TO_BTF_ID
5246	* val->foo = v; // reg->off is zero, btf and btf_id match type
5247	* val->bar = &v->br; // reg->off is still zero, but we need to retry with
5248	* // first member type of struct after comparison fails
5249	* val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5250	* // to match type
5251	*
5252	* In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5253	* is zero. We must also ensure that btf_struct_ids_match does not walk
5254	* the struct to match type against first member of struct, i.e. reject
5255	* second case from above. Hence, when type is BPF_KPTR_REF, we set
5256	* strict mode to true for type match.
5257	*/
5258	if (!btf_struct_ids_match(log: &env->log, btf: reg->btf, id: reg->btf_id, off: reg->off,
5259	need_btf: kptr_field->kptr.btf, need_type_id: kptr_field->kptr.btf_id,
5260	strict: kptr_field->type != BPF_KPTR_UNREF))
5261	goto bad_type;
5262	return `0`;
5263	bad_type:
5264	verbose(private_data: env, fmt: "invalid kptr access, R%d type=%s%s ", regno,
5265	reg_type_str(env, type: reg->type), reg_name);
5266	verbose(private_data: env, fmt: "expected=%s%s", reg_type_str(env, type: PTR_TO_BTF_ID), targ_name);
5267	if (kptr_field->type == BPF_KPTR_UNREF)
5268	verbose(private_data: env, fmt: " or %s%s\n", reg_type_str(env, type: PTR_TO_BTF_ID \| PTR_UNTRUSTED),
5269	targ_name);
5270	else
5271	verbose(private_data: env, fmt: "\n");
5272	return -EINVAL;
5273	}
5274
5275	static bool in_sleepable(struct bpf_verifier_env *env)
5276	{
5277	return env->prog->sleepable;
5278	}
5279
5280	/ The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()*
5281	* can dereference RCU protected pointers and result is PTR_TRUSTED.
5282	*/
5283	static bool in_rcu_cs(struct bpf_verifier_env *env)
5284	{
5285	return env->cur_state->active_rcu_lock \|\|
5286	env->cur_state->active_lock.ptr \|\|
5287	!in_sleepable(env);
5288	}
5289
5290	/ Once GCC supports btf_type_tag the following mechanism will be replaced with tag check /
5291	BTF_SET_START(rcu_protected_types)
5292	BTF_ID(struct, prog_test_ref_kfunc)
5293	#ifdef CONFIG_CGROUPS
5294	BTF_ID(struct, cgroup)
5295	#endif
5296	#ifdef CONFIG_BPF_JIT
5297	BTF_ID(struct, bpf_cpumask)
5298	#endif
5299	BTF_ID(struct, task_struct)
5300	BTF_SET_END(rcu_protected_types)
5301
5302	static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5303	{
5304	if (!btf_is_kernel(btf))
5305	return true;
5306	return btf_id_set_contains(set: &rcu_protected_types, id: btf_id);
5307	}
5308
5309	static struct btf_record kptr_pointee_btf_record(struct* btf_field *kptr_field)
5310	{
5311	struct btf_struct_meta *meta;
5312
5313	if (btf_is_kernel(btf: kptr_field->kptr.btf))
5314	return NULL;
5315
5316	meta = btf_find_struct_meta(btf: kptr_field->kptr.btf,
5317	btf_id: kptr_field->kptr.btf_id);
5318
5319	return meta ? meta->record : NULL;
5320	}
5321
5322	static bool rcu_safe_kptr(const struct btf_field *field)
5323	{
5324	const struct btf_field_kptr *kptr = &field->kptr;
5325
5326	return field->type == BPF_KPTR_PERCPU \|\|
5327	(field->type == BPF_KPTR_REF && rcu_protected_object(btf: kptr->btf, btf_id: kptr->btf_id));
5328	}
5329
5330	static u32 btf_ld_kptr_type(struct bpf_verifier_env env, struct* btf_field *kptr_field)
5331	{
5332	struct btf_record *rec;
5333	u32 ret;
5334
5335	ret = PTR_MAYBE_NULL;
5336	if (rcu_safe_kptr(field: kptr_field) && in_rcu_cs(env)) {
5337	ret \|= MEM_RCU;
5338	if (kptr_field->type == BPF_KPTR_PERCPU)
5339	ret \|= MEM_PERCPU;
5340	else if (!btf_is_kernel(btf: kptr_field->kptr.btf))
5341	ret \|= MEM_ALLOC;
5342
5343	rec = kptr_pointee_btf_record(kptr_field);
5344	if (rec && btf_record_has_field(rec, type: BPF_GRAPH_NODE))
5345	ret \|= NON_OWN_REF;
5346	} else {
5347	ret \|= PTR_UNTRUSTED;
5348	}
5349
5350	return ret;
5351	}
5352
5353	static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5354	int value_regno, int insn_idx,
5355	struct btf_field *kptr_field)
5356	{
5357	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5358	int class = BPF_CLASS(insn->code);
5359	struct bpf_reg_state *val_reg;
5360
5361	/ Things we already checked for in check_map_access and caller:*
5362	* - Reject cases where variable offset may touch kptr
5363	* - size of access (must be BPF_DW)
5364	* - tnum_is_const(reg->var_off)
5365	* - kptr_field->offset == off + reg->var_off.value
5366	*/
5367	/ Only BPF_[LDX,STX,ST] \| BPF_MEM \| BPF_DW is supported /
5368	if (BPF_MODE(insn->code) != BPF_MEM) {
5369	verbose(private_data: env, fmt: "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5370	return -EACCES;
5371	}
5372
5373	/ We only allow loading referenced kptr, since it will be marked as*
5374	* untrusted, similar to unreferenced kptr.
5375	*/
5376	if (class != BPF_LDX &&
5377	(kptr_field->type == BPF_KPTR_REF \|\| kptr_field->type == BPF_KPTR_PERCPU)) {
5378	verbose(private_data: env, fmt: "store to referenced kptr disallowed\n");
5379	return -EACCES;
5380	}
5381
5382	if (class == BPF_LDX) {
5383	val_reg = reg_state(env, regno: value_regno);
5384	/ We can simply mark the value_regno receiving the pointer*
5385	* value from map as PTR_TO_BTF_ID, with the correct type.
5386	*/
5387	mark_btf_ld_reg(env, regs: cur_regs(env), regno: value_regno, reg_type: PTR_TO_BTF_ID, btf: kptr_field->kptr.btf,
5388	btf_id: kptr_field->kptr.btf_id, flag: btf_ld_kptr_type(env, kptr_field));
5389	/ For mark_ptr_or_null_reg /
5390	val_reg->id = ++env->id_gen;
5391	} else if (class == BPF_STX) {
5392	val_reg = reg_state(env, regno: value_regno);
5393	if (!register_is_null(reg: val_reg) &&
5394	map_kptr_match_type(env, kptr_field, reg: val_reg, regno: value_regno))
5395	return -EACCES;
5396	} else if (class == BPF_ST) {
5397	if (insn->imm) {
5398	verbose(private_data: env, fmt: "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5399	kptr_field->offset);
5400	return -EACCES;
5401	}
5402	} else {
5403	verbose(private_data: env, fmt: "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5404	return -EACCES;
5405	}
5406	return `0`;
5407	}
5408
5409	/ check read/write into a map element with possible variable offset /
5410	static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5411	int off, int size, bool zero_size_allowed,
5412	enum bpf_access_src src)
5413	{
5414	struct bpf_verifier_state *vstate = env->cur_state;
5415	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5416	struct bpf_reg_state *reg = &state->regs[regno];
5417	struct bpf_map *map = reg->map_ptr;
5418	struct btf_record *rec;
5419	int err, i;
5420
5421	err = check_mem_region_access(env, regno, off, size, mem_size: map->value_size,
5422	zero_size_allowed);
5423	if (err)
5424	return err;
5425
5426	if (IS_ERR_OR_NULL(ptr: map->record))
5427	return `0`;
5428	rec = map->record;
5429	for (i = `0`; i < rec->cnt; i++) {
5430	struct btf_field *field = &rec->fields[i];
5431	u32 p = field->offset;
5432
5433	/ If any part of a field can be touched by load/store, reject*
5434	* this program. To check that [x1, x2) overlaps with [y1, y2),
5435	* it is sufficient to check x1 < y2 && y1 < x2.
5436	*/
5437	if (reg->smin_value + off < p + btf_field_type_size(type: field->type) &&
5438	p < reg->umax_value + off + size) {
5439	switch (field->type) {
5440	case BPF_KPTR_UNREF:
5441	case BPF_KPTR_REF:
5442	case BPF_KPTR_PERCPU:
5443	if (src != ACCESS_DIRECT) {
5444	verbose(private_data: env, fmt: "kptr cannot be accessed indirectly by helper\n");
5445	return -EACCES;
5446	}
5447	if (!tnum_is_const(a: reg->var_off)) {
5448	verbose(private_data: env, fmt: "kptr access cannot have variable offset\n");
5449	return -EACCES;
5450	}
5451	if (p != off + reg->var_off.value) {
5452	verbose(private_data: env, fmt: "kptr access misaligned expected=%u off=%llu\n",
5453	p, off + reg->var_off.value);
5454	return -EACCES;
5455	}
5456	if (size != bpf_size_to_bytes(BPF_DW)) {
5457	verbose(private_data: env, fmt: "kptr access size must be BPF_DW\n");
5458	return -EACCES;
5459	}
5460	break;
5461	default:
5462	verbose(private_data: env, fmt: "%s cannot be accessed directly by load/store\n",
5463	btf_field_type_name(type: field->type));
5464	return -EACCES;
5465	}
5466	}
5467	}
5468	return `0`;
5469	}
5470
/* Largest packet offset the verifier reasons about (fits in u16).
 * check_packet_access() relies on packet pointer bounds staying within it.
 */
#define MAX_PACKET_OFF 0xffff
5472
5473	static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
5474	const struct bpf_call_arg_meta *meta,
5475	enum bpf_access_type t)
5476	{
5477	enum bpf_prog_type prog_type = resolve_prog_type(prog: env->prog);
5478
5479	switch (prog_type) {
5480	/ Program types only with direct read access go here! /
5481	case BPF_PROG_TYPE_LWT_IN:
5482	case BPF_PROG_TYPE_LWT_OUT:
5483	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
5484	case BPF_PROG_TYPE_SK_REUSEPORT:
5485	case BPF_PROG_TYPE_FLOW_DISSECTOR:
5486	case BPF_PROG_TYPE_CGROUP_SKB:
5487	if (t == BPF_WRITE)
5488	return false;
5489	fallthrough;
5490
5491	/ Program types with direct read + write access go here! /
5492	case BPF_PROG_TYPE_SCHED_CLS:
5493	case BPF_PROG_TYPE_SCHED_ACT:
5494	case BPF_PROG_TYPE_XDP:
5495	case BPF_PROG_TYPE_LWT_XMIT:
5496	case BPF_PROG_TYPE_SK_SKB:
5497	case BPF_PROG_TYPE_SK_MSG:
5498	if (meta)
5499	return meta->pkt_access;
5500
5501	env->seen_direct_write = true;
5502	return true;
5503
5504	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5505	if (t == BPF_WRITE)
5506	env->seen_direct_write = true;
5507
5508	return true;
5509
5510	default:
5511	return false;
5512	}
5513	}
5514
5515	static int check_packet_access(struct bpf_verifier_env env, u32 regno, int* off,
5516	int size, bool zero_size_allowed)
5517	{
5518	struct bpf_reg_state *regs = cur_regs(env);
5519	struct bpf_reg_state *reg = &regs[regno];
5520	int err;
5521
5522	/ We may have added a variable offset to the packet pointer; but any*
5523	* reg->range we have comes after that. We are only checking the fixed
5524	* offset.
5525	*/
5526
5527	/ We don't allow negative numbers, because we aren't tracking enough*
5528	* detail to prove they're safe.
5529	*/
5530	if (reg->smin_value < `0`) {
5531	verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5532	regno);
5533	return -EACCES;
5534	}
5535
5536	err = reg->range < `0` ? -EINVAL :
5537	__check_mem_access(env, regno, off, size, mem_size: reg->range,
5538	zero_size_allowed);
5539	if (err) {
5540	verbose(private_data: env, fmt: "R%d offset is outside of the packet\n", regno);
5541	return err;
5542	}
5543
5544	/ __check_mem_access has made sure "off + size - 1" is within u16.*
5545	* reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
5546	* otherwise find_good_pkt_pointers would have refused to set range info
5547	* that __check_mem_access would have rejected this pkt access.
5548	* Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
5549	*/
5550	env->prog->aux->max_pkt_offset =
5551	max_t(u32, env->prog->aux->max_pkt_offset,
5552	off + reg->umax_value + size - `1`);
5553
5554	return err;
5555	}
5556
5557	/ check access to 'struct bpf_context' fields. Supports fixed offsets only /
5558	static int check_ctx_access(struct bpf_verifier_env env, int* insn_idx, int off, int size,
5559	enum bpf_access_type t, enum bpf_reg_type *reg_type,
5560	struct btf *btf, u32 btf_id)
5561	{
5562	struct bpf_insn_access_aux info = {
5563	.reg_type = *reg_type,
5564	.log = &env->log,
5565	};
5566
5567	if (env->ops->is_valid_access &&
5568	env->ops->is_valid_access(off, size, t, env->prog, &info)) {
5569	/ A non zero info.ctx_field_size indicates that this field is a*
5570	* candidate for later verifier transformation to load the whole
5571	* field and then apply a mask when accessed with a narrower
5572	* access than actual ctx access size. A zero info.ctx_field_size
5573	* will only allow for whole field access and rejects any other
5574	* type of narrower access.
5575	*/
5576	*reg_type = info.reg_type;
5577
5578	if (base_type(type: *reg_type) == PTR_TO_BTF_ID) {
5579	*btf = info.btf;
5580	*btf_id = info.btf_id;
5581	} else {
5582	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
5583	}
5584	/ remember the offset of last byte accessed in ctx /
5585	if (env->prog->aux->max_ctx_offset < off + size)
5586	env->prog->aux->max_ctx_offset = off + size;
5587	return `0`;
5588	}
5589
5590	verbose(private_data: env, fmt: "invalid bpf_context access off=%d size=%d\n", off, size);
5591	return -EACCES;
5592	}
5593
5594	static int check_flow_keys_access(struct bpf_verifier_env env, int* off,
5595	int size)
5596	{
5597	if (size < `0` \|\| off < `0` \|\|
5598	(u64)off + size > sizeof(struct bpf_flow_keys)) {
5599	verbose(private_data: env, fmt: "invalid access to flow keys off=%d size=%d\n",
5600	off, size);
5601	return -EACCES;
5602	}
5603	return `0`;
5604	}
5605
5606	static int check_sock_access(struct bpf_verifier_env env, int* insn_idx,
5607	u32 regno, int off, int size,
5608	enum bpf_access_type t)
5609	{
5610	struct bpf_reg_state *regs = cur_regs(env);
5611	struct bpf_reg_state *reg = &regs[regno];
5612	struct bpf_insn_access_aux info = {};
5613	bool valid;
5614
5615	if (reg->smin_value < `0`) {
5616	verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5617	regno);
5618	return -EACCES;
5619	}
5620
5621	switch (reg->type) {
5622	case PTR_TO_SOCK_COMMON:
5623	valid = bpf_sock_common_is_valid_access(off, size, type: t, info: &info);
5624	break;
5625	case PTR_TO_SOCKET:
5626	valid = bpf_sock_is_valid_access(off, size, type: t, info: &info);
5627	break;
5628	case PTR_TO_TCP_SOCK:
5629	valid = bpf_tcp_sock_is_valid_access(off, size, type: t, info: &info);
5630	break;
5631	case PTR_TO_XDP_SOCK:
5632	valid = bpf_xdp_sock_is_valid_access(off, size, type: t, info: &info);
5633	break;
5634	default:
5635	valid = false;
5636	}
5637
5638
5639	if (valid) {
5640	env->insn_aux_data[insn_idx].ctx_field_size =
5641	info.ctx_field_size;
5642	return `0`;
5643	}
5644
5645	verbose(private_data: env, fmt: "R%d invalid %s access off=%d size=%d\n",
5646	regno, reg_type_str(env, type: reg->type), off, size);
5647
5648	return -EACCES;
5649	}
5650
5651	static bool is_pointer_value(struct bpf_verifier_env env, int* regno)
5652	{
5653	return __is_pointer_value(allow_ptr_leaks: env->allow_ptr_leaks, reg: reg_state(env, regno));
5654	}
5655
5656	static bool is_ctx_reg(struct bpf_verifier_env env, int* regno)
5657	{
5658	const struct bpf_reg_state *reg = reg_state(env, regno);
5659
5660	return reg->type == PTR_TO_CTX;
5661	}
5662
5663	static bool is_sk_reg(struct bpf_verifier_env env, int* regno)
5664	{
5665	const struct bpf_reg_state *reg = reg_state(env, regno);
5666
5667	return type_is_sk_pointer(type: reg->type);
5668	}
5669
5670	static bool is_pkt_reg(struct bpf_verifier_env env, int* regno)
5671	{
5672	const struct bpf_reg_state *reg = reg_state(env, regno);
5673
5674	return type_is_pkt_pointer(type: reg->type);
5675	}
5676
5677	static bool is_flow_key_reg(struct bpf_verifier_env env, int* regno)
5678	{
5679	const struct bpf_reg_state *reg = reg_state(env, regno);
5680
5681	/ Separate to is_ctx_reg() since we still want to allow BPF_ST here. /
5682	return reg->type == PTR_TO_FLOW_KEYS;
5683	}
5684
5685	static bool is_arena_reg(struct bpf_verifier_env env, int* regno)
5686	{
5687	const struct bpf_reg_state *reg = reg_state(env, regno);
5688
5689	return reg->type == PTR_TO_ARENA;
5690	}
5691
5692	static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
5693	#ifdef CONFIG_NET
5694	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
5695	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5696	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
5697	#endif
5698	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
5699	};
5700
5701	static bool is_trusted_reg(const struct bpf_reg_state *reg)
5702	{
5703	/ A referenced register is always trusted. /
5704	if (reg->ref_obj_id)
5705	return true;
5706
5707	/ Types listed in the reg2btf_ids are always trusted /
5708	if (reg2btf_ids[base_type(type: reg->type)])
5709	return true;
5710
5711	/ If a register is not referenced, it is trusted if it has the*
5712	* MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
5713	* other type modifiers may be safe, but we elect to take an opt-in
5714	* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
5715	* not.
5716	*
5717	* Eventually, we should make PTR_TRUSTED the single source of truth
5718	* for whether a register is trusted.
5719	*/
5720	return type_flag(type: reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
5721	!bpf_type_has_unsafe_modifiers(type: reg->type);
5722	}
5723
5724	static bool is_rcu_reg(const struct bpf_reg_state *reg)
5725	{
5726	return reg->type & MEM_RCU;
5727	}
5728
5729	static void clear_trusted_flags(enum bpf_type_flag *flag)
5730	{
5731	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS \| MEM_RCU);
5732	}
5733
5734	static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
5735	const struct bpf_reg_state *reg,
5736	int off, int size, bool strict)
5737	{
5738	struct tnum reg_off;
5739	int ip_align;
5740
5741	/ Byte size accesses are always allowed. /
5742	if (!strict \|\| size == `1`)
5743	return `0`;
5744
5745	/ For platforms that do not have a Kconfig enabling*
5746	* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
5747	* NET_IP_ALIGN is universally set to '2'. And on platforms
5748	* that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
5749	* to this code only in strict mode where we want to emulate
5750	* the NET_IP_ALIGN==2 checking. Therefore use an
5751	* unconditional IP align value of '2'.
5752	*/
5753	ip_align = `2`;
5754
5755	reg_off = tnum_add(a: reg->var_off, b: tnum_const(value: ip_align + reg->off + off));
5756	if (!tnum_is_aligned(a: reg_off, size)) {
5757	char tn_buf[`48`];
5758
5759	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
5760	verbose(private_data: env,
5761	fmt: "misaligned packet access off %d+%s+%d+%d size %d\n",
5762	ip_align, tn_buf, reg->off, off, size);
5763	return -EACCES;
5764	}
5765
5766	return `0`;
5767	}
5768
5769	static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
5770	const struct bpf_reg_state *reg,
5771	const char *pointer_desc,
5772	int off, int size, bool strict)
5773	{
5774	struct tnum reg_off;
5775
5776	/ Byte size accesses are always allowed. /
5777	if (!strict \|\| size == `1`)
5778	return `0`;
5779
5780	reg_off = tnum_add(a: reg->var_off, b: tnum_const(value: reg->off + off));
5781	if (!tnum_is_aligned(a: reg_off, size)) {
5782	char tn_buf[`48`];
5783
5784	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
5785	verbose(private_data: env, fmt: "misaligned %saccess off %s+%d+%d size %d\n",
5786	pointer_desc, tn_buf, reg->off, off, size);
5787	return -EACCES;
5788	}
5789
5790	return `0`;
5791	}
5792
5793	static int check_ptr_alignment(struct bpf_verifier_env *env,
5794	const struct bpf_reg_state reg, int* off,
5795	int size, bool strict_alignment_once)
5796	{
5797	bool strict = env->strict_alignment \|\| strict_alignment_once;
5798	const char *pointer_desc = "";
5799
5800	switch (reg->type) {
5801	case PTR_TO_PACKET:
5802	case PTR_TO_PACKET_META:
5803	/ Special case, because of NET_IP_ALIGN. Given metadata sits*
5804	* right in front, treat it the very same way.
5805	*/
5806	return check_pkt_ptr_alignment(env, reg, off, size, strict);
5807	case PTR_TO_FLOW_KEYS:
5808	pointer_desc = "flow keys ";
5809	break;
5810	case PTR_TO_MAP_KEY:
5811	pointer_desc = "key ";
5812	break;
5813	case PTR_TO_MAP_VALUE:
5814	pointer_desc = "value ";
5815	break;
5816	case PTR_TO_CTX:
5817	pointer_desc = "context ";
5818	break;
5819	case PTR_TO_STACK:
5820	pointer_desc = "stack ";
5821	/ The stack spill tracking logic in check_stack_write_fixed_off()*
5822	* and check_stack_read_fixed_off() relies on stack accesses being
5823	* aligned.
5824	*/
5825	strict = true;
5826	break;
5827	case PTR_TO_SOCKET:
5828	pointer_desc = "sock ";
5829	break;
5830	case PTR_TO_SOCK_COMMON:
5831	pointer_desc = "sock_common ";
5832	break;
5833	case PTR_TO_TCP_SOCK:
5834	pointer_desc = "tcp_sock ";
5835	break;
5836	case PTR_TO_XDP_SOCK:
5837	pointer_desc = "xdp_sock ";
5838	break;
5839	case PTR_TO_ARENA:
5840	return `0`;
5841	default:
5842	break;
5843	}
5844	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5845	strict);
5846	}
5847
5848	static int round_up_stack_depth(struct bpf_verifier_env env, int* stack_depth)
5849	{
5850	if (env->prog->jit_requested)
5851	return round_up(stack_depth, `16`);
5852
5853	/ round up to 32-bytes, since this is granularity*
5854	* of interpreter stack size
5855	*/
5856	return round_up(max_t(u32, stack_depth, `1`), `32`);
5857	}
5858
5859	/ starting from main bpf function walk all instructions of the function*
5860	* and recursively walk all callees that given function can call.
5861	* Ignore jump and exit insns.
5862	* Since recursion is prevented by check_cfg() this algorithm
5863	* only needs a local stack of MAX_CALL_FRAMES to remember callsites
5864	*/
5865	static int check_max_stack_depth_subprog(struct bpf_verifier_env env, int* idx)
5866	{
5867	struct bpf_subprog_info *subprog = env->subprog_info;
5868	struct bpf_insn *insn = env->prog->insnsi;
5869	int depth = `0`, frame = `0`, i, subprog_end;
5870	bool tail_call_reachable = false;
5871	int ret_insn[MAX_CALL_FRAMES];
5872	int ret_prog[MAX_CALL_FRAMES];
5873	int j;
5874
5875	i = subprog[idx].start;
5876	process_func:
5877	/ protect against potential stack overflow that might happen when*
5878	* bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5879	* depth for such case down to 256 so that the worst case scenario
5880	* would result in 8k stack size (32 which is tailcall limit * 256 =
5881	* 8k).
5882	*
5883	* To get the idea what might happen, see an example:
5884	* func1 -> sub rsp, 128
5885	* subfunc1 -> sub rsp, 256
5886	* tailcall1 -> add rsp, 256
5887	* func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5888	* subfunc2 -> sub rsp, 64
5889	* subfunc22 -> sub rsp, 128
5890	* tailcall2 -> add rsp, 128
5891	* func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5892	*
5893	* tailcall will unwind the current stack frame but it will not get rid
5894	* of caller's stack as shown on the example above.
5895	*/
5896	if (idx && subprog[idx].has_tail_call && depth >= `256`) {
5897	verbose(private_data: env,
5898	fmt: "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5899	depth);
5900	return -EACCES;
5901	}
5902	depth += round_up_stack_depth(env, stack_depth: subprog[idx].stack_depth);
5903	if (depth > MAX_BPF_STACK) {
5904	verbose(private_data: env, fmt: "combined stack size of %d calls is %d. Too large\n",
5905	frame + `1`, depth);
5906	return -EACCES;
5907	}
5908	continue_func:
5909	subprog_end = subprog[idx + `1`].start;
5910	for (; i < subprog_end; i++) {
5911	int next_insn, sidx;
5912
5913	if (bpf_pseudo_kfunc_call(insn: insn + i) && !insn[i].off) {
5914	bool err = false;
5915
5916	if (!is_bpf_throw_kfunc(insn: insn + i))
5917	continue;
5918	if (subprog[idx].is_cb)
5919	err = true;
5920	for (int c = `0`; c < frame && !err; c++) {
5921	if (subprog[ret_prog[c]].is_cb) {
5922	err = true;
5923	break;
5924	}
5925	}
5926	if (!err)
5927	continue;
5928	verbose(private_data: env,
5929	fmt: "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
5930	i, idx);
5931	return -EINVAL;
5932	}
5933
5934	if (!bpf_pseudo_call(insn: insn + i) && !bpf_pseudo_func(insn: insn + i))
5935	continue;
5936	/ remember insn and function to return to /
5937	ret_insn[frame] = i + `1`;
5938	ret_prog[frame] = idx;
5939
5940	/ find the callee /
5941	next_insn = i + insn[i].imm + `1`;
5942	sidx = find_subprog(env, off: next_insn);
5943	if (sidx < `0`) {
5944	WARN_ONCE(`1`, "verifier bug. No program starts at insn %d\n",
5945	next_insn);
5946	return -EFAULT;
5947	}
5948	if (subprog[sidx].is_async_cb) {
5949	if (subprog[sidx].has_tail_call) {
5950	verbose(private_data: env, fmt: "verifier bug. subprog has tail_call and async cb\n");
5951	return -EFAULT;
5952	}
5953	/ async callbacks don't increase bpf prog stack size unless called directly /
5954	if (!bpf_pseudo_call(insn: insn + i))
5955	continue;
5956	if (subprog[sidx].is_exception_cb) {
5957	verbose(private_data: env, fmt: "insn %d cannot call exception cb directly\n", i);
5958	return -EINVAL;
5959	}
5960	}
5961	i = next_insn;
5962	idx = sidx;
5963
5964	if (subprog[idx].has_tail_call)
5965	tail_call_reachable = true;
5966
5967	frame++;
5968	if (frame >= MAX_CALL_FRAMES) {
5969	verbose(private_data: env, fmt: "the call stack of %d frames is too deep !\n",
5970	frame);
5971	return -E2BIG;
5972	}
5973	goto process_func;
5974	}
5975	/ if tail call got detected across bpf2bpf calls then mark each of the*
5976	* currently present subprog frames as tail call reachable subprogs;
5977	* this info will be utilized by JIT so that we will be preserving the
5978	* tail call counter throughout bpf2bpf calls combined with tailcalls
5979	*/
5980	if (tail_call_reachable)
5981	for (j = `0`; j < frame; j++) {
5982	if (subprog[ret_prog[j]].is_exception_cb) {
5983	verbose(private_data: env, fmt: "cannot tail call within exception cb\n");
5984	return -EINVAL;
5985	}
5986	subprog[ret_prog[j]].tail_call_reachable = true;
5987	}
5988	if (subprog[`0`].tail_call_reachable)
5989	env->prog->aux->tail_call_reachable = true;
5990
5991	/ end of for() loop means the last insn of the 'subprog'*
5992	* was reached. Doesn't matter whether it was JA or EXIT
5993	*/
5994	if (frame == `0`)
5995	return `0`;
5996	depth -= round_up_stack_depth(env, stack_depth: subprog[idx].stack_depth);
5997	frame--;
5998	i = ret_insn[frame];
5999	idx = ret_prog[frame];
6000	goto continue_func;
6001	}
6002
6003	static int check_max_stack_depth(struct bpf_verifier_env *env)
6004	{
6005	struct bpf_subprog_info *si = env->subprog_info;
6006	int ret;
6007
6008	for (int i = `0`; i < env->subprog_cnt; i++) {
6009	if (!i \|\| si[i].is_async_cb) {
6010	ret = check_max_stack_depth_subprog(env, idx: i);
6011	if (ret < `0`)
6012	return ret;
6013	}
6014	continue;
6015	}
6016	return `0`;
6017	}
6018
6019	#ifndef CONFIG_BPF_JIT_ALWAYS_ON
6020	static int get_callee_stack_depth(struct bpf_verifier_env *env,
6021	const struct bpf_insn insn, int* idx)
6022	{
6023	int start = idx + insn->imm + `1`, subprog;
6024
6025	subprog = find_subprog(env, start);
6026	if (subprog < `0`) {
6027	WARN_ONCE(`1`, "verifier bug. No program starts at insn %d\n",
6028	start);
6029	return -EFAULT;
6030	}
6031	return env->subprog_info[subprog].stack_depth;
6032	}
6033	#endif
6034
6035	static int __check_buffer_access(struct bpf_verifier_env *env,
6036	const char *buf_info,
6037	const struct bpf_reg_state *reg,
6038	int regno, int off, int size)
6039	{
6040	if (off < `0`) {
6041	verbose(private_data: env,
6042	fmt: "R%d invalid %s buffer access: off=%d, size=%d\n",
6043	regno, buf_info, off, size);
6044	return -EACCES;
6045	}
6046	if (!tnum_is_const(a: reg->var_off) \|\| reg->var_off.value) {
6047	char tn_buf[`48`];
6048
6049	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
6050	verbose(private_data: env,
6051	fmt: "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
6052	regno, off, tn_buf);
6053	return -EACCES;
6054	}
6055
6056	return `0`;
6057	}
6058
6059	static int check_tp_buffer_access(struct bpf_verifier_env *env,
6060	const struct bpf_reg_state *reg,
6061	int regno, int off, int size)
6062	{
6063	int err;
6064
6065	err = __check_buffer_access(env, buf_info: "tracepoint", reg, regno, off, size);
6066	if (err)
6067	return err;
6068
6069	if (off + size > env->prog->aux->max_tp_access)
6070	env->prog->aux->max_tp_access = off + size;
6071
6072	return `0`;
6073	}
6074
6075	static int check_buffer_access(struct bpf_verifier_env *env,
6076	const struct bpf_reg_state *reg,
6077	int regno, int off, int size,
6078	bool zero_size_allowed,
6079	u32 *max_access)
6080	{
6081	const char *buf_info = type_is_rdonly_mem(type: reg->type) ? "rdonly" : "rdwr";
6082	int err;
6083
6084	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
6085	if (err)
6086	return err;
6087
6088	if (off + size > *max_access)
6089	*max_access = off + size;
6090
6091	return `0`;
6092	}
6093
6094	/ BPF architecture zero extends alu32 ops into 64-bit registesr /
6095	static void zext_32_to_64(struct bpf_reg_state *reg)
6096	{
6097	reg->var_off = tnum_subreg(a: reg->var_off);
6098	__reg_assign_32_into_64(reg);
6099	}
6100
6101	/ truncate register to smaller size (in bytes)*
6102	* must be called with size < BPF_REG_SIZE
6103	*/
6104	static void coerce_reg_to_size(struct bpf_reg_state reg, int* size)
6105	{
6106	u64 mask;
6107
6108	/ clear high bits in bit representation /
6109	reg->var_off = tnum_cast(a: reg->var_off, size);
6110
6111	/ fix arithmetic bounds /
6112	mask = ((u64)`1` << (size * `8`)) - `1`;
6113	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
6114	reg->umin_value &= mask;
6115	reg->umax_value &= mask;
6116	} else {
6117	reg->umin_value = `0`;
6118	reg->umax_value = mask;
6119	}
6120	reg->smin_value = reg->umin_value;
6121	reg->smax_value = reg->umax_value;
6122
6123	/ If size is smaller than 32bit register the 32bit register*
6124	* values are also truncated so we push 64-bit bounds into
6125	* 32-bit bounds. Above were truncated < 32-bits already.
6126	*/
6127	if (size < `4`)
6128	__mark_reg32_unbounded(reg);
6129
6130	reg_bounds_sync(reg);
6131	}
6132
6133	static void set_sext64_default_val(struct bpf_reg_state reg, int* size)
6134	{
6135	if (size == `1`) {
6136	reg->smin_value = reg->s32_min_value = S8_MIN;
6137	reg->smax_value = reg->s32_max_value = S8_MAX;
6138	} else if (size == `2`) {
6139	reg->smin_value = reg->s32_min_value = S16_MIN;
6140	reg->smax_value = reg->s32_max_value = S16_MAX;
6141	} else {
6142	/ size == 4 /
6143	reg->smin_value = reg->s32_min_value = S32_MIN;
6144	reg->smax_value = reg->s32_max_value = S32_MAX;
6145	}
6146	reg->umin_value = reg->u32_min_value = `0`;
6147	reg->umax_value = U64_MAX;
6148	reg->u32_max_value = U32_MAX;
6149	reg->var_off = tnum_unknown;
6150	}
6151
6152	static void coerce_reg_to_size_sx(struct bpf_reg_state reg, int* size)
6153	{
6154	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
6155	u64 top_smax_value, top_smin_value;
6156	u64 num_bits = size * `8`;
6157
6158	if (tnum_is_const(a: reg->var_off)) {
6159	u64_cval = reg->var_off.value;
6160	if (size == `1`)
6161	reg->var_off = tnum_const(value: (s8)u64_cval);
6162	else if (size == `2`)
6163	reg->var_off = tnum_const(value: (s16)u64_cval);
6164	else
6165	/ size == 4 /
6166	reg->var_off = tnum_const(value: (s32)u64_cval);
6167
6168	u64_cval = reg->var_off.value;
6169	reg->smax_value = reg->smin_value = u64_cval;
6170	reg->umax_value = reg->umin_value = u64_cval;
6171	reg->s32_max_value = reg->s32_min_value = u64_cval;
6172	reg->u32_max_value = reg->u32_min_value = u64_cval;
6173	return;
6174	}
6175
6176	top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
6177	top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
6178
6179	if (top_smax_value != top_smin_value)
6180	goto out;
6181
6182	/ find the s64_min and s64_min after sign extension /
6183	if (size == `1`) {
6184	init_s64_max = (s8)reg->smax_value;
6185	init_s64_min = (s8)reg->smin_value;
6186	} else if (size == `2`) {
6187	init_s64_max = (s16)reg->smax_value;
6188	init_s64_min = (s16)reg->smin_value;
6189	} else {
6190	init_s64_max = (s32)reg->smax_value;
6191	init_s64_min = (s32)reg->smin_value;
6192	}
6193
6194	s64_max = max(init_s64_max, init_s64_min);
6195	s64_min = min(init_s64_max, init_s64_min);
6196
6197	/ both of s64_max/s64_min positive or negative /
6198	if ((s64_max >= `0`) == (s64_min >= `0`)) {
6199	reg->smin_value = reg->s32_min_value = s64_min;
6200	reg->smax_value = reg->s32_max_value = s64_max;
6201	reg->umin_value = reg->u32_min_value = s64_min;
6202	reg->umax_value = reg->u32_max_value = s64_max;
6203	reg->var_off = tnum_range(min: s64_min, max: s64_max);
6204	return;
6205	}
6206
6207	out:
6208	set_sext64_default_val(reg, size);
6209	}
6210
6211	static void set_sext32_default_val(struct bpf_reg_state reg, int* size)
6212	{
6213	if (size == `1`) {
6214	reg->s32_min_value = S8_MIN;
6215	reg->s32_max_value = S8_MAX;
6216	} else {
6217	/ size == 2 /
6218	reg->s32_min_value = S16_MIN;
6219	reg->s32_max_value = S16_MAX;
6220	}
6221	reg->u32_min_value = `0`;
6222	reg->u32_max_value = U32_MAX;
6223	}
6224
6225	static void coerce_subreg_to_size_sx(struct bpf_reg_state reg, int* size)
6226	{
6227	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
6228	u32 top_smax_value, top_smin_value;
6229	u32 num_bits = size * `8`;
6230
6231	if (tnum_is_const(a: reg->var_off)) {
6232	u32_val = reg->var_off.value;
6233	if (size == `1`)
6234	reg->var_off = tnum_const(value: (s8)u32_val);
6235	else
6236	reg->var_off = tnum_const(value: (s16)u32_val);
6237
6238	u32_val = reg->var_off.value;
6239	reg->s32_min_value = reg->s32_max_value = u32_val;
6240	reg->u32_min_value = reg->u32_max_value = u32_val;
6241	return;
6242	}
6243
6244	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
6245	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
6246
6247	if (top_smax_value != top_smin_value)
6248	goto out;
6249
6250	/ find the s32_min and s32_min after sign extension /
6251	if (size == `1`) {
6252	init_s32_max = (s8)reg->s32_max_value;
6253	init_s32_min = (s8)reg->s32_min_value;
6254	} else {
6255	/ size == 2 /
6256	init_s32_max = (s16)reg->s32_max_value;
6257	init_s32_min = (s16)reg->s32_min_value;
6258	}
6259	s32_max = max(init_s32_max, init_s32_min);
6260	s32_min = min(init_s32_max, init_s32_min);
6261
6262	if ((s32_min >= `0`) == (s32_max >= `0`)) {
6263	reg->s32_min_value = s32_min;
6264	reg->s32_max_value = s32_max;
6265	reg->u32_min_value = (u32)s32_min;
6266	reg->u32_max_value = (u32)s32_max;
6267	return;
6268	}
6269
6270	out:
6271	set_sext32_default_val(reg, size);
6272	}
6273
6274	static bool bpf_map_is_rdonly(const struct bpf_map *map)
6275	{
6276	/ A map is considered read-only if the following condition are true:*
6277	*
6278	* 1) BPF program side cannot change any of the map content. The
6279	* BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
6280	* and was set at map creation time.
6281	* 2) The map value(s) have been initialized from user space by a
6282	* loader and then "frozen", such that no new map update/delete
6283	* operations from syscall side are possible for the rest of
6284	* the map's lifetime from that point onwards.
6285	* 3) Any parallel/pending map update/delete operations from syscall
6286	* side have been completed. Only after that point, it's safe to
6287	* assume that map value(s) are immutable.
6288	*/
6289	return (map->map_flags & BPF_F_RDONLY_PROG) &&
6290	READ_ONCE(map->frozen) &&
6291	!bpf_map_write_active(map);
6292	}
6293
6294	static int bpf_map_direct_read(struct bpf_map map, int* off, int size, u64 *val,
6295	bool is_ldsx)
6296	{
6297	void *ptr;
6298	u64 addr;
6299	int err;
6300
6301	err = map->ops->map_direct_value_addr(map, &addr, off);
6302	if (err)
6303	return err;
6304	ptr = (void )(long*)addr + off;
6305
6306	switch (size) {
6307	case sizeof(u8):
6308	val = is_ldsx ? (s64)(s8 )ptr : (u64)(u8 *)ptr;
6309	break;
6310	case sizeof(u16):
6311	val = is_ldsx ? (s64)(s16 )ptr : (u64)(u16 *)ptr;
6312	break;
6313	case sizeof(u32):
6314	val = is_ldsx ? (s64)(s32 )ptr : (u64)(u32 *)ptr;
6315	break;
6316	case sizeof(u64):
6317	val = (u64 *)ptr;
6318	break;
6319	default:
6320	return -EINVAL;
6321	}
6322	return `0`;
6323	}
6324
/* Build the name of an allow-list shadow type by pasting a suffix onto the
 * original type; the suffixes match the strings passed to
 * btf_nested_type_is_trusted() by the type_is_*() helpers.
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
6328
6329	/*
6330	* Allow list few fields as RCU trusted or full trusted.
6331	* This logic doesn't allow mix tagging and will be removed once GCC supports
6332	* btf_type_tag.
6333	*/
6334
6335	/ RCU trusted: these fields are trusted in RCU CS and never NULL /
6336	BTF_TYPE_SAFE_RCU(struct task_struct) {
6337	const cpumask_t *cpus_ptr;
6338	struct css_set __rcu *cgroups;
6339	struct task_struct __rcu *real_parent;
6340	struct task_struct *group_leader;
6341	};
6342
6343	BTF_TYPE_SAFE_RCU(struct cgroup) {
6344	/ cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c /
6345	struct kernfs_node *kn;
6346	};
6347
6348	BTF_TYPE_SAFE_RCU(struct css_set) {
6349	struct cgroup *dfl_cgrp;
6350	};
6351
6352	/ RCU trusted: these fields are trusted in RCU CS and can be NULL /
6353	BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
6354	struct file __rcu *exe_file;
6355	};
6356
6357	/ skb->sk, req->sk are not RCU protected, but we mark them as such*
6358	* because bpf prog accessible sockets are SOCK_RCU_FREE.
6359	*/
6360	BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
6361	struct sock *sk;
6362	};
6363
6364	BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
6365	struct sock *sk;
6366	};
6367
6368	/ full trusted: these fields are trusted even outside of RCU CS and never NULL /
6369	BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
6370	struct seq_file *seq;
6371	};
6372
6373	BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
6374	struct bpf_iter_meta *meta;
6375	struct task_struct *task;
6376	};
6377
6378	BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
6379	struct file *file;
6380	};
6381
6382	BTF_TYPE_SAFE_TRUSTED(struct file) {
6383	struct inode *f_inode;
6384	};
6385
6386	BTF_TYPE_SAFE_TRUSTED(struct dentry) {
6387	/ no negative dentry-s in places where bpf can see it /
6388	struct inode *d_inode;
6389	};
6390
6391	BTF_TYPE_SAFE_TRUSTED(struct socket) {
6392	struct sock *sk;
6393	};
6394
6395	static bool type_is_rcu(struct bpf_verifier_env *env,
6396	struct bpf_reg_state *reg,
6397	const char *field_name, u32 btf_id)
6398	{
6399	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
6400	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
6401	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
6402
6403	return btf_nested_type_is_trusted(log: &env->log, reg, field_name, btf_id, suffix: "__safe_rcu");
6404	}
6405
6406	static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
6407	struct bpf_reg_state *reg,
6408	const char *field_name, u32 btf_id)
6409	{
6410	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
6411	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
6412	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
6413
6414	return btf_nested_type_is_trusted(log: &env->log, reg, field_name, btf_id, suffix: "__safe_rcu_or_null");
6415	}
6416
6417	static bool type_is_trusted(struct bpf_verifier_env *env,
6418	struct bpf_reg_state *reg,
6419	const char *field_name, u32 btf_id)
6420	{
6421	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
6422	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
6423	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
6424	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
6425	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
6426	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
6427
6428	return btf_nested_type_is_trusted(log: &env->log, reg, field_name, btf_id, suffix: "__safe_trusted");
6429	}
6430
6431	static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
6432	struct bpf_reg_state *regs,
6433	int regno, int off, int size,
6434	enum bpf_access_type atype,
6435	int value_regno)
6436	{
6437	struct bpf_reg_state *reg = regs + regno;
6438	const struct btf_type *t = btf_type_by_id(btf: reg->btf, type_id: reg->btf_id);
6439	const char *tname = btf_name_by_offset(btf: reg->btf, offset: t->name_off);
6440	const char *field_name = NULL;
6441	enum bpf_type_flag flag = `0`;
6442	u32 btf_id = `0`;
6443	int ret;
6444
6445	if (!env->allow_ptr_leaks) {
6446	verbose(private_data: env,
6447	fmt: "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6448	tname);
6449	return -EPERM;
6450	}
6451	if (!env->prog->gpl_compatible && btf_is_kernel(btf: reg->btf)) {
6452	verbose(private_data: env,
6453	fmt: "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6454	tname);
6455	return -EINVAL;
6456	}
6457	if (off < `0`) {
6458	verbose(private_data: env,
6459	fmt: "R%d is ptr_%s invalid negative access: off=%d\n",
6460	regno, tname, off);
6461	return -EACCES;
6462	}
6463	if (!tnum_is_const(a: reg->var_off) \|\| reg->var_off.value) {
6464	char tn_buf[`48`];
6465
6466	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
6467	verbose(private_data: env,
6468	fmt: "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6469	regno, tname, off, tn_buf);
6470	return -EACCES;
6471	}
6472
6473	if (reg->type & MEM_USER) {
6474	verbose(private_data: env,
6475	fmt: "R%d is ptr_%s access user memory: off=%d\n",
6476	regno, tname, off);
6477	return -EACCES;
6478	}
6479
6480	if (reg->type & MEM_PERCPU) {
6481	verbose(private_data: env,
6482	fmt: "R%d is ptr_%s access percpu memory: off=%d\n",
6483	regno, tname, off);
6484	return -EACCES;
6485	}
6486
6487	if (env->ops->btf_struct_access && !type_is_alloc(type: reg->type) && atype == BPF_WRITE) {
6488	if (!btf_is_kernel(btf: reg->btf)) {
6489	verbose(private_data: env, fmt: "verifier internal error: reg->btf must be kernel btf\n");
6490	return -EFAULT;
6491	}
6492	ret = env->ops->btf_struct_access(&env->log, reg, off, size);
6493	} else {
6494	/ Writes are permitted with default btf_struct_access for*
6495	* program allocated objects (which always have ref_obj_id > 0),
6496	* but not for untrusted PTR_TO_BTF_ID \| MEM_ALLOC.
6497	*/
6498	if (atype != BPF_READ && !type_is_ptr_alloc_obj(type: reg->type)) {
6499	verbose(private_data: env, fmt: "only read is supported\n");
6500	return -EACCES;
6501	}
6502
6503	if (type_is_alloc(type: reg->type) && !type_is_non_owning_ref(type: reg->type) &&
6504	!(reg->type & MEM_RCU) && !reg->ref_obj_id) {
6505	verbose(private_data: env, fmt: "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
6506	return -EFAULT;
6507	}
6508
6509	ret = btf_struct_access(log: &env->log, reg, off, size, atype, next_btf_id: &btf_id, flag: &flag, field_name: &field_name);
6510	}
6511
6512	if (ret < `0`)
6513	return ret;
6514
6515	if (ret != PTR_TO_BTF_ID) {
6516	/ just mark; /
6517
6518	} else if (type_flag(type: reg->type) & PTR_UNTRUSTED) {
6519	/ If this is an untrusted pointer, all pointers formed by walking it*
6520	* also inherit the untrusted flag.
6521	*/
6522	flag = PTR_UNTRUSTED;
6523
6524	} else if (is_trusted_reg(reg) \|\| is_rcu_reg(reg)) {
6525	/ By default any pointer obtained from walking a trusted pointer is no*
6526	* longer trusted, unless the field being accessed has explicitly been
6527	* marked as inheriting its parent's state of trust (either full or RCU).
6528	* For example:
6529	* 'cgroups' pointer is untrusted if task->cgroups dereference
6530	* happened in a sleepable program outside of bpf_rcu_read_lock()
6531	* section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
6532	* Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
6533	*
6534	* A regular RCU-protected pointer with __rcu tag can also be deemed
6535	* trusted if we are in an RCU CS. Such pointer can be NULL.
6536	*/
6537	if (type_is_trusted(env, reg, field_name, btf_id)) {
6538	flag \|= PTR_TRUSTED;
6539	} else if (in_rcu_cs(env) && !type_may_be_null(type: reg->type)) {
6540	if (type_is_rcu(env, reg, field_name, btf_id)) {
6541	/ ignore __rcu tag and mark it MEM_RCU /
6542	flag \|= MEM_RCU;
6543	} else if (flag & MEM_RCU \|\|
6544	type_is_rcu_or_null(env, reg, field_name, btf_id)) {
6545	/ __rcu tagged pointers can be NULL /
6546	flag \|= MEM_RCU \| PTR_MAYBE_NULL;
6547
6548	/ We always trust them /
6549	if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
6550	flag & PTR_UNTRUSTED)
6551	flag &= ~PTR_UNTRUSTED;
6552	} else if (flag & (MEM_PERCPU \| MEM_USER)) {
6553	/ keep as-is /
6554	} else {
6555	/ walking unknown pointers yields old deprecated PTR_TO_BTF_ID /
6556	clear_trusted_flags(flag: &flag);
6557	}
6558	} else {
6559	/*
6560	* If not in RCU CS or MEM_RCU pointer can be NULL then
6561	* aggressively mark as untrusted otherwise such
6562	* pointers will be plain PTR_TO_BTF_ID without flags
6563	* and will be allowed to be passed into helpers for
6564	* compat reasons.
6565	*/
6566	flag = PTR_UNTRUSTED;
6567	}
6568	} else {
6569	/ Old compat. Deprecated /
6570	clear_trusted_flags(flag: &flag);
6571	}
6572
6573	if (atype == BPF_READ && value_regno >= `0`)
6574	mark_btf_ld_reg(env, regs, regno: value_regno, reg_type: ret, btf: reg->btf, btf_id, flag);
6575
6576	return `0`;
6577	}
6578
6579	static int check_ptr_to_map_access(struct bpf_verifier_env *env,
6580	struct bpf_reg_state *regs,
6581	int regno, int off, int size,
6582	enum bpf_access_type atype,
6583	int value_regno)
6584	{
6585	struct bpf_reg_state *reg = regs + regno;
6586	struct bpf_map *map = reg->map_ptr;
6587	struct bpf_reg_state map_reg;
6588	enum bpf_type_flag flag = `0`;
6589	const struct btf_type *t;
6590	const char *tname;
6591	u32 btf_id;
6592	int ret;
6593
6594	if (!btf_vmlinux) {
6595	verbose(private_data: env, fmt: "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
6596	return -ENOTSUPP;
6597	}
6598
6599	if (!map->ops->map_btf_id \|\| !*map->ops->map_btf_id) {
6600	verbose(private_data: env, fmt: "map_ptr access not supported for map type %d\n",
6601	map->map_type);
6602	return -ENOTSUPP;
6603	}
6604
6605	t = btf_type_by_id(btf: btf_vmlinux, type_id: *map->ops->map_btf_id);
6606	tname = btf_name_by_offset(btf: btf_vmlinux, offset: t->name_off);
6607
6608	if (!env->allow_ptr_leaks) {
6609	verbose(private_data: env,
6610	fmt: "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6611	tname);
6612	return -EPERM;
6613	}
6614
6615	if (off < `0`) {
6616	verbose(private_data: env, fmt: "R%d is %s invalid negative access: off=%d\n",
6617	regno, tname, off);
6618	return -EACCES;
6619	}
6620
6621	if (atype != BPF_READ) {
6622	verbose(private_data: env, fmt: "only read from %s is supported\n", tname);
6623	return -EACCES;
6624	}
6625
6626	/ Simulate access to a PTR_TO_BTF_ID /
6627	memset(&map_reg, `0`, sizeof(map_reg));
6628	mark_btf_ld_reg(env, regs: &map_reg, regno: `0`, reg_type: PTR_TO_BTF_ID, btf: btf_vmlinux, btf_id: *map->ops->map_btf_id, flag: `0`);
6629	ret = btf_struct_access(log: &env->log, reg: &map_reg, off, size, atype, next_btf_id: &btf_id, flag: &flag, NULL);
6630	if (ret < `0`)
6631	return ret;
6632
6633	if (value_regno >= `0`)
6634	mark_btf_ld_reg(env, regs, regno: value_regno, reg_type: ret, btf: btf_vmlinux, btf_id, flag);
6635
6636	return `0`;
6637	}
6638
6639	/ Check that the stack access at the given offset is within bounds. The*
6640	* maximum valid offset is -1.
6641	*
6642	* The minimum valid offset is -MAX_BPF_STACK for writes, and
6643	* -state->allocated_stack for reads.
6644	*/
6645	static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
6646	s64 off,
6647	struct bpf_func_state *state,
6648	enum bpf_access_type t)
6649	{
6650	int min_valid_off;
6651
6652	if (t == BPF_WRITE \|\| env->allow_uninit_stack)
6653	min_valid_off = -MAX_BPF_STACK;
6654	else
6655	min_valid_off = -state->allocated_stack;
6656
6657	if (off < min_valid_off \|\| off > -`1`)
6658	return -EACCES;
6659	return `0`;
6660	}
6661
6662	/ Check that the stack access at 'regno + off' falls within the maximum stack*
6663	* bounds.
6664	*
6665	* 'off' includes `regno->offset`, but not its dynamic part (if any).
6666	*/
6667	static int check_stack_access_within_bounds(
6668	struct bpf_verifier_env *env,
6669	int regno, int off, int access_size,
6670	enum bpf_access_src src, enum bpf_access_type type)
6671	{
6672	struct bpf_reg_state *regs = cur_regs(env);
6673	struct bpf_reg_state *reg = regs + regno;
6674	struct bpf_func_state *state = func(env, reg);
6675	s64 min_off, max_off;
6676	int err;
6677	char *err_extra;
6678
6679	if (src == ACCESS_HELPER)
6680	/ We don't know if helpers are reading or writing (or both). /
6681	err_extra = " indirect access to";
6682	else if (type == BPF_READ)
6683	err_extra = " read from";
6684	else
6685	err_extra = " write to";
6686
6687	if (tnum_is_const(a: reg->var_off)) {
6688	min_off = (s64)reg->var_off.value + off;
6689	max_off = min_off + access_size;
6690	} else {
6691	if (reg->smax_value >= BPF_MAX_VAR_OFF \|\|
6692	reg->smin_value <= -BPF_MAX_VAR_OFF) {
6693	verbose(private_data: env, fmt: "invalid unbounded variable-offset%s stack R%d\n",
6694	err_extra, regno);
6695	return -EACCES;
6696	}
6697	min_off = reg->smin_value + off;
6698	max_off = reg->smax_value + off + access_size;
6699	}
6700
6701	err = check_stack_slot_within_bounds(env, off: min_off, state, t: type);
6702	if (!err && max_off > `0`)
6703	err = -EINVAL; / out of stack access into non-negative offsets /
6704	if (!err && access_size < `0`)
6705	/ access_size should not be negative (or overflow an int); others checks*
6706	* along the way should have prevented such an access.
6707	*/
6708	err = -EFAULT; / invalid negative access size; integer overflow? /
6709
6710	if (err) {
6711	if (tnum_is_const(a: reg->var_off)) {
6712	verbose(private_data: env, fmt: "invalid%s stack R%d off=%d size=%d\n",
6713	err_extra, regno, off, access_size);
6714	} else {
6715	char tn_buf[`48`];
6716
6717	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
6718	verbose(private_data: env, fmt: "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
6719	err_extra, regno, tn_buf, off, access_size);
6720	}
6721	return err;
6722	}
6723
6724	/ Note that there is no stack access with offset zero, so the needed stack*
6725	* size is -min_off, not -min_off+1.
6726	*/
6727	return grow_stack_state(env, state, size: -min_off / size /);
6728	}
6729
6730	/ check whether memory at (regno + off) is accessible for t = (read \| write)*
6731	* if t==write, value_regno is a register which value is stored into memory
6732	* if t==read, value_regno is a register which will receive the value from memory
6733	* if t==write && value_regno==-1, some unknown value is stored into memory
6734	* if t==read && value_regno==-1, don't care what we read from memory
6735	*/
6736	static int check_mem_access(struct bpf_verifier_env env, int* insn_idx, u32 regno,
6737	int off, int bpf_size, enum bpf_access_type t,
6738	int value_regno, bool strict_alignment_once, bool is_ldsx)
6739	{
6740	struct bpf_reg_state *regs = cur_regs(env);
6741	struct bpf_reg_state *reg = regs + regno;
6742	int size, err = `0`;
6743
6744	size = bpf_size_to_bytes(bpf_size);
6745	if (size < `0`)
6746	return size;
6747
6748	/ alignment checks will add in reg->off themselves /
6749	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6750	if (err)
6751	return err;
6752
6753	/ for access checks, reg->off is just part of off /
6754	off += reg->off;
6755
6756	if (reg->type == PTR_TO_MAP_KEY) {
6757	if (t == BPF_WRITE) {
6758	verbose(private_data: env, fmt: "write to change key R%d not allowed\n", regno);
6759	return -EACCES;
6760	}
6761
6762	err = check_mem_region_access(env, regno, off, size,
6763	mem_size: reg->map_ptr->key_size, zero_size_allowed: false);
6764	if (err)
6765	return err;
6766	if (value_regno >= `0`)
6767	mark_reg_unknown(env, regs, regno: value_regno);
6768	} else if (reg->type == PTR_TO_MAP_VALUE) {
6769	struct btf_field *kptr_field = NULL;
6770
6771	if (t == BPF_WRITE && value_regno >= `0` &&
6772	is_pointer_value(env, regno: value_regno)) {
6773	verbose(private_data: env, fmt: "R%d leaks addr into map\n", value_regno);
6774	return -EACCES;
6775	}
6776	err = check_map_access_type(env, regno, off, size, type: t);
6777	if (err)
6778	return err;
6779	err = check_map_access(env, regno, off, size, zero_size_allowed: false, src: ACCESS_DIRECT);
6780	if (err)
6781	return err;
6782	if (tnum_is_const(a: reg->var_off))
6783	kptr_field = btf_record_find(rec: reg->map_ptr->record,
6784	offset: off + reg->var_off.value, field_mask: BPF_KPTR);
6785	if (kptr_field) {
6786	err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
6787	} else if (t == BPF_READ && value_regno >= `0`) {
6788	struct bpf_map *map = reg->map_ptr;
6789
6790	/ if map is read-only, track its contents as scalars /
6791	if (tnum_is_const(a: reg->var_off) &&
6792	bpf_map_is_rdonly(map) &&
6793	map->ops->map_direct_value_addr) {
6794	int map_off = off + reg->var_off.value;
6795	u64 val = `0`;
6796
6797	err = bpf_map_direct_read(map, off: map_off, size,
6798	val: &val, is_ldsx);
6799	if (err)
6800	return err;
6801
6802	regs[value_regno].type = SCALAR_VALUE;
6803	__mark_reg_known(reg: &regs[value_regno], imm: val);
6804	} else {
6805	mark_reg_unknown(env, regs, regno: value_regno);
6806	}
6807	}
6808	} else if (base_type(type: reg->type) == PTR_TO_MEM) {
6809	bool rdonly_mem = type_is_rdonly_mem(type: reg->type);
6810
6811	if (type_may_be_null(type: reg->type)) {
6812	verbose(private_data: env, fmt: "R%d invalid mem access '%s'\n", regno,
6813	reg_type_str(env, type: reg->type));
6814	return -EACCES;
6815	}
6816
6817	if (t == BPF_WRITE && rdonly_mem) {
6818	verbose(private_data: env, fmt: "R%d cannot write into %s\n",
6819	regno, reg_type_str(env, type: reg->type));
6820	return -EACCES;
6821	}
6822
6823	if (t == BPF_WRITE && value_regno >= `0` &&
6824	is_pointer_value(env, regno: value_regno)) {
6825	verbose(private_data: env, fmt: "R%d leaks addr into mem\n", value_regno);
6826	return -EACCES;
6827	}
6828
6829	err = check_mem_region_access(env, regno, off, size,
6830	mem_size: reg->mem_size, zero_size_allowed: false);
6831	if (!err && value_regno >= `0` && (t == BPF_READ \|\| rdonly_mem))
6832	mark_reg_unknown(env, regs, regno: value_regno);
6833	} else if (reg->type == PTR_TO_CTX) {
6834	enum bpf_reg_type reg_type = SCALAR_VALUE;
6835	struct btf *btf = NULL;
6836	u32 btf_id = `0`;
6837
6838	if (t == BPF_WRITE && value_regno >= `0` &&
6839	is_pointer_value(env, regno: value_regno)) {
6840	verbose(private_data: env, fmt: "R%d leaks addr into ctx\n", value_regno);
6841	return -EACCES;
6842	}
6843
6844	err = check_ptr_off_reg(env, reg, regno);
6845	if (err < `0`)
6846	return err;
6847
6848	err = check_ctx_access(env, insn_idx, off, size, t, reg_type: &reg_type, btf: &btf,
6849	btf_id: &btf_id);
6850	if (err)
6851	verbose_linfo(env, insn_off: insn_idx, prefix_fmt: "; ");
6852	if (!err && t == BPF_READ && value_regno >= `0`) {
6853	/ ctx access returns either a scalar, or a*
6854	* PTR_TO_PACKET[_META,_END]. In the latter
6855	* case, we know the offset is zero.
6856	*/
6857	if (reg_type == SCALAR_VALUE) {
6858	mark_reg_unknown(env, regs, regno: value_regno);
6859	} else {
6860	mark_reg_known_zero(env, regs,
6861	regno: value_regno);
6862	if (type_may_be_null(type: reg_type))
6863	regs[value_regno].id = ++env->id_gen;
6864	/ A load of ctx field could have different*
6865	* actual load size with the one encoded in the
6866	* insn. When the dst is PTR, it is for sure not
6867	* a sub-register.
6868	*/
6869	regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6870	if (base_type(type: reg_type) == PTR_TO_BTF_ID) {
6871	regs[value_regno].btf = btf;
6872	regs[value_regno].btf_id = btf_id;
6873	}
6874	}
6875	regs[value_regno].type = reg_type;
6876	}
6877
6878	} else if (reg->type == PTR_TO_STACK) {
6879	/ Basic bounds checks. /
6880	err = check_stack_access_within_bounds(env, regno, off, access_size: size, src: ACCESS_DIRECT, type: t);
6881	if (err)
6882	return err;
6883
6884	if (t == BPF_READ)
6885	err = check_stack_read(env, ptr_regno: regno, off, size,
6886	dst_regno: value_regno);
6887	else
6888	err = check_stack_write(env, ptr_regno: regno, off, size,
6889	value_regno, insn_idx);
6890	} else if (reg_is_pkt_pointer(reg)) {
6891	if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6892	verbose(private_data: env, fmt: "cannot write into packet\n");
6893	return -EACCES;
6894	}
6895	if (t == BPF_WRITE && value_regno >= `0` &&
6896	is_pointer_value(env, regno: value_regno)) {
6897	verbose(private_data: env, fmt: "R%d leaks addr into packet\n",
6898	value_regno);
6899	return -EACCES;
6900	}
6901	err = check_packet_access(env, regno, off, size, zero_size_allowed: false);
6902	if (!err && t == BPF_READ && value_regno >= `0`)
6903	mark_reg_unknown(env, regs, regno: value_regno);
6904	} else if (reg->type == PTR_TO_FLOW_KEYS) {
6905	if (t == BPF_WRITE && value_regno >= `0` &&
6906	is_pointer_value(env, regno: value_regno)) {
6907	verbose(private_data: env, fmt: "R%d leaks addr into flow keys\n",
6908	value_regno);
6909	return -EACCES;
6910	}
6911
6912	err = check_flow_keys_access(env, off, size);
6913	if (!err && t == BPF_READ && value_regno >= `0`)
6914	mark_reg_unknown(env, regs, regno: value_regno);
6915	} else if (type_is_sk_pointer(type: reg->type)) {
6916	if (t == BPF_WRITE) {
6917	verbose(private_data: env, fmt: "R%d cannot write into %s\n",
6918	regno, reg_type_str(env, type: reg->type));
6919	return -EACCES;
6920	}
6921	err = check_sock_access(env, insn_idx, regno, off, size, t);
6922	if (!err && value_regno >= `0`)
6923	mark_reg_unknown(env, regs, regno: value_regno);
6924	} else if (reg->type == PTR_TO_TP_BUFFER) {
6925	err = check_tp_buffer_access(env, reg, regno, off, size);
6926	if (!err && t == BPF_READ && value_regno >= `0`)
6927	mark_reg_unknown(env, regs, regno: value_regno);
6928	} else if (base_type(type: reg->type) == PTR_TO_BTF_ID &&
6929	!type_may_be_null(type: reg->type)) {
6930	err = check_ptr_to_btf_access(env, regs, regno, off, size, atype: t,
6931	value_regno);
6932	} else if (reg->type == CONST_PTR_TO_MAP) {
6933	err = check_ptr_to_map_access(env, regs, regno, off, size, atype: t,
6934	value_regno);
6935	} else if (base_type(type: reg->type) == PTR_TO_BUF) {
6936	bool rdonly_mem = type_is_rdonly_mem(type: reg->type);
6937	u32 *max_access;
6938
6939	if (rdonly_mem) {
6940	if (t == BPF_WRITE) {
6941	verbose(private_data: env, fmt: "R%d cannot write into %s\n",
6942	regno, reg_type_str(env, type: reg->type));
6943	return -EACCES;
6944	}
6945	max_access = &env->prog->aux->max_rdonly_access;
6946	} else {
6947	max_access = &env->prog->aux->max_rdwr_access;
6948	}
6949
6950	err = check_buffer_access(env, reg, regno, off, size, zero_size_allowed: false,
6951	max_access);
6952
6953	if (!err && value_regno >= `0` && (rdonly_mem \|\| t == BPF_READ))
6954	mark_reg_unknown(env, regs, regno: value_regno);
6955	} else if (reg->type == PTR_TO_ARENA) {
6956	if (t == BPF_READ && value_regno >= `0`)
6957	mark_reg_unknown(env, regs, regno: value_regno);
6958	} else {
6959	verbose(private_data: env, fmt: "R%d invalid mem access '%s'\n", regno,
6960	reg_type_str(env, type: reg->type));
6961	return -EACCES;
6962	}
6963
6964	if (!err && size < BPF_REG_SIZE && value_regno >= `0` && t == BPF_READ &&
6965	regs[value_regno].type == SCALAR_VALUE) {
6966	if (!is_ldsx)
6967	/ b/h/w load zero-extends, mark upper bits as known 0 /
6968	coerce_reg_to_size(reg: &regs[value_regno], size);
6969	else
6970	coerce_reg_to_size_sx(reg: &regs[value_regno], size);
6971	}
6972	return err;
6973	}
6974
6975	static int check_atomic(struct bpf_verifier_env env, int* insn_idx, struct bpf_insn *insn)
6976	{
6977	int load_reg;
6978	int err;
6979
6980	switch (insn->imm) {
6981	case BPF_ADD:
6982	case BPF_ADD \| BPF_FETCH:
6983	case BPF_AND:
6984	case BPF_AND \| BPF_FETCH:
6985	case BPF_OR:
6986	case BPF_OR \| BPF_FETCH:
6987	case BPF_XOR:
6988	case BPF_XOR \| BPF_FETCH:
6989	case BPF_XCHG:
6990	case BPF_CMPXCHG:
6991	break;
6992	default:
6993	verbose(private_data: env, fmt: "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
6994	return -EINVAL;
6995	}
6996
6997	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6998	verbose(private_data: env, fmt: "invalid atomic operand size\n");
6999	return -EINVAL;
7000	}
7001
7002	/ check src1 operand /
7003	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
7004	if (err)
7005	return err;
7006
7007	/ check src2 operand /
7008	err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
7009	if (err)
7010	return err;
7011
7012	if (insn->imm == BPF_CMPXCHG) {
7013	/ Check comparison of R0 with memory location /
7014	const u32 aux_reg = BPF_REG_0;
7015
7016	err = check_reg_arg(env, regno: aux_reg, t: SRC_OP);
7017	if (err)
7018	return err;
7019
7020	if (is_pointer_value(env, regno: aux_reg)) {
7021	verbose(private_data: env, fmt: "R%d leaks addr into mem\n", aux_reg);
7022	return -EACCES;
7023	}
7024	}
7025
7026	if (is_pointer_value(env, regno: insn->src_reg)) {
7027	verbose(private_data: env, fmt: "R%d leaks addr into mem\n", insn->src_reg);
7028	return -EACCES;
7029	}
7030
7031	if (is_ctx_reg(env, regno: insn->dst_reg) \|\|
7032	is_pkt_reg(env, regno: insn->dst_reg) \|\|
7033	is_flow_key_reg(env, regno: insn->dst_reg) \|\|
7034	is_sk_reg(env, regno: insn->dst_reg) \|\|
7035	is_arena_reg(env, regno: insn->dst_reg)) {
7036	verbose(private_data: env, fmt: "BPF_ATOMIC stores into R%d %s is not allowed\n",
7037	insn->dst_reg,
7038	reg_type_str(env, type: reg_state(env, regno: insn->dst_reg)->type));
7039	return -EACCES;
7040	}
7041
7042	if (insn->imm & BPF_FETCH) {
7043	if (insn->imm == BPF_CMPXCHG)
7044	load_reg = BPF_REG_0;
7045	else
7046	load_reg = insn->src_reg;
7047
7048	/ check and record load of old value /
7049	err = check_reg_arg(env, regno: load_reg, t: DST_OP);
7050	if (err)
7051	return err;
7052	} else {
7053	/ This instruction accesses a memory location but doesn't*
7054	* actually load it into a register.
7055	*/
7056	load_reg = -`1`;
7057	}
7058
7059	/ Check whether we can read the memory, with second call for fetch*
7060	* case to simulate the register fill.
7061	*/
7062	err = check_mem_access(env, insn_idx, regno: insn->dst_reg, off: insn->off,
7063	BPF_SIZE(insn->code), t: BPF_READ, value_regno: -`1`, strict_alignment_once: true, is_ldsx: false);
7064	if (!err && load_reg >= `0`)
7065	err = check_mem_access(env, insn_idx, regno: insn->dst_reg, off: insn->off,
7066	BPF_SIZE(insn->code), t: BPF_READ, value_regno: load_reg,
7067	strict_alignment_once: true, is_ldsx: false);
7068	if (err)
7069	return err;
7070
7071	/ Check whether we can write into the same memory. /
7072	err = check_mem_access(env, insn_idx, regno: insn->dst_reg, off: insn->off,
7073	BPF_SIZE(insn->code), t: BPF_WRITE, value_regno: -`1`, strict_alignment_once: true, is_ldsx: false);
7074	if (err)
7075	return err;
7076	return `0`;
7077	}
7078
7079	/ When register 'regno' is used to read the stack (either directly or through*
7080	* a helper function) make sure that it's within stack boundary and, depending
7081	* on the access type and privileges, that all elements of the stack are
7082	* initialized.
7083	*
7084	* 'off' includes 'regno->off', but not its dynamic part (if any).
7085	*
7086	* All registers that have been spilled on the stack in the slots within the
7087	* read offsets are marked as read.
7088	*/
7089	static int check_stack_range_initialized(
7090	struct bpf_verifier_env env, int* regno, int off,
7091	int access_size, bool zero_size_allowed,
7092	enum bpf_access_src type, struct bpf_call_arg_meta *meta)
7093	{
7094	struct bpf_reg_state *reg = reg_state(env, regno);
7095	struct bpf_func_state *state = func(env, reg);
7096	int err, min_off, max_off, i, j, slot, spi;
7097	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
7098	enum bpf_access_type bounds_check_type;
7099	/ Some accesses can write anything into the stack, others are*
7100	* read-only.
7101	*/
7102	bool clobber = false;
7103
7104	if (access_size == `0` && !zero_size_allowed) {
7105	verbose(private_data: env, fmt: "invalid zero-sized read\n");
7106	return -EACCES;
7107	}
7108
7109	if (type == ACCESS_HELPER) {
7110	/ The bounds checks for writes are more permissive than for*
7111	* reads. However, if raw_mode is not set, we'll do extra
7112	* checks below.
7113	*/
7114	bounds_check_type = BPF_WRITE;
7115	clobber = true;
7116	} else {
7117	bounds_check_type = BPF_READ;
7118	}
7119	err = check_stack_access_within_bounds(env, regno, off, access_size,
7120	src: type, type: bounds_check_type);
7121	if (err)
7122	return err;
7123
7124
7125	if (tnum_is_const(a: reg->var_off)) {
7126	min_off = max_off = reg->var_off.value + off;
7127	} else {
7128	/ Variable offset is prohibited for unprivileged mode for*
7129	* simplicity since it requires corresponding support in
7130	* Spectre masking for stack ALU.
7131	* See also retrieve_ptr_limit().
7132	*/
7133	if (!env->bypass_spec_v1) {
7134	char tn_buf[`48`];
7135
7136	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
7137	verbose(private_data: env, fmt: "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
7138	regno, err_extra, tn_buf);
7139	return -EACCES;
7140	}
7141	/ Only initialized buffer on stack is allowed to be accessed*
7142	* with variable offset. With uninitialized buffer it's hard to
7143	* guarantee that whole memory is marked as initialized on
7144	* helper return since specific bounds are unknown what may
7145	* cause uninitialized stack leaking.
7146	*/
7147	if (meta && meta->raw_mode)
7148	meta = NULL;
7149
7150	min_off = reg->smin_value + off;
7151	max_off = reg->smax_value + off;
7152	}
7153
7154	if (meta && meta->raw_mode) {
7155	/ Ensure we won't be overwriting dynptrs when simulating byte*
7156	* by byte access in check_helper_call using meta.access_size.
7157	* This would be a problem if we have a helper in the future
7158	* which takes:
7159	*
7160	* helper(uninit_mem, len, dynptr)
7161	*
7162	* Now, uninint_mem may overlap with dynptr pointer. Hence, it
7163	* may end up writing to dynptr itself when touching memory from
7164	* arg 1. This can be relaxed on a case by case basis for known
7165	* safe cases, but reject due to the possibilitiy of aliasing by
7166	* default.
7167	*/
7168	for (i = min_off; i < max_off + access_size; i++) {
7169	int stack_off = -i - `1`;
7170
7171	spi = __get_spi(off: i);
7172	/ raw_mode may write past allocated_stack /
7173	if (state->allocated_stack <= stack_off)
7174	continue;
7175	if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
7176	verbose(private_data: env, fmt: "potential write to dynptr at off=%d disallowed\n", i);
7177	return -EACCES;
7178	}
7179	}
7180	meta->access_size = access_size;
7181	meta->regno = regno;
7182	return `0`;
7183	}
7184
7185	for (i = min_off; i < max_off + access_size; i++) {
7186	u8 *stype;
7187
7188	slot = -i - `1`;
7189	spi = slot / BPF_REG_SIZE;
7190	if (state->allocated_stack <= slot) {
7191	verbose(private_data: env, fmt: "verifier bug: allocated_stack too small");
7192	return -EFAULT;
7193	}
7194
7195	stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
7196	if (*stype == STACK_MISC)
7197	goto mark;
7198	if ((*stype == STACK_ZERO) \|\|
7199	(*stype == STACK_INVALID && env->allow_uninit_stack)) {
7200	if (clobber) {
7201	/ helper can write anything into the stack /
7202	*stype = STACK_MISC;
7203	}
7204	goto mark;
7205	}
7206
7207	if (is_spilled_reg(stack: &state->stack[spi]) &&
7208	(state->stack[spi].spilled_ptr.type == SCALAR_VALUE \|\|
7209	env->allow_ptr_leaks)) {
7210	if (clobber) {
7211	__mark_reg_unknown(env, reg: &state->stack[spi].spilled_ptr);
7212	for (j = `0`; j < BPF_REG_SIZE; j++)
7213	scrub_spilled_slot(stype: &state->stack[spi].slot_type[j]);
7214	}
7215	goto mark;
7216	}
7217
7218	if (tnum_is_const(a: reg->var_off)) {
7219	verbose(private_data: env, fmt: "invalid%s read from stack R%d off %d+%d size %d\n",
7220	err_extra, regno, min_off, i - min_off, access_size);
7221	} else {
7222	char tn_buf[`48`];
7223
7224	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
7225	verbose(private_data: env, fmt: "invalid%s read from stack R%d var_off %s+%d size %d\n",
7226	err_extra, regno, tn_buf, i - min_off, access_size);
7227	}
7228	return -EACCES;
7229	mark:
7230	/ reading any byte out of 8-byte 'spill_slot' will cause*
7231	* the whole slot to be marked as 'read'
7232	*/
7233	mark_reg_read(env, state: &state->stack[spi].spilled_ptr,
7234	parent: state->stack[spi].spilled_ptr.parent,
7235	flag: REG_LIVE_READ64);
7236	/ We do not set REG_LIVE_WRITTEN for stack slot, as we can not*
7237	* be sure that whether stack slot is written to or not. Hence,
7238	* we must still conservatively propagate reads upwards even if
7239	* helper may write to the entire memory range.
7240	*/
7241	}
7242	return `0`;
7243	}
7244
7245	static int check_helper_mem_access(struct bpf_verifier_env env, int* regno,
7246	int access_size, bool zero_size_allowed,
7247	struct bpf_call_arg_meta *meta)
7248	{
7249	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
7250	u32 *max_access;
7251
7252	switch (base_type(type: reg->type)) {
7253	case PTR_TO_PACKET:
7254	case PTR_TO_PACKET_META:
7255	return check_packet_access(env, regno, off: reg->off, size: access_size,
7256	zero_size_allowed);
7257	case PTR_TO_MAP_KEY:
7258	if (meta && meta->raw_mode) {
7259	verbose(private_data: env, fmt: "R%d cannot write into %s\n", regno,
7260	reg_type_str(env, type: reg->type));
7261	return -EACCES;
7262	}
7263	return check_mem_region_access(env, regno, off: reg->off, size: access_size,
7264	mem_size: reg->map_ptr->key_size, zero_size_allowed: false);
7265	case PTR_TO_MAP_VALUE:
7266	if (check_map_access_type(env, regno, off: reg->off, size: access_size,
7267	type: meta && meta->raw_mode ? BPF_WRITE :
7268	BPF_READ))
7269	return -EACCES;
7270	return check_map_access(env, regno, off: reg->off, size: access_size,
7271	zero_size_allowed, src: ACCESS_HELPER);
7272	case PTR_TO_MEM:
7273	if (type_is_rdonly_mem(type: reg->type)) {
7274	if (meta && meta->raw_mode) {
7275	verbose(private_data: env, fmt: "R%d cannot write into %s\n", regno,
7276	reg_type_str(env, type: reg->type));
7277	return -EACCES;
7278	}
7279	}
7280	return check_mem_region_access(env, regno, off: reg->off,
7281	size: access_size, mem_size: reg->mem_size,
7282	zero_size_allowed);
7283	case PTR_TO_BUF:
7284	if (type_is_rdonly_mem(type: reg->type)) {
7285	if (meta && meta->raw_mode) {
7286	verbose(private_data: env, fmt: "R%d cannot write into %s\n", regno,
7287	reg_type_str(env, type: reg->type));
7288	return -EACCES;
7289	}
7290
7291	max_access = &env->prog->aux->max_rdonly_access;
7292	} else {
7293	max_access = &env->prog->aux->max_rdwr_access;
7294	}
7295	return check_buffer_access(env, reg, regno, off: reg->off,
7296	size: access_size, zero_size_allowed,
7297	max_access);
7298	case PTR_TO_STACK:
7299	return check_stack_range_initialized(
7300	env,
7301	regno, off: reg->off, access_size,
7302	zero_size_allowed, type: ACCESS_HELPER, meta);
7303	case PTR_TO_BTF_ID:
7304	return check_ptr_to_btf_access(env, regs, regno, off: reg->off,
7305	size: access_size, atype: BPF_READ, value_regno: -`1`);
7306	case PTR_TO_CTX:
7307	/ in case the function doesn't know how to access the context,*
7308	* (because we are in a program of type SYSCALL for example), we
7309	* can not statically check its size.
7310	* Dynamically check it now.
7311	*/
7312	if (!env->ops->convert_ctx_access) {
7313	enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
7314	int offset = access_size - `1`;
7315
7316	/ Allow zero-byte read from PTR_TO_CTX /
7317	if (access_size == `0`)
7318	return zero_size_allowed ? `0` : -EACCES;
7319
7320	return check_mem_access(env, insn_idx: env->insn_idx, regno, off: offset, BPF_B,
7321	t: atype, value_regno: -`1`, strict_alignment_once: false, is_ldsx: false);
7322	}
7323
7324	fallthrough;
7325	default: / scalar_value or invalid ptr /
7326	/ Allow zero-byte read from NULL, regardless of pointer type /
7327	if (zero_size_allowed && access_size == `0` &&
7328	register_is_null(reg))
7329	return `0`;
7330
7331	verbose(private_data: env, fmt: "R%d type=%s ", regno,
7332	reg_type_str(env, type: reg->type));
7333	verbose(private_data: env, fmt: "expected=%s\n", reg_type_str(env, type: PTR_TO_STACK));
7334	return -EACCES;
7335	}
7336	}
7337
7338	/ verify arguments to helpers or kfuncs consisting of a pointer and an access*
7339	* size.
7340	*
7341	* @regno is the register containing the access size. regno-1 is the register
7342	* containing the pointer.
7343	*/
7344	static int check_mem_size_reg(struct bpf_verifier_env *env,
7345	struct bpf_reg_state *reg, u32 regno,
7346	bool zero_size_allowed,
7347	struct bpf_call_arg_meta *meta)
7348	{
7349	int err;
7350
7351	/ This is used to refine r0 return value bounds for helpers*
7352	* that enforce this value as an upper bound on return values.
7353	* See do_refine_retval_range() for helpers that can refine
7354	* the return value. C type of helper is u32 so we pull register
7355	* bound from umax_value however, if negative verifier errors
7356	* out. Only upper bounds can be learned because retval is an
7357	* int type and negative retvals are allowed.
7358	*/
7359	meta->msize_max_value = reg->umax_value;
7360
7361	/ The register is SCALAR_VALUE; the access check*
7362	* happens using its boundaries.
7363	*/
7364	if (!tnum_is_const(a: reg->var_off))
7365	/ For unprivileged variable accesses, disable raw*
7366	* mode so that the program is required to
7367	* initialize all the memory that the helper could
7368	* just partially fill up.
7369	*/
7370	meta = NULL;
7371
7372	if (reg->smin_value < `0`) {
7373	verbose(private_data: env, fmt: "R%d min value is negative, either use unsigned or 'var &= const'\n",
7374	regno);
7375	return -EACCES;
7376	}
7377
7378	if (reg->umin_value == `0` && !zero_size_allowed) {
7379	verbose(private_data: env, fmt: "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
7380	regno, reg->umin_value, reg->umax_value);
7381	return -EACCES;
7382	}
7383
7384	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
7385	verbose(private_data: env, fmt: "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7386	regno);
7387	return -EACCES;
7388	}
7389	err = check_helper_mem_access(env, regno: regno - `1`,
7390	access_size: reg->umax_value,
7391	zero_size_allowed, meta);
7392	if (!err)
7393	err = mark_chain_precision(env, regno);
7394	return err;
7395	}
7396
7397	static int check_mem_reg(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
7398	u32 regno, u32 mem_size)
7399	{
7400	bool may_be_null = type_may_be_null(type: reg->type);
7401	struct bpf_reg_state saved_reg;
7402	struct bpf_call_arg_meta meta;
7403	int err;
7404
7405	if (register_is_null(reg))
7406	return `0`;
7407
7408	memset(&meta, `0`, sizeof(meta));
7409	/ Assuming that the register contains a value check if the memory*
7410	* access is safe. Temporarily save and restore the register's state as
7411	* the conversion shouldn't be visible to a caller.
7412	*/
7413	if (may_be_null) {
7414	saved_reg = *reg;
7415	mark_ptr_not_null_reg(reg);
7416	}
7417
7418	err = check_helper_mem_access(env, regno, access_size: mem_size, zero_size_allowed: true, meta: &meta);
7419	/ Check access for BPF_WRITE /
7420	meta.raw_mode = true;
7421	err = err ?: check_helper_mem_access(env, regno, access_size: mem_size, zero_size_allowed: true, meta: &meta);
7422
7423	if (may_be_null)
7424	*reg = saved_reg;
7425
7426	return err;
7427	}
7428
7429	static int check_kfunc_mem_size_reg(struct bpf_verifier_env env, struct* bpf_reg_state *reg,
7430	u32 regno)
7431	{
7432	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - `1`];
7433	bool may_be_null = type_may_be_null(type: mem_reg->type);
7434	struct bpf_reg_state saved_reg;
7435	struct bpf_call_arg_meta meta;
7436	int err;
7437
7438	WARN_ON_ONCE(regno < BPF_REG_2 \|\| regno > BPF_REG_5);
7439
7440	memset(&meta, `0`, sizeof(meta));
7441
7442	if (may_be_null) {
7443	saved_reg = *mem_reg;
7444	mark_ptr_not_null_reg(reg: mem_reg);
7445	}
7446
7447	err = check_mem_size_reg(env, reg, regno, zero_size_allowed: true, meta: &meta);
7448	/ Check access for BPF_WRITE /
7449	meta.raw_mode = true;
7450	err = err ?: check_mem_size_reg(env, reg, regno, zero_size_allowed: true, meta: &meta);
7451
7452	if (may_be_null)
7453	*mem_reg = saved_reg;
7454	return err;
7455	}
7456
7457	/ Implementation details:*
7458	* bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7459	* bpf_obj_new returns PTR_TO_BTF_ID \| MEM_ALLOC \| PTR_MAYBE_NULL.
7460	* Two bpf_map_lookups (even with the same key) will have different reg->id.
7461	* Two separate bpf_obj_new will also have different reg->id.
7462	* For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID \| MEM_ALLOC, the verifier
7463	* clears reg->id after value_or_null->value transition, since the verifier only
7464	* cares about the range of access to valid map value pointer and doesn't care
7465	* about actual address of the map element.
7466	* For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7467	* reg->id > 0 after value_or_null->value transition. By doing so
7468	* two bpf_map_lookups will be considered two different pointers that
7469	* point to different bpf_spin_locks. Likewise for pointers to allocated objects
7470	* returned from bpf_obj_new.
7471	* The verifier allows taking only one bpf_spin_lock at a time to avoid
7472	* dead-locks.
7473	* Since only one bpf_spin_lock is allowed the checks are simpler than
7474	* reg_is_refcounted() logic. The verifier needs to remember only
7475	* one spin_lock instead of array of acquired_refs.
7476	* cur_state->active_lock remembers which map value element or allocated
7477	* object got locked and clears it after bpf_spin_unlock.
7478	*/
7479	static int process_spin_lock(struct bpf_verifier_env env, int* regno,
7480	bool is_lock)
7481	{
7482	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
7483	struct bpf_verifier_state *cur = env->cur_state;
7484	bool is_const = tnum_is_const(a: reg->var_off);
7485	u64 val = reg->var_off.value;
7486	struct bpf_map *map = NULL;
7487	struct btf *btf = NULL;
7488	struct btf_record *rec;
7489
7490	if (!is_const) {
7491	verbose(private_data: env,
7492	fmt: "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
7493	regno);
7494	return -EINVAL;
7495	}
7496	if (reg->type == PTR_TO_MAP_VALUE) {
7497	map = reg->map_ptr;
7498	if (!map->btf) {
7499	verbose(private_data: env,
7500	fmt: "map '%s' has to have BTF in order to use bpf_spin_lock\n",
7501	map->name);
7502	return -EINVAL;
7503	}
7504	} else {
7505	btf = reg->btf;
7506	}
7507
7508	rec = reg_btf_record(reg);
7509	if (!btf_record_has_field(rec, type: BPF_SPIN_LOCK)) {
7510	verbose(private_data: env, fmt: "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
7511	map ? map->name : "kptr");
7512	return -EINVAL;
7513	}
7514	if (rec->spin_lock_off != val + reg->off) {
7515	verbose(private_data: env, fmt: "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
7516	val + reg->off, rec->spin_lock_off);
7517	return -EINVAL;
7518	}
7519	if (is_lock) {
7520	if (cur->active_lock.ptr) {
7521	verbose(private_data: env,
7522	fmt: "Locking two bpf_spin_locks are not allowed\n");
7523	return -EINVAL;
7524	}
7525	if (map)
7526	cur->active_lock.ptr = map;
7527	else
7528	cur->active_lock.ptr = btf;
7529	cur->active_lock.id = reg->id;
7530	} else {
7531	void *ptr;
7532
7533	if (map)
7534	ptr = map;
7535	else
7536	ptr = btf;
7537
7538	if (!cur->active_lock.ptr) {
7539	verbose(private_data: env, fmt: "bpf_spin_unlock without taking a lock\n");
7540	return -EINVAL;
7541	}
7542	if (cur->active_lock.ptr != ptr \|\|
7543	cur->active_lock.id != reg->id) {
7544	verbose(private_data: env, fmt: "bpf_spin_unlock of different lock\n");
7545	return -EINVAL;
7546	}
7547
7548	invalidate_non_owning_refs(env);
7549
7550	cur->active_lock.ptr = NULL;
7551	cur->active_lock.id = `0`;
7552	}
7553	return `0`;
7554	}
7555
7556	static int process_timer_func(struct bpf_verifier_env env, int* regno,
7557	struct bpf_call_arg_meta *meta)
7558	{
7559	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
7560	bool is_const = tnum_is_const(a: reg->var_off);
7561	struct bpf_map *map = reg->map_ptr;
7562	u64 val = reg->var_off.value;
7563
7564	if (!is_const) {
7565	verbose(private_data: env,
7566	fmt: "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
7567	regno);
7568	return -EINVAL;
7569	}
7570	if (!map->btf) {
7571	verbose(private_data: env, fmt: "map '%s' has to have BTF in order to use bpf_timer\n",
7572	map->name);
7573	return -EINVAL;
7574	}
7575	if (!btf_record_has_field(rec: map->record, type: BPF_TIMER)) {
7576	verbose(private_data: env, fmt: "map '%s' has no valid bpf_timer\n", map->name);
7577	return -EINVAL;
7578	}
7579	if (map->record->timer_off != val + reg->off) {
7580	verbose(private_data: env, fmt: "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
7581	val + reg->off, map->record->timer_off);
7582	return -EINVAL;
7583	}
7584	if (meta->map_ptr) {
7585	verbose(private_data: env, fmt: "verifier bug. Two map pointers in a timer helper\n");
7586	return -EFAULT;
7587	}
7588	meta->map_uid = reg->map_uid;
7589	meta->map_ptr = map;
7590	return `0`;
7591	}
7592
7593	static int process_kptr_func(struct bpf_verifier_env env, int* regno,
7594	struct bpf_call_arg_meta *meta)
7595	{
7596	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
7597	struct bpf_map *map_ptr = reg->map_ptr;
7598	struct btf_field *kptr_field;
7599	u32 kptr_off;
7600
7601	if (!tnum_is_const(a: reg->var_off)) {
7602	verbose(private_data: env,
7603	fmt: "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7604	regno);
7605	return -EINVAL;
7606	}
7607	if (!map_ptr->btf) {
7608	verbose(private_data: env, fmt: "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7609	map_ptr->name);
7610	return -EINVAL;
7611	}
7612	if (!btf_record_has_field(rec: map_ptr->record, type: BPF_KPTR)) {
7613	verbose(private_data: env, fmt: "map '%s' has no valid kptr\n", map_ptr->name);
7614	return -EINVAL;
7615	}
7616
7617	meta->map_ptr = map_ptr;
7618	kptr_off = reg->off + reg->var_off.value;
7619	kptr_field = btf_record_find(rec: map_ptr->record, offset: kptr_off, field_mask: BPF_KPTR);
7620	if (!kptr_field) {
7621	verbose(private_data: env, fmt: "off=%d doesn't point to kptr\n", kptr_off);
7622	return -EACCES;
7623	}
7624	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7625	verbose(private_data: env, fmt: "off=%d kptr isn't referenced kptr\n", kptr_off);
7626	return -EACCES;
7627	}
7628	meta->kptr_field = kptr_field;
7629	return `0`;
7630	}
7631
7632	/ There are two register types representing a bpf_dynptr, one is PTR_TO_STACK*
7633	* which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7634	*
7635	* In both cases we deal with the first 8 bytes, but need to mark the next 8
7636	* bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7637	* CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7638	*
7639	* Mutability of bpf_dynptr is at two levels, one is at the level of struct
7640	* bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7641	* bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7642	* mutate the view of the dynptr and also possibly destroy it. In the latter
7643	* case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7644	* memory that dynptr points to.
7645	*
7646	* The verifier will keep track of both levels of mutation (bpf_dynptr's in
7647	* reg->type and the memory's in reg->dynptr.type), but there is no support for
7648	* readonly dynptr view yet, hence only the first case is tracked and checked.
7649	*
7650	* This is consistent with how C applies the const modifier to a struct object,
7651	* where the pointer itself inside bpf_dynptr becomes const but not what it
7652	* points to.
7653	*
7654	* Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7655	* type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7656	*/
7657	static int process_dynptr_func(struct bpf_verifier_env env, int* regno, int insn_idx,
7658	enum bpf_arg_type arg_type, int clone_ref_obj_id)
7659	{
7660	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
7661	int err;
7662
7663	/ MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an*
7664	* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR \| DYNPTR_TYPE_*):
7665	*/
7666	if ((arg_type & (MEM_UNINIT \| MEM_RDONLY)) == (MEM_UNINIT \| MEM_RDONLY)) {
7667	verbose(private_data: env, fmt: "verifier internal error: misconfigured dynptr helper type flags\n");
7668	return -EFAULT;
7669	}
7670
7671	/ MEM_UNINIT - Points to memory that is an appropriate candidate for*
7672	* constructing a mutable bpf_dynptr object.
7673	*
7674	* Currently, this is only possible with PTR_TO_STACK
7675	* pointing to a region of at least 16 bytes which doesn't
7676	* contain an existing bpf_dynptr.
7677	*
7678	* MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
7679	* mutated or destroyed. However, the memory it points to
7680	* may be mutated.
7681	*
7682	* None - Points to a initialized dynptr that can be mutated and
7683	* destroyed, including mutation of the memory it points
7684	* to.
7685	*/
7686	if (arg_type & MEM_UNINIT) {
7687	int i;
7688
7689	if (!is_dynptr_reg_valid_uninit(env, reg)) {
7690	verbose(private_data: env, fmt: "Dynptr has to be an uninitialized dynptr\n");
7691	return -EINVAL;
7692	}
7693
7694	/ we write BPF_DW bits (8 bytes) at a time /
7695	for (i = `0`; i < BPF_DYNPTR_SIZE; i += `8`) {
7696	err = check_mem_access(env, insn_idx, regno,
7697	off: i, BPF_DW, t: BPF_WRITE, value_regno: -`1`, strict_alignment_once: false, is_ldsx: false);
7698	if (err)
7699	return err;
7700	}
7701
7702	err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
7703	} else / MEM_RDONLY and None case from above / {
7704	/ For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const /
7705	if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
7706	verbose(private_data: env, fmt: "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
7707	return -EINVAL;
7708	}
7709
7710	if (!is_dynptr_reg_valid_init(env, reg)) {
7711	verbose(private_data: env,
7712	fmt: "Expected an initialized dynptr as arg #%d\n",
7713	regno);
7714	return -EINVAL;
7715	}
7716
7717	/ Fold modifiers (in this case, MEM_RDONLY) when checking expected type /
7718	if (!is_dynptr_type_expected(env, reg, arg_type: arg_type & ~MEM_RDONLY)) {
7719	verbose(private_data: env,
7720	fmt: "Expected a dynptr of type %s as arg #%d\n",
7721	dynptr_type_str(type: arg_to_dynptr_type(arg_type)), regno);
7722	return -EINVAL;
7723	}
7724
7725	err = mark_dynptr_read(env, reg);
7726	}
7727	return err;
7728	}
7729
7730	static u32 iter_ref_obj_id(struct bpf_verifier_env env, struct* bpf_reg_state reg, int* spi)
7731	{
7732	struct bpf_func_state *state = func(env, reg);
7733
7734	return state->stack[spi].spilled_ptr.ref_obj_id;
7735	}
7736
7737	static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7738	{
7739	return meta->kfunc_flags & (KF_ITER_NEW \| KF_ITER_NEXT \| KF_ITER_DESTROY);
7740	}
7741
7742	static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7743	{
7744	return meta->kfunc_flags & KF_ITER_NEW;
7745	}
7746
7747	static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7748	{
7749	return meta->kfunc_flags & KF_ITER_NEXT;
7750	}
7751
7752	static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7753	{
7754	return meta->kfunc_flags & KF_ITER_DESTROY;
7755	}
7756
/* True if argument number @arg of the kfunc is the iterator state pointer. */
static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
{
	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
	 * kfunc is iter state pointer
	 */
	if (arg != 0)
		return false;

	return is_iter_kfunc(meta);
}
7764
7765	static int process_iter_arg(struct bpf_verifier_env env, int* regno, int insn_idx,
7766	struct bpf_kfunc_call_arg_meta *meta)
7767	{
7768	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
7769	const struct btf_type *t;
7770	const struct btf_param *arg;
7771	int spi, err, i, nr_slots;
7772	u32 btf_id;
7773
7774	/ btf_check_iter_kfuncs() ensures we don't need to validate anything here /
7775	arg = &btf_params(t: meta->func_proto)[`0`];
7776	t = btf_type_skip_modifiers(btf: meta->btf, id: arg->type, NULL); / PTR /
7777	t = btf_type_skip_modifiers(btf: meta->btf, id: t->type, res_id: &btf_id); / STRUCT /
7778	nr_slots = t->size / BPF_REG_SIZE;
7779
7780	if (is_iter_new_kfunc(meta)) {
7781	/ bpf_iter_<type>_new() expects pointer to uninit iter state /
7782	if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7783	verbose(private_data: env, fmt: "expected uninitialized iter_%s as arg #%d\n",
7784	iter_type_str(btf: meta->btf, btf_id), regno);
7785	return -EINVAL;
7786	}
7787
7788	for (i = `0`; i < nr_slots * `8`; i += BPF_REG_SIZE) {
7789	err = check_mem_access(env, insn_idx, regno,
7790	off: i, BPF_DW, t: BPF_WRITE, value_regno: -`1`, strict_alignment_once: false, is_ldsx: false);
7791	if (err)
7792	return err;
7793	}
7794
7795	err = mark_stack_slots_iter(env, meta, reg, insn_idx, btf: meta->btf, btf_id, nr_slots);
7796	if (err)
7797	return err;
7798	} else {
7799	/ iter_next() or iter_destroy() expect initialized iter state/
7800	err = is_iter_reg_valid_init(env, reg, btf: meta->btf, btf_id, nr_slots);
7801	switch (err) {
7802	case `0`:
7803	break;
7804	case -EINVAL:
7805	verbose(private_data: env, fmt: "expected an initialized iter_%s as arg #%d\n",
7806	iter_type_str(btf: meta->btf, btf_id), regno);
7807	return err;
7808	case -EPROTO:
7809	verbose(private_data: env, fmt: "expected an RCU CS when using %s\n", meta->func_name);
7810	return err;
7811	default:
7812	return err;
7813	}
7814
7815	spi = iter_get_spi(env, reg, nr_slots);
7816	if (spi < `0`)
7817	return spi;
7818
7819	err = mark_iter_read(env, reg, spi, nr_slots);
7820	if (err)
7821	return err;
7822
7823	/ remember meta->iter info for process_iter_next_call() /
7824	meta->iter.spi = spi;
7825	meta->iter.frameno = reg->frameno;
7826	meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
7827
7828	if (is_iter_destroy_kfunc(meta)) {
7829	err = unmark_stack_slots_iter(env, reg, nr_slots);
7830	if (err)
7831	return err;
7832	}
7833	}
7834
7835	return `0`;
7836	}
7837
7838	/ Look for a previous loop entry at insn_idx: nearest parent state*
7839	* stopped at insn_idx with callsites matching those in cur->frame.
7840	*/
7841	static struct bpf_verifier_state find_prev_entry(struct* bpf_verifier_env *env,
7842	struct bpf_verifier_state *cur,
7843	int insn_idx)
7844	{
7845	struct bpf_verifier_state_list *sl;
7846	struct bpf_verifier_state *st;
7847
7848	/ Explored states are pushed in stack order, most recent states come first /
7849	sl = *explored_state(env, idx: insn_idx);
7850	for (; sl; sl = sl->next) {
7851	/ If st->branches != 0 state is a part of current DFS verification path,*
7852	* hence cur & st for a loop.
7853	*/
7854	st = &sl->state;
7855	if (st->insn_idx == insn_idx && st->branches && same_callsites(a: st, b: cur) &&
7856	st->dfs_depth < cur->dfs_depth)
7857	return st;
7858	}
7859
7860	return NULL;
7861	}
7862
/* Forward declarations for helpers used by the scalar-widening code below;
 * presumably both are defined later in this file.
 */
static void reset_idmap_scratch(struct bpf_verifier_env *env);
static bool regs_exact(const struct bpf_reg_state *rold,
		       const struct bpf_reg_state *rcur,
		       struct bpf_idmap *idmap);
7867
7868	static void maybe_widen_reg(struct bpf_verifier_env *env,
7869	struct bpf_reg_state rold, struct* bpf_reg_state *rcur,
7870	struct bpf_idmap *idmap)
7871	{
7872	if (rold->type != SCALAR_VALUE)
7873	return;
7874	if (rold->type != rcur->type)
7875	return;
7876	if (rold->precise \|\| rcur->precise \|\| regs_exact(rold, rcur, idmap))
7877	return;
7878	__mark_reg_unknown(env, reg: rcur);
7879	}
7880
7881	static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7882	struct bpf_verifier_state *old,
7883	struct bpf_verifier_state *cur)
7884	{
7885	struct bpf_func_state fold, fcur;
7886	int i, fr;
7887
7888	reset_idmap_scratch(env);
7889	for (fr = old->curframe; fr >= `0`; fr--) {
7890	fold = old->frame[fr];
7891	fcur = cur->frame[fr];
7892
7893	for (i = `0`; i < MAX_BPF_REG; i++)
7894	maybe_widen_reg(env,
7895	rold: &fold->regs[i],
7896	rcur: &fcur->regs[i],
7897	idmap: &env->idmap_scratch);
7898
7899	for (i = `0`; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
7900	if (!is_spilled_reg(stack: &fold->stack[i]) \|\|
7901	!is_spilled_reg(stack: &fcur->stack[i]))
7902	continue;
7903
7904	maybe_widen_reg(env,
7905	rold: &fold->stack[i].spilled_ptr,
7906	rcur: &fcur->stack[i].spilled_ptr,
7907	idmap: &env->idmap_scratch);
7908	}
7909	}
7910	return `0`;
7911	}
7912
7913	/ process_iter_next_call() is called when verifier gets to iterator's next*
7914	* "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7915	* to it as just "iter_next()" in comments below.
7916	*
7917	* BPF verifier relies on a crucial contract for any iter_next()
7918	* implementation: it should eventually return NULL, and once that happens
7919	* it should keep returning NULL. That is, once iterator exhausts elements to
7920	* iterate, it should never reset or spuriously return new elements.
7921	*
7922	* With the assumption of such contract, process_iter_next_call() simulates
7923	* a fork in the verifier state to validate loop logic correctness and safety
7924	* without having to simulate infinite amount of iterations.
7925	*
7926	* In current state, we first assume that iter_next() returned NULL and
7927	* iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7928	* conditions we should not form an infinite loop and should eventually reach
7929	* exit.
7930	*
7931	* Besides that, we also fork current state and enqueue it for later
7932	* verification. In a forked state we keep iterator state as ACTIVE
7933	* (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7934	* also bump iteration depth to prevent erroneous infinite loop detection
7935	* later on (see iter_active_depths_differ() comment for details). In this
7936	* state we assume that we'll eventually loop back to another iter_next()
7937	* calls (it could be in exactly same location or in some other instruction,
7938	* it doesn't matter, we don't make any unnecessary assumptions about this,
7939	* everything revolves around iterator state in a stack slot, not which
7940	* instruction is calling iter_next()). When that happens, we either will come
7941	* to iter_next() with equivalent state and can conclude that next iteration
7942	* will proceed in exactly the same way as we just verified, so it's safe to
7943	* assume that loop converges. If not, we'll go on another iteration
7944	* simulation with a different input state, until all possible starting states
7945	* are validated or we reach maximum number of instructions limit.
7946	*
7947	* This way, we will either exhaustively discover all possible input states
7948	* that iterator loop can start with and eventually will converge, or we'll
7949	* effectively regress into bounded loop simulation logic and either reach
7950	* maximum number of instructions if loop is not provably convergent, or there
7951	* is some statically known limit on number of iterations (e.g., if there is
7952	* an explicit `if n > 100 then break;` statement somewhere in the loop).
7953	*
7954	* Iteration convergence logic in is_state_visited() relies on exact
7955	* states comparison, which ignores read and precision marks.
7956	* This is necessary because read and precision marks are not finalized
7957	* while in the loop. Exact comparison might preclude convergence for
7958	* simple programs like below:
7959	*
7960	* i = 0;
7961	* while(iter_next(&it))
7962	* i++;
7963	*
7964	* At each iteration step i++ would produce a new distinct state and
7965	* eventually instruction processing limit would be reached.
7966	*
7967	* To avoid such behavior speculatively forget (widen) range for
7968	* imprecise scalar registers, if those registers were not precise at the
7969	* end of the previous iteration and do not match exactly.
7970	*
7971	* This is a conservative heuristic that allows to verify wide range of programs,
7972	* however it precludes verification of programs that conjure an
7973	* imprecise value on the first loop iteration and use it as precise on a second.
7974	* For example, the following safe program would fail to verify:
7975	*
7976	* struct bpf_num_iter it;
7977	* int arr[10];
7978	* int i = 0, a = 0;
7979	* bpf_iter_num_new(&it, 0, 10);
7980	* while (bpf_iter_num_next(&it)) {
7981	* if (a == 0) {
7982	* a = 1;
7983	* i = 7; // Because i changed verifier would forget
7984	* // it's range on second loop entry.
7985	* } else {
7986	* arr[i] = 42; // This would fail to verify.
7987	* }
7988	* }
7989	* bpf_iter_num_destroy(&it);
7990	*/
7991	static int process_iter_next_call(struct bpf_verifier_env env, int* insn_idx,
7992	struct bpf_kfunc_call_arg_meta *meta)
7993	{
7994	struct bpf_verifier_state cur_st = env->cur_state, queued_st, *prev_st;
7995	struct bpf_func_state cur_fr = cur_st->frame[cur_st->curframe], queued_fr;
7996	struct bpf_reg_state cur_iter, queued_iter;
7997	int iter_frameno = meta->iter.frameno;
7998	int iter_spi = meta->iter.spi;
7999
8000	BTF_TYPE_EMIT(struct bpf_iter);
8001
8002	cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8003
8004	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
8005	cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
8006	verbose(private_data: env, fmt: "verifier internal error: unexpected iterator state %d (%s)\n",
8007	cur_iter->iter.state, iter_state_str(state: cur_iter->iter.state));
8008	return -EFAULT;
8009	}
8010
8011	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
8012	/ Because iter_next() call is a checkpoint is_state_visitied()*
8013	* should guarantee parent state with same call sites and insn_idx.
8014	*/
8015	if (!cur_st->parent \|\| cur_st->parent->insn_idx != insn_idx \|\|
8016	!same_callsites(a: cur_st->parent, b: cur_st)) {
8017	verbose(private_data: env, fmt: "bug: bad parent state for iter next call");
8018	return -EFAULT;
8019	}
8020	/ Note cur_st->parent in the call below, it is necessary to skip*
8021	* checkpoint created for cur_st by is_state_visited()
8022	* right at this instruction.
8023	*/
8024	prev_st = find_prev_entry(env, cur: cur_st->parent, insn_idx);
8025	/ branch out active iter state /
8026	queued_st = push_stack(env, insn_idx: insn_idx + `1`, prev_insn_idx: insn_idx, speculative: false);
8027	if (!queued_st)
8028	return -ENOMEM;
8029
8030	queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8031	queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
8032	queued_iter->iter.depth++;
8033	if (prev_st)
8034	widen_imprecise_scalars(env, old: prev_st, cur: queued_st);
8035
8036	queued_fr = queued_st->frame[queued_st->curframe];
8037	mark_ptr_not_null_reg(reg: &queued_fr->regs[BPF_REG_0]);
8038	}
8039
8040	/ switch to DRAINED state, but keep the depth unchanged /
8041	/ mark current iter state as drained and assume returned NULL /
8042	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
8043	__mark_reg_const_zero(env, reg: &cur_fr->regs[BPF_REG_0]);
8044
8045	return `0`;
8046	}
8047
8048	static bool arg_type_is_mem_size(enum bpf_arg_type type)
8049	{
8050	return type == ARG_CONST_SIZE \|\|
8051	type == ARG_CONST_SIZE_OR_ZERO;
8052	}
8053
8054	static bool arg_type_is_release(enum bpf_arg_type type)
8055	{
8056	return type & OBJ_RELEASE;
8057	}
8058
8059	static bool arg_type_is_dynptr(enum bpf_arg_type type)
8060	{
8061	return base_type(type) == ARG_PTR_TO_DYNPTR;
8062	}
8063
8064	static int int_ptr_type_to_size(enum bpf_arg_type type)
8065	{
8066	if (type == ARG_PTR_TO_INT)
8067	return sizeof(u32);
8068	else if (type == ARG_PTR_TO_LONG)
8069	return sizeof(u64);
8070
8071	return -EINVAL;
8072	}
8073
8074	static int resolve_map_arg_type(struct bpf_verifier_env *env,
8075	const struct bpf_call_arg_meta *meta,
8076	enum bpf_arg_type *arg_type)
8077	{
8078	if (!meta->map_ptr) {
8079	/ kernel subsystem misconfigured verifier /
8080	verbose(private_data: env, fmt: "invalid map_ptr to access map->type\n");
8081	return -EACCES;
8082	}
8083
8084	switch (meta->map_ptr->map_type) {
8085	case BPF_MAP_TYPE_SOCKMAP:
8086	case BPF_MAP_TYPE_SOCKHASH:
8087	if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
8088	*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
8089	} else {
8090	verbose(private_data: env, fmt: "invalid arg_type for sockmap/sockhash\n");
8091	return -EINVAL;
8092	}
8093	break;
8094	case BPF_MAP_TYPE_BLOOM_FILTER:
8095	if (meta->func_id == BPF_FUNC_map_peek_elem)
8096	*arg_type = ARG_PTR_TO_MAP_VALUE;
8097	break;
8098	default:
8099	break;
8100	}
8101	return `0`;
8102	}
8103
8104	struct bpf_reg_types {
8105	const enum bpf_reg_type types[`10`];
8106	u32 *btf_id;
8107	};
8108
8109	static const struct bpf_reg_types sock_types = {
8110	.types = {
8111	PTR_TO_SOCK_COMMON,
8112	PTR_TO_SOCKET,
8113	PTR_TO_TCP_SOCK,
8114	PTR_TO_XDP_SOCK,
8115	},
8116	};
8117
#ifdef CONFIG_NET
/* Socket pointer types plus (trusted) PTR_TO_BTF_ID, tied to the
 * BTF_SOCK_TYPE_SOCK_COMMON BTF id. Only built with CONFIG_NET.
 */
static const struct bpf_reg_types btf_id_sock_common_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
	},
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
#endif
8131
8132	static const struct bpf_reg_types mem_types = {
8133	.types = {
8134	PTR_TO_STACK,
8135	PTR_TO_PACKET,
8136	PTR_TO_PACKET_META,
8137	PTR_TO_MAP_KEY,
8138	PTR_TO_MAP_VALUE,
8139	PTR_TO_MEM,
8140	PTR_TO_MEM \| MEM_RINGBUF,
8141	PTR_TO_BUF,
8142	PTR_TO_BTF_ID \| PTR_TRUSTED,
8143	},
8144	};
8145
8146	static const struct bpf_reg_types int_ptr_types = {
8147	.types = {
8148	PTR_TO_STACK,
8149	PTR_TO_PACKET,
8150	PTR_TO_PACKET_META,
8151	PTR_TO_MAP_KEY,
8152	PTR_TO_MAP_VALUE,
8153	},
8154	};
8155
8156	static const struct bpf_reg_types spin_lock_types = {
8157	.types = {
8158	PTR_TO_MAP_VALUE,
8159	PTR_TO_BTF_ID \| MEM_ALLOC,
8160	}
8161	};
8162
8163	static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
8164	static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
8165	static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
8166	static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM \| MEM_RINGBUF } };
8167	static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
8168	static const struct bpf_reg_types btf_ptr_types = {
8169	.types = {
8170	PTR_TO_BTF_ID,
8171	PTR_TO_BTF_ID \| PTR_TRUSTED,
8172	PTR_TO_BTF_ID \| MEM_RCU,
8173	},
8174	};
8175	static const struct bpf_reg_types percpu_btf_ptr_types = {
8176	.types = {
8177	PTR_TO_BTF_ID \| MEM_PERCPU,
8178	PTR_TO_BTF_ID \| MEM_PERCPU \| MEM_RCU,
8179	PTR_TO_BTF_ID \| MEM_PERCPU \| PTR_TRUSTED,
8180	}
8181	};
8182	static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
8183	static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
8184	static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
8185	static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
8186	static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
8187	static const struct bpf_reg_types dynptr_types = {
8188	.types = {
8189	PTR_TO_STACK,
8190	CONST_PTR_TO_DYNPTR,
8191	}
8192	};
8193
8194	static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
8195	[ARG_PTR_TO_MAP_KEY] = &mem_types,
8196	[ARG_PTR_TO_MAP_VALUE] = &mem_types,
8197	[ARG_CONST_SIZE] = &scalar_types,
8198	[ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
8199	[ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
8200	[ARG_CONST_MAP_PTR] = &const_map_ptr_types,
8201	[ARG_PTR_TO_CTX] = &context_types,
8202	[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
8203	#ifdef CONFIG_NET
8204	[ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
8205	#endif
8206	[ARG_PTR_TO_SOCKET] = &fullsock_types,
8207	[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
8208	[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
8209	[ARG_PTR_TO_MEM] = &mem_types,
8210	[ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
8211	[ARG_PTR_TO_INT] = &int_ptr_types,
8212	[ARG_PTR_TO_LONG] = &int_ptr_types,
8213	[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
8214	[ARG_PTR_TO_FUNC] = &func_ptr_types,
8215	[ARG_PTR_TO_STACK] = &stack_ptr_types,
8216	[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
8217	[ARG_PTR_TO_TIMER] = &timer_types,
8218	[ARG_PTR_TO_KPTR] = &kptr_types,
8219	[ARG_PTR_TO_DYNPTR] = &dynptr_types,
8220	};
8221
8222	static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
8223	enum bpf_arg_type arg_type,
8224	const u32 *arg_btf_id,
8225	struct bpf_call_arg_meta *meta)
8226	{
8227	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
8228	enum bpf_reg_type expected, type = reg->type;
8229	const struct bpf_reg_types *compatible;
8230	int i, j;
8231
8232	compatible = compatible_reg_types[base_type(type: arg_type)];
8233	if (!compatible) {
8234	verbose(private_data: env, fmt: "verifier internal error: unsupported arg type %d\n", arg_type);
8235	return -EFAULT;
8236	}
8237
8238	/ ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,*
8239	* but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8240	*
8241	* Same for MAYBE_NULL:
8242	*
8243	* ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8244	* but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
8245	*
8246	* ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8247	*
8248	* Therefore we fold these flags depending on the arg_type before comparison.
8249	*/
8250	if (arg_type & MEM_RDONLY)
8251	type &= ~MEM_RDONLY;
8252	if (arg_type & PTR_MAYBE_NULL)
8253	type &= ~PTR_MAYBE_NULL;
8254	if (base_type(type: arg_type) == ARG_PTR_TO_MEM)
8255	type &= ~DYNPTR_TYPE_FLAG_MASK;
8256
8257	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
8258	type &= ~MEM_ALLOC;
8259	type &= ~MEM_PERCPU;
8260	}
8261
8262	for (i = `0`; i < ARRAY_SIZE(compatible->types); i++) {
8263	expected = compatible->types[i];
8264	if (expected == NOT_INIT)
8265	break;
8266
8267	if (type == expected)
8268	goto found;
8269	}
8270
8271	verbose(private_data: env, fmt: "R%d type=%s expected=", regno, reg_type_str(env, type: reg->type));
8272	for (j = `0`; j + `1` < i; j++)
8273	verbose(private_data: env, fmt: "%s, ", reg_type_str(env, type: compatible->types[j]));
8274	verbose(private_data: env, fmt: "%s\n", reg_type_str(env, type: compatible->types[j]));
8275	return -EACCES;
8276
8277	found:
8278	if (base_type(type: reg->type) != PTR_TO_BTF_ID)
8279	return `0`;
8280
8281	if (compatible == &mem_types) {
8282	if (!(arg_type & MEM_RDONLY)) {
8283	verbose(private_data: env,
8284	fmt: "%s() may write into memory pointed by R%d type=%s\n",
8285	func_id_name(id: meta->func_id),
8286	regno, reg_type_str(env, type: reg->type));
8287	return -EACCES;
8288	}
8289	return `0`;
8290	}
8291
8292	switch ((int)reg->type) {
8293	case PTR_TO_BTF_ID:
8294	case PTR_TO_BTF_ID \| PTR_TRUSTED:
8295	case PTR_TO_BTF_ID \| PTR_TRUSTED \| PTR_MAYBE_NULL:
8296	case PTR_TO_BTF_ID \| MEM_RCU:
8297	case PTR_TO_BTF_ID \| PTR_MAYBE_NULL:
8298	case PTR_TO_BTF_ID \| PTR_MAYBE_NULL \| MEM_RCU:
8299	{
8300	/ For bpf_sk_release, it needs to match against first member*
8301	* 'struct sock_common', hence make an exception for it. This
8302	* allows bpf_sk_release to work for multiple socket types.
8303	*/
8304	bool strict_type_match = arg_type_is_release(type: arg_type) &&
8305	meta->func_id != BPF_FUNC_sk_release;
8306
8307	if (type_may_be_null(type: reg->type) &&
8308	(!type_may_be_null(type: arg_type) \|\| arg_type_is_release(type: arg_type))) {
8309	verbose(private_data: env, fmt: "Possibly NULL pointer passed to helper arg%d\n", regno);
8310	return -EACCES;
8311	}
8312
8313	if (!arg_btf_id) {
8314	if (!compatible->btf_id) {
8315	verbose(private_data: env, fmt: "verifier internal error: missing arg compatible BTF ID\n");
8316	return -EFAULT;
8317	}
8318	arg_btf_id = compatible->btf_id;
8319	}
8320
8321	if (meta->func_id == BPF_FUNC_kptr_xchg) {
8322	if (map_kptr_match_type(env, kptr_field: meta->kptr_field, reg, regno))
8323	return -EACCES;
8324	} else {
8325	if (arg_btf_id == BPF_PTR_POISON) {
8326	verbose(private_data: env, fmt: "verifier internal error:");
8327	verbose(private_data: env, fmt: "R%d has non-overwritten BPF_PTR_POISON type\n",
8328	regno);
8329	return -EACCES;
8330	}
8331
8332	if (!btf_struct_ids_match(log: &env->log, btf: reg->btf, id: reg->btf_id, off: reg->off,
8333	need_btf: btf_vmlinux, need_type_id: *arg_btf_id,
8334	strict: strict_type_match)) {
8335	verbose(private_data: env, fmt: "R%d is of type %s but %s is expected\n",
8336	regno, btf_type_name(btf: reg->btf, id: reg->btf_id),
8337	btf_type_name(btf: btf_vmlinux, id: *arg_btf_id));
8338	return -EACCES;
8339	}
8340	}
8341	break;
8342	}
8343	case PTR_TO_BTF_ID \| MEM_ALLOC:
8344	case PTR_TO_BTF_ID \| MEM_PERCPU \| MEM_ALLOC:
8345	if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
8346	meta->func_id != BPF_FUNC_kptr_xchg) {
8347	verbose(private_data: env, fmt: "verifier internal error: unimplemented handling of MEM_ALLOC\n");
8348	return -EFAULT;
8349	}
8350	if (meta->func_id == BPF_FUNC_kptr_xchg) {
8351	if (map_kptr_match_type(env, kptr_field: meta->kptr_field, reg, regno))
8352	return -EACCES;
8353	}
8354	break;
8355	case PTR_TO_BTF_ID \| MEM_PERCPU:
8356	case PTR_TO_BTF_ID \| MEM_PERCPU \| MEM_RCU:
8357	case PTR_TO_BTF_ID \| MEM_PERCPU \| PTR_TRUSTED:
8358	/ Handled by helper specific checks /
8359	break;
8360	default:
8361	verbose(private_data: env, fmt: "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
8362	return -EFAULT;
8363	}
8364	return `0`;
8365	}
8366
8367	static struct btf_field *
8368	reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
8369	{
8370	struct btf_field *field;
8371	struct btf_record *rec;
8372
8373	rec = reg_btf_record(reg);
8374	if (!rec)
8375	return NULL;
8376
8377	field = btf_record_find(rec, offset: off, field_mask: fields);
8378	if (!field)
8379	return NULL;
8380
8381	return field;
8382	}
8383
8384	static int check_func_arg_reg_off(struct bpf_verifier_env *env,
8385	const struct bpf_reg_state reg, int* regno,
8386	enum bpf_arg_type arg_type)
8387	{
8388	u32 type = reg->type;
8389
8390	/ When referenced register is passed to release function, its fixed*
8391	* offset must be 0.
8392	*
8393	* We will check arg_type_is_release reg has ref_obj_id when storing
8394	* meta->release_regno.
8395	*/
8396	if (arg_type_is_release(type: arg_type)) {
8397	/ ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it*
8398	* may not directly point to the object being released, but to
8399	* dynptr pointing to such object, which might be at some offset
8400	* on the stack. In that case, we simply to fallback to the
8401	* default handling.
8402	*/
8403	if (arg_type_is_dynptr(type: arg_type) && type == PTR_TO_STACK)
8404	return `0`;
8405
8406	/ Doing check_ptr_off_reg check for the offset will catch this*
8407	* because fixed_off_ok is false, but checking here allows us
8408	* to give the user a better error message.
8409	*/
8410	if (reg->off) {
8411	verbose(private_data: env, fmt: "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
8412	regno);
8413	return -EINVAL;
8414	}
8415	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok: false);
8416	}
8417
8418	switch (type) {
8419	/ Pointer types where both fixed and variable offset is explicitly allowed: /
8420	case PTR_TO_STACK:
8421	case PTR_TO_PACKET:
8422	case PTR_TO_PACKET_META:
8423	case PTR_TO_MAP_KEY:
8424	case PTR_TO_MAP_VALUE:
8425	case PTR_TO_MEM:
8426	case PTR_TO_MEM \| MEM_RDONLY:
8427	case PTR_TO_MEM \| MEM_RINGBUF:
8428	case PTR_TO_BUF:
8429	case PTR_TO_BUF \| MEM_RDONLY:
8430	case PTR_TO_ARENA:
8431	case SCALAR_VALUE:
8432	return `0`;
8433	/ All the rest must be rejected, except PTR_TO_BTF_ID which allows*
8434	* fixed offset.
8435	*/
8436	case PTR_TO_BTF_ID:
8437	case PTR_TO_BTF_ID \| MEM_ALLOC:
8438	case PTR_TO_BTF_ID \| PTR_TRUSTED:
8439	case PTR_TO_BTF_ID \| MEM_RCU:
8440	case PTR_TO_BTF_ID \| MEM_ALLOC \| NON_OWN_REF:
8441	case PTR_TO_BTF_ID \| MEM_ALLOC \| NON_OWN_REF \| MEM_RCU:
8442	/ When referenced PTR_TO_BTF_ID is passed to release function,*
8443	* its fixed offset must be 0. In the other cases, fixed offset
8444	* can be non-zero. This was already checked above. So pass
8445	* fixed_off_ok as true to allow fixed offset for all other
8446	* cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8447	* still need to do checks instead of returning.
8448	*/
8449	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok: true);
8450	default:
8451	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok: false);
8452	}
8453	}
8454
8455	static struct bpf_reg_state get_dynptr_arg_reg(struct* bpf_verifier_env *env,
8456	const struct bpf_func_proto *fn,
8457	struct bpf_reg_state *regs)
8458	{
8459	struct bpf_reg_state *state = NULL;
8460	int i;
8461
8462	for (i = `0`; i < MAX_BPF_FUNC_REG_ARGS; i++)
8463	if (arg_type_is_dynptr(type: fn->arg_type[i])) {
8464	if (state) {
8465	verbose(private_data: env, fmt: "verifier internal error: multiple dynptr args\n");
8466	return NULL;
8467	}
8468	state = &regs[BPF_REG_1 + i];
8469	}
8470
8471	if (!state)
8472	verbose(private_data: env, fmt: "verifier internal error: no dynptr arg found\n");
8473
8474	return state;
8475	}
8476
8477	static int dynptr_id(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
8478	{
8479	struct bpf_func_state *state = func(env, reg);
8480	int spi;
8481
8482	if (reg->type == CONST_PTR_TO_DYNPTR)
8483	return reg->id;
8484	spi = dynptr_get_spi(env, reg);
8485	if (spi < `0`)
8486	return spi;
8487	return state->stack[spi].spilled_ptr.id;
8488	}
8489
8490	static int dynptr_ref_obj_id(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
8491	{
8492	struct bpf_func_state *state = func(env, reg);
8493	int spi;
8494
8495	if (reg->type == CONST_PTR_TO_DYNPTR)
8496	return reg->ref_obj_id;
8497	spi = dynptr_get_spi(env, reg);
8498	if (spi < `0`)
8499	return spi;
8500	return state->stack[spi].spilled_ptr.ref_obj_id;
8501	}
8502
8503	static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
8504	struct bpf_reg_state *reg)
8505	{
8506	struct bpf_func_state *state = func(env, reg);
8507	int spi;
8508
8509	if (reg->type == CONST_PTR_TO_DYNPTR)
8510	return reg->dynptr.type;
8511
8512	spi = __get_spi(off: reg->off);
8513	if (spi < `0`) {
8514	verbose(private_data: env, fmt: "verifier internal error: invalid spi when querying dynptr type\n");
8515	return BPF_DYNPTR_TYPE_INVALID;
8516	}
8517
8518	return state->stack[spi].spilled_ptr.dynptr.type;
8519	}
8520
8521	static int check_reg_const_str(struct bpf_verifier_env *env,
8522	struct bpf_reg_state *reg, u32 regno)
8523	{
8524	struct bpf_map *map = reg->map_ptr;
8525	int err;
8526	int map_off;
8527	u64 map_addr;
8528	char *str_ptr;
8529
8530	if (reg->type != PTR_TO_MAP_VALUE)
8531	return -EINVAL;
8532
8533	if (!bpf_map_is_rdonly(map)) {
8534	verbose(private_data: env, fmt: "R%d does not point to a readonly map'\n", regno);
8535	return -EACCES;
8536	}
8537
8538	if (!tnum_is_const(a: reg->var_off)) {
8539	verbose(private_data: env, fmt: "R%d is not a constant address'\n", regno);
8540	return -EACCES;
8541	}
8542
8543	if (!map->ops->map_direct_value_addr) {
8544	verbose(private_data: env, fmt: "no direct value access support for this map type\n");
8545	return -EACCES;
8546	}
8547
8548	err = check_map_access(env, regno, off: reg->off,
8549	size: map->value_size - reg->off, zero_size_allowed: false,
8550	src: ACCESS_HELPER);
8551	if (err)
8552	return err;
8553
8554	map_off = reg->off + reg->var_off.value;
8555	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8556	if (err) {
8557	verbose(private_data: env, fmt: "direct value access on string failed\n");
8558	return err;
8559	}
8560
8561	str_ptr = (char )(long*)(map_addr);
8562	if (!strnchr(str_ptr + map_off, map->value_size - map_off, `0`)) {
8563	verbose(private_data: env, fmt: "string is not zero-terminated\n");
8564	return -EINVAL;
8565	}
8566	return `0`;
8567	}
8568
8569	static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8570	struct bpf_call_arg_meta *meta,
8571	const struct bpf_func_proto *fn,
8572	int insn_idx)
8573	{
8574	u32 regno = BPF_REG_1 + arg;
8575	struct bpf_reg_state regs = cur_regs(env), reg = &regs[regno];
8576	enum bpf_arg_type arg_type = fn->arg_type[arg];
8577	enum bpf_reg_type type = reg->type;
8578	u32 *arg_btf_id = NULL;
8579	int err = `0`;
8580
8581	if (arg_type == ARG_DONTCARE)
8582	return `0`;
8583
8584	err = check_reg_arg(env, regno, t: SRC_OP);
8585	if (err)
8586	return err;
8587
8588	if (arg_type == ARG_ANYTHING) {
8589	if (is_pointer_value(env, regno)) {
8590	verbose(private_data: env, fmt: "R%d leaks addr into helper function\n",
8591	regno);
8592	return -EACCES;
8593	}
8594	return `0`;
8595	}
8596
8597	if (type_is_pkt_pointer(type) &&
8598	!may_access_direct_pkt_data(env, meta, t: BPF_READ)) {
8599	verbose(private_data: env, fmt: "helper access to the packet is not allowed\n");
8600	return -EACCES;
8601	}
8602
8603	if (base_type(type: arg_type) == ARG_PTR_TO_MAP_VALUE) {
8604	err = resolve_map_arg_type(env, meta, arg_type: &arg_type);
8605	if (err)
8606	return err;
8607	}
8608
8609	if (register_is_null(reg) && type_may_be_null(type: arg_type))
8610	/ A NULL register has a SCALAR_VALUE type, so skip*
8611	* type checking.
8612	*/
8613	goto skip_type_check;
8614
8615	/ arg_btf_id and arg_size are in a union. /
8616	if (base_type(type: arg_type) == ARG_PTR_TO_BTF_ID \|\|
8617	base_type(type: arg_type) == ARG_PTR_TO_SPIN_LOCK)
8618	arg_btf_id = fn->arg_btf_id[arg];
8619
8620	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8621	if (err)
8622	return err;
8623
8624	err = check_func_arg_reg_off(env, reg, regno, arg_type);
8625	if (err)
8626	return err;
8627
8628	skip_type_check:
8629	if (arg_type_is_release(type: arg_type)) {
8630	if (arg_type_is_dynptr(type: arg_type)) {
8631	struct bpf_func_state *state = func(env, reg);
8632	int spi;
8633
8634	/ Only dynptr created on stack can be released, thus*
8635	* the get_spi and stack state checks for spilled_ptr
8636	* should only be done before process_dynptr_func for
8637	* PTR_TO_STACK.
8638	*/
8639	if (reg->type == PTR_TO_STACK) {
8640	spi = dynptr_get_spi(env, reg);
8641	if (spi < `0` \|\| !state->stack[spi].spilled_ptr.ref_obj_id) {
8642	verbose(private_data: env, fmt: "arg %d is an unacquired reference\n", regno);
8643	return -EINVAL;
8644	}
8645	} else {
8646	verbose(private_data: env, fmt: "cannot release unowned const bpf_dynptr\n");
8647	return -EINVAL;
8648	}
8649	} else if (!reg->ref_obj_id && !register_is_null(reg)) {
8650	verbose(private_data: env, fmt: "R%d must be referenced when passed to release function\n",
8651	regno);
8652	return -EINVAL;
8653	}
8654	if (meta->release_regno) {
8655	verbose(private_data: env, fmt: "verifier internal error: more than one release argument\n");
8656	return -EFAULT;
8657	}
8658	meta->release_regno = regno;
8659	}
8660
8661	if (reg->ref_obj_id) {
8662	if (meta->ref_obj_id) {
8663	verbose(private_data: env, fmt: "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8664	regno, reg->ref_obj_id,
8665	meta->ref_obj_id);
8666	return -EFAULT;
8667	}
8668	meta->ref_obj_id = reg->ref_obj_id;
8669	}
8670
8671	switch (base_type(type: arg_type)) {
8672	case ARG_CONST_MAP_PTR:
8673	/ bpf_map_xxx(map_ptr) call: remember that map_ptr /
8674	if (meta->map_ptr) {
8675	/ Use map_uid (which is unique id of inner map) to reject:*
8676	* inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8677	* inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8678	* if (inner_map1 && inner_map2) {
8679	* timer = bpf_map_lookup_elem(inner_map1);
8680	* if (timer)
8681	* // mismatch would have been allowed
8682	* bpf_timer_init(timer, inner_map2);
8683	* }
8684	*
8685	* Comparing map_ptr is enough to distinguish normal and outer maps.
8686	*/
8687	if (meta->map_ptr != reg->map_ptr \|\|
8688	meta->map_uid != reg->map_uid) {
8689	verbose(private_data: env,
8690	fmt: "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8691	meta->map_uid, reg->map_uid);
8692	return -EINVAL;
8693	}
8694	}
8695	meta->map_ptr = reg->map_ptr;
8696	meta->map_uid = reg->map_uid;
8697	break;
8698	case ARG_PTR_TO_MAP_KEY:
8699	/ bpf_map_xxx(..., map_ptr, ..., key) call:*
8700	* check that [key, key + map->key_size) are within
8701	* stack limits and initialized
8702	*/
8703	if (!meta->map_ptr) {
8704	/ in function declaration map_ptr must come before*
8705	* map_key, so that it's verified and known before
8706	* we have to check map_key here. Otherwise it means
8707	* that kernel subsystem misconfigured verifier
8708	*/
8709	verbose(private_data: env, fmt: "invalid map_ptr to access map->key\n");
8710	return -EACCES;
8711	}
8712	err = check_helper_mem_access(env, regno,
8713	access_size: meta->map_ptr->key_size, zero_size_allowed: false,
8714	NULL);
8715	break;
8716	case ARG_PTR_TO_MAP_VALUE:
8717	if (type_may_be_null(type: arg_type) && register_is_null(reg))
8718	return `0`;
8719
8720	/ bpf_map_xxx(..., map_ptr, ..., value) call:*
8721	* check [value, value + map->value_size) validity
8722	*/
8723	if (!meta->map_ptr) {
8724	/ kernel subsystem misconfigured verifier /
8725	verbose(private_data: env, fmt: "invalid map_ptr to access map->value\n");
8726	return -EACCES;
8727	}
8728	meta->raw_mode = arg_type & MEM_UNINIT;
8729	err = check_helper_mem_access(env, regno,
8730	access_size: meta->map_ptr->value_size, zero_size_allowed: false,
8731	meta);
8732	break;
8733	case ARG_PTR_TO_PERCPU_BTF_ID:
8734	if (!reg->btf_id) {
8735	verbose(private_data: env, fmt: "Helper has invalid btf_id in R%d\n", regno);
8736	return -EACCES;
8737	}
8738	meta->ret_btf = reg->btf;
8739	meta->ret_btf_id = reg->btf_id;
8740	break;
8741	case ARG_PTR_TO_SPIN_LOCK:
8742	if (in_rbtree_lock_required_cb(env)) {
8743	verbose(private_data: env, fmt: "can't spin_{lock,unlock} in rbtree cb\n");
8744	return -EACCES;
8745	}
8746	if (meta->func_id == BPF_FUNC_spin_lock) {
8747	err = process_spin_lock(env, regno, is_lock: true);
8748	if (err)
8749	return err;
8750	} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8751	err = process_spin_lock(env, regno, is_lock: false);
8752	if (err)
8753	return err;
8754	} else {
8755	verbose(private_data: env, fmt: "verifier internal error\n");
8756	return -EFAULT;
8757	}
8758	break;
8759	case ARG_PTR_TO_TIMER:
8760	err = process_timer_func(env, regno, meta);
8761	if (err)
8762	return err;
8763	break;
8764	case ARG_PTR_TO_FUNC:
8765	meta->subprogno = reg->subprogno;
8766	break;
8767	case ARG_PTR_TO_MEM:
8768	/ The access to this pointer is only checked when we hit the*
8769	* next is_mem_size argument below.
8770	*/
8771	meta->raw_mode = arg_type & MEM_UNINIT;
8772	if (arg_type & MEM_FIXED_SIZE) {
8773	err = check_helper_mem_access(env, regno,
8774	access_size: fn->arg_size[arg], zero_size_allowed: false,
8775	meta);
8776	}
8777	break;
8778	case ARG_CONST_SIZE:
8779	err = check_mem_size_reg(env, reg, regno, zero_size_allowed: false, meta);
8780	break;
8781	case ARG_CONST_SIZE_OR_ZERO:
8782	err = check_mem_size_reg(env, reg, regno, zero_size_allowed: true, meta);
8783	break;
8784	case ARG_PTR_TO_DYNPTR:
8785	err = process_dynptr_func(env, regno, insn_idx, arg_type, clone_ref_obj_id: `0`);
8786	if (err)
8787	return err;
8788	break;
8789	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8790	if (!tnum_is_const(a: reg->var_off)) {
8791	verbose(private_data: env, fmt: "R%d is not a known constant'\n",
8792	regno);
8793	return -EACCES;
8794	}
8795	meta->mem_size = reg->var_off.value;
8796	err = mark_chain_precision(env, regno);
8797	if (err)
8798	return err;
8799	break;
8800	case ARG_PTR_TO_INT:
8801	case ARG_PTR_TO_LONG:
8802	{
8803	int size = int_ptr_type_to_size(type: arg_type);
8804
8805	err = check_helper_mem_access(env, regno, access_size: size, zero_size_allowed: false, meta);
8806	if (err)
8807	return err;
8808	err = check_ptr_alignment(env, reg, off: `0`, size, strict_alignment_once: true);
8809	break;
8810	}
8811	case ARG_PTR_TO_CONST_STR:
8812	{
8813	err = check_reg_const_str(env, reg, regno);
8814	if (err)
8815	return err;
8816	break;
8817	}
8818	case ARG_PTR_TO_KPTR:
8819	err = process_kptr_func(env, regno, meta);
8820	if (err)
8821	return err;
8822	break;
8823	}
8824
8825	return err;
8826	}
8827
8828	static bool may_update_sockmap(struct bpf_verifier_env env, int* func_id)
8829	{
8830	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8831	enum bpf_prog_type type = resolve_prog_type(prog: env->prog);
8832
8833	if (func_id != BPF_FUNC_map_update_elem)
8834	return false;
8835
8836	/ It's not possible to get access to a locked struct sock in these*
8837	* contexts, so updating is safe.
8838	*/
8839	switch (type) {
8840	case BPF_PROG_TYPE_TRACING:
8841	if (eatype == BPF_TRACE_ITER)
8842	return true;
8843	break;
8844	case BPF_PROG_TYPE_SOCKET_FILTER:
8845	case BPF_PROG_TYPE_SCHED_CLS:
8846	case BPF_PROG_TYPE_SCHED_ACT:
8847	case BPF_PROG_TYPE_XDP:
8848	case BPF_PROG_TYPE_SK_REUSEPORT:
8849	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8850	case BPF_PROG_TYPE_SK_LOOKUP:
8851	return true;
8852	default:
8853	break;
8854	}
8855
8856	verbose(private_data: env, fmt: "cannot update sockmap in this context\n");
8857	return false;
8858	}
8859
8860	static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8861	{
8862	return env->prog->jit_requested &&
8863	bpf_jit_supports_subprog_tailcalls();
8864	}
8865
8866	static int check_map_func_compatibility(struct bpf_verifier_env *env,
8867	struct bpf_map map, int* func_id)
8868	{
8869	if (!map)
8870	return `0`;
8871
8872	/ We need a two way check, first is from map perspective ... /
8873	switch (map->map_type) {
8874	case BPF_MAP_TYPE_PROG_ARRAY:
8875	if (func_id != BPF_FUNC_tail_call)
8876	goto error;
8877	break;
8878	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8879	if (func_id != BPF_FUNC_perf_event_read &&
8880	func_id != BPF_FUNC_perf_event_output &&
8881	func_id != BPF_FUNC_skb_output &&
8882	func_id != BPF_FUNC_perf_event_read_value &&
8883	func_id != BPF_FUNC_xdp_output)
8884	goto error;
8885	break;
8886	case BPF_MAP_TYPE_RINGBUF:
8887	if (func_id != BPF_FUNC_ringbuf_output &&
8888	func_id != BPF_FUNC_ringbuf_reserve &&
8889	func_id != BPF_FUNC_ringbuf_query &&
8890	func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8891	func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8892	func_id != BPF_FUNC_ringbuf_discard_dynptr)
8893	goto error;
8894	break;
8895	case BPF_MAP_TYPE_USER_RINGBUF:
8896	if (func_id != BPF_FUNC_user_ringbuf_drain)
8897	goto error;
8898	break;
8899	case BPF_MAP_TYPE_STACK_TRACE:
8900	if (func_id != BPF_FUNC_get_stackid)
8901	goto error;
8902	break;
8903	case BPF_MAP_TYPE_CGROUP_ARRAY:
8904	if (func_id != BPF_FUNC_skb_under_cgroup &&
8905	func_id != BPF_FUNC_current_task_under_cgroup)
8906	goto error;
8907	break;
8908	case BPF_MAP_TYPE_CGROUP_STORAGE:
8909	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8910	if (func_id != BPF_FUNC_get_local_storage)
8911	goto error;
8912	break;
8913	case BPF_MAP_TYPE_DEVMAP:
8914	case BPF_MAP_TYPE_DEVMAP_HASH:
8915	if (func_id != BPF_FUNC_redirect_map &&
8916	func_id != BPF_FUNC_map_lookup_elem)
8917	goto error;
8918	break;
8919	/ Restrict bpf side of cpumap and xskmap, open when use-cases*
8920	* appear.
8921	*/
8922	case BPF_MAP_TYPE_CPUMAP:
8923	if (func_id != BPF_FUNC_redirect_map)
8924	goto error;
8925	break;
8926	case BPF_MAP_TYPE_XSKMAP:
8927	if (func_id != BPF_FUNC_redirect_map &&
8928	func_id != BPF_FUNC_map_lookup_elem)
8929	goto error;
8930	break;
8931	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8932	case BPF_MAP_TYPE_HASH_OF_MAPS:
8933	if (func_id != BPF_FUNC_map_lookup_elem)
8934	goto error;
8935	break;
8936	case BPF_MAP_TYPE_SOCKMAP:
8937	if (func_id != BPF_FUNC_sk_redirect_map &&
8938	func_id != BPF_FUNC_sock_map_update &&
8939	func_id != BPF_FUNC_map_delete_elem &&
8940	func_id != BPF_FUNC_msg_redirect_map &&
8941	func_id != BPF_FUNC_sk_select_reuseport &&
8942	func_id != BPF_FUNC_map_lookup_elem &&
8943	!may_update_sockmap(env, func_id))
8944	goto error;
8945	break;
8946	case BPF_MAP_TYPE_SOCKHASH:
8947	if (func_id != BPF_FUNC_sk_redirect_hash &&
8948	func_id != BPF_FUNC_sock_hash_update &&
8949	func_id != BPF_FUNC_map_delete_elem &&
8950	func_id != BPF_FUNC_msg_redirect_hash &&
8951	func_id != BPF_FUNC_sk_select_reuseport &&
8952	func_id != BPF_FUNC_map_lookup_elem &&
8953	!may_update_sockmap(env, func_id))
8954	goto error;
8955	break;
8956	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8957	if (func_id != BPF_FUNC_sk_select_reuseport)
8958	goto error;
8959	break;
8960	case BPF_MAP_TYPE_QUEUE:
8961	case BPF_MAP_TYPE_STACK:
8962	if (func_id != BPF_FUNC_map_peek_elem &&
8963	func_id != BPF_FUNC_map_pop_elem &&
8964	func_id != BPF_FUNC_map_push_elem)
8965	goto error;
8966	break;
8967	case BPF_MAP_TYPE_SK_STORAGE:
8968	if (func_id != BPF_FUNC_sk_storage_get &&
8969	func_id != BPF_FUNC_sk_storage_delete &&
8970	func_id != BPF_FUNC_kptr_xchg)
8971	goto error;
8972	break;
8973	case BPF_MAP_TYPE_INODE_STORAGE:
8974	if (func_id != BPF_FUNC_inode_storage_get &&
8975	func_id != BPF_FUNC_inode_storage_delete &&
8976	func_id != BPF_FUNC_kptr_xchg)
8977	goto error;
8978	break;
8979	case BPF_MAP_TYPE_TASK_STORAGE:
8980	if (func_id != BPF_FUNC_task_storage_get &&
8981	func_id != BPF_FUNC_task_storage_delete &&
8982	func_id != BPF_FUNC_kptr_xchg)
8983	goto error;
8984	break;
8985	case BPF_MAP_TYPE_CGRP_STORAGE:
8986	if (func_id != BPF_FUNC_cgrp_storage_get &&
8987	func_id != BPF_FUNC_cgrp_storage_delete &&
8988	func_id != BPF_FUNC_kptr_xchg)
8989	goto error;
8990	break;
8991	case BPF_MAP_TYPE_BLOOM_FILTER:
8992	if (func_id != BPF_FUNC_map_peek_elem &&
8993	func_id != BPF_FUNC_map_push_elem)
8994	goto error;
8995	break;
8996	default:
8997	break;
8998	}
8999
9000	/ ... and second from the function itself. /
9001	switch (func_id) {
9002	case BPF_FUNC_tail_call:
9003	if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
9004	goto error;
9005	if (env->subprog_cnt > `1` && !allow_tail_call_in_subprogs(env)) {
9006	verbose(private_data: env, fmt: "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
9007	return -EINVAL;
9008	}
9009	break;
9010	case BPF_FUNC_perf_event_read:
9011	case BPF_FUNC_perf_event_output:
9012	case BPF_FUNC_perf_event_read_value:
9013	case BPF_FUNC_skb_output:
9014	case BPF_FUNC_xdp_output:
9015	if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
9016	goto error;
9017	break;
9018	case BPF_FUNC_ringbuf_output:
9019	case BPF_FUNC_ringbuf_reserve:
9020	case BPF_FUNC_ringbuf_query:
9021	case BPF_FUNC_ringbuf_reserve_dynptr:
9022	case BPF_FUNC_ringbuf_submit_dynptr:
9023	case BPF_FUNC_ringbuf_discard_dynptr:
9024	if (map->map_type != BPF_MAP_TYPE_RINGBUF)
9025	goto error;
9026	break;
9027	case BPF_FUNC_user_ringbuf_drain:
9028	if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
9029	goto error;
9030	break;
9031	case BPF_FUNC_get_stackid:
9032	if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
9033	goto error;
9034	break;
9035	case BPF_FUNC_current_task_under_cgroup:
9036	case BPF_FUNC_skb_under_cgroup:
9037	if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
9038	goto error;
9039	break;
9040	case BPF_FUNC_redirect_map:
9041	if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
9042	map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
9043	map->map_type != BPF_MAP_TYPE_CPUMAP &&
9044	map->map_type != BPF_MAP_TYPE_XSKMAP)
9045	goto error;
9046	break;
9047	case BPF_FUNC_sk_redirect_map:
9048	case BPF_FUNC_msg_redirect_map:
9049	case BPF_FUNC_sock_map_update:
9050	if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
9051	goto error;
9052	break;
9053	case BPF_FUNC_sk_redirect_hash:
9054	case BPF_FUNC_msg_redirect_hash:
9055	case BPF_FUNC_sock_hash_update:
9056	if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
9057	goto error;
9058	break;
9059	case BPF_FUNC_get_local_storage:
9060	if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
9061	map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
9062	goto error;
9063	break;
9064	case BPF_FUNC_sk_select_reuseport:
9065	if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
9066	map->map_type != BPF_MAP_TYPE_SOCKMAP &&
9067	map->map_type != BPF_MAP_TYPE_SOCKHASH)
9068	goto error;
9069	break;
9070	case BPF_FUNC_map_pop_elem:
9071	if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9072	map->map_type != BPF_MAP_TYPE_STACK)
9073	goto error;
9074	break;
9075	case BPF_FUNC_map_peek_elem:
9076	case BPF_FUNC_map_push_elem:
9077	if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9078	map->map_type != BPF_MAP_TYPE_STACK &&
9079	map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
9080	goto error;
9081	break;
9082	case BPF_FUNC_map_lookup_percpu_elem:
9083	if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
9084	map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
9085	map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
9086	goto error;
9087	break;
9088	case BPF_FUNC_sk_storage_get:
9089	case BPF_FUNC_sk_storage_delete:
9090	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
9091	goto error;
9092	break;
9093	case BPF_FUNC_inode_storage_get:
9094	case BPF_FUNC_inode_storage_delete:
9095	if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
9096	goto error;
9097	break;
9098	case BPF_FUNC_task_storage_get:
9099	case BPF_FUNC_task_storage_delete:
9100	if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
9101	goto error;
9102	break;
9103	case BPF_FUNC_cgrp_storage_get:
9104	case BPF_FUNC_cgrp_storage_delete:
9105	if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
9106	goto error;
9107	break;
9108	default:
9109	break;
9110	}
9111
9112	return `0`;
9113	error:
9114	verbose(private_data: env, fmt: "cannot pass map_type %d into func %s#%d\n",
9115	map->map_type, func_id_name(id: func_id), func_id);
9116	return -EINVAL;
9117	}
9118
9119	static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
9120	{
9121	int count = `0`;
9122
9123	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
9124	count++;
9125	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
9126	count++;
9127	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
9128	count++;
9129	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
9130	count++;
9131	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
9132	count++;
9133
9134	/ We only support one arg being in raw mode at the moment,*
9135	* which is sufficient for the helper functions we have
9136	* right now.
9137	*/
9138	return count <= `1`;
9139	}
9140
9141	static bool check_args_pair_invalid(const struct bpf_func_proto fn, int* arg)
9142	{
9143	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
9144	bool has_size = fn->arg_size[arg] != `0`;
9145	bool is_next_size = false;
9146
9147	if (arg + `1` < ARRAY_SIZE(fn->arg_type))
9148	is_next_size = arg_type_is_mem_size(type: fn->arg_type[arg + `1`]);
9149
9150	if (base_type(type: fn->arg_type[arg]) != ARG_PTR_TO_MEM)
9151	return is_next_size;
9152
9153	return has_size == is_next_size \|\| is_next_size == is_fixed;
9154	}
9155
9156	static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9157	{
9158	/ bpf_xxx(..., buf, len) call will access 'len'*
9159	* bytes from memory 'buf'. Both arg types need
9160	* to be paired, so make sure there's no buggy
9161	* helper function specification.
9162	*/
9163	if (arg_type_is_mem_size(type: fn->arg1_type) \|\|
9164	check_args_pair_invalid(fn, arg: `0`) \|\|
9165	check_args_pair_invalid(fn, arg: `1`) \|\|
9166	check_args_pair_invalid(fn, arg: `2`) \|\|
9167	check_args_pair_invalid(fn, arg: `3`) \|\|
9168	check_args_pair_invalid(fn, arg: `4`))
9169	return false;
9170
9171	return true;
9172	}
9173
9174	static bool check_btf_id_ok(const struct bpf_func_proto *fn)
9175	{
9176	int i;
9177
9178	for (i = `0`; i < ARRAY_SIZE(fn->arg_type); i++) {
9179	if (base_type(type: fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
9180	return !!fn->arg_btf_id[i];
9181	if (base_type(type: fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
9182	return fn->arg_btf_id[i] == BPF_PTR_POISON;
9183	if (base_type(type: fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
9184	/ arg_btf_id and arg_size are in a union. /
9185	(base_type(type: fn->arg_type[i]) != ARG_PTR_TO_MEM \|\|
9186	!(fn->arg_type[i] & MEM_FIXED_SIZE)))
9187	return false;
9188	}
9189
9190	return true;
9191	}
9192
9193	static int check_func_proto(const struct bpf_func_proto fn, int* func_id)
9194	{
9195	return check_raw_mode_ok(fn) &&
9196	check_arg_pair_ok(fn) &&
9197	check_btf_id_ok(fn) ? `0` : -EINVAL;
9198	}
9199
9200	/ Packet data might have moved, any old PTR_TO_PACKET[_META,_END]*
9201	* are now invalid, so turn them into unknown SCALAR_VALUE.
9202	*
9203	* This also applies to dynptr slices belonging to skb and xdp dynptrs,
9204	* since these slices point to packet data.
9205	*/
9206	static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
9207	{
9208	struct bpf_func_state *state;
9209	struct bpf_reg_state *reg;
9210
9211	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9212	if (reg_is_pkt_pointer_any(reg) \|\| reg_is_dynptr_slice_pkt(reg))
9213	mark_reg_invalid(env, reg);
9214	}));
9215	}
9216
/* Special negative values stored in a packet pointer's 'range' field by
 * mark_pkt_end() to record how the pointer compares against pkt_end.
 */
enum {
	AT_PKT_END = -1,
	BEYOND_PKT_END = -2,
};
9221
9222	static void mark_pkt_end(struct bpf_verifier_state vstate, int* regn, bool range_open)
9223	{
9224	struct bpf_func_state *state = vstate->frame[vstate->curframe];
9225	struct bpf_reg_state *reg = &state->regs[regn];
9226
9227	if (reg->type != PTR_TO_PACKET)
9228	/ PTR_TO_PACKET_META is not supported yet /
9229	return;
9230
9231	/ The 'reg' is pkt > pkt_end or pkt >= pkt_end.*
9232	* How far beyond pkt_end it goes is unknown.
9233	* if (!range_open) it's the case of pkt >= pkt_end
9234	* if (range_open) it's the case of pkt > pkt_end
9235	* hence this pointer is at least 1 byte bigger than pkt_end
9236	*/
9237	if (range_open)
9238	reg->range = BEYOND_PKT_END;
9239	else
9240	reg->range = AT_PKT_END;
9241	}
9242
9243	/ The pointer with the specified id has released its reference to kernel*
9244	* resources. Identify all copies of the same pointer and clear the reference.
9245	*/
9246	static int release_reference(struct bpf_verifier_env *env,
9247	int ref_obj_id)
9248	{
9249	struct bpf_func_state *state;
9250	struct bpf_reg_state *reg;
9251	int err;
9252
9253	err = release_reference_state(state: cur_func(env), ptr_id: ref_obj_id);
9254	if (err)
9255	return err;
9256
9257	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9258	if (reg->ref_obj_id == ref_obj_id)
9259	mark_reg_invalid(env, reg);
9260	}));
9261
9262	return `0`;
9263	}
9264
9265	static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
9266	{
9267	struct bpf_func_state *unused;
9268	struct bpf_reg_state *reg;
9269
9270	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9271	if (type_is_non_owning_ref(reg->type))
9272	mark_reg_invalid(env, reg);
9273	}));
9274	}
9275
9276	static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9277	struct bpf_reg_state *regs)
9278	{
9279	int i;
9280
9281	/ after the call registers r0 - r5 were scratched /
9282	for (i = `0`; i < CALLER_SAVED_REGS; i++) {
9283	mark_reg_not_init(env, regs, regno: caller_saved[i]);
9284	__check_reg_arg(env, regs, regno: caller_saved[i], t: DST_OP_NO_MARK);
9285	}
9286	}
9287
/* Callback type used to initialise a callee frame's registers from the
 * caller's frame at the call instruction @insn_idx.
 * Implementations return 0 on success or a negative error code.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx);
9292
/* Forward declaration: check_func_call() references set_callee_state()
 * before its definition.
 */
static int set_callee_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *caller,
			    struct bpf_func_state *callee, int insn_idx);
9296
9297	static int setup_func_entry(struct bpf_verifier_env env, int* subprog, int callsite,
9298	set_callee_state_fn set_callee_state_cb,
9299	struct bpf_verifier_state *state)
9300	{
9301	struct bpf_func_state caller, callee;
9302	int err;
9303
9304	if (state->curframe + `1` >= MAX_CALL_FRAMES) {
9305	verbose(private_data: env, fmt: "the call stack of %d frames is too deep\n",
9306	state->curframe + `2`);
9307	return -E2BIG;
9308	}
9309
9310	if (state->frame[state->curframe + `1`]) {
9311	verbose(private_data: env, fmt: "verifier bug. Frame %d already allocated\n",
9312	state->curframe + `1`);
9313	return -EFAULT;
9314	}
9315
9316	caller = state->frame[state->curframe];
9317	callee = kzalloc(size: sizeof(*callee), GFP_KERNEL);
9318	if (!callee)
9319	return -ENOMEM;
9320	state->frame[state->curframe + `1`] = callee;
9321
9322	/ callee cannot access r0, r6 - r9 for reading and has to write*
9323	* into its own stack before reading from it.
9324	* callee can read/write into caller's stack
9325	*/
9326	init_func_state(env, state: callee,
9327	/ remember the callsite, it will be used by bpf_exit /
9328	callsite,
9329	frameno: state->curframe + `1` / frameno within this callchain /,
9330	subprogno: subprog / subprog number within this prog /);
9331	/ Transfer references to the callee /
9332	err = copy_reference_state(dst: callee, src: caller);
9333	err = err ?: set_callee_state_cb(env, caller, callee, callsite);
9334	if (err)
9335	goto err_out;
9336
9337	/ only increment it after check_reg_arg() finished /
9338	state->curframe++;
9339
9340	return `0`;
9341
9342	err_out:
9343	free_func_state(state: callee);
9344	state->frame[state->curframe + `1`] = NULL;
9345	return err;
9346	}
9347
9348	static int btf_check_func_arg_match(struct bpf_verifier_env env, int* subprog,
9349	const struct btf *btf,
9350	struct bpf_reg_state *regs)
9351	{
9352	struct bpf_subprog_info *sub = subprog_info(env, subprog);
9353	struct bpf_verifier_log *log = &env->log;
9354	u32 i;
9355	int ret;
9356
9357	ret = btf_prepare_func_args(env, subprog);
9358	if (ret)
9359	return ret;
9360
9361	/ check that BTF function arguments match actual types that the*
9362	* verifier sees.
9363	*/
9364	for (i = `0`; i < sub->arg_cnt; i++) {
9365	u32 regno = i + `1`;
9366	struct bpf_reg_state *reg = &regs[regno];
9367	struct bpf_subprog_arg_info *arg = &sub->args[i];
9368
9369	if (arg->arg_type == ARG_ANYTHING) {
9370	if (reg->type != SCALAR_VALUE) {
9371	bpf_log(log, fmt: "R%d is not a scalar\n", regno);
9372	return -EINVAL;
9373	}
9374	} else if (arg->arg_type == ARG_PTR_TO_CTX) {
9375	ret = check_func_arg_reg_off(env, reg, regno, arg_type: ARG_DONTCARE);
9376	if (ret < `0`)
9377	return ret;
9378	/ If function expects ctx type in BTF check that caller*
9379	* is passing PTR_TO_CTX.
9380	*/
9381	if (reg->type != PTR_TO_CTX) {
9382	bpf_log(log, fmt: "arg#%d expects pointer to ctx\n", i);
9383	return -EINVAL;
9384	}
9385	} else if (base_type(type: arg->arg_type) == ARG_PTR_TO_MEM) {
9386	ret = check_func_arg_reg_off(env, reg, regno, arg_type: ARG_DONTCARE);
9387	if (ret < `0`)
9388	return ret;
9389	if (check_mem_reg(env, reg, regno, mem_size: arg->mem_size))
9390	return -EINVAL;
9391	if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
9392	bpf_log(log, fmt: "arg#%d is expected to be non-NULL\n", i);
9393	return -EINVAL;
9394	}
9395	} else if (base_type(type: arg->arg_type) == ARG_PTR_TO_ARENA) {
9396	/*
9397	* Can pass any value and the kernel won't crash, but
9398	* only PTR_TO_ARENA or SCALAR make sense. Everything
9399	* else is a bug in the bpf program. Point it out to
9400	* the user at the verification time instead of
9401	* run-time debug nightmare.
9402	*/
9403	if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
9404	bpf_log(log, fmt: "R%d is not a pointer to arena or scalar.\n", regno);
9405	return -EINVAL;
9406	}
9407	} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR \| MEM_RDONLY)) {
9408	ret = process_dynptr_func(env, regno, insn_idx: -`1`, arg_type: arg->arg_type, clone_ref_obj_id: `0`);
9409	if (ret)
9410	return ret;
9411	} else if (base_type(type: arg->arg_type) == ARG_PTR_TO_BTF_ID) {
9412	struct bpf_call_arg_meta meta;
9413	int err;
9414
9415	if (register_is_null(reg) && type_may_be_null(type: arg->arg_type))
9416	continue;
9417
9418	memset(&meta, `0`, sizeof(meta)); / leave func_id as zero /
9419	err = check_reg_type(env, regno, arg_type: arg->arg_type, arg_btf_id: &arg->btf_id, meta: &meta);
9420	err = err ?: check_func_arg_reg_off(env, reg, regno, arg_type: arg->arg_type);
9421	if (err)
9422	return err;
9423	} else {
9424	bpf_log(log, fmt: "verifier bug: unrecognized arg#%d type %d\n",
9425	i, arg->arg_type);
9426	return -EFAULT;
9427	}
9428	}
9429
9430	return `0`;
9431	}
9432
9433	/ Compare BTF of a function call with given bpf_reg_state.*
9434	* Returns:
9435	* EFAULT - there is a verifier bug. Abort verification.
9436	* EINVAL - there is a type mismatch or BTF is not available.
9437	* 0 - BTF matches with what bpf_reg_state expects.
9438	* Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9439	*/
9440	static int btf_check_subprog_call(struct bpf_verifier_env env, int* subprog,
9441	struct bpf_reg_state *regs)
9442	{
9443	struct bpf_prog *prog = env->prog;
9444	struct btf *btf = prog->aux->btf;
9445	u32 btf_id;
9446	int err;
9447
9448	if (!prog->aux->func_info)
9449	return -EINVAL;
9450
9451	btf_id = prog->aux->func_info[subprog].type_id;
9452	if (!btf_id)
9453	return -EFAULT;
9454
9455	if (prog->aux->func_info_aux[subprog].unreliable)
9456	return -EINVAL;
9457
9458	err = btf_check_func_arg_match(env, subprog, btf, regs);
9459	/ Compiler optimizations can remove arguments from static functions*
9460	* or mismatched type can be passed into a global function.
9461	* In such cases mark the function as unreliable from BTF point of view.
9462	*/
9463	if (err)
9464	prog->aux->func_info_aux[subprog].unreliable = true;
9465	return err;
9466	}
9467
9468	static int push_callback_call(struct bpf_verifier_env env, struct* bpf_insn *insn,
9469	int insn_idx, int subprog,
9470	set_callee_state_fn set_callee_state_cb)
9471	{
9472	struct bpf_verifier_state state = env->cur_state, callback_state;
9473	struct bpf_func_state caller, callee;
9474	int err;
9475
9476	caller = state->frame[state->curframe];
9477	err = btf_check_subprog_call(env, subprog, regs: caller->regs);
9478	if (err == -EFAULT)
9479	return err;
9480
9481	/ set_callee_state is used for direct subprog calls, but we are*
9482	* interested in validating only BPF helpers that can call subprogs as
9483	* callbacks
9484	*/
9485	env->subprog_info[subprog].is_cb = true;
9486	if (bpf_pseudo_kfunc_call(insn) &&
9487	!is_sync_callback_calling_kfunc(btf_id: insn->imm)) {
9488	verbose(private_data: env, fmt: "verifier bug: kfunc %s#%d not marked as callback-calling\n",
9489	func_id_name(id: insn->imm), insn->imm);
9490	return -EFAULT;
9491	} else if (!bpf_pseudo_kfunc_call(insn) &&
9492	!is_callback_calling_function(func_id: insn->imm)) { / helper /
9493	verbose(private_data: env, fmt: "verifier bug: helper %s#%d not marked as callback-calling\n",
9494	func_id_name(id: insn->imm), insn->imm);
9495	return -EFAULT;
9496	}
9497
9498	if (is_async_callback_calling_insn(insn)) {
9499	struct bpf_verifier_state *async_cb;
9500
9501	/ there is no real recursion here. timer callbacks are async /
9502	env->subprog_info[subprog].is_async_cb = true;
9503	async_cb = push_async_cb(env, insn_idx: env->subprog_info[subprog].start,
9504	prev_insn_idx: insn_idx, subprog);
9505	if (!async_cb)
9506	return -EFAULT;
9507	callee = async_cb->frame[`0`];
9508	callee->async_entry_cnt = caller->async_entry_cnt + `1`;
9509
9510	/ Convert bpf_timer_set_callback() args into timer callback args /
9511	err = set_callee_state_cb(env, caller, callee, insn_idx);
9512	if (err)
9513	return err;
9514
9515	return `0`;
9516	}
9517
9518	/ for callback functions enqueue entry to callback and*
9519	* proceed with next instruction within current frame.
9520	*/
9521	callback_state = push_stack(env, insn_idx: env->subprog_info[subprog].start, prev_insn_idx: insn_idx, speculative: false);
9522	if (!callback_state)
9523	return -ENOMEM;
9524
9525	err = setup_func_entry(env, subprog, callsite: insn_idx, set_callee_state_cb,
9526	state: callback_state);
9527	if (err)
9528	return err;
9529
9530	callback_state->callback_unroll_depth++;
9531	callback_state->frame[callback_state->curframe - `1`]->callback_depth++;
9532	caller->callback_depth = `0`;
9533	return `0`;
9534	}
9535
9536	static int check_func_call(struct bpf_verifier_env env, struct* bpf_insn *insn,
9537	int *insn_idx)
9538	{
9539	struct bpf_verifier_state *state = env->cur_state;
9540	struct bpf_func_state *caller;
9541	int err, subprog, target_insn;
9542
9543	target_insn = *insn_idx + insn->imm + `1`;
9544	subprog = find_subprog(env, off: target_insn);
9545	if (subprog < `0`) {
9546	verbose(private_data: env, fmt: "verifier bug. No program starts at insn %d\n", target_insn);
9547	return -EFAULT;
9548	}
9549
9550	caller = state->frame[state->curframe];
9551	err = btf_check_subprog_call(env, subprog, regs: caller->regs);
9552	if (err == -EFAULT)
9553	return err;
9554	if (subprog_is_global(env, subprog)) {
9555	const char *sub_name = subprog_name(env, subprog);
9556
9557	/ Only global subprogs cannot be called with a lock held. /
9558	if (env->cur_state->active_lock.ptr) {
9559	verbose(private_data: env, fmt: "global function calls are not allowed while holding a lock,\n"
9560	"use static function instead\n");
9561	return -EINVAL;
9562	}
9563
9564	if (err) {
9565	verbose(private_data: env, fmt: "Caller passes invalid args into func#%d ('%s')\n",
9566	subprog, sub_name);
9567	return err;
9568	}
9569
9570	verbose(private_data: env, fmt: "Func#%d ('%s') is global and assumed valid.\n",
9571	subprog, sub_name);
9572	/ mark global subprog for verifying after main prog /
9573	subprog_aux(env, subprog)->called = true;
9574	clear_caller_saved_regs(env, regs: caller->regs);
9575
9576	/ All global functions return a 64-bit SCALAR_VALUE /
9577	mark_reg_unknown(env, regs: caller->regs, regno: BPF_REG_0);
9578	caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9579
9580	/ continue with next insn after call /
9581	return `0`;
9582	}
9583
9584	/ for regular function entry setup new frame and continue*
9585	* from that frame.
9586	*/
9587	err = setup_func_entry(env, subprog, callsite: *insn_idx, set_callee_state_cb: set_callee_state, state);
9588	if (err)
9589	return err;
9590
9591	clear_caller_saved_regs(env, regs: caller->regs);
9592
9593	/ and go analyze first insn of the callee /
9594	*insn_idx = env->subprog_info[subprog].start - `1`;
9595
9596	if (env->log.level & BPF_LOG_LEVEL) {
9597	verbose(private_data: env, fmt: "caller:\n");
9598	print_verifier_state(env, state: caller, print_all: true);
9599	verbose(private_data: env, fmt: "callee:\n");
9600	print_verifier_state(env, state: state->frame[state->curframe], print_all: true);
9601	}
9602
9603	return `0`;
9604	}
9605
9606	int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9607	struct bpf_func_state *caller,
9608	struct bpf_func_state *callee)
9609	{
9610	/ bpf_for_each_map_elem(struct bpf_map map, void callback_fn,*
9611	* void *callback_ctx, u64 flags);
9612	* callback_fn(struct bpf_map map, void key, void *value,
9613	* void *callback_ctx);
9614	*/
9615	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9616
9617	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9618	__mark_reg_known_zero(reg: &callee->regs[BPF_REG_2]);
9619	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9620
9621	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9622	__mark_reg_known_zero(reg: &callee->regs[BPF_REG_3]);
9623	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9624
9625	/ pointer to stack or null /
9626	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9627
9628	/ unused /
9629	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9630	return `0`;
9631	}
9632
9633	static int set_callee_state(struct bpf_verifier_env *env,
9634	struct bpf_func_state *caller,
9635	struct bpf_func_state callee, int* insn_idx)
9636	{
9637	int i;
9638
9639	/ copy r1 - r5 args that callee can access. The copy includes parent*
9640	* pointers, which connects us up to the liveness chain
9641	*/
9642	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9643	callee->regs[i] = caller->regs[i];
9644	return `0`;
9645	}
9646
9647	static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9648	struct bpf_func_state *caller,
9649	struct bpf_func_state *callee,
9650	int insn_idx)
9651	{
9652	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9653	struct bpf_map *map;
9654	int err;
9655
9656	if (bpf_map_ptr_poisoned(aux: insn_aux)) {
9657	verbose(private_data: env, fmt: "tail_call abusing map_ptr\n");
9658	return -EINVAL;
9659	}
9660
9661	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
9662	if (!map->ops->map_set_for_each_callback_args \|\|
9663	!map->ops->map_for_each_callback) {
9664	verbose(private_data: env, fmt: "callback function not allowed for map\n");
9665	return -ENOTSUPP;
9666	}
9667
9668	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9669	if (err)
9670	return err;
9671
9672	callee->in_callback_fn = true;
9673	callee->callback_ret_range = retval_range(minval: `0`, maxval: `1`);
9674	return `0`;
9675	}
9676
9677	static int set_loop_callback_state(struct bpf_verifier_env *env,
9678	struct bpf_func_state *caller,
9679	struct bpf_func_state *callee,
9680	int insn_idx)
9681	{
9682	/ bpf_loop(u32 nr_loops, void callback_fn, void callback_ctx,*
9683	* u64 flags);
9684	* callback_fn(u32 index, void *callback_ctx);
9685	*/
9686	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9687	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9688
9689	/ unused /
9690	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_3]);
9691	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9692	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9693
9694	callee->in_callback_fn = true;
9695	callee->callback_ret_range = retval_range(minval: `0`, maxval: `1`);
9696	return `0`;
9697	}
9698
9699	static int set_timer_callback_state(struct bpf_verifier_env *env,
9700	struct bpf_func_state *caller,
9701	struct bpf_func_state *callee,
9702	int insn_idx)
9703	{
9704	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9705
9706	/ bpf_timer_set_callback(struct bpf_timer timer, void callback_fn);*
9707	* callback_fn(struct bpf_map map, void key, void *value);
9708	*/
9709	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9710	__mark_reg_known_zero(reg: &callee->regs[BPF_REG_1]);
9711	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9712
9713	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9714	__mark_reg_known_zero(reg: &callee->regs[BPF_REG_2]);
9715	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9716
9717	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9718	__mark_reg_known_zero(reg: &callee->regs[BPF_REG_3]);
9719	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9720
9721	/ unused /
9722	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9723	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9724	callee->in_async_callback_fn = true;
9725	callee->callback_ret_range = retval_range(minval: `0`, maxval: `1`);
9726	return `0`;
9727	}
9728
9729	static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9730	struct bpf_func_state *caller,
9731	struct bpf_func_state *callee,
9732	int insn_idx)
9733	{
9734	/ bpf_find_vma(struct task_struct task, u64 addr,
9735	* void callback_fn, void callback_ctx, u64 flags)
9736	* (callback_fn)(struct task_struct *task,
9737	* struct vm_area_struct vma, void callback_ctx);
9738	*/
9739	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9740
9741	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9742	__mark_reg_known_zero(reg: &callee->regs[BPF_REG_2]);
9743	callee->regs[BPF_REG_2].btf = btf_vmlinux;
9744	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9745
9746	/ pointer to stack or null /
9747	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9748
9749	/ unused /
9750	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9751	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9752	callee->in_callback_fn = true;
9753	callee->callback_ret_range = retval_range(minval: `0`, maxval: `1`);
9754	return `0`;
9755	}
9756
9757	static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9758	struct bpf_func_state *caller,
9759	struct bpf_func_state *callee,
9760	int insn_idx)
9761	{
9762	/ bpf_user_ringbuf_drain(struct bpf_map map, void callback_fn, void*
9763	* callback_ctx, u64 flags);
9764	* callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9765	*/
9766	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_0]);
9767	mark_dynptr_cb_reg(env, reg: &callee->regs[BPF_REG_1], type: BPF_DYNPTR_TYPE_LOCAL);
9768	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9769
9770	/ unused /
9771	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_3]);
9772	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9773	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9774
9775	callee->in_callback_fn = true;
9776	callee->callback_ret_range = retval_range(minval: `0`, maxval: `1`);
9777	return `0`;
9778	}
9779
9780	static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9781	struct bpf_func_state *caller,
9782	struct bpf_func_state *callee,
9783	int insn_idx)
9784	{
9785	/ void bpf_rbtree_add_impl(struct bpf_rb_root root, struct bpf_rb_node node,*
9786	* bool (less)(struct bpf_rb_node a, const struct bpf_rb_node b));
9787	*
9788	* 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9789	* that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9790	* by this point, so look at 'root'
9791	*/
9792	struct btf_field *field;
9793
9794	field = reg_find_field_offset(reg: &caller->regs[BPF_REG_1], off: caller->regs[BPF_REG_1].off,
9795	fields: BPF_RB_ROOT);
9796	if (!field \|\| !field->graph_root.value_btf_id)
9797	return -EFAULT;
9798
9799	mark_reg_graph_node(regs: callee->regs, regno: BPF_REG_1, ds_head: &field->graph_root);
9800	ref_set_non_owning(env, reg: &callee->regs[BPF_REG_1]);
9801	mark_reg_graph_node(regs: callee->regs, regno: BPF_REG_2, ds_head: &field->graph_root);
9802	ref_set_non_owning(env, reg: &callee->regs[BPF_REG_2]);
9803
9804	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_3]);
9805	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_4]);
9806	__mark_reg_not_init(env, reg: &callee->regs[BPF_REG_5]);
9807	callee->in_callback_fn = true;
9808	callee->callback_ret_range = retval_range(minval: `0`, maxval: `1`);
9809	return `0`;
9810	}
9811
/* Forward declaration; called by in_rbtree_lock_required_cb() below. */
static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9813
9814	/ Are we currently verifying the callback for a rbtree helper that must*
9815	* be called with lock held? If so, no need to complain about unreleased
9816	* lock
9817	*/
9818	static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9819	{
9820	struct bpf_verifier_state *state = env->cur_state;
9821	struct bpf_insn *insn = env->prog->insnsi;
9822	struct bpf_func_state *callee;
9823	int kfunc_btf_id;
9824
9825	if (!state->curframe)
9826	return false;
9827
9828	callee = state->frame[state->curframe];
9829
9830	if (!callee->in_callback_fn)
9831	return false;
9832
9833	kfunc_btf_id = insn[callee->callsite].imm;
9834	return is_rbtree_lock_required_kfunc(btf_id: kfunc_btf_id);
9835	}
9836
9837	static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9838	{
9839	return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
9840	}
9841
9842	static int prepare_func_exit(struct bpf_verifier_env env, int* *insn_idx)
9843	{
9844	struct bpf_verifier_state state = env->cur_state, prev_st;
9845	struct bpf_func_state caller, callee;
9846	struct bpf_reg_state *r0;
9847	bool in_callback_fn;
9848	int err;
9849
9850	callee = state->frame[state->curframe];
9851	r0 = &callee->regs[BPF_REG_0];
9852	if (r0->type == PTR_TO_STACK) {
9853	/ technically it's ok to return caller's stack pointer*
9854	* (or caller's caller's pointer) back to the caller,
9855	* since these pointers are valid. Only current stack
9856	* pointer will be invalid as soon as function exits,
9857	* but let's be conservative
9858	*/
9859	verbose(private_data: env, fmt: "cannot return stack pointer to the caller\n");
9860	return -EINVAL;
9861	}
9862
9863	caller = state->frame[state->curframe - `1`];
9864	if (callee->in_callback_fn) {
9865	if (r0->type != SCALAR_VALUE) {
9866	verbose(private_data: env, fmt: "R0 not a scalar value\n");
9867	return -EACCES;
9868	}
9869
9870	/ we are going to rely on register's precise value /
9871	err = mark_reg_read(env, state: r0, parent: r0->parent, flag: REG_LIVE_READ64);
9872	err = err ?: mark_chain_precision(env, regno: BPF_REG_0);
9873	if (err)
9874	return err;
9875
9876	/ enforce R0 return value range /
9877	if (!retval_range_within(range: callee->callback_ret_range, reg: r0)) {
9878	verbose_invalid_scalar(env, reg: r0, range: callee->callback_ret_range,
9879	ctx: "At callback return", reg_name: "R0");
9880	return -EINVAL;
9881	}
9882	if (!calls_callback(env, insn_idx: callee->callsite)) {
9883	verbose(private_data: env, fmt: "BUG: in callback at %d, callsite %d !calls_callback\n",
9884	*insn_idx, callee->callsite);
9885	return -EFAULT;
9886	}
9887	} else {
9888	/ return to the caller whatever r0 had in the callee /
9889	caller->regs[BPF_REG_0] = *r0;
9890	}
9891
9892	/ callback_fn frame should have released its own additions to parent's*
9893	* reference state at this point, or check_reference_leak would
9894	* complain, hence it must be the same as the caller. There is no need
9895	* to copy it back.
9896	*/
9897	if (!callee->in_callback_fn) {
9898	/ Transfer references to the caller /
9899	err = copy_reference_state(dst: caller, src: callee);
9900	if (err)
9901	return err;
9902	}
9903
9904	/ for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,*
9905	* there function call logic would reschedule callback visit. If iteration
9906	* converges is_state_visited() would prune that visit eventually.
9907	*/
9908	in_callback_fn = callee->in_callback_fn;
9909	if (in_callback_fn)
9910	*insn_idx = callee->callsite;
9911	else
9912	*insn_idx = callee->callsite + `1`;
9913
9914	if (env->log.level & BPF_LOG_LEVEL) {
9915	verbose(private_data: env, fmt: "returning from callee:\n");
9916	print_verifier_state(env, state: callee, print_all: true);
9917	verbose(private_data: env, fmt: "to caller at %d:\n", *insn_idx);
9918	print_verifier_state(env, state: caller, print_all: true);
9919	}
9920	/ clear everything in the callee. In case of exceptional exits using*
9921	* bpf_throw, this will be done by copy_verifier_state for extra frames. */
9922	free_func_state(state: callee);
9923	state->frame[state->curframe--] = NULL;
9924
9925	/ for callbacks widen imprecise scalars to make programs like below verify:*
9926	*
9927	* struct ctx { int i; }
9928	* void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
9929	* ...
9930	* struct ctx = { .i = 0; }
9931	* bpf_loop(100, cb, &ctx, 0);
9932	*
9933	* This is similar to what is done in process_iter_next_call() for open
9934	* coded iterators.
9935	*/
9936	prev_st = in_callback_fn ? find_prev_entry(env, cur: state, insn_idx: *insn_idx) : NULL;
9937	if (prev_st) {
9938	err = widen_imprecise_scalars(env, old: prev_st, cur: state);
9939	if (err)
9940	return err;
9941	}
9942	return `0`;
9943	}
9944
9945	static int do_refine_retval_range(struct bpf_verifier_env *env,
9946	struct bpf_reg_state regs, int* ret_type,
9947	int func_id,
9948	struct bpf_call_arg_meta *meta)
9949	{
9950	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9951
9952	if (ret_type != RET_INTEGER)
9953	return `0`;
9954
9955	switch (func_id) {
9956	case BPF_FUNC_get_stack:
9957	case BPF_FUNC_get_task_stack:
9958	case BPF_FUNC_probe_read_str:
9959	case BPF_FUNC_probe_read_kernel_str:
9960	case BPF_FUNC_probe_read_user_str:
9961	ret_reg->smax_value = meta->msize_max_value;
9962	ret_reg->s32_max_value = meta->msize_max_value;
9963	ret_reg->smin_value = -MAX_ERRNO;
9964	ret_reg->s32_min_value = -MAX_ERRNO;
9965	reg_bounds_sync(reg: ret_reg);
9966	break;
9967	case BPF_FUNC_get_smp_processor_id:
9968	ret_reg->umax_value = nr_cpu_ids - `1`;
9969	ret_reg->u32_max_value = nr_cpu_ids - `1`;
9970	ret_reg->smax_value = nr_cpu_ids - `1`;
9971	ret_reg->s32_max_value = nr_cpu_ids - `1`;
9972	ret_reg->umin_value = `0`;
9973	ret_reg->u32_min_value = `0`;
9974	ret_reg->smin_value = `0`;
9975	ret_reg->s32_min_value = `0`;
9976	reg_bounds_sync(reg: ret_reg);
9977	break;
9978	}
9979
9980	return reg_bounds_sanity_check(env, reg: ret_reg, ctx: "retval");
9981	}
9982
9983	static int
9984	record_func_map(struct bpf_verifier_env env, struct* bpf_call_arg_meta *meta,
9985	int func_id, int insn_idx)
9986	{
9987	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9988	struct bpf_map *map = meta->map_ptr;
9989
9990	if (func_id != BPF_FUNC_tail_call &&
9991	func_id != BPF_FUNC_map_lookup_elem &&
9992	func_id != BPF_FUNC_map_update_elem &&
9993	func_id != BPF_FUNC_map_delete_elem &&
9994	func_id != BPF_FUNC_map_push_elem &&
9995	func_id != BPF_FUNC_map_pop_elem &&
9996	func_id != BPF_FUNC_map_peek_elem &&
9997	func_id != BPF_FUNC_for_each_map_elem &&
9998	func_id != BPF_FUNC_redirect_map &&
9999	func_id != BPF_FUNC_map_lookup_percpu_elem)
10000	return `0`;
10001
10002	if (map == NULL) {
10003	verbose(private_data: env, fmt: "kernel subsystem misconfigured verifier\n");
10004	return -EINVAL;
10005	}
10006
10007	/ In case of read-only, some additional restrictions*
10008	* need to be applied in order to prevent altering the
10009	* state of the map from program side.
10010	*/
10011	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
10012	(func_id == BPF_FUNC_map_delete_elem \|\|
10013	func_id == BPF_FUNC_map_update_elem \|\|
10014	func_id == BPF_FUNC_map_push_elem \|\|
10015	func_id == BPF_FUNC_map_pop_elem)) {
10016	verbose(private_data: env, fmt: "write into map forbidden\n");
10017	return -EACCES;
10018	}
10019
10020	if (!BPF_MAP_PTR(aux->map_ptr_state))
10021	bpf_map_ptr_store(aux, map: meta->map_ptr,
10022	unpriv: !meta->map_ptr->bypass_spec_v1);
10023	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
10024	bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
10025	unpriv: !meta->map_ptr->bypass_spec_v1);
10026	return `0`;
10027	}
10028
10029	static int
10030	record_func_key(struct bpf_verifier_env env, struct* bpf_call_arg_meta *meta,
10031	int func_id, int insn_idx)
10032	{
10033	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10034	struct bpf_reg_state regs = cur_regs(env), reg;
10035	struct bpf_map *map = meta->map_ptr;
10036	u64 val, max;
10037	int err;
10038
10039	if (func_id != BPF_FUNC_tail_call)
10040	return `0`;
10041	if (!map \|\| map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
10042	verbose(private_data: env, fmt: "kernel subsystem misconfigured verifier\n");
10043	return -EINVAL;
10044	}
10045
10046	reg = &regs[BPF_REG_3];
10047	val = reg->var_off.value;
10048	max = map->max_entries;
10049
10050	if (!(is_reg_const(reg, subreg32: false) && val < max)) {
10051	bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10052	return `0`;
10053	}
10054
10055	err = mark_chain_precision(env, regno: BPF_REG_3);
10056	if (err)
10057	return err;
10058	if (bpf_map_key_unseen(aux))
10059	bpf_map_key_store(aux, state: val);
10060	else if (!bpf_map_key_poisoned(aux) &&
10061	bpf_map_key_immediate(aux) != val)
10062	bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10063	return `0`;
10064	}
10065
10066	static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
10067	{
10068	struct bpf_func_state *state = cur_func(env);
10069	bool refs_lingering = false;
10070	int i;
10071
10072	if (!exception_exit && state->frameno && !state->in_callback_fn)
10073	return `0`;
10074
10075	for (i = `0`; i < state->acquired_refs; i++) {
10076	if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
10077	continue;
10078	verbose(private_data: env, fmt: "Unreleased reference id=%d alloc_insn=%d\n",
10079	state->refs[i].id, state->refs[i].insn_idx);
10080	refs_lingering = true;
10081	}
10082	return refs_lingering ? -EINVAL : `0`;
10083	}
10084
10085	static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
10086	struct bpf_reg_state *regs)
10087	{
10088	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
10089	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
10090	struct bpf_map *fmt_map = fmt_reg->map_ptr;
10091	struct bpf_bprintf_data data = {};
10092	int err, fmt_map_off, num_args;
10093	u64 fmt_addr;
10094	char *fmt;
10095
10096	/ data must be an array of u64 /
10097	if (data_len_reg->var_off.value % `8`)
10098	return -EINVAL;
10099	num_args = data_len_reg->var_off.value / `8`;
10100
10101	/ fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const*
10102	* and map_direct_value_addr is set.
10103	*/
10104	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
10105	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
10106	fmt_map_off);
10107	if (err) {
10108	verbose(private_data: env, fmt: "verifier bug\n");
10109	return -EFAULT;
10110	}
10111	fmt = (char )(long*)fmt_addr + fmt_map_off;
10112
10113	/ We are also guaranteed that fmt+fmt_map_off is NULL terminated, we*
10114	* can focus on validating the format specifiers.
10115	*/
10116	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, data: &data);
10117	if (err < `0`)
10118	verbose(private_data: env, fmt: "Invalid format string\n");
10119
10120	return err;
10121	}
10122
10123	static int check_get_func_ip(struct bpf_verifier_env *env)
10124	{
10125	enum bpf_prog_type type = resolve_prog_type(prog: env->prog);
10126	int func_id = BPF_FUNC_get_func_ip;
10127
10128	if (type == BPF_PROG_TYPE_TRACING) {
10129	if (!bpf_prog_has_trampoline(prog: env->prog)) {
10130	verbose(private_data: env, fmt: "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
10131	func_id_name(id: func_id), func_id);
10132	return -ENOTSUPP;
10133	}
10134	return `0`;
10135	} else if (type == BPF_PROG_TYPE_KPROBE) {
10136	return `0`;
10137	}
10138
10139	verbose(private_data: env, fmt: "func %s#%d not supported for program type %d\n",
10140	func_id_name(id: func_id), func_id, type);
10141	return -ENOTSUPP;
10142	}
10143
10144	static struct bpf_insn_aux_data cur_aux(struct* bpf_verifier_env *env)
10145	{
10146	return &env->insn_aux_data[env->insn_idx];
10147	}
10148
10149	static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10150	{
10151	struct bpf_reg_state *regs = cur_regs(env);
10152	struct bpf_reg_state *reg = &regs[BPF_REG_4];
10153	bool reg_is_null = register_is_null(reg);
10154
10155	if (reg_is_null)
10156	mark_chain_precision(env, regno: BPF_REG_4);
10157
10158	return reg_is_null;
10159	}
10160
10161	static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10162	{
10163	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10164
10165	if (!state->initialized) {
10166	state->initialized = `1`;
10167	state->fit_for_inline = loop_flag_is_zero(env);
10168	state->callback_subprogno = subprogno;
10169	return;
10170	}
10171
10172	if (!state->fit_for_inline)
10173	return;
10174
10175	state->fit_for_inline = (loop_flag_is_zero(env) &&
10176	state->callback_subprogno == subprogno);
10177	}
10178
10179	static int check_helper_call(struct bpf_verifier_env env, struct* bpf_insn *insn,
10180	int *insn_idx_p)
10181	{
10182	enum bpf_prog_type prog_type = resolve_prog_type(prog: env->prog);
10183	bool returns_cpu_specific_alloc_ptr = false;
10184	const struct bpf_func_proto *fn = NULL;
10185	enum bpf_return_type ret_type;
10186	enum bpf_type_flag ret_flag;
10187	struct bpf_reg_state *regs;
10188	struct bpf_call_arg_meta meta;
10189	int insn_idx = *insn_idx_p;
10190	bool changes_data;
10191	int i, err, func_id;
10192
10193	/ find function prototype /
10194	func_id = insn->imm;
10195	if (func_id < `0` \|\| func_id >= __BPF_FUNC_MAX_ID) {
10196	verbose(private_data: env, fmt: "invalid func %s#%d\n", func_id_name(id: func_id),
10197	func_id);
10198	return -EINVAL;
10199	}
10200
10201	if (env->ops->get_func_proto)
10202	fn = env->ops->get_func_proto(func_id, env->prog);
10203	if (!fn) {
10204	verbose(private_data: env, fmt: "unknown func %s#%d\n", func_id_name(id: func_id),
10205	func_id);
10206	return -EINVAL;
10207	}
10208
10209	/ eBPF programs must be GPL compatible to use GPL-ed functions /
10210	if (!env->prog->gpl_compatible && fn->gpl_only) {
10211	verbose(private_data: env, fmt: "cannot call GPL-restricted function from non-GPL compatible program\n");
10212	return -EINVAL;
10213	}
10214
10215	if (fn->allowed && !fn->allowed(env->prog)) {
10216	verbose(private_data: env, fmt: "helper call is not allowed in probe\n");
10217	return -EINVAL;
10218	}
10219
10220	if (!in_sleepable(env) && fn->might_sleep) {
10221	verbose(private_data: env, fmt: "helper call might sleep in a non-sleepable prog\n");
10222	return -EINVAL;
10223	}
10224
10225	/ With LD_ABS/IND some JITs save/restore skb from r1. /
10226	changes_data = bpf_helper_changes_pkt_data(func: fn->func);
10227	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
10228	verbose(private_data: env, fmt: "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
10229	func_id_name(id: func_id), func_id);
10230	return -EINVAL;
10231	}
10232
10233	memset(&meta, `0`, sizeof(meta));
10234	meta.pkt_access = fn->pkt_access;
10235
10236	err = check_func_proto(fn, func_id);
10237	if (err) {
10238	verbose(private_data: env, fmt: "kernel subsystem misconfigured func %s#%d\n",
10239	func_id_name(id: func_id), func_id);
10240	return err;
10241	}
10242
10243	if (env->cur_state->active_rcu_lock) {
10244	if (fn->might_sleep) {
10245	verbose(private_data: env, fmt: "sleepable helper %s#%d in rcu_read_lock region\n",
10246	func_id_name(id: func_id), func_id);
10247	return -EINVAL;
10248	}
10249
10250	if (in_sleepable(env) && is_storage_get_function(func_id))
10251	env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
10252	}
10253
10254	meta.func_id = func_id;
10255	/ check args /
10256	for (i = `0`; i < MAX_BPF_FUNC_REG_ARGS; i++) {
10257	err = check_func_arg(env, arg: i, meta: &meta, fn, insn_idx);
10258	if (err)
10259	return err;
10260	}
10261
10262	err = record_func_map(env, meta: &meta, func_id, insn_idx);
10263	if (err)
10264	return err;
10265
10266	err = record_func_key(env, meta: &meta, func_id, insn_idx);
10267	if (err)
10268	return err;
10269
10270	/ Mark slots with STACK_MISC in case of raw mode, stack offset*
10271	* is inferred from register state.
10272	*/
10273	for (i = `0`; i < meta.access_size; i++) {
10274	err = check_mem_access(env, insn_idx, regno: meta.regno, off: i, BPF_B,
10275	t: BPF_WRITE, value_regno: -`1`, strict_alignment_once: false, is_ldsx: false);
10276	if (err)
10277	return err;
10278	}
10279
10280	regs = cur_regs(env);
10281
10282	if (meta.release_regno) {
10283	err = -EINVAL;
10284	/ This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot*
10285	* be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
10286	* is safe to do directly.
10287	*/
10288	if (arg_type_is_dynptr(type: fn->arg_type[meta.release_regno - BPF_REG_1])) {
10289	if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
10290	verbose(private_data: env, fmt: "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
10291	return -EFAULT;
10292	}
10293	err = unmark_stack_slots_dynptr(env, reg: &regs[meta.release_regno]);
10294	} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
10295	u32 ref_obj_id = meta.ref_obj_id;
10296	bool in_rcu = in_rcu_cs(env);
10297	struct bpf_func_state *state;
10298	struct bpf_reg_state *reg;
10299
10300	err = release_reference_state(state: cur_func(env), ptr_id: ref_obj_id);
10301	if (!err) {
10302	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10303	if (reg->ref_obj_id == ref_obj_id) {
10304	if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
10305	reg->ref_obj_id = `0`;
10306	reg->type &= ~MEM_ALLOC;
10307	reg->type \|= MEM_RCU;
10308	} else {
10309	mark_reg_invalid(env, reg);
10310	}
10311	}
10312	}));
10313	}
10314	} else if (meta.ref_obj_id) {
10315	err = release_reference(env, ref_obj_id: meta.ref_obj_id);
10316	} else if (register_is_null(reg: &regs[meta.release_regno])) {
10317	/ meta.ref_obj_id can only be 0 if register that is meant to be*
10318	* released is NULL, which must be > R0.
10319	*/
10320	err = `0`;
10321	}
10322	if (err) {
10323	verbose(private_data: env, fmt: "func %s#%d reference has not been acquired before\n",
10324	func_id_name(id: func_id), func_id);
10325	return err;
10326	}
10327	}
10328
10329	switch (func_id) {
10330	case BPF_FUNC_tail_call:
10331	err = check_reference_leak(env, exception_exit: false);
10332	if (err) {
10333	verbose(private_data: env, fmt: "tail_call would lead to reference leak\n");
10334	return err;
10335	}
10336	break;
10337	case BPF_FUNC_get_local_storage:
10338	/ check that flags argument in get_local_storage(map, flags) is 0,*
10339	* this is required because get_local_storage() can't return an error.
10340	*/
10341	if (!register_is_null(reg: &regs[BPF_REG_2])) {
10342	verbose(private_data: env, fmt: "get_local_storage() doesn't support non-zero flags\n");
10343	return -EINVAL;
10344	}
10345	break;
10346	case BPF_FUNC_for_each_map_elem:
10347	err = push_callback_call(env, insn, insn_idx, subprog: meta.subprogno,
10348	set_callee_state_cb: set_map_elem_callback_state);
10349	break;
10350	case BPF_FUNC_timer_set_callback:
10351	err = push_callback_call(env, insn, insn_idx, subprog: meta.subprogno,
10352	set_callee_state_cb: set_timer_callback_state);
10353	break;
10354	case BPF_FUNC_find_vma:
10355	err = push_callback_call(env, insn, insn_idx, subprog: meta.subprogno,
10356	set_callee_state_cb: set_find_vma_callback_state);
10357	break;
10358	case BPF_FUNC_snprintf:
10359	err = check_bpf_snprintf_call(env, regs);
10360	break;
10361	case BPF_FUNC_loop:
10362	update_loop_inline_state(env, subprogno: meta.subprogno);
10363	/ Verifier relies on R1 value to determine if bpf_loop() iteration*
10364	* is finished, thus mark it precise.
10365	*/
10366	err = mark_chain_precision(env, regno: BPF_REG_1);
10367	if (err)
10368	return err;
10369	if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
10370	err = push_callback_call(env, insn, insn_idx, subprog: meta.subprogno,
10371	set_callee_state_cb: set_loop_callback_state);
10372	} else {
10373	cur_func(env)->callback_depth = `0`;
10374	if (env->log.level & BPF_LOG_LEVEL2)
10375	verbose(private_data: env, fmt: "frame%d bpf_loop iteration limit reached\n",
10376	env->cur_state->curframe);
10377	}
10378	break;
10379	case BPF_FUNC_dynptr_from_mem:
10380	if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
10381	verbose(private_data: env, fmt: "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
10382	reg_type_str(env, type: regs[BPF_REG_1].type));
10383	return -EACCES;
10384	}
10385	break;
10386	case BPF_FUNC_set_retval:
10387	if (prog_type == BPF_PROG_TYPE_LSM &&
10388	env->prog->expected_attach_type == BPF_LSM_CGROUP) {
10389	if (!env->prog->aux->attach_func_proto->type) {
10390	/ Make sure programs that attach to void*
10391	* hooks don't try to modify return value.
10392	*/
10393	verbose(private_data: env, fmt: "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10394	return -EINVAL;
10395	}
10396	}
10397	break;
10398	case BPF_FUNC_dynptr_data:
10399	{
10400	struct bpf_reg_state *reg;
10401	int id, ref_obj_id;
10402
10403	reg = get_dynptr_arg_reg(env, fn, regs);
10404	if (!reg)
10405	return -EFAULT;
10406
10407
10408	if (meta.dynptr_id) {
10409	verbose(private_data: env, fmt: "verifier internal error: meta.dynptr_id already set\n");
10410	return -EFAULT;
10411	}
10412	if (meta.ref_obj_id) {
10413	verbose(private_data: env, fmt: "verifier internal error: meta.ref_obj_id already set\n");
10414	return -EFAULT;
10415	}
10416
10417	id = dynptr_id(env, reg);
10418	if (id < `0`) {
10419	verbose(private_data: env, fmt: "verifier internal error: failed to obtain dynptr id\n");
10420	return id;
10421	}
10422
10423	ref_obj_id = dynptr_ref_obj_id(env, reg);
10424	if (ref_obj_id < `0`) {
10425	verbose(private_data: env, fmt: "verifier internal error: failed to obtain dynptr ref_obj_id\n");
10426	return ref_obj_id;
10427	}
10428
10429	meta.dynptr_id = id;
10430	meta.ref_obj_id = ref_obj_id;
10431
10432	break;
10433	}
10434	case BPF_FUNC_dynptr_write:
10435	{
10436	enum bpf_dynptr_type dynptr_type;
10437	struct bpf_reg_state *reg;
10438
10439	reg = get_dynptr_arg_reg(env, fn, regs);
10440	if (!reg)
10441	return -EFAULT;
10442
10443	dynptr_type = dynptr_get_type(env, reg);
10444	if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
10445	return -EFAULT;
10446
10447	if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
10448	/ this will trigger clear_all_pkt_pointers(), which will*
10449	* invalidate all dynptr slices associated with the skb
10450	*/
10451	changes_data = true;
10452
10453	break;
10454	}
10455	case BPF_FUNC_per_cpu_ptr:
10456	case BPF_FUNC_this_cpu_ptr:
10457	{
10458	struct bpf_reg_state *reg = &regs[BPF_REG_1];
10459	const struct btf_type *type;
10460
10461	if (reg->type & MEM_RCU) {
10462	type = btf_type_by_id(btf: reg->btf, type_id: reg->btf_id);
10463	if (!type \|\| !btf_type_is_struct(t: type)) {
10464	verbose(private_data: env, fmt: "Helper has invalid btf/btf_id in R1\n");
10465	return -EFAULT;
10466	}
10467	returns_cpu_specific_alloc_ptr = true;
10468	env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
10469	}
10470	break;
10471	}
10472	case BPF_FUNC_user_ringbuf_drain:
10473	err = push_callback_call(env, insn, insn_idx, subprog: meta.subprogno,
10474	set_callee_state_cb: set_user_ringbuf_callback_state);
10475	break;
10476	}
10477
10478	if (err)
10479	return err;
10480
10481	/ reset caller saved regs /
10482	for (i = `0`; i < CALLER_SAVED_REGS; i++) {
10483	mark_reg_not_init(env, regs, regno: caller_saved[i]);
10484	check_reg_arg(env, regno: caller_saved[i], t: DST_OP_NO_MARK);
10485	}
10486
10487	/ helper call returns 64-bit value. /
10488	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10489
10490	/ update return register (already marked as written above) /
10491	ret_type = fn->ret_type;
10492	ret_flag = type_flag(type: ret_type);
10493
10494	switch (base_type(type: ret_type)) {
10495	case RET_INTEGER:
10496	/ sets type to SCALAR_VALUE /
10497	mark_reg_unknown(env, regs, regno: BPF_REG_0);
10498	break;
10499	case RET_VOID:
10500	regs[BPF_REG_0].type = NOT_INIT;
10501	break;
10502	case RET_PTR_TO_MAP_VALUE:
10503	/ There is no offset yet applied, variable or fixed /
10504	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10505	/ remember map_ptr, so that check_map_access()*
10506	* can check 'value_size' boundary of memory access
10507	* to map element returned from bpf_map_lookup_elem()
10508	*/
10509	if (meta.map_ptr == NULL) {
10510	verbose(private_data: env,
10511	fmt: "kernel subsystem misconfigured verifier\n");
10512	return -EINVAL;
10513	}
10514	regs[BPF_REG_0].map_ptr = meta.map_ptr;
10515	regs[BPF_REG_0].map_uid = meta.map_uid;
10516	regs[BPF_REG_0].type = PTR_TO_MAP_VALUE \| ret_flag;
10517	if (!type_may_be_null(type: ret_type) &&
10518	btf_record_has_field(rec: meta.map_ptr->record, type: BPF_SPIN_LOCK)) {
10519	regs[BPF_REG_0].id = ++env->id_gen;
10520	}
10521	break;
10522	case RET_PTR_TO_SOCKET:
10523	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10524	regs[BPF_REG_0].type = PTR_TO_SOCKET \| ret_flag;
10525	break;
10526	case RET_PTR_TO_SOCK_COMMON:
10527	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10528	regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON \| ret_flag;
10529	break;
10530	case RET_PTR_TO_TCP_SOCK:
10531	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10532	regs[BPF_REG_0].type = PTR_TO_TCP_SOCK \| ret_flag;
10533	break;
10534	case RET_PTR_TO_MEM:
10535	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10536	regs[BPF_REG_0].type = PTR_TO_MEM \| ret_flag;
10537	regs[BPF_REG_0].mem_size = meta.mem_size;
10538	break;
10539	case RET_PTR_TO_MEM_OR_BTF_ID:
10540	{
10541	const struct btf_type *t;
10542
10543	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10544	t = btf_type_skip_modifiers(btf: meta.ret_btf, id: meta.ret_btf_id, NULL);
10545	if (!btf_type_is_struct(t)) {
10546	u32 tsize;
10547	const struct btf_type *ret;
10548	const char *tname;
10549
10550	/ resolve the type size of ksym. /
10551	ret = btf_resolve_size(btf: meta.ret_btf, type: t, type_size: &tsize);
10552	if (IS_ERR(ptr: ret)) {
10553	tname = btf_name_by_offset(btf: meta.ret_btf, offset: t->name_off);
10554	verbose(private_data: env, fmt: "unable to resolve the size of type '%s': %ld\n",
10555	tname, PTR_ERR(ptr: ret));
10556	return -EINVAL;
10557	}
10558	regs[BPF_REG_0].type = PTR_TO_MEM \| ret_flag;
10559	regs[BPF_REG_0].mem_size = tsize;
10560	} else {
10561	if (returns_cpu_specific_alloc_ptr) {
10562	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| MEM_ALLOC \| MEM_RCU;
10563	} else {
10564	/ MEM_RDONLY may be carried from ret_flag, but it*
10565	* doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
10566	* it will confuse the check of PTR_TO_BTF_ID in
10567	* check_mem_access().
10568	*/
10569	ret_flag &= ~MEM_RDONLY;
10570	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| ret_flag;
10571	}
10572
10573	regs[BPF_REG_0].btf = meta.ret_btf;
10574	regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10575	}
10576	break;
10577	}
10578	case RET_PTR_TO_BTF_ID:
10579	{
10580	struct btf *ret_btf;
10581	int ret_btf_id;
10582
10583	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
10584	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| ret_flag;
10585	if (func_id == BPF_FUNC_kptr_xchg) {
10586	ret_btf = meta.kptr_field->kptr.btf;
10587	ret_btf_id = meta.kptr_field->kptr.btf_id;
10588	if (!btf_is_kernel(btf: ret_btf)) {
10589	regs[BPF_REG_0].type \|= MEM_ALLOC;
10590	if (meta.kptr_field->type == BPF_KPTR_PERCPU)
10591	regs[BPF_REG_0].type \|= MEM_PERCPU;
10592	}
10593	} else {
10594	if (fn->ret_btf_id == BPF_PTR_POISON) {
10595	verbose(private_data: env, fmt: "verifier internal error:");
10596	verbose(private_data: env, fmt: "func %s has non-overwritten BPF_PTR_POISON return type\n",
10597	func_id_name(id: func_id));
10598	return -EINVAL;
10599	}
10600	ret_btf = btf_vmlinux;
10601	ret_btf_id = *fn->ret_btf_id;
10602	}
10603	if (ret_btf_id == `0`) {
10604	verbose(private_data: env, fmt: "invalid return type %u of func %s#%d\n",
10605	base_type(type: ret_type), func_id_name(id: func_id),
10606	func_id);
10607	return -EINVAL;
10608	}
10609	regs[BPF_REG_0].btf = ret_btf;
10610	regs[BPF_REG_0].btf_id = ret_btf_id;
10611	break;
10612	}
10613	default:
10614	verbose(private_data: env, fmt: "unknown return type %u of func %s#%d\n",
10615	base_type(type: ret_type), func_id_name(id: func_id), func_id);
10616	return -EINVAL;
10617	}
10618
10619	if (type_may_be_null(type: regs[BPF_REG_0].type))
10620	regs[BPF_REG_0].id = ++env->id_gen;
10621
10622	if (helper_multiple_ref_obj_use(func_id, map: meta.map_ptr)) {
10623	verbose(private_data: env, fmt: "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
10624	func_id_name(id: func_id), func_id);
10625	return -EFAULT;
10626	}
10627
10628	if (is_dynptr_ref_function(func_id))
10629	regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
10630
10631	if (is_ptr_cast_function(func_id) \|\| is_dynptr_ref_function(func_id)) {
10632	/ For release_reference() /
10633	regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
10634	} else if (is_acquire_function(func_id, map: meta.map_ptr)) {
10635	int id = acquire_reference_state(env, insn_idx);
10636
10637	if (id < `0`)
10638	return id;
10639	/ For mark_ptr_or_null_reg() /
10640	regs[BPF_REG_0].id = id;
10641	/ For release_reference() /
10642	regs[BPF_REG_0].ref_obj_id = id;
10643	}
10644
10645	err = do_refine_retval_range(env, regs, ret_type: fn->ret_type, func_id, meta: &meta);
10646	if (err)
10647	return err;
10648
10649	err = check_map_func_compatibility(env, map: meta.map_ptr, func_id);
10650	if (err)
10651	return err;
10652
10653	if ((func_id == BPF_FUNC_get_stack \|\|
10654	func_id == BPF_FUNC_get_task_stack) &&
10655	!env->prog->has_callchain_buf) {
10656	const char *err_str;
10657
10658	#ifdef CONFIG_PERF_EVENTS
10659	err = get_callchain_buffers(max_stack: sysctl_perf_event_max_stack);
10660	err_str = "cannot get callchain buffer for func %s#%d\n";
10661	#else
10662	err = -ENOTSUPP;
10663	err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
10664	#endif
10665	if (err) {
10666	verbose(private_data: env, fmt: err_str, func_id_name(id: func_id), func_id);
10667	return err;
10668	}
10669
10670	env->prog->has_callchain_buf = true;
10671	}
10672
10673	if (func_id == BPF_FUNC_get_stackid \|\| func_id == BPF_FUNC_get_stack)
10674	env->prog->call_get_stack = true;
10675
10676	if (func_id == BPF_FUNC_get_func_ip) {
10677	if (check_get_func_ip(env))
10678	return -ENOTSUPP;
10679	env->prog->call_get_func_ip = true;
10680	}
10681
10682	if (changes_data)
10683	clear_all_pkt_pointers(env);
10684	return `0`;
10685	}
10686
10687	/ mark_btf_func_reg_size() is used when the reg size is determined by*
10688	* the BTF func_proto's return value size and argument.
10689	*/
10690	static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10691	size_t reg_size)
10692	{
10693	struct bpf_reg_state *reg = &cur_regs(env)[regno];
10694
10695	if (regno == BPF_REG_0) {
10696	/ Function return value /
10697	reg->live \|= REG_LIVE_WRITTEN;
10698	reg->subreg_def = reg_size == sizeof(u64) ?
10699	DEF_NOT_SUBREG : env->insn_idx + `1`;
10700	} else {
10701	/ Function argument /
10702	if (reg_size == sizeof(u64)) {
10703	mark_insn_zext(env, reg);
10704	mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ64);
10705	} else {
10706	mark_reg_read(env, state: reg, parent: reg->parent, flag: REG_LIVE_READ32);
10707	}
10708	}
10709	}
10710
10711	static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10712	{
10713	return meta->kfunc_flags & KF_ACQUIRE;
10714	}
10715
10716	static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10717	{
10718	return meta->kfunc_flags & KF_RELEASE;
10719	}
10720
10721	static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
10722	{
10723	return (meta->kfunc_flags & KF_TRUSTED_ARGS) \|\| is_kfunc_release(meta);
10724	}
10725
10726	static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
10727	{
10728	return meta->kfunc_flags & KF_SLEEPABLE;
10729	}
10730
10731	static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10732	{
10733	return meta->kfunc_flags & KF_DESTRUCTIVE;
10734	}
10735
10736	static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10737	{
10738	return meta->kfunc_flags & KF_RCU;
10739	}
10740
10741	static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10742	{
10743	return meta->kfunc_flags & KF_RCU_PROTECTED;
10744	}
10745
10746	static bool is_kfunc_arg_mem_size(const struct btf *btf,
10747	const struct btf_param *arg,
10748	const struct bpf_reg_state *reg)
10749	{
10750	const struct btf_type *t;
10751
10752	t = btf_type_skip_modifiers(btf, id: arg->type, NULL);
10753	if (!btf_type_is_scalar(t) \|\| reg->type != SCALAR_VALUE)
10754	return false;
10755
10756	return btf_param_match_suffix(btf, arg, suffix: "__sz");
10757	}
10758
10759	static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10760	const struct btf_param *arg,
10761	const struct bpf_reg_state *reg)
10762	{
10763	const struct btf_type *t;
10764
10765	t = btf_type_skip_modifiers(btf, id: arg->type, NULL);
10766	if (!btf_type_is_scalar(t) \|\| reg->type != SCALAR_VALUE)
10767	return false;
10768
10769	return btf_param_match_suffix(btf, arg, suffix: "__szk");
10770	}
10771
/* True when the name of kfunc parameter @arg matches the "__opt" suffix. */
static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__opt");
}
10776
/* True when the name of kfunc parameter @arg matches the "__k" suffix. */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__k");
}
10781
/* True when the name of kfunc parameter @arg matches the "__ign" suffix. */
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__ign");
}
10786
/* True when the name of kfunc parameter @arg matches the "__map" suffix. */
static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__map");
}
10791
/* True when the name of kfunc parameter @arg matches the "__alloc" suffix. */
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__alloc");
}
10796
/* True when the name of kfunc parameter @arg matches the "__uninit" suffix. */
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__uninit");
}
10801
/* True when the name of kfunc parameter @arg matches the
 * "__refcounted_kptr" suffix.
 */
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
}
10806
/* True when the name of kfunc parameter @arg matches the "__nullable" suffix. */
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__nullable");
}
10811
/* True when the name of kfunc parameter @arg matches the "__str" suffix. */
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__str");
}
10816
10817	static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10818	const struct btf_param *arg,
10819	const char *name)
10820	{
10821	int len, target_len = strlen(name);
10822	const char *param_name;
10823
10824	param_name = btf_name_by_offset(btf, offset: arg->name_off);
10825	if (str_is_empty(s: param_name))
10826	return false;
10827	len = strlen(param_name);
10828	if (len != target_len)
10829	return false;
10830	if (strcmp(param_name, name))
10831	return false;
10832
10833	return true;
10834	}
10835
/* Indices into kf_arg_btf_ids[]; the enumerator order must match the order
 * of the BTF_ID() entries in that list.
 */
enum {
	KF_ARG_DYNPTR_ID,
	KF_ARG_LIST_HEAD_ID,
	KF_ARG_LIST_NODE_ID,
	KF_ARG_RB_ROOT_ID,
	KF_ARG_RB_NODE_ID,
};
10843
10844	BTF_ID_LIST(kf_arg_btf_ids)
10845	BTF_ID(struct, bpf_dynptr_kern)
10846	BTF_ID(struct, bpf_list_head)
10847	BTF_ID(struct, bpf_list_node)
10848	BTF_ID(struct, bpf_rb_root)
10849	BTF_ID(struct, bpf_rb_node)
10850
10851	static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10852	const struct btf_param arg, int* type)
10853	{
10854	const struct btf_type *t;
10855	u32 res_id;
10856
10857	t = btf_type_skip_modifiers(btf, id: arg->type, NULL);
10858	if (!t)
10859	return false;
10860	if (!btf_type_is_ptr(t))
10861	return false;
10862	t = btf_type_skip_modifiers(btf, id: t->type, res_id: &res_id);
10863	if (!t)
10864	return false;
10865	return btf_types_are_same(btf1: btf, id1: res_id, btf2: btf_vmlinux, id2: kf_arg_btf_ids[type]);
10866	}
10867
10868	static bool is_kfunc_arg_dynptr(const struct btf btf, const* struct btf_param *arg)
10869	{
10870	return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_DYNPTR_ID);
10871	}
10872
10873	static bool is_kfunc_arg_list_head(const struct btf btf, const* struct btf_param *arg)
10874	{
10875	return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_LIST_HEAD_ID);
10876	}
10877
10878	static bool is_kfunc_arg_list_node(const struct btf btf, const* struct btf_param *arg)
10879	{
10880	return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_LIST_NODE_ID);
10881	}
10882
10883	static bool is_kfunc_arg_rbtree_root(const struct btf btf, const* struct btf_param *arg)
10884	{
10885	return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_RB_ROOT_ID);
10886	}
10887
10888	static bool is_kfunc_arg_rbtree_node(const struct btf btf, const* struct btf_param *arg)
10889	{
10890	return __is_kfunc_ptr_arg_type(btf, arg, type: KF_ARG_RB_NODE_ID);
10891	}
10892
10893	static bool is_kfunc_arg_callback(struct bpf_verifier_env env, const* struct btf *btf,
10894	const struct btf_param *arg)
10895	{
10896	const struct btf_type *t;
10897
10898	t = btf_type_resolve_func_ptr(btf, id: arg->type, NULL);
10899	if (!t)
10900	return false;
10901
10902	return true;
10903	}
10904
10905	/ Returns true if struct is composed of scalars, 4 levels of nesting allowed /
10906	static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10907	const struct btf *btf,
10908	const struct btf_type t, int* rec)
10909	{
10910	const struct btf_type *member_type;
10911	const struct btf_member *member;
10912	u32 i;
10913
10914	if (!btf_type_is_struct(t))
10915	return false;
10916
10917	for_each_member(i, t, member) {
10918	const struct btf_array *array;
10919
10920	member_type = btf_type_skip_modifiers(btf, id: member->type, NULL);
10921	if (btf_type_is_struct(t: member_type)) {
10922	if (rec >= `3`) {
10923	verbose(private_data: env, fmt: "max struct nesting depth exceeded\n");
10924	return false;
10925	}
10926	if (!__btf_type_is_scalar_struct(env, btf, t: member_type, rec: rec + `1`))
10927	return false;
10928	continue;
10929	}
10930	if (btf_type_is_array(t: member_type)) {
10931	array = btf_array(t: member_type);
10932	if (!array->nelems)
10933	return false;
10934	member_type = btf_type_skip_modifiers(btf, id: array->type, NULL);
10935	if (!btf_type_is_scalar(t: member_type))
10936	return false;
10937	continue;
10938	}
10939	if (!btf_type_is_scalar(t: member_type))
10940	return false;
10941	}
10942	return true;
10943	}
10944
/* Classification of a pointer argument passed to a kfunc. */
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX,
	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_MEM,
	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
	KF_ARG_PTR_TO_CONST_STR,
	KF_ARG_PTR_TO_MAP,
};
10963
/* Indices into special_kfunc_list[]; the enumerator order must match the
 * order of the BTF_ID() entries in that list.
 */
enum special_kfunc_type {
	KF_bpf_obj_new_impl,
	KF_bpf_obj_drop_impl,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_first,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_throw,
	KF_bpf_iter_css_task_new,
};
10989
10990	BTF_SET_START(special_kfunc_set)
10991	BTF_ID(func, bpf_obj_new_impl)
10992	BTF_ID(func, bpf_obj_drop_impl)
10993	BTF_ID(func, bpf_refcount_acquire_impl)
10994	BTF_ID(func, bpf_list_push_front_impl)
10995	BTF_ID(func, bpf_list_push_back_impl)
10996	BTF_ID(func, bpf_list_pop_front)
10997	BTF_ID(func, bpf_list_pop_back)
10998	BTF_ID(func, bpf_cast_to_kern_ctx)
10999	BTF_ID(func, bpf_rdonly_cast)
11000	BTF_ID(func, bpf_rbtree_remove)
11001	BTF_ID(func, bpf_rbtree_add_impl)
11002	BTF_ID(func, bpf_rbtree_first)
11003	BTF_ID(func, bpf_dynptr_from_skb)
11004	BTF_ID(func, bpf_dynptr_from_xdp)
11005	BTF_ID(func, bpf_dynptr_slice)
11006	BTF_ID(func, bpf_dynptr_slice_rdwr)
11007	BTF_ID(func, bpf_dynptr_clone)
11008	BTF_ID(func, bpf_percpu_obj_new_impl)
11009	BTF_ID(func, bpf_percpu_obj_drop_impl)
11010	BTF_ID(func, bpf_throw)
11011	#ifdef CONFIG_CGROUPS
11012	BTF_ID(func, bpf_iter_css_task_new)
11013	#endif
11014	BTF_SET_END(special_kfunc_set)
11015
11016	BTF_ID_LIST(special_kfunc_list)
11017	BTF_ID(func, bpf_obj_new_impl)
11018	BTF_ID(func, bpf_obj_drop_impl)
11019	BTF_ID(func, bpf_refcount_acquire_impl)
11020	BTF_ID(func, bpf_list_push_front_impl)
11021	BTF_ID(func, bpf_list_push_back_impl)
11022	BTF_ID(func, bpf_list_pop_front)
11023	BTF_ID(func, bpf_list_pop_back)
11024	BTF_ID(func, bpf_cast_to_kern_ctx)
11025	BTF_ID(func, bpf_rdonly_cast)
11026	BTF_ID(func, bpf_rcu_read_lock)
11027	BTF_ID(func, bpf_rcu_read_unlock)
11028	BTF_ID(func, bpf_rbtree_remove)
11029	BTF_ID(func, bpf_rbtree_add_impl)
11030	BTF_ID(func, bpf_rbtree_first)
11031	BTF_ID(func, bpf_dynptr_from_skb)
11032	BTF_ID(func, bpf_dynptr_from_xdp)
11033	BTF_ID(func, bpf_dynptr_slice)
11034	BTF_ID(func, bpf_dynptr_slice_rdwr)
11035	BTF_ID(func, bpf_dynptr_clone)
11036	BTF_ID(func, bpf_percpu_obj_new_impl)
11037	BTF_ID(func, bpf_percpu_obj_drop_impl)
11038	BTF_ID(func, bpf_throw)
11039	#ifdef CONFIG_CGROUPS
11040	BTF_ID(func, bpf_iter_css_task_new)
11041	#else
11042	BTF_ID_UNUSED
11043	#endif
11044
11045	static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11046	{
11047	if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
11048	meta->arg_owning_ref) {
11049	return false;
11050	}
11051
11052	return meta->kfunc_flags & KF_RET_NULL;
11053	}
11054
11055	static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11056	{
11057	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11058	}
11059
11060	static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11061	{
11062	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11063	}
11064
11065	static enum kfunc_ptr_arg_type
11066	get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
11067	struct bpf_kfunc_call_arg_meta *meta,
11068	const struct btf_type t, const* struct btf_type *ref_t,
11069	const char ref_tname, const* struct btf_param *args,
11070	int argno, int nargs)
11071	{
11072	u32 regno = argno + `1`;
11073	struct bpf_reg_state *regs = cur_regs(env);
11074	struct bpf_reg_state *reg = &regs[regno];
11075	bool arg_mem_size = false;
11076
11077	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
11078	return KF_ARG_PTR_TO_CTX;
11079
11080	/ In this function, we verify the kfunc's BTF as per the argument type,*
11081	* leaving the rest of the verification with respect to the register
11082	* type to our caller. When a set of conditions hold in the BTF type of
11083	* arguments, we resolve it to a known kfunc_ptr_arg_type.
11084	*/
11085	if (btf_is_prog_ctx_type(log: &env->log, btf: meta->btf, t, prog_type: resolve_prog_type(prog: env->prog), arg: argno))
11086	return KF_ARG_PTR_TO_CTX;
11087
11088	if (is_kfunc_arg_alloc_obj(btf: meta->btf, arg: &args[argno]))
11089	return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11090
11091	if (is_kfunc_arg_refcounted_kptr(btf: meta->btf, arg: &args[argno]))
11092	return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11093
11094	if (is_kfunc_arg_dynptr(btf: meta->btf, arg: &args[argno]))
11095	return KF_ARG_PTR_TO_DYNPTR;
11096
11097	if (is_kfunc_arg_iter(meta, arg: argno))
11098	return KF_ARG_PTR_TO_ITER;
11099
11100	if (is_kfunc_arg_list_head(btf: meta->btf, arg: &args[argno]))
11101	return KF_ARG_PTR_TO_LIST_HEAD;
11102
11103	if (is_kfunc_arg_list_node(btf: meta->btf, arg: &args[argno]))
11104	return KF_ARG_PTR_TO_LIST_NODE;
11105
11106	if (is_kfunc_arg_rbtree_root(btf: meta->btf, arg: &args[argno]))
11107	return KF_ARG_PTR_TO_RB_ROOT;
11108
11109	if (is_kfunc_arg_rbtree_node(btf: meta->btf, arg: &args[argno]))
11110	return KF_ARG_PTR_TO_RB_NODE;
11111
11112	if (is_kfunc_arg_const_str(btf: meta->btf, arg: &args[argno]))
11113	return KF_ARG_PTR_TO_CONST_STR;
11114
11115	if (is_kfunc_arg_map(btf: meta->btf, arg: &args[argno]))
11116	return KF_ARG_PTR_TO_MAP;
11117
11118	if ((base_type(type: reg->type) == PTR_TO_BTF_ID \|\| reg2btf_ids[base_type(type: reg->type)])) {
11119	if (!btf_type_is_struct(t: ref_t)) {
11120	verbose(private_data: env, fmt: "kernel function %s args#%d pointer type %s %s is not supported\n",
11121	meta->func_name, argno, btf_type_str(t: ref_t), ref_tname);
11122	return -EINVAL;
11123	}
11124	return KF_ARG_PTR_TO_BTF_ID;
11125	}
11126
11127	if (is_kfunc_arg_callback(env, btf: meta->btf, arg: &args[argno]))
11128	return KF_ARG_PTR_TO_CALLBACK;
11129
11130	if (is_kfunc_arg_nullable(btf: meta->btf, arg: &args[argno]) && register_is_null(reg))
11131	return KF_ARG_PTR_TO_NULL;
11132
11133	if (argno + `1` < nargs &&
11134	(is_kfunc_arg_mem_size(btf: meta->btf, arg: &args[argno + `1`], reg: &regs[regno + `1`]) \|\|
11135	is_kfunc_arg_const_mem_size(btf: meta->btf, arg: &args[argno + `1`], reg: &regs[regno + `1`])))
11136	arg_mem_size = true;
11137
11138	/ This is the catch all argument type of register types supported by*
11139	* check_helper_mem_access. However, we only allow when argument type is
11140	* pointer to scalar, or struct composed (recursively) of scalars. When
11141	* arg_mem_size is true, the pointer can be void *.
11142	*/
11143	if (!btf_type_is_scalar(t: ref_t) && !__btf_type_is_scalar_struct(env, btf: meta->btf, t: ref_t, rec: `0`) &&
11144	(arg_mem_size ? !btf_type_is_void(t: ref_t) : `1`)) {
11145	verbose(private_data: env, fmt: "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
11146	argno, btf_type_str(t: ref_t), ref_tname, arg_mem_size ? "void, " : "");
11147	return -EINVAL;
11148	}
11149	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11150	}
11151
11152	static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11153	struct bpf_reg_state *reg,
11154	const struct btf_type *ref_t,
11155	const char *ref_tname, u32 ref_id,
11156	struct bpf_kfunc_call_arg_meta *meta,
11157	int argno)
11158	{
11159	const struct btf_type *reg_ref_t;
11160	bool strict_type_match = false;
11161	const struct btf *reg_btf;
11162	const char *reg_ref_tname;
11163	u32 reg_ref_id;
11164
11165	if (base_type(type: reg->type) == PTR_TO_BTF_ID) {
11166	reg_btf = reg->btf;
11167	reg_ref_id = reg->btf_id;
11168	} else {
11169	reg_btf = btf_vmlinux;
11170	reg_ref_id = *reg2btf_ids[base_type(type: reg->type)];
11171	}
11172
11173	/ Enforce strict type matching for calls to kfuncs that are acquiring*
11174	* or releasing a reference, or are no-cast aliases. We do _not_
11175	* enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
11176	* as we want to enable BPF programs to pass types that are bitwise
11177	* equivalent without forcing them to explicitly cast with something
11178	* like bpf_cast_to_kern_ctx().
11179	*
11180	* For example, say we had a type like the following:
11181	*
11182	* struct bpf_cpumask {
11183	* cpumask_t cpumask;
11184	* refcount_t usage;
11185	* };
11186	*
11187	* Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11188	* to a struct cpumask, so it would be safe to pass a struct
11189	* bpf_cpumask * to a kfunc expecting a struct cpumask *.
11190	*
11191	* The philosophy here is similar to how we allow scalars of different
11192	* types to be passed to kfuncs as long as the size is the same. The
11193	* only difference here is that we're simply allowing
11194	* btf_struct_ids_match() to walk the struct at the 0th offset, and
11195	* resolve types.
11196	*/
11197	if (is_kfunc_acquire(meta) \|\|
11198	(is_kfunc_release(meta) && reg->ref_obj_id) \|\|
11199	btf_type_ids_nocast_alias(log: &env->log, reg_btf, reg_id: reg_ref_id, arg_btf: meta->btf, arg_id: ref_id))
11200	strict_type_match = true;
11201
11202	WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
11203
11204	reg_ref_t = btf_type_skip_modifiers(btf: reg_btf, id: reg_ref_id, res_id: &reg_ref_id);
11205	reg_ref_tname = btf_name_by_offset(btf: reg_btf, offset: reg_ref_t->name_off);
11206	if (!btf_struct_ids_match(log: &env->log, btf: reg_btf, id: reg_ref_id, off: reg->off, need_btf: meta->btf, need_type_id: ref_id, strict: strict_type_match)) {
11207	verbose(private_data: env, fmt: "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
11208	meta->func_name, argno, btf_type_str(t: ref_t), ref_tname, argno + `1`,
11209	btf_type_str(t: reg_ref_t), reg_ref_tname);
11210	return -EINVAL;
11211	}
11212	return `0`;
11213	}
11214
11215	static int ref_set_non_owning(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
11216	{
11217	struct bpf_verifier_state *state = env->cur_state;
11218	struct btf_record *rec = reg_btf_record(reg);
11219
11220	if (!state->active_lock.ptr) {
11221	verbose(private_data: env, fmt: "verifier internal error: ref_set_non_owning w/o active lock\n");
11222	return -EFAULT;
11223	}
11224
11225	if (type_flag(type: reg->type) & NON_OWN_REF) {
11226	verbose(private_data: env, fmt: "verifier internal error: NON_OWN_REF already set\n");
11227	return -EFAULT;
11228	}
11229
11230	reg->type \|= NON_OWN_REF;
11231	if (rec->refcount_off >= `0`)
11232	reg->type \|= MEM_RCU;
11233
11234	return `0`;
11235	}
11236
11237	static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
11238	{
11239	struct bpf_func_state state, unused;
11240	struct bpf_reg_state *reg;
11241	int i;
11242
11243	state = cur_func(env);
11244
11245	if (!ref_obj_id) {
11246	verbose(private_data: env, fmt: "verifier internal error: ref_obj_id is zero for "
11247	"owning -> non-owning conversion\n");
11248	return -EFAULT;
11249	}
11250
11251	for (i = `0`; i < state->acquired_refs; i++) {
11252	if (state->refs[i].id != ref_obj_id)
11253	continue;
11254
11255	/ Clear ref_obj_id here so release_reference doesn't clobber*
11256	* the whole reg
11257	*/
11258	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11259	if (reg->ref_obj_id == ref_obj_id) {
11260	reg->ref_obj_id = `0`;
11261	ref_set_non_owning(env, reg);
11262	}
11263	}));
11264	return `0`;
11265	}
11266
11267	verbose(private_data: env, fmt: "verifier internal error: ref state missing for ref_obj_id\n");
11268	return -EFAULT;
11269	}
11270
11271	/ Implementation details:*
11272	*
11273	* Each register points to some region of memory, which we define as an
11274	* allocation. Each allocation may embed a bpf_spin_lock which protects any
11275	* special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11276	* allocation. The lock and the data it protects are colocated in the same
11277	* memory region.
11278	*
11279	* Hence, everytime a register holds a pointer value pointing to such
11280	* allocation, the verifier preserves a unique reg->id for it.
11281	*
11282	* The verifier remembers the lock 'ptr' and the lock 'id' whenever
11283	* bpf_spin_lock is called.
11284	*
11285	* To enable this, lock state in the verifier captures two values:
11286	* active_lock.ptr = Register's type specific pointer
11287	* active_lock.id = A unique ID for each register pointer value
11288	*
11289	* Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID \| MEM_ALLOC are the two
11290	* supported register types.
11291	*
11292	* The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11293	* allocated objects is the reg->btf pointer.
11294	*
11295	* The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11296	* can establish the provenance of the map value statically for each distinct
11297	* lookup into such maps. They always contain a single map value hence unique
11298	* IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11299	*
11300	* So, in case of global variables, they use array maps with max_entries = 1,
11301	* hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11302	* into the same map value as max_entries is 1, as described above).
11303	*
11304	* In case of inner map lookups, the inner map pointer has same map_ptr as the
11305	* outer map pointer (in verifier context), but each lookup into an inner map
11306	* assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11307	* maps from the same outer map share the same map_ptr as active_lock.ptr, they
11308	* will get different reg->id assigned to each lookup, hence different
11309	* active_lock.id.
11310	*
11311	* In case of allocated objects, active_lock.ptr is the reg->btf, and the
11312	* reg->id is a unique ID preserved after the NULL pointer check on the pointer
11313	* returned from bpf_obj_new. Each allocation receives a new reg->id.
11314	*/
11315	static int check_reg_allocation_locked(struct bpf_verifier_env env, struct* bpf_reg_state *reg)
11316	{
11317	void *ptr;
11318	u32 id;
11319
11320	switch ((int)reg->type) {
11321	case PTR_TO_MAP_VALUE:
11322	ptr = reg->map_ptr;
11323	break;
11324	case PTR_TO_BTF_ID \| MEM_ALLOC:
11325	ptr = reg->btf;
11326	break;
11327	default:
11328	verbose(private_data: env, fmt: "verifier internal error: unknown reg type for lock check\n");
11329	return -EFAULT;
11330	}
11331	id = reg->id;
11332
11333	if (!env->cur_state->active_lock.ptr)
11334	return -EINVAL;
11335	if (env->cur_state->active_lock.ptr != ptr \|\|
11336	env->cur_state->active_lock.id != id) {
11337	verbose(private_data: env, fmt: "held lock and object are not in the same allocation\n");
11338	return -EINVAL;
11339	}
11340	return `0`;
11341	}
11342
11343	static bool is_bpf_list_api_kfunc(u32 btf_id)
11344	{
11345	return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] \|\|
11346	btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] \|\|
11347	btf_id == special_kfunc_list[KF_bpf_list_pop_front] \|\|
11348	btf_id == special_kfunc_list[KF_bpf_list_pop_back];
11349	}
11350
11351	static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11352	{
11353	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] \|\|
11354	btf_id == special_kfunc_list[KF_bpf_rbtree_remove] \|\|
11355	btf_id == special_kfunc_list[KF_bpf_rbtree_first];
11356	}
11357
11358	static bool is_bpf_graph_api_kfunc(u32 btf_id)
11359	{
11360	return is_bpf_list_api_kfunc(btf_id) \|\| is_bpf_rbtree_api_kfunc(btf_id) \|\|
11361	btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11362	}
11363
11364	static bool is_sync_callback_calling_kfunc(u32 btf_id)
11365	{
11366	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11367	}
11368
11369	static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
11370	{
11371	return bpf_pseudo_kfunc_call(insn) && insn->off == `0` &&
11372	insn->imm == special_kfunc_list[KF_bpf_throw];
11373	}
11374
11375	static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11376	{
11377	return is_bpf_rbtree_api_kfunc(btf_id);
11378	}
11379
11380	static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11381	enum btf_field_type head_field_type,
11382	u32 kfunc_btf_id)
11383	{
11384	bool ret;
11385
11386	switch (head_field_type) {
11387	case BPF_LIST_HEAD:
11388	ret = is_bpf_list_api_kfunc(btf_id: kfunc_btf_id);
11389	break;
11390	case BPF_RB_ROOT:
11391	ret = is_bpf_rbtree_api_kfunc(btf_id: kfunc_btf_id);
11392	break;
11393	default:
11394	verbose(private_data: env, fmt: "verifier internal error: unexpected graph root argument type %s\n",
11395	btf_field_type_name(type: head_field_type));
11396	return false;
11397	}
11398
11399	if (!ret)
11400	verbose(private_data: env, fmt: "verifier internal error: %s head arg for unknown kfunc\n",
11401	btf_field_type_name(type: head_field_type));
11402	return ret;
11403	}
11404
11405	static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11406	enum btf_field_type node_field_type,
11407	u32 kfunc_btf_id)
11408	{
11409	bool ret;
11410
11411	switch (node_field_type) {
11412	case BPF_LIST_NODE:
11413	ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] \|\|
11414	kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
11415	break;
11416	case BPF_RB_NODE:
11417	ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] \|\|
11418	kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
11419	break;
11420	default:
11421	verbose(private_data: env, fmt: "verifier internal error: unexpected graph node argument type %s\n",
11422	btf_field_type_name(type: node_field_type));
11423	return false;
11424	}
11425
11426	if (!ret)
11427	verbose(private_data: env, fmt: "verifier internal error: %s node arg for unknown kfunc\n",
11428	btf_field_type_name(type: node_field_type));
11429	return ret;
11430	}
11431
11432	static int
11433	__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11434	struct bpf_reg_state *reg, u32 regno,
11435	struct bpf_kfunc_call_arg_meta *meta,
11436	enum btf_field_type head_field_type,
11437	struct btf_field **head_field)
11438	{
11439	const char *head_type_name;
11440	struct btf_field *field;
11441	struct btf_record *rec;
11442	u32 head_off;
11443
11444	if (meta->btf != btf_vmlinux) {
11445	verbose(private_data: env, fmt: "verifier internal error: unexpected btf mismatch in kfunc call\n");
11446	return -EFAULT;
11447	}
11448
11449	if (!check_kfunc_is_graph_root_api(env, head_field_type, kfunc_btf_id: meta->func_id))
11450	return -EFAULT;
11451
11452	head_type_name = btf_field_type_name(type: head_field_type);
11453	if (!tnum_is_const(a: reg->var_off)) {
11454	verbose(private_data: env,
11455	fmt: "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11456	regno, head_type_name);
11457	return -EINVAL;
11458	}
11459
11460	rec = reg_btf_record(reg);
11461	head_off = reg->off + reg->var_off.value;
11462	field = btf_record_find(rec, offset: head_off, field_mask: head_field_type);
11463	if (!field) {
11464	verbose(private_data: env, fmt: "%s not found at offset=%u\n", head_type_name, head_off);
11465	return -EINVAL;
11466	}
11467
11468	/ All functions require bpf_list_head to be protected using a bpf_spin_lock /
11469	if (check_reg_allocation_locked(env, reg)) {
11470	verbose(private_data: env, fmt: "bpf_spin_lock at off=%d must be held for %s\n",
11471	rec->spin_lock_off, head_type_name);
11472	return -EINVAL;
11473	}
11474
11475	if (*head_field) {
11476	verbose(private_data: env, fmt: "verifier internal error: repeating %s arg\n", head_type_name);
11477	return -EFAULT;
11478	}
11479	*head_field = field;
11480	return `0`;
11481	}
11482
11483	static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11484	struct bpf_reg_state *reg, u32 regno,
11485	struct bpf_kfunc_call_arg_meta *meta)
11486	{
11487	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, head_field_type: BPF_LIST_HEAD,
11488	head_field: &meta->arg_list_head.field);
11489	}
11490
11491	static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11492	struct bpf_reg_state *reg, u32 regno,
11493	struct bpf_kfunc_call_arg_meta *meta)
11494	{
11495	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, head_field_type: BPF_RB_ROOT,
11496	head_field: &meta->arg_rbtree_root.field);
11497	}
11498
11499	static int
11500	__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11501	struct bpf_reg_state *reg, u32 regno,
11502	struct bpf_kfunc_call_arg_meta *meta,
11503	enum btf_field_type head_field_type,
11504	enum btf_field_type node_field_type,
11505	struct btf_field **node_field)
11506	{
11507	const char *node_type_name;
11508	const struct btf_type et, t;
11509	struct btf_field *field;
11510	u32 node_off;
11511
11512	if (meta->btf != btf_vmlinux) {
11513	verbose(private_data: env, fmt: "verifier internal error: unexpected btf mismatch in kfunc call\n");
11514	return -EFAULT;
11515	}
11516
11517	if (!check_kfunc_is_graph_node_api(env, node_field_type, kfunc_btf_id: meta->func_id))
11518	return -EFAULT;
11519
11520	node_type_name = btf_field_type_name(type: node_field_type);
11521	if (!tnum_is_const(a: reg->var_off)) {
11522	verbose(private_data: env,
11523	fmt: "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11524	regno, node_type_name);
11525	return -EINVAL;
11526	}
11527
11528	node_off = reg->off + reg->var_off.value;
11529	field = reg_find_field_offset(reg, off: node_off, fields: node_field_type);
11530	if (!field \|\| field->offset != node_off) {
11531	verbose(private_data: env, fmt: "%s not found at offset=%u\n", node_type_name, node_off);
11532	return -EINVAL;
11533	}
11534
11535	field = *node_field;
11536
11537	et = btf_type_by_id(btf: field->graph_root.btf, type_id: field->graph_root.value_btf_id);
11538	t = btf_type_by_id(btf: reg->btf, type_id: reg->btf_id);
11539	if (!btf_struct_ids_match(log: &env->log, btf: reg->btf, id: reg->btf_id, off: `0`, need_btf: field->graph_root.btf,
11540	need_type_id: field->graph_root.value_btf_id, strict: true)) {
11541	verbose(private_data: env, fmt: "operation on %s expects arg#1 %s at offset=%d "
11542	"in struct %s, but arg is at offset=%d in struct %s\n",
11543	btf_field_type_name(type: head_field_type),
11544	btf_field_type_name(type: node_field_type),
11545	field->graph_root.node_offset,
11546	btf_name_by_offset(btf: field->graph_root.btf, offset: et->name_off),
11547	node_off, btf_name_by_offset(btf: reg->btf, offset: t->name_off));
11548	return -EINVAL;
11549	}
11550	meta->arg_btf = reg->btf;
11551	meta->arg_btf_id = reg->btf_id;
11552
11553	if (node_off != field->graph_root.node_offset) {
11554	verbose(private_data: env, fmt: "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11555	node_off, btf_field_type_name(type: node_field_type),
11556	field->graph_root.node_offset,
11557	btf_name_by_offset(btf: field->graph_root.btf, offset: et->name_off));
11558	return -EINVAL;
11559	}
11560
11561	return `0`;
11562	}
11563
11564	static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11565	struct bpf_reg_state *reg, u32 regno,
11566	struct bpf_kfunc_call_arg_meta *meta)
11567	{
11568	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11569	head_field_type: BPF_LIST_HEAD, node_field_type: BPF_LIST_NODE,
11570	node_field: &meta->arg_list_head.field);
11571	}
11572
11573	static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11574	struct bpf_reg_state *reg, u32 regno,
11575	struct bpf_kfunc_call_arg_meta *meta)
11576	{
11577	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11578	head_field_type: BPF_RB_ROOT, node_field_type: BPF_RB_NODE,
11579	node_field: &meta->arg_rbtree_root.field);
11580	}
11581
11582	/*
11583	* css_task iter allowlist is needed to avoid dead locking on css_set_lock.
11584	* LSM hooks and iters (both sleepable and non-sleepable) are safe.
11585	* Any sleepable progs are also safe since bpf_check_attach_target() enforce
11586	* them can only be attached to some specific hook points.
11587	*/
11588	static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
11589	{
11590	enum bpf_prog_type prog_type = resolve_prog_type(prog: env->prog);
11591
11592	switch (prog_type) {
11593	case BPF_PROG_TYPE_LSM:
11594	return true;
11595	case BPF_PROG_TYPE_TRACING:
11596	if (env->prog->expected_attach_type == BPF_TRACE_ITER)
11597	return true;
11598	fallthrough;
11599	default:
11600	return in_sleepable(env);
11601	}
11602	}
11603
11604	static int check_kfunc_args(struct bpf_verifier_env env, struct* bpf_kfunc_call_arg_meta *meta,
11605	int insn_idx)
11606	{
11607	const char func_name = meta->func_name, ref_tname;
11608	const struct btf *btf = meta->btf;
11609	const struct btf_param *args;
11610	struct btf_record *rec;
11611	u32 i, nargs;
11612	int ret;
11613
11614	args = (const struct btf_param *)(meta->func_proto + `1`);
11615	nargs = btf_type_vlen(t: meta->func_proto);
11616	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
11617	verbose(private_data: env, fmt: "Function %s has %d > %d args\n", func_name, nargs,
11618	MAX_BPF_FUNC_REG_ARGS);
11619	return -EINVAL;
11620	}
11621
11622	/ Check that BTF function arguments match actual types that the*
11623	* verifier sees.
11624	*/
11625	for (i = `0`; i < nargs; i++) {
11626	struct bpf_reg_state regs = cur_regs(env), reg = &regs[i + `1`];
11627	const struct btf_type t, ref_t, *resolve_ret;
11628	enum bpf_arg_type arg_type = ARG_DONTCARE;
11629	u32 regno = i + `1`, ref_id, type_size;
11630	bool is_ret_buf_sz = false;
11631	int kf_arg_type;
11632
11633	t = btf_type_skip_modifiers(btf, id: args[i].type, NULL);
11634
11635	if (is_kfunc_arg_ignore(btf, arg: &args[i]))
11636	continue;
11637
11638	if (btf_type_is_scalar(t)) {
11639	if (reg->type != SCALAR_VALUE) {
11640	verbose(private_data: env, fmt: "R%d is not a scalar\n", regno);
11641	return -EINVAL;
11642	}
11643
11644	if (is_kfunc_arg_constant(btf: meta->btf, arg: &args[i])) {
11645	if (meta->arg_constant.found) {
11646	verbose(private_data: env, fmt: "verifier internal error: only one constant argument permitted\n");
11647	return -EFAULT;
11648	}
11649	if (!tnum_is_const(a: reg->var_off)) {
11650	verbose(private_data: env, fmt: "R%d must be a known constant\n", regno);
11651	return -EINVAL;
11652	}
11653	ret = mark_chain_precision(env, regno);
11654	if (ret < `0`)
11655	return ret;
11656	meta->arg_constant.found = true;
11657	meta->arg_constant.value = reg->var_off.value;
11658	} else if (is_kfunc_arg_scalar_with_name(btf, arg: &args[i], name: "rdonly_buf_size")) {
11659	meta->r0_rdonly = true;
11660	is_ret_buf_sz = true;
11661	} else if (is_kfunc_arg_scalar_with_name(btf, arg: &args[i], name: "rdwr_buf_size")) {
11662	is_ret_buf_sz = true;
11663	}
11664
11665	if (is_ret_buf_sz) {
11666	if (meta->r0_size) {
11667	verbose(private_data: env, fmt: "2 or more rdonly/rdwr_buf_size parameters for kfunc");
11668	return -EINVAL;
11669	}
11670
11671	if (!tnum_is_const(a: reg->var_off)) {
11672	verbose(private_data: env, fmt: "R%d is not a const\n", regno);
11673	return -EINVAL;
11674	}
11675
11676	meta->r0_size = reg->var_off.value;
11677	ret = mark_chain_precision(env, regno);
11678	if (ret)
11679	return ret;
11680	}
11681	continue;
11682	}
11683
11684	if (!btf_type_is_ptr(t)) {
11685	verbose(private_data: env, fmt: "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
11686	return -EINVAL;
11687	}
11688
11689	if ((is_kfunc_trusted_args(meta) \|\| is_kfunc_rcu(meta)) &&
11690	(register_is_null(reg) \|\| type_may_be_null(type: reg->type)) &&
11691	!is_kfunc_arg_nullable(btf: meta->btf, arg: &args[i])) {
11692	verbose(private_data: env, fmt: "Possibly NULL pointer passed to trusted arg%d\n", i);
11693	return -EACCES;
11694	}
11695
11696	if (reg->ref_obj_id) {
11697	if (is_kfunc_release(meta) && meta->ref_obj_id) {
11698	verbose(private_data: env, fmt: "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
11699	regno, reg->ref_obj_id,
11700	meta->ref_obj_id);
11701	return -EFAULT;
11702	}
11703	meta->ref_obj_id = reg->ref_obj_id;
11704	if (is_kfunc_release(meta))
11705	meta->release_regno = regno;
11706	}
11707
11708	ref_t = btf_type_skip_modifiers(btf, id: t->type, res_id: &ref_id);
11709	ref_tname = btf_name_by_offset(btf, offset: ref_t->name_off);
11710
11711	kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, argno: i, nargs);
11712	if (kf_arg_type < `0`)
11713	return kf_arg_type;
11714
11715	switch (kf_arg_type) {
11716	case KF_ARG_PTR_TO_NULL:
11717	continue;
11718	case KF_ARG_PTR_TO_MAP:
11719	case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11720	case KF_ARG_PTR_TO_BTF_ID:
11721	if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
11722	break;
11723
11724	if (!is_trusted_reg(reg)) {
11725	if (!is_kfunc_rcu(meta)) {
11726	verbose(private_data: env, fmt: "R%d must be referenced or trusted\n", regno);
11727	return -EINVAL;
11728	}
11729	if (!is_rcu_reg(reg)) {
11730	verbose(private_data: env, fmt: "R%d must be a rcu pointer\n", regno);
11731	return -EINVAL;
11732	}
11733	}
11734
11735	fallthrough;
11736	case KF_ARG_PTR_TO_CTX:
11737	/ Trusted arguments have the same offset checks as release arguments /
11738	arg_type \|= OBJ_RELEASE;
11739	break;
11740	case KF_ARG_PTR_TO_DYNPTR:
11741	case KF_ARG_PTR_TO_ITER:
11742	case KF_ARG_PTR_TO_LIST_HEAD:
11743	case KF_ARG_PTR_TO_LIST_NODE:
11744	case KF_ARG_PTR_TO_RB_ROOT:
11745	case KF_ARG_PTR_TO_RB_NODE:
11746	case KF_ARG_PTR_TO_MEM:
11747	case KF_ARG_PTR_TO_MEM_SIZE:
11748	case KF_ARG_PTR_TO_CALLBACK:
11749	case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11750	case KF_ARG_PTR_TO_CONST_STR:
11751	/ Trusted by default /
11752	break;
11753	default:
11754	WARN_ON_ONCE(`1`);
11755	return -EFAULT;
11756	}
11757
11758	if (is_kfunc_release(meta) && reg->ref_obj_id)
11759	arg_type \|= OBJ_RELEASE;
11760	ret = check_func_arg_reg_off(env, reg, regno, arg_type);
11761	if (ret < `0`)
11762	return ret;
11763
11764	switch (kf_arg_type) {
11765	case KF_ARG_PTR_TO_CTX:
11766	if (reg->type != PTR_TO_CTX) {
11767	verbose(private_data: env, fmt: "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
11768	return -EINVAL;
11769	}
11770
11771	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
11772	ret = get_kern_ctx_btf_id(log: &env->log, prog_type: resolve_prog_type(prog: env->prog));
11773	if (ret < `0`)
11774	return -EINVAL;
11775	meta->ret_btf_id = ret;
11776	}
11777	break;
11778	case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11779	if (reg->type == (PTR_TO_BTF_ID \| MEM_ALLOC)) {
11780	if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
11781	verbose(private_data: env, fmt: "arg#%d expected for bpf_obj_drop_impl()\n", i);
11782	return -EINVAL;
11783	}
11784	} else if (reg->type == (PTR_TO_BTF_ID \| MEM_ALLOC \| MEM_PERCPU)) {
11785	if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
11786	verbose(private_data: env, fmt: "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
11787	return -EINVAL;
11788	}
11789	} else {
11790	verbose(private_data: env, fmt: "arg#%d expected pointer to allocated object\n", i);
11791	return -EINVAL;
11792	}
11793	if (!reg->ref_obj_id) {
11794	verbose(private_data: env, fmt: "allocated object must be referenced\n");
11795	return -EINVAL;
11796	}
11797	if (meta->btf == btf_vmlinux) {
11798	meta->arg_btf = reg->btf;
11799	meta->arg_btf_id = reg->btf_id;
11800	}
11801	break;
11802	case KF_ARG_PTR_TO_DYNPTR:
11803	{
11804	enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
11805	int clone_ref_obj_id = `0`;
11806
11807	if (reg->type != PTR_TO_STACK &&
11808	reg->type != CONST_PTR_TO_DYNPTR) {
11809	verbose(private_data: env, fmt: "arg#%d expected pointer to stack or dynptr_ptr\n", i);
11810	return -EINVAL;
11811	}
11812
11813	if (reg->type == CONST_PTR_TO_DYNPTR)
11814	dynptr_arg_type \|= MEM_RDONLY;
11815
11816	if (is_kfunc_arg_uninit(btf, arg: &args[i]))
11817	dynptr_arg_type \|= MEM_UNINIT;
11818
11819	if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
11820	dynptr_arg_type \|= DYNPTR_TYPE_SKB;
11821	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
11822	dynptr_arg_type \|= DYNPTR_TYPE_XDP;
11823	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
11824	(dynptr_arg_type & MEM_UNINIT)) {
11825	enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
11826
11827	if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
11828	verbose(private_data: env, fmt: "verifier internal error: no dynptr type for parent of clone\n");
11829	return -EFAULT;
11830	}
11831
11832	dynptr_arg_type \|= (unsigned int)get_dynptr_type_flag(type: parent_type);
11833	clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
11834	if (dynptr_type_refcounted(type: parent_type) && !clone_ref_obj_id) {
11835	verbose(private_data: env, fmt: "verifier internal error: missing ref obj id for parent of clone\n");
11836	return -EFAULT;
11837	}
11838	}
11839
11840	ret = process_dynptr_func(env, regno, insn_idx, arg_type: dynptr_arg_type, clone_ref_obj_id);
11841	if (ret < `0`)
11842	return ret;
11843
11844	if (!(dynptr_arg_type & MEM_UNINIT)) {
11845	int id = dynptr_id(env, reg);
11846
11847	if (id < `0`) {
11848	verbose(private_data: env, fmt: "verifier internal error: failed to obtain dynptr id\n");
11849	return id;
11850	}
11851	meta->initialized_dynptr.id = id;
11852	meta->initialized_dynptr.type = dynptr_get_type(env, reg);
11853	meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
11854	}
11855
11856	break;
11857	}
11858	case KF_ARG_PTR_TO_ITER:
11859	if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
11860	if (!check_css_task_iter_allowlist(env)) {
11861	verbose(private_data: env, fmt: "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
11862	return -EINVAL;
11863	}
11864	}
11865	ret = process_iter_arg(env, regno, insn_idx, meta);
11866	if (ret < `0`)
11867	return ret;
11868	break;
11869	case KF_ARG_PTR_TO_LIST_HEAD:
11870	if (reg->type != PTR_TO_MAP_VALUE &&
11871	reg->type != (PTR_TO_BTF_ID \| MEM_ALLOC)) {
11872	verbose(private_data: env, fmt: "arg#%d expected pointer to map value or allocated object\n", i);
11873	return -EINVAL;
11874	}
11875	if (reg->type == (PTR_TO_BTF_ID \| MEM_ALLOC) && !reg->ref_obj_id) {
11876	verbose(private_data: env, fmt: "allocated object must be referenced\n");
11877	return -EINVAL;
11878	}
11879	ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
11880	if (ret < `0`)
11881	return ret;
11882	break;
11883	case KF_ARG_PTR_TO_RB_ROOT:
11884	if (reg->type != PTR_TO_MAP_VALUE &&
11885	reg->type != (PTR_TO_BTF_ID \| MEM_ALLOC)) {
11886	verbose(private_data: env, fmt: "arg#%d expected pointer to map value or allocated object\n", i);
11887	return -EINVAL;
11888	}
11889	if (reg->type == (PTR_TO_BTF_ID \| MEM_ALLOC) && !reg->ref_obj_id) {
11890	verbose(private_data: env, fmt: "allocated object must be referenced\n");
11891	return -EINVAL;
11892	}
11893	ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
11894	if (ret < `0`)
11895	return ret;
11896	break;
11897	case KF_ARG_PTR_TO_LIST_NODE:
11898	if (reg->type != (PTR_TO_BTF_ID \| MEM_ALLOC)) {
11899	verbose(private_data: env, fmt: "arg#%d expected pointer to allocated object\n", i);
11900	return -EINVAL;
11901	}
11902	if (!reg->ref_obj_id) {
11903	verbose(private_data: env, fmt: "allocated object must be referenced\n");
11904	return -EINVAL;
11905	}
11906	ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
11907	if (ret < `0`)
11908	return ret;
11909	break;
11910	case KF_ARG_PTR_TO_RB_NODE:
11911	if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
11912	if (!type_is_non_owning_ref(type: reg->type) \|\| reg->ref_obj_id) {
11913	verbose(private_data: env, fmt: "rbtree_remove node input must be non-owning ref\n");
11914	return -EINVAL;
11915	}
11916	if (in_rbtree_lock_required_cb(env)) {
11917	verbose(private_data: env, fmt: "rbtree_remove not allowed in rbtree cb\n");
11918	return -EINVAL;
11919	}
11920	} else {
11921	if (reg->type != (PTR_TO_BTF_ID \| MEM_ALLOC)) {
11922	verbose(private_data: env, fmt: "arg#%d expected pointer to allocated object\n", i);
11923	return -EINVAL;
11924	}
11925	if (!reg->ref_obj_id) {
11926	verbose(private_data: env, fmt: "allocated object must be referenced\n");
11927	return -EINVAL;
11928	}
11929	}
11930
11931	ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
11932	if (ret < `0`)
11933	return ret;
11934	break;
11935	case KF_ARG_PTR_TO_MAP:
11936	/ If argument has '__map' suffix expect 'struct bpf_map ' /*
11937	ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
11938	ref_t = btf_type_by_id(btf: btf_vmlinux, type_id: ref_id);
11939	ref_tname = btf_name_by_offset(btf, offset: ref_t->name_off);
11940	fallthrough;
11941	case KF_ARG_PTR_TO_BTF_ID:
11942	/ Only base_type is checked, further checks are done here /
11943	if ((base_type(type: reg->type) != PTR_TO_BTF_ID \|\|
11944	(bpf_type_has_unsafe_modifiers(type: reg->type) && !is_rcu_reg(reg))) &&
11945	!reg2btf_ids[base_type(type: reg->type)]) {
11946	verbose(private_data: env, fmt: "arg#%d is %s ", i, reg_type_str(env, type: reg->type));
11947	verbose(private_data: env, fmt: "expected %s or socket\n",
11948	reg_type_str(env, type: base_type(type: reg->type) \|
11949	(type_flag(type: reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
11950	return -EINVAL;
11951	}
11952	ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, argno: i);
11953	if (ret < `0`)
11954	return ret;
11955	break;
11956	case KF_ARG_PTR_TO_MEM:
11957	resolve_ret = btf_resolve_size(btf, type: ref_t, type_size: &type_size);
11958	if (IS_ERR(ptr: resolve_ret)) {
11959	verbose(private_data: env, fmt: "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
11960	i, btf_type_str(t: ref_t), ref_tname, PTR_ERR(ptr: resolve_ret));
11961	return -EINVAL;
11962	}
11963	ret = check_mem_reg(env, reg, regno, mem_size: type_size);
11964	if (ret < `0`)
11965	return ret;
11966	break;
11967	case KF_ARG_PTR_TO_MEM_SIZE:
11968	{
11969	struct bpf_reg_state *buff_reg = &regs[regno];
11970	const struct btf_param *buff_arg = &args[i];
11971	struct bpf_reg_state *size_reg = &regs[regno + `1`];
11972	const struct btf_param *size_arg = &args[i + `1`];
11973
11974	if (!register_is_null(reg: buff_reg) \|\| !is_kfunc_arg_optional(btf: meta->btf, arg: buff_arg)) {
11975	ret = check_kfunc_mem_size_reg(env, reg: size_reg, regno: regno + `1`);
11976	if (ret < `0`) {
11977	verbose(private_data: env, fmt: "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + `1`);
11978	return ret;
11979	}
11980	}
11981
11982	if (is_kfunc_arg_const_mem_size(btf: meta->btf, arg: size_arg, reg: size_reg)) {
11983	if (meta->arg_constant.found) {
11984	verbose(private_data: env, fmt: "verifier internal error: only one constant argument permitted\n");
11985	return -EFAULT;
11986	}
11987	if (!tnum_is_const(a: size_reg->var_off)) {
11988	verbose(private_data: env, fmt: "R%d must be a known constant\n", regno + `1`);
11989	return -EINVAL;
11990	}
11991	meta->arg_constant.found = true;
11992	meta->arg_constant.value = size_reg->var_off.value;
11993	}
11994
11995	/ Skip next '__sz' or '__szk' argument /
11996	i++;
11997	break;
11998	}
11999	case KF_ARG_PTR_TO_CALLBACK:
12000	if (reg->type != PTR_TO_FUNC) {
12001	verbose(private_data: env, fmt: "arg%d expected pointer to func\n", i);
12002	return -EINVAL;
12003	}
12004	meta->subprogno = reg->subprogno;
12005	break;
12006	case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12007	if (!type_is_ptr_alloc_obj(type: reg->type)) {
12008	verbose(private_data: env, fmt: "arg#%d is neither owning or non-owning ref\n", i);
12009	return -EINVAL;
12010	}
12011	if (!type_is_non_owning_ref(type: reg->type))
12012	meta->arg_owning_ref = true;
12013
12014	rec = reg_btf_record(reg);
12015	if (!rec) {
12016	verbose(private_data: env, fmt: "verifier internal error: Couldn't find btf_record\n");
12017	return -EFAULT;
12018	}
12019
12020	if (rec->refcount_off < `0`) {
12021	verbose(private_data: env, fmt: "arg#%d doesn't point to a type with bpf_refcount field\n", i);
12022	return -EINVAL;
12023	}
12024
12025	meta->arg_btf = reg->btf;
12026	meta->arg_btf_id = reg->btf_id;
12027	break;
12028	case KF_ARG_PTR_TO_CONST_STR:
12029	if (reg->type != PTR_TO_MAP_VALUE) {
12030	verbose(private_data: env, fmt: "arg#%d doesn't point to a const string\n", i);
12031	return -EINVAL;
12032	}
12033	ret = check_reg_const_str(env, reg, regno);
12034	if (ret)
12035	return ret;
12036	break;
12037	}
12038	}
12039
12040	if (is_kfunc_release(meta) && !meta->release_regno) {
12041	verbose(private_data: env, fmt: "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
12042	func_name);
12043	return -EINVAL;
12044	}
12045
12046	return `0`;
12047	}
12048
12049	static int fetch_kfunc_meta(struct bpf_verifier_env *env,
12050	struct bpf_insn *insn,
12051	struct bpf_kfunc_call_arg_meta *meta,
12052	const char **kfunc_name)
12053	{
12054	const struct btf_type func, func_proto;
12055	u32 func_id, *kfunc_flags;
12056	const char *func_name;
12057	struct btf *desc_btf;
12058
12059	if (kfunc_name)
12060	*kfunc_name = NULL;
12061
12062	if (!insn->imm)
12063	return -EINVAL;
12064
12065	desc_btf = find_kfunc_desc_btf(env, offset: insn->off);
12066	if (IS_ERR(ptr: desc_btf))
12067	return PTR_ERR(ptr: desc_btf);
12068
12069	func_id = insn->imm;
12070	func = btf_type_by_id(btf: desc_btf, type_id: func_id);
12071	func_name = btf_name_by_offset(btf: desc_btf, offset: func->name_off);
12072	if (kfunc_name)
12073	*kfunc_name = func_name;
12074	func_proto = btf_type_by_id(btf: desc_btf, type_id: func->type);
12075
12076	kfunc_flags = btf_kfunc_id_set_contains(btf: desc_btf, kfunc_btf_id: func_id, prog: env->prog);
12077	if (!kfunc_flags) {
12078	return -EACCES;
12079	}
12080
12081	memset(meta, `0`, sizeof(*meta));
12082	meta->btf = desc_btf;
12083	meta->func_id = func_id;
12084	meta->kfunc_flags = *kfunc_flags;
12085	meta->func_proto = func_proto;
12086	meta->func_name = func_name;
12087
12088	return `0`;
12089	}
12090
/* Forward declaration: check_kfunc_call() below validates the bpf_throw
 * cookie with this helper before its definition appears.
 */
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12092
12093	static int check_kfunc_call(struct bpf_verifier_env env, struct* bpf_insn *insn,
12094	int *insn_idx_p)
12095	{
12096	const struct btf_type t, ptr_type;
12097	u32 i, nargs, ptr_type_id, release_ref_obj_id;
12098	struct bpf_reg_state *regs = cur_regs(env);
12099	const char func_name, ptr_type_name;
12100	bool sleepable, rcu_lock, rcu_unlock;
12101	struct bpf_kfunc_call_arg_meta meta;
12102	struct bpf_insn_aux_data *insn_aux;
12103	int err, insn_idx = *insn_idx_p;
12104	const struct btf_param *args;
12105	const struct btf_type *ret_t;
12106	struct btf *desc_btf;
12107
12108	/ skip for now, but return error when we find this in fixup_kfunc_call /
12109	if (!insn->imm)
12110	return `0`;
12111
12112	err = fetch_kfunc_meta(env, insn, meta: &meta, kfunc_name: &func_name);
12113	if (err == -EACCES && func_name)
12114	verbose(private_data: env, fmt: "calling kernel function %s is not allowed\n", func_name);
12115	if (err)
12116	return err;
12117	desc_btf = meta.btf;
12118	insn_aux = &env->insn_aux_data[insn_idx];
12119
12120	insn_aux->is_iter_next = is_iter_next_kfunc(meta: &meta);
12121
12122	if (is_kfunc_destructive(meta: &meta) && !capable(CAP_SYS_BOOT)) {
12123	verbose(private_data: env, fmt: "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12124	return -EACCES;
12125	}
12126
12127	sleepable = is_kfunc_sleepable(meta: &meta);
12128	if (sleepable && !in_sleepable(env)) {
12129	verbose(private_data: env, fmt: "program must be sleepable to call sleepable kfunc %s\n", func_name);
12130	return -EACCES;
12131	}
12132
12133	/ Check the arguments /
12134	err = check_kfunc_args(env, meta: &meta, insn_idx);
12135	if (err < `0`)
12136	return err;
12137
12138	if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
12139	err = push_callback_call(env, insn, insn_idx, subprog: meta.subprogno,
12140	set_callee_state_cb: set_rbtree_add_callback_state);
12141	if (err) {
12142	verbose(private_data: env, fmt: "kfunc %s#%d failed callback verification\n",
12143	func_name, meta.func_id);
12144	return err;
12145	}
12146	}
12147
12148	rcu_lock = is_kfunc_bpf_rcu_read_lock(meta: &meta);
12149	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(meta: &meta);
12150
12151	if (env->cur_state->active_rcu_lock) {
12152	struct bpf_func_state *state;
12153	struct bpf_reg_state *reg;
12154	u32 clear_mask = (`1` << STACK_SPILL) \| (`1` << STACK_ITER);
12155
12156	if (in_rbtree_lock_required_cb(env) && (rcu_lock \|\| rcu_unlock)) {
12157	verbose(private_data: env, fmt: "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
12158	return -EACCES;
12159	}
12160
12161	if (rcu_lock) {
12162	verbose(private_data: env, fmt: "nested rcu read lock (kernel function %s)\n", func_name);
12163	return -EINVAL;
12164	} else if (rcu_unlock) {
12165	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
12166	if (reg->type & MEM_RCU) {
12167	reg->type &= ~(MEM_RCU \| PTR_MAYBE_NULL);
12168	reg->type \|= PTR_UNTRUSTED;
12169	}
12170	}));
12171	env->cur_state->active_rcu_lock = false;
12172	} else if (sleepable) {
12173	verbose(private_data: env, fmt: "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
12174	return -EACCES;
12175	}
12176	} else if (rcu_lock) {
12177	env->cur_state->active_rcu_lock = true;
12178	} else if (rcu_unlock) {
12179	verbose(private_data: env, fmt: "unmatched rcu read unlock (kernel function %s)\n", func_name);
12180	return -EINVAL;
12181	}
12182
12183	/ In case of release function, we get register number of refcounted*
12184	* PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
12185	*/
12186	if (meta.release_regno) {
12187	err = release_reference(env, ref_obj_id: regs[meta.release_regno].ref_obj_id);
12188	if (err) {
12189	verbose(private_data: env, fmt: "kfunc %s#%d reference has not been acquired before\n",
12190	func_name, meta.func_id);
12191	return err;
12192	}
12193	}
12194
12195	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] \|\|
12196	meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] \|\|
12197	meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
12198	release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
12199	insn_aux->insert_off = regs[BPF_REG_2].off;
12200	insn_aux->kptr_struct_meta = btf_find_struct_meta(btf: meta.arg_btf, btf_id: meta.arg_btf_id);
12201	err = ref_convert_owning_non_owning(env, ref_obj_id: release_ref_obj_id);
12202	if (err) {
12203	verbose(private_data: env, fmt: "kfunc %s#%d conversion of owning ref to non-owning failed\n",
12204	func_name, meta.func_id);
12205	return err;
12206	}
12207
12208	err = release_reference(env, ref_obj_id: release_ref_obj_id);
12209	if (err) {
12210	verbose(private_data: env, fmt: "kfunc %s#%d reference has not been acquired before\n",
12211	func_name, meta.func_id);
12212	return err;
12213	}
12214	}
12215
12216	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
12217	if (!bpf_jit_supports_exceptions()) {
12218	verbose(private_data: env, fmt: "JIT does not support calling kfunc %s#%d\n",
12219	func_name, meta.func_id);
12220	return -ENOTSUPP;
12221	}
12222	env->seen_exception = true;
12223
12224	/ In the case of the default callback, the cookie value passed*
12225	* to bpf_throw becomes the return value of the program.
12226	*/
12227	if (!env->exception_callback_subprog) {
12228	err = check_return_code(env, regno: BPF_REG_1, reg_name: "R1");
12229	if (err < `0`)
12230	return err;
12231	}
12232	}
12233
12234	for (i = `0`; i < CALLER_SAVED_REGS; i++)
12235	mark_reg_not_init(env, regs, regno: caller_saved[i]);
12236
12237	/ Check return type /
12238	t = btf_type_skip_modifiers(btf: desc_btf, id: meta.func_proto->type, NULL);
12239
12240	if (is_kfunc_acquire(meta: &meta) && !btf_type_is_struct_ptr(btf: meta.btf, t)) {
12241	/ Only exception is bpf_obj_new_impl /
12242	if (meta.btf != btf_vmlinux \|\|
12243	(meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
12244	meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
12245	meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
12246	verbose(private_data: env, fmt: "acquire kernel function does not return PTR_TO_BTF_ID\n");
12247	return -EINVAL;
12248	}
12249	}
12250
12251	if (btf_type_is_scalar(t)) {
12252	mark_reg_unknown(env, regs, regno: BPF_REG_0);
12253	mark_btf_func_reg_size(env, regno: BPF_REG_0, reg_size: t->size);
12254	} else if (btf_type_is_ptr(t)) {
12255	ptr_type = btf_type_skip_modifiers(btf: desc_btf, id: t->type, res_id: &ptr_type_id);
12256
12257	if (meta.btf == btf_vmlinux && btf_id_set_contains(set: &special_kfunc_set, id: meta.func_id)) {
12258	if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] \|\|
12259	meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12260	struct btf_struct_meta *struct_meta;
12261	struct btf *ret_btf;
12262	u32 ret_btf_id;
12263
12264	if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
12265	return -ENOMEM;
12266
12267	if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
12268	verbose(private_data: env, fmt: "local type ID argument must be in range [0, U32_MAX]\n");
12269	return -EINVAL;
12270	}
12271
12272	ret_btf = env->prog->aux->btf;
12273	ret_btf_id = meta.arg_constant.value;
12274
12275	/ This may be NULL due to user not supplying a BTF /
12276	if (!ret_btf) {
12277	verbose(private_data: env, fmt: "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12278	return -EINVAL;
12279	}
12280
12281	ret_t = btf_type_by_id(btf: ret_btf, type_id: ret_btf_id);
12282	if (!ret_t \|\| !__btf_type_is_struct(t: ret_t)) {
12283	verbose(private_data: env, fmt: "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12284	return -EINVAL;
12285	}
12286
12287	if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12288	if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
12289	verbose(private_data: env, fmt: "bpf_percpu_obj_new type size (%d) is greater than %d\n",
12290	ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
12291	return -EINVAL;
12292	}
12293
12294	if (!bpf_global_percpu_ma_set) {
12295	mutex_lock(&bpf_percpu_ma_lock);
12296	if (!bpf_global_percpu_ma_set) {
12297	/ Charge memory allocated with bpf_global_percpu_ma to*
12298	* root memcg. The obj_cgroup for root memcg is NULL.
12299	*/
12300	err = bpf_mem_alloc_percpu_init(ma: &bpf_global_percpu_ma, NULL);
12301	if (!err)
12302	bpf_global_percpu_ma_set = true;
12303	}
12304	mutex_unlock(lock: &bpf_percpu_ma_lock);
12305	if (err)
12306	return err;
12307	}
12308
12309	mutex_lock(&bpf_percpu_ma_lock);
12310	err = bpf_mem_alloc_percpu_unit_init(ma: &bpf_global_percpu_ma, size: ret_t->size);
12311	mutex_unlock(lock: &bpf_percpu_ma_lock);
12312	if (err)
12313	return err;
12314	}
12315
12316	struct_meta = btf_find_struct_meta(btf: ret_btf, btf_id: ret_btf_id);
12317	if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12318	if (!__btf_type_is_scalar_struct(env, btf: ret_btf, t: ret_t, rec: `0`)) {
12319	verbose(private_data: env, fmt: "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12320	return -EINVAL;
12321	}
12322
12323	if (struct_meta) {
12324	verbose(private_data: env, fmt: "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12325	return -EINVAL;
12326	}
12327	}
12328
12329	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12330	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| MEM_ALLOC;
12331	regs[BPF_REG_0].btf = ret_btf;
12332	regs[BPF_REG_0].btf_id = ret_btf_id;
12333	if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
12334	regs[BPF_REG_0].type \|= MEM_PERCPU;
12335
12336	insn_aux->obj_new_size = ret_t->size;
12337	insn_aux->kptr_struct_meta = struct_meta;
12338	} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
12339	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12340	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| MEM_ALLOC;
12341	regs[BPF_REG_0].btf = meta.arg_btf;
12342	regs[BPF_REG_0].btf_id = meta.arg_btf_id;
12343
12344	insn_aux->kptr_struct_meta =
12345	btf_find_struct_meta(btf: meta.arg_btf,
12346	btf_id: meta.arg_btf_id);
12347	} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] \|\|
12348	meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
12349	struct btf_field *field = meta.arg_list_head.field;
12350
12351	mark_reg_graph_node(regs, regno: BPF_REG_0, ds_head: &field->graph_root);
12352	} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] \|\|
12353	meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
12354	struct btf_field *field = meta.arg_rbtree_root.field;
12355
12356	mark_reg_graph_node(regs, regno: BPF_REG_0, ds_head: &field->graph_root);
12357	} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12358	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12359	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| PTR_TRUSTED;
12360	regs[BPF_REG_0].btf = desc_btf;
12361	regs[BPF_REG_0].btf_id = meta.ret_btf_id;
12362	} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12363	ret_t = btf_type_by_id(btf: desc_btf, type_id: meta.arg_constant.value);
12364	if (!ret_t \|\| !btf_type_is_struct(t: ret_t)) {
12365	verbose(private_data: env,
12366	fmt: "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
12367	return -EINVAL;
12368	}
12369
12370	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12371	regs[BPF_REG_0].type = PTR_TO_BTF_ID \| PTR_UNTRUSTED;
12372	regs[BPF_REG_0].btf = desc_btf;
12373	regs[BPF_REG_0].btf_id = meta.arg_constant.value;
12374	} else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] \|\|
12375	meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12376	enum bpf_type_flag type_flag = get_dynptr_type_flag(type: meta.initialized_dynptr.type);
12377
12378	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12379
12380	if (!meta.arg_constant.found) {
12381	verbose(private_data: env, fmt: "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
12382	return -EFAULT;
12383	}
12384
12385	regs[BPF_REG_0].mem_size = meta.arg_constant.value;
12386
12387	/ PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked /
12388	regs[BPF_REG_0].type = PTR_TO_MEM \| type_flag;
12389
12390	if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12391	regs[BPF_REG_0].type \|= MEM_RDONLY;
12392	} else {
12393	/ this will set env->seen_direct_write to true /
12394	if (!may_access_direct_pkt_data(env, NULL, t: BPF_WRITE)) {
12395	verbose(private_data: env, fmt: "the prog does not allow writes to packet data\n");
12396	return -EINVAL;
12397	}
12398	}
12399
12400	if (!meta.initialized_dynptr.id) {
12401	verbose(private_data: env, fmt: "verifier internal error: no dynptr id\n");
12402	return -EFAULT;
12403	}
12404	regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
12405
12406	/ we don't need to set BPF_REG_0's ref obj id*
12407	* because packet slices are not refcounted (see
12408	* dynptr_type_refcounted)
12409	*/
12410	} else {
12411	verbose(private_data: env, fmt: "kernel function %s unhandled dynamic return type\n",
12412	meta.func_name);
12413	return -EFAULT;
12414	}
12415	} else if (btf_type_is_void(t: ptr_type)) {
12416	/ kfunc returning 'void ' is equivalent to returning scalar /*
12417	mark_reg_unknown(env, regs, regno: BPF_REG_0);
12418	} else if (!__btf_type_is_struct(t: ptr_type)) {
12419	if (!meta.r0_size) {
12420	__u32 sz;
12421
12422	if (!IS_ERR(ptr: btf_resolve_size(btf: desc_btf, type: ptr_type, type_size: &sz))) {
12423	meta.r0_size = sz;
12424	meta.r0_rdonly = true;
12425	}
12426	}
12427	if (!meta.r0_size) {
12428	ptr_type_name = btf_name_by_offset(btf: desc_btf,
12429	offset: ptr_type->name_off);
12430	verbose(private_data: env,
12431	fmt: "kernel function %s returns pointer type %s %s is not supported\n",
12432	func_name,
12433	btf_type_str(t: ptr_type),
12434	ptr_type_name);
12435	return -EINVAL;
12436	}
12437
12438	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12439	regs[BPF_REG_0].type = PTR_TO_MEM;
12440	regs[BPF_REG_0].mem_size = meta.r0_size;
12441
12442	if (meta.r0_rdonly)
12443	regs[BPF_REG_0].type \|= MEM_RDONLY;
12444
12445	/ Ensures we don't access the memory after a release_reference() /
12446	if (meta.ref_obj_id)
12447	regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
12448	} else {
12449	mark_reg_known_zero(env, regs, regno: BPF_REG_0);
12450	regs[BPF_REG_0].btf = desc_btf;
12451	regs[BPF_REG_0].type = PTR_TO_BTF_ID;
12452	regs[BPF_REG_0].btf_id = ptr_type_id;
12453	}
12454
12455	if (is_kfunc_ret_null(meta: &meta)) {
12456	regs[BPF_REG_0].type \|= PTR_MAYBE_NULL;
12457	/ For mark_ptr_or_null_reg, see 93c230e3f5bd6 /
12458	regs[BPF_REG_0].id = ++env->id_gen;
12459	}
12460	mark_btf_func_reg_size(env, regno: BPF_REG_0, reg_size: sizeof(void *));
12461	if (is_kfunc_acquire(meta: &meta)) {
12462	int id = acquire_reference_state(env, insn_idx);
12463
12464	if (id < `0`)
12465	return id;
12466	if (is_kfunc_ret_null(meta: &meta))
12467	regs[BPF_REG_0].id = id;
12468	regs[BPF_REG_0].ref_obj_id = id;
12469	} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
12470	ref_set_non_owning(env, reg: &regs[BPF_REG_0]);
12471	}
12472
12473	if (reg_may_point_to_spin_lock(reg: &regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
12474	regs[BPF_REG_0].id = ++env->id_gen;
12475	} else if (btf_type_is_void(t)) {
12476	if (meta.btf == btf_vmlinux && btf_id_set_contains(set: &special_kfunc_set, id: meta.func_id)) {
12477	if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] \|\|
12478	meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
12479	insn_aux->kptr_struct_meta =
12480	btf_find_struct_meta(btf: meta.arg_btf,
12481	btf_id: meta.arg_btf_id);
12482	}
12483	}
12484	}
12485
12486	nargs = btf_type_vlen(t: meta.func_proto);
12487	args = (const struct btf_param *)(meta.func_proto + `1`);
12488	for (i = `0`; i < nargs; i++) {
12489	u32 regno = i + `1`;
12490
12491	t = btf_type_skip_modifiers(btf: desc_btf, id: args[i].type, NULL);
12492	if (btf_type_is_ptr(t))
12493	mark_btf_func_reg_size(env, regno, reg_size: sizeof(void *));
12494	else
12495	/ scalar. ensured by btf_check_kfunc_arg_match() /
12496	mark_btf_func_reg_size(env, regno, reg_size: t->size);
12497	}
12498
12499	if (is_iter_next_kfunc(meta: &meta)) {
12500	err = process_iter_next_call(env, insn_idx, meta: &meta);
12501	if (err)
12502	return err;
12503	}
12504
12505	return `0`;
12506	}
12507
/* Return true when the mathematically exact sum a + b does not fit in s64.
 *
 * The sum is computed on the unsigned representations so the addition itself
 * can never overflow; the wrapped result is then compared against @a: adding
 * a negative @b must not make the result larger, and adding a non-negative
 * @b must not make it smaller.
 */
static bool signed_add_overflows(s64 a, s64 b)
{
	/* Do the add in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a + (u64)b);

	if (b < 0)
		return res > a;
	return res < a;
}
12517
/* Return true when the mathematically exact sum a + b does not fit in s32.
 *
 * Same scheme as the 64-bit variant: add the unsigned representations, then
 * check that the wrapped result moved in the direction implied by the sign
 * of @b.
 */
static bool signed_add32_overflows(s32 a, s32 b)
{
	/* Do the add in u32, where overflow is well-defined */
	s32 res = (s32)((u32)a + (u32)b);

	if (b < 0)
		return res > a;
	return res < a;
}
12527
/* Return true when the mathematically exact difference a - b does not fit
 * in s64.
 *
 * The subtraction is done on the unsigned representations; the wrapped
 * result is then compared against @a: subtracting a negative @b must not
 * make the result smaller, and subtracting a non-negative @b must not make
 * it larger.
 */
static bool signed_sub_overflows(s64 a, s64 b)
{
	/* Do the sub in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a - (u64)b);

	if (b < 0)
		return res < a;
	return res > a;
}
12537
/* Return true when the mathematically exact difference a - b does not fit
 * in s32.
 *
 * Same scheme as the 64-bit variant: subtract the unsigned representations,
 * then check that the wrapped result moved in the direction implied by the
 * sign of @b.
 */
static bool signed_sub32_overflows(s32 a, s32 b)
{
	/* Do the sub in u32, where overflow is well-defined */
	s32 res = (s32)((u32)a - (u32)b);

	if (b < 0)
		return res < a;
	return res > a;
}
12547
12548	static bool check_reg_sane_offset(struct bpf_verifier_env *env,
12549	const struct bpf_reg_state *reg,
12550	enum bpf_reg_type type)
12551	{
12552	bool known = tnum_is_const(a: reg->var_off);
12553	s64 val = reg->var_off.value;
12554	s64 smin = reg->smin_value;
12555
12556	if (known && (val >= BPF_MAX_VAR_OFF \|\| val <= -BPF_MAX_VAR_OFF)) {
12557	verbose(private_data: env, fmt: "math between %s pointer and %lld is not allowed\n",
12558	reg_type_str(env, type), val);
12559	return false;
12560	}
12561
12562	if (reg->off >= BPF_MAX_VAR_OFF \|\| reg->off <= -BPF_MAX_VAR_OFF) {
12563	verbose(private_data: env, fmt: "%s pointer offset %d is not allowed\n",
12564	reg_type_str(env, type), reg->off);
12565	return false;
12566	}
12567
12568	if (smin == S64_MIN) {
12569	verbose(private_data: env, fmt: "math between %s pointer and register with unbounded min value is not allowed\n",
12570	reg_type_str(env, type));
12571	return false;
12572	}
12573
12574	if (smin >= BPF_MAX_VAR_OFF \|\| smin <= -BPF_MAX_VAR_OFF) {
12575	verbose(private_data: env, fmt: "value %lld makes %s pointer be out of bounds\n",
12576	smin, reg_type_str(env, type));
12577	return false;
12578	}
12579
12580	return true;
12581	}
12582
/* Negative result codes of the ALU sanitation helpers below; 0 means success.
 * As used in this part of the file:
 *  REASON_BOUNDS - offset register is not constant and its signed bounds
 *                  straddle zero (sanitize_ptr_alu)
 *  REASON_TYPE   - pointer type has no limit defined (retrieve_ptr_limit)
 *  REASON_PATHS  - different paths need different masking for the same insn
 *                  (update_alu_sanitation_state)
 *  REASON_LIMIT  - computed limit is not below the maximum (retrieve_ptr_limit)
 *  REASON_STACK  - the speculative branch could not be pushed
 *                  (sanitize_ptr_alu)
 */
enum {
	REASON_BOUNDS	= -1,
	REASON_TYPE	= -2,
	REASON_PATHS	= -3,
	REASON_LIMIT	= -4,
	REASON_STACK	= -5,
};
12590
12591	static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
12592	u32 *alu_limit, bool mask_to_left)
12593	{
12594	u32 max = `0`, ptr_limit = `0`;
12595
12596	switch (ptr_reg->type) {
12597	case PTR_TO_STACK:
12598	/ Offset 0 is out-of-bounds, but acceptable start for the*
12599	* left direction, see BPF_REG_FP. Also, unknown scalar
12600	* offset where we would need to deal with min/max bounds is
12601	* currently prohibited for unprivileged.
12602	*/
12603	max = MAX_BPF_STACK + mask_to_left;
12604	ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
12605	break;
12606	case PTR_TO_MAP_VALUE:
12607	max = ptr_reg->map_ptr->value_size;
12608	ptr_limit = (mask_to_left ?
12609	ptr_reg->smin_value :
12610	ptr_reg->umax_value) + ptr_reg->off;
12611	break;
12612	default:
12613	return REASON_TYPE;
12614	}
12615
12616	if (ptr_limit >= max)
12617	return REASON_LIMIT;
12618	*alu_limit = ptr_limit;
12619	return `0`;
12620	}
12621
12622	static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
12623	const struct bpf_insn *insn)
12624	{
12625	return env->bypass_spec_v1 \|\| BPF_SRC(insn->code) == BPF_K;
12626	}
12627
12628	static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
12629	u32 alu_state, u32 alu_limit)
12630	{
12631	/ If we arrived here from different branches with different*
12632	* state or limits to sanitize, then this won't work.
12633	*/
12634	if (aux->alu_state &&
12635	(aux->alu_state != alu_state \|\|
12636	aux->alu_limit != alu_limit))
12637	return REASON_PATHS;
12638
12639	/ Corresponding fixup done in do_misc_fixups(). /
12640	aux->alu_state = alu_state;
12641	aux->alu_limit = alu_limit;
12642	return `0`;
12643	}
12644
12645	static int sanitize_val_alu(struct bpf_verifier_env *env,
12646	struct bpf_insn *insn)
12647	{
12648	struct bpf_insn_aux_data *aux = cur_aux(env);
12649
12650	if (can_skip_alu_sanitation(env, insn))
12651	return `0`;
12652
12653	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, alu_limit: `0`);
12654	}
12655
12656	static bool sanitize_needed(u8 opcode)
12657	{
12658	return opcode == BPF_ADD \|\| opcode == BPF_SUB;
12659	}
12660
12661	struct bpf_sanitize_info {
12662	struct bpf_insn_aux_data aux;
12663	bool mask_to_left;
12664	};
12665
12666	static struct bpf_verifier_state *
12667	sanitize_speculative_path(struct bpf_verifier_env *env,
12668	const struct bpf_insn *insn,
12669	u32 next_idx, u32 curr_idx)
12670	{
12671	struct bpf_verifier_state *branch;
12672	struct bpf_reg_state *regs;
12673
12674	branch = push_stack(env, insn_idx: next_idx, prev_insn_idx: curr_idx, speculative: true);
12675	if (branch && insn) {
12676	regs = branch->frame[branch->curframe]->regs;
12677	if (BPF_SRC(insn->code) == BPF_K) {
12678	mark_reg_unknown(env, regs, regno: insn->dst_reg);
12679	} else if (BPF_SRC(insn->code) == BPF_X) {
12680	mark_reg_unknown(env, regs, regno: insn->dst_reg);
12681	mark_reg_unknown(env, regs, regno: insn->src_reg);
12682	}
12683	}
12684	return branch;
12685	}
12686
12687	static int sanitize_ptr_alu(struct bpf_verifier_env *env,
12688	struct bpf_insn *insn,
12689	const struct bpf_reg_state *ptr_reg,
12690	const struct bpf_reg_state *off_reg,
12691	struct bpf_reg_state *dst_reg,
12692	struct bpf_sanitize_info *info,
12693	const bool commit_window)
12694	{
12695	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
12696	struct bpf_verifier_state *vstate = env->cur_state;
12697	bool off_is_imm = tnum_is_const(a: off_reg->var_off);
12698	bool off_is_neg = off_reg->smin_value < `0`;
12699	bool ptr_is_dst_reg = ptr_reg == dst_reg;
12700	u8 opcode = BPF_OP(insn->code);
12701	u32 alu_state, alu_limit;
12702	struct bpf_reg_state tmp;
12703	bool ret;
12704	int err;
12705
12706	if (can_skip_alu_sanitation(env, insn))
12707	return `0`;
12708
12709	/ We already marked aux for masking from non-speculative*
12710	* paths, thus we got here in the first place. We only care
12711	* to explore bad access from here.
12712	*/
12713	if (vstate->speculative)
12714	goto do_sim;
12715
12716	if (!commit_window) {
12717	if (!tnum_is_const(a: off_reg->var_off) &&
12718	(off_reg->smin_value < `0`) != (off_reg->smax_value < `0`))
12719	return REASON_BOUNDS;
12720
12721	info->mask_to_left = (opcode == BPF_ADD && off_is_neg) \|\|
12722	(opcode == BPF_SUB && !off_is_neg);
12723	}
12724
12725	err = retrieve_ptr_limit(ptr_reg, alu_limit: &alu_limit, mask_to_left: info->mask_to_left);
12726	if (err < `0`)
12727	return err;
12728
12729	if (commit_window) {
12730	/ In commit phase we narrow the masking window based on*
12731	* the observed pointer move after the simulated operation.
12732	*/
12733	alu_state = info->aux.alu_state;
12734	alu_limit = abs(info->aux.alu_limit - alu_limit);
12735	} else {
12736	alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : `0`;
12737	alu_state \|= off_is_imm ? BPF_ALU_IMMEDIATE : `0`;
12738	alu_state \|= ptr_is_dst_reg ?
12739	BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
12740
12741	/ Limit pruning on unknown scalars to enable deep search for*
12742	* potential masking differences from other program paths.
12743	*/
12744	if (!off_is_imm)
12745	env->explore_alu_limits = true;
12746	}
12747
12748	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
12749	if (err < `0`)
12750	return err;
12751	do_sim:
12752	/ If we're in commit phase, we're done here given we already*
12753	* pushed the truncated dst_reg into the speculative verification
12754	* stack.
12755	*
12756	* Also, when register is a known constant, we rewrite register-based
12757	* operation to immediate-based, and thus do not need masking (and as
12758	* a consequence, do not need to simulate the zero-truncation either).
12759	*/
12760	if (commit_window \|\| off_is_imm)
12761	return `0`;
12762
12763	/ Simulate and find potential out-of-bounds access under*
12764	* speculative execution from truncation as a result of
12765	* masking when off was not within expected range. If off
12766	* sits in dst, then we temporarily need to move ptr there
12767	* to simulate dst (== 0) +/-= ptr. Needed, for example,
12768	* for cases where we use K-based arithmetic in one direction
12769	* and truncated reg-based in the other in order to explore
12770	* bad access.
12771	*/
12772	if (!ptr_is_dst_reg) {
12773	tmp = *dst_reg;
12774	copy_register_state(dst: dst_reg, src: ptr_reg);
12775	}
12776	ret = sanitize_speculative_path(env, NULL, next_idx: env->insn_idx + `1`,
12777	curr_idx: env->insn_idx);
12778	if (!ptr_is_dst_reg && ret)
12779	*dst_reg = tmp;
12780	return !ret ? REASON_STACK : `0`;
12781	}
12782
12783	static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
12784	{
12785	struct bpf_verifier_state *vstate = env->cur_state;
12786
12787	/ If we simulate paths under speculation, we don't update the*
12788	* insn as 'seen' such that when we verify unreachable paths in
12789	* the non-speculative domain, sanitize_dead_code() can still
12790	* rewrite/sanitize them.
12791	*/
12792	if (!vstate->speculative)
12793	env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
12794	}
12795
12796	static int sanitize_err(struct bpf_verifier_env *env,
12797	const struct bpf_insn insn, int* reason,
12798	const struct bpf_reg_state *off_reg,
12799	const struct bpf_reg_state *dst_reg)
12800	{
12801	static const char *err = "pointer arithmetic with it prohibited for !root";
12802	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
12803	u32 dst = insn->dst_reg, src = insn->src_reg;
12804
12805	switch (reason) {
12806	case REASON_BOUNDS:
12807	verbose(private_data: env, fmt: "R%d has unknown scalar with mixed signed bounds, %s\n",
12808	off_reg == dst_reg ? dst : src, err);
12809	break;
12810	case REASON_TYPE:
12811	verbose(private_data: env, fmt: "R%d has pointer with unsupported alu operation, %s\n",
12812	off_reg == dst_reg ? src : dst, err);
12813	break;
12814	case REASON_PATHS:
12815	verbose(private_data: env, fmt: "R%d tried to %s from different maps, paths or scalars, %s\n",
12816	dst, op, err);
12817	break;
12818	case REASON_LIMIT:
12819	verbose(private_data: env, fmt: "R%d tried to %s beyond pointer bounds, %s\n",
12820	dst, op, err);
12821	break;
12822	case REASON_STACK:
12823	verbose(private_data: env, fmt: "R%d could not be pushed for speculative verification, %s\n",
12824	dst, err);
12825	break;
12826	default:
12827	verbose(private_data: env, fmt: "verifier internal error: unknown reason (%d)\n",
12828	reason);
12829	break;
12830	}
12831
12832	return -EACCES;
12833	}
12834
12835	/ check that stack access falls within stack limits and that 'reg' doesn't*
12836	* have a variable offset.
12837	*
12838	* Variable offset is prohibited for unprivileged mode for simplicity since it
12839	* requires corresponding support in Spectre masking for stack ALU. See also
12840	* retrieve_ptr_limit().
12841	*
12842	*
12843	* 'off' includes 'reg->off'.
12844	*/
12845	static int check_stack_access_for_ptr_arithmetic(
12846	struct bpf_verifier_env *env,
12847	int regno,
12848	const struct bpf_reg_state *reg,
12849	int off)
12850	{
12851	if (!tnum_is_const(a: reg->var_off)) {
12852	char tn_buf[`48`];
12853
12854	tnum_strn(str: tn_buf, size: sizeof(tn_buf), a: reg->var_off);
12855	verbose(private_data: env, fmt: "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
12856	regno, tn_buf, off);
12857	return -EACCES;
12858	}
12859
12860	if (off >= `0` \|\| off < -MAX_BPF_STACK) {
12861	verbose(private_data: env, fmt: "R%d stack pointer arithmetic goes out of range, "
12862	"prohibited for !root; off=%d\n", regno, off);
12863	return -EACCES;
12864	}
12865
12866	return `0`;
12867	}
12868
12869	static int sanitize_check_bounds(struct bpf_verifier_env *env,
12870	const struct bpf_insn *insn,
12871	const struct bpf_reg_state *dst_reg)
12872	{
12873	u32 dst = insn->dst_reg;
12874
12875	/ For unprivileged we require that resulting offset must be in bounds*
12876	* in order to be able to sanitize access later on.
12877	*/
12878	if (env->bypass_spec_v1)
12879	return `0`;
12880
12881	switch (dst_reg->type) {
12882	case PTR_TO_STACK:
12883	if (check_stack_access_for_ptr_arithmetic(env, regno: dst, reg: dst_reg,
12884	off: dst_reg->off + dst_reg->var_off.value))
12885	return -EACCES;
12886	break;
12887	case PTR_TO_MAP_VALUE:
12888	if (check_map_access(env, regno: dst, off: dst_reg->off, size: `1`, zero_size_allowed: false, src: ACCESS_HELPER)) {
12889	verbose(private_data: env, fmt: "R%d pointer arithmetic of map value goes out of range, "
12890	"prohibited for !root\n", dst);
12891	return -EACCES;
12892	}
12893	break;
12894	default:
12895	break;
12896	}
12897
12898	return `0`;
12899	}
12900
12901	/ Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.*
12902	* Caller should also handle BPF_MOV case separately.
12903	* If we return -EACCES, caller may want to try again treating pointer as a
12904	* scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
12905	*/
12906	static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
12907	struct bpf_insn *insn,
12908	const struct bpf_reg_state *ptr_reg,
12909	const struct bpf_reg_state *off_reg)
12910	{
12911	struct bpf_verifier_state *vstate = env->cur_state;
12912	struct bpf_func_state *state = vstate->frame[vstate->curframe];
12913	struct bpf_reg_state regs = state->regs, dst_reg;
12914	bool known = tnum_is_const(a: off_reg->var_off);
12915	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
12916	smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
12917	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
12918	umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
12919	struct bpf_sanitize_info info = {};
12920	u8 opcode = BPF_OP(insn->code);
12921	u32 dst = insn->dst_reg;
12922	int ret;
12923
12924	dst_reg = &regs[dst];
12925
12926	if ((known && (smin_val != smax_val \|\| umin_val != umax_val)) \|\|
12927	smin_val > smax_val \|\| umin_val > umax_val) {
12928	/ Taint dst register if offset had invalid bounds derived from*
12929	* e.g. dead branches.
12930	*/
12931	__mark_reg_unknown(env, reg: dst_reg);
12932	return `0`;
12933	}
12934
12935	if (BPF_CLASS(insn->code) != BPF_ALU64) {
12936	/ 32-bit ALU ops on pointers produce (meaningless) scalars /
12937	if (opcode == BPF_SUB && env->allow_ptr_leaks) {
12938	__mark_reg_unknown(env, reg: dst_reg);
12939	return `0`;
12940	}
12941
12942	verbose(private_data: env,
12943	fmt: "R%d 32-bit pointer arithmetic prohibited\n",
12944	dst);
12945	return -EACCES;
12946	}
12947
12948	if (ptr_reg->type & PTR_MAYBE_NULL) {
12949	verbose(private_data: env, fmt: "R%d pointer arithmetic on %s prohibited, null-check it first\n",
12950	dst, reg_type_str(env, type: ptr_reg->type));
12951	return -EACCES;
12952	}
12953
12954	switch (base_type(type: ptr_reg->type)) {
12955	case PTR_TO_CTX:
12956	case PTR_TO_MAP_VALUE:
12957	case PTR_TO_MAP_KEY:
12958	case PTR_TO_STACK:
12959	case PTR_TO_PACKET_META:
12960	case PTR_TO_PACKET:
12961	case PTR_TO_TP_BUFFER:
12962	case PTR_TO_BTF_ID:
12963	case PTR_TO_MEM:
12964	case PTR_TO_BUF:
12965	case PTR_TO_FUNC:
12966	case CONST_PTR_TO_DYNPTR:
12967	break;
12968	case PTR_TO_FLOW_KEYS:
12969	if (known)
12970	break;
12971	fallthrough;
12972	case CONST_PTR_TO_MAP:
12973	/ smin_val represents the known value /
12974	if (known && smin_val == `0` && opcode == BPF_ADD)
12975	break;
12976	fallthrough;
12977	default:
12978	verbose(private_data: env, fmt: "R%d pointer arithmetic on %s prohibited\n",
12979	dst, reg_type_str(env, type: ptr_reg->type));
12980	return -EACCES;
12981	}
12982
12983	/ In case of 'scalar += pointer', dst_reg inherits pointer type and id.*
12984	* The id may be overwritten later if we create a new variable offset.
12985	*/
12986	dst_reg->type = ptr_reg->type;
12987	dst_reg->id = ptr_reg->id;
12988
12989	if (!check_reg_sane_offset(env, reg: off_reg, type: ptr_reg->type) \|\|
12990	!check_reg_sane_offset(env, reg: ptr_reg, type: ptr_reg->type))
12991	return -EINVAL;
12992
12993	/ pointer types do not carry 32-bit bounds at the moment. /
12994	__mark_reg32_unbounded(reg: dst_reg);
12995
12996	if (sanitize_needed(opcode)) {
12997	ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
12998	info: &info, commit_window: false);
12999	if (ret < `0`)
13000	return sanitize_err(env, insn, reason: ret, off_reg, dst_reg);
13001	}
13002
13003	switch (opcode) {
13004	case BPF_ADD:
13005	/ We can take a fixed offset as long as it doesn't overflow*
13006	* the s32 'off' field
13007	*/
13008	if (known && (ptr_reg->off + smin_val ==
13009	(s64)(s32)(ptr_reg->off + smin_val))) {
13010	/ pointer += K. Accumulate it into fixed offset /
13011	dst_reg->smin_value = smin_ptr;
13012	dst_reg->smax_value = smax_ptr;
13013	dst_reg->umin_value = umin_ptr;
13014	dst_reg->umax_value = umax_ptr;
13015	dst_reg->var_off = ptr_reg->var_off;
13016	dst_reg->off = ptr_reg->off + smin_val;
13017	dst_reg->raw = ptr_reg->raw;
13018	break;
13019	}
13020	/ A new variable offset is created. Note that off_reg->off*
13021	* == 0, since it's a scalar.
13022	* dst_reg gets the pointer type and since some positive
13023	* integer value was added to the pointer, give it a new 'id'
13024	* if it's a PTR_TO_PACKET.
13025	* this creates a new 'base' pointer, off_reg (variable) gets
13026	* added into the variable offset, and we copy the fixed offset
13027	* from ptr_reg.
13028	*/
13029	if (signed_add_overflows(a: smin_ptr, b: smin_val) \|\|
13030	signed_add_overflows(a: smax_ptr, b: smax_val)) {
13031	dst_reg->smin_value = S64_MIN;
13032	dst_reg->smax_value = S64_MAX;
13033	} else {
13034	dst_reg->smin_value = smin_ptr + smin_val;
13035	dst_reg->smax_value = smax_ptr + smax_val;
13036	}
13037	if (umin_ptr + umin_val < umin_ptr \|\|
13038	umax_ptr + umax_val < umax_ptr) {
13039	dst_reg->umin_value = `0`;
13040	dst_reg->umax_value = U64_MAX;
13041	} else {
13042	dst_reg->umin_value = umin_ptr + umin_val;
13043	dst_reg->umax_value = umax_ptr + umax_val;
13044	}
13045	dst_reg->var_off = tnum_add(a: ptr_reg->var_off, b: off_reg->var_off);
13046	dst_reg->off = ptr_reg->off;
13047	dst_reg->raw = ptr_reg->raw;
13048	if (reg_is_pkt_pointer(reg: ptr_reg)) {
13049	dst_reg->id = ++env->id_gen;
13050	/ something was added to pkt_ptr, set range to zero /
13051	memset(&dst_reg->raw, `0`, sizeof(dst_reg->raw));
13052	}
13053	break;
13054	case BPF_SUB:
13055	if (dst_reg == off_reg) {
13056	/ scalar -= pointer. Creates an unknown scalar /
13057	verbose(private_data: env, fmt: "R%d tried to subtract pointer from scalar\n",
13058	dst);
13059	return -EACCES;
13060	}
13061	/ We don't allow subtraction from FP, because (according to*
13062	* test_verifier.c test "invalid fp arithmetic", JITs might not
13063	* be able to deal with it.
13064	*/
13065	if (ptr_reg->type == PTR_TO_STACK) {
13066	verbose(private_data: env, fmt: "R%d subtraction from stack pointer prohibited\n",
13067	dst);
13068	return -EACCES;
13069	}
13070	if (known && (ptr_reg->off - smin_val ==
13071	(s64)(s32)(ptr_reg->off - smin_val))) {
13072	/ pointer -= K. Subtract it from fixed offset /
13073	dst_reg->smin_value = smin_ptr;
13074	dst_reg->smax_value = smax_ptr;
13075	dst_reg->umin_value = umin_ptr;
13076	dst_reg->umax_value = umax_ptr;
13077	dst_reg->var_off = ptr_reg->var_off;
13078	dst_reg->id = ptr_reg->id;
13079	dst_reg->off = ptr_reg->off - smin_val;
13080	dst_reg->raw = ptr_reg->raw;
13081	break;
13082	}
13083	/ A new variable offset is created. If the subtrahend is known*
13084	* nonnegative, then any reg->range we had before is still good.
13085	*/
13086	if (signed_sub_overflows(a: smin_ptr, b: smax_val) \|\|
13087	signed_sub_overflows(a: smax_ptr, b: smin_val)) {
13088	/ Overflow possible, we know nothing /
13089	dst_reg->smin_value = S64_MIN;
13090	dst_reg->smax_value = S64_MAX;
13091	} else {
13092	dst_reg->smin_value = smin_ptr - smax_val;
13093	dst_reg->smax_value = smax_ptr - smin_val;
13094	}
13095	if (umin_ptr < umax_val) {
13096	/ Overflow possible, we know nothing /
13097	dst_reg->umin_value = `0`;
13098	dst_reg->umax_value = U64_MAX;
13099	} else {
13100	/ Cannot overflow (as long as bounds are consistent) /
13101	dst_reg->umin_value = umin_ptr - umax_val;
13102	dst_reg->umax_value = umax_ptr - umin_val;
13103	}
13104	dst_reg->var_off = tnum_sub(a: ptr_reg->var_off, b: off_reg->var_off);
13105	dst_reg->off = ptr_reg->off;
13106	dst_reg->raw = ptr_reg->raw;
13107	if (reg_is_pkt_pointer(reg: ptr_reg)) {
13108	dst_reg->id = ++env->id_gen;
13109	/ something was added to pkt_ptr, set range to zero /
13110	if (smin_val < `0`)
13111	memset(&dst_reg->raw, `0`, sizeof(dst_reg->raw));
13112	}
13113	break;
13114	case BPF_AND:
13115	case BPF_OR:
13116	case BPF_XOR:
13117	/ bitwise ops on pointers are troublesome, prohibit. /
13118	verbose(private_data: env, fmt: "R%d bitwise operator %s on pointer prohibited\n",
13119	dst, bpf_alu_string[opcode >> `4`]);
13120	return -EACCES;
13121	default:
13122	/ other operators (e.g. MUL,LSH) produce non-pointer results /
13123	verbose(private_data: env, fmt: "R%d pointer arithmetic with %s operator prohibited\n",
13124	dst, bpf_alu_string[opcode >> `4`]);
13125	return -EACCES;
13126	}
13127
13128	if (!check_reg_sane_offset(env, reg: dst_reg, type: ptr_reg->type))
13129	return -EINVAL;
13130	reg_bounds_sync(reg: dst_reg);
13131	if (sanitize_check_bounds(env, insn, dst_reg) < `0`)
13132	return -EACCES;
13133	if (sanitize_needed(opcode)) {
13134	ret = sanitize_ptr_alu(env, insn, ptr_reg: dst_reg, off_reg, dst_reg,
13135	info: &info, commit_window: true);
13136	if (ret < `0`)
13137	return sanitize_err(env, insn, reason: ret, off_reg, dst_reg);
13138	}
13139
13140	return `0`;
13141	}
13142
13143	static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
13144	struct bpf_reg_state *src_reg)
13145	{
13146	s32 smin_val = src_reg->s32_min_value;
13147	s32 smax_val = src_reg->s32_max_value;
13148	u32 umin_val = src_reg->u32_min_value;
13149	u32 umax_val = src_reg->u32_max_value;
13150
13151	if (signed_add32_overflows(a: dst_reg->s32_min_value, b: smin_val) \|\|
13152	signed_add32_overflows(a: dst_reg->s32_max_value, b: smax_val)) {
13153	dst_reg->s32_min_value = S32_MIN;
13154	dst_reg->s32_max_value = S32_MAX;
13155	} else {
13156	dst_reg->s32_min_value += smin_val;
13157	dst_reg->s32_max_value += smax_val;
13158	}
13159	if (dst_reg->u32_min_value + umin_val < umin_val \|\|
13160	dst_reg->u32_max_value + umax_val < umax_val) {
13161	dst_reg->u32_min_value = `0`;
13162	dst_reg->u32_max_value = U32_MAX;
13163	} else {
13164	dst_reg->u32_min_value += umin_val;
13165	dst_reg->u32_max_value += umax_val;
13166	}
13167	}
13168
13169	static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
13170	struct bpf_reg_state *src_reg)
13171	{
13172	s64 smin_val = src_reg->smin_value;
13173	s64 smax_val = src_reg->smax_value;
13174	u64 umin_val = src_reg->umin_value;
13175	u64 umax_val = src_reg->umax_value;
13176
13177	if (signed_add_overflows(a: dst_reg->smin_value, b: smin_val) \|\|
13178	signed_add_overflows(a: dst_reg->smax_value, b: smax_val)) {
13179	dst_reg->smin_value = S64_MIN;
13180	dst_reg->smax_value = S64_MAX;
13181	} else {
13182	dst_reg->smin_value += smin_val;
13183	dst_reg->smax_value += smax_val;
13184	}
13185	if (dst_reg->umin_value + umin_val < umin_val \|\|
13186	dst_reg->umax_value + umax_val < umax_val) {
13187	dst_reg->umin_value = `0`;
13188	dst_reg->umax_value = U64_MAX;
13189	} else {
13190	dst_reg->umin_value += umin_val;
13191	dst_reg->umax_value += umax_val;
13192	}
13193	}
13194
13195	static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
13196	struct bpf_reg_state *src_reg)
13197	{
13198	s32 smin_val = src_reg->s32_min_value;
13199	s32 smax_val = src_reg->s32_max_value;
13200	u32 umin_val = src_reg->u32_min_value;
13201	u32 umax_val = src_reg->u32_max_value;
13202
13203	if (signed_sub32_overflows(a: dst_reg->s32_min_value, b: smax_val) \|\|
13204	signed_sub32_overflows(a: dst_reg->s32_max_value, b: smin_val)) {
13205	/ Overflow possible, we know nothing /
13206	dst_reg->s32_min_value = S32_MIN;
13207	dst_reg->s32_max_value = S32_MAX;
13208	} else {
13209	dst_reg->s32_min_value -= smax_val;
13210	dst_reg->s32_max_value -= smin_val;
13211	}
13212	if (dst_reg->u32_min_value < umax_val) {
13213	/ Overflow possible, we know nothing /
13214	dst_reg->u32_min_value = `0`;
13215	dst_reg->u32_max_value = U32_MAX;
13216	} else {
13217	/ Cannot overflow (as long as bounds are consistent) /
13218	dst_reg->u32_min_value -= umax_val;
13219	dst_reg->u32_max_value -= umin_val;
13220	}
13221	}
13222
13223	static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
13224	struct bpf_reg_state *src_reg)
13225	{
13226	s64 smin_val = src_reg->smin_value;
13227	s64 smax_val = src_reg->smax_value;
13228	u64 umin_val = src_reg->umin_value;
13229	u64 umax_val = src_reg->umax_value;
13230
13231	if (signed_sub_overflows(a: dst_reg->smin_value, b: smax_val) \|\|
13232	signed_sub_overflows(a: dst_reg->smax_value, b: smin_val)) {
13233	/ Overflow possible, we know nothing /
13234	dst_reg->smin_value = S64_MIN;
13235	dst_reg->smax_value = S64_MAX;
13236	} else {
13237	dst_reg->smin_value -= smax_val;
13238	dst_reg->smax_value -= smin_val;
13239	}
13240	if (dst_reg->umin_value < umax_val) {
13241	/ Overflow possible, we know nothing /
13242	dst_reg->umin_value = `0`;
13243	dst_reg->umax_value = U64_MAX;
13244	} else {
13245	/ Cannot overflow (as long as bounds are consistent) /
13246	dst_reg->umin_value -= umax_val;
13247	dst_reg->umax_value -= umin_val;
13248	}
13249	}
13250
13251	static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13252	struct bpf_reg_state *src_reg)
13253	{
13254	s32 smin_val = src_reg->s32_min_value;
13255	u32 umin_val = src_reg->u32_min_value;
13256	u32 umax_val = src_reg->u32_max_value;
13257
13258	if (smin_val < `0` \|\| dst_reg->s32_min_value < `0`) {
13259	/ Ain't nobody got time to multiply that sign /
13260	__mark_reg32_unbounded(reg: dst_reg);
13261	return;
13262	}
13263	/ Both values are positive, so we can work with unsigned and*
13264	* copy the result to signed (unless it exceeds S32_MAX).
13265	*/
13266	if (umax_val > U16_MAX \|\| dst_reg->u32_max_value > U16_MAX) {
13267	/ Potential overflow, we know nothing /
13268	__mark_reg32_unbounded(reg: dst_reg);
13269	return;
13270	}
13271	dst_reg->u32_min_value *= umin_val;
13272	dst_reg->u32_max_value *= umax_val;
13273	if (dst_reg->u32_max_value > S32_MAX) {
13274	/ Overflow possible, we know nothing /
13275	dst_reg->s32_min_value = S32_MIN;
13276	dst_reg->s32_max_value = S32_MAX;
13277	} else {
13278	dst_reg->s32_min_value = dst_reg->u32_min_value;
13279	dst_reg->s32_max_value = dst_reg->u32_max_value;
13280	}
13281	}
13282
13283	static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13284	struct bpf_reg_state *src_reg)
13285	{
13286	s64 smin_val = src_reg->smin_value;
13287	u64 umin_val = src_reg->umin_value;
13288	u64 umax_val = src_reg->umax_value;
13289
13290	if (smin_val < `0` \|\| dst_reg->smin_value < `0`) {
13291	/ Ain't nobody got time to multiply that sign /
13292	__mark_reg64_unbounded(reg: dst_reg);
13293	return;
13294	}
13295	/ Both values are positive, so we can work with unsigned and*
13296	* copy the result to signed (unless it exceeds S64_MAX).
13297	*/
13298	if (umax_val > U32_MAX \|\| dst_reg->umax_value > U32_MAX) {
13299	/ Potential overflow, we know nothing /
13300	__mark_reg64_unbounded(reg: dst_reg);
13301	return;
13302	}
13303	dst_reg->umin_value *= umin_val;
13304	dst_reg->umax_value *= umax_val;
13305	if (dst_reg->umax_value > S64_MAX) {
13306	/ Overflow possible, we know nothing /
13307	dst_reg->smin_value = S64_MIN;
13308	dst_reg->smax_value = S64_MAX;
13309	} else {
13310	dst_reg->smin_value = dst_reg->umin_value;
13311	dst_reg->smax_value = dst_reg->umax_value;
13312	}
13313	}
13314
13315	static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
13316	struct bpf_reg_state *src_reg)
13317	{
13318	bool src_known = tnum_subreg_is_const(a: src_reg->var_off);
13319	bool dst_known = tnum_subreg_is_const(a: dst_reg->var_off);
13320	struct tnum var32_off = tnum_subreg(a: dst_reg->var_off);
13321	s32 smin_val = src_reg->s32_min_value;
13322	u32 umax_val = src_reg->u32_max_value;
13323
13324	if (src_known && dst_known) {
13325	__mark_reg32_known(reg: dst_reg, imm: var32_off.value);
13326	return;
13327	}
13328
13329	/ We get our minimum from the var_off, since that's inherently*
13330	* bitwise. Our maximum is the minimum of the operands' maxima.
13331	*/
13332	dst_reg->u32_min_value = var32_off.value;
13333	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
13334	if (dst_reg->s32_min_value < `0` \|\| smin_val < `0`) {
13335	/ Lose signed bounds when ANDing negative numbers,*
13336	* ain't nobody got time for that.
13337	*/
13338	dst_reg->s32_min_value = S32_MIN;
13339	dst_reg->s32_max_value = S32_MAX;
13340	} else {
13341	/ ANDing two positives gives a positive, so safe to*
13342	* cast result into s64.
13343	*/
13344	dst_reg->s32_min_value = dst_reg->u32_min_value;
13345	dst_reg->s32_max_value = dst_reg->u32_max_value;
13346	}
13347	}
13348
13349	static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
13350	struct bpf_reg_state *src_reg)
13351	{
13352	bool src_known = tnum_is_const(a: src_reg->var_off);
13353	bool dst_known = tnum_is_const(a: dst_reg->var_off);
13354	s64 smin_val = src_reg->smin_value;
13355	u64 umax_val = src_reg->umax_value;
13356
13357	if (src_known && dst_known) {
13358	__mark_reg_known(reg: dst_reg, imm: dst_reg->var_off.value);
13359	return;
13360	}
13361
13362	/ We get our minimum from the var_off, since that's inherently*
13363	* bitwise. Our maximum is the minimum of the operands' maxima.
13364	*/
13365	dst_reg->umin_value = dst_reg->var_off.value;
13366	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
13367	if (dst_reg->smin_value < `0` \|\| smin_val < `0`) {
13368	/ Lose signed bounds when ANDing negative numbers,*
13369	* ain't nobody got time for that.
13370	*/
13371	dst_reg->smin_value = S64_MIN;
13372	dst_reg->smax_value = S64_MAX;
13373	} else {
13374	/ ANDing two positives gives a positive, so safe to*
13375	* cast result into s64.
13376	*/
13377	dst_reg->smin_value = dst_reg->umin_value;
13378	dst_reg->smax_value = dst_reg->umax_value;
13379	}
13380	/ We may learn something more from the var_off /
13381	__update_reg_bounds(reg: dst_reg);
13382	}
13383
13384	static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
13385	struct bpf_reg_state *src_reg)
13386	{
13387	bool src_known = tnum_subreg_is_const(a: src_reg->var_off);
13388	bool dst_known = tnum_subreg_is_const(a: dst_reg->var_off);
13389	struct tnum var32_off = tnum_subreg(a: dst_reg->var_off);
13390	s32 smin_val = src_reg->s32_min_value;
13391	u32 umin_val = src_reg->u32_min_value;
13392
13393	if (src_known && dst_known) {
13394	__mark_reg32_known(reg: dst_reg, imm: var32_off.value);
13395	return;
13396	}
13397
13398	/ We get our maximum from the var_off, and our minimum is the*
13399	* maximum of the operands' minima
13400	*/
13401	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
13402	dst_reg->u32_max_value = var32_off.value \| var32_off.mask;
13403	if (dst_reg->s32_min_value < `0` \|\| smin_val < `0`) {
13404	/ Lose signed bounds when ORing negative numbers,*
13405	* ain't nobody got time for that.
13406	*/
13407	dst_reg->s32_min_value = S32_MIN;
13408	dst_reg->s32_max_value = S32_MAX;
13409	} else {
13410	/ ORing two positives gives a positive, so safe to*
13411	* cast result into s64.
13412	*/
13413	dst_reg->s32_min_value = dst_reg->u32_min_value;
13414	dst_reg->s32_max_value = dst_reg->u32_max_value;
13415	}
13416	}
13417
13418	static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
13419	struct bpf_reg_state *src_reg)
13420	{
13421	bool src_known = tnum_is_const(a: src_reg->var_off);
13422	bool dst_known = tnum_is_const(a: dst_reg->var_off);
13423	s64 smin_val = src_reg->smin_value;
13424	u64 umin_val = src_reg->umin_value;
13425
13426	if (src_known && dst_known) {
13427	__mark_reg_known(reg: dst_reg, imm: dst_reg->var_off.value);
13428	return;
13429	}
13430
13431	/ We get our maximum from the var_off, and our minimum is the*
13432	* maximum of the operands' minima
13433	*/
13434	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
13435	dst_reg->umax_value = dst_reg->var_off.value \| dst_reg->var_off.mask;
13436	if (dst_reg->smin_value < `0` \|\| smin_val < `0`) {
13437	/ Lose signed bounds when ORing negative numbers,*
13438	* ain't nobody got time for that.
13439	*/
13440	dst_reg->smin_value = S64_MIN;
13441	dst_reg->smax_value = S64_MAX;
13442	} else {
13443	/ ORing two positives gives a positive, so safe to*
13444	* cast result into s64.
13445	*/
13446	dst_reg->smin_value = dst_reg->umin_value;
13447	dst_reg->smax_value = dst_reg->umax_value;
13448	}
13449	/ We may learn something more from the var_off /
13450	__update_reg_bounds(reg: dst_reg);
13451	}
13452
13453	static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
13454	struct bpf_reg_state *src_reg)
13455	{
13456	bool src_known = tnum_subreg_is_const(a: src_reg->var_off);
13457	bool dst_known = tnum_subreg_is_const(a: dst_reg->var_off);
13458	struct tnum var32_off = tnum_subreg(a: dst_reg->var_off);
13459	s32 smin_val = src_reg->s32_min_value;
13460
13461	if (src_known && dst_known) {
13462	__mark_reg32_known(reg: dst_reg, imm: var32_off.value);
13463	return;
13464	}
13465
13466	/ We get both minimum and maximum from the var32_off. /
13467	dst_reg->u32_min_value = var32_off.value;
13468	dst_reg->u32_max_value = var32_off.value \| var32_off.mask;
13469
13470	if (dst_reg->s32_min_value >= `0` && smin_val >= `0`) {
13471	/ XORing two positive sign numbers gives a positive,*
13472	* so safe to cast u32 result into s32.
13473	*/
13474	dst_reg->s32_min_value = dst_reg->u32_min_value;
13475	dst_reg->s32_max_value = dst_reg->u32_max_value;
13476	} else {
13477	dst_reg->s32_min_value = S32_MIN;
13478	dst_reg->s32_max_value = S32_MAX;
13479	}
13480	}
13481
13482	static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
13483	struct bpf_reg_state *src_reg)
13484	{
13485	bool src_known = tnum_is_const(a: src_reg->var_off);
13486	bool dst_known = tnum_is_const(a: dst_reg->var_off);
13487	s64 smin_val = src_reg->smin_value;
13488
13489	if (src_known && dst_known) {
13490	/ dst_reg->var_off.value has been updated earlier /
13491	__mark_reg_known(reg: dst_reg, imm: dst_reg->var_off.value);
13492	return;
13493	}
13494
13495	/ We get both minimum and maximum from the var_off. /
13496	dst_reg->umin_value = dst_reg->var_off.value;
13497	dst_reg->umax_value = dst_reg->var_off.value \| dst_reg->var_off.mask;
13498
13499	if (dst_reg->smin_value >= `0` && smin_val >= `0`) {
13500	/ XORing two positive sign numbers gives a positive,*
13501	* so safe to cast u64 result into s64.
13502	*/
13503	dst_reg->smin_value = dst_reg->umin_value;
13504	dst_reg->smax_value = dst_reg->umax_value;
13505	} else {
13506	dst_reg->smin_value = S64_MIN;
13507	dst_reg->smax_value = S64_MAX;
13508	}
13509
13510	__update_reg_bounds(reg: dst_reg);
13511	}
13512
13513	static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
13514	u64 umin_val, u64 umax_val)
13515	{
13516	/ We lose all sign bit information (except what we can pick*
13517	* up from var_off)
13518	*/
13519	dst_reg->s32_min_value = S32_MIN;
13520	dst_reg->s32_max_value = S32_MAX;
13521	/ If we might shift our top bit out, then we know nothing /
13522	if (umax_val > `31` \|\| dst_reg->u32_max_value > `1ULL` << (`31` - umax_val)) {
13523	dst_reg->u32_min_value = `0`;
13524	dst_reg->u32_max_value = U32_MAX;
13525	} else {
13526	dst_reg->u32_min_value <<= umin_val;
13527	dst_reg->u32_max_value <<= umax_val;
13528	}
13529	}
13530
13531	static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
13532	struct bpf_reg_state *src_reg)
13533	{
13534	u32 umax_val = src_reg->u32_max_value;
13535	u32 umin_val = src_reg->u32_min_value;
13536	/ u32 alu operation will zext upper bits /
13537	struct tnum subreg = tnum_subreg(a: dst_reg->var_off);
13538
13539	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13540	dst_reg->var_off = tnum_subreg(a: tnum_lshift(a: subreg, shift: umin_val));
13541	/ Not required but being careful mark reg64 bounds as unknown so*
13542	* that we are forced to pick them up from tnum and zext later and
13543	* if some path skips this step we are still safe.
13544	*/
13545	__mark_reg64_unbounded(reg: dst_reg);
13546	__update_reg32_bounds(reg: dst_reg);
13547	}
13548
13549	static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
13550	u64 umin_val, u64 umax_val)
13551	{
13552	/ Special case <<32 because it is a common compiler pattern to sign*
13553	* extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
13554	* positive we know this shift will also be positive so we can track
13555	* bounds correctly. Otherwise we lose all sign bit information except
13556	* what we can pick up from var_off. Perhaps we can generalize this
13557	* later to shifts of any length.
13558	*/
13559	if (umin_val == `32` && umax_val == `32` && dst_reg->s32_max_value >= `0`)
13560	dst_reg->smax_value = (s64)dst_reg->s32_max_value << `32`;
13561	else
13562	dst_reg->smax_value = S64_MAX;
13563
13564	if (umin_val == `32` && umax_val == `32` && dst_reg->s32_min_value >= `0`)
13565	dst_reg->smin_value = (s64)dst_reg->s32_min_value << `32`;
13566	else
13567	dst_reg->smin_value = S64_MIN;
13568
13569	/ If we might shift our top bit out, then we know nothing /
13570	if (dst_reg->umax_value > `1ULL` << (`63` - umax_val)) {
13571	dst_reg->umin_value = `0`;
13572	dst_reg->umax_value = U64_MAX;
13573	} else {
13574	dst_reg->umin_value <<= umin_val;
13575	dst_reg->umax_value <<= umax_val;
13576	}
13577	}
13578
13579	static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
13580	struct bpf_reg_state *src_reg)
13581	{
13582	u64 umax_val = src_reg->umax_value;
13583	u64 umin_val = src_reg->umin_value;
13584
13585	/ scalar64 calc uses 32bit unshifted bounds so must be called first /
13586	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
13587	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13588
13589	dst_reg->var_off = tnum_lshift(a: dst_reg->var_off, shift: umin_val);
13590	/ We may learn something more from the var_off /
13591	__update_reg_bounds(reg: dst_reg);
13592	}
13593
13594	static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
13595	struct bpf_reg_state *src_reg)
13596	{
13597	struct tnum subreg = tnum_subreg(a: dst_reg->var_off);
13598	u32 umax_val = src_reg->u32_max_value;
13599	u32 umin_val = src_reg->u32_min_value;
13600
13601	/ BPF_RSH is an unsigned shift. If the value in dst_reg might*
13602	* be negative, then either:
13603	* 1) src_reg might be zero, so the sign bit of the result is
13604	* unknown, so we lose our signed bounds
13605	* 2) it's known negative, thus the unsigned bounds capture the
13606	* signed bounds
13607	* 3) the signed bounds cross zero, so they tell us nothing
13608	* about the result
13609	* If the value in dst_reg is known nonnegative, then again the
13610	* unsigned bounds capture the signed bounds.
13611	* Thus, in all cases it suffices to blow away our signed bounds
13612	* and rely on inferring new ones from the unsigned bounds and
13613	* var_off of the result.
13614	*/
13615	dst_reg->s32_min_value = S32_MIN;
13616	dst_reg->s32_max_value = S32_MAX;
13617
13618	dst_reg->var_off = tnum_rshift(a: subreg, shift: umin_val);
13619	dst_reg->u32_min_value >>= umax_val;
13620	dst_reg->u32_max_value >>= umin_val;
13621
13622	__mark_reg64_unbounded(reg: dst_reg);
13623	__update_reg32_bounds(reg: dst_reg);
13624	}
13625
13626	static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
13627	struct bpf_reg_state *src_reg)
13628	{
13629	u64 umax_val = src_reg->umax_value;
13630	u64 umin_val = src_reg->umin_value;
13631
13632	/ BPF_RSH is an unsigned shift. If the value in dst_reg might*
13633	* be negative, then either:
13634	* 1) src_reg might be zero, so the sign bit of the result is
13635	* unknown, so we lose our signed bounds
13636	* 2) it's known negative, thus the unsigned bounds capture the
13637	* signed bounds
13638	* 3) the signed bounds cross zero, so they tell us nothing
13639	* about the result
13640	* If the value in dst_reg is known nonnegative, then again the
13641	* unsigned bounds capture the signed bounds.
13642	* Thus, in all cases it suffices to blow away our signed bounds
13643	* and rely on inferring new ones from the unsigned bounds and
13644	* var_off of the result.
13645	*/
13646	dst_reg->smin_value = S64_MIN;
13647	dst_reg->smax_value = S64_MAX;
13648	dst_reg->var_off = tnum_rshift(a: dst_reg->var_off, shift: umin_val);
13649	dst_reg->umin_value >>= umax_val;
13650	dst_reg->umax_value >>= umin_val;
13651
13652	/ Its not easy to operate on alu32 bounds here because it depends*
13653	* on bits being shifted in. Take easy way out and mark unbounded
13654	* so we can recalculate later from tnum.
13655	*/
13656	__mark_reg32_unbounded(reg: dst_reg);
13657	__update_reg_bounds(reg: dst_reg);
13658	}
13659
13660	static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
13661	struct bpf_reg_state *src_reg)
13662	{
13663	u64 umin_val = src_reg->u32_min_value;
13664
13665	/ Upon reaching here, src_known is true and*
13666	* umax_val is equal to umin_val.
13667	*/
13668	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
13669	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
13670
13671	dst_reg->var_off = tnum_arshift(a: tnum_subreg(a: dst_reg->var_off), min_shift: umin_val, insn_bitness: `32`);
13672
13673	/ blow away the dst_reg umin_value/umax_value and rely on*
13674	* dst_reg var_off to refine the result.
13675	*/
13676	dst_reg->u32_min_value = `0`;
13677	dst_reg->u32_max_value = U32_MAX;
13678
13679	__mark_reg64_unbounded(reg: dst_reg);
13680	__update_reg32_bounds(reg: dst_reg);
13681	}
13682
13683	static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
13684	struct bpf_reg_state *src_reg)
13685	{
13686	u64 umin_val = src_reg->umin_value;
13687
13688	/ Upon reaching here, src_known is true and umax_val is equal*
13689	* to umin_val.
13690	*/
13691	dst_reg->smin_value >>= umin_val;
13692	dst_reg->smax_value >>= umin_val;
13693
13694	dst_reg->var_off = tnum_arshift(a: dst_reg->var_off, min_shift: umin_val, insn_bitness: `64`);
13695
13696	/ blow away the dst_reg umin_value/umax_value and rely on*
13697	* dst_reg var_off to refine the result.
13698	*/
13699	dst_reg->umin_value = `0`;
13700	dst_reg->umax_value = U64_MAX;
13701
13702	/ Its not easy to operate on alu32 bounds here because it depends*
13703	* on bits being shifted in from upper 32-bits. Take easy way out
13704	* and mark unbounded so we can recalculate later from tnum.
13705	*/
13706	__mark_reg32_unbounded(reg: dst_reg);
13707	__update_reg_bounds(reg: dst_reg);
13708	}
13709
13710	/ WARNING: This function does calculations on 64-bit values, but the actual*
13711	* execution may occur on 32-bit values. Therefore, things like bitshifts
13712	* need extra checks in the 32-bit case.
13713	*/
13714	static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
13715	struct bpf_insn *insn,
13716	struct bpf_reg_state *dst_reg,
13717	struct bpf_reg_state src_reg)
13718	{
13719	struct bpf_reg_state *regs = cur_regs(env);
13720	u8 opcode = BPF_OP(insn->code);
13721	bool src_known;
13722	s64 smin_val, smax_val;
13723	u64 umin_val, umax_val;
13724	s32 s32_min_val, s32_max_val;
13725	u32 u32_min_val, u32_max_val;
13726	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? `64` : `32`;
13727	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
13728	int ret;
13729
13730	smin_val = src_reg.smin_value;
13731	smax_val = src_reg.smax_value;
13732	umin_val = src_reg.umin_value;
13733	umax_val = src_reg.umax_value;
13734
13735	s32_min_val = src_reg.s32_min_value;
13736	s32_max_val = src_reg.s32_max_value;
13737	u32_min_val = src_reg.u32_min_value;
13738	u32_max_val = src_reg.u32_max_value;
13739
13740	if (alu32) {
13741	src_known = tnum_subreg_is_const(a: src_reg.var_off);
13742	if ((src_known &&
13743	(s32_min_val != s32_max_val \|\| u32_min_val != u32_max_val)) \|\|
13744	s32_min_val > s32_max_val \|\| u32_min_val > u32_max_val) {
13745	/ Taint dst register if offset had invalid bounds*
13746	* derived from e.g. dead branches.
13747	*/
13748	__mark_reg_unknown(env, reg: dst_reg);
13749	return `0`;
13750	}
13751	} else {
13752	src_known = tnum_is_const(a: src_reg.var_off);
13753	if ((src_known &&
13754	(smin_val != smax_val \|\| umin_val != umax_val)) \|\|
13755	smin_val > smax_val \|\| umin_val > umax_val) {
13756	/ Taint dst register if offset had invalid bounds*
13757	* derived from e.g. dead branches.
13758	*/
13759	__mark_reg_unknown(env, reg: dst_reg);
13760	return `0`;
13761	}
13762	}
13763
13764	if (!src_known &&
13765	opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
13766	__mark_reg_unknown(env, reg: dst_reg);
13767	return `0`;
13768	}
13769
13770	if (sanitize_needed(opcode)) {
13771	ret = sanitize_val_alu(env, insn);
13772	if (ret < `0`)
13773	return sanitize_err(env, insn, reason: ret, NULL, NULL);
13774	}
13775
13776	/ Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.*
13777	* There are two classes of instructions: The first class we track both
13778	* alu32 and alu64 sign/unsigned bounds independently this provides the
13779	* greatest amount of precision when alu operations are mixed with jmp32
13780	* operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
13781	* and BPF_OR. This is possible because these ops have fairly easy to
13782	* understand and calculate behavior in both 32-bit and 64-bit alu ops.
13783	* See alu32 verifier tests for examples. The second class of
13784	* operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
13785	* with regards to tracking sign/unsigned bounds because the bits may
13786	* cross subreg boundaries in the alu64 case. When this happens we mark
13787	* the reg unbounded in the subreg bound space and use the resulting
13788	* tnum to calculate an approximation of the sign/unsigned bounds.
13789	*/
13790	switch (opcode) {
13791	case BPF_ADD:
13792	scalar32_min_max_add(dst_reg, src_reg: &src_reg);
13793	scalar_min_max_add(dst_reg, src_reg: &src_reg);
13794	dst_reg->var_off = tnum_add(a: dst_reg->var_off, b: src_reg.var_off);
13795	break;
13796	case BPF_SUB:
13797	scalar32_min_max_sub(dst_reg, src_reg: &src_reg);
13798	scalar_min_max_sub(dst_reg, src_reg: &src_reg);
13799	dst_reg->var_off = tnum_sub(a: dst_reg->var_off, b: src_reg.var_off);
13800	break;
13801	case BPF_MUL:
13802	dst_reg->var_off = tnum_mul(a: dst_reg->var_off, b: src_reg.var_off);
13803	scalar32_min_max_mul(dst_reg, src_reg: &src_reg);
13804	scalar_min_max_mul(dst_reg, src_reg: &src_reg);
13805	break;
13806	case BPF_AND:
13807	dst_reg->var_off = tnum_and(a: dst_reg->var_off, b: src_reg.var_off);
13808	scalar32_min_max_and(dst_reg, src_reg: &src_reg);
13809	scalar_min_max_and(dst_reg, src_reg: &src_reg);
13810	break;
13811	case BPF_OR:
13812	dst_reg->var_off = tnum_or(a: dst_reg->var_off, b: src_reg.var_off);
13813	scalar32_min_max_or(dst_reg, src_reg: &src_reg);
13814	scalar_min_max_or(dst_reg, src_reg: &src_reg);
13815	break;
13816	case BPF_XOR:
13817	dst_reg->var_off = tnum_xor(a: dst_reg->var_off, b: src_reg.var_off);
13818	scalar32_min_max_xor(dst_reg, src_reg: &src_reg);
13819	scalar_min_max_xor(dst_reg, src_reg: &src_reg);
13820	break;
13821	case BPF_LSH:
13822	if (umax_val >= insn_bitness) {
13823	/ Shifts greater than 31 or 63 are undefined.*
13824	* This includes shifts by a negative number.
13825	*/
13826	mark_reg_unknown(env, regs, regno: insn->dst_reg);
13827	break;
13828	}
13829	if (alu32)
13830	scalar32_min_max_lsh(dst_reg, src_reg: &src_reg);
13831	else
13832	scalar_min_max_lsh(dst_reg, src_reg: &src_reg);
13833	break;
13834	case BPF_RSH:
13835	if (umax_val >= insn_bitness) {
13836	/ Shifts greater than 31 or 63 are undefined.*
13837	* This includes shifts by a negative number.
13838	*/
13839	mark_reg_unknown(env, regs, regno: insn->dst_reg);
13840	break;
13841	}
13842	if (alu32)
13843	scalar32_min_max_rsh(dst_reg, src_reg: &src_reg);
13844	else
13845	scalar_min_max_rsh(dst_reg, src_reg: &src_reg);
13846	break;
13847	case BPF_ARSH:
13848	if (umax_val >= insn_bitness) {
13849	/ Shifts greater than 31 or 63 are undefined.*
13850	* This includes shifts by a negative number.
13851	*/
13852	mark_reg_unknown(env, regs, regno: insn->dst_reg);
13853	break;
13854	}
13855	if (alu32)
13856	scalar32_min_max_arsh(dst_reg, src_reg: &src_reg);
13857	else
13858	scalar_min_max_arsh(dst_reg, src_reg: &src_reg);
13859	break;
13860	default:
13861	mark_reg_unknown(env, regs, regno: insn->dst_reg);
13862	break;
13863	}
13864
13865	/ ALU32 ops are zero extended into 64bit register /
13866	if (alu32)
13867	zext_32_to_64(reg: dst_reg);
13868	reg_bounds_sync(reg: dst_reg);
13869	return `0`;
13870	}
13871
13872	/ Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max*
13873	* and var_off.
13874	*/
13875	static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
13876	struct bpf_insn *insn)
13877	{
13878	struct bpf_verifier_state *vstate = env->cur_state;
13879	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13880	struct bpf_reg_state regs = state->regs, dst_reg, *src_reg;
13881	struct bpf_reg_state *ptr_reg = NULL, off_reg = {`0`};
13882	u8 opcode = BPF_OP(insn->code);
13883	int err;
13884
13885	dst_reg = &regs[insn->dst_reg];
13886	src_reg = NULL;
13887
13888	if (dst_reg->type == PTR_TO_ARENA) {
13889	struct bpf_insn_aux_data *aux = cur_aux(env);
13890
13891	if (BPF_CLASS(insn->code) == BPF_ALU64)
13892	/*
13893	* 32-bit operations zero upper bits automatically.
13894	* 64-bit operations need to be converted to 32.
13895	*/
13896	aux->needs_zext = true;
13897
13898	/ Any arithmetic operations are allowed on arena pointers /
13899	return `0`;
13900	}
13901
13902	if (dst_reg->type != SCALAR_VALUE)
13903	ptr_reg = dst_reg;
13904	else
13905	/ Make sure ID is cleared otherwise dst_reg min/max could be*
13906	* incorrectly propagated into other registers by find_equal_scalars()
13907	*/
13908	dst_reg->id = `0`;
13909	if (BPF_SRC(insn->code) == BPF_X) {
13910	src_reg = &regs[insn->src_reg];
13911	if (src_reg->type != SCALAR_VALUE) {
13912	if (dst_reg->type != SCALAR_VALUE) {
13913	/ Combining two pointers by any ALU op yields*
13914	* an arbitrary scalar. Disallow all math except
13915	* pointer subtraction
13916	*/
13917	if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13918	mark_reg_unknown(env, regs, regno: insn->dst_reg);
13919	return `0`;
13920	}
13921	verbose(private_data: env, fmt: "R%d pointer %s pointer prohibited\n",
13922	insn->dst_reg,
13923	bpf_alu_string[opcode >> `4`]);
13924	return -EACCES;
13925	} else {
13926	/ scalar += pointer*
13927	* This is legal, but we have to reverse our
13928	* src/dest handling in computing the range
13929	*/
13930	err = mark_chain_precision(env, regno: insn->dst_reg);
13931	if (err)
13932	return err;
13933	return adjust_ptr_min_max_vals(env, insn,
13934	ptr_reg: src_reg, off_reg: dst_reg);
13935	}
13936	} else if (ptr_reg) {
13937	/ pointer += scalar /
13938	err = mark_chain_precision(env, regno: insn->src_reg);
13939	if (err)
13940	return err;
13941	return adjust_ptr_min_max_vals(env, insn,
13942	ptr_reg: dst_reg, off_reg: src_reg);
13943	} else if (dst_reg->precise) {
13944	/ if dst_reg is precise, src_reg should be precise as well /
13945	err = mark_chain_precision(env, regno: insn->src_reg);
13946	if (err)
13947	return err;
13948	}
13949	} else {
13950	/ Pretend the src is a reg with a known value, since we only*
13951	* need to be able to read from this state.
13952	*/
13953	off_reg.type = SCALAR_VALUE;
13954	__mark_reg_known(reg: &off_reg, imm: insn->imm);
13955	src_reg = &off_reg;
13956	if (ptr_reg) / pointer += K /
13957	return adjust_ptr_min_max_vals(env, insn,
13958	ptr_reg, off_reg: src_reg);
13959	}
13960
13961	/ Got here implies adding two SCALAR_VALUEs /
13962	if (WARN_ON_ONCE(ptr_reg)) {
13963	print_verifier_state(env, state, print_all: true);
13964	verbose(private_data: env, fmt: "verifier internal error: unexpected ptr_reg\n");
13965	return -EINVAL;
13966	}
13967	if (WARN_ON(!src_reg)) {
13968	print_verifier_state(env, state, print_all: true);
13969	verbose(private_data: env, fmt: "verifier internal error: no src_reg\n");
13970	return -EINVAL;
13971	}
13972	return adjust_scalar_min_max_vals(env, insn, dst_reg, src_reg: *src_reg);
13973	}
13974
13975	/ check validity of 32-bit and 64-bit arithmetic operations /
13976	static int check_alu_op(struct bpf_verifier_env env, struct* bpf_insn *insn)
13977	{
13978	struct bpf_reg_state *regs = cur_regs(env);
13979	u8 opcode = BPF_OP(insn->code);
13980	int err;
13981
13982	if (opcode == BPF_END \|\| opcode == BPF_NEG) {
13983	if (opcode == BPF_NEG) {
13984	if (BPF_SRC(insn->code) != BPF_K \|\|
13985	insn->src_reg != BPF_REG_0 \|\|
13986	insn->off != `0` \|\| insn->imm != `0`) {
13987	verbose(private_data: env, fmt: "BPF_NEG uses reserved fields\n");
13988	return -EINVAL;
13989	}
13990	} else {
13991	if (insn->src_reg != BPF_REG_0 \|\| insn->off != `0` \|\|
13992	(insn->imm != `16` && insn->imm != `32` && insn->imm != `64`) \|\|
13993	(BPF_CLASS(insn->code) == BPF_ALU64 &&
13994	BPF_SRC(insn->code) != BPF_TO_LE)) {
13995	verbose(private_data: env, fmt: "BPF_END uses reserved fields\n");
13996	return -EINVAL;
13997	}
13998	}
13999
14000	/ check src operand /
14001	err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
14002	if (err)
14003	return err;
14004
14005	if (is_pointer_value(env, regno: insn->dst_reg)) {
14006	verbose(private_data: env, fmt: "R%d pointer arithmetic prohibited\n",
14007	insn->dst_reg);
14008	return -EACCES;
14009	}
14010
14011	/ check dest operand /
14012	err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP);
14013	if (err)
14014	return err;
14015
14016	} else if (opcode == BPF_MOV) {
14017
14018	if (BPF_SRC(insn->code) == BPF_X) {
14019	if (BPF_CLASS(insn->code) == BPF_ALU) {
14020	if ((insn->off != `0` && insn->off != `8` && insn->off != `16`) \|\|
14021	insn->imm) {
14022	verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
14023	return -EINVAL;
14024	}
14025	} else if (insn->off == BPF_ADDR_SPACE_CAST) {
14026	if (insn->imm != `1` && insn->imm != `1u` << `16`) {
14027	verbose(private_data: env, fmt: "addr_space_cast insn can only convert between address space 1 and 0\n");
14028	return -EINVAL;
14029	}
14030	if (!env->prog->aux->arena) {
14031	verbose(private_data: env, fmt: "addr_space_cast insn can only be used in a program that has an associated arena\n");
14032	return -EINVAL;
14033	}
14034	} else {
14035	if ((insn->off != `0` && insn->off != `8` && insn->off != `16` &&
14036	insn->off != `32`) \|\| insn->imm) {
14037	verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
14038	return -EINVAL;
14039	}
14040	}
14041
14042	/ check src operand /
14043	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
14044	if (err)
14045	return err;
14046	} else {
14047	if (insn->src_reg != BPF_REG_0 \|\| insn->off != `0`) {
14048	verbose(private_data: env, fmt: "BPF_MOV uses reserved fields\n");
14049	return -EINVAL;
14050	}
14051	}
14052
14053	/ check dest operand, mark as required later /
14054	err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP_NO_MARK);
14055	if (err)
14056	return err;
14057
14058	if (BPF_SRC(insn->code) == BPF_X) {
14059	struct bpf_reg_state *src_reg = regs + insn->src_reg;
14060	struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14061
14062	if (BPF_CLASS(insn->code) == BPF_ALU64) {
14063	if (insn->imm) {
14064	/ off == BPF_ADDR_SPACE_CAST /
14065	mark_reg_unknown(env, regs, regno: insn->dst_reg);
14066	if (insn->imm == `1`) { / cast from as(1) to as(0) /
14067	dst_reg->type = PTR_TO_ARENA;
14068	/ PTR_TO_ARENA is 32-bit /
14069	dst_reg->subreg_def = env->insn_idx + `1`;
14070	}
14071	} else if (insn->off == `0`) {
14072	/ case: R1 = R2*
14073	* copy register state to dest reg
14074	*/
14075	assign_scalar_id_before_mov(env, src_reg);
14076	copy_register_state(dst: dst_reg, src: src_reg);
14077	dst_reg->live \|= REG_LIVE_WRITTEN;
14078	dst_reg->subreg_def = DEF_NOT_SUBREG;
14079	} else {
14080	/ case: R1 = (s8, s16 s32)R2 /
14081	if (is_pointer_value(env, regno: insn->src_reg)) {
14082	verbose(private_data: env,
14083	fmt: "R%d sign-extension part of pointer\n",
14084	insn->src_reg);
14085	return -EACCES;
14086	} else if (src_reg->type == SCALAR_VALUE) {
14087	bool no_sext;
14088
14089	no_sext = src_reg->umax_value < (`1ULL` << (insn->off - `1`));
14090	if (no_sext)
14091	assign_scalar_id_before_mov(env, src_reg);
14092	copy_register_state(dst: dst_reg, src: src_reg);
14093	if (!no_sext)
14094	dst_reg->id = `0`;
14095	coerce_reg_to_size_sx(reg: dst_reg, size: insn->off >> `3`);
14096	dst_reg->live \|= REG_LIVE_WRITTEN;
14097	dst_reg->subreg_def = DEF_NOT_SUBREG;
14098	} else {
14099	mark_reg_unknown(env, regs, regno: insn->dst_reg);
14100	}
14101	}
14102	} else {
14103	/ R1 = (u32) R2 /
14104	if (is_pointer_value(env, regno: insn->src_reg)) {
14105	verbose(private_data: env,
14106	fmt: "R%d partial copy of pointer\n",
14107	insn->src_reg);
14108	return -EACCES;
14109	} else if (src_reg->type == SCALAR_VALUE) {
14110	if (insn->off == `0`) {
14111	bool is_src_reg_u32 = get_reg_width(reg: src_reg) <= `32`;
14112
14113	if (is_src_reg_u32)
14114	assign_scalar_id_before_mov(env, src_reg);
14115	copy_register_state(dst: dst_reg, src: src_reg);
14116	/ Make sure ID is cleared if src_reg is not in u32*
14117	* range otherwise dst_reg min/max could be incorrectly
14118	* propagated into src_reg by find_equal_scalars()
14119	*/
14120	if (!is_src_reg_u32)
14121	dst_reg->id = `0`;
14122	dst_reg->live \|= REG_LIVE_WRITTEN;
14123	dst_reg->subreg_def = env->insn_idx + `1`;
14124	} else {
14125	/ case: W1 = (s8, s16)W2 /
14126	bool no_sext = src_reg->umax_value < (`1ULL` << (insn->off - `1`));
14127
14128	if (no_sext)
14129	assign_scalar_id_before_mov(env, src_reg);
14130	copy_register_state(dst: dst_reg, src: src_reg);
14131	if (!no_sext)
14132	dst_reg->id = `0`;
14133	dst_reg->live \|= REG_LIVE_WRITTEN;
14134	dst_reg->subreg_def = env->insn_idx + `1`;
14135	coerce_subreg_to_size_sx(reg: dst_reg, size: insn->off >> `3`);
14136	}
14137	} else {
14138	mark_reg_unknown(env, regs,
14139	regno: insn->dst_reg);
14140	}
14141	zext_32_to_64(reg: dst_reg);
14142	reg_bounds_sync(reg: dst_reg);
14143	}
14144	} else {
14145	/ case: R = imm*
14146	* remember the value we stored into this reg
14147	*/
14148	/ clear any state __mark_reg_known doesn't set /
14149	mark_reg_unknown(env, regs, regno: insn->dst_reg);
14150	regs[insn->dst_reg].type = SCALAR_VALUE;
14151	if (BPF_CLASS(insn->code) == BPF_ALU64) {
14152	__mark_reg_known(reg: regs + insn->dst_reg,
14153	imm: insn->imm);
14154	} else {
14155	__mark_reg_known(reg: regs + insn->dst_reg,
14156	imm: (u32)insn->imm);
14157	}
14158	}
14159
14160	} else if (opcode > BPF_END) {
14161	verbose(private_data: env, fmt: "invalid BPF_ALU opcode %x\n", opcode);
14162	return -EINVAL;
14163
14164	} else { / all other ALU ops: and, sub, xor, add, ... /
14165
14166	if (BPF_SRC(insn->code) == BPF_X) {
14167	if (insn->imm != `0` \|\| insn->off > `1` \|\|
14168	(insn->off == `1` && opcode != BPF_MOD && opcode != BPF_DIV)) {
14169	verbose(private_data: env, fmt: "BPF_ALU uses reserved fields\n");
14170	return -EINVAL;
14171	}
14172	/ check src1 operand /
14173	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
14174	if (err)
14175	return err;
14176	} else {
14177	if (insn->src_reg != BPF_REG_0 \|\| insn->off > `1` \|\|
14178	(insn->off == `1` && opcode != BPF_MOD && opcode != BPF_DIV)) {
14179	verbose(private_data: env, fmt: "BPF_ALU uses reserved fields\n");
14180	return -EINVAL;
14181	}
14182	}
14183
14184	/ check src2 operand /
14185	err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
14186	if (err)
14187	return err;
14188
14189	if ((opcode == BPF_MOD \|\| opcode == BPF_DIV) &&
14190	BPF_SRC(insn->code) == BPF_K && insn->imm == `0`) {
14191	verbose(private_data: env, fmt: "div by zero\n");
14192	return -EINVAL;
14193	}
14194
14195	if ((opcode == BPF_LSH \|\| opcode == BPF_RSH \|\|
14196	opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
14197	int size = BPF_CLASS(insn->code) == BPF_ALU64 ? `64` : `32`;
14198
14199	if (insn->imm < `0` \|\| insn->imm >= size) {
14200	verbose(private_data: env, fmt: "invalid shift %d\n", insn->imm);
14201	return -EINVAL;
14202	}
14203	}
14204
14205	/ check dest operand /
14206	err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP_NO_MARK);
14207	err = err ?: adjust_reg_min_max_vals(env, insn);
14208	if (err)
14209	return err;
14210	}
14211
14212	return reg_bounds_sanity_check(env, reg: &regs[insn->dst_reg], ctx: "alu");
14213	}
14214
14215	static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
14216	struct bpf_reg_state *dst_reg,
14217	enum bpf_reg_type type,
14218	bool range_right_open)
14219	{
14220	struct bpf_func_state *state;
14221	struct bpf_reg_state *reg;
14222	int new_range;
14223
14224	if (dst_reg->off < `0` \|\|
14225	(dst_reg->off == `0` && range_right_open))
14226	/ This doesn't give us any range /
14227	return;
14228
14229	if (dst_reg->umax_value > MAX_PACKET_OFF \|\|
14230	dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
14231	/ Risk of overflow. For instance, ptr + (1<<63) may be less*
14232	* than pkt_end, but that's because it's also less than pkt.
14233	*/
14234	return;
14235
14236	new_range = dst_reg->off;
14237	if (range_right_open)
14238	new_range++;
14239
14240	/ Examples for register markings:*
14241	*
14242	* pkt_data in dst register:
14243	*
14244	* r2 = r3;
14245	* r2 += 8;
14246	* if (r2 > pkt_end) goto <handle exception>
14247	* <access okay>
14248	*
14249	* r2 = r3;
14250	* r2 += 8;
14251	* if (r2 < pkt_end) goto <access okay>
14252	* <handle exception>
14253	*
14254	* Where:
14255	* r2 == dst_reg, pkt_end == src_reg
14256	* r2=pkt(id=n,off=8,r=0)
14257	* r3=pkt(id=n,off=0,r=0)
14258	*
14259	* pkt_data in src register:
14260	*
14261	* r2 = r3;
14262	* r2 += 8;
14263	* if (pkt_end >= r2) goto <access okay>
14264	* <handle exception>
14265	*
14266	* r2 = r3;
14267	* r2 += 8;
14268	* if (pkt_end <= r2) goto <handle exception>
14269	* <access okay>
14270	*
14271	* Where:
14272	* pkt_end == dst_reg, r2 == src_reg
14273	* r2=pkt(id=n,off=8,r=0)
14274	* r3=pkt(id=n,off=0,r=0)
14275	*
14276	* Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
14277	* or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
14278	* and [r3, r3 + 8-1) respectively is safe to access depending on
14279	* the check.
14280	*/
14281
14282	/ If our ids match, then we must have the same max_value. And we*
14283	* don't care about the other reg's fixed offset, since if it's too big
14284	* the range won't allow anything.
14285	* dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
14286	*/
14287	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14288	if (reg->type == type && reg->id == dst_reg->id)
14289	/ keep the maximum range already checked /
14290	reg->range = max(reg->range, new_range);
14291	}));
14292	}
14293
14294	/*
14295	* <reg1> <op> <reg2>, currently assuming reg2 is a constant
14296	*/
14297	static int is_scalar_branch_taken(struct bpf_reg_state reg1, struct* bpf_reg_state *reg2,
14298	u8 opcode, bool is_jmp32)
14299	{
14300	struct tnum t1 = is_jmp32 ? tnum_subreg(a: reg1->var_off) : reg1->var_off;
14301	struct tnum t2 = is_jmp32 ? tnum_subreg(a: reg2->var_off) : reg2->var_off;
14302	u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
14303	u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
14304	s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
14305	s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
14306	u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
14307	u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
14308	s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
14309	s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;
14310
14311	switch (opcode) {
14312	case BPF_JEQ:
14313	/ constants, umin/umax and smin/smax checks would be*
14314	* redundant in this case because they all should match
14315	*/
14316	if (tnum_is_const(a: t1) && tnum_is_const(a: t2))
14317	return t1.value == t2.value;
14318	/ non-overlapping ranges /
14319	if (umin1 > umax2 \|\| umax1 < umin2)
14320	return `0`;
14321	if (smin1 > smax2 \|\| smax1 < smin2)
14322	return `0`;
14323	if (!is_jmp32) {
14324	/ if 64-bit ranges are inconclusive, see if we can*
14325	* utilize 32-bit subrange knowledge to eliminate
14326	* branches that can't be taken a priori
14327	*/
14328	if (reg1->u32_min_value > reg2->u32_max_value \|\|
14329	reg1->u32_max_value < reg2->u32_min_value)
14330	return `0`;
14331	if (reg1->s32_min_value > reg2->s32_max_value \|\|
14332	reg1->s32_max_value < reg2->s32_min_value)
14333	return `0`;
14334	}
14335	break;
14336	case BPF_JNE:
14337	/ constants, umin/umax and smin/smax checks would be*
14338	* redundant in this case because they all should match
14339	*/
14340	if (tnum_is_const(a: t1) && tnum_is_const(a: t2))
14341	return t1.value != t2.value;
14342	/ non-overlapping ranges /
14343	if (umin1 > umax2 \|\| umax1 < umin2)
14344	return `1`;
14345	if (smin1 > smax2 \|\| smax1 < smin2)
14346	return `1`;
14347	if (!is_jmp32) {
14348	/ if 64-bit ranges are inconclusive, see if we can*
14349	* utilize 32-bit subrange knowledge to eliminate
14350	* branches that can't be taken a priori
14351	*/
14352	if (reg1->u32_min_value > reg2->u32_max_value \|\|
14353	reg1->u32_max_value < reg2->u32_min_value)
14354	return `1`;
14355	if (reg1->s32_min_value > reg2->s32_max_value \|\|
14356	reg1->s32_max_value < reg2->s32_min_value)
14357	return `1`;
14358	}
14359	break;
14360	case BPF_JSET:
14361	if (!is_reg_const(reg: reg2, subreg32: is_jmp32)) {
14362	swap(reg1, reg2);
14363	swap(t1, t2);
14364	}
14365	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14366	return -`1`;
14367	if ((~t1.mask & t1.value) & t2.value)
14368	return `1`;
14369	if (!((t1.mask \| t1.value) & t2.value))
14370	return `0`;
14371	break;
14372	case BPF_JGT:
14373	if (umin1 > umax2)
14374	return `1`;
14375	else if (umax1 <= umin2)
14376	return `0`;
14377	break;
14378	case BPF_JSGT:
14379	if (smin1 > smax2)
14380	return `1`;
14381	else if (smax1 <= smin2)
14382	return `0`;
14383	break;
14384	case BPF_JLT:
14385	if (umax1 < umin2)
14386	return `1`;
14387	else if (umin1 >= umax2)
14388	return `0`;
14389	break;
14390	case BPF_JSLT:
14391	if (smax1 < smin2)
14392	return `1`;
14393	else if (smin1 >= smax2)
14394	return `0`;
14395	break;
14396	case BPF_JGE:
14397	if (umin1 >= umax2)
14398	return `1`;
14399	else if (umax1 < umin2)
14400	return `0`;
14401	break;
14402	case BPF_JSGE:
14403	if (smin1 >= smax2)
14404	return `1`;
14405	else if (smax1 < smin2)
14406	return `0`;
14407	break;
14408	case BPF_JLE:
14409	if (umax1 <= umin2)
14410	return `1`;
14411	else if (umin1 > umax2)
14412	return `0`;
14413	break;
14414	case BPF_JSLE:
14415	if (smax1 <= smin2)
14416	return `1`;
14417	else if (smin1 > smax2)
14418	return `0`;
14419	break;
14420	}
14421
14422	return -`1`;
14423	}
14424
14425	static int flip_opcode(u32 opcode)
14426	{
14427	/ How can we transform "a <op> b" into "b <op> a"? /
14428	static const u8 opcode_flip[`16`] = {
14429	/ these stay the same /
14430	[BPF_JEQ >> `4`] = BPF_JEQ,
14431	[BPF_JNE >> `4`] = BPF_JNE,
14432	[BPF_JSET >> `4`] = BPF_JSET,
14433	/ these swap "lesser" and "greater" (L and G in the opcodes) /
14434	[BPF_JGE >> `4`] = BPF_JLE,
14435	[BPF_JGT >> `4`] = BPF_JLT,
14436	[BPF_JLE >> `4`] = BPF_JGE,
14437	[BPF_JLT >> `4`] = BPF_JGT,
14438	[BPF_JSGE >> `4`] = BPF_JSLE,
14439	[BPF_JSGT >> `4`] = BPF_JSLT,
14440	[BPF_JSLE >> `4`] = BPF_JSGE,
14441	[BPF_JSLT >> `4`] = BPF_JSGT
14442	};
14443	return opcode_flip[opcode >> `4`];
14444	}
14445
14446	static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
14447	struct bpf_reg_state *src_reg,
14448	u8 opcode)
14449	{
14450	struct bpf_reg_state *pkt;
14451
14452	if (src_reg->type == PTR_TO_PACKET_END) {
14453	pkt = dst_reg;
14454	} else if (dst_reg->type == PTR_TO_PACKET_END) {
14455	pkt = src_reg;
14456	opcode = flip_opcode(opcode);
14457	} else {
14458	return -`1`;
14459	}
14460
14461	if (pkt->range >= `0`)
14462	return -`1`;
14463
14464	switch (opcode) {
14465	case BPF_JLE:
14466	/ pkt <= pkt_end /
14467	fallthrough;
14468	case BPF_JGT:
14469	/ pkt > pkt_end /
14470	if (pkt->range == BEYOND_PKT_END)
14471	/ pkt has at last one extra byte beyond pkt_end /
14472	return opcode == BPF_JGT;
14473	break;
14474	case BPF_JLT:
14475	/ pkt < pkt_end /
14476	fallthrough;
14477	case BPF_JGE:
14478	/ pkt >= pkt_end /
14479	if (pkt->range == BEYOND_PKT_END \|\| pkt->range == AT_PKT_END)
14480	return opcode == BPF_JGE;
14481	break;
14482	}
14483	return -`1`;
14484	}
14485
14486	/ compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"*
14487	* and return:
14488	* 1 - branch will be taken and "goto target" will be executed
14489	* 0 - branch will not be taken and fall-through to next insn
14490	* -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
14491	* range [0,10]
14492	*/
14493	static int is_branch_taken(struct bpf_reg_state reg1, struct* bpf_reg_state *reg2,
14494	u8 opcode, bool is_jmp32)
14495	{
14496	if (reg_is_pkt_pointer_any(reg: reg1) && reg_is_pkt_pointer_any(reg: reg2) && !is_jmp32)
14497	return is_pkt_ptr_branch_taken(dst_reg: reg1, src_reg: reg2, opcode);
14498
14499	if (__is_pointer_value(allow_ptr_leaks: false, reg: reg1) \|\| __is_pointer_value(allow_ptr_leaks: false, reg: reg2)) {
14500	u64 val;
14501
14502	/ arrange that reg2 is a scalar, and reg1 is a pointer /
14503	if (!is_reg_const(reg: reg2, subreg32: is_jmp32)) {
14504	opcode = flip_opcode(opcode);
14505	swap(reg1, reg2);
14506	}
14507	/ and ensure that reg2 is a constant /
14508	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14509	return -`1`;
14510
14511	if (!reg_not_null(reg: reg1))
14512	return -`1`;
14513
14514	/ If pointer is valid tests against zero will fail so we can*
14515	* use this to direct branch taken.
14516	*/
14517	val = reg_const_value(reg: reg2, subreg32: is_jmp32);
14518	if (val != `0`)
14519	return -`1`;
14520
14521	switch (opcode) {
14522	case BPF_JEQ:
14523	return `0`;
14524	case BPF_JNE:
14525	return `1`;
14526	default:
14527	return -`1`;
14528	}
14529	}
14530
14531	/ now deal with two scalars, but not necessarily constants /
14532	return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
14533	}
14534
14535	/ Opcode that corresponds to a false branch condition.*
14536	* E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
14537	*/
14538	static u8 rev_opcode(u8 opcode)
14539	{
14540	switch (opcode) {
14541	case BPF_JEQ: return BPF_JNE;
14542	case BPF_JNE: return BPF_JEQ;
14543	/ JSET doesn't have it's reverse opcode in BPF, so add*
14544	* BPF_X flag to denote the reverse of that operation
14545	*/
14546	case BPF_JSET: return BPF_JSET \| BPF_X;
14547	case BPF_JSET \| BPF_X: return BPF_JSET;
14548	case BPF_JGE: return BPF_JLT;
14549	case BPF_JGT: return BPF_JLE;
14550	case BPF_JLE: return BPF_JGT;
14551	case BPF_JLT: return BPF_JGE;
14552	case BPF_JSGE: return BPF_JSLT;
14553	case BPF_JSGT: return BPF_JSLE;
14554	case BPF_JSLE: return BPF_JSGT;
14555	case BPF_JSLT: return BPF_JSGE;
14556	default: return `0`;
14557	}
14558	}
14559
14560	/ Refine range knowledge for <reg1> <op> <reg>2 conditional operation. /
14561	static void regs_refine_cond_op(struct bpf_reg_state reg1, struct* bpf_reg_state *reg2,
14562	u8 opcode, bool is_jmp32)
14563	{
14564	struct tnum t;
14565	u64 val;
14566
14567	again:
14568	switch (opcode) {
14569	case BPF_JEQ:
14570	if (is_jmp32) {
14571	reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
14572	reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
14573	reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
14574	reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
14575	reg2->u32_min_value = reg1->u32_min_value;
14576	reg2->u32_max_value = reg1->u32_max_value;
14577	reg2->s32_min_value = reg1->s32_min_value;
14578	reg2->s32_max_value = reg1->s32_max_value;
14579
14580	t = tnum_intersect(a: tnum_subreg(a: reg1->var_off), b: tnum_subreg(a: reg2->var_off));
14581	reg1->var_off = tnum_with_subreg(reg: reg1->var_off, subreg: t);
14582	reg2->var_off = tnum_with_subreg(reg: reg2->var_off, subreg: t);
14583	} else {
14584	reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
14585	reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
14586	reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
14587	reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
14588	reg2->umin_value = reg1->umin_value;
14589	reg2->umax_value = reg1->umax_value;
14590	reg2->smin_value = reg1->smin_value;
14591	reg2->smax_value = reg1->smax_value;
14592
14593	reg1->var_off = tnum_intersect(a: reg1->var_off, b: reg2->var_off);
14594	reg2->var_off = reg1->var_off;
14595	}
14596	break;
14597	case BPF_JNE:
14598	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14599	swap(reg1, reg2);
14600	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14601	break;
14602
14603	/ try to recompute the bound of reg1 if reg2 is a const and*
14604	* is exactly the edge of reg1.
14605	*/
14606	val = reg_const_value(reg: reg2, subreg32: is_jmp32);
14607	if (is_jmp32) {
14608	/ u32_min_value is not equal to 0xffffffff at this point,*
14609	* because otherwise u32_max_value is 0xffffffff as well,
14610	* in such a case both reg1 and reg2 would be constants,
14611	* jump would be predicted and reg_set_min_max() won't
14612	* be called.
14613	*
14614	* Same reasoning works for all {u,s}{min,max}{32,64} cases
14615	* below.
14616	*/
14617	if (reg1->u32_min_value == (u32)val)
14618	reg1->u32_min_value++;
14619	if (reg1->u32_max_value == (u32)val)
14620	reg1->u32_max_value--;
14621	if (reg1->s32_min_value == (s32)val)
14622	reg1->s32_min_value++;
14623	if (reg1->s32_max_value == (s32)val)
14624	reg1->s32_max_value--;
14625	} else {
14626	if (reg1->umin_value == (u64)val)
14627	reg1->umin_value++;
14628	if (reg1->umax_value == (u64)val)
14629	reg1->umax_value--;
14630	if (reg1->smin_value == (s64)val)
14631	reg1->smin_value++;
14632	if (reg1->smax_value == (s64)val)
14633	reg1->smax_value--;
14634	}
14635	break;
14636	case BPF_JSET:
14637	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14638	swap(reg1, reg2);
14639	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14640	break;
14641	val = reg_const_value(reg: reg2, subreg32: is_jmp32);
14642	/ BPF_JSET (i.e., TRUE branch, not BPF_JSET \| BPF_X)*
14643	* requires single bit to learn something useful. E.g., if we
14644	* know that `r1 & 0x3` is true, then which bits (0, 1, or both)
14645	* are actually set? We can learn something definite only if
14646	* it's a single-bit value to begin with.
14647	*
14648	* BPF_JSET \| BPF_X (i.e., negation of BPF_JSET) doesn't have
14649	* this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
14650	* bit 1 is set, which we can readily use in adjustments.
14651	*/
14652	if (!is_power_of_2(n: val))
14653	break;
14654	if (is_jmp32) {
14655	t = tnum_or(a: tnum_subreg(a: reg1->var_off), b: tnum_const(value: val));
14656	reg1->var_off = tnum_with_subreg(reg: reg1->var_off, subreg: t);
14657	} else {
14658	reg1->var_off = tnum_or(a: reg1->var_off, b: tnum_const(value: val));
14659	}
14660	break;
14661	case BPF_JSET \| BPF_X: / reverse of BPF_JSET, see rev_opcode() /
14662	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14663	swap(reg1, reg2);
14664	if (!is_reg_const(reg: reg2, subreg32: is_jmp32))
14665	break;
14666	val = reg_const_value(reg: reg2, subreg32: is_jmp32);
14667	if (is_jmp32) {
14668	t = tnum_and(a: tnum_subreg(a: reg1->var_off), b: tnum_const(value: ~val));
14669	reg1->var_off = tnum_with_subreg(reg: reg1->var_off, subreg: t);
14670	} else {
14671	reg1->var_off = tnum_and(a: reg1->var_off, b: tnum_const(value: ~val));
14672	}
14673	break;
14674	case BPF_JLE:
14675	if (is_jmp32) {
14676	reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
14677	reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
14678	} else {
14679	reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
14680	reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
14681	}
14682	break;
14683	case BPF_JLT:
14684	if (is_jmp32) {
14685	reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - `1`);
14686	reg2->u32_min_value = max(reg1->u32_min_value + `1`, reg2->u32_min_value);
14687	} else {
14688	reg1->umax_value = min(reg1->umax_value, reg2->umax_value - `1`);
14689	reg2->umin_value = max(reg1->umin_value + `1`, reg2->umin_value);
14690	}
14691	break;
14692	case BPF_JSLE:
14693	if (is_jmp32) {
14694	reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
14695	reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
14696	} else {
14697	reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
14698	reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
14699	}
14700	break;
14701	case BPF_JSLT:
14702	if (is_jmp32) {
14703	reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - `1`);
14704	reg2->s32_min_value = max(reg1->s32_min_value + `1`, reg2->s32_min_value);
14705	} else {
14706	reg1->smax_value = min(reg1->smax_value, reg2->smax_value - `1`);
14707	reg2->smin_value = max(reg1->smin_value + `1`, reg2->smin_value);
14708	}
14709	break;
14710	case BPF_JGE:
14711	case BPF_JGT:
14712	case BPF_JSGE:
14713	case BPF_JSGT:
14714	/ just reuse LE/LT logic above /
14715	opcode = flip_opcode(opcode);
14716	swap(reg1, reg2);
14717	goto again;
14718	default:
14719	return;
14720	}
14721	}
14722
14723	/ Adjusts the register min/max values in the case that the dst_reg and*
14724	* src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
14725	* check, in which case we havea fake SCALAR_VALUE representing insn->imm).
14726	* Technically we can do similar adjustments for pointers to the same object,
14727	* but we don't support that right now.
14728	*/
14729	static int reg_set_min_max(struct bpf_verifier_env *env,
14730	struct bpf_reg_state *true_reg1,
14731	struct bpf_reg_state *true_reg2,
14732	struct bpf_reg_state *false_reg1,
14733	struct bpf_reg_state *false_reg2,
14734	u8 opcode, bool is_jmp32)
14735	{
14736	int err;
14737
14738	/ If either register is a pointer, we can't learn anything about its*
14739	* variable offset from the compare (unless they were a pointer into
14740	* the same object, but we don't bother with that).
14741	*/
14742	if (false_reg1->type != SCALAR_VALUE \|\| false_reg2->type != SCALAR_VALUE)
14743	return `0`;
14744
14745	/ fallthrough (FALSE) branch /
14746	regs_refine_cond_op(reg1: false_reg1, reg2: false_reg2, opcode: rev_opcode(opcode), is_jmp32);
14747	reg_bounds_sync(reg: false_reg1);
14748	reg_bounds_sync(reg: false_reg2);
14749
14750	/ jump (TRUE) branch /
14751	regs_refine_cond_op(reg1: true_reg1, reg2: true_reg2, opcode, is_jmp32);
14752	reg_bounds_sync(reg: true_reg1);
14753	reg_bounds_sync(reg: true_reg2);
14754
14755	err = reg_bounds_sanity_check(env, reg: true_reg1, ctx: "true_reg1");
14756	err = err ?: reg_bounds_sanity_check(env, reg: true_reg2, ctx: "true_reg2");
14757	err = err ?: reg_bounds_sanity_check(env, reg: false_reg1, ctx: "false_reg1");
14758	err = err ?: reg_bounds_sanity_check(env, reg: false_reg2, ctx: "false_reg2");
14759	return err;
14760	}
14761
14762	static void mark_ptr_or_null_reg(struct bpf_func_state *state,
14763	struct bpf_reg_state *reg, u32 id,
14764	bool is_null)
14765	{
14766	if (type_may_be_null(type: reg->type) && reg->id == id &&
14767	(is_rcu_reg(reg) \|\| !WARN_ON_ONCE(!reg->id))) {
14768	/ Old offset (both fixed and variable parts) should have been*
14769	* known-zero, because we don't allow pointer arithmetic on
14770	* pointers that might be NULL. If we see this happening, don't
14771	* convert the register.
14772	*
14773	* But in some cases, some helpers that return local kptrs
14774	* advance offset for the returned pointer. In those cases, it
14775	* is fine to expect to see reg->off.
14776	*/
14777	if (WARN_ON_ONCE(reg->smin_value \|\| reg->smax_value \|\| !tnum_equals_const(reg->var_off, `0`)))
14778	return;
14779	if (!(type_is_ptr_alloc_obj(type: reg->type) \|\| type_is_non_owning_ref(type: reg->type)) &&
14780	WARN_ON_ONCE(reg->off))
14781	return;
14782
14783	if (is_null) {
14784	reg->type = SCALAR_VALUE;
14785	/ We don't need id and ref_obj_id from this point*
14786	* onwards anymore, thus we should better reset it,
14787	* so that state pruning has chances to take effect.
14788	*/
14789	reg->id = `0`;
14790	reg->ref_obj_id = `0`;
14791
14792	return;
14793	}
14794
14795	mark_ptr_not_null_reg(reg);
14796
14797	if (!reg_may_point_to_spin_lock(reg)) {
14798	/ For not-NULL ptr, reg->ref_obj_id will be reset*
14799	* in release_reference().
14800	*
14801	* reg->id is still used by spin_lock ptr. Other
14802	* than spin_lock ptr type, reg->id can be reset.
14803	*/
14804	reg->id = `0`;
14805	}
14806	}
14807	}
14808
14809	/ The logic is similar to find_good_pkt_pointers(), both could eventually*
14810	* be folded together at some point.
14811	*/
14812	static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
14813	bool is_null)
14814	{
14815	struct bpf_func_state *state = vstate->frame[vstate->curframe];
14816	struct bpf_reg_state regs = state->regs, reg;
14817	u32 ref_obj_id = regs[regno].ref_obj_id;
14818	u32 id = regs[regno].id;
14819
14820	if (ref_obj_id && ref_obj_id == id && is_null)
14821	/ regs[regno] is in the " == NULL" branch.*
14822	* No one could have freed the reference state before
14823	* doing the NULL check.
14824	*/
14825	WARN_ON_ONCE(release_reference_state(state, id));
14826
14827	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14828	mark_ptr_or_null_reg(state, reg, id, is_null);
14829	}));
14830	}
14831
14832	static bool try_match_pkt_pointers(const struct bpf_insn *insn,
14833	struct bpf_reg_state *dst_reg,
14834	struct bpf_reg_state *src_reg,
14835	struct bpf_verifier_state *this_branch,
14836	struct bpf_verifier_state *other_branch)
14837	{
14838	if (BPF_SRC(insn->code) != BPF_X)
14839	return false;
14840
14841	/ Pointers are always 64-bit. /
14842	if (BPF_CLASS(insn->code) == BPF_JMP32)
14843	return false;
14844
14845	switch (BPF_OP(insn->code)) {
14846	case BPF_JGT:
14847	if ((dst_reg->type == PTR_TO_PACKET &&
14848	src_reg->type == PTR_TO_PACKET_END) \|\|
14849	(dst_reg->type == PTR_TO_PACKET_META &&
14850	reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14851	/ pkt_data' > pkt_end, pkt_meta' > pkt_data /
14852	find_good_pkt_pointers(vstate: this_branch, dst_reg,
14853	type: dst_reg->type, range_right_open: false);
14854	mark_pkt_end(vstate: other_branch, regn: insn->dst_reg, range_open: true);
14855	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
14856	src_reg->type == PTR_TO_PACKET) \|\|
14857	(reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14858	src_reg->type == PTR_TO_PACKET_META)) {
14859	/ pkt_end > pkt_data', pkt_data > pkt_meta' /
14860	find_good_pkt_pointers(vstate: other_branch, dst_reg: src_reg,
14861	type: src_reg->type, range_right_open: true);
14862	mark_pkt_end(vstate: this_branch, regn: insn->src_reg, range_open: false);
14863	} else {
14864	return false;
14865	}
14866	break;
14867	case BPF_JLT:
14868	if ((dst_reg->type == PTR_TO_PACKET &&
14869	src_reg->type == PTR_TO_PACKET_END) \|\|
14870	(dst_reg->type == PTR_TO_PACKET_META &&
14871	reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14872	/ pkt_data' < pkt_end, pkt_meta' < pkt_data /
14873	find_good_pkt_pointers(vstate: other_branch, dst_reg,
14874	type: dst_reg->type, range_right_open: true);
14875	mark_pkt_end(vstate: this_branch, regn: insn->dst_reg, range_open: false);
14876	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
14877	src_reg->type == PTR_TO_PACKET) \|\|
14878	(reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14879	src_reg->type == PTR_TO_PACKET_META)) {
14880	/ pkt_end < pkt_data', pkt_data > pkt_meta' /
14881	find_good_pkt_pointers(vstate: this_branch, dst_reg: src_reg,
14882	type: src_reg->type, range_right_open: false);
14883	mark_pkt_end(vstate: other_branch, regn: insn->src_reg, range_open: true);
14884	} else {
14885	return false;
14886	}
14887	break;
14888	case BPF_JGE:
14889	if ((dst_reg->type == PTR_TO_PACKET &&
14890	src_reg->type == PTR_TO_PACKET_END) \|\|
14891	(dst_reg->type == PTR_TO_PACKET_META &&
14892	reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14893	/ pkt_data' >= pkt_end, pkt_meta' >= pkt_data /
14894	find_good_pkt_pointers(vstate: this_branch, dst_reg,
14895	type: dst_reg->type, range_right_open: true);
14896	mark_pkt_end(vstate: other_branch, regn: insn->dst_reg, range_open: false);
14897	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
14898	src_reg->type == PTR_TO_PACKET) \|\|
14899	(reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14900	src_reg->type == PTR_TO_PACKET_META)) {
14901	/ pkt_end >= pkt_data', pkt_data >= pkt_meta' /
14902	find_good_pkt_pointers(vstate: other_branch, dst_reg: src_reg,
14903	type: src_reg->type, range_right_open: false);
14904	mark_pkt_end(vstate: this_branch, regn: insn->src_reg, range_open: true);
14905	} else {
14906	return false;
14907	}
14908	break;
14909	case BPF_JLE:
14910	if ((dst_reg->type == PTR_TO_PACKET &&
14911	src_reg->type == PTR_TO_PACKET_END) \|\|
14912	(dst_reg->type == PTR_TO_PACKET_META &&
14913	reg_is_init_pkt_pointer(reg: src_reg, which: PTR_TO_PACKET))) {
14914	/ pkt_data' <= pkt_end, pkt_meta' <= pkt_data /
14915	find_good_pkt_pointers(vstate: other_branch, dst_reg,
14916	type: dst_reg->type, range_right_open: false);
14917	mark_pkt_end(vstate: this_branch, regn: insn->dst_reg, range_open: true);
14918	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
14919	src_reg->type == PTR_TO_PACKET) \|\|
14920	(reg_is_init_pkt_pointer(reg: dst_reg, which: PTR_TO_PACKET) &&
14921	src_reg->type == PTR_TO_PACKET_META)) {
14922	/ pkt_end <= pkt_data', pkt_data <= pkt_meta' /
14923	find_good_pkt_pointers(vstate: this_branch, dst_reg: src_reg,
14924	type: src_reg->type, range_right_open: true);
14925	mark_pkt_end(vstate: other_branch, regn: insn->src_reg, range_open: false);
14926	} else {
14927	return false;
14928	}
14929	break;
14930	default:
14931	return false;
14932	}
14933
14934	return true;
14935	}
14936
14937	static void find_equal_scalars(struct bpf_verifier_state *vstate,
14938	struct bpf_reg_state *known_reg)
14939	{
14940	struct bpf_func_state *state;
14941	struct bpf_reg_state *reg;
14942
14943	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14944	if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
14945	copy_register_state(reg, known_reg);
14946	}));
14947	}
14948
14949	static int check_cond_jmp_op(struct bpf_verifier_env *env,
14950	struct bpf_insn insn, int* *insn_idx)
14951	{
14952	struct bpf_verifier_state *this_branch = env->cur_state;
14953	struct bpf_verifier_state *other_branch;
14954	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
14955	struct bpf_reg_state dst_reg, other_branch_regs, *src_reg = NULL;
14956	struct bpf_reg_state *eq_branch_regs;
14957	struct bpf_reg_state fake_reg = {};
14958	u8 opcode = BPF_OP(insn->code);
14959	bool is_jmp32;
14960	int pred = -`1`;
14961	int err;
14962
14963	/ Only conditional jumps are expected to reach here. /
14964	if (opcode == BPF_JA \|\| opcode > BPF_JCOND) {
14965	verbose(private_data: env, fmt: "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
14966	return -EINVAL;
14967	}
14968
14969	if (opcode == BPF_JCOND) {
14970	struct bpf_verifier_state cur_st = env->cur_state, queued_st, *prev_st;
14971	int idx = *insn_idx;
14972
14973	if (insn->code != (BPF_JMP \| BPF_JCOND) \|\|
14974	insn->src_reg != BPF_MAY_GOTO \|\|
14975	insn->dst_reg \|\| insn->imm \|\| insn->off == `0`) {
14976	verbose(private_data: env, fmt: "invalid may_goto off %d imm %d\n",
14977	insn->off, insn->imm);
14978	return -EINVAL;
14979	}
14980	prev_st = find_prev_entry(env, cur: cur_st->parent, insn_idx: idx);
14981
14982	/ branch out 'fallthrough' insn as a new state to explore /
14983	queued_st = push_stack(env, insn_idx: idx + `1`, prev_insn_idx: idx, speculative: false);
14984	if (!queued_st)
14985	return -ENOMEM;
14986
14987	queued_st->may_goto_depth++;
14988	if (prev_st)
14989	widen_imprecise_scalars(env, old: prev_st, cur: queued_st);
14990	*insn_idx += insn->off;
14991	return `0`;
14992	}
14993
14994	/ check src2 operand /
14995	err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
14996	if (err)
14997	return err;
14998
14999	dst_reg = &regs[insn->dst_reg];
15000	if (BPF_SRC(insn->code) == BPF_X) {
15001	if (insn->imm != `0`) {
15002	verbose(private_data: env, fmt: "BPF_JMP/JMP32 uses reserved fields\n");
15003	return -EINVAL;
15004	}
15005
15006	/ check src1 operand /
15007	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
15008	if (err)
15009	return err;
15010
15011	src_reg = &regs[insn->src_reg];
15012	if (!(reg_is_pkt_pointer_any(reg: dst_reg) && reg_is_pkt_pointer_any(reg: src_reg)) &&
15013	is_pointer_value(env, regno: insn->src_reg)) {
15014	verbose(private_data: env, fmt: "R%d pointer comparison prohibited\n",
15015	insn->src_reg);
15016	return -EACCES;
15017	}
15018	} else {
15019	if (insn->src_reg != BPF_REG_0) {
15020	verbose(private_data: env, fmt: "BPF_JMP/JMP32 uses reserved fields\n");
15021	return -EINVAL;
15022	}
15023	src_reg = &fake_reg;
15024	src_reg->type = SCALAR_VALUE;
15025	__mark_reg_known(reg: src_reg, imm: insn->imm);
15026	}
15027
15028	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
15029	pred = is_branch_taken(reg1: dst_reg, reg2: src_reg, opcode, is_jmp32);
15030	if (pred >= `0`) {
15031	/ If we get here with a dst_reg pointer type it is because*
15032	* above is_branch_taken() special cased the 0 comparison.
15033	*/
15034	if (!__is_pointer_value(allow_ptr_leaks: false, reg: dst_reg))
15035	err = mark_chain_precision(env, regno: insn->dst_reg);
15036	if (BPF_SRC(insn->code) == BPF_X && !err &&
15037	!__is_pointer_value(allow_ptr_leaks: false, reg: src_reg))
15038	err = mark_chain_precision(env, regno: insn->src_reg);
15039	if (err)
15040	return err;
15041	}
15042
15043	if (pred == `1`) {
15044	/ Only follow the goto, ignore fall-through. If needed, push*
15045	* the fall-through branch for simulation under speculative
15046	* execution.
15047	*/
15048	if (!env->bypass_spec_v1 &&
15049	!sanitize_speculative_path(env, insn, next_idx: *insn_idx + `1`,
15050	curr_idx: *insn_idx))
15051	return -EFAULT;
15052	if (env->log.level & BPF_LOG_LEVEL)
15053	print_insn_state(env, state: this_branch->frame[this_branch->curframe]);
15054	*insn_idx += insn->off;
15055	return `0`;
15056	} else if (pred == `0`) {
15057	/ Only follow the fall-through branch, since that's where the*
15058	* program will go. If needed, push the goto branch for
15059	* simulation under speculative execution.
15060	*/
15061	if (!env->bypass_spec_v1 &&
15062	!sanitize_speculative_path(env, insn,
15063	next_idx: *insn_idx + insn->off + `1`,
15064	curr_idx: *insn_idx))
15065	return -EFAULT;
15066	if (env->log.level & BPF_LOG_LEVEL)
15067	print_insn_state(env, state: this_branch->frame[this_branch->curframe]);
15068	return `0`;
15069	}
15070
15071	other_branch = push_stack(env, insn_idx: insn_idx + insn->off + `1`, prev_insn_idx: insn_idx,
15072	speculative: false);
15073	if (!other_branch)
15074	return -EFAULT;
15075	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
15076
15077	if (BPF_SRC(insn->code) == BPF_X) {
15078	err = reg_set_min_max(env,
15079	true_reg1: &other_branch_regs[insn->dst_reg],
15080	true_reg2: &other_branch_regs[insn->src_reg],
15081	false_reg1: dst_reg, false_reg2: src_reg, opcode, is_jmp32);
15082	} else / BPF_SRC(insn->code) == BPF_K / {
15083	err = reg_set_min_max(env,
15084	true_reg1: &other_branch_regs[insn->dst_reg],
15085	true_reg2: src_reg / fake one /,
15086	false_reg1: dst_reg, false_reg2: src_reg / same fake one /,
15087	opcode, is_jmp32);
15088	}
15089	if (err)
15090	return err;
15091
15092	if (BPF_SRC(insn->code) == BPF_X &&
15093	src_reg->type == SCALAR_VALUE && src_reg->id &&
15094	!WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
15095	find_equal_scalars(vstate: this_branch, known_reg: src_reg);
15096	find_equal_scalars(vstate: other_branch, known_reg: &other_branch_regs[insn->src_reg]);
15097	}
15098	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
15099	!WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
15100	find_equal_scalars(vstate: this_branch, known_reg: dst_reg);
15101	find_equal_scalars(vstate: other_branch, known_reg: &other_branch_regs[insn->dst_reg]);
15102	}
15103
15104	/ if one pointer register is compared to another pointer*
15105	* register check if PTR_MAYBE_NULL could be lifted.
15106	* E.g. register A - maybe null
15107	* register B - not null
15108	* for JNE A, B, ... - A is not null in the false branch;
15109	* for JEQ A, B, ... - A is not null in the true branch.
15110	*
15111	* Since PTR_TO_BTF_ID points to a kernel struct that does
15112	* not need to be null checked by the BPF program, i.e.,
15113	* could be null even without PTR_MAYBE_NULL marking, so
15114	* only propagate nullness when neither reg is that type.
15115	*/
15116	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
15117	__is_pointer_value(allow_ptr_leaks: false, reg: src_reg) && __is_pointer_value(allow_ptr_leaks: false, reg: dst_reg) &&
15118	type_may_be_null(type: src_reg->type) != type_may_be_null(type: dst_reg->type) &&
15119	base_type(type: src_reg->type) != PTR_TO_BTF_ID &&
15120	base_type(type: dst_reg->type) != PTR_TO_BTF_ID) {
15121	eq_branch_regs = NULL;
15122	switch (opcode) {
15123	case BPF_JEQ:
15124	eq_branch_regs = other_branch_regs;
15125	break;
15126	case BPF_JNE:
15127	eq_branch_regs = regs;
15128	break;
15129	default:
15130	/ do nothing /
15131	break;
15132	}
15133	if (eq_branch_regs) {
15134	if (type_may_be_null(type: src_reg->type))
15135	mark_ptr_not_null_reg(reg: &eq_branch_regs[insn->src_reg]);
15136	else
15137	mark_ptr_not_null_reg(reg: &eq_branch_regs[insn->dst_reg]);
15138	}
15139	}
15140
15141	/ detect if R == 0 where R is returned from bpf_map_lookup_elem().*
15142	* NOTE: these optimizations below are related with pointer comparison
15143	* which will never be JMP32.
15144	*/
15145	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
15146	insn->imm == `0` && (opcode == BPF_JEQ \|\| opcode == BPF_JNE) &&
15147	type_may_be_null(type: dst_reg->type)) {
15148	/ Mark all identical registers in each branch as either*
15149	* safe or unknown depending R == 0 or R != 0 conditional.
15150	*/
15151	mark_ptr_or_null_regs(vstate: this_branch, regno: insn->dst_reg,
15152	is_null: opcode == BPF_JNE);
15153	mark_ptr_or_null_regs(vstate: other_branch, regno: insn->dst_reg,
15154	is_null: opcode == BPF_JEQ);
15155	} else if (!try_match_pkt_pointers(insn, dst_reg, src_reg: &regs[insn->src_reg],
15156	this_branch, other_branch) &&
15157	is_pointer_value(env, regno: insn->dst_reg)) {
15158	verbose(private_data: env, fmt: "R%d pointer comparison prohibited\n",
15159	insn->dst_reg);
15160	return -EACCES;
15161	}
15162	if (env->log.level & BPF_LOG_LEVEL)
15163	print_insn_state(env, state: this_branch->frame[this_branch->curframe]);
15164	return `0`;
15165	}
15166
15167	/ verify BPF_LD_IMM64 instruction /
15168	static int check_ld_imm(struct bpf_verifier_env env, struct* bpf_insn *insn)
15169	{
15170	struct bpf_insn_aux_data *aux = cur_aux(env);
15171	struct bpf_reg_state *regs = cur_regs(env);
15172	struct bpf_reg_state *dst_reg;
15173	struct bpf_map *map;
15174	int err;
15175
15176	if (BPF_SIZE(insn->code) != BPF_DW) {
15177	verbose(private_data: env, fmt: "invalid BPF_LD_IMM insn\n");
15178	return -EINVAL;
15179	}
15180	if (insn->off != `0`) {
15181	verbose(private_data: env, fmt: "BPF_LD_IMM64 uses reserved fields\n");
15182	return -EINVAL;
15183	}
15184
15185	err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP);
15186	if (err)
15187	return err;
15188
15189	dst_reg = &regs[insn->dst_reg];
15190	if (insn->src_reg == `0`) {
15191	u64 imm = ((u64)(insn + `1`)->imm << `32`) \| (u32)insn->imm;
15192
15193	dst_reg->type = SCALAR_VALUE;
15194	__mark_reg_known(reg: &regs[insn->dst_reg], imm);
15195	return `0`;
15196	}
15197
15198	/ All special src_reg cases are listed below. From this point onwards*
15199	* we either succeed and assign a corresponding dst_reg->type after
15200	* zeroing the offset, or fail and reject the program.
15201	*/
15202	mark_reg_known_zero(env, regs, regno: insn->dst_reg);
15203
15204	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
15205	dst_reg->type = aux->btf_var.reg_type;
15206	switch (base_type(type: dst_reg->type)) {
15207	case PTR_TO_MEM:
15208	dst_reg->mem_size = aux->btf_var.mem_size;
15209	break;
15210	case PTR_TO_BTF_ID:
15211	dst_reg->btf = aux->btf_var.btf;
15212	dst_reg->btf_id = aux->btf_var.btf_id;
15213	break;
15214	default:
15215	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
15216	return -EFAULT;
15217	}
15218	return `0`;
15219	}
15220
15221	if (insn->src_reg == BPF_PSEUDO_FUNC) {
15222	struct bpf_prog_aux *aux = env->prog->aux;
15223	u32 subprogno = find_subprog(env,
15224	off: env->insn_idx + insn->imm + `1`);
15225
15226	if (!aux->func_info) {
15227	verbose(private_data: env, fmt: "missing btf func_info\n");
15228	return -EINVAL;
15229	}
15230	if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
15231	verbose(private_data: env, fmt: "callback function not static\n");
15232	return -EINVAL;
15233	}
15234
15235	dst_reg->type = PTR_TO_FUNC;
15236	dst_reg->subprogno = subprogno;
15237	return `0`;
15238	}
15239
15240	map = env->used_maps[aux->map_index];
15241	dst_reg->map_ptr = map;
15242
15243	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE \|\|
15244	insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
15245	if (map->map_type == BPF_MAP_TYPE_ARENA) {
15246	__mark_reg_unknown(env, reg: dst_reg);
15247	return `0`;
15248	}
15249	dst_reg->type = PTR_TO_MAP_VALUE;
15250	dst_reg->off = aux->map_off;
15251	WARN_ON_ONCE(map->max_entries != `1`);
15252	/ We want reg->id to be same (0) as map_value is not distinct /
15253	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD \|\|
15254	insn->src_reg == BPF_PSEUDO_MAP_IDX) {
15255	dst_reg->type = CONST_PTR_TO_MAP;
15256	} else {
15257	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
15258	return -EINVAL;
15259	}
15260
15261	return `0`;
15262	}
15263
15264	static bool may_access_skb(enum bpf_prog_type type)
15265	{
15266	switch (type) {
15267	case BPF_PROG_TYPE_SOCKET_FILTER:
15268	case BPF_PROG_TYPE_SCHED_CLS:
15269	case BPF_PROG_TYPE_SCHED_ACT:
15270	return true;
15271	default:
15272	return false;
15273	}
15274	}
15275
15276	/ verify safety of LD_ABS\|LD_IND instructions:*
15277	* - they can only appear in the programs where ctx == skb
15278	* - since they are wrappers of function calls, they scratch R1-R5 registers,
15279	* preserve R6-R9, and store return value into R0
15280	*
15281	* Implicit input:
15282	* ctx == skb == R6 == CTX
15283	*
15284	* Explicit input:
15285	* SRC == any register
15286	* IMM == 32-bit immediate
15287	*
15288	* Output:
15289	* R0 - 8/16/32-bit skb data converted to cpu endianness
15290	*/
15291	static int check_ld_abs(struct bpf_verifier_env env, struct* bpf_insn *insn)
15292	{
15293	struct bpf_reg_state *regs = cur_regs(env);
15294	static const int ctx_reg = BPF_REG_6;
15295	u8 mode = BPF_MODE(insn->code);
15296	int i, err;
15297
15298	if (!may_access_skb(type: resolve_prog_type(prog: env->prog))) {
15299	verbose(private_data: env, fmt: "BPF_LD_[ABS\|IND] instructions not allowed for this program type\n");
15300	return -EINVAL;
15301	}
15302
15303	if (!env->ops->gen_ld_abs) {
15304	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
15305	return -EINVAL;
15306	}
15307
15308	if (insn->dst_reg != BPF_REG_0 \|\| insn->off != `0` \|\|
15309	BPF_SIZE(insn->code) == BPF_DW \|\|
15310	(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
15311	verbose(private_data: env, fmt: "BPF_LD_[ABS\|IND] uses reserved fields\n");
15312	return -EINVAL;
15313	}
15314
15315	/ check whether implicit source operand (register R6) is readable /
15316	err = check_reg_arg(env, regno: ctx_reg, t: SRC_OP);
15317	if (err)
15318	return err;
15319
15320	/ Disallow usage of BPF_LD_[ABS\|IND] with reference tracking, as*
15321	* gen_ld_abs() may terminate the program at runtime, leading to
15322	* reference leak.
15323	*/
15324	err = check_reference_leak(env, exception_exit: false);
15325	if (err) {
15326	verbose(private_data: env, fmt: "BPF_LD_[ABS\|IND] cannot be mixed with socket references\n");
15327	return err;
15328	}
15329
15330	if (env->cur_state->active_lock.ptr) {
15331	verbose(private_data: env, fmt: "BPF_LD_[ABS\|IND] cannot be used inside bpf_spin_lock-ed region\n");
15332	return -EINVAL;
15333	}
15334
15335	if (env->cur_state->active_rcu_lock) {
15336	verbose(private_data: env, fmt: "BPF_LD_[ABS\|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
15337	return -EINVAL;
15338	}
15339
15340	if (regs[ctx_reg].type != PTR_TO_CTX) {
15341	verbose(private_data: env,
15342	fmt: "at the time of BPF_LD_ABS\|IND R6 != pointer to skb\n");
15343	return -EINVAL;
15344	}
15345
15346	if (mode == BPF_IND) {
15347	/ check explicit source operand /
15348	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
15349	if (err)
15350	return err;
15351	}
15352
15353	err = check_ptr_off_reg(env, reg: &regs[ctx_reg], regno: ctx_reg);
15354	if (err < `0`)
15355	return err;
15356
15357	/ reset caller saved regs to unreadable /
15358	for (i = `0`; i < CALLER_SAVED_REGS; i++) {
15359	mark_reg_not_init(env, regs, regno: caller_saved[i]);
15360	check_reg_arg(env, regno: caller_saved[i], t: DST_OP_NO_MARK);
15361	}
15362
15363	/ mark destination R0 register as readable, since it contains*
15364	* the value fetched from the packet.
15365	* Already marked as written above.
15366	*/
15367	mark_reg_unknown(env, regs, regno: BPF_REG_0);
15368	/ ld_abs load up to 32-bit skb data. /
15369	regs[BPF_REG_0].subreg_def = env->insn_idx + `1`;
15370	return `0`;
15371	}
15372
15373	static int check_return_code(struct bpf_verifier_env env, int* regno, const char *reg_name)
15374	{
15375	const char *exit_ctx = "At program exit";
15376	struct tnum enforce_attach_type_range = tnum_unknown;
15377	const struct bpf_prog *prog = env->prog;
15378	struct bpf_reg_state *reg;
15379	struct bpf_retval_range range = retval_range(minval: `0`, maxval: `1`);
15380	enum bpf_prog_type prog_type = resolve_prog_type(prog: env->prog);
15381	int err;
15382	struct bpf_func_state *frame = env->cur_state->frame[`0`];
15383	const bool is_subprog = frame->subprogno;
15384
15385	/ LSM and struct_ops func-ptr's return type could be "void" /
15386	if (!is_subprog \|\| frame->in_exception_callback_fn) {
15387	switch (prog_type) {
15388	case BPF_PROG_TYPE_LSM:
15389	if (prog->expected_attach_type == BPF_LSM_CGROUP)
15390	/ See below, can be 0 or 0-1 depending on hook. /
15391	break;
15392	fallthrough;
15393	case BPF_PROG_TYPE_STRUCT_OPS:
15394	if (!prog->aux->attach_func_proto->type)
15395	return `0`;
15396	break;
15397	default:
15398	break;
15399	}
15400	}
15401
15402	/ eBPF calling convention is such that R0 is used*
15403	* to return the value from eBPF program.
15404	* Make sure that it's readable at this time
15405	* of bpf_exit, which means that program wrote
15406	* something into it earlier
15407	*/
15408	err = check_reg_arg(env, regno, t: SRC_OP);
15409	if (err)
15410	return err;
15411
15412	if (is_pointer_value(env, regno)) {
15413	verbose(private_data: env, fmt: "R%d leaks addr as return value\n", regno);
15414	return -EACCES;
15415	}
15416
15417	reg = cur_regs(env) + regno;
15418
15419	if (frame->in_async_callback_fn) {
15420	/ enforce return zero from async callbacks like timer /
15421	exit_ctx = "At async callback return";
15422	range = retval_range(minval: `0`, maxval: `0`);
15423	goto enforce_retval;
15424	}
15425
15426	if (is_subprog && !frame->in_exception_callback_fn) {
15427	if (reg->type != SCALAR_VALUE) {
15428	verbose(private_data: env, fmt: "At subprogram exit the register R%d is not a scalar value (%s)\n",
15429	regno, reg_type_str(env, type: reg->type));
15430	return -EINVAL;
15431	}
15432	return `0`;
15433	}
15434
15435	switch (prog_type) {
15436	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
15437	if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG \|\|
15438	env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG \|\|
15439	env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG \|\|
15440	env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME \|\|
15441	env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME \|\|
15442	env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME \|\|
15443	env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME \|\|
15444	env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME \|\|
15445	env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
15446	range = retval_range(minval: `1`, maxval: `1`);
15447	if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND \|\|
15448	env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
15449	range = retval_range(minval: `0`, maxval: `3`);
15450	break;
15451	case BPF_PROG_TYPE_CGROUP_SKB:
15452	if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
15453	range = retval_range(minval: `0`, maxval: `3`);
15454	enforce_attach_type_range = tnum_range(min: `2`, max: `3`);
15455	}
15456	break;
15457	case BPF_PROG_TYPE_CGROUP_SOCK:
15458	case BPF_PROG_TYPE_SOCK_OPS:
15459	case BPF_PROG_TYPE_CGROUP_DEVICE:
15460	case BPF_PROG_TYPE_CGROUP_SYSCTL:
15461	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
15462	break;
15463	case BPF_PROG_TYPE_RAW_TRACEPOINT:
15464	if (!env->prog->aux->attach_btf_id)
15465	return `0`;
15466	range = retval_range(minval: `0`, maxval: `0`);
15467	break;
15468	case BPF_PROG_TYPE_TRACING:
15469	switch (env->prog->expected_attach_type) {
15470	case BPF_TRACE_FENTRY:
15471	case BPF_TRACE_FEXIT:
15472	range = retval_range(minval: `0`, maxval: `0`);
15473	break;
15474	case BPF_TRACE_RAW_TP:
15475	case BPF_MODIFY_RETURN:
15476	return `0`;
15477	case BPF_TRACE_ITER:
15478	break;
15479	default:
15480	return -ENOTSUPP;
15481	}
15482	break;
15483	case BPF_PROG_TYPE_SK_LOOKUP:
15484	range = retval_range(minval: SK_DROP, maxval: SK_PASS);
15485	break;
15486
15487	case BPF_PROG_TYPE_LSM:
15488	if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
15489	/ Regular BPF_PROG_TYPE_LSM programs can return*
15490	* any value.
15491	*/
15492	return `0`;
15493	}
15494	if (!env->prog->aux->attach_func_proto->type) {
15495	/ Make sure programs that attach to void*
15496	* hooks don't try to modify return value.
15497	*/
15498	range = retval_range(minval: `1`, maxval: `1`);
15499	}
15500	break;
15501
15502	case BPF_PROG_TYPE_NETFILTER:
15503	range = retval_range(NF_DROP, NF_ACCEPT);
15504	break;
15505	case BPF_PROG_TYPE_EXT:
15506	/ freplace program can return anything as its return value*
15507	* depends on the to-be-replaced kernel func or bpf program.
15508	*/
15509	default:
15510	return `0`;
15511	}
15512
15513	enforce_retval:
15514	if (reg->type != SCALAR_VALUE) {
15515	verbose(private_data: env, fmt: "%s the register R%d is not a known value (%s)\n",
15516	exit_ctx, regno, reg_type_str(env, type: reg->type));
15517	return -EINVAL;
15518	}
15519
15520	err = mark_chain_precision(env, regno);
15521	if (err)
15522	return err;
15523
15524	if (!retval_range_within(range, reg)) {
15525	verbose_invalid_scalar(env, reg, range, ctx: exit_ctx, reg_name);
15526	if (!is_subprog &&
15527	prog->expected_attach_type == BPF_LSM_CGROUP &&
15528	prog_type == BPF_PROG_TYPE_LSM &&
15529	!prog->aux->attach_func_proto->type)
15530	verbose(private_data: env, fmt: "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
15531	return -EINVAL;
15532	}
15533
15534	if (!tnum_is_unknown(a: enforce_attach_type_range) &&
15535	tnum_in(a: enforce_attach_type_range, b: reg->var_off))
15536	env->prog->enforce_expected_attach_type = `1`;
15537	return `0`;
15538	}
15539
/* non-recursive DFS pseudo code
 * 1  procedure DFS-iterative(G,v):
 * 2      label v as discovered
 * 3      let S be a stack
 * 4      S.push(v)
 * 5      while S is not empty
 * 6            t <- S.peek()
 * 7            if t is what we're looking for:
 * 8                return t
 * 9            for all edges e in G.adjacentEdges(t) do
 * 10               if edge e is already labelled
 * 11                   continue with the next edge
 * 12               w <- G.adjacentVertex(t,e)
 * 13               if vertex w is not discovered and not explored
 * 14                   label e as tree-edge
 * 15                   label w as discovered
 * 16                   S.push(w)
 * 17                   continue at 5
 * 18               else if vertex w is discovered
 * 19                   label e as back-edge
 * 20               else
 * 21                   // vertex w is explored
 * 22                   label e as forward- or cross-edge
 * 23           label t as explored
 * 24           S.pop()
 *
 * convention:
 * 0x10 - discovered
 * 0x11 - discovered and fall-through edge labelled
 * 0x12 - discovered and fall-through and branch edges labelled
 * 0x20 - explored
 */
15572
/* Per-instruction DFS state bits used by the CFG walk.
 * DISCOVERED/EXPLORED occupy the high nibble; FALLTHROUGH/BRANCH are
 * OR-ed into a DISCOVERED state to record which outgoing edges have
 * already been labelled.
 */
enum {
	DISCOVERED = 0x10,
	EXPLORED = 0x20,
	FALLTHROUGH = 1,
	BRANCH = 2,
};
15579
15580	static void mark_prune_point(struct bpf_verifier_env env, int* idx)
15581	{
15582	env->insn_aux_data[idx].prune_point = true;
15583	}
15584
15585	static bool is_prune_point(struct bpf_verifier_env env, int* insn_idx)
15586	{
15587	return env->insn_aux_data[insn_idx].prune_point;
15588	}
15589
15590	static void mark_force_checkpoint(struct bpf_verifier_env env, int* idx)
15591	{
15592	env->insn_aux_data[idx].force_checkpoint = true;
15593	}
15594
15595	static bool is_force_checkpoint(struct bpf_verifier_env env, int* insn_idx)
15596	{
15597	return env->insn_aux_data[insn_idx].force_checkpoint;
15598	}
15599
15600	static void mark_calls_callback(struct bpf_verifier_env env, int* idx)
15601	{
15602	env->insn_aux_data[idx].calls_callback = true;
15603	}
15604
15605	static bool calls_callback(struct bpf_verifier_env env, int* insn_idx)
15606	{
15607	return env->insn_aux_data[insn_idx].calls_callback;
15608	}
15609
/* Non-error results of push_insn()/visit_insn(); negative values are errors. */
enum {
	DONE_EXPLORING = 0,
	KEEP_EXPLORING = 1,
};
15614
15615	/ t, w, e - match pseudo-code above:*
15616	* t - index of current instruction
15617	* w - next instruction
15618	* e - edge
15619	*/
15620	static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
15621	{
15622	int *insn_stack = env->cfg.insn_stack;
15623	int *insn_state = env->cfg.insn_state;
15624
15625	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED \| FALLTHROUGH))
15626	return DONE_EXPLORING;
15627
15628	if (e == BRANCH && insn_state[t] >= (DISCOVERED \| BRANCH))
15629	return DONE_EXPLORING;
15630
15631	if (w < `0` \|\| w >= env->prog->len) {
15632	verbose_linfo(env, insn_off: t, prefix_fmt: "%d: ", t);
15633	verbose(private_data: env, fmt: "jump out of range from insn %d to %d\n", t, w);
15634	return -EINVAL;
15635	}
15636
15637	if (e == BRANCH) {
15638	/ mark branch target for state pruning /
15639	mark_prune_point(env, idx: w);
15640	mark_jmp_point(env, idx: w);
15641	}
15642
15643	if (insn_state[w] == `0`) {
15644	/ tree-edge /
15645	insn_state[t] = DISCOVERED \| e;
15646	insn_state[w] = DISCOVERED;
15647	if (env->cfg.cur_stack >= env->prog->len)
15648	return -E2BIG;
15649	insn_stack[env->cfg.cur_stack++] = w;
15650	return KEEP_EXPLORING;
15651	} else if ((insn_state[w] & `0xF0`) == DISCOVERED) {
15652	if (env->bpf_capable)
15653	return DONE_EXPLORING;
15654	verbose_linfo(env, insn_off: t, prefix_fmt: "%d: ", t);
15655	verbose_linfo(env, insn_off: w, prefix_fmt: "%d: ", w);
15656	verbose(private_data: env, fmt: "back-edge from insn %d to %d\n", t, w);
15657	return -EINVAL;
15658	} else if (insn_state[w] == EXPLORED) {
15659	/ forward- or cross-edge /
15660	insn_state[t] = DISCOVERED \| e;
15661	} else {
15662	verbose(private_data: env, fmt: "insn state internal bug\n");
15663	return -EFAULT;
15664	}
15665	return DONE_EXPLORING;
15666	}
15667
15668	static int visit_func_call_insn(int t, struct bpf_insn *insns,
15669	struct bpf_verifier_env *env,
15670	bool visit_callee)
15671	{
15672	int ret, insn_sz;
15673
15674	insn_sz = bpf_is_ldimm64(insn: &insns[t]) ? `2` : `1`;
15675	ret = push_insn(t, w: t + insn_sz, e: FALLTHROUGH, env);
15676	if (ret)
15677	return ret;
15678
15679	mark_prune_point(env, idx: t + insn_sz);
15680	/ when we exit from subprog, we need to record non-linear history /
15681	mark_jmp_point(env, idx: t + insn_sz);
15682
15683	if (visit_callee) {
15684	mark_prune_point(env, idx: t);
15685	ret = push_insn(t, w: t + insns[t].imm + `1`, e: BRANCH, env);
15686	}
15687	return ret;
15688	}
15689
15690	/ Visits the instruction at index t and returns one of the following:*
15691	* < 0 - an error occurred
15692	* DONE_EXPLORING - the instruction was fully explored
15693	* KEEP_EXPLORING - there is still work to be done before it is fully explored
15694	*/
15695	static int visit_insn(int t, struct bpf_verifier_env *env)
15696	{
15697	struct bpf_insn insns = env->prog->insnsi, insn = &insns[t];
15698	int ret, off, insn_sz;
15699
15700	if (bpf_pseudo_func(insn))
15701	return visit_func_call_insn(t, insns, env, visit_callee: true);
15702
15703	/ All non-branch instructions have a single fall-through edge. /
15704	if (BPF_CLASS(insn->code) != BPF_JMP &&
15705	BPF_CLASS(insn->code) != BPF_JMP32) {
15706	insn_sz = bpf_is_ldimm64(insn) ? `2` : `1`;
15707	return push_insn(t, w: t + insn_sz, e: FALLTHROUGH, env);
15708	}
15709
15710	switch (BPF_OP(insn->code)) {
15711	case BPF_EXIT:
15712	return DONE_EXPLORING;
15713
15714	case BPF_CALL:
15715	if (is_async_callback_calling_insn(insn))
15716	/ Mark this call insn as a prune point to trigger*
15717	* is_state_visited() check before call itself is
15718	* processed by __check_func_call(). Otherwise new
15719	* async state will be pushed for further exploration.
15720	*/
15721	mark_prune_point(env, idx: t);
15722	/ For functions that invoke callbacks it is not known how many times*
15723	* callback would be called. Verifier models callback calling functions
15724	* by repeatedly visiting callback bodies and returning to origin call
15725	* instruction.
15726	* In order to stop such iteration verifier needs to identify when a
15727	* state identical some state from a previous iteration is reached.
15728	* Check below forces creation of checkpoint before callback calling
15729	* instruction to allow search for such identical states.
15730	*/
15731	if (is_sync_callback_calling_insn(insn)) {
15732	mark_calls_callback(env, idx: t);
15733	mark_force_checkpoint(env, idx: t);
15734	mark_prune_point(env, idx: t);
15735	mark_jmp_point(env, idx: t);
15736	}
15737	if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
15738	struct bpf_kfunc_call_arg_meta meta;
15739
15740	ret = fetch_kfunc_meta(env, insn, meta: &meta, NULL);
15741	if (ret == `0` && is_iter_next_kfunc(meta: &meta)) {
15742	mark_prune_point(env, idx: t);
15743	/ Checking and saving state checkpoints at iter_next() call*
15744	* is crucial for fast convergence of open-coded iterator loop
15745	* logic, so we need to force it. If we don't do that,
15746	* is_state_visited() might skip saving a checkpoint, causing
15747	* unnecessarily long sequence of not checkpointed
15748	* instructions and jumps, leading to exhaustion of jump
15749	* history buffer, and potentially other undesired outcomes.
15750	* It is expected that with correct open-coded iterators
15751	* convergence will happen quickly, so we don't run a risk of
15752	* exhausting memory.
15753	*/
15754	mark_force_checkpoint(env, idx: t);
15755	}
15756	}
15757	return visit_func_call_insn(t, insns, env, visit_callee: insn->src_reg == BPF_PSEUDO_CALL);
15758
15759	case BPF_JA:
15760	if (BPF_SRC(insn->code) != BPF_K)
15761	return -EINVAL;
15762
15763	if (BPF_CLASS(insn->code) == BPF_JMP)
15764	off = insn->off;
15765	else
15766	off = insn->imm;
15767
15768	/ unconditional jump with single edge /
15769	ret = push_insn(t, w: t + off + `1`, e: FALLTHROUGH, env);
15770	if (ret)
15771	return ret;
15772
15773	mark_prune_point(env, idx: t + off + `1`);
15774	mark_jmp_point(env, idx: t + off + `1`);
15775
15776	return ret;
15777
15778	default:
15779	/ conditional jump with two edges /
15780	mark_prune_point(env, idx: t);
15781	if (is_may_goto_insn(insn))
15782	mark_force_checkpoint(env, idx: t);
15783
15784	ret = push_insn(t, w: t + `1`, e: FALLTHROUGH, env);
15785	if (ret)
15786	return ret;
15787
15788	return push_insn(t, w: t + insn->off + `1`, e: BRANCH, env);
15789	}
15790	}
15791
15792	/ non-recursive depth-first-search to detect loops in BPF program*
15793	* loop == back-edge in directed graph
15794	*/
15795	static int check_cfg(struct bpf_verifier_env *env)
15796	{
15797	int insn_cnt = env->prog->len;
15798	int insn_stack, insn_state;
15799	int ex_insn_beg, i, ret = `0`;
15800	bool ex_done = false;
15801
15802	insn_state = env->cfg.insn_state = kvcalloc(n: insn_cnt, size: sizeof(int), GFP_KERNEL);
15803	if (!insn_state)
15804	return -ENOMEM;
15805
15806	insn_stack = env->cfg.insn_stack = kvcalloc(n: insn_cnt, size: sizeof(int), GFP_KERNEL);
15807	if (!insn_stack) {
15808	kvfree(addr: insn_state);
15809	return -ENOMEM;
15810	}
15811
15812	insn_state[`0`] = DISCOVERED; / mark 1st insn as discovered /
15813	insn_stack[`0`] = `0`; / 0 is the first instruction /
15814	env->cfg.cur_stack = `1`;
15815
15816	walk_cfg:
15817	while (env->cfg.cur_stack > `0`) {
15818	int t = insn_stack[env->cfg.cur_stack - `1`];
15819
15820	ret = visit_insn(t, env);
15821	switch (ret) {
15822	case DONE_EXPLORING:
15823	insn_state[t] = EXPLORED;
15824	env->cfg.cur_stack--;
15825	break;
15826	case KEEP_EXPLORING:
15827	break;
15828	default:
15829	if (ret > `0`) {
15830	verbose(private_data: env, fmt: "visit_insn internal bug\n");
15831	ret = -EFAULT;
15832	}
15833	goto err_free;
15834	}
15835	}
15836
15837	if (env->cfg.cur_stack < `0`) {
15838	verbose(private_data: env, fmt: "pop stack internal bug\n");
15839	ret = -EFAULT;
15840	goto err_free;
15841	}
15842
15843	if (env->exception_callback_subprog && !ex_done) {
15844	ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
15845
15846	insn_state[ex_insn_beg] = DISCOVERED;
15847	insn_stack[`0`] = ex_insn_beg;
15848	env->cfg.cur_stack = `1`;
15849	ex_done = true;
15850	goto walk_cfg;
15851	}
15852
15853	for (i = `0`; i < insn_cnt; i++) {
15854	struct bpf_insn *insn = &env->prog->insnsi[i];
15855
15856	if (insn_state[i] != EXPLORED) {
15857	verbose(private_data: env, fmt: "unreachable insn %d\n", i);
15858	ret = -EINVAL;
15859	goto err_free;
15860	}
15861	if (bpf_is_ldimm64(insn)) {
15862	if (insn_state[i + `1`] != `0`) {
15863	verbose(private_data: env, fmt: "jump into the middle of ldimm64 insn %d\n", i);
15864	ret = -EINVAL;
15865	goto err_free;
15866	}
15867	i++; / skip second half of ldimm64 /
15868	}
15869	}
15870	ret = `0`; / cfg looks good /
15871
15872	err_free:
15873	kvfree(addr: insn_state);
15874	kvfree(addr: insn_stack);
15875	env->cfg.insn_state = env->cfg.insn_stack = NULL;
15876	return ret;
15877	}
15878
15879	static int check_abnormal_return(struct bpf_verifier_env *env)
15880	{
15881	int i;
15882
15883	for (i = `1`; i < env->subprog_cnt; i++) {
15884	if (env->subprog_info[i].has_ld_abs) {
15885	verbose(private_data: env, fmt: "LD_ABS is not allowed in subprogs without BTF\n");
15886	return -EINVAL;
15887	}
15888	if (env->subprog_info[i].has_tail_call) {
15889	verbose(private_data: env, fmt: "tail_call is not allowed in subprogs without BTF\n");
15890	return -EINVAL;
15891	}
15892	}
15893	return `0`;
15894	}
15895
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE	8
/* Upper bound accepted for a user-supplied func info record size */
#define MAX_FUNCINFO_REC_SIZE	252
15899
15900	static int check_btf_func_early(struct bpf_verifier_env *env,
15901	const union bpf_attr *attr,
15902	bpfptr_t uattr)
15903	{
15904	u32 krec_size = sizeof(struct bpf_func_info);
15905	const struct btf_type type, func_proto;
15906	u32 i, nfuncs, urec_size, min_size;
15907	struct bpf_func_info *krecord;
15908	struct bpf_prog *prog;
15909	const struct btf *btf;
15910	u32 prev_offset = `0`;
15911	bpfptr_t urecord;
15912	int ret = -ENOMEM;
15913
15914	nfuncs = attr->func_info_cnt;
15915	if (!nfuncs) {
15916	if (check_abnormal_return(env))
15917	return -EINVAL;
15918	return `0`;
15919	}
15920
15921	urec_size = attr->func_info_rec_size;
15922	if (urec_size < MIN_BPF_FUNCINFO_SIZE \|\|
15923	urec_size > MAX_FUNCINFO_REC_SIZE \|\|
15924	urec_size % sizeof(u32)) {
15925	verbose(private_data: env, fmt: "invalid func info rec size %u\n", urec_size);
15926	return -EINVAL;
15927	}
15928
15929	prog = env->prog;
15930	btf = prog->aux->btf;
15931
15932	urecord = make_bpfptr(addr: attr->func_info, is_kernel: uattr.is_kernel);
15933	min_size = min_t(u32, krec_size, urec_size);
15934
15935	krecord = kvcalloc(n: nfuncs, size: krec_size, GFP_KERNEL \| __GFP_NOWARN);
15936	if (!krecord)
15937	return -ENOMEM;
15938
15939	for (i = `0`; i < nfuncs; i++) {
15940	ret = bpf_check_uarg_tail_zero(uaddr: urecord, expected_size: krec_size, actual_size: urec_size);
15941	if (ret) {
15942	if (ret == -E2BIG) {
15943	verbose(private_data: env, fmt: "nonzero tailing record in func info");
15944	/ set the size kernel expects so loader can zero*
15945	* out the rest of the record.
15946	*/
15947	if (copy_to_bpfptr_offset(dst: uattr,
15948	offsetof(union bpf_attr, func_info_rec_size),
15949	src: &min_size, size: sizeof(min_size)))
15950	ret = -EFAULT;
15951	}
15952	goto err_free;
15953	}
15954
15955	if (copy_from_bpfptr(dst: &krecord[i], src: urecord, size: min_size)) {
15956	ret = -EFAULT;
15957	goto err_free;
15958	}
15959
15960	/ check insn_off /
15961	ret = -EINVAL;
15962	if (i == `0`) {
15963	if (krecord[i].insn_off) {
15964	verbose(private_data: env,
15965	fmt: "nonzero insn_off %u for the first func info record",
15966	krecord[i].insn_off);
15967	goto err_free;
15968	}
15969	} else if (krecord[i].insn_off <= prev_offset) {
15970	verbose(private_data: env,
15971	fmt: "same or smaller insn offset (%u) than previous func info record (%u)",
15972	krecord[i].insn_off, prev_offset);
15973	goto err_free;
15974	}
15975
15976	/ check type_id /
15977	type = btf_type_by_id(btf, type_id: krecord[i].type_id);
15978	if (!type \|\| !btf_type_is_func(t: type)) {
15979	verbose(private_data: env, fmt: "invalid type id %d in func info",
15980	krecord[i].type_id);
15981	goto err_free;
15982	}
15983
15984	func_proto = btf_type_by_id(btf, type_id: type->type);
15985	if (unlikely(!func_proto \|\| !btf_type_is_func_proto(func_proto)))
15986	/ btf_func_check() already verified it during BTF load /
15987	goto err_free;
15988
15989	prev_offset = krecord[i].insn_off;
15990	bpfptr_add(bpfptr: &urecord, val: urec_size);
15991	}
15992
15993	prog->aux->func_info = krecord;
15994	prog->aux->func_info_cnt = nfuncs;
15995	return `0`;
15996
15997	err_free:
15998	kvfree(addr: krecord);
15999	return ret;
16000	}
16001
16002	static int check_btf_func(struct bpf_verifier_env *env,
16003	const union bpf_attr *attr,
16004	bpfptr_t uattr)
16005	{
16006	const struct btf_type type, func_proto, *ret_type;
16007	u32 i, nfuncs, urec_size;
16008	struct bpf_func_info *krecord;
16009	struct bpf_func_info_aux *info_aux = NULL;
16010	struct bpf_prog *prog;
16011	const struct btf *btf;
16012	bpfptr_t urecord;
16013	bool scalar_return;
16014	int ret = -ENOMEM;
16015
16016	nfuncs = attr->func_info_cnt;
16017	if (!nfuncs) {
16018	if (check_abnormal_return(env))
16019	return -EINVAL;
16020	return `0`;
16021	}
16022	if (nfuncs != env->subprog_cnt) {
16023	verbose(private_data: env, fmt: "number of funcs in func_info doesn't match number of subprogs\n");
16024	return -EINVAL;
16025	}
16026
16027	urec_size = attr->func_info_rec_size;
16028
16029	prog = env->prog;
16030	btf = prog->aux->btf;
16031
16032	urecord = make_bpfptr(addr: attr->func_info, is_kernel: uattr.is_kernel);
16033
16034	krecord = prog->aux->func_info;
16035	info_aux = kcalloc(n: nfuncs, size: sizeof(*info_aux), GFP_KERNEL \| __GFP_NOWARN);
16036	if (!info_aux)
16037	return -ENOMEM;
16038
16039	for (i = `0`; i < nfuncs; i++) {
16040	/ check insn_off /
16041	ret = -EINVAL;
16042
16043	if (env->subprog_info[i].start != krecord[i].insn_off) {
16044	verbose(private_data: env, fmt: "func_info BTF section doesn't match subprog layout in BPF program\n");
16045	goto err_free;
16046	}
16047
16048	/ Already checked type_id /
16049	type = btf_type_by_id(btf, type_id: krecord[i].type_id);
16050	info_aux[i].linkage = BTF_INFO_VLEN(type->info);
16051	/ Already checked func_proto /
16052	func_proto = btf_type_by_id(btf, type_id: type->type);
16053
16054	ret_type = btf_type_skip_modifiers(btf, id: func_proto->type, NULL);
16055	scalar_return =
16056	btf_type_is_small_int(t: ret_type) \|\| btf_is_any_enum(t: ret_type);
16057	if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
16058	verbose(private_data: env, fmt: "LD_ABS is only allowed in functions that return 'int'.\n");
16059	goto err_free;
16060	}
16061	if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
16062	verbose(private_data: env, fmt: "tail_call is only allowed in functions that return 'int'.\n");
16063	goto err_free;
16064	}
16065
16066	bpfptr_add(bpfptr: &urecord, val: urec_size);
16067	}
16068
16069	prog->aux->func_info_aux = info_aux;
16070	return `0`;
16071
16072	err_free:
16073	kfree(objp: info_aux);
16074	return ret;
16075	}
16076
16077	static void adjust_btf_func(struct bpf_verifier_env *env)
16078	{
16079	struct bpf_prog_aux *aux = env->prog->aux;
16080	int i;
16081
16082	if (!aux->func_info)
16083	return;
16084
16085	/ func_info is not available for hidden subprogs /
16086	for (i = `0`; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
16087	aux->func_info[i].insn_off = env->subprog_info[i].start;
16088	}
16089
/* Bounds accepted for a user-supplied line info record size: at least up to
 * and including line_col, at most the func info record limit.
 */
#define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
16092
16093	static int check_btf_line(struct bpf_verifier_env *env,
16094	const union bpf_attr *attr,
16095	bpfptr_t uattr)
16096	{
16097	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = `0`;
16098	struct bpf_subprog_info *sub;
16099	struct bpf_line_info *linfo;
16100	struct bpf_prog *prog;
16101	const struct btf *btf;
16102	bpfptr_t ulinfo;
16103	int err;
16104
16105	nr_linfo = attr->line_info_cnt;
16106	if (!nr_linfo)
16107	return `0`;
16108	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
16109	return -EINVAL;
16110
16111	rec_size = attr->line_info_rec_size;
16112	if (rec_size < MIN_BPF_LINEINFO_SIZE \|\|
16113	rec_size > MAX_LINEINFO_REC_SIZE \|\|
16114	rec_size & (sizeof(u32) - `1`))
16115	return -EINVAL;
16116
16117	/ Need to zero it in case the userspace may*
16118	* pass in a smaller bpf_line_info object.
16119	*/
16120	linfo = kvcalloc(n: nr_linfo, size: sizeof(struct bpf_line_info),
16121	GFP_KERNEL \| __GFP_NOWARN);
16122	if (!linfo)
16123	return -ENOMEM;
16124
16125	prog = env->prog;
16126	btf = prog->aux->btf;
16127
16128	s = `0`;
16129	sub = env->subprog_info;
16130	ulinfo = make_bpfptr(addr: attr->line_info, is_kernel: uattr.is_kernel);
16131	expected_size = sizeof(struct bpf_line_info);
16132	ncopy = min_t(u32, expected_size, rec_size);
16133	for (i = `0`; i < nr_linfo; i++) {
16134	err = bpf_check_uarg_tail_zero(uaddr: ulinfo, expected_size, actual_size: rec_size);
16135	if (err) {
16136	if (err == -E2BIG) {
16137	verbose(private_data: env, fmt: "nonzero tailing record in line_info");
16138	if (copy_to_bpfptr_offset(dst: uattr,
16139	offsetof(union bpf_attr, line_info_rec_size),
16140	src: &expected_size, size: sizeof(expected_size)))
16141	err = -EFAULT;
16142	}
16143	goto err_free;
16144	}
16145
16146	if (copy_from_bpfptr(dst: &linfo[i], src: ulinfo, size: ncopy)) {
16147	err = -EFAULT;
16148	goto err_free;
16149	}
16150
16151	/*
16152	* Check insn_off to ensure
16153	* 1) strictly increasing AND
16154	* 2) bounded by prog->len
16155	*
16156	* The linfo[0].insn_off == 0 check logically falls into
16157	* the later "missing bpf_line_info for func..." case
16158	* because the first linfo[0].insn_off must be the
16159	* first sub also and the first sub must have
16160	* subprog_info[0].start == 0.
16161	*/
16162	if ((i && linfo[i].insn_off <= prev_offset) \|\|
16163	linfo[i].insn_off >= prog->len) {
16164	verbose(private_data: env, fmt: "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
16165	i, linfo[i].insn_off, prev_offset,
16166	prog->len);
16167	err = -EINVAL;
16168	goto err_free;
16169	}
16170
16171	if (!prog->insnsi[linfo[i].insn_off].code) {
16172	verbose(private_data: env,
16173	fmt: "Invalid insn code at line_info[%u].insn_off\n",
16174	i);
16175	err = -EINVAL;
16176	goto err_free;
16177	}
16178
16179	if (!btf_name_by_offset(btf, offset: linfo[i].line_off) \|\|
16180	!btf_name_by_offset(btf, offset: linfo[i].file_name_off)) {
16181	verbose(private_data: env, fmt: "Invalid line_info[%u].line_off or .file_name_off\n", i);
16182	err = -EINVAL;
16183	goto err_free;
16184	}
16185
16186	if (s != env->subprog_cnt) {
16187	if (linfo[i].insn_off == sub[s].start) {
16188	sub[s].linfo_idx = i;
16189	s++;
16190	} else if (sub[s].start < linfo[i].insn_off) {
16191	verbose(private_data: env, fmt: "missing bpf_line_info for func#%u\n", s);
16192	err = -EINVAL;
16193	goto err_free;
16194	}
16195	}
16196
16197	prev_offset = linfo[i].insn_off;
16198	bpfptr_add(bpfptr: &ulinfo, val: rec_size);
16199	}
16200
16201	if (s != env->subprog_cnt) {
16202	verbose(private_data: env, fmt: "missing bpf_line_info for %u funcs starting from func#%u\n",
16203	env->subprog_cnt - s, s);
16204	err = -EINVAL;
16205	goto err_free;
16206	}
16207
16208	prog->aux->linfo = linfo;
16209	prog->aux->nr_linfo = nr_linfo;
16210
16211	return `0`;
16212
16213	err_free:
16214	kvfree(addr: linfo);
16215	return err;
16216	}
16217
/* Bounds accepted for a user-supplied CO-RE relocation record size */
#define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
#define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
16220
16221	static int check_core_relo(struct bpf_verifier_env *env,
16222	const union bpf_attr *attr,
16223	bpfptr_t uattr)
16224	{
16225	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
16226	struct bpf_core_relo core_relo = {};
16227	struct bpf_prog *prog = env->prog;
16228	const struct btf *btf = prog->aux->btf;
16229	struct bpf_core_ctx ctx = {
16230	.log = &env->log,
16231	.btf = btf,
16232	};
16233	bpfptr_t u_core_relo;
16234	int err;
16235
16236	nr_core_relo = attr->core_relo_cnt;
16237	if (!nr_core_relo)
16238	return `0`;
16239	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
16240	return -EINVAL;
16241
16242	rec_size = attr->core_relo_rec_size;
16243	if (rec_size < MIN_CORE_RELO_SIZE \|\|
16244	rec_size > MAX_CORE_RELO_SIZE \|\|
16245	rec_size % sizeof(u32))
16246	return -EINVAL;
16247
16248	u_core_relo = make_bpfptr(addr: attr->core_relos, is_kernel: uattr.is_kernel);
16249	expected_size = sizeof(struct bpf_core_relo);
16250	ncopy = min_t(u32, expected_size, rec_size);
16251
16252	/ Unlike func_info and line_info, copy and apply each CO-RE*
16253	* relocation record one at a time.
16254	*/
16255	for (i = `0`; i < nr_core_relo; i++) {
16256	/ future proofing when sizeof(bpf_core_relo) changes /
16257	err = bpf_check_uarg_tail_zero(uaddr: u_core_relo, expected_size, actual_size: rec_size);
16258	if (err) {
16259	if (err == -E2BIG) {
16260	verbose(private_data: env, fmt: "nonzero tailing record in core_relo");
16261	if (copy_to_bpfptr_offset(dst: uattr,
16262	offsetof(union bpf_attr, core_relo_rec_size),
16263	src: &expected_size, size: sizeof(expected_size)))
16264	err = -EFAULT;
16265	}
16266	break;
16267	}
16268
16269	if (copy_from_bpfptr(dst: &core_relo, src: u_core_relo, size: ncopy)) {
16270	err = -EFAULT;
16271	break;
16272	}
16273
16274	if (core_relo.insn_off % `8` \|\| core_relo.insn_off / `8` >= prog->len) {
16275	verbose(private_data: env, fmt: "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
16276	i, core_relo.insn_off, prog->len);
16277	err = -EINVAL;
16278	break;
16279	}
16280
16281	err = bpf_core_apply(ctx: &ctx, relo: &core_relo, relo_idx: i,
16282	insn: &prog->insnsi[core_relo.insn_off / `8`]);
16283	if (err)
16284	break;
16285	bpfptr_add(bpfptr: &u_core_relo, val: rec_size);
16286	}
16287	return err;
16288	}
16289
16290	static int check_btf_info_early(struct bpf_verifier_env *env,
16291	const union bpf_attr *attr,
16292	bpfptr_t uattr)
16293	{
16294	struct btf *btf;
16295	int err;
16296
16297	if (!attr->func_info_cnt && !attr->line_info_cnt) {
16298	if (check_abnormal_return(env))
16299	return -EINVAL;
16300	return `0`;
16301	}
16302
16303	btf = btf_get_by_fd(fd: attr->prog_btf_fd);
16304	if (IS_ERR(ptr: btf))
16305	return PTR_ERR(ptr: btf);
16306	if (btf_is_kernel(btf)) {
16307	btf_put(btf);
16308	return -EACCES;
16309	}
16310	env->prog->aux->btf = btf;
16311
16312	err = check_btf_func_early(env, attr, uattr);
16313	if (err)
16314	return err;
16315	return `0`;
16316	}
16317
16318	static int check_btf_info(struct bpf_verifier_env *env,
16319	const union bpf_attr *attr,
16320	bpfptr_t uattr)
16321	{
16322	int err;
16323
16324	if (!attr->func_info_cnt && !attr->line_info_cnt) {
16325	if (check_abnormal_return(env))
16326	return -EINVAL;
16327	return `0`;
16328	}
16329
16330	err = check_btf_func(env, attr, uattr);
16331	if (err)
16332	return err;
16333
16334	err = check_btf_line(env, attr, uattr);
16335	if (err)
16336	return err;
16337
16338	err = check_core_relo(env, attr, uattr);
16339	if (err)
16340	return err;
16341
16342	return `0`;
16343	}
16344
16345	/ check %cur's range satisfies %old's /
16346	static bool range_within(const struct bpf_reg_state *old,
16347	const struct bpf_reg_state *cur)
16348	{
16349	return old->umin_value <= cur->umin_value &&
16350	old->umax_value >= cur->umax_value &&
16351	old->smin_value <= cur->smin_value &&
16352	old->smax_value >= cur->smax_value &&
16353	old->u32_min_value <= cur->u32_min_value &&
16354	old->u32_max_value >= cur->u32_max_value &&
16355	old->s32_min_value <= cur->s32_min_value &&
16356	old->s32_max_value >= cur->s32_max_value;
16357	}
16358
16359	/ If in the old state two registers had the same id, then they need to have*
16360	* the same id in the new state as well. But that id could be different from
16361	* the old state, so we need to track the mapping from old to new ids.
16362	* Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
16363	* regs with old id 5 must also have new id 9 for the new state to be safe. But
16364	* regs with a different old id could still have new id 9, we don't care about
16365	* that.
16366	* So we look through our idmap to see if this old id has been seen before. If
16367	* so, we require the new id to match; otherwise, we add the id pair to the map.
16368	*/
16369	static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
16370	{
16371	struct bpf_id_pair *map = idmap->map;
16372	unsigned int i;
16373
16374	/ either both IDs should be set or both should be zero /
16375	if (!!old_id != !!cur_id)
16376	return false;
16377
16378	if (old_id == `0`) / cur_id == 0 as well /
16379	return true;
16380
16381	for (i = `0`; i < BPF_ID_MAP_SIZE; i++) {
16382	if (!map[i].old) {
16383	/ Reached an empty slot; haven't seen this id before /
16384	map[i].old = old_id;
16385	map[i].cur = cur_id;
16386	return true;
16387	}
16388	if (map[i].old == old_id)
16389	return map[i].cur == cur_id;
16390	if (map[i].cur == cur_id)
16391	return false;
16392	}
16393	/ We ran out of idmap slots, which should be impossible /
16394	WARN_ON_ONCE(`1`);
16395	return false;
16396	}
16397
16398	/ Similar to check_ids(), but allocate a unique temporary ID*
16399	* for 'old_id' or 'cur_id' of zero.
16400	* This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
16401	*/
16402	static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
16403	{
16404	old_id = old_id ? old_id : ++idmap->tmp_id_gen;
16405	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
16406
16407	return check_ids(old_id, cur_id, idmap);
16408	}
16409
16410	static void clean_func_state(struct bpf_verifier_env *env,
16411	struct bpf_func_state *st)
16412	{
16413	enum bpf_reg_liveness live;
16414	int i, j;
16415
16416	for (i = `0`; i < BPF_REG_FP; i++) {
16417	live = st->regs[i].live;
16418	/ liveness must not touch this register anymore /
16419	st->regs[i].live \|= REG_LIVE_DONE;
16420	if (!(live & REG_LIVE_READ))
16421	/ since the register is unused, clear its state*
16422	* to make further comparison simpler
16423	*/
16424	__mark_reg_not_init(env, reg: &st->regs[i]);
16425	}
16426
16427	for (i = `0`; i < st->allocated_stack / BPF_REG_SIZE; i++) {
16428	live = st->stack[i].spilled_ptr.live;
16429	/ liveness must not touch this stack slot anymore /
16430	st->stack[i].spilled_ptr.live \|= REG_LIVE_DONE;
16431	if (!(live & REG_LIVE_READ)) {
16432	__mark_reg_not_init(env, reg: &st->stack[i].spilled_ptr);
16433	for (j = `0`; j < BPF_REG_SIZE; j++)
16434	st->stack[i].slot_type[j] = STACK_INVALID;
16435	}
16436	}
16437	}
16438
16439	static void clean_verifier_state(struct bpf_verifier_env *env,
16440	struct bpf_verifier_state *st)
16441	{
16442	int i;
16443
16444	if (st->frame[`0`]->regs[`0`].live & REG_LIVE_DONE)
16445	/ all regs in this state in all frames were already marked /
16446	return;
16447
16448	for (i = `0`; i <= st->curframe; i++)
16449	clean_func_state(env, st: st->frame[i]);
16450	}
16451
/* the parentage chains form a tree.
 * the verifier states are added to state lists at given insn and
 * pushed into state stack for future exploration.
 * when the verifier reaches bpf_exit insn some of the verifier states
 * stored in the state lists have their final liveness state already,
 * but a lot of states will get revised from liveness point of view when
 * the verifier explores other branches.
 * Example:
 * 1: r0 = 1
 * 2: if r1 == 100 goto pc+1
 * 3: r0 = 2
 * 4: exit
 * when the verifier reaches exit insn the register r0 in the state list of
 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
 * of insn 2 and goes exploring further. At the insn 4 it will walk the
 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
 *
 * Since the verifier pushes the branch states as it sees them while exploring
 * the program the condition of walking the branch instruction for the second
 * time means that all states below this branch were already explored and
 * their final liveness marks are already propagated.
 * Hence when the verifier completes the search of state list in is_state_visited()
 * we can call this clean_live_states() function to mark all liveness states
 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
 * will not be used.
 * This function also clears the registers and stack for states that !READ
 * to simplify state merging.
 *
 * Important note here that walking the same branch instruction in the callee
 * doesn't mean that the states are DONE. The verifier has to compare
 * the callsites
 */
16484	static void clean_live_states(struct bpf_verifier_env env, int* insn,
16485	struct bpf_verifier_state *cur)
16486	{
16487	struct bpf_verifier_state_list *sl;
16488
16489	sl = *explored_state(env, idx: insn);
16490	while (sl) {
16491	if (sl->state.branches)
16492	goto next;
16493	if (sl->state.insn_idx != insn \|\|
16494	!same_callsites(a: &sl->state, b: cur))
16495	goto next;
16496	clean_verifier_state(env, st: &sl->state);
16497	next:
16498	sl = sl->next;
16499	}
16500	}
16501
16502	static bool regs_exact(const struct bpf_reg_state *rold,
16503	const struct bpf_reg_state *rcur,
16504	struct bpf_idmap *idmap)
16505	{
16506	return memcmp(p: rold, q: rcur, offsetof(struct bpf_reg_state, id)) == `0` &&
16507	check_ids(old_id: rold->id, cur_id: rcur->id, idmap) &&
16508	check_ids(old_id: rold->ref_obj_id, cur_id: rcur->ref_obj_id, idmap);
16509	}
16510
/* Strictness selector for regsafe(), stacksafe() and their callers. */
enum exact_level {
	/* regsafe() may accept registers the old state never read and
	 * imprecise scalars without comparing their contents
	 */
	NOT_EXACT,
	/* regsafe() defers to regs_exact() */
	EXACT,
	/* no liveness/precision shortcuts in regsafe(), but values are still
	 * compared with range_within()/tnum_in() where applicable
	 */
	RANGE_WITHIN
};
16516
16517	/ Returns true if (rold safe implies rcur safe) /
16518	static bool regsafe(struct bpf_verifier_env env, struct* bpf_reg_state *rold,
16519	struct bpf_reg_state rcur, struct* bpf_idmap *idmap,
16520	enum exact_level exact)
16521	{
16522	if (exact == EXACT)
16523	return regs_exact(rold, rcur, idmap);
16524
16525	if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT)
16526	/ explored state didn't use this /
16527	return true;
16528	if (rold->type == NOT_INIT) {
16529	if (exact == NOT_EXACT \|\| rcur->type == NOT_INIT)
16530	/ explored state can't have used this /
16531	return true;
16532	}
16533
16534	/ Enforce that register types have to match exactly, including their*
16535	* modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
16536	* rule.
16537	*
16538	* One can make a point that using a pointer register as unbounded
16539	* SCALAR would be technically acceptable, but this could lead to
16540	* pointer leaks because scalars are allowed to leak while pointers
16541	* are not. We could make this safe in special cases if root is
16542	* calling us, but it's probably not worth the hassle.
16543	*
16544	* Also, register types that are not MAYBE_NULL could technically be
16545	* safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
16546	* is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
16547	* to the same map).
16548	* However, if the old MAYBE_NULL register then got NULL checked,
16549	* doing so could have affected others with the same id, and we can't
16550	* check for that because we lost the id when we converted to
16551	* a non-MAYBE_NULL variant.
16552	* So, as a general rule we don't allow mixing MAYBE_NULL and
16553	* non-MAYBE_NULL registers as well.
16554	*/
16555	if (rold->type != rcur->type)
16556	return false;
16557
16558	switch (base_type(type: rold->type)) {
16559	case SCALAR_VALUE:
16560	if (env->explore_alu_limits) {
16561	/ explore_alu_limits disables tnum_in() and range_within()*
16562	* logic and requires everything to be strict
16563	*/
16564	return memcmp(p: rold, q: rcur, offsetof(struct bpf_reg_state, id)) == `0` &&
16565	check_scalar_ids(old_id: rold->id, cur_id: rcur->id, idmap);
16566	}
16567	if (!rold->precise && exact == NOT_EXACT)
16568	return true;
16569	/ Why check_ids() for scalar registers?*
16570	*
16571	* Consider the following BPF code:
16572	* 1: r6 = ... unbound scalar, ID=a ...
16573	* 2: r7 = ... unbound scalar, ID=b ...
16574	* 3: if (r6 > r7) goto +1
16575	* 4: r6 = r7
16576	* 5: if (r6 > X) goto ...
16577	* 6: ... memory operation using r7 ...
16578	*
16579	* First verification path is [1-6]:
16580	* - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
16581	* - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
16582	* r7 <= X, because r6 and r7 share same id.
16583	* Next verification path is [1-4, 6].
16584	*
16585	* Instruction (6) would be reached in two states:
16586	* I. r6{.id=b}, r7{.id=b} via path 1-6;
16587	* II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
16588	*
16589	* Use check_ids() to distinguish these states.
16590	* ---
16591	* Also verify that new value satisfies old value range knowledge.
16592	*/
16593	return range_within(old: rold, cur: rcur) &&
16594	tnum_in(a: rold->var_off, b: rcur->var_off) &&
16595	check_scalar_ids(old_id: rold->id, cur_id: rcur->id, idmap);
16596	case PTR_TO_MAP_KEY:
16597	case PTR_TO_MAP_VALUE:
16598	case PTR_TO_MEM:
16599	case PTR_TO_BUF:
16600	case PTR_TO_TP_BUFFER:
16601	/ If the new min/max/var_off satisfy the old ones and*
16602	* everything else matches, we are OK.
16603	*/
16604	return memcmp(p: rold, q: rcur, offsetof(struct bpf_reg_state, var_off)) == `0` &&
16605	range_within(old: rold, cur: rcur) &&
16606	tnum_in(a: rold->var_off, b: rcur->var_off) &&
16607	check_ids(old_id: rold->id, cur_id: rcur->id, idmap) &&
16608	check_ids(old_id: rold->ref_obj_id, cur_id: rcur->ref_obj_id, idmap);
16609	case PTR_TO_PACKET_META:
16610	case PTR_TO_PACKET:
16611	/ We must have at least as much range as the old ptr*
16612	* did, so that any accesses which were safe before are
16613	* still safe. This is true even if old range < old off,
16614	* since someone could have accessed through (ptr - k), or
16615	* even done ptr -= k in a register, to get a safe access.
16616	*/
16617	if (rold->range > rcur->range)
16618	return false;
16619	/ If the offsets don't match, we can't trust our alignment;*
16620	* nor can we be sure that we won't fall out of range.
16621	*/
16622	if (rold->off != rcur->off)
16623	return false;
16624	/ id relations must be preserved /
16625	if (!check_ids(old_id: rold->id, cur_id: rcur->id, idmap))
16626	return false;
16627	/ new val must satisfy old val knowledge /
16628	return range_within(old: rold, cur: rcur) &&
16629	tnum_in(a: rold->var_off, b: rcur->var_off);
16630	case PTR_TO_STACK:
16631	/ two stack pointers are equal only if they're pointing to*
16632	* the same stack frame, since fp-8 in foo != fp-8 in bar
16633	*/
16634	return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
16635	case PTR_TO_ARENA:
16636	return true;
16637	default:
16638	return regs_exact(rold, rcur, idmap);
16639	}
16640	}
16641
16642	static struct bpf_reg_state unbound_reg;
16643
16644	static __init int unbound_reg_init(void)
16645	{
16646	__mark_reg_unknown_imprecise(reg: &unbound_reg);
16647	unbound_reg.live \|= REG_LIVE_READ;
16648	return `0`;
16649	}
16650	late_initcall(unbound_reg_init);
16651
16652	static bool is_stack_all_misc(struct bpf_verifier_env *env,
16653	struct bpf_stack_state *stack)
16654	{
16655	u32 i;
16656
16657	for (i = `0`; i < ARRAY_SIZE(stack->slot_type); ++i) {
16658	if ((stack->slot_type[i] == STACK_MISC) \|\|
16659	(stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
16660	continue;
16661	return false;
16662	}
16663
16664	return true;
16665	}
16666
16667	static struct bpf_reg_state scalar_reg_for_stack(struct* bpf_verifier_env *env,
16668	struct bpf_stack_state *stack)
16669	{
16670	if (is_spilled_scalar_reg64(stack))
16671	return &stack->spilled_ptr;
16672
16673	if (is_stack_all_misc(env, stack))
16674	return &unbound_reg;
16675
16676	return NULL;
16677	}
16678
16679	static bool stacksafe(struct bpf_verifier_env env, struct* bpf_func_state *old,
16680	struct bpf_func_state cur, struct* bpf_idmap *idmap,
16681	enum exact_level exact)
16682	{
16683	int i, spi;
16684
16685	/ walk slots of the explored stack and ignore any additional*
16686	* slots in the current stack, since explored(safe) state
16687	* didn't use them
16688	*/
16689	for (i = `0`; i < old->allocated_stack; i++) {
16690	struct bpf_reg_state old_reg, cur_reg;
16691
16692	spi = i / BPF_REG_SIZE;
16693
16694	if (exact != NOT_EXACT &&
16695	old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16696	cur->stack[spi].slot_type[i % BPF_REG_SIZE])
16697	return false;
16698
16699	if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
16700	&& exact == NOT_EXACT) {
16701	i += BPF_REG_SIZE - `1`;
16702	/ explored state didn't use this /
16703	continue;
16704	}
16705
16706	if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
16707	continue;
16708
16709	if (env->allow_uninit_stack &&
16710	old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
16711	continue;
16712
16713	/ explored stack has more populated slots than current stack*
16714	* and these slots were used
16715	*/
16716	if (i >= cur->allocated_stack)
16717	return false;
16718
16719	/ 64-bit scalar spill vs all slots MISC and vice versa.*
16720	* Load from all slots MISC produces unbound scalar.
16721	* Construct a fake register for such stack and call
16722	* regsafe() to ensure scalar ids are compared.
16723	*/
16724	old_reg = scalar_reg_for_stack(env, stack: &old->stack[spi]);
16725	cur_reg = scalar_reg_for_stack(env, stack: &cur->stack[spi]);
16726	if (old_reg && cur_reg) {
16727	if (!regsafe(env, rold: old_reg, rcur: cur_reg, idmap, exact))
16728	return false;
16729	i += BPF_REG_SIZE - `1`;
16730	continue;
16731	}
16732
16733	/ if old state was safe with misc data in the stack*
16734	* it will be safe with zero-initialized stack.
16735	* The opposite is not true
16736	*/
16737	if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
16738	cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
16739	continue;
16740	if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16741	cur->stack[spi].slot_type[i % BPF_REG_SIZE])
16742	/ Ex: old explored (safe) state has STACK_SPILL in*
16743	* this stack slot, but current has STACK_MISC ->
16744	* this verifier states are not equivalent,
16745	* return false to continue verification of this path
16746	*/
16747	return false;
16748	if (i % BPF_REG_SIZE != BPF_REG_SIZE - `1`)
16749	continue;
16750	/ Both old and cur are having same slot_type /
16751	switch (old->stack[spi].slot_type[BPF_REG_SIZE - `1`]) {
16752	case STACK_SPILL:
16753	/ when explored and current stack slot are both storing*
16754	* spilled registers, check that stored pointers types
16755	* are the same as well.
16756	* Ex: explored safe path could have stored
16757	* (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
16758	* but current path has stored:
16759	* (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
16760	* such verifier states are not equivalent.
16761	* return false to continue verification of this path
16762	*/
16763	if (!regsafe(env, rold: &old->stack[spi].spilled_ptr,
16764	rcur: &cur->stack[spi].spilled_ptr, idmap, exact))
16765	return false;
16766	break;
16767	case STACK_DYNPTR:
16768	old_reg = &old->stack[spi].spilled_ptr;
16769	cur_reg = &cur->stack[spi].spilled_ptr;
16770	if (old_reg->dynptr.type != cur_reg->dynptr.type \|\|
16771	old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot \|\|
16772	!check_ids(old_id: old_reg->ref_obj_id, cur_id: cur_reg->ref_obj_id, idmap))
16773	return false;
16774	break;
16775	case STACK_ITER:
16776	old_reg = &old->stack[spi].spilled_ptr;
16777	cur_reg = &cur->stack[spi].spilled_ptr;
16778	/ iter.depth is not compared between states as it*
16779	* doesn't matter for correctness and would otherwise
16780	* prevent convergence; we maintain it only to prevent
16781	* infinite loop check triggering, see
16782	* iter_active_depths_differ()
16783	*/
16784	if (old_reg->iter.btf != cur_reg->iter.btf \|\|
16785	old_reg->iter.btf_id != cur_reg->iter.btf_id \|\|
16786	old_reg->iter.state != cur_reg->iter.state \|\|
16787	/ ignore {old_reg,cur_reg}->iter.depth, see above /
16788	!check_ids(old_id: old_reg->ref_obj_id, cur_id: cur_reg->ref_obj_id, idmap))
16789	return false;
16790	break;
16791	case STACK_MISC:
16792	case STACK_ZERO:
16793	case STACK_INVALID:
16794	continue;
16795	/ Ensure that new unhandled slot types return false by default /
16796	default:
16797	return false;
16798	}
16799	}
16800	return true;
16801	}
16802
16803	static bool refsafe(struct bpf_func_state old, struct* bpf_func_state *cur,
16804	struct bpf_idmap *idmap)
16805	{
16806	int i;
16807
16808	if (old->acquired_refs != cur->acquired_refs)
16809	return false;
16810
16811	for (i = `0`; i < old->acquired_refs; i++) {
16812	if (!check_ids(old_id: old->refs[i].id, cur_id: cur->refs[i].id, idmap))
16813	return false;
16814	}
16815
16816	return true;
16817	}
16818
/* compare two verifier states
 *
 * all states stored in state_list are known to be valid, since
 * verifier reached 'bpf_exit' instruction through them
 *
 * this function is called when verifier exploring different branches of
 * execution popped from the state stack. If it sees an old state that has
 * more strict register state and more strict stack state then this execution
 * branch doesn't need to be explored further, since verifier already
 * concluded that more strict state leads to valid finish.
 *
 * Therefore two states are equivalent if register state is more conservative
 * and explored stack state is more conservative than the current one.
 * Example:
 *       explored                   current
 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
 *
 * In other words if current stack state (one being explored) has more
 * valid slots than old one that already passed validation, it means
 * the verifier can stop exploring and conclude that current state is valid too
 *
 * Similarly with registers. If explored state has register type as invalid
 * whereas register type in current state is meaningful, it means that
 * the current state will reach 'bpf_exit' instruction safely
 */
16845	static bool func_states_equal(struct bpf_verifier_env env, struct* bpf_func_state *old,
16846	struct bpf_func_state cur, enum* exact_level exact)
16847	{
16848	int i;
16849
16850	if (old->callback_depth > cur->callback_depth)
16851	return false;
16852
16853	for (i = `0`; i < MAX_BPF_REG; i++)
16854	if (!regsafe(env, rold: &old->regs[i], rcur: &cur->regs[i],
16855	idmap: &env->idmap_scratch, exact))
16856	return false;
16857
16858	if (!stacksafe(env, old, cur, idmap: &env->idmap_scratch, exact))
16859	return false;
16860
16861	if (!refsafe(old, cur, idmap: &env->idmap_scratch))
16862	return false;
16863
16864	return true;
16865	}
16866
16867	static void reset_idmap_scratch(struct bpf_verifier_env *env)
16868	{
16869	env->idmap_scratch.tmp_id_gen = env->id_gen;
16870	memset(&env->idmap_scratch.map, `0`, sizeof(env->idmap_scratch.map));
16871	}
16872
16873	static bool states_equal(struct bpf_verifier_env *env,
16874	struct bpf_verifier_state *old,
16875	struct bpf_verifier_state *cur,
16876	enum exact_level exact)
16877	{
16878	int i;
16879
16880	if (old->curframe != cur->curframe)
16881	return false;
16882
16883	reset_idmap_scratch(env);
16884
16885	/ Verification state from speculative execution simulation*
16886	* must never prune a non-speculative execution one.
16887	*/
16888	if (old->speculative && !cur->speculative)
16889	return false;
16890
16891	if (old->active_lock.ptr != cur->active_lock.ptr)
16892	return false;
16893
16894	/ Old and cur active_lock's have to be either both present*
16895	* or both absent.
16896	*/
16897	if (!!old->active_lock.id != !!cur->active_lock.id)
16898	return false;
16899
16900	if (old->active_lock.id &&
16901	!check_ids(old_id: old->active_lock.id, cur_id: cur->active_lock.id, idmap: &env->idmap_scratch))
16902	return false;
16903
16904	if (old->active_rcu_lock != cur->active_rcu_lock)
16905	return false;
16906
16907	/ for states to be equal callsites have to be the same*
16908	* and all frame states need to be equivalent
16909	*/
16910	for (i = `0`; i <= old->curframe; i++) {
16911	if (old->frame[i]->callsite != cur->frame[i]->callsite)
16912	return false;
16913	if (!func_states_equal(env, old: old->frame[i], cur: cur->frame[i], exact))
16914	return false;
16915	}
16916	return true;
16917	}
16918
16919	/ Return 0 if no propagation happened. Return negative error code if error*
16920	* happened. Otherwise, return the propagated bit.
16921	*/
16922	static int propagate_liveness_reg(struct bpf_verifier_env *env,
16923	struct bpf_reg_state *reg,
16924	struct bpf_reg_state *parent_reg)
16925	{
16926	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
16927	u8 flag = reg->live & REG_LIVE_READ;
16928	int err;
16929
16930	/ When comes here, read flags of PARENT_REG or REG could be any of*
16931	* REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
16932	* of propagation if PARENT_REG has strongest REG_LIVE_READ64.
16933	*/
16934	if (parent_flag == REG_LIVE_READ64 \|\|
16935	/ Or if there is no read flag from REG. /
16936	!flag \|\|
16937	/ Or if the read flag from REG is the same as PARENT_REG. /
16938	parent_flag == flag)
16939	return `0`;
16940
16941	err = mark_reg_read(env, state: reg, parent: parent_reg, flag);
16942	if (err)
16943	return err;
16944
16945	return flag;
16946	}
16947
16948	/ A write screens off any subsequent reads; but write marks come from the*
16949	* straight-line code between a state and its parent. When we arrive at an
16950	* equivalent state (jump target or such) we didn't arrive by the straight-line
16951	* code, so read marks in the state must propagate to the parent regardless
16952	* of the state's write marks. That's what 'parent == state->parent' comparison
16953	* in mark_reg_read() is for.
16954	*/
16955	static int propagate_liveness(struct bpf_verifier_env *env,
16956	const struct bpf_verifier_state *vstate,
16957	struct bpf_verifier_state *vparent)
16958	{
16959	struct bpf_reg_state state_reg, parent_reg;
16960	struct bpf_func_state state, parent;
16961	int i, frame, err = `0`;
16962
16963	if (vparent->curframe != vstate->curframe) {
16964	WARN(`1`, "propagate_live: parent frame %d current frame %d\n",
16965	vparent->curframe, vstate->curframe);
16966	return -EFAULT;
16967	}
16968	/ Propagate read liveness of registers... /
16969	BUILD_BUG_ON(BPF_REG_FP + `1` != MAX_BPF_REG);
16970	for (frame = `0`; frame <= vstate->curframe; frame++) {
16971	parent = vparent->frame[frame];
16972	state = vstate->frame[frame];
16973	parent_reg = parent->regs;
16974	state_reg = state->regs;
16975	/ We don't need to worry about FP liveness, it's read-only /
16976	for (i = frame < vstate->curframe ? BPF_REG_6 : `0`; i < BPF_REG_FP; i++) {
16977	err = propagate_liveness_reg(env, reg: &state_reg[i],
16978	parent_reg: &parent_reg[i]);
16979	if (err < `0`)
16980	return err;
16981	if (err == REG_LIVE_READ64)
16982	mark_insn_zext(env, reg: &parent_reg[i]);
16983	}
16984
16985	/ Propagate stack slots. /
16986	for (i = `0`; i < state->allocated_stack / BPF_REG_SIZE &&
16987	i < parent->allocated_stack / BPF_REG_SIZE; i++) {
16988	parent_reg = &parent->stack[i].spilled_ptr;
16989	state_reg = &state->stack[i].spilled_ptr;
16990	err = propagate_liveness_reg(env, reg: state_reg,
16991	parent_reg);
16992	if (err < `0`)
16993	return err;
16994	}
16995	}
16996	return `0`;
16997	}
16998
16999	/ find precise scalars in the previous equivalent state and*
17000	* propagate them into the current state
17001	*/
17002	static int propagate_precision(struct bpf_verifier_env *env,
17003	const struct bpf_verifier_state *old)
17004	{
17005	struct bpf_reg_state *state_reg;
17006	struct bpf_func_state *state;
17007	int i, err = `0`, fr;
17008	bool first;
17009
17010	for (fr = old->curframe; fr >= `0`; fr--) {
17011	state = old->frame[fr];
17012	state_reg = state->regs;
17013	first = true;
17014	for (i = `0`; i < BPF_REG_FP; i++, state_reg++) {
17015	if (state_reg->type != SCALAR_VALUE \|\|
17016	!state_reg->precise \|\|
17017	!(state_reg->live & REG_LIVE_READ))
17018	continue;
17019	if (env->log.level & BPF_LOG_LEVEL2) {
17020	if (first)
17021	verbose(private_data: env, fmt: "frame %d: propagating r%d", fr, i);
17022	else
17023	verbose(private_data: env, fmt: ",r%d", i);
17024	}
17025	bt_set_frame_reg(bt: &env->bt, frame: fr, reg: i);
17026	first = false;
17027	}
17028
17029	for (i = `0`; i < state->allocated_stack / BPF_REG_SIZE; i++) {
17030	if (!is_spilled_reg(stack: &state->stack[i]))
17031	continue;
17032	state_reg = &state->stack[i].spilled_ptr;
17033	if (state_reg->type != SCALAR_VALUE \|\|
17034	!state_reg->precise \|\|
17035	!(state_reg->live & REG_LIVE_READ))
17036	continue;
17037	if (env->log.level & BPF_LOG_LEVEL2) {
17038	if (first)
17039	verbose(private_data: env, fmt: "frame %d: propagating fp%d",
17040	fr, (-i - `1`) * BPF_REG_SIZE);
17041	else
17042	verbose(private_data: env, fmt: ",fp%d", (-i - `1`) * BPF_REG_SIZE);
17043	}
17044	bt_set_frame_slot(bt: &env->bt, frame: fr, slot: i);
17045	first = false;
17046	}
17047	if (!first)
17048	verbose(private_data: env, fmt: "\n");
17049	}
17050
17051	err = mark_chain_precision_batch(env);
17052	if (err < `0`)
17053	return err;
17054
17055	return `0`;
17056	}
17057
17058	static bool states_maybe_looping(struct bpf_verifier_state *old,
17059	struct bpf_verifier_state *cur)
17060	{
17061	struct bpf_func_state fold, fcur;
17062	int i, fr = cur->curframe;
17063
17064	if (old->curframe != fr)
17065	return false;
17066
17067	fold = old->frame[fr];
17068	fcur = cur->frame[fr];
17069	for (i = `0`; i < MAX_BPF_REG; i++)
17070	if (memcmp(p: &fold->regs[i], q: &fcur->regs[i],
17071	offsetof(struct bpf_reg_state, parent)))
17072	return false;
17073	return true;
17074	}
17075
17076	static bool is_iter_next_insn(struct bpf_verifier_env env, int* insn_idx)
17077	{
17078	return env->insn_aux_data[insn_idx].is_iter_next;
17079	}
17080
/* is_state_visited() handles iter_next() (see process_iter_next_call() for
 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
 * states to match, which otherwise would look like an infinite loop. So while
 * iter_next() calls are taken care of, we still need to be careful and
 * prevent erroneous and too eager declaration of "infinite loop", when
 * iterators are involved.
 *
 * Here's a situation in pseudo-BPF assembly form:
 *
 *   0: again:                          ; set up iter_next() call args
 *   1:   r1 = &it                      ; <CHECKPOINT HERE>
 *   2:   call bpf_iter_num_next        ; this is iter_next() call
 *   3:   if r0 == 0 goto done
 *   4:   ... something useful here ...
 *   5:   goto again                    ; another iteration
 *   6: done:
 *   7:   r1 = &it
 *   8:   call bpf_iter_num_destroy     ; clean up iter state
 *   9:   exit
 *
 * This is a typical loop. Let's assume that we have a prune point at 1:,
 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
 * again`, assuming other heuristics don't get in a way).
 *
 * When we first time come to 1:, let's say we have some state X. We proceed
 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
 * Now we come back to validate that forked ACTIVE state. We proceed through
 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
 * are converging. But the problem is that we don't know that yet, as this
 * convergence has to happen at iter_next() call site only. So if nothing is
 * done, at 1: verifier will use bounded loop logic and declare infinite
 * looping (and would be *technically* correct, if not for iterator's
 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
 * don't want that. So what we do in process_iter_next_call() when we go on
 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
 * a different iteration. So when we suspect an infinite loop, we additionally
 * check if any of the *ACTIVE* iterator states depths differ. If yes, we
 * pretend we are not looping and wait for next iter_next() call.
 *
 * This only applies to ACTIVE state. In DRAINED state we don't expect to
 * loop, because that would actually mean infinite loop, as DRAINED state is
 * "sticky", and so we'll keep returning into the same instruction with the
 * same state (at least in one of possible code paths).
 *
 * This approach allows to keep infinite loop heuristic even in the face of
 * active iterator. E.g., C snippet below is and will be detected as
 * infinitely looping:
 *
 *   struct bpf_iter_num it;
 *   int *p, x;
 *
 *   bpf_iter_num_new(&it, 0, 10);
 *   while ((p = bpf_iter_num_next(&it))) {
 *       x = *p;
 *       while (x--) {} // <<-- infinite loop here
 *   }
 *
 */
17139	static bool iter_active_depths_differ(struct bpf_verifier_state old, struct* bpf_verifier_state *cur)
17140	{
17141	struct bpf_reg_state slot, cur_slot;
17142	struct bpf_func_state *state;
17143	int i, fr;
17144
17145	for (fr = old->curframe; fr >= `0`; fr--) {
17146	state = old->frame[fr];
17147	for (i = `0`; i < state->allocated_stack / BPF_REG_SIZE; i++) {
17148	if (state->stack[i].slot_type[`0`] != STACK_ITER)
17149	continue;
17150
17151	slot = &state->stack[i].spilled_ptr;
17152	if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
17153	continue;
17154
17155	cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
17156	if (cur_slot->iter.depth != slot->iter.depth)
17157	return true;
17158	}
17159	}
17160	return false;
17161	}
17162
17163	static int is_state_visited(struct bpf_verifier_env env, int* insn_idx)
17164	{
17165	struct bpf_verifier_state_list *new_sl;
17166	struct bpf_verifier_state_list sl, *pprev;
17167	struct bpf_verifier_state cur = env->cur_state, new, *loop_entry;
17168	int i, j, n, err, states_cnt = `0`;
17169	bool force_new_state = env->test_state_freq \|\| is_force_checkpoint(env, insn_idx);
17170	bool add_new_state = force_new_state;
17171	bool force_exact;
17172
17173	/ bpf progs typically have pruning point every 4 instructions*
17174	* http://vger.kernel.org/bpfconf2019.html#session-1
17175	* Do not add new state for future pruning if the verifier hasn't seen
17176	* at least 2 jumps and at least 8 instructions.
17177	* This heuristics helps decrease 'total_states' and 'peak_states' metric.
17178	* In tests that amounts to up to 50% reduction into total verifier
17179	* memory consumption and 20% verifier time speedup.
17180	*/
17181	if (env->jmps_processed - env->prev_jmps_processed >= `2` &&
17182	env->insn_processed - env->prev_insn_processed >= `8`)
17183	add_new_state = true;
17184
17185	pprev = explored_state(env, idx: insn_idx);
17186	sl = *pprev;
17187
17188	clean_live_states(env, insn: insn_idx, cur);
17189
17190	while (sl) {
17191	states_cnt++;
17192	if (sl->state.insn_idx != insn_idx)
17193	goto next;
17194
17195	if (sl->state.branches) {
17196	struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
17197
17198	if (frame->in_async_callback_fn &&
17199	frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
17200	/ Different async_entry_cnt means that the verifier is*
17201	* processing another entry into async callback.
17202	* Seeing the same state is not an indication of infinite
17203	* loop or infinite recursion.
17204	* But finding the same state doesn't mean that it's safe
17205	* to stop processing the current state. The previous state
17206	* hasn't yet reached bpf_exit, since state.branches > 0.
17207	* Checking in_async_callback_fn alone is not enough either.
17208	* Since the verifier still needs to catch infinite loops
17209	* inside async callbacks.
17210	*/
17211	goto skip_inf_loop_check;
17212	}
17213	/ BPF open-coded iterators loop detection is special.*
17214	* states_maybe_looping() logic is too simplistic in detecting
17215	* states that might be equivalent, because it doesn't know
17216	* about ID remapping, so don't even perform it.
17217	* See process_iter_next_call() and iter_active_depths_differ()
17218	* for overview of the logic. When current and one of parent
17219	* states are detected as equivalent, it's a good thing: we prove
17220	* convergence and can stop simulating further iterations.
17221	* It's safe to assume that iterator loop will finish, taking into
17222	* account iter_next() contract of eventually returning
17223	* sticky NULL result.
17224	*
17225	* Note, that states have to be compared exactly in this case because
17226	* read and precision marks might not be finalized inside the loop.
17227	* E.g. as in the program below:
17228	*
17229	* 1. r7 = -16
17230	* 2. r6 = bpf_get_prandom_u32()
17231	* 3. while (bpf_iter_num_next(&fp[-8])) {
17232	* 4. if (r6 != 42) {
17233	* 5. r7 = -32
17234	* 6. r6 = bpf_get_prandom_u32()
17235	* 7. continue
17236	* 8. }
17237	* 9. r0 = r10
17238	* 10. r0 += r7
17239	* 11. r8 = (u64 )(r0 + 0)
17240	* 12. r6 = bpf_get_prandom_u32()
17241	* 13. }
17242	*
17243	* Here verifier would first visit path 1-3, create a checkpoint at 3
17244	* with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
17245	* not have read or precision mark for r7 yet, thus inexact states
17246	* comparison would discard current state with r7=-32
17247	* => unsafe memory access at 11 would not be caught.
17248	*/
17249	if (is_iter_next_insn(env, insn_idx)) {
17250	if (states_equal(env, old: &sl->state, cur, exact: RANGE_WITHIN)) {
17251	struct bpf_func_state *cur_frame;
17252	struct bpf_reg_state iter_state, iter_reg;
17253	int spi;
17254
17255	cur_frame = cur->frame[cur->curframe];
17256	/ btf_check_iter_kfuncs() enforces that*
17257	* iter state pointer is always the first arg
17258	*/
17259	iter_reg = &cur_frame->regs[BPF_REG_1];
17260	/ current state is valid due to states_equal(),*
17261	* so we can assume valid iter and reg state,
17262	* no need for extra (re-)validations
17263	*/
17264	spi = __get_spi(off: iter_reg->off + iter_reg->var_off.value);
17265	iter_state = &func(env, reg: iter_reg)->stack[spi].spilled_ptr;
17266	if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
17267	update_loop_entry(cur, hdr: &sl->state);
17268	goto hit;
17269	}
17270	}
17271	goto skip_inf_loop_check;
17272	}
17273	if (is_may_goto_insn_at(env, insn_idx)) {
17274	if (states_equal(env, old: &sl->state, cur, exact: RANGE_WITHIN)) {
17275	update_loop_entry(cur, hdr: &sl->state);
17276	goto hit;
17277	}
17278	goto skip_inf_loop_check;
17279	}
17280	if (calls_callback(env, insn_idx)) {
17281	if (states_equal(env, old: &sl->state, cur, exact: RANGE_WITHIN))
17282	goto hit;
17283	goto skip_inf_loop_check;
17284	}
17285	/ attempt to detect infinite loop to avoid unnecessary doomed work /
17286	if (states_maybe_looping(old: &sl->state, cur) &&
17287	states_equal(env, old: &sl->state, cur, exact: EXACT) &&
17288	!iter_active_depths_differ(old: &sl->state, cur) &&
17289	sl->state.may_goto_depth == cur->may_goto_depth &&
17290	sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
17291	verbose_linfo(env, insn_off: insn_idx, prefix_fmt: "; ");
17292	verbose(private_data: env, fmt: "infinite loop detected at insn %d\n", insn_idx);
17293	verbose(private_data: env, fmt: "cur state:");
17294	print_verifier_state(env, state: cur->frame[cur->curframe], print_all: true);
17295	verbose(private_data: env, fmt: "old state:");
17296	print_verifier_state(env, state: sl->state.frame[cur->curframe], print_all: true);
17297	return -EINVAL;
17298	}
17299	/ if the verifier is processing a loop, avoid adding new state*
17300	* too often, since different loop iterations have distinct
17301	* states and may not help future pruning.
17302	* This threshold shouldn't be too low to make sure that
17303	* a loop with large bound will be rejected quickly.
17304	* The most abusive loop will be:
17305	* r1 += 1
17306	* if r1 < 1000000 goto pc-2
17307	* 1M insn_procssed limit / 100 == 10k peak states.
17308	* This threshold shouldn't be too high either, since states
17309	* at the end of the loop are likely to be useful in pruning.
17310	*/
17311	skip_inf_loop_check:
17312	if (!force_new_state &&
17313	env->jmps_processed - env->prev_jmps_processed < `20` &&
17314	env->insn_processed - env->prev_insn_processed < `100`)
17315	add_new_state = false;
17316	goto miss;
17317	}
17318	/ If sl->state is a part of a loop and this loop's entry is a part of*
17319	* current verification path then states have to be compared exactly.
17320	* 'force_exact' is needed to catch the following case:
17321	*
17322	* initial Here state 'succ' was processed first,
17323	* \| it was eventually tracked to produce a
17324	* V state identical to 'hdr'.
17325	* .---------> hdr All branches from 'succ' had been explored
17326	* \| \| and thus 'succ' has its .branches == 0.
17327	* \| V
17328	* \| .------... Suppose states 'cur' and 'succ' correspond
17329	* \| \| \| to the same instruction + callsites.
17330	* \| V V In such case it is necessary to check
17331	* \| ... ... if 'succ' and 'cur' are states_equal().
17332	* \| \| \| If 'succ' and 'cur' are a part of the
17333	* \| V V same loop exact flag has to be set.
17334	* \| succ <- cur To check if that is the case, verify
17335	* \| \| if loop entry of 'succ' is in current
17336	* \| V DFS path.
17337	* \| ...
17338	* \| \|
17339	* '----'
17340	*
17341	* Additional details are in the comment before get_loop_entry().
17342	*/
17343	loop_entry = get_loop_entry(st: &sl->state);
17344	force_exact = loop_entry && loop_entry->branches > `0`;
17345	if (states_equal(env, old: &sl->state, cur, exact: force_exact ? RANGE_WITHIN : NOT_EXACT)) {
17346	if (force_exact)
17347	update_loop_entry(cur, hdr: loop_entry);
17348	hit:
17349	sl->hit_cnt++;
17350	/ reached equivalent register/stack state,*
17351	* prune the search.
17352	* Registers read by the continuation are read by us.
17353	* If we have any write marks in env->cur_state, they
17354	* will prevent corresponding reads in the continuation
17355	* from reaching our parent (an explored_state). Our
17356	* own state will get the read marks recorded, but
17357	* they'll be immediately forgotten as we're pruning
17358	* this state and will pop a new one.
17359	*/
17360	err = propagate_liveness(env, vstate: &sl->state, vparent: cur);
17361
17362	/ if previous state reached the exit with precision and*
17363	* current state is equivalent to it (except precsion marks)
17364	* the precision needs to be propagated back in
17365	* the current state.
17366	*/
17367	if (is_jmp_point(env, insn_idx: env->insn_idx))
17368	err = err ? : push_jmp_history(env, cur, insn_flags: `0`);
17369	err = err ? : propagate_precision(env, old: &sl->state);
17370	if (err)
17371	return err;
17372	return `1`;
17373	}
17374	miss:
17375	/ when new state is not going to be added do not increase miss count.*
17376	* Otherwise several loop iterations will remove the state
17377	* recorded earlier. The goal of these heuristics is to have
17378	* states from some iterations of the loop (some in the beginning
17379	* and some at the end) to help pruning.
17380	*/
17381	if (add_new_state)
17382	sl->miss_cnt++;
17383	/ heuristic to determine whether this state is beneficial*
17384	* to keep checking from state equivalence point of view.
17385	* Higher numbers increase max_states_per_insn and verification time,
17386	* but do not meaningfully decrease insn_processed.
17387	* 'n' controls how many times state could miss before eviction.
17388	* Use bigger 'n' for checkpoints because evicting checkpoint states
17389	* too early would hinder iterator convergence.
17390	*/
17391	n = is_force_checkpoint(env, insn_idx) && sl->state.branches > `0` ? `64` : `3`;
17392	if (sl->miss_cnt > sl->hit_cnt * n + n) {
17393	/ the state is unlikely to be useful. Remove it to*
17394	* speed up verification
17395	*/
17396	*pprev = sl->next;
17397	if (sl->state.frame[`0`]->regs[`0`].live & REG_LIVE_DONE &&
17398	!sl->state.used_as_loop_entry) {
17399	u32 br = sl->state.branches;
17400
17401	WARN_ONCE(br,
17402	"BUG live_done but branches_to_explore %d\n",
17403	br);
17404	free_verifier_state(state: &sl->state, free_self: false);
17405	kfree(objp: sl);
17406	env->peak_states--;
17407	} else {
17408	/ cannot free this state, since parentage chain may*
17409	* walk it later. Add it for free_list instead to
17410	* be freed at the end of verification
17411	*/
17412	sl->next = env->free_list;
17413	env->free_list = sl;
17414	}
17415	sl = *pprev;
17416	continue;
17417	}
17418	next:
17419	pprev = &sl->next;
17420	sl = *pprev;
17421	}
17422
17423	if (env->max_states_per_insn < states_cnt)
17424	env->max_states_per_insn = states_cnt;
17425
17426	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
17427	return `0`;
17428
17429	if (!add_new_state)
17430	return `0`;
17431
17432	/ There were no equivalent states, remember the current one.*
17433	* Technically the current state is not proven to be safe yet,
17434	* but it will either reach outer most bpf_exit (which means it's safe)
17435	* or it will be rejected. When there are no loops the verifier won't be
17436	* seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
17437	* again on the way to bpf_exit.
17438	* When looping the sl->state.branches will be > 0 and this state
17439	* will not be considered for equivalence until branches == 0.
17440	*/
17441	new_sl = kzalloc(size: sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
17442	if (!new_sl)
17443	return -ENOMEM;
17444	env->total_states++;
17445	env->peak_states++;
17446	env->prev_jmps_processed = env->jmps_processed;
17447	env->prev_insn_processed = env->insn_processed;
17448
17449	/ forget precise markings we inherited, see __mark_chain_precision /
17450	if (env->bpf_capable)
17451	mark_all_scalars_imprecise(env, st: cur);
17452
17453	/ add new state to the head of linked list /
17454	new = &new_sl->state;
17455	err = copy_verifier_state(dst_state: new, src: cur);
17456	if (err) {
17457	free_verifier_state(state: new, free_self: false);
17458	kfree(objp: new_sl);
17459	return err;
17460	}
17461	new->insn_idx = insn_idx;
17462	WARN_ONCE(new->branches != `1`,
17463	"BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
17464
17465	cur->parent = new;
17466	cur->first_insn_idx = insn_idx;
17467	cur->dfs_depth = new->dfs_depth + `1`;
17468	clear_jmp_history(state: cur);
17469	new_sl->next = *explored_state(env, idx: insn_idx);
17470	*explored_state(env, idx: insn_idx) = new_sl;
17471	/ connect new state to parentage chain. Current frame needs all*
17472	* registers connected. Only r6 - r9 of the callers are alive (pushed
17473	* to the stack implicitly by JITs) so in callers' frames connect just
17474	* r6 - r9 as an optimization. Callers will have r1 - r5 connected to
17475	* the state of the call instruction (with WRITTEN set), and r0 comes
17476	* from callee with its full parentage chain, anyway.
17477	*/
17478	/ clear write marks in current state: the writes we did are not writes*
17479	* our child did, so they don't screen off its reads from us.
17480	* (There are no read marks in current state, because reads always mark
17481	* their parent and current state never has children yet. Only
17482	* explored_states can get read marks.)
17483	*/
17484	for (j = `0`; j <= cur->curframe; j++) {
17485	for (i = j < cur->curframe ? BPF_REG_6 : `0`; i < BPF_REG_FP; i++)
17486	cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
17487	for (i = `0`; i < BPF_REG_FP; i++)
17488	cur->frame[j]->regs[i].live = REG_LIVE_NONE;
17489	}
17490
17491	/ all stack frames are accessible from callee, clear them all /
17492	for (j = `0`; j <= cur->curframe; j++) {
17493	struct bpf_func_state *frame = cur->frame[j];
17494	struct bpf_func_state *newframe = new->frame[j];
17495
17496	for (i = `0`; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
17497	frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
17498	frame->stack[i].spilled_ptr.parent =
17499	&newframe->stack[i].spilled_ptr;
17500	}
17501	}
17502	return `0`;
17503	}
17504
17505	/ Return true if it's OK to have the same insn return a different type. /
17506	static bool reg_type_mismatch_ok(enum bpf_reg_type type)
17507	{
17508	switch (base_type(type)) {
17509	case PTR_TO_CTX:
17510	case PTR_TO_SOCKET:
17511	case PTR_TO_SOCK_COMMON:
17512	case PTR_TO_TCP_SOCK:
17513	case PTR_TO_XDP_SOCK:
17514	case PTR_TO_BTF_ID:
17515	case PTR_TO_ARENA:
17516	return false;
17517	default:
17518	return true;
17519	}
17520	}
17521
17522	/ If an instruction was previously used with particular pointer types, then we*
17523	* need to be careful to avoid cases such as the below, where it may be ok
17524	* for one branch accessing the pointer, but not ok for the other branch:
17525	*
17526	* R1 = sock_ptr
17527	* goto X;
17528	* ...
17529	* R1 = some_other_valid_ptr;
17530	* goto X;
17531	* ...
17532	* R2 = (u32 )(R1 + 0);
17533	*/
17534	static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
17535	{
17536	return src != prev && (!reg_type_mismatch_ok(type: src) \|\|
17537	!reg_type_mismatch_ok(type: prev));
17538	}
17539
17540	static int save_aux_ptr_type(struct bpf_verifier_env env, enum* bpf_reg_type type,
17541	bool allow_trust_missmatch)
17542	{
17543	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
17544
17545	if (*prev_type == NOT_INIT) {
17546	/ Saw a valid insn*
17547	* dst_reg = (u32 )(src_reg + off)
17548	* save type to validate intersecting paths
17549	*/
17550	*prev_type = type;
17551	} else if (reg_type_mismatch(src: type, prev: *prev_type)) {
17552	/ Abuser program is trying to use the same insn*
17553	* dst_reg = (u32) (src_reg + off)
17554	* with different pointer types:
17555	* src_reg == ctx in one branch and
17556	* src_reg == stack\|map in some other branch.
17557	* Reject it.
17558	*/
17559	if (allow_trust_missmatch &&
17560	base_type(type) == PTR_TO_BTF_ID &&
17561	base_type(type: *prev_type) == PTR_TO_BTF_ID) {
17562	/*
17563	* Have to support a use case when one path through
17564	* the program yields TRUSTED pointer while another
17565	* is UNTRUSTED. Fallback to UNTRUSTED to generate
17566	* BPF_PROBE_MEM/BPF_PROBE_MEMSX.
17567	*/
17568	*prev_type = PTR_TO_BTF_ID \| PTR_UNTRUSTED;
17569	} else {
17570	verbose(private_data: env, fmt: "same insn cannot be used with different pointers\n");
17571	return -EINVAL;
17572	}
17573	}
17574
17575	return `0`;
17576	}
17577
17578	static int do_check(struct bpf_verifier_env *env)
17579	{
17580	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
17581	struct bpf_verifier_state *state = env->cur_state;
17582	struct bpf_insn *insns = env->prog->insnsi;
17583	struct bpf_reg_state *regs;
17584	int insn_cnt = env->prog->len;
17585	bool do_print_state = false;
17586	int prev_insn_idx = -`1`;
17587
17588	for (;;) {
17589	bool exception_exit = false;
17590	struct bpf_insn *insn;
17591	u8 class;
17592	int err;
17593
17594	/ reset current history entry on each new instruction /
17595	env->cur_hist_ent = NULL;
17596
17597	env->prev_insn_idx = prev_insn_idx;
17598	if (env->insn_idx >= insn_cnt) {
17599	verbose(private_data: env, fmt: "invalid insn idx %d insn_cnt %d\n",
17600	env->insn_idx, insn_cnt);
17601	return -EFAULT;
17602	}
17603
17604	insn = &insns[env->insn_idx];
17605	class = BPF_CLASS(insn->code);
17606
17607	if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
17608	verbose(private_data: env,
17609	fmt: "BPF program is too large. Processed %d insn\n",
17610	env->insn_processed);
17611	return -E2BIG;
17612	}
17613
17614	state->last_insn_idx = env->prev_insn_idx;
17615
17616	if (is_prune_point(env, insn_idx: env->insn_idx)) {
17617	err = is_state_visited(env, insn_idx: env->insn_idx);
17618	if (err < `0`)
17619	return err;
17620	if (err == `1`) {
17621	/ found equivalent state, can prune the search /
17622	if (env->log.level & BPF_LOG_LEVEL) {
17623	if (do_print_state)
17624	verbose(private_data: env, fmt: "\nfrom %d to %d%s: safe\n",
17625	env->prev_insn_idx, env->insn_idx,
17626	env->cur_state->speculative ?
17627	" (speculative execution)" : "");
17628	else
17629	verbose(private_data: env, fmt: "%d: safe\n", env->insn_idx);
17630	}
17631	goto process_bpf_exit;
17632	}
17633	}
17634
17635	if (is_jmp_point(env, insn_idx: env->insn_idx)) {
17636	err = push_jmp_history(env, cur: state, insn_flags: `0`);
17637	if (err)
17638	return err;
17639	}
17640
17641	if (signal_pending(current))
17642	return -EAGAIN;
17643
17644	if (need_resched())
17645	cond_resched();
17646
17647	if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
17648	verbose(private_data: env, fmt: "\nfrom %d to %d%s:",
17649	env->prev_insn_idx, env->insn_idx,
17650	env->cur_state->speculative ?
17651	" (speculative execution)" : "");
17652	print_verifier_state(env, state: state->frame[state->curframe], print_all: true);
17653	do_print_state = false;
17654	}
17655
17656	if (env->log.level & BPF_LOG_LEVEL) {
17657	const struct bpf_insn_cbs cbs = {
17658	.cb_call = disasm_kfunc_name,
17659	.cb_print = verbose,
17660	.private_data = env,
17661	};
17662
17663	if (verifier_state_scratched(env))
17664	print_insn_state(env, state: state->frame[state->curframe]);
17665
17666	verbose_linfo(env, insn_off: env->insn_idx, prefix_fmt: "; ");
17667	env->prev_log_pos = env->log.end_pos;
17668	verbose(private_data: env, fmt: "%d: ", env->insn_idx);
17669	print_bpf_insn(cbs: &cbs, insn, allow_ptr_leaks: env->allow_ptr_leaks);
17670	env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
17671	env->prev_log_pos = env->log.end_pos;
17672	}
17673
17674	if (bpf_prog_is_offloaded(aux: env->prog->aux)) {
17675	err = bpf_prog_offload_verify_insn(env, insn_idx: env->insn_idx,
17676	prev_insn_idx: env->prev_insn_idx);
17677	if (err)
17678	return err;
17679	}
17680
17681	regs = cur_regs(env);
17682	sanitize_mark_insn_seen(env);
17683	prev_insn_idx = env->insn_idx;
17684
17685	if (class == BPF_ALU \|\| class == BPF_ALU64) {
17686	err = check_alu_op(env, insn);
17687	if (err)
17688	return err;
17689
17690	} else if (class == BPF_LDX) {
17691	enum bpf_reg_type src_reg_type;
17692
17693	/ check for reserved fields is already done /
17694
17695	/ check src operand /
17696	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
17697	if (err)
17698	return err;
17699
17700	err = check_reg_arg(env, regno: insn->dst_reg, t: DST_OP_NO_MARK);
17701	if (err)
17702	return err;
17703
17704	src_reg_type = regs[insn->src_reg].type;
17705
17706	/ check that memory (src_reg + off) is readable,*
17707	* the state of dst_reg will be updated by this func
17708	*/
17709	err = check_mem_access(env, insn_idx: env->insn_idx, regno: insn->src_reg,
17710	off: insn->off, BPF_SIZE(insn->code),
17711	t: BPF_READ, value_regno: insn->dst_reg, strict_alignment_once: false,
17712	BPF_MODE(insn->code) == BPF_MEMSX);
17713	err = err ?: save_aux_ptr_type(env, type: src_reg_type, allow_trust_missmatch: true);
17714	err = err ?: reg_bounds_sanity_check(env, reg: &regs[insn->dst_reg], ctx: "ldx");
17715	if (err)
17716	return err;
17717	} else if (class == BPF_STX) {
17718	enum bpf_reg_type dst_reg_type;
17719
17720	if (BPF_MODE(insn->code) == BPF_ATOMIC) {
17721	err = check_atomic(env, insn_idx: env->insn_idx, insn);
17722	if (err)
17723	return err;
17724	env->insn_idx++;
17725	continue;
17726	}
17727
17728	if (BPF_MODE(insn->code) != BPF_MEM \|\| insn->imm != `0`) {
17729	verbose(private_data: env, fmt: "BPF_STX uses reserved fields\n");
17730	return -EINVAL;
17731	}
17732
17733	/ check src1 operand /
17734	err = check_reg_arg(env, regno: insn->src_reg, t: SRC_OP);
17735	if (err)
17736	return err;
17737	/ check src2 operand /
17738	err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
17739	if (err)
17740	return err;
17741
17742	dst_reg_type = regs[insn->dst_reg].type;
17743
17744	/ check that memory (dst_reg + off) is writeable /
17745	err = check_mem_access(env, insn_idx: env->insn_idx, regno: insn->dst_reg,
17746	off: insn->off, BPF_SIZE(insn->code),
17747	t: BPF_WRITE, value_regno: insn->src_reg, strict_alignment_once: false, is_ldsx: false);
17748	if (err)
17749	return err;
17750
17751	err = save_aux_ptr_type(env, type: dst_reg_type, allow_trust_missmatch: false);
17752	if (err)
17753	return err;
17754	} else if (class == BPF_ST) {
17755	enum bpf_reg_type dst_reg_type;
17756
17757	if (BPF_MODE(insn->code) != BPF_MEM \|\|
17758	insn->src_reg != BPF_REG_0) {
17759	verbose(private_data: env, fmt: "BPF_ST uses reserved fields\n");
17760	return -EINVAL;
17761	}
17762	/ check src operand /
17763	err = check_reg_arg(env, regno: insn->dst_reg, t: SRC_OP);
17764	if (err)
17765	return err;
17766
17767	dst_reg_type = regs[insn->dst_reg].type;
17768
17769	/ check that memory (dst_reg + off) is writeable /
17770	err = check_mem_access(env, insn_idx: env->insn_idx, regno: insn->dst_reg,
17771	off: insn->off, BPF_SIZE(insn->code),
17772	t: BPF_WRITE, value_regno: -`1`, strict_alignment_once: false, is_ldsx: false);
17773	if (err)
17774	return err;
17775
17776	err = save_aux_ptr_type(env, type: dst_reg_type, allow_trust_missmatch: false);
17777	if (err)
17778	return err;
17779	} else if (class == BPF_JMP \|\| class == BPF_JMP32) {
17780	u8 opcode = BPF_OP(insn->code);
17781
17782	env->jmps_processed++;
17783	if (opcode == BPF_CALL) {
17784	if (BPF_SRC(insn->code) != BPF_K \|\|
17785	(insn->src_reg != BPF_PSEUDO_KFUNC_CALL
17786	&& insn->off != `0`) \|\|
17787	(insn->src_reg != BPF_REG_0 &&
17788	insn->src_reg != BPF_PSEUDO_CALL &&
17789	insn->src_reg != BPF_PSEUDO_KFUNC_CALL) \|\|
17790	insn->dst_reg != BPF_REG_0 \|\|
17791	class == BPF_JMP32) {
17792	verbose(private_data: env, fmt: "BPF_CALL uses reserved fields\n");
17793	return -EINVAL;
17794	}
17795
17796	if (env->cur_state->active_lock.ptr) {
17797	if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) \|\|
17798	(insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
17799	(insn->off != `0` \|\| !is_bpf_graph_api_kfunc(btf_id: insn->imm)))) {
17800	verbose(private_data: env, fmt: "function calls are not allowed while holding a lock\n");
17801	return -EINVAL;
17802	}
17803	}
17804	if (insn->src_reg == BPF_PSEUDO_CALL) {
17805	err = check_func_call(env, insn, insn_idx: &env->insn_idx);
17806	} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
17807	err = check_kfunc_call(env, insn, insn_idx_p: &env->insn_idx);
17808	if (!err && is_bpf_throw_kfunc(insn)) {
17809	exception_exit = true;
17810	goto process_bpf_exit_full;
17811	}
17812	} else {
17813	err = check_helper_call(env, insn, insn_idx_p: &env->insn_idx);
17814	}
17815	if (err)
17816	return err;
17817
17818	mark_reg_scratched(env, regno: BPF_REG_0);
17819	} else if (opcode == BPF_JA) {
17820	if (BPF_SRC(insn->code) != BPF_K \|\|
17821	insn->src_reg != BPF_REG_0 \|\|
17822	insn->dst_reg != BPF_REG_0 \|\|
17823	(class == BPF_JMP && insn->imm != `0`) \|\|
17824	(class == BPF_JMP32 && insn->off != `0`)) {
17825	verbose(private_data: env, fmt: "BPF_JA uses reserved fields\n");
17826	return -EINVAL;
17827	}
17828
17829	if (class == BPF_JMP)
17830	env->insn_idx += insn->off + `1`;
17831	else
17832	env->insn_idx += insn->imm + `1`;
17833	continue;
17834
17835	} else if (opcode == BPF_EXIT) {
17836	if (BPF_SRC(insn->code) != BPF_K \|\|
17837	insn->imm != `0` \|\|
17838	insn->src_reg != BPF_REG_0 \|\|
17839	insn->dst_reg != BPF_REG_0 \|\|
17840	class == BPF_JMP32) {
17841	verbose(private_data: env, fmt: "BPF_EXIT uses reserved fields\n");
17842	return -EINVAL;
17843	}
17844	process_bpf_exit_full:
17845	if (env->cur_state->active_lock.ptr && !env->cur_state->curframe) {
17846	verbose(private_data: env, fmt: "bpf_spin_unlock is missing\n");
17847	return -EINVAL;
17848	}
17849
17850	if (env->cur_state->active_rcu_lock && !env->cur_state->curframe) {
17851	verbose(private_data: env, fmt: "bpf_rcu_read_unlock is missing\n");
17852	return -EINVAL;
17853	}
17854
17855	/ We must do check_reference_leak here before*
17856	* prepare_func_exit to handle the case when
17857	* state->curframe > 0, it may be a callback
17858	* function, for which reference_state must
17859	* match caller reference state when it exits.
17860	*/
17861	err = check_reference_leak(env, exception_exit);
17862	if (err)
17863	return err;
17864
17865	/ The side effect of the prepare_func_exit*
17866	* which is being skipped is that it frees
17867	* bpf_func_state. Typically, process_bpf_exit
17868	* will only be hit with outermost exit.
17869	* copy_verifier_state in pop_stack will handle
17870	* freeing of any extra bpf_func_state left over
17871	* from not processing all nested function
17872	* exits. We also skip return code checks as
17873	* they are not needed for exceptional exits.
17874	*/
17875	if (exception_exit)
17876	goto process_bpf_exit;
17877
17878	if (state->curframe) {
17879	/ exit from nested function /
17880	err = prepare_func_exit(env, insn_idx: &env->insn_idx);
17881	if (err)
17882	return err;
17883	do_print_state = true;
17884	continue;
17885	}
17886
17887	err = check_return_code(env, regno: BPF_REG_0, reg_name: "R0");
17888	if (err)
17889	return err;
17890	process_bpf_exit:
17891	mark_verifier_state_scratched(env);
17892	update_branch_counts(env, st: env->cur_state);
17893	err = pop_stack(env, prev_insn_idx: &prev_insn_idx,
17894	insn_idx: &env->insn_idx, pop_log);
17895	if (err < `0`) {
17896	if (err != -ENOENT)
17897	return err;
17898	break;
17899	} else {
17900	do_print_state = true;
17901	continue;
17902	}
17903	} else {
17904	err = check_cond_jmp_op(env, insn, insn_idx: &env->insn_idx);
17905	if (err)
17906	return err;
17907	}
17908	} else if (class == BPF_LD) {
17909	u8 mode = BPF_MODE(insn->code);
17910
17911	if (mode == BPF_ABS \|\| mode == BPF_IND) {
17912	err = check_ld_abs(env, insn);
17913	if (err)
17914	return err;
17915
17916	} else if (mode == BPF_IMM) {
17917	err = check_ld_imm(env, insn);
17918	if (err)
17919	return err;
17920
17921	env->insn_idx++;
17922	sanitize_mark_insn_seen(env);
17923	} else {
17924	verbose(private_data: env, fmt: "invalid BPF_LD mode\n");
17925	return -EINVAL;
17926	}
17927	} else {
17928	verbose(private_data: env, fmt: "unknown insn class %d\n", class);
17929	return -EINVAL;
17930	}
17931
17932	env->insn_idx++;
17933	}
17934
17935	return `0`;
17936	}
17937
17938	static int find_btf_percpu_datasec(struct btf *btf)
17939	{
17940	const struct btf_type *t;
17941	const char *tname;
17942	int i, n;
17943
17944	/*
17945	* Both vmlinux and module each have their own ".data..percpu"
17946	* DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17947	* types to look at only module's own BTF types.
17948	*/
17949	n = btf_nr_types(btf);
17950	if (btf_is_module(btf))
17951	i = btf_nr_types(btf: btf_vmlinux);
17952	else
17953	i = `1`;
17954
17955	for(; i < n; i++) {
17956	t = btf_type_by_id(btf, type_id: i);
17957	if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17958	continue;
17959
17960	tname = btf_name_by_offset(btf, offset: t->name_off);
17961	if (!strcmp(tname, ".data..percpu"))
17962	return i;
17963	}
17964
17965	return -ENOENT;
17966	}
17967
17968	/ replace pseudo btf_id with kernel symbol address /
17969	static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17970	struct bpf_insn *insn,
17971	struct bpf_insn_aux_data *aux)
17972	{
17973	const struct btf_var_secinfo *vsi;
17974	const struct btf_type *datasec;
17975	struct btf_mod_pair *btf_mod;
17976	const struct btf_type *t;
17977	const char *sym_name;
17978	bool percpu = false;
17979	u32 type, id = insn->imm;
17980	struct btf *btf;
17981	s32 datasec_id;
17982	u64 addr;
17983	int i, btf_fd, err;
17984
17985	btf_fd = insn[`1`].imm;
17986	if (btf_fd) {
17987	btf = btf_get_by_fd(fd: btf_fd);
17988	if (IS_ERR(ptr: btf)) {
17989	verbose(private_data: env, fmt: "invalid module BTF object FD specified.\n");
17990	return -EINVAL;
17991	}
17992	} else {
17993	if (!btf_vmlinux) {
17994	verbose(private_data: env, fmt: "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17995	return -EINVAL;
17996	}
17997	btf = btf_vmlinux;
17998	btf_get(btf);
17999	}
18000
18001	t = btf_type_by_id(btf, type_id: id);
18002	if (!t) {
18003	verbose(private_data: env, fmt: "ldimm64 insn specifies invalid btf_id %d.\n", id);
18004	err = -ENOENT;
18005	goto err_put;
18006	}
18007
18008	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
18009	verbose(private_data: env, fmt: "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
18010	err = -EINVAL;
18011	goto err_put;
18012	}
18013
18014	sym_name = btf_name_by_offset(btf, offset: t->name_off);
18015	addr = kallsyms_lookup_name(name: sym_name);
18016	if (!addr) {
18017	verbose(private_data: env, fmt: "ldimm64 failed to find the address for kernel symbol '%s'.\n",
18018	sym_name);
18019	err = -ENOENT;
18020	goto err_put;
18021	}
18022	insn[`0`].imm = (u32)addr;
18023	insn[`1`].imm = addr >> `32`;
18024
18025	if (btf_type_is_func(t)) {
18026	aux->btf_var.reg_type = PTR_TO_MEM \| MEM_RDONLY;
18027	aux->btf_var.mem_size = `0`;
18028	goto check_btf;
18029	}
18030
18031	datasec_id = find_btf_percpu_datasec(btf);
18032	if (datasec_id > `0`) {
18033	datasec = btf_type_by_id(btf, type_id: datasec_id);
18034	for_each_vsi(i, datasec, vsi) {
18035	if (vsi->type == id) {
18036	percpu = true;
18037	break;
18038	}
18039	}
18040	}
18041
18042	type = t->type;
18043	t = btf_type_skip_modifiers(btf, id: type, NULL);
18044	if (percpu) {
18045	aux->btf_var.reg_type = PTR_TO_BTF_ID \| MEM_PERCPU;
18046	aux->btf_var.btf = btf;
18047	aux->btf_var.btf_id = type;
18048	} else if (!btf_type_is_struct(t)) {
18049	const struct btf_type *ret;
18050	const char *tname;
18051	u32 tsize;
18052
18053	/ resolve the type size of ksym. /
18054	ret = btf_resolve_size(btf, type: t, type_size: &tsize);
18055	if (IS_ERR(ptr: ret)) {
18056	tname = btf_name_by_offset(btf, offset: t->name_off);
18057	verbose(private_data: env, fmt: "ldimm64 unable to resolve the size of type '%s': %ld\n",
18058	tname, PTR_ERR(ptr: ret));
18059	err = -EINVAL;
18060	goto err_put;
18061	}
18062	aux->btf_var.reg_type = PTR_TO_MEM \| MEM_RDONLY;
18063	aux->btf_var.mem_size = tsize;
18064	} else {
18065	aux->btf_var.reg_type = PTR_TO_BTF_ID;
18066	aux->btf_var.btf = btf;
18067	aux->btf_var.btf_id = type;
18068	}
18069	check_btf:
18070	/ check whether we recorded this BTF (and maybe module) already /
18071	for (i = `0`; i < env->used_btf_cnt; i++) {
18072	if (env->used_btfs[i].btf == btf) {
18073	btf_put(btf);
18074	return `0`;
18075	}
18076	}
18077
18078	if (env->used_btf_cnt >= MAX_USED_BTFS) {
18079	err = -E2BIG;
18080	goto err_put;
18081	}
18082
18083	btf_mod = &env->used_btfs[env->used_btf_cnt];
18084	btf_mod->btf = btf;
18085	btf_mod->module = NULL;
18086
18087	/ if we reference variables from kernel module, bump its refcount /
18088	if (btf_is_module(btf)) {
18089	btf_mod->module = btf_try_get_module(btf);
18090	if (!btf_mod->module) {
18091	err = -ENXIO;
18092	goto err_put;
18093	}
18094	}
18095
18096	env->used_btf_cnt++;
18097
18098	return `0`;
18099	err_put:
18100	btf_put(btf);
18101	return err;
18102	}
18103
18104	static bool is_tracing_prog_type(enum bpf_prog_type type)
18105	{
18106	switch (type) {
18107	case BPF_PROG_TYPE_KPROBE:
18108	case BPF_PROG_TYPE_TRACEPOINT:
18109	case BPF_PROG_TYPE_PERF_EVENT:
18110	case BPF_PROG_TYPE_RAW_TRACEPOINT:
18111	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
18112	return true;
18113	default:
18114	return false;
18115	}
18116	}
18117
18118	static int check_map_prog_compatibility(struct bpf_verifier_env *env,
18119	struct bpf_map *map,
18120	struct bpf_prog *prog)
18121
18122	{
18123	enum bpf_prog_type prog_type = resolve_prog_type(prog);
18124
18125	if (btf_record_has_field(rec: map->record, type: BPF_LIST_HEAD) \|\|
18126	btf_record_has_field(rec: map->record, type: BPF_RB_ROOT)) {
18127	if (is_tracing_prog_type(type: prog_type)) {
18128	verbose(private_data: env, fmt: "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
18129	return -EINVAL;
18130	}
18131	}
18132
18133	if (btf_record_has_field(rec: map->record, type: BPF_SPIN_LOCK)) {
18134	if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
18135	verbose(private_data: env, fmt: "socket filter progs cannot use bpf_spin_lock yet\n");
18136	return -EINVAL;
18137	}
18138
18139	if (is_tracing_prog_type(type: prog_type)) {
18140	verbose(private_data: env, fmt: "tracing progs cannot use bpf_spin_lock yet\n");
18141	return -EINVAL;
18142	}
18143	}
18144
18145	if (btf_record_has_field(rec: map->record, type: BPF_TIMER)) {
18146	if (is_tracing_prog_type(type: prog_type)) {
18147	verbose(private_data: env, fmt: "tracing progs cannot use bpf_timer yet\n");
18148	return -EINVAL;
18149	}
18150	}
18151
18152	if ((bpf_prog_is_offloaded(aux: prog->aux) \|\| bpf_map_is_offloaded(map)) &&
18153	!bpf_offload_prog_map_match(prog, map)) {
18154	verbose(private_data: env, fmt: "offload device mismatch between prog and map\n");
18155	return -EINVAL;
18156	}
18157
18158	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
18159	verbose(private_data: env, fmt: "bpf_struct_ops map cannot be used in prog\n");
18160	return -EINVAL;
18161	}
18162
18163	if (prog->sleepable)
18164	switch (map->map_type) {
18165	case BPF_MAP_TYPE_HASH:
18166	case BPF_MAP_TYPE_LRU_HASH:
18167	case BPF_MAP_TYPE_ARRAY:
18168	case BPF_MAP_TYPE_PERCPU_HASH:
18169	case BPF_MAP_TYPE_PERCPU_ARRAY:
18170	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
18171	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
18172	case BPF_MAP_TYPE_HASH_OF_MAPS:
18173	case BPF_MAP_TYPE_RINGBUF:
18174	case BPF_MAP_TYPE_USER_RINGBUF:
18175	case BPF_MAP_TYPE_INODE_STORAGE:
18176	case BPF_MAP_TYPE_SK_STORAGE:
18177	case BPF_MAP_TYPE_TASK_STORAGE:
18178	case BPF_MAP_TYPE_CGRP_STORAGE:
18179	case BPF_MAP_TYPE_QUEUE:
18180	case BPF_MAP_TYPE_STACK:
18181	case BPF_MAP_TYPE_ARENA:
18182	break;
18183	default:
18184	verbose(private_data: env,
18185	fmt: "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
18186	return -EINVAL;
18187	}
18188
18189	return `0`;
18190	}
18191
18192	static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
18193	{
18194	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE \|\|
18195	map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
18196	}
18197
18198	/ find and rewrite pseudo imm in ld_imm64 instructions:*
18199	*
18200	* 1. if it accesses map FD, replace it with actual map pointer.
18201	* 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
18202	*
18203	* NOTE: btf_vmlinux is required for converting pseudo btf_id.
18204	*/
18205	static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
18206	{
18207	struct bpf_insn *insn = env->prog->insnsi;
18208	int insn_cnt = env->prog->len;
18209	int i, j, err;
18210
18211	err = bpf_prog_calc_tag(fp: env->prog);
18212	if (err)
18213	return err;
18214
18215	for (i = `0`; i < insn_cnt; i++, insn++) {
18216	if (BPF_CLASS(insn->code) == BPF_LDX &&
18217	((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) \|\|
18218	insn->imm != `0`)) {
18219	verbose(private_data: env, fmt: "BPF_LDX uses reserved fields\n");
18220	return -EINVAL;
18221	}
18222
18223	if (insn[`0`].code == (BPF_LD \| BPF_IMM \| BPF_DW)) {
18224	struct bpf_insn_aux_data *aux;
18225	struct bpf_map *map;
18226	struct fd f;
18227	u64 addr;
18228	u32 fd;
18229
18230	if (i == insn_cnt - `1` \|\| insn[`1`].code != `0` \|\|
18231	insn[`1`].dst_reg != `0` \|\| insn[`1`].src_reg != `0` \|\|
18232	insn[`1`].off != `0`) {
18233	verbose(private_data: env, fmt: "invalid bpf_ld_imm64 insn\n");
18234	return -EINVAL;
18235	}
18236
18237	if (insn[`0`].src_reg == `0`)
18238	/ valid generic load 64-bit imm /
18239	goto next_insn;
18240
18241	if (insn[`0`].src_reg == BPF_PSEUDO_BTF_ID) {
18242	aux = &env->insn_aux_data[i];
18243	err = check_pseudo_btf_id(env, insn, aux);
18244	if (err)
18245	return err;
18246	goto next_insn;
18247	}
18248
18249	if (insn[`0`].src_reg == BPF_PSEUDO_FUNC) {
18250	aux = &env->insn_aux_data[i];
18251	aux->ptr_type = PTR_TO_FUNC;
18252	goto next_insn;
18253	}
18254
18255	/ In final convert_pseudo_ld_imm64() step, this is*
18256	* converted into regular 64-bit imm load insn.
18257	*/
18258	switch (insn[`0`].src_reg) {
18259	case BPF_PSEUDO_MAP_VALUE:
18260	case BPF_PSEUDO_MAP_IDX_VALUE:
18261	break;
18262	case BPF_PSEUDO_MAP_FD:
18263	case BPF_PSEUDO_MAP_IDX:
18264	if (insn[`1`].imm == `0`)
18265	break;
18266	fallthrough;
18267	default:
18268	verbose(private_data: env, fmt: "unrecognized bpf_ld_imm64 insn\n");
18269	return -EINVAL;
18270	}
18271
18272	switch (insn[`0`].src_reg) {
18273	case BPF_PSEUDO_MAP_IDX_VALUE:
18274	case BPF_PSEUDO_MAP_IDX:
18275	if (bpfptr_is_null(bpfptr: env->fd_array)) {
18276	verbose(private_data: env, fmt: "fd_idx without fd_array is invalid\n");
18277	return -EPROTO;
18278	}
18279	if (copy_from_bpfptr_offset(dst: &fd, src: env->fd_array,
18280	offset: insn[`0`].imm * sizeof(fd),
18281	size: sizeof(fd)))
18282	return -EFAULT;
18283	break;
18284	default:
18285	fd = insn[`0`].imm;
18286	break;
18287	}
18288
18289	f = fdget(fd);
18290	map = __bpf_map_get(f);
18291	if (IS_ERR(ptr: map)) {
18292	verbose(private_data: env, fmt: "fd %d is not pointing to valid bpf_map\n",
18293	insn[`0`].imm);
18294	return PTR_ERR(ptr: map);
18295	}
18296
18297	err = check_map_prog_compatibility(env, map, prog: env->prog);
18298	if (err) {
18299	fdput(fd: f);
18300	return err;
18301	}
18302
18303	aux = &env->insn_aux_data[i];
18304	if (insn[`0`].src_reg == BPF_PSEUDO_MAP_FD \|\|
18305	insn[`0`].src_reg == BPF_PSEUDO_MAP_IDX) {
18306	addr = (unsigned long)map;
18307	} else {
18308	u32 off = insn[`1`].imm;
18309
18310	if (off >= BPF_MAX_VAR_OFF) {
18311	verbose(private_data: env, fmt: "direct value offset of %u is not allowed\n", off);
18312	fdput(fd: f);
18313	return -EINVAL;
18314	}
18315
18316	if (!map->ops->map_direct_value_addr) {
18317	verbose(private_data: env, fmt: "no direct value access support for this map type\n");
18318	fdput(fd: f);
18319	return -EINVAL;
18320	}
18321
18322	err = map->ops->map_direct_value_addr(map, &addr, off);
18323	if (err) {
18324	verbose(private_data: env, fmt: "invalid access to map value pointer, value_size=%u off=%u\n",
18325	map->value_size, off);
18326	fdput(fd: f);
18327	return err;
18328	}
18329
18330	aux->map_off = off;
18331	addr += off;
18332	}
18333
18334	insn[`0`].imm = (u32)addr;
18335	insn[`1`].imm = addr >> `32`;
18336
18337	/ check whether we recorded this map already /
18338	for (j = `0`; j < env->used_map_cnt; j++) {
18339	if (env->used_maps[j] == map) {
18340	aux->map_index = j;
18341	fdput(fd: f);
18342	goto next_insn;
18343	}
18344	}
18345
18346	if (env->used_map_cnt >= MAX_USED_MAPS) {
18347	fdput(fd: f);
18348	return -E2BIG;
18349	}
18350
18351	if (env->prog->sleepable)
18352	atomic64_inc(v: &map->sleepable_refcnt);
18353	/ hold the map. If the program is rejected by verifier,*
18354	* the map will be released by release_maps() or it
18355	* will be used by the valid program until it's unloaded
18356	* and all maps are released in bpf_free_used_maps()
18357	*/
18358	bpf_map_inc(map);
18359
18360	aux->map_index = env->used_map_cnt;
18361	env->used_maps[env->used_map_cnt++] = map;
18362
18363	if (bpf_map_is_cgroup_storage(map) &&
18364	bpf_cgroup_storage_assign(aux: env->prog->aux, map)) {
18365	verbose(private_data: env, fmt: "only one cgroup storage of each type is allowed\n");
18366	fdput(fd: f);
18367	return -EBUSY;
18368	}
18369	if (map->map_type == BPF_MAP_TYPE_ARENA) {
18370	if (env->prog->aux->arena) {
18371	verbose(private_data: env, fmt: "Only one arena per program\n");
18372	fdput(fd: f);
18373	return -EBUSY;
18374	}
18375	if (!env->allow_ptr_leaks \|\| !env->bpf_capable) {
18376	verbose(private_data: env, fmt: "CAP_BPF and CAP_PERFMON are required to use arena\n");
18377	fdput(fd: f);
18378	return -EPERM;
18379	}
18380	if (!env->prog->jit_requested) {
18381	verbose(private_data: env, fmt: "JIT is required to use arena\n");
18382	fdput(fd: f);
18383	return -EOPNOTSUPP;
18384	}
18385	if (!bpf_jit_supports_arena()) {
18386	verbose(private_data: env, fmt: "JIT doesn't support arena\n");
18387	fdput(fd: f);
18388	return -EOPNOTSUPP;
18389	}
18390	env->prog->aux->arena = (void *)map;
18391	if (!bpf_arena_get_user_vm_start(arena: env->prog->aux->arena)) {
18392	verbose(private_data: env, fmt: "arena's user address must be set via map_extra or mmap()\n");
18393	fdput(fd: f);
18394	return -EINVAL;
18395	}
18396	}
18397
18398	fdput(fd: f);
18399	next_insn:
18400	insn++;
18401	i++;
18402	continue;
18403	}
18404
18405	/ Basic sanity check before we invest more work here. /
18406	if (!bpf_opcode_in_insntable(code: insn->code)) {
18407	verbose(private_data: env, fmt: "unknown opcode %02x\n", insn->code);
18408	return -EINVAL;
18409	}
18410	}
18411
18412	/ now all pseudo BPF_LD_IMM64 instructions load valid*
18413	* 'struct bpf_map *' into a register instead of user map_fd.
18414	* These pointers will be used later by verifier to validate map access.
18415	*/
18416	return `0`;
18417	}
18418
18419	/ drop refcnt of maps used by the rejected program /
18420	static void release_maps(struct bpf_verifier_env *env)
18421	{
18422	__bpf_free_used_maps(aux: env->prog->aux, used_maps: env->used_maps,
18423	len: env->used_map_cnt);
18424	}
18425
18426	/ drop refcnt of maps used by the rejected program /
18427	static void release_btfs(struct bpf_verifier_env *env)
18428	{
18429	__bpf_free_used_btfs(aux: env->prog->aux, used_btfs: env->used_btfs,
18430	len: env->used_btf_cnt);
18431	}
18432
18433	/ convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 /
18434	static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18435	{
18436	struct bpf_insn *insn = env->prog->insnsi;
18437	int insn_cnt = env->prog->len;
18438	int i;
18439
18440	for (i = `0`; i < insn_cnt; i++, insn++) {
18441	if (insn->code != (BPF_LD \| BPF_IMM \| BPF_DW))
18442	continue;
18443	if (insn->src_reg == BPF_PSEUDO_FUNC)
18444	continue;
18445	insn->src_reg = `0`;
18446	}
18447	}
18448
18449	/ single env->prog->insni[off] instruction was replaced with the range*
18450	* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
18451	* [0, off) and [off, end) to new locations, so the patched range stays zero
18452	*/
18453	static void adjust_insn_aux_data(struct bpf_verifier_env *env,
18454	struct bpf_insn_aux_data *new_data,
18455	struct bpf_prog *new_prog, u32 off, u32 cnt)
18456	{
18457	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
18458	struct bpf_insn *insn = new_prog->insnsi;
18459	u32 old_seen = old_data[off].seen;
18460	u32 prog_len;
18461	int i;
18462
18463	/ aux info at OFF always needs adjustment, no matter fast path*
18464	* (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
18465	* original insn at old prog.
18466	*/
18467	old_data[off].zext_dst = insn_has_def32(env, insn: insn + off + cnt - `1`);
18468
18469	if (cnt == `1`)
18470	return;
18471	prog_len = new_prog->len;
18472
18473	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
18474	memcpy(new_data + off + cnt - `1`, old_data + off,
18475	sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + `1`));
18476	for (i = off; i < off + cnt - `1`; i++) {
18477	/ Expand insni[off]'s seen count to the patched range. /
18478	new_data[i].seen = old_seen;
18479	new_data[i].zext_dst = insn_has_def32(env, insn: insn + i);
18480	}
18481	env->insn_aux_data = new_data;
18482	vfree(addr: old_data);
18483	}
18484
18485	static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
18486	{
18487	int i;
18488
18489	if (len == `1`)
18490	return;
18491	/ NOTE: fake 'exit' subprog should be updated as well. /
18492	for (i = `0`; i <= env->subprog_cnt; i++) {
18493	if (env->subprog_info[i].start <= off)
18494	continue;
18495	env->subprog_info[i].start += len - `1`;
18496	}
18497	}
18498
18499	static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
18500	{
18501	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
18502	int i, sz = prog->aux->size_poke_tab;
18503	struct bpf_jit_poke_descriptor *desc;
18504
18505	for (i = `0`; i < sz; i++) {
18506	desc = &tab[i];
18507	if (desc->insn_idx <= off)
18508	continue;
18509	desc->insn_idx += len - `1`;
18510	}
18511	}
18512
18513	static struct bpf_prog bpf_patch_insn_data(struct* bpf_verifier_env *env, u32 off,
18514	const struct bpf_insn *patch, u32 len)
18515	{
18516	struct bpf_prog *new_prog;
18517	struct bpf_insn_aux_data *new_data = NULL;
18518
18519	if (len > `1`) {
18520	new_data = vzalloc(array_size(env->prog->len + len - `1`,
18521	sizeof(struct bpf_insn_aux_data)));
18522	if (!new_data)
18523	return NULL;
18524	}
18525
18526	new_prog = bpf_patch_insn_single(prog: env->prog, off, patch, len);
18527	if (IS_ERR(ptr: new_prog)) {
18528	if (PTR_ERR(ptr: new_prog) == -ERANGE)
18529	verbose(private_data: env,
18530	fmt: "insn %d cannot be patched due to 16-bit range\n",
18531	env->insn_aux_data[off].orig_idx);
18532	vfree(addr: new_data);
18533	return NULL;
18534	}
18535	adjust_insn_aux_data(env, new_data, new_prog, off, cnt: len);
18536	adjust_subprog_starts(env, off, len);
18537	adjust_poke_descs(prog: new_prog, off, len);
18538	return new_prog;
18539	}
18540
18541	static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
18542	u32 off, u32 cnt)
18543	{
18544	int i, j;
18545
18546	/ find first prog starting at or after off (first to remove) /
18547	for (i = `0`; i < env->subprog_cnt; i++)
18548	if (env->subprog_info[i].start >= off)
18549	break;
18550	/ find first prog starting at or after off + cnt (first to stay) /
18551	for (j = i; j < env->subprog_cnt; j++)
18552	if (env->subprog_info[j].start >= off + cnt)
18553	break;
18554	/ if j doesn't start exactly at off + cnt, we are just removing*
18555	* the front of previous prog
18556	*/
18557	if (env->subprog_info[j].start != off + cnt)
18558	j--;
18559
18560	if (j > i) {
18561	struct bpf_prog_aux *aux = env->prog->aux;
18562	int move;
18563
18564	/ move fake 'exit' subprog as well /
18565	move = env->subprog_cnt + `1` - j;
18566
18567	memmove(env->subprog_info + i,
18568	env->subprog_info + j,
18569	sizeof(env->subprog_info) move);
18570	env->subprog_cnt -= j - i;
18571
18572	/ remove func_info /
18573	if (aux->func_info) {
18574	move = aux->func_info_cnt - j;
18575
18576	memmove(aux->func_info + i,
18577	aux->func_info + j,
18578	sizeof(aux->func_info) move);
18579	aux->func_info_cnt -= j - i;
18580	/ func_info->insn_off is set after all code rewrites,*
18581	* in adjust_btf_func() - no need to adjust
18582	*/
18583	}
18584	} else {
18585	/ convert i from "first prog to remove" to "first to adjust" /
18586	if (env->subprog_info[i].start == off)
18587	i++;
18588	}
18589
18590	/ update fake 'exit' subprog as well /
18591	for (; i <= env->subprog_cnt; i++)
18592	env->subprog_info[i].start -= cnt;
18593
18594	return `0`;
18595	}
18596
18597	static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
18598	u32 cnt)
18599	{
18600	struct bpf_prog *prog = env->prog;
18601	u32 i, l_off, l_cnt, nr_linfo;
18602	struct bpf_line_info *linfo;
18603
18604	nr_linfo = prog->aux->nr_linfo;
18605	if (!nr_linfo)
18606	return `0`;
18607
18608	linfo = prog->aux->linfo;
18609
18610	/ find first line info to remove, count lines to be removed /
18611	for (i = `0`; i < nr_linfo; i++)
18612	if (linfo[i].insn_off >= off)
18613	break;
18614
18615	l_off = i;
18616	l_cnt = `0`;
18617	for (; i < nr_linfo; i++)
18618	if (linfo[i].insn_off < off + cnt)
18619	l_cnt++;
18620	else
18621	break;
18622
18623	/ First live insn doesn't match first live linfo, it needs to "inherit"*
18624	* last removed linfo. prog is already modified, so prog->len == off
18625	* means no live instructions after (tail of the program was removed).
18626	*/
18627	if (prog->len != off && l_cnt &&
18628	(i == nr_linfo \|\| linfo[i].insn_off != off + cnt)) {
18629	l_cnt--;
18630	linfo[--i].insn_off = off + cnt;
18631	}
18632
18633	/ remove the line info which refer to the removed instructions /
18634	if (l_cnt) {
18635	memmove(linfo + l_off, linfo + i,
18636	sizeof(linfo) (nr_linfo - i));
18637
18638	prog->aux->nr_linfo -= l_cnt;
18639	nr_linfo = prog->aux->nr_linfo;
18640	}
18641
18642	/ pull all linfo[i].insn_off >= off + cnt in by cnt /
18643	for (i = l_off; i < nr_linfo; i++)
18644	linfo[i].insn_off -= cnt;
18645
18646	/ fix up all subprogs (incl. 'exit') which start >= off /
18647	for (i = `0`; i <= env->subprog_cnt; i++)
18648	if (env->subprog_info[i].linfo_idx > l_off) {
18649	/ program may have started in the removed region but*
18650	* may not be fully removed
18651	*/
18652	if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
18653	env->subprog_info[i].linfo_idx -= l_cnt;
18654	else
18655	env->subprog_info[i].linfo_idx = l_off;
18656	}
18657
18658	return `0`;
18659	}
18660
18661	static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
18662	{
18663	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18664	unsigned int orig_prog_len = env->prog->len;
18665	int err;
18666
18667	if (bpf_prog_is_offloaded(aux: env->prog->aux))
18668	bpf_prog_offload_remove_insns(env, off, cnt);
18669
18670	err = bpf_remove_insns(prog: env->prog, off, cnt);
18671	if (err)
18672	return err;
18673
18674	err = adjust_subprog_starts_after_remove(env, off, cnt);
18675	if (err)
18676	return err;
18677
18678	err = bpf_adj_linfo_after_remove(env, off, cnt);
18679	if (err)
18680	return err;
18681
18682	memmove(aux_data + off, aux_data + off + cnt,
18683	sizeof(aux_data) (orig_prog_len - off - cnt));
18684
18685	return `0`;
18686	}
18687
18688	/ The verifier does more data flow analysis than llvm and will not*
18689	* explore branches that are dead at run time. Malicious programs can
18690	* have dead code too. Therefore replace all dead at-run-time code
18691	* with 'ja -1'.
18692	*
18693	* Just nops are not optimal, e.g. if they would sit at the end of the
18694	* program and through another bug we would manage to jump there, then
18695	* we'd execute beyond program memory otherwise. Returning exception
18696	* code also wouldn't work since we can have subprogs where the dead
18697	* code could be located.
18698	*/
18699	static void sanitize_dead_code(struct bpf_verifier_env *env)
18700	{
18701	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18702	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, `0`, `0`, -`1`);
18703	struct bpf_insn *insn = env->prog->insnsi;
18704	const int insn_cnt = env->prog->len;
18705	int i;
18706
18707	for (i = `0`; i < insn_cnt; i++) {
18708	if (aux_data[i].seen)
18709	continue;
18710	memcpy(insn + i, &trap, sizeof(trap));
18711	aux_data[i].zext_dst = false;
18712	}
18713	}
18714
18715	static bool insn_is_cond_jump(u8 code)
18716	{
18717	u8 op;
18718
18719	op = BPF_OP(code);
18720	if (BPF_CLASS(code) == BPF_JMP32)
18721	return op != BPF_JA;
18722
18723	if (BPF_CLASS(code) != BPF_JMP)
18724	return false;
18725
18726	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
18727	}
18728
18729	static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
18730	{
18731	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18732	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, `0`, `0`, `0`);
18733	struct bpf_insn *insn = env->prog->insnsi;
18734	const int insn_cnt = env->prog->len;
18735	int i;
18736
18737	for (i = `0`; i < insn_cnt; i++, insn++) {
18738	if (!insn_is_cond_jump(code: insn->code))
18739	continue;
18740
18741	if (!aux_data[i + `1`].seen)
18742	ja.off = insn->off;
18743	else if (!aux_data[i + `1` + insn->off].seen)
18744	ja.off = `0`;
18745	else
18746	continue;
18747
18748	if (bpf_prog_is_offloaded(aux: env->prog->aux))
18749	bpf_prog_offload_replace_insn(env, off: i, insn: &ja);
18750
18751	memcpy(insn, &ja, sizeof(ja));
18752	}
18753	}
18754
18755	static int opt_remove_dead_code(struct bpf_verifier_env *env)
18756	{
18757	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18758	int insn_cnt = env->prog->len;
18759	int i, err;
18760
18761	for (i = `0`; i < insn_cnt; i++) {
18762	int j;
18763
18764	j = `0`;
18765	while (i + j < insn_cnt && !aux_data[i + j].seen)
18766	j++;
18767	if (!j)
18768	continue;
18769
18770	err = verifier_remove_insns(env, off: i, cnt: j);
18771	if (err)
18772	return err;
18773	insn_cnt = env->prog->len;
18774	}
18775
18776	return `0`;
18777	}
18778
18779	static int opt_remove_nops(struct bpf_verifier_env *env)
18780	{
18781	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, `0`, `0`, `0`);
18782	struct bpf_insn *insn = env->prog->insnsi;
18783	int insn_cnt = env->prog->len;
18784	int i, err;
18785
18786	for (i = `0`; i < insn_cnt; i++) {
18787	if (memcmp(p: &insn[i], q: &ja, size: sizeof(ja)))
18788	continue;
18789
18790	err = verifier_remove_insns(env, off: i, cnt: `1`);
18791	if (err)
18792	return err;
18793	insn_cnt--;
18794	i--;
18795	}
18796
18797	return `0`;
18798	}
18799
18800	static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
18801	const union bpf_attr *attr)
18802	{
18803	struct bpf_insn *patch, zext_patch[`2`], rnd_hi32_patch[`4`];
18804	struct bpf_insn_aux_data *aux = env->insn_aux_data;
18805	int i, patch_len, delta = `0`, len = env->prog->len;
18806	struct bpf_insn *insns = env->prog->insnsi;
18807	struct bpf_prog *new_prog;
18808	bool rnd_hi32;
18809
18810	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
18811	zext_patch[`1`] = BPF_ZEXT_REG(`0`);
18812	rnd_hi32_patch[`1`] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, `0`);
18813	rnd_hi32_patch[`2`] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, `32`);
18814	rnd_hi32_patch[`3`] = BPF_ALU64_REG(BPF_OR, `0`, BPF_REG_AX);
18815	for (i = `0`; i < len; i++) {
18816	int adj_idx = i + delta;
18817	struct bpf_insn insn;
18818	int load_reg;
18819
18820	insn = insns[adj_idx];
18821	load_reg = insn_def_regno(insn: &insn);
18822	if (!aux[adj_idx].zext_dst) {
18823	u8 code, class;
18824	u32 imm_rnd;
18825
18826	if (!rnd_hi32)
18827	continue;
18828
18829	code = insn.code;
18830	class = BPF_CLASS(code);
18831	if (load_reg == -`1`)
18832	continue;
18833
18834	/ NOTE: arg "reg" (the fourth one) is only used for*
18835	* BPF_STX + SRC_OP, so it is safe to pass NULL
18836	* here.
18837	*/
18838	if (is_reg64(env, insn: &insn, regno: load_reg, NULL, t: DST_OP)) {
18839	if (class == BPF_LD &&
18840	BPF_MODE(code) == BPF_IMM)
18841	i++;
18842	continue;
18843	}
18844
18845	/ ctx load could be transformed into wider load. /
18846	if (class == BPF_LDX &&
18847	aux[adj_idx].ptr_type == PTR_TO_CTX)
18848	continue;
18849
18850	imm_rnd = get_random_u32();
18851	rnd_hi32_patch[`0`] = insn;
18852	rnd_hi32_patch[`1`].imm = imm_rnd;
18853	rnd_hi32_patch[`3`].dst_reg = load_reg;
18854	patch = rnd_hi32_patch;
18855	patch_len = `4`;
18856	goto apply_patch_buffer;
18857	}
18858
18859	/ Add in an zero-extend instruction if a) the JIT has requested*
18860	* it or b) it's a CMPXCHG.
18861	*
18862	* The latter is because: BPF_CMPXCHG always loads a value into
18863	* R0, therefore always zero-extends. However some archs'
18864	* equivalent instruction only does this load when the
18865	* comparison is successful. This detail of CMPXCHG is
18866	* orthogonal to the general zero-extension behaviour of the
18867	* CPU, so it's treated independently of bpf_jit_needs_zext.
18868	*/
18869	if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(insn: &insn))
18870	continue;
18871
18872	/ Zero-extension is done by the caller. /
18873	if (bpf_pseudo_kfunc_call(insn: &insn))
18874	continue;
18875
18876	if (WARN_ON(load_reg == -`1`)) {
18877	verbose(private_data: env, fmt: "verifier bug. zext_dst is set, but no reg is defined\n");
18878	return -EFAULT;
18879	}
18880
18881	zext_patch[`0`] = insn;
18882	zext_patch[`1`].dst_reg = load_reg;
18883	zext_patch[`1`].src_reg = load_reg;
18884	patch = zext_patch;
18885	patch_len = `2`;
18886	apply_patch_buffer:
18887	new_prog = bpf_patch_insn_data(env, off: adj_idx, patch, len: patch_len);
18888	if (!new_prog)
18889	return -ENOMEM;
18890	env->prog = new_prog;
18891	insns = new_prog->insnsi;
18892	aux = env->insn_aux_data;
18893	delta += patch_len - `1`;
18894	}
18895
18896	return `0`;
18897	}
18898
18899	/ convert load instructions that access fields of a context type into a*
18900	* sequence of instructions that access fields of the underlying structure:
18901	* struct __sk_buff -> struct sk_buff
18902	* struct bpf_sock_ops -> struct sock
18903	*/
18904	static int convert_ctx_accesses(struct bpf_verifier_env *env)
18905	{
18906	const struct bpf_verifier_ops *ops = env->ops;
18907	int i, cnt, size, ctx_field_size, delta = `0`;
18908	const int insn_cnt = env->prog->len;
18909	struct bpf_insn insn_buf[`16`], *insn;
18910	u32 target_size, size_default, off;
18911	struct bpf_prog *new_prog;
18912	enum bpf_access_type type;
18913	bool is_narrower_load;
18914
18915	if (ops->gen_prologue \|\| env->seen_direct_write) {
18916	if (!ops->gen_prologue) {
18917	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
18918	return -EINVAL;
18919	}
18920	cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
18921	env->prog);
18922	if (cnt >= ARRAY_SIZE(insn_buf)) {
18923	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
18924	return -EINVAL;
18925	} else if (cnt) {
18926	new_prog = bpf_patch_insn_data(env, off: `0`, patch: insn_buf, len: cnt);
18927	if (!new_prog)
18928	return -ENOMEM;
18929
18930	env->prog = new_prog;
18931	delta += cnt - `1`;
18932	}
18933	}
18934
18935	if (bpf_prog_is_offloaded(aux: env->prog->aux))
18936	return `0`;
18937
18938	insn = env->prog->insnsi + delta;
18939
18940	for (i = `0`; i < insn_cnt; i++, insn++) {
18941	bpf_convert_ctx_access_t convert_ctx_access;
18942	u8 mode;
18943
18944	if (insn->code == (BPF_LDX \| BPF_MEM \| BPF_B) \|\|
18945	insn->code == (BPF_LDX \| BPF_MEM \| BPF_H) \|\|
18946	insn->code == (BPF_LDX \| BPF_MEM \| BPF_W) \|\|
18947	insn->code == (BPF_LDX \| BPF_MEM \| BPF_DW) \|\|
18948	insn->code == (BPF_LDX \| BPF_MEMSX \| BPF_B) \|\|
18949	insn->code == (BPF_LDX \| BPF_MEMSX \| BPF_H) \|\|
18950	insn->code == (BPF_LDX \| BPF_MEMSX \| BPF_W)) {
18951	type = BPF_READ;
18952	} else if (insn->code == (BPF_STX \| BPF_MEM \| BPF_B) \|\|
18953	insn->code == (BPF_STX \| BPF_MEM \| BPF_H) \|\|
18954	insn->code == (BPF_STX \| BPF_MEM \| BPF_W) \|\|
18955	insn->code == (BPF_STX \| BPF_MEM \| BPF_DW) \|\|
18956	insn->code == (BPF_ST \| BPF_MEM \| BPF_B) \|\|
18957	insn->code == (BPF_ST \| BPF_MEM \| BPF_H) \|\|
18958	insn->code == (BPF_ST \| BPF_MEM \| BPF_W) \|\|
18959	insn->code == (BPF_ST \| BPF_MEM \| BPF_DW)) {
18960	type = BPF_WRITE;
18961	} else {
18962	continue;
18963	}
18964
18965	if (type == BPF_WRITE &&
18966	env->insn_aux_data[i + delta].sanitize_stack_spill) {
18967	struct bpf_insn patch[] = {
18968	*insn,
18969	BPF_ST_NOSPEC(),
18970	};
18971
18972	cnt = ARRAY_SIZE(patch);
18973	new_prog = bpf_patch_insn_data(env, off: i + delta, patch, len: cnt);
18974	if (!new_prog)
18975	return -ENOMEM;
18976
18977	delta += cnt - `1`;
18978	env->prog = new_prog;
18979	insn = new_prog->insnsi + i + delta;
18980	continue;
18981	}
18982
18983	switch ((int)env->insn_aux_data[i + delta].ptr_type) {
18984	case PTR_TO_CTX:
18985	if (!ops->convert_ctx_access)
18986	continue;
18987	convert_ctx_access = ops->convert_ctx_access;
18988	break;
18989	case PTR_TO_SOCKET:
18990	case PTR_TO_SOCK_COMMON:
18991	convert_ctx_access = bpf_sock_convert_ctx_access;
18992	break;
18993	case PTR_TO_TCP_SOCK:
18994	convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
18995	break;
18996	case PTR_TO_XDP_SOCK:
18997	convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
18998	break;
18999	case PTR_TO_BTF_ID:
19000	case PTR_TO_BTF_ID \| PTR_UNTRUSTED:
19001	/ PTR_TO_BTF_ID \| MEM_ALLOC always has a valid lifetime, unlike*
19002	* PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
19003	* be said once it is marked PTR_UNTRUSTED, hence we must handle
19004	* any faults for loads into such types. BPF_WRITE is disallowed
19005	* for this case.
19006	*/
19007	case PTR_TO_BTF_ID \| MEM_ALLOC \| PTR_UNTRUSTED:
19008	if (type == BPF_READ) {
19009	if (BPF_MODE(insn->code) == BPF_MEM)
19010	insn->code = BPF_LDX \| BPF_PROBE_MEM \|
19011	BPF_SIZE((insn)->code);
19012	else
19013	insn->code = BPF_LDX \| BPF_PROBE_MEMSX \|
19014	BPF_SIZE((insn)->code);
19015	env->prog->aux->num_exentries++;
19016	}
19017	continue;
19018	case PTR_TO_ARENA:
19019	if (BPF_MODE(insn->code) == BPF_MEMSX) {
19020	verbose(private_data: env, fmt: "sign extending loads from arena are not supported yet\n");
19021	return -EOPNOTSUPP;
19022	}
19023	insn->code = BPF_CLASS(insn->code) \| BPF_PROBE_MEM32 \| BPF_SIZE(insn->code);
19024	env->prog->aux->num_exentries++;
19025	continue;
19026	default:
19027	continue;
19028	}
19029
19030	ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
19031	size = BPF_LDST_BYTES(insn);
19032	mode = BPF_MODE(insn->code);
19033
19034	/ If the read access is a narrower load of the field,*
19035	* convert to a 4/8-byte load, to minimum program type specific
19036	* convert_ctx_access changes. If conversion is successful,
19037	* we will apply proper mask to the result.
19038	*/
19039	is_narrower_load = size < ctx_field_size;
19040	size_default = bpf_ctx_off_adjust_machine(size: ctx_field_size);
19041	off = insn->off;
19042	if (is_narrower_load) {
19043	u8 size_code;
19044
19045	if (type == BPF_WRITE) {
19046	verbose(private_data: env, fmt: "bpf verifier narrow ctx access misconfigured\n");
19047	return -EINVAL;
19048	}
19049
19050	size_code = BPF_H;
19051	if (ctx_field_size == `4`)
19052	size_code = BPF_W;
19053	else if (ctx_field_size == `8`)
19054	size_code = BPF_DW;
19055
19056	insn->off = off & ~(size_default - `1`);
19057	insn->code = BPF_LDX \| BPF_MEM \| size_code;
19058	}
19059
19060	target_size = `0`;
19061	cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
19062	&target_size);
19063	if (cnt == `0` \|\| cnt >= ARRAY_SIZE(insn_buf) \|\|
19064	(ctx_field_size && !target_size)) {
19065	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
19066	return -EINVAL;
19067	}
19068
19069	if (is_narrower_load && size < target_size) {
19070	u8 shift = bpf_ctx_narrow_access_offset(
19071	off, size, size_default) * `8`;
19072	if (shift && cnt + `1` >= ARRAY_SIZE(insn_buf)) {
19073	verbose(private_data: env, fmt: "bpf verifier narrow ctx load misconfigured\n");
19074	return -EINVAL;
19075	}
19076	if (ctx_field_size <= `4`) {
19077	if (shift)
19078	insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
19079	insn->dst_reg,
19080	shift);
19081	insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
19082	(`1` << size * `8`) - `1`);
19083	} else {
19084	if (shift)
19085	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
19086	insn->dst_reg,
19087	shift);
19088	insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
19089	(`1ULL` << size * `8`) - `1`);
19090	}
19091	}
19092	if (mode == BPF_MEMSX)
19093	insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 \| BPF_MOV \| BPF_X,
19094	insn->dst_reg, insn->dst_reg,
19095	size * `8`, `0`);
19096
19097	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19098	if (!new_prog)
19099	return -ENOMEM;
19100
19101	delta += cnt - `1`;
19102
19103	/ keep walking new program and skip insns we just inserted /
19104	env->prog = new_prog;
19105	insn = new_prog->insnsi + i + delta;
19106	}
19107
19108	return `0`;
19109	}
19110
19111	static int jit_subprogs(struct bpf_verifier_env *env)
19112	{
19113	struct bpf_prog prog = env->prog, func, tmp;
19114	int i, j, subprog_start, subprog_end = `0`, len, subprog;
19115	struct bpf_map *map_ptr;
19116	struct bpf_insn *insn;
19117	void *old_bpf_func;
19118	int err, num_exentries;
19119
19120	if (env->subprog_cnt <= `1`)
19121	return `0`;
19122
19123	for (i = `0`, insn = prog->insnsi; i < prog->len; i++, insn++) {
19124	if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
19125	continue;
19126
19127	/ Upon error here we cannot fall back to interpreter but*
19128	* need a hard reject of the program. Thus -EFAULT is
19129	* propagated in any case.
19130	*/
19131	subprog = find_subprog(env, off: i + insn->imm + `1`);
19132	if (subprog < `0`) {
19133	WARN_ONCE(`1`, "verifier bug. No program starts at insn %d\n",
19134	i + insn->imm + `1`);
19135	return -EFAULT;
19136	}
19137	/ temporarily remember subprog id inside insn instead of*
19138	* aux_data, since next loop will split up all insns into funcs
19139	*/
19140	insn->off = subprog;
19141	/ remember original imm in case JIT fails and fallback*
19142	* to interpreter will be needed
19143	*/
19144	env->insn_aux_data[i].call_imm = insn->imm;
19145	/ point imm to __bpf_call_base+1 from JITs point of view /
19146	insn->imm = `1`;
19147	if (bpf_pseudo_func(insn))
19148	/ jit (e.g. x86_64) may emit fewer instructions*
19149	* if it learns a u32 imm is the same as a u64 imm.
19150	* Force a non zero here.
19151	*/
19152	insn[`1`].imm = `1`;
19153	}
19154
19155	err = bpf_prog_alloc_jited_linfo(prog);
19156	if (err)
19157	goto out_undo_insn;
19158
19159	err = -ENOMEM;
19160	func = kcalloc(n: env->subprog_cnt, size: sizeof(prog), GFP_KERNEL);
19161	if (!func)
19162	goto out_undo_insn;
19163
19164	for (i = `0`; i < env->subprog_cnt; i++) {
19165	subprog_start = subprog_end;
19166	subprog_end = env->subprog_info[i + `1`].start;
19167
19168	len = subprog_end - subprog_start;
19169	/ bpf_prog_run() doesn't call subprogs directly,*
19170	* hence main prog stats include the runtime of subprogs.
19171	* subprogs don't have IDs and not reachable via prog_get_next_id
19172	* func[i]->stats will never be accessed and stays NULL
19173	*/
19174	func[i] = bpf_prog_alloc_no_stats(size: bpf_prog_size(proglen: len), GFP_USER);
19175	if (!func[i])
19176	goto out_free;
19177	memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
19178	len * sizeof(struct bpf_insn));
19179	func[i]->type = prog->type;
19180	func[i]->len = len;
19181	if (bpf_prog_calc_tag(fp: func[i]))
19182	goto out_free;
19183	func[i]->is_func = `1`;
19184	func[i]->aux->func_idx = i;
19185	/ Below members will be freed only at prog->aux /
19186	func[i]->aux->btf = prog->aux->btf;
19187	func[i]->aux->func_info = prog->aux->func_info;
19188	func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
19189	func[i]->aux->poke_tab = prog->aux->poke_tab;
19190	func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
19191
19192	for (j = `0`; j < prog->aux->size_poke_tab; j++) {
19193	struct bpf_jit_poke_descriptor *poke;
19194
19195	poke = &prog->aux->poke_tab[j];
19196	if (poke->insn_idx < subprog_end &&
19197	poke->insn_idx >= subprog_start)
19198	poke->aux = func[i]->aux;
19199	}
19200
19201	func[i]->aux->name[`0`] = `'F'`;
19202	func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
19203	func[i]->jit_requested = `1`;
19204	func[i]->blinding_requested = prog->blinding_requested;
19205	func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
19206	func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
19207	func[i]->aux->linfo = prog->aux->linfo;
19208	func[i]->aux->nr_linfo = prog->aux->nr_linfo;
19209	func[i]->aux->jited_linfo = prog->aux->jited_linfo;
19210	func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
19211	func[i]->aux->arena = prog->aux->arena;
19212	num_exentries = `0`;
19213	insn = func[i]->insnsi;
19214	for (j = `0`; j < func[i]->len; j++, insn++) {
19215	if (BPF_CLASS(insn->code) == BPF_LDX &&
19216	(BPF_MODE(insn->code) == BPF_PROBE_MEM \|\|
19217	BPF_MODE(insn->code) == BPF_PROBE_MEM32 \|\|
19218	BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
19219	num_exentries++;
19220	if ((BPF_CLASS(insn->code) == BPF_STX \|\|
19221	BPF_CLASS(insn->code) == BPF_ST) &&
19222	BPF_MODE(insn->code) == BPF_PROBE_MEM32)
19223	num_exentries++;
19224	}
19225	func[i]->aux->num_exentries = num_exentries;
19226	func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
19227	func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
19228	if (!i)
19229	func[i]->aux->exception_boundary = env->seen_exception;
19230	func[i] = bpf_int_jit_compile(prog: func[i]);
19231	if (!func[i]->jited) {
19232	err = -ENOTSUPP;
19233	goto out_free;
19234	}
19235	cond_resched();
19236	}
19237
19238	/ at this point all bpf functions were successfully JITed*
19239	* now populate all bpf_calls with correct addresses and
19240	* run last pass of JIT
19241	*/
19242	for (i = `0`; i < env->subprog_cnt; i++) {
19243	insn = func[i]->insnsi;
19244	for (j = `0`; j < func[i]->len; j++, insn++) {
19245	if (bpf_pseudo_func(insn)) {
19246	subprog = insn->off;
19247	insn[`0`].imm = (u32)(long)func[subprog]->bpf_func;
19248	insn[`1`].imm = ((u64)(long)func[subprog]->bpf_func) >> `32`;
19249	continue;
19250	}
19251	if (!bpf_pseudo_call(insn))
19252	continue;
19253	subprog = insn->off;
19254	insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
19255	}
19256
19257	/ we use the aux data to keep a list of the start addresses*
19258	* of the JITed images for each function in the program
19259	*
19260	* for some architectures, such as powerpc64, the imm field
19261	* might not be large enough to hold the offset of the start
19262	* address of the callee's JITed image from __bpf_call_base
19263	*
19264	* in such cases, we can lookup the start address of a callee
19265	* by using its subprog id, available from the off field of
19266	* the call instruction, as an index for this list
19267	*/
19268	func[i]->aux->func = func;
19269	func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
19270	func[i]->aux->real_func_cnt = env->subprog_cnt;
19271	}
19272	for (i = `0`; i < env->subprog_cnt; i++) {
19273	old_bpf_func = func[i]->bpf_func;
19274	tmp = bpf_int_jit_compile(prog: func[i]);
19275	if (tmp != func[i] \|\| func[i]->bpf_func != old_bpf_func) {
19276	verbose(private_data: env, fmt: "JIT doesn't support bpf-to-bpf calls\n");
19277	err = -ENOTSUPP;
19278	goto out_free;
19279	}
19280	cond_resched();
19281	}
19282
19283	/ finally lock prog and jit images for all functions and*
19284	* populate kallsysm. Begin at the first subprogram, since
19285	* bpf_prog_load will add the kallsyms for the main program.
19286	*/
19287	for (i = `1`; i < env->subprog_cnt; i++) {
19288	bpf_prog_lock_ro(fp: func[i]);
19289	bpf_prog_kallsyms_add(fp: func[i]);
19290	}
19291
19292	/ Last step: make now unused interpreter insns from main*
19293	* prog consistent for later dump requests, so they can
19294	* later look the same as if they were interpreted only.
19295	*/
19296	for (i = `0`, insn = prog->insnsi; i < prog->len; i++, insn++) {
19297	if (bpf_pseudo_func(insn)) {
19298	insn[`0`].imm = env->insn_aux_data[i].call_imm;
19299	insn[`1`].imm = insn->off;
19300	insn->off = `0`;
19301	continue;
19302	}
19303	if (!bpf_pseudo_call(insn))
19304	continue;
19305	insn->off = env->insn_aux_data[i].call_imm;
19306	subprog = find_subprog(env, off: i + insn->off + `1`);
19307	insn->imm = subprog;
19308	}
19309
19310	prog->jited = `1`;
19311	prog->bpf_func = func[`0`]->bpf_func;
19312	prog->jited_len = func[`0`]->jited_len;
19313	prog->aux->extable = func[`0`]->aux->extable;
19314	prog->aux->num_exentries = func[`0`]->aux->num_exentries;
19315	prog->aux->func = func;
19316	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
19317	prog->aux->real_func_cnt = env->subprog_cnt;
19318	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
19319	prog->aux->exception_boundary = func[`0`]->aux->exception_boundary;
19320	bpf_prog_jit_attempt_done(prog);
19321	return `0`;
19322	out_free:
19323	/ We failed JIT'ing, so at this point we need to unregister poke*
19324	* descriptors from subprogs, so that kernel is not attempting to
19325	* patch it anymore as we're freeing the subprog JIT memory.
19326	*/
19327	for (i = `0`; i < prog->aux->size_poke_tab; i++) {
19328	map_ptr = prog->aux->poke_tab[i].tail_call.map;
19329	map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
19330	}
19331	/ At this point we're guaranteed that poke descriptors are not*
19332	* live anymore. We can just unlink its descriptor table as it's
19333	* released with the main prog.
19334	*/
19335	for (i = `0`; i < env->subprog_cnt; i++) {
19336	if (!func[i])
19337	continue;
19338	func[i]->aux->poke_tab = NULL;
19339	bpf_jit_free(fp: func[i]);
19340	}
19341	kfree(objp: func);
19342	out_undo_insn:
19343	/ cleanup main prog to be interpreted /
19344	prog->jit_requested = `0`;
19345	prog->blinding_requested = `0`;
19346	for (i = `0`, insn = prog->insnsi; i < prog->len; i++, insn++) {
19347	if (!bpf_pseudo_call(insn))
19348	continue;
19349	insn->off = `0`;
19350	insn->imm = env->insn_aux_data[i].call_imm;
19351	}
19352	bpf_prog_jit_attempt_done(prog);
19353	return err;
19354	}
19355
19356	static int fixup_call_args(struct bpf_verifier_env *env)
19357	{
19358	#ifndef CONFIG_BPF_JIT_ALWAYS_ON
19359	struct bpf_prog *prog = env->prog;
19360	struct bpf_insn *insn = prog->insnsi;
19361	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
19362	int i, depth;
19363	#endif
19364	int err = `0`;
19365
19366	if (env->prog->jit_requested &&
19367	!bpf_prog_is_offloaded(aux: env->prog->aux)) {
19368	err = jit_subprogs(env);
19369	if (err == `0`)
19370	return `0`;
19371	if (err == -EFAULT)
19372	return err;
19373	}
19374	#ifndef CONFIG_BPF_JIT_ALWAYS_ON
19375	if (has_kfunc_call) {
19376	verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
19377	return -EINVAL;
19378	}
19379	if (env->subprog_cnt > `1` && env->prog->aux->tail_call_reachable) {
19380	/ When JIT fails the progs with bpf2bpf calls and tail_calls*
19381	* have to be rejected, since interpreter doesn't support them yet.
19382	*/
19383	verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
19384	return -EINVAL;
19385	}
19386	for (i = `0`; i < prog->len; i++, insn++) {
19387	if (bpf_pseudo_func(insn)) {
19388	/ When JIT fails the progs with callback calls*
19389	* have to be rejected, since interpreter doesn't support them yet.
19390	*/
19391	verbose(env, "callbacks are not allowed in non-JITed programs\n");
19392	return -EINVAL;
19393	}
19394
19395	if (!bpf_pseudo_call(insn))
19396	continue;
19397	depth = get_callee_stack_depth(env, insn, i);
19398	if (depth < `0`)
19399	return depth;
19400	bpf_patch_call_args(insn, depth);
19401	}
19402	err = `0`;
19403	#endif
19404	return err;
19405	}
19406
19407	/ replace a generic kfunc with a specialized version if necessary /
19408	static void specialize_kfunc(struct bpf_verifier_env *env,
19409	u32 func_id, u16 offset, unsigned long *addr)
19410	{
19411	struct bpf_prog *prog = env->prog;
19412	bool seen_direct_write;
19413	void *xdp_kfunc;
19414	bool is_rdonly;
19415
19416	if (bpf_dev_bound_kfunc_id(btf_id: func_id)) {
19417	xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19418	if (xdp_kfunc) {
19419	addr = (unsigned* long)xdp_kfunc;
19420	return;
19421	}
19422	/ fallback to default kfunc when not supported by netdev /
19423	}
19424
19425	if (offset)
19426	return;
19427
19428	if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19429	seen_direct_write = env->seen_direct_write;
19430	is_rdonly = !may_access_direct_pkt_data(env, NULL, t: BPF_WRITE);
19431
19432	if (is_rdonly)
19433	addr = (unsigned* long)bpf_dynptr_from_skb_rdonly;
19434
19435	/ restore env->seen_direct_write to its original value, since*
19436	* may_access_direct_pkt_data mutates it
19437	*/
19438	env->seen_direct_write = seen_direct_write;
19439	}
19440	}
19441
19442	static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19443	u16 struct_meta_reg,
19444	u16 node_offset_reg,
19445	struct bpf_insn *insn,
19446	struct bpf_insn *insn_buf,
19447	int *cnt)
19448	{
19449	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19450	struct bpf_insn addr[`2`] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19451
19452	insn_buf[`0`] = addr[`0`];
19453	insn_buf[`1`] = addr[`1`];
19454	insn_buf[`2`] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19455	insn_buf[`3`] = *insn;
19456	*cnt = `4`;
19457	}
19458
19459	static int fixup_kfunc_call(struct bpf_verifier_env env, struct* bpf_insn *insn,
19460	struct bpf_insn insn_buf, int* insn_idx, int *cnt)
19461	{
19462	const struct bpf_kfunc_desc *desc;
19463
19464	if (!insn->imm) {
19465	verbose(private_data: env, fmt: "invalid kernel function call not eliminated in verifier pass\n");
19466	return -EINVAL;
19467	}
19468
19469	*cnt = `0`;
19470
19471	/ insn->imm has the btf func_id. Replace it with an offset relative to*
19472	* __bpf_call_base, unless the JIT needs to call functions that are
19473	* further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19474	*/
19475	desc = find_kfunc_desc(prog: env->prog, func_id: insn->imm, offset: insn->off);
19476	if (!desc) {
19477	verbose(private_data: env, fmt: "verifier internal error: kernel function descriptor not found for func_id %u\n",
19478	insn->imm);
19479	return -EFAULT;
19480	}
19481
19482	if (!bpf_jit_supports_far_kfunc_call())
19483	insn->imm = BPF_CALL_IMM(desc->addr);
19484	if (insn->off)
19485	return `0`;
19486	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] \|\|
19487	desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
19488	struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19489	struct bpf_insn addr[`2`] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19490	u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19491
19492	if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
19493	verbose(private_data: env, fmt: "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19494	insn_idx);
19495	return -EFAULT;
19496	}
19497
19498	insn_buf[`0`] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19499	insn_buf[`1`] = addr[`0`];
19500	insn_buf[`2`] = addr[`1`];
19501	insn_buf[`3`] = *insn;
19502	*cnt = `4`;
19503	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] \|\|
19504	desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] \|\|
19505	desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
19506	struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19507	struct bpf_insn addr[`2`] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19508
19509	if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
19510	verbose(private_data: env, fmt: "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19511	insn_idx);
19512	return -EFAULT;
19513	}
19514
19515	if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
19516	!kptr_struct_meta) {
19517	verbose(private_data: env, fmt: "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19518	insn_idx);
19519	return -EFAULT;
19520	}
19521
19522	insn_buf[`0`] = addr[`0`];
19523	insn_buf[`1`] = addr[`1`];
19524	insn_buf[`2`] = *insn;
19525	*cnt = `3`;
19526	} else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] \|\|
19527	desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] \|\|
19528	desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
19529	struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19530	int struct_meta_reg = BPF_REG_3;
19531	int node_offset_reg = BPF_REG_4;
19532
19533	/ rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs /
19534	if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
19535	struct_meta_reg = BPF_REG_4;
19536	node_offset_reg = BPF_REG_5;
19537	}
19538
19539	if (!kptr_struct_meta) {
19540	verbose(private_data: env, fmt: "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19541	insn_idx);
19542	return -EFAULT;
19543	}
19544
19545	__fixup_collection_insert_kfunc(insn_aux: &env->insn_aux_data[insn_idx], struct_meta_reg,
19546	node_offset_reg, insn, insn_buf, cnt);
19547	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] \|\|
19548	desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19549	insn_buf[`0`] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19550	*cnt = `1`;
19551	}
19552	return `0`;
19553	}
19554
19555	/ The function requires that first instruction in 'patch' is insnsi[prog->len - 1] /
19556	static int add_hidden_subprog(struct bpf_verifier_env env, struct* bpf_insn patch, int* len)
19557	{
19558	struct bpf_subprog_info *info = env->subprog_info;
19559	int cnt = env->subprog_cnt;
19560	struct bpf_prog *prog;
19561
19562	/ We only reserve one slot for hidden subprogs in subprog_info. /
19563	if (env->hidden_subprog_cnt) {
19564	verbose(private_data: env, fmt: "verifier internal error: only one hidden subprog supported\n");
19565	return -EFAULT;
19566	}
19567	/ We're not patching any existing instruction, just appending the new*
19568	* ones for the hidden subprog. Hence all of the adjustment operations
19569	* in bpf_patch_insn_data are no-ops.
19570	*/
19571	prog = bpf_patch_insn_data(env, off: env->prog->len - `1`, patch, len);
19572	if (!prog)
19573	return -ENOMEM;
19574	env->prog = prog;
19575	info[cnt + `1`].start = info[cnt].start;
19576	info[cnt].start = prog->len - len + `1`;
19577	env->subprog_cnt++;
19578	env->hidden_subprog_cnt++;
19579	return `0`;
19580	}
19581
19582	/ Do various post-verification rewrites in a single program pass.*
19583	* These rewrites simplify JIT and interpreter implementations.
19584	*/
19585	static int do_misc_fixups(struct bpf_verifier_env *env)
19586	{
19587	struct bpf_prog *prog = env->prog;
19588	enum bpf_attach_type eatype = prog->expected_attach_type;
19589	enum bpf_prog_type prog_type = resolve_prog_type(prog);
19590	struct bpf_insn *insn = prog->insnsi;
19591	const struct bpf_func_proto *fn;
19592	const int insn_cnt = prog->len;
19593	const struct bpf_map_ops *ops;
19594	struct bpf_insn_aux_data *aux;
19595	struct bpf_insn insn_buf[`16`];
19596	struct bpf_prog *new_prog;
19597	struct bpf_map *map_ptr;
19598	int i, ret, cnt, delta = `0`, cur_subprog = `0`;
19599	struct bpf_subprog_info *subprogs = env->subprog_info;
19600	u16 stack_depth = subprogs[cur_subprog].stack_depth;
19601	u16 stack_depth_extra = `0`;
19602
19603	if (env->seen_exception && !env->exception_callback_subprog) {
19604	struct bpf_insn patch[] = {
19605	env->prog->insnsi[insn_cnt - `1`],
19606	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
19607	BPF_EXIT_INSN(),
19608	};
19609
19610	ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
19611	if (ret < `0`)
19612	return ret;
19613	prog = env->prog;
19614	insn = prog->insnsi;
19615
19616	env->exception_callback_subprog = env->subprog_cnt - `1`;
19617	/ Don't update insn_cnt, as add_hidden_subprog always appends insns /
19618	mark_subprog_exc_cb(env, subprog: env->exception_callback_subprog);
19619	}
19620
19621	for (i = `0`; i < insn_cnt;) {
19622	if (insn->code == (BPF_ALU64 \| BPF_MOV \| BPF_X) && insn->imm) {
19623	if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == `1`) \|\|
19624	(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
19625	/ convert to 32-bit mov that clears upper 32-bit /
19626	insn->code = BPF_ALU \| BPF_MOV \| BPF_X;
19627	/ clear off and imm, so it's a normal 'wX = wY' from JIT pov /
19628	insn->off = `0`;
19629	insn->imm = `0`;
19630	} / cast from as(0) to as(1) should be handled by JIT /
19631	goto next_insn;
19632	}
19633
19634	if (env->insn_aux_data[i + delta].needs_zext)
19635	/ Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU /
19636	insn->code = BPF_ALU \| BPF_OP(insn->code) \| BPF_SRC(insn->code);
19637
19638	/ Make divide-by-zero exceptions impossible. /
19639	if (insn->code == (BPF_ALU64 \| BPF_MOD \| BPF_X) \|\|
19640	insn->code == (BPF_ALU64 \| BPF_DIV \| BPF_X) \|\|
19641	insn->code == (BPF_ALU \| BPF_MOD \| BPF_X) \|\|
19642	insn->code == (BPF_ALU \| BPF_DIV \| BPF_X)) {
19643	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
19644	bool isdiv = BPF_OP(insn->code) == BPF_DIV;
19645	struct bpf_insn *patchlet;
19646	struct bpf_insn chk_and_div[] = {
19647	/ [R,W]x div 0 -> 0 /
19648	BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) \|
19649	BPF_JNE \| BPF_K, insn->src_reg,
19650	`0`, `2`, `0`),
19651	BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
19652	BPF_JMP_IMM(BPF_JA, `0`, `0`, `1`),
19653	*insn,
19654	};
19655	struct bpf_insn chk_and_mod[] = {
19656	/ [R,W]x mod 0 -> [R,W]x /
19657	BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) \|
19658	BPF_JEQ \| BPF_K, insn->src_reg,
19659	`0`, `1` + (is64 ? `0` : `1`), `0`),
19660	*insn,
19661	BPF_JMP_IMM(BPF_JA, `0`, `0`, `1`),
19662	BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
19663	};
19664
19665	patchlet = isdiv ? chk_and_div : chk_and_mod;
19666	cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
19667	ARRAY_SIZE(chk_and_mod) - (is64 ? `2` : `0`);
19668
19669	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: patchlet, len: cnt);
19670	if (!new_prog)
19671	return -ENOMEM;
19672
19673	delta += cnt - `1`;
19674	env->prog = prog = new_prog;
19675	insn = new_prog->insnsi + i + delta;
19676	goto next_insn;
19677	}
19678
19679	/ Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. /
19680	if (BPF_CLASS(insn->code) == BPF_LD &&
19681	(BPF_MODE(insn->code) == BPF_ABS \|\|
19682	BPF_MODE(insn->code) == BPF_IND)) {
19683	cnt = env->ops->gen_ld_abs(insn, insn_buf);
19684	if (cnt == `0` \|\| cnt >= ARRAY_SIZE(insn_buf)) {
19685	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
19686	return -EINVAL;
19687	}
19688
19689	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19690	if (!new_prog)
19691	return -ENOMEM;
19692
19693	delta += cnt - `1`;
19694	env->prog = prog = new_prog;
19695	insn = new_prog->insnsi + i + delta;
19696	goto next_insn;
19697	}
19698
19699	/ Rewrite pointer arithmetic to mitigate speculation attacks. /
19700	if (insn->code == (BPF_ALU64 \| BPF_ADD \| BPF_X) \|\|
19701	insn->code == (BPF_ALU64 \| BPF_SUB \| BPF_X)) {
19702	const u8 code_add = BPF_ALU64 \| BPF_ADD \| BPF_X;
19703	const u8 code_sub = BPF_ALU64 \| BPF_SUB \| BPF_X;
19704	struct bpf_insn *patch = &insn_buf[`0`];
19705	bool issrc, isneg, isimm;
19706	u32 off_reg;
19707
19708	aux = &env->insn_aux_data[i + delta];
19709	if (!aux->alu_state \|\|
19710	aux->alu_state == BPF_ALU_NON_POINTER)
19711	goto next_insn;
19712
19713	isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
19714	issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
19715	BPF_ALU_SANITIZE_SRC;
19716	isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
19717
19718	off_reg = issrc ? insn->src_reg : insn->dst_reg;
19719	if (isimm) {
19720	*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
19721	} else {
19722	if (isneg)
19723	*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -`1`);
19724	*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
19725	*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
19726	*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
19727	*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, `0`);
19728	*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, `63`);
19729	*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
19730	}
19731	if (!issrc)
19732	*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
19733	insn->src_reg = BPF_REG_AX;
19734	if (isneg)
19735	insn->code = insn->code == code_add ?
19736	code_sub : code_add;
19737	patch++ = insn;
19738	if (issrc && isneg && !isimm)
19739	*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -`1`);
19740	cnt = patch - insn_buf;
19741
19742	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19743	if (!new_prog)
19744	return -ENOMEM;
19745
19746	delta += cnt - `1`;
19747	env->prog = prog = new_prog;
19748	insn = new_prog->insnsi + i + delta;
19749	goto next_insn;
19750	}
19751
19752	if (is_may_goto_insn(insn)) {
19753	int stack_off = -stack_depth - `8`;
19754
19755	stack_depth_extra = `8`;
19756	insn_buf[`0`] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
19757	insn_buf[`1`] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, `0`, insn->off + `2`);
19758	insn_buf[`2`] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, `1`);
19759	insn_buf[`3`] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
19760	cnt = `4`;
19761
19762	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19763	if (!new_prog)
19764	return -ENOMEM;
19765
19766	delta += cnt - `1`;
19767	env->prog = prog = new_prog;
19768	insn = new_prog->insnsi + i + delta;
19769	goto next_insn;
19770	}
19771
19772	if (insn->code != (BPF_JMP \| BPF_CALL))
19773	goto next_insn;
19774	if (insn->src_reg == BPF_PSEUDO_CALL)
19775	goto next_insn;
19776	if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
19777	ret = fixup_kfunc_call(env, insn, insn_buf, insn_idx: i + delta, cnt: &cnt);
19778	if (ret)
19779	return ret;
19780	if (cnt == `0`)
19781	goto next_insn;
19782
19783	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19784	if (!new_prog)
19785	return -ENOMEM;
19786
19787	delta += cnt - `1`;
19788	env->prog = prog = new_prog;
19789	insn = new_prog->insnsi + i + delta;
19790	goto next_insn;
19791	}
19792
19793	if (insn->imm == BPF_FUNC_get_route_realm)
19794	prog->dst_needed = `1`;
19795	if (insn->imm == BPF_FUNC_get_prandom_u32)
19796	bpf_user_rnd_init_once();
19797	if (insn->imm == BPF_FUNC_override_return)
19798	prog->kprobe_override = `1`;
19799	if (insn->imm == BPF_FUNC_tail_call) {
19800	/ If we tail call into other programs, we*
19801	* cannot make any assumptions since they can
19802	* be replaced dynamically during runtime in
19803	* the program array.
19804	*/
19805	prog->cb_access = `1`;
19806	if (!allow_tail_call_in_subprogs(env))
19807	prog->aux->stack_depth = MAX_BPF_STACK;
19808	prog->aux->max_pkt_offset = MAX_PACKET_OFF;
19809
19810	/ mark bpf_tail_call as different opcode to avoid*
19811	* conditional branch in the interpreter for every normal
19812	* call and to prevent accidental JITing by JIT compiler
19813	* that doesn't support bpf_tail_call yet
19814	*/
19815	insn->imm = `0`;
19816	insn->code = BPF_JMP \| BPF_TAIL_CALL;
19817
19818	aux = &env->insn_aux_data[i + delta];
19819	if (env->bpf_capable && !prog->blinding_requested &&
19820	prog->jit_requested &&
19821	!bpf_map_key_poisoned(aux) &&
19822	!bpf_map_ptr_poisoned(aux) &&
19823	!bpf_map_ptr_unpriv(aux)) {
19824	struct bpf_jit_poke_descriptor desc = {
19825	.reason = BPF_POKE_REASON_TAIL_CALL,
19826	.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
19827	.tail_call.key = bpf_map_key_immediate(aux),
19828	.insn_idx = i + delta,
19829	};
19830
19831	ret = bpf_jit_add_poke_descriptor(prog, poke: &desc);
19832	if (ret < `0`) {
19833	verbose(private_data: env, fmt: "adding tail call poke descriptor failed\n");
19834	return ret;
19835	}
19836
19837	insn->imm = ret + `1`;
19838	goto next_insn;
19839	}
19840
19841	if (!bpf_map_ptr_unpriv(aux))
19842	goto next_insn;
19843
19844	/ instead of changing every JIT dealing with tail_call*
19845	* emit two extra insns:
19846	* if (index >= max_entries) goto out;
19847	* index &= array->index_mask;
19848	* to avoid out-of-bounds cpu speculation
19849	*/
19850	if (bpf_map_ptr_poisoned(aux)) {
19851	verbose(private_data: env, fmt: "tail_call abusing map_ptr\n");
19852	return -EINVAL;
19853	}
19854
19855	map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
19856	insn_buf[`0`] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
19857	map_ptr->max_entries, `2`);
19858	insn_buf[`1`] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
19859	container_of(map_ptr,
19860	struct bpf_array,
19861	map)->index_mask);
19862	insn_buf[`2`] = *insn;
19863	cnt = `3`;
19864	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19865	if (!new_prog)
19866	return -ENOMEM;
19867
19868	delta += cnt - `1`;
19869	env->prog = prog = new_prog;
19870	insn = new_prog->insnsi + i + delta;
19871	goto next_insn;
19872	}
19873
19874	if (insn->imm == BPF_FUNC_timer_set_callback) {
19875	/ The verifier will process callback_fn as many times as necessary*
19876	* with different maps and the register states prepared by
19877	* set_timer_callback_state will be accurate.
19878	*
19879	* The following use case is valid:
19880	* map1 is shared by prog1, prog2, prog3.
19881	* prog1 calls bpf_timer_init for some map1 elements
19882	* prog2 calls bpf_timer_set_callback for some map1 elements.
19883	* Those that were not bpf_timer_init-ed will return -EINVAL.
19884	* prog3 calls bpf_timer_start for some map1 elements.
19885	* Those that were not both bpf_timer_init-ed and
19886	* bpf_timer_set_callback-ed will return -EINVAL.
19887	*/
19888	struct bpf_insn ld_addrs[`2`] = {
19889	BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
19890	};
19891
19892	insn_buf[`0`] = ld_addrs[`0`];
19893	insn_buf[`1`] = ld_addrs[`1`];
19894	insn_buf[`2`] = *insn;
19895	cnt = `3`;
19896
19897	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19898	if (!new_prog)
19899	return -ENOMEM;
19900
19901	delta += cnt - `1`;
19902	env->prog = prog = new_prog;
19903	insn = new_prog->insnsi + i + delta;
19904	goto patch_call_imm;
19905	}
19906
19907	if (is_storage_get_function(func_id: insn->imm)) {
19908	if (!in_sleepable(env) \|\|
19909	env->insn_aux_data[i + delta].storage_get_func_atomic)
19910	insn_buf[`0`] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
19911	else
19912	insn_buf[`0`] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
19913	insn_buf[`1`] = *insn;
19914	cnt = `2`;
19915
19916	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19917	if (!new_prog)
19918	return -ENOMEM;
19919
19920	delta += cnt - `1`;
19921	env->prog = prog = new_prog;
19922	insn = new_prog->insnsi + i + delta;
19923	goto patch_call_imm;
19924	}
19925
19926	/ bpf_per_cpu_ptr() and bpf_this_cpu_ptr() /
19927	if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
19928	/ patch with 'r1 = (u64 )(r1 + 0)' since for percpu data,*
19929	* bpf_mem_alloc() returns a ptr to the percpu data ptr.
19930	*/
19931	insn_buf[`0`] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, `0`);
19932	insn_buf[`1`] = *insn;
19933	cnt = `2`;
19934
19935	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
19936	if (!new_prog)
19937	return -ENOMEM;
19938
19939	delta += cnt - `1`;
19940	env->prog = prog = new_prog;
19941	insn = new_prog->insnsi + i + delta;
19942	goto patch_call_imm;
19943	}
19944
19945	/ BPF_EMIT_CALL() assumptions in some of the map_gen_lookup*
19946	* and other inlining handlers are currently limited to 64 bit
19947	* only.
19948	*/
19949	if (prog->jit_requested && BITS_PER_LONG == `64` &&
19950	(insn->imm == BPF_FUNC_map_lookup_elem \|\|
19951	insn->imm == BPF_FUNC_map_update_elem \|\|
19952	insn->imm == BPF_FUNC_map_delete_elem \|\|
19953	insn->imm == BPF_FUNC_map_push_elem \|\|
19954	insn->imm == BPF_FUNC_map_pop_elem \|\|
19955	insn->imm == BPF_FUNC_map_peek_elem \|\|
19956	insn->imm == BPF_FUNC_redirect_map \|\|
19957	insn->imm == BPF_FUNC_for_each_map_elem \|\|
19958	insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
19959	aux = &env->insn_aux_data[i + delta];
19960	if (bpf_map_ptr_poisoned(aux))
19961	goto patch_call_imm;
19962
19963	map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
19964	ops = map_ptr->ops;
19965	if (insn->imm == BPF_FUNC_map_lookup_elem &&
19966	ops->map_gen_lookup) {
19967	cnt = ops->map_gen_lookup(map_ptr, insn_buf);
19968	if (cnt == -EOPNOTSUPP)
19969	goto patch_map_ops_generic;
19970	if (cnt <= `0` \|\| cnt >= ARRAY_SIZE(insn_buf)) {
19971	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
19972	return -EINVAL;
19973	}
19974
19975	new_prog = bpf_patch_insn_data(env, off: i + delta,
19976	patch: insn_buf, len: cnt);
19977	if (!new_prog)
19978	return -ENOMEM;
19979
19980	delta += cnt - `1`;
19981	env->prog = prog = new_prog;
19982	insn = new_prog->insnsi + i + delta;
19983	goto next_insn;
19984	}
19985
19986	BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
19987	(void ()(struct bpf_map map, void* *key))NULL));
19988	BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
19989	(long ()(struct* bpf_map map, void* *key))NULL));
19990	BUILD_BUG_ON(!__same_type(ops->map_update_elem,
19991	(long ()(struct* bpf_map map, void* key, void* *value,
19992	u64 flags))NULL));
19993	BUILD_BUG_ON(!__same_type(ops->map_push_elem,
19994	(long ()(struct* bpf_map map, void* *value,
19995	u64 flags))NULL));
19996	BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
19997	(long ()(struct* bpf_map map, void* *value))NULL));
19998	BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
19999	(long ()(struct* bpf_map map, void* *value))NULL));
20000	BUILD_BUG_ON(!__same_type(ops->map_redirect,
20001	(long ()(struct* bpf_map *map, u64 index, u64 flags))NULL));
20002	BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
20003	(long ()(struct* bpf_map *map,
20004	bpf_callback_t callback_fn,
20005	void *callback_ctx,
20006	u64 flags))NULL));
20007	BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
20008	(void ()(struct bpf_map map, void* *key, u32 cpu))NULL));
20009
20010	patch_map_ops_generic:
20011	switch (insn->imm) {
20012	case BPF_FUNC_map_lookup_elem:
20013	insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
20014	goto next_insn;
20015	case BPF_FUNC_map_update_elem:
20016	insn->imm = BPF_CALL_IMM(ops->map_update_elem);
20017	goto next_insn;
20018	case BPF_FUNC_map_delete_elem:
20019	insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
20020	goto next_insn;
20021	case BPF_FUNC_map_push_elem:
20022	insn->imm = BPF_CALL_IMM(ops->map_push_elem);
20023	goto next_insn;
20024	case BPF_FUNC_map_pop_elem:
20025	insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
20026	goto next_insn;
20027	case BPF_FUNC_map_peek_elem:
20028	insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
20029	goto next_insn;
20030	case BPF_FUNC_redirect_map:
20031	insn->imm = BPF_CALL_IMM(ops->map_redirect);
20032	goto next_insn;
20033	case BPF_FUNC_for_each_map_elem:
20034	insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
20035	goto next_insn;
20036	case BPF_FUNC_map_lookup_percpu_elem:
20037	insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
20038	goto next_insn;
20039	}
20040
20041	goto patch_call_imm;
20042	}
20043
20044	/ Implement bpf_jiffies64 inline. /
20045	if (prog->jit_requested && BITS_PER_LONG == `64` &&
20046	insn->imm == BPF_FUNC_jiffies64) {
20047	struct bpf_insn ld_jiffies_addr[`2`] = {
20048	BPF_LD_IMM64(BPF_REG_0,
20049	(unsigned long)&jiffies),
20050	};
20051
20052	insn_buf[`0`] = ld_jiffies_addr[`0`];
20053	insn_buf[`1`] = ld_jiffies_addr[`1`];
20054	insn_buf[`2`] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
20055	BPF_REG_0, `0`);
20056	cnt = `3`;
20057
20058	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf,
20059	len: cnt);
20060	if (!new_prog)
20061	return -ENOMEM;
20062
20063	delta += cnt - `1`;
20064	env->prog = prog = new_prog;
20065	insn = new_prog->insnsi + i + delta;
20066	goto next_insn;
20067	}
20068
20069	/ Implement bpf_get_func_arg inline. /
20070	if (prog_type == BPF_PROG_TYPE_TRACING &&
20071	insn->imm == BPF_FUNC_get_func_arg) {
20072	/ Load nr_args from ctx - 8 /
20073	insn_buf[`0`] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -`8`);
20074	insn_buf[`1`] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, `6`);
20075	insn_buf[`2`] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, `3`);
20076	insn_buf[`3`] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
20077	insn_buf[`4`] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, `0`);
20078	insn_buf[`5`] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, `0`);
20079	insn_buf[`6`] = BPF_MOV64_IMM(BPF_REG_0, `0`);
20080	insn_buf[`7`] = BPF_JMP_A(`1`);
20081	insn_buf[`8`] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
20082	cnt = `9`;
20083
20084	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
20085	if (!new_prog)
20086	return -ENOMEM;
20087
20088	delta += cnt - `1`;
20089	env->prog = prog = new_prog;
20090	insn = new_prog->insnsi + i + delta;
20091	goto next_insn;
20092	}
20093
20094	/ Implement bpf_get_func_ret inline. /
20095	if (prog_type == BPF_PROG_TYPE_TRACING &&
20096	insn->imm == BPF_FUNC_get_func_ret) {
20097	if (eatype == BPF_TRACE_FEXIT \|\|
20098	eatype == BPF_MODIFY_RETURN) {
20099	/ Load nr_args from ctx - 8 /
20100	insn_buf[`0`] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -`8`);
20101	insn_buf[`1`] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, `3`);
20102	insn_buf[`2`] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
20103	insn_buf[`3`] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, `0`);
20104	insn_buf[`4`] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, `0`);
20105	insn_buf[`5`] = BPF_MOV64_IMM(BPF_REG_0, `0`);
20106	cnt = `6`;
20107	} else {
20108	insn_buf[`0`] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
20109	cnt = `1`;
20110	}
20111
20112	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
20113	if (!new_prog)
20114	return -ENOMEM;
20115
20116	delta += cnt - `1`;
20117	env->prog = prog = new_prog;
20118	insn = new_prog->insnsi + i + delta;
20119	goto next_insn;
20120	}
20121
20122	/ Implement get_func_arg_cnt inline. /
20123	if (prog_type == BPF_PROG_TYPE_TRACING &&
20124	insn->imm == BPF_FUNC_get_func_arg_cnt) {
20125	/ Load nr_args from ctx - 8 /
20126	insn_buf[`0`] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -`8`);
20127
20128	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: `1`);
20129	if (!new_prog)
20130	return -ENOMEM;
20131
20132	env->prog = prog = new_prog;
20133	insn = new_prog->insnsi + i + delta;
20134	goto next_insn;
20135	}
20136
20137	/ Implement bpf_get_func_ip inline. /
20138	if (prog_type == BPF_PROG_TYPE_TRACING &&
20139	insn->imm == BPF_FUNC_get_func_ip) {
20140	/ Load IP address from ctx - 16 /
20141	insn_buf[`0`] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -`16`);
20142
20143	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: `1`);
20144	if (!new_prog)
20145	return -ENOMEM;
20146
20147	env->prog = prog = new_prog;
20148	insn = new_prog->insnsi + i + delta;
20149	goto next_insn;
20150	}
20151
20152	/ Implement bpf_kptr_xchg inline /
20153	if (prog->jit_requested && BITS_PER_LONG == `64` &&
20154	insn->imm == BPF_FUNC_kptr_xchg &&
20155	bpf_jit_supports_ptr_xchg()) {
20156	insn_buf[`0`] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
20157	insn_buf[`1`] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, `0`);
20158	cnt = `2`;
20159
20160	new_prog = bpf_patch_insn_data(env, off: i + delta, patch: insn_buf, len: cnt);
20161	if (!new_prog)
20162	return -ENOMEM;
20163
20164	delta += cnt - `1`;
20165	env->prog = prog = new_prog;
20166	insn = new_prog->insnsi + i + delta;
20167	goto next_insn;
20168	}
20169	patch_call_imm:
20170	fn = env->ops->get_func_proto(insn->imm, env->prog);
20171	/ all functions that have prototype and verifier allowed*
20172	* programs to call them, must be real in-kernel functions
20173	*/
20174	if (!fn->func) {
20175	verbose(private_data: env,
20176	fmt: "kernel subsystem misconfigured func %s#%d\n",
20177	func_id_name(id: insn->imm), insn->imm);
20178	return -EFAULT;
20179	}
20180	insn->imm = fn->func - __bpf_call_base;
20181	next_insn:
20182	if (subprogs[cur_subprog + `1`].start == i + delta + `1`) {
20183	subprogs[cur_subprog].stack_depth += stack_depth_extra;
20184	subprogs[cur_subprog].stack_extra = stack_depth_extra;
20185	cur_subprog++;
20186	stack_depth = subprogs[cur_subprog].stack_depth;
20187	stack_depth_extra = `0`;
20188	}
20189	i++;
20190	insn++;
20191	}
20192
20193	env->prog->aux->stack_depth = subprogs[`0`].stack_depth;
20194	for (i = `0`; i < env->subprog_cnt; i++) {
20195	int subprog_start = subprogs[i].start;
20196	int stack_slots = subprogs[i].stack_extra / `8`;
20197
20198	if (!stack_slots)
20199	continue;
20200	if (stack_slots > `1`) {
20201	verbose(private_data: env, fmt: "verifier bug: stack_slots supports may_goto only\n");
20202	return -EFAULT;
20203	}
20204
20205	/ Add ST insn to subprog prologue to init extra stack /
20206	insn_buf[`0`] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
20207	-subprogs[i].stack_depth, BPF_MAX_LOOPS);
20208	/ Copy first actual insn to preserve it /
20209	insn_buf[`1`] = env->prog->insnsi[subprog_start];
20210
20211	new_prog = bpf_patch_insn_data(env, off: subprog_start, patch: insn_buf, len: `2`);
20212	if (!new_prog)
20213	return -ENOMEM;
20214	env->prog = prog = new_prog;
20215	}
20216
20217	/ Since poke tab is now finalized, publish aux to tracker. /
20218	for (i = `0`; i < prog->aux->size_poke_tab; i++) {
20219	map_ptr = prog->aux->poke_tab[i].tail_call.map;
20220	if (!map_ptr->ops->map_poke_track \|\|
20221	!map_ptr->ops->map_poke_untrack \|\|
20222	!map_ptr->ops->map_poke_run) {
20223	verbose(private_data: env, fmt: "bpf verifier is misconfigured\n");
20224	return -EINVAL;
20225	}
20226
20227	ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
20228	if (ret < `0`) {
20229	verbose(private_data: env, fmt: "tracking tail call prog failed\n");
20230	return ret;
20231	}
20232	}
20233
20234	sort_kfunc_descs_by_imm_off(prog: env->prog);
20235
20236	return `0`;
20237	}
20238
20239	static struct bpf_prog inline_bpf_loop(struct* bpf_verifier_env *env,
20240	int position,
20241	s32 stack_base,
20242	u32 callback_subprogno,
20243	u32 *cnt)
20244	{
20245	s32 r6_offset = stack_base + `0` * BPF_REG_SIZE;
20246	s32 r7_offset = stack_base + `1` * BPF_REG_SIZE;
20247	s32 r8_offset = stack_base + `2` * BPF_REG_SIZE;
20248	int reg_loop_max = BPF_REG_6;
20249	int reg_loop_cnt = BPF_REG_7;
20250	int reg_loop_ctx = BPF_REG_8;
20251
20252	struct bpf_prog *new_prog;
20253	u32 callback_start;
20254	u32 call_insn_offset;
20255	s32 callback_offset;
20256
20257	/ This represents an inlined version of bpf_iter.c:bpf_loop,*
20258	* be careful to modify this code in sync.
20259	*/
20260	struct bpf_insn insn_buf[] = {
20261	/ Return error and jump to the end of the patch if*
20262	* expected number of iterations is too big.
20263	*/
20264	BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, `2`),
20265	BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
20266	BPF_JMP_IMM(BPF_JA, `0`, `0`, `16`),
20267	/ spill R6, R7, R8 to use these as loop vars /
20268	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
20269	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
20270	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
20271	/ initialize loop vars /
20272	BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
20273	BPF_MOV32_IMM(reg_loop_cnt, `0`),
20274	BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
20275	/ loop header,*
20276	* if reg_loop_cnt >= reg_loop_max skip the loop body
20277	*/
20278	BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, `5`),
20279	/ callback call,*
20280	* correct callback offset would be set after patching
20281	*/
20282	BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
20283	BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
20284	BPF_CALL_REL(`0`),
20285	/ increment loop counter /
20286	BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, `1`),
20287	/ jump to loop header if callback returned 0 /
20288	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, `0`, -`6`),
20289	/ return value of bpf_loop,*
20290	* set R0 to the number of iterations
20291	*/
20292	BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
20293	/ restore original values of R6, R7, R8 /
20294	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
20295	BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
20296	BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
20297	};
20298
20299	*cnt = ARRAY_SIZE(insn_buf);
20300	new_prog = bpf_patch_insn_data(env, off: position, patch: insn_buf, len: *cnt);
20301	if (!new_prog)
20302	return new_prog;
20303
20304	/ callback start is known only after patching /
20305	callback_start = env->subprog_info[callback_subprogno].start;
20306	/ Note: insn_buf[12] is an offset of BPF_CALL_REL instruction /
20307	call_insn_offset = position + `12`;
20308	callback_offset = callback_start - call_insn_offset - `1`;
20309	new_prog->insnsi[call_insn_offset].imm = callback_offset;
20310
20311	return new_prog;
20312	}
20313
20314	static bool is_bpf_loop_call(struct bpf_insn *insn)
20315	{
20316	return insn->code == (BPF_JMP \| BPF_CALL) &&
20317	insn->src_reg == `0` &&
20318	insn->imm == BPF_FUNC_loop;
20319	}
20320
20321	/ For all sub-programs in the program (including main) check*
20322	* insn_aux_data to see if there are bpf_loop calls that require
20323	* inlining. If such calls are found the calls are replaced with a
20324	* sequence of instructions produced by `inline_bpf_loop` function and
20325	* subprog stack_depth is increased by the size of 3 registers.
20326	* This stack space is used to spill values of the R6, R7, R8. These
20327	* registers are used to store the loop bound, counter and context
20328	* variables.
20329	*/
20330	static int optimize_bpf_loop(struct bpf_verifier_env *env)
20331	{
20332	struct bpf_subprog_info *subprogs = env->subprog_info;
20333	int i, cur_subprog = `0`, cnt, delta = `0`;
20334	struct bpf_insn *insn = env->prog->insnsi;
20335	int insn_cnt = env->prog->len;
20336	u16 stack_depth = subprogs[cur_subprog].stack_depth;
20337	u16 stack_depth_roundup = round_up(stack_depth, `8`) - stack_depth;
20338	u16 stack_depth_extra = `0`;
20339
20340	for (i = `0`; i < insn_cnt; i++, insn++) {
20341	struct bpf_loop_inline_state *inline_state =
20342	&env->insn_aux_data[i + delta].loop_inline_state;
20343
20344	if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
20345	struct bpf_prog *new_prog;
20346
20347	stack_depth_extra = BPF_REG_SIZE * `3` + stack_depth_roundup;
20348	new_prog = inline_bpf_loop(env,
20349	position: i + delta,
20350	stack_base: -(stack_depth + stack_depth_extra),
20351	callback_subprogno: inline_state->callback_subprogno,
20352	cnt: &cnt);
20353	if (!new_prog)
20354	return -ENOMEM;
20355
20356	delta += cnt - `1`;
20357	env->prog = new_prog;
20358	insn = new_prog->insnsi + i + delta;
20359	}
20360
20361	if (subprogs[cur_subprog + `1`].start == i + delta + `1`) {
20362	subprogs[cur_subprog].stack_depth += stack_depth_extra;
20363	cur_subprog++;
20364	stack_depth = subprogs[cur_subprog].stack_depth;
20365	stack_depth_roundup = round_up(stack_depth, `8`) - stack_depth;
20366	stack_depth_extra = `0`;
20367	}
20368	}
20369
20370	env->prog->aux->stack_depth = env->subprog_info[`0`].stack_depth;
20371
20372	return `0`;
20373	}
20374
20375	static void free_states(struct bpf_verifier_env *env)
20376	{
20377	struct bpf_verifier_state_list sl, sln;
20378	int i;
20379
20380	sl = env->free_list;
20381	while (sl) {
20382	sln = sl->next;
20383	free_verifier_state(state: &sl->state, free_self: false);
20384	kfree(objp: sl);
20385	sl = sln;
20386	}
20387	env->free_list = NULL;
20388
20389	if (!env->explored_states)
20390	return;
20391
20392	for (i = `0`; i < state_htab_size(env); i++) {
20393	sl = env->explored_states[i];
20394
20395	while (sl) {
20396	sln = sl->next;
20397	free_verifier_state(state: &sl->state, free_self: false);
20398	kfree(objp: sl);
20399	sl = sln;
20400	}
20401	env->explored_states[i] = NULL;
20402	}
20403	}
20404
20405	static int do_check_common(struct bpf_verifier_env env, int* subprog)
20406	{
20407	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
20408	struct bpf_subprog_info *sub = subprog_info(env, subprog);
20409	struct bpf_verifier_state *state;
20410	struct bpf_reg_state *regs;
20411	int ret, i;
20412
20413	env->prev_linfo = NULL;
20414	env->pass_cnt++;
20415
20416	state = kzalloc(size: sizeof(struct bpf_verifier_state), GFP_KERNEL);
20417	if (!state)
20418	return -ENOMEM;
20419	state->curframe = `0`;
20420	state->speculative = false;
20421	state->branches = `1`;
20422	state->frame[`0`] = kzalloc(size: sizeof(struct bpf_func_state), GFP_KERNEL);
20423	if (!state->frame[`0`]) {
20424	kfree(objp: state);
20425	return -ENOMEM;
20426	}
20427	env->cur_state = state;
20428	init_func_state(env, state: state->frame[`0`],
20429	BPF_MAIN_FUNC / callsite /,
20430	frameno: `0` / frameno /,
20431	subprogno: subprog);
20432	state->first_insn_idx = env->subprog_info[subprog].start;
20433	state->last_insn_idx = -`1`;
20434
20435	regs = state->frame[state->curframe]->regs;
20436	if (subprog \|\| env->prog->type == BPF_PROG_TYPE_EXT) {
20437	const char *sub_name = subprog_name(env, subprog);
20438	struct bpf_subprog_arg_info *arg;
20439	struct bpf_reg_state *reg;
20440
20441	verbose(private_data: env, fmt: "Validating %s() func#%d...\n", sub_name, subprog);
20442	ret = btf_prepare_func_args(env, subprog);
20443	if (ret)
20444	goto out;
20445
20446	if (subprog_is_exc_cb(env, subprog)) {
20447	state->frame[`0`]->in_exception_callback_fn = true;
20448	/ We have already ensured that the callback returns an integer, just*
20449	* like all global subprogs. We need to determine it only has a single
20450	* scalar argument.
20451	*/
20452	if (sub->arg_cnt != `1` \|\| sub->args[`0`].arg_type != ARG_ANYTHING) {
20453	verbose(private_data: env, fmt: "exception cb only supports single integer argument\n");
20454	ret = -EINVAL;
20455	goto out;
20456	}
20457	}
20458	for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
20459	arg = &sub->args[i - BPF_REG_1];
20460	reg = &regs[i];
20461
20462	if (arg->arg_type == ARG_PTR_TO_CTX) {
20463	reg->type = PTR_TO_CTX;
20464	mark_reg_known_zero(env, regs, regno: i);
20465	} else if (arg->arg_type == ARG_ANYTHING) {
20466	reg->type = SCALAR_VALUE;
20467	mark_reg_unknown(env, regs, regno: i);
20468	} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR \| MEM_RDONLY)) {
20469	/ assume unspecial LOCAL dynptr type /
20470	__mark_dynptr_reg(reg, type: BPF_DYNPTR_TYPE_LOCAL, first_slot: true, dynptr_id: ++env->id_gen);
20471	} else if (base_type(type: arg->arg_type) == ARG_PTR_TO_MEM) {
20472	reg->type = PTR_TO_MEM;
20473	if (arg->arg_type & PTR_MAYBE_NULL)
20474	reg->type \|= PTR_MAYBE_NULL;
20475	mark_reg_known_zero(env, regs, regno: i);
20476	reg->mem_size = arg->mem_size;
20477	reg->id = ++env->id_gen;
20478	} else if (base_type(type: arg->arg_type) == ARG_PTR_TO_BTF_ID) {
20479	reg->type = PTR_TO_BTF_ID;
20480	if (arg->arg_type & PTR_MAYBE_NULL)
20481	reg->type \|= PTR_MAYBE_NULL;
20482	if (arg->arg_type & PTR_UNTRUSTED)
20483	reg->type \|= PTR_UNTRUSTED;
20484	if (arg->arg_type & PTR_TRUSTED)
20485	reg->type \|= PTR_TRUSTED;
20486	mark_reg_known_zero(env, regs, regno: i);
20487	reg->btf = bpf_get_btf_vmlinux(); / can't fail at this point /
20488	reg->btf_id = arg->btf_id;
20489	reg->id = ++env->id_gen;
20490	} else if (base_type(type: arg->arg_type) == ARG_PTR_TO_ARENA) {
20491	/ caller can pass either PTR_TO_ARENA or SCALAR /
20492	mark_reg_unknown(env, regs, regno: i);
20493	} else {
20494	WARN_ONCE(`1`, "BUG: unhandled arg#%d type %d\n",
20495	i - BPF_REG_1, arg->arg_type);
20496	ret = -EFAULT;
20497	goto out;
20498	}
20499	}
20500	} else {
20501	/ if main BPF program has associated BTF info, validate that*
20502	* it's matching expected signature, and otherwise mark BTF
20503	* info for main program as unreliable
20504	*/
20505	if (env->prog->aux->func_info_aux) {
20506	ret = btf_prepare_func_args(env, subprog: `0`);
20507	if (ret \|\| sub->arg_cnt != `1` \|\| sub->args[`0`].arg_type != ARG_PTR_TO_CTX)
20508	env->prog->aux->func_info_aux[`0`].unreliable = true;
20509	}
20510
20511	/ 1st arg to a function /
20512	regs[BPF_REG_1].type = PTR_TO_CTX;
20513	mark_reg_known_zero(env, regs, regno: BPF_REG_1);
20514	}
20515
20516	ret = do_check(env);
20517	out:
20518	/ check for NULL is necessary, since cur_state can be freed inside*
20519	* do_check() under memory pressure.
20520	*/
20521	if (env->cur_state) {
20522	free_verifier_state(state: env->cur_state, free_self: true);
20523	env->cur_state = NULL;
20524	}
20525	while (!pop_stack(env, NULL, NULL, pop_log: false));
20526	if (!ret && pop_log)
20527	bpf_vlog_reset(log: &env->log, new_pos: `0`);
20528	free_states(env);
20529	return ret;
20530	}
20531
20532	/ Lazily verify all global functions based on their BTF, if they are called*
20533	* from main BPF program or any of subprograms transitively.
20534	* BPF global subprogs called from dead code are not validated.
20535	* All callable global functions must pass verification.
20536	* Otherwise the whole program is rejected.
20537	* Consider:
20538	* int bar(int);
20539	* int foo(int f)
20540	* {
20541	* return bar(f);
20542	* }
20543	* int bar(int b)
20544	* {
20545	* ...
20546	* }
20547	* foo() will be verified first for R1=any_scalar_value. During verification it
20548	* will be assumed that bar() already verified successfully and call to bar()
20549	* from foo() will be checked for type match only. Later bar() will be verified
20550	* independently to check that it's safe for R1=any_scalar_value.
20551	*/
20552	static int do_check_subprogs(struct bpf_verifier_env *env)
20553	{
20554	struct bpf_prog_aux *aux = env->prog->aux;
20555	struct bpf_func_info_aux *sub_aux;
20556	int i, ret, new_cnt;
20557
20558	if (!aux->func_info)
20559	return `0`;
20560
20561	/ exception callback is presumed to be always called /
20562	if (env->exception_callback_subprog)
20563	subprog_aux(env, subprog: env->exception_callback_subprog)->called = true;
20564
20565	again:
20566	new_cnt = `0`;
20567	for (i = `1`; i < env->subprog_cnt; i++) {
20568	if (!subprog_is_global(env, subprog: i))
20569	continue;
20570
20571	sub_aux = subprog_aux(env, subprog: i);
20572	if (!sub_aux->called \|\| sub_aux->verified)
20573	continue;
20574
20575	env->insn_idx = env->subprog_info[i].start;
20576	WARN_ON_ONCE(env->insn_idx == `0`);
20577	ret = do_check_common(env, subprog: i);
20578	if (ret) {
20579	return ret;
20580	} else if (env->log.level & BPF_LOG_LEVEL) {
20581	verbose(private_data: env, fmt: "Func#%d ('%s') is safe for any args that match its prototype\n",
20582	i, subprog_name(env, subprog: i));
20583	}
20584
20585	/ We verified new global subprog, it might have called some*
20586	* more global subprogs that we haven't verified yet, so we
20587	* need to do another pass over subprogs to verify those.
20588	*/
20589	sub_aux->verified = true;
20590	new_cnt++;
20591	}
20592
20593	/ We can't loop forever as we verify at least one global subprog on*
20594	* each pass.
20595	*/
20596	if (new_cnt)
20597	goto again;
20598
20599	return `0`;
20600	}
20601
20602	static int do_check_main(struct bpf_verifier_env *env)
20603	{
20604	int ret;
20605
20606	env->insn_idx = `0`;
20607	ret = do_check_common(env, subprog: `0`);
20608	if (!ret)
20609	env->prog->aux->stack_depth = env->subprog_info[`0`].stack_depth;
20610	return ret;
20611	}
20612
20613
20614	static void print_verification_stats(struct bpf_verifier_env *env)
20615	{
20616	int i;
20617
20618	if (env->log.level & BPF_LOG_STATS) {
20619	verbose(private_data: env, fmt: "verification time %lld usec\n",
20620	div_u64(dividend: env->verification_time, divisor: `1000`));
20621	verbose(private_data: env, fmt: "stack depth ");
20622	for (i = `0`; i < env->subprog_cnt; i++) {
20623	u32 depth = env->subprog_info[i].stack_depth;
20624
20625	verbose(private_data: env, fmt: "%d", depth);
20626	if (i + `1` < env->subprog_cnt)
20627	verbose(private_data: env, fmt: "+");
20628	}
20629	verbose(private_data: env, fmt: "\n");
20630	}
20631	verbose(private_data: env, fmt: "processed %d insns (limit %d) max_states_per_insn %d "
20632	"total_states %d peak_states %d mark_read %d\n",
20633	env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
20634	env->max_states_per_insn, env->total_states,
20635	env->peak_states, env->longest_mark_read_walk);
20636	}
20637
20638	static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
20639	{
20640	const struct btf_type t, func_proto;
20641	const struct bpf_struct_ops_desc *st_ops_desc;
20642	const struct bpf_struct_ops *st_ops;
20643	const struct btf_member *member;
20644	struct bpf_prog *prog = env->prog;
20645	u32 btf_id, member_idx;
20646	struct btf *btf;
20647	const char *mname;
20648
20649	if (!prog->gpl_compatible) {
20650	verbose(private_data: env, fmt: "struct ops programs must have a GPL compatible license\n");
20651	return -EINVAL;
20652	}
20653
20654	if (!prog->aux->attach_btf_id)
20655	return -ENOTSUPP;
20656
20657	btf = prog->aux->attach_btf;
20658	if (btf_is_module(btf)) {
20659	/ Make sure st_ops is valid through the lifetime of env /
20660	env->attach_btf_mod = btf_try_get_module(btf);
20661	if (!env->attach_btf_mod) {
20662	verbose(private_data: env, fmt: "struct_ops module %s is not found\n",
20663	btf_get_name(btf));
20664	return -ENOTSUPP;
20665	}
20666	}
20667
20668	btf_id = prog->aux->attach_btf_id;
20669	st_ops_desc = bpf_struct_ops_find(btf, type_id: btf_id);
20670	if (!st_ops_desc) {
20671	verbose(private_data: env, fmt: "attach_btf_id %u is not a supported struct\n",
20672	btf_id);
20673	return -ENOTSUPP;
20674	}
20675	st_ops = st_ops_desc->st_ops;
20676
20677	t = st_ops_desc->type;
20678	member_idx = prog->expected_attach_type;
20679	if (member_idx >= btf_type_vlen(t)) {
20680	verbose(private_data: env, fmt: "attach to invalid member idx %u of struct %s\n",
20681	member_idx, st_ops->name);
20682	return -EINVAL;
20683	}
20684
20685	member = &btf_type_member(t)[member_idx];
20686	mname = btf_name_by_offset(btf, offset: member->name_off);
20687	func_proto = btf_type_resolve_func_ptr(btf, id: member->type,
20688	NULL);
20689	if (!func_proto) {
20690	verbose(private_data: env, fmt: "attach to invalid member %s(@idx %u) of struct %s\n",
20691	mname, member_idx, st_ops->name);
20692	return -EINVAL;
20693	}
20694
20695	if (st_ops->check_member) {
20696	int err = st_ops->check_member(t, member, prog);
20697
20698	if (err) {
20699	verbose(private_data: env, fmt: "attach to unsupported member %s of struct %s\n",
20700	mname, st_ops->name);
20701	return err;
20702	}
20703	}
20704
20705	/ btf_ctx_access() used this to provide argument type info /
20706	prog->aux->ctx_arg_info =
20707	st_ops_desc->arg_info[member_idx].info;
20708	prog->aux->ctx_arg_info_size =
20709	st_ops_desc->arg_info[member_idx].cnt;
20710
20711	prog->aux->attach_func_proto = func_proto;
20712	prog->aux->attach_func_name = mname;
20713	env->ops = st_ops->verifier_ops;
20714
20715	return `0`;
20716	}
20717	#define SECURITY_PREFIX "security_"
20718
20719	static int check_attach_modify_return(unsigned long addr, const char *func_name)
20720	{
20721	if (within_error_injection_list(addr) \|\|
20722	!strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - `1`))
20723	return `0`;
20724
20725	return -EINVAL;
20726	}
20727
20728	/ list of non-sleepable functions that are otherwise on*
20729	* ALLOW_ERROR_INJECTION list
20730	*/
20731	BTF_SET_START(btf_non_sleepable_error_inject)
20732	/ Three functions below can be called from sleepable and non-sleepable context.*
20733	* Assume non-sleepable from bpf safety point of view.
20734	*/
20735	BTF_ID(func, __filemap_add_folio)
20736	BTF_ID(func, should_fail_alloc_page)
20737	BTF_ID(func, should_failslab)
20738	BTF_SET_END(btf_non_sleepable_error_inject)
20739
20740	static int check_non_sleepable_error_inject(u32 btf_id)
20741	{
20742	return btf_id_set_contains(set: &btf_non_sleepable_error_inject, id: btf_id);
20743	}
20744
20745	int bpf_check_attach_target(struct bpf_verifier_log *log,
20746	const struct bpf_prog *prog,
20747	const struct bpf_prog *tgt_prog,
20748	u32 btf_id,
20749	struct bpf_attach_target_info *tgt_info)
20750	{
20751	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
20752	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
20753	const char prefix[] = "btf_trace_";
20754	int ret = `0`, subprog = -`1`, i;
20755	const struct btf_type *t;
20756	bool conservative = true;
20757	const char *tname;
20758	struct btf *btf;
20759	long addr = `0`;
20760	struct module *mod = NULL;
20761
20762	if (!btf_id) {
20763	bpf_log(log, fmt: "Tracing programs must provide btf_id\n");
20764	return -EINVAL;
20765	}
20766	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
20767	if (!btf) {
20768	bpf_log(log,
20769	fmt: "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
20770	return -EINVAL;
20771	}
20772	t = btf_type_by_id(btf, type_id: btf_id);
20773	if (!t) {
20774	bpf_log(log, fmt: "attach_btf_id %u is invalid\n", btf_id);
20775	return -EINVAL;
20776	}
20777	tname = btf_name_by_offset(btf, offset: t->name_off);
20778	if (!tname) {
20779	bpf_log(log, fmt: "attach_btf_id %u doesn't have a name\n", btf_id);
20780	return -EINVAL;
20781	}
20782	if (tgt_prog) {
20783	struct bpf_prog_aux *aux = tgt_prog->aux;
20784
20785	if (bpf_prog_is_dev_bound(aux: prog->aux) &&
20786	!bpf_prog_dev_bound_match(lhs: prog, rhs: tgt_prog)) {
20787	bpf_log(log, fmt: "Target program bound device mismatch");
20788	return -EINVAL;
20789	}
20790
20791	for (i = `0`; i < aux->func_info_cnt; i++)
20792	if (aux->func_info[i].type_id == btf_id) {
20793	subprog = i;
20794	break;
20795	}
20796	if (subprog == -`1`) {
20797	bpf_log(log, fmt: "Subprog %s doesn't exist\n", tname);
20798	return -EINVAL;
20799	}
20800	if (aux->func && aux->func[subprog]->aux->exception_cb) {
20801	bpf_log(log,
20802	fmt: "%s programs cannot attach to exception callback\n",
20803	prog_extension ? "Extension" : "FENTRY/FEXIT");
20804	return -EINVAL;
20805	}
20806	conservative = aux->func_info_aux[subprog].unreliable;
20807	if (prog_extension) {
20808	if (conservative) {
20809	bpf_log(log,
20810	fmt: "Cannot replace static functions\n");
20811	return -EINVAL;
20812	}
20813	if (!prog->jit_requested) {
20814	bpf_log(log,
20815	fmt: "Extension programs should be JITed\n");
20816	return -EINVAL;
20817	}
20818	}
20819	if (!tgt_prog->jited) {
20820	bpf_log(log, fmt: "Can attach to only JITed progs\n");
20821	return -EINVAL;
20822	}
20823	if (prog_tracing) {
20824	if (aux->attach_tracing_prog) {
20825	/*
20826	* Target program is an fentry/fexit which is already attached
20827	* to another tracing program. More levels of nesting
20828	* attachment are not allowed.
20829	*/
20830	bpf_log(log, fmt: "Cannot nest tracing program attach more than once\n");
20831	return -EINVAL;
20832	}
20833	} else if (tgt_prog->type == prog->type) {
20834	/*
20835	* To avoid potential call chain cycles, prevent attaching of a
20836	* program extension to another extension. It's ok to attach
20837	* fentry/fexit to extension program.
20838	*/
20839	bpf_log(log, fmt: "Cannot recursively attach\n");
20840	return -EINVAL;
20841	}
20842	if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
20843	prog_extension &&
20844	(tgt_prog->expected_attach_type == BPF_TRACE_FENTRY \|\|
20845	tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
20846	/ Program extensions can extend all program types*
20847	* except fentry/fexit. The reason is the following.
20848	* The fentry/fexit programs are used for performance
20849	* analysis, stats and can be attached to any program
20850	* type. When extension program is replacing XDP function
20851	* it is necessary to allow performance analysis of all
20852	* functions. Both original XDP program and its program
20853	* extension. Hence attaching fentry/fexit to
20854	* BPF_PROG_TYPE_EXT is allowed. If extending of
20855	* fentry/fexit was allowed it would be possible to create
20856	* long call chain fentry->extension->fentry->extension
20857	* beyond reasonable stack size. Hence extending fentry
20858	* is not allowed.
20859	*/
20860	bpf_log(log, fmt: "Cannot extend fentry/fexit\n");
20861	return -EINVAL;
20862	}
20863	} else {
20864	if (prog_extension) {
20865	bpf_log(log, fmt: "Cannot replace kernel functions\n");
20866	return -EINVAL;
20867	}
20868	}
20869
20870	switch (prog->expected_attach_type) {
20871	case BPF_TRACE_RAW_TP:
20872	if (tgt_prog) {
20873	bpf_log(log,
20874	fmt: "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
20875	return -EINVAL;
20876	}
20877	if (!btf_type_is_typedef(t)) {
20878	bpf_log(log, fmt: "attach_btf_id %u is not a typedef\n",
20879	btf_id);
20880	return -EINVAL;
20881	}
20882	if (strncmp(prefix, tname, sizeof(prefix) - `1`)) {
20883	bpf_log(log, fmt: "attach_btf_id %u points to wrong type name %s\n",
20884	btf_id, tname);
20885	return -EINVAL;
20886	}
20887	tname += sizeof(prefix) - `1`;
20888	t = btf_type_by_id(btf, type_id: t->type);
20889	if (!btf_type_is_ptr(t))
20890	/ should never happen in valid vmlinux build /
20891	return -EINVAL;
20892	t = btf_type_by_id(btf, type_id: t->type);
20893	if (!btf_type_is_func_proto(t))
20894	/ should never happen in valid vmlinux build /
20895	return -EINVAL;
20896
20897	break;
20898	case BPF_TRACE_ITER:
20899	if (!btf_type_is_func(t)) {
20900	bpf_log(log, fmt: "attach_btf_id %u is not a function\n",
20901	btf_id);
20902	return -EINVAL;
20903	}
20904	t = btf_type_by_id(btf, type_id: t->type);
20905	if (!btf_type_is_func_proto(t))
20906	return -EINVAL;
20907	ret = btf_distill_func_proto(log, btf, func_proto: t, func_name: tname, m: &tgt_info->fmodel);
20908	if (ret)
20909	return ret;
20910	break;
20911	default:
20912	if (!prog_extension)
20913	return -EINVAL;
20914	fallthrough;
20915	case BPF_MODIFY_RETURN:
20916	case BPF_LSM_MAC:
20917	case BPF_LSM_CGROUP:
20918	case BPF_TRACE_FENTRY:
20919	case BPF_TRACE_FEXIT:
20920	if (!btf_type_is_func(t)) {
20921	bpf_log(log, fmt: "attach_btf_id %u is not a function\n",
20922	btf_id);
20923	return -EINVAL;
20924	}
20925	if (prog_extension &&
20926	btf_check_type_match(log, prog, btf, t))
20927	return -EINVAL;
20928	t = btf_type_by_id(btf, type_id: t->type);
20929	if (!btf_type_is_func_proto(t))
20930	return -EINVAL;
20931
20932	if ((prog->aux->saved_dst_prog_type \|\| prog->aux->saved_dst_attach_type) &&
20933	(!tgt_prog \|\| prog->aux->saved_dst_prog_type != tgt_prog->type \|\|
20934	prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
20935	return -EINVAL;
20936
20937	if (tgt_prog && conservative)
20938	t = NULL;
20939
20940	ret = btf_distill_func_proto(log, btf, func_proto: t, func_name: tname, m: &tgt_info->fmodel);
20941	if (ret < `0`)
20942	return ret;
20943
20944	if (tgt_prog) {
20945	if (subprog == `0`)
20946	addr = (long) tgt_prog->bpf_func;
20947	else
20948	addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
20949	} else {
20950	if (btf_is_module(btf)) {
20951	mod = btf_try_get_module(btf);
20952	if (mod)
20953	addr = find_kallsyms_symbol_value(mod, name: tname);
20954	else
20955	addr = `0`;
20956	} else {
20957	addr = kallsyms_lookup_name(name: tname);
20958	}
20959	if (!addr) {
20960	module_put(module: mod);
20961	bpf_log(log,
20962	fmt: "The address of function %s cannot be found\n",
20963	tname);
20964	return -ENOENT;
20965	}
20966	}
20967
20968	if (prog->sleepable) {
20969	ret = -EINVAL;
20970	switch (prog->type) {
20971	case BPF_PROG_TYPE_TRACING:
20972
20973	/ fentry/fexit/fmod_ret progs can be sleepable if they are*
20974	* attached to ALLOW_ERROR_INJECTION and are not in denylist.
20975	*/
20976	if (!check_non_sleepable_error_inject(btf_id) &&
20977	within_error_injection_list(addr))
20978	ret = `0`;
20979	/ fentry/fexit/fmod_ret progs can also be sleepable if they are*
20980	* in the fmodret id set with the KF_SLEEPABLE flag.
20981	*/
20982	else {
20983	u32 *flags = btf_kfunc_is_modify_return(btf, kfunc_btf_id: btf_id,
20984	prog);
20985
20986	if (flags && (*flags & KF_SLEEPABLE))
20987	ret = `0`;
20988	}
20989	break;
20990	case BPF_PROG_TYPE_LSM:
20991	/ LSM progs check that they are attached to bpf_lsm_() funcs.
20992	* Only some of them are sleepable.
20993	*/
20994	if (bpf_lsm_is_sleepable_hook(btf_id))
20995	ret = `0`;
20996	break;
20997	default:
20998	break;
20999	}
21000	if (ret) {
21001	module_put(module: mod);
21002	bpf_log(log, fmt: "%s is not sleepable\n", tname);
21003	return ret;
21004	}
21005	} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
21006	if (tgt_prog) {
21007	module_put(module: mod);
21008	bpf_log(log, fmt: "can't modify return codes of BPF programs\n");
21009	return -EINVAL;
21010	}
21011	ret = -EINVAL;
21012	if (btf_kfunc_is_modify_return(btf, kfunc_btf_id: btf_id, prog) \|\|
21013	!check_attach_modify_return(addr, func_name: tname))
21014	ret = `0`;
21015	if (ret) {
21016	module_put(module: mod);
21017	bpf_log(log, fmt: "%s() is not modifiable\n", tname);
21018	return ret;
21019	}
21020	}
21021
21022	break;
21023	}
21024	tgt_info->tgt_addr = addr;
21025	tgt_info->tgt_name = tname;
21026	tgt_info->tgt_type = t;
21027	tgt_info->tgt_mod = mod;
21028	return `0`;
21029	}
21030
21031	BTF_SET_START(btf_id_deny)
21032	BTF_ID_UNUSED
21033	#ifdef CONFIG_SMP
21034	BTF_ID(func, migrate_disable)
21035	BTF_ID(func, migrate_enable)
21036	#endif
21037	#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
21038	BTF_ID(func, rcu_read_unlock_strict)
21039	#endif
21040	#if defined(CONFIG_DEBUG_PREEMPT) \|\| defined(CONFIG_TRACE_PREEMPT_TOGGLE)
21041	BTF_ID(func, preempt_count_add)
21042	BTF_ID(func, preempt_count_sub)
21043	#endif
21044	#ifdef CONFIG_PREEMPT_RCU
21045	BTF_ID(func, __rcu_read_lock)
21046	BTF_ID(func, __rcu_read_unlock)
21047	#endif
21048	BTF_SET_END(btf_id_deny)
21049
21050	static bool can_be_sleepable(struct bpf_prog *prog)
21051	{
21052	if (prog->type == BPF_PROG_TYPE_TRACING) {
21053	switch (prog->expected_attach_type) {
21054	case BPF_TRACE_FENTRY:
21055	case BPF_TRACE_FEXIT:
21056	case BPF_MODIFY_RETURN:
21057	case BPF_TRACE_ITER:
21058	return true;
21059	default:
21060	return false;
21061	}
21062	}
21063	return prog->type == BPF_PROG_TYPE_LSM \|\|
21064	prog->type == BPF_PROG_TYPE_KPROBE / only for uprobes / \|\|
21065	prog->type == BPF_PROG_TYPE_STRUCT_OPS;
21066	}
21067
21068	static int check_attach_btf_id(struct bpf_verifier_env *env)
21069	{
21070	struct bpf_prog *prog = env->prog;
21071	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
21072	struct bpf_attach_target_info tgt_info = {};
21073	u32 btf_id = prog->aux->attach_btf_id;
21074	struct bpf_trampoline *tr;
21075	int ret;
21076	u64 key;
21077
21078	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
21079	if (prog->sleepable)
21080	/ attach_btf_id checked to be zero already /
21081	return `0`;
21082	verbose(private_data: env, fmt: "Syscall programs can only be sleepable\n");
21083	return -EINVAL;
21084	}
21085
21086	if (prog->sleepable && !can_be_sleepable(prog)) {
21087	verbose(private_data: env, fmt: "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
21088	return -EINVAL;
21089	}
21090
21091	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
21092	return check_struct_ops_btf_id(env);
21093
21094	if (prog->type != BPF_PROG_TYPE_TRACING &&
21095	prog->type != BPF_PROG_TYPE_LSM &&
21096	prog->type != BPF_PROG_TYPE_EXT)
21097	return `0`;
21098
21099	ret = bpf_check_attach_target(log: &env->log, prog, tgt_prog, btf_id, tgt_info: &tgt_info);
21100	if (ret)
21101	return ret;
21102
21103	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
21104	/ to make freplace equivalent to their targets, they need to*
21105	* inherit env->ops and expected_attach_type for the rest of the
21106	* verification
21107	*/
21108	env->ops = bpf_verifier_ops[tgt_prog->type];
21109	prog->expected_attach_type = tgt_prog->expected_attach_type;
21110	}
21111
21112	/ store info about the attachment target that will be used later /
21113	prog->aux->attach_func_proto = tgt_info.tgt_type;
21114	prog->aux->attach_func_name = tgt_info.tgt_name;
21115	prog->aux->mod = tgt_info.tgt_mod;
21116
21117	if (tgt_prog) {
21118	prog->aux->saved_dst_prog_type = tgt_prog->type;
21119	prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
21120	}
21121
21122	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
21123	prog->aux->attach_btf_trace = true;
21124	return `0`;
21125	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
21126	if (!bpf_iter_prog_supported(prog))
21127	return -EINVAL;
21128	return `0`;
21129	}
21130
21131	if (prog->type == BPF_PROG_TYPE_LSM) {
21132	ret = bpf_lsm_verify_prog(vlog: &env->log, prog);
21133	if (ret < `0`)
21134	return ret;
21135	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
21136	btf_id_set_contains(set: &btf_id_deny, id: btf_id)) {
21137	return -EINVAL;
21138	}
21139
21140	key = bpf_trampoline_compute_key(tgt_prog, btf: prog->aux->attach_btf, btf_id);
21141	tr = bpf_trampoline_get(key, tgt_info: &tgt_info);
21142	if (!tr)
21143	return -ENOMEM;
21144
21145	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
21146	tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
21147
21148	prog->aux->dst_trampoline = tr;
21149	return `0`;
21150	}
21151
21152	struct btf bpf_get_btf_vmlinux(void*)
21153	{
21154	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
21155	mutex_lock(&bpf_verifier_lock);
21156	if (!btf_vmlinux)
21157	btf_vmlinux = btf_parse_vmlinux();
21158	mutex_unlock(lock: &bpf_verifier_lock);
21159	}
21160	return btf_vmlinux;
21161	}
21162
21163	int bpf_check(struct bpf_prog prog, union** bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
21164	{
21165	u64 start_time = ktime_get_ns();
21166	struct bpf_verifier_env *env;
21167	int i, len, ret = -EINVAL, err;
21168	u32 log_true_size;
21169	bool is_priv;
21170
21171	/ no program is valid /
21172	if (ARRAY_SIZE(bpf_verifier_ops) == `0`)
21173	return -EINVAL;
21174
21175	/ 'struct bpf_verifier_env' can be global, but since it's not small,*
21176	* allocate/free it every time bpf_check() is called
21177	*/
21178	env = kzalloc(size: sizeof(struct bpf_verifier_env), GFP_KERNEL);
21179	if (!env)
21180	return -ENOMEM;
21181
21182	env->bt.env = env;
21183
21184	len = (*prog)->len;
21185	env->insn_aux_data =
21186	vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
21187	ret = -ENOMEM;
21188	if (!env->insn_aux_data)
21189	goto err_free_env;
21190	for (i = `0`; i < len; i++)
21191	env->insn_aux_data[i].orig_idx = i;
21192	env->prog = *prog;
21193	env->ops = bpf_verifier_ops[env->prog->type];
21194	env->fd_array = make_bpfptr(addr: attr->fd_array, is_kernel: uattr.is_kernel);
21195
21196	env->allow_ptr_leaks = bpf_allow_ptr_leaks(token: env->prog->aux->token);
21197	env->allow_uninit_stack = bpf_allow_uninit_stack(token: env->prog->aux->token);
21198	env->bypass_spec_v1 = bpf_bypass_spec_v1(token: env->prog->aux->token);
21199	env->bypass_spec_v4 = bpf_bypass_spec_v4(token: env->prog->aux->token);
21200	env->bpf_capable = is_priv = bpf_token_capable(token: env->prog->aux->token, CAP_BPF);
21201
21202	bpf_get_btf_vmlinux();
21203
21204	/ grab the mutex to protect few globals used by verifier /
21205	if (!is_priv)
21206	mutex_lock(&bpf_verifier_lock);
21207
21208	/ user could have requested verbose verifier output*
21209	* and supplied buffer to store the verification trace
21210	*/
21211	ret = bpf_vlog_init(log: &env->log, log_level: attr->log_level,
21212	log_buf: (char __user ) (unsigned* long) attr->log_buf,
21213	log_size: attr->log_size);
21214	if (ret)
21215	goto err_unlock;
21216
21217	mark_verifier_state_clean(env);
21218
21219	if (IS_ERR(ptr: btf_vmlinux)) {
21220	/ Either gcc or pahole or kernel are broken. /
21221	verbose(private_data: env, fmt: "in-kernel BTF is malformed\n");
21222	ret = PTR_ERR(ptr: btf_vmlinux);
21223	goto skip_full_check;
21224	}
21225
21226	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
21227	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
21228	env->strict_alignment = true;
21229	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
21230	env->strict_alignment = false;
21231
21232	if (is_priv)
21233	env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
21234	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
21235
21236	env->explored_states = kvcalloc(n: state_htab_size(env),
21237	size: sizeof(struct bpf_verifier_state_list *),
21238	GFP_USER);
21239	ret = -ENOMEM;
21240	if (!env->explored_states)
21241	goto skip_full_check;
21242
21243	ret = check_btf_info_early(env, attr, uattr);
21244	if (ret < `0`)
21245	goto skip_full_check;
21246
21247	ret = add_subprog_and_kfunc(env);
21248	if (ret < `0`)
21249	goto skip_full_check;
21250
21251	ret = check_subprogs(env);
21252	if (ret < `0`)
21253	goto skip_full_check;
21254
21255	ret = check_btf_info(env, attr, uattr);
21256	if (ret < `0`)
21257	goto skip_full_check;
21258
21259	ret = check_attach_btf_id(env);
21260	if (ret)
21261	goto skip_full_check;
21262
21263	ret = resolve_pseudo_ldimm64(env);
21264	if (ret < `0`)
21265	goto skip_full_check;
21266
21267	if (bpf_prog_is_offloaded(aux: env->prog->aux)) {
21268	ret = bpf_prog_offload_verifier_prep(prog: env->prog);
21269	if (ret)
21270	goto skip_full_check;
21271	}
21272
21273	ret = check_cfg(env);
21274	if (ret < `0`)
21275	goto skip_full_check;
21276
21277	ret = do_check_main(env);
21278	ret = ret ?: do_check_subprogs(env);
21279
21280	if (ret == `0` && bpf_prog_is_offloaded(aux: env->prog->aux))
21281	ret = bpf_prog_offload_finalize(env);
21282
21283	skip_full_check:
21284	kvfree(addr: env->explored_states);
21285
21286	if (ret == `0`)
21287	ret = check_max_stack_depth(env);
21288
21289	/ instruction rewrites happen after this point /
21290	if (ret == `0`)
21291	ret = optimize_bpf_loop(env);
21292
21293	if (is_priv) {
21294	if (ret == `0`)
21295	opt_hard_wire_dead_code_branches(env);
21296	if (ret == `0`)
21297	ret = opt_remove_dead_code(env);
21298	if (ret == `0`)
21299	ret = opt_remove_nops(env);
21300	} else {
21301	if (ret == `0`)
21302	sanitize_dead_code(env);
21303	}
21304
21305	if (ret == `0`)
21306	/ program is valid, convert (u32)(ctx + off) accesses /
21307	ret = convert_ctx_accesses(env);
21308
21309	if (ret == `0`)
21310	ret = do_misc_fixups(env);
21311
21312	/ do 32-bit optimization after insn patching has done so those patched*
21313	* insns could be handled correctly.
21314	*/
21315	if (ret == `0` && !bpf_prog_is_offloaded(aux: env->prog->aux)) {
21316	ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
21317	env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
21318	: false;
21319	}
21320
21321	if (ret == `0`)
21322	ret = fixup_call_args(env);
21323
21324	env->verification_time = ktime_get_ns() - start_time;
21325	print_verification_stats(env);
21326	env->prog->aux->verified_insns = env->insn_processed;
21327
21328	/ preserve original error even if log finalization is successful /
21329	err = bpf_vlog_finalize(log: &env->log, log_size_actual: &log_true_size);
21330	if (err)
21331	ret = err;
21332
21333	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
21334	copy_to_bpfptr_offset(dst: uattr, offsetof(union bpf_attr, log_true_size),
21335	src: &log_true_size, size: sizeof(log_true_size))) {
21336	ret = -EFAULT;
21337	goto err_release_maps;
21338	}
21339
21340	if (ret)
21341	goto err_release_maps;
21342
21343	if (env->used_map_cnt) {
21344	/ if program passed verifier, update used_maps in bpf_prog_info /
21345	env->prog->aux->used_maps = kmalloc_array(n: env->used_map_cnt,
21346	size: sizeof(env->used_maps[`0`]),
21347	GFP_KERNEL);
21348
21349	if (!env->prog->aux->used_maps) {
21350	ret = -ENOMEM;
21351	goto err_release_maps;
21352	}
21353
21354	memcpy(env->prog->aux->used_maps, env->used_maps,
21355	sizeof(env->used_maps[`0`]) * env->used_map_cnt);
21356	env->prog->aux->used_map_cnt = env->used_map_cnt;
21357	}
21358	if (env->used_btf_cnt) {
21359	/ if program passed verifier, update used_btfs in bpf_prog_aux /
21360	env->prog->aux->used_btfs = kmalloc_array(n: env->used_btf_cnt,
21361	size: sizeof(env->used_btfs[`0`]),
21362	GFP_KERNEL);
21363	if (!env->prog->aux->used_btfs) {
21364	ret = -ENOMEM;
21365	goto err_release_maps;
21366	}
21367
21368	memcpy(env->prog->aux->used_btfs, env->used_btfs,
21369	sizeof(env->used_btfs[`0`]) * env->used_btf_cnt);
21370	env->prog->aux->used_btf_cnt = env->used_btf_cnt;
21371	}
21372	if (env->used_map_cnt \|\| env->used_btf_cnt) {
21373	/ program is valid. Convert pseudo bpf_ld_imm64 into generic*
21374	* bpf_ld_imm64 instructions
21375	*/
21376	convert_pseudo_ld_imm64(env);
21377	}
21378
21379	adjust_btf_func(env);
21380
21381	err_release_maps:
21382	if (!env->prog->aux->used_maps)
21383	/ if we didn't copy map pointers into bpf_prog_info, release*
21384	* them now. Otherwise free_used_maps() will release them.
21385	*/
21386	release_maps(env);
21387	if (!env->prog->aux->used_btfs)
21388	release_btfs(env);
21389
21390	/ extension progs temporarily inherit the attach_type of their targets*
21391	for verification purposes, so set it back to zero before returning
21392	*/
21393	if (env->prog->type == BPF_PROG_TYPE_EXT)
21394	env->prog->expected_attach_type = `0`;
21395
21396	*prog = env->prog;
21397
21398	module_put(module: env->attach_btf_mod);
21399	err_unlock:
21400	if (!is_priv)
21401	mutex_unlock(lock: &bpf_verifier_lock);
21402	vfree(addr: env->insn_aux_data);
21403	err_free_env:
21404	kfree(objp: env);
21405	return ret;
21406	}
21407

/* source code of linux/kernel/bpf/verifier.c */