// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF filters on 32bit ARM
 *
 * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */

#include <linux/bpf.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/math64.h>

#include <asm/cacheflush.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>
#include <asm/system_info.h>

#include "bpf_jit_32.h"

/*
 * eBPF prog stack layout:
 *
 *                         high
 * original ARM_SP =>     +-----+
 *                        |     | callee saved registers
 *                        +-----+ <= (BPF_FP + SCRATCH_SIZE)
 *                        | ... | eBPF JIT scratch space
 * eBPF fp register =>    +-----+
 *   (BPF_FP)             | ... | eBPF prog stack
 *                        +-----+
 *                        |RSVD | JIT scratchpad
 * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
 *                        | ... | caller-saved registers
 *                        +-----+
 *                        | ... | arguments passed on stack
 * ARM_SP during call =>  +-----+
 *                        |     |
 *                        | ... | Function call stack
 *                        |     |
 *                        +-----+
 *                         low
 *
 * The callee saved registers depend on whether frame pointers are enabled.
 * With frame pointers (to be compliant with the ABI):
 *
 *                              high
 * original ARM_SP =>     +--------------+ \
 *                        |      pc      | |
 * current ARM_FP =>      +--------------+ } callee saved registers
 *                        |r4-r9,fp,ip,lr| |
 *                        +--------------+ /
 *                              low
 *
 * Without frame pointers:
 *
 *                              high
 * original ARM_SP =>     +--------------+
 *                        | r4-r9,fp,lr  | callee saved registers
 * current ARM_FP =>      +--------------+
 *                              low
 *
 * When popping registers off the stack at the end of a BPF function, we
 * reference them via the current ARM_FP register.
 *
 * Some eBPF operations are implemented via a call to a helper function.
 * Such calls are "invisible" in the eBPF code, so it is up to the calling
 * program to preserve any caller-saved ARM registers during the call. The
 * JIT emits code to push and pop those registers onto the stack, immediately
 * above the callee stack frame.
 */
#define CALLEE_MASK	(1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
			 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
			 1 << ARM_FP)
#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
#define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)

#define CALLER_MASK	(1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3)

enum {
	/* Stack layout - these are offsets from (top of stack - 4) */
	BPF_R2_HI,
	BPF_R2_LO,
	BPF_R3_HI,
	BPF_R3_LO,
	BPF_R4_HI,
	BPF_R4_LO,
	BPF_R5_HI,
	BPF_R5_LO,
	BPF_R7_HI,
	BPF_R7_LO,
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_TC_HI,
	BPF_TC_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
	 * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
	 * BPF_REG_FP and Tail call counts.
	 */
	BPF_JIT_SCRATCH_REGS,
};

/*
 * Negative "register" values indicate the register is stored on the stack
 * and are the offset from the top of the eBPF JIT scratch space.
 */
#define STACK_OFFSET(k)	(-4 - (k) * 4)
#define SCRATCH_SIZE	(BPF_JIT_SCRATCH_REGS * 4)

#ifdef CONFIG_FRAME_POINTER
#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4)
#else
#define EBPF_SCRATCH_TO_ARM_FP(x) (x)
#endif

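/*
 * Worked example (editor's illustration, assuming CONFIG_FRAME_POINTER):
 * CALLEE_PUSH_MASK names eight registers (r4-r9, fp, lr), so the first
 * scratch slot, STACK_OFFSET(BPF_R2_HI) == -4, maps to
 * EBPF_SCRATCH_TO_ARM_FP(-4) == -4 - 4 * 8 - 4 == -40: the slot sits 40
 * bytes below ARM_FP, directly under the register block pushed by the
 * prologue ({r4-r9, fp, ip, lr, pc}, with ARM_FP = original SP - 4).
 */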
#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)	/* TEMP Register 1 */
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
#define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */

#define FLAG_IMM_OVERFLOW	(1 << 0)

/*
 * Map eBPF registers to ARM 32bit registers or stack scratch space.
 *
 * 1. The first argument is passed in ARM 32bit registers; the rest of the
 *    arguments are passed in stack scratch space.
 * 2. The first callee-saved eBPF register is mapped to a pair of ARM 32 bit
 *    registers; the rest are mapped to scratch space on the stack.
 * 3. We need two 64 bit temp registers to do complex operations on eBPF
 *    registers.
 *
 * As the eBPF registers are all 64 bit registers and ARM has only 32 bit
 * registers, we have to map each eBPF register to two ARM 32 bit registers
 * or to scratch memory space, and build each 64 bit eBPF register from
 * those.
 */
static const s8 bpf2a32[][2] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {ARM_R1, ARM_R0},
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {ARM_R3, ARM_R2},
	/* Stored on stack scratch space */
	[BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)},
	[BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)},
	[BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)},
	[BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)},
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {ARM_R5, ARM_R4},
	/* Stored on stack scratch space */
	[BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)},
	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
	/* Temporary Register for BPF JIT, can be used
	 * for constant blindings and others.
	 */
	[TMP_REG_1] = {ARM_R7, ARM_R6},
	[TMP_REG_2] = {ARM_R9, ARM_R8},
	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)},
	/* temporary register for blinding constants.
	 * Stored on stack scratch space.
	 */
	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
};

#define dst_lo	dst[1]
#define dst_hi	dst[0]
#define src_lo	src[1]
#define src_hi	src[0]

/*
 * JIT Context:
 *
 * prog			:	bpf_prog
 * idx			:	index of the current last JITed instruction.
 * prologue_bytes	:	bytes used in prologue.
 * epilogue_offset	:	offset at which the epilogue starts.
 * offsets		:	array of eBPF instruction offsets in
 *				JITed code.
 * target		:	final JITed code.
 * epilogue_bytes	:	number of bytes used in epilogue.
 * imm_count		:	number of immediates used for global
 *				variables.
 * imms			:	array of global variable addresses.
 */

struct jit_ctx {
	const struct bpf_prog *prog;
	unsigned int idx;
	unsigned int prologue_bytes;
	unsigned int epilogue_offset;
	unsigned int cpu_architecture;
	u32 flags;
	u32 *offsets;
	u32 *target;
	u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
	u16 epilogue_bytes;
	u16 imm_count;
	u32 *imms;
#endif
};

/*
 * Wrappers which handle both OABI and EABI and assure Thumb2 interworking
 * (where the assembly routines like __aeabi_uidiv could cause problems).
 */
static u32 jit_udiv32(u32 dividend, u32 divisor)
{
	return dividend / divisor;
}

static u32 jit_mod32(u32 dividend, u32 divisor)
{
	return dividend % divisor;
}

static s32 jit_sdiv32(s32 dividend, s32 divisor)
{
	return dividend / divisor;
}

static s32 jit_smod32(s32 dividend, s32 divisor)
{
	return dividend % divisor;
}

/* Wrappers for 64-bit div/mod */
static u64 jit_udiv64(u64 dividend, u64 divisor)
{
	return div64_u64(dividend, divisor);
}

static u64 jit_mod64(u64 dividend, u64 divisor)
{
	u64 rem;

	div64_u64_rem(dividend, divisor, &rem);
	return rem;
}

static s64 jit_sdiv64(s64 dividend, s64 divisor)
{
	return div64_s64(dividend, divisor);
}

static s64 jit_smod64(s64 dividend, s64 divisor)
{
	u64 q;

	q = div64_s64(dividend, divisor);

	return dividend - q * divisor;
}

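/*
 * Editor's note: C99 division truncates toward zero, matching eBPF
 * semantics, so the remainder takes the sign of the dividend. As a
 * sanity check of jit_smod64(): dividend = -7, divisor = 3 gives
 * q = -2 and a result of -7 - (-2 * 3) = -1.
 */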
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
	inst = __opcode_to_mem_arm(inst);

	if (ctx->target != NULL)
		ctx->target[ctx->idx] = inst;

	ctx->idx++;
}

/*
 * Emit an instruction that will be executed unconditionally.
 */
static inline void emit(u32 inst, struct jit_ctx *ctx)
{
	_emit(ARM_COND_AL, inst, ctx);
}

/*
 * This is rather horrid, but necessary to convert an integer constant
 * to an immediate operand for the opcodes, and be able to detect at
 * build time whether the constant can't be converted (iow, usable in
 * BUILD_BUG_ON()).
 */
#define imm12val(v, s) (rol32(v, (s)) | (s) << 7)
#define const_imm8m(x)					\
	({ int r;					\
	   u32 v = (x);					\
	   if (!(v & ~0x000000ff))			\
		r = imm12val(v, 0);			\
	   else if (!(v & ~0xc000003f))			\
		r = imm12val(v, 2);			\
	   else if (!(v & ~0xf000000f))			\
		r = imm12val(v, 4);			\
	   else if (!(v & ~0xfc000003))			\
		r = imm12val(v, 6);			\
	   else if (!(v & ~0xff000000))			\
		r = imm12val(v, 8);			\
	   else if (!(v & ~0x3fc00000))			\
		r = imm12val(v, 10);			\
	   else if (!(v & ~0x0ff00000))			\
		r = imm12val(v, 12);			\
	   else if (!(v & ~0x03fc0000))			\
		r = imm12val(v, 14);			\
	   else if (!(v & ~0x00ff0000))			\
		r = imm12val(v, 16);			\
	   else if (!(v & ~0x003fc000))			\
		r = imm12val(v, 18);			\
	   else if (!(v & ~0x000ff000))			\
		r = imm12val(v, 20);			\
	   else if (!(v & ~0x0003fc00))			\
		r = imm12val(v, 22);			\
	   else if (!(v & ~0x0000ff00))			\
		r = imm12val(v, 24);			\
	   else if (!(v & ~0x00003fc0))			\
		r = imm12val(v, 26);			\
	   else if (!(v & ~0x00000ff0))			\
		r = imm12val(v, 28);			\
	   else if (!(v & ~0x000003fc))			\
		r = imm12val(v, 30);			\
	   else						\
		r = -1;					\
	   r; })

/*
 * Check whether an immediate value can be converted to an imm12
 * (rotated 8-bit) operand; return the encoding, or -1 if it cannot.
 */
static int imm8m(u32 x)
{
	u32 rot;

	for (rot = 0; rot < 16; rot++)
		if ((x & ~ror32(0xff, 2 * rot)) == 0)
			return rol32(x, 2 * rot) | (rot << 8);
	return -1;
}

#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x))

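/*
 * Example (editor's illustration): 0x0003fc00 is 0xff rotated right by
 * 22 bits, so imm8m(0x0003fc00) yields (11 << 8) | 0xff - rotation field
 * 11 (2 * 11 = 22) in bits 11:8 and the 8-bit immediate 0xff below it.
 * A value like 0x00000101 spans more than 8 bits and returns -1.
 */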
static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12)
{
	op |= rt << 12 | rn << 16;
	if (imm12 >= 0)
		op |= ARM_INST_LDST__U;
	else
		imm12 = -imm12;
	return op | (imm12 & ARM_INST_LDST__IMM12);
}

static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
{
	op |= rt << 12 | rn << 16;
	if (imm8 >= 0)
		op |= ARM_INST_LDST__U;
	else
		imm8 = -imm8;
	return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f);
}

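/*
 * Editor's illustration: the "extra" load/store encodings (LDRH, LDRD,
 * ...) split their 8-bit offset across two nibbles. An offset of -0x24
 * leaves the U bit clear and encodes the magnitude 0x24 as
 * (0x24 & 0xf0) << 4, placing '2' in bits 11:8, while (0x24 & 0x0f)
 * keeps '4' in bits 3:0.
 */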
#define ARM_LDR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off)
#define ARM_LDRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off)
#define ARM_LDRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)

#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)

#define ARM_STR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
#define ARM_STRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off)

/*
 * Initializes the JIT space with undefined instructions.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}

#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
/* EABI requires the stack to be aligned to 64-bit boundaries */
#define STACK_ALIGNMENT	8
#else
/* Stack must be aligned to 32-bit boundaries */
#define STACK_ALIGNMENT	4
#endif

/* total stack size used in JITed code */
#define _STACK_SIZE	(ctx->prog->aux->stack_depth + SCRATCH_SIZE)
#define STACK_SIZE	ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

#if __LINUX_ARM_ARCH__ < 7

static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
	unsigned int i = 0, offset;
	u16 imm;

	/* on the "fake" run we just count them (duplicates included) */
	if (ctx->target == NULL) {
		ctx->imm_count++;
		return 0;
	}

	while ((i < ctx->imm_count) && ctx->imms[i]) {
		if (ctx->imms[i] == k)
			break;
		i++;
	}

	if (ctx->imms[i] == 0)
		ctx->imms[i] = k;

	/* constants go just after the epilogue */
	offset = ctx->offsets[ctx->prog->len - 1] * 4;
	offset += ctx->prologue_bytes;
	offset += ctx->epilogue_bytes;
	offset += i * 4;

	ctx->target[offset / 4] = k;

	/* PC in ARM mode == address of the instruction + 8 */
	imm = offset - (8 + ctx->idx * 4);

	if (imm & ~0xfff) {
		/*
		 * literal pool is too far, signal it into flags. we
		 * can only detect it on the second pass unfortunately.
		 */
		ctx->flags |= FLAG_IMM_OVERFLOW;
		return 0;
	}

	return imm;
}

#endif /* __LINUX_ARM_ARCH__ */

static inline int bpf2a32_offset(int bpf_to, int bpf_from,
				 const struct jit_ctx *ctx) {
	int to, from;

	if (ctx->target == NULL)
		return 0;
	to = ctx->offsets[bpf_to];
	from = ctx->offsets[bpf_from];

	return to - from - 1;
}

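/*
 * Editor's note on the "- 1", assuming ctx->offsets[i] records the
 * native index just past BPF instruction i (as the first pass does):
 * the conditional branch emitted for instruction 'bpf_from' is the last
 * native instruction of that block, i.e. at index from - 1. ARM branch
 * offsets are relative to PC, which is two instructions ahead, so the
 * encoded offset is to - ((from - 1) + 2) = to - from - 1.
 */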
/*
 * Move an immediate that's not an imm8m to a core register.
 */
static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
#else
	emit(ARM_MOVW(rd, val & 0xffff), ctx);
	if (val > 0xffff)
		emit(ARM_MOVT(rd, val >> 16), ctx);
#endif
}

static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
{
	int imm12 = imm8m(val);

	if (imm12 >= 0)
		emit(ARM_MOV_I(rd, imm12), ctx);
	else
		emit_mov_i_no8m(rd, val, ctx);
}

static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
	if (elf_hwcap & HWCAP_THUMB)
		emit(ARM_BX(tgt_reg), ctx);
	else
		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
}

static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 5
	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
	emit_bx_r(tgt_reg, ctx);
#else
	emit(ARM_BLX_R(tgt_reg), ctx);
#endif
}

static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to, from;
	/* No need for 1st dummy run */
	if (ctx->target == NULL)
		return 0;
	to = ctx->epilogue_offset;
	from = ctx->idx;

	return to - from - 2;
}

static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
	const s8 *tmp = bpf2a32[TMP_REG_1];
	u32 dst;

#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		if (op == BPF_DIV) {
			emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
		} else {
			emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		}
		return;
	}
#endif

	/*
	 * For BPF_ALU | BPF_DIV | BPF_K instructions, ARM_R0 and ARM_R1
	 * contain the first argument of the BPF function, so we need to
	 * save them on the caller side to keep them from being clobbered
	 * within the callee. After the return from the callee, we restore
	 * ARM_R0 and ARM_R1.
	 */
	if (rn != ARM_R1) {
		emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
		emit(ARM_MOV_R(ARM_R1, rn), ctx);
	}
	if (rm != ARM_R0) {
		emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
		emit(ARM_MOV_R(ARM_R0, rm), ctx);
	}

	/* Push caller-saved registers on stack */
	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);

	/* Call appropriate function */
	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv32;
		else
			dst = (u32)jit_smod32;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv32;
		else
			dst = (u32)jit_mod32;
	}

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Restore caller-saved registers from stack */
	emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx);

	/* Save return value */
	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);

	/* Restore ARM_R0 and ARM_R1 */
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}

static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
				  u8 op, u8 sign)
{
	u32 dst;

	/* Push caller-saved registers on stack */
	emit(ARM_PUSH(CALLER_MASK), ctx);

	/*
	 * As we are implementing 64-bit div/mod as function calls, we need to put the dividend in
	 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
	 * we can recover them later after returning from the function call.
	 */
	if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
		/*
		 * Move Rm to {R1, R0} if it is not already there.
		 */
		if (rm[1] != ARM_R0) {
			if (rn[1] == ARM_R0)
				emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
			if (rn[1] == ARM_R0) {
				emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
				goto cont;
			}
		}
		/*
		 * Move Rn to {R3, R2} if it is not already there.
		 */
		if (rn[1] != ARM_R2) {
			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
		}
	}

cont:

	/* Call appropriate function */
	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv64;
		else
			dst = (u32)jit_smod64;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv64;
		else
			dst = (u32)jit_mod64;
	}

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Save return value */
	if (rd[1] != ARM_R0) {
		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
	}

	/* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
		emit(ARM_POP(CALLER_MASK), ctx);
	} else if (rd[1] != ARM_R0) {
		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
	} else {
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
	}
}

/* Is the translated BPF register on stack? */
static bool is_stacked(s8 reg)
{
	return reg < 0;
}

/* If a BPF register is on the stack (i.e. its mapping is negative),
 * load it to the supplied temporary register and return the temporary
 * register for subsequent operations, otherwise just use the CPU
 * register.
 */
static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx)
{
	if (is_stacked(reg)) {
		emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
		reg = tmp;
	}
	return reg;
}

static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp,
				   struct jit_ctx *ctx)
{
	if (is_stacked(reg[1])) {
		if (__LINUX_ARM_ARCH__ >= 6 ||
		    ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) {
			emit(ARM_LDRD_I(tmp[1], ARM_FP,
					EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
		} else {
			emit(ARM_LDR_I(tmp[1], ARM_FP,
				       EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
			emit(ARM_LDR_I(tmp[0], ARM_FP,
				       EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx);
		}
		reg = tmp;
	}
	return reg;
}

/* If a BPF register is on the stack (i.e. its mapping is negative),
 * save the source register back to the stack. Otherwise, if the
 * destination CPU register differs from the source, move the source
 * into it.
 */
static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx)
{
	if (is_stacked(reg))
		emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
	else if (reg != src)
		emit(ARM_MOV_R(reg, src), ctx);
}

static void arm_bpf_put_reg64(const s8 *reg, const s8 *src,
			      struct jit_ctx *ctx)
{
	if (is_stacked(reg[1])) {
		if (__LINUX_ARM_ARCH__ >= 6 ||
		    ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) {
			emit(ARM_STRD_I(src[1], ARM_FP,
					EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
		} else {
			emit(ARM_STR_I(src[1], ARM_FP,
				       EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
			emit(ARM_STR_I(src[0], ARM_FP,
				       EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx);
		}
	} else {
		if (reg[1] != src[1])
			emit(ARM_MOV_R(reg[1], src[1]), ctx);
		if (reg[0] != src[0])
			emit(ARM_MOV_R(reg[0], src[0]), ctx);
	}
}

static inline void emit_a32_mov_i(const s8 dst, const u32 val,
				  struct jit_ctx *ctx)
{
	const s8 *tmp = bpf2a32[TMP_REG_1];

	if (is_stacked(dst)) {
		emit_mov_i(tmp[1], val, ctx);
		arm_bpf_put_reg32(dst, tmp[1], ctx);
	} else {
		emit_mov_i(dst, val, ctx);
	}
}

static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx)
{
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;

	emit_mov_i(rd[1], (u32)val, ctx);
	emit_mov_i(rd[0], val >> 32, ctx);

	arm_bpf_put_reg64(dst, rd, ctx);
}

/* Sign extended move */
static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[],
				       const u32 val, struct jit_ctx *ctx) {
	u64 val64 = val;

	if (is64 && (val & (1<<31)))
		val64 |= 0xffffffff00000000ULL;
	emit_a32_mov_i64(dst, val64, ctx);
}

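/*
 * Example (editor's illustration): for BPF_ALU64 with imm = -1 this
 * materializes 0xffffffffffffffff, i.e. both halves become 0xffffffff;
 * for BPF_ALU (is64 == false) the upper half stays zero and only the
 * low word is set to 0xffffffff.
 */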
static inline void emit_a32_add_r(const u8 dst, const u8 src,
				  const bool is64, const bool hi,
				  struct jit_ctx *ctx) {
	/* 64 bit :
	 *	adds dst_lo, dst_lo, src_lo
	 *	adc dst_hi, dst_hi, src_hi
	 * 32 bit :
	 *	add dst_lo, dst_lo, src_lo
	 */
	if (!hi && is64)
		emit(ARM_ADDS_R(dst, dst, src), ctx);
	else if (hi && is64)
		emit(ARM_ADC_R(dst, dst, src), ctx);
	else
		emit(ARM_ADD_R(dst, dst, src), ctx);
}

static inline void emit_a32_sub_r(const u8 dst, const u8 src,
				  const bool is64, const bool hi,
				  struct jit_ctx *ctx) {
	/* 64 bit :
	 *	subs dst_lo, dst_lo, src_lo
	 *	sbc dst_hi, dst_hi, src_hi
	 * 32 bit :
	 *	sub dst_lo, dst_lo, src_lo
	 */
	if (!hi && is64)
		emit(ARM_SUBS_R(dst, dst, src), ctx);
	else if (hi && is64)
		emit(ARM_SBC_R(dst, dst, src), ctx);
	else
		emit(ARM_SUB_R(dst, dst, src), ctx);
}

static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
			      const bool hi, const u8 op, struct jit_ctx *ctx){
	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		emit_a32_add_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit_a32_sub_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ARM_ORR_R(dst, dst, src), ctx);
		break;
	/* dst = dst & src */
	case BPF_AND:
		emit(ARM_AND_R(dst, dst, src), ctx);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ARM_EOR_R(dst, dst, src), ctx);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		emit(ARM_MUL(dst, dst, src), ctx);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ARM_LSL_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ARM_LSR_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src (signed) */
	case BPF_ARSH:
		emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
		break;
	}
}

/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
				    const s8 src[], struct jit_ctx *ctx,
				    const u8 op) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;

	rd = arm_bpf_get_reg64(dst, tmp, ctx);
	if (is64) {
		const s8 *rs;

		rs = arm_bpf_get_reg64(src, tmp2, ctx);

		/* ALU operation */
		emit_alu_r(rd[1], rs[1], true, false, op, ctx);
		emit_alu_r(rd[0], rs[0], true, true, op, ctx);
	} else {
		s8 rs;

		rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);

		/* ALU operation */
		emit_alu_r(rd[1], rs, true, false, op, ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(rd[0], 0, ctx);
	}

	arm_bpf_put_reg64(dst, rd, ctx);
}

/* dst = src (4 bytes) */
static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
				  struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	s8 rt;

	rt = arm_bpf_get_reg32(src, tmp[0], ctx);
	if (off && off != 32) {
		emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
		emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
	}
	arm_bpf_put_reg32(dst, rt, ctx);
}

/* dst = src */
static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
				    const s8 src[],
				    struct jit_ctx *ctx) {
	if (!is64) {
		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
		if (!ctx->prog->aux->verifier_zext)
			/* Zero out high 4 bytes */
			emit_a32_mov_i(dst_hi, 0, ctx);
	} else if (__LINUX_ARM_ARCH__ < 6 &&
		   ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
		/* complete 8 byte move */
		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
		emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
	} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
		const u8 *tmp = bpf2a32[TMP_REG_1];

		emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx);
		emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx);
	} else if (is_stacked(src_lo)) {
		emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx);
	} else if (is_stacked(dst_lo)) {
		emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx);
	} else {
		emit(ARM_MOV_R(dst[0], src[0]), ctx);
		emit(ARM_MOV_R(dst[1], src[1]), ctx);
	}
}

/* dst = (signed)src */
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
				      struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rt;

	rt = arm_bpf_get_reg64(dst, tmp, ctx);

	emit_a32_mov_r(dst_lo, src_lo, off, ctx);
	if (!is64) {
		if (!ctx->prog->aux->verifier_zext)
			/* Zero out high 4 bytes */
			emit_a32_mov_i(dst_hi, 0, ctx);
	} else {
		emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
	}
}

/* Shift and negate operations */
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
				  struct jit_ctx *ctx, const u8 op) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	s8 rd;

	rd = arm_bpf_get_reg32(dst, tmp[0], ctx);

	/* Do shift operation */
	switch (op) {
	case BPF_LSH:
		emit(ARM_LSL_I(rd, rd, val), ctx);
		break;
	case BPF_RSH:
		emit(ARM_LSR_I(rd, rd, val), ctx);
		break;
	case BPF_ARSH:
		emit(ARM_ASR_I(rd, rd, val), ctx);
		break;
	case BPF_NEG:
		emit(ARM_RSB_I(rd, rd, val), ctx);
		break;
	}

	arm_bpf_put_reg32(dst, rd, ctx);
}

/* dst = -dst (64 bit) */
static inline void emit_a32_neg64(const s8 dst[],
				  struct jit_ctx *ctx){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rd;

	/* Setup Operand */
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do Negate Operation */
	emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx);
	emit(ARM_RSC_I(rd[0], rd[0], 0), ctx);

	arm_bpf_put_reg64(dst, rd, ctx);
}

/* dst = dst << src */
static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[],
				    struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;
	s8 rt;

	/* Setup Operands */
	rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do LSH operation */
	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx);

	arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
	arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
}

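/*
 * Editor's trace of the sequence above (register-specified shifts use
 * the low byte of the shift register; amounts >= 32 yield zero): for
 * rt = 8, dst_hi = (dst_hi << 8) | (dst_lo >> 24) and dst_lo <<= 8.
 * For rt = 40, "rt - 32" selects dst_lo << 8 for the high word while
 * both out-of-range shifts contribute zero, giving dst_hi = dst_lo << 8
 * and dst_lo = 0.
 */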
/* dst = dst >> src (signed) */
static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
				     struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;
	s8 rt;

	/* Setup Operands */
	rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do the ARSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
	_emit(ARM_COND_PL,
	      ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);

	arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
	arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
}

/* dst = dst >> src */
static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[],
				    struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;
	s8 rt;

	/* Setup Operands */
	rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do RSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx);

	arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
	arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
}

/* dst = dst << val */
static inline void emit_a32_lsh_i64(const s8 dst[],
				    const u32 val, struct jit_ctx *ctx){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;

	/* Setup operands */
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do LSH operation */
	if (val < 32) {
		emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx);
		emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx);
		emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx);
	} else {
		if (val == 32)
			emit(ARM_MOV_R(rd[0], rd[1]), ctx);
		else
			emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx);
		emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx);
	}

	arm_bpf_put_reg64(dst, rd, ctx);
}

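/* Editor's example: for val = 40 the else branch emits
 * "mov rd_hi, rd_lo, lsl #8" followed by "eor rd_lo, rd_lo, rd_lo",
 * i.e. dst = dst << 40 becomes dst_hi = dst_lo << 8, dst_lo = 0.
 */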
/* dst = dst >> val */
static inline void emit_a32_rsh_i64(const s8 dst[],
				    const u32 val, struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;

	/* Setup operands */
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do LSR operation */
	if (val == 0) {
		/* An immediate value of 0 encodes a shift amount of 32
		 * for LSR. To shift by 0, don't do anything.
		 */
	} else if (val < 32) {
		emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
		emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
		emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx);
	} else if (val == 32) {
		emit(ARM_MOV_R(rd[1], rd[0]), ctx);
		emit(ARM_MOV_I(rd[0], 0), ctx);
	} else {
		emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx);
		emit(ARM_MOV_I(rd[0], 0), ctx);
	}

	arm_bpf_put_reg64(dst, rd, ctx);
}

/* dst = dst >> val (signed) */
static inline void emit_a32_arsh_i64(const s8 dst[],
				     const u32 val, struct jit_ctx *ctx){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;

	/* Setup operands */
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do ARSH operation */
	if (val == 0) {
		/* An immediate value of 0 encodes a shift amount of 32
		 * for ASR. To shift by 0, don't do anything.
		 */
	} else if (val < 32) {
		emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
		emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
		emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx);
	} else if (val == 32) {
		emit(ARM_MOV_R(rd[1], rd[0]), ctx);
		emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx);
	} else {
		emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx);
		emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx);
	}

	arm_bpf_put_reg64(dst, rd, ctx);
}

static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
				    struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd, *rt;

	/* Setup operands for multiplication */
	rd = arm_bpf_get_reg64(dst, tmp, ctx);
	rt = arm_bpf_get_reg64(src, tmp2, ctx);

	/* Do Multiplication */
	emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx);
	emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx);
	emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);

	emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx);
	emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx);

	arm_bpf_put_reg32(dst_lo, ARM_IP, ctx);
	arm_bpf_put_reg32(dst_hi, rd[0], ctx);
}

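/*
 * Editor's note: the sequence above is the usual 64x64 -> low-64
 * decomposition. With dst = 2^32 * dh + dl and src = 2^32 * sh + sl:
 *
 *   dst * src mod 2^64
 *     = low32(dl * sl)
 *       + 2^32 * (high32(dl * sl) + low32(dl * sh + dh * sl))
 *
 * UMULL computes the full 64-bit dl * sl; the two MULs and the ADDs
 * fold the truncated cross terms into its high word.
 */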
static bool is_ldst_imm(s16 off, const u8 size)
{
	s16 off_max = 0;

	switch (size) {
	case BPF_B:
	case BPF_W:
		off_max = 0xfff;
		break;
	case BPF_H:
		off_max = 0xff;
		break;
	case BPF_DW:
		/* Need to make sure off+4 does not overflow. */
		off_max = 0xfff - 4;
		break;
	}
	return -off_max <= off && off <= off_max;
}

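/*
 * Example (editor's illustration): BPF_H uses the "extra" encoding with
 * only 8 offset bits, so off = 0x100 forces the add-offset fall-back
 * path, while a BPF_W access allows the full +/-0xfff. For BPF_DW,
 * off = 0xffb still fits because the second word at off + 4 = 0xfff
 * remains encodable.
 */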
static bool is_ldst_imm8(s16 off, const u8 size)
{
	s16 off_max = 0;

	switch (size) {
	case BPF_B:
		off_max = 0xff;
		break;
	case BPF_W:
		off_max = 0xfff;
		break;
	case BPF_H:
		off_max = 0xff;
		break;
	}
	return -off_max <= off && off <= off_max;
}

/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
			      s16 off, struct jit_ctx *ctx, const u8 sz){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	s8 rd;

	rd = arm_bpf_get_reg32(dst, tmp[1], ctx);

	if (!is_ldst_imm(off, sz)) {
		emit_a32_mov_i(tmp[0], off, ctx);
		emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
		rd = tmp[0];
		off = 0;
	}
	switch (sz) {
	case BPF_B:
		/* Store a Byte */
		emit(ARM_STRB_I(src_lo, rd, off), ctx);
		break;
	case BPF_H:
		/* Store a HalfWord */
		emit(ARM_STRH_I(src_lo, rd, off), ctx);
		break;
	case BPF_W:
		/* Store a Word */
		emit(ARM_STR_I(src_lo, rd, off), ctx);
		break;
	case BPF_DW:
		/* Store a Double Word */
		emit(ARM_STR_I(src_lo, rd, off), ctx);
		emit(ARM_STR_I(src_hi, rd, off + 4), ctx);
		break;
	}
}

/* dst = *(size *)(src + off) */
static inline void emit_ldx_r(const s8 dst[], const s8 src,
			      s16 off, struct jit_ctx *ctx, const u8 sz){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
	s8 rm = src;

	if (!is_ldst_imm(off, sz)) {
		emit_a32_mov_i(tmp[0], off, ctx);
		emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
		rm = tmp[0];
		off = 0;
	} else if (rd[1] == rm) {
		emit(ARM_MOV_R(tmp[0], rm), ctx);
		rm = tmp[0];
	}
	switch (sz) {
	case BPF_B:
		/* Load a Byte */
		emit(ARM_LDRB_I(rd[1], rm, off), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(rd[0], 0, ctx);
		break;
	case BPF_H:
		/* Load a HalfWord */
		emit(ARM_LDRH_I(rd[1], rm, off), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(rd[0], 0, ctx);
		break;
	case BPF_W:
		/* Load a Word */
		emit(ARM_LDR_I(rd[1], rm, off), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(rd[0], 0, ctx);
		break;
	case BPF_DW:
		/* Load a Double Word */
		emit(ARM_LDR_I(rd[1], rm, off), ctx);
		emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
		break;
	}
	arm_bpf_put_reg64(dst, rd, ctx);
}

/* dst = *(signed size *)(src + off) */
static inline void emit_ldsx_r(const s8 dst[], const s8 src,
			       s16 off, struct jit_ctx *ctx, const u8 sz){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
	s8 rm = src;
	int add_off;

	if (!is_ldst_imm8(off, sz)) {
		/*
		 * offset does not fit in the load/store immediate,
		 * construct an ADD instruction to apply the offset.
		 */
		add_off = imm8m(off);
		if (add_off > 0) {
			emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
			rm = tmp[0];
		} else {
			emit_a32_mov_i(tmp[0], off, ctx);
			emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
			rm = tmp[0];
		}
		off = 0;
	}

	switch (sz) {
	case BPF_B:
		/* Load a Byte with sign extension */
		emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
		break;
	case BPF_H:
		/* Load a HalfWord with sign extension */
		emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
		break;
	case BPF_W:
		/* Load a Word */
		emit(ARM_LDR_I(rd[1], rm, off), ctx);
		break;
	}
	/* Carry the sign extension to upper 32 bits */
	emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
	arm_bpf_put_reg64(dst, rd, ctx);
}

/* Arithmetic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
			     const u8 rn, struct jit_ctx *ctx, u8 op,
			     bool is_jmp64) {
	switch (op) {
	case BPF_JSET:
		if (is_jmp64) {
			emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
			emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
			emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
		} else {
			emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx);
		}
		break;
	case BPF_JEQ:
	case BPF_JNE:
	case BPF_JGT:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JLT:
		if (is_jmp64) {
			emit(ARM_CMP_R(rd, rm), ctx);
			/* Only compare the low halves if the high halves are equal. */
			_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
		} else {
			emit(ARM_CMP_R(rt, rn), ctx);
		}
		break;
	case BPF_JSLE:
	case BPF_JSGT:
		emit(ARM_CMP_R(rn, rt), ctx);
		if (is_jmp64)
			emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
		break;
	case BPF_JSLT:
	case BPF_JSGE:
		emit(ARM_CMP_R(rt, rn), ctx);
		if (is_jmp64)
			emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
		break;
	}
}

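/*
 * Editor's note on the signed 64-bit compares above: "cmp" on the low
 * words sets the carry (borrow) flag, and "sbcs" then computes
 * hi(a) - hi(b) - !carry without keeping the result, so the N and V
 * flags describe the full 64-bit signed comparison and the LT/GE
 * conditions apply directly. JSLE/JSGT reuse the same trick with the
 * operands swapped, computing src - dst instead of dst - src.
 */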
static int out_offset = -1; /* initialized on the first pass of build_body() */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{

	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const s8 *r2 = bpf2a32[BPF_REG_2];
	const s8 *r3 = bpf2a32[BPF_REG_3];
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *tcc = bpf2a32[TCALL_CNT];
	const s8 *tc;
	const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset) - 2)
	u32 lo, hi;
	s8 r_array, r_index;
	int off;

	/* if (index >= array->map.max_entries)
	 *	goto out;
	 */
	BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) >
		     ARM_INST_LDST__IMM12);
	off = offsetof(struct bpf_array, map.max_entries);
	r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx);
	/* index is 32-bit for arrays */
	r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx);
	/* array->map.max_entries */
	emit(ARM_LDR_I(tmp[1], r_array, off), ctx);
	/* index >= array->map.max_entries */
	emit(ARM_CMP_R(r_index, tmp[1]), ctx);
	_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);

	/* tmp2[0] = array, tmp2[1] = index */

	/*
	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
	 *	goto out;
	 * tail_call_cnt++;
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	tc = arm_bpf_get_reg64(tcc, tmp, ctx);
	emit(ARM_CMP_I(tc[0], hi), ctx);
	_emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx);
	_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
	emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx);
	emit(ARM_ADC_I(tc[0], tc[0], 0), ctx);
	arm_bpf_put_reg64(tcc, tmp, ctx);

	/* prog = array->ptrs[index]
	 * if (prog == NULL)
	 *	goto out;
	 */
	BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0);
	off = imm8m(offsetof(struct bpf_array, ptrs));
	emit(ARM_ADD_I(tmp[1], r_array, off), ctx);
	emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx);
	emit(ARM_CMP_I(tmp[1], 0), ctx);
	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_size); */
	BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) >
		     ARM_INST_LDST__IMM12);
	off = offsetof(struct bpf_prog, bpf_func);
	emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx);
	emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
	emit_bx_r(tmp[1], ctx);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}

/* 0xabcd => 0xcdab */
static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 6
	const s8 *tmp2 = bpf2a32[TMP_REG_2];

	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
	emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
#else /* ARMv6+ */
	emit(ARM_REV16(rd, rn), ctx);
#endif
}

/* 0xabcdefgh => 0xghefcdab */
static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 6
	const s8 *tmp2 = bpf2a32[TMP_REG_2];

	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
	emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);

	emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
	emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
	emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
	emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);

#else /* ARMv6+ */
	emit(ARM_REV(rd, rn), ctx);
#endif
}

// push the scratch stack register on top of the stack
static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
{
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rt;
	u16 reg_set = 0;

	rt = arm_bpf_get_reg64(src, tmp2, ctx);

	reg_set = (1 << rt[1]) | (1 << rt[0]);
	emit(ARM_PUSH(reg_set), ctx);
}

static void build_prologue(struct jit_ctx *ctx)
{
	const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
	const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
	const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
	const s8 *tcc = bpf2a32[TCALL_CNT];

	/* Save callee saved registers. */
#ifdef CONFIG_FRAME_POINTER
	u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
	emit(ARM_PUSH(reg_set), ctx);
	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
	/* mov r3, #0 */
	/* sub r2, sp, #SCRATCH_SIZE */
	emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
	emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);

	ctx->stack_size = imm8m(STACK_SIZE);

	/* Set up function call stack */
	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);

	/* Set up BPF prog stack base register */
	emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);

	/* Initialize Tail Count */
	emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
	emit_a32_mov_r64(true, tcc, bpf_r1, ctx);

	/* Move BPF_CTX to BPF_R1 */
	emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);

	/* end of prologue */
}

/* restore callee saved registers. */
static void build_epilogue(struct jit_ctx *ctx)
{
#ifdef CONFIG_FRAME_POINTER
	/* When using frame pointers, some additional registers need to
	 * be loaded. */
	u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
	emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
	emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
	/* Restore callee saved registers. */
	emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
	emit(ARM_POP(CALLEE_POP_MASK), ctx);
#endif
}

/*
 * Convert an eBPF instruction to native instruction, i.e.
 * JITs an eBPF instruction.
 * Returns :
 *	0  - Successfully JITed an 8-byte eBPF instruction
 *	>0 - Successfully JITed a 16-byte eBPF instruction
 *	<0 - Failed to JIT.
 */
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const s8 *dst = bpf2a32[insn->dst_reg];
	const s8 *src = bpf2a32[insn->src_reg];
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const int i = insn - ctx->prog->insnsi;
	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
	const s8 *rd, *rs;
	s8 rd_lo, rt, rm, rn;
	s32 jmp_offset;

#define check_imm(bits, imm) do {				\
	if ((imm) >= (1 << ((bits) - 1)) ||			\
	    (imm) < -(1 << ((bits) - 1))) {			\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm24(imm) check_imm(24, imm)

	switch (code) {
	/* ALU operations */

	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		switch (BPF_SRC(code)) {
		case BPF_X:
			if (imm == 1) {
				/* Special mov32 for zext */
				emit_a32_mov_i(dst_hi, 0, ctx);
				break;
			}
			if (insn->off)
				emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
			else
				emit_a32_mov_r64(is64, dst, src, ctx);
			break;
		case BPF_K:
			/* Sign-extend immediate value to destination reg */
			emit_a32_mov_se_i64(is64, dst, imm, ctx);
			break;
		}
		break;
	/* dst = dst + src/imm */
	/* dst = dst - src/imm */
	/* dst = dst | src/imm */
	/* dst = dst & src/imm */
	/* dst = dst ^ src/imm */
	/* dst = dst * src/imm */
	/* dst = dst << src */
	/* dst = dst >> src */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		switch (BPF_SRC(code)) {
		case BPF_X:
			emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code));
			break;
		case BPF_K:
			/* Move immediate value to the temporary register
			 * and then do the ALU operation on the temporary
			 * register as this will sign-extend the immediate
			 * value into temporary reg and then it would be
			 * safe to do the operation on it.
			 */
			emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
			emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code));
			break;
		}
		break;
	/* dst = dst / src(imm) */
	/* dst = dst % src(imm) */
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_X:
		rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx);
		switch (BPF_SRC(code)) {
		case BPF_X:
			rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx);
			break;
		case BPF_K:
			rt = tmp2[0];
			emit_a32_mov_i(rt, imm, ctx);
			break;
		default:
			rt = src_lo;
			break;
		}
		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(dst_hi, 0, ctx);
		break;
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		rd = arm_bpf_get_reg64(dst, tmp2, ctx);
		switch (BPF_SRC(code)) {
		case BPF_X:
			rs = arm_bpf_get_reg64(src, tmp, ctx);
			break;
		case BPF_K:
			rs = tmp;
			emit_a32_mov_se_i64(is64, rs, imm, ctx);
			break;
		}
		emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
		arm_bpf_put_reg64(dst, rd, ctx);
		break;
	/* dst = dst << imm */
	/* dst = dst >> imm */
	/* dst = dst >> imm (signed) */
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
		if (unlikely(imm > 31))
			return -EINVAL;
		if (imm)
			emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(dst_hi, 0, ctx);
		break;
	/* dst = dst << imm */
	case BPF_ALU64 | BPF_LSH | BPF_K:
		if (unlikely(imm > 63))
			return -EINVAL;
		emit_a32_lsh_i64(dst, imm, ctx);
		break;
	/* dst = dst >> imm */
	case BPF_ALU64 | BPF_RSH | BPF_K:
		if (unlikely(imm > 63))
			return -EINVAL;
		emit_a32_rsh_i64(dst, imm, ctx);
		break;
	/* dst = dst << src */
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_a32_lsh_r64(dst, src, ctx);
		break;
	/* dst = dst >> src */
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_a32_rsh_r64(dst, src, ctx);
		break;
	/* dst = dst >> src (signed) */
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_a32_arsh_r64(dst, src, ctx);
		break;
	/* dst = dst >> imm (signed) */
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (unlikely(imm > 63))
			return -EINVAL;
		emit_a32_arsh_i64(dst, imm, ctx);
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(dst_hi, 0, ctx);
		break;
	/* dst = -dst (64 bit) */
	case BPF_ALU64 | BPF_NEG:
		emit_a32_neg64(dst, ctx);
		break;
	/* dst = dst * src/imm */
	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		switch (BPF_SRC(code)) {
		case BPF_X:
			emit_a32_mul_r64(dst, src, ctx);
			break;
		case BPF_K:
			/* Move immediate value to the temporary register
			 * and then do the multiplication on it as this
			 * will sign-extend the immediate value into temp
			 * reg then it would be safe to do the operation
			 * on it.
			 */
			emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
			emit_a32_mul_r64(dst, tmp2, ctx);
			break;
		}
		break;
	/* dst = htole(dst) */
	/* dst = htobe(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
	case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
	/* dst = bswap(dst) */
	case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
		rd = arm_bpf_get_reg64(dst, tmp, ctx);
		if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
			goto emit_bswap_uxt;
		switch (imm) {
		case 16:
			emit_rev16(rd[1], rd[1], ctx);
			goto emit_bswap_uxt;
		case 32:
			emit_rev32(rd[1], rd[1], ctx);
			goto emit_bswap_uxt;
		case 64:
			emit_rev32(ARM_LR, rd[1], ctx);
			emit_rev32(rd[1], rd[0], ctx);
			emit(ARM_MOV_R(rd[0], ARM_LR), ctx);
			break;
		}
		goto exit;
emit_bswap_uxt:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
#if __LINUX_ARM_ARCH__ < 6
			emit_a32_mov_i(tmp2[1], 0xffff, ctx);
			emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx);
#else /* ARMv6+ */
			emit(ARM_UXTH(rd[1], rd[1]), ctx);
#endif
			if (!ctx->prog->aux->verifier_zext)
				emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			if (!ctx->prog->aux->verifier_zext)
				emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
			break;
		case 64:
			/* nop */
			break;
		}
exit:
		arm_bpf_put_reg64(dst, rd, ctx);
		break;
	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		u64 val = (u32)imm | (u64)insn[1].imm << 32;

		emit_a32_mov_i64(dst, val, ctx);

		return 1;
	}
	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
	/* LDSX: dst = *(signed size *)(src + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
		if (BPF_MODE(insn->code) == BPF_MEMSX)
			emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
		else
			emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
		break;
	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;
	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_DW:
			/* Sign-extend immediate value into temp reg */
			emit_a32_mov_se_i64(true, tmp2, imm, ctx);
			break;
		case BPF_W:
		case BPF_H:
		case BPF_B:
			emit_a32_mov_i(tmp2[1], imm, ctx);
			break;
		}
		emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code));
		break;
	/* Atomic ops */
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		goto notyet;
	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
		rs = arm_bpf_get_reg64(src, tmp2, ctx);
		emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code));
		break;
1877 /* PC += off if dst == src */
1878 /* PC += off if dst > src */
1879 /* PC += off if dst >= src */
1880 /* PC += off if dst < src */
1881 /* PC += off if dst <= src */
1882 /* PC += off if dst != src */
1883 /* PC += off if dst > src (signed) */
1884 /* PC += off if dst >= src (signed) */
1885 /* PC += off if dst < src (signed) */
1886 /* PC += off if dst <= src (signed) */
1887 /* PC += off if dst & src */
1888 case BPF_JMP | BPF_JEQ | BPF_X:
1889 case BPF_JMP | BPF_JGT | BPF_X:
1890 case BPF_JMP | BPF_JGE | BPF_X:
1891 case BPF_JMP | BPF_JNE | BPF_X:
1892 case BPF_JMP | BPF_JSGT | BPF_X:
1893 case BPF_JMP | BPF_JSGE | BPF_X:
1894 case BPF_JMP | BPF_JSET | BPF_X:
1895 case BPF_JMP | BPF_JLE | BPF_X:
1896 case BPF_JMP | BPF_JLT | BPF_X:
1897 case BPF_JMP | BPF_JSLT | BPF_X:
1898 case BPF_JMP | BPF_JSLE | BPF_X:
1899 case BPF_JMP32 | BPF_JEQ | BPF_X:
1900 case BPF_JMP32 | BPF_JGT | BPF_X:
1901 case BPF_JMP32 | BPF_JGE | BPF_X:
1902 case BPF_JMP32 | BPF_JNE | BPF_X:
1903 case BPF_JMP32 | BPF_JSGT | BPF_X:
1904 case BPF_JMP32 | BPF_JSGE | BPF_X:
1905 case BPF_JMP32 | BPF_JSET | BPF_X:
1906 case BPF_JMP32 | BPF_JLE | BPF_X:
1907 case BPF_JMP32 | BPF_JLT | BPF_X:
1908 case BPF_JMP32 | BPF_JSLT | BPF_X:
1909 case BPF_JMP32 | BPF_JSLE | BPF_X:
1910 /* Setup source registers */
1911 rm = arm_bpf_get_reg32(src_hi, tmp: tmp2[0], ctx);
1912 rn = arm_bpf_get_reg32(src_lo, tmp: tmp2[1], ctx);
1913 goto go_jmp;
1914 /* PC += off if dst == imm */
1915 /* PC += off if dst > imm */
1916 /* PC += off if dst >= imm */
1917 /* PC += off if dst < imm */
1918 /* PC += off if dst <= imm */
1919 /* PC += off if dst != imm */
1920 /* PC += off if dst > imm (signed) */
1921 /* PC += off if dst >= imm (signed) */
1922 /* PC += off if dst < imm (signed) */
1923 /* PC += off if dst <= imm (signed) */
1924 /* PC += off if dst & imm */
1925 case BPF_JMP | BPF_JEQ | BPF_K:
1926 case BPF_JMP | BPF_JGT | BPF_K:
1927 case BPF_JMP | BPF_JGE | BPF_K:
1928 case BPF_JMP | BPF_JNE | BPF_K:
1929 case BPF_JMP | BPF_JSGT | BPF_K:
1930 case BPF_JMP | BPF_JSGE | BPF_K:
1931 case BPF_JMP | BPF_JSET | BPF_K:
1932 case BPF_JMP | BPF_JLT | BPF_K:
1933 case BPF_JMP | BPF_JLE | BPF_K:
1934 case BPF_JMP | BPF_JSLT | BPF_K:
1935 case BPF_JMP | BPF_JSLE | BPF_K:
1936 case BPF_JMP32 | BPF_JEQ | BPF_K:
1937 case BPF_JMP32 | BPF_JGT | BPF_K:
1938 case BPF_JMP32 | BPF_JGE | BPF_K:
1939 case BPF_JMP32 | BPF_JNE | BPF_K:
1940 case BPF_JMP32 | BPF_JSGT | BPF_K:
1941 case BPF_JMP32 | BPF_JSGE | BPF_K:
1942 case BPF_JMP32 | BPF_JSET | BPF_K:
1943 case BPF_JMP32 | BPF_JLT | BPF_K:
1944 case BPF_JMP32 | BPF_JLE | BPF_K:
1945 case BPF_JMP32 | BPF_JSLT | BPF_K:
1946 case BPF_JMP32 | BPF_JSLE | BPF_K:
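		/* A branch with off == 0 targets the next instruction either
		 * way, so it is a no-op and nothing needs to be emitted.
		 */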
		if (off == 0)
			break;
		rm = tmp2[0];
		rn = tmp2[1];
		/* Sign-extend immediate value */
		emit_a32_mov_se_i64(true, tmp2, imm, ctx);
go_jmp:
		/* Setup destination register */
		rd = arm_bpf_get_reg64(dst, tmp, ctx);

		/* Check for the condition */
		emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code),
			  BPF_CLASS(code) == BPF_JMP);

		/* Setup JUMP instruction */
		jmp_offset = bpf2a32_offset(i + off, i, ctx);
		switch (BPF_OP(code)) {
		case BPF_JNE:
		case BPF_JSET:
			_emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JEQ:
			_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JGT:
			_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JGE:
			_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
			break;
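		/* The signed cases below look inverted because emit_ar_r()
		 * reverses the comparison operands for BPF_JSGT and BPF_JSLE
		 * (computing src - dst rather than dst - src), so "dst > src
		 * (signed)" is taken on LT and "dst <= src (signed)" on GE.
		 */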
		case BPF_JSGT:
			_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSGE:
			_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JLE:
			_emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JLT:
			_emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSLT:
			_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSLE:
			_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
			break;
		}
		break;
	/* JMP OFF */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
	{
		if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
			jmp_offset = bpf2a32_offset(i + imm, i, ctx);
		else if (BPF_CLASS(code) == BPF_JMP && off != 0)
			jmp_offset = bpf2a32_offset(i + off, i, ctx);
		else
			break;

		check_imm24(jmp_offset);
		emit(ARM_B(jmp_offset), ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(ctx))
			return -EFAULT;
		break;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		const s8 *r0 = bpf2a32[BPF_REG_0];
		const s8 *r1 = bpf2a32[BPF_REG_1];
		const s8 *r2 = bpf2a32[BPF_REG_2];
		const s8 *r3 = bpf2a32[BPF_REG_3];
		const s8 *r4 = bpf2a32[BPF_REG_4];
		const s8 *r5 = bpf2a32[BPF_REG_5];
		const u32 func = (u32)__bpf_call_base + (u32)imm;

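		/* Marshal the BPF argument registers for the helper call:
		 * R1 goes in r0-r1 and R2 in r2-r3, while the 64-bit values
		 * of R3, R4 and R5 are pushed onto the stack, R5 first so
		 * R3 ends up lowest, as expected for stacked arguments.
		 */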
		emit_a32_mov_r64(true, r0, r1, ctx);
		emit_a32_mov_r64(true, r1, r2, ctx);
		emit_push_r64(r5, ctx);
		emit_push_r64(r4, ctx);
		emit_push_r64(r3, ctx);

		emit_a32_mov_i(tmp[1], func, ctx);
		emit_blx_r(tmp[1], ctx);

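		/* The helper does not pop its stacked arguments; reclaim the
		 * three 64-bit slots (24 bytes) pushed above.
		 */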
		emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx);
		break;
	}
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when the last instruction is EXIT,
		 * simply fall through to the epilogue.
		 */
		if (i == ctx->prog->len - 1)
			break;
		jmp_offset = epilogue_offset(ctx);
		check_imm24(jmp_offset);
		emit(ARM_B(jmp_offset), ctx);
		break;
notyet:
		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
		return -EFAULT;
	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	if (ctx->flags & FLAG_IMM_OVERFLOW)
		/*
		 * This instruction generated an overflow while trying to
		 * access the literal pool, so delegate this filter to the
		 * kernel interpreter.
		 */
		return -1;
	return 0;
}

static int build_body(struct jit_ctx *ctx)
{
	const struct bpf_prog *prog = ctx->prog;
	unsigned int i;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &(prog->insnsi[i]);
		int ret;

		ret = build_insn(insn, ctx);

		/* A positive return value means a 64-bit immediate load
		 * consumed two instruction slots: record an offset for the
		 * second slot as well, then skip it.
		 */
		if (ret > 0) {
			i++;
			if (ctx->target == NULL)
				ctx->offsets[i] = ctx->idx;
			continue;
		}

		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx;

		/* If unsuccessful, return with the error code */
		if (ret)
			return ret;
	}
	return 0;
}

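/* bpf_jit_binary_alloc() fills the image with undefined (UDF) instructions
 * via the jit_fill_hole() callback, so any UDF word still present after code
 * generation marks a spot the JIT failed to overwrite.
 */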
static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
			return -1;
	}

	return 0;
}

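/* Returning true asks the verifier to insert explicit zero-extensions after
 * 32-bit subregister writes; the JIT then tests prog->aux->verifier_zext to
 * avoid clearing the high word itself (see the BPF_END handling above).
 */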
bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	bool tmp_blinded = false;
	struct jit_ctx ctx;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;

	/* If the BPF JIT was not enabled, we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;

	/* If constant blinding was enabled and we failed during blinding,
	 * we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);

	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;
	ctx.cpu_architecture = cpu_architecture();

	/* If we are unable to allocate memory for offsets[], we must
	 * fall back to the interpreter.
	 */
	ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
	if (ctx.offsets == NULL) {
		prog = orig_prog;
		goto out;
	}

	/* 1) Fake pass to find the length of the JITed code, to compute
	 * ctx->offsets and the other context variables needed to generate
	 * the final JITed code.
	 * Also, calculate the random starting pointer/start of the JITed
	 * code, which is prefixed by a random number of fault instructions.
	 *
	 * If the first pass fails, there is no chance of it
	 * being successful in the second pass, so just fall back
	 * to the interpreter.
	 */
	if (build_body(&ctx)) {
		prog = orig_prog;
		goto out_off;
	}

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.epilogue_offset = ctx.idx;

#if __LINUX_ARM_ARCH__ < 7
	tmp_idx = ctx.idx;
	build_epilogue(&ctx);
	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.idx += ctx.imm_count;
	if (ctx.imm_count) {
		ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
		if (ctx.imms == NULL) {
			prog = orig_prog;
			goto out_off;
		}
	}
#else
	/* there's nothing special about the epilogue on ARMv7 */
	build_epilogue(&ctx);
#endif
	/* Now we can get the actual image size of the JITed ARM code.
	 * Currently, we are not considering THUMB-2 instructions for the
	 * JIT, although they could decrease the size of the image.
	 *
	 * As each ARM instruction is 32 bits long, we translate the number
	 * of JITed instructions into the size required to store the
	 * JITed code.
	 */
	image_size = sizeof(u32) * ctx.idx;

	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/* If we are unable to allocate memory for the structure, we must
	 * fall back to the interpreter.
	 */
	if (header == NULL) {
		prog = orig_prog;
		goto out_imms;
	}

	/* 2) Actual pass to generate the final JIT code */
	ctx.target = (u32 *) image_ptr;
	ctx.idx = 0;

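	/* Everything is emitted again from the start of the image; this
	 * time ctx->offsets, filled in during the first pass, lets every
	 * branch, including forward branches, resolve to its final offset.
	 */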
	build_prologue(&ctx);

	/* If building the body of the JITed code fails somehow,
	 * we fall back to the interpreter.
	 */
	if (build_body(&ctx) < 0) {
		image_ptr = NULL;
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_imms;
	}
	build_epilogue(&ctx);

	/* 3) Extra pass to validate the JITed code */
	if (validate_code(&ctx)) {
		image_ptr = NULL;
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_imms;
	}
	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));

	if (bpf_jit_enable > 1)
		/* there are 2 passes here */
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	bpf_jit_binary_lock_ro(header);
	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out_imms:
#if __LINUX_ARM_ARCH__ < 7
	if (ctx.imm_count)
		kfree(ctx.imms);
#endif
out_off:
	kfree(ctx.offsets);
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}