/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

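	/*
	 * Scratch registers: t1 and t2 alias the 'in' and 'out' argument
	 * registers. The input block is consumed in full before they are
	 * clobbered, and 'out' is reloaded from the stack before the result
	 * is stored.
	 */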
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

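	/*
	 * __select: extract byte \idx of \in into \out. With
	 * __LINUX_ARM_ARCH__ >= 7 this is a single ubfx; on older
	 * architectures the byte is only masked in place, and the remaining
	 * shift is folded into the address calculation in __load below.
	 */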
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

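	/*
	 * __load: look up the (possibly still shifted) byte index \in in the
	 * table at ttab and return the entry in \out. \sz is the log2 of the
	 * entry size (2 for the 32-bit round tables, 0 for the byte-wide
	 * inverse S-box) and \op is 'b' for the byte loads of the final round.
	 */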
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

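	/*
	 * __hround: compute two output columns (\out0, \out1) of one AES round
	 * from the input columns \in0..\in3, using one table lookup per input
	 * byte. The table only stores entries for byte position 0, so the
	 * contributions of the other byte positions are rotated into place
	 * with ror #24/#16/#8. The two words fetched from rk are the next
	 * round key words, XORed in last.
	 */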
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

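	/*
	 * fround/iround: one full forward/inverse AES round producing all four
	 * output columns as two __hround invocations. \sz defaults to 2 for
	 * the 32-bit round tables; the final round overrides it and passes
	 * \oldcpsr so interrupts are restored once its last lookup has been
	 * issued.
	 */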
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

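	/*
	 * do_crypt: shared body of the encrypt and decrypt entry points.
	 * \round is fround or iround, \ttab the main lookup table, \ltab the
	 * table for the last round (left empty for encryption, which derives
	 * the S-box bytes from \ttab instead), and \bsz the shift passed to
	 * the final-round loads.
	 */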
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32. This is a hardening measure
	 * intended to make cache-timing attacks more difficult. They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f

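	/*
	 * The loop below runs the rounds in pairs, alternating between the
	 * two register sets; the last round is peeled off and handled
	 * separately after 2: using byte-wide lookups. 'rounds' includes that
	 * last round, so for AES-128 and AES-256 (10 and 14 rounds, i.e.
	 * rounds % 4 == 2) the first pair at 0: is skipped to make the count
	 * come out right.
	 */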
0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

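	/*
	 * Set up the table for the final round. For encryption (\ltab empty)
	 * there is no separate S-box table: offsetting ttab by one byte makes
	 * the byte loads of the last round pick the S-box byte out of each
	 * 32-bit crypto_ft_tab entry. For decryption, switch to the inverse
	 * S-box and prefetch it, as with the main table above.
	 */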
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

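/*
 * The register aliases at the top reflect the C-level calling convention.
 * In the C glue code (arch/arm/crypto/aes-cipher-glue.c) these entry points
 * are declared along the lines of:
 *
 *	asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds,
 *					  const u8 *in, u8 *out);
 *	asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds,
 *					  const u8 *in, u8 *out);
 *
 * where 'rk' is the expanded key schedule and 'rounds' the round count for
 * the key size (10, 12 or 14).
 */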
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)


source code of linux/arch/arm/crypto/aes-cipher-core.S