/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

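	/*
	 * Fixed register assignments.  rk, rounds, in and out correspond to
	 * the four function arguments (AAPCS r0-r3); ttab points at the
	 * active lookup table.
	 */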
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

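	/*
	 * Temporaries.  t1 and t2 deliberately alias 'in' and 'out': the
	 * input pointer is dead once the block has been loaded, and the
	 * output pointer is reloaded from the stack before the final stores.
	 */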
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

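	/*
	 * Extract byte \idx of \in.  Pre-v7 cores lack ubfx, so only mask the
	 * byte in place there and let __load compensate for the missing
	 * down-shift in its addressing mode.
	 */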
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

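	/*
	 * Table lookup with an index scaled by the (1 << \sz) entry size.  If
	 * __select could not shift the byte down to bit 0, fold the remaining
	 * (8 * \idx) shift into the addressing mode here instead.
	 */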
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

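	/*
	 * Half a round: computes output columns \out0 and \out1 from the four
	 * input columns, using four table lookups per column combined with
	 * rotations, and XORs in the next two round key words (ldm rk!).
	 * \enc selects encryption vs decryption byte order, \t3/\t4 are
	 * caller-provided scratch registers, and \sz/\op switch between
	 * 32-bit table lookups (default) and byte lookups (final round).
	 */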
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round, and we're done with all data-dependent
	 * table lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

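	/*
	 * A full round computes all four output columns as two __hround
	 * halves.  The second half is free to clobber two of the input
	 * registers for scratch, since their bytes have all been consumed by
	 * that point; \oldcpsr is only passed down on the final round.
	 */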
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

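	/*
	 * Instantiate the full cipher.  \round is fround or iround, \ttab is
	 * the main 1 KiB lookup table, \ltab an optional table for the final
	 * round (empty for encryption) and \bsz the index shift used for the
	 * final round's byte lookups.
	 */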
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table
	 * into L1 cache, assuming cacheline size >= 32.  This is a hardening
	 * measure intended to make cache-timing attacks more difficult.  They
	 * may not be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

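	/*
	 * The unrolled loop below executes rounds - 1 full table-lookup
	 * rounds, two per half-iteration.  When bit 1 of the round count is
	 * set (10 or 14 rounds), enter mid-loop at 1: so the subs/bls
	 * bookkeeping comes out right; the last round is done separately
	 * below using byte lookups.
	 */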
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

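	/*
	 * Final-round table selection.  Encryption needs no separate table:
	 * the plain S-box byte sits at offset 1 within each 32-bit
	 * crypto_ft_tab entry, so advancing ttab by one byte and doing byte
	 * loads with a << 2 index yields S[x] directly.  Decryption switches
	 * to the inverse S-box instead.
	 */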
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

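	// 'rounds' is dead by now, so reuse it to carry the saved CPSR into
	// the final round, which re-enables interrupts as soon as the last
	// data-dependent table lookup has been issued.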
	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

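	// 'out' (r3) doubles as the t2 scratch register, so reload the
	// destination pointer from the copy pushed in the prologue.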
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

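/*
 * Called from C (see the aes-cipher glue code) as:
 *
 *   void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
 *   void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
 *
 * where 'rounds' is 10, 12 or 14 for AES-128/192/256 respectively.
 */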
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)