1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function |
3 | * |
4 | * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | */ |
6 | |
7 | #include <linux/linkage.h> |
8 | #include <asm/assembler.h> |
9 | |
10 | .syntax unified |
11 | .fpu neon |
12 | |
13 | .text |
14 | |
15 | |
/* Context structure: byte offsets of the five chaining words in the SHA-1 state */
17 | |
18 | #define state_h0 0 |
19 | #define state_h1 4 |
20 | #define state_h2 8 |
21 | #define state_h3 12 |
22 | #define state_h4 16 |
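
/*
 * A minimal C sketch of the state layout these offsets assume (the struct
 * name is illustrative; the kernel's struct sha1_state keeps the five
 * chaining words in this same order):
 *
 *	struct sha1_ctx {
 *		uint32_t h0, h1, h2, h3, h4;	// offsets 0, 4, 8, 12, 16
 *	};
 */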
23 | |
24 | |
/* Round constants: K1 covers rounds 0-19, K2 rounds 20-39, K3 rounds 40-59,
 * K4 rounds 60-79 */
26 | |
27 | #define K1 0x5A827999 |
28 | #define K2 0x6ED9EBA1 |
29 | #define K3 0x8F1BBCDC |
30 | #define K4 0xCA62C1D6 |
31 | .align 4 |
32 | .LK_VEC: |
33 | .LK1: .long K1, K1, K1, K1 |
34 | .LK2: .long K2, K2, K2, K2 |
35 | .LK3: .long K3, K3, K3, K3 |
36 | .LK4: .long K4, K4, K4, K4 |
37 | |
38 | |
/* Register macros: RWK (lr) walks the on-stack W+K ring; RT0-RT3 are scratch */
40 | |
41 | #define RSTATE r0 |
42 | #define RDATA r1 |
43 | #define RNBLKS r2 |
44 | #define ROLDSTACK r3 |
45 | #define RWK lr |
46 | |
47 | #define _a r4 |
48 | #define _b r5 |
49 | #define _c r6 |
50 | #define _d r7 |
51 | #define _e r8 |
52 | |
53 | #define RT0 r9 |
54 | #define RT1 r10 |
55 | #define RT2 r11 |
56 | #define RT3 r12 |
57 | |
58 | #define W0 q0 |
59 | #define W1 q7 |
60 | #define W2 q2 |
61 | #define W3 q3 |
62 | #define W4 q4 |
63 | #define W5 q6 |
64 | #define W6 q5 |
65 | #define W7 q1 |
66 | |
67 | #define tmp0 q8 |
68 | #define tmp1 q9 |
69 | #define tmp2 q10 |
70 | #define tmp3 q11 |
71 | |
72 | #define qK1 q12 |
73 | #define qK2 q13 |
74 | #define qK3 q14 |
75 | #define qK4 q15 |
76 | |
77 | #ifdef CONFIG_CPU_BIG_ENDIAN |
78 | #define ARM_LE(code...) |
79 | #else |
80 | #define ARM_LE(code...) code |
81 | #endif |
82 | |
/* Round function macros. Each _R_Fx performs one SHA-1 round and interleaves
 * three message-precalc fragments (pre1, pre2, pre3) for a later round. */
84 | |
85 | #define WK_offs(i) (((i) & 15) * 4) |
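
/*
 * The precomputed W[i]+K values live in a 16-entry ring buffer on the stack:
 * round i loads from slot (i & 15), and the interleaved precalc for round
 * i+16 later refills that same slot once it has been consumed.
 */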
86 | |
87 | #define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ |
88 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
89 | ldr RT3, [sp, WK_offs(i)]; \ |
90 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
91 | bic RT0, d, b; \ |
92 | add e, e, a, ror #(32 - 5); \ |
93 | and RT1, c, b; \ |
94 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
95 | add RT0, RT0, RT3; \ |
96 | add e, e, RT1; \ |
97 | ror b, #(32 - 30); \ |
98 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
99 | add e, e, RT0; |
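
/*
 * In C terms, one F1 round is (rol() being a hypothetical 32-bit rotate-left
 * helper, wk the W[i]+K word just loaded from the ring):
 *
 *	e += rol(a, 5) + ((b & c) | (~b & d)) + wk;	// Ch(b,c,d)
 *	b  = rol(b, 30);
 *
 * The asm computes Ch as (d & ~b) + (c & b); the two terms select disjoint
 * bits, so the add is equivalent to the or.
 */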
100 | |
101 | #define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ |
102 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
103 | ldr RT3, [sp, WK_offs(i)]; \ |
104 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
105 | eor RT0, d, b; \ |
106 | add e, e, a, ror #(32 - 5); \ |
107 | eor RT0, RT0, c; \ |
108 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
109 | add e, e, RT3; \ |
110 | ror b, #(32 - 30); \ |
111 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
112 | add e, e, RT0; \ |
113 | |
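/*
 * One F2 round in C terms (same helpers as the F1 sketch above):
 *
 *	e += rol(a, 5) + (b ^ c ^ d) + wk;		// parity
 *	b  = rol(b, 30);
 */
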
114 | #define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ |
115 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
116 | ldr RT3, [sp, WK_offs(i)]; \ |
117 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
118 | eor RT0, b, c; \ |
119 | and RT1, b, c; \ |
120 | add e, e, a, ror #(32 - 5); \ |
121 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
122 | and RT0, RT0, d; \ |
123 | add RT1, RT1, RT3; \ |
124 | add e, e, RT0; \ |
125 | ror b, #(32 - 30); \ |
126 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ |
127 | add e, e, RT1; |
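
/*
 * One F3 round in C terms:
 *
 *	e += rol(a, 5) + ((b & c) | (b & d) | (c & d)) + wk;	// Maj(b,c,d)
 *	b  = rol(b, 30);
 *
 * The asm uses the equivalent (b & c) + ((b ^ c) & d): the two terms are
 * bitwise disjoint, so the adds cannot carry into each other.
 */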
128 | |
129 | #define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ |
130 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
131 | _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ |
132 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) |
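
/*
 * F4 (rounds 60-79) uses the same parity function as F2; only the round
 * constant differs, and that is supplied through curK, so _R_F2 is reused
 * as-is.
 */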
133 | |
134 | #define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ |
135 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
136 | _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ |
137 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) |
138 | |
139 | #define R(a,b,c,d,e,f,i) \ |
140 | _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ |
141 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) |
142 | |
143 | #define dummy(...) |
144 | |
145 | |
/* Input expansion (message schedule) macros. */
147 | |
148 | /********* Precalc macros for rounds 0-15 *************************************/ |
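
/*
 * Rounds 0-15 take the message words directly.  In C terms, one
 * W_PRECALC_00_15() invocation amounts to (wk being the on-stack ring,
 * data the 64-byte input block):
 *
 *	for (i = 0; i < 16; i++)
 *		wk[i] = be32_to_cpu(((const __be32 *)data)[i]) + K1;
 *
 * The NEON version loads four words per vld1.32, byte-swaps them with
 * vrev32.8 on little-endian CPUs and adds curK four lanes at a time.
 */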
149 | |
150 | #define W_PRECALC_00_15() \ |
151 | add RWK, sp, #(WK_offs(0)); \ |
152 | \ |
153 | vld1.32 {W0, W7}, [RDATA]!; \ |
154 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ |
155 | vld1.32 {W6, W5}, [RDATA]!; \ |
156 | vadd.u32 tmp0, W0, curK; \ |
157 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
158 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ |
159 | vadd.u32 tmp1, W7, curK; \ |
160 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
161 | vadd.u32 tmp2, W6, curK; \ |
162 | vst1.32 {tmp0, tmp1}, [RWK]!; \ |
163 | vadd.u32 tmp3, W5, curK; \ |
164 | vst1.32 {tmp2, tmp3}, [RWK]; \ |
165 | |
166 | #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
167 | vld1.32 {W0, W7}, [RDATA]!; \ |
168 | |
169 | #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
170 | add RWK, sp, #(WK_offs(0)); \ |
171 | |
172 | #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
173 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ |
174 | |
175 | #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
176 | vld1.32 {W6, W5}, [RDATA]!; \ |
177 | |
178 | #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
179 | vadd.u32 tmp0, W0, curK; \ |
180 | |
181 | #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
182 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
183 | |
184 | #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
185 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ |
186 | |
187 | #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
188 | vadd.u32 tmp1, W7, curK; \ |
189 | |
190 | #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
191 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
192 | |
193 | #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
194 | vadd.u32 tmp2, W6, curK; \ |
195 | |
196 | #define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
197 | vst1.32 {tmp0, tmp1}, [RWK]!; \ |
198 | |
199 | #define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
200 | vadd.u32 tmp3, W5, curK; \ |
201 | |
202 | #define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
203 | vst1.32 {tmp2, tmp3}, [RWK]; \ |
204 | |
205 | |
206 | /********* Precalc macros for rounds 16-31 ************************************/ |
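
/*
 * Rounds 16-31 use the standard recurrence, four words per vector:
 *
 *	W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
 *
 * W[i+3] depends on W[i], which is produced in the same vector, so the
 * W[i-3] operand is fed in with its top lane zeroed (vext against a zeroed
 * tmp0).  The missing term is patched up afterwards: with X the pre-rotation
 * xor, rol(X[0], 2) equals rol(W[i], 1), and xoring that into the top lane
 * via the tmp1 shift pair completes W[i+3].
 */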
207 | |
208 | #define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
209 | veor tmp0, tmp0; \ |
210 | vext.8 W, W_m16, W_m12, #8; \ |
211 | |
212 | #define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
213 | add RWK, sp, #(WK_offs(i)); \ |
214 | vext.8 tmp0, W_m04, tmp0, #4; \ |
215 | |
216 | #define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
217 | veor tmp0, tmp0, W_m16; \ |
218 | veor.32 W, W, W_m08; \ |
219 | |
220 | #define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
221 | veor tmp1, tmp1; \ |
222 | veor W, W, tmp0; \ |
223 | |
224 | #define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
225 | vshl.u32 tmp0, W, #1; \ |
226 | |
227 | #define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
228 | vext.8 tmp1, tmp1, W, #(16-12); \ |
229 | vshr.u32 W, W, #31; \ |
230 | |
231 | #define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
232 | vorr tmp0, tmp0, W; \ |
233 | vshr.u32 W, tmp1, #30; \ |
234 | |
235 | #define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
236 | vshl.u32 tmp1, tmp1, #2; \ |
237 | |
238 | #define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
239 | veor tmp0, tmp0, W; \ |
240 | |
241 | #define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
242 | veor W, tmp0, tmp1; \ |
243 | |
244 | #define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
245 | vadd.u32 tmp0, W, curK; \ |
246 | |
247 | #define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
248 | vst1.32 {tmp0}, [RWK]; |
249 | |
250 | |
251 | /********* Precalc macros for rounds 32-79 ************************************/ |
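
/*
 * From round 32 on the recurrence can be applied to itself, giving an
 * equivalent form with no intra-vector dependency (the closest input is six
 * words back, so four lanes can be computed at once):
 *
 *	W[i] = rol(W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32], 2)
 *
 * W_m06 is assembled from W_m08 and W_m04 with vext.8 #8, and the
 * rotate-left-by-2 is the usual vshl/vshr/vorr pair.
 */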
252 | |
253 | #define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
254 | veor W, W_m28; \ |
255 | |
256 | #define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
257 | vext.8 tmp0, W_m08, W_m04, #8; \ |
258 | |
259 | #define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
260 | veor W, W_m16; \ |
261 | |
262 | #define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
263 | veor W, tmp0; \ |
264 | |
265 | #define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
266 | add RWK, sp, #(WK_offs(i&~3)); \ |
267 | |
268 | #define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
269 | vshl.u32 tmp1, W, #2; \ |
270 | |
271 | #define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
272 | vshr.u32 tmp0, W, #30; \ |
273 | |
274 | #define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
275 | vorr W, tmp0, tmp1; \ |
276 | |
277 | #define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
278 | vadd.u32 tmp0, W, curK; \ |
279 | |
280 | #define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
281 | vst1.32 {tmp0}, [RWK]; |
282 | |
283 | |
284 | /* |
285 | * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. |
286 | * |
287 | * unsigned int |
288 | * sha1_transform_neon (void *ctx, const unsigned char *data, |
289 | * unsigned int nblks) |
290 | */ |
291 | .align 3 |
292 | ENTRY(sha1_transform_neon) |
293 | /* input: |
294 | * r0: ctx, CTX |
295 | * r1: data (64*nblks bytes) |
296 | * r2: nblks |
297 | */ |
298 | |
299 | cmp RNBLKS, #0; |
300 | beq .Ldo_nothing; |
301 | |
302 | push {r4-r12, lr}; |
303 | /*vpush {q4-q7};*/ |
304 | |
305 | adr RT3, .LK_VEC; |
306 | |
307 | mov ROLDSTACK, sp; |
308 | |
	/* Reserve 16*4 bytes for the W+K ring and align sp to 16 bytes. */
310 | sub RT0, sp, #(16*4); |
311 | and RT0, #(~(16-1)); |
312 | mov sp, RT0; |
313 | |
314 | vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ |
315 | |
316 | /* Get the values of the chaining variables. */ |
317 | ldm RSTATE, {_a-_e}; |
318 | |
319 | vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ |
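
	/* curK is redefined to qK1..qK4 as the rounds advance, so the
	 * interleaved precalc always adds the constant belonging to the
	 * rounds whose W+K values it is producing. */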
320 | |
321 | #undef curK |
322 | #define curK qK1 |
323 | /* Precalc 0-15. */ |
324 | W_PRECALC_00_15(); |
325 | |
326 | .Loop: |
327 | /* Transform 0-15 + Precalc 16-31. */ |
328 | _R( _a, _b, _c, _d, _e, F1, 0, |
329 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, |
330 | W4, W5, W6, W7, W0, _, _, _ ); |
331 | _R( _e, _a, _b, _c, _d, F1, 1, |
332 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, |
333 | W4, W5, W6, W7, W0, _, _, _ ); |
334 | _R( _d, _e, _a, _b, _c, F1, 2, |
335 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, |
336 | W4, W5, W6, W7, W0, _, _, _ ); |
337 | _R( _c, _d, _e, _a, _b, F1, 3, |
338 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, |
339 | W4, W5, W6, W7, W0, _, _, _ ); |
340 | |
341 | #undef curK |
342 | #define curK qK2 |
343 | _R( _b, _c, _d, _e, _a, F1, 4, |
344 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, |
345 | W3, W4, W5, W6, W7, _, _, _ ); |
346 | _R( _a, _b, _c, _d, _e, F1, 5, |
347 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, |
348 | W3, W4, W5, W6, W7, _, _, _ ); |
349 | _R( _e, _a, _b, _c, _d, F1, 6, |
350 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, |
351 | W3, W4, W5, W6, W7, _, _, _ ); |
352 | _R( _d, _e, _a, _b, _c, F1, 7, |
353 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, |
354 | W3, W4, W5, W6, W7, _, _, _ ); |
355 | |
356 | _R( _c, _d, _e, _a, _b, F1, 8, |
357 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, |
358 | W2, W3, W4, W5, W6, _, _, _ ); |
359 | _R( _b, _c, _d, _e, _a, F1, 9, |
360 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, |
361 | W2, W3, W4, W5, W6, _, _, _ ); |
362 | _R( _a, _b, _c, _d, _e, F1, 10, |
363 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, |
364 | W2, W3, W4, W5, W6, _, _, _ ); |
365 | _R( _e, _a, _b, _c, _d, F1, 11, |
366 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, |
367 | W2, W3, W4, W5, W6, _, _, _ ); |
368 | |
369 | _R( _d, _e, _a, _b, _c, F1, 12, |
370 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, |
371 | W1, W2, W3, W4, W5, _, _, _ ); |
372 | _R( _c, _d, _e, _a, _b, F1, 13, |
373 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, |
374 | W1, W2, W3, W4, W5, _, _, _ ); |
375 | _R( _b, _c, _d, _e, _a, F1, 14, |
376 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, |
377 | W1, W2, W3, W4, W5, _, _, _ ); |
378 | _R( _a, _b, _c, _d, _e, F1, 15, |
379 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, |
380 | W1, W2, W3, W4, W5, _, _, _ ); |
381 | |
382 | /* Transform 16-63 + Precalc 32-79. */ |
383 | _R( _e, _a, _b, _c, _d, F1, 16, |
384 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, |
385 | W0, W1, W2, W3, W4, W5, W6, W7); |
386 | _R( _d, _e, _a, _b, _c, F1, 17, |
387 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, |
388 | W0, W1, W2, W3, W4, W5, W6, W7); |
389 | _R( _c, _d, _e, _a, _b, F1, 18, |
390 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, |
391 | W0, W1, W2, W3, W4, W5, W6, W7); |
392 | _R( _b, _c, _d, _e, _a, F1, 19, |
393 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, |
394 | W0, W1, W2, W3, W4, W5, W6, W7); |
395 | |
396 | _R( _a, _b, _c, _d, _e, F2, 20, |
397 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, |
398 | W7, W0, W1, W2, W3, W4, W5, W6); |
399 | _R( _e, _a, _b, _c, _d, F2, 21, |
400 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, |
401 | W7, W0, W1, W2, W3, W4, W5, W6); |
402 | _R( _d, _e, _a, _b, _c, F2, 22, |
403 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, |
404 | W7, W0, W1, W2, W3, W4, W5, W6); |
405 | _R( _c, _d, _e, _a, _b, F2, 23, |
406 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, |
407 | W7, W0, W1, W2, W3, W4, W5, W6); |
408 | |
409 | #undef curK |
410 | #define curK qK3 |
411 | _R( _b, _c, _d, _e, _a, F2, 24, |
412 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, |
413 | W6, W7, W0, W1, W2, W3, W4, W5); |
414 | _R( _a, _b, _c, _d, _e, F2, 25, |
415 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, |
416 | W6, W7, W0, W1, W2, W3, W4, W5); |
417 | _R( _e, _a, _b, _c, _d, F2, 26, |
418 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, |
419 | W6, W7, W0, W1, W2, W3, W4, W5); |
420 | _R( _d, _e, _a, _b, _c, F2, 27, |
421 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, |
422 | W6, W7, W0, W1, W2, W3, W4, W5); |
423 | |
424 | _R( _c, _d, _e, _a, _b, F2, 28, |
425 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, |
426 | W5, W6, W7, W0, W1, W2, W3, W4); |
427 | _R( _b, _c, _d, _e, _a, F2, 29, |
428 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, |
429 | W5, W6, W7, W0, W1, W2, W3, W4); |
430 | _R( _a, _b, _c, _d, _e, F2, 30, |
431 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, |
432 | W5, W6, W7, W0, W1, W2, W3, W4); |
433 | _R( _e, _a, _b, _c, _d, F2, 31, |
434 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, |
435 | W5, W6, W7, W0, W1, W2, W3, W4); |
436 | |
437 | _R( _d, _e, _a, _b, _c, F2, 32, |
438 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, |
439 | W4, W5, W6, W7, W0, W1, W2, W3); |
440 | _R( _c, _d, _e, _a, _b, F2, 33, |
441 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, |
442 | W4, W5, W6, W7, W0, W1, W2, W3); |
443 | _R( _b, _c, _d, _e, _a, F2, 34, |
444 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, |
445 | W4, W5, W6, W7, W0, W1, W2, W3); |
446 | _R( _a, _b, _c, _d, _e, F2, 35, |
447 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, |
448 | W4, W5, W6, W7, W0, W1, W2, W3); |
449 | |
450 | _R( _e, _a, _b, _c, _d, F2, 36, |
451 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, |
452 | W3, W4, W5, W6, W7, W0, W1, W2); |
453 | _R( _d, _e, _a, _b, _c, F2, 37, |
454 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, |
455 | W3, W4, W5, W6, W7, W0, W1, W2); |
456 | _R( _c, _d, _e, _a, _b, F2, 38, |
457 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, |
458 | W3, W4, W5, W6, W7, W0, W1, W2); |
459 | _R( _b, _c, _d, _e, _a, F2, 39, |
460 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, |
461 | W3, W4, W5, W6, W7, W0, W1, W2); |
462 | |
463 | _R( _a, _b, _c, _d, _e, F3, 40, |
464 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, |
465 | W2, W3, W4, W5, W6, W7, W0, W1); |
466 | _R( _e, _a, _b, _c, _d, F3, 41, |
467 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, |
468 | W2, W3, W4, W5, W6, W7, W0, W1); |
469 | _R( _d, _e, _a, _b, _c, F3, 42, |
470 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, |
471 | W2, W3, W4, W5, W6, W7, W0, W1); |
472 | _R( _c, _d, _e, _a, _b, F3, 43, |
473 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, |
474 | W2, W3, W4, W5, W6, W7, W0, W1); |
475 | |
476 | #undef curK |
477 | #define curK qK4 |
478 | _R( _b, _c, _d, _e, _a, F3, 44, |
479 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, |
480 | W1, W2, W3, W4, W5, W6, W7, W0); |
481 | _R( _a, _b, _c, _d, _e, F3, 45, |
482 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, |
483 | W1, W2, W3, W4, W5, W6, W7, W0); |
484 | _R( _e, _a, _b, _c, _d, F3, 46, |
485 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, |
486 | W1, W2, W3, W4, W5, W6, W7, W0); |
487 | _R( _d, _e, _a, _b, _c, F3, 47, |
488 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, |
489 | W1, W2, W3, W4, W5, W6, W7, W0); |
490 | |
491 | _R( _c, _d, _e, _a, _b, F3, 48, |
492 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, |
493 | W0, W1, W2, W3, W4, W5, W6, W7); |
494 | _R( _b, _c, _d, _e, _a, F3, 49, |
495 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, |
496 | W0, W1, W2, W3, W4, W5, W6, W7); |
497 | _R( _a, _b, _c, _d, _e, F3, 50, |
498 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, |
499 | W0, W1, W2, W3, W4, W5, W6, W7); |
500 | _R( _e, _a, _b, _c, _d, F3, 51, |
501 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, |
502 | W0, W1, W2, W3, W4, W5, W6, W7); |
503 | |
504 | _R( _d, _e, _a, _b, _c, F3, 52, |
505 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, |
506 | W7, W0, W1, W2, W3, W4, W5, W6); |
507 | _R( _c, _d, _e, _a, _b, F3, 53, |
508 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, |
509 | W7, W0, W1, W2, W3, W4, W5, W6); |
510 | _R( _b, _c, _d, _e, _a, F3, 54, |
511 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, |
512 | W7, W0, W1, W2, W3, W4, W5, W6); |
513 | _R( _a, _b, _c, _d, _e, F3, 55, |
514 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, |
515 | W7, W0, W1, W2, W3, W4, W5, W6); |
516 | |
517 | _R( _e, _a, _b, _c, _d, F3, 56, |
518 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, |
519 | W6, W7, W0, W1, W2, W3, W4, W5); |
520 | _R( _d, _e, _a, _b, _c, F3, 57, |
521 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, |
522 | W6, W7, W0, W1, W2, W3, W4, W5); |
523 | _R( _c, _d, _e, _a, _b, F3, 58, |
524 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, |
525 | W6, W7, W0, W1, W2, W3, W4, W5); |
526 | _R( _b, _c, _d, _e, _a, F3, 59, |
527 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, |
528 | W6, W7, W0, W1, W2, W3, W4, W5); |
529 | |
530 | subs RNBLKS, #1; |
531 | |
532 | _R( _a, _b, _c, _d, _e, F4, 60, |
533 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, |
534 | W5, W6, W7, W0, W1, W2, W3, W4); |
535 | _R( _e, _a, _b, _c, _d, F4, 61, |
536 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, |
537 | W5, W6, W7, W0, W1, W2, W3, W4); |
538 | _R( _d, _e, _a, _b, _c, F4, 62, |
539 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, |
540 | W5, W6, W7, W0, W1, W2, W3, W4); |
541 | _R( _c, _d, _e, _a, _b, F4, 63, |
542 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, |
543 | W5, W6, W7, W0, W1, W2, W3, W4); |
544 | |
545 | beq .Lend; |
546 | |
547 | /* Transform 64-79 + Precalc 0-15 of next block. */ |
548 | #undef curK |
549 | #define curK qK1 |
550 | _R( _b, _c, _d, _e, _a, F4, 64, |
551 | WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
552 | _R( _a, _b, _c, _d, _e, F4, 65, |
553 | WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
554 | _R( _e, _a, _b, _c, _d, F4, 66, |
555 | WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
556 | _R( _d, _e, _a, _b, _c, F4, 67, |
557 | WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
558 | |
559 | _R( _c, _d, _e, _a, _b, F4, 68, |
560 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
561 | _R( _b, _c, _d, _e, _a, F4, 69, |
562 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
563 | _R( _a, _b, _c, _d, _e, F4, 70, |
564 | WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
565 | _R( _e, _a, _b, _c, _d, F4, 71, |
566 | WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
567 | |
568 | _R( _d, _e, _a, _b, _c, F4, 72, |
569 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
570 | _R( _c, _d, _e, _a, _b, F4, 73, |
571 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
572 | _R( _b, _c, _d, _e, _a, F4, 74, |
573 | WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
574 | _R( _a, _b, _c, _d, _e, F4, 75, |
575 | WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
576 | |
577 | _R( _e, _a, _b, _c, _d, F4, 76, |
578 | WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
579 | _R( _d, _e, _a, _b, _c, F4, 77, |
580 | WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
581 | _R( _c, _d, _e, _a, _b, F4, 78, |
582 | WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); |
583 | _R( _b, _c, _d, _e, _a, F4, 79, |
584 | WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); |
585 | |
586 | /* Update the chaining variables. */ |
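	/*
	 * This feed-forward is, in C terms:
	 *
	 *	h0 += a; h1 += b; h2 += c; h3 += d; h4 += e;
	 *
	 * Only four scratch registers are available, so h0-h3 are fetched
	 * with a single ldm and h4 reuses RT0 after the first add.
	 */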
587 | ldm RSTATE, {RT0-RT3}; |
588 | add _a, RT0; |
589 | ldr RT0, [RSTATE, #state_h4]; |
590 | add _b, RT1; |
591 | add _c, RT2; |
592 | add _d, RT3; |
593 | add _e, RT0; |
594 | stm RSTATE, {_a-_e}; |
595 | |
596 | b .Loop; |
597 | |
598 | .Lend: |
599 | /* Transform 64-79 */ |
600 | R( _b, _c, _d, _e, _a, F4, 64 ); |
601 | R( _a, _b, _c, _d, _e, F4, 65 ); |
602 | R( _e, _a, _b, _c, _d, F4, 66 ); |
603 | R( _d, _e, _a, _b, _c, F4, 67 ); |
604 | R( _c, _d, _e, _a, _b, F4, 68 ); |
605 | R( _b, _c, _d, _e, _a, F4, 69 ); |
606 | R( _a, _b, _c, _d, _e, F4, 70 ); |
607 | R( _e, _a, _b, _c, _d, F4, 71 ); |
608 | R( _d, _e, _a, _b, _c, F4, 72 ); |
609 | R( _c, _d, _e, _a, _b, F4, 73 ); |
610 | R( _b, _c, _d, _e, _a, F4, 74 ); |
611 | R( _a, _b, _c, _d, _e, F4, 75 ); |
612 | R( _e, _a, _b, _c, _d, F4, 76 ); |
613 | R( _d, _e, _a, _b, _c, F4, 77 ); |
614 | R( _c, _d, _e, _a, _b, F4, 78 ); |
615 | R( _b, _c, _d, _e, _a, F4, 79 ); |
616 | |
617 | mov sp, ROLDSTACK; |
618 | |
619 | /* Update the chaining variables. */ |
620 | ldm RSTATE, {RT0-RT3}; |
621 | add _a, RT0; |
622 | ldr RT0, [RSTATE, #state_h4]; |
623 | add _b, RT1; |
624 | add _c, RT2; |
625 | add _d, RT3; |
626 | /*vpop {q4-q7};*/ |
627 | add _e, RT0; |
628 | stm RSTATE, {_a-_e}; |
629 | |
630 | pop {r4-r12, pc}; |
631 | |
632 | .Ldo_nothing: |
633 | bx lr |
634 | ENDPROC(sha1_transform_neon) |
635 | |