/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8
	.align		3

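	/*
	 * Each AES round is implemented as an AESE/AESD instruction
	 * (AddRoundKey + [Inv]SubBytes + [Inv]ShiftRows) followed directly
	 * by AESMC/AESIMC ([Inv]MixColumns). Keeping each pair adjacent
	 * lets cores that support AESE/AESMC instruction fusion execute
	 * the pair as a single operation.
	 */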
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		enc_dround_4x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	enc_round	q3, \key2
	.endm

	.macro		dec_dround_4x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	dec_round	q3, \key2
	.endm

	.macro		enc_fround_4x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	aese.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	.macro		dec_fround_4x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	aesd.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

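	/*
	 * Perform 10, 12 or 14 AES rounds, depending on the round count in
	 * r3 (10 for AES-128, 12 for AES-192, 14 for AES-256). The first
	 * two round keys are expected in q8/q9 and the final one in q14;
	 * the remaining round keys are loaded on the fly via ip,
	 * interleaved with the AES rounds.
	 */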
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.32		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.32		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q3 and ip.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_4x version only)
	 *   q2        : third in/output block (_4x version only)
	 *   q3        : fourth in/output block (_4x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
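	@ also entered from ce_aes_xts_init below, with ip pointing at the
	@ 3rd round key of the second XTS key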
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

	.align		6
aes_decrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

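	/*
	 * Set up the round key layout expected by the helpers above:
	 * preload the first two round keys into q8/q9 and the final one
	 * into q14, which lives at offset \rounds * 16 from \rk.
	 */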
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.32		{q14}, [ip]		@ load last round key
	.endm

	/*
	 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		      int blocks)
	 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		      int blocks)
	 */
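	/*
	 * Each mode below processes the input four blocks at a time for as
	 * long as possible, then falls back to a one-block-at-a-time loop
	 * for the remainder.
	 */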
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop4x:
	subs		r4, r4, #4
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_encrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbencloop4x
.Lecbenc1x:
	adds		r4, r4, #4
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop4x:
	subs		r4, r4, #4
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_decrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbdecloop4x
.Lecbdec1x:
	adds		r4, r4, #4
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

	/*
	 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q15}, [r5]		@ keep iv in q15
	prepare_key	r2, r3
.Lcbcdecloop4x:
	subs		r4, r4, #4
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	vmov		q4, q0
	vmov		q5, q1
	vmov		q6, q2
	vmov		q7, q3
	bl		aes_decrypt_4x
	veor		q0, q0, q15
	veor		q1, q1, q4
	veor		q2, q2, q5
	veor		q3, q3, q6
	vmov		q15, q7
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lcbcdecloop4x
.Lcbcdec1x:
	adds		r4, r4, #4
	beq		.Lcbcdecout
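	@ The final AES round ends with a veor of the last round key (q14),
	@ so xor'ing the previous ciphertext block into q14 beforehand folds
	@ the CBC xor into the final round for free.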
	vmov		q6, q14			@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q15, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q15}, [r5]		@ return next IV
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)


	/*
	 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *			  int rounds, int bytes, u8 const iv[])
	 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *			  int rounds, int bytes, u8 const iv[])
	 */

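	/*
	 * These routines implement ciphertext stealing for a final segment
	 * spanning the last two (possibly partial) blocks: both blocks are
	 * read and written with overlapping accesses, and VTBL/VTBX
	 * permutations built from .Lcts_permute_table (at the end of this
	 * file) swap and pad them.
	 */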
ENTRY(ce_aes_cbc_cts_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]		@ overlapping loads
	vld1.8		{q3}, [ip]

	vld1.8		{q1}, [r5]		@ get iv
	prepare_key	r2, r3

	veor		q0, q0, q1		@ xor with iv
	bl		aes_encrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbl.8		d2, {d6-d7}, d12
	vtbl.8		d3, {d6-d7}, d13

	veor		q0, q0, q1
	bl		aes_encrypt

	add		r4, r0, r4
	vst1.8		{q2}, [r4]		@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)

ENTRY(ce_aes_cbc_cts_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]		@ overlapping loads
	vld1.8		{q1}, [ip]

	vld1.8		{q3}, [r5]		@ get iv
	prepare_key	r2, r3

	bl		aes_decrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbx.8		d0, {d2-d3}, d12
	vtbx.8		d1, {d2-d3}, d13

	veor		q1, q1, q2
	bl		aes_decrypt
	veor		q0, q0, q3		@ xor with iv

	add		r4, r0, r4
	vst1.8		{q1}, [r4]		@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


	/*
	 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		      int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q7}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s31			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop4x:
	subs		r4, r4, #4
	bmi		.Lctr1x

	/*
	 * NOTE: the sequence below has been carefully tweaked to avoid
	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
	 * may produce an incorrect result if they take their input from a
	 * register of which a single 32-bit lane has been updated the last
	 * time it was modified. To work around this, the lanes of registers
	 * q0-q3 below are not manipulated individually, and the different
	 * counter values are prepared by successive manipulations of q7.
	 */
	add		ip, r6, #1
	vmov		q0, q7
	rev		ip, ip
	add		lr, r6, #2
	vmov		s31, ip			@ set lane 3 of q1 via q7
	add		ip, r6, #3
	rev		lr, lr
	vmov		q1, q7
	vmov		s31, lr			@ set lane 3 of q2 via q7
	rev		ip, ip
	vmov		q2, q7
	vmov		s31, ip			@ set lane 3 of q3 via q7
	add		r6, r6, #4
	vmov		q3, q7

	vld1.8		{q4-q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q15}, [r1]!
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q15
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	vmov		s31, ip
	b		.Lctrloop4x
.Lctr1x:
	adds		r4, r4, #4
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q7
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s31, ip
	bcs		.Lctrcarry

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop

.Lctrout:
	vst1.8		{q7}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

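	@ Propagate a carry out of the least significant word of the big
	@ endian counter in q7 (s31) into the more significant words.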
.Lctrcarry:
	.irp		sreg, s30, s29, s28
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)

	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
	 *		      int bytes, u8 iv[], u32 const rk2[], int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
	 *		      int bytes, u8 iv[], u32 const rk2[], int first)
	 */

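	/*
	 * Compute the next XTS tweak by multiplying the 128-bit tweak by x
	 * in GF(2^128) with reduction polynomial x^128 + x^7 + x^2 + x + 1:
	 * each 64-bit half is doubled, and the mask vector \const (holding
	 * { 1, 0x87 }) combined with a vext supplies the cross-lane carry
	 * and the 0x87 reduction term where the corresponding half
	 * overflowed.
	 */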
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm

ce_aes_xts_init:
	vmov.i32	d30, #0x87		@ compose tweak mask vector
	vmovl.u32	q15, d30
	vshr.u64	d30, d31, #7
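	@ q15 now holds { 1, 0x87 }: the carry bit for the high half and
	@ the reduction constant for the low half, as used by next_tweak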

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc4x

.Lxtsencloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsenc4x:
	subs		r4, r4, #64
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsencret
	b		.Lxtsencloop4x
.Lxtsenc1x:
	adds		r4, r4, #64
	beq		.Lxtsencout
	subs		r4, r4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
.Lxtsencctsout:
	veor		q0, q0, q4
	bl		aes_encrypt
	veor		q0, q0, q4
	teq		r4, #0
	beq		.Lxtsencout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	bmi		.Lxtsenccts
	vst1.8		{q0}, [r0]!
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q0}, [r0]
.Lxtsencret:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

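	@ Handle a final partial block with ciphertext stealing: rewind to
	@ reload the tail, steal the missing bytes from the last full
	@ ciphertext block, and emit the two blocks via overlapping stores.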
.LxtsencctsNx:
	vmov		q0, q3
	sub		r0, r0, #16
.Lxtsenccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	/* subtract 16 bytes if we are doing CTS */
	tst		r4, #0xf
	subne		r4, r4, #0x10

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec4x

.Lxtsdecloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsdec4x:
	subs		r4, r4, #64
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_decrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop4x
.Lxtsdec1x:
	adds		r4, r4, #64
	beq		.Lxtsdecout
	subs		r4, r4, #16
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	veor		q0, q0, q4
	bl		aes_decrypt
	veor		q0, q0, q4
	vst1.8		{q0}, [r0]!
	teq		r4, #0
	beq		.Lxtsdecout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

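	@ Decrypt the final pair of blocks with ciphertext stealing: the
	@ last full ciphertext block must be decrypted with the *next*
	@ tweak (q5), and the stolen bytes are merged in before the partial
	@ block is decrypted with the current tweak (q4).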
.Lxtsdeccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	next_tweak	q5, q4, q15, q6

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	veor		q0, q0, q5
	bl		aes_decrypt
	veor		q0, q0, q5

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *			       AES sbox substitution on each byte in
	 *			       'input'
	 */
ENTRY(ce_aes_sub)
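	@ Broadcast the input word to all four lanes of q1 and apply AESE
	@ with a zeroed state in q0: AddRoundKey then yields q1 itself, and
	@ with all columns identical ShiftRows has no effect, leaving pure
	@ SubBytes. Lane 0 then holds the substituted word.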
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *					   operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)

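	/*
	 * Permute table used by the CTS paths above: loading a 16-byte
	 * window at a variable offset into this table yields VTBL/VTBX
	 * index vectors that realign a block by the corresponding number
	 * of bytes. The 0xff entries select zero with VTBL and leave the
	 * destination lane untouched with VTBX.
	 */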
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
