/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE2 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 */
8
9#include <linux/linkage.h>
10#include <linux/cfi_types.h>
11#include <asm/frame.h>
12
/* The five 128-bit AEGIS-128 state words, one SSE register each. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/* KEY and MSG name the same register (%xmm5). */
#define KEY	%xmm5
#define MSG	%xmm5

/* Vector scratch registers. */
#define T0	%xmm6
#define T1	%xmm7

/* General-purpose register aliases used by the routines below. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
27
/*
 * Two 16-byte initialisation constants, stored back to back. Read as one
 * 32-byte string they are the Fibonacci sequence reduced modulo 256.
 */
.section	.rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte	0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
36
/*
 * Byte i of this constant holds the value i (0..15). It is compared against
 * a broadcast byte count to build a per-byte mask.
 */
.section	.rodata.cst16.aegis128_counter, "aM", @progbits, 16
.align 16
.Laegis128_counter:
	.byte	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

.text
44
/*
 * aegis128_update
 *
 * Run one AES round over every state register, using the following state
 * register (with wrap-around) as the round key. The message XOR is left to
 * the caller.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state (shifted positions): the new word 0 ends up in
 *                STATE4, word 1 in STATE0, word 2 in STATE1, word 3 in
 *                STATE2 and word 4 in STATE3
 * changed:
 *   T0
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* save old STATE4 for the last round */
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0, STATE3
.endm
62
/*
 * __load_partial: internal ABI
 *
 * Build a zero-padded 16-byte block from the (LEN & 0xf) bytes that start at
 * SRC + (LEN & 0x10). The bytes are gathered from the end towards the start
 * in 1-, 2-, 4- and 8-byte pieces, one piece per set bit of LEN.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
SYM_FUNC_START_LOCAL(__load_partial)
	pxor	MSG, MSG
	xor	%r9d, %r9d

	/* bit 0 set: fetch the single byte at offset (LEN & 0x1e) */
	test	$0x1, LEN
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8
	add	SRC, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* bit 1 set: slot the two bytes at offset (LEN & 0x1c) in below */
	test	$0x2, LEN
	jz	.Lld_partial_2

	mov	LEN, %r8
	and	$0x1C, %r8
	add	SRC, %r8
	shl	$0x10, %r9
	mov	(%r8), %r9w

.Lld_partial_2:
	/* bit 2 set: slot the four bytes at offset (LEN & 0x18) in below */
	test	$0x4, LEN
	jz	.Lld_partial_4

	mov	LEN, %r8
	and	$0x18, %r8
	add	SRC, %r8
	shl	$32, %r9
	mov	(%r8), %r8d		/* 32-bit load clears bits 63:32 of %r8 */
	xor	%r8, %r9

.Lld_partial_4:
	/* the up to seven bytes gathered so far become the low half of MSG */
	movq	%r9, MSG

	/* bit 3 set: move them to the high half, low half comes from memory */
	test	$0x8, LEN
	jz	.Lld_partial_8

	mov	LEN, %r8
	and	$0x10, %r8
	add	SRC, %r8
	pslldq	$8, MSG
	movq	(%r8), T0
	pxor	T0, MSG

.Lld_partial_8:
	RET
SYM_FUNC_END(__load_partial)
128
/*
 * __store_partial: internal ABI
 *
 * Write the leading bytes of T0 to DST in 8-, 4-, 2- and 1-byte pieces.
 * At most 15 bytes are written (min(LEN, 15)).
 *
 * input:
 *   LEN - bytes (only the low 32 bits are used, since the callers in this
 *         file receive the count as an 'unsigned int')
 *   DST - dst
 *   T0  - message block
 * changed:
 *   T0 (shifted right by 8 bytes when LEN >= 8)
 *   %r8
 *   %r9
 *   %r10
 */
SYM_FUNC_START_LOCAL(__store_partial)
	mov	LEN, %r8
	mov	%r8d, %r8d		/* drop bits 63:32, which may be garbage */
	mov	DST, %r9

	movq	T0, %r10		/* %r10 = bytes 0-7 of the block */

	/* The count is unsigned, so use the unsigned branch family. */
	cmp	$8, %r8
	jb	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0
	movq	T0, %r10		/* %r10 = bytes 8-15 of the block */

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jb	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jb	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jb	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	RET
SYM_FUNC_END(__store_partial)
186
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Derive the initial five-word state from the key and IV and write it to
 * *state. The key is read with an aligned load (movdqa), so it must be
 * 16-byte aligned; the IV and the state buffer are accessed unaligned.
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* KEY = key, T1 = key ^ iv */
	movdqa	(%rsi), KEY
	movdqu	(%rdx), T1
	pxor	KEY, T1

	/*
	 * STATE0 = key ^ iv
	 * STATE1 = const_1
	 * STATE2 = const_0
	 * STATE3 = key ^ const_0
	 * STATE4 = key ^ const_1
	 */
	movdqa	T1, STATE0
	movdqa	.Laegis128_const_1(%rip), STATE1
	movdqa	.Laegis128_const_0(%rip), STATE2
	movdqa	KEY, STATE3
	pxor	STATE2, STATE3
	movdqa	KEY, STATE4
	pxor	STATE1, STATE4

	/*
	 * Ten update rounds, absorbing KEY and KEY ^ IV alternately. The
	 * register that receives the XOR steps back by one every round because
	 * aegis128_update leaves the state in shifted positions.
	 */
	aegis128_update
	pxor	KEY, STATE4
	aegis128_update
	pxor	T1, STATE3
	aegis128_update
	pxor	KEY, STATE2
	aegis128_update
	pxor	T1, STATE1
	aegis128_update
	pxor	KEY, STATE0
	aegis128_update
	pxor	T1, STATE4
	aegis128_update
	pxor	KEY, STATE3
	aegis128_update
	pxor	T1, STATE2
	aegis128_update
	pxor	KEY, STATE1
	aegis128_update
	pxor	T1, STATE0

	/* ten shifts bring every word back to its own register */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)
231
/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorb associated data into the state, 16 bytes at a time. Only whole
 * blocks are consumed: once fewer than 16 bytes remain the state is written
 * back and the function returns. If length < 16 on entry the state is not
 * touched at all.
 */

/*
 * ad_block - absorb one 16-byte block
 *   a   - 'a' for an aligned load, 'u' for an unaligned one
 *   s4  - register that holds state word 0 after the update
 *   i   - index of the block within the current 0x50-byte stride
 *   out - label to leave through when fewer than 16 bytes remain
 */
.macro ad_block a, s4, i, out
	movdq\a	(\i * 0x10)(SRC), MSG
	aegis128_update
	pxor	MSG, \s4
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	\out
.endm

SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of LEN (%rsi) are
	 * undefined on entry. Zero-extend before doing 64-bit arithmetic.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lad_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* pick the aligned loop only when SRC is 16-byte aligned */
	mov	SRC, %r8
	and	$0xF, %r8
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a, STATE4, 0, .Lad_out_1
	ad_block a, STATE3, 1, .Lad_out_2
	ad_block a, STATE2, 2, .Lad_out_3
	ad_block a, STATE1, 3, .Lad_out_4
	ad_block a, STATE0, 4, .Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u, STATE4, 0, .Lad_out_1
	ad_block u, STATE3, 1, .Lad_out_2
	ad_block u, STATE2, 2, .Lad_out_3
	ad_block u, STATE1, 3, .Lad_out_4
	ad_block u, STATE0, 4, .Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_u_loop

	/*
	 * Store the state. .Lad_out_N is reached after N (mod 5) updates, so
	 * each exit undoes the matching register shift while storing.
	 */
.Lad_out_0:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_1:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_2:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_3:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_4:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)
383
/*
 * encrypt_block - encrypt and absorb one 16-byte block
 *   a      - 'a' for aligned loads/stores, 'u' for unaligned ones
 *   s0..s4 - registers currently holding state words 0..4 (s0 is unused)
 *   i      - index of the block within the current 0x50-byte stride; also
 *            selects the exit label .Lenc_out_<i>
 *
 * Writes MSG ^ s1 ^ s4 ^ (s2 & s3) to DST, updates the state, XORs the
 * plaintext block into \s4 and leaves through .Lenc_out_<i> once fewer than
 * 16 bytes remain. Changes MSG, T0, T1 and LEN.
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* T0 = plaintext ^ (s2 & s3) ^ s1 ^ s4 */
	movdq\a	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0
	pxor	T1, T0
	pxor	\s1, T0
	pxor	\s4, T0
	movdq\a	T0, (\i * 0x10)(DST)

	/* after the update, \s4 is the register holding state word 0 */
	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lenc_out_\i
.endm
401
/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypt whole 16-byte blocks from src to dst and absorb them into the
 * state. Processing stops once fewer than 16 bytes remain; if length < 16
 * on entry nothing is read or written.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of LEN (%rsi) are
	 * undefined on entry. Zero-extend before doing 64-bit arithmetic.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lenc_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_u_loop

	/*
	 * Store the state. .Lenc_out_N is reached after N + 1 blocks of the
	 * current stride, so each exit undoes the matching register shift.
	 */
.Lenc_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)
498
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypt one final partial block: the input bytes are zero-padded to 16
 * bytes by __load_partial, the leading bytes of the ciphertext block are
 * written by __store_partial, and the zero-padded plaintext is absorbed
 * into the state.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of LEN (%rsi) are
	 * undefined on entry. Zero-extend before the helpers consume LEN.
	 */
	mov	%esi, %esi

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* encrypt message: T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	call	__load_partial

	movdqa	MSG, T0
	pxor	STATE1, T0
	pxor	STATE4, T0
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, T0

	call	__store_partial

	/* absorb the zero-padded plaintext block */
	aegis128_update
	pxor	MSG, STATE4

	/* store the state (one update => shifted by one register): */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
538
/*
 * decrypt_block - decrypt and absorb one 16-byte block
 *   a      - 'a' for aligned loads/stores, 'u' for unaligned ones
 *   s0..s4 - registers currently holding state words 0..4 (s0 is unused)
 *   i      - index of the block within the current 0x50-byte stride; also
 *            selects the exit label .Ldec_out_<i>
 *
 * Writes src ^ s1 ^ s4 ^ (s2 & s3) to DST, updates the state, XORs the
 * recovered plaintext into \s4 and leaves through .Ldec_out_<i> once fewer
 * than 16 bytes remain. Changes MSG, T0, T1 and LEN.
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* MSG = ciphertext ^ (s2 & s3) ^ s1 ^ s4 */
	movdq\a	(\i * 0x10)(SRC), MSG
	pxor	T1, MSG
	pxor	\s1, MSG
	pxor	\s4, MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	/* after the update, \s4 is the register holding state word 0 */
	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Ldec_out_\i
.endm
555
/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypt whole 16-byte blocks from src to dst and absorb the recovered
 * plaintext into the state. Processing stops once fewer than 16 bytes
 * remain; if length < 16 on entry nothing is read or written.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of LEN (%rsi) are
	 * undefined on entry. Zero-extend before doing 64-bit arithmetic.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Ldec_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_u_loop

	/*
	 * Store the state. .Ldec_out_N is reached after N + 1 blocks of the
	 * current stride, so each exit undoes the matching register shift.
	 */
.Ldec_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)
652
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypt one final partial block: the input bytes are zero-padded to 16
 * bytes by __load_partial, the leading bytes of the decrypted block are
 * written by __store_partial, and the decrypted block, with every byte at
 * index >= length cleared, is absorbed into the state.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of LEN (%rsi) are
	 * undefined on entry. Zero-extend before the helpers consume LEN.
	 */
	mov	%esi, %esi

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* decrypt message: MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	call	__load_partial

	pxor	STATE1, MSG
	pxor	STATE4, MSG
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, MSG

	movdqa	MSG, T0
	call	__store_partial

	/*
	 * mask with byte count: the four unpacks replicate the low byte of
	 * LEN into all 16 bytes of T0; comparing against the 0..15 counter
	 * then yields 0xff in byte i exactly when LEN > i (signed byte
	 * compare).
	 */
	movq	LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa	.Laegis128_counter(%rip), T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	/* absorb the masked plaintext block */
	aegis128_update
	pxor	MSG, STATE4

	/* store the state (one update => shifted by one register): */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
702
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Run the finalisation rounds and XOR the resulting tag into the 16 bytes
 * at tag_xor. The state buffer is only read, never written back.
 *
 * NOTE(review): %rdx and %rcx are consumed as full 64-bit values, which
 * matches the u64 prototype above. If the C declaration uses a narrower
 * type, the upper bits would be undefined. Confirm against the caller.
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* length block: low qword = %rdx * 8, high qword = %rcx * 8 (bits) */
	movq	%rdx, MSG
	movq	%rcx, T0
	punpcklqdq T0, MSG
	psllq	$3, MSG

	pxor	STATE3, MSG

	/* seven update rounds, each absorbing the same block */
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3
	aegis128_update
	pxor	MSG, STATE2
	aegis128_update
	pxor	MSG, STATE1
	aegis128_update
	pxor	MSG, STATE0
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3

	/* *tag_xor ^= STATE0 ^ STATE1 ^ STATE2 ^ STATE3 ^ STATE4 */
	movdqu	(%rsi), MSG

	pxor	STATE0, MSG
	pxor	STATE1, MSG
	pxor	STATE2, MSG
	pxor	STATE3, MSG
	pxor	STATE4, MSG

	movdqu	MSG, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)
749

/* Source: linux/arch/x86/crypto/aegis128-aesni-asm.S */