1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * AES-NI + SSE2 implementation of AEGIS-128 |
4 | * |
5 | * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> |
6 | * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. |
7 | */ |
8 | |
9 | #include <linux/linkage.h> |
10 | #include <linux/cfi_types.h> |
11 | #include <asm/frame.h> |
12 | |
/* The five 128-bit AEGIS-128 state words, one XMM register each. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/*
 * KEY and MSG are two names for the same register (%xmm5).  In this file
 * KEY is only referenced by the init routine and MSG by everything else.
 */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch vector registers. */
#define T0	%xmm6
#define T1	%xmm7

/* Integer argument registers of the exported functions, in order. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
27 | |
/*
 * Initialisation constants.  The 32 bytes below are the Fibonacci numbers
 * F(0)..F(31) reduced modulo 256; const_0 holds the first sixteen and
 * const_1 the next sixteen.  Both live in one 32-byte mergeable entry.
 */
.section	.rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15.  Compared against a broadcast byte count in
 * crypto_aegis128_aesni_dec_tail to build a "first LEN bytes" mask.
 */
.section	.rodata.cst16.aegis128_counter, "aM", @progbits, 16
.align 16
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

.text
44 | |
/*
 * aegis128_update - one state-update round, message word not included
 *
 * Every state register goes through one aesenc round keyed with the
 * pre-update value of the register named next to it:
 *
 *	STATE4 <- aesenc(STATE4, key = STATE0)
 *	STATE0 <- aesenc(STATE0, key = STATE1)
 *	STATE1 <- aesenc(STATE1, key = STATE2)
 *	STATE2 <- aesenc(STATE2, key = STATE3)
 *	STATE3 <- aesenc(STATE3, key = old STATE4)
 *
 * The instruction order matters: each aesenc must read its key register
 * before that register is overwritten, which is why STATE4 is saved first.
 *
 * As a result the logical state is rotated by one register after every
 * invocation (the new first word sits in STATE4, the second in STATE0,
 * and so on).  Callers account for this when they XOR in the message
 * word and when they store the state back to memory.
 *
 * in/out: STATE0..STATE4
 * clobbers: T0
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* keep old STATE4 for the last step */
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0,     STATE3
.endm
62 | |
/*
 * __load_partial: internal ABI
 *
 * Gathers a partial block into MSG, zero-padded to 16 bytes.  The bytes
 * read are the (LEN & 0xF) bytes that start at SRC + (LEN & 0x10); bits
 * of LEN above bit 4 are ignored.  The pieces are collected from the
 * highest offset down (1, 2, 4, then 8 bytes), shifting the earlier
 * pieces up so that the final byte order matches memory order.
 *
 * input:
 *   LEN  - byte count
 *   SRC  - source pointer
 * output:
 *   MSG  - zero-padded message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
SYM_FUNC_START_LOCAL(__load_partial)
	pxor	MSG, MSG
	xor	%r9d, %r9d		/* r9 accumulates the low 8 bytes */

	/* bit 0: one trailing byte */
	test	$0x1, LEN
	jz	.Lld_no_byte

	mov	LEN, %r8
	and	$0x1E, %r8		/* offset of that byte */
	movb	(SRC,%r8), %r9b

.Lld_no_byte:
	/* bit 1: a 16-bit piece placed below whatever is already in r9 */
	test	$0x2, LEN
	jz	.Lld_no_word

	mov	LEN, %r8
	and	$0x1C, %r8
	shl	$0x10, %r9
	movw	(SRC,%r8), %r9w

.Lld_no_word:
	/* bit 2: a 32-bit piece placed below whatever is already in r9 */
	test	$0x4, LEN
	jz	.Lld_no_dword

	mov	LEN, %r8
	and	$0x18, %r8
	shl	$32, %r9
	movl	(SRC,%r8), %r8d		/* zero-extends into r8 */
	xor	%r8, %r9

.Lld_no_dword:
	movq	%r9, MSG		/* low qword = r9, high qword = 0 */

	/* bit 3: a 64-bit piece becomes the low half, the rest moves up */
	test	$0x8, LEN
	jz	.Lld_done

	mov	LEN, %r8
	and	$0x10, %r8
	pslldq	$8, MSG
	movq	(SRC,%r8), T0
	pxor	T0, MSG

.Lld_done:
	RET
SYM_FUNC_END(__load_partial)
128 | |
/*
 * __store_partial: internal ABI
 *
 * Writes the leading LEN bytes of T0 to DST using at most one 8-, 4-, 2-
 * and 1-byte store, in that order.  LEN is compared as a signed 64-bit
 * value, so the caller must supply a clean count.
 *
 * input:
 *   LEN  - byte count
 *   DST  - destination pointer
 *   T0   - block to store
 * changed:
 *   T0   (shifted right by 8 bytes when LEN >= 8)
 *   %r8
 *   %r9
 *   %r10
 */
SYM_FUNC_START_LOCAL(__store_partial)
	mov	DST, %r9		/* r9  = write cursor */
	mov	LEN, %r8		/* r8  = bytes still to write */
	movq	T0, %r10		/* r10 = next (up to) 8 bytes */

	cmp	$8, %r8
	jl	.Lst_under_8

	mov	%r10, (%r9)
	add	$8, %r9
	sub	$8, %r8

	psrldq	$8, T0			/* bring the high qword down */
	movq	T0, %r10

.Lst_under_8:
	cmp	$4, %r8
	jl	.Lst_under_4

	mov	%r10d, (%r9)
	add	$4, %r9
	sub	$4, %r8
	shr	$32, %r10

.Lst_under_4:
	cmp	$2, %r8
	jl	.Lst_under_2

	mov	%r10w, (%r9)
	add	$2, %r9
	sub	$2, %r8
	shr	$0x10, %r10

.Lst_under_2:
	cmp	$1, %r8
	jl	.Lst_done

	mov	%r10b, (%r9)

.Lst_done:
	RET
SYM_FUNC_END(__store_partial)
186 | |
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the key (%rsi) and IV (%rdx), runs ten
 * update rounds and writes the 80-byte state to *state (%rdi).
 *
 * The key is read with an aligned load (movdqa) and therefore has to be
 * 16-byte aligned; the IV and the state buffer may be unaligned.
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* KEY = key, T1 = key ^ iv */
	movdqa	(%rsi), KEY
	movdqu	(%rdx), T1
	pxor	KEY, T1

	/*
	 * Starting state:
	 *   STATE0 = key ^ iv
	 *   STATE1 = const_1
	 *   STATE2 = const_0
	 *   STATE3 = key ^ const_0
	 *   STATE4 = key ^ const_1
	 */
	movdqa	.Laegis128_const_1(%rip), STATE1
	movdqa	.Laegis128_const_0(%rip), STATE2
	movdqa	KEY, STATE3
	pxor	STATE2, STATE3
	movdqa	KEY, STATE4
	pxor	STATE1, STATE4
	movdqa	T1, STATE0

	/*
	 * Ten rounds, absorbing KEY and KEY ^ IV alternately.  The XOR target
	 * steps down one register per round because aegis128_update rotates
	 * the state.
	 */
	aegis128_update
	pxor	KEY, STATE4
	aegis128_update
	pxor	T1,  STATE3
	aegis128_update
	pxor	KEY, STATE2
	aegis128_update
	pxor	T1,  STATE1
	aegis128_update
	pxor	KEY, STATE0
	aegis128_update
	pxor	T1,  STATE4
	aegis128_update
	pxor	KEY, STATE3
	aegis128_update
	pxor	T1,  STATE2
	aegis128_update
	pxor	KEY, STATE1
	aegis128_update
	pxor	T1,  STATE0

	/* ten is a multiple of five, so the registers are in natural order */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)
231 | |
/*
 * ad_block - absorb one 16-byte block of associated data
 *
 *   a    - 'a' for an aligned load (movdqa), 'u' for unaligned (movdqu)
 *   dst  - register that holds the first state word after the update
 *   i    - index of the block inside the current 0x50-byte group
 *   out  - suffix of the .Lad_out_* label matching the state rotation
 *          reached after this block
 *
 * Leaves through .Lad_out_\out once fewer than 16 bytes remain.
 * LEN must already be zero-extended, hence the unsigned compare.
 */
.macro ad_block a dst i out
	movdq\a	(\i * 0x10)(SRC), MSG
	aegis128_update
	pxor	MSG, \dst

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_\out
.endm

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs the whole 16-byte blocks of the associated data into the state.
 * If length is below 16 the state is left untouched; a trailing remainder
 * of fewer than 16 bytes is not consumed by this routine.
 *
 * The main loop is unrolled five times so that the rotation introduced by
 * aegis128_update returns to its starting point at the end of each pass.
 * Each early exit stores the registers in the order matching the rotation
 * reached at that point.
 */
SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument: the ABI leaves bits 63:32 of %rsi
	 * undefined, so zero-extend before using LEN as a 64-bit quantity.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lad_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* take the aligned-load loop only if SRC is 16-byte aligned */
	mov	SRC, %r8
	and	$0xF, %r8
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a STATE4 0 1
	ad_block a STATE3 1 2
	ad_block a STATE2 2 3
	ad_block a STATE1 3 4
	ad_block a STATE0 4 0

	add	$0x50, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u STATE4 0 1
	ad_block u STATE3 1 2
	ad_block u STATE2 2 3
	ad_block u STATE1 3 4
	ad_block u STATE0 4 0

	add	$0x50, SRC
	jmp	.Lad_u_loop

	/* store the state: */
.Lad_out_0:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_1:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_2:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_3:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_4:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)
383 | |
/*
 * encrypt_block - encrypt one 16-byte block and absorb the plaintext
 *
 *   a        - 'a' for aligned movdqa, 'u' for unaligned movdqu accesses
 *   s0..s4   - registers currently holding state words 0..4 (s0 is
 *              accepted for symmetry but not referenced)
 *   i        - index of the block inside the current 0x50-byte group;
 *              also selects the .Lenc_out_\i exit label
 *
 * ciphertext = plaintext ^ s1 ^ s4 ^ (s2 & s3)
 *
 * After aegis128_update the register that used to hold s4 contains the
 * new first state word, which is where the plaintext is XORed in.
 * Leaves through .Lenc_out_\i once fewer than 16 bytes remain.
 *
 * clobbers: MSG, T0, T1
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a	(\i * 0x10)(SRC), MSG

	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* T0 = plaintext ^ keystream */
	movdqa	MSG, T0
	pxor	T1, T0
	pxor	\s1, T0
	pxor	\s4, T0
	movdq\a	T0, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lenc_out_\i
.endm
401 | |
/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts the whole 16-byte blocks of src into dst and updates the
 * state.  If length is below 16 nothing is read or written; a trailing
 * remainder of fewer than 16 bytes is not processed by this routine.
 *
 * The loop is unrolled five times; the state register arguments passed
 * to encrypt_block follow the rotation introduced by aegis128_update,
 * and each .Lenc_out_N exit stores the registers in the matching order.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument: the ABI leaves bits 63:32 of %rsi
	 * undefined, so zero-extend before using LEN as a 64-bit quantity.
	 * With a clean, non-negative LEN the signed compare inside
	 * encrypt_block is equivalent to an unsigned one.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lenc_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* aligned loop only if both SRC and DST are 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_u_loop

	/* store the state: */
.Lenc_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)
498 | |
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts a final partial block.  The plaintext is loaded zero-padded
 * by __load_partial, XORed with the keystream, and only the leading
 * length bytes of the result are written by __store_partial.  The
 * zero-padded plaintext is then absorbed into the state, which is
 * stored back rotated by one position.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument: the ABI leaves bits 63:32 of %rsi
	 * undefined, and __store_partial compares LEN as a 64-bit value,
	 * so zero-extend it first.
	 */
	mov	%esi, %esi

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* encrypt message: */
	call	__load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa	MSG, T0
	pxor	STATE1, T0
	pxor	STATE4, T0
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, T0

	call	__store_partial

	aegis128_update
	pxor	MSG, STATE4

	/* store the state (rotated by the single update above): */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
538 | |
/*
 * decrypt_block - decrypt one 16-byte block and absorb the plaintext
 *
 *   a        - 'a' for aligned movdqa, 'u' for unaligned movdqu accesses
 *   s0..s4   - registers currently holding state words 0..4 (s0 is
 *              accepted for symmetry but not referenced)
 *   i        - index of the block inside the current 0x50-byte group;
 *              also selects the .Ldec_out_\i exit label
 *
 * plaintext = ciphertext ^ s1 ^ s4 ^ (s2 & s3)
 *
 * The plaintext is produced in place in MSG, so the same register is
 * written to DST and XORed into the new first state word after the
 * update.  Leaves through .Ldec_out_\i once fewer than 16 bytes remain.
 *
 * clobbers: MSG, T0 (via aegis128_update), T1
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a	(\i * 0x10)(SRC), MSG

	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* MSG = ciphertext ^ keystream */
	pxor	T1, MSG
	pxor	\s1, MSG
	pxor	\s4, MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Ldec_out_\i
.endm
555 | |
/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts the whole 16-byte blocks of src into dst and updates the
 * state.  If length is below 16 nothing is read or written; a trailing
 * remainder of fewer than 16 bytes is not processed by this routine.
 *
 * The loop is unrolled five times; the state register arguments passed
 * to decrypt_block follow the rotation introduced by aegis128_update,
 * and each .Ldec_out_N exit stores the registers in the matching order.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument: the ABI leaves bits 63:32 of %rsi
	 * undefined, so zero-extend before using LEN as a 64-bit quantity.
	 * With a clean, non-negative LEN the signed compare inside
	 * decrypt_block is equivalent to an unsigned one.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Ldec_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* aligned loop only if both SRC and DST are 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_u_loop

	/* store the state: */
.Ldec_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)
652 | |
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts a final partial block.  The ciphertext is loaded zero-padded
 * by __load_partial and XORed with the keystream; the leading length
 * bytes are written by __store_partial.  Because the padding bytes of
 * MSG now contain keystream rather than zeros, they are masked off
 * before the block is absorbed into the state.  The state is stored
 * back rotated by one position.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument: the ABI leaves bits 63:32 of %rsi
	 * undefined, and __store_partial compares LEN as a 64-bit value,
	 * so zero-extend it first.
	 */
	mov	%esi, %esi

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* decrypt message: */
	call	__load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor	STATE1, MSG
	pxor	STATE4, MSG
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, MSG

	movdqa	MSG, T0
	call	__store_partial

	/*
	 * mask with byte count: four self-interleaves replicate the low
	 * byte of LEN into all 16 bytes of T0; the signed byte compare
	 * against 0..15 then yields 0xff exactly for byte indices < LEN.
	 */
	movq	LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa	.Laegis128_counter(%rip), T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	aegis128_update
	pxor	MSG, STATE4

	/* store the state (rotated by the single update above): */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
702 | |
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Runs the seven finalisation rounds and XORs the resulting tag into the
 * 16 bytes at tag_xor (%rsi).  The state in memory is only read; it is
 * not written back.
 *
 * NOTE(review): assoclen (%rdx) and cryptlen (%rcx) are consumed as full
 * 64-bit registers - confirm that the C prototype really passes 64-bit
 * values.
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Length block: low qword = assoclen, high qword = cryptlen, both
	 * converted from bytes to bits, then mixed with STATE3.
	 */
	movq	%rcx, T0
	movq	%rdx, MSG
	punpcklqdq T0, MSG
	psllq	$3, MSG
	pxor	STATE3, MSG

	/*
	 * Seven rounds absorbing the same block.  The XOR target steps down
	 * one register per round to follow the state rotation.
	 */
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3
	aegis128_update
	pxor	MSG, STATE2
	aegis128_update
	pxor	MSG, STATE1
	aegis128_update
	pxor	MSG, STATE0
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3

	/* fold all five state words together (registers are dead afterwards) */
	pxor	STATE1, STATE0
	pxor	STATE3, STATE2
	pxor	STATE2, STATE0
	pxor	STATE4, STATE0

	/* xor tag: */
	movdqu	(%rsi), MSG
	pxor	STATE0, MSG
	movdqu	MSG, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)
749 | |