1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Original implementation written by Andy Polyakov, @dot-asm.
4 * This is an adaptation of the original code for kernel use.
5 *
6 * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
7 */
8
9#include <linux/linkage.h>
10#include <asm/nospec-insn.h>
11#include <asm/fpu-insn.h>
12
13#define SP %r15
14#define FRAME (16 * 8 + 4 * 8)
15
16 .data
17 .balign 32
18
19SYM_DATA_START_LOCAL(sigma)
20 .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
21 .long 1,0,0,0
22 .long 2,0,0,0
23 .long 3,0,0,0
24 .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
25
26 .long 0,1,2,3
27 .long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma
28 .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
29 .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
30 .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
31SYM_DATA_END(sigma)
32
33 .previous
34
35 GEN_BR_THUNK %r14
36
37 .text
38
39#############################################################################
# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
#		      const u32 *key, const u32 *counter)
42
43#define OUT %r2
44#define INP %r3
45#define LEN %r4
46#define KEY %r5
47#define COUNTER %r6
48
49#define BEPERM %v31
50#define CTR %v26
51
52#define K0 %v16
53#define K1 %v17
54#define K2 %v18
55#define K3 %v19
56
57#define XA0 %v0
58#define XA1 %v1
59#define XA2 %v2
60#define XA3 %v3
61
62#define XB0 %v4
63#define XB1 %v5
64#define XB2 %v6
65#define XB3 %v7
66
67#define XC0 %v8
68#define XC1 %v9
69#define XC2 %v10
70#define XC3 %v11
71
72#define XD0 %v12
73#define XD1 %v13
74#define XD2 %v14
75#define XD3 %v15
76
77#define XT0 %v27
78#define XT1 %v28
79#define XT2 %v29
80#define XT3 %v30
81
82SYM_FUNC_START(chacha20_vx_4x)
83 stmg %r6,%r7,6*8(SP)
84
85 larl %r7,sigma
86 lhi %r0,10
87 lhi %r1,0
88
89 VL K0,0,,%r7 # load sigma
90 VL K1,0,,KEY # load key
91 VL K2,16,,KEY
92 VL K3,0,,COUNTER # load counter
93
94 VL BEPERM,0x40,,%r7
95 VL CTR,0x50,,%r7
96
97 VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma
98
99 VREPF XB0,K1,0 # smash the key
100 VREPF XB1,K1,1
101 VREPF XB2,K1,2
102 VREPF XB3,K1,3
103
104 VREPF XD0,K3,0
105 VREPF XD1,K3,1
106 VREPF XD2,K3,2
107 VREPF XD3,K3,3
108 VAF XD0,XD0,CTR
109
110 VREPF XC0,K2,0
111 VREPF XC1,K2,1
112 VREPF XC2,K2,2
113 VREPF XC3,K2,3
114
115.Loop_4x:
116 VAF XA0,XA0,XB0
117 VX XD0,XD0,XA0
118 VERLLF XD0,XD0,16
119
120 VAF XA1,XA1,XB1
121 VX XD1,XD1,XA1
122 VERLLF XD1,XD1,16
123
124 VAF XA2,XA2,XB2
125 VX XD2,XD2,XA2
126 VERLLF XD2,XD2,16
127
128 VAF XA3,XA3,XB3
129 VX XD3,XD3,XA3
130 VERLLF XD3,XD3,16
131
132 VAF XC0,XC0,XD0
133 VX XB0,XB0,XC0
134 VERLLF XB0,XB0,12
135
136 VAF XC1,XC1,XD1
137 VX XB1,XB1,XC1
138 VERLLF XB1,XB1,12
139
140 VAF XC2,XC2,XD2
141 VX XB2,XB2,XC2
142 VERLLF XB2,XB2,12
143
144 VAF XC3,XC3,XD3
145 VX XB3,XB3,XC3
146 VERLLF XB3,XB3,12
147
148 VAF XA0,XA0,XB0
149 VX XD0,XD0,XA0
150 VERLLF XD0,XD0,8
151
152 VAF XA1,XA1,XB1
153 VX XD1,XD1,XA1
154 VERLLF XD1,XD1,8
155
156 VAF XA2,XA2,XB2
157 VX XD2,XD2,XA2
158 VERLLF XD2,XD2,8
159
160 VAF XA3,XA3,XB3
161 VX XD3,XD3,XA3
162 VERLLF XD3,XD3,8
163
164 VAF XC0,XC0,XD0
165 VX XB0,XB0,XC0
166 VERLLF XB0,XB0,7
167
168 VAF XC1,XC1,XD1
169 VX XB1,XB1,XC1
170 VERLLF XB1,XB1,7
171
172 VAF XC2,XC2,XD2
173 VX XB2,XB2,XC2
174 VERLLF XB2,XB2,7
175
176 VAF XC3,XC3,XD3
177 VX XB3,XB3,XC3
178 VERLLF XB3,XB3,7
179
180 VAF XA0,XA0,XB1
181 VX XD3,XD3,XA0
182 VERLLF XD3,XD3,16
183
184 VAF XA1,XA1,XB2
185 VX XD0,XD0,XA1
186 VERLLF XD0,XD0,16
187
188 VAF XA2,XA2,XB3
189 VX XD1,XD1,XA2
190 VERLLF XD1,XD1,16
191
192 VAF XA3,XA3,XB0
193 VX XD2,XD2,XA3
194 VERLLF XD2,XD2,16
195
196 VAF XC2,XC2,XD3
197 VX XB1,XB1,XC2
198 VERLLF XB1,XB1,12
199
200 VAF XC3,XC3,XD0
201 VX XB2,XB2,XC3
202 VERLLF XB2,XB2,12
203
204 VAF XC0,XC0,XD1
205 VX XB3,XB3,XC0
206 VERLLF XB3,XB3,12
207
208 VAF XC1,XC1,XD2
209 VX XB0,XB0,XC1
210 VERLLF XB0,XB0,12
211
212 VAF XA0,XA0,XB1
213 VX XD3,XD3,XA0
214 VERLLF XD3,XD3,8
215
216 VAF XA1,XA1,XB2
217 VX XD0,XD0,XA1
218 VERLLF XD0,XD0,8
219
220 VAF XA2,XA2,XB3
221 VX XD1,XD1,XA2
222 VERLLF XD1,XD1,8
223
224 VAF XA3,XA3,XB0
225 VX XD2,XD2,XA3
226 VERLLF XD2,XD2,8
227
228 VAF XC2,XC2,XD3
229 VX XB1,XB1,XC2
230 VERLLF XB1,XB1,7
231
232 VAF XC3,XC3,XD0
233 VX XB2,XB2,XC3
234 VERLLF XB2,XB2,7
235
236 VAF XC0,XC0,XD1
237 VX XB3,XB3,XC0
238 VERLLF XB3,XB3,7
239
240 VAF XC1,XC1,XD2
241 VX XB0,XB0,XC1
242 VERLLF XB0,XB0,7
243 brct %r0,.Loop_4x
244
245 VAF XD0,XD0,CTR
246
247 VMRHF XT0,XA0,XA1 # transpose data
248 VMRHF XT1,XA2,XA3
249 VMRLF XT2,XA0,XA1
250 VMRLF XT3,XA2,XA3
251 VPDI XA0,XT0,XT1,0b0000
252 VPDI XA1,XT0,XT1,0b0101
253 VPDI XA2,XT2,XT3,0b0000
254 VPDI XA3,XT2,XT3,0b0101
255
256 VMRHF XT0,XB0,XB1
257 VMRHF XT1,XB2,XB3
258 VMRLF XT2,XB0,XB1
259 VMRLF XT3,XB2,XB3
260 VPDI XB0,XT0,XT1,0b0000
261 VPDI XB1,XT0,XT1,0b0101
262 VPDI XB2,XT2,XT3,0b0000
263 VPDI XB3,XT2,XT3,0b0101
264
265 VMRHF XT0,XC0,XC1
266 VMRHF XT1,XC2,XC3
267 VMRLF XT2,XC0,XC1
268 VMRLF XT3,XC2,XC3
269 VPDI XC0,XT0,XT1,0b0000
270 VPDI XC1,XT0,XT1,0b0101
271 VPDI XC2,XT2,XT3,0b0000
272 VPDI XC3,XT2,XT3,0b0101
273
274 VMRHF XT0,XD0,XD1
275 VMRHF XT1,XD2,XD3
276 VMRLF XT2,XD0,XD1
277 VMRLF XT3,XD2,XD3
278 VPDI XD0,XT0,XT1,0b0000
279 VPDI XD1,XT0,XT1,0b0101
280 VPDI XD2,XT2,XT3,0b0000
281 VPDI XD3,XT2,XT3,0b0101
282
283 VAF XA0,XA0,K0
284 VAF XB0,XB0,K1
285 VAF XC0,XC0,K2
286 VAF XD0,XD0,K3
287
288 VPERM XA0,XA0,XA0,BEPERM
289 VPERM XB0,XB0,XB0,BEPERM
290 VPERM XC0,XC0,XC0,BEPERM
291 VPERM XD0,XD0,XD0,BEPERM
292
293 VLM XT0,XT3,0,INP,0
294
295 VX XT0,XT0,XA0
296 VX XT1,XT1,XB0
297 VX XT2,XT2,XC0
298 VX XT3,XT3,XD0
299
300 VSTM XT0,XT3,0,OUT,0
301
302 la INP,0x40(INP)
303 la OUT,0x40(OUT)
304 aghi LEN,-0x40
305
306 VAF XA0,XA1,K0
307 VAF XB0,XB1,K1
308 VAF XC0,XC1,K2
309 VAF XD0,XD1,K3
310
311 VPERM XA0,XA0,XA0,BEPERM
312 VPERM XB0,XB0,XB0,BEPERM
313 VPERM XC0,XC0,XC0,BEPERM
314 VPERM XD0,XD0,XD0,BEPERM
315
316 clgfi LEN,0x40
317 jl .Ltail_4x
318
319 VLM XT0,XT3,0,INP,0
320
321 VX XT0,XT0,XA0
322 VX XT1,XT1,XB0
323 VX XT2,XT2,XC0
324 VX XT3,XT3,XD0
325
326 VSTM XT0,XT3,0,OUT,0
327
328 la INP,0x40(INP)
329 la OUT,0x40(OUT)
330 aghi LEN,-0x40
331 je .Ldone_4x
332
333 VAF XA0,XA2,K0
334 VAF XB0,XB2,K1
335 VAF XC0,XC2,K2
336 VAF XD0,XD2,K3
337
338 VPERM XA0,XA0,XA0,BEPERM
339 VPERM XB0,XB0,XB0,BEPERM
340 VPERM XC0,XC0,XC0,BEPERM
341 VPERM XD0,XD0,XD0,BEPERM
342
343 clgfi LEN,0x40
344 jl .Ltail_4x
345
346 VLM XT0,XT3,0,INP,0
347
348 VX XT0,XT0,XA0
349 VX XT1,XT1,XB0
350 VX XT2,XT2,XC0
351 VX XT3,XT3,XD0
352
353 VSTM XT0,XT3,0,OUT,0
354
355 la INP,0x40(INP)
356 la OUT,0x40(OUT)
357 aghi LEN,-0x40
358 je .Ldone_4x
359
360 VAF XA0,XA3,K0
361 VAF XB0,XB3,K1
362 VAF XC0,XC3,K2
363 VAF XD0,XD3,K3
364
365 VPERM XA0,XA0,XA0,BEPERM
366 VPERM XB0,XB0,XB0,BEPERM
367 VPERM XC0,XC0,XC0,BEPERM
368 VPERM XD0,XD0,XD0,BEPERM
369
370 clgfi LEN,0x40
371 jl .Ltail_4x
372
373 VLM XT0,XT3,0,INP,0
374
375 VX XT0,XT0,XA0
376 VX XT1,XT1,XB0
377 VX XT2,XT2,XC0
378 VX XT3,XT3,XD0
379
380 VSTM XT0,XT3,0,OUT,0
381
382.Ldone_4x:
383 lmg %r6,%r7,6*8(SP)
384 BR_EX %r14
385
386.Ltail_4x:
387 VLR XT0,XC0
388 VLR XT1,XD0
389
390 VST XA0,8*8+0x00,,SP
391 VST XB0,8*8+0x10,,SP
392 VST XT0,8*8+0x20,,SP
393 VST XT1,8*8+0x30,,SP
394
395 lghi %r1,0
396
397.Loop_tail_4x:
398 llgc %r5,0(%r1,INP)
399 llgc %r6,8*8(%r1,SP)
400 xr %r6,%r5
401 stc %r6,0(%r1,OUT)
402 la %r1,1(%r1)
403 brct LEN,.Loop_tail_4x
404
405 lmg %r6,%r7,6*8(SP)
406 BR_EX %r14
407SYM_FUNC_END(chacha20_vx_4x)
408
409#undef OUT
410#undef INP
411#undef LEN
412#undef KEY
413#undef COUNTER
414
415#undef BEPERM
416
417#undef K0
418#undef K1
419#undef K2
420#undef K3
421
422
423#############################################################################
# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
#		   const u32 *key, const u32 *counter)
426
427#define OUT %r2
428#define INP %r3
429#define LEN %r4
430#define KEY %r5
431#define COUNTER %r6
432
433#define BEPERM %v31
434
435#define K0 %v27
436#define K1 %v24
437#define K2 %v25
438#define K3 %v26
439
440#define A0 %v0
441#define B0 %v1
442#define C0 %v2
443#define D0 %v3
444
445#define A1 %v4
446#define B1 %v5
447#define C1 %v6
448#define D1 %v7
449
450#define A2 %v8
451#define B2 %v9
452#define C2 %v10
453#define D2 %v11
454
455#define A3 %v12
456#define B3 %v13
457#define C3 %v14
458#define D3 %v15
459
460#define A4 %v16
461#define B4 %v17
462#define C4 %v18
463#define D4 %v19
464
465#define A5 %v20
466#define B5 %v21
467#define C5 %v22
468#define D5 %v23
469
470#define T0 %v27
471#define T1 %v28
472#define T2 %v29
473#define T3 %v30
474
475SYM_FUNC_START(chacha20_vx)
476 clgfi LEN,256
477 jle chacha20_vx_4x
478 stmg %r6,%r7,6*8(SP)
479
480 lghi %r1,-FRAME
481 lgr %r0,SP
482 la SP,0(%r1,SP)
483 stg %r0,0(SP) # back-chain
484
485 larl %r7,sigma
486 lhi %r0,10
487
488 VLM K1,K2,0,KEY,0 # load key
489 VL K3,0,,COUNTER # load counter
490
491 VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ...
492
493.Loop_outer_vx:
494 VLR A0,K0
495 VLR B0,K1
496 VLR A1,K0
497 VLR B1,K1
498 VLR A2,K0
499 VLR B2,K1
500 VLR A3,K0
501 VLR B3,K1
502 VLR A4,K0
503 VLR B4,K1
504 VLR A5,K0
505 VLR B5,K1
506
507 VLR D0,K3
508 VAF D1,K3,T1 # K[3]+1
509 VAF D2,K3,T2 # K[3]+2
510 VAF D3,K3,T3 # K[3]+3
511 VAF D4,D2,T2 # K[3]+4
512 VAF D5,D2,T3 # K[3]+5
513
514 VLR C0,K2
515 VLR C1,K2
516 VLR C2,K2
517 VLR C3,K2
518 VLR C4,K2
519 VLR C5,K2
520
521 VLR T1,D1
522 VLR T2,D2
523 VLR T3,D3
524
525.Loop_vx:
526 VAF A0,A0,B0
527 VAF A1,A1,B1
528 VAF A2,A2,B2
529 VAF A3,A3,B3
530 VAF A4,A4,B4
531 VAF A5,A5,B5
532 VX D0,D0,A0
533 VX D1,D1,A1
534 VX D2,D2,A2
535 VX D3,D3,A3
536 VX D4,D4,A4
537 VX D5,D5,A5
538 VERLLF D0,D0,16
539 VERLLF D1,D1,16
540 VERLLF D2,D2,16
541 VERLLF D3,D3,16
542 VERLLF D4,D4,16
543 VERLLF D5,D5,16
544
545 VAF C0,C0,D0
546 VAF C1,C1,D1
547 VAF C2,C2,D2
548 VAF C3,C3,D3
549 VAF C4,C4,D4
550 VAF C5,C5,D5
551 VX B0,B0,C0
552 VX B1,B1,C1
553 VX B2,B2,C2
554 VX B3,B3,C3
555 VX B4,B4,C4
556 VX B5,B5,C5
557 VERLLF B0,B0,12
558 VERLLF B1,B1,12
559 VERLLF B2,B2,12
560 VERLLF B3,B3,12
561 VERLLF B4,B4,12
562 VERLLF B5,B5,12
563
564 VAF A0,A0,B0
565 VAF A1,A1,B1
566 VAF A2,A2,B2
567 VAF A3,A3,B3
568 VAF A4,A4,B4
569 VAF A5,A5,B5
570 VX D0,D0,A0
571 VX D1,D1,A1
572 VX D2,D2,A2
573 VX D3,D3,A3
574 VX D4,D4,A4
575 VX D5,D5,A5
576 VERLLF D0,D0,8
577 VERLLF D1,D1,8
578 VERLLF D2,D2,8
579 VERLLF D3,D3,8
580 VERLLF D4,D4,8
581 VERLLF D5,D5,8
582
583 VAF C0,C0,D0
584 VAF C1,C1,D1
585 VAF C2,C2,D2
586 VAF C3,C3,D3
587 VAF C4,C4,D4
588 VAF C5,C5,D5
589 VX B0,B0,C0
590 VX B1,B1,C1
591 VX B2,B2,C2
592 VX B3,B3,C3
593 VX B4,B4,C4
594 VX B5,B5,C5
595 VERLLF B0,B0,7
596 VERLLF B1,B1,7
597 VERLLF B2,B2,7
598 VERLLF B3,B3,7
599 VERLLF B4,B4,7
600 VERLLF B5,B5,7
601
602 VSLDB C0,C0,C0,8
603 VSLDB C1,C1,C1,8
604 VSLDB C2,C2,C2,8
605 VSLDB C3,C3,C3,8
606 VSLDB C4,C4,C4,8
607 VSLDB C5,C5,C5,8
608 VSLDB B0,B0,B0,4
609 VSLDB B1,B1,B1,4
610 VSLDB B2,B2,B2,4
611 VSLDB B3,B3,B3,4
612 VSLDB B4,B4,B4,4
613 VSLDB B5,B5,B5,4
614 VSLDB D0,D0,D0,12
615 VSLDB D1,D1,D1,12
616 VSLDB D2,D2,D2,12
617 VSLDB D3,D3,D3,12
618 VSLDB D4,D4,D4,12
619 VSLDB D5,D5,D5,12
620
621 VAF A0,A0,B0
622 VAF A1,A1,B1
623 VAF A2,A2,B2
624 VAF A3,A3,B3
625 VAF A4,A4,B4
626 VAF A5,A5,B5
627 VX D0,D0,A0
628 VX D1,D1,A1
629 VX D2,D2,A2
630 VX D3,D3,A3
631 VX D4,D4,A4
632 VX D5,D5,A5
633 VERLLF D0,D0,16
634 VERLLF D1,D1,16
635 VERLLF D2,D2,16
636 VERLLF D3,D3,16
637 VERLLF D4,D4,16
638 VERLLF D5,D5,16
639
640 VAF C0,C0,D0
641 VAF C1,C1,D1
642 VAF C2,C2,D2
643 VAF C3,C3,D3
644 VAF C4,C4,D4
645 VAF C5,C5,D5
646 VX B0,B0,C0
647 VX B1,B1,C1
648 VX B2,B2,C2
649 VX B3,B3,C3
650 VX B4,B4,C4
651 VX B5,B5,C5
652 VERLLF B0,B0,12
653 VERLLF B1,B1,12
654 VERLLF B2,B2,12
655 VERLLF B3,B3,12
656 VERLLF B4,B4,12
657 VERLLF B5,B5,12
658
659 VAF A0,A0,B0
660 VAF A1,A1,B1
661 VAF A2,A2,B2
662 VAF A3,A3,B3
663 VAF A4,A4,B4
664 VAF A5,A5,B5
665 VX D0,D0,A0
666 VX D1,D1,A1
667 VX D2,D2,A2
668 VX D3,D3,A3
669 VX D4,D4,A4
670 VX D5,D5,A5
671 VERLLF D0,D0,8
672 VERLLF D1,D1,8
673 VERLLF D2,D2,8
674 VERLLF D3,D3,8
675 VERLLF D4,D4,8
676 VERLLF D5,D5,8
677
678 VAF C0,C0,D0
679 VAF C1,C1,D1
680 VAF C2,C2,D2
681 VAF C3,C3,D3
682 VAF C4,C4,D4
683 VAF C5,C5,D5
684 VX B0,B0,C0
685 VX B1,B1,C1
686 VX B2,B2,C2
687 VX B3,B3,C3
688 VX B4,B4,C4
689 VX B5,B5,C5
690 VERLLF B0,B0,7
691 VERLLF B1,B1,7
692 VERLLF B2,B2,7
693 VERLLF B3,B3,7
694 VERLLF B4,B4,7
695 VERLLF B5,B5,7
696
697 VSLDB C0,C0,C0,8
698 VSLDB C1,C1,C1,8
699 VSLDB C2,C2,C2,8
700 VSLDB C3,C3,C3,8
701 VSLDB C4,C4,C4,8
702 VSLDB C5,C5,C5,8
703 VSLDB B0,B0,B0,12
704 VSLDB B1,B1,B1,12
705 VSLDB B2,B2,B2,12
706 VSLDB B3,B3,B3,12
707 VSLDB B4,B4,B4,12
708 VSLDB B5,B5,B5,12
709 VSLDB D0,D0,D0,4
710 VSLDB D1,D1,D1,4
711 VSLDB D2,D2,D2,4
712 VSLDB D3,D3,D3,4
713 VSLDB D4,D4,D4,4
714 VSLDB D5,D5,D5,4
715 brct %r0,.Loop_vx
716
717 VAF A0,A0,K0
718 VAF B0,B0,K1
719 VAF C0,C0,K2
720 VAF D0,D0,K3
721 VAF A1,A1,K0
722 VAF D1,D1,T1 # +K[3]+1
723
724 VPERM A0,A0,A0,BEPERM
725 VPERM B0,B0,B0,BEPERM
726 VPERM C0,C0,C0,BEPERM
727 VPERM D0,D0,D0,BEPERM
728
729 clgfi LEN,0x40
730 jl .Ltail_vx
731
732 VAF D2,D2,T2 # +K[3]+2
733 VAF D3,D3,T3 # +K[3]+3
734 VLM T0,T3,0,INP,0
735
736 VX A0,A0,T0
737 VX B0,B0,T1
738 VX C0,C0,T2
739 VX D0,D0,T3
740
741 VLM K0,T3,0,%r7,4 # re-load sigma and increments
742
743 VSTM A0,D0,0,OUT,0
744
745 la INP,0x40(INP)
746 la OUT,0x40(OUT)
747 aghi LEN,-0x40
748 je .Ldone_vx
749
750 VAF B1,B1,K1
751 VAF C1,C1,K2
752
753 VPERM A0,A1,A1,BEPERM
754 VPERM B0,B1,B1,BEPERM
755 VPERM C0,C1,C1,BEPERM
756 VPERM D0,D1,D1,BEPERM
757
758 clgfi LEN,0x40
759 jl .Ltail_vx
760
761 VLM A1,D1,0,INP,0
762
763 VX A0,A0,A1
764 VX B0,B0,B1
765 VX C0,C0,C1
766 VX D0,D0,D1
767
768 VSTM A0,D0,0,OUT,0
769
770 la INP,0x40(INP)
771 la OUT,0x40(OUT)
772 aghi LEN,-0x40
773 je .Ldone_vx
774
775 VAF A2,A2,K0
776 VAF B2,B2,K1
777 VAF C2,C2,K2
778
779 VPERM A0,A2,A2,BEPERM
780 VPERM B0,B2,B2,BEPERM
781 VPERM C0,C2,C2,BEPERM
782 VPERM D0,D2,D2,BEPERM
783
784 clgfi LEN,0x40
785 jl .Ltail_vx
786
787 VLM A1,D1,0,INP,0
788
789 VX A0,A0,A1
790 VX B0,B0,B1
791 VX C0,C0,C1
792 VX D0,D0,D1
793
794 VSTM A0,D0,0,OUT,0
795
796 la INP,0x40(INP)
797 la OUT,0x40(OUT)
798 aghi LEN,-0x40
799 je .Ldone_vx
800
801 VAF A3,A3,K0
802 VAF B3,B3,K1
803 VAF C3,C3,K2
804 VAF D2,K3,T3 # K[3]+3
805
806 VPERM A0,A3,A3,BEPERM
807 VPERM B0,B3,B3,BEPERM
808 VPERM C0,C3,C3,BEPERM
809 VPERM D0,D3,D3,BEPERM
810
811 clgfi LEN,0x40
812 jl .Ltail_vx
813
814 VAF D3,D2,T1 # K[3]+4
815 VLM A1,D1,0,INP,0
816
817 VX A0,A0,A1
818 VX B0,B0,B1
819 VX C0,C0,C1
820 VX D0,D0,D1
821
822 VSTM A0,D0,0,OUT,0
823
824 la INP,0x40(INP)
825 la OUT,0x40(OUT)
826 aghi LEN,-0x40
827 je .Ldone_vx
828
829 VAF A4,A4,K0
830 VAF B4,B4,K1
831 VAF C4,C4,K2
832 VAF D4,D4,D3 # +K[3]+4
833 VAF D3,D3,T1 # K[3]+5
834 VAF K3,D2,T3 # K[3]+=6
835
836 VPERM A0,A4,A4,BEPERM
837 VPERM B0,B4,B4,BEPERM
838 VPERM C0,C4,C4,BEPERM
839 VPERM D0,D4,D4,BEPERM
840
841 clgfi LEN,0x40
842 jl .Ltail_vx
843
844 VLM A1,D1,0,INP,0
845
846 VX A0,A0,A1
847 VX B0,B0,B1
848 VX C0,C0,C1
849 VX D0,D0,D1
850
851 VSTM A0,D0,0,OUT,0
852
853 la INP,0x40(INP)
854 la OUT,0x40(OUT)
855 aghi LEN,-0x40
856 je .Ldone_vx
857
858 VAF A5,A5,K0
859 VAF B5,B5,K1
860 VAF C5,C5,K2
861 VAF D5,D5,D3 # +K[3]+5
862
863 VPERM A0,A5,A5,BEPERM
864 VPERM B0,B5,B5,BEPERM
865 VPERM C0,C5,C5,BEPERM
866 VPERM D0,D5,D5,BEPERM
867
868 clgfi LEN,0x40
869 jl .Ltail_vx
870
871 VLM A1,D1,0,INP,0
872
873 VX A0,A0,A1
874 VX B0,B0,B1
875 VX C0,C0,C1
876 VX D0,D0,D1
877
878 VSTM A0,D0,0,OUT,0
879
880 la INP,0x40(INP)
881 la OUT,0x40(OUT)
882 lhi %r0,10
883 aghi LEN,-0x40
884 jne .Loop_outer_vx
885
886.Ldone_vx:
887 lmg %r6,%r7,FRAME+6*8(SP)
888 la SP,FRAME(SP)
889 BR_EX %r14
890
891.Ltail_vx:
892 VSTM A0,D0,8*8,SP,3
893 lghi %r1,0
894
895.Loop_tail_vx:
896 llgc %r5,0(%r1,INP)
897 llgc %r6,8*8(%r1,SP)
898 xr %r6,%r5
899 stc %r6,0(%r1,OUT)
900 la %r1,1(%r1)
901 brct LEN,.Loop_tail_vx
902
903 lmg %r6,%r7,FRAME+6*8(SP)
904 la SP,FRAME(SP)
905 BR_EX %r14
906SYM_FUNC_END(chacha20_vx)
907
908.previous
909

# source: linux/arch/s390/crypto/chacha-s390.S