1 | #include "llvm_blake3_prefix.h" |
2 | |
/* Assembler setup for the SSE4.1 BLAKE3 entry points declared below. */
/* Intel operand order (destination first) with bare register names (no % prefix). */
3 | .intel_syntax noprefix |
/* Each entry point is declared global under two spellings: plain and with a leading underscore. */
/* NOTE(review): presumably the underscore spellings serve toolchains that prefix C symbols with '_' - confirm. */
4 | .global blake3_hash_many_sse41 |
5 | .global _blake3_hash_many_sse41 |
6 | .global blake3_compress_in_place_sse41 |
7 | .global _blake3_compress_in_place_sse41 |
8 | .global blake3_compress_xof_sse41 |
9 | .global _blake3_compress_xof_sse41 |
/* Everything that follows is emitted into the code section. */
10 | .section .text |
/* Align the label that follows to a 2^6 = 64-byte boundary. */
11 | .p2align 6 |
12 | _blake3_hash_many_sse41: |
13 | blake3_hash_many_sse41: |
14 | push r15 |
15 | push r14 |
16 | push r13 |
17 | push r12 |
18 | push rsi |
19 | push rdi |
20 | push rbx |
21 | push rbp |
22 | mov rbp, rsp |
23 | sub rsp, 528 |
24 | and rsp, 0xFFFFFFFFFFFFFFC0 |
25 | movdqa xmmword ptr [rsp+0x170], xmm6 |
26 | movdqa xmmword ptr [rsp+0x180], xmm7 |
27 | movdqa xmmword ptr [rsp+0x190], xmm8 |
28 | movdqa xmmword ptr [rsp+0x1A0], xmm9 |
29 | movdqa xmmword ptr [rsp+0x1B0], xmm10 |
30 | movdqa xmmword ptr [rsp+0x1C0], xmm11 |
31 | movdqa xmmword ptr [rsp+0x1D0], xmm12 |
32 | movdqa xmmword ptr [rsp+0x1E0], xmm13 |
33 | movdqa xmmword ptr [rsp+0x1F0], xmm14 |
34 | movdqa xmmword ptr [rsp+0x200], xmm15 |
35 | mov rdi, rcx |
36 | mov rsi, rdx |
37 | mov rdx, r8 |
38 | mov rcx, r9 |
39 | mov r8, qword ptr [rbp+0x68] |
40 | movzx r9, byte ptr [rbp+0x70] |
41 | neg r9d |
42 | movd xmm0, r9d |
43 | pshufd xmm0, xmm0, 0x00 |
44 | movdqa xmmword ptr [rsp+0x130], xmm0 |
45 | movdqa xmm1, xmm0 |
46 | pand xmm1, xmmword ptr [ADD0+rip] |
47 | pand xmm0, xmmword ptr [ADD1+rip] |
48 | movdqa xmmword ptr [rsp+0x150], xmm0 |
49 | movd xmm0, r8d |
50 | pshufd xmm0, xmm0, 0x00 |
51 | paddd xmm0, xmm1 |
52 | movdqa xmmword ptr [rsp+0x110], xmm0 |
53 | pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] |
54 | pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] |
55 | pcmpgtd xmm1, xmm0 |
56 | shr r8, 32 |
57 | movd xmm2, r8d |
58 | pshufd xmm2, xmm2, 0x00 |
59 | psubd xmm2, xmm1 |
60 | movdqa xmmword ptr [rsp+0x120], xmm2 |
61 | mov rbx, qword ptr [rbp+0x90] |
62 | mov r15, rdx |
63 | shl r15, 6 |
64 | movzx r13d, byte ptr [rbp+0x78] |
65 | movzx r12d, byte ptr [rbp+0x88] |
66 | cmp rsi, 4 |
67 | jc 3f |
68 | 2: |
69 | movdqu xmm3, xmmword ptr [rcx] |
70 | pshufd xmm0, xmm3, 0x00 |
71 | pshufd xmm1, xmm3, 0x55 |
72 | pshufd xmm2, xmm3, 0xAA |
73 | pshufd xmm3, xmm3, 0xFF |
74 | movdqu xmm7, xmmword ptr [rcx+0x10] |
75 | pshufd xmm4, xmm7, 0x00 |
76 | pshufd xmm5, xmm7, 0x55 |
77 | pshufd xmm6, xmm7, 0xAA |
78 | pshufd xmm7, xmm7, 0xFF |
79 | mov r8, qword ptr [rdi] |
80 | mov r9, qword ptr [rdi+0x8] |
81 | mov r10, qword ptr [rdi+0x10] |
82 | mov r11, qword ptr [rdi+0x18] |
83 | movzx eax, byte ptr [rbp+0x80] |
84 | or eax, r13d |
85 | xor edx, edx |
86 | 9: |
87 | mov r14d, eax |
88 | or eax, r12d |
89 | add rdx, 64 |
90 | cmp rdx, r15 |
91 | cmovne eax, r14d |
92 | movdqu xmm8, xmmword ptr [r8+rdx-0x40] |
93 | movdqu xmm9, xmmword ptr [r9+rdx-0x40] |
94 | movdqu xmm10, xmmword ptr [r10+rdx-0x40] |
95 | movdqu xmm11, xmmword ptr [r11+rdx-0x40] |
96 | movdqa xmm12, xmm8 |
97 | punpckldq xmm8, xmm9 |
98 | punpckhdq xmm12, xmm9 |
99 | movdqa xmm14, xmm10 |
100 | punpckldq xmm10, xmm11 |
101 | punpckhdq xmm14, xmm11 |
102 | movdqa xmm9, xmm8 |
103 | punpcklqdq xmm8, xmm10 |
104 | punpckhqdq xmm9, xmm10 |
105 | movdqa xmm13, xmm12 |
106 | punpcklqdq xmm12, xmm14 |
107 | punpckhqdq xmm13, xmm14 |
108 | movdqa xmmword ptr [rsp], xmm8 |
109 | movdqa xmmword ptr [rsp+0x10], xmm9 |
110 | movdqa xmmword ptr [rsp+0x20], xmm12 |
111 | movdqa xmmword ptr [rsp+0x30], xmm13 |
112 | movdqu xmm8, xmmword ptr [r8+rdx-0x30] |
113 | movdqu xmm9, xmmword ptr [r9+rdx-0x30] |
114 | movdqu xmm10, xmmword ptr [r10+rdx-0x30] |
115 | movdqu xmm11, xmmword ptr [r11+rdx-0x30] |
116 | movdqa xmm12, xmm8 |
117 | punpckldq xmm8, xmm9 |
118 | punpckhdq xmm12, xmm9 |
119 | movdqa xmm14, xmm10 |
120 | punpckldq xmm10, xmm11 |
121 | punpckhdq xmm14, xmm11 |
122 | movdqa xmm9, xmm8 |
123 | punpcklqdq xmm8, xmm10 |
124 | punpckhqdq xmm9, xmm10 |
125 | movdqa xmm13, xmm12 |
126 | punpcklqdq xmm12, xmm14 |
127 | punpckhqdq xmm13, xmm14 |
128 | movdqa xmmword ptr [rsp+0x40], xmm8 |
129 | movdqa xmmword ptr [rsp+0x50], xmm9 |
130 | movdqa xmmword ptr [rsp+0x60], xmm12 |
131 | movdqa xmmword ptr [rsp+0x70], xmm13 |
132 | movdqu xmm8, xmmword ptr [r8+rdx-0x20] |
133 | movdqu xmm9, xmmword ptr [r9+rdx-0x20] |
134 | movdqu xmm10, xmmword ptr [r10+rdx-0x20] |
135 | movdqu xmm11, xmmword ptr [r11+rdx-0x20] |
136 | movdqa xmm12, xmm8 |
137 | punpckldq xmm8, xmm9 |
138 | punpckhdq xmm12, xmm9 |
139 | movdqa xmm14, xmm10 |
140 | punpckldq xmm10, xmm11 |
141 | punpckhdq xmm14, xmm11 |
142 | movdqa xmm9, xmm8 |
143 | punpcklqdq xmm8, xmm10 |
144 | punpckhqdq xmm9, xmm10 |
145 | movdqa xmm13, xmm12 |
146 | punpcklqdq xmm12, xmm14 |
147 | punpckhqdq xmm13, xmm14 |
148 | movdqa xmmword ptr [rsp+0x80], xmm8 |
149 | movdqa xmmword ptr [rsp+0x90], xmm9 |
150 | movdqa xmmword ptr [rsp+0xA0], xmm12 |
151 | movdqa xmmword ptr [rsp+0xB0], xmm13 |
152 | movdqu xmm8, xmmword ptr [r8+rdx-0x10] |
153 | movdqu xmm9, xmmword ptr [r9+rdx-0x10] |
154 | movdqu xmm10, xmmword ptr [r10+rdx-0x10] |
155 | movdqu xmm11, xmmword ptr [r11+rdx-0x10] |
156 | movdqa xmm12, xmm8 |
157 | punpckldq xmm8, xmm9 |
158 | punpckhdq xmm12, xmm9 |
159 | movdqa xmm14, xmm10 |
160 | punpckldq xmm10, xmm11 |
161 | punpckhdq xmm14, xmm11 |
162 | movdqa xmm9, xmm8 |
163 | punpcklqdq xmm8, xmm10 |
164 | punpckhqdq xmm9, xmm10 |
165 | movdqa xmm13, xmm12 |
166 | punpcklqdq xmm12, xmm14 |
167 | punpckhqdq xmm13, xmm14 |
168 | movdqa xmmword ptr [rsp+0xC0], xmm8 |
169 | movdqa xmmword ptr [rsp+0xD0], xmm9 |
170 | movdqa xmmword ptr [rsp+0xE0], xmm12 |
171 | movdqa xmmword ptr [rsp+0xF0], xmm13 |
172 | movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] |
173 | movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] |
174 | movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] |
175 | movdqa xmm12, xmmword ptr [rsp+0x110] |
176 | movdqa xmm13, xmmword ptr [rsp+0x120] |
177 | movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] |
178 | movd xmm15, eax |
179 | pshufd xmm15, xmm15, 0x00 |
180 | prefetcht0 [r8+rdx+0x80] |
181 | prefetcht0 [r9+rdx+0x80] |
182 | prefetcht0 [r10+rdx+0x80] |
183 | prefetcht0 [r11+rdx+0x80] |
184 | paddd xmm0, xmmword ptr [rsp] |
185 | paddd xmm1, xmmword ptr [rsp+0x20] |
186 | paddd xmm2, xmmword ptr [rsp+0x40] |
187 | paddd xmm3, xmmword ptr [rsp+0x60] |
188 | paddd xmm0, xmm4 |
189 | paddd xmm1, xmm5 |
190 | paddd xmm2, xmm6 |
191 | paddd xmm3, xmm7 |
192 | pxor xmm12, xmm0 |
193 | pxor xmm13, xmm1 |
194 | pxor xmm14, xmm2 |
195 | pxor xmm15, xmm3 |
196 | movdqa xmm8, xmmword ptr [ROT16+rip] |
197 | pshufb xmm12, xmm8 |
198 | pshufb xmm13, xmm8 |
199 | pshufb xmm14, xmm8 |
200 | pshufb xmm15, xmm8 |
201 | movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] |
202 | paddd xmm8, xmm12 |
203 | paddd xmm9, xmm13 |
204 | paddd xmm10, xmm14 |
205 | paddd xmm11, xmm15 |
206 | pxor xmm4, xmm8 |
207 | pxor xmm5, xmm9 |
208 | pxor xmm6, xmm10 |
209 | pxor xmm7, xmm11 |
210 | movdqa xmmword ptr [rsp+0x100], xmm8 |
211 | movdqa xmm8, xmm4 |
212 | psrld xmm8, 12 |
213 | pslld xmm4, 20 |
214 | por xmm4, xmm8 |
215 | movdqa xmm8, xmm5 |
216 | psrld xmm8, 12 |
217 | pslld xmm5, 20 |
218 | por xmm5, xmm8 |
219 | movdqa xmm8, xmm6 |
220 | psrld xmm8, 12 |
221 | pslld xmm6, 20 |
222 | por xmm6, xmm8 |
223 | movdqa xmm8, xmm7 |
224 | psrld xmm8, 12 |
225 | pslld xmm7, 20 |
226 | por xmm7, xmm8 |
227 | paddd xmm0, xmmword ptr [rsp+0x10] |
228 | paddd xmm1, xmmword ptr [rsp+0x30] |
229 | paddd xmm2, xmmword ptr [rsp+0x50] |
230 | paddd xmm3, xmmword ptr [rsp+0x70] |
231 | paddd xmm0, xmm4 |
232 | paddd xmm1, xmm5 |
233 | paddd xmm2, xmm6 |
234 | paddd xmm3, xmm7 |
235 | pxor xmm12, xmm0 |
236 | pxor xmm13, xmm1 |
237 | pxor xmm14, xmm2 |
238 | pxor xmm15, xmm3 |
239 | movdqa xmm8, xmmword ptr [ROT8+rip] |
240 | pshufb xmm12, xmm8 |
241 | pshufb xmm13, xmm8 |
242 | pshufb xmm14, xmm8 |
243 | pshufb xmm15, xmm8 |
244 | movdqa xmm8, xmmword ptr [rsp+0x100] |
245 | paddd xmm8, xmm12 |
246 | paddd xmm9, xmm13 |
247 | paddd xmm10, xmm14 |
248 | paddd xmm11, xmm15 |
249 | pxor xmm4, xmm8 |
250 | pxor xmm5, xmm9 |
251 | pxor xmm6, xmm10 |
252 | pxor xmm7, xmm11 |
253 | movdqa xmmword ptr [rsp+0x100], xmm8 |
254 | movdqa xmm8, xmm4 |
255 | psrld xmm8, 7 |
256 | pslld xmm4, 25 |
257 | por xmm4, xmm8 |
258 | movdqa xmm8, xmm5 |
259 | psrld xmm8, 7 |
260 | pslld xmm5, 25 |
261 | por xmm5, xmm8 |
262 | movdqa xmm8, xmm6 |
263 | psrld xmm8, 7 |
264 | pslld xmm6, 25 |
265 | por xmm6, xmm8 |
266 | movdqa xmm8, xmm7 |
267 | psrld xmm8, 7 |
268 | pslld xmm7, 25 |
269 | por xmm7, xmm8 |
270 | paddd xmm0, xmmword ptr [rsp+0x80] |
271 | paddd xmm1, xmmword ptr [rsp+0xA0] |
272 | paddd xmm2, xmmword ptr [rsp+0xC0] |
273 | paddd xmm3, xmmword ptr [rsp+0xE0] |
274 | paddd xmm0, xmm5 |
275 | paddd xmm1, xmm6 |
276 | paddd xmm2, xmm7 |
277 | paddd xmm3, xmm4 |
278 | pxor xmm15, xmm0 |
279 | pxor xmm12, xmm1 |
280 | pxor xmm13, xmm2 |
281 | pxor xmm14, xmm3 |
282 | movdqa xmm8, xmmword ptr [ROT16+rip] |
283 | pshufb xmm15, xmm8 |
284 | pshufb xmm12, xmm8 |
285 | pshufb xmm13, xmm8 |
286 | pshufb xmm14, xmm8 |
287 | paddd xmm10, xmm15 |
288 | paddd xmm11, xmm12 |
289 | movdqa xmm8, xmmword ptr [rsp+0x100] |
290 | paddd xmm8, xmm13 |
291 | paddd xmm9, xmm14 |
292 | pxor xmm5, xmm10 |
293 | pxor xmm6, xmm11 |
294 | pxor xmm7, xmm8 |
295 | pxor xmm4, xmm9 |
296 | movdqa xmmword ptr [rsp+0x100], xmm8 |
297 | movdqa xmm8, xmm5 |
298 | psrld xmm8, 12 |
299 | pslld xmm5, 20 |
300 | por xmm5, xmm8 |
301 | movdqa xmm8, xmm6 |
302 | psrld xmm8, 12 |
303 | pslld xmm6, 20 |
304 | por xmm6, xmm8 |
305 | movdqa xmm8, xmm7 |
306 | psrld xmm8, 12 |
307 | pslld xmm7, 20 |
308 | por xmm7, xmm8 |
309 | movdqa xmm8, xmm4 |
310 | psrld xmm8, 12 |
311 | pslld xmm4, 20 |
312 | por xmm4, xmm8 |
313 | paddd xmm0, xmmword ptr [rsp+0x90] |
314 | paddd xmm1, xmmword ptr [rsp+0xB0] |
315 | paddd xmm2, xmmword ptr [rsp+0xD0] |
316 | paddd xmm3, xmmword ptr [rsp+0xF0] |
317 | paddd xmm0, xmm5 |
318 | paddd xmm1, xmm6 |
319 | paddd xmm2, xmm7 |
320 | paddd xmm3, xmm4 |
321 | pxor xmm15, xmm0 |
322 | pxor xmm12, xmm1 |
323 | pxor xmm13, xmm2 |
324 | pxor xmm14, xmm3 |
325 | movdqa xmm8, xmmword ptr [ROT8+rip] |
326 | pshufb xmm15, xmm8 |
327 | pshufb xmm12, xmm8 |
328 | pshufb xmm13, xmm8 |
329 | pshufb xmm14, xmm8 |
330 | paddd xmm10, xmm15 |
331 | paddd xmm11, xmm12 |
332 | movdqa xmm8, xmmword ptr [rsp+0x100] |
333 | paddd xmm8, xmm13 |
334 | paddd xmm9, xmm14 |
335 | pxor xmm5, xmm10 |
336 | pxor xmm6, xmm11 |
337 | pxor xmm7, xmm8 |
338 | pxor xmm4, xmm9 |
339 | movdqa xmmword ptr [rsp+0x100], xmm8 |
340 | movdqa xmm8, xmm5 |
341 | psrld xmm8, 7 |
342 | pslld xmm5, 25 |
343 | por xmm5, xmm8 |
344 | movdqa xmm8, xmm6 |
345 | psrld xmm8, 7 |
346 | pslld xmm6, 25 |
347 | por xmm6, xmm8 |
348 | movdqa xmm8, xmm7 |
349 | psrld xmm8, 7 |
350 | pslld xmm7, 25 |
351 | por xmm7, xmm8 |
352 | movdqa xmm8, xmm4 |
353 | psrld xmm8, 7 |
354 | pslld xmm4, 25 |
355 | por xmm4, xmm8 |
356 | paddd xmm0, xmmword ptr [rsp+0x20] |
357 | paddd xmm1, xmmword ptr [rsp+0x30] |
358 | paddd xmm2, xmmword ptr [rsp+0x70] |
359 | paddd xmm3, xmmword ptr [rsp+0x40] |
360 | paddd xmm0, xmm4 |
361 | paddd xmm1, xmm5 |
362 | paddd xmm2, xmm6 |
363 | paddd xmm3, xmm7 |
364 | pxor xmm12, xmm0 |
365 | pxor xmm13, xmm1 |
366 | pxor xmm14, xmm2 |
367 | pxor xmm15, xmm3 |
368 | movdqa xmm8, xmmword ptr [ROT16+rip] |
369 | pshufb xmm12, xmm8 |
370 | pshufb xmm13, xmm8 |
371 | pshufb xmm14, xmm8 |
372 | pshufb xmm15, xmm8 |
373 | movdqa xmm8, xmmword ptr [rsp+0x100] |
374 | paddd xmm8, xmm12 |
375 | paddd xmm9, xmm13 |
376 | paddd xmm10, xmm14 |
377 | paddd xmm11, xmm15 |
378 | pxor xmm4, xmm8 |
379 | pxor xmm5, xmm9 |
380 | pxor xmm6, xmm10 |
381 | pxor xmm7, xmm11 |
382 | movdqa xmmword ptr [rsp+0x100], xmm8 |
383 | movdqa xmm8, xmm4 |
384 | psrld xmm8, 12 |
385 | pslld xmm4, 20 |
386 | por xmm4, xmm8 |
387 | movdqa xmm8, xmm5 |
388 | psrld xmm8, 12 |
389 | pslld xmm5, 20 |
390 | por xmm5, xmm8 |
391 | movdqa xmm8, xmm6 |
392 | psrld xmm8, 12 |
393 | pslld xmm6, 20 |
394 | por xmm6, xmm8 |
395 | movdqa xmm8, xmm7 |
396 | psrld xmm8, 12 |
397 | pslld xmm7, 20 |
398 | por xmm7, xmm8 |
399 | paddd xmm0, xmmword ptr [rsp+0x60] |
400 | paddd xmm1, xmmword ptr [rsp+0xA0] |
401 | paddd xmm2, xmmword ptr [rsp] |
402 | paddd xmm3, xmmword ptr [rsp+0xD0] |
403 | paddd xmm0, xmm4 |
404 | paddd xmm1, xmm5 |
405 | paddd xmm2, xmm6 |
406 | paddd xmm3, xmm7 |
407 | pxor xmm12, xmm0 |
408 | pxor xmm13, xmm1 |
409 | pxor xmm14, xmm2 |
410 | pxor xmm15, xmm3 |
411 | movdqa xmm8, xmmword ptr [ROT8+rip] |
412 | pshufb xmm12, xmm8 |
413 | pshufb xmm13, xmm8 |
414 | pshufb xmm14, xmm8 |
415 | pshufb xmm15, xmm8 |
416 | movdqa xmm8, xmmword ptr [rsp+0x100] |
417 | paddd xmm8, xmm12 |
418 | paddd xmm9, xmm13 |
419 | paddd xmm10, xmm14 |
420 | paddd xmm11, xmm15 |
421 | pxor xmm4, xmm8 |
422 | pxor xmm5, xmm9 |
423 | pxor xmm6, xmm10 |
424 | pxor xmm7, xmm11 |
425 | movdqa xmmword ptr [rsp+0x100], xmm8 |
426 | movdqa xmm8, xmm4 |
427 | psrld xmm8, 7 |
428 | pslld xmm4, 25 |
429 | por xmm4, xmm8 |
430 | movdqa xmm8, xmm5 |
431 | psrld xmm8, 7 |
432 | pslld xmm5, 25 |
433 | por xmm5, xmm8 |
434 | movdqa xmm8, xmm6 |
435 | psrld xmm8, 7 |
436 | pslld xmm6, 25 |
437 | por xmm6, xmm8 |
438 | movdqa xmm8, xmm7 |
439 | psrld xmm8, 7 |
440 | pslld xmm7, 25 |
441 | por xmm7, xmm8 |
442 | paddd xmm0, xmmword ptr [rsp+0x10] |
443 | paddd xmm1, xmmword ptr [rsp+0xC0] |
444 | paddd xmm2, xmmword ptr [rsp+0x90] |
445 | paddd xmm3, xmmword ptr [rsp+0xF0] |
446 | paddd xmm0, xmm5 |
447 | paddd xmm1, xmm6 |
448 | paddd xmm2, xmm7 |
449 | paddd xmm3, xmm4 |
450 | pxor xmm15, xmm0 |
451 | pxor xmm12, xmm1 |
452 | pxor xmm13, xmm2 |
453 | pxor xmm14, xmm3 |
454 | movdqa xmm8, xmmword ptr [ROT16+rip] |
455 | pshufb xmm15, xmm8 |
456 | pshufb xmm12, xmm8 |
457 | pshufb xmm13, xmm8 |
458 | pshufb xmm14, xmm8 |
459 | paddd xmm10, xmm15 |
460 | paddd xmm11, xmm12 |
461 | movdqa xmm8, xmmword ptr [rsp+0x100] |
462 | paddd xmm8, xmm13 |
463 | paddd xmm9, xmm14 |
464 | pxor xmm5, xmm10 |
465 | pxor xmm6, xmm11 |
466 | pxor xmm7, xmm8 |
467 | pxor xmm4, xmm9 |
468 | movdqa xmmword ptr [rsp+0x100], xmm8 |
469 | movdqa xmm8, xmm5 |
470 | psrld xmm8, 12 |
471 | pslld xmm5, 20 |
472 | por xmm5, xmm8 |
473 | movdqa xmm8, xmm6 |
474 | psrld xmm8, 12 |
475 | pslld xmm6, 20 |
476 | por xmm6, xmm8 |
477 | movdqa xmm8, xmm7 |
478 | psrld xmm8, 12 |
479 | pslld xmm7, 20 |
480 | por xmm7, xmm8 |
481 | movdqa xmm8, xmm4 |
482 | psrld xmm8, 12 |
483 | pslld xmm4, 20 |
484 | por xmm4, xmm8 |
485 | paddd xmm0, xmmword ptr [rsp+0xB0] |
486 | paddd xmm1, xmmword ptr [rsp+0x50] |
487 | paddd xmm2, xmmword ptr [rsp+0xE0] |
488 | paddd xmm3, xmmword ptr [rsp+0x80] |
489 | paddd xmm0, xmm5 |
490 | paddd xmm1, xmm6 |
491 | paddd xmm2, xmm7 |
492 | paddd xmm3, xmm4 |
493 | pxor xmm15, xmm0 |
494 | pxor xmm12, xmm1 |
495 | pxor xmm13, xmm2 |
496 | pxor xmm14, xmm3 |
497 | movdqa xmm8, xmmword ptr [ROT8+rip] |
498 | pshufb xmm15, xmm8 |
499 | pshufb xmm12, xmm8 |
500 | pshufb xmm13, xmm8 |
501 | pshufb xmm14, xmm8 |
502 | paddd xmm10, xmm15 |
503 | paddd xmm11, xmm12 |
504 | movdqa xmm8, xmmword ptr [rsp+0x100] |
505 | paddd xmm8, xmm13 |
506 | paddd xmm9, xmm14 |
507 | pxor xmm5, xmm10 |
508 | pxor xmm6, xmm11 |
509 | pxor xmm7, xmm8 |
510 | pxor xmm4, xmm9 |
511 | movdqa xmmword ptr [rsp+0x100], xmm8 |
512 | movdqa xmm8, xmm5 |
513 | psrld xmm8, 7 |
514 | pslld xmm5, 25 |
515 | por xmm5, xmm8 |
516 | movdqa xmm8, xmm6 |
517 | psrld xmm8, 7 |
518 | pslld xmm6, 25 |
519 | por xmm6, xmm8 |
520 | movdqa xmm8, xmm7 |
521 | psrld xmm8, 7 |
522 | pslld xmm7, 25 |
523 | por xmm7, xmm8 |
524 | movdqa xmm8, xmm4 |
525 | psrld xmm8, 7 |
526 | pslld xmm4, 25 |
527 | por xmm4, xmm8 |
528 | paddd xmm0, xmmword ptr [rsp+0x30] |
529 | paddd xmm1, xmmword ptr [rsp+0xA0] |
530 | paddd xmm2, xmmword ptr [rsp+0xD0] |
531 | paddd xmm3, xmmword ptr [rsp+0x70] |
532 | paddd xmm0, xmm4 |
533 | paddd xmm1, xmm5 |
534 | paddd xmm2, xmm6 |
535 | paddd xmm3, xmm7 |
536 | pxor xmm12, xmm0 |
537 | pxor xmm13, xmm1 |
538 | pxor xmm14, xmm2 |
539 | pxor xmm15, xmm3 |
540 | movdqa xmm8, xmmword ptr [ROT16+rip] |
541 | pshufb xmm12, xmm8 |
542 | pshufb xmm13, xmm8 |
543 | pshufb xmm14, xmm8 |
544 | pshufb xmm15, xmm8 |
545 | movdqa xmm8, xmmword ptr [rsp+0x100] |
546 | paddd xmm8, xmm12 |
547 | paddd xmm9, xmm13 |
548 | paddd xmm10, xmm14 |
549 | paddd xmm11, xmm15 |
550 | pxor xmm4, xmm8 |
551 | pxor xmm5, xmm9 |
552 | pxor xmm6, xmm10 |
553 | pxor xmm7, xmm11 |
554 | movdqa xmmword ptr [rsp+0x100], xmm8 |
555 | movdqa xmm8, xmm4 |
556 | psrld xmm8, 12 |
557 | pslld xmm4, 20 |
558 | por xmm4, xmm8 |
559 | movdqa xmm8, xmm5 |
560 | psrld xmm8, 12 |
561 | pslld xmm5, 20 |
562 | por xmm5, xmm8 |
563 | movdqa xmm8, xmm6 |
564 | psrld xmm8, 12 |
565 | pslld xmm6, 20 |
566 | por xmm6, xmm8 |
567 | movdqa xmm8, xmm7 |
568 | psrld xmm8, 12 |
569 | pslld xmm7, 20 |
570 | por xmm7, xmm8 |
571 | paddd xmm0, xmmword ptr [rsp+0x40] |
572 | paddd xmm1, xmmword ptr [rsp+0xC0] |
573 | paddd xmm2, xmmword ptr [rsp+0x20] |
574 | paddd xmm3, xmmword ptr [rsp+0xE0] |
575 | paddd xmm0, xmm4 |
576 | paddd xmm1, xmm5 |
577 | paddd xmm2, xmm6 |
578 | paddd xmm3, xmm7 |
579 | pxor xmm12, xmm0 |
580 | pxor xmm13, xmm1 |
581 | pxor xmm14, xmm2 |
582 | pxor xmm15, xmm3 |
583 | movdqa xmm8, xmmword ptr [ROT8+rip] |
584 | pshufb xmm12, xmm8 |
585 | pshufb xmm13, xmm8 |
586 | pshufb xmm14, xmm8 |
587 | pshufb xmm15, xmm8 |
588 | movdqa xmm8, xmmword ptr [rsp+0x100] |
589 | paddd xmm8, xmm12 |
590 | paddd xmm9, xmm13 |
591 | paddd xmm10, xmm14 |
592 | paddd xmm11, xmm15 |
593 | pxor xmm4, xmm8 |
594 | pxor xmm5, xmm9 |
595 | pxor xmm6, xmm10 |
596 | pxor xmm7, xmm11 |
597 | movdqa xmmword ptr [rsp+0x100], xmm8 |
598 | movdqa xmm8, xmm4 |
599 | psrld xmm8, 7 |
600 | pslld xmm4, 25 |
601 | por xmm4, xmm8 |
602 | movdqa xmm8, xmm5 |
603 | psrld xmm8, 7 |
604 | pslld xmm5, 25 |
605 | por xmm5, xmm8 |
606 | movdqa xmm8, xmm6 |
607 | psrld xmm8, 7 |
608 | pslld xmm6, 25 |
609 | por xmm6, xmm8 |
610 | movdqa xmm8, xmm7 |
611 | psrld xmm8, 7 |
612 | pslld xmm7, 25 |
613 | por xmm7, xmm8 |
614 | paddd xmm0, xmmword ptr [rsp+0x60] |
615 | paddd xmm1, xmmword ptr [rsp+0x90] |
616 | paddd xmm2, xmmword ptr [rsp+0xB0] |
617 | paddd xmm3, xmmword ptr [rsp+0x80] |
618 | paddd xmm0, xmm5 |
619 | paddd xmm1, xmm6 |
620 | paddd xmm2, xmm7 |
621 | paddd xmm3, xmm4 |
622 | pxor xmm15, xmm0 |
623 | pxor xmm12, xmm1 |
624 | pxor xmm13, xmm2 |
625 | pxor xmm14, xmm3 |
626 | movdqa xmm8, xmmword ptr [ROT16+rip] |
627 | pshufb xmm15, xmm8 |
628 | pshufb xmm12, xmm8 |
629 | pshufb xmm13, xmm8 |
630 | pshufb xmm14, xmm8 |
631 | paddd xmm10, xmm15 |
632 | paddd xmm11, xmm12 |
633 | movdqa xmm8, xmmword ptr [rsp+0x100] |
634 | paddd xmm8, xmm13 |
635 | paddd xmm9, xmm14 |
636 | pxor xmm5, xmm10 |
637 | pxor xmm6, xmm11 |
638 | pxor xmm7, xmm8 |
639 | pxor xmm4, xmm9 |
640 | movdqa xmmword ptr [rsp+0x100], xmm8 |
641 | movdqa xmm8, xmm5 |
642 | psrld xmm8, 12 |
643 | pslld xmm5, 20 |
644 | por xmm5, xmm8 |
645 | movdqa xmm8, xmm6 |
646 | psrld xmm8, 12 |
647 | pslld xmm6, 20 |
648 | por xmm6, xmm8 |
649 | movdqa xmm8, xmm7 |
650 | psrld xmm8, 12 |
651 | pslld xmm7, 20 |
652 | por xmm7, xmm8 |
653 | movdqa xmm8, xmm4 |
654 | psrld xmm8, 12 |
655 | pslld xmm4, 20 |
656 | por xmm4, xmm8 |
657 | paddd xmm0, xmmword ptr [rsp+0x50] |
658 | paddd xmm1, xmmword ptr [rsp] |
659 | paddd xmm2, xmmword ptr [rsp+0xF0] |
660 | paddd xmm3, xmmword ptr [rsp+0x10] |
661 | paddd xmm0, xmm5 |
662 | paddd xmm1, xmm6 |
663 | paddd xmm2, xmm7 |
664 | paddd xmm3, xmm4 |
665 | pxor xmm15, xmm0 |
666 | pxor xmm12, xmm1 |
667 | pxor xmm13, xmm2 |
668 | pxor xmm14, xmm3 |
669 | movdqa xmm8, xmmword ptr [ROT8+rip] |
670 | pshufb xmm15, xmm8 |
671 | pshufb xmm12, xmm8 |
672 | pshufb xmm13, xmm8 |
673 | pshufb xmm14, xmm8 |
674 | paddd xmm10, xmm15 |
675 | paddd xmm11, xmm12 |
676 | movdqa xmm8, xmmword ptr [rsp+0x100] |
677 | paddd xmm8, xmm13 |
678 | paddd xmm9, xmm14 |
679 | pxor xmm5, xmm10 |
680 | pxor xmm6, xmm11 |
681 | pxor xmm7, xmm8 |
682 | pxor xmm4, xmm9 |
683 | movdqa xmmword ptr [rsp+0x100], xmm8 |
684 | movdqa xmm8, xmm5 |
685 | psrld xmm8, 7 |
686 | pslld xmm5, 25 |
687 | por xmm5, xmm8 |
688 | movdqa xmm8, xmm6 |
689 | psrld xmm8, 7 |
690 | pslld xmm6, 25 |
691 | por xmm6, xmm8 |
692 | movdqa xmm8, xmm7 |
693 | psrld xmm8, 7 |
694 | pslld xmm7, 25 |
695 | por xmm7, xmm8 |
696 | movdqa xmm8, xmm4 |
697 | psrld xmm8, 7 |
698 | pslld xmm4, 25 |
699 | por xmm4, xmm8 |
700 | paddd xmm0, xmmword ptr [rsp+0xA0] |
701 | paddd xmm1, xmmword ptr [rsp+0xC0] |
702 | paddd xmm2, xmmword ptr [rsp+0xE0] |
703 | paddd xmm3, xmmword ptr [rsp+0xD0] |
704 | paddd xmm0, xmm4 |
705 | paddd xmm1, xmm5 |
706 | paddd xmm2, xmm6 |
707 | paddd xmm3, xmm7 |
708 | pxor xmm12, xmm0 |
709 | pxor xmm13, xmm1 |
710 | pxor xmm14, xmm2 |
711 | pxor xmm15, xmm3 |
712 | movdqa xmm8, xmmword ptr [ROT16+rip] |
713 | pshufb xmm12, xmm8 |
714 | pshufb xmm13, xmm8 |
715 | pshufb xmm14, xmm8 |
716 | pshufb xmm15, xmm8 |
717 | movdqa xmm8, xmmword ptr [rsp+0x100] |
718 | paddd xmm8, xmm12 |
719 | paddd xmm9, xmm13 |
720 | paddd xmm10, xmm14 |
721 | paddd xmm11, xmm15 |
722 | pxor xmm4, xmm8 |
723 | pxor xmm5, xmm9 |
724 | pxor xmm6, xmm10 |
725 | pxor xmm7, xmm11 |
726 | movdqa xmmword ptr [rsp+0x100], xmm8 |
727 | movdqa xmm8, xmm4 |
728 | psrld xmm8, 12 |
729 | pslld xmm4, 20 |
730 | por xmm4, xmm8 |
731 | movdqa xmm8, xmm5 |
732 | psrld xmm8, 12 |
733 | pslld xmm5, 20 |
734 | por xmm5, xmm8 |
735 | movdqa xmm8, xmm6 |
736 | psrld xmm8, 12 |
737 | pslld xmm6, 20 |
738 | por xmm6, xmm8 |
739 | movdqa xmm8, xmm7 |
740 | psrld xmm8, 12 |
741 | pslld xmm7, 20 |
742 | por xmm7, xmm8 |
743 | paddd xmm0, xmmword ptr [rsp+0x70] |
744 | paddd xmm1, xmmword ptr [rsp+0x90] |
745 | paddd xmm2, xmmword ptr [rsp+0x30] |
746 | paddd xmm3, xmmword ptr [rsp+0xF0] |
747 | paddd xmm0, xmm4 |
748 | paddd xmm1, xmm5 |
749 | paddd xmm2, xmm6 |
750 | paddd xmm3, xmm7 |
751 | pxor xmm12, xmm0 |
752 | pxor xmm13, xmm1 |
753 | pxor xmm14, xmm2 |
754 | pxor xmm15, xmm3 |
755 | movdqa xmm8, xmmword ptr [ROT8+rip] |
756 | pshufb xmm12, xmm8 |
757 | pshufb xmm13, xmm8 |
758 | pshufb xmm14, xmm8 |
759 | pshufb xmm15, xmm8 |
760 | movdqa xmm8, xmmword ptr [rsp+0x100] |
761 | paddd xmm8, xmm12 |
762 | paddd xmm9, xmm13 |
763 | paddd xmm10, xmm14 |
764 | paddd xmm11, xmm15 |
765 | pxor xmm4, xmm8 |
766 | pxor xmm5, xmm9 |
767 | pxor xmm6, xmm10 |
768 | pxor xmm7, xmm11 |
769 | movdqa xmmword ptr [rsp+0x100], xmm8 |
770 | movdqa xmm8, xmm4 |
771 | psrld xmm8, 7 |
772 | pslld xmm4, 25 |
773 | por xmm4, xmm8 |
774 | movdqa xmm8, xmm5 |
775 | psrld xmm8, 7 |
776 | pslld xmm5, 25 |
777 | por xmm5, xmm8 |
778 | movdqa xmm8, xmm6 |
779 | psrld xmm8, 7 |
780 | pslld xmm6, 25 |
781 | por xmm6, xmm8 |
782 | movdqa xmm8, xmm7 |
783 | psrld xmm8, 7 |
784 | pslld xmm7, 25 |
785 | por xmm7, xmm8 |
786 | paddd xmm0, xmmword ptr [rsp+0x40] |
787 | paddd xmm1, xmmword ptr [rsp+0xB0] |
788 | paddd xmm2, xmmword ptr [rsp+0x50] |
789 | paddd xmm3, xmmword ptr [rsp+0x10] |
790 | paddd xmm0, xmm5 |
791 | paddd xmm1, xmm6 |
792 | paddd xmm2, xmm7 |
793 | paddd xmm3, xmm4 |
794 | pxor xmm15, xmm0 |
795 | pxor xmm12, xmm1 |
796 | pxor xmm13, xmm2 |
797 | pxor xmm14, xmm3 |
798 | movdqa xmm8, xmmword ptr [ROT16+rip] |
799 | pshufb xmm15, xmm8 |
800 | pshufb xmm12, xmm8 |
801 | pshufb xmm13, xmm8 |
802 | pshufb xmm14, xmm8 |
803 | paddd xmm10, xmm15 |
804 | paddd xmm11, xmm12 |
805 | movdqa xmm8, xmmword ptr [rsp+0x100] |
806 | paddd xmm8, xmm13 |
807 | paddd xmm9, xmm14 |
808 | pxor xmm5, xmm10 |
809 | pxor xmm6, xmm11 |
810 | pxor xmm7, xmm8 |
811 | pxor xmm4, xmm9 |
812 | movdqa xmmword ptr [rsp+0x100], xmm8 |
813 | movdqa xmm8, xmm5 |
814 | psrld xmm8, 12 |
815 | pslld xmm5, 20 |
816 | por xmm5, xmm8 |
817 | movdqa xmm8, xmm6 |
818 | psrld xmm8, 12 |
819 | pslld xmm6, 20 |
820 | por xmm6, xmm8 |
821 | movdqa xmm8, xmm7 |
822 | psrld xmm8, 12 |
823 | pslld xmm7, 20 |
824 | por xmm7, xmm8 |
825 | movdqa xmm8, xmm4 |
826 | psrld xmm8, 12 |
827 | pslld xmm4, 20 |
828 | por xmm4, xmm8 |
829 | paddd xmm0, xmmword ptr [rsp] |
830 | paddd xmm1, xmmword ptr [rsp+0x20] |
831 | paddd xmm2, xmmword ptr [rsp+0x80] |
832 | paddd xmm3, xmmword ptr [rsp+0x60] |
833 | paddd xmm0, xmm5 |
834 | paddd xmm1, xmm6 |
835 | paddd xmm2, xmm7 |
836 | paddd xmm3, xmm4 |
837 | pxor xmm15, xmm0 |
838 | pxor xmm12, xmm1 |
839 | pxor xmm13, xmm2 |
840 | pxor xmm14, xmm3 |
841 | movdqa xmm8, xmmword ptr [ROT8+rip] |
842 | pshufb xmm15, xmm8 |
843 | pshufb xmm12, xmm8 |
844 | pshufb xmm13, xmm8 |
845 | pshufb xmm14, xmm8 |
846 | paddd xmm10, xmm15 |
847 | paddd xmm11, xmm12 |
848 | movdqa xmm8, xmmword ptr [rsp+0x100] |
849 | paddd xmm8, xmm13 |
850 | paddd xmm9, xmm14 |
851 | pxor xmm5, xmm10 |
852 | pxor xmm6, xmm11 |
853 | pxor xmm7, xmm8 |
854 | pxor xmm4, xmm9 |
855 | movdqa xmmword ptr [rsp+0x100], xmm8 |
856 | movdqa xmm8, xmm5 |
857 | psrld xmm8, 7 |
858 | pslld xmm5, 25 |
859 | por xmm5, xmm8 |
860 | movdqa xmm8, xmm6 |
861 | psrld xmm8, 7 |
862 | pslld xmm6, 25 |
863 | por xmm6, xmm8 |
864 | movdqa xmm8, xmm7 |
865 | psrld xmm8, 7 |
866 | pslld xmm7, 25 |
867 | por xmm7, xmm8 |
868 | movdqa xmm8, xmm4 |
869 | psrld xmm8, 7 |
870 | pslld xmm4, 25 |
871 | por xmm4, xmm8 |
872 | paddd xmm0, xmmword ptr [rsp+0xC0] |
873 | paddd xmm1, xmmword ptr [rsp+0x90] |
874 | paddd xmm2, xmmword ptr [rsp+0xF0] |
875 | paddd xmm3, xmmword ptr [rsp+0xE0] |
876 | paddd xmm0, xmm4 |
877 | paddd xmm1, xmm5 |
878 | paddd xmm2, xmm6 |
879 | paddd xmm3, xmm7 |
880 | pxor xmm12, xmm0 |
881 | pxor xmm13, xmm1 |
882 | pxor xmm14, xmm2 |
883 | pxor xmm15, xmm3 |
884 | movdqa xmm8, xmmword ptr [ROT16+rip] |
885 | pshufb xmm12, xmm8 |
886 | pshufb xmm13, xmm8 |
887 | pshufb xmm14, xmm8 |
888 | pshufb xmm15, xmm8 |
889 | movdqa xmm8, xmmword ptr [rsp+0x100] |
890 | paddd xmm8, xmm12 |
891 | paddd xmm9, xmm13 |
892 | paddd xmm10, xmm14 |
893 | paddd xmm11, xmm15 |
894 | pxor xmm4, xmm8 |
895 | pxor xmm5, xmm9 |
896 | pxor xmm6, xmm10 |
897 | pxor xmm7, xmm11 |
898 | movdqa xmmword ptr [rsp+0x100], xmm8 |
899 | movdqa xmm8, xmm4 |
900 | psrld xmm8, 12 |
901 | pslld xmm4, 20 |
902 | por xmm4, xmm8 |
903 | movdqa xmm8, xmm5 |
904 | psrld xmm8, 12 |
905 | pslld xmm5, 20 |
906 | por xmm5, xmm8 |
907 | movdqa xmm8, xmm6 |
908 | psrld xmm8, 12 |
909 | pslld xmm6, 20 |
910 | por xmm6, xmm8 |
911 | movdqa xmm8, xmm7 |
912 | psrld xmm8, 12 |
913 | pslld xmm7, 20 |
914 | por xmm7, xmm8 |
915 | paddd xmm0, xmmword ptr [rsp+0xD0] |
916 | paddd xmm1, xmmword ptr [rsp+0xB0] |
917 | paddd xmm2, xmmword ptr [rsp+0xA0] |
918 | paddd xmm3, xmmword ptr [rsp+0x80] |
919 | paddd xmm0, xmm4 |
920 | paddd xmm1, xmm5 |
921 | paddd xmm2, xmm6 |
922 | paddd xmm3, xmm7 |
923 | pxor xmm12, xmm0 |
924 | pxor xmm13, xmm1 |
925 | pxor xmm14, xmm2 |
926 | pxor xmm15, xmm3 |
927 | movdqa xmm8, xmmword ptr [ROT8+rip] |
928 | pshufb xmm12, xmm8 |
929 | pshufb xmm13, xmm8 |
930 | pshufb xmm14, xmm8 |
931 | pshufb xmm15, xmm8 |
932 | movdqa xmm8, xmmword ptr [rsp+0x100] |
933 | paddd xmm8, xmm12 |
934 | paddd xmm9, xmm13 |
935 | paddd xmm10, xmm14 |
936 | paddd xmm11, xmm15 |
937 | pxor xmm4, xmm8 |
938 | pxor xmm5, xmm9 |
939 | pxor xmm6, xmm10 |
940 | pxor xmm7, xmm11 |
941 | movdqa xmmword ptr [rsp+0x100], xmm8 |
942 | movdqa xmm8, xmm4 |
943 | psrld xmm8, 7 |
944 | pslld xmm4, 25 |
945 | por xmm4, xmm8 |
946 | movdqa xmm8, xmm5 |
947 | psrld xmm8, 7 |
948 | pslld xmm5, 25 |
949 | por xmm5, xmm8 |
950 | movdqa xmm8, xmm6 |
951 | psrld xmm8, 7 |
952 | pslld xmm6, 25 |
953 | por xmm6, xmm8 |
954 | movdqa xmm8, xmm7 |
955 | psrld xmm8, 7 |
956 | pslld xmm7, 25 |
957 | por xmm7, xmm8 |
958 | paddd xmm0, xmmword ptr [rsp+0x70] |
959 | paddd xmm1, xmmword ptr [rsp+0x50] |
960 | paddd xmm2, xmmword ptr [rsp] |
961 | paddd xmm3, xmmword ptr [rsp+0x60] |
962 | paddd xmm0, xmm5 |
963 | paddd xmm1, xmm6 |
964 | paddd xmm2, xmm7 |
965 | paddd xmm3, xmm4 |
966 | pxor xmm15, xmm0 |
967 | pxor xmm12, xmm1 |
968 | pxor xmm13, xmm2 |
969 | pxor xmm14, xmm3 |
970 | movdqa xmm8, xmmword ptr [ROT16+rip] |
971 | pshufb xmm15, xmm8 |
972 | pshufb xmm12, xmm8 |
973 | pshufb xmm13, xmm8 |
974 | pshufb xmm14, xmm8 |
975 | paddd xmm10, xmm15 |
976 | paddd xmm11, xmm12 |
977 | movdqa xmm8, xmmword ptr [rsp+0x100] |
978 | paddd xmm8, xmm13 |
979 | paddd xmm9, xmm14 |
980 | pxor xmm5, xmm10 |
981 | pxor xmm6, xmm11 |
982 | pxor xmm7, xmm8 |
983 | pxor xmm4, xmm9 |
984 | movdqa xmmword ptr [rsp+0x100], xmm8 |
985 | movdqa xmm8, xmm5 |
986 | psrld xmm8, 12 |
987 | pslld xmm5, 20 |
988 | por xmm5, xmm8 |
989 | movdqa xmm8, xmm6 |
990 | psrld xmm8, 12 |
991 | pslld xmm6, 20 |
992 | por xmm6, xmm8 |
993 | movdqa xmm8, xmm7 |
994 | psrld xmm8, 12 |
995 | pslld xmm7, 20 |
996 | por xmm7, xmm8 |
997 | movdqa xmm8, xmm4 |
998 | psrld xmm8, 12 |
999 | pslld xmm4, 20 |
1000 | por xmm4, xmm8 |
1001 | paddd xmm0, xmmword ptr [rsp+0x20] |
1002 | paddd xmm1, xmmword ptr [rsp+0x30] |
1003 | paddd xmm2, xmmword ptr [rsp+0x10] |
1004 | paddd xmm3, xmmword ptr [rsp+0x40] |
1005 | paddd xmm0, xmm5 |
1006 | paddd xmm1, xmm6 |
1007 | paddd xmm2, xmm7 |
1008 | paddd xmm3, xmm4 |
1009 | pxor xmm15, xmm0 |
1010 | pxor xmm12, xmm1 |
1011 | pxor xmm13, xmm2 |
1012 | pxor xmm14, xmm3 |
1013 | movdqa xmm8, xmmword ptr [ROT8+rip] |
1014 | pshufb xmm15, xmm8 |
1015 | pshufb xmm12, xmm8 |
1016 | pshufb xmm13, xmm8 |
1017 | pshufb xmm14, xmm8 |
1018 | paddd xmm10, xmm15 |
1019 | paddd xmm11, xmm12 |
1020 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1021 | paddd xmm8, xmm13 |
1022 | paddd xmm9, xmm14 |
1023 | pxor xmm5, xmm10 |
1024 | pxor xmm6, xmm11 |
1025 | pxor xmm7, xmm8 |
1026 | pxor xmm4, xmm9 |
1027 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1028 | movdqa xmm8, xmm5 |
1029 | psrld xmm8, 7 |
1030 | pslld xmm5, 25 |
1031 | por xmm5, xmm8 |
1032 | movdqa xmm8, xmm6 |
1033 | psrld xmm8, 7 |
1034 | pslld xmm6, 25 |
1035 | por xmm6, xmm8 |
1036 | movdqa xmm8, xmm7 |
1037 | psrld xmm8, 7 |
1038 | pslld xmm7, 25 |
1039 | por xmm7, xmm8 |
1040 | movdqa xmm8, xmm4 |
1041 | psrld xmm8, 7 |
1042 | pslld xmm4, 25 |
1043 | por xmm4, xmm8 |
1044 | paddd xmm0, xmmword ptr [rsp+0x90] |
1045 | paddd xmm1, xmmword ptr [rsp+0xB0] |
1046 | paddd xmm2, xmmword ptr [rsp+0x80] |
1047 | paddd xmm3, xmmword ptr [rsp+0xF0] |
1048 | paddd xmm0, xmm4 |
1049 | paddd xmm1, xmm5 |
1050 | paddd xmm2, xmm6 |
1051 | paddd xmm3, xmm7 |
1052 | pxor xmm12, xmm0 |
1053 | pxor xmm13, xmm1 |
1054 | pxor xmm14, xmm2 |
1055 | pxor xmm15, xmm3 |
1056 | movdqa xmm8, xmmword ptr [ROT16+rip] |
1057 | pshufb xmm12, xmm8 |
1058 | pshufb xmm13, xmm8 |
1059 | pshufb xmm14, xmm8 |
1060 | pshufb xmm15, xmm8 |
1061 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1062 | paddd xmm8, xmm12 |
1063 | paddd xmm9, xmm13 |
1064 | paddd xmm10, xmm14 |
1065 | paddd xmm11, xmm15 |
1066 | pxor xmm4, xmm8 |
1067 | pxor xmm5, xmm9 |
1068 | pxor xmm6, xmm10 |
1069 | pxor xmm7, xmm11 |
1070 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1071 | movdqa xmm8, xmm4 |
1072 | psrld xmm8, 12 |
1073 | pslld xmm4, 20 |
1074 | por xmm4, xmm8 |
1075 | movdqa xmm8, xmm5 |
1076 | psrld xmm8, 12 |
1077 | pslld xmm5, 20 |
1078 | por xmm5, xmm8 |
1079 | movdqa xmm8, xmm6 |
1080 | psrld xmm8, 12 |
1081 | pslld xmm6, 20 |
1082 | por xmm6, xmm8 |
1083 | movdqa xmm8, xmm7 |
1084 | psrld xmm8, 12 |
1085 | pslld xmm7, 20 |
1086 | por xmm7, xmm8 |
1087 | paddd xmm0, xmmword ptr [rsp+0xE0] |
1088 | paddd xmm1, xmmword ptr [rsp+0x50] |
1089 | paddd xmm2, xmmword ptr [rsp+0xC0] |
1090 | paddd xmm3, xmmword ptr [rsp+0x10] |
1091 | paddd xmm0, xmm4 |
1092 | paddd xmm1, xmm5 |
1093 | paddd xmm2, xmm6 |
1094 | paddd xmm3, xmm7 |
1095 | pxor xmm12, xmm0 |
1096 | pxor xmm13, xmm1 |
1097 | pxor xmm14, xmm2 |
1098 | pxor xmm15, xmm3 |
1099 | movdqa xmm8, xmmword ptr [ROT8+rip] |
1100 | pshufb xmm12, xmm8 |
1101 | pshufb xmm13, xmm8 |
1102 | pshufb xmm14, xmm8 |
1103 | pshufb xmm15, xmm8 |
1104 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1105 | paddd xmm8, xmm12 |
1106 | paddd xmm9, xmm13 |
1107 | paddd xmm10, xmm14 |
1108 | paddd xmm11, xmm15 |
1109 | pxor xmm4, xmm8 |
1110 | pxor xmm5, xmm9 |
1111 | pxor xmm6, xmm10 |
1112 | pxor xmm7, xmm11 |
1113 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1114 | movdqa xmm8, xmm4 |
1115 | psrld xmm8, 7 |
1116 | pslld xmm4, 25 |
1117 | por xmm4, xmm8 |
1118 | movdqa xmm8, xmm5 |
1119 | psrld xmm8, 7 |
1120 | pslld xmm5, 25 |
1121 | por xmm5, xmm8 |
1122 | movdqa xmm8, xmm6 |
1123 | psrld xmm8, 7 |
1124 | pslld xmm6, 25 |
1125 | por xmm6, xmm8 |
1126 | movdqa xmm8, xmm7 |
1127 | psrld xmm8, 7 |
1128 | pslld xmm7, 25 |
1129 | por xmm7, xmm8 |
1130 | paddd xmm0, xmmword ptr [rsp+0xD0] |
1131 | paddd xmm1, xmmword ptr [rsp] |
1132 | paddd xmm2, xmmword ptr [rsp+0x20] |
1133 | paddd xmm3, xmmword ptr [rsp+0x40] |
1134 | paddd xmm0, xmm5 |
1135 | paddd xmm1, xmm6 |
1136 | paddd xmm2, xmm7 |
1137 | paddd xmm3, xmm4 |
1138 | pxor xmm15, xmm0 |
1139 | pxor xmm12, xmm1 |
1140 | pxor xmm13, xmm2 |
1141 | pxor xmm14, xmm3 |
1142 | movdqa xmm8, xmmword ptr [ROT16+rip] |
1143 | pshufb xmm15, xmm8 |
1144 | pshufb xmm12, xmm8 |
1145 | pshufb xmm13, xmm8 |
1146 | pshufb xmm14, xmm8 |
1147 | paddd xmm10, xmm15 |
1148 | paddd xmm11, xmm12 |
1149 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1150 | paddd xmm8, xmm13 |
1151 | paddd xmm9, xmm14 |
1152 | pxor xmm5, xmm10 |
1153 | pxor xmm6, xmm11 |
1154 | pxor xmm7, xmm8 |
1155 | pxor xmm4, xmm9 |
1156 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1157 | movdqa xmm8, xmm5 |
1158 | psrld xmm8, 12 |
1159 | pslld xmm5, 20 |
1160 | por xmm5, xmm8 |
1161 | movdqa xmm8, xmm6 |
1162 | psrld xmm8, 12 |
1163 | pslld xmm6, 20 |
1164 | por xmm6, xmm8 |
1165 | movdqa xmm8, xmm7 |
1166 | psrld xmm8, 12 |
1167 | pslld xmm7, 20 |
1168 | por xmm7, xmm8 |
1169 | movdqa xmm8, xmm4 |
1170 | psrld xmm8, 12 |
1171 | pslld xmm4, 20 |
1172 | por xmm4, xmm8 |
1173 | paddd xmm0, xmmword ptr [rsp+0x30] |
1174 | paddd xmm1, xmmword ptr [rsp+0xA0] |
1175 | paddd xmm2, xmmword ptr [rsp+0x60] |
1176 | paddd xmm3, xmmword ptr [rsp+0x70] |
1177 | paddd xmm0, xmm5 |
1178 | paddd xmm1, xmm6 |
1179 | paddd xmm2, xmm7 |
1180 | paddd xmm3, xmm4 |
1181 | pxor xmm15, xmm0 |
1182 | pxor xmm12, xmm1 |
1183 | pxor xmm13, xmm2 |
1184 | pxor xmm14, xmm3 |
1185 | movdqa xmm8, xmmword ptr [ROT8+rip] |
1186 | pshufb xmm15, xmm8 |
1187 | pshufb xmm12, xmm8 |
1188 | pshufb xmm13, xmm8 |
1189 | pshufb xmm14, xmm8 |
1190 | paddd xmm10, xmm15 |
1191 | paddd xmm11, xmm12 |
1192 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1193 | paddd xmm8, xmm13 |
1194 | paddd xmm9, xmm14 |
1195 | pxor xmm5, xmm10 |
1196 | pxor xmm6, xmm11 |
1197 | pxor xmm7, xmm8 |
1198 | pxor xmm4, xmm9 |
1199 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1200 | movdqa xmm8, xmm5 |
1201 | psrld xmm8, 7 |
1202 | pslld xmm5, 25 |
1203 | por xmm5, xmm8 |
1204 | movdqa xmm8, xmm6 |
1205 | psrld xmm8, 7 |
1206 | pslld xmm6, 25 |
1207 | por xmm6, xmm8 |
1208 | movdqa xmm8, xmm7 |
1209 | psrld xmm8, 7 |
1210 | pslld xmm7, 25 |
1211 | por xmm7, xmm8 |
1212 | movdqa xmm8, xmm4 |
1213 | psrld xmm8, 7 |
1214 | pslld xmm4, 25 |
1215 | por xmm4, xmm8 |
1216 | paddd xmm0, xmmword ptr [rsp+0xB0] |
1217 | paddd xmm1, xmmword ptr [rsp+0x50] |
1218 | paddd xmm2, xmmword ptr [rsp+0x10] |
1219 | paddd xmm3, xmmword ptr [rsp+0x80] |
1220 | paddd xmm0, xmm4 |
1221 | paddd xmm1, xmm5 |
1222 | paddd xmm2, xmm6 |
1223 | paddd xmm3, xmm7 |
1224 | pxor xmm12, xmm0 |
1225 | pxor xmm13, xmm1 |
1226 | pxor xmm14, xmm2 |
1227 | pxor xmm15, xmm3 |
1228 | movdqa xmm8, xmmword ptr [ROT16+rip] |
1229 | pshufb xmm12, xmm8 |
1230 | pshufb xmm13, xmm8 |
1231 | pshufb xmm14, xmm8 |
1232 | pshufb xmm15, xmm8 |
1233 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1234 | paddd xmm8, xmm12 |
1235 | paddd xmm9, xmm13 |
1236 | paddd xmm10, xmm14 |
1237 | paddd xmm11, xmm15 |
1238 | pxor xmm4, xmm8 |
1239 | pxor xmm5, xmm9 |
1240 | pxor xmm6, xmm10 |
1241 | pxor xmm7, xmm11 |
1242 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1243 | movdqa xmm8, xmm4 |
1244 | psrld xmm8, 12 |
1245 | pslld xmm4, 20 |
1246 | por xmm4, xmm8 |
1247 | movdqa xmm8, xmm5 |
1248 | psrld xmm8, 12 |
1249 | pslld xmm5, 20 |
1250 | por xmm5, xmm8 |
1251 | movdqa xmm8, xmm6 |
1252 | psrld xmm8, 12 |
1253 | pslld xmm6, 20 |
1254 | por xmm6, xmm8 |
1255 | movdqa xmm8, xmm7 |
1256 | psrld xmm8, 12 |
1257 | pslld xmm7, 20 |
1258 | por xmm7, xmm8 |
1259 | paddd xmm0, xmmword ptr [rsp+0xF0] |
1260 | paddd xmm1, xmmword ptr [rsp] |
1261 | paddd xmm2, xmmword ptr [rsp+0x90] |
1262 | paddd xmm3, xmmword ptr [rsp+0x60] |
1263 | paddd xmm0, xmm4 |
1264 | paddd xmm1, xmm5 |
1265 | paddd xmm2, xmm6 |
1266 | paddd xmm3, xmm7 |
1267 | pxor xmm12, xmm0 |
1268 | pxor xmm13, xmm1 |
1269 | pxor xmm14, xmm2 |
1270 | pxor xmm15, xmm3 |
1271 | movdqa xmm8, xmmword ptr [ROT8+rip] |
1272 | pshufb xmm12, xmm8 |
1273 | pshufb xmm13, xmm8 |
1274 | pshufb xmm14, xmm8 |
1275 | pshufb xmm15, xmm8 |
1276 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1277 | paddd xmm8, xmm12 |
1278 | paddd xmm9, xmm13 |
1279 | paddd xmm10, xmm14 |
1280 | paddd xmm11, xmm15 |
1281 | pxor xmm4, xmm8 |
1282 | pxor xmm5, xmm9 |
1283 | pxor xmm6, xmm10 |
1284 | pxor xmm7, xmm11 |
1285 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1286 | movdqa xmm8, xmm4 |
1287 | psrld xmm8, 7 |
1288 | pslld xmm4, 25 |
1289 | por xmm4, xmm8 |
1290 | movdqa xmm8, xmm5 |
1291 | psrld xmm8, 7 |
1292 | pslld xmm5, 25 |
1293 | por xmm5, xmm8 |
1294 | movdqa xmm8, xmm6 |
1295 | psrld xmm8, 7 |
1296 | pslld xmm6, 25 |
1297 | por xmm6, xmm8 |
1298 | movdqa xmm8, xmm7 |
1299 | psrld xmm8, 7 |
1300 | pslld xmm7, 25 |
1301 | por xmm7, xmm8 |
1302 | paddd xmm0, xmmword ptr [rsp+0xE0] |
1303 | paddd xmm1, xmmword ptr [rsp+0x20] |
1304 | paddd xmm2, xmmword ptr [rsp+0x30] |
1305 | paddd xmm3, xmmword ptr [rsp+0x70] |
1306 | paddd xmm0, xmm5 |
1307 | paddd xmm1, xmm6 |
1308 | paddd xmm2, xmm7 |
1309 | paddd xmm3, xmm4 |
1310 | pxor xmm15, xmm0 |
1311 | pxor xmm12, xmm1 |
1312 | pxor xmm13, xmm2 |
1313 | pxor xmm14, xmm3 |
1314 | movdqa xmm8, xmmword ptr [ROT16+rip] |
1315 | pshufb xmm15, xmm8 |
1316 | pshufb xmm12, xmm8 |
1317 | pshufb xmm13, xmm8 |
1318 | pshufb xmm14, xmm8 |
1319 | paddd xmm10, xmm15 |
1320 | paddd xmm11, xmm12 |
1321 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1322 | paddd xmm8, xmm13 |
1323 | paddd xmm9, xmm14 |
1324 | pxor xmm5, xmm10 |
1325 | pxor xmm6, xmm11 |
1326 | pxor xmm7, xmm8 |
1327 | pxor xmm4, xmm9 |
1328 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1329 | movdqa xmm8, xmm5 |
1330 | psrld xmm8, 12 |
1331 | pslld xmm5, 20 |
1332 | por xmm5, xmm8 |
1333 | movdqa xmm8, xmm6 |
1334 | psrld xmm8, 12 |
1335 | pslld xmm6, 20 |
1336 | por xmm6, xmm8 |
1337 | movdqa xmm8, xmm7 |
1338 | psrld xmm8, 12 |
1339 | pslld xmm7, 20 |
1340 | por xmm7, xmm8 |
1341 | movdqa xmm8, xmm4 |
1342 | psrld xmm8, 12 |
1343 | pslld xmm4, 20 |
1344 | por xmm4, xmm8 |
1345 | paddd xmm0, xmmword ptr [rsp+0xA0] |
1346 | paddd xmm1, xmmword ptr [rsp+0xC0] |
1347 | paddd xmm2, xmmword ptr [rsp+0x40] |
1348 | paddd xmm3, xmmword ptr [rsp+0xD0] |
1349 | paddd xmm0, xmm5 |
1350 | paddd xmm1, xmm6 |
1351 | paddd xmm2, xmm7 |
1352 | paddd xmm3, xmm4 |
1353 | pxor xmm15, xmm0 |
1354 | pxor xmm12, xmm1 |
1355 | pxor xmm13, xmm2 |
1356 | pxor xmm14, xmm3 |
1357 | movdqa xmm8, xmmword ptr [ROT8+rip] |
1358 | pshufb xmm15, xmm8 |
1359 | pshufb xmm12, xmm8 |
1360 | pshufb xmm13, xmm8 |
1361 | pshufb xmm14, xmm8 |
1362 | paddd xmm10, xmm15 |
1363 | paddd xmm11, xmm12 |
1364 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1365 | paddd xmm8, xmm13 |
1366 | paddd xmm9, xmm14 |
1367 | pxor xmm5, xmm10 |
1368 | pxor xmm6, xmm11 |
1369 | pxor xmm7, xmm8 |
1370 | pxor xmm4, xmm9 |
1371 | pxor xmm0, xmm8 |
1372 | pxor xmm1, xmm9 |
1373 | pxor xmm2, xmm10 |
1374 | pxor xmm3, xmm11 |
1375 | movdqa xmm8, xmm5 |
1376 | psrld xmm8, 7 |
1377 | pslld xmm5, 25 |
1378 | por xmm5, xmm8 |
1379 | movdqa xmm8, xmm6 |
1380 | psrld xmm8, 7 |
1381 | pslld xmm6, 25 |
1382 | por xmm6, xmm8 |
1383 | movdqa xmm8, xmm7 |
1384 | psrld xmm8, 7 |
1385 | pslld xmm7, 25 |
1386 | por xmm7, xmm8 |
1387 | movdqa xmm8, xmm4 |
1388 | psrld xmm8, 7 |
1389 | pslld xmm4, 25 |
1390 | por xmm4, xmm8 |
1391 | pxor xmm4, xmm12 |
1392 | pxor xmm5, xmm13 |
1393 | pxor xmm6, xmm14 |
1394 | pxor xmm7, xmm15 |
1395 | mov eax, r13d |
1396 | jne 9b |
1397 | movdqa xmm9, xmm0 |
1398 | punpckldq xmm0, xmm1 |
1399 | punpckhdq xmm9, xmm1 |
1400 | movdqa xmm11, xmm2 |
1401 | punpckldq xmm2, xmm3 |
1402 | punpckhdq xmm11, xmm3 |
1403 | movdqa xmm1, xmm0 |
1404 | punpcklqdq xmm0, xmm2 |
1405 | punpckhqdq xmm1, xmm2 |
1406 | movdqa xmm3, xmm9 |
1407 | punpcklqdq xmm9, xmm11 |
1408 | punpckhqdq xmm3, xmm11 |
1409 | movdqu xmmword ptr [rbx], xmm0 |
1410 | movdqu xmmword ptr [rbx+0x20], xmm1 |
1411 | movdqu xmmword ptr [rbx+0x40], xmm9 |
1412 | movdqu xmmword ptr [rbx+0x60], xmm3 |
1413 | movdqa xmm9, xmm4 |
1414 | punpckldq xmm4, xmm5 |
1415 | punpckhdq xmm9, xmm5 |
1416 | movdqa xmm11, xmm6 |
1417 | punpckldq xmm6, xmm7 |
1418 | punpckhdq xmm11, xmm7 |
1419 | movdqa xmm5, xmm4 |
1420 | punpcklqdq xmm4, xmm6 |
1421 | punpckhqdq xmm5, xmm6 |
1422 | movdqa xmm7, xmm9 |
1423 | punpcklqdq xmm9, xmm11 |
1424 | punpckhqdq xmm7, xmm11 |
1425 | movdqu xmmword ptr [rbx+0x10], xmm4 |
1426 | movdqu xmmword ptr [rbx+0x30], xmm5 |
1427 | movdqu xmmword ptr [rbx+0x50], xmm9 |
1428 | movdqu xmmword ptr [rbx+0x70], xmm7 |
1429 | movdqa xmm1, xmmword ptr [rsp+0x110] |
1430 | movdqa xmm0, xmm1 |
1431 | paddd xmm1, xmmword ptr [rsp+0x150] |
1432 | movdqa xmmword ptr [rsp+0x110], xmm1 |
1433 | pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] |
1434 | pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] |
1435 | pcmpgtd xmm0, xmm1 |
1436 | movdqa xmm1, xmmword ptr [rsp+0x120] |
1437 | psubd xmm1, xmm0 |
1438 | movdqa xmmword ptr [rsp+0x120], xmm1 |
1439 | add rbx, 128 |
1440 | add rdi, 32 |
1441 | sub rsi, 4 |
1442 | cmp rsi, 4 |
1443 | jnc 2b |
1444 | test rsi, rsi |
1445 | jne 3f |
1446 | 4: |
1447 | movdqa xmm6, xmmword ptr [rsp+0x170] |
1448 | movdqa xmm7, xmmword ptr [rsp+0x180] |
1449 | movdqa xmm8, xmmword ptr [rsp+0x190] |
1450 | movdqa xmm9, xmmword ptr [rsp+0x1A0] |
1451 | movdqa xmm10, xmmword ptr [rsp+0x1B0] |
1452 | movdqa xmm11, xmmword ptr [rsp+0x1C0] |
1453 | movdqa xmm12, xmmword ptr [rsp+0x1D0] |
1454 | movdqa xmm13, xmmword ptr [rsp+0x1E0] |
1455 | movdqa xmm14, xmmword ptr [rsp+0x1F0] |
1456 | movdqa xmm15, xmmword ptr [rsp+0x200] |
1457 | mov rsp, rbp |
1458 | pop rbp |
1459 | pop rbx |
1460 | pop rdi |
1461 | pop rsi |
1462 | pop r12 |
1463 | pop r13 |
1464 | pop r14 |
1465 | pop r15 |
1466 | ret |
1467 | .p2align 5 |
1468 | 3: |
1469 | test esi, 0x2 |
1470 | je 3f |
1471 | movups xmm0, xmmword ptr [rcx] |
1472 | movups xmm1, xmmword ptr [rcx+0x10] |
1473 | movaps xmm8, xmm0 |
1474 | movaps xmm9, xmm1 |
1475 | movd xmm13, dword ptr [rsp+0x110] |
1476 | pinsrd xmm13, dword ptr [rsp+0x120], 1 |
1477 | pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 |
1478 | movaps xmmword ptr [rsp], xmm13 |
1479 | movd xmm14, dword ptr [rsp+0x114] |
1480 | pinsrd xmm14, dword ptr [rsp+0x124], 1 |
1481 | pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 |
1482 | movaps xmmword ptr [rsp+0x10], xmm14 |
1483 | mov r8, qword ptr [rdi] |
1484 | mov r9, qword ptr [rdi+0x8] |
1485 | movzx eax, byte ptr [rbp+0x80] |
1486 | or eax, r13d |
1487 | xor edx, edx |
1488 | 2: |
1489 | mov r14d, eax |
1490 | or eax, r12d |
1491 | add rdx, 64 |
1492 | cmp rdx, r15 |
1493 | cmovne eax, r14d |
1494 | movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
1495 | movaps xmm10, xmm2 |
1496 | movups xmm4, xmmword ptr [r8+rdx-0x40] |
1497 | movups xmm5, xmmword ptr [r8+rdx-0x30] |
1498 | movaps xmm3, xmm4 |
1499 | shufps xmm4, xmm5, 136 |
1500 | shufps xmm3, xmm5, 221 |
1501 | movaps xmm5, xmm3 |
1502 | movups xmm6, xmmword ptr [r8+rdx-0x20] |
1503 | movups xmm7, xmmword ptr [r8+rdx-0x10] |
1504 | movaps xmm3, xmm6 |
1505 | shufps xmm6, xmm7, 136 |
1506 | pshufd xmm6, xmm6, 0x93 |
1507 | shufps xmm3, xmm7, 221 |
1508 | pshufd xmm7, xmm3, 0x93 |
1509 | movups xmm12, xmmword ptr [r9+rdx-0x40] |
1510 | movups xmm13, xmmword ptr [r9+rdx-0x30] |
1511 | movaps xmm11, xmm12 |
1512 | shufps xmm12, xmm13, 136 |
1513 | shufps xmm11, xmm13, 221 |
1514 | movaps xmm13, xmm11 |
1515 | movups xmm14, xmmword ptr [r9+rdx-0x20] |
1516 | movups xmm15, xmmword ptr [r9+rdx-0x10] |
1517 | movaps xmm11, xmm14 |
1518 | shufps xmm14, xmm15, 136 |
1519 | pshufd xmm14, xmm14, 0x93 |
1520 | shufps xmm11, xmm15, 221 |
1521 | pshufd xmm15, xmm11, 0x93 |
1522 | movaps xmm3, xmmword ptr [rsp] |
1523 | movaps xmm11, xmmword ptr [rsp+0x10] |
1524 | pinsrd xmm3, eax, 3 |
1525 | pinsrd xmm11, eax, 3 |
1526 | mov al, 7 |
1527 | 9: |
1528 | paddd xmm0, xmm4 |
1529 | paddd xmm8, xmm12 |
1530 | movaps xmmword ptr [rsp+0x20], xmm4 |
1531 | movaps xmmword ptr [rsp+0x30], xmm12 |
1532 | paddd xmm0, xmm1 |
1533 | paddd xmm8, xmm9 |
1534 | pxor xmm3, xmm0 |
1535 | pxor xmm11, xmm8 |
1536 | movaps xmm12, xmmword ptr [ROT16+rip] |
1537 | pshufb xmm3, xmm12 |
1538 | pshufb xmm11, xmm12 |
1539 | paddd xmm2, xmm3 |
1540 | paddd xmm10, xmm11 |
1541 | pxor xmm1, xmm2 |
1542 | pxor xmm9, xmm10 |
1543 | movdqa xmm4, xmm1 |
1544 | pslld xmm1, 20 |
1545 | psrld xmm4, 12 |
1546 | por xmm1, xmm4 |
1547 | movdqa xmm4, xmm9 |
1548 | pslld xmm9, 20 |
1549 | psrld xmm4, 12 |
1550 | por xmm9, xmm4 |
1551 | paddd xmm0, xmm5 |
1552 | paddd xmm8, xmm13 |
1553 | movaps xmmword ptr [rsp+0x40], xmm5 |
1554 | movaps xmmword ptr [rsp+0x50], xmm13 |
1555 | paddd xmm0, xmm1 |
1556 | paddd xmm8, xmm9 |
1557 | pxor xmm3, xmm0 |
1558 | pxor xmm11, xmm8 |
1559 | movaps xmm13, xmmword ptr [ROT8+rip] |
1560 | pshufb xmm3, xmm13 |
1561 | pshufb xmm11, xmm13 |
1562 | paddd xmm2, xmm3 |
1563 | paddd xmm10, xmm11 |
1564 | pxor xmm1, xmm2 |
1565 | pxor xmm9, xmm10 |
1566 | movdqa xmm4, xmm1 |
1567 | pslld xmm1, 25 |
1568 | psrld xmm4, 7 |
1569 | por xmm1, xmm4 |
1570 | movdqa xmm4, xmm9 |
1571 | pslld xmm9, 25 |
1572 | psrld xmm4, 7 |
1573 | por xmm9, xmm4 |
1574 | pshufd xmm0, xmm0, 0x93 |
1575 | pshufd xmm8, xmm8, 0x93 |
1576 | pshufd xmm3, xmm3, 0x4E |
1577 | pshufd xmm11, xmm11, 0x4E |
1578 | pshufd xmm2, xmm2, 0x39 |
1579 | pshufd xmm10, xmm10, 0x39 |
1580 | paddd xmm0, xmm6 |
1581 | paddd xmm8, xmm14 |
1582 | paddd xmm0, xmm1 |
1583 | paddd xmm8, xmm9 |
1584 | pxor xmm3, xmm0 |
1585 | pxor xmm11, xmm8 |
1586 | pshufb xmm3, xmm12 |
1587 | pshufb xmm11, xmm12 |
1588 | paddd xmm2, xmm3 |
1589 | paddd xmm10, xmm11 |
1590 | pxor xmm1, xmm2 |
1591 | pxor xmm9, xmm10 |
1592 | movdqa xmm4, xmm1 |
1593 | pslld xmm1, 20 |
1594 | psrld xmm4, 12 |
1595 | por xmm1, xmm4 |
1596 | movdqa xmm4, xmm9 |
1597 | pslld xmm9, 20 |
1598 | psrld xmm4, 12 |
1599 | por xmm9, xmm4 |
1600 | paddd xmm0, xmm7 |
1601 | paddd xmm8, xmm15 |
1602 | paddd xmm0, xmm1 |
1603 | paddd xmm8, xmm9 |
1604 | pxor xmm3, xmm0 |
1605 | pxor xmm11, xmm8 |
1606 | pshufb xmm3, xmm13 |
1607 | pshufb xmm11, xmm13 |
1608 | paddd xmm2, xmm3 |
1609 | paddd xmm10, xmm11 |
1610 | pxor xmm1, xmm2 |
1611 | pxor xmm9, xmm10 |
1612 | movdqa xmm4, xmm1 |
1613 | pslld xmm1, 25 |
1614 | psrld xmm4, 7 |
1615 | por xmm1, xmm4 |
1616 | movdqa xmm4, xmm9 |
1617 | pslld xmm9, 25 |
1618 | psrld xmm4, 7 |
1619 | por xmm9, xmm4 |
1620 | pshufd xmm0, xmm0, 0x39 |
1621 | pshufd xmm8, xmm8, 0x39 |
1622 | pshufd xmm3, xmm3, 0x4E |
1623 | pshufd xmm11, xmm11, 0x4E |
1624 | pshufd xmm2, xmm2, 0x93 |
1625 | pshufd xmm10, xmm10, 0x93 |
1626 | dec al |
1627 | je 9f |
1628 | movdqa xmm12, xmmword ptr [rsp+0x20] |
1629 | movdqa xmm5, xmmword ptr [rsp+0x40] |
1630 | pshufd xmm13, xmm12, 0x0F |
1631 | shufps xmm12, xmm5, 214 |
1632 | pshufd xmm4, xmm12, 0x39 |
1633 | movdqa xmm12, xmm6 |
1634 | shufps xmm12, xmm7, 250 |
1635 | pblendw xmm13, xmm12, 0xCC |
1636 | movdqa xmm12, xmm7 |
1637 | punpcklqdq xmm12, xmm5 |
1638 | pblendw xmm12, xmm6, 0xC0 |
1639 | pshufd xmm12, xmm12, 0x78 |
1640 | punpckhdq xmm5, xmm7 |
1641 | punpckldq xmm6, xmm5 |
1642 | pshufd xmm7, xmm6, 0x1E |
1643 | movdqa xmmword ptr [rsp+0x20], xmm13 |
1644 | movdqa xmmword ptr [rsp+0x40], xmm12 |
1645 | movdqa xmm5, xmmword ptr [rsp+0x30] |
1646 | movdqa xmm13, xmmword ptr [rsp+0x50] |
1647 | pshufd xmm6, xmm5, 0x0F |
1648 | shufps xmm5, xmm13, 214 |
1649 | pshufd xmm12, xmm5, 0x39 |
1650 | movdqa xmm5, xmm14 |
1651 | shufps xmm5, xmm15, 250 |
1652 | pblendw xmm6, xmm5, 0xCC |
1653 | movdqa xmm5, xmm15 |
1654 | punpcklqdq xmm5, xmm13 |
1655 | pblendw xmm5, xmm14, 0xC0 |
1656 | pshufd xmm5, xmm5, 0x78 |
1657 | punpckhdq xmm13, xmm15 |
1658 | punpckldq xmm14, xmm13 |
1659 | pshufd xmm15, xmm14, 0x1E |
1660 | movdqa xmm13, xmm6 |
1661 | movdqa xmm14, xmm5 |
1662 | movdqa xmm5, xmmword ptr [rsp+0x20] |
1663 | movdqa xmm6, xmmword ptr [rsp+0x40] |
1664 | jmp 9b |
1665 | 9: |
1666 | pxor xmm0, xmm2 |
1667 | pxor xmm1, xmm3 |
1668 | pxor xmm8, xmm10 |
1669 | pxor xmm9, xmm11 |
1670 | mov eax, r13d |
1671 | cmp rdx, r15 |
1672 | jne 2b |
1673 | movups xmmword ptr [rbx], xmm0 |
1674 | movups xmmword ptr [rbx+0x10], xmm1 |
1675 | movups xmmword ptr [rbx+0x20], xmm8 |
1676 | movups xmmword ptr [rbx+0x30], xmm9 |
1677 | movdqa xmm0, xmmword ptr [rsp+0x130] |
1678 | movdqa xmm1, xmmword ptr [rsp+0x110] |
1679 | movdqa xmm2, xmmword ptr [rsp+0x120] |
1680 | movdqu xmm3, xmmword ptr [rsp+0x118] |
1681 | movdqu xmm4, xmmword ptr [rsp+0x128] |
1682 | blendvps xmm1, xmm3, xmm0 |
1683 | blendvps xmm2, xmm4, xmm0 |
1684 | movdqa xmmword ptr [rsp+0x110], xmm1 |
1685 | movdqa xmmword ptr [rsp+0x120], xmm2 |
1686 | add rdi, 16 |
1687 | add rbx, 64 |
1688 | sub rsi, 2 |
1689 | 3: |
1690 | test esi, 0x1 |
1691 | je 4b |
1692 | movups xmm0, xmmword ptr [rcx] |
1693 | movups xmm1, xmmword ptr [rcx+0x10] |
1694 | movd xmm13, dword ptr [rsp+0x110] |
1695 | pinsrd xmm13, dword ptr [rsp+0x120], 1 |
1696 | pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 |
1697 | movaps xmm14, xmmword ptr [ROT8+rip] |
1698 | movaps xmm15, xmmword ptr [ROT16+rip] |
1699 | mov r8, qword ptr [rdi] |
1700 | movzx eax, byte ptr [rbp+0x80] |
1701 | or eax, r13d |
1702 | xor edx, edx |
1703 | 2: |
1704 | mov r14d, eax |
1705 | or eax, r12d |
1706 | add rdx, 64 |
1707 | cmp rdx, r15 |
1708 | cmovne eax, r14d |
1709 | movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
1710 | movaps xmm3, xmm13 |
1711 | pinsrd xmm3, eax, 3 |
1712 | movups xmm4, xmmword ptr [r8+rdx-0x40] |
1713 | movups xmm5, xmmword ptr [r8+rdx-0x30] |
1714 | movaps xmm8, xmm4 |
1715 | shufps xmm4, xmm5, 136 |
1716 | shufps xmm8, xmm5, 221 |
1717 | movaps xmm5, xmm8 |
1718 | movups xmm6, xmmword ptr [r8+rdx-0x20] |
1719 | movups xmm7, xmmword ptr [r8+rdx-0x10] |
1720 | movaps xmm8, xmm6 |
1721 | shufps xmm6, xmm7, 136 |
1722 | pshufd xmm6, xmm6, 0x93 |
1723 | shufps xmm8, xmm7, 221 |
1724 | pshufd xmm7, xmm8, 0x93 |
1725 | mov al, 7 |
1726 | 9: |
1727 | paddd xmm0, xmm4 |
1728 | paddd xmm0, xmm1 |
1729 | pxor xmm3, xmm0 |
1730 | pshufb xmm3, xmm15 |
1731 | paddd xmm2, xmm3 |
1732 | pxor xmm1, xmm2 |
1733 | movdqa xmm11, xmm1 |
1734 | pslld xmm1, 20 |
1735 | psrld xmm11, 12 |
1736 | por xmm1, xmm11 |
1737 | paddd xmm0, xmm5 |
1738 | paddd xmm0, xmm1 |
1739 | pxor xmm3, xmm0 |
1740 | pshufb xmm3, xmm14 |
1741 | paddd xmm2, xmm3 |
1742 | pxor xmm1, xmm2 |
1743 | movdqa xmm11, xmm1 |
1744 | pslld xmm1, 25 |
1745 | psrld xmm11, 7 |
1746 | por xmm1, xmm11 |
1747 | pshufd xmm0, xmm0, 0x93 |
1748 | pshufd xmm3, xmm3, 0x4E |
1749 | pshufd xmm2, xmm2, 0x39 |
1750 | paddd xmm0, xmm6 |
1751 | paddd xmm0, xmm1 |
1752 | pxor xmm3, xmm0 |
1753 | pshufb xmm3, xmm15 |
1754 | paddd xmm2, xmm3 |
1755 | pxor xmm1, xmm2 |
1756 | movdqa xmm11, xmm1 |
1757 | pslld xmm1, 20 |
1758 | psrld xmm11, 12 |
1759 | por xmm1, xmm11 |
1760 | paddd xmm0, xmm7 |
1761 | paddd xmm0, xmm1 |
1762 | pxor xmm3, xmm0 |
1763 | pshufb xmm3, xmm14 |
1764 | paddd xmm2, xmm3 |
1765 | pxor xmm1, xmm2 |
1766 | movdqa xmm11, xmm1 |
1767 | pslld xmm1, 25 |
1768 | psrld xmm11, 7 |
1769 | por xmm1, xmm11 |
1770 | pshufd xmm0, xmm0, 0x39 |
1771 | pshufd xmm3, xmm3, 0x4E |
1772 | pshufd xmm2, xmm2, 0x93 |
1773 | dec al |
1774 | jz 9f |
1775 | movdqa xmm8, xmm4 |
1776 | shufps xmm8, xmm5, 214 |
1777 | pshufd xmm9, xmm4, 0x0F |
1778 | pshufd xmm4, xmm8, 0x39 |
1779 | movdqa xmm8, xmm6 |
1780 | shufps xmm8, xmm7, 250 |
1781 | pblendw xmm9, xmm8, 0xCC |
1782 | movdqa xmm8, xmm7 |
1783 | punpcklqdq xmm8, xmm5 |
1784 | pblendw xmm8, xmm6, 0xC0 |
1785 | pshufd xmm8, xmm8, 0x78 |
1786 | punpckhdq xmm5, xmm7 |
1787 | punpckldq xmm6, xmm5 |
1788 | pshufd xmm7, xmm6, 0x1E |
1789 | movdqa xmm5, xmm9 |
1790 | movdqa xmm6, xmm8 |
1791 | jmp 9b |
1792 | 9: |
1793 | pxor xmm0, xmm2 |
1794 | pxor xmm1, xmm3 |
1795 | mov eax, r13d |
1796 | cmp rdx, r15 |
1797 | jne 2b |
1798 | movups xmmword ptr [rbx], xmm0 |
1799 | movups xmmword ptr [rbx+0x10], xmm1 |
1800 | jmp 4b |
1801 | |
1802 | .p2align 6 |
// ---------------------------------------------------------------------------
// blake3_compress_in_place_sse41 / _blake3_compress_in_place_sse41
//
// Runs 7 rounds over a 4x4 dword state built from a 32-byte input buffer, the
// 16-byte BLAKE3_IV table and three scalar arguments, mixing in a 64-byte
// message block, then writes the 32-byte result back over the input buffer.
//
// ABI: Microsoft x64 (arguments arrive in rcx/rdx/r8/r9 plus the stack).
//   rcx            pointer to 32 bytes; read on entry, overwritten on exit
//                  (presumably the chaining value -- confirm against the C
//                  prototype)
//   rdx            pointer to the 64-byte message block (unaligned loads)
//   r8b            byte placed in state lane 14 (presumably block_len)
//   r9             64-bit value placed in state lanes 12-13 (presumably the
//                  counter)
//   5th arg, byte  placed in state lane 15 (presumably flags); read from
//                  [rsp+0xA0] after the 120-byte frame is allocated
// Returns nothing in a register; result is stored through rcx.
// Clobbers: rax, r8, xmm0-xmm5, flags. xmm6-xmm9, xmm11, xmm14 and xmm15 are
// callee-saved on this ABI and are spilled/restored below.
// NOTE(review): rsp is adjusted but no .seh_* unwind directives are emitted
// for this function -- confirm that is acceptable for the target toolchain.
// ---------------------------------------------------------------------------
1803 | blake3_compress_in_place_sse41: |
1804 | _blake3_compress_in_place_sse41: |
// 120 = 7 x 16 bytes of XMM spill + 8 bytes of padding; with rsp == 8 (mod 16)
// on entry this leaves rsp 16-byte aligned, which the movdqa spills require.
1805 | sub rsp, 120 |
1806 | movdqa xmmword ptr [rsp], xmm6 |
1807 | movdqa xmmword ptr [rsp+0x10], xmm7 |
1808 | movdqa xmmword ptr [rsp+0x20], xmm8 |
1809 | movdqa xmmword ptr [rsp+0x30], xmm9 |
1810 | movdqa xmmword ptr [rsp+0x40], xmm11 |
1811 | movdqa xmmword ptr [rsp+0x50], xmm14 |
1812 | movdqa xmmword ptr [rsp+0x60], xmm15 |
// State rows: xmm0/xmm1 = the 32 input bytes at [rcx], xmm2 = BLAKE3_IV.
1813 | movups xmm0, xmmword ptr [rcx] |
1814 | movups xmm1, xmmword ptr [rcx+0x10] |
1815 | movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
// 0xA0 = 120 (frame) + 8 (return address) + 32 (shadow space): the 5th argument.
1816 | movzx eax, byte ptr [rsp+0xA0] |
1817 | movzx r8d, r8b |
// r8 = (5th-arg byte << 32) | r8b, so xmm3 = { r9 low, r9 high, r8b, 5th-arg byte }.
1818 | shl rax, 32 |
1819 | add r8, rax |
1820 | movq xmm3, r9 |
1821 | movq xmm4, r8 |
1822 | punpcklqdq xmm3, xmm4 |
// Load the first 32 message bytes and de-interleave them:
// xmm4 = even-indexed dwords (shufps 136), xmm5 = odd-indexed dwords (shufps 221).
1823 | movups xmm4, xmmword ptr [rdx] |
1824 | movups xmm5, xmmword ptr [rdx+0x10] |
1825 | movaps xmm8, xmm4 |
1826 | shufps xmm4, xmm5, 136 |
1827 | shufps xmm8, xmm5, 221 |
1828 | movaps xmm5, xmm8 |
// Same split for the last 32 message bytes, then rotate each result by one
// lane (pshufd 0x93) so it lines up with the lane-rotated state used in the
// second half of a round.
1829 | movups xmm6, xmmword ptr [rdx+0x20] |
1830 | movups xmm7, xmmword ptr [rdx+0x30] |
1831 | movaps xmm8, xmm6 |
1832 | shufps xmm6, xmm7, 136 |
1833 | pshufd xmm6, xmm6, 0x93 |
1834 | shufps xmm8, xmm7, 221 |
1835 | pshufd xmm7, xmm8, 0x93 |
// Keep the two pshufb byte-rotation masks resident for the whole loop.
1836 | movaps xmm14, xmmword ptr [ROT8+rip] |
1837 | movaps xmm15, xmmword ptr [ROT16+rip] |
// al = number of rounds left (7). The upper bits of rax are no longer needed.
1838 | mov al, 7 |
// ---- round loop: one iteration = one full round on rows xmm0..xmm3 ----
1839 | 9: |
// First half, step 1: add message words (xmm4), rotate row 3 right by 16
// (pshufb ROT16), rotate row 1 right by 12 (shift pair 20/12, xmm11 scratch).
1840 | paddd xmm0, xmm4 |
1841 | paddd xmm0, xmm1 |
1842 | pxor xmm3, xmm0 |
1843 | pshufb xmm3, xmm15 |
1844 | paddd xmm2, xmm3 |
1845 | pxor xmm1, xmm2 |
1846 | movdqa xmm11, xmm1 |
1847 | pslld xmm1, 20 |
1848 | psrld xmm11, 12 |
1849 | por xmm1, xmm11 |
// First half, step 2: add message words (xmm5), rotate row 3 right by 8
// (pshufb ROT8), rotate row 1 right by 7 (shift pair 25/7).
1850 | paddd xmm0, xmm5 |
1851 | paddd xmm0, xmm1 |
1852 | pxor xmm3, xmm0 |
1853 | pshufb xmm3, xmm14 |
1854 | paddd xmm2, xmm3 |
1855 | pxor xmm1, xmm2 |
1856 | movdqa xmm11, xmm1 |
1857 | pslld xmm1, 25 |
1858 | psrld xmm11, 7 |
1859 | por xmm1, xmm11 |
// Rotate the lanes of rows 0, 3 and 2 (row 1 stays put) so the second half
// mixes a different grouping of state words.
1860 | pshufd xmm0, xmm0, 0x93 |
1861 | pshufd xmm3, xmm3, 0x4E |
1862 | pshufd xmm2, xmm2, 0x39 |
// Second half, step 1: same sequence as above with message words from xmm6.
1863 | paddd xmm0, xmm6 |
1864 | paddd xmm0, xmm1 |
1865 | pxor xmm3, xmm0 |
1866 | pshufb xmm3, xmm15 |
1867 | paddd xmm2, xmm3 |
1868 | pxor xmm1, xmm2 |
1869 | movdqa xmm11, xmm1 |
1870 | pslld xmm1, 20 |
1871 | psrld xmm11, 12 |
1872 | por xmm1, xmm11 |
// Second half, step 2: message words from xmm7.
1873 | paddd xmm0, xmm7 |
1874 | paddd xmm0, xmm1 |
1875 | pxor xmm3, xmm0 |
1876 | pshufb xmm3, xmm14 |
1877 | paddd xmm2, xmm3 |
1878 | pxor xmm1, xmm2 |
1879 | movdqa xmm11, xmm1 |
1880 | pslld xmm1, 25 |
1881 | psrld xmm11, 7 |
1882 | por xmm1, xmm11 |
// Undo the lane rotation (0x39 / 0x93 are the inverses of 0x93 / 0x39 above).
1883 | pshufd xmm0, xmm0, 0x39 |
1884 | pshufd xmm3, xmm3, 0x4E |
1885 | pshufd xmm2, xmm2, 0x93 |
// Leave after the 7th round; the message re-shuffle below is skipped then.
1886 | dec al |
1887 | jz 9f |
// Re-arrange the 16 message words held in xmm4..xmm7 into the order the next
// round consumes them. xmm8 and xmm9 are scratch; the new xmm5/xmm6 are built
// there first because the old values are still needed as shuffle inputs.
1888 | movdqa xmm8, xmm4 |
1889 | shufps xmm8, xmm5, 214 |
1890 | pshufd xmm9, xmm4, 0x0F |
1891 | pshufd xmm4, xmm8, 0x39 |
1892 | movdqa xmm8, xmm6 |
1893 | shufps xmm8, xmm7, 250 |
1894 | pblendw xmm9, xmm8, 0xCC |
1895 | movdqa xmm8, xmm7 |
1896 | punpcklqdq xmm8, xmm5 |
1897 | pblendw xmm8, xmm6, 0xC0 |
1898 | pshufd xmm8, xmm8, 0x78 |
1899 | punpckhdq xmm5, xmm7 |
1900 | punpckldq xmm6, xmm5 |
1901 | pshufd xmm7, xmm6, 0x1E |
1902 | movdqa xmm5, xmm9 |
1903 | movdqa xmm6, xmm8 |
1904 | jmp 9b |
// ---- finalization: fold rows 2/3 into rows 0/1 and store 32 bytes to [rcx] ----
1905 | 9: |
1906 | pxor xmm0, xmm2 |
1907 | pxor xmm1, xmm3 |
1908 | movups xmmword ptr [rcx], xmm0 |
1909 | movups xmmword ptr [rcx+0x10], xmm1 |
// Restore the callee-saved XMM registers and release the frame.
1910 | movdqa xmm6, xmmword ptr [rsp] |
1911 | movdqa xmm7, xmmword ptr [rsp+0x10] |
1912 | movdqa xmm8, xmmword ptr [rsp+0x20] |
1913 | movdqa xmm9, xmmword ptr [rsp+0x30] |
1914 | movdqa xmm11, xmmword ptr [rsp+0x40] |
1915 | movdqa xmm14, xmmword ptr [rsp+0x50] |
1916 | movdqa xmm15, xmmword ptr [rsp+0x60] |
1917 | add rsp, 120 |
1918 | ret |
1919 | |
1920 | |
1921 | .p2align 6 |
// ---------------------------------------------------------------------------
// _blake3_compress_xof_sse41 / blake3_compress_xof_sse41
//
// Same 7-round computation as blake3_compress_in_place_sse41, but the input
// buffer at [rcx] is only read, and a 64-byte result is written through a
// separate output pointer:
//   out[ 0..31] = rows 0/1 XOR rows 2/3
//   out[32..63] = rows 2/3 XOR the original 32 bytes at [rcx]
//
// ABI: Microsoft x64.
//   rcx            pointer to 32 input bytes (presumably the chaining value
//                  -- confirm against the C prototype); not modified
//   rdx            pointer to the 64-byte message block (unaligned loads)
//   r8b            byte placed in state lane 14 (presumably block_len)
//   r9             64-bit value placed in state lanes 12-13 (presumably the
//                  counter)
//   5th arg, byte  placed in state lane 15 (presumably flags); at [rsp+0xA0]
//                  once the frame is allocated
//   6th arg, ptr   destination for the 64 output bytes; at [rsp+0xA8]
// Clobbers: rax, r8, r10, xmm0-xmm5, flags. xmm6-xmm9, xmm11, xmm14 and xmm15
// are spilled/restored below.
// NOTE(review): rsp is adjusted but no .seh_* unwind directives are emitted
// for this function -- confirm that is acceptable for the target toolchain.
// ---------------------------------------------------------------------------
1922 | _blake3_compress_xof_sse41: |
1923 | blake3_compress_xof_sse41: |
// 120 = 7 x 16 bytes of XMM spill + 8 bytes of padding; keeps rsp 16-byte
// aligned for the movdqa spills given rsp == 8 (mod 16) on entry.
1924 | sub rsp, 120 |
1925 | movdqa xmmword ptr [rsp], xmm6 |
1926 | movdqa xmmword ptr [rsp+0x10], xmm7 |
1927 | movdqa xmmword ptr [rsp+0x20], xmm8 |
1928 | movdqa xmmword ptr [rsp+0x30], xmm9 |
1929 | movdqa xmmword ptr [rsp+0x40], xmm11 |
1930 | movdqa xmmword ptr [rsp+0x50], xmm14 |
1931 | movdqa xmmword ptr [rsp+0x60], xmm15 |
// State rows: xmm0/xmm1 = the 32 input bytes at [rcx], xmm2 = BLAKE3_IV.
1932 | movups xmm0, xmmword ptr [rcx] |
1933 | movups xmm1, xmmword ptr [rcx+0x10] |
1934 | movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
// Stack arguments: 0xA0 = 120 + 8 + 32 is the 5th argument, 0xA8 the 6th.
1935 | movzx eax, byte ptr [rsp+0xA0] |
1936 | movzx r8d, r8b |
1937 | mov r10, qword ptr [rsp+0xA8] |
// r8 = (5th-arg byte << 32) | r8b, so xmm3 = { r9 low, r9 high, r8b, 5th-arg byte }.
1938 | shl rax, 32 |
1939 | add r8, rax |
1940 | movq xmm3, r9 |
1941 | movq xmm4, r8 |
1942 | punpcklqdq xmm3, xmm4 |
// De-interleave the first 32 message bytes: xmm4 = even-indexed dwords,
// xmm5 = odd-indexed dwords.
1943 | movups xmm4, xmmword ptr [rdx] |
1944 | movups xmm5, xmmword ptr [rdx+0x10] |
1945 | movaps xmm8, xmm4 |
1946 | shufps xmm4, xmm5, 136 |
1947 | shufps xmm8, xmm5, 221 |
1948 | movaps xmm5, xmm8 |
// Same split for the last 32 message bytes, each result rotated by one lane
// (pshufd 0x93) to match the lane-rotated state in the second half of a round.
1949 | movups xmm6, xmmword ptr [rdx+0x20] |
1950 | movups xmm7, xmmword ptr [rdx+0x30] |
1951 | movaps xmm8, xmm6 |
1952 | shufps xmm6, xmm7, 136 |
1953 | pshufd xmm6, xmm6, 0x93 |
1954 | shufps xmm8, xmm7, 221 |
1955 | pshufd xmm7, xmm8, 0x93 |
// Byte-rotation masks for pshufb stay in xmm14/xmm15 for the whole loop.
1956 | movaps xmm14, xmmword ptr [ROT8+rip] |
1957 | movaps xmm15, xmmword ptr [ROT16+rip] |
// al = number of rounds left (7).
1958 | mov al, 7 |
// ---- round loop: one iteration = one full round on rows xmm0..xmm3 ----
1959 | 9: |
// First half, step 1: message words xmm4; row 3 rotated right by 16 (ROT16),
// row 1 rotated right by 12 (shift pair 20/12, xmm11 scratch).
1960 | paddd xmm0, xmm4 |
1961 | paddd xmm0, xmm1 |
1962 | pxor xmm3, xmm0 |
1963 | pshufb xmm3, xmm15 |
1964 | paddd xmm2, xmm3 |
1965 | pxor xmm1, xmm2 |
1966 | movdqa xmm11, xmm1 |
1967 | pslld xmm1, 20 |
1968 | psrld xmm11, 12 |
1969 | por xmm1, xmm11 |
// First half, step 2: message words xmm5; row 3 rotated right by 8 (ROT8),
// row 1 rotated right by 7 (shift pair 25/7).
1970 | paddd xmm0, xmm5 |
1971 | paddd xmm0, xmm1 |
1972 | pxor xmm3, xmm0 |
1973 | pshufb xmm3, xmm14 |
1974 | paddd xmm2, xmm3 |
1975 | pxor xmm1, xmm2 |
1976 | movdqa xmm11, xmm1 |
1977 | pslld xmm1, 25 |
1978 | psrld xmm11, 7 |
1979 | por xmm1, xmm11 |
// Rotate the lanes of rows 0, 3 and 2 so the second half mixes a different
// grouping of state words.
1980 | pshufd xmm0, xmm0, 0x93 |
1981 | pshufd xmm3, xmm3, 0x4E |
1982 | pshufd xmm2, xmm2, 0x39 |
// Second half, step 1: message words xmm6.
1983 | paddd xmm0, xmm6 |
1984 | paddd xmm0, xmm1 |
1985 | pxor xmm3, xmm0 |
1986 | pshufb xmm3, xmm15 |
1987 | paddd xmm2, xmm3 |
1988 | pxor xmm1, xmm2 |
1989 | movdqa xmm11, xmm1 |
1990 | pslld xmm1, 20 |
1991 | psrld xmm11, 12 |
1992 | por xmm1, xmm11 |
// Second half, step 2: message words xmm7.
1993 | paddd xmm0, xmm7 |
1994 | paddd xmm0, xmm1 |
1995 | pxor xmm3, xmm0 |
1996 | pshufb xmm3, xmm14 |
1997 | paddd xmm2, xmm3 |
1998 | pxor xmm1, xmm2 |
1999 | movdqa xmm11, xmm1 |
2000 | pslld xmm1, 25 |
2001 | psrld xmm11, 7 |
2002 | por xmm1, xmm11 |
// Undo the lane rotation applied before the second half.
2003 | pshufd xmm0, xmm0, 0x39 |
2004 | pshufd xmm3, xmm3, 0x4E |
2005 | pshufd xmm2, xmm2, 0x93 |
// Leave after the 7th round; the message re-shuffle below is skipped then.
2006 | dec al |
2007 | jz 9f |
// Re-arrange the message words in xmm4..xmm7 for the next round
// (xmm8/xmm9 are scratch holding the new xmm6/xmm5 until the old ones are dead).
2008 | movdqa xmm8, xmm4 |
2009 | shufps xmm8, xmm5, 214 |
2010 | pshufd xmm9, xmm4, 0x0F |
2011 | pshufd xmm4, xmm8, 0x39 |
2012 | movdqa xmm8, xmm6 |
2013 | shufps xmm8, xmm7, 250 |
2014 | pblendw xmm9, xmm8, 0xCC |
2015 | movdqa xmm8, xmm7 |
2016 | punpcklqdq xmm8, xmm5 |
2017 | pblendw xmm8, xmm6, 0xC0 |
2018 | pshufd xmm8, xmm8, 0x78 |
2019 | punpckhdq xmm5, xmm7 |
2020 | punpckldq xmm6, xmm5 |
2021 | pshufd xmm7, xmm6, 0x1E |
2022 | movdqa xmm5, xmm9 |
2023 | movdqa xmm6, xmm8 |
2024 | jmp 9b |
// ---- finalization ----
2025 | 9: |
// Reload the untouched input bytes; the message registers xmm4/xmm5 are free now.
2026 | movdqu xmm4, xmmword ptr [rcx] |
2027 | movdqu xmm5, xmmword ptr [rcx+0x10] |
// Order matters: rows 0/1 must absorb rows 2/3 before rows 2/3 are XORed
// with the reloaded input.
2028 | pxor xmm0, xmm2 |
2029 | pxor xmm1, xmm3 |
2030 | pxor xmm2, xmm4 |
2031 | pxor xmm3, xmm5 |
// Write all four rows (64 bytes) to the output buffer; unaligned stores.
2032 | movups xmmword ptr [r10], xmm0 |
2033 | movups xmmword ptr [r10+0x10], xmm1 |
2034 | movups xmmword ptr [r10+0x20], xmm2 |
2035 | movups xmmword ptr [r10+0x30], xmm3 |
// Restore the callee-saved XMM registers and release the frame.
2036 | movdqa xmm6, xmmword ptr [rsp] |
2037 | movdqa xmm7, xmmword ptr [rsp+0x10] |
2038 | movdqa xmm8, xmmword ptr [rsp+0x20] |
2039 | movdqa xmm9, xmmword ptr [rsp+0x30] |
2040 | movdqa xmm11, xmmword ptr [rsp+0x40] |
2041 | movdqa xmm14, xmmword ptr [rsp+0x50] |
2042 | movdqa xmm15, xmmword ptr [rsp+0x60] |
2043 | add rsp, 120 |
2044 | ret |
2045 | |
2046 | |
// ---------------------------------------------------------------------------
// Read-only constant tables shared by the routines in this file.
// The section is aligned to 64 bytes and every table is exactly 16 bytes, so
// each label is 16-byte aligned. The aligned memory operands that reference
// them (movaps / movdqa / pand / pxor) depend on this -- keep every table a
// multiple of 16 bytes when editing.
// ---------------------------------------------------------------------------
2047 | .section .rodata |
2048 | .p2align 6 |
// Four 32-bit IV words, loaded as a single vector into state row 2.
2049 | BLAKE3_IV: |
2050 | .long 0x6A09E667, 0xBB67AE85 |
2051 | .long 0x3C6EF372, 0xA54FF53A |
// pshufb mask: within each dword, byte order (2,3,0,1) = rotate by 16 bits.
2052 | ROT16: |
2053 | .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 |
// pshufb mask: within each dword, byte order (1,2,3,0) = rotate right by 8 bits.
2054 | ROT8: |
2055 | .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 |
// Per-lane counter offsets 0..3; ANDed with the increment mask and added to
// the broadcast low counter word in the blake3_hash_many_sse41 prologue.
2056 | ADD0: |
2057 | .long 0, 1, 2, 3 |
// Per-iteration counter step (4 inputs per pass); ANDed with the increment
// mask and kept on the stack for the 4-way loop's counter update.
2058 | ADD1: |
2059 | .long 4, 4, 4, 4 |
// Each IV word broadcast to all four lanes. Not referenced in the routines
// below the 4-way loop; presumably used by the 4-way state setup -- confirm.
2060 | BLAKE3_IV_0: |
2061 | .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 |
2062 | BLAKE3_IV_1: |
2063 | .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 |
2064 | BLAKE3_IV_2: |
2065 | .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 |
2066 | BLAKE3_IV_3: |
2067 | .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A |
// The value 64 in every lane; the 2-way and 1-way tails read its first dword
// with pinsrd to fill lane 2 of the counter/length/flags row.
2068 | BLAKE3_BLOCK_LEN: |
2069 | .long 64, 64, 64, 64 |
// Sign-bit mask. XORing both operands with it before pcmpgtd turns the signed
// compare into an unsigned one, which is how a carry out of the low counter
// word is detected.
2070 | CMP_MSB_MASK: |
2071 | .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 |
2072 | |