1#include "llvm_blake3_prefix.h"
2
3.intel_syntax noprefix
4.global blake3_hash_many_sse2
5.global _blake3_hash_many_sse2
6.global blake3_compress_in_place_sse2
7.global _blake3_compress_in_place_sse2
8.global blake3_compress_xof_sse2
9.global _blake3_compress_xof_sse2
10.section .text
11 .p2align 6
12_blake3_hash_many_sse2:
13blake3_hash_many_sse2:
14 push r15
15 push r14
16 push r13
17 push r12
18 push rsi
19 push rdi
20 push rbx
21 push rbp
22 mov rbp, rsp
23 sub rsp, 528
24 and rsp, 0xFFFFFFFFFFFFFFC0
25 movdqa xmmword ptr [rsp+0x170], xmm6
26 movdqa xmmword ptr [rsp+0x180], xmm7
27 movdqa xmmword ptr [rsp+0x190], xmm8
28 movdqa xmmword ptr [rsp+0x1A0], xmm9
29 movdqa xmmword ptr [rsp+0x1B0], xmm10
30 movdqa xmmword ptr [rsp+0x1C0], xmm11
31 movdqa xmmword ptr [rsp+0x1D0], xmm12
32 movdqa xmmword ptr [rsp+0x1E0], xmm13
33 movdqa xmmword ptr [rsp+0x1F0], xmm14
34 movdqa xmmword ptr [rsp+0x200], xmm15
35 mov rdi, rcx
36 mov rsi, rdx
37 mov rdx, r8
38 mov rcx, r9
39 mov r8, qword ptr [rbp+0x68]
40 movzx r9, byte ptr [rbp+0x70]
41 neg r9d
42 movd xmm0, r9d
43 pshufd xmm0, xmm0, 0x00
44 movdqa xmmword ptr [rsp+0x130], xmm0
45 movdqa xmm1, xmm0
46 pand xmm1, xmmword ptr [ADD0+rip]
47 pand xmm0, xmmword ptr [ADD1+rip]
48 movdqa xmmword ptr [rsp+0x150], xmm0
49 movd xmm0, r8d
50 pshufd xmm0, xmm0, 0x00
51 paddd xmm0, xmm1
52 movdqa xmmword ptr [rsp+0x110], xmm0
53 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
54 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
55 pcmpgtd xmm1, xmm0
56 shr r8, 32
57 movd xmm2, r8d
58 pshufd xmm2, xmm2, 0x00
59 psubd xmm2, xmm1
60 movdqa xmmword ptr [rsp+0x120], xmm2
61 mov rbx, qword ptr [rbp+0x90]
62 mov r15, rdx
63 shl r15, 6
64 movzx r13d, byte ptr [rbp+0x78]
65 movzx r12d, byte ptr [rbp+0x88]
66 cmp rsi, 4
67 jc 3f
682:
69 movdqu xmm3, xmmword ptr [rcx]
70 pshufd xmm0, xmm3, 0x00
71 pshufd xmm1, xmm3, 0x55
72 pshufd xmm2, xmm3, 0xAA
73 pshufd xmm3, xmm3, 0xFF
74 movdqu xmm7, xmmword ptr [rcx+0x10]
75 pshufd xmm4, xmm7, 0x00
76 pshufd xmm5, xmm7, 0x55
77 pshufd xmm6, xmm7, 0xAA
78 pshufd xmm7, xmm7, 0xFF
79 mov r8, qword ptr [rdi]
80 mov r9, qword ptr [rdi+0x8]
81 mov r10, qword ptr [rdi+0x10]
82 mov r11, qword ptr [rdi+0x18]
83 movzx eax, byte ptr [rbp+0x80]
84 or eax, r13d
85 xor edx, edx
869:
87 mov r14d, eax
88 or eax, r12d
89 add rdx, 64
90 cmp rdx, r15
91 cmovne eax, r14d
92 movdqu xmm8, xmmword ptr [r8+rdx-0x40]
93 movdqu xmm9, xmmword ptr [r9+rdx-0x40]
94 movdqu xmm10, xmmword ptr [r10+rdx-0x40]
95 movdqu xmm11, xmmword ptr [r11+rdx-0x40]
96 movdqa xmm12, xmm8
97 punpckldq xmm8, xmm9
98 punpckhdq xmm12, xmm9
99 movdqa xmm14, xmm10
100 punpckldq xmm10, xmm11
101 punpckhdq xmm14, xmm11
102 movdqa xmm9, xmm8
103 punpcklqdq xmm8, xmm10
104 punpckhqdq xmm9, xmm10
105 movdqa xmm13, xmm12
106 punpcklqdq xmm12, xmm14
107 punpckhqdq xmm13, xmm14
108 movdqa xmmword ptr [rsp], xmm8
109 movdqa xmmword ptr [rsp+0x10], xmm9
110 movdqa xmmword ptr [rsp+0x20], xmm12
111 movdqa xmmword ptr [rsp+0x30], xmm13
112 movdqu xmm8, xmmword ptr [r8+rdx-0x30]
113 movdqu xmm9, xmmword ptr [r9+rdx-0x30]
114 movdqu xmm10, xmmword ptr [r10+rdx-0x30]
115 movdqu xmm11, xmmword ptr [r11+rdx-0x30]
116 movdqa xmm12, xmm8
117 punpckldq xmm8, xmm9
118 punpckhdq xmm12, xmm9
119 movdqa xmm14, xmm10
120 punpckldq xmm10, xmm11
121 punpckhdq xmm14, xmm11
122 movdqa xmm9, xmm8
123 punpcklqdq xmm8, xmm10
124 punpckhqdq xmm9, xmm10
125 movdqa xmm13, xmm12
126 punpcklqdq xmm12, xmm14
127 punpckhqdq xmm13, xmm14
128 movdqa xmmword ptr [rsp+0x40], xmm8
129 movdqa xmmword ptr [rsp+0x50], xmm9
130 movdqa xmmword ptr [rsp+0x60], xmm12
131 movdqa xmmword ptr [rsp+0x70], xmm13
132 movdqu xmm8, xmmword ptr [r8+rdx-0x20]
133 movdqu xmm9, xmmword ptr [r9+rdx-0x20]
134 movdqu xmm10, xmmword ptr [r10+rdx-0x20]
135 movdqu xmm11, xmmword ptr [r11+rdx-0x20]
136 movdqa xmm12, xmm8
137 punpckldq xmm8, xmm9
138 punpckhdq xmm12, xmm9
139 movdqa xmm14, xmm10
140 punpckldq xmm10, xmm11
141 punpckhdq xmm14, xmm11
142 movdqa xmm9, xmm8
143 punpcklqdq xmm8, xmm10
144 punpckhqdq xmm9, xmm10
145 movdqa xmm13, xmm12
146 punpcklqdq xmm12, xmm14
147 punpckhqdq xmm13, xmm14
148 movdqa xmmword ptr [rsp+0x80], xmm8
149 movdqa xmmword ptr [rsp+0x90], xmm9
150 movdqa xmmword ptr [rsp+0xA0], xmm12
151 movdqa xmmword ptr [rsp+0xB0], xmm13
152 movdqu xmm8, xmmword ptr [r8+rdx-0x10]
153 movdqu xmm9, xmmword ptr [r9+rdx-0x10]
154 movdqu xmm10, xmmword ptr [r10+rdx-0x10]
155 movdqu xmm11, xmmword ptr [r11+rdx-0x10]
156 movdqa xmm12, xmm8
157 punpckldq xmm8, xmm9
158 punpckhdq xmm12, xmm9
159 movdqa xmm14, xmm10
160 punpckldq xmm10, xmm11
161 punpckhdq xmm14, xmm11
162 movdqa xmm9, xmm8
163 punpcklqdq xmm8, xmm10
164 punpckhqdq xmm9, xmm10
165 movdqa xmm13, xmm12
166 punpcklqdq xmm12, xmm14
167 punpckhqdq xmm13, xmm14
168 movdqa xmmword ptr [rsp+0xC0], xmm8
169 movdqa xmmword ptr [rsp+0xD0], xmm9
170 movdqa xmmword ptr [rsp+0xE0], xmm12
171 movdqa xmmword ptr [rsp+0xF0], xmm13
172 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
173 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
174 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
175 movdqa xmm12, xmmword ptr [rsp+0x110]
176 movdqa xmm13, xmmword ptr [rsp+0x120]
177 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
178 movd xmm15, eax
179 pshufd xmm15, xmm15, 0x00
180 prefetcht0 [r8+rdx+0x80]
181 prefetcht0 [r9+rdx+0x80]
182 prefetcht0 [r10+rdx+0x80]
183 prefetcht0 [r11+rdx+0x80]
184 paddd xmm0, xmmword ptr [rsp]
185 paddd xmm1, xmmword ptr [rsp+0x20]
186 paddd xmm2, xmmword ptr [rsp+0x40]
187 paddd xmm3, xmmword ptr [rsp+0x60]
188 paddd xmm0, xmm4
189 paddd xmm1, xmm5
190 paddd xmm2, xmm6
191 paddd xmm3, xmm7
192 pxor xmm12, xmm0
193 pxor xmm13, xmm1
194 pxor xmm14, xmm2
195 pxor xmm15, xmm3
196 pshuflw xmm12, xmm12, 0xB1
197 pshufhw xmm12, xmm12, 0xB1
198 pshuflw xmm13, xmm13, 0xB1
199 pshufhw xmm13, xmm13, 0xB1
200 pshuflw xmm14, xmm14, 0xB1
201 pshufhw xmm14, xmm14, 0xB1
202 pshuflw xmm15, xmm15, 0xB1
203 pshufhw xmm15, xmm15, 0xB1
204 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
205 paddd xmm8, xmm12
206 paddd xmm9, xmm13
207 paddd xmm10, xmm14
208 paddd xmm11, xmm15
209 pxor xmm4, xmm8
210 pxor xmm5, xmm9
211 pxor xmm6, xmm10
212 pxor xmm7, xmm11
213 movdqa xmmword ptr [rsp+0x100], xmm8
214 movdqa xmm8, xmm4
215 psrld xmm8, 12
216 pslld xmm4, 20
217 por xmm4, xmm8
218 movdqa xmm8, xmm5
219 psrld xmm8, 12
220 pslld xmm5, 20
221 por xmm5, xmm8
222 movdqa xmm8, xmm6
223 psrld xmm8, 12
224 pslld xmm6, 20
225 por xmm6, xmm8
226 movdqa xmm8, xmm7
227 psrld xmm8, 12
228 pslld xmm7, 20
229 por xmm7, xmm8
230 paddd xmm0, xmmword ptr [rsp+0x10]
231 paddd xmm1, xmmword ptr [rsp+0x30]
232 paddd xmm2, xmmword ptr [rsp+0x50]
233 paddd xmm3, xmmword ptr [rsp+0x70]
234 paddd xmm0, xmm4
235 paddd xmm1, xmm5
236 paddd xmm2, xmm6
237 paddd xmm3, xmm7
238 pxor xmm12, xmm0
239 pxor xmm13, xmm1
240 pxor xmm14, xmm2
241 pxor xmm15, xmm3
242 movdqa xmm8, xmm12
243 psrld xmm12, 8
244 pslld xmm8, 24
245 pxor xmm12, xmm8
246 movdqa xmm8, xmm13
247 psrld xmm13, 8
248 pslld xmm8, 24
249 pxor xmm13, xmm8
250 movdqa xmm8, xmm14
251 psrld xmm14, 8
252 pslld xmm8, 24
253 pxor xmm14, xmm8
254 movdqa xmm8, xmm15
255 psrld xmm15, 8
256 pslld xmm8, 24
257 pxor xmm15, xmm8
258 movdqa xmm8, xmmword ptr [rsp+0x100]
259 paddd xmm8, xmm12
260 paddd xmm9, xmm13
261 paddd xmm10, xmm14
262 paddd xmm11, xmm15
263 pxor xmm4, xmm8
264 pxor xmm5, xmm9
265 pxor xmm6, xmm10
266 pxor xmm7, xmm11
267 movdqa xmmword ptr [rsp+0x100], xmm8
268 movdqa xmm8, xmm4
269 psrld xmm8, 7
270 pslld xmm4, 25
271 por xmm4, xmm8
272 movdqa xmm8, xmm5
273 psrld xmm8, 7
274 pslld xmm5, 25
275 por xmm5, xmm8
276 movdqa xmm8, xmm6
277 psrld xmm8, 7
278 pslld xmm6, 25
279 por xmm6, xmm8
280 movdqa xmm8, xmm7
281 psrld xmm8, 7
282 pslld xmm7, 25
283 por xmm7, xmm8
284 paddd xmm0, xmmword ptr [rsp+0x80]
285 paddd xmm1, xmmword ptr [rsp+0xA0]
286 paddd xmm2, xmmword ptr [rsp+0xC0]
287 paddd xmm3, xmmword ptr [rsp+0xE0]
288 paddd xmm0, xmm5
289 paddd xmm1, xmm6
290 paddd xmm2, xmm7
291 paddd xmm3, xmm4
292 pxor xmm15, xmm0
293 pxor xmm12, xmm1
294 pxor xmm13, xmm2
295 pxor xmm14, xmm3
296 pshuflw xmm15, xmm15, 0xB1
297 pshufhw xmm15, xmm15, 0xB1
298 pshuflw xmm12, xmm12, 0xB1
299 pshufhw xmm12, xmm12, 0xB1
300 pshuflw xmm13, xmm13, 0xB1
301 pshufhw xmm13, xmm13, 0xB1
302 pshuflw xmm14, xmm14, 0xB1
303 pshufhw xmm14, xmm14, 0xB1
304 paddd xmm10, xmm15
305 paddd xmm11, xmm12
306 movdqa xmm8, xmmword ptr [rsp+0x100]
307 paddd xmm8, xmm13
308 paddd xmm9, xmm14
309 pxor xmm5, xmm10
310 pxor xmm6, xmm11
311 pxor xmm7, xmm8
312 pxor xmm4, xmm9
313 movdqa xmmword ptr [rsp+0x100], xmm8
314 movdqa xmm8, xmm5
315 psrld xmm8, 12
316 pslld xmm5, 20
317 por xmm5, xmm8
318 movdqa xmm8, xmm6
319 psrld xmm8, 12
320 pslld xmm6, 20
321 por xmm6, xmm8
322 movdqa xmm8, xmm7
323 psrld xmm8, 12
324 pslld xmm7, 20
325 por xmm7, xmm8
326 movdqa xmm8, xmm4
327 psrld xmm8, 12
328 pslld xmm4, 20
329 por xmm4, xmm8
330 paddd xmm0, xmmword ptr [rsp+0x90]
331 paddd xmm1, xmmword ptr [rsp+0xB0]
332 paddd xmm2, xmmword ptr [rsp+0xD0]
333 paddd xmm3, xmmword ptr [rsp+0xF0]
334 paddd xmm0, xmm5
335 paddd xmm1, xmm6
336 paddd xmm2, xmm7
337 paddd xmm3, xmm4
338 pxor xmm15, xmm0
339 pxor xmm12, xmm1
340 pxor xmm13, xmm2
341 pxor xmm14, xmm3
342 movdqa xmm8, xmm15
343 psrld xmm15, 8
344 pslld xmm8, 24
345 pxor xmm15, xmm8
346 movdqa xmm8, xmm12
347 psrld xmm12, 8
348 pslld xmm8, 24
349 pxor xmm12, xmm8
350 movdqa xmm8, xmm13
351 psrld xmm13, 8
352 pslld xmm8, 24
353 pxor xmm13, xmm8
354 movdqa xmm8, xmm14
355 psrld xmm14, 8
356 pslld xmm8, 24
357 pxor xmm14, xmm8
358 paddd xmm10, xmm15
359 paddd xmm11, xmm12
360 movdqa xmm8, xmmword ptr [rsp+0x100]
361 paddd xmm8, xmm13
362 paddd xmm9, xmm14
363 pxor xmm5, xmm10
364 pxor xmm6, xmm11
365 pxor xmm7, xmm8
366 pxor xmm4, xmm9
367 movdqa xmmword ptr [rsp+0x100], xmm8
368 movdqa xmm8, xmm5
369 psrld xmm8, 7
370 pslld xmm5, 25
371 por xmm5, xmm8
372 movdqa xmm8, xmm6
373 psrld xmm8, 7
374 pslld xmm6, 25
375 por xmm6, xmm8
376 movdqa xmm8, xmm7
377 psrld xmm8, 7
378 pslld xmm7, 25
379 por xmm7, xmm8
380 movdqa xmm8, xmm4
381 psrld xmm8, 7
382 pslld xmm4, 25
383 por xmm4, xmm8
384 paddd xmm0, xmmword ptr [rsp+0x20]
385 paddd xmm1, xmmword ptr [rsp+0x30]
386 paddd xmm2, xmmword ptr [rsp+0x70]
387 paddd xmm3, xmmword ptr [rsp+0x40]
388 paddd xmm0, xmm4
389 paddd xmm1, xmm5
390 paddd xmm2, xmm6
391 paddd xmm3, xmm7
392 pxor xmm12, xmm0
393 pxor xmm13, xmm1
394 pxor xmm14, xmm2
395 pxor xmm15, xmm3
396 pshuflw xmm12, xmm12, 0xB1
397 pshufhw xmm12, xmm12, 0xB1
398 pshuflw xmm13, xmm13, 0xB1
399 pshufhw xmm13, xmm13, 0xB1
400 pshuflw xmm14, xmm14, 0xB1
401 pshufhw xmm14, xmm14, 0xB1
402 pshuflw xmm15, xmm15, 0xB1
403 pshufhw xmm15, xmm15, 0xB1
404 movdqa xmm8, xmmword ptr [rsp+0x100]
405 paddd xmm8, xmm12
406 paddd xmm9, xmm13
407 paddd xmm10, xmm14
408 paddd xmm11, xmm15
409 pxor xmm4, xmm8
410 pxor xmm5, xmm9
411 pxor xmm6, xmm10
412 pxor xmm7, xmm11
413 movdqa xmmword ptr [rsp+0x100], xmm8
414 movdqa xmm8, xmm4
415 psrld xmm8, 12
416 pslld xmm4, 20
417 por xmm4, xmm8
418 movdqa xmm8, xmm5
419 psrld xmm8, 12
420 pslld xmm5, 20
421 por xmm5, xmm8
422 movdqa xmm8, xmm6
423 psrld xmm8, 12
424 pslld xmm6, 20
425 por xmm6, xmm8
426 movdqa xmm8, xmm7
427 psrld xmm8, 12
428 pslld xmm7, 20
429 por xmm7, xmm8
430 paddd xmm0, xmmword ptr [rsp+0x60]
431 paddd xmm1, xmmword ptr [rsp+0xA0]
432 paddd xmm2, xmmword ptr [rsp]
433 paddd xmm3, xmmword ptr [rsp+0xD0]
434 paddd xmm0, xmm4
435 paddd xmm1, xmm5
436 paddd xmm2, xmm6
437 paddd xmm3, xmm7
438 pxor xmm12, xmm0
439 pxor xmm13, xmm1
440 pxor xmm14, xmm2
441 pxor xmm15, xmm3
442 movdqa xmm8, xmm12
443 psrld xmm12, 8
444 pslld xmm8, 24
445 pxor xmm12, xmm8
446 movdqa xmm8, xmm13
447 psrld xmm13, 8
448 pslld xmm8, 24
449 pxor xmm13, xmm8
450 movdqa xmm8, xmm14
451 psrld xmm14, 8
452 pslld xmm8, 24
453 pxor xmm14, xmm8
454 movdqa xmm8, xmm15
455 psrld xmm15, 8
456 pslld xmm8, 24
457 pxor xmm15, xmm8
458 movdqa xmm8, xmmword ptr [rsp+0x100]
459 paddd xmm8, xmm12
460 paddd xmm9, xmm13
461 paddd xmm10, xmm14
462 paddd xmm11, xmm15
463 pxor xmm4, xmm8
464 pxor xmm5, xmm9
465 pxor xmm6, xmm10
466 pxor xmm7, xmm11
467 movdqa xmmword ptr [rsp+0x100], xmm8
468 movdqa xmm8, xmm4
469 psrld xmm8, 7
470 pslld xmm4, 25
471 por xmm4, xmm8
472 movdqa xmm8, xmm5
473 psrld xmm8, 7
474 pslld xmm5, 25
475 por xmm5, xmm8
476 movdqa xmm8, xmm6
477 psrld xmm8, 7
478 pslld xmm6, 25
479 por xmm6, xmm8
480 movdqa xmm8, xmm7
481 psrld xmm8, 7
482 pslld xmm7, 25
483 por xmm7, xmm8
484 paddd xmm0, xmmword ptr [rsp+0x10]
485 paddd xmm1, xmmword ptr [rsp+0xC0]
486 paddd xmm2, xmmword ptr [rsp+0x90]
487 paddd xmm3, xmmword ptr [rsp+0xF0]
488 paddd xmm0, xmm5
489 paddd xmm1, xmm6
490 paddd xmm2, xmm7
491 paddd xmm3, xmm4
492 pxor xmm15, xmm0
493 pxor xmm12, xmm1
494 pxor xmm13, xmm2
495 pxor xmm14, xmm3
496 pshuflw xmm15, xmm15, 0xB1
497 pshufhw xmm15, xmm15, 0xB1
498 pshuflw xmm12, xmm12, 0xB1
499 pshufhw xmm12, xmm12, 0xB1
500 pshuflw xmm13, xmm13, 0xB1
501 pshufhw xmm13, xmm13, 0xB1
502 pshuflw xmm14, xmm14, 0xB1
503 pshufhw xmm14, xmm14, 0xB1
504 paddd xmm10, xmm15
505 paddd xmm11, xmm12
506 movdqa xmm8, xmmword ptr [rsp+0x100]
507 paddd xmm8, xmm13
508 paddd xmm9, xmm14
509 pxor xmm5, xmm10
510 pxor xmm6, xmm11
511 pxor xmm7, xmm8
512 pxor xmm4, xmm9
513 movdqa xmmword ptr [rsp+0x100], xmm8
514 movdqa xmm8, xmm5
515 psrld xmm8, 12
516 pslld xmm5, 20
517 por xmm5, xmm8
518 movdqa xmm8, xmm6
519 psrld xmm8, 12
520 pslld xmm6, 20
521 por xmm6, xmm8
522 movdqa xmm8, xmm7
523 psrld xmm8, 12
524 pslld xmm7, 20
525 por xmm7, xmm8
526 movdqa xmm8, xmm4
527 psrld xmm8, 12
528 pslld xmm4, 20
529 por xmm4, xmm8
530 paddd xmm0, xmmword ptr [rsp+0xB0]
531 paddd xmm1, xmmword ptr [rsp+0x50]
532 paddd xmm2, xmmword ptr [rsp+0xE0]
533 paddd xmm3, xmmword ptr [rsp+0x80]
534 paddd xmm0, xmm5
535 paddd xmm1, xmm6
536 paddd xmm2, xmm7
537 paddd xmm3, xmm4
538 pxor xmm15, xmm0
539 pxor xmm12, xmm1
540 pxor xmm13, xmm2
541 pxor xmm14, xmm3
542 movdqa xmm8, xmm15
543 psrld xmm15, 8
544 pslld xmm8, 24
545 pxor xmm15, xmm8
546 movdqa xmm8, xmm12
547 psrld xmm12, 8
548 pslld xmm8, 24
549 pxor xmm12, xmm8
550 movdqa xmm8, xmm13
551 psrld xmm13, 8
552 pslld xmm8, 24
553 pxor xmm13, xmm8
554 movdqa xmm8, xmm14
555 psrld xmm14, 8
556 pslld xmm8, 24
557 pxor xmm14, xmm8
558 paddd xmm10, xmm15
559 paddd xmm11, xmm12
560 movdqa xmm8, xmmword ptr [rsp+0x100]
561 paddd xmm8, xmm13
562 paddd xmm9, xmm14
563 pxor xmm5, xmm10
564 pxor xmm6, xmm11
565 pxor xmm7, xmm8
566 pxor xmm4, xmm9
567 movdqa xmmword ptr [rsp+0x100], xmm8
568 movdqa xmm8, xmm5
569 psrld xmm8, 7
570 pslld xmm5, 25
571 por xmm5, xmm8
572 movdqa xmm8, xmm6
573 psrld xmm8, 7
574 pslld xmm6, 25
575 por xmm6, xmm8
576 movdqa xmm8, xmm7
577 psrld xmm8, 7
578 pslld xmm7, 25
579 por xmm7, xmm8
580 movdqa xmm8, xmm4
581 psrld xmm8, 7
582 pslld xmm4, 25
583 por xmm4, xmm8
584 paddd xmm0, xmmword ptr [rsp+0x30]
585 paddd xmm1, xmmword ptr [rsp+0xA0]
586 paddd xmm2, xmmword ptr [rsp+0xD0]
587 paddd xmm3, xmmword ptr [rsp+0x70]
588 paddd xmm0, xmm4
589 paddd xmm1, xmm5
590 paddd xmm2, xmm6
591 paddd xmm3, xmm7
592 pxor xmm12, xmm0
593 pxor xmm13, xmm1
594 pxor xmm14, xmm2
595 pxor xmm15, xmm3
596 pshuflw xmm12, xmm12, 0xB1
597 pshufhw xmm12, xmm12, 0xB1
598 pshuflw xmm13, xmm13, 0xB1
599 pshufhw xmm13, xmm13, 0xB1
600 pshuflw xmm14, xmm14, 0xB1
601 pshufhw xmm14, xmm14, 0xB1
602 pshuflw xmm15, xmm15, 0xB1
603 pshufhw xmm15, xmm15, 0xB1
604 movdqa xmm8, xmmword ptr [rsp+0x100]
605 paddd xmm8, xmm12
606 paddd xmm9, xmm13
607 paddd xmm10, xmm14
608 paddd xmm11, xmm15
609 pxor xmm4, xmm8
610 pxor xmm5, xmm9
611 pxor xmm6, xmm10
612 pxor xmm7, xmm11
613 movdqa xmmword ptr [rsp+0x100], xmm8
614 movdqa xmm8, xmm4
615 psrld xmm8, 12
616 pslld xmm4, 20
617 por xmm4, xmm8
618 movdqa xmm8, xmm5
619 psrld xmm8, 12
620 pslld xmm5, 20
621 por xmm5, xmm8
622 movdqa xmm8, xmm6
623 psrld xmm8, 12
624 pslld xmm6, 20
625 por xmm6, xmm8
626 movdqa xmm8, xmm7
627 psrld xmm8, 12
628 pslld xmm7, 20
629 por xmm7, xmm8
630 paddd xmm0, xmmword ptr [rsp+0x40]
631 paddd xmm1, xmmword ptr [rsp+0xC0]
632 paddd xmm2, xmmword ptr [rsp+0x20]
633 paddd xmm3, xmmword ptr [rsp+0xE0]
634 paddd xmm0, xmm4
635 paddd xmm1, xmm5
636 paddd xmm2, xmm6
637 paddd xmm3, xmm7
638 pxor xmm12, xmm0
639 pxor xmm13, xmm1
640 pxor xmm14, xmm2
641 pxor xmm15, xmm3
642 movdqa xmm8, xmm12
643 psrld xmm12, 8
644 pslld xmm8, 24
645 pxor xmm12, xmm8
646 movdqa xmm8, xmm13
647 psrld xmm13, 8
648 pslld xmm8, 24
649 pxor xmm13, xmm8
650 movdqa xmm8, xmm14
651 psrld xmm14, 8
652 pslld xmm8, 24
653 pxor xmm14, xmm8
654 movdqa xmm8, xmm15
655 psrld xmm15, 8
656 pslld xmm8, 24
657 pxor xmm15, xmm8
658 movdqa xmm8, xmmword ptr [rsp+0x100]
659 paddd xmm8, xmm12
660 paddd xmm9, xmm13
661 paddd xmm10, xmm14
662 paddd xmm11, xmm15
663 pxor xmm4, xmm8
664 pxor xmm5, xmm9
665 pxor xmm6, xmm10
666 pxor xmm7, xmm11
667 movdqa xmmword ptr [rsp+0x100], xmm8
668 movdqa xmm8, xmm4
669 psrld xmm8, 7
670 pslld xmm4, 25
671 por xmm4, xmm8
672 movdqa xmm8, xmm5
673 psrld xmm8, 7
674 pslld xmm5, 25
675 por xmm5, xmm8
676 movdqa xmm8, xmm6
677 psrld xmm8, 7
678 pslld xmm6, 25
679 por xmm6, xmm8
680 movdqa xmm8, xmm7
681 psrld xmm8, 7
682 pslld xmm7, 25
683 por xmm7, xmm8
684 paddd xmm0, xmmword ptr [rsp+0x60]
685 paddd xmm1, xmmword ptr [rsp+0x90]
686 paddd xmm2, xmmword ptr [rsp+0xB0]
687 paddd xmm3, xmmword ptr [rsp+0x80]
688 paddd xmm0, xmm5
689 paddd xmm1, xmm6
690 paddd xmm2, xmm7
691 paddd xmm3, xmm4
692 pxor xmm15, xmm0
693 pxor xmm12, xmm1
694 pxor xmm13, xmm2
695 pxor xmm14, xmm3
696 pshuflw xmm15, xmm15, 0xB1
697 pshufhw xmm15, xmm15, 0xB1
698 pshuflw xmm12, xmm12, 0xB1
699 pshufhw xmm12, xmm12, 0xB1
700 pshuflw xmm13, xmm13, 0xB1
701 pshufhw xmm13, xmm13, 0xB1
702 pshuflw xmm14, xmm14, 0xB1
703 pshufhw xmm14, xmm14, 0xB1
704 paddd xmm10, xmm15
705 paddd xmm11, xmm12
706 movdqa xmm8, xmmword ptr [rsp+0x100]
707 paddd xmm8, xmm13
708 paddd xmm9, xmm14
709 pxor xmm5, xmm10
710 pxor xmm6, xmm11
711 pxor xmm7, xmm8
712 pxor xmm4, xmm9
713 movdqa xmmword ptr [rsp+0x100], xmm8
714 movdqa xmm8, xmm5
715 psrld xmm8, 12
716 pslld xmm5, 20
717 por xmm5, xmm8
718 movdqa xmm8, xmm6
719 psrld xmm8, 12
720 pslld xmm6, 20
721 por xmm6, xmm8
722 movdqa xmm8, xmm7
723 psrld xmm8, 12
724 pslld xmm7, 20
725 por xmm7, xmm8
726 movdqa xmm8, xmm4
727 psrld xmm8, 12
728 pslld xmm4, 20
729 por xmm4, xmm8
730 paddd xmm0, xmmword ptr [rsp+0x50]
731 paddd xmm1, xmmword ptr [rsp]
732 paddd xmm2, xmmword ptr [rsp+0xF0]
733 paddd xmm3, xmmword ptr [rsp+0x10]
734 paddd xmm0, xmm5
735 paddd xmm1, xmm6
736 paddd xmm2, xmm7
737 paddd xmm3, xmm4
738 pxor xmm15, xmm0
739 pxor xmm12, xmm1
740 pxor xmm13, xmm2
741 pxor xmm14, xmm3
742 movdqa xmm8, xmm15
743 psrld xmm15, 8
744 pslld xmm8, 24
745 pxor xmm15, xmm8
746 movdqa xmm8, xmm12
747 psrld xmm12, 8
748 pslld xmm8, 24
749 pxor xmm12, xmm8
750 movdqa xmm8, xmm13
751 psrld xmm13, 8
752 pslld xmm8, 24
753 pxor xmm13, xmm8
754 movdqa xmm8, xmm14
755 psrld xmm14, 8
756 pslld xmm8, 24
757 pxor xmm14, xmm8
758 paddd xmm10, xmm15
759 paddd xmm11, xmm12
760 movdqa xmm8, xmmword ptr [rsp+0x100]
761 paddd xmm8, xmm13
762 paddd xmm9, xmm14
763 pxor xmm5, xmm10
764 pxor xmm6, xmm11
765 pxor xmm7, xmm8
766 pxor xmm4, xmm9
767 movdqa xmmword ptr [rsp+0x100], xmm8
768 movdqa xmm8, xmm5
769 psrld xmm8, 7
770 pslld xmm5, 25
771 por xmm5, xmm8
772 movdqa xmm8, xmm6
773 psrld xmm8, 7
774 pslld xmm6, 25
775 por xmm6, xmm8
776 movdqa xmm8, xmm7
777 psrld xmm8, 7
778 pslld xmm7, 25
779 por xmm7, xmm8
780 movdqa xmm8, xmm4
781 psrld xmm8, 7
782 pslld xmm4, 25
783 por xmm4, xmm8
784 paddd xmm0, xmmword ptr [rsp+0xA0]
785 paddd xmm1, xmmword ptr [rsp+0xC0]
786 paddd xmm2, xmmword ptr [rsp+0xE0]
787 paddd xmm3, xmmword ptr [rsp+0xD0]
788 paddd xmm0, xmm4
789 paddd xmm1, xmm5
790 paddd xmm2, xmm6
791 paddd xmm3, xmm7
792 pxor xmm12, xmm0
793 pxor xmm13, xmm1
794 pxor xmm14, xmm2
795 pxor xmm15, xmm3
796 pshuflw xmm12, xmm12, 0xB1
797 pshufhw xmm12, xmm12, 0xB1
798 pshuflw xmm13, xmm13, 0xB1
799 pshufhw xmm13, xmm13, 0xB1
800 pshuflw xmm14, xmm14, 0xB1
801 pshufhw xmm14, xmm14, 0xB1
802 pshuflw xmm15, xmm15, 0xB1
803 pshufhw xmm15, xmm15, 0xB1
804 movdqa xmm8, xmmword ptr [rsp+0x100]
805 paddd xmm8, xmm12
806 paddd xmm9, xmm13
807 paddd xmm10, xmm14
808 paddd xmm11, xmm15
809 pxor xmm4, xmm8
810 pxor xmm5, xmm9
811 pxor xmm6, xmm10
812 pxor xmm7, xmm11
813 movdqa xmmword ptr [rsp+0x100], xmm8
814 movdqa xmm8, xmm4
815 psrld xmm8, 12
816 pslld xmm4, 20
817 por xmm4, xmm8
818 movdqa xmm8, xmm5
819 psrld xmm8, 12
820 pslld xmm5, 20
821 por xmm5, xmm8
822 movdqa xmm8, xmm6
823 psrld xmm8, 12
824 pslld xmm6, 20
825 por xmm6, xmm8
826 movdqa xmm8, xmm7
827 psrld xmm8, 12
828 pslld xmm7, 20
829 por xmm7, xmm8
830 paddd xmm0, xmmword ptr [rsp+0x70]
831 paddd xmm1, xmmword ptr [rsp+0x90]
832 paddd xmm2, xmmword ptr [rsp+0x30]
833 paddd xmm3, xmmword ptr [rsp+0xF0]
834 paddd xmm0, xmm4
835 paddd xmm1, xmm5
836 paddd xmm2, xmm6
837 paddd xmm3, xmm7
838 pxor xmm12, xmm0
839 pxor xmm13, xmm1
840 pxor xmm14, xmm2
841 pxor xmm15, xmm3
842 movdqa xmm8, xmm12
843 psrld xmm12, 8
844 pslld xmm8, 24
845 pxor xmm12, xmm8
846 movdqa xmm8, xmm13
847 psrld xmm13, 8
848 pslld xmm8, 24
849 pxor xmm13, xmm8
850 movdqa xmm8, xmm14
851 psrld xmm14, 8
852 pslld xmm8, 24
853 pxor xmm14, xmm8
854 movdqa xmm8, xmm15
855 psrld xmm15, 8
856 pslld xmm8, 24
857 pxor xmm15, xmm8
858 movdqa xmm8, xmmword ptr [rsp+0x100]
859 paddd xmm8, xmm12
860 paddd xmm9, xmm13
861 paddd xmm10, xmm14
862 paddd xmm11, xmm15
863 pxor xmm4, xmm8
864 pxor xmm5, xmm9
865 pxor xmm6, xmm10
866 pxor xmm7, xmm11
867 movdqa xmmword ptr [rsp+0x100], xmm8
868 movdqa xmm8, xmm4
869 psrld xmm8, 7
870 pslld xmm4, 25
871 por xmm4, xmm8
872 movdqa xmm8, xmm5
873 psrld xmm8, 7
874 pslld xmm5, 25
875 por xmm5, xmm8
876 movdqa xmm8, xmm6
877 psrld xmm8, 7
878 pslld xmm6, 25
879 por xmm6, xmm8
880 movdqa xmm8, xmm7
881 psrld xmm8, 7
882 pslld xmm7, 25
883 por xmm7, xmm8
884 paddd xmm0, xmmword ptr [rsp+0x40]
885 paddd xmm1, xmmword ptr [rsp+0xB0]
886 paddd xmm2, xmmword ptr [rsp+0x50]
887 paddd xmm3, xmmword ptr [rsp+0x10]
888 paddd xmm0, xmm5
889 paddd xmm1, xmm6
890 paddd xmm2, xmm7
891 paddd xmm3, xmm4
892 pxor xmm15, xmm0
893 pxor xmm12, xmm1
894 pxor xmm13, xmm2
895 pxor xmm14, xmm3
896 pshuflw xmm15, xmm15, 0xB1
897 pshufhw xmm15, xmm15, 0xB1
898 pshuflw xmm12, xmm12, 0xB1
899 pshufhw xmm12, xmm12, 0xB1
900 pshuflw xmm13, xmm13, 0xB1
901 pshufhw xmm13, xmm13, 0xB1
902 pshuflw xmm14, xmm14, 0xB1
903 pshufhw xmm14, xmm14, 0xB1
904 paddd xmm10, xmm15
905 paddd xmm11, xmm12
906 movdqa xmm8, xmmword ptr [rsp+0x100]
907 paddd xmm8, xmm13
908 paddd xmm9, xmm14
909 pxor xmm5, xmm10
910 pxor xmm6, xmm11
911 pxor xmm7, xmm8
912 pxor xmm4, xmm9
913 movdqa xmmword ptr [rsp+0x100], xmm8
914 movdqa xmm8, xmm5
915 psrld xmm8, 12
916 pslld xmm5, 20
917 por xmm5, xmm8
918 movdqa xmm8, xmm6
919 psrld xmm8, 12
920 pslld xmm6, 20
921 por xmm6, xmm8
922 movdqa xmm8, xmm7
923 psrld xmm8, 12
924 pslld xmm7, 20
925 por xmm7, xmm8
926 movdqa xmm8, xmm4
927 psrld xmm8, 12
928 pslld xmm4, 20
929 por xmm4, xmm8
930 paddd xmm0, xmmword ptr [rsp]
931 paddd xmm1, xmmword ptr [rsp+0x20]
932 paddd xmm2, xmmword ptr [rsp+0x80]
933 paddd xmm3, xmmword ptr [rsp+0x60]
934 paddd xmm0, xmm5
935 paddd xmm1, xmm6
936 paddd xmm2, xmm7
937 paddd xmm3, xmm4
938 pxor xmm15, xmm0
939 pxor xmm12, xmm1
940 pxor xmm13, xmm2
941 pxor xmm14, xmm3
942 movdqa xmm8, xmm15
943 psrld xmm15, 8
944 pslld xmm8, 24
945 pxor xmm15, xmm8
946 movdqa xmm8, xmm12
947 psrld xmm12, 8
948 pslld xmm8, 24
949 pxor xmm12, xmm8
950 movdqa xmm8, xmm13
951 psrld xmm13, 8
952 pslld xmm8, 24
953 pxor xmm13, xmm8
954 movdqa xmm8, xmm14
955 psrld xmm14, 8
956 pslld xmm8, 24
957 pxor xmm14, xmm8
958 paddd xmm10, xmm15
959 paddd xmm11, xmm12
960 movdqa xmm8, xmmword ptr [rsp+0x100]
961 paddd xmm8, xmm13
962 paddd xmm9, xmm14
963 pxor xmm5, xmm10
964 pxor xmm6, xmm11
965 pxor xmm7, xmm8
966 pxor xmm4, xmm9
967 movdqa xmmword ptr [rsp+0x100], xmm8
968 movdqa xmm8, xmm5
969 psrld xmm8, 7
970 pslld xmm5, 25
971 por xmm5, xmm8
972 movdqa xmm8, xmm6
973 psrld xmm8, 7
974 pslld xmm6, 25
975 por xmm6, xmm8
976 movdqa xmm8, xmm7
977 psrld xmm8, 7
978 pslld xmm7, 25
979 por xmm7, xmm8
980 movdqa xmm8, xmm4
981 psrld xmm8, 7
982 pslld xmm4, 25
983 por xmm4, xmm8
984 paddd xmm0, xmmword ptr [rsp+0xC0]
985 paddd xmm1, xmmword ptr [rsp+0x90]
986 paddd xmm2, xmmword ptr [rsp+0xF0]
987 paddd xmm3, xmmword ptr [rsp+0xE0]
988 paddd xmm0, xmm4
989 paddd xmm1, xmm5
990 paddd xmm2, xmm6
991 paddd xmm3, xmm7
992 pxor xmm12, xmm0
993 pxor xmm13, xmm1
994 pxor xmm14, xmm2
995 pxor xmm15, xmm3
996 pshuflw xmm12, xmm12, 0xB1
997 pshufhw xmm12, xmm12, 0xB1
998 pshuflw xmm13, xmm13, 0xB1
999 pshufhw xmm13, xmm13, 0xB1
1000 pshuflw xmm14, xmm14, 0xB1
1001 pshufhw xmm14, xmm14, 0xB1
1002 pshuflw xmm15, xmm15, 0xB1
1003 pshufhw xmm15, xmm15, 0xB1
1004 movdqa xmm8, xmmword ptr [rsp+0x100]
1005 paddd xmm8, xmm12
1006 paddd xmm9, xmm13
1007 paddd xmm10, xmm14
1008 paddd xmm11, xmm15
1009 pxor xmm4, xmm8
1010 pxor xmm5, xmm9
1011 pxor xmm6, xmm10
1012 pxor xmm7, xmm11
1013 movdqa xmmword ptr [rsp+0x100], xmm8
1014 movdqa xmm8, xmm4
1015 psrld xmm8, 12
1016 pslld xmm4, 20
1017 por xmm4, xmm8
1018 movdqa xmm8, xmm5
1019 psrld xmm8, 12
1020 pslld xmm5, 20
1021 por xmm5, xmm8
1022 movdqa xmm8, xmm6
1023 psrld xmm8, 12
1024 pslld xmm6, 20
1025 por xmm6, xmm8
1026 movdqa xmm8, xmm7
1027 psrld xmm8, 12
1028 pslld xmm7, 20
1029 por xmm7, xmm8
1030 paddd xmm0, xmmword ptr [rsp+0xD0]
1031 paddd xmm1, xmmword ptr [rsp+0xB0]
1032 paddd xmm2, xmmword ptr [rsp+0xA0]
1033 paddd xmm3, xmmword ptr [rsp+0x80]
1034 paddd xmm0, xmm4
1035 paddd xmm1, xmm5
1036 paddd xmm2, xmm6
1037 paddd xmm3, xmm7
1038 pxor xmm12, xmm0
1039 pxor xmm13, xmm1
1040 pxor xmm14, xmm2
1041 pxor xmm15, xmm3
1042 movdqa xmm8, xmm12
1043 psrld xmm12, 8
1044 pslld xmm8, 24
1045 pxor xmm12, xmm8
1046 movdqa xmm8, xmm13
1047 psrld xmm13, 8
1048 pslld xmm8, 24
1049 pxor xmm13, xmm8
1050 movdqa xmm8, xmm14
1051 psrld xmm14, 8
1052 pslld xmm8, 24
1053 pxor xmm14, xmm8
1054 movdqa xmm8, xmm15
1055 psrld xmm15, 8
1056 pslld xmm8, 24
1057 pxor xmm15, xmm8
1058 movdqa xmm8, xmmword ptr [rsp+0x100]
1059 paddd xmm8, xmm12
1060 paddd xmm9, xmm13
1061 paddd xmm10, xmm14
1062 paddd xmm11, xmm15
1063 pxor xmm4, xmm8
1064 pxor xmm5, xmm9
1065 pxor xmm6, xmm10
1066 pxor xmm7, xmm11
1067 movdqa xmmword ptr [rsp+0x100], xmm8
1068 movdqa xmm8, xmm4
1069 psrld xmm8, 7
1070 pslld xmm4, 25
1071 por xmm4, xmm8
1072 movdqa xmm8, xmm5
1073 psrld xmm8, 7
1074 pslld xmm5, 25
1075 por xmm5, xmm8
1076 movdqa xmm8, xmm6
1077 psrld xmm8, 7
1078 pslld xmm6, 25
1079 por xmm6, xmm8
1080 movdqa xmm8, xmm7
1081 psrld xmm8, 7
1082 pslld xmm7, 25
1083 por xmm7, xmm8
1084 paddd xmm0, xmmword ptr [rsp+0x70]
1085 paddd xmm1, xmmword ptr [rsp+0x50]
1086 paddd xmm2, xmmword ptr [rsp]
1087 paddd xmm3, xmmword ptr [rsp+0x60]
1088 paddd xmm0, xmm5
1089 paddd xmm1, xmm6
1090 paddd xmm2, xmm7
1091 paddd xmm3, xmm4
1092 pxor xmm15, xmm0
1093 pxor xmm12, xmm1
1094 pxor xmm13, xmm2
1095 pxor xmm14, xmm3
1096 pshuflw xmm15, xmm15, 0xB1
1097 pshufhw xmm15, xmm15, 0xB1
1098 pshuflw xmm12, xmm12, 0xB1
1099 pshufhw xmm12, xmm12, 0xB1
1100 pshuflw xmm13, xmm13, 0xB1
1101 pshufhw xmm13, xmm13, 0xB1
1102 pshuflw xmm14, xmm14, 0xB1
1103 pshufhw xmm14, xmm14, 0xB1
1104 paddd xmm10, xmm15
1105 paddd xmm11, xmm12
1106 movdqa xmm8, xmmword ptr [rsp+0x100]
1107 paddd xmm8, xmm13
1108 paddd xmm9, xmm14
1109 pxor xmm5, xmm10
1110 pxor xmm6, xmm11
1111 pxor xmm7, xmm8
1112 pxor xmm4, xmm9
1113 movdqa xmmword ptr [rsp+0x100], xmm8
1114 movdqa xmm8, xmm5
1115 psrld xmm8, 12
1116 pslld xmm5, 20
1117 por xmm5, xmm8
1118 movdqa xmm8, xmm6
1119 psrld xmm8, 12
1120 pslld xmm6, 20
1121 por xmm6, xmm8
1122 movdqa xmm8, xmm7
1123 psrld xmm8, 12
1124 pslld xmm7, 20
1125 por xmm7, xmm8
1126 movdqa xmm8, xmm4
1127 psrld xmm8, 12
1128 pslld xmm4, 20
1129 por xmm4, xmm8
1130 paddd xmm0, xmmword ptr [rsp+0x20]
1131 paddd xmm1, xmmword ptr [rsp+0x30]
1132 paddd xmm2, xmmword ptr [rsp+0x10]
1133 paddd xmm3, xmmword ptr [rsp+0x40]
1134 paddd xmm0, xmm5
1135 paddd xmm1, xmm6
1136 paddd xmm2, xmm7
1137 paddd xmm3, xmm4
1138 pxor xmm15, xmm0
1139 pxor xmm12, xmm1
1140 pxor xmm13, xmm2
1141 pxor xmm14, xmm3
1142 movdqa xmm8, xmm15
1143 psrld xmm15, 8
1144 pslld xmm8, 24
1145 pxor xmm15, xmm8
1146 movdqa xmm8, xmm12
1147 psrld xmm12, 8
1148 pslld xmm8, 24
1149 pxor xmm12, xmm8
1150 movdqa xmm8, xmm13
1151 psrld xmm13, 8
1152 pslld xmm8, 24
1153 pxor xmm13, xmm8
1154 movdqa xmm8, xmm14
1155 psrld xmm14, 8
1156 pslld xmm8, 24
1157 pxor xmm14, xmm8
1158 paddd xmm10, xmm15
1159 paddd xmm11, xmm12
1160 movdqa xmm8, xmmword ptr [rsp+0x100]
1161 paddd xmm8, xmm13
1162 paddd xmm9, xmm14
1163 pxor xmm5, xmm10
1164 pxor xmm6, xmm11
1165 pxor xmm7, xmm8
1166 pxor xmm4, xmm9
1167 movdqa xmmword ptr [rsp+0x100], xmm8
1168 movdqa xmm8, xmm5
1169 psrld xmm8, 7
1170 pslld xmm5, 25
1171 por xmm5, xmm8
1172 movdqa xmm8, xmm6
1173 psrld xmm8, 7
1174 pslld xmm6, 25
1175 por xmm6, xmm8
1176 movdqa xmm8, xmm7
1177 psrld xmm8, 7
1178 pslld xmm7, 25
1179 por xmm7, xmm8
1180 movdqa xmm8, xmm4
1181 psrld xmm8, 7
1182 pslld xmm4, 25
1183 por xmm4, xmm8
1184 paddd xmm0, xmmword ptr [rsp+0x90]
1185 paddd xmm1, xmmword ptr [rsp+0xB0]
1186 paddd xmm2, xmmword ptr [rsp+0x80]
1187 paddd xmm3, xmmword ptr [rsp+0xF0]
1188 paddd xmm0, xmm4
1189 paddd xmm1, xmm5
1190 paddd xmm2, xmm6
1191 paddd xmm3, xmm7
1192 pxor xmm12, xmm0
1193 pxor xmm13, xmm1
1194 pxor xmm14, xmm2
1195 pxor xmm15, xmm3
1196 pshuflw xmm12, xmm12, 0xB1
1197 pshufhw xmm12, xmm12, 0xB1
1198 pshuflw xmm13, xmm13, 0xB1
1199 pshufhw xmm13, xmm13, 0xB1
1200 pshuflw xmm14, xmm14, 0xB1
1201 pshufhw xmm14, xmm14, 0xB1
1202 pshuflw xmm15, xmm15, 0xB1
1203 pshufhw xmm15, xmm15, 0xB1
1204 movdqa xmm8, xmmword ptr [rsp+0x100]
1205 paddd xmm8, xmm12
1206 paddd xmm9, xmm13
1207 paddd xmm10, xmm14
1208 paddd xmm11, xmm15
1209 pxor xmm4, xmm8
1210 pxor xmm5, xmm9
1211 pxor xmm6, xmm10
1212 pxor xmm7, xmm11
1213 movdqa xmmword ptr [rsp+0x100], xmm8
1214 movdqa xmm8, xmm4
1215 psrld xmm8, 12
1216 pslld xmm4, 20
1217 por xmm4, xmm8
1218 movdqa xmm8, xmm5
1219 psrld xmm8, 12
1220 pslld xmm5, 20
1221 por xmm5, xmm8
1222 movdqa xmm8, xmm6
1223 psrld xmm8, 12
1224 pslld xmm6, 20
1225 por xmm6, xmm8
1226 movdqa xmm8, xmm7
1227 psrld xmm8, 12
1228 pslld xmm7, 20
1229 por xmm7, xmm8
1230 paddd xmm0, xmmword ptr [rsp+0xE0]
1231 paddd xmm1, xmmword ptr [rsp+0x50]
1232 paddd xmm2, xmmword ptr [rsp+0xC0]
1233 paddd xmm3, xmmword ptr [rsp+0x10]
1234 paddd xmm0, xmm4
1235 paddd xmm1, xmm5
1236 paddd xmm2, xmm6
1237 paddd xmm3, xmm7
1238 pxor xmm12, xmm0
1239 pxor xmm13, xmm1
1240 pxor xmm14, xmm2
1241 pxor xmm15, xmm3
1242 movdqa xmm8, xmm12
1243 psrld xmm12, 8
1244 pslld xmm8, 24
1245 pxor xmm12, xmm8
1246 movdqa xmm8, xmm13
1247 psrld xmm13, 8
1248 pslld xmm8, 24
1249 pxor xmm13, xmm8
1250 movdqa xmm8, xmm14
1251 psrld xmm14, 8
1252 pslld xmm8, 24
1253 pxor xmm14, xmm8
1254 movdqa xmm8, xmm15
1255 psrld xmm15, 8
1256 pslld xmm8, 24
1257 pxor xmm15, xmm8
1258 movdqa xmm8, xmmword ptr [rsp+0x100]
1259 paddd xmm8, xmm12
1260 paddd xmm9, xmm13
1261 paddd xmm10, xmm14
1262 paddd xmm11, xmm15
1263 pxor xmm4, xmm8
1264 pxor xmm5, xmm9
1265 pxor xmm6, xmm10
1266 pxor xmm7, xmm11
1267 movdqa xmmword ptr [rsp+0x100], xmm8
1268 movdqa xmm8, xmm4
1269 psrld xmm8, 7
1270 pslld xmm4, 25
1271 por xmm4, xmm8
1272 movdqa xmm8, xmm5
1273 psrld xmm8, 7
1274 pslld xmm5, 25
1275 por xmm5, xmm8
1276 movdqa xmm8, xmm6
1277 psrld xmm8, 7
1278 pslld xmm6, 25
1279 por xmm6, xmm8
1280 movdqa xmm8, xmm7
1281 psrld xmm8, 7
1282 pslld xmm7, 25
1283 por xmm7, xmm8
1284 paddd xmm0, xmmword ptr [rsp+0xD0]
1285 paddd xmm1, xmmword ptr [rsp]
1286 paddd xmm2, xmmword ptr [rsp+0x20]
1287 paddd xmm3, xmmword ptr [rsp+0x40]
1288 paddd xmm0, xmm5
1289 paddd xmm1, xmm6
1290 paddd xmm2, xmm7
1291 paddd xmm3, xmm4
1292 pxor xmm15, xmm0
1293 pxor xmm12, xmm1
1294 pxor xmm13, xmm2
1295 pxor xmm14, xmm3
1296 pshuflw xmm15, xmm15, 0xB1
1297 pshufhw xmm15, xmm15, 0xB1
1298 pshuflw xmm12, xmm12, 0xB1
1299 pshufhw xmm12, xmm12, 0xB1
1300 pshuflw xmm13, xmm13, 0xB1
1301 pshufhw xmm13, xmm13, 0xB1
1302 pshuflw xmm14, xmm14, 0xB1
1303 pshufhw xmm14, xmm14, 0xB1
1304 paddd xmm10, xmm15
1305 paddd xmm11, xmm12
1306 movdqa xmm8, xmmword ptr [rsp+0x100]
1307 paddd xmm8, xmm13
1308 paddd xmm9, xmm14
1309 pxor xmm5, xmm10
1310 pxor xmm6, xmm11
1311 pxor xmm7, xmm8
1312 pxor xmm4, xmm9
1313 movdqa xmmword ptr [rsp+0x100], xmm8
1314 movdqa xmm8, xmm5
1315 psrld xmm8, 12
1316 pslld xmm5, 20
1317 por xmm5, xmm8
1318 movdqa xmm8, xmm6
1319 psrld xmm8, 12
1320 pslld xmm6, 20
1321 por xmm6, xmm8
1322 movdqa xmm8, xmm7
1323 psrld xmm8, 12
1324 pslld xmm7, 20
1325 por xmm7, xmm8
1326 movdqa xmm8, xmm4
1327 psrld xmm8, 12
1328 pslld xmm4, 20
1329 por xmm4, xmm8
1330 paddd xmm0, xmmword ptr [rsp+0x30]
1331 paddd xmm1, xmmword ptr [rsp+0xA0]
1332 paddd xmm2, xmmword ptr [rsp+0x60]
1333 paddd xmm3, xmmword ptr [rsp+0x70]
1334 paddd xmm0, xmm5
1335 paddd xmm1, xmm6
1336 paddd xmm2, xmm7
1337 paddd xmm3, xmm4
1338 pxor xmm15, xmm0
1339 pxor xmm12, xmm1
1340 pxor xmm13, xmm2
1341 pxor xmm14, xmm3
1342 movdqa xmm8, xmm15
1343 psrld xmm15, 8
1344 pslld xmm8, 24
1345 pxor xmm15, xmm8
1346 movdqa xmm8, xmm12
1347 psrld xmm12, 8
1348 pslld xmm8, 24
1349 pxor xmm12, xmm8
1350 movdqa xmm8, xmm13
1351 psrld xmm13, 8
1352 pslld xmm8, 24
1353 pxor xmm13, xmm8
1354 movdqa xmm8, xmm14
1355 psrld xmm14, 8
1356 pslld xmm8, 24
1357 pxor xmm14, xmm8
1358 paddd xmm10, xmm15
1359 paddd xmm11, xmm12
1360 movdqa xmm8, xmmword ptr [rsp+0x100]
1361 paddd xmm8, xmm13
1362 paddd xmm9, xmm14
1363 pxor xmm5, xmm10
1364 pxor xmm6, xmm11
1365 pxor xmm7, xmm8
1366 pxor xmm4, xmm9
1367 movdqa xmmword ptr [rsp+0x100], xmm8
1368 movdqa xmm8, xmm5
1369 psrld xmm8, 7
1370 pslld xmm5, 25
1371 por xmm5, xmm8
1372 movdqa xmm8, xmm6
1373 psrld xmm8, 7
1374 pslld xmm6, 25
1375 por xmm6, xmm8
1376 movdqa xmm8, xmm7
1377 psrld xmm8, 7
1378 pslld xmm7, 25
1379 por xmm7, xmm8
1380 movdqa xmm8, xmm4
1381 psrld xmm8, 7
1382 pslld xmm4, 25
1383 por xmm4, xmm8
1384 paddd xmm0, xmmword ptr [rsp+0xB0]
1385 paddd xmm1, xmmword ptr [rsp+0x50]
1386 paddd xmm2, xmmword ptr [rsp+0x10]
1387 paddd xmm3, xmmword ptr [rsp+0x80]
1388 paddd xmm0, xmm4
1389 paddd xmm1, xmm5
1390 paddd xmm2, xmm6
1391 paddd xmm3, xmm7
1392 pxor xmm12, xmm0
1393 pxor xmm13, xmm1
1394 pxor xmm14, xmm2
1395 pxor xmm15, xmm3
1396 pshuflw xmm12, xmm12, 0xB1
1397 pshufhw xmm12, xmm12, 0xB1
1398 pshuflw xmm13, xmm13, 0xB1
1399 pshufhw xmm13, xmm13, 0xB1
1400 pshuflw xmm14, xmm14, 0xB1
1401 pshufhw xmm14, xmm14, 0xB1
1402 pshuflw xmm15, xmm15, 0xB1
1403 pshufhw xmm15, xmm15, 0xB1
1404 movdqa xmm8, xmmword ptr [rsp+0x100]
1405 paddd xmm8, xmm12
1406 paddd xmm9, xmm13
1407 paddd xmm10, xmm14
1408 paddd xmm11, xmm15
1409 pxor xmm4, xmm8
1410 pxor xmm5, xmm9
1411 pxor xmm6, xmm10
1412 pxor xmm7, xmm11
1413 movdqa xmmword ptr [rsp+0x100], xmm8
1414 movdqa xmm8, xmm4
1415 psrld xmm8, 12
1416 pslld xmm4, 20
1417 por xmm4, xmm8
1418 movdqa xmm8, xmm5
1419 psrld xmm8, 12
1420 pslld xmm5, 20
1421 por xmm5, xmm8
1422 movdqa xmm8, xmm6
1423 psrld xmm8, 12
1424 pslld xmm6, 20
1425 por xmm6, xmm8
1426 movdqa xmm8, xmm7
1427 psrld xmm8, 12
1428 pslld xmm7, 20
1429 por xmm7, xmm8
1430 paddd xmm0, xmmword ptr [rsp+0xF0]
1431 paddd xmm1, xmmword ptr [rsp]
1432 paddd xmm2, xmmword ptr [rsp+0x90]
1433 paddd xmm3, xmmword ptr [rsp+0x60]
1434 paddd xmm0, xmm4
1435 paddd xmm1, xmm5
1436 paddd xmm2, xmm6
1437 paddd xmm3, xmm7
1438 pxor xmm12, xmm0
1439 pxor xmm13, xmm1
1440 pxor xmm14, xmm2
1441 pxor xmm15, xmm3
1442 movdqa xmm8, xmm12
1443 psrld xmm12, 8
1444 pslld xmm8, 24
1445 pxor xmm12, xmm8
1446 movdqa xmm8, xmm13
1447 psrld xmm13, 8
1448 pslld xmm8, 24
1449 pxor xmm13, xmm8
1450 movdqa xmm8, xmm14
1451 psrld xmm14, 8
1452 pslld xmm8, 24
1453 pxor xmm14, xmm8
1454 movdqa xmm8, xmm15
1455 psrld xmm15, 8
1456 pslld xmm8, 24
1457 pxor xmm15, xmm8
1458 movdqa xmm8, xmmword ptr [rsp+0x100]
1459 paddd xmm8, xmm12
1460 paddd xmm9, xmm13
1461 paddd xmm10, xmm14
1462 paddd xmm11, xmm15
1463 pxor xmm4, xmm8
1464 pxor xmm5, xmm9
1465 pxor xmm6, xmm10
1466 pxor xmm7, xmm11
1467 movdqa xmmword ptr [rsp+0x100], xmm8
1468 movdqa xmm8, xmm4
1469 psrld xmm8, 7
1470 pslld xmm4, 25
1471 por xmm4, xmm8
1472 movdqa xmm8, xmm5
1473 psrld xmm8, 7
1474 pslld xmm5, 25
1475 por xmm5, xmm8
1476 movdqa xmm8, xmm6
1477 psrld xmm8, 7
1478 pslld xmm6, 25
1479 por xmm6, xmm8
1480 movdqa xmm8, xmm7
1481 psrld xmm8, 7
1482 pslld xmm7, 25
1483 por xmm7, xmm8
1484 paddd xmm0, xmmword ptr [rsp+0xE0]
1485 paddd xmm1, xmmword ptr [rsp+0x20]
1486 paddd xmm2, xmmword ptr [rsp+0x30]
1487 paddd xmm3, xmmword ptr [rsp+0x70]
1488 paddd xmm0, xmm5
1489 paddd xmm1, xmm6
1490 paddd xmm2, xmm7
1491 paddd xmm3, xmm4
1492 pxor xmm15, xmm0
1493 pxor xmm12, xmm1
1494 pxor xmm13, xmm2
1495 pxor xmm14, xmm3
1496 pshuflw xmm15, xmm15, 0xB1
1497 pshufhw xmm15, xmm15, 0xB1
1498 pshuflw xmm12, xmm12, 0xB1
1499 pshufhw xmm12, xmm12, 0xB1
1500 pshuflw xmm13, xmm13, 0xB1
1501 pshufhw xmm13, xmm13, 0xB1
1502 pshuflw xmm14, xmm14, 0xB1
1503 pshufhw xmm14, xmm14, 0xB1
1504 paddd xmm10, xmm15
1505 paddd xmm11, xmm12
1506 movdqa xmm8, xmmword ptr [rsp+0x100]
1507 paddd xmm8, xmm13
1508 paddd xmm9, xmm14
1509 pxor xmm5, xmm10
1510 pxor xmm6, xmm11
1511 pxor xmm7, xmm8
1512 pxor xmm4, xmm9
1513 movdqa xmmword ptr [rsp+0x100], xmm8
1514 movdqa xmm8, xmm5
1515 psrld xmm8, 12
1516 pslld xmm5, 20
1517 por xmm5, xmm8
1518 movdqa xmm8, xmm6
1519 psrld xmm8, 12
1520 pslld xmm6, 20
1521 por xmm6, xmm8
1522 movdqa xmm8, xmm7
1523 psrld xmm8, 12
1524 pslld xmm7, 20
1525 por xmm7, xmm8
1526 movdqa xmm8, xmm4
1527 psrld xmm8, 12
1528 pslld xmm4, 20
1529 por xmm4, xmm8
1530 paddd xmm0, xmmword ptr [rsp+0xA0]
1531 paddd xmm1, xmmword ptr [rsp+0xC0]
1532 paddd xmm2, xmmword ptr [rsp+0x40]
1533 paddd xmm3, xmmword ptr [rsp+0xD0]
1534 paddd xmm0, xmm5
1535 paddd xmm1, xmm6
1536 paddd xmm2, xmm7
1537 paddd xmm3, xmm4
1538 pxor xmm15, xmm0
1539 pxor xmm12, xmm1
1540 pxor xmm13, xmm2
1541 pxor xmm14, xmm3
1542 movdqa xmm8, xmm15
1543 psrld xmm15, 8
1544 pslld xmm8, 24
1545 pxor xmm15, xmm8
1546 movdqa xmm8, xmm12
1547 psrld xmm12, 8
1548 pslld xmm8, 24
1549 pxor xmm12, xmm8
1550 movdqa xmm8, xmm13
1551 psrld xmm13, 8
1552 pslld xmm8, 24
1553 pxor xmm13, xmm8
1554 movdqa xmm8, xmm14
1555 psrld xmm14, 8
1556 pslld xmm8, 24
1557 pxor xmm14, xmm8
1558 paddd xmm10, xmm15
1559 paddd xmm11, xmm12
1560 movdqa xmm8, xmmword ptr [rsp+0x100]
1561 paddd xmm8, xmm13
1562 paddd xmm9, xmm14
1563 pxor xmm5, xmm10
1564 pxor xmm6, xmm11
1565 pxor xmm7, xmm8
1566 pxor xmm4, xmm9
1567 pxor xmm0, xmm8
1568 pxor xmm1, xmm9
1569 pxor xmm2, xmm10
1570 pxor xmm3, xmm11
1571 movdqa xmm8, xmm5
1572 psrld xmm8, 7
1573 pslld xmm5, 25
1574 por xmm5, xmm8
1575 movdqa xmm8, xmm6
1576 psrld xmm8, 7
1577 pslld xmm6, 25
1578 por xmm6, xmm8
1579 movdqa xmm8, xmm7
1580 psrld xmm8, 7
1581 pslld xmm7, 25
1582 por xmm7, xmm8
1583 movdqa xmm8, xmm4
1584 psrld xmm8, 7
1585 pslld xmm4, 25
1586 por xmm4, xmm8
1587 pxor xmm4, xmm12
1588 pxor xmm5, xmm13
1589 pxor xmm6, xmm14
1590 pxor xmm7, xmm15
1591 mov eax, r13d
1592 jne 9b
1593 movdqa xmm9, xmm0
1594 punpckldq xmm0, xmm1
1595 punpckhdq xmm9, xmm1
1596 movdqa xmm11, xmm2
1597 punpckldq xmm2, xmm3
1598 punpckhdq xmm11, xmm3
1599 movdqa xmm1, xmm0
1600 punpcklqdq xmm0, xmm2
1601 punpckhqdq xmm1, xmm2
1602 movdqa xmm3, xmm9
1603 punpcklqdq xmm9, xmm11
1604 punpckhqdq xmm3, xmm11
1605 movdqu xmmword ptr [rbx], xmm0
1606 movdqu xmmword ptr [rbx+0x20], xmm1
1607 movdqu xmmword ptr [rbx+0x40], xmm9
1608 movdqu xmmword ptr [rbx+0x60], xmm3
1609 movdqa xmm9, xmm4
1610 punpckldq xmm4, xmm5
1611 punpckhdq xmm9, xmm5
1612 movdqa xmm11, xmm6
1613 punpckldq xmm6, xmm7
1614 punpckhdq xmm11, xmm7
1615 movdqa xmm5, xmm4
1616 punpcklqdq xmm4, xmm6
1617 punpckhqdq xmm5, xmm6
1618 movdqa xmm7, xmm9
1619 punpcklqdq xmm9, xmm11
1620 punpckhqdq xmm7, xmm11
1621 movdqu xmmword ptr [rbx+0x10], xmm4
1622 movdqu xmmword ptr [rbx+0x30], xmm5
1623 movdqu xmmword ptr [rbx+0x50], xmm9
1624 movdqu xmmword ptr [rbx+0x70], xmm7
1625 movdqa xmm1, xmmword ptr [rsp+0x110]
1626 movdqa xmm0, xmm1
1627 paddd xmm1, xmmword ptr [rsp+0x150]
1628 movdqa xmmword ptr [rsp+0x110], xmm1
1629 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
1630 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
1631 pcmpgtd xmm0, xmm1
1632 movdqa xmm1, xmmword ptr [rsp+0x120]
1633 psubd xmm1, xmm0
1634 movdqa xmmword ptr [rsp+0x120], xmm1
1635 add rbx, 128
1636 add rdi, 32
1637 sub rsi, 4
1638 cmp rsi, 4
1639 jnc 2b
1640 test rsi, rsi
1641 jne 3f
16424:
1643 movdqa xmm6, xmmword ptr [rsp+0x170]
1644 movdqa xmm7, xmmword ptr [rsp+0x180]
1645 movdqa xmm8, xmmword ptr [rsp+0x190]
1646 movdqa xmm9, xmmword ptr [rsp+0x1A0]
1647 movdqa xmm10, xmmword ptr [rsp+0x1B0]
1648 movdqa xmm11, xmmword ptr [rsp+0x1C0]
1649 movdqa xmm12, xmmword ptr [rsp+0x1D0]
1650 movdqa xmm13, xmmword ptr [rsp+0x1E0]
1651 movdqa xmm14, xmmword ptr [rsp+0x1F0]
1652 movdqa xmm15, xmmword ptr [rsp+0x200]
1653 mov rsp, rbp
1654 pop rbp
1655 pop rbx
1656 pop rdi
1657 pop rsi
1658 pop r12
1659 pop r13
1660 pop r14
1661 pop r15
1662 ret
1663.p2align 5
16643:
1665 test esi, 0x2
1666 je 3f
1667 movups xmm0, xmmword ptr [rcx]
1668 movups xmm1, xmmword ptr [rcx+0x10]
1669 movaps xmm8, xmm0
1670 movaps xmm9, xmm1
1671 movd xmm13, dword ptr [rsp+0x110]
1672 movd xmm14, dword ptr [rsp+0x120]
1673 punpckldq xmm13, xmm14
1674 movaps xmmword ptr [rsp], xmm13
1675 movd xmm14, dword ptr [rsp+0x114]
1676 movd xmm13, dword ptr [rsp+0x124]
1677 punpckldq xmm14, xmm13
1678 movaps xmmword ptr [rsp+0x10], xmm14
1679 mov r8, qword ptr [rdi]
1680 mov r9, qword ptr [rdi+0x8]
1681 movzx eax, byte ptr [rbp+0x80]
1682 or eax, r13d
1683 xor edx, edx
16842:
1685 mov r14d, eax
1686 or eax, r12d
1687 add rdx, 64
1688 cmp rdx, r15
1689 cmovne eax, r14d
1690 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1691 movaps xmm10, xmm2
1692 movups xmm4, xmmword ptr [r8+rdx-0x40]
1693 movups xmm5, xmmword ptr [r8+rdx-0x30]
1694 movaps xmm3, xmm4
1695 shufps xmm4, xmm5, 136
1696 shufps xmm3, xmm5, 221
1697 movaps xmm5, xmm3
1698 movups xmm6, xmmword ptr [r8+rdx-0x20]
1699 movups xmm7, xmmword ptr [r8+rdx-0x10]
1700 movaps xmm3, xmm6
1701 shufps xmm6, xmm7, 136
1702 pshufd xmm6, xmm6, 0x93
1703 shufps xmm3, xmm7, 221
1704 pshufd xmm7, xmm3, 0x93
1705 movups xmm12, xmmword ptr [r9+rdx-0x40]
1706 movups xmm13, xmmword ptr [r9+rdx-0x30]
1707 movaps xmm11, xmm12
1708 shufps xmm12, xmm13, 136
1709 shufps xmm11, xmm13, 221
1710 movaps xmm13, xmm11
1711 movups xmm14, xmmword ptr [r9+rdx-0x20]
1712 movups xmm15, xmmword ptr [r9+rdx-0x10]
1713 movaps xmm11, xmm14
1714 shufps xmm14, xmm15, 136
1715 pshufd xmm14, xmm14, 0x93
1716 shufps xmm11, xmm15, 221
1717 pshufd xmm15, xmm11, 0x93
1718 shl rax, 0x20
1719 or rax, 0x40
1720 movq xmm3, rax
1721 movdqa xmmword ptr [rsp+0x20], xmm3
1722 movaps xmm3, xmmword ptr [rsp]
1723 movaps xmm11, xmmword ptr [rsp+0x10]
1724 punpcklqdq xmm3, xmmword ptr [rsp+0x20]
1725 punpcklqdq xmm11, xmmword ptr [rsp+0x20]
1726 mov al, 7
17279:
1728 paddd xmm0, xmm4
1729 paddd xmm8, xmm12
1730 movaps xmmword ptr [rsp+0x20], xmm4
1731 movaps xmmword ptr [rsp+0x30], xmm12
1732 paddd xmm0, xmm1
1733 paddd xmm8, xmm9
1734 pxor xmm3, xmm0
1735 pxor xmm11, xmm8
1736 pshuflw xmm3, xmm3, 0xB1
1737 pshufhw xmm3, xmm3, 0xB1
1738 pshuflw xmm11, xmm11, 0xB1
1739 pshufhw xmm11, xmm11, 0xB1
1740 paddd xmm2, xmm3
1741 paddd xmm10, xmm11
1742 pxor xmm1, xmm2
1743 pxor xmm9, xmm10
1744 movdqa xmm4, xmm1
1745 pslld xmm1, 20
1746 psrld xmm4, 12
1747 por xmm1, xmm4
1748 movdqa xmm4, xmm9
1749 pslld xmm9, 20
1750 psrld xmm4, 12
1751 por xmm9, xmm4
1752 paddd xmm0, xmm5
1753 paddd xmm8, xmm13
1754 movaps xmmword ptr [rsp+0x40], xmm5
1755 movaps xmmword ptr [rsp+0x50], xmm13
1756 paddd xmm0, xmm1
1757 paddd xmm8, xmm9
1758 pxor xmm3, xmm0
1759 pxor xmm11, xmm8
1760 movdqa xmm13, xmm3
1761 psrld xmm3, 8
1762 pslld xmm13, 24
1763 pxor xmm3, xmm13
1764 movdqa xmm13, xmm11
1765 psrld xmm11, 8
1766 pslld xmm13, 24
1767 pxor xmm11, xmm13
1768 paddd xmm2, xmm3
1769 paddd xmm10, xmm11
1770 pxor xmm1, xmm2
1771 pxor xmm9, xmm10
1772 movdqa xmm4, xmm1
1773 pslld xmm1, 25
1774 psrld xmm4, 7
1775 por xmm1, xmm4
1776 movdqa xmm4, xmm9
1777 pslld xmm9, 25
1778 psrld xmm4, 7
1779 por xmm9, xmm4
1780 pshufd xmm0, xmm0, 0x93
1781 pshufd xmm8, xmm8, 0x93
1782 pshufd xmm3, xmm3, 0x4E
1783 pshufd xmm11, xmm11, 0x4E
1784 pshufd xmm2, xmm2, 0x39
1785 pshufd xmm10, xmm10, 0x39
1786 paddd xmm0, xmm6
1787 paddd xmm8, xmm14
1788 paddd xmm0, xmm1
1789 paddd xmm8, xmm9
1790 pxor xmm3, xmm0
1791 pxor xmm11, xmm8
1792 pshuflw xmm3, xmm3, 0xB1
1793 pshufhw xmm3, xmm3, 0xB1
1794 pshuflw xmm11, xmm11, 0xB1
1795 pshufhw xmm11, xmm11, 0xB1
1796 paddd xmm2, xmm3
1797 paddd xmm10, xmm11
1798 pxor xmm1, xmm2
1799 pxor xmm9, xmm10
1800 movdqa xmm4, xmm1
1801 pslld xmm1, 20
1802 psrld xmm4, 12
1803 por xmm1, xmm4
1804 movdqa xmm4, xmm9
1805 pslld xmm9, 20
1806 psrld xmm4, 12
1807 por xmm9, xmm4
1808 paddd xmm0, xmm7
1809 paddd xmm8, xmm15
1810 paddd xmm0, xmm1
1811 paddd xmm8, xmm9
1812 pxor xmm3, xmm0
1813 pxor xmm11, xmm8
1814 movdqa xmm13, xmm3
1815 psrld xmm3, 8
1816 pslld xmm13, 24
1817 pxor xmm3, xmm13
1818 movdqa xmm13, xmm11
1819 psrld xmm11, 8
1820 pslld xmm13, 24
1821 pxor xmm11, xmm13
1822 paddd xmm2, xmm3
1823 paddd xmm10, xmm11
1824 pxor xmm1, xmm2
1825 pxor xmm9, xmm10
1826 movdqa xmm4, xmm1
1827 pslld xmm1, 25
1828 psrld xmm4, 7
1829 por xmm1, xmm4
1830 movdqa xmm4, xmm9
1831 pslld xmm9, 25
1832 psrld xmm4, 7
1833 por xmm9, xmm4
1834 pshufd xmm0, xmm0, 0x39
1835 pshufd xmm8, xmm8, 0x39
1836 pshufd xmm3, xmm3, 0x4E
1837 pshufd xmm11, xmm11, 0x4E
1838 pshufd xmm2, xmm2, 0x93
1839 pshufd xmm10, xmm10, 0x93
1840 dec al
1841 je 9f
1842 movdqa xmm12, xmmword ptr [rsp+0x20]
1843 movdqa xmm5, xmmword ptr [rsp+0x40]
1844 pshufd xmm13, xmm12, 0x0F
1845 shufps xmm12, xmm5, 214
1846 pshufd xmm4, xmm12, 0x39
1847 movdqa xmm12, xmm6
1848 shufps xmm12, xmm7, 250
1849 pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
1850 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
1851 por xmm13, xmm12
1852 movdqa xmmword ptr [rsp+0x20], xmm13
1853 movdqa xmm12, xmm7
1854 punpcklqdq xmm12, xmm5
1855 movdqa xmm13, xmm6
1856 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
1857 pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
1858 por xmm12, xmm13
1859 pshufd xmm12, xmm12, 0x78
1860 punpckhdq xmm5, xmm7
1861 punpckldq xmm6, xmm5
1862 pshufd xmm7, xmm6, 0x1E
1863 movdqa xmmword ptr [rsp+0x40], xmm12
1864 movdqa xmm5, xmmword ptr [rsp+0x30]
1865 movdqa xmm13, xmmword ptr [rsp+0x50]
1866 pshufd xmm6, xmm5, 0x0F
1867 shufps xmm5, xmm13, 214
1868 pshufd xmm12, xmm5, 0x39
1869 movdqa xmm5, xmm14
1870 shufps xmm5, xmm15, 250
1871 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
1872 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
1873 por xmm6, xmm5
1874 movdqa xmm5, xmm15
1875 punpcklqdq xmm5, xmm13
1876 movdqa xmmword ptr [rsp+0x30], xmm2
1877 movdqa xmm2, xmm14
1878 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
1879 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
1880 por xmm5, xmm2
1881 movdqa xmm2, xmmword ptr [rsp+0x30]
1882 pshufd xmm5, xmm5, 0x78
1883 punpckhdq xmm13, xmm15
1884 punpckldq xmm14, xmm13
1885 pshufd xmm15, xmm14, 0x1E
1886 movdqa xmm13, xmm6
1887 movdqa xmm14, xmm5
1888 movdqa xmm5, xmmword ptr [rsp+0x20]
1889 movdqa xmm6, xmmword ptr [rsp+0x40]
1890 jmp 9b
18919:
1892 pxor xmm0, xmm2
1893 pxor xmm1, xmm3
1894 pxor xmm8, xmm10
1895 pxor xmm9, xmm11
1896 mov eax, r13d
1897 cmp rdx, r15
1898 jne 2b
1899 movups xmmword ptr [rbx], xmm0
1900 movups xmmword ptr [rbx+0x10], xmm1
1901 movups xmmword ptr [rbx+0x20], xmm8
1902 movups xmmword ptr [rbx+0x30], xmm9
1903 mov eax, dword ptr [rsp+0x130]
1904 neg eax
1905 mov r10d, dword ptr [rsp+0x110+8*rax]
1906 mov r11d, dword ptr [rsp+0x120+8*rax]
1907 mov dword ptr [rsp+0x110], r10d
1908 mov dword ptr [rsp+0x120], r11d
1909 add rdi, 16
1910 add rbx, 64
1911 sub rsi, 2
19123:
1913 test esi, 0x1
1914 je 4b
1915 movups xmm0, xmmword ptr [rcx]
1916 movups xmm1, xmmword ptr [rcx+0x10]
1917 movd xmm13, dword ptr [rsp+0x110]
1918 movd xmm14, dword ptr [rsp+0x120]
1919 punpckldq xmm13, xmm14
1920 mov r8, qword ptr [rdi]
1921 movzx eax, byte ptr [rbp+0x80]
1922 or eax, r13d
1923 xor edx, edx
19242:
1925 mov r14d, eax
1926 or eax, r12d
1927 add rdx, 64
1928 cmp rdx, r15
1929 cmovne eax, r14d
1930 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1931 shl rax, 32
1932 or rax, 64
1933 movq xmm12, rax
1934 movdqa xmm3, xmm13
1935 punpcklqdq xmm3, xmm12
1936 movups xmm4, xmmword ptr [r8+rdx-0x40]
1937 movups xmm5, xmmword ptr [r8+rdx-0x30]
1938 movaps xmm8, xmm4
1939 shufps xmm4, xmm5, 136
1940 shufps xmm8, xmm5, 221
1941 movaps xmm5, xmm8
1942 movups xmm6, xmmword ptr [r8+rdx-0x20]
1943 movups xmm7, xmmword ptr [r8+rdx-0x10]
1944 movaps xmm8, xmm6
1945 shufps xmm6, xmm7, 136
1946 pshufd xmm6, xmm6, 0x93
1947 shufps xmm8, xmm7, 221
1948 pshufd xmm7, xmm8, 0x93
1949 mov al, 7
19509:
1951 paddd xmm0, xmm4
1952 paddd xmm0, xmm1
1953 pxor xmm3, xmm0
1954 pshuflw xmm3, xmm3, 0xB1
1955 pshufhw xmm3, xmm3, 0xB1
1956 paddd xmm2, xmm3
1957 pxor xmm1, xmm2
1958 movdqa xmm11, xmm1
1959 pslld xmm1, 20
1960 psrld xmm11, 12
1961 por xmm1, xmm11
1962 paddd xmm0, xmm5
1963 paddd xmm0, xmm1
1964 pxor xmm3, xmm0
1965 movdqa xmm14, xmm3
1966 psrld xmm3, 8
1967 pslld xmm14, 24
1968 pxor xmm3, xmm14
1969 paddd xmm2, xmm3
1970 pxor xmm1, xmm2
1971 movdqa xmm11, xmm1
1972 pslld xmm1, 25
1973 psrld xmm11, 7
1974 por xmm1, xmm11
1975 pshufd xmm0, xmm0, 0x93
1976 pshufd xmm3, xmm3, 0x4E
1977 pshufd xmm2, xmm2, 0x39
1978 paddd xmm0, xmm6
1979 paddd xmm0, xmm1
1980 pxor xmm3, xmm0
1981 pshuflw xmm3, xmm3, 0xB1
1982 pshufhw xmm3, xmm3, 0xB1
1983 paddd xmm2, xmm3
1984 pxor xmm1, xmm2
1985 movdqa xmm11, xmm1
1986 pslld xmm1, 20
1987 psrld xmm11, 12
1988 por xmm1, xmm11
1989 paddd xmm0, xmm7
1990 paddd xmm0, xmm1
1991 pxor xmm3, xmm0
1992 movdqa xmm14, xmm3
1993 psrld xmm3, 8
1994 pslld xmm14, 24
1995 pxor xmm3, xmm14
1996 paddd xmm2, xmm3
1997 pxor xmm1, xmm2
1998 movdqa xmm11, xmm1
1999 pslld xmm1, 25
2000 psrld xmm11, 7
2001 por xmm1, xmm11
2002 pshufd xmm0, xmm0, 0x39
2003 pshufd xmm3, xmm3, 0x4E
2004 pshufd xmm2, xmm2, 0x93
2005 dec al
2006 jz 9f
2007 movdqa xmm8, xmm4
2008 shufps xmm8, xmm5, 214
2009 pshufd xmm9, xmm4, 0x0F
2010 pshufd xmm4, xmm8, 0x39
2011 movdqa xmm8, xmm6
2012 shufps xmm8, xmm7, 250
2013 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
2014 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
2015 por xmm9, xmm8
2016 movdqa xmm8, xmm7
2017 punpcklqdq xmm8, xmm5
2018 movdqa xmm10, xmm6
2019 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2020 pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2021 por xmm8, xmm10
2022 pshufd xmm8, xmm8, 0x78
2023 punpckhdq xmm5, xmm7
2024 punpckldq xmm6, xmm5
2025 pshufd xmm7, xmm6, 0x1E
2026 movdqa xmm5, xmm9
2027 movdqa xmm6, xmm8
2028 jmp 9b
20299:
2030 pxor xmm0, xmm2
2031 pxor xmm1, xmm3
2032 mov eax, r13d
2033 cmp rdx, r15
2034 jne 2b
2035 movups xmmword ptr [rbx], xmm0
2036 movups xmmword ptr [rbx+0x10], xmm1
2037 jmp 4b
2038
2039.p2align 6
/*
 * blake3_compress_in_place_sse2 (alias: _blake3_compress_in_place_sse2)
 *
 * Runs seven rounds of the compression loop below over one 64-byte message
 * block and writes the 32-byte result back over the state that rcx points to.
 *
 * Inputs, as read by the code (Windows x64 argument slots):
 *   rcx              -> 32-byte state; loaded into rows 0-1, overwritten on exit
 *   rdx              -> 64-byte message block (read with unaligned loads)
 *   r8b              =  byte placed in dword lane 2 of row 3
 *                       (presumably block_len - confirm against the C prototype)
 *   r9               =  64-bit value placed in dword lanes 0-1 of row 3
 *                       (presumably the block counter - confirm)
 *   [rsp+0x28] entry =  byte placed in dword lane 3 of row 3; this is the fifth
 *                       argument slot (presumably flags - confirm)
 *
 * Register roles inside the loop:
 *   xmm0..xmm3  = state rows 0..3
 *   xmm4..xmm7  = the sixteen message words, regrouped for the round
 *   xmm8, xmm9, xmm11, xmm14 = temporaries
 *
 * Clobbers rax, r8, xmm0-xmm5 and flags. xmm6-xmm9, xmm11, xmm14 and xmm15 are
 * spilled to the local frame and restored before returning.
 */
2040blake3_compress_in_place_sse2:
2041_blake3_compress_in_place_sse2:
     /* 120-byte local frame. With the return address on the stack this keeps
        rsp 16-byte aligned (assuming an ABI-conformant caller), which the
        movdqa spills below require. */
2042 sub rsp, 120
     /* Spill the callee-saved xmm registers. xmm15 is saved and restored even
        though nothing else in this routine references it. */
2043 movdqa xmmword ptr [rsp], xmm6
2044 movdqa xmmword ptr [rsp+0x10], xmm7
2045 movdqa xmmword ptr [rsp+0x20], xmm8
2046 movdqa xmmword ptr [rsp+0x30], xmm9
2047 movdqa xmmword ptr [rsp+0x40], xmm11
2048 movdqa xmmword ptr [rsp+0x50], xmm14
2049 movdqa xmmword ptr [rsp+0x60], xmm15
     /* Rows 0 and 1 come from the caller's state, row 2 from the IV table. */
2050 movups xmm0, xmmword ptr [rcx]
2051 movups xmm1, xmmword ptr [rcx+0x10]
2052 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
     /* 0xA0 = 120 (frame) + 8 (return address) + 32 (shadow space): the fifth
        argument. */
2053 movzx eax, byte ptr [rsp+0xA0]
2054 movzx r8d, r8b
     /* r8 = r8b | (fifth-argument byte << 32) */
2055 shl rax, 32
2056 add r8, rax
     /* Row 3 = { r9 low dword, r9 high dword, r8b, fifth-argument byte } */
2057 movq xmm3, r9
2058 movq xmm4, r8
2059 punpcklqdq xmm3, xmm4
     /* De-interleave the first 32 message bytes:
        xmm4 = dwords 0,2,4,6 (shufps 136 = 0x88), xmm5 = dwords 1,3,5,7
        (shufps 221 = 0xDD). */
2060 movups xmm4, xmmword ptr [rdx]
2061 movups xmm5, xmmword ptr [rdx+0x10]
2062 movaps xmm8, xmm4
2063 shufps xmm4, xmm5, 136
2064 shufps xmm8, xmm5, 221
2065 movaps xmm5, xmm8
     /* Same split for the second 32 bytes, then pshufd 0x93 rotates the lanes:
        xmm6 = dwords 14,8,10,12 and xmm7 = dwords 15,9,11,13. */
2066 movups xmm6, xmmword ptr [rdx+0x20]
2067 movups xmm7, xmmword ptr [rdx+0x30]
2068 movaps xmm8, xmm6
2069 shufps xmm6, xmm7, 136
2070 pshufd xmm6, xmm6, 0x93
2071 shufps xmm8, xmm7, 221
2072 pshufd xmm7, xmm8, 0x93
     /* al counts the remaining rounds. */
2073 mov al, 7
     /* ---- top of one round ---- */
20749:
     /* First half of the round: rows used lane-for-lane (column step).
        row0 += xmm4 + row1 */
2075 paddd xmm0, xmm4
2076 paddd xmm0, xmm1
     /* row3 ^= row0, then swap the 16-bit halves of every dword (rotate by 16) */
2077 pxor xmm3, xmm0
2078 pshuflw xmm3, xmm3, 0xB1
2079 pshufhw xmm3, xmm3, 0xB1
     /* row2 += row3; row1 ^= row2; row1 = rotate-right(row1, 12) */
2080 paddd xmm2, xmm3
2081 pxor xmm1, xmm2
2082 movdqa xmm11, xmm1
2083 pslld xmm1, 20
2084 psrld xmm11, 12
2085 por xmm1, xmm11
     /* row0 += xmm5 + row1 */
2086 paddd xmm0, xmm5
2087 paddd xmm0, xmm1
     /* row3 ^= row0; row3 = rotate-right(row3, 8) */
2088 pxor xmm3, xmm0
2089 movdqa xmm14, xmm3
2090 psrld xmm3, 8
2091 pslld xmm14, 24
2092 pxor xmm3, xmm14
     /* row2 += row3; row1 ^= row2; row1 = rotate-right(row1, 7) */
2093 paddd xmm2, xmm3
2094 pxor xmm1, xmm2
2095 movdqa xmm11, xmm1
2096 pslld xmm1, 25
2097 psrld xmm11, 7
2098 por xmm1, xmm11
     /* Rotate the dword lanes of rows 0, 3 and 2 so that the second half of the
        round operates on the diagonals of the 4x4 state. */
2099 pshufd xmm0, xmm0, 0x93
2100 pshufd xmm3, xmm3, 0x4E
2101 pshufd xmm2, xmm2, 0x39
     /* Second half of the round: same sequence using message registers xmm6
        and xmm7. */
2102 paddd xmm0, xmm6
2103 paddd xmm0, xmm1
2104 pxor xmm3, xmm0
2105 pshuflw xmm3, xmm3, 0xB1
2106 pshufhw xmm3, xmm3, 0xB1
2107 paddd xmm2, xmm3
2108 pxor xmm1, xmm2
2109 movdqa xmm11, xmm1
2110 pslld xmm1, 20
2111 psrld xmm11, 12
2112 por xmm1, xmm11
2113 paddd xmm0, xmm7
2114 paddd xmm0, xmm1
2115 pxor xmm3, xmm0
2116 movdqa xmm14, xmm3
2117 psrld xmm3, 8
2118 pslld xmm14, 24
2119 pxor xmm3, xmm14
2120 paddd xmm2, xmm3
2121 pxor xmm1, xmm2
2122 movdqa xmm11, xmm1
2123 pslld xmm1, 25
2124 psrld xmm11, 7
2125 por xmm1, xmm11
     /* Inverse lane rotations: put rows 0, 3 and 2 back in column order. */
2126 pshufd xmm0, xmm0, 0x39
2127 pshufd xmm3, xmm3, 0x4E
2128 pshufd xmm2, xmm2, 0x93
     /* Leave after the seventh round; no message reshuffle is needed then. */
2129 dec al
2130 jz 9f
     /* Reorder the message words held in xmm4-xmm7 for the next round.
        Each pand/pand/por triple merges dword lanes from two registers using
        a complementary pair of PBLENDW_* masks. */
2131 movdqa xmm8, xmm4
2132 shufps xmm8, xmm5, 214
2133 pshufd xmm9, xmm4, 0x0F
2134 pshufd xmm4, xmm8, 0x39
2135 movdqa xmm8, xmm6
2136 shufps xmm8, xmm7, 250
2137 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
2138 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
2139 por xmm9, xmm8
2140 movdqa xmm8, xmm7
2141 punpcklqdq xmm8, xmm5
2142 movdqa xmm14, xmm6
2143 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2144 pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2145 por xmm8, xmm14
2146 pshufd xmm8, xmm8, 0x78
2147 punpckhdq xmm5, xmm7
2148 punpckldq xmm6, xmm5
2149 pshufd xmm7, xmm6, 0x1E
     /* xmm9 and xmm8 hold the new contents of xmm5 and xmm6. */
2150 movdqa xmm5, xmm9
2151 movdqa xmm6, xmm8
2152 jmp 9b
     /* ---- all rounds done ---- */
21539:
     /* Fold rows 2-3 into rows 0-1 and store the 32-byte result over the
        caller's state. */
2154 pxor xmm0, xmm2
2155 pxor xmm1, xmm3
2156 movups xmmword ptr [rcx], xmm0
2157 movups xmmword ptr [rcx+0x10], xmm1
     /* Restore the spilled xmm registers and release the frame. */
2158 movdqa xmm6, xmmword ptr [rsp]
2159 movdqa xmm7, xmmword ptr [rsp+0x10]
2160 movdqa xmm8, xmmword ptr [rsp+0x20]
2161 movdqa xmm9, xmmword ptr [rsp+0x30]
2162 movdqa xmm11, xmmword ptr [rsp+0x40]
2163 movdqa xmm14, xmmword ptr [rsp+0x50]
2164 movdqa xmm15, xmmword ptr [rsp+0x60]
2165 add rsp, 120
2166 ret
2167
2168
2169.p2align 6
/*
 * blake3_compress_xof_sse2 (alias: _blake3_compress_xof_sse2)
 *
 * Runs the same seven-round loop as the in-place variant over one 64-byte
 * message block, but leaves the input state untouched and writes a 64-byte
 * result to a separate output buffer.
 *
 * Inputs, as read by the code (Windows x64 argument slots):
 *   rcx              -> 32-byte input state (read at entry and again at exit)
 *   rdx              -> 64-byte message block (read with unaligned loads)
 *   r8b              =  byte placed in dword lane 2 of row 3
 *                       (presumably block_len - confirm against the C prototype)
 *   r9               =  64-bit value placed in dword lanes 0-1 of row 3
 *                       (presumably the block counter - confirm)
 *   [rsp+0x28] entry =  fifth argument: byte placed in dword lane 3 of row 3
 *                       (presumably flags - confirm)
 *   [rsp+0x30] entry =  sixth argument: pointer to the 64-byte output buffer
 *
 * Output layout at [r10]:
 *   bytes  0..31 = rows 0-1 XOR rows 2-3
 *   bytes 32..63 = rows 2-3 XOR the original 32-byte input state
 *
 * Clobbers rax, r8, r10, xmm0-xmm5 and flags. xmm6-xmm9, xmm11, xmm14 and
 * xmm15 are spilled to the local frame and restored before returning.
 */
2170_blake3_compress_xof_sse2:
2171blake3_compress_xof_sse2:
     /* 120-byte local frame; with the return address on the stack this keeps
        rsp 16-byte aligned (assuming an ABI-conformant caller) for the movdqa
        spills. */
2172 sub rsp, 120
     /* Spill the callee-saved xmm registers. xmm15 is saved and restored even
        though nothing else in this routine references it. */
2173 movdqa xmmword ptr [rsp], xmm6
2174 movdqa xmmword ptr [rsp+0x10], xmm7
2175 movdqa xmmword ptr [rsp+0x20], xmm8
2176 movdqa xmmword ptr [rsp+0x30], xmm9
2177 movdqa xmmword ptr [rsp+0x40], xmm11
2178 movdqa xmmword ptr [rsp+0x50], xmm14
2179 movdqa xmmword ptr [rsp+0x60], xmm15
     /* Rows 0 and 1 come from the caller's state, row 2 from the IV table. */
2180 movups xmm0, xmmword ptr [rcx]
2181 movups xmm1, xmmword ptr [rcx+0x10]
2182 movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
     /* 0xA0 = 120 (frame) + 8 (return address) + 32 (shadow space): fifth
        argument. 0xA8 is the next slot: the output pointer, kept in r10. */
2183 movzx eax, byte ptr [rsp+0xA0]
2184 movzx r8d, r8b
2185 mov r10, qword ptr [rsp+0xA8]
     /* r8 = r8b | (fifth-argument byte << 32) */
2186 shl rax, 32
2187 add r8, rax
     /* Row 3 = { r9 low dword, r9 high dword, r8b, fifth-argument byte } */
2188 movq xmm3, r9
2189 movq xmm4, r8
2190 punpcklqdq xmm3, xmm4
     /* De-interleave the first 32 message bytes:
        xmm4 = dwords 0,2,4,6 (shufps 136 = 0x88), xmm5 = dwords 1,3,5,7
        (shufps 221 = 0xDD). */
2191 movups xmm4, xmmword ptr [rdx]
2192 movups xmm5, xmmword ptr [rdx+0x10]
2193 movaps xmm8, xmm4
2194 shufps xmm4, xmm5, 136
2195 shufps xmm8, xmm5, 221
2196 movaps xmm5, xmm8
     /* Same split for the second 32 bytes, then pshufd 0x93 rotates the lanes:
        xmm6 = dwords 14,8,10,12 and xmm7 = dwords 15,9,11,13. */
2197 movups xmm6, xmmword ptr [rdx+0x20]
2198 movups xmm7, xmmword ptr [rdx+0x30]
2199 movaps xmm8, xmm6
2200 shufps xmm6, xmm7, 136
2201 pshufd xmm6, xmm6, 0x93
2202 shufps xmm8, xmm7, 221
2203 pshufd xmm7, xmm8, 0x93
     /* al counts the remaining rounds. */
2204 mov al, 7
     /* ---- top of one round ---- */
22059:
     /* First half of the round: rows used lane-for-lane (column step).
        row0 += xmm4 + row1 */
2206 paddd xmm0, xmm4
2207 paddd xmm0, xmm1
     /* row3 ^= row0, then swap the 16-bit halves of every dword (rotate by 16) */
2208 pxor xmm3, xmm0
2209 pshuflw xmm3, xmm3, 0xB1
2210 pshufhw xmm3, xmm3, 0xB1
     /* row2 += row3; row1 ^= row2; row1 = rotate-right(row1, 12) */
2211 paddd xmm2, xmm3
2212 pxor xmm1, xmm2
2213 movdqa xmm11, xmm1
2214 pslld xmm1, 20
2215 psrld xmm11, 12
2216 por xmm1, xmm11
     /* row0 += xmm5 + row1 */
2217 paddd xmm0, xmm5
2218 paddd xmm0, xmm1
     /* row3 ^= row0; row3 = rotate-right(row3, 8) */
2219 pxor xmm3, xmm0
2220 movdqa xmm14, xmm3
2221 psrld xmm3, 8
2222 pslld xmm14, 24
2223 pxor xmm3, xmm14
     /* row2 += row3; row1 ^= row2; row1 = rotate-right(row1, 7) */
2224 paddd xmm2, xmm3
2225 pxor xmm1, xmm2
2226 movdqa xmm11, xmm1
2227 pslld xmm1, 25
2228 psrld xmm11, 7
2229 por xmm1, xmm11
     /* Rotate the dword lanes of rows 0, 3 and 2 so that the second half of the
        round operates on the diagonals of the 4x4 state. */
2230 pshufd xmm0, xmm0, 0x93
2231 pshufd xmm3, xmm3, 0x4E
2232 pshufd xmm2, xmm2, 0x39
     /* Second half of the round: same sequence using message registers xmm6
        and xmm7. */
2233 paddd xmm0, xmm6
2234 paddd xmm0, xmm1
2235 pxor xmm3, xmm0
2236 pshuflw xmm3, xmm3, 0xB1
2237 pshufhw xmm3, xmm3, 0xB1
2238 paddd xmm2, xmm3
2239 pxor xmm1, xmm2
2240 movdqa xmm11, xmm1
2241 pslld xmm1, 20
2242 psrld xmm11, 12
2243 por xmm1, xmm11
2244 paddd xmm0, xmm7
2245 paddd xmm0, xmm1
2246 pxor xmm3, xmm0
2247 movdqa xmm14, xmm3
2248 psrld xmm3, 8
2249 pslld xmm14, 24
2250 pxor xmm3, xmm14
2251 paddd xmm2, xmm3
2252 pxor xmm1, xmm2
2253 movdqa xmm11, xmm1
2254 pslld xmm1, 25
2255 psrld xmm11, 7
2256 por xmm1, xmm11
     /* Inverse lane rotations: put rows 0, 3 and 2 back in column order. */
2257 pshufd xmm0, xmm0, 0x39
2258 pshufd xmm3, xmm3, 0x4E
2259 pshufd xmm2, xmm2, 0x93
     /* Leave after the seventh round; no message reshuffle is needed then. */
2260 dec al
2261 jz 9f
     /* Reorder the message words held in xmm4-xmm7 for the next round.
        Each pand/pand/por triple merges dword lanes from two registers using
        a complementary pair of PBLENDW_* masks. */
2262 movdqa xmm8, xmm4
2263 shufps xmm8, xmm5, 214
2264 pshufd xmm9, xmm4, 0x0F
2265 pshufd xmm4, xmm8, 0x39
2266 movdqa xmm8, xmm6
2267 shufps xmm8, xmm7, 250
2268 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
2269 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
2270 por xmm9, xmm8
2271 movdqa xmm8, xmm7
2272 punpcklqdq xmm8, xmm5
2273 movdqa xmm14, xmm6
2274 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
2275 pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
2276 por xmm8, xmm14
2277 pshufd xmm8, xmm8, 0x78
2278 punpckhdq xmm5, xmm7
2279 punpckldq xmm6, xmm5
2280 pshufd xmm7, xmm6, 0x1E
     /* xmm9 and xmm8 hold the new contents of xmm5 and xmm6. */
2281 movdqa xmm5, xmm9
2282 movdqa xmm6, xmm8
2283 jmp 9b
     /* ---- all rounds done ---- */
22849:
     /* Reload the untouched input state; the message registers xmm4/xmm5 are
        free to reuse at this point. */
2285 movdqu xmm4, xmmword ptr [rcx]
2286 movdqu xmm5, xmmword ptr [rcx+0x10]
     /* First 32 output bytes: rows 0-1 XOR rows 2-3. */
2287 pxor xmm0, xmm2
2288 pxor xmm1, xmm3
     /* Last 32 output bytes: rows 2-3 XOR the original input state. */
2289 pxor xmm2, xmm4
2290 pxor xmm3, xmm5
2291 movups xmmword ptr [r10], xmm0
2292 movups xmmword ptr [r10+0x10], xmm1
2293 movups xmmword ptr [r10+0x20], xmm2
2294 movups xmmword ptr [r10+0x30], xmm3
     /* Restore the spilled xmm registers and release the frame. */
2295 movdqa xmm6, xmmword ptr [rsp]
2296 movdqa xmm7, xmmword ptr [rsp+0x10]
2297 movdqa xmm8, xmmword ptr [rsp+0x20]
2298 movdqa xmm9, xmmword ptr [rsp+0x30]
2299 movdqa xmm11, xmmword ptr [rsp+0x40]
2300 movdqa xmm14, xmmword ptr [rsp+0x50]
2301 movdqa xmm15, xmmword ptr [rsp+0x60]
2302 add rsp, 120
2303 ret
2304
2305
/*
 * Read-only 128-bit constant tables used by the routines in this file.
 *
 * The section is aligned to 64 bytes and every table below is exactly
 * 16 bytes long, so each label stays 16-byte aligned. The code depends on
 * that: the tables are used as movaps sources and as memory operands of
 * pand/pxor, which require 16-byte alignment.
 */
2306.section .rodata
2307.p2align 6
     /* Four initialization words, one per dword lane; loaded as the initial
        value of state row 2 in the one- and two-input paths. */
2308BLAKE3_IV:
2309 .long 0x6A09E667, 0xBB67AE85
2310 .long 0x3C6EF372, 0xA54FF53A
     /* Per-lane offsets 0..3; ANDed with the broadcast increment mask and
        added to the broadcast low counter word during hash_many setup. */
2311ADD0:
2312 .long 0, 1, 2, 3
     /* Per-lane step of 4; ANDed with the same mask and added to the low
        counter words after each group of four inputs. */
2313ADD1:
2314 .long 4, 4, 4, 4
     /* Each of the four words above broadcast to all four lanes. Not
        referenced in the part of the file visible here; presumably consumed
        by the four-input path - confirm. */
2315BLAKE3_IV_0:
2316 .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
2317BLAKE3_IV_1:
2318 .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
2319BLAKE3_IV_2:
2320 .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
2321BLAKE3_IV_3:
2322 .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
     /* The value 64 in every lane. Not referenced in the part of the file
        visible here; presumably the block length for the four-input path -
        confirm. */
2323BLAKE3_BLOCK_LEN:
2324 .long 64, 64, 64, 64
     /* Sign-bit mask. XORing both operands with it before pcmpgtd turns the
        signed compare into an unsigned one; used to detect a carry out of the
        low counter words. */
2325CMP_MSB_MASK:
2326 .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
     /* Dword-lane select masks, used in complementary pairs
        (pand / pand / por) to merge lanes from two registers. The lane
        patterns match the SSE4.1 pblendw immediates in the names (one
        immediate bit per 16-bit word), so they presumably stand in for that
        instruction on SSE2. */
2327PBLENDW_0x33_MASK:
2328 .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
2329PBLENDW_0xCC_MASK:
2330 .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
2331PBLENDW_0x3F_MASK:
2332 .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
2333PBLENDW_0xC0_MASK:
2334 .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
2335

source code of llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S