/* wcscmp optimized with SSE2.
   Copyright (C) 2018-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19#include <isa-level.h>
20
/* ISA level >= 2 because there are no wcscmp-sse4 implementations.  */
22#if ISA_SHOULD_BUILD (2)
23# include <sysdep.h>
24
25/* Needed to get right name. */
26# define USE_AS_WCSCMP
27# define STRCMP_ISA _sse2
28# include "strcmp-naming.h"
29
/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp
   function.  */
31
/* wcscmp, SSE2 version: s1 arrives in %rdi, s2 in %rsi.  The result in
   %eax is 0 for equal strings, 1 when the first differing element of s1
   is greater (signed 32-bit compare) than that of s2, and -1 otherwise.
   Strings are processed in 16-byte chunks, i.e. four 4-byte elements.

   This entry sequence only classifies s1: it records the offsets of both
   pointers inside a 64-byte block and branches on the 16-byte slot that
   s1 occupies.  The code reached from here does the same for s2.  */
	.text
ENTRY (STRCMP)
	movl	%esi, %eax
	movl	%edi, %edx
	pxor	%xmm0, %xmm0		/* %xmm0 = 0, reference for terminator tests.  */
	movb	%al, %ch		/* %ch = low byte of s2.  */
	movb	%dl, %cl		/* %cl = low byte of s1.  */
	andl	$63, %eax		/* %eax = offset of s2 in its 64-byte block.  */
	andl	$63, %edx		/* %edx = offset of s1 in its 64-byte block.  */
	andb	$15, %cl
	jz	L(continue_00)		/* s1 is 16-byte aligned.  */
	/* s1 is not 16-byte aligned: select by the slot it falls into.  */
	cmpl	$16, %edx
	jb	L(continue_0)
	cmpl	$32, %edx
	jb	L(continue_16)
	cmpl	$48, %edx
	jb	L(continue_32)
	/* Offset >= 48: execution runs on into the code that follows.  */
52
/* The entry dispatch arrives here with s1 unaligned and at offset >= 48
   of its 64-byte block.  %ch still holds the low byte of s2 and %eax the
   offset of s2 in its 64-byte block; they select the loop to run.  */
L(continue_48):
	andb	$15, %ch
	jz	L(continue_48_00)	/* s2 is 16-byte aligned.  */
	cmpl	$16, %eax
	jb	L(continue_0_48)
	cmpl	$32, %eax
	jb	L(continue_16_48)
	cmpl	$48, %eax
	jb	L(continue_32_48)

/* 64 bytes per iteration: chunk 0 element by element, chunks 16, 32 and
   48 with SSE.  An SSE step yields a byte mask of 0xffff only when all
   four elements are equal and none is the terminator (psubb cancels the
   "equal" bytes of a terminator lane); any other mask leaves %edx
   non-zero for the L(less4_double_words_N) exit.  %xmm0 must be all-zero
   on entry to each step and is still zero whenever a step falls through.
   NOTE(review): the element-wise chunk looks like the one where a
   16-byte load would straddle a 64-byte boundary -- confirm.  */
	.p2align 4
L(continue_48_48):
	/* Elements 0..3, one at a time.  */
	movl	(%rsi), %ecx
	cmpl	%ecx, (%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	4(%rsi), %ecx
	cmpl	%ecx, 4(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	8(%rsi), %ecx
	cmpl	%ecx, 8(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	12(%rsi), %ecx
	cmpl	%ecx, 12(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rdi), %xmm1
	movdqu	32(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63.  */
	movdqu	48(%rdi), %xmm1
	movdqu	48(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_48)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_48_48)
119
/* The entry dispatch arrives here with s1 unaligned and at offset < 16
   of its 64-byte block.  %ch (low byte of s2) and %eax (offset of s2 in
   its 64-byte block) select the loop to run.  */
L(continue_0):
	andb	$15, %ch
	jz	L(continue_0_00)	/* s2 is 16-byte aligned.  */
	cmpl	$16, %eax
	jb	L(continue_0_0)
	cmpl	$32, %eax
	jb	L(continue_0_16)
	cmpl	$48, %eax
	jb	L(continue_0_32)

/* 64 bytes per iteration: chunks 0 and 48 element by element, chunks 16
   and 32 with SSE.  An SSE step yields a byte mask of 0xffff only when
   all four elements are equal and none is the terminator; any other mask
   leaves %edx non-zero for the L(less4_double_words_N) exit.  %xmm0 must
   be all-zero on entry to each step and is still zero on fall-through.
   NOTE(review): the element-wise chunks look like the ones where a
   16-byte load would straddle a 64-byte boundary -- confirm.  */
	.p2align 4
L(continue_0_48):
	/* Elements 0..3, one at a time.  */
	movl	(%rsi), %ecx
	cmpl	%ecx, (%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	4(%rsi), %ecx
	cmpl	%ecx, 4(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	8(%rsi), %ecx
	cmpl	%ecx, 8(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	12(%rsi), %ecx
	cmpl	%ecx, 12(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rdi), %xmm1
	movdqu	32(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Elements 12..15 (bytes 48..63), one at a time.  */
	movl	48(%rsi), %ecx
	cmpl	%ecx, 48(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	52(%rsi), %ecx
	cmpl	%ecx, 52(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	56(%rsi), %ecx
	cmpl	%ecx, 56(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	60(%rsi), %ecx
	cmpl	%ecx, 60(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_0_48)
201
/* The entry dispatch arrives here with s1 16-byte aligned.  %ch (low
   byte of s2) and %eax (offset of s2 in its 64-byte block) select the
   loop to run.  */
	.p2align 4
L(continue_00):
	andb	$15, %ch
	jz	L(continue_00_00)	/* s2 is 16-byte aligned as well.  */
	cmpl	$16, %eax
	jb	L(continue_00_0)
	cmpl	$32, %eax
	jb	L(continue_00_16)
	cmpl	$48, %eax
	jb	L(continue_00_32)

/* 64 bytes per iteration.  Chunk 0 is first scanned for a terminator
   through the aligned s1 (memory-operand pcmpeqd); when there is none
   its four elements are compared one by one without further terminator
   tests.  Chunks 16, 32 and 48 use SSE steps whose byte mask is 0xffff
   only when all four elements are equal and none is the terminator.
   %xmm0 must be all-zero on entry and is still zero on fall-through.  */
	.p2align 4
L(continue_00_48):
	pcmpeqd	(%rdi), %xmm0		/* Terminator lanes in s1[0..3].  */
	movl	(%rdi), %eax		/* First element, also used by the exit.  */
	pmovmskb %xmm0, %ecx
	testl	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmpl	(%rsi), %eax
	jne	L(nequal)

	movl	4(%rdi), %eax
	cmpl	4(%rsi), %eax
	jne	L(nequal)

	movl	8(%rdi), %eax
	cmpl	8(%rsi), %eax
	jne	L(nequal)

	movl	12(%rdi), %eax
	cmpl	12(%rsi), %eax
	jne	L(nequal)

	/* Bytes 16..31.  */
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Terminator lanes of s2.  */
	pcmpeqd	16(%rdi), %xmm2		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	32(%rdi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63.  */
	movdqu	48(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	48(%rdi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_48)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_00_48)
263
/* The entry dispatch arrives here with s1 unaligned and at an offset in
   [32, 48) of its 64-byte block.  %ch (low byte of s2) and %eax (offset
   of s2 in its 64-byte block) select the loop to run.  */
	.p2align 4
L(continue_32):
	andb	$15, %ch
	jz	L(continue_32_00)	/* s2 is 16-byte aligned.  */
	cmpl	$16, %eax
	jb	L(continue_0_32)
	cmpl	$32, %eax
	jb	L(continue_16_32)
	cmpl	$48, %eax
	jb	L(continue_32_32)

/* 64 bytes per iteration: chunks 0 and 16 element by element, chunks 32
   and 48 with SSE.  An SSE step yields a byte mask of 0xffff only when
   all four elements are equal and none is the terminator; any other mask
   leaves %edx non-zero for the L(less4_double_words_N) exit.  %xmm0 must
   be all-zero on entry to each step and is still zero on fall-through.
   NOTE(review): the element-wise chunks look like the ones where a
   16-byte load would straddle a 64-byte boundary -- confirm.  */
	.p2align 4
L(continue_32_48):
	/* Elements 0..7, one at a time.  */
	movl	(%rsi), %ecx
	cmpl	%ecx, (%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	4(%rsi), %ecx
	cmpl	%ecx, 4(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	8(%rsi), %ecx
	cmpl	%ecx, 8(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	12(%rsi), %ecx
	cmpl	%ecx, 12(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	16(%rsi), %ecx
	cmpl	%ecx, 16(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	20(%rsi), %ecx
	cmpl	%ecx, 20(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	24(%rsi), %ecx
	cmpl	%ecx, 24(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	28(%rsi), %ecx
	cmpl	%ecx, 28(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	/* Bytes 32..47.  */
	movdqu	32(%rdi), %xmm1
	movdqu	32(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words_32)

	/* Bytes 48..63.  */
	movdqu	48(%rdi), %xmm1
	movdqu	48(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_48)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_32_48)
346
/* The entry dispatch arrives here with s1 unaligned and at an offset in
   [16, 32) of its 64-byte block.  %ch (low byte of s2) and %eax (offset
   of s2 in its 64-byte block) select the loop to run.  */
	.p2align 4
L(continue_16):
	andb	$15, %ch
	jz	L(continue_16_00)	/* s2 is 16-byte aligned.  */
	cmpl	$16, %eax
	jb	L(continue_0_16)
	cmpl	$32, %eax
	jb	L(continue_16_16)
	cmpl	$48, %eax
	jb	L(continue_16_32)

/* 64 bytes per iteration: chunks 0 and 32 element by element, chunks 16
   and 48 with SSE.  An SSE step yields a byte mask of 0xffff only when
   all four elements are equal and none is the terminator; any other mask
   leaves %edx non-zero for the L(less4_double_words_N) exit.  %xmm0 must
   be all-zero on entry to each step and is still zero on fall-through.
   NOTE(review): the element-wise chunks look like the ones where a
   16-byte load would straddle a 64-byte boundary -- confirm.  */
	.p2align 4
L(continue_16_48):
	/* Elements 0..3, one at a time.  */
	movl	(%rsi), %ecx
	cmpl	%ecx, (%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	4(%rsi), %ecx
	cmpl	%ecx, 4(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	8(%rsi), %ecx
	cmpl	%ecx, 8(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	12(%rsi), %ecx
	cmpl	%ecx, 12(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words_16)

	/* Elements 8..11 (bytes 32..47), one at a time.  */
	movl	32(%rsi), %ecx
	cmpl	%ecx, 32(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	36(%rsi), %ecx
	cmpl	%ecx, 36(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	40(%rsi), %ecx
	cmpl	%ecx, 40(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	44(%rsi), %ecx
	cmpl	%ecx, 44(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	/* Bytes 48..63.  */
	movdqu	48(%rdi), %xmm1
	movdqu	48(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_48)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_16_48)
429
/* Both strings are 16-byte aligned: four SSE steps per 64-byte iteration,
   using aligned loads of s1 and s2 as a memory operand.  A step yields a
   byte mask of 0xffff only when all four elements are equal and none is
   the terminator; any other mask leaves %edx non-zero for the matching
   L(less4_double_words*) exit.  %xmm0 must be all-zero on entry to each
   step and is still zero on fall-through.  */
	.p2align 4
L(continue_00_00):
	/* Bytes 0..15.  */
	movdqa	(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	(%rsi), %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqa	16(%rdi), %xmm3
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	16(%rsi), %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqa	32(%rdi), %xmm5
	pcmpeqd	%xmm5, %xmm0
	pcmpeqd	32(%rsi), %xmm5
	psubb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63.  */
	movdqa	48(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	48(%rsi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_48)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_00_00)
467
/* s1 aligned, s2 at an offset in [32, 48) of its 64-byte block: compare
   one 16-byte chunk with SSE, advance both pointers by 16 and join
   L(continue_00_48).  The byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_00_32):
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Terminator lanes of s2.  */
	pcmpeqd	(%rdi), %xmm2		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	addq	$16, %rsi
	addq	$16, %rdi
	jmp	L(continue_00_48)
481
/* s1 aligned, s2 at an offset in [16, 32) of its 64-byte block: compare
   two 16-byte chunks with SSE, advance both pointers by 32 and join
   L(continue_00_48).  Each byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_00_16):
	/* Bytes 0..15.  */
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Terminator lanes of s2.  */
	pcmpeqd	(%rdi), %xmm2		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%rdi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	addq	$32, %rsi
	addq	$32, %rdi
	jmp	L(continue_00_48)
503
/* s1 aligned, s2 unaligned at an offset < 16 of its 64-byte block:
   compare three 16-byte chunks with SSE, advance both pointers by 48 and
   join L(continue_00_48).  Each byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_00_0):
	/* Bytes 0..15.  */
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Terminator lanes of s2.  */
	pcmpeqd	(%rdi), %xmm2		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%rdi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rsi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	32(%rdi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	addq	$48, %rsi
	addq	$48, %rdi
	jmp	L(continue_00_48)
533
/* s2 16-byte aligned, s1 unaligned at offset >= 48 of its 64-byte block.
   64 bytes per iteration.  Chunk 0 is first scanned for a terminator
   through the aligned s2 (memory-operand pcmpeqd); when there is none
   its four elements are compared one by one without further terminator
   tests.  Chunks 16, 32 and 48 use SSE steps whose byte mask is 0xffff
   only when all four elements are equal and none is the terminator.
   %xmm0 must be all-zero on entry and is still zero on fall-through.  */
	.p2align 4
L(continue_48_00):
	pcmpeqd	(%rsi), %xmm0		/* Terminator lanes in s2[0..3].  */
	movl	(%rdi), %eax		/* First element, also used by the exit.  */
	pmovmskb %xmm0, %ecx
	testl	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmpl	(%rsi), %eax
	jne	L(nequal)

	movl	4(%rdi), %eax
	cmpl	4(%rsi), %eax
	jne	L(nequal)

	movl	8(%rdi), %eax
	cmpl	8(%rsi), %eax
	jne	L(nequal)

	movl	12(%rdi), %eax
	cmpl	12(%rsi), %eax
	jne	L(nequal)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	16(%rsi), %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	32(%rsi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63.  */
	movdqu	48(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	48(%rsi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_48)

	addq	$64, %rsi
	addq	$64, %rdi
	jmp	L(continue_48_00)
584
/* s2 aligned, s1 at an offset in [32, 48) of its 64-byte block: compare
   one 16-byte chunk with SSE, advance both pointers by 16 and join
   L(continue_48_00).  The byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_32_00):
	movdqu	(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	(%rsi), %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	addq	$16, %rsi
	addq	$16, %rdi
	jmp	L(continue_48_00)
598
/* s2 aligned, s1 at an offset in [16, 32) of its 64-byte block: compare
   two 16-byte chunks with SSE, advance both pointers by 32 and join
   L(continue_48_00).  Each byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_16_00):
	/* Bytes 0..15.  */
	movdqu	(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	(%rsi), %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%rsi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	addq	$32, %rsi
	addq	$32, %rdi
	jmp	L(continue_48_00)
620
/* s2 aligned, s1 unaligned at an offset < 16 of its 64-byte block:
   compare three 16-byte chunks with SSE, advance both pointers by 48 and
   join L(continue_48_00).  Each byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_0_00):
	/* Bytes 0..15.  */
	movdqu	(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	(%rsi), %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%rsi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rdi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	32(%rsi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	addq	$48, %rsi
	addq	$48, %rdi
	jmp	L(continue_48_00)
650
/* Both strings unaligned, each at an offset in [32, 48) of its 64-byte
   block: compare one 16-byte chunk with SSE, advance both pointers by 16
   and join L(continue_48_48).  The byte mask is 0xffff only when all
   four elements are equal and none is the terminator.  */
	.p2align 4
L(continue_32_32):
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	addq	$16, %rsi
	addq	$16, %rdi
	jmp	L(continue_48_48)
665
/* Both strings unaligned, each at an offset in [16, 32) of its 64-byte
   block: compare two 16-byte chunks with SSE, advance both pointers by
   32 and join L(continue_48_48).  Each byte mask is 0xffff only when all
   four elements are equal and none is the terminator.  */
	.p2align 4
L(continue_16_16):
	/* Bytes 0..15.  */
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm3
	movdqu	16(%rsi), %xmm4
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	%xmm4, %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	addq	$32, %rsi
	addq	$32, %rdi
	jmp	L(continue_48_48)
689
/* Both strings unaligned, each at an offset < 16 of its 64-byte block:
   compare three 16-byte chunks with SSE, advance both pointers by 48 and
   join L(continue_48_48).  Each byte mask is 0xffff only when all four
   elements are equal and none is the terminator.  */
	.p2align 4
L(continue_0_0):
	/* Bytes 0..15.  */
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm3
	movdqu	16(%rsi), %xmm4
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	%xmm4, %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47.  */
	movdqu	32(%rdi), %xmm1
	movdqu	32(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_32)

	addq	$48, %rsi
	addq	$48, %rdi
	jmp	L(continue_48_48)
722
/* Both strings unaligned, one at an offset < 16 and the other in
   [16, 32) of its 64-byte block: compare two 16-byte chunks with SSE,
   advance both pointers by 32 and join L(continue_32_48).  Each byte
   mask is 0xffff only when all four elements are equal and none is the
   terminator.  */
	.p2align 4
L(continue_0_16):
	/* Bytes 0..15.  */
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	/* Bytes 16..31.  */
	movdqu	16(%rdi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(less4_double_words_16)

	addq	$32, %rsi
	addq	$32, %rdi
	jmp	L(continue_32_48)
746
/* Both strings unaligned, one at an offset < 16 and the other in
   [32, 48) of its 64-byte block: compare one 16-byte chunk with SSE,
   advance both pointers by 16 and join L(continue_16_48).  The byte mask
   is 0xffff only when all four elements are equal and none is the
   terminator.  */
	.p2align 4
L(continue_0_32):
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	addq	$16, %rsi
	addq	$16, %rdi
	jmp	L(continue_16_48)
761
/* Both strings unaligned, one at an offset in [16, 32) and the other in
   [32, 48) of its 64-byte block: compare one 16-byte chunk with SSE,
   advance both pointers by 16 and join L(continue_32_48).  The byte mask
   is 0xffff only when all four elements are equal and none is the
   terminator.  */
	.p2align 4
L(continue_16_32):
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Terminator lanes of s1.  */
	pcmpeqd	%xmm2, %xmm1		/* Lanes where s1 == s2.  */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx		/* Zero only for "all equal, no terminator".  */
	jnz	L(less4_double_words)

	addq	$16, %rsi
	addq	$16, %rdi
	jmp	L(continue_32_48)
776
/* Exit used by L(continue_00_48) and L(continue_48_00) once the aligned
   string is known to hold a terminator among elements 0..3.  On entry
   %eax is element 0 of s1.  Elements are compared in order, stopping at
   the first difference or terminator.  If elements 0..2 match without a
   terminator and element 3 matches too, element 3 must be the
   terminator, so 0 is returned without testing it.  */
	.p2align 4
L(less4_double_words1):
	cmpl	(%rsi), %eax
	jne	L(nequal)
	testl	%eax, %eax
	jz	L(equal)

	movl	4(%rsi), %ecx
	cmpl	%ecx, 4(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	8(%rsi), %ecx
	cmpl	%ecx, 8(%rdi)
	jne	L(nequal)
	testl	%ecx, %ecx
	jz	L(equal)

	movl	12(%rsi), %ecx
	cmpl	%ecx, 12(%rdi)
	jne	L(nequal)
	xorl	%eax, %eax		/* Equal up to and including the terminator.  */
	ret
801
/* Exit for an SSE step on bytes 0..15 that found a difference or a
   terminator.  On entry %edx = byte mask - 0xffff, so its low 16 bits
   are mask + 1: %dl == 0 means elements 0 and 1 both matched; otherwise
   a non-zero low nibble of %dl selects element 0 and a zero one selects
   element 1.  %dh is decoded the same way for elements 2 and 3.  The
   selected element is reloaded and compared; if both strings agree there
   it was flagged as the terminator, so the value left in %eax is the 0
   to return.
   NOTE(review): %eax is reloaded on every path, so the initial clear
   looks redundant.  */
	.p2align 4
L(less4_double_words):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words)
	andb	$15, %dl
	jz	L(second_double_word)
	movl	(%rdi), %eax
	cmpl	(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(second_double_word):
	movl	4(%rdi), %eax
	cmpl	4(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(next_two_double_words):
	andb	$15, %dh
	jz	L(fourth_double_word)
	movl	8(%rdi), %eax
	cmpl	8(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(fourth_double_word):
	movl	12(%rdi), %eax
	cmpl	12(%rsi), %eax
	jne	L(nequal)
	ret
836
/* Exit for an SSE step on bytes 16..31 that found a difference or a
   terminator.  On entry %edx = byte mask - 0xffff, so its low 16 bits
   are mask + 1: %dl == 0 means the first two elements of the chunk both
   matched; otherwise a non-zero low nibble of %dl selects the first and
   a zero one the second.  %dh is decoded the same way for the third and
   fourth.  The selected element is reloaded and compared; if both
   strings agree there it was flagged as the terminator, so the value
   left in %eax is the 0 to return.  */
	.p2align 4
L(less4_double_words_16):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words_16)
	andb	$15, %dl
	jz	L(second_double_word_16)
	movl	16(%rdi), %eax
	cmpl	16(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(second_double_word_16):
	movl	20(%rdi), %eax
	cmpl	20(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(next_two_double_words_16):
	andb	$15, %dh
	jz	L(fourth_double_word_16)
	movl	24(%rdi), %eax
	cmpl	24(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(fourth_double_word_16):
	movl	28(%rdi), %eax
	cmpl	28(%rsi), %eax
	jne	L(nequal)
	ret
871
/* Exit for an SSE step on bytes 32..47 that found a difference or a
   terminator.  On entry %edx = byte mask - 0xffff, so its low 16 bits
   are mask + 1: %dl == 0 means the first two elements of the chunk both
   matched; otherwise a non-zero low nibble of %dl selects the first and
   a zero one the second.  %dh is decoded the same way for the third and
   fourth.  The selected element is reloaded and compared; if both
   strings agree there it was flagged as the terminator, so the value
   left in %eax is the 0 to return.  */
	.p2align 4
L(less4_double_words_32):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words_32)
	andb	$15, %dl
	jz	L(second_double_word_32)
	movl	32(%rdi), %eax
	cmpl	32(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(second_double_word_32):
	movl	36(%rdi), %eax
	cmpl	36(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(next_two_double_words_32):
	andb	$15, %dh
	jz	L(fourth_double_word_32)
	movl	40(%rdi), %eax
	cmpl	40(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(fourth_double_word_32):
	movl	44(%rdi), %eax
	cmpl	44(%rsi), %eax
	jne	L(nequal)
	ret
906
/* Exit for an SSE step on bytes 48..63 that found a difference or a
   terminator.  On entry %edx = byte mask - 0xffff, so its low 16 bits
   are mask + 1: %dl == 0 means the first two elements of the chunk both
   matched; otherwise a non-zero low nibble of %dl selects the first and
   a zero one the second.  %dh is decoded the same way for the third and
   fourth.  The selected element is reloaded and compared; if both
   strings agree there it was flagged as the terminator, so the value
   left in %eax is the 0 to return.  */
	.p2align 4
L(less4_double_words_48):
	xorl	%eax, %eax
	testb	%dl, %dl
	jz	L(next_two_double_words_48)
	andb	$15, %dl
	jz	L(second_double_word_48)
	movl	48(%rdi), %eax
	cmpl	48(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(second_double_word_48):
	movl	52(%rdi), %eax
	cmpl	52(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(next_two_double_words_48):
	andb	$15, %dh
	jz	L(fourth_double_word_48)
	movl	56(%rdi), %eax
	cmpl	56(%rsi), %eax
	jne	L(nequal)
	ret

	.p2align 4
L(fourth_double_word_48):
	movl	60(%rdi), %eax
	cmpl	60(%rsi), %eax
	jne	L(nequal)
	ret
941
/* Common "elements differ" exit.  Every branch to this label follows
   directly after a compare whose destination is the s1 element and whose
   source is the s2 element.  Loading the constant does not touch the
   flags, so jg still tests "s1 element > s2 element" as signed 32-bit
   values.  Returns 1 in that case and -1 otherwise.  */
	.p2align 4
L(nequal):
	movl	$1, %eax
	jg	L(nequal_bigger)
	negl	%eax			/* s1 element is smaller: 1 -> -1.  */

L(nequal_bigger):
	ret
950
/* Common "strings are equal" exit: return 0.  The 32-bit xor clears the
   whole of %rax (a 32-bit register write zero-extends) and avoids the
   REX.W prefix of the 64-bit form, matching the other zeroing sites in
   this function.  */
	.p2align 4
L(equal):
	xorl	%eax, %eax
	ret

END (STRCMP)
957#endif
958

/* Source: glibc/sysdeps/x86_64/multiarch/wcscmp-sse2.S  */