/* wcschr optimized with SSE2.
   Copyright (C) 2017-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <isa-level.h>

/* ISA level >= 2 because there is no wcschr-sse4 implementation.  */
#if ISA_SHOULD_BUILD (2)

# ifndef WCSCHR
#  define WCSCHR	__wcschr_sse2
# endif

# include <sysdep.h>

/* wchar_t *WCSCHR (const wchar_t *s, wchar_t c)

   Syntax: GNU as, AT&T operand order (source first, destination last).

   In:     rdi = s, the wide string to scan (elements are compared as
		 32-bit lanes).
	   rsi = c, the wide character to find; only its low 32 bits
		 are used.
   Out:    rax = address of the first element equal to c that is not
		 preceded by a zero element, or 0 if the terminating
		 zero comes first.  Searching for c == 0 returns the
		 address of the terminator itself.
   Clobb:  rcx, rdx, rdi, xmm0, xmm1, xmm2, flags.  No stack is used.

   Register roles throughout:
	   xmm1 = c broadcast to all four dword lanes.
	   xmm2 = all zero before every zero-compare; afterwards it
		  flags the lanes that are zero.  The code branches away
		  as soon as xmm2 becomes non-zero, so on every
		  fall-through path it is still all zero and can be
		  reused without being cleared again.
	   rax  = byte mask of lanes equal to c (4 mask bits per lane).
	   rdx  = byte mask of lanes equal to zero.

   Every load except the very first one is a 16-byte aligned load whose
   dword lanes start at multiples of 4 from a 16-byte boundary, so the
   lanes line up with the string's elements only when s is 4-byte
   aligned.  */

	.text
ENTRY (WCSCHR)

	/* Build the two comparison vectors: the double punpckldq copies
	   the low dword of xmm1 into all four lanes, and xmm2 is zeroed.
	   rcx keeps a copy of s for the alignment test below.  */
	movd	%rsi, %xmm1
	pxor	%xmm2, %xmm2
	mov	%rdi, %rcx
	punpckldq %xmm1, %xmm1
	punpckldq %xmm1, %xmm1

	/* rcx = offset of s inside its 64-byte block.  With an offset
	   above 48, an unaligned 16-byte load would run past the end of
	   that block, so such strings take the aligned-load path.  */
	and	$63, %rcx
	cmp	$48, %rcx
	ja	L(cross_cache)

	/* First 16 bytes, read unaligned straight from s.  rdi is
	   advanced before the branch, so L(matches) always sees rdi
	   pointing 16 bytes past the chunk that produced the masks.  */
	movdqu	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	/* Round rdi (already s + 16) down to a 16-byte boundary.  The
	   aligned chunk read next may overlap lanes that were just
	   checked; those held neither c nor zero, so re-checking them
	   cannot change the result.  */
	and	$-16, %rdi

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	jmp	L(loop)

L(cross_cache):
	/* rcx = s & 15 and rdi = s rounded down to 16 bytes.  Read the
	   aligned chunk that contains s; it also covers up to 15 bytes
	   in front of the string.  */
	and	$15, %rcx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax

	/* Drop the mask bits belonging to bytes in front of s, so that
	   bit 0 of each mask corresponds to the byte at s.  */
	sar	%cl, %rdx
	sar	%cl, %rax
	test	%rax, %rax
	je	L(unaligned_no_match)

	/* c occurs in this chunk.  It is the answer unless a zero lane
	   comes strictly before it; equal offsets (c == 0) still count
	   as a match.  */
	bsf	%rax, %rax
	test	%rdx, %rdx
	je	L(unaligned_match)
	bsf	%rdx, %rdx
	cmp	%rdx, %rax
	ja	L(return_null)

L(unaligned_match):
	/* Result = aligned base + (s & 15) + byte offset of the match,
	   i.e. s + offset.  */
	add	%rdi, %rax
	add	%rcx, %rax
	ret

	.p2align 4
L(unaligned_no_match):
	/* No c in the first chunk: a zero lane here means the string
	   ended without a match.  */
	test	%rdx, %rdx
	jne	L(return_null)
	/* xmm2 may still flag zero lanes located in front of s; clear it
	   so the loop starts with an all-zero xmm2 again.  */
	pxor	%xmm2, %xmm2

	add	$16, %rdi

	.p2align 4
/* Loop start on aligned string.  Each of the four unrolled steps
   checks one aligned 16-byte chunk and leaves rdi 16 bytes past it.  */
L(loop):
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)
	jmp	L(loop)

	.p2align 4
L(matches):
	/* The chunk at rdi - 16 holds c, a zero lane, or both.  rdx was
	   merged with rax by the `or' above, so fetch the zero-lane mask
	   from xmm2 again before comparing positions.  */
	pmovmskb %xmm2, %rdx
	test	%rax, %rax
	jz	L(return_null)
	bsf	%rax, %rax
	test	%rdx, %rdx
	je	L(match)
	/* Both are present: the match is valid only if it is not located
	   after the terminator.  */
	bsf	%rdx, %rcx
	cmp	%rcx, %rax
	ja	L(return_null)
L(match):
	/* Undo the post-load advance, then add the byte offset.  */
	sub	$16, %rdi
	add	%rdi, %rax
	ret

	.p2align 4
L(return_null):
	xor	%rax, %rax
	ret

END (WCSCHR)
#endif
/* Source: glibc/sysdeps/x86_64/multiarch/wcschr-sse2.S  */