/* strchr with SSE2 without bsf
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19#include <isa-level.h>
20
/* NB: Atom builds use ISA level == 1, so there is no reason to keep
   this implementation at ISA level >= 2.  */
23#if ISA_SHOULD_BUILD (1)
24
25# include <sysdep.h>
26# include "asm-syntax.h"
27
28 atom_text_section
/* char *__strchr_sse2_no_bsf (const char *s, int c)

   Find the first byte equal to (char) c in the NUL-terminated string s,
   16 bytes at a time with SSE2 compares.  The position of the hit inside
   a 16-byte block is decoded with chains of `test' on the pmovmskb bit
   masks; no bsf instruction is executed anywhere in this routine.

   In:       %rdi = s
	     %esi = c (only the low byte takes part in the comparison)
   Out:      %rax = address of the first matching byte, or 0 when the
		    terminating NUL comes first.  The terminator itself is
		    matched when the low byte of c is 0.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.
   Stack:    not used.

   Register roles after the prologue:
     %xmm1 = low byte of c replicated into all 16 lanes
     %xmm2 = all zero (used to find the terminator)
     %rdi  = address of the NEXT 16-byte block to load
     %eax  = bit i set <=> byte i of the current block equals c
     %edx  = bit i set <=> byte i of the current block is NUL

   All loads are 16-byte aligned (movdqa from s rounded down to a
   multiple of 16), so the first load may read bytes located before s;
   those lanes are masked out of both bit masks.

   L(ExitN) returns the address of the N-th byte (1-based) of the
   current block, i.e. block + N - 1.  */
ENTRY (__strchr_sse2_no_bsf)
	/* Prologue: broadcast c, align s down, and examine the first
	   (possibly partial) block.  The instruction interleaving is kept
	   exactly as in the original.  */
	movd	%esi, %xmm1
	movq	%rdi, %rcx
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* %rdi = s rounded down to 16.  */
	pxor	%xmm2, %xmm2
	punpcklbw %xmm1, %xmm1		/* Low dword of %xmm1 = c c c c.  */
	orl	$0xffffffff, %esi
	movdqa	(%rdi), %xmm0
	pshufd	$0, %xmm1, %xmm1	/* All 16 lanes = c.  */
	subq	%rdi, %rcx		/* %rcx = s & 15 (misalignment).  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	shl	%cl, %esi		/* %esi = -1 << (s & 15).  */
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	/* Drop the lanes that lie before the start of the string.  */
	andl	%esi, %eax
	andl	%esi, %edx
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)

	/* Main loop: one aligned 16-byte block per iteration, until the
	   block contains c or NUL.  */
L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	or	%eax, %edx
	jz	L(loop)

	/* The `or' above destroyed the NUL mask; rebuild it.  */
	pmovmskb %xmm3, %edx
	test	%eax, %eax
	jnz	L(matches)
	/* No match in this block, so it holds the terminator: fall
	   through (across the alignment padding) and return NULL.  */

/* Return NULL.  */
	.p2align 4
L(return_null):
	/* The 32-bit form clears all of %rax and needs no REX prefix.  */
	xorl	%eax, %eax
	ret

L(matches):
	/* There is a match.  First find where NULL is.  */
	leaq	-16(%rdi), %rdi		/* %rdi = base of the current block.  */
	test	%edx, %edx
	jz	L(match_case1)

	/* Case 2: the block holds both a match and a NUL.  Walk the bits
	   upwards, testing the match bit before the NUL bit at each
	   position, and return NULL as soon as a NUL comes first.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)

	/* Match somewhere in bytes 0-7.  Is it in bytes 0-3?  */
	test	$15, %al
	jnz	L(match_case2_4)

	/* Match is in bytes 4-7; a NUL in bytes 0-3 precedes it.  */
	test	$15, %dl
	jnz	L(return_null)

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* Only bit 7 of the match mask can be left.  */
	lea	7(%rdi), %rax
	ret

	/* Case 2, match in bytes 0-3.  */
	.p2align 4
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* Only bit 3 of the match mask can be left.  */
	lea	3(%rdi), %rax
	ret

	/* Case 2, no match in bytes 0-7.  */
	.p2align 4
L(match_high_case2):
	/* Any NUL in bytes 0-7 precedes every match.  */
	test	%dl, %dl
	jnz	L(return_null)

	/* Is the match in bytes 8-11?  */
	test	$15, %ah
	jnz	L(match_case2_12)

	/* Match is in bytes 12-15; a NUL in bytes 8-11 precedes it.  */
	test	$15, %dh
	jnz	L(return_null)

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	/* Only bit 15 of the match mask can be left.  */
	lea	15(%rdi), %rax
	ret

	/* Case 2, match in bytes 8-11.  */
	.p2align 4
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	/* Only bit 11 of the match mask can be left.  */
	lea	11(%rdi), %rax
	ret

	/* Case 1: the block holds a match and no NUL, so only the lowest
	   set bit of the match mask has to be located.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* %al is nonzero with bits 0-6 clear: the match is byte 7.  */
	lea	7(%rdi), %rax
	ret

	/* Case 1, match in bytes 8-15.  */
	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* The mask is nonzero with bits 0-14 clear: the match is byte 15.  */
	lea	15(%rdi), %rax
	ret

	/* Return stubs: L(ExitN) yields block base + N - 1.  */
	.p2align 4
L(Exit1):
	movq	%rdi, %rax
	ret

	.p2align 4
L(Exit2):
	lea	1(%rdi), %rax
	ret

	.p2align 4
L(Exit3):
	lea	2(%rdi), %rax
	ret

	.p2align 4
L(Exit4):
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(Exit5):
	lea	4(%rdi), %rax
	ret

	.p2align 4
L(Exit6):
	lea	5(%rdi), %rax
	ret

	.p2align 4
L(Exit7):
	lea	6(%rdi), %rax
	ret

	.p2align 4
L(Exit9):
	lea	8(%rdi), %rax
	ret

	.p2align 4
L(Exit10):
	lea	9(%rdi), %rax
	ret

	.p2align 4
L(Exit11):
	lea	10(%rdi), %rax
	ret

	.p2align 4
L(Exit12):
	lea	11(%rdi), %rax
	ret

	.p2align 4
L(Exit13):
	lea	12(%rdi), %rax
	ret

	.p2align 4
L(Exit14):
	lea	13(%rdi), %rax
	ret

	.p2align 4
L(Exit15):
	lea	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
283#endif
284

/* Source: glibc/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S  */