1 | /* memchr (str, ch, n) -- Return pointer to first occurrence of CH in the |
2 | first N bytes of STR, or NULL if there is none. |
3 | For SPARC v9. |
4 | Copyright (C) 1998-2024 Free Software Foundation, Inc. |
5 | This file is part of the GNU C Library. |
6 | |
7 | The GNU C Library is free software; you can redistribute it and/or |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either |
10 | version 2.1 of the License, or (at your option) any later version. |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | Lesser General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Lesser General Public |
18 | License along with the GNU C Library; if not, see |
19 | <https://www.gnu.org/licenses/>. */ |
20 | |
21 | #include <sysdep.h> |
22 | #include <asm/asi.h> |
23 | #ifndef XCC |
24 | #define XCC xcc |
25 | #define USE_BPR |
26 | .register %g2, #scratch |
27 | .register %g3, #scratch |
28 | #endif |
29 | |
30 | /* Normally, this uses the |
31 | ((xword - 0x0101010101010101) & 0x8080808080808080) test |
32 | to find out if any byte in xword (the data XORed with CH in every |
33 | byte) could be zero. This is fast, but also gives a false alarm for |
34 | any byte in range 0x81-0xff. That does not matter for correctness: if the |
35 | test says there could be a zero byte, we check byte by byte, but if bytes with |
36 | high bits set are common in the strings, then this will give poor |
37 | performance. You can #define EIGHTBIT_NOT_RARE and the algorithm |
38 | will use one tick slower, but more precise test |
39 | ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), |
40 | which does not give any false alarms (but if some bits are set, |
41 | one cannot assume from it which bytes are zero and which are not). |
42 | It has yet to be measured which default is the right one for glibc |
43 | these days for an average user. |
44 | */ |
45 | /* __memchr (str, ch, n). In: %o0 = str, %o1 = ch, %o2 = n. Out: %o0 = address of the first byte equal to (ch & 0xff) among the n bytes at str, or 0 if there is none. Also writes %o1-%o5, %g1-%g3 and %g5; returns with retl. Delay-slot instructions are flagged in the comments. */ |
46 | .text |
47 | .align 32 |
48 | ENTRY(__memchr) |
49 | and %o1, 0xff, %o1 /* IEU0 Group -- compare only the low byte of ch */ |
50 | #ifdef USE_BPR |
51 | brz,pn %o2, 12f /* CTI+IEU1 -- n == 0: return 0 */ |
52 | #else |
53 | tst %o2 /* IEU1 */ |
54 | be,pn %XCC, 12f /* CTI -- n == 0: return 0 */ |
55 | #endif |
56 | sll %o1, 8, %g3 /* IEU0 Group -- delay slot */ |
57 | addcc %o0, %o2, %o2 /* IEU1 -- %o2 = str + n, the end pointer */ |
58 | movcs %XCC, -1, %o2 /* IEU0 Group -- if the add carried, clamp the end pointer to -1 */ |
59 | /* Build %g1 = 0x0101010101010101 and replicate ch through %g3 while the first byte loads. */ |
60 | sethi %hi(0x01010101), %g1 /* IEU0 Group */ |
61 | or %g3, %o1, %g3 /* IEU1 -- ch in the low 2 bytes */ |
62 | ldub [%o0], %o3 /* Load -- first byte of str */ |
63 | sllx %g3, 16, %g5 /* IEU0 Group */ |
64 | |
65 | or %g1, %lo(0x01010101), %g1 /* IEU1 */ |
66 | sllx %g1, 32, %g2 /* IEU0 Group */ |
67 | or %g3, %g5, %g3 /* IEU1 -- ch in the low 4 bytes */ |
68 | sllx %g3, 32, %g5 /* IEU0 Group -- upper 4 bytes, or'ed into %g3 later */ |
69 | |
70 | cmp %o3, %o1 /* IEU1 */ |
71 | be,pn %xcc, 13f /* CTI -- first byte matches: return str */ |
72 | or %g1, %g2, %g1 /* IEU0 Group -- delay slot; %g1 = 0x0101010101010101 */ |
73 | andcc %o0, 7, %g0 /* IEU1 -- is str 8-byte aligned? */ |
74 | |
75 | bne,a,pn %icc, 21f /* CTI -- no: byte loop at 21 */ |
76 | add %o0, 1, %o0 /* IEU0 Group -- delay slot, annulled unless the branch is taken */ |
77 | ldx [%o0], %o3 /* Load Group -- aligned: load the first xword */ |
78 | sllx %g1, 7, %g2 /* IEU0 -- %g2 = 0x8080808080808080 */ |
79 | |
80 | or %g3, %g5, %g3 /* IEU1 -- ch in all 8 bytes */ |
81 | 1: add %o0, 8, %o0 /* IEU0 Group -- step past the xword held in %o3 */ |
82 | xor %o3, %g3, %o4 /* IEU1 -- bytes equal to ch become zero */ |
83 | /* %g1 = 0101010101010101 * |
84 | * %g2 = 8080808080808080 * |
85 | * %g3 = c c c c c c c c * |
86 | * %o3 = value * |
87 | * %o4 = value XOR c */ |
88 | 2: cmp %o0, %o2 /* IEU1 Group -- %o0 = end address of the xword in %o4 */ |
89 | |
90 | bgu,pn %XCC, 11f /* CTI -- it runs past the end pointer (unsigned): tail check at 11 */ |
91 | ldxa [%o0] ASI_PNF, %o3 /* Load -- delay slot; load the following xword (ASI_PNF is defined outside this file, presumably non-faulting -- confirm) */ |
92 | sub %o4, %g1, %o5 /* IEU0 Group */ |
93 | add %o0, 8, %o0 /* IEU1 -- %o0 = address of the xword in %o4, plus 16 */ |
94 | #ifdef EIGHTBIT_NOT_RARE |
95 | andn %o5, %o4, %o5 /* IEU0 Group -- precise variant: also mask with ~%o4 */ |
96 | #endif |
97 | |
98 | andcc %o5, %g2, %g0 /* IEU1 Group -- could any byte of %o4 be zero? */ |
99 | be,a,pt %xcc, 2b /* CTI -- no: go on to the next xword */ |
100 | xor %o3, %g3, %o4 /* IEU0 -- delay slot, annulled unless the branch is taken */ |
101 | srlx %o4, 56, %g5 /* IEU0 -- maybe: test each byte, bits 63:56 (lowest address) first */ |
102 | |
103 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
104 | be,pn %icc, 3f /* CTI -- byte 0 matches */ |
105 | srlx %o4, 48, %g5 /* IEU0 -- delay slot */ |
106 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
107 | |
108 | be,pn %icc, 4f /* CTI -- byte 1 matches */ |
109 | srlx %o4, 40, %g5 /* IEU0 -- delay slot */ |
110 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
111 | be,pn %icc, 5f /* CTI -- byte 2 matches */ |
112 | |
113 | srlx %o4, 32, %g5 /* IEU0 -- delay slot */ |
114 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
115 | be,pn %icc, 6f /* CTI -- byte 3 matches */ |
116 | srlx %o4, 24, %g5 /* IEU0 -- delay slot */ |
117 | |
118 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
119 | be,pn %icc, 7f /* CTI -- byte 4 matches */ |
120 | srlx %o4, 16, %g5 /* IEU0 -- delay slot */ |
121 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
122 | |
123 | be,pn %icc, 8f /* CTI -- byte 5 matches */ |
124 | srlx %o4, 8, %g5 /* IEU0 -- delay slot */ |
125 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
126 | be,pn %icc, 9f /* CTI -- byte 6 matches */ |
127 | |
128 | andcc %o4, 0xff, %g0 /* IEU1 Group -- delay slot; tests byte 7 */ |
129 | bne,pt %icc, 2b /* CTI -- byte 7 differs too: false alarm, resume the loop */ |
130 | xor %o3, %g3, %o4 /* IEU0 -- delay slot (not annulled) */ |
131 | retl /* CTI+IEU1 Group -- byte 7 matches */ |
132 | |
133 | add %o0, -9, %o0 /* IEU0 -- delay slot; address of byte 7 */ |
134 | |
135 | .align 16 |
136 | 3: retl /* CTI+IEU1 Group -- return address of byte 0 */ |
137 | add %o0, -16, %o0 /* IEU0 -- delay slot */ |
138 | 4: retl /* CTI+IEU1 Group -- return address of byte 1 */ |
139 | add %o0, -15, %o0 /* IEU0 -- delay slot */ |
140 | |
141 | 5: retl /* CTI+IEU1 Group -- return address of byte 2 */ |
142 | add %o0, -14, %o0 /* IEU0 -- delay slot */ |
143 | 6: retl /* CTI+IEU1 Group -- return address of byte 3 */ |
144 | add %o0, -13, %o0 /* IEU0 -- delay slot */ |
145 | |
146 | 7: retl /* CTI+IEU1 Group -- return address of byte 4 */ |
147 | add %o0, -12, %o0 /* IEU0 -- delay slot */ |
148 | 8: retl /* CTI+IEU1 Group -- return address of byte 5 */ |
149 | add %o0, -11, %o0 /* IEU0 -- delay slot */ |
150 | |
151 | 9: retl /* CTI+IEU1 Group -- return address of byte 6 */ |
152 | add %o0, -10, %o0 /* IEU0 -- delay slot */ |
153 | 11: sub %o4, %g1, %o5 /* IEU0 Group -- tail: the xword in %o4 extends past the end pointer */ |
154 | sub %o0, 8, %o0 /* IEU1 -- %o0 = address of that xword */ |
155 | |
156 | andcc %o5, %g2, %g0 /* IEU1 Group -- could any byte of %o4 be zero? */ |
157 | be,pt %xcc, 12f /* CTI -- no: return 0 */ |
158 | sub %o2, %o0, %o2 /* IEU0 -- delay slot; %o2 = bytes of this xword before the end pointer */ |
159 | tst %o2 /* IEU1 Group */ |
160 | |
161 | be,pn %XCC, 12f /* CTI -- no valid byte: return 0 */ |
162 | srlx %o4, 56, %g5 /* IEU0 -- delay slot */ |
163 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
164 | be,pn %icc, 13f /* CTI -- byte 0 matches: return %o0 */ |
165 | |
166 | cmp %o2, 1 /* IEU0 -- delay slot */ |
167 | be,pn %XCC, 12f /* CTI Group -- only 1 valid byte: return 0 */ |
168 | srlx %o4, 48, %g5 /* IEU0 -- delay slot */ |
169 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
170 | |
171 | be,pn %icc, 14f /* CTI -- byte 1 matches */ |
172 | cmp %o2, 2 /* IEU1 Group -- delay slot */ |
173 | be,pn %XCC, 12f /* CTI -- only 2 valid bytes: return 0 */ |
174 | srlx %o4, 40, %g5 /* IEU0 -- delay slot */ |
175 | |
176 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
177 | be,pn %icc, 15f /* CTI -- byte 2 matches */ |
178 | cmp %o2, 3 /* IEU1 Group -- delay slot */ |
179 | be,pn %XCC, 12f /* CTI -- only 3 valid bytes: return 0 */ |
180 | |
181 | srlx %o4, 32, %g5 /* IEU0 -- delay slot */ |
182 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
183 | be,pn %icc, 16f /* CTI -- byte 3 matches */ |
184 | cmp %o2, 4 /* IEU1 Group -- delay slot */ |
185 | |
186 | be,pn %XCC, 12f /* CTI -- only 4 valid bytes: return 0 */ |
187 | srlx %o4, 24, %g5 /* IEU0 -- delay slot */ |
188 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
189 | be,pn %icc, 17f /* CTI -- byte 4 matches */ |
190 | |
191 | cmp %o2, 5 /* IEU1 Group -- delay slot */ |
192 | be,pn %XCC, 12f /* CTI -- only 5 valid bytes: return 0 */ |
193 | srlx %o4, 16, %g5 /* IEU0 -- delay slot */ |
194 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
195 | |
196 | be,pn %icc, 18f /* CTI -- byte 5 matches */ |
197 | cmp %o2, 6 /* IEU1 Group -- delay slot */ |
198 | be,pn %XCC, 12f /* CTI -- only 6 valid bytes: return 0 */ |
199 | srlx %o4, 8, %g5 /* IEU0 -- delay slot */ |
200 | |
201 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
202 | be,pn %icc, 19f /* CTI -- byte 6 matches */ |
203 | nop /* IEU0 -- delay slot */ |
204 | 12: retl /* CTI+IEU1 Group -- not found */ |
205 | |
206 | clr %o0 /* IEU0 -- delay slot; return 0 */ |
207 | nop /* Stub */ |
208 | 13: retl /* CTI+IEU1 Group -- return %o0 unchanged */ |
209 | nop /* IEU0 -- delay slot */ |
210 | |
211 | 14: retl /* CTI+IEU1 Group -- return address of byte 1 */ |
212 | add %o0, 1, %o0 /* IEU0 -- delay slot */ |
213 | 15: retl /* CTI+IEU1 Group -- return address of byte 2 */ |
214 | add %o0, 2, %o0 /* IEU0 -- delay slot */ |
215 | |
216 | 16: retl /* CTI+IEU1 Group -- return address of byte 3 */ |
217 | add %o0, 3, %o0 /* IEU0 -- delay slot */ |
218 | 17: retl /* CTI+IEU1 Group -- return address of byte 4 */ |
219 | add %o0, 4, %o0 /* IEU0 -- delay slot */ |
220 | |
221 | 18: retl /* CTI+IEU1 Group -- return address of byte 5 */ |
222 | add %o0, 5, %o0 /* IEU0 -- delay slot */ |
223 | 19: retl /* CTI+IEU1 Group -- return address of byte 6 */ |
224 | add %o0, 6, %o0 /* IEU0 -- delay slot */ |
225 | |
226 | 21: cmp %o0, %o2 /* IEU1 -- unaligned start: %o0 = str + 1, first byte already checked */ |
227 | be,pn %XCC, 12b /* CTI -- reached the end pointer: return 0 */ |
228 | sllx %g1, 7, %g2 /* IEU0 Group -- delay slot; %g2 = 0x8080808080808080 */ |
229 | ldub [%o0], %o3 /* Load -- next byte */ |
230 | |
231 | or %g3, %g5, %g3 /* IEU1 -- ch in all 8 bytes */ |
232 | 22: andcc %o0, 7, %g0 /* IEU1 Group -- reached 8-byte alignment? */ |
233 | be,a,pn %icc, 1b /* CTI -- yes: switch to the xword loop */ |
234 | ldx [%o0], %o3 /* Load -- delay slot, annulled unless the branch is taken */ |
235 | |
236 | cmp %o3, %o1 /* IEU1 Group */ |
237 | be,pn %xcc, 23f /* CTI -- this byte matches */ |
238 | add %o0, 1, %o0 /* IEU0 -- delay slot (always executed); 23 undoes it */ |
239 | cmp %o0, %o2 /* IEU1 Group */ |
240 | |
241 | bne,a,pt %XCC, 22b /* CTI -- not at the end pointer: next byte */ |
242 | ldub [%o0], %o3 /* Load -- delay slot, annulled unless the branch is taken */ |
243 | retl /* CTI+IEU1 Group -- end reached without a match */ |
244 | clr %o0 /* IEU0 -- delay slot; return 0 */ |
245 | |
246 | 23: retl /* CTI+IEU1 Group -- return address of the matching byte */ |
247 | add %o0, -1, %o0 /* IEU0 -- delay slot */ |
248 | END(__memchr) |
249 | |
250 | weak_alias (__memchr, memchr) |
251 | libc_hidden_builtin_def (memchr) |
252 | |