1 | /* Compare no more than N characters of S1 and S2, returning less than, |
2 | equal to or greater than zero if S1 is lexicographically less than, |
3 | equal to or greater than S2. |
4 | For SPARC v9. |
5 | Copyright (C) 1997-2024 Free Software Foundation, Inc. |
6 | This file is part of the GNU C Library. |
7 | |
8 | The GNU C Library is free software; you can redistribute it and/or |
9 | modify it under the terms of the GNU Lesser General Public |
10 | License as published by the Free Software Foundation; either |
11 | version 2.1 of the License, or (at your option) any later version. |
12 | |
13 | The GNU C Library is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | Lesser General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU Lesser General Public |
19 | License along with the GNU C Library; if not, see |
20 | <https://www.gnu.org/licenses/>. */ |
21 | |
22 | #include <sysdep.h> |
23 | #include <asm/asi.h> |
24 | #ifndef XCC |
25 | #define XCC xcc |
26 | #define USE_BPR |
27 | .register %g2, #scratch |
28 | .register %g3, #scratch |
29 | .register %g6, #scratch |
30 | #endif |
31 | |
32 | /* Normally, this uses the |
33 | ((xword - 0x0101010101010101) & 0x8080808080808080) test |
34 | to find out whether any byte in xword could be zero. This is fast, but |
35 | it also gives a false alarm for any byte in the range 0x81-0xff. That |
36 | does not matter for correctness, because whenever this test reports a |
37 | possible zero byte we check the xword byte by byte, but if bytes with |
38 | the high bit set are common in the strings, performance will be poor. |
39 | You can #define EIGHTBIT_NOT_RARE and the algorithm will instead use a |
40 | one tick slower, but more precise, test |
41 | ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), |
42 | which does not give any false alarms (but if some bits are set, |
43 | one cannot tell from them which bytes are zero and which are not). |
44 | It has yet to be measured which default is the right one for glibc |
45 | for an average user these days. |
46 | */ |
47 | |
48 | .text |
49 | .align 32 |
/* strncmp: compare at most N characters of S1 and S2.

   On entry %o0 = S1, %o1 = S2, %o2 = N; the result is returned in %o0
   (zero, or the difference computed at the first mismatch / NUL).

   Strategy:
     - while S1 is not 8-byte aligned, compare one byte at a time (9:, 10:);
     - S1 and S2 both aligned: compare one xword per iteration (1:, 2:);
     - S1 aligned, S2 not: rebuild every S2 xword from two aligned loads
       with a shift pair (11:, 12:, 13:);
     - fewer than 8 characters of N left: finish byte by byte (5:);
     - xwords differ: locate the first differing or NUL byte (6:, 7:).

   Register use in the xword loops:
     %g1 = 0x0101010101010101, %g2 = 0x8080808080808080
     %g4 = current S1 xword,   %o3 = current S2 xword
     %o2 = characters of N still to compare (already reduced by 8 once
	   the subcc at the top of an iteration has executed)

   The most significant byte of an xword is the first character, so bytes
   are examined with shifts 56, 48, ..., 8, 0 in that order.

   The instruction following each branch is its delay slot; for the
   conditional branches written with ",a" it executes only when the branch
   is taken.  The trailing IEU0/IEU1/CTI/Load/Group notes look like the
   original author's issue-slot scheduling annotations (not defined in
   this file).  ASI_PNF comes from <asm/asi.h>; presumably it is the
   primary no-fault ASI, so the look-ahead loads cannot trap -- confirm
   against that header.  */
50 | ENTRY(strncmp) |
51 | #ifdef USE_BPR |
52 | brz,pn %o2, 4f /* CTI+IEU1 Group; N == 0: return 0 */ |
53 | #else |
54 | tst %o2 /* IEU1 Group */ |
55 | be,pn %XCC, 4f /* CTI; N == 0: return 0 */ |
56 | #endif |
57 | sethi %hi(0x1010101), %g1 /* IEU0; delay slot: high part of 0x01010101 */ |
58 | andcc %o0, 7, %g0 /* IEU1 Group; is S1 8-byte aligned? */ |
59 | bne,pn %icc, 9f /* CTI; no: byte loop until it is */ |
60 | |
61 | or %g1, %lo(0x1010101), %g1 /* IEU0; delay slot: %g1 = 0x01010101 */ |
62 | andcc %o1, 7, %g3 /* IEU1 Group; %g3 = S2 & 7 */ |
63 | bne,pn %icc, 11f /* CTI; S2 misaligned: shifted loop */ |
64 | sllx %g1, 32, %g2 /* IEU0; delay slot: %g2 = %g1 << 32 */ |
65 | |
66 | ldx [%o0], %g4 /* Load Group; first S1 xword */ |
67 | or %g1, %g2, %g1 /* IEU0; %g1 = 0x0101010101010101 */ |
68 | 1: ldx [%o1], %o3 /* Load Group; S2 xword */ |
69 | sllx %g1, 7, %g2 /* IEU0; %g2 = 0x8080808080808080 */ |
70 | |
71 | add %o0, 8, %o0 /* IEU1 */ |
72 | 2: subcc %o2, 8, %o2 /* IEU1 Group; N -= 8, carry if fewer than 8 were left */ |
73 | bcs,pn %XCC, 5f /* CTI; partial xword: finish in the tail code */ |
74 | add %o1, 8, %o1 /* IEU0; delay slot */ |
75 | |
76 | sub %g4, %g1, %g3 /* IEU0 Group; %g3 = S1 xword - 0x01..01 (zero-byte probe) */ |
77 | subcc %g4, %o3, %o4 /* IEU1; %o4 = S1 xword - S2 xword, sets the flags */ |
78 | #ifdef EIGHTBIT_NOT_RARE |
79 | andn %g3, %g4, %g6 /* IEU0 Group; %g6 = %g3 & ~xword (precise probe) */ |
80 | #endif |
81 | bne,pn %xcc, 6f /* CTI; xwords differ: find the byte */ |
82 | ldxa [%o0] ASI_PNF, %g4 /* Load Group; delay slot: preload next S1 xword */ |
83 | |
84 | add %o0, 8, %o0 /* IEU0 */ |
85 | #ifdef EIGHTBIT_NOT_RARE |
86 | andcc %g6, %g2, %g0 /* IEU1; any byte flagged as zero? */ |
87 | #else |
88 | andcc %g3, %g2, %g0 /* IEU1; any byte flagged as possibly zero? */ |
89 | #endif |
90 | be,a,pt %xcc, 2b /* CTI; none: go on with the next xword */ |
91 | ldxa [%o1] ASI_PNF, %o3 /* Load Group; annulled slot: next S2 xword */ |
92 | |
93 | addcc %g3, %g1, %o4 /* IEU1; %o4 = the (equal) xword again; %g4 already holds the preload */ |
94 | #ifdef EIGHTBIT_NOT_RARE |
95 | srlx %g6, 32, %g6 /* IEU0; keep the flags of the first four bytes */ |
96 | andcc %g6, %g2, %g0 /* IEU1 Group */ |
97 | #else |
98 | srlx %g3, 32, %g3 /* IEU0; keep the flags of the first four bytes */ |
99 | andcc %g3, %g2, %g0 /* IEU1 Group */ |
100 | #endif |
101 | be,pt %xcc, 3f /* CTI; nothing flagged there: test only the last four bytes */ |
102 | |
103 | srlx %o4, 56, %o5 /* IEU0; delay slot: 1st byte */ |
104 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
105 | be,pn %icc, 4f /* CTI; NUL in equal xwords: return 0 */ |
106 | srlx %o4, 48, %o5 /* IEU0; delay slot: 2nd byte */ |
107 | |
108 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
109 | be,pn %icc, 4f /* CTI; NUL: return 0 */ |
110 | srlx %o4, 40, %o5 /* IEU0; delay slot: 3rd byte */ |
111 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
112 | |
113 | be,pn %icc, 4f /* CTI; NUL: return 0 */ |
114 | srlx %o4, 32, %o5 /* IEU0; delay slot: 4th byte */ |
115 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
116 | be,pn %icc, 4f /* CTI; NUL: return 0 (the srlx at 3: is the delay slot) */ |
117 | |
118 | 3: srlx %o4, 24, %o5 /* IEU0; 5th byte */ |
119 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
120 | be,pn %icc, 4f /* CTI; NUL: return 0 */ |
121 | srlx %o4, 16, %o5 /* IEU0; delay slot: 6th byte */ |
122 | |
123 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
124 | be,pn %icc, 4f /* CTI; NUL: return 0 */ |
125 | srlx %o4, 8, %o5 /* IEU0; delay slot: 7th byte */ |
126 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
127 | |
128 | be,pn %icc, 4f /* CTI; NUL: return 0 */ |
129 | andcc %o4, 0xff, %g0 /* IEU1 Group; delay slot: test the 8th byte */ |
130 | bne,a,pn %icc, 2b /* CTI; no NUL at all (false alarm): continue the loop */ |
131 | ldxa [%o1] ASI_PNF, %o3 /* Load; annulled slot: next S2 xword */ |
132 | |
133 | 4: retl /* CTI+IEU1 Group; strings compare equal */ |
134 | clr %o0 /* IEU0; delay slot: return 0 */ |
135 | |
136 | .align 16 |
/* Tail: fewer than 8 characters of N remain.  %o2 holds (remaining - 8),
   i.e. -8 .. -1, and %g4 / %o3 hold the current S1 / S2 xwords.  Bytes are
   compared from the most significant one down.  The code stops when the
   count runs out (return 0 through 4:), at a NUL in S1, or at a
   difference (both return the byte difference in %o4 through 8:).
   Bytes above the one being compared are already known to be equal, so
   they cancel in the subtraction.  */
137 | 5: srlx %g4, 56, %o4 /* IEU0 Group; 1st byte of S1 */ |
138 | cmp %o2, -8 /* IEU1; no characters left? */ |
139 | be,pn %XCC, 4b /* CTI; yes: return 0 */ |
140 | srlx %o3, 56, %o5 /* IEU0 Group; delay slot: 1st byte of S2 */ |
141 | |
142 | andcc %o4, 0xff, %g0 /* IEU1; S1 byte NUL? */ |
143 | be,pn %xcc, 8f /* CTI; yes: return the difference */ |
144 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot: %o4 = byte difference */ |
145 | bne,pn %xcc, 8f /* CTI; bytes differ: return %o4 */ |
146 | |
147 | srlx %o3, 48, %o5 /* IEU0; delay slot: S2 down to the 2nd byte */ |
148 | cmp %o2, -7 /* IEU1 Group; was only 1 character left? */ |
149 | be,pn %XCC, 4b /* CTI; yes: return 0 */ |
150 | srlx %g4, 48, %o4 /* IEU0; delay slot: S1 down to the 2nd byte */ |
151 | |
152 | andcc %o4, 0xff, %g0 /* IEU1 Group; S1 byte NUL? */ |
153 | be,pn %xcc, 8f /* CTI */ |
154 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot: difference */ |
155 | bne,pn %xcc, 8f /* CTI */ |
156 | |
157 | srlx %o3, 40, %o5 /* IEU0; delay slot */ |
158 | cmp %o2, -6 /* IEU1 Group; were only 2 characters left? */ |
159 | be,pn %XCC, 4b /* CTI */ |
160 | srlx %g4, 40, %o4 /* IEU0; delay slot */ |
161 | |
162 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
163 | be,pn %xcc, 8f /* CTI */ |
164 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
165 | bne,pn %xcc, 8f /* CTI */ |
166 | |
167 | srlx %o3, 32, %o5 /* IEU0; delay slot */ |
168 | cmp %o2, -5 /* IEU1 Group; were only 3 characters left? */ |
169 | be,pn %XCC, 4b /* CTI */ |
170 | srlx %g4, 32, %o4 /* IEU0; delay slot */ |
171 | |
172 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
173 | be,pn %xcc, 8f /* CTI */ |
174 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
175 | bne,pn %xcc, 8f /* CTI */ |
176 | |
177 | srlx %o3, 24, %o5 /* IEU0; delay slot */ |
178 | cmp %o2, -4 /* IEU1 Group; were only 4 characters left? */ |
179 | be,pn %XCC, 4b /* CTI */ |
180 | srlx %g4, 24, %o4 /* IEU0; delay slot */ |
181 | |
182 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
183 | be,pn %xcc, 8f /* CTI */ |
184 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
185 | bne,pn %xcc, 8f /* CTI */ |
186 | |
187 | srlx %o3, 16, %o5 /* IEU0; delay slot */ |
188 | cmp %o2, -3 /* IEU1 Group; were only 5 characters left? */ |
189 | be,pn %XCC, 4b /* CTI */ |
190 | srlx %g4, 16, %o4 /* IEU0; delay slot */ |
191 | |
192 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
193 | be,pn %xcc, 8f /* CTI */ |
194 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
195 | bne,pn %xcc, 8f /* CTI */ |
196 | |
197 | srlx %o3, 8, %o5 /* IEU0; delay slot */ |
198 | cmp %o2, -2 /* IEU1 Group; were only 6 characters left? */ |
199 | be,pn %XCC, 4b /* CTI */ |
200 | srlx %g4, 8, %o4 /* IEU0; delay slot */ |
201 | |
202 | retl /* CTI+IEU1 Group; 7 were left: the 7th character decides */ |
203 | sub %o4, %o5, %o0 /* IEU0; delay slot: return its difference */ |
/* The xwords differ and all 8 characters are within N.  Walk the bytes
   from the most significant one down and return, through 8:, the
   difference at the first byte that is NUL in S1 or differs.  */
204 | 6: addcc %o3, %o4, %g4 /* IEU1; rebuild the S1 xword (S2 + (S1 - S2)); %g4 held the preload */ |
205 | 7: srlx %o3, 56, %o5 /* IEU0; 1st byte of S2 */ |
206 | |
207 | srlx %g4, 56, %o4 /* IEU0 Group; 1st byte of S1 */ |
208 | andcc %o4, 0xff, %g0 /* IEU1 Group; S1 byte NUL? */ |
209 | be,pn %xcc, 8f /* CTI; yes: return the difference */ |
210 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot: %o4 = difference */ |
211 | |
212 | bne,pn %xcc, 8f /* CTI; bytes differ: return %o4 */ |
213 | srlx %o3, 48, %o5 /* IEU0; delay slot: 2nd byte */ |
214 | srlx %g4, 48, %o4 /* IEU0 Group */ |
215 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
216 | |
217 | be,pn %xcc, 8f /* CTI */ |
218 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
219 | bne,pn %xcc, 8f /* CTI */ |
220 | srlx %o3, 40, %o5 /* IEU0; delay slot: 3rd byte */ |
221 | |
222 | srlx %g4, 40, %o4 /* IEU0 Group */ |
223 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
224 | be,pn %xcc, 8f /* CTI */ |
225 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
226 | |
227 | bne,pn %xcc, 8f /* CTI */ |
228 | srlx %o3, 32, %o5 /* IEU0; delay slot: 4th byte */ |
229 | srlx %g4, 32, %o4 /* IEU0 Group */ |
230 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
231 | |
232 | be,pn %xcc, 8f /* CTI */ |
233 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
234 | bne,pn %xcc, 8f /* CTI */ |
235 | srlx %o3, 24, %o5 /* IEU0; delay slot: 5th byte */ |
236 | |
237 | srlx %g4, 24, %o4 /* IEU0 Group */ |
238 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
239 | be,pn %xcc, 8f /* CTI */ |
240 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
241 | |
242 | bne,pn %xcc, 8f /* CTI */ |
243 | srlx %o3, 16, %o5 /* IEU0; delay slot: 6th byte */ |
244 | srlx %g4, 16, %o4 /* IEU0 Group */ |
245 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
246 | |
247 | be,pn %xcc, 8f /* CTI */ |
248 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
249 | bne,pn %xcc, 8f /* CTI */ |
250 | srlx %o3, 8, %o5 /* IEU0; delay slot: 7th byte */ |
251 | |
252 | srlx %g4, 8, %o4 /* IEU0 Group */ |
253 | andcc %o4, 0xff, %g0 /* IEU1 Group; 7th byte of S1 NUL? */ |
254 | be,pn %xcc, 8f /* CTI; yes: return the difference */ |
255 | subcc %o4, %o5, %o4 /* IEU1 Group; delay slot */ |
256 | |
257 | retl /* CTI+IEU1 Group; first six bytes equal: the last two decide */ |
258 | sub %g4, %o3, %o0 /* IEU0; delay slot: return the xword difference */ |
259 | 8: retl /* CTI+IEU1 Group */ |
260 | mov %o4, %o0 /* IEU0; delay slot: return the difference held in %o4 */ |
261 | |
/* S1 is not 8-byte aligned: compare one byte at a time until S1 becomes
   aligned, N runs out, a NUL is found, or the bytes differ.  Inside the
   loop %g4 / %o3 hold the current S1 / S2 bytes.  */
262 | 9: ldub [%o0], %g4 /* Load Group; S1 byte */ |
263 | add %o0, 1, %o0 /* IEU0 */ |
264 | ldub [%o1], %o3 /* Load Group; S2 byte */ |
265 | sllx %g1, 32, %g2 /* IEU0; %g2 = 0x01010101 << 32, merged into %g1 later */ |
266 | |
267 | 10: subcc %o2, 1, %o2 /* IEU1 Group; count this byte */ |
268 | be,pn %XCC, 8b /* CTI; it was the last of N: return the difference */ |
269 | sub %g4, %o3, %o4 /* IEU0; delay slot: %o4 = byte difference */ |
270 | add %o1, 1, %o1 /* IEU0 Group */ |
271 | |
272 | cmp %g4, %o3 /* IEU1 */ |
273 | bne,pn %xcc, 8b /* CTI; bytes differ: return %o4 */ |
274 | lduba [%o0] ASI_PNF, %g4 /* Load Group; delay slot: preload next S1 byte */ |
275 | andcc %o3, 0xff, %g0 /* IEU1; was the (equal) byte a NUL? */ |
276 | |
277 | be,pn %icc, 4b /* CTI; yes: return 0 */ |
278 | lduba [%o1] ASI_PNF, %o3 /* Load Group; delay slot: next S2 byte */ |
279 | andcc %o0, 7, %g0 /* IEU1; has S1 reached 8-byte alignment? */ |
280 | bne,a,pn %icc, 10b /* CTI; not yet: keep comparing bytes */ |
281 | |
282 | add %o0, 1, %o0 /* IEU0 Group; annulled slot: advance S1 only when looping */ |
283 | or %g1, %g2, %g1 /* IEU1; %g1 = 0x0101010101010101 */ |
284 | andcc %o1, 7, %g3 /* IEU1 Group; %g3 = S2 & 7 */ |
285 | be,pn %icc, 1b /* CTI; S2 aligned as well: use the aligned loop */ |
286 | |
287 | ldxa [%o0] ASI_PNF, %g4 /* Load; delay slot: S1 xword for the aligned loop */ |
/* S1 is aligned, S2 is not (%g3 = S2 & 7).  Round S2 down to an xword
   boundary and assemble every S2 xword from two neighbouring aligned
   xwords.  */
288 | 11: sllx %g3, 3, %g5 /* IEU0 Group; %g5 = 8 * (S2 & 7) */ |
289 | mov 64, %g6 /* IEU1 */ |
290 | or %g1, %g2, %g1 /* IEU0 Group; %g1 = 0x0101010101010101 */ |
291 | sub %o1, %g3, %o1 /* IEU1; round S2 down to an xword boundary */ |
292 | |
293 | sub %g6, %g5, %g6 /* IEU0 Group; %g6 = 64 - %g5 */ |
294 | ldxa [%o1] ASI_PNF, %o4 /* Load; first aligned xword covering S2 */ |
295 | sllx %g1, 7, %g2 /* IEU1; %g2 = 0x8080808080808080 */ |
296 | add %o1, 8, %o1 /* IEU0 Group */ |
297 | /* %g1 = 0101010101010101 |
298 | %g2 = 8080808080808080 |
299 | %g3 = %o1 alignment |
300 | %g5 = number of bits to shift left |
301 | %g6 = number of bits to shift right */ |
302 | |
303 | 12: sllx %o4, %g5, %o3 /* IEU0 Group; high part of the S2 xword */ |
304 | ldxa [%o1] ASI_PNF, %o4 /* Load; next aligned xword of S2 */ |
305 | add %o1, 8, %o1 /* IEU1 */ |
306 | 13: ldxa [%o0] ASI_PNF, %g4 /* Load Group; S1 xword */ |
307 | |
308 | addcc %o0, 8, %o0 /* IEU1; flags are overwritten by the subcc below */ |
309 | srlx %o4, %g6, %o5 /* IEU0; low part of the S2 xword */ |
310 | subcc %o2, 8, %o2 /* IEU1 Group; N -= 8, carry if fewer than 8 were left */ |
311 | bcs,pn %XCC, 5b /* CTI; partial xword: finish in the tail code */ |
312 | |
313 | or %o3, %o5, %o3 /* IEU0; delay slot: %o3 = assembled S2 xword */ |
314 | cmp %g4, %o3 /* IEU1 Group */ |
315 | bne,pn %xcc, 7b /* CTI; xwords differ: find the byte */ |
316 | sub %g4, %g1, %o5 /* IEU0; delay slot: zero-byte probe */ |
317 | |
318 | #ifdef EIGHTBIT_NOT_RARE |
319 | andn %o5, %g4, %o5 /* IEU0 Group; precise probe: & ~xword */ |
320 | #endif |
321 | andcc %o5, %g2, %g0 /* IEU1 Group; any byte flagged? */ |
322 | be,pt %xcc, 12b /* CTI; none: next xword */ |
323 | srlx %o5, 32, %o5 /* IEU0; delay slot: flags of the first four bytes */ |
324 | andcc %o5, %g2, %g0 /* IEU1 Group */ |
325 | |
326 | be,pt %xcc, 14f /* CTI; nothing flagged there: test only the last four bytes */ |
327 | srlx %g4, 56, %o5 /* IEU0; delay slot: 1st byte */ |
328 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
329 | be,pn %icc, 4b /* CTI; NUL in equal xwords: return 0 */ |
330 | |
331 | srlx %g4, 48, %o5 /* IEU0; delay slot: 2nd byte */ |
332 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
333 | be,pn %icc, 4b /* CTI; NUL: return 0 */ |
334 | srlx %g4, 40, %o5 /* IEU0; delay slot: 3rd byte */ |
335 | |
336 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
337 | be,pn %icc, 4b /* CTI; NUL: return 0 */ |
338 | srlx %g4, 32, %o5 /* IEU0; delay slot: 4th byte */ |
339 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
340 | |
341 | be,pn %icc, 4b /* CTI; NUL: return 0 (the srlx at 14: is the delay slot) */ |
342 | 14: srlx %g4, 24, %o5 /* IEU0; 5th byte */ |
343 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
344 | be,pn %icc, 4b /* CTI; NUL: return 0 */ |
345 | |
346 | srlx %g4, 16, %o5 /* IEU0; delay slot: 6th byte */ |
347 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
348 | be,pn %icc, 4b /* CTI; NUL: return 0 */ |
349 | srlx %g4, 8, %o5 /* IEU0; delay slot: 7th byte */ |
350 | |
351 | andcc %o5, 0xff, %g0 /* IEU1 Group */ |
352 | be,pn %icc, 4b /* CTI; NUL: return 0 */ |
353 | andcc %g4, 0xff, %g0 /* IEU1 Group; delay slot: test the 8th byte */ |
354 | be,pn %icc, 4b /* CTI; NUL: return 0 */ |
355 | |
356 | sllx %o4, %g5, %o3 /* IEU0; delay slot: high part of the next S2 xword */ |
357 | ldxa [%o1] ASI_PNF, %o4 /* Load Group; next aligned xword of S2 */ |
358 | ba,pt %xcc, 13b /* CTI; false alarm: continue the loop */ |
359 | add %o1, 8, %o1 /* IEU0; delay slot */ |
360 | END(strncmp) |
361 | libc_hidden_builtin_def (strncmp) |
362 | |