/* Compare two strings for differences.
   For SPARC v9.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <asm/asi.h>

/* Declare the global registers used below as scratch so that the
   assembler accepts them.  The declaration is skipped when XCC is
   already defined (presumably by a wrapper that includes this file;
   confirm against the build).  */
#ifndef XCC
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#endif

/* Register roles used by strcmp.  */
#define rSTR1		%o0	/* s1; rounded down to 8 bytes if unaligned,
				   then advanced 8 bytes per iteration */
#define rSTR2		%o1	/* s2 on entry, then the offset added to
				   rSTR1 to address the second string */
#define r0101		%o2	/* 0x0101010101010101 */
#define r8080		%o3	/* 0x8080808080808080 */
#define rSTRXOR		%o4	/* rWORD1 ^ rWORD2; also a setup temporary */
#define rWORD1		%o5	/* current 8 bytes of the first string */
#define rTMP1		%g1	/* scratch */
#define rTMP2		%g2	/* scratch */
#define rWORD2		%g3	/* current 8 bytes of the second string */
#define rSLL		%g4	/* left shift count, in bits, for rBARREL */
#define rSRL		%g5	/* right shift count, 64 - rSLL */
#define rBARREL		%g6	/* most recently loaded aligned word of the
				   second string */

42 /* There are two cases, either the two pointers are aligned
43 * identically or they are not. If they have the same
44 * alignment we can use the normal full speed loop. Otherwise
45 * we have to use the barrel-shifter version.
46 */
47
48 .text
49 .align 32
50ENTRY(strcmp)
51 or rSTR2, rSTR1, rTMP1
52 sethi %hi(0x80808080), r8080
53
54 andcc rTMP1, 0x7, %g0
55 bne,pn %icc, .Lmaybe_barrel_shift
56 or r8080, %lo(0x80808080), r8080
57 ldx [rSTR1], rWORD1
58
59 sub rSTR2, rSTR1, rSTR2
60 sllx r8080, 32, rTMP1
61
62 ldx [rSTR1 + rSTR2], rWORD2
63 or r8080, rTMP1, r8080
64
65 ba,pt %xcc, .Laligned_loop_entry
66 srlx r8080, 7, r0101
67
68 .align 32
69.Laligned_loop_entry:
70.Laligned_loop:
71 add rSTR1, 8, rSTR1
72
73 sub rWORD1, r0101, rTMP2
74 xorcc rWORD1, rWORD2, rSTRXOR
75 bne,pn %xcc, .Lcommon_endstring
76
77 andn r8080, rWORD1, rTMP1
78
79 ldxa [rSTR1] ASI_PNF, rWORD1
80 andcc rTMP1, rTMP2, %g0
81 be,a,pt %xcc, .Laligned_loop
82
83 ldxa [rSTR1 + rSTR2] ASI_PNF, rWORD2
84
85.Lcommon_equal:
86 retl
87 mov 0, %o0
88
89 /* All loops terminate here once they find an unequal word.
90 * If a zero byte appears in the word before the first unequal
91 * byte, we must report zero. Otherwise we report '1' or '-1'
92 * depending upon whether the first mis-matching byte is larger
93 * in the first string or the second, respectively.
94 *
95 * First we compute a 64-bit mask value that has "0x01" in
96 * each byte where a zero exists in rWORD1. rSTRXOR holds the
97 * value (rWORD1 ^ rWORD2). Therefore, if considered as an
98 * unsigned quantity, our "0x01" mask value is "greater than"
99 * rSTRXOR then a zero terminating byte comes first and
100 * therefore we report '0'.
101 *
102 * The formula for this mask is:
103 *
104 * mask_tmp1 = ~rWORD1 & 0x8080808080808080;
105 * mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) +
106 * 0x7f7f7f7f7f7f7f7f);
107 *
108 * mask = ((mask_tmp1 & ~mask_tmp2) >> 7);
109 */
110.Lcommon_endstring:
111 andn rWORD1, r8080, rTMP2
112 or r8080, 1, %o1
113
114 mov 1, %o0
115 sub rTMP2, %o1, rTMP2
116
117 cmp rWORD1, rWORD2
118 andn rTMP1, rTMP2, rTMP1
119
120 movleu %xcc, -1, %o0
121 srlx rTMP1, 7, rTMP1
122
123 /* In order not to be influenced by bytes after the zero byte, we
124 * have to retain only the highest bit in the mask for the comparison
125 * with rSTRXOR to work properly.
126 */
127 mov 0, rTMP2
128 andcc rTMP1, 0x0100, %g0
129
130 movne %xcc, 8, rTMP2
131 sllx rTMP1, 63 - 16, %o1
132
133 movrlz %o1, 16, rTMP2
134 sllx rTMP1, 63 - 24, %o1
135
136 movrlz %o1, 24, rTMP2
137 sllx rTMP1, 63 - 32, %o1
138
139 movrlz %o1, 32, rTMP2
140 sllx rTMP1, 63 - 40, %o1
141
142 movrlz %o1, 40, rTMP2
143 sllx rTMP1, 63 - 48, %o1
144
145 movrlz %o1, 48, rTMP2
146 sllx rTMP1, 63 - 56, %o1
147
148 movrlz %o1, 56, rTMP2
149
150 srlx rTMP1, rTMP2, rTMP1
151
152 sllx rTMP1, rTMP2, rTMP1
153
154 cmp rTMP1, rSTRXOR
155 retl
156 movgu %xcc, 0, %o0
157
158.Lmaybe_barrel_shift:
159 sub rSTR2, rSTR1, rSTR2
160 sllx r8080, 32, rTMP1
161
162 or r8080, rTMP1, r8080
163 and rSTR1, 0x7, rTMP2
164
165 srlx r8080, 7, r0101
166 andn rSTR1, 0x7, rSTR1
167
168 ldxa [rSTR1] ASI_PNF, rWORD1
169 andcc rSTR2, 0x7, rSLL
170 sll rTMP2, 3, rSTRXOR
171
172 bne,pn %icc, .Lneed_barrel_shift
173 mov -1, rTMP1
174 ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
175
176 srlx rTMP1, rSTRXOR, rTMP2
177
178 orn rWORD1, rTMP2, rWORD1
179 ba,pt %xcc, .Laligned_loop_entry
180 orn rBARREL, rTMP2, rWORD2
181
182.Lneed_barrel_shift:
183 sllx rSLL, 3, rSLL
184 andn rSTR2, 0x7, rSTR2
185
186 ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
187 mov 64, rTMP2
188 sub rTMP2, rSLL, rSRL
189
190 srlx rTMP1, rSTRXOR, rTMP1
191 add rSTR2, 8, rSTR2
192
193 orn rWORD1, rTMP1, rWORD1
194 sllx rBARREL, rSLL, rWORD2
195 ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
196
197 add rSTR1, 8, rSTR1
198 sub rWORD1, r0101, rTMP2
199
200 srlx rBARREL, rSRL, rSTRXOR
201
202 or rWORD2, rSTRXOR, rWORD2
203
204 orn rWORD2, rTMP1, rWORD2
205 ba,pt %xcc, .Lbarrel_shift_loop_entry
206 andn r8080, rWORD1, rTMP1
207
208.Lbarrel_shift_loop:
209 sllx rBARREL, rSLL, rWORD2
210 ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL
211
212 add rSTR1, 8, rSTR1
213 sub rWORD1, r0101, rTMP2
214
215 srlx rBARREL, rSRL, rSTRXOR
216 andn r8080, rWORD1, rTMP1
217
218 or rWORD2, rSTRXOR, rWORD2
219
220.Lbarrel_shift_loop_entry:
221 xorcc rWORD1, rWORD2, rSTRXOR
222 bne,pn %xcc, .Lcommon_endstring
223
224 andcc rTMP1, rTMP2, %g0
225 be,a,pt %xcc, .Lbarrel_shift_loop
226 ldxa [rSTR1] ASI_PNF, rWORD1
227
228 retl
229 mov 0, %o0
230END(strcmp)
231libc_hidden_builtin_def (strcmp)
232

/* Source: glibc/sysdeps/sparc/sparc64/strcmp.S */