1 | /* Compare two strings for differences. |
2 | For SPARC v9. |
3 | Copyright (C) 2011-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include <asm/asi.h> |
22 | |
23 | #ifndef XCC /* NOTE(review): XCC is not defined in the visible source; presumably supplied via <sysdep.h> -- confirm */ |
24 | .register %g2, #scratch /* %g2 is aliased below as rTMP2 */ |
25 | .register %g3, #scratch /* %g3 is aliased below as rWORD2 */ |
26 | .register %g6, #scratch /* %g6 is aliased below as rBARREL */ |
27 | #endif |
28 | /* Symbolic register names used by strcmp. */ |
29 | #define rSTR1 %o0 /* pointer into the first string; advanced by 8 per doubleword compared */ |
30 | #define rSTR2 %o1 /* second string pointer on entry; rewritten to an offset relative to rSTR1 */ |
31 | #define r0101 %o2 /* 0x0101010101010101 */ |
32 | #define r8080 %o3 /* 0x8080808080808080 */ |
33 | #define rSTRXOR %o4 /* rWORD1 ^ rWORD2; also reused as scratch on the unaligned paths */ |
34 | #define rWORD1 %o5 /* current doubleword of the first string */ |
35 | #define rTMP1 %g1 /* scratch */ |
36 | #define rTMP2 %g2 /* scratch */ |
37 | #define rWORD2 %g3 /* current doubleword of the second string */ |
38 | #define rSLL %g4 /* left-shift count, in bits, for the barrel-shift path */ |
39 | #define rSRL %g5 /* right-shift count, 64 - rSLL */ |
40 | #define rBARREL %g6 /* most recently loaded aligned doubleword of the second string */ |
41 | |
42 | /* There are two cases, either the two pointers are aligned |
43 | * identically or they are not. If they have the same |
44 | * alignment we can use the normal full speed loop. Otherwise |
45 | * we have to use the barrel-shifter version. |
46 | */ |
47 | |
48 | .text |
49 | .align 32 |
50 | ENTRY(strcmp) /* strcmp: first string in %o0 (rSTR1), second in %o1 (rSTR2); result 0, 1 or -1 is returned in %o0 */ |
51 | or rSTR2, rSTR1, rTMP1 /* merge the low bits of both pointers for a single alignment test */ |
52 | sethi %hi(0x80808080), r8080 /* begin building the 0x80 byte pattern */ |
53 | /* Take the fast path only when both pointers are 8-byte aligned. */ |
54 | andcc rTMP1, 0x7, %g0 |
55 | bne,pn %icc, .Lmaybe_barrel_shift |
56 | or r8080, %lo(0x80808080), r8080 /* delay slot, runs on both paths: r8080 = 0x80808080 */ |
57 | ldx [rSTR1], rWORD1 /* first doubleword of the first string */ |
58 | |
59 | sub rSTR2, rSTR1, rSTR2 /* rSTR2 becomes (second - first), so [rSTR1 + rSTR2] tracks the second string */ |
60 | sllx r8080, 32, rTMP1 |
61 | |
62 | ldx [rSTR1 + rSTR2], rWORD2 /* first doubleword of the second string */ |
63 | or r8080, rTMP1, r8080 /* r8080 = 0x8080808080808080 */ |
64 | |
65 | ba,pt %xcc, .Laligned_loop_entry |
66 | srlx r8080, 7, r0101 /* delay slot: r0101 = 0x0101010101010101 */ |
67 | |
68 | .align 32 |
69 | .Laligned_loop_entry: |
70 | .Laligned_loop: /* compares one doubleword pair per iteration; rWORD1 and rWORD2 are already loaded on entry */ |
71 | add rSTR1, 8, rSTR1 /* advance to the next doubleword before testing the current pair */ |
72 | |
73 | sub rWORD1, r0101, rTMP2 /* first half of the zero-byte test on rWORD1 */ |
74 | xorcc rWORD1, rWORD2, rSTRXOR /* rSTRXOR is non-zero exactly when the words differ */ |
75 | bne,pn %xcc, .Lcommon_endstring |
76 | |
77 | andn r8080, rWORD1, rTMP1 /* delay slot, runs on both paths: rTMP1 = ~rWORD1 & r8080, also consumed by .Lcommon_endstring */ |
78 | |
79 | ldxa [rSTR1] ASI_PNF, rWORD1 /* next doubleword of the first string; NOTE(review): ASI_PNF is presumably the non-faulting primary ASI -- confirm in <asm/asi.h> */ |
80 | andcc rTMP1, rTMP2, %g0 /* non-zero exactly when the previous rWORD1 contained a zero byte */ |
81 | be,a,pt %xcc, .Laligned_loop /* no zero byte: keep looping */ |
82 | |
83 | ldxa [rSTR1 + rSTR2] ASI_PNF, rWORD2 /* annulled delay slot: next doubleword of the second string, only when the branch is taken */ |
84 | /* Fall through: the words were equal and contained a zero byte, so the strings are equal. */ |
85 | .Lcommon_equal: /* not the target of any branch in the visible source; reached by fall-through */ |
86 | retl |
87 | mov 0, %o0 /* delay slot: return 0 */ |
88 | |
89 | /* All loops terminate here once they find an unequal word. |
90 | * If a zero byte appears in the word before the first unequal |
91 | * byte, we must report zero. Otherwise we report '1' or '-1' |
92 | * depending upon whether the first mis-matching byte is larger |
93 | * in the first string or the second, respectively. |
94 | * |
95 | * First we compute a 64-bit mask value that has "0x01" in |
96 | * each byte where a zero exists in rWORD1. rSTRXOR holds the |
97 | * value (rWORD1 ^ rWORD2). Therefore if, considered as an |
98 | * unsigned quantity, our "0x01" mask value is "greater than" |
99 | * rSTRXOR, then a zero terminating byte comes first and |
100 | * we report '0'. |
101 | * |
102 | * The formula for this mask is: |
103 | * |
104 | * mask_tmp1 = ~rWORD1 & 0x8080808080808080; |
105 | * mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) + |
106 | * 0x7f7f7f7f7f7f7f7f); |
107 | * |
108 | * mask = ((mask_tmp1 & ~mask_tmp2) >> 7); |
109 | */ |
110 | .Lcommon_endstring: /* on entry: rTMP1 = mask_tmp1 (set by the branching loop), rSTRXOR = rWORD1 ^ rWORD2 */ |
111 | andn rWORD1, r8080, rTMP2 /* rTMP2 = rWORD1 & 0x7f7f7f7f7f7f7f7f */ |
112 | or r8080, 1, %o1 /* %o1 = 0x8080808080808081, i.e. -0x7f7f7f7f7f7f7f7f modulo 2^64; this overwrites rSTR2 */ |
113 | |
114 | mov 1, %o0 /* provisional result: 1 */ |
115 | sub rTMP2, %o1, rTMP2 /* rTMP2 = mask_tmp2 (subtracting %o1 is the same as adding 0x7f7f7f7f7f7f7f7f) */ |
116 | |
117 | cmp rWORD1, rWORD2 /* unsigned ordering of the two differing words */ |
118 | andn rTMP1, rTMP2, rTMP1 /* rTMP1 = mask_tmp1 & ~mask_tmp2; leaves the condition codes from cmp intact */ |
119 | |
120 | movleu %xcc, -1, %o0 /* rWORD1 not above rWORD2: provisional result becomes -1 */ |
121 | srlx rTMP1, 7, rTMP1 /* rTMP1 = mask: 0x01 in every byte where rWORD1 has a zero */ |
122 | |
123 | /* In order not to be influenced by bytes after the zero byte, we |
124 | * have to retain only the highest bit in the mask for the comparison |
125 | * with rSTRXOR to work properly. |
126 | */ |
127 | mov 0, rTMP2 /* rTMP2 will hold the bit index of the highest set mask bit among bits 8..56, else 0 */ |
128 | andcc rTMP1, 0x0100, %g0 /* test mask bit 8 */ |
129 | |
130 | movne %xcc, 8, rTMP2 |
131 | sllx rTMP1, 63 - 16, %o1 /* move mask bit 16 into the sign bit of %o1 */ |
132 | |
133 | movrlz %o1, 16, rTMP2 /* %o1 negative means bit 16 was set; each later test overrides with a higher index */ |
134 | sllx rTMP1, 63 - 24, %o1 |
135 | |
136 | movrlz %o1, 24, rTMP2 |
137 | sllx rTMP1, 63 - 32, %o1 |
138 | |
139 | movrlz %o1, 32, rTMP2 |
140 | sllx rTMP1, 63 - 40, %o1 |
141 | |
142 | movrlz %o1, 40, rTMP2 |
143 | sllx rTMP1, 63 - 48, %o1 |
144 | |
145 | movrlz %o1, 48, rTMP2 |
146 | sllx rTMP1, 63 - 56, %o1 |
147 | |
148 | movrlz %o1, 56, rTMP2 |
149 | /* Shifting right and then left by rTMP2 clears every mask bit below the highest one. */ |
150 | srlx rTMP1, rTMP2, rTMP1 |
151 | |
152 | sllx rTMP1, rTMP2, rTMP1 |
153 | |
154 | cmp rTMP1, rSTRXOR /* compare the isolated zero-byte marker with the difference bits */ |
155 | retl |
156 | movgu %xcc, 0, %o0 /* delay slot: marker above rSTRXOR means the zero byte precedes the first difference, so return 0 */ |
157 | |
158 | .Lmaybe_barrel_shift: /* reached when at least one pointer is not 8-byte aligned; r8080 holds 0x80808080 on entry */ |
159 | sub rSTR2, rSTR1, rSTR2 /* rSTR2 becomes (second - first) */ |
160 | sllx r8080, 32, rTMP1 |
161 | |
162 | or r8080, rTMP1, r8080 /* r8080 = 0x8080808080808080 */ |
163 | and rSTR1, 0x7, rTMP2 /* rTMP2 = byte misalignment of the first string */ |
164 | |
165 | srlx r8080, 7, r0101 /* r0101 = 0x0101010101010101 */ |
166 | andn rSTR1, 0x7, rSTR1 /* round the first pointer down to an 8-byte boundary */ |
167 | |
168 | ldxa [rSTR1] ASI_PNF, rWORD1 /* aligned doubleword containing the start of the first string; NOTE(review): ASI_PNF is presumably the non-faulting primary ASI -- confirm in <asm/asi.h> */ |
169 | andcc rSTR2, 0x7, rSLL /* rSLL = (second - first) mod 8; zero when both strings share the same misalignment */ |
170 | sll rTMP2, 3, rSTRXOR /* rSTRXOR used as scratch: misalignment of the first string in bits */ |
171 | |
172 | bne,pn %icc, .Lneed_barrel_shift /* different misalignment: the second string needs shifting */ |
173 | mov -1, rTMP1 /* delay slot, runs on both paths: rTMP1 = all ones */ |
174 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* matching aligned doubleword of the second string */ |
175 | |
176 | srlx rTMP1, rSTRXOR, rTMP2 /* rTMP2 = all ones with the top (8 * misalignment) bits cleared */ |
177 | |
178 | orn rWORD1, rTMP2, rWORD1 /* force the high-order bytes that precede the string start to 0xff so they match and never look like a terminator */ |
179 | ba,pt %xcc, .Laligned_loop_entry /* from here on the aligned loop can be used */ |
180 | orn rBARREL, rTMP2, rWORD2 /* delay slot: same masking for the second string's word */ |
181 | |
182 | .Lneed_barrel_shift: /* on entry: rSLL = (second - first) mod 8 (non-zero), rTMP1 = all ones, rSTRXOR = misalignment of the first string in bits */ |
183 | sllx rSLL, 3, rSLL /* convert the byte distance to a bit shift count */ |
184 | andn rSTR2, 0x7, rSTR2 /* round the offset down so [rSTR1 + rSTR2] is 8-byte aligned */ |
185 | |
186 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* first aligned doubleword covering the second string; NOTE(review): ASI_PNF is presumably the non-faulting primary ASI -- confirm in <asm/asi.h> */ |
187 | mov 64, rTMP2 |
188 | sub rTMP2, rSLL, rSRL /* rSRL = 64 - rSLL */ |
189 | |
190 | srlx rTMP1, rSTRXOR, rTMP1 /* rTMP1 = all ones with the top (8 * misalignment) bits cleared */ |
191 | add rSTR2, 8, rSTR2 /* offset now selects the following doubleword of the second string */ |
192 | |
193 | orn rWORD1, rTMP1, rWORD1 /* force the high-order bytes that precede the string start to 0xff */ |
194 | sllx rBARREL, rSLL, rWORD2 /* high part of the realigned second-string word */ |
195 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* next aligned doubleword of the second string */ |
196 | |
197 | add rSTR1, 8, rSTR1 /* advance to the next doubleword of the first string */ |
198 | sub rWORD1, r0101, rTMP2 /* first half of the zero-byte test on rWORD1 */ |
199 | |
200 | srlx rBARREL, rSRL, rSTRXOR /* rSTRXOR used as scratch: low part of the realigned word */ |
201 | |
202 | or rWORD2, rSTRXOR, rWORD2 /* rWORD2 = realigned doubleword of the second string */ |
203 | |
204 | orn rWORD2, rTMP1, rWORD2 /* apply the same leading-byte masking as for rWORD1 */ |
205 | ba,pt %xcc, .Lbarrel_shift_loop_entry |
206 | andn r8080, rWORD1, rTMP1 /* delay slot: rTMP1 = ~rWORD1 & r8080, second half of the zero-byte test */ |
207 | |
208 | .Lbarrel_shift_loop: /* on entry: rWORD1 = new first-string doubleword, rBARREL = previously loaded second-string doubleword */ |
209 | sllx rBARREL, rSLL, rWORD2 /* high part of the realigned word, taken from the previous doubleword */ |
210 | ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL /* next aligned doubleword of the second string; NOTE(review): ASI_PNF is presumably the non-faulting primary ASI -- confirm in <asm/asi.h> */ |
211 | |
212 | add rSTR1, 8, rSTR1 /* advance to the next doubleword of the first string */ |
213 | sub rWORD1, r0101, rTMP2 /* first half of the zero-byte test on rWORD1 */ |
214 | |
215 | srlx rBARREL, rSRL, rSTRXOR /* rSTRXOR used as scratch: low part of the realigned word */ |
216 | andn r8080, rWORD1, rTMP1 /* rTMP1 = ~rWORD1 & r8080, second half of the zero-byte test */ |
217 | |
218 | or rWORD2, rSTRXOR, rWORD2 /* rWORD2 = realigned doubleword of the second string */ |
219 | |
220 | .Lbarrel_shift_loop_entry: /* rWORD1, rWORD2, rTMP1 and rTMP2 are all set up at this point */ |
221 | xorcc rWORD1, rWORD2, rSTRXOR /* rSTRXOR is non-zero exactly when the words differ */ |
222 | bne,pn %xcc, .Lcommon_endstring |
223 | |
224 | andcc rTMP1, rTMP2, %g0 /* delay slot, runs on both paths: non-zero exactly when rWORD1 contains a zero byte */ |
225 | be,a,pt %xcc, .Lbarrel_shift_loop /* no zero byte: keep looping */ |
226 | ldxa [rSTR1] ASI_PNF, rWORD1 /* annulled delay slot: next doubleword of the first string, only when the branch is taken */ |
227 | /* Fall through: the words were equal and contained a zero byte, so the strings are equal. */ |
228 | retl |
229 | mov 0, %o0 /* delay slot: return 0 */ |
230 | END(strcmp) |
231 | libc_hidden_builtin_def (strcmp) /* NOTE(review): glibc macro defined outside this file; presumably emits the hidden internal alias -- confirm */ |
232 | |