1 | /* strcat (dest, src) -- Append SRC on the end of DEST. |
2 | For SPARC v9. |
3 | Copyright (C) 1998-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include <asm/asi.h> |
/* Fallback configuration, applied only when XCC has not been defined
   before this point.  Neither XCC nor USE_BPR is referenced in the part
   of the file visible here: the code below names %xcc and %icc directly.
   The .register lines declare %g2, %g3 and %g6 as scratch registers; the
   routine below uses %g2 for the 0x80.. mask, %g3 as a temporary and %g6
   to hold the return value.  (Presumably the assembler requires such a
   declaration before these global registers may be used in 64-bit code
   -- TODO confirm against the GNU as SPARC documentation.)  */
22 | #ifndef XCC |
23 | #define XCC xcc |
24 | #define USE_BPR |
25 | .register %g2, #scratch |
26 | .register %g3, #scratch |
27 | .register %g6, #scratch |
28 | #endif |
29 | |
30 | /* Normally, this uses the |
31 | ((xword - 0x0101010101010101) & 0x8080808080808080) test |
32 | to find out if any byte in xword could be zero. This is fast, but |
33 | it also gives a false alarm for any byte in the range 0x81-0xff. That |
34 | does not matter for correctness: whenever this test says there could |
35 | be a zero byte, we check the xword byte by byte. However, if bytes |
36 | with the high bit set are common in the strings, performance will be |
37 | poor. You can #define EIGHTBIT_NOT_RARE and the algorithm will use |
38 | a more precise test that is one tick slower, |
39 | ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), |
40 | which does not give any false alarms (but if some bits are set, |
41 | one cannot assume from it which bytes are zero and which are not). |
42 | It has yet to be measured what the correct default for glibc is |
43 | these days for an average user. |
44 | */ |
45 | |
46 | .text |
47 | .align 32 |
/* strcat (dest, src): append the string at SRC to the end of the string
   at DEST and return DEST.

   On entry %o0 = dest and %o1 = src.  Every exit is a retl whose delay
   slot copies %g6 (the entry value of %o0) back into %o0.

   Register roles, as set up and used below:
     %g1  0x0101010101010101, subtracted from each xword under test
     %g2  0x8080808080808080, mask applied to the result of the subtraction
     %g6  entry value of %o0 (the return value)
     %o0  dest cursor          %o1  src cursor
     %o3  byte or xword most recently loaded
     %o2, %o4, %o5, %g3, %g4, %g5  temporaries

   Phases:
     1. Find the NUL that ends dest: bytewise at 32 while %o0 is not
        8-byte aligned, then an xword at a time at 48 and 49.
     2. Copy src: bytewise at 12 and 13 until %o0 is 8-byte aligned, then
        an xword at a time, either directly (1, 2, 3: src aligned) or by
        merging two shifted xwords (14, 15: src misaligned).

   An xword is handled most-significant byte first: bits 63..56 are the
   byte at the lowest address.  The instruction after each branch is its
   delay slot; on the ",a" (annulled) conditional branches it executes
   only when the branch is taken.  Loads that can run past the end of a
   string go through ASI_PNF (from <asm/asi.h>; presumably the no-fault
   ASI, so the over-read cannot trap -- TODO confirm).  The existing
   IEU0, IEU1, Load, Store, CTI and Group notes appear to be
   pipeline-scheduling annotations.  */
48 | ENTRY(strcat) |
/* Build %g1, save dest in %g6, fetch the first dest byte and branch to
   32 if dest is not 8-byte aligned.  */
49 | sethi %hi(0x01010101), %g1 /* IEU0 Group */ |
50 | ldub [%o0], %o3 /* Load */ |
51 | or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ |
52 | mov %o0, %g6 /* IEU1 */ |
53 | |
54 | sllx %g1, 32, %g2 /* IEU0 Group */ |
55 | andcc %o0, 7, %g0 /* IEU1 */ |
56 | or %g1, %g2, %g1 /* IEU0 Group */ |
57 | bne,pn %icc, 32f /* CTI */ |
58 | |
/* Delay slot of the bne above (not annulled), so %g2 = %g1 << 7 is built
   on both paths.  */
59 | sllx %g1, 7, %g2 /* IEU0 Group */ |
/* Dest is aligned.  If its first byte is already NUL, go straight to the
   copy at 30; the delay-slot ldx fetches the first dest xword.  */
60 | brz,pn %o3, 30f /* CTI+IEU1 */ |
61 | ldx [%o0], %o3 /* Load */ |
/* Aligned scan of dest, one xword per iteration.  At 49, %o3 holds the
   xword at %o0 - 8 and the next xword is loaded from %o0; the loop repeats
   while the masked test value is zero.  */
62 | 48: add %o0, 8, %o0 /* IEU0 Group */ |
63 | |
64 | 49: sub %o3, %g1, %o2 /* IEU0 Group */ |
65 | #ifdef EIGHTBIT_NOT_RARE |
66 | andn %o2, %o3, %g5 /* IEU0 Group */ |
67 | ldxa [%o0] ASI_PNF, %o3 /* Load */ |
68 | andcc %g5, %g2, %g0 /* IEU1 Group */ |
69 | #else |
70 | ldxa [%o0] ASI_PNF, %o3 /* Load */ |
71 | andcc %o2, %g2, %g0 /* IEU1 Group */ |
72 | #endif |
73 | be,pt %xcc, 49b /* CTI */ |
74 | |
75 | add %o0, 8, %o0 /* IEU0 */ |
/* The tested xword may contain a zero byte.  %g3 = that xword (test value
   plus %g1); %o0 is now 16 bytes past its address.  At 50 the upper half
   of the test value is checked first: if it has no flagged byte, the four
   upper byte checks are skipped by branching to 51.  */
76 | addcc %o2, %g1, %g3 /* IEU1 Group */ |
77 | srlx %o2, 32, %o2 /* IEU0 */ |
78 | 50: andcc %o2, %g2, %g0 /* IEU1 Group */ |
79 | |
80 | be,pn %xcc, 51f /* CTI */ |
/* Check the bytes of %g3 one at a time, most significant first.  Byte k
   being zero branches to the label that rewinds %o0 by 16 - k.  */
81 | srlx %g3, 56, %o2 /* IEU0 */ |
82 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
83 | be,pn %icc, 29f /* CTI */ |
84 | |
85 | srlx %g3, 48, %o2 /* IEU0 */ |
86 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
87 | be,pn %icc, 28f /* CTI */ |
88 | srlx %g3, 40, %o2 /* IEU0 */ |
89 | |
90 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
91 | be,pn %icc, 27f /* CTI */ |
92 | srlx %g3, 32, %o2 /* IEU0 */ |
93 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
94 | |
95 | be,pn %icc, 26f /* CTI */ |
96 | 51: srlx %g3, 24, %o2 /* IEU0 */ |
97 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
98 | be,pn %icc, 25f /* CTI */ |
99 | |
100 | srlx %g3, 16, %o2 /* IEU0 */ |
101 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
102 | be,pn %icc, 24f /* CTI */ |
103 | srlx %g3, 8, %o2 /* IEU0 */ |
104 | |
105 | andcc %o2, 0xff, %g0 /* IEU1 Group */ |
106 | be,pn %icc, 23f /* CTI */ |
/* From here the test of the following xword (already in %o3) is started
   while the last byte of %g3 is still being checked.  If no byte of %g3
   was zero, the scan resumes at 49 or, if the following xword is flagged
   too, at 50.  */
107 | sub %o3, %g1, %o2 /* IEU0 */ |
108 | andcc %g3, 0xff, %g0 /* IEU1 Group */ |
109 | |
110 | be,pn %icc, 52f /* CTI */ |
111 | ldxa [%o0] ASI_PNF, %o3 /* Load */ |
112 | andcc %o2, %g2, %g0 /* IEU1 Group */ |
113 | be,pt %xcc, 49b /* CTI */ |
114 | |
115 | add %o0, 8, %o0 /* IEU0 */ |
116 | addcc %o2, %g1, %g3 /* IEU1 Group */ |
117 | ba,pt %xcc, 50b /* CTI */ |
118 | srlx %o2, 32, %o2 /* IEU0 */ |
119 | |
120 | .align 16 |
/* NUL of dest found at byte k of the tested xword: rewind %o0 so that it
   addresses the NUL (52: k = 7 ... 28: k = 1) and continue with the byte
   copier at 12.  For k = 0 (label 29) %o0 ends up on the xword boundary
   and control falls through to the source alignment test at 30.  */
121 | 52: ba,pt %xcc, 12f /* CTI Group */ |
122 | add %o0, -9, %o0 /* IEU0 */ |
123 | 23: ba,pt %xcc, 12f /* CTI Group */ |
124 | add %o0, -10, %o0 /* IEU0 */ |
125 | |
126 | 24: ba,pt %xcc, 12f /* CTI Group */ |
127 | add %o0, -11, %o0 /* IEU0 */ |
128 | 25: ba,pt %xcc, 12f /* CTI Group */ |
129 | add %o0, -12, %o0 /* IEU0 */ |
130 | |
131 | 26: ba,pt %xcc, 12f /* CTI Group */ |
132 | add %o0, -13, %o0 /* IEU0 */ |
133 | 27: ba,pt %xcc, 12f /* CTI Group */ |
134 | add %o0, -14, %o0 /* IEU0 */ |
135 | |
136 | 28: ba,pt %xcc, 12f /* CTI Group */ |
137 | add %o0, -15, %o0 /* IEU0 */ |
138 | 29: add %o0, -16, %o0 /* IEU0 Group */ |
/* %g3 = src & 7.  A misaligned src goes to the shifting copier at 14;
   the delay-slot orcc loads 64 into %g4 for it.  */
139 | 30: andcc %o1, 7, %g3 /* IEU1 */ |
140 | |
141 | 31: bne,pn %icc, 14f /* CTI */ |
142 | orcc %g0, 64, %g4 /* IEU1 Group */ |
/* Copy loop for an 8-byte aligned src.  %g3 = xword about to be stored,
   %o3 = the following source xword, loaded ahead.  The annulled stx in
   the delay slot of the be,a stores %g3 only when the test found no
   possible zero byte and the loop continues.  */
143 | 1: ldx [%o1], %o3 /* Load */ |
144 | add %o1, 8, %o1 /* IEU1 */ |
145 | |
146 | 2: mov %o3, %g3 /* IEU0 Group */ |
147 | 3: sub %o3, %g1, %o2 /* IEU1 */ |
148 | ldxa [%o1] ASI_PNF, %o3 /* Load */ |
149 | #ifdef EIGHTBIT_NOT_RARE |
150 | andn %o2, %g3, %o2 /* IEU0 Group */ |
151 | #endif |
152 | add %o0, 8, %o0 /* IEU0 Group */ |
153 | |
154 | andcc %o2, %g2, %g0 /* IEU1 */ |
155 | add %o1, 8, %o1 /* IEU0 Group */ |
156 | be,a,pt %xcc, 2b /* CTI */ |
157 | stx %g3, [%o0 - 8] /* Store */ |
158 | |
/* %g3 may contain the terminating NUL: test its bytes, most significant
   first.  Labels 11 down to 5 store only the bytes up to and including
   the NUL and return.  */
159 | srlx %g3, 56, %g5 /* IEU0 Group */ |
160 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
161 | be,pn %icc, 11f /* CTI */ |
162 | srlx %g3, 48, %g4 /* IEU0 */ |
163 | |
164 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
165 | be,pn %icc, 10f /* CTI */ |
166 | srlx %g3, 40, %g5 /* IEU0 */ |
167 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
168 | |
169 | be,pn %icc, 9f /* CTI */ |
170 | srlx %g3, 32, %g4 /* IEU0 */ |
171 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
172 | be,pn %icc, 8f /* CTI */ |
173 | |
174 | srlx %g3, 24, %g5 /* IEU0 */ |
175 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
176 | be,pn %icc, 7f /* CTI */ |
177 | srlx %g3, 16, %g4 /* IEU0 */ |
178 | |
179 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
180 | be,pn %icc, 6f /* CTI */ |
181 | srlx %g3, 8, %g5 /* IEU0 */ |
182 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
183 | |
184 | be,pn %icc, 5f /* CTI */ |
185 | sub %o3, %g1, %o2 /* IEU0 */ |
/* The first seven bytes are nonzero: store the whole xword.  If the last
   byte is nonzero too, the test was a false alarm and the loop continues
   at 3 with the next xword; otherwise the NUL has been stored and the
   routine returns through 4.  */
186 | stx %g3, [%o0 - 8] /* Store Group */ |
187 | andcc %g3, 0xff, %g0 /* IEU1 */ |
188 | |
189 | bne,pt %icc, 3b /* CTI */ |
190 | mov %o3, %g3 /* IEU0 Group */ |
/* Shared exit (also used by the byte copier): return the saved dest.  */
191 | 4: retl /* CTI+IEU1 Group */ |
192 | mov %g6, %o0 /* IEU0 */ |
193 | |
194 | .align 16 |
/* Tails of the aligned copier.  %o0 - 8 is the address of the final
   xword.  Entry label by position of the NUL within it: 11 = byte 0,
   10 = byte 1, 9 = byte 2, 8 = byte 3, 7 = byte 4, 6 = byte 5,
   5 = byte 6.  Each entry stores the leading bytes with the widest
   stb/sth/stw combination that does not write past the NUL.  */
195 | 5: stb %g5, [%o0 - 2] /* Store Group */ |
196 | srlx %g3, 16, %g4 /* IEU0 */ |
197 | 6: sth %g4, [%o0 - 4] /* Store Group */ |
198 | srlx %g3, 32, %g4 /* IEU0 */ |
199 | |
200 | stw %g4, [%o0 - 8] /* Store Group */ |
201 | retl /* CTI+IEU1 Group */ |
202 | mov %g6, %o0 /* IEU0 */ |
203 | 7: stb %g5, [%o0 - 4] /* Store Group */ |
204 | |
205 | srlx %g3, 32, %g4 /* IEU0 */ |
206 | 8: stw %g4, [%o0 - 8] /* Store Group */ |
207 | retl /* CTI+IEU1 Group */ |
208 | mov %g6, %o0 /* IEU0 */ |
209 | |
210 | 9: stb %g5, [%o0 - 6] /* Store Group */ |
211 | srlx %g3, 48, %g4 /* IEU0 */ |
212 | 10: sth %g4, [%o0 - 8] /* Store Group */ |
213 | retl /* CTI+IEU1 Group */ |
214 | |
215 | mov %g6, %o0 /* IEU0 */ |
216 | 11: stb %g5, [%o0 - 8] /* Store Group */ |
217 | retl /* CTI+IEU1 Group */ |
218 | mov %g6, %o0 /* IEU0 */ |
219 | |
220 | .align 16 |
/* Dest is not 8-byte aligned: scan it one byte at a time.  %o3 holds the
   byte at %o0.  Once %o0 becomes 8-byte aligned, the annulled ldx fetches
   the xword there and the scan continues at 48.  */
221 | 32: andcc %o0, 7, %g0 /* IEU1 Group */ |
222 | be,a,pn %icc, 48b /* CTI */ |
223 | ldx [%o0], %o3 /* Load */ |
224 | add %o0, 1, %o0 /* IEU0 Group */ |
225 | |
226 | brnz,a,pt %o3, 32b /* CTI+IEU1 */ |
227 | lduba [%o0] ASI_PNF, %o3 /* Load */ |
/* %o3 was zero: step back so that %o0 addresses the NUL of dest.  */
228 | add %o0, -1, %o0 /* IEU0 Group */ |
229 | andcc %o0, 7, %g0 /* IEU1 Group */ |
230 | |
231 | be,a,pn %icc, 31b /* CTI */ |
232 | andcc %o1, 7, %g3 /* IEU1 Group */ |
/* Byte copier: copy src to dest one byte at a time.  It returns through
   4 as soon as the byte just stored was the NUL; otherwise it keeps going
   until %o0 is 8-byte aligned.  */
233 | 12: ldub [%o1], %o3 /* Load */ |
234 | stb %o3, [%o0] /* Store Group */ |
235 | |
236 | 13: add %o0, 1, %o0 /* IEU0 */ |
237 | add %o1, 1, %o1 /* IEU1 */ |
238 | andcc %o3, 0xff, %g0 /* IEU1 Group */ |
239 | be,pn %icc, 4b /* CTI */ |
240 | |
241 | lduba [%o1] ASI_PNF, %o3 /* Load */ |
242 | andcc %o0, 7, %g0 /* IEU1 Group */ |
243 | bne,a,pt %icc, 13b /* CTI */ |
244 | stb %o3, [%o0] /* Store */ |
245 | |
/* Dest is now 8-byte aligned.  %g3 = src & 7: zero selects the aligned
   copy loop at 1, anything else falls into the shifting copier with
   %g4 = 64.  */
246 | andcc %o1, 7, %g3 /* IEU1 Group */ |
247 | be,a,pt %icc, 1b /* CTI */ |
248 | ldx [%o1], %o3 /* Load */ |
249 | orcc %g0, 64, %g4 /* IEU1 Group */ |
250 | |
/* Shifting copier for a misaligned src.  %o1 is rounded down to an
   8-byte boundary, and each output xword is built at 15 as
   (previous source xword << %g5) | (next source xword >> %g4).  */
251 | 14: sllx %g3, 3, %g5 /* IEU0 */ |
252 | sub %o1, %g3, %o1 /* IEU0 Group */ |
253 | sub %g4, %g5, %g4 /* IEU1 */ |
254 | /* %g1 = 0101010101010101 * |
255 | * %g2 = 8080808080808080 * |
256 | * %g3 = source alignment * |
257 | * %g5 = number of bits to shift left * |
258 | * %g4 = number of bits to shift right */ |
259 | ldxa [%o1] ASI_PNF, %o5 /* Load Group */ |
260 | |
261 | addcc %o1, 8, %o1 /* IEU1 */ |
262 | 15: sllx %o5, %g5, %o3 /* IEU0 Group */ |
263 | ldxa [%o1] ASI_PNF, %o5 /* Load */ |
264 | srlx %o5, %g4, %o4 /* IEU0 Group */ |
265 | |
266 | add %o0, 8, %o0 /* IEU1 */ |
267 | or %o3, %o4, %o3 /* IEU0 Group */ |
268 | add %o1, 8, %o1 /* IEU1 */ |
269 | sub %o3, %g1, %o4 /* IEU0 Group */ |
270 | |
271 | #ifdef EIGHTBIT_NOT_RARE |
272 | andn %o4, %o3, %o4 /* IEU0 Group */ |
273 | #endif |
274 | andcc %o4, %g2, %g0 /* IEU1 Group */ |
275 | be,a,pt %xcc, 15b /* CTI */ |
276 | stx %o3, [%o0 - 8] /* Store */ |
/* The merged xword in %o3 may contain the NUL: test its bytes, most
   significant first.  Labels 22 down to 16 store the bytes up to and
   including the NUL and return.  */
277 | srlx %o3, 56, %o4 /* IEU0 Group */ |
278 | |
279 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
280 | be,pn %icc, 22f /* CTI */ |
281 | srlx %o3, 48, %o4 /* IEU0 */ |
282 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
283 | |
284 | be,pn %icc, 21f /* CTI */ |
285 | srlx %o3, 40, %o4 /* IEU0 */ |
286 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
287 | be,pn %icc, 20f /* CTI */ |
288 | |
289 | srlx %o3, 32, %o4 /* IEU0 */ |
290 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
291 | be,pn %icc, 19f /* CTI */ |
292 | srlx %o3, 24, %o4 /* IEU0 */ |
293 | |
294 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
295 | be,pn %icc, 18f /* CTI */ |
296 | srlx %o3, 16, %o4 /* IEU0 */ |
297 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
298 | |
299 | be,pn %icc, 17f /* CTI */ |
300 | srlx %o3, 8, %o4 /* IEU0 */ |
301 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
302 | be,pn %icc, 16f /* CTI */ |
303 | |
/* The first seven bytes are nonzero.  The andcc in the delay slot tests
   the last byte; the whole xword is stored either way (the stx is a
   non-annulled delay slot).  A nonzero last byte means a false alarm and
   the loop continues at 15, otherwise the routine returns.  */
304 | andcc %o3, 0xff, %g0 /* IEU1 Group */ |
305 | bne,pn %icc, 15b /* CTI */ |
306 | stx %o3, [%o0 - 8] /* Store */ |
307 | retl /* CTI+IEU1 Group */ |
308 | |
309 | mov %g6, %o0 /* IEU0 */ |
310 | |
311 | .align 16 |
/* Tails of the shifting copier.  %o0 - 8 is the address of the final
   xword.  Entry 16 = NUL in byte 6, 17 = byte 5, 18 = byte 4,
   19 = byte 3; each falls through the entries below it and finishes with
   a stw of the upper four bytes.  Entry 20 = NUL in byte 2, 21 = byte 1,
   22 = byte 0, stored with one stb per byte.  */
312 | 16: srlx %o3, 8, %o4 /* IEU0 Group */ |
313 | stb %o4, [%o0 - 2] /* Store */ |
314 | 17: srlx %o3, 16, %o4 /* IEU0 Group */ |
315 | stb %o4, [%o0 - 3] /* Store */ |
316 | |
317 | 18: srlx %o3, 24, %o4 /* IEU0 Group */ |
318 | stb %o4, [%o0 - 4] /* Store */ |
319 | 19: srlx %o3, 32, %o4 /* IEU0 Group */ |
320 | stw %o4, [%o0 - 8] /* Store */ |
321 | |
322 | retl /* CTI+IEU1 Group */ |
323 | mov %g6, %o0 /* IEU0 */ |
324 | nop |
325 | nop |
326 | |
327 | 20: srlx %o3, 40, %o4 /* IEU0 Group */ |
328 | stb %o4, [%o0 - 6] /* Store */ |
329 | 21: srlx %o3, 48, %o4 /* IEU0 Group */ |
330 | stb %o4, [%o0 - 7] /* Store */ |
331 | |
332 | 22: srlx %o3, 56, %o4 /* IEU0 Group */ |
333 | stb %o4, [%o0 - 8] /* Store */ |
334 | retl /* CTI+IEU1 Group */ |
335 | mov %g6, %o0 /* IEU0 */ |
336 | END(strcat) |
337 | libc_hidden_builtin_def (strcat) |
338 | |