1/* strcat (dest, src) -- Append SRC on the end of DEST.
2 For SPARC v9.
3 Copyright (C) 1998-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21#include <asm/asi.h>
22#ifndef XCC
23#define XCC xcc
24#define USE_BPR
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
28#endif
29
30 /* Normally, this uses
31 ((xword - 0x0101010101010101) & 0x8080808080808080) test
32 to find out if any byte in xword could be zero. This is fast, but
33 also gives false alarm for any byte in range 0x81-0xff. It does
34 not matter for correctness, as if this test tells us there could
35 be some zero byte, we check it byte by byte, but if bytes with
36 high bits set are common in the strings, then this will give poor
37 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use a test that is one tick slower but more precise,
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot assume from it which bytes are zero and which are not).
42 It has yet to be measured which of the two tests is the better
43 default for glibc for an average user these days.
44 */
45
46 .text
47 .align 32
/* strcat (dest, src): find the terminating NUL of DEST, copy SRC
   (including its NUL) there, and return the original DEST.

   On entry   %o0 = dest, %o1 = src.
   On return  %o0 = the value %o0 had on entry (kept in %g6).

   Register roles set up below:
     %g1 = 0x0101010101010101    %g2 = 0x8080808080808080
     %g6 = saved dest            %o0 = dest cursor    %o1 = src cursor
   Other registers written: %g3, %g4, %g5, %o2, %o3, %o4, %o5 and the
   integer condition codes.

   Branch delay slots are used throughout: the instruction after a
   branch runs before the branch target, and when the branch carries
   the a (annul) suffix it runs only if a conditional branch is taken.

   Loads that run ahead of the data use ASI_PNF (from asm/asi.h).
   NOTE(review): presumably a no-fault ASI, so that reading an xword
   beyond the end of a string cannot trap.  Confirm against that header.

   The trailing IEU0, IEU1, Load, Store, CTI and Group notes on each
   instruction presumably record the intended issue grouping.

   Layout by numeric label:
     49, 50, 51   scan DEST eight bytes at a time for its NUL
     52, 23-29    move %o0 back onto that NUL
     32           byte-wise scan while DEST is not 8-byte aligned
     12, 13       byte-wise copy until the dest cursor is 8-byte aligned
     1, 2, 3      xword copy, SRC 8-byte aligned (tails at 5-11)
     14, 15       xword copy, SRC misaligned (tails at 16-22)
     4            common return  */
48ENTRY(strcat)
/* Build the two masks, save DEST for the return value, fetch the first
   byte of DEST, and pick the scan strategy from the alignment of DEST.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
50 ldub [%o0], %o3 /* Load */
51 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
52 mov %o0, %g6 /* IEU1 */
53
54 sllx %g1, 32, %g2 /* IEU0 Group */
55 andcc %o0, 7, %g0 /* IEU1 */
56 or %g1, %g2, %g1 /* IEU0 Group */
/* DEST not 8-byte aligned: scan byte by byte at 32.  */
57 bne,pn %icc, 32f /* CTI */
58
/* Delay slot of the branch above, so it runs on both paths:
   %g2 = %g1 shifted left by 7 = 0x8080808080808080.  */
59 sllx %g1, 7, %g2 /* IEU0 Group */
/* DEST is 8-byte aligned here.  If its first byte is NUL then DEST is
   empty and SRC is copied to %o0 as it stands (30).  The delay slot
   loads the first xword of DEST for the scan loop.  */
60 brz,pn %o3, 30f /* CTI+IEU1 */
61 ldx [%o0], %o3 /* Load */
6248: add %o0, 8, %o0 /* IEU0 Group */
63
/* Scan loop.  %o3 holds the xword under test and the next xword is
   loaded while it is tested.  When the loop exits with a suspect xword
   at address A, %o3 holds the xword at A + 8 and %o0 = A + 16 (the add
   in the delay slot is not annulled, so it runs on exit as well).  */
6449: sub %o3, %g1, %o2 /* IEU0 Group */
65#ifdef EIGHTBIT_NOT_RARE
66 andn %o2, %o3, %g5 /* IEU0 Group */
67 ldxa [%o0] ASI_PNF, %o3 /* Load */
68 andcc %g5, %g2, %g0 /* IEU1 Group */
69#else
70 ldxa [%o0] ASI_PNF, %o3 /* Load */
71 andcc %o2, %g2, %g0 /* IEU1 Group */
72#endif
73 be,pt %xcc, 49b /* CTI */
74
75 add %o0, 8, %o0 /* IEU0 */
/* Possible NUL.  Rebuild the suspect xword in %g3 (%o2 + %g1) and look
   at its bytes from the most significant one (lowest address) down.
   At 50 the upper four bytes are skipped when the subtract-and-mask
   test shows nothing in the upper half.  */
76 addcc %o2, %g1, %g3 /* IEU1 Group */
77 srlx %o2, 32, %o2 /* IEU0 */
7850: andcc %o2, %g2, %g0 /* IEU1 Group */
79
80 be,pn %xcc, 51f /* CTI */
81 srlx %g3, 56, %o2 /* IEU0 */
82 andcc %o2, 0xff, %g0 /* IEU1 Group */
83 be,pn %icc, 29f /* CTI */
84
85 srlx %g3, 48, %o2 /* IEU0 */
86 andcc %o2, 0xff, %g0 /* IEU1 Group */
87 be,pn %icc, 28f /* CTI */
88 srlx %g3, 40, %o2 /* IEU0 */
89
90 andcc %o2, 0xff, %g0 /* IEU1 Group */
91 be,pn %icc, 27f /* CTI */
92 srlx %g3, 32, %o2 /* IEU0 */
93 andcc %o2, 0xff, %g0 /* IEU1 Group */
94
95 be,pn %icc, 26f /* CTI */
/* Label 51 is both the delay slot of the branch above and the entry
   point for checking the lower four bytes.  */
9651: srlx %g3, 24, %o2 /* IEU0 */
97 andcc %o2, 0xff, %g0 /* IEU1 Group */
98 be,pn %icc, 25f /* CTI */
99
100 srlx %g3, 16, %o2 /* IEU0 */
101 andcc %o2, 0xff, %g0 /* IEU1 Group */
102 be,pn %icc, 24f /* CTI */
103 srlx %g3, 8, %o2 /* IEU0 */
104
105 andcc %o2, 0xff, %g0 /* IEU1 Group */
106 be,pn %icc, 23f /* CTI */
/* Delay slot: begin the subtract-and-mask test of the following xword,
   which is already in %o3.  */
107 sub %o3, %g1, %o2 /* IEU0 */
/* Last byte of the suspect xword.  If it is NUL go to 52.  Otherwise
   this was a false alarm: finish the test of the xword from %o3 and
   either resume the loop at 49 or re-enter the byte checks at 50 with
   that xword rebuilt in %g3.  */
108 andcc %g3, 0xff, %g0 /* IEU1 Group */
109
110 be,pn %icc, 52f /* CTI */
111 ldxa [%o0] ASI_PNF, %o3 /* Load */
112 andcc %o2, %g2, %g0 /* IEU1 Group */
113 be,pt %xcc, 49b /* CTI */
114
115 add %o0, 8, %o0 /* IEU0 */
116 addcc %o2, %g1, %g3 /* IEU1 Group */
117 ba,pt %xcc, 50b /* CTI */
118 srlx %o2, 32, %o2 /* IEU0 */
119
/* Landing pads.  Each one moves %o0 back from A + 16 onto the NUL byte
   found in DEST: 52 is the last byte of the xword (A + 7), 23 to 28 are
   A + 6 down to A + 1, and 29 is the first byte (A).  All but 29 go on
   to the byte copy at 12.  29 falls through to 30, where %o0 is 8-byte
   aligned and the xword copy can start at once.  */
120 .align 16
12152: ba,pt %xcc, 12f /* CTI Group */
122 add %o0, -9, %o0 /* IEU0 */
12323: ba,pt %xcc, 12f /* CTI Group */
124 add %o0, -10, %o0 /* IEU0 */
125
12624: ba,pt %xcc, 12f /* CTI Group */
127 add %o0, -11, %o0 /* IEU0 */
12825: ba,pt %xcc, 12f /* CTI Group */
129 add %o0, -12, %o0 /* IEU0 */
130
13126: ba,pt %xcc, 12f /* CTI Group */
132 add %o0, -13, %o0 /* IEU0 */
13327: ba,pt %xcc, 12f /* CTI Group */
134 add %o0, -14, %o0 /* IEU0 */
135
13628: ba,pt %xcc, 12f /* CTI Group */
137 add %o0, -15, %o0 /* IEU0 */
13829: add %o0, -16, %o0 /* IEU0 Group */
/* %o0 is 8-byte aligned and points at the NUL of DEST.
   %g3 = src & 7.  If SRC is misaligned go to 14, with %g4 = 64 set in
   the delay slot.  */
13930: andcc %o1, 7, %g3 /* IEU1 */
140
14131: bne,pn %icc, 14f /* CTI */
142 orcc %g0, 64, %g4 /* IEU1 Group */
/* Aligned copy.  %g3 = xword to store, %o3 = next xword, loaded ahead.
   While the test sees no possible NUL, the annulled delay slot stores
   %g3 and the loop continues at 2.  */
1431: ldx [%o1], %o3 /* Load */
144 add %o1, 8, %o1 /* IEU1 */
145
1462: mov %o3, %g3 /* IEU0 Group */
1473: sub %o3, %g1, %o2 /* IEU1 */
148 ldxa [%o1] ASI_PNF, %o3 /* Load */
149#ifdef EIGHTBIT_NOT_RARE
150 andn %o2, %g3, %o2 /* IEU0 Group */
151#endif
152 add %o0, 8, %o0 /* IEU0 Group */
153
154 andcc %o2, %g2, %g0 /* IEU1 */
155 add %o1, 8, %o1 /* IEU0 Group */
156 be,a,pt %xcc, 2b /* CTI */
157 stx %g3, [%o0 - 8] /* Store */
158
/* Possible NUL in %g3.  Check the bytes from the most significant one
   down and branch to the tail that stores everything up to and
   including the NUL.  The shifted values are left in %g4 and %g5 for
   those tails.  */
159 srlx %g3, 56, %g5 /* IEU0 Group */
160 andcc %g5, 0xff, %g0 /* IEU1 Group */
161 be,pn %icc, 11f /* CTI */
162 srlx %g3, 48, %g4 /* IEU0 */
163
164 andcc %g4, 0xff, %g0 /* IEU1 Group */
165 be,pn %icc, 10f /* CTI */
166 srlx %g3, 40, %g5 /* IEU0 */
167 andcc %g5, 0xff, %g0 /* IEU1 Group */
168
169 be,pn %icc, 9f /* CTI */
170 srlx %g3, 32, %g4 /* IEU0 */
171 andcc %g4, 0xff, %g0 /* IEU1 Group */
172 be,pn %icc, 8f /* CTI */
173
174 srlx %g3, 24, %g5 /* IEU0 */
175 andcc %g5, 0xff, %g0 /* IEU1 Group */
176 be,pn %icc, 7f /* CTI */
177 srlx %g3, 16, %g4 /* IEU0 */
178
179 andcc %g4, 0xff, %g0 /* IEU1 Group */
180 be,pn %icc, 6f /* CTI */
181 srlx %g3, 8, %g5 /* IEU0 */
182 andcc %g5, 0xff, %g0 /* IEU1 Group */
183
184 be,pn %icc, 5f /* CTI */
185 sub %o3, %g1, %o2 /* IEU0 */
/* No NUL in the first seven bytes: store the whole xword.  If its last
   byte is NUL the copy is complete.  Otherwise it was a false alarm and
   the loop resumes at 3 with the next xword moved into %g3.  */
186 stx %g3, [%o0 - 8] /* Store Group */
187 andcc %g3, 0xff, %g0 /* IEU1 */
188
189 bne,pt %icc, 3b /* CTI */
190 mov %o3, %g3 /* IEU0 Group */
/* Common return: hand back the original DEST saved in %g6.  */
1914: retl /* CTI+IEU1 Group */
192 mov %g6, %o0 /* IEU0 */
193
/* Tails of the aligned copy.  The entry taken depends on where the NUL
   sits in %g3, counting byte 0 as the one at the lowest address:
   5 = byte 6, 6 = byte 5, 7 = byte 4, 8 = byte 3, 9 = byte 2,
   10 = byte 1, 11 = byte 0.  Each path stores the bytes up to and
   including the NUL with stb, sth and stw pieces relative to %o0 - 8,
   then returns the original DEST.  */
194 .align 16
1955: stb %g5, [%o0 - 2] /* Store Group */
196 srlx %g3, 16, %g4 /* IEU0 */
1976: sth %g4, [%o0 - 4] /* Store Group */
198 srlx %g3, 32, %g4 /* IEU0 */
199
200 stw %g4, [%o0 - 8] /* Store Group */
201 retl /* CTI+IEU1 Group */
202 mov %g6, %o0 /* IEU0 */
2037: stb %g5, [%o0 - 4] /* Store Group */
204
205 srlx %g3, 32, %g4 /* IEU0 */
2068: stw %g4, [%o0 - 8] /* Store Group */
207 retl /* CTI+IEU1 Group */
208 mov %g6, %o0 /* IEU0 */
209
2109: stb %g5, [%o0 - 6] /* Store Group */
211 srlx %g3, 48, %g4 /* IEU0 */
21210: sth %g4, [%o0 - 8] /* Store Group */
213 retl /* CTI+IEU1 Group */
214
215 mov %g6, %o0 /* IEU0 */
21611: stb %g5, [%o0 - 8] /* Store Group */
217 retl /* CTI+IEU1 Group */
218 mov %g6, %o0 /* IEU0 */
219
/* DEST is not 8-byte aligned.  Scan it one byte at a time, with %o3
   holding the byte at %o0, until either %o0 becomes aligned (join the
   xword scan at 48, loading the first xword in the annulled delay slot)
   or a NUL byte is found.  */
220 .align 16
22132: andcc %o0, 7, %g0 /* IEU1 Group */
222 be,a,pn %icc, 48b /* CTI */
223 ldx [%o0], %o3 /* Load */
224 add %o0, 1, %o0 /* IEU0 Group */
225
226 brnz,a,pt %o3, 32b /* CTI+IEU1 */
227 lduba [%o0] ASI_PNF, %o3 /* Load */
/* NUL found.  Step %o0 back onto it.  If that address is 8-byte aligned
   go to 31 with %g3 = src & 7 computed in the annulled delay slot,
   otherwise fall into the byte copy.  */
228 add %o0, -1, %o0 /* IEU0 Group */
229 andcc %o0, 7, %g0 /* IEU1 Group */
230
231 be,a,pn %icc, 31b /* CTI */
232 andcc %o1, 7, %g3 /* IEU1 Group */
/* Byte copy, used until the dest cursor is 8-byte aligned.  Returns via
   4 as soon as the NUL of SRC has been stored.  The next source byte is
   loaded in the delay slot of that return branch and stored in the
   annulled delay slot of the loop branch.  */
23312: ldub [%o1], %o3 /* Load */
234 stb %o3, [%o0] /* Store Group */
235
23613: add %o0, 1, %o0 /* IEU0 */
237 add %o1, 1, %o1 /* IEU1 */
238 andcc %o3, 0xff, %g0 /* IEU1 Group */
239 be,pn %icc, 4b /* CTI */
240
241 lduba [%o1] ASI_PNF, %o3 /* Load */
242 andcc %o0, 7, %g0 /* IEU1 Group */
243 bne,a,pt %icc, 13b /* CTI */
244 stb %o3, [%o0] /* Store */
245
/* The dest cursor is now 8-byte aligned.  If the src cursor is aligned
   too, use the aligned copy at 1.  Otherwise set %g4 = 64 and fall into
   the misaligned copy at 14 with %g3 = src & 7.  */
246 andcc %o1, 7, %g3 /* IEU1 Group */
247 be,a,pt %icc, 1b /* CTI */
248 ldx [%o1], %o3 /* Load */
249 orcc %g0, 64, %g4 /* IEU1 Group */
250
/* Misaligned SRC.  Round %o1 down to an 8-byte boundary, derive the two
   shift counts from the misalignment, and load the first aligned xword
   into %o5.  */
25114: sllx %g3, 3, %g5 /* IEU0 */
252 sub %o1, %g3, %o1 /* IEU0 Group */
253 sub %g4, %g5, %g4 /* IEU1 */
254 /* %g1 = 0101010101010101 *
255 * %g2 = 8080808080808080 *
256 * %g3 = source alignment *
257 * %g5 = number of bits to shift left *
258 * %g4 = number of bits to shift right */
259 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
260
261 addcc %o1, 8, %o1 /* IEU1 */
/* Merge loop.  Each output xword in %o3 is the current aligned xword
   shifted left by %g5, ORed with the next aligned xword shifted right
   by %g4.  While the test sees no possible NUL, the annulled delay slot
   stores %o3 and the loop continues.  */
26215: sllx %o5, %g5, %o3 /* IEU0 Group */
263 ldxa [%o1] ASI_PNF, %o5 /* Load */
264 srlx %o5, %g4, %o4 /* IEU0 Group */
265
266 add %o0, 8, %o0 /* IEU1 */
267 or %o3, %o4, %o3 /* IEU0 Group */
268 add %o1, 8, %o1 /* IEU1 */
269 sub %o3, %g1, %o4 /* IEU0 Group */
270
271#ifdef EIGHTBIT_NOT_RARE
272 andn %o4, %o3, %o4 /* IEU0 Group */
273#endif
274 andcc %o4, %g2, %g0 /* IEU1 Group */
275 be,a,pt %xcc, 15b /* CTI */
276 stx %o3, [%o0 - 8] /* Store */
/* Possible NUL in %o3.  Check the bytes from the most significant one
   down and branch to the matching tail.  */
277 srlx %o3, 56, %o4 /* IEU0 Group */
278
279 andcc %o4, 0xff, %g0 /* IEU1 Group */
280 be,pn %icc, 22f /* CTI */
281 srlx %o3, 48, %o4 /* IEU0 */
282 andcc %o4, 0xff, %g0 /* IEU1 Group */
283
284 be,pn %icc, 21f /* CTI */
285 srlx %o3, 40, %o4 /* IEU0 */
286 andcc %o4, 0xff, %g0 /* IEU1 Group */
287 be,pn %icc, 20f /* CTI */
288
289 srlx %o3, 32, %o4 /* IEU0 */
290 andcc %o4, 0xff, %g0 /* IEU1 Group */
291 be,pn %icc, 19f /* CTI */
292 srlx %o3, 24, %o4 /* IEU0 */
293
294 andcc %o4, 0xff, %g0 /* IEU1 Group */
295 be,pn %icc, 18f /* CTI */
296 srlx %o3, 16, %o4 /* IEU0 */
297 andcc %o4, 0xff, %g0 /* IEU1 Group */
298
299 be,pn %icc, 17f /* CTI */
300 srlx %o3, 8, %o4 /* IEU0 */
301 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Last byte.  The delay slot of the branch to 16 sets the condition
   codes for it.  The xword is stored in the next delay slot whether or
   not the branch is taken: loop again at 15 if the last byte is not
   NUL, otherwise return the original DEST.  */
302 be,pn %icc, 16f /* CTI */
303
304 andcc %o3, 0xff, %g0 /* IEU1 Group */
305 bne,pn %icc, 15b /* CTI */
306 stx %o3, [%o0 - 8] /* Store */
307 retl /* CTI+IEU1 Group */
308
309 mov %g6, %o0 /* IEU0 */
310
/* Tails of the misaligned copy.  The entry taken depends on where the
   NUL sits in %o3, counting byte 0 as the one at the lowest address:
   16 = byte 6, 17 = byte 5, 18 = byte 4, 19 = byte 3, 20 = byte 2,
   21 = byte 1, 22 = byte 0.  Entries 16 to 18 each store one byte and
   fall into 19, which stores bytes 0-3 with one stw.  Entries 20 to 22
   store bytes 2, 1 and 0 one at a time.  Both groups return the
   original DEST.  */
311 .align 16
31216: srlx %o3, 8, %o4 /* IEU0 Group */
313 stb %o4, [%o0 - 2] /* Store */
31417: srlx %o3, 16, %o4 /* IEU0 Group */
315 stb %o4, [%o0 - 3] /* Store */
316
31718: srlx %o3, 24, %o4 /* IEU0 Group */
318 stb %o4, [%o0 - 4] /* Store */
31919: srlx %o3, 32, %o4 /* IEU0 Group */
320 stw %o4, [%o0 - 8] /* Store */
321
322 retl /* CTI+IEU1 Group */
323 mov %g6, %o0 /* IEU0 */
324 nop
325 nop
326
32720: srlx %o3, 40, %o4 /* IEU0 Group */
328 stb %o4, [%o0 - 6] /* Store */
32921: srlx %o3, 48, %o4 /* IEU0 Group */
330 stb %o4, [%o0 - 7] /* Store */
331
33222: srlx %o3, 56, %o4 /* IEU0 Group */
333 stb %o4, [%o0 - 8] /* Store */
334 retl /* CTI+IEU1 Group */
335 mov %g6, %o0 /* IEU0 */
336END(strcat)
337libc_hidden_builtin_def (strcat)
338

source code of glibc/sysdeps/sparc/sparc64/strcat.S