1 | /* Copy SRC to DEST returning the address of the terminating '\0' in DEST. |
2 | For SPARC v9. |
3 | Copyright (C) 1998-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include <asm/asi.h> |
22 | #ifndef XCC |
/* Declare to the assembler that %g2, %g3 and %g6 are used as scratch
   registers in this file (GNU as SPARC `.register' directive); all
   three are used as temporaries by the routine below.  The directives
   are skipped when XCC is already defined.  Where XCC would be defined
   is not visible here -- presumably <sysdep.h>; TODO confirm.  */
23 | .register %g2, #scratch |
24 | .register %g3, #scratch |
25 | .register %g6, #scratch |
26 | #endif |
27 | |
28 | /* Normally, this uses |
29 | ((xword - 0x0101010101010101) & 0x8080808080808080) test |
30 | to find out if any byte in xword could be zero. This is fast, but |
31 | also gives false alarm for any byte in range 0x81-0xff. It does |
32 | not matter for correctness, as if this test tells us there could |
33 | be some zero byte, we check it byte by byte, but if bytes with |
34 | high bits set are common in the strings, then this will give poor |
35 | performance. You can #define EIGHTBIT_NOT_RARE and the algorithm |
36 | will use a one-tick-slower, but more precise, test |
37 | ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), |
38 | which does not give any false alarms (but if some bits are set, |
39 | one cannot assume from it which bytes are zero and which are not). |
40 | It has yet to be measured which default is best for the average |
41 | glibc user these days. |
42 | */ |
43 | |
44 | .text |
45 | .align 32 |
/* __stpcpy: copy the NUL-terminated string at SRC to DEST.

   In:   %o0 = DEST, %o1 = SRC.
   Out:  %o0 = address of the terminating NUL byte written to DEST.
   Uses: %g1-%g6, %o2-%o5 and the integer condition codes as scratch.
	 Leaf routine: returns with retl, no register window is saved.

   Constants kept for the whole routine:
     %g1 = 0x0101010101010101
     %g2 = 0x8080808080808080  (%g1 << 7)

   Structure:
     - DEST and SRC both 8-byte aligned: doubleword loop at 1/2/3.
     - DEST not 8-byte aligned: byte loop at 12/13 until DEST is
       aligned, then rejoin the doubleword loop (SRC aligned) or fall
       into 14 (SRC not aligned).
     - DEST aligned, SRC not: the loop at 15 builds each output
       doubleword from two aligned source doublewords by shifting.

   In both doubleword loops %o0 and %o1 are advanced by 8 before the
   zero-byte test, so the doubleword under test belongs at
   [%o0 - 8] .. [%o0 - 1]; that is why the tail code uses negative
   offsets from %o0.  "Byte 0" in the comments below is the most
   significant byte of the register, which is the byte stored at the
   lowest address ([%o0 - 8]); "byte 7" is the least significant one.

   The instruction following each branch occupies its delay slot; with
   the ",a" (annul) form on a conditional branch it only takes effect
   when the branch is taken.  Look-ahead loads use ASI_PNF from
   <asm/asi.h> -- presumably the non-faulting primary ASI, so that
   reading past the terminator cannot trap; confirm against the
   SPARC V9 manual.  The per-instruction IEU0/IEU1/CTI/Load/Store and
   "Group" notes are the original scheduling annotations.  */
46 | ENTRY(__stpcpy) |
/* Build %g1 while testing the alignment of DEST (%o0 & 7).  */
47 | sethi %hi(0x01010101), %g1 /* IEU0 Group */ |
48 | or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ |
49 | andcc %o0, 7, %g0 /* IEU1 */ |
50 | sllx %g1, 32, %g2 /* IEU0 Group */ |
51 | |
/* DEST not 8-byte aligned -> 12.  The delay slot computes
   %g3 = SRC & 7 and sets the condition codes for the next branch.  */
52 | bne,pn %icc, 12f /* CTI */ |
53 | andcc %o1, 7, %g3 /* IEU1 */ |
54 | or %g1, %g2, %g1 /* IEU0 Group */ |
/* SRC not 8-byte aligned -> 14.  The delay slot forms %g2 = %g1 << 7.  */
55 | bne,pn %icc, 14f /* CTI */ |
56 | |
57 | sllx %g1, 7, %g2 /* IEU0 Group */ |
/* Aligned doubleword loop.
     %g3 = doubleword being tested and stored
     %o3 = next source doubleword, loaded ahead at 3
     %o2 = %g3 - %g1 (and-not %g3 when EIGHTBIT_NOT_RARE), tested
	   against %g2 to see whether any byte might be zero.  */
58 | 1: ldx [%o1], %o3 /* Load */ |
59 | add %o1, 8, %o1 /* IEU1 */ |
60 | 2: mov %o3, %g3 /* IEU0 Group */ |
61 | |
62 | sub %o3, %g1, %o2 /* IEU1 */ |
63 | 3: ldxa [%o1] ASI_PNF, %o3 /* Load */ |
64 | #ifdef EIGHTBIT_NOT_RARE |
65 | andn %o2, %g3, %o2 /* IEU0 Group */ |
66 | #endif |
67 | add %o0, 8, %o0 /* IEU0 Group */ |
68 | andcc %o2, %g2, %g0 /* IEU1 */ |
69 | |
70 | add %o1, 8, %o1 /* IEU0 Group */ |
/* No candidate zero byte: the annulled delay slot stores the
   doubleword and the loop continues at 2.  */
71 | be,a,pt %xcc, 2b /* CTI */ |
72 | stx %g3, [%o0 - 8] /* Store */ |
/* A byte may be zero.  Test bytes 0..6 of %g3 one at a time, starting
   from the most significant; each delay slot pre-computes the shift
   for the following test, alternating between %g5 and %g4.  */
73 | srlx %g3, 56, %g5 /* IEU0 Group */ |
74 | |
75 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
76 | be,pn %icc, 11f /* CTI */ |
77 | srlx %g3, 48, %g4 /* IEU0 */ |
78 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
79 | |
80 | be,pn %icc, 10f /* CTI */ |
81 | srlx %g3, 40, %g5 /* IEU0 */ |
82 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
83 | be,pn %icc, 9f /* CTI */ |
84 | |
85 | srlx %g3, 32, %g4 /* IEU0 */ |
86 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
87 | be,pn %icc, 8f /* CTI */ |
88 | srlx %g3, 24, %g5 /* IEU0 */ |
89 | |
90 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
91 | be,pn %icc, 7f /* CTI */ |
92 | srlx %g3, 16, %g4 /* IEU0 */ |
93 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
94 | |
95 | be,pn %icc, 6f /* CTI */ |
96 | srlx %g3, 8, %g5 /* IEU0 */ |
97 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
98 | be,pn %icc, 5f /* CTI */ |
99 | |
/* Bytes 0..6 are non-zero, so the whole doubleword can be stored.  If
   byte 7 is non-zero as well the test gave a false alarm: resume at 3
   with %o2 and %g3 already prepared for the next doubleword by the two
   delay-slot instructions (the sub here and the mov after the bne).  */
100 | sub %o3, %g1, %o2 /* IEU0 */ |
101 | stx %g3, [%o0 - 8] /* Store Group */ |
102 | andcc %g3, 0xff, %g0 /* IEU1 */ |
103 | bne,pt %icc, 3b /* CTI */ |
104 | |
105 | mov %o3, %g3 /* IEU0 Group */ |
/* The last byte stored was the NUL (byte 7 here, or the byte just
   copied by the loop at 13): result = %o0 - 1.  */
106 | 4: retl /* CTI+IEU1 Group */ |
107 | sub %o0, 1, %o0 /* IEU0 */ |
108 | |
/* Tails of the aligned loop.  On entry %g3 is the final doubleword and
   %g4/%g5 hold the shifted copies left by the test sequence above.
   Each tail stores only the bytes up to and including the NUL and
   returns the address of that NUL via %g6.  */
109 | .align 16 |
/* 6: byte 5 is the NUL; %g4 = %g3 >> 16 already holds bytes 4-5.
   Result = %o0 - 3, continue at 23.  */
110 | 6: ba,pt %xcc, 23f /* CTI Group */ |
111 | sub %o0, 3, %g6 /* IEU0 */ |
/* 5: byte 6 is the NUL (low byte of %g5 = %g3 >> 8).  Store it, then
   bytes 4-5 as a halfword and bytes 0-3 as a word.  Result = %o0 - 2.  */
112 | 5: sub %o0, 2, %g6 /* IEU0 Group */ |
113 | stb %g5, [%o0 - 2] /* Store */ |
114 | |
115 | srlx %g3, 16, %g4 /* IEU0 Group */ |
116 | 23: sth %g4, [%o0 - 4] /* Store */ |
117 | srlx %g3, 32, %g4 /* IEU0 Group */ |
118 | stw %g4, [%o0 - 8] /* Store */ |
119 | |
120 | retl /* CTI+IEU1 Group */ |
121 | mov %g6, %o0 /* IEU0 */ |
/* 8: byte 3 is the NUL; %g4 = %g3 >> 32 already holds bytes 0-3.
   Result = %o0 - 5, continue at 24.  */
122 | 8: ba,pt %xcc, 24f /* CTI Group */ |
123 | sub %o0, 5, %g6 /* IEU0 */ |
124 | |
/* 7: byte 4 is the NUL (low byte of %g5 = %g3 >> 24).  Store it, then
   bytes 0-3 as a word.  Result = %o0 - 4.  */
125 | 7: sub %o0, 4, %g6 /* IEU0 Group */ |
126 | stb %g5, [%o0 - 4] /* Store */ |
127 | srlx %g3, 32, %g4 /* IEU0 Group */ |
128 | 24: stw %g4, [%o0 - 8] /* Store */ |
129 | |
130 | retl /* CTI+IEU1 Group */ |
131 | mov %g6, %o0 /* IEU0 */ |
/* 10: byte 1 is the NUL; %g4 = %g3 >> 48 already holds bytes 0-1.
   Result = %o0 - 7, continue at 25.  */
132 | 10: ba,pt %xcc, 25f /* CTI Group */ |
133 | sub %o0, 7, %g6 /* IEU0 */ |
134 | |
/* 9: byte 2 is the NUL (low byte of %g5 = %g3 >> 40).  Store it, then
   bytes 0-1 as a halfword.  Result = %o0 - 6.  */
135 | 9: sub %o0, 6, %g6 /* IEU0 Group */ |
136 | stb %g5, [%o0 - 6] /* Store */ |
137 | srlx %g3, 48, %g4 /* IEU0 */ |
138 | 25: sth %g4, [%o0 - 8] /* Store Group */ |
139 | |
140 | retl /* CTI+IEU1 Group */ |
141 | mov %g6, %o0 /* IEU0 */ |
/* 11: byte 0 is the NUL (low byte of %g5 = %g3 >> 56).  Store just
   that byte; result = %o0 - 8, computed in the retl delay slot.  */
142 | 11: stb %g5, [%o0 - 8] /* Store Group */ |
143 | retl /* CTI+IEU1 Group */ |
144 | |
145 | sub %o0, 8, %o0 /* IEU0 */ |
146 | |
147 | .align 16 |
/* 12: DEST is not 8-byte aligned.  Finish building %g1/%g2 (the entry
   code branched here before doing so) and copy the first byte.  */
148 | 12: or %g1, %g2, %g1 /* IEU0 Group */ |
149 | ldub [%o1], %o3 /* Load */ |
150 | sllx %g1, 7, %g2 /* IEU0 Group */ |
151 | stb %o3, [%o0] /* Store Group */ |
152 | |
/* 13: byte loop.  %o3 is the byte just stored.  Advance both pointers
   and finish at 4 if that byte was the NUL; the delay slot loads the
   next source byte in either case.  */
153 | 13: add %o0, 1, %o0 /* IEU0 */ |
154 | add %o1, 1, %o1 /* IEU1 */ |
155 | andcc %o3, 0xff, %g0 /* IEU1 Group */ |
156 | be,pn %icc, 4b /* CTI */ |
157 | |
158 | lduba [%o1] ASI_PNF, %o3 /* Load */ |
/* While DEST is still unaligned, store the byte (annulled delay slot)
   and loop.  */
159 | andcc %o0, 7, %g0 /* IEU1 Group */ |
160 | bne,a,pt %icc, 13b /* CTI */ |
161 | stb %o3, [%o0] /* Store */ |
162 | |
/* DEST is now 8-byte aligned and the byte in %o3 has not been stored.
   %g3 = SRC & 7.  If SRC is aligned too, rejoin the doubleword loop at
   1 (the annulled delay slot issues the same ldx that 1 repeats);
   otherwise fall through to 14.  */
163 | andcc %o1, 7, %g3 /* IEU1 Group */ |
164 | be,a,pt %icc, 1b /* CTI */ |
165 | ldx [%o1], %o3 /* Load */ |
/* 14: DEST aligned, SRC misaligned by %g3 bytes.  Round %o1 down to an
   8-byte boundary and set up the shift counts:
   %g5 = 8 * %g3, %g4 = 64 - %g5.  The condition codes written by the
   orcc/addcc here are not read by any later branch (andcc at the loop
   test overwrites them first).  */
166 | 14: orcc %g0, 64, %g4 /* IEU1 Group */ |
167 | |
168 | sllx %g3, 3, %g5 /* IEU0 */ |
169 | sub %o1, %g3, %o1 /* IEU0 Group */ |
170 | sub %g4, %g5, %g4 /* IEU1 */ |
171 | /* %g1 = 0101010101010101 * |
172 | * %g2 = 8080808080808080 * |
173 | * %g3 = source alignment * |
174 | * %g5 = number of bits to shift left * |
175 | * %g4 = number of bits to shift right */ |
176 | ldxa [%o1] ASI_PNF, %o5 /* Load Group */ |
177 | |
178 | addcc %o1, 8, %o1 /* IEU1 */ |
/* 15: shifted doubleword loop.
     %o5 = most recently loaded aligned source doubleword
     %o3 = (previous %o5 << %g5) | (new %o5 >> %g4): the next eight
	   string bytes, destined for [%o0 - 8]
     %o4 = %o3 - %g1 (and-not %o3 when EIGHTBIT_NOT_RARE) for the
	   zero-byte test; later reused as the shifted byte temporary.  */
179 | 15: sllx %o5, %g5, %o3 /* IEU0 Group */ |
180 | ldxa [%o1] ASI_PNF, %o5 /* Load */ |
181 | srlx %o5, %g4, %o4 /* IEU0 Group */ |
182 | |
183 | add %o0, 8, %o0 /* IEU1 */ |
184 | or %o3, %o4, %o3 /* IEU0 Group */ |
185 | add %o1, 8, %o1 /* IEU1 */ |
186 | sub %o3, %g1, %o4 /* IEU0 Group */ |
187 | |
188 | #ifdef EIGHTBIT_NOT_RARE |
189 | andn %o4, %o3, %o4 /* IEU0 Group */ |
190 | #endif |
191 | andcc %o4, %g2, %g0 /* IEU1 Group */ |
/* No candidate zero byte: store (annulled delay slot) and loop.  */
192 | be,a,pt %xcc, 15b /* CTI */ |
193 | stx %o3, [%o0 - 8] /* Store */ |
/* A byte may be zero: test bytes 0..6 of %o3 from the most significant
   end.  Only %o4 is used as the temporary here, so the tails below
   recompute the shifts they need.  */
194 | srlx %o3, 56, %o4 /* IEU0 Group */ |
195 | |
196 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
197 | be,pn %icc, 22f /* CTI */ |
198 | srlx %o3, 48, %o4 /* IEU0 */ |
199 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
200 | |
201 | be,pn %icc, 21f /* CTI */ |
202 | srlx %o3, 40, %o4 /* IEU0 */ |
203 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
204 | be,pn %icc, 20f /* CTI */ |
205 | |
206 | srlx %o3, 32, %o4 /* IEU0 */ |
207 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
208 | be,pn %icc, 19f /* CTI */ |
209 | srlx %o3, 24, %o4 /* IEU0 */ |
210 | |
211 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
212 | be,pn %icc, 18f /* CTI */ |
213 | srlx %o3, 16, %o4 /* IEU0 */ |
214 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
215 | |
216 | be,pn %icc, 17f /* CTI */ |
217 | srlx %o3, 8, %o4 /* IEU0 */ |
218 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
219 | be,pn %icc, 16f /* CTI */ |
220 | |
/* Bytes 0..6 are non-zero.  The andcc (in the delay slot of the branch
   to 16) tests byte 7; the stx in the next delay slot is not annulled,
   so the whole doubleword is stored whether or not the loop continues.
   If byte 7 is the NUL, result = %o0 - 1.  */
221 | andcc %o3, 0xff, %g0 /* IEU1 Group */ |
222 | bne,pn %icc, 15b /* CTI */ |
223 | stx %o3, [%o0 - 8] /* Store */ |
224 | retl /* CTI+IEU1 Group */ |
225 | |
226 | sub %o0, 1, %o0 /* IEU0 */ |
227 | |
/* Tails of the shifted loop for a NUL in bytes 3..6.  %g6 receives the
   result; the chain 16 -> 26 -> 27 -> 28 stores bytes 6, 5 and 4
   individually and then bytes 0-3 as one word, and each entry label
   joins the chain at the first byte it needs.  The condition codes set
   by the subcc instructions are not read afterwards.  */
228 | .align 16 |
/* 17: byte 5 is the NUL; result = %o0 - 3.  */
229 | 17: ba,pt %xcc, 26f /* CTI Group */ |
230 | subcc %o0, 3, %g6 /* IEU1 */ |
/* 18: byte 4 is the NUL; result = %o0 - 4.  */
231 | 18: ba,pt %xcc, 27f /* CTI Group */ |
232 | subcc %o0, 4, %g6 /* IEU1 */ |
233 | |
/* 19: byte 3 is the NUL; result = %o0 - 5.  */
234 | 19: ba,pt %xcc, 28f /* CTI Group */ |
235 | subcc %o0, 5, %g6 /* IEU1 */ |
/* 16: byte 6 is the NUL; result = %o0 - 2.  */
236 | 16: subcc %o0, 2, %g6 /* IEU1 Group */ |
237 | srlx %o3, 8, %o4 /* IEU0 */ |
238 | |
239 | stb %o4, [%o0 - 2] /* Store */ |
240 | 26: srlx %o3, 16, %o4 /* IEU0 Group */ |
241 | stb %o4, [%o0 - 3] /* Store */ |
242 | 27: srlx %o3, 24, %o4 /* IEU0 Group */ |
243 | |
244 | stb %o4, [%o0 - 4] /* Store */ |
245 | 28: srlx %o3, 32, %o4 /* IEU0 Group */ |
246 | stw %o4, [%o0 - 8] /* Store */ |
247 | retl /* CTI+IEU1 Group */ |
248 | |
249 | mov %g6, %o0 /* IEU0 */ |
250 | |
/* Tails of the shifted loop for a NUL in bytes 0..2.  The chain
   20 -> 29 -> 30 stores bytes 2, 1 and 0 individually.  */
251 | .align 16 |
/* 21: byte 1 is the NUL; result = %o0 - 7.  */
252 | 21: ba,pt %xcc, 29f /* CTI Group */ |
253 | subcc %o0, 7, %g6 /* IEU1 */ |
/* 22: byte 0 is the NUL; result = %o0 - 8.  */
254 | 22: ba,pt %xcc, 30f /* CTI Group */ |
255 | subcc %o0, 8, %g6 /* IEU1 */ |
256 | |
/* 20: byte 2 is the NUL; result = %o0 - 6.  */
257 | 20: subcc %o0, 6, %g6 /* IEU1 Group */ |
258 | srlx %o3, 40, %o4 /* IEU0 */ |
259 | stb %o4, [%o0 - 6] /* Store */ |
260 | 29: srlx %o3, 48, %o4 /* IEU0 Group */ |
261 | |
262 | stb %o4, [%o0 - 7] /* Store */ |
263 | 30: srlx %o3, 56, %o4 /* IEU0 Group */ |
264 | stb %o4, [%o0 - 8] /* Store */ |
265 | retl /* CTI+IEU1 Group */ |
266 | |
267 | mov %g6, %o0 /* IEU0 */ |
268 | END(__stpcpy) |
269 | |
/* Symbol plumbing for the routine above, done with macros defined
   outside this file.  Judging by their names: stpcpy becomes a weak
   alias of __stpcpy, and hidden definitions of both names are emitted
   for calls from inside the library -- confirm against the glibc macro
   definitions.  */
270 | weak_alias (__stpcpy, stpcpy) |
271 | libc_hidden_def (__stpcpy) |
272 | libc_hidden_builtin_def (stpcpy) |
273 | |