1 | /* Copy SRC to DEST returning DEST. |
2 | For SPARC v9. |
3 | Copyright (C) 1998-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include <asm/asi.h> |
22 | #ifndef XCC |
/* Declare %g2, %g3 and %g6 as scratch registers to the assembler; all
   three are written by the code in this file.
   NOTE(review): the reason these are skipped when XCC is already
   defined is not visible here -- presumably an including file has
   then issued the directives itself; confirm.  */
23 | .register %g2, #scratch |
24 | .register %g3, #scratch |
25 | .register %g6, #scratch |
26 | #endif |
27 | |
28 | /* Normally, this uses the |
29 | ((xword - 0x0101010101010101) & 0x8080808080808080) test |
30 | to find out if any byte in xword could be zero. This is fast, but |
31 | also gives a false alarm for any byte in range 0x81-0xff. It does |
32 | not matter for correctness, as if this test tells us there could |
33 | be some zero byte, we check it byte by byte, but if bytes with |
34 | high bits set are common in the strings, then this will give poor |
35 | performance. You can #define EIGHTBIT_NOT_RARE and the algorithm |
36 | will use a one tick slower, but more precise, test |
37 | ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), |
38 | which does not give any false alarms (but if some bits are set, |
39 | one cannot assume from it which bytes are zero and which are not). |
40 | It has yet to be measured which setting is the better default for |
41 | glibc for an average user these days. |
42 | */ |
43 | |
44 | .text |
45 | .align 32 |
/* char *strcpy (char *dest, const char *src)

   In:   %o0 = dest, %o1 = src.
   Out:  %o0 = the original dest (kept in %g6 for the whole routine).

   Register roles once set up:
     %g1 = 0x0101010101010101   } masks for the "could this word
     %g2 = 0x8080808080808080   }  contain a zero byte" test
     %g6 = saved dest, moved back to %o0 at every return

   Layout by numeric label:
     1-3    main loop, dest and src both 8-byte aligned
     4      common return
     5-11   store the final partial word for the aligned loop
     12-13  byte copy until dest is 8-byte aligned
     14-15  loop for src not 8-byte aligned (dest is): shift and merge
     16-22  store the final partial word for the shift-and-merge loop

   The instruction following each branch or retl is its delay slot.
   With the ",a" (annul) form of a conditional branch the delay slot
   only executes when the branch is taken.

   The trailing comments name the issue slot/group each instruction was
   scheduled for; the blank lines follow that grouping, not the logic.  */
46 | ENTRY(strcpy) |
/* Build the 0x01.. constant, save dest, and dispatch on alignment:
   dest not 8-byte aligned -> 12; otherwise src not 8-byte aligned -> 14
   (with %g3 = src & 7); otherwise fall into the aligned loop at 1.
   On the branch to 12 the masks are only half built; 12 finishes them.  */
47 | sethi %hi(0x01010101), %g1 /* IEU0 Group */ |
48 | mov %o0, %g6 /* IEU1 */ |
49 | or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ |
50 | andcc %o0, 7, %g0 /* IEU1 */ |
51 | |
52 | sllx %g1, 32, %g2 /* IEU0 Group */ |
53 | bne,pn %icc, 12f /* CTI */ |
54 | andcc %o1, 7, %g3 /* IEU1 */ |
55 | or %g1, %g2, %g1 /* IEU0 Group */ |
56 | |
57 | bne,pn %icc, 14f /* CTI */ |
58 | sllx %g1, 7, %g2 /* IEU0 Group */ |
/* Aligned loop.  %g3 = word currently being tested/stored, %o3 = next
   source word, read one word ahead.  Entering at 2 first copies %o3
   into %g3; entering at 3 expects that to have been done already.
   NOTE(review): the read-ahead goes through ASI_PNF from <asm/asi.h>,
   presumably the no-fault ASI so that reading past the terminator
   cannot trap -- confirm.  */
59 | 1: ldx [%o1], %o3 /* Load */ |
60 | add %o1, 8, %o1 /* IEU1 */ |
61 | |
62 | 2: mov %o3, %g3 /* IEU0 Group */ |
63 | 3: sub %o3, %g1, %o2 /* IEU1 */ |
64 | ldxa [%o1] ASI_PNF, %o3 /* Load */ |
65 | #ifdef EIGHTBIT_NOT_RARE |
66 | andn %o2, %g3, %o2 /* IEU0 Group */ |
67 | #endif |
68 | add %o0, 8, %o0 /* IEU0 Group */ |
69 | |
/* If the masked result is zero no byte of %g3 can be zero: store the
   word (annulled delay slot, so only on the taken path) and loop.
   %o0 was already advanced, hence the [%o0 - 8] addressing from here on.  */
70 | andcc %o2, %g2, %g0 /* IEU1 */ |
71 | add %o1, 8, %o1 /* IEU0 Group */ |
72 | be,a,pt %xcc, 2b /* CTI */ |
73 | stx %g3, [%o0 - 8] /* Store */ |
74 | |
/* A zero byte is possible.  Test the bytes of %g3 one at a time, from
   bits 63-56 (the byte stored at the lowest address, see 11) downwards.
   On the first zero byte branch to the stub that stores the bytes up to
   and including it.  %g5 and %g4 alternate as the shifted temporary;
   each delay slot already prepares the shift for the next test.  */
75 | srlx %g3, 56, %g5 /* IEU0 Group */ |
76 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
77 | be,pn %icc, 11f /* CTI */ |
78 | srlx %g3, 48, %g4 /* IEU0 */ |
79 | |
80 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
81 | be,pn %icc, 10f /* CTI */ |
82 | srlx %g3, 40, %g5 /* IEU0 */ |
83 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
84 | |
85 | be,pn %icc, 9f /* CTI */ |
86 | srlx %g3, 32, %g4 /* IEU0 */ |
87 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
88 | be,pn %icc, 8f /* CTI */ |
89 | |
90 | srlx %g3, 24, %g5 /* IEU0 */ |
91 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
92 | be,pn %icc, 7f /* CTI */ |
93 | srlx %g3, 16, %g4 /* IEU0 */ |
94 | |
95 | andcc %g4, 0xff, %g0 /* IEU1 Group */ |
96 | be,pn %icc, 6f /* CTI */ |
97 | srlx %g3, 8, %g5 /* IEU0 */ |
98 | andcc %g5, 0xff, %g0 /* IEU1 Group */ |
99 | |
/* The first seven bytes are non-zero, so the whole word is stored.  If
   the last byte is non-zero too the test was a false alarm: continue at
   3 with the next word (moved into %g3 in the delay slot).  Otherwise
   the word ended with the terminator and we fall into the return at 4.
   The sub in the delay slot of the branch to 5 computes the same value
   that 3 computes again.  */
100 | be,pn %icc, 5f /* CTI */ |
101 | sub %o3, %g1, %o2 /* IEU0 */ |
102 | stx %g3, [%o0 - 8] /* Store Group */ |
103 | andcc %g3, 0xff, %g0 /* IEU1 */ |
104 | |
105 | bne,pt %icc, 3b /* CTI */ |
106 | mov %o3, %g3 /* IEU0 Group */ |
/* Common exit: return the original dest.  */
107 | 4: retl /* CTI+IEU1 Group */ |
108 | mov %g6, %o0 /* IEU0 */ |
109 | |
110 | .align 16 |
/* Tail stores for the aligned loop.  The current word belongs at
   [%o0 - 8].  Each entry stores exactly the bytes up to and including
   the terminator, using byte, halfword and word stores:
     5: terminator is byte 6     6: byte 5     7: byte 4     8: byte 3
     9: byte 2                  10: byte 1    11: byte 0
   %g5/%g4 hold the shifted value left by the test that branched here.  */
111 | 5: stb %g5, [%o0 - 2] /* Store Group */ |
112 | srlx %g3, 16, %g4 /* IEU0 */ |
113 | 6: sth %g4, [%o0 - 4] /* Store Group */ |
114 | srlx %g3, 32, %g4 /* IEU0 */ |
115 | |
116 | stw %g4, [%o0 - 8] /* Store Group */ |
117 | retl /* CTI+IEU1 Group */ |
118 | mov %g6, %o0 /* IEU0 */ |
119 | 7: stb %g5, [%o0 - 4] /* Store Group */ |
120 | |
121 | srlx %g3, 32, %g4 /* IEU0 */ |
122 | 8: stw %g4, [%o0 - 8] /* Store Group */ |
123 | retl /* CTI+IEU1 Group */ |
124 | mov %g6, %o0 /* IEU0 */ |
125 | |
126 | 9: stb %g5, [%o0 - 6] /* Store Group */ |
127 | srlx %g3, 48, %g4 /* IEU0 */ |
128 | 10: sth %g4, [%o0 - 8] /* Store Group */ |
129 | retl /* CTI+IEU1 Group */ |
130 | |
131 | mov %g6, %o0 /* IEU0 */ |
132 | 11: stb %g5, [%o0 - 8] /* Store Group */ |
133 | retl /* CTI+IEU1 Group */ |
134 | mov %g6, %o0 /* IEU0 */ |
135 | |
/* Dest is not 8-byte aligned.  Finish building %g1 and %g2 (the entry
   code branched here before doing so), then copy one byte at a time
   until either the terminator has been copied (return via 4) or dest
   becomes 8-byte aligned.  The next byte is loaded in the delay slot of
   the exit branch, so it is also read on the path that returns.  */
136 | 12: or %g1, %g2, %g1 /* IEU0 Group */ |
137 | ldub [%o1], %o3 /* Load */ |
138 | sllx %g1, 7, %g2 /* IEU0 Group */ |
139 | stb %o3, [%o0] /* Store Group */ |
140 | |
141 | 13: add %o0, 1, %o0 /* IEU0 */ |
142 | add %o1, 1, %o1 /* IEU1 */ |
143 | andcc %o3, 0xff, %g0 /* IEU1 Group */ |
144 | be,pn %icc, 4b /* CTI */ |
145 | |
146 | lduba [%o1] ASI_PNF, %o3 /* Load */ |
147 | andcc %o0, 7, %g0 /* IEU1 Group */ |
148 | bne,a,pt %icc, 13b /* CTI */ |
149 | stb %o3, [%o0] /* Store */ |
150 | |
/* Dest is now 8-byte aligned.  If src is as well, rejoin the aligned
   loop at 1 (the annulled delay slot performs the same load that 1
   repeats); otherwise fall through to 14 with %g3 = src & 7.  */
151 | andcc %o1, 7, %g3 /* IEU1 Group */ |
152 | be,a,pt %icc, 1b /* CTI */ |
153 | ldx [%o1], %o3 /* Load */ |
/* Src is not 8-byte aligned while dest is.  Round src down to an 8-byte
   boundary and build every output word from two consecutive aligned
   source words: the earlier one shifted left by %g5 = 8 * (src & 7)
   bits, the later one shifted right by %g4 = 64 - %g5 bits.  */
154 | 14: orcc %g0, 64, %g4 /* IEU1 Group */ |
155 | |
156 | sllx %g3, 3, %g5 /* IEU0 */ |
157 | sub %o1, %g3, %o1 /* IEU0 Group */ |
158 | sub %g4, %g5, %g4 /* IEU1 */ |
159 | /* %g1 = 0101010101010101 * |
160 | * %g2 = 8080808080808080 * |
161 | * %g3 = source alignment * |
162 | * %g5 = number of bits to shift left * |
163 | * %g4 = number of bits to shift right */ |
164 | ldxa [%o1] ASI_PNF, %o5 /* Load Group */ |
165 | |
166 | addcc %o1, 8, %o1 /* IEU1 */ |
/* Shift-and-merge loop.  On entry %o5 = the aligned source word that
   holds the first bytes of the next output word; %o3 becomes the merged
   output word, which is tested for a possible zero byte the same way as
   in the aligned loop and stored in the annulled delay slot if clean.  */
167 | 15: sllx %o5, %g5, %o3 /* IEU0 Group */ |
168 | ldxa [%o1] ASI_PNF, %o5 /* Load */ |
169 | srlx %o5, %g4, %o4 /* IEU0 Group */ |
170 | |
171 | add %o0, 8, %o0 /* IEU1 */ |
172 | or %o3, %o4, %o3 /* IEU0 Group */ |
173 | add %o1, 8, %o1 /* IEU1 */ |
174 | sub %o3, %g1, %o4 /* IEU0 Group */ |
175 | |
176 | #ifdef EIGHTBIT_NOT_RARE |
177 | andn %o4, %o3, %o4 /* IEU0 Group */ |
178 | #endif |
179 | andcc %o4, %g2, %g0 /* IEU1 Group */ |
180 | be,a,pt %xcc, 15b /* CTI */ |
181 | stx %o3, [%o0 - 8] /* Store */ |
/* Possible zero byte in %o3: test byte by byte from bits 63-56 down and
   branch to the matching tail stub on the first zero byte.  %o4 is the
   only temporary; every stub recomputes the shift it needs.  */
182 | srlx %o3, 56, %o4 /* IEU0 Group */ |
183 | |
184 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
185 | be,pn %icc, 22f /* CTI */ |
186 | srlx %o3, 48, %o4 /* IEU0 */ |
187 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
188 | |
189 | be,pn %icc, 21f /* CTI */ |
190 | srlx %o3, 40, %o4 /* IEU0 */ |
191 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
192 | be,pn %icc, 20f /* CTI */ |
193 | |
194 | srlx %o3, 32, %o4 /* IEU0 */ |
195 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
196 | be,pn %icc, 19f /* CTI */ |
197 | srlx %o3, 24, %o4 /* IEU0 */ |
198 | |
199 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
200 | be,pn %icc, 18f /* CTI */ |
201 | srlx %o3, 16, %o4 /* IEU0 */ |
202 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
203 | |
204 | be,pn %icc, 17f /* CTI */ |
205 | srlx %o3, 8, %o4 /* IEU0 */ |
206 | andcc %o4, 0xff, %g0 /* IEU1 Group */ |
207 | be,pn %icc, 16f /* CTI */ |
208 | |
/* The first seven bytes are non-zero.  The stx sits in a non-annulled
   delay slot, so the whole word is stored on both outcomes: loop again
   at 15 if the last byte is non-zero, otherwise return.  */
209 | andcc %o3, 0xff, %g0 /* IEU1 Group */ |
210 | bne,pn %icc, 15b /* CTI */ |
211 | stx %o3, [%o0 - 8] /* Store */ |
212 | retl /* CTI+IEU1 Group */ |
213 | |
214 | mov %g6, %o0 /* IEU0 */ |
215 | |
216 | .align 16 |
/* Tail stores for the shift-and-merge loop; the current word belongs at
   [%o0 - 8].  Entries 16, 17 and 18 (terminator is byte 6, 5, 4) store
   single bytes and fall through to 19, which stores bytes 0-3 as one
   word (19 itself is the entry for a terminator in byte 3).  */
217 | 16: srlx %o3, 8, %o4 /* IEU0 Group */ |
218 | stb %o4, [%o0 - 2] /* Store */ |
219 | 17: srlx %o3, 16, %o4 /* IEU0 Group */ |
220 | stb %o4, [%o0 - 3] /* Store */ |
221 | |
222 | 18: srlx %o3, 24, %o4 /* IEU0 Group */ |
223 | stb %o4, [%o0 - 4] /* Store */ |
224 | 19: srlx %o3, 32, %o4 /* IEU0 Group */ |
225 | stw %o4, [%o0 - 8] /* Store */ |
226 | |
227 | retl /* CTI+IEU1 Group */ |
228 | mov %g6, %o0 /* IEU0 */ |
229 | nop |
230 | nop |
231 | |
/* Entries 20, 21 and 22 (terminator is byte 2, 1, 0) store single bytes
   and fall through to the return.  */
232 | 20: srlx %o3, 40, %o4 /* IEU0 Group */ |
233 | stb %o4, [%o0 - 6] /* Store */ |
234 | 21: srlx %o3, 48, %o4 /* IEU0 Group */ |
235 | stb %o4, [%o0 - 7] /* Store */ |
236 | |
237 | 22: srlx %o3, 56, %o4 /* IEU0 Group */ |
238 | stb %o4, [%o0 - 8] /* Store */ |
239 | retl /* CTI+IEU1 Group */ |
240 | mov %g6, %o0 /* IEU0 */ |
241 | END(strcpy) |
242 | libc_hidden_builtin_def (strcpy) |
243 | |