1 | /* Set a block of memory to some byte value. |
2 | For UltraSPARC. |
3 | Copyright (C) 1996-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include <asm/asi.h> |
/* XCC names the condition-code set used by the one branch below that
   tests the length before it has been zero-extended.  Unless whoever
   includes this file has already chosen XCC, default to the 64-bit
   codes (xcc) and define USE_BPR; when USE_BPR is not defined, memset
   zero-extends the length with "srl reg, 0, reg" before using it.  */
#if !defined (XCC)
# define XCC xcc
# define USE_BPR
#endif

/* Value written to %fprs once the block-store loops are finished.
   In the SPARC V9 FPRS register bit 2 is FEF (FPU enable).  */
#define FPRS_FEF 4
27 | |
/* Emit four 8-byte stores of VAL at ascending addresses, filling the
   32 bytes that begin at PTR - DISP - 0x18.  Keep exactly one store
   instruction per line and keep this order: memset enters a run of
   these expansions part-way through with a computed jump, so that only
   the trailing stores are executed.  */
#define SET_BLOCKS(ptr, disp, val)                      \
        stx             val, [ptr - disp - 0x18];       \
        stx             val, [ptr - disp - 0x10];       \
        stx             val, [ptr - disp - 0x08];       \
        stx             val, [ptr - disp - 0x00];
33 | |
/* Emit eight 8-byte stores of VAL at ascending addresses, filling the
   64 bytes that begin at PTR - DISP - 0x38.  As with SET_BLOCKS, the
   number and order of the store instructions matter because the
   zero-fill path jumps into the middle of a run of these expansions.  */
#define ZERO_BLOCKS(ptr, disp, val)                     \
        stx             val, [ptr - disp - 0x38];       \
        stx             val, [ptr - disp - 0x30];       \
        stx             val, [ptr - disp - 0x28];       \
        stx             val, [ptr - disp - 0x20];       \
        stx             val, [ptr - disp - 0x18];       \
        stx             val, [ptr - disp - 0x10];       \
        stx             val, [ptr - disp - 0x08];       \
        stx             val, [ptr - disp - 0x00];
43 | |
44 | /* Well, memset is a lot easier to get right than bcopy... */ |
/* void *memset (void *dst, int c, size_t len)

   In:    %o0 = dst, %o1 = c, %o2 = len
   Out:   %o0 = dst (saved in %o5 on entry and restored in the delay
          slot of every retl)
   Uses:  %o1-%o5, %g1, %g5, the integer condition codes; the block
          store paths also use %f0-%f14, %asi and write %fprs.

   Two parallel code paths share the same shape:
     - non-zero fill: %o1 = fill pattern, %o2 = bytes remaining;
     - zero fill (label 50): %g0 is the store source, %o1 = bytes
       remaining, %o2 is scratch.
   Each path: lengths <= 7 go to a byte loop (17); otherwise dst is
   aligned to 4 and then 8 bytes; if at least 128 bytes remain it is
   aligned to 64 bytes and filled with 64-byte stda stores through
   ASI_BLK_P (presumably the UltraSPARC block-store ASI); the remaining
   multiple of 8 bytes is stored by jumping into a run of stx
   instructions, and the last 0-7 bytes by stw/sth/stb.

   An instruction indented by one extra space sits in the delay slot of
   the branch above it.  Numeric labels are reused; Nf / Nb refer to
   the nearest definition forward / backward.  */
        .text
        .align          32
ENTRY(memset)
        andcc           %o1, 0xff, %o1          /* fill byte; Z set when it is 0 */
        mov             %o0, %o5                /* keep dst for the return value */
        /* Annulled branch: the delay slot (length -> %o1) is executed
           only when the branch to the zero-fill path is taken.  */
        be,a,pt         %icc, 50f
#ifndef USE_BPR
         srl            %o2, 0, %o1
#else
         mov            %o2, %o1
#endif
        cmp             %o2, 7
#ifndef USE_BPR
        srl             %o2, 0, %o2             /* zero-extend len; leaves the cmp result intact */
#endif
        /* The compare above saw the length before any zero-extension,
           hence the configurable %XCC here.  */
        bleu,pn         %XCC, 17f               /* len <= 7: byte loop */
         andcc          %o0, 3, %g5             /* %g5 = dst & 3 */
        be,pt           %xcc, 4f                /* already 4-byte aligned */
         and            %o1, 0xff, %o1
        /* Store 4 - %g5 leading bytes (3, 2 or 1).  */
        cmp             %g5, 3
        be,pn           %xcc, 2f
         stb            %o1, [%o0 + 0x00]
        cmp             %g5, 2
        be,pt           %xcc, 2f
         stb            %o1, [%o0 + 0x01]
        stb             %o1, [%o0 + 0x02]
2:      sub             %g5, 4, %g5             /* %g5 = -(bytes just stored) */
        sub             %o0, %g5, %o0           /* advance dst to the 4-byte boundary */
        add             %o2, %g5, %o2           /* and shrink the remaining count */
        /* Replicate the fill byte through all 8 bytes of %o1, storing
           one word on the way if dst is not yet 8-byte aligned.  */
4:      sllx            %o1, 8, %g1
        andcc           %o0, 4, %g0
        or              %o1, %g1, %o1
        sllx            %o1, 16, %g1
        or              %o1, %g1, %o1
        be,pt           %xcc, 2f                /* dst & 4 == 0: already 8-byte aligned */
         sllx           %o1, 32, %g1
        stw             %o1, [%o0]
        sub             %o2, 4, %o2
        add             %o0, 4, %o0
2:      cmp             %o2, 128
        or              %o1, %g1, %o1           /* %o1 = 64-bit fill pattern */
        blu,pn          %xcc, 9f                /* fewer than 128 bytes: skip block stores */
         andcc          %o0, 0x38, %g5          /* %g5 = offset within a 64-byte line */
        be,pn           %icc, 6f                /* already 64-byte aligned */
         mov            64, %o4
        /* Store %o4 = 64 - %g5 bytes (a multiple of 8) to reach a
           64-byte boundary: an 8, a 16 and a 32 byte step as needed.  */
        andcc           %o0, 8, %g0
        be,pn           %icc, 1f
         sub            %o4, %g5, %o4
        stx             %o1, [%o0]
        add             %o0, 8, %o0
1:      andcc           %o4, 16, %g0
        be,pn           %icc, 1f
         sub            %o2, %o4, %o2           /* account for the whole alignment run */
        stx             %o1, [%o0]
        stx             %o1, [%o0 + 8]
        add             %o0, 16, %o0
1:      andcc           %o4, 32, %g0
        be,pn           %icc, 7f
         andncc         %o2, 0x3f, %o3          /* %o3 = bytes to store in 64-byte blocks */
        stw             %o1, [%o0]
        stw             %o1, [%o0 + 4]
        stw             %o1, [%o0 + 8]
        stw             %o1, [%o0 + 12]
        stw             %o1, [%o0 + 16]
        stw             %o1, [%o0 + 20]
        stw             %o1, [%o0 + 24]
        stw             %o1, [%o0 + 28]
        add             %o0, 32, %o0
        /* Condition codes here are still those of the andncc above
           (the stores and the add do not change them).  */
7:      be,pn           %xcc, 9f                /* no whole 64-byte block left */
         nop
        ldd             [%o0 - 8], %f0          /* reload 8 bytes just stored: the pattern */
        /* Block-store path.  %f0 holds the pattern and is copied to
           %f2-%f14 so that %f0-%f14 hold 64 bytes of it.
           %g5 = %o3 & 0xc0: 0, 64, 128 or 192 bytes stored first;
           %o3 &= ~0xff:     bytes stored 256 per loop iteration;
           %o2 &= 0x3f:      tail left for the code at 9.  */
18:     wr              %g0, ASI_BLK_P, %asi
        membar          #StoreStore | #LoadStore
        andcc           %o3, 0xc0, %g5
        and             %o2, 0x3f, %o2
        fsrc2           %f0, %f2
        fsrc2           %f0, %f4
        andn            %o3, 0xff, %o3
        fsrc2           %f0, %f6
        cmp             %g5, 64
        fsrc2           %f0, %f8
        fsrc2           %f0, %f10
        fsrc2           %f0, %f12
        brz,pn          %g5, 10f                /* nothing but 256-byte groups */
         fsrc2          %f0, %f14
        be,pn           %icc, 2f                /* %g5 == 64: one block */
         stda           %f0, [%o0 + 0x00] %asi
        cmp             %g5, 128
        be,pn           %icc, 2f                /* %g5 == 128: two blocks */
         stda           %f0, [%o0 + 0x40] %asi
        stda            %f0, [%o0 + 0x80] %asi  /* %g5 == 192: three blocks */
2:      brz,pn          %o3, 12f
         add            %o0, %g5, %o0
10:     stda            %f0, [%o0 + 0x00] %asi
        stda            %f0, [%o0 + 0x40] %asi
        stda            %f0, [%o0 + 0x80] %asi
        stda            %f0, [%o0 + 0xc0] %asi
11:     subcc           %o3, 256, %o3
        bne,pt          %xcc, 10b
         add            %o0, 256, %o0
12:     wr              %g0, FPRS_FEF, %fprs
        membar          #StoreLoad | #StoreStore
        /* Fewer than 128 bytes remain.  %g5 = the multiple-of-8 part,
           %o2 = the final 0-7 bytes.  */
9:      andcc           %o2, 0x78, %g5
        be,pn           %xcc, 13f
         andcc          %o2, 7, %o2
        /* Computed jump into the stx run below: each stx is one 4-byte
           instruction storing 8 bytes, so the entry point lies
           %g5 / 2 code bytes before label 13.  %o0 is advanced past
           the region first because the stores use negative offsets.  */
14:     rd              %pc, %o4
        srl             %g5, 1, %o3
        sub             %o4, %o3, %o4
        jmpl            %o4 + (13f - 14b), %g0
         add            %o0, %g5, %o0
12:     SET_BLOCKS      (%o0, 0x68, %o1)
        SET_BLOCKS      (%o0, 0x48, %o1)
        SET_BLOCKS      (%o0, 0x28, %o1)
        SET_BLOCKS      (%o0, 0x08, %o1)
        /* Tail: condition codes are those of "andcc %o2, 7, %o2".  */
13:     be,pn           %xcc, 8f                /* no tail bytes */
         andcc          %o2, 4, %g0
        be,pn           %xcc, 1f
         andcc          %o2, 2, %g0
        stw             %o1, [%o0]
        add             %o0, 4, %o0
1:      be,pn           %xcc, 1f
         andcc          %o2, 1, %g0
        sth             %o1, [%o0]
        add             %o0, 2, %o0
1:      bne,a,pn        %xcc, 8f                /* annulled: stb only when bit 0 is set */
         stb            %o1, [%o0]
8:      retl
         mov            %o5, %o0
        /* len <= 7: store one byte per iteration.  */
17:     brz,pn          %o2, 0f
8:       add            %o0, 1, %o0
        subcc           %o2, 1, %o2
        bne,pt          %xcc, 8b
         stb            %o1, [%o0 - 1]
0:      retl
         mov            %o5, %o0

        /* dst was already 64-byte aligned: store the first 8 bytes so
           that the pattern can be loaded into %f0, then join the block
           path (or the tail code when no whole block remains).  */
6:      stx             %o1, [%o0]
        andncc          %o2, 0x3f, %o3
        be,pn           %xcc, 9b
         nop
        ba,pt           %xcc, 18b
         ldd            [%o0], %f0

        /* Zero-fill path, entered only from the annulled branch at the
           top of memset with %o1 = length and %o5 = dst.  It mirrors
           the code above with %g0 as the store source.  */
50:     cmp             %o1, 7
        bleu,pn         %xcc, 17f               /* len <= 7 */
         andcc          %o0, 3, %o2             /* %o2 = dst & 3 */
        be,a,pt         %xcc, 4f                /* 4-byte aligned; delay slot runs only if taken */
         andcc          %o0, 4, %g0
        /* Store 4 - %o2 leading zero bytes (3, 2 or 1).  */
        cmp             %o2, 3
        be,pn           %xcc, 2f
         stb            %g0, [%o0 + 0x00]
        cmp             %o2, 2
        be,pt           %xcc, 2f
         stb            %g0, [%o0 + 0x01]
        stb             %g0, [%o0 + 0x02]
2:      sub             %o2, 4, %o2             /* %o2 = -(bytes just stored) */
        sub             %o0, %o2, %o0
        add             %o1, %o2, %o1
        andcc           %o0, 4, %g0
4:      be,pt           %xcc, 2f                /* dst & 4 == 0: already 8-byte aligned */
         cmp            %o1, 128                /* compares the count before the "sub 4" below */
        stw             %g0, [%o0]
        sub             %o1, 4, %o1
        add             %o0, 4, %o0
2:      blu,pn          %xcc, 9f                /* uses the cmp in the delay slot at 4 */
         andcc          %o0, 0x38, %o2          /* %o2 = offset within a 64-byte line */
        be,pn           %icc, 6f                /* already 64-byte aligned */
         mov            64, %o4
        /* Store %o4 = 64 - %o2 zero bytes to reach a 64-byte boundary.  */
        andcc           %o0, 8, %g0
        be,pn           %icc, 1f
         sub            %o4, %o2, %o4
        stx             %g0, [%o0]
        add             %o0, 8, %o0
1:      andcc           %o4, 16, %g0
        be,pn           %icc, 1f
         sub            %o1, %o4, %o1           /* account for the whole alignment run */
        stx             %g0, [%o0]
        stx             %g0, [%o0 + 8]
        add             %o0, 16, %o0
1:      andcc           %o4, 32, %g0
        be,pn           %icc, 7f
         andncc         %o1, 0x3f, %o3          /* %o3 = bytes to store in 64-byte blocks */
        stx             %g0, [%o0]
        stx             %g0, [%o0 + 8]
        stx             %g0, [%o0 + 16]
        stx             %g0, [%o0 + 24]
        add             %o0, 32, %o0
6:      andncc          %o1, 0x3f, %o3
7:      be,pn           %xcc, 9f                /* no whole 64-byte block left */
         wr             %g0, ASI_BLK_P, %asi
        membar          #StoreLoad | #StoreStore | #LoadStore
        /* Fill %f0-%f14 with zero: fzero for %f0/%f2, then sums and
           products of those two zero registers for the rest.
           %o2 = %o3 & 0xc0, %o3 &= ~0xff, %o1 &= 0x3f as on the
           non-zero path.  */
        fzero           %f0
        andcc           %o3, 0xc0, %o2
        and             %o1, 0x3f, %o1
        fzero           %f2
        andn            %o3, 0xff, %o3
        faddd           %f0, %f2, %f4
        fmuld           %f0, %f2, %f6
        cmp             %o2, 64
        faddd           %f0, %f2, %f8
        fmuld           %f0, %f2, %f10
        faddd           %f0, %f2, %f12
        brz,pn          %o2, 10f                /* nothing but 256-byte groups */
         fmuld          %f0, %f2, %f14
        be,pn           %icc, 2f                /* %o2 == 64: one block */
         stda           %f0, [%o0 + 0x00] %asi
        cmp             %o2, 128
        be,pn           %icc, 2f                /* %o2 == 128: two blocks */
         stda           %f0, [%o0 + 0x40] %asi
        stda            %f0, [%o0 + 0x80] %asi  /* %o2 == 192: three blocks */
2:      brz,pn          %o3, 12f
         add            %o0, %o2, %o0
10:     stda            %f0, [%o0 + 0x00] %asi
        stda            %f0, [%o0 + 0x40] %asi
        stda            %f0, [%o0 + 0x80] %asi
        stda            %f0, [%o0 + 0xc0] %asi
11:     subcc           %o3, 256, %o3
        bne,pt          %xcc, 10b
         add            %o0, 256, %o0
12:     wr              %g0, FPRS_FEF, %fprs
        membar          #StoreLoad | #StoreStore
        /* %o2 = multiple-of-8 part of what remains, %o1 = final 0-7
           bytes.  Same computed jump as on the non-zero path, into a
           run of 32 stx instructions.  */
9:      andcc           %o1, 0xf8, %o2
        be,pn           %xcc, 13f
         andcc          %o1, 7, %o1
14:     rd              %pc, %o4
        srl             %o2, 1, %o3
        sub             %o4, %o3, %o4
        jmpl            %o4 + (13f - 14b), %g0
         add            %o0, %o2, %o0
12:     ZERO_BLOCKS     (%o0, 0xc8, %g0)
        ZERO_BLOCKS     (%o0, 0x88, %g0)
        ZERO_BLOCKS     (%o0, 0x48, %g0)
        ZERO_BLOCKS     (%o0, 0x08, %g0)
        /* Tail: entered with the condition codes of a test of %o1
           (either "andcc %o1, 7, %o1" or the orcc at 17).  */
13:     be,pn           %xcc, 8f                /* no tail bytes */
         andcc          %o1, 4, %g0
        be,pn           %xcc, 1f
         andcc          %o1, 2, %g0
        stw             %g0, [%o0]
        add             %o0, 4, %o0
1:      be,pn           %xcc, 1f
         andcc          %o1, 1, %g0
        sth             %g0, [%o0]
        add             %o0, 2, %o0
1:      bne,a,pn        %xcc, 8f                /* annulled: stb only when bit 0 is set */
         stb            %g0, [%o0]
8:      retl
         mov            %o5, %o0
        /* len <= 7.  Condition codes on entry are those of
           "andcc %o0, 3, %o2": a 4-byte aligned dst reuses the tail
           code at 13, anything else is zeroed a byte at a time.  */
17:     be,pn           %xcc, 13b
         orcc           %o1, 0, %g0             /* test len for 13 / the next branch */
        be,pn           %xcc, 0f                /* len == 0 */
8:       add            %o0, 1, %o0
        subcc           %o1, 1, %o1
        bne,pt          %xcc, 8b
         stb            %g0, [%o0 - 1]
0:      retl
         mov            %o5, %o0
END(memset)
libc_hidden_builtin_def (memset)
307 | |