/* Set a block of memory to some byte value.
   For UltraSPARC.
   Copyright (C) 1996-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
#include <sysdep.h>
#include <asm/asi.h>

/* XCC is the condition-code set tested right after the initial
   `cmp %o2, 7' on the byte count.  Unless something has defined XCC
   before this point, it defaults to xcc and USE_BPR is defined as well.
   When USE_BPR is not defined, memset zero-extends the count from its
   low 32 bits with `srl reg, 0, reg' before using it.  */
#ifndef XCC
#define XCC xcc
#define USE_BPR
#endif

/* Value written to %fprs once the block-store phase has finished
   (bit 2; the FEF bit according to the SPARC V9 manual).  */
#define FPRS_FEF	4
27
/* SET_BLOCKS (base, offset, source)

   Emit four 8-byte stores of SOURCE, in ascending address order, to
   [base - offset - 0x18] .. [base - offset - 0x00], i.e. 32 bytes.
   Every expansion is exactly four instructions; memset relies on that
   when it computes a jump into the middle of a run of these macros.  */
#define SET_BLOCKS(base, offset, source)		\
	stx		source, [base - offset - 0x18];	\
	stx		source, [base - offset - 0x10];	\
	stx		source, [base - offset - 0x08];	\
	stx		source, [base - offset - 0x00];
33
/* ZERO_BLOCKS (base, offset, source)

   Emit eight 8-byte stores of SOURCE, in ascending address order, to
   [base - offset - 0x38] .. [base - offset - 0x00], i.e. 64 bytes.
   memset passes %g0 as SOURCE on its zero-fill path.  Every expansion
   is exactly eight instructions; memset relies on that when it
   computes a jump into the middle of a run of these macros.  */
#define ZERO_BLOCKS(base, offset, source)		\
	stx		source, [base - offset - 0x38];	\
	stx		source, [base - offset - 0x30];	\
	stx		source, [base - offset - 0x28];	\
	stx		source, [base - offset - 0x20];	\
	stx		source, [base - offset - 0x18];	\
	stx		source, [base - offset - 0x10];	\
	stx		source, [base - offset - 0x08];	\
	stx		source, [base - offset - 0x00];
43
/* Well, memset is a lot easier to get right than bcopy... */

/* void *memset (void *s, int c, size_t n)

   In:      %o0 = s, %o1 = c (only the low 8 bits are used), %o2 = n.
   Out:     %o0 = s (saved in %o5 on entry and restored on return).
   Written: %o1-%o5, %g1, %g5, %f0-%f14, %asi, %fprs, condition codes.

   Outline:
     - c == 0 branches to the zero-fill path at label 50, which stores
       %g0 directly and keeps the remaining count in %o1.
     - Otherwise %o2 is the remaining count and %o1 is widened into a
       64-bit pattern made of eight copies of the fill byte.
     - Counts of 7 or less are handled by a byte loop (label 17).
     - The pointer is aligned to 4 and then 8 bytes.  Counts of 128 or
       more are then aligned to 64 bytes and filled with 64-byte block
       stores from %f0-%f14 through ASI_BLK_P.
     - What is left is written by jumping into a table of stx
       instructions (whole 8-byte words) followed by stw/sth/stb for
       the last 0-7 bytes.

   Reminder for readers: the instruction after every branch is its
   delay slot (indented by one extra space).  It always executes,
   except after a conditional branch carrying the ",a" annul bit, where
   it executes only when the branch is taken.  Numeric labels are local
   and reused; Nf/Nb refer to the nearest N: forward/backward.  */
	.text
	.align		32
ENTRY(memset)
	andcc		%o1, 0xff, %o1		/* Keep the fill byte; Z set when it is 0.  */
	mov		%o0, %o5		/* Remember s for the return value.  */
	/* Fill byte is zero: take the zero path with the count moved into
	   %o1 (done in the annulled delay slot, so only when taken).  */
	be,a,pt		%icc, 50f
#ifndef USE_BPR
	 srl		%o2, 0, %o1
#else
	 mov		%o2, %o1
#endif
	cmp		%o2, 7
#ifndef USE_BPR
	srl		%o2, 0, %o2		/* Zero-extend the count; flags untouched.  */
#endif
	bleu,pn		%XCC, 17f		/* n <= 7: byte loop.  */
	 andcc		%o0, 3, %g5		/* %g5 = s & 3.  */
	be,pt		%xcc, 4f		/* Already word aligned.  */
	 and		%o1, 0xff, %o1

	/* Store the 4 - %g5 leading bytes needed to reach a 4-byte
	   boundary.  The stb in each delay slot always executes.  */
	cmp		%g5, 3
	be,pn		%xcc, 2f
	 stb		%o1, [%o0 + 0x00]
	cmp		%g5, 2
	be,pt		%xcc, 2f
	 stb		%o1, [%o0 + 0x01]
	stb		%o1, [%o0 + 0x02]
2:	sub		%g5, 4, %g5		/* %g5 = -(bytes just stored).  */
	sub		%o0, %g5, %o0		/* Advance the pointer...  */
	add		%o2, %g5, %o2		/* ...and shrink the count.  */

	/* Replicate the byte to 16, 32 and finally 64 bits, and reach
	   8-byte alignment with one word store if bit 2 of %o0 is set.  */
4:	sllx		%o1, 8, %g1
	andcc		%o0, 4, %g0
	or		%o1, %g1, %o1
	sllx		%o1, 16, %g1
	or		%o1, %g1, %o1
	be,pt		%xcc, 2f
	 sllx		%o1, 32, %g1
	stw		%o1, [%o0]
	sub		%o2, 4, %o2
	add		%o0, 4, %o0
2:	cmp		%o2, 128
	or		%o1, %g1, %o1		/* %o1 = full 64-bit pattern.  */
	blu,pn		%xcc, 9f		/* Fewer than 128 bytes: skip block stores.  */
	 andcc		%o0, 0x38, %g5		/* %g5 = offset within a 64-byte line.  */
	be,pn		%icc, 6f		/* Already 64-byte aligned.  */
	 mov		64, %o4

	/* Store %o4 = 64 - %g5 bytes (8, 16 and/or 32 at a time) to reach
	   a 64-byte boundary.  */
	andcc		%o0, 8, %g0
	be,pn		%icc, 1f
	 sub		%o4, %g5, %o4
	stx		%o1, [%o0]
	add		%o0, 8, %o0
1:	andcc		%o4, 16, %g0
	be,pn		%icc, 1f
	 sub		%o2, %o4, %o2		/* Account for all alignment bytes at once.  */
	stx		%o1, [%o0]
	stx		%o1, [%o0 + 8]
	add		%o0, 16, %o0
1:	andcc		%o4, 32, %g0
	be,pn		%icc, 7f
	 andncc		%o2, 0x3f, %o3		/* %o3 = count rounded down to 64; sets Z.  */
	stw		%o1, [%o0]
	stw		%o1, [%o0 + 4]
	stw		%o1, [%o0 + 8]
	stw		%o1, [%o0 + 12]
	stw		%o1, [%o0 + 16]
	stw		%o1, [%o0 + 20]
	stw		%o1, [%o0 + 24]
	stw		%o1, [%o0 + 28]
	add		%o0, 32, %o0
7:	be,pn		%xcc, 9f		/* No whole 64-byte block left (Z from andncc).  */
	 nop
	/* Fetch the pattern into %f0 from the 8 bytes just below %o0,
	   which the alignment stores above have filled.  */
	ldd		[%o0 - 8], %f0

	/* Block-store phase.  %o3 = bytes to write in whole 64-byte
	   blocks.  %g5 = %o3 & 0xc0 is the 0-3 blocks written before the
	   loop, %o3 & ~0xff is what the 256-byte loop writes, and %o2 is
	   reduced to the tail (< 64 bytes).  %f0 is copied to %f2-%f14 so
	   that %f0-%f14 hold 64 bytes of pattern.  */
18:	wr		%g0, ASI_BLK_P, %asi
	membar		#StoreStore | #LoadStore
	andcc		%o3, 0xc0, %g5
	and		%o2, 0x3f, %o2
	fsrc2		%f0, %f2
	fsrc2		%f0, %f4
	andn		%o3, 0xff, %o3
	fsrc2		%f0, %f6
	cmp		%g5, 64
	fsrc2		%f0, %f8
	fsrc2		%f0, %f10
	fsrc2		%f0, %f12
	brz,pn		%g5, 10f		/* No leading blocks: straight to the loop.  */
	 fsrc2		%f0, %f14
	be,pn		%icc, 2f		/* %g5 == 64: one block.  */
	 stda		%f0, [%o0 + 0x00] %asi
	cmp		%g5, 128
	be,pn		%icc, 2f		/* %g5 == 128: two blocks.  */
	 stda		%f0, [%o0 + 0x40] %asi
	stda		%f0, [%o0 + 0x80] %asi	/* %g5 == 192: three blocks.  */
2:	brz,pn		%o3, 12f		/* Nothing left for the 256-byte loop.  */
	 add		%o0, %g5, %o0
10:	stda		%f0, [%o0 + 0x00] %asi
	stda		%f0, [%o0 + 0x40] %asi
	stda		%f0, [%o0 + 0x80] %asi
	stda		%f0, [%o0 + 0xc0] %asi
11:	subcc		%o3, 256, %o3
	bne,pt		%xcc, 10b
	 add		%o0, 256, %o0
12:	wr		%g0, FPRS_FEF, %fprs	/* %fprs = FPRS_FEF only.  */
	membar		#StoreLoad | #StoreStore

	/* Tail: %g5 = %o2 & 0x78 bytes go out as 8-byte words, then
	   %o2 & 7 bytes remain.  The jump lands %g5 / 2 bytes of code
	   before label 13, i.e. on the last %g5 / 8 stx instructions of
	   the table; %o0 is advanced first (in the delay slot) because the
	   table stores at negative offsets.  None of the instructions up
	   to label 13 touch the flags set by the andcc in the delay slot.  */
9:	andcc		%o2, 0x78, %g5
	be,pn		%xcc, 13f
	 andcc		%o2, 7, %o2
14:	rd		%pc, %o4
	srl		%g5, 1, %o3
	sub		%o4, %o3, %o4
	jmpl		%o4 + (13f - 14b), %g0
	 add		%o0, %g5, %o0
12:	SET_BLOCKS	(%o0, 0x68, %o1)
	SET_BLOCKS	(%o0, 0x48, %o1)
	SET_BLOCKS	(%o0, 0x28, %o1)
	SET_BLOCKS	(%o0, 0x08, %o1)

	/* Last 0-7 bytes: optional word, halfword and byte.  */
13:	be,pn		%xcc, 8f		/* (%o2 & 7) == 0: done.  */
	 andcc		%o2, 4, %g0
	be,pn		%xcc, 1f
	 andcc		%o2, 2, %g0
	stw		%o1, [%o0]
	add		%o0, 4, %o0
1:	be,pn		%xcc, 1f
	 andcc		%o2, 1, %g0
	sth		%o1, [%o0]
	add		%o0, 2, %o0
1:	bne,a,pn	%xcc, 8f		/* Odd byte left: store it in the annulled slot.  */
	 stb		%o1, [%o0]
8:	retl
	 mov		%o5, %o0

	/* n <= 7: byte-at-a-time loop.  The add at label 8 doubles as the
	   delay slot of the brz on the first pass.  */
17:	brz,pn		%o2, 0f
8:	 add		%o0, 1, %o0
	subcc		%o2, 1, %o2
	bne,pt		%xcc, 8b
	 stb		%o1, [%o0 - 1]
0:	retl
	 mov		%o5, %o0

	/* Destination was already 64-byte aligned.  Write the pattern to
	   the first 8 bytes so that it can be loaded into %f0 (the pointer
	   is not advanced; the block stores rewrite those bytes), then
	   join the block-store phase.  */
6:	stx		%o1, [%o0]
	andncc		%o2, 0x3f, %o3
	be,pn		%xcc, 9b
	 nop
	ba,pt		%xcc, 18b
	 ldd		[%o0], %f0

	/* Zero-fill path.  Entered only through the branch at the top of
	   memset: %o0 = destination, %o1 = byte count, %o5 = return value.
	   It mirrors the code above with %g0 as the store source, %o1 as
	   the remaining count and %o2 as scratch.  */
50:	cmp		%o1, 7
	bleu,pn		%xcc, 17f		/* n <= 7.  */
	 andcc		%o0, 3, %o2		/* %o2 = s & 3.  */
	be,a,pt		%xcc, 4f		/* Word aligned; annulled slot tests bit 2.  */
	 andcc		%o0, 4, %g0

	/* Zero the 4 - %o2 leading bytes needed to reach a 4-byte
	   boundary.  */
	cmp		%o2, 3
	be,pn		%xcc, 2f
	 stb		%g0, [%o0 + 0x00]
	cmp		%o2, 2
	be,pt		%xcc, 2f
	 stb		%g0, [%o0 + 0x01]
	stb		%g0, [%o0 + 0x02]
2:	sub		%o2, 4, %o2		/* %o2 = -(bytes just stored).  */
	sub		%o0, %o2, %o0
	add		%o1, %o2, %o1
	andcc		%o0, 4, %g0
4:	be,pt		%xcc, 2f		/* Already 8-byte aligned.  */
	 cmp		%o1, 128		/* Flags for the blu at 2 below.  */
	stw		%g0, [%o0]
	sub		%o1, 4, %o1
	add		%o0, 4, %o0
2:	blu,pn		%xcc, 9f		/* Small count: skip block stores.  */
	 andcc		%o0, 0x38, %o2		/* %o2 = offset within a 64-byte line.  */
	be,pn		%icc, 6f		/* Already 64-byte aligned.  */
	 mov		64, %o4

	/* Zero %o4 = 64 - %o2 bytes to reach a 64-byte boundary.  */
	andcc		%o0, 8, %g0
	be,pn		%icc, 1f
	 sub		%o4, %o2, %o4
	stx		%g0, [%o0]
	add		%o0, 8, %o0
1:	andcc		%o4, 16, %g0
	be,pn		%icc, 1f
	 sub		%o1, %o4, %o1		/* Account for all alignment bytes at once.  */
	stx		%g0, [%o0]
	stx		%g0, [%o0 + 8]
	add		%o0, 16, %o0
1:	andcc		%o4, 32, %g0
	be,pn		%icc, 7f
	 andncc		%o1, 0x3f, %o3		/* %o3 = count rounded down to 64; sets Z.  */
	stx		%g0, [%o0]
	stx		%g0, [%o0 + 8]
	stx		%g0, [%o0 + 16]
	stx		%g0, [%o0 + 24]
	add		%o0, 32, %o0
6:	andncc		%o1, 0x3f, %o3
7:	be,pn		%xcc, 9f		/* No whole 64-byte block left.  */
	 wr		%g0, ASI_BLK_P, %asi	/* Executes on both outcomes.  */
	membar		#StoreLoad | #StoreStore | #LoadStore

	/* Block-store phase with zeroed %f0-%f14.  %f0 and %f2 are
	   cleared with fzero; the remaining registers are produced from
	   them with faddd/fmuld.  %o2 = %o3 & 0xc0 is the 0-3 blocks
	   written before the loop, %o3 & ~0xff is what the 256-byte loop
	   writes, and %o1 is reduced to the tail (< 64 bytes).  */
	fzero		%f0
	andcc		%o3, 0xc0, %o2
	and		%o1, 0x3f, %o1
	fzero		%f2
	andn		%o3, 0xff, %o3
	faddd		%f0, %f2, %f4
	fmuld		%f0, %f2, %f6
	cmp		%o2, 64
	faddd		%f0, %f2, %f8
	fmuld		%f0, %f2, %f10
	faddd		%f0, %f2, %f12
	brz,pn		%o2, 10f		/* No leading blocks: straight to the loop.  */
	 fmuld		%f0, %f2, %f14
	be,pn		%icc, 2f		/* %o2 == 64: one block.  */
	 stda		%f0, [%o0 + 0x00] %asi
	cmp		%o2, 128
	be,pn		%icc, 2f		/* %o2 == 128: two blocks.  */
	 stda		%f0, [%o0 + 0x40] %asi
	stda		%f0, [%o0 + 0x80] %asi	/* %o2 == 192: three blocks.  */
2:	brz,pn		%o3, 12f		/* Nothing left for the 256-byte loop.  */
	 add		%o0, %o2, %o0
10:	stda		%f0, [%o0 + 0x00] %asi
	stda		%f0, [%o0 + 0x40] %asi
	stda		%f0, [%o0 + 0x80] %asi
	stda		%f0, [%o0 + 0xc0] %asi
11:	subcc		%o3, 256, %o3
	bne,pt		%xcc, 10b
	 add		%o0, 256, %o0
12:	wr		%g0, FPRS_FEF, %fprs	/* %fprs = FPRS_FEF only.  */
	membar		#StoreLoad | #StoreStore

	/* Tail: %o2 = %o1 & 0xf8 bytes go out as 8-byte words, then
	   %o1 & 7 bytes remain.  The jump lands %o2 / 2 bytes of code
	   before label 13, i.e. on the last %o2 / 8 stx instructions of
	   the table; %o0 is advanced first because the table stores at
	   negative offsets.  */
9:	andcc		%o1, 0xf8, %o2
	be,pn		%xcc, 13f
	 andcc		%o1, 7, %o1
14:	rd		%pc, %o4
	srl		%o2, 1, %o3
	sub		%o4, %o3, %o4
	jmpl		%o4 + (13f - 14b), %g0
	 add		%o0, %o2, %o0
12:	ZERO_BLOCKS	(%o0, 0xc8, %g0)
	ZERO_BLOCKS	(%o0, 0x88, %g0)
	ZERO_BLOCKS	(%o0, 0x48, %g0)
	ZERO_BLOCKS	(%o0, 0x08, %g0)

	/* Last 0-7 bytes: optional word, halfword and byte.  */
13:	be,pn		%xcc, 8f		/* Nothing left: done.  */
	 andcc		%o1, 4, %g0
	be,pn		%xcc, 1f
	 andcc		%o1, 2, %g0
	stw		%g0, [%o0]
	add		%o0, 4, %o0
1:	be,pn		%xcc, 1f
	 andcc		%o1, 1, %g0
	sth		%g0, [%o0]
	add		%o0, 2, %o0
1:	bne,a,pn	%xcc, 8f		/* Odd byte left: store it in the annulled slot.  */
	 stb		%g0, [%o0]
8:	retl
	 mov		%o5, %o0

	/* n <= 7 with a zero fill byte.  Flags on entry come from
	   `andcc %o0, 3': a word-aligned destination reuses the tail code
	   at 13 (with flags from the orcc, i.e. Z when n == 0); otherwise
	   fall into a byte loop, returning at once when n == 0.  */
17:	be,pn		%xcc, 13b
	 orcc		%o1, 0, %g0
	be,pn		%xcc, 0f
8:	 add		%o0, 1, %o0
	subcc		%o1, 1, %o1
	bne,pt		%xcc, 8b
	 stb		%g0, [%o0 - 1]
0:	retl
	 mov		%o5, %o0
END(memset)
libc_hidden_builtin_def (memset)
307

/* Source: glibc/sysdeps/sparc/sparc64/memset.S  */