1/* SPDX-License-Identifier: GPL-2.0 */
2/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
3 *
4 * Copyright(C) 1995 Linus Torvalds
5 * Copyright(C) 1996 David S. Miller
6 * Copyright(C) 1996 Eddie C. Dost
7 * Copyright(C) 1996,1998 Jakub Jelinek
8 *
9 * derived from:
10 * e-mail between David and Eddie.
11 *
12 * Returns 0 if successful, otherwise count of bytes not copied yet
13 */
14
15#include <linux/export.h>
16#include <asm/ptrace.h>
17#include <asm/asmmacro.h>
18#include <asm/page.h>
19#include <asm/thread_info.h>
20
/*
 * ALLOC and EXECINSTR spell the #alloc and #execinstr section flags as
 * preprocessor names. The multi-line macros in this file use these names
 * in their .section directives, while the .section directives written
 * outside of a #define use the '#' forms directly.
 */
21/* Work around cpp -rob */
22#define ALLOC #alloc
23#define EXECINSTR #execinstr
24
/*
 * EX_ENTRY(l1, l2): emit one record into the __ex_table section.
 *
 * Switches to __ex_table, aligns to 4 bytes, stores the two words l1 and
 * l2, and switches back to .text. Every user in this file passes the
 * label of a user-memory load/store as l1 and the label of its recovery
 * code as l2.
 * NOTE(review): the consumer of __ex_table (presumably the kernel fault
 * handler) is not part of this file.
 */
25#define EX_ENTRY(l1, l2) \
26 .section __ex_table,ALLOC; \
27 .align 4; \
28 .word l1, l2; \
29 .text;
30
/*
 * EX(x, y, a, b): emit the instruction "x, y" at local label 98 together
 * with a private fixup stub for it.
 *
 * The stub (local label 99, placed in .fixup) is "retl" followed by the
 * instruction "a, b, %o0", so it returns to the caller of __copy_user
 * with %o0 produced by that instruction. Each use picks a/b so that %o0
 * matches the file header contract (count of bytes not copied yet).
 * EX_ENTRY(98b, 99b) records the instruction/stub pair and leaves the
 * assembler back in .text.
 */
31#define EX(x,y,a,b) \
3298: x,y; \
33 .section .fixup,ALLOC,EXECINSTR; \
34 .align 4; \
3599: retl; \
36 a, b, %o0; \
37 EX_ENTRY(98b, 99b)
38
/*
 * EX2(x, y, c, d, e, a, b): emit the instruction "x, y" at local label 98
 * with a private fixup stub that needs two steps to compute the result.
 *
 * The stub (local label 99, in .fixup) first executes "c, d, e" and then
 * "retl" followed by "a, b, %o0". EX_ENTRY(98b, 99b) records the
 * instruction/stub pair and returns to .text.
 */
39#define EX2(x,y,c,d,e,a,b) \
4098: x,y; \
41 .section .fixup,ALLOC,EXECINSTR; \
42 .align 4; \
4399: c, d, e; \
44 retl; \
45 a, b, %o0; \
46 EX_ENTRY(98b, 99b)
47
/*
 * EXO2(x, y): emit the instruction "x, y" at local label 98 and record
 * the next local label 97 (97f) as its fixup.
 *
 * No per-use stub is generated. The only label 97 in this file is the
 * shared stub near the end, which returns %o2 in %o0, so EXO2 is used
 * where %o2 still holds the number of bytes left at the time of the
 * access.
 */
48#define EXO2(x,y) \
4998: x, y; \
50 EX_ENTRY(98b, 97f)
51
/*
 * LD(insn, src, offset, reg, label): emit the load
 * "insn [%src + (offset)], %reg" at local label 98.
 *
 * src and reg are register names given without the leading '%'.
 * The fixup stub (local label 99, in .fixup) branches to the shared
 * handler "label"; the "mov offset, %g5" that follows the branch hands
 * the displacement of the faulting access to that handler in %g5.
 * Only the load itself ends up in .text: the stub goes to .fixup and the
 * table record to __ex_table.
 */
52#define LD(insn, src, offset, reg, label) \
5398: insn [%src + (offset)], %reg; \
54 .section .fixup,ALLOC,EXECINSTR; \
5599: ba label; \
56 mov offset, %g5; \
57 EX_ENTRY(98b, 99b)
58
/*
 * ST(insn, dst, offset, reg, label): emit the store
 * "insn %reg, [%dst + (offset)]" at local label 98.
 *
 * dst and reg are register names given without the leading '%'.
 * The fixup stub (local label 99, in .fixup) branches to the shared
 * handler "label"; the "mov offset, %g5" that follows the branch hands
 * the displacement of the faulting access to that handler in %g5.
 * Only the store itself ends up in .text: the stub goes to .fixup and
 * the table record to __ex_table.
 */
59#define ST(insn, dst, offset, reg, label) \
6098: insn %reg, [%dst + (offset)]; \
61 .section .fixup,ALLOC,EXECINSTR; \
6299: ba label; \
63 mov offset, %g5; \
64 EX_ENTRY(98b, 99b)
65
/*
 * MOVE_BIGCHUNK(src, dst, offset, t0..t7): copy 32 bytes from
 * [src + offset] to [dst + offset] with four ldd loads followed by eight
 * word (st) stores.
 *
 * The ldd instructions name t0, t2, t4 and t6 only; the stores of t1, t3,
 * t5 and t7 rely on each ldd filling a register pair, so callers must
 * pass (t0,t1), (t2,t3), (t4,t5), (t6,t7) as such pairs.
 * All twelve accesses use bigchunk_fault as their fixup handler.
 *
 * The "same insn" requirement below exists because __copy_user executes
 * the first instruction of this macro in the delay slot of
 * "be ldd_std + 4" and then enters the MOVE_BIGALIGNCHUNK loop one
 * instruction in.
 */
66/* Both these macros have to start with exactly the same insn */
67/* left: g7 + (g1 % 128) - offset */
68#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
69 LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \
70 LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \
71 LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \
72 LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \
73 ST(st, dst, offset + 0x00, t0, bigchunk_fault) \
74 ST(st, dst, offset + 0x04, t1, bigchunk_fault) \
75 ST(st, dst, offset + 0x08, t2, bigchunk_fault) \
76 ST(st, dst, offset + 0x0c, t3, bigchunk_fault) \
77 ST(st, dst, offset + 0x10, t4, bigchunk_fault) \
78 ST(st, dst, offset + 0x14, t5, bigchunk_fault) \
79 ST(st, dst, offset + 0x18, t6, bigchunk_fault) \
80 ST(st, dst, offset + 0x1c, t7, bigchunk_fault)
81
/*
 * MOVE_BIGALIGNCHUNK(src, dst, offset, t0..t7): copy 32 bytes from
 * [src + offset] to [dst + offset] with four ldd loads followed by four
 * std stores. Used by the ldd_std loop, which __copy_user selects when
 * bit 2 of the destination address is clear.
 *
 * Only t0, t2, t4 and t6 appear in the instructions; t1, t3, t5 and t7
 * are accepted so the parameter list matches MOVE_BIGCHUNK.
 * The first instruction is the same "ldd [src + offset], t0" that
 * MOVE_BIGCHUNK starts with, which the "ldd_std + 4" entry depends on.
 * All eight accesses use bigchunk_fault as their fixup handler.
 */
82/* left: g7 + (g1 % 128) - offset */
83#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
84 LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \
85 LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \
86 LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \
87 LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \
88 ST(std, dst, offset + 0x00, t0, bigchunk_fault) \
89 ST(std, dst, offset + 0x08, t2, bigchunk_fault) \
90 ST(std, dst, offset + 0x10, t4, bigchunk_fault) \
91 ST(std, dst, offset + 0x18, t6, bigchunk_fault)
92
/*
 * Shared fixup for every access emitted by MOVE_BIGCHUNK and
 * MOVE_BIGALIGNCHUNK.
 *
 * On entry %g5 is the displacement of the faulting access (set by the
 * LD/ST stub), %g7 is the byte count still owed by the 128-byte loop and
 * %g1 is the length latched before that loop. Returns
 *     %o0 = %g7 - %g5 + (%g1 & 127)
 * The final add sits after retl and completes before control reaches the
 * caller.
 */
93 .section .fixup,#alloc,#execinstr
94bigchunk_fault:
95 sub %g7, %g5, %o0
96 and %g1, 127, %g1
97 retl
98 add %o0, %g1, %o0
99
/*
 * MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 16 bytes
 * located at displacements -(offset + 0x10) .. -(offset + 0x01) from src
 * and dst, using two ldd loads and four word stores.
 *
 * (t0,t1) and (t2,t3) must be the register pairs filled by the two ldd
 * instructions. The displacements are negative because the callers
 * advance src/dst past the whole 16-byte-chunk area before jumping into
 * the table of these macros.
 * Each expansion contributes exactly six instructions to .text; the
 * computed jump in front of the table depends on that size.
 * All accesses use lastchunk_fault as their fixup handler.
 */
100/* left: offset + 16 + (g1 % 16) */
101#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
102 LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault) \
103 LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault) \
104 ST(st, dst, -(offset + 0x10), t0, lastchunk_fault) \
105 ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault) \
106 ST(st, dst, -(offset + 0x08), t2, lastchunk_fault) \
107 ST(st, dst, -(offset + 0x04), t3, lastchunk_fault)
108
/*
 * Shared fixup for every access emitted by MOVE_LASTCHUNK.
 *
 * On entry %g5 is the (negative) displacement of the faulting access and
 * %g1 is the length latched by __copy_user. Returns
 *     %o0 = (%g1 & 15) - %g5
 * i.e. the bytes from the faulting displacement up to the end of the
 * chunk table plus the sub-16-byte tail.
 */
109 .section .fixup,#alloc,#execinstr
110lastchunk_fault:
111 and %g1, 15, %g1
112 retl
113 sub %g1, %g5, %o0
114
/*
 * MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3): copy 8 bytes from
 * [src + offset] to [dst + offset] as four halfwords (lduh / sth),
 * loading all four into t0..t3 before storing them.
 *
 * Used by the cannot_optimize loop. All accesses use halfchunk_fault as
 * their fixup handler.
 */
115/* left: o3 + (o2 % 16) - offset */
116#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
117 LD(lduh, src, offset + 0x00, t0, halfchunk_fault) \
118 LD(lduh, src, offset + 0x02, t1, halfchunk_fault) \
119 LD(lduh, src, offset + 0x04, t2, halfchunk_fault) \
120 LD(lduh, src, offset + 0x06, t3, halfchunk_fault) \
121 ST(sth, dst, offset + 0x00, t0, halfchunk_fault) \
122 ST(sth, dst, offset + 0x02, t1, halfchunk_fault) \
123 ST(sth, dst, offset + 0x04, t2, halfchunk_fault) \
124 ST(sth, dst, offset + 0x06, t3, halfchunk_fault)
125
/*
 * MOVE_SHORTCHUNK(src, dst, offset, t0, t1): copy 2 bytes with ldub/stb
 * at displacements -(offset + 0x02) and -(offset + 0x01).
 *
 * byte_chunk invokes it with offsets -0x02 .. -0x10, which makes the
 * effective displacements 0 .. 15. All accesses use halfchunk_fault as
 * their fixup handler (the same one MOVE_HALFCHUNK uses).
 */
126/* left: o3 + (o2 % 16) + offset + 2 */
127#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
128 LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault) \
129 LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault) \
130 ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault) \
131 ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault)
132
/*
 * Shared fixup for MOVE_HALFCHUNK and MOVE_SHORTCHUNK accesses.
 *
 * On entry %g5 is the displacement of the faulting access inside the
 * current 16-byte iteration, %o3 is the byte count still owed by the
 * 16-byte loop and %o2 is the length. Returns
 *     %o0 = (%o2 & 15) + (%o3 - %g5)
 */
133 .section .fixup,#alloc,#execinstr
134halfchunk_fault:
135 and %o2, 15, %o2
136 sub %o3, %g5, %o3
137 retl
138 add %o2, %o3, %o0
139
/*
 * MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1): copy the 2 bytes at
 * displacements -(offset + 0x02) and -(offset + 0x01) with ldub/stb.
 *
 * The displacements are negative because short_end advances src/dst past
 * the even part of the tail before jumping into the table of these
 * macros. Each expansion contributes exactly four instructions to .text;
 * the computed jump in short_end depends on that size.
 * All accesses use last_shortchunk_fault as their fixup handler.
 */
140/* left: offset + 2 + (o2 % 2) */
141#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \
142 LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault) \
143 LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault) \
144 ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault) \
145 ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault)
146
/*
 * Shared fixup for every access emitted by MOVE_LAST_SHORTCHUNK.
 *
 * On entry %g5 is the (negative) displacement of the faulting byte and
 * %o2 is the length. Returns
 *     %o0 = (%o2 & 1) - %g5
 * i.e. the bytes from the faulting displacement to the end of the table
 * plus the possible odd trailing byte.
 */
147 .section .fixup,#alloc,#execinstr
148last_shortchunk_fault:
149 and %o2, 1, %o2
150 retl
151 sub %o2, %g5, %o0
152
/*
 * Start of the copy routine text. __copy_user_begin is a global marker
 * label; its counterpart __copy_user_end is defined at the end of this
 * file.
 * NOTE(review): the users of the two marker labels are outside this
 * file.
 * __copy_user is declared global and exported here; the label itself is
 * defined further down, after the dword_align helper.
 */
153 .text
154 .align 4
155
156 .globl __copy_user_begin
157__copy_user_begin:
158
159 .globl __copy_user
160 EXPORT_SYMBOL(__copy_user)
/*
 * dword_align: bring the source pointer up to a 4-byte boundary.
 *
 * Reached from __copy_user when (%o1 & 3) != 0, after __copy_user has
 * established that dst and src agree in their low two bits and that
 * len > 15. Copies 1, 2 or 3 bytes, updating %o0 (dst), %o1 (src) and
 * %o2 (len), and continues at local label 3 inside __copy_user.
 *
 * Every access is wrapped in EXO2, whose fixup returns %o2. In each step
 * %o2 is decremented only after the store, so a fault reports the
 * faulting item as not copied.
 */
161dword_align:
/* Even src: only a halfword is missing, handled at label 4. */
162 andcc %o1, 1, %g0
163 be 4f
/* Delay slot: test bit 1 of the still unmodified %o1 for the bne below. */
164 andcc %o1, 2, %g0
165
/* Odd src: move one byte. */
166 EXO2(ldub [%o1], %g2)
167 add %o1, 1, %o1
168 EXO2(stb %g2, [%o0])
169 sub %o2, 1, %o2
/*
 * Condition codes still come from "andcc %o1, 2" above, since none of
 * the instructions in between is a cc-setting form. Bit 1 set means the
 * address was 3 mod 4, so the single byte reached the boundary.
 */
170 bne 3f
171 add %o0, 1, %o0
172
/* Address was 1 mod 4: a halfword is still needed. */
173 EXO2(lduh [%o1], %g2)
174 add %o1, 2, %o1
175 EXO2(sth %g2, [%o0])
176 sub %o2, 2, %o2
177 b 3f
178 add %o0, 2, %o0
/* Address was 2 mod 4: move one halfword. */
1794:
180 EXO2(lduh [%o1], %g2)
181 add %o1, 2, %o1
182 EXO2(sth %g2, [%o0])
183 sub %o2, 2, %o2
184 b 3f
185 add %o0, 2, %o0
186
/*
 * __copy_user: entry point.
 *   In:  %o0 = dst, %o1 = src, %o2 = len
 *   Out: %o0 = 0 on success, otherwise the count of bytes not copied yet
 *        (see the file header).
 *
 * Dispatch performed here:
 *   (dst ^ src) & 3 != 0  -> cannot_optimize
 *   len <= 15             -> short_aligned_end
 *   src & 3 != 0          -> dword_align, which rejoins at local label 3
 * Otherwise src is brought to an 8-byte boundary with one optional word
 * copy and whole 128-byte blocks are moved, either by the st loop at
 * local label 5 or by ldd_std when bit 2 of dst is clear.
 *
 * From "mov %o2, %g1" on, %g1 carries the remaining length; the chunk
 * macros use %o2..%o5 and %g2..%g5 as scratch. %g7 counts the bytes left
 * for the 128-byte loop.
 *
 * Local labels 1 and 2 directly below are not referenced anywhere in the
 * visible source.
 */
187__copy_user: /* %o0=dst %o1=src %o2=len */
188 xor %o0, %o1, %o4
1891:
/* %o5 = relative misalignment; cannot_optimize compares it against 2. */
190 andcc %o4, 3, %o5
1912:
192 bne cannot_optimize
/* Delay slot: these flags are used by cannot_optimize or by the bleu. */
193 cmp %o2, 15
194
195 bleu short_aligned_end
/* Delay slot: these flags are used by short_aligned_end or by the bne. */
196 andcc %o1, 3, %g0
197
198 bne dword_align
/*
 * Label 3 sits on the delay slot of the bne: the andcc runs on the way
 * into dword_align (result unused there) and again when dword_align
 * branches back to 3.
 */
1993:
200 andcc %o1, 4, %g0
201
/* Bit 2 of src clear: src is already 8-byte aligned, skip the word copy. */
202 be 2f
203 mov %o2, %g1
204
/*
 * Copy one word. Only %g1 is decremented, so the EXO2 fixup (which
 * returns %o2) still counts this word as not copied.
 */
205 EXO2(ld [%o1], %o4)
206 sub %g1, 4, %g1
207 EXO2(st %o4, [%o0])
208 add %o1, 4, %o1
209 add %o0, 4, %o0
2102:
/* %g7 = length rounded down to a multiple of 128; none -> label 3. */
211 andcc %g1, 0xffffff80, %g7
212 be 3f
/* Delay slot: test bit 2 of dst for the branch below. */
213 andcc %o0, 4, %g0
214
/*
 * Bit 2 of dst clear: use the std loop. The delay slot of this branch is
 * the first ldd of MOVE_BIGCHUNK at label 5, which is identical to the
 * first instruction of ldd_std, hence the entry at ldd_std + 4.
 */
215 be ldd_std + 4
2165:
217 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
218 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
219 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
220 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
/* 128 bytes done; loop while %g7 is non-zero. */
221 subcc %g7, 128, %g7
222 add %o1, 128, %o1
223 bne 5b
224 add %o0, 128, %o0
/*
 * Local label 3: copy the 16-byte chunks selected by (%g1 & 0x70) by
 * jumping into the table of MOVE_LASTCHUNK expansions that ends at
 * copy_user_table_end.
 *
 * %g7 = number of bytes to move here (a multiple of 16, at most 112).
 * Each 16-byte chunk takes six instructions (24 bytes of code), so the
 * code distance is %g7 + %g7 / 2, computed into %o4. src and dst are
 * advanced by %g7 first because the table addresses memory with negative
 * displacements.
 * The "andcc %g1, 8" in the delay slot of the be sets the flags tested
 * at copy_user_table_end; nothing executed between here and there
 * changes the condition codes.
 */
2253:
226 andcc %g1, 0x70, %g7
227 be copy_user_table_end
228 andcc %g1, 8, %g0
229
230 sethi %hi(copy_user_table_end), %o5
231 srl %g7, 1, %o4
232 add %g7, %o4, %o4
233 add %o1, %g7, %o1
234 sub %o5, %o4, %o5
235 jmpl %o5 + %lo(copy_user_table_end), %g0
236 add %o0, %g7, %o0
237
238 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
239 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
240 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
241 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
242 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
243 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
244 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
/*
 * copy_user_table_end: optional 8-byte step of the tail.
 *
 * Entered with the flags of "andcc %g1, 8": if bit 3 of the length is
 * clear, go straight to copy_user_last7. Otherwise move 8 bytes through
 * the %g2/%g3 pair.
 * Fault results: a fault on the ldd or on the first st returns
 * %g1 & 0xf; a fault on the second st returns (%g1 & 0xf) - 4, since the
 * first word has already been stored.
 */
245copy_user_table_end:
246 be copy_user_last7
/* Delay slot: flags for the first test in copy_user_last7. */
247 andcc %g1, 4, %g0
248
249 EX(ldd [%o1], %g2, and %g1, 0xf)
250 add %o0, 8, %o0
251 add %o1, 8, %o1
252 EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
253 EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/*
 * copy_user_last7: copy the final 0..7 bytes and return 0.
 *
 * Entered with the flags of an "andcc <len>, 4" executed by the
 * predecessor and with %g1 = length. Bits 2, 1 and 0 of %g1 select a
 * word, a halfword and a byte copy in turn; each be tests the flags set
 * in the delay slot of the previous branch.
 * Fault results are %g1 & 7, %g1 & 3 and 1 for the three steps.
 */
254copy_user_last7:
255 be 1f
256 andcc %g1, 2, %g0
257
/* Bit 2 set: one word. */
258 EX(ld [%o1], %g2, and %g1, 7)
259 add %o1, 4, %o1
260 EX(st %g2, [%o0], and %g1, 7)
261 add %o0, 4, %o0
2621:
263 be 1f
264 andcc %g1, 1, %g0
265
/* Bit 1 set: one halfword. */
266 EX(lduh [%o1], %g2, and %g1, 3)
267 add %o1, 2, %o1
268 EX(sth %g2, [%o0], and %g1, 3)
269 add %o0, 2, %o0
2701:
271 be 1f
272 nop
273
/* Bit 0 set: the last byte. */
274 EX(ldub [%o1], %g2, add %g0, 1)
275 EX(stb %g2, [%o0], add %g0, 1)
2761:
/* Success: return 0. */
277 retl
278 clr %o0
279
/*
 * ldd_std: 128-byte loop using doubleword stores.
 *
 * __copy_user enters at ldd_std + 4 after executing an identical first
 * ldd in a branch delay slot; later iterations start at ldd_std itself.
 * %g7 counts the bytes left for this loop, %o1/%o0 advance by 128 per
 * iteration.
 *
 * After the loop, the 16-byte chunk dispatch is repeated here: the
 * sequence is the same as the one at local label 3 in front of the
 * MOVE_LASTCHUNK table and jumps into that table (or directly to
 * copy_user_table_end when %g1 & 0x70 is zero), with the flags of
 * "andcc %g1, 8" set for copy_user_table_end.
 */
280ldd_std:
281 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
282 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
283 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
284 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
285 subcc %g7, 128, %g7
286 add %o1, 128, %o1
287 bne ldd_std
288 add %o0, 128, %o0
289
290 andcc %g1, 0x70, %g7
291 be copy_user_table_end
292 andcc %g1, 8, %g0
293
/* Code distance = %g7 * 1.5 (24 code bytes per 16 data bytes). */
294 sethi %hi(copy_user_table_end), %o5
295 srl %g7, 1, %o4
296 add %g7, %o4, %o4
297 add %o1, %g7, %o1
298 sub %o5, %o4, %o5
299 jmpl %o5 + %lo(copy_user_table_end), %g0
300 add %o0, %g7, %o0
301
/*
 * cannot_optimize: src and dst differ in their low two bits.
 *
 * Entered from __copy_user with the flags of "cmp %o2, 15" and with
 * %o5 = (dst ^ src) & 3.
 *   len <= 15   -> short_end
 *   %o5 != 2    -> byte_chunk (byte-by-byte 16-byte loop)
 *   %o5 == 2    -> bit 0 of src and dst agree, so halfword copies are
 *                  used: one leading byte if src is odd, then the
 *                  16-byte loop at local label 10.
 * %o3 holds the byte count for the 16-byte loops (len & ~15); %o2 keeps
 * the length.
 */
302cannot_optimize:
303 bleu short_end
304 cmp %o5, 2
305
306 bne byte_chunk
/* Delay slot: %o3 is set up for either loop. */
307 and %o2, 0xfffffff0, %o3
308
309 andcc %o1, 1, %g0
310 be 10f
311 nop
312
/*
 * Odd src: move one byte (fault returns %o2 via EXO2), then recompute
 * %o3; if fewer than 16 bytes remain go to short_end.
 */
313 EXO2(ldub [%o1], %g2)
314 add %o1, 1, %o1
315 EXO2(stb %g2, [%o0])
316 sub %o2, 1, %o2
317 andcc %o2, 0xfffffff0, %o3
318 be short_end
319 add %o0, 1, %o0
32010:
/* 16 bytes per iteration as eight halfwords. */
321 MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
322 MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
323 subcc %o3, 0x10, %o3
324 add %o1, 0x10, %o1
325 bne 10b
326 add %o0, 0x10, %o0
/*
 * Join short_end at its local label 2; the delay slot performs the
 * "and" that the skipped first instruction of short_end would have done.
 */
327 b 2f
328 and %o2, 0xe, %o3
329
/*
 * byte_chunk: 16-byte loop using single-byte loads and stores.
 *
 * The eight MOVE_SHORTCHUNK expansions use offsets -0x02 .. -0x10, which
 * makes their displacements 0 .. 15 from %o1/%o0. %o3 is decremented by
 * 16 per iteration and the loop ends when it reaches zero; execution
 * then falls through into short_end.
 */
330byte_chunk:
331 MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
332 MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
333 MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
334 MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
335 MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
336 MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
337 MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
338 MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
339 subcc %o3, 0x10, %o3
340 add %o1, 0x10, %o1
341 bne byte_chunk
342 add %o0, 0x10, %o0
343
/*
 * short_end: copy the even part of a tail shorter than 16 bytes, one
 * byte at a time, by jumping into the table of MOVE_LAST_SHORTCHUNK
 * expansions that ends at short_table_end.
 *
 * %o3 = %o2 & 0xe is the number of bytes moved here. Each 2-byte chunk
 * takes four instructions (16 bytes of code), so the code distance is
 * %o3 * 8, computed into %o4. src and dst are advanced by %o3 first
 * because the table addresses memory with negative displacements.
 * The "andcc %o2, 1" in the jmpl delay slot sets the flags tested at
 * short_table_end.
 * Local label 2 is the entry used by cannot_optimize, which sets %o3
 * itself.
 */
344short_end:
345 and %o2, 0xe, %o3
3462:
347 sethi %hi(short_table_end), %o5
348 sll %o3, 3, %o4
349 add %o0, %o3, %o0
350 sub %o5, %o4, %o5
351 add %o1, %o3, %o1
352 jmpl %o5 + %lo(short_table_end), %g0
353 andcc %o2, 1, %g0
354 MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
355 MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
356 MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3)
357 MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3)
358 MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3)
359 MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3)
360 MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3)
/*
 * short_table_end: finish the byte-wise tail.
 *
 * Entered with the flags of "andcc %o2, 1" from short_end. If the length
 * is odd, copy the final byte (a fault there returns 1); then return 0.
 */
361short_table_end:
362 be 1f
363 nop
364 EX(ldub [%o1], %g2, add %g0, 1)
365 EX(stb %g2, [%o0], add %g0, 1)
3661:
367 retl
368 clr %o0
369
/*
 * short_aligned_end: len <= 15 and dst/src agree in their low two bits.
 *
 * Entered from __copy_user with the flags of "andcc %o1, 3". If src is
 * not word aligned the copy is done byte-wise via short_end. Otherwise
 * bit 3 of the length selects an 8-byte copy done as two words, and the
 * remaining 0..7 bytes are handled by copy_user_last7.
 *
 * Fault results: the two loads and the first store return %o2 (EXO2);
 * the second store returns %o2 - 4 because the first word is already in
 * place.
 */
370short_aligned_end:
371 bne short_end
372 andcc %o2, 8, %g0
373
374 be 1f
/*
 * Delay slot: flags for the first test in copy_user_last7. They survive
 * the 8-byte copy below, which contains no cc-setting instruction.
 */
375 andcc %o2, 4, %g0
376
377 EXO2(ld [%o1 + 0x00], %g2)
378 EXO2(ld [%o1 + 0x04], %g3)
379 add %o1, 8, %o1
380 EXO2(st %g2, [%o0 + 0x00])
381 EX(st %g3, [%o0 + 0x04], sub %o2, 4)
382 add %o0, 8, %o0
3831:
/* copy_user_last7 expects the length in %g1. */
384 b copy_user_last7
385 mov %o2, %g1
386
/*
 * Shared fixup for every EXO2 access (referenced as 97f from the
 * exception-table records): return the current value of %o2 as the
 * count of bytes not copied.
 */
387 .section .fixup,#alloc,#execinstr
388 .align 4
38997:
390 retl
391 mov %o2, %o0
392
/*
 * End marker matching __copy_user_begin.
 * NOTE(review): no section directive precedes this label after the
 * ".section .fixup" above, so it is defined in .fixup rather than in
 * .text where __copy_user_begin lives - confirm that users of the two
 * markers do not rely on them bracketing the .text code.
 */
393 .globl __copy_user_end
394__copy_user_end:
395

/* source code of linux/arch/sparc/lib/copy_user.S */