1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
 * copy_page, __copy_user_page, __copy_user implementations for SuperH
4 | * |
5 | * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima |
6 | * Copyright (C) 2002 Toshinobu Sugioka |
7 | * Copyright (C) 2006 Paul Mundt |
8 | */ |
9 | #include <linux/linkage.h> |
10 | #include <asm/page.h> |
11 | |
12 | /* |
13 | * copy_page |
14 | * @to: P1 address |
15 | * @from: P1 address |
16 | * |
17 | * void copy_page(void *to, void *from) |
18 | */ |
19 | |
20 | /* |
21 | * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch |
22 | * r8 --- from + PAGE_SIZE |
23 | * r9 --- not used |
24 | * r10 --- to |
25 | * r11 --- from |
26 | */ |
ENTRY(copy_page)
	mov.l	r8,@-r15	! save callee-saved r8/r10/r11
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10		! r10 = to
	mov	r5,r11		! r11 = from (running source pointer)
	mov	r5,r8
	mov	#(PAGE_SIZE >> 10), r0
	shll8	r0		! rebuild PAGE_SIZE in two shifts (<<10):
	shll2	r0		! the mov immediate is only 8 bits wide
	add	r0,r8		! r8 = from + PAGE_SIZE (loop sentinel)
	!
	! Copy one 32-byte chunk per iteration: burst-read into r0..r7,
	! then store back-to-front so only one pointer adjustment of r10
	! is needed per chunk.
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10		! SH-4: allocate the destination cache
				! line without fetching it from memory
#else
	mov.l	r0,@r10
#endif
	add	#32,r10		! point past chunk, store downwards
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8		! source reached from + PAGE_SIZE?
	bf/s	1b
	add	#28,r10		! delay slot: r10 -> next output chunk
	!
	mov.l	@r15+,r11	! restore callee-saved registers
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	nop
69 | |
70 | /* |
71 | * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); |
72 | * Return the number of bytes NOT copied |
73 | */ |
/*
 * EX(insn): execute insn and plant an __ex_table entry mapping a fault
 * at insn (label 9999) to the fixup at label 6000, which computes the
 * number of uncopied bytes, pops the saved r8-r11 and returns.
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
/*
 * EX_NO_POP(insn): as EX(), but the fixup at label 6005 pops nothing --
 * used on the small-copy fast path, which runs before r8-r11 have been
 * pushed onto the stack.
 */
#define EX_NO_POP(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
 *
 * In:   r4 = to, r5 = from, r6 = n (bytes)
 * Out:  r0 = number of bytes NOT copied (0 on success)
 *
 * Either pointer may be a user-space address, so every access that can
 * fault must be wrapped in EX()/EX_NO_POP() to get an exception-table
 * entry.  r3 holds the one-past-the-end destination address for the
 * whole function so the fixup handlers can compute the residue as
 * r3 - r4 (r4 always tracks the current destination).
 */
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	add	r6,r3		! delay slot: r3 = last destination address

	! Calculate bytes needed to align to src, interleaved with
	! saving the callee-saved registers used by the word loops below.
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0		! r0 = (4 - (src & 3)) & 3
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f		! source already long-word aligned

1:
	! Copy bytes to long word align src
	EX(	mov.b	@r5+,r1 	)
	dt	r0
	add	#-1,r6
	EX(	mov.b	r1,@r4 	)
	bf/s	1b
	add	#1,r4

	! Jump to appropriate routine depending on dest alignment
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1		! r1 = dest & 3
	shlr2	r2		! r2 = long words remaining
	shll2	r1		! scale index for 4-byte table entries
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s branch displacement doesn't
 * overflow and result in a more expensive branch being inserted. This is
 * the fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
	EX_NO_POP(	mov.b	@r5+,r0 )
	dt	r6
	EX_NO_POP(	mov.b	r0,@r4 )
	bf/s	2b
	add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler: computes the residue and resumes at 5000 above
# (skipping the mov #0,r0, so r0 keeps the uncopied-byte count).
.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	sub	r4,r0		! delay slot: r0 = end - current dest
	.align	2
8000:	.long	5000b

.previous
	rts			! small-copy return: nothing was pushed
	nop

! Destination = 00 (long-word aligned)

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0	! r0 = bytes up to a 32-byte boundary
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0		! long words needed for alignment
3:
	EX(	mov.l	@r5+,r1	)
	dt	r0
	EX(	mov.l	r1,@r4	)
	bf/s	3b
	add	#4,r4

	! Main loop: one 32-byte destination cache line per iteration,
	! bookkeeping interleaved with the stores to hide latency.
2:
	EX(	mov.l	@r5+,r0	)
	EX(	mov.l	@r5+,r1	)
	EX(	mov.l	@r5+,r2	)
	EX(	mov.l	@r5+,r7	)
	EX(	mov.l	@r5+,r8	)
	EX(	mov.l	@r5+,r9	)
	EX(	mov.l	@r5+,r10	)
	EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
	EX(	movca.l	r0,@r4	)	! allocate line without fetching it
#else
	EX(	mov.l	r0,@r4	)
#endif
	add	#-32, r6
	EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
	EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
	EX(	mov.l	r7,@(12,r4)	)
	EX(	mov.l	r8,@(16,r4)	)
	EX(	mov.l	r9,@(20,r4)	)
	EX(	mov.l	r10,@(24,r4)	)
	EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	add	#32,r4

1:	mov	r6, r0		! tail: remaining long words one at a time
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
	EX(	mov.l	@r5+,r1	)
	dt	r0
	EX(	mov.l	r1,@r4	)
	bf/s	1b
	add	#4,r4

	bra	.L_cleanup
	nop

! Destination = 10 (half-word aligned): long-word reads, data shifted
! into place with xtrct (concatenate middle 32 bits of a register pair).

.L_dest10:
	mov	r2,r7		! r2 = long words remaining
	shlr2	r7
	shlr	r7		! r7 = 32-byte groups (8 long words each)
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	and	r0,r2		! delay slot: r2 = leftover long words (<8)
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	EX(	mov.l	@r5+,r0	)
	EX(	mov.l	@r5+,r1	)
	EX(	mov.l	@r5+,r8	)
	EX(	mov.l	@r5+,r9	)
	EX(	mov.l	@r5+,r10	)
	EX(	mov.w	r0,@r4	)	! leading half-word
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

	EX(	mov.l	r0,@r4	)
	EX(	mov.l	r1,@(4,r4)	)
	EX(	mov.l	r8,@(8,r4)	)
	EX(	mov.l	r9,@(12,r4)	)

	EX(	mov.l	@r5+,r1	)
	EX(	mov.l	@r5+,r8	)
	EX(	mov.l	@r5+,r0	)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0		! trailing half-word
	EX(	mov.l	r10,@(16,r4)	)
	EX(	mov.l	r1,@(20,r4)	)
	EX(	mov.l	r8,@(24,r4)	)
	EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	add	#30,r4
#else
	EX(	mov.l	@(28,r5),r0	)
	EX(	mov.l	@(24,r5),r8	)
	EX(	mov.l	@(20,r5),r9	)
	EX(	mov.l	@(16,r5),r10	)
	EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
	EX(	mov.l	r0,@(28,r4)	)
	EX(	mov.l	r8,@(24,r4)	)
	EX(	mov.l	r9,@(20,r4)	)

	EX(	mov.l	@(12,r5),r0	)
	EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
	EX(	mov.l	@(4,r5),r9	)
	EX(	mov.l	r10,@(16,r4)	)	! was missing EX(): this store
					! can fault on a user destination
					! just like its neighbours
	EX(	mov.l	@r5,r10	)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
	EX(	mov.l	r0,@(12,r4)	)
	EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
	EX(	mov.l	r9,@(4,r4)	)
	EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
	EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	EX(	mov.w	r0,@r4	)
	shlr16	r0
	EX(	mov.w 	r0,@(2,r4)	)
#else
	EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
	EX(	mov.w	r0,@r4	)
#endif
	bf/s	1b
	add	#4,r4

	bra	.L_cleanup
	nop

! Destination = 01 or 11 (byte aligned)

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
	EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	EX(	mov.b	r0,@r4	)
	shlr8	r0
	add	#1,r4
	EX(	mov.w	r0,@r4	)
	shlr16	r0
	EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	add	#3,r4
#else
	EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
	EX(	mov.b	r7,@r4	)
	add	#1,r4
	EX(	mov.w	r0,@r4	)
	bf/s	.L_dest01
	add	#3,r4
#endif

	! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0		! 0..3 trailing bytes
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
	EX(	mov.b	@r5+,r0	)
	dt	r6
	EX(	mov.b	r0,@r4	)
	bf/s	.L_cleanup_loop
	add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler: residue = end (r3) - current dest (r4); resume at
# 5000 above so the register pops and rts below still run.
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	sub	r4,r0		! delay slot: executed before the jump
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8	! restore callee-saved registers
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	mov.l	@r15+,r11
391 | |