1 | /* |
2 | * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines |
3 | * |
4 | * Author: Nicolas Pitre <nico@fluxnic.net> |
5 | * - contributed to gcc-3.4 on Sep 30, 2003 |
6 | * - adapted for the Linux kernel on Oct 2, 2003 |
7 | */ |
8 | |
9 | /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. |
10 | |
11 | This file is free software; you can redistribute it and/or modify it |
12 | under the terms of the GNU General Public License as published by the |
13 | Free Software Foundation; either version 2, or (at your option) any |
14 | later version. |
15 | |
16 | In addition to the permissions in the GNU General Public License, the |
17 | Free Software Foundation gives you unlimited permission to link the |
18 | compiled version of this file into combinations with other programs, |
19 | and to distribute those combinations without any restriction coming |
20 | from the use of this file. (The General Public License restrictions |
21 | do apply in other respects; for example, they cover modification of |
22 | the file, and distribution when not linked into a combine |
23 | executable.) |
24 | |
25 | This file is distributed in the hope that it will be useful, but |
26 | WITHOUT ANY WARRANTY; without even the implied warranty of |
27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
28 | General Public License for more details. |
29 | |
30 | You should have received a copy of the GNU General Public License |
31 | along with this program; see the file COPYING. If not, write to |
32 | the Free Software Foundation, 59 Temple Place - Suite 330, |
33 | Boston, MA 02111-1307, USA. */ |
34 | |
35 | |
36 | #include <linux/linkage.h> |
37 | #include <asm/assembler.h> |
38 | #include <asm/unwind.h> |
39 | |
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit shift-and-subtract division.
 *
 *   dividend  register holding the dividend; reduced in place (clobbered)
 *   divisor   register holding the divisor; shifted in place (clobbered)
 *   result    register that receives the quotient (zeroed, then built up
 *             bit by bit)
 *   curbit    scratch register: the quotient bit that corresponds to the
 *             current alignment of the divisor
 *
 * Both users in this file branch away before expanding this macro when
 * the divisor is 0, 1 or a power of two, or when dividend <= divisor.
 * The condition flags are clobbered.
 */
40 | .macro ARM_DIV_BODY dividend, divisor, result, curbit |
41 | |
42 | #if __LINUX_ARM_ARCH__ >= 5 |
43 | |
/* CLZ available: line the divisor up with the dividend in one step. */
44 | clz \curbit, \divisor /* leading zeros of the divisor */ |
45 | clz \result, \dividend /* leading zeros of the dividend */ |
46 | sub \result, \curbit, \result /* shift that aligns the top set bits */ |
47 | mov \curbit, #1 |
48 | mov \divisor, \divisor, lsl \result /* divisor <<= shift */ |
49 | mov \curbit, \curbit, lsl \result /* curbit = 1 << shift */ |
50 | mov \result, #0 /* quotient starts empty */ |
51 | |
52 | #else |
53 | |
54 | @ Initially shift the divisor left 3 bits if possible, |
55 | @ set curbit accordingly. This allows for curbit to be located |
56 | @ at the left end of each 4 bit nibbles in the division loop |
57 | @ to save one loop in most cases. |
58 | tst \divisor, #0xe0000000 |
59 | moveq \divisor, \divisor, lsl #3 |
60 | moveq \curbit, #8 |
61 | movne \curbit, #1 |
62 | |
63 | @ Unless the divisor is very big, shift it up in multiples of |
64 | @ four bits, since this is the amount of unwinding in the main |
65 | @ division loop. Continue shifting until the divisor is |
66 | @ larger than the dividend. |
67 | 1: cmp \divisor, #0x10000000 |
68 | cmplo \divisor, \dividend |
69 | movlo \divisor, \divisor, lsl #4 |
70 | movlo \curbit, \curbit, lsl #4 |
71 | blo 1b |
72 | |
73 | @ For very big divisors, we must shift it a bit at a time, or |
74 | @ we will be in danger of overflowing. |
75 | 1: cmp \divisor, #0x80000000 |
76 | cmplo \divisor, \dividend |
77 | movlo \divisor, \divisor, lsl #1 |
78 | movlo \curbit, \curbit, lsl #1 |
79 | blo 1b |
80 | |
81 | mov \result, #0 /* quotient starts empty */ |
82 | |
83 | #endif |
84 | |
/*
 * The loop is unrolled four times: each pass tries the divisor at its
 * current position and at 1, 2 and 3 bits further right, setting the
 * matching quotient bit whenever the subtraction is possible.
 */
85 | @ Division loop |
86 | 1: cmp \dividend, \divisor |
87 | subhs \dividend, \dividend, \divisor |
88 | orrhs \result, \result, \curbit |
89 | cmp \dividend, \divisor, lsr #1 |
90 | subhs \dividend, \dividend, \divisor, lsr #1 |
91 | orrhs \result, \result, \curbit, lsr #1 |
92 | cmp \dividend, \divisor, lsr #2 |
93 | subhs \dividend, \dividend, \divisor, lsr #2 |
94 | orrhs \result, \result, \curbit, lsr #2 |
95 | cmp \dividend, \divisor, lsr #3 |
96 | subhs \dividend, \dividend, \divisor, lsr #3 |
97 | orrhs \result, \result, \curbit, lsr #3 |
98 | cmp \dividend, #0 @ Early termination? |
99 | movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? |
100 | movne \divisor, \divisor, lsr #4 /* keep the divisor in step with curbit */ |
101 | bne 1b /* loop while dividend != 0 and curbit != 0 */ |
102 | |
103 | .endm |
104 | |
105 | |
/*
 * ARM_DIV2_ORDER - compute the bit position of a power-of-two divisor.
 *
 *   divisor  register holding the divisor; left untouched on the CLZ path,
 *            shifted right in place (clobbered) on the fallback path
 *   order    register that receives the shift count, i.e. log2(divisor)
 *
 * Both users in this file expand it only after checking that
 * (divisor & (divisor - 1)) == 0.  The final step of the fallback path
 * yields the exact bit position only for such single-bit values.
 */
106 | .macro ARM_DIV2_ORDER divisor, order |
107 | |
108 | #if __LINUX_ARM_ARCH__ >= 5 |
109 | |
110 | clz \order, \divisor |
111 | rsb \order, \order, #31 /* order = 31 - leading zeros */ |
112 | |
113 | #else |
114 | |
/* No CLZ: narrow the position down in steps of 16, 8 and 4 bits. */
115 | cmp \divisor, #(1 << 16) |
116 | movhs \divisor, \divisor, lsr #16 |
117 | movhs \order, #16 |
118 | movlo \order, #0 |
119 | |
120 | cmp \divisor, #(1 << 8) |
121 | movhs \divisor, \divisor, lsr #8 |
122 | addhs \order, \order, #8 |
123 | |
124 | cmp \divisor, #(1 << 4) |
125 | movhs \divisor, \divisor, lsr #4 |
126 | addhs \order, \order, #4 |
127 | |
/* The divisor is now below 16: a value above 4 adds 3, while 1, 2 or 4 add divisor >> 1 (0, 1 or 2). */
128 | cmp \divisor, #(1 << 2) |
129 | addhi \order, \order, #3 |
130 | addls \order, \order, \divisor, lsr #1 |
131 | |
132 | #endif |
133 | |
134 | .endm |
135 | |
136 | |
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit remainder computation.
 *
 *   dividend  register holding the dividend; reduced in place so that it
 *             holds the remainder when the macro ends
 *   divisor   register holding the divisor; shifted in place (clobbered)
 *   order     scratch register: number of bit positions the divisor was
 *             shifted left, later used as the count of remaining steps
 *   spare     scratch register, written only on the CLZ path
 *
 * Both users in this file return early, before expanding this macro, when
 * the divisor is 0, 1 or a power of two, or when dividend <= divisor.
 * The condition flags are clobbered.
 */
137 | .macro ARM_MOD_BODY dividend, divisor, order, spare |
138 | |
139 | #if __LINUX_ARM_ARCH__ >= 5 |
140 | |
141 | clz \order, \divisor |
142 | clz \spare, \dividend |
143 | sub \order, \order, \spare /* shift that aligns the top set bits */ |
144 | mov \divisor, \divisor, lsl \order |
145 | |
146 | #else |
147 | |
148 | mov \order, #0 |
149 | |
150 | @ Unless the divisor is very big, shift it up in multiples of |
151 | @ four bits, since this is the amount of unwinding in the main |
152 | @ division loop. Continue shifting until the divisor is |
153 | @ larger than the dividend. |
154 | 1: cmp \divisor, #0x10000000 |
155 | cmplo \divisor, \dividend |
156 | movlo \divisor, \divisor, lsl #4 |
157 | addlo \order, \order, #4 |
158 | blo 1b |
159 | |
160 | @ For very big divisors, we must shift it a bit at a time, or |
161 | @ we will be in danger of overflowing. |
162 | 1: cmp \divisor, #0x80000000 |
163 | cmplo \divisor, \dividend |
164 | movlo \divisor, \divisor, lsl #1 |
165 | addlo \order, \order, #1 |
166 | blo 1b |
167 | |
168 | #endif |
169 | |
/*
 * A divisor shifted left by "order" bits needs order + 1 compare/subtract
 * steps (one per position from the shifted one back down to the original).
 * After subtracting 3 the counter is >= 0 exactly when at least four steps
 * remain, so a full batch of four can run.
 */
170 | @ Perform all needed subtractions to keep only the remainder. |
171 | @ Do comparisons in batch of 4 first. |
172 | subs \order, \order, #3 @ yes, 3 is intended here |
173 | blt 2f |
174 | |
175 | 1: cmp \dividend, \divisor |
176 | subhs \dividend, \dividend, \divisor |
177 | cmp \dividend, \divisor, lsr #1 |
178 | subhs \dividend, \dividend, \divisor, lsr #1 |
179 | cmp \dividend, \divisor, lsr #2 |
180 | subhs \dividend, \dividend, \divisor, lsr #2 |
181 | cmp \dividend, \divisor, lsr #3 |
182 | subhs \dividend, \dividend, \divisor, lsr #3 |
183 | cmp \dividend, #1 /* lt here means the dividend has reached zero */ |
184 | mov \divisor, \divisor, lsr #4 /* plain mov: flags from the cmp survive */ |
185 | subsge \order, \order, #4 /* dividend non-zero: account for the batch */ |
186 | bge 1b /* another full batch of four is due */ |
187 | |
/* Finished if no partial batch is left ((order & 3) == 0) or if the dividend is already zero. */
188 | tst \order, #3 |
189 | teqne \dividend, #0 |
190 | beq 5f |
191 | |
/* At this point order is -1, -2 or -3, meaning 3, 2 or 1 steps remain. */
192 | @ Either 1, 2 or 3 comparison/subtractions are left. |
193 | 2: cmn \order, #2 |
194 | blt 4f /* order == -3: one step */ |
195 | beq 3f /* order == -2: two steps */ |
196 | cmp \dividend, \divisor |
197 | subhs \dividend, \dividend, \divisor |
198 | mov \divisor, \divisor, lsr #1 |
199 | 3: cmp \dividend, \divisor |
200 | subhs \dividend, \dividend, \divisor |
201 | mov \divisor, \divisor, lsr #1 |
202 | 4: cmp \dividend, \divisor |
203 | subhs \dividend, \dividend, \divisor |
204 | 5: |
205 | .endm |
206 | |
207 | |
/*
 * With CONFIG_ARM_PATCH_IDIV the entry point below is aligned to 8 bytes.
 * NOTE(review): presumably this is required by the code that patches the
 * entry at run time; that code is not part of this file - confirm there.
 */
208 | #ifdef CONFIG_ARM_PATCH_IDIV |
209 | .align 3 |
210 | #endif |
211 | |
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient
 * Clobbers: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.
 */
212 | ENTRY(__udivsi3) |
213 | ENTRY(__aeabi_uidiv) |
214 | UNWIND(.fnstart) |
215 | |
216 | subs r2, r1, #1 /* r2 = divisor - 1 */ |
217 | reteq lr /* divisor == 1: r0 already holds the quotient */ |
218 | bcc Ldiv0 /* borrow: the divisor was 0 */ |
219 | cmp r0, r1 |
220 | bls 11f /* dividend <= divisor: quotient is 0 or 1 */ |
221 | tst r1, r2 |
222 | beq 12f /* divisor is a power of two: plain shift */ |
223 | |
224 | ARM_DIV_BODY r0, r1, r2, r3 |
225 | |
226 | mov r0, r2 /* the macro left the quotient in r2 */ |
227 | ret lr |
228 | |
/* Flags still come from "cmp r0, r1": eq gives 1, lower gives 0. */
229 | 11: moveq r0, #1 |
230 | movne r0, #0 |
231 | ret lr |
232 | |
233 | 12: ARM_DIV2_ORDER r1, r2 |
234 | |
235 | mov r0, r0, lsr r2 /* quotient = dividend >> log2(divisor) */ |
236 | ret lr |
237 | |
238 | UNWIND(.fnend) |
239 | ENDPROC(__udivsi3) |
240 | ENDPROC(__aeabi_uidiv) |
241 | |
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = dividend modulo divisor
 * Clobbers: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.
 *
 * The conditional chain below settles every trivial case before the
 * generic code runs:
 *   divisor == 1 or dividend == divisor  -> r0 = 0, return
 *   dividend < divisor                   -> r0 unchanged, return
 *   divisor is a power of two            -> r0 &= divisor - 1, return
 */
242 | ENTRY(__umodsi3) |
243 | UNWIND(.fnstart) |
244 | |
245 | subs r2, r1, #1 @ compare divisor with 1 |
246 | bcc Ldiv0 /* borrow: the divisor was 0 */ |
247 | cmpne r0, r1 @ compare dividend with divisor |
248 | moveq r0, #0 /* eq from either compare: remainder is 0 */ |
249 | tsthi r1, r2 @ see if divisor is power of 2 |
250 | andeq r0, r0, r2 /* mask with divisor - 1 (harmless when r0 is already 0) */ |
251 | retls lr /* done unless dividend > divisor and not a power of two */ |
252 | |
253 | ARM_MOD_BODY r0, r1, r2, r3 |
254 | |
255 | ret lr |
256 | |
257 | UNWIND(.fnend) |
258 | ENDPROC(__umodsi3) |
259 | |
/*
 * With CONFIG_ARM_PATCH_IDIV the entry point below is aligned to 8 bytes.
 * NOTE(review): presumably this is required by the code that patches the
 * entry at run time; that code is not part of this file - confirm there.
 */
260 | #ifdef CONFIG_ARM_PATCH_IDIV |
261 | .align 3 |
262 | #endif |
263 | |
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient
 * Clobbers: r1, r2, r3, ip and the condition flags
 *
 * The magnitudes are divided as unsigned values and the sign is applied
 * at the end from ip = r0 ^ r1, whose top bit is the sign of the result.
 * A zero divisor branches to Ldiv0.
 */
264 | ENTRY(__divsi3) |
265 | ENTRY(__aeabi_idiv) |
266 | UNWIND(.fnstart) |
267 | |
268 | cmp r1, #0 |
269 | eor ip, r0, r1 @ save the sign of the result. |
270 | beq Ldiv0 /* eor leaves the flags alone: still the cmp result */ |
271 | rsbmi r1, r1, #0 @ loops below use unsigned. |
272 | subs r2, r1, #1 @ division by 1 or -1 ? |
273 | beq 10f |
274 | movs r3, r0 |
275 | rsbmi r3, r0, #0 @ positive dividend value |
276 | cmp r3, r1 |
277 | bls 11f /* |dividend| <= |divisor|: quotient is 0, 1 or -1 */ |
278 | tst r1, r2 @ divisor is power of 2 ? |
279 | beq 12f |
280 | |
281 | ARM_DIV_BODY r3, r1, r0, r2 |
282 | |
283 | cmp ip, #0 |
284 | rsbmi r0, r0, #0 /* operands had opposite signs: negate the quotient */ |
285 | ret lr |
286 | |
/*
 * Divisor is +1 or -1.  ip ^ r0 equals the original divisor, so the
 * dividend still held in r0 is negated only when dividing by -1.
 */
287 | 10: teq ip, r0 @ same sign ? |
288 | rsbmi r0, r0, #0 |
289 | ret lr |
290 | |
/* Flags come from "cmp r3, r1": lower gives 0, equal gives +1 or -1 according to the sign in ip. */
291 | 11: movlo r0, #0 |
292 | moveq r0, ip, asr #31 |
293 | orreq r0, r0, #1 |
294 | ret lr |
295 | |
296 | 12: ARM_DIV2_ORDER r1, r2 |
297 | |
298 | cmp ip, #0 |
299 | mov r0, r3, lsr r2 /* |dividend| >> log2(|divisor|); flags untouched */ |
300 | rsbmi r0, r0, #0 |
301 | ret lr |
302 | |
303 | UNWIND(.fnend) |
304 | ENDPROC(__divsi3) |
305 | ENDPROC(__aeabi_idiv) |
306 | |
/*
 * __modsi3 - signed 32-bit remainder.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = remainder, carrying the sign of the dividend
 * Clobbers: r1, r2, r3, ip and the condition flags
 *
 * The remainder of the magnitudes is computed as an unsigned value and
 * negated at the end when the original dividend (kept in ip) was negative.
 * A zero divisor branches to Ldiv0.
 */
307 | ENTRY(__modsi3) |
308 | UNWIND(.fnstart) |
309 | |
310 | cmp r1, #0 |
311 | beq Ldiv0 |
312 | rsbmi r1, r1, #0 @ loops below use unsigned. |
313 | movs ip, r0 @ preserve sign of dividend |
314 | rsbmi r0, r0, #0 @ if negative make positive |
315 | subs r2, r1, #1 @ compare divisor with 1 |
316 | cmpne r0, r1 @ compare dividend with divisor |
317 | moveq r0, #0 /* divisor is 1 or the magnitudes are equal: remainder 0 */ |
318 | tsthi r1, r2 @ see if divisor is power of 2 |
319 | andeq r0, r0, r2 /* mask with divisor - 1 (harmless when r0 is already 0) */ |
320 | bls 10f /* trivial case settled: only the sign fix-up is left */ |
321 | |
322 | ARM_MOD_BODY r0, r1, r2, r3 |
323 | |
324 | 10: cmp ip, #0 |
325 | rsbmi r0, r0, #0 /* negative dividend: negate the remainder */ |
326 | ret lr |
327 | |
328 | UNWIND(.fnend) |
329 | ENDPROC(__modsi3) |
330 | |
331 | #ifdef CONFIG_AEABI |
332 | |
/*
 * __aeabi_uidivmod - unsigned division returning quotient and remainder.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient as returned by __aeabi_uidiv,
 *           r1 = dividend - quotient * divisor
 * Clobbers: r2, r3 and whatever __aeabi_uidiv clobbers
 *
 * The original operands are kept on the stack across the call and are
 * reloaded into r1 (dividend) and r2 (divisor) afterwards.
 * NOTE(review): ip is stored and reloaded as well; presumably it is only
 * there to make the frame four words - confirm.
 */
333 | ENTRY(__aeabi_uidivmod) |
334 | UNWIND(.fnstart) |
335 | UNWIND(.save {r0, r1, ip, lr} ) |
336 | |
337 | stmfd sp!, {r0, r1, ip, lr} |
338 | bl __aeabi_uidiv |
339 | ldmfd sp!, {r1, r2, ip, lr} /* r1 = saved dividend, r2 = saved divisor */ |
340 | mul r3, r0, r2 /* r3 = quotient * divisor */ |
341 | sub r1, r1, r3 /* r1 = remainder */ |
342 | ret lr |
343 | |
344 | UNWIND(.fnend) |
345 | ENDPROC(__aeabi_uidivmod) |
346 | |
/*
 * __aeabi_idivmod - signed division returning quotient and remainder.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient as returned by __aeabi_idiv,
 *           r1 = dividend - quotient * divisor
 * Clobbers: r2, r3 and whatever __aeabi_idiv clobbers
 *
 * The original operands are kept on the stack across the call and are
 * reloaded into r1 (dividend) and r2 (divisor) afterwards.
 * NOTE(review): ip is stored and reloaded as well; presumably it is only
 * there to make the frame four words - confirm.
 */
347 | ENTRY(__aeabi_idivmod) |
348 | UNWIND(.fnstart) |
349 | UNWIND(.save {r0, r1, ip, lr} ) |
350 | stmfd sp!, {r0, r1, ip, lr} |
351 | bl __aeabi_idiv |
352 | ldmfd sp!, {r1, r2, ip, lr} /* r1 = saved dividend, r2 = saved divisor */ |
353 | mul r3, r0, r2 /* r3 = quotient * divisor */ |
354 | sub r1, r1, r3 /* r1 = remainder */ |
355 | ret lr |
356 | |
357 | UNWIND(.fnend) |
358 | ENDPROC(__aeabi_idivmod) |
359 | |
360 | #endif |
361 | |
/*
 * Ldiv0 - common target for a zero divisor.
 *
 * Reached by a plain branch from the division and remainder routines in
 * this file, so lr still holds the return address of their caller.  Calls
 * __div0 (defined outside this file) and, if that returns, hands 0 back
 * in r0 to the original caller.
 */
362 | Ldiv0: |
363 | UNWIND(.fnstart) |
364 | UNWIND(.pad #4) |
365 | UNWIND(.save {lr}) |
366 | str lr, [sp, #-8]! /* reserve 8 bytes: lr plus the 4 bytes of padding declared above */ |
367 | bl __div0 |
368 | mov r0, #0 @ About as wrong as it could be. |
369 | ldr pc, [sp], #8 /* return through the saved lr and release the 8 bytes */ |
370 | UNWIND(.fnend) |
371 | ENDPROC(Ldiv0) |
372 | |