1 | /* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0 |
2 | |
3 | Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, |
4 | 2004, 2005, 2006 |
5 | Free Software Foundation, Inc. |
6 | */ |
7 | |
8 | !! libgcc routines for the Renesas / SuperH SH CPUs. |
9 | !! Contributed by Steve Chamberlain. |
10 | !! sac@cygnus.com |
11 | |
12 | !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines |
13 | !! recoded in assembly by Toshiyasu Morita |
14 | !! tm@netcom.com |
15 | |
16 | /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and |
17 | ELF local label prefixes by J"orn Rennecke |
18 | amylaar@cygnus.com */ |
19 | |
20 | /* This code uses shld, and thus is not suitable for SH1 / SH2. */ |
21 | |
22 | /* Signed / unsigned division without use of FPU, optimized for SH4. |
23 | Uses a lookup table for divisors in the range -128 .. +128, and |
24 | div1 with case distinction for larger divisors in three more ranges. |
25 | The code is lumped together with the table to allow the use of mova. */ |
/* Byte offsets, inside a 32-bit word stored in memory, of selected bytes of
   its value.  They depend on the configured endianness.
     L_LSB    - least significant byte (bits 0..7); not referenced by the
                code visible in this file
     L_LSWMSB - most significant byte of the low 16-bit half (bits 8..15)
     L_MSWLSB - least significant byte of the high 16-bit half (bits 16..23)
   The division code uses them as mov.b displacements from r15 to drop
   eight quotient bits at a time into a scratch word on the stack. */
26 | #ifdef CONFIG_CPU_LITTLE_ENDIAN |
27 | #define L_LSB 0 |
28 | #define L_LSWMSB 1 |
29 | #define L_MSWLSB 2 |
30 | #else |
31 | #define L_LSB 3 |
32 | #define L_LSWMSB 2 |
33 | #define L_MSWLSB 1 |
34 | #endif |
35 | |
/* __udivsi3_i4i (also exported as __udivsi3_i4): unsigned 32-bit division
   without the FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are saved on the stack and reloaded on every exit path, r1 is
   scratch, and the T, M and Q flags (plus MACH/MACL on the table path) are
   overwritten.
   This entry only classifies the divisor:
     divisor <= 128            -> udiv_le128 (reciprocal table)
     divisor >= 0x10000        -> udiv_ge64k
     otherwise (129 .. 0xffff) -> four div1 steps here, then udiv_25. */
36 | .balign 4 |
37 | .global __udivsi3_i4i |
38 | .global __udivsi3_i4 |
39 | .set __udivsi3_i4, __udivsi3_i4i |
40 | .type __udivsi3_i4i, @function |
41 | __udivsi3_i4i: |
/* r1 = 128, the largest divisor served by the lookup tables. */
42 | mov.w c128_w, r1 |
/* Clear the M and Q division flags (and T) for unsigned div1 stepping. */
43 | div0u |
/* r0 = dividend >> 8. */
44 | mov r4,r0 |
45 | shlr8 r0 |
/* T = (divisor > 128), unsigned compare. */
46 | cmp/hi r1,r5 |
/* r1 = low 16 bits of the divisor, zero-extended; T is not changed. */
47 | extu.w r5,r1 |
48 | bf udiv_le128 |
/* T = (divisor fits in 16 bits); if not, take the large-divisor path. */
49 | cmp/eq r5,r1 |
50 | bf udiv_ge64k |
/* Mid-range divisor: r0 = dividend >> 9, r1 = unshifted divisor (kept for
   the save below), r5 = divisor << 16. */
51 | shlr r0 |
52 | mov r5,r1 |
53 | shll16 r5 |
/* Save the caller r4 and the original divisor, interleaved with the first
   div1 steps. */
54 | mov.l r4,@-r15 |
55 | div1 r5,r0 |
56 | mov.l r1,@-r15 |
57 | div1 r5,r0 |
58 | div1 r5,r0 |
/* Continue in the tail shared with the signed routine; the fourth div1
   executes in the branch delay slot. */
59 | bra udiv_25 |
60 | div1 r5,r0 |
61 | |
/* Small-divisor path (divisor <= 128): multiply by a tabulated reciprocal
   instead of stepping div1.
     div_le128   - entry from the signed routine when the result is not
                   negated; r4 and r5 are already saved on the stack and
                   any negative operand has been negated by the caller.
     udiv_le128  - entry from the unsigned routine; saves r4 and r5 itself.
     div_le128_2 - common part, entered with r1 = div_table_ix[divisor]. */
62 | div_le128: |
/* r0 = address of div_table_ix; the delay slot loads the sign-extended
   byte r1 = div_table_ix[divisor]. */
63 | mova div_table_ix,r0 |
64 | bra div_le128_2 |
65 | mov.b @(r0,r5),r1 |
66 | udiv_le128: |
67 | mov.l r4,@-r15 |
68 | mova div_table_ix,r0 |
69 | mov.b @(r0,r5),r1 |
70 | mov.l r5,@-r15 |
71 | div_le128_2: |
/* r1 = 32-bit reciprocal fraction found r1 bytes from div_table_inv.  For a
   zero divisor the offset is -6, so this load is unaligned (see the comment
   ahead of div_table_ix). */
72 | mova div_table_inv,r0 |
73 | mov.l @(r0,r1),r1 |
/* T = 1 when (divisor AND 0xfe) is zero, i.e. the divisor is 0 or 1. */
74 | mov r5,r0 |
75 | tst #0xfe,r0 |
/* MACH:MACL = reciprocal fraction * dividend (unsigned 32x32 -> 64), and
   r1 = shift count div_table_clz[divisor]. */
76 | mova div_table_clz,r0 |
77 | dmulu.l r1,r4 |
78 | mov.b @(r0,r5),r1 |
/* Division by 1: the delay slot leaves r0 = dividend, which is the quotient. */
79 | bt/s div_by_1 |
80 | mov r4,r0 |
/* r0 = high word of the product.  Adding the dividend accounts for the
   implicit leading 1 of the reciprocal; the carry out lands in T.  T is
   known to be clear before the addc because the bt/s above fell through,
   which is why the clrt below is commented out. */
81 | mov.l @r15+,r5 |
82 | sts mach,r0 |
83 | /* clrt */ |
84 | addc r4,r0 |
85 | mov.l @r15+,r4 |
/* Rotate the carry back in as bit 31 (33-bit sum shifted right by one),
   then apply the table shift in the rts delay slot.  shld with a negative
   count shifts right. */
86 | rotcr r0 |
87 | rts |
88 | shld r1,r0 |
89 | |
/* Exits for a divisor of 1.
     div_by_1_neg - signed, negated result: r0 = -r4, where r4 holds the
                    dividend as adjusted by the signed routine.
     div_by_1     - r0 already holds the quotient (set in the delay slot of
                    the branch that came here).
   Both restore r5 and then r4 from the stack; the r4 reload runs in the
   rts delay slot. */
90 | div_by_1_neg: |
91 | neg r4,r0 |
92 | div_by_1: |
93 | mov.l @r15+,r5 |
94 | rts |
95 | mov.l @r15+,r4 |
96 | |
/* Large-divisor path: the divisor does not fit in 16 bits.
   On entry r0 = dividend >> 8.
     div_ge64k   - entry from the signed routine (result not negated); r4 and
                   r5 are already on the stack and T was set in the delay
                   slot of the branch here to (divisor > (dividend >> 8)).
     udiv_ge64k  - entry from the unsigned routine; makes that comparison
                   and saves r4 and the divisor itself.
     div_ge64k_2 - common part.
   If T is set the quotient is produced by the eight-step div_r8 / udiv_r8
   code.  Otherwise eight div1 steps run here against divisor << 8 and the
   remaining eight run at div_ge64k_end. */
97 | div_ge64k: |
/* The delay slot re-initialises M, Q and T for unsigned stepping on both
   the taken and the fall-through path. */
98 | bt/s div_r8 |
99 | div0u |
100 | shll8 r5 |
/* First div1 step executes in the delay slot. */
101 | bra div_ge64k_2 |
102 | div1 r5,r0 |
103 | udiv_ge64k: |
/* T = (divisor > (dividend >> 8)); r1 keeps the unshifted divisor so it can
   be saved for the exit path. */
104 | cmp/hi r0,r5 |
105 | mov r5,r1 |
106 | bt udiv_r8 |
107 | shll8 r5 |
108 | mov.l r4,@-r15 |
109 | div1 r5,r0 |
110 | mov.l r1,@-r15 |
111 | div_ge64k_2: |
112 | div1 r5,r0 |
/* r1 = 0, pushed below as the scratch word that collects quotient bytes. */
113 | mov.l zero_l,r1 |
114 | .rept 4 |
115 | div1 r5,r0 |
116 | .endr |
117 | mov.l r1,@-r15 |
118 | div1 r5,r0 |
/* mov.w sign-extends, so r1 = 0xffffff00. */
119 | mov.w m256_w,r1 |
120 | div1 r5,r0 |
/* Store the low byte of r0 (the eight bits shifted in so far) into bits
   8..15 of the scratch word. */
121 | mov.b r0,@(L_LSWMSB,r15) |
/* Masked merge: keep the upper 24 bits of r0 and take the low 8 bits from
   the dividend in r4.  The last xor runs in the delay slot. */
122 | xor r4,r0 |
123 | and r1,r0 |
124 | bra div_ge64k_end |
125 | xor r4,r0 |
126 | |
/* Eight-step path, reached when divisor > (dividend >> 8).
   On entry r0 = dividend >> 8.
     div_r8   - entry from the signed routine (result not negated); r4 and
                r5 are already on the stack and T was cleared by the div0u
                in the delay slot of the branch here.
     udiv_r8  - entry from the unsigned routine; saves r4 and r5 and clears
                T explicitly.
     div_r8_2 - common part, entered with r4 = dividend << 24 and T = 0.
   Performs exactly eight div1 steps and returns the quotient in r0. */
127 | div_r8: |
/* r4 = dividend << 24 (the shll8 runs in the delay slot). */
128 | shll16 r4 |
129 | bra div_r8_2 |
130 | shll8 r4 |
131 | udiv_r8: |
132 | mov.l r4,@-r15 |
133 | shll16 r4 |
134 | clrt |
135 | shll8 r4 |
136 | mov.l r5,@-r15 |
137 | div_r8_2: |
/* Move the top bit of the shifted dividend into T for the first div1. */
138 | rotcl r4 |
/* r1 = dividend >> 8 is the working value stepped by div1. */
139 | mov r0,r1 |
140 | div1 r5,r1 |
/* From here r0 both feeds the next dividend bit out through T and collects
   the quotient bit of the previous div1 at its low end on each rotcl. */
141 | mov r4,r0 |
142 | rotcl r0 |
/* Keep a copy of the divisor in r4 so r5 can be restored before the last
   div1. */
143 | mov r5,r4 |
144 | div1 r5,r1 |
145 | .rept 5 |
146 | rotcl r0; div1 r5,r1 |
147 | .endr |
148 | rotcl r0 |
149 | mov.l @r15+,r5 |
150 | div1 r4,r1 |
151 | mov.l @r15+,r4 |
/* The delay-slot rotcl shifts in the quotient bit of the final div1. */
152 | rts |
153 | rotcl r0 |
154 | |
/* __sdivsi3_i4i (also exported as __sdivsi3_i4 and __sdivsi3): signed
   32-bit division without the FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are saved on the stack and reloaded on every exit path.
   Negative operands are negated first.  When the operand signs match,
   control reaches pos_result and the unsigned quotient is returned as is;
   when they differ, control reaches neg_result and the quotient is negated
   on exit.  The code from udiv_25 onward is shared with __udivsi3_i4i. */
155 | .global __sdivsi3_i4i |
156 | .global __sdivsi3_i4 |
157 | .global __sdivsi3 |
158 | .set __sdivsi3_i4, __sdivsi3_i4i |
159 | .set __sdivsi3, __sdivsi3_i4i |
160 | .type __sdivsi3_i4i, @function |
161 | /* This is link-compatible with a __sdivsi3 call, |
162 | but we effectively clobber only r1. */ |
163 | __sdivsi3_i4i: |
164 | mov.l r4,@-r15 |
/* T = (divisor >= 0); r1 = 128, the table-path limit. */
165 | cmp/pz r5 |
166 | mov.w c128_w, r1 |
/* The branch tests the T from the divisor check; the delay slot then sets
   T = (dividend >= 0) for the next branch on either path. */
167 | bt/s pos_divisor |
168 | cmp/pz r4 |
/* Negative divisor: save the original, then work with its negation. */
169 | mov.l r5,@-r15 |
170 | neg r5,r5 |
/* Dividend >= 0 with a negative divisor gives a negated result.  The delay
   slot sets T = (divisor magnitude > 128) for the test at either target. */
171 | bt/s neg_result |
172 | cmp/hi r1,r5 |
/* Both operands negative: negate the dividend too (neg leaves T alone). */
173 | neg r4,r4 |
174 | pos_result: |
/* T still holds (divisor > 128): small divisors use the reciprocal table. */
175 | extu.w r5,r0 |
176 | bf div_le128 |
/* T = (divisor fits in 16 bits); r0 = dividend >> 8. */
177 | cmp/eq r5,r0 |
178 | mov r4,r0 |
179 | shlr8 r0 |
/* Divisor >= 0x10000: go to div_ge64k with T = (divisor > (dividend >> 8))
   set in the delay slot. */
180 | bf/s div_ge64k |
181 | cmp/hi r0,r5 |
/* Mid-range divisor: step against divisor << 16. */
182 | div0u |
183 | shll16 r5 |
184 | div1 r5,r0 |
185 | div1 r5,r0 |
186 | div1 r5,r0 |
187 | udiv_25: |
/* r1 = 0, pushed below as the scratch word that collects quotient bytes. */
188 | mov.l zero_l,r1 |
189 | div1 r5,r0 |
190 | div1 r5,r0 |
191 | mov.l r1,@-r15 |
192 | .rept 3 |
193 | div1 r5,r0 |
194 | .endr |
/* Store the low byte of r0 into bits 16..23 of the scratch word. */
195 | mov.b r0,@(L_MSWLSB,r15) |
/* xtrct followed by swap.w keeps the upper half of r0 and replaces its
   lower half with the low 16 bits of the dividend in r4. */
196 | xtrct r4,r0 |
197 | swap.w r0,r0 |
198 | .rept 8 |
199 | div1 r5,r0 |
200 | .endr |
/* Store the low byte of r0 into bits 8..15 of the scratch word. */
201 | mov.b r0,@(L_LSWMSB,r15) |
202 | div_ge64k_end: |
/* Final eight steps, then assemble the quotient: pop the scratch word into
   r4, keep only the low 8 bits of r0, restore r5, OR in the scratch bytes
   and restore r4.  The delay-slot rotcl shifts everything up by one and
   brings in T, the quotient bit of the last div1, as bit 0. */
203 | .rept 8 |
204 | div1 r5,r0 |
205 | .endr |
206 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. |
207 | extu.b r0,r0 |
208 | mov.l @r15+,r5 |
209 | or r4,r0 |
210 | mov.l @r15+,r4 |
211 | rts |
212 | rotcl r0 |
213 | |
/* Small-divisor path of the signed routine when the result must be negated.
   On entry r0 = low 16 bits of the divisor in r5 (at most 128 here), r4
   holds the non-negative dividend, and both original operands are on the
   stack.  Same reciprocal multiplication as div_le128_2, with a final
   negation. */
214 | div_le128_neg: |
/* T = 1 when (divisor AND 0xfe) is zero, i.e. the divisor is 0 or 1. */
215 | tst #0xfe,r0 |
216 | mova div_table_ix,r0 |
217 | mov.b @(r0,r5),r1 |
218 | mova div_table_inv,r0 |
/* The reciprocal load sits in the delay slot, so it also runs when the
   branch is taken.  For a zero divisor the table offset is -6 and the load
   is unaligned (see the comment ahead of div_table_ix). */
219 | bt/s div_by_1_neg |
220 | mov.l @(r0,r1),r1 |
/* MACH:MACL = reciprocal fraction * dividend; r1 = shift count. */
221 | mova div_table_clz,r0 |
222 | dmulu.l r1,r4 |
223 | mov.b @(r0,r5),r1 |
224 | mov.l @r15+,r5 |
225 | sts mach,r0 |
/* T is known to be clear here because the bt/s above fell through, so the
   clrt is not needed before the addc. */
226 | /* clrt */ |
227 | addc r4,r0 |
228 | mov.l @r15+,r4 |
/* 33-bit sum shifted right by one, then the table shift; the negation of
   the quotient runs in the rts delay slot. */
229 | rotcr r0 |
230 | shld r1,r0 |
231 | rts |
232 | neg r0,r0 |
233 | |
/* Continuation of __sdivsi3_i4i.
     pos_divisor - divisor >= 0; T = (dividend >= 0) on entry.  Saves r5 and
                   goes to pos_result when the dividend is non-negative,
                   otherwise negates the dividend and falls into neg_result.
     neg_result  - operand signs differ: same classification and div1
                   stepping as pos_result, but the quotient is built in r1
                   and negated into r0 on exit.
     div_ge64k_neg_end, div_r8_neg_end - shared exits for the negated
                   paths. */
234 | pos_divisor: |
235 | mov.l r5,@-r15 |
/* The delay slot sets T = (divisor > 128) for the test at either target. */
236 | bt/s pos_result |
237 | cmp/hi r1,r5 |
238 | neg r4,r4 |
239 | neg_result: |
240 | extu.w r5,r0 |
241 | bf div_le128_neg |
/* T = (divisor fits in 16 bits); r0 = dividend >> 8. */
242 | cmp/eq r5,r0 |
243 | mov r4,r0 |
244 | shlr8 r0 |
/* Divisor >= 0x10000: go to div_ge64k_neg with
   T = (divisor > (dividend >> 8)) set in the delay slot. */
245 | bf/s div_ge64k_neg |
246 | cmp/hi r0,r5 |
/* Mid-range divisor: step against divisor << 16; r1 = 0 is pushed as the
   scratch word that collects quotient bytes. */
247 | div0u |
248 | mov.l zero_l,r1 |
249 | shll16 r5 |
250 | div1 r5,r0 |
251 | mov.l r1,@-r15 |
252 | .rept 7 |
253 | div1 r5,r0 |
254 | .endr |
/* Store the low byte of r0 into bits 16..23 of the scratch word. */
255 | mov.b r0,@(L_MSWLSB,r15) |
/* Keep the upper half of r0 and replace its lower half with the low 16 bits
   of the dividend in r4. */
256 | xtrct r4,r0 |
257 | swap.w r0,r0 |
258 | .rept 8 |
259 | div1 r5,r0 |
260 | .endr |
/* Store the low byte of r0 into bits 8..15 of the scratch word. */
261 | mov.b r0,@(L_LSWMSB,r15) |
262 | div_ge64k_neg_end: |
/* Final eight steps, then r1 = scratch bytes OR low 8 bits of r0. */
263 | .rept 8 |
264 | div1 r5,r0 |
265 | .endr |
266 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. |
267 | extu.b r0,r1 |
268 | mov.l @r15+,r5 |
269 | or r4,r1 |
270 | div_r8_neg_end: |
/* Restore r4, shift the last quotient bit (T) into r1, and return the
   negated quotient; the neg runs in the rts delay slot. */
271 | mov.l @r15+,r4 |
272 | rotcl r1 |
273 | rts |
274 | neg r1,r0 |
275 | |
/* Large-divisor path (divisor >= 0x10000) of the signed routine when the
   result must be negated.  On entry r0 = dividend >> 8 and
   T = (divisor > (dividend >> 8)).  If T is set the eight-step code at
   div_r8_neg is used; otherwise eight div1 steps run here against
   divisor << 8 and the rest at div_ge64k_neg_end. */
276 | div_ge64k_neg: |
/* The delay slot re-initialises M, Q and T for unsigned stepping on both
   the taken and the fall-through path. */
277 | bt/s div_r8_neg |
278 | div0u |
279 | shll8 r5 |
/* r1 = 0, pushed below as the scratch word that collects quotient bytes. */
280 | mov.l zero_l,r1 |
281 | .rept 6 |
282 | div1 r5,r0 |
283 | .endr |
284 | mov.l r1,@-r15 |
285 | div1 r5,r0 |
/* mov.w sign-extends, so r1 = 0xffffff00. */
286 | mov.w m256_w,r1 |
287 | div1 r5,r0 |
/* Store the low byte of r0 into bits 8..15 of the scratch word. */
288 | mov.b r0,@(L_LSWMSB,r15) |
/* Masked merge: keep the upper 24 bits of r0 and take the low 8 bits from
   the dividend in r4.  The last xor runs in the delay slot. */
289 | xor r4,r0 |
290 | and r1,r0 |
291 | bra div_ge64k_neg_end |
292 | xor r4,r0 |
293 | |
/* 16-bit constant 128: the largest divisor handled through the lookup
   tables.  It is loaded with mov.w by both entry points. */
294 | c128_w: |
295 | .word 128 |
296 | |
/* Eight-step path of the signed routine when the result must be negated,
   reached when divisor > (dividend >> 8).  On entry r0 = dividend >> 8 and
   only the saved r4 and r5 are on the stack (no scratch word).  r0 is the
   value stepped by div1; r1 feeds the dividend bits out through T and
   collects the quotient bits on each rotcl. */
297 | div_r8_neg: |
298 | clrt |
/* r1 = dividend << 24; r4 = copy of the divisor so that r5 can be restored
   before the last div1. */
299 | shll16 r4 |
300 | mov r4,r1 |
301 | shll8 r1 |
302 | mov r5,r4 |
303 | .rept 7 |
304 | rotcl r1; div1 r5,r0 |
305 | .endr |
306 | mov.l @r15+,r5 |
307 | rotcl r1 |
/* The eighth div1 runs in the delay slot; the exit code shifts in its
   quotient bit, restores r4 and negates. */
308 | bra div_r8_neg_end |
309 | div1 r4,r0 |
310 | |
/* 16-bit constant 0xff00.  It is loaded with mov.w, which sign-extends, so
   the register receives 0xffffff00 (-256): the mask used to keep the upper
   24 bits of a register while merging in a new low byte. */
311 | m256_w: |
312 | .word 0xff00 |
/* div_table_clz: shift counts for the reciprocal-multiply path, indexed by
   the divisor.  The byte is loaded sign-extended and used as the shld count
   applied after the 33-bit sum has been shifted right by one; a negative
   count is a right shift.  Entries 0..127 are listed here; entry 128 is the
   first byte of div_table_ix, which must therefore follow immediately.
   Do not edit by hand: the layout is relied upon by the code above. */
313 | /* This table has been generated by divtab-sh4.c. */ |
314 | .balign 4 |
315 | div_table_clz: |
/* Divisors 0, 1, 2. */
316 | .byte 0 |
317 | .byte 1 |
318 | .byte 0 |
/* Divisors 3..4. */
319 | .byte -1 |
320 | .byte -1 |
/* Divisors 5..8. */
321 | .byte -2 |
322 | .byte -2 |
323 | .byte -2 |
324 | .byte -2 |
/* Divisors 9..16. */
325 | .byte -3 |
326 | .byte -3 |
327 | .byte -3 |
328 | .byte -3 |
329 | .byte -3 |
330 | .byte -3 |
331 | .byte -3 |
332 | .byte -3 |
/* Divisors 17..32. */
333 | .byte -4 |
334 | .byte -4 |
335 | .byte -4 |
336 | .byte -4 |
337 | .byte -4 |
338 | .byte -4 |
339 | .byte -4 |
340 | .byte -4 |
341 | .byte -4 |
342 | .byte -4 |
343 | .byte -4 |
344 | .byte -4 |
345 | .byte -4 |
346 | .byte -4 |
347 | .byte -4 |
348 | .byte -4 |
/* Divisors 33..64. */
349 | .byte -5 |
350 | .byte -5 |
351 | .byte -5 |
352 | .byte -5 |
353 | .byte -5 |
354 | .byte -5 |
355 | .byte -5 |
356 | .byte -5 |
357 | .byte -5 |
358 | .byte -5 |
359 | .byte -5 |
360 | .byte -5 |
361 | .byte -5 |
362 | .byte -5 |
363 | .byte -5 |
364 | .byte -5 |
365 | .byte -5 |
366 | .byte -5 |
367 | .byte -5 |
368 | .byte -5 |
369 | .byte -5 |
370 | .byte -5 |
371 | .byte -5 |
372 | .byte -5 |
373 | .byte -5 |
374 | .byte -5 |
375 | .byte -5 |
376 | .byte -5 |
377 | .byte -5 |
378 | .byte -5 |
379 | .byte -5 |
380 | .byte -5 |
/* Divisors 65..127. */
381 | .byte -6 |
382 | .byte -6 |
383 | .byte -6 |
384 | .byte -6 |
385 | .byte -6 |
386 | .byte -6 |
387 | .byte -6 |
388 | .byte -6 |
389 | .byte -6 |
390 | .byte -6 |
391 | .byte -6 |
392 | .byte -6 |
393 | .byte -6 |
394 | .byte -6 |
395 | .byte -6 |
396 | .byte -6 |
397 | .byte -6 |
398 | .byte -6 |
399 | .byte -6 |
400 | .byte -6 |
401 | .byte -6 |
402 | .byte -6 |
403 | .byte -6 |
404 | .byte -6 |
405 | .byte -6 |
406 | .byte -6 |
407 | .byte -6 |
408 | .byte -6 |
409 | .byte -6 |
410 | .byte -6 |
411 | .byte -6 |
412 | .byte -6 |
413 | .byte -6 |
414 | .byte -6 |
415 | .byte -6 |
416 | .byte -6 |
417 | .byte -6 |
418 | .byte -6 |
419 | .byte -6 |
420 | .byte -6 |
421 | .byte -6 |
422 | .byte -6 |
423 | .byte -6 |
424 | .byte -6 |
425 | .byte -6 |
426 | .byte -6 |
427 | .byte -6 |
428 | .byte -6 |
429 | .byte -6 |
430 | .byte -6 |
431 | .byte -6 |
432 | .byte -6 |
433 | .byte -6 |
434 | .byte -6 |
435 | .byte -6 |
436 | .byte -6 |
437 | .byte -6 |
438 | .byte -6 |
439 | .byte -6 |
440 | .byte -6 |
441 | .byte -6 |
442 | .byte -6 |
443 | .byte -6 |
/* div_table_ix: for each divisor 0..128, the signed byte offset from
   div_table_inv of the reciprocal fraction to use.  Offsets are multiples
   of 4 in the range -128..124; every power of two maps to -128, which is
   the zero word at zero_l.  Entry 0 is the exception described below. */
444 | /* Lookup table translating positive divisor to index into table of |
445 | normalized inverse. N.B. the '0' entry is also the last entry of the |
446 | previous table, and causes an unaligned access for division by zero. */ |
447 | div_table_ix: |
/* Divisor 0: doubles as div_table_clz entry 128. */
448 | .byte -6 |
/* Divisors 1..3. */
449 | .byte -128 |
450 | .byte -128 |
451 | .byte 0 |
/* Divisors 4..7. */
452 | .byte -128 |
453 | .byte -64 |
454 | .byte 0 |
455 | .byte 64 |
/* Divisors 8..15. */
456 | .byte -128 |
457 | .byte -96 |
458 | .byte -64 |
459 | .byte -32 |
460 | .byte 0 |
461 | .byte 32 |
462 | .byte 64 |
463 | .byte 96 |
/* Divisors 16..31. */
464 | .byte -128 |
465 | .byte -112 |
466 | .byte -96 |
467 | .byte -80 |
468 | .byte -64 |
469 | .byte -48 |
470 | .byte -32 |
471 | .byte -16 |
472 | .byte 0 |
473 | .byte 16 |
474 | .byte 32 |
475 | .byte 48 |
476 | .byte 64 |
477 | .byte 80 |
478 | .byte 96 |
479 | .byte 112 |
/* Divisors 32..63. */
480 | .byte -128 |
481 | .byte -120 |
482 | .byte -112 |
483 | .byte -104 |
484 | .byte -96 |
485 | .byte -88 |
486 | .byte -80 |
487 | .byte -72 |
488 | .byte -64 |
489 | .byte -56 |
490 | .byte -48 |
491 | .byte -40 |
492 | .byte -32 |
493 | .byte -24 |
494 | .byte -16 |
495 | .byte -8 |
496 | .byte 0 |
497 | .byte 8 |
498 | .byte 16 |
499 | .byte 24 |
500 | .byte 32 |
501 | .byte 40 |
502 | .byte 48 |
503 | .byte 56 |
504 | .byte 64 |
505 | .byte 72 |
506 | .byte 80 |
507 | .byte 88 |
508 | .byte 96 |
509 | .byte 104 |
510 | .byte 112 |
511 | .byte 120 |
/* Divisors 64..127. */
512 | .byte -128 |
513 | .byte -124 |
514 | .byte -120 |
515 | .byte -116 |
516 | .byte -112 |
517 | .byte -108 |
518 | .byte -104 |
519 | .byte -100 |
520 | .byte -96 |
521 | .byte -92 |
522 | .byte -88 |
523 | .byte -84 |
524 | .byte -80 |
525 | .byte -76 |
526 | .byte -72 |
527 | .byte -68 |
528 | .byte -64 |
529 | .byte -60 |
530 | .byte -56 |
531 | .byte -52 |
532 | .byte -48 |
533 | .byte -44 |
534 | .byte -40 |
535 | .byte -36 |
536 | .byte -32 |
537 | .byte -28 |
538 | .byte -24 |
539 | .byte -20 |
540 | .byte -16 |
541 | .byte -12 |
542 | .byte -8 |
543 | .byte -4 |
544 | .byte 0 |
545 | .byte 4 |
546 | .byte 8 |
547 | .byte 12 |
548 | .byte 16 |
549 | .byte 20 |
550 | .byte 24 |
551 | .byte 28 |
552 | .byte 32 |
553 | .byte 36 |
554 | .byte 40 |
555 | .byte 44 |
556 | .byte 48 |
557 | .byte 52 |
558 | .byte 56 |
559 | .byte 60 |
560 | .byte 64 |
561 | .byte 68 |
562 | .byte 72 |
563 | .byte 76 |
564 | .byte 80 |
565 | .byte 84 |
566 | .byte 88 |
567 | .byte 92 |
568 | .byte 96 |
569 | .byte 100 |
570 | .byte 104 |
571 | .byte 108 |
572 | .byte 112 |
573 | .byte 116 |
574 | .byte 120 |
575 | .byte 124 |
/* Divisor 128. */
576 | .byte -128 |
/* Table of 64 reciprocal fractions, one 32-bit word each, for 1/64 through
   1/127 in order.  Only the fraction below the implicit leading 1 is
   stored, so 1/64 is the zero word.
     zero_l        - first word of the table; the division code also loads
                     it directly as a constant zero.
     div_table_inv - label 32 words (128 bytes) into the table, so that the
                     signed byte offsets -128..124 held in div_table_ix
                     reach every entry.
   Do not edit by hand: the layout is relied upon by the code above. */
577 | /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ |
578 | .balign 4 |
579 | zero_l: |
580 | .long 0x0 |
581 | .long 0xF81F81F9 |
582 | .long 0xF07C1F08 |
583 | .long 0xE9131AC0 |
584 | .long 0xE1E1E1E2 |
585 | .long 0xDAE6076C |
586 | .long 0xD41D41D5 |
587 | .long 0xCD856891 |
588 | .long 0xC71C71C8 |
589 | .long 0xC0E07039 |
590 | .long 0xBACF914D |
591 | .long 0xB4E81B4F |
592 | .long 0xAF286BCB |
593 | .long 0xA98EF607 |
594 | .long 0xA41A41A5 |
595 | .long 0x9EC8E952 |
596 | .long 0x9999999A |
597 | .long 0x948B0FCE |
598 | .long 0x8F9C18FA |
599 | .long 0x8ACB90F7 |
600 | .long 0x86186187 |
601 | .long 0x81818182 |
602 | .long 0x7D05F418 |
603 | .long 0x78A4C818 |
604 | .long 0x745D1746 |
605 | .long 0x702E05C1 |
606 | .long 0x6C16C16D |
607 | .long 0x68168169 |
608 | .long 0x642C8591 |
609 | .long 0x60581606 |
610 | .long 0x5C9882BA |
611 | .long 0x58ED2309 |
/* Offset 0: the entry for 1/96. */
612 | div_table_inv: |
613 | .long 0x55555556 |
614 | .long 0x51D07EAF |
615 | .long 0x4E5E0A73 |
616 | .long 0x4AFD6A06 |
617 | .long 0x47AE147B |
618 | .long 0x446F8657 |
619 | .long 0x41414142 |
620 | .long 0x3E22CBCF |
621 | .long 0x3B13B13C |
622 | .long 0x38138139 |
623 | .long 0x3521CFB3 |
624 | .long 0x323E34A3 |
625 | .long 0x2F684BDB |
626 | .long 0x2C9FB4D9 |
627 | .long 0x29E4129F |
628 | .long 0x27350B89 |
629 | .long 0x24924925 |
630 | .long 0x21FB7813 |
631 | .long 0x1F7047DD |
632 | .long 0x1CF06ADB |
633 | .long 0x1A7B9612 |
634 | .long 0x18118119 |
635 | .long 0x15B1E5F8 |
636 | .long 0x135C8114 |
637 | .long 0x11111112 |
638 | .long 0xECF56BF |
639 | .long 0xC9714FC |
640 | .long 0xA6810A7 |
641 | .long 0x8421085 |
642 | .long 0x624DD30 |
643 | .long 0x4104105 |
644 | .long 0x2040811 |
645 | /* maximum error: 0.987342 scaled: 0.921875*/ |
646 | |