1 | /* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0 |
2 | * |
3 | * Copyright (C) 2006 Free Software Foundation, Inc. |
4 | */ |
5 | |
6 | /* Moderately Space-optimized libgcc routines for the Renesas SH / |
7 | STMicroelectronics ST40 CPUs. |
8 | Contributed by J"orn Rennecke <joern.rennecke@st.com>. */ |
9 | |
10 | /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i |
11 | sh4-200 run times: |
12 | udiv small divisor: 55 cycles |
13 | udiv large divisor: 52 cycles |
14 | sdiv small divisor, positive result: 59 cycles |
15 | sdiv large divisor, positive result: 56 cycles |
16 | sdiv small divisor, negative result: 65 cycles (*) |
17 | sdiv large divisor, negative result: 62 cycles (*) |
18 | (*): r2 is restored in the delay slot of the returning jmp and has a |
19 | lingering latency of two more cycles. */ |
/* Unsigned 32-bit division: __udivsi3_i4i, also exported as __udivsi3_i4.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   r4 and r5 are pushed on the stack and reloaded before returning.
   r1 is overwritten with the return address and T is left modified.
   The code below sdiv_small_divisor is shared with __sdivsi3_i4i.  */
20 | .balign 4 |
21 | .global __udivsi3_i4i |
22 | .global __udivsi3_i4 |
23 | .set __udivsi3_i4, __udivsi3_i4i |
24 | .type __udivsi3_i4i, @function |
25 | .type __sdivsi3_i4i, @function |
26 | __udivsi3_i4i: |
     /* Keep the return address in r1, because the bsr calls further down
        overwrite PR.  */
27 | sts pr,r1 |
     /* Push the caller's r4.  It is popped again just before returning.  */
28 | mov.l r4,@-r15 |
     /* T = 1 when the divisor equals its own zero-extended low 16 bits,
        that is, when it fits in 16 bits.  */
29 | extu.w r5,r0 |
30 | cmp/eq r5,r0 |
     /* r0 = dividend with its two 16-bit halves exchanged.  */
31 | swap.w r4,r0 |
     /* r4 = upper 16 bits of the dividend.  */
32 | shlr16 r4 |
     /* Divisor wider than 16 bits: continue at large_divisor.  div0u is in
        the delay slot, so the unsigned divide-step state is initialised on
        both paths.  */
33 | bf/s large_divisor |
34 | div0u |
     /* Small divisor: push the caller's r5, then move the 16-bit divisor
        into the upper half of r5 for the div1 steps.  */
35 | mov.l r5,@-r15 |
36 | shll16 r5 |
     /* __sdivsi3_i4i branches here too, with r0, r1, r4 and r5 already
        prepared in the same way.  */
37 | sdiv_small_divisor: |
     /* First 16 divide steps on the upper dividend half:
        twice (one div1, one div1 in the bsr delay slot, six in div6).  */
38 | div1 r5,r4 |
39 | bsr div6 |
40 | div1 r5,r4 |
41 | div1 r5,r4 |
42 | bsr div6 |
43 | div1 r5,r4 |
     /* Park the low half of r4 in the upper half of r0.  The old upper half
        of r0, the low 16 dividend bits, moves to the low half of r0.  */
44 | xtrct r4,r0 |
     /* r4 = low 16 dividend bits on top of the old upper half of r4.  */
45 | xtrct r0,r4 |
     /* Second 16 divide steps: twice (seven in div7, plus one div1 and one
        div1 in a bsr delay slot).  The swap.w in the first delay slot
        exchanges the halves of r4, so the old upper half is on top again
        with the low dividend bits below it.  */
46 | bsr div7 |
47 | swap.w r4,r4 |
48 | div1 r5,r4 |
49 | bsr div7 |
50 | div1 r5,r4 |
     /* r0 = low half of r4 on top of the half parked in r0 earlier.  */
51 | xtrct r4,r0 |
     /* Reload the caller's r5 and r4 and exchange the halves of r0, which
        appears to put the two quotient halves in their final order.  */
52 | mov.l @r15+,r5 |
53 | swap.w r0,r0 |
54 | mov.l @r15+,r4 |
     /* Leave through r1.  It holds the caller's return address, or the
        address of negate_result when entered from the neg_result path of
        __sdivsi3_i4i.  The rotcl in the delay slot shifts the last T bit
        into the bottom of r0.  */
55 | jmp @r1 |
56 | rotcl r0 |
/* div7 and div6: shared runs of div1 r5,r4 steps, called with bsr.
   Entering at div7 runs seven steps, because div7 falls through into
   div6.  Entering at div6 runs six.  The last step sits in the rts delay
   slot.  Both return through PR, which is why the callers keep their own
   return address in another register.  */
57 | div7: |
58 | div1 r5,r4 |
59 | div6: |
60 | div1 r5,r4; div1 r5,r4; div1 r5,r4 |
61 | div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 |
62 | |
/* divx3: three rotcl r0 / div1 r5,r4 pairs, called with bsr from the
   large-divisor loop.  Each rotcl moves the current T bit into the bottom
   of r0 and the top bit of r0 into T.  The following div1 then consumes
   that T.  The third div1 is in the rts delay slot.  Returns through PR.  */
63 | divx3: |
64 | rotcl r0 |
65 | div1 r5,r4 |
66 | rotcl r0 |
67 | div1 r5,r4 |
68 | rotcl r0 |
69 | rts |
70 | div1 r5,r4 |
71 | |
/* Large-divisor path, taken when the divisor does not fit in 16 bits.
   On entry r4 holds the upper 16 bits of the dividend, r0 the dividend
   with its halves exchanged, and r1 the address to leave through.
   Only 16 divide steps are run here.  */
72 | large_divisor: |
     /* Push the caller's r5.  __sdivsi3_i4i has already pushed it and
        enters at sdiv_large_divisor instead.  */
73 | mov.l r5,@-r15 |
74 | sdiv_large_divisor: |
     /* The low half of r0 equals r4, so this clears it.  The low 16
        dividend bits remain in the upper half of r0.  */
75 | xor r4,r0 |
     /* Four repetitions of four rotcl/div1 pairs: one pair here, with the
        div1 in the bsr delay slot, and three pairs in divx3.  That makes
        16 steps in total.  */
76 | .rept 4 |
77 | rotcl r0 |
78 | bsr divx3 |
79 | div1 r5,r4 |
80 | .endr |
     /* Reload the caller's r5 and r4.  */
81 | mov.l @r15+,r5 |
82 | mov.l @r15+,r4 |
     /* Leave through r1.  The rotcl in the delay slot shifts the last T bit
        into the bottom of r0.  */
83 | jmp @r1 |
84 | rotcl r0 |
85 | |
/* Signed 32-bit division: __sdivsi3_i4i, also exported as __sdivsi3_i4
   and __sdivsi3.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   Negative operands are negated, the unsigned divide-step code at
   sdiv_small_divisor or sdiv_large_divisor is reused, and the quotient is
   negated afterwards when the operand signs differ.
   r4 and r5 are pushed here and popped by the shared code.  r1 is
   overwritten.  On the negative-result path r2 carries the return address
   while its old value is kept in MACL, so MACL is overwritten there.  */
86 | .global __sdivsi3_i4i |
87 | .global __sdivsi3_i4 |
88 | .global __sdivsi3 |
89 | .set __sdivsi3_i4, __sdivsi3_i4i |
90 | .set __sdivsi3, __sdivsi3_i4i |
91 | __sdivsi3_i4i: |
     /* Push the original operands.  T = 1 when the divisor is >= 0.  */
92 | mov.l r4,@-r15 |
93 | cmp/pz r5 |
94 | mov.l r5,@-r15 |
     /* Branch on the divisor sign.  The delay slot then sets T = 1 when the
        dividend is >= 0, for the next conditional branch on either path.  */
95 | bt/s pos_divisor |
96 | cmp/pz r4 |
     /* Negative divisor: negate it, then r0 = its zero-extended low half.  */
97 | neg r5,r5 |
98 | extu.w r5,r0 |
     /* Dividend >= 0 with a negative divisor gives a negative result.  The
        delay slot sets T = 1 when the negated divisor fits in 16 bits.
        That T is tested at sdiv_check_divisor.  */
99 | bt/s neg_result |
100 | cmp/eq r5,r0 |
     /* Both operands negative: negate the dividend.  Result is positive.  */
101 | neg r4,r4 |
102 | pos_result: |
     /* r0 = dividend with its halves exchanged.  The delay slot puts the
        caller's return address in r1, where the shared code expects it.  */
103 | swap.w r4,r0 |
104 | bra sdiv_check_divisor |
105 | sts pr,r1 |
106 | pos_divisor: |
     /* Divisor >= 0: r0 = its zero-extended low half.  Dividend >= 0 goes
        to pos_result.  The delay slot sets T = 1 when the divisor fits in
        16 bits.  */
107 | extu.w r5,r0 |
108 | bt/s pos_result |
109 | cmp/eq r5,r0 |
     /* Negative dividend with a divisor >= 0: negate the dividend and fall
        through into neg_result.  */
110 | neg r4,r4 |
111 | neg_result: |
     /* r1 = address of negate_result, so the shared code's final jump
        through r1 lands there instead of at the caller.  mova can only
        target r0, hence the copy.  The lone statement separator on the
        next line is an empty statement.  */
112 | mova negate_result,r0 |
113 | ; |
114 | mov r0,r1 |
     /* r0 = dividend with its halves exchanged.  */
115 | swap.w r4,r0 |
     /* Save the caller's r2 in MACL and keep the real return address in
        r2.  */
116 | lds r2,macl |
117 | sts pr,r2 |
118 | sdiv_check_divisor: |
     /* r4 = upper 16 bits of the dividend.  T still holds the result of the
        earlier cmp/eq.  */
119 | shlr16 r4 |
     /* Divisor wider than 16 bits: use sdiv_large_divisor.  div0u is in the
        delay slot, so it runs on both paths.  */
120 | bf/s sdiv_large_divisor |
121 | div0u |
     /* Small divisor: the delay slot moves it into the upper half of r5,
        as the unsigned entry does before sdiv_small_divisor.  */
122 | bra sdiv_small_divisor |
123 | shll16 r5 |
     /* Keep negate_result longword-aligned, which the mova that takes its
        address appears to require.  */
124 | .balign 4 |
125 | negate_result: |
     /* Reached through r1 with the unsigned quotient in r0.  Negate it,
        return through r2, and restore the caller's r2 from MACL in the
        delay slot.  */
126 | neg r0,r0 |
127 | jmp @r2 |
128 | sts macl,r2 |
129 | |