1 | //===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements __udivmoddi4 for the compiler_rt library. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "int_lib.h" |
14 | |
15 | // Effects: if rem != 0, *rem = a % b |
16 | // Returns: a / b |
17 | |
18 | // Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide |
19 | |
20 | #if defined(_MSC_VER) && !defined(__clang__) |
21 | // MSVC throws a warning about mod 0 here, disable it for builds that |
22 | // warn-as-error |
23 | #pragma warning(push) |
24 | #pragma warning(disable : 4723 4724) |
25 | #endif |
26 | |
27 | COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { |
28 | const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; |
29 | const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; |
30 | udwords n; |
31 | n.all = a; |
32 | udwords d; |
33 | d.all = b; |
34 | udwords q; |
35 | udwords r; |
36 | unsigned sr; |
37 | // special cases, X is unknown, K != 0 |
38 | if (n.s.high == 0) { |
39 | if (d.s.high == 0) { |
40 | // 0 X |
41 | // --- |
42 | // 0 X |
43 | if (rem) |
44 | *rem = n.s.low % d.s.low; |
45 | return n.s.low / d.s.low; |
46 | } |
47 | // 0 X |
48 | // --- |
49 | // K X |
50 | if (rem) |
51 | *rem = n.s.low; |
52 | return 0; |
53 | } |
54 | // n.s.high != 0 |
55 | if (d.s.low == 0) { |
56 | if (d.s.high == 0) { |
57 | // K X |
58 | // --- |
59 | // 0 0 |
60 | if (rem) |
61 | *rem = n.s.high % d.s.low; |
62 | return n.s.high / d.s.low; |
63 | } |
64 | // d.s.high != 0 |
65 | if (n.s.low == 0) { |
66 | // K 0 |
67 | // --- |
68 | // K 0 |
69 | if (rem) { |
70 | r.s.high = n.s.high % d.s.high; |
71 | r.s.low = 0; |
72 | *rem = r.all; |
73 | } |
74 | return n.s.high / d.s.high; |
75 | } |
76 | // K K |
77 | // --- |
78 | // K 0 |
79 | if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { |
80 | if (rem) { |
81 | r.s.low = n.s.low; |
82 | r.s.high = n.s.high & (d.s.high - 1); |
83 | *rem = r.all; |
84 | } |
85 | return n.s.high >> ctzsi(d.s.high); |
86 | } |
87 | // K K |
88 | // --- |
89 | // K 0 |
90 | sr = clzsi(d.s.high) - clzsi(n.s.high); |
91 | // 0 <= sr <= n_uword_bits - 2 or sr large |
92 | if (sr > n_uword_bits - 2) { |
93 | if (rem) |
94 | *rem = n.all; |
95 | return 0; |
96 | } |
97 | ++sr; |
98 | // 1 <= sr <= n_uword_bits - 1 |
99 | // q.all = n.all << (n_udword_bits - sr); |
100 | q.s.low = 0; |
101 | q.s.high = n.s.low << (n_uword_bits - sr); |
102 | // r.all = n.all >> sr; |
103 | r.s.high = n.s.high >> sr; |
104 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
105 | } else /* d.s.low != 0 */ { |
106 | if (d.s.high == 0) { |
107 | // K X |
108 | // --- |
109 | // 0 K |
110 | if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ { |
111 | if (rem) |
112 | *rem = n.s.low & (d.s.low - 1); |
113 | if (d.s.low == 1) |
114 | return n.all; |
115 | sr = ctzsi(d.s.low); |
116 | q.s.high = n.s.high >> sr; |
117 | q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
118 | return q.all; |
119 | } |
120 | // K X |
121 | // --- |
122 | // 0 K |
123 | sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high); |
124 | // 2 <= sr <= n_udword_bits - 1 |
125 | // q.all = n.all << (n_udword_bits - sr); |
126 | // r.all = n.all >> sr; |
127 | if (sr == n_uword_bits) { |
128 | q.s.low = 0; |
129 | q.s.high = n.s.low; |
130 | r.s.high = 0; |
131 | r.s.low = n.s.high; |
132 | } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ { |
133 | q.s.low = 0; |
134 | q.s.high = n.s.low << (n_uword_bits - sr); |
135 | r.s.high = n.s.high >> sr; |
136 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
137 | } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ { |
138 | q.s.low = n.s.low << (n_udword_bits - sr); |
139 | q.s.high = (n.s.high << (n_udword_bits - sr)) | |
140 | (n.s.low >> (sr - n_uword_bits)); |
141 | r.s.high = 0; |
142 | r.s.low = n.s.high >> (sr - n_uword_bits); |
143 | } |
144 | } else { |
145 | // K X |
146 | // --- |
147 | // K K |
148 | sr = clzsi(d.s.high) - clzsi(n.s.high); |
149 | // 0 <= sr <= n_uword_bits - 1 or sr large |
150 | if (sr > n_uword_bits - 1) { |
151 | if (rem) |
152 | *rem = n.all; |
153 | return 0; |
154 | } |
155 | ++sr; |
156 | // 1 <= sr <= n_uword_bits |
157 | // q.all = n.all << (n_udword_bits - sr); |
158 | q.s.low = 0; |
159 | if (sr == n_uword_bits) { |
160 | q.s.high = n.s.low; |
161 | r.s.high = 0; |
162 | r.s.low = n.s.high; |
163 | } else { |
164 | q.s.high = n.s.low << (n_uword_bits - sr); |
165 | r.s.high = n.s.high >> sr; |
166 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
167 | } |
168 | } |
169 | } |
170 | // Not a special case |
171 | // q and r are initialized with: |
172 | // q.all = n.all << (n_udword_bits - sr); |
173 | // r.all = n.all >> sr; |
174 | // 1 <= sr <= n_udword_bits - 1 |
175 | su_int carry = 0; |
176 | for (; sr > 0; --sr) { |
177 | // r:q = ((r:q) << 1) | carry |
178 | r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); |
179 | r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); |
180 | q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); |
181 | q.s.low = (q.s.low << 1) | carry; |
182 | // carry = 0; |
183 | // if (r.all >= d.all) |
184 | // { |
185 | // r.all -= d.all; |
186 | // carry = 1; |
187 | // } |
188 | const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); |
189 | carry = s & 1; |
190 | r.all -= d.all & s; |
191 | } |
192 | q.all = (q.all << 1) | carry; |
193 | if (rem) |
194 | *rem = r.all; |
195 | return q.all; |
196 | } |
197 | |
198 | #if defined(_MSC_VER) && !defined(__clang__) |
199 | #pragma warning(pop) |
200 | #endif |
201 |