1/// Creates an unsigned division function that uses a combination of hardware division and
2/// binary long division to divide integers larger than what hardware division by itself can do. This
3/// function is intended for microarchitectures that have division hardware, but not fast enough
4/// multiplication hardware for `impl_trifecta` to be faster.
5#[allow(unused_macros)]
6macro_rules! impl_delegate {
7 (
8 $fn:ident, // name of the unsigned division function
9 $zero_div_fn:ident, // function called when division by zero is attempted
10 $half_normalization_shift:ident, // function for finding the normalization shift of $uX
11 $half_division:ident, // function for division of a $uX by a $uX
12 $n_h:expr, // the number of bits in $iH or $uH
13 $uH:ident, // unsigned integer with half the bit width of $uX
14 $uX:ident, // unsigned integer with half the bit width of $uD.
15 $uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
16 $iD:ident // signed integer type with the same bitwidth as `$uD`
17 ) => {
18 /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
19 /// tuple.
20 pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
21 // The two possibility algorithm, undersubtracting long division algorithm, or any kind
22 // of reciprocal based algorithm will not be fastest, because they involve large
23 // multiplications that we assume to not be fast enough relative to the divisions to
24 // outweigh setup times.
25
26 // the number of bits in a $uX
27 let n = $n_h * 2;
28
29 let duo_lo = duo as $uX;
30 let duo_hi = (duo >> n) as $uX;
31 let div_lo = div as $uX;
32 let div_hi = (div >> n) as $uX;
33
34 match (div_lo == 0, div_hi == 0, duo_hi == 0) {
35 (true, true, _) => $zero_div_fn(),
36 (_, false, true) => {
37 // `duo` < `div`
38 return (0, duo);
39 }
40 (false, true, true) => {
41 // delegate to smaller division
42 let tmp = $half_division(duo_lo, div_lo);
43 return (tmp.0 as $uD, tmp.1 as $uD);
44 }
45 (false, true, false) => {
46 if duo_hi < div_lo {
47 // `quo_hi` will always be 0. This performs a binary long division algorithm
48 // to zero `duo_hi` followed by a half division.
49
50 // We can calculate the normalization shift using only `$uX` size functions.
51 // If we calculated the normalization shift using
52 // `$half_normalization_shift(duo_hi, div_lo false)`, it would break the
53 // assumption the function has that the first argument is more than the
54 // second argument. If the arguments are switched, the assumption holds true
55 // since `duo_hi < div_lo`.
56 let norm_shift = $half_normalization_shift(div_lo, duo_hi, false);
57 let shl = if norm_shift == 0 {
58 // Consider what happens if the msbs of `duo_hi` and `div_lo` align with
59 // no shifting. The normalization shift will always return
60 // `norm_shift == 0` regardless of whether it is fully normalized,
61 // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would
62 // result in shift overflow down the line. For the edge case, because
63 // both `duo_hi < div_lo` and we are comparing all the significant bits
64 // of `duo_hi` and `div`, we can make `shl = n - 1`.
65 n - 1
66 } else {
67 // We also cannot just use `shl = n - norm_shift - 1` in the general
68 // case, because when we are not in the edge case comparing all the
69 // significant bits, then the full `duo < div` may not be true and thus
70 // breaks the division algorithm.
71 n - norm_shift
72 };
73
74 // The 3 variable restoring division algorithm (see binary_long.rs) is ideal
75 // for this task, since `pow` and `quo` can be `$uX` and the delegation
76 // check is simple.
77 let mut div: $uD = div << shl;
78 let mut pow_lo: $uX = 1 << shl;
79 let mut quo_lo: $uX = 0;
80 let mut duo = duo;
81 loop {
82 let sub = duo.wrapping_sub(div);
83 if 0 <= (sub as $iD) {
84 duo = sub;
85 quo_lo |= pow_lo;
86 let duo_hi = (duo >> n) as $uX;
87 if duo_hi == 0 {
88 // Delegate to get the rest of the quotient. Note that the
89 // `div_lo` here is the original unshifted `div`.
90 let tmp = $half_division(duo as $uX, div_lo);
91 return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD);
92 }
93 }
94 div >>= 1;
95 pow_lo >>= 1;
96 }
97 } else if duo_hi == div_lo {
98 // `quo_hi == 1`. This branch is cheap and helps with edge cases.
99 let tmp = $half_division(duo as $uX, div as $uX);
100 return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD);
101 } else {
102 // `div_lo < duo_hi`
103 // `rem_hi == 0`
104 if (div_lo >> $n_h) == 0 {
105 // Short division of $uD by a $uH, using $uX by $uX division
106 let div_0 = div_lo as $uH as $uX;
107 let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
108
109 let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
110 let (quo_1, rem_2) = $half_division(duo_mid, div_0);
111
112 let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
113 let (quo_0, rem_1) = $half_division(duo_lo, div_0);
114
115 return (
116 (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
117 rem_1 as $uD,
118 );
119 }
120
121 // This is basically a short division composed of a half division for the hi
122 // part, specialized 3 variable binary long division in the middle, and
123 // another half division for the lo part.
124 let duo_lo = duo as $uX;
125 let tmp = $half_division(duo_hi, div_lo);
126 let quo_hi = tmp.0;
127 let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n);
128 // This check is required to avoid breaking the long division below.
129 if duo < div {
130 return ((quo_hi as $uD) << n, duo);
131 }
132
133 // The half division handled all shift alignments down to `n`, so this
134 // division can continue with a shift of `n - 1`.
135 let mut div: $uD = div << (n - 1);
136 let mut pow_lo: $uX = 1 << (n - 1);
137 let mut quo_lo: $uX = 0;
138 loop {
139 let sub = duo.wrapping_sub(div);
140 if 0 <= (sub as $iD) {
141 duo = sub;
142 quo_lo |= pow_lo;
143 let duo_hi = (duo >> n) as $uX;
144 if duo_hi == 0 {
145 // Delegate to get the rest of the quotient. Note that the
146 // `div_lo` here is the original unshifted `div`.
147 let tmp = $half_division(duo as $uX, div_lo);
148 return (
149 (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n),
150 tmp.1 as $uD,
151 );
152 }
153 }
154 div >>= 1;
155 pow_lo >>= 1;
156 }
157 }
158 }
159 (_, false, false) => {
160 // Full $uD by $uD binary long division. `quo_hi` will always be 0.
161 if duo < div {
162 return (0, duo);
163 }
164 let div_original = div;
165 let shl = $half_normalization_shift(duo_hi, div_hi, false);
166 let mut duo = duo;
167 let mut div: $uD = div << shl;
168 let mut pow_lo: $uX = 1 << shl;
169 let mut quo_lo: $uX = 0;
170 loop {
171 let sub = duo.wrapping_sub(div);
172 if 0 <= (sub as $iD) {
173 duo = sub;
174 quo_lo |= pow_lo;
175 if duo < div_original {
176 return (quo_lo as $uD, duo);
177 }
178 }
179 div >>= 1;
180 pow_lo >>= 1;
181 }
182 }
183 }
184 }
185 };
186}
187
188public_test_dep! {
189/// Returns `n / d` and sets `*rem = n % d`.
190///
191/// This specialization exists because:
192/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`,
193/// so we have to use an old fashioned `&mut u128` argument to return the remainder.
194/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the
195/// delegate algorithm strategy the only reasonably fast way to perform `u128` division.
196// used on SPARC
197#[allow(dead_code)]
198pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
199 use super::*;
200 let duo_lo = duo as u64;
201 let duo_hi = (duo >> 64) as u64;
202 let div_lo = div as u64;
203 let div_hi = (div >> 64) as u64;
204
205 match (div_lo == 0, div_hi == 0, duo_hi == 0) {
206 (true, true, _) => zero_div_fn(),
207 (_, false, true) => {
208 *rem = duo;
209 return 0;
210 }
211 (false, true, true) => {
212 let tmp = u64_by_u64_div_rem(duo_lo, div_lo);
213 *rem = tmp.1 as u128;
214 return tmp.0 as u128;
215 }
216 (false, true, false) => {
217 if duo_hi < div_lo {
218 let norm_shift = u64_normalization_shift(div_lo, duo_hi, false);
219 let shl = if norm_shift == 0 {
220 64 - 1
221 } else {
222 64 - norm_shift
223 };
224
225 let mut div: u128 = div << shl;
226 let mut pow_lo: u64 = 1 << shl;
227 let mut quo_lo: u64 = 0;
228 let mut duo = duo;
229 loop {
230 let sub = duo.wrapping_sub(div);
231 if 0 <= (sub as i128) {
232 duo = sub;
233 quo_lo |= pow_lo;
234 let duo_hi = (duo >> 64) as u64;
235 if duo_hi == 0 {
236 let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
237 *rem = tmp.1 as u128;
238 return (quo_lo | tmp.0) as u128;
239 }
240 }
241 div >>= 1;
242 pow_lo >>= 1;
243 }
244 } else if duo_hi == div_lo {
245 let tmp = u64_by_u64_div_rem(duo as u64, div as u64);
246 *rem = tmp.1 as u128;
247 return (1 << 64) | (tmp.0 as u128);
248 } else {
249 if (div_lo >> 32) == 0 {
250 let div_0 = div_lo as u32 as u64;
251 let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0);
252
253 let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32);
254 let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0);
255
256 let duo_lo = (duo as u32 as u64) | (rem_2 << 32);
257 let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0);
258
259 *rem = rem_1 as u128;
260 return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64);
261 }
262
263 let duo_lo = duo as u64;
264 let tmp = u64_by_u64_div_rem(duo_hi, div_lo);
265 let quo_hi = tmp.0;
266 let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64);
267 if duo < div {
268 *rem = duo;
269 return (quo_hi as u128) << 64;
270 }
271
272 let mut div: u128 = div << (64 - 1);
273 let mut pow_lo: u64 = 1 << (64 - 1);
274 let mut quo_lo: u64 = 0;
275 loop {
276 let sub = duo.wrapping_sub(div);
277 if 0 <= (sub as i128) {
278 duo = sub;
279 quo_lo |= pow_lo;
280 let duo_hi = (duo >> 64) as u64;
281 if duo_hi == 0 {
282 let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
283 *rem = tmp.1 as u128;
284 return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64);
285 }
286 }
287 div >>= 1;
288 pow_lo >>= 1;
289 }
290 }
291 }
292 (_, false, false) => {
293 if duo < div {
294 *rem = duo;
295 return 0;
296 }
297 let div_original = div;
298 let shl = u64_normalization_shift(duo_hi, div_hi, false);
299 let mut duo = duo;
300 let mut div: u128 = div << shl;
301 let mut pow_lo: u64 = 1 << shl;
302 let mut quo_lo: u64 = 0;
303 loop {
304 let sub = duo.wrapping_sub(div);
305 if 0 <= (sub as i128) {
306 duo = sub;
307 quo_lo |= pow_lo;
308 if duo < div_original {
309 *rem = duo;
310 return quo_lo as u128;
311 }
312 }
313 div >>= 1;
314 pow_lo >>= 1;
315 }
316 }
317 }
318}
319}
320