/// Creates an unsigned division function that uses a combination of hardware division and
/// binary long division to divide integers larger than what hardware division by itself can
/// handle. This function is intended for microarchitectures that have division hardware, but
/// whose multiplication hardware is not fast enough for `impl_trifecta` to be faster.
#[allow(unused_macros)]
macro_rules! impl_delegate {
    (
        $fn:ident, // name of the unsigned division function
        $zero_div_fn:ident, // function called when division by zero is attempted
        $half_normalization_shift:ident, // function for finding the normalization shift of $uX
        $half_division:ident, // function for division of a $uX by a $uX
        $n_h:expr, // the number of bits in $iH or $uH
        $uH:ident, // unsigned integer with half the bit width of $uX
        $uX:ident, // unsigned integer with half the bit width of $uD
        $uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
        $iD:ident // signed integer type with the same bit width as `$uD`
    ) => {
        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
        /// tuple.
        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
            // The two-possibility algorithm, the undersubtracting long division algorithm, and
            // any kind of reciprocal-based algorithm will not be fastest here, because they
            // involve large multiplications that we assume are not fast enough relative to the
            // divisions to outweigh their setup times.

            // the number of bits in a $uX
            let n = $n_h * 2;

            let duo_lo = duo as $uX;
            let duo_hi = (duo >> n) as $uX;
            let div_lo = div as $uX;
            let div_hi = (div >> n) as $uX;

            match (div_lo == 0, div_hi == 0, duo_hi == 0) {
                (true, true, _) => $zero_div_fn(),
                (_, false, true) => {
                    // `duo` < `div`
                    return (0, duo);
                }
                (false, true, true) => {
                    // delegate to smaller division
                    let tmp = $half_division(duo_lo, div_lo);
                    return (tmp.0 as $uD, tmp.1 as $uD);
                }
                (false, true, false) => {
                    if duo_hi < div_lo {
                        // `quo_hi` will always be 0. This performs a binary long division algorithm
                        // to zero `duo_hi` followed by a half division.

                        // We can calculate the normalization shift using only `$uX` size
                        // functions. If we calculated the normalization shift using
                        // `$half_normalization_shift(duo_hi, div_lo, false)`, it would break
                        // that function's assumption that its first argument is larger than
                        // its second. With the arguments swapped the assumption holds, since
                        // `duo_hi < div_lo`.
                        let norm_shift = $half_normalization_shift(div_lo, duo_hi, false);
                        let shl = if norm_shift == 0 {
                            // Consider what happens if the msbs of `duo_hi` and `div_lo` align
                            // with no shifting. The normalization shift will always return
                            // `norm_shift == 0` regardless of whether it is fully normalized,
                            // because `duo_hi < div_lo`. In that edge case, `n - norm_shift`
                            // would result in shift overflow down the line. For the edge case,
                            // because both `duo_hi < div_lo` and we are comparing all the
                            // significant bits of `duo_hi` and `div`, we can make `shl = n - 1`.
                            n - 1
                        } else {
                            // We also cannot just use `shl = n - norm_shift - 1` in the general
                            // case, because when we are not in the edge case comparing all of
                            // the significant bits, the full `duo < div` condition may not
                            // hold, which would break the division algorithm.
                            n - norm_shift
                        };
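                        // As a concrete illustration (assuming `$half_normalization_shift`
                        // returns the distance between the msbs of its arguments, and taking
                        // `n = 64`): `duo_hi = 5` and `div_lo = 9` give `norm_shift == 1` and
                        // `shl == 63`, which aligns the msb of `div << shl` with the msb of
                        // `duo`. With `div_lo = 7` the msbs already align, `norm_shift == 0`
                        // is returned, and `shl = n` would make `1 << shl` overflow `pow_lo`,
                        // hence the `shl = n - 1` edge case above.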

                        // The 3 variable restoring division algorithm (see binary_long.rs) is ideal
                        // for this task, since `pow` and `quo` can be `$uX` and the delegation
                        // check is simple.
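                        // An illustrative invariant for the loop below: writing `duo_start`
                        // for `duo` as it is here and `d` for the unshifted divisor (the
                        // `div` argument of `$fn`), each iteration preserves
                        // `duo_start == d * quo_lo + duo`, since the shifted `div` and
                        // `pow_lo` always represent `d << shift` and `1 << shift` for the
                        // same `shift`. Once `duo_hi` is 0, one half division finishes the
                        // quotient.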
                        let mut div: $uD = div << shl;
                        let mut pow_lo: $uX = 1 << shl;
                        let mut quo_lo: $uX = 0;
                        let mut duo = duo;
                        loop {
                            let sub = duo.wrapping_sub(div);
                            if 0 <= (sub as $iD) {
                                duo = sub;
                                quo_lo |= pow_lo;
                                let duo_hi = (duo >> n) as $uX;
                                if duo_hi == 0 {
                                    // Delegate to get the rest of the quotient. Note that the
                                    // `div_lo` here is the original unshifted `div`.
                                    let tmp = $half_division(duo as $uX, div_lo);
                                    return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD);
                                }
                            }
                            div >>= 1;
                            pow_lo >>= 1;
                        }
                    } else if duo_hi == div_lo {
                        // `quo_hi == 1`. This branch is cheap and helps with edge cases.
                        let tmp = $half_division(duo as $uX, div as $uX);
                        return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD);
                    } else {
                        // `div_lo < duo_hi`
                        // `rem_hi == 0`
                        if (div_lo >> $n_h) == 0 {
                            // Short division of $uD by a $uH, using $uX by $uX division
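                            // Viewing `duo` as four base-2^$n_h digits `[d3, d2, d1, d0]`,
                            // this is schoolbook short division: the two-digit head (`duo_hi`)
                            // is divided first, and each remainder is carried in front of the
                            // next digit. Every remainder is less than `div_0 < 2^$n_h`, so
                            // `(rem << $n_h) | digit` cannot overflow a `$uX`.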
                            let div_0 = div_lo as $uH as $uX;
                            let (quo_hi, rem_3) = $half_division(duo_hi, div_0);

                            let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
                            let (quo_1, rem_2) = $half_division(duo_mid, div_0);

                            let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
                            let (quo_0, rem_1) = $half_division(duo_lo, div_0);

                            return (
                                (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
                                rem_1 as $uD,
                            );
                        }

                        // This is basically a short division composed of a half division for the hi
                        // part, specialized 3 variable binary long division in the middle, and
                        // another half division for the lo part.
                        let duo_lo = duo as $uX;
                        let tmp = $half_division(duo_hi, div_lo);
                        let quo_hi = tmp.0;
                        let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n);
                        // The long division below requires `duo >= div`, so this check is
                        // required to avoid breaking it.
                        if duo < div {
                            return ((quo_hi as $uD) << n, duo);
                        }

                        // The half division handled all shift alignments down to `n`, so this
                        // division can continue with a shift of `n - 1`. (The remainder from
                        // the half division is less than `div`, so `duo < div << n` holds
                        // here.)
                        let mut div: $uD = div << (n - 1);
                        let mut pow_lo: $uX = 1 << (n - 1);
                        let mut quo_lo: $uX = 0;
                        loop {
                            let sub = duo.wrapping_sub(div);
                            if 0 <= (sub as $iD) {
                                duo = sub;
                                quo_lo |= pow_lo;
                                let duo_hi = (duo >> n) as $uX;
                                if duo_hi == 0 {
                                    // Delegate to get the rest of the quotient. Note that the
                                    // `div_lo` here is the original unshifted `div`.
                                    let tmp = $half_division(duo as $uX, div_lo);
                                    return (
                                        (tmp.0 as $uD) | (quo_lo as $uD) | ((quo_hi as $uD) << n),
                                        tmp.1 as $uD,
                                    );
                                }
                            }
                            div >>= 1;
                            pow_lo >>= 1;
                        }
                    }
                }
                (_, false, false) => {
                    // Full $uD by $uD binary long division. `quo_hi` will always be 0, since
                    // `div_hi != 0` implies `div >= (1 << n)` and thus `duo / div < (1 << n)`.
                    if duo < div {
                        return (0, duo);
                    }
                    let div_original = div;
                    let shl = $half_normalization_shift(duo_hi, div_hi, false);
                    let mut duo = duo;
                    let mut div: $uD = div << shl;
                    let mut pow_lo: $uX = 1 << shl;
                    let mut quo_lo: $uX = 0;
                    loop {
                        let sub = duo.wrapping_sub(div);
                        if 0 <= (sub as $iD) {
                            duo = sub;
                            quo_lo |= pow_lo;
                            if duo < div_original {
                                return (quo_lo as $uD, duo);
                            }
                        }
                        div >>= 1;
                        pow_lo >>= 1;
                    }
                }
            }
        }
    };
}
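
// As an illustration, a hypothetical invocation that builds `u128` division on top of
// `u64` hardware division (reusing the helper functions that `u128_divide_sparc` below
// relies on) could look like:
//
// impl_delegate!(
//     u128_div_rem_delegate, // hypothetical name for the generated function
//     zero_div_fn,
//     u64_normalization_shift,
//     u64_by_u64_div_rem,
//     32,   // $n_h: the number of bits in a u32
//     u32,  // $uH
//     u64,  // $uX
//     u128, // $uD
//     i128  // $iD
// );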

public_test_dep! {
    /// Returns `n / d` and sets `*rem = n % d`.
    ///
    /// This specialization exists because:
    /// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`,
    ///   so we have to use an old-fashioned `&mut u128` argument to return the remainder.
    /// - 64-bit SPARC does not have `u64 * u64 => u128` widening multiplication, which makes the
    ///   delegate algorithm strategy the only reasonably fast way to perform `u128` division.
    // used on SPARC
    #[allow(dead_code)]
    pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
        use super::*;
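        // What follows is effectively the body of the division generated by
        // `impl_delegate` above, manually specialized to `$uH = u32`, `$uX = u64`,
        // `$uD = u128`, and `$n_h = 32`; see the comments in the macro for an
        // explanation of each branch.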
        let duo_lo = duo as u64;
        let duo_hi = (duo >> 64) as u64;
        let div_lo = div as u64;
        let div_hi = (div >> 64) as u64;

        match (div_lo == 0, div_hi == 0, duo_hi == 0) {
            (true, true, _) => zero_div_fn(),
            (_, false, true) => {
                *rem = duo;
                return 0;
            }
            (false, true, true) => {
                let tmp = u64_by_u64_div_rem(duo_lo, div_lo);
                *rem = tmp.1 as u128;
                return tmp.0 as u128;
            }
            (false, true, false) => {
                if duo_hi < div_lo {
                    let norm_shift = u64_normalization_shift(div_lo, duo_hi, false);
                    let shl = if norm_shift == 0 {
                        64 - 1
                    } else {
                        64 - norm_shift
                    };

                    let mut div: u128 = div << shl;
                    let mut pow_lo: u64 = 1 << shl;
                    let mut quo_lo: u64 = 0;
                    let mut duo = duo;
                    loop {
                        let sub = duo.wrapping_sub(div);
                        if 0 <= (sub as i128) {
                            duo = sub;
                            quo_lo |= pow_lo;
                            let duo_hi = (duo >> 64) as u64;
                            if duo_hi == 0 {
                                let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
                                *rem = tmp.1 as u128;
                                return (quo_lo | tmp.0) as u128;
                            }
                        }
                        div >>= 1;
                        pow_lo >>= 1;
                    }
                } else if duo_hi == div_lo {
                    let tmp = u64_by_u64_div_rem(duo as u64, div as u64);
                    *rem = tmp.1 as u128;
                    return (1 << 64) | (tmp.0 as u128);
                } else {
                    if (div_lo >> 32) == 0 {
                        let div_0 = div_lo as u32 as u64;
                        let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0);

                        let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32);
                        let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0);

                        let duo_lo = (duo as u32 as u64) | (rem_2 << 32);
                        let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0);

                        *rem = rem_1 as u128;
                        return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64);
                    }

                    let duo_lo = duo as u64;
                    let tmp = u64_by_u64_div_rem(duo_hi, div_lo);
                    let quo_hi = tmp.0;
                    let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64);
                    if duo < div {
                        *rem = duo;
                        return (quo_hi as u128) << 64;
                    }

                    let mut div: u128 = div << (64 - 1);
                    let mut pow_lo: u64 = 1 << (64 - 1);
                    let mut quo_lo: u64 = 0;
                    loop {
                        let sub = duo.wrapping_sub(div);
                        if 0 <= (sub as i128) {
                            duo = sub;
                            quo_lo |= pow_lo;
                            let duo_hi = (duo >> 64) as u64;
                            if duo_hi == 0 {
                                let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
                                *rem = tmp.1 as u128;
                                return (tmp.0 as u128) | (quo_lo as u128) | ((quo_hi as u128) << 64);
                            }
                        }
                        div >>= 1;
                        pow_lo >>= 1;
                    }
                }
            }
            (_, false, false) => {
                if duo < div {
                    *rem = duo;
                    return 0;
                }
                let div_original = div;
                let shl = u64_normalization_shift(duo_hi, div_hi, false);
                let mut duo = duo;
                let mut div: u128 = div << shl;
                let mut pow_lo: u64 = 1 << shl;
                let mut quo_lo: u64 = 0;
                loop {
                    let sub = duo.wrapping_sub(div);
                    if 0 <= (sub as i128) {
                        duo = sub;
                        quo_lo |= pow_lo;
                        if duo < div_original {
                            *rem = duo;
                            return quo_lo as u128;
                        }
                    }
                    div >>= 1;
                    pow_lo >>= 1;
                }
            }
        }
    }
}
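
// A minimal usage sketch of `u128_divide_sparc` (illustrative values):
//
// let mut rem: u128 = 0;
// let quo = u128_divide_sparc(u128::MAX, 3, &mut rem);
// assert_eq!(quo, u128::MAX / 3);
// assert_eq!(rem, u128::MAX % 3);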