bigint.rs source code [crates/minimal_lexical/src/bigint.rs]

1	//! A simple big-integer type for slow path algorithms.
2	//!
3	//! This includes minimal stackvector for use in big-integer arithmetic.
4
5	#![doc(hidden)]
6
7	#[cfg(feature = "alloc")]
8	use crate::heapvec::HeapVec;
9	use crate::num::Float;
10	#[cfg(not(feature = "alloc"))]
11	use crate::stackvec::StackVec;
12	#[cfg(not(feature = "compact"))]
13	use crate::table::{LARGE_POW5, LARGE_POW5_STEP};
14	use core::{cmp, ops, ptr};
15
16	/// Number of bits in a Bigint.
17	///
18	/// This needs to be at least the number of bits required to store
19	/// a Bigint, which is `log2(radixdigits)`.
20	/// ≅ 3600 for base-10, rounded-up.
21	pub const BIGINT_BITS: usize = `4000`;
22
23	/// The number of limbs for the bigint.
24	pub const BIGINT_LIMBS: usize = BIGINT_BITS / LIMB_BITS;
25
26	#[cfg(feature = "alloc")]
27	pub type VecType = HeapVec;
28
29	#[cfg(not(feature = "alloc"))]
30	pub type VecType = StackVec;
31
32	/// Storage for a big integer type.
33	///
34	/// This is used for algorithms when we have a finite number of digits.
35	/// Specifically, it stores all the significant digits scaled to the
36	/// proper exponent, as an integral type, and then directly compares
37	/// these digits.
38	///
39	/// This requires us to store the number of significant bits, plus the
40	/// number of exponent bits (required) since we scale everything
41	/// to the same exponent.
42	#[derive(Clone, PartialEq, Eq)]
43	pub struct Bigint {
44	/// Significant digits for the float, stored in a big integer in LE order.
45	///
46	/// This is pretty much the same number of digits for any radix, since the
47	/// significant digits balances out the zeros from the exponent:
48	/// 1. Decimal is 1091 digits, 767 mantissa digits + 324 exponent zeros.
49	/// 2. Base 6 is 1097 digits, or 680 mantissa digits + 417 exponent zeros.
50	/// 3. Base 36 is 1086 digits, or 877 mantissa digits + 209 exponent zeros.
51	///
52	/// However, the number of bytes required is larger for large radixes:
53	/// for decimal, we need `log2(101091) ≅ 3600`, while for base 36
54	/// we need `log2(361086) ≅ 5600`. Since we use uninitialized data,
55	/// we avoid a major performance hit from the large buffer size.
56	pub data: VecType,
57	}
58
59	#[allow(clippy::new_without_default)]
60	impl Bigint {
61	/// Construct a bigint representing 0.
62	#[inline(always)]
63	pub fn new() -> Self {
64	Self {
65	data: VecType::new(),
66	}
67	}
68
69	/// Construct a bigint from an integer.
70	#[inline(always)]
71	pub fn from_u64(value: u64) -> Self {
72	Self {
73	data: VecType::from_u64(value),
74	}
75	}
76
77	#[inline(always)]
78	pub fn hi64(&self) -> (u64, bool) {
79	self.data.hi64()
80	}
81
82	/// Multiply and assign as if by exponentiation by a power.
83	#[inline]
84	pub fn pow(&mut self, base: u32, exp: u32) -> Option<()> {
85	debug_assert!(base == `2` \|\| base == `5` \|\| base == `10`);
86	if base % `5` == `0` {
87	pow(&mut self.data, exp)?;
88	}
89	if base % `2` == `0` {
90	shl(&mut self.data, exp as usize)?;
91	}
92	Some(())
93	}
94
95	/// Calculate the bit-length of the big-integer.
96	#[inline]
97	pub fn bit_length(&self) -> u32 {
98	bit_length(&self.data)
99	}
100	}
101
102	impl ops::MulAssign<&Bigint> for Bigint {
103	fn mul_assign(&mut self, rhs: &Bigint) {
104	self.data *= &rhs.data;
105	}
106	}
107
108	/// REVERSE VIEW
109
110	/// Reverse, immutable view of a sequence.
111	pub struct ReverseView<'a, T: 'a> {
112	inner: &'a [T],
113	}
114
115	impl<'a, T> ops::Index<usize> for ReverseView<'a, T> {
116	type Output = T;
117
118	#[inline]
119	fn index(&self, index: usize) -> &T {
120	let len: usize = self.inner.len();
121	&(*self.inner)[len - index - `1`]
122	}
123	}
124
125	/// Create a reverse view of the vector for indexing.
126	#[inline]
127	pub fn rview(x: &[Limb]) -> ReverseView<Limb> {
128	ReverseView {
129	inner: x,
130	}
131	}
132
133	// COMPARE
134	// -------
135
136	/// Compare `x` to `y`, in little-endian order.
137	#[inline]
138	pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
139	match x.len().cmp(&y.len()) {
140	cmp::Ordering::Equal => {
141	let iter = x.iter().rev().zip(y.iter().rev());
142	for (&xi, yi) in iter {
143	match xi.cmp(yi) {
144	cmp::Ordering::Equal => (),
145	ord: Ordering => return ord,
146	}
147	}
148	// Equal case.
149	cmp::Ordering::Equal
150	},
151	ord: Ordering => ord,
152	}
153	}
154
155	// NORMALIZE
156	// ---------
157
158	/// Normalize the integer, so any leading zero values are removed.
159	#[inline]
160	pub fn normalize(x: &mut VecType) {
161	// We don't care if this wraps: the index is bounds-checked.
162	while let Some(&value) = x.get(index:x.len().wrapping_sub(`1`)) {
163	if value == `0` {
164	unsafe { x.set_len(x.len() - `1`) };
165	} else {
166	break;
167	}
168	}
169	}
170
171	/// Get if the big integer is normalized.
172	#[inline]
173	#[allow(clippy::match_like_matches_macro)]
174	pub fn is_normalized(x: &[Limb]) -> bool {
175	// We don't care if this wraps: the index is bounds-checked.
176	match x.get(index:x.len().wrapping_sub(`1`)) {
177	Some(&`0`) => `false`,
178	_ => `true`,
179	}
180	}
181
182	// FROM
183	// ----
184
185	/// Create StackVec from u64 value.
186	#[inline(always)]
187	#[allow(clippy::branches_sharing_code)]
188	pub fn from_u64(x: u64) -> VecType {
189	let mut vec: StackVec = VecType::new();
190	debug_assert!(vec.capacity() >= `2`);
191	if LIMB_BITS == `32` {
192	vec.try_push(x as Limb).unwrap();
193	vec.try_push((x >> `32`) as Limb).unwrap();
194	} else {
195	vec.try_push(x as Limb).unwrap();
196	}
197	vec.normalize();
198	vec
199	}
200
201	// HI
202	// --
203
204	/// Check if any of the remaining bits are non-zero.
205	///
206	/// # Safety
207	///
208	/// Safe as long as `rindex <= x.len()`.
209	#[inline]
210	pub fn nonzero(x: &[Limb], rindex: usize) -> bool {
211	debug_assert!(rindex <= x.len());
212
213	let len: usize = x.len();
214	let slc: &[u64] = &x[..len - rindex];
215	slc.iter().rev().any(\|&x\| x != `0`)
216	}
217
218	// These return the high X bits and if the bits were truncated.
219
220	/// Shift 32-bit integer to high 64-bits.
221	#[inline]
222	pub fn u32_to_hi64_1(r0: u32) -> (u64, bool) {
223	u64_to_hi64_1(r0 as u64)
224	}
225
226	/// Shift 2 32-bit integers to high 64-bits.
227	#[inline]
228	pub fn u32_to_hi64_2(r0: u32, r1: u32) -> (u64, bool) {
229	let r0: u64 = (r0 as u64) << `32`;
230	let r1: u64 = r1 as u64;
231	u64_to_hi64_1(r0:r0 \| r1)
232	}
233
234	/// Shift 3 32-bit integers to high 64-bits.
235	#[inline]
236	pub fn u32_to_hi64_3(r0: u32, r1: u32, r2: u32) -> (u64, bool) {
237	let r0: u64 = r0 as u64;
238	let r1: u64 = (r1 as u64) << `32`;
239	let r2: u64 = r2 as u64;
240	u64_to_hi64_2(r0, r1:r1 \| r2)
241	}
242
243	/// Shift 64-bit integer to high 64-bits.
244	#[inline]
245	pub fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
246	let ls: u32 = r0.leading_zeros();
247	(r0 << ls, `false`)
248	}
249
250	/// Shift 2 64-bit integers to high 64-bits.
251	#[inline]
252	pub fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
253	let ls: u32 = r0.leading_zeros();
254	let rs: u32 = `64` - ls;
255	let v: u64 = match ls {
256	`0` => r0,
257	_ => (r0 << ls) \| (r1 >> rs),
258	};
259	let n: bool = r1 << ls != `0`;
260	(v, n)
261	}
262
263	/// Extract the hi bits from the buffer.
264	macro_rules! hi {
265	// # Safety
266	//
267	// Safe as long as the `stackvec.len() >= 1`.
268	(@`1` $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
269	$fn($rview[`0`] as $t)
270	}};
271
272	// # Safety
273	//
274	// Safe as long as the `stackvec.len() >= 2`.
275	(@`2` $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
276	let r0 = $rview[`0`] as $t;
277	let r1 = $rview[`1`] as $t;
278	$fn(r0, r1)
279	}};
280
281	// # Safety
282	//
283	// Safe as long as the `stackvec.len() >= 2`.
284	(@nonzero2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
285	let (v, n) = hi!(@`2` $self, $rview, $t, $fn);
286	(v, n \|\| nonzero($self, `2` ))
287	}};
288
289	// # Safety
290	//
291	// Safe as long as the `stackvec.len() >= 3`.
292	(@`3` $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
293	let r0 = $rview[`0`] as $t;
294	let r1 = $rview[`1`] as $t;
295	let r2 = $rview[`2`] as $t;
296	$fn(r0, r1, r2)
297	}};
298
299	// # Safety
300	//
301	// Safe as long as the `stackvec.len() >= 3`.
302	(@nonzero3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
303	let (v, n) = hi!(@`3` $self, $rview, $t, $fn);
304	(v, n \|\| nonzero($self, `3`))
305	}};
306	}
307
308	/// Get the high 64 bits from the vector.
309	#[inline(always)]
310	pub fn hi64(x: &[Limb]) -> (u64, bool) {
311	let rslc: ReverseView<'_, u64> = rview(x);
312	// SAFETY: the buffer must be at least length bytes long.
313	match x.len() {
314	`0` => (`0`, `false`),
315	`1` if LIMB_BITS == `32` => hi!(@`1` x, rslc, u32, u32_to_hi64_1),
316	`1` => hi!(@`1` x, rslc, u64, u64_to_hi64_1),
317	`2` if LIMB_BITS == `32` => hi!(@`2` x, rslc, u32, u32_to_hi64_2),
318	`2` => hi!(@`2` x, rslc, u64, u64_to_hi64_2),
319	_ if LIMB_BITS == `32` => hi!(@nonzero3 x, rslc, u32, u32_to_hi64_3),
320	_ => hi!(@nonzero2 x, rslc, u64, u64_to_hi64_2),
321	}
322	}
323
324	// POWERS
325	// ------
326
327	/// MulAssign by a power of 5.
328	///
329	/// Theoretically...
330	///
331	/// Use an exponentiation by squaring method, since it reduces the time
332	/// complexity of the multiplication to ~`O(log(n))` for the squaring,
333	/// and `O(nm)` for the result. Since `m` is typically a lower-order*
334	/// factor, this significantly reduces the number of multiplications
335	/// we need to do. Iteratively multiplying by small powers follows
336	/// the nth triangular number series, which scales as `O(p^2)`, but
337	/// where `p` is `n+m`. In short, it scales very poorly.
338	///
339	/// Practically....
340	///
341	/// Exponentiation by Squaring:
342	/// running 2 tests
343	/// test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78)
344	/// test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007)
345	///
346	/// Exponentiation by Iterative Small Powers:
347	/// running 2 tests
348	/// test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31)
349	/// test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47)
350	///
351	/// Exponentiation by Iterative Large Powers (of 2):
352	/// running 2 tests
353	/// test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31)
354	/// test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47)
355	///
356	/// The following benchmarks were run on `1 5^300`, using native `pow`,*
357	/// a version with only small powers, and one with pre-computed powers
358	/// of `5^(3 max_exp)`, rather than `5^(5 * max_exp)`.*
359	///
360	/// However, using large powers is crucial for good performance for higher
361	/// powers.
362	/// pow/default time: [426.20 ns 427.96 ns 429.89 ns]
363	/// pow/small time: [2.9270 us 2.9411 us 2.9565 us]
364	/// pow/large:3 time: [838.51 ns 842.21 ns 846.27 ns]
365	///
366	/// Even using worst-case scenarios, exponentiation by squaring is
367	/// significantly slower for our workloads. Just multiply by small powers,
368	/// in simple cases, and use precalculated large powers in other cases.
369	///
370	/// Furthermore, using sufficiently big large powers is also crucial for
371	/// performance. This is a tradeoff of binary size and performance, and
372	/// using a single value at ~`5^(5 max_exp)` seems optimal.*
373	pub fn pow(x: &mut VecType, mut exp: u32) -> Option<()> {
374	// Minimize the number of iterations for large exponents: just
375	// do a few steps with a large powers.
376	#[cfg(not(feature = "compact"))]
377	{
378	while exp >= LARGE_POW5_STEP {
379	large_mul(x, &LARGE_POW5)?;
380	exp -= LARGE_POW5_STEP;
381	}
382	}
383
384	// Now use our pre-computed small powers iteratively.
385	// This is calculated as `⌊log(2^BITS - 1, 5)⌋`.
386	let small_step = if LIMB_BITS == `32` {
387	`13`
388	} else {
389	`27`
390	};
391	let max_native = (`5` as Limb).pow(small_step);
392	while exp >= small_step {
393	small_mul(x, max_native)?;
394	exp -= small_step;
395	}
396	if exp != `0` {
397	// SAFETY: safe, since `exp < small_step`.
398	let small_power = unsafe { f64::int_pow_fast_path(exp as usize, `5`) };
399	small_mul(x, small_power as Limb)?;
400	}
401	Some(())
402	}
403
404	// SCALAR
405	// ------
406
407	/// Add two small integers and return the resulting value and if overflow happens.
408	#[inline(always)]
409	pub fn scalar_add(x: Limb, y: Limb) -> (Limb, bool) {
410	x.overflowing_add(y)
411	}
412
413	/// Multiply two small integers (with carry) (and return the overflow contribution).
414	///
415	/// Returns the (low, high) components.
416	#[inline(always)]
417	pub fn scalar_mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
418	// Cannot overflow, as long as wide is 2x as wide. This is because
419	// the following is always true:
420	// `Wide::MAX - (Narrow::MAX Narrow::MAX) >= Narrow::MAX`*
421	let z: Wide = (x as Wide) * (y as Wide) + (carry as Wide);
422	(z as Limb, (z >> LIMB_BITS) as Limb)
423	}
424
425	// SMALL
426	// -----
427
428	/// Add small integer to bigint starting from offset.
429	#[inline]
430	pub fn small_add_from(x: &mut VecType, y: Limb, start: usize) -> Option<()> {
431	let mut index: usize = start;
432	let mut carry: u64 = y;
433	while carry != `0` && index < x.len() {
434	let result: (u64, bool) = scalar_add(x[index], y:carry);
435	x[index] = result.0;
436	carry = result.1 as Limb;
437	index += `1`;
438	}
439	// If we carried past all the elements, add to the end of the buffer.
440	if carry != `0` {
441	x.try_push(carry)?;
442	}
443	Some(())
444	}
445
446	/// Add small integer to bigint.
447	#[inline(always)]
448	pub fn small_add(x: &mut VecType, y: Limb) -> Option<()> {
449	small_add_from(x, y, start:`0`)
450	}
451
452	/// Multiply bigint by small integer.
453	#[inline]
454	pub fn small_mul(x: &mut VecType, y: Limb) -> Option<()> {
455	let mut carry: u64 = `0`;
456	for xi: &mut u64 in x.iter_mut() {
457	let result: (u64, u64) = scalar_mul(*xi, y, carry);
458	*xi = result.0;
459	carry = result.1;
460	}
461	// If we carried past all the elements, add to the end of the buffer.
462	if carry != `0` {
463	x.try_push(carry)?;
464	}
465	Some(())
466	}
467
468	// LARGE
469	// -----
470
471	/// Add bigint to bigint starting from offset.
472	pub fn large_add_from(x: &mut VecType, y: &[Limb], start: usize) -> Option<()> {
473	// The effective x buffer is from `xstart..x.len()`, so we need to treat
474	// that as the current range. If the effective y buffer is longer, need
475	// to resize to that, + the start index.
476	if y.len() > x.len().saturating_sub(start) {
477	// Ensure we panic if we can't extend the buffer.
478	// This avoids any unsafe behavior afterwards.
479	x.try_resize(y.len() + start, `0`)?;
480	}
481
482	// Iteratively add elements from y to x.
483	let mut carry = `false`;
484	for (index, &yi) in y.iter().enumerate() {
485	// We panicked in `try_resize` if this wasn't true.
486	let xi = x.get_mut(start + index).unwrap();
487
488	// Only one op of the two ops can overflow, since we added at max
489	// Limb::max_value() + Limb::max_value(). Add the previous carry,
490	// and store the current carry for the next.
491	let result = scalar_add(*xi, yi);
492	*xi = result.0;
493	let mut tmp = result.1;
494	if carry {
495	let result = scalar_add(*xi, `1`);
496	*xi = result.0;
497	tmp \|= result.1;
498	}
499	carry = tmp;
500	}
501
502	// Handle overflow.
503	if carry {
504	small_add_from(x, `1`, y.len() + start)?;
505	}
506	Some(())
507	}
508
509	/// Add bigint to bigint.
510	#[inline(always)]
511	pub fn large_add(x: &mut VecType, y: &[Limb]) -> Option<()> {
512	large_add_from(x, y, start:`0`)
513	}
514
515	/// Grade-school multiplication algorithm.
516	///
517	/// Slow, naive algorithm, using limb-bit bases and just shifting left for
518	/// each iteration. This could be optimized with numerous other algorithms,
519	/// but it's extremely simple, and works in O(nm) time, which is fine*
520	/// by me. Each iteration, of which there are `m` iterations, requires
521	/// `n` multiplications, and `n` additions, or grade-school multiplication.
522	///
523	/// Don't use Karatsuba multiplication, since out implementation seems to
524	/// be slower asymptotically, which is likely just due to the small sizes
525	/// we deal with here. For example, running on the following data:
526	///
527	/// ```text
528	/// const SMALL_X: &[u32] = &[
529	/// 766857581, 3588187092, 1583923090, 2204542082, 1564708913, 2695310100, 3676050286,
530	/// 1022770393, 468044626, 446028186
531	/// ];
532	/// const SMALL_Y: &[u32] = &[
533	/// 3945492125, 3250752032, 1282554898, 1708742809, 1131807209, 3171663979, 1353276095,
534	/// 1678845844, 2373924447, 3640713171
535	/// ];
536	/// const LARGE_X: &[u32] = &[
537	/// 3647536243, 2836434412, 2154401029, 1297917894, 137240595, 790694805, 2260404854,
538	/// 3872698172, 690585094, 99641546, 3510774932, 1672049983, 2313458559, 2017623719,
539	/// 638180197, 1140936565, 1787190494, 1797420655, 14113450, 2350476485, 3052941684,
540	/// 1993594787, 2901001571, 4156930025, 1248016552, 848099908, 2660577483, 4030871206,
541	/// 692169593, 2835966319, 1781364505, 4266390061, 1813581655, 4210899844, 2137005290,
542	/// 2346701569, 3715571980, 3386325356, 1251725092, 2267270902, 474686922, 2712200426,
543	/// 197581715, 3087636290, 1379224439, 1258285015, 3230794403, 2759309199, 1494932094,
544	/// 326310242
545	/// ];
546	/// const LARGE_Y: &[u32] = &[
547	/// 1574249566, 868970575, 76716509, 3198027972, 1541766986, 1095120699, 3891610505,
548	/// 2322545818, 1677345138, 865101357, 2650232883, 2831881215, 3985005565, 2294283760,
549	/// 3468161605, 393539559, 3665153349, 1494067812, 106699483, 2596454134, 797235106,
550	/// 705031740, 1209732933, 2732145769, 4122429072, 141002534, 790195010, 4014829800,
551	/// 1303930792, 3649568494, 308065964, 1233648836, 2807326116, 79326486, 1262500691,
552	/// 621809229, 2258109428, 3819258501, 171115668, 1139491184, 2979680603, 1333372297,
553	/// 1657496603, 2790845317, 4090236532, 4220374789, 601876604, 1828177209, 2372228171,
554	/// 2247372529
555	/// ];
556	/// ```
557	///
558	/// We get the following results:
559
560	/// ```text
561	/// mul/small:long time: [220.23 ns 221.47 ns 222.81 ns]
562	/// Found 4 outliers among 100 measurements (4.00%)
563	/// 2 (2.00%) high mild
564	/// 2 (2.00%) high severe
565	/// mul/small:karatsuba time: [233.88 ns 234.63 ns 235.44 ns]
566	/// Found 11 outliers among 100 measurements (11.00%)
567	/// 8 (8.00%) high mild
568	/// 3 (3.00%) high severe
569	/// mul/large:long time: [1.9365 us 1.9455 us 1.9558 us]
570	/// Found 12 outliers among 100 measurements (12.00%)
571	/// 7 (7.00%) high mild
572	/// 5 (5.00%) high severe
573	/// mul/large:karatsuba time: [4.4250 us 4.4515 us 4.4812 us]
574	/// ```
575	///
576	/// In short, Karatsuba multiplication is never worthwhile for out use-case.
577	pub fn long_mul(x: &[Limb], y: &[Limb]) -> Option<VecType> {
578	// Using the immutable value, multiply by all the scalars in y, using
579	// the algorithm defined above. Use a single buffer to avoid
580	// frequent reallocations. Handle the first case to avoid a redundant
581	// addition, since we know y.len() >= 1.
582	let mut z: ! = VecType::try_from(x)?;
583	if !y.is_empty() {
584	let y0: u64 = y[`0`];
585	small_mul(&mut z, y:y0)?;
586
587	for (index: usize, &yi: u64) in y.iter().enumerate().skip(`1`) {
588	if yi != `0` {
589	let mut zi: [u64] = VecType::try_from(x)?;
590	small_mul(&mut zi, y:yi)?;
591	large_add_from(&mut z, &zi, start:index)?;
592	}
593	}
594	}
595
596	z.normalize();
597	Some(z)
598	}
599
600	/// Multiply bigint by bigint using grade-school multiplication algorithm.
601	#[inline(always)]
602	pub fn large_mul(x: &mut VecType, y: &[Limb]) -> Option<()> {
603	// Karatsuba multiplication never makes sense, so just use grade school
604	// multiplication.
605	if y.len() == `1` {
606	// SAFETY: safe since `y.len() == 1`.
607	small_mul(x, y[`0`])?;
608	} else {
609	*x = long_mul(x:y, y:x)?;
610	}
611	Some(())
612	}
613
614	// SHIFT
615	// -----
616
617	/// Shift-left `n` bits inside a buffer.
618	#[inline]
619	pub fn shl_bits(x: &mut VecType, n: usize) -> Option<()> {
620	debug_assert!(n != `0`);
621
622	// Internally, for each item, we shift left by n, and add the previous
623	// right shifted limb-bits.
624	// For example, we transform (for u8) shifted left 2, to:
625	// b10100100 b01000010
626	// b10 b10010001 b00001000
627	debug_assert!(n < LIMB_BITS);
628	let rshift = LIMB_BITS - n;
629	let lshift = n;
630	let mut prev: Limb = `0`;
631	for xi in x.iter_mut() {
632	let tmp = *xi;
633	*xi <<= lshift;
634	*xi \|= prev >> rshift;
635	prev = tmp;
636	}
637
638	// Always push the carry, even if it creates a non-normal result.
639	let carry = prev >> rshift;
640	if carry != `0` {
641	x.try_push(carry)?;
642	}
643
644	Some(())
645	}
646
647	/// Shift-left `n` limbs inside a buffer.
648	#[inline]
649	pub fn shl_limbs(x: &mut VecType, n: usize) -> Option<()> {
650	debug_assert!(n != `0`);
651	if n + x.len() > x.capacity() {
652	None
653	} else if !x.is_empty() {
654	let len: usize = n + x.len();
655	// SAFE: since x is not empty, and `x.len() + n <= x.capacity()`.
656	unsafe {
657	// Move the elements.
658	let src: *const u64 = x.as_ptr();
659	let dst: *mut u64 = x.as_mut_ptr().add(count:n);
660	ptr::copy(src, dst, count:x.len());
661	// Write our 0s.
662	ptr::write_bytes(dst:x.as_mut_ptr(), val:`0`, count:n);
663	x.set_len(len);
664	}
665	Some(())
666	} else {
667	Some(())
668	}
669	}
670
671	/// Shift-left buffer by n bits.
672	#[inline]
673	pub fn shl(x: &mut VecType, n: usize) -> Option<()> {
674	let rem: usize = n % LIMB_BITS;
675	let div: usize = n / LIMB_BITS;
676	if rem != `0` {
677	shl_bits(x, n:rem)?;
678	}
679	if div != `0` {
680	shl_limbs(x, n:div)?;
681	}
682	Some(())
683	}
684
685	/// Get number of leading zero bits in the storage.
686	#[inline]
687	pub fn leading_zeros(x: &[Limb]) -> u32 {
688	let length: usize = x.len();
689	// wrapping_sub is fine, since it'll just return None.
690	if let Some(&value) = x.get(index:length.wrapping_sub(`1`)) {
691	value.leading_zeros()
692	} else {
693	`0`
694	}
695	}
696
697	/// Calculate the bit-length of the big-integer.
698	#[inline]
699	pub fn bit_length(x: &[Limb]) -> u32 {
700	let nlz: u32 = leading_zeros(x);
701	LIMB_BITS as u32 * x.len() as u32 - nlz
702	}
703
704	// LIMB
705	// ----
706
707	// Type for a single limb of the big integer.
708	//
709	// A limb is analogous to a digit in base10, except, it stores 32-bit
710	// or 64-bit numbers instead. We want types where 64-bit multiplication
711	// is well-supported by the architecture, rather than emulated in 3
712	// instructions. The quickest way to check this support is using a
713	// cross-compiler for numerous architectures, along with the following
714	// source file and command:
715	//
716	// Compile with `gcc main.c -c -S -O3 -masm=intel`
717	//
718	// And the source code is:
719	// ```text
720	// #include <stdint.h>
721	//
722	// struct i128 {
723	// uint64_t hi;
724	// uint64_t lo;
725	// };
726	//
727	// // Type your code here, or load an example.
728	// struct i128 square(uint64_t x, uint64_t y) {
729	// __int128 prod = (__int128)x (__int128)y;*
730	// struct i128 z;
731	// z.hi = (uint64_t)(prod >> 64);
732	// z.lo = (uint64_t)prod;
733	// return z;
734	// }
735	// ```
736	//
737	// If the result contains `call __multi3`, then the multiplication
738	// is emulated by the compiler. Otherwise, it's natively supported.
739	//
740	// This should be all-known 64-bit platforms supported by Rust.
741	// https://forge.rust-lang.org/platform-support.html
742	//
743	// # Supported
744	//
745	// Platforms where native 128-bit multiplication is explicitly supported:
746	// - x86_64 (Supported via `MUL`).
747	// - mips64 (Supported via `DMULTU`, which `HI` and `LO` can be read-from).
748	// - s390x (Supported via `MLGR`).
749	//
750	// # Efficient
751	//
752	// Platforms where native 64-bit multiplication is supported and
753	// you can extract hi-lo for 64-bit multiplications.
754	// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
755	// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
756	// - riscv64 (Requires `MUL` and `MULH` to capture high and low bits).
757	//
758	// # Unsupported
759	//
760	// Platforms where native 128-bit multiplication is not supported,
761	// requiring software emulation.
762	// sparc64 (`UMUL` only supports double-word arguments).
763	// sparcv9 (Same as sparc64).
764	//
765	// These tests are run via `xcross`, my own library for C cross-compiling,
766	// which supports numerous targets (far in excess of Rust's tier 1 support,
767	// or rust-embedded/cross's list). xcross may be found here:
768	// https://github.com/Alexhuszagh/xcross
769	//
770	// To compile for the given target, run:
771	// `xcross gcc main.c -c -S -O3 --target $target`
772	//
773	// All 32-bit architectures inherently do not have support. That means
774	// we can essentially look for 64-bit architectures that are not SPARC.
775
776	#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
777	pub type Limb = u64;
778	#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
779	pub type Wide = u128;
780	#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
781	pub const LIMB_BITS: usize = `64`;
782
783	#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
784	pub type Limb = u32;
785	#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
786	pub type Wide = u64;
787	#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
788	pub const LIMB_BITS: usize = `32`;
789