//! A simple big-integer type for slow path algorithms.
//!
//! This includes a minimal stackvector for use in big-integer arithmetic.

#![doc(hidden)]

#[cfg(feature = "alloc")]
use crate::heapvec::HeapVec;
use crate::num::Float;
#[cfg(not(feature = "alloc"))]
use crate::stackvec::StackVec;
#[cfg(not(feature = "compact"))]
use crate::table::{LARGE_POW5, LARGE_POW5_STEP};
use core::{cmp, ops, ptr};

/// Number of bits in a Bigint.
///
/// This needs to be at least the number of bits required to store
/// a Bigint, which is `log2(radix**digits)`.
/// ≅ 3600 for base-10, rounded-up.
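///
/// For example, base-10 requires up to 1091 significant digits (see the
/// `Bigint` docs below), and `log2(10^1091) ≈ 1091 * log2(10) ≈ 3624`
/// bits, which is then rounded up to 4000.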
pub const BIGINT_BITS: usize = 4000;

/// The number of limbs for the bigint.
pub const BIGINT_LIMBS: usize = BIGINT_BITS / LIMB_BITS;

#[cfg(feature = "alloc")]
pub type VecType = HeapVec;

#[cfg(not(feature = "alloc"))]
pub type VecType = StackVec;

/// Storage for a big integer type.
///
/// This is used for algorithms when we have a finite number of digits.
/// Specifically, it stores all the significant digits scaled to the
/// proper exponent, as an integral type, and then directly compares
/// these digits.
///
/// This requires us to store the number of significant bits, plus the
/// number of exponent bits, since we scale everything to the same
/// exponent.
#[derive(Clone, PartialEq, Eq)]
pub struct Bigint {
    /// Significant digits for the float, stored in a big integer in LE order.
    ///
    /// This is pretty much the same number of digits for any radix, since the
    /// significant digits balance out the zeros from the exponent:
    /// 1. Decimal is 1091 digits, or 767 mantissa digits + 324 exponent zeros.
    /// 2. Base 6 is 1097 digits, or 680 mantissa digits + 417 exponent zeros.
    /// 3. Base 36 is 1086 digits, or 877 mantissa digits + 209 exponent zeros.
    ///
    /// However, the number of bytes required is larger for large radixes:
    /// for decimal, we need `log2(10**1091) ≅ 3600`, while for base 36
    /// we need `log2(36**1086) ≅ 5600`. Since we use uninitialized data,
    /// we avoid a major performance hit from the large buffer size.
    pub data: VecType,
}

#[allow(clippy::new_without_default)]
impl Bigint {
    /// Construct a bigint representing 0.
    #[inline(always)]
    pub fn new() -> Self {
        Self {
            data: VecType::new(),
        }
    }

    /// Construct a bigint from an integer.
    #[inline(always)]
    pub fn from_u64(value: u64) -> Self {
        Self {
            data: VecType::from_u64(value),
        }
    }

    /// Get the high 64 bits from the bigint, and if any lower bits were truncated.
    #[inline(always)]
    pub fn hi64(&self) -> (u64, bool) {
        self.data.hi64()
    }

    /// Multiply the bigint in place by `base^exp`, where `base` is 2, 5, or 10.
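    ///
    /// A rough sketch of what this does for base 10 (illustrative, not a
    /// doctest):
    ///
    /// ```text
    /// // 10^exp == 5^exp * 2^exp, so both branches below run:
    /// let mut x = Bigint::from_u64(3);
    /// x.pow(10, 2);
    /// // internally: pow(&mut x.data, 2) multiplies by 5^2 == 25,
    /// //             shl(&mut x.data, 2) shifts left 2 bits (* 2^2),
    /// // so x now represents 3 * 10^2 == 300.
    /// ```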
    #[inline]
    pub fn pow(&mut self, base: u32, exp: u32) -> Option<()> {
        debug_assert!(base == 2 || base == 5 || base == 10);
        if base % 5 == 0 {
            pow(&mut self.data, exp)?;
        }
        if base % 2 == 0 {
            shl(&mut self.data, exp as usize)?;
        }
        Some(())
    }

    /// Calculate the bit-length of the big-integer.
    #[inline]
    pub fn bit_length(&self) -> u32 {
        bit_length(&self.data)
    }
}

impl ops::MulAssign<&Bigint> for Bigint {
    fn mul_assign(&mut self, rhs: &Bigint) {
        self.data *= &rhs.data;
    }
}

// REVERSE VIEW
// ------------

/// Reverse, immutable view of a sequence.
pub struct ReverseView<'a, T: 'a> {
    inner: &'a [T],
}

impl<'a, T> ops::Index<usize> for ReverseView<'a, T> {
    type Output = T;

    #[inline]
    fn index(&self, index: usize) -> &T {
        let len = self.inner.len();
        &(*self.inner)[len - index - 1]
    }
}

/// Create a reverse view of the vector for indexing.
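///
/// A quick sketch of the indexing (illustrative, not a doctest):
///
/// ```text
/// let x: [Limb; 3] = [1, 2, 3];
/// let rev = rview(&x);
/// assert_eq!(rev[0], 3);
/// assert_eq!(rev[2], 1);
/// ```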
#[inline]
pub fn rview(x: &[Limb]) -> ReverseView<Limb> {
    ReverseView {
        inner: x,
    }
}

// COMPARE
// -------

/// Compare `x` to `y`, in little-endian order.
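///
/// For meaningful results both slices should be normalized (no leading
/// zero limbs). For example, `[1]` is less than `[0, 1]` since it has
/// fewer limbs, and `[0, 2]` is greater than `[1, 1]` since the
/// most-significant (last) limb is compared first.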
#[inline]
pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
    match x.len().cmp(&y.len()) {
        cmp::Ordering::Equal => {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, yi) in iter {
                match xi.cmp(yi) {
                    cmp::Ordering::Equal => (),
                    ord => return ord,
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        },
        ord => ord,
    }
}

// NORMALIZE
// ---------

/// Normalize the integer, so any leading zero values are removed.
#[inline]
pub fn normalize(x: &mut VecType) {
    // We don't care if this wraps: the index is bounds-checked.
    while let Some(&value) = x.get(x.len().wrapping_sub(1)) {
        if value == 0 {
            // SAFETY: safe, since `get` returned `Some`, so `x.len() >= 1`.
            unsafe { x.set_len(x.len() - 1) };
        } else {
            break;
        }
    }
}

/// Get if the big integer is normalized.
#[inline]
#[allow(clippy::match_like_matches_macro)]
pub fn is_normalized(x: &[Limb]) -> bool {
    // We don't care if this wraps: the index is bounds-checked.
    match x.get(x.len().wrapping_sub(1)) {
        Some(&0) => false,
        _ => true,
    }
}

// FROM
// ----

/// Create a `VecType` from a `u64` value.
#[inline(always)]
#[allow(clippy::branches_sharing_code)]
pub fn from_u64(x: u64) -> VecType {
    let mut vec = VecType::new();
    debug_assert!(vec.capacity() >= 2);
    if LIMB_BITS == 32 {
        vec.try_push(x as Limb).unwrap();
        vec.try_push((x >> 32) as Limb).unwrap();
    } else {
        vec.try_push(x as Limb).unwrap();
    }
    vec.normalize();
    vec
}

// HI
// --

/// Check if any of the limbs below the top `rindex` limbs are non-zero.
///
/// # Safety
///
/// Safe as long as `rindex <= x.len()`.
#[inline]
pub fn nonzero(x: &[Limb], rindex: usize) -> bool {
    debug_assert!(rindex <= x.len());

    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != 0)
}

// These return the high X bits and if the bits were truncated.

/// Shift 32-bit integer to high 64-bits.
#[inline]
pub fn u32_to_hi64_1(r0: u32) -> (u64, bool) {
    u64_to_hi64_1(r0 as u64)
}

/// Shift 2 32-bit integers to high 64-bits.
#[inline]
pub fn u32_to_hi64_2(r0: u32, r1: u32) -> (u64, bool) {
    let r0 = (r0 as u64) << 32;
    let r1 = r1 as u64;
    u64_to_hi64_1(r0 | r1)
}

/// Shift 3 32-bit integers to high 64-bits.
#[inline]
pub fn u32_to_hi64_3(r0: u32, r1: u32, r2: u32) -> (u64, bool) {
    let r0 = r0 as u64;
    let r1 = (r1 as u64) << 32;
    let r2 = r2 as u64;
    u64_to_hi64_2(r0, r1 | r2)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
pub fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}

/// Shift 2 64-bit integers to high 64-bits.
#[inline]
pub fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}

/// Extract the hi bits from the buffer.
macro_rules! hi {
    // # Safety
    //
    // Safe as long as `stackvec.len() >= 1`.
    (@1 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        $fn($rview[0] as $t)
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 2`.
    (@2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let r0 = $rview[0] as $t;
        let r1 = $rview[1] as $t;
        $fn(r0, r1)
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 2`.
    (@nonzero2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let (v, n) = hi!(@2 $self, $rview, $t, $fn);
        (v, n || nonzero($self, 2))
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 3`.
    (@3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let r0 = $rview[0] as $t;
        let r1 = $rview[1] as $t;
        let r2 = $rview[2] as $t;
        $fn(r0, r1, r2)
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 3`.
    (@nonzero3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let (v, n) = hi!(@3 $self, $rview, $t, $fn);
        (v, n || nonzero($self, 3))
    }};
}

/// Get the high 64 bits from the vector.
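///
/// The high 64 bits are shifted so the most-significant bit is set (for a
/// non-empty, normalized input), and the boolean reports whether any lower
/// bits were truncated. A rough sketch with 64-bit limbs (illustrative,
/// not a doctest):
///
/// ```text
/// // x == 1 * 2^64 + 1, stored little-endian as [1, 1].
/// let x: &[Limb] = &[1, 1];
/// // The high limb is 1, so the value is shifted left by 63 bits to
/// // normalize it, and the truncated low bit is non-zero.
/// assert_eq!(hi64(x), (1u64 << 63, true));
/// ```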
#[inline(always)]
pub fn hi64(x: &[Limb]) -> (u64, bool) {
    let rslc = rview(x);
    // SAFETY: safe, since each arm only reads limbs guaranteed to exist
    // for that length.
    match x.len() {
        0 => (0, false),
        1 if LIMB_BITS == 32 => hi!(@1 x, rslc, u32, u32_to_hi64_1),
        1 => hi!(@1 x, rslc, u64, u64_to_hi64_1),
        2 if LIMB_BITS == 32 => hi!(@2 x, rslc, u32, u32_to_hi64_2),
        2 => hi!(@2 x, rslc, u64, u64_to_hi64_2),
        _ if LIMB_BITS == 32 => hi!(@nonzero3 x, rslc, u32, u32_to_hi64_3),
        _ => hi!(@nonzero2 x, rslc, u64, u64_to_hi64_2),
    }
}

// POWERS
// ------

/// MulAssign by a power of 5.
///
/// Theoretically...
///
/// Use an exponentiation by squaring method, since it reduces the time
/// complexity of the multiplication to ~`O(log(n))` for the squaring,
/// and `O(n*m)` for the result. Since `m` is typically a lower-order
/// factor, this significantly reduces the number of multiplications
/// we need to do. Iteratively multiplying by small powers follows
/// the nth triangular number series, which scales as `O(p^2)`, but
/// where `p` is `n+m`. In short, it scales very poorly.
///
/// Practically...
///
/// Exponentiation by Squaring:
///     running 2 tests
///     test bigcomp_f32_lexical ... bench:   1,018 ns/iter (+/- 78)
///     test bigcomp_f64_lexical ... bench:   3,639 ns/iter (+/- 1,007)
///
/// Exponentiation by Iterative Small Powers:
///     running 2 tests
///     test bigcomp_f32_lexical ... bench:     518 ns/iter (+/- 31)
///     test bigcomp_f64_lexical ... bench:     583 ns/iter (+/- 47)
///
/// Exponentiation by Iterative Large Powers (of 2):
///     running 2 tests
///     test bigcomp_f32_lexical ... bench:     671 ns/iter (+/- 31)
///     test bigcomp_f64_lexical ... bench:   1,394 ns/iter (+/- 47)
///
/// The following benchmarks were run on `1 * 5^300`, using native `pow`,
/// a version with only small powers, and one with pre-computed powers
/// of `5^(3 * max_exp)`, rather than `5^(5 * max_exp)`.
///
/// Using large powers is crucial for good performance at higher exponents:
///     pow/default   time: [426.20 ns 427.96 ns 429.89 ns]
///     pow/small     time: [2.9270 us 2.9411 us 2.9565 us]
///     pow/large:3   time: [838.51 ns 842.21 ns 846.27 ns]
///
/// Even using worst-case scenarios, exponentiation by squaring is
/// significantly slower for our workloads. Just multiply by small powers,
/// in simple cases, and use precalculated large powers in other cases.
///
/// Furthermore, using sufficiently big large powers is also crucial for
/// performance. This is a tradeoff of binary size and performance, and
/// using a single value at ~`5^(5 * max_exp)` seems optimal.
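///
/// A rough sketch of the decomposition performed below, assuming 64-bit
/// limbs (so `small_step == 27`) and the non-`compact` large-power step
/// `L = LARGE_POW5_STEP`:
///
/// ```text
/// // 5^exp == (5^L)^a * (5^27)^b * 5^c
/// // where a = exp / L, b = (exp % L) / 27, and c = (exp % L) % 27.
/// // The `a` iterations multiply by the precomputed LARGE_POW5 table,
/// // the `b` iterations multiply by the native constant 5^27, and the
/// // final factor 5^c is applied with one last small multiplication.
/// ```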
pub fn pow(x: &mut VecType, mut exp: u32) -> Option<()> {
    // Minimize the number of iterations for large exponents: just
    // do a few steps with a large power.
    #[cfg(not(feature = "compact"))]
    {
        while exp >= LARGE_POW5_STEP {
            large_mul(x, &LARGE_POW5)?;
            exp -= LARGE_POW5_STEP;
        }
    }

    // Now use our pre-computed small powers iteratively.
    // The step is `⌊log(2^LIMB_BITS - 1, 5)⌋`, that is, the exponent of
    // the largest power of 5 that still fits in a limb.
    let small_step = if LIMB_BITS == 32 {
        13
    } else {
        27
    };
    let max_native = (5 as Limb).pow(small_step);
    while exp >= small_step {
        small_mul(x, max_native)?;
        exp -= small_step;
    }
    if exp != 0 {
        // SAFETY: safe, since `exp < small_step`.
        let small_power = unsafe { f64::int_pow_fast_path(exp as usize, 5) };
        small_mul(x, small_power as Limb)?;
    }
    Some(())
}

// SCALAR
// ------

/// Add two small integers and return the resulting value and if overflow occurred.
#[inline(always)]
pub fn scalar_add(x: Limb, y: Limb) -> (Limb, bool) {
    x.overflowing_add(y)
}

/// Multiply two small integers (with carry) and return the overflow contribution.
///
/// Returns the (low, high) components.
#[inline(always)]
pub fn scalar_mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
    // Cannot overflow, as long as wide is 2x as wide. This is because
    // the following is always true:
    // `Wide::MAX - (Narrow::MAX * Narrow::MAX) >= Narrow::MAX`
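    //
    // A concrete check with an 8-bit narrow type and a 16-bit wide type
    // (purely illustrative; the real limbs are 32- or 64-bit):
    //     Narrow::MAX * Narrow::MAX = 255 * 255     = 65025
    //     Wide::MAX - 65025         = 65535 - 65025 = 510 >= 255
    // so `x * y + carry` always fits in the wide type.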
    let z: Wide = (x as Wide) * (y as Wide) + (carry as Wide);
    (z as Limb, (z >> LIMB_BITS) as Limb)
}

// SMALL
// -----

/// Add small integer to bigint starting from offset.
#[inline]
pub fn small_add_from(x: &mut VecType, y: Limb, start: usize) -> Option<()> {
    let mut index = start;
    let mut carry = y;
    while carry != 0 && index < x.len() {
        let result = scalar_add(x[index], carry);
        x[index] = result.0;
        carry = result.1 as Limb;
        index += 1;
    }
    // If we carried past all the elements, add to the end of the buffer.
    if carry != 0 {
        x.try_push(carry)?;
    }
    Some(())
}

/// Add small integer to bigint.
#[inline(always)]
pub fn small_add(x: &mut VecType, y: Limb) -> Option<()> {
    small_add_from(x, y, 0)
}

/// Multiply bigint by small integer.
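///
/// A worked example with 64-bit limbs (illustrative, not a doctest):
///
/// ```text
/// // x == u64::MAX, stored as [u64::MAX].
/// // x * 3 == 3 * 2^64 - 3 == 2 * 2^64 + (2^64 - 3),
/// // so the low limb becomes u64::MAX - 2 and the carry limb 2 is pushed:
/// // x becomes [u64::MAX - 2, 2].
/// ```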
#[inline]
pub fn small_mul(x: &mut VecType, y: Limb) -> Option<()> {
    let mut carry: Limb = 0;
    for xi in x.iter_mut() {
        let result = scalar_mul(*xi, y, carry);
        *xi = result.0;
        carry = result.1;
    }
    // If we carried past all the elements, add to the end of the buffer.
    if carry != 0 {
        x.try_push(carry)?;
    }
    Some(())
}

// LARGE
// -----

/// Add bigint to bigint starting from offset.
pub fn large_add_from(x: &mut VecType, y: &[Limb], start: usize) -> Option<()> {
    // The effective x buffer is from `start..x.len()`, so we need to treat
    // that as the current range. If the effective y buffer is longer, need
    // to resize to that, + the start index.
    if y.len() > x.len().saturating_sub(start) {
        // Bail out (return `None`) if we can't extend the buffer, so all
        // the indexing below is guaranteed to be in bounds.
        x.try_resize(y.len() + start, 0)?;
    }

    // Iteratively add elements from y to x.
    let mut carry = false;
    for (index, &yi) in y.iter().enumerate() {
        // The `try_resize` above guarantees this index exists.
        let xi = x.get_mut(start + index).unwrap();

        // Only one of the two ops can overflow, since we added at max
        // `Limb::max_value() + Limb::max_value()`. Add the previous carry,
        // and store the current carry for the next iteration.
        let result = scalar_add(*xi, yi);
        *xi = result.0;
        let mut tmp = result.1;
        if carry {
            let result = scalar_add(*xi, 1);
            *xi = result.0;
            tmp |= result.1;
        }
        carry = tmp;
    }

    // Handle overflow.
    if carry {
        small_add_from(x, 1, y.len() + start)?;
    }
    Some(())
}

/// Add bigint to bigint.
#[inline(always)]
pub fn large_add(x: &mut VecType, y: &[Limb]) -> Option<()> {
    large_add_from(x, y, 0)
}

/// Grade-school multiplication algorithm.
///
/// Slow, naive algorithm, using limb-bit bases and just shifting left for
/// each iteration. This could be optimized with numerous other algorithms,
/// but it's extremely simple, and works in `O(n*m)` time, which is fine
/// by me. Each iteration, of which there are `m` iterations, requires
/// `n` multiplications, and `n` additions, or grade-school multiplication.
///
/// Don't use Karatsuba multiplication, since our implementation ends up
/// being slower in practice, likely due to the small sizes we deal with
/// here. For example, running on the following data:
///
/// ```text
/// const SMALL_X: &[u32] = &[
///     766857581, 3588187092, 1583923090, 2204542082, 1564708913, 2695310100, 3676050286,
///     1022770393, 468044626, 446028186
/// ];
/// const SMALL_Y: &[u32] = &[
///     3945492125, 3250752032, 1282554898, 1708742809, 1131807209, 3171663979, 1353276095,
///     1678845844, 2373924447, 3640713171
/// ];
/// const LARGE_X: &[u32] = &[
///     3647536243, 2836434412, 2154401029, 1297917894, 137240595, 790694805, 2260404854,
///     3872698172, 690585094, 99641546, 3510774932, 1672049983, 2313458559, 2017623719,
///     638180197, 1140936565, 1787190494, 1797420655, 14113450, 2350476485, 3052941684,
///     1993594787, 2901001571, 4156930025, 1248016552, 848099908, 2660577483, 4030871206,
///     692169593, 2835966319, 1781364505, 4266390061, 1813581655, 4210899844, 2137005290,
///     2346701569, 3715571980, 3386325356, 1251725092, 2267270902, 474686922, 2712200426,
///     197581715, 3087636290, 1379224439, 1258285015, 3230794403, 2759309199, 1494932094,
///     326310242
/// ];
/// const LARGE_Y: &[u32] = &[
///     1574249566, 868970575, 76716509, 3198027972, 1541766986, 1095120699, 3891610505,
///     2322545818, 1677345138, 865101357, 2650232883, 2831881215, 3985005565, 2294283760,
///     3468161605, 393539559, 3665153349, 1494067812, 106699483, 2596454134, 797235106,
///     705031740, 1209732933, 2732145769, 4122429072, 141002534, 790195010, 4014829800,
///     1303930792, 3649568494, 308065964, 1233648836, 2807326116, 79326486, 1262500691,
///     621809229, 2258109428, 3819258501, 171115668, 1139491184, 2979680603, 1333372297,
///     1657496603, 2790845317, 4090236532, 4220374789, 601876604, 1828177209, 2372228171,
///     2247372529
/// ];
/// ```
///
/// We get the following results:
///
/// ```text
/// mul/small:long          time:   [220.23 ns 221.47 ns 222.81 ns]
/// Found 4 outliers among 100 measurements (4.00%)
///     2 (2.00%) high mild
///     2 (2.00%) high severe
/// mul/small:karatsuba     time:   [233.88 ns 234.63 ns 235.44 ns]
/// Found 11 outliers among 100 measurements (11.00%)
///     8 (8.00%) high mild
///     3 (3.00%) high severe
/// mul/large:long          time:   [1.9365 us 1.9455 us 1.9558 us]
/// Found 12 outliers among 100 measurements (12.00%)
///     7 (7.00%) high mild
///     5 (5.00%) high severe
/// mul/large:karatsuba     time:   [4.4250 us 4.4515 us 4.4812 us]
/// ```
///
/// In short, Karatsuba multiplication is never worthwhile for our use-case.
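///
/// A rough sketch of the loop below (illustrative only):
///
/// ```text
/// // z = x * y, with x and y as little-endian limb slices:
/// // z  = x * y[0]
/// // z += (x * y[1]) << (1 limb)
/// // z += (x * y[2]) << (2 limbs)
/// // ...
/// // where each `x * y[i]` is a `small_mul`, and each shifted addition
/// // is a `large_add_from` starting at limb offset `i`.
/// ```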
pub fn long_mul(x: &[Limb], y: &[Limb]) -> Option<VecType> {
    // Using the immutable value, multiply by all the scalars in y, using
    // the algorithm defined above. Use a single buffer to avoid
    // frequent reallocations. Handle the first case to avoid a redundant
    // addition, since we know y.len() >= 1.
    let mut z = VecType::try_from(x)?;
    if !y.is_empty() {
        let y0 = y[0];
        small_mul(&mut z, y0)?;

        for (index, &yi) in y.iter().enumerate().skip(1) {
            if yi != 0 {
                let mut zi = VecType::try_from(x)?;
                small_mul(&mut zi, yi)?;
                large_add_from(&mut z, &zi, index)?;
            }
        }
    }

    z.normalize();
    Some(z)
}

/// Multiply bigint by bigint using grade-school multiplication algorithm.
#[inline(always)]
pub fn large_mul(x: &mut VecType, y: &[Limb]) -> Option<()> {
    // Karatsuba multiplication never makes sense, so just use grade school
    // multiplication.
    if y.len() == 1 {
        // SAFETY: safe since `y.len() == 1`.
        small_mul(x, y[0])?;
    } else {
        *x = long_mul(y, x)?;
    }
    Some(())
}

// SHIFT
// -----

/// Shift-left `n` bits inside a buffer.
#[inline]
pub fn shl_bits(x: &mut VecType, n: usize) -> Option<()> {
    debug_assert!(n != 0);

    // Internally, for each item, we shift left by n, and add the previous
    // right shifted limb-bits.
    // For example, we transform (for u8) shifted left 2, to:
    //      b10100100 b01000010
    //      b10 b10010001 b00001000
    debug_assert!(n < LIMB_BITS);
    let rshift = LIMB_BITS - n;
    let lshift = n;
    let mut prev: Limb = 0;
    for xi in x.iter_mut() {
        let tmp = *xi;
        *xi <<= lshift;
        *xi |= prev >> rshift;
        prev = tmp;
    }

    // Push the carry if it's non-zero, even if it creates a non-normal result.
    let carry = prev >> rshift;
    if carry != 0 {
        x.try_push(carry)?;
    }

    Some(())
}

/// Shift-left `n` limbs inside a buffer.
#[inline]
pub fn shl_limbs(x: &mut VecType, n: usize) -> Option<()> {
    debug_assert!(n != 0);
    if n + x.len() > x.capacity() {
        None
    } else if !x.is_empty() {
        let len = n + x.len();
        // SAFETY: safe, since x is not empty, and `x.len() + n <= x.capacity()`.
        unsafe {
            // Move the elements.
            let src = x.as_ptr();
            let dst = x.as_mut_ptr().add(n);
            ptr::copy(src, dst, x.len());
            // Write our 0s.
            ptr::write_bytes(x.as_mut_ptr(), 0, n);
            x.set_len(len);
        }
        Some(())
    } else {
        Some(())
    }
}
/// Shift-left buffer by n bits.
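///
/// The shift is decomposed into a bit shift and a limb shift: with 64-bit
/// limbs, shifting left by 70 bits is a 6-bit `shl_bits` followed by a
/// 1-limb `shl_limbs`, since `70 == 1 * 64 + 6`.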
#[inline]
pub fn shl(x: &mut VecType, n: usize) -> Option<()> {
    let rem = n % LIMB_BITS;
    let div = n / LIMB_BITS;
    if rem != 0 {
        shl_bits(x, rem)?;
    }
    if div != 0 {
        shl_limbs(x, div)?;
    }
    Some(())
}

/// Get the number of leading zero bits in the most-significant limb.
#[inline]
pub fn leading_zeros(x: &[Limb]) -> u32 {
    let length = x.len();
    // wrapping_sub is fine: for an empty slice, `get` just returns `None`.
    if let Some(&value) = x.get(length.wrapping_sub(1)) {
        value.leading_zeros()
    } else {
        0
    }
}

/// Calculate the bit-length of the big-integer.
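///
/// For example, with 64-bit limbs, `x == &[0xFF]` has 56 leading zeros in
/// its only limb, so the bit-length is `64 * 1 - 56 == 8`.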
#[inline]
pub fn bit_length(x: &[Limb]) -> u32 {
    let nlz = leading_zeros(x);
    LIMB_BITS as u32 * x.len() as u32 - nlz
}

// LIMB
// ----

// Type for a single limb of the big integer.
//
// A limb is analogous to a digit in base10, except, it stores 32-bit
// or 64-bit numbers instead. We want types where 64-bit multiplication
// is well-supported by the architecture, rather than emulated in 3
// instructions. The quickest way to check this support is using a
// cross-compiler for numerous architectures, along with the following
// source file and command:
//
// Compile with `gcc main.c -c -S -O3 -masm=intel`
//
// And the source code is:
// ```text
// #include <stdint.h>
//
// struct i128 {
//     uint64_t hi;
//     uint64_t lo;
// };
//
// // Type your code here, or load an example.
// struct i128 square(uint64_t x, uint64_t y) {
//     __int128 prod = (__int128)x * (__int128)y;
//     struct i128 z;
//     z.hi = (uint64_t)(prod >> 64);
//     z.lo = (uint64_t)prod;
//     return z;
// }
// ```
//
// If the result contains `call __multi3`, then the multiplication
// is emulated by the compiler. Otherwise, it's natively supported.
//
// This should cover all known 64-bit platforms supported by Rust.
// https://forge.rust-lang.org/platform-support.html
//
// # Supported
//
// Platforms where native 128-bit multiplication is explicitly supported:
// - x86_64 (Supported via `MUL`).
// - mips64 (Supported via `DMULTU`, from which `HI` and `LO` can be read).
// - s390x (Supported via `MLGR`).
//
// # Efficient
//
// Platforms where native 64-bit multiplication is supported and
// you can extract hi-lo for 64-bit multiplications:
// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
// - riscv64 (Requires `MUL` and `MULH` to capture high and low bits).
//
// # Unsupported
//
// Platforms where native 128-bit multiplication is not supported,
// requiring software emulation:
// - sparc64 (`UMUL` only supports double-word arguments).
// - sparcv9 (Same as sparc64).
//
// These tests are run via `xcross`, my own library for C cross-compiling,
// which supports numerous targets (far in excess of Rust's tier 1 support,
// or rust-embedded/cross's list). xcross may be found here:
// https://github.com/Alexhuszagh/xcross
//
// To compile for the given target, run:
// `xcross gcc main.c -c -S -O3 --target $target`
//
// All 32-bit architectures inherently do not have support. That means
// we can essentially look for 64-bit architectures that are not SPARC.

#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
pub type Limb = u64;
#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
pub type Wide = u128;
#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
pub const LIMB_BITS: usize = 64;

#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
pub type Limb = u32;
#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
pub type Wide = u64;
#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
pub const LIMB_BITS: usize = 32;