// Adapted from https://github.com/Alexhuszagh/rust-lexical.

//! Building-blocks for arbitrary-precision math.
//!
//! These algorithms assume little-endian order for the large integer
//! buffers, so for a `vec![0, 1, 2, 3]`, `3` is the most significant limb,
//! and `0` is the least significant limb.
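//!
//! Concretely, a buffer `[x0, x1, ..., xn]` represents the value
//! `x0 + x1 * 2^BITS + ... + xn * 2^(n * BITS)`, where `BITS` is the limb
//! width (32 or 64 bits, depending on the platform; see below).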

use super::large_powers;
use super::num::*;
use super::small_powers::*;
use alloc::vec::Vec;
use core::{cmp, iter, mem};

// ALIASES
// -------

// Type for a single limb of the big integer.
//
// A limb is analogous to a digit in base 10, except it stores a 32-bit
// or 64-bit number instead.
//
// This should cover all known 64-bit platforms supported by Rust.
// https://forge.rust-lang.org/platform-support.html
//
// Platforms where native 128-bit multiplication is explicitly supported:
// - x86_64 (Supported via `MUL`).
// - mips64 (Supported via `DMULTU`, whose `HI` and `LO` registers can be
//   read from).
//
// Platforms where native 64-bit multiplication is supported, and the high
// and low halves of the 128-bit product can be extracted with separate
// instructions:
// - aarch64 (Requires `UMULH` and `MUL` to capture the high and low bits).
// - powerpc64 (Requires `MULHDU` and `MULLD` to capture the high and low bits).
//
// Platforms where native 128-bit multiplication is not supported,
// requiring software emulation:
// - sparc64 (`UMUL` only supports double-word arguments).

// 32-BIT LIMB
#[cfg(limb_width_32)]
pub type Limb = u32;

#[cfg(limb_width_32)]
pub const POW5_LIMB: &[Limb] = &POW5_32;

#[cfg(limb_width_32)]
pub const POW10_LIMB: &[Limb] = &POW10_32;

#[cfg(limb_width_32)]
type Wide = u64;

// 64-BIT LIMB
#[cfg(limb_width_64)]
pub type Limb = u64;

#[cfg(limb_width_64)]
pub const POW5_LIMB: &[Limb] = &POW5_64;

#[cfg(limb_width_64)]
pub const POW10_LIMB: &[Limb] = &POW10_64;

#[cfg(limb_width_64)]
type Wide = u128;

/// Cast to limb type.
#[inline]
pub(crate) fn as_limb<T: Integer>(t: T) -> Limb {
    Limb::as_cast(t)
}

/// Cast to wide type.
#[inline]
fn as_wide<T: Integer>(t: T) -> Wide {
    Wide::as_cast(t)
}

// SPLIT
// -----

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(limb_width_32)]
fn split_u64(x: u64) -> [Limb; 2] {
    [as_limb(x), as_limb(x >> 32)]
}

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(limb_width_64)]
fn split_u64(x: u64) -> [Limb; 1] {
    [as_limb(x)]
}

// HI64
// ----

// NONZERO

/// Check if any limbs below the top `rindex` limbs are non-zero.
#[inline]
pub fn nonzero<T: Integer>(x: &[T], rindex: usize) -> bool {
    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != T::ZERO)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}

/// Shift 2 64-bit integers to high 64-bits.
#[inline]
fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}

/// Trait to export the high 64-bits from a little-endian slice.
trait Hi64<T>: AsRef<[T]> {
    /// Get the hi64 bits from a 1-limb slice.
    fn hi64_1(&self) -> (u64, bool);

    /// Get the hi64 bits from a 2-limb slice.
    fn hi64_2(&self) -> (u64, bool);

    /// Get the hi64 bits from a 3-limb slice.
    fn hi64_3(&self) -> (u64, bool);

    /// High-level exporter to extract the high 64 bits from a little-endian slice.
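    ///
    /// For example, with 32-bit limbs, `[1u32, 2, 3].hi64()` treats the
    /// slice as the value `0x3_00000002_00000001` and returns
    /// `(0xC000_0000_8000_0000, true)`: the top 64 bits normalized so the
    /// leading bit is set, plus a flag indicating that a non-zero low bit
    /// was truncated.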
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        match self.as_ref().len() {
            0 => (0, false),
            1 => self.hi64_1(),
            2 => self.hi64_2(),
            _ => self.hi64_3(),
        }
    }
}

impl Hi64<u32> for [u32] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0] as u64;
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() == 2);
        let r0 = (self[1] as u64) << 32;
        let r1 = self[0] as u64;
        u64_to_hi64_1(r0 | r1)
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 3);
        let r0 = self[self.len() - 1] as u64;
        let r1 = (self[self.len() - 2] as u64) << 32;
        let r2 = self[self.len() - 3] as u64;
        let (v, n) = u64_to_hi64_2(r0, r1 | r2);
        (v, n || nonzero(self, 3))
    }
}

impl Hi64<u64> for [u64] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0];
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 2);
        let r0 = self[self.len() - 1];
        let r1 = self[self.len() - 2];
        let (v, n) = u64_to_hi64_2(r0, r1);
        (v, n || nonzero(self, 2))
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        self.hi64_2()
    }
}

// SCALAR
// ------

// Scalar-to-scalar operations, used as building-blocks for arbitrary-precision
// operations.

mod scalar {
    use super::*;

    // ADDITION

    /// Add two small integers and return the resulting value and whether overflow happened.
    #[inline]
    pub fn add(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_add(y)
    }

    /// AddAssign two small integers and return whether overflow happened.
    #[inline]
    pub fn iadd(x: &mut Limb, y: Limb) -> bool {
        let t = add(*x, y);
        *x = t.0;
        t.1
    }

    // SUBTRACTION

    /// Subtract two small integers and return the resulting value and whether overflow happened.
    #[inline]
    pub fn sub(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_sub(y)
    }

    /// SubAssign two small integers and return whether overflow happened.
    #[inline]
    pub fn isub(x: &mut Limb, y: Limb) -> bool {
        let t = sub(*x, y);
        *x = t.0;
        t.1
    }

    // MULTIPLICATION

    /// Multiply two small integers (with carry), returning the
    /// (low, high) components of `x * y + carry`.
    #[inline]
    pub fn mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
        // Cannot overflow, as long as the wide type is twice as wide. This is
        // because the following is always true:
        // `Wide::max_value() - (Narrow::max_value() * Narrow::max_value()) >= Narrow::max_value()`
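        //
        // Concretely, with `B = 2^bits`, the largest possible value of
        // `x * y + carry` is `(B - 1) * (B - 1) + (B - 1) = B^2 - B`, which
        // is strictly less than `Wide::max_value() = B^2 - 1`.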
        let z: Wide = as_wide(x) * as_wide(y) + as_wide(carry);
        let bits = mem::size_of::<Limb>() * 8;
        (as_limb(z), as_limb(z >> bits))
    }

    /// MulAssign two small integers (with carry), returning the new carry.
    #[inline]
    pub fn imul(x: &mut Limb, y: Limb, carry: Limb) -> Limb {
        let t = mul(*x, y, carry);
        *x = t.0;
        t.1
    }
} // scalar

// SMALL
// -----

// Large-to-small operations, to modify a big integer from a native scalar.

mod small {
    use super::*;

    // ADDITION

    /// Implied AddAssign implementation for adding a small integer to bigint.
    ///
    /// Allows us to choose a start-index in `x` to store at, so we can
    /// increment from a non-zero position.
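    ///
    /// For example, `iadd_impl(&mut vec![Limb::max_value(), 5], 1, 0)` wraps
    /// the low limb to 0 and carries into the next, leaving `[0, 6]`.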
    #[inline]
    pub fn iadd_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        if x.len() <= xstart {
            x.push(y);
        } else {
            // Initial add
            let mut carry = scalar::iadd(&mut x[xstart], y);

            // Increment until overflow stops occurring.
            let mut size = xstart + 1;
            while carry && size < x.len() {
                carry = scalar::iadd(&mut x[size], 1);
                size += 1;
            }

            // If we overflowed the buffer entirely, need to add 1 to the end
            // of the buffer.
            if carry {
                x.push(1);
            }
        }
    }

    /// AddAssign small integer to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: Limb) {
        iadd_impl(x, y, 0);
    }

    // SUBTRACTION

    /// SubAssign small integer to bigint.
    /// Does not do overflowing subtraction.
    #[inline]
    pub fn isub_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        debug_assert!(x.len() > xstart && (x[xstart] >= y || x.len() > xstart + 1));

        // Initial subtraction
        let mut carry = scalar::isub(&mut x[xstart], y);

        // Subtract 1 until the borrow stops propagating.
        let mut size = xstart + 1;
        while carry && size < x.len() {
            carry = scalar::isub(&mut x[size], 1);
            size += 1;
        }
        normalize(x);
    }

    // MULTIPLICATION

    /// MulAssign small integer to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: Limb) {
        // Multiply iteratively over all elements, adding the carry each time.
        let mut carry: Limb = 0;
        for xi in &mut *x {
            carry = scalar::imul(xi, y, carry);
        }

        // Overflow of value, add to end.
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Mul small integer to bigint.
    #[inline]
    pub fn mul(x: &[Limb], y: Limb) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        imul(&mut z, y);
        z
    }

    /// MulAssign by a power of 5.
    ///
    /// Theoretically...
    ///
    /// Use an exponentiation by squaring method, since it reduces the time
    /// complexity of the multiplication to ~`O(log(n))` for the squaring,
    /// and `O(n*m)` for the result. Since `m` is typically a lower-order
    /// factor, this significantly reduces the number of multiplications
    /// we need to do. Iteratively multiplying by small powers follows
    /// the nth triangular number series, which scales as `O(p^2)`, but
    /// where `p` is `n+m`. In short, it scales very poorly.
    ///
    /// Practically...
    ///
    /// Exponentiation by Squaring:
    /// running 2 tests
    /// test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78)
    /// test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007)
    ///
    /// Exponentiation by Iterative Small Powers:
    /// running 2 tests
    /// test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31)
    /// test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47)
    ///
    /// Exponentiation by Iterative Large Powers (of 2):
    /// running 2 tests
    /// test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31)
    /// test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47)
    ///
    /// Even using worst-case scenarios, exponentiation by squaring is
    /// significantly slower for our workloads. Just multiply by small powers,
    /// in simple cases, and use precalculated large powers in other cases.
    pub fn imul_pow5(x: &mut Vec<Limb>, n: u32) {
        use super::large::KARATSUBA_CUTOFF;

        let small_powers = POW5_LIMB;
        let large_powers = large_powers::POW5;

        if n == 0 {
            // No exponent, just return.
            // The 0-index of the large powers is `2^0`, which is 1, so we want
            // to make sure we don't take that path with a literal 0.
            return;
        }

        // We want to use the asymptotically faster algorithm (precomputed
        // large powers) if we're going to end up using Karatsuba
        // multiplication somewhere along the way; otherwise, just multiply
        // iteratively by small powers.
        let bit_length = 32 - n.leading_zeros() as usize;
        debug_assert!(bit_length != 0 && bit_length <= large_powers.len());
        if x.len() + large_powers[bit_length - 1].len() < 2 * KARATSUBA_CUTOFF {
            // We can use iterative small powers to make this faster for the
            // easy cases.

            // Multiply by the largest small power until n < step.
            let step = small_powers.len() - 1;
            let power = small_powers[step];
            let mut n = n as usize;
            while n >= step {
                imul(x, power);
                n -= step;
            }

            // Multiply by the remainder.
            imul(x, small_powers[n]);
        } else {
            // In theory, this code should be asymptotically a lot faster;
            // in practice, our small::imul seems to be the limiting step,
            // and large::imul is slow as well.

            // Multiply by higher order powers.
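            //
            // The large-powers table stores the base raised to `2^i` at
            // index `i` (so `large_powers[0]` is `5^1` here). For example,
            // `n = 13 = 0b1101` multiplies by `large_powers[0]`,
            // `large_powers[2]`, and `large_powers[3]`,
            // i.e. `5^1 * 5^4 * 5^8 = 5^13`.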
            let mut idx: usize = 0;
            let mut bit: usize = 1;
            let mut n = n as usize;
            while n != 0 {
                if n & bit != 0 {
                    debug_assert!(idx < large_powers.len());
                    large::imul(x, large_powers[idx]);
                    n ^= bit;
                }
                idx += 1;
                bit <<= 1;
            }
        }
    }

    // BIT LENGTH

    /// Get the number of leading zero bits in the most-significant limb.
    #[inline]
    pub fn leading_zeros(x: &[Limb]) -> usize {
        x.last().map_or(0, |x| x.leading_zeros() as usize)
    }

    /// Calculate the bit-length of the big-integer.
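    ///
    /// For example, `bit_length(&[0b101])` is 3, and `bit_length(&[])` is 0.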
    #[inline]
    pub fn bit_length(x: &[Limb]) -> usize {
        let bits = mem::size_of::<Limb>() * 8;
        // Avoid overflowing; calculate via the total number of bits
        // minus the leading zero bits.
        let nlz = leading_zeros(x);
        bits.checked_mul(x.len())
            .map_or_else(usize::max_value, |v| v - nlz)
    }

    // SHL

    /// Shift-left bits inside a buffer.
    ///
    /// Assumes `n < Limb::BITS`, i.e., we are only shifting bits internally.
    #[inline]
    pub fn ishl_bits(x: &mut Vec<Limb>, n: usize) {
        // Need to shift by `n % Limb::BITS` bits (the whole-limb part is
        // handled by `ishl_limbs`).
        let bits = mem::size_of::<Limb>() * 8;
        debug_assert!(n < bits);
        if n == 0 {
            return;
        }

        // Internally, for each item, we shift left by n, and add the previous
        // right shifted limb-bits.
        // For example, we transform (for u8) shifted left 2, to:
        //      b10100100 b01000010
        //      b10 b10010001 b00001000
        let rshift = bits - n;
        let lshift = n;
        let mut prev: Limb = 0;
        for xi in &mut *x {
            let tmp = *xi;
            *xi <<= lshift;
            *xi |= prev >> rshift;
            prev = tmp;
        }

        // Push the final carry limb, if any bits were shifted out the top.
        let carry = prev >> rshift;
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Shift-left `n` limbs inside a buffer.
    ///
    /// Assumes `n` is not 0.
    #[inline]
    pub fn ishl_limbs(x: &mut Vec<Limb>, n: usize) {
        debug_assert!(n != 0);
        if !x.is_empty() {
            x.reserve(n);
            x.splice(..0, iter::repeat(0).take(n));
        }
    }

    /// Shift-left buffer by n bits.
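    ///
    /// For example (with 64-bit limbs), `ishl(x, 70)` shifts each limb left
    /// by 6 bits (`ishl_bits`) and then prepends one zero limb
    /// (`ishl_limbs`).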
    #[inline]
    pub fn ishl(x: &mut Vec<Limb>, n: usize) {
        let bits = mem::size_of::<Limb>() * 8;
        // Need to pad with zeros for `n / Limb::BITS` limbs,
        // and shift-left with carry by `n % Limb::BITS` bits.
        let rem = n % bits;
        let div = n / bits;
        ishl_bits(x, rem);
        if div != 0 {
            ishl_limbs(x, div);
        }
    }

    // NORMALIZE

    /// Normalize the container by popping any leading zeros.
    #[inline]
    pub fn normalize(x: &mut Vec<Limb>) {
        // Pop all leading (most-significant) zero limbs, so the highest
        // stored limb, if any, is non-zero.
        while x.last() == Some(&0) {
            x.pop();
        }
    }
} // small

// LARGE
// -----

// Large-to-large operations, to modify a big integer from another big integer.

mod large {
    use super::*;

    // RELATIVE OPERATORS

    /// Compare `x` to `y`, in little-endian order.
    #[inline]
    pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
        if x.len() > y.len() {
            cmp::Ordering::Greater
        } else if x.len() < y.len() {
            cmp::Ordering::Less
        } else {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, &yi) in iter {
                if xi > yi {
                    return cmp::Ordering::Greater;
                } else if xi < yi {
                    return cmp::Ordering::Less;
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        }
    }

    /// Check if x is less than y.
    #[inline]
    pub fn less(x: &[Limb], y: &[Limb]) -> bool {
        compare(x, y) == cmp::Ordering::Less
    }

    /// Check if x is greater than or equal to y.
    #[inline]
    pub fn greater_equal(x: &[Limb], y: &[Limb]) -> bool {
        !less(x, y)
    }

    // ADDITION

    /// Implied AddAssign implementation for bigints.
    ///
    /// Allows us to choose a start-index in `x` to store at, so we can avoid
    /// padding the buffer with zeros when not needed; optimized for vectors.
    pub fn iadd_impl(x: &mut Vec<Limb>, y: &[Limb], xstart: usize) {
        // The effective x buffer is from `xstart..x.len()`, so we need to treat
        // that as the current range. If the effective y buffer is longer, need
        // to resize to that, + the start index.
        if y.len() > x.len() - xstart {
            x.resize(y.len() + xstart, 0);
        }

        // Iteratively add elements from y to x.
        let mut carry = false;
        for (xi, yi) in x[xstart..].iter_mut().zip(y.iter()) {
            // At most one of the two ops can overflow, since the sum is at
            // most `Limb::max_value() + Limb::max_value()`. Add the previous
            // carry, and store the current carry for the next limb.
            let mut tmp = scalar::iadd(xi, *yi);
            if carry {
                tmp |= scalar::iadd(xi, 1);
            }
            carry = tmp;
        }

        // Overflow from the last limb.
        if carry {
            small::iadd_impl(x, 1, y.len() + xstart);
        }
    }

    /// AddAssign bigint to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: &[Limb]) {
        iadd_impl(x, y, 0);
    }

    /// Add bigint to bigint.
    #[inline]
    pub fn add(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        iadd(&mut z, y);
        z
    }

    // SUBTRACTION

    /// SubAssign bigint to bigint.
    pub fn isub(x: &mut Vec<Limb>, y: &[Limb]) {
        // Basic underflow checks.
        debug_assert!(greater_equal(x, y));

        // Iteratively subtract elements of y from x.
        let mut carry = false;
        for (xi, yi) in x.iter_mut().zip(y.iter()) {
            // At most one of the two ops can borrow, since we subtract at
            // most `Limb::max_value() + 1` in total. Subtract the previous
            // borrow, and store the current borrow for the next limb.
            let mut tmp = scalar::isub(xi, *yi);
            if carry {
                tmp |= scalar::isub(xi, 1);
            }
            carry = tmp;
        }

        if carry {
            small::isub_impl(x, 1, y.len());
        } else {
            small::normalize(x);
        }
    }

    // MULTIPLICATION

    /// Number of limbs at which to bottom out to the asymptotically slower
    /// algorithms.
    ///
    /// Karatsuba tends to out-perform long-multiplication at ~320-640 bits,
    /// so we go halfway, while Newton division tends to out-perform
    /// Algorithm D at ~1024 bits. We can toggle this for optimal performance.
    pub const KARATSUBA_CUTOFF: usize = 32;

    /// Grade-school multiplication algorithm.
    ///
    /// Slow, naive algorithm, using limb-bit bases and just shifting left for
    /// each iteration. This could be optimized with numerous other algorithms,
    /// but it's extremely simple, and works in `O(n*m)` time, which is fine
    /// by me. Each of the `m` iterations requires `n` multiplications and
    /// `n` additions, as in grade-school multiplication.
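    ///
    /// In limb terms, this computes
    /// `x * y = sum(x * y[i] * B^i for i in 0..y.len())` with
    /// `B = 2^(Limb::BITS)`, accumulating each partial product into the
    /// result at an offset of `i` limbs.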
    fn long_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        // Using the immutable value, multiply by all the scalars in y, using
        // the algorithm defined above. Use a single buffer to avoid
        // frequent reallocations. Handle the first case to avoid a redundant
        // addition, since we know y.len() >= 1.
        let mut z: Vec<Limb> = small::mul(x, y[0]);
        z.resize(x.len() + y.len(), 0);

        // Handle the iterative cases.
        for (i, &yi) in y[1..].iter().enumerate() {
            let zi: Vec<Limb> = small::mul(x, yi);
            iadd_impl(&mut z, &zi, i + 1);
        }

        small::normalize(&mut z);

        z
    }

    /// Split a buffer at index `m` into (lo, hi) slices.
    #[inline]
    pub fn karatsuba_split(z: &[Limb], m: usize) -> (&[Limb], &[Limb]) {
        (&z[..m], &z[m..])
    }

    /// Karatsuba multiplication algorithm with roughly equal input sizes.
    ///
    /// Assumes `y.len() >= x.len()`.
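    ///
    /// Sketch of the recurrence: writing `x = xl + xh * B^m` and
    /// `y = yl + yh * B^m` with `m = y.len() / 2` and `B = 2^(Limb::BITS)`,
    ///
    /// `x * y = z0 + (z1 - z0 - z2) * B^m + z2 * B^(2 * m)`
    ///
    /// where `z0 = xl * yl`, `z2 = xh * yh`, and `z1 = (xl + xh) * (yl + yh)`,
    /// so only three recursive multiplications are needed.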
    fn karatsuba_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if y.len() <= KARATSUBA_CUTOFF {
            // Bottom out to long multiplication for small cases.
            long_mul(x, y)
        } else if x.len() < y.len() / 2 {
            karatsuba_uneven_mul(x, y)
        } else {
            // Do our 3 multiplications.
            let m = y.len() / 2;
            let (xl, xh) = karatsuba_split(x, m);
            let (yl, yh) = karatsuba_split(y, m);
            let sumx = add(xl, xh);
            let sumy = add(yl, yh);
            let z0 = karatsuba_mul(xl, yl);
            let mut z1 = karatsuba_mul(&sumx, &sumy);
            let z2 = karatsuba_mul(xh, yh);
            // Properly scale z1, which is `z1 - z2 - z0`.
            isub(&mut z1, &z2);
            isub(&mut z1, &z0);

            // Create our result, which is equal to, in little-endian order:
            // [z0, z1 - z2 - z0, z2]
            // z1 must be shifted m limbs over (a factor of `2^(m * Limb::BITS)`).
            // z2 must be shifted 2*m limbs over (a factor of `2^(2 * m * Limb::BITS)`).
            let len = z0.len().max(m + z1.len()).max(2 * m + z2.len());
            let mut result = z0;
            result.reserve_exact(len - result.len());
            iadd_impl(&mut result, &z1, m);
            iadd_impl(&mut result, &z2, 2 * m);

            result
        }
    }

    /// Karatsuba multiplication algorithm where y is substantially larger than x.
    ///
    /// Assumes `y.len() >= x.len()`.
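    ///
    /// For example, with `x.len() == 4` and `y.len() == 10`, `y` is processed
    /// as chunks of 4, 4, and 2 limbs, and each partial product is added into
    /// the result at offsets 0, 4, and 8 limbs respectively.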
    fn karatsuba_uneven_mul(x: &[Limb], mut y: &[Limb]) -> Vec<Limb> {
        let mut result = Vec::<Limb>::default();
        result.resize(x.len() + y.len(), 0);

        // This effectively is like grade-school multiplication between
        // two numbers, except we're using splits on `y`, and the intermediate
        // step is a Karatsuba multiplication.
        let mut start = 0;
        while !y.is_empty() {
            let m = x.len().min(y.len());
            let (yl, yh) = karatsuba_split(y, m);
            let prod = karatsuba_mul(x, yl);
            iadd_impl(&mut result, &prod, start);
            y = yh;
            start += m;
        }
        small::normalize(&mut result);

        result
    }

    /// Forwarder to the proper Karatsuba algorithm.
    #[inline]
    fn karatsuba_mul_fwd(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if x.len() < y.len() {
            karatsuba_mul(x, y)
        } else {
            karatsuba_mul(y, x)
        }
    }

    /// MulAssign bigint to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: &[Limb]) {
        if y.len() == 1 {
            small::imul(x, y[0]);
        } else {
            // Forward to the Karatsuba algorithm; for operands below the
            // cutoff it bottoms out to long multiplication anyway, so the
            // single-limb case above is the only special case we need.
            *x = karatsuba_mul_fwd(x, y);
        }
    }
} // large

// TRAITS
// ------

/// Traits for shared operations for big integers.
///
/// None of these are implemented using normal traits, since these
/// are very expensive operations, and we want to deliberately
/// and explicitly use these functions.
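///
/// A consumer is expected to look roughly like the sketch below (the
/// `Bigint` name is illustrative; the concrete type lives elsewhere):
///
/// ```text
/// #[derive(Clone, Default)]
/// struct Bigint {
///     data: Vec<Limb>,
/// }
///
/// impl Math for Bigint {
///     fn data(&self) -> &Vec<Limb> {
///         &self.data
///     }
///     fn data_mut(&mut self) -> &mut Vec<Limb> {
///         &mut self.data
///     }
/// }
/// ```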
pub(crate) trait Math: Clone + Sized + Default {
    // DATA

    /// Get access to the underlying data.
    fn data(&self) -> &Vec<Limb>;

    /// Get mutable access to the underlying data.
    fn data_mut(&mut self) -> &mut Vec<Limb>;

    // RELATIVE OPERATIONS

    /// Compare self to y.
    #[inline]
    fn compare(&self, y: &Self) -> cmp::Ordering {
        large::compare(self.data(), y.data())
    }

    // PROPERTIES

    /// Get the high 64 bits from the bigint, and whether any truncated
    /// lower bits were non-zero.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        self.data().as_slice().hi64()
    }

    /// Calculate the bit-length of the big-integer.
    /// Returns `usize::max_value()` if the value overflows,
    /// i.e., if `self.data().len() > usize::max_value() / Limb::BITS`.
    #[inline]
    fn bit_length(&self) -> usize {
        small::bit_length(self.data())
    }

    // INTEGER CONVERSIONS

    /// Create new big integer from u64.
    #[inline]
    fn from_u64(x: u64) -> Self {
        let mut v = Self::default();
        let slc = split_u64(x);
        v.data_mut().extend_from_slice(&slc);
        v.normalize();
        v
    }

    // NORMALIZE

    /// Normalize the integer, so any leading zero values are removed.
    #[inline]
    fn normalize(&mut self) {
        small::normalize(self.data_mut());
    }

    // ADDITION

    /// AddAssign small integer.
    #[inline]
    fn iadd_small(&mut self, y: Limb) {
        small::iadd(self.data_mut(), y);
    }

    // MULTIPLICATION

    /// MulAssign small integer.
    #[inline]
    fn imul_small(&mut self, y: Limb) {
        small::imul(self.data_mut(), y);
    }

    /// Multiply by a power of 2.
    #[inline]
    fn imul_pow2(&mut self, n: u32) {
        self.ishl(n as usize);
    }

    /// Multiply by a power of 5.
    #[inline]
    fn imul_pow5(&mut self, n: u32) {
        small::imul_pow5(self.data_mut(), n);
    }

    /// MulAssign by a power of 10.
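    ///
    /// Since `10^n = 5^n * 2^n`, this is a power-of-5 multiply followed by
    /// a left shift of `n` bits.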
    #[inline]
    fn imul_pow10(&mut self, n: u32) {
        self.imul_pow5(n);
        self.imul_pow2(n);
    }

    // SHIFTS

    /// Shift-left the entire buffer n bits.
    #[inline]
    fn ishl(&mut self, n: usize) {
        small::ishl(self.data_mut(), n);
    }
}