1 | use alloc::vec::Vec; |
2 | use core::mem; |
3 | use core::ops::Shl; |
4 | use num_traits::One; |
5 | |
6 | use crate::big_digit::{self, BigDigit, DoubleBigDigit}; |
7 | use crate::biguint::BigUint; |
8 | |
9 | struct MontyReducer { |
10 | n0inv: BigDigit, |
11 | } |
12 | |
13 | // k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson |
14 | // Iteration for Multiplicative Inverses Modulo Prime Powers". |
15 | fn inv_mod_alt(b: BigDigit) -> BigDigit { |
16 | assert_ne!(b & 1, 0); |
17 | |
18 | let mut k0: u64 = BigDigit::wrapping_sub(self:2, rhs:b); |
19 | let mut t: u64 = b - 1; |
20 | let mut i: u8 = 1; |
21 | while i < big_digit::BITS { |
22 | t = t.wrapping_mul(t); |
23 | k0 = k0.wrapping_mul(t + 1); |
24 | |
25 | i <<= 1; |
26 | } |
27 | debug_assert_eq!(k0.wrapping_mul(b), 1); |
28 | k0.wrapping_neg() |
29 | } |
30 | |
31 | impl MontyReducer { |
32 | fn new(n: &BigUint) -> Self { |
33 | let n0inv: u64 = inv_mod_alt(n.data[0]); |
34 | MontyReducer { n0inv } |
35 | } |
36 | } |
37 | |
38 | /// Computes z mod m = x * y * 2 ** (-n*_W) mod m |
39 | /// assuming k = -1/m mod 2**_W |
40 | /// See Gueron, "Efficient Software Implementations of Modular Exponentiation". |
41 | /// <https://eprint.iacr.org/2011/239.pdf> |
42 | /// In the terminology of that paper, this is an "Almost Montgomery Multiplication": |
43 | /// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result |
44 | /// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. |
45 | #[allow (clippy::many_single_char_names)] |
46 | fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint { |
47 | // This code assumes x, y, m are all the same length, n. |
48 | // (required by addMulVVW and the for loop). |
49 | // It also assumes that x, y are already reduced mod m, |
50 | // or else the result will not be properly reduced. |
51 | assert!( |
52 | x.data.len() == n && y.data.len() == n && m.data.len() == n, |
53 | " {:?} {:?} {:?} {}" , |
54 | x, |
55 | y, |
56 | m, |
57 | n |
58 | ); |
59 | |
60 | let mut z = BigUint::ZERO; |
61 | z.data.resize(n * 2, 0); |
62 | |
63 | let mut c: BigDigit = 0; |
64 | for i in 0..n { |
65 | let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]); |
66 | let t = z.data[i].wrapping_mul(k); |
67 | let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t); |
68 | let cx = c.wrapping_add(c2); |
69 | let cy = cx.wrapping_add(c3); |
70 | z.data[n + i] = cy; |
71 | if cx < c2 || cy < c3 { |
72 | c = 1; |
73 | } else { |
74 | c = 0; |
75 | } |
76 | } |
77 | |
78 | if c == 0 { |
79 | z.data = z.data[n..].to_vec(); |
80 | } else { |
81 | { |
82 | let (first, second) = z.data.split_at_mut(n); |
83 | sub_vv(first, second, &m.data); |
84 | } |
85 | z.data = z.data[..n].to_vec(); |
86 | } |
87 | |
88 | z |
89 | } |
90 | |
91 | #[inline (always)] |
92 | fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit { |
93 | let mut c: u64 = 0; |
94 | for (zi: &mut u64, xi: &u64) in z.iter_mut().zip(x.iter()) { |
95 | let (z1: u64, z0: u64) = mul_add_www(*xi, y, *zi); |
96 | let (c_: u64, zi_: u64) = add_ww(x:z0, y:c, c:0); |
97 | *zi = zi_; |
98 | c = c_ + z1; |
99 | } |
100 | |
101 | c |
102 | } |
103 | |
104 | /// The resulting carry c is either 0 or 1. |
105 | #[inline (always)] |
106 | fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit { |
107 | let mut c: u64 = 0; |
108 | for (i: usize, (xi: &u64, yi: &u64)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { |
109 | let zi: u64 = xi.wrapping_sub(*yi).wrapping_sub(c); |
110 | z[i] = zi; |
111 | // see "Hacker's Delight", section 2-12 (overflow detection) |
112 | c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1) |
113 | } |
114 | |
115 | c |
116 | } |
117 | |
118 | /// z1<<_W + z0 = x+y+c, with c == 0 or 1 |
119 | #[inline (always)] |
120 | fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { |
121 | let yc: u64 = y.wrapping_add(c); |
122 | let z0: u64 = x.wrapping_add(yc); |
123 | let z1: u64 = if z0 < x || yc < y { 1 } else { 0 }; |
124 | |
125 | (z1, z0) |
126 | } |
127 | |
128 | /// z1 << _W + z0 = x * y + c |
129 | #[inline (always)] |
130 | fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { |
131 | let z: u128 = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit; |
132 | ((z >> big_digit::BITS) as BigDigit, z as BigDigit) |
133 | } |
134 | |
135 | /// Calculates x ** y mod m using a fixed, 4-bit window. |
136 | #[allow (clippy::many_single_char_names)] |
137 | pub(super) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint { |
138 | assert!(m.data[0] & 1 == 1); |
139 | let mr = MontyReducer::new(m); |
140 | let num_words = m.data.len(); |
141 | |
142 | let mut x = x.clone(); |
143 | |
144 | // We want the lengths of x and m to be equal. |
145 | // It is OK if x >= m as long as len(x) == len(m). |
146 | if x.data.len() > num_words { |
147 | x %= m; |
148 | // Note: now len(x) <= numWords, not guaranteed ==. |
149 | } |
150 | if x.data.len() < num_words { |
151 | x.data.resize(num_words, 0); |
152 | } |
153 | |
154 | // rr = 2**(2*_W*len(m)) mod m |
155 | let mut rr = BigUint::one(); |
156 | rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m; |
157 | if rr.data.len() < num_words { |
158 | rr.data.resize(num_words, 0); |
159 | } |
160 | // one = 1, with equal length to that of m |
161 | let mut one = BigUint::one(); |
162 | one.data.resize(num_words, 0); |
163 | |
164 | let n = 4; |
165 | // powers[i] contains x^i |
166 | let mut powers = Vec::with_capacity(1 << n); |
167 | powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words)); |
168 | powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words)); |
169 | for i in 2..1 << n { |
170 | let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words); |
171 | powers.push(r); |
172 | } |
173 | |
174 | // initialize z = 1 (Montgomery 1) |
175 | let mut z = powers[0].clone(); |
176 | z.data.resize(num_words, 0); |
177 | let mut zz = BigUint::ZERO; |
178 | zz.data.resize(num_words, 0); |
179 | |
180 | // same windowed exponent, but with Montgomery multiplications |
181 | for i in (0..y.data.len()).rev() { |
182 | let mut yi = y.data[i]; |
183 | let mut j = 0; |
184 | while j < big_digit::BITS { |
185 | if i != y.data.len() - 1 || j != 0 { |
186 | zz = montgomery(&z, &z, m, mr.n0inv, num_words); |
187 | z = montgomery(&zz, &zz, m, mr.n0inv, num_words); |
188 | zz = montgomery(&z, &z, m, mr.n0inv, num_words); |
189 | z = montgomery(&zz, &zz, m, mr.n0inv, num_words); |
190 | } |
191 | zz = montgomery( |
192 | &z, |
193 | &powers[(yi >> (big_digit::BITS - n)) as usize], |
194 | m, |
195 | mr.n0inv, |
196 | num_words, |
197 | ); |
198 | mem::swap(&mut z, &mut zz); |
199 | yi <<= n; |
200 | j += n; |
201 | } |
202 | } |
203 | |
204 | // convert to regular number |
205 | zz = montgomery(&z, &one, m, mr.n0inv, num_words); |
206 | |
207 | zz.normalize(); |
208 | // One last reduction, just in case. |
209 | // See golang.org/issue/13907. |
210 | if zz >= *m { |
211 | // Common case is m has high bit set; in that case, |
212 | // since zz is the same length as m, there can be just |
213 | // one multiple of m to remove. Just subtract. |
214 | // We think that the subtract should be sufficient in general, |
215 | // so do that unconditionally, but double-check, |
216 | // in case our beliefs are wrong. |
217 | // The div is not expected to be reached. |
218 | zz -= m; |
219 | if zz >= *m { |
220 | zz %= m; |
221 | } |
222 | } |
223 | |
224 | zz.normalize(); |
225 | zz |
226 | } |
227 | |