1 | // Copyright 2017-2025 Brian Smith. |
2 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any |
4 | // purpose with or without fee is hereby granted, provided that the above |
5 | // copyright notice and this permission notice appear in all copies. |
6 | // |
7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
10 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
12 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
13 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
14 | |
15 | pub use super::n0::N0; |
16 | use super::{inout::AliasingSlices3, LimbSliceError, MIN_LIMBS}; |
17 | use crate::cpu; |
18 | use cfg_if::cfg_if; |
19 | |
// Type-state markers tracking how many Montgomery *R* factors a value
// carries. The enums are uninhabited: they exist only as compile-time tags,
// so mixing up encodings is a type error rather than a runtime bug.

// Indicates that the element is not encoded; there is no *R* factor
// that needs to be canceled out.
#[derive(Copy, Clone)]
pub enum Unencoded {}

// Indicates that the element is encoded; the value has one *R*
// factor that needs to be canceled out.
#[derive(Copy, Clone)]
pub enum R {}

// Indicates the element is encoded three times; the value has three
// *R* factors that need to be canceled out.
#[allow(clippy::upper_case_acronyms)]
#[derive(Copy, Clone)]
pub enum RRR {}

// Indicates the element is encoded twice; the value has two *R*
// factors that need to be canceled out.
#[derive(Copy, Clone)]
pub enum RR {}

// Indicates the element is inversely encoded; the value has one
// 1/*R* factor that needs to be canceled out.
#[derive(Copy, Clone)]
pub enum RInverse {}

// Marker trait implemented by every encoding state above.
pub trait Encoding {}

impl Encoding for RRR {}
impl Encoding for RR {}
impl Encoding for R {}
impl Encoding for Unencoded {}
impl Encoding for RInverse {}
53 | |
/// The encoding of the result of a reduction.
///
/// A Montgomery reduction removes exactly one *R* factor, so each impl below
/// maps an encoding to the one with one fewer *R*: `RRR -> RR -> R ->
/// Unencoded -> RInverse`.
pub trait ReductionEncoding {
    type Output: Encoding;
}

impl ReductionEncoding for RRR {
    type Output = RR;
}

impl ReductionEncoding for RR {
    type Output = R;
}
impl ReductionEncoding for R {
    type Output = Unencoded;
}
impl ReductionEncoding for Unencoded {
    type Output = RInverse;
}
72 | |
/// The encoding of the result of a multiplication.
///
/// A Montgomery multiplication acts like a plain multiplication followed by
/// one reduction, so the product of operands carrying `i` and `j` factors of
/// *R* carries `i + j - 1` of them. The impls below encode that arithmetic at
/// the type level for each operand pair.
pub trait ProductEncoding {
    type Output: Encoding;
}

// 0 + j - 1: same as reducing the second operand once.
impl<E: ReductionEncoding> ProductEncoding for (Unencoded, E) {
    type Output = E::Output;
}

// 1 + j - 1 = j: the extra *R* cancels the reduction.
impl<E: Encoding> ProductEncoding for (R, E) {
    type Output = E;
}

// 2 + 2 - 1 = 3.
impl ProductEncoding for (RR, RR) {
    type Output = RRR;
}

// -1 + j - 1: same as reducing the second operand twice.
impl<E: ReductionEncoding> ProductEncoding for (RInverse, E)
where
    E::Output: ReductionEncoding,
{
    type Output = <<E as ReductionEncoding>::Output as ReductionEncoding>::Output;
}

// XXX: Rust doesn't allow overlapping impls,
// TODO (if/when Rust allows it):
// impl<E1, E2: ReductionEncoding> ProductEncoding for
// (E1, E2) {
//     type Output = <(E2, E1) as ProductEncoding>::Output;
// }
// Until then, spell out the commuted cases explicitly:
impl ProductEncoding for (RR, Unencoded) {
    type Output = <(Unencoded, RR) as ProductEncoding>::Output;
}
impl ProductEncoding for (RR, RInverse) {
    type Output = <(RInverse, RR) as ProductEncoding>::Output;
}

impl ProductEncoding for (RRR, RInverse) {
    type Output = <(RInverse, RRR) as ProductEncoding>::Output;
}
113 | |
114 | #[allow (unused_imports)] |
115 | use crate::{bssl, c, limb::Limb}; |
116 | |
/// Montgomery multiplication of the operands held by `in_out`, modulo `n`,
/// dispatching (at compile time per target, and on some targets at run time
/// via `cpu`) to the fastest available `bn_mul_mont` implementation.
///
/// `n0` is the precomputed Montgomery parameter for `n`. Returns a
/// `LimbSliceError` when the slice lengths don't satisfy the selected
/// implementation's minimum-length / divisibility requirements (enforced by
/// the `(MIN, MOD, …)` tuples passed to `bn_mul_mont_ffi!`).
#[inline(always)]
pub(super) fn limbs_mul_mont(
    in_out: impl AliasingSlices3<Limb>,
    n: &[Limb],
    n0: &N0,
    cpu: cpu::Features,
) -> Result<(), LimbSliceError> {
    const MOD_FALLBACK: usize = 1; // No restriction.
    cfg_if! {
        if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
            // Neither aarch64 path does a runtime feature check.
            let _: cpu::Features = cpu;
            const MIN_4X: usize = 4;
            const MOD_4X: usize = 4;
            // Use the 4-limb-at-a-time kernel when the length allows it.
            if n.len() >= MIN_4X && n.len() % MOD_4X == 0 {
                bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                    (MIN_4X, MOD_4X, ()) => bn_mul4x_mont
                })
            } else {
                bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                    (MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
                })
            }
        } else if #[cfg(all(target_arch = "arm", target_endian = "little"))] {
            const MIN_8X: usize = 8;
            const MOD_8X: usize = 8;
            // Prefer the 8x NEON kernel when the length allows it and NEON is
            // available at run time.
            if n.len() >= MIN_8X && n.len() % MOD_8X == 0 {
                use crate::cpu::{GetFeature as _, arm::Neon};
                if let Some(cpu) = cpu.get_feature() {
                    return bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
                        (MIN_8X, MOD_8X, Neon) => bn_mul8x_mont_neon
                    });
                }
            }
            // The ARM version of `bn_mul_mont_nohw` has a minimum of 2.
            const _MIN_LIMBS_AT_LEAST_2: () = assert!(MIN_LIMBS >= 2);
            bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                (MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
            })
        } else if #[cfg(target_arch = "x86")] {
            use crate::{cpu::GetFeature as _, cpu::intel::Sse2};
            // The X86 implementation of `bn_mul_mont` has a minimum of 4.
            const _MIN_LIMBS_AT_LEAST_4: () = assert!(MIN_LIMBS >= 4);
            if let Some(cpu) = cpu.get_feature() {
                bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
                    (MIN_LIMBS, MOD_FALLBACK, Sse2) => bn_mul_mont
                })
            } else {
                // No SSE2. This isn't really an FFI call; it's defined below.
                unsafe {
                    super::ffi::bn_mul_mont_ffi::<(), {MIN_LIMBS}, 1>(in_out, n, n0, (),
                        bn_mul_mont_fallback)
                }
            }
        } else if #[cfg(target_arch = "x86_64")] {
            use crate::{cpu::GetFeature as _, polyfill::slice};
            use super::x86_64_mont;
            // Use the mont5 4x kernel when the limb count is a large-enough
            // exact multiple of its chunk size (the `[]` remainder pattern).
            if n.len() >= x86_64_mont::MIN_4X {
                if let (n, []) = slice::as_chunks(n) {
                    return x86_64_mont::mul_mont5_4x(in_out, n, n0, cpu.get_feature());
                }
            }
            bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                (MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
            })
        } else {
            // Use the fallback implementation implemented below through the
            // FFI wrapper defined below, so that Rust and C code both go
            // through `bn_mul_mont`.
            bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
                (MIN_LIMBS, MOD_FALLBACK, cpu::Features) => bn_mul_mont
            })
        }
    }
}
191 | |
cfg_if! {
    if #[cfg(not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86_64")))] {

        // On targets with no assembly implementation, export the Rust
        // fallback under the `bn_mul_mont` name so C callers link against it.
        // TODO: Stop calling this from C and un-export it.
        #[cfg(not(target_arch = "x86"))]
        prefixed_export! {
            unsafe extern "C" fn bn_mul_mont(
                r: *mut Limb,
                a: *const Limb,
                b: *const Limb,
                n: *const Limb,
                n0: &N0,
                num_limbs: c::NonZero_size_t,
            ) {
                // SAFETY: Forwards the caller's contract unchanged.
                unsafe { bn_mul_mont_fallback(r, a, b, n, n0, num_limbs) }
            }
        }

        // Pure-Rust Montgomery multiplication fallback: computes the
        // double-width product `a * b` into a stack buffer, then
        // Montgomery-reduces it into `r`.
        //
        // On x86 this is only the no-SSE2 path, so keep it out of hot code.
        #[cfg_attr(target_arch = "x86", cold)]
        #[cfg_attr(target_arch = "x86", inline(never))]
        unsafe extern "C" fn bn_mul_mont_fallback(
            r: *mut Limb,
            a: *const Limb,
            b: *const Limb,
            n: *const Limb,
            n0: &N0,
            num_limbs: c::NonZero_size_t,
        ) {
            use super::MAX_LIMBS;

            let num_limbs = num_limbs.get();

            // The mutable pointer `r` may alias `a` and/or `b`, so the lifetimes of
            // any slices for `a` or `b` must not overlap with the lifetime of any
            // mutable for `r`.

            // Nothing aliases `n`
            let n = unsafe { core::slice::from_raw_parts(n, num_limbs) };

            // Double-width scratch for the full product `a * b`.
            let mut tmp = [0; 2 * MAX_LIMBS];
            let tmp = &mut tmp[..(2 * num_limbs)];
            {
                // SAFETY: `a` and `b` are only borrowed inside this scope,
                // which ends before the mutable borrow of `r` below, per the
                // aliasing note above.
                let a: &[Limb] = unsafe { core::slice::from_raw_parts(a, num_limbs) };
                let b: &[Limb] = unsafe { core::slice::from_raw_parts(b, num_limbs) };
                limbs_mul(tmp, a, b);
            }
            let r: &mut [Limb] = unsafe { core::slice::from_raw_parts_mut(r, num_limbs) };
            limbs_from_mont_in_place(r, tmp, n, n0);
        }
    }
}
246 | |
// `bigint` needs this when the `alloc` feature is enabled. `bn_mul_mont` above needs this when
// we are using the platforms for which we don't have `bn_mul_mont` in assembly.
#[cfg(any(
    feature = "alloc",
    not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86",
        target_arch = "x86_64"
    ))
))]
/// Montgomery-reduces the double-width value in `tmp` modulo `m` into `r`,
/// delegating to the C `bn_from_montgomery_in_place`. `n0` is the
/// precomputed Montgomery parameter for `m`. Panics if the C code reports
/// failure (a length/precondition violation, i.e. a caller bug).
pub(super) fn limbs_from_mont_in_place(r: &mut [Limb], tmp: &mut [Limb], m: &[Limb], n0: &N0) {
    prefixed_extern! {
        fn bn_from_montgomery_in_place(
            r: *mut Limb,
            num_r: c::size_t,
            a: *mut Limb,
            num_a: c::size_t,
            n: *const Limb,
            num_n: c::size_t,
            n0: &N0,
        ) -> bssl::Result;
    }
    // SAFETY: Each pointer is paired with the length of the slice it was
    // derived from, and the slices live for the duration of the call.
    Result::from(unsafe {
        bn_from_montgomery_in_place(
            r.as_mut_ptr(),
            r.len(),
            tmp.as_mut_ptr(),
            tmp.len(),
            m.as_ptr(),
            m.len(),
            n0,
        )
    })
    .unwrap()
}
283 | |
/// Schoolbook multiplication: `r = a * b`, where `r` is double-width.
///
/// Requires `r.len() == 2 * a.len()` and `a.len() == b.len()`
/// (debug-asserted).
#[cfg(not(any(
    all(target_arch = "aarch64", target_endian = "little"),
    all(target_arch = "arm", target_endian = "little"),
    target_arch = "x86_64"
)))]
fn limbs_mul(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
    debug_assert_eq!(r.len(), 2 * a.len());
    debug_assert_eq!(a.len(), b.len());
    let ab_len = a.len();

    // Zero the low half; each limb of the high half is written exactly once
    // by a carry-out below.
    r[..ab_len].fill(0);
    // For each limb of `b`, accumulate `a * b_limb` into the window
    // `r[i..i + ab_len]`, storing the carry-out in `r[ab_len + i]`.
    for (i, &b_limb) in b.iter().enumerate() {
        r[ab_len + i] = unsafe {
            // SAFETY: `r[i..][..ab_len]` is a valid writable window of
            // `ab_len` limbs, `a` has `ab_len` limbs, and `r` and `a` are
            // distinct slices, satisfying `limbs_mul_add_limb`'s no-alias
            // requirement.
            limbs_mul_add_limb(r[i..][..ab_len].as_mut_ptr(), a.as_ptr(), b_limb, ab_len)
        };
    }
}
301 | |
// Needed by the fallback `limbs_mul` above and by the tests below.
#[cfg(any(
    test,
    not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86_64",
    ))
))]
prefixed_extern! {
    // Computes `r += a * b` over `num_limbs` limbs and returns the final
    // carry-out limb (see the test vectors in `tests` below).
    // `r` must not alias `a`
    #[must_use]
    fn limbs_mul_add_limb(r: *mut Limb, a: *const Limb, b: Limb, num_limbs: c::size_t) -> Limb;
}
315 | |
316 | /// r = r**2 |
317 | pub(super) fn limbs_square_mont( |
318 | r: &mut [Limb], |
319 | n: &[Limb], |
320 | n0: &N0, |
321 | cpu: cpu::Features, |
322 | ) -> Result<(), LimbSliceError> { |
323 | #[cfg (all(target_arch = "aarch64" , target_endian = "little" ))] |
324 | { |
325 | use super::aarch64_mont; |
326 | use crate::polyfill::slice; |
327 | if let ((r, []), (n, [])) = (slice::as_chunks_mut(r), slice::as_chunks(n)) { |
328 | return aarch64_mont::sqr_mont5(r, n, n0); |
329 | } |
330 | } |
331 | |
332 | #[cfg (target_arch = "x86_64" )] |
333 | { |
334 | use super::x86_64_mont; |
335 | use crate::{cpu::GetFeature as _, polyfill::slice}; |
336 | if let ((r: AsChunksMut<'_, u64, 8>, []), (n: AsChunks<'_, u64, 8>, [])) = (slice::as_chunks_mut(slice:r), slice::as_chunks(slice:n)) { |
337 | return x86_64_mont::sqr_mont5(in_out:r, n, n0, maybe_adx_bmi2:cpu.get_feature()); |
338 | } |
339 | } |
340 | |
341 | limbs_mul_mont(in_out:r, n, n0, cpu) |
342 | } |
343 | |
#[cfg(test)]
mod tests {
    use super::super::MAX_LIMBS;
    use super::*;
    use crate::limb::Limb;

    #[test]
    // TODO: wasm
    fn test_mul_add_words() {
        const ZERO: Limb = 0;
        const MAX: Limb = ZERO.wrapping_sub(1);
        // Each case is (initial r, a, multiplier w, expected carry-out,
        // expected final r), exercising zero, all-ones, and carry-chain
        // boundaries for one- and two-limb inputs.
        static TEST_CASES: &[(&[Limb], &[Limb], Limb, Limb, &[Limb])] = &[
            (&[0], &[0], 0, 0, &[0]),
            (&[MAX], &[0], MAX, 0, &[MAX]),
            (&[0], &[MAX], MAX, MAX - 1, &[1]),
            (&[MAX], &[MAX], MAX, MAX, &[0]),
            (&[0, 0], &[MAX, MAX], MAX, MAX - 1, &[1, MAX]),
            (&[1, 0], &[MAX, MAX], MAX, MAX - 1, &[2, MAX]),
            (&[MAX, 0], &[MAX, MAX], MAX, MAX, &[0, 0]),
            (&[0, 1], &[MAX, MAX], MAX, MAX, &[1, 0]),
            (&[MAX, MAX], &[MAX, MAX], MAX, MAX, &[0, MAX]),
        ];

        for (case, (r_input, a, w, expected_retval, expected_r)) in TEST_CASES.iter().enumerate() {
            // Stage the input into a maximally-sized buffer, then work on a
            // window exactly as long as the input.
            let mut storage = [0; MAX_LIMBS];
            let r = &mut storage[..r_input.len()];
            r.copy_from_slice(r_input);
            assert_eq!(r.len(), a.len()); // Sanity check
            // SAFETY: `r` and `a` are distinct allocations of `a.len()`
            // limbs each, satisfying the extern's no-alias requirement.
            let actual_retval =
                unsafe { limbs_mul_add_limb(r.as_mut_ptr(), a.as_ptr(), *w, a.len()) };
            assert_eq!(&r, expected_r, "{}: {:x?} != {:x?}", case, r, expected_r);
            assert_eq!(
                actual_retval, *expected_retval,
                "{}: {:x?} != {:x?}",
                case, actual_retval, *expected_retval
            );
        }
    }
}
386 | |