| 1 | // Copyright 2017-2025 Brian Smith. |
| 2 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any |
| 4 | // purpose with or without fee is hereby granted, provided that the above |
| 5 | // copyright notice and this permission notice appear in all copies. |
| 6 | // |
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
| 10 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 14 | |
| 15 | pub use super::n0::N0; |
| 16 | use super::{inout::AliasingSlices3, LimbSliceError, MIN_LIMBS}; |
| 17 | use crate::cpu; |
| 18 | use cfg_if::cfg_if; |
| 19 | |
/// Marker: the element is not encoded; there is no *R* factor
/// that needs to be canceled out.
#[derive(Copy, Clone)]
pub enum Unencoded {}

/// Marker: the element is encoded; the value has one *R*
/// factor that needs to be canceled out.
#[derive(Copy, Clone)]
pub enum R {}

/// Marker: the element is encoded three times; the value has three
/// *R* factors that need to be canceled out.
#[allow(clippy::upper_case_acronyms)]
#[derive(Copy, Clone)]
pub enum RRR {}

/// Marker: the element is encoded twice; the value has two *R*
/// factors that need to be canceled out.
#[derive(Copy, Clone)]
pub enum RR {}

/// Marker: the element is inversely encoded; the value has one
/// 1/*R* factor that needs to be canceled out.
#[derive(Copy, Clone)]
pub enum RInverse {}

/// Implemented by the zero-variant marker types above. These types can
/// never be instantiated; they exist only to tag, at the type level, how
/// many *R* factors a value carries.
pub trait Encoding {}

impl Encoding for RRR {}
impl Encoding for RR {}
impl Encoding for R {}
impl Encoding for Unencoded {}
impl Encoding for RInverse {}
| 53 | |
/// The encoding of the result of a reduction.
///
/// A Montgomery reduction multiplies by 1/*R*, so it removes exactly one
/// *R* factor from its input: each impl below maps an encoding to the one
/// with one fewer *R* factor (and `Unencoded` down to `RInverse`).
pub trait ReductionEncoding {
    type Output: Encoding;
}

impl ReductionEncoding for RRR {
    type Output = RR;
}

impl ReductionEncoding for RR {
    type Output = R;
}
impl ReductionEncoding for R {
    type Output = Unencoded;
}
impl ReductionEncoding for Unencoded {
    type Output = RInverse;
}
| 72 | |
/// The encoding of the result of a multiplication.
///
/// A Montgomery multiplication is a plain multiplication followed by one
/// reduction, so multiplying a value carrying `a` *R* factors by one
/// carrying `b` factors yields a value carrying `a + b - 1` factors.
/// The impls below implement exactly that arithmetic on the marker types.
pub trait ProductEncoding {
    type Output: Encoding;
}

// (0, b) -> b - 1, i.e. one reduction of `E`.
impl<E: ReductionEncoding> ProductEncoding for (Unencoded, E) {
    type Output = E::Output;
}

// (1, b) -> b: the `R` factor cancels the reduction.
impl<E: Encoding> ProductEncoding for (R, E) {
    type Output = E;
}

// (2, 2) -> 3.
impl ProductEncoding for (RR, RR) {
    type Output = RRR;
}

// (-1, b) -> b - 2, i.e. two reductions of `E`.
impl<E: ReductionEncoding> ProductEncoding for (RInverse, E)
where
    E::Output: ReductionEncoding,
{
    type Output = <<E as ReductionEncoding>::Output as ReductionEncoding>::Output;
}

// XXX: Rust doesn't allow overlapping impls,
// TODO (if/when Rust allows it):
// impl<E1, E2: ReductionEncoding> ProductEncoding for
// (E1, E2) {
//     type Output = <(E2, E1) as ProductEncoding>::Output;
// }
// Commutative cases, written out by hand because of the above limitation.
impl ProductEncoding for (RR, Unencoded) {
    type Output = <(Unencoded, RR) as ProductEncoding>::Output;
}
impl ProductEncoding for (RR, RInverse) {
    type Output = <(RInverse, RR) as ProductEncoding>::Output;
}

impl ProductEncoding for (RRR, RInverse) {
    type Output = <(RInverse, RRR) as ProductEncoding>::Output;
}
| 113 | |
| 114 | #[allow (unused_imports)] |
| 115 | use crate::{bssl, c, limb::Limb}; |
| 116 | |
/// Montgomery multiplication: multiplies the two operands supplied via
/// `in_out` modulo `n`, writing the result into the output slice that
/// `in_out` also supplies (the slices may alias; see `AliasingSlices3`).
///
/// Dispatches, per target architecture and detected CPU features, to the
/// fastest available `bn_mul_mont`-family assembly routine, falling back
/// to `bn_mul_mont_nohw` or to the pure fallback defined below.
#[inline(always)]
pub(super) fn limbs_mul_mont(
    in_out: impl AliasingSlices3<Limb>,
    n: &[Limb],
    n0: &N0,
    cpu: cpu::Features,
) -> Result<(), LimbSliceError> {
    // The fallback paths impose no divisibility restriction on the modulus
    // length beyond the `MIN_LIMBS` minimum.
    const MOD_FALLBACK: usize = 1; // No restriction.
    cfg_if! {
        if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
            // No runtime feature detection is needed here; consume `cpu`
            // explicitly to keep the signature uniform across targets.
            let _: cpu::Features = cpu;
            // Prefer the 4-way-unrolled routine whenever the length allows.
            const MIN_4X: usize = 4;
            const MOD_4X: usize = 4;
            if n.len() >= MIN_4X && n.len() % MOD_4X == 0 {
                bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                    (MIN_4X, MOD_4X, ()) => bn_mul4x_mont
                })
            } else {
                bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                    (MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
                })
            }
        } else if #[cfg(all(target_arch = "arm", target_endian = "little"))] {
            // Use the 8-way NEON routine when NEON is available and the
            // length allows; otherwise fall through to the scalar routine.
            const MIN_8X: usize = 8;
            const MOD_8X: usize = 8;
            if n.len() >= MIN_8X && n.len() % MOD_8X == 0 {
                use crate::cpu::{GetFeature as _, arm::Neon};
                if let Some(cpu) = cpu.get_feature() {
                    return bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
                        (MIN_8X, MOD_8X, Neon) => bn_mul8x_mont_neon
                    });
                }
            }
            // The ARM version of `bn_mul_mont_nohw` has a minimum of 2.
            const _MIN_LIMBS_AT_LEAST_2: () = assert!(MIN_LIMBS >= 2);
            bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                (MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
            })
        } else if #[cfg(target_arch = "x86")] {
            use crate::{cpu::GetFeature as _, cpu::intel::Sse2};
            // The X86 implementation of `bn_mul_mont` has a minimum of 4.
            const _MIN_LIMBS_AT_LEAST_4: () = assert!(MIN_LIMBS >= 4);
            if let Some(cpu) = cpu.get_feature() {
                bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
                    (MIN_LIMBS, MOD_FALLBACK, Sse2) => bn_mul_mont
                })
            } else {
                // Without SSE2, use the Rust fallback.
                // This isn't really an FFI call; it's defined below.
                unsafe {
                    super::ffi::bn_mul_mont_ffi::<(), {MIN_LIMBS}, 1>(in_out, n, n0, (),
                        bn_mul_mont_fallback)
                }
            }
        } else if #[cfg(target_arch = "x86_64")] {
            use crate::{cpu::GetFeature as _, polyfill::slice};
            use super::x86_64_mont;
            // Use the mont5 4-way code when `n` splits evenly into the
            // fixed-size chunks `as_chunks` produces (no remainder).
            if n.len() >= x86_64_mont::MIN_4X {
                if let (n, []) = slice::as_chunks(n) {
                    return x86_64_mont::mul_mont5_4x(in_out, n, n0, cpu.get_feature());
                }
            }
            bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
                (MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
            })
        } else {
            // Use the fallback implementation implemented below through the
            // FFI wrapper defined below, so that Rust and C code both go
            // through `bn_mul_mont`.
            bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
                (MIN_LIMBS, MOD_FALLBACK, cpu::Features) => bn_mul_mont
            })
        }
    }
}
| 191 | |
cfg_if! {
    if #[cfg(not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86_64")))] {

        // C-callable `bn_mul_mont` for targets with no assembly
        // implementation; it simply forwards to the fallback below.
        // TODO: Stop calling this from C and un-export it.
        #[cfg(not(target_arch = "x86"))]
        prefixed_export! {
            unsafe extern "C" fn bn_mul_mont(
                r: *mut Limb,
                a: *const Limb,
                b: *const Limb,
                n: *const Limb,
                n0: &N0,
                num_limbs: c::NonZero_size_t,
            ) {
                // SAFETY: the caller upholds `bn_mul_mont_fallback`'s
                // pointer/length contract (documented on it below).
                unsafe { bn_mul_mont_fallback(r, a, b, n, n0, num_limbs) }
            }
        }

        // Fallback Montgomery multiplication: computes the full double-width
        // schoolbook product into a scratch buffer, then Montgomery-reduces
        // it back into `r`.
        //
        // Safety: `r`, `a`, `b`, and `n` must each point to `num_limbs`
        // limbs. `r` may alias `a` and/or `b`; `n` must not alias anything.
        //
        // On x86 this only runs when SSE2 is unavailable (see
        // `limbs_mul_mont`), so keep it out of the hot path there.
        #[cfg_attr(target_arch = "x86", cold)]
        #[cfg_attr(target_arch = "x86", inline(never))]
        unsafe extern "C" fn bn_mul_mont_fallback(
            r: *mut Limb,
            a: *const Limb,
            b: *const Limb,
            n: *const Limb,
            n0: &N0,
            num_limbs: c::NonZero_size_t,
        ) {
            use super::MAX_LIMBS;

            let num_limbs = num_limbs.get();

            // The mutable pointer `r` may alias `a` and/or `b`, so the lifetimes of
            // any slices for `a` or `b` must not overlap with the lifetime of any
            // mutable for `r`.

            // Nothing aliases `n`
            let n = unsafe { core::slice::from_raw_parts(n, num_limbs) };

            // Scratch space for the double-width (2 * num_limbs) product.
            let mut tmp = [0; 2 * MAX_LIMBS];
            let tmp = &mut tmp[..(2 * num_limbs)];
            {
                let a: &[Limb] = unsafe { core::slice::from_raw_parts(a, num_limbs) };
                let b: &[Limb] = unsafe { core::slice::from_raw_parts(b, num_limbs) };
                limbs_mul(tmp, a, b);
            }
            // `a`/`b` borrows have ended, so it is now sound to form a
            // mutable slice over the possibly-aliasing `r`.
            let r: &mut [Limb] = unsafe { core::slice::from_raw_parts_mut(r, num_limbs) };
            limbs_from_mont_in_place(r, tmp, n, n0);
        }
    }
}
| 246 | |
// `bigint` needs this when the `alloc` feature is enabled. `bn_mul_mont` above
// needs this when we are using the platforms for which we don't have
// `bn_mul_mont` in assembly.
#[cfg(any(
    feature = "alloc",
    not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86",
        target_arch = "x86_64"
    ))
))]
/// Montgomery reduction: reduces the double-width value in `tmp` modulo `m`,
/// writing the single-width result into `r`, by delegating to the C
/// `bn_from_montgomery_in_place` routine. `tmp` is passed mutably and should
/// be treated as clobbered afterwards.
///
/// Panics if the C routine reports failure — presumably on inconsistent
/// lengths (TODO confirm against the C implementation).
pub(super) fn limbs_from_mont_in_place(r: &mut [Limb], tmp: &mut [Limb], m: &[Limb], n0: &N0) {
    prefixed_extern! {
        fn bn_from_montgomery_in_place(
            r: *mut Limb,
            num_r: c::size_t,
            a: *mut Limb,
            num_a: c::size_t,
            n: *const Limb,
            num_n: c::size_t,
            n0: &N0,
        ) -> bssl::Result;
    }
    // SAFETY: every pointer is derived from a live slice and is passed
    // together with that slice's exact length; Rust's borrow rules ensure
    // the mutable buffers don't alias each other or `m`.
    Result::from(unsafe {
        bn_from_montgomery_in_place(
            r.as_mut_ptr(),
            r.len(),
            tmp.as_mut_ptr(),
            tmp.len(),
            m.as_ptr(),
            m.len(),
            n0,
        )
    })
    .unwrap()
}
| 283 | |
/// Schoolbook multiplication: `r = a * b`, where `a` and `b` have the same
/// length and `r` holds the full `2 * a.len()`-limb product.
#[cfg(not(any(
    all(target_arch = "aarch64", target_endian = "little"),
    all(target_arch = "arm", target_endian = "little"),
    target_arch = "x86_64"
)))]
fn limbs_mul(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
    debug_assert_eq!(r.len(), 2 * a.len());
    debug_assert_eq!(a.len(), b.len());
    let ab_len = a.len();

    // Zero the low half; each row of partial products accumulates into
    // `r[i..][..ab_len]`, and the row's carry-out becomes `r[ab_len + i]`.
    r[..ab_len].fill(0);
    for (i, &b_limb) in b.iter().enumerate() {
        // SAFETY: `r` and `a` are distinct slices (guaranteed by borrowing),
        // and both pointers are valid for `ab_len` limbs, as
        // `limbs_mul_add_limb` requires.
        r[ab_len + i] = unsafe {
            limbs_mul_add_limb(r[i..][..ab_len].as_mut_ptr(), a.as_ptr(), b_limb, ab_len)
        };
    }
}
| 301 | |
// Needed by the tests below and by the fallback `limbs_mul` above on targets
// without assembly Montgomery multiplication.
#[cfg(any(
    test,
    not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86_64",
    ))
))]
prefixed_extern! {
    // Computes `r += a * b` over `num_limbs` limbs and returns the carry-out
    // limb (see the test vectors in `test_mul_add_words`).
    // `r` must not alias `a`.
    #[must_use]
    fn limbs_mul_add_limb(r: *mut Limb, a: *const Limb, b: Limb, num_limbs: c::size_t) -> Limb;
}
| 315 | |
| 316 | /// r = r**2 |
| 317 | pub(super) fn limbs_square_mont( |
| 318 | r: &mut [Limb], |
| 319 | n: &[Limb], |
| 320 | n0: &N0, |
| 321 | cpu: cpu::Features, |
| 322 | ) -> Result<(), LimbSliceError> { |
| 323 | #[cfg (all(target_arch = "aarch64" , target_endian = "little" ))] |
| 324 | { |
| 325 | use super::aarch64_mont; |
| 326 | use crate::polyfill::slice; |
| 327 | if let ((r, []), (n, [])) = (slice::as_chunks_mut(r), slice::as_chunks(n)) { |
| 328 | return aarch64_mont::sqr_mont5(r, n, n0); |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | #[cfg (target_arch = "x86_64" )] |
| 333 | { |
| 334 | use super::x86_64_mont; |
| 335 | use crate::{cpu::GetFeature as _, polyfill::slice}; |
| 336 | if let ((r: AsChunksMut<'_, u64, 8>, []), (n: AsChunks<'_, u64, 8>, [])) = (slice::as_chunks_mut(slice:r), slice::as_chunks(slice:n)) { |
| 337 | return x86_64_mont::sqr_mont5(in_out:r, n, n0, maybe_adx_bmi2:cpu.get_feature()); |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | limbs_mul_mont(in_out:r, n, n0, cpu) |
| 342 | } |
| 343 | |
#[cfg(test)]
mod tests {
    use super::super::MAX_LIMBS;
    use super::*;
    use crate::limb::Limb;

    #[test]
    // TODO: wasm
    fn test_mul_add_words() {
        const ZERO: Limb = 0;
        const MAX: Limb = ZERO.wrapping_sub(1);
        // Each case: (initial r, a, multiplier w, expected carry, expected r)
        // for `r += a * w`.
        static TEST_CASES: &[(&[Limb], &[Limb], Limb, Limb, &[Limb])] = &[
            (&[0], &[0], 0, 0, &[0]),
            (&[MAX], &[0], MAX, 0, &[MAX]),
            (&[0], &[MAX], MAX, MAX - 1, &[1]),
            (&[MAX], &[MAX], MAX, MAX, &[0]),
            (&[0, 0], &[MAX, MAX], MAX, MAX - 1, &[1, MAX]),
            (&[1, 0], &[MAX, MAX], MAX, MAX - 1, &[2, MAX]),
            (&[MAX, 0], &[MAX, MAX], MAX, MAX, &[0, 0]),
            (&[0, 1], &[MAX, MAX], MAX, MAX, &[1, 0]),
            (&[MAX, MAX], &[MAX, MAX], MAX, MAX, &[0, MAX]),
        ];

        for (case, (r_input, a, w, expected_carry, expected_r)) in
            TEST_CASES.iter().enumerate()
        {
            // Stage the input in a buffer big enough for any test case,
            // then work on the prefix matching this case's length.
            let mut storage = [0; MAX_LIMBS];
            let r = &mut storage[..r_input.len()];
            r.copy_from_slice(r_input);
            assert_eq!(r.len(), a.len()); // Sanity check

            // SAFETY: `r` and `a` are distinct buffers, each valid for
            // `a.len()` limbs, as `limbs_mul_add_limb` requires.
            let carry = unsafe { limbs_mul_add_limb(r.as_mut_ptr(), a.as_ptr(), *w, a.len()) };

            assert_eq!(&r, expected_r, "{}: {:x?} != {:x?}", case, r, expected_r);
            assert_eq!(
                carry, *expected_carry,
                "{}: {:x?} != {:x?}",
                case, carry, *expected_carry
            );
        }
    }
}
| 386 | |