| 1 | // Copyright 2015-2025 Brian Smith. |
| 2 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any |
| 4 | // purpose with or without fee is hereby granted, provided that the above |
| 5 | // copyright notice and this permission notice appear in all copies. |
| 6 | // |
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
| 10 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 14 | |
| 15 | #![cfg (target_arch = "x86_64" )] |
| 16 | |
| 17 | use super::{ |
| 18 | inout::{AliasingSlices2, AliasingSlices3}, |
| 19 | n0::N0, |
| 20 | LimbSliceError, MAX_LIMBS, |
| 21 | }; |
| 22 | use crate::{ |
| 23 | c, |
| 24 | cpu::intel::{Adx, Bmi1, Bmi2}, |
| 25 | error::LenMismatchError, |
| 26 | limb::{LeakyWindow, Limb, Window}, |
| 27 | polyfill::slice::{AsChunks, AsChunksMut}, |
| 28 | }; |
| 29 | use core::num::NonZeroUsize; |
| 30 | |
// Compile-time check that eight limbs span exactly 512 bits; the build fails
// if `8 * Limb::BITS` is anything else.
const _512_IS_LIMB_BITS_TIMES_8: () = assert!(8 * Limb::BITS == 512);
| 32 | |
| 33 | #[inline ] |
| 34 | pub(super) fn mul_mont5( |
| 35 | mut r: AsChunksMut<Limb, 8>, |
| 36 | a: AsChunks<Limb, 8>, |
| 37 | b: AsChunks<Limb, 8>, |
| 38 | m: AsChunks<Limb, 8>, |
| 39 | n0: &N0, |
| 40 | maybe_adx_bmi2: Option<(Adx, Bmi2)>, |
| 41 | ) -> Result<(), LimbSliceError> { |
| 42 | mul_mont5_4x( |
| 43 | (r.as_flattened_mut(), a.as_flattened(), b.as_flattened()), |
| 44 | n:m.into(), |
| 45 | n0, |
| 46 | maybe_adx_bmi2, |
| 47 | ) |
| 48 | } |
| 49 | |
// Passed by `mul_mont5_4x` to `bn_mul_mont_ffi!` as the first element of its
// size-constraint tuple (next to `MOD_4X`).
// NOTE(review): presumably the minimum limb count accepted by the 4x
// routines; confirm against the macro definition.
pub const MIN_4X: usize = 8;
| 51 | |
| 52 | #[inline ] |
| 53 | pub(super) fn mul_mont5_4x( |
| 54 | in_out: impl AliasingSlices3<Limb>, |
| 55 | n: AsChunks<Limb, 4>, |
| 56 | n0: &N0, |
| 57 | maybe_adx_bmi2: Option<(Adx, Bmi2)>, |
| 58 | ) -> Result<(), LimbSliceError> { |
| 59 | const MOD_4X: usize = 4; |
| 60 | let n: &[u64] = n.as_flattened(); |
| 61 | if let Some(cpu: (Adx, Bmi2)) = maybe_adx_bmi2 { |
| 62 | bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe { |
| 63 | (MIN_4X, MOD_4X, (Adx, Bmi2)) => bn_mulx4x_mont |
| 64 | }) |
| 65 | } else { |
| 66 | bn_mul_mont_ffi!(in_out, n, n0, (), unsafe { |
| 67 | (MIN_4X, MOD_4X, ()) => bn_mul4x_mont |
| 68 | }) |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | #[inline ] |
| 73 | pub(super) fn sqr_mont5( |
| 74 | mut in_out: AsChunksMut<Limb, 8>, |
| 75 | n: AsChunks<Limb, 8>, |
| 76 | n0: &N0, |
| 77 | maybe_adx_bmi2: Option<(Adx, Bmi2)>, |
| 78 | ) -> Result<(), LimbSliceError> { |
| 79 | prefixed_extern! { |
| 80 | // `r` and/or 'a' may alias. |
| 81 | // XXX: BoringSSL declares this to return `int`. |
| 82 | // `num` must be a non-zero multiple of 8. |
| 83 | fn bn_sqr8x_mont( |
| 84 | rp: *mut Limb, |
| 85 | ap: *const Limb, |
| 86 | mulx_adx_capable: Limb, |
| 87 | np: *const Limb, |
| 88 | n0: &N0, |
| 89 | num: c::NonZero_size_t); |
| 90 | } |
| 91 | |
| 92 | let in_out = in_out.as_flattened_mut(); |
| 93 | let n = n.as_flattened(); |
| 94 | let num_limbs = NonZeroUsize::new(n.len()).ok_or_else(|| LimbSliceError::too_short(n.len()))?; |
| 95 | |
| 96 | // Avoid stack overflow from the alloca inside. |
| 97 | if num_limbs.get() > MAX_LIMBS { |
| 98 | return Err(LimbSliceError::too_long(num_limbs.get())); |
| 99 | } |
| 100 | |
| 101 | // `Limb::from(mulx_adx.is_some())`, but intentionally branchy. |
| 102 | let mulx_adx_capable = match maybe_adx_bmi2 { |
| 103 | Some(_) => Limb::from(true), |
| 104 | None => Limb::from(false), |
| 105 | }; |
| 106 | |
| 107 | in_out |
| 108 | .with_non_dangling_non_null_pointers_ra(num_limbs, |r, a| { |
| 109 | let n = n.as_ptr(); // Non-dangling because num_limbs > 0. |
| 110 | unsafe { bn_sqr8x_mont(r, a, mulx_adx_capable, n, n0, num_limbs) }; |
| 111 | }) |
| 112 | .map_err(LimbSliceError::len_mismatch) |
| 113 | } |
| 114 | |
| 115 | #[inline (always)] |
| 116 | pub(super) fn scatter5( |
| 117 | a: AsChunks<Limb, 8>, |
| 118 | mut table: AsChunksMut<Limb, 8>, |
| 119 | power: LeakyWindow, |
| 120 | ) -> Result<(), LimbSliceError> { |
| 121 | prefixed_extern! { |
| 122 | // Upstream uses `num: c::size_t` too, and `power: c::size_t`; see |
| 123 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
| 124 | fn bn_scatter5( |
| 125 | inp: *const Limb, |
| 126 | num: c::NonZero_size_t, |
| 127 | table: *mut Limb, |
| 128 | power: LeakyWindow, |
| 129 | ); |
| 130 | } |
| 131 | let num_limbs: NonZero = check_common(a, table.as_ref())?; |
| 132 | let a: &[u64] = a.as_flattened(); |
| 133 | let table: &mut [u64] = table.as_flattened_mut(); |
| 134 | assert!(power < 32); |
| 135 | unsafe { bn_scatter5(inp:a.as_ptr(), num_limbs, table.as_mut_ptr(), power) }; |
| 136 | Ok(()) |
| 137 | } |
| 138 | |
| 139 | // SAFETY: `power` must be less than 32. |
| 140 | #[inline (always)] |
| 141 | pub(super) unsafe fn gather5( |
| 142 | mut r: AsChunksMut<Limb, 8>, |
| 143 | table: AsChunks<Limb, 8>, |
| 144 | power: Window, |
| 145 | ) -> Result<(), LimbSliceError> { |
| 146 | prefixed_extern! { |
| 147 | // Upstream uses `num: c::size_t` too, and `power: c::size_t`; see |
| 148 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
| 149 | pub(super) fn bn_gather5( |
| 150 | out: *mut Limb, |
| 151 | num: c::NonZero_size_t, |
| 152 | table: *const Limb, |
| 153 | power: Window); |
| 154 | } |
| 155 | let num_limbs: NonZero = check_common(a:r.as_ref(), table)?; |
| 156 | let r: &mut [u64] = r.as_flattened_mut(); |
| 157 | let table: &[u64] = table.as_flattened(); |
| 158 | // SAFETY: We cannot assert that `power` is in range because it is secret. |
| 159 | // TODO: Create a `Window5` type that is guaranteed to be in range. |
| 160 | unsafe { bn_gather5(out:r.as_mut_ptr(), num_limbs, table.as_ptr(), power) }; |
| 161 | Ok(()) |
| 162 | } |
| 163 | |
| 164 | // SAFETY: `power` must be less than 32. |
| 165 | #[inline (always)] |
| 166 | pub(super) unsafe fn mul_mont_gather5_amm( |
| 167 | mut r: AsChunksMut<Limb, 8>, |
| 168 | a: AsChunks<Limb, 8>, |
| 169 | table: AsChunks<Limb, 8>, |
| 170 | n: AsChunks<Limb, 8>, |
| 171 | n0: &N0, |
| 172 | power: Window, |
| 173 | maybe_adx_bmi1_bmi2: Option<(Adx, Bmi1, Bmi2)>, |
| 174 | ) -> Result<(), LimbSliceError> { |
| 175 | prefixed_extern! { |
| 176 | // Upstream has `num: c::int` and `power: c::int`; see |
| 177 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
| 178 | pub(super) fn bn_mul4x_mont_gather5( |
| 179 | rp: *mut Limb, |
| 180 | ap: *const Limb, |
| 181 | table: *const Limb, |
| 182 | np: *const Limb, |
| 183 | n0: &N0, |
| 184 | num: c::NonZero_size_t, |
| 185 | power: Window, |
| 186 | ); |
| 187 | // Upstream has `num: c::int` and `power: c::int`; see |
| 188 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
| 189 | pub(super) fn bn_mulx4x_mont_gather5( |
| 190 | rp: *mut Limb, |
| 191 | ap: *const Limb, |
| 192 | table: *const Limb, |
| 193 | np: *const Limb, |
| 194 | n0: &N0, |
| 195 | num: c::NonZero_size_t, |
| 196 | power: Window, |
| 197 | ); |
| 198 | } |
| 199 | let num_limbs = check_common_with_n(r.as_ref(), table, n)?; |
| 200 | let a = a.as_flattened(); |
| 201 | if a.len() != num_limbs.get() { |
| 202 | return Err(LimbSliceError::len_mismatch(LenMismatchError::new(a.len()))); |
| 203 | } |
| 204 | let r = r.as_flattened_mut(); |
| 205 | let r = r.as_mut_ptr(); |
| 206 | let a = a.as_ptr(); |
| 207 | let table = table.as_flattened(); |
| 208 | let table = table.as_ptr(); |
| 209 | let n = n.as_flattened(); |
| 210 | let n = n.as_ptr(); |
| 211 | // SAFETY: We cannot assert that `power` is in range because it is secret. |
| 212 | // TODO: Create a `Window5` type that is guaranteed to be in range. |
| 213 | if maybe_adx_bmi1_bmi2.is_some() { |
| 214 | unsafe { bn_mulx4x_mont_gather5(r, a, table, n, n0, num_limbs, power) } |
| 215 | } else { |
| 216 | unsafe { bn_mul4x_mont_gather5(r, a, table, n, n0, num_limbs, power) } |
| 217 | }; |
| 218 | Ok(()) |
| 219 | } |
| 220 | |
| 221 | // SAFETY: `power` must be less than 32. |
| 222 | #[inline (always)] |
| 223 | pub(super) unsafe fn power5_amm( |
| 224 | mut in_out: AsChunksMut<Limb, 8>, |
| 225 | table: AsChunks<Limb, 8>, |
| 226 | n: AsChunks<Limb, 8>, |
| 227 | n0: &N0, |
| 228 | power: Window, |
| 229 | maybe_adx_bmi1_bmi2: Option<(Adx, Bmi1, Bmi2)>, |
| 230 | ) -> Result<(), LimbSliceError> { |
| 231 | prefixed_extern! { |
| 232 | // Upstream has `num: c::int` and `power: c::int`; see |
| 233 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
| 234 | fn bn_power5_nohw( |
| 235 | rp: *mut Limb, |
| 236 | ap: *const Limb, |
| 237 | table: *const Limb, |
| 238 | np: *const Limb, |
| 239 | n0: &N0, |
| 240 | num: c::NonZero_size_t, |
| 241 | power: Window, |
| 242 | ); |
| 243 | // Upstream has `num: c::int` and `power: c::int`; see |
| 244 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
| 245 | fn bn_powerx5( |
| 246 | rp: *mut Limb, |
| 247 | ap: *const Limb, |
| 248 | table: *const Limb, |
| 249 | np: *const Limb, |
| 250 | n0: &N0, |
| 251 | num: c::NonZero_size_t, |
| 252 | power: Window, |
| 253 | ); |
| 254 | } |
| 255 | let num_limbs = check_common_with_n(in_out.as_ref(), table, n)?; |
| 256 | let in_out = in_out.as_flattened_mut(); |
| 257 | let r = in_out.as_mut_ptr(); |
| 258 | let a = in_out.as_ptr(); |
| 259 | let table = table.as_flattened(); |
| 260 | let table = table.as_ptr(); |
| 261 | let n = n.as_flattened(); |
| 262 | let n = n.as_ptr(); |
| 263 | // SAFETY: We cannot assert that `power` is in range because it is secret. |
| 264 | // TODO: Create a `Window5` type that is guaranteed to be in range. |
| 265 | if maybe_adx_bmi1_bmi2.is_some() { |
| 266 | unsafe { bn_powerx5(r, a, table, n, n0, num_limbs, power) } |
| 267 | } else { |
| 268 | unsafe { bn_power5_nohw(r, a, table, n, n0, num_limbs, power) } |
| 269 | }; |
| 270 | Ok(()) |
| 271 | } |
| 272 | |
| 273 | // Helps the compiler will be able to hoist all of these checks out of the |
| 274 | // loops in the caller. Try to help the compiler by doing the checks |
| 275 | // consistently in each function and also by inlining this function and all the |
| 276 | // callers. |
| 277 | #[inline (always)] |
| 278 | fn check_common( |
| 279 | a: AsChunks<Limb, 8>, |
| 280 | table: AsChunks<Limb, 8>, |
| 281 | ) -> Result<NonZeroUsize, LimbSliceError> { |
| 282 | assert_eq!((table.as_ptr() as usize) % 16, 0); // According to BoringSSL. |
| 283 | let a: &[u64] = a.as_flattened(); |
| 284 | let table: &[u64] = table.as_flattened(); |
| 285 | let num_limbs: NonZero = NonZeroUsize::new(a.len()).ok_or_else(|| LimbSliceError::too_short(a.len()))?; |
| 286 | if num_limbs.get() > MAX_LIMBS { |
| 287 | return Err(LimbSliceError::too_long(a.len())); |
| 288 | } |
| 289 | if num_limbs.get() * 32 != table.len() { |
| 290 | return Err(LimbSliceError::len_mismatch(LenMismatchError::new( |
| 291 | table.len(), |
| 292 | ))); |
| 293 | }; |
| 294 | Ok(num_limbs) |
| 295 | } |
| 296 | |
| 297 | #[inline (always)] |
| 298 | fn check_common_with_n( |
| 299 | a: AsChunks<Limb, 8>, |
| 300 | table: AsChunks<Limb, 8>, |
| 301 | n: AsChunks<Limb, 8>, |
| 302 | ) -> Result<NonZeroUsize, LimbSliceError> { |
| 303 | // Choose `a` instead of `n` so that every function starts with |
| 304 | // `check_common` passing the exact same arguments, so that the compiler |
| 305 | // can easily de-dupe the checks. |
| 306 | let num_limbs: NonZero = check_common(a, table)?; |
| 307 | let n: &[u64] = n.as_flattened(); |
| 308 | if n.len() != num_limbs.get() { |
| 309 | return Err(LimbSliceError::len_mismatch(LenMismatchError::new(n.len()))); |
| 310 | } |
| 311 | Ok(num_limbs) |
| 312 | } |
| 313 | |