1 | // Copyright 2015-2025 Brian Smith. |
2 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any |
4 | // purpose with or without fee is hereby granted, provided that the above |
5 | // copyright notice and this permission notice appear in all copies. |
6 | // |
7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
10 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
12 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
13 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
14 | |
15 | #![cfg (target_arch = "x86_64" )] |
16 | |
17 | use super::{ |
18 | inout::{AliasingSlices2, AliasingSlices3}, |
19 | n0::N0, |
20 | LimbSliceError, MAX_LIMBS, |
21 | }; |
22 | use crate::{ |
23 | c, |
24 | cpu::intel::{Adx, Bmi1, Bmi2}, |
25 | error::LenMismatchError, |
26 | limb::{LeakyWindow, Limb, Window}, |
27 | polyfill::slice::{AsChunks, AsChunksMut}, |
28 | }; |
29 | use core::num::NonZeroUsize; |
30 | |
31 | const _512_IS_LIMB_BITS_TIMES_8: () = assert!(8 * Limb::BITS == 512); |
32 | |
33 | #[inline ] |
34 | pub(super) fn mul_mont5( |
35 | mut r: AsChunksMut<Limb, 8>, |
36 | a: AsChunks<Limb, 8>, |
37 | b: AsChunks<Limb, 8>, |
38 | m: AsChunks<Limb, 8>, |
39 | n0: &N0, |
40 | maybe_adx_bmi2: Option<(Adx, Bmi2)>, |
41 | ) -> Result<(), LimbSliceError> { |
42 | mul_mont5_4x( |
43 | (r.as_flattened_mut(), a.as_flattened(), b.as_flattened()), |
44 | n:m.into(), |
45 | n0, |
46 | maybe_adx_bmi2, |
47 | ) |
48 | } |
49 | |
50 | pub const MIN_4X: usize = 8; |
51 | |
52 | #[inline ] |
53 | pub(super) fn mul_mont5_4x( |
54 | in_out: impl AliasingSlices3<Limb>, |
55 | n: AsChunks<Limb, 4>, |
56 | n0: &N0, |
57 | maybe_adx_bmi2: Option<(Adx, Bmi2)>, |
58 | ) -> Result<(), LimbSliceError> { |
59 | const MOD_4X: usize = 4; |
60 | let n: &[u64] = n.as_flattened(); |
61 | if let Some(cpu: (Adx, Bmi2)) = maybe_adx_bmi2 { |
62 | bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe { |
63 | (MIN_4X, MOD_4X, (Adx, Bmi2)) => bn_mulx4x_mont |
64 | }) |
65 | } else { |
66 | bn_mul_mont_ffi!(in_out, n, n0, (), unsafe { |
67 | (MIN_4X, MOD_4X, ()) => bn_mul4x_mont |
68 | }) |
69 | } |
70 | } |
71 | |
72 | #[inline ] |
73 | pub(super) fn sqr_mont5( |
74 | mut in_out: AsChunksMut<Limb, 8>, |
75 | n: AsChunks<Limb, 8>, |
76 | n0: &N0, |
77 | maybe_adx_bmi2: Option<(Adx, Bmi2)>, |
78 | ) -> Result<(), LimbSliceError> { |
79 | prefixed_extern! { |
80 | // `r` and/or 'a' may alias. |
81 | // XXX: BoringSSL declares this to return `int`. |
82 | // `num` must be a non-zero multiple of 8. |
83 | fn bn_sqr8x_mont( |
84 | rp: *mut Limb, |
85 | ap: *const Limb, |
86 | mulx_adx_capable: Limb, |
87 | np: *const Limb, |
88 | n0: &N0, |
89 | num: c::NonZero_size_t); |
90 | } |
91 | |
92 | let in_out = in_out.as_flattened_mut(); |
93 | let n = n.as_flattened(); |
94 | let num_limbs = NonZeroUsize::new(n.len()).ok_or_else(|| LimbSliceError::too_short(n.len()))?; |
95 | |
96 | // Avoid stack overflow from the alloca inside. |
97 | if num_limbs.get() > MAX_LIMBS { |
98 | return Err(LimbSliceError::too_long(num_limbs.get())); |
99 | } |
100 | |
101 | // `Limb::from(mulx_adx.is_some())`, but intentionally branchy. |
102 | let mulx_adx_capable = match maybe_adx_bmi2 { |
103 | Some(_) => Limb::from(true), |
104 | None => Limb::from(false), |
105 | }; |
106 | |
107 | in_out |
108 | .with_non_dangling_non_null_pointers_ra(num_limbs, |r, a| { |
109 | let n = n.as_ptr(); // Non-dangling because num_limbs > 0. |
110 | unsafe { bn_sqr8x_mont(r, a, mulx_adx_capable, n, n0, num_limbs) }; |
111 | }) |
112 | .map_err(LimbSliceError::len_mismatch) |
113 | } |
114 | |
115 | #[inline (always)] |
116 | pub(super) fn scatter5( |
117 | a: AsChunks<Limb, 8>, |
118 | mut table: AsChunksMut<Limb, 8>, |
119 | power: LeakyWindow, |
120 | ) -> Result<(), LimbSliceError> { |
121 | prefixed_extern! { |
122 | // Upstream uses `num: c::size_t` too, and `power: c::size_t`; see |
123 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
124 | fn bn_scatter5( |
125 | inp: *const Limb, |
126 | num: c::NonZero_size_t, |
127 | table: *mut Limb, |
128 | power: LeakyWindow, |
129 | ); |
130 | } |
131 | let num_limbs: NonZero = check_common(a, table.as_ref())?; |
132 | let a: &[u64] = a.as_flattened(); |
133 | let table: &mut [u64] = table.as_flattened_mut(); |
134 | assert!(power < 32); |
135 | unsafe { bn_scatter5(inp:a.as_ptr(), num_limbs, table.as_mut_ptr(), power) }; |
136 | Ok(()) |
137 | } |
138 | |
139 | // SAFETY: `power` must be less than 32. |
140 | #[inline (always)] |
141 | pub(super) unsafe fn gather5( |
142 | mut r: AsChunksMut<Limb, 8>, |
143 | table: AsChunks<Limb, 8>, |
144 | power: Window, |
145 | ) -> Result<(), LimbSliceError> { |
146 | prefixed_extern! { |
147 | // Upstream uses `num: c::size_t` too, and `power: c::size_t`; see |
148 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
149 | pub(super) fn bn_gather5( |
150 | out: *mut Limb, |
151 | num: c::NonZero_size_t, |
152 | table: *const Limb, |
153 | power: Window); |
154 | } |
155 | let num_limbs: NonZero = check_common(a:r.as_ref(), table)?; |
156 | let r: &mut [u64] = r.as_flattened_mut(); |
157 | let table: &[u64] = table.as_flattened(); |
158 | // SAFETY: We cannot assert that `power` is in range because it is secret. |
159 | // TODO: Create a `Window5` type that is guaranteed to be in range. |
160 | unsafe { bn_gather5(out:r.as_mut_ptr(), num_limbs, table.as_ptr(), power) }; |
161 | Ok(()) |
162 | } |
163 | |
164 | // SAFETY: `power` must be less than 32. |
165 | #[inline (always)] |
166 | pub(super) unsafe fn mul_mont_gather5_amm( |
167 | mut r: AsChunksMut<Limb, 8>, |
168 | a: AsChunks<Limb, 8>, |
169 | table: AsChunks<Limb, 8>, |
170 | n: AsChunks<Limb, 8>, |
171 | n0: &N0, |
172 | power: Window, |
173 | maybe_adx_bmi1_bmi2: Option<(Adx, Bmi1, Bmi2)>, |
174 | ) -> Result<(), LimbSliceError> { |
175 | prefixed_extern! { |
176 | // Upstream has `num: c::int` and `power: c::int`; see |
177 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
178 | pub(super) fn bn_mul4x_mont_gather5( |
179 | rp: *mut Limb, |
180 | ap: *const Limb, |
181 | table: *const Limb, |
182 | np: *const Limb, |
183 | n0: &N0, |
184 | num: c::NonZero_size_t, |
185 | power: Window, |
186 | ); |
187 | // Upstream has `num: c::int` and `power: c::int`; see |
188 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
189 | pub(super) fn bn_mulx4x_mont_gather5( |
190 | rp: *mut Limb, |
191 | ap: *const Limb, |
192 | table: *const Limb, |
193 | np: *const Limb, |
194 | n0: &N0, |
195 | num: c::NonZero_size_t, |
196 | power: Window, |
197 | ); |
198 | } |
199 | let num_limbs = check_common_with_n(r.as_ref(), table, n)?; |
200 | let a = a.as_flattened(); |
201 | if a.len() != num_limbs.get() { |
202 | return Err(LimbSliceError::len_mismatch(LenMismatchError::new(a.len()))); |
203 | } |
204 | let r = r.as_flattened_mut(); |
205 | let r = r.as_mut_ptr(); |
206 | let a = a.as_ptr(); |
207 | let table = table.as_flattened(); |
208 | let table = table.as_ptr(); |
209 | let n = n.as_flattened(); |
210 | let n = n.as_ptr(); |
211 | // SAFETY: We cannot assert that `power` is in range because it is secret. |
212 | // TODO: Create a `Window5` type that is guaranteed to be in range. |
213 | if maybe_adx_bmi1_bmi2.is_some() { |
214 | unsafe { bn_mulx4x_mont_gather5(r, a, table, n, n0, num_limbs, power) } |
215 | } else { |
216 | unsafe { bn_mul4x_mont_gather5(r, a, table, n, n0, num_limbs, power) } |
217 | }; |
218 | Ok(()) |
219 | } |
220 | |
221 | // SAFETY: `power` must be less than 32. |
222 | #[inline (always)] |
223 | pub(super) unsafe fn power5_amm( |
224 | mut in_out: AsChunksMut<Limb, 8>, |
225 | table: AsChunks<Limb, 8>, |
226 | n: AsChunks<Limb, 8>, |
227 | n0: &N0, |
228 | power: Window, |
229 | maybe_adx_bmi1_bmi2: Option<(Adx, Bmi1, Bmi2)>, |
230 | ) -> Result<(), LimbSliceError> { |
231 | prefixed_extern! { |
232 | // Upstream has `num: c::int` and `power: c::int`; see |
233 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
234 | fn bn_power5_nohw( |
235 | rp: *mut Limb, |
236 | ap: *const Limb, |
237 | table: *const Limb, |
238 | np: *const Limb, |
239 | n0: &N0, |
240 | num: c::NonZero_size_t, |
241 | power: Window, |
242 | ); |
243 | // Upstream has `num: c::int` and `power: c::int`; see |
244 | // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`. |
245 | fn bn_powerx5( |
246 | rp: *mut Limb, |
247 | ap: *const Limb, |
248 | table: *const Limb, |
249 | np: *const Limb, |
250 | n0: &N0, |
251 | num: c::NonZero_size_t, |
252 | power: Window, |
253 | ); |
254 | } |
255 | let num_limbs = check_common_with_n(in_out.as_ref(), table, n)?; |
256 | let in_out = in_out.as_flattened_mut(); |
257 | let r = in_out.as_mut_ptr(); |
258 | let a = in_out.as_ptr(); |
259 | let table = table.as_flattened(); |
260 | let table = table.as_ptr(); |
261 | let n = n.as_flattened(); |
262 | let n = n.as_ptr(); |
263 | // SAFETY: We cannot assert that `power` is in range because it is secret. |
264 | // TODO: Create a `Window5` type that is guaranteed to be in range. |
265 | if maybe_adx_bmi1_bmi2.is_some() { |
266 | unsafe { bn_powerx5(r, a, table, n, n0, num_limbs, power) } |
267 | } else { |
268 | unsafe { bn_power5_nohw(r, a, table, n, n0, num_limbs, power) } |
269 | }; |
270 | Ok(()) |
271 | } |
272 | |
273 | // Helps the compiler will be able to hoist all of these checks out of the |
274 | // loops in the caller. Try to help the compiler by doing the checks |
275 | // consistently in each function and also by inlining this function and all the |
276 | // callers. |
277 | #[inline (always)] |
278 | fn check_common( |
279 | a: AsChunks<Limb, 8>, |
280 | table: AsChunks<Limb, 8>, |
281 | ) -> Result<NonZeroUsize, LimbSliceError> { |
282 | assert_eq!((table.as_ptr() as usize) % 16, 0); // According to BoringSSL. |
283 | let a: &[u64] = a.as_flattened(); |
284 | let table: &[u64] = table.as_flattened(); |
285 | let num_limbs: NonZero = NonZeroUsize::new(a.len()).ok_or_else(|| LimbSliceError::too_short(a.len()))?; |
286 | if num_limbs.get() > MAX_LIMBS { |
287 | return Err(LimbSliceError::too_long(a.len())); |
288 | } |
289 | if num_limbs.get() * 32 != table.len() { |
290 | return Err(LimbSliceError::len_mismatch(LenMismatchError::new( |
291 | table.len(), |
292 | ))); |
293 | }; |
294 | Ok(num_limbs) |
295 | } |
296 | |
297 | #[inline (always)] |
298 | fn check_common_with_n( |
299 | a: AsChunks<Limb, 8>, |
300 | table: AsChunks<Limb, 8>, |
301 | n: AsChunks<Limb, 8>, |
302 | ) -> Result<NonZeroUsize, LimbSliceError> { |
303 | // Choose `a` instead of `n` so that every function starts with |
304 | // `check_common` passing the exact same arguments, so that the compiler |
305 | // can easily de-dupe the checks. |
306 | let num_limbs: NonZero = check_common(a, table)?; |
307 | let n: &[u64] = n.as_flattened(); |
308 | if n.len() != num_limbs.get() { |
309 | return Err(LimbSliceError::len_mismatch(LenMismatchError::new(n.len()))); |
310 | } |
311 | Ok(num_limbs) |
312 | } |
313 | |