| 1 | // Copyright 2016-2023 Brian Smith. |
| 2 | // |
| 3 | // Permission to use, copy, modify, and/or distribute this software for any |
| 4 | // purpose with or without fee is hereby granted, provided that the above |
| 5 | // copyright notice and this permission notice appear in all copies. |
| 6 | // |
| 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
| 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
| 10 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 14 | |
| 15 | use super::{ |
| 16 | elem::{binary_op, binary_op_assign}, |
| 17 | elem_sqr_mul, elem_sqr_mul_acc, PublicModulus, *, |
| 18 | }; |
| 19 | |
| 20 | pub(super) const NUM_LIMBS: usize = 256 / LIMB_BITS; |
| 21 | |
| 22 | pub static COMMON_OPS: CommonOps = CommonOps { |
| 23 | num_limbs: elem::NumLimbs::P256, |
| 24 | |
| 25 | q: PublicModulus { |
| 26 | p: limbs_from_hex("ffffffff00000001000000000000000000000000ffffffffffffffffffffffff" ), |
| 27 | rr: PublicElem::from_hex("4fffffffdfffffffffffffffefffffffbffffffff0000000000000003" ), |
| 28 | }, |
| 29 | n: PublicElem::from_hex("ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551" ), |
| 30 | |
| 31 | a: PublicElem::from_hex("fffffffc00000004000000000000000000000003fffffffffffffffffffffffc" ), |
| 32 | b: PublicElem::from_hex("dc30061d04874834e5a220abf7212ed6acf005cd78843090d89cdf6229c4bddf" ), |
| 33 | |
| 34 | elem_mul_mont: p256_mul_mont, |
| 35 | elem_sqr_mont: p256_sqr_mont, |
| 36 | }; |
| 37 | |
| 38 | #[cfg (test)] |
| 39 | pub(super) static GENERATOR: (PublicElem<R>, PublicElem<R>) = ( |
| 40 | PublicElem::from_hex("18905f76a53755c679fb732b7762251075ba95fc5fedb60179e730d418a9143c" ), |
| 41 | PublicElem::from_hex("8571ff1825885d85d2e88688dd21f3258b4ab8e4ba19e45cddf25357ce95560a" ), |
| 42 | ); |
| 43 | |
| 44 | pub static PRIVATE_KEY_OPS: PrivateKeyOps = PrivateKeyOps { |
| 45 | common: &COMMON_OPS, |
| 46 | elem_inv_squared: p256_elem_inv_squared, |
| 47 | point_mul_base_impl: p256_point_mul_base_impl, |
| 48 | point_mul_impl: p256_point_mul, |
| 49 | point_add_jacobian_impl: p256_point_add, |
| 50 | }; |
| 51 | |
| 52 | fn p256_elem_inv_squared(q: &Modulus<Q>, a: &Elem<R>) -> Elem<R> { |
| 53 | // Calculate a**-2 (mod q) == a**(q - 3) (mod q) |
| 54 | // |
| 55 | // The exponent (q - 3) is: |
| 56 | // |
| 57 | // 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc |
| 58 | |
| 59 | #[inline ] |
| 60 | fn sqr_mul(q: &Modulus<Q>, a: &Elem<R>, squarings: LeakyWord, b: &Elem<R>) -> Elem<R> { |
| 61 | elem_sqr_mul(&COMMON_OPS, a, squarings, b, q.cpu()) |
| 62 | } |
| 63 | |
| 64 | #[inline ] |
| 65 | fn sqr_mul_acc(q: &Modulus<Q>, a: &mut Elem<R>, squarings: LeakyWord, b: &Elem<R>) { |
| 66 | elem_sqr_mul_acc(&COMMON_OPS, a, squarings, b, q.cpu()) |
| 67 | } |
| 68 | |
| 69 | let b_1 = &a; |
| 70 | let b_11 = sqr_mul(q, b_1, 1, b_1); |
| 71 | let b_111 = sqr_mul(q, &b_11, 1, b_1); |
| 72 | let f_11 = sqr_mul(q, &b_111, 3, &b_111); |
| 73 | let fff = sqr_mul(q, &f_11, 6, &f_11); |
| 74 | let fff_111 = sqr_mul(q, &fff, 3, &b_111); |
| 75 | let fffffff_11 = sqr_mul(q, &fff_111, 15, &fff_111); |
| 76 | let ffffffff = sqr_mul(q, &fffffff_11, 2, &b_11); |
| 77 | |
| 78 | // ffffffff00000001 |
| 79 | let mut acc = sqr_mul(q, &ffffffff, 31 + 1, b_1); |
| 80 | |
| 81 | // ffffffff00000001000000000000000000000000ffffffff |
| 82 | sqr_mul_acc(q, &mut acc, 96 + 32, &ffffffff); |
| 83 | |
| 84 | // ffffffff00000001000000000000000000000000ffffffffffffffff |
| 85 | sqr_mul_acc(q, &mut acc, 32, &ffffffff); |
| 86 | |
| 87 | // ffffffff00000001000000000000000000000000fffffffffffffffffffffff_11 |
| 88 | sqr_mul_acc(q, &mut acc, 30, &fffffff_11); |
| 89 | |
| 90 | // ffffffff00000001000000000000000000000000fffffffffffffffffffffffc |
| 91 | q.elem_square(&mut acc); |
| 92 | q.elem_square(&mut acc); |
| 93 | |
| 94 | acc |
| 95 | } |
| 96 | |
| 97 | fn p256_point_mul_base_impl(g_scalar: &Scalar, _cpu: cpu::Features) -> Point { |
| 98 | prefixed_extern! { |
| 99 | fn p256_point_mul_base( |
| 100 | r: *mut Limb, // [3][COMMON_OPS.num_limbs] |
| 101 | g_scalar: *const Limb, // [COMMON_OPS.num_limbs] |
| 102 | ); |
| 103 | } |
| 104 | |
| 105 | let mut r: Point = Point::new_at_infinity(); |
| 106 | unsafe { |
| 107 | p256_point_mul_base(r.xyz.as_mut_ptr(), g_scalar.limbs.as_ptr()); |
| 108 | } |
| 109 | r |
| 110 | } |
| 111 | |
| 112 | pub static PUBLIC_KEY_OPS: PublicKeyOps = PublicKeyOps { |
| 113 | common: &COMMON_OPS, |
| 114 | }; |
| 115 | |
| 116 | pub static SCALAR_OPS: ScalarOps = ScalarOps { |
| 117 | common: &COMMON_OPS, |
| 118 | scalar_mul_mont: p256_scalar_mul_mont, |
| 119 | }; |
| 120 | |
| 121 | pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { |
| 122 | scalar_ops: &SCALAR_OPS, |
| 123 | public_key_ops: &PUBLIC_KEY_OPS, |
| 124 | |
| 125 | #[cfg (any( |
| 126 | all(target_arch = "aarch64" , target_endian = "little" ), |
| 127 | target_arch = "x86_64" |
| 128 | ))] |
| 129 | twin_mul: twin_mul_nistz256, |
| 130 | |
| 131 | #[cfg (not(any( |
| 132 | all(target_arch = "aarch64" , target_endian = "little" ), |
| 133 | target_arch = "x86_64" |
| 134 | )))] |
| 135 | twin_mul: |g_scalar, p_scalar, p_xy, cpu| { |
| 136 | twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy, cpu) |
| 137 | }, |
| 138 | |
| 139 | q_minus_n: PublicElem::from_hex("4319055358e8617b0c46353d039cdaae" ), |
| 140 | |
| 141 | // TODO: Use an optimized variable-time implementation. |
| 142 | scalar_inv_to_mont_vartime: |s: &Elem, cpu: Features| PRIVATE_SCALAR_OPS.scalar_inv_to_mont(a:s, cpu), |
| 143 | }; |
| 144 | |
| 145 | #[cfg (any( |
| 146 | all(target_arch = "aarch64" , target_endian = "little" ), |
| 147 | target_arch = "x86_64" |
| 148 | ))] |
| 149 | fn twin_mul_nistz256( |
| 150 | g_scalar: &Scalar, |
| 151 | p_scalar: &Scalar, |
| 152 | p_xy: &(Elem<R>, Elem<R>), |
| 153 | cpu: cpu::Features, |
| 154 | ) -> Point { |
| 155 | let scaled_g: Point = point_mul_base_vartime(g_scalar, cpu); |
| 156 | let scaled_p: Point = PRIVATE_KEY_OPS.point_mul(p_scalar, p_xy, _cpu:cpu::features()); |
| 157 | PRIVATE_KEY_OPS.point_sum(&scaled_g, &scaled_p, cpu) |
| 158 | } |
| 159 | |
| 160 | #[cfg (any( |
| 161 | all(target_arch = "aarch64" , target_endian = "little" ), |
| 162 | target_arch = "x86_64" |
| 163 | ))] |
| 164 | fn point_mul_base_vartime(g_scalar: &Scalar, _cpu: cpu::Features) -> Point { |
| 165 | prefixed_extern! { |
| 166 | fn p256_point_mul_base_vartime(r: *mut Limb, // [3][COMMON_OPS.num_limbs] |
| 167 | g_scalar: *const Limb, // [COMMON_OPS.num_limbs] |
| 168 | ); |
| 169 | } |
| 170 | let mut scaled_g: Point = Point::new_at_infinity(); |
| 171 | unsafe { |
| 172 | p256_point_mul_base_vartime(r:scaled_g.xyz.as_mut_ptr(), g_scalar.limbs.as_ptr()); |
| 173 | } |
| 174 | scaled_g |
| 175 | } |
| 176 | |
| 177 | pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps { |
| 178 | scalar_ops: &SCALAR_OPS, |
| 179 | |
| 180 | oneRR_mod_n: PublicScalar::from_hex( |
| 181 | "66e12d94f3d956202845b2392b6bec594699799c49bd6fa683244c95be79eea2" , |
| 182 | ), |
| 183 | scalar_inv_to_mont: p256_scalar_inv_to_mont, |
| 184 | }; |
| 185 | |
| 186 | #[allow (clippy::just_underscores_and_digits)] |
| 187 | fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> { |
| 188 | // Calculate the modular inverse of scalar |a| using Fermat's Little |
| 189 | // Theorem: |
| 190 | // |
| 191 | // a**-1 (mod n) == a**(n - 2) (mod n) |
| 192 | // |
| 193 | // The exponent (n - 2) is: |
| 194 | // |
| 195 | // 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f |
| 196 | |
| 197 | #[inline ] |
| 198 | fn mul(a: &Scalar<R>, b: &Scalar<R>) -> Scalar<R> { |
| 199 | binary_op(p256_scalar_mul_mont, a, b) |
| 200 | } |
| 201 | |
| 202 | #[inline ] |
| 203 | fn sqr(a: &Scalar<R>) -> Scalar<R> { |
| 204 | let mut tmp = Scalar::zero(); |
| 205 | unsafe { p256_scalar_sqr_rep_mont(tmp.limbs.as_mut_ptr(), a.limbs.as_ptr(), 1) } |
| 206 | tmp |
| 207 | } |
| 208 | |
| 209 | // Returns (`a` squared `squarings` times) * `b`. |
| 210 | fn sqr_mul(a: &Scalar<R>, squarings: LeakyWord, b: &Scalar<R>) -> Scalar<R> { |
| 211 | debug_assert!(squarings >= 1); |
| 212 | let mut tmp = Scalar::zero(); |
| 213 | unsafe { p256_scalar_sqr_rep_mont(tmp.limbs.as_mut_ptr(), a.limbs.as_ptr(), squarings) } |
| 214 | mul(&tmp, b) |
| 215 | } |
| 216 | |
| 217 | // Sets `acc` = (`acc` squared `squarings` times) * `b`. |
| 218 | fn sqr_mul_acc(acc: &mut Scalar<R>, squarings: LeakyWord, b: &Scalar<R>) { |
| 219 | debug_assert!(squarings >= 1); |
| 220 | unsafe { p256_scalar_sqr_rep_mont(acc.limbs.as_mut_ptr(), acc.limbs.as_ptr(), squarings) } |
| 221 | binary_op_assign(p256_scalar_mul_mont, acc, b); |
| 222 | } |
| 223 | |
| 224 | let _1 = &a; |
| 225 | |
| 226 | let _10 = sqr(_1); // 2 |
| 227 | let _100 = sqr(&_10); // 4 |
| 228 | let _101 = mul(&_100, _1); // 5 |
| 229 | let _111 = mul(&_101, &_10); // 7 |
| 230 | |
| 231 | let _1000 = sqr(&_100); // 8 |
| 232 | let _10000 = sqr(&_1000); // 16 |
| 233 | let _100000 = sqr(&_10000); // 32 |
| 234 | |
| 235 | let _100111 = mul(&_111, &_100000); // 39 = 7 + 32 |
| 236 | let _101011 = mul(&_100, &_100111); // 43 = 4 + 39 |
| 237 | let _101111 = mul(&_100, &_101011); // 47 = 4 + 39 |
| 238 | let _1001111 = mul(&_100000, &_101111); // 79 = 32 + 47 |
| 239 | let _86 = sqr(&_101011); // 86 = 43 * 2 |
| 240 | let _1011011 = mul(&_101, &_86); // 91 = 5 + 86 |
| 241 | let _92 = mul(_1, &_1011011); // 92 = 1 + 91 |
| 242 | let _1100011 = mul(&_111, &_92); // 99 = 7 + 92 |
| 243 | let _10111111 = mul(&_92, &_1100011); // 191 = 92 + 99 |
| 244 | let _11011111 = mul(&_100000, &_10111111); // 223 = 32 + 191 |
| 245 | |
| 246 | let ff = mul(&_100000, &_11011111); // 255 = 32 + 223 |
| 247 | let ffff = sqr_mul(&ff, 0 + 8, &ff); |
| 248 | let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff); |
| 249 | |
| 250 | // ffffffff00000000ffffffff |
| 251 | let mut acc = sqr_mul(&ffffffff, 32 + 32, &ffffffff); |
| 252 | |
| 253 | // ffffffff00000000ffffffffffffffff |
| 254 | sqr_mul_acc(&mut acc, 0 + 32, &ffffffff); |
| 255 | |
| 256 | // The rest of the exponent, in binary, is: |
| 257 | // |
| 258 | // 1011110011100110111110101010110110100111000101111001111010000100 |
| 259 | // 1111001110111001110010101100001011111100011000110010010101001111 |
| 260 | |
| 261 | sqr_mul_acc(&mut acc, 6, &_101111); |
| 262 | sqr_mul_acc(&mut acc, 2 + 3, &_111); |
| 263 | sqr_mul_acc(&mut acc, 2 + 8, &_11011111); |
| 264 | sqr_mul_acc(&mut acc, 1 + 3, &_101); |
| 265 | sqr_mul_acc(&mut acc, 1 + 7, &_1011011); |
| 266 | sqr_mul_acc(&mut acc, 1 + 6, &_100111); |
| 267 | sqr_mul_acc(&mut acc, 3 + 6, &_101111); |
| 268 | sqr_mul_acc(&mut acc, 2 + 3, &_111); |
| 269 | sqr_mul_acc(&mut acc, 3, &_101); |
| 270 | sqr_mul_acc(&mut acc, 4 + 7, &_1001111); |
| 271 | sqr_mul_acc(&mut acc, 2 + 3, &_111); |
| 272 | sqr_mul_acc(&mut acc, 1 + 3, &_111); |
| 273 | sqr_mul_acc(&mut acc, 2 + 3, &_111); |
| 274 | sqr_mul_acc(&mut acc, 2 + 6, &_101011); |
| 275 | sqr_mul_acc(&mut acc, 4 + 8, &_10111111); |
| 276 | sqr_mul_acc(&mut acc, 3 + 7, &_1100011); |
| 277 | sqr_mul_acc(&mut acc, 2 + 1, _1); |
| 278 | sqr_mul_acc(&mut acc, 2 + 3, &_101); |
| 279 | sqr_mul_acc(&mut acc, 1 + 7, &_1001111); |
| 280 | |
| 281 | acc |
| 282 | } |
| 283 | |
| 284 | prefixed_extern! { |
| 285 | pub(super) fn p256_mul_mont( |
| 286 | r: *mut Limb, // [COMMON_OPS.num_limbs] |
| 287 | a: *const Limb, // [COMMON_OPS.num_limbs] |
| 288 | b: *const Limb, // [COMMON_OPS.num_limbs] |
| 289 | ); |
| 290 | pub(super) fn p256_sqr_mont( |
| 291 | r: *mut Limb, // [COMMON_OPS.num_limbs] |
| 292 | a: *const Limb, // [COMMON_OPS.num_limbs] |
| 293 | ); |
| 294 | |
| 295 | fn p256_point_add( |
| 296 | r: *mut Limb, // [3][COMMON_OPS.num_limbs] |
| 297 | a: *const Limb, // [3][COMMON_OPS.num_limbs] |
| 298 | b: *const Limb, // [3][COMMON_OPS.num_limbs] |
| 299 | ); |
| 300 | fn p256_point_mul( |
| 301 | r: *mut Limb, // [3][COMMON_OPS.num_limbs] |
| 302 | p_scalar: *const Limb, // [COMMON_OPS.num_limbs] |
| 303 | p_x: *const Limb, // [COMMON_OPS.num_limbs] |
| 304 | p_y: *const Limb, // [COMMON_OPS.num_limbs] |
| 305 | ); |
| 306 | |
| 307 | fn p256_scalar_mul_mont( |
| 308 | r: *mut Limb, // [COMMON_OPS.num_limbs] |
| 309 | a: *const Limb, // [COMMON_OPS.num_limbs] |
| 310 | b: *const Limb, // [COMMON_OPS.num_limbs] |
| 311 | ); |
| 312 | fn p256_scalar_sqr_rep_mont( |
| 313 | r: *mut Limb, // [COMMON_OPS.num_limbs] |
| 314 | a: *const Limb, // [COMMON_OPS.num_limbs] |
| 315 | rep: LeakyWord, |
| 316 | ); |
| 317 | } |
| 318 | |
| 319 | #[cfg (test)] |
| 320 | mod tests { |
| 321 | #[cfg (any( |
| 322 | all(target_arch = "aarch64" , target_endian = "little" ), |
| 323 | target_arch = "x86_64" |
| 324 | ))] |
| 325 | #[test ] |
| 326 | fn p256_point_mul_base_vartime_test() { |
| 327 | use super::{super::tests::point_mul_base_tests, *}; |
| 328 | point_mul_base_tests( |
| 329 | &PRIVATE_KEY_OPS, |
| 330 | point_mul_base_vartime, |
| 331 | test_file!("p256_point_mul_base_tests.txt" ), |
| 332 | ); |
| 333 | } |
| 334 | } |
| 335 | |