| 1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
| 2 | /* |
| 3 | * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. |
| 4 | * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation |
| 5 | */ |
| 6 | |
| 7 | #include <linux/types.h> |
| 8 | #include <linux/jump_label.h> |
| 9 | #include <linux/kernel.h> |
| 10 | |
| 11 | #include <asm/cpufeature.h> |
| 12 | #include <asm/processor.h> |
| 13 | |
/*
 * eq_mask() - equality test written without a data-dependent branch.
 * @a: first operand
 * @b: second operand
 *
 * Returns a mask with all 64 bits set when a == b and 0 otherwise, so the
 * result can be ANDed with a value to select it.
 */
static __always_inline u64 eq_mask(u64 a, u64 b)
{
	u64 diff = a ^ b;			/* zero iff a == b */
	u64 neg_diff = ~diff + (u64)1U;		/* two's-complement -diff */
	/* (diff | -diff) has bit 63 set exactly when diff is non-zero */
	u64 is_nonzero = (diff | neg_diff) >> 63;

	/* 0 - 1 wraps to all ones (equal); 1 - 1 is 0 (different) */
	return is_nonzero - (u64)1U;
}
| 22 | |
/*
 * gte_mask() - unsigned "greater than or equal" test written without a
 * data-dependent branch.
 * @a: left operand
 * @b: right operand
 *
 * Returns a mask with all 64 bits set when a >= b (compared as unsigned
 * 64-bit values) and 0 otherwise.
 */
static __always_inline u64 gte_mask(u64 a, u64 b)
{
	u64 differing_bits = a ^ b;
	u64 difference = a - b;			/* wraps when a < b */
	u64 q = differing_bits | (difference ^ b);
	/*
	 * Bit 63 of (a ^ q) is the borrow of a - b: when the top bits of a
	 * and b agree it equals the top bit of the difference, otherwise it
	 * equals the top bit of b.  Either way it is 1 exactly when a < b.
	 */
	u64 borrow = (a ^ q) >> 63;

	/* 0 - 1 wraps to all ones (a >= b); 1 - 1 is 0 (a < b) */
	return borrow - (u64)1U;
}
| 35 | |
| 36 | /* Computes the addition of four-element f1 with value in f2 |
| 37 | * and returns the carry (if any) */ |
/*
 * out: receives four 64-bit words; the word at byte offset 0 is the least
 *      significant one (the carry chain starts there and moves upwards).
 * f1:  four 64-bit words, only read.
 * f2:  64-bit value added into word 0.  Its register doubles as the
 *      accumulator for word 0, which is why it is a "+&r" operand.
 * Returns the carry out of the top word (0 or 1).
 *
 * asm operands: %0 = f2, %1 = carry_r, %2 = out, %3 = f1.
 */
| 38 | static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) |
| 39 | { |
| 40 | u64 carry_r; /* written by the asm: carry out of the top word */ |
| 41 | |
| 42 | asm volatile( |
| 43 | /* Clear registers to propagate the carry bit */ |
| 44 | " xor %%r8d, %%r8d;" /* a 32-bit xor also zeroes the upper half of r8 */ |
| 45 | " xor %%r9d, %%r9d;" |
| 46 | " xor %%r10d, %%r10d;" |
| 47 | " xor %%r11d, %%r11d;" |
| 48 | " xor %k1, %k1;" /* carry_r = 0; %k1 is the 32-bit name of operand 1 */ |
| 49 | |
| 50 | /* Begin addition chain */ |
| 51 | " addq 0(%3), %0;" /* f2 += f1[0], CF = carry out */ |
| 52 | " movq %0, 0(%2);" /* out[0]; mov leaves CF untouched */ |
| 53 | " adcxq 8(%3), %%r8;" /* r8 = 0 + f1[1] + CF */ |
| 54 | " movq %%r8, 8(%2);" |
| 55 | " adcxq 16(%3), %%r9;" /* r9 = 0 + f1[2] + CF */ |
| 56 | " movq %%r9, 16(%2);" |
| 57 | " adcxq 24(%3), %%r10;" /* r10 = 0 + f1[3] + CF */ |
| 58 | " movq %%r10, 24(%2);" |
| 59 | |
| 60 | /* Return the carry bit in a register */ |
| 61 | " adcx %%r11, %1;" /* carry_r = 0 + 0 + CF */ |
| 62 | : "+&r" (f2), "=&r" (carry_r) |
| 63 | : "r" (out), "r" (f1) |
| 64 | : "%r8" , "%r9" , "%r10" , "%r11" , "memory" , "cc" ); |
| 65 | |
| 66 | return carry_r; |
| 67 | } |
| 68 | |
| 69 | /* Computes the field addition of two field elements */ |
/*
 * out <- f1 + f2 on four 64-bit words each (word at offset 0 is least
 * significant).  A carry out of the top word is folded back into word 0 as
 * carry * 38; no comparison against the modulus is performed here.
 *
 * asm operands: %0 = f2, %1 = out, %2 = f1.  f2 is a read-write operand
 * because its register is overwritten with the constant 38 once the last
 * word of f2 has been loaded.
 *
 * NOTE(review): the factor 38 is consistent with 2^256 = 38 mod (2^255 - 19);
 * the modulus itself is not spelled out in this part of the file - confirm.
 */
| 70 | static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) |
| 71 | { |
| 72 | asm volatile( |
| 73 | /* Compute the raw addition of f1 + f2 */ |
| 74 | " movq 0(%0), %%r8;" /* r8 = f2[0] */ |
| 75 | " addq 0(%2), %%r8;" /* r8 += f1[0], CF = carry out */ |
| 76 | " movq 8(%0), %%r9;" |
| 77 | " adcxq 8(%2), %%r9;" /* r9 = f2[1] + f1[1] + CF */ |
| 78 | " movq 16(%0), %%r10;" |
| 79 | " adcxq 16(%2), %%r10;" |
| 80 | " movq 24(%0), %%r11;" |
| 81 | " adcxq 24(%2), %%r11;" /* CF now holds the carry out of the top word */ |
| 82 | |
| 83 | /* Wrap the result back into the field */ |
| 84 | |
| 85 | /* Step 1: Compute carry*38 */ |
| 86 | " mov $0, %%rax;" /* mov (unlike xor) keeps CF intact */ |
| 87 | " mov $38, %0;" /* the f2 pointer is dead from here on; reuse its register */ |
| 88 | " cmovc %0, %%rax;" /* rax = CF ? 38 : 0 */ |
| 89 | |
| 90 | /* Step 2: Add carry*38 to the original sum */ |
| 91 | " xor %%ecx, %%ecx;" /* rcx = 0, used as the zero addend below */ |
| 92 | " add %%rax, %%r8;" |
| 93 | " adcx %%rcx, %%r9;" /* ripple the carry through the upper words */ |
| 94 | " movq %%r9, 8(%1);" |
| 95 | " adcx %%rcx, %%r10;" |
| 96 | " movq %%r10, 16(%1);" |
| 97 | " adcx %%rcx, %%r11;" |
| 98 | " movq %%r11, 24(%1);" |
| 99 | |
| 100 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 101 | " mov $0, %%rax;" |
| 102 | " cmovc %0, %%rax;" /* rax = 38 if step 2 carried out of the top word */ |
| 103 | " add %%rax, %%r8;" |
| 104 | " movq %%r8, 0(%1);" /* word 0 is stored last, after both folds */ |
| 105 | : "+&r" (f2) |
| 106 | : "r" (out), "r" (f1) |
| 107 | : "%rax" , "%rcx" , "%r8" , "%r9" , "%r10" , "%r11" , "memory" , "cc" ); |
| 108 | } |
| 109 | |
| 110 | /* Computes the field subtraction of two field elements */ |
/*
 * out <- f1 - f2 on four 64-bit words each (word at offset 0 is least
 * significant).  A borrow out of the top word is compensated by subtracting
 * borrow * 38 from word 0; no comparison against the modulus is performed.
 *
 * asm operands: %0 = out, %1 = f1, %2 = f2.  All four result words are kept
 * in r8..r11 and stored only at the very end.
 *
 * NOTE(review): the factor 38 is consistent with 2^256 = 38 mod (2^255 - 19);
 * the modulus itself is not spelled out in this part of the file - confirm.
 */
| 111 | static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) |
| 112 | { |
| 113 | asm volatile( |
| 114 | /* Compute the raw subtraction of f1-f2 */ |
| 115 | " movq 0(%1), %%r8;" /* r8 = f1[0] */ |
| 116 | " subq 0(%2), %%r8;" /* r8 -= f2[0], CF = borrow */ |
| 117 | " movq 8(%1), %%r9;" |
| 118 | " sbbq 8(%2), %%r9;" /* r9 = f1[1] - f2[1] - CF */ |
| 119 | " movq 16(%1), %%r10;" |
| 120 | " sbbq 16(%2), %%r10;" |
| 121 | " movq 24(%1), %%r11;" |
| 122 | " sbbq 24(%2), %%r11;" /* CF now holds the borrow out of the top word */ |
| 123 | |
| 124 | /* Wrap the result back into the field */ |
| 125 | |
| 126 | /* Step 1: Compute carry*38 */ |
| 127 | " mov $0, %%rax;" /* mov (unlike xor) keeps CF intact */ |
| 128 | " mov $38, %%rcx;" |
| 129 | " cmovc %%rcx, %%rax;" /* rax = CF ? 38 : 0 */ |
| 130 | |
| 131 | /* Step 2: Subtract carry*38 from the original difference */ |
| 132 | " sub %%rax, %%r8;" |
| 133 | " sbb $0, %%r9;" /* ripple the borrow through the upper words */ |
| 134 | " sbb $0, %%r10;" |
| 135 | " sbb $0, %%r11;" |
| 136 | |
| 137 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 138 | " mov $0, %%rax;" |
| 139 | " cmovc %%rcx, %%rax;" /* rax = 38 if step 2 borrowed out of the top word */ |
| 140 | " sub %%rax, %%r8;" |
| 141 | |
| 142 | /* Store the result */ |
| 143 | " movq %%r8, 0(%0);" |
| 144 | " movq %%r9, 8(%0);" |
| 145 | " movq %%r10, 16(%0);" |
| 146 | " movq %%r11, 24(%0);" |
| 147 | : |
| 148 | : "r" (out), "r" (f1), "r" (f2) |
| 149 | : "%rax" , "%rcx" , "%r8" , "%r9" , "%r10" , "%r11" , "memory" , "cc" ); |
| 150 | } |
| 151 | |
| 152 | /* Computes a field multiplication: out <- f1 * f2 |
| 153 | * Uses the 8-element buffer tmp for intermediate results */ |
/*
 * f1, f2 and out are four 64-bit words each; tmp receives the full
 * eight-word product before it is folded down to four words.
 *
 * asm operands: %0 = f1, %1 = f2, %2 = tmp, %3 = out.  The three "+&r"
 * operands are reused once the raw product is complete: %0 is repointed at
 * tmp, %2 at out, and %1 is zeroed to serve as a constant 0.
 *
 * tmp is written while f1 and f2 are still being read, so it must not
 * overlap them.  out is written only in the final reduction, after the last
 * read of f1 and f2.
 *
 * mulx takes one factor implicitly from rdx and leaves the flags alone;
 * adcx carries through CF only and adox through OF only, which lets two
 * carry chains be interleaved.
 */
| 154 | static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| 155 | { |
| 156 | asm volatile( |
| 157 | |
| 158 | /* Compute the raw multiplication: tmp <- src1 * src2 */ |
| 159 | |
| 160 | /* Compute src1[0] * src2 */ |
| 161 | " movq 0(%0), %%rdx;" /* rdx = f1[0], the implicit mulx factor */ |
| 162 | " mulxq 0(%1), %%r8, %%r9;" /* r9:r8 = f1[0] * f2[0] (high:low) */ |
| 163 | " xor %%r10d, %%r10d;" /* clears CF and OF before the carry chains */ |
| 164 | " movq %%r8, 0(%2);" /* tmp[0] is final */ |
| 165 | " mulxq 8(%1), %%r10, %%r11;" |
| 166 | " adox %%r9, %%r10;" /* add the high word of the previous product */ |
| 167 | " movq %%r10, 8(%2);" |
| 168 | " mulxq 16(%1), %%rbx, %%r13;" |
| 169 | " adox %%r11, %%rbx;" |
| 170 | " mulxq 24(%1), %%r14, %%rdx;" |
| 171 | " adox %%r13, %%r14;" |
| 172 | " mov $0, %%rax;" /* mov keeps the pending OF carry intact */ |
| 173 | " adox %%rdx, %%rax;" /* rbx, r14, rax carry this row into the next one */ |
| 174 | |
| 175 | /* Compute src1[1] * src2 */ |
| 176 | " movq 8(%0), %%rdx;" |
| 177 | " mulxq 0(%1), %%r8, %%r9;" |
| 178 | " xor %%r10d, %%r10d;" |
| 179 | " adcxq 8(%2), %%r8;" /* CF chain: accumulate onto the previous row */ |
| 180 | " movq %%r8, 8(%2);" |
| 181 | " mulxq 8(%1), %%r10, %%r11;" |
| 182 | " adox %%r9, %%r10;" /* OF chain: high word of the previous product */ |
| 183 | " adcx %%rbx, %%r10;" |
| 184 | " movq %%r10, 16(%2);" |
| 185 | " mulxq 16(%1), %%rbx, %%r13;" |
| 186 | " adox %%r11, %%rbx;" |
| 187 | " adcx %%r14, %%rbx;" |
| 188 | " mov $0, %%r8;" /* zero addend for the final adcx; flags untouched */ |
| 189 | " mulxq 24(%1), %%r14, %%rdx;" |
| 190 | " adox %%r13, %%r14;" |
| 191 | " adcx %%rax, %%r14;" |
| 192 | " mov $0, %%rax;" |
| 193 | " adox %%rdx, %%rax;" |
| 194 | " adcx %%r8, %%rax;" /* rax collects both final carries */ |
| 195 | |
| 196 | /* Compute src1[2] * src2 */ |
| 197 | " movq 16(%0), %%rdx;" |
| 198 | " mulxq 0(%1), %%r8, %%r9;" |
| 199 | " xor %%r10d, %%r10d;" |
| 200 | " adcxq 16(%2), %%r8;" |
| 201 | " movq %%r8, 16(%2);" |
| 202 | " mulxq 8(%1), %%r10, %%r11;" |
| 203 | " adox %%r9, %%r10;" |
| 204 | " adcx %%rbx, %%r10;" |
| 205 | " movq %%r10, 24(%2);" |
| 206 | " mulxq 16(%1), %%rbx, %%r13;" |
| 207 | " adox %%r11, %%rbx;" |
| 208 | " adcx %%r14, %%rbx;" |
| 209 | " mov $0, %%r8;" |
| 210 | " mulxq 24(%1), %%r14, %%rdx;" |
| 211 | " adox %%r13, %%r14;" |
| 212 | " adcx %%rax, %%r14;" |
| 213 | " mov $0, %%rax;" |
| 214 | " adox %%rdx, %%rax;" |
| 215 | " adcx %%r8, %%rax;" |
| 216 | |
| 217 | /* Compute src1[3] * src2 */ |
| 218 | " movq 24(%0), %%rdx;" |
| 219 | " mulxq 0(%1), %%r8, %%r9;" |
| 220 | " xor %%r10d, %%r10d;" |
| 221 | " adcxq 24(%2), %%r8;" |
| 222 | " movq %%r8, 24(%2);" |
| 223 | " mulxq 8(%1), %%r10, %%r11;" |
| 224 | " adox %%r9, %%r10;" |
| 225 | " adcx %%rbx, %%r10;" |
| 226 | " movq %%r10, 32(%2);" |
| 227 | " mulxq 16(%1), %%rbx, %%r13;" |
| 228 | " adox %%r11, %%rbx;" |
| 229 | " adcx %%r14, %%rbx;" |
| 230 | " movq %%rbx, 40(%2);" /* last row: the upper words are stored too */ |
| 231 | " mov $0, %%r8;" |
| 232 | " mulxq 24(%1), %%r14, %%rdx;" |
| 233 | " adox %%r13, %%r14;" |
| 234 | " adcx %%rax, %%r14;" |
| 235 | " movq %%r14, 48(%2);" |
| 236 | " mov $0, %%rax;" |
| 237 | " adox %%rdx, %%rax;" |
| 238 | " adcx %%r8, %%rax;" |
| 239 | " movq %%rax, 56(%2);" /* tmp[0..7] now holds the full product */ |
| 240 | |
| 241 | /* Line up pointers */ |
| 242 | " mov %2, %0;" /* %0 = tmp (source of the reduction) */ |
| 243 | " mov %3, %2;" /* %2 = out (destination of the reduction) */ |
| 244 | |
| 245 | /* Wrap the result back into the field */ |
| 246 | |
| 247 | /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| 248 | " mov $38, %%rdx;" |
| 249 | " mulxq 32(%0), %%r8, %%r13;" /* r13:r8 = 38 * tmp[4] */ |
| 250 | " xor %k1, %k1;" /* %1 = 0 from here on; also clears CF and OF */ |
| 251 | " adoxq 0(%0), %%r8;" /* OF chain adds the low half tmp[0..3] */ |
| 252 | " mulxq 40(%0), %%r9, %%rbx;" |
| 253 | " adcx %%r13, %%r9;" /* CF chain adds the high word of the previous product */ |
| 254 | " adoxq 8(%0), %%r9;" |
| 255 | " mulxq 48(%0), %%r10, %%r13;" |
| 256 | " adcx %%rbx, %%r10;" |
| 257 | " adoxq 16(%0), %%r10;" |
| 258 | " mulxq 56(%0), %%r11, %%rax;" |
| 259 | " adcx %%r13, %%r11;" |
| 260 | " adoxq 24(%0), %%r11;" |
| 261 | " adcx %1, %%rax;" /* rax += CF */ |
| 262 | " adox %1, %%rax;" /* rax += OF */ |
| 263 | " imul %%rdx, %%rax;" /* rax = 38 * (word above r11) */ |
| 264 | |
| 265 | /* Step 2: Fold the carry back into dst */ |
| 266 | " add %%rax, %%r8;" |
| 267 | " adcx %1, %%r9;" |
| 268 | " movq %%r9, 8(%2);" |
| 269 | " adcx %1, %%r10;" |
| 270 | " movq %%r10, 16(%2);" |
| 271 | " adcx %1, %%r11;" |
| 272 | " movq %%r11, 24(%2);" |
| 273 | |
| 274 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 275 | " mov $0, %%rax;" |
| 276 | " cmovc %%rdx, %%rax;" /* rdx still holds 38 */ |
| 277 | " add %%rax, %%r8;" |
| 278 | " movq %%r8, 0(%2);" |
| 279 | : "+&r" (f1), "+&r" (f2), "+&r" (tmp) |
| 280 | : "r" (out) |
| 281 | : "%rax" , "%rbx" , "%rdx" , "%r8" , "%r9" , "%r10" , "%r11" , "%r13" , |
| 282 | "%r14" , "memory" , "cc" ); |
| 283 | } |
| 284 | |
| 285 | /* Computes two field multiplications: |
| 286 | * out[0] <- f1[0] * f2[0] |
| 287 | * out[1] <- f1[1] * f2[1] |
| 288 | * Uses the 16-element buffer tmp for intermediate results: */ |
/*
 * In the header above the indices count field elements, not words: element 1
 * of f1, f2 and out starts at byte offset 32 (four 64-bit words), and the
 * second raw product is placed at byte offset 64 of tmp.
 *
 * asm operands: %0 = f1, %1 = f2, %2 = tmp, %3 = out.  After both raw
 * products are in tmp, %0 is repointed at tmp, %2 at out, and %1 is zeroed
 * to serve as a constant 0.
 *
 * tmp is written while f1 and f2 are still being read, so it must not
 * overlap them.  out is written only in the two reductions, after the last
 * read of f1 and f2.
 */
| 289 | static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) |
| 290 | { |
| 291 | asm volatile( |
| 292 | |
| 293 | /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ |
| 294 | |
| 295 | /* Compute src1[0] * src2 */ |
| 296 | " movq 0(%0), %%rdx;" /* rdx is the implicit mulx factor */ |
| 297 | " mulxq 0(%1), %%r8, %%r9;" /* r9:r8 = product (high:low) */ |
| 298 | " xor %%r10d, %%r10d;" /* clears CF and OF before the carry chains */ |
| 299 | " movq %%r8, 0(%2);" |
| 300 | " mulxq 8(%1), %%r10, %%r11;" |
| 301 | " adox %%r9, %%r10;" |
| 302 | " movq %%r10, 8(%2);" |
| 303 | " mulxq 16(%1), %%rbx, %%r13;" |
| 304 | " adox %%r11, %%rbx;" |
| 305 | " mulxq 24(%1), %%r14, %%rdx;" |
| 306 | " adox %%r13, %%r14;" |
| 307 | " mov $0, %%rax;" /* mov keeps the pending OF carry intact */ |
| 308 | " adox %%rdx, %%rax;" |
| 309 | |
| 310 | /* Compute src1[1] * src2 */ |
| 311 | " movq 8(%0), %%rdx;" |
| 312 | " mulxq 0(%1), %%r8, %%r9;" |
| 313 | " xor %%r10d, %%r10d;" |
| 314 | " adcxq 8(%2), %%r8;" /* CF chain: accumulate onto the previous row */ |
| 315 | " movq %%r8, 8(%2);" |
| 316 | " mulxq 8(%1), %%r10, %%r11;" |
| 317 | " adox %%r9, %%r10;" /* OF chain: high word of the previous product */ |
| 318 | " adcx %%rbx, %%r10;" |
| 319 | " movq %%r10, 16(%2);" |
| 320 | " mulxq 16(%1), %%rbx, %%r13;" |
| 321 | " adox %%r11, %%rbx;" |
| 322 | " adcx %%r14, %%rbx;" |
| 323 | " mov $0, %%r8;" |
| 324 | " mulxq 24(%1), %%r14, %%rdx;" |
| 325 | " adox %%r13, %%r14;" |
| 326 | " adcx %%rax, %%r14;" |
| 327 | " mov $0, %%rax;" |
| 328 | " adox %%rdx, %%rax;" |
| 329 | " adcx %%r8, %%rax;" |
| 330 | |
| 331 | /* Compute src1[2] * src2 */ |
| 332 | " movq 16(%0), %%rdx;" |
| 333 | " mulxq 0(%1), %%r8, %%r9;" |
| 334 | " xor %%r10d, %%r10d;" |
| 335 | " adcxq 16(%2), %%r8;" |
| 336 | " movq %%r8, 16(%2);" |
| 337 | " mulxq 8(%1), %%r10, %%r11;" |
| 338 | " adox %%r9, %%r10;" |
| 339 | " adcx %%rbx, %%r10;" |
| 340 | " movq %%r10, 24(%2);" |
| 341 | " mulxq 16(%1), %%rbx, %%r13;" |
| 342 | " adox %%r11, %%rbx;" |
| 343 | " adcx %%r14, %%rbx;" |
| 344 | " mov $0, %%r8;" |
| 345 | " mulxq 24(%1), %%r14, %%rdx;" |
| 346 | " adox %%r13, %%r14;" |
| 347 | " adcx %%rax, %%r14;" |
| 348 | " mov $0, %%rax;" |
| 349 | " adox %%rdx, %%rax;" |
| 350 | " adcx %%r8, %%rax;" |
| 351 | |
| 352 | /* Compute src1[3] * src2 */ |
| 353 | " movq 24(%0), %%rdx;" |
| 354 | " mulxq 0(%1), %%r8, %%r9;" |
| 355 | " xor %%r10d, %%r10d;" |
| 356 | " adcxq 24(%2), %%r8;" |
| 357 | " movq %%r8, 24(%2);" |
| 358 | " mulxq 8(%1), %%r10, %%r11;" |
| 359 | " adox %%r9, %%r10;" |
| 360 | " adcx %%rbx, %%r10;" |
| 361 | " movq %%r10, 32(%2);" |
| 362 | " mulxq 16(%1), %%rbx, %%r13;" |
| 363 | " adox %%r11, %%rbx;" |
| 364 | " adcx %%r14, %%rbx;" |
| 365 | " movq %%rbx, 40(%2);" |
| 366 | " mov $0, %%r8;" |
| 367 | " mulxq 24(%1), %%r14, %%rdx;" |
| 368 | " adox %%r13, %%r14;" |
| 369 | " adcx %%rax, %%r14;" |
| 370 | " movq %%r14, 48(%2);" |
| 371 | " mov $0, %%rax;" |
| 372 | " adox %%rdx, %%rax;" |
| 373 | " adcx %%r8, %%rax;" |
| 374 | " movq %%rax, 56(%2);" /* first product complete in tmp bytes 0..63 */ |
| 375 | |
| 376 | /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ |
| 377 | |
| 378 | /* Compute src1[0] * src2 */ |
| 379 | " movq 32(%0), %%rdx;" /* same schedule as above, inputs at +32, tmp at +64 */ |
| 380 | " mulxq 32(%1), %%r8, %%r9;" |
| 381 | " xor %%r10d, %%r10d;" |
| 382 | " movq %%r8, 64(%2);" |
| 383 | " mulxq 40(%1), %%r10, %%r11;" |
| 384 | " adox %%r9, %%r10;" |
| 385 | " movq %%r10, 72(%2);" |
| 386 | " mulxq 48(%1), %%rbx, %%r13;" |
| 387 | " adox %%r11, %%rbx;" |
| 388 | " mulxq 56(%1), %%r14, %%rdx;" |
| 389 | " adox %%r13, %%r14;" |
| 390 | " mov $0, %%rax;" |
| 391 | " adox %%rdx, %%rax;" |
| 392 | |
| 393 | /* Compute src1[1] * src2 */ |
| 394 | " movq 40(%0), %%rdx;" |
| 395 | " mulxq 32(%1), %%r8, %%r9;" |
| 396 | " xor %%r10d, %%r10d;" |
| 397 | " adcxq 72(%2), %%r8;" |
| 398 | " movq %%r8, 72(%2);" |
| 399 | " mulxq 40(%1), %%r10, %%r11;" |
| 400 | " adox %%r9, %%r10;" |
| 401 | " adcx %%rbx, %%r10;" |
| 402 | " movq %%r10, 80(%2);" |
| 403 | " mulxq 48(%1), %%rbx, %%r13;" |
| 404 | " adox %%r11, %%rbx;" |
| 405 | " adcx %%r14, %%rbx;" |
| 406 | " mov $0, %%r8;" |
| 407 | " mulxq 56(%1), %%r14, %%rdx;" |
| 408 | " adox %%r13, %%r14;" |
| 409 | " adcx %%rax, %%r14;" |
| 410 | " mov $0, %%rax;" |
| 411 | " adox %%rdx, %%rax;" |
| 412 | " adcx %%r8, %%rax;" |
| 413 | |
| 414 | /* Compute src1[2] * src2 */ |
| 415 | " movq 48(%0), %%rdx;" |
| 416 | " mulxq 32(%1), %%r8, %%r9;" |
| 417 | " xor %%r10d, %%r10d;" |
| 418 | " adcxq 80(%2), %%r8;" |
| 419 | " movq %%r8, 80(%2);" |
| 420 | " mulxq 40(%1), %%r10, %%r11;" |
| 421 | " adox %%r9, %%r10;" |
| 422 | " adcx %%rbx, %%r10;" |
| 423 | " movq %%r10, 88(%2);" |
| 424 | " mulxq 48(%1), %%rbx, %%r13;" |
| 425 | " adox %%r11, %%rbx;" |
| 426 | " adcx %%r14, %%rbx;" |
| 427 | " mov $0, %%r8;" |
| 428 | " mulxq 56(%1), %%r14, %%rdx;" |
| 429 | " adox %%r13, %%r14;" |
| 430 | " adcx %%rax, %%r14;" |
| 431 | " mov $0, %%rax;" |
| 432 | " adox %%rdx, %%rax;" |
| 433 | " adcx %%r8, %%rax;" |
| 434 | |
| 435 | /* Compute src1[3] * src2 */ |
| 436 | " movq 56(%0), %%rdx;" |
| 437 | " mulxq 32(%1), %%r8, %%r9;" |
| 438 | " xor %%r10d, %%r10d;" |
| 439 | " adcxq 88(%2), %%r8;" |
| 440 | " movq %%r8, 88(%2);" |
| 441 | " mulxq 40(%1), %%r10, %%r11;" |
| 442 | " adox %%r9, %%r10;" |
| 443 | " adcx %%rbx, %%r10;" |
| 444 | " movq %%r10, 96(%2);" |
| 445 | " mulxq 48(%1), %%rbx, %%r13;" |
| 446 | " adox %%r11, %%rbx;" |
| 447 | " adcx %%r14, %%rbx;" |
| 448 | " movq %%rbx, 104(%2);" |
| 449 | " mov $0, %%r8;" |
| 450 | " mulxq 56(%1), %%r14, %%rdx;" |
| 451 | " adox %%r13, %%r14;" |
| 452 | " adcx %%rax, %%r14;" |
| 453 | " movq %%r14, 112(%2);" |
| 454 | " mov $0, %%rax;" |
| 455 | " adox %%rdx, %%rax;" |
| 456 | " adcx %%r8, %%rax;" |
| 457 | " movq %%rax, 120(%2);" /* second product complete in tmp bytes 64..127 */ |
| 458 | |
| 459 | /* Line up pointers */ |
| 460 | " mov %2, %0;" /* %0 = tmp (source of the reductions) */ |
| 461 | " mov %3, %2;" /* %2 = out (destination of the reductions) */ |
| 462 | |
| 463 | /* Wrap the results back into the field */ |
| 464 | |
| 465 | /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| 466 | " mov $38, %%rdx;" |
| 467 | " mulxq 32(%0), %%r8, %%r13;" /* r13:r8 = 38 * tmp[4] */ |
| 468 | " xor %k1, %k1;" /* %1 = 0 from here on; also clears CF and OF */ |
| 469 | " adoxq 0(%0), %%r8;" /* OF chain adds the low half of the product */ |
| 470 | " mulxq 40(%0), %%r9, %%rbx;" |
| 471 | " adcx %%r13, %%r9;" /* CF chain adds the high word of the previous product */ |
| 472 | " adoxq 8(%0), %%r9;" |
| 473 | " mulxq 48(%0), %%r10, %%r13;" |
| 474 | " adcx %%rbx, %%r10;" |
| 475 | " adoxq 16(%0), %%r10;" |
| 476 | " mulxq 56(%0), %%r11, %%rax;" |
| 477 | " adcx %%r13, %%r11;" |
| 478 | " adoxq 24(%0), %%r11;" |
| 479 | " adcx %1, %%rax;" /* rax += CF */ |
| 480 | " adox %1, %%rax;" /* rax += OF */ |
| 481 | " imul %%rdx, %%rax;" /* rax = 38 * (word above r11) */ |
| 482 | |
| 483 | /* Step 2: Fold the carry back into dst */ |
| 484 | " add %%rax, %%r8;" |
| 485 | " adcx %1, %%r9;" |
| 486 | " movq %%r9, 8(%2);" |
| 487 | " adcx %1, %%r10;" |
| 488 | " movq %%r10, 16(%2);" |
| 489 | " adcx %1, %%r11;" |
| 490 | " movq %%r11, 24(%2);" |
| 491 | |
| 492 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 493 | " mov $0, %%rax;" |
| 494 | " cmovc %%rdx, %%rax;" /* rdx still holds 38 */ |
| 495 | " add %%rax, %%r8;" |
| 496 | " movq %%r8, 0(%2);" /* first result complete in out bytes 0..31 */ |
| 497 | |
| 498 | /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| 499 | " mov $38, %%rdx;" |
| 500 | " mulxq 96(%0), %%r8, %%r13;" /* same reduction for the product at tmp + 64 */ |
| 501 | " xor %k1, %k1;" |
| 502 | " adoxq 64(%0), %%r8;" |
| 503 | " mulxq 104(%0), %%r9, %%rbx;" |
| 504 | " adcx %%r13, %%r9;" |
| 505 | " adoxq 72(%0), %%r9;" |
| 506 | " mulxq 112(%0), %%r10, %%r13;" |
| 507 | " adcx %%rbx, %%r10;" |
| 508 | " adoxq 80(%0), %%r10;" |
| 509 | " mulxq 120(%0), %%r11, %%rax;" |
| 510 | " adcx %%r13, %%r11;" |
| 511 | " adoxq 88(%0), %%r11;" |
| 512 | " adcx %1, %%rax;" |
| 513 | " adox %1, %%rax;" |
| 514 | " imul %%rdx, %%rax;" |
| 515 | |
| 516 | /* Step 2: Fold the carry back into dst */ |
| 517 | " add %%rax, %%r8;" |
| 518 | " adcx %1, %%r9;" |
| 519 | " movq %%r9, 40(%2);" |
| 520 | " adcx %1, %%r10;" |
| 521 | " movq %%r10, 48(%2);" |
| 522 | " adcx %1, %%r11;" |
| 523 | " movq %%r11, 56(%2);" |
| 524 | |
| 525 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 526 | " mov $0, %%rax;" |
| 527 | " cmovc %%rdx, %%rax;" |
| 528 | " add %%rax, %%r8;" |
| 529 | " movq %%r8, 32(%2);" /* second result complete in out bytes 32..63 */ |
| 530 | : "+&r" (f1), "+&r" (f2), "+&r" (tmp) |
| 531 | : "r" (out) |
| 532 | : "%rax" , "%rbx" , "%rdx" , "%r8" , "%r9" , "%r10" , "%r11" , "%r13" , |
| 533 | "%r14" , "memory" , "cc" ); |
| 534 | } |
| 535 | |
| 536 | /* Computes the field multiplication of four-element f1 with value in f2 |
| 537 | * Requires f2 to be smaller than 2^17 */ |
/*
 * out <- f1 * f2, where f1 and out are four 64-bit words and f2 is a single
 * 64-bit value.  The word that overflows above the fourth one is folded back
 * into word 0 multiplied by 38.
 *
 * asm operands: %0 = f2_r (pinned to rdx), %1 = out, %2 = f1.  f2_r is a
 * read-write operand because rdx is reloaded with 38 for the reduction.
 */
| 538 | static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) |
| 539 | { |
| 540 | register u64 f2_r asm("rdx" ) = f2; /* mulx reads one factor implicitly from rdx */ |
| 541 | |
| 542 | asm volatile( |
| 543 | /* Compute the raw multiplication of f1*f2 */ |
| 544 | " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ |
| 545 | " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ |
| 546 | " add %%rcx, %%r9;" /* add the high word of f1[0]*f2; starts the CF chain */ |
| 547 | " mov $0, %%rcx;" /* rcx = 0 without disturbing CF */ |
| 548 | " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ |
| 549 | " adcx %%rbx, %%r10;" |
| 550 | " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ |
| 551 | " adcx %%r13, %%r11;" |
| 552 | " adcx %%rcx, %%rax;" /* rax = word above r11, including the last carry */ |
| 553 | |
| 554 | /* Wrap the result back into the field */ |
| 555 | |
| 556 | /* Step 1: Compute carry*38 */ |
| 557 | " mov $38, %%rdx;" /* f2 is no longer needed */ |
| 558 | " imul %%rdx, %%rax;" |
| 559 | |
| 560 | /* Step 2: Fold the carry back into dst */ |
| 561 | " add %%rax, %%r8;" |
| 562 | " adcx %%rcx, %%r9;" /* rcx is still 0: ripple the carry upwards */ |
| 563 | " movq %%r9, 8(%1);" |
| 564 | " adcx %%rcx, %%r10;" |
| 565 | " movq %%r10, 16(%1);" |
| 566 | " adcx %%rcx, %%r11;" |
| 567 | " movq %%r11, 24(%1);" |
| 568 | |
| 569 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 570 | " mov $0, %%rax;" |
| 571 | " cmovc %%rdx, %%rax;" /* rax = 38 if step 2 carried out of the top word */ |
| 572 | " add %%rax, %%r8;" |
| 573 | " movq %%r8, 0(%1);" |
| 574 | : "+&r" (f2_r) |
| 575 | : "r" (out), "r" (f1) |
| 576 | : "%rax" , "%rbx" , "%rcx" , "%r8" , "%r9" , "%r10" , "%r11" , "%r13" , |
| 577 | "memory" , "cc" ); |
| 578 | } |
| 579 | |
| 580 | /* Conditionally swaps p1 and p2 without branching: (p1, p2) <- bit ? (p2, p1) : (p1, p2) */ |
/*
 * Eight 64-bit words are processed at each pointer.  When bit is non-zero
 * the words at p1 and p2 are exchanged; when it is zero every word is
 * written back with its own value, so the same loads and stores are issued
 * in both cases.
 *
 * NOTE: p1 and p2 are declared const, but the asm below stores through both
 * of them; the "memory" clobber is what tells the compiler about the writes.
 *
 * asm operands: %0 = bit, %1 = p1, %2 = p2.
 */
| 581 | static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) |
| 582 | { |
| 583 | asm volatile( |
| 584 | /* Transfer bit into CF flag */ |
| 585 | " add $18446744073709551615, %0;" /* adds 2^64 - 1: carries exactly when bit != 0 */ |
/* mov and cmovc do not modify flags, so this CF drives all eight swaps */
| 586 | |
| 587 | /* cswap p1[0], p2[0] */ |
| 588 | " movq 0(%1), %%r8;" |
| 589 | " movq 0(%2), %%r9;" |
| 590 | " mov %%r8, %%r10;" /* keep a copy of p1[0] */ |
| 591 | " cmovc %%r9, %%r8;" /* r8 = CF ? p2[0] : p1[0] */ |
| 592 | " cmovc %%r10, %%r9;" /* r9 = CF ? p1[0] : p2[0] */ |
| 593 | " movq %%r8, 0(%1);" |
| 594 | " movq %%r9, 0(%2);" |
| 595 | |
| 596 | /* cswap p1[1], p2[1] */ |
| 597 | " movq 8(%1), %%r8;" |
| 598 | " movq 8(%2), %%r9;" |
| 599 | " mov %%r8, %%r10;" |
| 600 | " cmovc %%r9, %%r8;" |
| 601 | " cmovc %%r10, %%r9;" |
| 602 | " movq %%r8, 8(%1);" |
| 603 | " movq %%r9, 8(%2);" |
| 604 | |
| 605 | /* cswap p1[2], p2[2] */ |
| 606 | " movq 16(%1), %%r8;" |
| 607 | " movq 16(%2), %%r9;" |
| 608 | " mov %%r8, %%r10;" |
| 609 | " cmovc %%r9, %%r8;" |
| 610 | " cmovc %%r10, %%r9;" |
| 611 | " movq %%r8, 16(%1);" |
| 612 | " movq %%r9, 16(%2);" |
| 613 | |
| 614 | /* cswap p1[3], p2[3] */ |
| 615 | " movq 24(%1), %%r8;" |
| 616 | " movq 24(%2), %%r9;" |
| 617 | " mov %%r8, %%r10;" |
| 618 | " cmovc %%r9, %%r8;" |
| 619 | " cmovc %%r10, %%r9;" |
| 620 | " movq %%r8, 24(%1);" |
| 621 | " movq %%r9, 24(%2);" |
| 622 | |
| 623 | /* cswap p1[4], p2[4] */ |
| 624 | " movq 32(%1), %%r8;" |
| 625 | " movq 32(%2), %%r9;" |
| 626 | " mov %%r8, %%r10;" |
| 627 | " cmovc %%r9, %%r8;" |
| 628 | " cmovc %%r10, %%r9;" |
| 629 | " movq %%r8, 32(%1);" |
| 630 | " movq %%r9, 32(%2);" |
| 631 | |
| 632 | /* cswap p1[5], p2[5] */ |
| 633 | " movq 40(%1), %%r8;" |
| 634 | " movq 40(%2), %%r9;" |
| 635 | " mov %%r8, %%r10;" |
| 636 | " cmovc %%r9, %%r8;" |
| 637 | " cmovc %%r10, %%r9;" |
| 638 | " movq %%r8, 40(%1);" |
| 639 | " movq %%r9, 40(%2);" |
| 640 | |
| 641 | /* cswap p1[6], p2[6] */ |
| 642 | " movq 48(%1), %%r8;" |
| 643 | " movq 48(%2), %%r9;" |
| 644 | " mov %%r8, %%r10;" |
| 645 | " cmovc %%r9, %%r8;" |
| 646 | " cmovc %%r10, %%r9;" |
| 647 | " movq %%r8, 48(%1);" |
| 648 | " movq %%r9, 48(%2);" |
| 649 | |
| 650 | /* cswap p1[7], p2[7] */ |
| 651 | " movq 56(%1), %%r8;" |
| 652 | " movq 56(%2), %%r9;" |
| 653 | " mov %%r8, %%r10;" |
| 654 | " cmovc %%r9, %%r8;" |
| 655 | " cmovc %%r10, %%r9;" |
| 656 | " movq %%r8, 56(%1);" |
| 657 | " movq %%r9, 56(%2);" |
| 658 | : "+&r" (bit) |
| 659 | : "r" (p1), "r" (p2) |
| 660 | : "%r8" , "%r9" , "%r10" , "memory" , "cc" ); |
| 661 | } |
| 662 | |
| 663 | /* Computes the square of a field element: out <- f * f |
| 664 | * Uses the 8-element buffer tmp for intermediate results */ |
/*
 * f and out are four 64-bit words each; tmp receives the full eight-word
 * square before it is folded down to four words.
 *
 * The raw square is built in three steps: the six cross products f[i]*f[j]
 * (i < j) are summed once, that sum is doubled, and the four squares
 * f[i]^2 are added on top.
 *
 * asm operands: %0 = f, %1 = tmp, %2 = out.  After the raw square is in
 * tmp, %0 is repointed at tmp and %1 at out.
 *
 * tmp is written while f is still being read, so it must not overlap f.
 * out is written only in the final reduction, after the last read of f.
 */
| 665 | static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) |
| 666 | { |
| 667 | asm volatile( |
| 668 | /* Compute the raw multiplication: tmp <- f * f */ |
| 669 | |
| 670 | /* Step 1: Compute all partial products */ |
| 671 | " movq 0(%0), %%rdx;" /* f[0] */ |
| 672 | " mulxq 8(%0), %%r8, %%r14;" /* f[1]*f[0] -> r14:r8 (high:low) */ |
| 673 | " xor %%r15d, %%r15d;" /* r15 = 0; clears CF and OF */ |
| 674 | " mulxq 16(%0), %%r9, %%r10;" /* f[2]*f[0] */ |
| 675 | " adcx %%r14, %%r9;" |
| 676 | " mulxq 24(%0), %%rax, %%rcx;" /* f[3]*f[0] */ |
| 677 | " adcx %%rax, %%r10;" |
| 678 | " movq 24(%0), %%rdx;" /* f[3] */ |
| 679 | " mulxq 8(%0), %%r11, %%rbx;" /* f[1]*f[3] */ |
| 680 | " adcx %%rcx, %%r11;" |
| 681 | " mulxq 16(%0), %%rax, %%r13;" /* f[2]*f[3] */ |
| 682 | " adcx %%rax, %%rbx;" |
| 683 | " movq 8(%0), %%rdx;" /* f[1] */ |
| 684 | " adcx %%r15, %%r13;" /* absorb the last carry of the chain */ |
| 685 | " mulxq 16(%0), %%rax, %%rcx;" /* f[2]*f[1] */ |
| 686 | " mov $0, %%r14;" /* r14 will receive the top word */ |
| 687 | |
| 688 | /* Step 2: Compute two parallel carry chains */ |
/*
 * The adox chain (OF) adds f[2]*f[1] into r10/r11 and ripples its carry
 * upwards; the interleaved adcx chain (CF) doubles every word (r + r + CF).
 */
| 689 | " xor %%r15d, %%r15d;" |
| 690 | " adox %%rax, %%r10;" |
| 691 | " adcx %%r8, %%r8;" |
| 692 | " adox %%rcx, %%r11;" |
| 693 | " adcx %%r9, %%r9;" |
| 694 | " adox %%r15, %%rbx;" |
| 695 | " adcx %%r10, %%r10;" |
| 696 | " adox %%r15, %%r13;" |
| 697 | " adcx %%r11, %%r11;" |
| 698 | " adox %%r15, %%r14;" |
| 699 | " adcx %%rbx, %%rbx;" |
| 700 | " adcx %%r13, %%r13;" |
| 701 | " adcx %%r14, %%r14;" |
| 702 | |
| 703 | /* Step 3: Compute intermediate squares */ |
| 704 | " movq 0(%0), %%rdx;" |
| 705 | " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ |
| 706 | " movq %%rax, 0(%1);" |
| 707 | " add %%rcx, %%r8;" /* starts a fresh CF chain */ |
| 708 | " movq %%r8, 8(%1);" |
| 709 | " movq 8(%0), %%rdx;" |
| 710 | " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ |
| 711 | " adcx %%rax, %%r9;" |
| 712 | " movq %%r9, 16(%1);" |
| 713 | " adcx %%rcx, %%r10;" |
| 714 | " movq %%r10, 24(%1);" |
| 715 | " movq 16(%0), %%rdx;" |
| 716 | " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| 717 | " adcx %%rax, %%r11;" |
| 718 | " movq %%r11, 32(%1);" |
| 719 | " adcx %%rcx, %%rbx;" |
| 720 | " movq %%rbx, 40(%1);" |
| 721 | " movq 24(%0), %%rdx;" |
| 722 | " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| 723 | " adcx %%rax, %%r13;" |
| 724 | " movq %%r13, 48(%1);" |
| 725 | " adcx %%rcx, %%r14;" |
| 726 | " movq %%r14, 56(%1);" /* tmp[0..7] now holds the full square */ |
| 727 | |
| 728 | /* Line up pointers */ |
| 729 | " mov %1, %0;" /* %0 = tmp (source of the reduction) */ |
| 730 | " mov %2, %1;" /* %1 = out (destination of the reduction) */ |
| 731 | |
| 732 | /* Wrap the result back into the field */ |
| 733 | |
| 734 | /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| 735 | " mov $38, %%rdx;" |
| 736 | " mulxq 32(%0), %%r8, %%r13;" /* r13:r8 = 38 * tmp[4] */ |
| 737 | " xor %%ecx, %%ecx;" /* rcx = 0 from here on; also clears CF and OF */ |
| 738 | " adoxq 0(%0), %%r8;" /* OF chain adds the low half tmp[0..3] */ |
| 739 | " mulxq 40(%0), %%r9, %%rbx;" |
| 740 | " adcx %%r13, %%r9;" /* CF chain adds the high word of the previous product */ |
| 741 | " adoxq 8(%0), %%r9;" |
| 742 | " mulxq 48(%0), %%r10, %%r13;" |
| 743 | " adcx %%rbx, %%r10;" |
| 744 | " adoxq 16(%0), %%r10;" |
| 745 | " mulxq 56(%0), %%r11, %%rax;" |
| 746 | " adcx %%r13, %%r11;" |
| 747 | " adoxq 24(%0), %%r11;" |
| 748 | " adcx %%rcx, %%rax;" /* rax += CF */ |
| 749 | " adox %%rcx, %%rax;" /* rax += OF */ |
| 750 | " imul %%rdx, %%rax;" /* rax = 38 * (word above r11) */ |
| 751 | |
| 752 | /* Step 2: Fold the carry back into dst */ |
| 753 | " add %%rax, %%r8;" |
| 754 | " adcx %%rcx, %%r9;" |
| 755 | " movq %%r9, 8(%1);" |
| 756 | " adcx %%rcx, %%r10;" |
| 757 | " movq %%r10, 16(%1);" |
| 758 | " adcx %%rcx, %%r11;" |
| 759 | " movq %%r11, 24(%1);" |
| 760 | |
| 761 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 762 | " mov $0, %%rax;" |
| 763 | " cmovc %%rdx, %%rax;" /* rdx still holds 38 */ |
| 764 | " add %%rax, %%r8;" |
| 765 | " movq %%r8, 0(%1);" |
| 766 | : "+&r" (f), "+&r" (tmp) |
| 767 | : "r" (out) |
| 768 | : "%rax" , "%rbx" , "%rcx" , "%rdx" , "%r8" , "%r9" , "%r10" , "%r11" , |
| 769 | "%r13" , "%r14" , "%r15" , "memory" , "cc" ); |
| 770 | } |
| 771 | |
| 772 | /* Computes two field squarings: |
| 773 | * out[0] <- f[0] * f[0] |
| 774 | * out[1] <- f[1] * f[1] |
| 775 | * Uses the 16-element buffer tmp for intermediate results */ |
/*
 * In the header above the indices count field elements, not words: element 1
 * of f and out starts at byte offset 32 (four 64-bit words), and the second
 * raw square is placed at byte offset 64 of tmp.  Inside the asm comments
 * f[i] denotes the i-th 64-bit word of the element being squared.
 *
 * asm operands: %0 = f, %1 = tmp, %2 = out.  After both raw squares are in
 * tmp, %0 is repointed at tmp and %1 at out.
 *
 * tmp is written while f is still being read, so it must not overlap f.
 * out is written only in the two reductions, after the last read of f.
 */
| 776 | static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) |
| 777 | { |
| 778 | asm volatile( |
| 779 | /* Step 1: Compute all partial products */ |
| 780 | " movq 0(%0), %%rdx;" /* f[0] */ |
| 781 | " mulxq 8(%0), %%r8, %%r14;" /* f[1]*f[0] -> r14:r8 (high:low) */ |
| 782 | " xor %%r15d, %%r15d;" /* r15 = 0; clears CF and OF */ |
| 783 | " mulxq 16(%0), %%r9, %%r10;" /* f[2]*f[0] */ |
| 784 | " adcx %%r14, %%r9;" |
| 785 | " mulxq 24(%0), %%rax, %%rcx;" /* f[3]*f[0] */ |
| 786 | " adcx %%rax, %%r10;" |
| 787 | " movq 24(%0), %%rdx;" /* f[3] */ |
| 788 | " mulxq 8(%0), %%r11, %%rbx;" /* f[1]*f[3] */ |
| 789 | " adcx %%rcx, %%r11;" |
| 790 | " mulxq 16(%0), %%rax, %%r13;" /* f[2]*f[3] */ |
| 791 | " adcx %%rax, %%rbx;" |
| 792 | " movq 8(%0), %%rdx;" /* f[1] */ |
| 793 | " adcx %%r15, %%r13;" /* absorb the last carry of the chain */ |
| 794 | " mulxq 16(%0), %%rax, %%rcx;" /* f[2]*f[1] */ |
| 795 | " mov $0, %%r14;" /* r14 will receive the top word */ |
| 796 | |
| 797 | /* Step 2: Compute two parallel carry chains */ |
/*
 * The adox chain (OF) adds f[2]*f[1] into r10/r11 and ripples its carry
 * upwards; the interleaved adcx chain (CF) doubles every word (r + r + CF).
 */
| 798 | " xor %%r15d, %%r15d;" |
| 799 | " adox %%rax, %%r10;" |
| 800 | " adcx %%r8, %%r8;" |
| 801 | " adox %%rcx, %%r11;" |
| 802 | " adcx %%r9, %%r9;" |
| 803 | " adox %%r15, %%rbx;" |
| 804 | " adcx %%r10, %%r10;" |
| 805 | " adox %%r15, %%r13;" |
| 806 | " adcx %%r11, %%r11;" |
| 807 | " adox %%r15, %%r14;" |
| 808 | " adcx %%rbx, %%rbx;" |
| 809 | " adcx %%r13, %%r13;" |
| 810 | " adcx %%r14, %%r14;" |
| 811 | |
| 812 | /* Step 3: Compute intermediate squares */ |
| 813 | " movq 0(%0), %%rdx;" |
| 814 | " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ |
| 815 | " movq %%rax, 0(%1);" |
| 816 | " add %%rcx, %%r8;" /* starts a fresh CF chain */ |
| 817 | " movq %%r8, 8(%1);" |
| 818 | " movq 8(%0), %%rdx;" |
| 819 | " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ |
| 820 | " adcx %%rax, %%r9;" |
| 821 | " movq %%r9, 16(%1);" |
| 822 | " adcx %%rcx, %%r10;" |
| 823 | " movq %%r10, 24(%1);" |
| 824 | " movq 16(%0), %%rdx;" |
| 825 | " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| 826 | " adcx %%rax, %%r11;" |
| 827 | " movq %%r11, 32(%1);" |
| 828 | " adcx %%rcx, %%rbx;" |
| 829 | " movq %%rbx, 40(%1);" |
| 830 | " movq 24(%0), %%rdx;" |
| 831 | " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| 832 | " adcx %%rax, %%r13;" |
| 833 | " movq %%r13, 48(%1);" |
| 834 | " adcx %%rcx, %%r14;" |
| 835 | " movq %%r14, 56(%1);" /* first square complete in tmp bytes 0..63 */ |
| 836 | |
| 837 | /* Step 1: Compute all partial products */ |
/* Second element: same schedule, input words at byte offsets 32..56 */
| 838 | " movq 32(%0), %%rdx;" /* f[0] */ |
| 839 | " mulxq 40(%0), %%r8, %%r14;" /* f[1]*f[0] */ |
| 840 | " xor %%r15d, %%r15d;" /* r15 = 0; clears CF and OF */ |
| 841 | " mulxq 48(%0), %%r9, %%r10;" /* f[2]*f[0] */ |
| 842 | " adcx %%r14, %%r9;" |
| 843 | " mulxq 56(%0), %%rax, %%rcx;" /* f[3]*f[0] */ |
| 844 | " adcx %%rax, %%r10;" |
| 845 | " movq 56(%0), %%rdx;" /* f[3] */ |
| 846 | " mulxq 40(%0), %%r11, %%rbx;" /* f[1]*f[3] */ |
| 847 | " adcx %%rcx, %%r11;" |
| 848 | " mulxq 48(%0), %%rax, %%r13;" /* f[2]*f[3] */ |
| 849 | " adcx %%rax, %%rbx;" |
| 850 | " movq 40(%0), %%rdx;" /* f[1] */ |
| 851 | " adcx %%r15, %%r13;" /* absorb the last carry of the chain */ |
| 852 | " mulxq 48(%0), %%rax, %%rcx;" /* f[2]*f[1] */ |
| 853 | " mov $0, %%r14;" /* r14 will receive the top word */ |
| 854 | |
| 855 | /* Step 2: Compute two parallel carry chains */ |
| 856 | " xor %%r15d, %%r15d;" |
| 857 | " adox %%rax, %%r10;" |
| 858 | " adcx %%r8, %%r8;" |
| 859 | " adox %%rcx, %%r11;" |
| 860 | " adcx %%r9, %%r9;" |
| 861 | " adox %%r15, %%rbx;" |
| 862 | " adcx %%r10, %%r10;" |
| 863 | " adox %%r15, %%r13;" |
| 864 | " adcx %%r11, %%r11;" |
| 865 | " adox %%r15, %%r14;" |
| 866 | " adcx %%rbx, %%rbx;" |
| 867 | " adcx %%r13, %%r13;" |
| 868 | " adcx %%r14, %%r14;" |
| 869 | |
| 870 | /* Step 3: Compute intermediate squares */ |
| 871 | " movq 32(%0), %%rdx;" |
| 872 | " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ |
| 873 | " movq %%rax, 64(%1);" |
| 874 | " add %%rcx, %%r8;" |
| 875 | " movq %%r8, 72(%1);" |
| 876 | " movq 40(%0), %%rdx;" |
| 877 | " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ |
| 878 | " adcx %%rax, %%r9;" |
| 879 | " movq %%r9, 80(%1);" |
| 880 | " adcx %%rcx, %%r10;" |
| 881 | " movq %%r10, 88(%1);" |
| 882 | " movq 48(%0), %%rdx;" |
| 883 | " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ |
| 884 | " adcx %%rax, %%r11;" |
| 885 | " movq %%r11, 96(%1);" |
| 886 | " adcx %%rcx, %%rbx;" |
| 887 | " movq %%rbx, 104(%1);" |
| 888 | " movq 56(%0), %%rdx;" |
| 889 | " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ |
| 890 | " adcx %%rax, %%r13;" |
| 891 | " movq %%r13, 112(%1);" |
| 892 | " adcx %%rcx, %%r14;" |
| 893 | " movq %%r14, 120(%1);" /* second square complete in tmp bytes 64..127 */ |
| 894 | |
| 895 | /* Line up pointers */ |
| 896 | " mov %1, %0;" /* %0 = tmp (source of the reductions) */ |
| 897 | " mov %2, %1;" /* %1 = out (destination of the reductions) */ |
| 898 | |
| 899 | /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| 900 | " mov $38, %%rdx;" |
| 901 | " mulxq 32(%0), %%r8, %%r13;" /* r13:r8 = 38 * tmp[4] */ |
| 902 | " xor %%ecx, %%ecx;" /* rcx = 0 from here on; also clears CF and OF */ |
| 903 | " adoxq 0(%0), %%r8;" /* OF chain adds the low half of the square */ |
| 904 | " mulxq 40(%0), %%r9, %%rbx;" |
| 905 | " adcx %%r13, %%r9;" /* CF chain adds the high word of the previous product */ |
| 906 | " adoxq 8(%0), %%r9;" |
| 907 | " mulxq 48(%0), %%r10, %%r13;" |
| 908 | " adcx %%rbx, %%r10;" |
| 909 | " adoxq 16(%0), %%r10;" |
| 910 | " mulxq 56(%0), %%r11, %%rax;" |
| 911 | " adcx %%r13, %%r11;" |
| 912 | " adoxq 24(%0), %%r11;" |
| 913 | " adcx %%rcx, %%rax;" /* rax += CF */ |
| 914 | " adox %%rcx, %%rax;" /* rax += OF */ |
| 915 | " imul %%rdx, %%rax;" /* rax = 38 * (word above r11) */ |
| 916 | |
| 917 | /* Step 2: Fold the carry back into dst */ |
| 918 | " add %%rax, %%r8;" |
| 919 | " adcx %%rcx, %%r9;" |
| 920 | " movq %%r9, 8(%1);" |
| 921 | " adcx %%rcx, %%r10;" |
| 922 | " movq %%r10, 16(%1);" |
| 923 | " adcx %%rcx, %%r11;" |
| 924 | " movq %%r11, 24(%1);" |
| 925 | |
| 926 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 927 | " mov $0, %%rax;" |
| 928 | " cmovc %%rdx, %%rax;" /* rdx still holds 38 */ |
| 929 | " add %%rax, %%r8;" |
| 930 | " movq %%r8, 0(%1);" /* first result complete in out bytes 0..31 */ |
| 931 | |
| 932 | /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ |
| 933 | " mov $38, %%rdx;" |
| 934 | " mulxq 96(%0), %%r8, %%r13;" /* same reduction for the square at tmp + 64 */ |
| 935 | " xor %%ecx, %%ecx;" |
| 936 | " adoxq 64(%0), %%r8;" |
| 937 | " mulxq 104(%0), %%r9, %%rbx;" |
| 938 | " adcx %%r13, %%r9;" |
| 939 | " adoxq 72(%0), %%r9;" |
| 940 | " mulxq 112(%0), %%r10, %%r13;" |
| 941 | " adcx %%rbx, %%r10;" |
| 942 | " adoxq 80(%0), %%r10;" |
| 943 | " mulxq 120(%0), %%r11, %%rax;" |
| 944 | " adcx %%r13, %%r11;" |
| 945 | " adoxq 88(%0), %%r11;" |
| 946 | " adcx %%rcx, %%rax;" |
| 947 | " adox %%rcx, %%rax;" |
| 948 | " imul %%rdx, %%rax;" |
| 949 | |
| 950 | /* Step 2: Fold the carry back into dst */ |
| 951 | " add %%rax, %%r8;" |
| 952 | " adcx %%rcx, %%r9;" |
| 953 | " movq %%r9, 40(%1);" |
| 954 | " adcx %%rcx, %%r10;" |
| 955 | " movq %%r10, 48(%1);" |
| 956 | " adcx %%rcx, %%r11;" |
| 957 | " movq %%r11, 56(%1);" |
| 958 | |
| 959 | /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ |
| 960 | " mov $0, %%rax;" |
| 961 | " cmovc %%rdx, %%rax;" |
| 962 | " add %%rax, %%r8;" |
| 963 | " movq %%r8, 32(%1);" /* second result complete in out bytes 32..63 */ |
| 964 | : "+&r" (f), "+&r" (tmp) |
| 965 | : "r" (out) |
| 966 | : "%rax" , "%rbx" , "%rcx" , "%rdx" , "%r8" , "%r9" , "%r10" , "%r11" , |
| 967 | "%r13" , "%r14" , "%r15" , "memory" , "cc" ); |
| 968 | } |
| 969 | |
| 970 | static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) |
| 971 | { |
| 972 | u64 *nq = p01_tmp1; |
| 973 | u64 *nq_p1 = p01_tmp1 + (u32)8U; |
| 974 | u64 *tmp1 = p01_tmp1 + (u32)16U; |
| 975 | u64 *x1 = q; |
| 976 | u64 *x2 = nq; |
| 977 | u64 *z2 = nq + (u32)4U; |
| 978 | u64 *z3 = nq_p1 + (u32)4U; |
| 979 | u64 *a = tmp1; |
| 980 | u64 *b = tmp1 + (u32)4U; |
| 981 | u64 *ab = tmp1; |
| 982 | u64 *dc = tmp1 + (u32)8U; |
| 983 | u64 *x3; |
| 984 | u64 *z31; |
| 985 | u64 *d0; |
| 986 | u64 *c0; |
| 987 | u64 *a1; |
| 988 | u64 *b1; |
| 989 | u64 *d; |
| 990 | u64 *c; |
| 991 | u64 *ab1; |
| 992 | u64 *dc1; |
| 993 | fadd(out: a, f1: x2, f2: z2); |
| 994 | fsub(out: b, f1: x2, f2: z2); |
| 995 | x3 = nq_p1; |
| 996 | z31 = nq_p1 + (u32)4U; |
| 997 | d0 = dc; |
| 998 | c0 = dc + (u32)4U; |
| 999 | fadd(out: c0, f1: x3, f2: z31); |
| 1000 | fsub(out: d0, f1: x3, f2: z31); |
| 1001 | fmul2(out: dc, f1: dc, f2: ab, tmp: tmp2); |
| 1002 | fadd(out: x3, f1: d0, f2: c0); |
| 1003 | fsub(out: z31, f1: d0, f2: c0); |
| 1004 | a1 = tmp1; |
| 1005 | b1 = tmp1 + (u32)4U; |
| 1006 | d = tmp1 + (u32)8U; |
| 1007 | c = tmp1 + (u32)12U; |
| 1008 | ab1 = tmp1; |
| 1009 | dc1 = tmp1 + (u32)8U; |
| 1010 | fsqr2(out: dc1, f: ab1, tmp: tmp2); |
| 1011 | fsqr2(out: nq_p1, f: nq_p1, tmp: tmp2); |
| 1012 | a1[0U] = c[0U]; |
| 1013 | a1[1U] = c[1U]; |
| 1014 | a1[2U] = c[2U]; |
| 1015 | a1[3U] = c[3U]; |
| 1016 | fsub(out: c, f1: d, f2: c); |
| 1017 | fmul_scalar(out: b1, f1: c, f2: (u64)121665U); |
| 1018 | fadd(out: b1, f1: b1, f2: d); |
| 1019 | fmul2(out: nq, f1: dc1, f2: ab1, tmp: tmp2); |
| 1020 | fmul(out: z3, f1: z3, f2: x1, tmp: tmp2); |
| 1021 | } |
| 1022 | |
| 1023 | static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) |
| 1024 | { |
| 1025 | u64 *x2 = nq; |
| 1026 | u64 *z2 = nq + (u32)4U; |
| 1027 | u64 *a = tmp1; |
| 1028 | u64 *b = tmp1 + (u32)4U; |
| 1029 | u64 *d = tmp1 + (u32)8U; |
| 1030 | u64 *c = tmp1 + (u32)12U; |
| 1031 | u64 *ab = tmp1; |
| 1032 | u64 *dc = tmp1 + (u32)8U; |
| 1033 | fadd(out: a, f1: x2, f2: z2); |
| 1034 | fsub(out: b, f1: x2, f2: z2); |
| 1035 | fsqr2(out: dc, f: ab, tmp: tmp2); |
| 1036 | a[0U] = c[0U]; |
| 1037 | a[1U] = c[1U]; |
| 1038 | a[2U] = c[2U]; |
| 1039 | a[3U] = c[3U]; |
| 1040 | fsub(out: c, f1: d, f2: c); |
| 1041 | fmul_scalar(out: b, f1: c, f2: (u64)121665U); |
| 1042 | fadd(out: b, f1: b, f2: d); |
| 1043 | fmul2(out: nq, f1: dc, f2: ab, tmp: tmp2); |
| 1044 | } |
| 1045 | |
| 1046 | static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) |
| 1047 | { |
| 1048 | u64 tmp2[16U] = { 0U }; |
| 1049 | u64 p01_tmp1_swap[33U] = { 0U }; |
| 1050 | u64 *p0 = p01_tmp1_swap; |
| 1051 | u64 *p01 = p01_tmp1_swap; |
| 1052 | u64 *p03 = p01; |
| 1053 | u64 *p11 = p01 + (u32)8U; |
| 1054 | u64 *x0; |
| 1055 | u64 *z0; |
| 1056 | u64 *p01_tmp1; |
| 1057 | u64 *p01_tmp11; |
| 1058 | u64 *nq10; |
| 1059 | u64 *nq_p11; |
| 1060 | u64 *swap1; |
| 1061 | u64 sw0; |
| 1062 | u64 *nq1; |
| 1063 | u64 *tmp1; |
| 1064 | memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); |
| 1065 | x0 = p03; |
| 1066 | z0 = p03 + (u32)4U; |
| 1067 | x0[0U] = (u64)1U; |
| 1068 | x0[1U] = (u64)0U; |
| 1069 | x0[2U] = (u64)0U; |
| 1070 | x0[3U] = (u64)0U; |
| 1071 | z0[0U] = (u64)0U; |
| 1072 | z0[1U] = (u64)0U; |
| 1073 | z0[2U] = (u64)0U; |
| 1074 | z0[3U] = (u64)0U; |
| 1075 | p01_tmp1 = p01_tmp1_swap; |
| 1076 | p01_tmp11 = p01_tmp1_swap; |
| 1077 | nq10 = p01_tmp1_swap; |
| 1078 | nq_p11 = p01_tmp1_swap + (u32)8U; |
| 1079 | swap1 = p01_tmp1_swap + (u32)32U; |
| 1080 | cswap2(bit: (u64)1U, p1: nq10, p2: nq_p11); |
| 1081 | point_add_and_double(q: init1, p01_tmp1: p01_tmp11, tmp2); |
| 1082 | swap1[0U] = (u64)1U; |
| 1083 | { |
| 1084 | u32 i; |
| 1085 | for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { |
| 1086 | u64 *p01_tmp12 = p01_tmp1_swap; |
| 1087 | u64 *swap2 = p01_tmp1_swap + (u32)32U; |
| 1088 | u64 *nq2 = p01_tmp12; |
| 1089 | u64 *nq_p12 = p01_tmp12 + (u32)8U; |
| 1090 | u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); |
| 1091 | u64 sw = swap2[0U] ^ bit; |
| 1092 | cswap2(bit: sw, p1: nq2, p2: nq_p12); |
| 1093 | point_add_and_double(q: init1, p01_tmp1: p01_tmp12, tmp2); |
| 1094 | swap2[0U] = bit; |
| 1095 | } |
| 1096 | } |
| 1097 | sw0 = swap1[0U]; |
| 1098 | cswap2(bit: sw0, p1: nq10, p2: nq_p11); |
| 1099 | nq1 = p01_tmp1; |
| 1100 | tmp1 = p01_tmp1 + (u32)16U; |
| 1101 | point_double(nq: nq1, tmp1, tmp2); |
| 1102 | point_double(nq: nq1, tmp1, tmp2); |
| 1103 | point_double(nq: nq1, tmp1, tmp2); |
| 1104 | memcpy(out, p0, (u32)8U * sizeof(p0[0U])); |
| 1105 | |
| 1106 | memzero_explicit(s: tmp2, count: sizeof(tmp2)); |
| 1107 | memzero_explicit(s: p01_tmp1_swap, count: sizeof(p01_tmp1_swap)); |
| 1108 | } |
| 1109 | |
| 1110 | static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) |
| 1111 | { |
| 1112 | u32 i; |
| 1113 | fsqr(out: o, f: inp, tmp); |
| 1114 | for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) |
| 1115 | fsqr(out: o, f: o, tmp); |
| 1116 | } |
| 1117 | |
| 1118 | static void finv(u64 *o, const u64 *i, u64 *tmp) |
| 1119 | { |
| 1120 | u64 t1[16U] = { 0U }; |
| 1121 | u64 *a0 = t1; |
| 1122 | u64 *b = t1 + (u32)4U; |
| 1123 | u64 *c = t1 + (u32)8U; |
| 1124 | u64 *t00 = t1 + (u32)12U; |
| 1125 | u64 *tmp1 = tmp; |
| 1126 | u64 *a; |
| 1127 | u64 *t0; |
| 1128 | fsquare_times(o: a0, inp: i, tmp: tmp1, n1: (u32)1U); |
| 1129 | fsquare_times(o: t00, inp: a0, tmp: tmp1, n1: (u32)2U); |
| 1130 | fmul(out: b, f1: t00, f2: i, tmp); |
| 1131 | fmul(out: a0, f1: b, f2: a0, tmp); |
| 1132 | fsquare_times(o: t00, inp: a0, tmp: tmp1, n1: (u32)1U); |
| 1133 | fmul(out: b, f1: t00, f2: b, tmp); |
| 1134 | fsquare_times(o: t00, inp: b, tmp: tmp1, n1: (u32)5U); |
| 1135 | fmul(out: b, f1: t00, f2: b, tmp); |
| 1136 | fsquare_times(o: t00, inp: b, tmp: tmp1, n1: (u32)10U); |
| 1137 | fmul(out: c, f1: t00, f2: b, tmp); |
| 1138 | fsquare_times(o: t00, inp: c, tmp: tmp1, n1: (u32)20U); |
| 1139 | fmul(out: t00, f1: t00, f2: c, tmp); |
| 1140 | fsquare_times(o: t00, inp: t00, tmp: tmp1, n1: (u32)10U); |
| 1141 | fmul(out: b, f1: t00, f2: b, tmp); |
| 1142 | fsquare_times(o: t00, inp: b, tmp: tmp1, n1: (u32)50U); |
| 1143 | fmul(out: c, f1: t00, f2: b, tmp); |
| 1144 | fsquare_times(o: t00, inp: c, tmp: tmp1, n1: (u32)100U); |
| 1145 | fmul(out: t00, f1: t00, f2: c, tmp); |
| 1146 | fsquare_times(o: t00, inp: t00, tmp: tmp1, n1: (u32)50U); |
| 1147 | fmul(out: t00, f1: t00, f2: b, tmp); |
| 1148 | fsquare_times(o: t00, inp: t00, tmp: tmp1, n1: (u32)5U); |
| 1149 | a = t1; |
| 1150 | t0 = t1 + (u32)12U; |
| 1151 | fmul(out: o, f1: t0, f2: a, tmp); |
| 1152 | } |
| 1153 | |
| 1154 | static void store_felem(u64 *b, u64 *f) |
| 1155 | { |
| 1156 | u64 f30 = f[3U]; |
| 1157 | u64 top_bit0 = f30 >> (u32)63U; |
| 1158 | u64 f31; |
| 1159 | u64 top_bit; |
| 1160 | u64 f0; |
| 1161 | u64 f1; |
| 1162 | u64 f2; |
| 1163 | u64 f3; |
| 1164 | u64 m0; |
| 1165 | u64 m1; |
| 1166 | u64 m2; |
| 1167 | u64 m3; |
| 1168 | u64 mask; |
| 1169 | u64 f0_; |
| 1170 | u64 f1_; |
| 1171 | u64 f2_; |
| 1172 | u64 f3_; |
| 1173 | u64 o0; |
| 1174 | u64 o1; |
| 1175 | u64 o2; |
| 1176 | u64 o3; |
| 1177 | f[3U] = f30 & (u64)0x7fffffffffffffffU; |
| 1178 | add_scalar(out: f, f1: f, f2: (u64)19U * top_bit0); |
| 1179 | f31 = f[3U]; |
| 1180 | top_bit = f31 >> (u32)63U; |
| 1181 | f[3U] = f31 & (u64)0x7fffffffffffffffU; |
| 1182 | add_scalar(out: f, f1: f, f2: (u64)19U * top_bit); |
| 1183 | f0 = f[0U]; |
| 1184 | f1 = f[1U]; |
| 1185 | f2 = f[2U]; |
| 1186 | f3 = f[3U]; |
| 1187 | m0 = gte_mask(a: f0, b: (u64)0xffffffffffffffedU); |
| 1188 | m1 = eq_mask(a: f1, b: (u64)0xffffffffffffffffU); |
| 1189 | m2 = eq_mask(a: f2, b: (u64)0xffffffffffffffffU); |
| 1190 | m3 = eq_mask(a: f3, b: (u64)0x7fffffffffffffffU); |
| 1191 | mask = ((m0 & m1) & m2) & m3; |
| 1192 | f0_ = f0 - (mask & (u64)0xffffffffffffffedU); |
| 1193 | f1_ = f1 - (mask & (u64)0xffffffffffffffffU); |
| 1194 | f2_ = f2 - (mask & (u64)0xffffffffffffffffU); |
| 1195 | f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); |
| 1196 | o0 = f0_; |
| 1197 | o1 = f1_; |
| 1198 | o2 = f2_; |
| 1199 | o3 = f3_; |
| 1200 | b[0U] = o0; |
| 1201 | b[1U] = o1; |
| 1202 | b[2U] = o2; |
| 1203 | b[3U] = o3; |
| 1204 | } |
| 1205 | |
| 1206 | static void encode_point(u8 *o, const u64 *i) |
| 1207 | { |
| 1208 | const u64 *x = i; |
| 1209 | const u64 *z = i + (u32)4U; |
| 1210 | u64 tmp[4U] = { 0U }; |
| 1211 | u64 tmp_w[16U] = { 0U }; |
| 1212 | finv(o: tmp, i: z, tmp: tmp_w); |
| 1213 | fmul(out: tmp, f1: tmp, f2: x, tmp: tmp_w); |
| 1214 | store_felem(b: (u64 *)o, f: tmp); |
| 1215 | } |
| 1216 | |
| 1217 | static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) |
| 1218 | { |
| 1219 | u64 init1[8U] = { 0U }; |
| 1220 | u64 tmp[4U] = { 0U }; |
| 1221 | u64 tmp3; |
| 1222 | u64 *x; |
| 1223 | u64 *z; |
| 1224 | { |
| 1225 | u32 i; |
| 1226 | for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { |
| 1227 | u64 *os = tmp; |
| 1228 | const u8 *bj = pub + i * (u32)8U; |
| 1229 | u64 u = *(u64 *)bj; |
| 1230 | u64 r = u; |
| 1231 | u64 x0 = r; |
| 1232 | os[i] = x0; |
| 1233 | } |
| 1234 | } |
| 1235 | tmp3 = tmp[3U]; |
| 1236 | tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; |
| 1237 | x = init1; |
| 1238 | z = init1 + (u32)4U; |
| 1239 | z[0U] = (u64)1U; |
| 1240 | z[1U] = (u64)0U; |
| 1241 | z[2U] = (u64)0U; |
| 1242 | z[3U] = (u64)0U; |
| 1243 | x[0U] = tmp[0U]; |
| 1244 | x[1U] = tmp[1U]; |
| 1245 | x[2U] = tmp[2U]; |
| 1246 | x[3U] = tmp[3U]; |
| 1247 | montgomery_ladder(out: init1, key: priv, init1); |
| 1248 | encode_point(o: out, i: init1); |
| 1249 | } |
| 1250 | |
| 1251 | /* The below constants were generated using this sage script: |
| 1252 | * |
| 1253 | * #!/usr/bin/env sage |
| 1254 | * import sys |
| 1255 | * from sage.all import * |
| 1256 | * def limbs(n): |
| 1257 | * n = int(n) |
| 1258 | * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) |
| 1259 | * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l |
| 1260 | * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) |
| 1261 | * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] |
| 1262 | * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) |
| 1263 | * print("static const u64 table_ladder[] = {") |
| 1264 | * p = ec.lift_x(9) |
| 1265 | * for i in range(252): |
| 1266 | * l = (p[0] + p[2]) / (p[0] - p[2]) |
| 1267 | * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) |
| 1268 | * p = p * 2 |
| 1269 | * print("};") |
| 1270 | * |
| 1271 | */ |
| 1272 | |
| 1273 | static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; |
| 1274 | |
| 1275 | static const u64 table_ladder[] = { |
| 1276 | 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, |
| 1277 | 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, |
| 1278 | 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, |
| 1279 | 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, |
| 1280 | 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, |
| 1281 | 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, |
| 1282 | 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, |
| 1283 | 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, |
| 1284 | 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, |
| 1285 | 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, |
| 1286 | 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, |
| 1287 | 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, |
| 1288 | 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, |
| 1289 | 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, |
| 1290 | 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, |
| 1291 | 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, |
| 1292 | 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, |
| 1293 | 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, |
| 1294 | 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, |
| 1295 | 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, |
| 1296 | 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, |
| 1297 | 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, |
| 1298 | 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, |
| 1299 | 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, |
| 1300 | 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, |
| 1301 | 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, |
| 1302 | 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, |
| 1303 | 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, |
| 1304 | 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, |
| 1305 | 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, |
| 1306 | 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, |
| 1307 | 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, |
| 1308 | 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, |
| 1309 | 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, |
| 1310 | 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, |
| 1311 | 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, |
| 1312 | 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, |
| 1313 | 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, |
| 1314 | 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, |
| 1315 | 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, |
| 1316 | 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, |
| 1317 | 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, |
| 1318 | 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, |
| 1319 | 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, |
| 1320 | 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, |
| 1321 | 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, |
| 1322 | 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, |
| 1323 | 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, |
| 1324 | 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, |
| 1325 | 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, |
| 1326 | 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, |
| 1327 | 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, |
| 1328 | 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, |
| 1329 | 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, |
| 1330 | 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, |
| 1331 | 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, |
| 1332 | 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, |
| 1333 | 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, |
| 1334 | 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, |
| 1335 | 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, |
| 1336 | 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, |
| 1337 | 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, |
| 1338 | 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, |
| 1339 | 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, |
| 1340 | 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, |
| 1341 | 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, |
| 1342 | 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, |
| 1343 | 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, |
| 1344 | 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, |
| 1345 | 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, |
| 1346 | 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, |
| 1347 | 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, |
| 1348 | 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, |
| 1349 | 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, |
| 1350 | 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, |
| 1351 | 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, |
| 1352 | 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, |
| 1353 | 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, |
| 1354 | 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, |
| 1355 | 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, |
| 1356 | 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, |
| 1357 | 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, |
| 1358 | 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, |
| 1359 | 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, |
| 1360 | 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, |
| 1361 | 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, |
| 1362 | 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, |
| 1363 | 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, |
| 1364 | 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, |
| 1365 | 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, |
| 1366 | 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, |
| 1367 | 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, |
| 1368 | 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, |
| 1369 | 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, |
| 1370 | 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, |
| 1371 | 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, |
| 1372 | 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, |
| 1373 | 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, |
| 1374 | 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, |
| 1375 | 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, |
| 1376 | 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, |
| 1377 | 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, |
| 1378 | 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, |
| 1379 | 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, |
| 1380 | 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, |
| 1381 | 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, |
| 1382 | 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, |
| 1383 | 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, |
| 1384 | 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, |
| 1385 | 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, |
| 1386 | 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, |
| 1387 | 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, |
| 1388 | 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, |
| 1389 | 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, |
| 1390 | 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, |
| 1391 | 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, |
| 1392 | 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, |
| 1393 | 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, |
| 1394 | 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, |
| 1395 | 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, |
| 1396 | 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, |
| 1397 | 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, |
| 1398 | 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, |
| 1399 | 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, |
| 1400 | 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, |
| 1401 | 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, |
| 1402 | 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, |
| 1403 | 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, |
| 1404 | 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, |
| 1405 | 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, |
| 1406 | 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, |
| 1407 | 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, |
| 1408 | 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, |
| 1409 | 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, |
| 1410 | 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, |
| 1411 | 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, |
| 1412 | 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, |
| 1413 | 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, |
| 1414 | 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, |
| 1415 | 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, |
| 1416 | 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, |
| 1417 | 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, |
| 1418 | 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, |
| 1419 | 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, |
| 1420 | 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, |
| 1421 | 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, |
| 1422 | 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, |
| 1423 | 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, |
| 1424 | 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, |
| 1425 | 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, |
| 1426 | 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, |
| 1427 | 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, |
| 1428 | 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, |
| 1429 | 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, |
| 1430 | 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, |
| 1431 | 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, |
| 1432 | 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, |
| 1433 | 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, |
| 1434 | 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, |
| 1435 | 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, |
| 1436 | 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, |
| 1437 | 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, |
| 1438 | 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, |
| 1439 | 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, |
| 1440 | 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, |
| 1441 | 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, |
| 1442 | 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, |
| 1443 | 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, |
| 1444 | 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, |
| 1445 | 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, |
| 1446 | 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, |
| 1447 | 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, |
| 1448 | 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, |
| 1449 | 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, |
| 1450 | 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, |
| 1451 | 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, |
| 1452 | 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, |
| 1453 | 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, |
| 1454 | 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, |
| 1455 | 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, |
| 1456 | 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, |
| 1457 | 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, |
| 1458 | 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, |
| 1459 | 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, |
| 1460 | 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, |
| 1461 | 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, |
| 1462 | 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, |
| 1463 | 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, |
| 1464 | 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, |
| 1465 | 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, |
| 1466 | 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, |
| 1467 | 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, |
| 1468 | 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, |
| 1469 | 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, |
| 1470 | 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, |
| 1471 | 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, |
| 1472 | 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, |
| 1473 | 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, |
| 1474 | 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, |
| 1475 | 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, |
| 1476 | 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, |
| 1477 | 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, |
| 1478 | 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, |
| 1479 | 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, |
| 1480 | 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, |
| 1481 | 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, |
| 1482 | 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, |
| 1483 | 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, |
| 1484 | 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, |
| 1485 | 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, |
| 1486 | 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, |
| 1487 | 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, |
| 1488 | 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, |
| 1489 | 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, |
| 1490 | 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, |
| 1491 | 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, |
| 1492 | 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, |
| 1493 | 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, |
| 1494 | 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, |
| 1495 | 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, |
| 1496 | 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, |
| 1497 | 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, |
| 1498 | 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, |
| 1499 | 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, |
| 1500 | 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, |
| 1501 | 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, |
| 1502 | 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, |
| 1503 | 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, |
| 1504 | 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, |
| 1505 | 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, |
| 1506 | 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, |
| 1507 | 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, |
| 1508 | 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, |
| 1509 | 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, |
| 1510 | 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, |
| 1511 | 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, |
| 1512 | 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, |
| 1513 | 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, |
| 1514 | 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, |
| 1515 | 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, |
| 1516 | 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, |
| 1517 | 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, |
| 1518 | 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, |
| 1519 | 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, |
| 1520 | 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, |
| 1521 | 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, |
| 1522 | 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, |
| 1523 | 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, |
| 1524 | 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, |
| 1525 | 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, |
| 1526 | 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, |
| 1527 | 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL |
| 1528 | }; |
| 1529 | |
| 1530 | static void curve25519_ever64_base(u8 *out, const u8 *priv) |
| 1531 | { |
| 1532 | u64 swap = 1; |
| 1533 | int i, j, k; |
| 1534 | u64 tmp[16 + 32 + 4]; |
| 1535 | u64 *x1 = &tmp[0]; |
| 1536 | u64 *z1 = &tmp[4]; |
| 1537 | u64 *x2 = &tmp[8]; |
| 1538 | u64 *z2 = &tmp[12]; |
| 1539 | u64 *xz1 = &tmp[0]; |
| 1540 | u64 *xz2 = &tmp[8]; |
| 1541 | u64 *a = &tmp[0 + 16]; |
| 1542 | u64 *b = &tmp[4 + 16]; |
| 1543 | u64 *c = &tmp[8 + 16]; |
| 1544 | u64 *ab = &tmp[0 + 16]; |
| 1545 | u64 *abcd = &tmp[0 + 16]; |
| 1546 | u64 *ef = &tmp[16 + 16]; |
| 1547 | u64 *efgh = &tmp[16 + 16]; |
| 1548 | u64 *key = &tmp[0 + 16 + 32]; |
| 1549 | |
| 1550 | memcpy(key, priv, 32); |
| 1551 | ((u8 *)key)[0] &= 248; |
| 1552 | ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; |
| 1553 | |
| 1554 | x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; |
| 1555 | z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; |
| 1556 | z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; |
| 1557 | memcpy(x2, p_minus_s, sizeof(p_minus_s)); |
| 1558 | |
| 1559 | j = 3; |
| 1560 | for (i = 0; i < 4; ++i) { |
| 1561 | while (j < (const int[]){ 64, 64, 64, 63 }[i]) { |
| 1562 | u64 bit = (key[i] >> j) & 1; |
| 1563 | k = (64 * i + j - 3); |
| 1564 | swap = swap ^ bit; |
| 1565 | cswap2(bit: swap, p1: xz1, p2: xz2); |
| 1566 | swap = bit; |
| 1567 | fsub(out: b, f1: x1, f2: z1); |
| 1568 | fadd(out: a, f1: x1, f2: z1); |
| 1569 | fmul(out: c, f1: &table_ladder[4 * k], f2: b, tmp: ef); |
| 1570 | fsub(out: b, f1: a, f2: c); |
| 1571 | fadd(out: a, f1: a, f2: c); |
| 1572 | fsqr2(out: ab, f: ab, tmp: efgh); |
| 1573 | fmul2(out: xz1, f1: xz2, f2: ab, tmp: efgh); |
| 1574 | ++j; |
| 1575 | } |
| 1576 | j = 0; |
| 1577 | } |
| 1578 | |
| 1579 | point_double(nq: xz1, tmp1: abcd, tmp2: efgh); |
| 1580 | point_double(nq: xz1, tmp1: abcd, tmp2: efgh); |
| 1581 | point_double(nq: xz1, tmp1: abcd, tmp2: efgh); |
| 1582 | encode_point(o: out, i: xz1); |
| 1583 | |
| 1584 | memzero_explicit(s: tmp, count: sizeof(tmp)); |
| 1585 | } |
| 1586 | |
| 1587 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); |
| 1588 | |
| 1589 | static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], |
| 1590 | const u8 secret[CURVE25519_KEY_SIZE], |
| 1591 | const u8 basepoint[CURVE25519_KEY_SIZE]) |
| 1592 | { |
| 1593 | if (static_branch_likely(&curve25519_use_bmi2_adx)) |
| 1594 | curve25519_ever64(out: mypublic, priv: secret, pub: basepoint); |
| 1595 | else |
| 1596 | curve25519_generic(out: mypublic, scalar: secret, point: basepoint); |
| 1597 | } |
| 1598 | |
| 1599 | static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], |
| 1600 | const u8 secret[CURVE25519_KEY_SIZE]) |
| 1601 | { |
| 1602 | if (static_branch_likely(&curve25519_use_bmi2_adx)) |
| 1603 | curve25519_ever64_base(out: pub, priv: secret); |
| 1604 | else |
| 1605 | curve25519_generic(out: pub, scalar: secret, point: curve25519_base_point); |
| 1606 | } |
| 1607 | |
| 1608 | #define curve25519_mod_init_arch curve25519_mod_init_arch |
| 1609 | static void curve25519_mod_init_arch(void) |
| 1610 | { |
| 1611 | if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) |
| 1612 | static_branch_enable(&curve25519_use_bmi2_adx); |
| 1613 | } |
| 1614 | |