ecc.c source code [linux/crypto/ecc.c]

/*
 * Copyright (c) 2013, 2014 Kenneth MacKay. All rights reserved.
 * Copyright (c) 2019 Vitaly Chikunov <vt@altlinux.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *  * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26
27	#include <crypto/ecc_curve.h>
28	#include <linux/module.h>
29	#include <linux/random.h>
30	#include <linux/slab.h>
31	#include <linux/swab.h>
32	#include <linux/fips.h>
33	#include <crypto/ecdh.h>
34	#include <crypto/rng.h>
35	#include <crypto/internal/ecc.h>
36	#include <linux/unaligned.h>
37	#include <linux/ratelimit.h>
38
39	#include "ecc_curve_defs.h"
40
/* 128-bit unsigned value stored as two 64-bit halves. */
typedef struct {
	u64 m_low;	/* least significant 64 bits */
	u64 m_high;	/* most significant 64 bits */
} uint128_t;
45
46	/ Returns curv25519 curve param /
47	const struct ecc_curve ecc_get_curve25519(void*)
48	{
49	return &ecc_25519;
50	}
51	EXPORT_SYMBOL(ecc_get_curve25519);
52
53	const struct ecc_curve ecc_get_curve(unsigned* int curve_id)
54	{
55	switch (curve_id) {
56	/ In FIPS mode only allow P256 and higher /
57	case ECC_CURVE_NIST_P192:
58	return fips_enabled ? NULL : &nist_p192;
59	case ECC_CURVE_NIST_P256:
60	return &nist_p256;
61	case ECC_CURVE_NIST_P384:
62	return &nist_p384;
63	case ECC_CURVE_NIST_P521:
64	return &nist_p521;
65	default:
66	return NULL;
67	}
68	}
69	EXPORT_SYMBOL(ecc_get_curve);
70
71	void ecc_digits_from_bytes(const u8 in, unsigned* int nbytes,
72	u64 out, unsigned* int ndigits)
73	{
74	int diff = ndigits - DIV_ROUND_UP_POW2(nbytes, sizeof(u64));
75	unsigned int o = nbytes & `7`;
76	__be64 msd = `0`;
77
78	/ diff > 0: not enough input bytes: set most significant digits to 0 /
79	if (diff > `0`) {
80	ndigits -= diff;
81	memset(&out[ndigits], `0`, diff * sizeof(u64));
82	}
83
84	if (o) {
85	memcpy((u8 )&msd + sizeof*(msd) - o, in, o);
86	out[--ndigits] = be64_to_cpu(msd);
87	in += o;
88	}
89	ecc_swap_digits(in, out, ndigits);
90	}
91	EXPORT_SYMBOL(ecc_digits_from_bytes);
92
93	static u64 ecc_alloc_digits_space(unsigned* int ndigits)
94	{
95	size_t len = ndigits * sizeof(u64);
96
97	if (!len)
98	return NULL;
99
100	return kmalloc(len, GFP_KERNEL);
101	}
102
103	static void ecc_free_digits_space(u64 *space)
104	{
105	kfree_sensitive(objp: space);
106	}
107
108	struct ecc_point ecc_alloc_point(unsigned* int ndigits)
109	{
110	struct ecc_point p = kmalloc(sizeof(p), GFP_KERNEL);
111
112	if (!p)
113	return NULL;
114
115	p->x = ecc_alloc_digits_space(ndigits);
116	if (!p->x)
117	goto err_alloc_x;
118
119	p->y = ecc_alloc_digits_space(ndigits);
120	if (!p->y)
121	goto err_alloc_y;
122
123	p->ndigits = ndigits;
124
125	return p;
126
127	err_alloc_y:
128	ecc_free_digits_space(space: p->x);
129	err_alloc_x:
130	kfree(objp: p);
131	return NULL;
132	}
133	EXPORT_SYMBOL(ecc_alloc_point);
134
135	void ecc_free_point(struct ecc_point *p)
136	{
137	if (!p)
138	return;
139
140	kfree_sensitive(objp: p->x);
141	kfree_sensitive(objp: p->y);
142	kfree_sensitive(objp: p);
143	}
144	EXPORT_SYMBOL(ecc_free_point);
145
/* Sets the first @ndigits digits of @vli to 0. */
static void vli_clear(u64 *vli, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		vli[i] = 0;
}
153
/* Returns true if vli == 0, false otherwise. */
bool vli_is_zero(const u64 *vli, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++) {
		if (vli[i])
			return false;
	}

	return true;
}
EXPORT_SYMBOL(vli_is_zero);
167
/*
 * Returns nonzero if bit @bit of vli is set.
 *
 * Bit 0 is the least significant bit of vli[0]. The return value is the
 * masked digit, not a normalized 0/1.
 */
static u64 vli_test_bit(const u64 *vli, unsigned int bit)
{
	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
}
173
174	static bool vli_is_negative(const u64 vli, unsigned* int ndigits)
175	{
176	return vli_test_bit(vli, bit: ndigits * `64` - `1`);
177	}
178
/* Counts the number of 64-bit "digits" in vli. */
static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
{
	int i;

	/* Search from the end until we find a non-zero digit.
	 * We do it in reverse because we expect that most digits will
	 * be nonzero.
	 */
	for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--)
		;

	return (i + 1);
}
192
193	/ Counts the number of bits required for vli. /
194	unsigned int vli_num_bits(const u64 vli, unsigned* int ndigits)
195	{
196	unsigned int i, num_digits;
197	u64 digit;
198
199	num_digits = vli_num_digits(vli, ndigits);
200	if (num_digits == `0`)
201	return `0`;
202
203	digit = vli[num_digits - `1`];
204	for (i = `0`; digit; i++)
205	digit >>= `1`;
206
207	return ((num_digits - `1`) * `64` + i);
208	}
209	EXPORT_SYMBOL(vli_num_bits);
210
211	/ Set dest from unaligned bit string src. /
212	void vli_from_be64(u64 dest, const* void src, unsigned* int ndigits)
213	{
214	int i;
215	const u64 *from = src;
216
217	for (i = `0`; i < ndigits; i++)
218	dest[i] = get_unaligned_be64(p: &from[ndigits - `1` - i]);
219	}
220	EXPORT_SYMBOL(vli_from_be64);
221
222	void vli_from_le64(u64 dest, const* void src, unsigned* int ndigits)
223	{
224	int i;
225	const u64 *from = src;
226
227	for (i = `0`; i < ndigits; i++)
228	dest[i] = get_unaligned_le64(p: &from[i]);
229	}
230	EXPORT_SYMBOL(vli_from_le64);
231
/* Sets dest = src. */
static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		dest[i] = src[i];
}
240
/*
 * Returns sign of left - right.
 *
 * Digits are compared from the most significant one down; the result is
 * 1, -1 or 0.
 */
int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
{
	int i;

	for (i = ndigits - 1; i >= 0; i--) {
		if (left[i] > right[i])
			return 1;
		else if (left[i] < right[i])
			return -1;
	}

	return 0;
}
EXPORT_SYMBOL(vli_cmp);
256
/* Computes result = in << c, returning carry. Can modify in place
 * (if result == in). 0 < shift < 64.
 *
 * The returned carry holds the bits shifted out of the top digit.
 */
static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
		      unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 temp = in[i];

		result[i] = (temp << shift) | carry;
		carry = temp >> (64 - shift);
	}

	return carry;
}
275
/*
 * Computes vli = vli >> 1.
 *
 * Digits are processed from the most significant one down, so the bit
 * dropped from each digit becomes the top bit of the next lower digit.
 * An index is used instead of a decrementing pointer, so no pointer to
 * before the start of @vli is ever formed.
 */
static void vli_rshift1(u64 *vli, unsigned int ndigits)
{
	u64 carry = 0;
	unsigned int i = ndigits;

	while (i-- > 0) {
		u64 temp = vli[i];

		vli[i] = (temp >> 1) | carry;
		carry = temp << 63;
	}
}
290
/* Computes result = left + right, returning carry. Can modify in place. */
static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
		   unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 sum;

		sum = left[i] + right[i] + carry;
		/* If sum == left[i] the incoming carry is kept unchanged. */
		if (sum != left[i])
			carry = (sum < left[i]);

		result[i] = sum;
	}

	return carry;
}
310
/*
 * Computes result = left + right for a single-digit right, returning
 * carry. Can modify in place.
 */
static u64 vli_uadd(u64 *result, const u64 *left, u64 right,
		    unsigned int ndigits)
{
	/* The addend is fed in as the carry into the lowest digit. */
	u64 carry = right;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 sum;

		sum = left[i] + carry;
		if (sum != left[i])
			carry = (sum < left[i]);
		else
			carry = !!carry;

		result[i] = sum;
	}

	return carry;
}
332
/* Computes result = left - right, returning borrow. Can modify in place. */
u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
	    unsigned int ndigits)
{
	u64 borrow = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 diff;

		diff = left[i] - right[i] - borrow;
		/* If diff == left[i] the incoming borrow is kept unchanged. */
		if (diff != left[i])
			borrow = (diff > left[i]);

		result[i] = diff;
	}

	return borrow;
}
EXPORT_SYMBOL(vli_sub);
353
/*
 * Computes result = left - right for a single-digit right, returning
 * borrow. Can modify in place.
 */
static u64 vli_usub(u64 *result, const u64 *left, u64 right,
		    unsigned int ndigits)
{
	/* The subtrahend is fed in as the borrow into the lowest digit. */
	u64 borrow = right;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 diff;

		diff = left[i] - borrow;
		if (diff != left[i])
			borrow = (diff > left[i]);

		result[i] = diff;
	}

	return borrow;
}
373
/*
 * Returns the full 128-bit product left * right.
 *
 * Uses the compiler's unsigned __int128 when CONFIG_ARCH_SUPPORTS_INT128
 * is defined; otherwise builds the product from four 32x32-bit partial
 * products.
 */
static uint128_t mul_64_64(u64 left, u64 right)
{
	uint128_t result;
#if defined(CONFIG_ARCH_SUPPORTS_INT128)
	unsigned __int128 m = (unsigned __int128)left * right;

	result.m_low  = m;
	result.m_high = m >> 64;
#else
	u64 a0 = left & 0xffffffffull;
	u64 a1 = left >> 32;
	u64 b0 = right & 0xffffffffull;
	u64 b1 = right >> 32;
	u64 m0 = a0 * b0;
	u64 m1 = a0 * b1;
	u64 m2 = a1 * b0;
	u64 m3 = a1 * b1;

	m2 += (m0 >> 32);
	m2 += m1;

	/* Overflow */
	if (m2 < m1)
		m3 += 0x100000000ull;

	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
	result.m_high = m3 + (m2 >> 32);
#endif
	return result;
}
404
/*
 * Returns a + b as a 128-bit value. A carry out of the low half is
 * propagated into the high half; a carry out of the high half is dropped.
 */
static uint128_t add_128_128(uint128_t a, uint128_t b)
{
	uint128_t result;

	result.m_low = a.m_low + b.m_low;
	result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);

	return result;
}
414
415	static void vli_mult(u64 result, const* u64 left, const* u64 *right,
416	unsigned int ndigits)
417	{
418	uint128_t r01 = { `0`, `0` };
419	u64 r2 = `0`;
420	unsigned int i, k;
421
422	/ Compute each digit of result in sequence, maintaining the*
423	* carries.
424	*/
425	for (k = `0`; k < ndigits * `2` - `1`; k++) {
426	unsigned int min;
427
428	if (k < ndigits)
429	min = `0`;
430	else
431	min = (k + `1`) - ndigits;
432
433	for (i = min; i <= k && i < ndigits; i++) {
434	uint128_t product;
435
436	product = mul_64_64(left: left[i], right: right[k - i]);
437
438	r01 = add_128_128(a: r01, b: product);
439	r2 += (r01.m_high < product.m_high);
440	}
441
442	result[k] = r01.m_low;
443	r01.m_low = r01.m_high;
444	r01.m_high = r2;
445	r2 = `0`;
446	}
447
448	result[ndigits * `2` - `1`] = r01.m_low;
449	}
450
451	/ Compute product = left * right, for a small right value. /
452	static void vli_umult(u64 result, const* u64 *left, u32 right,
453	unsigned int ndigits)
454	{
455	uint128_t r01 = { `0` };
456	unsigned int k;
457
458	for (k = `0`; k < ndigits; k++) {
459	uint128_t product;
460
461	product = mul_64_64(left: left[k], right);
462	r01 = add_128_128(a: r01, b: product);
463	/ no carry /
464	result[k] = r01.m_low;
465	r01.m_low = r01.m_high;
466	r01.m_high = `0`;
467	}
468	result[k] = r01.m_low;
469	for (++k; k < ndigits * `2`; k++)
470	result[k] = `0`;
471	}
472
473	static void vli_square(u64 result, const* u64 left, unsigned* int ndigits)
474	{
475	uint128_t r01 = { `0`, `0` };
476	u64 r2 = `0`;
477	int i, k;
478
479	for (k = `0`; k < ndigits * `2` - `1`; k++) {
480	unsigned int min;
481
482	if (k < ndigits)
483	min = `0`;
484	else
485	min = (k + `1`) - ndigits;
486
487	for (i = min; i <= k && i <= k - i; i++) {
488	uint128_t product;
489
490	product = mul_64_64(left: left[i], right: left[k - i]);
491
492	if (i < k - i) {
493	r2 += product.m_high >> `63`;
494	product.m_high = (product.m_high << `1`) \|
495	(product.m_low >> `63`);
496	product.m_low <<= `1`;
497	}
498
499	r01 = add_128_128(a: r01, b: product);
500	r2 += (r01.m_high < product.m_high);
501	}
502
503	result[k] = r01.m_low;
504	r01.m_low = r01.m_high;
505	r01.m_high = r2;
506	r2 = `0`;
507	}
508
509	result[ndigits * `2` - `1`] = r01.m_low;
510	}
511
512	/ Computes result = (left + right) % mod.*
513	* Assumes that left < mod and right < mod, result != mod.
514	*/
515	static void vli_mod_add(u64 result, const* u64 left, const* u64 *right,
516	const u64 mod, unsigned* int ndigits)
517	{
518	u64 carry;
519
520	carry = vli_add(result, left, right, ndigits);
521
522	/ result > mod (result = mod + remainder), so subtract mod to*
523	* get remainder.
524	*/
525	if (carry \|\| vli_cmp(result, mod, ndigits) >= `0`)
526	vli_sub(result, result, mod, ndigits);
527	}
528
529	/ Computes result = (left - right) % mod.*
530	* Assumes that left < mod and right < mod, result != mod.
531	*/
532	static void vli_mod_sub(u64 result, const* u64 left, const* u64 *right,
533	const u64 mod, unsigned* int ndigits)
534	{
535	u64 borrow = vli_sub(result, left, right, ndigits);
536
537	/ In this case, p_result == -diff == (max int) - diff.*
538	* Since -x % d == d - x, we can get the correct result from
539	* result + mod (with overflow).
540	*/
541	if (borrow)
542	vli_add(result, left: result, right: mod, ndigits);
543	}
544
545	/*
546	* Computes result = product % mod
547	* for special form moduli: p = 2^k-c, for small c (note the minus sign)
548	*
549	* References:
550	* R. Crandall, C. Pomerance. Prime Numbers: A Computational Perspective.
551	* 9 Fast Algorithms for Large-Integer Arithmetic. 9.2.3 Moduli of special form
552	* Algorithm 9.2.13 (Fast mod operation for special-form moduli).
553	*/
554	static void vli_mmod_special(u64 result, const* u64 *product,
555	const u64 mod, unsigned* int ndigits)
556	{
557	u64 c = -mod[`0`];
558	u64 t[ECC_MAX_DIGITS * `2`];
559	u64 r[ECC_MAX_DIGITS * `2`];
560
561	vli_set(dest: r, src: product, ndigits: ndigits * `2`);
562	while (!vli_is_zero(r + ndigits, ndigits)) {
563	vli_umult(result: t, left: r + ndigits, right: c, ndigits);
564	vli_clear(vli: r + ndigits, ndigits);
565	vli_add(result: r, left: r, right: t, ndigits: ndigits * `2`);
566	}
567	vli_set(dest: t, src: mod, ndigits);
568	vli_clear(vli: t + ndigits, ndigits);
569	while (vli_cmp(r, t, ndigits * `2`) >= `0`)
570	vli_sub(r, r, t, ndigits * `2`);
571	vli_set(dest: result, src: r, ndigits);
572	}
573
574	/*
575	* Computes result = product % mod
576	* for special form moduli: p = 2^{k-1}+c, for small c (note the plus sign)
577	* where k-1 does not fit into qword boundary by -1 bit (such as 255).
578
579	* References (loosely based on):
580	* A. Menezes, P. van Oorschot, S. Vanstone. Handbook of Applied Cryptography.
581	* 14.3.4 Reduction methods for moduli of special form. Algorithm 14.47.
582	* URL: http://cacr.uwaterloo.ca/hac/about/chap14.pdf
583	*
584	* H. Cohen, G. Frey, R. Avanzi, C. Doche, T. Lange, K. Nguyen, F. Vercauteren.
585	* Handbook of Elliptic and Hyperelliptic Curve Cryptography.
586	* Algorithm 10.25 Fast reduction for special form moduli
587	*/
588	static void vli_mmod_special2(u64 result, const* u64 *product,
589	const u64 mod, unsigned* int ndigits)
590	{
591	u64 c2 = mod[`0`] * `2`;
592	u64 q[ECC_MAX_DIGITS];
593	u64 r[ECC_MAX_DIGITS * `2`];
594	u64 m[ECC_MAX_DIGITS * `2`]; / expanded mod /
595	int carry; / last bit that doesn't fit into q /
596	int i;
597
598	vli_set(dest: m, src: mod, ndigits);
599	vli_clear(vli: m + ndigits, ndigits);
600
601	vli_set(dest: r, src: product, ndigits);
602	/ q and carry are top bits /
603	vli_set(dest: q, src: product + ndigits, ndigits);
604	vli_clear(vli: r + ndigits, ndigits);
605	carry = vli_is_negative(vli: r, ndigits);
606	if (carry)
607	r[ndigits - `1`] &= (`1ull` << `63`) - `1`;
608	for (i = `1`; carry \|\| !vli_is_zero(q, ndigits); i++) {
609	u64 qc[ECC_MAX_DIGITS * `2`];
610
611	vli_umult(result: qc, left: q, right: c2, ndigits);
612	if (carry)
613	vli_uadd(result: qc, left: qc, right: mod[`0`], ndigits: ndigits * `2`);
614	vli_set(dest: q, src: qc + ndigits, ndigits);
615	vli_clear(vli: qc + ndigits, ndigits);
616	carry = vli_is_negative(vli: qc, ndigits);
617	if (carry)
618	qc[ndigits - `1`] &= (`1ull` << `63`) - `1`;
619	if (i & `1`)
620	vli_sub(r, r, qc, ndigits * `2`);
621	else
622	vli_add(result: r, left: r, right: qc, ndigits: ndigits * `2`);
623	}
624	while (vli_is_negative(vli: r, ndigits: ndigits * `2`))
625	vli_add(result: r, left: r, right: m, ndigits: ndigits * `2`);
626	while (vli_cmp(r, m, ndigits * `2`) >= `0`)
627	vli_sub(r, r, m, ndigits * `2`);
628
629	vli_set(dest: result, src: r, ndigits);
630	}
631
632	/*
633	* Computes result = product % mod, where product is 2N words long.
634	* Reference: Ken MacKay's micro-ecc.
635	* Currently only designed to work for curve_p or curve_n.
636	*/
637	static void vli_mmod_slow(u64 result, u64 product, const u64 *mod,
638	unsigned int ndigits)
639	{
640	u64 mod_m[`2` * ECC_MAX_DIGITS];
641	u64 tmp[`2` * ECC_MAX_DIGITS];
642	u64 *v[`2`] = { tmp, product };
643	u64 carry = `0`;
644	unsigned int i;
645	/ Shift mod so its highest set bit is at the maximum position. /
646	int shift = (ndigits * `2` * `64`) - vli_num_bits(mod, ndigits);
647	int word_shift = shift / `64`;
648	int bit_shift = shift % `64`;
649
650	vli_clear(vli: mod_m, ndigits: word_shift);
651	if (bit_shift > `0`) {
652	for (i = `0`; i < ndigits; ++i) {
653	mod_m[word_shift + i] = (mod[i] << bit_shift) \| carry;
654	carry = mod[i] >> (`64` - bit_shift);
655	}
656	} else
657	vli_set(dest: mod_m + word_shift, src: mod, ndigits);
658
659	for (i = `1`; shift >= `0`; --shift) {
660	u64 borrow = `0`;
661	unsigned int j;
662
663	for (j = `0`; j < ndigits * `2`; ++j) {
664	u64 diff = v[i][j] - mod_m[j] - borrow;
665
666	if (diff != v[i][j])
667	borrow = (diff > v[i][j]);
668	v[`1` - i][j] = diff;
669	}
670	i = !(i ^ borrow); / Swap the index if there was no borrow /
671	vli_rshift1(vli: mod_m, ndigits);
672	mod_m[ndigits - `1`] \|= mod_m[ndigits] << (`64` - `1`);
673	vli_rshift1(vli: mod_m + ndigits, ndigits);
674	}
675	vli_set(dest: result, src: v[i], ndigits);
676	}
677
678	/ Computes result = product % mod using Barrett's reduction with precomputed*
679	* value mu appended to the mod after ndigits, mu = (2^{2w} / mod) and have
680	* length ndigits + 1, where mu * (2^w - 1) should not overflow ndigits
681	* boundary.
682	*
683	* Reference:
684	* R. Brent, P. Zimmermann. Modern Computer Arithmetic. 2010.
685	* 2.4.1 Barrett's algorithm. Algorithm 2.5.
686	*/
687	static void vli_mmod_barrett(u64 result, u64 product, const u64 *mod,
688	unsigned int ndigits)
689	{
690	u64 q[ECC_MAX_DIGITS * `2`];
691	u64 r[ECC_MAX_DIGITS * `2`];
692	const u64 *mu = mod + ndigits;
693
694	vli_mult(result: q, left: product + ndigits, right: mu, ndigits);
695	if (mu[ndigits])
696	vli_add(result: q + ndigits, left: q + ndigits, right: product + ndigits, ndigits);
697	vli_mult(result: r, left: mod, right: q + ndigits, ndigits);
698	vli_sub(r, product, r, ndigits * `2`);
699	while (!vli_is_zero(r + ndigits, ndigits) \|\|
700	vli_cmp(r, mod, ndigits) != -`1`) {
701	u64 carry;
702
703	carry = vli_sub(r, r, mod, ndigits);
704	vli_usub(result: r + ndigits, left: r + ndigits, right: carry, ndigits);
705	}
706	vli_set(dest: result, src: r, ndigits);
707	}
708
709	/ Computes p_result = p_product % curve_p.*
710	* See algorithm 5 and 6 from
711	* http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
712	*/
713	static void vli_mmod_fast_192(u64 result, const* u64 *product,
714	const u64 curve_prime, u64 tmp)
715	{
716	const unsigned int ndigits = ECC_CURVE_NIST_P192_DIGITS;
717	int carry;
718
719	vli_set(dest: result, src: product, ndigits);
720
721	vli_set(dest: tmp, src: &product[`3`], ndigits);
722	carry = vli_add(result, left: result, right: tmp, ndigits);
723
724	tmp[`0`] = `0`;
725	tmp[`1`] = product[`3`];
726	tmp[`2`] = product[`4`];
727	carry += vli_add(result, left: result, right: tmp, ndigits);
728
729	tmp[`0`] = tmp[`1`] = product[`5`];
730	tmp[`2`] = `0`;
731	carry += vli_add(result, left: result, right: tmp, ndigits);
732
733	while (carry \|\| vli_cmp(curve_prime, result, ndigits) != `1`)
734	carry -= vli_sub(result, result, curve_prime, ndigits);
735	}
736
737	/ Computes result = product % curve_prime*
738	* from http://www.nsa.gov/ia/_files/nist-routines.pdf
739	*/
740	static void vli_mmod_fast_256(u64 result, const* u64 *product,
741	const u64 curve_prime, u64 tmp)
742	{
743	int carry;
744	const unsigned int ndigits = ECC_CURVE_NIST_P256_DIGITS;
745
746	/ t /
747	vli_set(dest: result, src: product, ndigits);
748
749	/ s1 /
750	tmp[`0`] = `0`;
751	tmp[`1`] = product[`5`] & `0xffffffff00000000ull`;
752	tmp[`2`] = product[`6`];
753	tmp[`3`] = product[`7`];
754	carry = vli_lshift(result: tmp, in: tmp, shift: `1`, ndigits);
755	carry += vli_add(result, left: result, right: tmp, ndigits);
756
757	/ s2 /
758	tmp[`1`] = product[`6`] << `32`;
759	tmp[`2`] = (product[`6`] >> `32`) \| (product[`7`] << `32`);
760	tmp[`3`] = product[`7`] >> `32`;
761	carry += vli_lshift(result: tmp, in: tmp, shift: `1`, ndigits);
762	carry += vli_add(result, left: result, right: tmp, ndigits);
763
764	/ s3 /
765	tmp[`0`] = product[`4`];
766	tmp[`1`] = product[`5`] & `0xffffffff`;
767	tmp[`2`] = `0`;
768	tmp[`3`] = product[`7`];
769	carry += vli_add(result, left: result, right: tmp, ndigits);
770
771	/ s4 /
772	tmp[`0`] = (product[`4`] >> `32`) \| (product[`5`] << `32`);
773	tmp[`1`] = (product[`5`] >> `32`) \| (product[`6`] & `0xffffffff00000000ull`);
774	tmp[`2`] = product[`7`];
775	tmp[`3`] = (product[`6`] >> `32`) \| (product[`4`] << `32`);
776	carry += vli_add(result, left: result, right: tmp, ndigits);
777
778	/ d1 /
779	tmp[`0`] = (product[`5`] >> `32`) \| (product[`6`] << `32`);
780	tmp[`1`] = (product[`6`] >> `32`);
781	tmp[`2`] = `0`;
782	tmp[`3`] = (product[`4`] & `0xffffffff`) \| (product[`5`] << `32`);
783	carry -= vli_sub(result, result, tmp, ndigits);
784
785	/ d2 /
786	tmp[`0`] = product[`6`];
787	tmp[`1`] = product[`7`];
788	tmp[`2`] = `0`;
789	tmp[`3`] = (product[`4`] >> `32`) \| (product[`5`] & `0xffffffff00000000ull`);
790	carry -= vli_sub(result, result, tmp, ndigits);
791
792	/ d3 /
793	tmp[`0`] = (product[`6`] >> `32`) \| (product[`7`] << `32`);
794	tmp[`1`] = (product[`7`] >> `32`) \| (product[`4`] << `32`);
795	tmp[`2`] = (product[`4`] >> `32`) \| (product[`5`] << `32`);
796	tmp[`3`] = (product[`6`] << `32`);
797	carry -= vli_sub(result, result, tmp, ndigits);
798
799	/ d4 /
800	tmp[`0`] = product[`7`];
801	tmp[`1`] = product[`4`] & `0xffffffff00000000ull`;
802	tmp[`2`] = product[`5`];
803	tmp[`3`] = product[`6`] & `0xffffffff00000000ull`;
804	carry -= vli_sub(result, result, tmp, ndigits);
805
806	if (carry < `0`) {
807	do {
808	carry += vli_add(result, left: result, right: curve_prime, ndigits);
809	} while (carry < `0`);
810	} else {
811	while (carry \|\| vli_cmp(curve_prime, result, ndigits) != `1`)
812	carry -= vli_sub(result, result, curve_prime, ndigits);
813	}
814	}
815
816	#define SL32OR32(x32, y32) (((u64)x32 << 32) \| y32)
817	#define AND64H(x64) (x64 & 0xffFFffFF00000000ull)
818	#define AND64L(x64) (x64 & 0x00000000ffFFffFFull)
819
820	/ Computes result = product % curve_prime*
821	* from "Mathematical routines for the NIST prime elliptic curves"
822	*/
823	static void vli_mmod_fast_384(u64 result, const* u64 *product,
824	const u64 curve_prime, u64 tmp)
825	{
826	int carry;
827	const unsigned int ndigits = ECC_CURVE_NIST_P384_DIGITS;
828
829	/ t /
830	vli_set(dest: result, src: product, ndigits);
831
832	/ s1 /
833	tmp[`0`] = `0`; // 0 \|\| 0
834	tmp[`1`] = `0`; // 0 \|\| 0
835	tmp[`2`] = SL32OR32(product[`11`], (product[`10`]>>`32`)); //a22\|\|a21
836	tmp[`3`] = product[`11`]>>`32`; // 0 \|\|a23
837	tmp[`4`] = `0`; // 0 \|\| 0
838	tmp[`5`] = `0`; // 0 \|\| 0
839	carry = vli_lshift(result: tmp, in: tmp, shift: `1`, ndigits);
840	carry += vli_add(result, left: result, right: tmp, ndigits);
841
842	/ s2 /
843	tmp[`0`] = product[`6`]; //a13\|\|a12
844	tmp[`1`] = product[`7`]; //a15\|\|a14
845	tmp[`2`] = product[`8`]; //a17\|\|a16
846	tmp[`3`] = product[`9`]; //a19\|\|a18
847	tmp[`4`] = product[`10`]; //a21\|\|a20
848	tmp[`5`] = product[`11`]; //a23\|\|a22
849	carry += vli_add(result, left: result, right: tmp, ndigits);
850
851	/ s3 /
852	tmp[`0`] = SL32OR32(product[`11`], (product[`10`]>>`32`)); //a22\|\|a21
853	tmp[`1`] = SL32OR32(product[`6`], (product[`11`]>>`32`)); //a12\|\|a23
854	tmp[`2`] = SL32OR32(product[`7`], (product[`6`])>>`32`); //a14\|\|a13
855	tmp[`3`] = SL32OR32(product[`8`], (product[`7`]>>`32`)); //a16\|\|a15
856	tmp[`4`] = SL32OR32(product[`9`], (product[`8`]>>`32`)); //a18\|\|a17
857	tmp[`5`] = SL32OR32(product[`10`], (product[`9`]>>`32`)); //a20\|\|a19
858	carry += vli_add(result, left: result, right: tmp, ndigits);
859
860	/ s4 /
861	tmp[`0`] = AND64H(product[`11`]); //a23\|\| 0
862	tmp[`1`] = (product[`10`]<<`32`); //a20\|\| 0
863	tmp[`2`] = product[`6`]; //a13\|\|a12
864	tmp[`3`] = product[`7`]; //a15\|\|a14
865	tmp[`4`] = product[`8`]; //a17\|\|a16
866	tmp[`5`] = product[`9`]; //a19\|\|a18
867	carry += vli_add(result, left: result, right: tmp, ndigits);
868
869	/ s5 /
870	tmp[`0`] = `0`; // 0\|\| 0
871	tmp[`1`] = `0`; // 0\|\| 0
872	tmp[`2`] = product[`10`]; //a21\|\|a20
873	tmp[`3`] = product[`11`]; //a23\|\|a22
874	tmp[`4`] = `0`; // 0\|\| 0
875	tmp[`5`] = `0`; // 0\|\| 0
876	carry += vli_add(result, left: result, right: tmp, ndigits);
877
878	/ s6 /
879	tmp[`0`] = AND64L(product[`10`]); // 0 \|\|a20
880	tmp[`1`] = AND64H(product[`10`]); //a21\|\| 0
881	tmp[`2`] = product[`11`]; //a23\|\|a22
882	tmp[`3`] = `0`; // 0 \|\| 0
883	tmp[`4`] = `0`; // 0 \|\| 0
884	tmp[`5`] = `0`; // 0 \|\| 0
885	carry += vli_add(result, left: result, right: tmp, ndigits);
886
887	/ d1 /
888	tmp[`0`] = SL32OR32(product[`6`], (product[`11`]>>`32`)); //a12\|\|a23
889	tmp[`1`] = SL32OR32(product[`7`], (product[`6`]>>`32`)); //a14\|\|a13
890	tmp[`2`] = SL32OR32(product[`8`], (product[`7`]>>`32`)); //a16\|\|a15
891	tmp[`3`] = SL32OR32(product[`9`], (product[`8`]>>`32`)); //a18\|\|a17
892	tmp[`4`] = SL32OR32(product[`10`], (product[`9`]>>`32`)); //a20\|\|a19
893	tmp[`5`] = SL32OR32(product[`11`], (product[`10`]>>`32`)); //a22\|\|a21
894	carry -= vli_sub(result, result, tmp, ndigits);
895
896	/ d2 /
897	tmp[`0`] = (product[`10`]<<`32`); //a20\|\| 0
898	tmp[`1`] = SL32OR32(product[`11`], (product[`10`]>>`32`)); //a22\|\|a21
899	tmp[`2`] = (product[`11`]>>`32`); // 0 \|\|a23
900	tmp[`3`] = `0`; // 0 \|\| 0
901	tmp[`4`] = `0`; // 0 \|\| 0
902	tmp[`5`] = `0`; // 0 \|\| 0
903	carry -= vli_sub(result, result, tmp, ndigits);
904
905	/ d3 /
906	tmp[`0`] = `0`; // 0 \|\| 0
907	tmp[`1`] = AND64H(product[`11`]); //a23\|\| 0
908	tmp[`2`] = product[`11`]>>`32`; // 0 \|\|a23
909	tmp[`3`] = `0`; // 0 \|\| 0
910	tmp[`4`] = `0`; // 0 \|\| 0
911	tmp[`5`] = `0`; // 0 \|\| 0
912	carry -= vli_sub(result, result, tmp, ndigits);
913
914	if (carry < `0`) {
915	do {
916	carry += vli_add(result, left: result, right: curve_prime, ndigits);
917	} while (carry < `0`);
918	} else {
919	while (carry \|\| vli_cmp(curve_prime, result, ndigits) != `1`)
920	carry -= vli_sub(result, result, curve_prime, ndigits);
921	}
922
923	}
924
925	#undef SL32OR32
926	#undef AND64H
927	#undef AND64L
928
929	/*
930	* Computes result = product % curve_prime
931	* from "Recommendations for Discrete Logarithm-Based Cryptography:
932	* Elliptic Curve Domain Parameters" section G.1.4
933	*/
934	static void vli_mmod_fast_521(u64 result, const* u64 *product,
935	const u64 curve_prime, u64 tmp)
936	{
937	const unsigned int ndigits = ECC_CURVE_NIST_P521_DIGITS;
938	size_t i;
939
940	/ Initialize result with lowest 521 bits from product /
941	vli_set(dest: result, src: product, ndigits);
942	result[`8`] &= `0x1ff`;
943
944	for (i = `0`; i < ndigits; i++)
945	tmp[i] = (product[`8` + i] >> `9`) \| (product[`9` + i] << `55`);
946	tmp[`8`] &= `0x1ff`;
947
948	vli_mod_add(result, left: result, right: tmp, mod: curve_prime, ndigits);
949	}
950
951	/ Computes result = product % curve_prime for different curve_primes.*
952	*
953	* Note that curve_primes are distinguished just by heuristic check and
954	* not by complete conformance check.
955	*/
956	static bool vli_mmod_fast(u64 result, u64 product,
957	const struct ecc_curve *curve)
958	{
959	u64 tmp[`2` * ECC_MAX_DIGITS];
960	const u64 *curve_prime = curve->p;
961	const unsigned int ndigits = curve->g.ndigits;
962
963	/ All NIST curves have name prefix 'nist_' /
964	if (strncmp(curve->name, "nist_", `5`) != `0`) {
965	/ Try to handle Pseudo-Marsenne primes. /
966	if (curve_prime[ndigits - `1`] == -`1ull`) {
967	vli_mmod_special(result, product, mod: curve_prime,
968	ndigits);
969	return true;
970	} else if (curve_prime[ndigits - `1`] == `1ull` << `63` &&
971	curve_prime[ndigits - `2`] == `0`) {
972	vli_mmod_special2(result, product, mod: curve_prime,
973	ndigits);
974	return true;
975	}
976	vli_mmod_barrett(result, product, mod: curve_prime, ndigits);
977	return true;
978	}
979
980	switch (ndigits) {
981	case ECC_CURVE_NIST_P192_DIGITS:
982	vli_mmod_fast_192(result, product, curve_prime, tmp);
983	break;
984	case ECC_CURVE_NIST_P256_DIGITS:
985	vli_mmod_fast_256(result, product, curve_prime, tmp);
986	break;
987	case ECC_CURVE_NIST_P384_DIGITS:
988	vli_mmod_fast_384(result, product, curve_prime, tmp);
989	break;
990	case ECC_CURVE_NIST_P521_DIGITS:
991	vli_mmod_fast_521(result, product, curve_prime, tmp);
992	break;
993	default:
994	pr_err_ratelimited("ecc: unsupported digits size!\n");
995	return false;
996	}
997
998	return true;
999	}
1000
1001	/ Computes result = (left * right) % mod.*
1002	* Assumes that mod is big enough curve order.
1003	*/
1004	void vli_mod_mult_slow(u64 result, const* u64 left, const* u64 *right,
1005	const u64 mod, unsigned* int ndigits)
1006	{
1007	u64 product[ECC_MAX_DIGITS * `2`];
1008
1009	vli_mult(result: product, left, right, ndigits);
1010	vli_mmod_slow(result, product, mod, ndigits);
1011	}
1012	EXPORT_SYMBOL(vli_mod_mult_slow);
1013
1014	/ Computes result = (left * right) % curve_prime. /
1015	static void vli_mod_mult_fast(u64 result, const* u64 left, const* u64 *right,
1016	const struct ecc_curve *curve)
1017	{
1018	u64 product[`2` * ECC_MAX_DIGITS];
1019
1020	vli_mult(result: product, left, right, ndigits: curve->g.ndigits);
1021	vli_mmod_fast(result, product, curve);
1022	}
1023
1024	/ Computes result = left^2 % curve_prime. /
1025	static void vli_mod_square_fast(u64 result, const* u64 *left,
1026	const struct ecc_curve *curve)
1027	{
1028	u64 product[`2` * ECC_MAX_DIGITS];
1029
1030	vli_square(result: product, left, ndigits: curve->g.ndigits);
1031	vli_mmod_fast(result, product, curve);
1032	}
1033
1034	#define EVEN(vli) (!(vli[0] & 1))
1035	/ Computes result = (1 / p_input) % mod. All VLIs are the same size.*
1036	* See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
1037	* https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
1038	*/
1039	void vli_mod_inv(u64 result, const* u64 input, const* u64 *mod,
1040	unsigned int ndigits)
1041	{
1042	u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
1043	u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
1044	u64 carry;
1045	int cmp_result;
1046
1047	if (vli_is_zero(input, ndigits)) {
1048	vli_clear(vli: result, ndigits);
1049	return;
1050	}
1051
1052	vli_set(dest: a, src: input, ndigits);
1053	vli_set(dest: b, src: mod, ndigits);
1054	vli_clear(vli: u, ndigits);
1055	u[`0`] = `1`;
1056	vli_clear(vli: v, ndigits);
1057
1058	while ((cmp_result = vli_cmp(a, b, ndigits)) != `0`) {
1059	carry = `0`;
1060
1061	if (EVEN(a)) {
1062	vli_rshift1(vli: a, ndigits);
1063
1064	if (!EVEN(u))
1065	carry = vli_add(result: u, left: u, right: mod, ndigits);
1066
1067	vli_rshift1(vli: u, ndigits);
1068	if (carry)
1069	u[ndigits - `1`] \|= `0x8000000000000000ull`;
1070	} else if (EVEN(b)) {
1071	vli_rshift1(vli: b, ndigits);
1072
1073	if (!EVEN(v))
1074	carry = vli_add(result: v, left: v, right: mod, ndigits);
1075
1076	vli_rshift1(vli: v, ndigits);
1077	if (carry)
1078	v[ndigits - `1`] \|= `0x8000000000000000ull`;
1079	} else if (cmp_result > `0`) {
1080	vli_sub(a, a, b, ndigits);
1081	vli_rshift1(vli: a, ndigits);
1082
1083	if (vli_cmp(u, v, ndigits) < `0`)
1084	vli_add(result: u, left: u, right: mod, ndigits);
1085
1086	vli_sub(u, u, v, ndigits);
1087	if (!EVEN(u))
1088	carry = vli_add(result: u, left: u, right: mod, ndigits);
1089
1090	vli_rshift1(vli: u, ndigits);
1091	if (carry)
1092	u[ndigits - `1`] \|= `0x8000000000000000ull`;
1093	} else {
1094	vli_sub(b, b, a, ndigits);
1095	vli_rshift1(vli: b, ndigits);
1096
1097	if (vli_cmp(v, u, ndigits) < `0`)
1098	vli_add(result: v, left: v, right: mod, ndigits);
1099
1100	vli_sub(v, v, u, ndigits);
1101	if (!EVEN(v))
1102	carry = vli_add(result: v, left: v, right: mod, ndigits);
1103
1104	vli_rshift1(vli: v, ndigits);
1105	if (carry)
1106	v[ndigits - `1`] \|= `0x8000000000000000ull`;
1107	}
1108	}
1109
1110	vli_set(dest: result, src: u, ndigits);
1111	}
1112	EXPORT_SYMBOL(vli_mod_inv);
1113
/* ------ Point operations ------ */
1115
1116	/ Returns true if p_point is the point at infinity, false otherwise. /
1117	bool ecc_point_is_zero(const struct ecc_point *point)
1118	{
1119	return (vli_is_zero(point->x, point->ndigits) &&
1120	vli_is_zero(point->y, point->ndigits));
1121	}
1122	EXPORT_SYMBOL(ecc_point_is_zero);
1123
/* Point multiplication algorithm using Montgomery's ladder with co-Z
 * coordinates. From https://eprint.iacr.org/2011/338.pdf
 */
1127
1128	/ Double in place /
1129	static void ecc_point_double_jacobian(u64 x1, u64 y1, u64 *z1,
1130	const struct ecc_curve *curve)
1131	{
1132	/ t1 = x, t2 = y, t3 = z /
1133	u64 t4[ECC_MAX_DIGITS];
1134	u64 t5[ECC_MAX_DIGITS];
1135	const u64 *curve_prime = curve->p;
1136	const unsigned int ndigits = curve->g.ndigits;
1137
1138	if (vli_is_zero(z1, ndigits))
1139	return;
1140
1141	/ t4 = y1^2 /
1142	vli_mod_square_fast(result: t4, left: y1, curve);
1143	/ t5 = x1y1^2 = A /*
1144	vli_mod_mult_fast(result: t5, left: x1, right: t4, curve);
1145	/ t4 = y1^4 /
1146	vli_mod_square_fast(result: t4, left: t4, curve);
1147	/ t2 = y1z1 = z3 /*
1148	vli_mod_mult_fast(result: y1, left: y1, right: z1, curve);
1149	/ t3 = z1^2 /
1150	vli_mod_square_fast(result: z1, left: z1, curve);
1151
1152	/ t1 = x1 + z1^2 /
1153	vli_mod_add(result: x1, left: x1, right: z1, mod: curve_prime, ndigits);
1154	/ t3 = 2z1^2 /*
1155	vli_mod_add(result: z1, left: z1, right: z1, mod: curve_prime, ndigits);
1156	/ t3 = x1 - z1^2 /
1157	vli_mod_sub(result: z1, left: x1, right: z1, mod: curve_prime, ndigits);
1158	/ t1 = x1^2 - z1^4 /
1159	vli_mod_mult_fast(result: x1, left: x1, right: z1, curve);
1160
1161	/ t3 = 2(x1^2 - z1^4) /*
1162	vli_mod_add(result: z1, left: x1, right: x1, mod: curve_prime, ndigits);
1163	/ t1 = 3(x1^2 - z1^4) /*
1164	vli_mod_add(result: x1, left: x1, right: z1, mod: curve_prime, ndigits);
1165	if (vli_test_bit(vli: x1, bit: `0`)) {
1166	u64 carry = vli_add(result: x1, left: x1, right: curve_prime, ndigits);
1167
1168	vli_rshift1(vli: x1, ndigits);
1169	x1[ndigits - `1`] \|= carry << `63`;
1170	} else {
1171	vli_rshift1(vli: x1, ndigits);
1172	}
1173	/ t1 = 3/2(x1^2 - z1^4) = B /*
1174
1175	/ t3 = B^2 /
1176	vli_mod_square_fast(result: z1, left: x1, curve);
1177	/ t3 = B^2 - A /
1178	vli_mod_sub(result: z1, left: z1, right: t5, mod: curve_prime, ndigits);
1179	/ t3 = B^2 - 2A = x3 /
1180	vli_mod_sub(result: z1, left: z1, right: t5, mod: curve_prime, ndigits);
1181	/ t5 = A - x3 /
1182	vli_mod_sub(result: t5, left: t5, right: z1, mod: curve_prime, ndigits);
1183	/ t1 = B * (A - x3) /
1184	vli_mod_mult_fast(result: x1, left: x1, right: t5, curve);
1185	/ t4 = B * (A - x3) - y1^4 = y3 /
1186	vli_mod_sub(result: t4, left: x1, right: t4, mod: curve_prime, ndigits);
1187
1188	vli_set(dest: x1, src: z1, ndigits);
1189	vli_set(dest: z1, src: y1, ndigits);
1190	vli_set(dest: y1, src: t4, ndigits);
1191	}
1192
1193	/ Modify (x1, y1) => (x1 * z^2, y1 * z^3) /
1194	static void apply_z(u64 x1, u64 y1, u64 z, const* struct ecc_curve *curve)
1195	{
1196	u64 t1[ECC_MAX_DIGITS];
1197
1198	vli_mod_square_fast(result: t1, left: z, curve); / z^2 /
1199	vli_mod_mult_fast(result: x1, left: x1, right: t1, curve); / x1 * z^2 /
1200	vli_mod_mult_fast(result: t1, left: t1, right: z, curve); / z^3 /
1201	vli_mod_mult_fast(result: y1, left: y1, right: t1, curve); / y1 * z^3 /
1202	}
1203
1204	/ P = (x1, y1) => 2P, (x2, y2) => P' /
1205	static void xycz_initial_double(u64 x1, u64 y1, u64 x2, u64 y2,
1206	u64 p_initial_z, const* struct ecc_curve *curve)
1207	{
1208	u64 z[ECC_MAX_DIGITS];
1209	const unsigned int ndigits = curve->g.ndigits;
1210
1211	vli_set(dest: x2, src: x1, ndigits);
1212	vli_set(dest: y2, src: y1, ndigits);
1213
1214	vli_clear(vli: z, ndigits);
1215	z[`0`] = `1`;
1216
1217	if (p_initial_z)
1218	vli_set(dest: z, src: p_initial_z, ndigits);
1219
1220	apply_z(x1, y1, z, curve);
1221
1222	ecc_point_double_jacobian(x1, y1, z1: z, curve);
1223
1224	apply_z(x1: x2, y1: y2, z, curve);
1225	}
1226
1227	/ Input P = (x1, y1, Z), Q = (x2, y2, Z)*
1228	* Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
1229	* or P => P', Q => P + Q
1230	*/
1231	static void xycz_add(u64 x1, u64 y1, u64 x2, u64 y2,
1232	const struct ecc_curve *curve)
1233	{
1234	/ t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 /
1235	u64 t5[ECC_MAX_DIGITS];
1236	const u64 *curve_prime = curve->p;
1237	const unsigned int ndigits = curve->g.ndigits;
1238
1239	/ t5 = x2 - x1 /
1240	vli_mod_sub(result: t5, left: x2, right: x1, mod: curve_prime, ndigits);
1241	/ t5 = (x2 - x1)^2 = A /
1242	vli_mod_square_fast(result: t5, left: t5, curve);
1243	/ t1 = x1A = B /*
1244	vli_mod_mult_fast(result: x1, left: x1, right: t5, curve);
1245	/ t3 = x2A = C /*
1246	vli_mod_mult_fast(result: x2, left: x2, right: t5, curve);
1247	/ t4 = y2 - y1 /
1248	vli_mod_sub(result: y2, left: y2, right: y1, mod: curve_prime, ndigits);
1249	/ t5 = (y2 - y1)^2 = D /
1250	vli_mod_square_fast(result: t5, left: y2, curve);
1251
1252	/ t5 = D - B /
1253	vli_mod_sub(result: t5, left: t5, right: x1, mod: curve_prime, ndigits);
1254	/ t5 = D - B - C = x3 /
1255	vli_mod_sub(result: t5, left: t5, right: x2, mod: curve_prime, ndigits);
1256	/ t3 = C - B /
1257	vli_mod_sub(result: x2, left: x2, right: x1, mod: curve_prime, ndigits);
1258	/ t2 = y1(C - B) /*
1259	vli_mod_mult_fast(result: y1, left: y1, right: x2, curve);
1260	/ t3 = B - x3 /
1261	vli_mod_sub(result: x2, left: x1, right: t5, mod: curve_prime, ndigits);
1262	/ t4 = (y2 - y1)(B - x3) /*
1263	vli_mod_mult_fast(result: y2, left: y2, right: x2, curve);
1264	/ t4 = y3 /
1265	vli_mod_sub(result: y2, left: y2, right: y1, mod: curve_prime, ndigits);
1266
1267	vli_set(dest: x2, src: t5, ndigits);
1268	}
1269
1270	/ Input P = (x1, y1, Z), Q = (x2, y2, Z)*
1271	* Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
1272	* or P => P - Q, Q => P + Q
1273	*/
1274	static void xycz_add_c(u64 x1, u64 y1, u64 x2, u64 y2,
1275	const struct ecc_curve *curve)
1276	{
1277	/ t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 /
1278	u64 t5[ECC_MAX_DIGITS];
1279	u64 t6[ECC_MAX_DIGITS];
1280	u64 t7[ECC_MAX_DIGITS];
1281	const u64 *curve_prime = curve->p;
1282	const unsigned int ndigits = curve->g.ndigits;
1283
1284	/ t5 = x2 - x1 /
1285	vli_mod_sub(result: t5, left: x2, right: x1, mod: curve_prime, ndigits);
1286	/ t5 = (x2 - x1)^2 = A /
1287	vli_mod_square_fast(result: t5, left: t5, curve);
1288	/ t1 = x1A = B /*
1289	vli_mod_mult_fast(result: x1, left: x1, right: t5, curve);
1290	/ t3 = x2A = C /*
1291	vli_mod_mult_fast(result: x2, left: x2, right: t5, curve);
1292	/ t4 = y2 + y1 /
1293	vli_mod_add(result: t5, left: y2, right: y1, mod: curve_prime, ndigits);
1294	/ t4 = y2 - y1 /
1295	vli_mod_sub(result: y2, left: y2, right: y1, mod: curve_prime, ndigits);
1296
1297	/ t6 = C - B /
1298	vli_mod_sub(result: t6, left: x2, right: x1, mod: curve_prime, ndigits);
1299	/ t2 = y1 * (C - B) /
1300	vli_mod_mult_fast(result: y1, left: y1, right: t6, curve);
1301	/ t6 = B + C /
1302	vli_mod_add(result: t6, left: x1, right: x2, mod: curve_prime, ndigits);
1303	/ t3 = (y2 - y1)^2 /
1304	vli_mod_square_fast(result: x2, left: y2, curve);
1305	/ t3 = x3 /
1306	vli_mod_sub(result: x2, left: x2, right: t6, mod: curve_prime, ndigits);
1307
1308	/ t7 = B - x3 /
1309	vli_mod_sub(result: t7, left: x1, right: x2, mod: curve_prime, ndigits);
1310	/ t4 = (y2 - y1)(B - x3) /*
1311	vli_mod_mult_fast(result: y2, left: y2, right: t7, curve);
1312	/ t4 = y3 /
1313	vli_mod_sub(result: y2, left: y2, right: y1, mod: curve_prime, ndigits);
1314
1315	/ t7 = (y2 + y1)^2 = F /
1316	vli_mod_square_fast(result: t7, left: t5, curve);
1317	/ t7 = x3' /
1318	vli_mod_sub(result: t7, left: t7, right: t6, mod: curve_prime, ndigits);
1319	/ t6 = x3' - B /
1320	vli_mod_sub(result: t6, left: t7, right: x1, mod: curve_prime, ndigits);
1321	/ t6 = (y2 + y1)(x3' - B) /*
1322	vli_mod_mult_fast(result: t6, left: t6, right: t5, curve);
1323	/ t2 = y3' /
1324	vli_mod_sub(result: y1, left: t6, right: y1, mod: curve_prime, ndigits);
1325
1326	vli_set(dest: x1, src: t7, ndigits);
1327	}
1328
1329	static void ecc_point_mult(struct ecc_point *result,
1330	const struct ecc_point point, const* u64 *scalar,
1331	u64 initial_z, const* struct ecc_curve *curve,
1332	unsigned int ndigits)
1333	{
1334	/ R0 and R1 /
1335	u64 rx[`2`][ECC_MAX_DIGITS];
1336	u64 ry[`2`][ECC_MAX_DIGITS];
1337	u64 z[ECC_MAX_DIGITS];
1338	u64 sk[`2`][ECC_MAX_DIGITS];
1339	u64 *curve_prime = curve->p;
1340	int i, nb;
1341	int num_bits;
1342	int carry;
1343
1344	carry = vli_add(result: sk[`0`], left: scalar, right: curve->n, ndigits);
1345	vli_add(result: sk[`1`], left: sk[`0`], right: curve->n, ndigits);
1346	scalar = sk[!carry];
1347	if (curve->nbits == `521`) / NIST P521 /
1348	num_bits = curve->nbits + `2`;
1349	else
1350	num_bits = sizeof(u64) * ndigits * `8` + `1`;
1351
1352	vli_set(dest: rx[`1`], src: point->x, ndigits);
1353	vli_set(dest: ry[`1`], src: point->y, ndigits);
1354
1355	xycz_initial_double(x1: rx[`1`], y1: ry[`1`], x2: rx[`0`], y2: ry[`0`], p_initial_z: initial_z, curve);
1356
1357	for (i = num_bits - `2`; i > `0`; i--) {
1358	nb = !vli_test_bit(vli: scalar, bit: i);
1359	xycz_add_c(x1: rx[`1` - nb], y1: ry[`1` - nb], x2: rx[nb], y2: ry[nb], curve);
1360	xycz_add(x1: rx[nb], y1: ry[nb], x2: rx[`1` - nb], y2: ry[`1` - nb], curve);
1361	}
1362
1363	nb = !vli_test_bit(vli: scalar, bit: `0`);
1364	xycz_add_c(x1: rx[`1` - nb], y1: ry[`1` - nb], x2: rx[nb], y2: ry[nb], curve);
1365
1366	/ Find final 1/Z value. /
1367	/ X1 - X0 /
1368	vli_mod_sub(result: z, left: rx[`1`], right: rx[`0`], mod: curve_prime, ndigits);
1369	/ Yb * (X1 - X0) /
1370	vli_mod_mult_fast(result: z, left: z, right: ry[`1` - nb], curve);
1371	/ xP * Yb * (X1 - X0) /
1372	vli_mod_mult_fast(result: z, left: z, right: point->x, curve);
1373
1374	/ 1 / (xP * Yb * (X1 - X0)) /
1375	vli_mod_inv(z, z, curve_prime, point->ndigits);
1376
1377	/ yP / (xP * Yb * (X1 - X0)) /
1378	vli_mod_mult_fast(result: z, left: z, right: point->y, curve);
1379	/ Xb * yP / (xP * Yb * (X1 - X0)) /
1380	vli_mod_mult_fast(result: z, left: z, right: rx[`1` - nb], curve);
1381	/ End 1/Z calculation /
1382
1383	xycz_add(x1: rx[nb], y1: ry[nb], x2: rx[`1` - nb], y2: ry[`1` - nb], curve);
1384
1385	apply_z(x1: rx[`0`], y1: ry[`0`], z, curve);
1386
1387	vli_set(dest: result->x, src: rx[`0`], ndigits);
1388	vli_set(dest: result->y, src: ry[`0`], ndigits);
1389	}
1390
1391	/ Computes R = P + Q mod p /
1392	static void ecc_point_add(const struct ecc_point *result,
1393	const struct ecc_point p, const* struct ecc_point *q,
1394	const struct ecc_curve *curve)
1395	{
1396	u64 z[ECC_MAX_DIGITS];
1397	u64 px[ECC_MAX_DIGITS];
1398	u64 py[ECC_MAX_DIGITS];
1399	unsigned int ndigits = curve->g.ndigits;
1400
1401	vli_set(dest: result->x, src: q->x, ndigits);
1402	vli_set(dest: result->y, src: q->y, ndigits);
1403	vli_mod_sub(result: z, left: result->x, right: p->x, mod: curve->p, ndigits);
1404	vli_set(dest: px, src: p->x, ndigits);
1405	vli_set(dest: py, src: p->y, ndigits);
1406	xycz_add(x1: px, y1: py, x2: result->x, y2: result->y, curve);
1407	vli_mod_inv(z, z, curve->p, ndigits);
1408	apply_z(x1: result->x, y1: result->y, z, curve);
1409	}
1410
1411	/ Computes R = u1P + u2Q mod p using Shamir's trick.*
1412	* Based on: Kenneth MacKay's micro-ecc (2014).
1413	*/
1414	void ecc_point_mult_shamir(const struct ecc_point *result,
1415	const u64 u1, const* struct ecc_point *p,
1416	const u64 u2, const* struct ecc_point *q,
1417	const struct ecc_curve *curve)
1418	{
1419	u64 z[ECC_MAX_DIGITS];
1420	u64 sump[`2`][ECC_MAX_DIGITS];
1421	u64 *rx = result->x;
1422	u64 *ry = result->y;
1423	unsigned int ndigits = curve->g.ndigits;
1424	unsigned int num_bits;
1425	struct ecc_point sum = ECC_POINT_INIT(sump[`0`], sump[`1`], ndigits);
1426	const struct ecc_point *points[`4`];
1427	const struct ecc_point *point;
1428	unsigned int idx;
1429	int i;
1430
1431	ecc_point_add(result: &sum, p, q, curve);
1432	points[`0`] = NULL;
1433	points[`1`] = p;
1434	points[`2`] = q;
1435	points[`3`] = ∑
1436
1437	num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits));
1438	i = num_bits - `1`;
1439	idx = !!vli_test_bit(vli: u1, bit: i);
1440	idx \|= (!!vli_test_bit(vli: u2, bit: i)) << `1`;
1441	point = points[idx];
1442
1443	vli_set(dest: rx, src: point->x, ndigits);
1444	vli_set(dest: ry, src: point->y, ndigits);
1445	vli_clear(vli: z + `1`, ndigits: ndigits - `1`);
1446	z[`0`] = `1`;
1447
1448	for (--i; i >= `0`; i--) {
1449	ecc_point_double_jacobian(x1: rx, y1: ry, z1: z, curve);
1450	idx = !!vli_test_bit(vli: u1, bit: i);
1451	idx \|= (!!vli_test_bit(vli: u2, bit: i)) << `1`;
1452	point = points[idx];
1453	if (point) {
1454	u64 tx[ECC_MAX_DIGITS];
1455	u64 ty[ECC_MAX_DIGITS];
1456	u64 tz[ECC_MAX_DIGITS];
1457
1458	vli_set(dest: tx, src: point->x, ndigits);
1459	vli_set(dest: ty, src: point->y, ndigits);
1460	apply_z(x1: tx, y1: ty, z, curve);
1461	vli_mod_sub(result: tz, left: rx, right: tx, mod: curve->p, ndigits);
1462	xycz_add(x1: tx, y1: ty, x2: rx, y2: ry, curve);
1463	vli_mod_mult_fast(result: z, left: z, right: tz, curve);
1464	}
1465	}
1466	vli_mod_inv(z, z, curve->p, ndigits);
1467	apply_z(x1: rx, y1: ry, z, curve);
1468	}
1469	EXPORT_SYMBOL(ecc_point_mult_shamir);
1470
1471	/*
1472	* This function performs checks equivalent to Appendix A.4.2 of FIPS 186-5.
1473	* Whereas A.4.2 results in an integer in the interval [1, n-1], this function
1474	* ensures that the integer is in the range of [2, n-3]. We are slightly
1475	* stricter because of the currently used scalar multiplication algorithm.
1476	*/
1477	static int __ecc_is_key_valid(const struct ecc_curve *curve,
1478	const u64 private_key, unsigned* int ndigits)
1479	{
1480	u64 one[ECC_MAX_DIGITS] = { `1`, };
1481	u64 res[ECC_MAX_DIGITS];
1482
1483	if (!private_key)
1484	return -EINVAL;
1485
1486	if (curve->g.ndigits != ndigits)
1487	return -EINVAL;
1488
1489	/ Make sure the private key is in the range [2, n-3]. /
1490	if (vli_cmp(one, private_key, ndigits) != -`1`)
1491	return -EINVAL;
1492	vli_sub(res, curve->n, one, ndigits);
1493	vli_sub(res, res, one, ndigits);
1494	if (vli_cmp(res, private_key, ndigits) != `1`)
1495	return -EINVAL;
1496
1497	return `0`;
1498	}
1499
1500	int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
1501	const u64 private_key, unsigned* int private_key_len)
1502	{
1503	int nbytes;
1504	const struct ecc_curve *curve = ecc_get_curve(curve_id);
1505
1506	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
1507
1508	if (private_key_len != nbytes)
1509	return -EINVAL;
1510
1511	return __ecc_is_key_valid(curve, private_key, ndigits);
1512	}
1513	EXPORT_SYMBOL(ecc_is_key_valid);
1514
1515	/*
1516	* ECC private keys are generated using the method of rejection sampling,
1517	* equivalent to that described in FIPS 186-5, Appendix A.2.2.
1518	*
1519	* This method generates a private key uniformly distributed in the range
1520	* [2, n-3].
1521	*/
1522	int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits,
1523	u64 *private_key)
1524	{
1525	const struct ecc_curve *curve = ecc_get_curve(curve_id);
1526	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
1527	unsigned int nbits = vli_num_bits(curve->n, ndigits);
1528	int err;
1529
1530	/*
1531	* Step 1 & 2: check that N is included in Table 1 of FIPS 186-5,
1532	* section 6.1.1.
1533	*/
1534	if (nbits < `224`)
1535	return -EINVAL;
1536
1537	/*
1538	* FIPS 186-5 recommends that the private key should be obtained from a
1539	* RBG with a security strength equal to or greater than the security
1540	* strength associated with N.
1541	*
1542	* The maximum security strength identified by NIST SP800-57pt1r4 for
1543	* ECC is 256 (N >= 512).
1544	*
1545	* This condition is met by the default RNG because it selects a favored
1546	* DRBG with a security strength of 256.
1547	*/
1548	if (crypto_get_default_rng())
1549	return -EFAULT;
1550
1551	/ Step 3: obtain N returned_bits from the DRBG. /
1552	err = crypto_rng_get_bytes(tfm: crypto_default_rng,
1553	rdata: (u8 *)private_key, dlen: nbytes);
1554	crypto_put_default_rng();
1555	if (err)
1556	return err;
1557
1558	/ Step 4: make sure the private key is in the valid range. /
1559	if (__ecc_is_key_valid(curve, private_key, ndigits))
1560	return -EINVAL;
1561
1562	return `0`;
1563	}
1564	EXPORT_SYMBOL(ecc_gen_privkey);
1565
1566	int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
1567	const u64 private_key, u64 public_key)
1568	{
1569	int ret = `0`;
1570	struct ecc_point *pk;
1571	const struct ecc_curve *curve = ecc_get_curve(curve_id);
1572
1573	if (!private_key) {
1574	ret = -EINVAL;
1575	goto out;
1576	}
1577
1578	pk = ecc_alloc_point(ndigits);
1579	if (!pk) {
1580	ret = -ENOMEM;
1581	goto out;
1582	}
1583
1584	ecc_point_mult(result: pk, point: &curve->g, scalar: private_key, NULL, curve, ndigits);
1585
1586	/ SP800-56A rev 3 5.6.2.1.3 key check /
1587	if (ecc_is_pubkey_valid_full(curve, pk)) {
1588	ret = -EAGAIN;
1589	goto err_free_point;
1590	}
1591
1592	ecc_swap_digits(in: pk->x, out: public_key, ndigits);
1593	ecc_swap_digits(in: pk->y, out: &public_key[ndigits], ndigits);
1594
1595	err_free_point:
1596	ecc_free_point(pk);
1597	out:
1598	return ret;
1599	}
1600	EXPORT_SYMBOL(ecc_make_pub_key);
1601
1602	/ SP800-56A section 5.6.2.3.4 partial verification: ephemeral keys only /
1603	int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
1604	struct ecc_point *pk)
1605	{
1606	u64 yy[ECC_MAX_DIGITS], xxx[ECC_MAX_DIGITS], w[ECC_MAX_DIGITS];
1607
1608	if (WARN_ON(pk->ndigits != curve->g.ndigits))
1609	return -EINVAL;
1610
1611	/ Check 1: Verify key is not the zero point. /
1612	if (ecc_point_is_zero(pk))
1613	return -EINVAL;
1614
1615	/ Check 2: Verify key is in the range [1, p-1]. /
1616	if (vli_cmp(curve->p, pk->x, pk->ndigits) != `1`)
1617	return -EINVAL;
1618	if (vli_cmp(curve->p, pk->y, pk->ndigits) != `1`)
1619	return -EINVAL;
1620
1621	/ Check 3: Verify that y^2 == (x^3 + a·x + b) mod p /
1622	vli_mod_square_fast(result: yy, left: pk->y, curve); / y^2 /
1623	vli_mod_square_fast(result: xxx, left: pk->x, curve); / x^2 /
1624	vli_mod_mult_fast(result: xxx, left: xxx, right: pk->x, curve); / x^3 /
1625	vli_mod_mult_fast(result: w, left: curve->a, right: pk->x, curve); / a·x /
1626	vli_mod_add(result: w, left: w, right: curve->b, mod: curve->p, ndigits: pk->ndigits); / a·x + b /
1627	vli_mod_add(result: w, left: w, right: xxx, mod: curve->p, ndigits: pk->ndigits); / x^3 + a·x + b /
1628	if (vli_cmp(yy, w, pk->ndigits) != `0`) / Equation /
1629	return -EINVAL;
1630
1631	return `0`;
1632	}
1633	EXPORT_SYMBOL(ecc_is_pubkey_valid_partial);
1634
1635	/ SP800-56A section 5.6.2.3.3 full verification /
1636	int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
1637	struct ecc_point *pk)
1638	{
1639	struct ecc_point *nQ;
1640
1641	/ Checks 1 through 3 /
1642	int ret = ecc_is_pubkey_valid_partial(curve, pk);
1643
1644	if (ret)
1645	return ret;
1646
1647	/ Check 4: Verify that nQ is the zero point. /
1648	nQ = ecc_alloc_point(pk->ndigits);
1649	if (!nQ)
1650	return -ENOMEM;
1651
1652	ecc_point_mult(result: nQ, point: pk, scalar: curve->n, NULL, curve, ndigits: pk->ndigits);
1653	if (!ecc_point_is_zero(nQ))
1654	ret = -EINVAL;
1655
1656	ecc_free_point(nQ);
1657
1658	return ret;
1659	}
1660	EXPORT_SYMBOL(ecc_is_pubkey_valid_full);
1661
1662	int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
1663	const u64 private_key, const* u64 *public_key,
1664	u64 *secret)
1665	{
1666	int ret = `0`;
1667	struct ecc_point product, pk;
1668	u64 rand_z[ECC_MAX_DIGITS];
1669	unsigned int nbytes;
1670	const struct ecc_curve *curve = ecc_get_curve(curve_id);
1671
1672	if (!private_key \|\| !public_key \|\| ndigits > ARRAY_SIZE(rand_z)) {
1673	ret = -EINVAL;
1674	goto out;
1675	}
1676
1677	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
1678
1679	get_random_bytes(buf: rand_z, len: nbytes);
1680
1681	pk = ecc_alloc_point(ndigits);
1682	if (!pk) {
1683	ret = -ENOMEM;
1684	goto out;
1685	}
1686
1687	ecc_swap_digits(in: public_key, out: pk->x, ndigits);
1688	ecc_swap_digits(in: &public_key[ndigits], out: pk->y, ndigits);
1689	ret = ecc_is_pubkey_valid_partial(curve, pk);
1690	if (ret)
1691	goto err_alloc_product;
1692
1693	product = ecc_alloc_point(ndigits);
1694	if (!product) {
1695	ret = -ENOMEM;
1696	goto err_alloc_product;
1697	}
1698
1699	ecc_point_mult(result: product, point: pk, scalar: private_key, initial_z: rand_z, curve, ndigits);
1700
1701	if (ecc_point_is_zero(product)) {
1702	ret = -EFAULT;
1703	goto err_validity;
1704	}
1705
1706	ecc_swap_digits(in: product->x, out: secret, ndigits);
1707
1708	err_validity:
1709	memzero_explicit(s: rand_z, count: sizeof(rand_z));
1710	ecc_free_point(product);
1711	err_alloc_product:
1712	ecc_free_point(pk);
1713	out:
1714	return ret;
1715	}
1716	EXPORT_SYMBOL(crypto_ecdh_shared_secret);
1717
1718	MODULE_DESCRIPTION("core elliptic curve module");
1719	MODULE_LICENSE("Dual BSD/GPL");
1720

/* source code of linux/crypto/ecc.c */