curve25519-fiat32.c source code [linux/lib/crypto/curve25519-fiat32.c]

1	// SPDX-License-Identifier: GPL-2.0 OR MIT
2	/*
3	* Copyright (C) 2015-2016 The fiat-crypto Authors.
4	* Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5	*
6	* This is a machine-generated formally verified implementation of Curve25519
7	* ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
8	* machine generated, it has been tweaked to be suitable for use in the kernel.
9	* It is optimized for 32-bit machines and machines that cannot work efficiently
10	* with 128-bit integer types.
11	*/
12
13	#include <asm/unaligned.h>
14	#include <crypto/curve25519.h>
15	#include <linux/string.h>
16
/* fe means field element. Here the field is \Z/(2^255-19). An element t,
 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
 * t[3]+2^102 t[4]+...+2^230 t[9].
 * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
 * Multiplication and carrying produce fe from fe_loose.
 */
typedef struct fe { u32 v[10]; } fe;
24
/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
 * Addition and subtraction produce fe_loose from (fe, fe).
 */
typedef struct fe_loose { u32 v[10]; } fe_loose;
29
/*
 * Unpack a 32-byte little-endian string into ten limbs of alternating
 * 26 and 25 bits. The eight 32-bit words are read with unaligned loads and
 * re-sliced; the trailing comment on each line shows how many bits come from
 * each source word.
 */
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
	/* Ignores top bit of s. */
	u32 a0 = get_unaligned_le32(s);
	u32 a1 = get_unaligned_le32(s+4);
	u32 a2 = get_unaligned_le32(s+8);
	u32 a3 = get_unaligned_le32(s+12);
	u32 a4 = get_unaligned_le32(s+16);
	u32 a5 = get_unaligned_le32(s+20);
	u32 a6 = get_unaligned_le32(s+24);
	u32 a7 = get_unaligned_le32(s+28);
	h[0] = a0&((1<<26)-1);                    /* 26 used, 32-26 left.   26 */
	h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 =  6+19 = 25 */
	h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
	h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) +  6 = 19+ 6 = 25 */
	h[4] = (a3>> 6);                          /* (32- 6)              = 26 */
	h[5] = a4&((1<<25)-1);                    /*                        25 */
	h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 =  7+19 = 26 */
	h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
	h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) +  6 = 20+ 6 = 26 */
	h[9] = (a7>> 6)&((1<<25)-1);              /* top bit dropped        25 */
}
52
/* Decode the 32-byte little-endian string s into the field element h. */
static __always_inline void fe_frombytes(fe *h, const u8 *s)
{
	fe_frombytes_impl(h->v, s);
}
57
/*
 * Add a + b + c, store the low 25 bits of the sum in *low and return
 * bit 25 of the sum as the carry.
 */
static __always_inline u8 /*bool*/
addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 25 bits of result and 1 bit of carry
	 * (26 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a + b + c;
	*low = x & ((1 << 25) - 1);
	return (x >> 25) & 1;
}
68
/*
 * Add a + b + c, store the low 26 bits of the sum in *low and return
 * bit 26 of the sum as the carry.
 */
static __always_inline u8 /*bool*/
addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 26 bits of result and 1 bit of carry
	 * (27 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a + b + c;
	*low = x & ((1 << 26) - 1);
	return (x >> 26) & 1;
}
79
/*
 * Compute a - b - c in wrapping 32-bit arithmetic, store the low 25 bits in
 * *low and return the top bit of the 32-bit difference as the borrow.
 */
static __always_inline u8 /*bool*/
subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 25 bits of result and 1 bit of borrow
	 * (26 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a - b - c;
	*low = x & ((1 << 25) - 1);
	return x >> 31;
}
90
/*
 * Compute a - b - c in wrapping 32-bit arithmetic, store the low 26 bits in
 * *low and return the top bit of the 32-bit difference as the borrow.
 */
static __always_inline u8 /*bool*/
subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 26 bits of result and 1 bit of borrow
	 * (27 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a - b - c;
	*low = x & ((1 << 26) - 1);
	return x >> 31;
}
101
/*
 * Select between two words without branching: returns z when t is zero and
 * nz when t is non-zero.
 */
static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
{
	t = -!!t; /* all set if nonzero, 0 if 0 */
	return (t&nz) | ((~t)&z);
}
107
/*
 * Conditionally subtract the modulus once.
 *
 * The limbs 0x3ffffed, 0x1ffffff, 0x3ffffff, 0x1ffffff, ... are those of
 * p = 2^255 - 19. p is subtracted from in1 with a rippling borrow; the final
 * borrow is widened to an all-ones/all-zero mask and p & mask is added back,
 * so the selection is done with masks rather than a branch.
 */
static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
{
	const u32 x17 = in1[9];
	const u32 x18 = in1[8];
	const u32 x16 = in1[7];
	const u32 x14 = in1[6];
	const u32 x12 = in1[5];
	const u32 x10 = in1[4];
	const u32 x8 = in1[3];
	const u32 x6 = in1[2];
	const u32 x4 = in1[1];
	const u32 x2 = in1[0];
	/* in1 - p, limb by limb. */
	u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
	u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
	u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
	u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
	u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
	u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
	u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
	u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
	u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
	u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
	/* x49 is all ones iff the subtraction borrowed. */
	u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
	/* Add back p & x49, limb by limb. */
	u32 x50 = (x49 & 0x3ffffed);
	u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
	u32 x54 = (x49 & 0x1ffffff);
	u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
	u32 x58 = (x49 & 0x3ffffff);
	u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
	u32 x62 = (x49 & 0x1ffffff);
	u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
	u32 x66 = (x49 & 0x3ffffff);
	u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
	u32 x70 = (x49 & 0x1ffffff);
	u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
	u32 x74 = (x49 & 0x3ffffff);
	u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
	u32 x78 = (x49 & 0x1ffffff);
	u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
	u32 x82 = (x49 & 0x3ffffff);
	u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
	u32 x86 = (x49 & 0x1ffffff);
	u32 x88;

	/* The carry out of the top limb is intentionally discarded. */
	addcarryx_u25(x85, x47, x86, &x88);
	out[0] = x52;
	out[1] = x56;
	out[2] = x60;
	out[3] = x64;
	out[4] = x68;
	out[5] = x72;
	out[6] = x76;
	out[7] = x80;
	out[8] = x84;
	out[9] = x88;
}
163
/*
 * Serialise f into 32 little-endian bytes. The element is first passed
 * through fe_freeze(), then the ten 26/25-bit limbs are packed back to back;
 * bytes that straddle two limbs OR the top bits of one with the bottom bits
 * of the next.
 */
static __always_inline void fe_tobytes(u8 s[32], const fe *f)
{
	u32 h[10];
	fe_freeze(h, f->v);
	s[0] = h[0] >> 0;
	s[1] = h[0] >> 8;
	s[2] = h[0] >> 16;
	s[3] = (h[0] >> 24) | (h[1] << 2);
	s[4] = h[1] >> 6;
	s[5] = h[1] >> 14;
	s[6] = (h[1] >> 22) | (h[2] << 3);
	s[7] = h[2] >> 5;
	s[8] = h[2] >> 13;
	s[9] = (h[2] >> 21) | (h[3] << 5);
	s[10] = h[3] >> 3;
	s[11] = h[3] >> 11;
	s[12] = (h[3] >> 19) | (h[4] << 6);
	s[13] = h[4] >> 2;
	s[14] = h[4] >> 10;
	s[15] = h[4] >> 18;
	s[16] = h[5] >> 0;
	s[17] = h[5] >> 8;
	s[18] = h[5] >> 16;
	s[19] = (h[5] >> 24) | (h[6] << 1);
	s[20] = h[6] >> 7;
	s[21] = h[6] >> 15;
	s[22] = (h[6] >> 23) | (h[7] << 3);
	s[23] = h[7] >> 5;
	s[24] = h[7] >> 13;
	s[25] = (h[7] >> 21) | (h[8] << 4);
	s[26] = h[8] >> 4;
	s[27] = h[8] >> 12;
	s[28] = (h[8] >> 20) | (h[9] << 6);
	s[29] = h[9] >> 2;
	s[30] = h[9] >> 10;
	s[31] = h[9] >> 18;
}
201
/* h = f */
static __always_inline void fe_copy(fe *h, const fe *f)
{
	/* memmove, so h and f may be the same object. */
	memmove(h, f, sizeof(h->v));
}
207
/* h = f, copying the ten limbs of an fe into an fe_loose. */
static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
{
	memmove(h, f, sizeof(h->v));
}
212
/* h = 0 */
static __always_inline void fe_0(fe *h)
{
	memset(h, 0, sizeof(h->v));
}
218
/* h = 1 */
static __always_inline void fe_1(fe *h)
{
	memset(h, 0, sizeof(h->v));
	h->v[0] = 1;
}
225
/*
 * Limb-wise addition: out[i] = in1[i] + in2[i] with no carry propagation.
 * All twenty input limbs are loaded before any output limb is stored.
 */
static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	const u32 f9 = in1[9];
	const u32 f8 = in1[8];
	const u32 f7 = in1[7];
	const u32 f6 = in1[6];
	const u32 f5 = in1[5];
	const u32 f4 = in1[4];
	const u32 f3 = in1[3];
	const u32 f2 = in1[2];
	const u32 f1 = in1[1];
	const u32 f0 = in1[0];
	const u32 g9 = in2[9];
	const u32 g8 = in2[8];
	const u32 g7 = in2[7];
	const u32 g6 = in2[6];
	const u32 g5 = in2[5];
	const u32 g4 = in2[4];
	const u32 g3 = in2[3];
	const u32 g2 = in2[2];
	const u32 g1 = in2[1];
	const u32 g0 = in2[0];

	out[0] = (f0 + g0);
	out[1] = (f1 + g1);
	out[2] = (f2 + g2);
	out[3] = (f3 + g3);
	out[4] = (f4 + g4);
	out[5] = (f5 + g5);
	out[6] = (f6 + g6);
	out[7] = (f7 + g7);
	out[8] = (f8 + g8);
	out[9] = (f9 + g9);
}
260
/* h = f + g
 * Can overlap h with f or g.
 */
static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
{
	fe_add_impl(h->v, f->v, g->v);
}
268
/*
 * Limb-wise subtraction with a fixed bias: out[i] = bias[i] + in1[i] - in2[i].
 * The bias limbs (0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe, ...) are twice
 * the limbs of 2^255 - 19, which keeps each result limb non-negative for
 * in-range operands. All input limbs are loaded before any output is stored.
 */
static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	const u32 f9 = in1[9];
	const u32 f8 = in1[8];
	const u32 f7 = in1[7];
	const u32 f6 = in1[6];
	const u32 f5 = in1[5];
	const u32 f4 = in1[4];
	const u32 f3 = in1[3];
	const u32 f2 = in1[2];
	const u32 f1 = in1[1];
	const u32 f0 = in1[0];
	const u32 g9 = in2[9];
	const u32 g8 = in2[8];
	const u32 g7 = in2[7];
	const u32 g6 = in2[6];
	const u32 g5 = in2[5];
	const u32 g4 = in2[4];
	const u32 g3 = in2[3];
	const u32 g2 = in2[2];
	const u32 g1 = in2[1];
	const u32 g0 = in2[0];

	out[0] = ((0x7ffffda + f0) - g0);
	out[1] = ((0x3fffffe + f1) - g1);
	out[2] = ((0x7fffffe + f2) - g2);
	out[3] = ((0x3fffffe + f3) - g3);
	out[4] = ((0x7fffffe + f4) - g4);
	out[5] = ((0x3fffffe + f5) - g5);
	out[6] = ((0x7fffffe + f6) - g6);
	out[7] = ((0x3fffffe + f7) - g7);
	out[8] = ((0x7fffffe + f8) - g8);
	out[9] = ((0x3fffffe + f9) - g9);
}
303
/* h = f - g
 * Can overlap h with f or g.
 */
static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
{
	fe_sub_impl(h->v, f->v, g->v);
}
311
/*
 * out = in1 * in2 in \Z/(2^255-19), radix 2^25.5 (limbs of 26,25,26,25,...
 * bits).
 *
 * Three phases:
 *  1. Schoolbook product into 19 64-bit columns t0..t18. A product of two
 *     odd-indexed limbs is doubled, because the weights of two odd limbs
 *     multiply to twice the weight of the destination column.
 *  2. Columns t10..t18 are folded onto t0..t8 multiplied by 19
 *     (written as (x << 4) + (x << 1) + x), since 2^255 = 19 mod p.
 *  3. One carry pass over the ten columns, the top carry wrapped into limb 0
 *     times 19, followed by two short carries into limbs 1 and 2.
 *
 * All input limbs are read before any output limb is written. The doublings
 * written as (u64)(0x2 * x) are performed in 32 bits before widening.
 */
static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	const u32 f9 = in1[9];
	const u32 f8 = in1[8];
	const u32 f7 = in1[7];
	const u32 f6 = in1[6];
	const u32 f5 = in1[5];
	const u32 f4 = in1[4];
	const u32 f3 = in1[3];
	const u32 f2 = in1[2];
	const u32 f1 = in1[1];
	const u32 f0 = in1[0];
	const u32 g9 = in2[9];
	const u32 g8 = in2[8];
	const u32 g7 = in2[7];
	const u32 g6 = in2[6];
	const u32 g5 = in2[5];
	const u32 g4 = in2[4];
	const u32 g3 = in2[3];
	const u32 g2 = in2[2];
	const u32 g1 = in2[1];
	const u32 g0 = in2[0];

	/* Phase 1: product columns. */
	const u64 t0 = (u64)g0 * f0;
	const u64 t1 = (u64)g0 * f1 + (u64)g1 * f0;
	const u64 t2 = (u64)(0x2 * g1) * f1 + (u64)g0 * f2 + (u64)g2 * f0;
	const u64 t3 = (u64)g1 * f2 + (u64)g2 * f1 + (u64)g0 * f3 +
		       (u64)g3 * f0;
	const u64 t4 = (u64)g2 * f2 +
		       0x2 * ((u64)g1 * f3 + (u64)g3 * f1) +
		       (u64)g0 * f4 + (u64)g4 * f0;
	const u64 t5 = (u64)g2 * f3 + (u64)g3 * f2 + (u64)g1 * f4 +
		       (u64)g4 * f1 + (u64)g0 * f5 + (u64)g5 * f0;
	const u64 t6 = 0x2 * ((u64)g3 * f3 + (u64)g1 * f5 + (u64)g5 * f1) +
		       (u64)g2 * f4 + (u64)g4 * f2 + (u64)g0 * f6 +
		       (u64)g6 * f0;
	const u64 t7 = (u64)g3 * f4 + (u64)g4 * f3 + (u64)g2 * f5 +
		       (u64)g5 * f2 + (u64)g1 * f6 + (u64)g6 * f1 +
		       (u64)g0 * f7 + (u64)g7 * f0;
	const u64 t8 = (u64)g4 * f4 +
		       0x2 * ((u64)g3 * f5 + (u64)g5 * f3 + (u64)g1 * f7 +
			      (u64)g7 * f1) +
		       (u64)g2 * f6 + (u64)g6 * f2 + (u64)g0 * f8 +
		       (u64)g8 * f0;
	const u64 t9 = (u64)g4 * f5 + (u64)g5 * f4 + (u64)g3 * f6 +
		       (u64)g6 * f3 + (u64)g2 * f7 + (u64)g7 * f2 +
		       (u64)g1 * f8 + (u64)g8 * f1 + (u64)g0 * f9 +
		       (u64)g9 * f0;
	const u64 t10 = 0x2 * ((u64)g5 * f5 + (u64)g3 * f7 + (u64)g7 * f3 +
			       (u64)g1 * f9 + (u64)g9 * f1) +
			(u64)g4 * f6 + (u64)g6 * f4 + (u64)g2 * f8 +
			(u64)g8 * f2;
	const u64 t11 = (u64)g5 * f6 + (u64)g6 * f5 + (u64)g4 * f7 +
			(u64)g7 * f4 + (u64)g3 * f8 + (u64)g8 * f3 +
			(u64)g2 * f9 + (u64)g9 * f2;
	const u64 t12 = (u64)g6 * f6 +
			0x2 * ((u64)g5 * f7 + (u64)g7 * f5 + (u64)g3 * f9 +
			       (u64)g9 * f3) +
			(u64)g4 * f8 + (u64)g8 * f4;
	const u64 t13 = (u64)g6 * f7 + (u64)g7 * f6 + (u64)g5 * f8 +
			(u64)g8 * f5 + (u64)g4 * f9 + (u64)g9 * f4;
	const u64 t14 = 0x2 * ((u64)g7 * f7 + (u64)g5 * f9 + (u64)g9 * f5) +
			(u64)g6 * f8 + (u64)g8 * f6;
	const u64 t15 = (u64)g7 * f8 + (u64)g8 * f7 + (u64)g6 * f9 +
			(u64)g9 * f6;
	const u64 t16 = (u64)g8 * f8 + 0x2 * ((u64)g7 * f9 + (u64)g9 * f7);
	const u64 t17 = (u64)g8 * f9 + (u64)g9 * f8;
	const u64 t18 = (u64)(0x2 * g9) * f9;

	/* Phase 2: r[k] = t[k] + 19 * t[k + 10]; column 9 has no partner. */
	const u64 r8 = t8 + (t18 << 0x4) + (t18 << 0x1) + t18;
	const u64 r7 = t7 + (t17 << 0x4) + (t17 << 0x1) + t17;
	const u64 r6 = t6 + (t16 << 0x4) + (t16 << 0x1) + t16;
	const u64 r5 = t5 + (t15 << 0x4) + (t15 << 0x1) + t15;
	const u64 r4 = t4 + (t14 << 0x4) + (t14 << 0x1) + t14;
	const u64 r3 = t3 + (t13 << 0x4) + (t13 << 0x1) + t13;
	const u64 r2 = t2 + (t12 << 0x4) + (t12 << 0x1) + t12;
	const u64 r1 = t1 + (t11 << 0x4) + (t11 << 0x1) + t11;
	const u64 r0 = t0 + (t10 << 0x4) + (t10 << 0x1) + t10;

	/* Phase 3: carry chain, alternating 26- and 25-bit limbs. */
	const u32 l0 = ((u32)r0 & 0x3ffffff);
	const u64 a1 = (r0 >> 0x1a) + r1;
	const u32 l1 = ((u32)a1 & 0x1ffffff);
	const u64 a2 = (a1 >> 0x19) + r2;
	const u32 l2 = ((u32)a2 & 0x3ffffff);
	const u64 a3 = (a2 >> 0x1a) + r3;
	const u32 l3 = ((u32)a3 & 0x1ffffff);
	const u64 a4 = (a3 >> 0x19) + r4;
	const u32 l4 = ((u32)a4 & 0x3ffffff);
	const u64 a5 = (a4 >> 0x1a) + r5;
	const u32 l5 = ((u32)a5 & 0x1ffffff);
	const u64 a6 = (a5 >> 0x19) + r6;
	const u32 l6 = ((u32)a6 & 0x3ffffff);
	const u64 a7 = (a6 >> 0x1a) + r7;
	const u32 l7 = ((u32)a7 & 0x1ffffff);
	const u64 a8 = (a7 >> 0x19) + r8;
	const u32 l8 = ((u32)a8 & 0x3ffffff);
	const u64 a9 = (a8 >> 0x1a) + t9;
	const u32 l9 = ((u32)a9 & 0x1ffffff);
	/* Wrap the carry out of limb 9 into limb 0, multiplied by 19. */
	const u64 w0 = (l0 + (0x13 * (a9 >> 0x19)));
	const u32 c0 = (u32) (w0 >> 0x1a);
	const u32 w1 = (c0 + l1);

	out[0] = ((u32)w0 & 0x3ffffff);
	out[1] = (w1 & 0x1ffffff);
	out[2] = ((w1 >> 0x19) + l2);
	out[3] = l3;
	out[4] = l4;
	out[5] = l5;
	out[6] = l6;
	out[7] = l7;
	out[8] = l8;
	out[9] = l9;
}
427
/* h = f * g, with both operands tight (fe). */
static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}
432
/* h = f * g, with f loose (fe_loose) and g tight (fe). */
static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}
437
/* h = f * g, with both operands loose (fe_loose). */
static __always_inline void
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}
443
/*
 * out = in1^2 in \Z/(2^255-19), radix 2^25.5 (limbs of 26,25,26,25,... bits).
 *
 * Same three phases as a general multiplication, with the symmetric cross
 * products merged:
 *  1. 19 64-bit product columns t0..t18. Cross terms appear once and are
 *     doubled; products of two odd-indexed limbs are doubled again.
 *  2. Columns t10..t18 are folded onto t0..t8 multiplied by 19
 *     (written as (x << 4) + (x << 1) + x), since 2^255 = 19 mod p.
 *  3. One carry pass, the top carry wrapped into limb 0 times 19, followed
 *     by two short carries into limbs 1 and 2.
 *
 * All input limbs are read before any output limb is written. Doublings
 * written as (u64)(0x2 * x) or (u64)(0x4 * x) are performed in 32 bits
 * before widening.
 */
static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
	const u32 f9 = in1[9];
	const u32 f8 = in1[8];
	const u32 f7 = in1[7];
	const u32 f6 = in1[6];
	const u32 f5 = in1[5];
	const u32 f4 = in1[4];
	const u32 f3 = in1[3];
	const u32 f2 = in1[2];
	const u32 f1 = in1[1];
	const u32 f0 = in1[0];

	/* Phase 1: product columns. */
	const u64 t0 = (u64)f0 * f0;
	const u64 t1 = (u64)(0x2 * f0) * f1;
	const u64 t2 = 0x2 * ((u64)f1 * f1 + (u64)f0 * f2);
	const u64 t3 = 0x2 * ((u64)f1 * f2 + (u64)f0 * f3);
	const u64 t4 = (u64)f2 * f2 + (u64)(0x4 * f1) * f3 +
		       (u64)(0x2 * f0) * f4;
	const u64 t5 = 0x2 * ((u64)f2 * f3 + (u64)f1 * f4 + (u64)f0 * f5);
	const u64 t6 = 0x2 * ((u64)f3 * f3 + (u64)f2 * f4 + (u64)f0 * f6 +
			      (u64)(0x2 * f1) * f5);
	const u64 t7 = 0x2 * ((u64)f3 * f4 + (u64)f2 * f5 + (u64)f1 * f6 +
			      (u64)f0 * f7);
	const u64 t8 = (u64)f4 * f4 +
		       0x2 * ((u64)f2 * f6 + (u64)f0 * f8 +
			      0x2 * ((u64)f1 * f7 + (u64)f3 * f5));
	const u64 t9 = 0x2 * ((u64)f4 * f5 + (u64)f3 * f6 + (u64)f2 * f7 +
			      (u64)f1 * f8 + (u64)f0 * f9);
	const u64 t10 = 0x2 * ((u64)f5 * f5 + (u64)f4 * f6 + (u64)f2 * f8 +
			       0x2 * ((u64)f3 * f7 + (u64)f1 * f9));
	const u64 t11 = 0x2 * ((u64)f5 * f6 + (u64)f4 * f7 + (u64)f3 * f8 +
			       (u64)f2 * f9);
	const u64 t12 = (u64)f6 * f6 +
			0x2 * ((u64)f4 * f8 +
			       0x2 * ((u64)f5 * f7 + (u64)f3 * f9));
	const u64 t13 = 0x2 * ((u64)f6 * f7 + (u64)f5 * f8 + (u64)f4 * f9);
	const u64 t14 = 0x2 * ((u64)f7 * f7 + (u64)f6 * f8 +
			       (u64)(0x2 * f5) * f9);
	const u64 t15 = 0x2 * ((u64)f7 * f8 + (u64)f6 * f9);
	const u64 t16 = (u64)f8 * f8 + (u64)(0x4 * f7) * f9;
	const u64 t17 = (u64)(0x2 * f8) * f9;
	const u64 t18 = (u64)(0x2 * f9) * f9;

	/* Phase 2: r[k] = t[k] + 19 * t[k + 10]; column 9 has no partner. */
	const u64 r8 = t8 + (t18 << 0x4) + (t18 << 0x1) + t18;
	const u64 r7 = t7 + (t17 << 0x4) + (t17 << 0x1) + t17;
	const u64 r6 = t6 + (t16 << 0x4) + (t16 << 0x1) + t16;
	const u64 r5 = t5 + (t15 << 0x4) + (t15 << 0x1) + t15;
	const u64 r4 = t4 + (t14 << 0x4) + (t14 << 0x1) + t14;
	const u64 r3 = t3 + (t13 << 0x4) + (t13 << 0x1) + t13;
	const u64 r2 = t2 + (t12 << 0x4) + (t12 << 0x1) + t12;
	const u64 r1 = t1 + (t11 << 0x4) + (t11 << 0x1) + t11;
	const u64 r0 = t0 + (t10 << 0x4) + (t10 << 0x1) + t10;

	/* Phase 3: carry chain, alternating 26- and 25-bit limbs. */
	const u32 l0 = ((u32)r0 & 0x3ffffff);
	const u64 a1 = (r0 >> 0x1a) + r1;
	const u32 l1 = ((u32)a1 & 0x1ffffff);
	const u64 a2 = (a1 >> 0x19) + r2;
	const u32 l2 = ((u32)a2 & 0x3ffffff);
	const u64 a3 = (a2 >> 0x1a) + r3;
	const u32 l3 = ((u32)a3 & 0x1ffffff);
	const u64 a4 = (a3 >> 0x19) + r4;
	const u32 l4 = ((u32)a4 & 0x3ffffff);
	const u64 a5 = (a4 >> 0x1a) + r5;
	const u32 l5 = ((u32)a5 & 0x1ffffff);
	const u64 a6 = (a5 >> 0x19) + r6;
	const u32 l6 = ((u32)a6 & 0x3ffffff);
	const u64 a7 = (a6 >> 0x1a) + r7;
	const u32 l7 = ((u32)a7 & 0x1ffffff);
	const u64 a8 = (a7 >> 0x19) + r8;
	const u32 l8 = ((u32)a8 & 0x3ffffff);
	const u64 a9 = (a8 >> 0x1a) + t9;
	const u32 l9 = ((u32)a9 & 0x1ffffff);
	/* Wrap the carry out of limb 9 into limb 0, multiplied by 19. */
	const u64 w0 = (l0 + (0x13 * (a9 >> 0x19)));
	const u32 c0 = (u32) (w0 >> 0x1a);
	const u32 w1 = (c0 + l1);

	out[0] = ((u32)w0 & 0x3ffffff);
	out[1] = (w1 & 0x1ffffff);
	out[2] = ((w1 >> 0x19) + l2);
	out[3] = l3;
	out[4] = l4;
	out[5] = l5;
	out[6] = l6;
	out[7] = l7;
	out[8] = l8;
	out[9] = l9;
}
549
/* h = f^2, with f loose (fe_loose). */
static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
{
	fe_sqr_impl(h->v, f->v);
}
554
/* h = f^2, with f tight (fe). */
static __always_inline void fe_sq_tt(fe *h, const fe *f)
{
	fe_sqr_impl(h->v, f->v);
}
559
/*
 * out = z^(2^255 - 21), computed with a fixed chain of squarings and
 * multiplications. 2^255 - 21 = p - 2 for p = 2^255 - 19, so this is the
 * inverse of z by Fermat's little theorem (and maps 0 to 0). The sequence of
 * operations does not depend on the value of z.
 *
 * Each squaring loop starts at i = 1 because the first squaring of every run
 * is written out before the loop. The comments give the exponent of z held
 * in the variable just written.
 */
static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
{
	fe t0;
	fe t1;
	fe t2;
	fe t3;
	int i;

	fe_sq_tl(&t0, z);			/* 2 */
	fe_sq_tt(&t1, &t0);			/* 4 */
	for (i = 1; i < 2; ++i)
		fe_sq_tt(&t1, &t1);		/* 8 */
	fe_mul_tlt(&t1, z, &t1);		/* 9 */
	fe_mul_ttt(&t0, &t0, &t1);		/* 11 */
	fe_sq_tt(&t2, &t0);			/* 22 */
	fe_mul_ttt(&t1, &t1, &t2);		/* 2^5 - 1 */
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t2, &t2);		/* 2^10 - 2^5 */
	fe_mul_ttt(&t1, &t2, &t1);		/* 2^10 - 1 */
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);		/* 2^20 - 2^10 */
	fe_mul_ttt(&t2, &t2, &t1);		/* 2^20 - 1 */
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 20; ++i)
		fe_sq_tt(&t3, &t3);		/* 2^40 - 2^20 */
	fe_mul_ttt(&t2, &t3, &t2);		/* 2^40 - 1 */
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);		/* 2^50 - 2^10 */
	fe_mul_ttt(&t1, &t2, &t1);		/* 2^50 - 1 */
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);		/* 2^100 - 2^50 */
	fe_mul_ttt(&t2, &t2, &t1);		/* 2^100 - 1 */
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 100; ++i)
		fe_sq_tt(&t3, &t3);		/* 2^200 - 2^100 */
	fe_mul_ttt(&t2, &t3, &t2);		/* 2^200 - 1 */
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);		/* 2^250 - 2^50 */
	fe_mul_ttt(&t1, &t2, &t1);		/* 2^250 - 1 */
	fe_sq_tt(&t1, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t1, &t1);		/* 2^255 - 2^5 */
	fe_mul_ttt(out, &t1, &t0);		/* 2^255 - 21 */
}
609
/*
 * out = 1/z. z is copied into a local fe_loose first, so out may be the same
 * object as z.
 */
static __always_inline void fe_invert(fe *out, const fe *z)
{
	fe_loose l;
	fe_copy_lt(&l, z);
	fe_loose_invert(out, &l);
}
616
/* Replace (f,g) with (g,f) if b == 1;
 * replace (f,g) with (f,g) if b == 0.
 *
 * Preconditions: b in {0,1}
 *
 * b is widened to an all-ones or all-zero mask and the swap is done by
 * XOR-ing the masked difference into both operands, so the same instructions
 * run for either value of b.
 */
static noinline void fe_cswap(fe *f, fe *g, unsigned int b)
{
	unsigned i;
	b = 0 - b;
	for (i = 0; i < 10; i++) {
		u32 x = f->v[i] ^ g->v[i];
		x &= b;
		f->v[i] ^= x;
		g->v[i] ^= x;
	}
}
633
/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.
 *
 * With every limb of the second operand except limb 0 equal to zero, the
 * general product collapses to column[i] = 121666 * in1[i] for i = 0..9 and
 * columns 10..18 are zero, so nothing has to be folded back before the carry
 * pass. Only that non-zero arithmetic is kept here; the carry pass is the
 * same as in the general multiplication (alternating 26/25-bit limbs, top
 * carry wrapped into limb 0 times 19, then two short carries).
 *
 * All input limbs are consumed into locals before out is written.
 */
static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
{
	u32 limb[10];
	u64 acc = 0;
	u64 wrapped;
	u32 carry;
	unsigned int i;

	for (i = 0; i < 10; i++) {
		/* Even limbs hold 26 bits, odd limbs 25. */
		const unsigned int bits = (i & 1) ? 25 : 26;

		acc += (u64)121666 * in1[i];
		limb[i] = (u32)acc & ((1U << bits) - 1);
		acc >>= bits;
	}

	/* acc is now the carry out of limb 9: 2^255 = 19 mod p. */
	wrapped = limb[0] + (0x13 * acc);
	carry = (u32) (wrapped >> 0x1a) + limb[1];

	out[0] = ((u32)wrapped & 0x3ffffff);
	out[1] = (carry & 0x1ffffff);
	out[2] = ((carry >> 0x19) + limb[2]);
	out[3] = limb[3];
	out[4] = limb[4];
	out[5] = limb[5];
	out[6] = limb[6];
	out[7] = limb[7];
	out[8] = limb[8];
	out[9] = limb[9];
}
750
/* h = 121666 * f, with f loose (fe_loose). */
static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
{
	fe_mul_121666_impl(h->v, f->v);
}
755
756	void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
757	const u8 scalar[CURVE25519_KEY_SIZE],
758	const u8 point[CURVE25519_KEY_SIZE])
759	{
760	fe x1, x2, z2, x3, z3;
761	fe_loose x2l, z2l, x3l;
762	unsigned swap = `0`;
763	int pos;
764	u8 e[`32`];
765
766	memcpy(e, scalar, `32`);
767	curve25519_clamp_secret(secret: e);
768
769	/ The following implementation was transcribed to Coq and proven to*
770	* correspond to unary scalar multiplication in affine coordinates given
771	* that x1 != 0 is the x coordinate of some point on the curve. It was
772	* also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
773	* z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
774	* quantified over the underlying field, so it applies to Curve25519
775	* itself and the quadratic twist of Curve25519. It was not proven in
776	* Coq that prime-field arithmetic correctly simulates extension-field
777	* arithmetic on prime-field values. The decoding of the byte array
778	* representation of e was not considered.
779	*
780	* Specification of Montgomery curves in affine coordinates:
781	* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
782	*
783	* Proof that these form a group that is isomorphic to a Weierstrass
784	* curve:
785	* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
786	*
787	* Coq transcription and correctness proof of the loop
788	* (where scalarbits=255):
789	* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
790	* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
791	* preconditions: 0 <= e < 2^255 (not necessarily e < order),
792	* fe_invert(0) = 0
793	*/
794	fe_frombytes(h: &x1, s: point);
795	fe_1(h: &x2);
796	fe_0(h: &z2);
797	fe_copy(h: &x3, f: &x1);
798	fe_1(h: &z3);
799
800	for (pos = `254`; pos >= `0`; --pos) {
801	fe tmp0, tmp1;
802	fe_loose tmp0l, tmp1l;
803	/ loop invariant as of right before the test, for the case*
804	* where x1 != 0:
805	* pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
806	* is nonzero
807	* let r := e >> (pos+1) in the following equalities of
808	* projective points:
809	* to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
810	* to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
811	* x1 is the nonzero x coordinate of the nonzero
812	* point (rP-(r+1)P)
813	*/
814	unsigned b = `1` & (e[pos / `8`] >> (pos & `7`));
815	swap ^= b;
816	fe_cswap(f: &x2, g: &x3, b: swap);
817	fe_cswap(f: &z2, g: &z3, b: swap);
818	swap = b;
819	/ Coq transcription of ladderstep formula (called from*
820	* transcribed loop):
821	* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
822	* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
823	* x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
824	* x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
825	*/
826	fe_sub(h: &tmp0l, f: &x3, g: &z3);
827	fe_sub(h: &tmp1l, f: &x2, g: &z2);
828	fe_add(h: &x2l, f: &x2, g: &z2);
829	fe_add(h: &z2l, f: &x3, g: &z3);
830	fe_mul_tll(h: &z3, f: &tmp0l, g: &x2l);
831	fe_mul_tll(h: &z2, f: &z2l, g: &tmp1l);
832	fe_sq_tl(h: &tmp0, f: &tmp1l);
833	fe_sq_tl(h: &tmp1, f: &x2l);
834	fe_add(h: &x3l, f: &z3, g: &z2);
835	fe_sub(h: &z2l, f: &z3, g: &z2);
836	fe_mul_ttt(h: &x2, f: &tmp1, g: &tmp0);
837	fe_sub(h: &tmp1l, f: &tmp1, g: &tmp0);
838	fe_sq_tl(h: &z2, f: &z2l);
839	fe_mul121666(h: &z3, f: &tmp1l);
840	fe_sq_tl(h: &x3, f: &x3l);
841	fe_add(h: &tmp0l, f: &tmp0, g: &z3);
842	fe_mul_ttt(h: &z3, f: &x1, g: &z2);
843	fe_mul_tll(h: &z2, f: &tmp1l, g: &tmp0l);
844	}
845	/ here pos=-1, so r=e, so to_xz (eP) === if swap then (x3, z3)
846	* else (x2, z2)
847	*/
848	fe_cswap(f: &x2, g: &x3, b: swap);
849	fe_cswap(f: &z2, g: &z3, b: swap);
850
851	fe_invert(out: &z2, z: &z2);
852	fe_mul_ttt(h: &x2, f: &x2, g: &z2);
853	fe_tobytes(s: out, f: &x2);
854
855	memzero_explicit(s: &x1, count: sizeof(x1));
856	memzero_explicit(s: &x2, count: sizeof(x2));
857	memzero_explicit(s: &z2, count: sizeof(z2));
858	memzero_explicit(s: &x3, count: sizeof(x3));
859	memzero_explicit(s: &z3, count: sizeof(z3));
860	memzero_explicit(s: &x2l, count: sizeof(x2l));
861	memzero_explicit(s: &z2l, count: sizeof(z2l));
862	memzero_explicit(s: &x3l, count: sizeof(x3l));
863	memzero_explicit(s: &e, count: sizeof(e));
864	}
865

source code of linux/lib/crypto/curve25519-fiat32.c