1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright (C) 2015-2016 The fiat-crypto Authors.
4 * Copyright (C) 2018-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
6 * This is a machine-generated formally verified implementation of Curve25519
7 * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
8 * machine generated, it has been tweaked to be suitable for use in the kernel.
9 * It is optimized for 32-bit machines and machines that cannot work efficiently
10 * with 128-bit integer types.
13 /* fe means field element. Here the field is \Z/(2^255-19). An element t,
14 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
15 * t[3]+2^102 t[4]+...+2^230 t[9].
16 * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
17 * Multiplication and carrying produce fe from fe_loose.
19 typedef struct fe
{ u32 v
[10]; } fe
;
21 /* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
22 * Addition and subtraction produce fe_loose from (fe, fe).
24 typedef struct fe_loose
{ u32 v
[10]; } fe_loose
;
26 static __always_inline
void fe_frombytes_impl(u32 h
[10], const u8
*s
)
28 /* Ignores top bit of s. */
29 u32 a0
= get_unaligned_le32(s
);
30 u32 a1
= get_unaligned_le32(s
+4);
31 u32 a2
= get_unaligned_le32(s
+8);
32 u32 a3
= get_unaligned_le32(s
+12);
33 u32 a4
= get_unaligned_le32(s
+16);
34 u32 a5
= get_unaligned_le32(s
+20);
35 u32 a6
= get_unaligned_le32(s
+24);
36 u32 a7
= get_unaligned_le32(s
+28);
37 h
[0] = a0
&((1<<26)-1); /* 26 used, 32-26 left. 26 */
38 h
[1] = (a0
>>26) | ((a1
&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
39 h
[2] = (a1
>>19) | ((a2
&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
40 h
[3] = (a2
>>13) | ((a3
&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */
41 h
[4] = (a3
>> 6); /* (32- 6) = 26 */
42 h
[5] = a4
&((1<<25)-1); /* 25 */
43 h
[6] = (a4
>>25) | ((a5
&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */
44 h
[7] = (a5
>>19) | ((a6
&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
45 h
[8] = (a6
>>12) | ((a7
&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */
46 h
[9] = (a7
>> 6)&((1<<25)-1); /* 25 */
49 static __always_inline
void fe_frombytes(fe
*h
, const u8
*s
)
51 fe_frombytes_impl(h
->v
, s
);
54 static __always_inline u8
/*bool*/
55 addcarryx_u25(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
57 /* This function extracts 25 bits of result and 1 bit of carry
58 * (26 total), so a 32-bit intermediate is sufficient.
61 *low
= x
& ((1 << 25) - 1);
65 static __always_inline u8
/*bool*/
66 addcarryx_u26(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
68 /* This function extracts 26 bits of result and 1 bit of carry
69 * (27 total), so a 32-bit intermediate is sufficient.
72 *low
= x
& ((1 << 26) - 1);
76 static __always_inline u8
/*bool*/
77 subborrow_u25(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
79 /* This function extracts 25 bits of result and 1 bit of borrow
80 * (26 total), so a 32-bit intermediate is sufficient.
83 *low
= x
& ((1 << 25) - 1);
87 static __always_inline u8
/*bool*/
88 subborrow_u26(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
90 /* This function extracts 26 bits of result and 1 bit of borrow
91 *(27 total), so a 32-bit intermediate is sufficient.
94 *low
= x
& ((1 << 26) - 1);
98 static __always_inline u32
cmovznz32(u32 t
, u32 z
, u32 nz
)
100 t
= -!!t
; /* all set if nonzero, 0 if 0 */
101 return (t
&nz
) | ((~t
)&z
);
104 static __always_inline
void fe_freeze(u32 out
[10], const u32 in1
[10])
106 { const u32 x17
= in1
[9];
107 { const u32 x18
= in1
[8];
108 { const u32 x16
= in1
[7];
109 { const u32 x14
= in1
[6];
110 { const u32 x12
= in1
[5];
111 { const u32 x10
= in1
[4];
112 { const u32 x8
= in1
[3];
113 { const u32 x6
= in1
[2];
114 { const u32 x4
= in1
[1];
115 { const u32 x2
= in1
[0];
116 { u32 x20
; u8
/*bool*/ x21
= subborrow_u26(0x0, x2
, 0x3ffffed, &x20
);
117 { u32 x23
; u8
/*bool*/ x24
= subborrow_u25(x21
, x4
, 0x1ffffff, &x23
);
118 { u32 x26
; u8
/*bool*/ x27
= subborrow_u26(x24
, x6
, 0x3ffffff, &x26
);
119 { u32 x29
; u8
/*bool*/ x30
= subborrow_u25(x27
, x8
, 0x1ffffff, &x29
);
120 { u32 x32
; u8
/*bool*/ x33
= subborrow_u26(x30
, x10
, 0x3ffffff, &x32
);
121 { u32 x35
; u8
/*bool*/ x36
= subborrow_u25(x33
, x12
, 0x1ffffff, &x35
);
122 { u32 x38
; u8
/*bool*/ x39
= subborrow_u26(x36
, x14
, 0x3ffffff, &x38
);
123 { u32 x41
; u8
/*bool*/ x42
= subborrow_u25(x39
, x16
, 0x1ffffff, &x41
);
124 { u32 x44
; u8
/*bool*/ x45
= subborrow_u26(x42
, x18
, 0x3ffffff, &x44
);
125 { u32 x47
; u8
/*bool*/ x48
= subborrow_u25(x45
, x17
, 0x1ffffff, &x47
);
126 { u32 x49
= cmovznz32(x48
, 0x0, 0xffffffff);
127 { u32 x50
= (x49
& 0x3ffffed);
128 { u32 x52
; u8
/*bool*/ x53
= addcarryx_u26(0x0, x20
, x50
, &x52
);
129 { u32 x54
= (x49
& 0x1ffffff);
130 { u32 x56
; u8
/*bool*/ x57
= addcarryx_u25(x53
, x23
, x54
, &x56
);
131 { u32 x58
= (x49
& 0x3ffffff);
132 { u32 x60
; u8
/*bool*/ x61
= addcarryx_u26(x57
, x26
, x58
, &x60
);
133 { u32 x62
= (x49
& 0x1ffffff);
134 { u32 x64
; u8
/*bool*/ x65
= addcarryx_u25(x61
, x29
, x62
, &x64
);
135 { u32 x66
= (x49
& 0x3ffffff);
136 { u32 x68
; u8
/*bool*/ x69
= addcarryx_u26(x65
, x32
, x66
, &x68
);
137 { u32 x70
= (x49
& 0x1ffffff);
138 { u32 x72
; u8
/*bool*/ x73
= addcarryx_u25(x69
, x35
, x70
, &x72
);
139 { u32 x74
= (x49
& 0x3ffffff);
140 { u32 x76
; u8
/*bool*/ x77
= addcarryx_u26(x73
, x38
, x74
, &x76
);
141 { u32 x78
= (x49
& 0x1ffffff);
142 { u32 x80
; u8
/*bool*/ x81
= addcarryx_u25(x77
, x41
, x78
, &x80
);
143 { u32 x82
= (x49
& 0x3ffffff);
144 { u32 x84
; u8
/*bool*/ x85
= addcarryx_u26(x81
, x44
, x82
, &x84
);
145 { u32 x86
= (x49
& 0x1ffffff);
146 { u32 x88
; addcarryx_u25(x85
, x47
, x86
, &x88
);
157 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
160 static __always_inline
void fe_tobytes(u8 s
[32], const fe
*f
)
167 s
[3] = (h
[0] >> 24) | (h
[1] << 2);
170 s
[6] = (h
[1] >> 22) | (h
[2] << 3);
173 s
[9] = (h
[2] >> 21) | (h
[3] << 5);
176 s
[12] = (h
[3] >> 19) | (h
[4] << 6);
183 s
[19] = (h
[5] >> 24) | (h
[6] << 1);
186 s
[22] = (h
[6] >> 23) | (h
[7] << 3);
189 s
[25] = (h
[7] >> 21) | (h
[8] << 4);
192 s
[28] = (h
[8] >> 20) | (h
[9] << 6);
199 static __always_inline
void fe_copy(fe
*h
, const fe
*f
)
201 memmove(h
, f
, sizeof(u32
) * 10);
204 static __always_inline
void fe_copy_lt(fe_loose
*h
, const fe
*f
)
206 memmove(h
, f
, sizeof(u32
) * 10);
210 static __always_inline
void fe_0(fe
*h
)
212 memset(h
, 0, sizeof(u32
) * 10);
216 static __always_inline
void fe_1(fe
*h
)
218 memset(h
, 0, sizeof(u32
) * 10);
222 static void fe_add_impl(u32 out
[10], const u32 in1
[10], const u32 in2
[10])
224 { const u32 x20
= in1
[9];
225 { const u32 x21
= in1
[8];
226 { const u32 x19
= in1
[7];
227 { const u32 x17
= in1
[6];
228 { const u32 x15
= in1
[5];
229 { const u32 x13
= in1
[4];
230 { const u32 x11
= in1
[3];
231 { const u32 x9
= in1
[2];
232 { const u32 x7
= in1
[1];
233 { const u32 x5
= in1
[0];
234 { const u32 x38
= in2
[9];
235 { const u32 x39
= in2
[8];
236 { const u32 x37
= in2
[7];
237 { const u32 x35
= in2
[6];
238 { const u32 x33
= in2
[5];
239 { const u32 x31
= in2
[4];
240 { const u32 x29
= in2
[3];
241 { const u32 x27
= in2
[2];
242 { const u32 x25
= in2
[1];
243 { const u32 x23
= in2
[0];
247 out
[3] = (x11
+ x29
);
248 out
[4] = (x13
+ x31
);
249 out
[5] = (x15
+ x33
);
250 out
[6] = (x17
+ x35
);
251 out
[7] = (x19
+ x37
);
252 out
[8] = (x21
+ x39
);
253 out
[9] = (x20
+ x38
);
258 * Can overlap h with f or g.
260 static __always_inline
void fe_add(fe_loose
*h
, const fe
*f
, const fe
*g
)
262 fe_add_impl(h
->v
, f
->v
, g
->v
);
265 static void fe_sub_impl(u32 out
[10], const u32 in1
[10], const u32 in2
[10])
267 { const u32 x20
= in1
[9];
268 { const u32 x21
= in1
[8];
269 { const u32 x19
= in1
[7];
270 { const u32 x17
= in1
[6];
271 { const u32 x15
= in1
[5];
272 { const u32 x13
= in1
[4];
273 { const u32 x11
= in1
[3];
274 { const u32 x9
= in1
[2];
275 { const u32 x7
= in1
[1];
276 { const u32 x5
= in1
[0];
277 { const u32 x38
= in2
[9];
278 { const u32 x39
= in2
[8];
279 { const u32 x37
= in2
[7];
280 { const u32 x35
= in2
[6];
281 { const u32 x33
= in2
[5];
282 { const u32 x31
= in2
[4];
283 { const u32 x29
= in2
[3];
284 { const u32 x27
= in2
[2];
285 { const u32 x25
= in2
[1];
286 { const u32 x23
= in2
[0];
287 out
[0] = ((0x7ffffda + x5
) - x23
);
288 out
[1] = ((0x3fffffe + x7
) - x25
);
289 out
[2] = ((0x7fffffe + x9
) - x27
);
290 out
[3] = ((0x3fffffe + x11
) - x29
);
291 out
[4] = ((0x7fffffe + x13
) - x31
);
292 out
[5] = ((0x3fffffe + x15
) - x33
);
293 out
[6] = ((0x7fffffe + x17
) - x35
);
294 out
[7] = ((0x3fffffe + x19
) - x37
);
295 out
[8] = ((0x7fffffe + x21
) - x39
);
296 out
[9] = ((0x3fffffe + x20
) - x38
);
301 * Can overlap h with f or g.
303 static __always_inline
void fe_sub(fe_loose
*h
, const fe
*f
, const fe
*g
)
305 fe_sub_impl(h
->v
, f
->v
, g
->v
);
308 static void fe_mul_impl(u32 out
[10], const u32 in1
[10], const u32 in2
[10])
310 { const u32 x20
= in1
[9];
311 { const u32 x21
= in1
[8];
312 { const u32 x19
= in1
[7];
313 { const u32 x17
= in1
[6];
314 { const u32 x15
= in1
[5];
315 { const u32 x13
= in1
[4];
316 { const u32 x11
= in1
[3];
317 { const u32 x9
= in1
[2];
318 { const u32 x7
= in1
[1];
319 { const u32 x5
= in1
[0];
320 { const u32 x38
= in2
[9];
321 { const u32 x39
= in2
[8];
322 { const u32 x37
= in2
[7];
323 { const u32 x35
= in2
[6];
324 { const u32 x33
= in2
[5];
325 { const u32 x31
= in2
[4];
326 { const u32 x29
= in2
[3];
327 { const u32 x27
= in2
[2];
328 { const u32 x25
= in2
[1];
329 { const u32 x23
= in2
[0];
330 { u64 x40
= ((u64
)x23
* x5
);
331 { u64 x41
= (((u64
)x23
* x7
) + ((u64
)x25
* x5
));
332 { u64 x42
= ((((u64
)(0x2 * x25
) * x7
) + ((u64
)x23
* x9
)) + ((u64
)x27
* x5
));
333 { u64 x43
= (((((u64
)x25
* x9
) + ((u64
)x27
* x7
)) + ((u64
)x23
* x11
)) + ((u64
)x29
* x5
));
334 { u64 x44
= (((((u64
)x27
* x9
) + (0x2 * (((u64
)x25
* x11
) + ((u64
)x29
* x7
)))) + ((u64
)x23
* x13
)) + ((u64
)x31
* x5
));
335 { u64 x45
= (((((((u64
)x27
* x11
) + ((u64
)x29
* x9
)) + ((u64
)x25
* x13
)) + ((u64
)x31
* x7
)) + ((u64
)x23
* x15
)) + ((u64
)x33
* x5
));
336 { u64 x46
= (((((0x2 * ((((u64
)x29
* x11
) + ((u64
)x25
* x15
)) + ((u64
)x33
* x7
))) + ((u64
)x27
* x13
)) + ((u64
)x31
* x9
)) + ((u64
)x23
* x17
)) + ((u64
)x35
* x5
));
337 { u64 x47
= (((((((((u64
)x29
* x13
) + ((u64
)x31
* x11
)) + ((u64
)x27
* x15
)) + ((u64
)x33
* x9
)) + ((u64
)x25
* x17
)) + ((u64
)x35
* x7
)) + ((u64
)x23
* x19
)) + ((u64
)x37
* x5
));
338 { u64 x48
= (((((((u64
)x31
* x13
) + (0x2 * (((((u64
)x29
* x15
) + ((u64
)x33
* x11
)) + ((u64
)x25
* x19
)) + ((u64
)x37
* x7
)))) + ((u64
)x27
* x17
)) + ((u64
)x35
* x9
)) + ((u64
)x23
* x21
)) + ((u64
)x39
* x5
));
339 { u64 x49
= (((((((((((u64
)x31
* x15
) + ((u64
)x33
* x13
)) + ((u64
)x29
* x17
)) + ((u64
)x35
* x11
)) + ((u64
)x27
* x19
)) + ((u64
)x37
* x9
)) + ((u64
)x25
* x21
)) + ((u64
)x39
* x7
)) + ((u64
)x23
* x20
)) + ((u64
)x38
* x5
));
340 { u64 x50
= (((((0x2 * ((((((u64
)x33
* x15
) + ((u64
)x29
* x19
)) + ((u64
)x37
* x11
)) + ((u64
)x25
* x20
)) + ((u64
)x38
* x7
))) + ((u64
)x31
* x17
)) + ((u64
)x35
* x13
)) + ((u64
)x27
* x21
)) + ((u64
)x39
* x9
));
341 { u64 x51
= (((((((((u64
)x33
* x17
) + ((u64
)x35
* x15
)) + ((u64
)x31
* x19
)) + ((u64
)x37
* x13
)) + ((u64
)x29
* x21
)) + ((u64
)x39
* x11
)) + ((u64
)x27
* x20
)) + ((u64
)x38
* x9
));
342 { u64 x52
= (((((u64
)x35
* x17
) + (0x2 * (((((u64
)x33
* x19
) + ((u64
)x37
* x15
)) + ((u64
)x29
* x20
)) + ((u64
)x38
* x11
)))) + ((u64
)x31
* x21
)) + ((u64
)x39
* x13
));
343 { u64 x53
= (((((((u64
)x35
* x19
) + ((u64
)x37
* x17
)) + ((u64
)x33
* x21
)) + ((u64
)x39
* x15
)) + ((u64
)x31
* x20
)) + ((u64
)x38
* x13
));
344 { u64 x54
= (((0x2 * ((((u64
)x37
* x19
) + ((u64
)x33
* x20
)) + ((u64
)x38
* x15
))) + ((u64
)x35
* x21
)) + ((u64
)x39
* x17
));
345 { u64 x55
= (((((u64
)x37
* x21
) + ((u64
)x39
* x19
)) + ((u64
)x35
* x20
)) + ((u64
)x38
* x17
));
346 { u64 x56
= (((u64
)x39
* x21
) + (0x2 * (((u64
)x37
* x20
) + ((u64
)x38
* x19
))));
347 { u64 x57
= (((u64
)x39
* x20
) + ((u64
)x38
* x21
));
348 { u64 x58
= ((u64
)(0x2 * x38
) * x20
);
349 { u64 x59
= (x48
+ (x58
<< 0x4));
350 { u64 x60
= (x59
+ (x58
<< 0x1));
351 { u64 x61
= (x60
+ x58
);
352 { u64 x62
= (x47
+ (x57
<< 0x4));
353 { u64 x63
= (x62
+ (x57
<< 0x1));
354 { u64 x64
= (x63
+ x57
);
355 { u64 x65
= (x46
+ (x56
<< 0x4));
356 { u64 x66
= (x65
+ (x56
<< 0x1));
357 { u64 x67
= (x66
+ x56
);
358 { u64 x68
= (x45
+ (x55
<< 0x4));
359 { u64 x69
= (x68
+ (x55
<< 0x1));
360 { u64 x70
= (x69
+ x55
);
361 { u64 x71
= (x44
+ (x54
<< 0x4));
362 { u64 x72
= (x71
+ (x54
<< 0x1));
363 { u64 x73
= (x72
+ x54
);
364 { u64 x74
= (x43
+ (x53
<< 0x4));
365 { u64 x75
= (x74
+ (x53
<< 0x1));
366 { u64 x76
= (x75
+ x53
);
367 { u64 x77
= (x42
+ (x52
<< 0x4));
368 { u64 x78
= (x77
+ (x52
<< 0x1));
369 { u64 x79
= (x78
+ x52
);
370 { u64 x80
= (x41
+ (x51
<< 0x4));
371 { u64 x81
= (x80
+ (x51
<< 0x1));
372 { u64 x82
= (x81
+ x51
);
373 { u64 x83
= (x40
+ (x50
<< 0x4));
374 { u64 x84
= (x83
+ (x50
<< 0x1));
375 { u64 x85
= (x84
+ x50
);
376 { u64 x86
= (x85
>> 0x1a);
377 { u32 x87
= ((u32
)x85
& 0x3ffffff);
378 { u64 x88
= (x86
+ x82
);
379 { u64 x89
= (x88
>> 0x19);
380 { u32 x90
= ((u32
)x88
& 0x1ffffff);
381 { u64 x91
= (x89
+ x79
);
382 { u64 x92
= (x91
>> 0x1a);
383 { u32 x93
= ((u32
)x91
& 0x3ffffff);
384 { u64 x94
= (x92
+ x76
);
385 { u64 x95
= (x94
>> 0x19);
386 { u32 x96
= ((u32
)x94
& 0x1ffffff);
387 { u64 x97
= (x95
+ x73
);
388 { u64 x98
= (x97
>> 0x1a);
389 { u32 x99
= ((u32
)x97
& 0x3ffffff);
390 { u64 x100
= (x98
+ x70
);
391 { u64 x101
= (x100
>> 0x19);
392 { u32 x102
= ((u32
)x100
& 0x1ffffff);
393 { u64 x103
= (x101
+ x67
);
394 { u64 x104
= (x103
>> 0x1a);
395 { u32 x105
= ((u32
)x103
& 0x3ffffff);
396 { u64 x106
= (x104
+ x64
);
397 { u64 x107
= (x106
>> 0x19);
398 { u32 x108
= ((u32
)x106
& 0x1ffffff);
399 { u64 x109
= (x107
+ x61
);
400 { u64 x110
= (x109
>> 0x1a);
401 { u32 x111
= ((u32
)x109
& 0x3ffffff);
402 { u64 x112
= (x110
+ x49
);
403 { u64 x113
= (x112
>> 0x19);
404 { u32 x114
= ((u32
)x112
& 0x1ffffff);
405 { u64 x115
= (x87
+ (0x13 * x113
));
406 { u32 x116
= (u32
) (x115
>> 0x1a);
407 { u32 x117
= ((u32
)x115
& 0x3ffffff);
408 { u32 x118
= (x116
+ x90
);
409 { u32 x119
= (x118
>> 0x19);
410 { u32 x120
= (x118
& 0x1ffffff);
413 out
[2] = (x119
+ x93
);
421 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
424 static __always_inline
void fe_mul_ttt(fe
*h
, const fe
*f
, const fe
*g
)
426 fe_mul_impl(h
->v
, f
->v
, g
->v
);
429 static __always_inline
void fe_mul_tlt(fe
*h
, const fe_loose
*f
, const fe
*g
)
431 fe_mul_impl(h
->v
, f
->v
, g
->v
);
434 static __always_inline
void
435 fe_mul_tll(fe
*h
, const fe_loose
*f
, const fe_loose
*g
)
437 fe_mul_impl(h
->v
, f
->v
, g
->v
);
440 static void fe_sqr_impl(u32 out
[10], const u32 in1
[10])
442 { const u32 x17
= in1
[9];
443 { const u32 x18
= in1
[8];
444 { const u32 x16
= in1
[7];
445 { const u32 x14
= in1
[6];
446 { const u32 x12
= in1
[5];
447 { const u32 x10
= in1
[4];
448 { const u32 x8
= in1
[3];
449 { const u32 x6
= in1
[2];
450 { const u32 x4
= in1
[1];
451 { const u32 x2
= in1
[0];
452 { u64 x19
= ((u64
)x2
* x2
);
453 { u64 x20
= ((u64
)(0x2 * x2
) * x4
);
454 { u64 x21
= (0x2 * (((u64
)x4
* x4
) + ((u64
)x2
* x6
)));
455 { u64 x22
= (0x2 * (((u64
)x4
* x6
) + ((u64
)x2
* x8
)));
456 { u64 x23
= ((((u64
)x6
* x6
) + ((u64
)(0x4 * x4
) * x8
)) + ((u64
)(0x2 * x2
) * x10
));
457 { u64 x24
= (0x2 * ((((u64
)x6
* x8
) + ((u64
)x4
* x10
)) + ((u64
)x2
* x12
)));
458 { u64 x25
= (0x2 * (((((u64
)x8
* x8
) + ((u64
)x6
* x10
)) + ((u64
)x2
* x14
)) + ((u64
)(0x2 * x4
) * x12
)));
459 { u64 x26
= (0x2 * (((((u64
)x8
* x10
) + ((u64
)x6
* x12
)) + ((u64
)x4
* x14
)) + ((u64
)x2
* x16
)));
460 { u64 x27
= (((u64
)x10
* x10
) + (0x2 * ((((u64
)x6
* x14
) + ((u64
)x2
* x18
)) + (0x2 * (((u64
)x4
* x16
) + ((u64
)x8
* x12
))))));
461 { u64 x28
= (0x2 * ((((((u64
)x10
* x12
) + ((u64
)x8
* x14
)) + ((u64
)x6
* x16
)) + ((u64
)x4
* x18
)) + ((u64
)x2
* x17
)));
462 { u64 x29
= (0x2 * (((((u64
)x12
* x12
) + ((u64
)x10
* x14
)) + ((u64
)x6
* x18
)) + (0x2 * (((u64
)x8
* x16
) + ((u64
)x4
* x17
)))));
463 { u64 x30
= (0x2 * (((((u64
)x12
* x14
) + ((u64
)x10
* x16
)) + ((u64
)x8
* x18
)) + ((u64
)x6
* x17
)));
464 { u64 x31
= (((u64
)x14
* x14
) + (0x2 * (((u64
)x10
* x18
) + (0x2 * (((u64
)x12
* x16
) + ((u64
)x8
* x17
))))));
465 { u64 x32
= (0x2 * ((((u64
)x14
* x16
) + ((u64
)x12
* x18
)) + ((u64
)x10
* x17
)));
466 { u64 x33
= (0x2 * ((((u64
)x16
* x16
) + ((u64
)x14
* x18
)) + ((u64
)(0x2 * x12
) * x17
)));
467 { u64 x34
= (0x2 * (((u64
)x16
* x18
) + ((u64
)x14
* x17
)));
468 { u64 x35
= (((u64
)x18
* x18
) + ((u64
)(0x4 * x16
) * x17
));
469 { u64 x36
= ((u64
)(0x2 * x18
) * x17
);
470 { u64 x37
= ((u64
)(0x2 * x17
) * x17
);
471 { u64 x38
= (x27
+ (x37
<< 0x4));
472 { u64 x39
= (x38
+ (x37
<< 0x1));
473 { u64 x40
= (x39
+ x37
);
474 { u64 x41
= (x26
+ (x36
<< 0x4));
475 { u64 x42
= (x41
+ (x36
<< 0x1));
476 { u64 x43
= (x42
+ x36
);
477 { u64 x44
= (x25
+ (x35
<< 0x4));
478 { u64 x45
= (x44
+ (x35
<< 0x1));
479 { u64 x46
= (x45
+ x35
);
480 { u64 x47
= (x24
+ (x34
<< 0x4));
481 { u64 x48
= (x47
+ (x34
<< 0x1));
482 { u64 x49
= (x48
+ x34
);
483 { u64 x50
= (x23
+ (x33
<< 0x4));
484 { u64 x51
= (x50
+ (x33
<< 0x1));
485 { u64 x52
= (x51
+ x33
);
486 { u64 x53
= (x22
+ (x32
<< 0x4));
487 { u64 x54
= (x53
+ (x32
<< 0x1));
488 { u64 x55
= (x54
+ x32
);
489 { u64 x56
= (x21
+ (x31
<< 0x4));
490 { u64 x57
= (x56
+ (x31
<< 0x1));
491 { u64 x58
= (x57
+ x31
);
492 { u64 x59
= (x20
+ (x30
<< 0x4));
493 { u64 x60
= (x59
+ (x30
<< 0x1));
494 { u64 x61
= (x60
+ x30
);
495 { u64 x62
= (x19
+ (x29
<< 0x4));
496 { u64 x63
= (x62
+ (x29
<< 0x1));
497 { u64 x64
= (x63
+ x29
);
498 { u64 x65
= (x64
>> 0x1a);
499 { u32 x66
= ((u32
)x64
& 0x3ffffff);
500 { u64 x67
= (x65
+ x61
);
501 { u64 x68
= (x67
>> 0x19);
502 { u32 x69
= ((u32
)x67
& 0x1ffffff);
503 { u64 x70
= (x68
+ x58
);
504 { u64 x71
= (x70
>> 0x1a);
505 { u32 x72
= ((u32
)x70
& 0x3ffffff);
506 { u64 x73
= (x71
+ x55
);
507 { u64 x74
= (x73
>> 0x19);
508 { u32 x75
= ((u32
)x73
& 0x1ffffff);
509 { u64 x76
= (x74
+ x52
);
510 { u64 x77
= (x76
>> 0x1a);
511 { u32 x78
= ((u32
)x76
& 0x3ffffff);
512 { u64 x79
= (x77
+ x49
);
513 { u64 x80
= (x79
>> 0x19);
514 { u32 x81
= ((u32
)x79
& 0x1ffffff);
515 { u64 x82
= (x80
+ x46
);
516 { u64 x83
= (x82
>> 0x1a);
517 { u32 x84
= ((u32
)x82
& 0x3ffffff);
518 { u64 x85
= (x83
+ x43
);
519 { u64 x86
= (x85
>> 0x19);
520 { u32 x87
= ((u32
)x85
& 0x1ffffff);
521 { u64 x88
= (x86
+ x40
);
522 { u64 x89
= (x88
>> 0x1a);
523 { u32 x90
= ((u32
)x88
& 0x3ffffff);
524 { u64 x91
= (x89
+ x28
);
525 { u64 x92
= (x91
>> 0x19);
526 { u32 x93
= ((u32
)x91
& 0x1ffffff);
527 { u64 x94
= (x66
+ (0x13 * x92
));
528 { u32 x95
= (u32
) (x94
>> 0x1a);
529 { u32 x96
= ((u32
)x94
& 0x3ffffff);
530 { u32 x97
= (x95
+ x69
);
531 { u32 x98
= (x97
>> 0x19);
532 { u32 x99
= (x97
& 0x1ffffff);
535 out
[2] = (x98
+ x72
);
543 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
546 static __always_inline
void fe_sq_tl(fe
*h
, const fe_loose
*f
)
548 fe_sqr_impl(h
->v
, f
->v
);
551 static __always_inline
void fe_sq_tt(fe
*h
, const fe
*f
)
553 fe_sqr_impl(h
->v
, f
->v
);
556 static __always_inline
void fe_loose_invert(fe
*out
, const fe_loose
*z
)
566 for (i
= 1; i
< 2; ++i
)
568 fe_mul_tlt(&t1
, z
, &t1
);
569 fe_mul_ttt(&t0
, &t0
, &t1
);
571 fe_mul_ttt(&t1
, &t1
, &t2
);
573 for (i
= 1; i
< 5; ++i
)
575 fe_mul_ttt(&t1
, &t2
, &t1
);
577 for (i
= 1; i
< 10; ++i
)
579 fe_mul_ttt(&t2
, &t2
, &t1
);
581 for (i
= 1; i
< 20; ++i
)
583 fe_mul_ttt(&t2
, &t3
, &t2
);
585 for (i
= 1; i
< 10; ++i
)
587 fe_mul_ttt(&t1
, &t2
, &t1
);
589 for (i
= 1; i
< 50; ++i
)
591 fe_mul_ttt(&t2
, &t2
, &t1
);
593 for (i
= 1; i
< 100; ++i
)
595 fe_mul_ttt(&t2
, &t3
, &t2
);
597 for (i
= 1; i
< 50; ++i
)
599 fe_mul_ttt(&t1
, &t2
, &t1
);
601 for (i
= 1; i
< 5; ++i
)
603 fe_mul_ttt(out
, &t1
, &t0
);
606 static __always_inline
void fe_invert(fe
*out
, const fe
*z
)
610 fe_loose_invert(out
, &l
);
613 /* Replace (f,g) with (g,f) if b == 1;
614 * replace (f,g) with (f,g) if b == 0.
616 * Preconditions: b in {0,1}
618 static __always_inline
void fe_cswap(fe
*f
, fe
*g
, unsigned int b
)
622 for (i
= 0; i
< 10; i
++) {
623 u32 x
= f
->v
[i
] ^ g
->v
[i
];
630 /* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
631 static __always_inline
void fe_mul_121666_impl(u32 out
[10], const u32 in1
[10])
633 { const u32 x20
= in1
[9];
634 { const u32 x21
= in1
[8];
635 { const u32 x19
= in1
[7];
636 { const u32 x17
= in1
[6];
637 { const u32 x15
= in1
[5];
638 { const u32 x13
= in1
[4];
639 { const u32 x11
= in1
[3];
640 { const u32 x9
= in1
[2];
641 { const u32 x7
= in1
[1];
642 { const u32 x5
= in1
[0];
652 { const u32 x23
= 121666;
653 { u64 x40
= ((u64
)x23
* x5
);
654 { u64 x41
= (((u64
)x23
* x7
) + ((u64
)x25
* x5
));
655 { u64 x42
= ((((u64
)(0x2 * x25
) * x7
) + ((u64
)x23
* x9
)) + ((u64
)x27
* x5
));
656 { u64 x43
= (((((u64
)x25
* x9
) + ((u64
)x27
* x7
)) + ((u64
)x23
* x11
)) + ((u64
)x29
* x5
));
657 { u64 x44
= (((((u64
)x27
* x9
) + (0x2 * (((u64
)x25
* x11
) + ((u64
)x29
* x7
)))) + ((u64
)x23
* x13
)) + ((u64
)x31
* x5
));
658 { u64 x45
= (((((((u64
)x27
* x11
) + ((u64
)x29
* x9
)) + ((u64
)x25
* x13
)) + ((u64
)x31
* x7
)) + ((u64
)x23
* x15
)) + ((u64
)x33
* x5
));
659 { u64 x46
= (((((0x2 * ((((u64
)x29
* x11
) + ((u64
)x25
* x15
)) + ((u64
)x33
* x7
))) + ((u64
)x27
* x13
)) + ((u64
)x31
* x9
)) + ((u64
)x23
* x17
)) + ((u64
)x35
* x5
));
660 { u64 x47
= (((((((((u64
)x29
* x13
) + ((u64
)x31
* x11
)) + ((u64
)x27
* x15
)) + ((u64
)x33
* x9
)) + ((u64
)x25
* x17
)) + ((u64
)x35
* x7
)) + ((u64
)x23
* x19
)) + ((u64
)x37
* x5
));
661 { u64 x48
= (((((((u64
)x31
* x13
) + (0x2 * (((((u64
)x29
* x15
) + ((u64
)x33
* x11
)) + ((u64
)x25
* x19
)) + ((u64
)x37
* x7
)))) + ((u64
)x27
* x17
)) + ((u64
)x35
* x9
)) + ((u64
)x23
* x21
)) + ((u64
)x39
* x5
));
662 { u64 x49
= (((((((((((u64
)x31
* x15
) + ((u64
)x33
* x13
)) + ((u64
)x29
* x17
)) + ((u64
)x35
* x11
)) + ((u64
)x27
* x19
)) + ((u64
)x37
* x9
)) + ((u64
)x25
* x21
)) + ((u64
)x39
* x7
)) + ((u64
)x23
* x20
)) + ((u64
)x38
* x5
));
663 { u64 x50
= (((((0x2 * ((((((u64
)x33
* x15
) + ((u64
)x29
* x19
)) + ((u64
)x37
* x11
)) + ((u64
)x25
* x20
)) + ((u64
)x38
* x7
))) + ((u64
)x31
* x17
)) + ((u64
)x35
* x13
)) + ((u64
)x27
* x21
)) + ((u64
)x39
* x9
));
664 { u64 x51
= (((((((((u64
)x33
* x17
) + ((u64
)x35
* x15
)) + ((u64
)x31
* x19
)) + ((u64
)x37
* x13
)) + ((u64
)x29
* x21
)) + ((u64
)x39
* x11
)) + ((u64
)x27
* x20
)) + ((u64
)x38
* x9
));
665 { u64 x52
= (((((u64
)x35
* x17
) + (0x2 * (((((u64
)x33
* x19
) + ((u64
)x37
* x15
)) + ((u64
)x29
* x20
)) + ((u64
)x38
* x11
)))) + ((u64
)x31
* x21
)) + ((u64
)x39
* x13
));
666 { u64 x53
= (((((((u64
)x35
* x19
) + ((u64
)x37
* x17
)) + ((u64
)x33
* x21
)) + ((u64
)x39
* x15
)) + ((u64
)x31
* x20
)) + ((u64
)x38
* x13
));
667 { u64 x54
= (((0x2 * ((((u64
)x37
* x19
) + ((u64
)x33
* x20
)) + ((u64
)x38
* x15
))) + ((u64
)x35
* x21
)) + ((u64
)x39
* x17
));
668 { u64 x55
= (((((u64
)x37
* x21
) + ((u64
)x39
* x19
)) + ((u64
)x35
* x20
)) + ((u64
)x38
* x17
));
669 { u64 x56
= (((u64
)x39
* x21
) + (0x2 * (((u64
)x37
* x20
) + ((u64
)x38
* x19
))));
670 { u64 x57
= (((u64
)x39
* x20
) + ((u64
)x38
* x21
));
671 { u64 x58
= ((u64
)(0x2 * x38
) * x20
);
672 { u64 x59
= (x48
+ (x58
<< 0x4));
673 { u64 x60
= (x59
+ (x58
<< 0x1));
674 { u64 x61
= (x60
+ x58
);
675 { u64 x62
= (x47
+ (x57
<< 0x4));
676 { u64 x63
= (x62
+ (x57
<< 0x1));
677 { u64 x64
= (x63
+ x57
);
678 { u64 x65
= (x46
+ (x56
<< 0x4));
679 { u64 x66
= (x65
+ (x56
<< 0x1));
680 { u64 x67
= (x66
+ x56
);
681 { u64 x68
= (x45
+ (x55
<< 0x4));
682 { u64 x69
= (x68
+ (x55
<< 0x1));
683 { u64 x70
= (x69
+ x55
);
684 { u64 x71
= (x44
+ (x54
<< 0x4));
685 { u64 x72
= (x71
+ (x54
<< 0x1));
686 { u64 x73
= (x72
+ x54
);
687 { u64 x74
= (x43
+ (x53
<< 0x4));
688 { u64 x75
= (x74
+ (x53
<< 0x1));
689 { u64 x76
= (x75
+ x53
);
690 { u64 x77
= (x42
+ (x52
<< 0x4));
691 { u64 x78
= (x77
+ (x52
<< 0x1));
692 { u64 x79
= (x78
+ x52
);
693 { u64 x80
= (x41
+ (x51
<< 0x4));
694 { u64 x81
= (x80
+ (x51
<< 0x1));
695 { u64 x82
= (x81
+ x51
);
696 { u64 x83
= (x40
+ (x50
<< 0x4));
697 { u64 x84
= (x83
+ (x50
<< 0x1));
698 { u64 x85
= (x84
+ x50
);
699 { u64 x86
= (x85
>> 0x1a);
700 { u32 x87
= ((u32
)x85
& 0x3ffffff);
701 { u64 x88
= (x86
+ x82
);
702 { u64 x89
= (x88
>> 0x19);
703 { u32 x90
= ((u32
)x88
& 0x1ffffff);
704 { u64 x91
= (x89
+ x79
);
705 { u64 x92
= (x91
>> 0x1a);
706 { u32 x93
= ((u32
)x91
& 0x3ffffff);
707 { u64 x94
= (x92
+ x76
);
708 { u64 x95
= (x94
>> 0x19);
709 { u32 x96
= ((u32
)x94
& 0x1ffffff);
710 { u64 x97
= (x95
+ x73
);
711 { u64 x98
= (x97
>> 0x1a);
712 { u32 x99
= ((u32
)x97
& 0x3ffffff);
713 { u64 x100
= (x98
+ x70
);
714 { u64 x101
= (x100
>> 0x19);
715 { u32 x102
= ((u32
)x100
& 0x1ffffff);
716 { u64 x103
= (x101
+ x67
);
717 { u64 x104
= (x103
>> 0x1a);
718 { u32 x105
= ((u32
)x103
& 0x3ffffff);
719 { u64 x106
= (x104
+ x64
);
720 { u64 x107
= (x106
>> 0x19);
721 { u32 x108
= ((u32
)x106
& 0x1ffffff);
722 { u64 x109
= (x107
+ x61
);
723 { u64 x110
= (x109
>> 0x1a);
724 { u32 x111
= ((u32
)x109
& 0x3ffffff);
725 { u64 x112
= (x110
+ x49
);
726 { u64 x113
= (x112
>> 0x19);
727 { u32 x114
= ((u32
)x112
& 0x1ffffff);
728 { u64 x115
= (x87
+ (0x13 * x113
));
729 { u32 x116
= (u32
) (x115
>> 0x1a);
730 { u32 x117
= ((u32
)x115
& 0x3ffffff);
731 { u32 x118
= (x116
+ x90
);
732 { u32 x119
= (x118
>> 0x19);
733 { u32 x120
= (x118
& 0x1ffffff);
736 out
[2] = (x119
+ x93
);
744 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
747 static __always_inline
void fe_mul121666(fe
*h
, const fe_loose
*f
)
749 fe_mul_121666_impl(h
->v
, f
->v
);
752 static void curve25519_generic(u8 out
[CURVE25519_KEY_SIZE
],
753 const u8 scalar
[CURVE25519_KEY_SIZE
],
754 const u8 point
[CURVE25519_KEY_SIZE
])
756 fe x1
, x2
, z2
, x3
, z3
;
757 fe_loose x2l
, z2l
, x3l
;
762 memcpy(e
, scalar
, 32);
763 curve25519_clamp_secret(e
);
765 /* The following implementation was transcribed to Coq and proven to
766 * correspond to unary scalar multiplication in affine coordinates given
767 * that x1 != 0 is the x coordinate of some point on the curve. It was
768 * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
769 * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
770 * quantified over the underlying field, so it applies to Curve25519
771 * itself and the quadratic twist of Curve25519. It was not proven in
772 * Coq that prime-field arithmetic correctly simulates extension-field
773 * arithmetic on prime-field values. The decoding of the byte array
774 * representation of e was not considered.
776 * Specification of Montgomery curves in affine coordinates:
777 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
779 * Proof that these form a group that is isomorphic to a Weierstrass
781 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
783 * Coq transcription and correctness proof of the loop
784 * (where scalarbits=255):
785 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
786 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
787 * preconditions: 0 <= e < 2^255 (not necessarily e < order),
790 fe_frombytes(&x1
, point
);
796 for (pos
= 254; pos
>= 0; --pos
) {
798 fe_loose tmp0l
, tmp1l
;
799 /* loop invariant as of right before the test, for the case
801 * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
803 * let r := e >> (pos+1) in the following equalities of
805 * to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
806 * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
807 * x1 is the nonzero x coordinate of the nonzero
808 * point (r*P-(r+1)*P)
810 unsigned b
= 1 & (e
[pos
/ 8] >> (pos
& 7));
812 fe_cswap(&x2
, &x3
, swap
);
813 fe_cswap(&z2
, &z3
, swap
);
815 /* Coq transcription of ladderstep formula (called from
817 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
818 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
819 * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
820 * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
822 fe_sub(&tmp0l
, &x3
, &z3
);
823 fe_sub(&tmp1l
, &x2
, &z2
);
824 fe_add(&x2l
, &x2
, &z2
);
825 fe_add(&z2l
, &x3
, &z3
);
826 fe_mul_tll(&z3
, &tmp0l
, &x2l
);
827 fe_mul_tll(&z2
, &z2l
, &tmp1l
);
828 fe_sq_tl(&tmp0
, &tmp1l
);
829 fe_sq_tl(&tmp1
, &x2l
);
830 fe_add(&x3l
, &z3
, &z2
);
831 fe_sub(&z2l
, &z3
, &z2
);
832 fe_mul_ttt(&x2
, &tmp1
, &tmp0
);
833 fe_sub(&tmp1l
, &tmp1
, &tmp0
);
835 fe_mul121666(&z3
, &tmp1l
);
837 fe_add(&tmp0l
, &tmp0
, &z3
);
838 fe_mul_ttt(&z3
, &x1
, &z2
);
839 fe_mul_tll(&z2
, &tmp1l
, &tmp0l
);
841 /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
844 fe_cswap(&x2
, &x3
, swap
);
845 fe_cswap(&z2
, &z3
, swap
);
848 fe_mul_ttt(&x2
, &x2
, &z2
);
849 fe_tobytes(out
, &x2
);
851 memzero_explicit(&x1
, sizeof(x1
));
852 memzero_explicit(&x2
, sizeof(x2
));
853 memzero_explicit(&z2
, sizeof(z2
));
854 memzero_explicit(&x3
, sizeof(x3
));
855 memzero_explicit(&z3
, sizeof(z3
));
856 memzero_explicit(&x2l
, sizeof(x2l
));
857 memzero_explicit(&z2l
, sizeof(z2l
));
858 memzero_explicit(&x3l
, sizeof(x3l
));
859 memzero_explicit(&e
, sizeof(e
));