summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/ec/ecp_nistp521.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/ec/ecp_nistp521.c2112
1 files changed, 0 insertions, 2112 deletions
diff --git a/src/lib/libcrypto/ec/ecp_nistp521.c b/src/lib/libcrypto/ec/ecp_nistp521.c
deleted file mode 100644
index caeea14911..0000000000
--- a/src/lib/libcrypto/ec/ecp_nistp521.c
+++ /dev/null
@@ -1,2112 +0,0 @@
1/* $OpenBSD: ecp_nistp521.c,v 1.30 2022/12/26 07:18:51 jmc Exp $ */
2/*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5/*
6 * Copyright (c) 2011 Google Inc.
7 *
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29#include <stdint.h>
30#include <string.h>
31
32#include <openssl/opensslconf.h>
33
34#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
35
36#include <openssl/err.h>
37#include "ec_local.h"
38
39#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
40 /* even with gcc, the typedef won't work for 32-bit platforms */
41 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
42#else
43 #error "Need GCC 3.1 or later to define type uint128_t"
44#endif
45
46typedef uint8_t u8;
47typedef uint64_t u64;
48typedef int64_t s64;
49
50/* The underlying field.
51 *
52 * P521 operates over GF(2^521-1). We can serialise an element of this field
53 * into 66 bytes where the most significant byte contains only a single bit. We
54 * call this an felem_bytearray. */
55
56typedef u8 felem_bytearray[66];
57
58/* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
59 * These values are big-endian. */
60static const felem_bytearray nistp521_curve_params[5] =
61 {
62 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff},
71 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xfc},
80 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
81 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
82 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
83 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
84 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
85 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
86 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
87 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
88 0x3f, 0x00},
89 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
90 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
91 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
92 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
93 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
94 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
95 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
96 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
97 0xbd, 0x66},
98 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
99 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
100 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
101 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
102 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
103 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
104 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
105 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
106 0x66, 0x50}
107 };
108
109/* The representation of field elements.
110 * ------------------------------------
111 *
112 * We represent field elements with nine values. These values are either 64 or
113 * 128 bits and the field element represented is:
114 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
115 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
116 * 58 bits apart, but are greater than 58 bits in length, the most significant
117 * bits of each limb overlap with the least significant bits of the next.
118 *
119 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
120 * 'largefelem' */
121
122#define NLIMBS 9
123
124typedef uint64_t limb;
125typedef limb felem[NLIMBS];
126typedef uint128_t largefelem[NLIMBS];
127
128static const limb bottom57bits = 0x1ffffffffffffff;
129static const limb bottom58bits = 0x3ffffffffffffff;
130
131/* bin66_to_felem takes a little-endian byte array and converts it into felem
132 * form. This assumes that the CPU is little-endian. */
133static void
134bin66_to_felem(felem out, const u8 in[66])
135{
136 out[0] = (*((limb *) & in[0])) & bottom58bits;
137 out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits;
138 out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits;
139 out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits;
140 out[4] = (*((limb *) & in[29])) & bottom58bits;
141 out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits;
142 out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits;
143 out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits;
144 out[8] = (*((limb *) & in[58])) & bottom57bits;
145}
146
147/* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
148 * array. This assumes that the CPU is little-endian. */
149static void
150felem_to_bin66(u8 out[66], const felem in)
151{
152 memset(out, 0, 66);
153 (*((limb *) & out[0])) = in[0];
154 (*((limb *) & out[7])) |= in[1] << 2;
155 (*((limb *) & out[14])) |= in[2] << 4;
156 (*((limb *) & out[21])) |= in[3] << 6;
157 (*((limb *) & out[29])) = in[4];
158 (*((limb *) & out[36])) |= in[5] << 2;
159 (*((limb *) & out[43])) |= in[6] << 4;
160 (*((limb *) & out[50])) |= in[7] << 6;
161 (*((limb *) & out[58])) = in[8];
162}
163
164/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
165static void
166flip_endian(u8 *out, const u8 *in, unsigned len)
167{
168 unsigned i;
169 for (i = 0; i < len; ++i)
170 out[i] = in[len - 1 - i];
171}
172
173/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
174static int
175BN_to_felem(felem out, const BIGNUM *bn)
176{
177 felem_bytearray b_in;
178 felem_bytearray b_out;
179 unsigned num_bytes;
180
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out, 0, sizeof b_out);
183 num_bytes = BN_num_bytes(bn);
184 if (num_bytes > sizeof b_out) {
185 ECerror(EC_R_BIGNUM_OUT_OF_RANGE);
186 return 0;
187 }
188 if (BN_is_negative(bn)) {
189 ECerror(EC_R_BIGNUM_OUT_OF_RANGE);
190 return 0;
191 }
192 num_bytes = BN_bn2bin(bn, b_in);
193 flip_endian(b_out, b_in, num_bytes);
194 bin66_to_felem(out, b_out);
195 return 1;
196}
197
198/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
199static BIGNUM *
200felem_to_BN(BIGNUM *out, const felem in)
201{
202 felem_bytearray b_in, b_out;
203 felem_to_bin66(b_in, in);
204 flip_endian(b_out, b_in, sizeof b_out);
205 return BN_bin2bn(b_out, sizeof b_out, out);
206}
207
208
209/* Field operations
210 * ---------------- */
211
212static void
213felem_one(felem out)
214{
215 out[0] = 1;
216 out[1] = 0;
217 out[2] = 0;
218 out[3] = 0;
219 out[4] = 0;
220 out[5] = 0;
221 out[6] = 0;
222 out[7] = 0;
223 out[8] = 0;
224}
225
226static void
227felem_assign(felem out, const felem in)
228{
229 out[0] = in[0];
230 out[1] = in[1];
231 out[2] = in[2];
232 out[3] = in[3];
233 out[4] = in[4];
234 out[5] = in[5];
235 out[6] = in[6];
236 out[7] = in[7];
237 out[8] = in[8];
238}
239
240/* felem_sum64 sets out = out + in. */
241static void
242felem_sum64(felem out, const felem in)
243{
244 out[0] += in[0];
245 out[1] += in[1];
246 out[2] += in[2];
247 out[3] += in[3];
248 out[4] += in[4];
249 out[5] += in[5];
250 out[6] += in[6];
251 out[7] += in[7];
252 out[8] += in[8];
253}
254
255/* felem_scalar sets out = in * scalar */
256static void
257felem_scalar(felem out, const felem in, limb scalar)
258{
259 out[0] = in[0] * scalar;
260 out[1] = in[1] * scalar;
261 out[2] = in[2] * scalar;
262 out[3] = in[3] * scalar;
263 out[4] = in[4] * scalar;
264 out[5] = in[5] * scalar;
265 out[6] = in[6] * scalar;
266 out[7] = in[7] * scalar;
267 out[8] = in[8] * scalar;
268}
269
270/* felem_scalar64 sets out = out * scalar */
271static void
272felem_scalar64(felem out, limb scalar)
273{
274 out[0] *= scalar;
275 out[1] *= scalar;
276 out[2] *= scalar;
277 out[3] *= scalar;
278 out[4] *= scalar;
279 out[5] *= scalar;
280 out[6] *= scalar;
281 out[7] *= scalar;
282 out[8] *= scalar;
283}
284
285/* felem_scalar128 sets out = out * scalar */
286static void
287felem_scalar128(largefelem out, limb scalar)
288{
289 out[0] *= scalar;
290 out[1] *= scalar;
291 out[2] *= scalar;
292 out[3] *= scalar;
293 out[4] *= scalar;
294 out[5] *= scalar;
295 out[6] *= scalar;
296 out[7] *= scalar;
297 out[8] *= scalar;
298}
299
300/* felem_neg sets |out| to |-in|
301 * On entry:
302 * in[i] < 2^59 + 2^14
303 * On exit:
304 * out[i] < 2^62
305 */
306static void
307felem_neg(felem out, const felem in)
308{
309 /* In order to prevent underflow, we subtract from 0 mod p. */
310 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
311 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
312
313 out[0] = two62m3 - in[0];
314 out[1] = two62m2 - in[1];
315 out[2] = two62m2 - in[2];
316 out[3] = two62m2 - in[3];
317 out[4] = two62m2 - in[4];
318 out[5] = two62m2 - in[5];
319 out[6] = two62m2 - in[6];
320 out[7] = two62m2 - in[7];
321 out[8] = two62m2 - in[8];
322}
323
324/* felem_diff64 subtracts |in| from |out|
325 * On entry:
326 * in[i] < 2^59 + 2^14
327 * On exit:
328 * out[i] < out[i] + 2^62
329 */
330static void
331felem_diff64(felem out, const felem in)
332{
333 /* In order to prevent underflow, we add 0 mod p before subtracting. */
334 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
335 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
336
337 out[0] += two62m3 - in[0];
338 out[1] += two62m2 - in[1];
339 out[2] += two62m2 - in[2];
340 out[3] += two62m2 - in[3];
341 out[4] += two62m2 - in[4];
342 out[5] += two62m2 - in[5];
343 out[6] += two62m2 - in[6];
344 out[7] += two62m2 - in[7];
345 out[8] += two62m2 - in[8];
346}
347
348/* felem_diff_128_64 subtracts |in| from |out|
349 * On entry:
350 * in[i] < 2^62 + 2^17
351 * On exit:
352 * out[i] < out[i] + 2^63
353 */
354static void
355felem_diff_128_64(largefelem out, const felem in)
356{
357 /* In order to prevent underflow, we add 0 mod p before subtracting. */
358 static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5);
359 static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4);
360
361 out[0] += two63m6 - in[0];
362 out[1] += two63m5 - in[1];
363 out[2] += two63m5 - in[2];
364 out[3] += two63m5 - in[3];
365 out[4] += two63m5 - in[4];
366 out[5] += two63m5 - in[5];
367 out[6] += two63m5 - in[6];
368 out[7] += two63m5 - in[7];
369 out[8] += two63m5 - in[8];
370}
371
372/* felem_diff_128_64 subtracts |in| from |out|
373 * On entry:
374 * in[i] < 2^126
375 * On exit:
376 * out[i] < out[i] + 2^127 - 2^69
377 */
378static void
379felem_diff128(largefelem out, const largefelem in)
380{
381 /* In order to prevent underflow, we add 0 mod p before subtracting. */
382 static const uint128_t two127m70 = (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);
383 static const uint128_t two127m69 = (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);
384
385 out[0] += (two127m70 - in[0]);
386 out[1] += (two127m69 - in[1]);
387 out[2] += (two127m69 - in[2]);
388 out[3] += (two127m69 - in[3]);
389 out[4] += (two127m69 - in[4]);
390 out[5] += (two127m69 - in[5]);
391 out[6] += (two127m69 - in[6]);
392 out[7] += (two127m69 - in[7]);
393 out[8] += (two127m69 - in[8]);
394}
395
396/* felem_square sets |out| = |in|^2
397 * On entry:
398 * in[i] < 2^62
399 * On exit:
400 * out[i] < 17 * max(in[i]) * max(in[i])
401 */
402static void
403felem_square(largefelem out, const felem in)
404{
405 felem inx2, inx4;
406 felem_scalar(inx2, in, 2);
407 felem_scalar(inx4, in, 4);
408
409 /*
410 * We have many cases were we want to do in[x] * in[y] + in[y] *
411 * in[x] This is obviously just 2 * in[x] * in[y] However, rather
412 * than do the doubling on the 128 bit result, we double one of the
413 * inputs to the multiplication by reading from |inx2|
414 */
415
416 out[0] = ((uint128_t) in[0]) * in[0];
417 out[1] = ((uint128_t) in[0]) * inx2[1];
418 out[2] = ((uint128_t) in[0]) * inx2[2] +
419 ((uint128_t) in[1]) * in[1];
420 out[3] = ((uint128_t) in[0]) * inx2[3] +
421 ((uint128_t) in[1]) * inx2[2];
422 out[4] = ((uint128_t) in[0]) * inx2[4] +
423 ((uint128_t) in[1]) * inx2[3] +
424 ((uint128_t) in[2]) * in[2];
425 out[5] = ((uint128_t) in[0]) * inx2[5] +
426 ((uint128_t) in[1]) * inx2[4] +
427 ((uint128_t) in[2]) * inx2[3];
428 out[6] = ((uint128_t) in[0]) * inx2[6] +
429 ((uint128_t) in[1]) * inx2[5] +
430 ((uint128_t) in[2]) * inx2[4] +
431 ((uint128_t) in[3]) * in[3];
432 out[7] = ((uint128_t) in[0]) * inx2[7] +
433 ((uint128_t) in[1]) * inx2[6] +
434 ((uint128_t) in[2]) * inx2[5] +
435 ((uint128_t) in[3]) * inx2[4];
436 out[8] = ((uint128_t) in[0]) * inx2[8] +
437 ((uint128_t) in[1]) * inx2[7] +
438 ((uint128_t) in[2]) * inx2[6] +
439 ((uint128_t) in[3]) * inx2[5] +
440 ((uint128_t) in[4]) * in[4];
441
442 /*
443 * The remaining limbs fall above 2^521, with the first falling at
444 * 2^522. They correspond to locations one bit up from the limbs
445 * produced above so we would have to multiply by two to align them.
446 * Again, rather than operate on the 128-bit result, we double one of
447 * the inputs to the multiplication. If we want to double for both
448 * this reason, and the reason above, then we end up multiplying by
449 * four.
450 */
451
452 /* 9 */
453 out[0] += ((uint128_t) in[1]) * inx4[8] +
454 ((uint128_t) in[2]) * inx4[7] +
455 ((uint128_t) in[3]) * inx4[6] +
456 ((uint128_t) in[4]) * inx4[5];
457
458 /* 10 */
459 out[1] += ((uint128_t) in[2]) * inx4[8] +
460 ((uint128_t) in[3]) * inx4[7] +
461 ((uint128_t) in[4]) * inx4[6] +
462 ((uint128_t) in[5]) * inx2[5];
463
464 /* 11 */
465 out[2] += ((uint128_t) in[3]) * inx4[8] +
466 ((uint128_t) in[4]) * inx4[7] +
467 ((uint128_t) in[5]) * inx4[6];
468
469 /* 12 */
470 out[3] += ((uint128_t) in[4]) * inx4[8] +
471 ((uint128_t) in[5]) * inx4[7] +
472 ((uint128_t) in[6]) * inx2[6];
473
474 /* 13 */
475 out[4] += ((uint128_t) in[5]) * inx4[8] +
476 ((uint128_t) in[6]) * inx4[7];
477
478 /* 14 */
479 out[5] += ((uint128_t) in[6]) * inx4[8] +
480 ((uint128_t) in[7]) * inx2[7];
481
482 /* 15 */
483 out[6] += ((uint128_t) in[7]) * inx4[8];
484
485 /* 16 */
486 out[7] += ((uint128_t) in[8]) * inx2[8];
487}
488
489/* felem_mul sets |out| = |in1| * |in2|
490 * On entry:
491 * in1[i] < 2^64
492 * in2[i] < 2^63
493 * On exit:
494 * out[i] < 17 * max(in1[i]) * max(in2[i])
495 */
496static void
497felem_mul(largefelem out, const felem in1, const felem in2)
498{
499 felem in2x2;
500 felem_scalar(in2x2, in2, 2);
501
502 out[0] = ((uint128_t) in1[0]) * in2[0];
503
504 out[1] = ((uint128_t) in1[0]) * in2[1] +
505 ((uint128_t) in1[1]) * in2[0];
506
507 out[2] = ((uint128_t) in1[0]) * in2[2] +
508 ((uint128_t) in1[1]) * in2[1] +
509 ((uint128_t) in1[2]) * in2[0];
510
511 out[3] = ((uint128_t) in1[0]) * in2[3] +
512 ((uint128_t) in1[1]) * in2[2] +
513 ((uint128_t) in1[2]) * in2[1] +
514 ((uint128_t) in1[3]) * in2[0];
515
516 out[4] = ((uint128_t) in1[0]) * in2[4] +
517 ((uint128_t) in1[1]) * in2[3] +
518 ((uint128_t) in1[2]) * in2[2] +
519 ((uint128_t) in1[3]) * in2[1] +
520 ((uint128_t) in1[4]) * in2[0];
521
522 out[5] = ((uint128_t) in1[0]) * in2[5] +
523 ((uint128_t) in1[1]) * in2[4] +
524 ((uint128_t) in1[2]) * in2[3] +
525 ((uint128_t) in1[3]) * in2[2] +
526 ((uint128_t) in1[4]) * in2[1] +
527 ((uint128_t) in1[5]) * in2[0];
528
529 out[6] = ((uint128_t) in1[0]) * in2[6] +
530 ((uint128_t) in1[1]) * in2[5] +
531 ((uint128_t) in1[2]) * in2[4] +
532 ((uint128_t) in1[3]) * in2[3] +
533 ((uint128_t) in1[4]) * in2[2] +
534 ((uint128_t) in1[5]) * in2[1] +
535 ((uint128_t) in1[6]) * in2[0];
536
537 out[7] = ((uint128_t) in1[0]) * in2[7] +
538 ((uint128_t) in1[1]) * in2[6] +
539 ((uint128_t) in1[2]) * in2[5] +
540 ((uint128_t) in1[3]) * in2[4] +
541 ((uint128_t) in1[4]) * in2[3] +
542 ((uint128_t) in1[5]) * in2[2] +
543 ((uint128_t) in1[6]) * in2[1] +
544 ((uint128_t) in1[7]) * in2[0];
545
546 out[8] = ((uint128_t) in1[0]) * in2[8] +
547 ((uint128_t) in1[1]) * in2[7] +
548 ((uint128_t) in1[2]) * in2[6] +
549 ((uint128_t) in1[3]) * in2[5] +
550 ((uint128_t) in1[4]) * in2[4] +
551 ((uint128_t) in1[5]) * in2[3] +
552 ((uint128_t) in1[6]) * in2[2] +
553 ((uint128_t) in1[7]) * in2[1] +
554 ((uint128_t) in1[8]) * in2[0];
555
556 /* See comment in felem_square about the use of in2x2 here */
557
558 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
559 ((uint128_t) in1[2]) * in2x2[7] +
560 ((uint128_t) in1[3]) * in2x2[6] +
561 ((uint128_t) in1[4]) * in2x2[5] +
562 ((uint128_t) in1[5]) * in2x2[4] +
563 ((uint128_t) in1[6]) * in2x2[3] +
564 ((uint128_t) in1[7]) * in2x2[2] +
565 ((uint128_t) in1[8]) * in2x2[1];
566
567 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
568 ((uint128_t) in1[3]) * in2x2[7] +
569 ((uint128_t) in1[4]) * in2x2[6] +
570 ((uint128_t) in1[5]) * in2x2[5] +
571 ((uint128_t) in1[6]) * in2x2[4] +
572 ((uint128_t) in1[7]) * in2x2[3] +
573 ((uint128_t) in1[8]) * in2x2[2];
574
575 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
576 ((uint128_t) in1[4]) * in2x2[7] +
577 ((uint128_t) in1[5]) * in2x2[6] +
578 ((uint128_t) in1[6]) * in2x2[5] +
579 ((uint128_t) in1[7]) * in2x2[4] +
580 ((uint128_t) in1[8]) * in2x2[3];
581
582 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
583 ((uint128_t) in1[5]) * in2x2[7] +
584 ((uint128_t) in1[6]) * in2x2[6] +
585 ((uint128_t) in1[7]) * in2x2[5] +
586 ((uint128_t) in1[8]) * in2x2[4];
587
588 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
589 ((uint128_t) in1[6]) * in2x2[7] +
590 ((uint128_t) in1[7]) * in2x2[6] +
591 ((uint128_t) in1[8]) * in2x2[5];
592
593 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
594 ((uint128_t) in1[7]) * in2x2[7] +
595 ((uint128_t) in1[8]) * in2x2[6];
596
597 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
598 ((uint128_t) in1[8]) * in2x2[7];
599
600 out[7] += ((uint128_t) in1[8]) * in2x2[8];
601}
602
603static const limb bottom52bits = 0xfffffffffffff;
604
605/* felem_reduce converts a largefelem to an felem.
606 * On entry:
607 * in[i] < 2^128
608 * On exit:
609 * out[i] < 2^59 + 2^14
610 */
611static void
612felem_reduce(felem out, const largefelem in)
613{
614 u64 overflow1, overflow2;
615
616 out[0] = ((limb) in[0]) & bottom58bits;
617 out[1] = ((limb) in[1]) & bottom58bits;
618 out[2] = ((limb) in[2]) & bottom58bits;
619 out[3] = ((limb) in[3]) & bottom58bits;
620 out[4] = ((limb) in[4]) & bottom58bits;
621 out[5] = ((limb) in[5]) & bottom58bits;
622 out[6] = ((limb) in[6]) & bottom58bits;
623 out[7] = ((limb) in[7]) & bottom58bits;
624 out[8] = ((limb) in[8]) & bottom58bits;
625
626 /* out[i] < 2^58 */
627
628 out[1] += ((limb) in[0]) >> 58;
629 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
630 /*
631 * out[1] < 2^58 + 2^6 + 2^58 = 2^59 + 2^6
632 */
633 out[2] += ((limb) (in[0] >> 64)) >> 52;
634
635 out[2] += ((limb) in[1]) >> 58;
636 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
637 out[3] += ((limb) (in[1] >> 64)) >> 52;
638
639 out[3] += ((limb) in[2]) >> 58;
640 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
641 out[4] += ((limb) (in[2] >> 64)) >> 52;
642
643 out[4] += ((limb) in[3]) >> 58;
644 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
645 out[5] += ((limb) (in[3] >> 64)) >> 52;
646
647 out[5] += ((limb) in[4]) >> 58;
648 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
649 out[6] += ((limb) (in[4] >> 64)) >> 52;
650
651 out[6] += ((limb) in[5]) >> 58;
652 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
653 out[7] += ((limb) (in[5] >> 64)) >> 52;
654
655 out[7] += ((limb) in[6]) >> 58;
656 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
657 out[8] += ((limb) (in[6] >> 64)) >> 52;
658
659 out[8] += ((limb) in[7]) >> 58;
660 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
661 /*
662 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 < 2^59 + 2^13
663 */
664 overflow1 = ((limb) (in[7] >> 64)) >> 52;
665
666 overflow1 += ((limb) in[8]) >> 58;
667 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
668 overflow2 = ((limb) (in[8] >> 64)) >> 52;
669
670 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
671 overflow2 <<= 1; /* overflow2 < 2^13 */
672
673 out[0] += overflow1; /* out[0] < 2^60 */
674 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
675
676 out[1] += out[0] >> 58;
677 out[0] &= bottom58bits;
678 /*
679 * out[0] < 2^58 out[1] < 2^59 + 2^6 + 2^13 + 2^2 < 2^59 + 2^14
680 */
681}
682
683static void
684felem_square_reduce(felem out, const felem in)
685{
686 largefelem tmp;
687 felem_square(tmp, in);
688 felem_reduce(out, tmp);
689}
690
691static void
692felem_mul_reduce(felem out, const felem in1, const felem in2)
693{
694 largefelem tmp;
695 felem_mul(tmp, in1, in2);
696 felem_reduce(out, tmp);
697}
698
699/* felem_inv calculates |out| = |in|^{-1}
700 *
701 * Based on Fermat's Little Theorem:
702 * a^p = a (mod p)
703 * a^{p-1} = 1 (mod p)
704 * a^{p-2} = a^{-1} (mod p)
705 */
706static void
707felem_inv(felem out, const felem in)
708{
709 felem ftmp, ftmp2, ftmp3, ftmp4;
710 largefelem tmp;
711 unsigned i;
712
713 felem_square(tmp, in);
714 felem_reduce(ftmp, tmp);/* 2^1 */
715 felem_mul(tmp, in, ftmp);
716 felem_reduce(ftmp, tmp);/* 2^2 - 2^0 */
717 felem_assign(ftmp2, ftmp);
718 felem_square(tmp, ftmp);
719 felem_reduce(ftmp, tmp);/* 2^3 - 2^1 */
720 felem_mul(tmp, in, ftmp);
721 felem_reduce(ftmp, tmp);/* 2^3 - 2^0 */
722 felem_square(tmp, ftmp);
723 felem_reduce(ftmp, tmp);/* 2^4 - 2^1 */
724
725 felem_square(tmp, ftmp2);
726 felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
727 felem_square(tmp, ftmp3);
728 felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
729 felem_mul(tmp, ftmp3, ftmp2);
730 felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
731
732 felem_assign(ftmp2, ftmp3);
733 felem_square(tmp, ftmp3);
734 felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
735 felem_square(tmp, ftmp3);
736 felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
739 felem_square(tmp, ftmp3);
740 felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
741 felem_assign(ftmp4, ftmp3);
742 felem_mul(tmp, ftmp3, ftmp);
743 felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
744 felem_square(tmp, ftmp4);
745 felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
746 felem_mul(tmp, ftmp3, ftmp2);
747 felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
748 felem_assign(ftmp2, ftmp3);
749
750 for (i = 0; i < 8; i++) {
751 felem_square(tmp, ftmp3);
752 felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
753 }
754 felem_mul(tmp, ftmp3, ftmp2);
755 felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
756 felem_assign(ftmp2, ftmp3);
757
758 for (i = 0; i < 16; i++) {
759 felem_square(tmp, ftmp3);
760 felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
761 }
762 felem_mul(tmp, ftmp3, ftmp2);
763 felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
764 felem_assign(ftmp2, ftmp3);
765
766 for (i = 0; i < 32; i++) {
767 felem_square(tmp, ftmp3);
768 felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
769 }
770 felem_mul(tmp, ftmp3, ftmp2);
771 felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
772 felem_assign(ftmp2, ftmp3);
773
774 for (i = 0; i < 64; i++) {
775 felem_square(tmp, ftmp3);
776 felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
777 }
778 felem_mul(tmp, ftmp3, ftmp2);
779 felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
780 felem_assign(ftmp2, ftmp3);
781
782 for (i = 0; i < 128; i++) {
783 felem_square(tmp, ftmp3);
784 felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
785 }
786 felem_mul(tmp, ftmp3, ftmp2);
787 felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
788 felem_assign(ftmp2, ftmp3);
789
790 for (i = 0; i < 256; i++) {
791 felem_square(tmp, ftmp3);
792 felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
793 }
794 felem_mul(tmp, ftmp3, ftmp2);
795 felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
796
797 for (i = 0; i < 9; i++) {
798 felem_square(tmp, ftmp3);
799 felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
800 }
801 felem_mul(tmp, ftmp3, ftmp4);
802 felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */
803 felem_mul(tmp, ftmp3, in);
804 felem_reduce(out, tmp); /* 2^512 - 3 */
805}
806
807/* This is 2^521-1, expressed as an felem */
808static const felem kPrime =
809{
810 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
811 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
812 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
813};
814
815/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
816 * otherwise.
817 * On entry:
818 * in[i] < 2^59 + 2^14
819 */
820static limb
821felem_is_zero(const felem in)
822{
823 felem ftmp;
824 limb is_zero, is_p;
825 felem_assign(ftmp, in);
826
827 ftmp[0] += ftmp[8] >> 57;
828 ftmp[8] &= bottom57bits;
829 /* ftmp[8] < 2^57 */
830 ftmp[1] += ftmp[0] >> 58;
831 ftmp[0] &= bottom58bits;
832 ftmp[2] += ftmp[1] >> 58;
833 ftmp[1] &= bottom58bits;
834 ftmp[3] += ftmp[2] >> 58;
835 ftmp[2] &= bottom58bits;
836 ftmp[4] += ftmp[3] >> 58;
837 ftmp[3] &= bottom58bits;
838 ftmp[5] += ftmp[4] >> 58;
839 ftmp[4] &= bottom58bits;
840 ftmp[6] += ftmp[5] >> 58;
841 ftmp[5] &= bottom58bits;
842 ftmp[7] += ftmp[6] >> 58;
843 ftmp[6] &= bottom58bits;
844 ftmp[8] += ftmp[7] >> 58;
845 ftmp[7] &= bottom58bits;
846 /* ftmp[8] < 2^57 + 4 */
847
848 /*
849 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
850 * greater than our bound for ftmp[8]. Therefore we only have to
851 * check if the zero is zero or 2^521-1.
852 */
853
854 is_zero = 0;
855 is_zero |= ftmp[0];
856 is_zero |= ftmp[1];
857 is_zero |= ftmp[2];
858 is_zero |= ftmp[3];
859 is_zero |= ftmp[4];
860 is_zero |= ftmp[5];
861 is_zero |= ftmp[6];
862 is_zero |= ftmp[7];
863 is_zero |= ftmp[8];
864
865 is_zero--;
866 /*
867 * We know that ftmp[i] < 2^63, therefore the only way that the top
868 * bit can be set is if is_zero was 0 before the decrement.
869 */
870 is_zero = ((s64) is_zero) >> 63;
871
872 is_p = ftmp[0] ^ kPrime[0];
873 is_p |= ftmp[1] ^ kPrime[1];
874 is_p |= ftmp[2] ^ kPrime[2];
875 is_p |= ftmp[3] ^ kPrime[3];
876 is_p |= ftmp[4] ^ kPrime[4];
877 is_p |= ftmp[5] ^ kPrime[5];
878 is_p |= ftmp[6] ^ kPrime[6];
879 is_p |= ftmp[7] ^ kPrime[7];
880 is_p |= ftmp[8] ^ kPrime[8];
881
882 is_p--;
883 is_p = ((s64) is_p) >> 63;
884
885 is_zero |= is_p;
886 return is_zero;
887}
888
889static int
890felem_is_zero_int(const felem in)
891{
892 return (int) (felem_is_zero(in) & ((limb) 1));
893}
894
895/* felem_contract converts |in| to its unique, minimal representation.
896 * On entry:
897 * in[i] < 2^59 + 2^14
898 */
899static void
900felem_contract(felem out, const felem in)
901{
902 limb is_p, is_greater, sign;
903 static const limb two58 = ((limb) 1) << 58;
904
905 felem_assign(out, in);
906
907 out[0] += out[8] >> 57;
908 out[8] &= bottom57bits;
909 /* out[8] < 2^57 */
910 out[1] += out[0] >> 58;
911 out[0] &= bottom58bits;
912 out[2] += out[1] >> 58;
913 out[1] &= bottom58bits;
914 out[3] += out[2] >> 58;
915 out[2] &= bottom58bits;
916 out[4] += out[3] >> 58;
917 out[3] &= bottom58bits;
918 out[5] += out[4] >> 58;
919 out[4] &= bottom58bits;
920 out[6] += out[5] >> 58;
921 out[5] &= bottom58bits;
922 out[7] += out[6] >> 58;
923 out[6] &= bottom58bits;
924 out[8] += out[7] >> 58;
925 out[7] &= bottom58bits;
926 /* out[8] < 2^57 + 4 */
927
928 /*
929 * If the value is greater than 2^521-1 then we have to subtract
930 * 2^521-1 out. See the comments in felem_is_zero regarding why we
931 * don't test for other multiples of the prime.
932 */
933
934 /*
935 * First, if |out| is equal to 2^521-1, we subtract it out to get
936 * zero.
937 */
938
939 is_p = out[0] ^ kPrime[0];
940 is_p |= out[1] ^ kPrime[1];
941 is_p |= out[2] ^ kPrime[2];
942 is_p |= out[3] ^ kPrime[3];
943 is_p |= out[4] ^ kPrime[4];
944 is_p |= out[5] ^ kPrime[5];
945 is_p |= out[6] ^ kPrime[6];
946 is_p |= out[7] ^ kPrime[7];
947 is_p |= out[8] ^ kPrime[8];
948
949 is_p--;
950 is_p &= is_p << 32;
951 is_p &= is_p << 16;
952 is_p &= is_p << 8;
953 is_p &= is_p << 4;
954 is_p &= is_p << 2;
955 is_p &= is_p << 1;
956 is_p = ((s64) is_p) >> 63;
957 is_p = ~is_p;
958
959 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
960
961 out[0] &= is_p;
962 out[1] &= is_p;
963 out[2] &= is_p;
964 out[3] &= is_p;
965 out[4] &= is_p;
966 out[5] &= is_p;
967 out[6] &= is_p;
968 out[7] &= is_p;
969 out[8] &= is_p;
970
971 /*
972 * In order to test that |out| >= 2^521-1 we need only test if out[8]
973 * >> 57 is greater than zero as (2^521-1) + x >= 2^522
974 */
975 is_greater = out[8] >> 57;
976 is_greater |= is_greater << 32;
977 is_greater |= is_greater << 16;
978 is_greater |= is_greater << 8;
979 is_greater |= is_greater << 4;
980 is_greater |= is_greater << 2;
981 is_greater |= is_greater << 1;
982 is_greater = ((s64) is_greater) >> 63;
983
984 out[0] -= kPrime[0] & is_greater;
985 out[1] -= kPrime[1] & is_greater;
986 out[2] -= kPrime[2] & is_greater;
987 out[3] -= kPrime[3] & is_greater;
988 out[4] -= kPrime[4] & is_greater;
989 out[5] -= kPrime[5] & is_greater;
990 out[6] -= kPrime[6] & is_greater;
991 out[7] -= kPrime[7] & is_greater;
992 out[8] -= kPrime[8] & is_greater;
993
994 /* Eliminate negative coefficients */
995 sign = -(out[0] >> 63);
996 out[0] += (two58 & sign);
997 out[1] -= (1 & sign);
998 sign = -(out[1] >> 63);
999 out[1] += (two58 & sign);
1000 out[2] -= (1 & sign);
1001 sign = -(out[2] >> 63);
1002 out[2] += (two58 & sign);
1003 out[3] -= (1 & sign);
1004 sign = -(out[3] >> 63);
1005 out[3] += (two58 & sign);
1006 out[4] -= (1 & sign);
1007 sign = -(out[4] >> 63);
1008 out[4] += (two58 & sign);
1009 out[5] -= (1 & sign);
1010 sign = -(out[0] >> 63);
1011 out[5] += (two58 & sign);
1012 out[6] -= (1 & sign);
1013 sign = -(out[6] >> 63);
1014 out[6] += (two58 & sign);
1015 out[7] -= (1 & sign);
1016 sign = -(out[7] >> 63);
1017 out[7] += (two58 & sign);
1018 out[8] -= (1 & sign);
1019 sign = -(out[5] >> 63);
1020 out[5] += (two58 & sign);
1021 out[6] -= (1 & sign);
1022 sign = -(out[6] >> 63);
1023 out[6] += (two58 & sign);
1024 out[7] -= (1 & sign);
1025 sign = -(out[7] >> 63);
1026 out[7] += (two58 & sign);
1027 out[8] -= (1 & sign);
1028}
1029
1030/* Group operations
1031 * ----------------
1032 *
1033 * Building on top of the field operations we have the operations on the
1034 * elliptic curve group itself. Points on the curve are represented in Jacobian
1035 * coordinates */
1036
1037/* point_double calculates 2*(x_in, y_in, z_in)
1038 *
1039 * The method is taken from:
1040 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1041 *
1042 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1043 * while x_out == y_in is not (maybe this works, but it's not tested). */
1044static void
1045point_double(felem x_out, felem y_out, felem z_out,
1046 const felem x_in, const felem y_in, const felem z_in)
1047{
1048 largefelem tmp, tmp2;
1049 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1050
1051 felem_assign(ftmp, x_in);
1052 felem_assign(ftmp2, x_in);
1053
1054 /* delta = z^2 */
1055 felem_square(tmp, z_in);
1056 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
1057
1058 /* gamma = y^2 */
1059 felem_square(tmp, y_in);
1060 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
1061
1062 /* beta = x*gamma */
1063 felem_mul(tmp, x_in, gamma);
1064 felem_reduce(beta, tmp);/* beta[i] < 2^59 + 2^14 */
1065
1066 /* alpha = 3*(x-delta)*(x+delta) */
1067 felem_diff64(ftmp, delta);
1068 /* ftmp[i] < 2^61 */
1069 felem_sum64(ftmp2, delta);
1070 /* ftmp2[i] < 2^60 + 2^15 */
1071 felem_scalar64(ftmp2, 3);
1072 /* ftmp2[i] < 3*2^60 + 3*2^15 */
1073 felem_mul(tmp, ftmp, ftmp2);
1074 /*
1075 * tmp[i] < 17(3*2^121 + 3*2^76) = 61*2^121 + 61*2^76 < 64*2^121 +
1076 * 64*2^76 = 2^127 + 2^82 < 2^128
1077 */
1078 felem_reduce(alpha, tmp);
1079
1080 /* x' = alpha^2 - 8*beta */
1081 felem_square(tmp, alpha);
1082 /*
1083 * tmp[i] < 17*2^120 < 2^125
1084 */
1085 felem_assign(ftmp, beta);
1086 felem_scalar64(ftmp, 8);
1087 /* ftmp[i] < 2^62 + 2^17 */
1088 felem_diff_128_64(tmp, ftmp);
1089 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1090 felem_reduce(x_out, tmp);
1091
1092 /* z' = (y + z)^2 - gamma - delta */
1093 felem_sum64(delta, gamma);
1094 /* delta[i] < 2^60 + 2^15 */
1095 felem_assign(ftmp, y_in);
1096 felem_sum64(ftmp, z_in);
1097 /* ftmp[i] < 2^60 + 2^15 */
1098 felem_square(tmp, ftmp);
1099 /*
1100 * tmp[i] < 17(2^122) < 2^127
1101 */
1102 felem_diff_128_64(tmp, delta);
1103 /* tmp[i] < 2^127 + 2^63 */
1104 felem_reduce(z_out, tmp);
1105
1106 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1107 felem_scalar64(beta, 4);
1108 /* beta[i] < 2^61 + 2^16 */
1109 felem_diff64(beta, x_out);
1110 /* beta[i] < 2^61 + 2^60 + 2^16 */
1111 felem_mul(tmp, alpha, beta);
1112 /*
1113 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16)) = 17*(2^120 + 2^75
1114 * + 2^119 + 2^74 + 2^75 + 2^30) = 17*(2^120 + 2^119 + 2^76 + 2^74 +
1115 * 2^30) < 2^128
1116 */
1117 felem_square(tmp2, gamma);
1118 /*
1119 * tmp2[i] < 17*(2^59 + 2^14)^2 = 17*(2^118 + 2^74 + 2^28)
1120 */
1121 felem_scalar128(tmp2, 8);
1122 /*
1123 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28) = 2^125 + 2^121 + 2^81 + 2^77
1124 * + 2^35 + 2^31 < 2^126
1125 */
1126 felem_diff128(tmp, tmp2);
1127 /*
1128 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30) =
1129 * 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 + 2^74
1130 * + 2^69 + 2^34 + 2^30 < 2^128
1131 */
1132 felem_reduce(y_out, tmp);
1133}
1134
1135/* copy_conditional copies in to out iff mask is all ones. */
1136static void
1137copy_conditional(felem out, const felem in, limb mask)
1138{
1139 unsigned i;
1140 for (i = 0; i < NLIMBS; ++i) {
1141 const limb tmp = mask & (in[i] ^ out[i]);
1142 out[i] ^= tmp;
1143 }
1144}
1145
1146/* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1147 *
1148 * The method is taken from
1149 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1150 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1151 *
1152 * This function includes a branch for checking whether the two input points
1153 * are equal (while not equal to the point at infinity). This case never
1154 * happens during single point multiplication, so there is no timing leak for
1155 * ECDH or ECDSA signing. */
1156static void
1157point_add(felem x3, felem y3, felem z3,
1158 const felem x1, const felem y1, const felem z1,
1159 const int mixed, const felem x2, const felem y2, const felem z2)
1160{
1161 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1162 largefelem tmp, tmp2;
1163 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1164
1165 z1_is_zero = felem_is_zero(z1);
1166 z2_is_zero = felem_is_zero(z2);
1167
1168 /* ftmp = z1z1 = z1**2 */
1169 felem_square(tmp, z1);
1170 felem_reduce(ftmp, tmp);
1171
1172 if (!mixed) {
1173 /* ftmp2 = z2z2 = z2**2 */
1174 felem_square(tmp, z2);
1175 felem_reduce(ftmp2, tmp);
1176
1177 /* u1 = ftmp3 = x1*z2z2 */
1178 felem_mul(tmp, x1, ftmp2);
1179 felem_reduce(ftmp3, tmp);
1180
1181 /* ftmp5 = z1 + z2 */
1182 felem_assign(ftmp5, z1);
1183 felem_sum64(ftmp5, z2);
1184 /* ftmp5[i] < 2^61 */
1185
1186 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1187 felem_square(tmp, ftmp5);
1188 /* tmp[i] < 17*2^122 */
1189 felem_diff_128_64(tmp, ftmp);
1190 /* tmp[i] < 17*2^122 + 2^63 */
1191 felem_diff_128_64(tmp, ftmp2);
1192 /* tmp[i] < 17*2^122 + 2^64 */
1193 felem_reduce(ftmp5, tmp);
1194
1195 /* ftmp2 = z2 * z2z2 */
1196 felem_mul(tmp, ftmp2, z2);
1197 felem_reduce(ftmp2, tmp);
1198
1199 /* s1 = ftmp6 = y1 * z2**3 */
1200 felem_mul(tmp, y1, ftmp2);
1201 felem_reduce(ftmp6, tmp);
1202 } else {
1203 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1204
1205 /* u1 = ftmp3 = x1*z2z2 */
1206 felem_assign(ftmp3, x1);
1207
1208 /* ftmp5 = 2*z1z2 */
1209 felem_scalar(ftmp5, z1, 2);
1210
1211 /* s1 = ftmp6 = y1 * z2**3 */
1212 felem_assign(ftmp6, y1);
1213 }
1214
1215 /* u2 = x2*z1z1 */
1216 felem_mul(tmp, x2, ftmp);
1217 /* tmp[i] < 17*2^120 */
1218
1219 /* h = ftmp4 = u2 - u1 */
1220 felem_diff_128_64(tmp, ftmp3);
1221 /* tmp[i] < 17*2^120 + 2^63 */
1222 felem_reduce(ftmp4, tmp);
1223
1224 x_equal = felem_is_zero(ftmp4);
1225
1226 /* z_out = ftmp5 * h */
1227 felem_mul(tmp, ftmp5, ftmp4);
1228 felem_reduce(z_out, tmp);
1229
1230 /* ftmp = z1 * z1z1 */
1231 felem_mul(tmp, ftmp, z1);
1232 felem_reduce(ftmp, tmp);
1233
1234 /* s2 = tmp = y2 * z1**3 */
1235 felem_mul(tmp, y2, ftmp);
1236 /* tmp[i] < 17*2^120 */
1237
1238 /* r = ftmp5 = (s2 - s1)*2 */
1239 felem_diff_128_64(tmp, ftmp6);
1240 /* tmp[i] < 17*2^120 + 2^63 */
1241 felem_reduce(ftmp5, tmp);
1242 y_equal = felem_is_zero(ftmp5);
1243 felem_scalar64(ftmp5, 2);
1244 /* ftmp5[i] < 2^61 */
1245
1246 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1247 point_double(x3, y3, z3, x1, y1, z1);
1248 return;
1249 }
1250 /* I = ftmp = (2h)**2 */
1251 felem_assign(ftmp, ftmp4);
1252 felem_scalar64(ftmp, 2);
1253 /* ftmp[i] < 2^61 */
1254 felem_square(tmp, ftmp);
1255 /* tmp[i] < 17*2^122 */
1256 felem_reduce(ftmp, tmp);
1257
1258 /* J = ftmp2 = h * I */
1259 felem_mul(tmp, ftmp4, ftmp);
1260 felem_reduce(ftmp2, tmp);
1261
1262 /* V = ftmp4 = U1 * I */
1263 felem_mul(tmp, ftmp3, ftmp);
1264 felem_reduce(ftmp4, tmp);
1265
1266 /* x_out = r**2 - J - 2V */
1267 felem_square(tmp, ftmp5);
1268 /* tmp[i] < 17*2^122 */
1269 felem_diff_128_64(tmp, ftmp2);
1270 /* tmp[i] < 17*2^122 + 2^63 */
1271 felem_assign(ftmp3, ftmp4);
1272 felem_scalar64(ftmp4, 2);
1273 /* ftmp4[i] < 2^61 */
1274 felem_diff_128_64(tmp, ftmp4);
1275 /* tmp[i] < 17*2^122 + 2^64 */
1276 felem_reduce(x_out, tmp);
1277
1278 /* y_out = r(V-x_out) - 2 * s1 * J */
1279 felem_diff64(ftmp3, x_out);
1280 /*
1281 * ftmp3[i] < 2^60 + 2^60 = 2^61
1282 */
1283 felem_mul(tmp, ftmp5, ftmp3);
1284 /* tmp[i] < 17*2^122 */
1285 felem_mul(tmp2, ftmp6, ftmp2);
1286 /* tmp2[i] < 17*2^120 */
1287 felem_scalar128(tmp2, 2);
1288 /* tmp2[i] < 17*2^121 */
1289 felem_diff128(tmp, tmp2);
1290 /*
1291 * tmp[i] < 2^127 - 2^69 + 17*2^122 = 2^126 - 2^122 - 2^6 - 2^2 - 1 <
1292 * 2^127
1293 */
1294 felem_reduce(y_out, tmp);
1295
1296 copy_conditional(x_out, x2, z1_is_zero);
1297 copy_conditional(x_out, x1, z2_is_zero);
1298 copy_conditional(y_out, y2, z1_is_zero);
1299 copy_conditional(y_out, y1, z2_is_zero);
1300 copy_conditional(z_out, z2, z1_is_zero);
1301 copy_conditional(z_out, z1, z2_is_zero);
1302 felem_assign(x3, x_out);
1303 felem_assign(y3, y_out);
1304 felem_assign(z3, z_out);
1305}
1306
1307/* Base point pre computation
1308 * --------------------------
1309 *
1310 * Two different sorts of precomputed tables are used in the following code.
1311 * Each contain various points on the curve, where each point is three field
1312 * elements (x, y, z).
1313 *
1314 * For the base point table, z is usually 1 (0 for the point at infinity).
1315 * This table has 16 elements:
1316 * index | bits | point
1317 * ------+---------+------------------------------
1318 * 0 | 0 0 0 0 | 0G
1319 * 1 | 0 0 0 1 | 1G
1320 * 2 | 0 0 1 0 | 2^130G
1321 * 3 | 0 0 1 1 | (2^130 + 1)G
1322 * 4 | 0 1 0 0 | 2^260G
1323 * 5 | 0 1 0 1 | (2^260 + 1)G
1324 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1325 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1326 * 8 | 1 0 0 0 | 2^390G
1327 * 9 | 1 0 0 1 | (2^390 + 1)G
1328 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1329 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1330 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1331 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1332 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1333 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1334 *
1335 * The reason for this is so that we can clock bits into four different
1336 * locations when doing simple scalar multiplies against the base point.
1337 *
1338 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1339
1340/* gmul is the table of precomputed base points */
1341static const felem gmul[16][3] =
1342{{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1343{0, 0, 0, 0, 0, 0, 0, 0, 0},
1344{0, 0, 0, 0, 0, 0, 0, 0, 0}},
1345{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1346 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
13470x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1348{0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1349 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
13500x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1351{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1352{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1353 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
13540x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1355{0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1356 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
13570x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1358{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1359{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1360 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
13610x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1362{0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1363 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
13640x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1365{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1366{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1367 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
13680x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1369{0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1370 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
13710x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1372{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1373{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1374 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
13750x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1376{0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1377 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
13780x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1379{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1380{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1381 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
13820x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1383{0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1384 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
13850x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1386{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1387{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1388 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
13890x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1390{0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1391 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
13920x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1393{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1394{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1395 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
13960x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1397{0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1398 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
13990x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1400{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1401{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1402 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
14030x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1404{0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1405 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
14060x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1407{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1408{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1409 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
14100x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1411{0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1412 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
14130x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1414{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1415{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1416 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
14170x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1418{0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1419 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
14200x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1421{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1422{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1423 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
14240x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1425{0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1426 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
14270x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1428{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1429{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1430 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
14310x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1432{0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1433 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
14340x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1435{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1436{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1437 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
14380x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1439{0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1440 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
14410x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1442{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1443{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1444 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
14450x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1446{0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1447 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
14480x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1449{1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1450
1451/* select_point selects the |idx|th point from a precomputation table and
1452 * copies it to out. */
1453static void
1454select_point(const limb idx, unsigned int size, const felem pre_comp[ /* size */ ][3],
1455 felem out[3])
1456{
1457 unsigned i, j;
1458 limb *outlimbs = &out[0][0];
1459 memset(outlimbs, 0, 3 * sizeof(felem));
1460
1461 for (i = 0; i < size; i++) {
1462 const limb *inlimbs = &pre_comp[i][0][0];
1463 limb mask = i ^ idx;
1464 mask |= mask >> 4;
1465 mask |= mask >> 2;
1466 mask |= mask >> 1;
1467 mask &= 1;
1468 mask--;
1469 for (j = 0; j < NLIMBS * 3; j++)
1470 outlimbs[j] |= inlimbs[j] & mask;
1471 }
1472}
1473
1474/* get_bit returns the |i|th bit in |in| */
1475static char
1476get_bit(const felem_bytearray in, int i)
1477{
1478 if (i < 0)
1479 return 0;
1480 return (in[i >> 3] >> (i & 7)) & 1;
1481}
1482
1483/* Interleaved point multiplication using precomputed point multiples:
1484 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1485 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1486 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1487 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1488static void
1489batch_mul(felem x_out, felem y_out, felem z_out,
1490 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1491 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
1492{
1493 int i, skip;
1494 unsigned num, gen_mul = (g_scalar != NULL);
1495 felem nq[3], tmp[4];
1496 limb bits;
1497 u8 sign, digit;
1498
1499 /* set nq to the point at infinity */
1500 memset(nq, 0, 3 * sizeof(felem));
1501
1502 /*
1503 * Loop over all scalars msb-to-lsb, interleaving additions of
1504 * multiples of the generator (last quarter of rounds) and additions
1505 * of other points multiples (every 5th round).
1506 */
1507 skip = 1; /* save two point operations in the first
1508 * round */
1509 for (i = (num_points ? 520 : 130); i >= 0; --i) {
1510 /* double */
1511 if (!skip)
1512 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1513
1514 /* add multiples of the generator */
1515 if (gen_mul && (i <= 130)) {
1516 bits = get_bit(g_scalar, i + 390) << 3;
1517 if (i < 130) {
1518 bits |= get_bit(g_scalar, i + 260) << 2;
1519 bits |= get_bit(g_scalar, i + 130) << 1;
1520 bits |= get_bit(g_scalar, i);
1521 }
1522 /* select the point to add, in constant time */
1523 select_point(bits, 16, g_pre_comp, tmp);
1524 if (!skip) {
1525 point_add(nq[0], nq[1], nq[2],
1526 nq[0], nq[1], nq[2],
1527 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1528 } else {
1529 memcpy(nq, tmp, 3 * sizeof(felem));
1530 skip = 0;
1531 }
1532 }
1533 /* do other additions every 5 doublings */
1534 if (num_points && (i % 5 == 0)) {
1535 /* loop over all scalars */
1536 for (num = 0; num < num_points; ++num) {
1537 bits = get_bit(scalars[num], i + 4) << 5;
1538 bits |= get_bit(scalars[num], i + 3) << 4;
1539 bits |= get_bit(scalars[num], i + 2) << 3;
1540 bits |= get_bit(scalars[num], i + 1) << 2;
1541 bits |= get_bit(scalars[num], i) << 1;
1542 bits |= get_bit(scalars[num], i - 1);
1543 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1544
1545 /*
1546 * select the point to add or subtract, in
1547 * constant time
1548 */
1549 select_point(digit, 17, pre_comp[num], tmp);
1550 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the
1551 * negative point */
1552 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1553
1554 if (!skip) {
1555 point_add(nq[0], nq[1], nq[2],
1556 nq[0], nq[1], nq[2],
1557 mixed, tmp[0], tmp[1], tmp[2]);
1558 } else {
1559 memcpy(nq, tmp, 3 * sizeof(felem));
1560 skip = 0;
1561 }
1562 }
1563 }
1564 }
1565 felem_assign(x_out, nq[0]);
1566 felem_assign(y_out, nq[1]);
1567 felem_assign(z_out, nq[2]);
1568}
1569
1570
1571/* Precomputation for the group generator. */
1572typedef struct {
1573 felem g_pre_comp[16][3];
1574 int references;
1575} NISTP521_PRE_COMP;
1576
1577const EC_METHOD *
1578EC_GFp_nistp521_method(void)
1579{
1580 static const EC_METHOD ret = {
1581 .flags = EC_FLAGS_DEFAULT_OCT,
1582 .field_type = NID_X9_62_prime_field,
1583 .group_init = ec_GFp_nistp521_group_init,
1584 .group_finish = ec_GFp_simple_group_finish,
1585 .group_clear_finish = ec_GFp_simple_group_clear_finish,
1586 .group_copy = ec_GFp_nist_group_copy,
1587 .group_set_curve = ec_GFp_nistp521_group_set_curve,
1588 .group_get_curve = ec_GFp_simple_group_get_curve,
1589 .group_get_degree = ec_GFp_simple_group_get_degree,
1590 .group_order_bits = ec_group_simple_order_bits,
1591 .group_check_discriminant =
1592 ec_GFp_simple_group_check_discriminant,
1593 .point_init = ec_GFp_simple_point_init,
1594 .point_finish = ec_GFp_simple_point_finish,
1595 .point_clear_finish = ec_GFp_simple_point_clear_finish,
1596 .point_copy = ec_GFp_simple_point_copy,
1597 .point_set_to_infinity = ec_GFp_simple_point_set_to_infinity,
1598 .point_set_Jprojective_coordinates =
1599 ec_GFp_simple_set_Jprojective_coordinates,
1600 .point_get_Jprojective_coordinates =
1601 ec_GFp_simple_get_Jprojective_coordinates,
1602 .point_set_affine_coordinates =
1603 ec_GFp_simple_point_set_affine_coordinates,
1604 .point_get_affine_coordinates =
1605 ec_GFp_nistp521_point_get_affine_coordinates,
1606 .add = ec_GFp_simple_add,
1607 .dbl = ec_GFp_simple_dbl,
1608 .invert = ec_GFp_simple_invert,
1609 .is_at_infinity = ec_GFp_simple_is_at_infinity,
1610 .is_on_curve = ec_GFp_simple_is_on_curve,
1611 .point_cmp = ec_GFp_simple_cmp,
1612 .make_affine = ec_GFp_simple_make_affine,
1613 .points_make_affine = ec_GFp_simple_points_make_affine,
1614 .mul = ec_GFp_nistp521_points_mul,
1615 .precompute_mult = ec_GFp_nistp521_precompute_mult,
1616 .have_precompute_mult = ec_GFp_nistp521_have_precompute_mult,
1617 .field_mul = ec_GFp_nist_field_mul,
1618 .field_sqr = ec_GFp_nist_field_sqr,
1619 .blind_coordinates = NULL,
1620 };
1621
1622 return &ret;
1623}
1624
1625
1626/******************************************************************************/
1627/* FUNCTIONS TO MANAGE PRECOMPUTATION
1628 */
1629
1630static NISTP521_PRE_COMP *
1631nistp521_pre_comp_new()
1632{
1633 NISTP521_PRE_COMP *ret = NULL;
1634 ret = malloc(sizeof(NISTP521_PRE_COMP));
1635 if (!ret) {
1636 ECerror(ERR_R_MALLOC_FAILURE);
1637 return ret;
1638 }
1639 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1640 ret->references = 1;
1641 return ret;
1642}
1643
1644static void *
1645nistp521_pre_comp_dup(void *src_)
1646{
1647 NISTP521_PRE_COMP *src = src_;
1648
1649 /* no need to actually copy, these objects never change! */
1650 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1651
1652 return src_;
1653}
1654
1655static void
1656nistp521_pre_comp_free(void *pre_)
1657{
1658 int i;
1659 NISTP521_PRE_COMP *pre = pre_;
1660
1661 if (!pre)
1662 return;
1663
1664 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1665 if (i > 0)
1666 return;
1667
1668 free(pre);
1669}
1670
1671static void
1672nistp521_pre_comp_clear_free(void *pre_)
1673{
1674 int i;
1675 NISTP521_PRE_COMP *pre = pre_;
1676
1677 if (!pre)
1678 return;
1679
1680 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1681 if (i > 0)
1682 return;
1683
1684 freezero(pre, sizeof(*pre));
1685}
1686
1687/******************************************************************************/
1688/* OPENSSL EC_METHOD FUNCTIONS
1689 */
1690
1691int
1692ec_GFp_nistp521_group_init(EC_GROUP *group)
1693{
1694 int ret;
1695 ret = ec_GFp_simple_group_init(group);
1696 group->a_is_minus3 = 1;
1697 return ret;
1698}
1699
1700int
1701ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1702 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1703{
1704 int ret = 0;
1705 BN_CTX *new_ctx = NULL;
1706 BIGNUM *curve_p, *curve_a, *curve_b;
1707
1708 if (ctx == NULL)
1709 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1710 return 0;
1711 BN_CTX_start(ctx);
1712 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1713 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1714 ((curve_b = BN_CTX_get(ctx)) == NULL))
1715 goto err;
1716 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1717 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1718 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1719 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1720 (BN_cmp(curve_b, b))) {
1721 ECerror(EC_R_WRONG_CURVE_PARAMETERS);
1722 goto err;
1723 }
1724 group->field_mod_func = BN_nist_mod_521;
1725 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1726 err:
1727 BN_CTX_end(ctx);
1728 BN_CTX_free(new_ctx);
1729 return ret;
1730}
1731
1732/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1733 * (X', Y') = (X/Z^2, Y/Z^3) */
1734int
1735ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1736 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1737{
1738 felem z1, z2, x_in, y_in, x_out, y_out;
1739 largefelem tmp;
1740
1741 if (EC_POINT_is_at_infinity(group, point) > 0) {
1742 ECerror(EC_R_POINT_AT_INFINITY);
1743 return 0;
1744 }
1745 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1746 (!BN_to_felem(z1, &point->Z)))
1747 return 0;
1748 felem_inv(z2, z1);
1749 felem_square(tmp, z2);
1750 felem_reduce(z1, tmp);
1751 felem_mul(tmp, x_in, z1);
1752 felem_reduce(x_in, tmp);
1753 felem_contract(x_out, x_in);
1754 if (x != NULL) {
1755 if (!felem_to_BN(x, x_out)) {
1756 ECerror(ERR_R_BN_LIB);
1757 return 0;
1758 }
1759 }
1760 felem_mul(tmp, z1, z2);
1761 felem_reduce(z1, tmp);
1762 felem_mul(tmp, y_in, z1);
1763 felem_reduce(y_in, tmp);
1764 felem_contract(y_out, y_in);
1765 if (y != NULL) {
1766 if (!felem_to_BN(y, y_out)) {
1767 ECerror(ERR_R_BN_LIB);
1768 return 0;
1769 }
1770 }
1771 return 1;
1772}
1773
1774static void
1775make_points_affine(size_t num, felem points[ /* num */ ][3], felem tmp_felems[ /* num+1 */ ])
1776{
1777 /*
1778 * Runs in constant time, unless an input is the point at infinity
1779 * (which normally shouldn't happen).
1780 */
1781 ec_GFp_nistp_points_make_affine_internal(
1782 num,
1783 points,
1784 sizeof(felem),
1785 tmp_felems,
1786 (void (*) (void *)) felem_one,
1787 (int (*) (const void *)) felem_is_zero_int,
1788 (void (*) (void *, const void *)) felem_assign,
1789 (void (*) (void *, const void *)) felem_square_reduce,
1790 (void (*) (void *, const void *, const void *)) felem_mul_reduce,
1791 (void (*) (void *, const void *)) felem_inv,
1792 (void (*) (void *, const void *)) felem_contract);
1793}
1794
1795/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1796 * Result is stored in r (r can equal one of the inputs). */
1797int
1798ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
1799 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1800 const BIGNUM *scalars[], BN_CTX *ctx)
1801{
1802 int ret = 0;
1803 int j;
1804 int mixed = 0;
1805 BN_CTX *new_ctx = NULL;
1806 BIGNUM *x, *y, *z, *tmp_scalar;
1807 felem_bytearray g_secret;
1808 felem_bytearray *secrets = NULL;
1809 felem(*pre_comp)[17][3] = NULL;
1810 felem *tmp_felems = NULL;
1811 felem_bytearray tmp;
1812 unsigned i, num_bytes;
1813 int have_pre_comp = 0;
1814 size_t num_points = num;
1815 felem x_in, y_in, z_in, x_out, y_out, z_out;
1816 NISTP521_PRE_COMP *pre = NULL;
1817 felem(*g_pre_comp)[3] = NULL;
1818 EC_POINT *generator = NULL;
1819 const EC_POINT *p = NULL;
1820 const BIGNUM *p_scalar = NULL;
1821
1822 if (ctx == NULL)
1823 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1824 return 0;
1825 BN_CTX_start(ctx);
1826 if (((x = BN_CTX_get(ctx)) == NULL) ||
1827 ((y = BN_CTX_get(ctx)) == NULL) ||
1828 ((z = BN_CTX_get(ctx)) == NULL) ||
1829 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1830 goto err;
1831
1832 if (scalar != NULL) {
1833 pre = EC_EX_DATA_get_data(group->extra_data,
1834 nistp521_pre_comp_dup, nistp521_pre_comp_free,
1835 nistp521_pre_comp_clear_free);
1836 if (pre)
1837 /* we have precomputation, try to use it */
1838 g_pre_comp = &pre->g_pre_comp[0];
1839 else
1840 /* try to use the standard precomputation */
1841 g_pre_comp = (felem(*)[3]) gmul;
1842 generator = EC_POINT_new(group);
1843 if (generator == NULL)
1844 goto err;
1845 /* get the generator from precomputation */
1846 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1847 !felem_to_BN(y, g_pre_comp[1][1]) ||
1848 !felem_to_BN(z, g_pre_comp[1][2])) {
1849 ECerror(ERR_R_BN_LIB);
1850 goto err;
1851 }
1852 if (!EC_POINT_set_Jprojective_coordinates(group, generator,
1853 x, y, z, ctx))
1854 goto err;
1855 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1856 /* precomputation matches generator */
1857 have_pre_comp = 1;
1858 else
1859 /*
1860 * we don't have valid precomputation: treat the
1861 * generator as a random point
1862 */
1863 num_points++;
1864 }
1865 if (num_points > 0) {
1866 if (num_points >= 2) {
1867 /*
1868 * unless we precompute multiples for just one point,
1869 * converting those into affine form is time well
1870 * spent
1871 */
1872 mixed = 1;
1873 }
1874 secrets = calloc(num_points, sizeof(felem_bytearray));
1875 pre_comp = calloc(num_points, 17 * 3 * sizeof(felem));
1876 if (mixed) {
1877 /* XXX should do more int overflow checking */
1878 tmp_felems = reallocarray(NULL,
1879 (num_points * 17 + 1), sizeof(felem));
1880 }
1881 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL))) {
1882 ECerror(ERR_R_MALLOC_FAILURE);
1883 goto err;
1884 }
1885 /*
1886 * we treat NULL scalars as 0, and NULL points as points at
1887 * infinity, i.e., they contribute nothing to the linear
1888 * combination
1889 */
1890 for (i = 0; i < num_points; ++i) {
1891 if (i == num)
1892 /*
1893 * we didn't have a valid precomputation, so
1894 * we pick the generator
1895 */
1896 {
1897 p = EC_GROUP_get0_generator(group);
1898 p_scalar = scalar;
1899 } else
1900 /* the i^th point */
1901 {
1902 p = points[i];
1903 p_scalar = scalars[i];
1904 }
1905 if ((p_scalar != NULL) && (p != NULL)) {
1906 /* reduce scalar to 0 <= scalar < 2^521 */
1907 if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar))) {
1908 /*
1909 * this is an unusual input, and we
1910 * don't guarantee constant-timeness
1911 */
1912 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1913 ECerror(ERR_R_BN_LIB);
1914 goto err;
1915 }
1916 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1917 } else
1918 num_bytes = BN_bn2bin(p_scalar, tmp);
1919 flip_endian(secrets[i], tmp, num_bytes);
1920 /* precompute multiples */
1921 if ((!BN_to_felem(x_out, &p->X)) ||
1922 (!BN_to_felem(y_out, &p->Y)) ||
1923 (!BN_to_felem(z_out, &p->Z)))
1924 goto err;
1925 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1926 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1927 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1928 for (j = 2; j <= 16; ++j) {
1929 if (j & 1) {
1930 point_add(
1931 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1932 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1933 0, pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
1934 } else {
1935 point_double(
1936 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1937 pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
1938 }
1939 }
1940 }
1941 }
1942 if (mixed)
1943 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1944 }
1945 /* the scalar for the generator */
1946 if ((scalar != NULL) && (have_pre_comp)) {
1947 memset(g_secret, 0, sizeof(g_secret));
1948 /* reduce scalar to 0 <= scalar < 2^521 */
1949 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
1950 /*
1951 * this is an unusual input, and we don't guarantee
1952 * constant-timeness
1953 */
1954 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1955 ECerror(ERR_R_BN_LIB);
1956 goto err;
1957 }
1958 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1959 } else
1960 num_bytes = BN_bn2bin(scalar, tmp);
1961 flip_endian(g_secret, tmp, num_bytes);
1962 /* do the multiplication with generator precomputation */
1963 batch_mul(x_out, y_out, z_out,
1964 (const felem_bytearray(*)) secrets, num_points,
1965 g_secret,
1966 mixed, (const felem(*)[17][3]) pre_comp,
1967 (const felem(*)[3]) g_pre_comp);
1968 } else
1969 /* do the multiplication without generator precomputation */
1970 batch_mul(x_out, y_out, z_out,
1971 (const felem_bytearray(*)) secrets, num_points,
1972 NULL, mixed, (const felem(*)[17][3]) pre_comp, NULL);
1973 /* reduce the output to its unique minimal representation */
1974 felem_contract(x_in, x_out);
1975 felem_contract(y_in, y_out);
1976 felem_contract(z_in, z_out);
1977 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1978 (!felem_to_BN(z, z_in))) {
1979 ECerror(ERR_R_BN_LIB);
1980 goto err;
1981 }
1982 ret = EC_POINT_set_Jprojective_coordinates(group, r, x, y, z, ctx);
1983
1984 err:
1985 BN_CTX_end(ctx);
1986 EC_POINT_free(generator);
1987 BN_CTX_free(new_ctx);
1988 free(secrets);
1989 free(pre_comp);
1990 free(tmp_felems);
1991 return ret;
1992}
1993
1994int
1995ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1996{
1997 int ret = 0;
1998 NISTP521_PRE_COMP *pre = NULL;
1999 int i, j;
2000 BN_CTX *new_ctx = NULL;
2001 BIGNUM *x, *y;
2002 EC_POINT *generator = NULL;
2003 felem tmp_felems[16];
2004
2005 /* throw away old precomputation */
2006 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
2007 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
2008 if (ctx == NULL)
2009 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2010 return 0;
2011 BN_CTX_start(ctx);
2012 if (((x = BN_CTX_get(ctx)) == NULL) ||
2013 ((y = BN_CTX_get(ctx)) == NULL))
2014 goto err;
2015 /* get the generator */
2016 if (group->generator == NULL)
2017 goto err;
2018 generator = EC_POINT_new(group);
2019 if (generator == NULL)
2020 goto err;
2021 BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
2022 BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
2023 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
2024 goto err;
2025 if ((pre = nistp521_pre_comp_new()) == NULL)
2026 goto err;
2027 /* if the generator is the standard one, use built-in precomputation */
2028 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2029 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2030 ret = 1;
2031 goto err;
2032 }
2033 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
2034 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
2035 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
2036 goto err;
2037 /* compute 2^130*G, 2^260*G, 2^390*G */
2038 for (i = 1; i <= 4; i <<= 1) {
2039 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
2040 pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
2041 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
2042 for (j = 0; j < 129; ++j) {
2043 point_double(pre->g_pre_comp[2 * i][0],
2044 pre->g_pre_comp[2 * i][1],
2045 pre->g_pre_comp[2 * i][2],
2046 pre->g_pre_comp[2 * i][0],
2047 pre->g_pre_comp[2 * i][1],
2048 pre->g_pre_comp[2 * i][2]);
2049 }
2050 }
2051 /* g_pre_comp[0] is the point at infinity */
2052 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
2053 /* the remaining multiples */
2054 /* 2^130*G + 2^260*G */
2055 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2056 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2057 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2058 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2059 pre->g_pre_comp[2][2]);
2060 /* 2^130*G + 2^390*G */
2061 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2062 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2063 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2064 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2065 pre->g_pre_comp[2][2]);
2066 /* 2^260*G + 2^390*G */
2067 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2068 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2069 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2070 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2071 pre->g_pre_comp[4][2]);
2072 /* 2^130*G + 2^260*G + 2^390*G */
2073 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2074 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2075 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2076 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2077 pre->g_pre_comp[2][2]);
2078 for (i = 1; i < 8; ++i) {
2079 /* odd multiples: add G */
2080 point_add(pre->g_pre_comp[2 * i + 1][0], pre->g_pre_comp[2 * i + 1][1],
2081 pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
2082 pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
2083 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2084 pre->g_pre_comp[1][2]);
2085 }
2086 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2087
2088 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2089 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
2090 goto err;
2091 ret = 1;
2092 pre = NULL;
2093 err:
2094 BN_CTX_end(ctx);
2095 EC_POINT_free(generator);
2096 BN_CTX_free(new_ctx);
2097 nistp521_pre_comp_free(pre);
2098 return ret;
2099}
2100
2101int
2102ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2103{
2104 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2105 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2106 != NULL)
2107 return 1;
2108 else
2109 return 0;
2110}
2111
2112#endif