summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/ec/ecp_nistp521.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/ec/ecp_nistp521.c')
-rw-r--r--src/lib/libcrypto/ec/ecp_nistp521.c2113
1 files changed, 0 insertions, 2113 deletions
diff --git a/src/lib/libcrypto/ec/ecp_nistp521.c b/src/lib/libcrypto/ec/ecp_nistp521.c
deleted file mode 100644
index 6382091cf9..0000000000
--- a/src/lib/libcrypto/ec/ecp_nistp521.c
+++ /dev/null
@@ -1,2113 +0,0 @@
1/* $OpenBSD: ecp_nistp521.c,v 1.16 2015/02/08 22:25:03 miod Exp $ */
2/*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5/*
6 * Copyright (c) 2011 Google Inc.
7 *
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29#include <stdint.h>
30#include <string.h>
31
32#include <openssl/opensslconf.h>
33
34#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
35
36#include <openssl/err.h>
37#include "ec_lcl.h"
38
39#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
40 /* even with gcc, the typedef won't work for 32-bit platforms */
41 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
42#else
43 #error "Need GCC 3.1 or later to define type uint128_t"
44#endif
45
46typedef uint8_t u8;
47typedef uint64_t u64;
48typedef int64_t s64;
49
50/* The underlying field.
51 *
52 * P521 operates over GF(2^521-1). We can serialise an element of this field
53 * into 66 bytes where the most significant byte contains only a single bit. We
54 * call this an felem_bytearray. */
55
56typedef u8 felem_bytearray[66];
57
58/* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
59 * These values are big-endian. */
60static const felem_bytearray nistp521_curve_params[5] =
61 {
62 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff},
71 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xfc},
80 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
81 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
82 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
83 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
84 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
85 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
86 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
87 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
88 0x3f, 0x00},
89 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
90 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
91 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
92 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
93 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
94 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
95 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
96 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
97 0xbd, 0x66},
98 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
99 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
100 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
101 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
102 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
103 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
104 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
105 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
106 0x66, 0x50}
107 };
108
109/* The representation of field elements.
110 * ------------------------------------
111 *
112 * We represent field elements with nine values. These values are either 64 or
113 * 128 bits and the field element represented is:
114 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
115 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
116 * 58 bits apart, but are greater than 58 bits in length, the most significant
117 * bits of each limb overlap with the least significant bits of the next.
118 *
119 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
120 * 'largefelem' */
121
122#define NLIMBS 9
123
124typedef uint64_t limb;
125typedef limb felem[NLIMBS];
126typedef uint128_t largefelem[NLIMBS];
127
128static const limb bottom57bits = 0x1ffffffffffffff;
129static const limb bottom58bits = 0x3ffffffffffffff;
130
131/* bin66_to_felem takes a little-endian byte array and converts it into felem
132 * form. This assumes that the CPU is little-endian. */
133static void
134bin66_to_felem(felem out, const u8 in[66])
135{
136 out[0] = (*((limb *) & in[0])) & bottom58bits;
137 out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits;
138 out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits;
139 out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits;
140 out[4] = (*((limb *) & in[29])) & bottom58bits;
141 out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits;
142 out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits;
143 out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits;
144 out[8] = (*((limb *) & in[58])) & bottom57bits;
145}
146
147/* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
148 * array. This assumes that the CPU is little-endian. */
149static void
150felem_to_bin66(u8 out[66], const felem in)
151{
152 memset(out, 0, 66);
153 (*((limb *) & out[0])) = in[0];
154 (*((limb *) & out[7])) |= in[1] << 2;
155 (*((limb *) & out[14])) |= in[2] << 4;
156 (*((limb *) & out[21])) |= in[3] << 6;
157 (*((limb *) & out[29])) = in[4];
158 (*((limb *) & out[36])) |= in[5] << 2;
159 (*((limb *) & out[43])) |= in[6] << 4;
160 (*((limb *) & out[50])) |= in[7] << 6;
161 (*((limb *) & out[58])) = in[8];
162}
163
164/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
165static void
166flip_endian(u8 * out, const u8 * in, unsigned len)
167{
168 unsigned i;
169 for (i = 0; i < len; ++i)
170 out[i] = in[len - 1 - i];
171}
172
173/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
174static int
175BN_to_felem(felem out, const BIGNUM * bn)
176{
177 felem_bytearray b_in;
178 felem_bytearray b_out;
179 unsigned num_bytes;
180
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out, 0, sizeof b_out);
183 num_bytes = BN_num_bytes(bn);
184 if (num_bytes > sizeof b_out) {
185 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
186 return 0;
187 }
188 if (BN_is_negative(bn)) {
189 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
190 return 0;
191 }
192 num_bytes = BN_bn2bin(bn, b_in);
193 flip_endian(b_out, b_in, num_bytes);
194 bin66_to_felem(out, b_out);
195 return 1;
196}
197
198/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
199static BIGNUM *
200felem_to_BN(BIGNUM * out, const felem in)
201{
202 felem_bytearray b_in, b_out;
203 felem_to_bin66(b_in, in);
204 flip_endian(b_out, b_in, sizeof b_out);
205 return BN_bin2bn(b_out, sizeof b_out, out);
206}
207
208
209/* Field operations
210 * ---------------- */
211
212static void
213felem_one(felem out)
214{
215 out[0] = 1;
216 out[1] = 0;
217 out[2] = 0;
218 out[3] = 0;
219 out[4] = 0;
220 out[5] = 0;
221 out[6] = 0;
222 out[7] = 0;
223 out[8] = 0;
224}
225
226static void
227felem_assign(felem out, const felem in)
228{
229 out[0] = in[0];
230 out[1] = in[1];
231 out[2] = in[2];
232 out[3] = in[3];
233 out[4] = in[4];
234 out[5] = in[5];
235 out[6] = in[6];
236 out[7] = in[7];
237 out[8] = in[8];
238}
239
240/* felem_sum64 sets out = out + in. */
241static void
242felem_sum64(felem out, const felem in)
243{
244 out[0] += in[0];
245 out[1] += in[1];
246 out[2] += in[2];
247 out[3] += in[3];
248 out[4] += in[4];
249 out[5] += in[5];
250 out[6] += in[6];
251 out[7] += in[7];
252 out[8] += in[8];
253}
254
255/* felem_scalar sets out = in * scalar */
256static void
257felem_scalar(felem out, const felem in, limb scalar)
258{
259 out[0] = in[0] * scalar;
260 out[1] = in[1] * scalar;
261 out[2] = in[2] * scalar;
262 out[3] = in[3] * scalar;
263 out[4] = in[4] * scalar;
264 out[5] = in[5] * scalar;
265 out[6] = in[6] * scalar;
266 out[7] = in[7] * scalar;
267 out[8] = in[8] * scalar;
268}
269
270/* felem_scalar64 sets out = out * scalar */
271static void
272felem_scalar64(felem out, limb scalar)
273{
274 out[0] *= scalar;
275 out[1] *= scalar;
276 out[2] *= scalar;
277 out[3] *= scalar;
278 out[4] *= scalar;
279 out[5] *= scalar;
280 out[6] *= scalar;
281 out[7] *= scalar;
282 out[8] *= scalar;
283}
284
285/* felem_scalar128 sets out = out * scalar */
286static void
287felem_scalar128(largefelem out, limb scalar)
288{
289 out[0] *= scalar;
290 out[1] *= scalar;
291 out[2] *= scalar;
292 out[3] *= scalar;
293 out[4] *= scalar;
294 out[5] *= scalar;
295 out[6] *= scalar;
296 out[7] *= scalar;
297 out[8] *= scalar;
298}
299
300/* felem_neg sets |out| to |-in|
301 * On entry:
302 * in[i] < 2^59 + 2^14
303 * On exit:
304 * out[i] < 2^62
305 */
306static void
307felem_neg(felem out, const felem in)
308{
309 /* In order to prevent underflow, we subtract from 0 mod p. */
310 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
311 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
312
313 out[0] = two62m3 - in[0];
314 out[1] = two62m2 - in[1];
315 out[2] = two62m2 - in[2];
316 out[3] = two62m2 - in[3];
317 out[4] = two62m2 - in[4];
318 out[5] = two62m2 - in[5];
319 out[6] = two62m2 - in[6];
320 out[7] = two62m2 - in[7];
321 out[8] = two62m2 - in[8];
322}
323
324/* felem_diff64 subtracts |in| from |out|
325 * On entry:
326 * in[i] < 2^59 + 2^14
327 * On exit:
328 * out[i] < out[i] + 2^62
329 */
330static void
331felem_diff64(felem out, const felem in)
332{
333 /* In order to prevent underflow, we add 0 mod p before subtracting. */
334 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
335 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
336
337 out[0] += two62m3 - in[0];
338 out[1] += two62m2 - in[1];
339 out[2] += two62m2 - in[2];
340 out[3] += two62m2 - in[3];
341 out[4] += two62m2 - in[4];
342 out[5] += two62m2 - in[5];
343 out[6] += two62m2 - in[6];
344 out[7] += two62m2 - in[7];
345 out[8] += two62m2 - in[8];
346}
347
348/* felem_diff_128_64 subtracts |in| from |out|
349 * On entry:
350 * in[i] < 2^62 + 2^17
351 * On exit:
352 * out[i] < out[i] + 2^63
353 */
354static void
355felem_diff_128_64(largefelem out, const felem in)
356{
357 /* In order to prevent underflow, we add 0 mod p before subtracting. */
358 static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5);
359 static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4);
360
361 out[0] += two63m6 - in[0];
362 out[1] += two63m5 - in[1];
363 out[2] += two63m5 - in[2];
364 out[3] += two63m5 - in[3];
365 out[4] += two63m5 - in[4];
366 out[5] += two63m5 - in[5];
367 out[6] += two63m5 - in[6];
368 out[7] += two63m5 - in[7];
369 out[8] += two63m5 - in[8];
370}
371
372/* felem_diff_128_64 subtracts |in| from |out|
373 * On entry:
374 * in[i] < 2^126
375 * On exit:
376 * out[i] < out[i] + 2^127 - 2^69
377 */
378static void
379felem_diff128(largefelem out, const largefelem in)
380{
381 /* In order to prevent underflow, we add 0 mod p before subtracting. */
382 static const uint128_t two127m70 = (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);
383 static const uint128_t two127m69 = (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);
384
385 out[0] += (two127m70 - in[0]);
386 out[1] += (two127m69 - in[1]);
387 out[2] += (two127m69 - in[2]);
388 out[3] += (two127m69 - in[3]);
389 out[4] += (two127m69 - in[4]);
390 out[5] += (two127m69 - in[5]);
391 out[6] += (two127m69 - in[6]);
392 out[7] += (two127m69 - in[7]);
393 out[8] += (two127m69 - in[8]);
394}
395
396/* felem_square sets |out| = |in|^2
397 * On entry:
398 * in[i] < 2^62
399 * On exit:
400 * out[i] < 17 * max(in[i]) * max(in[i])
401 */
402static void
403felem_square(largefelem out, const felem in)
404{
405 felem inx2, inx4;
406 felem_scalar(inx2, in, 2);
407 felem_scalar(inx4, in, 4);
408
409 /*
410 * We have many cases were we want to do in[x] * in[y] + in[y] *
411 * in[x] This is obviously just 2 * in[x] * in[y] However, rather
412 * than do the doubling on the 128 bit result, we double one of the
413 * inputs to the multiplication by reading from |inx2|
414 */
415
416 out[0] = ((uint128_t) in[0]) * in[0];
417 out[1] = ((uint128_t) in[0]) * inx2[1];
418 out[2] = ((uint128_t) in[0]) * inx2[2] +
419 ((uint128_t) in[1]) * in[1];
420 out[3] = ((uint128_t) in[0]) * inx2[3] +
421 ((uint128_t) in[1]) * inx2[2];
422 out[4] = ((uint128_t) in[0]) * inx2[4] +
423 ((uint128_t) in[1]) * inx2[3] +
424 ((uint128_t) in[2]) * in[2];
425 out[5] = ((uint128_t) in[0]) * inx2[5] +
426 ((uint128_t) in[1]) * inx2[4] +
427 ((uint128_t) in[2]) * inx2[3];
428 out[6] = ((uint128_t) in[0]) * inx2[6] +
429 ((uint128_t) in[1]) * inx2[5] +
430 ((uint128_t) in[2]) * inx2[4] +
431 ((uint128_t) in[3]) * in[3];
432 out[7] = ((uint128_t) in[0]) * inx2[7] +
433 ((uint128_t) in[1]) * inx2[6] +
434 ((uint128_t) in[2]) * inx2[5] +
435 ((uint128_t) in[3]) * inx2[4];
436 out[8] = ((uint128_t) in[0]) * inx2[8] +
437 ((uint128_t) in[1]) * inx2[7] +
438 ((uint128_t) in[2]) * inx2[6] +
439 ((uint128_t) in[3]) * inx2[5] +
440 ((uint128_t) in[4]) * in[4];
441
442 /*
443 * The remaining limbs fall above 2^521, with the first falling at
444 * 2^522. They correspond to locations one bit up from the limbs
445 * produced above so we would have to multiply by two to align them.
446 * Again, rather than operate on the 128-bit result, we double one of
447 * the inputs to the multiplication. If we want to double for both
448 * this reason, and the reason above, then we end up multiplying by
449 * four.
450 */
451
452 /* 9 */
453 out[0] += ((uint128_t) in[1]) * inx4[8] +
454 ((uint128_t) in[2]) * inx4[7] +
455 ((uint128_t) in[3]) * inx4[6] +
456 ((uint128_t) in[4]) * inx4[5];
457
458 /* 10 */
459 out[1] += ((uint128_t) in[2]) * inx4[8] +
460 ((uint128_t) in[3]) * inx4[7] +
461 ((uint128_t) in[4]) * inx4[6] +
462 ((uint128_t) in[5]) * inx2[5];
463
464 /* 11 */
465 out[2] += ((uint128_t) in[3]) * inx4[8] +
466 ((uint128_t) in[4]) * inx4[7] +
467 ((uint128_t) in[5]) * inx4[6];
468
469 /* 12 */
470 out[3] += ((uint128_t) in[4]) * inx4[8] +
471 ((uint128_t) in[5]) * inx4[7] +
472 ((uint128_t) in[6]) * inx2[6];
473
474 /* 13 */
475 out[4] += ((uint128_t) in[5]) * inx4[8] +
476 ((uint128_t) in[6]) * inx4[7];
477
478 /* 14 */
479 out[5] += ((uint128_t) in[6]) * inx4[8] +
480 ((uint128_t) in[7]) * inx2[7];
481
482 /* 15 */
483 out[6] += ((uint128_t) in[7]) * inx4[8];
484
485 /* 16 */
486 out[7] += ((uint128_t) in[8]) * inx2[8];
487}
488
489/* felem_mul sets |out| = |in1| * |in2|
490 * On entry:
491 * in1[i] < 2^64
492 * in2[i] < 2^63
493 * On exit:
494 * out[i] < 17 * max(in1[i]) * max(in2[i])
495 */
496static void
497felem_mul(largefelem out, const felem in1, const felem in2)
498{
499 felem in2x2;
500 felem_scalar(in2x2, in2, 2);
501
502 out[0] = ((uint128_t) in1[0]) * in2[0];
503
504 out[1] = ((uint128_t) in1[0]) * in2[1] +
505 ((uint128_t) in1[1]) * in2[0];
506
507 out[2] = ((uint128_t) in1[0]) * in2[2] +
508 ((uint128_t) in1[1]) * in2[1] +
509 ((uint128_t) in1[2]) * in2[0];
510
511 out[3] = ((uint128_t) in1[0]) * in2[3] +
512 ((uint128_t) in1[1]) * in2[2] +
513 ((uint128_t) in1[2]) * in2[1] +
514 ((uint128_t) in1[3]) * in2[0];
515
516 out[4] = ((uint128_t) in1[0]) * in2[4] +
517 ((uint128_t) in1[1]) * in2[3] +
518 ((uint128_t) in1[2]) * in2[2] +
519 ((uint128_t) in1[3]) * in2[1] +
520 ((uint128_t) in1[4]) * in2[0];
521
522 out[5] = ((uint128_t) in1[0]) * in2[5] +
523 ((uint128_t) in1[1]) * in2[4] +
524 ((uint128_t) in1[2]) * in2[3] +
525 ((uint128_t) in1[3]) * in2[2] +
526 ((uint128_t) in1[4]) * in2[1] +
527 ((uint128_t) in1[5]) * in2[0];
528
529 out[6] = ((uint128_t) in1[0]) * in2[6] +
530 ((uint128_t) in1[1]) * in2[5] +
531 ((uint128_t) in1[2]) * in2[4] +
532 ((uint128_t) in1[3]) * in2[3] +
533 ((uint128_t) in1[4]) * in2[2] +
534 ((uint128_t) in1[5]) * in2[1] +
535 ((uint128_t) in1[6]) * in2[0];
536
537 out[7] = ((uint128_t) in1[0]) * in2[7] +
538 ((uint128_t) in1[1]) * in2[6] +
539 ((uint128_t) in1[2]) * in2[5] +
540 ((uint128_t) in1[3]) * in2[4] +
541 ((uint128_t) in1[4]) * in2[3] +
542 ((uint128_t) in1[5]) * in2[2] +
543 ((uint128_t) in1[6]) * in2[1] +
544 ((uint128_t) in1[7]) * in2[0];
545
546 out[8] = ((uint128_t) in1[0]) * in2[8] +
547 ((uint128_t) in1[1]) * in2[7] +
548 ((uint128_t) in1[2]) * in2[6] +
549 ((uint128_t) in1[3]) * in2[5] +
550 ((uint128_t) in1[4]) * in2[4] +
551 ((uint128_t) in1[5]) * in2[3] +
552 ((uint128_t) in1[6]) * in2[2] +
553 ((uint128_t) in1[7]) * in2[1] +
554 ((uint128_t) in1[8]) * in2[0];
555
556 /* See comment in felem_square about the use of in2x2 here */
557
558 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
559 ((uint128_t) in1[2]) * in2x2[7] +
560 ((uint128_t) in1[3]) * in2x2[6] +
561 ((uint128_t) in1[4]) * in2x2[5] +
562 ((uint128_t) in1[5]) * in2x2[4] +
563 ((uint128_t) in1[6]) * in2x2[3] +
564 ((uint128_t) in1[7]) * in2x2[2] +
565 ((uint128_t) in1[8]) * in2x2[1];
566
567 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
568 ((uint128_t) in1[3]) * in2x2[7] +
569 ((uint128_t) in1[4]) * in2x2[6] +
570 ((uint128_t) in1[5]) * in2x2[5] +
571 ((uint128_t) in1[6]) * in2x2[4] +
572 ((uint128_t) in1[7]) * in2x2[3] +
573 ((uint128_t) in1[8]) * in2x2[2];
574
575 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
576 ((uint128_t) in1[4]) * in2x2[7] +
577 ((uint128_t) in1[5]) * in2x2[6] +
578 ((uint128_t) in1[6]) * in2x2[5] +
579 ((uint128_t) in1[7]) * in2x2[4] +
580 ((uint128_t) in1[8]) * in2x2[3];
581
582 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
583 ((uint128_t) in1[5]) * in2x2[7] +
584 ((uint128_t) in1[6]) * in2x2[6] +
585 ((uint128_t) in1[7]) * in2x2[5] +
586 ((uint128_t) in1[8]) * in2x2[4];
587
588 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
589 ((uint128_t) in1[6]) * in2x2[7] +
590 ((uint128_t) in1[7]) * in2x2[6] +
591 ((uint128_t) in1[8]) * in2x2[5];
592
593 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
594 ((uint128_t) in1[7]) * in2x2[7] +
595 ((uint128_t) in1[8]) * in2x2[6];
596
597 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
598 ((uint128_t) in1[8]) * in2x2[7];
599
600 out[7] += ((uint128_t) in1[8]) * in2x2[8];
601}
602
603static const limb bottom52bits = 0xfffffffffffff;
604
605/* felem_reduce converts a largefelem to an felem.
606 * On entry:
607 * in[i] < 2^128
608 * On exit:
609 * out[i] < 2^59 + 2^14
610 */
611static void
612felem_reduce(felem out, const largefelem in)
613{
614 u64 overflow1, overflow2;
615
616 out[0] = ((limb) in[0]) & bottom58bits;
617 out[1] = ((limb) in[1]) & bottom58bits;
618 out[2] = ((limb) in[2]) & bottom58bits;
619 out[3] = ((limb) in[3]) & bottom58bits;
620 out[4] = ((limb) in[4]) & bottom58bits;
621 out[5] = ((limb) in[5]) & bottom58bits;
622 out[6] = ((limb) in[6]) & bottom58bits;
623 out[7] = ((limb) in[7]) & bottom58bits;
624 out[8] = ((limb) in[8]) & bottom58bits;
625
626 /* out[i] < 2^58 */
627
628 out[1] += ((limb) in[0]) >> 58;
629 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
630 /*
631 * out[1] < 2^58 + 2^6 + 2^58 = 2^59 + 2^6
632 */
633 out[2] += ((limb) (in[0] >> 64)) >> 52;
634
635 out[2] += ((limb) in[1]) >> 58;
636 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
637 out[3] += ((limb) (in[1] >> 64)) >> 52;
638
639 out[3] += ((limb) in[2]) >> 58;
640 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
641 out[4] += ((limb) (in[2] >> 64)) >> 52;
642
643 out[4] += ((limb) in[3]) >> 58;
644 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
645 out[5] += ((limb) (in[3] >> 64)) >> 52;
646
647 out[5] += ((limb) in[4]) >> 58;
648 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
649 out[6] += ((limb) (in[4] >> 64)) >> 52;
650
651 out[6] += ((limb) in[5]) >> 58;
652 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
653 out[7] += ((limb) (in[5] >> 64)) >> 52;
654
655 out[7] += ((limb) in[6]) >> 58;
656 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
657 out[8] += ((limb) (in[6] >> 64)) >> 52;
658
659 out[8] += ((limb) in[7]) >> 58;
660 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
661 /*
662 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 < 2^59 + 2^13
663 */
664 overflow1 = ((limb) (in[7] >> 64)) >> 52;
665
666 overflow1 += ((limb) in[8]) >> 58;
667 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
668 overflow2 = ((limb) (in[8] >> 64)) >> 52;
669
670 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
671 overflow2 <<= 1; /* overflow2 < 2^13 */
672
673 out[0] += overflow1; /* out[0] < 2^60 */
674 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
675
676 out[1] += out[0] >> 58;
677 out[0] &= bottom58bits;
678 /*
679 * out[0] < 2^58 out[1] < 2^59 + 2^6 + 2^13 + 2^2 < 2^59 + 2^14
680 */
681}
682
683static void
684felem_square_reduce(felem out, const felem in)
685{
686 largefelem tmp;
687 felem_square(tmp, in);
688 felem_reduce(out, tmp);
689}
690
691static void
692felem_mul_reduce(felem out, const felem in1, const felem in2)
693{
694 largefelem tmp;
695 felem_mul(tmp, in1, in2);
696 felem_reduce(out, tmp);
697}
698
699/* felem_inv calculates |out| = |in|^{-1}
700 *
701 * Based on Fermat's Little Theorem:
702 * a^p = a (mod p)
703 * a^{p-1} = 1 (mod p)
704 * a^{p-2} = a^{-1} (mod p)
705 */
706static void
707felem_inv(felem out, const felem in)
708{
709 felem ftmp, ftmp2, ftmp3, ftmp4;
710 largefelem tmp;
711 unsigned i;
712
713 felem_square(tmp, in);
714 felem_reduce(ftmp, tmp);/* 2^1 */
715 felem_mul(tmp, in, ftmp);
716 felem_reduce(ftmp, tmp);/* 2^2 - 2^0 */
717 felem_assign(ftmp2, ftmp);
718 felem_square(tmp, ftmp);
719 felem_reduce(ftmp, tmp);/* 2^3 - 2^1 */
720 felem_mul(tmp, in, ftmp);
721 felem_reduce(ftmp, tmp);/* 2^3 - 2^0 */
722 felem_square(tmp, ftmp);
723 felem_reduce(ftmp, tmp);/* 2^4 - 2^1 */
724
725 felem_square(tmp, ftmp2);
726 felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
727 felem_square(tmp, ftmp3);
728 felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
729 felem_mul(tmp, ftmp3, ftmp2);
730 felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
731
732 felem_assign(ftmp2, ftmp3);
733 felem_square(tmp, ftmp3);
734 felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
735 felem_square(tmp, ftmp3);
736 felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
739 felem_square(tmp, ftmp3);
740 felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
741 felem_assign(ftmp4, ftmp3);
742 felem_mul(tmp, ftmp3, ftmp);
743 felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
744 felem_square(tmp, ftmp4);
745 felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
746 felem_mul(tmp, ftmp3, ftmp2);
747 felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
748 felem_assign(ftmp2, ftmp3);
749
750 for (i = 0; i < 8; i++) {
751 felem_square(tmp, ftmp3);
752 felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
753 }
754 felem_mul(tmp, ftmp3, ftmp2);
755 felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
756 felem_assign(ftmp2, ftmp3);
757
758 for (i = 0; i < 16; i++) {
759 felem_square(tmp, ftmp3);
760 felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
761 }
762 felem_mul(tmp, ftmp3, ftmp2);
763 felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
764 felem_assign(ftmp2, ftmp3);
765
766 for (i = 0; i < 32; i++) {
767 felem_square(tmp, ftmp3);
768 felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
769 }
770 felem_mul(tmp, ftmp3, ftmp2);
771 felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
772 felem_assign(ftmp2, ftmp3);
773
774 for (i = 0; i < 64; i++) {
775 felem_square(tmp, ftmp3);
776 felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
777 }
778 felem_mul(tmp, ftmp3, ftmp2);
779 felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
780 felem_assign(ftmp2, ftmp3);
781
782 for (i = 0; i < 128; i++) {
783 felem_square(tmp, ftmp3);
784 felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
785 }
786 felem_mul(tmp, ftmp3, ftmp2);
787 felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
788 felem_assign(ftmp2, ftmp3);
789
790 for (i = 0; i < 256; i++) {
791 felem_square(tmp, ftmp3);
792 felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
793 }
794 felem_mul(tmp, ftmp3, ftmp2);
795 felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
796
797 for (i = 0; i < 9; i++) {
798 felem_square(tmp, ftmp3);
799 felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
800 }
801 felem_mul(tmp, ftmp3, ftmp4);
802 felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */
803 felem_mul(tmp, ftmp3, in);
804 felem_reduce(out, tmp); /* 2^512 - 3 */
805}
806
807/* This is 2^521-1, expressed as an felem */
808static const felem kPrime =
809{
810 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
811 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
812 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
813};
814
815/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
816 * otherwise.
817 * On entry:
818 * in[i] < 2^59 + 2^14
819 */
820static limb
821felem_is_zero(const felem in)
822{
823 felem ftmp;
824 limb is_zero, is_p;
825 felem_assign(ftmp, in);
826
827 ftmp[0] += ftmp[8] >> 57;
828 ftmp[8] &= bottom57bits;
829 /* ftmp[8] < 2^57 */
830 ftmp[1] += ftmp[0] >> 58;
831 ftmp[0] &= bottom58bits;
832 ftmp[2] += ftmp[1] >> 58;
833 ftmp[1] &= bottom58bits;
834 ftmp[3] += ftmp[2] >> 58;
835 ftmp[2] &= bottom58bits;
836 ftmp[4] += ftmp[3] >> 58;
837 ftmp[3] &= bottom58bits;
838 ftmp[5] += ftmp[4] >> 58;
839 ftmp[4] &= bottom58bits;
840 ftmp[6] += ftmp[5] >> 58;
841 ftmp[5] &= bottom58bits;
842 ftmp[7] += ftmp[6] >> 58;
843 ftmp[6] &= bottom58bits;
844 ftmp[8] += ftmp[7] >> 58;
845 ftmp[7] &= bottom58bits;
846 /* ftmp[8] < 2^57 + 4 */
847
848 /*
849 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
850 * greater than our bound for ftmp[8]. Therefore we only have to
851 * check if the zero is zero or 2^521-1.
852 */
853
854 is_zero = 0;
855 is_zero |= ftmp[0];
856 is_zero |= ftmp[1];
857 is_zero |= ftmp[2];
858 is_zero |= ftmp[3];
859 is_zero |= ftmp[4];
860 is_zero |= ftmp[5];
861 is_zero |= ftmp[6];
862 is_zero |= ftmp[7];
863 is_zero |= ftmp[8];
864
865 is_zero--;
866 /*
867 * We know that ftmp[i] < 2^63, therefore the only way that the top
868 * bit can be set is if is_zero was 0 before the decrement.
869 */
870 is_zero = ((s64) is_zero) >> 63;
871
872 is_p = ftmp[0] ^ kPrime[0];
873 is_p |= ftmp[1] ^ kPrime[1];
874 is_p |= ftmp[2] ^ kPrime[2];
875 is_p |= ftmp[3] ^ kPrime[3];
876 is_p |= ftmp[4] ^ kPrime[4];
877 is_p |= ftmp[5] ^ kPrime[5];
878 is_p |= ftmp[6] ^ kPrime[6];
879 is_p |= ftmp[7] ^ kPrime[7];
880 is_p |= ftmp[8] ^ kPrime[8];
881
882 is_p--;
883 is_p = ((s64) is_p) >> 63;
884
885 is_zero |= is_p;
886 return is_zero;
887}
888
889static int
890felem_is_zero_int(const felem in)
891{
892 return (int) (felem_is_zero(in) & ((limb) 1));
893}
894
895/* felem_contract converts |in| to its unique, minimal representation.
896 * On entry:
897 * in[i] < 2^59 + 2^14
898 */
899static void
900felem_contract(felem out, const felem in)
901{
902 limb is_p, is_greater, sign;
903 static const limb two58 = ((limb) 1) << 58;
904
905 felem_assign(out, in);
906
907 out[0] += out[8] >> 57;
908 out[8] &= bottom57bits;
909 /* out[8] < 2^57 */
910 out[1] += out[0] >> 58;
911 out[0] &= bottom58bits;
912 out[2] += out[1] >> 58;
913 out[1] &= bottom58bits;
914 out[3] += out[2] >> 58;
915 out[2] &= bottom58bits;
916 out[4] += out[3] >> 58;
917 out[3] &= bottom58bits;
918 out[5] += out[4] >> 58;
919 out[4] &= bottom58bits;
920 out[6] += out[5] >> 58;
921 out[5] &= bottom58bits;
922 out[7] += out[6] >> 58;
923 out[6] &= bottom58bits;
924 out[8] += out[7] >> 58;
925 out[7] &= bottom58bits;
926 /* out[8] < 2^57 + 4 */
927
928 /*
929 * If the value is greater than 2^521-1 then we have to subtract
930 * 2^521-1 out. See the comments in felem_is_zero regarding why we
931 * don't test for other multiples of the prime.
932 */
933
934 /*
935 * First, if |out| is equal to 2^521-1, we subtract it out to get
936 * zero.
937 */
938
939 is_p = out[0] ^ kPrime[0];
940 is_p |= out[1] ^ kPrime[1];
941 is_p |= out[2] ^ kPrime[2];
942 is_p |= out[3] ^ kPrime[3];
943 is_p |= out[4] ^ kPrime[4];
944 is_p |= out[5] ^ kPrime[5];
945 is_p |= out[6] ^ kPrime[6];
946 is_p |= out[7] ^ kPrime[7];
947 is_p |= out[8] ^ kPrime[8];
948
949 is_p--;
950 is_p &= is_p << 32;
951 is_p &= is_p << 16;
952 is_p &= is_p << 8;
953 is_p &= is_p << 4;
954 is_p &= is_p << 2;
955 is_p &= is_p << 1;
956 is_p = ((s64) is_p) >> 63;
957 is_p = ~is_p;
958
959 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
960
961 out[0] &= is_p;
962 out[1] &= is_p;
963 out[2] &= is_p;
964 out[3] &= is_p;
965 out[4] &= is_p;
966 out[5] &= is_p;
967 out[6] &= is_p;
968 out[7] &= is_p;
969 out[8] &= is_p;
970
971 /*
972 * In order to test that |out| >= 2^521-1 we need only test if out[8]
973 * >> 57 is greater than zero as (2^521-1) + x >= 2^522
974 */
975 is_greater = out[8] >> 57;
976 is_greater |= is_greater << 32;
977 is_greater |= is_greater << 16;
978 is_greater |= is_greater << 8;
979 is_greater |= is_greater << 4;
980 is_greater |= is_greater << 2;
981 is_greater |= is_greater << 1;
982 is_greater = ((s64) is_greater) >> 63;
983
984 out[0] -= kPrime[0] & is_greater;
985 out[1] -= kPrime[1] & is_greater;
986 out[2] -= kPrime[2] & is_greater;
987 out[3] -= kPrime[3] & is_greater;
988 out[4] -= kPrime[4] & is_greater;
989 out[5] -= kPrime[5] & is_greater;
990 out[6] -= kPrime[6] & is_greater;
991 out[7] -= kPrime[7] & is_greater;
992 out[8] -= kPrime[8] & is_greater;
993
994 /* Eliminate negative coefficients */
995 sign = -(out[0] >> 63);
996 out[0] += (two58 & sign);
997 out[1] -= (1 & sign);
998 sign = -(out[1] >> 63);
999 out[1] += (two58 & sign);
1000 out[2] -= (1 & sign);
1001 sign = -(out[2] >> 63);
1002 out[2] += (two58 & sign);
1003 out[3] -= (1 & sign);
1004 sign = -(out[3] >> 63);
1005 out[3] += (two58 & sign);
1006 out[4] -= (1 & sign);
1007 sign = -(out[4] >> 63);
1008 out[4] += (two58 & sign);
1009 out[5] -= (1 & sign);
1010 sign = -(out[0] >> 63);
1011 out[5] += (two58 & sign);
1012 out[6] -= (1 & sign);
1013 sign = -(out[6] >> 63);
1014 out[6] += (two58 & sign);
1015 out[7] -= (1 & sign);
1016 sign = -(out[7] >> 63);
1017 out[7] += (two58 & sign);
1018 out[8] -= (1 & sign);
1019 sign = -(out[5] >> 63);
1020 out[5] += (two58 & sign);
1021 out[6] -= (1 & sign);
1022 sign = -(out[6] >> 63);
1023 out[6] += (two58 & sign);
1024 out[7] -= (1 & sign);
1025 sign = -(out[7] >> 63);
1026 out[7] += (two58 & sign);
1027 out[8] -= (1 & sign);
1028}
1029
1030/* Group operations
1031 * ----------------
1032 *
1033 * Building on top of the field operations we have the operations on the
1034 * elliptic curve group itself. Points on the curve are represented in Jacobian
1035 * coordinates */
1036
1037/* point_double calcuates 2*(x_in, y_in, z_in)
1038 *
1039 * The method is taken from:
1040 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1041 *
1042 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1043 * while x_out == y_in is not (maybe this works, but it's not tested). */
1044static void
1045point_double(felem x_out, felem y_out, felem z_out,
1046 const felem x_in, const felem y_in, const felem z_in)
1047{
1048 largefelem tmp, tmp2;
1049 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1050
1051 felem_assign(ftmp, x_in);
1052 felem_assign(ftmp2, x_in);
1053
1054 /* delta = z^2 */
1055 felem_square(tmp, z_in);
1056 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
1057
1058 /* gamma = y^2 */
1059 felem_square(tmp, y_in);
1060 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
1061
1062 /* beta = x*gamma */
1063 felem_mul(tmp, x_in, gamma);
1064 felem_reduce(beta, tmp);/* beta[i] < 2^59 + 2^14 */
1065
1066 /* alpha = 3*(x-delta)*(x+delta) */
1067 felem_diff64(ftmp, delta);
1068 /* ftmp[i] < 2^61 */
1069 felem_sum64(ftmp2, delta);
1070 /* ftmp2[i] < 2^60 + 2^15 */
1071 felem_scalar64(ftmp2, 3);
1072 /* ftmp2[i] < 3*2^60 + 3*2^15 */
1073 felem_mul(tmp, ftmp, ftmp2);
1074 /*
1075 * tmp[i] < 17(3*2^121 + 3*2^76) = 61*2^121 + 61*2^76 < 64*2^121 +
1076 * 64*2^76 = 2^127 + 2^82 < 2^128
1077 */
1078 felem_reduce(alpha, tmp);
1079
1080 /* x' = alpha^2 - 8*beta */
1081 felem_square(tmp, alpha);
1082 /*
1083 * tmp[i] < 17*2^120 < 2^125
1084 */
1085 felem_assign(ftmp, beta);
1086 felem_scalar64(ftmp, 8);
1087 /* ftmp[i] < 2^62 + 2^17 */
1088 felem_diff_128_64(tmp, ftmp);
1089 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1090 felem_reduce(x_out, tmp);
1091
1092 /* z' = (y + z)^2 - gamma - delta */
1093 felem_sum64(delta, gamma);
1094 /* delta[i] < 2^60 + 2^15 */
1095 felem_assign(ftmp, y_in);
1096 felem_sum64(ftmp, z_in);
1097 /* ftmp[i] < 2^60 + 2^15 */
1098 felem_square(tmp, ftmp);
1099 /*
1100 * tmp[i] < 17(2^122) < 2^127
1101 */
1102 felem_diff_128_64(tmp, delta);
1103 /* tmp[i] < 2^127 + 2^63 */
1104 felem_reduce(z_out, tmp);
1105
1106 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1107 felem_scalar64(beta, 4);
1108 /* beta[i] < 2^61 + 2^16 */
1109 felem_diff64(beta, x_out);
1110 /* beta[i] < 2^61 + 2^60 + 2^16 */
1111 felem_mul(tmp, alpha, beta);
1112 /*
1113 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16)) = 17*(2^120 + 2^75
1114 * + 2^119 + 2^74 + 2^75 + 2^30) = 17*(2^120 + 2^119 + 2^76 + 2^74 +
1115 * 2^30) < 2^128
1116 */
1117 felem_square(tmp2, gamma);
1118 /*
1119 * tmp2[i] < 17*(2^59 + 2^14)^2 = 17*(2^118 + 2^74 + 2^28)
1120 */
1121 felem_scalar128(tmp2, 8);
1122 /*
1123 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28) = 2^125 + 2^121 + 2^81 + 2^77
1124 * + 2^35 + 2^31 < 2^126
1125 */
1126 felem_diff128(tmp, tmp2);
1127 /*
1128 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30) =
1129 * 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 + 2^74
1130 * + 2^69 + 2^34 + 2^30 < 2^128
1131 */
1132 felem_reduce(y_out, tmp);
1133}
1134
1135/* copy_conditional copies in to out iff mask is all ones. */
1136static void
1137copy_conditional(felem out, const felem in, limb mask)
1138{
1139 unsigned i;
1140 for (i = 0; i < NLIMBS; ++i) {
1141 const limb tmp = mask & (in[i] ^ out[i]);
1142 out[i] ^= tmp;
1143 }
1144}
1145
1146/* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1147 *
1148 * The method is taken from
1149 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1150 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1151 *
1152 * This function includes a branch for checking whether the two input points
1153 * are equal (while not equal to the point at infinity). This case never
1154 * happens during single point multiplication, so there is no timing leak for
1155 * ECDH or ECDSA signing. */
1156static void
1157point_add(felem x3, felem y3, felem z3,
1158 const felem x1, const felem y1, const felem z1,
1159 const int mixed, const felem x2, const felem y2, const felem z2)
1160{
1161 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1162 largefelem tmp, tmp2;
1163 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1164
1165 z1_is_zero = felem_is_zero(z1);
1166 z2_is_zero = felem_is_zero(z2);
1167
1168 /* ftmp = z1z1 = z1**2 */
1169 felem_square(tmp, z1);
1170 felem_reduce(ftmp, tmp);
1171
1172 if (!mixed) {
1173 /* ftmp2 = z2z2 = z2**2 */
1174 felem_square(tmp, z2);
1175 felem_reduce(ftmp2, tmp);
1176
1177 /* u1 = ftmp3 = x1*z2z2 */
1178 felem_mul(tmp, x1, ftmp2);
1179 felem_reduce(ftmp3, tmp);
1180
1181 /* ftmp5 = z1 + z2 */
1182 felem_assign(ftmp5, z1);
1183 felem_sum64(ftmp5, z2);
1184 /* ftmp5[i] < 2^61 */
1185
1186 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1187 felem_square(tmp, ftmp5);
1188 /* tmp[i] < 17*2^122 */
1189 felem_diff_128_64(tmp, ftmp);
1190 /* tmp[i] < 17*2^122 + 2^63 */
1191 felem_diff_128_64(tmp, ftmp2);
1192 /* tmp[i] < 17*2^122 + 2^64 */
1193 felem_reduce(ftmp5, tmp);
1194
1195 /* ftmp2 = z2 * z2z2 */
1196 felem_mul(tmp, ftmp2, z2);
1197 felem_reduce(ftmp2, tmp);
1198
1199 /* s1 = ftmp6 = y1 * z2**3 */
1200 felem_mul(tmp, y1, ftmp2);
1201 felem_reduce(ftmp6, tmp);
1202 } else {
1203 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1204
1205 /* u1 = ftmp3 = x1*z2z2 */
1206 felem_assign(ftmp3, x1);
1207
1208 /* ftmp5 = 2*z1z2 */
1209 felem_scalar(ftmp5, z1, 2);
1210
1211 /* s1 = ftmp6 = y1 * z2**3 */
1212 felem_assign(ftmp6, y1);
1213 }
1214
1215 /* u2 = x2*z1z1 */
1216 felem_mul(tmp, x2, ftmp);
1217 /* tmp[i] < 17*2^120 */
1218
1219 /* h = ftmp4 = u2 - u1 */
1220 felem_diff_128_64(tmp, ftmp3);
1221 /* tmp[i] < 17*2^120 + 2^63 */
1222 felem_reduce(ftmp4, tmp);
1223
1224 x_equal = felem_is_zero(ftmp4);
1225
1226 /* z_out = ftmp5 * h */
1227 felem_mul(tmp, ftmp5, ftmp4);
1228 felem_reduce(z_out, tmp);
1229
1230 /* ftmp = z1 * z1z1 */
1231 felem_mul(tmp, ftmp, z1);
1232 felem_reduce(ftmp, tmp);
1233
1234 /* s2 = tmp = y2 * z1**3 */
1235 felem_mul(tmp, y2, ftmp);
1236 /* tmp[i] < 17*2^120 */
1237
1238 /* r = ftmp5 = (s2 - s1)*2 */
1239 felem_diff_128_64(tmp, ftmp6);
1240 /* tmp[i] < 17*2^120 + 2^63 */
1241 felem_reduce(ftmp5, tmp);
1242 y_equal = felem_is_zero(ftmp5);
1243 felem_scalar64(ftmp5, 2);
1244 /* ftmp5[i] < 2^61 */
1245
1246 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1247 point_double(x3, y3, z3, x1, y1, z1);
1248 return;
1249 }
1250 /* I = ftmp = (2h)**2 */
1251 felem_assign(ftmp, ftmp4);
1252 felem_scalar64(ftmp, 2);
1253 /* ftmp[i] < 2^61 */
1254 felem_square(tmp, ftmp);
1255 /* tmp[i] < 17*2^122 */
1256 felem_reduce(ftmp, tmp);
1257
1258 /* J = ftmp2 = h * I */
1259 felem_mul(tmp, ftmp4, ftmp);
1260 felem_reduce(ftmp2, tmp);
1261
1262 /* V = ftmp4 = U1 * I */
1263 felem_mul(tmp, ftmp3, ftmp);
1264 felem_reduce(ftmp4, tmp);
1265
1266 /* x_out = r**2 - J - 2V */
1267 felem_square(tmp, ftmp5);
1268 /* tmp[i] < 17*2^122 */
1269 felem_diff_128_64(tmp, ftmp2);
1270 /* tmp[i] < 17*2^122 + 2^63 */
1271 felem_assign(ftmp3, ftmp4);
1272 felem_scalar64(ftmp4, 2);
1273 /* ftmp4[i] < 2^61 */
1274 felem_diff_128_64(tmp, ftmp4);
1275 /* tmp[i] < 17*2^122 + 2^64 */
1276 felem_reduce(x_out, tmp);
1277
1278 /* y_out = r(V-x_out) - 2 * s1 * J */
1279 felem_diff64(ftmp3, x_out);
1280 /*
1281 * ftmp3[i] < 2^60 + 2^60 = 2^61
1282 */
1283 felem_mul(tmp, ftmp5, ftmp3);
1284 /* tmp[i] < 17*2^122 */
1285 felem_mul(tmp2, ftmp6, ftmp2);
1286 /* tmp2[i] < 17*2^120 */
1287 felem_scalar128(tmp2, 2);
1288 /* tmp2[i] < 17*2^121 */
1289 felem_diff128(tmp, tmp2);
1290 /*
1291 * tmp[i] < 2^127 - 2^69 + 17*2^122 = 2^126 - 2^122 - 2^6 - 2^2 - 1 <
1292 * 2^127
1293 */
1294 felem_reduce(y_out, tmp);
1295
1296 copy_conditional(x_out, x2, z1_is_zero);
1297 copy_conditional(x_out, x1, z2_is_zero);
1298 copy_conditional(y_out, y2, z1_is_zero);
1299 copy_conditional(y_out, y1, z2_is_zero);
1300 copy_conditional(z_out, z2, z1_is_zero);
1301 copy_conditional(z_out, z1, z2_is_zero);
1302 felem_assign(x3, x_out);
1303 felem_assign(y3, y_out);
1304 felem_assign(z3, z_out);
1305}
1306
1307/* Base point pre computation
1308 * --------------------------
1309 *
1310 * Two different sorts of precomputed tables are used in the following code.
1311 * Each contain various points on the curve, where each point is three field
1312 * elements (x, y, z).
1313 *
1314 * For the base point table, z is usually 1 (0 for the point at infinity).
1315 * This table has 16 elements:
1316 * index | bits | point
1317 * ------+---------+------------------------------
1318 * 0 | 0 0 0 0 | 0G
1319 * 1 | 0 0 0 1 | 1G
1320 * 2 | 0 0 1 0 | 2^130G
1321 * 3 | 0 0 1 1 | (2^130 + 1)G
1322 * 4 | 0 1 0 0 | 2^260G
1323 * 5 | 0 1 0 1 | (2^260 + 1)G
1324 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1325 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1326 * 8 | 1 0 0 0 | 2^390G
1327 * 9 | 1 0 0 1 | (2^390 + 1)G
1328 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1329 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1330 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1331 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1332 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1333 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1334 *
1335 * The reason for this is so that we can clock bits into four different
1336 * locations when doing simple scalar multiplies against the base point.
1337 *
1338 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1339
1340/* gmul is the table of precomputed base points */
1341static const felem gmul[16][3] =
1342{{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1343{0, 0, 0, 0, 0, 0, 0, 0, 0},
1344{0, 0, 0, 0, 0, 0, 0, 0, 0}},
1345{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1346 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
13470x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1348{0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1349 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
13500x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1351{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1352{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1353 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
13540x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1355{0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1356 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
13570x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1358{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1359{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1360 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
13610x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1362{0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1363 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
13640x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1365{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1366{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1367 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
13680x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1369{0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1370 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
13710x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1372{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1373{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1374 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
13750x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1376{0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1377 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
13780x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1379{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1380{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1381 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
13820x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1383{0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1384 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
13850x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1386{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1387{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1388 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
13890x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1390{0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1391 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
13920x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1393{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1394{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1395 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
13960x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1397{0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1398 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
13990x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1400{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1401{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1402 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
14030x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1404{0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1405 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
14060x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1407{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1408{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1409 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
14100x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1411{0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1412 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
14130x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1414{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1415{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1416 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
14170x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1418{0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1419 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
14200x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1421{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1422{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1423 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
14240x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1425{0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1426 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
14270x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1428{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1429{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1430 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
14310x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1432{0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1433 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
14340x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1435{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1436{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1437 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
14380x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1439{0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1440 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
14410x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1442{1, 0, 0, 0, 0, 0, 0, 0, 0}},
1443{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1444 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
14450x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1446{0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1447 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
14480x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1449{1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1450
1451/* select_point selects the |idx|th point from a precomputation table and
1452 * copies it to out. */
1453static void
1454select_point(const limb idx, unsigned int size, const felem pre_comp[ /* size */ ][3],
1455 felem out[3])
1456{
1457 unsigned i, j;
1458 limb *outlimbs = &out[0][0];
1459 memset(outlimbs, 0, 3 * sizeof(felem));
1460
1461 for (i = 0; i < size; i++) {
1462 const limb *inlimbs = &pre_comp[i][0][0];
1463 limb mask = i ^ idx;
1464 mask |= mask >> 4;
1465 mask |= mask >> 2;
1466 mask |= mask >> 1;
1467 mask &= 1;
1468 mask--;
1469 for (j = 0; j < NLIMBS * 3; j++)
1470 outlimbs[j] |= inlimbs[j] & mask;
1471 }
1472}
1473
1474/* get_bit returns the |i|th bit in |in| */
1475static char
1476get_bit(const felem_bytearray in, int i)
1477{
1478 if (i < 0)
1479 return 0;
1480 return (in[i >> 3] >> (i & 7)) & 1;
1481}
1482
1483/* Interleaved point multiplication using precomputed point multiples:
1484 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1485 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1486 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1487 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1488static void
1489batch_mul(felem x_out, felem y_out, felem z_out,
1490 const felem_bytearray scalars[], const unsigned num_points, const u8 * g_scalar,
1491 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
1492{
1493 int i, skip;
1494 unsigned num, gen_mul = (g_scalar != NULL);
1495 felem nq[3], tmp[4];
1496 limb bits;
1497 u8 sign, digit;
1498
1499 /* set nq to the point at infinity */
1500 memset(nq, 0, 3 * sizeof(felem));
1501
1502 /*
1503 * Loop over all scalars msb-to-lsb, interleaving additions of
1504 * multiples of the generator (last quarter of rounds) and additions
1505 * of other points multiples (every 5th round).
1506 */
1507 skip = 1; /* save two point operations in the first
1508 * round */
1509 for (i = (num_points ? 520 : 130); i >= 0; --i) {
1510 /* double */
1511 if (!skip)
1512 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1513
1514 /* add multiples of the generator */
1515 if (gen_mul && (i <= 130)) {
1516 bits = get_bit(g_scalar, i + 390) << 3;
1517 if (i < 130) {
1518 bits |= get_bit(g_scalar, i + 260) << 2;
1519 bits |= get_bit(g_scalar, i + 130) << 1;
1520 bits |= get_bit(g_scalar, i);
1521 }
1522 /* select the point to add, in constant time */
1523 select_point(bits, 16, g_pre_comp, tmp);
1524 if (!skip) {
1525 point_add(nq[0], nq[1], nq[2],
1526 nq[0], nq[1], nq[2],
1527 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1528 } else {
1529 memcpy(nq, tmp, 3 * sizeof(felem));
1530 skip = 0;
1531 }
1532 }
1533 /* do other additions every 5 doublings */
1534 if (num_points && (i % 5 == 0)) {
1535 /* loop over all scalars */
1536 for (num = 0; num < num_points; ++num) {
1537 bits = get_bit(scalars[num], i + 4) << 5;
1538 bits |= get_bit(scalars[num], i + 3) << 4;
1539 bits |= get_bit(scalars[num], i + 2) << 3;
1540 bits |= get_bit(scalars[num], i + 1) << 2;
1541 bits |= get_bit(scalars[num], i) << 1;
1542 bits |= get_bit(scalars[num], i - 1);
1543 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1544
1545 /*
1546 * select the point to add or subtract, in
1547 * constant time
1548 */
1549 select_point(digit, 17, pre_comp[num], tmp);
1550 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the
1551 * negative point */
1552 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1553
1554 if (!skip) {
1555 point_add(nq[0], nq[1], nq[2],
1556 nq[0], nq[1], nq[2],
1557 mixed, tmp[0], tmp[1], tmp[2]);
1558 } else {
1559 memcpy(nq, tmp, 3 * sizeof(felem));
1560 skip = 0;
1561 }
1562 }
1563 }
1564 }
1565 felem_assign(x_out, nq[0]);
1566 felem_assign(y_out, nq[1]);
1567 felem_assign(z_out, nq[2]);
1568}
1569
1570
1571/* Precomputation for the group generator. */
1572typedef struct {
1573 felem g_pre_comp[16][3];
1574 int references;
1575} NISTP521_PRE_COMP;
1576
1577const EC_METHOD *
1578EC_GFp_nistp521_method(void)
1579{
1580 static const EC_METHOD ret = {
1581 .flags = EC_FLAGS_DEFAULT_OCT,
1582 .field_type = NID_X9_62_prime_field,
1583 .group_init = ec_GFp_nistp521_group_init,
1584 .group_finish = ec_GFp_simple_group_finish,
1585 .group_clear_finish = ec_GFp_simple_group_clear_finish,
1586 .group_copy = ec_GFp_nist_group_copy,
1587 .group_set_curve = ec_GFp_nistp521_group_set_curve,
1588 .group_get_curve = ec_GFp_simple_group_get_curve,
1589 .group_get_degree = ec_GFp_simple_group_get_degree,
1590 .group_check_discriminant =
1591 ec_GFp_simple_group_check_discriminant,
1592 .point_init = ec_GFp_simple_point_init,
1593 .point_finish = ec_GFp_simple_point_finish,
1594 .point_clear_finish = ec_GFp_simple_point_clear_finish,
1595 .point_copy = ec_GFp_simple_point_copy,
1596 .point_set_to_infinity = ec_GFp_simple_point_set_to_infinity,
1597 .point_set_Jprojective_coordinates_GFp =
1598 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1599 .point_get_Jprojective_coordinates_GFp =
1600 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1601 .point_set_affine_coordinates =
1602 ec_GFp_simple_point_set_affine_coordinates,
1603 .point_get_affine_coordinates =
1604 ec_GFp_nistp521_point_get_affine_coordinates,
1605 .add = ec_GFp_simple_add,
1606 .dbl = ec_GFp_simple_dbl,
1607 .invert = ec_GFp_simple_invert,
1608 .is_at_infinity = ec_GFp_simple_is_at_infinity,
1609 .is_on_curve = ec_GFp_simple_is_on_curve,
1610 .point_cmp = ec_GFp_simple_cmp,
1611 .make_affine = ec_GFp_simple_make_affine,
1612 .points_make_affine = ec_GFp_simple_points_make_affine,
1613 .mul = ec_GFp_nistp521_points_mul,
1614 .precompute_mult = ec_GFp_nistp521_precompute_mult,
1615 .have_precompute_mult = ec_GFp_nistp521_have_precompute_mult,
1616 .field_mul = ec_GFp_nist_field_mul,
1617 .field_sqr = ec_GFp_nist_field_sqr
1618 };
1619
1620 return &ret;
1621}
1622
1623
1624/******************************************************************************/
1625/* FUNCTIONS TO MANAGE PRECOMPUTATION
1626 */
1627
1628static NISTP521_PRE_COMP *
1629nistp521_pre_comp_new()
1630{
1631 NISTP521_PRE_COMP *ret = NULL;
1632 ret = malloc(sizeof(NISTP521_PRE_COMP));
1633 if (!ret) {
1634 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1635 return ret;
1636 }
1637 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1638 ret->references = 1;
1639 return ret;
1640}
1641
1642static void *
1643nistp521_pre_comp_dup(void *src_)
1644{
1645 NISTP521_PRE_COMP *src = src_;
1646
1647 /* no need to actually copy, these objects never change! */
1648 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1649
1650 return src_;
1651}
1652
1653static void
1654nistp521_pre_comp_free(void *pre_)
1655{
1656 int i;
1657 NISTP521_PRE_COMP *pre = pre_;
1658
1659 if (!pre)
1660 return;
1661
1662 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1663 if (i > 0)
1664 return;
1665
1666 free(pre);
1667}
1668
1669static void
1670nistp521_pre_comp_clear_free(void *pre_)
1671{
1672 int i;
1673 NISTP521_PRE_COMP *pre = pre_;
1674
1675 if (!pre)
1676 return;
1677
1678 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1679 if (i > 0)
1680 return;
1681
1682 OPENSSL_cleanse(pre, sizeof(*pre));
1683 free(pre);
1684}
1685
1686/******************************************************************************/
1687/* OPENSSL EC_METHOD FUNCTIONS
1688 */
1689
1690int
1691ec_GFp_nistp521_group_init(EC_GROUP * group)
1692{
1693 int ret;
1694 ret = ec_GFp_simple_group_init(group);
1695 group->a_is_minus3 = 1;
1696 return ret;
1697}
1698
1699int
1700ec_GFp_nistp521_group_set_curve(EC_GROUP * group, const BIGNUM * p,
1701 const BIGNUM * a, const BIGNUM * b, BN_CTX * ctx)
1702{
1703 int ret = 0;
1704 BN_CTX *new_ctx = NULL;
1705 BIGNUM *curve_p, *curve_a, *curve_b;
1706
1707 if (ctx == NULL)
1708 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1709 return 0;
1710 BN_CTX_start(ctx);
1711 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1712 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1713 ((curve_b = BN_CTX_get(ctx)) == NULL))
1714 goto err;
1715 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1716 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1717 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1718 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1719 (BN_cmp(curve_b, b))) {
1720 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1721 EC_R_WRONG_CURVE_PARAMETERS);
1722 goto err;
1723 }
1724 group->field_mod_func = BN_nist_mod_521;
1725 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1726err:
1727 BN_CTX_end(ctx);
1728 BN_CTX_free(new_ctx);
1729 return ret;
1730}
1731
1732/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1733 * (X', Y') = (X/Z^2, Y/Z^3) */
1734int
1735ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP * group,
1736 const EC_POINT * point, BIGNUM * x, BIGNUM * y, BN_CTX * ctx)
1737{
1738 felem z1, z2, x_in, y_in, x_out, y_out;
1739 largefelem tmp;
1740
1741 if (EC_POINT_is_at_infinity(group, point) > 0) {
1742 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1743 EC_R_POINT_AT_INFINITY);
1744 return 0;
1745 }
1746 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1747 (!BN_to_felem(z1, &point->Z)))
1748 return 0;
1749 felem_inv(z2, z1);
1750 felem_square(tmp, z2);
1751 felem_reduce(z1, tmp);
1752 felem_mul(tmp, x_in, z1);
1753 felem_reduce(x_in, tmp);
1754 felem_contract(x_out, x_in);
1755 if (x != NULL) {
1756 if (!felem_to_BN(x, x_out)) {
1757 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1758 return 0;
1759 }
1760 }
1761 felem_mul(tmp, z1, z2);
1762 felem_reduce(z1, tmp);
1763 felem_mul(tmp, y_in, z1);
1764 felem_reduce(y_in, tmp);
1765 felem_contract(y_out, y_in);
1766 if (y != NULL) {
1767 if (!felem_to_BN(y, y_out)) {
1768 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1769 return 0;
1770 }
1771 }
1772 return 1;
1773}
1774
1775static void
1776make_points_affine(size_t num, felem points[ /* num */ ][3], felem tmp_felems[ /* num+1 */ ])
1777{
1778 /*
1779 * Runs in constant time, unless an input is the point at infinity
1780 * (which normally shouldn't happen).
1781 */
1782 ec_GFp_nistp_points_make_affine_internal(
1783 num,
1784 points,
1785 sizeof(felem),
1786 tmp_felems,
1787 (void (*) (void *)) felem_one,
1788 (int (*) (const void *)) felem_is_zero_int,
1789 (void (*) (void *, const void *)) felem_assign,
1790 (void (*) (void *, const void *)) felem_square_reduce,
1791 (void (*) (void *, const void *, const void *)) felem_mul_reduce,
1792 (void (*) (void *, const void *)) felem_inv,
1793 (void (*) (void *, const void *)) felem_contract);
1794}
1795
1796/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1797 * Result is stored in r (r can equal one of the inputs). */
1798int
1799ec_GFp_nistp521_points_mul(const EC_GROUP * group, EC_POINT * r,
1800 const BIGNUM * scalar, size_t num, const EC_POINT * points[],
1801 const BIGNUM * scalars[], BN_CTX * ctx)
1802{
1803 int ret = 0;
1804 int j;
1805 int mixed = 0;
1806 BN_CTX *new_ctx = NULL;
1807 BIGNUM *x, *y, *z, *tmp_scalar;
1808 felem_bytearray g_secret;
1809 felem_bytearray *secrets = NULL;
1810 felem(*pre_comp)[17][3] = NULL;
1811 felem *tmp_felems = NULL;
1812 felem_bytearray tmp;
1813 unsigned i, num_bytes;
1814 int have_pre_comp = 0;
1815 size_t num_points = num;
1816 felem x_in, y_in, z_in, x_out, y_out, z_out;
1817 NISTP521_PRE_COMP *pre = NULL;
1818 felem(*g_pre_comp)[3] = NULL;
1819 EC_POINT *generator = NULL;
1820 const EC_POINT *p = NULL;
1821 const BIGNUM *p_scalar = NULL;
1822
1823 if (ctx == NULL)
1824 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1825 return 0;
1826 BN_CTX_start(ctx);
1827 if (((x = BN_CTX_get(ctx)) == NULL) ||
1828 ((y = BN_CTX_get(ctx)) == NULL) ||
1829 ((z = BN_CTX_get(ctx)) == NULL) ||
1830 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1831 goto err;
1832
1833 if (scalar != NULL) {
1834 pre = EC_EX_DATA_get_data(group->extra_data,
1835 nistp521_pre_comp_dup, nistp521_pre_comp_free,
1836 nistp521_pre_comp_clear_free);
1837 if (pre)
1838 /* we have precomputation, try to use it */
1839 g_pre_comp = &pre->g_pre_comp[0];
1840 else
1841 /* try to use the standard precomputation */
1842 g_pre_comp = (felem(*)[3]) gmul;
1843 generator = EC_POINT_new(group);
1844 if (generator == NULL)
1845 goto err;
1846 /* get the generator from precomputation */
1847 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1848 !felem_to_BN(y, g_pre_comp[1][1]) ||
1849 !felem_to_BN(z, g_pre_comp[1][2])) {
1850 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1851 goto err;
1852 }
1853 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1854 generator, x, y, z, ctx))
1855 goto err;
1856 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1857 /* precomputation matches generator */
1858 have_pre_comp = 1;
1859 else
1860 /*
1861 * we don't have valid precomputation: treat the
1862 * generator as a random point
1863 */
1864 num_points++;
1865 }
1866 if (num_points > 0) {
1867 if (num_points >= 2) {
1868 /*
1869 * unless we precompute multiples for just one point,
1870 * converting those into affine form is time well
1871 * spent
1872 */
1873 mixed = 1;
1874 }
1875 secrets = calloc(num_points, sizeof(felem_bytearray));
1876 pre_comp = calloc(num_points, 17 * 3 * sizeof(felem));
1877 if (mixed) {
1878 /* XXX should do more int overflow checking */
1879 tmp_felems = reallocarray(NULL,
1880 (num_points * 17 + 1), sizeof(felem));
1881 }
1882 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL))) {
1883 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1884 goto err;
1885 }
1886 /*
1887 * we treat NULL scalars as 0, and NULL points as points at
1888 * infinity, i.e., they contribute nothing to the linear
1889 * combination
1890 */
1891 for (i = 0; i < num_points; ++i) {
1892 if (i == num)
1893 /*
1894 * we didn't have a valid precomputation, so
1895 * we pick the generator
1896 */
1897 {
1898 p = EC_GROUP_get0_generator(group);
1899 p_scalar = scalar;
1900 } else
1901 /* the i^th point */
1902 {
1903 p = points[i];
1904 p_scalar = scalars[i];
1905 }
1906 if ((p_scalar != NULL) && (p != NULL)) {
1907 /* reduce scalar to 0 <= scalar < 2^521 */
1908 if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar))) {
1909 /*
1910 * this is an unusual input, and we
1911 * don't guarantee constant-timeness
1912 */
1913 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1914 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1915 goto err;
1916 }
1917 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1918 } else
1919 num_bytes = BN_bn2bin(p_scalar, tmp);
1920 flip_endian(secrets[i], tmp, num_bytes);
1921 /* precompute multiples */
1922 if ((!BN_to_felem(x_out, &p->X)) ||
1923 (!BN_to_felem(y_out, &p->Y)) ||
1924 (!BN_to_felem(z_out, &p->Z)))
1925 goto err;
1926 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1927 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1928 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1929 for (j = 2; j <= 16; ++j) {
1930 if (j & 1) {
1931 point_add(
1932 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1933 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1934 0, pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
1935 } else {
1936 point_double(
1937 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1938 pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
1939 }
1940 }
1941 }
1942 }
1943 if (mixed)
1944 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1945 }
1946 /* the scalar for the generator */
1947 if ((scalar != NULL) && (have_pre_comp)) {
1948 memset(g_secret, 0, sizeof(g_secret));
1949 /* reduce scalar to 0 <= scalar < 2^521 */
1950 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
1951 /*
1952 * this is an unusual input, and we don't guarantee
1953 * constant-timeness
1954 */
1955 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1956 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1957 goto err;
1958 }
1959 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1960 } else
1961 num_bytes = BN_bn2bin(scalar, tmp);
1962 flip_endian(g_secret, tmp, num_bytes);
1963 /* do the multiplication with generator precomputation */
1964 batch_mul(x_out, y_out, z_out,
1965 (const felem_bytearray(*)) secrets, num_points,
1966 g_secret,
1967 mixed, (const felem(*)[17][3]) pre_comp,
1968 (const felem(*)[3]) g_pre_comp);
1969 } else
1970 /* do the multiplication without generator precomputation */
1971 batch_mul(x_out, y_out, z_out,
1972 (const felem_bytearray(*)) secrets, num_points,
1973 NULL, mixed, (const felem(*)[17][3]) pre_comp, NULL);
1974 /* reduce the output to its unique minimal representation */
1975 felem_contract(x_in, x_out);
1976 felem_contract(y_in, y_out);
1977 felem_contract(z_in, z_out);
1978 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1979 (!felem_to_BN(z, z_in))) {
1980 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1981 goto err;
1982 }
1983 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1984
1985err:
1986 BN_CTX_end(ctx);
1987 EC_POINT_free(generator);
1988 BN_CTX_free(new_ctx);
1989 free(secrets);
1990 free(pre_comp);
1991 free(tmp_felems);
1992 return ret;
1993}
1994
1995int
1996ec_GFp_nistp521_precompute_mult(EC_GROUP * group, BN_CTX * ctx)
1997{
1998 int ret = 0;
1999 NISTP521_PRE_COMP *pre = NULL;
2000 int i, j;
2001 BN_CTX *new_ctx = NULL;
2002 BIGNUM *x, *y;
2003 EC_POINT *generator = NULL;
2004 felem tmp_felems[16];
2005
2006 /* throw away old precomputation */
2007 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
2008 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
2009 if (ctx == NULL)
2010 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2011 return 0;
2012 BN_CTX_start(ctx);
2013 if (((x = BN_CTX_get(ctx)) == NULL) ||
2014 ((y = BN_CTX_get(ctx)) == NULL))
2015 goto err;
2016 /* get the generator */
2017 if (group->generator == NULL)
2018 goto err;
2019 generator = EC_POINT_new(group);
2020 if (generator == NULL)
2021 goto err;
2022 BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
2023 BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
2024 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2025 goto err;
2026 if ((pre = nistp521_pre_comp_new()) == NULL)
2027 goto err;
2028 /* if the generator is the standard one, use built-in precomputation */
2029 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2030 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2031 ret = 1;
2032 goto err;
2033 }
2034 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
2035 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
2036 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
2037 goto err;
2038 /* compute 2^130*G, 2^260*G, 2^390*G */
2039 for (i = 1; i <= 4; i <<= 1) {
2040 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
2041 pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
2042 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
2043 for (j = 0; j < 129; ++j) {
2044 point_double(pre->g_pre_comp[2 * i][0],
2045 pre->g_pre_comp[2 * i][1],
2046 pre->g_pre_comp[2 * i][2],
2047 pre->g_pre_comp[2 * i][0],
2048 pre->g_pre_comp[2 * i][1],
2049 pre->g_pre_comp[2 * i][2]);
2050 }
2051 }
2052 /* g_pre_comp[0] is the point at infinity */
2053 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
2054 /* the remaining multiples */
2055 /* 2^130*G + 2^260*G */
2056 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2057 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2058 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2059 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2060 pre->g_pre_comp[2][2]);
2061 /* 2^130*G + 2^390*G */
2062 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2063 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2064 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2065 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2066 pre->g_pre_comp[2][2]);
2067 /* 2^260*G + 2^390*G */
2068 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2069 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2070 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2071 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2072 pre->g_pre_comp[4][2]);
2073 /* 2^130*G + 2^260*G + 2^390*G */
2074 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2075 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2076 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2077 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2078 pre->g_pre_comp[2][2]);
2079 for (i = 1; i < 8; ++i) {
2080 /* odd multiples: add G */
2081 point_add(pre->g_pre_comp[2 * i + 1][0], pre->g_pre_comp[2 * i + 1][1],
2082 pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
2083 pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
2084 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2085 pre->g_pre_comp[1][2]);
2086 }
2087 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2088
2089 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2090 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
2091 goto err;
2092 ret = 1;
2093 pre = NULL;
2094err:
2095 BN_CTX_end(ctx);
2096 EC_POINT_free(generator);
2097 BN_CTX_free(new_ctx);
2098 nistp521_pre_comp_free(pre);
2099 return ret;
2100}
2101
2102int
2103ec_GFp_nistp521_have_precompute_mult(const EC_GROUP * group)
2104{
2105 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2106 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2107 != NULL)
2108 return 1;
2109 else
2110 return 0;
2111}
2112
2113#endif