summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/ec/ecp_nistp256.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/ec/ecp_nistp256.c1357
1 files changed, 719 insertions, 638 deletions
diff --git a/src/lib/libcrypto/ec/ecp_nistp256.c b/src/lib/libcrypto/ec/ecp_nistp256.c
index 132ca0d250..345f67d520 100644
--- a/src/lib/libcrypto/ec/ecp_nistp256.c
+++ b/src/lib/libcrypto/ec/ecp_nistp256.c
@@ -112,41 +112,45 @@ typedef limb longfelem[NLIMBS * 2];
112typedef u64 smallfelem[NLIMBS]; 112typedef u64 smallfelem[NLIMBS];
113 113
114/* This is the value of the prime as four 64-bit words, little-endian. */ 114/* This is the value of the prime as four 64-bit words, little-endian. */
115static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul }; 115static const u64 kPrime[4] = {0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul};
116static const limb bottom32bits = 0xffffffff; 116static const limb bottom32bits = 0xffffffff;
117static const u64 bottom63bits = 0x7ffffffffffffffful; 117static const u64 bottom63bits = 0x7ffffffffffffffful;
118 118
119/* bin32_to_felem takes a little-endian byte array and converts it into felem 119/* bin32_to_felem takes a little-endian byte array and converts it into felem
120 * form. This assumes that the CPU is little-endian. */ 120 * form. This assumes that the CPU is little-endian. */
121static void bin32_to_felem(felem out, const u8 in[32]) 121static void
122 { 122bin32_to_felem(felem out, const u8 in[32])
123 out[0] = *((u64*) &in[0]); 123{
124 out[1] = *((u64*) &in[8]); 124 out[0] = *((u64 *) & in[0]);
125 out[2] = *((u64*) &in[16]); 125 out[1] = *((u64 *) & in[8]);
126 out[3] = *((u64*) &in[24]); 126 out[2] = *((u64 *) & in[16]);
127 } 127 out[3] = *((u64 *) & in[24]);
128}
128 129
129/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian, 130/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
130 * 32 byte array. This assumes that the CPU is little-endian. */ 131 * 32 byte array. This assumes that the CPU is little-endian. */
131static void smallfelem_to_bin32(u8 out[32], const smallfelem in) 132static void
132 { 133smallfelem_to_bin32(u8 out[32], const smallfelem in)
133 *((u64*) &out[0]) = in[0]; 134{
134 *((u64*) &out[8]) = in[1]; 135 *((u64 *) & out[0]) = in[0];
135 *((u64*) &out[16]) = in[2]; 136 *((u64 *) & out[8]) = in[1];
136 *((u64*) &out[24]) = in[3]; 137 *((u64 *) & out[16]) = in[2];
137 } 138 *((u64 *) & out[24]) = in[3];
139}
138 140
139/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 141/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
140static void flip_endian(u8 *out, const u8 *in, unsigned len) 142static void
141 { 143flip_endian(u8 * out, const u8 * in, unsigned len)
144{
142 unsigned i; 145 unsigned i;
143 for (i = 0; i < len; ++i) 146 for (i = 0; i < len; ++i)
144 out[i] = in[len-1-i]; 147 out[i] = in[len - 1 - i];
145 } 148}
146 149
147/* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 150/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
148static int BN_to_felem(felem out, const BIGNUM *bn) 151static int
149 { 152BN_to_felem(felem out, const BIGNUM * bn)
153{
150 felem_bytearray b_in; 154 felem_bytearray b_in;
151 felem_bytearray b_out; 155 felem_bytearray b_out;
152 unsigned num_bytes; 156 unsigned num_bytes;
@@ -154,89 +158,95 @@ static int BN_to_felem(felem out, const BIGNUM *bn)
154 /* BN_bn2bin eats leading zeroes */ 158 /* BN_bn2bin eats leading zeroes */
155 memset(b_out, 0, sizeof b_out); 159 memset(b_out, 0, sizeof b_out);
156 num_bytes = BN_num_bytes(bn); 160 num_bytes = BN_num_bytes(bn);
157 if (num_bytes > sizeof b_out) 161 if (num_bytes > sizeof b_out) {
158 {
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 162 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
160 return 0; 163 return 0;
161 } 164 }
162 if (BN_is_negative(bn)) 165 if (BN_is_negative(bn)) {
163 {
164 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 166 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
165 return 0; 167 return 0;
166 } 168 }
167 num_bytes = BN_bn2bin(bn, b_in); 169 num_bytes = BN_bn2bin(bn, b_in);
168 flip_endian(b_out, b_in, num_bytes); 170 flip_endian(b_out, b_in, num_bytes);
169 bin32_to_felem(out, b_out); 171 bin32_to_felem(out, b_out);
170 return 1; 172 return 1;
171 } 173}
172 174
173/* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 175/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
174static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in) 176static BIGNUM *
175 { 177smallfelem_to_BN(BIGNUM * out, const smallfelem in)
178{
176 felem_bytearray b_in, b_out; 179 felem_bytearray b_in, b_out;
177 smallfelem_to_bin32(b_in, in); 180 smallfelem_to_bin32(b_in, in);
178 flip_endian(b_out, b_in, sizeof b_out); 181 flip_endian(b_out, b_in, sizeof b_out);
179 return BN_bin2bn(b_out, sizeof b_out, out); 182 return BN_bin2bn(b_out, sizeof b_out, out);
180 } 183}
181 184
182 185
183/* Field operations 186/* Field operations
184 * ---------------- */ 187 * ---------------- */
185 188
186static void smallfelem_one(smallfelem out) 189static void
187 { 190smallfelem_one(smallfelem out)
191{
188 out[0] = 1; 192 out[0] = 1;
189 out[1] = 0; 193 out[1] = 0;
190 out[2] = 0; 194 out[2] = 0;
191 out[3] = 0; 195 out[3] = 0;
192 } 196}
193 197
194static void smallfelem_assign(smallfelem out, const smallfelem in) 198static void
195 { 199smallfelem_assign(smallfelem out, const smallfelem in)
200{
196 out[0] = in[0]; 201 out[0] = in[0];
197 out[1] = in[1]; 202 out[1] = in[1];
198 out[2] = in[2]; 203 out[2] = in[2];
199 out[3] = in[3]; 204 out[3] = in[3];
200 } 205}
201 206
202static void felem_assign(felem out, const felem in) 207static void
203 { 208felem_assign(felem out, const felem in)
209{
204 out[0] = in[0]; 210 out[0] = in[0];
205 out[1] = in[1]; 211 out[1] = in[1];
206 out[2] = in[2]; 212 out[2] = in[2];
207 out[3] = in[3]; 213 out[3] = in[3];
208 } 214}
209 215
210/* felem_sum sets out = out + in. */ 216/* felem_sum sets out = out + in. */
211static void felem_sum(felem out, const felem in) 217static void
212 { 218felem_sum(felem out, const felem in)
219{
213 out[0] += in[0]; 220 out[0] += in[0];
214 out[1] += in[1]; 221 out[1] += in[1];
215 out[2] += in[2]; 222 out[2] += in[2];
216 out[3] += in[3]; 223 out[3] += in[3];
217 } 224}
218 225
219/* felem_small_sum sets out = out + in. */ 226/* felem_small_sum sets out = out + in. */
220static void felem_small_sum(felem out, const smallfelem in) 227static void
221 { 228felem_small_sum(felem out, const smallfelem in)
229{
222 out[0] += in[0]; 230 out[0] += in[0];
223 out[1] += in[1]; 231 out[1] += in[1];
224 out[2] += in[2]; 232 out[2] += in[2];
225 out[3] += in[3]; 233 out[3] += in[3];
226 } 234}
227 235
228/* felem_scalar sets out = out * scalar */ 236/* felem_scalar sets out = out * scalar */
229static void felem_scalar(felem out, const u64 scalar) 237static void
230 { 238felem_scalar(felem out, const u64 scalar)
239{
231 out[0] *= scalar; 240 out[0] *= scalar;
232 out[1] *= scalar; 241 out[1] *= scalar;
233 out[2] *= scalar; 242 out[2] *= scalar;
234 out[3] *= scalar; 243 out[3] *= scalar;
235 } 244}
236 245
237/* longfelem_scalar sets out = out * scalar */ 246/* longfelem_scalar sets out = out * scalar */
238static void longfelem_scalar(longfelem out, const u64 scalar) 247static void
239 { 248longfelem_scalar(longfelem out, const u64 scalar)
249{
240 out[0] *= scalar; 250 out[0] *= scalar;
241 out[1] *= scalar; 251 out[1] *= scalar;
242 out[2] *= scalar; 252 out[2] *= scalar;
@@ -245,27 +255,28 @@ static void longfelem_scalar(longfelem out, const u64 scalar)
245 out[5] *= scalar; 255 out[5] *= scalar;
246 out[6] *= scalar; 256 out[6] *= scalar;
247 out[7] *= scalar; 257 out[7] *= scalar;
248 } 258}
249 259
250#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9) 260#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
251#define two105 (((limb)1) << 105) 261#define two105 (((limb)1) << 105)
252#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9) 262#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
253 263
254/* zero105 is 0 mod p */ 264/* zero105 is 0 mod p */
255static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 }; 265static const felem zero105 = {two105m41m9, two105, two105m41p9, two105m41p9};
256 266
257/* smallfelem_neg sets |out| to |-small| 267/* smallfelem_neg sets |out| to |-small|
258 * On exit: 268 * On exit:
259 * out[i] < out[i] + 2^105 269 * out[i] < out[i] + 2^105
260 */ 270 */
261static void smallfelem_neg(felem out, const smallfelem small) 271static void
262 { 272smallfelem_neg(felem out, const smallfelem small)
273{
263 /* In order to prevent underflow, we subtract from 0 mod p. */ 274 /* In order to prevent underflow, we subtract from 0 mod p. */
264 out[0] = zero105[0] - small[0]; 275 out[0] = zero105[0] - small[0];
265 out[1] = zero105[1] - small[1]; 276 out[1] = zero105[1] - small[1];
266 out[2] = zero105[2] - small[2]; 277 out[2] = zero105[2] - small[2];
267 out[3] = zero105[3] - small[3]; 278 out[3] = zero105[3] - small[3];
268 } 279}
269 280
270/* felem_diff subtracts |in| from |out| 281/* felem_diff subtracts |in| from |out|
271 * On entry: 282 * On entry:
@@ -273,8 +284,9 @@ static void smallfelem_neg(felem out, const smallfelem small)
273 * On exit: 284 * On exit:
274 * out[i] < out[i] + 2^105 285 * out[i] < out[i] + 2^105
275 */ 286 */
276static void felem_diff(felem out, const felem in) 287static void
277 { 288felem_diff(felem out, const felem in)
289{
278 /* In order to prevent underflow, we add 0 mod p before subtracting. */ 290 /* In order to prevent underflow, we add 0 mod p before subtracting. */
279 out[0] += zero105[0]; 291 out[0] += zero105[0];
280 out[1] += zero105[1]; 292 out[1] += zero105[1];
@@ -285,14 +297,14 @@ static void felem_diff(felem out, const felem in)
285 out[1] -= in[1]; 297 out[1] -= in[1];
286 out[2] -= in[2]; 298 out[2] -= in[2];
287 out[3] -= in[3]; 299 out[3] -= in[3];
288 } 300}
289 301
290#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11) 302#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
291#define two107 (((limb)1) << 107) 303#define two107 (((limb)1) << 107)
292#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11) 304#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
293 305
294/* zero107 is 0 mod p */ 306/* zero107 is 0 mod p */
295static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 }; 307static const felem zero107 = {two107m43m11, two107, two107m43p11, two107m43p11};
296 308
297/* An alternative felem_diff for larger inputs |in| 309/* An alternative felem_diff for larger inputs |in|
298 * felem_diff_zero107 subtracts |in| from |out| 310 * felem_diff_zero107 subtracts |in| from |out|
@@ -301,8 +313,9 @@ static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11
301 * On exit: 313 * On exit:
302 * out[i] < out[i] + 2^107 314 * out[i] < out[i] + 2^107
303 */ 315 */
304static void felem_diff_zero107(felem out, const felem in) 316static void
305 { 317felem_diff_zero107(felem out, const felem in)
318{
306 /* In order to prevent underflow, we add 0 mod p before subtracting. */ 319 /* In order to prevent underflow, we add 0 mod p before subtracting. */
307 out[0] += zero107[0]; 320 out[0] += zero107[0];
308 out[1] += zero107[1]; 321 out[1] += zero107[1];
@@ -313,7 +326,7 @@ static void felem_diff_zero107(felem out, const felem in)
313 out[1] -= in[1]; 326 out[1] -= in[1];
314 out[2] -= in[2]; 327 out[2] -= in[2];
315 out[3] -= in[3]; 328 out[3] -= in[3];
316 } 329}
317 330
318/* longfelem_diff subtracts |in| from |out| 331/* longfelem_diff subtracts |in| from |out|
319 * On entry: 332 * On entry:
@@ -321,13 +334,14 @@ static void felem_diff_zero107(felem out, const felem in)
321 * On exit: 334 * On exit:
322 * out[i] < out[i] + 2^70 + 2^40 335 * out[i] < out[i] + 2^70 + 2^40
323 */ 336 */
324static void longfelem_diff(longfelem out, const longfelem in) 337static void
325 { 338longfelem_diff(longfelem out, const longfelem in)
326 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6); 339{
327 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40); 340 static const limb two70m8p6 = (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
328 static const limb two70 = (((limb)1) << 70); 341 static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
329 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6); 342 static const limb two70 = (((limb) 1) << 70);
330 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6); 343 static const limb two70m40m38p6 = (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) + (((limb) 1) << 6);
344 static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
331 345
332 /* add 0 mod p to avoid underflow */ 346 /* add 0 mod p to avoid underflow */
333 out[0] += two70m8p6; 347 out[0] += two70m8p6;
@@ -348,7 +362,7 @@ static void longfelem_diff(longfelem out, const longfelem in)
348 out[5] -= in[5]; 362 out[5] -= in[5];
349 out[6] -= in[6]; 363 out[6] -= in[6];
350 out[7] -= in[7]; 364 out[7] -= in[7];
351 } 365}
352 366
353#define two64m0 (((limb)1) << 64) - 1 367#define two64m0 (((limb)1) << 64) - 1
354#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1 368#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
@@ -356,7 +370,7 @@ static void longfelem_diff(longfelem out, const longfelem in)
356#define two64m32 (((limb)1) << 64) - (((limb)1) << 32) 370#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
357 371
358/* zero110 is 0 mod p */ 372/* zero110 is 0 mod p */
359static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 }; 373static const felem zero110 = {two64m0, two110p32m0, two64m46, two64m32};
360 374
361/* felem_shrink converts an felem into a smallfelem. The result isn't quite 375/* felem_shrink converts an felem into a smallfelem. The result isn't quite
362 * minimal as the value may be greater than p. 376 * minimal as the value may be greater than p.
@@ -366,12 +380,13 @@ static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
366 * On exit: 380 * On exit:
367 * out[i] < 2^64 381 * out[i] < 2^64
368 */ 382 */
369static void felem_shrink(smallfelem out, const felem in) 383static void
370 { 384felem_shrink(smallfelem out, const felem in)
385{
371 felem tmp; 386 felem tmp;
372 u64 a, b, mask; 387 u64 a, b, mask;
373 s64 high, low; 388 s64 high, low;
374 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */ 389 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
375 390
376 /* Carry 2->3 */ 391 /* Carry 2->3 */
377 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64)); 392 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
@@ -382,50 +397,58 @@ static void felem_shrink(smallfelem out, const felem in)
382 tmp[1] = zero110[1] + in[1]; 397 tmp[1] = zero110[1] + in[1];
383 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */ 398 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
384 399
385 /* We perform two partial reductions where we eliminate the 400 /*
386 * high-word of tmp[3]. We don't update the other words till the end. 401 * We perform two partial reductions where we eliminate the high-word
402 * of tmp[3]. We don't update the other words till the end.
387 */ 403 */
388 a = tmp[3] >> 64; /* a < 2^46 */ 404 a = tmp[3] >> 64; /* a < 2^46 */
389 tmp[3] = (u64) tmp[3]; 405 tmp[3] = (u64) tmp[3];
390 tmp[3] -= a; 406 tmp[3] -= a;
391 tmp[3] += ((limb)a) << 32; 407 tmp[3] += ((limb) a) << 32;
392 /* tmp[3] < 2^79 */ 408 /* tmp[3] < 2^79 */
393 409
394 b = a; 410 b = a;
395 a = tmp[3] >> 64; /* a < 2^15 */ 411 a = tmp[3] >> 64; /* a < 2^15 */
396 b += a; /* b < 2^46 + 2^15 < 2^47 */ 412 b += a; /* b < 2^46 + 2^15 < 2^47 */
397 tmp[3] = (u64) tmp[3]; 413 tmp[3] = (u64) tmp[3];
398 tmp[3] -= a; 414 tmp[3] -= a;
399 tmp[3] += ((limb)a) << 32; 415 tmp[3] += ((limb) a) << 32;
400 /* tmp[3] < 2^64 + 2^47 */ 416 /* tmp[3] < 2^64 + 2^47 */
401 417
402 /* This adjusts the other two words to complete the two partial 418 /*
403 * reductions. */ 419 * This adjusts the other two words to complete the two partial
420 * reductions.
421 */
404 tmp[0] += b; 422 tmp[0] += b;
405 tmp[1] -= (((limb)b) << 32); 423 tmp[1] -= (((limb) b) << 32);
406 424
407 /* In order to make space in tmp[3] for the carry from 2 -> 3, we 425 /*
408 * conditionally subtract kPrime if tmp[3] is large enough. */ 426 * In order to make space in tmp[3] for the carry from 2 -> 3, we
427 * conditionally subtract kPrime if tmp[3] is large enough.
428 */
409 high = tmp[3] >> 64; 429 high = tmp[3] >> 64;
410 /* As tmp[3] < 2^65, high is either 1 or 0 */ 430 /* As tmp[3] < 2^65, high is either 1 or 0 */
411 high <<= 63; 431 high <<= 63;
412 high >>= 63; 432 high >>= 63;
413 /* high is: 433 /*
414 * all ones if the high word of tmp[3] is 1 434 * high is: all ones if the high word of tmp[3] is 1 all zeros if
415 * all zeros if the high word of tmp[3] if 0 */ 435 * the high word of tmp[3] if 0
436 */
416 low = tmp[3]; 437 low = tmp[3];
417 mask = low >> 63; 438 mask = low >> 63;
418 /* mask is: 439 /*
419 * all ones if the MSB of low is 1 440 * mask is: all ones if the MSB of low is 1 all zeros if the MSB
420 * all zeros if the MSB of low if 0 */ 441 * of low if 0
442 */
421 low &= bottom63bits; 443 low &= bottom63bits;
422 low -= kPrime3Test; 444 low -= kPrime3Test;
423 /* if low was greater than kPrime3Test then the MSB is zero */ 445 /* if low was greater than kPrime3Test then the MSB is zero */
424 low = ~low; 446 low = ~low;
425 low >>= 63; 447 low >>= 63;
426 /* low is: 448 /*
427 * all ones if low was > kPrime3Test 449 * low is: all ones if low was > kPrime3Test all zeros if low was
428 * all zeros if low was <= kPrime3Test */ 450 * <= kPrime3Test
451 */
429 mask = (mask & low) | high; 452 mask = (mask & low) | high;
430 tmp[0] -= mask & kPrime[0]; 453 tmp[0] -= mask & kPrime[0];
431 tmp[1] -= mask & kPrime[1]; 454 tmp[1] -= mask & kPrime[1];
@@ -433,25 +456,29 @@ static void felem_shrink(smallfelem out, const felem in)
433 tmp[3] -= mask & kPrime[3]; 456 tmp[3] -= mask & kPrime[3];
434 /* tmp[3] < 2**64 - 2**32 + 1 */ 457 /* tmp[3] < 2**64 - 2**32 + 1 */
435 458
436 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0]; 459 tmp[1] += ((u64) (tmp[0] >> 64));
437 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1]; 460 tmp[0] = (u64) tmp[0];
438 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2]; 461 tmp[2] += ((u64) (tmp[1] >> 64));
462 tmp[1] = (u64) tmp[1];
463 tmp[3] += ((u64) (tmp[2] >> 64));
464 tmp[2] = (u64) tmp[2];
439 /* tmp[i] < 2^64 */ 465 /* tmp[i] < 2^64 */
440 466
441 out[0] = tmp[0]; 467 out[0] = tmp[0];
442 out[1] = tmp[1]; 468 out[1] = tmp[1];
443 out[2] = tmp[2]; 469 out[2] = tmp[2];
444 out[3] = tmp[3]; 470 out[3] = tmp[3];
445 } 471}
446 472
447/* smallfelem_expand converts a smallfelem to an felem */ 473/* smallfelem_expand converts a smallfelem to an felem */
448static void smallfelem_expand(felem out, const smallfelem in) 474static void
449 { 475smallfelem_expand(felem out, const smallfelem in)
476{
450 out[0] = in[0]; 477 out[0] = in[0];
451 out[1] = in[1]; 478 out[1] = in[1];
452 out[2] = in[2]; 479 out[2] = in[2];
453 out[3] = in[3]; 480 out[3] = in[3];
454 } 481}
455 482
456/* smallfelem_square sets |out| = |small|^2 483/* smallfelem_square sets |out| = |small|^2
457 * On entry: 484 * On entry:
@@ -459,8 +486,9 @@ static void smallfelem_expand(felem out, const smallfelem in)
459 * On exit: 486 * On exit:
460 * out[i] < 7 * 2^64 < 2^67 487 * out[i] < 7 * 2^64 < 2^67
461 */ 488 */
462static void smallfelem_square(longfelem out, const smallfelem small) 489static void
463 { 490smallfelem_square(longfelem out, const smallfelem small)
491{
464 limb a; 492 limb a;
465 u64 high, low; 493 u64 high, low;
466 494
@@ -529,7 +557,7 @@ static void smallfelem_square(longfelem out, const smallfelem small)
529 high = a >> 64; 557 high = a >> 64;
530 out[6] += low; 558 out[6] += low;
531 out[7] = high; 559 out[7] = high;
532 } 560}
533 561
534/* felem_square sets |out| = |in|^2 562/* felem_square sets |out| = |in|^2
535 * On entry: 563 * On entry:
@@ -537,12 +565,13 @@ static void smallfelem_square(longfelem out, const smallfelem small)
537 * On exit: 565 * On exit:
538 * out[i] < 7 * 2^64 < 2^67 566 * out[i] < 7 * 2^64 < 2^67
539 */ 567 */
540static void felem_square(longfelem out, const felem in) 568static void
541 { 569felem_square(longfelem out, const felem in)
570{
542 u64 small[4]; 571 u64 small[4];
543 felem_shrink(small, in); 572 felem_shrink(small, in);
544 smallfelem_square(out, small); 573 smallfelem_square(out, small);
545 } 574}
546 575
547/* smallfelem_mul sets |out| = |small1| * |small2| 576/* smallfelem_mul sets |out| = |small1| * |small2|
548 * On entry: 577 * On entry:
@@ -551,8 +580,9 @@ static void felem_square(longfelem out, const felem in)
551 * On exit: 580 * On exit:
552 * out[i] < 7 * 2^64 < 2^67 581 * out[i] < 7 * 2^64 < 2^67
553 */ 582 */
554static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2) 583static void
555 { 584smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
585{
556 limb a; 586 limb a;
557 u64 high, low; 587 u64 high, low;
558 588
@@ -657,7 +687,7 @@ static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfe
657 high = a >> 64; 687 high = a >> 64;
658 out[6] += low; 688 out[6] += low;
659 out[7] = high; 689 out[7] = high;
660 } 690}
661 691
662/* felem_mul sets |out| = |in1| * |in2| 692/* felem_mul sets |out| = |in1| * |in2|
663 * On entry: 693 * On entry:
@@ -666,13 +696,14 @@ static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfe
666 * On exit: 696 * On exit:
667 * out[i] < 7 * 2^64 < 2^67 697 * out[i] < 7 * 2^64 < 2^67
668 */ 698 */
669static void felem_mul(longfelem out, const felem in1, const felem in2) 699static void
670 { 700felem_mul(longfelem out, const felem in1, const felem in2)
701{
671 smallfelem small1, small2; 702 smallfelem small1, small2;
672 felem_shrink(small1, in1); 703 felem_shrink(small1, in1);
673 felem_shrink(small2, in2); 704 felem_shrink(small2, in2);
674 smallfelem_mul(out, small1, small2); 705 smallfelem_mul(out, small1, small2);
675 } 706}
676 707
677/* felem_small_mul sets |out| = |small1| * |in2| 708/* felem_small_mul sets |out| = |small1| * |in2|
678 * On entry: 709 * On entry:
@@ -681,23 +712,24 @@ static void felem_mul(longfelem out, const felem in1, const felem in2)
681 * On exit: 712 * On exit:
682 * out[i] < 7 * 2^64 < 2^67 713 * out[i] < 7 * 2^64 < 2^67
683 */ 714 */
684static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2) 715static void
685 { 716felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
717{
686 smallfelem small2; 718 smallfelem small2;
687 felem_shrink(small2, in2); 719 felem_shrink(small2, in2);
688 smallfelem_mul(out, small1, small2); 720 smallfelem_mul(out, small1, small2);
689 } 721}
690 722
691#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4) 723#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
692#define two100 (((limb)1) << 100) 724#define two100 (((limb)1) << 100)
693#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4) 725#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
694/* zero100 is 0 mod p */ 726/* zero100 is 0 mod p */
695static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 }; 727static const felem zero100 = {two100m36m4, two100, two100m36p4, two100m36p4};
696 728
697/* Internal function for the different flavours of felem_reduce. 729/* Internal function for the different flavours of felem_reduce.
698 * felem_reduce_ reduces the higher coefficients in[4]-in[7]. 730 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
699 * On entry: 731 * On entry:
700 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7] 732 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
701 * out[1] >= in[7] + 2^32*in[4] 733 * out[1] >= in[7] + 2^32*in[4]
702 * out[2] >= in[5] + 2^32*in[5] 734 * out[2] >= in[5] + 2^32*in[5]
703 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6] 735 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
@@ -707,8 +739,9 @@ static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
707 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7] 739 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
708 * out[3] <= out[3] + 2^32*in[4] + 3*in[7] 740 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
709 */ 741 */
710static void felem_reduce_(felem out, const longfelem in) 742static void
711 { 743felem_reduce_(felem out, const longfelem in)
744{
712 int128_t c; 745 int128_t c;
713 /* combine common terms from below */ 746 /* combine common terms from below */
714 c = in[4] + (in[5] << 32); 747 c = in[4] + (in[5] << 32);
@@ -739,7 +772,7 @@ static void felem_reduce_(felem out, const longfelem in)
739 out[0] -= (in[7] << 32); 772 out[0] -= (in[7] << 32);
740 out[2] += (in[7] << 33); 773 out[2] += (in[7] << 33);
741 out[3] += (in[7] * 3); 774 out[3] += (in[7] * 3);
742 } 775}
743 776
744/* felem_reduce converts a longfelem into an felem. 777/* felem_reduce converts a longfelem into an felem.
745 * To be called directly after felem_square or felem_mul. 778 * To be called directly after felem_square or felem_mul.
@@ -749,8 +782,9 @@ static void felem_reduce_(felem out, const longfelem in)
749 * On exit: 782 * On exit:
750 * out[i] < 2^101 783 * out[i] < 2^101
751 */ 784 */
752static void felem_reduce(felem out, const longfelem in) 785static void
753 { 786felem_reduce(felem out, const longfelem in)
787{
754 out[0] = zero100[0] + in[0]; 788 out[0] = zero100[0] + in[0];
755 out[1] = zero100[1] + in[1]; 789 out[1] = zero100[1] + in[1];
756 out[2] = zero100[2] + in[2]; 790 out[2] = zero100[2] + in[2];
@@ -758,17 +792,18 @@ static void felem_reduce(felem out, const longfelem in)
758 792
759 felem_reduce_(out, in); 793 felem_reduce_(out, in);
760 794
761 /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0 795 /*
762 * out[1] > 2^100 - 2^64 - 7*2^96 > 0 796 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
763 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0 797 * out[1] > 2^100 - 2^64 - 7*2^96 > 0 out[2] > 2^100 - 2^36 + 2^4 -
764 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0 798 * 5*2^64 - 5*2^96 > 0 out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96
765 * 799 * - 3*2^96 > 0
766 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101 800 *
767 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101 801 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101 out[1] < 2^100 +
768 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101 802 * 3*2^64 + 5*2^64 + 3*2^97 < 2^101 out[2] < 2^100 + 5*2^64 + 2^64 +
769 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101 803 * 3*2^65 + 2^97 < 2^101 out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 <
804 * 2^101
770 */ 805 */
771 } 806}
772 807
773/* felem_reduce_zero105 converts a larger longfelem into an felem. 808/* felem_reduce_zero105 converts a larger longfelem into an felem.
774 * On entry: 809 * On entry:
@@ -776,8 +811,9 @@ static void felem_reduce(felem out, const longfelem in)
776 * On exit: 811 * On exit:
777 * out[i] < 2^106 812 * out[i] < 2^106
778 */ 813 */
779static void felem_reduce_zero105(felem out, const longfelem in) 814static void
780 { 815felem_reduce_zero105(felem out, const longfelem in)
816{
781 out[0] = zero105[0] + in[0]; 817 out[0] = zero105[0] + in[0];
782 out[1] = zero105[1] + in[1]; 818 out[1] = zero105[1] + in[1];
783 out[2] = zero105[2] + in[2]; 819 out[2] = zero105[2] + in[2];
@@ -785,34 +821,36 @@ static void felem_reduce_zero105(felem out, const longfelem in)
785 821
786 felem_reduce_(out, in); 822 felem_reduce_(out, in);
787 823
788 /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0 824 /*
789 * out[1] > 2^105 - 2^71 - 2^103 > 0 825 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
790 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0 826 * out[1] > 2^105 - 2^71 - 2^103 > 0 out[2] > 2^105 - 2^41 + 2^9 -
791 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0 827 * 2^71 - 2^103 > 0 out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 -
792 * 828 * 2^103 > 0
793 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106 829 *
794 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106 830 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106 out[1] < 2^105 + 2^71 +
795 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106 831 * 2^71 + 2^103 < 2^106 out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 <
796 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106 832 * 2^106 out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
797 */ 833 */
798 } 834}
799 835
800/* subtract_u64 sets *result = *result - v and *carry to one if the subtraction 836/* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
801 * underflowed. */ 837 * underflowed. */
802static void subtract_u64(u64* result, u64* carry, u64 v) 838static void
803 { 839subtract_u64(u64 * result, u64 * carry, u64 v)
840{
804 uint128_t r = *result; 841 uint128_t r = *result;
805 r -= v; 842 r -= v;
806 *carry = (r >> 64) & 1; 843 *carry = (r >> 64) & 1;
807 *result = (u64) r; 844 *result = (u64) r;
808 } 845}
809 846
810/* felem_contract converts |in| to its unique, minimal representation. 847/* felem_contract converts |in| to its unique, minimal representation.
811 * On entry: 848 * On entry:
812 * in[i] < 2^109 849 * in[i] < 2^109
813 */ 850 */
814static void felem_contract(smallfelem out, const felem in) 851static void
815 { 852felem_contract(smallfelem out, const felem in)
853{
816 unsigned i; 854 unsigned i;
817 u64 all_equal_so_far = 0, result = 0, carry; 855 u64 all_equal_so_far = 0, result = 0, carry;
818 856
@@ -820,20 +858,25 @@ static void felem_contract(smallfelem out, const felem in)
820 /* small is minimal except that the value might be > p */ 858 /* small is minimal except that the value might be > p */
821 859
822 all_equal_so_far--; 860 all_equal_so_far--;
823 /* We are doing a constant time test if out >= kPrime. We need to 861 /*
862 * We are doing a constant time test if out >= kPrime. We need to
824 * compare each u64, from most-significant to least significant. For 863 * compare each u64, from most-significant to least significant. For
825 * each one, if all words so far have been equal (m is all ones) then a 864 * each one, if all words so far have been equal (m is all ones) then
826 * non-equal result is the answer. Otherwise we continue. */ 865 * a non-equal result is the answer. Otherwise we continue.
827 for (i = 3; i < 4; i--) 866 */
828 { 867 for (i = 3; i < 4; i--) {
829 u64 equal; 868 u64 equal;
830 uint128_t a = ((uint128_t) kPrime[i]) - out[i]; 869 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
831 /* if out[i] > kPrime[i] then a will underflow and the high 870 /*
832 * 64-bits will all be set. */ 871 * if out[i] > kPrime[i] then a will underflow and the high
872 * 64-bits will all be set.
873 */
833 result |= all_equal_so_far & ((u64) (a >> 64)); 874 result |= all_equal_so_far & ((u64) (a >> 64));
834 875
835 /* if kPrime[i] == out[i] then |equal| will be all zeros and 876 /*
836 * the decrement will make it all ones. */ 877 * if kPrime[i] == out[i] then |equal| will be all zeros and
878 * the decrement will make it all ones.
879 */
837 equal = kPrime[i] ^ out[i]; 880 equal = kPrime[i] ^ out[i];
838 equal--; 881 equal--;
839 equal &= equal << 32; 882 equal &= equal << 32;
@@ -845,10 +888,12 @@ static void felem_contract(smallfelem out, const felem in)
845 equal = ((s64) equal) >> 63; 888 equal = ((s64) equal) >> 63;
846 889
847 all_equal_so_far &= equal; 890 all_equal_so_far &= equal;
848 } 891 }
849 892
850 /* if all_equal_so_far is still all ones then the two values are equal 893 /*
851 * and so out >= kPrime is true. */ 894 * if all_equal_so_far is still all ones then the two values are
895 * equal and so out >= kPrime is true.
896 */
852 result |= all_equal_so_far; 897 result |= all_equal_so_far;
853 898
854 /* if out >= kPrime then we subtract kPrime. */ 899 /* if out >= kPrime then we subtract kPrime. */
@@ -865,35 +910,38 @@ static void felem_contract(smallfelem out, const felem in)
865 subtract_u64(&out[3], &carry, carry); 910 subtract_u64(&out[3], &carry, carry);
866 911
867 subtract_u64(&out[3], &carry, result & kPrime[3]); 912 subtract_u64(&out[3], &carry, result & kPrime[3]);
868 } 913}
869 914
870static void smallfelem_square_contract(smallfelem out, const smallfelem in) 915static void
871 { 916smallfelem_square_contract(smallfelem out, const smallfelem in)
917{
872 longfelem longtmp; 918 longfelem longtmp;
873 felem tmp; 919 felem tmp;
874 920
875 smallfelem_square(longtmp, in); 921 smallfelem_square(longtmp, in);
876 felem_reduce(tmp, longtmp); 922 felem_reduce(tmp, longtmp);
877 felem_contract(out, tmp); 923 felem_contract(out, tmp);
878 } 924}
879 925
880static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2) 926static void
881 { 927smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
928{
882 longfelem longtmp; 929 longfelem longtmp;
883 felem tmp; 930 felem tmp;
884 931
885 smallfelem_mul(longtmp, in1, in2); 932 smallfelem_mul(longtmp, in1, in2);
886 felem_reduce(tmp, longtmp); 933 felem_reduce(tmp, longtmp);
887 felem_contract(out, tmp); 934 felem_contract(out, tmp);
888 } 935}
889 936
890/* smallfelem_is_zero returns a limb with all bits set if |small| == 0 (mod p) and 0 937/* smallfelem_is_zero returns a limb with all bits set if |small| == 0 (mod p) and 0
891 * otherwise. 938 * otherwise.
892 * On entry: 939 * On entry:
893 * small[i] < 2^64 940 * small[i] < 2^64
894 */ 941 */
895static limb smallfelem_is_zero(const smallfelem small) 942static limb
896 { 943smallfelem_is_zero(const smallfelem small)
944{
897 limb result; 945 limb result;
898 u64 is_p; 946 u64 is_p;
899 947
@@ -908,9 +956,9 @@ static limb smallfelem_is_zero(const smallfelem small)
908 is_zero = ((s64) is_zero) >> 63; 956 is_zero = ((s64) is_zero) >> 63;
909 957
910 is_p = (small[0] ^ kPrime[0]) | 958 is_p = (small[0] ^ kPrime[0]) |
911 (small[1] ^ kPrime[1]) | 959 (small[1] ^ kPrime[1]) |
912 (small[2] ^ kPrime[2]) | 960 (small[2] ^ kPrime[2]) |
913 (small[3] ^ kPrime[3]); 961 (small[3] ^ kPrime[3]);
914 is_p--; 962 is_p--;
915 is_p &= is_p << 32; 963 is_p &= is_p << 32;
916 is_p &= is_p << 16; 964 is_p &= is_p << 16;
@@ -925,12 +973,13 @@ static limb smallfelem_is_zero(const smallfelem small)
925 result = is_zero; 973 result = is_zero;
926 result |= ((limb) is_zero) << 64; 974 result |= ((limb) is_zero) << 64;
927 return result; 975 return result;
928 } 976}
929 977
930static int smallfelem_is_zero_int(const smallfelem small) 978static int
931 { 979smallfelem_is_zero_int(const smallfelem small)
932 return (int) (smallfelem_is_zero(small) & ((limb)1)); 980{
933 } 981 return (int) (smallfelem_is_zero(small) & ((limb) 1));
982}
934 983
935/* felem_inv calculates |out| = |in|^{-1} 984/* felem_inv calculates |out| = |in|^{-1}
936 * 985 *
@@ -939,77 +988,110 @@ static int smallfelem_is_zero_int(const smallfelem small)
939 * a^{p-1} = 1 (mod p) 988 * a^{p-1} = 1 (mod p)
940 * a^{p-2} = a^{-1} (mod p) 989 * a^{p-2} = a^{-1} (mod p)
941 */ 990 */
942static void felem_inv(felem out, const felem in) 991static void
943 { 992felem_inv(felem out, const felem in)
993{
944 felem ftmp, ftmp2; 994 felem ftmp, ftmp2;
945 /* each e_I will hold |in|^{2^I - 1} */ 995 /* each e_I will hold |in|^{2^I - 1} */
946 felem e2, e4, e8, e16, e32, e64; 996 felem e2, e4, e8, e16, e32, e64;
947 longfelem tmp; 997 longfelem tmp;
948 unsigned i; 998 unsigned i;
949 999
950 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */ 1000 felem_square(tmp, in);
951 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */ 1001 felem_reduce(ftmp, tmp);/* 2^1 */
1002 felem_mul(tmp, in, ftmp);
1003 felem_reduce(ftmp, tmp);/* 2^2 - 2^0 */
952 felem_assign(e2, ftmp); 1004 felem_assign(e2, ftmp);
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */ 1005 felem_square(tmp, ftmp);
954 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */ 1006 felem_reduce(ftmp, tmp);/* 2^3 - 2^1 */
955 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */ 1007 felem_square(tmp, ftmp);
1008 felem_reduce(ftmp, tmp);/* 2^4 - 2^2 */
1009 felem_mul(tmp, ftmp, e2);
1010 felem_reduce(ftmp, tmp);/* 2^4 - 2^0 */
956 felem_assign(e4, ftmp); 1011 felem_assign(e4, ftmp);
957 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */ 1012 felem_square(tmp, ftmp);
958 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */ 1013 felem_reduce(ftmp, tmp);/* 2^5 - 2^1 */
959 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */ 1014 felem_square(tmp, ftmp);
960 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */ 1015 felem_reduce(ftmp, tmp);/* 2^6 - 2^2 */
961 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */ 1016 felem_square(tmp, ftmp);
1017 felem_reduce(ftmp, tmp);/* 2^7 - 2^3 */
1018 felem_square(tmp, ftmp);
1019 felem_reduce(ftmp, tmp);/* 2^8 - 2^4 */
1020 felem_mul(tmp, ftmp, e4);
1021 felem_reduce(ftmp, tmp);/* 2^8 - 2^0 */
962 felem_assign(e8, ftmp); 1022 felem_assign(e8, ftmp);
963 for (i = 0; i < 8; i++) { 1023 for (i = 0; i < 8; i++) {
964 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 1024 felem_square(tmp, ftmp);
965 } /* 2^16 - 2^8 */ 1025 felem_reduce(ftmp, tmp);
966 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */ 1026 } /* 2^16 - 2^8 */
1027 felem_mul(tmp, ftmp, e8);
1028 felem_reduce(ftmp, tmp);/* 2^16 - 2^0 */
967 felem_assign(e16, ftmp); 1029 felem_assign(e16, ftmp);
968 for (i = 0; i < 16; i++) { 1030 for (i = 0; i < 16; i++) {
969 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 1031 felem_square(tmp, ftmp);
970 } /* 2^32 - 2^16 */ 1032 felem_reduce(ftmp, tmp);
971 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */ 1033 } /* 2^32 - 2^16 */
1034 felem_mul(tmp, ftmp, e16);
1035 felem_reduce(ftmp, tmp);/* 2^32 - 2^0 */
972 felem_assign(e32, ftmp); 1036 felem_assign(e32, ftmp);
973 for (i = 0; i < 32; i++) { 1037 for (i = 0; i < 32; i++) {
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 1038 felem_square(tmp, ftmp);
975 } /* 2^64 - 2^32 */ 1039 felem_reduce(ftmp, tmp);
1040 } /* 2^64 - 2^32 */
976 felem_assign(e64, ftmp); 1041 felem_assign(e64, ftmp);
977 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */ 1042 felem_mul(tmp, ftmp, in);
1043 felem_reduce(ftmp, tmp);/* 2^64 - 2^32 + 2^0 */
978 for (i = 0; i < 192; i++) { 1044 for (i = 0; i < 192; i++) {
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 1045 felem_square(tmp, ftmp);
980 } /* 2^256 - 2^224 + 2^192 */ 1046 felem_reduce(ftmp, tmp);
1047 } /* 2^256 - 2^224 + 2^192 */
981 1048
982 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */ 1049 felem_mul(tmp, e64, e32);
1050 felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
983 for (i = 0; i < 16; i++) { 1051 for (i = 0; i < 16; i++) {
984 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 1052 felem_square(tmp, ftmp2);
985 } /* 2^80 - 2^16 */ 1053 felem_reduce(ftmp2, tmp);
986 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */ 1054 } /* 2^80 - 2^16 */
1055 felem_mul(tmp, ftmp2, e16);
1056 felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
987 for (i = 0; i < 8; i++) { 1057 for (i = 0; i < 8; i++) {
988 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 1058 felem_square(tmp, ftmp2);
989 } /* 2^88 - 2^8 */ 1059 felem_reduce(ftmp2, tmp);
990 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */ 1060 } /* 2^88 - 2^8 */
1061 felem_mul(tmp, ftmp2, e8);
1062 felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
991 for (i = 0; i < 4; i++) { 1063 for (i = 0; i < 4; i++) {
992 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 1064 felem_square(tmp, ftmp2);
993 } /* 2^92 - 2^4 */ 1065 felem_reduce(ftmp2, tmp);
994 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */ 1066 } /* 2^92 - 2^4 */
995 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */ 1067 felem_mul(tmp, ftmp2, e4);
996 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */ 1068 felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
997 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */ 1069 felem_square(tmp, ftmp2);
998 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */ 1070 felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
999 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */ 1071 felem_square(tmp, ftmp2);
1000 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */ 1072 felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1001 1073 felem_mul(tmp, ftmp2, e2);
1002 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */ 1074 felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1003 } 1075 felem_square(tmp, ftmp2);
1076 felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1077 felem_square(tmp, ftmp2);
1078 felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1079 felem_mul(tmp, ftmp2, in);
1080 felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1081
1082 felem_mul(tmp, ftmp2, ftmp);
1083 felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1084}
1004 1085
1005static void smallfelem_inv_contract(smallfelem out, const smallfelem in) 1086static void
1006 { 1087smallfelem_inv_contract(smallfelem out, const smallfelem in)
1088{
1007 felem tmp; 1089 felem tmp;
1008 1090
1009 smallfelem_expand(tmp, in); 1091 smallfelem_expand(tmp, in);
1010 felem_inv(tmp, tmp); 1092 felem_inv(tmp, tmp);
1011 felem_contract(out, tmp); 1093 felem_contract(out, tmp);
1012 } 1094}
1013 1095
1014/* Group operations 1096/* Group operations
1015 * ---------------- 1097 * ----------------
@@ -1027,8 +1109,8 @@ static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1027 * while x_out == y_in is not (maybe this works, but it's not tested). */ 1109 * while x_out == y_in is not (maybe this works, but it's not tested). */
1028static void 1110static void
1029point_double(felem x_out, felem y_out, felem z_out, 1111point_double(felem x_out, felem y_out, felem z_out,
1030 const felem x_in, const felem y_in, const felem z_in) 1112 const felem x_in, const felem y_in, const felem z_in)
1031 { 1113{
1032 longfelem tmp, tmp2; 1114 longfelem tmp, tmp2;
1033 felem delta, gamma, beta, alpha, ftmp, ftmp2; 1115 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1034 smallfelem small1, small2; 1116 smallfelem small1, small2;
@@ -1101,14 +1183,14 @@ point_double(felem x_out, felem y_out, felem z_out,
1101 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */ 1183 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1102 felem_reduce_zero105(y_out, tmp); 1184 felem_reduce_zero105(y_out, tmp);
1103 /* y_out[i] < 2^106 */ 1185 /* y_out[i] < 2^106 */
1104 } 1186}
1105 1187
1106/* point_double_small is the same as point_double, except that it operates on 1188/* point_double_small is the same as point_double, except that it operates on
1107 * smallfelems */ 1189 * smallfelems */
1108static void 1190static void
1109point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out, 1191point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1110 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in) 1192 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1111 { 1193{
1112 felem felem_x_out, felem_y_out, felem_z_out; 1194 felem felem_x_out, felem_y_out, felem_z_out;
1113 felem felem_x_in, felem_y_in, felem_z_in; 1195 felem felem_x_in, felem_y_in, felem_z_in;
1114 1196
@@ -1116,35 +1198,33 @@ point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1116 smallfelem_expand(felem_y_in, y_in); 1198 smallfelem_expand(felem_y_in, y_in);
1117 smallfelem_expand(felem_z_in, z_in); 1199 smallfelem_expand(felem_z_in, z_in);
1118 point_double(felem_x_out, felem_y_out, felem_z_out, 1200 point_double(felem_x_out, felem_y_out, felem_z_out,
1119 felem_x_in, felem_y_in, felem_z_in); 1201 felem_x_in, felem_y_in, felem_z_in);
1120 felem_shrink(x_out, felem_x_out); 1202 felem_shrink(x_out, felem_x_out);
1121 felem_shrink(y_out, felem_y_out); 1203 felem_shrink(y_out, felem_y_out);
1122 felem_shrink(z_out, felem_z_out); 1204 felem_shrink(z_out, felem_z_out);
1123 } 1205}
1124 1206
1125/* copy_conditional copies in to out iff mask is all ones. */ 1207/* copy_conditional copies in to out iff mask is all ones. */
1126static void 1208static void
1127copy_conditional(felem out, const felem in, limb mask) 1209copy_conditional(felem out, const felem in, limb mask)
1128 { 1210{
1129 unsigned i; 1211 unsigned i;
1130 for (i = 0; i < NLIMBS; ++i) 1212 for (i = 0; i < NLIMBS; ++i) {
1131 {
1132 const limb tmp = mask & (in[i] ^ out[i]); 1213 const limb tmp = mask & (in[i] ^ out[i]);
1133 out[i] ^= tmp; 1214 out[i] ^= tmp;
1134 }
1135 } 1215 }
1216}
1136 1217
1137/* copy_small_conditional copies in to out iff mask is all ones. */ 1218/* copy_small_conditional copies in to out iff mask is all ones. */
1138static void 1219static void
1139copy_small_conditional(felem out, const smallfelem in, limb mask) 1220copy_small_conditional(felem out, const smallfelem in, limb mask)
1140 { 1221{
1141 unsigned i; 1222 unsigned i;
1142 const u64 mask64 = mask; 1223 const u64 mask64 = mask;
1143 for (i = 0; i < NLIMBS; ++i) 1224 for (i = 0; i < NLIMBS; ++i) {
1144 {
1145 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask); 1225 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1146 }
1147 } 1226 }
1227}
1148 1228
1149/* point_add calculates (x1, y1, z1) + (x2, y2, z2) 1229/* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1150 * 1230 *
@@ -1156,10 +1236,11 @@ copy_small_conditional(felem out, const smallfelem in, limb mask)
1156 * are equal, (while not equal to the point at infinity). This case never 1236 * are equal, (while not equal to the point at infinity). This case never
1157 * happens during single point multiplication, so there is no timing leak for 1237 * happens during single point multiplication, so there is no timing leak for
1158 * ECDH or ECDSA signing. */ 1238 * ECDH or ECDSA signing. */
1159static void point_add(felem x3, felem y3, felem z3, 1239static void
1160 const felem x1, const felem y1, const felem z1, 1240point_add(felem x3, felem y3, felem z3,
1161 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2) 1241 const felem x1, const felem y1, const felem z1,
1162 { 1242 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1243{
1163 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out; 1244 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1164 longfelem tmp, tmp2; 1245 longfelem tmp, tmp2;
1165 smallfelem small1, small2, small3, small4, small5; 1246 smallfelem small1, small2, small3, small4, small5;
@@ -1176,8 +1257,7 @@ static void point_add(felem x3, felem y3, felem z3,
1176 /* ftmp[i] < 2^101 */ 1257 /* ftmp[i] < 2^101 */
1177 felem_shrink(small1, ftmp); 1258 felem_shrink(small1, ftmp);
1178 1259
1179 if(!mixed) 1260 if (!mixed) {
1180 {
1181 /* ftmp2 = z2z2 = z2**2 */ 1261 /* ftmp2 = z2z2 = z2**2 */
1182 smallfelem_square(tmp, z2); 1262 smallfelem_square(tmp, z2);
1183 felem_reduce(ftmp2, tmp); 1263 felem_reduce(ftmp2, tmp);
@@ -1213,9 +1293,7 @@ static void point_add(felem x3, felem y3, felem z3,
1213 felem_mul(tmp, y1, ftmp2); 1293 felem_mul(tmp, y1, ftmp2);
1214 felem_reduce(ftmp6, tmp); 1294 felem_reduce(ftmp6, tmp);
1215 /* ftmp6[i] < 2^101 */ 1295 /* ftmp6[i] < 2^101 */
1216 } 1296 } else {
1217 else
1218 {
1219 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */ 1297 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1220 1298
1221 /* u1 = ftmp3 = x1*z2z2 */ 1299 /* u1 = ftmp3 = x1*z2z2 */
@@ -1230,7 +1308,7 @@ static void point_add(felem x3, felem y3, felem z3,
1230 /* s1 = ftmp6 = y1 * z2**3 */ 1308 /* s1 = ftmp6 = y1 * z2**3 */
1231 felem_assign(ftmp6, y1); 1309 felem_assign(ftmp6, y1);
1232 /* ftmp6[i] < 2^106 */ 1310 /* ftmp6[i] < 2^106 */
1233 } 1311 }
1234 1312
1235 /* u2 = x2*z1z1 */ 1313 /* u2 = x2*z1z1 */
1236 smallfelem_mul(tmp, x2, small1); 1314 smallfelem_mul(tmp, x2, small1);
@@ -1258,18 +1336,16 @@ static void point_add(felem x3, felem y3, felem z3,
1258 1336
1259 /* r = ftmp5 = (s2 - s1)*2 */ 1337 /* r = ftmp5 = (s2 - s1)*2 */
1260 felem_diff_zero107(ftmp5, ftmp6); 1338 felem_diff_zero107(ftmp5, ftmp6);
1261 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/ 1339 /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1262 felem_scalar(ftmp5, 2); 1340 felem_scalar(ftmp5, 2);
1263 /* ftmp5[i] < 2^109 */ 1341 /* ftmp5[i] < 2^109 */
1264 felem_shrink(small1, ftmp5); 1342 felem_shrink(small1, ftmp5);
1265 y_equal = smallfelem_is_zero(small1); 1343 y_equal = smallfelem_is_zero(small1);
1266 1344
1267 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) 1345 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1268 {
1269 point_double(x3, y3, z3, x1, y1, z1); 1346 point_double(x3, y3, z3, x1, y1, z1);
1270 return; 1347 return;
1271 } 1348 }
1272
1273 /* I = ftmp = (2h)**2 */ 1349 /* I = ftmp = (2h)**2 */
1274 felem_assign(ftmp, ftmp4); 1350 felem_assign(ftmp, ftmp4);
1275 felem_scalar(ftmp, 2); 1351 felem_scalar(ftmp, 2);
@@ -1316,14 +1392,15 @@ static void point_add(felem x3, felem y3, felem z3,
1316 felem_assign(x3, x_out); 1392 felem_assign(x3, x_out);
1317 felem_assign(y3, y_out); 1393 felem_assign(y3, y_out);
1318 felem_assign(z3, z_out); 1394 felem_assign(z3, z_out);
1319 } 1395}
1320 1396
1321/* point_add_small is the same as point_add, except that it operates on 1397/* point_add_small is the same as point_add, except that it operates on
1322 * smallfelems */ 1398 * smallfelems */
1323static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3, 1399static void
1324 smallfelem x1, smallfelem y1, smallfelem z1, 1400point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1325 smallfelem x2, smallfelem y2, smallfelem z2) 1401 smallfelem x1, smallfelem y1, smallfelem z1,
1326 { 1402 smallfelem x2, smallfelem y2, smallfelem z2)
1403{
1327 felem felem_x3, felem_y3, felem_z3; 1404 felem felem_x3, felem_y3, felem_z3;
1328 felem felem_x1, felem_y1, felem_z1; 1405 felem felem_x1, felem_y1, felem_z1;
1329 smallfelem_expand(felem_x1, x1); 1406 smallfelem_expand(felem_x1, x1);
@@ -1333,7 +1410,7 @@ static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1333 felem_shrink(x3, felem_x3); 1410 felem_shrink(x3, felem_x3);
1334 felem_shrink(y3, felem_y3); 1411 felem_shrink(y3, felem_y3);
1335 felem_shrink(z3, felem_z3); 1412 felem_shrink(z3, felem_z3);
1336 } 1413}
1337 1414
1338/* Base point pre computation 1415/* Base point pre computation
1339 * -------------------------- 1416 * --------------------------
@@ -1373,113 +1450,113 @@ static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1373/* gmul is the table of precomputed base points */ 1450/* gmul is the table of precomputed base points */
1374static const smallfelem gmul[2][16][3] = 1451static const smallfelem gmul[2][16][3] =
1375{{{{0, 0, 0, 0}, 1452{{{{0, 0, 0, 0},
1376 {0, 0, 0, 0}, 1453{0, 0, 0, 0},
1377 {0, 0, 0, 0}}, 1454{0, 0, 0, 0}},
1378 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247}, 1455{{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1379 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b}, 1456{0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1380 {1, 0, 0, 0}}, 1457{1, 0, 0, 0}},
1381 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5}, 1458{{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1382 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d}, 1459{0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1383 {1, 0, 0, 0}}, 1460{1, 0, 0, 0}},
1384 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f}, 1461{{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1385 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644}, 1462{0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1386 {1, 0, 0, 0}}, 1463{1, 0, 0, 0}},
1387 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67}, 1464{{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1388 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee}, 1465{0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1389 {1, 0, 0, 0}}, 1466{1, 0, 0, 0}},
1390 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff}, 1467{{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1391 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b}, 1468{0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1392 {1, 0, 0, 0}}, 1469{1, 0, 0, 0}},
1393 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8}, 1470{{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1394 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851}, 1471{0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1395 {1, 0, 0, 0}}, 1472{1, 0, 0, 0}},
1396 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea}, 1473{{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1397 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b}, 1474{0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1398 {1, 0, 0, 0}}, 1475{1, 0, 0, 0}},
1399 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276}, 1476{{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1400 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816}, 1477{0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1401 {1, 0, 0, 0}}, 1478{1, 0, 0, 0}},
1402 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad}, 1479{{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1403 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663}, 1480{0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1404 {1, 0, 0, 0}}, 1481{1, 0, 0, 0}},
1405 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d}, 1482{{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1406 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321}, 1483{0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1407 {1, 0, 0, 0}}, 1484{1, 0, 0, 0}},
1408 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287}, 1485{{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1409 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6}, 1486{0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1410 {1, 0, 0, 0}}, 1487{1, 0, 0, 0}},
1411 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466}, 1488{{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1412 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20}, 1489{0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1413 {1, 0, 0, 0}}, 1490{1, 0, 0, 0}},
1414 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9}, 1491{{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1415 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61}, 1492{0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1416 {1, 0, 0, 0}}, 1493{1, 0, 0, 0}},
1417 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a}, 1494{{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1418 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc}, 1495{0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1419 {1, 0, 0, 0}}, 1496{1, 0, 0, 0}},
1420 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c}, 1497{{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1421 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab}, 1498{0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1422 {1, 0, 0, 0}}}, 1499{1, 0, 0, 0}}},
1423 {{{0, 0, 0, 0}, 1500{{{0, 0, 0, 0},
1424 {0, 0, 0, 0}, 1501{0, 0, 0, 0},
1425 {0, 0, 0, 0}}, 1502{0, 0, 0, 0}},
1426 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89}, 1503{{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1427 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624}, 1504{0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1428 {1, 0, 0, 0}}, 1505{1, 0, 0, 0}},
1429 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6}, 1506{{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1430 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1}, 1507{0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1431 {1, 0, 0, 0}}, 1508{1, 0, 0, 0}},
1432 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a}, 1509{{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1433 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593}, 1510{0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1434 {1, 0, 0, 0}}, 1511{1, 0, 0, 0}},
1435 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617}, 1512{{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1436 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7}, 1513{0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1437 {1, 0, 0, 0}}, 1514{1, 0, 0, 0}},
1438 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276}, 1515{{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1439 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a}, 1516{0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1440 {1, 0, 0, 0}}, 1517{1, 0, 0, 0}},
1441 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908}, 1518{{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1442 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e}, 1519{0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1443 {1, 0, 0, 0}}, 1520{1, 0, 0, 0}},
1444 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7}, 1521{{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1445 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec}, 1522{0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1446 {1, 0, 0, 0}}, 1523{1, 0, 0, 0}},
1447 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee}, 1524{{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1448 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6}, 1525{0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1449 {1, 0, 0, 0}}, 1526{1, 0, 0, 0}},
1450 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109}, 1527{{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1451 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5}, 1528{0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1452 {1, 0, 0, 0}}, 1529{1, 0, 0, 0}},
1453 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba}, 1530{{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1454 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44}, 1531{0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1455 {1, 0, 0, 0}}, 1532{1, 0, 0, 0}},
1456 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b}, 1533{{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1457 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc}, 1534{0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1458 {1, 0, 0, 0}}, 1535{1, 0, 0, 0}},
1459 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107}, 1536{{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1460 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387}, 1537{0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1461 {1, 0, 0, 0}}, 1538{1, 0, 0, 0}},
1462 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503}, 1539{{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1463 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be}, 1540{0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1464 {1, 0, 0, 0}}, 1541{1, 0, 0, 0}},
1465 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9}, 1542{{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1466 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a}, 1543{0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1467 {1, 0, 0, 0}}, 1544{1, 0, 0, 0}},
1468 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6}, 1545{{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1469 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81}, 1546{0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1470 {1, 0, 0, 0}}}}; 1547{1, 0, 0, 0}}}};
1471 1548
1472/* select_point selects the |idx|th point from a precomputation table and 1549/* select_point selects the |idx|th point from a precomputation table and
1473 * copies it to out. */ 1550 * copies it to out. */
1474static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3]) 1551static void
1475 { 1552select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1553{
1476 unsigned i, j; 1554 unsigned i, j;
1477 u64 *outlimbs = &out[0][0]; 1555 u64 *outlimbs = &out[0][0];
1478 memset(outlimbs, 0, 3 * sizeof(smallfelem)); 1556 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1479 1557
1480 for (i = 0; i < size; i++) 1558 for (i = 0; i < size; i++) {
1481 { 1559 const u64 *inlimbs = (u64 *) & pre_comp[i][0][0];
1482 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1483 u64 mask = i ^ idx; 1560 u64 mask = i ^ idx;
1484 mask |= mask >> 4; 1561 mask |= mask >> 4;
1485 mask |= mask >> 2; 1562 mask |= mask >> 2;
@@ -1488,26 +1565,28 @@ static void select_point(const u64 idx, unsigned int size, const smallfelem pre_
1488 mask--; 1565 mask--;
1489 for (j = 0; j < NLIMBS * 3; j++) 1566 for (j = 0; j < NLIMBS * 3; j++)
1490 outlimbs[j] |= inlimbs[j] & mask; 1567 outlimbs[j] |= inlimbs[j] & mask;
1491 }
1492 } 1568 }
1569}
1493 1570
1494/* get_bit returns the |i|th bit in |in| */ 1571/* get_bit returns the |i|th bit in |in| */
1495static char get_bit(const felem_bytearray in, int i) 1572static char
1496 { 1573get_bit(const felem_bytearray in, int i)
1574{
1497 if ((i < 0) || (i >= 256)) 1575 if ((i < 0) || (i >= 256))
1498 return 0; 1576 return 0;
1499 return (in[i >> 3] >> (i & 7)) & 1; 1577 return (in[i >> 3] >> (i & 7)) & 1;
1500 } 1578}
1501 1579
1502/* Interleaved point multiplication using precomputed point multiples: 1580/* Interleaved point multiplication using precomputed point multiples:
1503 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], 1581 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1504 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple 1582 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1505 * of the generator, using certain (large) precomputed multiples in g_pre_comp. 1583 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1506 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */ 1584 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1507static void batch_mul(felem x_out, felem y_out, felem z_out, 1585static void
1508 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar, 1586batch_mul(felem x_out, felem y_out, felem z_out,
1509 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3]) 1587 const felem_bytearray scalars[], const unsigned num_points, const u8 * g_scalar,
1510 { 1588 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1589{
1511 int i, skip; 1590 int i, skip;
1512 unsigned num, gen_mul = (g_scalar != NULL); 1591 unsigned num, gen_mul = (g_scalar != NULL);
1513 felem nq[3], ftmp; 1592 felem nq[3], ftmp;
@@ -1518,20 +1597,20 @@ static void batch_mul(felem x_out, felem y_out, felem z_out,
1518 /* set nq to the point at infinity */ 1597 /* set nq to the point at infinity */
1519 memset(nq, 0, 3 * sizeof(felem)); 1598 memset(nq, 0, 3 * sizeof(felem));
1520 1599
1521 /* Loop over all scalars msb-to-lsb, interleaving additions 1600 /*
1522 * of multiples of the generator (two in each of the last 32 rounds) 1601 * Loop over all scalars msb-to-lsb, interleaving additions of
1523 * and additions of other points multiples (every 5th round). 1602 * multiples of the generator (two in each of the last 32 rounds) and
1603 * additions of other points multiples (every 5th round).
1524 */ 1604 */
1525 skip = 1; /* save two point operations in the first round */ 1605 skip = 1; /* save two point operations in the first
1526 for (i = (num_points ? 255 : 31); i >= 0; --i) 1606 * round */
1527 { 1607 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1528 /* double */ 1608 /* double */
1529 if (!skip) 1609 if (!skip)
1530 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1610 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1531 1611
1532 /* add multiples of the generator */ 1612 /* add multiples of the generator */
1533 if (gen_mul && (i <= 31)) 1613 if (gen_mul && (i <= 31)) {
1534 {
1535 /* first, look 32 bits upwards */ 1614 /* first, look 32 bits upwards */
1536 bits = get_bit(g_scalar, i + 224) << 3; 1615 bits = get_bit(g_scalar, i + 224) << 3;
1537 bits |= get_bit(g_scalar, i + 160) << 2; 1616 bits |= get_bit(g_scalar, i + 160) << 2;
@@ -1540,19 +1619,16 @@ static void batch_mul(felem x_out, felem y_out, felem z_out,
1540 /* select the point to add, in constant time */ 1619 /* select the point to add, in constant time */
1541 select_point(bits, 16, g_pre_comp[1], tmp); 1620 select_point(bits, 16, g_pre_comp[1], tmp);
1542 1621
1543 if (!skip) 1622 if (!skip) {
1544 {
1545 point_add(nq[0], nq[1], nq[2], 1623 point_add(nq[0], nq[1], nq[2],
1546 nq[0], nq[1], nq[2], 1624 nq[0], nq[1], nq[2],
1547 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1625 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1548 } 1626 } else {
1549 else
1550 {
1551 smallfelem_expand(nq[0], tmp[0]); 1627 smallfelem_expand(nq[0], tmp[0]);
1552 smallfelem_expand(nq[1], tmp[1]); 1628 smallfelem_expand(nq[1], tmp[1]);
1553 smallfelem_expand(nq[2], tmp[2]); 1629 smallfelem_expand(nq[2], tmp[2]);
1554 skip = 0; 1630 skip = 0;
1555 } 1631 }
1556 1632
1557 /* second, look at the current position */ 1633 /* second, look at the current position */
1558 bits = get_bit(g_scalar, i + 192) << 3; 1634 bits = get_bit(g_scalar, i + 192) << 3;
@@ -1562,16 +1638,13 @@ static void batch_mul(felem x_out, felem y_out, felem z_out,
1562 /* select the point to add, in constant time */ 1638 /* select the point to add, in constant time */
1563 select_point(bits, 16, g_pre_comp[0], tmp); 1639 select_point(bits, 16, g_pre_comp[0], tmp);
1564 point_add(nq[0], nq[1], nq[2], 1640 point_add(nq[0], nq[1], nq[2],
1565 nq[0], nq[1], nq[2], 1641 nq[0], nq[1], nq[2],
1566 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1642 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1567 } 1643 }
1568
1569 /* do other additions every 5 doublings */ 1644 /* do other additions every 5 doublings */
1570 if (num_points && (i % 5 == 0)) 1645 if (num_points && (i % 5 == 0)) {
1571 {
1572 /* loop over all scalars */ 1646 /* loop over all scalars */
1573 for (num = 0; num < num_points; ++num) 1647 for (num = 0; num < num_points; ++num) {
1574 {
1575 bits = get_bit(scalars[num], i + 4) << 5; 1648 bits = get_bit(scalars[num], i + 4) << 5;
1576 bits |= get_bit(scalars[num], i + 3) << 4; 1649 bits |= get_bit(scalars[num], i + 3) << 4;
1577 bits |= get_bit(scalars[num], i + 2) << 3; 1650 bits |= get_bit(scalars[num], i + 2) << 3;
@@ -1580,32 +1653,33 @@ static void batch_mul(felem x_out, felem y_out, felem z_out,
1580 bits |= get_bit(scalars[num], i - 1); 1653 bits |= get_bit(scalars[num], i - 1);
1581 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1654 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1582 1655
1583 /* select the point to add or subtract, in constant time */ 1656 /*
1657 * select the point to add or subtract, in
1658 * constant time
1659 */
1584 select_point(digit, 17, pre_comp[num], tmp); 1660 select_point(digit, 17, pre_comp[num], tmp);
1585 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */ 1661 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the
1662 * negative point */
1586 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1)); 1663 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1587 felem_contract(tmp[1], ftmp); 1664 felem_contract(tmp[1], ftmp);
1588 1665
1589 if (!skip) 1666 if (!skip) {
1590 {
1591 point_add(nq[0], nq[1], nq[2], 1667 point_add(nq[0], nq[1], nq[2],
1592 nq[0], nq[1], nq[2], 1668 nq[0], nq[1], nq[2],
1593 mixed, tmp[0], tmp[1], tmp[2]); 1669 mixed, tmp[0], tmp[1], tmp[2]);
1594 } 1670 } else {
1595 else
1596 {
1597 smallfelem_expand(nq[0], tmp[0]); 1671 smallfelem_expand(nq[0], tmp[0]);
1598 smallfelem_expand(nq[1], tmp[1]); 1672 smallfelem_expand(nq[1], tmp[1]);
1599 smallfelem_expand(nq[2], tmp[2]); 1673 smallfelem_expand(nq[2], tmp[2]);
1600 skip = 0; 1674 skip = 0;
1601 }
1602 } 1675 }
1603 } 1676 }
1604 } 1677 }
1678 }
1605 felem_assign(x_out, nq[0]); 1679 felem_assign(x_out, nq[0]);
1606 felem_assign(y_out, nq[1]); 1680 felem_assign(y_out, nq[1]);
1607 felem_assign(z_out, nq[2]); 1681 felem_assign(z_out, nq[2]);
1608 } 1682}
1609 1683
1610/* Precomputation for the group generator. */ 1684/* Precomputation for the group generator. */
1611typedef struct { 1685typedef struct {
@@ -1627,20 +1701,20 @@ EC_GFp_nistp256_method(void)
1627 .group_get_curve = ec_GFp_simple_group_get_curve, 1701 .group_get_curve = ec_GFp_simple_group_get_curve,
1628 .group_get_degree = ec_GFp_simple_group_get_degree, 1702 .group_get_degree = ec_GFp_simple_group_get_degree,
1629 .group_check_discriminant = 1703 .group_check_discriminant =
1630 ec_GFp_simple_group_check_discriminant, 1704 ec_GFp_simple_group_check_discriminant,
1631 .point_init = ec_GFp_simple_point_init, 1705 .point_init = ec_GFp_simple_point_init,
1632 .point_finish = ec_GFp_simple_point_finish, 1706 .point_finish = ec_GFp_simple_point_finish,
1633 .point_clear_finish = ec_GFp_simple_point_clear_finish, 1707 .point_clear_finish = ec_GFp_simple_point_clear_finish,
1634 .point_copy = ec_GFp_simple_point_copy, 1708 .point_copy = ec_GFp_simple_point_copy,
1635 .point_set_to_infinity = ec_GFp_simple_point_set_to_infinity, 1709 .point_set_to_infinity = ec_GFp_simple_point_set_to_infinity,
1636 .point_set_Jprojective_coordinates_GFp = 1710 .point_set_Jprojective_coordinates_GFp =
1637 ec_GFp_simple_set_Jprojective_coordinates_GFp, 1711 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1638 .point_get_Jprojective_coordinates_GFp = 1712 .point_get_Jprojective_coordinates_GFp =
1639 ec_GFp_simple_get_Jprojective_coordinates_GFp, 1713 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1640 .point_set_affine_coordinates = 1714 .point_set_affine_coordinates =
1641 ec_GFp_simple_point_set_affine_coordinates, 1715 ec_GFp_simple_point_set_affine_coordinates,
1642 .point_get_affine_coordinates = 1716 .point_get_affine_coordinates =
1643 ec_GFp_nistp256_point_get_affine_coordinates, 1717 ec_GFp_nistp256_point_get_affine_coordinates,
1644 .add = ec_GFp_simple_add, 1718 .add = ec_GFp_simple_add,
1645 .dbl = ec_GFp_simple_dbl, 1719 .dbl = ec_GFp_simple_dbl,
1646 .invert = ec_GFp_simple_invert, 1720 .invert = ec_GFp_simple_invert,
@@ -1663,32 +1737,34 @@ EC_GFp_nistp256_method(void)
1663/* FUNCTIONS TO MANAGE PRECOMPUTATION 1737/* FUNCTIONS TO MANAGE PRECOMPUTATION
1664 */ 1738 */
1665 1739
1666static NISTP256_PRE_COMP *nistp256_pre_comp_new() 1740static NISTP256_PRE_COMP *
1667 { 1741nistp256_pre_comp_new()
1742{
1668 NISTP256_PRE_COMP *ret = NULL; 1743 NISTP256_PRE_COMP *ret = NULL;
1669 ret = (NISTP256_PRE_COMP *) malloc(sizeof *ret); 1744 ret = (NISTP256_PRE_COMP *) malloc(sizeof *ret);
1670 if (!ret) 1745 if (!ret) {
1671 {
1672 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1746 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1673 return ret; 1747 return ret;
1674 } 1748 }
1675 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1749 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1676 ret->references = 1; 1750 ret->references = 1;
1677 return ret; 1751 return ret;
1678 } 1752}
1679 1753
1680static void *nistp256_pre_comp_dup(void *src_) 1754static void *
1681 { 1755nistp256_pre_comp_dup(void *src_)
1756{
1682 NISTP256_PRE_COMP *src = src_; 1757 NISTP256_PRE_COMP *src = src_;
1683 1758
1684 /* no need to actually copy, these objects never change! */ 1759 /* no need to actually copy, these objects never change! */
1685 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1760 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1686 1761
1687 return src_; 1762 return src_;
1688 } 1763}
1689 1764
1690static void nistp256_pre_comp_free(void *pre_) 1765static void
1691 { 1766nistp256_pre_comp_free(void *pre_)
1767{
1692 int i; 1768 int i;
1693 NISTP256_PRE_COMP *pre = pre_; 1769 NISTP256_PRE_COMP *pre = pre_;
1694 1770
@@ -1700,10 +1776,11 @@ static void nistp256_pre_comp_free(void *pre_)
1700 return; 1776 return;
1701 1777
1702 free(pre); 1778 free(pre);
1703 } 1779}
1704 1780
1705static void nistp256_pre_comp_clear_free(void *pre_) 1781static void
1706 { 1782nistp256_pre_comp_clear_free(void *pre_)
1783{
1707 int i; 1784 int i;
1708 NISTP256_PRE_COMP *pre = pre_; 1785 NISTP256_PRE_COMP *pre = pre_;
1709 1786
@@ -1716,43 +1793,46 @@ static void nistp256_pre_comp_clear_free(void *pre_)
1716 1793
1717 OPENSSL_cleanse(pre, sizeof *pre); 1794 OPENSSL_cleanse(pre, sizeof *pre);
1718 free(pre); 1795 free(pre);
1719 } 1796}
1720 1797
1721/******************************************************************************/ 1798/******************************************************************************/
1722/* OPENSSL EC_METHOD FUNCTIONS 1799/* OPENSSL EC_METHOD FUNCTIONS
1723 */ 1800 */
1724 1801
1725int ec_GFp_nistp256_group_init(EC_GROUP *group) 1802int
1726 { 1803ec_GFp_nistp256_group_init(EC_GROUP * group)
1804{
1727 int ret; 1805 int ret;
1728 ret = ec_GFp_simple_group_init(group); 1806 ret = ec_GFp_simple_group_init(group);
1729 group->a_is_minus3 = 1; 1807 group->a_is_minus3 = 1;
1730 return ret; 1808 return ret;
1731 } 1809}
1732 1810
1733int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1811int
1734 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 1812ec_GFp_nistp256_group_set_curve(EC_GROUP * group, const BIGNUM * p,
1735 { 1813 const BIGNUM * a, const BIGNUM * b, BN_CTX * ctx)
1814{
1736 int ret = 0; 1815 int ret = 0;
1737 BN_CTX *new_ctx = NULL; 1816 BN_CTX *new_ctx = NULL;
1738 BIGNUM *curve_p, *curve_a, *curve_b; 1817 BIGNUM *curve_p, *curve_a, *curve_b;
1739 1818
1740 if (ctx == NULL) 1819 if (ctx == NULL)
1741 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1820 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1821 return 0;
1742 BN_CTX_start(ctx); 1822 BN_CTX_start(ctx);
1743 if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1823 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1744 ((curve_a = BN_CTX_get(ctx)) == NULL) || 1824 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1745 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err; 1825 ((curve_b = BN_CTX_get(ctx)) == NULL))
1826 goto err;
1746 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p); 1827 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1747 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a); 1828 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1748 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b); 1829 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1749 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || 1830 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1750 (BN_cmp(curve_b, b))) 1831 (BN_cmp(curve_b, b))) {
1751 {
1752 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE, 1832 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1753 EC_R_WRONG_CURVE_PARAMETERS); 1833 EC_R_WRONG_CURVE_PARAMETERS);
1754 goto err; 1834 goto err;
1755 } 1835 }
1756 group->field_mod_func = BN_nist_mod_256; 1836 group->field_mod_func = BN_nist_mod_256;
1757 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1837 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1758err: 1838err:
@@ -1760,76 +1840,82 @@ err:
1760 if (new_ctx != NULL) 1840 if (new_ctx != NULL)
1761 BN_CTX_free(new_ctx); 1841 BN_CTX_free(new_ctx);
1762 return ret; 1842 return ret;
1763 } 1843}
1764 1844
1765/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns 1845/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1766 * (X', Y') = (X/Z^2, Y/Z^3) */ 1846 * (X', Y') = (X/Z^2, Y/Z^3) */
1767int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group, 1847int
1768 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx) 1848ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP * group,
1769 { 1849 const EC_POINT * point, BIGNUM * x, BIGNUM * y, BN_CTX * ctx)
1850{
1770 felem z1, z2, x_in, y_in; 1851 felem z1, z2, x_in, y_in;
1771 smallfelem x_out, y_out; 1852 smallfelem x_out, y_out;
1772 longfelem tmp; 1853 longfelem tmp;
1773 1854
1774 if (EC_POINT_is_at_infinity(group, point)) 1855 if (EC_POINT_is_at_infinity(group, point)) {
1775 {
1776 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1856 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1777 EC_R_POINT_AT_INFINITY); 1857 EC_R_POINT_AT_INFINITY);
1778 return 0; 1858 return 0;
1779 } 1859 }
1780 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1860 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1781 (!BN_to_felem(z1, &point->Z))) return 0; 1861 (!BN_to_felem(z1, &point->Z)))
1862 return 0;
1782 felem_inv(z2, z1); 1863 felem_inv(z2, z1);
1783 felem_square(tmp, z2); felem_reduce(z1, tmp); 1864 felem_square(tmp, z2);
1784 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp); 1865 felem_reduce(z1, tmp);
1866 felem_mul(tmp, x_in, z1);
1867 felem_reduce(x_in, tmp);
1785 felem_contract(x_out, x_in); 1868 felem_contract(x_out, x_in);
1786 if (x != NULL) 1869 if (x != NULL) {
1787 {
1788 if (!smallfelem_to_BN(x, x_out)) { 1870 if (!smallfelem_to_BN(x, x_out)) {
1789 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1871 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1790 ERR_R_BN_LIB); 1872 ERR_R_BN_LIB);
1791 return 0; 1873 return 0;
1792 }
1793 } 1874 }
1794 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp); 1875 }
1795 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp); 1876 felem_mul(tmp, z1, z2);
1877 felem_reduce(z1, tmp);
1878 felem_mul(tmp, y_in, z1);
1879 felem_reduce(y_in, tmp);
1796 felem_contract(y_out, y_in); 1880 felem_contract(y_out, y_in);
1797 if (y != NULL) 1881 if (y != NULL) {
1798 { 1882 if (!smallfelem_to_BN(y, y_out)) {
1799 if (!smallfelem_to_BN(y, y_out))
1800 {
1801 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1883 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1802 ERR_R_BN_LIB); 1884 ERR_R_BN_LIB);
1803 return 0; 1885 return 0;
1804 }
1805 } 1886 }
1806 return 1;
1807 } 1887 }
1888 return 1;
1889}
1808 1890
1809static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */]) 1891static void
1810 { 1892make_points_affine(size_t num, smallfelem points[ /* num */ ][3], smallfelem tmp_smallfelems[ /* num+1 */ ])
1811 /* Runs in constant time, unless an input is the point at infinity 1893{
1812 * (which normally shouldn't happen). */ 1894 /*
1895 * Runs in constant time, unless an input is the point at infinity
1896 * (which normally shouldn't happen).
1897 */
1813 ec_GFp_nistp_points_make_affine_internal( 1898 ec_GFp_nistp_points_make_affine_internal(
1814 num, 1899 num,
1815 points, 1900 points,
1816 sizeof(smallfelem), 1901 sizeof(smallfelem),
1817 tmp_smallfelems, 1902 tmp_smallfelems,
1818 (void (*)(void *)) smallfelem_one, 1903 (void (*) (void *)) smallfelem_one,
1819 (int (*)(const void *)) smallfelem_is_zero_int, 1904 (int (*) (const void *)) smallfelem_is_zero_int,
1820 (void (*)(void *, const void *)) smallfelem_assign, 1905 (void (*) (void *, const void *)) smallfelem_assign,
1821 (void (*)(void *, const void *)) smallfelem_square_contract, 1906 (void (*) (void *, const void *)) smallfelem_square_contract,
1822 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract, 1907 (void (*) (void *, const void *, const void *)) smallfelem_mul_contract,
1823 (void (*)(void *, const void *)) smallfelem_inv_contract, 1908 (void (*) (void *, const void *)) smallfelem_inv_contract,
1824 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */); 1909 (void (*) (void *, const void *)) smallfelem_assign /* nothing to contract */ );
1825 } 1910}
1826 1911
1827/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values 1912/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1828 * Result is stored in r (r can equal one of the inputs). */ 1913 * Result is stored in r (r can equal one of the inputs). */
1829int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r, 1914int
1830 const BIGNUM *scalar, size_t num, const EC_POINT *points[], 1915ec_GFp_nistp256_points_mul(const EC_GROUP * group, EC_POINT * r,
1831 const BIGNUM *scalars[], BN_CTX *ctx) 1916 const BIGNUM * scalar, size_t num, const EC_POINT * points[],
1832 { 1917 const BIGNUM * scalars[], BN_CTX * ctx)
1918{
1833 int ret = 0; 1919 int ret = 0;
1834 int j; 1920 int j;
1835 int mixed = 0; 1921 int mixed = 0;
@@ -1837,7 +1923,7 @@ int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1837 BIGNUM *x, *y, *z, *tmp_scalar; 1923 BIGNUM *x, *y, *z, *tmp_scalar;
1838 felem_bytearray g_secret; 1924 felem_bytearray g_secret;
1839 felem_bytearray *secrets = NULL; 1925 felem_bytearray *secrets = NULL;
1840 smallfelem (*pre_comp)[17][3] = NULL; 1926 smallfelem(*pre_comp)[17][3] = NULL;
1841 smallfelem *tmp_smallfelems = NULL; 1927 smallfelem *tmp_smallfelems = NULL;
1842 felem_bytearray tmp; 1928 felem_bytearray tmp;
1843 unsigned i, num_bytes; 1929 unsigned i, num_bytes;
@@ -1846,28 +1932,28 @@ int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1846 smallfelem x_in, y_in, z_in; 1932 smallfelem x_in, y_in, z_in;
1847 felem x_out, y_out, z_out; 1933 felem x_out, y_out, z_out;
1848 NISTP256_PRE_COMP *pre = NULL; 1934 NISTP256_PRE_COMP *pre = NULL;
1849 const smallfelem (*g_pre_comp)[16][3] = NULL; 1935 const smallfelem(*g_pre_comp)[16][3] = NULL;
1850 EC_POINT *generator = NULL; 1936 EC_POINT *generator = NULL;
1851 const EC_POINT *p = NULL; 1937 const EC_POINT *p = NULL;
1852 const BIGNUM *p_scalar = NULL; 1938 const BIGNUM *p_scalar = NULL;
1853 1939
1854 if (ctx == NULL) 1940 if (ctx == NULL)
1855 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1941 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1942 return 0;
1856 BN_CTX_start(ctx); 1943 BN_CTX_start(ctx);
1857 if (((x = BN_CTX_get(ctx)) == NULL) || 1944 if (((x = BN_CTX_get(ctx)) == NULL) ||
1858 ((y = BN_CTX_get(ctx)) == NULL) || 1945 ((y = BN_CTX_get(ctx)) == NULL) ||
1859 ((z = BN_CTX_get(ctx)) == NULL) || 1946 ((z = BN_CTX_get(ctx)) == NULL) ||
1860 ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1947 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1861 goto err; 1948 goto err;
1862 1949
1863 if (scalar != NULL) 1950 if (scalar != NULL) {
1864 {
1865 pre = EC_EX_DATA_get_data(group->extra_data, 1951 pre = EC_EX_DATA_get_data(group->extra_data,
1866 nistp256_pre_comp_dup, nistp256_pre_comp_free, 1952 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1867 nistp256_pre_comp_clear_free); 1953 nistp256_pre_comp_clear_free);
1868 if (pre) 1954 if (pre)
1869 /* we have precomputation, try to use it */ 1955 /* we have precomputation, try to use it */
1870 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp; 1956 g_pre_comp = (const smallfelem(*)[16][3]) pre->g_pre_comp;
1871 else 1957 else
1872 /* try to use the standard precomputation */ 1958 /* try to use the standard precomputation */
1873 g_pre_comp = &gmul[0]; 1959 g_pre_comp = &gmul[0];
@@ -1876,147 +1962,140 @@ int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1876 goto err; 1962 goto err;
1877 /* get the generator from precomputation */ 1963 /* get the generator from precomputation */
1878 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) || 1964 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1879 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) || 1965 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1880 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) 1966 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
1881 {
1882 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 1967 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1883 goto err; 1968 goto err;
1884 } 1969 }
1885 if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1970 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1886 generator, x, y, z, ctx)) 1971 generator, x, y, z, ctx))
1887 goto err; 1972 goto err;
1888 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1973 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1889 /* precomputation matches generator */ 1974 /* precomputation matches generator */
1890 have_pre_comp = 1; 1975 have_pre_comp = 1;
1891 else 1976 else
1892 /* we don't have valid precomputation: 1977 /*
1893 * treat the generator as a random point */ 1978 * we don't have valid precomputation: treat the
1979 * generator as a random point
1980 */
1894 num_points++; 1981 num_points++;
1895 } 1982 }
1896 if (num_points > 0) 1983 if (num_points > 0) {
1897 { 1984 if (num_points >= 3) {
1898 if (num_points >= 3) 1985 /*
1899 { 1986 * unless we precompute multiples for just one or two
1900 /* unless we precompute multiples for just one or two points, 1987 * points, converting those into affine form is time
1901 * converting those into affine form is time well spent */ 1988 * well spent
1989 */
1902 mixed = 1; 1990 mixed = 1;
1903 } 1991 }
1904 secrets = malloc(num_points * sizeof(felem_bytearray)); 1992 secrets = malloc(num_points * sizeof(felem_bytearray));
1905 pre_comp = malloc(num_points * 17 * 3 * sizeof(smallfelem)); 1993 pre_comp = malloc(num_points * 17 * 3 * sizeof(smallfelem));
1906 if (mixed) 1994 if (mixed)
1907 tmp_smallfelems = malloc((num_points * 17 + 1) * sizeof(smallfelem)); 1995 tmp_smallfelems = malloc((num_points * 17 + 1) * sizeof(smallfelem));
1908 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL))) 1996 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL))) {
1909 {
1910 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1997 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1911 goto err; 1998 goto err;
1912 } 1999 }
1913 2000 /*
1914 /* we treat NULL scalars as 0, and NULL points as points at infinity, 2001 * we treat NULL scalars as 0, and NULL points as points at
1915 * i.e., they contribute nothing to the linear combination */ 2002 * infinity, i.e., they contribute nothing to the linear
2003 * combination
2004 */
1916 memset(secrets, 0, num_points * sizeof(felem_bytearray)); 2005 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1917 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem)); 2006 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1918 for (i = 0; i < num_points; ++i) 2007 for (i = 0; i < num_points; ++i) {
1919 {
1920 if (i == num) 2008 if (i == num)
1921 /* we didn't have a valid precomputation, so we pick 2009 /*
1922 * the generator */ 2010 * we didn't have a valid precomputation, so
1923 { 2011 * we pick the generator
2012 */
2013 {
1924 p = EC_GROUP_get0_generator(group); 2014 p = EC_GROUP_get0_generator(group);
1925 p_scalar = scalar; 2015 p_scalar = scalar;
1926 } 2016 } else
1927 else
1928 /* the i^th point */ 2017 /* the i^th point */
1929 { 2018 {
1930 p = points[i]; 2019 p = points[i];
1931 p_scalar = scalars[i]; 2020 p_scalar = scalars[i];
1932 } 2021 }
1933 if ((p_scalar != NULL) && (p != NULL)) 2022 if ((p_scalar != NULL) && (p != NULL)) {
1934 {
1935 /* reduce scalar to 0 <= scalar < 2^256 */ 2023 /* reduce scalar to 0 <= scalar < 2^256 */
1936 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar))) 2024 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar))) {
1937 { 2025 /*
1938 /* this is an unusual input, and we don't guarantee 2026 * this is an unusual input, and we
1939 * constant-timeness */ 2027 * don't guarantee constant-timeness
1940 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) 2028 */
1941 { 2029 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1942 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2030 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1943 goto err; 2031 goto err;
1944 }
1945 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1946 } 2032 }
1947 else 2033 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2034 } else
1948 num_bytes = BN_bn2bin(p_scalar, tmp); 2035 num_bytes = BN_bn2bin(p_scalar, tmp);
1949 flip_endian(secrets[i], tmp, num_bytes); 2036 flip_endian(secrets[i], tmp, num_bytes);
1950 /* precompute multiples */ 2037 /* precompute multiples */
1951 if ((!BN_to_felem(x_out, &p->X)) || 2038 if ((!BN_to_felem(x_out, &p->X)) ||
1952 (!BN_to_felem(y_out, &p->Y)) || 2039 (!BN_to_felem(y_out, &p->Y)) ||
1953 (!BN_to_felem(z_out, &p->Z))) goto err; 2040 (!BN_to_felem(z_out, &p->Z)))
2041 goto err;
1954 felem_shrink(pre_comp[i][1][0], x_out); 2042 felem_shrink(pre_comp[i][1][0], x_out);
1955 felem_shrink(pre_comp[i][1][1], y_out); 2043 felem_shrink(pre_comp[i][1][1], y_out);
1956 felem_shrink(pre_comp[i][1][2], z_out); 2044 felem_shrink(pre_comp[i][1][2], z_out);
1957 for (j = 2; j <= 16; ++j) 2045 for (j = 2; j <= 16; ++j) {
1958 { 2046 if (j & 1) {
1959 if (j & 1)
1960 {
1961 point_add_small( 2047 point_add_small(
1962 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 2048 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1963 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2], 2049 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1964 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]); 2050 pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
1965 } 2051 } else {
1966 else
1967 {
1968 point_double_small( 2052 point_double_small(
1969 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 2053 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1970 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]); 2054 pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
1971 }
1972 } 2055 }
1973 } 2056 }
1974 } 2057 }
2058 }
1975 if (mixed) 2059 if (mixed)
1976 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems); 2060 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1977 } 2061 }
1978
1979 /* the scalar for the generator */ 2062 /* the scalar for the generator */
1980 if ((scalar != NULL) && (have_pre_comp)) 2063 if ((scalar != NULL) && (have_pre_comp)) {
1981 {
1982 memset(g_secret, 0, sizeof(g_secret)); 2064 memset(g_secret, 0, sizeof(g_secret));
1983 /* reduce scalar to 0 <= scalar < 2^256 */ 2065 /* reduce scalar to 0 <= scalar < 2^256 */
1984 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) 2066 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
1985 { 2067 /*
1986 /* this is an unusual input, and we don't guarantee 2068 * this is an unusual input, and we don't guarantee
1987 * constant-timeness */ 2069 * constant-timeness
1988 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) 2070 */
1989 { 2071 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1990 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2072 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1991 goto err; 2073 goto err;
1992 }
1993 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1994 } 2074 }
1995 else 2075 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2076 } else
1996 num_bytes = BN_bn2bin(scalar, tmp); 2077 num_bytes = BN_bn2bin(scalar, tmp);
1997 flip_endian(g_secret, tmp, num_bytes); 2078 flip_endian(g_secret, tmp, num_bytes);
1998 /* do the multiplication with generator precomputation*/ 2079 /* do the multiplication with generator precomputation */
1999 batch_mul(x_out, y_out, z_out, 2080 batch_mul(x_out, y_out, z_out,
2000 (const felem_bytearray (*)) secrets, num_points, 2081 (const felem_bytearray(*)) secrets, num_points,
2001 g_secret, 2082 g_secret,
2002 mixed, (const smallfelem (*)[17][3]) pre_comp, 2083 mixed, (const smallfelem(*)[17][3]) pre_comp,
2003 g_pre_comp); 2084 g_pre_comp);
2004 } 2085 } else
2005 else
2006 /* do the multiplication without generator precomputation */ 2086 /* do the multiplication without generator precomputation */
2007 batch_mul(x_out, y_out, z_out, 2087 batch_mul(x_out, y_out, z_out,
2008 (const felem_bytearray (*)) secrets, num_points, 2088 (const felem_bytearray(*)) secrets, num_points,
2009 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL); 2089 NULL, mixed, (const smallfelem(*)[17][3]) pre_comp, NULL);
2010 /* reduce the output to its unique minimal representation */ 2090 /* reduce the output to its unique minimal representation */
2011 felem_contract(x_in, x_out); 2091 felem_contract(x_in, x_out);
2012 felem_contract(y_in, y_out); 2092 felem_contract(y_in, y_out);
2013 felem_contract(z_in, z_out); 2093 felem_contract(z_in, z_out);
2014 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) || 2094 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2015 (!smallfelem_to_BN(z, z_in))) 2095 (!smallfelem_to_BN(z, z_in))) {
2016 {
2017 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2096 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2018 goto err; 2097 goto err;
2019 } 2098 }
2020 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 2099 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2021 2100
2022err: 2101err:
@@ -2032,10 +2111,11 @@ err:
2032 if (tmp_smallfelems != NULL) 2111 if (tmp_smallfelems != NULL)
2033 free(tmp_smallfelems); 2112 free(tmp_smallfelems);
2034 return ret; 2113 return ret;
2035 } 2114}
2036 2115
2037int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 2116int
2038 { 2117ec_GFp_nistp256_precompute_mult(EC_GROUP * group, BN_CTX * ctx)
2118{
2039 int ret = 0; 2119 int ret = 0;
2040 NISTP256_PRE_COMP *pre = NULL; 2120 NISTP256_PRE_COMP *pre = NULL;
2041 int i, j; 2121 int i, j;
@@ -2047,106 +2127,106 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2047 2127
2048 /* throw away old precomputation */ 2128 /* throw away old precomputation */
2049 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup, 2129 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2050 nistp256_pre_comp_free, nistp256_pre_comp_clear_free); 2130 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2051 if (ctx == NULL) 2131 if (ctx == NULL)
2052 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 2132 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2133 return 0;
2053 BN_CTX_start(ctx); 2134 BN_CTX_start(ctx);
2054 if (((x = BN_CTX_get(ctx)) == NULL) || 2135 if (((x = BN_CTX_get(ctx)) == NULL) ||
2055 ((y = BN_CTX_get(ctx)) == NULL)) 2136 ((y = BN_CTX_get(ctx)) == NULL))
2056 goto err; 2137 goto err;
2057 /* get the generator */ 2138 /* get the generator */
2058 if (group->generator == NULL) goto err; 2139 if (group->generator == NULL)
2140 goto err;
2059 generator = EC_POINT_new(group); 2141 generator = EC_POINT_new(group);
2060 if (generator == NULL) 2142 if (generator == NULL)
2061 goto err; 2143 goto err;
2062 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x); 2144 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2063 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y); 2145 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2064 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 2146 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2065 goto err; 2147 goto err;
2066 if ((pre = nistp256_pre_comp_new()) == NULL) 2148 if ((pre = nistp256_pre_comp_new()) == NULL)
2067 goto err; 2149 goto err;
2068 /* if the generator is the standard one, use built-in precomputation */ 2150 /* if the generator is the standard one, use built-in precomputation */
2069 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 2151 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2070 {
2071 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 2152 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2072 ret = 1; 2153 ret = 1;
2073 goto err; 2154 goto err;
2074 } 2155 }
2075 if ((!BN_to_felem(x_tmp, &group->generator->X)) || 2156 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2076 (!BN_to_felem(y_tmp, &group->generator->Y)) || 2157 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2077 (!BN_to_felem(z_tmp, &group->generator->Z))) 2158 (!BN_to_felem(z_tmp, &group->generator->Z)))
2078 goto err; 2159 goto err;
2079 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp); 2160 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2080 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp); 2161 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2081 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp); 2162 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2082 /* compute 2^64*G, 2^128*G, 2^192*G for the first table, 2163 /*
2083 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one 2164 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G,
2165 * 2^96*G, 2^160*G, 2^224*G for the second one
2084 */ 2166 */
2085 for (i = 1; i <= 8; i <<= 1) 2167 for (i = 1; i <= 8; i <<= 1) {
2086 {
2087 point_double_small( 2168 point_double_small(
2088 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2], 2169 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2089 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 2170 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2090 for (j = 0; j < 31; ++j) 2171 for (j = 0; j < 31; ++j) {
2091 {
2092 point_double_small( 2172 point_double_small(
2093 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2], 2173 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2094 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 2174 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2095 } 2175 }
2096 if (i == 8) 2176 if (i == 8)
2097 break; 2177 break;
2098 point_double_small( 2178 point_double_small(
2099 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2], 2179 pre->g_pre_comp[0][2 * i][0], pre->g_pre_comp[0][2 * i][1], pre->g_pre_comp[0][2 * i][2],
2100 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 2180 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2101 for (j = 0; j < 31; ++j) 2181 for (j = 0; j < 31; ++j) {
2102 {
2103 point_double_small( 2182 point_double_small(
2104 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2], 2183 pre->g_pre_comp[0][2 * i][0], pre->g_pre_comp[0][2 * i][1], pre->g_pre_comp[0][2 * i][2],
2105 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]); 2184 pre->g_pre_comp[0][2 * i][0], pre->g_pre_comp[0][2 * i][1], pre->g_pre_comp[0][2 * i][2]);
2106 }
2107 } 2185 }
2108 for (i = 0; i < 2; i++) 2186 }
2109 { 2187 for (i = 0; i < 2; i++) {
2110 /* g_pre_comp[i][0] is the point at infinity */ 2188 /* g_pre_comp[i][0] is the point at infinity */
2111 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 2189 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2112 /* the remaining multiples */ 2190 /* the remaining multiples */
2113 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */ 2191 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2114 point_add_small( 2192 point_add_small(
2115 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2], 2193 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2116 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 2194 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2117 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]); 2195 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2118 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */ 2196 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2119 point_add_small( 2197 point_add_small(
2120 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2], 2198 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2121 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 2199 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2122 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]); 2200 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2123 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */ 2201 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2124 point_add_small( 2202 point_add_small(
2125 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 2203 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2126 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 2204 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2127 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]); 2205 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2128 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */ 2206 /*
2207 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G +
2208 * 2^224*G
2209 */
2129 point_add_small( 2210 point_add_small(
2130 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2], 2211 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2131 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 2212 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2132 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]); 2213 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2133 for (j = 1; j < 8; ++j) 2214 for (j = 1; j < 8; ++j) {
2134 {
2135 /* odd multiples: add G resp. 2^32*G */ 2215 /* odd multiples: add G resp. 2^32*G */
2136 point_add_small( 2216 point_add_small(
2137 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2], 2217 pre->g_pre_comp[i][2 * j + 1][0], pre->g_pre_comp[i][2 * j + 1][1], pre->g_pre_comp[i][2 * j + 1][2],
2138 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2], 2218 pre->g_pre_comp[i][2 * j][0], pre->g_pre_comp[i][2 * j][1], pre->g_pre_comp[i][2 * j][2],
2139 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]); 2219 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2140 }
2141 } 2220 }
2221 }
2142 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems); 2222 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2143 2223
2144 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup, 2224 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2145 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)) 2225 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2146 goto err; 2226 goto err;
2147 ret = 1; 2227 ret = 1;
2148 pre = NULL; 2228 pre = NULL;
2149 err: 2229err:
2150 BN_CTX_end(ctx); 2230 BN_CTX_end(ctx);
2151 if (generator != NULL) 2231 if (generator != NULL)
2152 EC_POINT_free(generator); 2232 EC_POINT_free(generator);
@@ -2155,17 +2235,18 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2155 if (pre) 2235 if (pre)
2156 nistp256_pre_comp_free(pre); 2236 nistp256_pre_comp_free(pre);
2157 return ret; 2237 return ret;
2158 } 2238}
2159 2239
2160int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group) 2240int
2161 { 2241ec_GFp_nistp256_have_precompute_mult(const EC_GROUP * group)
2242{
2162 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup, 2243 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2163 nistp256_pre_comp_free, nistp256_pre_comp_clear_free) 2244 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2164 != NULL) 2245 != NULL)
2165 return 1; 2246 return 1;
2166 else 2247 else
2167 return 0; 2248 return 0;
2168 } 2249}
2169#else 2250#else
2170static void *dummy=&dummy; 2251static void *dummy = &dummy;
2171#endif 2252#endif