 networking/tls_sp_c32.c | 123 ++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 69 deletions(-)
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 9c92d0a14..f4902f7f3 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -213,34 +213,7 @@ static void sp_256_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	r[0] = (sp_digit)(c >> 26);
 }
 
-/* Square a and put result in r. (r = a * a) */
-static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
-{
-	int i, j, k;
-	int64_t c;
-
-	c = ((int64_t)a[9]) * a[9];
-	r[19] = (sp_digit)(c >> 26);
-	c = (c & 0x3ffffff) << 26;
-	for (k = 17; k >= 0; k--) {
-		for (i = 9; i >= 0; i--) {
-			j = k - i;
-			if (j >= 10 || i <= j)
-				break;
-			if (j < 0)
-				continue;
-			c += ((int64_t)a[i]) * a[j] * 2;
-		}
-		if (i == j)
-			c += ((int64_t)a[i]) * a[i];
-		r[k + 2] += c >> 52;
-		r[k + 1] = (c >> 26) & 0x3ffffff;
-		c = (c & 0x3ffffff) << 26;
-	}
-	r[0] = (sp_digit)(c >> 26);
-}
-
-/* Shift number left one bit. Bottom bit is lost. */
+/* Shift number right one bit. Bottom bit is lost. */
 static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
 {
 	int i;
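
Note on the deleted routine: sp_256_sqr_10() exploited the symmetry of squaring, forming each cross product a[i]*a[j] once and doubling it instead of computing all 100 limb products the way sp_256_mul_10() does. The rest of the patch routes squaring through the generic multiply; a minimal sketch of the equivalence being relied on (hypothetical helper name, using sp_256_mul_10() from this file):

	/* r = a*a over 10 x 26-bit limbs: same 20-limb result the
	 * dedicated squaring produced, at the cost of computing the
	 * mirrored cross products instead of doubling them. */
	static void sqr_via_mul_10(sp_digit* r, const sp_digit* a)
	{
		sp_256_mul_10(r, a, a);
	}
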
@@ -343,8 +316,11 @@ static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, sp_digit b)
  * m   The single precision number representing the modulus.
  * mp  The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+static void sp_256_mont_reduce_10(sp_digit* a /*, const sp_digit* m, sp_digit mp*/)
 {
+	const sp_digit* m = p256_mod;
+	sp_digit mp = p256_mp_mod;
+
 	int i;
 	sp_digit mu;
 
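
With m and mp the same compile-time constants at every call site in this file, they are folded into locals here; the commented-out parameters keep the old signature visible for reference. Every caller below shrinks accordingly:

	sp_256_mont_reduce_10(r, p256_mod, p256_mp_mod);  /* before */
	sp_256_mont_reduce_10(r);                         /* after  */
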
@@ -359,7 +335,7 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
 		a[i+1] += a[i] >> 26;
 		a[i] &= 0x3ffffff;
 	}
-	else {
+	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		for (i = 0; i < 9; i++) {
 			mu = a[i] & 0x3ffffff;
 			sp_256_mul_add_10(a+i, m, mu);
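
The new comment on the else branch follows from the limb layout: the P-256 prime satisfies p mod 2^96 = 2^96 - 1, so its lowest 26-bit limb is 0x3ffffff, i.e. p == -1 (mod 2^26), and mp = -p^-1 mod 2^26 = 1. A standalone check, independent of this file:

	#include <stdio.h>
	#include <stdint.h>

	/* mp is defined by m*mp == -1 (mod 2^26);
	 * for m == -1 (mod 2^26) that gives mp == 1. */
	int main(void)
	{
		uint32_t mask  = 0x3ffffff;  /* 2^26 - 1 */
		uint32_t m_low = 0x3ffffff;  /* P-256 prime mod 2^26 */
		uint32_t mp    = 1;
		printf("m*mp mod 2^26 = 0x%07x (0x3ffffff, i.e. -1, is correct)\n",
				(m_low * mp) & mask);
		return 0;
	}
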
@@ -372,8 +348,12 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
 	}
 
 	sp_256_mont_shift_10(a, a);
+	//TODO: can below condition ever be true? Doesn't it require 512+th bit(s) in a to be set?
 	if ((a[9] >> 22) > 0)
+	{
+		dbg("THIS HAPPENS\n");
 		sp_256_sub_10(a, a, m);
+	}
 	sp_256_norm_10(a);
 }
 
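
On the TODO: the ten limbs span 10*26 = 260 bits, so a[9] holds bits 234..259 and (a[9] >> 22) != 0 exactly when the shifted result is >= 2^256. Montgomery reduction as implemented only guarantees a result below 2*m, and 2*m > 2^256, so the bound alone does not rule the branch out; the dbg() print is presumably there to settle it empirically. As a comment sketch:

	/* 10 limbs x 26 bits = 260 bits; a[9] covers bits 234..259,
	 * so (a[9] >> 22) tests bits 256..259. A value in [m, 2*m)
	 * can reach bit 256, hence the conditional final subtraction. */
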
@@ -386,11 +366,14 @@ static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
-		const sp_digit* m, sp_digit mp)
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b
+		/*, const sp_digit* m, sp_digit mp*/)
 {
+	//const sp_digit* m = p256_mod;
+	//sp_digit mp = p256_mp_mod;
+
 	sp_256_mul_10(r, a, b);
-	sp_256_mont_reduce_10(r, m, mp);
+	sp_256_mont_reduce_10(r /*, m, mp*/);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
@@ -400,11 +383,13 @@ static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
-		sp_digit mp)
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a
+		/*, const sp_digit* m, sp_digit mp*/)
 {
-	sp_256_sqr_10(r, a);
-	sp_256_mont_reduce_10(r, m, mp);
+	//const sp_digit* m = p256_mod;
+	//sp_digit mp = p256_mp_mod;
+
+	sp_256_mont_mul_10(r, a, a /*, m, mp*/);
 }
 
 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
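
sp_256_mont_sqr_10() becomes a thin wrapper: squaring is just the a == b case of the Montgomery multiply, which is what lets the dedicated sp_256_sqr_10() (removed above) go. The dedicated code saved roughly half the limb products, so this trades some speed for size. Call sites are unchanged apart from the dropped arguments:

	sp_256_mont_sqr_10(t, t);     /* t = t*t in Montgomery form, mod p256_mod */
	sp_256_mont_mul_10(t, t, t);  /* the exact equivalent after this patch */
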
@@ -432,10 +417,10 @@ static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a)
 
 	memcpy(t, a, sizeof(sp_digit) * 10);
 	for (i = 254; i >= 0; i--) {
-		sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(t, t /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+			sp_256_mont_mul_10(t, t, a /*, p256_mod, p256_mp_mod*/);
 	}
 	memcpy(r, t, sizeof(sp_digit) * 10);
 }
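
The loop above is Fermat inversion, r = a^(p-2) mod p, done as square-and-multiply over the exponent bits. The hard-coded predicate reproduces the bits of p256_mod - 2 without the p256_mod_2[] table the commented-out line consulted. For p = 2^256 - 2^224 + 2^192 + 2^96 - 1:

	/* bits of p256_mod - 2, as encoded by the predicate:
	 *   i >= 224           bits 255..224 set (bit 255 is consumed by
	 *                      the initial memcpy t = a; the loop starts at 254)
	 *   i == 192           bit 192 set
	 *   i <= 95 && i != 1  bits 95..0 set except bit 1
	 * all other bits are 0. */
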
@@ -577,22 +562,22 @@ static void sp_256_map_10(sp_point* r, sp_point* p)
 
 	sp_256_mont_inv_10(t1, p->z);
 
-	sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
-	sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_10(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
 	memset(r->x + 10, 0, sizeof(r->x) / 2);
-	sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+	sp_256_mont_reduce_10(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_10(r->x, p256_mod) >= 0)
 		sp_256_sub_10(r->x, r->x, p256_mod);
 	sp_256_norm_10(r->x);
 
 	/* y /= z^3 */
-	sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
 	memset(r->y + 10, 0, sizeof(r->y) / 2);
-	sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+	sp_256_mont_reduce_10(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_10(r->y, p256_mod) >= 0)
 		sp_256_sub_10(r->y, r->y, p256_mod);
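
A note on the memset/reduce pairs above: after the multiply, r->x is X/Z^2 still in Montgomery form, i.e. x*R mod p. Zeroing the upper ten limbs and running sp_256_mont_reduce_10() computes (x*R)*R^-1 = x mod p, so the reduction doubles as the conversion out of the Montgomery domain; no explicit multiply by R^-1 is needed.

	/* mont_reduce(v) == v * R^-1 (mod p) for v < p*R;
	 * with v = x*R this yields plain x. */
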
@@ -620,9 +605,9 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 		return;
 
 	/* T1 = Z * Z */
-	sp_256_mont_sqr_10(t1, r->z, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-	sp_256_mont_mul_10(r->z, r->y, r->z, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
 	sp_256_mont_dbl_10(r->z, r->z, p256_mod);
 	/* T2 = X - T1 */
@@ -630,21 +615,21 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	/* T1 = X + T1 */
 	sp_256_mont_add_10(t1, r->x, t1, p256_mod);
 	/* T2 = T1 * T2 */
-	sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
 	sp_256_mont_tpl_10(t1, t2, p256_mod);
 	/* Y = 2Y */
 	sp_256_mont_dbl_10(r->y, r->y, p256_mod);
 	/* Y = Y * Y */
-	sp_256_mont_sqr_10(r->y, r->y, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
-	sp_256_mont_sqr_10(t2, r->y, p256_mod, p256_mp_mod);
+	sp_256_mont_sqr_10(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
 	sp_256_div2_10(t2, t2, p256_mod);
 	/* Y = Y * X */
-	sp_256_mont_mul_10(r->y, r->y, r->x, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-	sp_256_mont_mul_10(r->x, t1, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
 	sp_256_mont_sub_10(r->x, r->x, r->y, p256_mod);
 	/* X = X - Y */
@@ -652,7 +637,7 @@ static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p)
 	/* Y = Y - X */
 	sp_256_mont_sub_10(r->y, r->y, r->x, p256_mod);
 	/* Y = Y * T1 */
-	sp_256_mont_mul_10(r->y, r->y, t1, p256_mod, p256_mp_mod);
+	sp_256_mont_mul_10(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
 	sp_256_mont_sub_10(r->y, r->y, t2, p256_mod);
 }
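
For reference, the three hunks above walk through the standard Jacobian doubling for curves with a = -3 (as P-256 is), which is what makes the first step valid:

	/* T1 = Z^2, and with a = -3:
	 *   M  = 3*(X - Z^2)*(X + Z^2)   (t1 after the tripling)
	 *   S  = 4*X*Y^2                 (Y after Y=2Y; Y=Y*Y; Y=Y*X)
	 *   X' = M^2 - 2*S
	 *   Y' = M*(S - X') - 8*Y^4      (t2 holds 8*Y^4 after the halving)
	 *   Z' = 2*Y*Z */
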
@@ -700,36 +685,36 @@ static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q)
 		*r = p->infinity ? *q : *p; /* struct copy */
 
 		/* U1 = X1*Z2^2 */
-		sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t1, t1, v->x, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(t1, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t1, t1, v->x /*, p256_mod, p256_mp_mod*/);
 		/* U2 = X2*Z1^2 */
-		sp_256_mont_sqr_10(t2, v->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t4, t2, v->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(t2, v->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t4, t2, v->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
 		/* S1 = Y1*Z2^3 */
-		sp_256_mont_mul_10(t3, t3, v->y, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(t3, t3, v->y /*, p256_mod, p256_mp_mod*/);
 		/* S2 = Y2*Z1^3 */
-		sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 		/* H = U2 - U1 */
 		sp_256_mont_sub_10(t2, t2, t1, p256_mod);
 		/* R = S2 - S1 */
 		sp_256_mont_sub_10(t4, t4, t3, p256_mod);
 		/* Z3 = H*Z1*Z2 */
-		sp_256_mont_mul_10(v->z, v->z, q->z, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(v->z, v->z, t2, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
 		/* X3 = R^2 - H^3 - 2*U1*H^2 */
-		sp_256_mont_sqr_10(v->x, t4, p256_mod, p256_mp_mod);
-		sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(v->y, t1, t5, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+		sp_256_mont_sqr_10(v->x, t4 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_10(t5, t2 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_sub_10(v->x, v->x, t5, p256_mod);
 		sp_256_mont_dbl_10(t1, v->y, p256_mod);
 		sp_256_mont_sub_10(v->x, v->x, t1, p256_mod);
 		/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
 		sp_256_mont_sub_10(v->y, v->y, v->x, p256_mod);
-		sp_256_mont_mul_10(v->y, v->y, t4, p256_mod, p256_mp_mod);
-		sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
+		sp_256_mont_mul_10(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_mul_10(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
 		sp_256_mont_sub_10(v->y, v->y, t5, p256_mod);
 	}
 }
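
And this is the standard Jacobian point addition; reading the calls, the temporaries map as follows (v being the working copy of the result point):

	/* t1 = U1 = X1*Z2^2          t2 = H = U2 - U1
	 * t3 = S1 = Y1*Z2^3          t4 = R = S2 - S1
	 * t5 = H^2, then H^3, then S1*H^3
	 * X3 = R^2 - H^3 - 2*U1*H^2
	 * Y3 = R*(U1*H^2 - X3) - S1*H^3
	 * Z3 = H*Z1*Z2 */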