 networking/tls_sp_c32.c | 178 ++++++++++++++++------------------
 1 file changed, 72 insertions(+), 106 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 3291b553c..3452b08b9 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -49,9 +49,9 @@ typedef int32_t signed_sp_digit;
  */
 
 typedef struct sp_point {
-	sp_digit x[2 * 8];
-	sp_digit y[2 * 8];
-	sp_digit z[2 * 8];
+	sp_digit x[8];
+	sp_digit y[8];
+	sp_digit z[8];
 	int infinity;
 } sp_point;
 
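With 32-bit digits a point now occupies 3*8*4 + 4 = 100 bytes instead of 196; the coordinates were double-wide only so the old Montgomery helpers could use their high halves as scratch. A quick standalone check of the new layout (a sketch, assuming sp_digit is uint32_t):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t sp_digit;

    typedef struct sp_point {
    	sp_digit x[8];	/* was x[2 * 8] */
    	sp_digit y[8];	/* was y[2 * 8] */
    	sp_digit z[8];	/* was z[2 * 8] */
    	int infinity;
    } sp_point;

    int main(void)
    {
    	/* 3*8*4 + 4 = 100 bytes (plus padding, if any), down from 196 */
    	printf("sizeof(sp_point) = %u\n", (unsigned)sizeof(sp_point));
    	return 0;
    }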
@@ -456,12 +456,11 @@ static void sp_256_sub_8_p256_mod(sp_digit* r)
 #endif
 
 /* Multiply a and b into r. (r = a * b)
- * r should be [16] array (512 bits).
+ * r should be [16] array (512 bits), and must not coincide with a or b.
  */
 static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 {
 #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
-	sp_digit rr[15]; /* in case r coincides with a or b */
 	int k;
 	uint32_t accl;
 	uint32_t acch;
@@ -493,16 +492,15 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 			j--;
 			i++;
 		} while (i != 8 && i <= k);
-		rr[k] = accl;
+		r[k] = accl;
 		accl = acch;
 		acch = acc_hi;
 	}
 	r[15] = accl;
-	memcpy(r, rr, sizeof(rr));
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
 	const uint64_t* aa = (const void*)a;
 	const uint64_t* bb = (const void*)b;
-	uint64_t rr[8];
+	uint64_t* rr = (void*)r;
 	int k;
 	uint64_t accl;
 	uint64_t acch;
@@ -539,11 +537,8 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		acch = acc_hi;
 	}
 	rr[7] = accl;
-	memcpy(r, rr, sizeof(rr));
 #elif 0
 //TODO: arm assembly (untested)
-	sp_digit tmp[16];
-
 	asm volatile (
 "\n		mov	r5, #0"
 "\n		mov	r6, #0"
@@ -575,12 +570,10 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 "\n		cmp	r5, #56"
 "\n		ble	1b"
 "\n		str	r6, [%[r], r5]"
-		: [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+		: [r] "r" (r), [a] "r" (a), [b] "r" (b)
 		: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
 	);
-	memcpy(r, tmp, sizeof(tmp));
 #else
-	sp_digit rr[15]; /* in case r coincides with a or b */
 	int i, j, k;
 	uint64_t acc;
 
@@ -600,11 +593,10 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 			j--;
 			i++;
 		} while (i != 8 && i <= k);
-		rr[k] = acc;
+		r[k] = acc;
 		acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
 	}
 	r[15] = acc;
-	memcpy(r, rr, sizeof(rr));
 #endif
 }
 
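All the removed buffers (rr[15], tmp[16]) existed so that r could alias a or b; the new contract ("must not coincide") makes the buffers and the final memcpy unnecessary, and the x86_64 path now simply points rr at r. The hazard the buffers guarded against is easiest to see in the generic path: r[k] is stored while later columns still read a[] and b[]. A standalone sketch of that path (illustrative only, assuming 32-bit digits):

    #include <stdint.h>

    typedef uint32_t sp_digit;

    /* r = a * b, summed column by column. r[16] must NOT overlap a[8]/b[8]:
     * the store to r[k] happens while columns k+1..14 still read a[0..7]
     * and b[0..7].
     */
    static void mul_8x8_columns(sp_digit *r, const sp_digit *a, const sp_digit *b)
    {
    	uint64_t acc = 0;
    	int i, j, k;

    	for (k = 0; k < 15; k++) {
    		uint32_t acc_hi = 0;	/* bits 64+ of the column sum */
    		i = (k < 8) ? 0 : k - 7;
    		j = k - i;
    		do {
    			uint64_t m = (uint64_t)a[i] * b[j];
    			acc += m;
    			if (acc < m)	/* carry out of the 64-bit accumulator */
    				acc_hi++;
    			j--;
    			i++;
    		} while (i != 8 && i <= k);
    		r[k] = (sp_digit)acc;	/* would clobber a[k] or b[k] if aliased */
    		acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
    	}
    	r[15] = (sp_digit)acc;
    }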
@@ -709,30 +701,11 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
 }
 
 /* Shift the result in the high 256 bits down to the bottom.
- * High half is cleared to zeros.
  */
-#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
-static void sp_512to256_mont_shift_8(sp_digit* rr)
+static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a)
 {
-	uint64_t *r = (void*)rr;
-	int i;
-
-	for (i = 0; i < 4; i++) {
-		r[i] = r[i+4];
-		r[i+4] = 0;
-	}
+	memcpy(r, a + 8, sizeof(*r) * 8);
 }
-#else
-static void sp_512to256_mont_shift_8(sp_digit* r)
-{
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		r[i] = r[i+8];
-		r[i+8] = 0;
-	}
-}
-#endif
 
 /* Mul a by scalar b and add into r. (r += a * b)
  * a = p256_mod
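Both removed shift variants moved the high half down and zeroed it in place; the zeroing existed only so the 512-bit buffer could keep doubling as scratch. With a separate destination the operation is a plain copy of digits 8..15, i.e. the value shifted right by 256 bits. A standalone sanity check (sketch, assuming 32-bit digits):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    typedef uint32_t sp_digit;

    static void sp_512to256_mont_shift_8(sp_digit *r, sp_digit *a)
    {
    	memcpy(r, a + 8, sizeof(*r) * 8);
    }

    int main(void)
    {
    	sp_digit a[2 * 8], r[8];
    	int i;

    	for (i = 0; i < 16; i++)
    		a[i] = i;	/* digit i of the 512-bit value */
    	sp_512to256_mont_shift_8(r, a);
    	for (i = 0; i < 8; i++)
    		assert(r[i] == (sp_digit)(i + 8));	/* r == a >> 256 */
    	return 0;
    }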
@@ -868,11 +841,12 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
  * Note: the result is NOT guaranteed to be less than p256_mod!
  * (it is only guaranteed to fit into 256 bits).
  *
- * a  Double-wide number to reduce in place.
+ * r  Result.
+ * a  Double-wide number to reduce. Clobbered.
  * m  The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
-static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
+static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
 //	const sp_digit* m = p256_mod;
 	sp_digit mp = p256_mp_mod;
@@ -895,10 +869,10 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 				goto inc_next_word0;
 			}
 		}
-		sp_512to256_mont_shift_8(a);
+		sp_512to256_mont_shift_8(r, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a);
-		sp_256_norm_8(a);
+			sp_256_sub_8_p256_mod(r);
+		sp_256_norm_8(r);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		sp_digit word16th = 0;
@@ -915,10 +889,10 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 				goto inc_next_word;
 			}
 		}
-		sp_512to256_mont_shift_8(a);
+		sp_512to256_mont_shift_8(r, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a);
-		sp_256_norm_8(a);
+			sp_256_sub_8_p256_mod(r);
+		sp_256_norm_8(r);
 	}
 }
 
@@ -926,35 +900,34 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
  * (r = a * b mod m)
  *
  * r   Result of multiplication.
- *     Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad).
  * a   First number to multiply in Montogmery form.
  * b   Second number to multiply in Montogmery form.
  * m   Modulus (prime).
  * mp  Montogmery mulitplier.
  */
-static void sp_256to512z_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-	sp_256to512_mul_8(r, a, b);
-	sp_512to256_mont_reduce_8(r /*, m, mp*/);
+	sp_digit t[2 * 8];
+	sp_256to512_mul_8(t, a, b);
+	sp_512to256_mont_reduce_8(r, t /*, m, mp*/);
 }
 
 /* Square the Montgomery form number. (r = a * a mod m)
  *
  * r   Result of squaring.
- *     Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad).
  * a   Number to square in Montogmery form.
  * m   Modulus (prime).
 * mp  Montogmery mulitplier.
  */
-static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a
 		/*, const sp_digit* m, sp_digit mp*/)
 {
 	//const sp_digit* m = p256_mod;
 	//sp_digit mp = p256_mp_mod;
-	sp_256to512z_mont_mul_8(r, a, a /*, m, mp*/);
+	sp_256_mont_mul_8(r, a, a /*, m, mp*/);
 }
 
@@ -964,11 +937,8 @@ static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a
  * a  Number to invert.
  */
 #if 0
-/* Mod-2 for the P256 curve. */
-static const uint32_t p256_mod_2[8] = {
-	0xfffffffd,0xffffffff,0xffffffff,0x00000000,
-	0x00000000,0x00000000,0x00000001,0xffffffff,
-};
+//p256_mod - 2:
+//ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2
 //Bit pattern:
 //2 2 2 2 2 2 2 1...1
 //5 5 4 3 2 1 0 9...0 9...1
@@ -977,15 +947,15 @@ static const uint32_t p256_mod_2[8] = {
 #endif
 static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a)
 {
-	sp_digit t[2*8];
+	sp_digit t[8];
 	int i;
 
 	memcpy(t, a, sizeof(sp_digit) * 8);
 	for (i = 254; i >= 0; i--) {
-		sp_256to512z_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
+		sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/);
 		/*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/
 		if (i >= 224 || i == 192 || (i <= 95 && i != 1))
-			sp_256to512z_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
+			sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/);
 	}
 	memcpy(r, t, sizeof(sp_digit) * 8);
 }
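The deleted p256_mod_2[] table and the hard-coded condition are two spellings of the same constant: the bits of p256_mod - 2, consumed by this square-and-multiply ladder from bit 254 down (bit 255, which is set, is covered by the initial memcpy of a into t). A standalone check that the condition reproduces the table, using the values from the lines deleted above:

    #include <stdint.h>
    #include <stdio.h>

    static const uint32_t p256_mod_2[8] = {
    	0xfffffffd,0xffffffff,0xffffffff,0x00000000,
    	0x00000000,0x00000000,0x00000001,0xffffffff,
    };

    int main(void)
    {
    	int i;
    	for (i = 254; i >= 0; i--) {
    		int table = (p256_mod_2[i / 32] >> (i % 32)) & 1;
    		int cond = (i >= 224 || i == 192 || (i <= 95 && i != 1));
    		if (table != cond) {
    			printf("mismatch at bit %d\n", i);
    			return 1;
    		}
    	}
    	printf("condition matches p256_mod - 2 for bits 254..0\n");
    	return 0;
    }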
@@ -1056,25 +1026,28 @@ static void sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a)
  */
 static void sp_256_map_8(sp_point* r, sp_point* p)
 {
-	sp_digit t1[2*8];
-	sp_digit t2[2*8];
+	sp_digit t1[8];
+	sp_digit t2[8];
+	sp_digit rr[2 * 8];
 
 	sp_256_mont_inv_8(t1, p->z);
 
-	sp_256to512z_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/);
 
 	/* x /= z^2 */
-	sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/);
-	sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(rr, p->x, t2 /*, p256_mod, p256_mp_mod*/);
+	memset(rr + 8, 0, sizeof(rr) / 2);
+	sp_512to256_mont_reduce_8(r->x, rr /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->x);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
-	sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
-	sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(rr, p->y, t1 /*, p256_mod, p256_mp_mod*/);
+	memset(rr + 8, 0, sizeof(rr) / 2);
+	sp_512to256_mont_reduce_8(r->y, rr /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->y);
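Why the map now reduces twice, with an explicit memset in between: every quantity here carries the Montgomery factor R = 2^256, and one extra reduction is exactly what strips it. Spelled out for the x coordinate (ignoring the final conditional subtract):

    t1   = mont_inv(z*R)      = z^-1 * R
    t2   = mont_sqr(t1)       = z^-2 * R
    rr   = mont_mul(x*R, t2)  = (x/z^2) * R      (still in Montgomery form)
    r->x = reduce(rr)         = x/z^2            (plain number)

The memset is required because sp_512to256_mont_reduce_8 now consumes a genuine double-wide input: after the multiply, rr holds only a 256-bit value, so its high half must be explicit zeros before the second reduction.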
@@ -1091,8 +1064,8 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
  */
 static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 {
-	sp_digit t1[2*8];
-	sp_digit t2[2*8];
+	sp_digit t1[8];
+	sp_digit t2[8];
 
 	/* Put point to double into result */
 	if (r != p)
@@ -1101,17 +1074,10 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	if (r->infinity)
 		return;
 
-	if (SP_DEBUG) {
-		/* unused part of t2, may result in spurios
-		 * differences in debug output. Clear it.
-		 */
-		memset(t2, 0, sizeof(t2));
-	}
-
 	/* T1 = Z * Z */
-	sp_256to512z_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = Y * Z */
-	sp_256to512z_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
 	/* Z = 2Z */
 	sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/);
 	/* T2 = X - T1 */
@@ -1119,21 +1085,21 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* T1 = X + T1 */
 	sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/);
 	/* T2 = T1 * T2 */
-	sp_256to512z_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
 	/* T1 = 3T2 */
 	sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/);
 	/* Y = 2Y */
 	sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/);
 	/* Y = Y * Y */
-	sp_256to512z_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = Y * Y */
-	sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/);
 	/* T2 = T2/2 */
 	sp_256_div2_8(t2 /*, p256_mod*/);
 	/* Y = Y * X */
-	sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/);
 	/* X = T1 * T1 */
-	sp_256to512z_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
 	/* X = X - Y */
 	sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
 	/* X = X - Y */
@@ -1141,7 +1107,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
 	/* Y = Y - X */
 	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
 	/* Y = Y * T1 */
-	sp_256to512z_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Y = Y - T2 */
 	sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/);
 	dump_512("y2 %s\n", r->y);
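For checking the step comments against a reference: with P = (X, Y, Z) in Jacobian coordinates and the P-256 curve parameter a = -3, the sequence above computes

    M  = 3*(X - Z^2)*(X + Z^2)    (= 3*X^2 + a*Z^4 for a = -3)
    Z3 = 2*Y*Z
    S  = 4*X*Y^2
    X3 = M^2 - 2*S
    Y3 = M*(S - X3) - 8*Y^4

with t1 ending up as M and t2 as 8*Y^4. Every intermediate fits in 256 bits (though, as noted earlier, not necessarily below p256_mod), which is what lets t1 and t2 shrink to [8].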
@@ -1155,11 +1121,11 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
  */
 static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 {
-	sp_digit t1[2*8];
-	sp_digit t2[2*8];
-	sp_digit t3[2*8];
-	sp_digit t4[2*8];
-	sp_digit t5[2*8];
+	sp_digit t1[8];
+	sp_digit t2[8];
+	sp_digit t3[8];
+	sp_digit t4[8];
+	sp_digit t5[8];
 
 	/* Ensure only the first point is the same as the result. */
 	if (q == r) {
@@ -1186,36 +1152,36 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
 	}
 
 	/* U1 = X1*Z2^2 */
-	sp_256to512z_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/);
 	/* U2 = X2*Z1^2 */
-	sp_256to512z_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/);
 	/* S1 = Y1*Z2^3 */
-	sp_256to512z_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/);
 	/* S2 = Y2*Z1^3 */
-	sp_256to512z_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
 	/* H = U2 - U1 */
 	sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
 	/* R = S2 - S1 */
 	sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
 	/* Z3 = H*Z1*Z2 */
-	sp_256to512z_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/);
 	/* X3 = R^2 - H^3 - 2*U1*H^2 */
-	sp_256to512z_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
 	sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/);
 	sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/);
 	sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/);
 	/* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
 	sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
-	sp_256to512z_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
-	sp_256to512z_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/);
+	sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
 	sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/);
 }
 
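The same halving applies here: with 32-bit digits, t1..t5 drop from 5*16*4 = 320 to 5*8*4 = 160 bytes of stack, on top of the 96 bytes saved in every sp_point. After this change the only double-wide buffers left in the diff are the t[2 * 8] local inside sp_256_mont_mul_8 and the rr[2 * 8] scratch in sp_256_map_8.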