diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-04-26 13:25:56 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-04-26 13:30:09 +0200 |
commit | f18a1fd6f368ada05b33cf36483304a5e3c4945d (patch) | |
tree | 433a988ac92ba89af647eb168c6c781c6d05cc03 | |
parent | 121b02d6b6c9f276e7f8da560e5996d3e389cd63 (diff) | |
download | busybox-w32-f18a1fd6f368ada05b33cf36483304a5e3c4945d.tar.gz busybox-w32-f18a1fd6f368ada05b33cf36483304a5e3c4945d.tar.bz2 busybox-w32-f18a1fd6f368ada05b33cf36483304a5e3c4945d.zip |
tls: implement secp256r1 elliptic curve (aka P256)
function old new delta
sp_256_mod_mul_norm_10 - 1439 +1439
sp_256_ecc_mulmod_10 - 1363 +1363
sp_256_proj_point_dbl_10 - 490 +490
p256_base - 244 +244
static.sp_256_mont_sqr_10 - 234 +234
static.sp_256_mont_mul_10 - 214 +214
curve_P256_compute_pubkey_and_premaster - 197 +197
static.sp_256_mont_reduce_10 - 176 +176
static.sp_256_from_bin - 149 +149
sp_256_to_bin - 148 +148
tls_handshake 2046 2146 +100
static.sp_256_mul_add_10 - 82 +82
.rodata 103275 103336 +61
static.sp_256_mont_sub_10 - 52 +52
static.sp_256_mont_dbl_10 - 52 +52
static.sp_256_cmp_10 - 43 +43
p256_mod - 40 +40
static.sp_256_cond_sub_10 - 32 +32
p256_mod_2 - 32 +32
sp_256_norm_10 - 31 +31
sp_256_cmp_equal_10 - 30 +30
sp_256_add_10 - 22 +22
addr_mask - 8 +8
------------------------------------------------------------------------------
(add/remove: 22/0 grow/shrink: 2/0 up/down: 5239/0) Total: 5239 bytes
text data bss dec hex filename
1018192 559 5020 1023771 f9f1b busybox_old
1023431 559 5020 1029010 fb392 busybox_unstripped
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls.c | 101 | ||||
-rw-r--r-- | networking/tls.h | 8 | ||||
-rw-r--r-- | networking/tls_fe.h | 1 | ||||
-rw-r--r-- | networking/tls_sp_c32.c | 1052 |
4 files changed, 1126 insertions, 36 deletions
diff --git a/networking/tls.c b/networking/tls.c index 8492997ac..cacd2e9ff 100644 --- a/networking/tls.c +++ b/networking/tls.c | |||
@@ -18,6 +18,7 @@ | |||
18 | //kbuild:lib-$(CONFIG_TLS) += tls_aesgcm.o | 18 | //kbuild:lib-$(CONFIG_TLS) += tls_aesgcm.o |
19 | //kbuild:lib-$(CONFIG_TLS) += tls_rsa.o | 19 | //kbuild:lib-$(CONFIG_TLS) += tls_rsa.o |
20 | //kbuild:lib-$(CONFIG_TLS) += tls_fe.o | 20 | //kbuild:lib-$(CONFIG_TLS) += tls_fe.o |
21 | //kbuild:lib-$(CONFIG_TLS) += tls_sp_c32.o | ||
21 | 22 | ||
22 | #include "tls.h" | 23 | #include "tls.h" |
23 | 24 | ||
@@ -265,8 +266,9 @@ enum { | |||
265 | GOT_CERT_RSA_KEY_ALG = 1 << 1, | 266 | GOT_CERT_RSA_KEY_ALG = 1 << 1, |
266 | GOT_CERT_ECDSA_KEY_ALG = 1 << 2, // so far unused | 267 | GOT_CERT_ECDSA_KEY_ALG = 1 << 2, // so far unused |
267 | GOT_EC_KEY = 1 << 3, | 268 | GOT_EC_KEY = 1 << 3, |
268 | ENCRYPTION_AESGCM = 1 << 4, // else AES-SHA (or NULL-SHA if ALLOW_RSA_NULL_SHA256=1) | 269 | GOT_EC_CURVE_X25519 = 1 << 4, // else P256 |
269 | ENCRYPT_ON_WRITE = 1 << 5, | 270 | ENCRYPTION_AESGCM = 1 << 5, // else AES-SHA (or NULL-SHA if ALLOW_RSA_NULL_SHA256=1) |
271 | ENCRYPT_ON_WRITE = 1 << 6, | ||
270 | }; | 272 | }; |
271 | 273 | ||
272 | struct record_hdr { | 274 | struct record_hdr { |
@@ -285,7 +287,11 @@ struct tls_handshake_data { | |||
285 | //TODO: store just the DER key here, parse/use/delete it when sending client key | 287 | //TODO: store just the DER key here, parse/use/delete it when sending client key |
286 | //this way it will stay key type agnostic here. | 288 | //this way it will stay key type agnostic here. |
287 | psRsaKey_t server_rsa_pub_key; | 289 | psRsaKey_t server_rsa_pub_key; |
288 | uint8_t ecc_pub_key32[32]; | 290 | |
291 | /* peer's elliptic curve key data */ | ||
292 | /* for x25519, it contains one point in first 32 bytes */ | ||
293 | /* for P256, it contains x,y point pair, each 32 bytes long */ | ||
294 | uint8_t ecc_pub_key32[2 * 32]; | ||
289 | 295 | ||
290 | /* HANDSHAKE HASH: */ | 296 | /* HANDSHAKE HASH: */ |
291 | //unsigned saved_client_hello_size; | 297 | //unsigned saved_client_hello_size; |
@@ -1526,20 +1532,13 @@ static void send_client_hello_and_alloc_hsd(tls_state_t *tls, const char *sni) | |||
1526 | }; | 1532 | }; |
1527 | static const uint8_t supported_groups[] = { | 1533 | static const uint8_t supported_groups[] = { |
1528 | 0x00,0x0a, //extension_type: "supported_groups" | 1534 | 0x00,0x0a, //extension_type: "supported_groups" |
1529 | 0x00,0x04, //ext len | 1535 | 0x00,0x06, //ext len |
1530 | 0x00,0x02, //list len | 1536 | 0x00,0x04, //list len |
1531 | 0x00,0x1d, //curve_x25519 (RFC 7748) | 1537 | 0x00,0x17, //curve_secp256r1 |
1532 | //0x00,0x1e, //curve_x448 (RFC 7748) | ||
1533 | //0x00,0x17, //curve_secp256r1 | ||
1534 | //0x00,0x18, //curve_secp384r1 | 1538 | //0x00,0x18, //curve_secp384r1 |
1535 | //0x00,0x19, //curve_secp521r1 | 1539 | //0x00,0x19, //curve_secp521r1 |
1536 | //TODO: implement secp256r1 (at least): dl.fedoraproject.org immediately aborts | 1540 | 0x00,0x1d, //curve_x25519 (RFC 7748) |
1537 | //if only x25519/x448 are advertised, seems to support only secpNNNr1 curves: | 1541 | //0x00,0x1e, //curve_x448 (RFC 7748) |
1538 | // openssl s_client -connect dl.fedoraproject.org:443 -debug -tls1_2 -cipher ECDHE-RSA-AES128-GCM-SHA256 | ||
1539 | //Peer signing digest: SHA512 | ||
1540 | //Peer signature type: RSA | ||
1541 | //Server Temp Key: ECDH, P-256, 256 bits | ||
1542 | //TLSv1.2, Cipher is ECDHE-RSA-AES128-GCM-SHA256 | ||
1543 | }; | 1542 | }; |
1544 | //static const uint8_t signature_algorithms[] = { | 1543 | //static const uint8_t signature_algorithms[] = { |
1545 | // 000d | 1544 | // 000d |
@@ -1877,12 +1876,32 @@ static void process_server_key(tls_state_t *tls, int len) | |||
1877 | if (len < (1+2+1+32)) tls_error_die(tls); | 1876 | if (len < (1+2+1+32)) tls_error_die(tls); |
1878 | keybuf += 4; | 1877 | keybuf += 4; |
1879 | 1878 | ||
1880 | /* So far we only support curve_x25519 */ | 1879 | #if BB_BIG_ENDIAN |
1880 | # define _0x03001741 0x03001741 | ||
1881 | # define _0x03001d20 0x03001d20 | ||
1882 | #else | ||
1883 | # define _0x03001741 0x41170003 | ||
1884 | # define _0x03001d20 0x201d0003 | ||
1885 | #endif | ||
1881 | move_from_unaligned32(t32, keybuf); | 1886 | move_from_unaligned32(t32, keybuf); |
1882 | if (t32 != htonl(0x03001d20)) | 1887 | keybuf += 4; |
1883 | bb_simple_error_msg_and_die("elliptic curve is not x25519"); | 1888 | switch (t32) { |
1889 | case _0x03001d20: //curve_x25519 | ||
1890 | tls->flags |= GOT_EC_CURVE_X25519; | ||
1891 | memcpy(tls->hsd->ecc_pub_key32, keybuf, 32); | ||
1892 | break; | ||
1893 | case _0x03001741: //curve_secp256r1 | ||
1894 | /* P256 point can be transmitted odd- or even-compressed | ||
1895 | * (first byte is 3 or 2) or uncompressed (4). | ||
1896 | */ | ||
1897 | if (*keybuf++ != 4) | ||
1898 | bb_simple_error_msg_and_die("compressed EC points not supported"); | ||
1899 | memcpy(tls->hsd->ecc_pub_key32, keybuf, 2 * 32); | ||
1900 | break; | ||
1901 | default: | ||
1902 | bb_error_msg_and_die("elliptic curve is not x25519 or P256: 0x%08x", t32); | ||
1903 | } | ||
1884 | 1904 | ||
1885 | memcpy(tls->hsd->ecc_pub_key32, keybuf + 4, 32); | ||
1886 | tls->flags |= GOT_EC_KEY; | 1905 | tls->flags |= GOT_EC_KEY; |
1887 | dbg("got eccPubKey\n"); | 1906 | dbg("got eccPubKey\n"); |
1888 | } | 1907 | } |
@@ -1918,9 +1937,7 @@ static void send_client_key_exchange(tls_state_t *tls) | |||
1918 | }; | 1937 | }; |
1919 | //FIXME: better size estimate | 1938 | //FIXME: better size estimate |
1920 | struct client_key_exchange *record = tls_get_zeroed_outbuf(tls, sizeof(*record)); | 1939 | struct client_key_exchange *record = tls_get_zeroed_outbuf(tls, sizeof(*record)); |
1921 | uint8_t rsa_premaster[RSA_PREMASTER_SIZE]; | 1940 | uint8_t premaster[RSA_PREMASTER_SIZE > EC_CURVE_KEYSIZE ? RSA_PREMASTER_SIZE : EC_CURVE_KEYSIZE]; |
1922 | uint8_t x25519_premaster[CURVE25519_KEYSIZE]; | ||
1923 | uint8_t *premaster; | ||
1924 | int premaster_size; | 1941 | int premaster_size; |
1925 | int len; | 1942 | int len; |
1926 | 1943 | ||
@@ -1929,19 +1946,19 @@ static void send_client_key_exchange(tls_state_t *tls) | |||
1929 | if (!(tls->flags & GOT_CERT_RSA_KEY_ALG)) | 1946 | if (!(tls->flags & GOT_CERT_RSA_KEY_ALG)) |
1930 | bb_simple_error_msg("server cert is not RSA"); | 1947 | bb_simple_error_msg("server cert is not RSA"); |
1931 | 1948 | ||
1932 | tls_get_random(rsa_premaster, sizeof(rsa_premaster)); | 1949 | tls_get_random(premaster, RSA_PREMASTER_SIZE); |
1933 | if (TLS_DEBUG_FIXED_SECRETS) | 1950 | if (TLS_DEBUG_FIXED_SECRETS) |
1934 | memset(rsa_premaster, 0x44, sizeof(rsa_premaster)); | 1951 | memset(premaster, 0x44, RSA_PREMASTER_SIZE); |
1935 | // RFC 5246 | 1952 | // RFC 5246 |
1936 | // "Note: The version number in the PreMasterSecret is the version | 1953 | // "Note: The version number in the PreMasterSecret is the version |
1937 | // offered by the client in the ClientHello.client_version, not the | 1954 | // offered by the client in the ClientHello.client_version, not the |
1938 | // version negotiated for the connection." | 1955 | // version negotiated for the connection." |
1939 | rsa_premaster[0] = TLS_MAJ; | 1956 | premaster[0] = TLS_MAJ; |
1940 | rsa_premaster[1] = TLS_MIN; | 1957 | premaster[1] = TLS_MIN; |
1941 | dump_hex("premaster:%s\n", rsa_premaster, sizeof(rsa_premaster)); | 1958 | dump_hex("premaster:%s\n", premaster, sizeof(premaster)); |
1942 | len = psRsaEncryptPub(/*pool:*/ NULL, | 1959 | len = psRsaEncryptPub(/*pool:*/ NULL, |
1943 | /* psRsaKey_t* */ &tls->hsd->server_rsa_pub_key, | 1960 | /* psRsaKey_t* */ &tls->hsd->server_rsa_pub_key, |
1944 | rsa_premaster, /*inlen:*/ sizeof(rsa_premaster), | 1961 | premaster, /*inlen:*/ RSA_PREMASTER_SIZE, |
1945 | record->key + 2, sizeof(record->key) - 2, | 1962 | record->key + 2, sizeof(record->key) - 2, |
1946 | data_param_ignored | 1963 | data_param_ignored |
1947 | ); | 1964 | ); |
@@ -1949,10 +1966,10 @@ static void send_client_key_exchange(tls_state_t *tls) | |||
1949 | record->key[0] = len >> 8; | 1966 | record->key[0] = len >> 8; |
1950 | record->key[1] = len & 0xff; | 1967 | record->key[1] = len & 0xff; |
1951 | len += 2; | 1968 | len += 2; |
1952 | premaster = rsa_premaster; | 1969 | premaster_size = RSA_PREMASTER_SIZE; |
1953 | premaster_size = sizeof(rsa_premaster); | 1970 | } else /* ECDHE */ |
1954 | } else { | 1971 | if (tls->flags & GOT_EC_CURVE_X25519) { |
1955 | /* ECDHE */ | 1972 | /* ECDHE, curve x25519 */ |
1956 | static const uint8_t basepoint9[CURVE25519_KEYSIZE] ALIGN8 = {9}; | 1973 | static const uint8_t basepoint9[CURVE25519_KEYSIZE] ALIGN8 = {9}; |
1957 | uint8_t privkey[CURVE25519_KEYSIZE]; //[32] | 1974 | uint8_t privkey[CURVE25519_KEYSIZE]; //[32] |
1958 | 1975 | ||
@@ -1969,13 +1986,27 @@ static void send_client_key_exchange(tls_state_t *tls) | |||
1969 | 1986 | ||
1970 | /* Compute premaster using peer's public key */ | 1987 | /* Compute premaster using peer's public key */ |
1971 | dbg("computing x25519_premaster\n"); | 1988 | dbg("computing x25519_premaster\n"); |
1972 | curve25519(x25519_premaster, privkey, tls->hsd->ecc_pub_key32); | 1989 | curve25519(premaster, privkey, tls->hsd->ecc_pub_key32); |
1973 | 1990 | ||
1974 | len = CURVE25519_KEYSIZE; | 1991 | len = CURVE25519_KEYSIZE; |
1975 | record->key[0] = len; | 1992 | record->key[0] = len; |
1976 | len++; | 1993 | len++; |
1977 | premaster = x25519_premaster; | 1994 | premaster_size = CURVE25519_KEYSIZE; |
1978 | premaster_size = sizeof(x25519_premaster); | 1995 | } else { |
1996 | /* ECDHE, curve P256 */ | ||
1997 | if (!(tls->flags & GOT_EC_KEY)) | ||
1998 | bb_simple_error_msg_and_die("server did not provide EC key"); | ||
1999 | |||
2000 | dbg("computing P256_premaster\n"); | ||
2001 | curve_P256_compute_pubkey_and_premaster( | ||
2002 | record->key + 2, premaster, | ||
2003 | /*point:*/ tls->hsd->ecc_pub_key32 | ||
2004 | ); | ||
2005 | premaster_size = P256_KEYSIZE; | ||
2006 | len = 1 + P256_KEYSIZE * 2; | ||
2007 | record->key[0] = len; | ||
2008 | record->key[1] = 4; | ||
2009 | len++; | ||
1979 | } | 2010 | } |
1980 | 2011 | ||
1981 | record->type = HANDSHAKE_CLIENT_KEY_EXCHANGE; | 2012 | record->type = HANDSHAKE_CLIENT_KEY_EXCHANGE; |
diff --git a/networking/tls.h b/networking/tls.h index d4ac1bef8..e1afb7ea8 100644 --- a/networking/tls.h +++ b/networking/tls.h | |||
@@ -106,3 +106,11 @@ void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC; | |||
106 | #include "tls_aesgcm.h" | 106 | #include "tls_aesgcm.h" |
107 | #include "tls_rsa.h" | 107 | #include "tls_rsa.h" |
108 | #include "tls_fe.h" | 108 | #include "tls_fe.h" |
109 | |||
110 | #define EC_CURVE_KEYSIZE 32 | ||
111 | #define P256_KEYSIZE 32 | ||
112 | #define CURVE25519_KEYSIZE 32 | ||
113 | |||
114 | void curve_P256_compute_pubkey_and_premaster( | ||
115 | uint8_t *pubkey, uint8_t *premaster, | ||
116 | const uint8_t *peerkey32) FAST_FUNC; | ||
diff --git a/networking/tls_fe.h b/networking/tls_fe.h index fe8cff228..2859c9d2d 100644 --- a/networking/tls_fe.h +++ b/networking/tls_fe.h | |||
@@ -3,5 +3,4 @@ | |||
3 | * | 3 | * |
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | 4 | * Licensed under GPLv2, see file LICENSE in this source tree. |
5 | */ | 5 | */ |
6 | #define CURVE25519_KEYSIZE 32 | ||
7 | void curve25519(uint8_t *result, const uint8_t *e, const uint8_t *q) FAST_FUNC; | 6 | void curve25519(uint8_t *result, const uint8_t *e, const uint8_t *q) FAST_FUNC; |
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c new file mode 100644 index 000000000..e7667de73 --- /dev/null +++ b/networking/tls_sp_c32.c | |||
@@ -0,0 +1,1052 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2021 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | #define SP_DEBUG 0 | ||
9 | #define FIXED_SECRET 0 | ||
10 | #define FIXED_PEER_PUBKEY 0 | ||
11 | |||
12 | #if SP_DEBUG | ||
13 | # define dbg(...) fprintf(stderr, __VA_ARGS__) | ||
14 | static void dump_hex(const char *fmt, const void *vp, int len) | ||
15 | { | ||
16 | char hexbuf[32 * 1024 + 4]; | ||
17 | const uint8_t *p = vp; | ||
18 | |||
19 | bin2hex(hexbuf, (void*)p, len)[0] = '\0'; | ||
20 | dbg(fmt, hexbuf); | ||
21 | } | ||
22 | #else | ||
23 | # define dbg(...) ((void)0) | ||
24 | # define dump_hex(...) ((void)0) | ||
25 | #endif | ||
26 | |||
27 | #undef DIGIT_BIT | ||
28 | #define DIGIT_BIT 32 | ||
29 | typedef int32_t sp_digit; | ||
30 | |||
31 | /* The code below is taken from parts of | ||
32 | * wolfssl-3.15.3/wolfcrypt/src/sp_c32.c | ||
33 | * and heavily modified. | ||
34 | * Header comment is kept intact: | ||
35 | */ | ||
36 | |||
37 | /* sp.c | ||
38 | * | ||
39 | * Copyright (C) 2006-2018 wolfSSL Inc. | ||
40 | * | ||
41 | * This file is part of wolfSSL. | ||
42 | * | ||
43 | * wolfSSL is free software; you can redistribute it and/or modify | ||
44 | * it under the terms of the GNU General Public License as published by | ||
45 | * the Free Software Foundation; either version 2 of the License, or | ||
46 | * (at your option) any later version. | ||
47 | * | ||
48 | * wolfSSL is distributed in the hope that it will be useful, | ||
49 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
50 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
51 | * GNU General Public License for more details. | ||
52 | * | ||
53 | * You should have received a copy of the GNU General Public License | ||
54 | * along with this program; if not, write to the Free Software | ||
55 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA | ||
56 | */ | ||
57 | |||
58 | /* Implementation by Sean Parkinson. */ | ||
59 | |||
60 | /* Point structure to use. */ | ||
61 | typedef struct sp_point { | ||
62 | sp_digit x[2 * 10]; | ||
63 | sp_digit y[2 * 10]; | ||
64 | sp_digit z[2 * 10]; | ||
65 | int infinity; | ||
66 | } sp_point; | ||
67 | |||
68 | /* The modulus (prime) of the curve P256. */ | ||
69 | static const sp_digit p256_mod[10] = { | ||
70 | 0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000, | ||
71 | 0x0000000,0x0000000,0x0000400,0x3ff0000,0x03fffff, | ||
72 | }; | ||
73 | |||
74 | #define p256_mp_mod ((sp_digit)0x000001) | ||
75 | |||
76 | /* Mask for address to obfuscate which of the two address will be used. */ | ||
77 | static const size_t addr_mask[2] = { 0, (size_t)-1 }; | ||
78 | |||
79 | /* The base point of curve P256. */ | ||
80 | static const sp_point p256_base = { | ||
81 | /* X ordinate */ | ||
82 | { 0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,0x1091fe2,0x1f2e12c,0x01ac5f4 }, | ||
83 | /* Y ordinate */ | ||
84 | { 0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,0x1fe6e3b,0x2e2fe1a,0x013f8d0 }, | ||
85 | /* Z ordinate */ | ||
86 | { 0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000 }, | ||
87 | /* infinity */ | ||
88 | 0 | ||
89 | }; | ||
90 | |||
91 | /* Write r as big endian to byte aray. | ||
92 | * Fixed length number of bytes written: 32 | ||
93 | * | ||
94 | * r A single precision integer. | ||
95 | * a Byte array. | ||
96 | */ | ||
97 | static void sp_256_to_bin(sp_digit* r, uint8_t* a) | ||
98 | { | ||
99 | int i, j, s = 0, b; | ||
100 | |||
101 | for (i = 0; i < 9; i++) { | ||
102 | r[i+1] += r[i] >> 26; | ||
103 | r[i] &= 0x3ffffff; | ||
104 | } | ||
105 | j = 256 / 8 - 1; | ||
106 | a[j] = 0; | ||
107 | for (i=0; i<10 && j>=0; i++) { | ||
108 | b = 0; | ||
109 | a[j--] |= r[i] << s; b += 8 - s; | ||
110 | if (j < 0) | ||
111 | break; | ||
112 | while (b < 26) { | ||
113 | a[j--] = r[i] >> b; b += 8; | ||
114 | if (j < 0) | ||
115 | break; | ||
116 | } | ||
117 | s = 8 - (b - 26); | ||
118 | if (j >= 0) | ||
119 | a[j] = 0; | ||
120 | if (s != 0) | ||
121 | j++; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* Read big endian unsigned byte aray into r. | ||
126 | * | ||
127 | * r A single precision integer. | ||
128 | * a Byte array. | ||
129 | * n Number of bytes in array to read. | ||
130 | */ | ||
131 | static void sp_256_from_bin(sp_digit* r, int max, const uint8_t* a, int n) | ||
132 | { | ||
133 | int i, j = 0, s = 0; | ||
134 | |||
135 | r[0] = 0; | ||
136 | for (i = n-1; i >= 0; i--) { | ||
137 | r[j] |= ((sp_digit)a[i]) << s; | ||
138 | if (s >= 18) { | ||
139 | r[j] &= 0x3ffffff; | ||
140 | s = 26 - s; | ||
141 | if (j + 1 >= max) | ||
142 | break; | ||
143 | r[++j] = a[i] >> s; | ||
144 | s = 8 - s; | ||
145 | } | ||
146 | else | ||
147 | s += 8; | ||
148 | } | ||
149 | |||
150 | for (j++; j < max; j++) | ||
151 | r[j] = 0; | ||
152 | } | ||
153 | |||
154 | /* Convert a point of big-endian 32-byte x,y pair to type sp_point. */ | ||
155 | static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32) | ||
156 | { | ||
157 | memset(p, 0, sizeof(*p)); | ||
158 | /*p->infinity = 0;*/ | ||
159 | sp_256_from_bin(p->x, 2 * 10, bin2x32, 32); | ||
160 | sp_256_from_bin(p->y, 2 * 10, bin2x32 + 32, 32); | ||
161 | //static const uint8_t one[1] = { 1 }; | ||
162 | //sp_256_from_bin(p->z, 2 * 10, one, 1); | ||
163 | p->z[0] = 1; | ||
164 | } | ||
165 | |||
166 | /* Compare a with b in constant time. | ||
167 | * | ||
168 | * a A single precision integer. | ||
169 | * b A single precision integer. | ||
170 | * return -ve, 0 or +ve if a is less than, equal to or greater than b | ||
171 | * respectively. | ||
172 | */ | ||
173 | static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b) | ||
174 | { | ||
175 | sp_digit r = 0; | ||
176 | int i; | ||
177 | for (i = 9; i >= 0; i--) | ||
178 | r |= (a[i] - b[i]) & (0 - !r); | ||
179 | return r; | ||
180 | } | ||
181 | |||
182 | /* Compare two numbers to determine if they are equal. | ||
183 | * | ||
184 | * a First number to compare. | ||
185 | * b Second number to compare. | ||
186 | * return 1 when equal and 0 otherwise. | ||
187 | */ | ||
188 | static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b) | ||
189 | { | ||
190 | #if 1 | ||
191 | sp_digit r = 0; | ||
192 | int i; | ||
193 | for (i = 0; i < 10; i++) | ||
194 | r |= (a[i] ^ b[i]); | ||
195 | return r == 0; | ||
196 | #else | ||
197 | return sp_256_cmp_10(a, b) == 0; | ||
198 | #endif | ||
199 | } | ||
200 | |||
201 | /* Normalize the values in each word to 26. | ||
202 | * | ||
203 | * a Array of sp_digit to normalize. | ||
204 | */ | ||
205 | static void sp_256_norm_10(sp_digit* a) | ||
206 | { | ||
207 | int i; | ||
208 | for (i = 0; i < 9; i++) { | ||
209 | a[i+1] += a[i] >> 26; | ||
210 | a[i] &= 0x3ffffff; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | /* Add b to a into r. (r = a + b) | ||
215 | * | ||
216 | * r A single precision integer. | ||
217 | * a A single precision integer. | ||
218 | * b A single precision integer. | ||
219 | */ | ||
220 | static void sp_256_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b) | ||
221 | { | ||
222 | int i; | ||
223 | for (i = 0; i < 10; i++) | ||
224 | r[i] = a[i] + b[i]; | ||
225 | } | ||
226 | |||
227 | /* Conditionally add a and b using the mask m. | ||
228 | * m is -1 to add and 0 when not. | ||
229 | * | ||
230 | * r A single precision number representing conditional add result. | ||
231 | * a A single precision number to add with. | ||
232 | * b A single precision number to add. | ||
233 | * m Mask value to apply. | ||
234 | */ | ||
235 | static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a, | ||
236 | const sp_digit* b, const sp_digit m) | ||
237 | { | ||
238 | int i; | ||
239 | for (i = 0; i < 10; i++) | ||
240 | r[i] = a[i] + (b[i] & m); | ||
241 | } | ||
242 | |||
243 | /* Conditionally subtract b from a using the mask m. | ||
244 | * m is -1 to subtract and 0 when not. | ||
245 | * | ||
246 | * r A single precision number representing condition subtract result. | ||
247 | * a A single precision number to subtract from. | ||
248 | * b A single precision number to subtract. | ||
249 | * m Mask value to apply. | ||
250 | */ | ||
251 | static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a, | ||
252 | const sp_digit* b, const sp_digit m) | ||
253 | { | ||
254 | int i; | ||
255 | for (i = 0; i < 10; i++) | ||
256 | r[i] = a[i] - (b[i] & m); | ||
257 | } | ||
258 | |||
259 | /* Add 1 to a. (a = a + 1) | ||
260 | * | ||
261 | * r A single precision integer. | ||
262 | * a A single precision integer. | ||
263 | */ | ||
264 | static void sp_256_add_one_10(sp_digit* a) | ||
265 | { | ||
266 | a[0]++; | ||
267 | sp_256_norm_10(a); | ||
268 | } | ||
269 | |||
270 | /* Shift number left one bit. | ||
271 | * Bottom bit is lost. | ||
272 | * | ||
273 | * r Result of shift. | ||
274 | * a Number to shift. | ||
275 | */ | ||
276 | static void sp_256_rshift1_10(sp_digit* r, sp_digit* a) | ||
277 | { | ||
278 | int i; | ||
279 | for (i = 0; i < 9; i++) | ||
280 | r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff; | ||
281 | r[9] = a[9] >> 1; | ||
282 | } | ||
283 | |||
284 | /* Multiply a number by Montogmery normalizer mod modulus (prime). | ||
285 | * | ||
286 | * r The resulting Montgomery form number. | ||
287 | * a The number to convert. | ||
288 | */ | ||
289 | static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a) | ||
290 | { | ||
291 | int64_t t[8]; | ||
292 | int64_t a32[8]; | ||
293 | int64_t o; | ||
294 | |||
295 | a32[0] = a[0]; | ||
296 | a32[0] |= a[1] << 26; | ||
297 | a32[0] &= 0xffffffff; | ||
298 | a32[1] = (sp_digit)(a[1] >> 6); | ||
299 | a32[1] |= a[2] << 20; | ||
300 | a32[1] &= 0xffffffff; | ||
301 | a32[2] = (sp_digit)(a[2] >> 12); | ||
302 | a32[2] |= a[3] << 14; | ||
303 | a32[2] &= 0xffffffff; | ||
304 | a32[3] = (sp_digit)(a[3] >> 18); | ||
305 | a32[3] |= a[4] << 8; | ||
306 | a32[3] &= 0xffffffff; | ||
307 | a32[4] = (sp_digit)(a[4] >> 24); | ||
308 | a32[4] |= a[5] << 2; | ||
309 | a32[4] |= a[6] << 28; | ||
310 | a32[4] &= 0xffffffff; | ||
311 | a32[5] = (sp_digit)(a[6] >> 4); | ||
312 | a32[5] |= a[7] << 22; | ||
313 | a32[5] &= 0xffffffff; | ||
314 | a32[6] = (sp_digit)(a[7] >> 10); | ||
315 | a32[6] |= a[8] << 16; | ||
316 | a32[6] &= 0xffffffff; | ||
317 | a32[7] = (sp_digit)(a[8] >> 16); | ||
318 | a32[7] |= a[9] << 10; | ||
319 | a32[7] &= 0xffffffff; | ||
320 | |||
321 | /* 1 1 0 -1 -1 -1 -1 0 */ | ||
322 | t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6]; | ||
323 | /* 0 1 1 0 -1 -1 -1 -1 */ | ||
324 | t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7]; | ||
325 | /* 0 0 1 1 0 -1 -1 -1 */ | ||
326 | t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7]; | ||
327 | /* -1 -1 0 2 2 1 0 -1 */ | ||
328 | t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7]; | ||
329 | /* 0 -1 -1 0 2 2 1 0 */ | ||
330 | t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6]; | ||
331 | /* 0 0 -1 -1 0 2 2 1 */ | ||
332 | t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7]; | ||
333 | /* -1 -1 0 0 0 1 3 2 */ | ||
334 | t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7]; | ||
335 | /* 1 0 -1 -1 -1 -1 0 3 */ | ||
336 | t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7]; | ||
337 | |||
338 | t[1] += t[0] >> 32; t[0] &= 0xffffffff; | ||
339 | t[2] += t[1] >> 32; t[1] &= 0xffffffff; | ||
340 | t[3] += t[2] >> 32; t[2] &= 0xffffffff; | ||
341 | t[4] += t[3] >> 32; t[3] &= 0xffffffff; | ||
342 | t[5] += t[4] >> 32; t[4] &= 0xffffffff; | ||
343 | t[6] += t[5] >> 32; t[5] &= 0xffffffff; | ||
344 | t[7] += t[6] >> 32; t[6] &= 0xffffffff; | ||
345 | o = t[7] >> 32; t[7] &= 0xffffffff; | ||
346 | t[0] += o; | ||
347 | t[3] -= o; | ||
348 | t[6] -= o; | ||
349 | t[7] += o; | ||
350 | t[1] += t[0] >> 32; t[0] &= 0xffffffff; | ||
351 | t[2] += t[1] >> 32; t[1] &= 0xffffffff; | ||
352 | t[3] += t[2] >> 32; t[2] &= 0xffffffff; | ||
353 | t[4] += t[3] >> 32; t[3] &= 0xffffffff; | ||
354 | t[5] += t[4] >> 32; t[4] &= 0xffffffff; | ||
355 | t[6] += t[5] >> 32; t[5] &= 0xffffffff; | ||
356 | t[7] += t[6] >> 32; t[6] &= 0xffffffff; | ||
357 | |||
358 | r[0] = (sp_digit)(t[0]) & 0x3ffffff; | ||
359 | r[1] = (sp_digit)(t[0] >> 26); | ||
360 | r[1] |= t[1] << 6; | ||
361 | r[1] &= 0x3ffffff; | ||
362 | r[2] = (sp_digit)(t[1] >> 20); | ||
363 | r[2] |= t[2] << 12; | ||
364 | r[2] &= 0x3ffffff; | ||
365 | r[3] = (sp_digit)(t[2] >> 14); | ||
366 | r[3] |= t[3] << 18; | ||
367 | r[3] &= 0x3ffffff; | ||
368 | r[4] = (sp_digit)(t[3] >> 8); | ||
369 | r[4] |= t[4] << 24; | ||
370 | r[4] &= 0x3ffffff; | ||
371 | r[5] = (sp_digit)(t[4] >> 2) & 0x3ffffff; | ||
372 | r[6] = (sp_digit)(t[4] >> 28); | ||
373 | r[6] |= t[5] << 4; | ||
374 | r[6] &= 0x3ffffff; | ||
375 | r[7] = (sp_digit)(t[5] >> 22); | ||
376 | r[7] |= t[6] << 10; | ||
377 | r[7] &= 0x3ffffff; | ||
378 | r[8] = (sp_digit)(t[6] >> 16); | ||
379 | r[8] |= t[7] << 16; | ||
380 | r[8] &= 0x3ffffff; | ||
381 | r[9] = (sp_digit)(t[7] >> 10); | ||
382 | } | ||
383 | |||
384 | /* Mul a by scalar b and add into r. (r += a * b) | ||
385 | * | ||
386 | * r A single precision integer. | ||
387 | * a A single precision integer. | ||
388 | * b A scalar. | ||
389 | */ | ||
390 | static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, | ||
391 | const sp_digit b) | ||
392 | { | ||
393 | int64_t tb = b; | ||
394 | int64_t t = 0; | ||
395 | int i; | ||
396 | |||
397 | for (i = 0; i < 10; i++) { | ||
398 | t += (tb * a[i]) + r[i]; | ||
399 | r[i] = t & 0x3ffffff; | ||
400 | t >>= 26; | ||
401 | } | ||
402 | r[10] += t; | ||
403 | } | ||
404 | |||
405 | /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) | ||
406 | * | ||
407 | * r Result of division by 2. | ||
408 | * a Number to divide. | ||
409 | * m Modulus (prime). | ||
410 | */ | ||
411 | static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m) | ||
412 | { | ||
413 | sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1)); | ||
414 | sp_256_norm_10(r); | ||
415 | sp_256_rshift1_10(r, r); | ||
416 | } | ||
417 | |||
418 | /* Shift the result in the high 256 bits down to the bottom. | ||
419 | * | ||
420 | * r A single precision number. | ||
421 | * a A single precision number. | ||
422 | */ | ||
423 | static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a) | ||
424 | { | ||
425 | int i; | ||
426 | sp_digit n, s; | ||
427 | |||
428 | s = a[10]; | ||
429 | n = a[9] >> 22; | ||
430 | for (i = 0; i < 9; i++) { | ||
431 | n += (s & 0x3ffffff) << 4; | ||
432 | r[i] = n & 0x3ffffff; | ||
433 | n >>= 26; | ||
434 | s = a[11 + i] + (s >> 26); | ||
435 | } | ||
436 | n += s << 4; | ||
437 | r[9] = n; | ||
438 | memset(&r[10], 0, sizeof(*r) * 10); | ||
439 | } | ||
440 | |||
441 | /* Add two Montgomery form numbers (r = a + b % m). | ||
442 | * | ||
443 | * r Result of addition. | ||
444 | * a First number to add in Montogmery form. | ||
445 | * b Second number to add in Montogmery form. | ||
446 | * m Modulus (prime). | ||
447 | */ | ||
448 | static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b, | ||
449 | const sp_digit* m) | ||
450 | { | ||
451 | sp_256_add_10(r, a, b); | ||
452 | sp_256_norm_10(r); | ||
453 | sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0)); | ||
454 | sp_256_norm_10(r); | ||
455 | } | ||
456 | |||
457 | /* Double a Montgomery form number (r = a + a % m). | ||
458 | * | ||
459 | * r Result of doubling. | ||
460 | * a Number to double in Montogmery form. | ||
461 | * m Modulus (prime). | ||
462 | */ | ||
463 | static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m) | ||
464 | { | ||
465 | sp_256_add_10(r, a, a); | ||
466 | sp_256_norm_10(r); | ||
467 | sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0)); | ||
468 | sp_256_norm_10(r); | ||
469 | } | ||
470 | |||
471 | /* Triple a Montgomery form number (r = a + a + a % m). | ||
472 | * | ||
473 | * r Result of Tripling. | ||
474 | * a Number to triple in Montogmery form. | ||
475 | * m Modulus (prime). | ||
476 | */ | ||
477 | static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m) | ||
478 | { | ||
479 | sp_256_add_10(r, a, a); | ||
480 | sp_256_norm_10(r); | ||
481 | sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0)); | ||
482 | sp_256_norm_10(r); | ||
483 | sp_256_add_10(r, r, a); | ||
484 | sp_256_norm_10(r); | ||
485 | sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0)); | ||
486 | sp_256_norm_10(r); | ||
487 | } | ||
488 | |||
489 | /* Sub b from a into r. (r = a - b) | ||
490 | * | ||
491 | * r A single precision integer. | ||
492 | * a A single precision integer. | ||
493 | * b A single precision integer. | ||
494 | */ | ||
495 | static void sp_256_sub_10(sp_digit* r, const sp_digit* a, | ||
496 | const sp_digit* b) | ||
497 | { | ||
498 | int i; | ||
499 | for (i = 0; i < 10; i++) | ||
500 | r[i] = a[i] - b[i]; | ||
501 | } | ||
502 | |||
503 | /* Subtract two Montgomery form numbers (r = a - b % m). | ||
504 | * | ||
505 | * r Result of subtration. | ||
506 | * a Number to subtract from in Montogmery form. | ||
507 | * b Number to subtract with in Montogmery form. | ||
508 | * m Modulus (prime). | ||
509 | */ | ||
510 | static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b, | ||
511 | const sp_digit* m) | ||
512 | { | ||
513 | sp_256_sub_10(r, a, b); | ||
514 | sp_256_cond_add_10(r, r, m, r[9] >> 22); | ||
515 | sp_256_norm_10(r); | ||
516 | } | ||
517 | |||
518 | /* Reduce the number back to 256 bits using Montgomery reduction. | ||
519 | * | ||
520 | * a A single precision number to reduce in place. | ||
521 | * m The single precision number representing the modulus. | ||
522 | * mp The digit representing the negative inverse of m mod 2^n. | ||
523 | */ | ||
524 | static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp) | ||
525 | { | ||
526 | int i; | ||
527 | sp_digit mu; | ||
528 | |||
529 | if (mp != 1) { | ||
530 | for (i = 0; i < 9; i++) { | ||
531 | mu = (a[i] * mp) & 0x3ffffff; | ||
532 | sp_256_mul_add_10(a+i, m, mu); | ||
533 | a[i+1] += a[i] >> 26; | ||
534 | } | ||
535 | mu = (a[i] * mp) & 0x3fffffl; | ||
536 | sp_256_mul_add_10(a+i, m, mu); | ||
537 | a[i+1] += a[i] >> 26; | ||
538 | a[i] &= 0x3ffffff; | ||
539 | } | ||
540 | else { | ||
541 | for (i = 0; i < 9; i++) { | ||
542 | mu = a[i] & 0x3ffffff; | ||
543 | sp_256_mul_add_10(a+i, p256_mod, mu); | ||
544 | a[i+1] += a[i] >> 26; | ||
545 | } | ||
546 | mu = a[i] & 0x3fffffl; | ||
547 | sp_256_mul_add_10(a+i, p256_mod, mu); | ||
548 | a[i+1] += a[i] >> 26; | ||
549 | a[i] &= 0x3ffffff; | ||
550 | } | ||
551 | |||
552 | sp_256_mont_shift_10(a, a); | ||
553 | sp_256_cond_sub_10(a, a, m, 0 - ((a[9] >> 22) > 0)); | ||
554 | sp_256_norm_10(a); | ||
555 | } | ||
556 | |||
557 | /* Multiply a and b into r. (r = a * b) | ||
558 | * | ||
559 | * r A single precision integer. | ||
560 | * a A single precision integer. | ||
561 | * b A single precision integer. | ||
562 | */ | ||
563 | static void sp_256_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b) | ||
564 | { | ||
565 | int i, j, k; | ||
566 | int64_t c; | ||
567 | |||
568 | c = ((int64_t)a[9]) * b[9]; | ||
569 | r[19] = (sp_digit)(c >> 26); | ||
570 | c = (c & 0x3ffffff) << 26; | ||
571 | for (k = 17; k >= 0; k--) { | ||
572 | for (i = 9; i >= 0; i--) { | ||
573 | j = k - i; | ||
574 | if (j >= 10) | ||
575 | break; | ||
576 | if (j < 0) | ||
577 | continue; | ||
578 | c += ((int64_t)a[i]) * b[j]; | ||
579 | } | ||
580 | r[k + 2] += c >> 52; | ||
581 | r[k + 1] = (c >> 26) & 0x3ffffff; | ||
582 | c = (c & 0x3ffffff) << 26; | ||
583 | } | ||
584 | r[0] = (sp_digit)(c >> 26); | ||
585 | } | ||
586 | |||
587 | /* Multiply two Montogmery form numbers mod the modulus (prime). | ||
588 | * (r = a * b mod m) | ||
589 | * | ||
590 | * r Result of multiplication. | ||
591 | * a First number to multiply in Montogmery form. | ||
592 | * b Second number to multiply in Montogmery form. | ||
593 | * m Modulus (prime). | ||
594 | * mp Montogmery mulitplier. | ||
595 | */ | ||
596 | static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b, | ||
597 | const sp_digit* m, sp_digit mp) | ||
598 | { | ||
599 | sp_256_mul_10(r, a, b); | ||
600 | sp_256_mont_reduce_10(r, m, mp); | ||
601 | } | ||
602 | |||
603 | /* Square a and put result in r. (r = a * a) | ||
604 | * | ||
605 | * r A single precision integer. | ||
606 | * a A single precision integer. | ||
607 | */ | ||
608 | static void sp_256_sqr_10(sp_digit* r, const sp_digit* a) | ||
609 | { | ||
610 | int i, j, k; | ||
611 | int64_t c; | ||
612 | |||
613 | c = ((int64_t)a[9]) * a[9]; | ||
614 | r[19] = (sp_digit)(c >> 26); | ||
615 | c = (c & 0x3ffffff) << 26; | ||
616 | for (k = 17; k >= 0; k--) { | ||
617 | for (i = 9; i >= 0; i--) { | ||
618 | j = k - i; | ||
619 | if (j >= 10 || i <= j) | ||
620 | break; | ||
621 | if (j < 0) | ||
622 | continue; | ||
623 | |||
624 | c += ((int64_t)a[i]) * a[j] * 2; | ||
625 | } | ||
626 | if (i == j) | ||
627 | c += ((int64_t)a[i]) * a[i]; | ||
628 | |||
629 | r[k + 2] += c >> 52; | ||
630 | r[k + 1] = (c >> 26) & 0x3ffffff; | ||
631 | c = (c & 0x3ffffff) << 26; | ||
632 | } | ||
633 | r[0] = (sp_digit)(c >> 26); | ||
634 | } | ||
635 | |||
636 | /* Square the Montgomery form number. (r = a * a mod m) | ||
637 | * | ||
638 | * r Result of squaring. | ||
639 | * a Number to square in Montogmery form. | ||
640 | * m Modulus (prime). | ||
641 | * mp Montogmery mulitplier. | ||
642 | */ | ||
643 | static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m, | ||
644 | sp_digit mp) | ||
645 | { | ||
646 | sp_256_sqr_10(r, a); | ||
647 | sp_256_mont_reduce_10(r, m, mp); | ||
648 | } | ||
649 | |||
650 | /* Invert the number, in Montgomery form, modulo the modulus (prime) of the | ||
651 | * P256 curve. (r = 1 / a mod m) | ||
652 | * | ||
653 | * r Inverse result. | ||
654 | * a Number to invert. | ||
655 | * td Temporary data. | ||
656 | */ | ||
657 | /* Mod-2 for the P256 curve. */ | ||
658 | static const uint32_t p256_mod_2[8] = { | ||
659 | 0xfffffffd,0xffffffff,0xffffffff,0x00000000, | ||
660 | 0x00000000,0x00000000,0x00000001,0xffffffff, | ||
661 | }; | ||
662 | static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a, sp_digit* td) | ||
663 | { | ||
664 | sp_digit* t = td; | ||
665 | int i; | ||
666 | |||
667 | memcpy(t, a, sizeof(sp_digit) * 10); | ||
668 | for (i = 254; i >= 0; i--) { | ||
669 | sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod); | ||
670 | if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32))) | ||
671 | sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod); | ||
672 | } | ||
673 | memcpy(r, t, sizeof(sp_digit) * 10); | ||
674 | } | ||
675 | |||
676 | /* Map the Montgomery form projective co-ordinate point to an affine point. | ||
677 | * | ||
678 | * r Resulting affine co-ordinate point. | ||
679 | * p Montgomery form projective co-ordinate point. | ||
680 | * t Temporary ordinate data. | ||
681 | */ | ||
682 | static void sp_256_map_10(sp_point* r, sp_point* p, sp_digit* t) | ||
683 | { | ||
684 | sp_digit* t1 = t; | ||
685 | sp_digit* t2 = t + 2*10; | ||
686 | int32_t n; | ||
687 | |||
688 | sp_256_mont_inv_10(t1, p->z, t + 2*10); | ||
689 | |||
690 | sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod); | ||
691 | sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod); | ||
692 | |||
693 | /* x /= z^2 */ | ||
694 | sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod); | ||
695 | memset(r->x + 10, 0, sizeof(r->x) / 2); | ||
696 | sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod); | ||
697 | /* Reduce x to less than modulus */ | ||
698 | n = sp_256_cmp_10(r->x, p256_mod); | ||
699 | sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - (n >= 0)); | ||
700 | sp_256_norm_10(r->x); | ||
701 | |||
702 | /* y /= z^3 */ | ||
703 | sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod); | ||
704 | memset(r->y + 10, 0, sizeof(r->y) / 2); | ||
705 | sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod); | ||
706 | /* Reduce y to less than modulus */ | ||
707 | n = sp_256_cmp_10(r->y, p256_mod); | ||
708 | sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - (n >= 0)); | ||
709 | sp_256_norm_10(r->y); | ||
710 | |||
711 | memset(r->z, 0, sizeof(r->z)); | ||
712 | r->z[0] = 1; | ||
713 | } | ||
714 | |||
715 | /* Double the Montgomery form projective point p. | ||
716 | * | ||
717 | * r Result of doubling point. | ||
718 | * p Point to double. | ||
719 | * t Temporary ordinate data. | ||
720 | */ | ||
721 | static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p, sp_digit* t) | ||
722 | { | ||
723 | sp_point *rp[2]; | ||
724 | sp_point tp; | ||
725 | sp_digit* t1 = t; | ||
726 | sp_digit* t2 = t + 2*10; | ||
727 | sp_digit* x; | ||
728 | sp_digit* y; | ||
729 | sp_digit* z; | ||
730 | int i; | ||
731 | |||
732 | /* When infinity don't double point passed in - constant time. */ | ||
733 | rp[0] = r; | ||
734 | rp[1] = &tp; | ||
735 | x = rp[p->infinity]->x; | ||
736 | y = rp[p->infinity]->y; | ||
737 | z = rp[p->infinity]->z; | ||
738 | /* Put point to double into result - good for infinity. */ | ||
739 | if (r != p) { | ||
740 | for (i = 0; i < 10; i++) | ||
741 | r->x[i] = p->x[i]; | ||
742 | for (i = 0; i < 10; i++) | ||
743 | r->y[i] = p->y[i]; | ||
744 | for (i = 0; i < 10; i++) | ||
745 | r->z[i] = p->z[i]; | ||
746 | r->infinity = p->infinity; | ||
747 | } | ||
748 | |||
749 | /* T1 = Z * Z */ | ||
750 | sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod); | ||
751 | /* Z = Y * Z */ | ||
752 | sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod); | ||
753 | /* Z = 2Z */ | ||
754 | sp_256_mont_dbl_10(z, z, p256_mod); | ||
755 | /* T2 = X - T1 */ | ||
756 | sp_256_mont_sub_10(t2, x, t1, p256_mod); | ||
757 | /* T1 = X + T1 */ | ||
758 | sp_256_mont_add_10(t1, x, t1, p256_mod); | ||
759 | /* T2 = T1 * T2 */ | ||
760 | sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod); | ||
761 | /* T1 = 3T2 */ | ||
762 | sp_256_mont_tpl_10(t1, t2, p256_mod); | ||
763 | /* Y = 2Y */ | ||
764 | sp_256_mont_dbl_10(y, y, p256_mod); | ||
765 | /* Y = Y * Y */ | ||
766 | sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod); | ||
767 | /* T2 = Y * Y */ | ||
768 | sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod); | ||
769 | /* T2 = T2/2 */ | ||
770 | sp_256_div2_10(t2, t2, p256_mod); | ||
771 | /* Y = Y * X */ | ||
772 | sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod); | ||
773 | /* X = T1 * T1 */ | ||
774 | sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod); | ||
775 | /* X = X - Y */ | ||
776 | sp_256_mont_sub_10(x, x, y, p256_mod); | ||
777 | /* X = X - Y */ | ||
778 | sp_256_mont_sub_10(x, x, y, p256_mod); | ||
779 | /* Y = Y - X */ | ||
780 | sp_256_mont_sub_10(y, y, x, p256_mod); | ||
781 | /* Y = Y * T1 */ | ||
782 | sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod); | ||
783 | /* Y = Y - T2 */ | ||
784 | sp_256_mont_sub_10(y, y, t2, p256_mod); | ||
785 | } | ||
786 | |||
787 | /* Add two Montgomery form projective points. | ||
788 | * | ||
789 | * r Result of addition. | ||
790 | * p Frist point to add. | ||
791 | * q Second point to add. | ||
792 | * t Temporary ordinate data. | ||
793 | */ | ||
794 | static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q, | ||
795 | sp_digit* t) | ||
796 | { | ||
797 | sp_point *ap[2]; | ||
798 | sp_point *rp[2]; | ||
799 | sp_point tp; | ||
800 | sp_digit* t1 = t; | ||
801 | sp_digit* t2 = t + 2*10; | ||
802 | sp_digit* t3 = t + 4*10; | ||
803 | sp_digit* t4 = t + 6*10; | ||
804 | sp_digit* t5 = t + 8*10; | ||
805 | sp_digit* x; | ||
806 | sp_digit* y; | ||
807 | sp_digit* z; | ||
808 | int i; | ||
809 | |||
810 | /* Ensure only the first point is the same as the result. */ | ||
811 | if (q == r) { | ||
812 | sp_point* a = p; | ||
813 | p = q; | ||
814 | q = a; | ||
815 | } | ||
816 | |||
817 | /* Check double */ | ||
818 | sp_256_sub_10(t1, p256_mod, q->y); | ||
819 | sp_256_norm_10(t1); | ||
820 | if (sp_256_cmp_equal_10(p->x, q->x) | ||
821 | & sp_256_cmp_equal_10(p->z, q->z) | ||
822 | & (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1)) | ||
823 | ) { | ||
824 | sp_256_proj_point_dbl_10(r, p, t); | ||
825 | } | ||
826 | else { | ||
827 | rp[0] = r; | ||
828 | rp[1] = &tp; | ||
829 | memset(&tp, 0, sizeof(tp)); | ||
830 | x = rp[p->infinity | q->infinity]->x; | ||
831 | y = rp[p->infinity | q->infinity]->y; | ||
832 | z = rp[p->infinity | q->infinity]->z; | ||
833 | |||
834 | ap[0] = p; | ||
835 | ap[1] = q; | ||
836 | for (i=0; i<10; i++) | ||
837 | r->x[i] = ap[p->infinity]->x[i]; | ||
838 | for (i=0; i<10; i++) | ||
839 | r->y[i] = ap[p->infinity]->y[i]; | ||
840 | for (i=0; i<10; i++) | ||
841 | r->z[i] = ap[p->infinity]->z[i]; | ||
842 | r->infinity = ap[p->infinity]->infinity; | ||
843 | |||
844 | /* U1 = X1*Z2^2 */ | ||
845 | sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod); | ||
846 | sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod); | ||
847 | sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod); | ||
848 | /* U2 = X2*Z1^2 */ | ||
849 | sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod); | ||
850 | sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod); | ||
851 | sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod); | ||
852 | /* S1 = Y1*Z2^3 */ | ||
853 | sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod); | ||
854 | /* S2 = Y2*Z1^3 */ | ||
855 | sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod); | ||
856 | /* H = U2 - U1 */ | ||
857 | sp_256_mont_sub_10(t2, t2, t1, p256_mod); | ||
858 | /* R = S2 - S1 */ | ||
859 | sp_256_mont_sub_10(t4, t4, t3, p256_mod); | ||
860 | /* Z3 = H*Z1*Z2 */ | ||
861 | sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod); | ||
862 | sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod); | ||
863 | /* X3 = R^2 - H^3 - 2*U1*H^2 */ | ||
864 | sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod); | ||
865 | sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod); | ||
866 | sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod); | ||
867 | sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod); | ||
868 | sp_256_mont_sub_10(x, x, t5, p256_mod); | ||
869 | sp_256_mont_dbl_10(t1, y, p256_mod); | ||
870 | sp_256_mont_sub_10(x, x, t1, p256_mod); | ||
871 | /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ | ||
872 | sp_256_mont_sub_10(y, y, x, p256_mod); | ||
873 | sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod); | ||
874 | sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod); | ||
875 | sp_256_mont_sub_10(y, y, t5, p256_mod); | ||
876 | } | ||
877 | } | ||
878 | |||
879 | /* Multiply the point by the scalar and return the result. | ||
880 | * If map is true then convert result to affine co-ordinates. | ||
881 | * | ||
882 | * r Resulting point. | ||
883 | * g Point to multiply. | ||
884 | * k Scalar to multiply by. | ||
885 | */ | ||
886 | static void sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k /*, int map*/) | ||
887 | { | ||
888 | enum { map = 1 }; /* we always convert result to affine coordinates */ | ||
889 | sp_point td[3]; | ||
890 | sp_point* t[3]; | ||
891 | sp_digit tmp[2 * 10 * 5]; | ||
892 | sp_digit n; | ||
893 | int i; | ||
894 | int c, y; | ||
895 | |||
896 | memset(td, 0, sizeof(td)); | ||
897 | |||
898 | t[0] = &td[0]; | ||
899 | t[1] = &td[1]; | ||
900 | t[2] = &td[2]; | ||
901 | |||
902 | /* t[0] = {0, 0, 1} * norm */ | ||
903 | t[0]->infinity = 1; | ||
904 | /* t[1] = {g->x, g->y, g->z} * norm */ | ||
905 | sp_256_mod_mul_norm_10(t[1]->x, g->x); | ||
906 | sp_256_mod_mul_norm_10(t[1]->y, g->y); | ||
907 | sp_256_mod_mul_norm_10(t[1]->z, g->z); | ||
908 | |||
909 | i = 9; | ||
910 | c = 22; | ||
911 | n = k[i--] << (26 - c); | ||
912 | for (; ; c--) { | ||
913 | if (c == 0) { | ||
914 | if (i == -1) | ||
915 | break; | ||
916 | |||
917 | n = k[i--]; | ||
918 | c = 26; | ||
919 | } | ||
920 | |||
921 | y = (n >> 25) & 1; | ||
922 | n <<= 1; | ||
923 | |||
924 | sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp); | ||
925 | ///FIXME type (or rewrite - get rid of t[] array) | ||
926 | memcpy(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + | ||
927 | ((size_t)t[1] & addr_mask[y])), | ||
928 | sizeof(sp_point)); | ||
929 | sp_256_proj_point_dbl_10(t[2], t[2], tmp); | ||
930 | memcpy((void*)(((size_t)t[0] & addr_mask[y^1]) + | ||
931 | ((size_t)t[1] & addr_mask[y])), t[2], | ||
932 | sizeof(sp_point)); | ||
933 | } | ||
934 | |||
935 | if (map) | ||
936 | sp_256_map_10(r, t[0], tmp); | ||
937 | else | ||
938 | memcpy(r, t[0], sizeof(sp_point)); | ||
939 | |||
940 | memset(tmp, 0, sizeof(tmp)); | ||
941 | memset(td, 0, sizeof(td)); | ||
942 | } | ||
943 | |||
944 | /* Multiply the base point of P256 by the scalar and return the result. | ||
945 | * If map is true then convert result to affine co-ordinates. | ||
946 | * | ||
947 | * r Resulting point. | ||
948 | * k Scalar to multiply by. | ||
949 | */ | ||
950 | static void sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k /*, int map*/) | ||
951 | { | ||
952 | sp_256_ecc_mulmod_10(r, &p256_base, k /*, map*/); | ||
953 | } | ||
954 | |||
955 | /* Multiply the point by the scalar and serialize the X ordinate. | ||
956 | * The number is 0 padded to maximum size on output. | ||
957 | * | ||
958 | * priv Scalar to multiply the point by. | ||
959 | * peerkey2x32 Point to multiply. | ||
960 | * out Buffer to hold X ordinate. | ||
961 | */ | ||
962 | static void sp_ecc_secret_gen_256(sp_digit priv[10], const uint8_t *peerkey2x32, uint8_t* out32) | ||
963 | { | ||
964 | sp_point point[1]; | ||
965 | |||
966 | #if FIXED_PEER_PUBKEY | ||
967 | memset((void*)peerkey32, 0x55, 64); | ||
968 | #endif | ||
969 | dump_hex("peerkey32 %s\n", peerkey2x32, 32); | ||
970 | dump_hex(" %s\n", peerkey2x32 + 32, 32); | ||
971 | |||
972 | sp_256_point_from_bin2x32(point, peerkey2x32); | ||
973 | dump_hex("point->x %s\n", point->x, sizeof(point->x)); | ||
974 | dump_hex("point->y %s\n", point->y, sizeof(point->y)); | ||
975 | |||
976 | sp_256_ecc_mulmod_10(point, point, priv); | ||
977 | |||
978 | sp_256_to_bin(point->x, out32); | ||
979 | dump_hex("out32: %s\n", out32, 32); | ||
980 | } | ||
981 | |||
982 | /* Generates a scalar that is in the range 1..order-1. | ||
983 | * | ||
984 | * rng Random number generator. | ||
985 | * k Scalar value. | ||
986 | */ | ||
987 | static void sp_256_ecc_gen_k_10(sp_digit k[10]) | ||
988 | { | ||
989 | #define SIMPLIFY 1 | ||
990 | #if !SIMPLIFY | ||
991 | /* The order of the curve P256 minus 2. */ | ||
992 | static const sp_digit p256_order2[10] = { | ||
993 | 0x063254f,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa, | ||
994 | 0x3ffffff,0x3ffffff,0x00003ff,0x3ff0000,0x03fffff, | ||
995 | }; | ||
996 | #endif | ||
997 | uint8_t buf[32]; | ||
998 | |||
999 | for (;;) { | ||
1000 | tls_get_random(buf, sizeof(buf)); | ||
1001 | #if FIXED_SECRET | ||
1002 | memset(buf, 0x77, sizeof(buf)); | ||
1003 | #endif | ||
1004 | sp_256_from_bin(k, 10, buf, sizeof(buf)); | ||
1005 | #if !SIMPLIFY | ||
1006 | if (sp_256_cmp_10(k, p256_order2) < 0) | ||
1007 | break; | ||
1008 | #else | ||
1009 | /* non-loopy version (and not needing p256_order2[]): | ||
1010 | * if most-significant word seems that it can be larger | ||
1011 | * than p256_order2, fix it up: | ||
1012 | */ | ||
1013 | if (k[9] >= 0x03fffff) | ||
1014 | k[9] = 0x03ffffe; | ||
1015 | break; | ||
1016 | #endif | ||
1017 | } | ||
1018 | sp_256_add_one_10(k); | ||
1019 | #undef SIMPLIFY | ||
1020 | } | ||
1021 | |||
1022 | /* Makes a random EC key pair. | ||
1023 | * | ||
1024 | * priv Generated private value. | ||
1025 | * pubkey Generated public point. | ||
1026 | */ | ||
1027 | static void sp_ecc_make_key_256(sp_digit k[10], uint8_t *pubkey) | ||
1028 | { | ||
1029 | sp_point point[1]; | ||
1030 | |||
1031 | sp_256_ecc_gen_k_10(k); | ||
1032 | sp_256_ecc_mulmod_base_10(point, k); | ||
1033 | sp_256_to_bin(point->x, pubkey); | ||
1034 | sp_256_to_bin(point->y, pubkey + 32); | ||
1035 | |||
1036 | memset(point, 0, sizeof(point)); //paranoia | ||
1037 | } | ||
1038 | |||
1039 | void FAST_FUNC curve_P256_compute_pubkey_and_premaster( | ||
1040 | uint8_t *pubkey, uint8_t *premaster32, | ||
1041 | const uint8_t *peerkey2x32) | ||
1042 | { | ||
1043 | sp_digit privkey[10]; | ||
1044 | |||
1045 | sp_ecc_make_key_256(privkey, pubkey); | ||
1046 | dump_hex("pubkey: %s\n", pubkey, 32); | ||
1047 | dump_hex(" %s\n", pubkey + 32, 32); | ||
1048 | |||
1049 | /* Combine our privkey and peerkey32 to generate premaster */ | ||
1050 | sp_ecc_secret_gen_256(privkey, /*x,y:*/peerkey2x32, premaster32); | ||
1051 | dump_hex("premaster: %s\n", premaster32, 32); | ||
1052 | } | ||