diff options
Diffstat (limited to 'networking')
-rw-r--r-- | networking/tls.c | 225 | ||||
-rw-r--r-- | networking/tls.h | 73 | ||||
-rw-r--r-- | networking/tls_pstm.c | 2254 | ||||
-rw-r--r-- | networking/tls_pstm.h | 238 | ||||
-rw-r--r-- | networking/tls_pstm_montgomery_reduce.c | 423 | ||||
-rw-r--r-- | networking/tls_pstm_mul_comba.c | 777 | ||||
-rw-r--r-- | networking/tls_pstm_sqr_comba.c | 1107 | ||||
-rw-r--r-- | networking/tls_rsa.c | 203 | ||||
-rw-r--r-- | networking/tls_rsa.h | 18 |
9 files changed, 5281 insertions, 37 deletions
diff --git a/networking/tls.c b/networking/tls.c index 69c81b558..b0a4f7e75 100644 --- a/networking/tls.c +++ b/networking/tls.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
3 | * | ||
4 | * Copyright (C) 2017 Denys Vlasenko | 2 | * Copyright (C) 2017 Denys Vlasenko |
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | 5 | */ |
6 | //config:config TLS | 6 | //config:config TLS |
7 | //config: bool "tls (debugging)" | 7 | //config: bool "tls (debugging)" |
@@ -10,6 +10,11 @@ | |||
10 | //applet:IF_TLS(APPLET(tls, BB_DIR_USR_BIN, BB_SUID_DROP)) | 10 | //applet:IF_TLS(APPLET(tls, BB_DIR_USR_BIN, BB_SUID_DROP)) |
11 | 11 | ||
12 | //kbuild:lib-$(CONFIG_TLS) += tls.o | 12 | //kbuild:lib-$(CONFIG_TLS) += tls.o |
13 | //kbuild:lib-$(CONFIG_TLS) += tls_pstm.o | ||
14 | //kbuild:lib-$(CONFIG_TLS) += tls_pstm_montgomery_reduce.o | ||
15 | //kbuild:lib-$(CONFIG_TLS) += tls_pstm_mul_comba.o | ||
16 | //kbuild:lib-$(CONFIG_TLS) += tls_pstm_sqr_comba.o | ||
17 | //kbuild:lib-$(CONFIG_TLS) += tls_rsa.o | ||
13 | ////kbuild:lib-$(CONFIG_TLS) += tls_ciphers.o | 18 | ////kbuild:lib-$(CONFIG_TLS) += tls_ciphers.o |
14 | ////kbuild:lib-$(CONFIG_TLS) += tls_aes.o | 19 | ////kbuild:lib-$(CONFIG_TLS) += tls_aes.o |
15 | ////kbuild:lib-$(CONFIG_TLS) += tls_aes_gcm.o | 20 | ////kbuild:lib-$(CONFIG_TLS) += tls_aes_gcm.o |
@@ -18,9 +23,7 @@ | |||
18 | //usage: "HOST[:PORT]" | 23 | //usage: "HOST[:PORT]" |
19 | //usage:#define tls_full_usage "\n\n" | 24 | //usage:#define tls_full_usage "\n\n" |
20 | 25 | ||
21 | #include "libbb.h" | 26 | #include "tls.h" |
22 | //#include "tls_cryptoapi.h" | ||
23 | //#include "tls_ciphers.h" | ||
24 | 27 | ||
25 | #if 1 | 28 | #if 1 |
26 | # define dbg(...) fprintf(stderr, __VA_ARGS__) | 29 | # define dbg(...) fprintf(stderr, __VA_ARGS__) |
@@ -28,23 +31,26 @@ | |||
28 | # define dbg(...) ((void)0) | 31 | # define dbg(...) ((void)0) |
29 | #endif | 32 | #endif |
30 | 33 | ||
31 | #define RECORD_TYPE_CHANGE_CIPHER_SPEC 20 | 34 | #define RECORD_TYPE_CHANGE_CIPHER_SPEC 20 |
32 | #define RECORD_TYPE_ALERT 21 | 35 | #define RECORD_TYPE_ALERT 21 |
33 | #define RECORD_TYPE_HANDSHAKE 22 | 36 | #define RECORD_TYPE_HANDSHAKE 22 |
34 | #define RECORD_TYPE_APPLICATION_DATA 23 | 37 | #define RECORD_TYPE_APPLICATION_DATA 23 |
35 | 38 | ||
36 | #define HANDSHAKE_HELLO_REQUEST 0 | 39 | #define HANDSHAKE_HELLO_REQUEST 0 |
37 | #define HANDSHAKE_CLIENT_HELLO 1 | 40 | #define HANDSHAKE_CLIENT_HELLO 1 |
38 | #define HANDSHAKE_SERVER_HELLO 2 | 41 | #define HANDSHAKE_SERVER_HELLO 2 |
39 | #define HANDSHAKE_HELLO_VERIFY_REQUEST 3 | 42 | #define HANDSHAKE_HELLO_VERIFY_REQUEST 3 |
40 | #define HANDSHAKE_NEW_SESSION_TICKET 4 | 43 | #define HANDSHAKE_NEW_SESSION_TICKET 4 |
41 | #define HANDSHAKE_CERTIFICATE 11 | 44 | #define HANDSHAKE_CERTIFICATE 11 |
42 | #define HANDSHAKE_SERVER_KEY_EXCHANGE 12 | 45 | #define HANDSHAKE_SERVER_KEY_EXCHANGE 12 |
43 | #define HANDSHAKE_CERTIFICATE_REQUEST 13 | 46 | #define HANDSHAKE_CERTIFICATE_REQUEST 13 |
44 | #define HANDSHAKE_SERVER_HELLO_DONE 14 | 47 | #define HANDSHAKE_SERVER_HELLO_DONE 14 |
45 | #define HANDSHAKE_CERTIFICATE_VERIFY 15 | 48 | #define HANDSHAKE_CERTIFICATE_VERIFY 15 |
46 | #define HANDSHAKE_CLIENT_KEY_EXCHANGE 16 | 49 | #define HANDSHAKE_CLIENT_KEY_EXCHANGE 16 |
47 | #define HANDSHAKE_FINISHED 20 | 50 | #define HANDSHAKE_FINISHED 20 |
51 | |||
52 | #define SSL_HS_RANDOM_SIZE 32 | ||
53 | #define SSL_HS_RSA_PREMASTER_SIZE 48 | ||
48 | 54 | ||
49 | #define SSL_NULL_WITH_NULL_NULL 0x0000 | 55 | #define SSL_NULL_WITH_NULL_NULL 0x0000 |
50 | #define SSL_RSA_WITH_NULL_MD5 0x0001 | 56 | #define SSL_RSA_WITH_NULL_MD5 0x0001 |
@@ -112,6 +118,7 @@ | |||
112 | //TLS 1.2 | 118 | //TLS 1.2 |
113 | #define TLS_MAJ 3 | 119 | #define TLS_MAJ 3 |
114 | #define TLS_MIN 3 | 120 | #define TLS_MIN 3 |
121 | //#define CIPHER_ID TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA // ok, recvs SERVER_KEY_EXCHANGE *** matrixssl uses this on my box | ||
115 | //#define CIPHER_ID TLS_RSA_WITH_AES_256_CBC_SHA256 // ok, no SERVER_KEY_EXCHANGE | 122 | //#define CIPHER_ID TLS_RSA_WITH_AES_256_CBC_SHA256 // ok, no SERVER_KEY_EXCHANGE |
116 | // All GCMs: | 123 | // All GCMs: |
117 | //#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 // SSL_ALERT_HANDSHAKE_FAILURE | 124 | //#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 // SSL_ALERT_HANDSHAKE_FAILURE |
@@ -123,9 +130,9 @@ | |||
123 | //#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 | 130 | //#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 |
124 | //#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE | 131 | //#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE |
125 | //#define CIPHER_ID TLS_RSA_WITH_AES_256_GCM_SHA384 // ok, no SERVER_KEY_EXCHANGE | 132 | //#define CIPHER_ID TLS_RSA_WITH_AES_256_GCM_SHA384 // ok, no SERVER_KEY_EXCHANGE |
126 | #define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE | 133 | #define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE *** select this? |
127 | //#define CIPHER_ID TLS_DH_anon_WITH_AES_256_CBC_SHA // SSL_ALERT_HANDSHAKE_FAILURE | 134 | //#define CIPHER_ID TLS_DH_anon_WITH_AES_256_CBC_SHA // SSL_ALERT_HANDSHAKE_FAILURE |
128 | // (tested b/c this one doesn't req server certs... no luck) | 135 | //^^^^^^^^^^^^^^^^^^^^^^^ (tested b/c this one doesn't req server certs... no luck) |
129 | //test TLS_RSA_WITH_AES_128_CBC_SHA, in tls 1.2 it's mandated to be always supported | 136 | //test TLS_RSA_WITH_AES_128_CBC_SHA, in tls 1.2 it's mandated to be always supported |
130 | 137 | ||
131 | struct record_hdr { | 138 | struct record_hdr { |
@@ -137,8 +144,7 @@ struct record_hdr { | |||
137 | typedef struct tls_state { | 144 | typedef struct tls_state { |
138 | int fd; | 145 | int fd; |
139 | 146 | ||
140 | uint8_t *pubkey; | 147 | psRsaKey_t server_rsa_pub_key; |
141 | int pubkey_len; | ||
142 | 148 | ||
143 | // RFC 5246 | 149 | // RFC 5246 |
144 | // |6.2.1. Fragmentation | 150 | // |6.2.1. Fragmentation |
@@ -170,6 +176,12 @@ typedef struct tls_state { | |||
170 | uint8_t inbuf[18*1024]; | 176 | uint8_t inbuf[18*1024]; |
171 | } tls_state_t; | 177 | } tls_state_t; |
172 | 178 | ||
/*
 * Fill buf with len bytes read from /dev/urandom.
 * Terminates the program via xfunc_die() unless exactly len bytes
 * were obtained.
 */
void tls_get_random(void *buf, unsigned len)
{
	if (open_read_close("/dev/urandom", buf, len) == len)
		return;
	/* anything other than a complete read leaves us without usable randomness */
	xfunc_die();
}
184 | |||
173 | static | 185 | static |
174 | tls_state_t *new_tls_state(void) | 186 | tls_state_t *new_tls_state(void) |
175 | { | 187 | { |
@@ -286,7 +298,7 @@ static void send_client_hello(tls_state_t *tls) | |||
286 | hello.len24_lo = (sizeof(hello) - sizeof(hello.xhdr) - 4); | 298 | hello.len24_lo = (sizeof(hello) - sizeof(hello.xhdr) - 4); |
287 | hello.proto_maj = TLS_MAJ; | 299 | hello.proto_maj = TLS_MAJ; |
288 | hello.proto_min = TLS_MIN; | 300 | hello.proto_min = TLS_MIN; |
289 | open_read_close("/dev/urandom", hello.rand32, sizeof(hello.rand32)); | 301 | tls_get_random(hello.rand32, sizeof(hello.rand32)); |
290 | //hello.session_id_len = 0; | 302 | //hello.session_id_len = 0; |
291 | //hello.cipherid_len16_hi = 0; | 303 | //hello.cipherid_len16_hi = 0; |
292 | hello.cipherid_len16_lo = 2 * 1; | 304 | hello.cipherid_len16_lo = 2 * 1; |
@@ -407,7 +419,18 @@ static uint8_t *skip_der_item(uint8_t *der, uint8_t *end) | |||
407 | return new_der; | 419 | return new_der; |
408 | } | 420 | } |
409 | 421 | ||
410 | static void *find_key_in_der_cert(int *key_len, uint8_t *der, int len) | 422 | static void der_binary_to_pstm(pstm_int *pstm_n, uint8_t *der, uint8_t *end) |
423 | { | ||
424 | uint8_t *bin_ptr; | ||
425 | unsigned len = get_der_len(&bin_ptr, der, end); | ||
426 | |||
427 | dbg("binary bytes:%u, first:0x%02x\n", len, bin_ptr[0]); | ||
428 | pstm_init_for_read_unsigned_bin(/*pool:*/ NULL, pstm_n, len); | ||
429 | pstm_read_unsigned_bin(pstm_n, bin_ptr, len); | ||
430 | //return bin + len; | ||
431 | } | ||
432 | |||
433 | static void find_key_in_der_cert(tls_state_t *tls, uint8_t *der, int len) | ||
411 | { | 434 | { |
412 | /* Certificate is a DER-encoded data structure. Each DER element has a length, | 435 | /* Certificate is a DER-encoded data structure. Each DER element has a length, |
413 | * which makes it easy to skip over large compound elements of any complexity | 436 | * which makes it easy to skip over large compound elements of any complexity |
@@ -504,19 +527,43 @@ static void *find_key_in_der_cert(int *key_len, uint8_t *der, int len) | |||
504 | der = skip_der_item(der, end); /* validity */ | 527 | der = skip_der_item(der, end); /* validity */ |
505 | der = skip_der_item(der, end); /* subject */ | 528 | der = skip_der_item(der, end); /* subject */ |
506 | 529 | ||
507 | /* enter "subjectPublicKeyInfo" */ | 530 | /* enter subjectPublicKeyInfo */ |
508 | der = enter_der_item(der, &end); | 531 | der = enter_der_item(der, &end); |
509 | 532 | { /* check subjectPublicKeyInfo.algorithm */ | |
510 | /* skip "subjectPublicKeyInfo.algorithm" */ | 533 | static const uint8_t expected[] = { |
534 | 0x30,0x0d, // SEQ 13 bytes | ||
535 | 0x06,0x09, 0x2a,0x86,0x48,0x86,0xf7,0x0d,0x01,0x01,0x01, // OID RSA_KEY_ALG 42.134.72.134.247.13.1.1.1 | ||
536 | //0x05,0x00, // NULL | ||
537 | }; | ||
538 | if (memcmp(der, expected, sizeof(expected)) != 0) | ||
539 | bb_error_msg_and_die("not RSA key"); | ||
540 | } | ||
541 | /* skip subjectPublicKeyInfo.algorithm */ | ||
511 | der = skip_der_item(der, end); | 542 | der = skip_der_item(der, end); |
512 | /* enter "subjectPublicKeyInfo.publicKey" */ | 543 | /* enter subjectPublicKeyInfo.publicKey */ |
513 | // die_if_not_this_der_type(der, end, 0x03); /* must be BITSTRING */ | 544 | // die_if_not_this_der_type(der, end, 0x03); /* must be BITSTRING */ |
514 | der = enter_der_item(der, &end); | 545 | der = enter_der_item(der, &end); |
515 | 546 | ||
516 | /* return a copy */ | 547 | /* parse RSA key: */ |
517 | *key_len = end - der; | 548 | //based on getAsnRsaPubKey(), pkcs1ParsePrivBin() is also of note |
518 | dbg("copying key bytes:%u, first:0x%02x\n", *key_len, der[0]); | 549 | dbg("key bytes:%u, first:0x%02x\n", (int)(end - der), der[0]); |
519 | return xmemdup(der, *key_len); | 550 | if (end - der < 14) xfunc_die(); |
551 | /* example format: | ||
552 | * ignore bits: 00 | ||
553 | * SEQ 0x018a/394 bytes: 3082018a | ||
554 | * INTEGER 0x0181/385 bytes (modulus): 02820181 XX...XXX | ||
555 | * INTEGER 3 bytes (exponent): 0203 010001 | ||
556 | */ | ||
557 | if (*der != 0) /* "ignore bits", should be 0 */ | ||
558 | xfunc_die(); | ||
559 | der++; | ||
560 | der = enter_der_item(der, &end); /* enter SEQ */ | ||
561 | //memset(tls->server_rsa_pub_key, 0, sizeof(tls->server_rsa_pub_key)); | ||
562 | der_binary_to_pstm(&tls->server_rsa_pub_key.N, der, end); /* modulus */ | ||
563 | der = skip_der_item(der, end); | ||
564 | der_binary_to_pstm(&tls->server_rsa_pub_key.e, der, end); /* exponent */ | ||
565 | tls->server_rsa_pub_key.size = pstm_unsigned_bin_size(&tls->server_rsa_pub_key.N); | ||
566 | dbg("server_rsa_pub_key.size:%d\n", tls->server_rsa_pub_key.size); | ||
520 | } | 567 | } |
521 | 568 | ||
522 | static void get_server_cert_or_die(tls_state_t *tls) | 569 | static void get_server_cert_or_die(tls_state_t *tls) |
@@ -553,7 +600,107 @@ static void get_server_cert_or_die(tls_state_t *tls) | |||
553 | len = len1; | 600 | len = len1; |
554 | 601 | ||
555 | if (len) | 602 | if (len) |
556 | tls->pubkey = find_key_in_der_cert(&tls->pubkey_len, certbuf + 10, len); | 603 | find_key_in_der_cert(tls, certbuf + 10, len); |
604 | } | ||
605 | |||
606 | static void send_client_key_exchange(tls_state_t *tls) | ||
607 | { | ||
608 | #if 0 //matrixssl code snippets: | ||
609 | int32 csRsaEncryptPub(psPool_t *pool, psPubKey_t *key, | ||
610 | unsigned char *in, uint32 inlen, unsigned char *out, uint32 outlen, | ||
611 | void *data) | ||
612 | { | ||
613 | psAssert(key->type == PS_RSA); | ||
614 | return psRsaEncryptPub(pool, (psRsaKey_t*)key->key, in, inlen, out, outlen, | ||
615 | data); | ||
616 | } | ||
617 | ... | ||
618 | /* pkaAfter.user is buffer len */ | ||
619 | if ((rc = csRsaEncryptPub(pka->pool, &ssl->sec.cert->publicKey, | ||
620 | ssl->sec.premaster, ssl->sec.premasterSize, pka->outbuf, | ||
621 | pka->user, pka->data)) < 0) { | ||
622 | if (rc == PS_PENDING) { | ||
623 | /* For these ClientKeyExchange paths, we do want to come | ||
624 | back through nowDoCkePka for a double pass so each | ||
625 | case can manage its own pkaAfter and to make sure | ||
626 | psX509FreeCert and sslCreateKeys() are hit below. */ | ||
627 | return rc; | ||
628 | } | ||
629 | psTraceIntInfo("csRsaEncryptPub in CKE failed %d\n", rc); | ||
630 | return MATRIXSSL_ERROR; | ||
631 | } | ||
632 | /* RSA closed the pool on second pass */ | ||
633 | pka->pool = NULL; | ||
634 | clearPkaAfter(ssl); | ||
635 | ... | ||
636 | #ifdef USE_RSA_CIPHER_SUITE | ||
637 | /* | ||
638 | Standard RSA suite | ||
639 | */ | ||
640 | ssl->sec.premasterSize = SSL_HS_RSA_PREMASTER_SIZE; | ||
641 | ssl->sec.premaster = psMalloc(ssl->hsPool, | ||
642 | SSL_HS_RSA_PREMASTER_SIZE); | ||
643 | if (ssl->sec.premaster == NULL) { | ||
644 | return SSL_MEM_ERROR; | ||
645 | } | ||
646 | |||
647 | ssl->sec.premaster[0] = ssl->reqMajVer; | ||
648 | ssl->sec.premaster[1] = ssl->reqMinVer; | ||
649 | if (matrixCryptoGetPrngData(ssl->sec.premaster + 2, | ||
650 | SSL_HS_RSA_PREMASTER_SIZE - 2, ssl->userPtr) < 0) { | ||
651 | return MATRIXSSL_ERROR; | ||
652 | } | ||
653 | |||
654 | /* Shedule RSA encryption. Put tmp pool under control of After */ | ||
655 | pkaAfter->type = PKA_AFTER_RSA_ENCRYPT; | ||
656 | pkaAfter->outbuf = c; | ||
657 | pkaAfter->data = pkiData; | ||
658 | pkaAfter->pool = pkiPool; | ||
659 | pkaAfter->user = (uint32)(end - c); /* Available space */ | ||
660 | |||
661 | c += keyLen; | ||
662 | #endif | ||
663 | #endif // 0 | ||
664 | |||
665 | struct client_key_exchange { | ||
666 | struct record_hdr xhdr; | ||
667 | uint8_t type; | ||
668 | uint8_t len24_hi, len24_mid, len24_lo; | ||
669 | uint8_t keylen16_hi, keylen16_lo; /* exist for RSA, but not for some other key types */ | ||
670 | //had a bug when had no keylen: we: | ||
671 | //write(3, "\x16\x03\x03\x01\x84\x10\x00\x01\x80\xXX\xXX\xXX\xXX\xXX\xXX...", 393) = 393 | ||
672 | //openssl: | ||
673 | //write to 0xe9a090 [0xf9ac20] (395 bytes => 395 (0x18B)) | ||
674 | //0000 - 16 03 03 01 86 10 00 01 -82 01 80 xx xx xx xx xx | ||
675 | uint8_t key[384]; // size?? | ||
676 | }; | ||
677 | struct client_key_exchange record; | ||
678 | uint8_t premaster[SSL_HS_RSA_PREMASTER_SIZE]; | ||
679 | |||
680 | memset(&record, 0, sizeof(record)); | ||
681 | record.xhdr.type = RECORD_TYPE_HANDSHAKE; | ||
682 | record.xhdr.proto_maj = TLS_MAJ; | ||
683 | record.xhdr.proto_min = TLS_MIN; | ||
684 | record.xhdr.len16_hi = (sizeof(record) - sizeof(record.xhdr)) >> 8; | ||
685 | record.xhdr.len16_lo = (sizeof(record) - sizeof(record.xhdr)) & 0xff; | ||
686 | record.type = HANDSHAKE_CLIENT_KEY_EXCHANGE; | ||
687 | //record.len24_hi = 0; | ||
688 | record.len24_mid = (sizeof(record) - sizeof(record.xhdr) - 4) >> 8; | ||
689 | record.len24_lo = (sizeof(record) - sizeof(record.xhdr) - 4) & 0xff; | ||
690 | record.keylen16_hi = (sizeof(record) - sizeof(record.xhdr) - 6) >> 8; | ||
691 | record.keylen16_lo = (sizeof(record) - sizeof(record.xhdr) - 6) & 0xff; | ||
692 | |||
693 | tls_get_random(premaster, sizeof(premaster)); | ||
694 | premaster[0] = TLS_MAJ; | ||
695 | premaster[1] = TLS_MIN; | ||
696 | psRsaEncryptPub(/*pool:*/ NULL, | ||
697 | /* psRsaKey_t* */ &tls->server_rsa_pub_key, | ||
698 | premaster, /*inlen:*/ sizeof(premaster), | ||
699 | record.key, sizeof(record.key), | ||
700 | data_param_ignored | ||
701 | ); | ||
702 | |||
703 | xwrite(tls->fd, &record, sizeof(record)); | ||
557 | } | 704 | } |
558 | 705 | ||
559 | static void tls_handshake(tls_state_t *tls) | 706 | static void tls_handshake(tls_state_t *tls) |
@@ -614,6 +761,8 @@ static void tls_handshake(tls_state_t *tls) | |||
614 | // 459 bytes: | 761 | // 459 bytes: |
615 | // 0c 00|01|c7 03|00|17|41|04|87|94|2e|2f|68|d0|c9|f4|97|a8|2d|ef|ed|67|ea|c6|f3|b3|56|47|5d|27|b6|bd|ee|70|25|30|5e|b0|8e|f6|21|5a... | 762 | // 0c 00|01|c7 03|00|17|41|04|87|94|2e|2f|68|d0|c9|f4|97|a8|2d|ef|ed|67|ea|c6|f3|b3|56|47|5d|27|b6|bd|ee|70|25|30|5e|b0|8e|f6|21|5a... |
616 | //SvKey len=455^ | 763 | //SvKey len=455^ |
764 | // with TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA: 461 bytes: | ||
765 | // 0c 00|01|c9 03|00|17|41|04|cd|9b|b4|29|1f|f6|b0|c2|84|82|7f|29|6a|47|4e|ec|87|0b|c1|9c|69|e1|f8|c6|d0|53|e9|27|90|a5|c8|02|15|75... | ||
617 | dbg("got SERVER_KEY_EXCHANGE\n"); | 766 | dbg("got SERVER_KEY_EXCHANGE\n"); |
618 | len = xread_tls_block(tls); | 767 | len = xread_tls_block(tls); |
619 | break; | 768 | break; |
@@ -624,6 +773,8 @@ static void tls_handshake(tls_state_t *tls) | |||
624 | case HANDSHAKE_SERVER_HELLO_DONE: | 773 | case HANDSHAKE_SERVER_HELLO_DONE: |
625 | // 0e 000000 (len:0) | 774 | // 0e 000000 (len:0) |
626 | dbg("got SERVER_HELLO_DONE\n"); | 775 | dbg("got SERVER_HELLO_DONE\n"); |
776 | send_client_key_exchange(tls); | ||
777 | len = xread_tls_block(tls); | ||
627 | break; | 778 | break; |
628 | default: | 779 | default: |
629 | tls_error_die(tls); | 780 | tls_error_die(tls); |
diff --git a/networking/tls.h b/networking/tls.h new file mode 100644 index 000000000..20317ecc3 --- /dev/null +++ b/networking/tls.h | |||
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "libbb.h" | ||
7 | |||
8 | /* config tweaks */ | ||
9 | #define HAVE_NATIVE_INT64 1 | ||
10 | #undef DISABLE_PSTM | ||
11 | #undef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
12 | #undef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
13 | //TODO: enable to use asm: | ||
14 | //#if defined(__GNUC__) && defined(__i386__) -> #define PSTM_32BIT and PSTM_X86 | ||
15 | //#if defined(__GNUC__) && defined(__x86_64__) -> #define PSTM_64BIT and PSTM_X86_64 | ||
16 | //ARM and MIPS also have these | ||
17 | |||
18 | |||
19 | #define PS_SUCCESS 0 | ||
20 | #define PS_FAILURE -1 | ||
21 | #define PS_ARG_FAIL -6 /* Failure due to bad function param */ | ||
22 | #define PS_PLATFORM_FAIL -7 /* Failure as a result of system call error */ | ||
23 | #define PS_MEM_FAIL -8 /* Failure to allocate requested memory */ | ||
24 | #define PS_LIMIT_FAIL -9 /* Failure on sanity/limit tests */ | ||
25 | |||
26 | #define PS_TRUE 1 | ||
27 | #define PS_FALSE 0 | ||
28 | |||
29 | #if BB_BIG_ENDIAN | ||
30 | # define ENDIAN_BIG 1 | ||
31 | # undef ENDIAN_LITTLE | ||
32 | //#???? ENDIAN_32BITWORD | ||
33 | // controls only STORE32L, which we don't use | ||
34 | #else | ||
35 | # define ENDIAN_LITTLE 1 | ||
36 | # undef ENDIAN_BIG | ||
37 | #endif | ||
38 | |||
39 | typedef uint64_t uint64; | ||
40 | typedef int64_t int64; | ||
41 | typedef uint32_t uint32; | ||
42 | typedef int32_t int32; | ||
43 | typedef uint16_t uint16; | ||
44 | typedef int16_t int16; | ||
45 | |||
46 | //FIXME | ||
47 | typedef char psPool_t; | ||
48 | |||
49 | //#ifdef PS_PUBKEY_OPTIMIZE_FOR_SMALLER_RAM | ||
50 | #define PS_EXPTMOD_WINSIZE 3 | ||
51 | //#ifdef PS_PUBKEY_OPTIMIZE_FOR_FASTER_SPEED | ||
52 | //#define PS_EXPTMOD_WINSIZE 5 | ||
53 | |||
54 | #define PUBKEY_TYPE 0x01 | ||
55 | #define PRIVKEY_TYPE 0x02 | ||
56 | |||
57 | void tls_get_random(void *buf, unsigned len); | ||
58 | |||
59 | #define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS) | ||
60 | |||
61 | #define psFree(p, pool) free(p) | ||
62 | #define psTraceCrypto(msg) bb_error_msg_and_die(msg) | ||
63 | |||
64 | /* Secure zerofill */ | ||
65 | #define memset_s(A,B,C,D) memset((A),(C),(D)) | ||
66 | /* Constant time memory comparison */ | ||
67 | #define memcmpct(s1, s2, len) memcmp((s1), (s2), (len)) | ||
68 | #undef min | ||
69 | #define min(x, y) ((x) < (y) ? (x) : (y)) | ||
70 | |||
71 | |||
72 | #include "tls_pstm.h" | ||
73 | #include "tls_rsa.h" | ||
diff --git a/networking/tls_pstm.c b/networking/tls_pstm.c new file mode 100644 index 000000000..0d797f87f --- /dev/null +++ b/networking/tls_pstm.c | |||
@@ -0,0 +1,2254 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | /** | ||
9 | * @file pstm.c | ||
10 | * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master) | ||
11 | * | ||
12 | * Multiprecision number implementation. | ||
13 | */ | ||
14 | /* | ||
15 | * Copyright (c) 2013-2015 INSIDE Secure Corporation | ||
16 | * Copyright (c) PeerSec Networks, 2002-2011 | ||
17 | * All Rights Reserved | ||
18 | * | ||
19 | * The latest version of this code is available at http://www.matrixssl.org | ||
20 | * | ||
21 | * This software is open source; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This General Public License does NOT permit incorporating this software | ||
27 | * into proprietary programs. If you are unable to comply with the GPL, a | ||
28 | * commercial license for this software may be purchased from INSIDE at | ||
29 | * http://www.insidesecure.com/eng/Company/Locations | ||
30 | * | ||
31 | * This program is distributed in WITHOUT ANY WARRANTY; without even the | ||
32 | * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
33 | * See the GNU General Public License for more details. | ||
34 | * | ||
35 | * You should have received a copy of the GNU General Public License | ||
36 | * along with this program; if not, write to the Free Software | ||
37 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
38 | * http://www.gnu.org/copyleft/gpl.html | ||
39 | */ | ||
40 | /******************************************************************************/ | ||
41 | |||
42 | ///bbox | ||
43 | //#include "../cryptoApi.h" | ||
44 | #ifndef DISABLE_PSTM | ||
45 | |||
46 | static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c); | ||
47 | |||
48 | /******************************************************************************/ | ||
49 | /* | ||
50 | init an pstm_int for a given size | ||
51 | */ | ||
52 | int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size) | ||
53 | { | ||
54 | // uint16 x; | ||
55 | |||
56 | /* | ||
57 | alloc mem | ||
58 | */ | ||
59 | a->dp = xzalloc(sizeof (pstm_digit) * size); | ||
60 | a->pool = pool; | ||
61 | a->used = 0; | ||
62 | a->alloc = (int16)size; | ||
63 | a->sign = PSTM_ZPOS; | ||
64 | /* | ||
65 | zero the digits | ||
66 | */ | ||
67 | ///bbox | ||
68 | // for (x = 0; x < size; x++) { | ||
69 | // a->dp[x] = 0; | ||
70 | // } | ||
71 | return PSTM_OKAY; | ||
72 | } | ||
73 | |||
74 | /******************************************************************************/ | ||
75 | /* | ||
76 | Init a new pstm_int. | ||
77 | */ | ||
78 | int32 pstm_init(psPool_t *pool, pstm_int * a) | ||
79 | { | ||
80 | // int32 i; | ||
81 | /* | ||
82 | allocate memory required and clear it | ||
83 | */ | ||
84 | a->dp = xzalloc(sizeof (pstm_digit) * PSTM_DEFAULT_INIT); | ||
85 | /* | ||
86 | set the digits to zero | ||
87 | */ | ||
88 | ///bbox | ||
89 | // for (i = 0; i < PSTM_DEFAULT_INIT; i++) { | ||
90 | // a->dp[i] = 0; | ||
91 | // } | ||
92 | /* | ||
93 | set the used to zero, allocated digits to the default precision and sign | ||
94 | to positive | ||
95 | */ | ||
96 | a->pool = pool; | ||
97 | a->used = 0; | ||
98 | a->alloc = PSTM_DEFAULT_INIT; | ||
99 | a->sign = PSTM_ZPOS; | ||
100 | |||
101 | return PSTM_OKAY; | ||
102 | } | ||
103 | |||
104 | /******************************************************************************/ | ||
105 | /* | ||
106 | Grow as required | ||
107 | */ | ||
108 | int32 pstm_grow(pstm_int * a, int16 size) | ||
109 | { | ||
110 | int16 i; | ||
111 | pstm_digit *tmp; | ||
112 | |||
113 | /* | ||
114 | If the alloc size is smaller alloc more ram. | ||
115 | */ | ||
116 | if (a->alloc < size) { | ||
117 | /* | ||
118 | Reallocate the array a->dp | ||
119 | |||
120 | We store the return in a temporary variable in case the operation | ||
121 | failed we don't want to overwrite the dp member of a. | ||
122 | */ | ||
123 | tmp = xrealloc(a->dp, sizeof (pstm_digit) * size); | ||
124 | /* | ||
125 | reallocation succeeded so set a->dp | ||
126 | */ | ||
127 | a->dp = tmp; | ||
128 | /* | ||
129 | zero excess digits | ||
130 | */ | ||
131 | i = a->alloc; | ||
132 | a->alloc = size; | ||
133 | for (; i < a->alloc; i++) { | ||
134 | a->dp[i] = 0; | ||
135 | } | ||
136 | } | ||
137 | return PSTM_OKAY; | ||
138 | } | ||
139 | |||
140 | /******************************************************************************/ | ||
141 | /* | ||
142 | copy, b = a (b must be pre-allocated) | ||
143 | */ | ||
144 | int32 pstm_copy(pstm_int * a, pstm_int * b) | ||
145 | { | ||
146 | int32 res, n; | ||
147 | |||
148 | /* | ||
149 | If dst == src do nothing | ||
150 | */ | ||
151 | if (a == b) { | ||
152 | return PSTM_OKAY; | ||
153 | } | ||
154 | /* | ||
155 | Grow dest | ||
156 | */ | ||
157 | if (b->alloc < a->used) { | ||
158 | if ((res = pstm_grow (b, a->used)) != PSTM_OKAY) { | ||
159 | return res; | ||
160 | } | ||
161 | } | ||
162 | /* | ||
163 | Zero b and copy the parameters over | ||
164 | */ | ||
165 | { | ||
166 | register pstm_digit *tmpa, *tmpb; | ||
167 | |||
168 | /* pointer aliases */ | ||
169 | /* source */ | ||
170 | tmpa = a->dp; | ||
171 | |||
172 | /* destination */ | ||
173 | tmpb = b->dp; | ||
174 | |||
175 | /* copy all the digits */ | ||
176 | for (n = 0; n < a->used; n++) { | ||
177 | *tmpb++ = *tmpa++; | ||
178 | } | ||
179 | |||
180 | /* clear high digits */ | ||
181 | for (; n < b->used; n++) { | ||
182 | *tmpb++ = 0; | ||
183 | } | ||
184 | } | ||
185 | /* | ||
186 | copy used count and sign | ||
187 | */ | ||
188 | b->used = a->used; | ||
189 | b->sign = a->sign; | ||
190 | return PSTM_OKAY; | ||
191 | } | ||
192 | |||
193 | /******************************************************************************/ | ||
194 | /* | ||
195 | Trim unused digits | ||
196 | |||
197 | This is used to ensure that leading zero digits are trimed and the | ||
198 | leading "used" digit will be non-zero. Typically very fast. Also fixes | ||
199 | the sign if there are no more leading digits | ||
200 | */ | ||
201 | void pstm_clamp(pstm_int * a) | ||
202 | { | ||
203 | /* decrease used while the most significant digit is zero. */ | ||
204 | while (a->used > 0 && a->dp[a->used - 1] == 0) { | ||
205 | --(a->used); | ||
206 | } | ||
207 | /* reset the sign flag if used == 0 */ | ||
208 | if (a->used == 0) { | ||
209 | a->sign = PSTM_ZPOS; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | /******************************************************************************/ | ||
214 | /* | ||
215 | clear one (frees). | ||
216 | */ | ||
217 | void pstm_clear(pstm_int * a) | ||
218 | { | ||
219 | int32 i; | ||
220 | /* | ||
221 | only do anything if a hasn't been freed previously | ||
222 | */ | ||
223 | if (a != NULL && a->dp != NULL) { | ||
224 | /* | ||
225 | first zero the digits | ||
226 | */ | ||
227 | for (i = 0; i < a->used; i++) { | ||
228 | a->dp[i] = 0; | ||
229 | } | ||
230 | |||
231 | psFree (a->dp, a->pool); | ||
232 | /* | ||
233 | reset members to make debugging easier | ||
234 | */ | ||
235 | a->dp = NULL; | ||
236 | a->alloc = a->used = 0; | ||
237 | a->sign = PSTM_ZPOS; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | /******************************************************************************/ | ||
242 | /* | ||
243 | clear many (frees). | ||
244 | */ | ||
245 | void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2, | ||
246 | pstm_int *mp3, pstm_int *mp4, pstm_int *mp5, | ||
247 | pstm_int *mp6, pstm_int *mp7) | ||
248 | { | ||
249 | int32 n; /* Number of ok inits */ | ||
250 | |||
251 | pstm_int *tempArray[9]; | ||
252 | |||
253 | tempArray[0] = mp0; | ||
254 | tempArray[1] = mp1; | ||
255 | tempArray[2] = mp2; | ||
256 | tempArray[3] = mp3; | ||
257 | tempArray[4] = mp4; | ||
258 | tempArray[5] = mp5; | ||
259 | tempArray[6] = mp6; | ||
260 | tempArray[7] = mp7; | ||
261 | tempArray[8] = NULL; | ||
262 | |||
263 | for (n = 0; tempArray[n] != NULL; n++) { | ||
264 | if ((tempArray[n] != NULL) && (tempArray[n]->dp != NULL)) { | ||
265 | pstm_clear(tempArray[n]); | ||
266 | } | ||
267 | } | ||
268 | } | ||
269 | |||
270 | /******************************************************************************/ | ||
271 | /* | ||
272 | Set to zero. | ||
273 | */ | ||
274 | void pstm_zero(pstm_int * a) | ||
275 | { | ||
276 | int32 n; | ||
277 | pstm_digit *tmp; | ||
278 | |||
279 | a->sign = PSTM_ZPOS; | ||
280 | a->used = 0; | ||
281 | |||
282 | tmp = a->dp; | ||
283 | for (n = 0; n < a->alloc; n++) { | ||
284 | *tmp++ = 0; | ||
285 | } | ||
286 | } | ||
287 | |||
288 | |||
289 | /******************************************************************************/ | ||
290 | /* | ||
291 | Compare maginitude of two ints (unsigned). | ||
292 | */ | ||
293 | int32 pstm_cmp_mag(pstm_int * a, pstm_int * b) | ||
294 | { | ||
295 | int16 n; | ||
296 | pstm_digit *tmpa, *tmpb; | ||
297 | |||
298 | /* | ||
299 | compare based on # of non-zero digits | ||
300 | */ | ||
301 | if (a->used > b->used) { | ||
302 | return PSTM_GT; | ||
303 | } | ||
304 | |||
305 | if (a->used < b->used) { | ||
306 | return PSTM_LT; | ||
307 | } | ||
308 | |||
309 | /* alias for a */ | ||
310 | tmpa = a->dp + (a->used - 1); | ||
311 | |||
312 | /* alias for b */ | ||
313 | tmpb = b->dp + (a->used - 1); | ||
314 | |||
315 | /* | ||
316 | compare based on digits | ||
317 | */ | ||
318 | for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { | ||
319 | if (*tmpa > *tmpb) { | ||
320 | return PSTM_GT; | ||
321 | } | ||
322 | if (*tmpa < *tmpb) { | ||
323 | return PSTM_LT; | ||
324 | } | ||
325 | } | ||
326 | return PSTM_EQ; | ||
327 | } | ||
328 | |||
329 | /******************************************************************************/ | ||
330 | /* | ||
331 | Compare two ints (signed) | ||
332 | */ | ||
333 | int32 pstm_cmp(pstm_int * a, pstm_int * b) | ||
334 | { | ||
335 | /* | ||
336 | compare based on sign | ||
337 | */ | ||
338 | if (a->sign != b->sign) { | ||
339 | if (a->sign == PSTM_NEG) { | ||
340 | return PSTM_LT; | ||
341 | } else { | ||
342 | return PSTM_GT; | ||
343 | } | ||
344 | } | ||
345 | /* | ||
346 | compare digits | ||
347 | */ | ||
348 | if (a->sign == PSTM_NEG) { | ||
349 | /* if negative compare opposite direction */ | ||
350 | return pstm_cmp_mag(b, a); | ||
351 | } else { | ||
352 | return pstm_cmp_mag(a, b); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | /******************************************************************************/ | ||
357 | /* | ||
358 | pstm_ints can be initialized more precisely when they will populated | ||
359 | using pstm_read_unsigned_bin since the length of the byte stream is known | ||
360 | */ | ||
361 | int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a, uint32 len) | ||
362 | { | ||
363 | int32 size; | ||
364 | /* | ||
365 | Need to set this based on how many words max it will take to store the bin. | ||
366 | The magic + 2: | ||
367 | 1 to round up for the remainder of this integer math | ||
368 | 1 for the initial carry of '1' bits that fall between DIGIT_BIT and 8 | ||
369 | */ | ||
370 | size = (((len / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT)) | ||
371 | / DIGIT_BIT) + 2; | ||
372 | return pstm_init_size(pool, a, size); | ||
373 | } | ||
374 | |||
375 | |||
376 | /******************************************************************************/ | ||
377 | /* | ||
378 | Reads a unsigned char array into pstm_int format. User should have | ||
379 | called pstm_init_for_read_unsigned_bin first. There is some grow logic | ||
380 | here if the default pstm_init was used but we don't really want to hit it. | ||
381 | */ | ||
382 | int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c) | ||
383 | { | ||
384 | /* zero the int */ | ||
385 | pstm_zero (a); | ||
386 | |||
387 | /* | ||
388 | If we know the endianness of this architecture, and we're using | ||
389 | 32-bit pstm_digits, we can optimize this | ||
390 | */ | ||
391 | #if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(PSTM_64BIT) | ||
392 | /* But not for both simultaneously */ | ||
393 | #if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG) | ||
394 | #error Both ENDIAN_LITTLE and ENDIAN_BIG defined. | ||
395 | #endif | ||
396 | { | ||
397 | unsigned char *pd; | ||
398 | if ((unsigned)c > (PSTM_MAX_SIZE * sizeof(pstm_digit))) { | ||
399 | uint32 excess = c - (PSTM_MAX_SIZE * sizeof(pstm_digit)); | ||
400 | c -= excess; | ||
401 | b += excess; | ||
402 | } | ||
403 | a->used = (int16)((c + sizeof(pstm_digit) - 1)/sizeof(pstm_digit)); | ||
404 | if (a->alloc < a->used) { | ||
405 | if (pstm_grow(a, a->used) != PSTM_OKAY) { | ||
406 | return PSTM_MEM; | ||
407 | } | ||
408 | } | ||
409 | pd = (unsigned char *)a->dp; | ||
410 | /* read the bytes in */ | ||
411 | #ifdef ENDIAN_BIG | ||
412 | { | ||
413 | /* Use Duff's device to unroll the loop. */ | ||
414 | int32 idx = (c - 1) & ~3; | ||
415 | switch (c % 4) { | ||
416 | case 0: do { pd[idx+0] = *b++; | ||
417 | case 3: pd[idx+1] = *b++; | ||
418 | case 2: pd[idx+2] = *b++; | ||
419 | case 1: pd[idx+3] = *b++; | ||
420 | idx -= 4; | ||
421 | } while ((c -= 4) > 0); | ||
422 | } | ||
423 | } | ||
424 | #else | ||
425 | for (c -= 1; c >= 0; c -= 1) { | ||
426 | pd[c] = *b++; | ||
427 | } | ||
428 | #endif | ||
429 | } | ||
430 | #else | ||
431 | /* Big enough based on the len? */ | ||
432 | a->used = (((c / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT)) | ||
433 | / DIGIT_BIT) + 2; | ||
434 | |||
435 | if (a->alloc < a->used) { | ||
436 | if (pstm_grow(a, a->used) != PSTM_OKAY) { | ||
437 | return PSTM_MEM; | ||
438 | } | ||
439 | } | ||
440 | /* read the bytes in */ | ||
441 | for (; c > 0; c--) { | ||
442 | if (pstm_mul_2d (a, 8, a) != PSTM_OKAY) { | ||
443 | return PS_MEM_FAIL; | ||
444 | } | ||
445 | a->dp[0] |= *b++; | ||
446 | a->used += 1; | ||
447 | } | ||
448 | #endif | ||
449 | |||
450 | pstm_clamp (a); | ||
451 | return PS_SUCCESS; | ||
452 | } | ||
453 | |||
454 | /******************************************************************************/ | ||
455 | /* | ||
456 | */ | ||
457 | int16 pstm_count_bits (pstm_int * a) | ||
458 | { | ||
459 | int16 r; | ||
460 | pstm_digit q; | ||
461 | |||
462 | if (a->used == 0) { | ||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | /* get number of digits and add that */ | ||
467 | r = (a->used - 1) * DIGIT_BIT; | ||
468 | |||
469 | /* take the last digit and count the bits in it */ | ||
470 | q = a->dp[a->used - 1]; | ||
471 | while (q > ((pstm_digit) 0)) { | ||
472 | ++r; | ||
473 | q >>= ((pstm_digit) 1); | ||
474 | } | ||
475 | return r; | ||
476 | } | ||
477 | |||
478 | /******************************************************************************/ | ||
479 | int32 pstm_unsigned_bin_size(pstm_int *a) | ||
480 | { | ||
481 | int32 size = pstm_count_bits (a); | ||
482 | return (size / 8 + ((size & 7) != 0 ? 1 : 0)); | ||
483 | } | ||
484 | |||
485 | /******************************************************************************/ | ||
486 | void pstm_set(pstm_int *a, pstm_digit b) | ||
487 | { | ||
488 | pstm_zero(a); | ||
489 | a->dp[0] = b; | ||
490 | a->used = a->dp[0] ? 1 : 0; | ||
491 | } | ||
492 | |||
493 | /******************************************************************************/ | ||
494 | /* | ||
495 | Right shift | ||
496 | */ | ||
497 | void pstm_rshd(pstm_int *a, int16 x) | ||
498 | { | ||
499 | int16 y; | ||
500 | |||
501 | /* too many digits just zero and return */ | ||
502 | if (x >= a->used) { | ||
503 | pstm_zero(a); | ||
504 | return; | ||
505 | } | ||
506 | |||
507 | /* shift */ | ||
508 | for (y = 0; y < a->used - x; y++) { | ||
509 | a->dp[y] = a->dp[y+x]; | ||
510 | } | ||
511 | |||
512 | /* zero rest */ | ||
513 | for (; y < a->used; y++) { | ||
514 | a->dp[y] = 0; | ||
515 | } | ||
516 | |||
517 | /* decrement count */ | ||
518 | a->used -= x; | ||
519 | pstm_clamp(a); | ||
520 | } | ||
521 | |||
522 | /******************************************************************************/ | ||
523 | /* | ||
524 | Shift left a certain amount of digits. | ||
525 | */ | ||
526 | int32 pstm_lshd(pstm_int * a, int16 b) | ||
527 | { | ||
528 | int16 x; | ||
529 | int32 res; | ||
530 | |||
531 | /* | ||
532 | If its less than zero return. | ||
533 | */ | ||
534 | if (b <= 0) { | ||
535 | return PSTM_OKAY; | ||
536 | } | ||
537 | /* | ||
538 | Grow to fit the new digits. | ||
539 | */ | ||
540 | if (a->alloc < a->used + b) { | ||
541 | if ((res = pstm_grow (a, a->used + b)) != PSTM_OKAY) { | ||
542 | return res; | ||
543 | } | ||
544 | } | ||
545 | |||
546 | { | ||
547 | register pstm_digit *top, *bottom; | ||
548 | /* | ||
549 | Increment the used by the shift amount then copy upwards. | ||
550 | */ | ||
551 | a->used += b; | ||
552 | |||
553 | /* top */ | ||
554 | top = a->dp + a->used - 1; | ||
555 | |||
556 | /* base */ | ||
557 | bottom = a->dp + a->used - 1 - b; | ||
558 | /* | ||
559 | This is implemented using a sliding window except the window goes the | ||
560 | other way around. Copying from the bottom to the top. | ||
561 | */ | ||
562 | for (x = a->used - 1; x >= b; x--) { | ||
563 | *top-- = *bottom--; | ||
564 | } | ||
565 | |||
566 | /* zero the lower digits */ | ||
567 | top = a->dp; | ||
568 | for (x = 0; x < b; x++) { | ||
569 | *top++ = 0; | ||
570 | } | ||
571 | } | ||
572 | return PSTM_OKAY; | ||
573 | } | ||
574 | |||
575 | /******************************************************************************/ | ||
576 | /* | ||
577 | computes a = 2**b | ||
578 | */ | ||
579 | int32 pstm_2expt(pstm_int *a, int16 b) | ||
580 | { | ||
581 | int16 z; | ||
582 | |||
583 | /* zero a as per default */ | ||
584 | pstm_zero (a); | ||
585 | |||
586 | if (b < 0) { | ||
587 | return PSTM_OKAY; | ||
588 | } | ||
589 | |||
590 | z = b / DIGIT_BIT; | ||
591 | if (z >= PSTM_MAX_SIZE) { | ||
592 | return PS_LIMIT_FAIL; | ||
593 | } | ||
594 | |||
595 | /* set the used count of where the bit will go */ | ||
596 | a->used = z + 1; | ||
597 | |||
598 | if (a->used > a->alloc) { | ||
599 | if (pstm_grow(a, a->used) != PSTM_OKAY) { | ||
600 | return PS_MEM_FAIL; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | /* put the single bit in its place */ | ||
605 | a->dp[z] = ((pstm_digit)1) << (b % DIGIT_BIT); | ||
606 | return PSTM_OKAY; | ||
607 | } | ||
608 | |||
609 | /******************************************************************************/ | ||
610 | /* | ||
611 | |||
612 | */ | ||
613 | int32 pstm_mul_2(pstm_int * a, pstm_int * b) | ||
614 | { | ||
615 | int32 res; | ||
616 | int16 x, oldused; | ||
617 | |||
618 | /* | ||
619 | grow to accomodate result | ||
620 | */ | ||
621 | if (b->alloc < a->used + 1) { | ||
622 | if ((res = pstm_grow (b, a->used + 1)) != PSTM_OKAY) { | ||
623 | return res; | ||
624 | } | ||
625 | } | ||
626 | oldused = b->used; | ||
627 | b->used = a->used; | ||
628 | |||
629 | { | ||
630 | register pstm_digit r, rr, *tmpa, *tmpb; | ||
631 | |||
632 | /* alias for source */ | ||
633 | tmpa = a->dp; | ||
634 | |||
635 | /* alias for dest */ | ||
636 | tmpb = b->dp; | ||
637 | |||
638 | /* carry */ | ||
639 | r = 0; | ||
640 | for (x = 0; x < a->used; x++) { | ||
641 | /* | ||
642 | get what will be the *next* carry bit from the | ||
643 | MSB of the current digit | ||
644 | */ | ||
645 | rr = *tmpa >> ((pstm_digit)(DIGIT_BIT - 1)); | ||
646 | /* | ||
647 | now shift up this digit, add in the carry [from the previous] | ||
648 | */ | ||
649 | *tmpb++ = ((*tmpa++ << ((pstm_digit)1)) | r); | ||
650 | /* | ||
651 | copy the carry that would be from the source | ||
652 | digit into the next iteration | ||
653 | */ | ||
654 | r = rr; | ||
655 | } | ||
656 | |||
657 | /* new leading digit? */ | ||
658 | if (r != 0 && b->used != (PSTM_MAX_SIZE-1)) { | ||
659 | /* add a MSB which is always 1 at this point */ | ||
660 | *tmpb = 1; | ||
661 | ++(b->used); | ||
662 | } | ||
663 | /* | ||
664 | now zero any excess digits on the destination that we didn't write to | ||
665 | */ | ||
666 | tmpb = b->dp + b->used; | ||
667 | for (x = b->used; x < oldused; x++) { | ||
668 | *tmpb++ = 0; | ||
669 | } | ||
670 | } | ||
671 | b->sign = a->sign; | ||
672 | return PSTM_OKAY; | ||
673 | } | ||
674 | |||
675 | /******************************************************************************/ | ||
676 | /* | ||
677 | unsigned subtraction ||a|| >= ||b|| ALWAYS! | ||
678 | */ | ||
679 | int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c) | ||
680 | { | ||
681 | int16 oldbused, oldused; | ||
682 | int32 x; | ||
683 | pstm_word t; | ||
684 | |||
685 | if (b->used > a->used) { | ||
686 | return PS_LIMIT_FAIL; | ||
687 | } | ||
688 | if (c->alloc < a->used) { | ||
689 | if ((x = pstm_grow (c, a->used)) != PSTM_OKAY) { | ||
690 | return x; | ||
691 | } | ||
692 | } | ||
693 | oldused = c->used; | ||
694 | oldbused = b->used; | ||
695 | c->used = a->used; | ||
696 | t = 0; | ||
697 | |||
698 | for (x = 0; x < oldbused; x++) { | ||
699 | t = ((pstm_word)a->dp[x]) - (((pstm_word)b->dp[x]) + t); | ||
700 | c->dp[x] = (pstm_digit)t; | ||
701 | t = (t >> DIGIT_BIT)&1; | ||
702 | } | ||
703 | for (; x < a->used; x++) { | ||
704 | t = ((pstm_word)a->dp[x]) - t; | ||
705 | c->dp[x] = (pstm_digit)t; | ||
706 | t = (t >> DIGIT_BIT); | ||
707 | } | ||
708 | for (; x < oldused; x++) { | ||
709 | c->dp[x] = 0; | ||
710 | } | ||
711 | pstm_clamp(c); | ||
712 | return PSTM_OKAY; | ||
713 | } | ||
714 | |||
715 | /******************************************************************************/ | ||
716 | /* | ||
717 | unsigned addition | ||
718 | */ | ||
719 | static int32 s_pstm_add(pstm_int *a, pstm_int *b, pstm_int *c) | ||
720 | { | ||
721 | int16 x, y, oldused; | ||
722 | register pstm_word t, adp, bdp; | ||
723 | |||
724 | y = a->used; | ||
725 | if (b->used > y) { | ||
726 | y = b->used; | ||
727 | } | ||
728 | oldused = c->used; | ||
729 | c->used = y; | ||
730 | |||
731 | if (c->used > c->alloc) { | ||
732 | if (pstm_grow(c, c->used) != PSTM_OKAY) { | ||
733 | return PS_MEM_FAIL; | ||
734 | } | ||
735 | } | ||
736 | |||
737 | t = 0; | ||
738 | for (x = 0; x < y; x++) { | ||
739 | if (a->used < x) { | ||
740 | adp = 0; | ||
741 | } else { | ||
742 | adp = (pstm_word)a->dp[x]; | ||
743 | } | ||
744 | if (b->used < x) { | ||
745 | bdp = 0; | ||
746 | } else { | ||
747 | bdp = (pstm_word)b->dp[x]; | ||
748 | } | ||
749 | t += (adp) + (bdp); | ||
750 | c->dp[x] = (pstm_digit)t; | ||
751 | t >>= DIGIT_BIT; | ||
752 | } | ||
753 | if (t != 0 && x < PSTM_MAX_SIZE) { | ||
754 | if (c->used == c->alloc) { | ||
755 | if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) { | ||
756 | return PS_MEM_FAIL; | ||
757 | } | ||
758 | } | ||
759 | c->dp[c->used++] = (pstm_digit)t; | ||
760 | ++x; | ||
761 | } | ||
762 | |||
763 | c->used = x; | ||
764 | for (; x < oldused; x++) { | ||
765 | c->dp[x] = 0; | ||
766 | } | ||
767 | pstm_clamp(c); | ||
768 | return PSTM_OKAY; | ||
769 | } | ||
770 | |||
771 | |||
772 | /******************************************************************************/ | ||
773 | /* | ||
774 | |||
775 | */ | ||
776 | int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c) | ||
777 | { | ||
778 | int32 res; | ||
779 | int16 sa, sb; | ||
780 | |||
781 | sa = a->sign; | ||
782 | sb = b->sign; | ||
783 | |||
784 | if (sa != sb) { | ||
785 | /* | ||
786 | subtract a negative from a positive, OR a positive from a negative. | ||
787 | For both, ADD their magnitudes, and use the sign of the first number. | ||
788 | */ | ||
789 | c->sign = sa; | ||
790 | if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) { | ||
791 | return res; | ||
792 | } | ||
793 | } else { | ||
794 | /* | ||
795 | subtract a positive from a positive, OR a negative from a negative. | ||
796 | First, take the difference between their magnitudes, then... | ||
797 | */ | ||
798 | if (pstm_cmp_mag (a, b) != PSTM_LT) { | ||
799 | /* Copy the sign from the first */ | ||
800 | c->sign = sa; | ||
801 | /* The first has a larger or equal magnitude */ | ||
802 | if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) { | ||
803 | return res; | ||
804 | } | ||
805 | } else { | ||
806 | /* The result has the _opposite_ sign from the first number. */ | ||
807 | c->sign = (sa == PSTM_ZPOS) ? PSTM_NEG : PSTM_ZPOS; | ||
808 | /* The second has a larger magnitude */ | ||
809 | if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) { | ||
810 | return res; | ||
811 | } | ||
812 | } | ||
813 | } | ||
814 | return PS_SUCCESS; | ||
815 | } | ||
816 | |||
817 | /******************************************************************************/ | ||
818 | /* | ||
819 | c = a - b | ||
820 | */ | ||
821 | int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c) | ||
822 | { | ||
823 | pstm_int tmp; | ||
824 | int32 res; | ||
825 | |||
826 | if (pstm_init_size(pool, &tmp, sizeof(pstm_digit)) != PSTM_OKAY) { | ||
827 | return PS_MEM_FAIL; | ||
828 | } | ||
829 | pstm_set(&tmp, b); | ||
830 | res = pstm_sub(a, &tmp, c); | ||
831 | pstm_clear(&tmp); | ||
832 | return res; | ||
833 | } | ||
834 | |||
835 | /******************************************************************************/ | ||
836 | /* | ||
837 | setups the montgomery reduction | ||
838 | */ | ||
839 | int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho) | ||
840 | { | ||
841 | pstm_digit x, b; | ||
842 | |||
843 | /* | ||
844 | fast inversion mod 2**k | ||
845 | Based on the fact that | ||
846 | XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) | ||
847 | => 2*X*A - X*X*A*A = 1 | ||
848 | => 2*(1) - (1) = 1 | ||
849 | */ | ||
850 | b = a->dp[0]; | ||
851 | |||
852 | if ((b & 1) == 0) { | ||
853 | psTraceCrypto("pstm_montogomery_setup failure\n"); | ||
854 | return PS_ARG_FAIL; | ||
855 | } | ||
856 | |||
857 | x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ | ||
858 | x *= 2 - b * x; /* here x*a==1 mod 2**8 */ | ||
859 | x *= 2 - b * x; /* here x*a==1 mod 2**16 */ | ||
860 | x *= 2 - b * x; /* here x*a==1 mod 2**32 */ | ||
861 | #ifdef PSTM_64BIT | ||
862 | x *= 2 - b * x; /* here x*a==1 mod 2**64 */ | ||
863 | #endif | ||
864 | /* rho = -1/m mod b */ | ||
865 | *rho = (pstm_digit)(((pstm_word) 1 << ((pstm_word) DIGIT_BIT)) - | ||
866 | ((pstm_word)x)); | ||
867 | return PSTM_OKAY; | ||
868 | } | ||
869 | |||
870 | /******************************************************************************/ | ||
871 | /* | ||
872 | * computes a = B**n mod b without division or multiplication useful for | ||
873 | * normalizing numbers in a Montgomery system. | ||
874 | */ | ||
875 | int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b) | ||
876 | { | ||
877 | int32 x; | ||
878 | int16 bits; | ||
879 | |||
880 | /* how many bits of last digit does b use */ | ||
881 | bits = pstm_count_bits (b) % DIGIT_BIT; | ||
882 | if (!bits) bits = DIGIT_BIT; | ||
883 | |||
884 | /* compute A = B^(n-1) * 2^(bits-1) */ | ||
885 | if (b->used > 1) { | ||
886 | if ((x = pstm_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != | ||
887 | PSTM_OKAY) { | ||
888 | return x; | ||
889 | } | ||
890 | } else { | ||
891 | pstm_set(a, 1); | ||
892 | bits = 1; | ||
893 | } | ||
894 | |||
895 | /* now compute C = A * B mod b */ | ||
896 | for (x = bits - 1; x < (int32)DIGIT_BIT; x++) { | ||
897 | if (pstm_mul_2 (a, a) != PSTM_OKAY) { | ||
898 | return PS_MEM_FAIL; | ||
899 | } | ||
900 | if (pstm_cmp_mag (a, b) != PSTM_LT) { | ||
901 | if (s_pstm_sub (a, b, a) != PSTM_OKAY) { | ||
902 | return PS_MEM_FAIL; | ||
903 | } | ||
904 | } | ||
905 | } | ||
906 | return PSTM_OKAY; | ||
907 | } | ||
908 | |||
909 | /******************************************************************************/ | ||
910 | /* | ||
911 | c = a * 2**d | ||
912 | */ | ||
913 | static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c) | ||
914 | { | ||
915 | pstm_digit carry, carrytmp, shift; | ||
916 | int16 x; | ||
917 | |||
918 | /* copy it */ | ||
919 | if (pstm_copy(a, c) != PSTM_OKAY) { | ||
920 | return PS_MEM_FAIL; | ||
921 | } | ||
922 | |||
923 | /* handle whole digits */ | ||
924 | if (b >= DIGIT_BIT) { | ||
925 | if (pstm_lshd(c, b/DIGIT_BIT) != PSTM_OKAY) { | ||
926 | return PS_MEM_FAIL; | ||
927 | } | ||
928 | } | ||
929 | b %= DIGIT_BIT; | ||
930 | |||
931 | /* shift the digits */ | ||
932 | if (b != 0) { | ||
933 | carry = 0; | ||
934 | shift = DIGIT_BIT - b; | ||
935 | for (x = 0; x < c->used; x++) { | ||
936 | carrytmp = c->dp[x] >> shift; | ||
937 | c->dp[x] = (c->dp[x] << b) + carry; | ||
938 | carry = carrytmp; | ||
939 | } | ||
940 | /* store last carry if room */ | ||
941 | if (carry && x < PSTM_MAX_SIZE) { | ||
942 | if (c->used == c->alloc) { | ||
943 | if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) { | ||
944 | return PS_MEM_FAIL; | ||
945 | } | ||
946 | } | ||
947 | c->dp[c->used++] = carry; | ||
948 | } | ||
949 | } | ||
950 | pstm_clamp(c); | ||
951 | return PSTM_OKAY; | ||
952 | } | ||
953 | |||
954 | /******************************************************************************/ | ||
955 | /* | ||
956 | c = a mod 2**d | ||
957 | */ | ||
958 | static int32 pstm_mod_2d(pstm_int *a, int16 b, pstm_int *c) | ||
959 | { | ||
960 | int16 x; | ||
961 | |||
962 | /* zero if count less than or equal to zero */ | ||
963 | if (b <= 0) { | ||
964 | pstm_zero(c); | ||
965 | return PSTM_OKAY; | ||
966 | } | ||
967 | |||
968 | /* get copy of input */ | ||
969 | if (pstm_copy(a, c) != PSTM_OKAY) { | ||
970 | return PS_MEM_FAIL; | ||
971 | } | ||
972 | |||
973 | /* if 2**d is larger than we just return */ | ||
974 | if (b >= (DIGIT_BIT * a->used)) { | ||
975 | return PSTM_OKAY; | ||
976 | } | ||
977 | |||
978 | /* zero digits above the last digit of the modulus */ | ||
979 | for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) | ||
980 | { | ||
981 | c->dp[x] = 0; | ||
982 | } | ||
983 | /* clear the digit that is not completely outside/inside the modulus */ | ||
984 | c->dp[b / DIGIT_BIT] &= ~((pstm_digit)0) >> (DIGIT_BIT - b); | ||
985 | pstm_clamp (c); | ||
986 | return PSTM_OKAY; | ||
987 | } | ||
988 | |||
989 | |||
990 | /******************************************************************************/ | ||
991 | /* | ||
992 | c = a * b | ||
993 | */ | ||
994 | int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c) | ||
995 | { | ||
996 | pstm_word w; | ||
997 | int32 res; | ||
998 | int16 x, oldused; | ||
999 | |||
1000 | if (c->alloc < a->used + 1) { | ||
1001 | if ((res = pstm_grow (c, a->used + 1)) != PSTM_OKAY) { | ||
1002 | return res; | ||
1003 | } | ||
1004 | } | ||
1005 | oldused = c->used; | ||
1006 | c->used = a->used; | ||
1007 | c->sign = a->sign; | ||
1008 | w = 0; | ||
1009 | for (x = 0; x < a->used; x++) { | ||
1010 | w = ((pstm_word)a->dp[x]) * ((pstm_word)b) + w; | ||
1011 | c->dp[x] = (pstm_digit)w; | ||
1012 | w = w >> DIGIT_BIT; | ||
1013 | } | ||
1014 | if (w != 0 && (a->used != PSTM_MAX_SIZE)) { | ||
1015 | c->dp[c->used++] = (pstm_digit)w; | ||
1016 | ++x; | ||
1017 | } | ||
1018 | for (; x < oldused; x++) { | ||
1019 | c->dp[x] = 0; | ||
1020 | } | ||
1021 | pstm_clamp(c); | ||
1022 | return PSTM_OKAY; | ||
1023 | } | ||
1024 | |||
1025 | /******************************************************************************/ | ||
1026 | /* | ||
1027 | c = a / 2**b | ||
1028 | */ | ||
1029 | int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c, | ||
1030 | pstm_int *d) | ||
1031 | { | ||
1032 | pstm_digit D, r, rr; | ||
1033 | int32 res; | ||
1034 | int16 x; | ||
1035 | pstm_int t; | ||
1036 | |||
1037 | /* if the shift count is <= 0 then we do no work */ | ||
1038 | if (b <= 0) { | ||
1039 | if (pstm_copy (a, c) != PSTM_OKAY) { | ||
1040 | return PS_MEM_FAIL; | ||
1041 | } | ||
1042 | if (d != NULL) { | ||
1043 | pstm_zero (d); | ||
1044 | } | ||
1045 | return PSTM_OKAY; | ||
1046 | } | ||
1047 | |||
1048 | /* get the remainder */ | ||
1049 | if (d != NULL) { | ||
1050 | if (pstm_init(pool, &t) != PSTM_OKAY) { | ||
1051 | return PS_MEM_FAIL; | ||
1052 | } | ||
1053 | if (pstm_mod_2d (a, b, &t) != PSTM_OKAY) { | ||
1054 | res = PS_MEM_FAIL; | ||
1055 | goto LBL_DONE; | ||
1056 | } | ||
1057 | } | ||
1058 | |||
1059 | /* copy */ | ||
1060 | if (pstm_copy(a, c) != PSTM_OKAY) { | ||
1061 | res = PS_MEM_FAIL; | ||
1062 | goto LBL_DONE; | ||
1063 | } | ||
1064 | |||
1065 | /* shift by as many digits in the bit count */ | ||
1066 | if (b >= (int32)DIGIT_BIT) { | ||
1067 | pstm_rshd (c, b / DIGIT_BIT); | ||
1068 | } | ||
1069 | |||
1070 | /* shift any bit count < DIGIT_BIT */ | ||
1071 | D = (pstm_digit) (b % DIGIT_BIT); | ||
1072 | if (D != 0) { | ||
1073 | register pstm_digit *tmpc, mask, shift; | ||
1074 | |||
1075 | /* mask */ | ||
1076 | mask = (((pstm_digit)1) << D) - 1; | ||
1077 | |||
1078 | /* shift for lsb */ | ||
1079 | shift = DIGIT_BIT - D; | ||
1080 | |||
1081 | /* alias */ | ||
1082 | tmpc = c->dp + (c->used - 1); | ||
1083 | |||
1084 | /* carry */ | ||
1085 | r = 0; | ||
1086 | for (x = c->used - 1; x >= 0; x--) { | ||
1087 | /* get the lower bits of this word in a temp */ | ||
1088 | rr = *tmpc & mask; | ||
1089 | |||
1090 | /* shift the current word and mix in the carry bits from previous */ | ||
1091 | *tmpc = (*tmpc >> D) | (r << shift); | ||
1092 | --tmpc; | ||
1093 | |||
1094 | /* set the carry to the carry bits of the current word above */ | ||
1095 | r = rr; | ||
1096 | } | ||
1097 | } | ||
1098 | pstm_clamp (c); | ||
1099 | |||
1100 | res = PSTM_OKAY; | ||
1101 | LBL_DONE: | ||
1102 | if (d != NULL) { | ||
1103 | if (pstm_copy(&t, d) != PSTM_OKAY) { | ||
1104 | res = PS_MEM_FAIL; | ||
1105 | } | ||
1106 | pstm_clear(&t); | ||
1107 | } | ||
1108 | return res; | ||
1109 | } | ||
1110 | |||
1111 | /******************************************************************************/ | ||
1112 | /* | ||
1113 | b = a/2 | ||
1114 | */ | ||
1115 | int32 pstm_div_2(pstm_int * a, pstm_int * b) | ||
1116 | { | ||
1117 | int16 x, oldused; | ||
1118 | |||
1119 | if (b->alloc < a->used) { | ||
1120 | if (pstm_grow(b, a->used) != PSTM_OKAY) { | ||
1121 | return PS_MEM_FAIL; | ||
1122 | } | ||
1123 | } | ||
1124 | oldused = b->used; | ||
1125 | b->used = a->used; | ||
1126 | { | ||
1127 | register pstm_digit r, rr, *tmpa, *tmpb; | ||
1128 | |||
1129 | /* source alias */ | ||
1130 | tmpa = a->dp + b->used - 1; | ||
1131 | |||
1132 | /* dest alias */ | ||
1133 | tmpb = b->dp + b->used - 1; | ||
1134 | |||
1135 | /* carry */ | ||
1136 | r = 0; | ||
1137 | for (x = b->used - 1; x >= 0; x--) { | ||
1138 | /* get the carry for the next iteration */ | ||
1139 | rr = *tmpa & 1; | ||
1140 | |||
1141 | /* shift the current digit, add in carry and store */ | ||
1142 | *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); | ||
1143 | |||
1144 | /* forward carry to next iteration */ | ||
1145 | r = rr; | ||
1146 | } | ||
1147 | |||
1148 | /* zero excess digits */ | ||
1149 | tmpb = b->dp + b->used; | ||
1150 | for (x = b->used; x < oldused; x++) { | ||
1151 | *tmpb++ = 0; | ||
1152 | } | ||
1153 | } | ||
1154 | b->sign = a->sign; | ||
1155 | pstm_clamp (b); | ||
1156 | return PSTM_OKAY; | ||
1157 | } | ||
1158 | |||
1159 | /******************************************************************************/ | ||
1160 | /* | ||
1161 | Creates "a" then copies b into it | ||
1162 | */ | ||
1163 | int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b, int16 toSqr) | ||
1164 | { | ||
1165 | int16 x; | ||
1166 | int32 res; | ||
1167 | |||
1168 | if (a == b) { | ||
1169 | return PSTM_OKAY; | ||
1170 | } | ||
1171 | x = b->alloc; | ||
1172 | |||
1173 | if (toSqr) { | ||
1174 | /* | ||
1175 | Smart-size: Increasing size of a if b->used is roughly half | ||
1176 | of b->alloc because usage has shown that a lot of these copies | ||
1177 | go on to be squared and need these extra digits | ||
1178 | */ | ||
1179 | if ((b->used * 2) + 2 >= x) { | ||
1180 | x = (b->used * 2) + 3; | ||
1181 | } | ||
1182 | } | ||
1183 | if ((res = pstm_init_size(pool, a, x)) != PSTM_OKAY) { | ||
1184 | return res; | ||
1185 | } | ||
1186 | return pstm_copy(b, a); | ||
1187 | } | ||
1188 | |||
1189 | /******************************************************************************/ | ||
1190 | /* | ||
1191 | With some compilers, we have seen issues linking with the builtin | ||
1192 | 64 bit division routine. The issues with either manifest in a failure | ||
1193 | to find 'udivdi3' at link time, or a runtime invalid instruction fault | ||
1194 | during an RSA operation. | ||
1195 | The routine below divides a 64 bit unsigned int by a 32 bit unsigned int | ||
1196 | explicitly, rather than using the division operation | ||
1197 | The 64 bit result is placed in the 'numerator' parameter | ||
1198 | The 32 bit mod (remainder) of the division is the return parameter | ||
1199 | Based on implementations by: | ||
1200 | Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> | ||
1201 | Copyright (C) 1999 Hewlett-Packard Co | ||
1202 | Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> | ||
1203 | */ | ||
#if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
/*
	Shift-and-subtract division: *numerator is replaced by the quotient
	*numerator / denominator and the remainder is returned.
*/
static uint32 psDiv64(uint64 *numerator, uint32 denominator)
{
	uint64 remainder = *numerator;
	uint64 divisor = denominator;
	uint64 quotient = 0;
	uint64 weight = 1;
	uint32 upper = remainder >> 32;

	/* take the upper 32 bits out with a plain 32-bit division first */
	if (upper >= denominator) {
		upper /= denominator;
		quotient = (uint64) upper << 32;
		remainder -= (uint64) (upper * denominator) << 32;
	}

	/* scale the divisor up to the remainder, stopping before the top bit */
	while ((int64)divisor > 0 && divisor < remainder) {
		divisor <<= 1;
		weight <<= 1;
	}

	/* then subtract it back out one binary place at a time */
	do {
		if (remainder >= divisor) {
			remainder -= divisor;
			quotient += weight;
		}
		divisor >>= 1;
		weight >>= 1;
	} while (weight);

	*numerator = quotient;
	return remainder;
}
#endif /* USE_MATRIX_DIV64 */
1234 | |||
#if defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
typedef unsigned long uint128 __attribute__ ((mode(TI)));
/*
	Shift-and-subtract division: *numerator is replaced by the quotient
	*numerator / denominator and the remainder is returned.
*/
static uint64 psDiv128(uint128 *numerator, uint64 denominator)
{
	uint128 remainder = *numerator;
	uint128 divisor = denominator;
	uint128 quotient = 0;
	uint128 weight = 1;
	uint64 upper = remainder >> 64;

	/* take the upper 64 bits out with a plain 64-bit division first */
	if (upper >= denominator) {
		upper /= denominator;
		quotient = (uint128) upper << 64;
		remainder -= (uint128) (upper * denominator) << 64;
	}

	/*
		Scale the divisor up to the remainder.
		NOTE(review): psDiv64 tests the sign bit through a signed cast at
		this point; the cast here is unsigned, so the first condition only
		checks for non-zero -- confirm that this is intended.
	*/
	while ((uint128)divisor > 0 && divisor < remainder) {
		divisor <<= 1;
		weight <<= 1;
	}

	/* then subtract it back out one binary place at a time */
	do {
		if (remainder >= divisor) {
			remainder -= divisor;
			quotient += weight;
		}
		divisor >>= 1;
		weight >>= 1;
	} while (weight);

	*numerator = quotient;
	return remainder;
}
#endif /* USE_MATRIX_DIV128 */
1266 | |||
1267 | /******************************************************************************/ | ||
1268 | /* | ||
1269 | a/b => cb + d == a | ||
1270 | */ | ||
1271 | int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c, | ||
1272 | pstm_int *d) | ||
1273 | { | ||
1274 | pstm_int q, x, y, t1, t2; | ||
1275 | int32 res; | ||
1276 | int16 n, t, i, norm, neg; | ||
1277 | |||
1278 | /* is divisor zero ? */ | ||
1279 | if (pstm_iszero (b) == 1) { | ||
1280 | return PS_LIMIT_FAIL; | ||
1281 | } | ||
1282 | |||
1283 | /* if a < b then q=0, r = a */ | ||
1284 | if (pstm_cmp_mag (a, b) == PSTM_LT) { | ||
1285 | if (d != NULL) { | ||
1286 | if (pstm_copy(a, d) != PSTM_OKAY) { | ||
1287 | return PS_MEM_FAIL; | ||
1288 | } | ||
1289 | } | ||
1290 | if (c != NULL) { | ||
1291 | pstm_zero (c); | ||
1292 | } | ||
1293 | return PSTM_OKAY; | ||
1294 | } | ||
1295 | /* | ||
1296 | Smart-size inits | ||
1297 | */ | ||
1298 | if ((res = pstm_init_size(pool, &t1, a->alloc)) != PSTM_OKAY) { | ||
1299 | return res; | ||
1300 | } | ||
1301 | if ((res = pstm_init_size(pool, &t2, 3)) != PSTM_OKAY) { | ||
1302 | goto LBL_T1; | ||
1303 | } | ||
1304 | if ((res = pstm_init_copy(pool, &x, a, 0)) != PSTM_OKAY) { | ||
1305 | goto LBL_T2; | ||
1306 | } | ||
1307 | /* | ||
1308 | Used to be an init_copy on b but pstm_grow was always hit with triple size | ||
1309 | */ | ||
1310 | if ((res = pstm_init_size(pool, &y, b->used * 3)) != PSTM_OKAY) { | ||
1311 | goto LBL_X; | ||
1312 | } | ||
1313 | if ((res = pstm_copy(b, &y)) != PSTM_OKAY) { | ||
1314 | goto LBL_Y; | ||
1315 | } | ||
1316 | |||
1317 | /* fix the sign */ | ||
1318 | neg = (a->sign == b->sign) ? PSTM_ZPOS : PSTM_NEG; | ||
1319 | x.sign = y.sign = PSTM_ZPOS; | ||
1320 | |||
1321 | /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ | ||
1322 | norm = pstm_count_bits(&y) % DIGIT_BIT; | ||
1323 | if (norm < (int32)(DIGIT_BIT-1)) { | ||
1324 | norm = (DIGIT_BIT-1) - norm; | ||
1325 | if ((res = pstm_mul_2d(&x, norm, &x)) != PSTM_OKAY) { | ||
1326 | goto LBL_Y; | ||
1327 | } | ||
1328 | if ((res = pstm_mul_2d(&y, norm, &y)) != PSTM_OKAY) { | ||
1329 | goto LBL_Y; | ||
1330 | } | ||
1331 | } else { | ||
1332 | norm = 0; | ||
1333 | } | ||
1334 | |||
1335 | /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ | ||
1336 | n = x.used - 1; | ||
1337 | t = y.used - 1; | ||
1338 | |||
1339 | if ((res = pstm_init_size(pool, &q, n - t + 1)) != PSTM_OKAY) { | ||
1340 | goto LBL_Y; | ||
1341 | } | ||
1342 | q.used = n - t + 1; | ||
1343 | |||
1344 | /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ | ||
1345 | if ((res = pstm_lshd(&y, n - t)) != PSTM_OKAY) { /* y = y*b**{n-t} */ | ||
1346 | goto LBL_Q; | ||
1347 | } | ||
1348 | |||
1349 | while (pstm_cmp (&x, &y) != PSTM_LT) { | ||
1350 | ++(q.dp[n - t]); | ||
1351 | if ((res = pstm_sub(&x, &y, &x)) != PSTM_OKAY) { | ||
1352 | goto LBL_Q; | ||
1353 | } | ||
1354 | } | ||
1355 | |||
1356 | /* reset y by shifting it back down */ | ||
1357 | pstm_rshd (&y, n - t); | ||
1358 | |||
1359 | /* step 3. for i from n down to (t + 1) */ | ||
1360 | for (i = n; i >= (t + 1); i--) { | ||
1361 | if (i > x.used) { | ||
1362 | continue; | ||
1363 | } | ||
1364 | |||
1365 | /* step 3.1 if xi == yt then set q{i-t-1} to b-1, | ||
1366 | * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ | ||
1367 | if (x.dp[i] == y.dp[t]) { | ||
1368 | q.dp[i - t - 1] = (pstm_digit)((((pstm_word)1) << DIGIT_BIT) - 1); | ||
1369 | } else { | ||
1370 | pstm_word tmp; | ||
1371 | tmp = ((pstm_word) x.dp[i]) << ((pstm_word) DIGIT_BIT); | ||
1372 | tmp |= ((pstm_word) x.dp[i - 1]); | ||
1373 | #if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT) | ||
1374 | psDiv64(&tmp, y.dp[t]); | ||
1375 | #elif defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT) | ||
1376 | psDiv128(&tmp, y.dp[t]); | ||
1377 | #else | ||
1378 | tmp /= ((pstm_word) y.dp[t]); | ||
1379 | #endif /* USE_MATRIX_DIV64 */ | ||
1380 | q.dp[i - t - 1] = (pstm_digit) (tmp); | ||
1381 | } | ||
1382 | |||
1383 | /* while (q{i-t-1} * (yt * b + y{t-1})) > | ||
1384 | xi * b**2 + xi-1 * b + xi-2 | ||
1385 | |||
1386 | do q{i-t-1} -= 1; | ||
1387 | */ | ||
1388 | q.dp[i - t - 1] = (q.dp[i - t - 1] + 1); | ||
1389 | do { | ||
1390 | q.dp[i - t - 1] = (q.dp[i - t - 1] - 1); | ||
1391 | |||
1392 | /* find left hand */ | ||
1393 | pstm_zero (&t1); | ||
1394 | t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; | ||
1395 | t1.dp[1] = y.dp[t]; | ||
1396 | t1.used = 2; | ||
1397 | if ((res = pstm_mul_d (&t1, q.dp[i - t - 1], &t1)) != PSTM_OKAY) { | ||
1398 | goto LBL_Q; | ||
1399 | } | ||
1400 | |||
1401 | /* find right hand */ | ||
1402 | t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; | ||
1403 | t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; | ||
1404 | t2.dp[2] = x.dp[i]; | ||
1405 | t2.used = 3; | ||
1406 | } while (pstm_cmp_mag(&t1, &t2) == PSTM_GT); | ||
1407 | |||
1408 | /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ | ||
1409 | if ((res = pstm_mul_d(&y, q.dp[i - t - 1], &t1)) != PSTM_OKAY) { | ||
1410 | goto LBL_Q; | ||
1411 | } | ||
1412 | |||
1413 | if ((res = pstm_lshd(&t1, i - t - 1)) != PSTM_OKAY) { | ||
1414 | goto LBL_Q; | ||
1415 | } | ||
1416 | |||
1417 | if ((res = pstm_sub(&x, &t1, &x)) != PSTM_OKAY) { | ||
1418 | goto LBL_Q; | ||
1419 | } | ||
1420 | |||
1421 | /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ | ||
1422 | if (x.sign == PSTM_NEG) { | ||
1423 | if ((res = pstm_copy(&y, &t1)) != PSTM_OKAY) { | ||
1424 | goto LBL_Q; | ||
1425 | } | ||
1426 | if ((res = pstm_lshd (&t1, i - t - 1)) != PSTM_OKAY) { | ||
1427 | goto LBL_Q; | ||
1428 | } | ||
1429 | if ((res = pstm_add (&x, &t1, &x)) != PSTM_OKAY) { | ||
1430 | goto LBL_Q; | ||
1431 | } | ||
1432 | q.dp[i - t - 1] = q.dp[i - t - 1] - 1; | ||
1433 | } | ||
1434 | } | ||
1435 | /* | ||
1436 | now q is the quotient and x is the remainder (which we have to normalize) | ||
1437 | */ | ||
1438 | /* get sign before writing to c */ | ||
1439 | x.sign = x.used == 0 ? PSTM_ZPOS : a->sign; | ||
1440 | |||
1441 | if (c != NULL) { | ||
1442 | pstm_clamp (&q); | ||
1443 | if (pstm_copy (&q, c) != PSTM_OKAY) { | ||
1444 | res = PS_MEM_FAIL; | ||
1445 | goto LBL_Q; | ||
1446 | } | ||
1447 | c->sign = neg; | ||
1448 | } | ||
1449 | |||
1450 | if (d != NULL) { | ||
1451 | if ((res = pstm_div_2d (pool, &x, norm, &x, NULL)) != PSTM_OKAY) { | ||
1452 | goto LBL_Q; | ||
1453 | } | ||
1454 | /* | ||
1455 | the following is a kludge, essentially we were seeing the right | ||
1456 | remainder but with excess digits that should have been zero | ||
1457 | */ | ||
1458 | for (i = b->used; i < x.used; i++) { | ||
1459 | x.dp[i] = 0; | ||
1460 | } | ||
1461 | pstm_clamp(&x); | ||
1462 | if (pstm_copy (&x, d) != PSTM_OKAY) { | ||
1463 | res = PS_MEM_FAIL; | ||
1464 | goto LBL_Q; | ||
1465 | } | ||
1466 | } | ||
1467 | |||
1468 | res = PSTM_OKAY; | ||
1469 | |||
1470 | LBL_Q:pstm_clear (&q); | ||
1471 | LBL_Y:pstm_clear (&y); | ||
1472 | LBL_X:pstm_clear (&x); | ||
1473 | LBL_T2:pstm_clear (&t2); | ||
1474 | LBL_T1:pstm_clear (&t1); | ||
1475 | |||
1476 | return res; | ||
1477 | } | ||
1478 | |||
1479 | /******************************************************************************/ | ||
1480 | /* | ||
1481 | Swap the elements of two integers, for cases where you can't simply swap | ||
1482 | the pstm_int pointers around | ||
1483 | */ | ||
1484 | void pstm_exch(pstm_int * a, pstm_int * b) | ||
1485 | { | ||
1486 | pstm_int t; | ||
1487 | |||
1488 | t = *a; | ||
1489 | *a = *b; | ||
1490 | *b = t; | ||
1491 | } | ||
1492 | |||
1493 | /******************************************************************************/ | ||
1494 | /* | ||
1495 | c = a mod b, 0 <= c < b | ||
1496 | */ | ||
1497 | int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c) | ||
1498 | { | ||
1499 | pstm_int t; | ||
1500 | int32 err; | ||
1501 | /* | ||
1502 | Smart-size | ||
1503 | */ | ||
1504 | if ((err = pstm_init_size(pool, &t, b->alloc)) != PSTM_OKAY) { | ||
1505 | return err; | ||
1506 | } | ||
1507 | if ((err = pstm_div(pool, a, b, NULL, &t)) != PSTM_OKAY) { | ||
1508 | pstm_clear (&t); | ||
1509 | return err; | ||
1510 | } | ||
1511 | if (t.sign != b->sign) { | ||
1512 | err = pstm_add(&t, b, c); | ||
1513 | } else { | ||
1514 | pstm_exch (&t, c); | ||
1515 | } | ||
1516 | pstm_clear (&t); | ||
1517 | return err; | ||
1518 | } | ||
1519 | |||
1520 | /******************************************************************************/ | ||
1521 | /* | ||
1522 | d = a * b (mod c) | ||
1523 | */ | ||
1524 | int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c, | ||
1525 | pstm_int *d) | ||
1526 | { | ||
1527 | int32 res; | ||
1528 | int16 size; | ||
1529 | pstm_int tmp; | ||
1530 | |||
1531 | /* | ||
1532 | Smart-size pstm_inits. d is an output that is influenced by this local 't' | ||
1533 | so don't shrink 'd' if it wants to becuase this will lead to an pstm_grow | ||
1534 | in RSA operations | ||
1535 | */ | ||
1536 | size = a->used + b->used + 1; | ||
1537 | if ((a == d) && (size < a->alloc)) { | ||
1538 | size = a->alloc; | ||
1539 | } | ||
1540 | if ((res = pstm_init_size(pool, &tmp, size)) != PSTM_OKAY) { | ||
1541 | return res; | ||
1542 | } | ||
1543 | if ((res = pstm_mul_comba(pool, a, b, &tmp, NULL, 0)) != PSTM_OKAY) { | ||
1544 | pstm_clear(&tmp); | ||
1545 | return res; | ||
1546 | } | ||
1547 | res = pstm_mod(pool, &tmp, c, d); | ||
1548 | pstm_clear(&tmp); | ||
1549 | return res; | ||
1550 | } | ||
1551 | |||
1552 | /******************************************************************************/ | ||
1553 | /* | ||
1554 | * y = g**x (mod b) | ||
1555 | * Some restrictions... x must be positive and < b | ||
1556 | */ | ||
1557 | int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P, | ||
1558 | pstm_int *Y) | ||
1559 | { | ||
1560 | pstm_int M[32], res; /* Keep this winsize based: (1 << max_winsize) */ | ||
1561 | pstm_digit buf, mp; | ||
1562 | pstm_digit *paD; | ||
1563 | int32 err, bitbuf; | ||
1564 | int16 bitcpy, bitcnt, mode, digidx, x, y, winsize; | ||
1565 | uint32 paDlen; | ||
1566 | |||
1567 | /* set window size from what user set as optimization */ | ||
1568 | x = pstm_count_bits(X); | ||
1569 | if (x < 50) { | ||
1570 | winsize = 2; | ||
1571 | } else { | ||
1572 | winsize = PS_EXPTMOD_WINSIZE; | ||
1573 | } | ||
1574 | |||
1575 | /* now setup montgomery */ | ||
1576 | if ((err = pstm_montgomery_setup (P, &mp)) != PSTM_OKAY) { | ||
1577 | return err; | ||
1578 | } | ||
1579 | |||
1580 | /* setup result */ | ||
1581 | if ((err = pstm_init_size(pool, &res, (P->used * 2) + 1)) != PSTM_OKAY) { | ||
1582 | return err; | ||
1583 | } | ||
1584 | /* | ||
1585 | create M table | ||
1586 | The M table contains powers of the input base, e.g. M[x] = G^x mod P | ||
1587 | The first half of the table is not computed though except for M[0] and M[1] | ||
1588 | */ | ||
1589 | /* now we need R mod m */ | ||
1590 | if ((err = pstm_montgomery_calc_normalization (&res, P)) != PSTM_OKAY) { | ||
1591 | goto LBL_RES; | ||
1592 | } | ||
1593 | /* | ||
1594 | init M array | ||
1595 | init first cell | ||
1596 | */ | ||
1597 | if ((err = pstm_init_size(pool, &M[1], res.used)) != PSTM_OKAY) { | ||
1598 | goto LBL_RES; | ||
1599 | } | ||
1600 | |||
1601 | /* now set M[1] to G * R mod m */ | ||
1602 | if (pstm_cmp_mag(P, G) != PSTM_GT) { | ||
1603 | /* G > P so we reduce it first */ | ||
1604 | if ((err = pstm_mod(pool, G, P, &M[1])) != PSTM_OKAY) { | ||
1605 | goto LBL_M; | ||
1606 | } | ||
1607 | } else { | ||
1608 | if ((err = pstm_copy(G, &M[1])) != PSTM_OKAY) { | ||
1609 | goto LBL_M; | ||
1610 | } | ||
1611 | } | ||
1612 | if ((err = pstm_mulmod (pool, &M[1], &res, P, &M[1])) != PSTM_OKAY) { | ||
1613 | goto LBL_M; | ||
1614 | } | ||
1615 | /* | ||
1616 | Pre-allocated digit. Used for mul, sqr, AND reduce | ||
1617 | */ | ||
1618 | paDlen = ((M[1].used + 3) * 2) * sizeof(pstm_digit); | ||
1619 | paD = xzalloc(paDlen); | ||
1620 | /* | ||
1621 | compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times | ||
1622 | */ | ||
1623 | if (pstm_init_copy(pool, &M[1 << (winsize - 1)], &M[1], 1) != PSTM_OKAY) { | ||
1624 | err = PS_MEM_FAIL; | ||
1625 | goto LBL_PAD; | ||
1626 | } | ||
1627 | for (x = 0; x < (winsize - 1); x++) { | ||
1628 | if ((err = pstm_sqr_comba (pool, &M[1 << (winsize - 1)], | ||
1629 | &M[1 << (winsize - 1)], paD, paDlen)) != PSTM_OKAY) { | ||
1630 | goto LBL_PAD; | ||
1631 | } | ||
1632 | if ((err = pstm_montgomery_reduce(pool, &M[1 << (winsize - 1)], P, mp, | ||
1633 | paD, paDlen)) != PSTM_OKAY) { | ||
1634 | goto LBL_PAD; | ||
1635 | } | ||
1636 | } | ||
1637 | /* | ||
1638 | now init the second half of the array | ||
1639 | */ | ||
1640 | for (x = (1<<(winsize-1)) + 1; x < (1 << winsize); x++) { | ||
1641 | if ((err = pstm_init_size(pool, &M[x], M[1<<(winsize-1)].alloc + 1)) | ||
1642 | != PSTM_OKAY) { | ||
1643 | for (y = 1<<(winsize-1); y < x; y++) { | ||
1644 | pstm_clear(&M[y]); | ||
1645 | } | ||
1646 | goto LBL_PAD; | ||
1647 | } | ||
1648 | } | ||
1649 | |||
1650 | /* create upper table */ | ||
1651 | for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { | ||
1652 | if ((err = pstm_mul_comba(pool, &M[x - 1], &M[1], &M[x], paD, paDlen)) | ||
1653 | != PSTM_OKAY) { | ||
1654 | goto LBL_MARRAY; | ||
1655 | } | ||
1656 | if ((err = pstm_montgomery_reduce(pool, &M[x], P, mp, paD, paDlen)) != | ||
1657 | PSTM_OKAY) { | ||
1658 | goto LBL_MARRAY; | ||
1659 | } | ||
1660 | } | ||
1661 | |||
1662 | /* set initial mode and bit cnt */ | ||
1663 | mode = 0; | ||
1664 | bitcnt = 1; | ||
1665 | buf = 0; | ||
1666 | digidx = X->used - 1; | ||
1667 | bitcpy = 0; | ||
1668 | bitbuf = 0; | ||
1669 | |||
1670 | for (;;) { | ||
1671 | /* grab next digit as required */ | ||
1672 | if (--bitcnt == 0) { | ||
1673 | /* if digidx == -1 we are out of digits so break */ | ||
1674 | if (digidx == -1) { | ||
1675 | break; | ||
1676 | } | ||
1677 | /* read next digit and reset bitcnt */ | ||
1678 | buf = X->dp[digidx--]; | ||
1679 | bitcnt = (int32)DIGIT_BIT; | ||
1680 | } | ||
1681 | |||
1682 | /* grab the next msb from the exponent */ | ||
1683 | y = (pstm_digit)(buf >> (DIGIT_BIT - 1)) & 1; | ||
1684 | buf <<= (pstm_digit)1; | ||
1685 | /* | ||
1686 | If the bit is zero and mode == 0 then we ignore it. | ||
1687 | These represent the leading zero bits before the first 1 bit | ||
1688 | in the exponent. Technically this opt is not required but it | ||
1689 | does lower the # of trivial squaring/reductions used | ||
1690 | */ | ||
1691 | if (mode == 0 && y == 0) { | ||
1692 | continue; | ||
1693 | } | ||
1694 | |||
1695 | /* if the bit is zero and mode == 1 then we square */ | ||
1696 | if (mode == 1 && y == 0) { | ||
1697 | if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) != | ||
1698 | PSTM_OKAY) { | ||
1699 | goto LBL_MARRAY; | ||
1700 | } | ||
1701 | if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) | ||
1702 | != PSTM_OKAY) { | ||
1703 | goto LBL_MARRAY; | ||
1704 | } | ||
1705 | continue; | ||
1706 | } | ||
1707 | |||
1708 | /* else we add it to the window */ | ||
1709 | bitbuf |= (y << (winsize - ++bitcpy)); | ||
1710 | mode = 2; | ||
1711 | |||
1712 | if (bitcpy == winsize) { | ||
1713 | /* ok window is filled so square as required and mul square first */ | ||
1714 | for (x = 0; x < winsize; x++) { | ||
1715 | if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) != | ||
1716 | PSTM_OKAY) { | ||
1717 | goto LBL_MARRAY; | ||
1718 | } | ||
1719 | if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, | ||
1720 | paDlen)) != PSTM_OKAY) { | ||
1721 | goto LBL_MARRAY; | ||
1722 | } | ||
1723 | } | ||
1724 | |||
1725 | /* then multiply */ | ||
1726 | if ((err = pstm_mul_comba(pool, &res, &M[bitbuf], &res, paD, | ||
1727 | paDlen)) != PSTM_OKAY) { | ||
1728 | goto LBL_MARRAY; | ||
1729 | } | ||
1730 | if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) | ||
1731 | != PSTM_OKAY) { | ||
1732 | goto LBL_MARRAY; | ||
1733 | } | ||
1734 | |||
1735 | /* empty window and reset */ | ||
1736 | bitcpy = 0; | ||
1737 | bitbuf = 0; | ||
1738 | mode = 1; | ||
1739 | } | ||
1740 | } | ||
1741 | |||
1742 | /* if bits remain then square/multiply */ | ||
1743 | if (mode == 2 && bitcpy > 0) { | ||
1744 | /* square then multiply if the bit is set */ | ||
1745 | for (x = 0; x < bitcpy; x++) { | ||
1746 | if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) != | ||
1747 | PSTM_OKAY) { | ||
1748 | goto LBL_MARRAY; | ||
1749 | } | ||
1750 | if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) | ||
1751 | != PSTM_OKAY) { | ||
1752 | goto LBL_MARRAY; | ||
1753 | } | ||
1754 | |||
1755 | /* get next bit of the window */ | ||
1756 | bitbuf <<= 1; | ||
1757 | if ((bitbuf & (1 << winsize)) != 0) { | ||
1758 | /* then multiply */ | ||
1759 | if ((err = pstm_mul_comba(pool, &res, &M[1], &res, paD, paDlen)) | ||
1760 | != PSTM_OKAY) { | ||
1761 | goto LBL_MARRAY; | ||
1762 | } | ||
1763 | if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, | ||
1764 | paDlen)) != PSTM_OKAY) { | ||
1765 | goto LBL_MARRAY; | ||
1766 | } | ||
1767 | } | ||
1768 | } | ||
1769 | } | ||
1770 | /* | ||
1771 | Fix up result if Montgomery reduction is used recall that any value in a | ||
1772 | Montgomery system is actually multiplied by R mod n. So we have to reduce | ||
1773 | one more time to cancel out the factor of R. | ||
1774 | */ | ||
1775 | if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) != | ||
1776 | PSTM_OKAY) { | ||
1777 | goto LBL_MARRAY; | ||
1778 | } | ||
1779 | /* swap res with Y */ | ||
1780 | if ((err = pstm_copy (&res, Y)) != PSTM_OKAY) { | ||
1781 | goto LBL_MARRAY; | ||
1782 | } | ||
1783 | err = PSTM_OKAY; | ||
1784 | LBL_MARRAY: | ||
1785 | for (x = 1<<(winsize-1); x < (1 << winsize); x++) { | ||
1786 | pstm_clear(&M[x]); | ||
1787 | } | ||
1788 | LBL_PAD:psFree(paD, pool); | ||
1789 | LBL_M: pstm_clear(&M[1]); | ||
1790 | LBL_RES:pstm_clear(&res); | ||
1791 | return err; | ||
1792 | } | ||
1793 | |||
1794 | /******************************************************************************/ | ||
1795 | /* | ||
1796 | |||
1797 | */ | ||
1798 | int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c) | ||
1799 | { | ||
1800 | int32 res; | ||
1801 | int16 sa, sb; | ||
1802 | |||
1803 | /* get sign of both inputs */ | ||
1804 | sa = a->sign; | ||
1805 | sb = b->sign; | ||
1806 | |||
1807 | /* handle two cases, not four */ | ||
1808 | if (sa == sb) { | ||
1809 | /* both positive or both negative, add their mags, copy the sign */ | ||
1810 | c->sign = sa; | ||
1811 | if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) { | ||
1812 | return res; | ||
1813 | } | ||
1814 | } else { | ||
1815 | /* | ||
1816 | one positive, the other negative | ||
1817 | subtract the one with the greater magnitude from the one of the lesser | ||
1818 | magnitude. The result gets the sign of the one with the greater mag. | ||
1819 | */ | ||
1820 | if (pstm_cmp_mag (a, b) == PSTM_LT) { | ||
1821 | c->sign = sb; | ||
1822 | if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) { | ||
1823 | return res; | ||
1824 | } | ||
1825 | } else { | ||
1826 | c->sign = sa; | ||
1827 | if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) { | ||
1828 | return res; | ||
1829 | } | ||
1830 | } | ||
1831 | } | ||
1832 | return PS_SUCCESS; | ||
1833 | } | ||
1834 | |||
1835 | /******************************************************************************/ | ||
1836 | /* | ||
1837 | reverse an array, used for radix code | ||
1838 | */ | ||
1839 | static void pstm_reverse (unsigned char *s, int16 len) | ||
1840 | { | ||
1841 | int32 ix, iy; | ||
1842 | unsigned char t; | ||
1843 | |||
1844 | ix = 0; | ||
1845 | iy = len - 1; | ||
1846 | while (ix < iy) { | ||
1847 | t = s[ix]; | ||
1848 | s[ix] = s[iy]; | ||
1849 | s[iy] = t; | ||
1850 | ++ix; | ||
1851 | --iy; | ||
1852 | } | ||
1853 | } | ||
1854 | /******************************************************************************/ | ||
1855 | /* | ||
1856 | No reverse. Useful in some of the EIP-154 PKA stuff where special byte | ||
1857 | order seems to come into play more often | ||
1858 | */ | ||
1859 | int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a, unsigned char *b) | ||
1860 | { | ||
1861 | int32 res; | ||
1862 | int16 x; | ||
1863 | pstm_int t = { 0 }; | ||
1864 | |||
1865 | if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) { | ||
1866 | return res; | ||
1867 | } | ||
1868 | |||
1869 | x = 0; | ||
1870 | while (pstm_iszero (&t) == 0) { | ||
1871 | b[x++] = (unsigned char) (t.dp[0] & 255); | ||
1872 | if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) { | ||
1873 | pstm_clear(&t); | ||
1874 | return res; | ||
1875 | } | ||
1876 | } | ||
1877 | pstm_clear(&t); | ||
1878 | return PS_SUCCESS; | ||
1879 | } | ||
1880 | /******************************************************************************/ | ||
1881 | /* | ||
1882 | |||
1883 | */ | ||
1884 | int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a, unsigned char *b) | ||
1885 | { | ||
1886 | int32 res; | ||
1887 | int16 x; | ||
1888 | pstm_int t = { 0 }; | ||
1889 | |||
1890 | if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) { | ||
1891 | return res; | ||
1892 | } | ||
1893 | |||
1894 | x = 0; | ||
1895 | while (pstm_iszero (&t) == 0) { | ||
1896 | b[x++] = (unsigned char) (t.dp[0] & 255); | ||
1897 | if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) { | ||
1898 | pstm_clear(&t); | ||
1899 | return res; | ||
1900 | } | ||
1901 | } | ||
1902 | pstm_reverse (b, x); | ||
1903 | pstm_clear(&t); | ||
1904 | return PS_SUCCESS; | ||
1905 | } | ||
1906 | |||
1907 | /******************************************************************************/ | ||
1908 | /* | ||
1909 | compare against a single digit | ||
1910 | */ | ||
1911 | int32 pstm_cmp_d(pstm_int *a, pstm_digit b) | ||
1912 | { | ||
1913 | /* compare based on sign */ | ||
1914 | if ((b && a->used == 0) || a->sign == PSTM_NEG) { | ||
1915 | return PSTM_LT; | ||
1916 | } | ||
1917 | |||
1918 | /* compare based on magnitude */ | ||
1919 | if (a->used > 1) { | ||
1920 | return PSTM_GT; | ||
1921 | } | ||
1922 | |||
1923 | /* compare the only digit of a to b */ | ||
1924 | if (a->dp[0] > b) { | ||
1925 | return PSTM_GT; | ||
1926 | } else if (a->dp[0] < b) { | ||
1927 | return PSTM_LT; | ||
1928 | } else { | ||
1929 | return PSTM_EQ; | ||
1930 | } | ||
1931 | } | ||
1932 | |||
1933 | /* | ||
1934 | Need invmod for ECC and also private key loading for hardware crypto | ||
1935 | in cases where dQ > dP. The values must be switched and a new qP must be | ||
1936 | calculated using this function | ||
1937 | */ | ||
1938 | static int32 pstm_invmod_slow(psPool_t *pool, pstm_int * a, pstm_int * b, | ||
1939 | pstm_int * c) | ||
1940 | { | ||
1941 | pstm_int x, y, u, v, A, B, C, D; | ||
1942 | int32 res; | ||
1943 | |||
1944 | /* b cannot be negative */ | ||
1945 | if (b->sign == PSTM_NEG || pstm_iszero(b) == 1) { | ||
1946 | return PS_LIMIT_FAIL; | ||
1947 | } | ||
1948 | |||
1949 | /* init temps */ | ||
1950 | if (pstm_init_size(pool, &x, b->used) != PSTM_OKAY) { | ||
1951 | return PS_MEM_FAIL; | ||
1952 | } | ||
1953 | |||
1954 | /* x = a, y = b */ | ||
1955 | if ((res = pstm_mod(pool, a, b, &x)) != PSTM_OKAY) { | ||
1956 | goto LBL_X; | ||
1957 | } | ||
1958 | |||
1959 | if (pstm_init_copy(pool, &y, b, 0) != PSTM_OKAY) { | ||
1960 | goto LBL_X; | ||
1961 | } | ||
1962 | |||
1963 | /* 2. [modified] if x,y are both even then return an error! */ | ||
1964 | if (pstm_iseven (&x) == 1 && pstm_iseven (&y) == 1) { | ||
1965 | res = PS_FAILURE; | ||
1966 | goto LBL_Y; | ||
1967 | } | ||
1968 | |||
1969 | /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ | ||
1970 | if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) { | ||
1971 | goto LBL_Y; | ||
1972 | } | ||
1973 | if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) { | ||
1974 | goto LBL_U; | ||
1975 | } | ||
1976 | |||
1977 | if ((res = pstm_init_size(pool, &A, sizeof(pstm_digit))) != PSTM_OKAY) { | ||
1978 | goto LBL_V; | ||
1979 | } | ||
1980 | |||
1981 | if ((res = pstm_init_size(pool, &D, sizeof(pstm_digit))) != PSTM_OKAY) { | ||
1982 | goto LBL_A; | ||
1983 | } | ||
1984 | pstm_set (&A, 1); | ||
1985 | pstm_set (&D, 1); | ||
1986 | |||
1987 | if ((res = pstm_init(pool, &B)) != PSTM_OKAY) { | ||
1988 | goto LBL_D; | ||
1989 | } | ||
1990 | if ((res = pstm_init(pool, &C)) != PSTM_OKAY) { | ||
1991 | goto LBL_B; | ||
1992 | } | ||
1993 | |||
1994 | top: | ||
1995 | /* 4. while u is even do */ | ||
1996 | while (pstm_iseven (&u) == 1) { | ||
1997 | /* 4.1 u = u/2 */ | ||
1998 | if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) { | ||
1999 | goto LBL_C; | ||
2000 | } | ||
2001 | |||
2002 | /* 4.2 if A or B is odd then */ | ||
2003 | if (pstm_isodd (&A) == 1 || pstm_isodd (&B) == 1) { | ||
2004 | /* A = (A+y)/2, B = (B-x)/2 */ | ||
2005 | if ((res = pstm_add (&A, &y, &A)) != PSTM_OKAY) { | ||
2006 | goto LBL_C; | ||
2007 | } | ||
2008 | if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) { | ||
2009 | goto LBL_C; | ||
2010 | } | ||
2011 | } | ||
2012 | /* A = A/2, B = B/2 */ | ||
2013 | if ((res = pstm_div_2 (&A, &A)) != PSTM_OKAY) { | ||
2014 | goto LBL_C; | ||
2015 | } | ||
2016 | if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) { | ||
2017 | goto LBL_C; | ||
2018 | } | ||
2019 | } | ||
2020 | |||
2021 | /* 5. while v is even do */ | ||
2022 | while (pstm_iseven (&v) == 1) { | ||
2023 | /* 5.1 v = v/2 */ | ||
2024 | if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) { | ||
2025 | goto LBL_C; | ||
2026 | } | ||
2027 | |||
2028 | /* 5.2 if C or D is odd then */ | ||
2029 | if (pstm_isodd (&C) == 1 || pstm_isodd (&D) == 1) { | ||
2030 | /* C = (C+y)/2, D = (D-x)/2 */ | ||
2031 | if ((res = pstm_add (&C, &y, &C)) != PSTM_OKAY) { | ||
2032 | goto LBL_C; | ||
2033 | } | ||
2034 | if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) { | ||
2035 | goto LBL_C; | ||
2036 | } | ||
2037 | } | ||
2038 | /* C = C/2, D = D/2 */ | ||
2039 | if ((res = pstm_div_2 (&C, &C)) != PSTM_OKAY) { | ||
2040 | goto LBL_C; | ||
2041 | } | ||
2042 | if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) { | ||
2043 | goto LBL_C; | ||
2044 | } | ||
2045 | } | ||
2046 | |||
2047 | /* 6. if u >= v then */ | ||
2048 | if (pstm_cmp (&u, &v) != PSTM_LT) { | ||
2049 | /* u = u - v, A = A - C, B = B - D */ | ||
2050 | if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) { | ||
2051 | goto LBL_C; | ||
2052 | } | ||
2053 | if ((res = pstm_sub (&A, &C, &A)) != PSTM_OKAY) { | ||
2054 | goto LBL_C; | ||
2055 | } | ||
2056 | if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) { | ||
2057 | goto LBL_C; | ||
2058 | } | ||
2059 | } else { | ||
2060 | /* v - v - u, C = C - A, D = D - B */ | ||
2061 | if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) { | ||
2062 | goto LBL_C; | ||
2063 | } | ||
2064 | if ((res = pstm_sub (&C, &A, &C)) != PSTM_OKAY) { | ||
2065 | goto LBL_C; | ||
2066 | } | ||
2067 | if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) { | ||
2068 | goto LBL_C; | ||
2069 | } | ||
2070 | } | ||
2071 | |||
2072 | /* if not zero goto step 4 */ | ||
2073 | if (pstm_iszero (&u) == 0) | ||
2074 | goto top; | ||
2075 | |||
2076 | /* now a = C, b = D, gcd == g*v */ | ||
2077 | |||
2078 | /* if v != 1 then there is no inverse */ | ||
2079 | if (pstm_cmp_d (&v, 1) != PSTM_EQ) { | ||
2080 | res = PS_FAILURE; | ||
2081 | goto LBL_C; | ||
2082 | } | ||
2083 | |||
2084 | /* if its too low */ | ||
2085 | while (pstm_cmp_d(&C, 0) == PSTM_LT) { | ||
2086 | if ((res = pstm_add(&C, b, &C)) != PSTM_OKAY) { | ||
2087 | goto LBL_C; | ||
2088 | } | ||
2089 | } | ||
2090 | |||
2091 | /* too big */ | ||
2092 | while (pstm_cmp_mag(&C, b) != PSTM_LT) { | ||
2093 | if ((res = pstm_sub(&C, b, &C)) != PSTM_OKAY) { | ||
2094 | goto LBL_C; | ||
2095 | } | ||
2096 | } | ||
2097 | |||
2098 | /* C is now the inverse */ | ||
2099 | if ((res = pstm_copy(&C, c)) != PSTM_OKAY) { | ||
2100 | goto LBL_C; | ||
2101 | } | ||
2102 | res = PSTM_OKAY; | ||
2103 | |||
2104 | LBL_C: pstm_clear(&C); | ||
2105 | LBL_D: pstm_clear(&D); | ||
2106 | LBL_B: pstm_clear(&B); | ||
2107 | LBL_A: pstm_clear(&A); | ||
2108 | LBL_V: pstm_clear(&v); | ||
2109 | LBL_U: pstm_clear(&u); | ||
2110 | LBL_Y: pstm_clear(&y); | ||
2111 | LBL_X: pstm_clear(&x); | ||
2112 | |||
2113 | return res; | ||
2114 | } | ||
2115 | |||
2116 | /* c = 1/a (mod b) for odd b only */ | ||
2117 | int32 pstm_invmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c) | ||
2118 | { | ||
2119 | pstm_int x, y, u, v, B, D; | ||
2120 | int32 res; | ||
2121 | uint16 neg, sanity; | ||
2122 | |||
2123 | /* 2. [modified] b must be odd */ | ||
2124 | if (pstm_iseven (b) == 1) { | ||
2125 | return pstm_invmod_slow(pool, a,b,c); | ||
2126 | } | ||
2127 | |||
2128 | /* x == modulus, y == value to invert */ | ||
2129 | if ((res = pstm_init_copy(pool, &x, b, 0)) != PSTM_OKAY) { | ||
2130 | return res; | ||
2131 | } | ||
2132 | |||
2133 | if ((res = pstm_init_size(pool, &y, a->alloc)) != PSTM_OKAY) { | ||
2134 | goto LBL_X; | ||
2135 | } | ||
2136 | |||
2137 | /* we need y = |a| */ | ||
2138 | pstm_abs(a, &y); | ||
2139 | |||
2140 | /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ | ||
2141 | if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) { | ||
2142 | goto LBL_Y; | ||
2143 | } | ||
2144 | if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) { | ||
2145 | goto LBL_U; | ||
2146 | } | ||
2147 | if ((res = pstm_init(pool, &B)) != PSTM_OKAY) { | ||
2148 | goto LBL_V; | ||
2149 | } | ||
2150 | if ((res = pstm_init(pool, &D)) != PSTM_OKAY) { | ||
2151 | goto LBL_B; | ||
2152 | } | ||
2153 | |||
2154 | pstm_set (&D, 1); | ||
2155 | |||
2156 | sanity = 0; | ||
2157 | top: | ||
2158 | /* 4. while u is even do */ | ||
2159 | while (pstm_iseven (&u) == 1) { | ||
2160 | /* 4.1 u = u/2 */ | ||
2161 | if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) { | ||
2162 | goto LBL_D; | ||
2163 | } | ||
2164 | |||
2165 | /* 4.2 if B is odd then */ | ||
2166 | if (pstm_isodd (&B) == 1) { | ||
2167 | if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) { | ||
2168 | goto LBL_D; | ||
2169 | } | ||
2170 | } | ||
2171 | /* B = B/2 */ | ||
2172 | if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) { | ||
2173 | goto LBL_D; | ||
2174 | } | ||
2175 | } | ||
2176 | |||
2177 | /* 5. while v is even do */ | ||
2178 | while (pstm_iseven (&v) == 1) { | ||
2179 | /* 5.1 v = v/2 */ | ||
2180 | if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) { | ||
2181 | goto LBL_D; | ||
2182 | } | ||
2183 | /* 5.2 if D is odd then */ | ||
2184 | if (pstm_isodd (&D) == 1) { | ||
2185 | /* D = (D-x)/2 */ | ||
2186 | if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) { | ||
2187 | goto LBL_D; | ||
2188 | } | ||
2189 | } | ||
2190 | /* D = D/2 */ | ||
2191 | if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) { | ||
2192 | goto LBL_D; | ||
2193 | } | ||
2194 | } | ||
2195 | |||
2196 | /* 6. if u >= v then */ | ||
2197 | if (pstm_cmp (&u, &v) != PSTM_LT) { | ||
2198 | /* u = u - v, B = B - D */ | ||
2199 | if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) { | ||
2200 | goto LBL_D; | ||
2201 | } | ||
2202 | if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) { | ||
2203 | goto LBL_D; | ||
2204 | } | ||
2205 | } else { | ||
2206 | /* v - v - u, D = D - B */ | ||
2207 | if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) { | ||
2208 | goto LBL_D; | ||
2209 | } | ||
2210 | if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) { | ||
2211 | goto LBL_D; | ||
2212 | } | ||
2213 | } | ||
2214 | |||
2215 | /* if not zero goto step 4 */ | ||
2216 | if (sanity++ > 1000) { | ||
2217 | res = PS_LIMIT_FAIL; | ||
2218 | goto LBL_D; | ||
2219 | } | ||
2220 | if (pstm_iszero (&u) == 0) { | ||
2221 | goto top; | ||
2222 | } | ||
2223 | |||
2224 | /* now a = C, b = D, gcd == g*v */ | ||
2225 | |||
2226 | /* if v != 1 then there is no inverse */ | ||
2227 | if (pstm_cmp_d (&v, 1) != PSTM_EQ) { | ||
2228 | res = PS_FAILURE; | ||
2229 | goto LBL_D; | ||
2230 | } | ||
2231 | |||
2232 | /* b is now the inverse */ | ||
2233 | neg = a->sign; | ||
2234 | while (D.sign == PSTM_NEG) { | ||
2235 | if ((res = pstm_add (&D, b, &D)) != PSTM_OKAY) { | ||
2236 | goto LBL_D; | ||
2237 | } | ||
2238 | } | ||
2239 | if ((res = pstm_copy (&D, c)) != PSTM_OKAY) { | ||
2240 | goto LBL_D; | ||
2241 | } | ||
2242 | c->sign = neg; | ||
2243 | res = PSTM_OKAY; | ||
2244 | |||
2245 | LBL_D: pstm_clear(&D); | ||
2246 | LBL_B: pstm_clear(&B); | ||
2247 | LBL_V: pstm_clear(&v); | ||
2248 | LBL_U: pstm_clear(&u); | ||
2249 | LBL_Y: pstm_clear(&y); | ||
2250 | LBL_X: pstm_clear(&x); | ||
2251 | return res; | ||
2252 | } | ||
2253 | #endif /* !DISABLE_PSTM */ | ||
2254 | /******************************************************************************/ | ||
diff --git a/networking/tls_pstm.h b/networking/tls_pstm.h new file mode 100644 index 000000000..1affc1b69 --- /dev/null +++ b/networking/tls_pstm.h | |||
@@ -0,0 +1,238 @@ | |||
1 | /** | ||
2 | * @file pstm.h | ||
3 | * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master) | ||
4 | * | ||
5 | * multiple-precision integer library. | ||
6 | */ | ||
7 | /* | ||
8 | * Copyright (c) 2013-2015 INSIDE Secure Corporation | ||
9 | * Copyright (c) PeerSec Networks, 2002-2011 | ||
10 | * All Rights Reserved | ||
11 | * | ||
12 | * The latest version of this code is available at http://www.matrixssl.org | ||
13 | * | ||
14 | * This software is open source; you can redistribute it and/or modify | ||
15 | * it under the terms of the GNU General Public License as published by | ||
16 | * the Free Software Foundation; either version 2 of the License, or | ||
17 | * (at your option) any later version. | ||
18 | * | ||
19 | * This General Public License does NOT permit incorporating this software | ||
20 | * into proprietary programs. If you are unable to comply with the GPL, a | ||
21 | * commercial license for this software may be purchased from INSIDE at | ||
22 | * http://www.insidesecure.com/eng/Company/Locations | ||
23 | * | ||
24 | * This program is distributed in WITHOUT ANY WARRANTY; without even the | ||
25 | * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
26 | * See the GNU General Public License for more details. | ||
27 | * | ||
28 | * You should have received a copy of the GNU General Public License | ||
29 | * along with this program; if not, write to the Free Software | ||
30 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
31 | * http://www.gnu.org/copyleft/gpl.html | ||
32 | */ | ||
33 | /******************************************************************************/ | ||
34 | |||
35 | #ifndef _h_PSTMATH | ||
36 | #define _h_PSTMATH | ||
37 | #ifndef DISABLE_PSTM | ||
38 | |||
39 | /* Define this here to avoid including circular limits.h on some platforms */ | ||
40 | #ifndef CHAR_BIT | ||
41 | #define CHAR_BIT 8 | ||
42 | #endif | ||
43 | |||
44 | /******************************************************************************/ | ||
45 | /* | ||
46 | If native 64 bit integers are not supported, we do not support 32x32->64 | ||
47 | in hardware, so we must set the 16 bit flag to produce 16x16->32 products. | ||
48 | */ | ||
49 | #ifndef HAVE_NATIVE_INT64 | ||
50 | #define PSTM_16BIT | ||
51 | #endif /* ! HAVE_NATIVE_INT64 */ | ||
52 | |||
53 | /******************************************************************************/ | ||
54 | /* | ||
55 | Some default configurations. | ||
56 | |||
57 | pstm_word should be the largest value the processor can hold as the product | ||
58 | of a multiplication. Most platforms support a 32x32->64 MAC instruction, | ||
59 | so 64bits is the default pstm_word size. | ||
60 | pstm_digit should be half the size of pstm_word | ||
61 | */ | ||
62 | #ifdef PSTM_8BIT | ||
63 | /* 8-bit digits, 16-bit word products */ | ||
64 | typedef unsigned char pstm_digit; | ||
65 | typedef unsigned short pstm_word; | ||
66 | #define DIGIT_BIT 8 | ||
67 | |||
68 | #elif defined(PSTM_16BIT) | ||
69 | /* 16-bit digits, 32-bit word products */ | ||
70 | typedef unsigned short pstm_digit; | ||
71 | typedef unsigned long pstm_word; | ||
72 | #define DIGIT_BIT 16 | ||
73 | |||
74 | #elif defined(PSTM_64BIT) | ||
75 | /* 64-bit digits, 128-bit word products */ | ||
76 | #ifndef __GNUC__ | ||
77 | #error "64bit digits requires GCC" | ||
78 | #endif | ||
79 | typedef unsigned long pstm_digit; | ||
80 | typedef unsigned long pstm_word __attribute__ ((mode(TI))); | ||
81 | #define DIGIT_BIT 64 | ||
82 | |||
83 | #else | ||
84 | /* This is the default case, 32-bit digits, 64-bit word products */ | ||
85 | typedef uint32 pstm_digit; | ||
86 | typedef uint64 pstm_word; | ||
87 | #define DIGIT_BIT 32 | ||
88 | #define PSTM_32BIT | ||
89 | #endif /* digit and word size */ | ||
90 | |||
91 | #define PSTM_MASK (pstm_digit)(-1) | ||
92 | #define PSTM_DIGIT_MAX PSTM_MASK | ||
93 | |||
94 | /******************************************************************************/ | ||
95 | /* | ||
96 | equalities | ||
97 | */ | ||
98 | #define PSTM_LT -1 /* less than */ | ||
99 | #define PSTM_EQ 0 /* equal to */ | ||
100 | #define PSTM_GT 1 /* greater than */ | ||
101 | |||
102 | #define PSTM_ZPOS 0 /* positive integer */ | ||
103 | #define PSTM_NEG 1 /* negative */ | ||
104 | |||
105 | #define PSTM_OKAY PS_SUCCESS | ||
106 | #define PSTM_MEM PS_MEM_FAIL | ||
107 | |||
108 | /******************************************************************************/ | ||
109 | /* | ||
110 | Various build options | ||
111 | */ | ||
112 | #define PSTM_DEFAULT_INIT 64 /* default (64) digits of allocation */ | ||
113 | #define PSTM_MAX_SIZE 4096 | ||
114 | |||
115 | typedef struct { /* multiple-precision integer: sign plus an array of pstm_digit */ | ||
116 | int16 used, alloc, sign; /* used: digits of dp in use (0 means zero, see pstm_iszero); alloc: digit capacity of dp (presumably the allocated count - confirm in pstm_grow); sign: PSTM_ZPOS or PSTM_NEG */ | ||
117 | pstm_digit *dp; /* digit array; dp[0] is the least significant digit (its low bit decides pstm_iseven/pstm_isodd) */ | ||
118 | psPool_t *pool; /* NOTE(review): presumably the pool dp was allocated from - confirm in pstm_init */ | ||
119 | } pstm_int; | ||
120 | |||
121 | /******************************************************************************/ | ||
122 | /* | ||
123 | Operations on large integers | ||
124 | */ | ||
125 | #define pstm_iszero(a) (((a)->used == 0) ? PS_TRUE : PS_FALSE) | ||
126 | #define pstm_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? PS_TRUE : PS_FALSE) | ||
127 | #define pstm_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? PS_TRUE : PS_FALSE) | ||
128 | #define pstm_abs(a, b) { pstm_copy(a, b); (b)->sign = 0; } | ||
129 | |||
130 | extern void pstm_set(pstm_int *a, pstm_digit b); | ||
131 | |||
132 | extern void pstm_zero(pstm_int * a); | ||
133 | |||
134 | extern int32 pstm_init(psPool_t *pool, pstm_int * a); | ||
135 | |||
136 | extern int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size); | ||
137 | |||
138 | extern int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b, | ||
139 | int16 toSqr); | ||
140 | |||
141 | extern int16 pstm_count_bits (pstm_int * a); | ||
142 | |||
143 | extern int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a, | ||
144 | uint32 len); | ||
145 | |||
146 | extern int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c); | ||
147 | |||
148 | extern int32 pstm_unsigned_bin_size(pstm_int *a); | ||
149 | |||
150 | extern int32 pstm_copy(pstm_int * a, pstm_int * b); | ||
151 | |||
152 | extern void pstm_exch(pstm_int * a, pstm_int * b); | ||
153 | |||
154 | extern void pstm_clear(pstm_int * a); | ||
155 | |||
156 | extern void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2, | ||
157 | pstm_int *mp3, pstm_int *mp4, pstm_int *mp5, pstm_int *mp6, | ||
158 | pstm_int *mp7); | ||
159 | |||
160 | extern int32 pstm_grow(pstm_int * a, int16 size); | ||
161 | |||
162 | extern void pstm_clamp(pstm_int * a); | ||
163 | |||
164 | extern int32 pstm_cmp(pstm_int * a, pstm_int * b); | ||
165 | |||
166 | extern int32 pstm_cmp_mag(pstm_int * a, pstm_int * b); | ||
167 | |||
168 | extern void pstm_rshd(pstm_int *a, int16 x); | ||
169 | |||
170 | extern int32 pstm_lshd(pstm_int * a, int16 b); | ||
171 | |||
172 | extern int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c, | ||
173 | pstm_int *d); | ||
174 | |||
175 | extern int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c, | ||
176 | pstm_int *d); | ||
177 | |||
178 | extern int32 pstm_div_2(pstm_int * a, pstm_int * b); | ||
179 | |||
180 | extern int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c); | ||
181 | |||
182 | extern int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c); | ||
183 | |||
184 | extern int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c); | ||
185 | |||
186 | extern int32 pstm_mul_2(pstm_int * a, pstm_int * b); | ||
187 | |||
188 | extern int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c); | ||
189 | |||
190 | extern int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c, | ||
191 | pstm_int *d); | ||
192 | |||
193 | extern int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P, | ||
194 | pstm_int *Y); | ||
195 | |||
196 | extern int32 pstm_2expt(pstm_int *a, int16 b); | ||
197 | |||
198 | extern int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c); | ||
199 | |||
200 | extern int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a, | ||
201 | unsigned char *b); | ||
202 | |||
203 | extern int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a, | ||
204 | unsigned char *b); | ||
205 | |||
206 | extern int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho); | ||
207 | |||
208 | ///bbox: pool unused | ||
209 | #define pstm_montgomery_reduce(pool, a, m, mp, paD, paDlen) \ | ||
210 | pstm_montgomery_reduce( a, m, mp, paD, paDlen) | ||
211 | extern int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m, | ||
212 | pstm_digit mp, pstm_digit *paD, uint32 paDlen); | ||
213 | |||
214 | #define pstm_mul_comba(pool, A, B, C, paD, paDlen) \ | ||
215 | pstm_mul_comba( A, B, C, paD, paDlen) | ||
216 | extern int32 pstm_mul_comba(psPool_t *pool, pstm_int *A, pstm_int *B, | ||
217 | pstm_int *C, pstm_digit *paD, uint32 paDlen); | ||
218 | |||
219 | ///bbox: pool unused | ||
220 | #define pstm_sqr_comba(pool, A, B, paD, paDlen) \ | ||
221 | pstm_sqr_comba( A, B, paD, paDlen) | ||
222 | extern int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B, | ||
223 | pstm_digit *paD, uint32 paDlen); | ||
224 | |||
225 | extern int32 pstm_cmp_d(pstm_int *a, pstm_digit b); | ||
226 | |||
227 | extern int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b); | ||
228 | |||
229 | extern int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c); | ||
230 | |||
231 | extern int32 pstm_invmod(psPool_t *pool, pstm_int * a, pstm_int * b, | ||
232 | pstm_int * c); | ||
233 | |||
234 | #else /* DISABLE_PSTM */ | ||
235 | typedef int32 pstm_int; | ||
236 | #endif /* !DISABLE_PSTM */ | ||
237 | #endif /* _h_PSTMATH */ | ||
238 | |||
diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c new file mode 100644 index 000000000..c231c4ddf --- /dev/null +++ b/networking/tls_pstm_montgomery_reduce.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | /** | ||
9 | * @file pstm_montgomery_reduce.c | ||
10 | * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master) | ||
11 | * | ||
12 | * Multiprecision Montgomery Reduction. | ||
13 | */ | ||
14 | /* | ||
15 | * Copyright (c) 2013-2015 INSIDE Secure Corporation | ||
16 | * Copyright (c) PeerSec Networks, 2002-2011 | ||
17 | * All Rights Reserved | ||
18 | * | ||
19 | * The latest version of this code is available at http://www.matrixssl.org | ||
20 | * | ||
21 | * This software is open source; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This General Public License does NOT permit incorporating this software | ||
27 | * into proprietary programs. If you are unable to comply with the GPL, a | ||
28 | * commercial license for this software may be purchased from INSIDE at | ||
29 | * http://www.insidesecure.com/eng/Company/Locations | ||
30 | * | ||
31 | * This program is distributed in WITHOUT ANY WARRANTY; without even the | ||
32 | * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
33 | * See the GNU General Public License for more details. | ||
34 | * | ||
35 | * You should have received a copy of the GNU General Public License | ||
36 | * along with this program; if not, write to the Free Software | ||
37 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
38 | * http://www.gnu.org/copyleft/gpl.html | ||
39 | */ | ||
40 | /******************************************************************************/ | ||
41 | |||
42 | ///bbox | ||
43 | //#include "../cryptoApi.h" | ||
44 | #ifndef DISABLE_PSTM | ||
45 | |||
46 | /******************************************************************************/ | ||
47 | |||
48 | #if defined(PSTM_X86) | ||
49 | /* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ | ||
50 | #if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT) | ||
51 | #error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" | ||
52 | #endif | ||
53 | //#pragma message ("Using 32 bit x86 Assembly Optimizations") | ||
54 | |||
55 | #define MONT_START | ||
56 | #define MONT_FINI | ||
57 | #define LOOP_END | ||
58 | #define LOOP_START \ | ||
59 | mu = c[x] * mp | ||
60 | |||
61 | #define INNERMUL \ | ||
62 | asm( \ | ||
63 | "movl %5,%%eax \n\t" \ | ||
64 | "mull %4 \n\t" \ | ||
65 | "addl %1,%%eax \n\t" \ | ||
66 | "adcl $0,%%edx \n\t" \ | ||
67 | "addl %%eax,%0 \n\t" \ | ||
68 | "adcl $0,%%edx \n\t" \ | ||
69 | "movl %%edx,%1 \n\t" \ | ||
70 | :"=g"(_c[LO]), "=r"(cy) \ | ||
71 | :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \ | ||
72 | : "%eax", "%edx", "%cc") | ||
73 | |||
74 | #define PROPCARRY \ | ||
75 | asm( \ | ||
76 | "addl %1,%0 \n\t" \ | ||
77 | "setb %%al \n\t" \ | ||
78 | "movzbl %%al,%1 \n\t" \ | ||
79 | :"=g"(_c[LO]), "=r"(cy) \ | ||
80 | :"0"(_c[LO]), "1"(cy) \ | ||
81 | : "%eax", "%cc") | ||
82 | |||
83 | /******************************************************************************/ | ||
84 | #elif defined(PSTM_X86_64) | ||
85 | /* x86-64 optimized */ | ||
86 | #if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) | ||
87 | #error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" | ||
88 | #endif | ||
89 | //#pragma message ("Using 64 bit x86_64 Assembly Optimizations") | ||
90 | |||
91 | #define MONT_START | ||
92 | #define MONT_FINI | ||
93 | #define LOOP_END | ||
94 | #define LOOP_START \ | ||
95 | mu = c[x] * mp | ||
96 | |||
97 | #define INNERMUL \ | ||
98 | asm( \ | ||
99 | "movq %5,%%rax \n\t" \ | ||
100 | "mulq %4 \n\t" \ | ||
101 | "addq %1,%%rax \n\t" \ | ||
102 | "adcq $0,%%rdx \n\t" \ | ||
103 | "addq %%rax,%0 \n\t" \ | ||
104 | "adcq $0,%%rdx \n\t" \ | ||
105 | "movq %%rdx,%1 \n\t" \ | ||
106 | :"=g"(_c[LO]), "=r"(cy) \ | ||
107 | :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ | ||
108 | : "%rax", "%rdx", "cc") | ||
109 | |||
110 | #define INNERMUL8 \ | ||
111 | asm( \ | ||
112 | "movq 0(%5),%%rax \n\t" \ | ||
113 | "movq 0(%2),%%r10 \n\t" \ | ||
114 | "movq 0x8(%5),%%r11 \n\t" \ | ||
115 | "mulq %4 \n\t" \ | ||
116 | "addq %%r10,%%rax \n\t" \ | ||
117 | "adcq $0,%%rdx \n\t" \ | ||
118 | "movq 0x8(%2),%%r10 \n\t" \ | ||
119 | "addq %3,%%rax \n\t" \ | ||
120 | "adcq $0,%%rdx \n\t" \ | ||
121 | "movq %%rax,0(%0) \n\t" \ | ||
122 | "movq %%rdx,%1 \n\t" \ | ||
123 | \ | ||
124 | "movq %%r11,%%rax \n\t" \ | ||
125 | "movq 0x10(%5),%%r11 \n\t" \ | ||
126 | "mulq %4 \n\t" \ | ||
127 | "addq %%r10,%%rax \n\t" \ | ||
128 | "adcq $0,%%rdx \n\t" \ | ||
129 | "movq 0x10(%2),%%r10 \n\t" \ | ||
130 | "addq %3,%%rax \n\t" \ | ||
131 | "adcq $0,%%rdx \n\t" \ | ||
132 | "movq %%rax,0x8(%0) \n\t" \ | ||
133 | "movq %%rdx,%1 \n\t" \ | ||
134 | \ | ||
135 | "movq %%r11,%%rax \n\t" \ | ||
136 | "movq 0x18(%5),%%r11 \n\t" \ | ||
137 | "mulq %4 \n\t" \ | ||
138 | "addq %%r10,%%rax \n\t" \ | ||
139 | "adcq $0,%%rdx \n\t" \ | ||
140 | "movq 0x18(%2),%%r10 \n\t" \ | ||
141 | "addq %3,%%rax \n\t" \ | ||
142 | "adcq $0,%%rdx \n\t" \ | ||
143 | "movq %%rax,0x10(%0) \n\t" \ | ||
144 | "movq %%rdx,%1 \n\t" \ | ||
145 | \ | ||
146 | "movq %%r11,%%rax \n\t" \ | ||
147 | "movq 0x20(%5),%%r11 \n\t" \ | ||
148 | "mulq %4 \n\t" \ | ||
149 | "addq %%r10,%%rax \n\t" \ | ||
150 | "adcq $0,%%rdx \n\t" \ | ||
151 | "movq 0x20(%2),%%r10 \n\t" \ | ||
152 | "addq %3,%%rax \n\t" \ | ||
153 | "adcq $0,%%rdx \n\t" \ | ||
154 | "movq %%rax,0x18(%0) \n\t" \ | ||
155 | "movq %%rdx,%1 \n\t" \ | ||
156 | \ | ||
157 | "movq %%r11,%%rax \n\t" \ | ||
158 | "movq 0x28(%5),%%r11 \n\t" \ | ||
159 | "mulq %4 \n\t" \ | ||
160 | "addq %%r10,%%rax \n\t" \ | ||
161 | "adcq $0,%%rdx \n\t" \ | ||
162 | "movq 0x28(%2),%%r10 \n\t" \ | ||
163 | "addq %3,%%rax \n\t" \ | ||
164 | "adcq $0,%%rdx \n\t" \ | ||
165 | "movq %%rax,0x20(%0) \n\t" \ | ||
166 | "movq %%rdx,%1 \n\t" \ | ||
167 | \ | ||
168 | "movq %%r11,%%rax \n\t" \ | ||
169 | "movq 0x30(%5),%%r11 \n\t" \ | ||
170 | "mulq %4 \n\t" \ | ||
171 | "addq %%r10,%%rax \n\t" \ | ||
172 | "adcq $0,%%rdx \n\t" \ | ||
173 | "movq 0x30(%2),%%r10 \n\t" \ | ||
174 | "addq %3,%%rax \n\t" \ | ||
175 | "adcq $0,%%rdx \n\t" \ | ||
176 | "movq %%rax,0x28(%0) \n\t" \ | ||
177 | "movq %%rdx,%1 \n\t" \ | ||
178 | \ | ||
179 | "movq %%r11,%%rax \n\t" \ | ||
180 | "movq 0x38(%5),%%r11 \n\t" \ | ||
181 | "mulq %4 \n\t" \ | ||
182 | "addq %%r10,%%rax \n\t" \ | ||
183 | "adcq $0,%%rdx \n\t" \ | ||
184 | "movq 0x38(%2),%%r10 \n\t" \ | ||
185 | "addq %3,%%rax \n\t" \ | ||
186 | "adcq $0,%%rdx \n\t" \ | ||
187 | "movq %%rax,0x30(%0) \n\t" \ | ||
188 | "movq %%rdx,%1 \n\t" \ | ||
189 | \ | ||
190 | "movq %%r11,%%rax \n\t" \ | ||
191 | "mulq %4 \n\t" \ | ||
192 | "addq %%r10,%%rax \n\t" \ | ||
193 | "adcq $0,%%rdx \n\t" \ | ||
194 | "addq %3,%%rax \n\t" \ | ||
195 | "adcq $0,%%rdx \n\t" \ | ||
196 | "movq %%rax,0x38(%0) \n\t" \ | ||
197 | "movq %%rdx,%1 \n\t" \ | ||
198 | \ | ||
199 | :"=r"(_c), "=r"(cy) \ | ||
200 | : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ | ||
201 | : "%rax", "%rdx", "%r10", "%r11", "cc") | ||
202 | |||
203 | #define PROPCARRY \ | ||
204 | asm( \ | ||
205 | "addq %1,%0 \n\t" \ | ||
206 | "setb %%al \n\t" \ | ||
207 | "movzbq %%al,%1 \n\t" \ | ||
208 | :"=g"(_c[LO]), "=r"(cy) \ | ||
209 | :"0"(_c[LO]), "1"(cy) \ | ||
210 | : "%rax", "cc") | ||
211 | |||
212 | /******************************************************************************/ | ||
213 | #elif defined(PSTM_ARM) | ||
214 | |||
215 | #define MONT_START | ||
216 | #define MONT_FINI | ||
217 | #define LOOP_END | ||
218 | #define LOOP_START \ | ||
219 | mu = c[x] * mp | ||
220 | |||
221 | #ifdef __thumb2__ | ||
222 | //#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations") | ||
223 | #define INNERMUL \ | ||
224 | asm( \ | ||
225 | " LDR r0,%1 \n\t" \ | ||
226 | " ADDS r0,r0,%0 \n\t" \ | ||
227 | " ITE CS \n\t" \ | ||
228 | " MOVCS %0,#1 \n\t" \ | ||
229 | " MOVCC %0,#0 \n\t" \ | ||
230 | " UMLAL r0,%0,%3,%4 \n\t" \ | ||
231 | " STR r0,%1 \n\t" \ | ||
232 | :"=r"(cy),"=m"(_c[0])\ | ||
233 | :"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\ | ||
234 | :"r0","%cc"); | ||
235 | #define PROPCARRY \ | ||
236 | asm( \ | ||
237 | " LDR r0,%1 \n\t" \ | ||
238 | " ADDS r0,r0,%0 \n\t" \ | ||
239 | " STR r0,%1 \n\t" \ | ||
240 | " ITE CS \n\t" \ | ||
241 | " MOVCS %0,#1 \n\t" \ | ||
242 | " MOVCC %0,#0 \n\t" \ | ||
243 | :"=r"(cy),"=m"(_c[0])\ | ||
244 | :"0"(cy),"m"(_c[0])\ | ||
245 | :"r0","%cc"); | ||
246 | #else /* Non-Thumb2 code */ | ||
247 | //#pragma message ("Using 32 bit ARM Assembly Optimizations") | ||
248 | #define INNERMUL \ | ||
249 | asm( \ | ||
250 | " LDR r0,%1 \n\t" \ | ||
251 | " ADDS r0,r0,%0 \n\t" \ | ||
252 | " MOVCS %0,#1 \n\t" \ | ||
253 | " MOVCC %0,#0 \n\t" \ | ||
254 | " UMLAL r0,%0,%3,%4 \n\t" \ | ||
255 | " STR r0,%1 \n\t" \ | ||
256 | :"=r"(cy),"=m"(_c[0])\ | ||
257 | :"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\ | ||
258 | :"r0","%cc"); | ||
259 | #define PROPCARRY \ | ||
260 | asm( \ | ||
261 | " LDR r0,%1 \n\t" \ | ||
262 | " ADDS r0,r0,%0 \n\t" \ | ||
263 | " STR r0,%1 \n\t" \ | ||
264 | " MOVCS %0,#1 \n\t" \ | ||
265 | " MOVCC %0,#0 \n\t" \ | ||
266 | :"=r"(cy),"=m"(_c[0])\ | ||
267 | :"0"(cy),"m"(_c[0])\ | ||
268 | :"r0","%cc"); | ||
269 | #endif /* __thumb2__ */ | ||
270 | |||
271 | |||
272 | /******************************************************************************/ | ||
273 | #elif defined(PSTM_MIPS) | ||
274 | /* MIPS32 */ | ||
275 | //#pragma message ("Using 32 bit MIPS Assembly Optimizations") | ||
276 | #define MONT_START | ||
277 | #define MONT_FINI | ||
278 | #define LOOP_END | ||
279 | #define LOOP_START \ | ||
280 | mu = c[x] * mp | ||
281 | |||
282 | #define INNERMUL \ | ||
283 | asm( \ | ||
284 | " multu %3,%4 \n\t" \ | ||
285 | " mflo $12 \n\t" \ | ||
286 | " mfhi $13 \n\t" \ | ||
287 | " addu $12,$12,%0 \n\t" \ | ||
288 | " sltu $10,$12,%0 \n\t" \ | ||
289 | " addu $13,$13,$10 \n\t" \ | ||
290 | " lw $10,%1 \n\t" \ | ||
291 | " addu $12,$12,$10 \n\t" \ | ||
292 | " sltu $10,$12,$10 \n\t" \ | ||
293 | " addu %0,$13,$10 \n\t" \ | ||
294 | " sw $12,%1 \n\t" \ | ||
295 | :"=r"(cy),"=m"(_c[0])\ | ||
296 | :"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\ | ||
297 | :"$10","$12","$13")\ | ||
298 | ; ++tmpm; | ||
299 | |||
300 | #define PROPCARRY \ | ||
301 | asm( \ | ||
302 | " lw $10,%1 \n\t" \ | ||
303 | " addu $10,$10,%0 \n\t" \ | ||
304 | " sw $10,%1 \n\t" \ | ||
305 | " sltu %0,$10,%0 \n\t" \ | ||
306 | :"=r"(cy),"=m"(_c[0])\ | ||
307 | :"r"(cy),"r"(_c[0])\ | ||
308 | :"$10"); | ||
309 | |||
310 | |||
311 | /******************************************************************************/ | ||
312 | #else | ||
313 | |||
314 | /* ISO C code */ | ||
315 | #define MONT_START | ||
316 | #define MONT_FINI | ||
317 | #define LOOP_END | ||
318 | #define LOOP_START \ | ||
319 | mu = c[x] * mp | ||
320 | |||
321 | #define INNERMUL \ | ||
322 | do { pstm_word t; \ | ||
323 | t = ((pstm_word)_c[0] + (pstm_word)cy) + \ | ||
324 | (((pstm_word)mu) * ((pstm_word)*tmpm++)); \ | ||
325 | _c[0] = (pstm_digit)t; \ | ||
326 | cy = (pstm_digit)(t >> DIGIT_BIT); \ | ||
327 | } while (0) | ||
328 | |||
329 | #define PROPCARRY \ | ||
330 | do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0) | ||
331 | |||
332 | #endif | ||
333 | |||
334 | /******************************************************************************/ | ||
335 | |||
336 | #define LO 0 | ||
337 | |||
338 | /* computes x/R == x (mod N) via Montgomery Reduction */ | ||
339 | int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m, | ||
340 | pstm_digit mp, pstm_digit *paD, uint32 paDlen) | ||
341 | { | ||
342 | pstm_digit *c, *_c, *tmpm, mu; | ||
343 | int32 oldused, x, y; | ||
344 | int16 pa; | ||
345 | |||
346 | pa = m->used; | ||
347 | if (pa > a->alloc) { | ||
348 | /* Sanity test for bad numbers. This will confirm no buffer overruns */ | ||
349 | return PS_LIMIT_FAIL; | ||
350 | } | ||
351 | |||
352 | if (paD && paDlen >= (uint32)2*pa+1) { | ||
353 | c = paD; | ||
354 | memset(c, 0x0, paDlen); | ||
355 | } else { | ||
356 | c = xzalloc(2*pa+1); | ||
357 | } | ||
358 | /* copy the input */ | ||
359 | oldused = a->used; | ||
360 | for (x = 0; x < oldused; x++) { | ||
361 | c[x] = a->dp[x]; | ||
362 | } | ||
363 | |||
364 | MONT_START; | ||
365 | |||
366 | for (x = 0; x < pa; x++) { | ||
367 | pstm_digit cy = 0; | ||
368 | /* get Mu for this round */ | ||
369 | LOOP_START; | ||
370 | _c = c + x; | ||
371 | tmpm = m->dp; | ||
372 | y = 0; | ||
373 | #ifdef PSTM_X86_64 | ||
374 | for (; y < (pa & ~7); y += 8) { | ||
375 | INNERMUL8; | ||
376 | _c += 8; | ||
377 | tmpm += 8; | ||
378 | } | ||
379 | #endif /* PSTM_X86_64 */ | ||
380 | for (; y < pa; y++) { | ||
381 | INNERMUL; | ||
382 | ++_c; | ||
383 | } | ||
384 | LOOP_END; | ||
385 | while (cy) { | ||
386 | PROPCARRY; | ||
387 | ++_c; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* now copy out */ | ||
392 | _c = c + pa; | ||
393 | tmpm = a->dp; | ||
394 | for (x = 0; x < pa+1; x++) { | ||
395 | *tmpm++ = *_c++; | ||
396 | } | ||
397 | |||
398 | for (; x < oldused; x++) { | ||
399 | *tmpm++ = 0; | ||
400 | } | ||
401 | |||
402 | MONT_FINI; | ||
403 | |||
404 | a->used = pa+1; | ||
405 | pstm_clamp(a); | ||
406 | |||
407 | /* reuse x as return code */ | ||
408 | x = PSTM_OKAY; | ||
409 | |||
410 | /* if A >= m then A = A - m */ | ||
411 | if (pstm_cmp_mag (a, m) != PSTM_LT) { | ||
412 | if (s_pstm_sub (a, m, a) != PSTM_OKAY) { | ||
413 | x = PS_MEM_FAIL; | ||
414 | } | ||
415 | } | ||
416 | if (paDlen < (uint32)2*pa+1) { | ||
417 | psFree(c, pool); | ||
418 | } | ||
419 | return x; | ||
420 | } | ||
421 | |||
422 | #endif /* !DISABLE_PSTM */ | ||
423 | /******************************************************************************/ | ||
diff --git a/networking/tls_pstm_mul_comba.c b/networking/tls_pstm_mul_comba.c new file mode 100644 index 000000000..6e051baeb --- /dev/null +++ b/networking/tls_pstm_mul_comba.c | |||
@@ -0,0 +1,777 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | /** | ||
9 | * @file pstm_mul_comba.c | ||
10 | * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master) | ||
11 | * | ||
12 | * Multiprecision multiplication with Comba technique. | ||
13 | */ | ||
14 | /* | ||
15 | * Copyright (c) 2013-2015 INSIDE Secure Corporation | ||
16 | * Copyright (c) PeerSec Networks, 2002-2011 | ||
17 | * All Rights Reserved | ||
18 | * | ||
19 | * The latest version of this code is available at http://www.matrixssl.org | ||
20 | * | ||
21 | * This software is open source; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This General Public License does NOT permit incorporating this software | ||
27 | * into proprietary programs. If you are unable to comply with the GPL, a | ||
28 | * commercial license for this software may be purchased from INSIDE at | ||
29 | * http://www.insidesecure.com/eng/Company/Locations | ||
30 | * | ||
31 | * This program is distributed in WITHOUT ANY WARRANTY; without even the | ||
32 | * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
33 | * See the GNU General Public License for more details. | ||
34 | * | ||
35 | * You should have received a copy of the GNU General Public License | ||
36 | * along with this program; if not, write to the Free Software | ||
37 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
38 | * http://www.gnu.org/copyleft/gpl.html | ||
39 | */ | ||
40 | /******************************************************************************/ | ||
41 | |||
42 | ///bbox | ||
43 | //#include "../cryptoApi.h" | ||
44 | #ifndef DISABLE_PSTM | ||
45 | |||
46 | /******************************************************************************/ | ||
47 | #if defined(PSTM_X86) | ||
48 | /* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ | ||
49 | #if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT) | ||
50 | #error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" | ||
51 | #endif | ||
52 | //#pragma message ("Using 32 bit x86 Assembly Optimizations") | ||
53 | |||
54 | /* anything you need at the start */ | ||
55 | #define COMBA_START | ||
56 | |||
57 | /* clear the chaining variables */ | ||
58 | #define COMBA_CLEAR \ | ||
59 | c0 = c1 = c2 = 0; | ||
60 | |||
61 | /* forward the carry to the next digit */ | ||
62 | #define COMBA_FORWARD \ | ||
63 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
64 | |||
65 | /* store the first sum */ | ||
66 | #define COMBA_STORE(x) \ | ||
67 | x = c0; | ||
68 | |||
69 | /* store the second sum [carry] */ | ||
70 | #define COMBA_STORE2(x) \ | ||
71 | x = c1; | ||
72 | |||
73 | /* anything you need at the end */ | ||
74 | #define COMBA_FINI | ||
75 | |||
76 | /* this should multiply i and j */ | ||
77 | #define MULADD(i, j) \ | ||
78 | asm( \ | ||
79 | "movl %6,%%eax \n\t" \ | ||
80 | "mull %7 \n\t" \ | ||
81 | "addl %%eax,%0 \n\t" \ | ||
82 | "adcl %%edx,%1 \n\t" \ | ||
83 | "adcl $0,%2 \n\t" \ | ||
84 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); | ||
85 | |||
86 | /******************************************************************************/ | ||
87 | #elif defined(PSTM_X86_64) | ||
88 | /* x86-64 optimized */ | ||
89 | #if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) | ||
90 | #error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" | ||
91 | #endif | ||
92 | //#pragma message ("Using 64 bit x86_64 Assembly Optimizations") | ||
93 | |||
94 | /* anything you need at the start */ | ||
95 | #define COMBA_START | ||
96 | |||
97 | /* clear the chaining variables */ | ||
98 | #define COMBA_CLEAR \ | ||
99 | c0 = c1 = c2 = 0; | ||
100 | |||
101 | /* forward the carry to the next digit */ | ||
102 | #define COMBA_FORWARD \ | ||
103 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
104 | |||
105 | /* store the first sum */ | ||
106 | #define COMBA_STORE(x) \ | ||
107 | x = c0; | ||
108 | |||
109 | /* store the second sum [carry] */ | ||
110 | #define COMBA_STORE2(x) \ | ||
111 | x = c1; | ||
112 | |||
113 | /* anything you need at the end */ | ||
114 | #define COMBA_FINI | ||
115 | |||
116 | /* this should multiply i and j */ | ||
117 | #define MULADD(i, j) \ | ||
118 | asm ( \ | ||
119 | "movq %6,%%rax \n\t" \ | ||
120 | "mulq %7 \n\t" \ | ||
121 | "addq %%rax,%0 \n\t" \ | ||
122 | "adcq %%rdx,%1 \n\t" \ | ||
123 | "adcq $0,%2 \n\t" \ | ||
124 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
125 | |||
126 | /******************************************************************************/ | ||
127 | #elif defined(PSTM_ARM) | ||
128 | /* ARM code */ | ||
129 | //#pragma message ("Using 32 bit ARM Assembly Optimizations") | ||
130 | |||
131 | #define COMBA_START | ||
132 | |||
133 | #define COMBA_CLEAR \ | ||
134 | c0 = c1 = c2 = 0; | ||
135 | |||
136 | #define COMBA_FORWARD \ | ||
137 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
138 | |||
139 | #define COMBA_STORE(x) \ | ||
140 | x = c0; | ||
141 | |||
142 | #define COMBA_STORE2(x) \ | ||
143 | x = c1; | ||
144 | |||
145 | #define COMBA_FINI | ||
146 | |||
147 | #define MULADD(i, j) \ | ||
148 | asm( \ | ||
149 | " UMULL r0,r1,%6,%7 \n\t" \ | ||
150 | " ADDS %0,%0,r0 \n\t" \ | ||
151 | " ADCS %1,%1,r1 \n\t" \ | ||
152 | " ADC %2,%2,#0 \n\t" \ | ||
153 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); | ||
154 | |||
155 | /******************************************************************************/ | ||
156 | #elif defined(PSTM_MIPS) | ||
157 | /* MIPS32 code */ | ||
158 | //#pragma message ("Using 32 bit MIPS Assembly Optimizations") | ||
159 | |||
160 | #define COMBA_START | ||
161 | |||
162 | #define COMBA_CLEAR \ | ||
163 | c0 = c1 = c2 = 0; | ||
164 | |||
165 | #define COMBA_FORWARD \ | ||
166 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
167 | |||
168 | #define COMBA_STORE(x) \ | ||
169 | x = c0; | ||
170 | |||
171 | #define COMBA_STORE2(x) \ | ||
172 | x = c1; | ||
173 | |||
174 | #define COMBA_FINI | ||
175 | |||
176 | #define MULADD(i, j) \ | ||
177 | asm( \ | ||
178 | " multu %6,%7 \n\t" \ | ||
179 | " mflo $12 \n\t" \ | ||
180 | " mfhi $13 \n\t" \ | ||
181 | " addu %0,%0,$12 \n\t" \ | ||
182 | " sltu $12,%0,$12 \n\t" \ | ||
183 | " addu %1,%1,$13 \n\t" \ | ||
184 | " sltu $13,%1,$13 \n\t" \ | ||
185 | " addu %1,%1,$12 \n\t" \ | ||
186 | " sltu $12,%1,$12 \n\t" \ | ||
187 | " addu %2,%2,$13 \n\t" \ | ||
188 | " addu %2,%2,$12 \n\t" \ | ||
189 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13"); | ||
190 | |||
191 | /******************************************************************************/ | ||
192 | #else | ||
193 | |||
/*
 * Portable C comba primitives. They operate on three pstm_digit variables
 * named c0, c1 and c2 that must be in scope at the point of use; together
 * they form the column accumulator (c0 lowest word, c2 highest).
 *
 * None of these macros ends in a semicolon: the caller supplies it, so
 * they behave like ordinary statements (including in unbraced if/else).
 * Macro arguments are parenthesised, so an argument expression is
 * evaluated as a whole before it is stored to or widened.
 */

/* Nothing to set up for this variant. */
#define COMBA_START

/* Zero the whole accumulator. */
#define COMBA_CLEAR \
	(c0 = c1 = c2 = 0)

/* Move on to the next column: shift the accumulator down by one word. */
#define COMBA_FORWARD \
	do { c0 = c1; c1 = c2; c2 = 0; } while (0)

/* Store the finished low word of the current column into x. */
#define COMBA_STORE(x) \
	((x) = c0)

/* Store the middle word (c1) into x. */
#define COMBA_STORE2(x) \
	((x) = c1)

/* Nothing to tear down for this variant. */
#define COMBA_FINI

/*
 * Multiply-accumulate step: (c2:c1:c0) += i * j.
 * The product is formed in pstm_word (presumably twice as wide as
 * pstm_digit - confirm against its typedef); the part above DIGIT_BIT is
 * carried from c0 into c1, and again from c1 into c2.
 * i and j are each evaluated exactly once.
 */
#define MULADD(i, j) \
	do { pstm_word t; \
		t = (pstm_word)c0 + (pstm_word)(i) * (pstm_word)(j); \
		c0 = (pstm_digit)t; \
		t = (pstm_word)c1 + (t >> DIGIT_BIT); \
		c1 = (pstm_digit)t; \
		c2 += (pstm_digit)(t >> DIGIT_BIT); \
	} while (0)
216 | |||
217 | #endif | ||
218 | |||
219 | /******************************************************************************/ | ||
220 | /* generic PxQ multiplier */ | ||
221 | ///bbox: pool unused | ||
222 | #define pstm_mul_comba_gen(pool, A, B, C, paD, paDlen) \ | ||
223 | pstm_mul_comba_gen( A, B, C, paD, paDlen) | ||
224 | static int32 pstm_mul_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B, | ||
225 | pstm_int *C, pstm_digit *paD, uint32 paDlen) | ||
226 | { | ||
227 | int16 paDfail, pa; | ||
228 | int32 ix, iy, iz, tx, ty; | ||
229 | pstm_digit c0, c1, c2, *tmpx, *tmpy, *dst; | ||
230 | |||
231 | COMBA_START; | ||
232 | COMBA_CLEAR; | ||
233 | |||
234 | paDfail = 0; | ||
235 | /* get size of output and trim */ | ||
236 | pa = A->used + B->used; | ||
237 | |||
238 | /* | ||
239 | If c is not large enough grow it and continue | ||
240 | */ | ||
241 | if (C->alloc < pa) { | ||
242 | if (pstm_grow(C, pa) != PSTM_OKAY) { | ||
243 | return PS_MEM_FAIL; | ||
244 | } | ||
245 | } | ||
246 | if (paD != NULL) { | ||
247 | if (paDlen < (sizeof(pstm_digit) * pa)) { | ||
248 | paDfail = 1; /* have a paD but it's not large enough */ | ||
249 | dst = xzalloc(sizeof(pstm_digit) * pa); | ||
250 | } else { | ||
251 | dst = paD; | ||
252 | memset(dst, 0x0, paDlen); | ||
253 | } | ||
254 | } else { | ||
255 | dst = xzalloc(sizeof(pstm_digit) * pa); | ||
256 | } | ||
257 | |||
258 | for (ix = 0; ix < pa; ix++) { | ||
259 | /* get offsets into the two bignums */ | ||
260 | ty = min(ix, B->used-1); | ||
261 | tx = ix - ty; | ||
262 | |||
263 | /* setup temp aliases */ | ||
264 | tmpx = A->dp + tx; | ||
265 | tmpy = B->dp + ty; | ||
266 | /* | ||
267 | This is the number of times the loop will iterate, essentially it's | ||
268 | while (tx++ < a->used && ty-- >= 0) { ... } | ||
269 | */ | ||
270 | iy = min(A->used-tx, ty+1); | ||
271 | |||
272 | /* execute loop */ | ||
273 | COMBA_FORWARD; | ||
274 | for (iz = 0; iz < iy; ++iz) { | ||
275 | MULADD(*tmpx++, *tmpy--); | ||
276 | } | ||
277 | |||
278 | /* store term */ | ||
279 | COMBA_STORE(dst[ix]); | ||
280 | } | ||
281 | COMBA_FINI; | ||
282 | /* | ||
283 | setup dest | ||
284 | */ | ||
285 | iy = C->used; | ||
286 | C->used = pa; | ||
287 | C->sign = A->sign ^ B->sign; | ||
288 | { | ||
289 | pstm_digit *tmpc; | ||
290 | tmpc = C->dp; | ||
291 | for (ix = 0; ix < pa; ix++) { | ||
292 | *tmpc++ = dst[ix]; | ||
293 | } | ||
294 | /* | ||
295 | clear unused digits [that existed in the old copy of c] | ||
296 | */ | ||
297 | for (; ix < iy; ix++) { | ||
298 | *tmpc++ = 0; | ||
299 | } | ||
300 | } | ||
301 | pstm_clamp(C); | ||
302 | |||
303 | if ((paD == NULL) || (paDfail == 1)) { | ||
304 | psFree(dst, pool); | ||
305 | } | ||
306 | |||
307 | return PS_SUCCESS; | ||
308 | } | ||
309 | |||
310 | /******************************************************************************/ | ||
311 | #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
/*
 * Fully unrolled comba multiply for 16-digit operands: C = A * B.
 *
 * Exactly 16 digits are read from A->dp and from B->dp, whatever their
 * ->used counts are.
 * NOTE(review): presumably callers only dispatch here when both operands
 * have 16 digits - confirm at the call site.
 *
 * C is grown to 32 digits if needed. Returns PS_MEM_FAIL if that fails,
 * otherwise PSTM_OKAY. On success C->used is set to 32, C->sign to
 * A->sign ^ B->sign, and pstm_clamp(C) is applied.
 */
312 | static int32 pstm_mul_comba16(pstm_int *A, pstm_int *B, pstm_int *C) | ||
313 | { | ||
314 | pstm_digit c0, c1, c2, at[32]; | ||
315 | |||
316 | if (C->alloc < 32) { | ||
317 | if (pstm_grow(C, 32) != PSTM_OKAY) { | ||
318 | return PS_MEM_FAIL; | ||
319 | } | ||
320 | } | ||
/*
 * Snapshot both operands into at[]: A in at[0..15], B in at[16..31].
 * Every later read uses this copy while results go straight to C->dp,
 * presumably so that C may alias A or B.
 */
321 | memcpy(at, A->dp, 16 * sizeof(pstm_digit)); | ||
322 | memcpy(at+16, B->dp, 16 * sizeof(pstm_digit)); | ||
323 | |||
324 | COMBA_START; | ||
325 | |||
/*
 * Column k accumulates every product at[i] * at[16 + k - i] and stores
 * the low accumulator word as C->dp[k]. Columns 0..15 gain one term
 * each step.
 */
326 | COMBA_CLEAR; | ||
327 | /* 0 */ | ||
328 | MULADD(at[0], at[16]); | ||
329 | COMBA_STORE(C->dp[0]); | ||
330 | /* 1 */ | ||
331 | COMBA_FORWARD; | ||
332 | MULADD(at[0], at[17]); MULADD(at[1], at[16]); | ||
333 | COMBA_STORE(C->dp[1]); | ||
334 | /* 2 */ | ||
335 | COMBA_FORWARD; | ||
336 | MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); | ||
337 | COMBA_STORE(C->dp[2]); | ||
338 | /* 3 */ | ||
339 | COMBA_FORWARD; | ||
340 | MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); | ||
341 | COMBA_STORE(C->dp[3]); | ||
342 | /* 4 */ | ||
343 | COMBA_FORWARD; | ||
344 | MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); | ||
345 | COMBA_STORE(C->dp[4]); | ||
346 | /* 5 */ | ||
347 | COMBA_FORWARD; | ||
348 | MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); | ||
349 | COMBA_STORE(C->dp[5]); | ||
350 | /* 6 */ | ||
351 | COMBA_FORWARD; | ||
352 | MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); | ||
353 | COMBA_STORE(C->dp[6]); | ||
354 | /* 7 */ | ||
355 | COMBA_FORWARD; | ||
356 | MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); | ||
357 | COMBA_STORE(C->dp[7]); | ||
358 | /* 8 */ | ||
359 | COMBA_FORWARD; | ||
360 | MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); | ||
361 | COMBA_STORE(C->dp[8]); | ||
362 | /* 9 */ | ||
363 | COMBA_FORWARD; | ||
364 | MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); | ||
365 | COMBA_STORE(C->dp[9]); | ||
366 | /* 10 */ | ||
367 | COMBA_FORWARD; | ||
368 | MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); | ||
369 | COMBA_STORE(C->dp[10]); | ||
370 | /* 11 */ | ||
371 | COMBA_FORWARD; | ||
372 | MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); | ||
373 | COMBA_STORE(C->dp[11]); | ||
374 | /* 12 */ | ||
375 | COMBA_FORWARD; | ||
376 | MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); | ||
377 | COMBA_STORE(C->dp[12]); | ||
378 | /* 13 */ | ||
379 | COMBA_FORWARD; | ||
380 | MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]); | ||
381 | COMBA_STORE(C->dp[13]); | ||
382 | /* 14 */ | ||
383 | COMBA_FORWARD; | ||
384 | MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]); | ||
385 | COMBA_STORE(C->dp[14]); | ||
386 | /* 15 */ | ||
387 | COMBA_FORWARD; | ||
388 | MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]); | ||
389 | COMBA_STORE(C->dp[15]); | ||
/* From column 16 on, each column has one term fewer than the previous one. */
390 | /* 16 */ | ||
391 | COMBA_FORWARD; | ||
392 | MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]); | ||
393 | COMBA_STORE(C->dp[16]); | ||
394 | /* 17 */ | ||
395 | COMBA_FORWARD; | ||
396 | MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]); | ||
397 | COMBA_STORE(C->dp[17]); | ||
398 | /* 18 */ | ||
399 | COMBA_FORWARD; | ||
400 | MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]); | ||
401 | COMBA_STORE(C->dp[18]); | ||
402 | /* 19 */ | ||
403 | COMBA_FORWARD; | ||
404 | MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]); | ||
405 | COMBA_STORE(C->dp[19]); | ||
406 | /* 20 */ | ||
407 | COMBA_FORWARD; | ||
408 | MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); | ||
409 | COMBA_STORE(C->dp[20]); | ||
410 | /* 21 */ | ||
411 | COMBA_FORWARD; | ||
412 | MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); | ||
413 | COMBA_STORE(C->dp[21]); | ||
414 | /* 22 */ | ||
415 | COMBA_FORWARD; | ||
416 | MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); | ||
417 | COMBA_STORE(C->dp[22]); | ||
418 | /* 23 */ | ||
419 | COMBA_FORWARD; | ||
420 | MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); | ||
421 | COMBA_STORE(C->dp[23]); | ||
422 | /* 24 */ | ||
423 | COMBA_FORWARD; | ||
424 | MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); | ||
425 | COMBA_STORE(C->dp[24]); | ||
426 | /* 25 */ | ||
427 | COMBA_FORWARD; | ||
428 | MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); | ||
429 | COMBA_STORE(C->dp[25]); | ||
430 | /* 26 */ | ||
431 | COMBA_FORWARD; | ||
432 | MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); | ||
433 | COMBA_STORE(C->dp[26]); | ||
434 | /* 27 */ | ||
435 | COMBA_FORWARD; | ||
436 | MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); | ||
437 | COMBA_STORE(C->dp[27]); | ||
438 | /* 28 */ | ||
439 | COMBA_FORWARD; | ||
440 | MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); | ||
441 | COMBA_STORE(C->dp[28]); | ||
442 | /* 29 */ | ||
443 | COMBA_FORWARD; | ||
444 | MULADD(at[14], at[31]); MULADD(at[15], at[30]); | ||
445 | COMBA_STORE(C->dp[29]); | ||
446 | /* 30 */ | ||
447 | COMBA_FORWARD; | ||
448 | MULADD(at[15], at[31]); | ||
449 | COMBA_STORE(C->dp[30]); | ||
/* Top digit: the carry word (c1) left over after column 30. */
450 | COMBA_STORE2(C->dp[31]); | ||
451 | C->used = 32; | ||
452 | C->sign = A->sign ^ B->sign; | ||
453 | pstm_clamp(C); | ||
454 | COMBA_FINI; | ||
455 | return PSTM_OKAY; | ||
456 | } | ||
457 | #endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */ | ||
458 | |||
459 | |||
460 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
461 | static int32 pstm_mul_comba32(pstm_int *A, pstm_int *B, pstm_int *C) | ||
462 | { | ||
463 | pstm_digit c0, c1, c2, at[64]; | ||
464 | int32 out_size; | ||
465 | |||
466 | if (C->alloc < 64) { | ||
467 | if (pstm_grow(C, 64) != PSTM_OKAY) { | ||
468 | return PS_MEM_FAIL; | ||
469 | } | ||
470 | } | ||
471 | |||
472 | out_size = A->used + B->used; | ||
473 | memcpy(at, A->dp, 32 * sizeof(pstm_digit)); | ||
474 | memcpy(at+32, B->dp, 32 * sizeof(pstm_digit)); | ||
475 | COMBA_START; | ||
476 | |||
477 | COMBA_CLEAR; | ||
478 | /* 0 */ | ||
479 | MULADD(at[0], at[32]); | ||
480 | COMBA_STORE(C->dp[0]); | ||
481 | /* 1 */ | ||
482 | COMBA_FORWARD; | ||
483 | MULADD(at[0], at[33]); MULADD(at[1], at[32]); | ||
484 | COMBA_STORE(C->dp[1]); | ||
485 | /* 2 */ | ||
486 | COMBA_FORWARD; | ||
487 | MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); | ||
488 | COMBA_STORE(C->dp[2]); | ||
489 | /* 3 */ | ||
490 | COMBA_FORWARD; | ||
491 | MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); | ||
492 | COMBA_STORE(C->dp[3]); | ||
493 | /* 4 */ | ||
494 | COMBA_FORWARD; | ||
495 | MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); | ||
496 | COMBA_STORE(C->dp[4]); | ||
497 | /* 5 */ | ||
498 | COMBA_FORWARD; | ||
499 | MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); | ||
500 | COMBA_STORE(C->dp[5]); | ||
501 | /* 6 */ | ||
502 | COMBA_FORWARD; | ||
503 | MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); | ||
504 | COMBA_STORE(C->dp[6]); | ||
505 | /* 7 */ | ||
506 | COMBA_FORWARD; | ||
507 | MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); | ||
508 | COMBA_STORE(C->dp[7]); | ||
509 | /* 8 */ | ||
510 | COMBA_FORWARD; | ||
511 | MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); | ||
512 | COMBA_STORE(C->dp[8]); | ||
513 | /* 9 */ | ||
514 | COMBA_FORWARD; | ||
515 | MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); | ||
516 | COMBA_STORE(C->dp[9]); | ||
517 | /* 10 */ | ||
518 | COMBA_FORWARD; | ||
519 | MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); | ||
520 | COMBA_STORE(C->dp[10]); | ||
521 | /* 11 */ | ||
522 | COMBA_FORWARD; | ||
523 | MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); | ||
524 | COMBA_STORE(C->dp[11]); | ||
525 | /* 12 */ | ||
526 | COMBA_FORWARD; | ||
527 | MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); | ||
528 | COMBA_STORE(C->dp[12]); | ||
529 | /* 13 */ | ||
530 | COMBA_FORWARD; | ||
531 | MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); | ||
532 | COMBA_STORE(C->dp[13]); | ||
533 | /* 14 */ | ||
534 | COMBA_FORWARD; | ||
535 | MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); | ||
536 | COMBA_STORE(C->dp[14]); | ||
537 | /* 15 */ | ||
538 | COMBA_FORWARD; | ||
539 | MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); | ||
540 | COMBA_STORE(C->dp[15]); | ||
541 | /* 16 */ | ||
542 | COMBA_FORWARD; | ||
543 | MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); | ||
544 | COMBA_STORE(C->dp[16]); | ||
545 | /* 17 */ | ||
546 | COMBA_FORWARD; | ||
547 | MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); | ||
548 | COMBA_STORE(C->dp[17]); | ||
549 | /* 18 */ | ||
550 | COMBA_FORWARD; | ||
551 | MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); | ||
552 | COMBA_STORE(C->dp[18]); | ||
553 | /* 19 */ | ||
554 | COMBA_FORWARD; | ||
555 | MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); | ||
556 | COMBA_STORE(C->dp[19]); | ||
557 | /* 20 */ | ||
558 | COMBA_FORWARD; | ||
559 | MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); | ||
560 | COMBA_STORE(C->dp[20]); | ||
561 | /* 21 */ | ||
562 | COMBA_FORWARD; | ||
563 | MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); | ||
564 | COMBA_STORE(C->dp[21]); | ||
565 | /* 22 */ | ||
566 | COMBA_FORWARD; | ||
567 | MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); | ||
568 | COMBA_STORE(C->dp[22]); | ||
569 | /* 23 */ | ||
570 | COMBA_FORWARD; | ||
571 | MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); | ||
572 | COMBA_STORE(C->dp[23]); | ||
573 | /* 24 */ | ||
574 | COMBA_FORWARD; | ||
575 | MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); | ||
576 | COMBA_STORE(C->dp[24]); | ||
577 | /* 25 */ | ||
578 | COMBA_FORWARD; | ||
579 | MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); | ||
580 | COMBA_STORE(C->dp[25]); | ||
581 | /* 26 */ | ||
582 | COMBA_FORWARD; | ||
583 | MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); | ||
584 | COMBA_STORE(C->dp[26]); | ||
585 | /* 27 */ | ||
586 | COMBA_FORWARD; | ||
587 | MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]); | ||
588 | COMBA_STORE(C->dp[27]); | ||
589 | /* 28 */ | ||
590 | COMBA_FORWARD; | ||
591 | MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]); | ||
592 | COMBA_STORE(C->dp[28]); | ||
593 | /* 29 */ | ||
594 | COMBA_FORWARD; | ||
595 | MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]); | ||
596 | COMBA_STORE(C->dp[29]); | ||
597 | /* 30 */ | ||
598 | COMBA_FORWARD; | ||
599 | MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]); | ||
600 | COMBA_STORE(C->dp[30]); | ||
601 | /* 31 */ | ||
602 | COMBA_FORWARD; | ||
603 | MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]); | ||
604 | COMBA_STORE(C->dp[31]); | ||
605 | /* 32 */ | ||
606 | COMBA_FORWARD; | ||
607 | MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]); | ||
608 | COMBA_STORE(C->dp[32]); | ||
609 | /* 33 */ | ||
610 | COMBA_FORWARD; | ||
611 | MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]); | ||
612 | COMBA_STORE(C->dp[33]); | ||
613 | /* 34 */ | ||
614 | COMBA_FORWARD; | ||
615 | MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]); | ||
616 | COMBA_STORE(C->dp[34]); | ||
617 | /* 35 */ | ||
618 | COMBA_FORWARD; | ||
619 | MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]); | ||
620 | COMBA_STORE(C->dp[35]); | ||
621 | /* 36 */ | ||
622 | COMBA_FORWARD; | ||
623 | MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]); | ||
624 | COMBA_STORE(C->dp[36]); | ||
625 | /* 37 */ | ||
626 | COMBA_FORWARD; | ||
627 | MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]); | ||
628 | COMBA_STORE(C->dp[37]); | ||
629 | /* 38 */ | ||
630 | COMBA_FORWARD; | ||
631 | MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]); | ||
632 | COMBA_STORE(C->dp[38]); | ||
633 | |||
634 | /* early out at 40 digits, 40*32==1280, or two 640 bit operands */ | ||
635 | if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; } | ||
636 | |||
637 | /* 39 */ | ||
638 | COMBA_FORWARD; | ||
639 | MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]); | ||
640 | COMBA_STORE(C->dp[39]); | ||
641 | /* 40 */ | ||
642 | COMBA_FORWARD; | ||
643 | MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]); | ||
644 | COMBA_STORE(C->dp[40]); | ||
645 | /* 41 */ | ||
646 | COMBA_FORWARD; | ||
647 | MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]); | ||
648 | COMBA_STORE(C->dp[41]); | ||
649 | /* 42 */ | ||
650 | COMBA_FORWARD; | ||
651 | MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]); | ||
652 | COMBA_STORE(C->dp[42]); | ||
653 | /* 43 */ | ||
654 | COMBA_FORWARD; | ||
655 | MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]); | ||
656 | COMBA_STORE(C->dp[43]); | ||
657 | /* 44 */ | ||
658 | COMBA_FORWARD; | ||
659 | MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]); | ||
660 | COMBA_STORE(C->dp[44]); | ||
661 | /* 45 */ | ||
662 | COMBA_FORWARD; | ||
663 | MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]); | ||
664 | COMBA_STORE(C->dp[45]); | ||
665 | /* 46 */ | ||
666 | COMBA_FORWARD; | ||
667 | MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]); | ||
668 | COMBA_STORE(C->dp[46]); | ||
669 | |||
670 | /* early out at 48 digits, 48*32==1536, or two 768 bit operands */ | ||
671 | if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; } | ||
672 | |||
673 | /* 47 */ | ||
674 | COMBA_FORWARD; | ||
675 | MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]); | ||
676 | COMBA_STORE(C->dp[47]); | ||
677 | /* 48 */ | ||
678 | COMBA_FORWARD; | ||
679 | MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]); | ||
680 | COMBA_STORE(C->dp[48]); | ||
681 | /* 49 */ | ||
682 | COMBA_FORWARD; | ||
683 | MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]); | ||
684 | COMBA_STORE(C->dp[49]); | ||
685 | /* 50 */ | ||
686 | COMBA_FORWARD; | ||
687 | MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]); | ||
688 | COMBA_STORE(C->dp[50]); | ||
689 | /* 51 */ | ||
690 | COMBA_FORWARD; | ||
691 | MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]); | ||
692 | COMBA_STORE(C->dp[51]); | ||
693 | /* 52 */ | ||
694 | COMBA_FORWARD; | ||
695 | MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]); | ||
696 | COMBA_STORE(C->dp[52]); | ||
697 | /* 53 */ | ||
698 | COMBA_FORWARD; | ||
699 | MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]); | ||
700 | COMBA_STORE(C->dp[53]); | ||
701 | /* 54 */ | ||
702 | COMBA_FORWARD; | ||
703 | MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]); | ||
704 | COMBA_STORE(C->dp[54]); | ||
705 | |||
706 | /* early out at 56 digits, 56*32==1792, or two 896 bit operands */ | ||
707 | if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; } | ||
708 | |||
709 | /* 55 */ | ||
710 | COMBA_FORWARD; | ||
711 | MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]); | ||
712 | COMBA_STORE(C->dp[55]); | ||
713 | /* 56 */ | ||
714 | COMBA_FORWARD; | ||
715 | MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]); | ||
716 | COMBA_STORE(C->dp[56]); | ||
717 | /* 57 */ | ||
718 | COMBA_FORWARD; | ||
719 | MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]); | ||
720 | COMBA_STORE(C->dp[57]); | ||
721 | /* 58 */ | ||
722 | COMBA_FORWARD; | ||
723 | MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]); | ||
724 | COMBA_STORE(C->dp[58]); | ||
725 | /* 59 */ | ||
726 | COMBA_FORWARD; | ||
727 | MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]); | ||
728 | COMBA_STORE(C->dp[59]); | ||
729 | /* 60 */ | ||
730 | COMBA_FORWARD; | ||
731 | MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]); | ||
732 | COMBA_STORE(C->dp[60]); | ||
733 | /* 61 */ | ||
734 | COMBA_FORWARD; | ||
735 | MULADD(at[30], at[63]); MULADD(at[31], at[62]); | ||
736 | COMBA_STORE(C->dp[61]); | ||
737 | /* 62 */ | ||
738 | COMBA_FORWARD; | ||
739 | MULADD(at[31], at[63]); | ||
740 | COMBA_STORE(C->dp[62]); | ||
741 | COMBA_STORE2(C->dp[63]); | ||
742 | C->used = 64; | ||
743 | C->sign = A->sign ^ B->sign; | ||
744 | pstm_clamp(C); | ||
745 | COMBA_FINI; | ||
746 | return PSTM_OKAY; | ||
747 | } | ||
748 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
749 | |||
750 | /******************************************************************************/ | ||
751 | |||
752 | int32 pstm_mul_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_int *C, | ||
753 | pstm_digit *paD, uint32 paDlen) | ||
754 | { | ||
755 | #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
756 | if (A->used == 16 && B->used == 16) { | ||
757 | return pstm_mul_comba16(A, B, C); | ||
758 | } else { | ||
759 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
760 | if (A->used == 32 && B->used == 32) { | ||
761 | return pstm_mul_comba32(A, B, C); | ||
762 | } | ||
763 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
764 | return pstm_mul_comba_gen(pool, A, B, C, paD, paDlen); | ||
765 | } | ||
766 | #else | ||
767 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
768 | if (A->used == 32 && B->used == 32) { | ||
769 | return pstm_mul_comba32(A, B, C); | ||
770 | } | ||
771 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
772 | return pstm_mul_comba_gen(pool, A, B, C, paD, paDlen); | ||
773 | #endif | ||
774 | } | ||
775 | |||
776 | #endif /* !DISABLE_PSTM */ | ||
777 | /******************************************************************************/ | ||
diff --git a/networking/tls_pstm_sqr_comba.c b/networking/tls_pstm_sqr_comba.c new file mode 100644 index 000000000..98186d31f --- /dev/null +++ b/networking/tls_pstm_sqr_comba.c | |||
@@ -0,0 +1,1107 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | /** | ||
9 | * @file pstm_sqr_comba.c | ||
10 | * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master) | ||
11 | * | ||
12 | * Multiprecision Squaring with Comba technique. | ||
13 | */ | ||
14 | /* | ||
15 | * Copyright (c) 2013-2015 INSIDE Secure Corporation | ||
16 | * Copyright (c) PeerSec Networks, 2002-2011 | ||
17 | * All Rights Reserved | ||
18 | * | ||
19 | * The latest version of this code is available at http://www.matrixssl.org | ||
20 | * | ||
21 | * This software is open source; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This General Public License does NOT permit incorporating this software | ||
27 | * into proprietary programs. If you are unable to comply with the GPL, a | ||
28 | * commercial license for this software may be purchased from INSIDE at | ||
29 | * http://www.insidesecure.com/eng/Company/Locations | ||
30 | * | ||
31 | * This program is distributed WITHOUT ANY WARRANTY; without even the | ||
32 | * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
33 | * See the GNU General Public License for more details. | ||
34 | * | ||
35 | * You should have received a copy of the GNU General Public License | ||
36 | * along with this program; if not, write to the Free Software | ||
37 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
38 | * http://www.gnu.org/copyleft/gpl.html | ||
39 | */ | ||
40 | /******************************************************************************/ | ||
41 | |||
42 | ///bbox | ||
43 | //#include "../cryptoApi.h" | ||
44 | #ifndef DISABLE_PSTM | ||
45 | |||
46 | /******************************************************************************/ | ||
47 | #if defined(PSTM_X86) | ||
48 | /* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ | ||
49 | #if !defined(__GNUC__) || !defined(__i386__) | ||
50 | #error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" | ||
51 | #endif | ||
52 | //#pragma message ("Using 32 bit x86 Assembly Optimizations") | ||
53 | |||
/*
 * Comba column-accumulator primitives, 32-bit x86 GCC inline asm flavour.
 * The macros operate on the three-digit accumulator c2:c1:c0 and on the
 * secondary accumulator sc2:sc1:sc0; both are plain local variables that
 * the function using the macros has to declare under exactly these names.
 * The non-empty macros already end in ';', so "MACRO;" at a call site
 * leaves one extra empty statement behind.
 */
/* expands to nothing in this flavour */
54 | #define COMBA_START | ||
55 | |||
/* zero the whole accumulator */
56 | #define CLEAR_CARRY \ | ||
57 | c0 = c1 = c2 = 0; | ||
58 | |||
/* write the finished column digit (c0) to x */
59 | #define COMBA_STORE(x) \ | ||
60 | x = c0; | ||
61 | |||
/* write the middle accumulator digit (c1) to x */
62 | #define COMBA_STORE2(x) \ | ||
63 | x = c1; | ||
64 | |||
/* move on to the next column: shift the accumulator down by one digit */
65 | #define CARRY_FORWARD \ | ||
66 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
67 | |||
/* expands to nothing in this flavour */
68 | #define COMBA_FINI | ||
69 | |||
/* c2:c1:c0 += i * i; the j argument is never referenced by the asm */
70 | #define SQRADD(i, j) \ | ||
71 | asm( \ | ||
72 | "movl %6,%%eax \n\t" \ | ||
73 | "mull %%eax \n\t" \ | ||
74 | "addl %%eax,%0 \n\t" \ | ||
75 | "adcl %%edx,%1 \n\t" \ | ||
76 | "adcl $0,%2 \n\t" \ | ||
77 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); | ||
78 | |||
/* c2:c1:c0 += 2 * (i * j): the product left in edx:eax is added twice */
79 | #define SQRADD2(i, j) \ | ||
80 | asm( \ | ||
81 | "movl %6,%%eax \n\t" \ | ||
82 | "mull %7 \n\t" \ | ||
83 | "addl %%eax,%0 \n\t" \ | ||
84 | "adcl %%edx,%1 \n\t" \ | ||
85 | "adcl $0,%2 \n\t" \ | ||
86 | "addl %%eax,%0 \n\t" \ | ||
87 | "adcl %%edx,%1 \n\t" \ | ||
88 | "adcl $0,%2 \n\t" \ | ||
89 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); | ||
90 | |||
/* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */
91 | #define SQRADDSC(i, j) \ | ||
92 | asm( \ | ||
93 | "movl %6,%%eax \n\t" \ | ||
94 | "mull %7 \n\t" \ | ||
95 | "movl %%eax,%0 \n\t" \ | ||
96 | "movl %%edx,%1 \n\t" \ | ||
97 | "xorl %2,%2 \n\t" \ | ||
98 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); | ||
99 | |||
/* sc2:sc1:sc0 += i * j */
100 | #define SQRADDAC(i, j) \ | ||
101 | asm( \ | ||
102 | "movl %6,%%eax \n\t" \ | ||
103 | "mull %7 \n\t" \ | ||
104 | "addl %%eax,%0 \n\t" \ | ||
105 | "adcl %%edx,%1 \n\t" \ | ||
106 | "adcl $0,%2 \n\t" \ | ||
107 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); | ||
108 | |||
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary accumulator is added twice) */
109 | #define SQRADDDB \ | ||
110 | asm( \ | ||
111 | "addl %6,%0 \n\t" \ | ||
112 | "adcl %7,%1 \n\t" \ | ||
113 | "adcl %8,%2 \n\t" \ | ||
114 | "addl %6,%0 \n\t" \ | ||
115 | "adcl %7,%1 \n\t" \ | ||
116 | "adcl %8,%2 \n\t" \ | ||
117 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); | ||
118 | |||
119 | /******************************************************************************/ | ||
120 | #elif defined(PSTM_X86_64) | ||
121 | /* x86-64 optimized */ | ||
122 | #if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) | ||
123 | #error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" | ||
124 | #endif | ||
125 | //#pragma message ("Using 64 bit x86_64 Assembly Optimizations") | ||
126 | |||
/*
 * Comba column-accumulator primitives, x86-64 GCC inline asm flavour.
 * The macros operate on the three-digit accumulator c2:c1:c0 and on the
 * secondary accumulator sc2:sc1:sc0; both are plain local variables that
 * the function using the macros has to declare under exactly these names.
 * The non-empty macros already end in ';', so "MACRO;" at a call site
 * leaves one extra empty statement behind.
 */
/* expands to nothing in this flavour */
127 | #define COMBA_START | ||
128 | |||
/* zero the whole accumulator */
129 | #define CLEAR_CARRY \ | ||
130 | c0 = c1 = c2 = 0; | ||
131 | |||
/* write the finished column digit (c0) to x */
132 | #define COMBA_STORE(x) \ | ||
133 | x = c0; | ||
134 | |||
/* write the middle accumulator digit (c1) to x */
135 | #define COMBA_STORE2(x) \ | ||
136 | x = c1; | ||
137 | |||
/* move on to the next column: shift the accumulator down by one digit */
138 | #define CARRY_FORWARD \ | ||
139 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
140 | |||
/* expands to nothing in this flavour */
141 | #define COMBA_FINI | ||
142 | |||
/* c2:c1:c0 += i * i; the j argument is never referenced by the asm */
143 | #define SQRADD(i, j) \ | ||
144 | asm( \ | ||
145 | "movq %6,%%rax \n\t" \ | ||
146 | "mulq %%rax \n\t" \ | ||
147 | "addq %%rax,%0 \n\t" \ | ||
148 | "adcq %%rdx,%1 \n\t" \ | ||
149 | "adcq $0,%2 \n\t" \ | ||
150 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc"); | ||
151 | |||
/* c2:c1:c0 += 2 * (i * j): the product left in rdx:rax is added twice */
152 | #define SQRADD2(i, j) \ | ||
153 | asm( \ | ||
154 | "movq %6,%%rax \n\t" \ | ||
155 | "mulq %7 \n\t" \ | ||
156 | "addq %%rax,%0 \n\t" \ | ||
157 | "adcq %%rdx,%1 \n\t" \ | ||
158 | "adcq $0,%2 \n\t" \ | ||
159 | "addq %%rax,%0 \n\t" \ | ||
160 | "adcq %%rdx,%1 \n\t" \ | ||
161 | "adcq $0,%2 \n\t" \ | ||
162 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
163 | |||
/* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */
164 | #define SQRADDSC(i, j) \ | ||
165 | asm( \ | ||
166 | "movq %6,%%rax \n\t" \ | ||
167 | "mulq %7 \n\t" \ | ||
168 | "movq %%rax,%0 \n\t" \ | ||
169 | "movq %%rdx,%1 \n\t" \ | ||
170 | "xorq %2,%2 \n\t" \ | ||
171 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
172 | |||
/* sc2:sc1:sc0 += i * j */
173 | #define SQRADDAC(i, j) \ | ||
174 | asm( \ | ||
175 | "movq %6,%%rax \n\t" \ | ||
176 | "mulq %7 \n\t" \ | ||
177 | "addq %%rax,%0 \n\t" \ | ||
178 | "adcq %%rdx,%1 \n\t" \ | ||
179 | "adcq $0,%2 \n\t" \ | ||
180 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
181 | |||
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary accumulator is added twice) */
182 | #define SQRADDDB \ | ||
183 | asm( \ | ||
184 | "addq %6,%0 \n\t" \ | ||
185 | "adcq %7,%1 \n\t" \ | ||
186 | "adcq %8,%2 \n\t" \ | ||
187 | "addq %6,%0 \n\t" \ | ||
188 | "adcq %7,%1 \n\t" \ | ||
189 | "adcq %8,%2 \n\t" \ | ||
190 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); | ||
191 | |||
192 | /******************************************************************************/ | ||
193 | #elif defined(PSTM_ARM) | ||
194 | /* ARM code */ | ||
195 | //#pragma message ("Using 32 bit ARM Assembly Optimizations") | ||
196 | |||
/*
 * Comba column-accumulator primitives, 32-bit ARM inline asm flavour.
 * The macros operate on the three-digit accumulator c2:c1:c0 and on the
 * secondary accumulator sc2:sc1:sc0; both are plain local variables that
 * the function using the macros has to declare under exactly these names.
 * r0/r1 serve as scratch for the UMULL result wherever they appear in a
 * clobber list. The non-empty macros already end in ';', so "MACRO;" at a
 * call site leaves one extra empty statement behind.
 */
/* expands to nothing in this flavour */
197 | #define COMBA_START | ||
198 | |||
/* zero the whole accumulator */
199 | #define CLEAR_CARRY \ | ||
200 | c0 = c1 = c2 = 0; | ||
201 | |||
/* write the finished column digit (c0) to x */
202 | #define COMBA_STORE(x) \ | ||
203 | x = c0; | ||
204 | |||
/* write the middle accumulator digit (c1) to x */
205 | #define COMBA_STORE2(x) \ | ||
206 | x = c1; | ||
207 | |||
/* move on to the next column: shift the accumulator down by one digit */
208 | #define CARRY_FORWARD \ | ||
209 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
210 | |||
/* expands to nothing in this flavour */
211 | #define COMBA_FINI | ||
212 | |||
213 | /* squares i (j is never referenced by the asm) and adds the product into c2:c1:c0 */ | ||
214 | #define SQRADD(i, j) \ | ||
215 | asm( \ | ||
216 | " UMULL r0,r1,%6,%6 \n\t" \ | ||
217 | " ADDS %0,%0,r0 \n\t" \ | ||
218 | " ADCS %1,%1,r1 \n\t" \ | ||
219 | " ADC %2,%2,#0 \n\t" \ | ||
220 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); | ||
221 | |||
222 | /* for squaring some of the terms are doubled: c2:c1:c0 += 2 * (i * j) */ | ||
223 | #define SQRADD2(i, j) \ | ||
224 | asm( \ | ||
225 | " UMULL r0,r1,%6,%7 \n\t" \ | ||
226 | " ADDS %0,%0,r0 \n\t" \ | ||
227 | " ADCS %1,%1,r1 \n\t" \ | ||
228 | " ADC %2,%2,#0 \n\t" \ | ||
229 | " ADDS %0,%0,r0 \n\t" \ | ||
230 | " ADCS %1,%1,r1 \n\t" \ | ||
231 | " ADC %2,%2,#0 \n\t" \ | ||
232 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); | ||
233 | |||
/* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */
234 | #define SQRADDSC(i, j) \ | ||
235 | asm( \ | ||
236 | " UMULL %0,%1,%6,%7 \n\t" \ | ||
237 | " SUB %2,%2,%2 \n\t" \ | ||
238 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); | ||
239 | |||
/* sc2:sc1:sc0 += i * j */
240 | #define SQRADDAC(i, j) \ | ||
241 | asm( \ | ||
242 | " UMULL r0,r1,%6,%7 \n\t" \ | ||
243 | " ADDS %0,%0,r0 \n\t" \ | ||
244 | " ADCS %1,%1,r1 \n\t" \ | ||
245 | " ADC %2,%2,#0 \n\t" \ | ||
246 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); | ||
247 | |||
/*
 * c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary accumulator is added twice).
 * Operand numbering differs from the other macros: %3..%5 are sc0..sc2.
 */
248 | #define SQRADDDB \ | ||
249 | asm( \ | ||
250 | " ADDS %0,%0,%3 \n\t" \ | ||
251 | " ADCS %1,%1,%4 \n\t" \ | ||
252 | " ADC %2,%2,%5 \n\t" \ | ||
253 | " ADDS %0,%0,%3 \n\t" \ | ||
254 | " ADCS %1,%1,%4 \n\t" \ | ||
255 | " ADC %2,%2,%5 \n\t" \ | ||
256 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); | ||
257 | |||
258 | /******************************************************************************/ | ||
259 | #elif defined(PSTM_MIPS) | ||
260 | /* MIPS32 */ | ||
261 | //#pragma message ("Using 32 bit MIPS Assembly Optimizations") | ||
262 | |||
/*
 * Comba column-accumulator primitives, MIPS32 inline asm flavour.
 * The macros operate on the three-digit accumulator c2:c1:c0 and on the
 * secondary accumulator sc2:sc1:sc0; both are plain local variables that
 * the function using the macros has to declare under exactly these names.
 * Carries are recovered with sltu: after "sum = sum + x", "sltu t,sum,x"
 * yields 1 exactly when the addition wrapped around.
 * The non-empty macros already end in ';', so "MACRO;" at a call site
 * leaves one extra empty statement behind.
 * NOTE(review): multu writes the HI/LO registers (read back via mflo/mfhi),
 * and neither appears in any clobber list below - confirm that the compiler
 * in use treats them as clobbered by these asm statements.
 */
/* expands to nothing in this flavour */
263 | #define COMBA_START | ||
264 | |||
/* zero the whole accumulator */
265 | #define CLEAR_CARRY \ | ||
266 | c0 = c1 = c2 = 0; | ||
267 | |||
/* write the finished column digit (c0) to x */
268 | #define COMBA_STORE(x) \ | ||
269 | x = c0; | ||
270 | |||
/* write the middle accumulator digit (c1) to x */
271 | #define COMBA_STORE2(x) \ | ||
272 | x = c1; | ||
273 | |||
/* move on to the next column: shift the accumulator down by one digit */
274 | #define CARRY_FORWARD \ | ||
275 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
276 | |||
/* expands to nothing in this flavour */
277 | #define COMBA_FINI | ||
278 | |||
279 | /* squares i (j is never referenced by the asm) and adds the product into c2:c1:c0; $12/$13 are scratch */ | ||
280 | #define SQRADD(i, j) \ | ||
281 | asm( \ | ||
282 | " multu %6,%6 \n\t" \ | ||
283 | " mflo $12 \n\t" \ | ||
284 | " mfhi $13 \n\t" \ | ||
285 | " addu %0,%0,$12 \n\t" \ | ||
286 | " sltu $12,%0,$12 \n\t" \ | ||
287 | " addu %1,%1,$13 \n\t" \ | ||
288 | " sltu $13,%1,$13 \n\t" \ | ||
289 | " addu %1,%1,$12 \n\t" \ | ||
290 | " sltu $12,%1,$12 \n\t" \ | ||
291 | " addu %2,%2,$13 \n\t" \ | ||
292 | " addu %2,%2,$12 \n\t" \ | ||
293 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); | ||
294 | |||
295 | /* for squaring some of the terms are doubled: c2:c1:c0 += 2 * (i * j); the product stays in $12/$13 while $14/$15 collect the carries of each of the two additions */ | ||
296 | #define SQRADD2(i, j) \ | ||
297 | asm( \ | ||
298 | " multu %6,%7 \n\t" \ | ||
299 | " mflo $12 \n\t" \ | ||
300 | " mfhi $13 \n\t" \ | ||
301 | \ | ||
302 | " addu %0,%0,$12 \n\t" \ | ||
303 | " sltu $14,%0,$12 \n\t" \ | ||
304 | " addu %1,%1,$13 \n\t" \ | ||
305 | " sltu $15,%1,$13 \n\t" \ | ||
306 | " addu %1,%1,$14 \n\t" \ | ||
307 | " sltu $14,%1,$14 \n\t" \ | ||
308 | " addu %2,%2,$15 \n\t" \ | ||
309 | " addu %2,%2,$14 \n\t" \ | ||
310 | \ | ||
311 | " addu %0,%0,$12 \n\t" \ | ||
312 | " sltu $14,%0,$12 \n\t" \ | ||
313 | " addu %1,%1,$13 \n\t" \ | ||
314 | " sltu $15,%1,$13 \n\t" \ | ||
315 | " addu %1,%1,$14 \n\t" \ | ||
316 | " sltu $14,%1,$14 \n\t" \ | ||
317 | " addu %2,%2,$15 \n\t" \ | ||
318 | " addu %2,%2,$14 \n\t" \ | ||
319 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); | ||
320 | |||
/* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */
321 | #define SQRADDSC(i, j) \ | ||
322 | asm( \ | ||
323 | " multu %6,%7 \n\t" \ | ||
324 | " mflo %0 \n\t" \ | ||
325 | " mfhi %1 \n\t" \ | ||
326 | " xor %2,%2,%2 \n\t" \ | ||
327 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); | ||
328 | |||
/* sc2:sc1:sc0 += i * j; the body uses only $12/$13 although $14 is also listed as clobbered */
329 | #define SQRADDAC(i, j) \ | ||
330 | asm( \ | ||
331 | " multu %6,%7 \n\t" \ | ||
332 | " mflo $12 \n\t" \ | ||
333 | " mfhi $13 \n\t" \ | ||
334 | " addu %0,%0,$12 \n\t" \ | ||
335 | " sltu $12,%0,$12 \n\t" \ | ||
336 | " addu %1,%1,$13 \n\t" \ | ||
337 | " sltu $13,%1,$13 \n\t" \ | ||
338 | " addu %1,%1,$12 \n\t" \ | ||
339 | " sltu $12,%1,$12 \n\t" \ | ||
340 | " addu %2,%2,$13 \n\t" \ | ||
341 | " addu %2,%2,$12 \n\t" \ | ||
342 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); | ||
343 | |||
/*
 * c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary accumulator is added twice).
 * %3..%5 are sc0..sc2; $10/$11 hold the carries out of the two lower digits.
 */
344 | #define SQRADDDB \ | ||
345 | asm( \ | ||
346 | " addu %0,%0,%3 \n\t" \ | ||
347 | " sltu $10,%0,%3 \n\t" \ | ||
348 | " addu %1,%1,$10 \n\t" \ | ||
349 | " sltu $10,%1,$10 \n\t" \ | ||
350 | " addu %1,%1,%4 \n\t" \ | ||
351 | " sltu $11,%1,%4 \n\t" \ | ||
352 | " addu %2,%2,$10 \n\t" \ | ||
353 | " addu %2,%2,$11 \n\t" \ | ||
354 | " addu %2,%2,%5 \n\t" \ | ||
355 | \ | ||
356 | " addu %0,%0,%3 \n\t" \ | ||
357 | " sltu $10,%0,%3 \n\t" \ | ||
358 | " addu %1,%1,$10 \n\t" \ | ||
359 | " sltu $10,%1,$10 \n\t" \ | ||
360 | " addu %1,%1,%4 \n\t" \ | ||
361 | " sltu $11,%1,%4 \n\t" \ | ||
362 | " addu %2,%2,$10 \n\t" \ | ||
363 | " addu %2,%2,$11 \n\t" \ | ||
364 | " addu %2,%2,%5 \n\t" \ | ||
365 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); | ||
366 | |||
367 | #else | ||
368 | /******************************************************************************/ | ||
/*
 * Portable C flavour of the Comba column-accumulator primitives.
 * The macros operate on the three-digit accumulator c2:c1:c0 and on the
 * secondary accumulator sc2:sc1:sc0; both are plain local variables that
 * the function using the macros has to declare under exactly these names.
 * Products and sums are formed in pstm_word and split with DIGIT_BIT.
 * PSTM_ISO is what makes the squaring functions in this file declare the
 * extra "pstm_word tt" temporary that SQRADD2 relies on.
 * The non-empty macros already end in ';', so "MACRO;" at a call site
 * leaves one extra empty statement behind.
 */
369 | #define PSTM_ISO | ||
370 | /* ISO C portable code */ | ||
371 | |||
/* expands to nothing in this flavour */
372 | #define COMBA_START | ||
373 | |||
/* zero the whole accumulator */
374 | #define CLEAR_CARRY \ | ||
375 | c0 = c1 = c2 = 0; | ||
376 | |||
/* write the finished column digit (c0) to x */
377 | #define COMBA_STORE(x) \ | ||
378 | x = c0; | ||
379 | |||
/* write the middle accumulator digit (c1) to x */
380 | #define COMBA_STORE2(x) \ | ||
381 | x = c1; | ||
382 | |||
/* move on to the next column: shift the accumulator down by one digit */
383 | #define CARRY_FORWARD \ | ||
384 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
385 | |||
/* expands to nothing in this flavour */
386 | #define COMBA_FINI | ||
387 | |||
388 | /* c2:c1:c0 += i * j; unlike the asm flavours this one does read j (the callers visible in this file pass the same digit for both) */ | ||
389 | #define SQRADD(i, j) \ | ||
390 | do { pstm_word t; \ | ||
391 | t = c0 + ((pstm_word)i) * ((pstm_word)j); c0 = (pstm_digit)t; \ | ||
392 | t = c1 + (t >> DIGIT_BIT); \ | ||
393 | c1 = (pstm_digit)t; c2 += (pstm_digit)(t >> DIGIT_BIT); \ | ||
394 | } while (0); | ||
395 | |||
396 | |||
397 | /* for squaring some of the terms are doubled: c2:c1:c0 += 2 * (i * j); t keeps the product, tt (declared by the enclosing function) is the running sum */ | ||
398 | #define SQRADD2(i, j) \ | ||
399 | do { pstm_word t; \ | ||
400 | t = ((pstm_word)i) * ((pstm_word)j); \ | ||
401 | tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \ | ||
402 | tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \ | ||
403 | c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \ | ||
404 | tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \ | ||
405 | tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \ | ||
406 | c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \ | ||
407 | } while (0); | ||
408 | |||
/* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */
409 | #define SQRADDSC(i, j) \ | ||
410 | do { pstm_word t; \ | ||
411 | t = ((pstm_word)i) * ((pstm_word)j); \ | ||
412 | sc0 = (pstm_digit)t; sc1 = (pstm_digit)(t >> DIGIT_BIT); sc2 = 0; \ | ||
413 | } while (0); | ||
414 | |||
/* sc2:sc1:sc0 += i * j */
415 | #define SQRADDAC(i, j) \ | ||
416 | do { pstm_word t; \ | ||
417 | t = ((pstm_word)sc0) + ((pstm_word)i) * ((pstm_word)j); \ | ||
418 | sc0 = (pstm_digit)t; \ | ||
419 | t = ((pstm_word)sc1) + (t >> DIGIT_BIT); sc1 = (pstm_digit)t; \ | ||
420 | sc2 += (pstm_digit)(t >> DIGIT_BIT); \ | ||
421 | } while (0); | ||
422 | |||
/* c2:c1:c0 += 2 * sc2:sc1:sc0, done digit by digit in a single pass */
423 | #define SQRADDDB \ | ||
424 | do { pstm_word t; \ | ||
425 | t = ((pstm_word)sc0) + ((pstm_word)sc0) + ((pstm_word)c0); \ | ||
426 | c0 = (pstm_digit)t; \ | ||
427 | t = ((pstm_word)sc1) + ((pstm_word)sc1) + c1 + (t >> DIGIT_BIT); \ | ||
428 | c1 = (pstm_digit)t; \ | ||
429 | c2 = c2 + sc2 + sc2 + (pstm_digit)(t >> DIGIT_BIT); \ | ||
430 | } while (0); | ||
431 | |||
432 | #endif /* ISO_C */ | ||
433 | |||
434 | /******************************************************************************/ | ||
435 | /* | ||
436 | Non-unrolled comba squarer | ||
437 | */ | ||
438 | ///bbox: pool unused | ||
439 | #define pstm_sqr_comba_gen(pool, A, B, paD, paDlen) \ | ||
440 | pstm_sqr_comba_gen( A, B, paD, paDlen) | ||
441 | static int32 pstm_sqr_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B, | ||
442 | pstm_digit *paD, uint32 paDlen) | ||
443 | { | ||
444 | int16 paDfail, pa; | ||
445 | int32 ix, iz; | ||
446 | pstm_digit c0, c1, c2, *dst; | ||
447 | #ifdef PSTM_ISO | ||
448 | pstm_word tt; | ||
449 | #endif | ||
450 | |||
451 | paDfail = 0; | ||
452 | /* get size of output and trim */ | ||
453 | pa = A->used + A->used; | ||
454 | |||
455 | /* number of output digits to produce */ | ||
456 | COMBA_START; | ||
457 | CLEAR_CARRY; | ||
458 | /* | ||
459 | If b is not large enough grow it and continue | ||
460 | */ | ||
461 | if (B->alloc < pa) { | ||
462 | if (pstm_grow(B, pa) != PSTM_OKAY) { | ||
463 | return PS_MEM_FAIL; | ||
464 | } | ||
465 | } | ||
466 | if (paD != NULL) { | ||
467 | if (paDlen < (sizeof(pstm_digit) * pa)) { | ||
468 | paDfail = 1; /* have a paD, but it's not big enough */ | ||
469 | dst = xzalloc(sizeof(pstm_digit) * pa); | ||
470 | } else { | ||
471 | dst = paD; | ||
472 | memset(dst, 0x0, paDlen); | ||
473 | } | ||
474 | } else { | ||
475 | dst = xzalloc(sizeof(pstm_digit) * pa); | ||
476 | } | ||
477 | |||
478 | for (ix = 0; ix < pa; ix++) { | ||
479 | int32 tx, ty, iy; | ||
480 | pstm_digit *tmpy, *tmpx; | ||
481 | |||
482 | /* get offsets into the two bignums */ | ||
483 | ty = min(A->used-1, ix); | ||
484 | tx = ix - ty; | ||
485 | |||
486 | /* setup temp aliases */ | ||
487 | tmpx = A->dp + tx; | ||
488 | tmpy = A->dp + ty; | ||
489 | |||
490 | /* | ||
491 | This is the number of times the loop will iterate, | ||
492 | while (tx++ < a->used && ty-- >= 0) { ... } | ||
493 | */ | ||
494 | iy = min(A->used-tx, ty+1); | ||
495 | |||
496 | /* | ||
497 | now for squaring tx can never equal ty. We halve the distance since | ||
498 | they approach at a rate of 2x and we have to round because odd cases | ||
499 | need to be executed | ||
500 | */ | ||
501 | iy = min(iy, (ty-tx+1)>>1); | ||
502 | |||
503 | /* forward carries */ | ||
504 | CARRY_FORWARD; | ||
505 | |||
506 | /* execute loop */ | ||
507 | for (iz = 0; iz < iy; iz++) { | ||
508 | SQRADD2(*tmpx++, *tmpy--); | ||
509 | } | ||
510 | |||
511 | /* even columns have the square term in them */ | ||
512 | if ((ix&1) == 0) { | ||
513 | SQRADD(A->dp[ix>>1], A->dp[ix>>1]); | ||
514 | } | ||
515 | |||
516 | /* store it */ | ||
517 | COMBA_STORE(dst[ix]); | ||
518 | } | ||
519 | |||
520 | COMBA_FINI; | ||
521 | /* | ||
522 | setup dest | ||
523 | */ | ||
524 | iz = B->used; | ||
525 | B->used = pa; | ||
526 | { | ||
527 | pstm_digit *tmpc; | ||
528 | tmpc = B->dp; | ||
529 | for (ix = 0; ix < pa; ix++) { | ||
530 | *tmpc++ = dst[ix]; | ||
531 | } | ||
532 | /* clear unused digits (that existed in the old copy of c) */ | ||
533 | for (; ix < iz; ix++) { | ||
534 | *tmpc++ = 0; | ||
535 | } | ||
536 | } | ||
537 | pstm_clamp(B); | ||
538 | |||
539 | if ((paD == NULL) || paDfail == 1) { | ||
540 | psFree(dst, pool); | ||
541 | } | ||
542 | return PS_SUCCESS; | ||
543 | } | ||
544 | |||
545 | /******************************************************************************/ | ||
546 | /* | ||
547 | Unrolled Comba loop for 1024 bit keys | ||
548 | */ | ||
549 | #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
/*
 * pstm_sqr_comba16: B = A * A for an operand of exactly 16 digits, with the
 * column loop fully unrolled.
 *
 * Each "output k" section accumulates every product a[i]*a[j] with
 * i + j == k. Every i < j pair is listed once (through SQRADD2, or through a
 * SQRADDSC / SQRADDAC... / SQRADDDB run); even columns additionally get the
 * diagonal term a[k/2]*a[k/2] through SQRADD.
 *
 * a[0..15] are read unconditionally. The only caller visible in this chunk,
 * pstm_sqr_comba(), dispatches here only when A->used == 16.
 *
 * Returns PSTM_OKAY, or PS_MEM_FAIL when B cannot be grown to 32 digits.
 *
 * NOTE(review): COMBA_*, CLEAR_CARRY, CARRY_FORWARD and the SQRADD* macros are
 * defined outside this view. Judging by their names and the sc0..sc2 locals,
 * the SC/AC/DB family presumably sums the cross products in a side
 * accumulator and doubles that sum once - confirm against the macro
 * definitions before relying on it.
 */
550 | static int32 pstm_sqr_comba16(pstm_int *A, pstm_int *B) | ||
551 | { | ||
/* b[] is scratch for the 32-digit product; it is copied into B->dp only after every column has been computed */
552 | pstm_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; | ||
/* tt is never named in this function body; presumably the PSTM_ISO flavour of the macros uses it - TODO confirm */
553 | #ifdef PSTM_ISO | ||
554 | pstm_word tt; | ||
555 | #endif | ||
556 | |||
/* make sure the destination can hold 2 * 16 digits before any work is done */
557 | if (B->alloc < 32) { | ||
558 | if (pstm_grow(B, 32) != PSTM_OKAY) { | ||
559 | return PS_MEM_FAIL; | ||
560 | } | ||
561 | } | ||
/* A->dp is sampled after the grow above */
562 | a = A->dp; | ||
563 | sc0 = sc1 = sc2 = 0; | ||
564 | |||
565 | COMBA_START; | ||
566 | |||
567 | /* clear carries */ | ||
568 | CLEAR_CARRY; | ||
569 | |||
570 | /* output 0 */ | ||
571 | SQRADD(a[0],a[0]); | ||
572 | COMBA_STORE(b[0]); | ||
573 | |||
574 | /* output 1 */ | ||
575 | CARRY_FORWARD; | ||
576 | SQRADD2(a[0], a[1]); | ||
577 | COMBA_STORE(b[1]); | ||
578 | |||
579 | /* output 2 */ | ||
580 | CARRY_FORWARD; | ||
581 | SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); | ||
582 | COMBA_STORE(b[2]); | ||
583 | |||
584 | /* output 3 */ | ||
585 | CARRY_FORWARD; | ||
586 | SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); | ||
587 | COMBA_STORE(b[3]); | ||
588 | |||
589 | /* output 4 */ | ||
590 | CARRY_FORWARD; | ||
591 | SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); | ||
592 | COMBA_STORE(b[4]); | ||
593 | |||
/* from here until output 25, columns with three or more cross terms use the SQRADDSC/SQRADDAC/SQRADDDB form instead of repeated SQRADD2 */
594 | /* output 5 */ | ||
595 | CARRY_FORWARD; | ||
596 | SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; | ||
597 | COMBA_STORE(b[5]); | ||
598 | |||
599 | /* output 6 */ | ||
600 | CARRY_FORWARD; | ||
601 | SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); | ||
602 | COMBA_STORE(b[6]); | ||
603 | |||
604 | /* output 7 */ | ||
605 | CARRY_FORWARD; | ||
606 | SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; | ||
607 | COMBA_STORE(b[7]); | ||
608 | |||
609 | /* output 8 */ | ||
610 | CARRY_FORWARD; | ||
611 | SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); | ||
612 | COMBA_STORE(b[8]); | ||
613 | |||
614 | /* output 9 */ | ||
615 | CARRY_FORWARD; | ||
616 | SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; | ||
617 | COMBA_STORE(b[9]); | ||
618 | |||
619 | /* output 10 */ | ||
620 | CARRY_FORWARD; | ||
621 | SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); | ||
622 | COMBA_STORE(b[10]); | ||
623 | |||
624 | /* output 11 */ | ||
625 | CARRY_FORWARD; | ||
626 | SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; | ||
627 | COMBA_STORE(b[11]); | ||
628 | |||
629 | /* output 12 */ | ||
630 | CARRY_FORWARD; | ||
631 | SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); | ||
632 | COMBA_STORE(b[12]); | ||
633 | |||
634 | /* output 13 */ | ||
635 | CARRY_FORWARD; | ||
636 | SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; | ||
637 | COMBA_STORE(b[13]); | ||
638 | |||
639 | /* output 14 */ | ||
640 | CARRY_FORWARD; | ||
641 | SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); | ||
642 | COMBA_STORE(b[14]); | ||
643 | |||
/* widest column: all eight i < j pairs with i + j == 15 */
644 | /* output 15 */ | ||
645 | CARRY_FORWARD; | ||
646 | SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; | ||
647 | COMBA_STORE(b[15]); | ||
648 | |||
/* from column 16 on the lowest index rises by one per column, because a[] has no digit above a[15] */
649 | /* output 16 */ | ||
650 | CARRY_FORWARD; | ||
651 | SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); | ||
652 | COMBA_STORE(b[16]); | ||
653 | |||
654 | /* output 17 */ | ||
655 | CARRY_FORWARD; | ||
656 | SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; | ||
657 | COMBA_STORE(b[17]); | ||
658 | |||
659 | /* output 18 */ | ||
660 | CARRY_FORWARD; | ||
661 | SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); | ||
662 | COMBA_STORE(b[18]); | ||
663 | |||
664 | /* output 19 */ | ||
665 | CARRY_FORWARD; | ||
666 | SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; | ||
667 | COMBA_STORE(b[19]); | ||
668 | |||
669 | /* output 20 */ | ||
670 | CARRY_FORWARD; | ||
671 | SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); | ||
672 | COMBA_STORE(b[20]); | ||
673 | |||
674 | /* output 21 */ | ||
675 | CARRY_FORWARD; | ||
676 | SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; | ||
677 | COMBA_STORE(b[21]); | ||
678 | |||
679 | /* output 22 */ | ||
680 | CARRY_FORWARD; | ||
681 | SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); | ||
682 | COMBA_STORE(b[22]); | ||
683 | |||
684 | /* output 23 */ | ||
685 | CARRY_FORWARD; | ||
686 | SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; | ||
687 | COMBA_STORE(b[23]); | ||
688 | |||
689 | /* output 24 */ | ||
690 | CARRY_FORWARD; | ||
691 | SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); | ||
692 | COMBA_STORE(b[24]); | ||
693 | |||
694 | /* output 25 */ | ||
695 | CARRY_FORWARD; | ||
696 | SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; | ||
697 | COMBA_STORE(b[25]); | ||
698 | |||
699 | /* output 26 */ | ||
700 | CARRY_FORWARD; | ||
701 | SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); | ||
702 | COMBA_STORE(b[26]); | ||
703 | |||
704 | /* output 27 */ | ||
705 | CARRY_FORWARD; | ||
706 | SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); | ||
707 | COMBA_STORE(b[27]); | ||
708 | |||
709 | /* output 28 */ | ||
710 | CARRY_FORWARD; | ||
711 | SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); | ||
712 | COMBA_STORE(b[28]); | ||
713 | |||
714 | /* output 29 */ | ||
715 | CARRY_FORWARD; | ||
716 | SQRADD2(a[14], a[15]); | ||
717 | COMBA_STORE(b[29]); | ||
718 | |||
719 | /* output 30 */ | ||
720 | CARRY_FORWARD; | ||
721 | SQRADD(a[15], a[15]); | ||
722 | COMBA_STORE(b[30]); | ||
/* b[31] has no products of its own; COMBA_STORE2 presumably writes the carry left over from column 30 - TODO confirm against the macro */
723 | COMBA_STORE2(b[31]); | ||
724 | COMBA_FINI; | ||
725 | |||
/* publish the result: fixed 32-digit length, positive sign, scratch copied into B, then pstm_clamp() (presumably trims leading zero digits - defined elsewhere) */
726 | B->used = 32; | ||
727 | B->sign = PSTM_ZPOS; | ||
728 | memcpy(B->dp, b, 32 * sizeof(pstm_digit)); | ||
729 | pstm_clamp(B); | ||
730 | return PSTM_OKAY; | ||
731 | } | ||
732 | #endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */ | ||
733 | |||
734 | |||
735 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
/*
 * pstm_sqr_comba32: B = A * A for an operand of exactly 32 digits, with the
 * column loop fully unrolled (the 64-digit counterpart of pstm_sqr_comba16).
 *
 * Each "output k" section accumulates every product a[i]*a[j] with
 * i + j == k. Every i < j pair is listed once (through SQRADD2, or through a
 * SQRADDSC / SQRADDAC... / SQRADDDB run); even columns additionally get the
 * diagonal term a[k/2]*a[k/2] through SQRADD.
 *
 * a[0..31] are read unconditionally. The only caller visible in this chunk,
 * pstm_sqr_comba(), dispatches here only when A->used == 32.
 *
 * Returns PSTM_OKAY, or PS_MEM_FAIL when B cannot be grown to 64 digits.
 *
 * NOTE(review): COMBA_*, CLEAR_CARRY, CARRY_FORWARD and the SQRADD* macros are
 * defined outside this view. Judging by their names and the sc0..sc2 locals,
 * the SC/AC/DB family presumably sums the cross products in a side
 * accumulator and doubles that sum once - confirm against the macro
 * definitions before relying on it.
 */
736 | static int32 pstm_sqr_comba32(pstm_int *A, pstm_int *B) | ||
737 | { | ||
/* b[] is scratch for the 64-digit product; it is copied into B->dp only after every column has been computed */
738 | pstm_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; | ||
/* tt is never named in this function body; presumably the PSTM_ISO flavour of the macros uses it - TODO confirm */
739 | #ifdef PSTM_ISO | ||
740 | pstm_word tt; | ||
741 | #endif | ||
742 | |||
/* make sure the destination can hold 2 * 32 digits before any work is done */
743 | if (B->alloc < 64) { | ||
744 | if (pstm_grow(B, 64) != PSTM_OKAY) { | ||
745 | return PS_MEM_FAIL; | ||
746 | } | ||
747 | } | ||
748 | sc0 = sc1 = sc2 = 0; | ||
/* A->dp is sampled after the grow above */
749 | a = A->dp; | ||
750 | COMBA_START; | ||
751 | |||
752 | /* clear carries */ | ||
753 | CLEAR_CARRY; | ||
754 | |||
755 | /* output 0 */ | ||
756 | SQRADD(a[0],a[0]); | ||
757 | COMBA_STORE(b[0]); | ||
758 | |||
759 | /* output 1 */ | ||
760 | CARRY_FORWARD; | ||
761 | SQRADD2(a[0], a[1]); | ||
762 | COMBA_STORE(b[1]); | ||
763 | |||
764 | /* output 2 */ | ||
765 | CARRY_FORWARD; | ||
766 | SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); | ||
767 | COMBA_STORE(b[2]); | ||
768 | |||
769 | /* output 3 */ | ||
770 | CARRY_FORWARD; | ||
771 | SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); | ||
772 | COMBA_STORE(b[3]); | ||
773 | |||
774 | /* output 4 */ | ||
775 | CARRY_FORWARD; | ||
776 | SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); | ||
777 | COMBA_STORE(b[4]); | ||
778 | |||
/* from here until output 57, columns with three or more cross terms use the SQRADDSC/SQRADDAC/SQRADDDB form instead of repeated SQRADD2 */
779 | /* output 5 */ | ||
780 | CARRY_FORWARD; | ||
781 | SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; | ||
782 | COMBA_STORE(b[5]); | ||
783 | |||
784 | /* output 6 */ | ||
785 | CARRY_FORWARD; | ||
786 | SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); | ||
787 | COMBA_STORE(b[6]); | ||
788 | |||
789 | /* output 7 */ | ||
790 | CARRY_FORWARD; | ||
791 | SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; | ||
792 | COMBA_STORE(b[7]); | ||
793 | |||
794 | /* output 8 */ | ||
795 | CARRY_FORWARD; | ||
796 | SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); | ||
797 | COMBA_STORE(b[8]); | ||
798 | |||
799 | /* output 9 */ | ||
800 | CARRY_FORWARD; | ||
801 | SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; | ||
802 | COMBA_STORE(b[9]); | ||
803 | |||
804 | /* output 10 */ | ||
805 | CARRY_FORWARD; | ||
806 | SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); | ||
807 | COMBA_STORE(b[10]); | ||
808 | |||
809 | /* output 11 */ | ||
810 | CARRY_FORWARD; | ||
811 | SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; | ||
812 | COMBA_STORE(b[11]); | ||
813 | |||
814 | /* output 12 */ | ||
815 | CARRY_FORWARD; | ||
816 | SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); | ||
817 | COMBA_STORE(b[12]); | ||
818 | |||
819 | /* output 13 */ | ||
820 | CARRY_FORWARD; | ||
821 | SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; | ||
822 | COMBA_STORE(b[13]); | ||
823 | |||
824 | /* output 14 */ | ||
825 | CARRY_FORWARD; | ||
826 | SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); | ||
827 | COMBA_STORE(b[14]); | ||
828 | |||
829 | /* output 15 */ | ||
830 | CARRY_FORWARD; | ||
831 | SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; | ||
832 | COMBA_STORE(b[15]); | ||
833 | |||
834 | /* output 16 */ | ||
835 | CARRY_FORWARD; | ||
836 | SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); | ||
837 | COMBA_STORE(b[16]); | ||
838 | |||
839 | /* output 17 */ | ||
840 | CARRY_FORWARD; | ||
841 | SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; | ||
842 | COMBA_STORE(b[17]); | ||
843 | |||
844 | /* output 18 */ | ||
845 | CARRY_FORWARD; | ||
846 | SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); | ||
847 | COMBA_STORE(b[18]); | ||
848 | |||
849 | /* output 19 */ | ||
850 | CARRY_FORWARD; | ||
851 | SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; | ||
852 | COMBA_STORE(b[19]); | ||
853 | |||
854 | /* output 20 */ | ||
855 | CARRY_FORWARD; | ||
856 | SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); | ||
857 | COMBA_STORE(b[20]); | ||
858 | |||
859 | /* output 21 */ | ||
860 | CARRY_FORWARD; | ||
861 | SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; | ||
862 | COMBA_STORE(b[21]); | ||
863 | |||
864 | /* output 22 */ | ||
865 | CARRY_FORWARD; | ||
866 | SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); | ||
867 | COMBA_STORE(b[22]); | ||
868 | |||
869 | /* output 23 */ | ||
870 | CARRY_FORWARD; | ||
871 | SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; | ||
872 | COMBA_STORE(b[23]); | ||
873 | |||
874 | /* output 24 */ | ||
875 | CARRY_FORWARD; | ||
876 | SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); | ||
877 | COMBA_STORE(b[24]); | ||
878 | |||
879 | /* output 25 */ | ||
880 | CARRY_FORWARD; | ||
881 | SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; | ||
882 | COMBA_STORE(b[25]); | ||
883 | |||
884 | /* output 26 */ | ||
885 | CARRY_FORWARD; | ||
886 | SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); | ||
887 | COMBA_STORE(b[26]); | ||
888 | |||
889 | /* output 27 */ | ||
890 | CARRY_FORWARD; | ||
891 | SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; | ||
892 | COMBA_STORE(b[27]); | ||
893 | |||
894 | /* output 28 */ | ||
895 | CARRY_FORWARD; | ||
896 | SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); | ||
897 | COMBA_STORE(b[28]); | ||
898 | |||
899 | /* output 29 */ | ||
900 | CARRY_FORWARD; | ||
901 | SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; | ||
902 | COMBA_STORE(b[29]); | ||
903 | |||
904 | /* output 30 */ | ||
905 | CARRY_FORWARD; | ||
906 | SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); | ||
907 | COMBA_STORE(b[30]); | ||
908 | |||
/* widest column: all sixteen i < j pairs with i + j == 31 */
909 | /* output 31 */ | ||
910 | CARRY_FORWARD; | ||
911 | SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; | ||
912 | COMBA_STORE(b[31]); | ||
913 | |||
/* from column 32 on the lowest index rises by one per column, because a[] has no digit above a[31] */
914 | /* output 32 */ | ||
915 | CARRY_FORWARD; | ||
916 | SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); | ||
917 | COMBA_STORE(b[32]); | ||
918 | |||
919 | /* output 33 */ | ||
920 | CARRY_FORWARD; | ||
921 | SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; | ||
922 | COMBA_STORE(b[33]); | ||
923 | |||
924 | /* output 34 */ | ||
925 | CARRY_FORWARD; | ||
926 | SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); | ||
927 | COMBA_STORE(b[34]); | ||
928 | |||
929 | /* output 35 */ | ||
930 | CARRY_FORWARD; | ||
931 | SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; | ||
932 | COMBA_STORE(b[35]); | ||
933 | |||
934 | /* output 36 */ | ||
935 | CARRY_FORWARD; | ||
936 | SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); | ||
937 | COMBA_STORE(b[36]); | ||
938 | |||
939 | /* output 37 */ | ||
940 | CARRY_FORWARD; | ||
941 | SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; | ||
942 | COMBA_STORE(b[37]); | ||
943 | |||
944 | /* output 38 */ | ||
945 | CARRY_FORWARD; | ||
946 | SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); | ||
947 | COMBA_STORE(b[38]); | ||
948 | |||
949 | /* output 39 */ | ||
950 | CARRY_FORWARD; | ||
951 | SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; | ||
952 | COMBA_STORE(b[39]); | ||
953 | |||
954 | /* output 40 */ | ||
955 | CARRY_FORWARD; | ||
956 | SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); | ||
957 | COMBA_STORE(b[40]); | ||
958 | |||
959 | /* output 41 */ | ||
960 | CARRY_FORWARD; | ||
961 | SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; | ||
962 | COMBA_STORE(b[41]); | ||
963 | |||
964 | /* output 42 */ | ||
965 | CARRY_FORWARD; | ||
966 | SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); | ||
967 | COMBA_STORE(b[42]); | ||
968 | |||
969 | /* output 43 */ | ||
970 | CARRY_FORWARD; | ||
971 | SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; | ||
972 | COMBA_STORE(b[43]); | ||
973 | |||
974 | /* output 44 */ | ||
975 | CARRY_FORWARD; | ||
976 | SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); | ||
977 | COMBA_STORE(b[44]); | ||
978 | |||
979 | /* output 45 */ | ||
980 | CARRY_FORWARD; | ||
981 | SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; | ||
982 | COMBA_STORE(b[45]); | ||
983 | |||
984 | /* output 46 */ | ||
985 | CARRY_FORWARD; | ||
986 | SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); | ||
987 | COMBA_STORE(b[46]); | ||
988 | |||
989 | /* output 47 */ | ||
990 | CARRY_FORWARD; | ||
991 | SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; | ||
992 | COMBA_STORE(b[47]); | ||
993 | |||
994 | /* output 48 */ | ||
995 | CARRY_FORWARD; | ||
996 | SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); | ||
997 | COMBA_STORE(b[48]); | ||
998 | |||
999 | /* output 49 */ | ||
1000 | CARRY_FORWARD; | ||
1001 | SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; | ||
1002 | COMBA_STORE(b[49]); | ||
1003 | |||
1004 | /* output 50 */ | ||
1005 | CARRY_FORWARD; | ||
1006 | SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); | ||
1007 | COMBA_STORE(b[50]); | ||
1008 | |||
1009 | /* output 51 */ | ||
1010 | CARRY_FORWARD; | ||
1011 | SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; | ||
1012 | COMBA_STORE(b[51]); | ||
1013 | |||
1014 | /* output 52 */ | ||
1015 | CARRY_FORWARD; | ||
1016 | SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); | ||
1017 | COMBA_STORE(b[52]); | ||
1018 | |||
1019 | /* output 53 */ | ||
1020 | CARRY_FORWARD; | ||
1021 | SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; | ||
1022 | COMBA_STORE(b[53]); | ||
1023 | |||
1024 | /* output 54 */ | ||
1025 | CARRY_FORWARD; | ||
1026 | SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); | ||
1027 | COMBA_STORE(b[54]); | ||
1028 | |||
1029 | /* output 55 */ | ||
1030 | CARRY_FORWARD; | ||
1031 | SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; | ||
1032 | COMBA_STORE(b[55]); | ||
1033 | |||
1034 | /* output 56 */ | ||
1035 | CARRY_FORWARD; | ||
1036 | SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); | ||
1037 | COMBA_STORE(b[56]); | ||
1038 | |||
1039 | /* output 57 */ | ||
1040 | CARRY_FORWARD; | ||
1041 | SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; | ||
1042 | COMBA_STORE(b[57]); | ||
1043 | |||
1044 | /* output 58 */ | ||
1045 | CARRY_FORWARD; | ||
1046 | SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]); | ||
1047 | COMBA_STORE(b[58]); | ||
1048 | |||
1049 | /* output 59 */ | ||
1050 | CARRY_FORWARD; | ||
1051 | SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]); | ||
1052 | COMBA_STORE(b[59]); | ||
1053 | |||
1054 | /* output 60 */ | ||
1055 | CARRY_FORWARD; | ||
1056 | SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]); | ||
1057 | COMBA_STORE(b[60]); | ||
1058 | |||
1059 | /* output 61 */ | ||
1060 | CARRY_FORWARD; | ||
1061 | SQRADD2(a[30], a[31]); | ||
1062 | COMBA_STORE(b[61]); | ||
1063 | |||
1064 | /* output 62 */ | ||
1065 | CARRY_FORWARD; | ||
1066 | SQRADD(a[31], a[31]); | ||
1067 | COMBA_STORE(b[62]); | ||
/* b[63] has no products of its own; COMBA_STORE2 presumably writes the carry left over from column 62 - TODO confirm against the macro */
1068 | COMBA_STORE2(b[63]); | ||
1069 | COMBA_FINI; | ||
1070 | |||
/* publish the result: fixed 64-digit length, positive sign, scratch copied into B, then pstm_clamp() (presumably trims leading zero digits - defined elsewhere) */
1071 | B->used = 64; | ||
1072 | B->sign = PSTM_ZPOS; | ||
1073 | memcpy(B->dp, b, 64 * sizeof(pstm_digit)); | ||
1074 | pstm_clamp(B); | ||
1075 | return PSTM_OKAY; | ||
1076 | } | ||
1077 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
1078 | |||
1079 | /******************************************************************************/ | ||
1080 | /* | ||
1081 | */ | ||
1082 | int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_digit *paD, | ||
1083 | uint32 paDlen) | ||
1084 | { | ||
1085 | #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
1086 | if (A->used == 16) { | ||
1087 | return pstm_sqr_comba16(A, B); | ||
1088 | } else { | ||
1089 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
1090 | if (A->used == 32) { | ||
1091 | return pstm_sqr_comba32(A, B); | ||
1092 | } | ||
1093 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
1094 | return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); | ||
1095 | } | ||
1096 | #else | ||
1097 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
1098 | if (A->used == 32) { | ||
1099 | return pstm_sqr_comba32(A, B); | ||
1100 | } | ||
1101 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
1102 | return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); | ||
1103 | #endif | ||
1104 | } | ||
1105 | |||
1106 | #endif /* DISABLE_PSTM */ | ||
1107 | /******************************************************************************/ | ||
diff --git a/networking/tls_rsa.c b/networking/tls_rsa.c new file mode 100644 index 000000000..058b09cee --- /dev/null +++ b/networking/tls_rsa.c | |||
@@ -0,0 +1,203 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | #define pkcs1Pad(in, inlen, out, outlen, cryptType, userPtr) \ | ||
9 | pkcs1Pad(in, inlen, out, outlen, cryptType) | ||
10 | static ///bbox | ||
11 | int32 pkcs1Pad(unsigned char *in, uint32 inlen, unsigned char *out, | ||
12 | uint32 outlen, int32 cryptType, void *userPtr) | ||
13 | { | ||
14 | unsigned char *c; | ||
15 | int32 randomLen; | ||
16 | |||
17 | randomLen = outlen - 3 - inlen; | ||
18 | if (randomLen < 8) { | ||
19 | psTraceCrypto("pkcs1Pad failure\n"); | ||
20 | return PS_LIMIT_FAIL; | ||
21 | } | ||
22 | c = out; | ||
23 | *c = 0x00; | ||
24 | c++; | ||
25 | *c = (unsigned char)cryptType; | ||
26 | c++; | ||
27 | if (cryptType == PUBKEY_TYPE) { | ||
28 | while (randomLen-- > 0) { | ||
29 | *c++ = 0xFF; | ||
30 | } | ||
31 | } else { | ||
32 | if (matrixCryptoGetPrngData(c, (uint32)randomLen, userPtr) < 0) { | ||
33 | return PS_PLATFORM_FAIL; | ||
34 | } | ||
35 | /* | ||
36 | SECURITY: Read through the random data and change all 0x0 to 0x01. | ||
37 | This is per spec that no random bytes should be 0 | ||
38 | */ | ||
39 | while (randomLen-- > 0) { | ||
40 | if (*c == 0x0) { | ||
41 | *c = 0x01; | ||
42 | } | ||
43 | c++; | ||
44 | } | ||
45 | } | ||
46 | *c = 0x00; | ||
47 | c++; | ||
48 | memcpy(c, in, inlen); | ||
49 | |||
50 | return outlen; | ||
51 | } | ||
52 | |||
53 | #define psRsaCrypt(pool, in, inlen, out, outlen, key, type, data) \ | ||
54 | psRsaCrypt(pool, in, inlen, out, outlen, key, type) | ||
55 | static ///bbox | ||
56 | int32 psRsaCrypt(psPool_t *pool, const unsigned char *in, uint32 inlen, | ||
57 | unsigned char *out, uint32 *outlen, psRsaKey_t *key, int32 type, | ||
58 | void *data) | ||
59 | { | ||
60 | pstm_int tmp, tmpa, tmpb; | ||
61 | int32 res; | ||
62 | uint32 x; | ||
63 | |||
64 | if (in == NULL || out == NULL || outlen == NULL || key == NULL) { | ||
65 | psTraceCrypto("NULL parameter error in psRsaCrypt\n"); | ||
66 | return PS_ARG_FAIL; | ||
67 | } | ||
68 | |||
69 | tmp.dp = tmpa.dp = tmpb.dp = NULL; | ||
70 | |||
71 | /* Init and copy into tmp */ | ||
72 | if (pstm_init_for_read_unsigned_bin(pool, &tmp, inlen + sizeof(pstm_digit)) | ||
73 | != PS_SUCCESS) { | ||
74 | return PS_FAILURE; | ||
75 | } | ||
76 | if (pstm_read_unsigned_bin(&tmp, (unsigned char *)in, inlen) != PS_SUCCESS){ | ||
77 | pstm_clear(&tmp); | ||
78 | return PS_FAILURE; | ||
79 | } | ||
80 | /* Sanity check on the input */ | ||
81 | if (pstm_cmp(&key->N, &tmp) == PSTM_LT) { | ||
82 | res = PS_LIMIT_FAIL; | ||
83 | goto done; | ||
84 | } | ||
85 | if (type == PRIVKEY_TYPE) { | ||
86 | if (key->optimized) { | ||
87 | if (pstm_init_size(pool, &tmpa, key->p.alloc) != PS_SUCCESS) { | ||
88 | res = PS_FAILURE; | ||
89 | goto done; | ||
90 | } | ||
91 | if (pstm_init_size(pool, &tmpb, key->q.alloc) != PS_SUCCESS) { | ||
92 | pstm_clear(&tmpa); | ||
93 | res = PS_FAILURE; | ||
94 | goto done; | ||
95 | } | ||
96 | if (pstm_exptmod(pool, &tmp, &key->dP, &key->p, &tmpa) != | ||
97 | PS_SUCCESS) { | ||
98 | psTraceCrypto("decrypt error: pstm_exptmod dP, p\n"); | ||
99 | goto error; | ||
100 | } | ||
101 | if (pstm_exptmod(pool, &tmp, &key->dQ, &key->q, &tmpb) != | ||
102 | PS_SUCCESS) { | ||
103 | psTraceCrypto("decrypt error: pstm_exptmod dQ, q\n"); | ||
104 | goto error; | ||
105 | } | ||
106 | if (pstm_sub(&tmpa, &tmpb, &tmp) != PS_SUCCESS) { | ||
107 | psTraceCrypto("decrypt error: sub tmpb, tmp\n"); | ||
108 | goto error; | ||
109 | } | ||
110 | if (pstm_mulmod(pool, &tmp, &key->qP, &key->p, &tmp) != PS_SUCCESS) { | ||
111 | psTraceCrypto("decrypt error: pstm_mulmod qP, p\n"); | ||
112 | goto error; | ||
113 | } | ||
114 | if (pstm_mul_comba(pool, &tmp, &key->q, &tmp, NULL, 0) | ||
115 | != PS_SUCCESS){ | ||
116 | psTraceCrypto("decrypt error: pstm_mul q \n"); | ||
117 | goto error; | ||
118 | } | ||
119 | if (pstm_add(&tmp, &tmpb, &tmp) != PS_SUCCESS) { | ||
120 | psTraceCrypto("decrypt error: pstm_add tmp \n"); | ||
121 | goto error; | ||
122 | } | ||
123 | } else { | ||
124 | if (pstm_exptmod(pool, &tmp, &key->d, &key->N, &tmp) != | ||
125 | PS_SUCCESS) { | ||
126 | psTraceCrypto("psRsaCrypt error: pstm_exptmod\n"); | ||
127 | goto error; | ||
128 | } | ||
129 | } | ||
130 | } else if (type == PUBKEY_TYPE) { | ||
131 | if (pstm_exptmod(pool, &tmp, &key->e, &key->N, &tmp) != PS_SUCCESS) { | ||
132 | psTraceCrypto("psRsaCrypt error: pstm_exptmod\n"); | ||
133 | goto error; | ||
134 | } | ||
135 | } else { | ||
136 | psTraceCrypto("psRsaCrypt error: invalid type param\n"); | ||
137 | goto error; | ||
138 | } | ||
139 | /* Read it back */ | ||
140 | x = pstm_unsigned_bin_size(&key->N); | ||
141 | |||
142 | if ((uint32)x > *outlen) { | ||
143 | res = -1; | ||
144 | psTraceCrypto("psRsaCrypt error: pstm_unsigned_bin_size\n"); | ||
145 | goto done; | ||
146 | } | ||
147 | /* We want the encrypted value to always be the key size. Pad with 0x0 */ | ||
148 | while ((uint32)x < (unsigned long)key->size) { | ||
149 | *out++ = 0x0; | ||
150 | x++; | ||
151 | } | ||
152 | |||
153 | *outlen = x; | ||
154 | /* Convert it */ | ||
155 | memset(out, 0x0, x); | ||
156 | |||
157 | if (pstm_to_unsigned_bin(pool, &tmp, out+(x-pstm_unsigned_bin_size(&tmp))) | ||
158 | != PS_SUCCESS) { | ||
159 | psTraceCrypto("psRsaCrypt error: pstm_to_unsigned_bin\n"); | ||
160 | goto error; | ||
161 | } | ||
162 | /* Clean up and return */ | ||
163 | res = PS_SUCCESS; | ||
164 | goto done; | ||
165 | error: | ||
166 | res = PS_FAILURE; | ||
167 | done: | ||
168 | if (type == PRIVKEY_TYPE && key->optimized) { | ||
169 | pstm_clear_multi(&tmpa, &tmpb, NULL, NULL, NULL, NULL, NULL, NULL); | ||
170 | } | ||
171 | pstm_clear(&tmp); | ||
172 | return res; | ||
173 | } | ||
174 | |||
175 | int32 psRsaEncryptPub(psPool_t *pool, psRsaKey_t *key, | ||
176 | unsigned char *in, uint32 inlen, | ||
177 | unsigned char *out, uint32 outlen, void *data) | ||
178 | { | ||
179 | int32 err; | ||
180 | uint32 size; | ||
181 | |||
182 | size = key->size; | ||
183 | if (outlen < size) { | ||
184 | psTraceCrypto("Error on bad outlen parameter to psRsaEncryptPub\n"); | ||
185 | return PS_ARG_FAIL; | ||
186 | } | ||
187 | |||
188 | if ((err = pkcs1Pad(in, inlen, out, size, PRIVKEY_TYPE, data)) | ||
189 | < PS_SUCCESS) { | ||
190 | psTraceCrypto("Error padding psRsaEncryptPub. Likely data too long\n"); | ||
191 | return err; | ||
192 | } | ||
193 | if ((err = psRsaCrypt(pool, out, size, out, (uint32*)&outlen, key, | ||
194 | PUBKEY_TYPE, data)) < PS_SUCCESS) { | ||
195 | psTraceCrypto("Error performing psRsaEncryptPub\n"); | ||
196 | return err; | ||
197 | } | ||
198 | if (outlen != size) { | ||
199 | psTraceCrypto("Encrypted size error in psRsaEncryptPub\n"); | ||
200 | return PS_FAILURE; | ||
201 | } | ||
202 | return size; | ||
203 | } | ||
diff --git a/networking/tls_rsa.h b/networking/tls_rsa.h new file mode 100644 index 000000000..3281087c7 --- /dev/null +++ b/networking/tls_rsa.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | |||
7 | typedef struct { | ||
8 | pstm_int e, d, N, qP, dP, dQ, p, q; | ||
9 | uint32 size; /* Size of the key in bytes */ | ||
10 | int32 optimized; /* 1 for optimized */ | ||
11 | psPool_t *pool; | ||
12 | } psRsaKey_t; | ||
13 | |||
14 | #define psRsaEncryptPub(pool, key, in, inlen, out, outlen, data) \ | ||
15 | psRsaEncryptPub(pool, key, in, inlen, out, outlen) | ||
16 | int32 psRsaEncryptPub(psPool_t *pool, psRsaKey_t *key, | ||
17 | unsigned char *in, uint32 inlen, | ||
18 | unsigned char *out, uint32 outlen, void *data); | ||