aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2017-01-15 00:12:42 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2017-01-15 00:12:42 +0100
commit11d0096516c0d5395729caba5dfd940a10a6e20c (patch)
tree035f3f0461a42b42421df48c9479bdc1549c1c09
parent2a17d1fc9bdcbc97d48dd08a9fa4941da25187fd (diff)
downloadbusybox-w32-11d0096516c0d5395729caba5dfd940a10a6e20c.tar.gz
busybox-w32-11d0096516c0d5395729caba5dfd940a10a6e20c.tar.bz2
busybox-w32-11d0096516c0d5395729caba5dfd940a10a6e20c.zip
tls: format and send CLIENT_KEY_EXCHANGE
$ ./busybox tls kernel.org insize:0 tail:0 got block len:74 got HANDSHAKE got SERVER_HELLO insize:79 tail:4265 got block len:4392 got HANDSHAKE got CERTIFICATE entered der @0x8b217a7:0x30 len:1452 inner_byte @0x8b217ab:0x30 entered der @0x8b217ab:0x30 len:1172 inner_byte @0x8b217af:0xa0 skipped der 0xa0, next byte 0x02 skipped der 0x02, next byte 0x30 skipped der 0x30, next byte 0x30 skipped der 0x30, next byte 0x30 skipped der 0x30, next byte 0x30 skipped der 0x30, next byte 0x30 entered der @0x8b218b4:0x30 len:418 inner_byte @0x8b218b8:0x30 skipped der 0x30, next byte 0x03 entered der @0x8b218c7:0x03 len:399 inner_byte @0x8b218cb:0x00 key bytes:399, first:0x00 entered der @0x8b218cc:0x30 len:394 inner_byte @0x8b218d0:0x02 binary bytes:385, first:0x00 skipped der 0x02, next byte 0x02 binary bytes:3, first:0x01 server_rsa_pub_key.size:384 insize:4397 tail:9 got block len:4 got SERVER_HELLO_DONE insize:9 tail:0 ^C Next step: send CHANGE_CIPHER_SPEC... and actually implement it. Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--networking/tls.c225
-rw-r--r--networking/tls.h73
-rw-r--r--networking/tls_pstm.c2254
-rw-r--r--networking/tls_pstm.h238
-rw-r--r--networking/tls_pstm_montgomery_reduce.c423
-rw-r--r--networking/tls_pstm_mul_comba.c777
-rw-r--r--networking/tls_pstm_sqr_comba.c1107
-rw-r--r--networking/tls_rsa.c203
-rw-r--r--networking/tls_rsa.h18
9 files changed, 5281 insertions, 37 deletions
diff --git a/networking/tls.c b/networking/tls.c
index 69c81b558..b0a4f7e75 100644
--- a/networking/tls.c
+++ b/networking/tls.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Licensed under GPLv2, see file LICENSE in this source tree.
3 *
4 * Copyright (C) 2017 Denys Vlasenko 2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */ 5 */
6//config:config TLS 6//config:config TLS
7//config: bool "tls (debugging)" 7//config: bool "tls (debugging)"
@@ -10,6 +10,11 @@
10//applet:IF_TLS(APPLET(tls, BB_DIR_USR_BIN, BB_SUID_DROP)) 10//applet:IF_TLS(APPLET(tls, BB_DIR_USR_BIN, BB_SUID_DROP))
11 11
12//kbuild:lib-$(CONFIG_TLS) += tls.o 12//kbuild:lib-$(CONFIG_TLS) += tls.o
13//kbuild:lib-$(CONFIG_TLS) += tls_pstm.o
14//kbuild:lib-$(CONFIG_TLS) += tls_pstm_montgomery_reduce.o
15//kbuild:lib-$(CONFIG_TLS) += tls_pstm_mul_comba.o
16//kbuild:lib-$(CONFIG_TLS) += tls_pstm_sqr_comba.o
17//kbuild:lib-$(CONFIG_TLS) += tls_rsa.o
13////kbuild:lib-$(CONFIG_TLS) += tls_ciphers.o 18////kbuild:lib-$(CONFIG_TLS) += tls_ciphers.o
14////kbuild:lib-$(CONFIG_TLS) += tls_aes.o 19////kbuild:lib-$(CONFIG_TLS) += tls_aes.o
15////kbuild:lib-$(CONFIG_TLS) += tls_aes_gcm.o 20////kbuild:lib-$(CONFIG_TLS) += tls_aes_gcm.o
@@ -18,9 +23,7 @@
18//usage: "HOST[:PORT]" 23//usage: "HOST[:PORT]"
19//usage:#define tls_full_usage "\n\n" 24//usage:#define tls_full_usage "\n\n"
20 25
21#include "libbb.h" 26#include "tls.h"
22//#include "tls_cryptoapi.h"
23//#include "tls_ciphers.h"
24 27
25#if 1 28#if 1
26# define dbg(...) fprintf(stderr, __VA_ARGS__) 29# define dbg(...) fprintf(stderr, __VA_ARGS__)
@@ -28,23 +31,26 @@
28# define dbg(...) ((void)0) 31# define dbg(...) ((void)0)
29#endif 32#endif
30 33
31#define RECORD_TYPE_CHANGE_CIPHER_SPEC 20 34#define RECORD_TYPE_CHANGE_CIPHER_SPEC 20
32#define RECORD_TYPE_ALERT 21 35#define RECORD_TYPE_ALERT 21
33#define RECORD_TYPE_HANDSHAKE 22 36#define RECORD_TYPE_HANDSHAKE 22
34#define RECORD_TYPE_APPLICATION_DATA 23 37#define RECORD_TYPE_APPLICATION_DATA 23
35 38
36#define HANDSHAKE_HELLO_REQUEST 0 39#define HANDSHAKE_HELLO_REQUEST 0
37#define HANDSHAKE_CLIENT_HELLO 1 40#define HANDSHAKE_CLIENT_HELLO 1
38#define HANDSHAKE_SERVER_HELLO 2 41#define HANDSHAKE_SERVER_HELLO 2
39#define HANDSHAKE_HELLO_VERIFY_REQUEST 3 42#define HANDSHAKE_HELLO_VERIFY_REQUEST 3
40#define HANDSHAKE_NEW_SESSION_TICKET 4 43#define HANDSHAKE_NEW_SESSION_TICKET 4
41#define HANDSHAKE_CERTIFICATE 11 44#define HANDSHAKE_CERTIFICATE 11
42#define HANDSHAKE_SERVER_KEY_EXCHANGE 12 45#define HANDSHAKE_SERVER_KEY_EXCHANGE 12
43#define HANDSHAKE_CERTIFICATE_REQUEST 13 46#define HANDSHAKE_CERTIFICATE_REQUEST 13
44#define HANDSHAKE_SERVER_HELLO_DONE 14 47#define HANDSHAKE_SERVER_HELLO_DONE 14
45#define HANDSHAKE_CERTIFICATE_VERIFY 15 48#define HANDSHAKE_CERTIFICATE_VERIFY 15
46#define HANDSHAKE_CLIENT_KEY_EXCHANGE 16 49#define HANDSHAKE_CLIENT_KEY_EXCHANGE 16
47#define HANDSHAKE_FINISHED 20 50#define HANDSHAKE_FINISHED 20
51
52#define SSL_HS_RANDOM_SIZE 32
53#define SSL_HS_RSA_PREMASTER_SIZE 48
48 54
49#define SSL_NULL_WITH_NULL_NULL 0x0000 55#define SSL_NULL_WITH_NULL_NULL 0x0000
50#define SSL_RSA_WITH_NULL_MD5 0x0001 56#define SSL_RSA_WITH_NULL_MD5 0x0001
@@ -112,6 +118,7 @@
112//TLS 1.2 118//TLS 1.2
113#define TLS_MAJ 3 119#define TLS_MAJ 3
114#define TLS_MIN 3 120#define TLS_MIN 3
121//#define CIPHER_ID TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA // ok, recvs SERVER_KEY_EXCHANGE *** matrixssl uses this on my box
115//#define CIPHER_ID TLS_RSA_WITH_AES_256_CBC_SHA256 // ok, no SERVER_KEY_EXCHANGE 122//#define CIPHER_ID TLS_RSA_WITH_AES_256_CBC_SHA256 // ok, no SERVER_KEY_EXCHANGE
116// All GCMs: 123// All GCMs:
117//#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 // SSL_ALERT_HANDSHAKE_FAILURE 124//#define CIPHER_ID TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 // SSL_ALERT_HANDSHAKE_FAILURE
@@ -123,9 +130,9 @@
123//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 130//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384
124//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE 131//#define CIPHER_ID TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 // SSL_ALERT_HANDSHAKE_FAILURE
125//#define CIPHER_ID TLS_RSA_WITH_AES_256_GCM_SHA384 // ok, no SERVER_KEY_EXCHANGE 132//#define CIPHER_ID TLS_RSA_WITH_AES_256_GCM_SHA384 // ok, no SERVER_KEY_EXCHANGE
126#define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE 133#define CIPHER_ID TLS_RSA_WITH_AES_128_GCM_SHA256 // ok, no SERVER_KEY_EXCHANGE *** select this?
127//#define CIPHER_ID TLS_DH_anon_WITH_AES_256_CBC_SHA // SSL_ALERT_HANDSHAKE_FAILURE 134//#define CIPHER_ID TLS_DH_anon_WITH_AES_256_CBC_SHA // SSL_ALERT_HANDSHAKE_FAILURE
128// (tested b/c this one doesn't req server certs... no luck) 135//^^^^^^^^^^^^^^^^^^^^^^^ (tested b/c this one doesn't req server certs... no luck)
129//test TLS_RSA_WITH_AES_128_CBC_SHA, in tls 1.2 it's mandated to be always supported 136//test TLS_RSA_WITH_AES_128_CBC_SHA, in tls 1.2 it's mandated to be always supported
130 137
131struct record_hdr { 138struct record_hdr {
@@ -137,8 +144,7 @@ struct record_hdr {
137typedef struct tls_state { 144typedef struct tls_state {
138 int fd; 145 int fd;
139 146
140 uint8_t *pubkey; 147 psRsaKey_t server_rsa_pub_key;
141 int pubkey_len;
142 148
143 // RFC 5246 149 // RFC 5246
144 // |6.2.1. Fragmentation 150 // |6.2.1. Fragmentation
@@ -170,6 +176,12 @@ typedef struct tls_state {
170 uint8_t inbuf[18*1024]; 176 uint8_t inbuf[18*1024];
171} tls_state_t; 177} tls_state_t;
172 178
/*
 * Fill buf[0..len-1] with bytes read from /dev/urandom.
 * Terminates the program via xfunc_die() unless exactly len bytes were obtained.
 */
void tls_get_random(void *buf, unsigned len)
{
	if (open_read_close("/dev/urandom", buf, len) == len)
		return;
	xfunc_die();
}
184
173static 185static
174tls_state_t *new_tls_state(void) 186tls_state_t *new_tls_state(void)
175{ 187{
@@ -286,7 +298,7 @@ static void send_client_hello(tls_state_t *tls)
286 hello.len24_lo = (sizeof(hello) - sizeof(hello.xhdr) - 4); 298 hello.len24_lo = (sizeof(hello) - sizeof(hello.xhdr) - 4);
287 hello.proto_maj = TLS_MAJ; 299 hello.proto_maj = TLS_MAJ;
288 hello.proto_min = TLS_MIN; 300 hello.proto_min = TLS_MIN;
289 open_read_close("/dev/urandom", hello.rand32, sizeof(hello.rand32)); 301 tls_get_random(hello.rand32, sizeof(hello.rand32));
290 //hello.session_id_len = 0; 302 //hello.session_id_len = 0;
291 //hello.cipherid_len16_hi = 0; 303 //hello.cipherid_len16_hi = 0;
292 hello.cipherid_len16_lo = 2 * 1; 304 hello.cipherid_len16_lo = 2 * 1;
@@ -407,7 +419,18 @@ static uint8_t *skip_der_item(uint8_t *der, uint8_t *end)
407 return new_der; 419 return new_der;
408} 420}
409 421
410static void *find_key_in_der_cert(int *key_len, uint8_t *der, int len) 422static void der_binary_to_pstm(pstm_int *pstm_n, uint8_t *der, uint8_t *end)
423{
424 uint8_t *bin_ptr;
425 unsigned len = get_der_len(&bin_ptr, der, end);
426
427 dbg("binary bytes:%u, first:0x%02x\n", len, bin_ptr[0]);
428 pstm_init_for_read_unsigned_bin(/*pool:*/ NULL, pstm_n, len);
429 pstm_read_unsigned_bin(pstm_n, bin_ptr, len);
430 //return bin + len;
431}
432
433static void find_key_in_der_cert(tls_state_t *tls, uint8_t *der, int len)
411{ 434{
412/* Certificate is a DER-encoded data structure. Each DER element has a length, 435/* Certificate is a DER-encoded data structure. Each DER element has a length,
413 * which makes it easy to skip over large compound elements of any complexity 436 * which makes it easy to skip over large compound elements of any complexity
@@ -504,19 +527,43 @@ static void *find_key_in_der_cert(int *key_len, uint8_t *der, int len)
504 der = skip_der_item(der, end); /* validity */ 527 der = skip_der_item(der, end); /* validity */
505 der = skip_der_item(der, end); /* subject */ 528 der = skip_der_item(der, end); /* subject */
506 529
507 /* enter "subjectPublicKeyInfo" */ 530 /* enter subjectPublicKeyInfo */
508 der = enter_der_item(der, &end); 531 der = enter_der_item(der, &end);
509 532 { /* check subjectPublicKeyInfo.algorithm */
510 /* skip "subjectPublicKeyInfo.algorithm" */ 533 static const uint8_t expected[] = {
534 0x30,0x0d, // SEQ 13 bytes
535 0x06,0x09, 0x2a,0x86,0x48,0x86,0xf7,0x0d,0x01,0x01,0x01, // OID RSA_KEY_ALG 42.134.72.134.247.13.1.1.1
536 //0x05,0x00, // NULL
537 };
538 if (memcmp(der, expected, sizeof(expected)) != 0)
539 bb_error_msg_and_die("not RSA key");
540 }
541 /* skip subjectPublicKeyInfo.algorithm */
511 der = skip_der_item(der, end); 542 der = skip_der_item(der, end);
512 /* enter "subjectPublicKeyInfo.publicKey" */ 543 /* enter subjectPublicKeyInfo.publicKey */
513// die_if_not_this_der_type(der, end, 0x03); /* must be BITSTRING */ 544// die_if_not_this_der_type(der, end, 0x03); /* must be BITSTRING */
514 der = enter_der_item(der, &end); 545 der = enter_der_item(der, &end);
515 546
516 /* return a copy */ 547 /* parse RSA key: */
517 *key_len = end - der; 548//based on getAsnRsaPubKey(), pkcs1ParsePrivBin() is also of note
518 dbg("copying key bytes:%u, first:0x%02x\n", *key_len, der[0]); 549 dbg("key bytes:%u, first:0x%02x\n", (int)(end - der), der[0]);
519 return xmemdup(der, *key_len); 550 if (end - der < 14) xfunc_die();
551 /* example format:
552 * ignore bits: 00
553 * SEQ 0x018a/394 bytes: 3082018a
554 * INTEGER 0x0181/385 bytes (modulus): 02820181 XX...XXX
555 * INTEGER 3 bytes (exponent): 0203 010001
556 */
557 if (*der != 0) /* "ignore bits", should be 0 */
558 xfunc_die();
559 der++;
560 der = enter_der_item(der, &end); /* enter SEQ */
561 //memset(tls->server_rsa_pub_key, 0, sizeof(tls->server_rsa_pub_key));
562 der_binary_to_pstm(&tls->server_rsa_pub_key.N, der, end); /* modulus */
563 der = skip_der_item(der, end);
564 der_binary_to_pstm(&tls->server_rsa_pub_key.e, der, end); /* exponent */
565 tls->server_rsa_pub_key.size = pstm_unsigned_bin_size(&tls->server_rsa_pub_key.N);
566 dbg("server_rsa_pub_key.size:%d\n", tls->server_rsa_pub_key.size);
520} 567}
521 568
522static void get_server_cert_or_die(tls_state_t *tls) 569static void get_server_cert_or_die(tls_state_t *tls)
@@ -553,7 +600,107 @@ static void get_server_cert_or_die(tls_state_t *tls)
553 len = len1; 600 len = len1;
554 601
555 if (len) 602 if (len)
556 tls->pubkey = find_key_in_der_cert(&tls->pubkey_len, certbuf + 10, len); 603 find_key_in_der_cert(tls, certbuf + 10, len);
604}
605
606static void send_client_key_exchange(tls_state_t *tls)
607{
608#if 0 //matrixssl code snippets:
609 int32 csRsaEncryptPub(psPool_t *pool, psPubKey_t *key,
610 unsigned char *in, uint32 inlen, unsigned char *out, uint32 outlen,
611 void *data)
612 {
613 psAssert(key->type == PS_RSA);
614 return psRsaEncryptPub(pool, (psRsaKey_t*)key->key, in, inlen, out, outlen,
615 data);
616 }
617...
618 /* pkaAfter.user is buffer len */
619 if ((rc = csRsaEncryptPub(pka->pool, &ssl->sec.cert->publicKey,
620 ssl->sec.premaster, ssl->sec.premasterSize, pka->outbuf,
621 pka->user, pka->data)) < 0) {
622 if (rc == PS_PENDING) {
623 /* For these ClientKeyExchange paths, we do want to come
624 back through nowDoCkePka for a double pass so each
625 case can manage its own pkaAfter and to make sure
626 psX509FreeCert and sslCreateKeys() are hit below. */
627 return rc;
628 }
629 psTraceIntInfo("csRsaEncryptPub in CKE failed %d\n", rc);
630 return MATRIXSSL_ERROR;
631 }
632 /* RSA closed the pool on second pass */
633 pka->pool = NULL;
634 clearPkaAfter(ssl);
635...
636#ifdef USE_RSA_CIPHER_SUITE
637/*
638 Standard RSA suite
639*/
640 ssl->sec.premasterSize = SSL_HS_RSA_PREMASTER_SIZE;
641 ssl->sec.premaster = psMalloc(ssl->hsPool,
642 SSL_HS_RSA_PREMASTER_SIZE);
643 if (ssl->sec.premaster == NULL) {
644 return SSL_MEM_ERROR;
645 }
646
647 ssl->sec.premaster[0] = ssl->reqMajVer;
648 ssl->sec.premaster[1] = ssl->reqMinVer;
649 if (matrixCryptoGetPrngData(ssl->sec.premaster + 2,
650 SSL_HS_RSA_PREMASTER_SIZE - 2, ssl->userPtr) < 0) {
651 return MATRIXSSL_ERROR;
652 }
653
654 /* Shedule RSA encryption. Put tmp pool under control of After */
655 pkaAfter->type = PKA_AFTER_RSA_ENCRYPT;
656 pkaAfter->outbuf = c;
657 pkaAfter->data = pkiData;
658 pkaAfter->pool = pkiPool;
659 pkaAfter->user = (uint32)(end - c); /* Available space */
660
661 c += keyLen;
662#endif
663#endif // 0
664
665 struct client_key_exchange {
666 struct record_hdr xhdr;
667 uint8_t type;
668 uint8_t len24_hi, len24_mid, len24_lo;
669 uint8_t keylen16_hi, keylen16_lo; /* exist for RSA, but not for some other key types */
670//had a bug when had no keylen: we:
671//write(3, "\x16\x03\x03\x01\x84\x10\x00\x01\x80\xXX\xXX\xXX\xXX\xXX\xXX...", 393) = 393
672//openssl:
673//write to 0xe9a090 [0xf9ac20] (395 bytes => 395 (0x18B))
674//0000 - 16 03 03 01 86 10 00 01 -82 01 80 xx xx xx xx xx
675 uint8_t key[384]; // size??
676 };
677 struct client_key_exchange record;
678 uint8_t premaster[SSL_HS_RSA_PREMASTER_SIZE];
679
680 memset(&record, 0, sizeof(record));
681 record.xhdr.type = RECORD_TYPE_HANDSHAKE;
682 record.xhdr.proto_maj = TLS_MAJ;
683 record.xhdr.proto_min = TLS_MIN;
684 record.xhdr.len16_hi = (sizeof(record) - sizeof(record.xhdr)) >> 8;
685 record.xhdr.len16_lo = (sizeof(record) - sizeof(record.xhdr)) & 0xff;
686 record.type = HANDSHAKE_CLIENT_KEY_EXCHANGE;
687 //record.len24_hi = 0;
688 record.len24_mid = (sizeof(record) - sizeof(record.xhdr) - 4) >> 8;
689 record.len24_lo = (sizeof(record) - sizeof(record.xhdr) - 4) & 0xff;
690 record.keylen16_hi = (sizeof(record) - sizeof(record.xhdr) - 6) >> 8;
691 record.keylen16_lo = (sizeof(record) - sizeof(record.xhdr) - 6) & 0xff;
692
693 tls_get_random(premaster, sizeof(premaster));
694 premaster[0] = TLS_MAJ;
695 premaster[1] = TLS_MIN;
696 psRsaEncryptPub(/*pool:*/ NULL,
697 /* psRsaKey_t* */ &tls->server_rsa_pub_key,
698 premaster, /*inlen:*/ sizeof(premaster),
699 record.key, sizeof(record.key),
700 data_param_ignored
701 );
702
703 xwrite(tls->fd, &record, sizeof(record));
557} 704}
558 705
559static void tls_handshake(tls_state_t *tls) 706static void tls_handshake(tls_state_t *tls)
@@ -614,6 +761,8 @@ static void tls_handshake(tls_state_t *tls)
614 // 459 bytes: 761 // 459 bytes:
615 // 0c 00|01|c7 03|00|17|41|04|87|94|2e|2f|68|d0|c9|f4|97|a8|2d|ef|ed|67|ea|c6|f3|b3|56|47|5d|27|b6|bd|ee|70|25|30|5e|b0|8e|f6|21|5a... 762 // 0c 00|01|c7 03|00|17|41|04|87|94|2e|2f|68|d0|c9|f4|97|a8|2d|ef|ed|67|ea|c6|f3|b3|56|47|5d|27|b6|bd|ee|70|25|30|5e|b0|8e|f6|21|5a...
616 //SvKey len=455^ 763 //SvKey len=455^
764 // with TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA: 461 bytes:
765 // 0c 00|01|c9 03|00|17|41|04|cd|9b|b4|29|1f|f6|b0|c2|84|82|7f|29|6a|47|4e|ec|87|0b|c1|9c|69|e1|f8|c6|d0|53|e9|27|90|a5|c8|02|15|75...
617 dbg("got SERVER_KEY_EXCHANGE\n"); 766 dbg("got SERVER_KEY_EXCHANGE\n");
618 len = xread_tls_block(tls); 767 len = xread_tls_block(tls);
619 break; 768 break;
@@ -624,6 +773,8 @@ static void tls_handshake(tls_state_t *tls)
624 case HANDSHAKE_SERVER_HELLO_DONE: 773 case HANDSHAKE_SERVER_HELLO_DONE:
625 // 0e 000000 (len:0) 774 // 0e 000000 (len:0)
626 dbg("got SERVER_HELLO_DONE\n"); 775 dbg("got SERVER_HELLO_DONE\n");
776 send_client_key_exchange(tls);
777 len = xread_tls_block(tls);
627 break; 778 break;
628 default: 779 default:
629 tls_error_die(tls); 780 tls_error_die(tls);
diff --git a/networking/tls.h b/networking/tls.h
new file mode 100644
index 000000000..20317ecc3
--- /dev/null
+++ b/networking/tls.h
@@ -0,0 +1,73 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "libbb.h"
7
8/* config tweaks */
9#define HAVE_NATIVE_INT64 1
10#undef DISABLE_PSTM
11#undef USE_1024_KEY_SPEED_OPTIMIZATIONS
12#undef USE_2048_KEY_SPEED_OPTIMIZATIONS
13//TODO: enable to use asm:
14//#if defined(__GNUC__) && defined(__i386__) -> #define PSTM_32BIT and PSTM_X86
15//#if defined(__GNUC__) && defined(__x86_64__) -> #define PSTM_64BIT and PSTM_X86_64
16//ARM and MIPS also have these
17
18
19#define PS_SUCCESS 0
20#define PS_FAILURE -1
21#define PS_ARG_FAIL -6 /* Failure due to bad function param */
22#define PS_PLATFORM_FAIL -7 /* Failure as a result of system call error */
23#define PS_MEM_FAIL -8 /* Failure to allocate requested memory */
24#define PS_LIMIT_FAIL -9 /* Failure on sanity/limit tests */
25
26#define PS_TRUE 1
27#define PS_FALSE 0
28
29#if BB_BIG_ENDIAN
30# define ENDIAN_BIG 1
31# undef ENDIAN_LITTLE
32//#???? ENDIAN_32BITWORD
33// controls only STORE32L, which we don't use
34#else
35# define ENDIAN_LITTLE 1
36# undef ENDIAN_BIG
37#endif
38
39typedef uint64_t uint64;
40typedef int64_t int64;
41typedef uint32_t uint32;
42typedef int32_t int32;
43typedef uint16_t uint16;
44typedef int16_t int16;
45
46//FIXME
47typedef char psPool_t;
48
49//#ifdef PS_PUBKEY_OPTIMIZE_FOR_SMALLER_RAM
50#define PS_EXPTMOD_WINSIZE 3
51//#ifdef PS_PUBKEY_OPTIMIZE_FOR_FASTER_SPEED
52//#define PS_EXPTMOD_WINSIZE 5
53
54#define PUBKEY_TYPE 0x01
55#define PRIVKEY_TYPE 0x02
56
57void tls_get_random(void *buf, unsigned len);
58
59#define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS)
60
61#define psFree(p, pool) free(p)
62#define psTraceCrypto(msg) bb_error_msg_and_die(msg)
63
64/* Secure zerofill. FIXME: currently a plain memset(), which the compiler is free to optimize away */
65#define memset_s(A,B,C,D) memset((A),(C),(D))
66/* Constant time memory comparison. FIXME: currently a plain memcmp(), which is not guaranteed to be constant time */
67#define memcmpct(s1, s2, len) memcmp((s1), (s2), (len))
68#undef min
69#define min(x, y) ((x) < (y) ? (x) : (y))
70
71
72#include "tls_pstm.h"
73#include "tls_rsa.h"
diff --git a/networking/tls_pstm.c b/networking/tls_pstm.c
new file mode 100644
index 000000000..0d797f87f
--- /dev/null
+++ b/networking/tls_pstm.c
@@ -0,0 +1,2254 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
8/**
9 * @file pstm.c
10 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
11 *
12 * Multiprecision number implementation.
13 */
14/*
15 * Copyright (c) 2013-2015 INSIDE Secure Corporation
16 * Copyright (c) PeerSec Networks, 2002-2011
17 * All Rights Reserved
18 *
19 * The latest version of this code is available at http://www.matrixssl.org
20 *
21 * This software is open source; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This General Public License does NOT permit incorporating this software
27 * into proprietary programs. If you are unable to comply with the GPL, a
28 * commercial license for this software may be purchased from INSIDE at
29 * http://www.insidesecure.com/eng/Company/Locations
30 *
31 * This program is distributed in WITHOUT ANY WARRANTY; without even the
32 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
33 * See the GNU General Public License for more details.
34 *
35 * You should have received a copy of the GNU General Public License
36 * along with this program; if not, write to the Free Software
37 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 * http://www.gnu.org/copyleft/gpl.html
39 */
40/******************************************************************************/
41
42///bbox
43//#include "../cryptoApi.h"
44#ifndef DISABLE_PSTM
45
46static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c);
47
48/******************************************************************************/
49/*
50 init an pstm_int for a given size
51 */
52int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size)
53{
54// uint16 x;
55
56/*
57 alloc mem
58 */
59 a->dp = xzalloc(sizeof (pstm_digit) * size);
60 a->pool = pool;
61 a->used = 0;
62 a->alloc = (int16)size;
63 a->sign = PSTM_ZPOS;
64/*
65 zero the digits
66 */
67///bbox
68// for (x = 0; x < size; x++) {
69// a->dp[x] = 0;
70// }
71 return PSTM_OKAY;
72}
73
74/******************************************************************************/
75/*
76 Init a new pstm_int.
77*/
78int32 pstm_init(psPool_t *pool, pstm_int * a)
79{
80// int32 i;
81/*
82 allocate memory required and clear it
83 */
84 a->dp = xzalloc(sizeof (pstm_digit) * PSTM_DEFAULT_INIT);
85/*
86 set the digits to zero
87 */
88///bbox
89// for (i = 0; i < PSTM_DEFAULT_INIT; i++) {
90// a->dp[i] = 0;
91// }
92/*
93 set the used to zero, allocated digits to the default precision and sign
94 to positive
95 */
96 a->pool = pool;
97 a->used = 0;
98 a->alloc = PSTM_DEFAULT_INIT;
99 a->sign = PSTM_ZPOS;
100
101 return PSTM_OKAY;
102}
103
104/******************************************************************************/
105/*
106 Grow as required
107 */
108int32 pstm_grow(pstm_int * a, int16 size)
109{
110 int16 i;
111 pstm_digit *tmp;
112
113/*
114 If the alloc size is smaller alloc more ram.
115 */
116 if (a->alloc < size) {
117/*
118 Reallocate the array a->dp
119
120 We store the return in a temporary variable in case the operation
121 failed we don't want to overwrite the dp member of a.
122*/
123 tmp = xrealloc(a->dp, sizeof (pstm_digit) * size);
124/*
125 reallocation succeeded so set a->dp
126 */
127 a->dp = tmp;
128/*
129 zero excess digits
130 */
131 i = a->alloc;
132 a->alloc = size;
133 for (; i < a->alloc; i++) {
134 a->dp[i] = 0;
135 }
136 }
137 return PSTM_OKAY;
138}
139
140/******************************************************************************/
141/*
142 copy, b = a (b must be pre-allocated)
143 */
144int32 pstm_copy(pstm_int * a, pstm_int * b)
145{
146 int32 res, n;
147
148/*
149 If dst == src do nothing
150 */
151 if (a == b) {
152 return PSTM_OKAY;
153 }
154/*
155 Grow dest
156 */
157 if (b->alloc < a->used) {
158 if ((res = pstm_grow (b, a->used)) != PSTM_OKAY) {
159 return res;
160 }
161 }
162/*
163 Zero b and copy the parameters over
164 */
165 {
166 register pstm_digit *tmpa, *tmpb;
167
168 /* pointer aliases */
169 /* source */
170 tmpa = a->dp;
171
172 /* destination */
173 tmpb = b->dp;
174
175 /* copy all the digits */
176 for (n = 0; n < a->used; n++) {
177 *tmpb++ = *tmpa++;
178 }
179
180 /* clear high digits */
181 for (; n < b->used; n++) {
182 *tmpb++ = 0;
183 }
184 }
185/*
186 copy used count and sign
187 */
188 b->used = a->used;
189 b->sign = a->sign;
190 return PSTM_OKAY;
191}
192
193/******************************************************************************/
194/*
195 Trim unused digits
196
197 This is used to ensure that leading zero digits are trimed and the
198 leading "used" digit will be non-zero. Typically very fast. Also fixes
199 the sign if there are no more leading digits
200*/
201void pstm_clamp(pstm_int * a)
202{
203/* decrease used while the most significant digit is zero. */
204 while (a->used > 0 && a->dp[a->used - 1] == 0) {
205 --(a->used);
206 }
207/* reset the sign flag if used == 0 */
208 if (a->used == 0) {
209 a->sign = PSTM_ZPOS;
210 }
211}
212
213/******************************************************************************/
214/*
215 clear one (frees).
216 */
217void pstm_clear(pstm_int * a)
218{
219 int32 i;
220/*
221 only do anything if a hasn't been freed previously
222 */
223 if (a != NULL && a->dp != NULL) {
224/*
225 first zero the digits
226 */
227 for (i = 0; i < a->used; i++) {
228 a->dp[i] = 0;
229 }
230
231 psFree (a->dp, a->pool);
232/*
233 reset members to make debugging easier
234 */
235 a->dp = NULL;
236 a->alloc = a->used = 0;
237 a->sign = PSTM_ZPOS;
238 }
239}
240
241/******************************************************************************/
242/*
243 clear many (frees).
244 */
245void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2,
246 pstm_int *mp3, pstm_int *mp4, pstm_int *mp5,
247 pstm_int *mp6, pstm_int *mp7)
248{
249 int32 n; /* Number of ok inits */
250
251 pstm_int *tempArray[9];
252
253 tempArray[0] = mp0;
254 tempArray[1] = mp1;
255 tempArray[2] = mp2;
256 tempArray[3] = mp3;
257 tempArray[4] = mp4;
258 tempArray[5] = mp5;
259 tempArray[6] = mp6;
260 tempArray[7] = mp7;
261 tempArray[8] = NULL;
262
263 for (n = 0; tempArray[n] != NULL; n++) {
264 if ((tempArray[n] != NULL) && (tempArray[n]->dp != NULL)) {
265 pstm_clear(tempArray[n]);
266 }
267 }
268}
269
270/******************************************************************************/
271/*
272 Set to zero.
273 */
274void pstm_zero(pstm_int * a)
275{
276 int32 n;
277 pstm_digit *tmp;
278
279 a->sign = PSTM_ZPOS;
280 a->used = 0;
281
282 tmp = a->dp;
283 for (n = 0; n < a->alloc; n++) {
284 *tmp++ = 0;
285 }
286}
287
288
289/******************************************************************************/
290/*
291 Compare maginitude of two ints (unsigned).
292 */
293int32 pstm_cmp_mag(pstm_int * a, pstm_int * b)
294{
295 int16 n;
296 pstm_digit *tmpa, *tmpb;
297
298/*
299 compare based on # of non-zero digits
300 */
301 if (a->used > b->used) {
302 return PSTM_GT;
303 }
304
305 if (a->used < b->used) {
306 return PSTM_LT;
307 }
308
309 /* alias for a */
310 tmpa = a->dp + (a->used - 1);
311
312 /* alias for b */
313 tmpb = b->dp + (a->used - 1);
314
315/*
316 compare based on digits
317 */
318 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
319 if (*tmpa > *tmpb) {
320 return PSTM_GT;
321 }
322 if (*tmpa < *tmpb) {
323 return PSTM_LT;
324 }
325 }
326 return PSTM_EQ;
327}
328
329/******************************************************************************/
330/*
331 Compare two ints (signed)
332 */
333int32 pstm_cmp(pstm_int * a, pstm_int * b)
334{
335/*
336 compare based on sign
337 */
338 if (a->sign != b->sign) {
339 if (a->sign == PSTM_NEG) {
340 return PSTM_LT;
341 } else {
342 return PSTM_GT;
343 }
344 }
345/*
346 compare digits
347 */
348 if (a->sign == PSTM_NEG) {
349 /* if negative compare opposite direction */
350 return pstm_cmp_mag(b, a);
351 } else {
352 return pstm_cmp_mag(a, b);
353 }
354}
355
356/******************************************************************************/
357/*
358 pstm_ints can be initialized more precisely when they will populated
359 using pstm_read_unsigned_bin since the length of the byte stream is known
360*/
361int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a, uint32 len)
362{
363 int32 size;
364/*
365 Need to set this based on how many words max it will take to store the bin.
366 The magic + 2:
367 1 to round up for the remainder of this integer math
368 1 for the initial carry of '1' bits that fall between DIGIT_BIT and 8
369*/
370 size = (((len / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT))
371 / DIGIT_BIT) + 2;
372 return pstm_init_size(pool, a, size);
373}
374
375
376/******************************************************************************/
377/*
378 Reads a unsigned char array into pstm_int format. User should have
379 called pstm_init_for_read_unsigned_bin first. There is some grow logic
380 here if the default pstm_init was used but we don't really want to hit it.
381*/
382int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c)
383{
384 /* zero the int */
385 pstm_zero (a);
386
387/*
388 If we know the endianness of this architecture, and we're using
389 32-bit pstm_digits, we can optimize this
390*/
391#if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(PSTM_64BIT)
392 /* But not for both simultaneously */
393#if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
394#error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
395#endif
396 {
397 unsigned char *pd;
398 if ((unsigned)c > (PSTM_MAX_SIZE * sizeof(pstm_digit))) {
399 uint32 excess = c - (PSTM_MAX_SIZE * sizeof(pstm_digit));
400 c -= excess;
401 b += excess;
402 }
403 a->used = (int16)((c + sizeof(pstm_digit) - 1)/sizeof(pstm_digit));
404 if (a->alloc < a->used) {
405 if (pstm_grow(a, a->used) != PSTM_OKAY) {
406 return PSTM_MEM;
407 }
408 }
409 pd = (unsigned char *)a->dp;
410 /* read the bytes in */
411#ifdef ENDIAN_BIG
412 {
413 /* Use Duff's device to unroll the loop. */
414 int32 idx = (c - 1) & ~3;
415 switch (c % 4) {
416 case 0: do { pd[idx+0] = *b++;
417 case 3: pd[idx+1] = *b++;
418 case 2: pd[idx+2] = *b++;
419 case 1: pd[idx+3] = *b++;
420 idx -= 4;
421 } while ((c -= 4) > 0);
422 }
423 }
424#else
425 for (c -= 1; c >= 0; c -= 1) {
426 pd[c] = *b++;
427 }
428#endif
429 }
430#else
431 /* Big enough based on the len? */
432 a->used = (((c / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT))
433 / DIGIT_BIT) + 2;
434
435 if (a->alloc < a->used) {
436 if (pstm_grow(a, a->used) != PSTM_OKAY) {
437 return PSTM_MEM;
438 }
439 }
440 /* read the bytes in */
441 for (; c > 0; c--) {
442 if (pstm_mul_2d (a, 8, a) != PSTM_OKAY) {
443 return PS_MEM_FAIL;
444 }
445 a->dp[0] |= *b++;
446 a->used += 1;
447 }
448#endif
449
450 pstm_clamp (a);
451 return PS_SUCCESS;
452}
453
454/******************************************************************************/
455/*
456*/
457int16 pstm_count_bits (pstm_int * a)
458{
459 int16 r;
460 pstm_digit q;
461
462 if (a->used == 0) {
463 return 0;
464 }
465
466 /* get number of digits and add that */
467 r = (a->used - 1) * DIGIT_BIT;
468
469 /* take the last digit and count the bits in it */
470 q = a->dp[a->used - 1];
471 while (q > ((pstm_digit) 0)) {
472 ++r;
473 q >>= ((pstm_digit) 1);
474 }
475 return r;
476}
477
478/******************************************************************************/
479int32 pstm_unsigned_bin_size(pstm_int *a)
480{
481 int32 size = pstm_count_bits (a);
482 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
483}
484
485/******************************************************************************/
486void pstm_set(pstm_int *a, pstm_digit b)
487{
488 pstm_zero(a);
489 a->dp[0] = b;
490 a->used = a->dp[0] ? 1 : 0;
491}
492
493/******************************************************************************/
494/*
495 Right shift
496*/
497void pstm_rshd(pstm_int *a, int16 x)
498{
499 int16 y;
500
501 /* too many digits just zero and return */
502 if (x >= a->used) {
503 pstm_zero(a);
504 return;
505 }
506
507 /* shift */
508 for (y = 0; y < a->used - x; y++) {
509 a->dp[y] = a->dp[y+x];
510 }
511
512 /* zero rest */
513 for (; y < a->used; y++) {
514 a->dp[y] = 0;
515 }
516
517 /* decrement count */
518 a->used -= x;
519 pstm_clamp(a);
520}
521
522/******************************************************************************/
523/*
524 Shift left a certain amount of digits.
525 */
526int32 pstm_lshd(pstm_int * a, int16 b)
527{
528 int16 x;
529 int32 res;
530
531/*
532 If its less than zero return.
533 */
534 if (b <= 0) {
535 return PSTM_OKAY;
536 }
537/*
538 Grow to fit the new digits.
539 */
540 if (a->alloc < a->used + b) {
541 if ((res = pstm_grow (a, a->used + b)) != PSTM_OKAY) {
542 return res;
543 }
544 }
545
546 {
547 register pstm_digit *top, *bottom;
548/*
549 Increment the used by the shift amount then copy upwards.
550 */
551 a->used += b;
552
553 /* top */
554 top = a->dp + a->used - 1;
555
556 /* base */
557 bottom = a->dp + a->used - 1 - b;
558/*
559 This is implemented using a sliding window except the window goes the
560 other way around. Copying from the bottom to the top.
561 */
562 for (x = a->used - 1; x >= b; x--) {
563 *top-- = *bottom--;
564 }
565
566 /* zero the lower digits */
567 top = a->dp;
568 for (x = 0; x < b; x++) {
569 *top++ = 0;
570 }
571 }
572 return PSTM_OKAY;
573}
574
575/******************************************************************************/
576/*
577 computes a = 2**b
578*/
579int32 pstm_2expt(pstm_int *a, int16 b)
580{
581 int16 z;
582
583 /* zero a as per default */
584 pstm_zero (a);
585
586 if (b < 0) {
587 return PSTM_OKAY;
588 }
589
590 z = b / DIGIT_BIT;
591 if (z >= PSTM_MAX_SIZE) {
592 return PS_LIMIT_FAIL;
593 }
594
595 /* set the used count of where the bit will go */
596 a->used = z + 1;
597
598 if (a->used > a->alloc) {
599 if (pstm_grow(a, a->used) != PSTM_OKAY) {
600 return PS_MEM_FAIL;
601 }
602 }
603
604 /* put the single bit in its place */
605 a->dp[z] = ((pstm_digit)1) << (b % DIGIT_BIT);
606 return PSTM_OKAY;
607}
608
609/******************************************************************************/
610/*
611
612*/
613int32 pstm_mul_2(pstm_int * a, pstm_int * b)
614{
615 int32 res;
616 int16 x, oldused;
617
618/*
619 grow to accomodate result
620 */
621 if (b->alloc < a->used + 1) {
622 if ((res = pstm_grow (b, a->used + 1)) != PSTM_OKAY) {
623 return res;
624 }
625 }
626 oldused = b->used;
627 b->used = a->used;
628
629 {
630 register pstm_digit r, rr, *tmpa, *tmpb;
631
632 /* alias for source */
633 tmpa = a->dp;
634
635 /* alias for dest */
636 tmpb = b->dp;
637
638 /* carry */
639 r = 0;
640 for (x = 0; x < a->used; x++) {
641/*
642 get what will be the *next* carry bit from the
643 MSB of the current digit
644*/
645 rr = *tmpa >> ((pstm_digit)(DIGIT_BIT - 1));
646/*
647 now shift up this digit, add in the carry [from the previous]
648*/
649 *tmpb++ = ((*tmpa++ << ((pstm_digit)1)) | r);
650/*
651 copy the carry that would be from the source
652 digit into the next iteration
653*/
654 r = rr;
655 }
656
657 /* new leading digit? */
658 if (r != 0 && b->used != (PSTM_MAX_SIZE-1)) {
659 /* add a MSB which is always 1 at this point */
660 *tmpb = 1;
661 ++(b->used);
662 }
663/*
664 now zero any excess digits on the destination that we didn't write to
665*/
666 tmpb = b->dp + b->used;
667 for (x = b->used; x < oldused; x++) {
668 *tmpb++ = 0;
669 }
670 }
671 b->sign = a->sign;
672 return PSTM_OKAY;
673}
674
675/******************************************************************************/
676/*
677 unsigned subtraction ||a|| >= ||b|| ALWAYS!
678*/
679int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c)
680{
681 int16 oldbused, oldused;
682 int32 x;
683 pstm_word t;
684
685 if (b->used > a->used) {
686 return PS_LIMIT_FAIL;
687 }
688 if (c->alloc < a->used) {
689 if ((x = pstm_grow (c, a->used)) != PSTM_OKAY) {
690 return x;
691 }
692 }
693 oldused = c->used;
694 oldbused = b->used;
695 c->used = a->used;
696 t = 0;
697
698 for (x = 0; x < oldbused; x++) {
699 t = ((pstm_word)a->dp[x]) - (((pstm_word)b->dp[x]) + t);
700 c->dp[x] = (pstm_digit)t;
701 t = (t >> DIGIT_BIT)&1;
702 }
703 for (; x < a->used; x++) {
704 t = ((pstm_word)a->dp[x]) - t;
705 c->dp[x] = (pstm_digit)t;
706 t = (t >> DIGIT_BIT);
707 }
708 for (; x < oldused; x++) {
709 c->dp[x] = 0;
710 }
711 pstm_clamp(c);
712 return PSTM_OKAY;
713}
714
715/******************************************************************************/
716/*
717 unsigned addition
718*/
719static int32 s_pstm_add(pstm_int *a, pstm_int *b, pstm_int *c)
720{
721 int16 x, y, oldused;
722 register pstm_word t, adp, bdp;
723
724 y = a->used;
725 if (b->used > y) {
726 y = b->used;
727 }
728 oldused = c->used;
729 c->used = y;
730
731 if (c->used > c->alloc) {
732 if (pstm_grow(c, c->used) != PSTM_OKAY) {
733 return PS_MEM_FAIL;
734 }
735 }
736
737 t = 0;
738 for (x = 0; x < y; x++) {
739 if (a->used < x) {
740 adp = 0;
741 } else {
742 adp = (pstm_word)a->dp[x];
743 }
744 if (b->used < x) {
745 bdp = 0;
746 } else {
747 bdp = (pstm_word)b->dp[x];
748 }
749 t += (adp) + (bdp);
750 c->dp[x] = (pstm_digit)t;
751 t >>= DIGIT_BIT;
752 }
753 if (t != 0 && x < PSTM_MAX_SIZE) {
754 if (c->used == c->alloc) {
755 if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) {
756 return PS_MEM_FAIL;
757 }
758 }
759 c->dp[c->used++] = (pstm_digit)t;
760 ++x;
761 }
762
763 c->used = x;
764 for (; x < oldused; x++) {
765 c->dp[x] = 0;
766 }
767 pstm_clamp(c);
768 return PSTM_OKAY;
769}
770
771
772/******************************************************************************/
773/*
774
775*/
776int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c)
777{
778 int32 res;
779 int16 sa, sb;
780
781 sa = a->sign;
782 sb = b->sign;
783
784 if (sa != sb) {
785/*
786 subtract a negative from a positive, OR a positive from a negative.
787 For both, ADD their magnitudes, and use the sign of the first number.
788 */
789 c->sign = sa;
790 if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) {
791 return res;
792 }
793 } else {
794/*
795 subtract a positive from a positive, OR a negative from a negative.
796 First, take the difference between their magnitudes, then...
797 */
798 if (pstm_cmp_mag (a, b) != PSTM_LT) {
799 /* Copy the sign from the first */
800 c->sign = sa;
801 /* The first has a larger or equal magnitude */
802 if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) {
803 return res;
804 }
805 } else {
806 /* The result has the _opposite_ sign from the first number. */
807 c->sign = (sa == PSTM_ZPOS) ? PSTM_NEG : PSTM_ZPOS;
808 /* The second has a larger magnitude */
809 if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) {
810 return res;
811 }
812 }
813 }
814 return PS_SUCCESS;
815}
816
817/******************************************************************************/
818/*
819 c = a - b
820*/
821int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c)
822{
823 pstm_int tmp;
824 int32 res;
825
826 if (pstm_init_size(pool, &tmp, sizeof(pstm_digit)) != PSTM_OKAY) {
827 return PS_MEM_FAIL;
828 }
829 pstm_set(&tmp, b);
830 res = pstm_sub(a, &tmp, c);
831 pstm_clear(&tmp);
832 return res;
833}
834
835/******************************************************************************/
836/*
837 setups the montgomery reduction
838*/
839int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho)
840{
841 pstm_digit x, b;
842
843/*
844 fast inversion mod 2**k
845 Based on the fact that
846 XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
847 => 2*X*A - X*X*A*A = 1
848 => 2*(1) - (1) = 1
849 */
850 b = a->dp[0];
851
852 if ((b & 1) == 0) {
853 psTraceCrypto("pstm_montogomery_setup failure\n");
854 return PS_ARG_FAIL;
855 }
856
857 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
858 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
859 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
860 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
861#ifdef PSTM_64BIT
862 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
863#endif
864 /* rho = -1/m mod b */
865 *rho = (pstm_digit)(((pstm_word) 1 << ((pstm_word) DIGIT_BIT)) -
866 ((pstm_word)x));
867 return PSTM_OKAY;
868}
869
870/******************************************************************************/
871/*
872 * computes a = B**n mod b without division or multiplication useful for
873 * normalizing numbers in a Montgomery system.
874 */
875int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b)
876{
877 int32 x;
878 int16 bits;
879
880 /* how many bits of last digit does b use */
881 bits = pstm_count_bits (b) % DIGIT_BIT;
882 if (!bits) bits = DIGIT_BIT;
883
884 /* compute A = B^(n-1) * 2^(bits-1) */
885 if (b->used > 1) {
886 if ((x = pstm_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) !=
887 PSTM_OKAY) {
888 return x;
889 }
890 } else {
891 pstm_set(a, 1);
892 bits = 1;
893 }
894
895 /* now compute C = A * B mod b */
896 for (x = bits - 1; x < (int32)DIGIT_BIT; x++) {
897 if (pstm_mul_2 (a, a) != PSTM_OKAY) {
898 return PS_MEM_FAIL;
899 }
900 if (pstm_cmp_mag (a, b) != PSTM_LT) {
901 if (s_pstm_sub (a, b, a) != PSTM_OKAY) {
902 return PS_MEM_FAIL;
903 }
904 }
905 }
906 return PSTM_OKAY;
907}
908
909/******************************************************************************/
910/*
911 c = a * 2**d
912*/
913static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c)
914{
915 pstm_digit carry, carrytmp, shift;
916 int16 x;
917
918 /* copy it */
919 if (pstm_copy(a, c) != PSTM_OKAY) {
920 return PS_MEM_FAIL;
921 }
922
923 /* handle whole digits */
924 if (b >= DIGIT_BIT) {
925 if (pstm_lshd(c, b/DIGIT_BIT) != PSTM_OKAY) {
926 return PS_MEM_FAIL;
927 }
928 }
929 b %= DIGIT_BIT;
930
931 /* shift the digits */
932 if (b != 0) {
933 carry = 0;
934 shift = DIGIT_BIT - b;
935 for (x = 0; x < c->used; x++) {
936 carrytmp = c->dp[x] >> shift;
937 c->dp[x] = (c->dp[x] << b) + carry;
938 carry = carrytmp;
939 }
940 /* store last carry if room */
941 if (carry && x < PSTM_MAX_SIZE) {
942 if (c->used == c->alloc) {
943 if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) {
944 return PS_MEM_FAIL;
945 }
946 }
947 c->dp[c->used++] = carry;
948 }
949 }
950 pstm_clamp(c);
951 return PSTM_OKAY;
952}
953
954/******************************************************************************/
955/*
956 c = a mod 2**d
957*/
958static int32 pstm_mod_2d(pstm_int *a, int16 b, pstm_int *c)
959{
960 int16 x;
961
962 /* zero if count less than or equal to zero */
963 if (b <= 0) {
964 pstm_zero(c);
965 return PSTM_OKAY;
966 }
967
968 /* get copy of input */
969 if (pstm_copy(a, c) != PSTM_OKAY) {
970 return PS_MEM_FAIL;
971 }
972
973 /* if 2**d is larger than we just return */
974 if (b >= (DIGIT_BIT * a->used)) {
975 return PSTM_OKAY;
976 }
977
978 /* zero digits above the last digit of the modulus */
979 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++)
980 {
981 c->dp[x] = 0;
982 }
983 /* clear the digit that is not completely outside/inside the modulus */
984 c->dp[b / DIGIT_BIT] &= ~((pstm_digit)0) >> (DIGIT_BIT - b);
985 pstm_clamp (c);
986 return PSTM_OKAY;
987}
988
989
990/******************************************************************************/
991/*
992 c = a * b
993*/
994int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c)
995{
996 pstm_word w;
997 int32 res;
998 int16 x, oldused;
999
1000 if (c->alloc < a->used + 1) {
1001 if ((res = pstm_grow (c, a->used + 1)) != PSTM_OKAY) {
1002 return res;
1003 }
1004 }
1005 oldused = c->used;
1006 c->used = a->used;
1007 c->sign = a->sign;
1008 w = 0;
1009 for (x = 0; x < a->used; x++) {
1010 w = ((pstm_word)a->dp[x]) * ((pstm_word)b) + w;
1011 c->dp[x] = (pstm_digit)w;
1012 w = w >> DIGIT_BIT;
1013 }
1014 if (w != 0 && (a->used != PSTM_MAX_SIZE)) {
1015 c->dp[c->used++] = (pstm_digit)w;
1016 ++x;
1017 }
1018 for (; x < oldused; x++) {
1019 c->dp[x] = 0;
1020 }
1021 pstm_clamp(c);
1022 return PSTM_OKAY;
1023}
1024
1025/******************************************************************************/
1026/*
1027 c = a / 2**b
1028*/
1029int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c,
1030 pstm_int *d)
1031{
1032 pstm_digit D, r, rr;
1033 int32 res;
1034 int16 x;
1035 pstm_int t;
1036
1037 /* if the shift count is <= 0 then we do no work */
1038 if (b <= 0) {
1039 if (pstm_copy (a, c) != PSTM_OKAY) {
1040 return PS_MEM_FAIL;
1041 }
1042 if (d != NULL) {
1043 pstm_zero (d);
1044 }
1045 return PSTM_OKAY;
1046 }
1047
1048 /* get the remainder */
1049 if (d != NULL) {
1050 if (pstm_init(pool, &t) != PSTM_OKAY) {
1051 return PS_MEM_FAIL;
1052 }
1053 if (pstm_mod_2d (a, b, &t) != PSTM_OKAY) {
1054 res = PS_MEM_FAIL;
1055 goto LBL_DONE;
1056 }
1057 }
1058
1059 /* copy */
1060 if (pstm_copy(a, c) != PSTM_OKAY) {
1061 res = PS_MEM_FAIL;
1062 goto LBL_DONE;
1063 }
1064
1065 /* shift by as many digits in the bit count */
1066 if (b >= (int32)DIGIT_BIT) {
1067 pstm_rshd (c, b / DIGIT_BIT);
1068 }
1069
1070 /* shift any bit count < DIGIT_BIT */
1071 D = (pstm_digit) (b % DIGIT_BIT);
1072 if (D != 0) {
1073 register pstm_digit *tmpc, mask, shift;
1074
1075 /* mask */
1076 mask = (((pstm_digit)1) << D) - 1;
1077
1078 /* shift for lsb */
1079 shift = DIGIT_BIT - D;
1080
1081 /* alias */
1082 tmpc = c->dp + (c->used - 1);
1083
1084 /* carry */
1085 r = 0;
1086 for (x = c->used - 1; x >= 0; x--) {
1087 /* get the lower bits of this word in a temp */
1088 rr = *tmpc & mask;
1089
1090 /* shift the current word and mix in the carry bits from previous */
1091 *tmpc = (*tmpc >> D) | (r << shift);
1092 --tmpc;
1093
1094 /* set the carry to the carry bits of the current word above */
1095 r = rr;
1096 }
1097 }
1098 pstm_clamp (c);
1099
1100 res = PSTM_OKAY;
1101LBL_DONE:
1102 if (d != NULL) {
1103 if (pstm_copy(&t, d) != PSTM_OKAY) {
1104 res = PS_MEM_FAIL;
1105 }
1106 pstm_clear(&t);
1107 }
1108 return res;
1109}
1110
1111/******************************************************************************/
1112/*
1113 b = a/2
1114*/
1115int32 pstm_div_2(pstm_int * a, pstm_int * b)
1116{
1117 int16 x, oldused;
1118
1119 if (b->alloc < a->used) {
1120 if (pstm_grow(b, a->used) != PSTM_OKAY) {
1121 return PS_MEM_FAIL;
1122 }
1123 }
1124 oldused = b->used;
1125 b->used = a->used;
1126 {
1127 register pstm_digit r, rr, *tmpa, *tmpb;
1128
1129 /* source alias */
1130 tmpa = a->dp + b->used - 1;
1131
1132 /* dest alias */
1133 tmpb = b->dp + b->used - 1;
1134
1135 /* carry */
1136 r = 0;
1137 for (x = b->used - 1; x >= 0; x--) {
1138 /* get the carry for the next iteration */
1139 rr = *tmpa & 1;
1140
1141 /* shift the current digit, add in carry and store */
1142 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1143
1144 /* forward carry to next iteration */
1145 r = rr;
1146 }
1147
1148 /* zero excess digits */
1149 tmpb = b->dp + b->used;
1150 for (x = b->used; x < oldused; x++) {
1151 *tmpb++ = 0;
1152 }
1153 }
1154 b->sign = a->sign;
1155 pstm_clamp (b);
1156 return PSTM_OKAY;
1157}
1158
1159/******************************************************************************/
1160/*
1161 Creates "a" then copies b into it
1162 */
1163int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b, int16 toSqr)
1164{
1165 int16 x;
1166 int32 res;
1167
1168 if (a == b) {
1169 return PSTM_OKAY;
1170 }
1171 x = b->alloc;
1172
1173 if (toSqr) {
1174/*
1175 Smart-size: Increasing size of a if b->used is roughly half
1176 of b->alloc because usage has shown that a lot of these copies
1177 go on to be squared and need these extra digits
1178*/
1179 if ((b->used * 2) + 2 >= x) {
1180 x = (b->used * 2) + 3;
1181 }
1182 }
1183 if ((res = pstm_init_size(pool, a, x)) != PSTM_OKAY) {
1184 return res;
1185 }
1186 return pstm_copy(b, a);
1187}
1188
1189/******************************************************************************/
1190/*
	With some compilers, we have seen issues linking with the builtin
	64 bit division routine. The issues manifest either as a failure
	to find 'udivdi3' at link time, or as a runtime invalid instruction
	fault during an RSA operation.
1195 The routine below divides a 64 bit unsigned int by a 32 bit unsigned int
1196 explicitly, rather than using the division operation
1197 The 64 bit result is placed in the 'numerator' parameter
1198 The 32 bit mod (remainder) of the division is the return parameter
1199 Based on implementations by:
1200 Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
1201 Copyright (C) 1999 Hewlett-Packard Co
1202 Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
1203*/
#if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
/*
	Divide the 64 bit value at *numerator by the 32 bit denominator with
	shifts, compares and subtractions (plus one 32 bit division for the
	upper word). The quotient is written back to *numerator and the
	remainder is returned.
*/
static uint32 psDiv64(uint64 *numerator, uint32 denominator)
{
	uint64	rest = *numerator;
	uint64	divisor = denominator;
	uint64	quotient = 0;
	uint64	bit = 1;
	uint32	upper = rest >> 32;

	/* reduce the upper word first so the loops below stay short */
	if (upper >= denominator) {
		upper /= denominator;
		quotient = (uint64) upper << 32;
		rest -= (uint64) (upper * denominator) << 32;
	}

	/* scale the divisor up until it reaches rest or its top bit is set */
	while ((int64)divisor > 0 && divisor < rest) {
		divisor += divisor;
		bit += bit;
	}

	/* subtract the scaled divisor wherever it fits, one bit per step */
	do {
		if (rest >= divisor) {
			rest -= divisor;
			quotient += bit;
		}
		divisor >>= 1;
		bit >>= 1;
	} while (bit);

	*numerator = quotient;
	return rest;
}
#endif /* USE_MATRIX_DIV64 */
1234
#if defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
typedef unsigned long	uint128 __attribute__ ((mode(TI)));
/*
	Divide the 128 bit value at *numerator by the 64 bit denominator with
	shifts, compares and subtractions (plus one 64 bit division for the
	upper word). The quotient is written back to *numerator and the
	remainder is returned.
*/
static uint64 psDiv128(uint128 *numerator, uint64 denominator)
{
	uint128	rest = *numerator;
	uint128	divisor = denominator;
	uint128	quotient = 0;
	uint128	bit = 1;
	uint64	upper = rest >> 64;

	/* reduce the upper word first so the loops below stay short */
	if (upper >= denominator) {
		upper /= denominator;
		quotient = (uint128) upper << 64;
		rest -= (uint128) (upper * denominator) << 64;
	}

/*
	Scale the divisor up until it reaches rest.
	NOTE(review): psDiv64 casts to a signed type in this test so that it
	stops once the top bit is set; the unsigned cast here only rejects
	zero - confirm that this is intended.
*/
	while ((uint128)divisor != 0 && divisor < rest) {
		divisor += divisor;
		bit += bit;
	}

	/* subtract the scaled divisor wherever it fits, one bit per step */
	do {
		if (rest >= divisor) {
			rest -= divisor;
			quotient += bit;
		}
		divisor >>= 1;
		bit >>= 1;
	} while (bit);

	*numerator = quotient;
	return rest;
}
#endif /* USE_MATRIX_DIV128 */
1266
1267/******************************************************************************/
1268/*
1269 a/b => cb + d == a
1270*/
1271int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
1272 pstm_int *d)
1273{
1274 pstm_int q, x, y, t1, t2;
1275 int32 res;
1276 int16 n, t, i, norm, neg;
1277
1278 /* is divisor zero ? */
1279 if (pstm_iszero (b) == 1) {
1280 return PS_LIMIT_FAIL;
1281 }
1282
1283 /* if a < b then q=0, r = a */
1284 if (pstm_cmp_mag (a, b) == PSTM_LT) {
1285 if (d != NULL) {
1286 if (pstm_copy(a, d) != PSTM_OKAY) {
1287 return PS_MEM_FAIL;
1288 }
1289 }
1290 if (c != NULL) {
1291 pstm_zero (c);
1292 }
1293 return PSTM_OKAY;
1294 }
1295/*
1296 Smart-size inits
1297*/
1298 if ((res = pstm_init_size(pool, &t1, a->alloc)) != PSTM_OKAY) {
1299 return res;
1300 }
1301 if ((res = pstm_init_size(pool, &t2, 3)) != PSTM_OKAY) {
1302 goto LBL_T1;
1303 }
1304 if ((res = pstm_init_copy(pool, &x, a, 0)) != PSTM_OKAY) {
1305 goto LBL_T2;
1306 }
1307/*
1308 Used to be an init_copy on b but pstm_grow was always hit with triple size
1309*/
1310 if ((res = pstm_init_size(pool, &y, b->used * 3)) != PSTM_OKAY) {
1311 goto LBL_X;
1312 }
1313 if ((res = pstm_copy(b, &y)) != PSTM_OKAY) {
1314 goto LBL_Y;
1315 }
1316
1317 /* fix the sign */
1318 neg = (a->sign == b->sign) ? PSTM_ZPOS : PSTM_NEG;
1319 x.sign = y.sign = PSTM_ZPOS;
1320
1321 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1322 norm = pstm_count_bits(&y) % DIGIT_BIT;
1323 if (norm < (int32)(DIGIT_BIT-1)) {
1324 norm = (DIGIT_BIT-1) - norm;
1325 if ((res = pstm_mul_2d(&x, norm, &x)) != PSTM_OKAY) {
1326 goto LBL_Y;
1327 }
1328 if ((res = pstm_mul_2d(&y, norm, &y)) != PSTM_OKAY) {
1329 goto LBL_Y;
1330 }
1331 } else {
1332 norm = 0;
1333 }
1334
1335 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1336 n = x.used - 1;
1337 t = y.used - 1;
1338
1339 if ((res = pstm_init_size(pool, &q, n - t + 1)) != PSTM_OKAY) {
1340 goto LBL_Y;
1341 }
1342 q.used = n - t + 1;
1343
1344 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1345 if ((res = pstm_lshd(&y, n - t)) != PSTM_OKAY) { /* y = y*b**{n-t} */
1346 goto LBL_Q;
1347 }
1348
1349 while (pstm_cmp (&x, &y) != PSTM_LT) {
1350 ++(q.dp[n - t]);
1351 if ((res = pstm_sub(&x, &y, &x)) != PSTM_OKAY) {
1352 goto LBL_Q;
1353 }
1354 }
1355
1356 /* reset y by shifting it back down */
1357 pstm_rshd (&y, n - t);
1358
1359 /* step 3. for i from n down to (t + 1) */
1360 for (i = n; i >= (t + 1); i--) {
1361 if (i > x.used) {
1362 continue;
1363 }
1364
1365 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1366 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1367 if (x.dp[i] == y.dp[t]) {
1368 q.dp[i - t - 1] = (pstm_digit)((((pstm_word)1) << DIGIT_BIT) - 1);
1369 } else {
1370 pstm_word tmp;
1371 tmp = ((pstm_word) x.dp[i]) << ((pstm_word) DIGIT_BIT);
1372 tmp |= ((pstm_word) x.dp[i - 1]);
1373#if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
1374 psDiv64(&tmp, y.dp[t]);
1375#elif defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
1376 psDiv128(&tmp, y.dp[t]);
1377#else
1378 tmp /= ((pstm_word) y.dp[t]);
1379#endif /* USE_MATRIX_DIV64 */
1380 q.dp[i - t - 1] = (pstm_digit) (tmp);
1381 }
1382
1383 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1384 xi * b**2 + xi-1 * b + xi-2
1385
1386 do q{i-t-1} -= 1;
1387 */
1388 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
1389 do {
1390 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
1391
1392 /* find left hand */
1393 pstm_zero (&t1);
1394 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1395 t1.dp[1] = y.dp[t];
1396 t1.used = 2;
1397 if ((res = pstm_mul_d (&t1, q.dp[i - t - 1], &t1)) != PSTM_OKAY) {
1398 goto LBL_Q;
1399 }
1400
1401 /* find right hand */
1402 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1403 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1404 t2.dp[2] = x.dp[i];
1405 t2.used = 3;
1406 } while (pstm_cmp_mag(&t1, &t2) == PSTM_GT);
1407
1408 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1409 if ((res = pstm_mul_d(&y, q.dp[i - t - 1], &t1)) != PSTM_OKAY) {
1410 goto LBL_Q;
1411 }
1412
1413 if ((res = pstm_lshd(&t1, i - t - 1)) != PSTM_OKAY) {
1414 goto LBL_Q;
1415 }
1416
1417 if ((res = pstm_sub(&x, &t1, &x)) != PSTM_OKAY) {
1418 goto LBL_Q;
1419 }
1420
1421 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1422 if (x.sign == PSTM_NEG) {
1423 if ((res = pstm_copy(&y, &t1)) != PSTM_OKAY) {
1424 goto LBL_Q;
1425 }
1426 if ((res = pstm_lshd (&t1, i - t - 1)) != PSTM_OKAY) {
1427 goto LBL_Q;
1428 }
1429 if ((res = pstm_add (&x, &t1, &x)) != PSTM_OKAY) {
1430 goto LBL_Q;
1431 }
1432 q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
1433 }
1434 }
1435/*
1436 now q is the quotient and x is the remainder (which we have to normalize)
1437*/
1438 /* get sign before writing to c */
1439 x.sign = x.used == 0 ? PSTM_ZPOS : a->sign;
1440
1441 if (c != NULL) {
1442 pstm_clamp (&q);
1443 if (pstm_copy (&q, c) != PSTM_OKAY) {
1444 res = PS_MEM_FAIL;
1445 goto LBL_Q;
1446 }
1447 c->sign = neg;
1448 }
1449
1450 if (d != NULL) {
1451 if ((res = pstm_div_2d (pool, &x, norm, &x, NULL)) != PSTM_OKAY) {
1452 goto LBL_Q;
1453 }
1454/*
1455 the following is a kludge, essentially we were seeing the right
1456 remainder but with excess digits that should have been zero
1457 */
1458 for (i = b->used; i < x.used; i++) {
1459 x.dp[i] = 0;
1460 }
1461 pstm_clamp(&x);
1462 if (pstm_copy (&x, d) != PSTM_OKAY) {
1463 res = PS_MEM_FAIL;
1464 goto LBL_Q;
1465 }
1466 }
1467
1468 res = PSTM_OKAY;
1469
1470LBL_Q:pstm_clear (&q);
1471LBL_Y:pstm_clear (&y);
1472LBL_X:pstm_clear (&x);
1473LBL_T2:pstm_clear (&t2);
1474LBL_T1:pstm_clear (&t1);
1475
1476 return res;
1477}
1478
1479/******************************************************************************/
1480/*
1481 Swap the elements of two integers, for cases where you can't simply swap
1482 the pstm_int pointers around
1483*/
1484void pstm_exch(pstm_int * a, pstm_int * b)
1485{
1486 pstm_int t;
1487
1488 t = *a;
1489 *a = *b;
1490 *b = t;
1491}
1492
1493/******************************************************************************/
1494/*
1495 c = a mod b, 0 <= c < b
1496*/
1497int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c)
1498{
1499 pstm_int t;
1500 int32 err;
1501/*
1502 Smart-size
1503*/
1504 if ((err = pstm_init_size(pool, &t, b->alloc)) != PSTM_OKAY) {
1505 return err;
1506 }
1507 if ((err = pstm_div(pool, a, b, NULL, &t)) != PSTM_OKAY) {
1508 pstm_clear (&t);
1509 return err;
1510 }
1511 if (t.sign != b->sign) {
1512 err = pstm_add(&t, b, c);
1513 } else {
1514 pstm_exch (&t, c);
1515 }
1516 pstm_clear (&t);
1517 return err;
1518}
1519
1520/******************************************************************************/
1521/*
1522 d = a * b (mod c)
1523*/
1524int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
1525 pstm_int *d)
1526{
1527 int32 res;
1528 int16 size;
1529 pstm_int tmp;
1530
1531/*
1532 Smart-size pstm_inits. d is an output that is influenced by this local 't'
1533 so don't shrink 'd' if it wants to becuase this will lead to an pstm_grow
1534 in RSA operations
1535*/
1536 size = a->used + b->used + 1;
1537 if ((a == d) && (size < a->alloc)) {
1538 size = a->alloc;
1539 }
1540 if ((res = pstm_init_size(pool, &tmp, size)) != PSTM_OKAY) {
1541 return res;
1542 }
1543 if ((res = pstm_mul_comba(pool, a, b, &tmp, NULL, 0)) != PSTM_OKAY) {
1544 pstm_clear(&tmp);
1545 return res;
1546 }
1547 res = pstm_mod(pool, &tmp, c, d);
1548 pstm_clear(&tmp);
1549 return res;
1550}
1551
1552/******************************************************************************/
1553/*
1554 * y = g**x (mod b)
1555 * Some restrictions... x must be positive and < b
1556 */
1557int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P,
1558 pstm_int *Y)
1559{
1560 pstm_int M[32], res; /* Keep this winsize based: (1 << max_winsize) */
1561 pstm_digit buf, mp;
1562 pstm_digit *paD;
1563 int32 err, bitbuf;
1564 int16 bitcpy, bitcnt, mode, digidx, x, y, winsize;
1565 uint32 paDlen;
1566
1567 /* set window size from what user set as optimization */
1568 x = pstm_count_bits(X);
1569 if (x < 50) {
1570 winsize = 2;
1571 } else {
1572 winsize = PS_EXPTMOD_WINSIZE;
1573 }
1574
1575 /* now setup montgomery */
1576 if ((err = pstm_montgomery_setup (P, &mp)) != PSTM_OKAY) {
1577 return err;
1578 }
1579
1580 /* setup result */
1581 if ((err = pstm_init_size(pool, &res, (P->used * 2) + 1)) != PSTM_OKAY) {
1582 return err;
1583 }
1584/*
1585 create M table
1586 The M table contains powers of the input base, e.g. M[x] = G^x mod P
1587 The first half of the table is not computed though except for M[0] and M[1]
1588 */
1589 /* now we need R mod m */
1590 if ((err = pstm_montgomery_calc_normalization (&res, P)) != PSTM_OKAY) {
1591 goto LBL_RES;
1592 }
1593/*
1594 init M array
1595 init first cell
1596 */
1597 if ((err = pstm_init_size(pool, &M[1], res.used)) != PSTM_OKAY) {
1598 goto LBL_RES;
1599 }
1600
1601 /* now set M[1] to G * R mod m */
1602 if (pstm_cmp_mag(P, G) != PSTM_GT) {
1603 /* G > P so we reduce it first */
1604 if ((err = pstm_mod(pool, G, P, &M[1])) != PSTM_OKAY) {
1605 goto LBL_M;
1606 }
1607 } else {
1608 if ((err = pstm_copy(G, &M[1])) != PSTM_OKAY) {
1609 goto LBL_M;
1610 }
1611 }
1612 if ((err = pstm_mulmod (pool, &M[1], &res, P, &M[1])) != PSTM_OKAY) {
1613 goto LBL_M;
1614 }
1615/*
1616 Pre-allocated digit. Used for mul, sqr, AND reduce
1617*/
1618 paDlen = ((M[1].used + 3) * 2) * sizeof(pstm_digit);
1619 paD = xzalloc(paDlen);
1620/*
1621 compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times
1622 */
1623 if (pstm_init_copy(pool, &M[1 << (winsize - 1)], &M[1], 1) != PSTM_OKAY) {
1624 err = PS_MEM_FAIL;
1625 goto LBL_PAD;
1626 }
1627 for (x = 0; x < (winsize - 1); x++) {
1628 if ((err = pstm_sqr_comba (pool, &M[1 << (winsize - 1)],
1629 &M[1 << (winsize - 1)], paD, paDlen)) != PSTM_OKAY) {
1630 goto LBL_PAD;
1631 }
1632 if ((err = pstm_montgomery_reduce(pool, &M[1 << (winsize - 1)], P, mp,
1633 paD, paDlen)) != PSTM_OKAY) {
1634 goto LBL_PAD;
1635 }
1636 }
1637/*
1638 now init the second half of the array
1639*/
1640 for (x = (1<<(winsize-1)) + 1; x < (1 << winsize); x++) {
1641 if ((err = pstm_init_size(pool, &M[x], M[1<<(winsize-1)].alloc + 1))
1642 != PSTM_OKAY) {
1643 for (y = 1<<(winsize-1); y < x; y++) {
1644 pstm_clear(&M[y]);
1645 }
1646 goto LBL_PAD;
1647 }
1648 }
1649
1650 /* create upper table */
1651 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1652 if ((err = pstm_mul_comba(pool, &M[x - 1], &M[1], &M[x], paD, paDlen))
1653 != PSTM_OKAY) {
1654 goto LBL_MARRAY;
1655 }
1656 if ((err = pstm_montgomery_reduce(pool, &M[x], P, mp, paD, paDlen)) !=
1657 PSTM_OKAY) {
1658 goto LBL_MARRAY;
1659 }
1660 }
1661
1662 /* set initial mode and bit cnt */
1663 mode = 0;
1664 bitcnt = 1;
1665 buf = 0;
1666 digidx = X->used - 1;
1667 bitcpy = 0;
1668 bitbuf = 0;
1669
1670 for (;;) {
1671 /* grab next digit as required */
1672 if (--bitcnt == 0) {
1673 /* if digidx == -1 we are out of digits so break */
1674 if (digidx == -1) {
1675 break;
1676 }
1677 /* read next digit and reset bitcnt */
1678 buf = X->dp[digidx--];
1679 bitcnt = (int32)DIGIT_BIT;
1680 }
1681
1682 /* grab the next msb from the exponent */
1683 y = (pstm_digit)(buf >> (DIGIT_BIT - 1)) & 1;
1684 buf <<= (pstm_digit)1;
1685/*
1686 If the bit is zero and mode == 0 then we ignore it.
1687 These represent the leading zero bits before the first 1 bit
1688 in the exponent. Technically this opt is not required but it
1689 does lower the # of trivial squaring/reductions used
1690*/
1691 if (mode == 0 && y == 0) {
1692 continue;
1693 }
1694
1695 /* if the bit is zero and mode == 1 then we square */
1696 if (mode == 1 && y == 0) {
1697 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1698 PSTM_OKAY) {
1699 goto LBL_MARRAY;
1700 }
1701 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1702 != PSTM_OKAY) {
1703 goto LBL_MARRAY;
1704 }
1705 continue;
1706 }
1707
1708 /* else we add it to the window */
1709 bitbuf |= (y << (winsize - ++bitcpy));
1710 mode = 2;
1711
1712 if (bitcpy == winsize) {
1713 /* ok window is filled so square as required and mul square first */
1714 for (x = 0; x < winsize; x++) {
1715 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1716 PSTM_OKAY) {
1717 goto LBL_MARRAY;
1718 }
1719 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD,
1720 paDlen)) != PSTM_OKAY) {
1721 goto LBL_MARRAY;
1722 }
1723 }
1724
1725 /* then multiply */
1726 if ((err = pstm_mul_comba(pool, &res, &M[bitbuf], &res, paD,
1727 paDlen)) != PSTM_OKAY) {
1728 goto LBL_MARRAY;
1729 }
1730 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1731 != PSTM_OKAY) {
1732 goto LBL_MARRAY;
1733 }
1734
1735 /* empty window and reset */
1736 bitcpy = 0;
1737 bitbuf = 0;
1738 mode = 1;
1739 }
1740 }
1741
1742 /* if bits remain then square/multiply */
1743 if (mode == 2 && bitcpy > 0) {
1744 /* square then multiply if the bit is set */
1745 for (x = 0; x < bitcpy; x++) {
1746 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1747 PSTM_OKAY) {
1748 goto LBL_MARRAY;
1749 }
1750 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1751 != PSTM_OKAY) {
1752 goto LBL_MARRAY;
1753 }
1754
1755 /* get next bit of the window */
1756 bitbuf <<= 1;
1757 if ((bitbuf & (1 << winsize)) != 0) {
1758 /* then multiply */
1759 if ((err = pstm_mul_comba(pool, &res, &M[1], &res, paD, paDlen))
1760 != PSTM_OKAY) {
1761 goto LBL_MARRAY;
1762 }
1763 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD,
1764 paDlen)) != PSTM_OKAY) {
1765 goto LBL_MARRAY;
1766 }
1767 }
1768 }
1769 }
1770/*
1771 Fix up result if Montgomery reduction is used recall that any value in a
1772 Montgomery system is actually multiplied by R mod n. So we have to reduce
1773 one more time to cancel out the factor of R.
1774*/
1775 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) !=
1776 PSTM_OKAY) {
1777 goto LBL_MARRAY;
1778 }
1779 /* swap res with Y */
1780 if ((err = pstm_copy (&res, Y)) != PSTM_OKAY) {
1781 goto LBL_MARRAY;
1782 }
1783 err = PSTM_OKAY;
1784LBL_MARRAY:
1785 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1786 pstm_clear(&M[x]);
1787 }
1788LBL_PAD:psFree(paD, pool);
1789LBL_M: pstm_clear(&M[1]);
1790LBL_RES:pstm_clear(&res);
1791 return err;
1792}
1793
1794/******************************************************************************/
1795/*
1796
1797*/
1798int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c)
1799{
1800 int32 res;
1801 int16 sa, sb;
1802
1803 /* get sign of both inputs */
1804 sa = a->sign;
1805 sb = b->sign;
1806
1807 /* handle two cases, not four */
1808 if (sa == sb) {
1809 /* both positive or both negative, add their mags, copy the sign */
1810 c->sign = sa;
1811 if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) {
1812 return res;
1813 }
1814 } else {
1815/*
1816 one positive, the other negative
1817 subtract the one with the greater magnitude from the one of the lesser
1818 magnitude. The result gets the sign of the one with the greater mag.
1819 */
1820 if (pstm_cmp_mag (a, b) == PSTM_LT) {
1821 c->sign = sb;
1822 if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) {
1823 return res;
1824 }
1825 } else {
1826 c->sign = sa;
1827 if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) {
1828 return res;
1829 }
1830 }
1831 }
1832 return PS_SUCCESS;
1833}
1834
1835/******************************************************************************/
1836/*
1837 reverse an array, used for radix code
1838*/
1839static void pstm_reverse (unsigned char *s, int16 len)
1840{
1841 int32 ix, iy;
1842 unsigned char t;
1843
1844 ix = 0;
1845 iy = len - 1;
1846 while (ix < iy) {
1847 t = s[ix];
1848 s[ix] = s[iy];
1849 s[iy] = t;
1850 ++ix;
1851 --iy;
1852 }
1853}
1854/******************************************************************************/
1855/*
1856 No reverse. Useful in some of the EIP-154 PKA stuff where special byte
1857 order seems to come into play more often
1858*/
1859int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a, unsigned char *b)
1860{
1861 int32 res;
1862 int16 x;
1863 pstm_int t = { 0 };
1864
1865 if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) {
1866 return res;
1867 }
1868
1869 x = 0;
1870 while (pstm_iszero (&t) == 0) {
1871 b[x++] = (unsigned char) (t.dp[0] & 255);
1872 if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) {
1873 pstm_clear(&t);
1874 return res;
1875 }
1876 }
1877 pstm_clear(&t);
1878 return PS_SUCCESS;
1879}
1880/******************************************************************************/
1881/*
1882
1883*/
1884int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a, unsigned char *b)
1885{
1886 int32 res;
1887 int16 x;
1888 pstm_int t = { 0 };
1889
1890 if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) {
1891 return res;
1892 }
1893
1894 x = 0;
1895 while (pstm_iszero (&t) == 0) {
1896 b[x++] = (unsigned char) (t.dp[0] & 255);
1897 if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) {
1898 pstm_clear(&t);
1899 return res;
1900 }
1901 }
1902 pstm_reverse (b, x);
1903 pstm_clear(&t);
1904 return PS_SUCCESS;
1905}
1906
1907/******************************************************************************/
1908/*
1909 compare against a single digit
1910*/
1911int32 pstm_cmp_d(pstm_int *a, pstm_digit b)
1912{
1913 /* compare based on sign */
1914 if ((b && a->used == 0) || a->sign == PSTM_NEG) {
1915 return PSTM_LT;
1916 }
1917
1918 /* compare based on magnitude */
1919 if (a->used > 1) {
1920 return PSTM_GT;
1921 }
1922
1923 /* compare the only digit of a to b */
1924 if (a->dp[0] > b) {
1925 return PSTM_GT;
1926 } else if (a->dp[0] < b) {
1927 return PSTM_LT;
1928 } else {
1929 return PSTM_EQ;
1930 }
1931}
1932
1933/*
1934 Need invmod for ECC and also private key loading for hardware crypto
1935 in cases where dQ > dP. The values must be switched and a new qP must be
1936 calculated using this function
1937*/
1938static int32 pstm_invmod_slow(psPool_t *pool, pstm_int * a, pstm_int * b,
1939 pstm_int * c)
1940{
1941 pstm_int x, y, u, v, A, B, C, D;
1942 int32 res;
1943
1944 /* b cannot be negative */
1945 if (b->sign == PSTM_NEG || pstm_iszero(b) == 1) {
1946 return PS_LIMIT_FAIL;
1947 }
1948
1949 /* init temps */
1950 if (pstm_init_size(pool, &x, b->used) != PSTM_OKAY) {
1951 return PS_MEM_FAIL;
1952 }
1953
1954 /* x = a, y = b */
1955 if ((res = pstm_mod(pool, a, b, &x)) != PSTM_OKAY) {
1956 goto LBL_X;
1957 }
1958
1959 if (pstm_init_copy(pool, &y, b, 0) != PSTM_OKAY) {
1960 goto LBL_X;
1961 }
1962
1963 /* 2. [modified] if x,y are both even then return an error! */
1964 if (pstm_iseven (&x) == 1 && pstm_iseven (&y) == 1) {
1965 res = PS_FAILURE;
1966 goto LBL_Y;
1967 }
1968
1969 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1970 if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) {
1971 goto LBL_Y;
1972 }
1973 if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) {
1974 goto LBL_U;
1975 }
1976
1977 if ((res = pstm_init_size(pool, &A, sizeof(pstm_digit))) != PSTM_OKAY) {
1978 goto LBL_V;
1979 }
1980
1981 if ((res = pstm_init_size(pool, &D, sizeof(pstm_digit))) != PSTM_OKAY) {
1982 goto LBL_A;
1983 }
1984 pstm_set (&A, 1);
1985 pstm_set (&D, 1);
1986
1987 if ((res = pstm_init(pool, &B)) != PSTM_OKAY) {
1988 goto LBL_D;
1989 }
1990 if ((res = pstm_init(pool, &C)) != PSTM_OKAY) {
1991 goto LBL_B;
1992 }
1993
1994top:
1995 /* 4. while u is even do */
1996 while (pstm_iseven (&u) == 1) {
1997 /* 4.1 u = u/2 */
1998 if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) {
1999 goto LBL_C;
2000 }
2001
2002 /* 4.2 if A or B is odd then */
2003 if (pstm_isodd (&A) == 1 || pstm_isodd (&B) == 1) {
2004 /* A = (A+y)/2, B = (B-x)/2 */
2005 if ((res = pstm_add (&A, &y, &A)) != PSTM_OKAY) {
2006 goto LBL_C;
2007 }
2008 if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) {
2009 goto LBL_C;
2010 }
2011 }
2012 /* A = A/2, B = B/2 */
2013 if ((res = pstm_div_2 (&A, &A)) != PSTM_OKAY) {
2014 goto LBL_C;
2015 }
2016 if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) {
2017 goto LBL_C;
2018 }
2019 }
2020
2021 /* 5. while v is even do */
2022 while (pstm_iseven (&v) == 1) {
2023 /* 5.1 v = v/2 */
2024 if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) {
2025 goto LBL_C;
2026 }
2027
2028 /* 5.2 if C or D is odd then */
2029 if (pstm_isodd (&C) == 1 || pstm_isodd (&D) == 1) {
2030 /* C = (C+y)/2, D = (D-x)/2 */
2031 if ((res = pstm_add (&C, &y, &C)) != PSTM_OKAY) {
2032 goto LBL_C;
2033 }
2034 if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) {
2035 goto LBL_C;
2036 }
2037 }
2038 /* C = C/2, D = D/2 */
2039 if ((res = pstm_div_2 (&C, &C)) != PSTM_OKAY) {
2040 goto LBL_C;
2041 }
2042 if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) {
2043 goto LBL_C;
2044 }
2045 }
2046
2047 /* 6. if u >= v then */
2048 if (pstm_cmp (&u, &v) != PSTM_LT) {
2049 /* u = u - v, A = A - C, B = B - D */
2050 if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) {
2051 goto LBL_C;
2052 }
2053 if ((res = pstm_sub (&A, &C, &A)) != PSTM_OKAY) {
2054 goto LBL_C;
2055 }
2056 if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) {
2057 goto LBL_C;
2058 }
2059 } else {
2060 /* v - v - u, C = C - A, D = D - B */
2061 if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) {
2062 goto LBL_C;
2063 }
2064 if ((res = pstm_sub (&C, &A, &C)) != PSTM_OKAY) {
2065 goto LBL_C;
2066 }
2067 if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) {
2068 goto LBL_C;
2069 }
2070 }
2071
2072 /* if not zero goto step 4 */
2073 if (pstm_iszero (&u) == 0)
2074 goto top;
2075
2076 /* now a = C, b = D, gcd == g*v */
2077
2078 /* if v != 1 then there is no inverse */
2079 if (pstm_cmp_d (&v, 1) != PSTM_EQ) {
2080 res = PS_FAILURE;
2081 goto LBL_C;
2082 }
2083
2084 /* if its too low */
2085 while (pstm_cmp_d(&C, 0) == PSTM_LT) {
2086 if ((res = pstm_add(&C, b, &C)) != PSTM_OKAY) {
2087 goto LBL_C;
2088 }
2089 }
2090
2091 /* too big */
2092 while (pstm_cmp_mag(&C, b) != PSTM_LT) {
2093 if ((res = pstm_sub(&C, b, &C)) != PSTM_OKAY) {
2094 goto LBL_C;
2095 }
2096 }
2097
2098 /* C is now the inverse */
2099 if ((res = pstm_copy(&C, c)) != PSTM_OKAY) {
2100 goto LBL_C;
2101 }
2102 res = PSTM_OKAY;
2103
2104LBL_C: pstm_clear(&C);
2105LBL_D: pstm_clear(&D);
2106LBL_B: pstm_clear(&B);
2107LBL_A: pstm_clear(&A);
2108LBL_V: pstm_clear(&v);
2109LBL_U: pstm_clear(&u);
2110LBL_Y: pstm_clear(&y);
2111LBL_X: pstm_clear(&x);
2112
2113 return res;
2114}
2115
2116/* c = 1/a (mod b) for odd b only */
2117int32 pstm_invmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c)
2118{
2119 pstm_int x, y, u, v, B, D;
2120 int32 res;
2121 uint16 neg, sanity;
2122
2123 /* 2. [modified] b must be odd */
2124 if (pstm_iseven (b) == 1) {
2125 return pstm_invmod_slow(pool, a,b,c);
2126 }
2127
2128 /* x == modulus, y == value to invert */
2129 if ((res = pstm_init_copy(pool, &x, b, 0)) != PSTM_OKAY) {
2130 return res;
2131 }
2132
2133 if ((res = pstm_init_size(pool, &y, a->alloc)) != PSTM_OKAY) {
2134 goto LBL_X;
2135 }
2136
2137 /* we need y = |a| */
2138 pstm_abs(a, &y);
2139
2140 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2141 if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) {
2142 goto LBL_Y;
2143 }
2144 if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) {
2145 goto LBL_U;
2146 }
2147 if ((res = pstm_init(pool, &B)) != PSTM_OKAY) {
2148 goto LBL_V;
2149 }
2150 if ((res = pstm_init(pool, &D)) != PSTM_OKAY) {
2151 goto LBL_B;
2152 }
2153
2154 pstm_set (&D, 1);
2155
2156 sanity = 0;
2157top:
2158 /* 4. while u is even do */
2159 while (pstm_iseven (&u) == 1) {
2160 /* 4.1 u = u/2 */
2161 if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) {
2162 goto LBL_D;
2163 }
2164
2165 /* 4.2 if B is odd then */
2166 if (pstm_isodd (&B) == 1) {
2167 if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) {
2168 goto LBL_D;
2169 }
2170 }
2171 /* B = B/2 */
2172 if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) {
2173 goto LBL_D;
2174 }
2175 }
2176
2177 /* 5. while v is even do */
2178 while (pstm_iseven (&v) == 1) {
2179 /* 5.1 v = v/2 */
2180 if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) {
2181 goto LBL_D;
2182 }
2183 /* 5.2 if D is odd then */
2184 if (pstm_isodd (&D) == 1) {
2185 /* D = (D-x)/2 */
2186 if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) {
2187 goto LBL_D;
2188 }
2189 }
2190 /* D = D/2 */
2191 if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) {
2192 goto LBL_D;
2193 }
2194 }
2195
2196 /* 6. if u >= v then */
2197 if (pstm_cmp (&u, &v) != PSTM_LT) {
2198 /* u = u - v, B = B - D */
2199 if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) {
2200 goto LBL_D;
2201 }
2202 if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) {
2203 goto LBL_D;
2204 }
2205 } else {
2206 /* v - v - u, D = D - B */
2207 if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) {
2208 goto LBL_D;
2209 }
2210 if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) {
2211 goto LBL_D;
2212 }
2213 }
2214
2215 /* if not zero goto step 4 */
2216 if (sanity++ > 1000) {
2217 res = PS_LIMIT_FAIL;
2218 goto LBL_D;
2219 }
2220 if (pstm_iszero (&u) == 0) {
2221 goto top;
2222 }
2223
2224 /* now a = C, b = D, gcd == g*v */
2225
2226 /* if v != 1 then there is no inverse */
2227 if (pstm_cmp_d (&v, 1) != PSTM_EQ) {
2228 res = PS_FAILURE;
2229 goto LBL_D;
2230 }
2231
2232 /* b is now the inverse */
2233 neg = a->sign;
2234 while (D.sign == PSTM_NEG) {
2235 if ((res = pstm_add (&D, b, &D)) != PSTM_OKAY) {
2236 goto LBL_D;
2237 }
2238 }
2239 if ((res = pstm_copy (&D, c)) != PSTM_OKAY) {
2240 goto LBL_D;
2241 }
2242 c->sign = neg;
2243 res = PSTM_OKAY;
2244
2245LBL_D: pstm_clear(&D);
2246LBL_B: pstm_clear(&B);
2247LBL_V: pstm_clear(&v);
2248LBL_U: pstm_clear(&u);
2249LBL_Y: pstm_clear(&y);
2250LBL_X: pstm_clear(&x);
2251 return res;
2252}
2253#endif /* !DISABLE_PSTM */
2254/******************************************************************************/
diff --git a/networking/tls_pstm.h b/networking/tls_pstm.h
new file mode 100644
index 000000000..1affc1b69
--- /dev/null
+++ b/networking/tls_pstm.h
@@ -0,0 +1,238 @@
1/**
2 * @file pstm.h
3 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
4 *
5 * multiple-precision integer library.
6 */
7/*
8 * Copyright (c) 2013-2015 INSIDE Secure Corporation
9 * Copyright (c) PeerSec Networks, 2002-2011
10 * All Rights Reserved
11 *
12 * The latest version of this code is available at http://www.matrixssl.org
13 *
14 * This software is open source; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This General Public License does NOT permit incorporating this software
20 * into proprietary programs. If you are unable to comply with the GPL, a
21 * commercial license for this software may be purchased from INSIDE at
22 * http://www.insidesecure.com/eng/Company/Locations
23 *
24 * This program is distributed in WITHOUT ANY WARRANTY; without even the
25 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
26 * See the GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 * http://www.gnu.org/copyleft/gpl.html
32 */
33/******************************************************************************/
34
35#ifndef _h_PSTMATH
36#define _h_PSTMATH
37#ifndef DISABLE_PSTM
38
39/* Define this here to avoid including circular limits.h on some platforms */
40#ifndef CHAR_BIT
41#define CHAR_BIT 8
42#endif
43
44/******************************************************************************/
45/*
46 If native 64 bit integers are not supported, we do not support 32x32->64
47 in hardware, so we must set the 16 bit flag to produce 16x16->32 products.
48*/
49#ifndef HAVE_NATIVE_INT64
50 #define PSTM_16BIT
51#endif /* ! HAVE_NATIVE_INT64 */
52
53/******************************************************************************/
54/*
55 Some default configurations.
56
57 pstm_word should be the largest value the processor can hold as the product
58 of a multiplication. Most platforms support a 32x32->64 MAC instruction,
59 so 64bits is the default pstm_word size.
60 pstm_digit should be half the size of pstm_word
61 */
62#ifdef PSTM_8BIT
63/* 8-bit digits, 16-bit word products */
64 typedef unsigned char pstm_digit;
65 typedef unsigned short pstm_word;
66 #define DIGIT_BIT 8
67
68#elif defined(PSTM_16BIT)
69/* 16-bit digits, 32-bit word products */
70 typedef unsigned short pstm_digit;
71 typedef unsigned long pstm_word;
72 #define DIGIT_BIT 16
73
74#elif defined(PSTM_64BIT)
75/* 64-bit digits, 128-bit word products */
76 #ifndef __GNUC__
77 #error "64bit digits requires GCC"
78 #endif
79 typedef unsigned long pstm_digit;
80 typedef unsigned long pstm_word __attribute__ ((mode(TI)));
81 #define DIGIT_BIT 64
82
83#else
84/* This is the default case, 32-bit digits, 64-bit word products */
85 typedef uint32 pstm_digit;
86 typedef uint64 pstm_word;
87 #define DIGIT_BIT 32
88 #define PSTM_32BIT
89#endif /* digit and word size */
90
91#define PSTM_MASK (pstm_digit)(-1)
92#define PSTM_DIGIT_MAX PSTM_MASK
93
94/******************************************************************************/
95/*
96 equalities
97 */
98#define PSTM_LT -1 /* less than */
99#define PSTM_EQ 0 /* equal to */
100#define PSTM_GT 1 /* greater than */
101
102#define PSTM_ZPOS 0 /* positive integer */
103#define PSTM_NEG 1 /* negative */
104
105#define PSTM_OKAY PS_SUCCESS
106#define PSTM_MEM PS_MEM_FAIL
107
108/******************************************************************************/
109/*
110 Various build options
111 */
112#define PSTM_DEFAULT_INIT 64 /* default (64) digits of allocation */
113#define PSTM_MAX_SIZE 4096
114
/*
	Multiple-precision integer: a sign plus an array of pstm_digit.
*/
typedef struct {
	/*
		used:  number of digits currently in use; 0 means the value is
		       zero (see pstm_iszero)
		alloc: presumably the number of digits allocated in dp
		       (callers compare digit counts against it) -- TODO confirm
		sign:  PSTM_ZPOS or PSTM_NEG
	*/
	int16	used, alloc, sign;
	/* digit array; dp[0] is the lowest digit (its bit 0 decides parity) */
	pstm_digit	*dp;
	/* NOTE(review): looks like the pool the digits came from -- confirm */
	psPool_t	*pool;
} pstm_int;
120
121/******************************************************************************/
122/*
123 Operations on large integers
124 */
/* PS_TRUE when no digit is in use, i.e. the value is zero */
#define pstm_iszero(a) (((a)->used == 0) ? PS_TRUE : PS_FALSE)
/* PS_TRUE when non-zero and bit 0 of the lowest digit is clear (zero itself yields PS_FALSE) */
#define pstm_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? PS_TRUE : PS_FALSE)
/* PS_TRUE when non-zero and bit 0 of the lowest digit is set */
#define pstm_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? PS_TRUE : PS_FALSE)
/*
	b = |a|: copies a into b and clears b's sign.
	NOTE(review): the return value of pstm_copy() is discarded, and the
	expansion is a bare { } block (not do/while(0)), so it must not be
	used as the body of an if that is followed by else.
*/
#define pstm_abs(a, b) { pstm_copy(a, b); (b)->sign = 0; }
129
130extern void pstm_set(pstm_int *a, pstm_digit b);
131
132extern void pstm_zero(pstm_int * a);
133
134extern int32 pstm_init(psPool_t *pool, pstm_int * a);
135
136extern int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size);
137
138extern int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b,
139 int16 toSqr);
140
141extern int16 pstm_count_bits (pstm_int * a);
142
143extern int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a,
144 uint32 len);
145
146extern int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c);
147
148extern int32 pstm_unsigned_bin_size(pstm_int *a);
149
150extern int32 pstm_copy(pstm_int * a, pstm_int * b);
151
152extern void pstm_exch(pstm_int * a, pstm_int * b);
153
154extern void pstm_clear(pstm_int * a);
155
156extern void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2,
157 pstm_int *mp3, pstm_int *mp4, pstm_int *mp5, pstm_int *mp6,
158 pstm_int *mp7);
159
160extern int32 pstm_grow(pstm_int * a, int16 size);
161
162extern void pstm_clamp(pstm_int * a);
163
164extern int32 pstm_cmp(pstm_int * a, pstm_int * b);
165
166extern int32 pstm_cmp_mag(pstm_int * a, pstm_int * b);
167
168extern void pstm_rshd(pstm_int *a, int16 x);
169
170extern int32 pstm_lshd(pstm_int * a, int16 b);
171
172extern int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
173 pstm_int *d);
174
175extern int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c,
176 pstm_int *d);
177
178extern int32 pstm_div_2(pstm_int * a, pstm_int * b);
179
180extern int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c);
181
182extern int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c);
183
184extern int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c);
185
186extern int32 pstm_mul_2(pstm_int * a, pstm_int * b);
187
188extern int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c);
189
190extern int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
191 pstm_int *d);
192
193extern int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P,
194 pstm_int *Y);
195
196extern int32 pstm_2expt(pstm_int *a, int16 b);
197
198extern int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c);
199
200extern int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a,
201 unsigned char *b);
202
203extern int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a,
204 unsigned char *b);
205
206extern int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho);
207
208///bbox: pool unused
209#define pstm_montgomery_reduce(pool, a, m, mp, paD, paDlen) \
210 pstm_montgomery_reduce( a, m, mp, paD, paDlen)
211extern int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
212 pstm_digit mp, pstm_digit *paD, uint32 paDlen);
213
214#define pstm_mul_comba(pool, A, B, C, paD, paDlen) \
215 pstm_mul_comba( A, B, C, paD, paDlen)
216extern int32 pstm_mul_comba(psPool_t *pool, pstm_int *A, pstm_int *B,
217 pstm_int *C, pstm_digit *paD, uint32 paDlen);
218
219///bbox: pool unused
220#define pstm_sqr_comba(pool, A, B, paD, paDlen) \
221 pstm_sqr_comba( A, B, paD, paDlen)
222extern int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B,
223 pstm_digit *paD, uint32 paDlen);
224
225extern int32 pstm_cmp_d(pstm_int *a, pstm_digit b);
226
227extern int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b);
228
229extern int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c);
230
231extern int32 pstm_invmod(psPool_t *pool, pstm_int * a, pstm_int * b,
232 pstm_int * c);
233
234#else /* DISABLE_PSTM */
235 typedef int32 pstm_int;
236#endif /* !DISABLE_PSTM */
237#endif /* _h_PSTMATH */
238
diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c
new file mode 100644
index 000000000..c231c4ddf
--- /dev/null
+++ b/networking/tls_pstm_montgomery_reduce.c
@@ -0,0 +1,423 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
8/**
9 * @file pstm_montgomery_reduce.c
10 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
11 *
12 * Multiprecision Montgomery Reduction.
13 */
14/*
15 * Copyright (c) 2013-2015 INSIDE Secure Corporation
16 * Copyright (c) PeerSec Networks, 2002-2011
17 * All Rights Reserved
18 *
19 * The latest version of this code is available at http://www.matrixssl.org
20 *
21 * This software is open source; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This General Public License does NOT permit incorporating this software
27 * into proprietary programs. If you are unable to comply with the GPL, a
28 * commercial license for this software may be purchased from INSIDE at
29 * http://www.insidesecure.com/eng/Company/Locations
30 *
31 * This program is distributed in WITHOUT ANY WARRANTY; without even the
32 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
33 * See the GNU General Public License for more details.
34 *
35 * You should have received a copy of the GNU General Public License
36 * along with this program; if not, write to the Free Software
37 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 * http://www.gnu.org/copyleft/gpl.html
39 */
40/******************************************************************************/
41
42///bbox
43//#include "../cryptoApi.h"
44#ifndef DISABLE_PSTM
45
46/******************************************************************************/
47
48#if defined(PSTM_X86)
49/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
50#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
51#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
52#endif
53//#pragma message ("Using 32 bit x86 Assembly Optimizations")
54
55#define MONT_START
56#define MONT_FINI
57#define LOOP_END
58#define LOOP_START \
59 mu = c[x] * mp
60
61#define INNERMUL \
62asm( \
63 "movl %5,%%eax \n\t" \
64 "mull %4 \n\t" \
65 "addl %1,%%eax \n\t" \
66 "adcl $0,%%edx \n\t" \
67 "addl %%eax,%0 \n\t" \
68 "adcl $0,%%edx \n\t" \
69 "movl %%edx,%1 \n\t" \
70:"=g"(_c[LO]), "=r"(cy) \
71:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
72: "%eax", "%edx", "%cc")
73
74#define PROPCARRY \
75asm( \
76 "addl %1,%0 \n\t" \
77 "setb %%al \n\t" \
78 "movzbl %%al,%1 \n\t" \
79:"=g"(_c[LO]), "=r"(cy) \
80:"0"(_c[LO]), "1"(cy) \
81: "%eax", "%cc")
82
83/******************************************************************************/
84#elif defined(PSTM_X86_64)
85/* x86-64 optimized */
86#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
87#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
88#endif
89//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
90
91#define MONT_START
92#define MONT_FINI
93#define LOOP_END
94#define LOOP_START \
95mu = c[x] * mp
96
97#define INNERMUL \
98asm( \
99 "movq %5,%%rax \n\t" \
100 "mulq %4 \n\t" \
101 "addq %1,%%rax \n\t" \
102 "adcq $0,%%rdx \n\t" \
103 "addq %%rax,%0 \n\t" \
104 "adcq $0,%%rdx \n\t" \
105 "movq %%rdx,%1 \n\t" \
106 :"=g"(_c[LO]), "=r"(cy) \
107 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
108 : "%rax", "%rdx", "cc")
109
110#define INNERMUL8 \
111asm( \
112 "movq 0(%5),%%rax \n\t" \
113 "movq 0(%2),%%r10 \n\t" \
114 "movq 0x8(%5),%%r11 \n\t" \
115 "mulq %4 \n\t" \
116 "addq %%r10,%%rax \n\t" \
117 "adcq $0,%%rdx \n\t" \
118 "movq 0x8(%2),%%r10 \n\t" \
119 "addq %3,%%rax \n\t" \
120 "adcq $0,%%rdx \n\t" \
121 "movq %%rax,0(%0) \n\t" \
122 "movq %%rdx,%1 \n\t" \
123 \
124 "movq %%r11,%%rax \n\t" \
125 "movq 0x10(%5),%%r11 \n\t" \
126 "mulq %4 \n\t" \
127 "addq %%r10,%%rax \n\t" \
128 "adcq $0,%%rdx \n\t" \
129 "movq 0x10(%2),%%r10 \n\t" \
130 "addq %3,%%rax \n\t" \
131 "adcq $0,%%rdx \n\t" \
132 "movq %%rax,0x8(%0) \n\t" \
133 "movq %%rdx,%1 \n\t" \
134 \
135 "movq %%r11,%%rax \n\t" \
136 "movq 0x18(%5),%%r11 \n\t" \
137 "mulq %4 \n\t" \
138 "addq %%r10,%%rax \n\t" \
139 "adcq $0,%%rdx \n\t" \
140 "movq 0x18(%2),%%r10 \n\t" \
141 "addq %3,%%rax \n\t" \
142 "adcq $0,%%rdx \n\t" \
143 "movq %%rax,0x10(%0) \n\t" \
144 "movq %%rdx,%1 \n\t" \
145 \
146 "movq %%r11,%%rax \n\t" \
147 "movq 0x20(%5),%%r11 \n\t" \
148 "mulq %4 \n\t" \
149 "addq %%r10,%%rax \n\t" \
150 "adcq $0,%%rdx \n\t" \
151 "movq 0x20(%2),%%r10 \n\t" \
152 "addq %3,%%rax \n\t" \
153 "adcq $0,%%rdx \n\t" \
154 "movq %%rax,0x18(%0) \n\t" \
155 "movq %%rdx,%1 \n\t" \
156 \
157 "movq %%r11,%%rax \n\t" \
158 "movq 0x28(%5),%%r11 \n\t" \
159 "mulq %4 \n\t" \
160 "addq %%r10,%%rax \n\t" \
161 "adcq $0,%%rdx \n\t" \
162 "movq 0x28(%2),%%r10 \n\t" \
163 "addq %3,%%rax \n\t" \
164 "adcq $0,%%rdx \n\t" \
165 "movq %%rax,0x20(%0) \n\t" \
166 "movq %%rdx,%1 \n\t" \
167 \
168 "movq %%r11,%%rax \n\t" \
169 "movq 0x30(%5),%%r11 \n\t" \
170 "mulq %4 \n\t" \
171 "addq %%r10,%%rax \n\t" \
172 "adcq $0,%%rdx \n\t" \
173 "movq 0x30(%2),%%r10 \n\t" \
174 "addq %3,%%rax \n\t" \
175 "adcq $0,%%rdx \n\t" \
176 "movq %%rax,0x28(%0) \n\t" \
177 "movq %%rdx,%1 \n\t" \
178 \
179 "movq %%r11,%%rax \n\t" \
180 "movq 0x38(%5),%%r11 \n\t" \
181 "mulq %4 \n\t" \
182 "addq %%r10,%%rax \n\t" \
183 "adcq $0,%%rdx \n\t" \
184 "movq 0x38(%2),%%r10 \n\t" \
185 "addq %3,%%rax \n\t" \
186 "adcq $0,%%rdx \n\t" \
187 "movq %%rax,0x30(%0) \n\t" \
188 "movq %%rdx,%1 \n\t" \
189 \
190 "movq %%r11,%%rax \n\t" \
191 "mulq %4 \n\t" \
192 "addq %%r10,%%rax \n\t" \
193 "adcq $0,%%rdx \n\t" \
194 "addq %3,%%rax \n\t" \
195 "adcq $0,%%rdx \n\t" \
196 "movq %%rax,0x38(%0) \n\t" \
197 "movq %%rdx,%1 \n\t" \
198 \
199 :"=r"(_c), "=r"(cy) \
200 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
201 : "%rax", "%rdx", "%r10", "%r11", "cc")
202
203#define PROPCARRY \
204asm( \
205 "addq %1,%0 \n\t" \
206 "setb %%al \n\t" \
207 "movzbq %%al,%1 \n\t" \
208 :"=g"(_c[LO]), "=r"(cy) \
209 :"0"(_c[LO]), "1"(cy) \
210 : "%rax", "cc")
211
212/******************************************************************************/
213#elif defined(PSTM_ARM)
214
215#define MONT_START
216#define MONT_FINI
217#define LOOP_END
218#define LOOP_START \
219mu = c[x] * mp
220
221#ifdef __thumb2__
222//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
223#define INNERMUL \
224asm( \
225 " LDR r0,%1 \n\t" \
226 " ADDS r0,r0,%0 \n\t" \
227 " ITE CS \n\t" \
228 " MOVCS %0,#1 \n\t" \
229 " MOVCC %0,#0 \n\t" \
230 " UMLAL r0,%0,%3,%4 \n\t" \
231 " STR r0,%1 \n\t" \
232 :"=r"(cy),"=m"(_c[0])\
233 :"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
234 :"r0","%cc");
235#define PROPCARRY \
236asm( \
237 " LDR r0,%1 \n\t" \
238 " ADDS r0,r0,%0 \n\t" \
239 " STR r0,%1 \n\t" \
240 " ITE CS \n\t" \
241 " MOVCS %0,#1 \n\t" \
242 " MOVCC %0,#0 \n\t" \
243 :"=r"(cy),"=m"(_c[0])\
244 :"0"(cy),"m"(_c[0])\
245 :"r0","%cc");
246#else /* Non-Thumb2 code */
247//#pragma message ("Using 32 bit ARM Assembly Optimizations")
248#define INNERMUL \
249asm( \
250 " LDR r0,%1 \n\t" \
251 " ADDS r0,r0,%0 \n\t" \
252 " MOVCS %0,#1 \n\t" \
253 " MOVCC %0,#0 \n\t" \
254 " UMLAL r0,%0,%3,%4 \n\t" \
255 " STR r0,%1 \n\t" \
256 :"=r"(cy),"=m"(_c[0])\
257 :"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
258 :"r0","%cc");
259#define PROPCARRY \
260asm( \
261 " LDR r0,%1 \n\t" \
262 " ADDS r0,r0,%0 \n\t" \
263 " STR r0,%1 \n\t" \
264 " MOVCS %0,#1 \n\t" \
265 " MOVCC %0,#0 \n\t" \
266 :"=r"(cy),"=m"(_c[0])\
267 :"0"(cy),"m"(_c[0])\
268 :"r0","%cc");
269#endif /* __thumb2__ */
270
271
272/******************************************************************************/
273#elif defined(PSTM_MIPS)
274/* MIPS32 */
275//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
276#define MONT_START
277#define MONT_FINI
278#define LOOP_END
279#define LOOP_START \
280mu = c[x] * mp
281
282#define INNERMUL \
283asm( \
284 " multu %3,%4 \n\t" \
285 " mflo $12 \n\t" \
286 " mfhi $13 \n\t" \
287 " addu $12,$12,%0 \n\t" \
288 " sltu $10,$12,%0 \n\t" \
289 " addu $13,$13,$10 \n\t" \
290 " lw $10,%1 \n\t" \
291 " addu $12,$12,$10 \n\t" \
292 " sltu $10,$12,$10 \n\t" \
293 " addu %0,$13,$10 \n\t" \
294 " sw $12,%1 \n\t" \
295 :"=r"(cy),"=m"(_c[0])\
296 :"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
297 :"$10","$12","$13")\
298; ++tmpm;
299
300#define PROPCARRY \
301asm( \
302 " lw $10,%1 \n\t" \
303 " addu $10,$10,%0 \n\t" \
304 " sw $10,%1 \n\t" \
305 " sltu %0,$10,%0 \n\t" \
306 :"=r"(cy),"=m"(_c[0])\
307 :"r"(cy),"r"(_c[0])\
308 :"$10");
309
310
311/******************************************************************************/
312#else
313
/*
	ISO C code: portable fallback used when none of the PSTM_X86,
	PSTM_X86_64, PSTM_ARM or PSTM_MIPS variants above is selected.

	These macros are expanded inside pstm_montgomery_reduce() and refer
	to its local variables by name: c, x, mp, mu, _c, cy and tmpm.
*/
/* no per-call setup / teardown and no per-row epilogue in this variant */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* per-row multiplier: mu = c[x] * mp, truncated to one digit */
#define LOOP_START \
	mu = c[x] * mp

/*
	One inner step: t = _c[0] + cy + mu * (*tmpm), computed in the
	double-width pstm_word. The low digit is stored back to _c[0], the
	high digit becomes the new carry cy, and tmpm is advanced.
*/
#define INNERMUL \
	do { pstm_word t; \
	t = ((pstm_word)_c[0] + (pstm_word)cy) + \
		(((pstm_word)mu) * ((pstm_word)*tmpm++)); \
	_c[0] = (pstm_digit)t; \
	cy = (pstm_digit)(t >> DIGIT_BIT); \
	} while (0)

/*
	Add the carry into _c[0]; cy becomes 1 if that addition wrapped
	around (sum smaller than the carry added), otherwise 0.
*/
#define PROPCARRY \
	do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)
331
332#endif
333
334/******************************************************************************/
335
336#define LO 0
337
338/* computes x/R == x (mod N) via Montgomery Reduction */
339int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
340 pstm_digit mp, pstm_digit *paD, uint32 paDlen)
341{
342 pstm_digit *c, *_c, *tmpm, mu;
343 int32 oldused, x, y;
344 int16 pa;
345
346 pa = m->used;
347 if (pa > a->alloc) {
348 /* Sanity test for bad numbers. This will confirm no buffer overruns */
349 return PS_LIMIT_FAIL;
350 }
351
352 if (paD && paDlen >= (uint32)2*pa+1) {
353 c = paD;
354 memset(c, 0x0, paDlen);
355 } else {
356 c = xzalloc(2*pa+1);
357 }
358 /* copy the input */
359 oldused = a->used;
360 for (x = 0; x < oldused; x++) {
361 c[x] = a->dp[x];
362 }
363
364 MONT_START;
365
366 for (x = 0; x < pa; x++) {
367 pstm_digit cy = 0;
368 /* get Mu for this round */
369 LOOP_START;
370 _c = c + x;
371 tmpm = m->dp;
372 y = 0;
373#ifdef PSTM_X86_64
374 for (; y < (pa & ~7); y += 8) {
375 INNERMUL8;
376 _c += 8;
377 tmpm += 8;
378 }
379#endif /* PSTM_X86_64 */
380 for (; y < pa; y++) {
381 INNERMUL;
382 ++_c;
383 }
384 LOOP_END;
385 while (cy) {
386 PROPCARRY;
387 ++_c;
388 }
389 }
390
391 /* now copy out */
392 _c = c + pa;
393 tmpm = a->dp;
394 for (x = 0; x < pa+1; x++) {
395 *tmpm++ = *_c++;
396 }
397
398 for (; x < oldused; x++) {
399 *tmpm++ = 0;
400 }
401
402 MONT_FINI;
403
404 a->used = pa+1;
405 pstm_clamp(a);
406
407 /* reuse x as return code */
408 x = PSTM_OKAY;
409
410 /* if A >= m then A = A - m */
411 if (pstm_cmp_mag (a, m) != PSTM_LT) {
412 if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
413 x = PS_MEM_FAIL;
414 }
415 }
416 if (paDlen < (uint32)2*pa+1) {
417 psFree(c, pool);
418 }
419 return x;
420}
421
422#endif /* !DISABLE_PSTM */
423/******************************************************************************/
diff --git a/networking/tls_pstm_mul_comba.c b/networking/tls_pstm_mul_comba.c
new file mode 100644
index 000000000..6e051baeb
--- /dev/null
+++ b/networking/tls_pstm_mul_comba.c
@@ -0,0 +1,777 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
8/**
9 * @file pstm_mul_comba.c
10 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
11 *
12 * Multiprecision multiplication with Comba technique.
13 */
14/*
15 * Copyright (c) 2013-2015 INSIDE Secure Corporation
16 * Copyright (c) PeerSec Networks, 2002-2011
17 * All Rights Reserved
18 *
19 * The latest version of this code is available at http://www.matrixssl.org
20 *
21 * This software is open source; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This General Public License does NOT permit incorporating this software
27 * into proprietary programs. If you are unable to comply with the GPL, a
28 * commercial license for this software may be purchased from INSIDE at
29 * http://www.insidesecure.com/eng/Company/Locations
30 *
31 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
32 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
33 * See the GNU General Public License for more details.
34 *
35 * You should have received a copy of the GNU General Public License
36 * along with this program; if not, write to the Free Software
37 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 * http://www.gnu.org/copyleft/gpl.html
39 */
40/******************************************************************************/
41
42///bbox
43//#include "../cryptoApi.h"
44#ifndef DISABLE_PSTM
45
46/******************************************************************************/
/*
 * Comba accumulator macros.  One variant is selected per target through
 * PSTM_X86 / PSTM_X86_64 / PSTM_ARM / PSTM_MIPS, with a portable C version
 * as the last branch.  Every variant defines the same set:
 *   COMBA_START, COMBA_FINI  hooks at the start/end of a multiply (empty)
 *   COMBA_CLEAR              c0 = c1 = c2 = 0
 *   COMBA_FORWARD            move the accumulator down one digit:
 *                            c0 <- c1, c1 <- c2, c2 <- 0
 *   COMBA_STORE(x)           x = c0
 *   COMBA_STORE2(x)          x = c1
 *   MULADD(i, j)             add the double-width product i*j into the
 *                            three-digit accumulator c2:c1:c0
 * The macros work on locals named c0, c1 and c2 which the using function
 * has to declare.  Their bodies already end in ';', so "MACRO;" at a call
 * site expands to the statement followed by an empty statement.
 */
47#if defined(PSTM_X86)
48/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
49#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
50#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
51#endif
52//#pragma message ("Using 32 bit x86 Assembly Optimizations")
53
54/* anything you need at the start (nothing on this target) */
55#define COMBA_START
56
57/* clear the chaining variables */
58#define COMBA_CLEAR \
59 c0 = c1 = c2 = 0;
60
61/* forward the carry to the next digit */
62#define COMBA_FORWARD \
63 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
64
65/* store the first sum */
66#define COMBA_STORE(x) \
67 x = c0;
68
69/* store the second sum [carry] */
70#define COMBA_STORE2(x) \
71 x = c1;
72
73/* anything you need at the end (nothing on this target) */
74#define COMBA_FINI
75
76/* edx:eax = i * j, then c0 += eax, c1 += edx + carry, c2 += carry */
77#define MULADD(i, j) \
78asm( \
79 "movl %6,%%eax \n\t" \
80 "mull %7 \n\t" \
81 "addl %%eax,%0 \n\t" \
82 "adcl %%edx,%1 \n\t" \
83 "adcl $0,%2 \n\t" \
84 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
85
86/******************************************************************************/
87#elif defined(PSTM_X86_64)
88/* x86-64 optimized */
89#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
90#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
91#endif
92//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
93
94/* anything you need at the start (nothing on this target) */
95#define COMBA_START
96
97/* clear the chaining variables */
98#define COMBA_CLEAR \
99c0 = c1 = c2 = 0;
100
101/* forward the carry to the next digit */
102#define COMBA_FORWARD \
103do { c0 = c1; c1 = c2; c2 = 0; } while (0);
104
105/* store the first sum */
106#define COMBA_STORE(x) \
107x = c0;
108
109/* store the second sum [carry] */
110#define COMBA_STORE2(x) \
111x = c1;
112
113/* anything you need at the end (nothing on this target) */
114#define COMBA_FINI
115
116/* rdx:rax = i * j, then c0 += rax, c1 += rdx + carry, c2 += carry */
117#define MULADD(i, j) \
118asm ( \
119 "movq %6,%%rax \n\t" \
120 "mulq %7 \n\t" \
121 "addq %%rax,%0 \n\t" \
122 "adcq %%rdx,%1 \n\t" \
123 "adcq $0,%2 \n\t" \
124 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
125
126/******************************************************************************/
127#elif defined(PSTM_ARM)
128/* ARM code */
129//#pragma message ("Using 32 bit ARM Assembly Optimizations")
130/* same macro set as the x86 variants above; only MULADD is target specific */
131#define COMBA_START
132
133#define COMBA_CLEAR \
134c0 = c1 = c2 = 0;
135
136#define COMBA_FORWARD \
137do { c0 = c1; c1 = c2; c2 = 0; } while (0);
138
139#define COMBA_STORE(x) \
140x = c0;
141
142#define COMBA_STORE2(x) \
143x = c1;
144
145#define COMBA_FINI
146/* r1:r0 = i * j (UMULL), then c0 += r0, c1 += r1 + carry, c2 += carry */
147#define MULADD(i, j) \
148asm( \
149 " UMULL r0,r1,%6,%7 \n\t" \
150 " ADDS %0,%0,r0 \n\t" \
151 " ADCS %1,%1,r1 \n\t" \
152 " ADC %2,%2,#0 \n\t" \
153 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
154
155/******************************************************************************/
156#elif defined(PSTM_MIPS)
157/* MIPS32 code */
158//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
159/* same macro set as the x86 variants above; only MULADD is target specific */
160#define COMBA_START
161
162#define COMBA_CLEAR \
163c0 = c1 = c2 = 0;
164
165#define COMBA_FORWARD \
166do { c0 = c1; c1 = c2; c2 = 0; } while (0);
167
168#define COMBA_STORE(x) \
169x = c0;
170
171#define COMBA_STORE2(x) \
172x = c1;
173
174#define COMBA_FINI
175/* multu i,j; $12/$13 first hold lo/hi of the product, then the carries
 * computed with sltu, which are added into c1 and c2.
 * NOTE(review): multu writes HI/LO, and those are not in the clobber list
 * ("$12","$13" only) - confirm this is safe with the compilers in use. */
176#define MULADD(i, j) \
177asm( \
178 " multu %6,%7 \n\t" \
179 " mflo $12 \n\t" \
180 " mfhi $13 \n\t" \
181 " addu %0,%0,$12 \n\t" \
182 " sltu $12,%0,$12 \n\t" \
183 " addu %1,%1,$13 \n\t" \
184 " sltu $13,%1,$13 \n\t" \
185 " addu %1,%1,$12 \n\t" \
186 " sltu $12,%1,$12 \n\t" \
187 " addu %2,%2,$13 \n\t" \
188 " addu %2,%2,$12 \n\t" \
189 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
190
191/******************************************************************************/
192#else
193/* portable C fallback, used when none of the PSTM_<arch> options is defined */
194#define COMBA_START
195
196#define COMBA_CLEAR \
197 c0 = c1 = c2 = 0;
198
199#define COMBA_FORWARD \
200 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
201
202#define COMBA_STORE(x) \
203 x = c0;
204
205#define COMBA_STORE2(x) \
206 x = c1;
207
208#define COMBA_FINI
209/* t = c0 + i*j in double-width pstm_word; the carries are t >> DIGIT_BIT */
210#define MULADD(i, j) \
211 do { pstm_word t; \
212 t = (pstm_word)c0 + ((pstm_word)i) * ((pstm_word)j); c0 = (pstm_digit)t; \
213 t = (pstm_word)c1 + (t >> DIGIT_BIT); \
214 c1 = (pstm_digit)t; c2 += (pstm_digit)(t >> DIGIT_BIT); \
215 } while (0);
216
217#endif
218
219/******************************************************************************/
220/* generic PxQ multiplier */
221///bbox: pool unused
222#define pstm_mul_comba_gen(pool, A, B, C, paD, paDlen) \
223 pstm_mul_comba_gen( A, B, C, paD, paDlen)
224static int32 pstm_mul_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B,
225 pstm_int *C, pstm_digit *paD, uint32 paDlen)
226{
227 int16 paDfail, pa;
228 int32 ix, iy, iz, tx, ty;
229 pstm_digit c0, c1, c2, *tmpx, *tmpy, *dst;
230
231 COMBA_START;
232 COMBA_CLEAR;
233
234 paDfail = 0;
235 /* get size of output and trim */
236 pa = A->used + B->used;
237
238/*
239 If c is not large enough grow it and continue
240*/
241 if (C->alloc < pa) {
242 if (pstm_grow(C, pa) != PSTM_OKAY) {
243 return PS_MEM_FAIL;
244 }
245 }
246 if (paD != NULL) {
247 if (paDlen < (sizeof(pstm_digit) * pa)) {
248 paDfail = 1; /* have a paD but it's not large enough */
249 dst = xzalloc(sizeof(pstm_digit) * pa);
250 } else {
251 dst = paD;
252 memset(dst, 0x0, paDlen);
253 }
254 } else {
255 dst = xzalloc(sizeof(pstm_digit) * pa);
256 }
257
258 for (ix = 0; ix < pa; ix++) {
259 /* get offsets into the two bignums */
260 ty = min(ix, B->used-1);
261 tx = ix - ty;
262
263 /* setup temp aliases */
264 tmpx = A->dp + tx;
265 tmpy = B->dp + ty;
266/*
267 This is the number of times the loop will iterate, essentially it's
268 while (tx++ < a->used && ty-- >= 0) { ... }
269*/
270 iy = min(A->used-tx, ty+1);
271
272 /* execute loop */
273 COMBA_FORWARD;
274 for (iz = 0; iz < iy; ++iz) {
275 MULADD(*tmpx++, *tmpy--);
276 }
277
278 /* store term */
279 COMBA_STORE(dst[ix]);
280 }
281 COMBA_FINI;
282/*
283 setup dest
284 */
285 iy = C->used;
286 C->used = pa;
287 C->sign = A->sign ^ B->sign;
288 {
289 pstm_digit *tmpc;
290 tmpc = C->dp;
291 for (ix = 0; ix < pa; ix++) {
292 *tmpc++ = dst[ix];
293 }
294/*
295 clear unused digits [that existed in the old copy of c]
296 */
297 for (; ix < iy; ix++) {
298 *tmpc++ = 0;
299 }
300 }
301 pstm_clamp(C);
302
303 if ((paD == NULL) || (paDfail == 1)) {
304 psFree(dst, pool);
305 }
306
307 return PS_SUCCESS;
308}
309
310/******************************************************************************/
311#ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
/*
 * Fully unrolled 16x16 digit Comba multiply: C = A * B.
 *
 * Exactly 16 digits are read from each of A->dp and B->dp, whatever
 * A->used and B->used say - presumably the caller only dispatches here
 * when both are 16 (confirm against the call site).
 *
 * C is grown to 32 digits when needed.  Both operands are copied into the
 * local at[] (A in at[0..15], B in at[16..31]) before the first digit of
 * C->dp is stored.
 *
 * Each numbered section k sums every at[i] * at[16+j] with i + j == k
 * into the c2:c1:c0 accumulator and stores the low digit in C->dp[k].
 * The final carry goes to C->dp[31].
 *
 * Returns PSTM_OKAY, or PS_MEM_FAIL if C could not be grown.
 */
312static int32 pstm_mul_comba16(pstm_int *A, pstm_int *B, pstm_int *C)
313{
314 pstm_digit c0, c1, c2, at[32];
315
316 if (C->alloc < 32) {
317 if (pstm_grow(C, 32) != PSTM_OKAY) {
318 return PS_MEM_FAIL;
319 }
320 }
321 memcpy(at, A->dp, 16 * sizeof(pstm_digit));
322 memcpy(at+16, B->dp, 16 * sizeof(pstm_digit));
323
324 COMBA_START;
325
326 COMBA_CLEAR;
327 /* 0 */
328 MULADD(at[0], at[16]);
329 COMBA_STORE(C->dp[0]);
330 /* 1 */
331 COMBA_FORWARD;
332 MULADD(at[0], at[17]); MULADD(at[1], at[16]);
333 COMBA_STORE(C->dp[1]);
334 /* 2 */
335 COMBA_FORWARD;
336 MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]);
337 COMBA_STORE(C->dp[2]);
338 /* 3 */
339 COMBA_FORWARD;
340 MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]);
341 COMBA_STORE(C->dp[3]);
342 /* 4 */
343 COMBA_FORWARD;
344 MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]);
345 COMBA_STORE(C->dp[4]);
346 /* 5 */
347 COMBA_FORWARD;
348 MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]);
349 COMBA_STORE(C->dp[5]);
350 /* 6 */
351 COMBA_FORWARD;
352 MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]);
353 COMBA_STORE(C->dp[6]);
354 /* 7 */
355 COMBA_FORWARD;
356 MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]);
357 COMBA_STORE(C->dp[7]);
358 /* 8 */
359 COMBA_FORWARD;
360 MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]);
361 COMBA_STORE(C->dp[8]);
362 /* 9 */
363 COMBA_FORWARD;
364 MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]);
365 COMBA_STORE(C->dp[9]);
366 /* 10 */
367 COMBA_FORWARD;
368 MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]);
369 COMBA_STORE(C->dp[10]);
370 /* 11 */
371 COMBA_FORWARD;
372 MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]);
373 COMBA_STORE(C->dp[11]);
374 /* 12 */
375 COMBA_FORWARD;
376 MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]);
377 COMBA_STORE(C->dp[12]);
378 /* 13 */
379 COMBA_FORWARD;
380 MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]);
381 COMBA_STORE(C->dp[13]);
382 /* 14 */
383 COMBA_FORWARD;
384 MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]);
385 COMBA_STORE(C->dp[14]);
386 /* 15 */
387 COMBA_FORWARD;
388 MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]);
389 COMBA_STORE(C->dp[15]);
390 /* 16 */
391 COMBA_FORWARD;
392 MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]);
393 COMBA_STORE(C->dp[16]);
394 /* 17 */
395 COMBA_FORWARD;
396 MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]);
397 COMBA_STORE(C->dp[17]);
398 /* 18 */
399 COMBA_FORWARD;
400 MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]);
401 COMBA_STORE(C->dp[18]);
402 /* 19 */
403 COMBA_FORWARD;
404 MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]);
405 COMBA_STORE(C->dp[19]);
406 /* 20 */
407 COMBA_FORWARD;
408 MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]);
409 COMBA_STORE(C->dp[20]);
410 /* 21 */
411 COMBA_FORWARD;
412 MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]);
413 COMBA_STORE(C->dp[21]);
414 /* 22 */
415 COMBA_FORWARD;
416 MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]);
417 COMBA_STORE(C->dp[22]);
418 /* 23 */
419 COMBA_FORWARD;
420 MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]);
421 COMBA_STORE(C->dp[23]);
422 /* 24 */
423 COMBA_FORWARD;
424 MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]);
425 COMBA_STORE(C->dp[24]);
426 /* 25 */
427 COMBA_FORWARD;
428 MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]);
429 COMBA_STORE(C->dp[25]);
430 /* 26 */
431 COMBA_FORWARD;
432 MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]);
433 COMBA_STORE(C->dp[26]);
434 /* 27 */
435 COMBA_FORWARD;
436 MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]);
437 COMBA_STORE(C->dp[27]);
438 /* 28 */
439 COMBA_FORWARD;
440 MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]);
441 COMBA_STORE(C->dp[28]);
442 /* 29 */
443 COMBA_FORWARD;
444 MULADD(at[14], at[31]); MULADD(at[15], at[30]);
445 COMBA_STORE(C->dp[29]);
446 /* 30: last column; its carry (c1) becomes the top digit, C->dp[31] */
447 COMBA_FORWARD;
448 MULADD(at[15], at[31]);
449 COMBA_STORE(C->dp[30]);
450 COMBA_STORE2(C->dp[31]);
451 C->used = 32;
452 C->sign = A->sign ^ B->sign;
453 pstm_clamp(C);
454 COMBA_FINI;
455 return PSTM_OKAY;
456}
457#endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */
458
459
460#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
461static int32 pstm_mul_comba32(pstm_int *A, pstm_int *B, pstm_int *C)
462{
463 pstm_digit c0, c1, c2, at[64];
464 int32 out_size;
465
466 if (C->alloc < 64) {
467 if (pstm_grow(C, 64) != PSTM_OKAY) {
468 return PS_MEM_FAIL;
469 }
470 }
471
472 out_size = A->used + B->used;
473 memcpy(at, A->dp, 32 * sizeof(pstm_digit));
474 memcpy(at+32, B->dp, 32 * sizeof(pstm_digit));
475 COMBA_START;
476
477 COMBA_CLEAR;
478 /* 0 */
479 MULADD(at[0], at[32]);
480 COMBA_STORE(C->dp[0]);
481 /* 1 */
482 COMBA_FORWARD;
483 MULADD(at[0], at[33]); MULADD(at[1], at[32]);
484 COMBA_STORE(C->dp[1]);
485 /* 2 */
486 COMBA_FORWARD;
487 MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]);
488 COMBA_STORE(C->dp[2]);
489 /* 3 */
490 COMBA_FORWARD;
491 MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]);
492 COMBA_STORE(C->dp[3]);
493 /* 4 */
494 COMBA_FORWARD;
495 MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]);
496 COMBA_STORE(C->dp[4]);
497 /* 5 */
498 COMBA_FORWARD;
499 MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]);
500 COMBA_STORE(C->dp[5]);
501 /* 6 */
502 COMBA_FORWARD;
503 MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]);
504 COMBA_STORE(C->dp[6]);
505 /* 7 */
506 COMBA_FORWARD;
507 MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]);
508 COMBA_STORE(C->dp[7]);
509 /* 8 */
510 COMBA_FORWARD;
511 MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]);
512 COMBA_STORE(C->dp[8]);
513 /* 9 */
514 COMBA_FORWARD;
515 MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]);
516 COMBA_STORE(C->dp[9]);
517 /* 10 */
518 COMBA_FORWARD;
519 MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]);
520 COMBA_STORE(C->dp[10]);
521 /* 11 */
522 COMBA_FORWARD;
523 MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]);
524 COMBA_STORE(C->dp[11]);
525 /* 12 */
526 COMBA_FORWARD;
527 MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]);
528 COMBA_STORE(C->dp[12]);
529 /* 13 */
530 COMBA_FORWARD;
531 MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]);
532 COMBA_STORE(C->dp[13]);
533 /* 14 */
534 COMBA_FORWARD;
535 MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]);
536 COMBA_STORE(C->dp[14]);
537 /* 15 */
538 COMBA_FORWARD;
539 MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]);
540 COMBA_STORE(C->dp[15]);
541 /* 16 */
542 COMBA_FORWARD;
543 MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]);
544 COMBA_STORE(C->dp[16]);
545 /* 17 */
546 COMBA_FORWARD;
547 MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]);
548 COMBA_STORE(C->dp[17]);
549 /* 18 */
550 COMBA_FORWARD;
551 MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]);
552 COMBA_STORE(C->dp[18]);
553 /* 19 */
554 COMBA_FORWARD;
555 MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]);
556 COMBA_STORE(C->dp[19]);
557 /* 20 */
558 COMBA_FORWARD;
559 MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]);
560 COMBA_STORE(C->dp[20]);
561 /* 21 */
562 COMBA_FORWARD;
563 MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]);
564 COMBA_STORE(C->dp[21]);
565 /* 22 */
566 COMBA_FORWARD;
567 MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]);
568 COMBA_STORE(C->dp[22]);
569 /* 23 */
570 COMBA_FORWARD;
571 MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]);
572 COMBA_STORE(C->dp[23]);
573 /* 24 */
574 COMBA_FORWARD;
575 MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]);
576 COMBA_STORE(C->dp[24]);
577 /* 25 */
578 COMBA_FORWARD;
579 MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]);
580 COMBA_STORE(C->dp[25]);
581 /* 26 */
582 COMBA_FORWARD;
583 MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]);
584 COMBA_STORE(C->dp[26]);
585 /* 27 */
586 COMBA_FORWARD;
587 MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]);
588 COMBA_STORE(C->dp[27]);
589 /* 28 */
590 COMBA_FORWARD;
591 MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]);
592 COMBA_STORE(C->dp[28]);
593 /* 29 */
594 COMBA_FORWARD;
595 MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]);
596 COMBA_STORE(C->dp[29]);
597 /* 30 */
598 COMBA_FORWARD;
599 MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]);
600 COMBA_STORE(C->dp[30]);
601 /* 31 */
602 COMBA_FORWARD;
603 MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]);
604 COMBA_STORE(C->dp[31]);
605 /* 32 */
606 COMBA_FORWARD;
607 MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]);
608 COMBA_STORE(C->dp[32]);
609 /* 33 */
610 COMBA_FORWARD;
611 MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]);
612 COMBA_STORE(C->dp[33]);
613 /* 34 */
614 COMBA_FORWARD;
615 MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]);
616 COMBA_STORE(C->dp[34]);
617 /* 35 */
618 COMBA_FORWARD;
619 MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]);
620 COMBA_STORE(C->dp[35]);
621 /* 36 */
622 COMBA_FORWARD;
623 MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]);
624 COMBA_STORE(C->dp[36]);
625 /* 37 */
626 COMBA_FORWARD;
627 MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]);
628 COMBA_STORE(C->dp[37]);
629 /* 38 */
630 COMBA_FORWARD;
631 MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]);
632 COMBA_STORE(C->dp[38]);
633
634 /* early out at 40 digits, 40*32==1280, or two 640 bit operands */
635 if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; }
636
637 /* 39 */
638 COMBA_FORWARD;
639 MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]);
640 COMBA_STORE(C->dp[39]);
641 /* 40 */
642 COMBA_FORWARD;
643 MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]);
644 COMBA_STORE(C->dp[40]);
645 /* 41 */
646 COMBA_FORWARD;
647 MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]);
648 COMBA_STORE(C->dp[41]);
649 /* 42 */
650 COMBA_FORWARD;
651 MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]);
652 COMBA_STORE(C->dp[42]);
653 /* 43 */
654 COMBA_FORWARD;
655 MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]);
656 COMBA_STORE(C->dp[43]);
657 /* 44 */
658 COMBA_FORWARD;
659 MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]);
660 COMBA_STORE(C->dp[44]);
661 /* 45 */
662 COMBA_FORWARD;
663 MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]);
664 COMBA_STORE(C->dp[45]);
665 /* 46 */
666 COMBA_FORWARD;
667 MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]);
668 COMBA_STORE(C->dp[46]);
669
670 /* early out at 48 digits, 48*32==1536, or two 768 bit operands */
671 if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; }
672
673 /* 47 */
674 COMBA_FORWARD;
675 MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]);
676 COMBA_STORE(C->dp[47]);
677 /* 48 */
678 COMBA_FORWARD;
679 MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]);
680 COMBA_STORE(C->dp[48]);
681 /* 49 */
682 COMBA_FORWARD;
683 MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]);
684 COMBA_STORE(C->dp[49]);
685 /* 50 */
686 COMBA_FORWARD;
687 MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]);
688 COMBA_STORE(C->dp[50]);
689 /* 51 */
690 COMBA_FORWARD;
691 MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]);
692 COMBA_STORE(C->dp[51]);
693 /* 52 */
694 COMBA_FORWARD;
695 MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]);
696 COMBA_STORE(C->dp[52]);
697 /* 53 */
698 COMBA_FORWARD;
699 MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]);
700 COMBA_STORE(C->dp[53]);
701 /* 54 */
702 COMBA_FORWARD;
703 MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]);
704 COMBA_STORE(C->dp[54]);
705
706 /* early out at 56 digits, 56*32==1792, or two 896 bit operands */
707 if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; pstm_clamp(C); COMBA_FINI; return PSTM_OKAY; }
708
709 /* 55 */
710 COMBA_FORWARD;
711 MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]);
712 COMBA_STORE(C->dp[55]);
713 /* 56 */
714 COMBA_FORWARD;
715 MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]);
716 COMBA_STORE(C->dp[56]);
717 /* 57 */
718 COMBA_FORWARD;
719 MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]);
720 COMBA_STORE(C->dp[57]);
721 /* 58 */
722 COMBA_FORWARD;
723 MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]);
724 COMBA_STORE(C->dp[58]);
725 /* 59 */
726 COMBA_FORWARD;
727 MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]);
728 COMBA_STORE(C->dp[59]);
729 /* 60 */
730 COMBA_FORWARD;
731 MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]);
732 COMBA_STORE(C->dp[60]);
733 /* 61 */
734 COMBA_FORWARD;
735 MULADD(at[30], at[63]); MULADD(at[31], at[62]);
736 COMBA_STORE(C->dp[61]);
737 /* 62 */
738 COMBA_FORWARD;
739 MULADD(at[31], at[63]);
740 COMBA_STORE(C->dp[62]);
741 COMBA_STORE2(C->dp[63]);
742 C->used = 64;
743 C->sign = A->sign ^ B->sign;
744 pstm_clamp(C);
745 COMBA_FINI;
746 return PSTM_OKAY;
747}
748#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
749
750/******************************************************************************/
751
752int32 pstm_mul_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_int *C,
753 pstm_digit *paD, uint32 paDlen)
754{
755#ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
756 if (A->used == 16 && B->used == 16) {
757 return pstm_mul_comba16(A, B, C);
758 } else {
759#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
760 if (A->used == 32 && B->used == 32) {
761 return pstm_mul_comba32(A, B, C);
762 }
763#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
764 return pstm_mul_comba_gen(pool, A, B, C, paD, paDlen);
765 }
766#else
767#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
768 if (A->used == 32 && B->used == 32) {
769 return pstm_mul_comba32(A, B, C);
770 }
771#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
772 return pstm_mul_comba_gen(pool, A, B, C, paD, paDlen);
773#endif
774}
775
776#endif /* !DISABLE_PSTM */
777/******************************************************************************/
diff --git a/networking/tls_pstm_sqr_comba.c b/networking/tls_pstm_sqr_comba.c
new file mode 100644
index 000000000..98186d31f
--- /dev/null
+++ b/networking/tls_pstm_sqr_comba.c
@@ -0,0 +1,1107 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
8/**
9 * @file pstm_sqr_comba.c
10 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
11 *
12 * Multiprecision Squaring with Comba technique.
13 */
14/*
15 * Copyright (c) 2013-2015 INSIDE Secure Corporation
16 * Copyright (c) PeerSec Networks, 2002-2011
17 * All Rights Reserved
18 *
19 * The latest version of this code is available at http://www.matrixssl.org
20 *
21 * This software is open source; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This General Public License does NOT permit incorporating this software
27 * into proprietary programs. If you are unable to comply with the GPL, a
28 * commercial license for this software may be purchased from INSIDE at
29 * http://www.insidesecure.com/eng/Company/Locations
30 *
31 * This program is distributed in WITHOUT ANY WARRANTY; without even the
32 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
33 * See the GNU General Public License for more details.
34 *
35 * You should have received a copy of the GNU General Public License
36 * along with this program; if not, write to the Free Software
37 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 * http://www.gnu.org/copyleft/gpl.html
39 */
40/******************************************************************************/
41
42///bbox
43//#include "../cryptoApi.h"
44#ifndef DISABLE_PSTM
45
46/******************************************************************************/
47#if defined(PSTM_X86)
48/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
49#if !defined(__GNUC__) || !defined(__i386__)
50#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
51#endif
52//#pragma message ("Using 32 bit x86 Assembly Optimizations")
53
/*
 * All macros below operate on variables named c0, c1, c2 (column
 * accumulator, c0 least significant) and sc0, sc1, sc2 (secondary
 * accumulator) that must exist in the scope where the macro is used.
 * Each non-empty macro body already ends with a ';'.
 */
/* Expands to nothing in this variant. */
54#define COMBA_START
55
/* Zero the three accumulator digits. */
56#define CLEAR_CARRY \
57 c0 = c1 = c2 = 0;
58
/* Store the lowest accumulator digit (c0) into x. */
59#define COMBA_STORE(x) \
60 x = c0;
61
/* Store the middle accumulator digit (c1) into x. */
62#define COMBA_STORE2(x) \
63 x = c1;
64
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
65#define CARRY_FORWARD \
66 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
67
/* Expands to nothing in this variant. */
68#define COMBA_FINI
69
/*
 * c2:c1:c0 += i * i.  Only i is passed to the asm (it is loaded into eax
 * and multiplied by itself); the j argument is not referenced.
 */
70#define SQRADD(i, j) \
71asm( \
72 "movl %6,%%eax \n\t" \
73 "mull %%eax \n\t" \
74 "addl %%eax,%0 \n\t" \
75 "adcl %%edx,%1 \n\t" \
76 "adcl $0,%2 \n\t" \
77 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
78
/*
 * c2:c1:c0 += 2 * (i * j).  The product is computed once (edx:eax) and the
 * add/adc/adc sequence is issued twice.
 */
79#define SQRADD2(i, j) \
80asm( \
81 "movl %6,%%eax \n\t" \
82 "mull %7 \n\t" \
83 "addl %%eax,%0 \n\t" \
84 "adcl %%edx,%1 \n\t" \
85 "adcl $0,%2 \n\t" \
86 "addl %%eax,%0 \n\t" \
87 "adcl %%edx,%1 \n\t" \
88 "adcl $0,%2 \n\t" \
89 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
90
/*
 * Start a secondary sum: sc1:sc0 = i * j, sc2 = 0 (previous contents of
 * sc0..sc2 are overwritten).
 */
91#define SQRADDSC(i, j) \
92asm( \
93 "movl %6,%%eax \n\t" \
94 "mull %7 \n\t" \
95 "movl %%eax,%0 \n\t" \
96 "movl %%edx,%1 \n\t" \
97 "xorl %2,%2 \n\t" \
98 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
99
/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
100#define SQRADDAC(i, j) \
101asm( \
102 "movl %6,%%eax \n\t" \
103 "mull %7 \n\t" \
104 "addl %%eax,%0 \n\t" \
105 "adcl %%edx,%1 \n\t" \
106 "adcl $0,%2 \n\t" \
107 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
108
/*
 * Fold the secondary sum into the accumulator twice:
 * c2:c1:c0 += 2 * sc2:sc1:sc0.  sc0..sc2 are read only.
 */
109#define SQRADDDB \
110asm( \
111 "addl %6,%0 \n\t" \
112 "adcl %7,%1 \n\t" \
113 "adcl %8,%2 \n\t" \
114 "addl %6,%0 \n\t" \
115 "adcl %7,%1 \n\t" \
116 "adcl %8,%2 \n\t" \
117 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
118
119/******************************************************************************/
120#elif defined(PSTM_X86_64)
121/* x86-64 optimized */
122#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
123#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
124#endif
125//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
126
/*
 * All macros below operate on variables named c0, c1, c2 (column
 * accumulator, c0 least significant) and sc0, sc1, sc2 (secondary
 * accumulator) that must exist in the scope where the macro is used.
 * Each non-empty macro body already ends with a ';'.
 */
/* Expands to nothing in this variant. */
127#define COMBA_START
128
/* Zero the three accumulator digits. */
129#define CLEAR_CARRY \
130c0 = c1 = c2 = 0;
131
/* Store the lowest accumulator digit (c0) into x. */
132#define COMBA_STORE(x) \
133x = c0;
134
/* Store the middle accumulator digit (c1) into x. */
135#define COMBA_STORE2(x) \
136x = c1;
137
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
138#define CARRY_FORWARD \
139do { c0 = c1; c1 = c2; c2 = 0; } while (0);
140
/* Expands to nothing in this variant. */
141#define COMBA_FINI
142
/*
 * c2:c1:c0 += i * i.  Only i is passed to the asm (it is loaded into rax
 * and multiplied by itself); the j argument is not referenced.
 */
143#define SQRADD(i, j) \
144asm( \
145 "movq %6,%%rax \n\t" \
146 "mulq %%rax \n\t" \
147 "addq %%rax,%0 \n\t" \
148 "adcq %%rdx,%1 \n\t" \
149 "adcq $0,%2 \n\t" \
150 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
151
/*
 * c2:c1:c0 += 2 * (i * j).  The product is computed once (rdx:rax) and the
 * add/adc/adc sequence is issued twice.
 */
152#define SQRADD2(i, j) \
153asm( \
154 "movq %6,%%rax \n\t" \
155 "mulq %7 \n\t" \
156 "addq %%rax,%0 \n\t" \
157 "adcq %%rdx,%1 \n\t" \
158 "adcq $0,%2 \n\t" \
159 "addq %%rax,%0 \n\t" \
160 "adcq %%rdx,%1 \n\t" \
161 "adcq $0,%2 \n\t" \
162 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
163
/*
 * Start a secondary sum: sc1:sc0 = i * j, sc2 = 0 (previous contents of
 * sc0..sc2 are overwritten).
 */
164#define SQRADDSC(i, j) \
165asm( \
166 "movq %6,%%rax \n\t" \
167 "mulq %7 \n\t" \
168 "movq %%rax,%0 \n\t" \
169 "movq %%rdx,%1 \n\t" \
170 "xorq %2,%2 \n\t" \
171 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
172
/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
173#define SQRADDAC(i, j) \
174asm( \
175 "movq %6,%%rax \n\t" \
176 "mulq %7 \n\t" \
177 "addq %%rax,%0 \n\t" \
178 "adcq %%rdx,%1 \n\t" \
179 "adcq $0,%2 \n\t" \
180 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
181
/*
 * Fold the secondary sum into the accumulator twice:
 * c2:c1:c0 += 2 * sc2:sc1:sc0.  sc0..sc2 are read only.
 */
182#define SQRADDDB \
183asm( \
184 "addq %6,%0 \n\t" \
185 "adcq %7,%1 \n\t" \
186 "adcq %8,%2 \n\t" \
187 "addq %6,%0 \n\t" \
188 "adcq %7,%1 \n\t" \
189 "adcq %8,%2 \n\t" \
190 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
191
192/******************************************************************************/
193#elif defined(PSTM_ARM)
194/* ARM code */
195//#pragma message ("Using 32 bit ARM Assembly Optimizations")
196
/*
 * All macros below operate on variables named c0, c1, c2 (column
 * accumulator, c0 least significant) and sc0, sc1, sc2 (secondary
 * accumulator) that must exist in the scope where the macro is used.
 * Each non-empty macro body already ends with a ';'.
 * The multiply-and-add macros use r0/r1 as scratch and list them as
 * clobbered.
 */
/* Expands to nothing in this variant. */
197#define COMBA_START
198
/* Zero the three accumulator digits. */
199#define CLEAR_CARRY \
200c0 = c1 = c2 = 0;
201
/* Store the lowest accumulator digit (c0) into x. */
202#define COMBA_STORE(x) \
203x = c0;
204
/* Store the middle accumulator digit (c1) into x. */
205#define COMBA_STORE2(x) \
206x = c1;
207
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
208#define CARRY_FORWARD \
209do { c0 = c1; c1 = c2; c2 = 0; } while (0);
210
/* Expands to nothing in this variant. */
211#define COMBA_FINI
212
213/* squares i (UMULL of i with itself; j is not referenced) and adds the result into c2:c1:c0 */
214#define SQRADD(i, j) \
215asm( \
216" UMULL r0,r1,%6,%6 \n\t" \
217" ADDS %0,%0,r0 \n\t" \
218" ADCS %1,%1,r1 \n\t" \
219" ADC %2,%2,#0 \n\t" \
220:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
221
222/* for squaring some of the terms are doubled: c2:c1:c0 += 2 * (i * j), product computed once and added twice */
223#define SQRADD2(i, j) \
224asm( \
225" UMULL r0,r1,%6,%7 \n\t" \
226" ADDS %0,%0,r0 \n\t" \
227" ADCS %1,%1,r1 \n\t" \
228" ADC %2,%2,#0 \n\t" \
229" ADDS %0,%0,r0 \n\t" \
230" ADCS %1,%1,r1 \n\t" \
231" ADC %2,%2,#0 \n\t" \
232:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
233
/*
 * Start a secondary sum: sc1:sc0 = i * j (UMULL writes straight into the
 * output operands), sc2 = 0 via SUB of the register from itself.
 */
234#define SQRADDSC(i, j) \
235asm( \
236" UMULL %0,%1,%6,%7 \n\t" \
237" SUB %2,%2,%2 \n\t" \
238:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
239
/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
240#define SQRADDAC(i, j) \
241asm( \
242" UMULL r0,r1,%6,%7 \n\t" \
243" ADDS %0,%0,r0 \n\t" \
244" ADCS %1,%1,r1 \n\t" \
245" ADC %2,%2,#0 \n\t" \
246:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
247
/*
 * Fold the secondary sum into the accumulator twice:
 * c2:c1:c0 += 2 * sc2:sc1:sc0.  Note the operand order here: %3..%5 are
 * sc0..sc2, the tied c0..c2 inputs come after them.
 */
248#define SQRADDDB \
249asm( \
250" ADDS %0,%0,%3 \n\t" \
251" ADCS %1,%1,%4 \n\t" \
252" ADC %2,%2,%5 \n\t" \
253" ADDS %0,%0,%3 \n\t" \
254" ADCS %1,%1,%4 \n\t" \
255" ADC %2,%2,%5 \n\t" \
256:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
257
258/******************************************************************************/
259#elif defined(PSTM_MIPS)
260/* MIPS32 */
261//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
262
/*
 * All macros below operate on variables named c0, c1, c2 (column
 * accumulator, c0 least significant) and sc0, sc1, sc2 (secondary
 * accumulator) that must exist in the scope where the macro is used.
 * Each non-empty macro body already ends with a ';'.
 * There is no add-with-carry here: every carry is recomputed with
 * "sltu" (sum < addend) and added into the next digit explicitly.
 * NOTE(review): the multiplying macros use multu/mflo/mfhi, but the
 * clobber lists name only general registers - confirm whether HI/LO
 * need to be declared as clobbered for the compilers in use.
 */
/* Expands to nothing in this variant. */
263#define COMBA_START
264
/* Zero the three accumulator digits. */
265#define CLEAR_CARRY \
266c0 = c1 = c2 = 0;
267
/* Store the lowest accumulator digit (c0) into x. */
268#define COMBA_STORE(x) \
269x = c0;
270
/* Store the middle accumulator digit (c1) into x. */
271#define COMBA_STORE2(x) \
272x = c1;
273
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
274#define CARRY_FORWARD \
275do { c0 = c1; c1 = c2; c2 = 0; } while (0);
276
/* Expands to nothing in this variant. */
277#define COMBA_FINI
278
279/* squares i (multu of i with itself; j is not referenced) and adds the result into c2:c1:c0; $12/$13 are scratch */
280#define SQRADD(i, j) \
281asm( \
282 " multu %6,%6 \n\t" \
283 " mflo $12 \n\t" \
284 " mfhi $13 \n\t" \
285 " addu %0,%0,$12 \n\t" \
286 " sltu $12,%0,$12 \n\t" \
287 " addu %1,%1,$13 \n\t" \
288 " sltu $13,%1,$13 \n\t" \
289 " addu %1,%1,$12 \n\t" \
290 " sltu $12,%1,$12 \n\t" \
291 " addu %2,%2,$13 \n\t" \
292 " addu %2,%2,$12 \n\t" \
293 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
294
295/* for squaring some of the terms are doubled: c2:c1:c0 += 2 * (i * j); product kept in $12/$13, carries in $14/$15 */
296#define SQRADD2(i, j) \
297asm( \
298 " multu %6,%7 \n\t" \
299 " mflo $12 \n\t" \
300 " mfhi $13 \n\t" \
301 \
302 " addu %0,%0,$12 \n\t" \
303 " sltu $14,%0,$12 \n\t" \
304 " addu %1,%1,$13 \n\t" \
305 " sltu $15,%1,$13 \n\t" \
306 " addu %1,%1,$14 \n\t" \
307 " sltu $14,%1,$14 \n\t" \
308 " addu %2,%2,$15 \n\t" \
309 " addu %2,%2,$14 \n\t" \
310 \
311 " addu %0,%0,$12 \n\t" \
312 " sltu $14,%0,$12 \n\t" \
313 " addu %1,%1,$13 \n\t" \
314 " sltu $15,%1,$13 \n\t" \
315 " addu %1,%1,$14 \n\t" \
316 " sltu $14,%1,$14 \n\t" \
317 " addu %2,%2,$15 \n\t" \
318 " addu %2,%2,$14 \n\t" \
319 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
320
/*
 * Start a secondary sum: sc1:sc0 = i * j (mflo/mfhi write straight into
 * the output operands), sc2 = 0 via xor of the register with itself.
 */
321#define SQRADDSC(i, j) \
322asm( \
323 " multu %6,%7 \n\t" \
324 " mflo %0 \n\t" \
325 " mfhi %1 \n\t" \
326 " xor %2,%2,%2 \n\t" \
327 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
328
/*
 * Accumulate into the secondary sum: sc2:sc1:sc0 += i * j.
 * $14 is listed as clobbered although the instructions shown use only
 * $12 and $13.
 */
329#define SQRADDAC(i, j) \
330asm( \
331 " multu %6,%7 \n\t" \
332 " mflo $12 \n\t" \
333 " mfhi $13 \n\t" \
334 " addu %0,%0,$12 \n\t" \
335 " sltu $12,%0,$12 \n\t" \
336 " addu %1,%1,$13 \n\t" \
337 " sltu $13,%1,$13 \n\t" \
338 " addu %1,%1,$12 \n\t" \
339 " sltu $12,%1,$12 \n\t" \
340 " addu %2,%2,$13 \n\t" \
341 " addu %2,%2,$12 \n\t" \
342 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
343
/*
 * Fold the secondary sum into the accumulator twice:
 * c2:c1:c0 += 2 * sc2:sc1:sc0.  %3..%5 are sc0..sc2 (read only);
 * $10/$11 hold the intermediate carries.
 */
344#define SQRADDDB \
345asm( \
346 " addu %0,%0,%3 \n\t" \
347 " sltu $10,%0,%3 \n\t" \
348 " addu %1,%1,$10 \n\t" \
349 " sltu $10,%1,$10 \n\t" \
350 " addu %1,%1,%4 \n\t" \
351 " sltu $11,%1,%4 \n\t" \
352 " addu %2,%2,$10 \n\t" \
353 " addu %2,%2,$11 \n\t" \
354 " addu %2,%2,%5 \n\t" \
355 \
356 " addu %0,%0,%3 \n\t" \
357 " sltu $10,%0,%3 \n\t" \
358 " addu %1,%1,$10 \n\t" \
359 " sltu $10,%1,$10 \n\t" \
360 " addu %1,%1,%4 \n\t" \
361 " sltu $11,%1,%4 \n\t" \
362 " addu %2,%2,$10 \n\t" \
363 " addu %2,%2,$11 \n\t" \
364 " addu %2,%2,%5 \n\t" \
365 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
366
367#else
368/******************************************************************************/
#define PSTM_ISO
/*
 * Portable ISO C versions of the squaring helpers.
 *
 * Every macro works on variables that must exist where it is expanded:
 *   c0, c1, c2    - column accumulator, c0 being the least significant digit
 *   sc0, sc1, sc2 - secondary accumulator (SQRADDSC / SQRADDAC / SQRADDDB)
 *   tt            - pstm_word scratch variable, written by SQRADD2
 * Each non-empty macro body already ends with a ';'.
 */

/* nothing to set up or tear down in the portable variant */
#define COMBA_START
#define COMBA_FINI

/* zero the whole column accumulator */
#define CLEAR_CARRY \
	c0 = c1 = c2 = 0;

/* write the lowest accumulator digit to x */
#define COMBA_STORE(x) \
	x = c0;

/* write the middle accumulator digit to x */
#define COMBA_STORE2(x) \
	x = c1;

/* move on to the next column: drop c0, shift c1 and c2 down */
#define CARRY_FORWARD \
	do { \
		c0 = c1; \
		c1 = c2; \
		c2 = 0; \
	} while (0);

/* c2:c1:c0 += i * j */
#define SQRADD(i, j) \
	do { \
		pstm_word sqr_sum_; \
		sqr_sum_ = c0 + ((pstm_word)i) * ((pstm_word)j); \
		c0 = (pstm_digit)sqr_sum_; \
		sqr_sum_ = c1 + (sqr_sum_ >> DIGIT_BIT); \
		c1 = (pstm_digit)sqr_sum_; \
		c2 += (pstm_digit)(sqr_sum_ >> DIGIT_BIT); \
	} while (0);

/*
 * c2:c1:c0 += 2 * (i * j): the product is formed once and the same
 * add-with-carry sequence is run twice, using the caller's tt as scratch.
 */
#define SQRADD2(i, j) \
	do { \
		pstm_word sqr_prod_; \
		sqr_prod_ = ((pstm_word)i) * ((pstm_word)j); \
		/* first copy of the product */ \
		tt = (pstm_word)c0 + sqr_prod_; \
		c0 = (pstm_digit)tt; \
		tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
		c1 = (pstm_digit)tt; \
		c2 += (pstm_digit)(tt >> DIGIT_BIT); \
		/* second copy of the product */ \
		tt = (pstm_word)c0 + sqr_prod_; \
		c0 = (pstm_digit)tt; \
		tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
		c1 = (pstm_digit)tt; \
		c2 += (pstm_digit)(tt >> DIGIT_BIT); \
	} while (0);

/* start a secondary sum: sc1:sc0 = i * j, sc2 = 0 */
#define SQRADDSC(i, j) \
	do { \
		pstm_word sqr_prod_; \
		sqr_prod_ = ((pstm_word)i) * ((pstm_word)j); \
		sc0 = (pstm_digit)sqr_prod_; \
		sc1 = (pstm_digit)(sqr_prod_ >> DIGIT_BIT); \
		sc2 = 0; \
	} while (0);

/* extend the secondary sum: sc2:sc1:sc0 += i * j */
#define SQRADDAC(i, j) \
	do { \
		pstm_word sqr_sum_; \
		sqr_sum_ = ((pstm_word)sc0) + ((pstm_word)i) * ((pstm_word)j); \
		sc0 = (pstm_digit)sqr_sum_; \
		sqr_sum_ = ((pstm_word)sc1) + (sqr_sum_ >> DIGIT_BIT); \
		sc1 = (pstm_digit)sqr_sum_; \
		sc2 += (pstm_digit)(sqr_sum_ >> DIGIT_BIT); \
	} while (0);

/* fold the secondary sum in twice: c2:c1:c0 += 2 * sc2:sc1:sc0 */
#define SQRADDDB \
	do { \
		pstm_word sqr_sum_; \
		sqr_sum_ = ((pstm_word)sc0) + ((pstm_word)sc0) + ((pstm_word)c0); \
		c0 = (pstm_digit)sqr_sum_; \
		sqr_sum_ = ((pstm_word)sc1) + ((pstm_word)sc1) + c1 \
			+ (sqr_sum_ >> DIGIT_BIT); \
		c1 = (pstm_digit)sqr_sum_; \
		c2 = c2 + sc2 + sc2 + (pstm_digit)(sqr_sum_ >> DIGIT_BIT); \
	} while (0);
431
432#endif /* ISO_C */
433
434/******************************************************************************/
435/*
436 Non-unrolled comba squarer
437 */
438///bbox: pool unused
439#define pstm_sqr_comba_gen(pool, A, B, paD, paDlen) \
440 pstm_sqr_comba_gen( A, B, paD, paDlen)
441static int32 pstm_sqr_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B,
442 pstm_digit *paD, uint32 paDlen)
443{
444 int16 paDfail, pa;
445 int32 ix, iz;
446 pstm_digit c0, c1, c2, *dst;
447#ifdef PSTM_ISO
448 pstm_word tt;
449#endif
450
451 paDfail = 0;
452 /* get size of output and trim */
453 pa = A->used + A->used;
454
455 /* number of output digits to produce */
456 COMBA_START;
457 CLEAR_CARRY;
458/*
459 If b is not large enough grow it and continue
460*/
461 if (B->alloc < pa) {
462 if (pstm_grow(B, pa) != PSTM_OKAY) {
463 return PS_MEM_FAIL;
464 }
465 }
466 if (paD != NULL) {
467 if (paDlen < (sizeof(pstm_digit) * pa)) {
468 paDfail = 1; /* have a paD, but it's not big enough */
469 dst = xzalloc(sizeof(pstm_digit) * pa);
470 } else {
471 dst = paD;
472 memset(dst, 0x0, paDlen);
473 }
474 } else {
475 dst = xzalloc(sizeof(pstm_digit) * pa);
476 }
477
478 for (ix = 0; ix < pa; ix++) {
479 int32 tx, ty, iy;
480 pstm_digit *tmpy, *tmpx;
481
482 /* get offsets into the two bignums */
483 ty = min(A->used-1, ix);
484 tx = ix - ty;
485
486 /* setup temp aliases */
487 tmpx = A->dp + tx;
488 tmpy = A->dp + ty;
489
490/*
491 This is the number of times the loop will iterate,
492 while (tx++ < a->used && ty-- >= 0) { ... }
493*/
494 iy = min(A->used-tx, ty+1);
495
496/*
497 now for squaring tx can never equal ty. We halve the distance since
498 they approach at a rate of 2x and we have to round because odd cases
499 need to be executed
500*/
501 iy = min(iy, (ty-tx+1)>>1);
502
503 /* forward carries */
504 CARRY_FORWARD;
505
506 /* execute loop */
507 for (iz = 0; iz < iy; iz++) {
508 SQRADD2(*tmpx++, *tmpy--);
509 }
510
511 /* even columns have the square term in them */
512 if ((ix&1) == 0) {
513 SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
514 }
515
516 /* store it */
517 COMBA_STORE(dst[ix]);
518 }
519
520 COMBA_FINI;
521/*
522 setup dest
523 */
524 iz = B->used;
525 B->used = pa;
526 {
527 pstm_digit *tmpc;
528 tmpc = B->dp;
529 for (ix = 0; ix < pa; ix++) {
530 *tmpc++ = dst[ix];
531 }
532 /* clear unused digits (that existed in the old copy of c) */
533 for (; ix < iz; ix++) {
534 *tmpc++ = 0;
535 }
536 }
537 pstm_clamp(B);
538
539 if ((paD == NULL) || paDfail == 1) {
540 psFree(dst, pool);
541 }
542 return PS_SUCCESS;
543}
544
545/******************************************************************************/
546/*
547 Unrolled Comba loop for 1024 bit keys
548 */
549#ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
/*
	Fully unrolled Comba squaring for a 16-digit operand.

	A	input; digits a[0]..a[15] are read unconditionally, so the caller
		must only use this for 16-digit values (pstm_sqr_comba() dispatches
		here only when A->used == 16).
	B	output; grown to 32 digits if its allocation is smaller, then
		overwritten with the 32 result digits, marked positive and clamped.

	Returns PSTM_OKAY on success, PS_MEM_FAIL if B could not be grown.

	Each "output k" step below accumulates every product a[i]*a[j] with
	i + j == k and stores one result digit. The COMBA_*, CARRY_FORWARD,
	CLEAR_CARRY and SQRADD* macros are defined outside this function; judging
	by their names and usage, SQRADD adds a single product (the a[i]*a[i]
	diagonal), SQRADD2 adds a product twice, and the SQRADDSC / SQRADDAC /
	SQRADDDB triple sums several cross products in the sc0..sc2 side
	accumulator before adding that sum twice -- NOTE(review): confirm against
	the macro definitions.
 */
550static int32 pstm_sqr_comba16(pstm_int *A, pstm_int *B)
551{
552 pstm_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
553#ifdef PSTM_ISO
/* presumably scratch space for the ISO C variants of the macros */
554 pstm_word tt;
555#endif
556
/* 16 input digits are stored as 32 output digits below */
557 if (B->alloc < 32) {
558 if (pstm_grow(B, 32) != PSTM_OKAY) {
559 return PS_MEM_FAIL;
560 }
561 }
562 a = A->dp;
563 sc0 = sc1 = sc2 = 0;
564
565 COMBA_START;
566
567 /* clear carries */
568 CLEAR_CARRY;
569
570 /* output 0 */
571 SQRADD(a[0],a[0]);
572 COMBA_STORE(b[0]);
573
574 /* output 1 */
575 CARRY_FORWARD;
576 SQRADD2(a[0], a[1]);
577 COMBA_STORE(b[1]);
578
579 /* output 2 */
580 CARRY_FORWARD;
581 SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
582 COMBA_STORE(b[2]);
583
584 /* output 3 */
585 CARRY_FORWARD;
586 SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
587 COMBA_STORE(b[3]);
588
589 /* output 4 */
590 CARRY_FORWARD;
591 SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
592 COMBA_STORE(b[4]);
593
594 /* output 5 */
595 CARRY_FORWARD;
596 SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
597 COMBA_STORE(b[5]);
598
599 /* output 6 */
600 CARRY_FORWARD;
601 SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
602 COMBA_STORE(b[6]);
603
604 /* output 7 */
605 CARRY_FORWARD;
606 SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
607 COMBA_STORE(b[7]);
608
609 /* output 8 */
610 CARRY_FORWARD;
611 SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
612 COMBA_STORE(b[8]);
613
614 /* output 9 */
615 CARRY_FORWARD;
616 SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
617 COMBA_STORE(b[9]);
618
619 /* output 10 */
620 CARRY_FORWARD;
621 SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
622 COMBA_STORE(b[10]);
623
624 /* output 11 */
625 CARRY_FORWARD;
626 SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
627 COMBA_STORE(b[11]);
628
629 /* output 12 */
630 CARRY_FORWARD;
631 SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
632 COMBA_STORE(b[12]);
633
634 /* output 13 */
635 CARRY_FORWARD;
636 SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
637 COMBA_STORE(b[13]);
638
639 /* output 14 */
640 CARRY_FORWARD;
641 SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
642 COMBA_STORE(b[14]);
643
644 /* output 15 */
645 CARRY_FORWARD;
646 SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
647 COMBA_STORE(b[15]);
648
649 /* output 16 */
650 CARRY_FORWARD;
651 SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
652 COMBA_STORE(b[16]);
653
654 /* output 17 */
655 CARRY_FORWARD;
656 SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
657 COMBA_STORE(b[17]);
658
659 /* output 18 */
660 CARRY_FORWARD;
661 SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
662 COMBA_STORE(b[18]);
663
664 /* output 19 */
665 CARRY_FORWARD;
666 SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
667 COMBA_STORE(b[19]);
668
669 /* output 20 */
670 CARRY_FORWARD;
671 SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
672 COMBA_STORE(b[20]);
673
674 /* output 21 */
675 CARRY_FORWARD;
676 SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
677 COMBA_STORE(b[21]);
678
679 /* output 22 */
680 CARRY_FORWARD;
681 SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
682 COMBA_STORE(b[22]);
683
684 /* output 23 */
685 CARRY_FORWARD;
686 SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
687 COMBA_STORE(b[23]);
688
689 /* output 24 */
690 CARRY_FORWARD;
691 SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
692 COMBA_STORE(b[24]);
693
694 /* output 25 */
695 CARRY_FORWARD;
696 SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
697 COMBA_STORE(b[25]);
698
699 /* output 26 */
700 CARRY_FORWARD;
701 SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]);
702 COMBA_STORE(b[26]);
703
704 /* output 27 */
705 CARRY_FORWARD;
706 SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]);
707 COMBA_STORE(b[27]);
708
709 /* output 28 */
710 CARRY_FORWARD;
711 SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]);
712 COMBA_STORE(b[28]);
713
714 /* output 29 */
715 CARRY_FORWARD;
716 SQRADD2(a[14], a[15]);
717 COMBA_STORE(b[29]);
718
719 /* output 30 */
720 CARRY_FORWARD;
721 SQRADD(a[15], a[15]);
722 COMBA_STORE(b[30]);
723 COMBA_STORE2(b[31]);
724 COMBA_FINI;
725
/* publish the digits staged in the local array b[] into B, then let
   pstm_clamp() adjust the digit count */
726 B->used = 32;
727 B->sign = PSTM_ZPOS;
728 memcpy(B->dp, b, 32 * sizeof(pstm_digit));
729 pstm_clamp(B);
730 return PSTM_OKAY;
731}
732#endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */
733
734
735#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
/*
	Fully unrolled Comba squaring for a 32-digit operand.

	A	input; digits a[0]..a[31] are read unconditionally, so the caller
		must only use this for 32-digit values (pstm_sqr_comba() dispatches
		here only when A->used == 32).
	B	output; grown to 64 digits if its allocation is smaller, then
		overwritten with the 64 result digits, marked positive and clamped.

	Returns PSTM_OKAY on success, PS_MEM_FAIL if B could not be grown.

	Each "output k" step below accumulates every product a[i]*a[j] with
	i + j == k and stores one result digit. The COMBA_*, CARRY_FORWARD,
	CLEAR_CARRY and SQRADD* macros are defined outside this function; judging
	by their names and usage, SQRADD adds a single product (the a[i]*a[i]
	diagonal), SQRADD2 adds a product twice, and the SQRADDSC / SQRADDAC /
	SQRADDDB triple sums several cross products in the sc0..sc2 side
	accumulator before adding that sum twice -- NOTE(review): confirm against
	the macro definitions.
 */
736static int32 pstm_sqr_comba32(pstm_int *A, pstm_int *B)
737{
738 pstm_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
739#ifdef PSTM_ISO
/* presumably scratch space for the ISO C variants of the macros */
740 pstm_word tt;
741#endif
742
/* 32 input digits are stored as 64 output digits below */
743 if (B->alloc < 64) {
744 if (pstm_grow(B, 64) != PSTM_OKAY) {
745 return PS_MEM_FAIL;
746 }
747 }
748 sc0 = sc1 = sc2 = 0;
749 a = A->dp;
750 COMBA_START;
751
752 /* clear carries */
753 CLEAR_CARRY;
754
755 /* output 0 */
756 SQRADD(a[0],a[0]);
757 COMBA_STORE(b[0]);
758
759 /* output 1 */
760 CARRY_FORWARD;
761 SQRADD2(a[0], a[1]);
762 COMBA_STORE(b[1]);
763
764 /* output 2 */
765 CARRY_FORWARD;
766 SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
767 COMBA_STORE(b[2]);
768
769 /* output 3 */
770 CARRY_FORWARD;
771 SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
772 COMBA_STORE(b[3]);
773
774 /* output 4 */
775 CARRY_FORWARD;
776 SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
777 COMBA_STORE(b[4]);
778
779 /* output 5 */
780 CARRY_FORWARD;
781 SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
782 COMBA_STORE(b[5]);
783
784 /* output 6 */
785 CARRY_FORWARD;
786 SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
787 COMBA_STORE(b[6]);
788
789 /* output 7 */
790 CARRY_FORWARD;
791 SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
792 COMBA_STORE(b[7]);
793
794 /* output 8 */
795 CARRY_FORWARD;
796 SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
797 COMBA_STORE(b[8]);
798
799 /* output 9 */
800 CARRY_FORWARD;
801 SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
802 COMBA_STORE(b[9]);
803
804 /* output 10 */
805 CARRY_FORWARD;
806 SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
807 COMBA_STORE(b[10]);
808
809 /* output 11 */
810 CARRY_FORWARD;
811 SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
812 COMBA_STORE(b[11]);
813
814 /* output 12 */
815 CARRY_FORWARD;
816 SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
817 COMBA_STORE(b[12]);
818
819 /* output 13 */
820 CARRY_FORWARD;
821 SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
822 COMBA_STORE(b[13]);
823
824 /* output 14 */
825 CARRY_FORWARD;
826 SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
827 COMBA_STORE(b[14]);
828
829 /* output 15 */
830 CARRY_FORWARD;
831 SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
832 COMBA_STORE(b[15]);
833
834 /* output 16 */
835 CARRY_FORWARD;
836 SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
837 COMBA_STORE(b[16]);
838
839 /* output 17 */
840 CARRY_FORWARD;
841 SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
842 COMBA_STORE(b[17]);
843
844 /* output 18 */
845 CARRY_FORWARD;
846 SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
847 COMBA_STORE(b[18]);
848
849 /* output 19 */
850 CARRY_FORWARD;
851 SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
852 COMBA_STORE(b[19]);
853
854 /* output 20 */
855 CARRY_FORWARD;
856 SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
857 COMBA_STORE(b[20]);
858
859 /* output 21 */
860 CARRY_FORWARD;
861 SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
862 COMBA_STORE(b[21]);
863
864 /* output 22 */
865 CARRY_FORWARD;
866 SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
867 COMBA_STORE(b[22]);
868
869 /* output 23 */
870 CARRY_FORWARD;
871 SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
872 COMBA_STORE(b[23]);
873
874 /* output 24 */
875 CARRY_FORWARD;
876 SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
877 COMBA_STORE(b[24]);
878
879 /* output 25 */
880 CARRY_FORWARD;
881 SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
882 COMBA_STORE(b[25]);
883
884 /* output 26 */
885 CARRY_FORWARD;
886 SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
887 COMBA_STORE(b[26]);
888
889 /* output 27 */
890 CARRY_FORWARD;
891 SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
892 COMBA_STORE(b[27]);
893
894 /* output 28 */
895 CARRY_FORWARD;
896 SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
897 COMBA_STORE(b[28]);
898
899 /* output 29 */
900 CARRY_FORWARD;
901 SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
902 COMBA_STORE(b[29]);
903
904 /* output 30 */
905 CARRY_FORWARD;
906 SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
907 COMBA_STORE(b[30]);
908
909 /* output 31 */
910 CARRY_FORWARD;
911 SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
912 COMBA_STORE(b[31]);
913
914 /* output 32 */
915 CARRY_FORWARD;
916 SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
917 COMBA_STORE(b[32]);
918
919 /* output 33 */
920 CARRY_FORWARD;
921 SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
922 COMBA_STORE(b[33]);
923
924 /* output 34 */
925 CARRY_FORWARD;
926 SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
927 COMBA_STORE(b[34]);
928
929 /* output 35 */
930 CARRY_FORWARD;
931 SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
932 COMBA_STORE(b[35]);
933
934 /* output 36 */
935 CARRY_FORWARD;
936 SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
937 COMBA_STORE(b[36]);
938
939 /* output 37 */
940 CARRY_FORWARD;
941 SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
942 COMBA_STORE(b[37]);
943
944 /* output 38 */
945 CARRY_FORWARD;
946 SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
947 COMBA_STORE(b[38]);
948
949 /* output 39 */
950 CARRY_FORWARD;
951 SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
952 COMBA_STORE(b[39]);
953
954 /* output 40 */
955 CARRY_FORWARD;
956 SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
957 COMBA_STORE(b[40]);
958
959 /* output 41 */
960 CARRY_FORWARD;
961 SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
962 COMBA_STORE(b[41]);
963
964 /* output 42 */
965 CARRY_FORWARD;
966 SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
967 COMBA_STORE(b[42]);
968
969 /* output 43 */
970 CARRY_FORWARD;
971 SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
972 COMBA_STORE(b[43]);
973
974 /* output 44 */
975 CARRY_FORWARD;
976 SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
977 COMBA_STORE(b[44]);
978
979 /* output 45 */
980 CARRY_FORWARD;
981 SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
982 COMBA_STORE(b[45]);
983
984 /* output 46 */
985 CARRY_FORWARD;
986 SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
987 COMBA_STORE(b[46]);
988
989 /* output 47 */
990 CARRY_FORWARD;
991 SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
992 COMBA_STORE(b[47]);
993
994 /* output 48 */
995 CARRY_FORWARD;
996 SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
997 COMBA_STORE(b[48]);
998
999 /* output 49 */
1000 CARRY_FORWARD;
1001 SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
1002 COMBA_STORE(b[49]);
1003
1004 /* output 50 */
1005 CARRY_FORWARD;
1006 SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]);
1007 COMBA_STORE(b[50]);
1008
1009 /* output 51 */
1010 CARRY_FORWARD;
1011 SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB;
1012 COMBA_STORE(b[51]);
1013
1014 /* output 52 */
1015 CARRY_FORWARD;
1016 SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]);
1017 COMBA_STORE(b[52]);
1018
1019 /* output 53 */
1020 CARRY_FORWARD;
1021 SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB;
1022 COMBA_STORE(b[53]);
1023
1024 /* output 54 */
1025 CARRY_FORWARD;
1026 SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]);
1027 COMBA_STORE(b[54]);
1028
1029 /* output 55 */
1030 CARRY_FORWARD;
1031 SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB;
1032 COMBA_STORE(b[55]);
1033
1034 /* output 56 */
1035 CARRY_FORWARD;
1036 SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]);
1037 COMBA_STORE(b[56]);
1038
1039 /* output 57 */
1040 CARRY_FORWARD;
1041 SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB;
1042 COMBA_STORE(b[57]);
1043
1044 /* output 58 */
1045 CARRY_FORWARD;
1046 SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
1047 COMBA_STORE(b[58]);
1048
1049 /* output 59 */
1050 CARRY_FORWARD;
1051 SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
1052 COMBA_STORE(b[59]);
1053
1054 /* output 60 */
1055 CARRY_FORWARD;
1056 SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
1057 COMBA_STORE(b[60]);
1058
1059 /* output 61 */
1060 CARRY_FORWARD;
1061 SQRADD2(a[30], a[31]);
1062 COMBA_STORE(b[61]);
1063
1064 /* output 62 */
1065 CARRY_FORWARD;
1066 SQRADD(a[31], a[31]);
1067 COMBA_STORE(b[62]);
1068 COMBA_STORE2(b[63]);
1069 COMBA_FINI;
1070
/* publish the digits staged in the local array b[] into B, then let
   pstm_clamp() adjust the digit count */
1071 B->used = 64;
1072 B->sign = PSTM_ZPOS;
1073 memcpy(B->dp, b, 64 * sizeof(pstm_digit));
1074 pstm_clamp(B);
1075 return PSTM_OKAY;
1076}
1077#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
1078
1079/******************************************************************************/
1080/*
1081 */
1082int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_digit *paD,
1083 uint32 paDlen)
1084{
1085#ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
1086 if (A->used == 16) {
1087 return pstm_sqr_comba16(A, B);
1088 } else {
1089#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
1090 if (A->used == 32) {
1091 return pstm_sqr_comba32(A, B);
1092 }
1093#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
1094 return pstm_sqr_comba_gen(pool, A, B, paD, paDlen);
1095 }
1096#else
1097#ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
1098 if (A->used == 32) {
1099 return pstm_sqr_comba32(A, B);
1100 }
1101#endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
1102 return pstm_sqr_comba_gen(pool, A, B, paD, paDlen);
1103#endif
1104}
1105
1106#endif /* DISABLE_PSTM */
1107/******************************************************************************/
diff --git a/networking/tls_rsa.c b/networking/tls_rsa.c
new file mode 100644
index 000000000..058b09cee
--- /dev/null
+++ b/networking/tls_rsa.c
@@ -0,0 +1,203 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6#include "tls.h"
7
8#define pkcs1Pad(in, inlen, out, outlen, cryptType, userPtr) \
9 pkcs1Pad(in, inlen, out, outlen, cryptType)
10static ///bbox
11int32 pkcs1Pad(unsigned char *in, uint32 inlen, unsigned char *out,
12 uint32 outlen, int32 cryptType, void *userPtr)
13{
14 unsigned char *c;
15 int32 randomLen;
16
17 randomLen = outlen - 3 - inlen;
18 if (randomLen < 8) {
19 psTraceCrypto("pkcs1Pad failure\n");
20 return PS_LIMIT_FAIL;
21 }
22 c = out;
23 *c = 0x00;
24 c++;
25 *c = (unsigned char)cryptType;
26 c++;
27 if (cryptType == PUBKEY_TYPE) {
28 while (randomLen-- > 0) {
29 *c++ = 0xFF;
30 }
31 } else {
32 if (matrixCryptoGetPrngData(c, (uint32)randomLen, userPtr) < 0) {
33 return PS_PLATFORM_FAIL;
34 }
35/*
36 SECURITY: Read through the random data and change all 0x0 to 0x01.
37 This is per spec that no random bytes should be 0
38*/
39 while (randomLen-- > 0) {
40 if (*c == 0x0) {
41 *c = 0x01;
42 }
43 c++;
44 }
45 }
46 *c = 0x00;
47 c++;
48 memcpy(c, in, inlen);
49
50 return outlen;
51}
52
53#define psRsaCrypt(pool, in, inlen, out, outlen, key, type, data) \
54 psRsaCrypt(pool, in, inlen, out, outlen, key, type)
55static ///bbox
56int32 psRsaCrypt(psPool_t *pool, const unsigned char *in, uint32 inlen,
57 unsigned char *out, uint32 *outlen, psRsaKey_t *key, int32 type,
58 void *data)
59{
60 pstm_int tmp, tmpa, tmpb;
61 int32 res;
62 uint32 x;
63
64 if (in == NULL || out == NULL || outlen == NULL || key == NULL) {
65 psTraceCrypto("NULL parameter error in psRsaCrypt\n");
66 return PS_ARG_FAIL;
67 }
68
69 tmp.dp = tmpa.dp = tmpb.dp = NULL;
70
71 /* Init and copy into tmp */
72 if (pstm_init_for_read_unsigned_bin(pool, &tmp, inlen + sizeof(pstm_digit))
73 != PS_SUCCESS) {
74 return PS_FAILURE;
75 }
76 if (pstm_read_unsigned_bin(&tmp, (unsigned char *)in, inlen) != PS_SUCCESS){
77 pstm_clear(&tmp);
78 return PS_FAILURE;
79 }
80 /* Sanity check on the input */
81 if (pstm_cmp(&key->N, &tmp) == PSTM_LT) {
82 res = PS_LIMIT_FAIL;
83 goto done;
84 }
85 if (type == PRIVKEY_TYPE) {
86 if (key->optimized) {
87 if (pstm_init_size(pool, &tmpa, key->p.alloc) != PS_SUCCESS) {
88 res = PS_FAILURE;
89 goto done;
90 }
91 if (pstm_init_size(pool, &tmpb, key->q.alloc) != PS_SUCCESS) {
92 pstm_clear(&tmpa);
93 res = PS_FAILURE;
94 goto done;
95 }
96 if (pstm_exptmod(pool, &tmp, &key->dP, &key->p, &tmpa) !=
97 PS_SUCCESS) {
98 psTraceCrypto("decrypt error: pstm_exptmod dP, p\n");
99 goto error;
100 }
101 if (pstm_exptmod(pool, &tmp, &key->dQ, &key->q, &tmpb) !=
102 PS_SUCCESS) {
103 psTraceCrypto("decrypt error: pstm_exptmod dQ, q\n");
104 goto error;
105 }
106 if (pstm_sub(&tmpa, &tmpb, &tmp) != PS_SUCCESS) {
107 psTraceCrypto("decrypt error: sub tmpb, tmp\n");
108 goto error;
109 }
110 if (pstm_mulmod(pool, &tmp, &key->qP, &key->p, &tmp) != PS_SUCCESS) {
111 psTraceCrypto("decrypt error: pstm_mulmod qP, p\n");
112 goto error;
113 }
114 if (pstm_mul_comba(pool, &tmp, &key->q, &tmp, NULL, 0)
115 != PS_SUCCESS){
116 psTraceCrypto("decrypt error: pstm_mul q \n");
117 goto error;
118 }
119 if (pstm_add(&tmp, &tmpb, &tmp) != PS_SUCCESS) {
120 psTraceCrypto("decrypt error: pstm_add tmp \n");
121 goto error;
122 }
123 } else {
124 if (pstm_exptmod(pool, &tmp, &key->d, &key->N, &tmp) !=
125 PS_SUCCESS) {
126 psTraceCrypto("psRsaCrypt error: pstm_exptmod\n");
127 goto error;
128 }
129 }
130 } else if (type == PUBKEY_TYPE) {
131 if (pstm_exptmod(pool, &tmp, &key->e, &key->N, &tmp) != PS_SUCCESS) {
132 psTraceCrypto("psRsaCrypt error: pstm_exptmod\n");
133 goto error;
134 }
135 } else {
136 psTraceCrypto("psRsaCrypt error: invalid type param\n");
137 goto error;
138 }
139 /* Read it back */
140 x = pstm_unsigned_bin_size(&key->N);
141
142 if ((uint32)x > *outlen) {
143 res = -1;
144 psTraceCrypto("psRsaCrypt error: pstm_unsigned_bin_size\n");
145 goto done;
146 }
147 /* We want the encrypted value to always be the key size. Pad with 0x0 */
148 while ((uint32)x < (unsigned long)key->size) {
149 *out++ = 0x0;
150 x++;
151 }
152
153 *outlen = x;
154 /* Convert it */
155 memset(out, 0x0, x);
156
157 if (pstm_to_unsigned_bin(pool, &tmp, out+(x-pstm_unsigned_bin_size(&tmp)))
158 != PS_SUCCESS) {
159 psTraceCrypto("psRsaCrypt error: pstm_to_unsigned_bin\n");
160 goto error;
161 }
162 /* Clean up and return */
163 res = PS_SUCCESS;
164 goto done;
165error:
166 res = PS_FAILURE;
167done:
168 if (type == PRIVKEY_TYPE && key->optimized) {
169 pstm_clear_multi(&tmpa, &tmpb, NULL, NULL, NULL, NULL, NULL, NULL);
170 }
171 pstm_clear(&tmp);
172 return res;
173}
174
175int32 psRsaEncryptPub(psPool_t *pool, psRsaKey_t *key,
176 unsigned char *in, uint32 inlen,
177 unsigned char *out, uint32 outlen, void *data)
178{
179 int32 err;
180 uint32 size;
181
182 size = key->size;
183 if (outlen < size) {
184 psTraceCrypto("Error on bad outlen parameter to psRsaEncryptPub\n");
185 return PS_ARG_FAIL;
186 }
187
188 if ((err = pkcs1Pad(in, inlen, out, size, PRIVKEY_TYPE, data))
189 < PS_SUCCESS) {
190 psTraceCrypto("Error padding psRsaEncryptPub. Likely data too long\n");
191 return err;
192 }
193 if ((err = psRsaCrypt(pool, out, size, out, (uint32*)&outlen, key,
194 PUBKEY_TYPE, data)) < PS_SUCCESS) {
195 psTraceCrypto("Error performing psRsaEncryptPub\n");
196 return err;
197 }
198 if (outlen != size) {
199 psTraceCrypto("Encrypted size error in psRsaEncryptPub\n");
200 return PS_FAILURE;
201 }
202 return size;
203}
diff --git a/networking/tls_rsa.h b/networking/tls_rsa.h
new file mode 100644
index 000000000..3281087c7
--- /dev/null
+++ b/networking/tls_rsa.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) 2017 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6
7typedef struct {
8 pstm_int e, d, N, qP, dP, dQ, p, q;
9 uint32 size; /* Size of the key in bytes */
10 int32 optimized; /* 1 for optimized */
11 psPool_t *pool;
12} psRsaKey_t;
13
14#define psRsaEncryptPub(pool, key, in, inlen, out, outlen, data) \
15 psRsaEncryptPub(pool, key, in, inlen, out, outlen)
16int32 psRsaEncryptPub(psPool_t *pool, psRsaKey_t *key,
17 unsigned char *in, uint32 inlen,
18 unsigned char *out, uint32 outlen, void *data);