summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordjm <>2009-04-06 06:30:05 +0000
committerdjm <>2009-04-06 06:30:05 +0000
commitcddf71b5c4a9cb7ece411c7ee543271a70471a1f (patch)
tree0e6cdd8333fa33be2ec4687b78ca3525e7fe0512
parent05552533f486a700413f5b1fd925ae0f2c26d8a8 (diff)
downloadopenbsd-cddf71b5c4a9cb7ece411c7ee543271a70471a1f.tar.gz
openbsd-cddf71b5c4a9cb7ece411c7ee543271a70471a1f.tar.bz2
openbsd-cddf71b5c4a9cb7ece411c7ee543271a70471a1f.zip
import of OpenSSL 0.9.8k
-rw-r--r--src/lib/libcrypto/bn/asm/mips3-mont.pl327
-rw-r--r--src/lib/libcrypto/x509v3/v3_addr.c33
-rw-r--r--src/lib/libssl/src/apps/genpkey.c440
-rw-r--r--src/lib/libssl/src/apps/pkey.c284
-rw-r--r--src/lib/libssl/src/apps/pkeyparam.c201
-rw-r--r--src/lib/libssl/src/apps/pkeyutl.c570
-rw-r--r--src/lib/libssl/src/apps/ts.c1144
-rw-r--r--src/lib/libssl/src/apps/tsget195
-rw-r--r--src/lib/libssl/src/crypto/aes/aes_x86core.c1063
-rw-r--r--src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl1030
-rw-r--r--src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl1176
-rw-r--r--src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl1333
-rwxr-xr-xsrc/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl1181
-rw-r--r--src/lib/libssl/src/crypto/asn1/ameth_lib.c446
-rw-r--r--src/lib/libssl/src/crypto/asn1/asn1_locl.h134
-rw-r--r--src/lib/libssl/src/crypto/asn1/asn_mime.c2
-rw-r--r--src/lib/libssl/src/crypto/asn1/bio_asn1.c495
-rw-r--r--src/lib/libssl/src/crypto/asn1/bio_ndef.c246
-rw-r--r--src/lib/libssl/src/crypto/asn1/x_nx509.c72
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/alpha-mont.pl317
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl200
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl327
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl323
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl918
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl225
-rwxr-xr-xsrc/lib/libssl/src/crypto/bn/asm/s390x.S678
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/sparcv9-mont.pl606
-rwxr-xr-xsrc/lib/libssl/src/crypto/bn/asm/sparcv9a-mont.pl882
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/via-mont.pl242
-rwxr-xr-xsrc/lib/libssl/src/crypto/bn/asm/x86-mont.pl591
-rw-r--r--src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl1138
-rw-r--r--src/lib/libssl/src/crypto/camellia/asm/cmll-x86_64.pl1080
-rw-r--r--src/lib/libssl/src/crypto/cms/cms_smime.c4
-rw-r--r--src/lib/libssl/src/crypto/des/asm/des_enc.m4345
-rwxr-xr-xsrc/lib/libssl/src/crypto/ppccpuid.pl94
-rw-r--r--src/lib/libssl/src/crypto/s390xcpuid.S90
-rw-r--r--src/lib/libssl/src/crypto/sparcv9cap.c154
-rw-r--r--src/lib/libssl/src/crypto/x509/x509_vpm.c16
-rw-r--r--src/lib/libssl/src/crypto/x509v3/v3_addr.c33
39 files changed, 18486 insertions, 149 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips3-mont.pl b/src/lib/libcrypto/bn/asm/mips3-mont.pl
new file mode 100644
index 0000000000..8f9156e02a
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/mips3-mont.pl
@@ -0,0 +1,327 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# This module doesn't present direct interest for OpenSSL, because it
11# doesn't provide better performance for longer keys. While 512-bit
12# RSA private key operations are 40% faster, 1024-bit ones are hardly
13# faster at all, while longer key operations are slower by up to 20%.
14# It might be of interest to embedded system developers though, as
15# it's smaller than 1KB, yet offers ~3x improvement over compiler
16# generated code.
17#
18# The module targets N32 and N64 MIPS ABIs and currently is a bit
19# IRIX-centric, i.e. is likely to require adaptation for other OSes.
20
# Symbolic names for the MIPS registers used by the generated assembly.
# The first six are the incoming arguments of bn_mul_mont, in order.
21# int bn_mul_mont(
22$rp="a0"; # BN_ULONG *rp,
23$ap="a1"; # const BN_ULONG *ap,
24$bp="a2"; # const BN_ULONG *bp,
25$np="a3"; # const BN_ULONG *np,
26$n0="a4"; # const BN_ULONG *n0,
27$num="a5"; # int num);
28
# Working registers:
#   lo0/hi0  low/high halves of the running ap[j]*bi sum
#   lo1/hi1  low/high halves of the running np[j]*m1 sum
#   aj/bi/nj words loaded from ap, bp and np
#   tp       cursor into the temporary vector kept on the stack
#   alo/ahi, nlo/nhi  product halves fetched with mflo/mfhi for the next step
#   tj       word loaded from the temporary vector
#   i/j      byte offsets driving the outer and inner loops
#   fp       copy of sp taken in the prologue; base of the register save area
#   m1       low word of lo0*n0, used as the multiplier for np[]
29$lo0="a6";
30$hi0="a7";
31$lo1="v0";
32$hi1="v1";
33$aj="t0";
34$bi="t1";
35$nj="t2";
36$tp="t3";
37$alo="s0";
38$ahi="s1";
39$nlo="s2";
40$nhi="s3";
41$tj="s4";
42$i="s5";
43$j="s6";
44$fp="t8";
45$m1="t9";
46
# NOTE(review): $FRAME is assigned here but is not referenced anywhere in the
# rest of this script as shown; the prologue and epilogue use a literal 64.
47$FRAME=8*(2+8);
48
# Assembly source for bn_mul_mont, built as a single interpolated heredoc.
# Layout of the generated routine:
#   prologue   - reserves a 64-byte frame and copies sp into $fp; returns 0
#                in v0 immediately when num < 4.
#   .Lproceed  - loads the n0 word through its pointer, scales num to bytes,
#                reserves num+16 bytes below the frame, masks sp with -4096,
#                and saves s0-s7 at $fp.
#   .L1st      - first pass, using bp[0]; results are stored through $tp.
#   .Louter /  - remaining words of bp; each pass adds into the vector on
#   .Linner      the stack.
#   .Lsub      - stores tp[i]-np[i] (with borrow) into rp.
#   .Lcopy     - copies from either the stack vector or rp into rp (chosen
#                by the borrow mask) while zeroing the stack vector.
#   epilogue   - restores s0-s7 from $fp, returns 1 in v0.
# Instructions that follow a branch or jump inside ".set noreorder" regions
# sit in its delay slot, so statement order must not be changed.
49$code=<<___;
50#include <asm.h>
51#include <regdef.h>
52
53.text
54
55.set noat
56.set reorder
57
58.align 5
59.globl bn_mul_mont
60.ent bn_mul_mont
61bn_mul_mont:
62 .set noreorder
63 PTR_SUB sp,64
64 move $fp,sp
65 .frame $fp,64,ra
66 slt AT,$num,4
67 li v0,0
68 beqzl AT,.Lproceed
69 nop
70 jr ra
71 PTR_ADD sp,$fp,64
72 .set reorder
73.align 5
74.Lproceed:
75 ld $n0,0($n0)
76 ld $bi,0($bp) # bp[0]
77 ld $aj,0($ap) # ap[0]
78 ld $nj,0($np) # np[0]
79 PTR_SUB sp,16 # place for two extra words
80 sll $num,3
81 li AT,-4096
82 PTR_SUB sp,$num
83 and sp,AT
84
85 sd s0,0($fp)
86 sd s1,8($fp)
87 sd s2,16($fp)
88 sd s3,24($fp)
89 sd s4,32($fp)
90 sd s5,40($fp)
91 sd s6,48($fp)
92 sd s7,56($fp)
93
94 dmultu $aj,$bi
95 ld $alo,8($ap)
96 ld $nlo,8($np)
97 mflo $lo0
98 mfhi $hi0
99 dmultu $lo0,$n0
100 mflo $m1
101
102 dmultu $alo,$bi
103 mflo $alo
104 mfhi $ahi
105
106 dmultu $nj,$m1
107 mflo $lo1
108 mfhi $hi1
109 dmultu $nlo,$m1
110 daddu $lo1,$lo0
111 sltu AT,$lo1,$lo0
112 daddu $hi1,AT
113 mflo $nlo
114 mfhi $nhi
115
116 move $tp,sp
117 li $j,16
118.align 4
119.L1st:
120 .set noreorder
121 PTR_ADD $aj,$ap,$j
122 ld $aj,($aj)
123 PTR_ADD $nj,$np,$j
124 ld $nj,($nj)
125
126 dmultu $aj,$bi
127 daddu $lo0,$alo,$hi0
128 daddu $lo1,$nlo,$hi1
129 sltu AT,$lo0,$hi0
130 sltu s7,$lo1,$hi1
131 daddu $hi0,$ahi,AT
132 daddu $hi1,$nhi,s7
133 mflo $alo
134 mfhi $ahi
135
136 daddu $lo1,$lo0
137 sltu AT,$lo1,$lo0
138 dmultu $nj,$m1
139 daddu $hi1,AT
140 addu $j,8
141 sd $lo1,($tp)
142 sltu s7,$j,$num
143 mflo $nlo
144 mfhi $nhi
145
146 bnez s7,.L1st
147 PTR_ADD $tp,8
148 .set reorder
149
150 daddu $lo0,$alo,$hi0
151 sltu AT,$lo0,$hi0
152 daddu $hi0,$ahi,AT
153
154 daddu $lo1,$nlo,$hi1
155 sltu s7,$lo1,$hi1
156 daddu $hi1,$nhi,s7
157 daddu $lo1,$lo0
158 sltu AT,$lo1,$lo0
159 daddu $hi1,AT
160
161 sd $lo1,($tp)
162
163 daddu $hi1,$hi0
164 sltu AT,$hi1,$hi0
165 sd $hi1,8($tp)
166 sd AT,16($tp)
167
168 li $i,8
169.align 4
170.Louter:
171 PTR_ADD $bi,$bp,$i
172 ld $bi,($bi)
173 ld $aj,($ap)
174 ld $alo,8($ap)
175 ld $tj,(sp)
176
177 dmultu $aj,$bi
178 ld $nj,($np)
179 ld $nlo,8($np)
180 mflo $lo0
181 mfhi $hi0
182 daddu $lo0,$tj
183 dmultu $lo0,$n0
184 sltu AT,$lo0,$tj
185 daddu $hi0,AT
186 mflo $m1
187
188 dmultu $alo,$bi
189 mflo $alo
190 mfhi $ahi
191
192 dmultu $nj,$m1
193 mflo $lo1
194 mfhi $hi1
195
196 dmultu $nlo,$m1
197 daddu $lo1,$lo0
198 sltu AT,$lo1,$lo0
199 daddu $hi1,AT
200 mflo $nlo
201 mfhi $nhi
202
203 move $tp,sp
204 li $j,16
205 ld $tj,8($tp)
206.align 4
207.Linner:
208 .set noreorder
209 PTR_ADD $aj,$ap,$j
210 ld $aj,($aj)
211 PTR_ADD $nj,$np,$j
212 ld $nj,($nj)
213
214 dmultu $aj,$bi
215 daddu $lo0,$alo,$hi0
216 daddu $lo1,$nlo,$hi1
217 sltu AT,$lo0,$hi0
218 sltu s7,$lo1,$hi1
219 daddu $hi0,$ahi,AT
220 daddu $hi1,$nhi,s7
221 mflo $alo
222 mfhi $ahi
223
224 daddu $lo0,$tj
225 addu $j,8
226 dmultu $nj,$m1
227 sltu AT,$lo0,$tj
228 daddu $lo1,$lo0
229 daddu $hi0,AT
230 sltu s7,$lo1,$lo0
231 ld $tj,16($tp)
232 daddu $hi1,s7
233 sltu AT,$j,$num
234 mflo $nlo
235 mfhi $nhi
236 sd $lo1,($tp)
237 bnez AT,.Linner
238 PTR_ADD $tp,8
239 .set reorder
240
241 daddu $lo0,$alo,$hi0
242 sltu AT,$lo0,$hi0
243 daddu $hi0,$ahi,AT
244 daddu $lo0,$tj
245 sltu s7,$lo0,$tj
246 daddu $hi0,s7
247
248 ld $tj,16($tp)
249 daddu $lo1,$nlo,$hi1
250 sltu AT,$lo1,$hi1
251 daddu $hi1,$nhi,AT
252 daddu $lo1,$lo0
253 sltu s7,$lo1,$lo0
254 daddu $hi1,s7
255 sd $lo1,($tp)
256
257 daddu $lo1,$hi1,$hi0
258 sltu $hi1,$lo1,$hi0
259 daddu $lo1,$tj
260 sltu AT,$lo1,$tj
261 daddu $hi1,AT
262 sd $lo1,8($tp)
263 sd $hi1,16($tp)
264
265 addu $i,8
266 sltu s7,$i,$num
267 bnez s7,.Louter
268
269 .set noreorder
270 PTR_ADD $tj,sp,$num # &tp[num]
271 move $tp,sp
272 move $ap,sp
273 li $hi0,0 # clear borrow bit
274
275.align 4
276.Lsub: ld $lo0,($tp)
277 ld $lo1,($np)
278 PTR_ADD $tp,8
279 PTR_ADD $np,8
280 dsubu $lo1,$lo0,$lo1 # tp[i]-np[i]
281 sgtu AT,$lo1,$lo0
282 dsubu $lo0,$lo1,$hi0
283 sgtu $hi0,$lo0,$lo1
284 sd $lo0,($rp)
285 or $hi0,AT
286 sltu AT,$tp,$tj
287 bnez AT,.Lsub
288 PTR_ADD $rp,8
289
290 dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit
291 move $tp,sp
292 PTR_SUB $rp,$num # restore rp
293 not $hi1,$hi0
294
295 and $ap,$hi0,sp
296 and $bp,$hi1,$rp
297 or $ap,$ap,$bp # ap=borrow?tp:rp
298
299.align 4
300.Lcopy: ld $aj,($ap)
301 PTR_ADD $ap,8
302 PTR_ADD $tp,8
303 sd zero,-8($tp)
304 sltu AT,$tp,$tj
305 sd $aj,($rp)
306 bnez AT,.Lcopy
307 PTR_ADD $rp,8
308
309 ld s0,0($fp)
310 ld s1,8($fp)
311 ld s2,16($fp)
312 ld s3,24($fp)
313 ld s4,32($fp)
314 ld s5,40($fp)
315 ld s6,48($fp)
316 ld s7,56($fp)
317 li v0,1
318 jr ra
319 PTR_ADD sp,$fp,64
320 .set reorder
321END(bn_mul_mont)
322.rdata
323.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
324___
325
# Emit the generated assembly on standard output.
326print $code;
327close STDOUT;
diff --git a/src/lib/libcrypto/x509v3/v3_addr.c b/src/lib/libcrypto/x509v3/v3_addr.c
index a37f844d3c..efdf7c3ba7 100644
--- a/src/lib/libcrypto/x509v3/v3_addr.c
+++ b/src/lib/libcrypto/x509v3/v3_addr.c
@@ -61,7 +61,7 @@
61 61
62#include <stdio.h> 62#include <stdio.h>
63#include <stdlib.h> 63#include <stdlib.h>
64#include <assert.h> 64
65#include "cryptlib.h" 65#include "cryptlib.h"
66#include <openssl/conf.h> 66#include <openssl/conf.h>
67#include <openssl/asn1.h> 67#include <openssl/asn1.h>
@@ -128,7 +128,7 @@ static int length_from_afi(const unsigned afi)
128/* 128/*
129 * Extract the AFI from an IPAddressFamily. 129 * Extract the AFI from an IPAddressFamily.
130 */ 130 */
131unsigned v3_addr_get_afi(const IPAddressFamily *f) 131unsigned int v3_addr_get_afi(const IPAddressFamily *f)
132{ 132{
133 return ((f != NULL && 133 return ((f != NULL &&
134 f->addressFamily != NULL && 134 f->addressFamily != NULL &&
@@ -147,7 +147,7 @@ static void addr_expand(unsigned char *addr,
147 const int length, 147 const int length,
148 const unsigned char fill) 148 const unsigned char fill)
149{ 149{
150 assert(bs->length >= 0 && bs->length <= length); 150 OPENSSL_assert(bs->length >= 0 && bs->length <= length);
151 if (bs->length > 0) { 151 if (bs->length > 0) {
152 memcpy(addr, bs->data, bs->length); 152 memcpy(addr, bs->data, bs->length);
153 if ((bs->flags & 7) != 0) { 153 if ((bs->flags & 7) != 0) {
@@ -190,6 +190,8 @@ static int i2r_address(BIO *out,
190 BIO_printf(out, "%x%s", (addr[i] << 8) | addr[i+1], (i < 14 ? ":" : "")); 190 BIO_printf(out, "%x%s", (addr[i] << 8) | addr[i+1], (i < 14 ? ":" : ""));
191 if (i < 16) 191 if (i < 16)
192 BIO_puts(out, ":"); 192 BIO_puts(out, ":");
193 if (i == 0)
194 BIO_puts(out, ":");
193 break; 195 break;
194 default: 196 default:
195 for (i = 0; i < bs->length; i++) 197 for (i = 0; i < bs->length; i++)
@@ -243,7 +245,7 @@ static int i2r_IPAddrBlocks(X509V3_EXT_METHOD *method,
243 int i; 245 int i;
244 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) { 246 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) {
245 IPAddressFamily *f = sk_IPAddressFamily_value(addr, i); 247 IPAddressFamily *f = sk_IPAddressFamily_value(addr, i);
246 const unsigned afi = v3_addr_get_afi(f); 248 const unsigned int afi = v3_addr_get_afi(f);
247 switch (afi) { 249 switch (afi) {
248 case IANA_AFI_IPV4: 250 case IANA_AFI_IPV4:
249 BIO_printf(out, "%*sIPv4", indent, ""); 251 BIO_printf(out, "%*sIPv4", indent, "");
@@ -453,7 +455,7 @@ static int make_addressRange(IPAddressOrRange **result,
453 if ((aor = IPAddressOrRange_new()) == NULL) 455 if ((aor = IPAddressOrRange_new()) == NULL)
454 return 0; 456 return 0;
455 aor->type = IPAddressOrRange_addressRange; 457 aor->type = IPAddressOrRange_addressRange;
456 assert(aor->u.addressRange == NULL); 458 OPENSSL_assert(aor->u.addressRange == NULL);
457 if ((aor->u.addressRange = IPAddressRange_new()) == NULL) 459 if ((aor->u.addressRange = IPAddressRange_new()) == NULL)
458 goto err; 460 goto err;
459 if (aor->u.addressRange->min == NULL && 461 if (aor->u.addressRange->min == NULL &&
@@ -522,7 +524,7 @@ static IPAddressFamily *make_IPAddressFamily(IPAddrBlocks *addr,
522 524
523 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) { 525 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) {
524 f = sk_IPAddressFamily_value(addr, i); 526 f = sk_IPAddressFamily_value(addr, i);
525 assert(f->addressFamily->data != NULL); 527 OPENSSL_assert(f->addressFamily->data != NULL);
526 if (f->addressFamily->length == keylen && 528 if (f->addressFamily->length == keylen &&
527 !memcmp(f->addressFamily->data, key, keylen)) 529 !memcmp(f->addressFamily->data, key, keylen))
528 return f; 530 return f;
@@ -654,7 +656,7 @@ static void extract_min_max(IPAddressOrRange *aor,
654 unsigned char *max, 656 unsigned char *max,
655 int length) 657 int length)
656{ 658{
657 assert(aor != NULL && min != NULL && max != NULL); 659 OPENSSL_assert(aor != NULL && min != NULL && max != NULL);
658 switch (aor->type) { 660 switch (aor->type) {
659 case IPAddressOrRange_addressPrefix: 661 case IPAddressOrRange_addressPrefix:
660 addr_expand(min, aor->u.addressPrefix, length, 0x00); 662 addr_expand(min, aor->u.addressPrefix, length, 0x00);
@@ -880,7 +882,7 @@ int v3_addr_canonize(IPAddrBlocks *addr)
880 } 882 }
881 (void)sk_IPAddressFamily_set_cmp_func(addr, IPAddressFamily_cmp); 883 (void)sk_IPAddressFamily_set_cmp_func(addr, IPAddressFamily_cmp);
882 sk_IPAddressFamily_sort(addr); 884 sk_IPAddressFamily_sort(addr);
883 assert(v3_addr_is_canonical(addr)); 885 OPENSSL_assert(v3_addr_is_canonical(addr));
884 return 1; 886 return 1;
885} 887}
886 888
@@ -1127,7 +1129,10 @@ int v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b)
1127 for (i = 0; i < sk_IPAddressFamily_num(a); i++) { 1129 for (i = 0; i < sk_IPAddressFamily_num(a); i++) {
1128 IPAddressFamily *fa = sk_IPAddressFamily_value(a, i); 1130 IPAddressFamily *fa = sk_IPAddressFamily_value(a, i);
1129 int j = sk_IPAddressFamily_find(b, fa); 1131 int j = sk_IPAddressFamily_find(b, fa);
1130 IPAddressFamily *fb = sk_IPAddressFamily_value(b, j); 1132 IPAddressFamily *fb;
1133 fb = sk_IPAddressFamily_value(b, j);
1134 if (fb == NULL)
1135 return 0;
1131 if (!addr_contains(fb->ipAddressChoice->u.addressesOrRanges, 1136 if (!addr_contains(fb->ipAddressChoice->u.addressesOrRanges,
1132 fa->ipAddressChoice->u.addressesOrRanges, 1137 fa->ipAddressChoice->u.addressesOrRanges,
1133 length_from_afi(v3_addr_get_afi(fb)))) 1138 length_from_afi(v3_addr_get_afi(fb))))
@@ -1164,9 +1169,9 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx,
1164 int i, j, ret = 1; 1169 int i, j, ret = 1;
1165 X509 *x = NULL; 1170 X509 *x = NULL;
1166 1171
1167 assert(chain != NULL && sk_X509_num(chain) > 0); 1172 OPENSSL_assert(chain != NULL && sk_X509_num(chain) > 0);
1168 assert(ctx != NULL || ext != NULL); 1173 OPENSSL_assert(ctx != NULL || ext != NULL);
1169 assert(ctx == NULL || ctx->verify_cb != NULL); 1174 OPENSSL_assert(ctx == NULL || ctx->verify_cb != NULL);
1170 1175
1171 /* 1176 /*
1172 * Figure out where to start. If we don't have an extension to 1177 * Figure out where to start. If we don't have an extension to
@@ -1178,7 +1183,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx,
1178 } else { 1183 } else {
1179 i = 0; 1184 i = 0;
1180 x = sk_X509_value(chain, i); 1185 x = sk_X509_value(chain, i);
1181 assert(x != NULL); 1186 OPENSSL_assert(x != NULL);
1182 if ((ext = x->rfc3779_addr) == NULL) 1187 if ((ext = x->rfc3779_addr) == NULL)
1183 goto done; 1188 goto done;
1184 } 1189 }
@@ -1197,7 +1202,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx,
1197 */ 1202 */
1198 for (i++; i < sk_X509_num(chain); i++) { 1203 for (i++; i < sk_X509_num(chain); i++) {
1199 x = sk_X509_value(chain, i); 1204 x = sk_X509_value(chain, i);
1200 assert(x != NULL); 1205 OPENSSL_assert(x != NULL);
1201 if (!v3_addr_is_canonical(x->rfc3779_addr)) 1206 if (!v3_addr_is_canonical(x->rfc3779_addr))
1202 validation_err(X509_V_ERR_INVALID_EXTENSION); 1207 validation_err(X509_V_ERR_INVALID_EXTENSION);
1203 if (x->rfc3779_addr == NULL) { 1208 if (x->rfc3779_addr == NULL) {
diff --git a/src/lib/libssl/src/apps/genpkey.c b/src/lib/libssl/src/apps/genpkey.c
new file mode 100644
index 0000000000..6dfda08b9e
--- /dev/null
+++ b/src/lib/libssl/src/apps/genpkey.c
@@ -0,0 +1,440 @@
1/* apps/genpkey.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project 2006
4 */
5/* ====================================================================
6 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58#include <stdio.h>
59#include <string.h>
60#include "apps.h"
61#include <openssl/pem.h>
62#include <openssl/err.h>
63#include <openssl/evp.h>
64#ifndef OPENSSL_NO_ENGINE
65#include <openssl/engine.h>
66#endif
67
/* Forward declarations for the file-local helpers defined after MAIN. */
68static int init_keygen_file(BIO *err, EVP_PKEY_CTX **pctx,
69 const char *file, ENGINE *e);
70static int genpkey_cb(EVP_PKEY_CTX *ctx);
71
/* NOTE(review): PROG is presumably consumed by the MAIN macro from apps.h
 * to name this tool's entry point -- confirm against apps.h. */
72#define PROG genpkey_main
73
74int MAIN(int, char **);
75
76int MAIN(int argc, char **argv)
77 {
78 ENGINE *e = NULL;
79 char **args, *outfile = NULL;
80 char *passarg = NULL;
81 BIO *in = NULL, *out = NULL;
82 const EVP_CIPHER *cipher = NULL;
83 int outformat;
84 int text = 0;
85 EVP_PKEY *pkey=NULL;
86 EVP_PKEY_CTX *ctx = NULL;
87 char *pass = NULL;
88 int badarg = 0;
89 int ret = 1, rv;
90
91 int do_param = 0;
92
93 if (bio_err == NULL)
94 bio_err = BIO_new_fp (stderr, BIO_NOCLOSE);
95
96 if (!load_config(bio_err, NULL))
97 goto end;
98
99 outformat=FORMAT_PEM;
100
101 ERR_load_crypto_strings();
102 OpenSSL_add_all_algorithms();
103 args = argv + 1;
104 while (!badarg && *args && *args[0] == '-')
105 {
106 if (!strcmp(*args,"-outform"))
107 {
108 if (args[1])
109 {
110 args++;
111 outformat=str2fmt(*args);
112 }
113 else badarg = 1;
114 }
115 else if (!strcmp(*args,"-pass"))
116 {
117 if (!args[1]) goto bad;
118 passarg= *(++args);
119 }
120#ifndef OPENSSL_NO_ENGINE
121 else if (strcmp(*args,"-engine") == 0)
122 {
123 if (!args[1])
124 goto bad;
125 e = setup_engine(bio_err, *(++args), 0);
126 }
127#endif
128 else if (!strcmp (*args, "-paramfile"))
129 {
130 if (!args[1])
131 goto bad;
132 args++;
133 if (do_param == 1)
134 goto bad;
135 if (!init_keygen_file(bio_err, &ctx, *args, e))
136 goto end;
137 }
138 else if (!strcmp (*args, "-out"))
139 {
140 if (args[1])
141 {
142 args++;
143 outfile = *args;
144 }
145 else badarg = 1;
146 }
147 else if (strcmp(*args,"-algorithm") == 0)
148 {
149 if (!args[1])
150 goto bad;
151 if (!init_gen_str(bio_err, &ctx, *(++args),e, do_param))
152 goto end;
153 }
154 else if (strcmp(*args,"-pkeyopt") == 0)
155 {
156 if (!args[1])
157 goto bad;
158 if (!ctx)
159 {
160 BIO_puts(bio_err, "No keytype specified\n");
161 goto bad;
162 }
163 else if (pkey_ctrl_string(ctx, *(++args)) <= 0)
164 {
165 BIO_puts(bio_err, "parameter setting error\n");
166 ERR_print_errors(bio_err);
167 goto end;
168 }
169 }
170 else if (strcmp(*args,"-genparam") == 0)
171 {
172 if (ctx)
173 goto bad;
174 do_param = 1;
175 }
176 else if (strcmp(*args,"-text") == 0)
177 text=1;
178 else
179 {
180 cipher = EVP_get_cipherbyname(*args + 1);
181 if (!cipher)
182 {
183 BIO_printf(bio_err, "Unknown cipher %s\n",
184 *args + 1);
185 badarg = 1;
186 }
187 if (do_param == 1)
188 badarg = 1;
189 }
190 args++;
191 }
192
193 if (!ctx)
194 badarg = 1;
195
196 if (badarg)
197 {
198 bad:
199 BIO_printf(bio_err, "Usage: genpkey [options]\n");
200 BIO_printf(bio_err, "where options may be\n");
201 BIO_printf(bio_err, "-out file output file\n");
202 BIO_printf(bio_err, "-outform X output format (DER or PEM)\n");
203 BIO_printf(bio_err, "-pass arg output file pass phrase source\n");
204 BIO_printf(bio_err, "-<cipher> use cipher <cipher> to encrypt the key\n");
205#ifndef OPENSSL_NO_ENGINE
206 BIO_printf(bio_err, "-engine e use engine e, possibly a hardware device.\n");
207#endif
208 BIO_printf(bio_err, "-paramfile file parameters file\n");
209 BIO_printf(bio_err, "-algorithm alg the public key algorithm\n");
210 BIO_printf(bio_err, "-pkeyopt opt:value set the public key algorithm option <opt>\n"
211 " to value <value>\n");
212 BIO_printf(bio_err, "-genparam generate parameters, not key\n");
213 BIO_printf(bio_err, "-text print the in text\n");
214 BIO_printf(bio_err, "NB: options order may be important! See the manual page.\n");
215 goto end;
216 }
217
218 if (!app_passwd(bio_err, passarg, NULL, &pass, NULL))
219 {
220 BIO_puts(bio_err, "Error getting password\n");
221 goto end;
222 }
223
224 if (outfile)
225 {
226 if (!(out = BIO_new_file (outfile, "wb")))
227 {
228 BIO_printf(bio_err,
229 "Can't open output file %s\n", outfile);
230 goto end;
231 }
232 }
233 else
234 {
235 out = BIO_new_fp (stdout, BIO_NOCLOSE);
236#ifdef OPENSSL_SYS_VMS
237 {
238 BIO *tmpbio = BIO_new(BIO_f_linebuffer());
239 out = BIO_push(tmpbio, out);
240 }
241#endif
242 }
243
244 EVP_PKEY_CTX_set_cb(ctx, genpkey_cb);
245 EVP_PKEY_CTX_set_app_data(ctx, bio_err);
246
247 if (do_param)
248 {
249 if (EVP_PKEY_paramgen(ctx, &pkey) <= 0)
250 {
251 BIO_puts(bio_err, "Error generating parameters\n");
252 ERR_print_errors(bio_err);
253 goto end;
254 }
255 }
256 else
257 {
258 if (EVP_PKEY_keygen(ctx, &pkey) <= 0)
259 {
260 BIO_puts(bio_err, "Error generating key\n");
261 ERR_print_errors(bio_err);
262 goto end;
263 }
264 }
265
266 if (do_param)
267 rv = PEM_write_bio_Parameters(out, pkey);
268 else if (outformat == FORMAT_PEM)
269 rv = PEM_write_bio_PrivateKey(out, pkey, cipher, NULL, 0,
270 NULL, pass);
271 else if (outformat == FORMAT_ASN1)
272 rv = i2d_PrivateKey_bio(out, pkey);
273 else
274 {
275 BIO_printf(bio_err, "Bad format specified for key\n");
276 goto end;
277 }
278
279 if (rv <= 0)
280 {
281 BIO_puts(bio_err, "Error writing key\n");
282 ERR_print_errors(bio_err);
283 }
284
285 if (text)
286 {
287 if (do_param)
288 rv = EVP_PKEY_print_params(out, pkey, 0, NULL);
289 else
290 rv = EVP_PKEY_print_private(out, pkey, 0, NULL);
291
292 if (rv <= 0)
293 {
294 BIO_puts(bio_err, "Error printing key\n");
295 ERR_print_errors(bio_err);
296 }
297 }
298
299 ret = 0;
300
301 end:
302 if (pkey)
303 EVP_PKEY_free(pkey);
304 if (ctx)
305 EVP_PKEY_CTX_free(ctx);
306 if (out)
307 BIO_free_all(out);
308 BIO_free(in);
309 if (pass)
310 OPENSSL_free(pass);
311
312 return ret;
313 }
314
315static int init_keygen_file(BIO *err, EVP_PKEY_CTX **pctx,
316 const char *file, ENGINE *e)
317 {
318 BIO *pbio;
319 EVP_PKEY *pkey = NULL;
320 EVP_PKEY_CTX *ctx = NULL;
321 if (*pctx)
322 {
323 BIO_puts(err, "Parameters already set!\n");
324 return 0;
325 }
326
327 pbio = BIO_new_file(file, "r");
328 if (!pbio)
329 {
330 BIO_printf(err, "Can't open parameter file %s\n", file);
331 return 0;
332 }
333
334 pkey = PEM_read_bio_Parameters(pbio, NULL);
335 BIO_free(pbio);
336
337 if (!pkey)
338 {
339 BIO_printf(bio_err, "Error reading parameter file %s\n", file);
340 return 0;
341 }
342
343 ctx = EVP_PKEY_CTX_new(pkey, e);
344 if (!ctx)
345 goto err;
346 if (EVP_PKEY_keygen_init(ctx) <= 0)
347 goto err;
348 EVP_PKEY_free(pkey);
349 *pctx = ctx;
350 return 1;
351
352 err:
353 BIO_puts(err, "Error initializing context\n");
354 ERR_print_errors(err);
355 if (ctx)
356 EVP_PKEY_CTX_free(ctx);
357 if (pkey)
358 EVP_PKEY_free(pkey);
359 return 0;
360
361 }
362
363int init_gen_str(BIO *err, EVP_PKEY_CTX **pctx,
364 const char *algname, ENGINE *e, int do_param)
365 {
366 EVP_PKEY_CTX *ctx = NULL;
367 const EVP_PKEY_ASN1_METHOD *ameth;
368 ENGINE *tmpeng = NULL;
369 int pkey_id;
370
371 if (*pctx)
372 {
373 BIO_puts(err, "Algorithm already set!\n");
374 return 0;
375 }
376
377 ameth = EVP_PKEY_asn1_find_str(&tmpeng, algname, -1);
378
379#ifndef OPENSSL_NO_ENGINE
380 if (!ameth && e)
381 ameth = ENGINE_get_pkey_asn1_meth_str(e, algname, -1);
382#endif
383
384 if (!ameth)
385 {
386 BIO_printf(bio_err, "Algorithm %s not found\n", algname);
387 return 0;
388 }
389
390 ERR_clear_error();
391
392 EVP_PKEY_asn1_get0_info(&pkey_id, NULL, NULL, NULL, NULL, ameth);
393#ifndef OPENSSL_NO_ENGINE
394 if (tmpeng)
395 ENGINE_finish(tmpeng);
396#endif
397 ctx = EVP_PKEY_CTX_new_id(pkey_id, e);
398
399 if (!ctx)
400 goto err;
401 if (do_param)
402 {
403 if (EVP_PKEY_paramgen_init(ctx) <= 0)
404 goto err;
405 }
406 else
407 {
408 if (EVP_PKEY_keygen_init(ctx) <= 0)
409 goto err;
410 }
411
412 *pctx = ctx;
413 return 1;
414
415 err:
416 BIO_printf(err, "Error initializing %s context\n", algname);
417 ERR_print_errors(err);
418 if (ctx)
419 EVP_PKEY_CTX_free(ctx);
420 return 0;
421
422 }
423
424static int genpkey_cb(EVP_PKEY_CTX *ctx)
425 {
426 char c='*';
427 BIO *b = EVP_PKEY_CTX_get_app_data(ctx);
428 int p;
429 p = EVP_PKEY_CTX_get_keygen_info(ctx, 0);
430 if (p == 0) c='.';
431 if (p == 1) c='+';
432 if (p == 2) c='*';
433 if (p == 3) c='\n';
434 BIO_write(b,&c,1);
435 (void)BIO_flush(b);
436#ifdef LINT
437 p=n;
438#endif
439 return 1;
440 }
diff --git a/src/lib/libssl/src/apps/pkey.c b/src/lib/libssl/src/apps/pkey.c
new file mode 100644
index 0000000000..17e6702fb1
--- /dev/null
+++ b/src/lib/libssl/src/apps/pkey.c
@@ -0,0 +1,284 @@
1/* apps/pkey.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project 2006
4 */
5/* ====================================================================
6 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58#include <stdio.h>
59#include <string.h>
60#include "apps.h"
61#include <openssl/pem.h>
62#include <openssl/err.h>
63#include <openssl/evp.h>
64
65#define PROG pkey_main
66
67int MAIN(int, char **);
68
69int MAIN(int argc, char **argv)
70 {
71 ENGINE *e = NULL;
72 char **args, *infile = NULL, *outfile = NULL;
73 char *passargin = NULL, *passargout = NULL;
74 BIO *in = NULL, *out = NULL;
75 const EVP_CIPHER *cipher = NULL;
76 int informat, outformat;
77 int pubin = 0, pubout = 0, pubtext = 0, text = 0, noout = 0;
78 EVP_PKEY *pkey=NULL;
79 char *passin = NULL, *passout = NULL;
80 int badarg = 0;
81#ifndef OPENSSL_NO_ENGINE
82 char *engine=NULL;
83#endif
84 int ret = 1;
85
86 if (bio_err == NULL)
87 bio_err = BIO_new_fp (stderr, BIO_NOCLOSE);
88
89 if (!load_config(bio_err, NULL))
90 goto end;
91
92 informat=FORMAT_PEM;
93 outformat=FORMAT_PEM;
94
95 ERR_load_crypto_strings();
96 OpenSSL_add_all_algorithms();
97 args = argv + 1;
98 while (!badarg && *args && *args[0] == '-')
99 {
100 if (!strcmp(*args,"-inform"))
101 {
102 if (args[1])
103 {
104 args++;
105 informat=str2fmt(*args);
106 }
107 else badarg = 1;
108 }
109 else if (!strcmp(*args,"-outform"))
110 {
111 if (args[1])
112 {
113 args++;
114 outformat=str2fmt(*args);
115 }
116 else badarg = 1;
117 }
118 else if (!strcmp(*args,"-passin"))
119 {
120 if (!args[1]) goto bad;
121 passargin= *(++args);
122 }
123 else if (!strcmp(*args,"-passout"))
124 {
125 if (!args[1]) goto bad;
126 passargout= *(++args);
127 }
128#ifndef OPENSSL_NO_ENGINE
129 else if (strcmp(*args,"-engine") == 0)
130 {
131 if (!args[1]) goto bad;
132 engine= *(++args);
133 }
134#endif
135 else if (!strcmp (*args, "-in"))
136 {
137 if (args[1])
138 {
139 args++;
140 infile = *args;
141 }
142 else badarg = 1;
143 }
144 else if (!strcmp (*args, "-out"))
145 {
146 if (args[1])
147 {
148 args++;
149 outfile = *args;
150 }
151 else badarg = 1;
152 }
153 else if (strcmp(*args,"-pubin") == 0)
154 {
155 pubin=1;
156 pubout=1;
157 pubtext=1;
158 }
159 else if (strcmp(*args,"-pubout") == 0)
160 pubout=1;
161 else if (strcmp(*args,"-text_pub") == 0)
162 {
163 pubtext=1;
164 text=1;
165 }
166 else if (strcmp(*args,"-text") == 0)
167 text=1;
168 else if (strcmp(*args,"-noout") == 0)
169 noout=1;
170 else
171 {
172 cipher = EVP_get_cipherbyname(*args + 1);
173 if (!cipher)
174 {
175 BIO_printf(bio_err, "Unknown cipher %s\n",
176 *args + 1);
177 badarg = 1;
178 }
179 }
180 args++;
181 }
182
183 if (badarg)
184 {
185 bad:
186 BIO_printf(bio_err, "Usage pkey [options]\n");
187 BIO_printf(bio_err, "where options are\n");
188 BIO_printf(bio_err, "-in file input file\n");
189 BIO_printf(bio_err, "-inform X input format (DER or PEM)\n");
190 BIO_printf(bio_err, "-passin arg input file pass phrase source\n");
191 BIO_printf(bio_err, "-outform X output format (DER or PEM)\n");
192 BIO_printf(bio_err, "-out file output file\n");
193 BIO_printf(bio_err, "-passout arg output file pass phrase source\n");
194#ifndef OPENSSL_NO_ENGINE
195 BIO_printf(bio_err, "-engine e use engine e, possibly a hardware device.\n");
196#endif
197 return 1;
198 }
199
200#ifndef OPENSSL_NO_ENGINE
201 e = setup_engine(bio_err, engine, 0);
202#endif
203
204 if (!app_passwd(bio_err, passargin, passargout, &passin, &passout))
205 {
206 BIO_printf(bio_err, "Error getting passwords\n");
207 goto end;
208 }
209
210 if (outfile)
211 {
212 if (!(out = BIO_new_file (outfile, "wb")))
213 {
214 BIO_printf(bio_err,
215 "Can't open output file %s\n", outfile);
216 goto end;
217 }
218 }
219 else
220 {
221 out = BIO_new_fp (stdout, BIO_NOCLOSE);
222#ifdef OPENSSL_SYS_VMS
223 {
224 BIO *tmpbio = BIO_new(BIO_f_linebuffer());
225 out = BIO_push(tmpbio, out);
226 }
227#endif
228 }
229
230 if (pubin)
231 pkey = load_pubkey(bio_err, infile, informat, 1,
232 passin, e, "Public Key");
233 else
234 pkey = load_key(bio_err, infile, informat, 1,
235 passin, e, "key");
236 if (!pkey)
237 goto end;
238
239 if (!noout)
240 {
241 if (outformat == FORMAT_PEM)
242 {
243 if (pubout)
244 PEM_write_bio_PUBKEY(out,pkey);
245 else
246 PEM_write_bio_PrivateKey(out, pkey, cipher,
247 NULL, 0, NULL, passout);
248 }
249 else if (outformat == FORMAT_ASN1)
250 {
251 if (pubout)
252 i2d_PUBKEY_bio(out, pkey);
253 else
254 i2d_PrivateKey_bio(out, pkey);
255 }
256 else
257 {
258 BIO_printf(bio_err, "Bad format specified for key\n");
259 goto end;
260 }
261
262 }
263
264 if (text)
265 {
266 if (pubtext)
267 EVP_PKEY_print_public(out, pkey, 0, NULL);
268 else
269 EVP_PKEY_print_private(out, pkey, 0, NULL);
270 }
271
272 ret = 0;
273
274 end:
275 EVP_PKEY_free(pkey);
276 BIO_free_all(out);
277 BIO_free(in);
278 if (passin)
279 OPENSSL_free(passin);
280 if (passout)
281 OPENSSL_free(passout);
282
283 return ret;
284 }
diff --git a/src/lib/libssl/src/apps/pkeyparam.c b/src/lib/libssl/src/apps/pkeyparam.c
new file mode 100644
index 0000000000..4319eb4de5
--- /dev/null
+++ b/src/lib/libssl/src/apps/pkeyparam.c
@@ -0,0 +1,201 @@
1/* apps/pkeyparam.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project 2006
4 */
5/* ====================================================================
6 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58#include <stdio.h>
59#include <string.h>
60#include "apps.h"
61#include <openssl/pem.h>
62#include <openssl/err.h>
63#include <openssl/evp.h>
64
65#define PROG pkeyparam_main
66
67int MAIN(int, char **);
68
69int MAIN(int argc, char **argv)
70 {
71 char **args, *infile = NULL, *outfile = NULL;
72 BIO *in = NULL, *out = NULL;
73 int text = 0, noout = 0;
74 EVP_PKEY *pkey=NULL;
75 int badarg = 0;
76#ifndef OPENSSL_NO_ENGINE
77 ENGINE *e = NULL;
78 char *engine=NULL;
79#endif
80 int ret = 1;
81
82 if (bio_err == NULL)
83 bio_err = BIO_new_fp (stderr, BIO_NOCLOSE);
84
85 if (!load_config(bio_err, NULL))
86 goto end;
87
88 ERR_load_crypto_strings();
89 OpenSSL_add_all_algorithms();
90 args = argv + 1;
91 while (!badarg && *args && *args[0] == '-')
92 {
93 if (!strcmp (*args, "-in"))
94 {
95 if (args[1])
96 {
97 args++;
98 infile = *args;
99 }
100 else badarg = 1;
101 }
102 else if (!strcmp (*args, "-out"))
103 {
104 if (args[1])
105 {
106 args++;
107 outfile = *args;
108 }
109 else badarg = 1;
110 }
111#ifndef OPENSSL_NO_ENGINE
112 else if (strcmp(*args,"-engine") == 0)
113 {
114 if (!args[1]) goto bad;
115 engine= *(++args);
116 }
117#endif
118
119 else if (strcmp(*args,"-text") == 0)
120 text=1;
121 else if (strcmp(*args,"-noout") == 0)
122 noout=1;
123 args++;
124 }
125
126 if (badarg)
127 {
128#ifndef OPENSSL_NO_ENGINE
129 bad:
130#endif
131 BIO_printf(bio_err, "Usage pkeyparam [options]\n");
132 BIO_printf(bio_err, "where options are\n");
133 BIO_printf(bio_err, "-in file input file\n");
134 BIO_printf(bio_err, "-out file output file\n");
135 BIO_printf(bio_err, "-text print parameters as text\n");
136 BIO_printf(bio_err, "-noout don't output encoded parameters\n");
137#ifndef OPENSSL_NO_ENGINE
138 BIO_printf(bio_err, "-engine e use engine e, possibly a hardware device.\n");
139#endif
140 return 1;
141 }
142
143#ifndef OPENSSL_NO_ENGINE
144 e = setup_engine(bio_err, engine, 0);
145#endif
146
147 if (infile)
148 {
149 if (!(in = BIO_new_file (infile, "r")))
150 {
151 BIO_printf(bio_err,
152 "Can't open input file %s\n", infile);
153 goto end;
154 }
155 }
156 else
157 in = BIO_new_fp (stdin, BIO_NOCLOSE);
158
159 if (outfile)
160 {
161 if (!(out = BIO_new_file (outfile, "w")))
162 {
163 BIO_printf(bio_err,
164 "Can't open output file %s\n", outfile);
165 goto end;
166 }
167 }
168 else
169 {
170 out = BIO_new_fp (stdout, BIO_NOCLOSE);
171#ifdef OPENSSL_SYS_VMS
172 {
173 BIO *tmpbio = BIO_new(BIO_f_linebuffer());
174 out = BIO_push(tmpbio, out);
175 }
176#endif
177 }
178
179 pkey = PEM_read_bio_Parameters(in, NULL);
180 if (!pkey)
181 {
182 BIO_printf(bio_err, "Error reading paramters\n");
183 ERR_print_errors(bio_err);
184 goto end;
185 }
186
187 if (!noout)
188 PEM_write_bio_Parameters(out,pkey);
189
190 if (text)
191 EVP_PKEY_print_params(out, pkey, 0, NULL);
192
193 ret = 0;
194
195 end:
196 EVP_PKEY_free(pkey);
197 BIO_free_all(out);
198 BIO_free(in);
199
200 return ret;
201 }
diff --git a/src/lib/libssl/src/apps/pkeyutl.c b/src/lib/libssl/src/apps/pkeyutl.c
new file mode 100644
index 0000000000..b808e1ef49
--- /dev/null
+++ b/src/lib/libssl/src/apps/pkeyutl.c
@@ -0,0 +1,570 @@
1/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
2 * project 2006.
3 */
4/* ====================================================================
5 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * licensing@OpenSSL.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58
59#include "apps.h"
60#include <string.h>
61#include <openssl/err.h>
62#include <openssl/pem.h>
63#include <openssl/evp.h>
64
65#define KEY_PRIVKEY 1
66#define KEY_PUBKEY 2
67#define KEY_CERT 3
68
69static void usage(void);
70
71#undef PROG
72
73#define PROG pkeyutl_main
74
75static EVP_PKEY_CTX *init_ctx(int *pkeysize,
76 char *keyfile, int keyform, int key_type,
77 char *passargin, int pkey_op, ENGINE *e);
78
79static int setup_peer(BIO *err, EVP_PKEY_CTX *ctx, int peerform,
80 const char *file);
81
82static int do_keyop(EVP_PKEY_CTX *ctx, int pkey_op,
83 unsigned char *out, size_t *poutlen,
84 unsigned char *in, size_t inlen);
85
86int MAIN(int argc, char **);
87
88int MAIN(int argc, char **argv)
89{
90 BIO *in = NULL, *out = NULL;
91 char *infile = NULL, *outfile = NULL, *sigfile = NULL;
92 ENGINE *e = NULL;
93 int pkey_op = EVP_PKEY_OP_SIGN, key_type = KEY_PRIVKEY;
94 int keyform = FORMAT_PEM, peerform = FORMAT_PEM;
95 char badarg = 0, rev = 0;
96 char hexdump = 0, asn1parse = 0;
97 EVP_PKEY_CTX *ctx = NULL;
98 char *passargin = NULL;
99 int keysize = -1;
100
101 unsigned char *buf_in = NULL, *buf_out = NULL, *sig = NULL;
102 size_t buf_outlen;
103 int buf_inlen = 0, siglen = -1;
104
105 int ret = 1, rv = -1;
106
107 argc--;
108 argv++;
109
110 if(!bio_err) bio_err = BIO_new_fp(stderr, BIO_NOCLOSE);
111
112 if (!load_config(bio_err, NULL))
113 goto end;
114 ERR_load_crypto_strings();
115 OpenSSL_add_all_algorithms();
116
117 while(argc >= 1)
118 {
119 if (!strcmp(*argv,"-in"))
120 {
121 if (--argc < 1) badarg = 1;
122 infile= *(++argv);
123 }
124 else if (!strcmp(*argv,"-out"))
125 {
126 if (--argc < 1) badarg = 1;
127 outfile= *(++argv);
128 }
129 else if (!strcmp(*argv,"-sigfile"))
130 {
131 if (--argc < 1) badarg = 1;
132 sigfile= *(++argv);
133 }
134 else if(!strcmp(*argv, "-inkey"))
135 {
136 if (--argc < 1)
137 badarg = 1;
138 else
139 {
140 ctx = init_ctx(&keysize,
141 *(++argv), keyform, key_type,
142 passargin, pkey_op, e);
143 if (!ctx)
144 {
145 BIO_puts(bio_err,
146 "Error initializing context\n");
147 ERR_print_errors(bio_err);
148 badarg = 1;
149 }
150 }
151 }
152 else if (!strcmp(*argv,"-peerkey"))
153 {
154 if (--argc < 1)
155 badarg = 1;
156 else if (!setup_peer(bio_err, ctx, peerform, *(++argv)))
157 badarg = 1;
158 }
159 else if (!strcmp(*argv,"-passin"))
160 {
161 if (--argc < 1) badarg = 1;
162 passargin= *(++argv);
163 }
164 else if (strcmp(*argv,"-peerform") == 0)
165 {
166 if (--argc < 1) badarg = 1;
167 peerform=str2fmt(*(++argv));
168 }
169 else if (strcmp(*argv,"-keyform") == 0)
170 {
171 if (--argc < 1) badarg = 1;
172 keyform=str2fmt(*(++argv));
173 }
174#ifndef OPENSSL_NO_ENGINE
175 else if(!strcmp(*argv, "-engine"))
176 {
177 if (--argc < 1)
178 badarg = 1;
179 else
180 e = setup_engine(bio_err, *(++argv), 0);
181 }
182#endif
183 else if(!strcmp(*argv, "-pubin"))
184 key_type = KEY_PUBKEY;
185 else if(!strcmp(*argv, "-certin"))
186 key_type = KEY_CERT;
187 else if(!strcmp(*argv, "-asn1parse"))
188 asn1parse = 1;
189 else if(!strcmp(*argv, "-hexdump"))
190 hexdump = 1;
191 else if(!strcmp(*argv, "-sign"))
192 pkey_op = EVP_PKEY_OP_SIGN;
193 else if(!strcmp(*argv, "-verify"))
194 pkey_op = EVP_PKEY_OP_VERIFY;
195 else if(!strcmp(*argv, "-verifyrecover"))
196 pkey_op = EVP_PKEY_OP_VERIFYRECOVER;
197 else if(!strcmp(*argv, "-rev"))
198 rev = 1;
199 else if(!strcmp(*argv, "-encrypt"))
200 pkey_op = EVP_PKEY_OP_ENCRYPT;
201 else if(!strcmp(*argv, "-decrypt"))
202 pkey_op = EVP_PKEY_OP_DECRYPT;
203 else if(!strcmp(*argv, "-derive"))
204 pkey_op = EVP_PKEY_OP_DERIVE;
205 else if (strcmp(*argv,"-pkeyopt") == 0)
206 {
207 if (--argc < 1)
208 badarg = 1;
209 else if (!ctx)
210 {
211 BIO_puts(bio_err,
212 "-pkeyopt command before -inkey\n");
213 badarg = 1;
214 }
215 else if (pkey_ctrl_string(ctx, *(++argv)) <= 0)
216 {
217 BIO_puts(bio_err, "parameter setting error\n");
218 ERR_print_errors(bio_err);
219 goto end;
220 }
221 }
222 else badarg = 1;
223 if(badarg)
224 {
225 usage();
226 goto end;
227 }
228 argc--;
229 argv++;
230 }
231
232 if (!ctx)
233 {
234 usage();
235 goto end;
236 }
237
238 if (sigfile && (pkey_op != EVP_PKEY_OP_VERIFY))
239 {
240 BIO_puts(bio_err, "Signature file specified for non verify\n");
241 goto end;
242 }
243
244 if (!sigfile && (pkey_op == EVP_PKEY_OP_VERIFY))
245 {
246 BIO_puts(bio_err, "No signature file specified for verify\n");
247 goto end;
248 }
249
250/* FIXME: seed PRNG only if needed */
251 app_RAND_load_file(NULL, bio_err, 0);
252
253 if (pkey_op != EVP_PKEY_OP_DERIVE)
254 {
255 if(infile)
256 {
257 if(!(in = BIO_new_file(infile, "rb")))
258 {
259 BIO_puts(bio_err,
260 "Error Opening Input File\n");
261 ERR_print_errors(bio_err);
262 goto end;
263 }
264 }
265 else
266 in = BIO_new_fp(stdin, BIO_NOCLOSE);
267 }
268
269 if(outfile)
270 {
271 if(!(out = BIO_new_file(outfile, "wb")))
272 {
273 BIO_printf(bio_err, "Error Creating Output File\n");
274 ERR_print_errors(bio_err);
275 goto end;
276 }
277 }
278 else
279 {
280 out = BIO_new_fp(stdout, BIO_NOCLOSE);
281#ifdef OPENSSL_SYS_VMS
282 {
283 BIO *tmpbio = BIO_new(BIO_f_linebuffer());
284 out = BIO_push(tmpbio, out);
285 }
286#endif
287 }
288
289 if (sigfile)
290 {
291 BIO *sigbio = BIO_new_file(sigfile, "rb");
292 if (!sigbio)
293 {
294 BIO_printf(bio_err, "Can't open signature file %s\n",
295 sigfile);
296 goto end;
297 }
298 siglen = bio_to_mem(&sig, keysize * 10, sigbio);
299 BIO_free(sigbio);
300 if (siglen <= 0)
301 {
302 BIO_printf(bio_err, "Error reading signature data\n");
303 goto end;
304 }
305 }
306
307 if (in)
308 {
309 /* Read the input data */
310 buf_inlen = bio_to_mem(&buf_in, keysize * 10, in);
311 if(buf_inlen <= 0)
312 {
313 BIO_printf(bio_err, "Error reading input Data\n");
314 exit(1);
315 }
316 if(rev)
317 {
318 size_t i;
319 unsigned char ctmp;
320 size_t l = (size_t)buf_inlen;
321 for(i = 0; i < l/2; i++)
322 {
323 ctmp = buf_in[i];
324 buf_in[i] = buf_in[l - 1 - i];
325 buf_in[l - 1 - i] = ctmp;
326 }
327 }
328 }
329
330 if(pkey_op == EVP_PKEY_OP_VERIFY)
331 {
332 rv = EVP_PKEY_verify(ctx, sig, (size_t)siglen,
333 buf_in, (size_t)buf_inlen);
334 if (rv == 0)
335 BIO_puts(out, "Signature Verification Failure\n");
336 else if (rv == 1)
337 BIO_puts(out, "Signature Verified Successfully\n");
338 if (rv >= 0)
339 goto end;
340 }
341 else
342 {
343 rv = do_keyop(ctx, pkey_op, NULL, (size_t *)&buf_outlen,
344 buf_in, (size_t)buf_inlen);
345 if (rv > 0)
346 {
347 buf_out = OPENSSL_malloc(buf_outlen);
348 if (!buf_out)
349 rv = -1;
350 else
351 rv = do_keyop(ctx, pkey_op,
352 buf_out, (size_t *)&buf_outlen,
353 buf_in, (size_t)buf_inlen);
354 }
355 }
356
357 if(rv <= 0)
358 {
359 BIO_printf(bio_err, "Public Key operation error\n");
360 ERR_print_errors(bio_err);
361 goto end;
362 }
363 ret = 0;
364 if(asn1parse)
365 {
366 if(!ASN1_parse_dump(out, buf_out, buf_outlen, 1, -1))
367 ERR_print_errors(bio_err);
368 }
369 else if(hexdump)
370 BIO_dump(out, (char *)buf_out, buf_outlen);
371 else
372 BIO_write(out, buf_out, buf_outlen);
373
374 end:
375 if (ctx)
376 EVP_PKEY_CTX_free(ctx);
377 BIO_free(in);
378 BIO_free_all(out);
379 if (buf_in)
380 OPENSSL_free(buf_in);
381 if (buf_out)
382 OPENSSL_free(buf_out);
383 if (sig)
384 OPENSSL_free(sig);
385 return ret;
386}
387
388static void usage()
389{
390 BIO_printf(bio_err, "Usage: pkeyutl [options]\n");
391 BIO_printf(bio_err, "-in file input file\n");
392 BIO_printf(bio_err, "-out file output file\n");
393 BIO_printf(bio_err, "-signature file signature file (verify operation only)\n");
394 BIO_printf(bio_err, "-inkey file input key\n");
395 BIO_printf(bio_err, "-keyform arg private key format - default PEM\n");
396 BIO_printf(bio_err, "-pubin input is a public key\n");
397 BIO_printf(bio_err, "-certin input is a certificate carrying a public key\n");
398 BIO_printf(bio_err, "-pkeyopt X:Y public key options\n");
399 BIO_printf(bio_err, "-sign sign with private key\n");
400 BIO_printf(bio_err, "-verify verify with public key\n");
401 BIO_printf(bio_err, "-verifyrecover verify with public key, recover original data\n");
402 BIO_printf(bio_err, "-encrypt encrypt with public key\n");
403 BIO_printf(bio_err, "-decrypt decrypt with private key\n");
404 BIO_printf(bio_err, "-derive derive shared secret\n");
405 BIO_printf(bio_err, "-hexdump hex dump output\n");
406#ifndef OPENSSL_NO_ENGINE
407 BIO_printf(bio_err, "-engine e use engine e, possibly a hardware device.\n");
408#endif
409 BIO_printf(bio_err, "-passin arg pass phrase source\n");
410
411}
412
413static EVP_PKEY_CTX *init_ctx(int *pkeysize,
414 char *keyfile, int keyform, int key_type,
415 char *passargin, int pkey_op, ENGINE *e)
416 {
417 EVP_PKEY *pkey = NULL;
418 EVP_PKEY_CTX *ctx = NULL;
419 char *passin = NULL;
420 int rv = -1;
421 X509 *x;
422 if(((pkey_op == EVP_PKEY_OP_SIGN) || (pkey_op == EVP_PKEY_OP_DECRYPT)
423 || (pkey_op == EVP_PKEY_OP_DERIVE))
424 && (key_type != KEY_PRIVKEY))
425 {
426 BIO_printf(bio_err, "A private key is needed for this operation\n");
427 goto end;
428 }
429 if(!app_passwd(bio_err, passargin, NULL, &passin, NULL))
430 {
431 BIO_printf(bio_err, "Error getting password\n");
432 goto end;
433 }
434 switch(key_type)
435 {
436 case KEY_PRIVKEY:
437 pkey = load_key(bio_err, keyfile, keyform, 0,
438 passin, e, "Private Key");
439 break;
440
441 case KEY_PUBKEY:
442 pkey = load_pubkey(bio_err, keyfile, keyform, 0,
443 NULL, e, "Public Key");
444 break;
445
446 case KEY_CERT:
447 x = load_cert(bio_err, keyfile, keyform,
448 NULL, e, "Certificate");
449 if(x)
450 {
451 pkey = X509_get_pubkey(x);
452 X509_free(x);
453 }
454 break;
455
456 }
457
458 *pkeysize = EVP_PKEY_size(pkey);
459
460 if (!pkey)
461 goto end;
462
463 ctx = EVP_PKEY_CTX_new(pkey, e);
464
465 EVP_PKEY_free(pkey);
466
467 if (!ctx)
468 goto end;
469
470 switch(pkey_op)
471 {
472 case EVP_PKEY_OP_SIGN:
473 rv = EVP_PKEY_sign_init(ctx);
474 break;
475
476 case EVP_PKEY_OP_VERIFY:
477 rv = EVP_PKEY_verify_init(ctx);
478 break;
479
480 case EVP_PKEY_OP_VERIFYRECOVER:
481 rv = EVP_PKEY_verify_recover_init(ctx);
482 break;
483
484 case EVP_PKEY_OP_ENCRYPT:
485 rv = EVP_PKEY_encrypt_init(ctx);
486 break;
487
488 case EVP_PKEY_OP_DECRYPT:
489 rv = EVP_PKEY_decrypt_init(ctx);
490 break;
491
492 case EVP_PKEY_OP_DERIVE:
493 rv = EVP_PKEY_derive_init(ctx);
494 break;
495 }
496
497 if (rv <= 0)
498 {
499 EVP_PKEY_CTX_free(ctx);
500 ctx = NULL;
501 }
502
503 end:
504
505 if (passin)
506 OPENSSL_free(passin);
507
508 return ctx;
509
510
511 }
512
513static int setup_peer(BIO *err, EVP_PKEY_CTX *ctx, int peerform,
514 const char *file)
515 {
516 EVP_PKEY *peer = NULL;
517 int ret;
518 if (!ctx)
519 {
520 BIO_puts(err, "-peerkey command before -inkey\n");
521 return 0;
522 }
523
524 peer = load_pubkey(bio_err, file, peerform, 0, NULL, NULL, "Peer Key");
525
526 if (!peer)
527 {
528 BIO_printf(bio_err, "Error reading peer key %s\n", file);
529 ERR_print_errors(err);
530 return 0;
531 }
532
533 ret = EVP_PKEY_derive_set_peer(ctx, peer);
534
535 EVP_PKEY_free(peer);
536 if (ret <= 0)
537 ERR_print_errors(err);
538 return ret;
539 }
540
541static int do_keyop(EVP_PKEY_CTX *ctx, int pkey_op,
542 unsigned char *out, size_t *poutlen,
543 unsigned char *in, size_t inlen)
544 {
545 int rv = 0;
546 switch(pkey_op)
547 {
548 case EVP_PKEY_OP_VERIFYRECOVER:
549 rv = EVP_PKEY_verify_recover(ctx, out, poutlen, in, inlen);
550 break;
551
552 case EVP_PKEY_OP_SIGN:
553 rv = EVP_PKEY_sign(ctx, out, poutlen, in, inlen);
554 break;
555
556 case EVP_PKEY_OP_ENCRYPT:
557 rv = EVP_PKEY_encrypt(ctx, out, poutlen, in, inlen);
558 break;
559
560 case EVP_PKEY_OP_DECRYPT:
561 rv = EVP_PKEY_decrypt(ctx, out, poutlen, in, inlen);
562 break;
563
564 case EVP_PKEY_OP_DERIVE:
565 rv = EVP_PKEY_derive(ctx, out, poutlen);
566 break;
567
568 }
569 return rv;
570 }
diff --git a/src/lib/libssl/src/apps/ts.c b/src/lib/libssl/src/apps/ts.c
new file mode 100644
index 0000000000..74e7e932b3
--- /dev/null
+++ b/src/lib/libssl/src/apps/ts.c
@@ -0,0 +1,1144 @@
1/* apps/ts.c */
2/* Written by Zoltan Glozik (zglozik@stones.com) for the OpenSSL
3 * project 2002.
4 */
5/* ====================================================================
6 * Copyright (c) 2001 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62#include "apps.h"
63#include <openssl/bio.h>
64#include <openssl/err.h>
65#include <openssl/pem.h>
66#include <openssl/rand.h>
67#include <openssl/ts.h>
68#include <openssl/bn.h>
69
70#undef PROG
71#define PROG ts_main
72
73/* Length of the nonce of the request in bits (must be a multiple of 8). */
74#define NONCE_LENGTH 64
75
76/* Macro definitions for the configuration file. */
77#define ENV_OID_FILE "oid_file"
78
79/* Local function declarations. */
80
81static ASN1_OBJECT *txt2obj(const char *oid);
82static CONF *load_config_file(const char *configfile);
83
84/* Query related functions. */
85static int query_command(const char *data, char *digest,
86 const EVP_MD *md, const char *policy, int no_nonce,
87 int cert, const char *in, const char *out, int text);
88static BIO *BIO_open_with_default(const char *file, const char *mode,
89 FILE *default_fp);
90static TS_REQ *create_query(BIO *data_bio, char *digest, const EVP_MD *md,
91 const char *policy, int no_nonce, int cert);
92static int create_digest(BIO *input, char *digest,
93 const EVP_MD *md, unsigned char **md_value);
94static ASN1_INTEGER *create_nonce(int bits);
95
96/* Reply related functions. */
97static int reply_command(CONF *conf, char *section, char *engine,
98 char *queryfile, char *passin, char *inkey,
99 char *signer, char *chain, const char *policy,
100 char *in, int token_in, char *out, int token_out,
101 int text);
102static TS_RESP *read_PKCS7(BIO *in_bio);
103static TS_RESP *create_response(CONF *conf, const char *section, char *engine,
104 char *queryfile, char *passin, char *inkey,
105 char *signer, char *chain, const char *policy);
106static ASN1_INTEGER * MS_CALLBACK serial_cb(TS_RESP_CTX *ctx, void *data);
107static ASN1_INTEGER *next_serial(const char *serialfile);
108static int save_ts_serial(const char *serialfile, ASN1_INTEGER *serial);
109
110/* Verify related functions. */
111static int verify_command(char *data, char *digest, char *queryfile,
112 char *in, int token_in,
113 char *ca_path, char *ca_file, char *untrusted);
114static TS_VERIFY_CTX *create_verify_ctx(char *data, char *digest,
115 char *queryfile,
116 char *ca_path, char *ca_file,
117 char *untrusted);
118static X509_STORE *create_cert_store(char *ca_path, char *ca_file);
119static int MS_CALLBACK verify_cb(int ok, X509_STORE_CTX *ctx);
120
121/* Main function definition. */
122int MAIN(int, char **);
123
124int MAIN(int argc, char **argv)
125 {
126 int ret = 1;
127 char *configfile = NULL;
128 char *section = NULL;
129 CONF *conf = NULL;
130 enum mode {
131 CMD_NONE, CMD_QUERY, CMD_REPLY, CMD_VERIFY
132 } mode = CMD_NONE;
133 char *data = NULL;
134 char *digest = NULL;
135 const EVP_MD *md = NULL;
136 char *rnd = NULL;
137 char *policy = NULL;
138 int no_nonce = 0;
139 int cert = 0;
140 char *in = NULL;
141 char *out = NULL;
142 int text = 0;
143 char *queryfile = NULL;
144 char *passin = NULL; /* Password source. */
145 char *password =NULL; /* Password itself. */
146 char *inkey = NULL;
147 char *signer = NULL;
148 char *chain = NULL;
149 char *ca_path = NULL;
150 char *ca_file = NULL;
151 char *untrusted = NULL;
152 char *engine = NULL;
153 /* Input is ContentInfo instead of TimeStampResp. */
154 int token_in = 0;
155 /* Output is ContentInfo instead of TimeStampResp. */
156 int token_out = 0;
157 int free_bio_err = 0;
158
159 ERR_load_crypto_strings();
160 apps_startup();
161
162 if (bio_err == NULL && (bio_err = BIO_new(BIO_s_file())) != NULL)
163 {
164 free_bio_err = 1;
165 BIO_set_fp(bio_err, stderr, BIO_NOCLOSE | BIO_FP_TEXT);
166 }
167
168 for (argc--, argv++; argc > 0; argc--, argv++)
169 {
170 if (strcmp(*argv, "-config") == 0)
171 {
172 if (argc-- < 1) goto usage;
173 configfile = *++argv;
174 }
175 else if (strcmp(*argv, "-section") == 0)
176 {
177 if (argc-- < 1) goto usage;
178 section = *++argv;
179 }
180 else if (strcmp(*argv, "-query") == 0)
181 {
182 if (mode != CMD_NONE) goto usage;
183 mode = CMD_QUERY;
184 }
185 else if (strcmp(*argv, "-data") == 0)
186 {
187 if (argc-- < 1) goto usage;
188 data = *++argv;
189 }
190 else if (strcmp(*argv, "-digest") == 0)
191 {
192 if (argc-- < 1) goto usage;
193 digest = *++argv;
194 }
195 else if (strcmp(*argv, "-rand") == 0)
196 {
197 if (argc-- < 1) goto usage;
198 rnd = *++argv;
199 }
200 else if (strcmp(*argv, "-policy") == 0)
201 {
202 if (argc-- < 1) goto usage;
203 policy = *++argv;
204 }
205 else if (strcmp(*argv, "-no_nonce") == 0)
206 {
207 no_nonce = 1;
208 }
209 else if (strcmp(*argv, "-cert") == 0)
210 {
211 cert = 1;
212 }
213 else if (strcmp(*argv, "-in") == 0)
214 {
215 if (argc-- < 1) goto usage;
216 in = *++argv;
217 }
218 else if (strcmp(*argv, "-token_in") == 0)
219 {
220 token_in = 1;
221 }
222 else if (strcmp(*argv, "-out") == 0)
223 {
224 if (argc-- < 1) goto usage;
225 out = *++argv;
226 }
227 else if (strcmp(*argv, "-token_out") == 0)
228 {
229 token_out = 1;
230 }
231 else if (strcmp(*argv, "-text") == 0)
232 {
233 text = 1;
234 }
235 else if (strcmp(*argv, "-reply") == 0)
236 {
237 if (mode != CMD_NONE) goto usage;
238 mode = CMD_REPLY;
239 }
240 else if (strcmp(*argv, "-queryfile") == 0)
241 {
242 if (argc-- < 1) goto usage;
243 queryfile = *++argv;
244 }
245 else if (strcmp(*argv, "-passin") == 0)
246 {
247 if (argc-- < 1) goto usage;
248 passin = *++argv;
249 }
250 else if (strcmp(*argv, "-inkey") == 0)
251 {
252 if (argc-- < 1) goto usage;
253 inkey = *++argv;
254 }
255 else if (strcmp(*argv, "-signer") == 0)
256 {
257 if (argc-- < 1) goto usage;
258 signer = *++argv;
259 }
260 else if (strcmp(*argv, "-chain") == 0)
261 {
262 if (argc-- < 1) goto usage;
263 chain = *++argv;
264 }
265 else if (strcmp(*argv, "-verify") == 0)
266 {
267 if (mode != CMD_NONE) goto usage;
268 mode = CMD_VERIFY;
269 }
270 else if (strcmp(*argv, "-CApath") == 0)
271 {
272 if (argc-- < 1) goto usage;
273 ca_path = *++argv;
274 }
275 else if (strcmp(*argv, "-CAfile") == 0)
276 {
277 if (argc-- < 1) goto usage;
278 ca_file = *++argv;
279 }
280 else if (strcmp(*argv, "-untrusted") == 0)
281 {
282 if (argc-- < 1) goto usage;
283 untrusted = *++argv;
284 }
285 else if (strcmp(*argv, "-engine") == 0)
286 {
287 if (argc-- < 1) goto usage;
288 engine = *++argv;
289 }
290 else if ((md = EVP_get_digestbyname(*argv + 1)) != NULL)
291 {
292 /* empty. */
293 }
294 else
295 goto usage;
296 }
297
298 /* Seed the random number generator if it is going to be used. */
299 if (mode == CMD_QUERY && !no_nonce)
300 {
301 if (!app_RAND_load_file(NULL, bio_err, 1) && rnd == NULL)
302 BIO_printf(bio_err, "warning, not much extra random "
303 "data, consider using the -rand option\n");
304 if (rnd != NULL)
305 BIO_printf(bio_err,"%ld semi-random bytes loaded\n",
306 app_RAND_load_files(rnd));
307 }
308
309 /* Get the password if required. */
310 if(mode == CMD_REPLY && passin &&
311 !app_passwd(bio_err, passin, NULL, &password, NULL))
312 {
313 BIO_printf(bio_err,"Error getting password.\n");
314 goto cleanup;
315 }
316
317 /* Check consistency of parameters and execute
318 the appropriate function. */
319 switch (mode)
320 {
321 case CMD_NONE:
322 goto usage;
323 case CMD_QUERY:
324 /* Data file and message imprint cannot be specified
325 at the same time. */
326 ret = data != NULL && digest != NULL;
327 if (ret) goto usage;
328 /* Load the config file for possible policy OIDs. */
329 conf = load_config_file(configfile);
330 ret = !query_command(data, digest, md, policy, no_nonce, cert,
331 in, out, text);
332 break;
333 case CMD_REPLY:
334 conf = load_config_file(configfile);
335 if (in == NULL)
336 {
337 ret = !(queryfile != NULL && conf != NULL && !token_in);
338 if (ret) goto usage;
339 }
340 else
341 {
342 /* 'in' and 'queryfile' are exclusive. */
343 ret = !(queryfile == NULL);
344 if (ret) goto usage;
345 }
346
347 ret = !reply_command(conf, section, engine, queryfile,
348 password, inkey, signer, chain, policy,
349 in, token_in, out, token_out, text);
350 break;
351 case CMD_VERIFY:
352 ret = !(((queryfile && !data && !digest)
353 || (!queryfile && data && !digest)
354 || (!queryfile && !data && digest))
355 && in != NULL);
356 if (ret) goto usage;
357
358 ret = !verify_command(data, digest, queryfile, in, token_in,
359 ca_path, ca_file, untrusted);
360 }
361
362 goto cleanup;
363
364 usage:
365 BIO_printf(bio_err, "usage:\n"
366 "ts -query [-rand file%cfile%c...] [-config configfile] "
367 "[-data file_to_hash] [-digest digest_bytes]"
368 "[-md2|-md4|-md5|-sha|-sha1|-mdc2|-ripemd160] "
369 "[-policy object_id] [-no_nonce] [-cert] "
370 "[-in request.tsq] [-out request.tsq] [-text]\n",
371 LIST_SEPARATOR_CHAR, LIST_SEPARATOR_CHAR);
372 BIO_printf(bio_err, "or\n"
373 "ts -reply [-config configfile] [-section tsa_section] "
374 "[-queryfile request.tsq] [-passin password] "
375 "[-signer tsa_cert.pem] [-inkey private_key.pem] "
376 "[-chain certs_file.pem] [-policy object_id] "
377 "[-in response.tsr] [-token_in] "
378 "[-out response.tsr] [-token_out] [-text] [-engine id]\n");
379 BIO_printf(bio_err, "or\n"
380 "ts -verify [-data file_to_hash] [-digest digest_bytes] "
381 "[-queryfile request.tsq] "
382 "-in response.tsr [-token_in] "
383 "-CApath ca_path -CAfile ca_file.pem "
384 "-untrusted cert_file.pem\n");
385 cleanup:
386 /* Clean up. */
387 app_RAND_write_file(NULL, bio_err);
388 NCONF_free(conf);
389 OPENSSL_free(password);
390 OBJ_cleanup();
391 if (free_bio_err)
392 {
393 BIO_free_all(bio_err);
394 bio_err = NULL;
395 }
396
397 OPENSSL_EXIT(ret);
398 }
399
400/*
401 * Configuration file-related function definitions.
402 */
403
404static ASN1_OBJECT *txt2obj(const char *oid)
405 {
406 ASN1_OBJECT *oid_obj = NULL;
407
408 if (!(oid_obj = OBJ_txt2obj(oid, 0)))
409 BIO_printf(bio_err, "cannot convert %s to OID\n", oid);
410
411 return oid_obj;
412 }
413
414static CONF *load_config_file(const char *configfile)
415 {
416 CONF *conf = NULL;
417 long errorline = -1;
418
419 if (!configfile) configfile = getenv("OPENSSL_CONF");
420 if (!configfile) configfile = getenv("SSLEAY_CONF");
421
422 if (configfile &&
423 (!(conf = NCONF_new(NULL)) ||
424 NCONF_load(conf, configfile, &errorline) <= 0))
425 {
426 if (errorline <= 0)
427 BIO_printf(bio_err, "error loading the config file "
428 "'%s'\n", configfile);
429 else
430 BIO_printf(bio_err, "error on line %ld of config file "
431 "'%s'\n", errorline, configfile);
432 }
433
434 if (conf != NULL)
435 {
436 const char *p;
437
438 BIO_printf(bio_err,"Using configuration from %s\n", configfile);
439 p = NCONF_get_string(conf, NULL, ENV_OID_FILE);
440 if (p != NULL)
441 {
442 BIO *oid_bio = BIO_new_file(p, "r");
443 if (!oid_bio)
444 ERR_print_errors(bio_err);
445 else
446 {
447 OBJ_create_objects(oid_bio);
448 BIO_free_all(oid_bio);
449 }
450 }
451 else
452 ERR_clear_error();
453 if(!add_oid_section(bio_err, conf))
454 ERR_print_errors(bio_err);
455 }
456 return conf;
457 }
458
459/*
460 * Query-related method definitions.
461 */
462
463static int query_command(const char *data, char *digest, const EVP_MD *md,
464 const char *policy, int no_nonce,
465 int cert, const char *in, const char *out, int text)
466 {
467 int ret = 0;
468 TS_REQ *query = NULL;
469 BIO *in_bio = NULL;
470 BIO *data_bio = NULL;
471 BIO *out_bio = NULL;
472
473 /* Build query object either from file or from scratch. */
474 if (in != NULL)
475 {
476 if ((in_bio = BIO_new_file(in, "rb")) == NULL) goto end;
477 query = d2i_TS_REQ_bio(in_bio, NULL);
478 }
479 else
480 {
481 /* Open the file if no explicit digest bytes were specified. */
482 if (!digest
483 && !(data_bio = BIO_open_with_default(data, "rb", stdin)))
484 goto end;
485 /* Creating the query object. */
486 query = create_query(data_bio, digest, md,
487 policy, no_nonce, cert);
488 /* Saving the random number generator state. */
489 }
490 if (query == NULL) goto end;
491
492 /* Write query either in ASN.1 or in text format. */
493 if ((out_bio = BIO_open_with_default(out, "wb", stdout)) == NULL)
494 goto end;
495 if (text)
496 {
497 /* Text output. */
498 if (!TS_REQ_print_bio(out_bio, query))
499 goto end;
500 }
501 else
502 {
503 /* ASN.1 output. */
504 if (!i2d_TS_REQ_bio(out_bio, query))
505 goto end;
506 }
507
508 ret = 1;
509
510 end:
511 ERR_print_errors(bio_err);
512
513 /* Clean up. */
514 BIO_free_all(in_bio);
515 BIO_free_all(data_bio);
516 BIO_free_all(out_bio);
517 TS_REQ_free(query);
518
519 return ret;
520 }
521
522static BIO *BIO_open_with_default(const char *file, const char *mode,
523 FILE *default_fp)
524 {
525 return file == NULL ?
526 BIO_new_fp(default_fp, BIO_NOCLOSE)
527 : BIO_new_file(file, mode);
528 }
529
530static TS_REQ *create_query(BIO *data_bio, char *digest, const EVP_MD *md,
531 const char *policy, int no_nonce, int cert)
532 {
533 int ret = 0;
534 TS_REQ *ts_req = NULL;
535 int len;
536 TS_MSG_IMPRINT *msg_imprint = NULL;
537 X509_ALGOR *algo = NULL;
538 unsigned char *data = NULL;
539 ASN1_OBJECT *policy_obj = NULL;
540 ASN1_INTEGER *nonce_asn1 = NULL;
541
542 /* Setting default message digest. */
543 if (!md && !(md = EVP_get_digestbyname("sha1"))) goto err;
544
545 /* Creating request object. */
546 if (!(ts_req = TS_REQ_new())) goto err;
547
548 /* Setting version. */
549 if (!TS_REQ_set_version(ts_req, 1)) goto err;
550
551 /* Creating and adding MSG_IMPRINT object. */
552 if (!(msg_imprint = TS_MSG_IMPRINT_new())) goto err;
553
554 /* Adding algorithm. */
555 if (!(algo = X509_ALGOR_new())) goto err;
556 if (!(algo->algorithm = OBJ_nid2obj(EVP_MD_type(md)))) goto err;
557 if (!(algo->parameter = ASN1_TYPE_new())) goto err;
558 algo->parameter->type = V_ASN1_NULL;
559 if (!TS_MSG_IMPRINT_set_algo(msg_imprint, algo)) goto err;
560
561 /* Adding message digest. */
562 if ((len = create_digest(data_bio, digest, md, &data)) == 0)
563 goto err;
564 if (!TS_MSG_IMPRINT_set_msg(msg_imprint, data, len)) goto err;
565
566 if (!TS_REQ_set_msg_imprint(ts_req, msg_imprint)) goto err;
567
568 /* Setting policy if requested. */
569 if (policy && !(policy_obj = txt2obj(policy))) goto err;
570 if (policy_obj && !TS_REQ_set_policy_id(ts_req, policy_obj)) goto err;
571
572 /* Setting nonce if requested. */
573 if (!no_nonce && !(nonce_asn1 = create_nonce(NONCE_LENGTH))) goto err;
574 if (nonce_asn1 && !TS_REQ_set_nonce(ts_req, nonce_asn1)) goto err;
575
576 /* Setting certificate request flag if requested. */
577 if (!TS_REQ_set_cert_req(ts_req, cert)) goto err;
578
579 ret = 1;
580 err:
581 if (!ret)
582 {
583 TS_REQ_free(ts_req);
584 ts_req = NULL;
585 BIO_printf(bio_err, "could not create query\n");
586 }
587 TS_MSG_IMPRINT_free(msg_imprint);
588 X509_ALGOR_free(algo);
589 OPENSSL_free(data);
590 ASN1_OBJECT_free(policy_obj);
591 ASN1_INTEGER_free(nonce_asn1);
592 return ts_req;
593 }
594
595static int create_digest(BIO *input, char *digest, const EVP_MD *md,
596 unsigned char **md_value)
597 {
598 int md_value_len;
599
600 md_value_len = EVP_MD_size(md);
601 if (md_value_len < 0)
602 goto err;
603 if (input)
604 {
605 /* Digest must be computed from an input file. */
606 EVP_MD_CTX md_ctx;
607 unsigned char buffer[4096];
608 int length;
609
610 *md_value = OPENSSL_malloc(md_value_len);
611 if (*md_value == 0) goto err;
612
613 EVP_DigestInit(&md_ctx, md);
614 while ((length = BIO_read(input, buffer, sizeof(buffer))) > 0)
615 {
616 EVP_DigestUpdate(&md_ctx, buffer, length);
617 }
618 EVP_DigestFinal(&md_ctx, *md_value, NULL);
619 }
620 else
621 {
622 /* Digest bytes are specified with digest. */
623 long digest_len;
624 *md_value = string_to_hex(digest, &digest_len);
625 if (!*md_value || md_value_len != digest_len)
626 {
627 OPENSSL_free(*md_value);
628 *md_value = NULL;
629 BIO_printf(bio_err, "bad digest, %d bytes "
630 "must be specified\n", md_value_len);
631 goto err;
632 }
633 }
634
635 return md_value_len;
636 err:
637 return 0;
638 }
639
640static ASN1_INTEGER *create_nonce(int bits)
641 {
642 unsigned char buf[20];
643 ASN1_INTEGER *nonce = NULL;
644 int len = (bits - 1) / 8 + 1;
645 int i;
646
647 /* Generating random byte sequence. */
648 if (len > (int)sizeof(buf)) goto err;
649 if (!RAND_bytes(buf, len)) goto err;
650
651 /* Find the first non-zero byte and creating ASN1_INTEGER object. */
652 for (i = 0; i < len && !buf[i]; ++i);
653 if (!(nonce = ASN1_INTEGER_new())) goto err;
654 OPENSSL_free(nonce->data);
655 /* Allocate at least one byte. */
656 nonce->length = len - i;
657 if (!(nonce->data = OPENSSL_malloc(nonce->length + 1))) goto err;
658 memcpy(nonce->data, buf + i, nonce->length);
659
660 return nonce;
661 err:
662 BIO_printf(bio_err, "could not create nonce\n");
663 ASN1_INTEGER_free(nonce);
664 return NULL;
665 }
666/*
667 * Reply-related method definitions.
668 */
669
670static int reply_command(CONF *conf, char *section, char *engine,
671 char *queryfile, char *passin, char *inkey,
672 char *signer, char *chain, const char *policy,
673 char *in, int token_in,
674 char *out, int token_out, int text)
675 {
676 int ret = 0;
677 TS_RESP *response = NULL;
678 BIO *in_bio = NULL;
679 BIO *query_bio = NULL;
680 BIO *inkey_bio = NULL;
681 BIO *signer_bio = NULL;
682 BIO *out_bio = NULL;
683
684 /* Build response object either from response or query. */
685 if (in != NULL)
686 {
687 if ((in_bio = BIO_new_file(in, "rb")) == NULL) goto end;
688 if (token_in)
689 {
690 /* We have a ContentInfo (PKCS7) object, add
691 'granted' status info around it. */
692 response = read_PKCS7(in_bio);
693 }
694 else
695 {
696 /* We have a ready-made TS_RESP object. */
697 response = d2i_TS_RESP_bio(in_bio, NULL);
698 }
699 }
700 else
701 {
702 response = create_response(conf, section, engine, queryfile,
703 passin, inkey, signer, chain,
704 policy);
705 if (response)
706 BIO_printf(bio_err, "Response has been generated.\n");
707 else
708 BIO_printf(bio_err, "Response is not generated.\n");
709 }
710 if (response == NULL) goto end;
711
712 /* Write response either in ASN.1 or text format. */
713 if ((out_bio = BIO_open_with_default(out, "wb", stdout)) == NULL)
714 goto end;
715 if (text)
716 {
717 /* Text output. */
718 if (token_out)
719 {
720 TS_TST_INFO *tst_info = TS_RESP_get_tst_info(response);
721 if (!TS_TST_INFO_print_bio(out_bio, tst_info)) goto end;
722 }
723 else
724 {
725 if (!TS_RESP_print_bio(out_bio, response)) goto end;
726 }
727 }
728 else
729 {
730 /* ASN.1 DER output. */
731 if (token_out)
732 {
733 PKCS7 *token = TS_RESP_get_token(response);
734 if (!i2d_PKCS7_bio(out_bio, token)) goto end;
735 }
736 else
737 {
738 if (!i2d_TS_RESP_bio(out_bio, response)) goto end;
739 }
740 }
741
742 ret = 1;
743
744 end:
745 ERR_print_errors(bio_err);
746
747 /* Clean up. */
748 BIO_free_all(in_bio);
749 BIO_free_all(query_bio);
750 BIO_free_all(inkey_bio);
751 BIO_free_all(signer_bio);
752 BIO_free_all(out_bio);
753 TS_RESP_free(response);
754
755 return ret;
756 }
757
758/* Reads a PKCS7 token and adds default 'granted' status info to it. */
759static TS_RESP *read_PKCS7(BIO *in_bio)
760 {
761 int ret = 0;
762 PKCS7 *token = NULL;
763 TS_TST_INFO *tst_info = NULL;
764 TS_RESP *resp = NULL;
765 TS_STATUS_INFO *si = NULL;
766
767 /* Read PKCS7 object and extract the signed time stamp info. */
768 if (!(token = d2i_PKCS7_bio(in_bio, NULL))) goto end;
769 if (!(tst_info = PKCS7_to_TS_TST_INFO(token))) goto end;
770
771 /* Creating response object. */
772 if (!(resp = TS_RESP_new())) goto end;
773
774 /* Create granted status info. */
775 if (!(si = TS_STATUS_INFO_new())) goto end;
776 if (!(ASN1_INTEGER_set(si->status, TS_STATUS_GRANTED))) goto end;
777 if (!TS_RESP_set_status_info(resp, si)) goto end;
778
779 /* Setting encapsulated token. */
780 TS_RESP_set_tst_info(resp, token, tst_info);
781 token = NULL; /* Ownership is lost. */
782 tst_info = NULL; /* Ownership is lost. */
783
784 ret = 1;
785 end:
786 PKCS7_free(token);
787 TS_TST_INFO_free(tst_info);
788 if (!ret)
789 {
790 TS_RESP_free(resp);
791 resp = NULL;
792 }
793 TS_STATUS_INFO_free(si);
794 return resp;
795 }
796
797static TS_RESP *create_response(CONF *conf, const char *section, char *engine,
798 char *queryfile, char *passin, char *inkey,
799 char *signer, char *chain, const char *policy)
800 {
801 int ret = 0;
802 TS_RESP *response = NULL;
803 BIO *query_bio = NULL;
804 TS_RESP_CTX *resp_ctx = NULL;
805
806 if (!(query_bio = BIO_new_file(queryfile, "rb")))
807 goto end;
808
809 /* Getting TSA configuration section. */
810 if (!(section = TS_CONF_get_tsa_section(conf, section)))
811 goto end;
812
813 /* Setting up response generation context. */
814 if (!(resp_ctx = TS_RESP_CTX_new())) goto end;
815
816 /* Setting serial number provider callback. */
817 if (!TS_CONF_set_serial(conf, section, serial_cb, resp_ctx)) goto end;
818#ifndef OPENSSL_NO_ENGINE
819 /* Setting default OpenSSL engine. */
820 if (!TS_CONF_set_crypto_device(conf, section, engine)) goto end;
821#endif
822
823 /* Setting TSA signer certificate. */
824 if (!TS_CONF_set_signer_cert(conf, section, signer, resp_ctx)) goto end;
825
826 /* Setting TSA signer certificate chain. */
827 if (!TS_CONF_set_certs(conf, section, chain, resp_ctx)) goto end;
828
829 /* Setting TSA signer private key. */
830 if (!TS_CONF_set_signer_key(conf, section, inkey, passin, resp_ctx))
831 goto end;
832
833 /* Setting default policy OID. */
834 if (!TS_CONF_set_def_policy(conf, section, policy, resp_ctx)) goto end;
835
836 /* Setting acceptable policy OIDs. */
837 if (!TS_CONF_set_policies(conf, section, resp_ctx)) goto end;
838
839 /* Setting the acceptable one-way hash algorithms. */
840 if (!TS_CONF_set_digests(conf, section, resp_ctx)) goto end;
841
842 /* Setting guaranteed time stamp accuracy. */
843 if (!TS_CONF_set_accuracy(conf, section, resp_ctx)) goto end;
844
845 /* Setting the precision of the time. */
846 if (!TS_CONF_set_clock_precision_digits(conf, section, resp_ctx))
847 goto end;
848
849 /* Setting the ordering flaf if requested. */
850 if (!TS_CONF_set_ordering(conf, section, resp_ctx)) goto end;
851
852 /* Setting the TSA name required flag if requested. */
853 if (!TS_CONF_set_tsa_name(conf, section, resp_ctx)) goto end;
854
855 /* Setting the ESS cert id chain flag if requested. */
856 if (!TS_CONF_set_ess_cert_id_chain(conf, section, resp_ctx)) goto end;
857
858 /* Creating the response. */
859 if (!(response = TS_RESP_create_response(resp_ctx, query_bio)))
860 goto end;
861
862 ret = 1;
863 end:
864 if (!ret)
865 {
866 TS_RESP_free(response);
867 response = NULL;
868 }
869 TS_RESP_CTX_free(resp_ctx);
870 BIO_free_all(query_bio);
871
872 return response;
873 }
874
875static ASN1_INTEGER * MS_CALLBACK serial_cb(TS_RESP_CTX *ctx, void *data)
876 {
877 const char *serial_file = (const char *) data;
878 ASN1_INTEGER *serial = next_serial(serial_file);
879
880 if (!serial)
881 {
882 TS_RESP_CTX_set_status_info(ctx, TS_STATUS_REJECTION,
883 "Error during serial number "
884 "generation.");
885 TS_RESP_CTX_add_failure_info(ctx,
886 TS_INFO_ADD_INFO_NOT_AVAILABLE);
887 }
888 else
889 save_ts_serial(serial_file, serial);
890
891 return serial;
892 }
893
894static ASN1_INTEGER *next_serial(const char *serialfile)
895 {
896 int ret = 0;
897 BIO *in = NULL;
898 ASN1_INTEGER *serial = NULL;
899 BIGNUM *bn = NULL;
900
901 if (!(serial = ASN1_INTEGER_new())) goto err;
902
903 if (!(in = BIO_new_file(serialfile, "r")))
904 {
905 ERR_clear_error();
906 BIO_printf(bio_err, "Warning: could not open file %s for "
907 "reading, using serial number: 1\n", serialfile);
908 if (!ASN1_INTEGER_set(serial, 1)) goto err;
909 }
910 else
911 {
912 char buf[1024];
913 if (!a2i_ASN1_INTEGER(in, serial, buf, sizeof(buf)))
914 {
915 BIO_printf(bio_err, "unable to load number from %s\n",
916 serialfile);
917 goto err;
918 }
919 if (!(bn = ASN1_INTEGER_to_BN(serial, NULL))) goto err;
920 ASN1_INTEGER_free(serial);
921 serial = NULL;
922 if (!BN_add_word(bn, 1)) goto err;
923 if (!(serial = BN_to_ASN1_INTEGER(bn, NULL))) goto err;
924 }
925 ret = 1;
926 err:
927 if (!ret)
928 {
929 ASN1_INTEGER_free(serial);
930 serial = NULL;
931 }
932 BIO_free_all(in);
933 BN_free(bn);
934 return serial;
935 }
936
937static int save_ts_serial(const char *serialfile, ASN1_INTEGER *serial)
938 {
939 int ret = 0;
940 BIO *out = NULL;
941
942 if (!(out = BIO_new_file(serialfile, "w"))) goto err;
943 if (i2a_ASN1_INTEGER(out, serial) <= 0) goto err;
944 if (BIO_puts(out, "\n") <= 0) goto err;
945 ret = 1;
946 err:
947 if (!ret)
948 BIO_printf(bio_err, "could not save serial number to %s\n",
949 serialfile);
950 BIO_free_all(out);
951 return ret;
952 }
953
954/*
955 * Verify-related method definitions.
956 */
957
958static int verify_command(char *data, char *digest, char *queryfile,
959 char *in, int token_in,
960 char *ca_path, char *ca_file, char *untrusted)
961 {
962 BIO *in_bio = NULL;
963 PKCS7 *token = NULL;
964 TS_RESP *response = NULL;
965 TS_VERIFY_CTX *verify_ctx = NULL;
966 int ret = 0;
967
968 /* Decode the token (PKCS7) or response (TS_RESP) files. */
969 if (!(in_bio = BIO_new_file(in, "rb"))) goto end;
970 if (token_in)
971 {
972 if (!(token = d2i_PKCS7_bio(in_bio, NULL))) goto end;
973 }
974 else
975 {
976 if (!(response = d2i_TS_RESP_bio(in_bio, NULL))) goto end;
977 }
978
979 if (!(verify_ctx = create_verify_ctx(data, digest, queryfile,
980 ca_path, ca_file, untrusted)))
981 goto end;
982
983 /* Checking the token or response against the request. */
984 ret = token_in ?
985 TS_RESP_verify_token(verify_ctx, token) :
986 TS_RESP_verify_response(verify_ctx, response);
987
988 end:
989 printf("Verification: ");
990 if (ret)
991 printf("OK\n");
992 else
993 {
994 printf("FAILED\n");
995 /* Print errors, if there are any. */
996 ERR_print_errors(bio_err);
997 }
998
999 /* Clean up. */
1000 BIO_free_all(in_bio);
1001 PKCS7_free(token);
1002 TS_RESP_free(response);
1003 TS_VERIFY_CTX_free(verify_ctx);
1004 return ret;
1005 }
1006
1007static TS_VERIFY_CTX *create_verify_ctx(char *data, char *digest,
1008 char *queryfile,
1009 char *ca_path, char *ca_file,
1010 char *untrusted)
1011 {
1012 TS_VERIFY_CTX *ctx = NULL;
1013 BIO *input = NULL;
1014 TS_REQ *request = NULL;
1015 int ret = 0;
1016
1017 if (data != NULL || digest != NULL)
1018 {
1019 if (!(ctx = TS_VERIFY_CTX_new())) goto err;
1020 ctx->flags = TS_VFY_VERSION | TS_VFY_SIGNER;
1021 if (data != NULL)
1022 {
1023 ctx->flags |= TS_VFY_DATA;
1024 if (!(ctx->data = BIO_new_file(data, "rb"))) goto err;
1025 }
1026 else if (digest != NULL)
1027 {
1028 long imprint_len;
1029 ctx->flags |= TS_VFY_IMPRINT;
1030 if (!(ctx->imprint = string_to_hex(digest,
1031 &imprint_len)))
1032 {
1033 BIO_printf(bio_err, "invalid digest string\n");
1034 goto err;
1035 }
1036 ctx->imprint_len = imprint_len;
1037 }
1038
1039 }
1040 else if (queryfile != NULL)
1041 {
1042 /* The request has just to be read, decoded and converted to
1043 a verify context object. */
1044 if (!(input = BIO_new_file(queryfile, "rb"))) goto err;
1045 if (!(request = d2i_TS_REQ_bio(input, NULL))) goto err;
1046 if (!(ctx = TS_REQ_to_TS_VERIFY_CTX(request, NULL))) goto err;
1047 }
1048 else
1049 return NULL;
1050
1051 /* Add the signature verification flag and arguments. */
1052 ctx->flags |= TS_VFY_SIGNATURE;
1053
1054 /* Initialising the X509_STORE object. */
1055 if (!(ctx->store = create_cert_store(ca_path, ca_file))) goto err;
1056
1057 /* Loading untrusted certificates. */
1058 if (untrusted && !(ctx->certs = TS_CONF_load_certs(untrusted)))
1059 goto err;
1060
1061 ret = 1;
1062 err:
1063 if (!ret)
1064 {
1065 TS_VERIFY_CTX_free(ctx);
1066 ctx = NULL;
1067 }
1068 BIO_free_all(input);
1069 TS_REQ_free(request);
1070 return ctx;
1071 }
1072
1073static X509_STORE *create_cert_store(char *ca_path, char *ca_file)
1074 {
1075 X509_STORE *cert_ctx = NULL;
1076 X509_LOOKUP *lookup = NULL;
1077 int i;
1078
1079 /* Creating the X509_STORE object. */
1080 cert_ctx = X509_STORE_new();
1081
1082 /* Setting the callback for certificate chain verification. */
1083 X509_STORE_set_verify_cb_func(cert_ctx, verify_cb);
1084
1085 /* Adding a trusted certificate directory source. */
1086 if (ca_path)
1087 {
1088 lookup = X509_STORE_add_lookup(cert_ctx,
1089 X509_LOOKUP_hash_dir());
1090 if (lookup == NULL)
1091 {
1092 BIO_printf(bio_err, "memory allocation failure\n");
1093 goto err;
1094 }
1095 i = X509_LOOKUP_add_dir(lookup, ca_path, X509_FILETYPE_PEM);
1096 if (!i)
1097 {
1098 BIO_printf(bio_err, "Error loading directory %s\n",
1099 ca_path);
1100 goto err;
1101 }
1102 }
1103
1104 /* Adding a trusted certificate file source. */
1105 if (ca_file)
1106 {
1107 lookup = X509_STORE_add_lookup(cert_ctx, X509_LOOKUP_file());
1108 if (lookup == NULL)
1109 {
1110 BIO_printf(bio_err, "memory allocation failure\n");
1111 goto err;
1112 }
1113 i = X509_LOOKUP_load_file(lookup, ca_file, X509_FILETYPE_PEM);
1114 if (!i)
1115 {
1116 BIO_printf(bio_err, "Error loading file %s\n", ca_file);
1117 goto err;
1118 }
1119 }
1120
1121 return cert_ctx;
1122 err:
1123 X509_STORE_free(cert_ctx);
1124 return NULL;
1125 }
1126
1127static int MS_CALLBACK verify_cb(int ok, X509_STORE_CTX *ctx)
1128 {
1129 /*
1130 char buf[256];
1131
1132 if (!ok)
1133 {
1134 X509_NAME_oneline(X509_get_subject_name(ctx->current_cert),
1135 buf, sizeof(buf));
1136 printf("%s\n", buf);
1137 printf("error %d at %d depth lookup: %s\n",
1138 ctx->error, ctx->error_depth,
1139 X509_verify_cert_error_string(ctx->error));
1140 }
1141 */
1142
1143 return ok;
1144 }
diff --git a/src/lib/libssl/src/apps/tsget b/src/lib/libssl/src/apps/tsget
new file mode 100644
index 0000000000..258d0bc348
--- /dev/null
+++ b/src/lib/libssl/src/apps/tsget
@@ -0,0 +1,195 @@
1#!/usr/bin/perl -w
2# Written by Zoltan Glozik <zglozik@stones.com>.
3# Copyright (c) 2002 The OpenTSA Project. All rights reserved.
4$::version = '$Id: tsget,v 1.1.1.1 2009/04/06 06:30:00 djm Exp $';
5
6use strict;
7use IO::Handle;
8use Getopt::Std;
9use File::Basename;
10use WWW::Curl::easy;
11
12use vars qw(%options);
13
# Read callback for the upload: hands out the next slice of the request body.
#   $maxlength - largest number of bytes that may be returned by this call
#   $state     - hash ref with 'data' (reference to the whole body) and
#                'bytes' (count of bytes already handed out; updated here)
# Returns the next slice, or "" once the whole body has been consumed.
sub read_body {
    my ($maxlength, $state) = @_;

    my $remaining = length(${$state->{data}}) - $state->{bytes};
    return "" unless $remaining > 0;

    my $chunk_len = $remaining > $maxlength ? $maxlength : $remaining;
    my $chunk = substr(${$state->{data}}, $state->{bytes}, $chunk_len);
    $state->{bytes} += $chunk_len;

    return $chunk;
}
27
# Write callback for the download: appends each received chunk to the scalar
# referenced by the second argument and reports the chunk length as consumed.
sub write_body {
    my ($chunk, $sink) = @_;
    $$sink .= $chunk;
    return length $chunk;
}
34
# Create a Curl handle configured for posting time stamp queries to the
# given URL.  Per-request settings (body, result buffer, error buffer) are
# applied later by get_timestamp.
sub create_curl {
    my ($url) = @_;

    my $curl = WWW::Curl::easy::new();

    # Diagnostics and failure reporting.
    if ($options{d}) {
        $curl->setopt(CURLOPT_VERBOSE, 1);
    }
    $curl->setopt(CURLOPT_FAILONERROR, 1);
    my $release = (split / /, $::version)[2];
    $curl->setopt(CURLOPT_USERAGENT, "OpenTSA tsget.pl/" . $release);

    # The query is uploaded with an explicit POST request.
    $curl->setopt(CURLOPT_UPLOAD, 1);
    $curl->setopt(CURLOPT_CUSTOMREQUEST, "POST");
    $curl->setopt(CURLOPT_HTTPHEADER,
                  ["Content-Type: application/timestamp-query",
                   "Accept: application/timestamp-reply"]);
    $curl->setopt(CURLOPT_READFUNCTION, \&read_body);
    # Response headers are accepted and discarded.
    $curl->setopt(CURLOPT_HEADERFUNCTION, sub { return length($_[0]); });

    # The reply body is collected by write_body.
    $curl->setopt(CURLOPT_WRITEFUNCTION, \&write_body);

    # Fixed SSL settings.
    $curl->setopt(CURLOPT_SSLKEYTYPE, "PEM");
    $curl->setopt(CURLOPT_SSL_VERIFYPEER, 1);   # Verify server's certificate.
    $curl->setopt(CURLOPT_SSL_VERIFYHOST, 2);   # Check server's CN.

    # SSL settings that are applied only when the matching command-line
    # option was given; the order matches the option list in usage().
    my @optional = (
        [CURLOPT_SSLKEY,       "k"],
        [CURLOPT_SSLKEYPASSWD, "p"],
        [CURLOPT_SSLCERT,      "c"],
        [CURLOPT_CAINFO,       "C"],
        [CURLOPT_CAPATH,       "P"],
        [CURLOPT_RANDOM_FILE,  "r"],
        [CURLOPT_EGDSOCKET,    "g"],
    );
    foreach my $entry (@optional) {
        my ($curl_opt, $flag) = @{$entry};
        next unless defined($options{$flag});
        $curl->setopt($curl_opt, $options{$flag});
    }

    # Destination.
    $curl->setopt(CURLOPT_URL, $url);

    return $curl;
}
76
# Send one request and return the reply body.
#   $curl - handle prepared by create_curl
#   $body - reference to the scalar holding the encoded request
# Returns the list ($ts_body, $error_string).  $error_string is undefined on
# success; otherwise it describes the transfer failure or the unexpected
# content type of the reply.
sub get_timestamp {
    my $curl = shift;
    my $body = shift;
    my $ts_body;
    local $::error_buf;

    # Error-handling related options.
    $curl->setopt(CURLOPT_ERRORBUFFER, "::error_buf");

    # Options for POST method.
    $curl->setopt(CURLOPT_INFILE, {data => $body, bytes => 0});
    $curl->setopt(CURLOPT_INFILESIZE, length(${$body}));

    # Options for getting the result.
    $curl->setopt(CURLOPT_FILE, \$ts_body);

    # Send the request...
    my $error_code = $curl->perform();
    my $error_string;
    if ($error_code != 0) {
        my $http_code = $curl->getinfo(CURLINFO_HTTP_CODE);
        $error_string = "could not get timestamp";
        $error_string .= ", http code: $http_code" unless $http_code == 0;
        $error_string .= ", curl code: $error_code";
        $error_string .= " ($::error_buf)" if defined($::error_buf);
    } else {
        my $ct = $curl->getinfo(CURLINFO_CONTENT_TYPE);
        if (!defined($ct)) {
            # No content type was reported at all; treat it as a mismatch
            # without feeding an undefined value to lc() or interpolation.
            $error_string = "unexpected content type returned: (none)";
        } elsif (lc($ct) ne "application/timestamp-reply") {
            $error_string = "unexpected content type returned: $ct";
        }
    }
    return ($ts_body, $error_string);

}
112
# Print usage information to STDERR and exit with status 1.
sub usage {

    print STDERR
        "usage: $0 -h <server_url> [-e <extension>] [-o <output>] ",
        "[-v] [-d] [-k <private_key.pem>] [-p <key_password>] ",
        "[-c <client_cert.pem>] [-C <CA_certs.pem>] [-P <CA_path>] ",
        "[-r <file:file...>] [-g <EGD_socket>] [<request>]...\n";
    exit 1;
}
122
123# ----------------------------------------------------------------------
124# Main program
125# ----------------------------------------------------------------------
126
# Getting command-line options (default comes from TSGET environment variable).
# The environment defaults are parsed first so that options given on the
# real command line, parsed second into the same hash, override them.
my $getopt_arg = "h:e:o:vdk:p:c:C:P:r:g:";
if (exists $ENV{TSGET}) {
    my @old_argv = @ARGV;
    # split ' ' skips leading whitespace; splitting on /\s+/ would produce
    # an empty first word there, which makes getopts stop immediately and
    # ignore every default in the variable.
    @ARGV = split ' ', $ENV{TSGET};
    getopts($getopt_arg, \%options) or usage;
    @ARGV = @old_argv;
}
getopts($getopt_arg, \%options) or usage;
136
# Checking argument consistency: a server URL is mandatory, reading the
# request from STDIN (no request arguments) needs an explicit output, and
# an explicit output cannot be combined with more than one request.
{
    my $have_server   = exists($options{h});
    my $have_output   = exists($options{o});
    my $request_count = scalar(@ARGV);

    unless ($have_server
            && ($request_count != 0 || $have_output)
            && ($request_count <= 1 || !$have_output)) {
        print STDERR "Inconsistent command line options.\n";
        usage;
    }
}
# Setting defaults: read from STDIN and use the ".tsr" extension.
@ARGV = ("-") if @ARGV == 0;
$options{e} = ".tsr" if !defined($options{e});
146
# Processing requests.
# One Curl handle is reused for every request.  Each request is read whole,
# sent to the server, and the reply is written either to the -o output or to
# a file named after the input with its extension replaced by $options{e}.
# Requests and replies are binary, so every handle is switched to binmode.
my $curl = create_curl $options{h};
undef $/;   # For reading whole files.
REQUEST: foreach (@ARGV) {
    my $input = $_;
    my ($base, $path) = fileparse($input, '\.[^.]*');
    my $output_base = $base . $options{e};
    my $output = defined($options{o}) ? $options{o} : $path . $output_base;

    STDERR->printflush("$input: ") if $options{v};
    # Read request.
    my $body;
    if ($input eq "-") {
        # Read the request from STDIN;
        binmode STDIN;
        $body = <STDIN>;
    } else {
        # Read the request from file.  The three-argument form keeps
        # characters in the file name from being taken as an open mode.
        open INPUT, "<", $input
            or warn("$input: could not open input file: $!\n"), next REQUEST;
        binmode INPUT;
        $body = <INPUT>;
        close INPUT
            or warn("$input: could not close input file: $!\n"), next REQUEST;
    }

    # Send request.
    STDERR->printflush("sending request") if $options{v};

    my ($ts_body, $error) = get_timestamp $curl, \$body;
    if (defined($error)) {
        # A failed request aborts the whole run, not just this input.
        die "$input: fatal error: $error\n";
    }
    STDERR->printflush(", reply received") if $options{v};

    # Write response.
    if ($output eq "-") {
        # Write to STDOUT.
        binmode STDOUT;
        print $ts_body;
    } else {
        # Write to file.
        open OUTPUT, ">", $output
            or warn("$output: could not open output file: $!\n"), next REQUEST;
        binmode OUTPUT;
        print OUTPUT $ts_body;
        close OUTPUT
            or warn("$output: could not close output file: $!\n"), next REQUEST;
    }
    STDERR->printflush(", $output written.\n") if $options{v};
}
$curl->cleanup();
WWW::Curl::easy::global_cleanup();
diff --git a/src/lib/libssl/src/crypto/aes/aes_x86core.c b/src/lib/libssl/src/crypto/aes/aes_x86core.c
new file mode 100644
index 0000000000..d323e265c0
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/aes_x86core.c
@@ -0,0 +1,1063 @@
1/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2/**
3 * rijndael-alg-fst.c
4 *
5 * @version 3.0 (December 2000)
6 *
7 * Optimised ANSI C code for the Rijndael cipher (now AES)
8 *
9 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11 * @author Paulo Barreto <paulo.barreto@terra.com.br>
12 *
13 * This code is hereby placed in the public domain.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * This is experimental x86[_64] derivative. It assumes little-endian
30 * byte order and expects CPU to sustain unaligned memory references.
31 * It is used as playground for cache-time attack mitigations and
32 * serves as reference C implementation for x86[_64] assembler.
33 *
34 * <appro@fy.chalmers.se>
35 */
36
37
38#ifndef AES_DEBUG
39# ifndef NDEBUG
40# define NDEBUG
41# endif
42#endif
43#include <assert.h>
44
45#include <stdlib.h>
46#include <openssl/aes.h>
47#include "aes_locl.h"
48
49/*
50 * These two parameters control which table, 256-byte or 2KB, is
51 * referenced in outer and respectively inner rounds.
52 */
53#define AES_COMPACT_IN_OUTER_ROUNDS
54#ifdef AES_COMPACT_IN_OUTER_ROUNDS
55/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
56 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
57 * by factor of ~2. */
58# undef AES_COMPACT_IN_INNER_ROUNDS
59#endif
60
#if 1
/*
 * Read one word out of every 32-byte stride of a 256-byte table.  The
 * reads go through a volatile pointer and the combined value is stored
 * into a volatile local, so the compiler has to perform them; the value
 * itself is never used.  Going by the name, the purpose is to have the
 * whole table touched before it is indexed with data-dependent values.
 */
static void prefetch256(const void *table)
{
	volatile unsigned long *words = (void *)table;
	volatile unsigned long sink;
	unsigned long acc = 0;
	int idx = 0;

	/* 32 is common least cache-line size */
	while (idx < 256/sizeof(words[0])) {
		acc ^= words[idx];
		idx += 32/sizeof(words[0]);
	}

	sink = acc;
}
#else
# define prefetch256(t)
#endif
76
/*
 * Replace any earlier GETU32 with a plain 32-bit load through a cast
 * pointer: the word is read in the host's native byte order and the
 * pointer is not required to be aligned.  This depends on the
 * little-endian, unaligned-access-tolerant target described in the
 * comment at the top of this file.
 */
#undef GETU32
#define GETU32(p) (*((u32*)(p)))
79
/*
 * Pick a 64-bit unsigned type (u64) and the matching literal-suffix macro
 * U64() for the compiler in use: the __int64/UI64 spelling on Windows
 * compilers other than MinGW, unsigned long/UL when __arch64__ is defined,
 * and unsigned long long/ULL everywhere else.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#define U64(C)	C##UI64
#elif defined(__arch64__)
typedef unsigned long u64;
#define U64(C)	C##UL
#else
typedef unsigned long long u64;
#define U64(C)	C##ULL
#endif
90
/*
 * ROTATE(a,n): rotate the 32-bit value a left by n bits.
 *
 * MSVC and ICC map it to _lrotl().  GCC on x86/x86_64 uses an inline
 * "roll" instruction; the "I" constraint means n has to be a compile-time
 * constant there.  On any other compiler or target ROTATE is left
 * undefined, and code in this file tests defined(ROTATE) to fall back to
 * explicit shifts.
 */
#undef ROTATE
#if defined(_MSC_VER) || defined(__ICC)
# define ROTATE(a,n)	_lrotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)	({ register unsigned int ret;	\
				asm (			\
				"roll %1,%0"		\
				: "=r"(ret)		\
				: "I"(n), "0"(a)	\
				: "cc");		\
			   ret;				\
			})
# endif
#endif
/*
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Te0[x] = S [x].[02, 01, 01, 03];
Te1[x] = S [x].[03, 02, 01, 01];
Te2[x] = S [x].[01, 03, 02, 01];
Te3[x] = S [x].[01, 01, 03, 02];

The four classic 32-bit tables are not stored separately.  Every entry of
the single table Te holds the 4-byte pattern twice, and TeN is a view of Te
starting at a byte offset of 0, 3, 2 or 1, so that the low 32 bits of the
8-byte element found there are the rotated pattern listed above (in
little-endian byte order).  The macros are written to be used with array
syntax, e.g. Te1[i]: the subscript applies to the u64 pointer and the
leading (u32) cast then truncates the loaded element.

NOTE(review): for the views with a non-zero offset the last element
(index 255) spans up to 3 bytes beyond the end of Te as written; whether
the compiler narrows the access to 4 bytes cannot be told from the source.
*/
#define Te0	(u32)((u64*)((u8*)Te+0))
#define Te1	(u32)((u64*)((u8*)Te+3))
#define Te2	(u32)((u64*)((u8*)Te+2))
#define Te3	(u32)((u64*)((u8*)Te+1))
/*
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Td4[x] = Si[x].[01];

Same layout trick as for the encryption tables: Td stores each 4-byte
pattern twice per entry and TdN is a view of Td at a byte offset of 0, 3, 2
or 1.  The macros are meant to be subscripted (TdN[i]); the subscript
applies to the u64 pointer and the (u32) cast truncates the loaded element.
Td4 is a separate byte table and has no macro here.
*/
#define Td0	(u32)((u64*)((u8*)Td+0))
#define Td1	(u32)((u64*)((u8*)Td+3))
#define Td2	(u32)((u64*)((u8*)Td+2))
#define Td3	(u32)((u64*)((u8*)Td+1))
129
130static const u64 Te[256] = {
131 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
132 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
133 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
134 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
135 U64(0x5030306050303060), U64(0x0301010203010102),
136 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
137 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
138 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
139 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
140 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
141 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
142 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
143 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
144 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
145 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
146 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
147 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
148 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
149 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
150 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
151 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
152 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
153 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
154 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
155 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
156 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
157 U64(0x2818183028181830), U64(0xa1969637a1969637),
158 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
159 U64(0x0907070e0907070e), U64(0x3612122436121224),
160 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
161 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
162 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
163 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
164 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
165 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
166 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
167 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
168 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
169 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
170 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
171 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
172 U64(0x0000000000000000), U64(0x2cededc12cededc1),
173 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
174 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
175 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
176 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
177 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
178 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
179 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
180 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
181 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
182 U64(0x5533336655333366), U64(0x9485851194858511),
183 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
184 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
185 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
186 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
187 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
188 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
189 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
190 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
191 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
192 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
193 U64(0x3010102030101020), U64(0x1affffe51affffe5),
194 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
195 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
196 U64(0x3513132635131326), U64(0x2fececc32fececc3),
197 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
198 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
199 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
200 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
201 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
202 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
203 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
204 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
205 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
206 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
207 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
208 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
209 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
210 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
211 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
212 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
213 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
214 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
215 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
216 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
217 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
218 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
219 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
220 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
221 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
222 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
223 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
224 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
225 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
226 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
227 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
228 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
229 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
230 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
231 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
232 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
233 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
234 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
235 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
236 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
237 U64(0xd8484890d8484890), U64(0x0503030605030306),
238 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
239 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
240 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
241 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
242 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
243 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
244 U64(0xb398982bb398982b), U64(0x3311112233111122),
245 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
246 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
247 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
248 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
249 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
250 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
251 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
252 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
253 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
254 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
255 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
256 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
257 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
258 U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
259};
260
261static const u8 Te4[256] = {
262 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
263 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
264 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
265 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
266 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
267 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
268 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
269 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
270 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
271 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
272 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
273 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
274 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
275 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
276 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
277 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
278 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
279 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
280 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
281 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
282 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
283 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
284 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
285 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
286 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
287 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
288 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
289 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
290 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
291 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
292 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
293 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
294};
295
296static const u64 Td[256] = {
297 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
298 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
299 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
300 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
301 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
302 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
303 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
304 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
305 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
306 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
307 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
308 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
309 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
310 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
311 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
312 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
313 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
314 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
315 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
316 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
317 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
318 U64(0x6033519760335197), U64(0x457f5362457f5362),
319 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
320 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
321 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
322 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
323 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
324 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
325 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
326 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
327 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
328 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
329 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
330 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
331 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
332 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
333 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
334 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
335 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
336 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
337 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
338 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
339 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
340 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
341 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
342 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
343 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
344 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
345 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
346 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
347 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
348 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
349 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
350 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
351 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
352 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
353 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
354 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
355 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
356 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
357 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
358 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
359 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
360 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
361 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
362 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
363 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
364 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
365 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
366 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
367 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
368 U64(0x4022971340229713), U64(0x2011c6842011c684),
369 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
370 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
371 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
372 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
373 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
374 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
375 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
376 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
377 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
378 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
379 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
380 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
381 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
382 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
383 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
384 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
385 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
386 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
387 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
388 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
389 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
390 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
391 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
392 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
393 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
394 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
395 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
396 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
397 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
398 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
399 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
400 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
401 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
402 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
403 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
404 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
405 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
406 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
407 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
408 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
409 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
410 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
411 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
412 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
413 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
414 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
415 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
416 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
417 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
418 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
419 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
420 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
421 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
422 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
423 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
424 U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
425};
426static const u8 Td4[256] = {
427 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
428 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
429 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
430 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
431 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
432 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
433 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
434 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
435 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
436 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
437 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
438 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
439 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
440 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
441 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
442 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
443 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
444 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
445 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
446 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
447 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
448 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
449 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
450 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
451 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
452 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
453 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
454 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
455 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
456 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
457 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
458 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
459};
460
/*
 * Round constants XORed into the first word of each key-expansion step in
 * AES_set_encrypt_key(), indexed by that function's iteration counter.
 * The constant sits in the low byte of each word.
 */
static const u32 rcon[] = {
	0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
	0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
	0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};
466
467/**
468 * Expand the cipher key into the encryption key schedule.
469 */
470int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
471 AES_KEY *key) {
472
473 u32 *rk;
474 int i = 0;
475 u32 temp;
476
477 if (!userKey || !key)
478 return -1;
479 if (bits != 128 && bits != 192 && bits != 256)
480 return -2;
481
482 rk = key->rd_key;
483
484 if (bits==128)
485 key->rounds = 10;
486 else if (bits==192)
487 key->rounds = 12;
488 else
489 key->rounds = 14;
490
491 rk[0] = GETU32(userKey );
492 rk[1] = GETU32(userKey + 4);
493 rk[2] = GETU32(userKey + 8);
494 rk[3] = GETU32(userKey + 12);
495 if (bits == 128) {
496 while (1) {
497 temp = rk[3];
498 rk[4] = rk[0] ^
499 (Te4[(temp >> 8) & 0xff] ) ^
500 (Te4[(temp >> 16) & 0xff] << 8) ^
501 (Te4[(temp >> 24) ] << 16) ^
502 (Te4[(temp ) & 0xff] << 24) ^
503 rcon[i];
504 rk[5] = rk[1] ^ rk[4];
505 rk[6] = rk[2] ^ rk[5];
506 rk[7] = rk[3] ^ rk[6];
507 if (++i == 10) {
508 return 0;
509 }
510 rk += 4;
511 }
512 }
513 rk[4] = GETU32(userKey + 16);
514 rk[5] = GETU32(userKey + 20);
515 if (bits == 192) {
516 while (1) {
517 temp = rk[ 5];
518 rk[ 6] = rk[ 0] ^
519 (Te4[(temp >> 8) & 0xff] ) ^
520 (Te4[(temp >> 16) & 0xff] << 8) ^
521 (Te4[(temp >> 24) ] << 16) ^
522 (Te4[(temp ) & 0xff] << 24) ^
523 rcon[i];
524 rk[ 7] = rk[ 1] ^ rk[ 6];
525 rk[ 8] = rk[ 2] ^ rk[ 7];
526 rk[ 9] = rk[ 3] ^ rk[ 8];
527 if (++i == 8) {
528 return 0;
529 }
530 rk[10] = rk[ 4] ^ rk[ 9];
531 rk[11] = rk[ 5] ^ rk[10];
532 rk += 6;
533 }
534 }
535 rk[6] = GETU32(userKey + 24);
536 rk[7] = GETU32(userKey + 28);
537 if (bits == 256) {
538 while (1) {
539 temp = rk[ 7];
540 rk[ 8] = rk[ 0] ^
541 (Te4[(temp >> 8) & 0xff] ) ^
542 (Te4[(temp >> 16) & 0xff] << 8) ^
543 (Te4[(temp >> 24) ] << 16) ^
544 (Te4[(temp ) & 0xff] << 24) ^
545 rcon[i];
546 rk[ 9] = rk[ 1] ^ rk[ 8];
547 rk[10] = rk[ 2] ^ rk[ 9];
548 rk[11] = rk[ 3] ^ rk[10];
549 if (++i == 7) {
550 return 0;
551 }
552 temp = rk[11];
553 rk[12] = rk[ 4] ^
554 (Te4[(temp ) & 0xff] ) ^
555 (Te4[(temp >> 8) & 0xff] << 8) ^
556 (Te4[(temp >> 16) & 0xff] << 16) ^
557 (Te4[(temp >> 24) ] << 24);
558 rk[13] = rk[ 5] ^ rk[12];
559 rk[14] = rk[ 6] ^ rk[13];
560 rk[15] = rk[ 7] ^ rk[14];
561
562 rk += 8;
563 }
564 }
565 return 0;
566}
567
568/**
569 * Expand the cipher key into the decryption key schedule.
570 */
571int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
572 AES_KEY *key) {
573
574 u32 *rk;
575 int i, j, status;
576 u32 temp;
577
578 /* first, start with an encryption schedule */
579 status = AES_set_encrypt_key(userKey, bits, key);
580 if (status < 0)
581 return status;
582
583 rk = key->rd_key;
584
585 /* invert the order of the round keys: */
586 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
587 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
588 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
589 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
590 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
591 }
592 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
593 for (i = 1; i < (key->rounds); i++) {
594 rk += 4;
595#if 1
596 for (j = 0; j < 4; j++) {
597 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
598
599 tp1 = rk[j];
600 m = tp1 & 0x80808080;
601 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
602 ((m - (m >> 7)) & 0x1b1b1b1b);
603 m = tp2 & 0x80808080;
604 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
605 ((m - (m >> 7)) & 0x1b1b1b1b);
606 m = tp4 & 0x80808080;
607 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
608 ((m - (m >> 7)) & 0x1b1b1b1b);
609 tp9 = tp8 ^ tp1;
610 tpb = tp9 ^ tp2;
611 tpd = tp9 ^ tp4;
612 tpe = tp8 ^ tp4 ^ tp2;
613#if defined(ROTATE)
614 rk[j] = tpe ^ ROTATE(tpd,16) ^
615 ROTATE(tp9,8) ^ ROTATE(tpb,24);
616#else
617 rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
618 (tp9 >> 24) ^ (tp9 << 8) ^
619 (tpb >> 8) ^ (tpb << 24);
620#endif
621 }
622#else
623 rk[0] =
624 Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
625 Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
626 Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
627 Td3[Te2[(rk[0] >> 24) ] & 0xff];
628 rk[1] =
629 Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
630 Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
631 Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
632 Td3[Te2[(rk[1] >> 24) ] & 0xff];
633 rk[2] =
634 Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
635 Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
636 Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
637 Td3[Te2[(rk[2] >> 24) ] & 0xff];
638 rk[3] =
639 Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
640 Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
641 Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
642 Td3[Te2[(rk[3] >> 24) ] & 0xff];
643#endif
644 }
645 return 0;
646}
647
648/*
649 * Encrypt a single block
650 * in and out can overlap
651 */
652void AES_encrypt(const unsigned char *in, unsigned char *out,
653 const AES_KEY *key) {
654
655 const u32 *rk;
656 u32 s0, s1, s2, s3, t[4];
657 int r;
658
659 assert(in && out && key);
660 rk = key->rd_key;
661
662 /*
663 * map byte array block to cipher state
664 * and add initial round key:
665 */
666 s0 = GETU32(in ) ^ rk[0];
667 s1 = GETU32(in + 4) ^ rk[1];
668 s2 = GETU32(in + 8) ^ rk[2];
669 s3 = GETU32(in + 12) ^ rk[3];
670
671#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
672 prefetch256(Te4);
673
674 t[0] = Te4[(s0 ) & 0xff] ^
675 Te4[(s1 >> 8) & 0xff] << 8 ^
676 Te4[(s2 >> 16) & 0xff] << 16 ^
677 Te4[(s3 >> 24) ] << 24;
678 t[1] = Te4[(s1 ) & 0xff] ^
679 Te4[(s2 >> 8) & 0xff] << 8 ^
680 Te4[(s3 >> 16) & 0xff] << 16 ^
681 Te4[(s0 >> 24) ] << 24;
682 t[2] = Te4[(s2 ) & 0xff] ^
683 Te4[(s3 >> 8) & 0xff] << 8 ^
684 Te4[(s0 >> 16) & 0xff] << 16 ^
685 Te4[(s1 >> 24) ] << 24;
686 t[3] = Te4[(s3 ) & 0xff] ^
687 Te4[(s0 >> 8) & 0xff] << 8 ^
688 Te4[(s1 >> 16) & 0xff] << 16 ^
689 Te4[(s2 >> 24) ] << 24;
690
691 /* now do the linear transform using words */
692 { int i;
693 u32 r0, r1, r2;
694
695 for (i = 0; i < 4; i++) {
696 r0 = t[i];
697 r1 = r0 & 0x80808080;
698 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
699 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
700#if defined(ROTATE)
701 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
702 ROTATE(r0,16) ^ ROTATE(r0,8);
703#else
704 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
705 (r0 << 16) ^ (r0 >> 16) ^
706 (r0 << 8) ^ (r0 >> 24);
707#endif
708 t[i] ^= rk[4+i];
709 }
710 }
711#else
712 t[0] = Te0[(s0 ) & 0xff] ^
713 Te1[(s1 >> 8) & 0xff] ^
714 Te2[(s2 >> 16) & 0xff] ^
715 Te3[(s3 >> 24) ] ^
716 rk[4];
717 t[1] = Te0[(s1 ) & 0xff] ^
718 Te1[(s2 >> 8) & 0xff] ^
719 Te2[(s3 >> 16) & 0xff] ^
720 Te3[(s0 >> 24) ] ^
721 rk[5];
722 t[2] = Te0[(s2 ) & 0xff] ^
723 Te1[(s3 >> 8) & 0xff] ^
724 Te2[(s0 >> 16) & 0xff] ^
725 Te3[(s1 >> 24) ] ^
726 rk[6];
727 t[3] = Te0[(s3 ) & 0xff] ^
728 Te1[(s0 >> 8) & 0xff] ^
729 Te2[(s1 >> 16) & 0xff] ^
730 Te3[(s2 >> 24) ] ^
731 rk[7];
732#endif
733 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
734
735 /*
736 * Nr - 2 full rounds:
737 */
738 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
739#if defined(AES_COMPACT_IN_INNER_ROUNDS)
740 t[0] = Te4[(s0 ) & 0xff] ^
741 Te4[(s1 >> 8) & 0xff] << 8 ^
742 Te4[(s2 >> 16) & 0xff] << 16 ^
743 Te4[(s3 >> 24) ] << 24;
744 t[1] = Te4[(s1 ) & 0xff] ^
745 Te4[(s2 >> 8) & 0xff] << 8 ^
746 Te4[(s3 >> 16) & 0xff] << 16 ^
747 Te4[(s0 >> 24) ] << 24;
748 t[2] = Te4[(s2 ) & 0xff] ^
749 Te4[(s3 >> 8) & 0xff] << 8 ^
750 Te4[(s0 >> 16) & 0xff] << 16 ^
751 Te4[(s1 >> 24) ] << 24;
752 t[3] = Te4[(s3 ) & 0xff] ^
753 Te4[(s0 >> 8) & 0xff] << 8 ^
754 Te4[(s1 >> 16) & 0xff] << 16 ^
755 Te4[(s2 >> 24) ] << 24;
756
757 /* now do the linear transform using words */
758 { int i;
759 u32 r0, r1, r2;
760
761 for (i = 0; i < 4; i++) {
762 r0 = t[i];
763 r1 = r0 & 0x80808080;
764 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
765 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
766#if defined(ROTATE)
767 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
768 ROTATE(r0,16) ^ ROTATE(r0,8);
769#else
770 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
771 (r0 << 16) ^ (r0 >> 16) ^
772 (r0 << 8) ^ (r0 >> 24);
773#endif
774 t[i] ^= rk[i];
775 }
776 }
777#else
778 t[0] = Te0[(s0 ) & 0xff] ^
779 Te1[(s1 >> 8) & 0xff] ^
780 Te2[(s2 >> 16) & 0xff] ^
781 Te3[(s3 >> 24) ] ^
782 rk[0];
783 t[1] = Te0[(s1 ) & 0xff] ^
784 Te1[(s2 >> 8) & 0xff] ^
785 Te2[(s3 >> 16) & 0xff] ^
786 Te3[(s0 >> 24) ] ^
787 rk[1];
788 t[2] = Te0[(s2 ) & 0xff] ^
789 Te1[(s3 >> 8) & 0xff] ^
790 Te2[(s0 >> 16) & 0xff] ^
791 Te3[(s1 >> 24) ] ^
792 rk[2];
793 t[3] = Te0[(s3 ) & 0xff] ^
794 Te1[(s0 >> 8) & 0xff] ^
795 Te2[(s1 >> 16) & 0xff] ^
796 Te3[(s2 >> 24) ] ^
797 rk[3];
798#endif
799 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
800 }
801 /*
802 * apply last round and
803 * map cipher state to byte array block:
804 */
805#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
806 prefetch256(Te4);
807
808 *(u32*)(out+0) =
809 Te4[(s0 ) & 0xff] ^
810 Te4[(s1 >> 8) & 0xff] << 8 ^
811 Te4[(s2 >> 16) & 0xff] << 16 ^
812 Te4[(s3 >> 24) ] << 24 ^
813 rk[0];
814 *(u32*)(out+4) =
815 Te4[(s1 ) & 0xff] ^
816 Te4[(s2 >> 8) & 0xff] << 8 ^
817 Te4[(s3 >> 16) & 0xff] << 16 ^
818 Te4[(s0 >> 24) ] << 24 ^
819 rk[1];
820 *(u32*)(out+8) =
821 Te4[(s2 ) & 0xff] ^
822 Te4[(s3 >> 8) & 0xff] << 8 ^
823 Te4[(s0 >> 16) & 0xff] << 16 ^
824 Te4[(s1 >> 24) ] << 24 ^
825 rk[2];
826 *(u32*)(out+12) =
827 Te4[(s3 ) & 0xff] ^
828 Te4[(s0 >> 8) & 0xff] << 8 ^
829 Te4[(s1 >> 16) & 0xff] << 16 ^
830 Te4[(s2 >> 24) ] << 24 ^
831 rk[3];
832#else
833 *(u32*)(out+0) =
834 (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
835 (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
836 (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
837 (Te1[(s3 >> 24) ] & 0xff000000U) ^
838 rk[0];
839 *(u32*)(out+4) =
840 (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
841 (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
842 (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
843 (Te1[(s0 >> 24) ] & 0xff000000U) ^
844 rk[1];
845 *(u32*)(out+8) =
846 (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
847 (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
848 (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
849 (Te1[(s1 >> 24) ] & 0xff000000U) ^
850 rk[2];
851 *(u32*)(out+12) =
852 (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
853 (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
854 (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
855 (Te1[(s2 >> 24) ] & 0xff000000U) ^
856 rk[3];
857#endif
858}
859
860/*
861 * Decrypt a single block
862 * in and out can overlap
863 */
864void AES_decrypt(const unsigned char *in, unsigned char *out,
865 const AES_KEY *key) {
866
867 const u32 *rk;
868 u32 s0, s1, s2, s3, t[4];
869 int r;
870
871 assert(in && out && key);
872 rk = key->rd_key;
873
874 /*
875 * map byte array block to cipher state
876 * and add initial round key:
877 */
878 s0 = GETU32(in ) ^ rk[0];
879 s1 = GETU32(in + 4) ^ rk[1];
880 s2 = GETU32(in + 8) ^ rk[2];
881 s3 = GETU32(in + 12) ^ rk[3];
882
883#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
884 prefetch256(Td4);
885
886 t[0] = Td4[(s0 ) & 0xff] ^
887 Td4[(s3 >> 8) & 0xff] << 8 ^
888 Td4[(s2 >> 16) & 0xff] << 16 ^
889 Td4[(s1 >> 24) ] << 24;
890 t[1] = Td4[(s1 ) & 0xff] ^
891 Td4[(s0 >> 8) & 0xff] << 8 ^
892 Td4[(s3 >> 16) & 0xff] << 16 ^
893 Td4[(s2 >> 24) ] << 24;
894 t[2] = Td4[(s2 ) & 0xff] ^
895 Td4[(s1 >> 8) & 0xff] << 8 ^
896 Td4[(s0 >> 16) & 0xff] << 16 ^
897 Td4[(s3 >> 24) ] << 24;
898 t[3] = Td4[(s3 ) & 0xff] ^
899 Td4[(s2 >> 8) & 0xff] << 8 ^
900 Td4[(s1 >> 16) & 0xff] << 16 ^
901 Td4[(s0 >> 24) ] << 24;
902
903 /* now do the linear transform using words */
904 { int i;
905 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
906
907 for (i = 0; i < 4; i++) {
908 tp1 = t[i];
909 m = tp1 & 0x80808080;
910 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
911 ((m - (m >> 7)) & 0x1b1b1b1b);
912 m = tp2 & 0x80808080;
913 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
914 ((m - (m >> 7)) & 0x1b1b1b1b);
915 m = tp4 & 0x80808080;
916 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
917 ((m - (m >> 7)) & 0x1b1b1b1b);
918 tp9 = tp8 ^ tp1;
919 tpb = tp9 ^ tp2;
920 tpd = tp9 ^ tp4;
921 tpe = tp8 ^ tp4 ^ tp2;
922#if defined(ROTATE)
923 t[i] = tpe ^ ROTATE(tpd,16) ^
924 ROTATE(tp9,8) ^ ROTATE(tpb,24);
925#else
926 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
927 (tp9 >> 24) ^ (tp9 << 8) ^
928 (tpb >> 8) ^ (tpb << 24);
929#endif
930 t[i] ^= rk[4+i];
931 }
932 }
933#else
934 t[0] = Td0[(s0 ) & 0xff] ^
935 Td1[(s3 >> 8) & 0xff] ^
936 Td2[(s2 >> 16) & 0xff] ^
937 Td3[(s1 >> 24) ] ^
938 rk[4];
939 t[1] = Td0[(s1 ) & 0xff] ^
940 Td1[(s0 >> 8) & 0xff] ^
941 Td2[(s3 >> 16) & 0xff] ^
942 Td3[(s2 >> 24) ] ^
943 rk[5];
944 t[2] = Td0[(s2 ) & 0xff] ^
945 Td1[(s1 >> 8) & 0xff] ^
946 Td2[(s0 >> 16) & 0xff] ^
947 Td3[(s3 >> 24) ] ^
948 rk[6];
949 t[3] = Td0[(s3 ) & 0xff] ^
950 Td1[(s2 >> 8) & 0xff] ^
951 Td2[(s1 >> 16) & 0xff] ^
952 Td3[(s0 >> 24) ] ^
953 rk[7];
954#endif
955 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
956
957 /*
958 * Nr - 2 full rounds:
959 */
960 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
961#if defined(AES_COMPACT_IN_INNER_ROUNDS)
962 t[0] = Td4[(s0 ) & 0xff] ^
963 Td4[(s3 >> 8) & 0xff] << 8 ^
964 Td4[(s2 >> 16) & 0xff] << 16 ^
965 Td4[(s1 >> 24) ] << 24;
966 t[1] = Td4[(s1 ) & 0xff] ^
967 Td4[(s0 >> 8) & 0xff] << 8 ^
968 Td4[(s3 >> 16) & 0xff] << 16 ^
969 Td4[(s2 >> 24) ] << 24;
970 t[2] = Td4[(s2 ) & 0xff] ^
971 Td4[(s1 >> 8) & 0xff] << 8 ^
972 Td4[(s0 >> 16) & 0xff] << 16 ^
973 Td4[(s3 >> 24) ] << 24;
974 t[3] = Td4[(s3 ) & 0xff] ^
975 Td4[(s2 >> 8) & 0xff] << 8 ^
976 Td4[(s1 >> 16) & 0xff] << 16 ^
977 Td4[(s0 >> 24) ] << 24;
978
979 /* now do the linear transform using words */
980 { int i;
981 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
982
983 for (i = 0; i < 4; i++) {
984 tp1 = t[i];
985 m = tp1 & 0x80808080;
986 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
987 ((m - (m >> 7)) & 0x1b1b1b1b);
988 m = tp2 & 0x80808080;
989 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
990 ((m - (m >> 7)) & 0x1b1b1b1b);
991 m = tp4 & 0x80808080;
992 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
993 ((m - (m >> 7)) & 0x1b1b1b1b);
994 tp9 = tp8 ^ tp1;
995 tpb = tp9 ^ tp2;
996 tpd = tp9 ^ tp4;
997 tpe = tp8 ^ tp4 ^ tp2;
998#if defined(ROTATE)
999 t[i] = tpe ^ ROTATE(tpd,16) ^
1000 ROTATE(tp9,8) ^ ROTATE(tpb,24);
1001#else
1002 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1003 (tp9 >> 24) ^ (tp9 << 8) ^
1004 (tpb >> 8) ^ (tpb << 24);
1005#endif
1006 t[i] ^= rk[i];
1007 }
1008 }
1009#else
1010 t[0] = Td0[(s0 ) & 0xff] ^
1011 Td1[(s3 >> 8) & 0xff] ^
1012 Td2[(s2 >> 16) & 0xff] ^
1013 Td3[(s1 >> 24) ] ^
1014 rk[0];
1015 t[1] = Td0[(s1 ) & 0xff] ^
1016 Td1[(s0 >> 8) & 0xff] ^
1017 Td2[(s3 >> 16) & 0xff] ^
1018 Td3[(s2 >> 24) ] ^
1019 rk[1];
1020 t[2] = Td0[(s2 ) & 0xff] ^
1021 Td1[(s1 >> 8) & 0xff] ^
1022 Td2[(s0 >> 16) & 0xff] ^
1023 Td3[(s3 >> 24) ] ^
1024 rk[2];
1025 t[3] = Td0[(s3 ) & 0xff] ^
1026 Td1[(s2 >> 8) & 0xff] ^
1027 Td2[(s1 >> 16) & 0xff] ^
1028 Td3[(s0 >> 24) ] ^
1029 rk[3];
1030#endif
1031 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
1032 }
1033 /*
1034 * apply last round and
1035 * map cipher state to byte array block:
1036 */
1037 prefetch256(Td4);
1038
1039 *(u32*)(out+0) =
1040 (Td4[(s0 ) & 0xff]) ^
1041 (Td4[(s3 >> 8) & 0xff] << 8) ^
1042 (Td4[(s2 >> 16) & 0xff] << 16) ^
1043 (Td4[(s1 >> 24) ] << 24) ^
1044 rk[0];
1045 *(u32*)(out+4) =
1046 (Td4[(s1 ) & 0xff]) ^
1047 (Td4[(s0 >> 8) & 0xff] << 8) ^
1048 (Td4[(s3 >> 16) & 0xff] << 16) ^
1049 (Td4[(s2 >> 24) ] << 24) ^
1050 rk[1];
1051 *(u32*)(out+8) =
1052 (Td4[(s2 ) & 0xff]) ^
1053 (Td4[(s1 >> 8) & 0xff] << 8) ^
1054 (Td4[(s0 >> 16) & 0xff] << 16) ^
1055 (Td4[(s3 >> 24) ] << 24) ^
1056 rk[2];
1057 *(u32*)(out+12) =
1058 (Td4[(s3 ) & 0xff]) ^
1059 (Td4[(s2 >> 8) & 0xff] << 8) ^
1060 (Td4[(s1 >> 16) & 0xff] << 16) ^
1061 (Td4[(s0 >> 24) ] << 24) ^
1062 rk[3];
1063}
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl b/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl
new file mode 100644
index 0000000000..15742c1ec5
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl
@@ -0,0 +1,1030 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses a single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
16# allows merging a logical or arithmetic operation with a shift or rotate
17# in one instruction and emitting the combined result every cycle. The module
18# is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
19# key.
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
# Symbolic names for the ARM registers used by the generated assembly.
# They are interpolated into the heredoc code below.
# s0-s3: the four 32-bit words of the cipher state (also r0-r3, so they
# overlap the incoming C arguments).
25$s0="r0";
26$s1="r1";
27$s2="r2";
28$s3="r3";
# t1-t3: scratch registers for loaded bytes, table words and round keys.
29$t1="r4";
30$t2="r5";
31$t3="r6";
# i1-i3: scratch registers, mostly holding table indices extracted from
# the state words.
32$i1="r7";
33$i2="r8";
34$i3="r9";
35
# tbl: lookup table base; key: pointer into the key schedule;
# rounds: round counter (also used as the in/out pointer in the wrappers).
36$tbl="r10";
37$key="r11";
38$rounds="r12";
39
40$code=<<___;
41.text
42.code 32
43
44.type AES_Te,%object
45.align 5
46AES_Te:
47.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
48.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
49.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
50.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
51.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
52.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
53.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
54.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
55.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
56.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
57.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
58.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
59.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
60.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
61.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
62.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
63.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
64.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
65.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
66.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
67.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
68.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
69.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
70.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
71.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
72.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
73.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
74.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
75.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
76.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
77.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
78.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
79.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
80.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
81.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
82.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
83.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
84.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
85.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
86.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
87.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
88.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
89.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
90.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
91.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
92.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
93.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
94.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
95.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
96.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
97.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
98.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
99.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
100.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
101.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
102.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
103.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
104.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
105.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
106.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
107.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
108.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
109.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
110.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
111@ Te4[256]
112.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
113.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
114.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
115.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
116.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
117.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
118.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
119.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
120.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
121.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
122.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
123.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
124.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
125.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
126.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
127.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
128.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
129.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
130.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
131.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
132.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
133.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
134.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
135.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
136.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
137.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
138.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
139.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
140.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
141.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
142.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
143.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
144@ rcon[]
145.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
146.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
147.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
148.size AES_Te,.-AES_Te
149
@ Public entry point. Arguments arrive in r0 (in), r1 (out) and r2 (key).
@ The 16 input bytes are assembled into four state words with the first
@ byte of each group in the most significant position, the core routine
@ _armv4_AES_encrypt is called, and the four result words are written
@ to out one byte at a time in the same order.
150@ void AES_encrypt(const unsigned char *in, unsigned char *out,
151@ const AES_KEY *key) {
152.global AES_encrypt
153.type AES_encrypt,%function
154.align 5
155AES_encrypt:
@ r3 = address of AES_encrypt, used below to locate AES_Te relative to it
156 sub r3,pc,#8 @ AES_encrypt
@ r1 (out) is saved on top of the stack together with r4-r12 and lr,
@ because r0-r3 are about to be reused as the state words
157 stmdb sp!,{r1,r4-r12,lr}
158 mov $rounds,r0 @ inp
159 mov $key,r2
160 sub $tbl,r3,#AES_encrypt-AES_Te @ Te
161
162 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
163 ldrb $t1,[$rounds,#2] @ manner...
164 ldrb $t2,[$rounds,#1]
165 ldrb $t3,[$rounds,#0]
166 orr $s0,$s0,$t1,lsl#8
167 orr $s0,$s0,$t2,lsl#16
168 orr $s0,$s0,$t3,lsl#24
169 ldrb $s1,[$rounds,#7]
170 ldrb $t1,[$rounds,#6]
171 ldrb $t2,[$rounds,#5]
172 ldrb $t3,[$rounds,#4]
173 orr $s1,$s1,$t1,lsl#8
174 orr $s1,$s1,$t2,lsl#16
175 orr $s1,$s1,$t3,lsl#24
176 ldrb $s2,[$rounds,#11]
177 ldrb $t1,[$rounds,#10]
178 ldrb $t2,[$rounds,#9]
179 ldrb $t3,[$rounds,#8]
180 orr $s2,$s2,$t1,lsl#8
181 orr $s2,$s2,$t2,lsl#16
182 orr $s2,$s2,$t3,lsl#24
183 ldrb $s3,[$rounds,#15]
184 ldrb $t1,[$rounds,#14]
185 ldrb $t2,[$rounds,#13]
186 ldrb $t3,[$rounds,#12]
187 orr $s3,$s3,$t1,lsl#8
188 orr $s3,$s3,$t2,lsl#16
189 orr $s3,$s3,$t3,lsl#24
190
191 bl _armv4_AES_encrypt
192
@ the word popped here is the r1 (out) value pushed on entry
193 ldr $rounds,[sp],#4 @ pop out
194 mov $t1,$s0,lsr#24 @ write output in endian-neutral
195 mov $t2,$s0,lsr#16 @ manner...
196 mov $t3,$s0,lsr#8
197 strb $t1,[$rounds,#0]
198 strb $t2,[$rounds,#1]
199 strb $t3,[$rounds,#2]
200 strb $s0,[$rounds,#3]
201 mov $t1,$s1,lsr#24
202 mov $t2,$s1,lsr#16
203 mov $t3,$s1,lsr#8
204 strb $t1,[$rounds,#4]
205 strb $t2,[$rounds,#5]
206 strb $t3,[$rounds,#6]
207 strb $s1,[$rounds,#7]
208 mov $t1,$s2,lsr#24
209 mov $t2,$s2,lsr#16
210 mov $t3,$s2,lsr#8
211 strb $t1,[$rounds,#8]
212 strb $t2,[$rounds,#9]
213 strb $t3,[$rounds,#10]
214 strb $s2,[$rounds,#11]
215 mov $t1,$s3,lsr#24
216 mov $t2,$s3,lsr#16
217 mov $t3,$s3,lsr#8
218 strb $t1,[$rounds,#12]
219 strb $t2,[$rounds,#13]
220 strb $t3,[$rounds,#14]
221 strb $s3,[$rounds,#15]
222
@ restore the remaining saved registers; bit 0 of lr selects between a
@ plain move to pc and bx for the return
223 ldmia sp!,{r4-r12,lr}
224 tst lr,#1
225 moveq pc,lr @ be binary compatible with V4, yet
226 bx lr @ interoperable with Thumb ISA:-)
227.size AES_encrypt,.-AES_encrypt
228
@ Core block encryption, called from AES_encrypt above.
@ On entry: r0-r3 hold the four state words, r10 points at AES_Te and
@ r11 points at the start of the key schedule.
@ On exit: r0-r3 hold the result. r4-r9 and r12 are used as scratch;
@ lr is saved on the stack because it is reused as the 0xff byte mask.
@ r10 is returned unchanged and r11 is left pointing at the last round key.
229.type _armv4_AES_encrypt,%function
230.align 2
231_armv4_AES_encrypt:
232 str lr,[sp,#-4]! @ push lr
@ load round key 0 (r11 advances by 16) and xor it into the state
233 ldr $t1,[$key],#16
234 ldr $t2,[$key,#-12]
235 ldr $t3,[$key,#-8]
236 ldr $i1,[$key,#-4]
@ round count lives at offset 240 from the start of the key schedule;
@ r11 has already been advanced by 16, hence 240-16
237 ldr $rounds,[$key,#240-16]
238 eor $s0,$s0,$t1
239 eor $s1,$s1,$t2
240 eor $s2,$s2,$t3
241 eor $s3,$s3,$i1
@ the loop below runs rounds-1 times; the last round is handled after it
242 sub $rounds,$rounds,#1
243 mov lr,#255
244
@ One full round per iteration. Only one table of words is used: the
@ ror operands on the eor instructions rotate each looked-up word into
@ the position the Te1/Te2/Te3 comments refer to.
245.Lenc_loop:
246 and $i2,lr,$s0,lsr#8
247 and $i3,lr,$s0,lsr#16
248 and $i1,lr,$s0
249 mov $s0,$s0,lsr#24
250 ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0]
251 ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24]
252 ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8]
253 ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16]
254
255 and $i1,lr,$s1,lsr#16 @ i0
256 and $i2,lr,$s1
257 and $i3,lr,$s1,lsr#8
258 mov $s1,$s1,lsr#24
259 ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16]
260 ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24]
261 ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0]
262 ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8]
263 eor $s0,$s0,$i1,ror#8
264 eor $s1,$s1,$t1,ror#24
265 eor $t2,$t2,$i2,ror#8
266 eor $t3,$t3,$i3,ror#8
267
268 and $i1,lr,$s2,lsr#8 @ i0
269 and $i2,lr,$s2,lsr#16 @ i1
270 and $i3,lr,$s2
271 mov $s2,$s2,lsr#24
272 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
273 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
274 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
275 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
276 eor $s0,$s0,$i1,ror#16
277 eor $s1,$s1,$i2,ror#8
278 eor $s2,$s2,$t2,ror#16
279 eor $t3,$t3,$i3,ror#16
280
281 and $i1,lr,$s3 @ i0
282 and $i2,lr,$s3,lsr#8 @ i1
283 and $i3,lr,$s3,lsr#16 @ i2
284 mov $s3,$s3,lsr#24
285 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
286 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
287 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
288 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
289 eor $s0,$s0,$i1,ror#24
290 eor $s1,$s1,$i2,ror#16
291 eor $s2,$s2,$i3,ror#8
292 eor $s3,$s3,$t3,ror#8
293
@ add the next round key
294 ldr $t1,[$key],#16
295 ldr $t2,[$key,#-12]
296 ldr $t3,[$key,#-8]
297 ldr $i1,[$key,#-4]
298 eor $s0,$s0,$t1
299 eor $s1,$s1,$t2
300 eor $s2,$s2,$t3
301 eor $s3,$s3,$i1
302
303 subs $rounds,$rounds,#1
304 bne .Lenc_loop
305
@ Last round: only single bytes are needed. Byte offset 2 of every
@ table word holds the same value whichever byte order the word is
@ stored in (for example 0x63 in 0xc66363a5), so the table pointer is
@ bumped by 2 and the lookups below use byte loads.
306 add $tbl,$tbl,#2
307
308 and $i1,lr,$s0
309 and $i2,lr,$s0,lsr#8
310 and $i3,lr,$s0,lsr#16
311 mov $s0,$s0,lsr#24
312 ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0]
313 ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24]
314 ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8]
315 ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16]
316
317 and $i1,lr,$s1,lsr#16 @ i0
318 and $i2,lr,$s1
319 and $i3,lr,$s1,lsr#8
320 mov $s1,$s1,lsr#24
321 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16]
322 ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24]
323 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0]
324 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8]
325 eor $s0,$i1,$s0,lsl#8
326 eor $s1,$t1,$s1,lsl#24
327 eor $t2,$i2,$t2,lsl#8
328 eor $t3,$i3,$t3,lsl#8
329
330 and $i1,lr,$s2,lsr#8 @ i0
331 and $i2,lr,$s2,lsr#16 @ i1
332 and $i3,lr,$s2
333 mov $s2,$s2,lsr#24
334 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
335 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
336 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
337 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
338 eor $s0,$i1,$s0,lsl#8
339 eor $s1,$s1,$i2,lsl#16
340 eor $s2,$t2,$s2,lsl#24
341 eor $t3,$i3,$t3,lsl#8
342
343 and $i1,lr,$s3 @ i0
344 and $i2,lr,$s3,lsr#8 @ i1
345 and $i3,lr,$s3,lsr#16 @ i2
346 mov $s3,$s3,lsr#24
347 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
348 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
349 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
350 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
351 eor $s0,$i1,$s0,lsl#8
352 eor $s1,$s1,$i2,lsl#8
353 eor $s2,$s2,$i3,lsl#16
354 eor $s3,$t3,$s3,lsl#24
355
@ the byte mask is no longer needed, so lr can be restored before the
@ final round key is added
356 ldr lr,[sp],#4 @ pop lr
357 ldr $t1,[$key,#0]
358 ldr $t2,[$key,#4]
359 ldr $t3,[$key,#8]
360 ldr $i1,[$key,#12]
361 eor $s0,$s0,$t1
362 eor $s1,$s1,$t2
363 eor $s2,$s2,$t3
364 eor $s3,$s3,$i1
365
@ undo the +2 adjustment so the caller sees the table pointer unchanged
366 sub $tbl,$tbl,#2
367 mov pc,lr @ return
368.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
369
@ Key expansion. Arguments arrive in r0 (user key bytes), r1 (key length
@ in bits) and r2 (key schedule to fill).
@ Returns -1 in r0 when r0 or r2 is zero or when r1 is not 128, 192 or
@ 256; otherwise the schedule is written, the round count (10, 12 or 14)
@ is stored at offset 240 of the schedule, and 0 is returned.
@ On the success path r2 is set back to the start of the schedule
@ before returning.
370.global AES_set_encrypt_key
371.type AES_set_encrypt_key,%function
372.align 5
373AES_set_encrypt_key:
374 sub r3,pc,#8 @ AES_set_encrypt_key
@ argument checks happen before anything is pushed, so the abort path
@ can return straight through lr
375 teq r0,#0
376 moveq r0,#-1
377 beq .Labrt
378 teq r2,#0
379 moveq r0,#-1
380 beq .Labrt
381
382 teq r1,#128
383 beq .Lok
384 teq r1,#192
385 beq .Lok
386 teq r1,#256
387 movne r0,#-1
388 bne .Labrt
389
390.Lok: stmdb sp!,{r4-r12,lr}
@ r10 = AES_Te + 1024, i.e. the byte table that follows the 256 words
391 sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4
392
393 mov $rounds,r0 @ inp
394 mov lr,r1 @ bits
395 mov $key,r2 @ key
396
@ the first 16 key bytes become schedule words 0-3 in every case
397 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
398 ldrb $t1,[$rounds,#2] @ manner...
399 ldrb $t2,[$rounds,#1]
400 ldrb $t3,[$rounds,#0]
401 orr $s0,$s0,$t1,lsl#8
402 orr $s0,$s0,$t2,lsl#16
403 orr $s0,$s0,$t3,lsl#24
404 ldrb $s1,[$rounds,#7]
405 ldrb $t1,[$rounds,#6]
406 ldrb $t2,[$rounds,#5]
407 ldrb $t3,[$rounds,#4]
408 orr $s1,$s1,$t1,lsl#8
409 orr $s1,$s1,$t2,lsl#16
410 orr $s1,$s1,$t3,lsl#24
411 ldrb $s2,[$rounds,#11]
412 ldrb $t1,[$rounds,#10]
413 ldrb $t2,[$rounds,#9]
414 ldrb $t3,[$rounds,#8]
415 orr $s2,$s2,$t1,lsl#8
416 orr $s2,$s2,$t2,lsl#16
417 orr $s2,$s2,$t3,lsl#24
418 ldrb $s3,[$rounds,#15]
419 ldrb $t1,[$rounds,#14]
420 ldrb $t2,[$rounds,#13]
421 ldrb $t3,[$rounds,#12]
422 orr $s3,$s3,$t1,lsl#8
423 orr $s3,$s3,$t2,lsl#16
424 orr $s3,$s3,$t3,lsl#24
425 str $s0,[$key],#16
426 str $s1,[$key,#-12]
427 str $s2,[$key,#-8]
428 str $s3,[$key,#-4]
429
@ ---- 128-bit keys: 10 iterations, four new words each ----
430 teq lr,#128
431 bne .Lnot128
432 mov $rounds,#10
@ r11 is 16 bytes into the schedule here, so 240-16 addresses offset 240
433 str $rounds,[$key,#240-16]
434 add $t3,$tbl,#256 @ rcon
435 mov lr,#255
436
437.L128_loop:
438 and $t2,lr,$s3,lsr#24
439 and $i1,lr,$s3,lsr#16
440 and $i2,lr,$s3,lsr#8
441 and $i3,lr,$s3
442 ldrb $t2,[$tbl,$t2]
443 ldrb $i1,[$tbl,$i1]
444 ldrb $i2,[$tbl,$i2]
445 ldrb $i3,[$tbl,$i3]
446 ldr $t1,[$t3],#4 @ rcon[i++]
447 orr $t2,$t2,$i1,lsl#24
448 orr $t2,$t2,$i2,lsl#16
449 orr $t2,$t2,$i3,lsl#8
450 eor $t2,$t2,$t1
451 eor $s0,$s0,$t2 @ rk[4]=rk[0]^...
452 eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4]
453 eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5]
454 eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6]
455 str $s0,[$key],#16
456 str $s1,[$key,#-12]
457 str $s2,[$key,#-8]
458 str $s3,[$key,#-4]
459
460 subs $rounds,$rounds,#1
461 bne .L128_loop
@ 11 round keys of 16 bytes were written: step r2 back to the start
462 sub r2,$key,#176
463 b .Ldone
464
@ ---- 192- and 256-bit keys: key bytes 16-23 become words 4-5 ----
465.Lnot128:
466 ldrb $i2,[$rounds,#19]
467 ldrb $t1,[$rounds,#18]
468 ldrb $t2,[$rounds,#17]
469 ldrb $t3,[$rounds,#16]
470 orr $i2,$i2,$t1,lsl#8
471 orr $i2,$i2,$t2,lsl#16
472 orr $i2,$i2,$t3,lsl#24
473 ldrb $i3,[$rounds,#23]
474 ldrb $t1,[$rounds,#22]
475 ldrb $t2,[$rounds,#21]
476 ldrb $t3,[$rounds,#20]
477 orr $i3,$i3,$t1,lsl#8
478 orr $i3,$i3,$t2,lsl#16
479 orr $i3,$i3,$t3,lsl#24
480 str $i2,[$key],#8
481 str $i3,[$key,#-4]
482
@ ---- 192-bit keys: round count 12, loop counter 8 ----
483 teq lr,#192
484 bne .Lnot192
485 mov $rounds,#12
486 str $rounds,[$key,#240-24]
487 add $t3,$tbl,#256 @ rcon
488 mov lr,#255
489 mov $rounds,#8
490
491.L192_loop:
492 and $t2,lr,$i3,lsr#24
493 and $i1,lr,$i3,lsr#16
494 and $i2,lr,$i3,lsr#8
495 and $i3,lr,$i3
496 ldrb $t2,[$tbl,$t2]
497 ldrb $i1,[$tbl,$i1]
498 ldrb $i2,[$tbl,$i2]
499 ldrb $i3,[$tbl,$i3]
500 ldr $t1,[$t3],#4 @ rcon[i++]
501 orr $t2,$t2,$i1,lsl#24
502 orr $t2,$t2,$i2,lsl#16
503 orr $t2,$t2,$i3,lsl#8
504 eor $i3,$t2,$t1
505 eor $s0,$s0,$i3 @ rk[6]=rk[0]^...
506 eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6]
507 eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7]
508 eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8]
509 str $s0,[$key],#24
510 str $s1,[$key,#-20]
511 str $s2,[$key,#-16]
512 str $s3,[$key,#-12]
513
@ the final iteration stops after four words; r2 is stepped back to
@ the start of the schedule on that exit
514 subs $rounds,$rounds,#1
515 subeq r2,$key,#216
516 beq .Ldone
517
518 ldr $i1,[$key,#-32]
519 ldr $i2,[$key,#-28]
520 eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9]
521 eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10]
522 str $i1,[$key,#-8]
523 str $i3,[$key,#-4]
524 b .L192_loop
525
@ ---- 256-bit keys: key bytes 24-31 become words 6-7; round count 14,
@ loop counter 7 ----
526.Lnot192:
527 ldrb $i2,[$rounds,#27]
528 ldrb $t1,[$rounds,#26]
529 ldrb $t2,[$rounds,#25]
530 ldrb $t3,[$rounds,#24]
531 orr $i2,$i2,$t1,lsl#8
532 orr $i2,$i2,$t2,lsl#16
533 orr $i2,$i2,$t3,lsl#24
534 ldrb $i3,[$rounds,#31]
535 ldrb $t1,[$rounds,#30]
536 ldrb $t2,[$rounds,#29]
537 ldrb $t3,[$rounds,#28]
538 orr $i3,$i3,$t1,lsl#8
539 orr $i3,$i3,$t2,lsl#16
540 orr $i3,$i3,$t3,lsl#24
541 str $i2,[$key],#8
542 str $i3,[$key,#-4]
543
544 mov $rounds,#14
545 str $rounds,[$key,#240-32]
546 add $t3,$tbl,#256 @ rcon
547 mov lr,#255
548 mov $rounds,#7
549
550.L256_loop:
551 and $t2,lr,$i3,lsr#24
552 and $i1,lr,$i3,lsr#16
553 and $i2,lr,$i3,lsr#8
554 and $i3,lr,$i3
555 ldrb $t2,[$tbl,$t2]
556 ldrb $i1,[$tbl,$i1]
557 ldrb $i2,[$tbl,$i2]
558 ldrb $i3,[$tbl,$i3]
559 ldr $t1,[$t3],#4 @ rcon[i++]
560 orr $t2,$t2,$i1,lsl#24
561 orr $t2,$t2,$i2,lsl#16
562 orr $t2,$t2,$i3,lsl#8
563 eor $i3,$t2,$t1
564 eor $s0,$s0,$i3 @ rk[8]=rk[0]^...
565 eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8]
566 eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9]
567 eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10]
568 str $s0,[$key],#32
569 str $s1,[$key,#-28]
570 str $s2,[$key,#-24]
571 str $s3,[$key,#-20]
572
573 subs $rounds,$rounds,#1
574 subeq r2,$key,#256
575 beq .Ldone
576
@ second half of the iteration: byte substitution of the word just
@ written, without byte rotation and without a round constant
577 and $t2,lr,$s3
578 and $i1,lr,$s3,lsr#8
579 and $i2,lr,$s3,lsr#16
580 and $i3,lr,$s3,lsr#24
581 ldrb $t2,[$tbl,$t2]
582 ldrb $i1,[$tbl,$i1]
583 ldrb $i2,[$tbl,$i2]
584 ldrb $i3,[$tbl,$i3]
585 orr $t2,$t2,$i1,lsl#8
586 orr $t2,$t2,$i2,lsl#16
587 orr $t2,$t2,$i3,lsl#24
588
589 ldr $t1,[$key,#-48]
590 ldr $i1,[$key,#-44]
591 ldr $i2,[$key,#-40]
592 ldr $i3,[$key,#-36]
593 eor $t1,$t1,$t2 @ rk[12]=rk[4]^...
594 eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12]
595 eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13]
596 eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14]
597 str $t1,[$key,#-16]
598 str $i1,[$key,#-12]
599 str $i2,[$key,#-8]
600 str $i3,[$key,#-4]
601 b .L256_loop
602
@ .Ldone is the success exit (registers restored, r0 = 0); .Labrt is
@ shared with the early argument failures and with AES_set_decrypt_key
603.Ldone: mov r0,#0
604 ldmia sp!,{r4-r12,lr}
605.Labrt: tst lr,#1
606 moveq pc,lr @ be binary compatible with V4, yet
607 bx lr @ interoperable with Thumb ISA:-)
608.size AES_set_encrypt_key,.-AES_set_encrypt_key
609
@ Decryption key setup. Takes the same arguments as AES_set_encrypt_key,
@ which is called first; a non-zero result from it is returned as is
@ through the shared .Labrt exit. Otherwise the round keys are swapped
@ end for end in place, the words of every round key except the first
@ and the last are transformed (tp1 -> tpe and rotated tp9/tpb/tpd
@ terms), and 0 is returned.
610.global AES_set_decrypt_key
611.type AES_set_decrypt_key,%function
612.align 5
613AES_set_decrypt_key:
614 str lr,[sp,#-4]! @ push lr
615 bl AES_set_encrypt_key
616 teq r0,#0
617 ldrne lr,[sp],#4 @ pop lr
618 bne .Labrt
619
@ lr is already on the stack from the push above; the ldmia at the end
@ of this routine pops r4-r12 and that lr together
620 stmdb sp!,{r4-r12}
621
622 ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
623 mov $key,r2 @ which is AES_KEY *key
@ r7 walks up from the first round key, r8 walks down from the last
624 mov $i1,r2
625 add $i2,r2,$rounds,lsl#4
626
@ swap 16-byte round keys pairwise until the two pointers meet
627.Linv: ldr $s0,[$i1]
628 ldr $s1,[$i1,#4]
629 ldr $s2,[$i1,#8]
630 ldr $s3,[$i1,#12]
631 ldr $t1,[$i2]
632 ldr $t2,[$i2,#4]
633 ldr $t3,[$i2,#8]
634 ldr $i3,[$i2,#12]
635 str $s0,[$i2],#-16
636 str $s1,[$i2,#16+4]
637 str $s2,[$i2,#16+8]
638 str $s3,[$i2,#16+12]
639 str $t1,[$i1],#16
640 str $t2,[$i1,#-12]
641 str $t3,[$i1,#-8]
642 str $i3,[$i1,#-4]
643 teq $i1,$i2
644 bne .Linv
645___
# The swap loop no longer needs i1-i3, so the same registers are reused
# under new names for the three byte-lane masks of the mixing loop.
646$mask80=$i1;
647$mask1b=$i2;
648$mask7f=$i3;
649$code.=<<___;
@ r11 is advanced to the second round key and its first word is loaded
650 ldr $s0,[$key,#16]! @ prefetch tp1
@ build 0x80808080 and 0x1b1b1b1b from byte constants; the third mask
@ is the complement of the first
651 mov $mask80,#0x80
652 mov $mask1b,#0x1b
653 orr $mask80,$mask80,#0x8000
654 orr $mask1b,$mask1b,#0x1b00
655 orr $mask80,$mask80,$mask80,lsl#16
656 orr $mask1b,$mask1b,$mask1b,lsl#16
657 sub $rounds,$rounds,#1
658 mvn $mask7f,$mask80
659 mov $rounds,$rounds,lsl#2 @ (rounds-1)*4
660
@ one schedule word per iteration: three successive doublings of each
@ byte lane give tp2, tp4 and tp8, which are then combined
661.Lmix: and $t1,$s0,$mask80
662 and $s1,$s0,$mask7f
663 sub $t1,$t1,$t1,lsr#7
664 and $t1,$t1,$mask1b
665 eor $s1,$t1,$s1,lsl#1 @ tp2
666
667 and $t1,$s1,$mask80
668 and $s2,$s1,$mask7f
669 sub $t1,$t1,$t1,lsr#7
670 and $t1,$t1,$mask1b
671 eor $s2,$t1,$s2,lsl#1 @ tp4
672
673 and $t1,$s2,$mask80
674 and $s3,$s2,$mask7f
675 sub $t1,$t1,$t1,lsr#7
676 and $t1,$t1,$mask1b
677 eor $s3,$t1,$s3,lsl#1 @ tp8
678
679 eor $t1,$s1,$s2
680 eor $t2,$s0,$s3 @ tp9
681 eor $t1,$t1,$s3 @ tpe
682 eor $t1,$t1,$s1,ror#24
683 eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
684 eor $t1,$t1,$s2,ror#16
685 eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
686 eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24)
687
@ the next input word is fetched before the current result is stored
@ and the pointer advanced
688 ldr $s0,[$key,#4] @ prefetch tp1
689 str $t1,[$key],#4
690 subs $rounds,$rounds,#1
691 bne .Lmix
692
693 mov r0,#0
694 ldmia sp!,{r4-r12,lr}
695 tst lr,#1
696 moveq pc,lr @ be binary compatible with V4, yet
697 bx lr @ interoperable with Thumb ISA:-)
698.size AES_set_decrypt_key,.-AES_set_decrypt_key
699
700.type AES_Td,%object
701.align 5
702AES_Td:
703.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
704.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
705.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
706.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
707.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
708.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
709.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
710.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
711.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
712.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
713.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
714.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
715.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
716.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
717.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
718.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
719.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
720.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
721.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
722.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
723.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
724.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
725.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
726.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
727.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
728.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
729.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
730.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
731.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
732.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
733.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
734.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
735.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
736.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
737.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
738.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
739.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
740.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
741.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
742.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
743.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
744.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
745.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
746.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
747.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
748.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
749.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
750.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
751.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
752.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
753.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
754.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
755.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
756.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
757.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
758.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
759.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
760.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
761.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
762.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
763.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
764.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
765.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
766.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
767@ Td4[256]: inverse S-box as bytes, starting 1024 bytes (256 words) after AES_Td
768.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
769.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
770.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
771.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
772.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
773.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
774.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
775.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
776.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
777.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
778.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
779.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
780.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
781.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
782.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
783.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
784.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
785.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
786.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
787.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
788.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
789.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
790.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
791.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
792.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
793.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
794.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
795.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
796.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
797.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
798.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
799.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
800.size AES_Td,.-AES_Td
801
802@ void AES_decrypt(const unsigned char *in, unsigned char *out,
803@ const AES_KEY *key) {
@ Decrypts a single 16-byte block: the input bytes are gathered into the
@ four state words s0..s3 most-significant byte first, the core routine
@ _armv4_AES_decrypt transforms them, and the state is stored to out in
@ the same byte order.
804.global AES_decrypt
805.type AES_decrypt,%function
806.align 5
807AES_decrypt:
@ In ARM state pc reads as the address of the current instruction plus 8,
@ so r3 becomes the address of AES_decrypt; subtracting the assembly-time
@ distance to AES_Td then yields the table address position-independently.
808 sub r3,pc,#8 @ AES_decrypt
@ r1 (out) is saved along with the callee-saved registers; it sits lowest
@ on the stack and is popped on its own once the core routine returns.
809 stmdb sp!,{r1,r4-r12,lr}
@ The rounds register is borrowed as the input pointer here (and as the
@ output pointer after the call); the core routine reloads it in between.
810 mov $rounds,r0 @ inp
811 mov $key,r2
812 sub $tbl,r3,#AES_decrypt-AES_Td @ Td
813
814 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
815 ldrb $t1,[$rounds,#2] @ manner...
816 ldrb $t2,[$rounds,#1]
817 ldrb $t3,[$rounds,#0]
818 orr $s0,$s0,$t1,lsl#8
819 orr $s0,$s0,$t2,lsl#16
820 orr $s0,$s0,$t3,lsl#24
821 ldrb $s1,[$rounds,#7]
822 ldrb $t1,[$rounds,#6]
823 ldrb $t2,[$rounds,#5]
824 ldrb $t3,[$rounds,#4]
825 orr $s1,$s1,$t1,lsl#8
826 orr $s1,$s1,$t2,lsl#16
827 orr $s1,$s1,$t3,lsl#24
828 ldrb $s2,[$rounds,#11]
829 ldrb $t1,[$rounds,#10]
830 ldrb $t2,[$rounds,#9]
831 ldrb $t3,[$rounds,#8]
832 orr $s2,$s2,$t1,lsl#8
833 orr $s2,$s2,$t2,lsl#16
834 orr $s2,$s2,$t3,lsl#24
835 ldrb $s3,[$rounds,#15]
836 ldrb $t1,[$rounds,#14]
837 ldrb $t2,[$rounds,#13]
838 ldrb $t3,[$rounds,#12]
839 orr $s3,$s3,$t1,lsl#8
840 orr $s3,$s3,$t2,lsl#16
841 orr $s3,$s3,$t3,lsl#24
842
843 bl _armv4_AES_decrypt
844
845 ldr $rounds,[sp],#4 @ pop out
846 mov $t1,$s0,lsr#24 @ write output in endian-neutral
847 mov $t2,$s0,lsr#16 @ manner...
848 mov $t3,$s0,lsr#8
849 strb $t1,[$rounds,#0]
850 strb $t2,[$rounds,#1]
851 strb $t3,[$rounds,#2]
852 strb $s0,[$rounds,#3]
853 mov $t1,$s1,lsr#24
854 mov $t2,$s1,lsr#16
855 mov $t3,$s1,lsr#8
856 strb $t1,[$rounds,#4]
857 strb $t2,[$rounds,#5]
858 strb $t3,[$rounds,#6]
859 strb $s1,[$rounds,#7]
860 mov $t1,$s2,lsr#24
861 mov $t2,$s2,lsr#16
862 mov $t3,$s2,lsr#8
863 strb $t1,[$rounds,#8]
864 strb $t2,[$rounds,#9]
865 strb $t3,[$rounds,#10]
866 strb $s2,[$rounds,#11]
867 mov $t1,$s3,lsr#24
868 mov $t2,$s3,lsr#16
869 mov $t3,$s3,lsr#8
870 strb $t1,[$rounds,#12]
871 strb $t2,[$rounds,#13]
872 strb $t3,[$rounds,#14]
873 strb $s3,[$rounds,#15]
874
875 ldmia sp!,{r4-r12,lr}
876 tst lr,#1
877 moveq pc,lr @ be binary compatible with V4, yet
878 bx lr @ interoperable with Thumb ISA:-)
879.size AES_decrypt,.-AES_decrypt
880
@ _armv4_AES_decrypt: decryption core, reached with bl from AES_decrypt.
@ In:  s0..s3 = state words, key = start of the key schedule, tbl = AES_Td.
@ Out: s0..s3 = decrypted state words.
@ t1..t3, i1..i3 and rounds are overwritten; key is left pointing at the
@ last round key; tbl is restored before returning. lr doubles as the
@ 0xff byte mask, so the return address is parked on the stack meanwhile.
881.type _armv4_AES_decrypt,%function
882.align 2
883_armv4_AES_decrypt:
884 str lr,[sp,#-4]! @ push lr
@ Initial AddRoundKey; key has already moved on by 16 when the round
@ count is read, hence the 240-16 offset.
885 ldr $t1,[$key],#16
886 ldr $t2,[$key,#-12]
887 ldr $t3,[$key,#-8]
888 ldr $i1,[$key,#-4]
889 ldr $rounds,[$key,#240-16]
890 eor $s0,$s0,$t1
891 eor $s1,$s1,$t2
892 eor $s2,$s2,$t3
893 eor $s3,$s3,$i1
894 sub $rounds,$rounds,#1
895 mov lr,#255
896
@ Main loop, executed rounds-1 times. Every state byte indexes the one
@ 256-word table at tbl; the loaded words are rotated (ror#8/16/24) and
@ xor-ed into the output word they belong to, which stands in for the
@ separate Td1..Td3 tables named in the comments below.
897.Ldec_loop:
898 and $i1,lr,$s0,lsr#16
899 and $i2,lr,$s0,lsr#8
900 and $i3,lr,$s0
901 mov $s0,$s0,lsr#24
902 ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16]
903 ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24]
904 ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8]
905 ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0]
906
907 and $i1,lr,$s1 @ i0
908 and $i2,lr,$s1,lsr#16
909 and $i3,lr,$s1,lsr#8
910 mov $s1,$s1,lsr#24
911 ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0]
912 ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24]
913 ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16]
914 ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8]
915 eor $s0,$s0,$i1,ror#24
916 eor $s1,$s1,$t1,ror#8
917 eor $t2,$i2,$t2,ror#8
918 eor $t3,$i3,$t3,ror#8
919
920 and $i1,lr,$s2,lsr#8 @ i0
921 and $i2,lr,$s2 @ i1
922 and $i3,lr,$s2,lsr#16
923 mov $s2,$s2,lsr#24
924 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
925 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
926 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
927 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
928 eor $s0,$s0,$i1,ror#16
929 eor $s1,$s1,$i2,ror#24
930 eor $s2,$s2,$t2,ror#8
931 eor $t3,$i3,$t3,ror#8
932
933 and $i1,lr,$s3,lsr#16 @ i0
934 and $i2,lr,$s3,lsr#8 @ i1
935 and $i3,lr,$s3 @ i2
936 mov $s3,$s3,lsr#24
937 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
938 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
939 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
940 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
941 eor $s0,$s0,$i1,ror#8
942 eor $s1,$s1,$i2,ror#16
943 eor $s2,$s2,$i3,ror#24
944 eor $s3,$s3,$t3,ror#8
945
@ AddRoundKey for this round; key advances by one round key per pass.
946 ldr $t1,[$key],#16
947 ldr $t2,[$key,#-12]
948 ldr $t3,[$key,#-8]
949 ldr $i1,[$key,#-4]
950 eor $s0,$s0,$t1
951 eor $s1,$s1,$t2
952 eor $s2,$s2,$t3
953 eor $s3,$s3,$i1
954
955 subs $rounds,$rounds,#1
956 bne .Ldec_loop
957
@ Final round: tbl is moved past the 256 words to the byte-wide Td4 table.
958 add $tbl,$tbl,#1024
959
@ The loaded values are discarded; the eight loads touch the 256-byte
@ table at 32-byte intervals, presumably to have it all cached before
@ the data-dependent byte lookups (the 32-byte line size is an assumption).
960 ldr $t1,[$tbl,#0] @ prefetch Td4
961 ldr $t2,[$tbl,#32]
962 ldr $t3,[$tbl,#64]
963 ldr $i1,[$tbl,#96]
964 ldr $i2,[$tbl,#128]
965 ldr $i3,[$tbl,#160]
966 ldr $t1,[$tbl,#192]
967 ldr $t2,[$tbl,#224]
968
@ Byte lookups replace the word lookups; the substituted bytes are put
@ back together with shifts instead of rotations.
969 and $i1,lr,$s0,lsr#16
970 and $i2,lr,$s0,lsr#8
971 and $i3,lr,$s0
972 ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24]
973 ldrb $t1,[$tbl,$i1] @ Td4[s0>>16]
974 ldrb $t2,[$tbl,$i2] @ Td4[s0>>8]
975 ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
976
977 and $i1,lr,$s1 @ i0
978 and $i2,lr,$s1,lsr#16
979 and $i3,lr,$s1,lsr#8
980 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
981 ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
982 ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
983 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
984 eor $s0,$i1,$s0,lsl#24
985 eor $s1,$t1,$s1,lsl#8
986 eor $t2,$t2,$i2,lsl#8
987 eor $t3,$t3,$i3,lsl#8
988
989 and $i1,lr,$s2,lsr#8 @ i0
990 and $i2,lr,$s2 @ i1
991 and $i3,lr,$s2,lsr#16
992 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
993 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
994 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
995 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
996 eor $s0,$s0,$i1,lsl#8
997 eor $s1,$i2,$s1,lsl#16
998 eor $s2,$t2,$s2,lsl#16
999 eor $t3,$t3,$i3,lsl#16
1000
1001 and $i1,lr,$s3,lsr#16 @ i0
1002 and $i2,lr,$s3,lsr#8 @ i1
1003 and $i3,lr,$s3 @ i2
1004 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
1005 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1006 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
1007 ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
1008 eor $s0,$s0,$i1,lsl#16
1009 eor $s1,$s1,$i2,lsl#8
1010 eor $s2,$i3,$s2,lsl#8
1011 eor $s3,$t3,$s3,lsl#24
1012
@ The byte mask is no longer needed, so lr gets its return address back
@ before the last AddRoundKey.
1013 ldr lr,[sp],#4 @ pop lr
1014 ldr $t1,[$key,#0]
1015 ldr $t2,[$key,#4]
1016 ldr $t3,[$key,#8]
1017 ldr $i1,[$key,#12]
1018 eor $s0,$s0,$t1
1019 eor $s1,$s1,$t2
1020 eor $s2,$s2,$t3
1021 eor $s3,$s3,$i1
1022
@ Undo the +1024 so the caller sees tbl pointing at AES_Td again.
1023 sub $tbl,$tbl,#1024
1024 mov pc,lr @ return
1025.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
1026.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1027___
1028
# 0xe12fff1e is the ARM machine encoding of "bx lr"; emitting it as a data
# word means the assembler never has to accept the mnemonic itself.
1029$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
# Write the generated assembly to standard output.
1030print $code;
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl b/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl
new file mode 100644
index 0000000000..ce427655ef
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl
@@ -0,0 +1,1176 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, page boundaries, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
15# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
18
# Command line: <flavour> [<output>]. The flavour string selects the 64- or
# 32-bit variant (pointer size plus the matching store-with-update, load and
# store mnemonics); whatever argument follows is passed on to ppc-xlate.pl.
19$flavour = shift;
20
21if ($flavour =~ /64/) {
22 $SIZE_T =8;
23 $STU ="stdu";
24 $POP ="ld";
25 $PUSH ="std";
26} elsif ($flavour =~ /32/) {
27 $SIZE_T =4;
28 $STU ="stwu";
29 $POP ="lwz";
30 $PUSH ="stw";
31} else { die "nonsense $flavour"; }
32
# Look for ppc-xlate.pl next to this script first, then in ../../perlasm/.
33$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
34( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
35( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
36die "can't locate ppc-xlate.pl";
37
# Everything printed from here on is piped through ppc-xlate.pl.
# The call is parenthesised and checked with "or": with the former
# '... ".shift || die' form the "||" applied to the (always true) command
# string, so a failed open could never reach the die.
38open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
39
# Stack frame size used by the entry points: 32 pointer-sized slots.
40$FRAME=32*$SIZE_T;
41
# Append every argument to $code as one ".long" line holding the same 32-bit
# value twice, stopping at the first undefined argument. Each table entry
# therefore occupies 8 bytes (see the Tbl1..Tbl3 setup in Lppc_AES_encrypt,
# which reads such entries at byte offsets 3, 2 and 1).
42sub _data_word()
43{
44 foreach my $word (@_) { last unless defined($word); $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word); }
45}
46
# Register allocation. These names are interpolated into the assembly
# templates that follow.
47$sp="r1";
48$toc="r2";
49$inp="r3";
50$out="r4";
51$key="r5";
52
# Lookup-table base pointers. $Tbl0 shares r3 with $inp and $Tbl3 shares r2
# with $toc, so each pair cannot be live at the same time.
53$Tbl0="r3";
54$Tbl1="r6";
55$Tbl2="r7";
56$Tbl3="r2";
57
# The four 32-bit words of cipher state.
58$s0="r8";
59$s1="r9";
60$s2="r10";
61$s3="r11";
62
# Temporaries; the round routines load the round-key words into them.
63$t0="r12";
64$t1="r13";
65$t2="r14";
66$t3="r15";
67
# Sixteen scratch accumulators (r16-r31).
68$acc00="r16";
69$acc01="r17";
70$acc02="r18";
71$acc03="r19";
72
73$acc04="r20";
74$acc05="r21";
75$acc06="r22";
76$acc07="r23";
77
78$acc08="r24";
79$acc09="r25";
80$acc10="r26";
81$acc11="r27";
82
83$acc12="r28";
84$acc13="r29";
85$acc14="r30";
86$acc15="r31";
87
# 64-bit flavour: r13 is avoided, so $t1 moves to r0.
# 32-bit flavour: r2 is avoided, so $Tbl3 takes over r12 from $t0 and $t0
# moves to r0.
# The die guards fire if the assignments above are edited without
# updating this remapping.
88# stay away from TLS pointer
89if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
90else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
# Constant registers for the compact code path (see
# Lppc_AES_encrypt_compact), which reuses the $Tbl2/$Tbl3 registers.
91$mask80=$Tbl2;
92$mask1b=$Tbl3;
93
# LAES_Te / LAES_Td: position-independent helpers that return a table
# address in $Tbl0. "bcl 20,31,$+4" branches to the next instruction while
# recording that instruction's address in LR; the second mflr copies it
# into $Tbl0 and the addi adds the byte distance from there to the table.
# The caller's LR is held in r0 meanwhile and put back before blr, so both
# r0 and $Tbl0 are overwritten.
# Layout: each stub is six instructions (24 bytes). The .space padding puts
# LAES_Td 32 bytes after LAES_Te and the first data entry 128 bytes after
# LAES_Te. The LAES_Td distance additionally skips the 2048+256 bytes of
# table data emitted first (the doubled words and the byte table after them).
94$code.=<<___;
95.machine "any"
96.text
97
98.align 7
99LAES_Te:
100 mflr r0
101 bcl 20,31,\$+4
102 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
103 addi $Tbl0,$Tbl0,`128-8`
104 mtlr r0
105 blr
106 .space `32-24`
107LAES_Td:
108 mflr r0
109 bcl 20,31,\$+4
110 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
111 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
112 mtlr r0
113 blr
114 .space `128-32-24`
115___
116&_data_word(
117 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
181$code.=<<___;
182.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214___
215&_data_word(
216 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
280$code.=<<___;
281.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
# AES_encrypt entry point. The block to encrypt is addressed by inp (r3),
# the destination by out (r4) and the key schedule by key (r5), as set up
# in the register map at the top of the script. One 16-byte block is
# processed through the compact code path.
315.globl .AES_encrypt
316.align 7
317.AES_encrypt:
318 mflr r0
319 $STU $sp,-$FRAME($sp) # allocate the frame
320
# Save LR (held in r0), the toc register and r13-r31 in the top 21 slots.
321 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
322 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
323 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
324 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
325 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
326 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
327 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
328 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
329 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
330 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
331 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
332 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
333 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
334 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
335 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
336 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
337 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
338 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
339 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
340 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
341 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
342
# The four state words are read before the LAES_Te call because that call
# overwrites r3, which holds the input pointer until then.
343 lwz $s0,0($inp)
344 lwz $s1,4($inp)
345 lwz $s2,8($inp)
346 lwz $s3,12($inp)
347 bl LAES_Te # table address is returned in r3
348 bl Lppc_AES_encrypt_compact
349 stw $s0,0($out)
350 stw $s1,4($out)
351 stw $s2,8($out)
352 stw $s3,12($out)
353
# Restore everything saved above, release the frame and return.
354 $POP r0,`$FRAME-$SIZE_T*21`($sp)
355 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
356 $POP r13,`$FRAME-$SIZE_T*19`($sp)
357 $POP r14,`$FRAME-$SIZE_T*18`($sp)
358 $POP r15,`$FRAME-$SIZE_T*17`($sp)
359 $POP r16,`$FRAME-$SIZE_T*16`($sp)
360 $POP r17,`$FRAME-$SIZE_T*15`($sp)
361 $POP r18,`$FRAME-$SIZE_T*14`($sp)
362 $POP r19,`$FRAME-$SIZE_T*13`($sp)
363 $POP r20,`$FRAME-$SIZE_T*12`($sp)
364 $POP r21,`$FRAME-$SIZE_T*11`($sp)
365 $POP r22,`$FRAME-$SIZE_T*10`($sp)
366 $POP r23,`$FRAME-$SIZE_T*9`($sp)
367 $POP r24,`$FRAME-$SIZE_T*8`($sp)
368 $POP r25,`$FRAME-$SIZE_T*7`($sp)
369 $POP r26,`$FRAME-$SIZE_T*6`($sp)
370 $POP r27,`$FRAME-$SIZE_T*5`($sp)
371 $POP r28,`$FRAME-$SIZE_T*4`($sp)
372 $POP r29,`$FRAME-$SIZE_T*3`($sp)
373 $POP r30,`$FRAME-$SIZE_T*2`($sp)
374 $POP r31,`$FRAME-$SIZE_T*1`($sp)
375 mtlr r0
376 addi $sp,$sp,$FRAME
377 blr
378
# Lppc_AES_encrypt: table-driven encryption core. It is not called by
# .AES_encrypt above, which uses the compact variant instead.
# In:  s0-s3 = state words, key = key schedule, Tbl0 = table base as
#      returned by LAES_Te.
# Out: s0-s3 = encrypted state words.
# t0-t3, acc00-acc15, Tbl1-Tbl3 and CTR are overwritten; key is left
# pointing at the last round key.
379.align 4
380Lppc_AES_encrypt:
381 lwz $acc00,240($key) # round count
382 lwz $t0,0($key)
383 lwz $t1,4($key)
384 lwz $t2,8($key)
385 lwz $t3,12($key)
# Every table entry is stored as two identical words (8 bytes), so a word
# load at byte offset 3, 2 or 1 into an entry returns the entry rotated by
# whole bytes (big-endian loads assumed - confirm), which stands in for
# three separate rotated tables.
386 addi $Tbl1,$Tbl0,3
387 addi $Tbl2,$Tbl0,2
388 addi $Tbl3,$Tbl0,1
389 addi $acc00,$acc00,-1 # the last round is handled after the loop
390 addi $key,$key,16
391 xor $s0,$s0,$t0 # initial AddRoundKey
392 xor $s1,$s1,$t1
393 xor $s2,$s2,$t2
394 xor $s3,$s3,$t3
395 mtctr $acc00
396.align 4
# Main loop: each rlwinm extracts one state byte already multiplied by 8
# (the entry size) to form a table offset; round-key loads and table loads
# are interleaved with the index computations.
397Lenc_loop:
398 rlwinm $acc00,$s0,`32-24+3`,21,28
399 rlwinm $acc01,$s1,`32-24+3`,21,28
400 lwz $t0,0($key)
401 lwz $t1,4($key)
402 rlwinm $acc02,$s2,`32-24+3`,21,28
403 rlwinm $acc03,$s3,`32-24+3`,21,28
404 lwz $t2,8($key)
405 lwz $t3,12($key)
406 rlwinm $acc04,$s1,`32-16+3`,21,28
407 rlwinm $acc05,$s2,`32-16+3`,21,28
408 lwzx $acc00,$Tbl0,$acc00
409 lwzx $acc01,$Tbl0,$acc01
410 rlwinm $acc06,$s3,`32-16+3`,21,28
411 rlwinm $acc07,$s0,`32-16+3`,21,28
412 lwzx $acc02,$Tbl0,$acc02
413 lwzx $acc03,$Tbl0,$acc03
414 rlwinm $acc08,$s2,`32-8+3`,21,28
415 rlwinm $acc09,$s3,`32-8+3`,21,28
416 lwzx $acc04,$Tbl1,$acc04
417 lwzx $acc05,$Tbl1,$acc05
418 rlwinm $acc10,$s0,`32-8+3`,21,28
419 rlwinm $acc11,$s1,`32-8+3`,21,28
420 lwzx $acc06,$Tbl1,$acc06
421 lwzx $acc07,$Tbl1,$acc07
422 rlwinm $acc12,$s3,`0+3`,21,28
423 rlwinm $acc13,$s0,`0+3`,21,28
424 lwzx $acc08,$Tbl2,$acc08
425 lwzx $acc09,$Tbl2,$acc09
426 rlwinm $acc14,$s1,`0+3`,21,28
427 rlwinm $acc15,$s2,`0+3`,21,28
428 lwzx $acc10,$Tbl2,$acc10
429 lwzx $acc11,$Tbl2,$acc11
430 xor $t0,$t0,$acc00
431 xor $t1,$t1,$acc01
432 lwzx $acc12,$Tbl3,$acc12
433 lwzx $acc13,$Tbl3,$acc13
434 xor $t2,$t2,$acc02
435 xor $t3,$t3,$acc03
436 lwzx $acc14,$Tbl3,$acc14
437 lwzx $acc15,$Tbl3,$acc15
438 xor $t0,$t0,$acc04
439 xor $t1,$t1,$acc05
440 xor $t2,$t2,$acc06
441 xor $t3,$t3,$acc07
442 xor $t0,$t0,$acc08
443 xor $t1,$t1,$acc09
444 xor $t2,$t2,$acc10
445 xor $t3,$t3,$acc11
446 xor $s0,$t0,$acc12
447 xor $s1,$t1,$acc13
448 xor $s2,$t2,$acc14
449 xor $s3,$t3,$acc15
450 addi $key,$key,16
451 bdnz- Lenc_loop
452
# Last round: Tbl2 now addresses the 256-byte table that follows the 2048
# bytes of word entries. The eight lwz results below are discarded; the
# loads only touch that table at 32-byte intervals.
453 addi $Tbl2,$Tbl0,2048
454 nop
455 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
456 lwz $acc09,`2048+32`($Tbl0)
457 lwz $acc10,`2048+64`($Tbl0)
458 lwz $acc11,`2048+96`($Tbl0)
459 lwz $acc08,`2048+128`($Tbl0)
460 lwz $acc09,`2048+160`($Tbl0)
461 lwz $acc10,`2048+192`($Tbl0)
462 lwz $acc11,`2048+224`($Tbl0)
# Plain byte indices this time (no scaling), looked up with lbzx.
463 rlwinm $acc00,$s0,`32-24`,24,31
464 rlwinm $acc01,$s1,`32-24`,24,31
465 lwz $t0,0($key)
466 lwz $t1,4($key)
467 rlwinm $acc02,$s2,`32-24`,24,31
468 rlwinm $acc03,$s3,`32-24`,24,31
469 lwz $t2,8($key)
470 lwz $t3,12($key)
471 rlwinm $acc04,$s1,`32-16`,24,31
472 rlwinm $acc05,$s2,`32-16`,24,31
473 lbzx $acc00,$Tbl2,$acc00
474 lbzx $acc01,$Tbl2,$acc01
475 rlwinm $acc06,$s3,`32-16`,24,31
476 rlwinm $acc07,$s0,`32-16`,24,31
477 lbzx $acc02,$Tbl2,$acc02
478 lbzx $acc03,$Tbl2,$acc03
479 rlwinm $acc08,$s2,`32-8`,24,31
480 rlwinm $acc09,$s3,`32-8`,24,31
481 lbzx $acc04,$Tbl2,$acc04
482 lbzx $acc05,$Tbl2,$acc05
483 rlwinm $acc10,$s0,`32-8`,24,31
484 rlwinm $acc11,$s1,`32-8`,24,31
485 lbzx $acc06,$Tbl2,$acc06
486 lbzx $acc07,$Tbl2,$acc07
487 rlwinm $acc12,$s3,`0`,24,31
488 rlwinm $acc13,$s0,`0`,24,31
489 lbzx $acc08,$Tbl2,$acc08
490 lbzx $acc09,$Tbl2,$acc09
491 rlwinm $acc14,$s1,`0`,24,31
492 rlwinm $acc15,$s2,`0`,24,31
493 lbzx $acc10,$Tbl2,$acc10
494 lbzx $acc11,$Tbl2,$acc11
# Reassemble each output word from four substituted bytes: the first goes
# to the top byte, the next two are inserted with rlwimi, the last is or-ed in.
495 rlwinm $s0,$acc00,24,0,7
496 rlwinm $s1,$acc01,24,0,7
497 lbzx $acc12,$Tbl2,$acc12
498 lbzx $acc13,$Tbl2,$acc13
499 rlwinm $s2,$acc02,24,0,7
500 rlwinm $s3,$acc03,24,0,7
501 lbzx $acc14,$Tbl2,$acc14
502 lbzx $acc15,$Tbl2,$acc15
503 rlwimi $s0,$acc04,16,8,15
504 rlwimi $s1,$acc05,16,8,15
505 rlwimi $s2,$acc06,16,8,15
506 rlwimi $s3,$acc07,16,8,15
507 rlwimi $s0,$acc08,8,16,23
508 rlwimi $s1,$acc09,8,16,23
509 rlwimi $s2,$acc10,8,16,23
510 rlwimi $s3,$acc11,8,16,23
511 or $s0,$s0,$acc12
512 or $s1,$s1,$acc13
513 or $s2,$s2,$acc14
514 or $s3,$s3,$acc15
515 xor $s0,$s0,$t0 # final AddRoundKey
516 xor $s1,$s1,$t1
517 xor $s2,$s2,$t2
518 xor $s3,$s3,$t3
519 blr
520
521.align 4
 # Lppc_AES_encrypt_compact: encrypt the block held in $s0..$s3 using only a
 # byte-wide substitution table plus arithmetic for the column mixing.
 # In:  $s0..$s3 state words, $key round keys (loop count at offset 240),
 #      $Tbl0 table base.
 # Out: $s0..$s3. Uses $t0..$t3, $acc00..$acc15, $Tbl1, $mask80, $mask1b
 #      and CTR as scratch, and advances $key.
522Lppc_AES_encrypt_compact:
523 lwz $acc00,240($key) # loop count (presumably key->rounds, TODO confirm)
524 lwz $t0,0($key) # first round key
525 lwz $t1,4($key)
526 lwz $t2,8($key)
527 lwz $t3,12($key)
528 addi $Tbl1,$Tbl0,2048 # byte table indexed with lbzx below sits 2048 bytes past $Tbl0
529 lis $mask80,0x8080
530 lis $mask1b,0x1b1b
531 addi $key,$key,16
532 ori $mask80,$mask80,0x8080 # 0x80808080: top bit of every byte
533 ori $mask1b,$mask1b,0x1b1b # 0x1b1b1b1b: reduction constant per byte
534 mtctr $acc00
535.align 4
536Lenc_compact_loop:
537 xor $s0,$s0,$t0 # add the current round key
538 xor $s1,$s1,$t1
539 xor $s2,$s2,$t2
540 xor $s3,$s3,$t3
 # Byte substitution with the row shift folded in: new word i takes its
 # bytes, top to bottom, from s[i], s[i+1], s[i+2], s[i+3] (indices mod 4).
541 rlwinm $acc00,$s0,`32-24`,24,31
542 rlwinm $acc01,$s1,`32-24`,24,31
543 rlwinm $acc02,$s2,`32-24`,24,31
544 rlwinm $acc03,$s3,`32-24`,24,31
545 lbzx $acc00,$Tbl1,$acc00
546 lbzx $acc01,$Tbl1,$acc01
547 rlwinm $acc04,$s1,`32-16`,24,31
548 rlwinm $acc05,$s2,`32-16`,24,31
549 lbzx $acc02,$Tbl1,$acc02
550 lbzx $acc03,$Tbl1,$acc03
551 rlwinm $acc06,$s3,`32-16`,24,31
552 rlwinm $acc07,$s0,`32-16`,24,31
553 lbzx $acc04,$Tbl1,$acc04
554 lbzx $acc05,$Tbl1,$acc05
555 rlwinm $acc08,$s2,`32-8`,24,31
556 rlwinm $acc09,$s3,`32-8`,24,31
557 lbzx $acc06,$Tbl1,$acc06
558 lbzx $acc07,$Tbl1,$acc07
559 rlwinm $acc10,$s0,`32-8`,24,31
560 rlwinm $acc11,$s1,`32-8`,24,31
561 lbzx $acc08,$Tbl1,$acc08
562 lbzx $acc09,$Tbl1,$acc09
563 rlwinm $acc12,$s3,`0`,24,31
564 rlwinm $acc13,$s0,`0`,24,31
565 lbzx $acc10,$Tbl1,$acc10
566 lbzx $acc11,$Tbl1,$acc11
567 rlwinm $acc14,$s1,`0`,24,31
568 rlwinm $acc15,$s2,`0`,24,31
569 lbzx $acc12,$Tbl1,$acc12
570 lbzx $acc13,$Tbl1,$acc13
571 rlwinm $s0,$acc00,24,0,7 # reassemble: substituted byte into bits 0..7
572 rlwinm $s1,$acc01,24,0,7
573 lbzx $acc14,$Tbl1,$acc14
574 lbzx $acc15,$Tbl1,$acc15
575 rlwinm $s2,$acc02,24,0,7
576 rlwinm $s3,$acc03,24,0,7
577 rlwimi $s0,$acc04,16,8,15
578 rlwimi $s1,$acc05,16,8,15
579 rlwimi $s2,$acc06,16,8,15
580 rlwimi $s3,$acc07,16,8,15
581 rlwimi $s0,$acc08,8,16,23
582 rlwimi $s1,$acc09,8,16,23
583 rlwimi $s2,$acc10,8,16,23
584 rlwimi $s3,$acc11,8,16,23
585 lwz $t0,0($key) # fetch the next round key while the state is finished
586 lwz $t1,4($key)
587 or $s0,$s0,$acc12
588 or $s1,$s1,$acc13
589 lwz $t2,8($key)
590 lwz $t3,12($key)
591 or $s2,$s2,$acc14
592 or $s3,$s3,$acc15
593
594 addi $key,$key,16
595 bdz Lenc_compact_done # CTR exhausted: last round has no column mixing
596
 # Column mixing. r2 is r0 with every byte doubled in GF(2^8): shift the low
 # seven bits left and xor 0x1b into each byte whose top bit was set.
597 and $acc00,$s0,$mask80 # r1=r0&0x80808080
598 and $acc01,$s1,$mask80
599 and $acc02,$s2,$mask80
600 and $acc03,$s3,$mask80
601 srwi $acc04,$acc00,7 # r1>>7
602 srwi $acc05,$acc01,7
603 srwi $acc06,$acc02,7
604 srwi $acc07,$acc03,7
605 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
606 andc $acc09,$s1,$mask80
607 andc $acc10,$s2,$mask80
608 andc $acc11,$s3,$mask80
609 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
610 sub $acc01,$acc01,$acc05
611 sub $acc02,$acc02,$acc06
612 sub $acc03,$acc03,$acc07
613 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
614 add $acc09,$acc09,$acc09
615 add $acc10,$acc10,$acc10
616 add $acc11,$acc11,$acc11
617 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
618 and $acc01,$acc01,$mask1b
619 and $acc02,$acc02,$mask1b
620 and $acc03,$acc03,$mask1b
621 xor $acc00,$acc00,$acc08 # r2
622 xor $acc01,$acc01,$acc09
623 xor $acc02,$acc02,$acc10
624 xor $acc03,$acc03,$acc11
625
626 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
627 rotlwi $acc13,$s1,16
628 rotlwi $acc14,$s2,16
629 rotlwi $acc15,$s3,16
630 xor $s0,$s0,$acc00 # r0^r2
631 xor $s1,$s1,$acc01
632 xor $s2,$s2,$acc02
633 xor $s3,$s3,$acc03
634 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
635 rotrwi $s1,$s1,24
636 rotrwi $s2,$s2,24
637 rotrwi $s3,$s3,24
638 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
639 xor $s1,$s1,$acc01
640 xor $s2,$s2,$acc02
641 xor $s3,$s3,$acc03
642 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
643 rotlwi $acc09,$acc13,8
644 rotlwi $acc10,$acc14,8
645 rotlwi $acc11,$acc15,8
646 xor $s0,$s0,$acc12 # ^= ROTATE(r0,16)
647 xor $s1,$s1,$acc13
648 xor $s2,$s2,$acc14
649 xor $s3,$s3,$acc15
650 xor $s0,$s0,$acc08 # ^= ROTATE(r0,24)
651 xor $s1,$s1,$acc09
652 xor $s2,$s2,$acc10
653 xor $s3,$s3,$acc11
654
655 b Lenc_compact_loop
656.align 4
657Lenc_compact_done:
658 xor $s0,$s0,$t0 # add the final round key
659 xor $s1,$s1,$t1
660 xor $s2,$s2,$t2
661 xor $s3,$s3,$t3
662 blr
663
664.globl .AES_decrypt
665.align 7
 # .AES_decrypt: exported entry point. Builds a stack frame of $FRAME bytes,
 # saves LR, $toc and r13..r31 in it, loads the 16-byte block at $inp as four
 # words, runs LAES_Td and Lppc_AES_decrypt_compact, stores $s0..$s3 to $out
 # and restores everything before returning.
666.AES_decrypt:
667 mflr r0 # return address; LR is overwritten by the bl calls below
668 $STU $sp,-$FRAME($sp)
669
670 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
671 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
672 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
673 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
674 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
675 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
676 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
677 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
678 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
679 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
680 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
681 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
682 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
683 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
684 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
685 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
686 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
687 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
688 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
689 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
690 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
691
692 lwz $s0,0($inp) # input block as four 32-bit words
693 lwz $s1,4($inp)
694 lwz $s2,8($inp)
695 lwz $s3,12($inp)
696 bl LAES_Td # defined outside this chunk; presumably points $Tbl0 at the decrypt tables (TODO confirm)
697 bl Lppc_AES_decrypt_compact # the compact core is the one used here
698 stw $s0,0($out)
699 stw $s1,4($out)
700 stw $s2,8($out)
701 stw $s3,12($out)
702
703 $POP r0,`$FRAME-$SIZE_T*21`($sp)
704 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
705 $POP r13,`$FRAME-$SIZE_T*19`($sp)
706 $POP r14,`$FRAME-$SIZE_T*18`($sp)
707 $POP r15,`$FRAME-$SIZE_T*17`($sp)
708 $POP r16,`$FRAME-$SIZE_T*16`($sp)
709 $POP r17,`$FRAME-$SIZE_T*15`($sp)
710 $POP r18,`$FRAME-$SIZE_T*14`($sp)
711 $POP r19,`$FRAME-$SIZE_T*13`($sp)
712 $POP r20,`$FRAME-$SIZE_T*12`($sp)
713 $POP r21,`$FRAME-$SIZE_T*11`($sp)
714 $POP r22,`$FRAME-$SIZE_T*10`($sp)
715 $POP r23,`$FRAME-$SIZE_T*9`($sp)
716 $POP r24,`$FRAME-$SIZE_T*8`($sp)
717 $POP r25,`$FRAME-$SIZE_T*7`($sp)
718 $POP r26,`$FRAME-$SIZE_T*6`($sp)
719 $POP r27,`$FRAME-$SIZE_T*5`($sp)
720 $POP r28,`$FRAME-$SIZE_T*4`($sp)
721 $POP r29,`$FRAME-$SIZE_T*3`($sp)
722 $POP r30,`$FRAME-$SIZE_T*2`($sp)
723 $POP r31,`$FRAME-$SIZE_T*1`($sp)
724 mtlr r0
725 addi $sp,$sp,$FRAME # drop the frame
726 blr
727
728.align 4
 # Lppc_AES_decrypt: table-driven decrypt core.
 # In:  $s0..$s3 state words, $key round keys (loop count at offset 240),
 #      $Tbl0 table base.
 # Out: $s0..$s3. Uses $t0..$t3, $acc00..$acc15, $Tbl1..$Tbl3 and CTR as
 #      scratch, and advances $key.
 # The .AES_decrypt entry point in this file calls Lppc_AES_decrypt_compact,
 # not this routine.
729Lppc_AES_decrypt:
730 lwz $acc00,240($key) # loop count (presumably key->rounds, TODO confirm)
731 lwz $t0,0($key)
732 lwz $t1,4($key)
733 lwz $t2,8($key)
734 lwz $t3,12($key)
735 addi $Tbl1,$Tbl0,3 # same table addressed 3, 2 and 1 bytes in; presumably the
736 addi $Tbl2,$Tbl0,2 # entries are stored doubled so these offsets read rotated
737 addi $Tbl3,$Tbl0,1 # copies of each word (TODO confirm against the table)
738 addi $acc00,$acc00,-1 # the loop runs count-1 times; the last round is done after it
739 addi $key,$key,16
740 xor $s0,$s0,$t0 # add the first round key
741 xor $s1,$s1,$t1
742 xor $s2,$s2,$t2
743 xor $s3,$s3,$t3
744 mtctr $acc00
745.align 4
746Ldec_loop:
747 rlwinm $acc00,$s0,`32-24+3`,21,28 # top byte of s0 scaled by 8 = table offset
748 rlwinm $acc01,$s1,`32-24+3`,21,28
749 lwz $t0,0($key) # round key for this iteration, xored in below
750 lwz $t1,4($key)
751 rlwinm $acc02,$s2,`32-24+3`,21,28
752 rlwinm $acc03,$s3,`32-24+3`,21,28
753 lwz $t2,8($key)
754 lwz $t3,12($key)
755 rlwinm $acc04,$s3,`32-16+3`,21,28
756 rlwinm $acc05,$s0,`32-16+3`,21,28
757 lwzx $acc00,$Tbl0,$acc00
758 lwzx $acc01,$Tbl0,$acc01
759 rlwinm $acc06,$s1,`32-16+3`,21,28
760 rlwinm $acc07,$s2,`32-16+3`,21,28
761 lwzx $acc02,$Tbl0,$acc02
762 lwzx $acc03,$Tbl0,$acc03
763 rlwinm $acc08,$s2,`32-8+3`,21,28
764 rlwinm $acc09,$s3,`32-8+3`,21,28
765 lwzx $acc04,$Tbl1,$acc04
766 lwzx $acc05,$Tbl1,$acc05
767 rlwinm $acc10,$s0,`32-8+3`,21,28
768 rlwinm $acc11,$s1,`32-8+3`,21,28
769 lwzx $acc06,$Tbl1,$acc06
770 lwzx $acc07,$Tbl1,$acc07
771 rlwinm $acc12,$s1,`0+3`,21,28
772 rlwinm $acc13,$s2,`0+3`,21,28
773 lwzx $acc08,$Tbl2,$acc08
774 lwzx $acc09,$Tbl2,$acc09
775 rlwinm $acc14,$s3,`0+3`,21,28
776 rlwinm $acc15,$s0,`0+3`,21,28
777 lwzx $acc10,$Tbl2,$acc10
778 lwzx $acc11,$Tbl2,$acc11
779 xor $t0,$t0,$acc00 # accumulate the four lookups per word onto the round key
780 xor $t1,$t1,$acc01
781 lwzx $acc12,$Tbl3,$acc12
782 lwzx $acc13,$Tbl3,$acc13
783 xor $t2,$t2,$acc02
784 xor $t3,$t3,$acc03
785 lwzx $acc14,$Tbl3,$acc14
786 lwzx $acc15,$Tbl3,$acc15
787 xor $t0,$t0,$acc04
788 xor $t1,$t1,$acc05
789 xor $t2,$t2,$acc06
790 xor $t3,$t3,$acc07
791 xor $t0,$t0,$acc08
792 xor $t1,$t1,$acc09
793 xor $t2,$t2,$acc10
794 xor $t3,$t3,$acc11
795 xor $s0,$t0,$acc12 # new state
796 xor $s1,$t1,$acc13
797 xor $s2,$t2,$acc14
798 xor $s3,$t3,$acc15
799 addi $key,$key,16
800 bdnz- Ldec_loop
801
 # Last round: byte lookups only, from the table at $Tbl0+2048.
802 addi $Tbl2,$Tbl0,2048
803 nop
804 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
805 lwz $acc09,`2048+32`($Tbl0) # one load every 32 bytes across 256 bytes;
806 lwz $acc10,`2048+64`($Tbl0) # the loaded values are discarded (the
807 lwz $acc11,`2048+96`($Tbl0) # registers are overwritten below)
808 lwz $acc08,`2048+128`($Tbl0)
809 lwz $acc09,`2048+160`($Tbl0)
810 lwz $acc10,`2048+192`($Tbl0)
811 lwz $acc11,`2048+224`($Tbl0)
812 rlwinm $acc00,$s0,`32-24`,24,31
813 rlwinm $acc01,$s1,`32-24`,24,31
814 lwz $t0,0($key) # final round key
815 lwz $t1,4($key)
816 rlwinm $acc02,$s2,`32-24`,24,31
817 rlwinm $acc03,$s3,`32-24`,24,31
818 lwz $t2,8($key)
819 lwz $t3,12($key)
820 rlwinm $acc04,$s3,`32-16`,24,31
821 rlwinm $acc05,$s0,`32-16`,24,31
822 lbzx $acc00,$Tbl2,$acc00
823 lbzx $acc01,$Tbl2,$acc01
824 rlwinm $acc06,$s1,`32-16`,24,31
825 rlwinm $acc07,$s2,`32-16`,24,31
826 lbzx $acc02,$Tbl2,$acc02
827 lbzx $acc03,$Tbl2,$acc03
828 rlwinm $acc08,$s2,`32-8`,24,31
829 rlwinm $acc09,$s3,`32-8`,24,31
830 lbzx $acc04,$Tbl2,$acc04
831 lbzx $acc05,$Tbl2,$acc05
832 rlwinm $acc10,$s0,`32-8`,24,31
833 rlwinm $acc11,$s1,`32-8`,24,31
834 lbzx $acc06,$Tbl2,$acc06
835 lbzx $acc07,$Tbl2,$acc07
836 rlwinm $acc12,$s1,`0`,24,31
837 rlwinm $acc13,$s2,`0`,24,31
838 lbzx $acc08,$Tbl2,$acc08
839 lbzx $acc09,$Tbl2,$acc09
840 rlwinm $acc14,$s3,`0`,24,31
841 rlwinm $acc15,$s0,`0`,24,31
842 lbzx $acc10,$Tbl2,$acc10
843 lbzx $acc11,$Tbl2,$acc11
844 rlwinm $s0,$acc00,24,0,7 # reassemble the four substituted bytes per word
845 rlwinm $s1,$acc01,24,0,7
846 lbzx $acc12,$Tbl2,$acc12
847 lbzx $acc13,$Tbl2,$acc13
848 rlwinm $s2,$acc02,24,0,7
849 rlwinm $s3,$acc03,24,0,7
850 lbzx $acc14,$Tbl2,$acc14
851 lbzx $acc15,$Tbl2,$acc15
852 rlwimi $s0,$acc04,16,8,15
853 rlwimi $s1,$acc05,16,8,15
854 rlwimi $s2,$acc06,16,8,15
855 rlwimi $s3,$acc07,16,8,15
856 rlwimi $s0,$acc08,8,16,23
857 rlwimi $s1,$acc09,8,16,23
858 rlwimi $s2,$acc10,8,16,23
859 rlwimi $s3,$acc11,8,16,23
860 or $s0,$s0,$acc12
861 or $s1,$s1,$acc13
862 or $s2,$s2,$acc14
863 or $s3,$s3,$acc15
864 xor $s0,$s0,$t0 # add the final round key
865 xor $s1,$s1,$t1
866 xor $s2,$s2,$t2
867 xor $s3,$s3,$t3
868 blr
869
870.align 4
 # Lppc_AES_decrypt_compact: decrypt the block held in $s0..$s3 using only a
 # byte-wide substitution table plus arithmetic for the inverse column mixing.
 # In:  $s0..$s3 state words, $key round keys (loop count at offset 240),
 #      $Tbl0 table base.
 # Out: $s0..$s3. Uses $t0..$t3, $acc00..$acc15, $Tbl1, $mask80, $mask1b
 #      and CTR as scratch, and advances $key.
871Lppc_AES_decrypt_compact:
872 lwz $acc00,240($key) # loop count (presumably key->rounds, TODO confirm)
873 lwz $t0,0($key)
874 lwz $t1,4($key)
875 lwz $t2,8($key)
876 lwz $t3,12($key)
877 addi $Tbl1,$Tbl0,2048 # byte table indexed with lbzx below sits 2048 bytes past $Tbl0
878 lis $mask80,0x8080
879 lis $mask1b,0x1b1b
880 addi $key,$key,16
881 ori $mask80,$mask80,0x8080 # 0x80808080
882 ori $mask1b,$mask1b,0x1b1b # 0x1b1b1b1b
883___
# 64-bit build only: copy each mask into the upper word as well, because the
# 64-bit column-mixing path below works on two packed state words at once.
884$code.=<<___ if ($SIZE_T==8);
885 insrdi $mask80,$mask80,32,0
886 insrdi $mask1b,$mask1b,32,0
887___
888$code.=<<___;
889 mtctr $acc00
890.align 4
891Ldec_compact_loop:
892 xor $s0,$s0,$t0 # add the current round key
893 xor $s1,$s1,$t1
894 xor $s2,$s2,$t2
895 xor $s3,$s3,$t3
 # Byte substitution with the inverse row shift folded in: new word i takes
 # its bytes, top to bottom, from s[i], s[i-1], s[i-2], s[i-3] (indices mod 4).
896 rlwinm $acc00,$s0,`32-24`,24,31
897 rlwinm $acc01,$s1,`32-24`,24,31
898 rlwinm $acc02,$s2,`32-24`,24,31
899 rlwinm $acc03,$s3,`32-24`,24,31
900 lbzx $acc00,$Tbl1,$acc00
901 lbzx $acc01,$Tbl1,$acc01
902 rlwinm $acc04,$s3,`32-16`,24,31
903 rlwinm $acc05,$s0,`32-16`,24,31
904 lbzx $acc02,$Tbl1,$acc02
905 lbzx $acc03,$Tbl1,$acc03
906 rlwinm $acc06,$s1,`32-16`,24,31
907 rlwinm $acc07,$s2,`32-16`,24,31
908 lbzx $acc04,$Tbl1,$acc04
909 lbzx $acc05,$Tbl1,$acc05
910 rlwinm $acc08,$s2,`32-8`,24,31
911 rlwinm $acc09,$s3,`32-8`,24,31
912 lbzx $acc06,$Tbl1,$acc06
913 lbzx $acc07,$Tbl1,$acc07
914 rlwinm $acc10,$s0,`32-8`,24,31
915 rlwinm $acc11,$s1,`32-8`,24,31
916 lbzx $acc08,$Tbl1,$acc08
917 lbzx $acc09,$Tbl1,$acc09
918 rlwinm $acc12,$s1,`0`,24,31
919 rlwinm $acc13,$s2,`0`,24,31
920 lbzx $acc10,$Tbl1,$acc10
921 lbzx $acc11,$Tbl1,$acc11
922 rlwinm $acc14,$s3,`0`,24,31
923 rlwinm $acc15,$s0,`0`,24,31
924 lbzx $acc12,$Tbl1,$acc12
925 lbzx $acc13,$Tbl1,$acc13
926 rlwinm $s0,$acc00,24,0,7
927 rlwinm $s1,$acc01,24,0,7
928 lbzx $acc14,$Tbl1,$acc14
929 lbzx $acc15,$Tbl1,$acc15
930 rlwinm $s2,$acc02,24,0,7
931 rlwinm $s3,$acc03,24,0,7
932 rlwimi $s0,$acc04,16,8,15
933 rlwimi $s1,$acc05,16,8,15
934 rlwimi $s2,$acc06,16,8,15
935 rlwimi $s3,$acc07,16,8,15
936 rlwimi $s0,$acc08,8,16,23
937 rlwimi $s1,$acc09,8,16,23
938 rlwimi $s2,$acc10,8,16,23
939 rlwimi $s3,$acc11,8,16,23
940 lwz $t0,0($key) # fetch the next round key
941 lwz $t1,4($key)
942 or $s0,$s0,$acc12
943 or $s1,$s1,$acc13
944 lwz $t2,8($key)
945 lwz $t3,12($key)
946 or $s2,$s2,$acc14
947 or $s3,$s3,$acc15
948
949 addi $key,$key,16
950 bdz Ldec_compact_done # CTR exhausted: last round has no column mixing
951___
# 64-bit variant of the r2/r4/r8 computation (r0 doubled once, twice and three
# times in GF(2^8)): $s1 is packed above $s0 and $s3 above $s2 so that every
# instruction handles two state words; the upper halves are split out again
# into the odd-numbered accumulators at the end.
952$code.=<<___ if ($SIZE_T==8);
953 # vectorized permutation improves decrypt performance by 10%
954 insrdi $s0,$s1,32,0 # upper word of s0 = s1
955 insrdi $s2,$s3,32,0 # upper word of s2 = s3
956
957 and $acc00,$s0,$mask80 # r1=r0&0x80808080
958 and $acc02,$s2,$mask80
959 srdi $acc04,$acc00,7 # r1>>7
960 srdi $acc06,$acc02,7
961 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
962 andc $acc10,$s2,$mask80
963 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
964 sub $acc02,$acc02,$acc06
965 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
966 add $acc10,$acc10,$acc10
967 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
968 and $acc02,$acc02,$mask1b
969 xor $acc00,$acc00,$acc08 # r2
970 xor $acc02,$acc02,$acc10
971
972 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
973 and $acc06,$acc02,$mask80
974 srdi $acc08,$acc04,7 # r1>>7
975 srdi $acc10,$acc06,7
976 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
977 andc $acc14,$acc02,$mask80
978 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
979 sub $acc06,$acc06,$acc10
980 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
981 add $acc14,$acc14,$acc14
982 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
983 and $acc06,$acc06,$mask1b
984 xor $acc04,$acc04,$acc12 # r4
985 xor $acc06,$acc06,$acc14
986
987 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
988 and $acc10,$acc06,$mask80
989 srdi $acc12,$acc08,7 # r1>>7
990 srdi $acc14,$acc10,7
991 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
992 sub $acc10,$acc10,$acc14
993 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
994 andc $acc14,$acc06,$mask80
995 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
996 add $acc14,$acc14,$acc14
997 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
998 and $acc10,$acc10,$mask1b
999 xor $acc08,$acc08,$acc12 # r8
1000 xor $acc10,$acc10,$acc14
1001
1002 xor $acc00,$acc00,$s0 # r2^r0
1003 xor $acc02,$acc02,$s2
1004 xor $acc04,$acc04,$s0 # r4^r0
1005 xor $acc06,$acc06,$s2
1006
1007 extrdi $acc01,$acc00,32,0 # upper words (the s1 and s3 results) into the odd accumulators
1008 extrdi $acc03,$acc02,32,0
1009 extrdi $acc05,$acc04,32,0
1010 extrdi $acc07,$acc06,32,0
1011 extrdi $acc09,$acc08,32,0
1012 extrdi $acc11,$acc10,32,0
1013___
# 32-bit variant of the same r2/r4/r8 computation, one register per state word.
1014$code.=<<___ if ($SIZE_T==4);
1015 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1016 and $acc01,$s1,$mask80
1017 and $acc02,$s2,$mask80
1018 and $acc03,$s3,$mask80
1019 srwi $acc04,$acc00,7 # r1>>7
1020 srwi $acc05,$acc01,7
1021 srwi $acc06,$acc02,7
1022 srwi $acc07,$acc03,7
1023 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1024 andc $acc09,$s1,$mask80
1025 andc $acc10,$s2,$mask80
1026 andc $acc11,$s3,$mask80
1027 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1028 sub $acc01,$acc01,$acc05
1029 sub $acc02,$acc02,$acc06
1030 sub $acc03,$acc03,$acc07
1031 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1032 add $acc09,$acc09,$acc09
1033 add $acc10,$acc10,$acc10
1034 add $acc11,$acc11,$acc11
1035 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1036 and $acc01,$acc01,$mask1b
1037 and $acc02,$acc02,$mask1b
1038 and $acc03,$acc03,$mask1b
1039 xor $acc00,$acc00,$acc08 # r2
1040 xor $acc01,$acc01,$acc09
1041 xor $acc02,$acc02,$acc10
1042 xor $acc03,$acc03,$acc11
1043
1044 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1045 and $acc05,$acc01,$mask80
1046 and $acc06,$acc02,$mask80
1047 and $acc07,$acc03,$mask80
1048 srwi $acc08,$acc04,7 # r1>>7
1049 srwi $acc09,$acc05,7
1050 srwi $acc10,$acc06,7
1051 srwi $acc11,$acc07,7
1052 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1053 andc $acc13,$acc01,$mask80
1054 andc $acc14,$acc02,$mask80
1055 andc $acc15,$acc03,$mask80
1056 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1057 sub $acc05,$acc05,$acc09
1058 sub $acc06,$acc06,$acc10
1059 sub $acc07,$acc07,$acc11
1060 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1061 add $acc13,$acc13,$acc13
1062 add $acc14,$acc14,$acc14
1063 add $acc15,$acc15,$acc15
1064 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1065 and $acc05,$acc05,$mask1b
1066 and $acc06,$acc06,$mask1b
1067 and $acc07,$acc07,$mask1b
1068 xor $acc04,$acc04,$acc12 # r4
1069 xor $acc05,$acc05,$acc13
1070 xor $acc06,$acc06,$acc14
1071 xor $acc07,$acc07,$acc15
1072
1073 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1074 and $acc09,$acc05,$mask80
1075 and $acc10,$acc06,$mask80
1076 and $acc11,$acc07,$mask80
1077 srwi $acc12,$acc08,7 # r1>>7
1078 srwi $acc13,$acc09,7
1079 srwi $acc14,$acc10,7
1080 srwi $acc15,$acc11,7
1081 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1082 sub $acc09,$acc09,$acc13
1083 sub $acc10,$acc10,$acc14
1084 sub $acc11,$acc11,$acc15
1085 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1086 andc $acc13,$acc05,$mask80
1087 andc $acc14,$acc06,$mask80
1088 andc $acc15,$acc07,$mask80
1089 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1090 add $acc13,$acc13,$acc13
1091 add $acc14,$acc14,$acc14
1092 add $acc15,$acc15,$acc15
1093 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1094 and $acc09,$acc09,$mask1b
1095 and $acc10,$acc10,$mask1b
1096 and $acc11,$acc11,$mask1b
1097 xor $acc08,$acc08,$acc12 # r8
1098 xor $acc09,$acc09,$acc13
1099 xor $acc10,$acc10,$acc14
1100 xor $acc11,$acc11,$acc15
1101
1102 xor $acc00,$acc00,$s0 # r2^r0
1103 xor $acc01,$acc01,$s1
1104 xor $acc02,$acc02,$s2
1105 xor $acc03,$acc03,$s3
1106 xor $acc04,$acc04,$s0 # r4^r0
1107 xor $acc05,$acc05,$s1
1108 xor $acc06,$acc06,$s2
1109 xor $acc07,$acc07,$s3
1110___
# Common tail for both widths. On entry acc00..03 = r2^r0, acc04..07 = r4^r0,
# acc08..11 = r8; the new state words are built from them with 32-bit rotates.
1111$code.=<<___;
1112 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1113 rotrwi $s1,$s1,8
1114 rotrwi $s2,$s2,8
1115 rotrwi $s3,$s3,8
1116 xor $s0,$s0,$acc00 # ^= r2^r0
1117 xor $s1,$s1,$acc01
1118 xor $s2,$s2,$acc02
1119 xor $s3,$s3,$acc03
1120 xor $acc00,$acc00,$acc08
1121 xor $acc01,$acc01,$acc09
1122 xor $acc02,$acc02,$acc10
1123 xor $acc03,$acc03,$acc11
1124 xor $s0,$s0,$acc04 # ^= r4^r0
1125 xor $s1,$s1,$acc05
1126 xor $s2,$s2,$acc06
1127 xor $s3,$s3,$acc07
1128 rotrwi $acc00,$acc00,24
1129 rotrwi $acc01,$acc01,24
1130 rotrwi $acc02,$acc02,24
1131 rotrwi $acc03,$acc03,24
1132 xor $acc04,$acc04,$acc08
1133 xor $acc05,$acc05,$acc09
1134 xor $acc06,$acc06,$acc10
1135 xor $acc07,$acc07,$acc11
1136 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1137 xor $s1,$s1,$acc09
1138 xor $s2,$s2,$acc10
1139 xor $s3,$s3,$acc11
1140 rotrwi $acc04,$acc04,16
1141 rotrwi $acc05,$acc05,16
1142 rotrwi $acc06,$acc06,16
1143 rotrwi $acc07,$acc07,16
1144 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1145 xor $s1,$s1,$acc01
1146 xor $s2,$s2,$acc02
1147 xor $s3,$s3,$acc03
1148 rotrwi $acc08,$acc08,8
1149 rotrwi $acc09,$acc09,8
1150 rotrwi $acc10,$acc10,8
1151 rotrwi $acc11,$acc11,8
1152 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1153 xor $s1,$s1,$acc05
1154 xor $s2,$s2,$acc06
1155 xor $s3,$s3,$acc07
1156 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1157 xor $s1,$s1,$acc09
1158 xor $s2,$s2,$acc10
1159 xor $s3,$s3,$acc11
1160
1161 b Ldec_compact_loop
1162.align 4
1163Ldec_compact_done:
1164 xor $s0,$s0,$t0 # add the final round key
1165 xor $s1,$s1,$t1
1166 xor $s2,$s2,$t2
1167 xor $s3,$s3,$t3
1168 blr
1169.long 0
1170.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1171.align 7
1172___
1173
# Replace every `...` span in the accumulated assembly text with the value of
# the Perl expression inside it (for example `32-24` or `$FRAME-$SIZE_T*21`),
# then write the finished text to the current output handle and close STDOUT.
1174$code =~ s/\`([^\`]*)\`/eval $1/gem;
1175print $code;
1176close STDOUT;
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl b/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl
new file mode 100644
index 0000000000..4b27afd92f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl
@@ -0,0 +1,1333 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for s390x.
11
12# April 2007.
13#
14# Software performance improvement over gcc-generated code is ~70% and
15# in absolute terms is ~73 cycles per byte processed with 128-bit key.
16# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17# *strictly* in-order execution and issued instruction [in this case
18# load value from memory is critical] has to complete before execution
19# flow proceeds. S-boxes are compressed to 2KB[+256B].
20#
21# As for hardware acceleration support. It's basically a "teaser," as
22# it can and should be improved in several ways. Most notably support
23# for CBC is not utilized, nor multiple blocks are ever processed.
24# Then software key schedule can be postponed till hardware support
25# detection... Performance improvement over assembler is reportedly
26# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27# support is implemented.
28
29# May 2007.
30#
31# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32# for 128-bit keys, if hardware support is detected.
33
34# January 2009.
35#
36# Add support for hardware AES192/256 and reschedule instructions to
37# minimize/avoid Address Generation Interlock hazard and to favour
38# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39# almost 50% on z9. The gain is smaller on z10, because being dual-
40# issue z10 makes it impossible to eliminate the interlock condition:
41# critical path is not long enough. Yet it spends ~24 cycles per byte
42# processed with 128-bit key.
43#
44# Unlike previous version hardware support detection takes place only
45# at the moment of key schedule setup, which is denoted in key->rounds.
46# This is done, because deferred key setup can't be made MT-safe, not
47# for key lengths longer than 128 bits.
48#
49# Add AES_cbc_encrypt, which gives incredible performance improvement,
50# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51# because software implementation was optimized.
52
53$softonly=0; # 0 emits the hardware-assisted paths too; non-zero leaves only the software code
54
55$mask=$t0='%r0'; # scratch word, doubles as the byte-index mask
56$t1='%r1'; # scratch
57$inp=$t2='%r2'; # input pointer on entry, scratch afterwards
58$bits=$out=$t3='%r3'; # output pointer / key bit count on entry, scratch afterwards
59$key='%r4'; # key schedule pointer
60$i1='%r5'; # table index temporaries
61$i2='%r6';
62$i3='%r7';
63$s0='%r8'; # the four state words
64$s1='%r9';
65$s2='%r10';
66$s3='%r11';
67$tbl='%r12'; # lookup table base
68$rounds='%r13'; # round counter
69$ra='%r14'; # return address
70$sp='%r15'; # stack pointer
71
72sub _data_word()
73{ # Append one ".long w,w" line to $code per argument (each word is emitted twice); stop at the first undefined argument.
74 foreach my $word (@_) { last unless defined($word); $code.=sprintf(".long\t0x%08x,0x%08x\n",$word,$word); }
75}
76
77$code=<<___;
78.text
79
# AES_Te: encryption lookup table, aligned to 256 bytes. The words passed to
# _data_word right after this header are each emitted twice, so one entry is
# 8 bytes wide. The Te4[256] byte table and the rcon[] words follow inside
# the same object.
80.type AES_Te,\@object
81.align 256
82AES_Te:
83___
84&_data_word(
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
149$code.=<<___;
150# Te4[256]
151.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
152.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
153.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
154.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
155.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
156.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
157.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
158.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
159.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
160.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
161.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
162.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
163.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
164.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
165.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
166.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
167.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
168.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
169.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
170.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
171.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
172.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
173.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
174.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
175.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
176.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
177.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
178.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
179.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
180.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
181.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
182.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183# rcon[]
184.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
185.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
186.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
187.align 256
188.size AES_Te,.-AES_Te
189
190# void AES_encrypt(const unsigned char *inp, unsigned char *out,
191# const AES_KEY *key) {
# Encrypts one 16-byte block. Unless the script was generated with softonly
# set, a value of 16 or more at offset 240 of the key selects the KM
# instruction path; smaller values take the table-driven code at .Lesoft.
192.globl AES_encrypt
193.type AES_encrypt,\@function
194AES_encrypt:
195___
196$code.=<<___ if (!$softonly);
197 l %r0,240($key) # key->rounds
198 lhi %r1,16
199 clr %r0,%r1
200 jl .Lesoft # below 16: software path
201
 # %r0 still holds the value loaded from offset 240 and %r1 points at the key;
 # presumably these are the function code and parameter block KM expects
 # (TODO confirm against the key setup code).
202 la %r1,0($key)
203 #la %r2,0($inp)
204 la %r4,0($out) # $key is no longer needed, so %r4 becomes the destination
205 lghi %r3,16 # single block length
206 .long 0xb92e0042 # km %r4,%r2
207 brc 1,.-4 # can this happen?
208 br %r14
209.align 64
210.Lesoft:
211___
212$code.=<<___;
213 stmg %r3,$ra,24($sp) # save %r3..%r14; $out lands at 24($sp) and is reloaded below
214
215 llgf $s0,0($inp) # input block as four zero-extended 32-bit words
216 llgf $s1,4($inp)
217 llgf $s2,8($inp)
218 llgf $s3,12($inp)
219
220 larl $tbl,AES_Te
221 bras $ra,_s390x_AES_encrypt
222
223 lg $out,24($sp) # %r3 doubles as a temporary in the core, so fetch $out back
224 st $s0,0($out)
225 st $s1,4($out)
226 st $s2,8($out)
227 st $s3,12($out)
228
229 lmg %r6,$ra,48($sp) # restore %r6..%r14 from the slots written by stmg above
230 br $ra
231.size AES_encrypt,.-AES_encrypt
232
233.type _s390x_AES_encrypt,\@function
234.align 16
# _s390x_AES_encrypt: software encryption core.
# In:  $s0..$s3 state words, $key round keys (rounds at offset 240),
#      $tbl address of AES_Te.
# Out: $s0..$s3. Uses $t0..$t3, $i1..$i3 and $rounds as scratch, advances
#      $key, and borrows $ra as an index register (saved and restored here).
235_s390x_AES_encrypt:
236 stg $ra,152($sp) # $ra is reused as a table index below
237 x $s0,0($key) # add the first round key
238 x $s1,4($key)
239 x $s2,8($key)
240 x $s3,12($key)
241 l $rounds,240($key)
242 llill $mask,`0xff<<3` # selects one byte already scaled by the 8-byte entry size
243 aghi $rounds,-1 # the loop runs rounds-1 times; the last round follows it
244 j .Lenc_loop
245.align 16
246.Lenc_loop:
247 sllg $t1,$s0,`0+3`
248 srlg $t2,$s0,`8-3`
249 srlg $t3,$s0,`16-3`
250 srl $s0,`24-3`
251 nr $s0,$mask
252 ngr $t1,$mask
253 nr $t2,$mask
254 nr $t3,$mask
255
256 srlg $i1,$s1,`16-3` # i0
257 sllg $i2,$s1,`0+3`
258 srlg $i3,$s1,`8-3`
259 srl $s1,`24-3`
260 nr $i1,$mask
261 nr $s1,$mask
262 ngr $i2,$mask
263 nr $i3,$mask
264
 # Displacements 0..3 into an 8-byte entry read the word at different byte
 # rotations; the existing comments name them Te0, Te3, Te2 and Te1.
265 l $s0,0($s0,$tbl) # Te0[s0>>24]
266 l $t1,1($t1,$tbl) # Te3[s0>>0]
267 l $t2,2($t2,$tbl) # Te2[s0>>8]
268 l $t3,3($t3,$tbl) # Te1[s0>>16]
269
270 x $s0,3($i1,$tbl) # Te1[s1>>16]
271 l $s1,0($s1,$tbl) # Te0[s1>>24]
272 x $t2,1($i2,$tbl) # Te3[s1>>0]
273 x $t3,2($i3,$tbl) # Te2[s1>>8]
274
275 srlg $i1,$s2,`8-3` # i0
276 srlg $i2,$s2,`16-3` # i1
277 nr $i1,$mask
278 nr $i2,$mask
279 sllg $i3,$s2,`0+3`
280 srl $s2,`24-3`
281 nr $s2,$mask
282 ngr $i3,$mask
283
284 xr $s1,$t1
285 srlg $ra,$s3,`8-3` # i1
286 sllg $t1,$s3,`0+3` # i0
287 nr $ra,$mask
288 la $key,16($key) # step to the next round key
289 ngr $t1,$mask
290
291 x $s0,2($i1,$tbl) # Te2[s2>>8]
292 x $s1,3($i2,$tbl) # Te1[s2>>16]
293 l $s2,0($s2,$tbl) # Te0[s2>>24]
294 x $t3,1($i3,$tbl) # Te3[s2>>0]
295
296 srlg $i3,$s3,`16-3` # i2
297 xr $s2,$t2
298 srl $s3,`24-3`
299 nr $i3,$mask
300 nr $s3,$mask
301
302 x $s0,0($key) # add this round key
303 x $s1,4($key)
304 x $s2,8($key)
305 x $t3,12($key)
306
307 x $s0,1($t1,$tbl) # Te3[s3>>0]
308 x $s1,2($ra,$tbl) # Te2[s3>>8]
309 x $s2,3($i3,$tbl) # Te1[s3>>16]
310 l $s3,0($s3,$tbl) # Te0[s3>>24]
311 xr $s3,$t3
312
313 brct $rounds,.Lenc_loop
314 .align 16
315
 # Last round: single-byte loads (llgc) at displacement 2 of the same 8-byte
 # entries supply the values the existing comments call Te4.
316 sllg $t1,$s0,`0+3`
317 srlg $t2,$s0,`8-3`
318 ngr $t1,$mask
319 srlg $t3,$s0,`16-3`
320 srl $s0,`24-3`
321 nr $s0,$mask
322 nr $t2,$mask
323 nr $t3,$mask
324
325 srlg $i1,$s1,`16-3` # i0
326 sllg $i2,$s1,`0+3`
327 ngr $i2,$mask
328 srlg $i3,$s1,`8-3`
329 srl $s1,`24-3`
330 nr $i1,$mask
331 nr $s1,$mask
332 nr $i3,$mask
333
334 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
335 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
336 sll $s0,24
337 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
338 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
339 sll $t2,8
340 sll $t3,16
341
342 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
343 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
344 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
345 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
346 sll $i1,16
347 sll $s1,24
348 sll $i3,8
349 or $s0,$i1
350 or $s1,$t1
351 or $t2,$i2
352 or $t3,$i3
353
354 srlg $i1,$s2,`8-3` # i0
355 srlg $i2,$s2,`16-3` # i1
356 nr $i1,$mask
357 nr $i2,$mask
358 sllg $i3,$s2,`0+3`
359 srl $s2,`24-3`
360 ngr $i3,$mask
361 nr $s2,$mask
362
363 sllg $t1,$s3,`0+3` # i0
364 srlg $ra,$s3,`8-3` # i1
365 ngr $t1,$mask
366
367 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
368 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
369 sll $i1,8
370 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
371 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
372 sll $i2,16
373 nr $ra,$mask
374 sll $s2,24
375 or $s0,$i1
376 or $s1,$i2
377 or $s2,$t2
378 or $t3,$i3
379
380 srlg $i3,$s3,`16-3` # i2
381 srl $s3,`24-3`
382 nr $i3,$mask
383 nr $s3,$mask
384
385 l $t0,16($key) # final round key sits 16 bytes past the last key used in the loop
386 l $t2,20($key)
387
388 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
389 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
390 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
391 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
392 sll $i2,8
393 sll $i3,16
394 sll $s3,24
395 or $s0,$i1
396 or $s1,$i2
397 or $s2,$i3
398 or $s3,$t3
399
400 lg $ra,152($sp) # recover the return address saved on entry
401 xr $s0,$t0
402 xr $s1,$t2
403 x $s2,24($key)
404 x $s3,28($key)
405
406 br $ra
407.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
408___
409
410$code.=<<___;
411.type AES_Td,\@object
412.align 256
413AES_Td:
414___
415&_data_word(
416 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
417 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
418 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
419 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
420 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
421 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
422 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
423 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
424 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
425 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
426 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
427 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
428 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
429 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
430 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
431 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
432 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
433 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
434 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
435 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
436 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
437 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
438 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
439 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
440 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
441 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
442 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
443 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
444 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
445 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
446 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
447 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
448 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
449 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
450 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
451 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
452 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
453 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
454 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
455 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
456 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
457 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
458 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
459 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
460 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
461 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
462 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
463 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
464 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
465 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
466 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
467 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
468 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
469 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
470 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
471 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
472 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
473 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
474 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
475 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
476 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
477 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
478 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
479 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
480$code.=<<___;
481# Td4[256]
482.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
483.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
484.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
485.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
486.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
487.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
488.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
489.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
490.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
491.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
492.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
493.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
494.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
495.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
496.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
497.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
498.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
499.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
500.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
501.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
502.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
503.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
504.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
505.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
506.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
507.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
508.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
509.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
510.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
511.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
512.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
513.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
514.size AES_Td,.-AES_Td
515
516# void AES_decrypt(const unsigned char *inp, unsigned char *out,
517# const AES_KEY *key) {
518.globl AES_decrypt
519.type AES_decrypt,\@function
520AES_decrypt:
521___
# Hardware path, emitted unless $softonly is set. The word at 240($key) is
# compared with 16: smaller values branch to the table-driven code at
# .Ldsoft, anything else is handed to the km instruction in %r0 for a single
# 16-byte block. The key-setup routines in this file store either a round
# count (10/12/14) or a km code in that field.
522$code.=<<___ if (!$softonly);
523 l %r0,240($key)
524 lhi %r1,16
525 clr %r0,%r1
526 jl .Ldsoft
527
528 la %r1,0($key)
529 #la %r2,0($inp)
530 la %r4,0($out)
531 lghi %r3,16 # single block length
532 .long 0xb92e0042 # km %r4,%r2
533 brc 1,.-4 # can this happen?
534 br %r14
535.align 64
536.Ldsoft:
537___
# Software path: save the caller's registers (the out pointer included, which
# is reloaded from 24($sp) afterwards), load the block as four 32-bit words,
# run _s390x_AES_decrypt against the AES_Td tables and store the four
# result words.
538$code.=<<___;
539 stmg %r3,$ra,24($sp)
540
541 llgf $s0,0($inp)
542 llgf $s1,4($inp)
543 llgf $s2,8($inp)
544 llgf $s3,12($inp)
545
546 larl $tbl,AES_Td
547 bras $ra,_s390x_AES_decrypt
548
549 lg $out,24($sp)
550 st $s0,0($out)
551 st $s1,4($out)
552 st $s2,8($out)
553 st $s3,12($out)
554
555 lmg %r6,$ra,48($sp)
556 br $ra
557.size AES_decrypt,.-AES_decrypt
558
# _s390x_AES_decrypt: table-driven core shared by AES_decrypt and the CBC
# decrypt loop. The four state words are taken from and returned in the
# s0-s3 registers; the caller loads the table pointer with AES_Td and the
# key pointer with the key schedule. The return address is parked at
# 152(sp) because the link register doubles as a scratch register below.
559.type _s390x_AES_decrypt,\@function
560.align 16
561_s390x_AES_decrypt:
562 stg $ra,152($sp)
563 x $s0,0($key)
564 x $s1,4($key)
565 x $s2,8($key)
566 x $s3,12($key)
567 l $rounds,240($key)
568 llill $mask,`0xff<<3`
569 aghi $rounds,-1
570 j .Ldec_loop
# Main loop: one full round per iteration, executed (rounds - 1) times.
# Every state byte is shifted so that it lands pre-scaled by 8 under the
# mask and is then used as a table offset; the displacements 0/3/2/1 pick
# the Td0/Td1/Td2/Td3 variants named in the per-line comments. The key
# pointer advances by 16 bytes per round.
571.align 16
572.Ldec_loop:
573 srlg $t1,$s0,`16-3`
574 srlg $t2,$s0,`8-3`
575 sllg $t3,$s0,`0+3`
576 srl $s0,`24-3`
577 nr $s0,$mask
578 nr $t1,$mask
579 nr $t2,$mask
580 ngr $t3,$mask
581
582 sllg $i1,$s1,`0+3` # i0
583 srlg $i2,$s1,`16-3`
584 srlg $i3,$s1,`8-3`
585 srl $s1,`24-3`
586 ngr $i1,$mask
587 nr $s1,$mask
588 nr $i2,$mask
589 nr $i3,$mask
590
591 l $s0,0($s0,$tbl) # Td0[s0>>24]
592 l $t1,3($t1,$tbl) # Td1[s0>>16]
593 l $t2,2($t2,$tbl) # Td2[s0>>8]
594 l $t3,1($t3,$tbl) # Td3[s0>>0]
595
596 x $s0,1($i1,$tbl) # Td3[s1>>0]
597 l $s1,0($s1,$tbl) # Td0[s1>>24]
598 x $t2,3($i2,$tbl) # Td1[s1>>16]
599 x $t3,2($i3,$tbl) # Td2[s1>>8]
600
601 srlg $i1,$s2,`8-3` # i0
602 sllg $i2,$s2,`0+3` # i1
603 srlg $i3,$s2,`16-3`
604 srl $s2,`24-3`
605 nr $i1,$mask
606 ngr $i2,$mask
607 nr $s2,$mask
608 nr $i3,$mask
609
610 xr $s1,$t1
611 srlg $ra,$s3,`8-3` # i1
612 srlg $t1,$s3,`16-3` # i0
613 nr $ra,$mask
614 la $key,16($key)
615 nr $t1,$mask
616
617 x $s0,2($i1,$tbl) # Td2[s2>>8]
618 x $s1,1($i2,$tbl) # Td3[s2>>0]
619 l $s2,0($s2,$tbl) # Td0[s2>>24]
620 x $t3,3($i3,$tbl) # Td1[s2>>16]
621
622 sllg $i3,$s3,`0+3` # i2
623 srl $s3,`24-3`
624 ngr $i3,$mask
625 nr $s3,$mask
626
627 xr $s2,$t2
628 x $s0,0($key)
629 x $s1,4($key)
630 x $s2,8($key)
631 x $t3,12($key)
632
633 x $s0,3($t1,$tbl) # Td1[s3>>16]
634 x $s1,2($ra,$tbl) # Td2[s3>>8]
635 x $s2,1($i3,$tbl) # Td3[s3>>0]
636 l $s3,0($s3,$tbl) # Td0[s3>>24]
637 xr $s3,$t3
638
639 brct $rounds,.Ldec_loop
640 .align 16
641
# Final round: only single bytes from Td4 (offset 2048 from the table start)
# are used, so the mask becomes a plain 0xff and the looked-up bytes are
# shifted back into position and OR-ed together. The last round key is read
# from 16..28 past the key pointer left behind by the loop.
642 l $t1,`2048+0`($tbl) # prefetch Td4
643 l $t2,`2048+64`($tbl)
644 l $t3,`2048+128`($tbl)
645 l $i1,`2048+192`($tbl)
646 llill $mask,0xff
647
648 srlg $i3,$s0,24 # i0
649 srlg $t1,$s0,16
650 srlg $t2,$s0,8
651 nr $s0,$mask # i3
652 nr $t1,$mask
653
654 srlg $i1,$s1,24
655 nr $t2,$mask
656 srlg $i2,$s1,16
657 srlg $ra,$s1,8
658 nr $s1,$mask # i0
659 nr $i2,$mask
660 nr $ra,$mask
661
662 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
663 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
664 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
665 sll $t1,16
666 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
667 sllg $s0,$i3,24
668 sll $t2,8
669
670 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
671 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
672 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
673 sll $i1,24
674 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
675 sll $i2,16
676 sll $i3,8
677 or $s0,$s1
678 or $t1,$i1
679 or $t2,$i2
680 or $t3,$i3
681
682 srlg $i1,$s2,8 # i0
683 srlg $i2,$s2,24
684 srlg $i3,$s2,16
685 nr $s2,$mask # i1
686 nr $i1,$mask
687 nr $i3,$mask
688 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
689 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
690 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
691 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
692 sll $i1,8
693 sll $i2,24
694 or $s0,$i1
695 sll $i3,16
696 or $t2,$i2
697 or $t3,$i3
698
699 srlg $i1,$s3,16 # i0
700 srlg $i2,$s3,8 # i1
701 srlg $i3,$s3,24
702 nr $s3,$mask # i2
703 nr $i1,$mask
704 nr $i2,$mask
705
706 lg $ra,152($sp)
707 or $s1,$t1
708 l $t0,16($key)
709 l $t1,20($key)
710
711 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
712 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
713 sll $i1,16
714 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
715 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
716 sll $i2,8
717 sll $s3,24
718 or $s0,$i1
719 or $s1,$i2
720 or $s2,$t2
721 or $s3,$t3
722
723 xr $s0,$t0
724 xr $s1,$t1
725 x $s2,24($key)
726 x $s3,28($key)
727
728 br $ra
729.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
730___
731
# AES_set_encrypt_key: validates the arguments and builds the key schedule.
# The value left in %r2 is 0 on success, -1 when the input or key pointer is
# zero, and -2 when bits is not 128, 192 or 256.
732$code.=<<___;
733# int AES_set_encrypt_key(const unsigned char *in, int bits,
734# AES_KEY *key) {
735.globl AES_set_encrypt_key
736.type AES_set_encrypt_key,\@function
737.align 16
738AES_set_encrypt_key:
739 lghi $t0,0
740 clgr $inp,$t0
741 je .Lminus1
742 clgr $key,$t0
743 je .Lminus1
744
745 lghi $t0,128
746 clr $bits,$t0
747 je .Lproceed
748 lghi $t0,192
749 clr $bits,$t0
750 je .Lproceed
751 lghi $t0,256
752 clr $bits,$t0
753 je .Lproceed
754 lghi %r2,-2
755 br %r14
756
757.align 16
758.Lproceed:
759___
# Hardware path, emitted unless $softonly is set: derive the km function
# code from the key length, query the capability vector into 16($sp) and
# test the bit whose position equals that code. When it is set the raw key
# is copied unexpanded, bits is stored at 236($key) and the km code at
# 240($key); otherwise the code branches to the software expansion at
# .Lekey_internal.
760$code.=<<___ if (!$softonly);
761 # convert bits to km code, [128,192,256]->[18,19,20]
762 lhi %r5,-128
763 lhi %r0,18
764 ar %r5,$bits
765 srl %r5,6
766 ar %r5,%r0
767
768 lghi %r0,0 # query capability vector
769 la %r1,16($sp)
770 .long 0xb92f0042 # kmc %r4,%r2
771
772 llihh %r1,0x8000
773 srlg %r1,%r1,0(%r5)
774 ng %r1,16($sp)
775 jz .Lekey_internal
776
777 lmg %r0,%r1,0($inp) # just copy 128 bits...
778 stmg %r0,%r1,0($key)
779 lhi %r0,192
780 cr $bits,%r0
781 jl 1f
782 lg %r1,16($inp)
783 stg %r1,16($key)
784 je 1f
785 lg %r1,24($inp)
786 stg %r1,24($key)
7871: st $bits,236($key) # save bits
788 st %r5,240($key) # save km code
789 lghi %r2,0
790 br %r14
791___
# Software key expansion. $tbl is pointed at AES_Te+2048, which the existing
# comments treat as the Te4 byte table, with the rcon words read from 256
# bytes further on. Each variant stores its round count (10, 12 or 14) at
# 240($key) before the loop and then advances $key while it writes the
# schedule, so $key no longer points at the start of the schedule on return.
792$code.=<<___;
793.align 16
794.Lekey_internal:
795 stmg %r6,%r13,48($sp) # all non-volatile regs
796
797 larl $tbl,AES_Te+2048
798
799 llgf $s0,0($inp)
800 llgf $s1,4($inp)
801 llgf $s2,8($inp)
802 llgf $s3,12($inp)
803 st $s0,0($key)
804 st $s1,4($key)
805 st $s2,8($key)
806 st $s3,12($key)
807 lghi $t0,128
808 cr $bits,$t0
809 jne .Lnot128
810
811 llill $mask,0xff
812 lghi $t3,0 # i=0
813 lghi $rounds,10
814 st $rounds,240($key)
815
816 llgfr $t2,$s3 # temp=rk[3]
817 srlg $i1,$s3,8
818 srlg $i2,$s3,16
819 srlg $i3,$s3,24
820 nr $t2,$mask
821 nr $i1,$mask
822 nr $i2,$mask
823
824.align 16
825.L128_loop:
826 la $t2,0($t2,$tbl)
827 la $i1,0($i1,$tbl)
828 la $i2,0($i2,$tbl)
829 la $i3,0($i3,$tbl)
830 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
831 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
832 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
833 icm $t2,1,0($i3) # Te4[rk[3]>>24]
834 x $t2,256($t3,$tbl) # rcon[i]
835 xr $s0,$t2 # rk[4]=rk[0]^...
836 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
837 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
838 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
839
840 llgfr $t2,$s3 # temp=rk[3]
841 srlg $i1,$s3,8
842 srlg $i2,$s3,16
843 nr $t2,$mask
844 nr $i1,$mask
845 srlg $i3,$s3,24
846 nr $i2,$mask
847
848 st $s0,16($key)
849 st $s1,20($key)
850 st $s2,24($key)
851 st $s3,28($key)
852 la $key,16($key) # key+=4
853 la $t3,4($t3) # i++
854 brct $rounds,.L128_loop
855 lghi %r2,0
856 lmg %r6,%r13,48($sp)
857 br $ra
858
859.align 16
860.Lnot128:
861 llgf $t0,16($inp)
862 llgf $t1,20($inp)
863 st $t0,16($key)
864 st $t1,20($key)
865 lghi $t0,192
866 cr $bits,$t0
867 jne .Lnot192
868
869 llill $mask,0xff
870 lghi $t3,0 # i=0
871 lghi $rounds,12
872 st $rounds,240($key)
873 lghi $rounds,8
874
875 srlg $i1,$t1,8
876 srlg $i2,$t1,16
877 srlg $i3,$t1,24
878 nr $t1,$mask
879 nr $i1,$mask
880 nr $i2,$mask
881
882.align 16
883.L192_loop:
884 la $t1,0($t1,$tbl)
885 la $i1,0($i1,$tbl)
886 la $i2,0($i2,$tbl)
887 la $i3,0($i3,$tbl)
888 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
889 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
890 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
891 icm $t1,1,0($i3) # Te4[rk[5]>>24]
892 x $t1,256($t3,$tbl) # rcon[i]
893 xr $s0,$t1 # rk[6]=rk[0]^...
894 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
895 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
896 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
897
898 st $s0,24($key)
899 st $s1,28($key)
900 st $s2,32($key)
901 st $s3,36($key)
902 brct $rounds,.L192_continue
903 lghi %r2,0
904 lmg %r6,%r13,48($sp)
905 br $ra
906
907.align 16
908.L192_continue:
909 lgr $t1,$s3
910 x $t1,16($key) # rk[10]=rk[4]^rk[9]
911 st $t1,40($key)
912 x $t1,20($key) # rk[11]=rk[5]^rk[10]
913 st $t1,44($key)
914
915 srlg $i1,$t1,8
916 srlg $i2,$t1,16
917 srlg $i3,$t1,24
918 nr $t1,$mask
919 nr $i1,$mask
920 nr $i2,$mask
921
922 la $key,24($key) # key+=6
923 la $t3,4($t3) # i++
924 j .L192_loop
925
926.align 16
927.Lnot192:
928 llgf $t0,24($inp)
929 llgf $t1,28($inp)
930 st $t0,24($key)
931 st $t1,28($key)
932 llill $mask,0xff
933 lghi $t3,0 # i=0
934 lghi $rounds,14
935 st $rounds,240($key)
936 lghi $rounds,7
937
938 srlg $i1,$t1,8
939 srlg $i2,$t1,16
940 srlg $i3,$t1,24
941 nr $t1,$mask
942 nr $i1,$mask
943 nr $i2,$mask
944
945.align 16
946.L256_loop:
947 la $t1,0($t1,$tbl)
948 la $i1,0($i1,$tbl)
949 la $i2,0($i2,$tbl)
950 la $i3,0($i3,$tbl)
951 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
952 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
953 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
954 icm $t1,1,0($i3) # Te4[rk[7]>>24]
955 x $t1,256($t3,$tbl) # rcon[i]
956 xr $s0,$t1 # rk[8]=rk[0]^...
957 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
958 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
959 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
960 st $s0,32($key)
961 st $s1,36($key)
962 st $s2,40($key)
963 st $s3,44($key)
964 brct $rounds,.L256_continue
965 lghi %r2,0
966 lmg %r6,%r13,48($sp)
967 br $ra
968
969.align 16
970.L256_continue:
971 lgr $t1,$s3 # temp=rk[11]
972 srlg $i1,$s3,8
973 srlg $i2,$s3,16
974 srlg $i3,$s3,24
975 nr $t1,$mask
976 nr $i1,$mask
977 nr $i2,$mask
978 la $t1,0($t1,$tbl)
979 la $i1,0($i1,$tbl)
980 la $i2,0($i2,$tbl)
981 la $i3,0($i3,$tbl)
982 llgc $t1,0($t1) # Te4[rk[11]>>0]
983 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
984 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
985 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
986 x $t1,16($key) # rk[12]=rk[4]^...
987 st $t1,48($key)
988 x $t1,20($key) # rk[13]=rk[5]^rk[12]
989 st $t1,52($key)
990 x $t1,24($key) # rk[14]=rk[6]^rk[13]
991 st $t1,56($key)
992 x $t1,28($key) # rk[15]=rk[7]^rk[14]
993 st $t1,60($key)
994
995 srlg $i1,$t1,8
996 srlg $i2,$t1,16
997 srlg $i3,$t1,24
998 nr $t1,$mask
999 nr $i1,$mask
1000 nr $i2,$mask
1001
1002 la $key,32($key) # key+=8
1003 la $t3,4($t3) # i++
1004 j .L256_loop
1005
1006.Lminus1:
1007 lghi %r2,-1
1008 br $ra
1009.size AES_set_encrypt_key,.-AES_set_encrypt_key
1010
# AES_set_decrypt_key: builds the encryption schedule by calling
# AES_set_encrypt_key and returns its status unchanged when that is
# non-zero. Otherwise the schedule is converted for decryption in place and
# zero is returned. The key pointer and return address are kept on the
# stack across the call because the callee modifies both registers.
1011# int AES_set_decrypt_key(const unsigned char *in, int bits,
1012# AES_KEY *key) {
1013.globl AES_set_decrypt_key
1014.type AES_set_decrypt_key,\@function
1015.align 16
1016AES_set_decrypt_key:
1017 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1018 stg $ra,112($sp) # save non-volatile registers!
1019 bras $ra,AES_set_encrypt_key
1020 lg $key,32($sp)
1021 lg $ra,112($sp)
1022 ltgr %r2,%r2
1023 bnzr $ra
1024___
# Hardware path, emitted unless $softonly is set: when 240($key) holds a km
# function code (16 or more) the 0x80 "decrypt" bit is set in it and the
# function returns; a smaller value is a round count and goes to .Lgo.
# NOTE(review): no branch to .Ldkey_internal is visible in this part of the
# file and the instruction before it is an unconditional return -- confirm
# whether the stub is still reachable.
1025$code.=<<___ if (!$softonly);
1026 l $t0,240($key)
1027 lhi $t1,16
1028 cr $t0,$t1
1029 jl .Lgo
1030 oill $t0,0x80 # set "decrypt" bit
1031 st $t0,240($key)
1032 br $ra
1033
1034.align 16
1035.Ldkey_internal:
1036 stg $key,32($sp)
1037 stg $ra,40($sp)
1038 bras $ra,.Lekey_internal
1039 lg $key,32($sp)
1040 lg $ra,40($sp)
1041___
# .Lgo/.Linv: reverse the order of the 16-byte round keys by swapping them
# pairwise, walking inwards from both ends of the schedule (rounds/2 swaps).
1042$code.=<<___;
1043
1044.Lgo: llgf $rounds,240($key)
1045 la $i1,0($key)
1046 sllg $i2,$rounds,4
1047 la $i2,0($i2,$key)
1048 srl $rounds,1
1049 lghi $t1,-16
1050
1051.align 16
1052.Linv: lmg $s0,$s1,0($i1)
1053 lmg $s2,$s3,0($i2)
1054 stmg $s0,$s1,0($i2)
1055 stmg $s2,$s3,0($i1)
1056 la $i1,16($i1)
1057 la $i2,0($t1,$i2)
1058 brct $rounds,.Linv
1059___
# The index registers are no longer needed as pointers; reuse them under new
# names for the three byte-replicated masks loaded below.
1060$mask80=$i1;
1061$mask1b=$i2;
1062$maskfe=$i3;
# .Lmix: rewrite the (rounds-1)*4 words starting at 16($key), i.e. every
# round key except the first and the last. tp2, tp4 and tp8 are derived from
# tp1 by applying the same mask-and-shift step three times, and the result
# combines them with byte rotations as annotated. This appears to be the
# InvMixColumns transformation of the equivalent inverse cipher -- confirm
# against the C reference implementation.
1063$code.=<<___;
1064 llgf $rounds,240($key)
1065 aghi $rounds,-1
1066 sll $rounds,2 # (rounds-1)*4
1067 llilh $mask80,0x8080
1068 llilh $mask1b,0x1b1b
1069 llilh $maskfe,0xfefe
1070 oill $mask80,0x8080
1071 oill $mask1b,0x1b1b
1072 oill $maskfe,0xfefe
1073
1074.align 16
1075.Lmix: l $s0,16($key) # tp1
1076 lr $s1,$s0
1077 ngr $s1,$mask80
1078 srlg $t1,$s1,7
1079 slr $s1,$t1
1080 nr $s1,$mask1b
1081 sllg $t1,$s0,1
1082 nr $t1,$maskfe
1083 xr $s1,$t1 # tp2
1084
1085 lr $s2,$s1
1086 ngr $s2,$mask80
1087 srlg $t1,$s2,7
1088 slr $s2,$t1
1089 nr $s2,$mask1b
1090 sllg $t1,$s1,1
1091 nr $t1,$maskfe
1092 xr $s2,$t1 # tp4
1093
1094 lr $s3,$s2
1095 ngr $s3,$mask80
1096 srlg $t1,$s3,7
1097 slr $s3,$t1
1098 nr $s3,$mask1b
1099 sllg $t1,$s2,1
1100 nr $t1,$maskfe
1101 xr $s3,$t1 # tp8
1102
1103 xr $s1,$s0 # tp2^tp1
1104 xr $s2,$s0 # tp4^tp1
1105 rll $s0,$s0,24 # = ROTATE(tp1,8)
1106 xr $s2,$s3 # ^=tp8
1107 xr $s0,$s1 # ^=tp2^tp1
1108 xr $s1,$s3 # tp2^tp1^tp8
1109 xr $s0,$s2 # ^=tp4^tp1^tp8
1110 rll $s1,$s1,8
1111 rll $s2,$s2,16
1112 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1113 rll $s3,$s3,24
1114 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1115 xr $s0,$s3 # ^= ROTATE(tp8,8)
1116
1117 st $s0,16($key)
1118 la $key,4($key)
1119 brct $rounds,.Lmix
1120
1121 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1122 lghi %r2,0
1123 br $ra
1124.size AES_set_decrypt_key,.-AES_set_decrypt_key
1125___
1126
1127#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1128# size_t length, const AES_KEY *key,
1129# unsigned char *ivec, const int enc)
#
# On entry %r3 and %r4 are exchanged so that $len lives in %r3 and $out in
# %r4. The software path selects decryption when the word at 164($sp) is
# zero -- presumably the stack-passed enc argument; confirm against the ABI.
# The chaining value is written back through ivec on exit.
# NOTE(review): in the software loops a length of zero appears to reach the
# tail code with a byte count of -1 for the ex/mvc copy; confirm that
# callers never pass zero.
1130{
1131my $inp="%r2";
1132my $out="%r4"; # length and out are swapped
1133my $len="%r3";
1134my $key="%r5";
1135my $ivp="%r6";
1136
1137$code.=<<___;
1138.globl AES_cbc_encrypt
1139.type AES_cbc_encrypt,\@function
1140.align 16
1141AES_cbc_encrypt:
1142 xgr %r3,%r4 # flip %r3 and %r4, out and len
1143 xgr %r4,%r3
1144 xgr %r3,%r4
1145___
# Hardware path, emitted unless $softonly is set; taken when 240($key) is 16
# or more, i.e. holds a kmc function code. A parameter block of ivec
# followed by 32 bytes of key is assembled at 16($sp), the whole 16-byte
# blocks are processed by kmc, and a trailing partial block goes through a
# 16-byte buffer at 128($sp): zero-padded input when encrypting, and a
# partial copy of the output when the 0x80 decrypt bit is set in the code.
# NOTE(review): jz directly follows slgr; if the zero-result condition code
# of a logical subtract is not the one jz tests, that branch is never taken
# and short inputs rely on the ltr/jnz pair after kmc instead -- confirm.
1146$code.=<<___ if (!$softonly);
1147 lhi %r0,16
1148 cl %r0,240($key)
1149 jh .Lcbc_software
1150
1151 lg %r0,0($ivp) # copy ivec
1152 lg %r1,8($ivp)
1153 stmg %r0,%r1,16($sp)
1154 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1155 stmg %r0,%r1,32($sp)
1156 lmg %r0,%r1,16($key)
1157 stmg %r0,%r1,48($sp)
1158 l %r0,240($key) # load kmc code
1159 lghi $key,15 # res=len%16, len-=res;
1160 ngr $key,$len
1161 slgr $len,$key
1162 la %r1,16($sp) # parameter block - ivec || key
1163 jz .Lkmc_truncated
1164 .long 0xb92f0042 # kmc %r4,%r2
1165 brc 1,.-4 # pay attention to "partial completion"
1166 ltr $key,$key
1167 jnz .Lkmc_truncated
1168.Lkmc_done:
1169 lmg %r0,%r1,16($sp) # copy ivec to caller
1170 stg %r0,0($ivp)
1171 stg %r1,8($ivp)
1172 br $ra
1173.align 16
1174.Lkmc_truncated:
1175 ahi $key,-1 # it's the way it's encoded in mvc
1176 tmll %r0,0x80
1177 jnz .Lkmc_truncated_dec
1178 lghi %r1,0
1179 stg %r1,128($sp)
1180 stg %r1,136($sp)
1181 bras %r1,1f
1182 mvc 128(1,$sp),0($inp)
11831: ex $key,0(%r1)
1184 la %r1,16($sp) # restore parameter block
1185 la $inp,128($sp)
1186 lghi $len,16
1187 .long 0xb92f0042 # kmc %r4,%r2
1188 j .Lkmc_done
1189.align 16
1190.Lkmc_truncated_dec:
1191 stg $out,64($sp)
1192 la $out,128($sp)
1193 lghi $len,16
1194 .long 0xb92f0042 # kmc %r4,%r2
1195 lg $out,64($sp)
1196 bras %r1,2f
1197 mvc 0(1,$out),128($sp)
11982: ex $key,0(%r1)
1199 j .Lkmc_done
1200.align 16
1201.Lcbc_software:
1202___
# Software path. Encryption keeps the running chaining value in $s0-$s3; a
# final partial block is zero-padded in the buffer at 128($sp) and a full
# 16 bytes of ciphertext are written for it. Decryption keeps the previous
# ciphertext block at 128($sp) and, for a final partial block, copies only
# the remaining bytes to the output.
1203$code.=<<___;
1204 stmg $key,$ra,40($sp)
1205 lhi %r0,0
1206 cl %r0,164($sp)
1207 je .Lcbc_decrypt
1208
1209 larl $tbl,AES_Te
1210
1211 llgf $s0,0($ivp)
1212 llgf $s1,4($ivp)
1213 llgf $s2,8($ivp)
1214 llgf $s3,12($ivp)
1215
1216 lghi $t0,16
1217 slgr $len,$t0
1218 brc 4,.Lcbc_enc_tail # if borrow
1219.Lcbc_enc_loop:
1220 stmg $inp,$out,16($sp)
1221 x $s0,0($inp)
1222 x $s1,4($inp)
1223 x $s2,8($inp)
1224 x $s3,12($inp)
1225 lgr %r4,$key
1226
1227 bras $ra,_s390x_AES_encrypt
1228
1229 lmg $inp,$key,16($sp)
1230 st $s0,0($out)
1231 st $s1,4($out)
1232 st $s2,8($out)
1233 st $s3,12($out)
1234
1235 la $inp,16($inp)
1236 la $out,16($out)
1237 lghi $t0,16
1238 ltgr $len,$len
1239 jz .Lcbc_enc_done
1240 slgr $len,$t0
1241 brc 4,.Lcbc_enc_tail # if borrow
1242 j .Lcbc_enc_loop
1243.align 16
1244.Lcbc_enc_done:
1245 lg $ivp,48($sp)
1246 st $s0,0($ivp)
1247 st $s1,4($ivp)
1248 st $s2,8($ivp)
1249 st $s3,12($ivp)
1250
1251 lmg %r7,$ra,56($sp)
1252 br $ra
1253
1254.align 16
1255.Lcbc_enc_tail:
1256 aghi $len,15
1257 lghi $t0,0
1258 stg $t0,128($sp)
1259 stg $t0,136($sp)
1260 bras $t1,3f
1261 mvc 128(1,$sp),0($inp)
12623: ex $len,0($t1)
1263 lghi $len,0
1264 la $inp,128($sp)
1265 j .Lcbc_enc_loop
1266
1267.align 16
1268.Lcbc_decrypt:
1269 larl $tbl,AES_Td
1270
1271 lg $t0,0($ivp)
1272 lg $t1,8($ivp)
1273 stmg $t0,$t1,128($sp)
1274
1275.Lcbc_dec_loop:
1276 stmg $inp,$out,16($sp)
1277 llgf $s0,0($inp)
1278 llgf $s1,4($inp)
1279 llgf $s2,8($inp)
1280 llgf $s3,12($inp)
1281 lgr %r4,$key
1282
1283 bras $ra,_s390x_AES_decrypt
1284
1285 lmg $inp,$key,16($sp)
1286 sllg $s0,$s0,32
1287 sllg $s2,$s2,32
1288 lr $s0,$s1
1289 lr $s2,$s3
1290
1291 lg $t0,0($inp)
1292 lg $t1,8($inp)
1293 xg $s0,128($sp)
1294 xg $s2,136($sp)
1295 lghi $s1,16
1296 slgr $len,$s1
1297 brc 4,.Lcbc_dec_tail # if borrow
1298 brc 2,.Lcbc_dec_done # if zero
1299 stg $s0,0($out)
1300 stg $s2,8($out)
1301 stmg $t0,$t1,128($sp)
1302
1303 la $inp,16($inp)
1304 la $out,16($out)
1305 j .Lcbc_dec_loop
1306
1307.Lcbc_dec_done:
1308 stg $s0,0($out)
1309 stg $s2,8($out)
1310.Lcbc_dec_exit:
1311 lmg $ivp,$ra,48($sp)
1312 stmg $t0,$t1,0($ivp)
1313
1314 br $ra
1315
1316.align 16
1317.Lcbc_dec_tail:
1318 aghi $len,15
1319 stg $s0,128($sp)
1320 stg $s2,136($sp)
1321 bras $s1,4f
1322 mvc 0(1,$out),128($sp)
13234: ex $len,0($s1)
1324 j .Lcbc_dec_exit
1325.size AES_cbc_encrypt,.-AES_cbc_encrypt
1326___
1327}
# Append an identification string to the generated assembly.
1328$code.=<<___;
1329.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1330___
1331
# Replace every backtick-quoted expression embedded in the generated text
# (for example the shift counts written as differences) with the result of
# evaluating it as Perl, then write the finished assembly to standard output.
1332$code =~ s/\`([^\`]*)\`/eval $1/gem;
1333print $code;
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl b/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl
new file mode 100755
index 0000000000..c57b3a2d6d
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl
@@ -0,0 +1,1181 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.1
10#
11# The major reason for undertaking this effort was to mitigate the hazard of
12# cache-timing attack. This is [currently and initially!] addressed in
13# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14# 2. References to them are scheduled for L2 cache latency, meaning
15# that the tables don't have to reside in L1 cache. Once again, this
16# is an initial draft and one should expect more countermeasures to
17# be implemented...
18#
19# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
20# round.
21#
22# Even though performance was not the primary goal [on the contrary,
23# extra shifts "induced" by compressed S-box and longer loop epilogue
24# "induced" by scheduling for L2 have negative effect on performance],
25# the code turned out to run in ~23 cycles per processed byte en-/
26# decrypted with 128-bit key. This is a pretty good result for code
27# with mentioned qualities and UltraSPARC core. Compared to Sun C
28# generated code my encrypt procedure runs just a few percent faster,
29# while decrypt one - whole 50% faster [yes, Sun C failed to generate
30# optimal decrypt procedure]. Compared to GNU C generated code both
31# procedures are more than 60% faster:-)
32
# Target selection: 64-bit output is chosen when -m64 or -xarch=v9 appears
# on the command line. $bias and $frame are used when forming %sp-relative
# addresses, and $frame+$locals is the amount reserved by the save
# instruction at function entry.
33$bits=32;
34for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
35if ($bits==64) { $bias=2047; $frame=192; }
36else { $bias=0; $frame=112; }
37$locals=16;
38
# Sixteen accumulators that receive the table look-ups of one round; each
# group of four is folded into one output word.
39$acc0="%l0";
40$acc1="%o0";
41$acc2="%o1";
42$acc3="%o2";
43
44$acc4="%l1";
45$acc5="%o3";
46$acc6="%o4";
47$acc7="%o5";
48
49$acc8="%l2";
50$acc9="%o7";
51$acc10="%g1";
52$acc11="%g2";
53
54$acc12="%l3";
55$acc13="%g3";
56$acc14="%g4";
57$acc15="%g5";
58
# Round-key words as loaded from the key schedule; they also collect the
# intermediate state between the two halves of the unrolled loop.
59$t0="%l4";
60$t1="%l5";
61$t2="%l6";
62$t3="%l7";
63
# State words, table base pointer, key schedule pointer and round counter.
64$s0="%i0";
65$s1="%i1";
66$s2="%i2";
67$s3="%i3";
68$tbl="%i4";
69$key="%i5";
70$rounds="%i7"; # aliases with return address, which is off-loaded to stack
71
# Append each argument to $code as a .long directive that holds the value
# twice, so that every table word occupies eight bytes in the output.
# Processing stops at the first undefined argument.
72sub _data_word()
73{ my $i;
74 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
75}
76
# For 64-bit output, declare %g2 and %g3 as scratch registers to the
# assembler; both are used as accumulators by the round code.
77$code.=<<___ if ($bits==64);
78.register %g2,#scratch
79.register %g3,#scratch
80___
# AES_Te: encryption table placed in the text section on a 256-byte
# boundary. The _data_word call below emits 256 eight-byte entries (2048
# bytes) and is followed by a 256-byte table of single bytes; the round code
# masks its indices with 2040 and adds 2048 to $tbl on the way to the last
# round.
81$code.=<<___;
82.section ".text",#alloc,#execinstr
83
84.align 256
85AES_Te:
86___
87&_data_word(
88 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
89 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
90 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
91 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
92 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
93 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
94 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
95 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
96 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
97 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
98 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
99 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
100 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
101 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
102 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
103 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
104 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
105 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
106 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
145 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
146 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
147 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
148 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
149 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
150 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
151 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
152$code.=<<___;
153 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
154 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
155 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
156 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
157 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
158 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
159 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
160 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
161 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
162 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
163 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
164 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
165 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
166 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
167 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
168 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
169 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
170 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
171 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
172 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
173 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
174 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
175 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
176 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
177 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
178 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
179 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
180 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
181 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
182 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
183 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
184 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
185.type AES_Te,#object
186.size AES_Te,(.-AES_Te)
187
188.align 64
189.skip 16
190_sparcv9_AES_encrypt:
191 save %sp,-$frame-$locals,%sp
192 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
193 ld [$key+240],$rounds
194 ld [$key+0],$t0
195 ld [$key+4],$t1 !
196 ld [$key+8],$t2
197 srl $rounds,1,$rounds
198 xor $t0,$s0,$s0
199 ld [$key+12],$t3
200 srl $s0,21,$acc0
201 xor $t1,$s1,$s1
202 ld [$key+16],$t0
203 srl $s1,13,$acc1 !
204 xor $t2,$s2,$s2
205 ld [$key+20],$t1
206 xor $t3,$s3,$s3
207 ld [$key+24],$t2
208 and $acc0,2040,$acc0
209 ld [$key+28],$t3
210 nop
211.Lenc_loop:
212 srl $s2,5,$acc2 !
213 and $acc1,2040,$acc1
214 ldx [$tbl+$acc0],$acc0
215 sll $s3,3,$acc3
216 and $acc2,2040,$acc2
217 ldx [$tbl+$acc1],$acc1
218 srl $s1,21,$acc4
219 and $acc3,2040,$acc3
220 ldx [$tbl+$acc2],$acc2 !
221 srl $s2,13,$acc5
222 and $acc4,2040,$acc4
223 ldx [$tbl+$acc3],$acc3
224 srl $s3,5,$acc6
225 and $acc5,2040,$acc5
226 ldx [$tbl+$acc4],$acc4
227 fmovs %f0,%f0
228 sll $s0,3,$acc7 !
229 and $acc6,2040,$acc6
230 ldx [$tbl+$acc5],$acc5
231 srl $s2,21,$acc8
232 and $acc7,2040,$acc7
233 ldx [$tbl+$acc6],$acc6
234 srl $s3,13,$acc9
235 and $acc8,2040,$acc8
236 ldx [$tbl+$acc7],$acc7 !
237 srl $s0,5,$acc10
238 and $acc9,2040,$acc9
239 ldx [$tbl+$acc8],$acc8
240 sll $s1,3,$acc11
241 and $acc10,2040,$acc10
242 ldx [$tbl+$acc9],$acc9
243 fmovs %f0,%f0
244 srl $s3,21,$acc12 !
245 and $acc11,2040,$acc11
246 ldx [$tbl+$acc10],$acc10
247 srl $s0,13,$acc13
248 and $acc12,2040,$acc12
249 ldx [$tbl+$acc11],$acc11
250 srl $s1,5,$acc14
251 and $acc13,2040,$acc13
252 ldx [$tbl+$acc12],$acc12 !
253 sll $s2,3,$acc15
254 and $acc14,2040,$acc14
255 ldx [$tbl+$acc13],$acc13
256 and $acc15,2040,$acc15
257 add $key,32,$key
258 ldx [$tbl+$acc14],$acc14
259 fmovs %f0,%f0
260 subcc $rounds,1,$rounds !
261 ldx [$tbl+$acc15],$acc15
262 bz,a,pn %icc,.Lenc_last
263 add $tbl,2048,$rounds
264
265 srlx $acc1,8,$acc1
266 xor $acc0,$t0,$t0
267 ld [$key+0],$s0
268 fmovs %f0,%f0
269 srlx $acc2,16,$acc2 !
270 xor $acc1,$t0,$t0
271 ld [$key+4],$s1
272 srlx $acc3,24,$acc3
273 xor $acc2,$t0,$t0
274 ld [$key+8],$s2
275 srlx $acc5,8,$acc5
276 xor $acc3,$t0,$t0
277 ld [$key+12],$s3 !
278 srlx $acc6,16,$acc6
279 xor $acc4,$t1,$t1
280 fmovs %f0,%f0
281 srlx $acc7,24,$acc7
282 xor $acc5,$t1,$t1
283 srlx $acc9,8,$acc9
284 xor $acc6,$t1,$t1
285 srlx $acc10,16,$acc10 !
286 xor $acc7,$t1,$t1
287 srlx $acc11,24,$acc11
288 xor $acc8,$t2,$t2
289 srlx $acc13,8,$acc13
290 xor $acc9,$t2,$t2
291 srlx $acc14,16,$acc14
292 xor $acc10,$t2,$t2
293 srlx $acc15,24,$acc15 !
294 xor $acc11,$t2,$t2
295 xor $acc12,$acc14,$acc14
296 xor $acc13,$t3,$t3
297 srl $t0,21,$acc0
298 xor $acc14,$t3,$t3
299 srl $t1,13,$acc1
300 xor $acc15,$t3,$t3
301
302 and $acc0,2040,$acc0 !
303 srl $t2,5,$acc2
304 and $acc1,2040,$acc1
305 ldx [$tbl+$acc0],$acc0
306 sll $t3,3,$acc3
307 and $acc2,2040,$acc2
308 ldx [$tbl+$acc1],$acc1
309 fmovs %f0,%f0
310 srl $t1,21,$acc4 !
311 and $acc3,2040,$acc3
312 ldx [$tbl+$acc2],$acc2
313 srl $t2,13,$acc5
314 and $acc4,2040,$acc4
315 ldx [$tbl+$acc3],$acc3
316 srl $t3,5,$acc6
317 and $acc5,2040,$acc5
318 ldx [$tbl+$acc4],$acc4 !
319 sll $t0,3,$acc7
320 and $acc6,2040,$acc6
321 ldx [$tbl+$acc5],$acc5
322 srl $t2,21,$acc8
323 and $acc7,2040,$acc7
324 ldx [$tbl+$acc6],$acc6
325 fmovs %f0,%f0
326 srl $t3,13,$acc9 !
327 and $acc8,2040,$acc8
328 ldx [$tbl+$acc7],$acc7
329 srl $t0,5,$acc10
330 and $acc9,2040,$acc9
331 ldx [$tbl+$acc8],$acc8
332 sll $t1,3,$acc11
333 and $acc10,2040,$acc10
334 ldx [$tbl+$acc9],$acc9 !
335 srl $t3,21,$acc12
336 and $acc11,2040,$acc11
337 ldx [$tbl+$acc10],$acc10
338 srl $t0,13,$acc13
339 and $acc12,2040,$acc12
340 ldx [$tbl+$acc11],$acc11
341 fmovs %f0,%f0
342 srl $t1,5,$acc14 !
343 and $acc13,2040,$acc13
344 ldx [$tbl+$acc12],$acc12
345 sll $t2,3,$acc15
346 and $acc14,2040,$acc14
347 ldx [$tbl+$acc13],$acc13
348 srlx $acc1,8,$acc1
349 and $acc15,2040,$acc15
350 ldx [$tbl+$acc14],$acc14 !
351
352 srlx $acc2,16,$acc2
353 xor $acc0,$s0,$s0
354 ldx [$tbl+$acc15],$acc15
355 srlx $acc3,24,$acc3
356 xor $acc1,$s0,$s0
357 ld [$key+16],$t0
358 fmovs %f0,%f0
359 srlx $acc5,8,$acc5 !
360 xor $acc2,$s0,$s0
361 ld [$key+20],$t1
362 srlx $acc6,16,$acc6
363 xor $acc3,$s0,$s0
364 ld [$key+24],$t2
365 srlx $acc7,24,$acc7
366 xor $acc4,$s1,$s1
367 ld [$key+28],$t3 !
368 srlx $acc9,8,$acc9
369 xor $acc5,$s1,$s1
370 ldx [$tbl+2048+0],%g0 ! prefetch te4
371 srlx $acc10,16,$acc10
372 xor $acc6,$s1,$s1
373 ldx [$tbl+2048+32],%g0 ! prefetch te4
374 srlx $acc11,24,$acc11
375 xor $acc7,$s1,$s1
376 ldx [$tbl+2048+64],%g0 ! prefetch te4
377 srlx $acc13,8,$acc13
378 xor $acc8,$s2,$s2
379 ldx [$tbl+2048+96],%g0 ! prefetch te4
380 srlx $acc14,16,$acc14 !
381 xor $acc9,$s2,$s2
382 ldx [$tbl+2048+128],%g0 ! prefetch te4
383 srlx $acc15,24,$acc15
384 xor $acc10,$s2,$s2
385 ldx [$tbl+2048+160],%g0 ! prefetch te4
386 srl $s0,21,$acc0
387 xor $acc11,$s2,$s2
388 ldx [$tbl+2048+192],%g0 ! prefetch te4
389 xor $acc12,$acc14,$acc14
390 xor $acc13,$s3,$s3
391 ldx [$tbl+2048+224],%g0 ! prefetch te4
392 srl $s1,13,$acc1 !
393 xor $acc14,$s3,$s3
394 xor $acc15,$s3,$s3
395 ba .Lenc_loop
396 and $acc0,2040,$acc0
397
398.align 32
399.Lenc_last:
400 srlx $acc1,8,$acc1 !
401 xor $acc0,$t0,$t0
402 ld [$key+0],$s0
403 srlx $acc2,16,$acc2
404 xor $acc1,$t0,$t0
405 ld [$key+4],$s1
406 srlx $acc3,24,$acc3
407 xor $acc2,$t0,$t0
408 ld [$key+8],$s2 !
409 srlx $acc5,8,$acc5
410 xor $acc3,$t0,$t0
411 ld [$key+12],$s3
412 srlx $acc6,16,$acc6
413 xor $acc4,$t1,$t1
414 srlx $acc7,24,$acc7
415 xor $acc5,$t1,$t1
416 srlx $acc9,8,$acc9 !
417 xor $acc6,$t1,$t1
418 srlx $acc10,16,$acc10
419 xor $acc7,$t1,$t1
420 srlx $acc11,24,$acc11
421 xor $acc8,$t2,$t2
422 srlx $acc13,8,$acc13
423 xor $acc9,$t2,$t2
424 srlx $acc14,16,$acc14 !
425 xor $acc10,$t2,$t2
426 srlx $acc15,24,$acc15
427 xor $acc11,$t2,$t2
428 xor $acc12,$acc14,$acc14
429 xor $acc13,$t3,$t3
430 srl $t0,24,$acc0
431 xor $acc14,$t3,$t3
432 srl $t1,16,$acc1 !
433 xor $acc15,$t3,$t3
434
435 srl $t2,8,$acc2
436 and $acc1,255,$acc1
437 ldub [$rounds+$acc0],$acc0
438 srl $t1,24,$acc4
439 and $acc2,255,$acc2
440 ldub [$rounds+$acc1],$acc1
441 srl $t2,16,$acc5 !
442 and $t3,255,$acc3
443 ldub [$rounds+$acc2],$acc2
444 ldub [$rounds+$acc3],$acc3
445 srl $t3,8,$acc6
446 and $acc5,255,$acc5
447 ldub [$rounds+$acc4],$acc4
448 fmovs %f0,%f0
449 srl $t2,24,$acc8 !
450 and $acc6,255,$acc6
451 ldub [$rounds+$acc5],$acc5
452 srl $t3,16,$acc9
453 and $t0,255,$acc7
454 ldub [$rounds+$acc6],$acc6
455 ldub [$rounds+$acc7],$acc7
456 fmovs %f0,%f0
457 srl $t0,8,$acc10 !
458 and $acc9,255,$acc9
459 ldub [$rounds+$acc8],$acc8
460 srl $t3,24,$acc12
461 and $acc10,255,$acc10
462 ldub [$rounds+$acc9],$acc9
463 srl $t0,16,$acc13
464 and $t1,255,$acc11
465 ldub [$rounds+$acc10],$acc10 !
466 srl $t1,8,$acc14
467 and $acc13,255,$acc13
468 ldub [$rounds+$acc11],$acc11
469 ldub [$rounds+$acc12],$acc12
470 and $acc14,255,$acc14
471 ldub [$rounds+$acc13],$acc13
472 and $t2,255,$acc15
473 ldub [$rounds+$acc14],$acc14 !
474
475 sll $acc0,24,$acc0
476 xor $acc3,$s0,$s0
477 ldub [$rounds+$acc15],$acc15
478 sll $acc1,16,$acc1
479 xor $acc0,$s0,$s0
480 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
481 fmovs %f0,%f0
482 sll $acc2,8,$acc2 !
483 xor $acc1,$s0,$s0
484 sll $acc4,24,$acc4
485 xor $acc2,$s0,$s0
486 sll $acc5,16,$acc5
487 xor $acc7,$s1,$s1
488 sll $acc6,8,$acc6
489 xor $acc4,$s1,$s1
490 sll $acc8,24,$acc8 !
491 xor $acc5,$s1,$s1
492 sll $acc9,16,$acc9
493 xor $acc11,$s2,$s2
494 sll $acc10,8,$acc10
495 xor $acc6,$s1,$s1
496 sll $acc12,24,$acc12
497 xor $acc8,$s2,$s2
498 sll $acc13,16,$acc13 !
499 xor $acc9,$s2,$s2
500 sll $acc14,8,$acc14
501 xor $acc10,$s2,$s2
502 xor $acc12,$acc14,$acc14
503 xor $acc13,$s3,$s3
504 xor $acc14,$s3,$s3
505 xor $acc15,$s3,$s3
506
507 ret
508 restore
509.type _sparcv9_AES_encrypt,#function
510.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
511
512.align 32
513.globl AES_encrypt
514AES_encrypt:
515 or %o0,%o1,%g1
516 andcc %g1,3,%g0
517 bnz,pn %xcc,.Lunaligned_enc
518 save %sp,-$frame,%sp
519
520 ld [%i0+0],%o0
521 ld [%i0+4],%o1
522 ld [%i0+8],%o2
523 ld [%i0+12],%o3
524
5251: call .+8
526 add %o7,AES_Te-1b,%o4
527 call _sparcv9_AES_encrypt
528 mov %i2,%o5
529
530 st %o0,[%i1+0]
531 st %o1,[%i1+4]
532 st %o2,[%i1+8]
533 st %o3,[%i1+12]
534
535 ret
536 restore
537
538.align 32
539.Lunaligned_enc:
540 ldub [%i0+0],%l0
541 ldub [%i0+1],%l1
542 ldub [%i0+2],%l2
543
544 sll %l0,24,%l0
545 ldub [%i0+3],%l3
546 sll %l1,16,%l1
547 ldub [%i0+4],%l4
548 sll %l2,8,%l2
549 or %l1,%l0,%l0
550 ldub [%i0+5],%l5
551 sll %l4,24,%l4
552 or %l3,%l2,%l2
553 ldub [%i0+6],%l6
554 sll %l5,16,%l5
555 or %l0,%l2,%o0
556 ldub [%i0+7],%l7
557
558 sll %l6,8,%l6
559 or %l5,%l4,%l4
560 ldub [%i0+8],%l0
561 or %l7,%l6,%l6
562 ldub [%i0+9],%l1
563 or %l4,%l6,%o1
564 ldub [%i0+10],%l2
565
566 sll %l0,24,%l0
567 ldub [%i0+11],%l3
568 sll %l1,16,%l1
569 ldub [%i0+12],%l4
570 sll %l2,8,%l2
571 or %l1,%l0,%l0
572 ldub [%i0+13],%l5
573 sll %l4,24,%l4
574 or %l3,%l2,%l2
575 ldub [%i0+14],%l6
576 sll %l5,16,%l5
577 or %l0,%l2,%o2
578 ldub [%i0+15],%l7
579
580 sll %l6,8,%l6
581 or %l5,%l4,%l4
582 or %l7,%l6,%l6
583 or %l4,%l6,%o3
584
5851: call .+8
586 add %o7,AES_Te-1b,%o4
587 call _sparcv9_AES_encrypt
588 mov %i2,%o5
589
590 srl %o0,24,%l0
591 srl %o0,16,%l1
592 stb %l0,[%i1+0]
593 srl %o0,8,%l2
594 stb %l1,[%i1+1]
595 stb %l2,[%i1+2]
596 srl %o1,24,%l4
597 stb %o0,[%i1+3]
598
599 srl %o1,16,%l5
600 stb %l4,[%i1+4]
601 srl %o1,8,%l6
602 stb %l5,[%i1+5]
603 stb %l6,[%i1+6]
604 srl %o2,24,%l0
605 stb %o1,[%i1+7]
606
607 srl %o2,16,%l1
608 stb %l0,[%i1+8]
609 srl %o2,8,%l2
610 stb %l1,[%i1+9]
611 stb %l2,[%i1+10]
612 srl %o3,24,%l4
613 stb %o2,[%i1+11]
614
615 srl %o3,16,%l5
616 stb %l4,[%i1+12]
617 srl %o3,8,%l6
618 stb %l5,[%i1+13]
619 stb %l6,[%i1+14]
620 stb %o3,[%i1+15]
621
622 ret
623 restore
624.type AES_encrypt,#function
625.size AES_encrypt,(.-AES_encrypt)
626
627___
628
629$code.=<<___;
630.align 256
631AES_Td:
632___
633&_data_word(
634 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
635 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
636 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
637 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
638 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
639 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
640 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
641 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
642 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
643 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
644 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
645 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
646 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
647 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
648 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
649 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
650 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
651 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
652 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
653 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
654 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
655 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
656 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
657 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
658 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
659 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
660 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
661 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
662 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
663 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
664 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
665 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
666 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
667 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
668 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
669 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
670 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
671 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
672 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
673 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
674 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
675 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
676 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
677 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
678 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
679 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
680 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
681 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
682 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
683 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
684 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
685 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
686 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
687 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
688 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
689 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
690 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
691 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
692 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
693 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
694 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
695 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
696 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
697 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
698$code.=<<___;
699 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
700 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
701 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
702 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
703 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
704 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
705 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
706 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
707 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
708 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
709 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
710 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
711 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
712 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
713 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
714 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
715 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
716 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
717 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
718 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
719 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
720 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
721 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
722 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
723 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
724 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
725 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
726 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
727 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
728 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
729 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
730 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
731.type AES_Td,#object
732.size AES_Td,(.-AES_Td)
733
734.align 64
735.skip 16
736_sparcv9_AES_decrypt:
737 save %sp,-$frame-$locals,%sp
738 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
739 ld [$key+240],$rounds
740 ld [$key+0],$t0
741 ld [$key+4],$t1 !
742 ld [$key+8],$t2
743 ld [$key+12],$t3
744 srl $rounds,1,$rounds
745 xor $t0,$s0,$s0
746 ld [$key+16],$t0
747 xor $t1,$s1,$s1
748 ld [$key+20],$t1
749 srl $s0,21,$acc0 !
750 xor $t2,$s2,$s2
751 ld [$key+24],$t2
752 xor $t3,$s3,$s3
753 and $acc0,2040,$acc0
754 ld [$key+28],$t3
755 srl $s3,13,$acc1
756 nop
757.Ldec_loop:
758 srl $s2,5,$acc2 !
759 and $acc1,2040,$acc1
760 ldx [$tbl+$acc0],$acc0
761 sll $s1,3,$acc3
762 and $acc2,2040,$acc2
763 ldx [$tbl+$acc1],$acc1
764 srl $s1,21,$acc4
765 and $acc3,2040,$acc3
766 ldx [$tbl+$acc2],$acc2 !
767 srl $s0,13,$acc5
768 and $acc4,2040,$acc4
769 ldx [$tbl+$acc3],$acc3
770 srl $s3,5,$acc6
771 and $acc5,2040,$acc5
772 ldx [$tbl+$acc4],$acc4
773 fmovs %f0,%f0
774 sll $s2,3,$acc7 !
775 and $acc6,2040,$acc6
776 ldx [$tbl+$acc5],$acc5
777 srl $s2,21,$acc8
778 and $acc7,2040,$acc7
779 ldx [$tbl+$acc6],$acc6
780 srl $s1,13,$acc9
781 and $acc8,2040,$acc8
782 ldx [$tbl+$acc7],$acc7 !
783 srl $s0,5,$acc10
784 and $acc9,2040,$acc9
785 ldx [$tbl+$acc8],$acc8
786 sll $s3,3,$acc11
787 and $acc10,2040,$acc10
788 ldx [$tbl+$acc9],$acc9
789 fmovs %f0,%f0
790 srl $s3,21,$acc12 !
791 and $acc11,2040,$acc11
792 ldx [$tbl+$acc10],$acc10
793 srl $s2,13,$acc13
794 and $acc12,2040,$acc12
795 ldx [$tbl+$acc11],$acc11
796 srl $s1,5,$acc14
797 and $acc13,2040,$acc13
798 ldx [$tbl+$acc12],$acc12 !
799 sll $s0,3,$acc15
800 and $acc14,2040,$acc14
801 ldx [$tbl+$acc13],$acc13
802 and $acc15,2040,$acc15
803 add $key,32,$key
804 ldx [$tbl+$acc14],$acc14
805 fmovs %f0,%f0
806 subcc $rounds,1,$rounds !
807 ldx [$tbl+$acc15],$acc15
808 bz,a,pn %icc,.Ldec_last
809 add $tbl,2048,$rounds
810
811 srlx $acc1,8,$acc1
812 xor $acc0,$t0,$t0
813 ld [$key+0],$s0
814 fmovs %f0,%f0
815 srlx $acc2,16,$acc2 !
816 xor $acc1,$t0,$t0
817 ld [$key+4],$s1
818 srlx $acc3,24,$acc3
819 xor $acc2,$t0,$t0
820 ld [$key+8],$s2
821 srlx $acc5,8,$acc5
822 xor $acc3,$t0,$t0
823 ld [$key+12],$s3 !
824 srlx $acc6,16,$acc6
825 xor $acc4,$t1,$t1
826 fmovs %f0,%f0
827 srlx $acc7,24,$acc7
828 xor $acc5,$t1,$t1
829 srlx $acc9,8,$acc9
830 xor $acc6,$t1,$t1
831 srlx $acc10,16,$acc10 !
832 xor $acc7,$t1,$t1
833 srlx $acc11,24,$acc11
834 xor $acc8,$t2,$t2
835 srlx $acc13,8,$acc13
836 xor $acc9,$t2,$t2
837 srlx $acc14,16,$acc14
838 xor $acc10,$t2,$t2
839 srlx $acc15,24,$acc15 !
840 xor $acc11,$t2,$t2
841 xor $acc12,$acc14,$acc14
842 xor $acc13,$t3,$t3
843 srl $t0,21,$acc0
844 xor $acc14,$t3,$t3
845 xor $acc15,$t3,$t3
846 srl $t3,13,$acc1
847
848 and $acc0,2040,$acc0 !
849 srl $t2,5,$acc2
850 and $acc1,2040,$acc1
851 ldx [$tbl+$acc0],$acc0
852 sll $t1,3,$acc3
853 and $acc2,2040,$acc2
854 ldx [$tbl+$acc1],$acc1
855 fmovs %f0,%f0
856 srl $t1,21,$acc4 !
857 and $acc3,2040,$acc3
858 ldx [$tbl+$acc2],$acc2
859 srl $t0,13,$acc5
860 and $acc4,2040,$acc4
861 ldx [$tbl+$acc3],$acc3
862 srl $t3,5,$acc6
863 and $acc5,2040,$acc5
864 ldx [$tbl+$acc4],$acc4 !
865 sll $t2,3,$acc7
866 and $acc6,2040,$acc6
867 ldx [$tbl+$acc5],$acc5
868 srl $t2,21,$acc8
869 and $acc7,2040,$acc7
870 ldx [$tbl+$acc6],$acc6
871 fmovs %f0,%f0
872 srl $t1,13,$acc9 !
873 and $acc8,2040,$acc8
874 ldx [$tbl+$acc7],$acc7
875 srl $t0,5,$acc10
876 and $acc9,2040,$acc9
877 ldx [$tbl+$acc8],$acc8
878 sll $t3,3,$acc11
879 and $acc10,2040,$acc10
880 ldx [$tbl+$acc9],$acc9 !
881 srl $t3,21,$acc12
882 and $acc11,2040,$acc11
883 ldx [$tbl+$acc10],$acc10
884 srl $t2,13,$acc13
885 and $acc12,2040,$acc12
886 ldx [$tbl+$acc11],$acc11
887 fmovs %f0,%f0
888 srl $t1,5,$acc14 !
889 and $acc13,2040,$acc13
890 ldx [$tbl+$acc12],$acc12
891 sll $t0,3,$acc15
892 and $acc14,2040,$acc14
893 ldx [$tbl+$acc13],$acc13
894 srlx $acc1,8,$acc1
895 and $acc15,2040,$acc15
896 ldx [$tbl+$acc14],$acc14 !
897
898 srlx $acc2,16,$acc2
899 xor $acc0,$s0,$s0
900 ldx [$tbl+$acc15],$acc15
901 srlx $acc3,24,$acc3
902 xor $acc1,$s0,$s0
903 ld [$key+16],$t0
904 fmovs %f0,%f0
905 srlx $acc5,8,$acc5 !
906 xor $acc2,$s0,$s0
907 ld [$key+20],$t1
908 srlx $acc6,16,$acc6
909 xor $acc3,$s0,$s0
910 ld [$key+24],$t2
911 srlx $acc7,24,$acc7
912 xor $acc4,$s1,$s1
913 ld [$key+28],$t3 !
914 srlx $acc9,8,$acc9
915 xor $acc5,$s1,$s1
916 ldx [$tbl+2048+0],%g0 ! prefetch td4
917 srlx $acc10,16,$acc10
918 xor $acc6,$s1,$s1
919 ldx [$tbl+2048+32],%g0 ! prefetch td4
920 srlx $acc11,24,$acc11
921 xor $acc7,$s1,$s1
922 ldx [$tbl+2048+64],%g0 ! prefetch td4
923 srlx $acc13,8,$acc13
924 xor $acc8,$s2,$s2
925 ldx [$tbl+2048+96],%g0 ! prefetch td4
926 srlx $acc14,16,$acc14 !
927 xor $acc9,$s2,$s2
928 ldx [$tbl+2048+128],%g0 ! prefetch td4
929 srlx $acc15,24,$acc15
930 xor $acc10,$s2,$s2
931 ldx [$tbl+2048+160],%g0 ! prefetch td4
932 srl $s0,21,$acc0
933 xor $acc11,$s2,$s2
934 ldx [$tbl+2048+192],%g0 ! prefetch td4
935 xor $acc12,$acc14,$acc14
936 xor $acc13,$s3,$s3
937 ldx [$tbl+2048+224],%g0 ! prefetch td4
938 and $acc0,2040,$acc0 !
939 xor $acc14,$s3,$s3
940 xor $acc15,$s3,$s3
941 ba .Ldec_loop
942 srl $s3,13,$acc1
943
944.align 32
945.Ldec_last:
946 srlx $acc1,8,$acc1 !
947 xor $acc0,$t0,$t0
948 ld [$key+0],$s0
949 srlx $acc2,16,$acc2
950 xor $acc1,$t0,$t0
951 ld [$key+4],$s1
952 srlx $acc3,24,$acc3
953 xor $acc2,$t0,$t0
954 ld [$key+8],$s2 !
955 srlx $acc5,8,$acc5
956 xor $acc3,$t0,$t0
957 ld [$key+12],$s3
958 srlx $acc6,16,$acc6
959 xor $acc4,$t1,$t1
960 srlx $acc7,24,$acc7
961 xor $acc5,$t1,$t1
962 srlx $acc9,8,$acc9 !
963 xor $acc6,$t1,$t1
964 srlx $acc10,16,$acc10
965 xor $acc7,$t1,$t1
966 srlx $acc11,24,$acc11
967 xor $acc8,$t2,$t2
968 srlx $acc13,8,$acc13
969 xor $acc9,$t2,$t2
970 srlx $acc14,16,$acc14 !
971 xor $acc10,$t2,$t2
972 srlx $acc15,24,$acc15
973 xor $acc11,$t2,$t2
974 xor $acc12,$acc14,$acc14
975 xor $acc13,$t3,$t3
976 srl $t0,24,$acc0
977 xor $acc14,$t3,$t3
978 xor $acc15,$t3,$t3 !
979 srl $t3,16,$acc1
980
981 srl $t2,8,$acc2
982 and $acc1,255,$acc1
983 ldub [$rounds+$acc0],$acc0
984 srl $t1,24,$acc4
985 and $acc2,255,$acc2
986 ldub [$rounds+$acc1],$acc1
987 srl $t0,16,$acc5 !
988 and $t1,255,$acc3
989 ldub [$rounds+$acc2],$acc2
990 ldub [$rounds+$acc3],$acc3
991 srl $t3,8,$acc6
992 and $acc5,255,$acc5
993 ldub [$rounds+$acc4],$acc4
994 fmovs %f0,%f0
995 srl $t2,24,$acc8 !
996 and $acc6,255,$acc6
997 ldub [$rounds+$acc5],$acc5
998 srl $t1,16,$acc9
999 and $t2,255,$acc7
1000 ldub [$rounds+$acc6],$acc6
1001 ldub [$rounds+$acc7],$acc7
1002 fmovs %f0,%f0
1003 srl $t0,8,$acc10 !
1004 and $acc9,255,$acc9
1005 ldub [$rounds+$acc8],$acc8
1006 srl $t3,24,$acc12
1007 and $acc10,255,$acc10
1008 ldub [$rounds+$acc9],$acc9
1009 srl $t2,16,$acc13
1010 and $t3,255,$acc11
1011 ldub [$rounds+$acc10],$acc10 !
1012 srl $t1,8,$acc14
1013 and $acc13,255,$acc13
1014 ldub [$rounds+$acc11],$acc11
1015 ldub [$rounds+$acc12],$acc12
1016 and $acc14,255,$acc14
1017 ldub [$rounds+$acc13],$acc13
1018 and $t0,255,$acc15
1019 ldub [$rounds+$acc14],$acc14 !
1020
1021 sll $acc0,24,$acc0
1022 xor $acc3,$s0,$s0
1023 ldub [$rounds+$acc15],$acc15
1024 sll $acc1,16,$acc1
1025 xor $acc0,$s0,$s0
1026 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1027 fmovs %f0,%f0
1028 sll $acc2,8,$acc2 !
1029 xor $acc1,$s0,$s0
1030 sll $acc4,24,$acc4
1031 xor $acc2,$s0,$s0
1032 sll $acc5,16,$acc5
1033 xor $acc7,$s1,$s1
1034 sll $acc6,8,$acc6
1035 xor $acc4,$s1,$s1
1036 sll $acc8,24,$acc8 !
1037 xor $acc5,$s1,$s1
1038 sll $acc9,16,$acc9
1039 xor $acc11,$s2,$s2
1040 sll $acc10,8,$acc10
1041 xor $acc6,$s1,$s1
1042 sll $acc12,24,$acc12
1043 xor $acc8,$s2,$s2
1044 sll $acc13,16,$acc13 !
1045 xor $acc9,$s2,$s2
1046 sll $acc14,8,$acc14
1047 xor $acc10,$s2,$s2
1048 xor $acc12,$acc14,$acc14
1049 xor $acc13,$s3,$s3
1050 xor $acc14,$s3,$s3
1051 xor $acc15,$s3,$s3
1052
1053 ret
1054 restore
1055.type _sparcv9_AES_decrypt,#function
1056.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1057
1058.align 32
1059.globl AES_decrypt
1060AES_decrypt:
1061 or %o0,%o1,%g1
1062 andcc %g1,3,%g0
1063 bnz,pn %xcc,.Lunaligned_dec
1064 save %sp,-$frame,%sp
1065
1066 ld [%i0+0],%o0
1067 ld [%i0+4],%o1
1068 ld [%i0+8],%o2
1069 ld [%i0+12],%o3
1070
10711: call .+8
1072 add %o7,AES_Td-1b,%o4
1073 call _sparcv9_AES_decrypt
1074 mov %i2,%o5
1075
1076 st %o0,[%i1+0]
1077 st %o1,[%i1+4]
1078 st %o2,[%i1+8]
1079 st %o3,[%i1+12]
1080
1081 ret
1082 restore
1083
1084.align 32
1085.Lunaligned_dec:
1086 ldub [%i0+0],%l0
1087 ldub [%i0+1],%l1
1088 ldub [%i0+2],%l2
1089
1090 sll %l0,24,%l0
1091 ldub [%i0+3],%l3
1092 sll %l1,16,%l1
1093 ldub [%i0+4],%l4
1094 sll %l2,8,%l2
1095 or %l1,%l0,%l0
1096 ldub [%i0+5],%l5
1097 sll %l4,24,%l4
1098 or %l3,%l2,%l2
1099 ldub [%i0+6],%l6
1100 sll %l5,16,%l5
1101 or %l0,%l2,%o0
1102 ldub [%i0+7],%l7
1103
1104 sll %l6,8,%l6
1105 or %l5,%l4,%l4
1106 ldub [%i0+8],%l0
1107 or %l7,%l6,%l6
1108 ldub [%i0+9],%l1
1109 or %l4,%l6,%o1
1110 ldub [%i0+10],%l2
1111
1112 sll %l0,24,%l0
1113 ldub [%i0+11],%l3
1114 sll %l1,16,%l1
1115 ldub [%i0+12],%l4
1116 sll %l2,8,%l2
1117 or %l1,%l0,%l0
1118 ldub [%i0+13],%l5
1119 sll %l4,24,%l4
1120 or %l3,%l2,%l2
1121 ldub [%i0+14],%l6
1122 sll %l5,16,%l5
1123 or %l0,%l2,%o2
1124 ldub [%i0+15],%l7
1125
1126 sll %l6,8,%l6
1127 or %l5,%l4,%l4
1128 or %l7,%l6,%l6
1129 or %l4,%l6,%o3
1130
11311: call .+8
1132 add %o7,AES_Td-1b,%o4
1133 call _sparcv9_AES_decrypt
1134 mov %i2,%o5
1135
1136 srl %o0,24,%l0
1137 srl %o0,16,%l1
1138 stb %l0,[%i1+0]
1139 srl %o0,8,%l2
1140 stb %l1,[%i1+1]
1141 stb %l2,[%i1+2]
1142 srl %o1,24,%l4
1143 stb %o0,[%i1+3]
1144
1145 srl %o1,16,%l5
1146 stb %l4,[%i1+4]
1147 srl %o1,8,%l6
1148 stb %l5,[%i1+5]
1149 stb %l6,[%i1+6]
1150 srl %o2,24,%l0
1151 stb %o1,[%i1+7]
1152
1153 srl %o2,16,%l1
1154 stb %l0,[%i1+8]
1155 srl %o2,8,%l2
1156 stb %l1,[%i1+9]
1157 stb %l2,[%i1+10]
1158 srl %o3,24,%l4
1159 stb %o2,[%i1+11]
1160
1161 srl %o3,16,%l5
1162 stb %l4,[%i1+12]
1163 srl %o3,8,%l6
1164 stb %l5,[%i1+13]
1165 stb %l6,[%i1+14]
1166 stb %o3,[%i1+15]
1167
1168 ret
1169 restore
1170.type AES_decrypt,#function
1171.size AES_decrypt,(.-AES_decrypt)
1172___
1173
1174# fmovs instructions substituting for FP nops were originally added
1175# to meet specific instruction alignment requirements to maximize ILP.
1176# As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1177# an undesired effect, so just omit them and sacrifice a fraction of a
1178# percent in performance...
1179$code =~ s/fmovs.*$//gem;
1180
1181print $code;
diff --git a/src/lib/libssl/src/crypto/asn1/ameth_lib.c b/src/lib/libssl/src/crypto/asn1/ameth_lib.c
new file mode 100644
index 0000000000..18957c669e
--- /dev/null
+++ b/src/lib/libssl/src/crypto/asn1/ameth_lib.c
@@ -0,0 +1,446 @@
1/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
2 * project 2006.
3 */
4/* ====================================================================
5 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * licensing@OpenSSL.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include <stdio.h>
59#include "cryptlib.h"
60#include <openssl/asn1t.h>
61#include <openssl/x509.h>
62#ifndef OPENSSL_NO_ENGINE
63#include <openssl/engine.h>
64#endif
65#include "asn1_locl.h"
66
67extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[];
68extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[];
69extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth;
70extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth;
71extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth;
72
73/* Keep this sorted in type order !! */
74static const EVP_PKEY_ASN1_METHOD *standard_methods[] =
75 {
76#ifndef OPENSSL_NO_RSA
77 &rsa_asn1_meths[0],
78 &rsa_asn1_meths[1],
79#endif
80#ifndef OPENSSL_NO_DH
81 &dh_asn1_meth,
82#endif
83#ifndef OPENSSL_NO_DSA
84 &dsa_asn1_meths[0],
85 &dsa_asn1_meths[1],
86 &dsa_asn1_meths[2],
87 &dsa_asn1_meths[3],
88 &dsa_asn1_meths[4],
89#endif
90#ifndef OPENSSL_NO_EC
91 &eckey_asn1_meth,
92#endif
93 &hmac_asn1_meth
94 };
95
96typedef int sk_cmp_fn_type(const char * const *a, const char * const *b);
97DECLARE_STACK_OF(EVP_PKEY_ASN1_METHOD)
98static STACK_OF(EVP_PKEY_ASN1_METHOD) *app_methods = NULL;
99
100
101
#ifdef TEST
/* Debug aid, only built when TEST is defined: print every entry of
 * standard_methods with its index, pkey_id and short name to stderr so the
 * ordering of the table can be inspected.
 */
int main(void)
	{
	size_t i;
	/* sizeof yields size_t, so index with size_t to avoid a
	 * signed/unsigned comparison.
	 */
	for (i = 0;
		i < sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *);
		i++)
		fprintf(stderr, "Number %d id=%d (%s)\n", (int)i,
			standard_methods[i]->pkey_id,
			OBJ_nid2sn(standard_methods[i]->pkey_id));
	return 0;
	}
#endif
114
115DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_ASN1_METHOD *,
116 const EVP_PKEY_ASN1_METHOD *, ameth);
117
118static int ameth_cmp(const EVP_PKEY_ASN1_METHOD * const *a,
119 const EVP_PKEY_ASN1_METHOD * const *b)
120 {
121 return ((*a)->pkey_id - (*b)->pkey_id);
122 }
123
124IMPLEMENT_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_ASN1_METHOD *,
125 const EVP_PKEY_ASN1_METHOD *, ameth);
126
127int EVP_PKEY_asn1_get_count(void)
128 {
129 int num = sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *);
130 if (app_methods)
131 num += sk_EVP_PKEY_ASN1_METHOD_num(app_methods);
132 return num;
133 }
134
135const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_get0(int idx)
136 {
137 int num = sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *);
138 if (idx < 0)
139 return NULL;
140 if (idx < num)
141 return standard_methods[idx];
142 idx -= num;
143 return sk_EVP_PKEY_ASN1_METHOD_value(app_methods, idx);
144 }
145
146static const EVP_PKEY_ASN1_METHOD *pkey_asn1_find(int type)
147 {
148 EVP_PKEY_ASN1_METHOD tmp;
149 const EVP_PKEY_ASN1_METHOD *t = &tmp, **ret;
150 tmp.pkey_id = type;
151 if (app_methods)
152 {
153 int idx;
154 idx = sk_EVP_PKEY_ASN1_METHOD_find(app_methods, &tmp);
155 if (idx >= 0)
156 return sk_EVP_PKEY_ASN1_METHOD_value(app_methods, idx);
157 }
158 ret = OBJ_bsearch_ameth(&t, standard_methods,
159 sizeof(standard_methods)
160 /sizeof(EVP_PKEY_ASN1_METHOD *));
161 if (!ret || !*ret)
162 return NULL;
163 return *ret;
164 }
165
166/* Find an implementation of an ASN1 algorithm. If 'pe' is not NULL
167 * also search through engines and set *pe to a functional reference
168 * to the engine implementing 'type' or NULL if no engine implements
169 * it.
170 */
171
172const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_find(ENGINE **pe, int type)
173 {
174 const EVP_PKEY_ASN1_METHOD *t;
175 ENGINE *e;
176
177 for (;;)
178 {
179 t = pkey_asn1_find(type);
180 if (!t || !(t->pkey_flags & ASN1_PKEY_ALIAS))
181 break;
182 type = t->pkey_base_id;
183 }
184 if (pe)
185 {
186#ifndef OPENSSL_NO_ENGINE
187 /* type will contain the final unaliased type */
188 e = ENGINE_get_pkey_asn1_meth_engine(type);
189 if (e)
190 {
191 *pe = e;
192 return ENGINE_get_pkey_asn1_meth(e, type);
193 }
194#endif
195 *pe = NULL;
196 }
197 return t;
198 }
199
200const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_find_str(ENGINE **pe,
201 const char *str, int len)
202 {
203 int i;
204 const EVP_PKEY_ASN1_METHOD *ameth;
205 if (len == -1)
206 len = strlen(str);
207 if (pe)
208 {
209#ifndef OPENSSL_NO_ENGINE
210 ENGINE *e;
211 ameth = ENGINE_pkey_asn1_find_str(&e, str, len);
212 if (ameth)
213 {
214 /* Convert structural into
215 * functional reference
216 */
217 if (!ENGINE_init(e))
218 ameth = NULL;
219 ENGINE_free(e);
220 *pe = e;
221 return ameth;
222 }
223#endif
224 *pe = NULL;
225 }
226 for (i = 0; i < EVP_PKEY_asn1_get_count(); i++)
227 {
228 ameth = EVP_PKEY_asn1_get0(i);
229 if (ameth->pkey_flags & ASN1_PKEY_ALIAS)
230 continue;
231 if (((int)strlen(ameth->pem_str) == len) &&
232 !strncasecmp(ameth->pem_str, str, len))
233 return ameth;
234 }
235 return NULL;
236 }
237
238int EVP_PKEY_asn1_add0(const EVP_PKEY_ASN1_METHOD *ameth)
239 {
240 if (app_methods == NULL)
241 {
242 app_methods = sk_EVP_PKEY_ASN1_METHOD_new(ameth_cmp);
243 if (!app_methods)
244 return 0;
245 }
246 if (!sk_EVP_PKEY_ASN1_METHOD_push(app_methods, ameth))
247 return 0;
248 sk_EVP_PKEY_ASN1_METHOD_sort(app_methods);
249 return 1;
250 }
251
252int EVP_PKEY_asn1_add_alias(int to, int from)
253 {
254 EVP_PKEY_ASN1_METHOD *ameth;
255 ameth = EVP_PKEY_asn1_new(from, ASN1_PKEY_ALIAS, NULL, NULL);
256 if (!ameth)
257 return 0;
258 ameth->pkey_base_id = to;
259 return EVP_PKEY_asn1_add0(ameth);
260 }
261
262int EVP_PKEY_asn1_get0_info(int *ppkey_id, int *ppkey_base_id, int *ppkey_flags,
263 const char **pinfo, const char **ppem_str,
264 const EVP_PKEY_ASN1_METHOD *ameth)
265 {
266 if (!ameth)
267 return 0;
268 if (ppkey_id)
269 *ppkey_id = ameth->pkey_id;
270 if (ppkey_base_id)
271 *ppkey_base_id = ameth->pkey_base_id;
272 if (ppkey_flags)
273 *ppkey_flags = ameth->pkey_flags;
274 if (pinfo)
275 *pinfo = ameth->info;
276 if (ppem_str)
277 *ppem_str = ameth->pem_str;
278 return 1;
279 }
280
281const EVP_PKEY_ASN1_METHOD* EVP_PKEY_get0_asn1(EVP_PKEY *pkey)
282 {
283 return pkey->ameth;
284 }
285
286EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags,
287 const char *pem_str, const char *info)
288 {
289 EVP_PKEY_ASN1_METHOD *ameth;
290 ameth = OPENSSL_malloc(sizeof(EVP_PKEY_ASN1_METHOD));
291 if (!ameth)
292 return NULL;
293
294 ameth->pkey_id = id;
295 ameth->pkey_base_id = id;
296 ameth->pkey_flags = flags | ASN1_PKEY_DYNAMIC;
297
298 if (info)
299 {
300 ameth->info = BUF_strdup(info);
301 if (!ameth->info)
302 goto err;
303 }
304
305 if (pem_str)
306 {
307 ameth->pem_str = BUF_strdup(pem_str);
308 if (!ameth->pem_str)
309 goto err;
310 }
311
312 ameth->pub_decode = 0;
313 ameth->pub_encode = 0;
314 ameth->pub_cmp = 0;
315 ameth->pub_print = 0;
316
317 ameth->priv_decode = 0;
318 ameth->priv_encode = 0;
319 ameth->priv_print = 0;
320
321 ameth->old_priv_encode = 0;
322 ameth->old_priv_decode = 0;
323
324 ameth->pkey_size = 0;
325 ameth->pkey_bits = 0;
326
327 ameth->param_decode = 0;
328 ameth->param_encode = 0;
329 ameth->param_missing = 0;
330 ameth->param_copy = 0;
331 ameth->param_cmp = 0;
332 ameth->param_print = 0;
333
334 ameth->pkey_free = 0;
335 ameth->pkey_ctrl = 0;
336
337 return ameth;
338
339 err:
340
341 EVP_PKEY_asn1_free(ameth);
342 return NULL;
343
344 }
345
346void EVP_PKEY_asn1_copy(EVP_PKEY_ASN1_METHOD *dst,
347 const EVP_PKEY_ASN1_METHOD *src)
348 {
349
350 dst->pub_decode = src->pub_decode;
351 dst->pub_encode = src->pub_encode;
352 dst->pub_cmp = src->pub_cmp;
353 dst->pub_print = src->pub_print;
354
355 dst->priv_decode = src->priv_decode;
356 dst->priv_encode = src->priv_encode;
357 dst->priv_print = src->priv_print;
358
359 dst->old_priv_encode = src->old_priv_encode;
360 dst->old_priv_decode = src->old_priv_decode;
361
362 dst->pkey_size = src->pkey_size;
363 dst->pkey_bits = src->pkey_bits;
364
365 dst->param_decode = src->param_decode;
366 dst->param_encode = src->param_encode;
367 dst->param_missing = src->param_missing;
368 dst->param_copy = src->param_copy;
369 dst->param_cmp = src->param_cmp;
370 dst->param_print = src->param_print;
371
372 dst->pkey_free = src->pkey_free;
373 dst->pkey_ctrl = src->pkey_ctrl;
374
375 }
376
377void EVP_PKEY_asn1_free(EVP_PKEY_ASN1_METHOD *ameth)
378 {
379 if (ameth && (ameth->pkey_flags & ASN1_PKEY_DYNAMIC))
380 {
381 if (ameth->pem_str)
382 OPENSSL_free(ameth->pem_str);
383 if (ameth->info)
384 OPENSSL_free(ameth->info);
385 OPENSSL_free(ameth);
386 }
387 }
388
389void EVP_PKEY_asn1_set_public(EVP_PKEY_ASN1_METHOD *ameth,
390 int (*pub_decode)(EVP_PKEY *pk, X509_PUBKEY *pub),
391 int (*pub_encode)(X509_PUBKEY *pub, const EVP_PKEY *pk),
392 int (*pub_cmp)(const EVP_PKEY *a, const EVP_PKEY *b),
393 int (*pub_print)(BIO *out, const EVP_PKEY *pkey, int indent,
394 ASN1_PCTX *pctx),
395 int (*pkey_size)(const EVP_PKEY *pk),
396 int (*pkey_bits)(const EVP_PKEY *pk))
397 {
398 ameth->pub_decode = pub_decode;
399 ameth->pub_encode = pub_encode;
400 ameth->pub_cmp = pub_cmp;
401 ameth->pub_print = pub_print;
402 ameth->pkey_size = pkey_size;
403 ameth->pkey_bits = pkey_bits;
404 }
405
406void EVP_PKEY_asn1_set_private(EVP_PKEY_ASN1_METHOD *ameth,
407 int (*priv_decode)(EVP_PKEY *pk, PKCS8_PRIV_KEY_INFO *p8inf),
408 int (*priv_encode)(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pk),
409 int (*priv_print)(BIO *out, const EVP_PKEY *pkey, int indent,
410 ASN1_PCTX *pctx))
411 {
412 ameth->priv_decode = priv_decode;
413 ameth->priv_encode = priv_encode;
414 ameth->priv_print = priv_print;
415 }
416
417void EVP_PKEY_asn1_set_param(EVP_PKEY_ASN1_METHOD *ameth,
418 int (*param_decode)(EVP_PKEY *pkey,
419 const unsigned char **pder, int derlen),
420 int (*param_encode)(const EVP_PKEY *pkey, unsigned char **pder),
421 int (*param_missing)(const EVP_PKEY *pk),
422 int (*param_copy)(EVP_PKEY *to, const EVP_PKEY *from),
423 int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b),
424 int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent,
425 ASN1_PCTX *pctx))
426 {
427 ameth->param_decode = param_decode;
428 ameth->param_encode = param_encode;
429 ameth->param_missing = param_missing;
430 ameth->param_copy = param_copy;
431 ameth->param_cmp = param_cmp;
432 ameth->param_print = param_print;
433 }
434
435void EVP_PKEY_asn1_set_free(EVP_PKEY_ASN1_METHOD *ameth,
436 void (*pkey_free)(EVP_PKEY *pkey))
437 {
438 ameth->pkey_free = pkey_free;
439 }
440
441void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth,
442 int (*pkey_ctrl)(EVP_PKEY *pkey, int op,
443 long arg1, void *arg2))
444 {
445 ameth->pkey_ctrl = pkey_ctrl;
446 }
diff --git a/src/lib/libssl/src/crypto/asn1/asn1_locl.h b/src/lib/libssl/src/crypto/asn1/asn1_locl.h
new file mode 100644
index 0000000000..5aa65e28f5
--- /dev/null
+++ b/src/lib/libssl/src/crypto/asn1/asn1_locl.h
@@ -0,0 +1,134 @@
1/* asn1_locl.h */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project 2006.
4 */
5/* ====================================================================
6 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59/* Internal ASN1 structures and functions: not for application use */
60
61/* ASN1 print context structure */
62
63struct asn1_pctx_st /* holds five unsigned long flag words; their bit meanings are not defined in this header */
64 {
65 unsigned long flags; /* general flags -- NOTE(review): meaning not visible here */
66 unsigned long nm_flags; /* presumably name printing flags -- TODO confirm */
67 unsigned long cert_flags; /* presumably certificate printing flags -- TODO confirm */
68 unsigned long oid_flags; /* presumably OID printing flags -- TODO confirm */
69 unsigned long str_flags; /* presumably string printing flags -- TODO confirm */
70 } /* ASN1_PCTX */;
71
72/* ASN1 public key method structure */
73
74struct evp_pkey_asn1_method_st /* per key type table of ASN1 related callbacks */
75 {
76 int pkey_id; /* id this method was created for (see EVP_PKEY_asn1_new) */
77 int pkey_base_id; /* same as pkey_id unless EVP_PKEY_asn1_add_alias redirected it */
78 unsigned long pkey_flags; /* ASN1_PKEY_ALIAS and ASN1_PKEY_DYNAMIC bits are tested in ameth_lib.c */
79
80 char *pem_str; /* name matched case insensitively by EVP_PKEY_asn1_find_str */
81 char *info; /* descriptive string; freed together with pem_str for dynamic methods */
82
83 int (*pub_decode)(EVP_PKEY *pk, X509_PUBKEY *pub);
84 int (*pub_encode)(X509_PUBKEY *pub, const EVP_PKEY *pk);
85 int (*pub_cmp)(const EVP_PKEY *a, const EVP_PKEY *b);
86 int (*pub_print)(BIO *out, const EVP_PKEY *pkey, int indent,
87 ASN1_PCTX *pctx);
88
89 int (*priv_decode)(EVP_PKEY *pk, PKCS8_PRIV_KEY_INFO *p8inf);
90 int (*priv_encode)(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pk);
91 int (*priv_print)(BIO *out, const EVP_PKEY *pkey, int indent,
92 ASN1_PCTX *pctx);
93
94 int (*pkey_size)(const EVP_PKEY *pk);
95 int (*pkey_bits)(const EVP_PKEY *pk);
96
97 int (*param_decode)(EVP_PKEY *pkey,
98 const unsigned char **pder, int derlen);
99 int (*param_encode)(const EVP_PKEY *pkey, unsigned char **pder);
100 int (*param_missing)(const EVP_PKEY *pk);
101 int (*param_copy)(EVP_PKEY *to, const EVP_PKEY *from);
102 int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b);
103 int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent,
104 ASN1_PCTX *pctx);
105
106 void (*pkey_free)(EVP_PKEY *pkey);
107 int (*pkey_ctrl)(EVP_PKEY *pkey, int op, long arg1, void *arg2);
108
109 /* Legacy functions for old PEM */
110
111 int (*old_priv_decode)(EVP_PKEY *pkey,
112 const unsigned char **pder, int derlen);
113 int (*old_priv_encode)(const EVP_PKEY *pkey, unsigned char **pder);
114
115 } /* EVP_PKEY_ASN1_METHOD */;
116
117/* Method to handle CRL access.
118 * In general a CRL could be very large (several Mb) and can consume large
119 * amounts of resources if stored in memory by multiple processes.
120 * This method allows general CRL operations to be redirected to more
121 * efficient callbacks: for example a CRL entry database.
122 */
123
124#define X509_CRL_METHOD_DYNAMIC 1
125
126struct x509_crl_method_st /* callback table used to redirect CRL operations, per the comment above */
127 {
128 int flags; /* presumably holds X509_CRL_METHOD_DYNAMIC -- TODO confirm against users */
129 int (*crl_init)(X509_CRL *crl);
130 int (*crl_free)(X509_CRL *crl);
131 int (*crl_lookup)(X509_CRL *crl, X509_REVOKED **ret,
132 ASN1_INTEGER *ser, X509_NAME *issuer);
133 int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk);
134 };
diff --git a/src/lib/libssl/src/crypto/asn1/asn_mime.c b/src/lib/libssl/src/crypto/asn1/asn_mime.c
index bc80b20d63..d8d9e76cc0 100644
--- a/src/lib/libssl/src/crypto/asn1/asn_mime.c
+++ b/src/lib/libssl/src/crypto/asn1/asn_mime.c
@@ -152,7 +152,6 @@ static ASN1_VALUE *b64_read_asn1(BIO *bio, const ASN1_ITEM *it)
152 152
153static int asn1_write_micalg(BIO *out, STACK_OF(X509_ALGOR) *mdalgs) 153static int asn1_write_micalg(BIO *out, STACK_OF(X509_ALGOR) *mdalgs)
154 { 154 {
155 const EVP_MD *md;
156 int i, have_unknown = 0, write_comma, md_nid; 155 int i, have_unknown = 0, write_comma, md_nid;
157 have_unknown = 0; 156 have_unknown = 0;
158 write_comma = 0; 157 write_comma = 0;
@@ -162,7 +161,6 @@ static int asn1_write_micalg(BIO *out, STACK_OF(X509_ALGOR) *mdalgs)
162 BIO_write(out, ",", 1); 161 BIO_write(out, ",", 1);
163 write_comma = 1; 162 write_comma = 1;
164 md_nid = OBJ_obj2nid(sk_X509_ALGOR_value(mdalgs, i)->algorithm); 163 md_nid = OBJ_obj2nid(sk_X509_ALGOR_value(mdalgs, i)->algorithm);
165 md = EVP_get_digestbynid(md_nid);
166 switch(md_nid) 164 switch(md_nid)
167 { 165 {
168 case NID_sha1: 166 case NID_sha1:
diff --git a/src/lib/libssl/src/crypto/asn1/bio_asn1.c b/src/lib/libssl/src/crypto/asn1/bio_asn1.c
new file mode 100644
index 0000000000..dc7efd551c
--- /dev/null
+++ b/src/lib/libssl/src/crypto/asn1/bio_asn1.c
@@ -0,0 +1,495 @@
1/* bio_asn1.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59/* Experimental ASN1 BIO. When written through the data is converted
60 * to an ASN1 string type: default is OCTET STRING. Additional functions
61 * can be provided to add prefix and suffix data.
62 */
63
64#include <string.h>
65#include <openssl/bio.h>
66#include <openssl/asn1.h>
67
68/* Must be large enough for biggest tag+length */
69#define DEFAULT_ASN1_BUF_SIZE 20
70
71typedef enum /* write side state machine of the ASN1 BIO, driven by asn1_bio_write and the flush ctrl */
72 {
73 ASN1_STATE_START, /* initial state: the prefix callback has not run yet */
74 ASN1_STATE_PRE_COPY, /* prefix data is being written to the next BIO */
75 ASN1_STATE_HEADER, /* a tag+length header must be built for the next write */
76 ASN1_STATE_HEADER_COPY, /* the header is being written to the next BIO */
77 ASN1_STATE_DATA_COPY, /* caller data is being written to the next BIO */
78 ASN1_STATE_POST_COPY, /* suffix data is being written during a flush */
79 ASN1_STATE_DONE /* suffix handled: flush is passed on to the next BIO */
80 } asn1_bio_state_t;
81
82typedef struct BIO_ASN1_EX_FUNCS_st /* callback pair carried through BIO_ctrl as arg2 for the prefix/suffix set and get commands */
83 {
84 asn1_ps_func *ex_func; /* produces the extra data */
85 asn1_ps_func *ex_free_func; /* called once the extra data has been written out */
86 } BIO_ASN1_EX_FUNCS;
87
88typedef struct BIO_ASN1_BUF_CTX_t /* per BIO state, stored in b->ptr by asn1_bio_new */
89 {
90 /* Internal state */
91 asn1_bio_state_t state;
92 /* Internal buffer */
93 unsigned char *buf; /* holds the encoded tag+length header */
94 /* Size of buffer */
95 int bufsize;
96 /* Current position in buffer */
97 int bufpos;
98 /* Current buffer length */
99 int buflen;
100 /* Amount of data to copy */
101 int copylen;
102 /* Class and tag to use */
103 int asn1_class, asn1_tag;
104 asn1_ps_func *prefix, *prefix_free, *suffix, *suffix_free; /* set and read via the BIO_C_{SET,GET}_{PREFIX,SUFFIX} ctrls */
105 /* Extra buffer for prefix and suffix data */
106 unsigned char *ex_buf;
107 int ex_len; /* bytes of ex_buf still to write */
108 int ex_pos; /* offset into ex_buf of the next byte to write */
109 void *ex_arg; /* its address is handed to the prefix/suffix callbacks */
110 } BIO_ASN1_BUF_CTX;
111
112
113static int asn1_bio_write(BIO *h, const char *buf,int num); /* BIO method callbacks, collected in methods_asn1 below */
114static int asn1_bio_read(BIO *h, char *buf, int size);
115static int asn1_bio_puts(BIO *h, const char *str);
116static int asn1_bio_gets(BIO *h, char *str, int size);
117static long asn1_bio_ctrl(BIO *h, int cmd, long arg1, void *arg2);
118static int asn1_bio_new(BIO *h);
119static int asn1_bio_free(BIO *data);
120static long asn1_bio_callback_ctrl(BIO *h, int cmd, bio_info_cb *fp);
121
122static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size); /* internal helpers */
123static int asn1_bio_flush_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
124 asn1_ps_func *cleanup, asn1_bio_state_t next);
125static int asn1_bio_setup_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
126 asn1_ps_func *setup,
127 asn1_bio_state_t ex_state,
128 asn1_bio_state_t other_state);
129
130static BIO_METHOD methods_asn1= /* method table handed out by BIO_f_asn1() */
131 {
132 BIO_TYPE_ASN1,
133 "asn1",
134 asn1_bio_write,
135 asn1_bio_read,
136 asn1_bio_puts,
137 asn1_bio_gets,
138 asn1_bio_ctrl,
139 asn1_bio_new,
140 asn1_bio_free,
141 asn1_bio_callback_ctrl,
142 };
143
144BIO_METHOD *BIO_f_asn1(void)
145 {
146 return(&methods_asn1);
147 }
148
149
150static int asn1_bio_new(BIO *b)
151 {
152 BIO_ASN1_BUF_CTX *ctx;
153 ctx = OPENSSL_malloc(sizeof(BIO_ASN1_BUF_CTX));
154 if (!ctx)
155 return 0;
156 if (!asn1_bio_init(ctx, DEFAULT_ASN1_BUF_SIZE))
157 return 0;
158 b->init = 1;
159 b->ptr = (char *)ctx;
160 b->flags = 0;
161 return 1;
162 }
163
164static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size)
165 {
166 ctx->buf = OPENSSL_malloc(size);
167 if (!ctx->buf)
168 return 0;
169 ctx->bufsize = size;
170 ctx->bufpos = 0;
171 ctx->buflen = 0;
172 ctx->copylen = 0;
173 ctx->asn1_class = V_ASN1_UNIVERSAL;
174 ctx->asn1_tag = V_ASN1_OCTET_STRING;
175 ctx->ex_buf = 0;
176 ctx->ex_pos = 0;
177 ctx->ex_len = 0;
178 ctx->state = ASN1_STATE_START;
179 return 1;
180 }
181
182static int asn1_bio_free(BIO *b)
183 {
184 BIO_ASN1_BUF_CTX *ctx;
185 ctx = (BIO_ASN1_BUF_CTX *) b->ptr;
186 if (ctx == NULL)
187 return 0;
188 if (ctx->buf)
189 OPENSSL_free(ctx->buf);
190 OPENSSL_free(ctx);
191 b->init = 0;
192 b->ptr = NULL;
193 b->flags = 0;
194 return 1;
195 }
196
197static int asn1_bio_write(BIO *b, const char *in , int inl)
198 {
199 BIO_ASN1_BUF_CTX *ctx;
200 int wrmax, wrlen, ret;
201 unsigned char *p;
202 if (!in || (inl < 0) || (b->next_bio == NULL))
203 return 0;
204 ctx = (BIO_ASN1_BUF_CTX *) b->ptr;
205 if (ctx == NULL)
206 return 0;
207
208 wrlen = 0;
209 ret = -1;
210
211 for(;;)
212 {
213 switch (ctx->state)
214 {
215
216 /* Setup prefix data, call it */
217 case ASN1_STATE_START:
218 if (!asn1_bio_setup_ex(b, ctx, ctx->prefix,
219 ASN1_STATE_PRE_COPY, ASN1_STATE_HEADER))
220 return 0;
221 break;
222
223 /* Copy any pre data first */
224 case ASN1_STATE_PRE_COPY:
225
226 ret = asn1_bio_flush_ex(b, ctx, ctx->prefix_free,
227 ASN1_STATE_HEADER);
228
229 if (ret <= 0)
230 goto done;
231
232 break;
233
234 case ASN1_STATE_HEADER:
235 ctx->buflen =
236 ASN1_object_size(0, inl, ctx->asn1_tag) - inl;
237 OPENSSL_assert(ctx->buflen <= ctx->bufsize);
238 p = ctx->buf;
239 ASN1_put_object(&p, 0, inl,
240 ctx->asn1_tag, ctx->asn1_class);
241 ctx->copylen = inl;
242 ctx->state = ASN1_STATE_HEADER_COPY;
243
244 break;
245
246 case ASN1_STATE_HEADER_COPY:
247 ret = BIO_write(b->next_bio,
248 ctx->buf + ctx->bufpos, ctx->buflen);
249 if (ret <= 0)
250 goto done;
251
252 ctx->buflen -= ret;
253 if (ctx->buflen)
254 ctx->bufpos += ret;
255 else
256 {
257 ctx->bufpos = 0;
258 ctx->state = ASN1_STATE_DATA_COPY;
259 }
260
261 break;
262
263 case ASN1_STATE_DATA_COPY:
264
265 if (inl > ctx->copylen)
266 wrmax = ctx->copylen;
267 else
268 wrmax = inl;
269 ret = BIO_write(b->next_bio, in, wrmax);
270 if (ret <= 0)
271 break;
272 wrlen += ret;
273 ctx->copylen -= ret;
274 in += ret;
275 inl -= ret;
276
277 if (ctx->copylen == 0)
278 ctx->state = ASN1_STATE_HEADER;
279
280 if (inl == 0)
281 goto done;
282
283 break;
284
285 default:
286 BIO_clear_retry_flags(b);
287 return 0;
288
289 }
290
291 }
292
293 done:
294 BIO_clear_retry_flags(b);
295 BIO_copy_next_retry(b);
296
297 return (wrlen > 0) ? wrlen : ret;
298
299 }
300
301static int asn1_bio_flush_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
302 asn1_ps_func *cleanup, asn1_bio_state_t next)
303 {
304 int ret;
305 if (ctx->ex_len <= 0)
306 return 1;
307 for(;;)
308 {
309 ret = BIO_write(b->next_bio, ctx->ex_buf + ctx->ex_pos,
310 ctx->ex_len);
311 if (ret <= 0)
312 break;
313 ctx->ex_len -= ret;
314 if (ctx->ex_len > 0)
315 ctx->ex_pos += ret;
316 else
317 {
318 if(cleanup)
319 cleanup(b, &ctx->ex_buf, &ctx->ex_len,
320 &ctx->ex_arg);
321 ctx->state = next;
322 ctx->ex_pos = 0;
323 break;
324 }
325 }
326 return ret;
327 }
328
329static int asn1_bio_setup_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
330 asn1_ps_func *setup,
331 asn1_bio_state_t ex_state,
332 asn1_bio_state_t other_state)
333 {
334 if (setup && !setup(b, &ctx->ex_buf, &ctx->ex_len, &ctx->ex_arg))
335 {
336 BIO_clear_retry_flags(b);
337 return 0;
338 }
339 if (ctx->ex_len > 0)
340 ctx->state = ex_state;
341 else
342 ctx->state = other_state;
343 return 1;
344 }
345
346static int asn1_bio_read(BIO *b, char *in , int inl)
347 {
348 if (!b->next_bio)
349 return 0;
350 return BIO_read(b->next_bio, in , inl);
351 }
352
353static int asn1_bio_puts(BIO *b, const char *str)
354 {
355 return asn1_bio_write(b, str, strlen(str));
356 }
357
358static int asn1_bio_gets(BIO *b, char *str, int size)
359 {
360 if (!b->next_bio)
361 return 0;
362 return BIO_gets(b->next_bio, str , size);
363 }
364
365static long asn1_bio_callback_ctrl(BIO *b, int cmd, bio_info_cb *fp)
366 {
367 if (b->next_bio == NULL) return(0);
368 return BIO_callback_ctrl(b->next_bio,cmd,fp);
369 }
370
371static long asn1_bio_ctrl(BIO *b, int cmd, long arg1, void *arg2)
372 {
373 BIO_ASN1_BUF_CTX *ctx;
374 BIO_ASN1_EX_FUNCS *ex_func;
375 long ret = 1;
376 ctx = (BIO_ASN1_BUF_CTX *) b->ptr;
377 if (ctx == NULL)
378 return 0;
379 switch(cmd)
380 {
381
382 case BIO_C_SET_PREFIX:
383 ex_func = arg2;
384 ctx->prefix = ex_func->ex_func;
385 ctx->prefix_free = ex_func->ex_free_func;
386 break;
387
388 case BIO_C_GET_PREFIX:
389 ex_func = arg2;
390 ex_func->ex_func = ctx->prefix;
391 ex_func->ex_free_func = ctx->prefix_free;
392 break;
393
394 case BIO_C_SET_SUFFIX:
395 ex_func = arg2;
396 ctx->suffix = ex_func->ex_func;
397 ctx->suffix_free = ex_func->ex_free_func;
398 break;
399
400 case BIO_C_GET_SUFFIX:
401 ex_func = arg2;
402 ex_func->ex_func = ctx->suffix;
403 ex_func->ex_free_func = ctx->suffix_free;
404 break;
405
406 case BIO_C_SET_EX_ARG:
407 ctx->ex_arg = arg2;
408 break;
409
410 case BIO_C_GET_EX_ARG:
411 *(void **)arg2 = ctx->ex_arg;
412 break;
413
414 case BIO_CTRL_FLUSH:
415 if (!b->next_bio)
416 return 0;
417
418 /* Call post function if possible */
419 if (ctx->state == ASN1_STATE_HEADER)
420 {
421 if (!asn1_bio_setup_ex(b, ctx, ctx->suffix,
422 ASN1_STATE_POST_COPY, ASN1_STATE_DONE))
423 return 0;
424 }
425
426 if (ctx->state == ASN1_STATE_POST_COPY)
427 {
428 ret = asn1_bio_flush_ex(b, ctx, ctx->suffix_free,
429 ASN1_STATE_DONE);
430 if (ret <= 0)
431 return ret;
432 }
433
434 if (ctx->state == ASN1_STATE_DONE)
435 return BIO_ctrl(b->next_bio, cmd, arg1, arg2);
436 else
437 {
438 BIO_clear_retry_flags(b);
439 return 0;
440 }
441 break;
442
443
444 default:
445 if (!b->next_bio)
446 return 0;
447 return BIO_ctrl(b->next_bio, cmd, arg1, arg2);
448
449 }
450
451 return ret;
452 }
453
454static int asn1_bio_set_ex(BIO *b, int cmd,
455 asn1_ps_func *ex_func, asn1_ps_func *ex_free_func)
456 {
457 BIO_ASN1_EX_FUNCS extmp;
458 extmp.ex_func = ex_func;
459 extmp.ex_free_func = ex_free_func;
460 return BIO_ctrl(b, cmd, 0, &extmp);
461 }
462
463static int asn1_bio_get_ex(BIO *b, int cmd,
464 asn1_ps_func **ex_func, asn1_ps_func **ex_free_func)
465 {
466 BIO_ASN1_EX_FUNCS extmp;
467 int ret;
468 ret = BIO_ctrl(b, cmd, 0, &extmp);
469 if (ret > 0)
470 {
471 *ex_func = extmp.ex_func;
472 *ex_free_func = extmp.ex_free_func;
473 }
474 return ret;
475 }
476
477int BIO_asn1_set_prefix(BIO *b, asn1_ps_func *prefix, asn1_ps_func *prefix_free)
478 {
479 return asn1_bio_set_ex(b, BIO_C_SET_PREFIX, prefix, prefix_free);
480 }
481
482int BIO_asn1_get_prefix(BIO *b, asn1_ps_func **pprefix, asn1_ps_func **pprefix_free)
483 {
484 return asn1_bio_get_ex(b, BIO_C_GET_PREFIX, pprefix, pprefix_free);
485 }
486
487int BIO_asn1_set_suffix(BIO *b, asn1_ps_func *suffix, asn1_ps_func *suffix_free)
488 {
489 return asn1_bio_set_ex(b, BIO_C_SET_SUFFIX, suffix, suffix_free);
490 }
491
492int BIO_asn1_get_suffix(BIO *b, asn1_ps_func **psuffix, asn1_ps_func **psuffix_free)
493 {
494 return asn1_bio_get_ex(b, BIO_C_GET_SUFFIX, psuffix, psuffix_free);
495 }
diff --git a/src/lib/libssl/src/crypto/asn1/bio_ndef.c b/src/lib/libssl/src/crypto/asn1/bio_ndef.c
new file mode 100644
index 0000000000..370389b1e6
--- /dev/null
+++ b/src/lib/libssl/src/crypto/asn1/bio_ndef.c
@@ -0,0 +1,246 @@
1/* bio_ndef.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 */
54
55#include <openssl/asn1.h>
56#include <openssl/asn1t.h>
57#include <openssl/bio.h>
58#include <openssl/err.h>
59
60#ifndef OPENSSL_SYSNAME_NETWARE
61#include <memory.h>
62#endif
63#include <stdio.h>
64
65/* Experimental NDEF ASN1 BIO support routines */
66
67/* The usage is quite simple, initialize an ASN1 structure,
68 * get a BIO from it then any data written through the BIO
69 * will end up translated to appropriate format on the fly.
70 * The data is streamed out and does *not* need to be
71 * all held in memory at once.
72 *
73 * When the BIO is flushed the output is finalized and any
74 * signatures etc written out.
75 *
76 * The BIO is a 'proper' BIO and can handle non blocking I/O
77 * correctly.
78 *
79 * The usage is simple. The implementation is *not*...
80 */
81
82/* BIO support data stored in the ASN1 BIO ex_arg */
83
84typedef struct ndef_aux_st /* allocated by BIO_new_NDEF and attached to the ASN1 BIO as its ex_arg */
85 {
86 /* ASN1 structure this BIO refers to */
87 ASN1_VALUE *val;
88 const ASN1_ITEM *it;
89 /* Top of the BIO chain */
90 BIO *ndef_bio;
91 /* Output BIO */
92 BIO *out;
93 /* Boundary where content is inserted */
94 unsigned char **boundary;
95 /* DER buffer start */
96 unsigned char *derbuf; /* allocated in ndef_prefix/ndef_suffix, freed in ndef_prefix_free */
97 } NDEF_SUPPORT;
98
99static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg); /* these four are installed on the ASN1 BIO by BIO_new_NDEF */
100static int ndef_prefix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg);
101static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg);
102static int ndef_suffix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg);
103
104BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it)
105 {
106 NDEF_SUPPORT *ndef_aux = NULL;
107 BIO *asn_bio = NULL;
108 const ASN1_AUX *aux = it->funcs;
109 ASN1_STREAM_ARG sarg;
110
111 if (!aux || !aux->asn1_cb)
112 {
113 ASN1err(ASN1_F_BIO_NEW_NDEF, ASN1_R_STREAMING_NOT_SUPPORTED);
114 return NULL;
115 }
116 ndef_aux = OPENSSL_malloc(sizeof(NDEF_SUPPORT));
117 asn_bio = BIO_new(BIO_f_asn1());
118
119 /* ASN1 bio needs to be next to output BIO */
120
121 out = BIO_push(asn_bio, out);
122
123 if (!ndef_aux || !asn_bio || !out)
124 goto err;
125
126 BIO_asn1_set_prefix(asn_bio, ndef_prefix, ndef_prefix_free);
127 BIO_asn1_set_suffix(asn_bio, ndef_suffix, ndef_suffix_free);
128
129 /* Now let callback prepend any digest, cipher etc BIOs
130 * ASN1 structure needs.
131 */
132
133 sarg.out = out;
134 sarg.ndef_bio = NULL;
135 sarg.boundary = NULL;
136
137 if (aux->asn1_cb(ASN1_OP_STREAM_PRE, &val, it, &sarg) <= 0)
138 goto err;
139
140 ndef_aux->val = val;
141 ndef_aux->it = it;
142 ndef_aux->ndef_bio = sarg.ndef_bio;
143 ndef_aux->boundary = sarg.boundary;
144 ndef_aux->out = out;
145
146 BIO_ctrl(asn_bio, BIO_C_SET_EX_ARG, 0, ndef_aux);
147
148 return sarg.ndef_bio;
149
150 err:
151 if (asn_bio)
152 BIO_free(asn_bio);
153 if (ndef_aux)
154 OPENSSL_free(ndef_aux);
155 return NULL;
156 }
157
158static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg)
159 {
160 NDEF_SUPPORT *ndef_aux;
161 unsigned char *p;
162 int derlen;
163
164 if (!parg)
165 return 0;
166
167 ndef_aux = *(NDEF_SUPPORT **)parg;
168
169 derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it);
170 p = OPENSSL_malloc(derlen);
171 ndef_aux->derbuf = p;
172 *pbuf = p;
173 derlen = ASN1_item_ndef_i2d(ndef_aux->val, &p, ndef_aux->it);
174
175 if (!*ndef_aux->boundary)
176 return 0;
177
178 *plen = *ndef_aux->boundary - *pbuf;
179
180 return 1;
181 }
182
183static int ndef_prefix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg)
184 {
185 NDEF_SUPPORT *ndef_aux;
186
187 if (!parg)
188 return 0;
189
190 ndef_aux = *(NDEF_SUPPORT **)parg;
191
192 if (ndef_aux->derbuf)
193 OPENSSL_free(ndef_aux->derbuf);
194
195 ndef_aux->derbuf = NULL;
196 *pbuf = NULL;
197 *plen = 0;
198 return 1;
199 }
200
201static int ndef_suffix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg)
202 {
203 NDEF_SUPPORT **pndef_aux = (NDEF_SUPPORT **)parg;
204 if (!ndef_prefix_free(b, pbuf, plen, parg))
205 return 0;
206 OPENSSL_free(*pndef_aux);
207 *pndef_aux = NULL;
208 return 1;
209 }
210
211static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg)
212 {
213 NDEF_SUPPORT *ndef_aux;
214 unsigned char *p;
215 int derlen;
216 const ASN1_AUX *aux;
217 ASN1_STREAM_ARG sarg;
218
219 if (!parg)
220 return 0;
221
222 ndef_aux = *(NDEF_SUPPORT **)parg;
223
224 aux = ndef_aux->it->funcs;
225
226 /* Finalize structures */
227 sarg.ndef_bio = ndef_aux->ndef_bio;
228 sarg.out = ndef_aux->out;
229 sarg.boundary = ndef_aux->boundary;
230 if (aux->asn1_cb(ASN1_OP_STREAM_POST,
231 &ndef_aux->val, ndef_aux->it, &sarg) <= 0)
232 return 0;
233
234 derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it);
235 p = OPENSSL_malloc(derlen);
236 ndef_aux->derbuf = p;
237 *pbuf = p;
238 derlen = ASN1_item_ndef_i2d(ndef_aux->val, &p, ndef_aux->it);
239
240 if (!*ndef_aux->boundary)
241 return 0;
242 *pbuf = *ndef_aux->boundary;
243 *plen = derlen - (*ndef_aux->boundary - ndef_aux->derbuf);
244
245 return 1;
246 }
diff --git a/src/lib/libssl/src/crypto/asn1/x_nx509.c b/src/lib/libssl/src/crypto/asn1/x_nx509.c
new file mode 100644
index 0000000000..fbd9a22db3
--- /dev/null
+++ b/src/lib/libssl/src/crypto/asn1/x_nx509.c
@@ -0,0 +1,72 @@
1/* x_nx509.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project 2005.
4 */
5/* ====================================================================
6 * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include <stddef.h>
60#include <openssl/x509.h>
61#include <openssl/asn1.h>
62#include <openssl/asn1t.h>
63
64/* Old netscape certificate wrapper format */
65
/*
 * ASN.1 template for NETSCAPE_X509: a SEQUENCE made of a mandatory
 * OCTET STRING field `header` followed by an optional X509 field `cert`.
 */
66ASN1_SEQUENCE(NETSCAPE_X509) = {
67 ASN1_SIMPLE(NETSCAPE_X509, header, ASN1_OCTET_STRING),
68 ASN1_OPT(NETSCAPE_X509, cert, X509)
69} ASN1_SEQUENCE_END(NETSCAPE_X509)
70
/* Per asn1t.h this expands to the new/free/d2i/i2d functions for the type. */
71IMPLEMENT_ASN1_FUNCTIONS(NETSCAPE_X509)
72
diff --git a/src/lib/libssl/src/crypto/bn/asm/alpha-mont.pl b/src/lib/libssl/src/crypto/bn/asm/alpha-mont.pl
new file mode 100644
index 0000000000..7a2cc3173b
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/alpha-mont.pl
@@ -0,0 +1,317 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# On 21264 RSA sign performance improves by 70/35/20/15 percent for
11# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
12# instructed to '-tune host' code with in-line assembler. Other
13# benchmarks improve by 15-20%. To anchor it to something else, the
14# code provides approximately the same performance per GHz as AMD64.
15# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
16# difference.
17
# Register allocation for
#
#   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
#                   const BN_ULONG *np, const BN_ULONG *n0, int num);
#
# The six arguments are taken from a0..a5, in prototype order.
($rp,$ap,$bp,$np,$n0,$num)=map("a$_",0..5);

# Temporaries: t0..t12, assigned in the order listed.
($lo0,$hi0,$lo1,$hi1,$aj,$bi,$nj,$tp,$alo,$ahi,$nlo,$nhi,$tj)=map("t$_",0..12);

# s3..s5 are spilled by the prologue of the generated code and reloaded
# before it returns.
($i,$j,$m1)=("s3","s4","s5");
42
43$code=<<___;
44#include <asm.h>
45#include <regdef.h>
46
47.text
48
49.set noat
50.set noreorder
51
52.globl bn_mul_mont
53.align 5
54.ent bn_mul_mont
55bn_mul_mont:
56 lda sp,-40(sp)
57 stq ra,0(sp)
58 stq s3,8(sp)
59 stq s4,16(sp)
60 stq s5,24(sp)
61 stq fp,32(sp)
62 mov sp,fp
63 .mask 0x0400f000,-40
64 .frame fp,40,ra
65 .prologue 0
66
67 .align 4
68 .set reorder
69 sextl $num,$num
70 mov 0,v0
71 cmplt $num,4,AT
72 bne AT,.Lexit
73
74 ldq $hi0,0($ap) # ap[0]
75 s8addq $num,16,AT
76 ldq $aj,8($ap)
77 subq sp,AT,sp
78 ldq $bi,0($bp) # bp[0]
79 mov -4096,AT
80 ldq $n0,0($n0)
81 and sp,AT,sp
82
83 mulq $hi0,$bi,$lo0
84 ldq $hi1,0($np) # np[0]
85 umulh $hi0,$bi,$hi0
86 ldq $nj,8($np)
87
88 mulq $lo0,$n0,$m1
89
90 mulq $hi1,$m1,$lo1
91 umulh $hi1,$m1,$hi1
92
93 addq $lo1,$lo0,$lo1
94 cmpult $lo1,$lo0,AT
95 addq $hi1,AT,$hi1
96
97 mulq $aj,$bi,$alo
98 mov 2,$j
99 umulh $aj,$bi,$ahi
100 mov sp,$tp
101
102 mulq $nj,$m1,$nlo
103 s8addq $j,$ap,$aj
104 umulh $nj,$m1,$nhi
105 s8addq $j,$np,$nj
106.align 4
107.L1st:
108 .set noreorder
109 ldq $aj,($aj)
110 addl $j,1,$j
111 ldq $nj,($nj)
112 lda $tp,8($tp)
113
114 addq $alo,$hi0,$lo0
115 mulq $aj,$bi,$alo
116 cmpult $lo0,$hi0,AT
117 addq $nlo,$hi1,$lo1
118
119 mulq $nj,$m1,$nlo
120 addq $ahi,AT,$hi0
121 cmpult $lo1,$hi1,v0
122 cmplt $j,$num,$tj
123
124 umulh $aj,$bi,$ahi
125 addq $nhi,v0,$hi1
126 addq $lo1,$lo0,$lo1
127 s8addq $j,$ap,$aj
128
129 umulh $nj,$m1,$nhi
130 cmpult $lo1,$lo0,v0
131 addq $hi1,v0,$hi1
132 s8addq $j,$np,$nj
133
134 stq $lo1,-8($tp)
135 nop
136 unop
137 bne $tj,.L1st
138 .set reorder
139
140 addq $alo,$hi0,$lo0
141 addq $nlo,$hi1,$lo1
142 cmpult $lo0,$hi0,AT
143 cmpult $lo1,$hi1,v0
144 addq $ahi,AT,$hi0
145 addq $nhi,v0,$hi1
146
147 addq $lo1,$lo0,$lo1
148 cmpult $lo1,$lo0,v0
149 addq $hi1,v0,$hi1
150
151 stq $lo1,0($tp)
152
153 addq $hi1,$hi0,$hi1
154 cmpult $hi1,$hi0,AT
155 stq $hi1,8($tp)
156 stq AT,16($tp)
157
158 mov 1,$i
159.align 4
160.Louter:
161 s8addq $i,$bp,$bi
162 ldq $hi0,($ap)
163 ldq $aj,8($ap)
164 ldq $bi,($bi)
165 ldq $hi1,($np)
166 ldq $nj,8($np)
167 ldq $tj,(sp)
168
169 mulq $hi0,$bi,$lo0
170 umulh $hi0,$bi,$hi0
171
172 addq $lo0,$tj,$lo0
173 cmpult $lo0,$tj,AT
174 addq $hi0,AT,$hi0
175
176 mulq $lo0,$n0,$m1
177
178 mulq $hi1,$m1,$lo1
179 umulh $hi1,$m1,$hi1
180
181 addq $lo1,$lo0,$lo1
182 cmpult $lo1,$lo0,AT
183 mov 2,$j
184 addq $hi1,AT,$hi1
185
186 mulq $aj,$bi,$alo
187 mov sp,$tp
188 umulh $aj,$bi,$ahi
189
190 mulq $nj,$m1,$nlo
191 s8addq $j,$ap,$aj
192 umulh $nj,$m1,$nhi
193.align 4
194.Linner:
195 .set noreorder
196 ldq $tj,8($tp) #L0
197 nop #U1
198 ldq $aj,($aj) #L1
199 s8addq $j,$np,$nj #U0
200
201 ldq $nj,($nj) #L0
202 nop #U1
203 addq $alo,$hi0,$lo0 #L1
204 lda $tp,8($tp)
205
206 mulq $aj,$bi,$alo #U1
207 cmpult $lo0,$hi0,AT #L0
208 addq $nlo,$hi1,$lo1 #L1
209 addl $j,1,$j
210
211 mulq $nj,$m1,$nlo #U1
212 addq $ahi,AT,$hi0 #L0
213 addq $lo0,$tj,$lo0 #L1
214 cmpult $lo1,$hi1,v0 #U0
215
216 umulh $aj,$bi,$ahi #U1
217 cmpult $lo0,$tj,AT #L0
218 addq $lo1,$lo0,$lo1 #L1
219 addq $nhi,v0,$hi1 #U0
220
221 umulh $nj,$m1,$nhi #U1
222 s8addq $j,$ap,$aj #L0
223 cmpult $lo1,$lo0,v0 #L1
224 cmplt $j,$num,$tj #U0 # borrow $tj
225
226 addq $hi0,AT,$hi0 #L0
227 addq $hi1,v0,$hi1 #U1
228 stq $lo1,-8($tp) #L1
229 bne $tj,.Linner #U0
230 .set reorder
231
232 ldq $tj,8($tp)
233 addq $alo,$hi0,$lo0
234 addq $nlo,$hi1,$lo1
235 cmpult $lo0,$hi0,AT
236 cmpult $lo1,$hi1,v0
237 addq $ahi,AT,$hi0
238 addq $nhi,v0,$hi1
239
240 addq $lo0,$tj,$lo0
241 cmpult $lo0,$tj,AT
242 addq $hi0,AT,$hi0
243
244 ldq $tj,16($tp)
245 addq $lo1,$lo0,$j
246 cmpult $j,$lo0,v0
247 addq $hi1,v0,$hi1
248
249 addq $hi1,$hi0,$lo1
250 stq $j,($tp)
251 cmpult $lo1,$hi0,$hi1
252 addq $lo1,$tj,$lo1
253 cmpult $lo1,$tj,AT
254 addl $i,1,$i
255 addq $hi1,AT,$hi1
256 stq $lo1,8($tp)
257 cmplt $i,$num,$tj # borrow $tj
258 stq $hi1,16($tp)
259 bne $tj,.Louter
260
261 s8addq $num,sp,$tj # &tp[num]
262 mov $rp,$bp # put rp aside
263 mov sp,$tp
264 mov sp,$ap
265 mov 0,$hi0 # clear borrow bit
266
267.align 4
268.Lsub: ldq $lo0,($tp)
269 ldq $lo1,($np)
270 lda $tp,8($tp)
271 lda $np,8($np)
272 subq $lo0,$lo1,$lo1 # tp[i]-np[i]
273 cmpult $lo0,$lo1,AT
274 subq $lo1,$hi0,$lo0
275 cmpult $lo1,$lo0,$hi0
276 or $hi0,AT,$hi0
277 stq $lo0,($rp)
278 cmpult $tp,$tj,v0
279 lda $rp,8($rp)
280 bne v0,.Lsub
281
282 subq $hi1,$hi0,$hi0 # handle upmost overflow bit
283 mov sp,$tp
284 mov $bp,$rp # restore rp
285
286 and sp,$hi0,$ap
287 bic $bp,$hi0,$bp
288 bis $bp,$ap,$ap # ap=borrow?tp:rp
289
290.align 4
291.Lcopy: ldq $aj,($ap) # copy or in-place refresh
292 lda $tp,8($tp)
293 lda $rp,8($rp)
294 lda $ap,8($ap)
295 stq zero,-8($tp) # zap tp
296 cmpult $tp,$tj,AT
297 stq $aj,-8($rp)
298 bne AT,.Lcopy
299 mov 1,v0
300
301.Lexit:
302 .set noreorder
303 mov fp,sp
304 /*ldq ra,0(sp)*/
305 ldq s3,8(sp)
306 ldq s4,16(sp)
307 ldq s5,24(sp)
308 ldq fp,32(sp)
309 lda sp,40(sp)
310 ret (ra)
311.end bn_mul_mont
312.rdata
313.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
314___
315
# Emit the generated assembly.  The close is checked because buffered
# output is flushed there: a failure (full disk, closed pipe) would
# otherwise leave a truncated assembly file behind without any error.
print $code;
close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl b/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl
new file mode 100644
index 0000000000..05d5dc1a48
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl
@@ -0,0 +1,200 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# January 2007.
11
12# Montgomery multiplication for ARMv4.
13#
14# Performance improvement naturally varies among CPU implementations
15# and compilers. The code was observed to provide +65-35% improvement
16# [depending on key length, less for longer keys] on ARM920T, and
17# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
18# base and compiler generated code with in-lined umull and even umlal
19# instructions. The latter means that this code didn't really have an
20# "advantage" of utilizing some "secret" instruction.
21#
22# The code is interoperable with Thumb ISA and is rather compact, less
23# than 1/2KB. Windows CE port would be trivial, as it's exclusively
24# about decorations, ABI and instruction syntax are identical.
25
# Register allocation.
#   r0       $num  starts as the num argument, later holds &tp[num-1]
#   r1       $ap
#   r2       $bp, $bi and $rp share this register
#   r3       $np
#   r4-r8    $tp, $aj, $nj, $tj, $n0
#   r9       not used: reserved by ELF as platform specific, e.g. TLS pointer
#   r10      $alo  (sl, gcc uses it to keep @GOT)
#   r11      $ahi  (fp)
#   r12      $nlo  (ip)
#   r13      stack pointer
#   r14      $nhi  (lr)
#   r15      program counter
($num,$ap,$np)=("r0","r1","r3");
$bp=$bi=$rp="r2";
($tp,$aj,$nj,$tj,$n0)=map("r$_",4..8);
($alo,$ahi,$nlo,$nhi)=("r10","r11","r12","r14");

# Argument block, addressed relative to &tp[num-1] (i.e. $num).  Words
# 12..15 hold rp, bp, n0 and num; ap and np stay in r1 and r3 throughout.
# The slot that first holds num is reused for &bp[num] ($_bpend).
($_rp,$_bp,$_n0,$_num)=map("${num},#${_}*4",12..15);
$_bpend=$_num;
50
51$code=<<___;
52.text
53
54.global bn_mul_mont
55.type bn_mul_mont,%function
56
57.align 2
58bn_mul_mont:
59 stmdb sp!,{r0,r2} @ sp points at argument block
60 ldr $num,[sp,#3*4] @ load num
61 cmp $num,#2
62 movlt r0,#0
63 addlt sp,sp,#2*4
64 blt .Labrt
65
66 stmdb sp!,{r4-r12,lr} @ save 10 registers
67
68 mov $num,$num,lsl#2 @ rescale $num for byte count
69 sub sp,sp,$num @ alloca(4*num)
70 sub sp,sp,#4 @ +extra dword
71 sub $num,$num,#4 @ "num=num-1"
72 add $tp,$bp,$num @ &bp[num-1]
73
74 add $num,sp,$num @ $num to point at &tp[num-1]
75 ldr $n0,[$_n0] @ &n0
76 ldr $bi,[$bp] @ bp[0]
77 ldr $aj,[$ap],#4 @ ap[0],ap++
78 ldr $nj,[$np],#4 @ np[0],np++
79 ldr $n0,[$n0] @ *n0
80 str $tp,[$_bpend] @ save &bp[num]
81
82 umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0]
83 str $n0,[$_n0] @ save n0 value
84 mul $n0,$alo,$n0 @ "tp[0]"*n0
85 mov $nlo,#0
86 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]"
87 mov $tp,sp
88
89.L1st:
90 ldr $aj,[$ap],#4 @ ap[j],ap++
91 mov $alo,$ahi
92 mov $ahi,#0
93 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
94 ldr $nj,[$np],#4 @ np[j],np++
95 mov $nhi,#0
96 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
97 adds $nlo,$nlo,$alo
98 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
99 adc $nlo,$nhi,#0
100 cmp $tp,$num
101 bne .L1st
102
103 adds $nlo,$nlo,$ahi
104 mov $nhi,#0
105 adc $nhi,$nhi,#0
106 ldr $tp,[$_bp] @ restore bp
107 str $nlo,[$num] @ tp[num-1]=
108 ldr $n0,[$_n0] @ restore n0
109 str $nhi,[$num,#4] @ tp[num]=
110
111.Louter:
112 sub $tj,$num,sp @ "original" $num-1 value
113 sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
114 sub $np,$np,$tj @ "rewind" np to &np[1]
115 ldr $bi,[$tp,#4]! @ *(++bp)
116 ldr $aj,[$ap,#-4] @ ap[0]
117 ldr $nj,[$np,#-4] @ np[0]
118 ldr $alo,[sp] @ tp[0]
119 ldr $tj,[sp,#4] @ tp[1]
120
121 mov $ahi,#0
122 umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0]
123 str $tp,[$_bp] @ save bp
124 mul $n0,$alo,$n0
125 mov $nlo,#0
126 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]"
127 mov $tp,sp
128
129.Linner:
130 ldr $aj,[$ap],#4 @ ap[j],ap++
131 adds $alo,$ahi,$tj @ +=tp[j]
132 mov $ahi,#0
133 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
134 ldr $nj,[$np],#4 @ np[j],np++
135 mov $nhi,#0
136 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
137 ldr $tj,[$tp,#8] @ tp[j+1]
138 adc $ahi,$ahi,#0
139 adds $nlo,$nlo,$alo
140 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
141 adc $nlo,$nhi,#0
142 cmp $tp,$num
143 bne .Linner
144
145 adds $nlo,$nlo,$ahi
146 mov $nhi,#0
147 adc $nhi,$nhi,#0
148 adds $nlo,$nlo,$tj
149 adc $nhi,$nhi,#0
150 ldr $tp,[$_bp] @ restore bp
151 ldr $tj,[$_bpend] @ restore &bp[num]
152 str $nlo,[$num] @ tp[num-1]=
153 ldr $n0,[$_n0] @ restore n0
154 str $nhi,[$num,#4] @ tp[num]=
155
156 cmp $tp,$tj
157 bne .Louter
158
159 ldr $rp,[$_rp] @ pull rp
160 add $num,$num,#4 @ $num to point at &tp[num]
161 sub $aj,$num,sp @ "original" num value
162 mov $tp,sp @ "rewind" $tp
163 mov $ap,$tp @ "borrow" $ap
164 sub $np,$np,$aj @ "rewind" $np to &np[0]
165
166 subs $tj,$tj,$tj @ "clear" carry flag
167.Lsub: ldr $tj,[$tp],#4
168 ldr $nj,[$np],#4
169 sbcs $tj,$tj,$nj @ tp[j]-np[j]
170 str $tj,[$rp],#4 @ rp[j]=
171 teq $tp,$num @ preserve carry
172 bne .Lsub
173 sbcs $nhi,$nhi,#0 @ upmost carry
174 mov $tp,sp @ "rewind" $tp
175 sub $rp,$rp,$aj @ "rewind" $rp
176
177 and $ap,$tp,$nhi
178 bic $np,$rp,$nhi
179 orr $ap,$ap,$np @ ap=borrow?tp:rp
180
181.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh
182 str sp,[$tp],#4 @ zap tp
183 str $tj,[$rp],#4
184 cmp $tp,$num
185 bne .Lcopy
186
187 add sp,$num,#4 @ skip over tp[num+1]
188 ldmia sp!,{r4-r12,lr} @ restore registers
189 add sp,sp,#2*4 @ skip over {r0,r2}
190 mov r0,#1
191.Labrt: tst lr,#1
192 moveq pc,lr @ be binary compatible with V4, yet
193 bx lr @ interoperable with Thumb ISA:-)
194.size bn_mul_mont,.-bn_mul_mont
195.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
196___
197
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4

# Emit the generated assembly.  The close is checked because buffered
# output is flushed there: a failure (full disk, closed pipe) would
# otherwise leave a truncated assembly file behind without any error.
print $code;
close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl b/src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl
new file mode 100644
index 0000000000..8f9156e02a
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl
@@ -0,0 +1,327 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# This module doesn't present direct interest for OpenSSL, because it
11# doesn't provide better performance for longer keys. While 512-bit
12# RSA private key operations are 40% faster, 1024-bit ones are hardly
13# faster at all, while longer key operations are slower by up to 20%.
14# It might be of interest to embedded system developers though, as
15# it's smaller than 1KB, yet offers ~3x improvement over compiler
16# generated code.
17#
18# The module targets N32 and N64 MIPS ABIs and currently is a bit
19# IRIX-centric, i.e. is likely to require adaptation for other OSes.
20
# Register allocation for
#
#   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
#                   const BN_ULONG *np, const BN_ULONG *n0, int num);
#
# a0..a5 carry the six arguments in prototype order; a6 and a7 are used
# as the $lo0/$hi0 accumulator pair.
($rp,$ap,$bp,$np,$n0,$num,$lo0,$hi0)=map("a$_",0..7);
($lo1,$hi1)=("v0","v1");
($aj,$bi,$nj,$tp)=map("t$_",0..3);

# s0..s6 are stored to the frame by the generated code before use and
# reloaded before it returns.
($alo,$ahi,$nlo,$nhi,$tj,$i,$j)=map("s$_",0..6);
($fp,$m1)=("t8","t9");

# Not referenced by the code template in this script, which uses the
# literal frame size 64.
$FRAME=8*(2+8);
48
49$code=<<___;
50#include <asm.h>
51#include <regdef.h>
52
53.text
54
55.set noat
56.set reorder
57
58.align 5
59.globl bn_mul_mont
60.ent bn_mul_mont
61bn_mul_mont:
62 .set noreorder
63 PTR_SUB sp,64
64 move $fp,sp
65 .frame $fp,64,ra
66 slt AT,$num,4
67 li v0,0
68 beqzl AT,.Lproceed
69 nop
70 jr ra
71 PTR_ADD sp,$fp,64
72 .set reorder
73.align 5
74.Lproceed:
75 ld $n0,0($n0)
76 ld $bi,0($bp) # bp[0]
77 ld $aj,0($ap) # ap[0]
78 ld $nj,0($np) # np[0]
79 PTR_SUB sp,16 # place for two extra words
80 sll $num,3
81 li AT,-4096
82 PTR_SUB sp,$num
83 and sp,AT
84
85 sd s0,0($fp)
86 sd s1,8($fp)
87 sd s2,16($fp)
88 sd s3,24($fp)
89 sd s4,32($fp)
90 sd s5,40($fp)
91 sd s6,48($fp)
92 sd s7,56($fp)
93
94 dmultu $aj,$bi
95 ld $alo,8($ap)
96 ld $nlo,8($np)
97 mflo $lo0
98 mfhi $hi0
99 dmultu $lo0,$n0
100 mflo $m1
101
102 dmultu $alo,$bi
103 mflo $alo
104 mfhi $ahi
105
106 dmultu $nj,$m1
107 mflo $lo1
108 mfhi $hi1
109 dmultu $nlo,$m1
110 daddu $lo1,$lo0
111 sltu AT,$lo1,$lo0
112 daddu $hi1,AT
113 mflo $nlo
114 mfhi $nhi
115
116 move $tp,sp
117 li $j,16
118.align 4
119.L1st:
120 .set noreorder
121 PTR_ADD $aj,$ap,$j
122 ld $aj,($aj)
123 PTR_ADD $nj,$np,$j
124 ld $nj,($nj)
125
126 dmultu $aj,$bi
127 daddu $lo0,$alo,$hi0
128 daddu $lo1,$nlo,$hi1
129 sltu AT,$lo0,$hi0
130 sltu s7,$lo1,$hi1
131 daddu $hi0,$ahi,AT
132 daddu $hi1,$nhi,s7
133 mflo $alo
134 mfhi $ahi
135
136 daddu $lo1,$lo0
137 sltu AT,$lo1,$lo0
138 dmultu $nj,$m1
139 daddu $hi1,AT
140 addu $j,8
141 sd $lo1,($tp)
142 sltu s7,$j,$num
143 mflo $nlo
144 mfhi $nhi
145
146 bnez s7,.L1st
147 PTR_ADD $tp,8
148 .set reorder
149
150 daddu $lo0,$alo,$hi0
151 sltu AT,$lo0,$hi0
152 daddu $hi0,$ahi,AT
153
154 daddu $lo1,$nlo,$hi1
155 sltu s7,$lo1,$hi1
156 daddu $hi1,$nhi,s7
157 daddu $lo1,$lo0
158 sltu AT,$lo1,$lo0
159 daddu $hi1,AT
160
161 sd $lo1,($tp)
162
163 daddu $hi1,$hi0
164 sltu AT,$hi1,$hi0
165 sd $hi1,8($tp)
166 sd AT,16($tp)
167
168 li $i,8
169.align 4
170.Louter:
171 PTR_ADD $bi,$bp,$i
172 ld $bi,($bi)
173 ld $aj,($ap)
174 ld $alo,8($ap)
175 ld $tj,(sp)
176
177 dmultu $aj,$bi
178 ld $nj,($np)
179 ld $nlo,8($np)
180 mflo $lo0
181 mfhi $hi0
182 daddu $lo0,$tj
183 dmultu $lo0,$n0
184 sltu AT,$lo0,$tj
185 daddu $hi0,AT
186 mflo $m1
187
188 dmultu $alo,$bi
189 mflo $alo
190 mfhi $ahi
191
192 dmultu $nj,$m1
193 mflo $lo1
194 mfhi $hi1
195
196 dmultu $nlo,$m1
197 daddu $lo1,$lo0
198 sltu AT,$lo1,$lo0
199 daddu $hi1,AT
200 mflo $nlo
201 mfhi $nhi
202
203 move $tp,sp
204 li $j,16
205 ld $tj,8($tp)
206.align 4
207.Linner:
208 .set noreorder
209 PTR_ADD $aj,$ap,$j
210 ld $aj,($aj)
211 PTR_ADD $nj,$np,$j
212 ld $nj,($nj)
213
214 dmultu $aj,$bi
215 daddu $lo0,$alo,$hi0
216 daddu $lo1,$nlo,$hi1
217 sltu AT,$lo0,$hi0
218 sltu s7,$lo1,$hi1
219 daddu $hi0,$ahi,AT
220 daddu $hi1,$nhi,s7
221 mflo $alo
222 mfhi $ahi
223
224 daddu $lo0,$tj
225 addu $j,8
226 dmultu $nj,$m1
227 sltu AT,$lo0,$tj
228 daddu $lo1,$lo0
229 daddu $hi0,AT
230 sltu s7,$lo1,$lo0
231 ld $tj,16($tp)
232 daddu $hi1,s7
233 sltu AT,$j,$num
234 mflo $nlo
235 mfhi $nhi
236 sd $lo1,($tp)
237 bnez AT,.Linner
238 PTR_ADD $tp,8
239 .set reorder
240
241 daddu $lo0,$alo,$hi0
242 sltu AT,$lo0,$hi0
243 daddu $hi0,$ahi,AT
244 daddu $lo0,$tj
245 sltu s7,$lo0,$tj
246 daddu $hi0,s7
247
248 ld $tj,16($tp)
249 daddu $lo1,$nlo,$hi1
250 sltu AT,$lo1,$hi1
251 daddu $hi1,$nhi,AT
252 daddu $lo1,$lo0
253 sltu s7,$lo1,$lo0
254 daddu $hi1,s7
255 sd $lo1,($tp)
256
257 daddu $lo1,$hi1,$hi0
258 sltu $hi1,$lo1,$hi0
259 daddu $lo1,$tj
260 sltu AT,$lo1,$tj
261 daddu $hi1,AT
262 sd $lo1,8($tp)
263 sd $hi1,16($tp)
264
265 addu $i,8
266 sltu s7,$i,$num
267 bnez s7,.Louter
268
269 .set noreorder
270 PTR_ADD $tj,sp,$num # &tp[num]
271 move $tp,sp
272 move $ap,sp
273 li $hi0,0 # clear borrow bit
274
275.align 4
276.Lsub: ld $lo0,($tp)
277 ld $lo1,($np)
278 PTR_ADD $tp,8
279 PTR_ADD $np,8
280 dsubu $lo1,$lo0,$lo1 # tp[i]-np[i]
281 sgtu AT,$lo1,$lo0
282 dsubu $lo0,$lo1,$hi0
283 sgtu $hi0,$lo0,$lo1
284 sd $lo0,($rp)
285 or $hi0,AT
286 sltu AT,$tp,$tj
287 bnez AT,.Lsub
288 PTR_ADD $rp,8
289
290 dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit
291 move $tp,sp
292 PTR_SUB $rp,$num # restore rp
293 not $hi1,$hi0
294
295 and $ap,$hi0,sp
296 and $bp,$hi1,$rp
297 or $ap,$ap,$bp # ap=borrow?tp:rp
298
299.align 4
300.Lcopy: ld $aj,($ap)
301 PTR_ADD $ap,8
302 PTR_ADD $tp,8
303 sd zero,-8($tp)
304 sltu AT,$tp,$tj
305 sd $aj,($rp)
306 bnez AT,.Lcopy
307 PTR_ADD $rp,8
308
309 ld s0,0($fp)
310 ld s1,8($fp)
311 ld s2,16($fp)
312 ld s3,24($fp)
313 ld s4,32($fp)
314 ld s5,40($fp)
315 ld s6,48($fp)
316 ld s7,56($fp)
317 li v0,1
318 jr ra
319 PTR_ADD sp,$fp,64
320 .set reorder
321END(bn_mul_mont)
322.rdata
323.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
324___
325
# Emit the generated assembly.  The close is checked because buffered
# output is flushed there: a failure (full disk, closed pipe) would
# otherwise leave a truncated assembly file behind without any error.
print $code;
close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl b/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl
new file mode 100644
index 0000000000..7849eae959
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl
@@ -0,0 +1,323 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2006
11
12# "Teaser" Montgomery multiplication module for PowerPC. It's possible
13# to gain a bit more by modulo-scheduling outer loop, then dedicated
14# squaring procedure should give further 20% and code can be adapted
15# for 32-bit application running on 64-bit CPU. As for the latter,
16# it won't be able to achieve "native" 64-bit performance, because in
17# 32-bit application context every addc instruction will have to be
18# expanded as addc, twice right shift by 32 and finally adde, etc.
19# So far RSA *sign* performance improvement over pre-bn_mul_mont asm
20# for 64-bit application running on PPC970/G5 is:
21#
22# 512-bit +65%
23# 1024-bit +35%
24# 2048-bit +18%
25# 4096-bit +4%
26
# First command-line argument selects the target flavour.
$flavour = shift;

# Word size and red-zone size follow from the flavour string; a "32" in
# the name is tested before a "64", anything else is rejected.
if    ($flavour =~ /32/)	{ $BITS=32; $RZONE=224; }
elsif ($flavour =~ /64/)	{ $BITS=64; $RZONE=288; }
else				{ die "nonsense $flavour"; }

$BNSZ=	$BITS/8;	# bytes per BN_ULONG
$SIZE_T=$BITS/8;	# 4 for 32-bit, 8 for 64-bit
$FRAME=	$SIZE_T*16;

# Instruction mnemonics for the selected word size, in this order:
# load, load and update, load indexed, store, store and update,
# store indexed, store indexed and update, unsigned multiply low,
# unsigned multiply high, unsigned compare, unsigned shift right by
# immediate.
($LD,$LDU,$LDX,$ST,$STU,$STX,$STUX,$UMULL,$UMULH,$UCMP,$SHRI) = $BITS==32
	? qw(lwz lwzu lwzx stw stwu stwx stwux mullw mulhwu cmplw srwi)
	: qw(ld  ldu  ldx  std stdu stdx stdux mulld mulhdu cmpld srdi);

# Register save/restore use the plain store/load of the word size.
$PUSH=$ST;
$POP=$LD;
71
# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm
# relative to it.  $dir is the directory part of $0 including the
# trailing separator, or empty when $0 carries no directory part.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Route everything printed to STDOUT through the translator; the next
# command-line argument, if any, is passed on to it.
# "or" is required here: with "||" the die was bound to the (always
# true) command string, so a failed open went unreported.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
78
# Register allocation.
($sp,$toc)=("r1","r2");

# r3 is shared: the generated code first copies it into $rp (r9) and
# from then on uses r3 as $ovf.
$ovf="r3";
($ap,$bp,$np,$n0,$num,$rp,$aj,$nj,$tj)=map("r$_",4..12);

# non-volatile registers, saved and restored by the generated code
($i,$j,$tp,$m0,$m1,$lo0,$hi0,$lo1,$hi1,$alo,$ahi,$nlo)=map("r$_",14..25);

$nhi="r0";
106
107$code=<<___;
108.machine "any"
109.text
110
111.globl .bn_mul_mont
112.align 4
113.bn_mul_mont:
114 cmpwi $num,4
115 mr $rp,r3 ; $rp is reassigned
116 li r3,0
117 bltlr
118
119 slwi $num,$num,`log($BNSZ)/log(2)`
120 li $tj,-4096
121 addi $ovf,$num,`$FRAME+$RZONE`
122 subf $ovf,$ovf,$sp ; $sp-$ovf
123 and $ovf,$ovf,$tj ; minimize TLB usage
124 subf $ovf,$sp,$ovf ; $ovf-$sp
125 srwi $num,$num,`log($BNSZ)/log(2)`
126 $STUX $sp,$sp,$ovf
127
128 $PUSH r14,`4*$SIZE_T`($sp)
129 $PUSH r15,`5*$SIZE_T`($sp)
130 $PUSH r16,`6*$SIZE_T`($sp)
131 $PUSH r17,`7*$SIZE_T`($sp)
132 $PUSH r18,`8*$SIZE_T`($sp)
133 $PUSH r19,`9*$SIZE_T`($sp)
134 $PUSH r20,`10*$SIZE_T`($sp)
135 $PUSH r21,`11*$SIZE_T`($sp)
136 $PUSH r22,`12*$SIZE_T`($sp)
137 $PUSH r23,`13*$SIZE_T`($sp)
138 $PUSH r24,`14*$SIZE_T`($sp)
139 $PUSH r25,`15*$SIZE_T`($sp)
140
141 $LD $n0,0($n0) ; pull n0[0] value
142 addi $num,$num,-2 ; adjust $num for counter register
143
144 $LD $m0,0($bp) ; m0=bp[0]
145 $LD $aj,0($ap) ; ap[0]
146 addi $tp,$sp,$FRAME
147 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
148 $UMULH $hi0,$aj,$m0
149
150 $LD $aj,$BNSZ($ap) ; ap[1]
151 $LD $nj,0($np) ; np[0]
152
153 $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0
154
155 $UMULL $alo,$aj,$m0 ; ap[1]*bp[0]
156 $UMULH $ahi,$aj,$m0
157
158 $UMULL $lo1,$nj,$m1 ; np[0]*m1
159 $UMULH $hi1,$nj,$m1
160 $LD $nj,$BNSZ($np) ; np[1]
161 addc $lo1,$lo1,$lo0
162 addze $hi1,$hi1
163
164 $UMULL $nlo,$nj,$m1 ; np[1]*m1
165 $UMULH $nhi,$nj,$m1
166
167 mtctr $num
168 li $j,`2*$BNSZ`
169.align 4
170L1st:
171 $LDX $aj,$ap,$j ; ap[j]
172 addc $lo0,$alo,$hi0
173 $LDX $nj,$np,$j ; np[j]
174 addze $hi0,$ahi
175 $UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
176 addc $lo1,$nlo,$hi1
177 $UMULH $ahi,$aj,$m0
178 addze $hi1,$nhi
179 $UMULL $nlo,$nj,$m1 ; np[j]*m1
180 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
181 $UMULH $nhi,$nj,$m1
182 addze $hi1,$hi1
183 $ST $lo1,0($tp) ; tp[j-1]
184
185 addi $j,$j,$BNSZ ; j++
186 addi $tp,$tp,$BNSZ ; tp++
187 bdnz- L1st
188;L1st
189 addc $lo0,$alo,$hi0
190 addze $hi0,$ahi
191
192 addc $lo1,$nlo,$hi1
193 addze $hi1,$nhi
194 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
195 addze $hi1,$hi1
196 $ST $lo1,0($tp) ; tp[j-1]
197
198 li $ovf,0
199 addc $hi1,$hi1,$hi0
200 addze $ovf,$ovf ; upmost overflow bit
201 $ST $hi1,$BNSZ($tp)
202
203 li $i,$BNSZ
204.align 4
205Louter:
206 $LDX $m0,$bp,$i ; m0=bp[i]
207 $LD $aj,0($ap) ; ap[0]
208 addi $tp,$sp,$FRAME
209 $LD $tj,$FRAME($sp) ; tp[0]
210 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
211 $UMULH $hi0,$aj,$m0
212 $LD $aj,$BNSZ($ap) ; ap[1]
213 $LD $nj,0($np) ; np[0]
214 addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
215 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
216 addze $hi0,$hi0
217 $UMULL $m1,$lo0,$n0 ; tp[0]*n0
218 $UMULH $ahi,$aj,$m0
219 $UMULL $lo1,$nj,$m1 ; np[0]*m1
220 $UMULH $hi1,$nj,$m1
221 $LD $nj,$BNSZ($np) ; np[1]
222 addc $lo1,$lo1,$lo0
223 $UMULL $nlo,$nj,$m1 ; np[1]*m1
224 addze $hi1,$hi1
225 $UMULH $nhi,$nj,$m1
226
227 mtctr $num
228 li $j,`2*$BNSZ`
229.align 4
230Linner:
231 $LDX $aj,$ap,$j ; ap[j]
232 addc $lo0,$alo,$hi0
233 $LD $tj,$BNSZ($tp) ; tp[j]
234 addze $hi0,$ahi
235 $LDX $nj,$np,$j ; np[j]
236 addc $lo1,$nlo,$hi1
237 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
238 addze $hi1,$nhi
239 $UMULH $ahi,$aj,$m0
240 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
241 $UMULL $nlo,$nj,$m1 ; np[j]*m1
242 addze $hi0,$hi0
243 $UMULH $nhi,$nj,$m1
244 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
245 addi $j,$j,$BNSZ ; j++
246 addze $hi1,$hi1
247 $ST $lo1,0($tp) ; tp[j-1]
248 addi $tp,$tp,$BNSZ ; tp++
249 bdnz- Linner
250;Linner
251 $LD $tj,$BNSZ($tp) ; tp[j]
252 addc $lo0,$alo,$hi0
253 addze $hi0,$ahi
254 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
255 addze $hi0,$hi0
256
257 addc $lo1,$nlo,$hi1
258 addze $hi1,$nhi
259 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
260 addze $hi1,$hi1
261 $ST $lo1,0($tp) ; tp[j-1]
262
263 addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA]
264 li $ovf,0
265 adde $hi1,$hi1,$hi0
266 addze $ovf,$ovf
267 $ST $hi1,$BNSZ($tp)
268;
269 slwi $tj,$num,`log($BNSZ)/log(2)`
270 $UCMP $i,$tj
271 addi $i,$i,$BNSZ
272 ble- Louter
273
274 addi $num,$num,2 ; restore $num
275 subfc $j,$j,$j ; j=0 and "clear" XER[CA]
276 addi $tp,$sp,$FRAME
277 mtctr $num
278
279.align 4
280Lsub: $LDX $tj,$tp,$j
281 $LDX $nj,$np,$j
282 subfe $aj,$nj,$tj ; tp[j]-np[j]
283 $STX $aj,$rp,$j
284 addi $j,$j,$BNSZ
285 bdnz- Lsub
286
287 li $j,0
288 mtctr $num
289 subfe $ovf,$j,$ovf ; handle upmost overflow bit
290 and $ap,$tp,$ovf
291 andc $np,$rp,$ovf
292 or $ap,$ap,$np ; ap=borrow?tp:rp
293
294.align 4
295Lcopy: ; copy or in-place refresh
296 $LDX $tj,$ap,$j
297 $STX $tj,$rp,$j
298 $STX $j,$tp,$j ; zap at once
299 addi $j,$j,$BNSZ
300 bdnz- Lcopy
301
302 $POP r14,`4*$SIZE_T`($sp)
303 $POP r15,`5*$SIZE_T`($sp)
304 $POP r16,`6*$SIZE_T`($sp)
305 $POP r17,`7*$SIZE_T`($sp)
306 $POP r18,`8*$SIZE_T`($sp)
307 $POP r19,`9*$SIZE_T`($sp)
308 $POP r20,`10*$SIZE_T`($sp)
309 $POP r21,`11*$SIZE_T`($sp)
310 $POP r22,`12*$SIZE_T`($sp)
311 $POP r23,`13*$SIZE_T`($sp)
312 $POP r24,`14*$SIZE_T`($sp)
313 $POP r25,`15*$SIZE_T`($sp)
314 $POP $sp,0($sp)
315 li r3,1
316 blr
317 .long 0
318.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
319___
320
# Replace every `...` span in the template with the result of evaluating
# it as Perl (used for the computed offsets and shift counts).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Emit the result.  STDOUT is a pipe to the translator at this point, so
# the close is checked: it fails both on a write error and when the
# translator exits with a non-zero status.
print $code;
close STDOUT or die "error closing STDOUT: $! (status $?)";
diff --git a/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl b/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl
new file mode 100644
index 0000000000..3449b35855
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl
@@ -0,0 +1,918 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# December 2007
11
12# The reason for undertaking this effort is basically the following. Even though
13# Power 6 CPU operates at incredible 4.7GHz clock frequency, its PKI
14# performance was observed to be less than impressive, essentially as
15# fast as 1.8GHz PPC970, or 2.6 times(!) slower than one would hope.
16# Well, it's not surprising that IBM had to make some sacrifices to
17# boost the clock frequency that much, but no overall improvement?
18# Having observed how much difference did switching to FPU make on
19# UltraSPARC, playing same stunt on Power 6 appeared appropriate...
20# Unfortunately the resulting performance improvement is not as
21# impressive, ~30%, and in absolute terms is still very far from what
22# one would expect from a 4.7GHz CPU. There is a chance that I'm doing
23# something wrong, but in the absence of assembler-level micro-profiling
24# data or at least a decent platform guide I can't tell... Or better
25# results might be achieved with VMX... Anyway, this module provides
26# *worse* performance on other PowerPC implementations, ~40-15% slower
27# on PPC970 depending on key length and ~40% slower on Power 5 for all
28# key lengths. As it's obviously inappropriate as "best all-round"
29# alternative, it has to be complemented with run-time CPU family
30# detection. Oh! It should also be noted that, unlike on other PowerPC
31# implementations, the IALU ppc-mont.pl module performs *suboptimally* on
32# >=1024-bit key lengths on Power 6. It should also be noted that
33# *everything* said so far applies to 64-bit builds! As far as 32-bit
34# application executed on 64-bit CPU goes, this module is likely to
35# become preferred choice, because it's easy to adapt it for such
36# case and *is* faster than 32-bit ppc-mont.pl on *all* processors.
37
38# February 2008
39
40# Micro-profiling assisted optimization results in ~15% improvement
41# over original ppc64-mont.pl version, or overall ~50% improvement
42# over ppc.pl module on Power 6. If compared to ppc-mont.pl on same
43# Power 6 CPU, this module is 5-150% faster depending on key length,
44# [hereafter] more for longer keys. But if compared to ppc-mont.pl
45# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive
46# in absolute terms, but it's apparently the way Power 6 is...
47
48$flavour = shift; # first command-line argument selects the 32- or 64-bit flavour
49
50if ($flavour =~ /32/) {
51 $SIZE_T=4; # stride of the saved-gpr slots in the stack frame
52 $RZONE= 224; # extra bytes added to the alloca size -- presumably the ABI red zone, TODO confirm
53 $FRAME= $SIZE_T*12+8*12; # 12 size_t slots plus 12 saved fprs of 8 bytes each
54 $fname= "bn_mul_mont_ppc64";
55
56 $STUX= "stwux"; # store indexed and update
57 $PUSH= "stw"; # mnemonic used to save gprs
58 $POP= "lwz"; # mnemonic used to restore gprs
59 die "not implemented yet"; # the 32-bit flavour always aborts here
60} elsif ($flavour =~ /64/) {
61 $SIZE_T=8;
62 $RZONE= 288;
63 $FRAME= $SIZE_T*12+8*12;
64 $fname= "bn_mul_mont";
65
66 # same as above, but 64-bit mnemonics...
67 $STUX= "stdux"; # store indexed and update
68 $PUSH= "std";
69 $POP= "ld";
70} else { die "nonsense $flavour"; }
71
72$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's path
73( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
74( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
75die "can't locate ppc-xlate.pl";
76
77open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!"; # pipe output through ppc-xlate.pl; with the former '||' the die was attached to the string operand, never to open's result
78
79$FRAME=($FRAME+63)&~63; # round the fixed frame size up to a multiple of 64
80$TRANSFER=16*8; # sixteen 8-byte slots of gpr<->fpr transfer zone
81
82$carry="r0"; # carry between 16-bit limbs while 64-bit words are reassembled
83$sp="r1";
84$toc="r2";
85$rp="r3"; $ovf="r3"; # r3 carries rp on entry and is later reused as the upmost overflow word
86$ap="r4";
87$bp="r5";
88$np="r6";
89$n0="r7";
90$num="r8";
91$rp="r9"; # $rp is reassigned
92$tp="r10"; # pointer into the tmp[] vector on the stack
93$j="r11"; # inner-loop trip count that gets loaded into CTR
94$i="r12"; # outer-loop byte offset into bp; also scratch for alignment masks and the Lsub/Lcopy index
95# non-volatile registers
96$nap_d="r14"; # interleaved ap and np in double format
97$a0="r15"; # ap[0]
98$t0="r16"; # temporary registers
99$t1="r17";
100$t2="r18";
101$t3="r19";
102$t4="r20";
103$t5="r21";
104$t6="r22";
105$t7="r23";
106
107# PPC offers enough register bank capacity to unroll inner loops twice
108#
109# ..A3A2A1A0
110# dcba
111# -----------
112# A0a
113# A0b
114# A0c
115# A0d
116# A1a
117# A1b
118# A1c
119# A1d
120# A2a
121# A2b
122# A2c
123# A2d
124# A3a
125# A3b
126# A3c
127# A3d
128# ..a
129# ..b
130#
131$ba="f0"; $bb="f1"; $bc="f2"; $bd="f3"; # bp[i] split into four 16-bit pieces, held as doubles
132$na="f4"; $nb="f5"; $nc="f6"; $nd="f7"; # (ap[0]*bp[i]+tp[0])*n0 split into four 16-bit pieces, held as doubles
133$dota="f8"; $dotb="f9"; # A3*bc, A3*bd (+N3*nc, N3*nd) products carried over into the next T0a/T0b
134$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13"; # a[j] and a[j+1] as 32-bit word pairs, held as doubles
135$N0="f14"; $N1="f15"; $N2="f16"; $N3="f17"; # n[j] and n[j+1] as 32-bit word pairs, held as doubles
136$T0a="f18"; $T0b="f19"; # T0a..T3b: accumulated partial products, converted with fctid
137$T1a="f20"; $T1b="f21"; # and handed to the integer unit through the transfer zone
138$T2a="f22"; $T2b="f23";
139$T3a="f24"; $T3b="f25";
140
141# sp----------->+-------------------------------+
142# | saved sp |
143# +-------------------------------+
144# | |
145# +-------------------------------+
146# | 10 saved gpr, r14-r23 |
147# . .
148# . .
149# +12*size_t +-------------------------------+
150# | 12 saved fpr, f14-f25 |
151# . .
152# . .
153# +12*8 +-------------------------------+
154# | padding to 64 byte boundary |
155# . .
156# +X +-------------------------------+
157# | 16 gpr<->fpr transfer zone |
158# . .
159# . .
160# +16*8 +-------------------------------+
161# | __int64 tmp[-1] |
162# +-------------------------------+
163# | __int64 tmp[num] |
164# . .
165# . .
166# . .
167# +(num+1)*8 +-------------------------------+
168# | padding to 64 byte boundary |
169# . .
170# +X +-------------------------------+
171# | double nap_d[4*num] |
172# . .
173# . .
174# . .
175# +-------------------------------+
176
177$code=<<___;
178.machine "any"
179.text
180
181.globl .$fname
182.align 5
183.$fname:
184 cmpwi $num,4 ; fewer than 4 words is not handled
185 mr $rp,r3 ; $rp is reassigned
186 li r3,0 ; possible "not handled" return code
187 bltlr- ; return 0 if num<4
188 andi. r0,$num,1 ; $num has to be even
189 bnelr- ; return 0 if num is odd
190
191 slwi $num,$num,3 ; num*=8
192 li $i,-4096 ; mask for rounding down to a 4096-byte boundary
193 slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num
194 add $tp,$tp,$num ; place for tp[num+1]
195 addi $tp,$tp,`$FRAME+$TRANSFER+8+64+$RZONE`
196 subf $tp,$tp,$sp ; $sp-$tp
197 and $tp,$tp,$i ; minimize TLB usage
198 subf $tp,$sp,$tp ; $tp-$sp
199 $STUX $sp,$sp,$tp ; alloca
200
201 $PUSH r14,`2*$SIZE_T`($sp) ; save non-volatile gprs r14-r23
202 $PUSH r15,`3*$SIZE_T`($sp)
203 $PUSH r16,`4*$SIZE_T`($sp)
204 $PUSH r17,`5*$SIZE_T`($sp)
205 $PUSH r18,`6*$SIZE_T`($sp)
206 $PUSH r19,`7*$SIZE_T`($sp)
207 $PUSH r20,`8*$SIZE_T`($sp)
208 $PUSH r21,`9*$SIZE_T`($sp)
209 $PUSH r22,`10*$SIZE_T`($sp)
210 $PUSH r23,`11*$SIZE_T`($sp)
211 stfd f14,`12*$SIZE_T+0`($sp) ; save non-volatile fprs f14-f25
212 stfd f15,`12*$SIZE_T+8`($sp)
213 stfd f16,`12*$SIZE_T+16`($sp)
214 stfd f17,`12*$SIZE_T+24`($sp)
215 stfd f18,`12*$SIZE_T+32`($sp)
216 stfd f19,`12*$SIZE_T+40`($sp)
217 stfd f20,`12*$SIZE_T+48`($sp)
218 stfd f21,`12*$SIZE_T+56`($sp)
219 stfd f22,`12*$SIZE_T+64`($sp)
220 stfd f23,`12*$SIZE_T+72`($sp)
221 stfd f24,`12*$SIZE_T+80`($sp)
222 stfd f25,`12*$SIZE_T+88`($sp)
223
224 ld $a0,0($ap) ; pull ap[0] value
225 ld $n0,0($n0) ; pull n0[0] value
226 ld $t3,0($bp) ; bp[0]
227
228 addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
229 li $i,-64
230 add $nap_d,$tp,$num
231 and $nap_d,$nap_d,$i ; align to 64 bytes
232
233 mulld $t7,$a0,$t3 ; ap[0]*bp[0]
234 ; nap_d is off by 1, because it's used with stfdu/lfdu
235 addi $nap_d,$nap_d,-8
236 srwi $j,$num,`3+1` ; counter register, num/2
237 mulld $t7,$t7,$n0 ; tp[0]*n0
238 addi $j,$j,-1
239 addi $tp,$sp,`$FRAME+$TRANSFER-8`
240 li $carry,0
241 mtctr $j
242
243 ; transfer bp[0] to FPU as 4x16-bit values
244 extrdi $t0,$t3,16,48
245 extrdi $t1,$t3,16,32
246 extrdi $t2,$t3,16,16
247 extrdi $t3,$t3,16,0
248 std $t0,`$FRAME+0`($sp)
249 std $t1,`$FRAME+8`($sp)
250 std $t2,`$FRAME+16`($sp)
251 std $t3,`$FRAME+24`($sp)
252 ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
253 extrdi $t4,$t7,16,48
254 extrdi $t5,$t7,16,32
255 extrdi $t6,$t7,16,16
256 extrdi $t7,$t7,16,0
257 std $t4,`$FRAME+32`($sp)
258 std $t5,`$FRAME+40`($sp)
259 std $t6,`$FRAME+48`($sp)
260 std $t7,`$FRAME+56`($sp)
261 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
262 lwz $t1,0($ap)
263 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
264 lwz $t3,8($ap)
265 lwz $t4,4($np) ; load n[j] as 32-bit word pair
266 lwz $t5,0($np)
267 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
268 lwz $t7,8($np)
269 lfd $ba,`$FRAME+0`($sp)
270 lfd $bb,`$FRAME+8`($sp)
271 lfd $bc,`$FRAME+16`($sp)
272 lfd $bd,`$FRAME+24`($sp)
273 lfd $na,`$FRAME+32`($sp)
274 lfd $nb,`$FRAME+40`($sp)
275 lfd $nc,`$FRAME+48`($sp)
276 lfd $nd,`$FRAME+56`($sp)
277 std $t0,`$FRAME+64`($sp)
278 std $t1,`$FRAME+72`($sp)
279 std $t2,`$FRAME+80`($sp)
280 std $t3,`$FRAME+88`($sp)
281 std $t4,`$FRAME+96`($sp)
282 std $t5,`$FRAME+104`($sp)
283 std $t6,`$FRAME+112`($sp)
284 std $t7,`$FRAME+120`($sp)
285 fcfid $ba,$ba
286 fcfid $bb,$bb
287 fcfid $bc,$bc
288 fcfid $bd,$bd
289 fcfid $na,$na
290 fcfid $nb,$nb
291 fcfid $nc,$nc
292 fcfid $nd,$nd
293
294 lfd $A0,`$FRAME+64`($sp)
295 lfd $A1,`$FRAME+72`($sp)
296 lfd $A2,`$FRAME+80`($sp)
297 lfd $A3,`$FRAME+88`($sp)
298 lfd $N0,`$FRAME+96`($sp)
299 lfd $N1,`$FRAME+104`($sp)
300 lfd $N2,`$FRAME+112`($sp)
301 lfd $N3,`$FRAME+120`($sp)
302 fcfid $A0,$A0
303 fcfid $A1,$A1
304 fcfid $A2,$A2
305 fcfid $A3,$A3
306 fcfid $N0,$N0
307 fcfid $N1,$N1
308 fcfid $N2,$N2
309 fcfid $N3,$N3
310 addi $ap,$ap,16
311 addi $np,$np,16
312
313 fmul $T1a,$A1,$ba
314 fmul $T1b,$A1,$bb
315 stfd $A0,8($nap_d) ; save a[j] in double format
316 stfd $A1,16($nap_d)
317 fmul $T2a,$A2,$ba
318 fmul $T2b,$A2,$bb
319 stfd $A2,24($nap_d) ; save a[j+1] in double format
320 stfd $A3,32($nap_d)
321 fmul $T3a,$A3,$ba
322 fmul $T3b,$A3,$bb
323 stfd $N0,40($nap_d) ; save n[j] in double format
324 stfd $N1,48($nap_d)
325 fmul $T0a,$A0,$ba
326 fmul $T0b,$A0,$bb
327 stfd $N2,56($nap_d) ; save n[j+1] in double format
328 stfdu $N3,64($nap_d)
329
330 fmadd $T1a,$A0,$bc,$T1a
331 fmadd $T1b,$A0,$bd,$T1b
332 fmadd $T2a,$A1,$bc,$T2a
333 fmadd $T2b,$A1,$bd,$T2b
334 fmadd $T3a,$A2,$bc,$T3a
335 fmadd $T3b,$A2,$bd,$T3b
336 fmul $dota,$A3,$bc
337 fmul $dotb,$A3,$bd
338
339 fmadd $T1a,$N1,$na,$T1a
340 fmadd $T1b,$N1,$nb,$T1b
341 fmadd $T2a,$N2,$na,$T2a
342 fmadd $T2b,$N2,$nb,$T2b
343 fmadd $T3a,$N3,$na,$T3a
344 fmadd $T3b,$N3,$nb,$T3b
345 fmadd $T0a,$N0,$na,$T0a
346 fmadd $T0b,$N0,$nb,$T0b
347
348 fmadd $T1a,$N0,$nc,$T1a
349 fmadd $T1b,$N0,$nd,$T1b
350 fmadd $T2a,$N1,$nc,$T2a
351 fmadd $T2b,$N1,$nd,$T2b
352 fmadd $T3a,$N2,$nc,$T3a
353 fmadd $T3b,$N2,$nd,$T3b
354 fmadd $dota,$N3,$nc,$dota
355 fmadd $dotb,$N3,$nd,$dotb
356
357 fctid $T0a,$T0a
358 fctid $T0b,$T0b
359 fctid $T1a,$T1a
360 fctid $T1b,$T1b
361 fctid $T2a,$T2a
362 fctid $T2b,$T2b
363 fctid $T3a,$T3a
364 fctid $T3b,$T3b
365
366 stfd $T0a,`$FRAME+0`($sp)
367 stfd $T0b,`$FRAME+8`($sp)
368 stfd $T1a,`$FRAME+16`($sp)
369 stfd $T1b,`$FRAME+24`($sp)
370 stfd $T2a,`$FRAME+32`($sp)
371 stfd $T2b,`$FRAME+40`($sp)
372 stfd $T3a,`$FRAME+48`($sp)
373 stfd $T3b,`$FRAME+56`($sp)
374
375.align 5
376L1st:
377 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
378 lwz $t1,0($ap)
379 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
380 lwz $t3,8($ap)
381 lwz $t4,4($np) ; load n[j] as 32-bit word pair
382 lwz $t5,0($np)
383 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
384 lwz $t7,8($np)
385 std $t0,`$FRAME+64`($sp)
386 std $t1,`$FRAME+72`($sp)
387 std $t2,`$FRAME+80`($sp)
388 std $t3,`$FRAME+88`($sp)
389 std $t4,`$FRAME+96`($sp)
390 std $t5,`$FRAME+104`($sp)
391 std $t6,`$FRAME+112`($sp)
392 std $t7,`$FRAME+120`($sp)
393 ld $t0,`$FRAME+0`($sp)
394 ld $t1,`$FRAME+8`($sp)
395 ld $t2,`$FRAME+16`($sp)
396 ld $t3,`$FRAME+24`($sp)
397 ld $t4,`$FRAME+32`($sp)
398 ld $t5,`$FRAME+40`($sp)
399 ld $t6,`$FRAME+48`($sp)
400 ld $t7,`$FRAME+56`($sp)
401 lfd $A0,`$FRAME+64`($sp)
402 lfd $A1,`$FRAME+72`($sp)
403 lfd $A2,`$FRAME+80`($sp)
404 lfd $A3,`$FRAME+88`($sp)
405 lfd $N0,`$FRAME+96`($sp)
406 lfd $N1,`$FRAME+104`($sp)
407 lfd $N2,`$FRAME+112`($sp)
408 lfd $N3,`$FRAME+120`($sp)
409 fcfid $A0,$A0
410 fcfid $A1,$A1
411 fcfid $A2,$A2
412 fcfid $A3,$A3
413 fcfid $N0,$N0
414 fcfid $N1,$N1
415 fcfid $N2,$N2
416 fcfid $N3,$N3
417 addi $ap,$ap,16
418 addi $np,$np,16
419
420 fmul $T1a,$A1,$ba
421 fmul $T1b,$A1,$bb
422 fmul $T2a,$A2,$ba
423 fmul $T2b,$A2,$bb
424 stfd $A0,8($nap_d) ; save a[j] in double format
425 stfd $A1,16($nap_d)
426 fmul $T3a,$A3,$ba
427 fmul $T3b,$A3,$bb
428 fmadd $T0a,$A0,$ba,$dota
429 fmadd $T0b,$A0,$bb,$dotb
430 stfd $A2,24($nap_d) ; save a[j+1] in double format
431 stfd $A3,32($nap_d)
432
433 fmadd $T1a,$A0,$bc,$T1a
434 fmadd $T1b,$A0,$bd,$T1b
435 fmadd $T2a,$A1,$bc,$T2a
436 fmadd $T2b,$A1,$bd,$T2b
437 stfd $N0,40($nap_d) ; save n[j] in double format
438 stfd $N1,48($nap_d)
439 fmadd $T3a,$A2,$bc,$T3a
440 fmadd $T3b,$A2,$bd,$T3b
441 add $t0,$t0,$carry ; can not overflow
442 fmul $dota,$A3,$bc
443 fmul $dotb,$A3,$bd
444 stfd $N2,56($nap_d) ; save n[j+1] in double format
445 stfdu $N3,64($nap_d)
446 srdi $carry,$t0,16
447 add $t1,$t1,$carry
448 srdi $carry,$t1,16
449
450 fmadd $T1a,$N1,$na,$T1a
451 fmadd $T1b,$N1,$nb,$T1b
452 insrdi $t0,$t1,16,32
453 fmadd $T2a,$N2,$na,$T2a
454 fmadd $T2b,$N2,$nb,$T2b
455 add $t2,$t2,$carry
456 fmadd $T3a,$N3,$na,$T3a
457 fmadd $T3b,$N3,$nb,$T3b
458 srdi $carry,$t2,16
459 fmadd $T0a,$N0,$na,$T0a
460 fmadd $T0b,$N0,$nb,$T0b
461 insrdi $t0,$t2,16,16
462 add $t3,$t3,$carry
463 srdi $carry,$t3,16
464
465 fmadd $T1a,$N0,$nc,$T1a
466 fmadd $T1b,$N0,$nd,$T1b
467 insrdi $t0,$t3,16,0 ; 0..63 bits
468 fmadd $T2a,$N1,$nc,$T2a
469 fmadd $T2b,$N1,$nd,$T2b
470 add $t4,$t4,$carry
471 fmadd $T3a,$N2,$nc,$T3a
472 fmadd $T3b,$N2,$nd,$T3b
473 srdi $carry,$t4,16
474 fmadd $dota,$N3,$nc,$dota
475 fmadd $dotb,$N3,$nd,$dotb
476 add $t5,$t5,$carry
477 srdi $carry,$t5,16
478 insrdi $t4,$t5,16,32
479
480 fctid $T0a,$T0a
481 fctid $T0b,$T0b
482 add $t6,$t6,$carry
483 fctid $T1a,$T1a
484 fctid $T1b,$T1b
485 srdi $carry,$t6,16
486 fctid $T2a,$T2a
487 fctid $T2b,$T2b
488 insrdi $t4,$t6,16,16
489 fctid $T3a,$T3a
490 fctid $T3b,$T3b
491 add $t7,$t7,$carry
492 insrdi $t4,$t7,16,0 ; 64..127 bits
493 srdi $carry,$t7,16 ; upper 33 bits
494
495 stfd $T0a,`$FRAME+0`($sp)
496 stfd $T0b,`$FRAME+8`($sp)
497 stfd $T1a,`$FRAME+16`($sp)
498 stfd $T1b,`$FRAME+24`($sp)
499 stfd $T2a,`$FRAME+32`($sp)
500 stfd $T2b,`$FRAME+40`($sp)
501 stfd $T3a,`$FRAME+48`($sp)
502 stfd $T3b,`$FRAME+56`($sp)
503 std $t0,8($tp) ; tp[j-1]
504 stdu $t4,16($tp) ; tp[j]
505 bdnz- L1st
506
507 fctid $dota,$dota
508 fctid $dotb,$dotb
509
510 ld $t0,`$FRAME+0`($sp)
511 ld $t1,`$FRAME+8`($sp)
512 ld $t2,`$FRAME+16`($sp)
513 ld $t3,`$FRAME+24`($sp)
514 ld $t4,`$FRAME+32`($sp)
515 ld $t5,`$FRAME+40`($sp)
516 ld $t6,`$FRAME+48`($sp)
517 ld $t7,`$FRAME+56`($sp)
518 stfd $dota,`$FRAME+64`($sp)
519 stfd $dotb,`$FRAME+72`($sp)
520
521 add $t0,$t0,$carry ; can not overflow
522 srdi $carry,$t0,16
523 add $t1,$t1,$carry
524 srdi $carry,$t1,16
525 insrdi $t0,$t1,16,32
526 add $t2,$t2,$carry
527 srdi $carry,$t2,16
528 insrdi $t0,$t2,16,16
529 add $t3,$t3,$carry
530 srdi $carry,$t3,16
531 insrdi $t0,$t3,16,0 ; 0..63 bits
532 add $t4,$t4,$carry
533 srdi $carry,$t4,16
534 add $t5,$t5,$carry
535 srdi $carry,$t5,16
536 insrdi $t4,$t5,16,32
537 add $t6,$t6,$carry
538 srdi $carry,$t6,16
539 insrdi $t4,$t6,16,16
540 add $t7,$t7,$carry
541 insrdi $t4,$t7,16,0 ; 64..127 bits
542 srdi $carry,$t7,16 ; upper 33 bits
543 ld $t6,`$FRAME+64`($sp)
544 ld $t7,`$FRAME+72`($sp)
545
546 std $t0,8($tp) ; tp[j-1]
547 stdu $t4,16($tp) ; tp[j]
548
549 add $t6,$t6,$carry ; can not overflow
550 srdi $carry,$t6,16
551 add $t7,$t7,$carry
552 insrdi $t6,$t7,48,0
553 srdi $ovf,$t7,48
554 std $t6,8($tp) ; tp[num-1]
555
556 slwi $t7,$num,2
557 subf $nap_d,$t7,$nap_d ; rewind pointer
558
559 li $i,8 ; i=1
560.align 5
561Louter:
562 ldx $t3,$bp,$i ; bp[i]
563 ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
564 mulld $t7,$a0,$t3 ; ap[0]*bp[i]
565
566 addi $tp,$sp,`$FRAME+$TRANSFER`
567 add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
568 li $carry,0
569 mulld $t7,$t7,$n0 ; tp[0]*n0
570 mtctr $j
571
572 ; transfer bp[i] to FPU as 4x16-bit values
573 extrdi $t0,$t3,16,48
574 extrdi $t1,$t3,16,32
575 extrdi $t2,$t3,16,16
576 extrdi $t3,$t3,16,0
577 std $t0,`$FRAME+0`($sp)
578 std $t1,`$FRAME+8`($sp)
579 std $t2,`$FRAME+16`($sp)
580 std $t3,`$FRAME+24`($sp)
581 ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
582 extrdi $t4,$t7,16,48
583 extrdi $t5,$t7,16,32
584 extrdi $t6,$t7,16,16
585 extrdi $t7,$t7,16,0
586 std $t4,`$FRAME+32`($sp)
587 std $t5,`$FRAME+40`($sp)
588 std $t6,`$FRAME+48`($sp)
589 std $t7,`$FRAME+56`($sp)
590
591 lfd $A0,8($nap_d) ; load a[j] in double format
592 lfd $A1,16($nap_d)
593 lfd $A2,24($nap_d) ; load a[j+1] in double format
594 lfd $A3,32($nap_d)
595 lfd $N0,40($nap_d) ; load n[j] in double format
596 lfd $N1,48($nap_d)
597 lfd $N2,56($nap_d) ; load n[j+1] in double format
598 lfdu $N3,64($nap_d)
599
600 lfd $ba,`$FRAME+0`($sp)
601 lfd $bb,`$FRAME+8`($sp)
602 lfd $bc,`$FRAME+16`($sp)
603 lfd $bd,`$FRAME+24`($sp)
604 lfd $na,`$FRAME+32`($sp)
605 lfd $nb,`$FRAME+40`($sp)
606 lfd $nc,`$FRAME+48`($sp)
607 lfd $nd,`$FRAME+56`($sp)
608
609 fcfid $ba,$ba
610 fcfid $bb,$bb
611 fcfid $bc,$bc
612 fcfid $bd,$bd
613 fcfid $na,$na
614 fcfid $nb,$nb
615 fcfid $nc,$nc
616 fcfid $nd,$nd
617
618 fmul $T1a,$A1,$ba
619 fmul $T1b,$A1,$bb
620 fmul $T2a,$A2,$ba
621 fmul $T2b,$A2,$bb
622 fmul $T3a,$A3,$ba
623 fmul $T3b,$A3,$bb
624 fmul $T0a,$A0,$ba
625 fmul $T0b,$A0,$bb
626
627 fmadd $T1a,$A0,$bc,$T1a
628 fmadd $T1b,$A0,$bd,$T1b
629 fmadd $T2a,$A1,$bc,$T2a
630 fmadd $T2b,$A1,$bd,$T2b
631 fmadd $T3a,$A2,$bc,$T3a
632 fmadd $T3b,$A2,$bd,$T3b
633 fmul $dota,$A3,$bc
634 fmul $dotb,$A3,$bd
635
636 fmadd $T1a,$N1,$na,$T1a
637 fmadd $T1b,$N1,$nb,$T1b
638 lfd $A0,8($nap_d) ; load a[j] in double format
639 lfd $A1,16($nap_d)
640 fmadd $T2a,$N2,$na,$T2a
641 fmadd $T2b,$N2,$nb,$T2b
642 lfd $A2,24($nap_d) ; load a[j+1] in double format
643 lfd $A3,32($nap_d)
644 fmadd $T3a,$N3,$na,$T3a
645 fmadd $T3b,$N3,$nb,$T3b
646 fmadd $T0a,$N0,$na,$T0a
647 fmadd $T0b,$N0,$nb,$T0b
648
649 fmadd $T1a,$N0,$nc,$T1a
650 fmadd $T1b,$N0,$nd,$T1b
651 fmadd $T2a,$N1,$nc,$T2a
652 fmadd $T2b,$N1,$nd,$T2b
653 fmadd $T3a,$N2,$nc,$T3a
654 fmadd $T3b,$N2,$nd,$T3b
655 fmadd $dota,$N3,$nc,$dota
656 fmadd $dotb,$N3,$nd,$dotb
657
658 fctid $T0a,$T0a
659 fctid $T0b,$T0b
660 fctid $T1a,$T1a
661 fctid $T1b,$T1b
662 fctid $T2a,$T2a
663 fctid $T2b,$T2b
664 fctid $T3a,$T3a
665 fctid $T3b,$T3b
666
667 stfd $T0a,`$FRAME+0`($sp)
668 stfd $T0b,`$FRAME+8`($sp)
669 stfd $T1a,`$FRAME+16`($sp)
670 stfd $T1b,`$FRAME+24`($sp)
671 stfd $T2a,`$FRAME+32`($sp)
672 stfd $T2b,`$FRAME+40`($sp)
673 stfd $T3a,`$FRAME+48`($sp)
674 stfd $T3b,`$FRAME+56`($sp)
675
676.align 5
677Linner:
678 fmul $T1a,$A1,$ba
679 fmul $T1b,$A1,$bb
680 fmul $T2a,$A2,$ba
681 fmul $T2b,$A2,$bb
682 lfd $N0,40($nap_d) ; load n[j] in double format
683 lfd $N1,48($nap_d)
684 fmul $T3a,$A3,$ba
685 fmul $T3b,$A3,$bb
686 fmadd $T0a,$A0,$ba,$dota
687 fmadd $T0b,$A0,$bb,$dotb
688 lfd $N2,56($nap_d) ; load n[j+1] in double format
689 lfdu $N3,64($nap_d)
690
691 fmadd $T1a,$A0,$bc,$T1a
692 fmadd $T1b,$A0,$bd,$T1b
693 fmadd $T2a,$A1,$bc,$T2a
694 fmadd $T2b,$A1,$bd,$T2b
695 lfd $A0,8($nap_d) ; load a[j] in double format
696 lfd $A1,16($nap_d)
697 fmadd $T3a,$A2,$bc,$T3a
698 fmadd $T3b,$A2,$bd,$T3b
699 fmul $dota,$A3,$bc
700 fmul $dotb,$A3,$bd
701 lfd $A2,24($nap_d) ; load a[j+1] in double format
702 lfd $A3,32($nap_d)
703
704 fmadd $T1a,$N1,$na,$T1a
705 fmadd $T1b,$N1,$nb,$T1b
706 ld $t0,`$FRAME+0`($sp)
707 ld $t1,`$FRAME+8`($sp)
708 fmadd $T2a,$N2,$na,$T2a
709 fmadd $T2b,$N2,$nb,$T2b
710 ld $t2,`$FRAME+16`($sp)
711 ld $t3,`$FRAME+24`($sp)
712 fmadd $T3a,$N3,$na,$T3a
713 fmadd $T3b,$N3,$nb,$T3b
714 add $t0,$t0,$carry ; can not overflow
715 ld $t4,`$FRAME+32`($sp)
716 ld $t5,`$FRAME+40`($sp)
717 fmadd $T0a,$N0,$na,$T0a
718 fmadd $T0b,$N0,$nb,$T0b
719 srdi $carry,$t0,16
720 add $t1,$t1,$carry
721 srdi $carry,$t1,16
722 ld $t6,`$FRAME+48`($sp)
723 ld $t7,`$FRAME+56`($sp)
724
725 fmadd $T1a,$N0,$nc,$T1a
726 fmadd $T1b,$N0,$nd,$T1b
727 insrdi $t0,$t1,16,32
728 ld $t1,8($tp) ; tp[j]
729 fmadd $T2a,$N1,$nc,$T2a
730 fmadd $T2b,$N1,$nd,$T2b
731 add $t2,$t2,$carry
732 fmadd $T3a,$N2,$nc,$T3a
733 fmadd $T3b,$N2,$nd,$T3b
734 srdi $carry,$t2,16
735 insrdi $t0,$t2,16,16
736 fmadd $dota,$N3,$nc,$dota
737 fmadd $dotb,$N3,$nd,$dotb
738 add $t3,$t3,$carry
739 ldu $t2,16($tp) ; tp[j+1]
740 srdi $carry,$t3,16
741 insrdi $t0,$t3,16,0 ; 0..63 bits
742 add $t4,$t4,$carry
743
744 fctid $T0a,$T0a
745 fctid $T0b,$T0b
746 srdi $carry,$t4,16
747 fctid $T1a,$T1a
748 fctid $T1b,$T1b
749 add $t5,$t5,$carry
750 fctid $T2a,$T2a
751 fctid $T2b,$T2b
752 srdi $carry,$t5,16
753 insrdi $t4,$t5,16,32
754 fctid $T3a,$T3a
755 fctid $T3b,$T3b
756 add $t6,$t6,$carry
757 srdi $carry,$t6,16
758 insrdi $t4,$t6,16,16
759
760 stfd $T0a,`$FRAME+0`($sp)
761 stfd $T0b,`$FRAME+8`($sp)
762 add $t7,$t7,$carry
763 addc $t3,$t0,$t1
764 stfd $T1a,`$FRAME+16`($sp)
765 stfd $T1b,`$FRAME+24`($sp)
766 insrdi $t4,$t7,16,0 ; 64..127 bits
767 srdi $carry,$t7,16 ; upper 33 bits
768 stfd $T2a,`$FRAME+32`($sp)
769 stfd $T2b,`$FRAME+40`($sp)
770 adde $t5,$t4,$t2
771 stfd $T3a,`$FRAME+48`($sp)
772 stfd $T3b,`$FRAME+56`($sp)
773 addze $carry,$carry
774 std $t3,-16($tp) ; tp[j-1]
775 std $t5,-8($tp) ; tp[j]
776 bdnz- Linner
777
778 fctid $dota,$dota ; convert the last carried-over products to integers
779 fctid $dotb,$dotb
780 ld $t0,`$FRAME+0`($sp) ; reload the integer partial products from the transfer zone
781 ld $t1,`$FRAME+8`($sp)
782 ld $t2,`$FRAME+16`($sp)
783 ld $t3,`$FRAME+24`($sp)
784 ld $t4,`$FRAME+32`($sp)
785 ld $t5,`$FRAME+40`($sp)
786 ld $t6,`$FRAME+48`($sp)
787 ld $t7,`$FRAME+56`($sp)
788 stfd $dota,`$FRAME+64`($sp) ; pass the converted carry-over products to the integer unit
789 stfd $dotb,`$FRAME+72`($sp)
790
791 add $t0,$t0,$carry ; can not overflow
792 srdi $carry,$t0,16
793 add $t1,$t1,$carry
794 srdi $carry,$t1,16
795 insrdi $t0,$t1,16,32
796 add $t2,$t2,$carry
797 ld $t1,8($tp) ; tp[j]
798 srdi $carry,$t2,16
799 insrdi $t0,$t2,16,16
800 add $t3,$t3,$carry
801 ldu $t2,16($tp) ; tp[j+1]
802 srdi $carry,$t3,16
803 insrdi $t0,$t3,16,0 ; 0..63 bits
804 add $t4,$t4,$carry
805 srdi $carry,$t4,16
806 add $t5,$t5,$carry
807 srdi $carry,$t5,16
808 insrdi $t4,$t5,16,32
809 add $t6,$t6,$carry
810 srdi $carry,$t6,16
811 insrdi $t4,$t6,16,16
812 add $t7,$t7,$carry
813 insrdi $t4,$t7,16,0 ; 64..127 bits
814 srdi $carry,$t7,16 ; upper 33 bits
815 ld $t6,`$FRAME+64`($sp)
816 ld $t7,`$FRAME+72`($sp)
817
818 addc $t3,$t0,$t1 ; add tp[j] to the low word
819 adde $t5,$t4,$t2 ; add tp[j+1] plus carry to the high word
820 addze $carry,$carry ; fold the carry-out into the running carry
821
822 std $t3,-16($tp) ; tp[j-1]
823 std $t5,-8($tp) ; tp[j]
824
825 add $carry,$carry,$ovf ; consume upmost overflow
826 add $t6,$t6,$carry ; can not overflow
827 srdi $carry,$t6,16
828 add $t7,$t7,$carry
829 insrdi $t6,$t7,48,0
830 srdi $ovf,$t7,48
831 std $t6,0($tp) ; tp[num-1]
832
833 slwi $t7,$num,2 ; 4*num, num being a byte count here
834 addi $i,$i,8 ; advance to the next word of bp
835 subf $nap_d,$t7,$nap_d ; rewind pointer
836 cmpw $i,$num ; more words of bp left?
837 blt- Louter
838
839 subf $np,$num,$np ; rewind np
840 addi $j,$j,1 ; restore counter
841 subfc $i,$i,$i ; j=0 and "clear" XER[CA]
842 addi $tp,$sp,`$FRAME+$TRANSFER+8` ; address of tp[0]
843 addi $t4,$sp,`$FRAME+$TRANSFER+16` ; address of tp[1]
844 addi $t5,$np,8 ; address of np[1]
845 addi $t6,$rp,8 ; address of rp[1]
846 mtctr $j ; two words per iteration
847
848.align 4
849Lsub: ldx $t0,$tp,$i
850 ldx $t1,$np,$i
851 ldx $t2,$t4,$i
852 ldx $t3,$t5,$i
853 subfe $t0,$t1,$t0 ; tp[j]-np[j]
854 subfe $t2,$t3,$t2 ; tp[j+1]-np[j+1]
855 stdx $t0,$rp,$i ; the difference is stored into rp[j]
856 stdx $t2,$t6,$i ; and into rp[j+1]
857 addi $i,$i,16
858 bdnz- Lsub
859
860 li $i,0
861 subfe $ovf,$i,$ovf ; handle upmost overflow bit
862 and $ap,$tp,$ovf ; keep the tp address where the mask is set
863 andc $np,$rp,$ovf ; keep the rp address where the mask is clear
864 or $ap,$ap,$np ; ap=borrow?tp:rp
865 addi $t7,$ap,8 ; second word of the selected source
866 mtctr $j
867
868.align 4
869Lcopy: ; copy or in-place refresh
870 ldx $t0,$ap,$i
871 ldx $t1,$t7,$i
872 std $i,8($nap_d) ; zap nap_d
873 std $i,16($nap_d) ; (the value written is the running offset, not a constant zero)
874 std $i,24($nap_d)
875 std $i,32($nap_d)
876 std $i,40($nap_d)
877 std $i,48($nap_d)
878 std $i,56($nap_d)
879 stdu $i,64($nap_d)
880 stdx $t0,$rp,$i
881 stdx $t1,$t6,$i
882 stdx $i,$tp,$i ; zap tp at once
883 stdx $i,$t4,$i
884 addi $i,$i,16
885 bdnz- Lcopy
886
887 $POP r14,`2*$SIZE_T`($sp) ; restore non-volatile gprs r14-r23
888 $POP r15,`3*$SIZE_T`($sp)
889 $POP r16,`4*$SIZE_T`($sp)
890 $POP r17,`5*$SIZE_T`($sp)
891 $POP r18,`6*$SIZE_T`($sp)
892 $POP r19,`7*$SIZE_T`($sp)
893 $POP r20,`8*$SIZE_T`($sp)
894 $POP r21,`9*$SIZE_T`($sp)
895 $POP r22,`10*$SIZE_T`($sp)
896 $POP r23,`11*$SIZE_T`($sp)
897 lfd f14,`12*$SIZE_T+0`($sp) ; restore non-volatile fprs f14-f25
898 lfd f15,`12*$SIZE_T+8`($sp)
899 lfd f16,`12*$SIZE_T+16`($sp)
900 lfd f17,`12*$SIZE_T+24`($sp)
901 lfd f18,`12*$SIZE_T+32`($sp)
902 lfd f19,`12*$SIZE_T+40`($sp)
903 lfd f20,`12*$SIZE_T+48`($sp)
904 lfd f21,`12*$SIZE_T+56`($sp)
905 lfd f22,`12*$SIZE_T+64`($sp)
906 lfd f23,`12*$SIZE_T+72`($sp)
907 lfd f24,`12*$SIZE_T+80`($sp)
908 lfd f25,`12*$SIZE_T+88`($sp)
909 $POP $sp,0($sp) ; reload the saved stack pointer, releasing the alloca
910 li r3,1 ; signal "handled"
911 blr
912 .long 0
913.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>"
914___
915
916$code =~ s/\`([^\`]*)\`/eval $1/gem; # evaluate the arithmetic enclosed in backticks inside the template
917print $code;
918close STDOUT or die "error closing STDOUT: $!"; # STDOUT is the pipe to ppc-xlate.pl: report a failed translator instead of ignoring it
diff --git a/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl b/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl
new file mode 100644
index 0000000000..d23251033b
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl
@@ -0,0 +1,225 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2007.
11#
12# Performance improvement over vanilla C code varies from 85% to 45%
13# depending on key length and benchmark. Unfortunately in this context
14# these are not very impressive results [for code that utilizes "wide"
15# 64x64=128-bit multiplication, which is not commonly available to C
16# programmers], at least hand-coded bn_asm.c replacement is known to
17# provide 30-40% better results for longest keys. Well, on second
18# thought it's not very surprising, because z-CPUs are single-issue
19# and _strictly_ in-order execution, while bn_mul_mont is more or less
20# dependent on CPU ability to pipe-line instructions and have several
21# of them "in-flight" at the same time. I mean while other methods,
22# for example Karatsuba, aim to minimize the number of multiplications
23# at the cost of an increase in other operations, bn_mul_mont aims to neatly
24# "overlap" multiplications and the other operations [and on most
25# platforms even minimize the amount of the other operations, in
26# particular references to memory]. But it's possible to improve this
27# module performance by implementing dedicated squaring code-path and
28# possibly by unrolling loops...
29
30# January 2009.
31#
32# Reschedule to minimize/avoid Address Generation Interlock hazard,
33# make inner loops counter-based.
34
35$mn0="%r0"; # m1 = "tp[0]"*n0 of the current outer iteration
36$num="%r1"; # word count, pulled from the stack at entry
37
38# int bn_mul_mont(
39$rp="%r2"; # BN_ULONG *rp,
40$ap="%r3"; # const BN_ULONG *ap,
41$bp="%r4"; # const BN_ULONG *bp,
42$np="%r5"; # const BN_ULONG *np,
43$n0="%r6"; # const BN_ULONG *n0,
44#$num="160(%r15)" # int num); -- the code reads it with lgf from 164(%r15)
45
46$bi="%r2"; # zaps rp
47$j="%r7"; # byte offset into ap, np and tp
48
49$ahi="%r8"; # high word of ap[j]*bp[i]
50$alo="%r9"; # low word of ap[j]*bp[i]
51$nhi="%r10"; # high word of np[j]*m1
52$nlo="%r11"; # low word of np[j]*m1
53$AHI="%r12"; # running high word of the ap*bp column; later the upmost overflow bit
54$NHI="%r13"; # running high word of the np*m1 column
55$count="%r14"; # brct loop counter
56$sp="%r15";
57
58$code.=<<___;
59.text
60.globl bn_mul_mont
61.type bn_mul_mont,\@function
62bn_mul_mont:
63 lgf $num,164($sp) # pull $num
64 sla $num,3 # $num to enumerate bytes
65 la $bp,0($num,$bp) # advance bp by num bytes; this end address is saved by the stmg below
66
67 stg %r2,16($sp) # park rp, it is reloaded after the outer loop
68
69 cghi $num,16 #
70 lghi %r2,0 #
71 blr %r14 # if($num<16) return 0;
72 cghi $num,128 #
73 bhr %r14 # if($num>128) return 0;
74
75 stmg %r3,%r15,24($sp) # save r3-r15 in the caller-provided area
76
77 lghi $rp,-160-8 # leave room for carry bit
78 lcgr $j,$num # -$num
79 lgr %r0,$sp # remember the incoming stack pointer
80 la $rp,0($rp,$sp)
81 la $sp,0($j,$rp) # alloca
82 stg %r0,0($sp) # back chain
83
84 sra $num,3 # restore $num
85 la $bp,0($j,$bp) # restore $bp
86 ahi $num,-1 # adjust $num for inner loop
87 lg $n0,0($n0) # pull n0
88
89 lg $bi,0($bp) # bp[0]
90 lg $alo,0($ap) # ap[0]
91 mlgr $ahi,$bi # ap[0]*bp[0]
92 lgr $AHI,$ahi # keep the high word for the next column
93
94 lgr $mn0,$alo # "tp[0]"*n0
95 msgr $mn0,$n0
96
97 lg $nlo,0($np) #
98 mlgr $nhi,$mn0 # np[0]*m1
99 algr $nlo,$alo # +="tp[0]"
100 lghi $NHI,0
101 alcgr $NHI,$nhi # high word plus carry
102
103 la $j,8(%r0) # j=1
104 lr $count,$num # inner loop runs num-1 times
105
106.align 16
107.L1st:
108 lg $alo,0($j,$ap) # ap[j]
109 mlgr $ahi,$bi # ap[j]*bp[0]
110 algr $alo,$AHI # add the previous high word
111 lghi $AHI,0
112 alcgr $AHI,$ahi # new high word, including the carry
113
114 lg $nlo,0($j,$np) # np[j]
115 mlgr $nhi,$mn0 # np[j]*m1
116 algr $nlo,$NHI # add the previous high word
117 lghi $NHI,0
118 alcgr $nhi,$NHI # +="tp[j]"
119 algr $nlo,$alo
120 alcgr $NHI,$nhi # new high word, including the carry
121
122 stg $nlo,160-8($j,$sp) # tp[j-1]=
123 la $j,8($j) # j++
124 brct $count,.L1st
125
126 algr $NHI,$AHI # sum of the two last high words
127 lghi $AHI,0
128 alcgr $AHI,$AHI # upmost overflow bit
129 stg $NHI,160-8($j,$sp) # top word of tp
130 stg $AHI,160($j,$sp) # overflow bit is stored right after it
131 la $bp,8($bp) # bp++
132
133.Louter:
134 lg $bi,0($bp) # bp[i]
135 lg $alo,0($ap) # ap[0]
136 mlgr $ahi,$bi # ap[0]*bp[i]
137 alg $alo,160($sp) # +=tp[0]
138 lghi $AHI,0
139 alcgr $AHI,$ahi # high word plus carry
140
141 lgr $mn0,$alo
142 msgr $mn0,$n0 # tp[0]*n0
143
144 lg $nlo,0($np) # np[0]
145 mlgr $nhi,$mn0 # np[0]*m1
146 algr $nlo,$alo # +="tp[0]"
147 lghi $NHI,0
148 alcgr $NHI,$nhi # high word plus carry
149
150 la $j,8(%r0) # j=1
151 lr $count,$num # inner loop runs num-1 times
152
153.align 16
154.Linner:
155 lg $alo,0($j,$ap) # ap[j]
156 mlgr $ahi,$bi # ap[j]*bp[i]
157 algr $alo,$AHI # add the previous high word
158 lghi $AHI,0
159 alcgr $ahi,$AHI # carry into the high word
160 alg $alo,160($j,$sp)# +=tp[j]
161 alcgr $AHI,$ahi # new high word, including the carry
162
163 lg $nlo,0($j,$np) # np[j]
164 mlgr $nhi,$mn0 # np[j]*m1
165 algr $nlo,$NHI # add the previous high word
166 lghi $NHI,0
167 alcgr $nhi,$NHI # carry into the high word
168 algr $nlo,$alo # +="tp[j]"
169 alcgr $NHI,$nhi # new high word, including the carry
170
171 stg $nlo,160-8($j,$sp) # tp[j-1]=
172 la $j,8($j) # j++
173 brct $count,.Linner
174
175 algr $NHI,$AHI # sum of the two last high words
176 lghi $AHI,0
177 alcgr $AHI,$AHI # carry out of that sum
178 alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit
179 lghi $ahi,0
180 alcgr $AHI,$ahi # new upmost overflow bit
181 stg $NHI,160-8($j,$sp) # top word of tp
182 stg $AHI,160($j,$sp) # overflow bit is stored right after it
183
184 la $bp,8($bp) # bp++
185 clg $bp,160+8+32($j,$sp) # compare to &bp[num]
186 jne .Louter
187
188 lg $rp,160+8+16($j,$sp) # reincarnate rp
189 la $ap,160($sp) # ap now points at tp
190 ahi $num,1 # restore $num, incidentally clears "borrow"
191
192 la $j,0(%r0) # j=0
193 lr $count,$num
194.Lsub: lg $alo,0($j,$ap) # tp[j]
195 slbg $alo,0($j,$np) # minus np[j], with borrow
196 stg $alo,0($j,$rp) # the difference is stored into rp[j]
197 la $j,8($j)
198 brct $count,.Lsub
199 lghi $ahi,0
200 slbgr $AHI,$ahi # handle upmost carry
201
202 ngr $ap,$AHI # keep the tp address where the mask is set
203 lghi $np,-1
204 xgr $np,$AHI # inverted mask
205 ngr $np,$rp # keep the rp address where the mask is clear
206 ogr $ap,$np # ap=borrow?tp:rp
207
208 la $j,0(%r0) # j=0
209 lgr $count,$num
210.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
211 stg $j,160($j,$sp) # zap tp
212 stg $alo,0($j,$rp)
213 la $j,8($j)
214 brct $count,.Lcopy
215
216 la %r1,160+8+48($j,$sp) # address of the register save slots written at entry
217 lmg %r6,%r15,0(%r1) # restore r6-r15, stack pointer included
218 lghi %r2,1 # signal "processed"
219 br %r14
220.size bn_mul_mont,.-bn_mul_mont
222___
223
224print $code;
225close STDOUT or die "error closing STDOUT: $!"; # a failed flush would otherwise leave truncated assembly unnoticed
diff --git a/src/lib/libssl/src/crypto/bn/asm/s390x.S b/src/lib/libssl/src/crypto/bn/asm/s390x.S
new file mode 100755
index 0000000000..8f45f5d513
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/s390x.S
@@ -0,0 +1,678 @@
1.ident "s390x.S, version 1.0"
2// ====================================================================
3// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4// project.
5//
6// Rights for redistribution and usage in source and binary forms are
7// granted according to the OpenSSL license. Warranty of any kind is
8// disclaimed.
9// ====================================================================
10
11.text
12
// %r0 doubles as a constant-zero operand for carry propagation
// ("alcgr reg,zero"). It is not hardwired to zero: every routine that relies
// on it executes "lghi zero,0" first, while bn_add_words/bn_sub_words reuse
// %r0 as an ordinary scratch register.
13#define zero %r0
14
15// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// rp[i] += ap[i]*w for i in [0,len), with the carry word chained from limb
// to limb; the last carry word is returned in %r2.
//   in:  %r2 = rp, %r3 = ap, %r4 = len (32-bit int), %r5 = w
//   out: %r2 = final carry (0 when len <= 0)
// Internally %r1 holds rp and %r2 is the running byte offset. mlgr leaves the
// 128-bit product in an even/odd pair (high:low); the pairs %r6:%r7 and
// %r8:%r9 alternate so that the high half of one step is the carry-in of the
// next. The body is unrolled four times, followed by a one-limb tail loop.
16.globl bn_mul_add_words
17.type bn_mul_add_words,@function
18.align 4
19bn_mul_add_words:
20 lghi zero,0 // zero = 0
21 la %r1,0(%r2) // put rp aside
22 lghi %r2,0 // i=0; (also the return value for len<=0)
23 ltgfr %r4,%r4 // sign-extend len to 64 bits and test it
24 bler %r14 // if (len<=0) return 0;
25
26 stmg %r6,%r10,48(%r15) // preserve %r6-%r10, restored at .Lend_madd
27 lghi %r8,0 // carry = 0
28 srag %r10,%r4,2 // cnt=len/4
29 jz .Loop1_madd // fewer than 4 limbs: %r4 (1..3) is the tail count
30
31.Loop4_madd:
32 lg %r7,0(%r2,%r3) // ap[i]
33 mlgr %r6,%r5 // *=w
34 algr %r7,%r8 // +=carry
35 alcgr %r6,zero // fold carry-out into the high word
36 alg %r7,0(%r2,%r1) // +=rp[i]
37 alcgr %r6,zero // fold carry-out into the high word
38 stg %r7,0(%r2,%r1) // rp[i]=
39
40 lg %r9,8(%r2,%r3) // ap[i+1]; carry-in is %r6, carry-out is %r8
41 mlgr %r8,%r5
42 algr %r9,%r6
43 alcgr %r8,zero
44 alg %r9,8(%r2,%r1)
45 alcgr %r8,zero
46 stg %r9,8(%r2,%r1)
47
48 lg %r7,16(%r2,%r3) // ap[i+2]; carry-in is %r8, carry-out is %r6
49 mlgr %r6,%r5
50 algr %r7,%r8
51 alcgr %r6,zero
52 alg %r7,16(%r2,%r1)
53 alcgr %r6,zero
54 stg %r7,16(%r2,%r1)
55
56 lg %r9,24(%r2,%r3) // ap[i+3]; carry-in is %r6, carry-out is %r8
57 mlgr %r8,%r5
58 algr %r9,%r6
59 alcgr %r8,zero
60 alg %r9,24(%r2,%r1)
61 alcgr %r8,zero
62 stg %r9,24(%r2,%r1)
63
64 la %r2,32(%r2) // i+=4
65 brct %r10,.Loop4_madd
66
67 lghi %r10,3
68 nr %r4,%r10 // cnt=len%4
69 jz .Lend_madd
70
71.Loop1_madd:
72 lg %r7,0(%r2,%r3) // ap[i]
73 mlgr %r6,%r5 // *=w
74 algr %r7,%r8 // +=carry
75 alcgr %r6,zero
76 alg %r7,0(%r2,%r1) // +=rp[i]
77 alcgr %r6,zero
78 stg %r7,0(%r2,%r1) // rp[i]=
79
80 lgr %r8,%r6 // carry = high word
81 la %r2,8(%r2) // i++
82 brct %r4,.Loop1_madd
83
84.Lend_madd:
85 lgr %r2,%r8 // return carry
86 lmg %r6,%r10,48(%r15)
87 br %r14
88.size bn_mul_add_words,.-bn_mul_add_words
89
90// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// rp[i] = low64(ap[i]*w + carry) for i in [0,len); the last carry word is
// returned in %r2. Unlike bn_mul_add_words the previous contents of rp are
// not read.
//   in:  %r2 = rp, %r3 = ap, %r4 = len (32-bit int), %r5 = w
//   out: %r2 = final carry (0 when len <= 0)
// %r1 holds rp and %r2 is the running byte offset. The product pairs %r6:%r7
// and %r8:%r9 alternate so the high half of one step feeds the next.
91.globl bn_mul_words
92.type bn_mul_words,@function
93.align 4
94bn_mul_words:
95 lghi zero,0 // zero = 0
96 la %r1,0(%r2) // put rp aside
97 lghi %r2,0 // i=0; (also the return value for len<=0)
98 ltgfr %r4,%r4 // sign-extend len to 64 bits and test it
99 bler %r14 // if (len<=0) return 0;
100
101 stmg %r6,%r10,48(%r15) // preserve %r6-%r10, restored at .Lend_mul
102 lghi %r8,0 // carry = 0
103 srag %r10,%r4,2 // cnt=len/4
104 jz .Loop1_mul // fewer than 4 limbs: %r4 (1..3) is the tail count
105
106.Loop4_mul:
107 lg %r7,0(%r2,%r3) // ap[i]
108 mlgr %r6,%r5 // *=w
109 algr %r7,%r8 // +=carry
110 alcgr %r6,zero // fold carry-out into the high word
111 stg %r7,0(%r2,%r1) // rp[i]=
112
113 lg %r9,8(%r2,%r3) // ap[i+1]; carry-in is %r6, carry-out is %r8
114 mlgr %r8,%r5
115 algr %r9,%r6
116 alcgr %r8,zero
117 stg %r9,8(%r2,%r1)
118
119 lg %r7,16(%r2,%r3) // ap[i+2]; carry-in is %r8, carry-out is %r6
120 mlgr %r6,%r5
121 algr %r7,%r8
122 alcgr %r6,zero
123 stg %r7,16(%r2,%r1)
124
125 lg %r9,24(%r2,%r3) // ap[i+3]; carry-in is %r6, carry-out is %r8
126 mlgr %r8,%r5
127 algr %r9,%r6
128 alcgr %r8,zero
129 stg %r9,24(%r2,%r1)
130
131 la %r2,32(%r2) // i+=4
132 brct %r10,.Loop4_mul
133
134 lghi %r10,3
135 nr %r4,%r10 // cnt=len%4
136 jz .Lend_mul
137
138.Loop1_mul:
139 lg %r7,0(%r2,%r3) // ap[i]
140 mlgr %r6,%r5 // *=w
141 algr %r7,%r8 // +=carry
142 alcgr %r6,zero
143 stg %r7,0(%r2,%r1) // rp[i]=
144
145 lgr %r8,%r6 // carry = high word
146 la %r2,8(%r2) // i++
147 brct %r4,.Loop1_mul
148
149.Lend_mul:
150 lgr %r2,%r8 // return carry
151 lmg %r6,%r10,48(%r15)
152 br %r14
153.size bn_mul_words,.-bn_mul_words
154
155// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
//
// For each of the len input limbs at %r3, stores the 128-bit square as two
// consecutive limbs at %r2: low word first, high word second. The squares
// are independent; nothing is carried between limbs.
//   in: %r2 = rp (2*len limbs written), %r3 = ap, %r4 = len (32-bit int)
// Both pointers are advanced in place. %r1 counts the 4x-unrolled groups,
// and %r4 is reused as the tail count.
156.globl bn_sqr_words
157.type bn_sqr_words,@function
158.align 4
159bn_sqr_words:
160 ltgfr %r4,%r4 // sign-extend len to 64 bits and test it
161 bler %r14 // if (len<=0) return;
162
163 stmg %r6,%r7,48(%r15) // preserve %r6-%r7, restored at .Lend_sqr
164 srag %r1,%r4,2 // cnt=len/4
165 jz .Loop1_sqr
166
167.Loop4_sqr:
168 lg %r7,0(%r3) // ap[0]
169 mlgr %r6,%r7 // %r6:%r7 = ap[0]*ap[0]
170 stg %r7,0(%r2) // rp[0] = low
171 stg %r6,8(%r2) // rp[1] = high
172
173 lg %r7,8(%r3)
174 mlgr %r6,%r7
175 stg %r7,16(%r2)
176 stg %r6,24(%r2)
177
178 lg %r7,16(%r3)
179 mlgr %r6,%r7
180 stg %r7,32(%r2)
181 stg %r6,40(%r2)
182
183 lg %r7,24(%r3)
184 mlgr %r6,%r7
185 stg %r7,48(%r2)
186 stg %r6,56(%r2)
187
188 la %r3,32(%r3) // ap += 4
189 la %r2,64(%r2) // rp += 8
190 brct %r1,.Loop4_sqr
191
192 lghi %r1,3
193 nr %r4,%r1 // cnt=len%4
194 jz .Lend_sqr
195
196.Loop1_sqr:
197 lg %r7,0(%r3)
198 mlgr %r6,%r7
199 stg %r7,0(%r2)
200 stg %r6,8(%r2)
201
202 la %r3,8(%r3) // ap++
203 la %r2,16(%r2) // rp += 2
204 brct %r4,.Loop1_sqr
205
206.Lend_sqr:
207 lmg %r6,%r7,48(%r15)
208 br %r14
209.size bn_sqr_words,.-bn_sqr_words
210
211// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
//
// Divides the 128-bit value h:l (%r2:%r3) by d (%r4) and returns the 64-bit
// quotient. dlgr leaves the remainder in the even register (%r2) and the
// quotient in the odd register (%r3); the remainder is discarded.
// NOTE(review): there is no guard here for d == 0 or h >= d; per the ISA,
// dlgr raises a fixed-point divide exception in those cases - confirm that
// callers guarantee h < d.
212.globl bn_div_words
213.type bn_div_words,@function
214.align 4
215bn_div_words:
216 dlgr %r2,%r4 // %r3 = (h:l)/d, %r2 = (h:l)%d
217 lgr %r2,%r3 // return the quotient
218 br %r14
219.size bn_div_words,.-bn_div_words
220
221// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// rp[i] = ap[i] + bp[i] + carry for i in [0,len); returns the final carry
// (0 or 1) in %r2.
//   in:  %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len (32-bit int)
//   out: %r2 = carry out of the top limb (0 when len <= 0)
// The carry lives in the condition code for the whole run, so between the
// alcg instructions only instructions that leave the CC alone (lg, stg, la,
// brct) are used. %r1 holds rp, %r2 is the byte offset, %r0 is scratch.
222.globl bn_add_words
223.type bn_add_words,@function
224.align 4
225bn_add_words:
226 la %r1,0(%r2) // put rp aside
227 lghi %r2,0 // i=0
228 ltgfr %r5,%r5 // sign-extend len to 64 bits and test it
229 bler %r14 // if (len<=0) return 0;
230
231 stg %r6,48(%r15) // preserve %r6, restored at .Lexit_add
232 lghi %r6,3
233 nr %r6,%r5 // len%4
234 sra %r5,2 // len/4, use sra because it sets condition code
235 jz .Loop1_add // carry is incidentally cleared if branch taken
236 algr %r2,%r2 // clear carry
237
238.Loop4_add:
239 lg %r0,0(%r2,%r3)
240 alcg %r0,0(%r2,%r4)
241 stg %r0,0(%r2,%r1)
242 lg %r0,8(%r2,%r3)
243 alcg %r0,8(%r2,%r4)
244 stg %r0,8(%r2,%r1)
245 lg %r0,16(%r2,%r3)
246 alcg %r0,16(%r2,%r4)
247 stg %r0,16(%r2,%r1)
248 lg %r0,24(%r2,%r3)
249 alcg %r0,24(%r2,%r4)
250 stg %r0,24(%r2,%r1)
251
252 la %r2,32(%r2) // i+=4
253 brct %r5,.Loop4_add
254
255 la %r6,1(%r6) // see if len%4 is zero ...
256 brct %r6,.Loop1_add // without touching condition code:-)
257
258.Lexit_add:
259 lghi %r2,0
260 alcgr %r2,%r2 // %r2 = 0+0+carry, i.e. the carry bit itself
261 lg %r6,48(%r15)
262 br %r14
263
264.Loop1_add:
265 lg %r0,0(%r2,%r3)
266 alcg %r0,0(%r2,%r4)
267 stg %r0,0(%r2,%r1)
268
269 la %r2,8(%r2) // i++
270 brct %r6,.Loop1_add
271
272 j .Lexit_add
273.size bn_add_words,.-bn_add_words
274
275// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// rp[i] = ap[i] - bp[i] - borrow for i in [0,len); returns the final borrow
// (0 or 1) in %r2.
//   in:  %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len (32-bit int)
//   out: %r2 = borrow out of the top limb (0 when len <= 0)
// The borrow lives in the condition code for the whole run, so between the
// slbg instructions only instructions that leave the CC alone (lg, stg, la,
// brct) are used. %r1 holds rp, %r2 is the byte offset, %r0 is scratch.
276.globl bn_sub_words
277.type bn_sub_words,@function
278.align 4
279bn_sub_words:
280 la %r1,0(%r2) // put rp aside
281 lghi %r2,0 // i=0
282 ltgfr %r5,%r5 // sign-extend len to 64 bits and test it
283 bler %r14 // if (len<=0) return 0;
284
285 stg %r6,48(%r15) // preserve %r6, restored at .Lexit_sub
286 lghi %r6,3
287 nr %r6,%r5 // len%4
288 sra %r5,2 // len/4, use sra because it sets condition code
289 jnz .Loop4_sub // borrow is incidentally cleared if branch taken
290 slgr %r2,%r2 // clear borrow
291
292.Loop1_sub:
293 lg %r0,0(%r2,%r3)
294 slbg %r0,0(%r2,%r4)
295 stg %r0,0(%r2,%r1)
296
297 la %r2,8(%r2) // i++
298 brct %r6,.Loop1_sub
299 j .Lexit_sub
300
301.Loop4_sub:
302 lg %r0,0(%r2,%r3)
303 slbg %r0,0(%r2,%r4)
304 stg %r0,0(%r2,%r1)
305 lg %r0,8(%r2,%r3)
306 slbg %r0,8(%r2,%r4)
307 stg %r0,8(%r2,%r1)
308 lg %r0,16(%r2,%r3)
309 slbg %r0,16(%r2,%r4)
310 stg %r0,16(%r2,%r1)
311 lg %r0,24(%r2,%r3)
312 slbg %r0,24(%r2,%r4)
313 stg %r0,24(%r2,%r1)
314
315 la %r2,32(%r2) // i+=4
316 brct %r5,.Loop4_sub
317
318 la %r6,1(%r6) // see if len%4 is zero ...
319 brct %r6,.Loop1_sub // without touching condition code:-)
320
321.Lexit_sub:
322 lghi %r2,0
323 slbgr %r2,%r2 // %r2 = 0-0-borrow, i.e. 0 or -1
324 lcgr %r2,%r2 // negate: return 0 or 1
325 lg %r6,48(%r15)
326 br %r14
327.size bn_sub_words,.-bn_sub_words
328
// Three-word column accumulator used by the comba routines. The roles rotate
// between columns, so each invocation names the registers explicitly.
329#define c1 %r1
330#define c2 %r5
331#define c3 %r8
332
// mul_add_c(ai,bi,c1,c2,c3): (c3:c2:c1) += a[ai]*b[bi], where a is at %r3
// and b at %r4. The 128-bit product lands in %r6:%r7 (high:low), and the
// final alcgr needs "zero" to hold 0. Clobbers %r6, %r7 and the condition
// code.
333#define mul_add_c(ai,bi,c1,c2,c3) \
334 lg %r7,ai*8(%r3); \
335 mlg %r6,bi*8(%r4); \
336 algr c1,%r7; \
337 alcgr c2,%r6; \
338 alcgr c3,zero
339
340// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// 8x8-limb product r[0..15] = a[0..7]*b[0..7], computed column by column:
// for each output index k every a[i]*b[j] with i+j == k is accumulated into
// a three-word accumulator. The low word is then stored to r[k] and the
// accumulator roles rotate for the next column.
//   in: %r2 = r (16 limbs written), %r3 = a, %r4 = b
// Uses %r1, %r5, %r8 as the accumulator (c1, c2, c3) and %r6:%r7 as the
// product pair; %r6-%r8 are saved on entry and restored on exit.
341.globl bn_mul_comba8
342.type bn_mul_comba8,@function
343.align 4
344bn_mul_comba8:
345 stmg %r6,%r8,48(%r15) // preserve %r6-%r8
346
347 lghi c1,0
348 lghi c2,0
349 lghi c3,0
350 lghi zero,0
351
352 mul_add_c(0,0,c1,c2,c3);
353 stg c1,0*8(%r2) // r[0]
354 lghi c1,0
355
356 mul_add_c(0,1,c2,c3,c1);
357 mul_add_c(1,0,c2,c3,c1);
358 stg c2,1*8(%r2) // r[1]
359 lghi c2,0
360
361 mul_add_c(2,0,c3,c1,c2);
362 mul_add_c(1,1,c3,c1,c2);
363 mul_add_c(0,2,c3,c1,c2);
364 stg c3,2*8(%r2) // r[2]
365 lghi c3,0
366
367 mul_add_c(0,3,c1,c2,c3);
368 mul_add_c(1,2,c1,c2,c3);
369 mul_add_c(2,1,c1,c2,c3);
370 mul_add_c(3,0,c1,c2,c3);
371 stg c1,3*8(%r2) // r[3]
372 lghi c1,0
373
374 mul_add_c(4,0,c2,c3,c1);
375 mul_add_c(3,1,c2,c3,c1);
376 mul_add_c(2,2,c2,c3,c1);
377 mul_add_c(1,3,c2,c3,c1);
378 mul_add_c(0,4,c2,c3,c1);
379 stg c2,4*8(%r2) // r[4]
380 lghi c2,0
381
382 mul_add_c(0,5,c3,c1,c2);
383 mul_add_c(1,4,c3,c1,c2);
384 mul_add_c(2,3,c3,c1,c2);
385 mul_add_c(3,2,c3,c1,c2);
386 mul_add_c(4,1,c3,c1,c2);
387 mul_add_c(5,0,c3,c1,c2);
388 stg c3,5*8(%r2) // r[5]
389 lghi c3,0
390
391 mul_add_c(6,0,c1,c2,c3);
392 mul_add_c(5,1,c1,c2,c3);
393 mul_add_c(4,2,c1,c2,c3);
394 mul_add_c(3,3,c1,c2,c3);
395 mul_add_c(2,4,c1,c2,c3);
396 mul_add_c(1,5,c1,c2,c3);
397 mul_add_c(0,6,c1,c2,c3);
398 stg c1,6*8(%r2) // r[6]
399 lghi c1,0
400
401 mul_add_c(0,7,c2,c3,c1);
402 mul_add_c(1,6,c2,c3,c1);
403 mul_add_c(2,5,c2,c3,c1);
404 mul_add_c(3,4,c2,c3,c1);
405 mul_add_c(4,3,c2,c3,c1);
406 mul_add_c(5,2,c2,c3,c1);
407 mul_add_c(6,1,c2,c3,c1);
408 mul_add_c(7,0,c2,c3,c1);
409 stg c2,7*8(%r2) // r[7]
410 lghi c2,0
411
412 mul_add_c(7,1,c3,c1,c2);
413 mul_add_c(6,2,c3,c1,c2);
414 mul_add_c(5,3,c3,c1,c2);
415 mul_add_c(4,4,c3,c1,c2);
416 mul_add_c(3,5,c3,c1,c2);
417 mul_add_c(2,6,c3,c1,c2);
418 mul_add_c(1,7,c3,c1,c2);
419 stg c3,8*8(%r2) // r[8]
420 lghi c3,0
421
422 mul_add_c(2,7,c1,c2,c3);
423 mul_add_c(3,6,c1,c2,c3);
424 mul_add_c(4,5,c1,c2,c3);
425 mul_add_c(5,4,c1,c2,c3);
426 mul_add_c(6,3,c1,c2,c3);
427 mul_add_c(7,2,c1,c2,c3);
428 stg c1,9*8(%r2) // r[9]
429 lghi c1,0
430
431 mul_add_c(7,3,c2,c3,c1);
432 mul_add_c(6,4,c2,c3,c1);
433 mul_add_c(5,5,c2,c3,c1);
434 mul_add_c(4,6,c2,c3,c1);
435 mul_add_c(3,7,c2,c3,c1);
436 stg c2,10*8(%r2) // r[10]
437 lghi c2,0
438
439 mul_add_c(4,7,c3,c1,c2);
440 mul_add_c(5,6,c3,c1,c2);
441 mul_add_c(6,5,c3,c1,c2);
442 mul_add_c(7,4,c3,c1,c2);
443 stg c3,11*8(%r2) // r[11]
444 lghi c3,0
445
446 mul_add_c(7,5,c1,c2,c3);
447 mul_add_c(6,6,c1,c2,c3);
448 mul_add_c(5,7,c1,c2,c3);
449 stg c1,12*8(%r2) // r[12]
450 lghi c1,0
451
452
453 mul_add_c(6,7,c2,c3,c1);
454 mul_add_c(7,6,c2,c3,c1);
455 stg c2,13*8(%r2) // r[13]
456 lghi c2,0
457
458 mul_add_c(7,7,c3,c1,c2);
459 stg c3,14*8(%r2) // r[14]
460 stg c1,15*8(%r2) // r[15] = top word of the last column
461
462 lmg %r6,%r8,48(%r15) // restore %r6-%r8
463 br %r14
464.size bn_mul_comba8,.-bn_mul_comba8
465
466// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// 4x4-limb product r[0..7] = a[0..3]*b[0..3], computed column by column:
// for each output index k every a[i]*b[j] with i+j == k is accumulated into
// a three-word accumulator. The low word is then stored to r[k] and the
// accumulator roles rotate for the next column.
//   in: %r2 = r (8 limbs written), %r3 = a, %r4 = b
// Uses %r1, %r5, %r8 as the accumulator (c1, c2, c3) and %r6:%r7 as the
// product pair; %r6-%r8 are saved on entry and restored on exit.
467.globl bn_mul_comba4
468.type bn_mul_comba4,@function
469.align 4
470bn_mul_comba4:
471 stmg %r6,%r8,48(%r15) // preserve %r6-%r8
472
473 lghi c1,0
474 lghi c2,0
475 lghi c3,0
476 lghi zero,0
477
478 mul_add_c(0,0,c1,c2,c3);
479 stg c1,0*8(%r2) // r[0]; must go to r (%r2), never to the input a (%r3)
480 lghi c1,0
481
482 mul_add_c(0,1,c2,c3,c1);
483 mul_add_c(1,0,c2,c3,c1);
484 stg c2,1*8(%r2) // r[1]
485 lghi c2,0
486
487 mul_add_c(2,0,c3,c1,c2);
488 mul_add_c(1,1,c3,c1,c2);
489 mul_add_c(0,2,c3,c1,c2);
490 stg c3,2*8(%r2) // r[2]
491 lghi c3,0
492
493 mul_add_c(0,3,c1,c2,c3);
494 mul_add_c(1,2,c1,c2,c3);
495 mul_add_c(2,1,c1,c2,c3);
496 mul_add_c(3,0,c1,c2,c3);
497 stg c1,3*8(%r2) // r[3]
498 lghi c1,0
499
500 mul_add_c(3,1,c2,c3,c1);
501 mul_add_c(2,2,c2,c3,c1);
502 mul_add_c(1,3,c2,c3,c1);
503 stg c2,4*8(%r2) // r[4]
504 lghi c2,0
505
506 mul_add_c(2,3,c3,c1,c2);
507 mul_add_c(3,2,c3,c1,c2);
508 stg c3,5*8(%r2) // r[5]
509 lghi c3,0
510
511 mul_add_c(3,3,c1,c2,c3);
512 stg c1,6*8(%r2) // r[6]
513 stg c2,7*8(%r2) // r[7] = top word of the last column
514
515 lmg %r6,%r8,48(%r15) // restore %r6-%r8 (load, not store)
516 br %r14
517.size bn_mul_comba4,.-bn_mul_comba4
518
// sqr_add_c(ai,c1,c2,c3): (c3:c2:c1) += a[ai]*a[ai], where a is at %r3.
// The 128-bit square lands in %r6:%r7 (high:low), and the final alcgr needs
// "zero" to hold 0. Clobbers %r6, %r7 and the condition code.
519#define sqr_add_c(ai,c1,c2,c3) \
520 lg %r7,ai*8(%r3); \
521 mlgr %r6,%r7; \
522 algr c1,%r7; \
523 alcgr c2,%r6; \
524 alcgr c3,zero
525
// sqr_add_c2(ai,aj,c1,c2,c3): (c3:c2:c1) += 2*a[ai]*a[aj]. The product is
// computed once and accumulated twice, rather than doubled, so the extra
// carry bit is absorbed by c3. Clobbers %r6, %r7 and the condition code.
526#define sqr_add_c2(ai,aj,c1,c2,c3) \
527 lg %r7,ai*8(%r3); \
528 mlg %r6,aj*8(%r3); \
529 algr c1,%r7; \
530 alcgr c2,%r6; \
531 alcgr c3,zero; \
532 algr c1,%r7; \
533 alcgr c2,%r6; \
534 alcgr c3,zero
535
536// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
//
// 8-limb square r[0..15] = a[0..7]^2, computed column by column. For output
// index k the diagonal term a[k/2]^2 (even k only) is added once via
// sqr_add_c, and every off-diagonal pair a[i]*a[j] with i+j == k, i > j is
// added twice via sqr_add_c2.
//   in: %r2 = r (16 limbs written), %r3 = a
// Uses %r1, %r5, %r8 as the rotating accumulator (c1, c2, c3) and %r6:%r7 as
// the product pair; %r6-%r8 are saved on entry and restored on exit.
537.globl bn_sqr_comba8
538.type bn_sqr_comba8,@function
539.align 4
540bn_sqr_comba8:
541 stmg %r6,%r8,48(%r15) // preserve %r6-%r8
542
543 lghi c1,0
544 lghi c2,0
545 lghi c3,0
546 lghi zero,0
547
548 sqr_add_c(0,c1,c2,c3);
549 stg c1,0*8(%r2) // r[0]
550 lghi c1,0
551
552 sqr_add_c2(1,0,c2,c3,c1);
553 stg c2,1*8(%r2) // r[1]
554 lghi c2,0
555
556 sqr_add_c(1,c3,c1,c2);
557 sqr_add_c2(2,0,c3,c1,c2);
558 stg c3,2*8(%r2) // r[2]
559 lghi c3,0
560
561 sqr_add_c2(3,0,c1,c2,c3);
562 sqr_add_c2(2,1,c1,c2,c3);
563 stg c1,3*8(%r2) // r[3]
564 lghi c1,0
565
566 sqr_add_c(2,c2,c3,c1);
567 sqr_add_c2(3,1,c2,c3,c1);
568 sqr_add_c2(4,0,c2,c3,c1);
569 stg c2,4*8(%r2) // r[4]
570 lghi c2,0
571
572 sqr_add_c2(5,0,c3,c1,c2);
573 sqr_add_c2(4,1,c3,c1,c2);
574 sqr_add_c2(3,2,c3,c1,c2);
575 stg c3,5*8(%r2) // r[5]
576 lghi c3,0
577
578 sqr_add_c(3,c1,c2,c3);
579 sqr_add_c2(4,2,c1,c2,c3);
580 sqr_add_c2(5,1,c1,c2,c3);
581 sqr_add_c2(6,0,c1,c2,c3);
582 stg c1,6*8(%r2) // r[6]
583 lghi c1,0
584
585 sqr_add_c2(7,0,c2,c3,c1);
586 sqr_add_c2(6,1,c2,c3,c1);
587 sqr_add_c2(5,2,c2,c3,c1);
588 sqr_add_c2(4,3,c2,c3,c1);
589 stg c2,7*8(%r2) // r[7]
590 lghi c2,0
591
592 sqr_add_c(4,c3,c1,c2);
593 sqr_add_c2(5,3,c3,c1,c2);
594 sqr_add_c2(6,2,c3,c1,c2);
595 sqr_add_c2(7,1,c3,c1,c2);
596 stg c3,8*8(%r2) // r[8]
597 lghi c3,0
598
599 sqr_add_c2(7,2,c1,c2,c3);
600 sqr_add_c2(6,3,c1,c2,c3);
601 sqr_add_c2(5,4,c1,c2,c3);
602 stg c1,9*8(%r2) // r[9]
603 lghi c1,0
604
605 sqr_add_c(5,c2,c3,c1);
606 sqr_add_c2(6,4,c2,c3,c1);
607 sqr_add_c2(7,3,c2,c3,c1);
608 stg c2,10*8(%r2) // r[10]
609 lghi c2,0
610
611 sqr_add_c2(7,4,c3,c1,c2);
612 sqr_add_c2(6,5,c3,c1,c2);
613 stg c3,11*8(%r2) // r[11]
614 lghi c3,0
615
616 sqr_add_c(6,c1,c2,c3);
617 sqr_add_c2(7,5,c1,c2,c3);
618 stg c1,12*8(%r2) // r[12]
619 lghi c1,0
620
621 sqr_add_c2(7,6,c2,c3,c1);
622 stg c2,13*8(%r2) // r[13]
623 lghi c2,0
624
625 sqr_add_c(7,c3,c1,c2);
626 stg c3,14*8(%r2) // r[14]
627 stg c1,15*8(%r2) // r[15] = top word of the last column
628
629 lmg %r6,%r8,48(%r15) // restore %r6-%r8
630 br %r14
631.size bn_sqr_comba8,.-bn_sqr_comba8
632
633// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
//
// 4-limb square r[0..7] = a[0..3]^2, computed column by column. For output
// index k the diagonal term a[k/2]^2 (even k only) is added once via
// sqr_add_c, and every off-diagonal pair a[i]*a[j] with i+j == k, i > j is
// added twice via sqr_add_c2.
//   in: %r2 = r (8 limbs written), %r3 = a
// Uses %r1, %r5, %r8 as the rotating accumulator (c1, c2, c3) and %r6:%r7 as
// the product pair; %r6-%r8 are saved on entry and restored on exit.
634.globl bn_sqr_comba4
635.type bn_sqr_comba4,@function
636.align 4
637bn_sqr_comba4:
638 stmg %r6,%r8,48(%r15) // preserve %r6-%r8
639
640 lghi c1,0
641 lghi c2,0
642 lghi c3,0
643 lghi zero,0
644
645 sqr_add_c(0,c1,c2,c3);
646 stg c1,0*8(%r2) // r[0]
647 lghi c1,0
648
649 sqr_add_c2(1,0,c2,c3,c1);
650 stg c2,1*8(%r2) // r[1]
651 lghi c2,0
652
653 sqr_add_c(1,c3,c1,c2);
654 sqr_add_c2(2,0,c3,c1,c2);
655 stg c3,2*8(%r2) // r[2]
656 lghi c3,0
657
658 sqr_add_c2(3,0,c1,c2,c3);
659 sqr_add_c2(2,1,c1,c2,c3);
660 stg c1,3*8(%r2) // r[3]
661 lghi c1,0
662
663 sqr_add_c(2,c2,c3,c1);
664 sqr_add_c2(3,1,c2,c3,c1);
665 stg c2,4*8(%r2) // r[4]
666 lghi c2,0
667
668 sqr_add_c2(3,2,c3,c1,c2);
669 stg c3,5*8(%r2) // r[5]
670 lghi c3,0
671
672 sqr_add_c(3,c1,c2,c3);
673 stg c1,6*8(%r2) // r[6]
674 stg c2,7*8(%r2) // r[7] = top word of the last column
675
676 lmg %r6,%r8,48(%r15) // restore %r6-%r8
677 br %r14
678.size bn_sqr_comba4,.-bn_sqr_comba4
diff --git a/src/lib/libssl/src/crypto/bn/asm/sparcv9-mont.pl b/src/lib/libssl/src/crypto/bn/asm/sparcv9-mont.pl
new file mode 100644
index 0000000000..b8fb1e8a25
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/sparcv9-mont.pl
@@ -0,0 +1,606 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# December 2005
11#
12# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
13# for undertaken effort are multiple. First of all, UltraSPARC is not
14# the whole SPARCv9 universe and other VIS-free implementations deserve
15# optimized code as much. Secondly, newly introduced UltraSPARC T1,
16# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
17# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
18# several integrated RSA/DSA accelerator circuits accessible through
19# kernel driver [only(*)], but having decent user-land software
20# implementation is important too. Finally, reasons like desire to
21# experiment with dedicated squaring procedure. Yes, this module
22# implements one, because it was easiest to draft it in SPARCv9
23# instructions...
24
25# (*) Engine accessing the driver in question is on my TODO list.
26# For reference, accelerator is estimated to give 6 to 10 times
27# improvement on single-threaded RSA sign. It should be noted
28# that 6-10x improvement coefficient does not actually mean
29# something extraordinary in terms of absolute [single-threaded]
30# performance, as SPARCv9 instruction set is by all means least
31# suitable for high performance crypto among other 64 bit
32# platforms. 6-10x factor simply places T1 in same performance
33# domain as say AMD64 and IA-64. Improvement of RSA verify doesn't
34# appear impressive at all, but it's the sign operation which is
35# far more critical/interesting.
36
37# You might notice that inner loops are modulo-scheduled:-) This has
38# essentially negligible impact on UltraSPARC performance, it's
39# Fujitsu SPARC64 V users who should notice and hopefully appreciate
40# the advantage... Currently this module surpasses sparcv9a-mont.pl
41# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
42# module still has hidden potential [see TODO list there], which is
43# estimated to be larger than 20%...
44
# Symbolic names for the SPARC registers used in the generated code. The
# first six map the C arguments onto the input registers, in order.
45# int bn_mul_mont(
46$rp="%i0"; # BN_ULONG *rp,
47$ap="%i1"; # const BN_ULONG *ap,
48$bp="%i2"; # const BN_ULONG *bp,
49$np="%i3"; # const BN_ULONG *np,
50$n0="%i4"; # const BN_ULONG *n0,
51$num="%i5"; # int num);
52
# Target selection: 32-bit by default; -m64 or -xarch=v9 among the
# command-line arguments switches to the 64-bit stack bias and frame size.
53$bits=32;
54for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
55if ($bits==64) { $bias=2047; $frame=192; }
56else { $bias=0; $frame=128; }
57
# Carry words, accumulators and scratch registers.
58$car0="%o0";
59$car1="%o1";
60$car2="%o2"; # 1 bit
61$acc0="%o3";
62$acc1="%o4";
63$mask="%g1"; # 32 bits, what a waste...
64$tmp0="%g4";
65$tmp1="%g5";
66
# Loop counters, current multipliers, tp pointer and prefetched operands.
67$i="%l0";
68$j="%l1";
69$mul0="%l2";
70$mul1="%l3";
71$tp="%l4";
72$apj="%l5";
73$npj="%l6";
74$tpj="%l7";
75
# Name of the emitted global symbol.
76$fname="bn_mul_mont_int";
77
78$code=<<___;
79.section ".text",#alloc,#execinstr
80
81.global $fname
82.align 32
83$fname:
84 cmp %o5,4 ! 128 bits minimum
85 bge,pt %icc,.Lenter
86 sethi %hi(0xffffffff),$mask
87 retl
88 clr %o0
89.align 32
90.Lenter:
91 save %sp,-$frame,%sp
92 sll $num,2,$num ! num*=4
93 or $mask,%lo(0xffffffff),$mask
94 ld [$n0],$n0
95 cmp $ap,$bp
96 and $num,$mask,$num
97 ld [$bp],$mul0 ! bp[0]
98 nop
99
100 add %sp,$bias,%o7 ! real top of stack
101 ld [$ap],$car0 ! ap[0] ! redundant in squaring context
102 sub %o7,$num,%o7
103 ld [$ap+4],$apj ! ap[1]
104 and %o7,-1024,%o7
105 ld [$np],$car1 ! np[0]
106 sub %o7,$bias,%sp ! alloca
107 ld [$np+4],$npj ! np[1]
108 be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
109 mov 12,$j
110
111 mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
112 mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0]
113 and $car0,$mask,$acc0
114 add %sp,$bias+$frame,$tp
115 ld [$ap+8],$apj !prologue!
116
117 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
118 and $mul1,$mask,$mul1
119
120 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
121 mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0
122 srlx $car0,32,$car0
123 add $acc0,$car1,$car1
124 ld [$np+8],$npj !prologue!
125 srlx $car1,32,$car1
126 mov $tmp0,$acc0 !prologue!
127
128.L1st:
129 mulx $apj,$mul0,$tmp0
130 mulx $npj,$mul1,$tmp1
131 add $acc0,$car0,$car0
132 ld [$ap+$j],$apj ! ap[j]
133 and $car0,$mask,$acc0
134 add $acc1,$car1,$car1
135 ld [$np+$j],$npj ! np[j]
136 srlx $car0,32,$car0
137 add $acc0,$car1,$car1
138 add $j,4,$j ! j++
139 mov $tmp0,$acc0
140 st $car1,[$tp]
141 cmp $j,$num
142 mov $tmp1,$acc1
143 srlx $car1,32,$car1
144 bl %icc,.L1st
145 add $tp,4,$tp ! tp++
146!.L1st
147
148 mulx $apj,$mul0,$tmp0 !epilogue!
149 mulx $npj,$mul1,$tmp1
150 add $acc0,$car0,$car0
151 and $car0,$mask,$acc0
152 add $acc1,$car1,$car1
153 srlx $car0,32,$car0
154 add $acc0,$car1,$car1
155 st $car1,[$tp]
156 srlx $car1,32,$car1
157
158 add $tmp0,$car0,$car0
159 and $car0,$mask,$acc0
160 add $tmp1,$car1,$car1
161 srlx $car0,32,$car0
162 add $acc0,$car1,$car1
163 st $car1,[$tp+4]
164 srlx $car1,32,$car1
165
166 add $car0,$car1,$car1
167 st $car1,[$tp+8]
168 srlx $car1,32,$car2
169
170 mov 4,$i ! i++
171 ld [$bp+4],$mul0 ! bp[1]
172.Louter:
173 add %sp,$bias+$frame,$tp
174 ld [$ap],$car0 ! ap[0]
175 ld [$ap+4],$apj ! ap[1]
176 ld [$np],$car1 ! np[0]
177 ld [$np+4],$npj ! np[1]
178 ld [$tp],$tmp1 ! tp[0]
179 ld [$tp+4],$tpj ! tp[1]
180 mov 12,$j
181
182 mulx $car0,$mul0,$car0
183 mulx $apj,$mul0,$tmp0 !prologue!
184 add $tmp1,$car0,$car0
185 ld [$ap+8],$apj !prologue!
186 and $car0,$mask,$acc0
187
188 mulx $n0,$acc0,$mul1
189 and $mul1,$mask,$mul1
190
191 mulx $car1,$mul1,$car1
192 mulx $npj,$mul1,$acc1 !prologue!
193 srlx $car0,32,$car0
194 add $acc0,$car1,$car1
195 ld [$np+8],$npj !prologue!
196 srlx $car1,32,$car1
197 mov $tmp0,$acc0 !prologue!
198
199.Linner:
200 mulx $apj,$mul0,$tmp0
201 mulx $npj,$mul1,$tmp1
202 add $tpj,$car0,$car0
203 ld [$ap+$j],$apj ! ap[j]
204 add $acc0,$car0,$car0
205 add $acc1,$car1,$car1
206 ld [$np+$j],$npj ! np[j]
207 and $car0,$mask,$acc0
208 ld [$tp+8],$tpj ! tp[j]
209 srlx $car0,32,$car0
210 add $acc0,$car1,$car1
211 add $j,4,$j ! j++
212 mov $tmp0,$acc0
213 st $car1,[$tp] ! tp[j-1]
214 srlx $car1,32,$car1
215 mov $tmp1,$acc1
216 cmp $j,$num
217 bl %icc,.Linner
218 add $tp,4,$tp ! tp++
219!.Linner
220
221 mulx $apj,$mul0,$tmp0 !epilogue!
222 mulx $npj,$mul1,$tmp1
223 add $tpj,$car0,$car0
224 add $acc0,$car0,$car0
225 ld [$tp+8],$tpj ! tp[j]
226 and $car0,$mask,$acc0
227 add $acc1,$car1,$car1
228 srlx $car0,32,$car0
229 add $acc0,$car1,$car1
230 st $car1,[$tp] ! tp[j-1]
231 srlx $car1,32,$car1
232
233 add $tpj,$car0,$car0
234 add $tmp0,$car0,$car0
235 and $car0,$mask,$acc0
236 add $tmp1,$car1,$car1
237 add $acc0,$car1,$car1
238 st $car1,[$tp+4] ! tp[j-1]
239 srlx $car0,32,$car0
240 add $i,4,$i ! i++
241 srlx $car1,32,$car1
242
243 add $car0,$car1,$car1
244 cmp $i,$num
245 add $car2,$car1,$car1
246 st $car1,[$tp+8]
247
248 srlx $car1,32,$car2
249 bl,a %icc,.Louter
250 ld [$bp+$i],$mul0 ! bp[i]
251!.Louter
252
253 add $tp,12,$tp
254
255.Ltail:
256 add $np,$num,$np
257 add $rp,$num,$rp
258 mov $tp,$ap
259 sub %g0,$num,%o7 ! k=-num
260 ba .Lsub
261 subcc %g0,%g0,%g0 ! clear %icc.c
262.align 16
263.Lsub:
264 ld [$tp+%o7],%o0
265 ld [$np+%o7],%o1
266 subccc %o0,%o1,%o1 ! tp[j]-np[j]
267 add $rp,%o7,$i
268 add %o7,4,%o7
269 brnz %o7,.Lsub
270 st %o1,[$i]
271 subc $car2,0,$car2 ! handle upmost overflow bit
272 and $tp,$car2,$ap
273 andn $rp,$car2,$np
274 or $ap,$np,$ap
275 sub %g0,$num,%o7
276
277.Lcopy:
278 ld [$ap+%o7],%o0 ! copy or in-place refresh
279 st %g0,[$tp+%o7] ! zap tp
280 st %o0,[$rp+%o7]
281 add %o7,4,%o7
282 brnz %o7,.Lcopy
283 nop
284 mov 1,%i0
285 ret
286 restore
287___
288
289########
290######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
291######## code without following dedicated squaring procedure.
292########
293$sbit="%i2"; # re-use $bp!
294
295$code.=<<___;
296.align 32
297.Lbn_sqr_mont:
298 mulx $mul0,$mul0,$car0 ! ap[0]*ap[0]
299 mulx $apj,$mul0,$tmp0 !prologue!
300 and $car0,$mask,$acc0
301 add %sp,$bias+$frame,$tp
302 ld [$ap+8],$apj !prologue!
303
304 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
305 srlx $car0,32,$car0
306 and $mul1,$mask,$mul1
307
308 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
309 mulx $npj,$mul1,$acc1 !prologue!
310 and $car0,1,$sbit
311 ld [$np+8],$npj !prologue!
312 srlx $car0,1,$car0
313 add $acc0,$car1,$car1
314 srlx $car1,32,$car1
315 mov $tmp0,$acc0 !prologue!
316
317.Lsqr_1st:
318 mulx $apj,$mul0,$tmp0
319 mulx $npj,$mul1,$tmp1
320 add $acc0,$car0,$car0 ! ap[j]*a0+c0
321 add $acc1,$car1,$car1
322 ld [$ap+$j],$apj ! ap[j]
323 and $car0,$mask,$acc0
324 ld [$np+$j],$npj ! np[j]
325 srlx $car0,32,$car0
326 add $acc0,$acc0,$acc0
327 or $sbit,$acc0,$acc0
328 mov $tmp1,$acc1
329 srlx $acc0,32,$sbit
330 add $j,4,$j ! j++
331 and $acc0,$mask,$acc0
332 cmp $j,$num
333 add $acc0,$car1,$car1
334 st $car1,[$tp]
335 mov $tmp0,$acc0
336 srlx $car1,32,$car1
337 bl %icc,.Lsqr_1st
338 add $tp,4,$tp ! tp++
339!.Lsqr_1st
340
341 mulx $apj,$mul0,$tmp0 ! epilogue
342 mulx $npj,$mul1,$tmp1
343 add $acc0,$car0,$car0 ! ap[j]*a0+c0
344 add $acc1,$car1,$car1
345 and $car0,$mask,$acc0
346 srlx $car0,32,$car0
347 add $acc0,$acc0,$acc0
348 or $sbit,$acc0,$acc0
349 srlx $acc0,32,$sbit
350 and $acc0,$mask,$acc0
351 add $acc0,$car1,$car1
352 st $car1,[$tp]
353 srlx $car1,32,$car1
354
355 add $tmp0,$car0,$car0 ! ap[j]*a0+c0
356 add $tmp1,$car1,$car1
357 and $car0,$mask,$acc0
358 srlx $car0,32,$car0
359 add $acc0,$acc0,$acc0
360 or $sbit,$acc0,$acc0
361 srlx $acc0,32,$sbit
362 and $acc0,$mask,$acc0
363 add $acc0,$car1,$car1
364 st $car1,[$tp+4]
365 srlx $car1,32,$car1
366
367 add $car0,$car0,$car0
368 or $sbit,$car0,$car0
369 add $car0,$car1,$car1
370 st $car1,[$tp+8]
371 srlx $car1,32,$car2
372
373 ld [%sp+$bias+$frame],$tmp0 ! tp[0]
374 ld [%sp+$bias+$frame+4],$tmp1 ! tp[1]
375 ld [%sp+$bias+$frame+8],$tpj ! tp[2]
376 ld [$ap+4],$mul0 ! ap[1]
377 ld [$ap+8],$apj ! ap[2]
378 ld [$np],$car1 ! np[0]
379 ld [$np+4],$npj ! np[1]
380 mulx $n0,$tmp0,$mul1
381
382 mulx $mul0,$mul0,$car0
383 and $mul1,$mask,$mul1
384
385 mulx $car1,$mul1,$car1
386 mulx $npj,$mul1,$acc1
387 add $tmp0,$car1,$car1
388 and $car0,$mask,$acc0
389 ld [$np+8],$npj ! np[2]
390 srlx $car1,32,$car1
391 add $tmp1,$car1,$car1
392 srlx $car0,32,$car0
393 add $acc0,$car1,$car1
394 and $car0,1,$sbit
395 add $acc1,$car1,$car1
396 srlx $car0,1,$car0
397 mov 12,$j
398 st $car1,[%sp+$bias+$frame] ! tp[0]=
399 srlx $car1,32,$car1
400 add %sp,$bias+$frame+4,$tp
401
402.Lsqr_2nd:
403 mulx $apj,$mul0,$acc0
404 mulx $npj,$mul1,$acc1
405 add $acc0,$car0,$car0
406 add $tpj,$car1,$car1
407 ld [$ap+$j],$apj ! ap[j]
408 and $car0,$mask,$acc0
409 ld [$np+$j],$npj ! np[j]
410 srlx $car0,32,$car0
411 add $acc1,$car1,$car1
412 ld [$tp+8],$tpj ! tp[j]
413 add $acc0,$acc0,$acc0
414 add $j,4,$j ! j++
415 or $sbit,$acc0,$acc0
416 srlx $acc0,32,$sbit
417 and $acc0,$mask,$acc0
418 cmp $j,$num
419 add $acc0,$car1,$car1
420 st $car1,[$tp] ! tp[j-1]
421 srlx $car1,32,$car1
422 bl %icc,.Lsqr_2nd
423 add $tp,4,$tp ! tp++
424!.Lsqr_2nd
425
426 mulx $apj,$mul0,$acc0
427 mulx $npj,$mul1,$acc1
428 add $acc0,$car0,$car0
429 add $tpj,$car1,$car1
430 and $car0,$mask,$acc0
431 srlx $car0,32,$car0
432 add $acc1,$car1,$car1
433 add $acc0,$acc0,$acc0
434 or $sbit,$acc0,$acc0
435 srlx $acc0,32,$sbit
436 and $acc0,$mask,$acc0
437 add $acc0,$car1,$car1
438 st $car1,[$tp] ! tp[j-1]
439 srlx $car1,32,$car1
440
441 add $car0,$car0,$car0
442 or $sbit,$car0,$car0
443 add $car0,$car1,$car1
444 add $car2,$car1,$car1
445 st $car1,[$tp+4]
446 srlx $car1,32,$car2
447
448 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
449 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
450 ld [$ap+8],$mul0 ! ap[2]
451 ld [$np],$car1 ! np[0]
452 ld [$np+4],$npj ! np[1]
453 mulx $n0,$tmp1,$mul1
454 and $mul1,$mask,$mul1
455 mov 8,$i
456
457 mulx $mul0,$mul0,$car0
458 mulx $car1,$mul1,$car1
459 and $car0,$mask,$acc0
460 add $tmp1,$car1,$car1
461 srlx $car0,32,$car0
462 add %sp,$bias+$frame,$tp
463 srlx $car1,32,$car1
464 and $car0,1,$sbit
465 srlx $car0,1,$car0
466 mov 4,$j
467
468.Lsqr_outer:
469.Lsqr_inner1:
470 mulx $npj,$mul1,$acc1
471 add $tpj,$car1,$car1
472 add $j,4,$j
473 ld [$tp+8],$tpj
474 cmp $j,$i
475 add $acc1,$car1,$car1
476 ld [$np+$j],$npj
477 st $car1,[$tp]
478 srlx $car1,32,$car1
479 bl %icc,.Lsqr_inner1
480 add $tp,4,$tp
481!.Lsqr_inner1
482
483 add $j,4,$j
484 ld [$ap+$j],$apj ! ap[j]
485 mulx $npj,$mul1,$acc1
486 add $tpj,$car1,$car1
487 ld [$np+$j],$npj ! np[j]
488 add $acc0,$car1,$car1
489 ld [$tp+8],$tpj ! tp[j]
490 add $acc1,$car1,$car1
491 st $car1,[$tp]
492 srlx $car1,32,$car1
493
494 add $j,4,$j
495 cmp $j,$num
496 be,pn %icc,.Lsqr_no_inner2
497 add $tp,4,$tp
498
499.Lsqr_inner2:
500 mulx $apj,$mul0,$acc0
501 mulx $npj,$mul1,$acc1
502 add $tpj,$car1,$car1
503 add $acc0,$car0,$car0
504 ld [$ap+$j],$apj ! ap[j]
505 and $car0,$mask,$acc0
506 ld [$np+$j],$npj ! np[j]
507 srlx $car0,32,$car0
508 add $acc0,$acc0,$acc0
509 ld [$tp+8],$tpj ! tp[j]
510 or $sbit,$acc0,$acc0
511 add $j,4,$j ! j++
512 srlx $acc0,32,$sbit
513 and $acc0,$mask,$acc0
514 cmp $j,$num
515 add $acc0,$car1,$car1
516 add $acc1,$car1,$car1
517 st $car1,[$tp] ! tp[j-1]
518 srlx $car1,32,$car1
519 bl %icc,.Lsqr_inner2
520 add $tp,4,$tp ! tp++
521
522.Lsqr_no_inner2:
523 mulx $apj,$mul0,$acc0
524 mulx $npj,$mul1,$acc1
525 add $tpj,$car1,$car1
526 add $acc0,$car0,$car0
527 and $car0,$mask,$acc0
528 srlx $car0,32,$car0
529 add $acc0,$acc0,$acc0
530 or $sbit,$acc0,$acc0
531 srlx $acc0,32,$sbit
532 and $acc0,$mask,$acc0
533 add $acc0,$car1,$car1
534 add $acc1,$car1,$car1
535 st $car1,[$tp] ! tp[j-1]
536 srlx $car1,32,$car1
537
538 add $car0,$car0,$car0
539 or $sbit,$car0,$car0
540 add $car0,$car1,$car1
541 add $car2,$car1,$car1
542 st $car1,[$tp+4]
543 srlx $car1,32,$car2
544
545 add $i,4,$i ! i++
546 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
547 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
548 ld [$ap+$i],$mul0 ! ap[j]
549 ld [$np],$car1 ! np[0]
550 ld [$np+4],$npj ! np[1]
551 mulx $n0,$tmp1,$mul1
552 and $mul1,$mask,$mul1
553 add $i,4,$tmp0
554
555 mulx $mul0,$mul0,$car0
556 mulx $car1,$mul1,$car1
557 and $car0,$mask,$acc0
558 add $tmp1,$car1,$car1
559 srlx $car0,32,$car0
560 add %sp,$bias+$frame,$tp
561 srlx $car1,32,$car1
562 and $car0,1,$sbit
563 srlx $car0,1,$car0
564
565 cmp $tmp0,$num ! i<num-1
566 bl %icc,.Lsqr_outer
567 mov 4,$j
568
569.Lsqr_last:
570 mulx $npj,$mul1,$acc1
571 add $tpj,$car1,$car1
572 add $j,4,$j
573 ld [$tp+8],$tpj
574 cmp $j,$i
575 add $acc1,$car1,$car1
576 ld [$np+$j],$npj
577 st $car1,[$tp]
578 srlx $car1,32,$car1
579 bl %icc,.Lsqr_last
580 add $tp,4,$tp
581!.Lsqr_last
582
583 mulx $npj,$mul1,$acc1
584 add $tpj,$car1,$car1
585 add $acc0,$car1,$car1
586 add $acc1,$car1,$car1
587 st $car1,[$tp]
588 srlx $car1,32,$car1
589
590 add $car0,$car0,$car0 ! recover $car0
591 or $sbit,$car0,$car0
592 add $car0,$car1,$car1
593 add $car2,$car1,$car1
594 st $car1,[$tp+4]
595 srlx $car1,32,$car2
596
597 ba .Ltail
598 add $tp,8,$tp
599.type $fname,#function
600.size $fname,(.-$fname)
601.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
602.align 32
603___
# Replace every `...` span in the generated text with the result of
# evaluating its contents as Perl, then write the assembly to STDOUT.
604$code =~ s/\`([^\`]*)\`/eval($1)/gem;
605print $code;
606close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/sparcv9a-mont.pl b/src/lib/libssl/src/crypto/bn/asm/sparcv9a-mont.pl
new file mode 100755
index 0000000000..a14205f2f0
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/sparcv9a-mont.pl
@@ -0,0 +1,882 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU?
13# Because unlike integer multiplier, which simply stalls whole CPU,
14# FPU is fully pipelined and can effectively emit 48 bit partial
15# product every cycle. Why not blended SPARC v9? One can argue that
16# making this module dependent on UltraSPARC VIS extension limits its
17# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!)
18# implementations from compatibility matrix. But the rest, whole Sun
19# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support
20# VIS extension instructions used in this module. This is considered
21# good enough to not care about HAL SPARC64 users [if any] who have
22# integer-only pure SPARCv9 module to "fall down" to.
23
24# USI&II cores currently exhibit uniform 2x improvement [over pre-
25# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII
26# performance improves few percents for shorter keys and worsens few
27# percents for longer keys. This is because USIII integer multiplier
28# is >3x faster than USI&II one, which is harder to match [but see
29# TODO list below]. It should also be noted that SPARC64 V features
30# out-of-order execution, which *might* mean that integer multiplier
31# is pipelined, which in turn *might* be impossible to match... On
32# additional note, SPARC64 V implements FP Multiply-Add instruction,
33# which is perfectly usable in this context... In other words, as far
34# as Fujitsu SPARC64 V goes, talk to the author:-)
35
36# The implementation implies following "non-natural" limitations on
37# input arguments:
38# - num may not be less than 4;
39# - num has to be even;
40# Failure to meet either condition has no fatal effects, simply
41# doesn't give any performance gain.
42
43# TODO:
44# - modulo-schedule inner loop for better performance (on in-order
45# execution core such as UltraSPARC this shall result in further
46# noticeable(!) improvement);
47# - dedicated squaring procedure[?];
48
49######################################################################
50# November 2006
51#
52# Modulo-scheduled inner loops allow to interleave floating point and
53# integer instructions and minimize Read-After-Write penalties. This
54# results in *further* 20-50% performance improvement [depending on
55# key length, more for longer keys] on USI&II cores and 30-80% - on
56# USIII&IV.
57
# Generator configuration. $fname is the symbol the emitted routine is
# published under (.global/.type/.size in the template). $bits selects the
# ABI: any command-line argument containing -m64 or -xarch=v9 switches the
# output to 64-bit.
58$fname="bn_mul_mont_fpu";
59$bits=32;
60for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
61
# $bias is added to %sp in every stack reference of the template; $frame is
# the fixed part of the frame reserved by the initial "save".
62if ($bits==64) {
63 $bias=2047;
64 $frame=192;
65} else {
66 $bias=0;
67 $frame=128; # 96 rounded up to largest known cache-line
68}
69$locals=64; # scratch bytes above $frame, addressed as [%sp+$bias+$frame+N]
70
71# In order to provide for 32-/64-bit ABI duality, I keep integers wider
72# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used
73# exclusively for pointers, indexes and other small values...
74# int bn_mul_mont(
75$rp="%i0"; # BN_ULONG *rp,
76$ap="%i1"; # const BN_ULONG *ap,
77$bp="%i2"; # const BN_ULONG *bp,
78$np="%i3"; # const BN_ULONG *np,
79$n0="%i4"; # const BN_ULONG *n0,
80$num="%i5"; # int num);
81
82$tp="%l0"; # t[num]
83$ap_l="%l1"; # a[num],n[num] are smashed to 32-bit words and saved
84$ap_h="%l2"; # to these four vectors as double-precision FP values.
85$np_l="%l3"; # This way a bunch of fxtods are eliminated in second
86$np_h="%l4"; # loop and L1-cache aliasing is minimized...
87$i="%l5"; # outer-loop index: negative byte offset, runs from -num up to 0
88$j="%l6"; # inner-loop index: negative byte offset, runs from -num up to 0
89$mask="%l7"; # 16-bit mask, 0xffff
90
# The template dereferences the n0 pointer through the literal %i4 before
# the first write to $carry, which is why the two can share the register.
91$n0="%g4"; # reassigned(!) to "64-bit" register
92$carry="%i4"; # %i4 reused(!) for a carry bit
93
94# FP register naming chart
95#
96# ..HILO
97# dcba
98# --------
99# LOa
100# LOb
101# LOc
102# LOd
103# HIa
104# HIb
105# HIc
106# HId
107# ..a
108# ..b
# Register assignments for the chart above. In the template each 32-bit
# load into an odd register ($alo_, $ahi_, $nlo_, $nhi_) is paired with an
# fzeros of the matching even register before fxtod converts the pair.
109$ba="%f0"; $bb="%f2"; $bc="%f4"; $bd="%f6";
110$na="%f8"; $nb="%f10"; $nc="%f12"; $nd="%f14";
111$alo="%f16"; $alo_="%f17"; $ahi="%f18"; $ahi_="%f19";
112$nlo="%f20"; $nlo_="%f21"; $nhi="%f22"; $nhi_="%f23";
113
# $dota/$dotb receive $ahic+$nhic and $ahid+$nhid and are added into
# $nloa/$nlob on the following iteration.
114$dota="%f24"; $dotb="%f26";
115
116$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38";
117$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46";
118$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54";
119$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62";
120
121$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load
122
123$code=<<___;
124.section ".text",#alloc,#execinstr
125
126.global $fname
127.align 32
128$fname:
129 save %sp,-$frame-$locals,%sp
130
131 cmp $num,4
132 bl,a,pn %icc,.Lret
133 clr %i0
134 andcc $num,1,%g0 ! $num has to be even...
135 bnz,a,pn %icc,.Lret
136 clr %i0 ! signal "unsupported input value"
137
138 srl $num,1,$num
139 sethi %hi(0xffff),$mask
140 ld [%i4+0],$n0 ! $n0 reassigned, remember?
141 or $mask,%lo(0xffff),$mask
142 ld [%i4+4],%o0
143 sllx %o0,32,%o0
144 or %o0,$n0,$n0 ! $n0=n0[1].n0[0]
145
146 sll $num,3,$num ! num*=8
147
148 add %sp,$bias,%o0 ! real top of stack
149 sll $num,2,%o1
150 add %o1,$num,%o1 ! %o1=num*5
151 sub %o0,%o1,%o0
152 and %o0,-2048,%o0 ! optimize TLB utilization
153 sub %o0,$bias,%sp ! alloca(5*num*8)
154
155 rd %asi,%o7 ! save %asi
156 add %sp,$bias+$frame+$locals,$tp
157 add $tp,$num,$ap_l
158 add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends !
159 add $ap_l,$num,$ap_h
160 add $ap_h,$num,$np_l
161 add $np_l,$num,$np_h
162
163 wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads
164
165 add $rp,$num,$rp ! readjust input pointers to point
166 add $ap,$num,$ap ! at the ends too...
167 add $bp,$num,$bp
168 add $np,$num,$np
169
170 stx %o7,[%sp+$bias+$frame+48] ! save %asi
171
172 sub %g0,$num,$i ! i=-num
173 sub %g0,$num,$j ! j=-num
174
175 add $ap,$j,%o3
176 add $bp,$i,%o4
177
178 ld [%o3+4],%g1 ! ap[0]
179 ld [%o3+0],%o0
180 ld [%o4+4],%g5 ! bp[0]
181 sllx %g1,32,%g1
182 ld [%o4+0],%o1
183 sllx %g5,32,%g5
184 or %g1,%o0,%o0
185 or %g5,%o1,%o1
186
187 add $np,$j,%o5
188
189 mulx %o1,%o0,%o0 ! ap[0]*bp[0]
190 mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0
191 stx %o0,[%sp+$bias+$frame+0]
192
193 ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
194 fzeros $alo
195 ld [%o3+4],$ahi_
196 fzeros $ahi
197 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
198 fzeros $nlo
199 ld [%o5+4],$nhi_
200 fzeros $nhi
201
202 ! transfer b[i] to FPU as 4x16-bit values
203 ldda [%o4+2]%asi,$ba
204 fxtod $alo,$alo
205 ldda [%o4+0]%asi,$bb
206 fxtod $ahi,$ahi
207 ldda [%o4+6]%asi,$bc
208 fxtod $nlo,$nlo
209 ldda [%o4+4]%asi,$bd
210 fxtod $nhi,$nhi
211
212 ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
213 ldda [%sp+$bias+$frame+6]%asi,$na
214 fxtod $ba,$ba
215 ldda [%sp+$bias+$frame+4]%asi,$nb
216 fxtod $bb,$bb
217 ldda [%sp+$bias+$frame+2]%asi,$nc
218 fxtod $bc,$bc
219 ldda [%sp+$bias+$frame+0]%asi,$nd
220 fxtod $bd,$bd
221
222 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
223 fxtod $na,$na
224 std $ahi,[$ap_h+$j]
225 fxtod $nb,$nb
226 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
227 fxtod $nc,$nc
228 std $nhi,[$np_h+$j]
229 fxtod $nd,$nd
230
231 fmuld $alo,$ba,$aloa
232 fmuld $nlo,$na,$nloa
233 fmuld $alo,$bb,$alob
234 fmuld $nlo,$nb,$nlob
235 fmuld $alo,$bc,$aloc
236 faddd $aloa,$nloa,$nloa
237 fmuld $nlo,$nc,$nloc
238 fmuld $alo,$bd,$alod
239 faddd $alob,$nlob,$nlob
240 fmuld $nlo,$nd,$nlod
241 fmuld $ahi,$ba,$ahia
242 faddd $aloc,$nloc,$nloc
243 fmuld $nhi,$na,$nhia
244 fmuld $ahi,$bb,$ahib
245 faddd $alod,$nlod,$nlod
246 fmuld $nhi,$nb,$nhib
247 fmuld $ahi,$bc,$ahic
248 faddd $ahia,$nhia,$nhia
249 fmuld $nhi,$nc,$nhic
250 fmuld $ahi,$bd,$ahid
251 faddd $ahib,$nhib,$nhib
252 fmuld $nhi,$nd,$nhid
253
254 faddd $ahic,$nhic,$dota ! $nhic
255 faddd $ahid,$nhid,$dotb ! $nhid
256
257 faddd $nloc,$nhia,$nloc
258 faddd $nlod,$nhib,$nlod
259
260 fdtox $nloa,$nloa
261 fdtox $nlob,$nlob
262 fdtox $nloc,$nloc
263 fdtox $nlod,$nlod
264
265 std $nloa,[%sp+$bias+$frame+0]
266 add $j,8,$j
267 std $nlob,[%sp+$bias+$frame+8]
268 add $ap,$j,%o4
269 std $nloc,[%sp+$bias+$frame+16]
270 add $np,$j,%o5
271 std $nlod,[%sp+$bias+$frame+24]
272
273 ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
274 fzeros $alo
275 ld [%o4+4],$ahi_
276 fzeros $ahi
277 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
278 fzeros $nlo
279 ld [%o5+4],$nhi_
280 fzeros $nhi
281
282 fxtod $alo,$alo
283 fxtod $ahi,$ahi
284 fxtod $nlo,$nlo
285 fxtod $nhi,$nhi
286
287 ldx [%sp+$bias+$frame+0],%o0
288 fmuld $alo,$ba,$aloa
289 ldx [%sp+$bias+$frame+8],%o1
290 fmuld $nlo,$na,$nloa
291 ldx [%sp+$bias+$frame+16],%o2
292 fmuld $alo,$bb,$alob
293 ldx [%sp+$bias+$frame+24],%o3
294 fmuld $nlo,$nb,$nlob
295
296 srlx %o0,16,%o7
297 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
298 fmuld $alo,$bc,$aloc
299 add %o7,%o1,%o1
300 std $ahi,[$ap_h+$j]
301 faddd $aloa,$nloa,$nloa
302 fmuld $nlo,$nc,$nloc
303 srlx %o1,16,%o7
304 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
305 fmuld $alo,$bd,$alod
306 add %o7,%o2,%o2
307 std $nhi,[$np_h+$j]
308 faddd $alob,$nlob,$nlob
309 fmuld $nlo,$nd,$nlod
310 srlx %o2,16,%o7
311 fmuld $ahi,$ba,$ahia
312 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
313 faddd $aloc,$nloc,$nloc
314 fmuld $nhi,$na,$nhia
315 !and %o0,$mask,%o0
316 !and %o1,$mask,%o1
317 !and %o2,$mask,%o2
318 !sllx %o1,16,%o1
319 !sllx %o2,32,%o2
320 !sllx %o3,48,%o7
321 !or %o1,%o0,%o0
322 !or %o2,%o0,%o0
323 !or %o7,%o0,%o0 ! 64-bit result
324 srlx %o3,16,%g1 ! 34-bit carry
325 fmuld $ahi,$bb,$ahib
326
327 faddd $alod,$nlod,$nlod
328 fmuld $nhi,$nb,$nhib
329 fmuld $ahi,$bc,$ahic
330 faddd $ahia,$nhia,$nhia
331 fmuld $nhi,$nc,$nhic
332 fmuld $ahi,$bd,$ahid
333 faddd $ahib,$nhib,$nhib
334 fmuld $nhi,$nd,$nhid
335
336 faddd $dota,$nloa,$nloa
337 faddd $dotb,$nlob,$nlob
338 faddd $ahic,$nhic,$dota ! $nhic
339 faddd $ahid,$nhid,$dotb ! $nhid
340
341 faddd $nloc,$nhia,$nloc
342 faddd $nlod,$nhib,$nlod
343
344 fdtox $nloa,$nloa
345 fdtox $nlob,$nlob
346 fdtox $nloc,$nloc
347 fdtox $nlod,$nlod
348
349 std $nloa,[%sp+$bias+$frame+0]
350 std $nlob,[%sp+$bias+$frame+8]
351 addcc $j,8,$j
352 std $nloc,[%sp+$bias+$frame+16]
353 bz,pn %icc,.L1stskip
354 std $nlod,[%sp+$bias+$frame+24]
355
356.align 32 ! incidentally already aligned !
357.L1st:
358 add $ap,$j,%o4
359 add $np,$j,%o5
360 ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
361 fzeros $alo
362 ld [%o4+4],$ahi_
363 fzeros $ahi
364 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
365 fzeros $nlo
366 ld [%o5+4],$nhi_
367 fzeros $nhi
368
369 fxtod $alo,$alo
370 fxtod $ahi,$ahi
371 fxtod $nlo,$nlo
372 fxtod $nhi,$nhi
373
374 ldx [%sp+$bias+$frame+0],%o0
375 fmuld $alo,$ba,$aloa
376 ldx [%sp+$bias+$frame+8],%o1
377 fmuld $nlo,$na,$nloa
378 ldx [%sp+$bias+$frame+16],%o2
379 fmuld $alo,$bb,$alob
380 ldx [%sp+$bias+$frame+24],%o3
381 fmuld $nlo,$nb,$nlob
382
383 srlx %o0,16,%o7
384 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
385 fmuld $alo,$bc,$aloc
386 add %o7,%o1,%o1
387 std $ahi,[$ap_h+$j]
388 faddd $aloa,$nloa,$nloa
389 fmuld $nlo,$nc,$nloc
390 srlx %o1,16,%o7
391 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
392 fmuld $alo,$bd,$alod
393 add %o7,%o2,%o2
394 std $nhi,[$np_h+$j]
395 faddd $alob,$nlob,$nlob
396 fmuld $nlo,$nd,$nlod
397 srlx %o2,16,%o7
398 fmuld $ahi,$ba,$ahia
399 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
400 and %o0,$mask,%o0
401 faddd $aloc,$nloc,$nloc
402 fmuld $nhi,$na,$nhia
403 and %o1,$mask,%o1
404 and %o2,$mask,%o2
405 fmuld $ahi,$bb,$ahib
406 sllx %o1,16,%o1
407 faddd $alod,$nlod,$nlod
408 fmuld $nhi,$nb,$nhib
409 sllx %o2,32,%o2
410 fmuld $ahi,$bc,$ahic
411 sllx %o3,48,%o7
412 or %o1,%o0,%o0
413 faddd $ahia,$nhia,$nhia
414 fmuld $nhi,$nc,$nhic
415 or %o2,%o0,%o0
416 fmuld $ahi,$bd,$ahid
417 or %o7,%o0,%o0 ! 64-bit result
418 faddd $ahib,$nhib,$nhib
419 fmuld $nhi,$nd,$nhid
420 addcc %g1,%o0,%o0
421 faddd $dota,$nloa,$nloa
422 srlx %o3,16,%g1 ! 34-bit carry
423 faddd $dotb,$nlob,$nlob
424 bcs,a %xcc,.+8
425 add %g1,1,%g1
426
427 stx %o0,[$tp] ! tp[j-1]=
428
429 faddd $ahic,$nhic,$dota ! $nhic
430 faddd $ahid,$nhid,$dotb ! $nhid
431
432 faddd $nloc,$nhia,$nloc
433 faddd $nlod,$nhib,$nlod
434
435 fdtox $nloa,$nloa
436 fdtox $nlob,$nlob
437 fdtox $nloc,$nloc
438 fdtox $nlod,$nlod
439
440 std $nloa,[%sp+$bias+$frame+0]
441 std $nlob,[%sp+$bias+$frame+8]
442 std $nloc,[%sp+$bias+$frame+16]
443 std $nlod,[%sp+$bias+$frame+24]
444
445 addcc $j,8,$j
446 bnz,pt %icc,.L1st
447 add $tp,8,$tp
448
449.L1stskip:
450 fdtox $dota,$dota
451 fdtox $dotb,$dotb
452
453 ldx [%sp+$bias+$frame+0],%o0
454 ldx [%sp+$bias+$frame+8],%o1
455 ldx [%sp+$bias+$frame+16],%o2
456 ldx [%sp+$bias+$frame+24],%o3
457
458 srlx %o0,16,%o7
459 std $dota,[%sp+$bias+$frame+32]
460 add %o7,%o1,%o1
461 std $dotb,[%sp+$bias+$frame+40]
462 srlx %o1,16,%o7
463 add %o7,%o2,%o2
464 srlx %o2,16,%o7
465 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
466 and %o0,$mask,%o0
467 and %o1,$mask,%o1
468 and %o2,$mask,%o2
469 sllx %o1,16,%o1
470 sllx %o2,32,%o2
471 sllx %o3,48,%o7
472 or %o1,%o0,%o0
473 or %o2,%o0,%o0
474 or %o7,%o0,%o0 ! 64-bit result
475 ldx [%sp+$bias+$frame+32],%o4
476 addcc %g1,%o0,%o0
477 ldx [%sp+$bias+$frame+40],%o5
478 srlx %o3,16,%g1 ! 34-bit carry
479 bcs,a %xcc,.+8
480 add %g1,1,%g1
481
482 stx %o0,[$tp] ! tp[j-1]=
483 add $tp,8,$tp
484
485 srlx %o4,16,%o7
486 add %o7,%o5,%o5
487 and %o4,$mask,%o4
488 sllx %o5,16,%o7
489 or %o7,%o4,%o4
490 addcc %g1,%o4,%o4
491 srlx %o5,48,%g1
492 bcs,a %xcc,.+8
493 add %g1,1,%g1
494
495 mov %g1,$carry
496 stx %o4,[$tp] ! tp[num-1]=
497
498 ba .Louter
499 add $i,8,$i
500.align 32
501.Louter:
502 sub %g0,$num,$j ! j=-num
503 add %sp,$bias+$frame+$locals,$tp
504
505 add $ap,$j,%o3
506 add $bp,$i,%o4
507
508 ld [%o3+4],%g1 ! ap[0]
509 ld [%o3+0],%o0
510 ld [%o4+4],%g5 ! bp[i]
511 sllx %g1,32,%g1
512 ld [%o4+0],%o1
513 sllx %g5,32,%g5
514 or %g1,%o0,%o0
515 or %g5,%o1,%o1
516
517 ldx [$tp],%o2 ! tp[0]
518 mulx %o1,%o0,%o0
519 addcc %o2,%o0,%o0
520 mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
521 stx %o0,[%sp+$bias+$frame+0]
522
523 ! transfer b[i] to FPU as 4x16-bit values
524 ldda [%o4+2]%asi,$ba
525 ldda [%o4+0]%asi,$bb
526 ldda [%o4+6]%asi,$bc
527 ldda [%o4+4]%asi,$bd
528
529 ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
530 ldda [%sp+$bias+$frame+6]%asi,$na
531 fxtod $ba,$ba
532 ldda [%sp+$bias+$frame+4]%asi,$nb
533 fxtod $bb,$bb
534 ldda [%sp+$bias+$frame+2]%asi,$nc
535 fxtod $bc,$bc
536 ldda [%sp+$bias+$frame+0]%asi,$nd
537 fxtod $bd,$bd
538 ldd [$ap_l+$j],$alo ! load a[j] in double format
539 fxtod $na,$na
540 ldd [$ap_h+$j],$ahi
541 fxtod $nb,$nb
542 ldd [$np_l+$j],$nlo ! load n[j] in double format
543 fxtod $nc,$nc
544 ldd [$np_h+$j],$nhi
545 fxtod $nd,$nd
546
547 fmuld $alo,$ba,$aloa
548 fmuld $nlo,$na,$nloa
549 fmuld $alo,$bb,$alob
550 fmuld $nlo,$nb,$nlob
551 fmuld $alo,$bc,$aloc
552 faddd $aloa,$nloa,$nloa
553 fmuld $nlo,$nc,$nloc
554 fmuld $alo,$bd,$alod
555 faddd $alob,$nlob,$nlob
556 fmuld $nlo,$nd,$nlod
557 fmuld $ahi,$ba,$ahia
558 faddd $aloc,$nloc,$nloc
559 fmuld $nhi,$na,$nhia
560 fmuld $ahi,$bb,$ahib
561 faddd $alod,$nlod,$nlod
562 fmuld $nhi,$nb,$nhib
563 fmuld $ahi,$bc,$ahic
564 faddd $ahia,$nhia,$nhia
565 fmuld $nhi,$nc,$nhic
566 fmuld $ahi,$bd,$ahid
567 faddd $ahib,$nhib,$nhib
568 fmuld $nhi,$nd,$nhid
569
570 faddd $ahic,$nhic,$dota ! $nhic
571 faddd $ahid,$nhid,$dotb ! $nhid
572
573 faddd $nloc,$nhia,$nloc
574 faddd $nlod,$nhib,$nlod
575
576 fdtox $nloa,$nloa
577 fdtox $nlob,$nlob
578 fdtox $nloc,$nloc
579 fdtox $nlod,$nlod
580
581 std $nloa,[%sp+$bias+$frame+0]
582 std $nlob,[%sp+$bias+$frame+8]
583 std $nloc,[%sp+$bias+$frame+16]
584 add $j,8,$j
585 std $nlod,[%sp+$bias+$frame+24]
586
587 ldd [$ap_l+$j],$alo ! load a[j] in double format
588 ldd [$ap_h+$j],$ahi
589 ldd [$np_l+$j],$nlo ! load n[j] in double format
590 ldd [$np_h+$j],$nhi
591
592 fmuld $alo,$ba,$aloa
593 fmuld $nlo,$na,$nloa
594 fmuld $alo,$bb,$alob
595 fmuld $nlo,$nb,$nlob
596 fmuld $alo,$bc,$aloc
597 ldx [%sp+$bias+$frame+0],%o0
598 faddd $aloa,$nloa,$nloa
599 fmuld $nlo,$nc,$nloc
600 ldx [%sp+$bias+$frame+8],%o1
601 fmuld $alo,$bd,$alod
602 ldx [%sp+$bias+$frame+16],%o2
603 faddd $alob,$nlob,$nlob
604 fmuld $nlo,$nd,$nlod
605 ldx [%sp+$bias+$frame+24],%o3
606 fmuld $ahi,$ba,$ahia
607
608 srlx %o0,16,%o7
609 faddd $aloc,$nloc,$nloc
610 fmuld $nhi,$na,$nhia
611 add %o7,%o1,%o1
612 fmuld $ahi,$bb,$ahib
613 srlx %o1,16,%o7
614 faddd $alod,$nlod,$nlod
615 fmuld $nhi,$nb,$nhib
616 add %o7,%o2,%o2
617 fmuld $ahi,$bc,$ahic
618 srlx %o2,16,%o7
619 faddd $ahia,$nhia,$nhia
620 fmuld $nhi,$nc,$nhic
621 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
622 ! why?
623 and %o0,$mask,%o0
624 fmuld $ahi,$bd,$ahid
625 and %o1,$mask,%o1
626 and %o2,$mask,%o2
627 faddd $ahib,$nhib,$nhib
628 fmuld $nhi,$nd,$nhid
629 sllx %o1,16,%o1
630 faddd $dota,$nloa,$nloa
631 sllx %o2,32,%o2
632 faddd $dotb,$nlob,$nlob
633 sllx %o3,48,%o7
634 or %o1,%o0,%o0
635 faddd $ahic,$nhic,$dota ! $nhic
636 or %o2,%o0,%o0
637 faddd $ahid,$nhid,$dotb ! $nhid
638 or %o7,%o0,%o0 ! 64-bit result
639 ldx [$tp],%o7
640 faddd $nloc,$nhia,$nloc
641 addcc %o7,%o0,%o0
642 ! end-of-why?
643 faddd $nlod,$nhib,$nlod
644 srlx %o3,16,%g1 ! 34-bit carry
645 fdtox $nloa,$nloa
646 bcs,a %xcc,.+8
647 add %g1,1,%g1
648
649 fdtox $nlob,$nlob
650 fdtox $nloc,$nloc
651 fdtox $nlod,$nlod
652
653 std $nloa,[%sp+$bias+$frame+0]
654 std $nlob,[%sp+$bias+$frame+8]
655 addcc $j,8,$j
656 std $nloc,[%sp+$bias+$frame+16]
657 bz,pn %icc,.Linnerskip
658 std $nlod,[%sp+$bias+$frame+24]
659
660 ba .Linner
661 nop
662.align 32
663.Linner:
664 ldd [$ap_l+$j],$alo ! load a[j] in double format
665 ldd [$ap_h+$j],$ahi
666 ldd [$np_l+$j],$nlo ! load n[j] in double format
667 ldd [$np_h+$j],$nhi
668
669 fmuld $alo,$ba,$aloa
670 fmuld $nlo,$na,$nloa
671 fmuld $alo,$bb,$alob
672 fmuld $nlo,$nb,$nlob
673 fmuld $alo,$bc,$aloc
674 ldx [%sp+$bias+$frame+0],%o0
675 faddd $aloa,$nloa,$nloa
676 fmuld $nlo,$nc,$nloc
677 ldx [%sp+$bias+$frame+8],%o1
678 fmuld $alo,$bd,$alod
679 ldx [%sp+$bias+$frame+16],%o2
680 faddd $alob,$nlob,$nlob
681 fmuld $nlo,$nd,$nlod
682 ldx [%sp+$bias+$frame+24],%o3
683 fmuld $ahi,$ba,$ahia
684
685 srlx %o0,16,%o7
686 faddd $aloc,$nloc,$nloc
687 fmuld $nhi,$na,$nhia
688 add %o7,%o1,%o1
689 fmuld $ahi,$bb,$ahib
690 srlx %o1,16,%o7
691 faddd $alod,$nlod,$nlod
692 fmuld $nhi,$nb,$nhib
693 add %o7,%o2,%o2
694 fmuld $ahi,$bc,$ahic
695 srlx %o2,16,%o7
696 faddd $ahia,$nhia,$nhia
697 fmuld $nhi,$nc,$nhic
698 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
699 and %o0,$mask,%o0
700 fmuld $ahi,$bd,$ahid
701 and %o1,$mask,%o1
702 and %o2,$mask,%o2
703 faddd $ahib,$nhib,$nhib
704 fmuld $nhi,$nd,$nhid
705 sllx %o1,16,%o1
706 faddd $dota,$nloa,$nloa
707 sllx %o2,32,%o2
708 faddd $dotb,$nlob,$nlob
709 sllx %o3,48,%o7
710 or %o1,%o0,%o0
711 faddd $ahic,$nhic,$dota ! $nhic
712 or %o2,%o0,%o0
713 faddd $ahid,$nhid,$dotb ! $nhid
714 or %o7,%o0,%o0 ! 64-bit result
715 faddd $nloc,$nhia,$nloc
716 addcc %g1,%o0,%o0
717 ldx [$tp+8],%o7 ! tp[j]
718 faddd $nlod,$nhib,$nlod
719 srlx %o3,16,%g1 ! 34-bit carry
720 fdtox $nloa,$nloa
721 bcs,a %xcc,.+8
722 add %g1,1,%g1
723 fdtox $nlob,$nlob
724 addcc %o7,%o0,%o0
725 fdtox $nloc,$nloc
726 bcs,a %xcc,.+8
727 add %g1,1,%g1
728
729 stx %o0,[$tp] ! tp[j-1]
730 fdtox $nlod,$nlod
731
732 std $nloa,[%sp+$bias+$frame+0]
733 std $nlob,[%sp+$bias+$frame+8]
734 std $nloc,[%sp+$bias+$frame+16]
735 addcc $j,8,$j
736 std $nlod,[%sp+$bias+$frame+24]
737 bnz,pt %icc,.Linner
738 add $tp,8,$tp
739
740.Linnerskip:
741 fdtox $dota,$dota
742 fdtox $dotb,$dotb
743
744 ldx [%sp+$bias+$frame+0],%o0
745 ldx [%sp+$bias+$frame+8],%o1
746 ldx [%sp+$bias+$frame+16],%o2
747 ldx [%sp+$bias+$frame+24],%o3
748
749 srlx %o0,16,%o7
750 std $dota,[%sp+$bias+$frame+32]
751 add %o7,%o1,%o1
752 std $dotb,[%sp+$bias+$frame+40]
753 srlx %o1,16,%o7
754 add %o7,%o2,%o2
755 srlx %o2,16,%o7
756 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
757 and %o0,$mask,%o0
758 and %o1,$mask,%o1
759 and %o2,$mask,%o2
760 sllx %o1,16,%o1
761 sllx %o2,32,%o2
762 sllx %o3,48,%o7
763 or %o1,%o0,%o0
764 or %o2,%o0,%o0
765 ldx [%sp+$bias+$frame+32],%o4
766 or %o7,%o0,%o0 ! 64-bit result
767 ldx [%sp+$bias+$frame+40],%o5
768 addcc %g1,%o0,%o0
769 ldx [$tp+8],%o7 ! tp[j]
770 srlx %o3,16,%g1 ! 34-bit carry
771 bcs,a %xcc,.+8
772 add %g1,1,%g1
773
774 addcc %o7,%o0,%o0
775 bcs,a %xcc,.+8
776 add %g1,1,%g1
777
778 stx %o0,[$tp] ! tp[j-1]
779 add $tp,8,$tp
780
781 srlx %o4,16,%o7
782 add %o7,%o5,%o5
783 and %o4,$mask,%o4
784 sllx %o5,16,%o7
785 or %o7,%o4,%o4
786 addcc %g1,%o4,%o4
787 srlx %o5,48,%g1
788 bcs,a %xcc,.+8
789 add %g1,1,%g1
790
791 addcc $carry,%o4,%o4
792 stx %o4,[$tp] ! tp[num-1]
793 mov %g1,$carry
794 bcs,a %xcc,.+8
795 add $carry,1,$carry
796
797 addcc $i,8,$i
798 bnz %icc,.Louter
799 nop
800
801 add $tp,8,$tp ! adjust tp to point at the end
802 orn %g0,%g0,%g4
803 sub %g0,$num,%o7 ! n=-num
804 ba .Lsub
805 subcc %g0,%g0,%g0 ! clear %icc.c
806
807.align 32
808.Lsub:
809 ldx [$tp+%o7],%o0
810 add $np,%o7,%g1
811 ld [%g1+0],%o2
812 ld [%g1+4],%o3
813 srlx %o0,32,%o1
814 subccc %o0,%o2,%o2
815 add $rp,%o7,%g1
816 subccc %o1,%o3,%o3
817 st %o2,[%g1+0]
818 add %o7,8,%o7
819 brnz,pt %o7,.Lsub
820 st %o3,[%g1+4]
821 subc $carry,0,%g4
822 sub %g0,$num,%o7 ! n=-num
823 ba .Lcopy
824 nop
825
826.align 32
827.Lcopy:
828 ldx [$tp+%o7],%o0
829 add $rp,%o7,%g1
830 ld [%g1+0],%o2
831 ld [%g1+4],%o3
832 stx %g0,[$tp+%o7]
833 and %o0,%g4,%o0
834 srlx %o0,32,%o1
835 andn %o2,%g4,%o2
836 andn %o3,%g4,%o3
837 or %o2,%o0,%o0
838 or %o3,%o1,%o1
839 st %o0,[%g1+0]
840 add %o7,8,%o7
841 brnz,pt %o7,.Lcopy
842 st %o1,[%g1+4]
843 sub %g0,$num,%o7 ! n=-num
844
845.Lzap:
846 stx %g0,[$ap_l+%o7]
847 stx %g0,[$ap_h+%o7]
848 stx %g0,[$np_l+%o7]
849 stx %g0,[$np_h+%o7]
850 add %o7,8,%o7
851 brnz,pt %o7,.Lzap
852 nop
853
854 ldx [%sp+$bias+$frame+48],%o7
855 wr %g0,%o7,%asi ! restore %asi
856
857 mov 1,%i0
858.Lret:
859 ret
860 restore
861.type $fname,#function
862.size $fname,(.-$fname)
863.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
864.align 32
865___
866
# Expand every backtick-quoted span in the template by evaluating it as
# Perl (/e) and splicing the result in its place.
867$code =~ s/\`([^\`]*)\`/eval($1)/gem;
868
869# Below substitution makes it possible to compile without demanding
870# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
871# dare to do this, because VIS capability is detected at run-time now
872# and this routine is not called on CPU not capable to execute it. Do
873# note that fzeros is not the only VIS dependency! Another dependency
874# is implicit and is just _a_ numerical value loaded to %asi register,
875# which assembler can't recognize as VIS specific...
# Each "fzeros %fN" becomes a raw .word: 0x81b00c20 with N shifted left by
# 25 bits OR-ed in; the mnemonic is kept as a trailing assembler comment.
876$code =~ s/fzeros\s+%f([0-9]+)/
877 sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1)
878 /gem;
879
# Write the finished assembly to standard output.
880print $code;
881# flush
882close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/via-mont.pl b/src/lib/libssl/src/crypto/bn/asm/via-mont.pl
new file mode 100644
index 0000000000..c046a514c8
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/via-mont.pl
@@ -0,0 +1,242 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# Wrapper around 'rep montmul', VIA-specific instruction accessing
11# PadLock Montgomery Multiplier. The wrapper is designed as drop-in
12# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9].
13#
14# Below are interleaved outputs from 'openssl speed rsa dsa' for 4
15# different software configurations on 1.5GHz VIA Esther processor.
16# Lines marked with "software integer" denote performance of hand-
17# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2"
18# refers to hand-coded SSE2 Montgomery multiplication procedure found
19# in OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from
20# Padlock SDK 2.0.1 available for download from VIA, which naturally
21# utilizes the magic 'repz montmul' instruction. And finally "hardware
22# this" refers to *this* implementation which also uses 'repz montmul'
23#
24# sign verify sign/s verify/s
25# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer
26# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2
27# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK
28# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this
29#
30# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer
31# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2
32# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK
33# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this
34#
35# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer
36# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2
37# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK
38# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this
39#
40# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer
41# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2
42# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK
43# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this
44#
45# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer
46# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2
47# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK
48# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this
49#
50# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer
51# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2
52# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK
53# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this
54#
55# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer
56# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2
57# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK
58# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this
59#
60# To give you some other reference point here is output for 2.4GHz P4
61# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software
62# SSE2" in above terms.
63#
64# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0
65# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0
66# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9
67# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3
68# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1
69# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0
70# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1
71#
72# Conclusions:
73# - VIA SDK leaves a *lot* of room for improvement (which this
74# implementation successfully fills:-);
75# - 'rep montmul' gives up to >3x performance improvement depending on
76# key length;
77# - in terms of absolute performance it delivers approximately as much
78# as modern out-of-order 32-bit cores [again, for longer keys].
79
# Make x86asm.pl loadable from this script's directory or from
# ../../perlasm relative to it, then start the generator with the first
# command-line argument.
80$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
81push(@INC,"${dir}","${dir}../../perlasm");
82require "x86asm.pl";
83
84&asm_init($ARGV[0],"via-mont.pl");
85
86# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
87$func="bn_mul_mont_padlock";
88
89$pad=16*1; # amount of reserved bytes on top of every vector
90
# The first six slots form the parameter block that esi points at when
# 'rep montmul' is issued; $rp and $sp are private to this wrapper.
91# stack layout
92$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA
93$A=&DWP(4,"esp");
94$B=&DWP(8,"esp");
95$T=&DWP(12,"esp");
96$M=&DWP(16,"esp");
97$scratch=&DWP(20,"esp");
98$rp=&DWP(24,"esp"); # these are mine
99$sp=&DWP(28,"esp");
100# &DWP(32,"esp") # 32 byte scratch area
101# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num]
102# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num]
103# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num]
104# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num]
105# Note that SDK suggests to unconditionally allocate 2K per vector. This
106# has quite an impact on performance. It naturally depends on key length,
107# but to give an example 1024 bit private RSA key operations suffer >30%
108# penalty. I allocate only as much as actually required...
109
# Emits $func. When num is not a multiple of 4 or lies outside 8..1024
# words the routine jumps straight to "leave" with eax=0 and rp untouched.
# Otherwise it builds a 64-byte-aligned frame holding a zeroed tp plus
# zero-padded copies of ap, bp and np, issues 'rep montmul', subtracts np
# from tp into rp, selects tp or rp as the final result, wipes the frame
# and leaves eax=1.
110&function_begin($func);
111 &xor ("eax","eax"); # eax=0 is what the early "leave" exits report
112 &mov ("ecx",&wparam(5)); # num
113 # meet VIA's limitations for num [note that the specification
114 # expresses them in bits, while we work with amount of 32-bit words]
115 &test ("ecx",3);
116 &jnz (&label("leave")); # num % 4 != 0
117 &cmp ("ecx",8);
118 &jb (&label("leave")); # num < 8
119 &cmp ("ecx",1024);
120 &ja (&label("leave")); # num > 1024
121
122 &pushf (); # saved flags are restored by popf before "leave"
123 &cld (); # the rep stosl/movsl sequences below must run forward
124
125 &mov ("edi",&wparam(0)); # rp
126 &mov ("eax",&wparam(1)); # ap
127 &mov ("ebx",&wparam(2)); # bp
128 &mov ("edx",&wparam(3)); # np
129 &mov ("esi",&wparam(4)); # n0
130 &mov ("esi",&DWP(0,"esi")); # *n0
131
132 &lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes
133 &lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes
134 &neg ("ebp");
135 &add ("ebp","esp");
136 &and ("ebp",-64); # align to cache-line
137 &xchg ("ebp","esp"); # alloca
138
139 &mov ($rp,"edi"); # save rp
140 &mov ($sp,"ebp"); # save esp
141
142 &mov ($mZeroPrime,"esi");
143 &lea ("esi",&DWP(64,"esp")); # tp
144 &mov ($T,"esi");
145 &lea ("edi",&DWP(32,"esp")); # scratch area
146 &mov ($scratch,"edi");
147 &mov ("esi","eax"); # esi=ap, source of the first rep movsl
148
149 &lea ("ebp",&DWP(-$pad,"ecx"));
150 &shr ("ebp",2); # restore original num value in ebp
151
152 &xor ("eax","eax"); # eax=0, fill value for every rep stosl below
153
154 &mov ("ecx","ebp");
155 &lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch
156 &data_byte(0xf3,0xab); # rep stosl, bzero
157
158 &mov ("ecx","ebp");
159 &lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy
160 &mov ($A,"edi");
161 &data_byte(0xf3,0xa5); # rep movsl, memcpy
162 &mov ("ecx",$pad/4);
163 &data_byte(0xf3,0xab); # rep stosl, bzero pad
164 # edi points at the end of padded ap copy...
165
166 &mov ("ecx","ebp");
167 &mov ("esi","ebx");
168 &mov ($B,"edi");
169 &data_byte(0xf3,0xa5); # rep movsl, memcpy
170 &mov ("ecx",$pad/4);
171 &data_byte(0xf3,0xab); # rep stosl, bzero pad
172 # edi points at the end of padded bp copy...
173
174 &mov ("ecx","ebp");
175 &mov ("esi","edx");
176 &mov ($M,"edi");
177 &data_byte(0xf3,0xa5); # rep movsl, memcpy
178 &mov ("ecx",$pad/4);
179 &data_byte(0xf3,0xab); # rep stosl, bzero pad
180 # edi points at the end of padded np copy...
181
182 # let magic happen...
183 &mov ("ecx","ebp");
184 &mov ("esi","esp"); # esi=frame base, where the VIA-specified slots live
185 &shl ("ecx",5); # convert word counter to bit counter
186 &align (4);
187 &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
188
189 &mov ("ecx","ebp");
190 &lea ("esi",&DWP(64,"esp")); # tp
191 # edi still points at the end of padded np copy...
192 &neg ("ebp");
193 &lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind"
194 &mov ("edi",$rp); # restore rp
195 &xor ("edx","edx"); # i=0 and clear CF
196
197&set_label("sub",8);
198 &mov ("eax",&DWP(0,"esi","edx",4));
199 &sbb ("eax",&DWP(0,"ebp","edx",4));
200 &mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i]
201 &lea ("edx",&DWP(1,"edx")); # i++
202 &loop (&label("sub")); # doesn't affect CF!
203
	# eax becomes an all-ones or all-zero mask from the top word of tp
	# minus the final borrow; it selects which pointer ends up in esi.
204 &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit
205 &sbb ("eax",0);
206 &and ("esi","eax");
207 &not ("eax");
208 &mov ("ebp","edi");
209 &and ("ebp","eax");
210 &or ("esi","ebp"); # tp=carry?tp:rp
211
212 &mov ("ecx","edx"); # num
213 &xor ("edx","edx"); # i=0
214
215&set_label("copy",8);
216 &mov ("eax",&DWP(0,"esi","edx",4));
217 &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp (overwritten with the loop counter, not zero)
218 &mov (&DWP(0,"edi","edx",4),"eax");
219 &lea ("edx",&DWP(1,"edx")); # i++
220 &loop (&label("copy"));
221
222 &mov ("ebp",$sp);
223 &xor ("eax","eax");
224
225 &mov ("ecx",64/4);
226 &mov ("edi","esp"); # zap frame including scratch area
227 &data_byte(0xf3,0xab); # rep stosl, bzero
228
229 # zap copies of ap, bp and np
230 &lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap
231 &lea ("ecx",&DWP(3*$pad/4,"edx","edx",2)); # 3*(num+$pad/4) dwords; edx==num here
232 &data_byte(0xf3,0xab); # rep stosl, bzero
233
234 &mov ("esp","ebp");
235 &inc ("eax"); # signal "done"
236 &popf ();
237&set_label("leave");
238&function_end($func);
239
240&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
241
242&asm_finish();
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86-mont.pl b/src/lib/libssl/src/crypto/bn/asm/x86-mont.pl
new file mode 100755
index 0000000000..5cd3cd2ed5
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/x86-mont.pl
@@ -0,0 +1,591 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# This is a "teaser" code, as it can be improved in several ways...
13# First of all non-SSE2 path should be implemented (yes, for now it
14# performs Montgomery multiplication/convolution only on SSE2-capable
15# CPUs such as P4, others fall down to original code). Then inner loop
16# can be unrolled and modulo-scheduled to improve ILP and possibly
17# moved to 128-bit XMM register bank (though it would require input
18# rearrangement and/or increase bus bandwidth utilization). Dedicated
19# squaring procedure should give further performance improvement...
20# Yet, for being draft, the code improves rsa512 *sign* benchmark by
21# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
22
23# December 2006
24#
25# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
26# Integer-only code [being equipped with dedicated squaring procedure]
27# gives ~40% on rsa512 sign benchmark...
28
29$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30push(@INC,"${dir}","${dir}../../perlasm");
31require "x86asm.pl";
32
33&asm_init($ARGV[0],$0);
34
35$sse2=0;
36for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
37
38&external_label("OPENSSL_ia32cap_P") if ($sse2);
39
40&function_begin("bn_mul_mont");
41
42$i="edx";
43$j="ecx";
44$ap="esi"; $tp="esi"; # overlapping variables!!! ($ap and $tp share esi)
45$rp="edi"; $bp="edi"; # overlapping variables!!! ($rp and $bp share edi)
46$np="ebp";
47$num="ebx";
48
49$_num=&DWP(4*0,"esp"); # stack top layout
50$_rp=&DWP(4*1,"esp");
51$_ap=&DWP(4*2,"esp");
52$_bp=&DWP(4*3,"esp");
53$_np=&DWP(4*4,"esp");
54$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp"); # same slot, dword and qword views
55$_sp=&DWP(4*6,"esp");
56$_bpend=&DWP(4*7,"esp");
57$frame=32; # size of above frame rounded up to 16n; tp starts at $frame(esp)
58
59 &xor ("eax","eax"); # eax=0 is what is left in eax on the early exit below
60 &mov ("edi",&wparam(5)); # int num
61 &cmp ("edi",4);
62 &jl (&label("just_leave")); # num<4 is not handled here, leave with eax=0
63
64 &lea ("esi",&wparam(0)); # put aside pointer to argument block
65 &lea ("edx",&wparam(1)); # "load ap" - NOTE(review): lea yields the address of the ap argument slot, not the ap value; confirm intent
66 &mov ("ebp","esp"); # saved stack pointer!
67 &add ("edi",2); # extra two words on top of tp
68 &neg ("edi");
69 &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70 &neg ("edi"); # edi is num+2 again
71
72 # minimize cache contention by arranging 2K window between stack
73 # pointer and ap argument [np is also position sensitive vector,
74 # but it's assumed to be near ap, as it's allocated at ~same
75 # time].
76 &mov ("eax","esp");
77 &sub ("eax","edx");
78 &and ("eax",2047);
79 &sub ("esp","eax"); # this aligns sp and ap modulo 2048
80
81 &xor ("edx","esp");
82 &and ("edx",2048);
83 &xor ("edx",2048);
84 &sub ("esp","edx"); # this splits them apart modulo 4096
85
86 &and ("esp",-64); # align to cache line
87
88 ################################# load argument block...
89 &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
90 &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
91 &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
92 &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
93 &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
94 #&mov ("edi",&DWP(5*4,"esi"));# int num
95
96 &mov ("esi",&DWP(0,"esi")); # pull n0[0]
97 &mov ($_rp,"eax"); # ... save a copy of argument block
98 &mov ($_ap,"ebx");
99 &mov ($_bp,"ecx");
100 &mov ($_np,"edx");
101 &mov ($_n0,"esi");
102 &lea ($num,&DWP(-3,"edi")); # edi is num+2 here, so $num=num-1 to assist modulo-scheduling
103 #&mov ($_num,$num); # redundant as $num is not reused
104 &mov ($_sp,"ebp"); # saved stack pointer!
105
106if($sse2) {
107$acc0="mm0"; # mmx register bank layout
108$acc1="mm1";
109$car0="mm2";
110$car1="mm3";
111$mul0="mm4";
112$mul1="mm5";
113$temp="mm6";
114$mask="mm7";
115
116 &picmeup("eax","OPENSSL_ia32cap_P");
117 &bt (&DWP(0,"eax"),26);
118 &jnc (&label("non_sse2"));
119
120 &mov ("eax",-1);
121 &movd ($mask,"eax"); # mask 32 lower bits
122
123 &mov ($ap,$_ap); # load input pointers
124 &mov ($bp,$_bp);
125 &mov ($np,$_np);
126
127 &xor ($i,$i); # i=0
128 &xor ($j,$j); # j=0
129
130 &movd ($mul0,&DWP(0,$bp)); # bp[0]
131 &movd ($mul1,&DWP(0,$ap)); # ap[0]
132 &movd ($car1,&DWP(0,$np)); # np[0]
133
134 &pmuludq($mul1,$mul0); # ap[0]*bp[0]
135 &movq ($car0,$mul1);
136 &movq ($acc0,$mul1); # I wish movd worked for
137 &pand ($acc0,$mask); # inter-register transfers
138
139 &pmuludq($mul1,$_n0q); # *=n0
140
141 &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
142 &paddq ($car1,$acc0);
143
144 &movd ($acc1,&DWP(4,$np)); # np[1]
145 &movd ($acc0,&DWP(4,$ap)); # ap[1]
146
147 &psrlq ($car0,32);
148 &psrlq ($car1,32);
149
150 &inc ($j); # j++
151&set_label("1st",16);
152 &pmuludq($acc0,$mul0); # ap[j]*bp[0]
153 &pmuludq($acc1,$mul1); # np[j]*m1
154 &paddq ($car0,$acc0); # +=c0
155 &paddq ($car1,$acc1); # +=c1
156
157 &movq ($acc0,$car0);
158 &pand ($acc0,$mask);
159 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
160 &paddq ($car1,$acc0); # +=ap[j]*bp[0];
161 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
162 &psrlq ($car0,32);
163 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
164 &psrlq ($car1,32);
165
166 &lea ($j,&DWP(1,$j));
167 &cmp ($j,$num);
168 &jl (&label("1st"));
169
170 &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
171 &pmuludq($acc1,$mul1); # np[num-1]*m1
172 &paddq ($car0,$acc0); # +=c0
173 &paddq ($car1,$acc1); # +=c1
174
175 &movq ($acc0,$car0);
176 &pand ($acc0,$mask);
177 &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
178 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
179
180 &psrlq ($car0,32);
181 &psrlq ($car1,32);
182
183 &paddq ($car1,$car0);
184 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
185
186 &inc ($i); # i++
187&set_label("outer");
188 &xor ($j,$j); # j=0
189
190 &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
191 &movd ($mul1,&DWP(0,$ap)); # ap[0]
192 &movd ($temp,&DWP($frame,"esp")); # tp[0]
193 &movd ($car1,&DWP(0,$np)); # np[0]
194 &pmuludq($mul1,$mul0); # ap[0]*bp[i]
195
196 &paddq ($mul1,$temp); # +=tp[0]
197 &movq ($acc0,$mul1);
198 &movq ($car0,$mul1);
199 &pand ($acc0,$mask);
200
201 &pmuludq($mul1,$_n0q); # *=n0
202
203 &pmuludq($car1,$mul1);
204 &paddq ($car1,$acc0);
205
206 &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
207 &movd ($acc1,&DWP(4,$np)); # np[1]
208 &movd ($acc0,&DWP(4,$ap)); # ap[1]
209
210 &psrlq ($car0,32);
211 &psrlq ($car1,32);
212 &paddq ($car0,$temp); # +=tp[1]
213
214 &inc ($j); # j++
215 &dec ($num);
216&set_label("inner");
217 &pmuludq($acc0,$mul0); # ap[j]*bp[i]
218 &pmuludq($acc1,$mul1); # np[j]*m1
219 &paddq ($car0,$acc0); # +=c0
220 &paddq ($car1,$acc1); # +=c1
221
222 &movq ($acc0,$car0);
223 &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
224 &pand ($acc0,$mask);
225 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
226 &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
227 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
228 &psrlq ($car0,32);
229 &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
230 &psrlq ($car1,32);
231 &paddq ($car0,$temp); # +=tp[j+1]
232
233 &dec ($num);
234 &lea ($j,&DWP(1,$j)); # j++
235 &jnz (&label("inner"));
236
237 &mov ($num,$j);
238 &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
239 &pmuludq($acc1,$mul1); # np[num-1]*m1
240 &paddq ($car0,$acc0); # +=c0
241 &paddq ($car1,$acc1); # +=c1
242
243 &movq ($acc0,$car0);
244 &pand ($acc0,$mask);
245 &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
246 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
247 &psrlq ($car0,32);
248 &psrlq ($car1,32);
249
250 &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
251 &paddq ($car1,$car0);
252 &paddq ($car1,$temp);
253 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
254
255 &lea ($i,&DWP(1,$i)); # i++
256 &cmp ($i,$num);
257 &jle (&label("outer"));
258
259 &emms (); # done with mmx bank
260 &jmp (&label("common_tail"));
261
262&set_label("non_sse2",16);
263}
264
265if (0) {
266 &mov ("esp",$_sp);
267 &xor ("eax","eax"); # signal "not fast enough [yet]"
268 &jmp (&label("just_leave"));
269 # While the below code provides competitive performance for
270 # all key lengths on modern Intel cores, it's still more
271 # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
272 # means compared to the original integer-only assembler.
273 # 512-bit RSA sign is better by ~40%, but that's about all
274 # one can say about all CPUs...
275} else {
276$inp="esi"; # integer path uses these registers differently
277$word="edi";
278$carry="ebp";
279
280 &mov ($inp,$_ap);
281 &lea ($carry,&DWP(1,$num));
282 &mov ($word,$_bp);
283 &xor ($j,$j); # j=0
284 &mov ("edx",$inp);
285 &and ($carry,1); # see if num is even
286 &sub ("edx",$word); # see if ap==bp
287 &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
288 &or ($carry,"edx");
289 &mov ($word,&DWP(0,$word)); # bp[0]
290 &jz (&label("bn_sqr_mont"));
291 &mov ($_bpend,"eax");
292 &mov ("eax",&DWP(0,$inp));
293 &xor ("edx","edx");
294
295&set_label("mull",16);
296 &mov ($carry,"edx");
297 &mul ($word); # ap[j]*bp[0]
298 &add ($carry,"eax");
299 &lea ($j,&DWP(1,$j));
300 &adc ("edx",0);
301 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
302 &cmp ($j,$num);
303 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
304 &jl (&label("mull"));
305
306 &mov ($carry,"edx");
307 &mul ($word); # ap[num-1]*bp[0]
308 &mov ($word,$_n0);
309 &add ("eax",$carry);
310 &mov ($inp,$_np);
311 &adc ("edx",0);
312 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
313
314 &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
315 &xor ($j,$j);
316 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
317 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
318
319 &mov ("eax",&DWP(0,$inp)); # np[0]
320 &mul ($word); # np[0]*m
321 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
322 &mov ("eax",&DWP(4,$inp)); # np[1]
323 &adc ("edx",0);
324 &inc ($j);
325
326 &jmp (&label("2ndmadd"));
327
328&set_label("1stmadd",16);
329 &mov ($carry,"edx");
330 &mul ($word); # ap[j]*bp[i]
331 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
332 &lea ($j,&DWP(1,$j));
333 &adc ("edx",0);
334 &add ($carry,"eax");
335 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
336 &adc ("edx",0);
337 &cmp ($j,$num);
338 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
339 &jl (&label("1stmadd"));
340
341 &mov ($carry,"edx");
342 &mul ($word); # ap[num-1]*bp[i]
343 &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
344 &mov ($word,$_n0);
345 &adc ("edx",0);
346 &mov ($inp,$_np);
347 &add ($carry,"eax");
348 &adc ("edx",0);
349 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
350
351 &xor ($j,$j);
352 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
353 &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
354 &adc ($j,0);
355 &mov ("eax",&DWP(0,$inp)); # np[0]
356 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
357 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
358
359 &mul ($word); # np[0]*m
360 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
361 &mov ("eax",&DWP(4,$inp)); # np[1]
362 &adc ("edx",0);
363 &mov ($j,1);
364
365&set_label("2ndmadd",16);
366 &mov ($carry,"edx");
367 &mul ($word); # np[j]*m
368 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
369 &lea ($j,&DWP(1,$j));
370 &adc ("edx",0);
371 &add ($carry,"eax");
372 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
373 &adc ("edx",0);
374 &cmp ($j,$num);
375 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
376 &jl (&label("2ndmadd"));
377
378 &mov ($carry,"edx");
379 &mul ($word); # np[j]*m
380 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
381 &adc ("edx",0);
382 &add ($carry,"eax");
383 &adc ("edx",0);
384 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
385
386 &xor ("eax","eax");
387 &mov ($j,$_bp); # &bp[i]
388 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
389 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
390 &lea ($j,&DWP(4,$j));
391 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
392 &cmp ($j,$_bpend);
393 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
394 &je (&label("common_tail"));
395
396 &mov ($word,&DWP(0,$j)); # bp[i+1]
397 &mov ($inp,$_ap);
398 &mov ($_bp,$j); # &bp[++i]
399 &xor ($j,$j);
400 &xor ("edx","edx");
401 &mov ("eax",&DWP(0,$inp));
402 &jmp (&label("1stmadd"));
403
404&set_label("bn_sqr_mont",16);
405$sbit=$num;
406 &mov ($_num,$num);
407 &mov ($_bp,$j); # i=0
408
409 &mov ("eax",$word); # ap[0]
410 &mul ($word); # ap[0]*ap[0]
411 &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
412 &mov ($sbit,"edx");
413 &shr ("edx",1);
414 &and ($sbit,1);
415 &inc ($j);
416&set_label("sqr",16);
417 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
418 &mov ($carry,"edx");
419 &mul ($word); # ap[j]*ap[0]
420 &add ("eax",$carry);
421 &lea ($j,&DWP(1,$j));
422 &adc ("edx",0);
423 &lea ($carry,&DWP(0,$sbit,"eax",2));
424 &shr ("eax",31);
425 &cmp ($j,$_num);
426 &mov ($sbit,"eax");
427 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
428 &jl (&label("sqr"));
429
430 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
431 &mov ($carry,"edx");
432 &mul ($word); # ap[num-1]*ap[0]
433 &add ("eax",$carry);
434 &mov ($word,$_n0);
435 &adc ("edx",0);
436 &mov ($inp,$_np);
437 &lea ($carry,&DWP(0,$sbit,"eax",2));
438 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
439 &shr ("eax",31);
440 &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
441
442 &lea ($carry,&DWP(0,"eax","edx",2));
443 &mov ("eax",&DWP(0,$inp)); # np[0]
444 &shr ("edx",31);
445 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
446 &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
447
448 &mul ($word); # np[0]*m
449 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
450 &mov ($num,$j);
451 &adc ("edx",0);
452 &mov ("eax",&DWP(4,$inp)); # np[1]
453 &mov ($j,1);
454
455&set_label("3rdmadd",16);
456 &mov ($carry,"edx");
457 &mul ($word); # np[j]*m
458 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
459 &adc ("edx",0);
460 &add ($carry,"eax");
461 &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
462 &adc ("edx",0);
463 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
464
465 &mov ($carry,"edx");
466 &mul ($word); # np[j+1]*m
467 &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
468 &lea ($j,&DWP(2,$j));
469 &adc ("edx",0);
470 &add ($carry,"eax");
471 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
472 &adc ("edx",0);
473 &cmp ($j,$num);
474 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
475 &jl (&label("3rdmadd"));
476
477 &mov ($carry,"edx");
478 &mul ($word); # np[j]*m
479 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
480 &adc ("edx",0);
481 &add ($carry,"eax");
482 &adc ("edx",0);
483 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
484
485 &mov ($j,$_bp); # i
486 &xor ("eax","eax");
487 &mov ($inp,$_ap);
488 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
489 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
490 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
491 &cmp ($j,$num);
492 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
493 &je (&label("common_tail"));
494
495 &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
496 &lea ($j,&DWP(1,$j));
497 &mov ("eax",$word);
498 &mov ($_bp,$j); # ++i
499 &mul ($word); # ap[i]*ap[i]
500 &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
501 &adc ("edx",0);
502 &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
503 &xor ($carry,$carry);
504 &cmp ($j,$num);
505 &lea ($j,&DWP(1,$j));
506 &je (&label("sqrlast"));
507
508 &mov ($sbit,"edx"); # zaps $num
509 &shr ("edx",1);
510 &and ($sbit,1);
511&set_label("sqradd",16);
512 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
513 &mov ($carry,"edx");
514 &mul ($word); # ap[j]*ap[i]
515 &add ("eax",$carry);
516 &lea ($carry,&DWP(0,"eax","eax"));
517 &adc ("edx",0);
518 &shr ("eax",31);
519 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
520 &lea ($j,&DWP(1,$j));
521 &adc ("eax",0);
522 &add ($carry,$sbit);
523 &adc ("eax",0);
524 &cmp ($j,$_num);
525 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
526 &mov ($sbit,"eax");
527 &jle (&label("sqradd"));
528
529 &mov ($carry,"edx");
530 &lea ("edx",&DWP(0,$sbit,"edx",2));
531 &shr ($carry,31);
532&set_label("sqrlast");
533 &mov ($word,$_n0);
534 &mov ($inp,$_np);
535 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
536
537 &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
538 &mov ("eax",&DWP(0,$inp)); # np[0]
539 &adc ($carry,0);
540 &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
541 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
542
543 &mul ($word); # np[0]*m
544 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
545 &lea ($num,&DWP(-1,$j));
546 &adc ("edx",0);
547 &mov ($j,1);
548 &mov ("eax",&DWP(4,$inp)); # np[1]
549
550 &jmp (&label("3rdmadd"));
551}
552
553&set_label("common_tail",16);
554 &mov ($np,$_np); # load modulus pointer
555 &mov ($rp,$_rp); # load result pointer
556 &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
557
558 &mov ("eax",&DWP(0,$tp)); # tp[0]
559 &mov ($j,$num); # j=num-1
560 &xor ($i,$i); # i=0 and clear CF!
561
562&set_label("sub",16);
563 &sbb ("eax",&DWP(0,$np,$i,4)); # borrow is chained through CF across iterations
564 &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
565 &dec ($j); # doesn't affect CF!
566 &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
567 &lea ($i,&DWP(1,$i)); # i++ (lea leaves flags from dec intact)
568 &jge (&label("sub")); # loop until j goes negative, i.e. j=-1 on exit
569
570 &sbb ("eax",0); # handle upmost overflow bit
571 &and ($tp,"eax"); # eax is used as a select mask over the two pointers
572 &not ("eax");
573 &mov ($np,$rp);
574 &and ($np,"eax");
575 &or ($tp,$np); # tp=carry?tp:rp
576
577&set_label("copy",16); # copy or in-place refresh
578 &mov ("eax",&DWP(0,$tp,$num,4));
579 &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
580 &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector ($j is -1 here, not 0)
581 &dec ($num);
582 &jge (&label("copy")); # walks i from num-1 down to 0
583
584 &mov ("esp",$_sp); # pull saved stack pointer
585 &mov ("eax",1); # signal "done"
586&set_label("just_leave");
587&function_end("bn_mul_mont");
588
589&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
590
591&asm_finish();
diff --git a/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl b/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl
new file mode 100644
index 0000000000..0812815bfb
--- /dev/null
+++ b/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl
@@ -0,0 +1,1138 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
5#
6# This module may be used under the terms of either the GNU General
7# Public License version 2 or later, the GNU Lesser General Public
8# License version 2.1 or later, the Mozilla Public License version
9# 1.1 or the BSD License. The exact terms of either license are
10# distributed along with this module. For further details see
11# http://www.openssl.org/~appro/camellia/.
12# ====================================================================
13
14# Performance in cycles per processed byte (less is better) in
15# 'openssl speed ...' benchmark:
16#
17# AMD K8 Core2 PIII P4
18# -evp camellia-128-ecb 21.5 22.8 27.0 28.9
19# + over gcc 3.4.6 +90/11% +70/10% +53/4% +160/64%
20# + over icc 8.0 +48/19% +21/15% +21/17% +55/37%
21#
22# camellia-128-cbc 17.3 21.1 23.9 25.9
23#
24# 128-bit key setup 196 280 256 240 cycles/key
25# + over gcc 3.4.6 +30/0% +17/11% +11/0% +63/40%
26# + over icc 8.0 +18/3% +10/0% +10/3% +21/10%
27#
28# Pairs of numbers in "+" rows represent performance improvement over
29# compiler generated position-independent code, PIC, and non-PIC
30# respectively. PIC results are of greater relevance, as this module
31# is position-independent, i.e. suitable for a shared library or PIE.
32# Position independence "costs" one register, which is why compilers
33# are so close with non-PIC results, they have an extra register to
34# spare. CBC results are better than ECB ones thanks to "zero-copy"
35# private _x86_* interface, and are ~30-40% better than with compiler
36# generated cmll_cbc.o, and reach ~80-90% of x86_64 performance on
37# same CPU (where applicable).
38
39$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
40push(@INC,"${dir}","${dir}../../perlasm");
41require "x86asm.pl";
42
43$OPENSSL=1;
44
45&asm_init($ARGV[0],"cmll-586.pl",$ARGV[$#ARGV] eq "386");
46
47@T=("eax","ebx","ecx","edx");
48$idx="esi";
49$key="edi";
50$Tbl="ebp";
51
52# stack frame layout in _x86_Camellia_* routines, frame is allocated
53# by caller
54$__ra=&DWP(0,"esp"); # return address
55$__s0=&DWP(4,"esp"); # s0 backing store
56$__s1=&DWP(8,"esp"); # s1 backing store
57$__s2=&DWP(12,"esp"); # s2 backing store
58$__s3=&DWP(16,"esp"); # s3 backing store
59$__end=&DWP(20,"esp"); # pointer to end/start of key schedule
60
61# stack frame layout in Camellia_[en|crypt] routines, which differs from
62# above by 4 and overlaps by pointer to end/start of key schedule
63$_end=&DWP(16,"esp");
64$_esp=&DWP(20,"esp");
65
66# const unsigned int Camellia_SBOX[4][256];
67# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][],
68# and [2][] - with [3][]. This is done to optimize code size.
69$SBOX1_1110=0; # Camellia_SBOX[0]
70$SBOX4_4404=4; # Camellia_SBOX[1]
71$SBOX2_0222=2048; # Camellia_SBOX[2]
72$SBOX3_3033=2052; # Camellia_SBOX[3]
73&static_label("Camellia_SIGMA");
74&static_label("Camellia_SBOX");
75
76sub Camellia_Feistel {
77my $i=@_[0]; # round index, callers in this file pass 0..5
78my $seed=defined(@_[1])?@_[1]:0; # byte offset of round 0 key from $key, default 0
79my $scale=$seed<0?-8:8; # byte step between round keys, negative seed walks backwards
80my $frame=defined(@_[2])?@_[2]:0; # offset of s[0] backing store from esp, default 0
81my $j=($i&1)*2; # odd rounds rotate the @T register assignment by two
82my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4]; # NOTE(review): "my" binds only $t0, $t1-$t3 are assigned as package globals
83
84 &xor ($t0,$idx); # t0^=key[0]
85 &xor ($t1,&DWP($seed+$i*$scale+4,$key)); # t1^=key[1]
86 &movz ($idx,&HB($t0)); # (t0>>8)&0xff
87 &mov ($t3,&DWP($SBOX3_3033,$Tbl,$idx,8)); # t3=SBOX3_3033[0]
88 &movz ($idx,&LB($t0)); # (t0>>0)&0xff
89 &xor ($t3,&DWP($SBOX4_4404,$Tbl,$idx,8)); # t3^=SBOX4_4404[0]
90 &shr ($t0,16);
91 &movz ($idx,&LB($t1)); # (t1>>0)&0xff
92 &mov ($t2,&DWP($SBOX1_1110,$Tbl,$idx,8)); # t2=SBOX1_1110[1]
93 &movz ($idx,&HB($t0)); # (t0>>24)&0xff
94 &xor ($t3,&DWP($SBOX1_1110,$Tbl,$idx,8)); # t3^=SBOX1_1110[0]
95 &movz ($idx,&HB($t1)); # (t1>>8)&0xff
96 &xor ($t2,&DWP($SBOX4_4404,$Tbl,$idx,8)); # t2^=SBOX4_4404[1]
97 &shr ($t1,16);
98 &movz ($t0,&LB($t0)); # (t0>>16)&0xff
99 &xor ($t3,&DWP($SBOX2_0222,$Tbl,$t0,8)); # t3^=SBOX2_0222[0]
100 &movz ($idx,&HB($t1)); # (t1>>24)&0xff
101 &mov ($t0,&DWP($frame+4*(($j+3)%4),"esp")); # prefetch "s3"
102 &xor ($t2,$t3); # t2^=t3
103 &rotr ($t3,8); # t3=RightRotate(t3,8)
104 &xor ($t2,&DWP($SBOX2_0222,$Tbl,$idx,8)); # t2^=SBOX2_0222[1]
105 &movz ($idx,&LB($t1)); # (t1>>16)&0xff
106 &mov ($t1,&DWP($frame+4*(($j+2)%4),"esp")); # prefetch "s2"
107 &xor ($t3,$t0); # t3^=s3
108 &xor ($t2,&DWP($SBOX3_3033,$Tbl,$idx,8)); # t2^=SBOX3_3033[1]
109 &mov ($idx,&DWP($seed+($i+1)*$scale,$key)); # prefetch key[i+1]
110 &xor ($t3,$t2); # t3^=t2
111 &mov (&DWP($frame+4*(($j+3)%4),"esp"),$t3); # s3=t3
112 &xor ($t2,$t1); # t2^=s2
113 &mov (&DWP($frame+4*(($j+2)%4),"esp"),$t2); # s2=t2
114}
115
116# void Camellia_EncryptBlock_Rounds(
117# int grandRounds,
118# const Byte plaintext[],
119# const KEY_TABLE_TYPE keyTable,
120# Byte ciphertext[])
121&function_begin("Camellia_EncryptBlock_Rounds");
122 &mov ("eax",&wparam(0)); # load grandRounds
123 &mov ($idx,&wparam(1)); # load plaintext pointer
124 &mov ($key,&wparam(2)); # load key schedule pointer
125
126 &mov ("ebx","esp"); # remember caller's %esp, stored in the frame below
127 &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra
128 &and ("esp",-64);
129
130 # place stack frame just "above mod 1024" the key schedule
131 # this ensures that cache associativity of 2 suffices
132 &lea ("ecx",&DWP(-64-63,$key));
133 &sub ("ecx","esp");
134 &neg ("ecx");
135 &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line
136 &sub ("esp","ecx");
137 &add ("esp",4); # 4 is reserved for callee's return address
138
139 &shl ("eax",6); # grandRounds*64 bytes
140 &lea ("eax",&DWP(0,$key,"eax")); # keyEnd=key+grandRounds*64
141 &mov ($_esp,"ebx"); # save %esp
142 &mov ($_end,"eax"); # save keyEnd
143
144 &call (&label("pic_point"));
145 &set_label("pic_point");
146 &blindpop($Tbl);
147 &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); # $Tbl=&Camellia_SBOX, position-independent
148
149 &mov (@T[0],&DWP(0,$idx)); # load plaintext
150 &mov (@T[1],&DWP(4,$idx));
151 &mov (@T[2],&DWP(8,$idx));
152 &bswap (@T[0]);
153 &mov (@T[3],&DWP(12,$idx));
154 &bswap (@T[1]);
155 &bswap (@T[2]);
156 &bswap (@T[3]);
157
158 &call ("_x86_Camellia_encrypt");
159
160 &mov ("esp",$_esp); # restore caller's %esp before touching arguments
161 &bswap (@T[0]);
162 &mov ($idx,&wparam(3)); # load ciphertext pointer
163 &bswap (@T[1]);
164 &bswap (@T[2]);
165 &bswap (@T[3]);
166 &mov (&DWP(0,$idx),@T[0]); # write ciphertext
167 &mov (&DWP(4,$idx),@T[1]);
168 &mov (&DWP(8,$idx),@T[2]);
169 &mov (&DWP(12,$idx),@T[3]);
170&function_end("Camellia_EncryptBlock_Rounds");
171# V1.x API: takes keyBitLength as first argument, converts it to grandRounds
172&function_begin_B("Camellia_EncryptBlock");
173 &mov ("eax",128);
174 &sub ("eax",&wparam(0)); # 128-keyBitLength, CF is set iff keyBitLength>128
175 &mov ("eax",3); # mov does not modify CF
176 &adc ("eax",0); # keyBitLength==128?3:4
177 &mov (&wparam(0),"eax"); # overwrite first argument with grandRounds
178 &jmp (&label("Camellia_EncryptBlock_Rounds")); # tail-jump, remaining arguments are reused as is
179&function_end_B("Camellia_EncryptBlock");
180
181if ($OPENSSL) {
182# void Camellia_encrypt(
183# const unsigned char *in,
184# unsigned char *out,
185# const CAMELLIA_KEY *key)
186&function_begin("Camellia_encrypt");
187 &mov ($idx,&wparam(0)); # load plaintext pointer
188 &mov ($key,&wparam(2)); # load key schedule pointer
189
190 &mov ("ebx","esp"); # remember caller's %esp, stored in the frame below
191 &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra
192 &and ("esp",-64);
193 &mov ("eax",&DWP(272,$key)); # load grandRounds counter
194
195 # place stack frame just "above mod 1024" the key schedule
196 # this ensures that cache associativity of 2 suffices
197 &lea ("ecx",&DWP(-64-63,$key));
198 &sub ("ecx","esp");
199 &neg ("ecx");
200 &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line
201 &sub ("esp","ecx");
202 &add ("esp",4); # 4 is reserved for callee's return address
203
204 &shl ("eax",6); # grandRounds*64 bytes
205 &lea ("eax",&DWP(0,$key,"eax")); # keyEnd=key+grandRounds*64
206 &mov ($_esp,"ebx"); # save %esp
207 &mov ($_end,"eax"); # save keyEnd
208
209 &call (&label("pic_point"));
210 &set_label("pic_point");
211 &blindpop($Tbl);
212 &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); # $Tbl=&Camellia_SBOX, position-independent
213
214 &mov (@T[0],&DWP(0,$idx)); # load plaintext
215 &mov (@T[1],&DWP(4,$idx));
216 &mov (@T[2],&DWP(8,$idx));
217 &bswap (@T[0]);
218 &mov (@T[3],&DWP(12,$idx));
219 &bswap (@T[1]);
220 &bswap (@T[2]);
221 &bswap (@T[3]);
222
223 &call ("_x86_Camellia_encrypt");
224
225 &mov ("esp",$_esp); # restore caller's %esp before touching arguments
226 &bswap (@T[0]);
227 &mov ($idx,&wparam(1)); # load ciphertext pointer
228 &bswap (@T[1]);
229 &bswap (@T[2]);
230 &bswap (@T[3]);
231 &mov (&DWP(0,$idx),@T[0]); # write ciphertext
232 &mov (&DWP(4,$idx),@T[1]);
233 &mov (&DWP(8,$idx),@T[2]);
234 &mov (&DWP(12,$idx),@T[3]);
235&function_end("Camellia_encrypt");
236}
237
238&function_begin_B("_x86_Camellia_encrypt");
239 &xor (@T[0],&DWP(0,$key)); # ^=key[0-3]
240 &xor (@T[1],&DWP(4,$key));
241 &xor (@T[2],&DWP(8,$key));
242 &xor (@T[3],&DWP(12,$key));
243 &mov ($idx,&DWP(16,$key)); # prefetch key[4]
244
245 &mov ($__s0,@T[0]); # save s[0-3]
246 &mov ($__s1,@T[1]);
247 &mov ($__s2,@T[2]);
248 &mov ($__s3,@T[3]);
249
250&set_label("loop",16);
251 for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16,4); } # six unrolled rounds, round keys start at 16($key), s[] at 4(%esp)
252
253 &add ($key,16*4); # step $key forward by 64 bytes
254 &cmp ($key,$__end);
255 &je (&label("done")); # reached keyEnd saved by the caller
256
257 # @T[0-1] are preloaded, $idx is preloaded with key[0]
258 &and ($idx,@T[0]);
259 &mov (@T[3],$__s3);
260 &rotl ($idx,1);
261 &mov (@T[2],@T[3]);
262 &xor (@T[1],$idx);
263 &or (@T[2],&DWP(12,$key));
264 &mov ($__s1,@T[1]); # s1^=LeftRotate(s0&key[0],1);
265 &xor (@T[2],$__s2);
266
267 &mov ($idx,&DWP(4,$key));
268 &mov ($__s2,@T[2]); # s2^=s3|key[3];
269 &or ($idx,@T[1]);
270 &and (@T[2],&DWP(8,$key));
271 &xor (@T[0],$idx);
272 &rotl (@T[2],1);
273 &mov ($__s0,@T[0]); # s0^=s1|key[1];
274 &xor (@T[3],@T[2]);
275 &mov ($idx,&DWP(16,$key)); # prefetch key[4]
276 &mov ($__s3,@T[3]); # s3^=LeftRotate(s2&key[2],1);
277 &jmp (&label("loop"));
278
279&set_label("done",8);
280 &mov (@T[2],@T[0]); # SwapHalf
281 &mov (@T[3],@T[1]);
282 &mov (@T[0],$__s2);
283 &mov (@T[1],$__s3);
284 &xor (@T[0],$idx); # $idx is preloaded with key[0]
285 &xor (@T[1],&DWP(4,$key));
286 &xor (@T[2],&DWP(8,$key));
287 &xor (@T[3],&DWP(12,$key));
288 &ret ();
289&function_end_B("_x86_Camellia_encrypt");
290
291# void Camellia_DecryptBlock_Rounds(
292# int grandRounds,
293# const Byte ciphertext[],
294# const KEY_TABLE_TYPE keyTable,
295# Byte plaintext[])
296&function_begin("Camellia_DecryptBlock_Rounds");
297 &mov ("eax",&wparam(0)); # load grandRounds
298 &mov ($idx,&wparam(1)); # load ciphertext pointer
299 &mov ($key,&wparam(2)); # load key schedule pointer
300
301 &mov ("ebx","esp"); # remember caller's %esp, stored in the frame below
302 &sub ("esp",7*4); # place for s[0-3],keyStart,esp and ra
303 &and ("esp",-64);
304
305 # place stack frame just "above mod 1024" the key schedule
306 # this ensures that cache associativity of 2 suffices
307 &lea ("ecx",&DWP(-64-63,$key));
308 &sub ("ecx","esp");
309 &neg ("ecx");
310 &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line
311 &sub ("esp","ecx");
312 &add ("esp",4); # 4 is reserved for callee's return address
313
314 &shl ("eax",6); # grandRounds*64 bytes
315 &mov (&DWP(4*4,"esp"),$key); # save keyStart
316 &lea ($key,&DWP(0,$key,"eax")); # $key=key+grandRounds*64, decryption walks the schedule backwards
317 &mov (&DWP(5*4,"esp"),"ebx");# save %esp
318
319 &call (&label("pic_point"));
320 &set_label("pic_point");
321 &blindpop($Tbl);
322 &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); # $Tbl=&Camellia_SBOX, position-independent
323
324 &mov (@T[0],&DWP(0,$idx)); # load ciphertext
325 &mov (@T[1],&DWP(4,$idx));
326 &mov (@T[2],&DWP(8,$idx));
327 &bswap (@T[0]);
328 &mov (@T[3],&DWP(12,$idx));
329 &bswap (@T[1]);
330 &bswap (@T[2]);
331 &bswap (@T[3]);
332
333 &call ("_x86_Camellia_decrypt");
334
335 &mov ("esp",&DWP(5*4,"esp")); # restore caller's %esp before touching arguments
336 &bswap (@T[0]);
337 &mov ($idx,&wparam(3)); # load plaintext pointer
338 &bswap (@T[1]);
339 &bswap (@T[2]);
340 &bswap (@T[3]);
341 &mov (&DWP(0,$idx),@T[0]); # write plaintext
342 &mov (&DWP(4,$idx),@T[1]);
343 &mov (&DWP(8,$idx),@T[2]);
344 &mov (&DWP(12,$idx),@T[3]);
345&function_end("Camellia_DecryptBlock_Rounds");
346# V1.x API: takes keyBitLength as first argument, converts it to grandRounds
347&function_begin_B("Camellia_DecryptBlock");
348 &mov ("eax",128);
349 &sub ("eax",&wparam(0)); # 128-keyBitLength, CF is set iff keyBitLength>128
350 &mov ("eax",3); # mov does not modify CF
351 &adc ("eax",0); # keyBitLength==128?3:4
352 &mov (&wparam(0),"eax"); # overwrite first argument with grandRounds
353 &jmp (&label("Camellia_DecryptBlock_Rounds")); # tail-jump, remaining arguments are reused as is
354&function_end_B("Camellia_DecryptBlock");
355
356if ($OPENSSL) {
357# void Camellia_decrypt(
358# const unsigned char *in,
359# unsigned char *out,
360# const CAMELLIA_KEY *key)
361&function_begin("Camellia_decrypt");
362 &mov ($idx,&wparam(0)); # load ciphertext pointer
363 &mov ($key,&wparam(2)); # load key schedule pointer
364
365 &mov ("ebx","esp"); # remember caller's %esp, stored in the frame below
366 &sub ("esp",7*4); # place for s[0-3],keyStart,esp and ra
367 &and ("esp",-64);
368 &mov ("eax",&DWP(272,$key)); # load grandRounds counter
369
370 # place stack frame just "above mod 1024" the key schedule
371 # this ensures that cache associativity of 2 suffices
372 &lea ("ecx",&DWP(-64-63,$key));
373 &sub ("ecx","esp");
374 &neg ("ecx");
375 &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line
376 &sub ("esp","ecx");
377 &add ("esp",4); # 4 is reserved for callee's return address
378
379 &shl ("eax",6); # grandRounds*64 bytes
380 &mov (&DWP(4*4,"esp"),$key); # save keyStart
381 &lea ($key,&DWP(0,$key,"eax")); # $key=key+grandRounds*64, decryption walks the schedule backwards
382 &mov (&DWP(5*4,"esp"),"ebx");# save %esp
383
384 &call (&label("pic_point"));
385 &set_label("pic_point");
386 &blindpop($Tbl);
387 &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); # $Tbl=&Camellia_SBOX, position-independent
388
389 &mov (@T[0],&DWP(0,$idx)); # load ciphertext
390 &mov (@T[1],&DWP(4,$idx));
391 &mov (@T[2],&DWP(8,$idx));
392 &bswap (@T[0]);
393 &mov (@T[3],&DWP(12,$idx));
394 &bswap (@T[1]);
395 &bswap (@T[2]);
396 &bswap (@T[3]);
397
398 &call ("_x86_Camellia_decrypt");
399
400 &mov ("esp",&DWP(5*4,"esp")); # restore caller's %esp before touching arguments
401 &bswap (@T[0]);
402 &mov ($idx,&wparam(1)); # load plaintext pointer
403 &bswap (@T[1]);
404 &bswap (@T[2]);
405 &bswap (@T[3]);
406 &mov (&DWP(0,$idx),@T[0]); # write plaintext
407 &mov (&DWP(4,$idx),@T[1]);
408 &mov (&DWP(8,$idx),@T[2]);
409 &mov (&DWP(12,$idx),@T[3]);
410&function_end("Camellia_decrypt");
411}
412
413&function_begin_B("_x86_Camellia_decrypt");
414 &xor (@T[0],&DWP(0,$key)); # ^=key[0-3]
415 &xor (@T[1],&DWP(4,$key));
416 &xor (@T[2],&DWP(8,$key));
417 &xor (@T[3],&DWP(12,$key));
418 &mov ($idx,&DWP(-8,$key)); # prefetch key[-2]
419
420 &mov ($__s0,@T[0]); # save s[0-3]
421 &mov ($__s1,@T[1]);
422 &mov ($__s2,@T[2]);
423 &mov ($__s3,@T[3]);
424
425&set_label("loop",16);
426 for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8,4); } # six unrolled rounds, round keys walk down from -8($key), s[] at 4(%esp)
427
428 &sub ($key,16*4); # step $key back by 64 bytes
429 &cmp ($key,$__end);
430 &je (&label("done")); # reached keyStart saved by the caller
431
432 # @T[0-1] are preloaded, $idx is preloaded with key[2]
433 &and ($idx,@T[0]);
434 &mov (@T[3],$__s3);
435 &rotl ($idx,1);
436 &mov (@T[2],@T[3]);
437 &xor (@T[1],$idx);
438 &or (@T[2],&DWP(4,$key));
439 &mov ($__s1,@T[1]); # s1^=LeftRotate(s0&key[2],1);
440 &xor (@T[2],$__s2);
441
442 &mov ($idx,&DWP(12,$key));
443 &mov ($__s2,@T[2]); # s2^=s3|key[1];
444 &or ($idx,@T[1]);
445 &and (@T[2],&DWP(0,$key));
446 &xor (@T[0],$idx);
447 &rotl (@T[2],1);
448 &mov ($__s0,@T[0]); # s0^=s1|key[3];
449 &xor (@T[3],@T[2]);
450 &mov ($idx,&DWP(-8,$key)); # prefetch key[-2]
451 &mov ($__s3,@T[3]); # s3^=LeftRotate(s2&key[0],1);
452 &jmp (&label("loop"));
453
454&set_label("done",8);
455 &mov (@T[2],@T[0]); # SwapHalf
456 &mov (@T[3],@T[1]);
457 &mov (@T[0],$__s2);
458 &mov (@T[1],$__s3);
459 &xor (@T[2],$idx); # $idx is preloaded with key[2]
460 &xor (@T[3],&DWP(12,$key));
461 &xor (@T[0],&DWP(0,$key));
462 &xor (@T[1],&DWP(4,$key));
463 &ret ();
464&function_end_B("_x86_Camellia_decrypt");
465
466# shld is very slow on Intel P4 family. Even on AMD it limits
467# instruction decode rate [because it's VectorPath] and consequently
468# performance. PIII, PM and Core[2] seem to be the only ones which
469# execute this code ~7% faster...
470sub __rotl128 { # shld flavour: rotate $i0..$i3 left by $rot bits as one 128-bit value
471 my ($i0,$i1,$i2,$i3,$rot,$rnd,@T)=@_; # trailing @T names the registers to store afterwards
472
473 $rnd *= 2; # 8-byte slot number -> 32-bit word index
474 if ($rot) {
475 &mov ($idx,$i0); # old $i0 supplies the bits shifted into $i3
476 &shld ($i0,$i1,$rot);
477 &shld ($i1,$i2,$rot);
478 &shld ($i2,$i3,$rot);
479 &shld ($i3,$idx,$rot);
480 }
481 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); # a store is emitted, and $rnd advanced, only for a listed register
482 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]);
483 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]);
484 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]);
485}
486
487# ... Implementing 128-bit rotate without shld gives >3x performance
488# improvement on P4, only ~7% degradation on other Intel CPUs and
489# not worse performance on AMD. This is therefore preferred.
490sub _rotl128 { # shl/shr/or flavour: rotate $i0..$i3 left by $rot bits, then store the listed words
491 my ($i0,$i1,$i2,$i3,$rot,$rnd,@T)=@_; # trailing @T names the registers to store afterwards
492
493 $rnd *= 2; # 8-byte slot number -> 32-bit word index
494 if ($rot) {
495 &mov ($Tbl,$i0); # $Tbl and $idx serve as scratch here; old $i0 wraps into $i3
496 &shl ($i0,$rot);
497 &mov ($idx,$i1);
498 &shr ($idx,32-$rot);
499 &shl ($i1,$rot);
500 &or ($i0,$idx);
501 &mov ($idx,$i2);
502 &shl ($i2,$rot);
503 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); # stores are interleaved with the remaining shifts
504 &shr ($idx,32-$rot);
505 &or ($i1,$idx);
506 &shr ($Tbl,32-$rot);
507 &mov ($idx,$i3);
508 &shr ($idx,32-$rot);
509 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]);
510 &shl ($i3,$rot);
511 &or ($i2,$idx);
512 &or ($i3,$Tbl);
513 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]);
514 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]);
515 } else { # zero rotate: only the stores are emitted
516 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]);
517 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]);
518 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]);
519 &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]);
520 }
521}
522
523sub _saveround { # emit stores of up to four registers into 8-byte slot $slot at $base
524my ($slot,$base,@regs)=@_;
525my $disp=int(@regs[0])?shift(@regs):0; # optional numeric bias ahead of the registers
526 $disp+=$slot*8;
527 &mov (&DWP($disp+0,$base),@regs[0]);
528 foreach my $n (1..3) {
529 &mov (&DWP($disp+4*$n,$base),@regs[$n]) if ($#regs>=$n);
530 }
531}
532
533sub _loadround { # emit loads of up to four registers from 8-byte slot $slot at $base
534my ($slot,$base,@regs)=@_;
535my $disp=int(@regs[0])?shift(@regs):0; # optional numeric bias ahead of the registers
536 $disp+=$slot*8;
537 &mov (@regs[0],&DWP($disp+0,$base));
538 foreach my $n (1..3) {
539 &mov (@regs[$n],&DWP($disp+4*$n,$base)) if ($#regs>=$n);
540 }
541}
542
543# void Camellia_Ekeygen(
544# const int keyBitLength,
545# const Byte *rawKey,
546# KEY_TABLE_TYPE keyTable)
547&function_begin("Camellia_Ekeygen"); # leaves grandRounds in eax and the end of the key schedule in edx
548{ my $step=0;
549
550 &stack_push(4); # place for s[0-3]
551
552 &mov ($Tbl,&wparam(0)); # load arguments
553 &mov ($idx,&wparam(1));
554 &mov ($key,&wparam(2));
555
556 &mov (@T[0],&DWP(0,$idx)); # load 0-127 bits
557 &mov (@T[1],&DWP(4,$idx));
558 &mov (@T[2],&DWP(8,$idx));
559 &mov (@T[3],&DWP(12,$idx));
560
561 &bswap (@T[0]);
562 &bswap (@T[1]);
563 &bswap (@T[2]);
564 &bswap (@T[3]);
565
566 &_saveround (0,$key,@T); # KL<<<0
567
568 &cmp ($Tbl,128); # keyBitLength==128?
569 &je (&label("1st128"));
570
571 &mov (@T[0],&DWP(16,$idx)); # load 128-191 bits
572 &mov (@T[1],&DWP(20,$idx));
573 &cmp ($Tbl,192); # keyBitLength==192?
574 &je (&label("1st192"));
575 &mov (@T[2],&DWP(24,$idx)); # load 192-255 bits
576 &mov (@T[3],&DWP(28,$idx));
577 &jmp (&label("1st256"));
578&set_label("1st192",4);
579 &mov (@T[2],@T[0]); # 192-bit key: bits 192-255 are the complement of bits 128-191
580 &mov (@T[3],@T[1]);
581 &not (@T[2]);
582 &not (@T[3]);
583&set_label("1st256",4);
584 &bswap (@T[0]);
585 &bswap (@T[1]);
586 &bswap (@T[2]);
587 &bswap (@T[3]);
588
589 &_saveround (4,$key,@T); # temporary storage for KR!
590
591 &xor (@T[0],&DWP(0*8+0,$key)); # KR^KL
592 &xor (@T[1],&DWP(0*8+4,$key));
593 &xor (@T[2],&DWP(1*8+0,$key));
594 &xor (@T[3],&DWP(1*8+4,$key));
595
596&set_label("1st128",4);
597 &call (&label("pic_point")); # PIC: fetch own address to locate the tables
598 &set_label("pic_point");
599 &blindpop($Tbl);
600 &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
601 &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); # $key now points at SIGMA
602
603 &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0]
604 &mov (&swtmp(0),@T[0]); # save s[0-3]
605 &mov (&swtmp(1),@T[1]);
606 &mov (&swtmp(2),@T[2]);
607 &mov (&swtmp(3),@T[3]);
608 &Camellia_Feistel($step++);
609 &Camellia_Feistel($step++);
610 &mov (@T[2],&swtmp(2));
611 &mov (@T[3],&swtmp(3));
612
613 &mov ($idx,&wparam(2));
614 &xor (@T[0],&DWP(0*8+0,$idx)); # ^KL
615 &xor (@T[1],&DWP(0*8+4,$idx));
616 &xor (@T[2],&DWP(1*8+0,$idx));
617 &xor (@T[3],&DWP(1*8+4,$idx));
618
619 &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[4]
620 &mov (&swtmp(0),@T[0]); # save s[0-3]
621 &mov (&swtmp(1),@T[1]);
622 &mov (&swtmp(2),@T[2]);
623 &mov (&swtmp(3),@T[3]);
624 &Camellia_Feistel($step++);
625 &Camellia_Feistel($step++);
626 &mov (@T[2],&swtmp(2));
627 &mov (@T[3],&swtmp(3));
628
629 &mov ($idx,&wparam(0)); # reload keyBitLength
630 &cmp ($idx,128);
631 &jne (&label("2nd256")); # 192/256-bit keys take the longer path below
632
633 &mov ($key,&wparam(2));
634 &lea ($key,&DWP(128,$key)); # size optimization
635
636 ####### process KA
637 &_saveround (2,$key,-128,@T); # KA<<<0
638 &_rotl128 (@T,15,6,@T); # KA<<<15
639 &_rotl128 (@T,15,8,@T); # KA<<<(15+15=30)
640 &_rotl128 (@T,15,12,@T[0],@T[1]); # KA<<<(30+15=45)
641 &_rotl128 (@T,15,14,@T); # KA<<<(45+15=60)
642 push (@T,shift(@T)); # rotl128(@T,32);
643 &_rotl128 (@T,2,20,@T); # KA<<<(60+32+2=94)
644 &_rotl128 (@T,17,24,@T); # KA<<<(94+17=111)
645
646 ####### process KL
647 &_loadround (0,$key,-128,@T); # load KL
648 &_rotl128 (@T,15,4,@T); # KL<<<15
649 &_rotl128 (@T,30,10,@T); # KL<<<(15+30=45)
650 &_rotl128 (@T,15,13,@T[2],@T[3]); # KL<<<(45+15=60)
651 &_rotl128 (@T,17,16,@T); # KL<<<(60+17=77)
652 &_rotl128 (@T,17,18,@T); # KL<<<(77+17=94)
653 &_rotl128 (@T,17,22,@T); # KL<<<(94+17=111)
654
655 while (@T[0] ne "eax") # restore order
656 { unshift (@T,pop(@T)); }
657
658 &mov ("eax",3); # 3 grandRounds
659 &jmp (&label("done"));
660
661&set_label("2nd256",16);
662 &mov ($idx,&wparam(2));
663 &_saveround (6,$idx,@T); # temporary storage for KA!
664
665 &xor (@T[0],&DWP(4*8+0,$idx)); # KA^KR
666 &xor (@T[1],&DWP(4*8+4,$idx));
667 &xor (@T[2],&DWP(5*8+0,$idx));
668 &xor (@T[3],&DWP(5*8+4,$idx));
669
670 &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[8]
671 &mov (&swtmp(0),@T[0]); # save s[0-3]
672 &mov (&swtmp(1),@T[1]);
673 &mov (&swtmp(2),@T[2]);
674 &mov (&swtmp(3),@T[3]);
675 &Camellia_Feistel($step++);
676 &Camellia_Feistel($step++);
677 &mov (@T[2],&swtmp(2));
678 &mov (@T[3],&swtmp(3));
679
680 &mov ($key,&wparam(2));
681 &lea ($key,&DWP(128,$key)); # size optimization
682
683 ####### process KB
684 &_saveround (2,$key,-128,@T); # KB<<<0
685 &_rotl128 (@T,30,10,@T); # KB<<<30
686 &_rotl128 (@T,30,20,@T); # KB<<<(30+30=60)
687 push (@T,shift(@T)); # rotl128(@T,32);
688 &_rotl128 (@T,19,32,@T); # KB<<<(60+32+19=111)
689
690 ####### process KR
691 &_loadround (4,$key,-128,@T); # load KR
692 &_rotl128 (@T,15,4,@T); # KR<<<15
693 &_rotl128 (@T,15,8,@T); # KR<<<(15+15=30)
694 &_rotl128 (@T,30,18,@T); # KR<<<(30+30=60)
695 push (@T,shift(@T)); # rotl128(@T,32);
696 &_rotl128 (@T,2,26,@T); # KR<<<(60+32+2=94)
697
698 ####### process KA
699 &_loadround (6,$key,-128,@T); # load KA
700 &_rotl128 (@T,15,6,@T); # KA<<<15
701 &_rotl128 (@T,30,14,@T); # KA<<<(15+30=45)
702 push (@T,shift(@T)); # rotl128(@T,32);
703 &_rotl128 (@T,0,24,@T); # KA<<<(45+32+0=77)
704 &_rotl128 (@T,17,28,@T); # KA<<<(77+17=94)
705
706 ####### process KL
707 &_loadround (0,$key,-128,@T); # load KL
708 push (@T,shift(@T)); # rotl128(@T,32);
709 &_rotl128 (@T,13,12,@T); # KL<<<(32+13=45)
710 &_rotl128 (@T,15,16,@T); # KL<<<(45+15=60)
711 &_rotl128 (@T,17,22,@T); # KL<<<(60+17=77)
712 push (@T,shift(@T)); # rotl128(@T,32);
713 &_rotl128 (@T,2,30,@T); # KL<<<(77+32+2=111)
714
715 while (@T[0] ne "eax") # restore order
716 { unshift (@T,pop(@T)); }
717
718 &mov ("eax",4); # 4 grandRounds
719&set_label("done");
720 &lea ("edx",&DWP(272-128,$key)); # end of key schedule; $key is biased by +128, so this is keyTable+272
721 &stack_pop(4);
722}
723&function_end("Camellia_Ekeygen");
724
725if ($OPENSSL) { # OpenSSL-style wrapper, emitted only when $OPENSSL is set
726# int Camellia_set_key (
727# const unsigned char *userKey,
728# int bits,
729# CAMELLIA_KEY *key)
730&function_begin_B("Camellia_set_key");
731 &push ("ebx");
732 &mov ("ecx",&wparam(0)); # pull arguments
733 &mov ("ebx",&wparam(1));
734 &mov ("edx",&wparam(2));
735
736 &mov ("eax",-1); # return value for a NULL pointer argument
737 &test ("ecx","ecx");
738 &jz (&label("done")); # userKey==NULL?
739 &test ("edx","edx");
740 &jz (&label("done")); # key==NULL?
741
742 &mov ("eax",-2); # return value for an unsupported key length
743 &cmp ("ebx",256);
744 &je (&label("arg_ok")); # bits==256?
745 &cmp ("ebx",192);
746 &je (&label("arg_ok")); # bits==192?
747 &cmp ("ebx",128);
748 &jne (&label("done")); # bits!=128?
749&set_label("arg_ok",4);
750
751 &push ("edx"); # push arguments
752 &push ("ecx");
753 &push ("ebx");
754 &call ("Camellia_Ekeygen");
755 &stack_pop(3); # drop the three arguments pushed above
756
757 # eax holds grandRounds and edx points at where to put it
758 &mov (&DWP(0,"edx"),"eax");
759 &xor ("eax","eax"); # return 0 on success
760&set_label("done",4);
761 &pop ("ebx");
762 &ret ();
763&function_end_B("Camellia_set_key");
764}
765
766@SBOX=( # 16 rows of 16 = 256 byte values, looked up by the S1110/S4404/S0222/S3033 helpers
767112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65,
768 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189,
769134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26,
770166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77,
771139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153,
772223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215,
773 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34,
774254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80,
775170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210,
776 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148,
777135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226,
778 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46,
779233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89,
780120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250,
781114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164,
782 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158);
783
784sub S1110 { my ($n)=@_; my $v=$SBOX[$n]; return ($v<<24)|($v<<16)|($v<<8); } # SBOX[n] in bytes 3,2,1
785sub S4404 { my ($n)=@_; my $v=$SBOX[(($n<<1)|($n>>7))&0xff]; return ($v<<24)|($v<<16)|$v; } # SBOX[rotl8(n)] in bytes 3,2,0
786sub S0222 { my ($n)=@_; my $v=$SBOX[$n]; $v=(($v<<1)|($v>>7))&0xff; return ($v<<16)|($v<<8)|$v; } # rotl8(SBOX[n]) in bytes 2,1,0
787sub S3033 { my ($n)=@_; my $v=$SBOX[$n]; $v=(($v>>1)|($v<<7))&0xff; return ($v<<24)|($v<<8)|$v; } # rotr8(SBOX[n]) in bytes 3,1,0
788
789&set_label("Camellia_SIGMA",64);
790&data_word(
791 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2,
792 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c,
793 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd,
794 0, 0, 0, 0); # four zero words bring the 12 constants up to 16 words
795&set_label("Camellia_SBOX",64);
796# tables are interleaved: S1110[i],S4404[i] pairs first, then S0222[i],S3033[i] pairs
797for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
798for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }
799
800# void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out,
801# size_t length, const CAMELLIA_KEY *key,
802# unsigned char *ivp,const int enc);
803{
804# stack frame layout
805# -4(%esp) # return address 0(%esp)
806# 0(%esp) # s0 4(%esp)
807# 4(%esp) # s1 8(%esp)
808# 8(%esp) # s2 12(%esp)
809# 12(%esp) # s3 16(%esp)
810# 16(%esp) # end of key schedule 20(%esp)
811# 20(%esp) # %esp backup
812my $_inp=&DWP(24,"esp"); #copy of wparam(0)
813my $_out=&DWP(28,"esp"); #copy of wparam(1)
814my $_len=&DWP(32,"esp"); #copy of wparam(2)
815my $_key=&DWP(36,"esp"); #copy of wparam(3)
816my $_ivp=&DWP(40,"esp"); #copy of wparam(4)
817my $ivec=&DWP(44,"esp"); #ivec[16]
818my $_tmp=&DWP(44,"esp"); #volatile variable [yes, aliases with ivec]
819my ($s0,$s1,$s2,$s3) = @T;
820
821&function_begin("Camellia_cbc_encrypt");
822 &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len
823 &cmp ($s2,0);
824 &je (&label("enc_out"));
825
826 &pushf (); # flags are restored by popf on every exit path
827 &cld (); # rep movsb/stosb below must run forward
828
829 &mov ($s0,&wparam(0)); # load inp
830 &mov ($s1,&wparam(1)); # load out
831 #&mov ($s2,&wparam(2)); # load len
832 &mov ($s3,&wparam(3)); # load key
833 &mov ($Tbl,&wparam(4)); # load ivp
834
835 # allocate aligned stack frame...
836 &lea ($idx,&DWP(-64,"esp"));
837 &and ($idx,-64);
838
839 # place stack frame just "above mod 1024" the key schedule
840 # this ensures that cache associativity of 2 suffices
841 &lea ($key,&DWP(-64-63,$s3));
842 &sub ($key,$idx);
843 &neg ($key);
844 &and ($key,0x3C0); # modulo 1024, but aligned to cache-line
845 &sub ($idx,$key);
846
847 &mov ($key,&wparam(5)); # load enc
848
849 &exch ("esp",$idx); # switch to the new frame, old %esp ends up in $idx
850 &add ("esp",4); # reserve for return address!
851 &mov ($_esp,$idx); # save %esp
852
853 &mov ($_inp,$s0); # save copy of inp
854 &mov ($_out,$s1); # save copy of out
855 &mov ($_len,$s2); # save copy of len
856 &mov ($_key,$s3); # save copy of key
857 &mov ($_ivp,$Tbl); # save copy of ivp
858
859 &call (&label("pic_point")); # make it PIC!
860 &set_label("pic_point");
861 &blindpop($Tbl);
862 &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
863
864 &mov ($idx,32); # 32 passes x 128 bytes touch the whole 4096-byte table
865 &set_label("prefetch_sbox",4);
866 &mov ($s0,&DWP(0,$Tbl));
867 &mov ($s1,&DWP(32,$Tbl));
868 &mov ($s2,&DWP(64,$Tbl));
869 &mov ($s3,&DWP(96,$Tbl));
870 &lea ($Tbl,&DWP(128,$Tbl));
871 &dec ($idx);
872 &jnz (&label("prefetch_sbox"));
873 &mov ($s0,$_key);
874 &sub ($Tbl,4096); # rewind $Tbl to the start of the table
875 &mov ($idx,$_inp);
876 &mov ($s3,&DWP(272,$s0)); # load grandRounds
877
878 &cmp ($key,0);
879 &je (&label("DECRYPT")); # enc==0 selects decryption
880
881 &mov ($s2,$_len);
882 &mov ($key,$_ivp);
883 &shl ($s3,6);
884 &lea ($s3,&DWP(0,$s0,$s3));
885 &mov ($_end,$s3);
886
887 &test ($s2,0xFFFFFFF0);
888 &jz (&label("enc_tail")); # short input...
889
890 &mov ($s0,&DWP(0,$key)); # load iv
891 &mov ($s1,&DWP(4,$key));
892
893 &set_label("enc_loop",4);
894 &mov ($s2,&DWP(8,$key));
895 &mov ($s3,&DWP(12,$key));
896
897 &xor ($s0,&DWP(0,$idx)); # xor input data
898 &xor ($s1,&DWP(4,$idx));
899 &xor ($s2,&DWP(8,$idx));
900 &bswap ($s0);
901 &xor ($s3,&DWP(12,$idx));
902 &bswap ($s1);
903 &mov ($key,$_key); # load key
904 &bswap ($s2);
905 &bswap ($s3);
906
907 &call ("_x86_Camellia_encrypt");
908
909 &mov ($idx,$_inp); # load inp
910 &mov ($key,$_out); # load out
911
912 &bswap ($s0);
913 &bswap ($s1);
914 &bswap ($s2);
915 &mov (&DWP(0,$key),$s0); # save output data
916 &bswap ($s3);
917 &mov (&DWP(4,$key),$s1);
918 &mov (&DWP(8,$key),$s2);
919 &mov (&DWP(12,$key),$s3);
920
921 &mov ($s2,$_len); # load len
922
923 &lea ($idx,&DWP(16,$idx));
924 &mov ($_inp,$idx); # save inp
925
926 &lea ($s3,&DWP(16,$key));
927 &mov ($_out,$s3); # save out
928
929 &sub ($s2,16);
930 &test ($s2,0xFFFFFFF0);
931 &mov ($_len,$s2); # save len
932 &jnz (&label("enc_loop"));
933 &test ($s2,15);
934 &jnz (&label("enc_tail"));
935 &mov ($idx,$_ivp); # load ivp
936 &mov ($s2,&DWP(8,$key)); # restore last dwords
937 &mov ($s3,&DWP(12,$key));
938 &mov (&DWP(0,$idx),$s0); # save ivec
939 &mov (&DWP(4,$idx),$s1);
940 &mov (&DWP(8,$idx),$s2);
941 &mov (&DWP(12,$idx),$s3);
942
943 &mov ("esp",$_esp);
944 &popf ();
945 &set_label("enc_out");
946 &function_end_A();
947 &pushf (); # kludge, never executed
948
949 &set_label("enc_tail",4);
950 &mov ($s0,$key eq "edi" ? $key : "");
951 &mov ($key,$_out); # load out
952 &push ($s0); # push ivp
953 &mov ($s1,16);
954 &sub ($s1,$s2);
955 &cmp ($key,$idx); # compare with inp
956 &je (&label("enc_in_place"));
957 &align (4);
958 &data_word(0xA4F3F689); # rep movsb # copy input
959 &jmp (&label("enc_skip_in_place"));
960 &set_label("enc_in_place");
961 &lea ($key,&DWP(0,$key,$s2));
962 &set_label("enc_skip_in_place");
963 &mov ($s2,$s1);
964 &xor ($s0,$s0);
965 &align (4);
966 &data_word(0xAAF3F689); # rep stosb # zero tail
967 &pop ($key); # pop ivp
968
969 &mov ($idx,$_out); # output as input
970 &mov ($s0,&DWP(0,$key));
971 &mov ($s1,&DWP(4,$key));
972 &mov ($_len,16); # len=16
973 &jmp (&label("enc_loop")); # one more spin...
974
975#----------------------------- DECRYPT -----------------------------#
976&set_label("DECRYPT",16);
977 &shl ($s3,6);
978 &lea ($s3,&DWP(0,$s0,$s3));
979 &mov ($_end,$s0); # decryption stops at the start of the key schedule...
980 &mov ($_key,$s3); # ...and starts at key+grandRounds*64
981
982 &cmp ($idx,$_out);
983 &je (&label("dec_in_place")); # in-place processing...
984
985 &mov ($key,$_ivp); # load ivp
986 &mov ($_tmp,$key);
987
988 &set_label("dec_loop",4);
989 &mov ($s0,&DWP(0,$idx)); # read input
990 &mov ($s1,&DWP(4,$idx));
991 &mov ($s2,&DWP(8,$idx));
992 &bswap ($s0);
993 &mov ($s3,&DWP(12,$idx));
994 &bswap ($s1);
995 &mov ($key,$_key); # load key
996 &bswap ($s2);
997 &bswap ($s3);
998
999 &call ("_x86_Camellia_decrypt");
1000
1001 &mov ($key,$_tmp); # load ivp
1002 &mov ($idx,$_len); # load len
1003
1004 &bswap ($s0);
1005 &bswap ($s1);
1006 &bswap ($s2);
1007 &xor ($s0,&DWP(0,$key)); # xor iv
1008 &bswap ($s3);
1009 &xor ($s1,&DWP(4,$key));
1010 &xor ($s2,&DWP(8,$key));
1011 &xor ($s3,&DWP(12,$key));
1012
1013 &sub ($idx,16);
1014 &jc (&label("dec_partial"));
1015 &mov ($_len,$idx); # save len
1016 &mov ($idx,$_inp); # load inp
1017 &mov ($key,$_out); # load out
1018
1019 &mov (&DWP(0,$key),$s0); # write output
1020 &mov (&DWP(4,$key),$s1);
1021 &mov (&DWP(8,$key),$s2);
1022 &mov (&DWP(12,$key),$s3);
1023
1024 &mov ($_tmp,$idx); # save ivp
1025 &lea ($idx,&DWP(16,$idx));
1026 &mov ($_inp,$idx); # save inp
1027
1028 &lea ($key,&DWP(16,$key));
1029 &mov ($_out,$key); # save out
1030
1031 &jnz (&label("dec_loop"));
1032 &mov ($key,$_tmp); # load temp ivp
1033 &set_label("dec_end");
1034 &mov ($idx,$_ivp); # load user ivp
1035 &mov ($s0,&DWP(0,$key)); # load iv
1036 &mov ($s1,&DWP(4,$key));
1037 &mov ($s2,&DWP(8,$key));
1038 &mov ($s3,&DWP(12,$key));
1039 &mov (&DWP(0,$idx),$s0); # copy back to user
1040 &mov (&DWP(4,$idx),$s1);
1041 &mov (&DWP(8,$idx),$s2);
1042 &mov (&DWP(12,$idx),$s3);
1043 &jmp (&label("dec_out"));
1044
1045 &set_label("dec_partial",4);
1046 &lea ($key,$ivec);
1047 &mov (&DWP(0,$key),$s0); # dump output to stack
1048 &mov (&DWP(4,$key),$s1);
1049 &mov (&DWP(8,$key),$s2);
1050 &mov (&DWP(12,$key),$s3);
1051 &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$idx));
1052 &mov ($idx eq "esi" ? $idx : "",$key);
1053 &mov ($key eq "edi" ? $key : "",$_out); # load out
1054 &data_word(0xA4F3F689); # rep movsb # copy output
1055 &mov ($key,$_inp); # use inp as temp ivp
1056 &jmp (&label("dec_end"));
1057
1058 &set_label("dec_in_place",4);
1059 &set_label("dec_in_place_loop");
1060 &lea ($key,$ivec);
1061 &mov ($s0,&DWP(0,$idx)); # read input
1062 &mov ($s1,&DWP(4,$idx));
1063 &mov ($s2,&DWP(8,$idx));
1064 &mov ($s3,&DWP(12,$idx));
1065
1066 &mov (&DWP(0,$key),$s0); # copy to temp
1067 &mov (&DWP(4,$key),$s1);
1068 &mov (&DWP(8,$key),$s2);
1069 &bswap ($s0);
1070 &mov (&DWP(12,$key),$s3);
1071 &bswap ($s1);
1072 &mov ($key,$_key); # load key
1073 &bswap ($s2);
1074 &bswap ($s3);
1075
1076 &call ("_x86_Camellia_decrypt");
1077
1078 &mov ($key,$_ivp); # load ivp
1079 &mov ($idx,$_out); # load out
1080
1081 &bswap ($s0);
1082 &bswap ($s1);
1083 &bswap ($s2);
1084 &xor ($s0,&DWP(0,$key)); # xor iv
1085 &bswap ($s3);
1086 &xor ($s1,&DWP(4,$key));
1087 &xor ($s2,&DWP(8,$key));
1088 &xor ($s3,&DWP(12,$key));
1089
1090 &mov (&DWP(0,$idx),$s0); # write output
1091 &mov (&DWP(4,$idx),$s1);
1092 &mov (&DWP(8,$idx),$s2);
1093 &mov (&DWP(12,$idx),$s3);
1094
1095 &lea ($idx,&DWP(16,$idx));
1096 &mov ($_out,$idx); # save out
1097
1098 &lea ($idx,$ivec);
1099 &mov ($s0,&DWP(0,$idx)); # read temp
1100 &mov ($s1,&DWP(4,$idx));
1101 &mov ($s2,&DWP(8,$idx));
1102 &mov ($s3,&DWP(12,$idx));
1103
1104 &mov (&DWP(0,$key),$s0); # copy iv
1105 &mov (&DWP(4,$key),$s1);
1106 &mov (&DWP(8,$key),$s2);
1107 &mov (&DWP(12,$key),$s3);
1108
1109 &mov ($idx,$_inp); # load inp
1110
1111 &lea ($idx,&DWP(16,$idx));
1112 &mov ($_inp,$idx); # save inp
1113
1114 &mov ($s2,$_len); # load len
1115 &sub ($s2,16);
1116 &jc (&label("dec_in_place_partial"));
1117 &mov ($_len,$s2); # save len
1118 &jnz (&label("dec_in_place_loop"));
1119 &jmp (&label("dec_out"));
1120
1121 &set_label("dec_in_place_partial",4);
1122 # one can argue if this is actually required...
1123 &mov ($key eq "edi" ? $key : "",$_out);
1124 &lea ($idx eq "esi" ? $idx : "",$ivec);
1125 &lea ($key,&DWP(0,$key,$s2));
1126 &lea ($idx,&DWP(16,$idx,$s2));
1127 &neg ($s2 eq "ecx" ? $s2 : "");
1128 &data_word(0xA4F3F689); # rep movsb # restore tail
1129
1130 &set_label("dec_out",4);
1131 &mov ("esp",$_esp);
1132 &popf ();
1133&function_end("Camellia_cbc_encrypt");
1134}
1135
1136&asciz("Camellia for x86 by <appro@openssl.org>"); # author/identification string
1137
1138&asm_finish(); # last statement of this generator script
diff --git a/src/lib/libssl/src/crypto/camellia/asm/cmll-x86_64.pl b/src/lib/libssl/src/crypto/camellia/asm/cmll-x86_64.pl
new file mode 100644
index 0000000000..c683646ca7
--- /dev/null
+++ b/src/lib/libssl/src/crypto/camellia/asm/cmll-x86_64.pl
@@ -0,0 +1,1080 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
5#
6# This module may be used under the terms of either the GNU General
7# Public License version 2 or later, the GNU Lesser General Public
8# License version 2.1 or later, the Mozilla Public License version
9# 1.1 or the BSD License. The exact terms of either license are
10# distributed along with this module. For further details see
11# http://www.openssl.org/~appro/camellia/.
12# ====================================================================
13
14# Performance in cycles per processed byte (less is better) in
15# 'openssl speed ...' benchmark:
16#
17# AMD64 Core2 EM64T
18# -evp camellia-128-ecb 16.7 21.0 22.7
19# + over gcc 3.4.6 +25% +5% 0%
20#
21# camellia-128-cbc 15.7 20.4 21.1
22#
23# 128-bit key setup 128 216 205 cycles/key
24# + over gcc 3.4.6 +54% +39% +15%
25#
26# Numbers in "+" rows represent performance improvement over compiler
27# generated code. Key setup timings are impressive on AMD and Core2
28# thanks to 64-bit operations being covertly deployed. Improvement on
29# EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it
30# apparently emulates some of 64-bit operations in [32-bit] microcode.
31
32$flavour = shift; # 1st argument: assembler flavour, or the output file if it contains a dot
33$output = shift;
34if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
35
36$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); # nasm/masm/mingw64 flavour or .asm output
37
38$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
39( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
40( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
41die "can't locate x86_64-xlate.pl";
42
43open STDOUT,"| $^X $xlate $flavour $output"; # everything printed from here on is piped through the translator
45sub hi() { my ($reg)=@_; $reg =~ s/%[er]([a-d])x/%${1}h/; return $reg; } # %eax -> %ah
46sub lo() { my ($reg)=@_; # low-byte name of a register: %eax -> %al, %esi -> %sil, %r8d -> %r8b
47 $reg =~ s/%[er]([a-d])x/%${1}l/; $reg =~ s/%[er]([sd]i)/%${1}l/;
48 $reg =~ s/%(r[0-9]+)[d]?/%${1}b/; return $reg; }
49
50$t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx"; # temporaries; hi() only knows the a-d registers
51@S=("%r8d","%r9d","%r10d","%r11d"); # the four 32-bit state words
52$i0="%esi"; # table index registers used by Camellia_Feistel
53$i1="%edi";
54$Tbl="%rbp"; # size optimization
55$inp="%r12";
56$out="%r13";
57$key="%r14";
58$keyend="%r15";
59$arg0d=$win64?"%ecx":"%edi"; # 32-bit register carrying the first argument, chosen by $win64
60
61# const unsigned int Camellia_SBOX[4][256];
62# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][],
63# and [2][] - with [3][]. This is done to minimize code size.
64$SBOX1_1110=0; # Camellia_SBOX[0]
65$SBOX4_4404=4; # Camellia_SBOX[1]
66$SBOX2_0222=2048; # Camellia_SBOX[2]
67$SBOX3_3033=2052; # Camellia_SBOX[3]
68
69sub Camellia_Feistel { # append one Feistel round to $code
70my $i=@_[0]; # round index
71my $seed=defined(@_[1])?@_[1]:0; # byte offset of the round keys relative to $key (default 0)
72my $scale=$seed<0?-8:8; # a negative seed steps through the key schedule backwards
73my $j=($i&1)*2; # odd rounds shift the roles of @S by two
74my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4]; # NOTE(review): "my" covers only $s0; $s1-$s3 are package globals
75
76$code.=<<___;
77 xor $s0,$t0 # t0^=key[0]
78 xor $s1,$t1 # t1^=key[1]
79 movz `&hi("$t0")`,$i0 # (t0>>8)&0xff
80 movz `&lo("$t1")`,$i1 # (t1>>0)&0xff
81 mov $SBOX3_3033($Tbl,$i0,8),$t3 # t3=SBOX3_3033[0]
82 mov $SBOX1_1110($Tbl,$i1,8),$t2 # t2=SBOX1_1110[1]
83 movz `&lo("$t0")`,$i0 # (t0>>0)&0xff
84 shr \$16,$t0
85 movz `&hi("$t1")`,$i1 # (t1>>8)&0xff
86 xor $SBOX4_4404($Tbl,$i0,8),$t3 # t3^=SBOX4_4404[0]
87 shr \$16,$t1
88 xor $SBOX4_4404($Tbl,$i1,8),$t2 # t2^=SBOX4_4404[1]
89 movz `&hi("$t0")`,$i0 # (t0>>24)&0xff
90 movz `&lo("$t1")`,$i1 # (t1>>16)&0xff
91 xor $SBOX1_1110($Tbl,$i0,8),$t3 # t3^=SBOX1_1110[0]
92 xor $SBOX3_3033($Tbl,$i1,8),$t2 # t2^=SBOX3_3033[1]
93 movz `&lo("$t0")`,$i0 # (t0>>16)&0xff
94 movz `&hi("$t1")`,$i1 # (t1>>24)&0xff
95 xor $SBOX2_0222($Tbl,$i0,8),$t3 # t3^=SBOX2_0222[0]
96 xor $SBOX2_0222($Tbl,$i1,8),$t2 # t2^=SBOX2_0222[1]
97 mov `$seed+($i+1)*$scale`($key),$t1 # prefetch key[i+1]
98 mov `$seed+($i+1)*$scale+4`($key),$t0
99 xor $t3,$t2 # t2^=t3
100 ror \$8,$t3 # t3=RightRotate(t3,8)
101 xor $t2,$s2
102 xor $t2,$s3
103 xor $t3,$s3
104___
105}
106
107# void Camellia_EncryptBlock_Rounds(
108# int grandRounds,
109# const Byte plaintext[],
110# const KEY_TABLE_TYPE keyTable,
111# Byte ciphertext[])
112$code=<<___;
113.text
114
115# V1.x API
116.globl Camellia_EncryptBlock
117.type Camellia_EncryptBlock,\@abi-omnipotent
118.align 16
119Camellia_EncryptBlock:
120 movl \$128,%eax
121 subl $arg0d,%eax
122 movl \$3,$arg0d
123 adcl \$0,$arg0d # keyBitLength==128?3:4
124 jmp .Lenc_rounds
125.size Camellia_EncryptBlock,.-Camellia_EncryptBlock
126# V2
127.globl Camellia_EncryptBlock_Rounds
128.type Camellia_EncryptBlock_Rounds,\@function,4
129.align 16
130.Lenc_rounds:
131Camellia_EncryptBlock_Rounds:
132 push %rbx
133 push %rbp
134 push %r13
135 push %r14
136 push %r15
137.Lenc_prologue:
138
139 #mov %rsi,$inp # put away arguments
140 mov %rcx,$out
141 mov %rdx,$key
142
143 shl \$6,%edi # process grandRounds
144 lea .LCamellia_SBOX(%rip),$Tbl
145 lea ($key,%rdi),$keyend
146
147 mov 0(%rsi),@S[0] # load plaintext
148 mov 4(%rsi),@S[1]
149 mov 8(%rsi),@S[2]
150 bswap @S[0]
151 mov 12(%rsi),@S[3]
152 bswap @S[1]
153 bswap @S[2]
154 bswap @S[3]
155
156 call _x86_64_Camellia_encrypt
157
158 bswap @S[0]
159 bswap @S[1]
160 bswap @S[2]
161 mov @S[0],0($out)
162 bswap @S[3]
163 mov @S[1],4($out)
164 mov @S[2],8($out)
165 mov @S[3],12($out)
166
167 mov 0(%rsp),%r15
168 mov 8(%rsp),%r14
169 mov 16(%rsp),%r13
170 mov 24(%rsp),%rbp
171 mov 32(%rsp),%rbx
172 lea 40(%rsp),%rsp
173.Lenc_epilogue:
174 ret
175.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
176
177.type _x86_64_Camellia_encrypt,\@abi-omnipotent
178.align 16
179_x86_64_Camellia_encrypt:
180 xor 0($key),@S[1]
181 xor 4($key),@S[0] # ^=key[0-3]
182 xor 8($key),@S[3]
183 xor 12($key),@S[2]
184.align 16
185.Leloop:
186 mov 16($key),$t1 # prefetch key[4-5]
187 mov 20($key),$t0
188
189___
190 for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); } # six rounds; positive seed, so key offsets grow from 16($key)
191$code.=<<___;
192 lea 16*4($key),$key
193 cmp $keyend,$key
194 mov 8($key),$t3 # prefetch key[2-3]
195 mov 12($key),$t2
196 je .Ledone
197
198 and @S[0],$t0
199 or @S[3],$t3
200 rol \$1,$t0
201 xor $t3,@S[2] # s2^=s3|key[3];
202 xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1);
203 and @S[2],$t2
204 or @S[1],$t1
205 rol \$1,$t2
206 xor $t1,@S[0] # s0^=s1|key[1];
207 xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1);
208 jmp .Leloop
209
210.align 16
211.Ledone:
212 xor @S[2],$t0 # SwapHalf
213 xor @S[3],$t1
214 xor @S[0],$t2
215 xor @S[1],$t3
216
217 mov $t0,@S[0]
218 mov $t1,@S[1]
219 mov $t2,@S[2]
220 mov $t3,@S[3]
221
222 .byte 0xf3,0xc3 # rep ret
223.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt
224
225# V1.x API
226.globl Camellia_DecryptBlock
227.type Camellia_DecryptBlock,\@abi-omnipotent
228.align 16
229Camellia_DecryptBlock:
230 movl \$128,%eax
231 subl $arg0d,%eax
232 movl \$3,$arg0d
233 adcl \$0,$arg0d # keyBitLength==128?3:4
234 jmp .Ldec_rounds
235.size Camellia_DecryptBlock,.-Camellia_DecryptBlock
236# V2
237.globl Camellia_DecryptBlock_Rounds
238.type Camellia_DecryptBlock_Rounds,\@function,4
239.align 16
240.Ldec_rounds:
241Camellia_DecryptBlock_Rounds:
242 push %rbx
243 push %rbp
244 push %r13
245 push %r14
246 push %r15
247.Ldec_prologue:
248
249 #mov %rsi,$inp # put away arguments
250 mov %rcx,$out
251 mov %rdx,$keyend
252
253 shl \$6,%edi # process grandRounds
254 lea .LCamellia_SBOX(%rip),$Tbl
255 lea ($keyend,%rdi),$key
256
257 mov 0(%rsi),@S[0] # load plaintext
258 mov 4(%rsi),@S[1]
259 mov 8(%rsi),@S[2]
260 bswap @S[0]
261 mov 12(%rsi),@S[3]
262 bswap @S[1]
263 bswap @S[2]
264 bswap @S[3]
265
266 call _x86_64_Camellia_decrypt
267
268 bswap @S[0]
269 bswap @S[1]
270 bswap @S[2]
271 mov @S[0],0($out)
272 bswap @S[3]
273 mov @S[1],4($out)
274 mov @S[2],8($out)
275 mov @S[3],12($out)
276
277 mov 0(%rsp),%r15
278 mov 8(%rsp),%r14
279 mov 16(%rsp),%r13
280 mov 24(%rsp),%rbp
281 mov 32(%rsp),%rbx
282 lea 40(%rsp),%rsp
283.Ldec_epilogue:
284 ret
285.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
286
287.type _x86_64_Camellia_decrypt,\@abi-omnipotent
288.align 16
289_x86_64_Camellia_decrypt:
290 xor 0($key),@S[1]
291 xor 4($key),@S[0] # ^=key[0-3]
292 xor 8($key),@S[3]
293 xor 12($key),@S[2]
294.align 16
295.Ldloop:
296 mov -8($key),$t1 # prefetch key[4-5]
297 mov -4($key),$t0
298
299___
300 for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); } # six rounds; negative seed, so key offsets shrink from -8($key)
301$code.=<<___;
302 lea -16*4($key),$key
303 cmp $keyend,$key
304 mov 0($key),$t3 # prefetch key[2-3]
305 mov 4($key),$t2
306 je .Lddone
307
308 and @S[0],$t0
309 or @S[3],$t3
310 rol \$1,$t0
311 xor $t3,@S[2] # s2^=s3|key[3];
312 xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1);
313 and @S[2],$t2
314 or @S[1],$t1
315 rol \$1,$t2
316 xor $t1,@S[0] # s0^=s1|key[1];
317 xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1);
318
319 jmp .Ldloop
320
321.align 16
322.Lddone:
323 xor @S[2],$t2
324 xor @S[3],$t3
325 xor @S[0],$t0
326 xor @S[1],$t1
327
328 mov $t2,@S[0] # SwapHalf
329 mov $t3,@S[1]
330 mov $t0,@S[2]
331 mov $t1,@S[3]
332
333 .byte 0xf3,0xc3 # rep ret
334.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt
335___
336
337sub _saveround { # append stores of @T into 8-byte slot $rnd of the table at $key
338my ($rnd,$key,@T)=@_;
339my $bias=int(@T[0])?shift(@T):0; # optional numeric bias ahead of the registers
340
341 if ($#T==3) { # four registers: stored pairwise swapped (T1,T0,T3,T2)
342 $code.=<<___;
343 mov @T[1],`$bias+$rnd*8+0`($key)
344 mov @T[0],`$bias+$rnd*8+4`($key)
345 mov @T[3],`$bias+$rnd*8+8`($key)
346 mov @T[2],`$bias+$rnd*8+12`($key)
347___
348 } else { # otherwise: @T[0] at +0 and, if present, @T[1] at +8
349 $code.=" mov @T[0],`$bias+$rnd*8+0`($key)\n";
350 $code.=" mov @T[1],`$bias+$rnd*8+8`($key)\n" if ($#T>=1);
351 }
352}
353
354sub _loadround { # append loads of @T[0] from slot $rnd and, if present, @T[1] from 8 bytes further on
355my ($rnd,$key,@T)=@_;
356my $bias=int(@T[0])?shift(@T):0; # optional numeric bias ahead of the registers
357
358$code.=" mov `$bias+$rnd*8+0`($key),@T[0]\n";
359$code.=" mov `$bias+$rnd*8+8`($key),@T[1]\n" if ($#T>=1);
360}
361
362# shld is very slow on Intel EM64T family. Even on AMD it limits
363# instruction decode rate [because it's VectorPath] and consequently
364# performance...
365sub __rotl128 { # shld flavour: rotate the pair $i0:$i1 left by $rot bits
366my ($i0,$i1,$rot)=@_;
367
368 if ($rot) { # nothing is appended for a zero rotate
369 $code.=<<___;
370 mov $i0,%r11
371 shld \$$rot,$i1,$i0
372 shld \$$rot,%r11,$i1
373___
374 }
375}
376
377# ... Implementing 128-bit rotate without shld gives 80% better
378# performance on EM64T, +15% on AMD64 and only ~7% degradation on
379# Core2. This is therefore preferred.
#
# _rotl128($i0,$i1,$rot) - appends code that rotates the 128-bit quantity
# held in the register pair $i0:$i1 left by $rot bits using plain
# shl/shr/or. Each half is shifted left by $rot and then OR-ed with the
# top $rot bits of the other half (obtained by shifting a copy right by
# 64-$rot). %r9 and %r11 are overwritten as scratch, so neither may be
# passed as $i0 or $i1. Nothing is emitted when $rot is 0.
# The callers visible in this part of the file pass rotate counts
# between 15 and 51.
380sub _rotl128 {
381my ($i0,$i1,$rot)=@_;
382
383 if ($rot) {
384 $code.=<<___;
385 mov $i0,%r11
386 shl \$$rot,$i0
387 mov $i1,%r9
388 shr \$`64-$rot`,%r9
389 shr \$`64-$rot`,%r11
390 or %r9,$i0
391 shl \$$rot,$i1
392 or %r11,$i1
393___
394 }
395}
396
397{ my $step=0;
398
399$code.=<<___;
400.globl Camellia_Ekeygen
401.type Camellia_Ekeygen,\@function,3
402.align 16
403Camellia_Ekeygen:
404 push %rbx
405 push %rbp
406 push %r13
407 push %r14
408 push %r15
409.Lkey_prologue:
410
411 mov %rdi,$keyend # put away arguments, keyBitLength
412 mov %rdx,$out # keyTable
413
414 mov 0(%rsi),@S[0] # load 0-127 bits
415 mov 4(%rsi),@S[1]
416 mov 8(%rsi),@S[2]
417 mov 12(%rsi),@S[3]
418
419 bswap @S[0]
420 bswap @S[1]
421 bswap @S[2]
422 bswap @S[3]
423___
424 &_saveround (0,$out,@S); # KL<<<0
425$code.=<<___;
426 cmp \$128,$keyend # check keyBitLength
427 je .L1st128
428
429 mov 16(%rsi),@S[0] # load 128-191 bits
430 mov 20(%rsi),@S[1]
431 cmp \$192,$keyend
432 je .L1st192
433 mov 24(%rsi),@S[2] # load 192-255 bits
434 mov 28(%rsi),@S[3]
435 jmp .L1st256
436.L1st192:
437 mov @S[0],@S[2]
438 mov @S[1],@S[3]
439 not @S[2]
440 not @S[3]
441.L1st256:
442 bswap @S[0]
443 bswap @S[1]
444 bswap @S[2]
445 bswap @S[3]
446___
447 &_saveround (4,$out,@S); # temp storage for KR!
448$code.=<<___;
449 xor 0($out),@S[1] # KR^KL
450 xor 4($out),@S[0]
451 xor 8($out),@S[3]
452 xor 12($out),@S[2]
453
454.L1st128:
455 lea .LCamellia_SIGMA(%rip),$key
456 lea .LCamellia_SBOX(%rip),$Tbl
457
458 mov 0($key),$t1
459 mov 4($key),$t0
460___
461 &Camellia_Feistel($step++);
462 &Camellia_Feistel($step++);
463$code.=<<___;
464 xor 0($out),@S[1] # ^KL
465 xor 4($out),@S[0]
466 xor 8($out),@S[3]
467 xor 12($out),@S[2]
468___
469 &Camellia_Feistel($step++);
470 &Camellia_Feistel($step++);
471$code.=<<___;
472 cmp \$128,$keyend
473 jne .L2nd256
474
475 lea 128($out),$out # size optimization
476 shl \$32,%r8 # @S[0]||
477 shl \$32,%r10 # @S[2]||
478 or %r9,%r8 # ||@S[1]
479 or %r11,%r10 # ||@S[3]
480___
481 &_loadround (0,$out,-128,"%rax","%rbx"); # KL
482 &_saveround (2,$out,-128,"%r8","%r10"); # KA<<<0
483 &_rotl128 ("%rax","%rbx",15);
484 &_saveround (4,$out,-128,"%rax","%rbx"); # KL<<<15
485 &_rotl128 ("%r8","%r10",15);
486 &_saveround (6,$out,-128,"%r8","%r10"); # KA<<<15
487 &_rotl128 ("%r8","%r10",15); # 15+15=30
488 &_saveround (8,$out,-128,"%r8","%r10"); # KA<<<30
489 &_rotl128 ("%rax","%rbx",30); # 15+30=45
490 &_saveround (10,$out,-128,"%rax","%rbx"); # KL<<<45
491 &_rotl128 ("%r8","%r10",15); # 30+15=45
492 &_saveround (12,$out,-128,"%r8"); # KA<<<45
493 &_rotl128 ("%rax","%rbx",15); # 45+15=60
494 &_saveround (13,$out,-128,"%rbx"); # KL<<<60
495 &_rotl128 ("%r8","%r10",15); # 45+15=60
496 &_saveround (14,$out,-128,"%r8","%r10"); # KA<<<60
497 &_rotl128 ("%rax","%rbx",17); # 60+17=77
498 &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<77
499 &_rotl128 ("%rax","%rbx",17); # 77+17=94
500 &_saveround (18,$out,-128,"%rax","%rbx"); # KL<<<94
501 &_rotl128 ("%r8","%r10",34); # 60+34=94
502 &_saveround (20,$out,-128,"%r8","%r10"); # KA<<<94
503 &_rotl128 ("%rax","%rbx",17); # 94+17=111
504 &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<111
505 &_rotl128 ("%r8","%r10",17); # 94+17=111
506 &_saveround (24,$out,-128,"%r8","%r10"); # KA<<<111
507$code.=<<___;
508 mov \$3,%eax
509 jmp .Ldone
510.align 16
511.L2nd256:
512___
513 &_saveround (6,$out,@S); # temp storage for KA!
514$code.=<<___;
515 xor `4*8+0`($out),@S[1] # KA^KR
516 xor `4*8+4`($out),@S[0]
517 xor `5*8+0`($out),@S[3]
518 xor `5*8+4`($out),@S[2]
519___
520 &Camellia_Feistel($step++);
521 &Camellia_Feistel($step++);
522
523 &_loadround (0,$out,"%rax","%rbx"); # KL
524 &_loadround (4,$out,"%rcx","%rdx"); # KR
525 &_loadround (6,$out,"%r14","%r15"); # KA
526$code.=<<___;
527 lea 128($out),$out # size optimization
528 shl \$32,%r8 # @S[0]||
529 shl \$32,%r10 # @S[2]||
530 or %r9,%r8 # ||@S[1]
531 or %r11,%r10 # ||@S[3]
532___
533 &_saveround (2,$out,-128,"%r8","%r10"); # KB<<<0
534 &_rotl128 ("%rcx","%rdx",15);
535 &_saveround (4,$out,-128,"%rcx","%rdx"); # KR<<<15
536 &_rotl128 ("%r14","%r15",15);
537 &_saveround (6,$out,-128,"%r14","%r15"); # KA<<<15
538 &_rotl128 ("%rcx","%rdx",15); # 15+15=30
539 &_saveround (8,$out,-128,"%rcx","%rdx"); # KR<<<30
540 &_rotl128 ("%r8","%r10",30);
541 &_saveround (10,$out,-128,"%r8","%r10"); # KB<<<30
542 &_rotl128 ("%rax","%rbx",45);
543 &_saveround (12,$out,-128,"%rax","%rbx"); # KL<<<45
544 &_rotl128 ("%r14","%r15",30); # 15+30=45
545 &_saveround (14,$out,-128,"%r14","%r15"); # KA<<<45
546 &_rotl128 ("%rax","%rbx",15); # 45+15=60
547 &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<60
548 &_rotl128 ("%rcx","%rdx",30); # 30+30=60
549 &_saveround (18,$out,-128,"%rcx","%rdx"); # KR<<<60
550 &_rotl128 ("%r8","%r10",30); # 30+30=60
551 &_saveround (20,$out,-128,"%r8","%r10"); # KB<<<60
552 &_rotl128 ("%rax","%rbx",17); # 60+17=77
553 &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<77
554 &_rotl128 ("%r14","%r15",32); # 45+32=77
555 &_saveround (24,$out,-128,"%r14","%r15"); # KA<<<77
556 &_rotl128 ("%rcx","%rdx",34); # 60+34=94
557 &_saveround (26,$out,-128,"%rcx","%rdx"); # KR<<<94
558 &_rotl128 ("%r14","%r15",17); # 77+17=94
559 &_saveround (28,$out,-128,"%r14","%r15"); # KA<<<94
560 &_rotl128 ("%rax","%rbx",34); # 77+34=111
561 &_saveround (30,$out,-128,"%rax","%rbx"); # KL<<<111
562 &_rotl128 ("%r8","%r10",51); # 60+51=111
563 &_saveround (32,$out,-128,"%r8","%r10"); # KB<<<111
564$code.=<<___;
565 mov \$4,%eax
566.Ldone:
567 mov 0(%rsp),%r15
568 mov 8(%rsp),%r14
569 mov 16(%rsp),%r13
570 mov 24(%rsp),%rbp
571 mov 32(%rsp),%rbx
572 lea 40(%rsp),%rsp
573.Lkey_epilogue:
574 ret
575.size Camellia_Ekeygen,.-Camellia_Ekeygen
576___
577}
578
579@SBOX=(
580112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65,
581 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189,
582134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26,
583166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77,
584139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153,
585223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215,
586 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34,
587254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80,
588170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210,
589 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148,
590135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226,
591 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46,
592233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89,
593120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250,
594114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164,
595 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158);
596
# Generators for the 32-bit lookup-table words. Each takes a byte index,
# derives one byte value s from @SBOX and returns the word formatted as
# "0x%08x". The digits of each name apparently mirror the byte layout
# from most to least significant byte, 0 marking the byte left zero.
#
# S1110: s=SBOX[i]; word bytes are s,s,s,0.
597sub S1110 { my $i=shift; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i<<8; sprintf("0x%08x",$i); }
# S4404: the index is first rotated left by one bit within 8 bits,
# s=SBOX[rotated index]; word bytes are s,s,0,s.
598sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i; sprintf("0x%08x",$i); }
# S0222: s=SBOX[i] rotated left by one bit within 8 bits; word bytes
# are 0,s,s,s.
599sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; $i=$i<<16|$i<<8|$i; sprintf("0x%08x",$i); }
# S3033: s=SBOX[i] rotated right by one bit within 8 bits; word bytes
# are s,0,s,s.
600sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; $i=$i<<24|$i<<8|$i; sprintf("0x%08x",$i); }
601
602$code.=<<___;
603.align 64
604.LCamellia_SIGMA:
605.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858
606.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5
607.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2
608.long 0, 0, 0, 0
609.LCamellia_SBOX:
610___
611# tables are interleaved, remember?
# data_word(@words) appends one ".long" directive listing @words to $code.
# The two loops below emit, right after the .LCamellia_SBOX label, 256
# rows pairing S1110(i) with S4404(i) followed by 256 rows pairing
# S0222(i) with S3033(i): 512 rows of 8 bytes each, 4096 bytes in total.
612sub data_word { $code.=".long\t".join(',',@_)."\n"; }
613for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
614for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }
615
616# void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out,
617# size_t length, const CAMELLIA_KEY *key,
618# unsigned char *ivp,const int enc);
619{
620$_key="0(%rsp)";
621$_end="8(%rsp)"; # inp+len&~15
622$_res="16(%rsp)"; # len&15
623$ivec="24(%rsp)";
624$_ivp="40(%rsp)";
625$_rsp="48(%rsp)";
626
627$code.=<<___;
628.globl Camellia_cbc_encrypt
629.type Camellia_cbc_encrypt,\@function,6
630.align 16
631Camellia_cbc_encrypt:
632 cmp \$0,%rdx
633 je .Lcbc_abort
634 push %rbx
635 push %rbp
636 push %r12
637 push %r13
638 push %r14
639 push %r15
640.Lcbc_prologue:
641
642 mov %rsp,%rbp
643 sub \$64,%rsp
644 and \$-64,%rsp
645
646 # place stack frame just "above mod 1024" the key schedule,
647 # this ensures that cache associativity suffices
648 lea -64-63(%rcx),%r10
649 sub %rsp,%r10
650 neg %r10
651 and \$0x3C0,%r10
652 sub %r10,%rsp
653 #add \$8,%rsp # 8 is reserved for callee's ra
654
655 mov %rdi,$inp # inp argument
656 mov %rsi,$out # out argument
657 mov %r8,%rbx # ivp argument
658 mov %rcx,$key # key argument
659 mov 272(%rcx),$keyend # grandRounds
660
661 mov %r8,$_ivp
662 mov %rbp,$_rsp
663
664.Lcbc_body:
665 lea .LCamellia_SBOX(%rip),$Tbl
666
667 mov \$32,%ecx
668.align 4
669.Lcbc_prefetch_sbox:
670 mov 0($Tbl),%rax
671 mov 32($Tbl),%rsi
672 mov 64($Tbl),%rdi
673 mov 96($Tbl),%r11
674 lea 128($Tbl),$Tbl
675 loop .Lcbc_prefetch_sbox
676 sub \$4096,$Tbl
677 shl \$6,$keyend
678 mov %rdx,%rcx # len argument
679 lea ($key,$keyend),$keyend
680
681 cmp \$0,%r9d # enc argument
682 je .LCBC_DECRYPT
683
684 and \$-16,%rdx
685 and \$15,%rcx # length residue
686 lea ($inp,%rdx),%rdx
687 mov $key,$_key
688 mov %rdx,$_end
689 mov %rcx,$_res
690
691 cmp $inp,%rdx
692 mov 0(%rbx),@S[0] # load IV
693 mov 4(%rbx),@S[1]
694 mov 8(%rbx),@S[2]
695 mov 12(%rbx),@S[3]
696 je .Lcbc_enc_tail
697 jmp .Lcbc_eloop
698
699.align 16
700.Lcbc_eloop:
701 xor 0($inp),@S[0]
702 xor 4($inp),@S[1]
703 xor 8($inp),@S[2]
704 bswap @S[0]
705 xor 12($inp),@S[3]
706 bswap @S[1]
707 bswap @S[2]
708 bswap @S[3]
709
710 call _x86_64_Camellia_encrypt
711
712 mov $_key,$key # "rewind" the key
713 bswap @S[0]
714 mov $_end,%rdx
715 bswap @S[1]
716 mov $_res,%rcx
717 bswap @S[2]
718 mov @S[0],0($out)
719 bswap @S[3]
720 mov @S[1],4($out)
721 mov @S[2],8($out)
722 lea 16($inp),$inp
723 mov @S[3],12($out)
724 cmp %rdx,$inp
725 lea 16($out),$out
726 jne .Lcbc_eloop
727
728 cmp \$0,%rcx
729 jne .Lcbc_enc_tail
730
731 mov $_ivp,$out
732 mov @S[0],0($out) # write out IV residue
733 mov @S[1],4($out)
734 mov @S[2],8($out)
735 mov @S[3],12($out)
736 jmp .Lcbc_done
737
738.align 16
739.Lcbc_enc_tail:
740 xor %rax,%rax
741 mov %rax,0+$ivec
742 mov %rax,8+$ivec
743 mov %rax,$_res
744
745.Lcbc_enc_pushf:
746 pushfq
747 cld
748 mov $inp,%rsi
749 lea 8+$ivec,%rdi
750 .long 0x9066A4F3 # rep movsb
751 popfq
752.Lcbc_enc_popf:
753
754 lea $ivec,$inp
755 lea 16+$ivec,%rax
756 mov %rax,$_end
757 jmp .Lcbc_eloop # one more time
758
759.align 16
760.LCBC_DECRYPT:
761 xchg $key,$keyend
762 add \$15,%rdx
763 and \$15,%rcx # length residue
764 and \$-16,%rdx
765 mov $key,$_key
766 lea ($inp,%rdx),%rdx
767 mov %rdx,$_end
768 mov %rcx,$_res
769
770 mov (%rbx),%rax # load IV
771 mov 8(%rbx),%rbx
772 jmp .Lcbc_dloop
773.align 16
774.Lcbc_dloop:
775 mov 0($inp),@S[0]
776 mov 4($inp),@S[1]
777 mov 8($inp),@S[2]
778 bswap @S[0]
779 mov 12($inp),@S[3]
780 bswap @S[1]
781 mov %rax,0+$ivec # save IV to temporary storage
782 bswap @S[2]
783 mov %rbx,8+$ivec
784 bswap @S[3]
785
786 call _x86_64_Camellia_decrypt
787
788 mov $_key,$key # "rewind" the key
789 mov $_end,%rdx
790 mov $_res,%rcx
791
792 bswap @S[0]
793 mov ($inp),%rax # load IV for next iteration
794 bswap @S[1]
795 mov 8($inp),%rbx
796 bswap @S[2]
797 xor 0+$ivec,@S[0]
798 bswap @S[3]
799 xor 4+$ivec,@S[1]
800 xor 8+$ivec,@S[2]
801 lea 16($inp),$inp
802 xor 12+$ivec,@S[3]
803 cmp %rdx,$inp
804 je .Lcbc_ddone
805
806 mov @S[0],0($out)
807 mov @S[1],4($out)
808 mov @S[2],8($out)
809 mov @S[3],12($out)
810
811 lea 16($out),$out
812 jmp .Lcbc_dloop
813
814.align 16
815.Lcbc_ddone:
816 mov $_ivp,%rdx
817 cmp \$0,%rcx
818 jne .Lcbc_dec_tail
819
820 mov @S[0],0($out)
821 mov @S[1],4($out)
822 mov @S[2],8($out)
823 mov @S[3],12($out)
824
825 mov %rax,(%rdx) # write out IV residue
826 mov %rbx,8(%rdx)
827 jmp .Lcbc_done
828.align 16
829.Lcbc_dec_tail:
830 mov @S[0],0+$ivec
831 mov @S[1],4+$ivec
832 mov @S[2],8+$ivec
833 mov @S[3],12+$ivec
834
835.Lcbc_dec_pushf:
836 pushfq
837 cld
838 lea 8+$ivec,%rsi
839 lea ($out),%rdi
840 .long 0x9066A4F3 # rep movsb
841 popfq
842.Lcbc_dec_popf:
843
844 mov %rax,(%rdx) # write out IV residue
845 mov %rbx,8(%rdx)
846 jmp .Lcbc_done
847
848.align 16
849.Lcbc_done:
850 mov $_rsp,%rcx
851 mov 0(%rcx),%r15
852 mov 8(%rcx),%r14
853 mov 16(%rcx),%r13
854 mov 24(%rcx),%r12
855 mov 32(%rcx),%rbp
856 mov 40(%rcx),%rbx
857 lea 48(%rcx),%rsp
858.Lcbc_abort:
859 ret
860.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
861
862.asciz "Camellia for x86_64 by <appro@openssl.org>"
863___
864}
865
866# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
867# CONTEXT *context,DISPATCHER_CONTEXT *disp)
868if ($win64) {
869$rec="%rcx";
870$frame="%rdx";
871$context="%r8";
872$disp="%r9";
873
874$code.=<<___;
875.extern __imp_RtlVirtualUnwind
876.type common_se_handler,\@abi-omnipotent
877.align 16
878common_se_handler:
879 push %rsi
880 push %rdi
881 push %rbx
882 push %rbp
883 push %r12
884 push %r13
885 push %r14
886 push %r15
887 pushfq
888 lea -64(%rsp),%rsp
889
890 mov 120($context),%rax # pull context->Rax
891 mov 248($context),%rbx # pull context->Rip
892
893 mov 8($disp),%rsi # disp->ImageBase
894 mov 56($disp),%r11 # disp->HandlerData
895
896 mov 0(%r11),%r10d # HandlerData[0]
897 lea (%rsi,%r10),%r10 # prologue label
898 cmp %r10,%rbx # context->Rip<prologue label
899 jb .Lin_prologue
900
901 mov 152($context),%rax # pull context->Rsp
902
903 mov 4(%r11),%r10d # HandlerData[1]
904 lea (%rsi,%r10),%r10 # epilogue label
905 cmp %r10,%rbx # context->Rip>=epilogue label
906 jae .Lin_prologue
907
908 lea 40(%rax),%rax
909 mov -8(%rax),%rbx
910 mov -16(%rax),%rbp
911 mov -24(%rax),%r13
912 mov -32(%rax),%r14
913 mov -40(%rax),%r15
914 mov %rbx,144($context) # restore context->Rbx
915 mov %rbp,160($context) # restore context->Rbp
916 mov %r13,224($context) # restore context->R13
917 mov %r14,232($context) # restore context->R14
918 mov %r15,240($context) # restore context->R15
919
920.Lin_prologue:
921 mov 8(%rax),%rdi
922 mov 16(%rax),%rsi
923 mov %rax,152($context) # restore context->Rsp
924 mov %rsi,168($context) # restore context->Rsi
925 mov %rdi,176($context) # restore context->Rdi
926
927 jmp .Lcommon_seh_exit
928.size common_se_handler,.-common_se_handler
929
930.type cbc_se_handler,\@abi-omnipotent
931.align 16
932cbc_se_handler:
933 push %rsi
934 push %rdi
935 push %rbx
936 push %rbp
937 push %r12
938 push %r13
939 push %r14
940 push %r15
941 pushfq
942 lea -64(%rsp),%rsp
943
944 mov 120($context),%rax # pull context->Rax
945 mov 248($context),%rbx # pull context->Rip
946
947 lea .Lcbc_prologue(%rip),%r10
948 cmp %r10,%rbx # context->Rip<.Lcbc_prologue
949 jb .Lin_cbc_prologue
950
951 lea .Lcbc_body(%rip),%r10
952 cmp %r10,%rbx # context->Rip<.Lcbc_body
953 jb .Lin_cbc_frame_setup
954
955 mov 152($context),%rax # pull context->Rsp
956
957 lea .Lcbc_abort(%rip),%r10
958 cmp %r10,%rbx # context->Rip>=.Lcbc_abort
959 jae .Lin_cbc_prologue
960
961 # handle pushf/popf in Camellia_cbc_encrypt
962 lea .Lcbc_enc_pushf(%rip),%r10
963 cmp %r10,%rbx # context->Rip<=.Lcbc_enc_pushf
964 jbe .Lin_cbc_no_flag
965 lea 8(%rax),%rax
966 lea .Lcbc_enc_popf(%rip),%r10
967 cmp %r10,%rbx # context->Rip<.Lcbc_enc_popf
968 jb .Lin_cbc_no_flag
969 lea -8(%rax),%rax
970 lea .Lcbc_dec_pushf(%rip),%r10
971 cmp %r10,%rbx # context->Rip<=.Lcbc_dec_pushf
972 jbe .Lin_cbc_no_flag
973 lea 8(%rax),%rax
974 lea .Lcbc_dec_popf(%rip),%r10
975 cmp %r10,%rbx # context->Rip<.Lcbc_dec_popf
976 jb .Lin_cbc_no_flag
977 lea -8(%rax),%rax
978
979.Lin_cbc_no_flag:
980 mov 48(%rax),%rax # $_rsp
981 lea 48(%rax),%rax
982
983.Lin_cbc_frame_setup:
984 mov -8(%rax),%rbx
985 mov -16(%rax),%rbp
986 mov -24(%rax),%r12
987 mov -32(%rax),%r13
988 mov -40(%rax),%r14
989 mov -48(%rax),%r15
990 mov %rbx,144($context) # restore context->Rbx
991 mov %rbp,160($context) # restore context->Rbp
992 mov %r12,216($context) # restore context->R12
993 mov %r13,224($context) # restore context->R13
994 mov %r14,232($context) # restore context->R14
995 mov %r15,240($context) # restore context->R15
996
997.Lin_cbc_prologue:
998 mov 8(%rax),%rdi
999 mov 16(%rax),%rsi
1000 mov %rax,152($context) # restore context->Rsp
1001 mov %rsi,168($context) # restore context->Rsi
1002 mov %rdi,176($context) # restore context->Rdi
1003
1004.align 4
1005.Lcommon_seh_exit:
1006
1007 mov 40($disp),%rdi # disp->ContextRecord
1008 mov $context,%rsi # context
1009 mov \$`1232/8`,%ecx # sizeof(CONTEXT)
1010 .long 0xa548f3fc # cld; rep movsq
1011
1012 mov $disp,%rsi
1013 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1014 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1015 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1016 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1017 mov 40(%rsi),%r10 # disp->ContextRecord
1018 lea 56(%rsi),%r11 # &disp->HandlerData
1019 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1020 mov %r10,32(%rsp) # arg5
1021 mov %r11,40(%rsp) # arg6
1022 mov %r12,48(%rsp) # arg7
1023 mov %rcx,56(%rsp) # arg8, (NULL)
1024 call *__imp_RtlVirtualUnwind(%rip)
1025
1026 mov \$1,%eax # ExceptionContinueSearch
1027 lea 64(%rsp),%rsp
1028 popfq
1029 pop %r15
1030 pop %r14
1031 pop %r13
1032 pop %r12
1033 pop %rbp
1034 pop %rbx
1035 pop %rdi
1036 pop %rsi
1037 ret
1038.size cbc_se_handler,.-cbc_se_handler
1039
1040.section .pdata
1041.align 4
1042 .rva .LSEH_begin_Camellia_EncryptBlock_Rounds
1043 .rva .LSEH_end_Camellia_EncryptBlock_Rounds
1044 .rva .LSEH_info_Camellia_EncryptBlock_Rounds
1045
1046 .rva .LSEH_begin_Camellia_DecryptBlock_Rounds
1047 .rva .LSEH_end_Camellia_DecryptBlock_Rounds
1048 .rva .LSEH_info_Camellia_DecryptBlock_Rounds
1049
1050 .rva .LSEH_begin_Camellia_Ekeygen
1051 .rva .LSEH_end_Camellia_Ekeygen
1052 .rva .LSEH_info_Camellia_Ekeygen
1053
1054 .rva .LSEH_begin_Camellia_cbc_encrypt
1055 .rva .LSEH_end_Camellia_cbc_encrypt
1056 .rva .LSEH_info_Camellia_cbc_encrypt
1057
1058.section .xdata
1059.align 8
1060.LSEH_info_Camellia_EncryptBlock_Rounds:
1061 .byte 9,0,0,0
1062 .rva common_se_handler
1063 .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[]
1064.LSEH_info_Camellia_DecryptBlock_Rounds:
1065 .byte 9,0,0,0
1066 .rva common_se_handler
1067 .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[]
1068.LSEH_info_Camellia_Ekeygen:
1069 .byte 9,0,0,0
1070 .rva common_se_handler
1071 .rva .Lkey_prologue,.Lkey_epilogue # HandlerData[]
1072.LSEH_info_Camellia_cbc_encrypt:
1073 .byte 9,0,0,0
1074 .rva cbc_se_handler
1075___
1076}
1077
1078$code =~ s/\`([^\`]*)\`/eval $1/gem;
1079print $code;
1080close STDOUT;
diff --git a/src/lib/libssl/src/crypto/cms/cms_smime.c b/src/lib/libssl/src/crypto/cms/cms_smime.c
index b9463f9abb..f35883aa22 100644
--- a/src/lib/libssl/src/crypto/cms/cms_smime.c
+++ b/src/lib/libssl/src/crypto/cms/cms_smime.c
@@ -298,7 +298,7 @@ static int cms_signerinfo_verify_cert(CMS_SignerInfo *si,
298 CMS_R_STORE_INIT_ERROR); 298 CMS_R_STORE_INIT_ERROR);
299 goto err; 299 goto err;
300 } 300 }
301 X509_STORE_CTX_set_purpose(&ctx, X509_PURPOSE_SMIME_SIGN); 301 X509_STORE_CTX_set_default(&ctx, "smime_sign");
302 if (crls) 302 if (crls)
303 X509_STORE_CTX_set0_crls(&ctx, crls); 303 X509_STORE_CTX_set0_crls(&ctx, crls);
304 304
@@ -425,7 +425,7 @@ int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs,
425 for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) 425 for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++)
426 { 426 {
427 si = sk_CMS_SignerInfo_value(sinfos, i); 427 si = sk_CMS_SignerInfo_value(sinfos, i);
428 if (!CMS_SignerInfo_verify_content(si, cmsbio)) 428 if (CMS_SignerInfo_verify_content(si, cmsbio) <= 0)
429 { 429 {
430 CMSerr(CMS_F_CMS_VERIFY, 430 CMSerr(CMS_F_CMS_VERIFY,
431 CMS_R_CONTENT_VERIFY_ERROR); 431 CMS_R_CONTENT_VERIFY_ERROR);
diff --git a/src/lib/libssl/src/crypto/des/asm/des_enc.m4 b/src/lib/libssl/src/crypto/des/asm/des_enc.m4
index f5b1928f99..f59333a030 100644
--- a/src/lib/libssl/src/crypto/des/asm/des_enc.m4
+++ b/src/lib/libssl/src/crypto/des/asm/des_enc.m4
@@ -44,6 +44,7 @@
44! 44!
45 45
46.ident "des_enc.m4 2.1" 46.ident "des_enc.m4 2.1"
47.file "des_enc-sparc.S"
47 48
48#if defined(__SUNPRO_C) && defined(__sparcv9) 49#if defined(__SUNPRO_C) && defined(__sparcv9)
49# define ABI64 /* They've said -xarch=v9 at command line */ 50# define ABI64 /* They've said -xarch=v9 at command line */
@@ -315,16 +316,16 @@ $4:
315 ld [global1+local1], local1 316 ld [global1+local1], local1
316 xor $2, out1, out1 ! 8642 317 xor $2, out1, out1 ! 8642
317 xor $2, out0, out0 ! 7531 318 xor $2, out0, out0 ! 7531
318 fmovs %f0, %f0 ! fxor used for alignment 319 ! fmovs %f0, %f0 ! fxor used for alignment
319 320
320 srl out1, 4, local0 ! rotate 4 right 321 srl out1, 4, local0 ! rotate 4 right
321 and out0, local5, local3 ! 3 322 and out0, local5, local3 ! 3
322 fmovs %f0, %f0 323 ! fmovs %f0, %f0
323 324
324 ld [$5+$3*8], local7 ! key 7531 next round 325 ld [$5+$3*8], local7 ! key 7531 next round
325 srl local3, 8, local3 ! 3 326 srl local3, 8, local3 ! 3
326 and local0, 252, local2 ! 2 327 and local0, 252, local2 ! 2
327 fmovs %f0, %f0 328 ! fmovs %f0, %f0
328 329
329 ld [global3+local3],local3 ! 3 330 ld [global3+local3],local3 ! 3
330 sll out1, 28, out1 ! rotate 331 sll out1, 28, out1 ! rotate
@@ -1179,8 +1180,11 @@ DES_encrypt1:
1179 1180
1180 save %sp, FRAME, %sp 1181 save %sp, FRAME, %sp
1181 1182
1182 call .PIC.me.up 1183 sethi %hi(.PIC.DES_SPtrans-1f),global1
1183 mov .PIC.me.up-(.-4),out0 1184 or global1,%lo(.PIC.DES_SPtrans-1f),global1
11851: call .+8
1186 add %o7,global1,global1
1187 sub global1,.PIC.DES_SPtrans-.des_and,out2
1184 1188
1185 ld [in0], in5 ! left 1189 ld [in0], in5 ! left
1186 cmp in2, 0 ! enc 1190 cmp in2, 0 ! enc
@@ -1237,8 +1241,11 @@ DES_encrypt2:
1237 1241
1238 save %sp, FRAME, %sp 1242 save %sp, FRAME, %sp
1239 1243
1240 call .PIC.me.up 1244 sethi %hi(.PIC.DES_SPtrans-1f),global1
1241 mov .PIC.me.up-(.-4),out0 1245 or global1,%lo(.PIC.DES_SPtrans-1f),global1
12461: call .+8
1247 add %o7,global1,global1
1248 sub global1,.PIC.DES_SPtrans-.des_and,out2
1242 1249
1243 ! Set sbox address 1 to 6 and rotate halfs 3 left 1250 ! Set sbox address 1 to 6 and rotate halfs 3 left
1244 ! Errors caught by destest? Yes. Still? *NO* 1251 ! Errors caught by destest? Yes. Still? *NO*
@@ -1352,8 +1359,11 @@ DES_encrypt3:
1352 1359
1353 save %sp, FRAME, %sp 1360 save %sp, FRAME, %sp
1354 1361
1355 call .PIC.me.up 1362 sethi %hi(.PIC.DES_SPtrans-1f),global1
1356 mov .PIC.me.up-(.-4),out0 1363 or global1,%lo(.PIC.DES_SPtrans-1f),global1
13641: call .+8
1365 add %o7,global1,global1
1366 sub global1,.PIC.DES_SPtrans-.des_and,out2
1357 1367
1358 ld [in0], in5 ! left 1368 ld [in0], in5 ! left
1359 add in2, 120, in4 ! ks2 1369 add in2, 120, in4 ! ks2
@@ -1394,8 +1404,11 @@ DES_decrypt3:
1394 1404
1395 save %sp, FRAME, %sp 1405 save %sp, FRAME, %sp
1396 1406
1397 call .PIC.me.up 1407 sethi %hi(.PIC.DES_SPtrans-1f),global1
1398 mov .PIC.me.up-(.-4),out0 1408 or global1,%lo(.PIC.DES_SPtrans-1f),global1
14091: call .+8
1410 add %o7,global1,global1
1411 sub global1,.PIC.DES_SPtrans-.des_and,out2
1399 1412
1400 ld [in0], in5 ! left 1413 ld [in0], in5 ! left
1401 add in3, 120, in4 ! ks3 1414 add in3, 120, in4 ! ks3
@@ -1424,105 +1437,6 @@ DES_decrypt3:
1424.DES_decrypt3.end: 1437.DES_decrypt3.end:
1425 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 1438 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1426 1439
1427 .align 256
1428 .type .des_and,#object
1429 .size .des_and,284
1430
1431.des_and:
1432
1433! This table is used for AND 0xFC when it is known that register
1434! bits 8-31 are zero. Makes it possible to do three arithmetic
1435! operations in one cycle.
1436
1437 .byte 0, 0, 0, 0, 4, 4, 4, 4
1438 .byte 8, 8, 8, 8, 12, 12, 12, 12
1439 .byte 16, 16, 16, 16, 20, 20, 20, 20
1440 .byte 24, 24, 24, 24, 28, 28, 28, 28
1441 .byte 32, 32, 32, 32, 36, 36, 36, 36
1442 .byte 40, 40, 40, 40, 44, 44, 44, 44
1443 .byte 48, 48, 48, 48, 52, 52, 52, 52
1444 .byte 56, 56, 56, 56, 60, 60, 60, 60
1445 .byte 64, 64, 64, 64, 68, 68, 68, 68
1446 .byte 72, 72, 72, 72, 76, 76, 76, 76
1447 .byte 80, 80, 80, 80, 84, 84, 84, 84
1448 .byte 88, 88, 88, 88, 92, 92, 92, 92
1449 .byte 96, 96, 96, 96, 100, 100, 100, 100
1450 .byte 104, 104, 104, 104, 108, 108, 108, 108
1451 .byte 112, 112, 112, 112, 116, 116, 116, 116
1452 .byte 120, 120, 120, 120, 124, 124, 124, 124
1453 .byte 128, 128, 128, 128, 132, 132, 132, 132
1454 .byte 136, 136, 136, 136, 140, 140, 140, 140
1455 .byte 144, 144, 144, 144, 148, 148, 148, 148
1456 .byte 152, 152, 152, 152, 156, 156, 156, 156
1457 .byte 160, 160, 160, 160, 164, 164, 164, 164
1458 .byte 168, 168, 168, 168, 172, 172, 172, 172
1459 .byte 176, 176, 176, 176, 180, 180, 180, 180
1460 .byte 184, 184, 184, 184, 188, 188, 188, 188
1461 .byte 192, 192, 192, 192, 196, 196, 196, 196
1462 .byte 200, 200, 200, 200, 204, 204, 204, 204
1463 .byte 208, 208, 208, 208, 212, 212, 212, 212
1464 .byte 216, 216, 216, 216, 220, 220, 220, 220
1465 .byte 224, 224, 224, 224, 228, 228, 228, 228
1466 .byte 232, 232, 232, 232, 236, 236, 236, 236
1467 .byte 240, 240, 240, 240, 244, 244, 244, 244
1468 .byte 248, 248, 248, 248, 252, 252, 252, 252
1469
1470 ! 5 numbers for initil/final permutation
1471
1472 .word 0x0f0f0f0f ! offset 256
1473 .word 0x0000ffff ! 260
1474 .word 0x33333333 ! 264
1475 .word 0x00ff00ff ! 268
1476 .word 0x55555555 ! 272
1477
1478 .word 0 ! 276
1479 .word LOOPS ! 280
1480 .word 0x0000FC00 ! 284
1481.PIC.DES_SPtrans:
1482 .word %r_disp32(DES_SPtrans)
1483
1484! input: out0 offset between .PIC.me.up and caller
1485! output: out0 pointer to .PIC.me.up
1486! out2 pointer to .des_and
1487! global1 pointer to DES_SPtrans
1488 .align 32
1489.PIC.me.up:
1490 add out0,%o7,out0 ! pointer to .PIC.me.up
1491#if 1
1492 ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1
1493 add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1
1494 add global1,out0,global1
1495#else
1496# ifdef OPENSSL_PIC
1497 ! In case anybody wonders why this code is same for both ABI.
1498 ! To start with it is not. Do note LDPTR below. But of course
1499 ! you must be wondering why the rest of it does not contain
1500 ! things like %hh, %hm and %lm. Well, those are needed only
1501 ! if OpenSSL library *itself* will become larger than 4GB,
1502 ! which is not going to happen any time soon.
1503 sethi %hi(DES_SPtrans),global1
1504 or global1,%lo(DES_SPtrans),global1
1505 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1506 add global1,out0,global1
1507 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1508 LDPTR [out2+global1],global1
1509# elif 0
1510 setn DES_SPtrans,out2,global1 ! synthetic instruction !
1511# elif defined(ABI64)
1512 sethi %hh(DES_SPtrans),out2
1513 or out2,%hm(DES_SPtrans),out2
1514 sethi %lm(DES_SPtrans),global1
1515 or global1,%lo(DES_SPtrans),global1
1516 sllx out2,32,out2
1517 or out2,global1,global1
1518# else
1519 sethi %hi(DES_SPtrans),global1
1520 or global1,%lo(DES_SPtrans),global1
1521# endif
1522#endif
1523 retl
1524 add out0,.des_and-.PIC.me.up,out2
1525
1526! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) 1440! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1527! ***************************************************************** 1441! *****************************************************************
1528 1442
@@ -1539,8 +1453,11 @@ DES_ncbc_encrypt:
1539 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) 1453 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1540 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1454 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1541 1455
1542 call .PIC.me.up 1456 sethi %hi(.PIC.DES_SPtrans-1f),global1
1543 mov .PIC.me.up-(.-4),out0 1457 or global1,%lo(.PIC.DES_SPtrans-1f),global1
14581: call .+8
1459 add %o7,global1,global1
1460 sub global1,.PIC.DES_SPtrans-.des_and,out2
1544 1461
1545 cmp in5, 0 ! enc 1462 cmp in5, 0 ! enc
1546 1463
@@ -1761,8 +1678,11 @@ DES_ede3_cbc_encrypt:
1761 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1678 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1762 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) 1679 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1763 1680
1764 call .PIC.me.up 1681 sethi %hi(.PIC.DES_SPtrans-1f),global1
1765 mov .PIC.me.up-(.-4),out0 1682 or global1,%lo(.PIC.DES_SPtrans-1f),global1
16831: call .+8
1684 add %o7,global1,global1
1685 sub global1,.PIC.DES_SPtrans-.des_and,out2
1766 1686
1767 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc 1687 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1768 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1688 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
@@ -1978,3 +1898,200 @@ DES_ede3_cbc_encrypt:
1978 1898
1979.DES_ede3_cbc_encrypt.end: 1899.DES_ede3_cbc_encrypt.end:
1980 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt 1900 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
1901
1902 .align 256
1903 .type .des_and,#object
1904 .size .des_and,284
1905
1906.des_and:
1907
1908! This table is used for AND 0xFC when it is known that register
1909! bits 8-31 are zero. Makes it possible to do three arithmetic
1910! operations in one cycle.
1911
1912 .byte 0, 0, 0, 0, 4, 4, 4, 4
1913 .byte 8, 8, 8, 8, 12, 12, 12, 12
1914 .byte 16, 16, 16, 16, 20, 20, 20, 20
1915 .byte 24, 24, 24, 24, 28, 28, 28, 28
1916 .byte 32, 32, 32, 32, 36, 36, 36, 36
1917 .byte 40, 40, 40, 40, 44, 44, 44, 44
1918 .byte 48, 48, 48, 48, 52, 52, 52, 52
1919 .byte 56, 56, 56, 56, 60, 60, 60, 60
1920 .byte 64, 64, 64, 64, 68, 68, 68, 68
1921 .byte 72, 72, 72, 72, 76, 76, 76, 76
1922 .byte 80, 80, 80, 80, 84, 84, 84, 84
1923 .byte 88, 88, 88, 88, 92, 92, 92, 92
1924 .byte 96, 96, 96, 96, 100, 100, 100, 100
1925 .byte 104, 104, 104, 104, 108, 108, 108, 108
1926 .byte 112, 112, 112, 112, 116, 116, 116, 116
1927 .byte 120, 120, 120, 120, 124, 124, 124, 124
1928 .byte 128, 128, 128, 128, 132, 132, 132, 132
1929 .byte 136, 136, 136, 136, 140, 140, 140, 140
1930 .byte 144, 144, 144, 144, 148, 148, 148, 148
1931 .byte 152, 152, 152, 152, 156, 156, 156, 156
1932 .byte 160, 160, 160, 160, 164, 164, 164, 164
1933 .byte 168, 168, 168, 168, 172, 172, 172, 172
1934 .byte 176, 176, 176, 176, 180, 180, 180, 180
1935 .byte 184, 184, 184, 184, 188, 188, 188, 188
1936 .byte 192, 192, 192, 192, 196, 196, 196, 196
1937 .byte 200, 200, 200, 200, 204, 204, 204, 204
1938 .byte 208, 208, 208, 208, 212, 212, 212, 212
1939 .byte 216, 216, 216, 216, 220, 220, 220, 220
1940 .byte 224, 224, 224, 224, 228, 228, 228, 228
1941 .byte 232, 232, 232, 232, 236, 236, 236, 236
1942 .byte 240, 240, 240, 240, 244, 244, 244, 244
1943 .byte 248, 248, 248, 248, 252, 252, 252, 252
1944
1945 ! 5 numbers for initial/final permutation
1946
1947 .word 0x0f0f0f0f ! offset 256
1948 .word 0x0000ffff ! 260
1949 .word 0x33333333 ! 264
1950 .word 0x00ff00ff ! 268
1951 .word 0x55555555 ! 272
1952
1953 .word 0 ! 276
1954 .word LOOPS ! 280
1955 .word 0x0000FC00 ! 284
1956
1957 .type .PIC.DES_SPtrans,#object
1958 .size .PIC.DES_SPtrans,2048
1959.align 64
1960.PIC.DES_SPtrans:
1961 ! nibble 0
1962 .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
1963 .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
1964 .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
1965 .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
1966 .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
1967 .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
1968 .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
1969 .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
1970 .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
1971 .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
1972 .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
1973 .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
1974 .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
1975 .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
1976 .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
1977 .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
1978 ! nibble 1
1979 .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
1980 .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
1981 .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
1982 .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
1983 .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
1984 .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
1985 .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
1986 .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
1987 .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
1988 .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
1989 .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
1990 .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
1991 .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
1992 .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
1993 .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
1994 .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
1995 ! nibble 2
1996 .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
1997 .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
1998 .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
1999 .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
2000 .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
2001 .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
2002 .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
2003 .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
2004 .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
2005 .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
2006 .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
2007 .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
2008 .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
2009 .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
2010 .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
2011 .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
2012 ! nibble 3
2013 .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
2014 .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
2015 .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
2016 .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
2017 .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
2018 .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
2019 .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
2020 .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
2021 .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
2022 .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
2023 .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
2024 .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
2025 .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
2026 .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
2027 .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
2028 .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
2029 ! nibble 4
2030 .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
2031 .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
2032 .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
2033 .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
2034 .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
2035 .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
2036 .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
2037 .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
2038 .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
2039 .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
2040 .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
2041 .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
2042 .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
2043 .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
2044 .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
2045 .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
2046 ! nibble 5
2047 .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
2048 .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
2049 .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
2050 .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
2051 .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
2052 .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
2053 .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
2054 .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
2055 .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
2056 .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
2057 .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
2058 .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
2059 .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
2060 .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
2061 .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
2062 .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
2063 ! nibble 6
2064 .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
2065 .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
2066 .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
2067 .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
2068 .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
2069 .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
2070 .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
2071 .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
2072 .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
2073 .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
2074 .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
2075 .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
2076 .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
2077 .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
2078 .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
2079 .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
2080 ! nibble 7
2081 .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
2082 .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
2083 .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
2084 .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
2085 .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
2086 .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
2087 .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
2088 .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
2089 .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
2090 .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
2091 .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
2092 .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
2093 .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
2094 .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
2095 .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
2096 .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
2097
diff --git a/src/lib/libssl/src/crypto/ppccpuid.pl b/src/lib/libssl/src/crypto/ppccpuid.pl
new file mode 100755
index 0000000000..fe44ff07bc
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ppccpuid.pl
@@ -0,0 +1,94 @@
1#!/usr/bin/env perl
2
3$flavour = shift; # first argument; a flavour containing "64" selects the 64-bit mnemonics below
4
5$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
6( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
7( $xlate="${dir}perlasm/ppc-xlate.pl" and -f $xlate) or
8die "can't locate ppc-xlate.pl";
9
10open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # "or", not "||": "||" bound to the always-true command string, so a failed open was never reported
11
12if ($flavour=~/64/) { # per-flavour spellings of compare, shift-right and the atomic_add result move
13 $CMPLI="cmpldi";
14 $SHRLI="srdi";
15 $SIGNX="extsw";
16} else {
17 $CMPLI="cmplwi";
18 $SHRLI="srwi";
19 $SIGNX="mr";
20}
21
22$code=<<___; # assembly template; \$CMPLI/\$SHRLI/\$SIGNX are interpolated, and the result is piped through \$xlate
23.machine "any"
24.text
25
26.globl .OPENSSL_cpuid_setup
27.align 4
28.OPENSSL_cpuid_setup:
29 blr
30
31.globl .OPENSSL_wipe_cpu
32.align 4
33.OPENSSL_wipe_cpu:
34 xor r0,r0,r0
35 mr r3,r1
36 xor r4,r4,r4
37 xor r5,r5,r5
38 xor r6,r6,r6
39 xor r7,r7,r7
40 xor r8,r8,r8
41 xor r9,r9,r9
42 xor r10,r10,r10
43 xor r11,r11,r11
44 xor r12,r12,r12
45 blr
46
47.globl .OPENSSL_atomic_add
48.align 4
49.OPENSSL_atomic_add:
50Loop: lwarx r5,0,r3
51 add r0,r4,r5
52 stwcx. r0,0,r3
53 bne- Loop
54 $SIGNX r3,r0
55 blr
56
57.globl .OPENSSL_rdtsc
58.align 4
59.OPENSSL_rdtsc:
60 mftb r3
61 mftbu r4
62 blr
63
64.globl .OPENSSL_cleanse
65.align 4
66.OPENSSL_cleanse:
67 $CMPLI r4,7
68 li r0,0
69 bge Lot
70 $CMPLI r4,0
71 beqlr-
72Little: mtctr r4
73 stb r0,0(r3)
74 addi r3,r3,1
75 bdnz- \$-8
76 blr
77Lot: andi. r5,r3,3
78 beq Laligned
79 stb r0,0(r3)
80 subi r4,r4,1
81 addi r3,r3,1
82 b Lot
83Laligned:
84 $SHRLI r5,r4,2
85 mtctr r5
86 stw r0,0(r3)
87 addi r3,r3,4
88 bdnz- \$-8
89 andi. r4,r4,3
90 bne Little
91 blr
92___
93
94$code =~ s/\`([^\`]*)\`/eval $1/gem; # replace each backquoted span with the result of evaluating it as Perl
95print $code;
96close STDOUT;
diff --git a/src/lib/libssl/src/crypto/s390xcpuid.S b/src/lib/libssl/src/crypto/s390xcpuid.S
new file mode 100644
index 0000000000..8500133ad0
--- /dev/null
+++ b/src/lib/libssl/src/crypto/s390xcpuid.S
@@ -0,0 +1,90 @@
1.text
2
3.globl OPENSSL_cpuid_setup
4.type OPENSSL_cpuid_setup,@function
5.align 16
6OPENSSL_cpuid_setup: # stub: performs no capability probing
7 br %r14 # branch to the address in %r14 immediately; body reserved for future use
8.size OPENSSL_cpuid_setup,.-OPENSSL_cpuid_setup
9
10.globl OPENSSL_s390x_facilities
11.type OPENSSL_s390x_facilities,@function
12.align 16
13OPENSSL_s390x_facilities: # leaves in %r2 the doubleword that stfle stores at 16(%r15)
14 lghi %r0,0 # %r0 = 0 going into stfle (presumably requests a single doubleword; confirm against the stfle definition)
15 .long 0xb2b0f010 # stfle 16(%r15), emitted as a raw opcode word
16 lg %r2,16(%r15) # reload the stored doubleword into %r2
17 br %r14
18.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities
19
20.globl OPENSSL_rdtsc
21.type OPENSSL_rdtsc,@function
22.align 16
23OPENSSL_rdtsc: # leaves in %r2 the 64-bit value written by stck
24 stck 16(%r15) # store clock into the doubleword at 16(%r15)
25 lg %r2,16(%r15) # reload that doubleword into %r2
26 br %r14
27.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
28
29.globl OPENSSL_atomic_add
30.type OPENSSL_atomic_add,@function
31.align 16
32OPENSSL_atomic_add: # adds %r3 to the 32-bit word at 0(%r2) with a compare-and-swap retry loop
33 l %r1,0(%r2) # %r1 = current value of the word
34.Lspin: lr %r0,%r1 # %r0 = copy of the value we expect to find
35 ar %r0,%r3 # %r0 = expected value + addend
36 cs %r1,%r0,0(%r2) # store %r0 if the word still equals %r1; otherwise %r1 is reloaded from memory
37 brc 4,.Lspin # condition code 1 (swap did not happen): retry with the refreshed %r1
38 lgfr %r2,%r0 # sign-extend the new 32-bit value into %r2; OpenSSL expects the new value
39 br %r14
40.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
41
42.globl OPENSSL_wipe_cpu
43.type OPENSSL_wipe_cpu,@function
44.align 16
45OPENSSL_wipe_cpu: # zeroes %r0,%r1,%r3,%r4 and %f0-%f7; leaves a copy of %r15 in %r2
46 xgr %r0,%r0 # xor a register with itself to clear it
47 xgr %r1,%r1
48 lgr %r2,%r15 # %r2 = current value of %r15 (not cleared, overwritten instead)
49 xgr %r3,%r3
50 xgr %r4,%r4
51 lzdr %f0 # load zero into each floating-point register %f0..%f7
52 lzdr %f1
53 lzdr %f2
54 lzdr %f3
55 lzdr %f4
56 lzdr %f5
57 lzdr %f6
58 lzdr %f7
59 br %r14
60.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
61
62.globl OPENSSL_cleanse
63.type OPENSSL_cleanse,@function
64.align 16
65OPENSSL_cleanse: # stores zero bytes over %r3 bytes starting at the address in %r2
66 lghi %r4,15
67 lghi %r0,0 # %r0 = 0: the fill value, also the comparand for the zero-length test
68 clgr %r3,%r4
69 jh .Lot # more than 15 bytes: align the pointer, then store doublewords
70 clgr %r3,%r0
71 bcr 8,%r14 # length 0: return now; brctg in .Little would wrap the counter below zero and keep storing
72.Little: # byte loop, entered only with %r3 >= 1
73 stc %r0,0(%r2)
74 la %r2,1(%r2)
75 brctg %r3,.Little
76 br %r14
77.align 4
78.Lot: tmll %r2,7 # test the low three address bits
79 jz .Laligned
80 stc %r0,0(%r2) # store single bytes until the pointer is 8-byte aligned
81 la %r2,1(%r2)
82 brctg %r3,.Lot
83.Laligned:
84 srlg %r4,%r3,3 # %r4 = remaining length / 8 = number of doubleword stores
85.Loop: stg %r0,0(%r2)
86 la %r2,8(%r2)
87 brctg %r4,.Loop
88 lghi %r4,7
89 ngr %r3,%r4 # %r3 = remaining length mod 8
90 jnz .Little # finish the tail bytes, if any
91 br %r14
92.size OPENSSL_cleanse,.-OPENSSL_cleanse
diff --git a/src/lib/libssl/src/crypto/sparcv9cap.c b/src/lib/libssl/src/crypto/sparcv9cap.c
new file mode 100644
index 0000000000..5f31d20bd0
--- /dev/null
+++ b/src/lib/libssl/src/crypto/sparcv9cap.c
@@ -0,0 +1,154 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <sys/time.h>
5#include <openssl/bn.h>
6
7#define SPARCV9_TICK_PRIVILEGED (1<<0)
8#define SPARCV9_PREFER_FPU (1<<1)
9#define SPARCV9_VIS1 (1<<2)
10#define SPARCV9_VIS2 (1<<3) /* reserved */
11#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
12static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
13
14int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) /* forwards all arguments to one of two implementations chosen by OPENSSL_sparcv9cap_P */
15 {
16 int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); /* block-scope prototypes; the definitions are not in this file */
17 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
18
19 if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == /* FPU variant only when BOTH bits are set */
20 (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
21 return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
22 else
23 return bn_mul_mont_int(rp,ap,bp,np,n0,num); /* otherwise the integer variant */
24 }
25
26unsigned long OPENSSL_rdtsc(void) /* returns a counter value; the source depends on SPARCV9_TICK_PRIVILEGED */
27 {
28 unsigned long _sparcv9_rdtick(void); /* block-scope prototype; defined outside this file */
29
30 if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) /* flag set: _sparcv9_rdtick() is not called */
31#if defined(__sun) && defined(__SVR4)
32 return gethrtime(); /* Solaris build: use gethrtime() instead */
33#else
34 return 0; /* other builds: no substitute, report 0 */
35#endif
36 else
37 return _sparcv9_rdtick(); /* flag clear: use the tick helper */
38 }
39
40#if defined(__sun) && defined(__SVR4)
41
42#include <dlfcn.h>
43#include <libdevinfo.h>
44#include <sys/systeminfo.h>
45
46typedef di_node_t (*di_init_t)(const char *,uint_t);
47typedef void (*di_fini_t)(di_node_t);
48typedef char * (*di_node_name_t)(di_node_t);
49typedef int (*di_walk_node_t)(di_node_t,uint_t,di_node_name_t,int (*)(di_node_t,di_node_name_t));
50
51#define DLLINK(h,name) (name=(name##_t)dlsym((h),#name))
52
53static int walk_nodename(di_node_t node, di_node_name_t di_node_name) /* callback handed to di_walk_node: updates OPENSSL_sparcv9cap_P from the node name and stops the walk on the first UltraSPARC match */
54 {
55 char *name = (*di_node_name)(node);
56
57 /* This is expected to catch all UltraSPARC flavors prior to T1 */
58 if (!strcmp (name,"SUNW,UltraSPARC") ||
59 !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */
60 {
61 OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
62
63 /* %tick is privileged only on UltraSPARC-I/II, but not IIe */
64 if (name[15]!='\0' && name[17]!='\0' && name[18]!='\0') /* name[15] is the terminator of plain "SUNW,UltraSPARC"; testing it first keeps the [17]/[18] reads inside the string */
65 OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
66
67 return DI_WALK_TERMINATE;
68 }
69 /* This is expected to catch remaining UltraSPARCs, such as T1 */
70 else if (!strncmp(name,"SUNW,UltraSPARC",15))
71 {
72 OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
73
74 return DI_WALK_TERMINATE;
75 }
76
77 return DI_WALK_CONTINUE; /* not an UltraSPARC node: keep walking */
78 }
79
80void OPENSSL_cpuid_setup(void) /* Solaris build: fills OPENSSL_sparcv9cap_P from the environment, sysinfo() and the libdevinfo device tree */
81 {
82 void *h;
83 char *e,si[256];
84 static int trigger=0;
85
86 if (trigger) return; /* only the first call does any work */
87 trigger=1;
88
89 if ((e=getenv("OPENSSL_sparcv9cap"))) /* environment override: take the value as-is and skip all probing */
90 {
91 OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); /* base 0: strtoul picks the base from the string prefix */
92 return;
93 }
94
95 if (sysinfo(SI_MACHINE,si,sizeof(si))>0)
96 {
97 if (strcmp(si,"sun4v")) /* non-zero result: machine string is NOT "sun4v" */
98 /* FPU is preferred for all CPUs, but US-T1/2 */
99 OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
100 }
101
102 if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
103 {
104 if (strstr(si,"+vis")) /* substring test, so "+vis2" also satisfies it */
105 OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
106 if (strstr(si,"+vis2"))
107 {
108 OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
109 OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
110 return; /* "+vis2" found: done, the device-tree walk below is skipped */
111 }
112 }
113
114 if ((h = dlopen("libdevinfo.so.1",RTLD_LAZY))) do /* do/while(0) so "break" can bail out to the dlclose below */
115 {
116 di_init_t di_init;
117 di_fini_t di_fini;
118 di_walk_node_t di_walk_node;
119 di_node_name_t di_node_name;
120 di_node_t root_node;
121
122 if (!DLLINK(h,di_init)) break; /* DLLINK assigns dlsym(h,"<name>") to the local of the same name; a NULL result abandons the probe */
123 if (!DLLINK(h,di_fini)) break;
124 if (!DLLINK(h,di_walk_node)) break;
125 if (!DLLINK(h,di_node_name)) break;
126
127 if ((root_node = (*di_init)("/",DINFOSUBTREE))!=DI_NODE_NIL)
128 {
129 (*di_walk_node)(root_node,DI_WALK_SIBFIRST, /* walk_nodename receives di_node_name as its second argument */
130 di_node_name,walk_nodename);
131 (*di_fini)(root_node);
132 }
133 } while(0);
134
135 if (h) dlclose(h); /* h is NULL when dlopen failed */
136 }
137
138#else
139
140void OPENSSL_cpuid_setup(void) /* non-Solaris build: no probing, only the environment override or a fixed default */
141 {
142 char *e;
143
144 if ((e=getenv("OPENSSL_sparcv9cap"))) /* environment override replaces the capability word entirely */
145 {
146 OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); /* base 0: strtoul picks the base from the string prefix */
147 return;
148 }
149
150 /* For now we assume that the rest supports UltraSPARC-I* only */
151 OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; /* added to the initial value, so SPARCV9_TICK_PRIVILEGED stays set */
152 }
153
154#endif
diff --git a/src/lib/libssl/src/crypto/x509/x509_vpm.c b/src/lib/libssl/src/crypto/x509/x509_vpm.c
index c92e65936f..2b06718aec 100644
--- a/src/lib/libssl/src/crypto/x509/x509_vpm.c
+++ b/src/lib/libssl/src/crypto/x509/x509_vpm.c
@@ -74,7 +74,7 @@ static void x509_verify_param_zero(X509_VERIFY_PARAM *param)
74 param->name = NULL; 74 param->name = NULL;
75 param->purpose = 0; 75 param->purpose = 0;
76 param->trust = 0; 76 param->trust = 0;
77 param->inh_flags = X509_VP_FLAG_DEFAULT; 77 param->inh_flags = 0;
78 param->flags = 0; 78 param->flags = 0;
79 param->depth = -1; 79 param->depth = -1;
80 if (param->policies) 80 if (param->policies)
@@ -320,11 +320,21 @@ static const X509_VERIFY_PARAM default_table[] = {
320 0, /* flags */ 320 0, /* flags */
321 0, /* purpose */ 321 0, /* purpose */
322 0, /* trust */ 322 0, /* trust */
323 9, /* depth */ 323 100, /* depth */
324 NULL /* policies */ 324 NULL /* policies */
325 }, 325 },
326 { 326 {
327 "pkcs7", /* SSL/TLS client parameters */ 327 "pkcs7", /* S/MIME signing parameters */
328 0, /* Check time */
329 0, /* internal flags */
330 0, /* flags */
331 X509_PURPOSE_SMIME_SIGN, /* purpose */
332 X509_TRUST_EMAIL, /* trust */
333 -1, /* depth */
334 NULL /* policies */
335 },
336 {
337 "smime_sign", /* S/MIME signing parameters */
328 0, /* Check time */ 338 0, /* Check time */
329 0, /* internal flags */ 339 0, /* internal flags */
330 0, /* flags */ 340 0, /* flags */
diff --git a/src/lib/libssl/src/crypto/x509v3/v3_addr.c b/src/lib/libssl/src/crypto/x509v3/v3_addr.c
index a37f844d3c..efdf7c3ba7 100644
--- a/src/lib/libssl/src/crypto/x509v3/v3_addr.c
+++ b/src/lib/libssl/src/crypto/x509v3/v3_addr.c
@@ -61,7 +61,7 @@
61 61
62#include <stdio.h> 62#include <stdio.h>
63#include <stdlib.h> 63#include <stdlib.h>
64#include <assert.h> 64
65#include "cryptlib.h" 65#include "cryptlib.h"
66#include <openssl/conf.h> 66#include <openssl/conf.h>
67#include <openssl/asn1.h> 67#include <openssl/asn1.h>
@@ -128,7 +128,7 @@ static int length_from_afi(const unsigned afi)
128/* 128/*
129 * Extract the AFI from an IPAddressFamily. 129 * Extract the AFI from an IPAddressFamily.
130 */ 130 */
131unsigned v3_addr_get_afi(const IPAddressFamily *f) 131unsigned int v3_addr_get_afi(const IPAddressFamily *f)
132{ 132{
133 return ((f != NULL && 133 return ((f != NULL &&
134 f->addressFamily != NULL && 134 f->addressFamily != NULL &&
@@ -147,7 +147,7 @@ static void addr_expand(unsigned char *addr,
147 const int length, 147 const int length,
148 const unsigned char fill) 148 const unsigned char fill)
149{ 149{
150 assert(bs->length >= 0 && bs->length <= length); 150 OPENSSL_assert(bs->length >= 0 && bs->length <= length);
151 if (bs->length > 0) { 151 if (bs->length > 0) {
152 memcpy(addr, bs->data, bs->length); 152 memcpy(addr, bs->data, bs->length);
153 if ((bs->flags & 7) != 0) { 153 if ((bs->flags & 7) != 0) {
@@ -190,6 +190,8 @@ static int i2r_address(BIO *out,
190 BIO_printf(out, "%x%s", (addr[i] << 8) | addr[i+1], (i < 14 ? ":" : "")); 190 BIO_printf(out, "%x%s", (addr[i] << 8) | addr[i+1], (i < 14 ? ":" : ""));
191 if (i < 16) 191 if (i < 16)
192 BIO_puts(out, ":"); 192 BIO_puts(out, ":");
193 if (i == 0)
194 BIO_puts(out, ":");
193 break; 195 break;
194 default: 196 default:
195 for (i = 0; i < bs->length; i++) 197 for (i = 0; i < bs->length; i++)
@@ -243,7 +245,7 @@ static int i2r_IPAddrBlocks(X509V3_EXT_METHOD *method,
243 int i; 245 int i;
244 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) { 246 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) {
245 IPAddressFamily *f = sk_IPAddressFamily_value(addr, i); 247 IPAddressFamily *f = sk_IPAddressFamily_value(addr, i);
246 const unsigned afi = v3_addr_get_afi(f); 248 const unsigned int afi = v3_addr_get_afi(f);
247 switch (afi) { 249 switch (afi) {
248 case IANA_AFI_IPV4: 250 case IANA_AFI_IPV4:
249 BIO_printf(out, "%*sIPv4", indent, ""); 251 BIO_printf(out, "%*sIPv4", indent, "");
@@ -453,7 +455,7 @@ static int make_addressRange(IPAddressOrRange **result,
453 if ((aor = IPAddressOrRange_new()) == NULL) 455 if ((aor = IPAddressOrRange_new()) == NULL)
454 return 0; 456 return 0;
455 aor->type = IPAddressOrRange_addressRange; 457 aor->type = IPAddressOrRange_addressRange;
456 assert(aor->u.addressRange == NULL); 458 OPENSSL_assert(aor->u.addressRange == NULL);
457 if ((aor->u.addressRange = IPAddressRange_new()) == NULL) 459 if ((aor->u.addressRange = IPAddressRange_new()) == NULL)
458 goto err; 460 goto err;
459 if (aor->u.addressRange->min == NULL && 461 if (aor->u.addressRange->min == NULL &&
@@ -522,7 +524,7 @@ static IPAddressFamily *make_IPAddressFamily(IPAddrBlocks *addr,
522 524
523 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) { 525 for (i = 0; i < sk_IPAddressFamily_num(addr); i++) {
524 f = sk_IPAddressFamily_value(addr, i); 526 f = sk_IPAddressFamily_value(addr, i);
525 assert(f->addressFamily->data != NULL); 527 OPENSSL_assert(f->addressFamily->data != NULL);
526 if (f->addressFamily->length == keylen && 528 if (f->addressFamily->length == keylen &&
527 !memcmp(f->addressFamily->data, key, keylen)) 529 !memcmp(f->addressFamily->data, key, keylen))
528 return f; 530 return f;
@@ -654,7 +656,7 @@ static void extract_min_max(IPAddressOrRange *aor,
654 unsigned char *max, 656 unsigned char *max,
655 int length) 657 int length)
656{ 658{
657 assert(aor != NULL && min != NULL && max != NULL); 659 OPENSSL_assert(aor != NULL && min != NULL && max != NULL);
658 switch (aor->type) { 660 switch (aor->type) {
659 case IPAddressOrRange_addressPrefix: 661 case IPAddressOrRange_addressPrefix:
660 addr_expand(min, aor->u.addressPrefix, length, 0x00); 662 addr_expand(min, aor->u.addressPrefix, length, 0x00);
@@ -880,7 +882,7 @@ int v3_addr_canonize(IPAddrBlocks *addr)
880 } 882 }
881 (void)sk_IPAddressFamily_set_cmp_func(addr, IPAddressFamily_cmp); 883 (void)sk_IPAddressFamily_set_cmp_func(addr, IPAddressFamily_cmp);
882 sk_IPAddressFamily_sort(addr); 884 sk_IPAddressFamily_sort(addr);
883 assert(v3_addr_is_canonical(addr)); 885 OPENSSL_assert(v3_addr_is_canonical(addr));
884 return 1; 886 return 1;
885} 887}
886 888
@@ -1127,7 +1129,10 @@ int v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b)
1127 for (i = 0; i < sk_IPAddressFamily_num(a); i++) { 1129 for (i = 0; i < sk_IPAddressFamily_num(a); i++) {
1128 IPAddressFamily *fa = sk_IPAddressFamily_value(a, i); 1130 IPAddressFamily *fa = sk_IPAddressFamily_value(a, i);
1129 int j = sk_IPAddressFamily_find(b, fa); 1131 int j = sk_IPAddressFamily_find(b, fa);
1130 IPAddressFamily *fb = sk_IPAddressFamily_value(b, j); 1132 IPAddressFamily *fb;
1133 fb = sk_IPAddressFamily_value(b, j);
1134 if (fb == NULL)
1135 return 0;
1131 if (!addr_contains(fb->ipAddressChoice->u.addressesOrRanges, 1136 if (!addr_contains(fb->ipAddressChoice->u.addressesOrRanges,
1132 fa->ipAddressChoice->u.addressesOrRanges, 1137 fa->ipAddressChoice->u.addressesOrRanges,
1133 length_from_afi(v3_addr_get_afi(fb)))) 1138 length_from_afi(v3_addr_get_afi(fb))))
@@ -1164,9 +1169,9 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx,
1164 int i, j, ret = 1; 1169 int i, j, ret = 1;
1165 X509 *x = NULL; 1170 X509 *x = NULL;
1166 1171
1167 assert(chain != NULL && sk_X509_num(chain) > 0); 1172 OPENSSL_assert(chain != NULL && sk_X509_num(chain) > 0);
1168 assert(ctx != NULL || ext != NULL); 1173 OPENSSL_assert(ctx != NULL || ext != NULL);
1169 assert(ctx == NULL || ctx->verify_cb != NULL); 1174 OPENSSL_assert(ctx == NULL || ctx->verify_cb != NULL);
1170 1175
1171 /* 1176 /*
1172 * Figure out where to start. If we don't have an extension to 1177 * Figure out where to start. If we don't have an extension to
@@ -1178,7 +1183,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx,
1178 } else { 1183 } else {
1179 i = 0; 1184 i = 0;
1180 x = sk_X509_value(chain, i); 1185 x = sk_X509_value(chain, i);
1181 assert(x != NULL); 1186 OPENSSL_assert(x != NULL);
1182 if ((ext = x->rfc3779_addr) == NULL) 1187 if ((ext = x->rfc3779_addr) == NULL)
1183 goto done; 1188 goto done;
1184 } 1189 }
@@ -1197,7 +1202,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx,
1197 */ 1202 */
1198 for (i++; i < sk_X509_num(chain); i++) { 1203 for (i++; i < sk_X509_num(chain); i++) {
1199 x = sk_X509_value(chain, i); 1204 x = sk_X509_value(chain, i);
1200 assert(x != NULL); 1205 OPENSSL_assert(x != NULL);
1201 if (!v3_addr_is_canonical(x->rfc3779_addr)) 1206 if (!v3_addr_is_canonical(x->rfc3779_addr))
1202 validation_err(X509_V_ERR_INVALID_EXTENSION); 1207 validation_err(X509_V_ERR_INVALID_EXTENSION);
1203 if (x->rfc3779_addr == NULL) { 1208 if (x->rfc3779_addr == NULL) {