summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes
diff options
context:
space:
mode:
authorcvs2svn <admin@example.com>2009-06-25 14:33:51 +0000
committercvs2svn <admin@example.com>2009-06-25 14:33:51 +0000
commit3944e6efcea0baa7128a89353d149b37100c0ece (patch)
tree64c1ad6d7af88839fd67d630ca81c768fd1191cd /src/lib/libcrypto/aes
parent2eabc3aa42ad7d46a1723621f8e34e533342f67a (diff)
downloadopenbsd-OPENBSD_4_6_BASE.tar.gz
openbsd-OPENBSD_4_6_BASE.tar.bz2
openbsd-OPENBSD_4_6_BASE.zip
This commit was manufactured by cvs2git to create tag 'OPENBSD_4_6_BASE'.OPENBSD_4_6_BASE
Diffstat (limited to 'src/lib/libcrypto/aes')
-rw-r--r--src/lib/libcrypto/aes/README3
-rw-r--r--src/lib/libcrypto/aes/aes.h148
-rw-r--r--src/lib/libcrypto/aes/aes_cbc.c133
-rw-r--r--src/lib/libcrypto/aes/aes_cfb.c225
-rw-r--r--src/lib/libcrypto/aes/aes_core.c1167
-rw-r--r--src/lib/libcrypto/aes/aes_ctr.c139
-rw-r--r--src/lib/libcrypto/aes/aes_ecb.c73
-rw-r--r--src/lib/libcrypto/aes/aes_ige.c323
-rw-r--r--src/lib/libcrypto/aes/aes_locl.h89
-rw-r--r--src/lib/libcrypto/aes/aes_misc.c64
-rw-r--r--src/lib/libcrypto/aes/aes_ofb.c142
-rw-r--r--src/lib/libcrypto/aes/aes_wrap.c259
-rw-r--r--src/lib/libcrypto/aes/aes_x86core.c1063
-rw-r--r--src/lib/libcrypto/aes/asm/aes-586.pl1533
-rw-r--r--src/lib/libcrypto/aes/asm/aes-armv4.pl1030
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ia64.S1123
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ppc.pl1176
-rw-r--r--src/lib/libcrypto/aes/asm/aes-s390x.pl1333
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-sparcv9.pl1181
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-x86_64.pl1579
20 files changed, 0 insertions, 12783 deletions
diff --git a/src/lib/libcrypto/aes/README b/src/lib/libcrypto/aes/README
deleted file mode 100644
index 0f9620a80e..0000000000
--- a/src/lib/libcrypto/aes/README
+++ /dev/null
@@ -1,3 +0,0 @@
1This is an OpenSSL-compatible version of AES (also called Rijndael).
2aes_core.c is basically the same as rijndael-alg-fst.c but with an
3API that looks like the rest of the OpenSSL symmetric cipher suite.
diff --git a/src/lib/libcrypto/aes/aes.h b/src/lib/libcrypto/aes/aes.h
deleted file mode 100644
index 450f2b4051..0000000000
--- a/src/lib/libcrypto/aes/aes.h
+++ /dev/null
@@ -1,148 +0,0 @@
1/* crypto/aes/aes.h -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef HEADER_AES_H
53#define HEADER_AES_H
54
55#include <openssl/opensslconf.h>
56
57#ifdef OPENSSL_NO_AES
58#error AES is disabled.
59#endif
60
61#define AES_ENCRYPT 1
62#define AES_DECRYPT 0
63
64/* Because array size can't be a const in C, the following two are macros.
65 AES_MAXNR is the maximum number of rounds; AES_BLOCK_SIZE is in bytes. */
66#define AES_MAXNR 14
67#define AES_BLOCK_SIZE 16
68
69#ifdef OPENSSL_FIPS
70#define FIPS_AES_SIZE_T int
71#endif
72
73#ifdef __cplusplus
74extern "C" {
75#endif
76
77/* This should be a hidden type, but EVP requires that the size be known */
/*
 * Key object passed to every AES routine declared in this header.
 * It is declared in full (rather than opaquely) so that its size is known
 * to code that embeds it.
 */
78struct aes_key_st {
79#ifdef AES_LONG
/* Array of 4 * (AES_MAXNR + 1) words, built from unsigned long when AES_LONG
 * is defined.  NOTE(review): presumably the expanded round-key schedule --
 * confirm against the key-setup code, which is not visible here. */
80 unsigned long rd_key[4 *(AES_MAXNR + 1)];
81#else
/* Same array, built from unsigned int when AES_LONG is not defined. */
82 unsigned int rd_key[4 *(AES_MAXNR + 1)];
83#endif
/* NOTE(review): presumably the number of rounds selected for the key size;
 * the code that stores it is not visible here. */
84 int rounds;
85};
/* Public name for the key structure. */
86typedef struct aes_key_st AES_KEY;
87
/* Returns a constant string; NOTE(review): presumably a description of the
 * compiled-in AES options -- the definition is not in this header. */
88const char *AES_options(void);
89
/* Key setup: fill *key from userKey, whose size is given by bits.
 * NOTE(review): accepted values of bits and the meaning of the int return
 * are defined by the implementation, which is not visible here. */
90int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
91 AES_KEY *key);
92int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
93 AES_KEY *key);
94
/* Single-block primitives: transform the block at in into out using key.
 * The mode routines call them with in == out, so in-place use is expected. */
95void AES_encrypt(const unsigned char *in, unsigned char *out,
96 const AES_KEY *key);
97void AES_decrypt(const unsigned char *in, unsigned char *out,
98 const AES_KEY *key);
99
/* Mode-of-operation entry points.  Where present, enc selects the direction
 * (AES_ENCRYPT or AES_DECRYPT) and ivec is caller-owned chaining state. */
100void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
101 const AES_KEY *key, const int enc);
/* CBC: length is in bytes; ivec is overwritten with the new chaining value. */
102void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
103 const unsigned long length, const AES_KEY *key,
104 unsigned char *ivec, const int enc);
/* CFB-128: length is in bytes; *num is the offset inside the current
 * 16-byte block and is read on entry and written back on return. */
105void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
106 const unsigned long length, const AES_KEY *key,
107 unsigned char *ivec, int *num, const int enc);
/* CFB-1: length is counted in BITS (data packed MS bit first); *num is
 * expected to be 0. */
108void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
109 const unsigned long length, const AES_KEY *key,
110 unsigned char *ivec, int *num, const int enc);
/* CFB-8: length is in bytes; *num is expected to be 0. */
111void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
112 const unsigned long length, const AES_KEY *key,
113 unsigned char *ivec, int *num, const int enc);
/* One CFB segment of nbits bits (1..128); other values of nbits make the
 * call return without doing anything. */
114void AES_cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
115 const int nbits,const AES_KEY *key,
116 unsigned char *ivec,const int enc);
/* OFB-128.  NOTE(review): num presumably plays the same role as in
 * AES_cfb128_encrypt -- the definition is not visible here. */
117void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
118 const unsigned long length, const AES_KEY *key,
119 unsigned char *ivec, int *num);
/* CTR-128: ivec and ecount_buf are AES_BLOCK_SIZE-byte caller buffers.
 * NOTE(review): their exact use and that of num are defined by the
 * implementation, which is not visible here. */
120void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
121 const unsigned long length, const AES_KEY *key,
122 unsigned char ivec[AES_BLOCK_SIZE],
123 unsigned char ecount_buf[AES_BLOCK_SIZE],
124 unsigned int *num);
125
126/* For IGE, see also http://www.links.org/files/openssl-ige.pdf */
127/* NB: the IV is _two_ blocks long */
128void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
129 const unsigned long length, const AES_KEY *key,
130 unsigned char *ivec, const int enc);
131/* NB: the IV is _four_ blocks long */
132void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
133 const unsigned long length, const AES_KEY *key,
134 const AES_KEY *key2, const unsigned char *ivec,
135 const int enc);
136
/* Key wrap / unwrap of inlen bytes from in into out.
 * NOTE(review): the meaning of the int return and of a NULL iv is defined
 * by the implementation, which is not visible here. */
137int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
138 unsigned char *out,
139 const unsigned char *in, unsigned int inlen);
140int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
141 unsigned char *out,
142 const unsigned char *in, unsigned int inlen);
143
144#ifdef __cplusplus
145}
146#endif
147
148#endif /* !HEADER_AES_H */
diff --git a/src/lib/libcrypto/aes/aes_cbc.c b/src/lib/libcrypto/aes/aes_cbc.c
deleted file mode 100644
index 373864cd4b..0000000000
--- a/src/lib/libcrypto/aes/aes_cbc.c
+++ /dev/null
@@ -1,133 +0,0 @@
1/* crypto/aes/aes_cbc.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef AES_DEBUG
53# ifndef NDEBUG
54# define NDEBUG
55# endif
56#endif
57#include <assert.h>
58
59#include <openssl/aes.h>
60#include "aes_locl.h"
61
62#if !defined(OPENSSL_FIPS_AES_ASM)
63void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
64 const unsigned long length, const AES_KEY *key,
65 unsigned char *ivec, const int enc) {
66
67 unsigned long n;
68 unsigned long len = length;
69 unsigned char tmp[AES_BLOCK_SIZE];
70 const unsigned char *iv = ivec;
71
72 assert(in && out && key && ivec);
73 assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
74
75 if (AES_ENCRYPT == enc) {
76 while (len >= AES_BLOCK_SIZE) {
77 for(n=0; n < AES_BLOCK_SIZE; ++n)
78 out[n] = in[n] ^ iv[n];
79 AES_encrypt(out, out, key);
80 iv = out;
81 len -= AES_BLOCK_SIZE;
82 in += AES_BLOCK_SIZE;
83 out += AES_BLOCK_SIZE;
84 }
85 if (len) {
86 for(n=0; n < len; ++n)
87 out[n] = in[n] ^ iv[n];
88 for(n=len; n < AES_BLOCK_SIZE; ++n)
89 out[n] = iv[n];
90 AES_encrypt(out, out, key);
91 iv = out;
92 }
93 memcpy(ivec,iv,AES_BLOCK_SIZE);
94 } else if (in != out) {
95 while (len >= AES_BLOCK_SIZE) {
96 AES_decrypt(in, out, key);
97 for(n=0; n < AES_BLOCK_SIZE; ++n)
98 out[n] ^= iv[n];
99 iv = in;
100 len -= AES_BLOCK_SIZE;
101 in += AES_BLOCK_SIZE;
102 out += AES_BLOCK_SIZE;
103 }
104 if (len) {
105 AES_decrypt(in,tmp,key);
106 for(n=0; n < len; ++n)
107 out[n] = tmp[n] ^ iv[n];
108 iv = in;
109 }
110 memcpy(ivec,iv,AES_BLOCK_SIZE);
111 } else {
112 while (len >= AES_BLOCK_SIZE) {
113 memcpy(tmp, in, AES_BLOCK_SIZE);
114 AES_decrypt(in, out, key);
115 for(n=0; n < AES_BLOCK_SIZE; ++n)
116 out[n] ^= ivec[n];
117 memcpy(ivec, tmp, AES_BLOCK_SIZE);
118 len -= AES_BLOCK_SIZE;
119 in += AES_BLOCK_SIZE;
120 out += AES_BLOCK_SIZE;
121 }
122 if (len) {
123 memcpy(tmp, in, AES_BLOCK_SIZE);
124 AES_decrypt(tmp, out, key);
125 for(n=0; n < len; ++n)
126 out[n] ^= ivec[n];
127 for(n=len; n < AES_BLOCK_SIZE; ++n)
128 out[n] = tmp[n];
129 memcpy(ivec, tmp, AES_BLOCK_SIZE);
130 }
131 }
132}
133#endif
diff --git a/src/lib/libcrypto/aes/aes_cfb.c b/src/lib/libcrypto/aes/aes_cfb.c
deleted file mode 100644
index 49f0411010..0000000000
--- a/src/lib/libcrypto/aes/aes_cfb.c
+++ /dev/null
@@ -1,225 +0,0 @@
1/* crypto/aes/aes_cfb.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
52 * All rights reserved.
53 *
54 * This package is an SSL implementation written
55 * by Eric Young (eay@cryptsoft.com).
56 * The implementation was written so as to conform with Netscapes SSL.
57 *
58 * This library is free for commercial and non-commercial use as long as
59 * the following conditions are aheared to. The following conditions
60 * apply to all code found in this distribution, be it the RC4, RSA,
61 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
62 * included with this distribution is covered by the same copyright terms
63 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
64 *
65 * Copyright remains Eric Young's, and as such any Copyright notices in
66 * the code are not to be removed.
67 * If this package is used in a product, Eric Young should be given attribution
68 * as the author of the parts of the library used.
69 * This can be in the form of a textual message at program startup or
70 * in documentation (online or textual) provided with the package.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 * must display the following acknowledgement:
82 * "This product includes cryptographic software written by
83 * Eric Young (eay@cryptsoft.com)"
84 * The word 'cryptographic' can be left out if the rouines from the library
85 * being used are not cryptographic related :-).
86 * 4. If you include any Windows specific code (or a derivative thereof) from
87 * the apps directory (application code) you must include an acknowledgement:
88 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
89 *
90 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
91 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
94 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100 * SUCH DAMAGE.
101 *
102 * The licence and distribution terms for any publically available version or
103 * derivative of this code cannot be changed. i.e. this code cannot simply be
104 * copied and put under another distribution licence
105 * [including the GNU Public Licence.]
106 */
107
108#ifndef AES_DEBUG
109# ifndef NDEBUG
110# define NDEBUG
111# endif
112#endif
113#include <assert.h>
114
115#include <openssl/aes.h>
116#include "aes_locl.h"
117#include "e_os.h"
118
119/* The input and output encrypted as though 128bit cfb mode is being
120 * used. The extra state information to record how much of the
121 * 128bit block we have used is contained in *num;
122 */
123
124void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
125 const unsigned long length, const AES_KEY *key,
126 unsigned char *ivec, int *num, const int enc) {
127
128 unsigned int n;
129 unsigned long l = length;
130 unsigned char c;
131
132 assert(in && out && key && ivec && num);
133
134 n = *num;
135
136 if (enc) {
137 while (l--) {
138 if (n == 0) {
139 AES_encrypt(ivec, ivec, key);
140 }
141 ivec[n] = *(out++) = *(in++) ^ ivec[n];
142 n = (n+1) % AES_BLOCK_SIZE;
143 }
144 } else {
145 while (l--) {
146 if (n == 0) {
147 AES_encrypt(ivec, ivec, key);
148 }
149 c = *(in);
150 *(out++) = *(in++) ^ ivec[n];
151 ivec[n] = c;
152 n = (n+1) % AES_BLOCK_SIZE;
153 }
154 }
155
156 *num=n;
157}
158
159/* This expects a single block of size nbits for both in and out. Note that
160 it corrupts any extra bits in the last byte of out */
161void AES_cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
162 const int nbits,const AES_KEY *key,
163 unsigned char *ivec,const int enc)
164 {
165 int n,rem,num;
166 unsigned char ovec[AES_BLOCK_SIZE*2];
167
168 if (nbits<=0 || nbits>128) return;
169
170 /* fill in the first half of the new IV with the current IV */
171 memcpy(ovec,ivec,AES_BLOCK_SIZE);
172 /* construct the new IV */
173 AES_encrypt(ivec,ivec,key);
174 num = (nbits+7)/8;
175 if (enc) /* encrypt the input */
176 for(n=0 ; n < num ; ++n)
177 out[n] = (ovec[AES_BLOCK_SIZE+n] = in[n] ^ ivec[n]);
178 else /* decrypt the input */
179 for(n=0 ; n < num ; ++n)
180 out[n] = (ovec[AES_BLOCK_SIZE+n] = in[n]) ^ ivec[n];
181 /* shift ovec left... */
182 rem = nbits%8;
183 num = nbits/8;
184 if(rem==0)
185 memcpy(ivec,ovec+num,AES_BLOCK_SIZE);
186 else
187 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
188 ivec[n] = ovec[n+num]<<rem | ovec[n+num+1]>>(8-rem);
189
190 /* it is not necessary to cleanse ovec, since the IV is not secret */
191 }
192
193/* N.B. This expects the input to be packed, MS bit first */
194void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
195 const unsigned long length, const AES_KEY *key,
196 unsigned char *ivec, int *num, const int enc)
197 {
198 unsigned int n;
199 unsigned char c[1],d[1];
200
201 assert(in && out && key && ivec && num);
202 assert(*num == 0);
203
204 memset(out,0,(length+7)/8);
205 for(n=0 ; n < length ; ++n)
206 {
207 c[0]=(in[n/8]&(1 << (7-n%8))) ? 0x80 : 0;
208 AES_cfbr_encrypt_block(c,d,1,key,ivec,enc);
209 out[n/8]=(out[n/8]&~(1 << (7-n%8)))|((d[0]&0x80) >> (n%8));
210 }
211 }
212
213void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
214 const unsigned long length, const AES_KEY *key,
215 unsigned char *ivec, int *num, const int enc)
216 {
217 unsigned int n;
218
219 assert(in && out && key && ivec && num);
220 assert(*num == 0);
221
222 for(n=0 ; n < length ; ++n)
223 AES_cfbr_encrypt_block(&in[n],&out[n],8,key,ivec,enc);
224 }
225
diff --git a/src/lib/libcrypto/aes/aes_core.c b/src/lib/libcrypto/aes/aes_core.c
deleted file mode 100644
index cffdd4daec..0000000000
--- a/src/lib/libcrypto/aes/aes_core.c
+++ /dev/null
@@ -1,1167 +0,0 @@
1/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2/**
3 * rijndael-alg-fst.c
4 *
5 * @version 3.0 (December 2000)
6 *
7 * Optimised ANSI C code for the Rijndael cipher (now AES)
8 *
9 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11 * @author Paulo Barreto <paulo.barreto@terra.com.br>
12 *
13 * This code is hereby placed in the public domain.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* Note: rewritten a little bit to provide error control and an OpenSSL-
29 compatible API */
30
31#ifndef AES_DEBUG
32# ifndef NDEBUG
33# define NDEBUG
34# endif
35#endif
36#include <assert.h>
37
38#include <stdlib.h>
39#include <openssl/aes.h>
40#ifdef OPENSSL_FIPS
41#include <openssl/fips.h>
42#endif
43
44#include "aes_locl.h"
45
46/*
47Te0[x] = S [x].[02, 01, 01, 03];
48Te1[x] = S [x].[03, 02, 01, 01];
49Te2[x] = S [x].[01, 03, 02, 01];
50Te3[x] = S [x].[01, 01, 03, 02];
51
52Td0[x] = Si[x].[0e, 09, 0d, 0b];
53Td1[x] = Si[x].[0b, 0e, 09, 0d];
54Td2[x] = Si[x].[0d, 0b, 0e, 09];
55Td3[x] = Si[x].[09, 0d, 0b, 0e];
56Td4[x] = Si[x].[01];
57*/
58
/*
 * Te0: 256-entry encryption lookup table indexed by one byte.
 * Per the table legend in this file, Te0[x] = S[x].[02, 01, 01, 03]: read
 * from the most significant byte down, each word holds 2*S[x], S[x], S[x],
 * 3*S[x] (entry 0 is c6 63 63 a5, i.e. S[0] = 0x63).
 * NOTE(review): the multiples are presumably products in the AES field
 * GF(2^8) -- confirm against the Rijndael specification.
 */
59static const u32 Te0[256] = {
60 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
61 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
62 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
63 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
64 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
65 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
66 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
67 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
68 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
69 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
70 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
71 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
72 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
73 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
74 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
75 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
76 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
77 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
78 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
79 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
80 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
81 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
82 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
83 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
84 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
85 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
86 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
87 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
88 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
89 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
90 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
91 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
92 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
93 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
94 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
95 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
96 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
97 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
98 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
99 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
100 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
101 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
102 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
103 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
104 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
105 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
106 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
107 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
108 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
109 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
110 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
111 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
112 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
113 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
114 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
115 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
116 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
117 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
118 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
119 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
120 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
121 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
122 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
123 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
124};
/*
 * Te1: 256-entry encryption lookup table indexed by one byte.
 * Per the table legend in this file, Te1[x] = S[x].[03, 02, 01, 01]; the
 * words are the Te0 words rotated right by one byte (entry 0 is
 * 0xa5c66363 where Te0 has 0xc66363a5).
 */
125static const u32 Te1[256] = {
126 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
127 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
128 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
129 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
130 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
131 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
132 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
133 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
134 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
135 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
136 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
137 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
138 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
139 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
140 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
141 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
142 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
143 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
144 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
145 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
146 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
147 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
148 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
149 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
150 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
151 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
152 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
153 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
154 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
155 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
156 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
157 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
158 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
159 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
160 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
161 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
162 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
163 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
164 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
165 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
166 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
167 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
168 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
169 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
170 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
171 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
172 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
173 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
174 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
175 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
176 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
177 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
178 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
179 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
180 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
181 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
182 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
183 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
184 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
185 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
186 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
187 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
188 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
189 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
190};
/*
 * Te2: 256-entry encryption lookup table, indexed by a single state byte.
 * Entry 0 (0x63a5c663) is the byte sequence {S, 3*S, 2*S, S} for S = 0x63,
 * i.e. the S-box output scaled by the coefficients [01, 03, 02, 01] in
 * GF(2^8); the remaining entries are assumed to follow the same layout.
 * AES_encrypt() indexes it with bits 8..15 of a state word, and the key
 * schedule masks it with 0xff000000 to pick out the plain S-box byte.
 */
191static const u32 Te2[256] = {
192 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
193 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
194 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
195 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
196 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
197 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
198 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
199 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
200 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
201 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
202 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
203 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
204 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
205 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
206 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
207 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
208 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
209 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
210 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
211 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
212 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
213 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
214 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
215 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
216 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
217 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
218 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
219 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
220 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
221 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
222 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
223 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
224 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
225 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
226 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
227 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
228 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
229 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
230 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
231 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
232 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
233 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
234 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
235 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
236 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
237 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
238 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
239 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
240 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
241 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
242 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
243 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
244 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
245 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
246 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
247 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
248 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
249 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
250 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
251 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
252 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
253 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
254 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
255 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
256};
/*
 * Te3: 256-entry encryption lookup table, indexed by a single state byte.
 * Entry 0 (0x6363a5c6) is the byte sequence {S, S, 3*S, 2*S} for S = 0x63,
 * i.e. the S-box output scaled by the coefficients [01, 01, 03, 02] in
 * GF(2^8); the remaining entries are assumed to follow the same layout.
 * AES_encrypt() indexes it with the low byte of a state word, and the key
 * schedule masks it with 0x00ff0000 to pick out the plain S-box byte.
 */
257static const u32 Te3[256] = {
258 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
259 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
260 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
261 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
262 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
263 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
264 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
265 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
266 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
267 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
268 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
269 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
270 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
271 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
272 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
273 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
274 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
275 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
276 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
277 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
278 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
279 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
280 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
281 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
282 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
283 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
284 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
285 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
286 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
287 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
288 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
289 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
290 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
291 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
292 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
293 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
294 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
295 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
296 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
297 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
298 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
299 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
300 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
301 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
302 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
303 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
304 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
305 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
306 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
307 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
308 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
309 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
310 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
311 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
312 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
313 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
314 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
315 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
316 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
317 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
318 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
319 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
320 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
321 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
322};
323
/*
 * Td0: 256-entry decryption lookup table, indexed by a single byte.
 * Entry 0 (0x51f4a750) is the byte sequence {0e, 09, 0d, 0b} * 0x52 in
 * GF(2^8), where 0x52 is the first inverse S-box value (see Td4); the
 * remaining entries are assumed to follow the same layout.
 * AES_set_decrypt_key() indexes it with S-box output derived from the top
 * byte of each round-key word to apply the inverse MixColumn transform.
 */
324static const u32 Td0[256] = {
325 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
326 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
327 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
328 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
329 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
330 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
331 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
332 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
333 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
334 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
335 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
336 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
337 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
338 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
339 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
340 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
341 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
342 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
343 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
344 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
345 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
346 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
347 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
348 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
349 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
350 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
351 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
352 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
353 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
354 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
355 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
356 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
357 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
358 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
359 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
360 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
361 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
362 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
363 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
364 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
365 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
366 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
367 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
368 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
369 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
370 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
371 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
372 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
373 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
374 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
375 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
376 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
377 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
378 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
379 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
380 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
381 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
382 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
383 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
384 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
385 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
386 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
387 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
388 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
389};
/*
 * Td1: 256-entry decryption lookup table, indexed by a single byte.
 * Entry 0 (0x5051f4a7) is the byte sequence {0b, 0e, 09, 0d} * 0x52 in
 * GF(2^8), where 0x52 is the first inverse S-box value (see Td4); the
 * remaining entries are assumed to follow the same layout.
 * AES_set_decrypt_key() indexes it with S-box output derived from bits
 * 16..23 of each round-key word.
 */
390static const u32 Td1[256] = {
391 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
392 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
393 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
394 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
395 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
396 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
397 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
398 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
399 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
400 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
401 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
402 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
403 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
404 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
405 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
406 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
407 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
408 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
409 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
410 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
411 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
412 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
413 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
414 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
415 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
416 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
417 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
418 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
419 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
420 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
421 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
422 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
423 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
424 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
425 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
426 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
427 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
428 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
429 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
430 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
431 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
432 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
433 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
434 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
435 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
436 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
437 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
438 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
439 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
440 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
441 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
442 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
443 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
444 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
445 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
446 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
447 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
448 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
449 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
450 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
451 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
452 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
453 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
454 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
455};
/*
 * Td2: 256-entry decryption lookup table, indexed by a single byte.
 * Entry 0 (0xa75051f4) is the byte sequence {0d, 0b, 0e, 09} * 0x52 in
 * GF(2^8), where 0x52 is the first inverse S-box value (see Td4); the
 * remaining entries are assumed to follow the same layout.
 * AES_set_decrypt_key() indexes it with S-box output derived from bits
 * 8..15 of each round-key word.
 */
456static const u32 Td2[256] = {
457 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
458 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
459 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
460 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
461 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
462 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
463 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
464 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
465 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
466 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
467 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
468 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
469 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
470 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
471 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
472 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
473 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
474 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
475 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
476 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
477 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
478 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
479 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
480 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
481 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
482 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
483 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
484 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
485 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
486 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
487 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
488 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
489 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
490 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
491 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
492 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
493 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
494 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
495 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
496 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
497 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
498 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
499 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
500 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
501 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
502 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
503 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
504 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
505 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
506 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
507 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
508 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
509 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
510 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
511 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
512 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
513 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
514 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
515 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
516 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
517 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
518 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
519 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
520 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
521};
/*
 * Td3: 256-entry decryption lookup table, indexed by a single byte.
 * Entry 0 (0xf4a75051) is the byte sequence {09, 0d, 0b, 0e} * 0x52 in
 * GF(2^8), where 0x52 is the first inverse S-box value (see Td4); the
 * remaining entries are assumed to follow the same layout.
 * AES_set_decrypt_key() indexes it with S-box output derived from the low
 * byte of each round-key word.
 */
522static const u32 Td3[256] = {
523 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
524 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
525 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
526 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
527 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
528 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
529 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
530 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
531 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
532 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
533 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
534 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
535 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
536 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
537 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
538 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
539 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
540 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
541 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
542 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
543 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
544 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
545 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
546 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
547 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
548 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
549 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
550 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
551 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
552 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
553 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
554 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
555 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
556 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
557 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
558 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
559 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
560 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
561 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
562 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
563 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
564 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
565 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
566 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
567 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
568 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
569 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
570 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
571 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
572 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
573 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
574 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
575 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
576 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
577 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
578 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
579 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
580 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
581 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
582 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
583 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
584 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
585 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
586 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
587};
/*
 * Td4: 256-entry byte table holding the AES inverse S-box
 * (Td4[0x00] = 0x52, Td4[0x01] = 0x09, ...).
 * Not referenced by the key-setup routines that follow; presumably
 * consumed by the decryption rounds further down the file.
 */
588static const u8 Td4[256] = {
589 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
590 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
591 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
592 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
593 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
594 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
595 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
596 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
597 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
598 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
599 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
600 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
601 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
602 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
603 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
604 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
605 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
606 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
607 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
608 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
609 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
610 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
611 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
612 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
613 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
614 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
615 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
616 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
617 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
618 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
619 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
620 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
621};
/*
 * Key-schedule round constants, each carried in the most significant byte
 * of the word. AES_set_encrypt_key() XORs rcon[i] into the first word of
 * every expansion step: a 128-bit key consumes all ten entries, a 192-bit
 * key the first eight, and a 256-bit key the first seven.
 */
622static const u32 rcon[] = {
623 0x01000000, 0x02000000, 0x04000000, 0x08000000,
624 0x10000000, 0x20000000, 0x40000000, 0x80000000,
625 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
626};
627
628/**
629 * Expand the cipher key into the encryption key schedule.
630 */
631int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
632 AES_KEY *key) {
633
634 u32 *rk;
635 int i = 0;
636 u32 temp;
637
638#ifdef OPENSSL_FIPS
639 FIPS_selftest_check();
640#endif
641
642 if (!userKey || !key)
643 return -1;
644 if (bits != 128 && bits != 192 && bits != 256)
645 return -2;
646
647 rk = key->rd_key;
648
649 if (bits==128)
650 key->rounds = 10;
651 else if (bits==192)
652 key->rounds = 12;
653 else
654 key->rounds = 14;
655
656 rk[0] = GETU32(userKey );
657 rk[1] = GETU32(userKey + 4);
658 rk[2] = GETU32(userKey + 8);
659 rk[3] = GETU32(userKey + 12);
660 if (bits == 128) {
661 while (1) {
662 temp = rk[3];
663 rk[4] = rk[0] ^
664 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
665 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
666 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
667 (Te1[(temp >> 24) ] & 0x000000ff) ^
668 rcon[i];
669 rk[5] = rk[1] ^ rk[4];
670 rk[6] = rk[2] ^ rk[5];
671 rk[7] = rk[3] ^ rk[6];
672 if (++i == 10) {
673 return 0;
674 }
675 rk += 4;
676 }
677 }
678 rk[4] = GETU32(userKey + 16);
679 rk[5] = GETU32(userKey + 20);
680 if (bits == 192) {
681 while (1) {
682 temp = rk[ 5];
683 rk[ 6] = rk[ 0] ^
684 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
685 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
686 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
687 (Te1[(temp >> 24) ] & 0x000000ff) ^
688 rcon[i];
689 rk[ 7] = rk[ 1] ^ rk[ 6];
690 rk[ 8] = rk[ 2] ^ rk[ 7];
691 rk[ 9] = rk[ 3] ^ rk[ 8];
692 if (++i == 8) {
693 return 0;
694 }
695 rk[10] = rk[ 4] ^ rk[ 9];
696 rk[11] = rk[ 5] ^ rk[10];
697 rk += 6;
698 }
699 }
700 rk[6] = GETU32(userKey + 24);
701 rk[7] = GETU32(userKey + 28);
702 if (bits == 256) {
703 while (1) {
704 temp = rk[ 7];
705 rk[ 8] = rk[ 0] ^
706 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
707 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
708 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
709 (Te1[(temp >> 24) ] & 0x000000ff) ^
710 rcon[i];
711 rk[ 9] = rk[ 1] ^ rk[ 8];
712 rk[10] = rk[ 2] ^ rk[ 9];
713 rk[11] = rk[ 3] ^ rk[10];
714 if (++i == 7) {
715 return 0;
716 }
717 temp = rk[11];
718 rk[12] = rk[ 4] ^
719 (Te2[(temp >> 24) ] & 0xff000000) ^
720 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
721 (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
722 (Te1[(temp ) & 0xff] & 0x000000ff);
723 rk[13] = rk[ 5] ^ rk[12];
724 rk[14] = rk[ 6] ^ rk[13];
725 rk[15] = rk[ 7] ^ rk[14];
726
727 rk += 8;
728 }
729 }
730 return 0;
731}
732
733/**
734 * Expand the cipher key into the decryption key schedule.
735 */
736int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
737 AES_KEY *key) {
738
739 u32 *rk;
740 int i, j, status;
741 u32 temp;
742
743 /* first, start with an encryption schedule */
744 status = AES_set_encrypt_key(userKey, bits, key);
745 if (status < 0)
746 return status;
747
748 rk = key->rd_key;
749
750 /* invert the order of the round keys: */
751 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
752 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
753 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
754 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
755 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
756 }
757 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
758 for (i = 1; i < (key->rounds); i++) {
759 rk += 4;
760 rk[0] =
761 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
762 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
763 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
764 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
765 rk[1] =
766 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
767 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
768 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
769 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
770 rk[2] =
771 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
772 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
773 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
774 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
775 rk[3] =
776 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
777 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
778 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
779 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
780 }
781 return 0;
782}
783
784#ifndef AES_ASM
785/*
786 * Encrypt a single block
787 * in and out can overlap
788 */
789void AES_encrypt(const unsigned char *in, unsigned char *out,
790 const AES_KEY *key) {
791
792 const u32 *rk;
793 u32 s0, s1, s2, s3, t0, t1, t2, t3;
794#ifndef FULL_UNROLL
795 int r;
796#endif /* ?FULL_UNROLL */
797
798 assert(in && out && key);
799 rk = key->rd_key;
800
801 /*
802 * map byte array block to cipher state
803 * and add initial round key:
804 */
805 s0 = GETU32(in ) ^ rk[0];
806 s1 = GETU32(in + 4) ^ rk[1];
807 s2 = GETU32(in + 8) ^ rk[2];
808 s3 = GETU32(in + 12) ^ rk[3];
809#ifdef FULL_UNROLL
810 /* round 1: */
811 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
812 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
813 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
814 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
815 /* round 2: */
816 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
817 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
818 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
819 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
820 /* round 3: */
821 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
822 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
823 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
824 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
825 /* round 4: */
826 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
827 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
828 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
829 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
830 /* round 5: */
831 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
832 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
833 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
834 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
835 /* round 6: */
836 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
837 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
838 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
839 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
840 /* round 7: */
841 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
842 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
843 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
844 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
845 /* round 8: */
846 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
847 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
848 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
849 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
850 /* round 9: */
851 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
852 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
853 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
854 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
855 if (key->rounds > 10) {
856 /* round 10: */
857 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
858 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
859 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
860 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
861 /* round 11: */
862 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
863 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
864 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
865 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
866 if (key->rounds > 12) {
867 /* round 12: */
868 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
869 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
870 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
871 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
872 /* round 13: */
873 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
874 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
875 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
876 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
877 }
878 }
879 rk += key->rounds << 2;
880#else /* !FULL_UNROLL */
881 /*
882 * Nr - 1 full rounds:
883 */
884 r = key->rounds >> 1;
885 for (;;) {
886 t0 =
887 Te0[(s0 >> 24) ] ^
888 Te1[(s1 >> 16) & 0xff] ^
889 Te2[(s2 >> 8) & 0xff] ^
890 Te3[(s3 ) & 0xff] ^
891 rk[4];
892 t1 =
893 Te0[(s1 >> 24) ] ^
894 Te1[(s2 >> 16) & 0xff] ^
895 Te2[(s3 >> 8) & 0xff] ^
896 Te3[(s0 ) & 0xff] ^
897 rk[5];
898 t2 =
899 Te0[(s2 >> 24) ] ^
900 Te1[(s3 >> 16) & 0xff] ^
901 Te2[(s0 >> 8) & 0xff] ^
902 Te3[(s1 ) & 0xff] ^
903 rk[6];
904 t3 =
905 Te0[(s3 >> 24) ] ^
906 Te1[(s0 >> 16) & 0xff] ^
907 Te2[(s1 >> 8) & 0xff] ^
908 Te3[(s2 ) & 0xff] ^
909 rk[7];
910
911 rk += 8;
912 if (--r == 0) {
913 break;
914 }
915
916 s0 =
917 Te0[(t0 >> 24) ] ^
918 Te1[(t1 >> 16) & 0xff] ^
919 Te2[(t2 >> 8) & 0xff] ^
920 Te3[(t3 ) & 0xff] ^
921 rk[0];
922 s1 =
923 Te0[(t1 >> 24) ] ^
924 Te1[(t2 >> 16) & 0xff] ^
925 Te2[(t3 >> 8) & 0xff] ^
926 Te3[(t0 ) & 0xff] ^
927 rk[1];
928 s2 =
929 Te0[(t2 >> 24) ] ^
930 Te1[(t3 >> 16) & 0xff] ^
931 Te2[(t0 >> 8) & 0xff] ^
932 Te3[(t1 ) & 0xff] ^
933 rk[2];
934 s3 =
935 Te0[(t3 >> 24) ] ^
936 Te1[(t0 >> 16) & 0xff] ^
937 Te2[(t1 >> 8) & 0xff] ^
938 Te3[(t2 ) & 0xff] ^
939 rk[3];
940 }
941#endif /* ?FULL_UNROLL */
942 /*
943 * apply last round and
944 * map cipher state to byte array block:
945 */
946 s0 =
947 (Te2[(t0 >> 24) ] & 0xff000000) ^
948 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
949 (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
950 (Te1[(t3 ) & 0xff] & 0x000000ff) ^
951 rk[0];
952 PUTU32(out , s0);
953 s1 =
954 (Te2[(t1 >> 24) ] & 0xff000000) ^
955 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
956 (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
957 (Te1[(t0 ) & 0xff] & 0x000000ff) ^
958 rk[1];
959 PUTU32(out + 4, s1);
960 s2 =
961 (Te2[(t2 >> 24) ] & 0xff000000) ^
962 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
963 (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
964 (Te1[(t1 ) & 0xff] & 0x000000ff) ^
965 rk[2];
966 PUTU32(out + 8, s2);
967 s3 =
968 (Te2[(t3 >> 24) ] & 0xff000000) ^
969 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
970 (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
971 (Te1[(t2 ) & 0xff] & 0x000000ff) ^
972 rk[3];
973 PUTU32(out + 12, s3);
974}
975
976/*
977 * Decrypt a single block
978 * in and out can overlap
979 */
980void AES_decrypt(const unsigned char *in, unsigned char *out,
981 const AES_KEY *key) {
982
983 const u32 *rk;
984 u32 s0, s1, s2, s3, t0, t1, t2, t3;
985#ifndef FULL_UNROLL
986 int r;
987#endif /* ?FULL_UNROLL */
988
989 assert(in && out && key);
990 rk = key->rd_key;
991
992 /*
993 * map byte array block to cipher state
994 * and add initial round key:
995 */
996 s0 = GETU32(in ) ^ rk[0];
997 s1 = GETU32(in + 4) ^ rk[1];
998 s2 = GETU32(in + 8) ^ rk[2];
999 s3 = GETU32(in + 12) ^ rk[3];
1000#ifdef FULL_UNROLL
1001 /* round 1: */
1002 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1003 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1004 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1005 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1006 /* round 2: */
1007 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1008 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1009 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1010 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1011 /* round 3: */
1012 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1013 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1014 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1015 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1016 /* round 4: */
1017 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1018 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1019 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1020 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1021 /* round 5: */
1022 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1023 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1024 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1025 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1026 /* round 6: */
1027 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1028 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1029 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1030 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1031 /* round 7: */
1032 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1033 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1034 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1035 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1036 /* round 8: */
1037 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1038 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1039 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1040 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1041 /* round 9: */
1042 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1043 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1044 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1045 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1046 if (key->rounds > 10) {
1047 /* round 10: */
1048 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1049 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1050 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1051 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1052 /* round 11: */
1053 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1054 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1055 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1056 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1057 if (key->rounds > 12) {
1058 /* round 12: */
1059 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1060 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1061 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1062 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1063 /* round 13: */
1064 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1065 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1066 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1067 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1068 }
1069 }
1070 rk += key->rounds << 2;
1071#else /* !FULL_UNROLL */
1072 /*
1073 * Nr - 1 full rounds:
1074 */
1075 r = key->rounds >> 1;
1076 for (;;) {
1077 t0 =
1078 Td0[(s0 >> 24) ] ^
1079 Td1[(s3 >> 16) & 0xff] ^
1080 Td2[(s2 >> 8) & 0xff] ^
1081 Td3[(s1 ) & 0xff] ^
1082 rk[4];
1083 t1 =
1084 Td0[(s1 >> 24) ] ^
1085 Td1[(s0 >> 16) & 0xff] ^
1086 Td2[(s3 >> 8) & 0xff] ^
1087 Td3[(s2 ) & 0xff] ^
1088 rk[5];
1089 t2 =
1090 Td0[(s2 >> 24) ] ^
1091 Td1[(s1 >> 16) & 0xff] ^
1092 Td2[(s0 >> 8) & 0xff] ^
1093 Td3[(s3 ) & 0xff] ^
1094 rk[6];
1095 t3 =
1096 Td0[(s3 >> 24) ] ^
1097 Td1[(s2 >> 16) & 0xff] ^
1098 Td2[(s1 >> 8) & 0xff] ^
1099 Td3[(s0 ) & 0xff] ^
1100 rk[7];
1101
1102 rk += 8;
1103 if (--r == 0) {
1104 break;
1105 }
1106
1107 s0 =
1108 Td0[(t0 >> 24) ] ^
1109 Td1[(t3 >> 16) & 0xff] ^
1110 Td2[(t2 >> 8) & 0xff] ^
1111 Td3[(t1 ) & 0xff] ^
1112 rk[0];
1113 s1 =
1114 Td0[(t1 >> 24) ] ^
1115 Td1[(t0 >> 16) & 0xff] ^
1116 Td2[(t3 >> 8) & 0xff] ^
1117 Td3[(t2 ) & 0xff] ^
1118 rk[1];
1119 s2 =
1120 Td0[(t2 >> 24) ] ^
1121 Td1[(t1 >> 16) & 0xff] ^
1122 Td2[(t0 >> 8) & 0xff] ^
1123 Td3[(t3 ) & 0xff] ^
1124 rk[2];
1125 s3 =
1126 Td0[(t3 >> 24) ] ^
1127 Td1[(t2 >> 16) & 0xff] ^
1128 Td2[(t1 >> 8) & 0xff] ^
1129 Td3[(t0 ) & 0xff] ^
1130 rk[3];
1131 }
1132#endif /* ?FULL_UNROLL */
1133 /*
1134 * apply last round and
1135 * map cipher state to byte array block:
1136 */
1137 s0 =
1138 (Td4[(t0 >> 24) ] << 24) ^
1139 (Td4[(t3 >> 16) & 0xff] << 16) ^
1140 (Td4[(t2 >> 8) & 0xff] << 8) ^
1141 (Td4[(t1 ) & 0xff]) ^
1142 rk[0];
1143 PUTU32(out , s0);
1144 s1 =
1145 (Td4[(t1 >> 24) ] << 24) ^
1146 (Td4[(t0 >> 16) & 0xff] << 16) ^
1147 (Td4[(t3 >> 8) & 0xff] << 8) ^
1148 (Td4[(t2 ) & 0xff]) ^
1149 rk[1];
1150 PUTU32(out + 4, s1);
1151 s2 =
1152 (Td4[(t2 >> 24) ] << 24) ^
1153 (Td4[(t1 >> 16) & 0xff] << 16) ^
1154 (Td4[(t0 >> 8) & 0xff] << 8) ^
1155 (Td4[(t3 ) & 0xff]) ^
1156 rk[2];
1157 PUTU32(out + 8, s2);
1158 s3 =
1159 (Td4[(t3 >> 24) ] << 24) ^
1160 (Td4[(t2 >> 16) & 0xff] << 16) ^
1161 (Td4[(t1 >> 8) & 0xff] << 8) ^
1162 (Td4[(t0 ) & 0xff]) ^
1163 rk[3];
1164 PUTU32(out + 12, s3);
1165}
1166
1167#endif /* AES_ASM */
diff --git a/src/lib/libcrypto/aes/aes_ctr.c b/src/lib/libcrypto/aes/aes_ctr.c
deleted file mode 100644
index f36982be1e..0000000000
--- a/src/lib/libcrypto/aes/aes_ctr.c
+++ /dev/null
@@ -1,139 +0,0 @@
1/* crypto/aes/aes_ctr.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef AES_DEBUG
53# ifndef NDEBUG
54# define NDEBUG
55# endif
56#endif
57#include <assert.h>
58
59#include <openssl/aes.h>
60#include "aes_locl.h"
61
62/* NOTE: the IV/counter CTR mode is big-endian. The rest of the AES code
63 * is endian-neutral. */
64
/*
 * Add one to the 128-bit big-endian integer stored in counter[0..15].
 * The carry ripples from the last byte towards the first; a counter of
 * all 0xff bytes wraps round to all zeros.
 */
static void AES_ctr128_inc(unsigned char *counter) {
    unsigned char *p = counter + 16;

    while (p != counter) {
        --p;
        *p = (unsigned char)(*p + 1);
        /* a non-zero result means this byte did not wrap: no carry left */
        if (*p != 0)
            return;
    }
}
101
102/* The input encrypted as though 128bit counter mode is being
103 * used. The extra state information to record how much of the
104 * 128bit block we have used is contained in *num, and the
105 * encrypted counter is kept in ecount_buf. Both *num and
106 * ecount_buf must be initialised with zeros before the first
107 * call to AES_ctr128_encrypt().
108 *
109 * This algorithm assumes that the counter is in the x lower bits
110 * of the IV (ivec), and that the application has full control over
111 * overflow and the rest of the IV. This implementation takes NO
112 * responsability for checking that the counter doesn't overflow
113 * into the rest of the IV when incremented.
114 */
115void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
116 const unsigned long length, const AES_KEY *key,
117 unsigned char ivec[AES_BLOCK_SIZE],
118 unsigned char ecount_buf[AES_BLOCK_SIZE],
119 unsigned int *num) {
120
121 unsigned int n;
122 unsigned long l=length;
123
124 assert(in && out && key && counter && num);
125 assert(*num < AES_BLOCK_SIZE);
126
127 n = *num;
128
129 while (l--) {
130 if (n == 0) {
131 AES_encrypt(ivec, ecount_buf, key);
132 AES_ctr128_inc(ivec);
133 }
134 *(out++) = *(in++) ^ ecount_buf[n];
135 n = (n+1) % AES_BLOCK_SIZE;
136 }
137
138 *num=n;
139}
diff --git a/src/lib/libcrypto/aes/aes_ecb.c b/src/lib/libcrypto/aes/aes_ecb.c
deleted file mode 100644
index 28aa561c2d..0000000000
--- a/src/lib/libcrypto/aes/aes_ecb.c
+++ /dev/null
@@ -1,73 +0,0 @@
1/* crypto/aes/aes_ecb.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef AES_DEBUG
53# ifndef NDEBUG
54# define NDEBUG
55# endif
56#endif
57#include <assert.h>
58
59#include <openssl/aes.h>
60#include "aes_locl.h"
61
62void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
63 const AES_KEY *key, const int enc) {
64
65 assert(in && out && key);
66 assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
67
68 if (AES_ENCRYPT == enc)
69 AES_encrypt(in, out, key);
70 else
71 AES_decrypt(in, out, key);
72}
73
diff --git a/src/lib/libcrypto/aes/aes_ige.c b/src/lib/libcrypto/aes/aes_ige.c
deleted file mode 100644
index 45d7096181..0000000000
--- a/src/lib/libcrypto/aes/aes_ige.c
+++ /dev/null
@@ -1,323 +0,0 @@
1/* crypto/aes/aes_ige.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include "cryptlib.h"
53
54#include <openssl/aes.h>
55#include "aes_locl.h"
56
57#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
58typedef struct {
59 unsigned long data[N_WORDS];
60} aes_block_t;
61
62/* XXX: probably some better way to do this */
63#if defined(__i386__) || defined(__x86_64__)
64#define UNALIGNED_MEMOPS_ARE_FAST 1
65#else
66#define UNALIGNED_MEMOPS_ARE_FAST 0
67#endif
68
69#if UNALIGNED_MEMOPS_ARE_FAST
70#define load_block(d, s) (d) = *(const aes_block_t *)(s)
71#define store_block(d, s) *(aes_block_t *)(d) = (s)
72#else
73#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE)
74#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE)
75#endif
76
77/* N.B. The IV for this mode is _twice_ the block size */
78
79void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
80 const unsigned long length, const AES_KEY *key,
81 unsigned char *ivec, const int enc)
82 {
83 unsigned long n;
84 unsigned long len;
85
86 OPENSSL_assert(in && out && key && ivec);
87 OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
88 OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
89
90 len = length / AES_BLOCK_SIZE;
91
92 if (AES_ENCRYPT == enc)
93 {
94 if (in != out &&
95 (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
96 {
97 aes_block_t *ivp = (aes_block_t *)ivec;
98 aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
99
100 while (len)
101 {
102 aes_block_t *inp = (aes_block_t *)in;
103 aes_block_t *outp = (aes_block_t *)out;
104
105 for(n=0 ; n < N_WORDS; ++n)
106 outp->data[n] = inp->data[n] ^ ivp->data[n];
107 AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
108 for(n=0 ; n < N_WORDS; ++n)
109 outp->data[n] ^= iv2p->data[n];
110 ivp = outp;
111 iv2p = inp;
112 --len;
113 in += AES_BLOCK_SIZE;
114 out += AES_BLOCK_SIZE;
115 }
116 memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
117 memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
118 }
119 else
120 {
121 aes_block_t tmp, tmp2;
122 aes_block_t iv;
123 aes_block_t iv2;
124
125 load_block(iv, ivec);
126 load_block(iv2, ivec + AES_BLOCK_SIZE);
127
128 while (len)
129 {
130 load_block(tmp, in);
131 for(n=0 ; n < N_WORDS; ++n)
132 tmp2.data[n] = tmp.data[n] ^ iv.data[n];
133 AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
134 for(n=0 ; n < N_WORDS; ++n)
135 tmp2.data[n] ^= iv2.data[n];
136 store_block(out, tmp2);
137 iv = tmp2;
138 iv2 = tmp;
139 --len;
140 in += AES_BLOCK_SIZE;
141 out += AES_BLOCK_SIZE;
142 }
143 memcpy(ivec, iv.data, AES_BLOCK_SIZE);
144 memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
145 }
146 }
147 else
148 {
149 if (in != out &&
150 (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
151 {
152 aes_block_t *ivp = (aes_block_t *)ivec;
153 aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
154
155 while (len)
156 {
157 aes_block_t tmp;
158 aes_block_t *inp = (aes_block_t *)in;
159 aes_block_t *outp = (aes_block_t *)out;
160
161 for(n=0 ; n < N_WORDS; ++n)
162 tmp.data[n] = inp->data[n] ^ iv2p->data[n];
163 AES_decrypt((unsigned char *)tmp.data, (unsigned char *)outp->data, key);
164 for(n=0 ; n < N_WORDS; ++n)
165 outp->data[n] ^= ivp->data[n];
166 ivp = inp;
167 iv2p = outp;
168 --len;
169 in += AES_BLOCK_SIZE;
170 out += AES_BLOCK_SIZE;
171 }
172 memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
173 memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
174 }
175 else
176 {
177 aes_block_t tmp, tmp2;
178 aes_block_t iv;
179 aes_block_t iv2;
180
181 load_block(iv, ivec);
182 load_block(iv2, ivec + AES_BLOCK_SIZE);
183
184 while (len)
185 {
186 load_block(tmp, in);
187 tmp2 = tmp;
188 for(n=0 ; n < N_WORDS; ++n)
189 tmp.data[n] ^= iv2.data[n];
190 AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
191 for(n=0 ; n < N_WORDS; ++n)
192 tmp.data[n] ^= iv.data[n];
193 store_block(out, tmp);
194 iv = tmp2;
195 iv2 = tmp;
196 --len;
197 in += AES_BLOCK_SIZE;
198 out += AES_BLOCK_SIZE;
199 }
200 memcpy(ivec, iv.data, AES_BLOCK_SIZE);
201 memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
202 }
203 }
204 }
205
206/*
207 * Note that it's effectively impossible to do biIGE in anything other
208 * than a single pass, so no provision is made for chaining.
209 */
210
211/* N.B. The IV for this mode is _four times_ the block size */
212
213void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
214 const unsigned long length, const AES_KEY *key,
215 const AES_KEY *key2, const unsigned char *ivec,
216 const int enc)
217 {
218 unsigned long n;
219 unsigned long len = length;
220 unsigned char tmp[AES_BLOCK_SIZE];
221 unsigned char tmp2[AES_BLOCK_SIZE];
222 unsigned char tmp3[AES_BLOCK_SIZE];
223 unsigned char prev[AES_BLOCK_SIZE];
224 const unsigned char *iv;
225 const unsigned char *iv2;
226
227 OPENSSL_assert(in && out && key && ivec);
228 OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
229 OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
230
231 if (AES_ENCRYPT == enc)
232 {
233 /* XXX: Do a separate case for when in != out (strictly should
234 check for overlap, too) */
235
236 /* First the forward pass */
237 iv = ivec;
238 iv2 = ivec + AES_BLOCK_SIZE;
239 while (len >= AES_BLOCK_SIZE)
240 {
241 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
242 out[n] = in[n] ^ iv[n];
243 AES_encrypt(out, out, key);
244 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
245 out[n] ^= iv2[n];
246 iv = out;
247 memcpy(prev, in, AES_BLOCK_SIZE);
248 iv2 = prev;
249 len -= AES_BLOCK_SIZE;
250 in += AES_BLOCK_SIZE;
251 out += AES_BLOCK_SIZE;
252 }
253
254 /* And now backwards */
255 iv = ivec + AES_BLOCK_SIZE*2;
256 iv2 = ivec + AES_BLOCK_SIZE*3;
257 len = length;
258 while(len >= AES_BLOCK_SIZE)
259 {
260 out -= AES_BLOCK_SIZE;
261 /* XXX: reduce copies by alternating between buffers */
262 memcpy(tmp, out, AES_BLOCK_SIZE);
263 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
264 out[n] ^= iv[n];
265 /* hexdump(stdout, "out ^ iv", out, AES_BLOCK_SIZE); */
266 AES_encrypt(out, out, key);
267 /* hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
268 /* hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
269 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
270 out[n] ^= iv2[n];
271 /* hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
272 iv = out;
273 memcpy(prev, tmp, AES_BLOCK_SIZE);
274 iv2 = prev;
275 len -= AES_BLOCK_SIZE;
276 }
277 }
278 else
279 {
280 /* First backwards */
281 iv = ivec + AES_BLOCK_SIZE*2;
282 iv2 = ivec + AES_BLOCK_SIZE*3;
283 in += length;
284 out += length;
285 while (len >= AES_BLOCK_SIZE)
286 {
287 in -= AES_BLOCK_SIZE;
288 out -= AES_BLOCK_SIZE;
289 memcpy(tmp, in, AES_BLOCK_SIZE);
290 memcpy(tmp2, in, AES_BLOCK_SIZE);
291 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
292 tmp[n] ^= iv2[n];
293 AES_decrypt(tmp, out, key);
294 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
295 out[n] ^= iv[n];
296 memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
297 iv = tmp3;
298 iv2 = out;
299 len -= AES_BLOCK_SIZE;
300 }
301
302 /* And now forwards */
303 iv = ivec;
304 iv2 = ivec + AES_BLOCK_SIZE;
305 len = length;
306 while (len >= AES_BLOCK_SIZE)
307 {
308 memcpy(tmp, out, AES_BLOCK_SIZE);
309 memcpy(tmp2, out, AES_BLOCK_SIZE);
310 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
311 tmp[n] ^= iv2[n];
312 AES_decrypt(tmp, out, key);
313 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
314 out[n] ^= iv[n];
315 memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
316 iv = tmp3;
317 iv2 = out;
318 len -= AES_BLOCK_SIZE;
319 in += AES_BLOCK_SIZE;
320 out += AES_BLOCK_SIZE;
321 }
322 }
323 }
diff --git a/src/lib/libcrypto/aes/aes_locl.h b/src/lib/libcrypto/aes/aes_locl.h
deleted file mode 100644
index 054b442d41..0000000000
--- a/src/lib/libcrypto/aes/aes_locl.h
+++ /dev/null
@@ -1,89 +0,0 @@
1/* crypto/aes/aes_locl.h -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef HEADER_AES_LOCL_H
53#define HEADER_AES_LOCL_H
54
55#include <openssl/e_os2.h>
56
57#ifdef OPENSSL_NO_AES
58#error AES is disabled.
59#endif
60
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64
/* Basic fixed-purpose integer aliases used by the AES code. */
#ifdef AES_LONG
typedef unsigned long u32;
#else
typedef unsigned int u32;
#endif
typedef unsigned short u16;
typedef unsigned char u8;

/*
 * GETU32(p) reads four bytes at p as a big-endian 32-bit value (byte 0
 * ends up in the most significant position); PUTU32(p, v) stores v the
 * same way.  The MSVC/x86 variant does a direct 32-bit access and
 * byte-swaps it with two rotates; everything else goes byte by byte.
 */
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define SWAP(x) \
	((_lrotl(x, 8) & 0x00ff00ff) | (_lrotr(x, 8) & 0xff00ff00))
# define GETU32(p) SWAP(*((u32 *)(p)))
# define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
#else
# define GETU32(pt) \
	(((u32)(pt)[0] << 24) ^ \
	 ((u32)(pt)[1] << 16) ^ \
	 ((u32)(pt)[2] <<  8) ^ \
	 ((u32)(pt)[3]))
# define PUTU32(ct, st) { \
	(ct)[0] = (u8)((st) >> 24); \
	(ct)[1] = (u8)((st) >> 16); \
	(ct)[2] = (u8)((st) >>  8); \
	(ct)[3] = (u8)(st); }
#endif

/*
 * Size limits for a 256-bit key: 256/32 words, 256/8 bytes.
 * NOTE(review): MAXNR is presumably the maximum round count - confirm
 * against aes_core.c.
 */
#define MAXKC (256/32)
#define MAXKB (256/8)
#define MAXNR 14

/* This controls loop-unrolling in aes_core.c */
#undef FULL_UNROLL
88
89#endif /* !HEADER_AES_LOCL_H */
diff --git a/src/lib/libcrypto/aes/aes_misc.c b/src/lib/libcrypto/aes/aes_misc.c
deleted file mode 100644
index 4fead1b4c7..0000000000
--- a/src/lib/libcrypto/aes/aes_misc.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/* crypto/aes/aes_misc.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include <openssl/opensslv.h>
53#include <openssl/aes.h>
54#include "aes_locl.h"
55
56const char AES_version[]="AES" OPENSSL_VERSION_PTEXT;
57
/*
 * Report which build variant is in use: "aes(full)" when FULL_UNROLL
 * is defined at compile time, "aes(partial)" otherwise.  The returned
 * pointer refers to a string literal and must not be freed.
 */
const char *AES_options(void)
{
	const char *variant;

#ifdef FULL_UNROLL
	variant = "aes(full)";
#else
	variant = "aes(partial)";
#endif
	return variant;
}
diff --git a/src/lib/libcrypto/aes/aes_ofb.c b/src/lib/libcrypto/aes/aes_ofb.c
deleted file mode 100644
index f358bb39e2..0000000000
--- a/src/lib/libcrypto/aes/aes_ofb.c
+++ /dev/null
@@ -1,142 +0,0 @@
1/* crypto/aes/aes_ofb.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
52 * All rights reserved.
53 *
54 * This package is an SSL implementation written
55 * by Eric Young (eay@cryptsoft.com).
56 * The implementation was written so as to conform with Netscapes SSL.
57 *
58 * This library is free for commercial and non-commercial use as long as
59 * the following conditions are aheared to. The following conditions
60 * apply to all code found in this distribution, be it the RC4, RSA,
61 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
62 * included with this distribution is covered by the same copyright terms
63 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
64 *
65 * Copyright remains Eric Young's, and as such any Copyright notices in
66 * the code are not to be removed.
67 * If this package is used in a product, Eric Young should be given attribution
68 * as the author of the parts of the library used.
69 * This can be in the form of a textual message at program startup or
70 * in documentation (online or textual) provided with the package.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 * must display the following acknowledgement:
82 * "This product includes cryptographic software written by
83 * Eric Young (eay@cryptsoft.com)"
84 * The word 'cryptographic' can be left out if the rouines from the library
85 * being used are not cryptographic related :-).
86 * 4. If you include any Windows specific code (or a derivative thereof) from
87 * the apps directory (application code) you must include an acknowledgement:
88 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
89 *
90 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
91 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
94 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100 * SUCH DAMAGE.
101 *
102 * The licence and distribution terms for any publically available version or
103 * derivative of this code cannot be changed. i.e. this code cannot simply be
104 * copied and put under another distribution licence
105 * [including the GNU Public Licence.]
106 */
107
108#ifndef AES_DEBUG
109# ifndef NDEBUG
110# define NDEBUG
111# endif
112#endif
113#include <assert.h>
114
115#include <openssl/aes.h>
116#include "aes_locl.h"
117
118/* The input and output encrypted as though 128bit ofb mode is being
119 * used. The extra state information to record how much of the
120 * 128bit block we have used is contained in *num;
121 */
122void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
123 const unsigned long length, const AES_KEY *key,
124 unsigned char *ivec, int *num) {
125
126 unsigned int n;
127 unsigned long l=length;
128
129 assert(in && out && key && ivec && num);
130
131 n = *num;
132
133 while (l--) {
134 if (n == 0) {
135 AES_encrypt(ivec, ivec, key);
136 }
137 *(out++) = *(in++) ^ ivec[n];
138 n = (n+1) % AES_BLOCK_SIZE;
139 }
140
141 *num=n;
142}
diff --git a/src/lib/libcrypto/aes/aes_wrap.c b/src/lib/libcrypto/aes/aes_wrap.c
deleted file mode 100644
index 9feacd65d8..0000000000
--- a/src/lib/libcrypto/aes/aes_wrap.c
+++ /dev/null
@@ -1,259 +0,0 @@
1/* crypto/aes/aes_wrap.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54#include "cryptlib.h"
55#include <openssl/aes.h>
56#include <openssl/bio.h>
57
58static const unsigned char default_iv[] = {
59 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
60};
61
62int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
63 unsigned char *out,
64 const unsigned char *in, unsigned int inlen)
65 {
66 unsigned char *A, B[16], *R;
67 unsigned int i, j, t;
68 if ((inlen & 0x7) || (inlen < 8))
69 return -1;
70 A = B;
71 t = 1;
72 memcpy(out + 8, in, inlen);
73 if (!iv)
74 iv = default_iv;
75
76 memcpy(A, iv, 8);
77
78 for (j = 0; j < 6; j++)
79 {
80 R = out + 8;
81 for (i = 0; i < inlen; i += 8, t++, R += 8)
82 {
83 memcpy(B + 8, R, 8);
84 AES_encrypt(B, B, key);
85 A[7] ^= (unsigned char)(t & 0xff);
86 if (t > 0xff)
87 {
88 A[6] ^= (unsigned char)((t & 0xff) >> 8);
89 A[5] ^= (unsigned char)((t & 0xff) >> 16);
90 A[4] ^= (unsigned char)((t & 0xff) >> 24);
91 }
92 memcpy(R, B + 8, 8);
93 }
94 }
95 memcpy(out, A, 8);
96 return inlen + 8;
97 }
98
99int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
100 unsigned char *out,
101 const unsigned char *in, unsigned int inlen)
102 {
103 unsigned char *A, B[16], *R;
104 unsigned int i, j, t;
105 inlen -= 8;
106 if (inlen & 0x7)
107 return -1;
108 if (inlen < 8)
109 return -1;
110 A = B;
111 t = 6 * (inlen >> 3);
112 memcpy(A, in, 8);
113 memcpy(out, in + 8, inlen);
114 for (j = 0; j < 6; j++)
115 {
116 R = out + inlen - 8;
117 for (i = 0; i < inlen; i += 8, t--, R -= 8)
118 {
119 A[7] ^= (unsigned char)(t & 0xff);
120 if (t > 0xff)
121 {
122 A[6] ^= (unsigned char)((t & 0xff) >> 8);
123 A[5] ^= (unsigned char)((t & 0xff) >> 16);
124 A[4] ^= (unsigned char)((t & 0xff) >> 24);
125 }
126 memcpy(B + 8, R, 8);
127 AES_decrypt(B, B, key);
128 memcpy(R, B + 8, 8);
129 }
130 }
131 if (!iv)
132 iv = default_iv;
133 if (memcmp(A, iv, 8))
134 {
135 OPENSSL_cleanse(out, inlen);
136 return 0;
137 }
138 return inlen;
139 }
140
141#ifdef AES_WRAP_TEST
142
143int AES_wrap_unwrap_test(const unsigned char *kek, int keybits,
144 const unsigned char *iv,
145 const unsigned char *eout,
146 const unsigned char *key, int keylen)
147 {
148 unsigned char *otmp = NULL, *ptmp = NULL;
149 int r, ret = 0;
150 AES_KEY wctx;
151 otmp = OPENSSL_malloc(keylen + 8);
152 ptmp = OPENSSL_malloc(keylen);
153 if (!otmp || !ptmp)
154 return 0;
155 if (AES_set_encrypt_key(kek, keybits, &wctx))
156 goto err;
157 r = AES_wrap_key(&wctx, iv, otmp, key, keylen);
158 if (r <= 0)
159 goto err;
160
161 if (eout && memcmp(eout, otmp, keylen))
162 goto err;
163
164 if (AES_set_decrypt_key(kek, keybits, &wctx))
165 goto err;
166 r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r);
167
168 if (memcmp(key, ptmp, keylen))
169 goto err;
170
171 ret = 1;
172
173 err:
174 if (otmp)
175 OPENSSL_free(otmp);
176 if (ptmp)
177 OPENSSL_free(ptmp);
178
179 return ret;
180
181 }
182
183
184
/*
 * Test driver (built only when AES_WRAP_TEST is defined): runs six
 * wrap/unwrap round trips with the default IV and prints one
 * "Key test result" line per case to stderr (1 = pass, 0 = fail).
 * Exits with status 0 when every case passes and 1 otherwise.
 */
int main(int argc, char **argv)
{

static const unsigned char kek[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
};

static const unsigned char key[] = {
  0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
  0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};

/* expected output: 128-bit KEK, 128-bit key */
static const unsigned char e1[] = {
  0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47,
  0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82,
  0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5
};

/* expected output: 192-bit KEK, 128-bit key */
static const unsigned char e2[] = {
  0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35,
  0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2,
  0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d
};

/* expected output: 256-bit KEK, 128-bit key */
static const unsigned char e3[] = {
  0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2,
  0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a,
  0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7
};

/* expected output: 192-bit KEK, 192-bit key */
static const unsigned char e4[] = {
  0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32,
  0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc,
  0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93,
  0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2
};

/* expected output: 256-bit KEK, 192-bit key */
static const unsigned char e5[] = {
  0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f,
  0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4,
  0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95,
  0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1
};

/* expected output: 256-bit KEK, 256-bit key */
static const unsigned char e6[] = {
  0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4,
  0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26,
  0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26,
  0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b,
  0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21
};

	int ret;
	int failed = 0;	/* set once any case reports 0 */

	(void)argc;
	(void)argv;

	ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;

	return failed;
}
257
258
259#endif
diff --git a/src/lib/libcrypto/aes/aes_x86core.c b/src/lib/libcrypto/aes/aes_x86core.c
deleted file mode 100644
index d323e265c0..0000000000
--- a/src/lib/libcrypto/aes/aes_x86core.c
+++ /dev/null
@@ -1,1063 +0,0 @@
1/* crypto/aes/aes_x86core.c -*- mode:C; c-file-style: "eay" -*- */
2/**
3 * rijndael-alg-fst.c
4 *
5 * @version 3.0 (December 2000)
6 *
7 * Optimised ANSI C code for the Rijndael cipher (now AES)
8 *
9 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11 * @author Paulo Barreto <paulo.barreto@terra.com.br>
12 *
13 * This code is hereby placed in the public domain.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * This is experimental x86[_64] derivative. It assumes little-endian
30 * byte order and expects CPU to sustain unaligned memory references.
31 * It is used as playground for cache-time attack mitigations and
32 * serves as reference C implementation for x86[_64] assembler.
33 *
34 * <appro@fy.chalmers.se>
35 */
36
37
38#ifndef AES_DEBUG
39# ifndef NDEBUG
40# define NDEBUG
41# endif
42#endif
43#include <assert.h>
44
45#include <stdlib.h>
46#include <openssl/aes.h>
47#include "aes_locl.h"
48
49/*
50 * These two parameters control which table, 256-byte or 2KB, is
51 * referenced in outer and respectively inner rounds.
52 */
53#define AES_COMPACT_IN_OUTER_ROUNDS
54#ifdef AES_COMPACT_IN_OUTER_ROUNDS
55/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
56 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
57 * by factor of ~2. */
58# undef AES_COMPACT_IN_INNER_ROUNDS
59#endif
60
#if 1
/*
 * Read one word out of every 32 bytes across the first 256 bytes of
 * `table` and fold the values into a volatile sink.  Both the source
 * pointer and the sink are volatile so the loads and the final store
 * cannot be optimised away.  Nothing is returned and `table` is not
 * modified.
 * NOTE(review): presumably called to pull a 256-byte lookup table into
 * cache before it is indexed - confirm against the callers.
 */
static void prefetch256(const void *table)
{
	volatile unsigned long *words = (void *)table;
	volatile unsigned long sink;
	unsigned long acc = 0;
	int idx = 0;

	/* 32 is common least cache-line size */
	while (idx < 256/sizeof(words[0]))
		{
		acc ^= words[idx];
		idx += 32/sizeof(words[0]);
		}

	sink = acc;
}
#else
# define prefetch256(t)
#endif
76
77#undef GETU32
78#define GETU32(p) (*((u32*)(p)))
79
80#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
81typedef unsigned __int64 u64;
82#define U64(C) C##UI64
83#elif defined(__arch64__)
84typedef unsigned long u64;
85#define U64(C) C##UL
86#else
87typedef unsigned long long u64;
88#define U64(C) C##ULL
89#endif
90
91#undef ROTATE
92#if defined(_MSC_VER) || defined(__ICC)
93# define ROTATE(a,n) _lrotl(a,n)
94#elif defined(__GNUC__) && __GNUC__>=2
95# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
96# define ROTATE(a,n) ({ register unsigned int ret; \
97 asm ( \
98 "roll %1,%0" \
99 : "=r"(ret) \
100 : "I"(n), "0"(a) \
101 : "cc"); \
102 ret; \
103 })
104# endif
105#endif
106/*
107Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
108Te0[x] = S [x].[02, 01, 01, 03];
109Te1[x] = S [x].[03, 02, 01, 01];
110Te2[x] = S [x].[01, 03, 02, 01];
111Te3[x] = S [x].[01, 01, 03, 02];
112*/
113#define Te0 (u32)((u64*)((u8*)Te+0))
114#define Te1 (u32)((u64*)((u8*)Te+3))
115#define Te2 (u32)((u64*)((u8*)Te+2))
116#define Te3 (u32)((u64*)((u8*)Te+1))
117/*
118Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
119Td0[x] = Si[x].[0e, 09, 0d, 0b];
120Td1[x] = Si[x].[0b, 0e, 09, 0d];
121Td2[x] = Si[x].[0d, 0b, 0e, 09];
122Td3[x] = Si[x].[09, 0d, 0b, 0e];
123Td4[x] = Si[x].[01];
124*/
125#define Td0 (u32)((u64*)((u8*)Td+0))
126#define Td1 (u32)((u64*)((u8*)Td+3))
127#define Td2 (u32)((u64*)((u8*)Td+2))
128#define Td3 (u32)((u64*)((u8*)Td+1))
129
130static const u64 Te[256] = {
131 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
132 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
133 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
134 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
135 U64(0x5030306050303060), U64(0x0301010203010102),
136 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
137 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
138 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
139 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
140 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
141 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
142 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
143 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
144 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
145 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
146 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
147 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
148 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
149 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
150 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
151 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
152 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
153 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
154 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
155 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
156 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
157 U64(0x2818183028181830), U64(0xa1969637a1969637),
158 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
159 U64(0x0907070e0907070e), U64(0x3612122436121224),
160 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
161 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
162 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
163 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
164 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
165 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
166 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
167 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
168 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
169 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
170 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
171 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
172 U64(0x0000000000000000), U64(0x2cededc12cededc1),
173 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
174 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
175 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
176 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
177 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
178 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
179 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
180 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
181 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
182 U64(0x5533336655333366), U64(0x9485851194858511),
183 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
184 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
185 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
186 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
187 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
188 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
189 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
190 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
191 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
192 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
193 U64(0x3010102030101020), U64(0x1affffe51affffe5),
194 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
195 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
196 U64(0x3513132635131326), U64(0x2fececc32fececc3),
197 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
198 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
199 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
200 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
201 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
202 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
203 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
204 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
205 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
206 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
207 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
208 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
209 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
210 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
211 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
212 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
213 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
214 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
215 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
216 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
217 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
218 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
219 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
220 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
221 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
222 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
223 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
224 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
225 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
226 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
227 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
228 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
229 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
230 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
231 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
232 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
233 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
234 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
235 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
236 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
237 U64(0xd8484890d8484890), U64(0x0503030605030306),
238 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
239 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
240 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
241 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
242 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
243 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
244 U64(0xb398982bb398982b), U64(0x3311112233111122),
245 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
246 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
247 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
248 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
249 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
250 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
251 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
252 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
253 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
254 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
255 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
256 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
257 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
258 U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
259};
260
/*
 * Te4: 256-entry byte substitution table. In this file it is indexed
 * with a single state/key byte by AES_set_encrypt_key (key expansion)
 * and by the compact and final rounds of AES_encrypt; the looked-up
 * byte is then shifted into its position within a 32-bit word.
 * The values are those of the AES forward S-box.
 * Entries are u8, so they promote to int when used in expressions.
 */
261static const u8 Te4[256] = {
262 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
263 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
264 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
265 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
266 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
267 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
268 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
269 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
270 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
271 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
272 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
273 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
274 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
275 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
276 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
277 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
278 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
279 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
280 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
281 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
282 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
283 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
284 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
285 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
286 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
287 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
288 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
289 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
290 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
291 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
292 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
293 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
294};
295
/*
 * Td: 256-entry table of 64-bit words used on the decryption side.
 * Every entry holds the same 32-bit value in both its upper and lower
 * half (e.g. 0x50a7f451 repeated twice in entry 0).
 * NOTE(review): AES_decrypt reads this data through Td0..Td3, which are
 * defined outside this part of the file; presumably they are views into
 * Td at different byte offsets, which the duplicated halves would make
 * possible -- confirm against the macro definitions.
 */
296static const u64 Td[256] = {
297 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
298 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
299 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
300 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
301 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
302 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
303 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
304 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
305 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
306 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
307 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
308 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
309 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
310 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
311 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
312 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
313 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
314 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
315 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
316 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
317 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
318 U64(0x6033519760335197), U64(0x457f5362457f5362),
319 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
320 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
321 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
322 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
323 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
324 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
325 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
326 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
327 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
328 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
329 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
330 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
331 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
332 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
333 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
334 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
335 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
336 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
337 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
338 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
339 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
340 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
341 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
342 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
343 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
344 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
345 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
346 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
347 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
348 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
349 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
350 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
351 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
352 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
353 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
354 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
355 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
356 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
357 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
358 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
359 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
360 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
361 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
362 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
363 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
364 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
365 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
366 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
367 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
368 U64(0x4022971340229713), U64(0x2011c6842011c684),
369 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
370 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
371 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
372 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
373 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
374 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
375 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
376 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
377 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
378 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
379 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
380 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
381 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
382 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
383 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
384 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
385 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
386 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
387 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
388 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
389 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
390 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
391 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
392 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
393 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
394 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
395 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
396 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
397 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
398 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
399 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
400 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
401 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
402 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
403 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
404 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
405 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
406 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
407 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
408 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
409 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
410 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
411 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
412 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
413 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
414 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
415 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
416 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
417 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
418 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
419 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
420 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
421 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
422 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
423 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
424 U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
425};
/*
 * Td4: 256-entry byte substitution table used by AES_decrypt (compact
 * rounds and the final round): it is indexed with a single state byte
 * and the result is shifted into its position within a 32-bit word.
 * The values are those of the AES inverse S-box.
 * Entries are u8, so they promote to int when used in expressions.
 */
426static const u8 Td4[256] = {
427 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
428 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
429 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
430 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
431 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
432 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
433 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
434 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
435 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
436 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
437 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
438 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
439 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
440 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
441 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
442 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
443 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
444 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
445 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
446 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
447 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
448 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
449 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
450 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
451 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
452 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
453 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
454 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
455 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
456 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
457 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
458 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
459};
460
/*
 * Round constants for the key expansion: AES_set_encrypt_key XORs
 * rcon[i] into the first word produced by each expansion step, where i
 * is its iteration counter. Each constant occupies the low byte of its
 * 32-bit word.
 */
461static const u32 rcon[] = {
462 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
463 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
464 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
465};
466
467/**
468 * Expand the cipher key into the encryption key schedule.
469 */
470int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
471 AES_KEY *key) {
472
473 u32 *rk;
474 int i = 0;
475 u32 temp;
476
477 if (!userKey || !key)
478 return -1;
479 if (bits != 128 && bits != 192 && bits != 256)
480 return -2;
481
482 rk = key->rd_key;
483
484 if (bits==128)
485 key->rounds = 10;
486 else if (bits==192)
487 key->rounds = 12;
488 else
489 key->rounds = 14;
490
491 rk[0] = GETU32(userKey );
492 rk[1] = GETU32(userKey + 4);
493 rk[2] = GETU32(userKey + 8);
494 rk[3] = GETU32(userKey + 12);
495 if (bits == 128) {
496 while (1) {
497 temp = rk[3];
498 rk[4] = rk[0] ^
499 (Te4[(temp >> 8) & 0xff] ) ^
500 (Te4[(temp >> 16) & 0xff] << 8) ^
501 (Te4[(temp >> 24) ] << 16) ^
502 (Te4[(temp ) & 0xff] << 24) ^
503 rcon[i];
504 rk[5] = rk[1] ^ rk[4];
505 rk[6] = rk[2] ^ rk[5];
506 rk[7] = rk[3] ^ rk[6];
507 if (++i == 10) {
508 return 0;
509 }
510 rk += 4;
511 }
512 }
513 rk[4] = GETU32(userKey + 16);
514 rk[5] = GETU32(userKey + 20);
515 if (bits == 192) {
516 while (1) {
517 temp = rk[ 5];
518 rk[ 6] = rk[ 0] ^
519 (Te4[(temp >> 8) & 0xff] ) ^
520 (Te4[(temp >> 16) & 0xff] << 8) ^
521 (Te4[(temp >> 24) ] << 16) ^
522 (Te4[(temp ) & 0xff] << 24) ^
523 rcon[i];
524 rk[ 7] = rk[ 1] ^ rk[ 6];
525 rk[ 8] = rk[ 2] ^ rk[ 7];
526 rk[ 9] = rk[ 3] ^ rk[ 8];
527 if (++i == 8) {
528 return 0;
529 }
530 rk[10] = rk[ 4] ^ rk[ 9];
531 rk[11] = rk[ 5] ^ rk[10];
532 rk += 6;
533 }
534 }
535 rk[6] = GETU32(userKey + 24);
536 rk[7] = GETU32(userKey + 28);
537 if (bits == 256) {
538 while (1) {
539 temp = rk[ 7];
540 rk[ 8] = rk[ 0] ^
541 (Te4[(temp >> 8) & 0xff] ) ^
542 (Te4[(temp >> 16) & 0xff] << 8) ^
543 (Te4[(temp >> 24) ] << 16) ^
544 (Te4[(temp ) & 0xff] << 24) ^
545 rcon[i];
546 rk[ 9] = rk[ 1] ^ rk[ 8];
547 rk[10] = rk[ 2] ^ rk[ 9];
548 rk[11] = rk[ 3] ^ rk[10];
549 if (++i == 7) {
550 return 0;
551 }
552 temp = rk[11];
553 rk[12] = rk[ 4] ^
554 (Te4[(temp ) & 0xff] ) ^
555 (Te4[(temp >> 8) & 0xff] << 8) ^
556 (Te4[(temp >> 16) & 0xff] << 16) ^
557 (Te4[(temp >> 24) ] << 24);
558 rk[13] = rk[ 5] ^ rk[12];
559 rk[14] = rk[ 6] ^ rk[13];
560 rk[15] = rk[ 7] ^ rk[14];
561
562 rk += 8;
563 }
564 }
565 return 0;
566}
567
568/**
569 * Expand the cipher key into the decryption key schedule.
570 */
571int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
572 AES_KEY *key) {
573
574 u32 *rk;
575 int i, j, status;
576 u32 temp;
577
578 /* first, start with an encryption schedule */
579 status = AES_set_encrypt_key(userKey, bits, key);
580 if (status < 0)
581 return status;
582
583 rk = key->rd_key;
584
585 /* invert the order of the round keys: */
586 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
587 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
588 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
589 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
590 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
591 }
592 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
593 for (i = 1; i < (key->rounds); i++) {
594 rk += 4;
595#if 1
596 for (j = 0; j < 4; j++) {
597 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
598
599 tp1 = rk[j];
600 m = tp1 & 0x80808080;
601 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
602 ((m - (m >> 7)) & 0x1b1b1b1b);
603 m = tp2 & 0x80808080;
604 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
605 ((m - (m >> 7)) & 0x1b1b1b1b);
606 m = tp4 & 0x80808080;
607 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
608 ((m - (m >> 7)) & 0x1b1b1b1b);
609 tp9 = tp8 ^ tp1;
610 tpb = tp9 ^ tp2;
611 tpd = tp9 ^ tp4;
612 tpe = tp8 ^ tp4 ^ tp2;
613#if defined(ROTATE)
614 rk[j] = tpe ^ ROTATE(tpd,16) ^
615 ROTATE(tp9,8) ^ ROTATE(tpb,24);
616#else
617 rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
618 (tp9 >> 24) ^ (tp9 << 8) ^
619 (tpb >> 8) ^ (tpb << 24);
620#endif
621 }
622#else
623 rk[0] =
624 Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
625 Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
626 Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
627 Td3[Te2[(rk[0] >> 24) ] & 0xff];
628 rk[1] =
629 Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
630 Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
631 Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
632 Td3[Te2[(rk[1] >> 24) ] & 0xff];
633 rk[2] =
634 Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
635 Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
636 Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
637 Td3[Te2[(rk[2] >> 24) ] & 0xff];
638 rk[3] =
639 Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
640 Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
641 Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
642 Td3[Te2[(rk[3] >> 24) ] & 0xff];
643#endif
644 }
645 return 0;
646}
647
648/*
649 * Encrypt a single block
650 * in and out can overlap
651 */
652void AES_encrypt(const unsigned char *in, unsigned char *out,
653 const AES_KEY *key) {
654
655 const u32 *rk;
656 u32 s0, s1, s2, s3, t[4];
657 int r;
658
659 assert(in && out && key);
660 rk = key->rd_key;
661
662 /*
663 * map byte array block to cipher state
664 * and add initial round key:
665 */
666 s0 = GETU32(in ) ^ rk[0];
667 s1 = GETU32(in + 4) ^ rk[1];
668 s2 = GETU32(in + 8) ^ rk[2];
669 s3 = GETU32(in + 12) ^ rk[3];
670
671#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
672 prefetch256(Te4);
673
674 t[0] = Te4[(s0 ) & 0xff] ^
675 Te4[(s1 >> 8) & 0xff] << 8 ^
676 Te4[(s2 >> 16) & 0xff] << 16 ^
677 Te4[(s3 >> 24) ] << 24;
678 t[1] = Te4[(s1 ) & 0xff] ^
679 Te4[(s2 >> 8) & 0xff] << 8 ^
680 Te4[(s3 >> 16) & 0xff] << 16 ^
681 Te4[(s0 >> 24) ] << 24;
682 t[2] = Te4[(s2 ) & 0xff] ^
683 Te4[(s3 >> 8) & 0xff] << 8 ^
684 Te4[(s0 >> 16) & 0xff] << 16 ^
685 Te4[(s1 >> 24) ] << 24;
686 t[3] = Te4[(s3 ) & 0xff] ^
687 Te4[(s0 >> 8) & 0xff] << 8 ^
688 Te4[(s1 >> 16) & 0xff] << 16 ^
689 Te4[(s2 >> 24) ] << 24;
690
691 /* now do the linear transform using words */
692 { int i;
693 u32 r0, r1, r2;
694
695 for (i = 0; i < 4; i++) {
696 r0 = t[i];
697 r1 = r0 & 0x80808080;
698 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
699 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
700#if defined(ROTATE)
701 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
702 ROTATE(r0,16) ^ ROTATE(r0,8);
703#else
704 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
705 (r0 << 16) ^ (r0 >> 16) ^
706 (r0 << 8) ^ (r0 >> 24);
707#endif
708 t[i] ^= rk[4+i];
709 }
710 }
711#else
712 t[0] = Te0[(s0 ) & 0xff] ^
713 Te1[(s1 >> 8) & 0xff] ^
714 Te2[(s2 >> 16) & 0xff] ^
715 Te3[(s3 >> 24) ] ^
716 rk[4];
717 t[1] = Te0[(s1 ) & 0xff] ^
718 Te1[(s2 >> 8) & 0xff] ^
719 Te2[(s3 >> 16) & 0xff] ^
720 Te3[(s0 >> 24) ] ^
721 rk[5];
722 t[2] = Te0[(s2 ) & 0xff] ^
723 Te1[(s3 >> 8) & 0xff] ^
724 Te2[(s0 >> 16) & 0xff] ^
725 Te3[(s1 >> 24) ] ^
726 rk[6];
727 t[3] = Te0[(s3 ) & 0xff] ^
728 Te1[(s0 >> 8) & 0xff] ^
729 Te2[(s1 >> 16) & 0xff] ^
730 Te3[(s2 >> 24) ] ^
731 rk[7];
732#endif
733 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
734
735 /*
736 * Nr - 2 full rounds:
737 */
738 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
739#if defined(AES_COMPACT_IN_INNER_ROUNDS)
740 t[0] = Te4[(s0 ) & 0xff] ^
741 Te4[(s1 >> 8) & 0xff] << 8 ^
742 Te4[(s2 >> 16) & 0xff] << 16 ^
743 Te4[(s3 >> 24) ] << 24;
744 t[1] = Te4[(s1 ) & 0xff] ^
745 Te4[(s2 >> 8) & 0xff] << 8 ^
746 Te4[(s3 >> 16) & 0xff] << 16 ^
747 Te4[(s0 >> 24) ] << 24;
748 t[2] = Te4[(s2 ) & 0xff] ^
749 Te4[(s3 >> 8) & 0xff] << 8 ^
750 Te4[(s0 >> 16) & 0xff] << 16 ^
751 Te4[(s1 >> 24) ] << 24;
752 t[3] = Te4[(s3 ) & 0xff] ^
753 Te4[(s0 >> 8) & 0xff] << 8 ^
754 Te4[(s1 >> 16) & 0xff] << 16 ^
755 Te4[(s2 >> 24) ] << 24;
756
757 /* now do the linear transform using words */
758 { int i;
759 u32 r0, r1, r2;
760
761 for (i = 0; i < 4; i++) {
762 r0 = t[i];
763 r1 = r0 & 0x80808080;
764 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
765 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
766#if defined(ROTATE)
767 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
768 ROTATE(r0,16) ^ ROTATE(r0,8);
769#else
770 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
771 (r0 << 16) ^ (r0 >> 16) ^
772 (r0 << 8) ^ (r0 >> 24);
773#endif
774 t[i] ^= rk[i];
775 }
776 }
777#else
778 t[0] = Te0[(s0 ) & 0xff] ^
779 Te1[(s1 >> 8) & 0xff] ^
780 Te2[(s2 >> 16) & 0xff] ^
781 Te3[(s3 >> 24) ] ^
782 rk[0];
783 t[1] = Te0[(s1 ) & 0xff] ^
784 Te1[(s2 >> 8) & 0xff] ^
785 Te2[(s3 >> 16) & 0xff] ^
786 Te3[(s0 >> 24) ] ^
787 rk[1];
788 t[2] = Te0[(s2 ) & 0xff] ^
789 Te1[(s3 >> 8) & 0xff] ^
790 Te2[(s0 >> 16) & 0xff] ^
791 Te3[(s1 >> 24) ] ^
792 rk[2];
793 t[3] = Te0[(s3 ) & 0xff] ^
794 Te1[(s0 >> 8) & 0xff] ^
795 Te2[(s1 >> 16) & 0xff] ^
796 Te3[(s2 >> 24) ] ^
797 rk[3];
798#endif
799 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
800 }
801 /*
802 * apply last round and
803 * map cipher state to byte array block:
804 */
805#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
806 prefetch256(Te4);
807
808 *(u32*)(out+0) =
809 Te4[(s0 ) & 0xff] ^
810 Te4[(s1 >> 8) & 0xff] << 8 ^
811 Te4[(s2 >> 16) & 0xff] << 16 ^
812 Te4[(s3 >> 24) ] << 24 ^
813 rk[0];
814 *(u32*)(out+4) =
815 Te4[(s1 ) & 0xff] ^
816 Te4[(s2 >> 8) & 0xff] << 8 ^
817 Te4[(s3 >> 16) & 0xff] << 16 ^
818 Te4[(s0 >> 24) ] << 24 ^
819 rk[1];
820 *(u32*)(out+8) =
821 Te4[(s2 ) & 0xff] ^
822 Te4[(s3 >> 8) & 0xff] << 8 ^
823 Te4[(s0 >> 16) & 0xff] << 16 ^
824 Te4[(s1 >> 24) ] << 24 ^
825 rk[2];
826 *(u32*)(out+12) =
827 Te4[(s3 ) & 0xff] ^
828 Te4[(s0 >> 8) & 0xff] << 8 ^
829 Te4[(s1 >> 16) & 0xff] << 16 ^
830 Te4[(s2 >> 24) ] << 24 ^
831 rk[3];
832#else
833 *(u32*)(out+0) =
834 (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
835 (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
836 (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
837 (Te1[(s3 >> 24) ] & 0xff000000U) ^
838 rk[0];
839 *(u32*)(out+4) =
840 (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
841 (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
842 (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
843 (Te1[(s0 >> 24) ] & 0xff000000U) ^
844 rk[1];
845 *(u32*)(out+8) =
846 (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
847 (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
848 (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
849 (Te1[(s1 >> 24) ] & 0xff000000U) ^
850 rk[2];
851 *(u32*)(out+12) =
852 (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
853 (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
854 (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
855 (Te1[(s2 >> 24) ] & 0xff000000U) ^
856 rk[3];
857#endif
858}
859
860/*
861 * Decrypt a single block
862 * in and out can overlap
863 */
864void AES_decrypt(const unsigned char *in, unsigned char *out,
865 const AES_KEY *key) {
866
867 const u32 *rk;
868 u32 s0, s1, s2, s3, t[4];
869 int r;
870
871 assert(in && out && key);
872 rk = key->rd_key;
873
874 /*
875 * map byte array block to cipher state
876 * and add initial round key:
877 */
878 s0 = GETU32(in ) ^ rk[0];
879 s1 = GETU32(in + 4) ^ rk[1];
880 s2 = GETU32(in + 8) ^ rk[2];
881 s3 = GETU32(in + 12) ^ rk[3];
882
883#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
884 prefetch256(Td4);
885
886 t[0] = Td4[(s0 ) & 0xff] ^
887 Td4[(s3 >> 8) & 0xff] << 8 ^
888 Td4[(s2 >> 16) & 0xff] << 16 ^
889 Td4[(s1 >> 24) ] << 24;
890 t[1] = Td4[(s1 ) & 0xff] ^
891 Td4[(s0 >> 8) & 0xff] << 8 ^
892 Td4[(s3 >> 16) & 0xff] << 16 ^
893 Td4[(s2 >> 24) ] << 24;
894 t[2] = Td4[(s2 ) & 0xff] ^
895 Td4[(s1 >> 8) & 0xff] << 8 ^
896 Td4[(s0 >> 16) & 0xff] << 16 ^
897 Td4[(s3 >> 24) ] << 24;
898 t[3] = Td4[(s3 ) & 0xff] ^
899 Td4[(s2 >> 8) & 0xff] << 8 ^
900 Td4[(s1 >> 16) & 0xff] << 16 ^
901 Td4[(s0 >> 24) ] << 24;
902
903 /* now do the linear transform using words */
904 { int i;
905 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
906
907 for (i = 0; i < 4; i++) {
908 tp1 = t[i];
909 m = tp1 & 0x80808080;
910 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
911 ((m - (m >> 7)) & 0x1b1b1b1b);
912 m = tp2 & 0x80808080;
913 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
914 ((m - (m >> 7)) & 0x1b1b1b1b);
915 m = tp4 & 0x80808080;
916 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
917 ((m - (m >> 7)) & 0x1b1b1b1b);
918 tp9 = tp8 ^ tp1;
919 tpb = tp9 ^ tp2;
920 tpd = tp9 ^ tp4;
921 tpe = tp8 ^ tp4 ^ tp2;
922#if defined(ROTATE)
923 t[i] = tpe ^ ROTATE(tpd,16) ^
924 ROTATE(tp9,8) ^ ROTATE(tpb,24);
925#else
926 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
927 (tp9 >> 24) ^ (tp9 << 8) ^
928 (tpb >> 8) ^ (tpb << 24);
929#endif
930 t[i] ^= rk[4+i];
931 }
932 }
933#else
934 t[0] = Td0[(s0 ) & 0xff] ^
935 Td1[(s3 >> 8) & 0xff] ^
936 Td2[(s2 >> 16) & 0xff] ^
937 Td3[(s1 >> 24) ] ^
938 rk[4];
939 t[1] = Td0[(s1 ) & 0xff] ^
940 Td1[(s0 >> 8) & 0xff] ^
941 Td2[(s3 >> 16) & 0xff] ^
942 Td3[(s2 >> 24) ] ^
943 rk[5];
944 t[2] = Td0[(s2 ) & 0xff] ^
945 Td1[(s1 >> 8) & 0xff] ^
946 Td2[(s0 >> 16) & 0xff] ^
947 Td3[(s3 >> 24) ] ^
948 rk[6];
949 t[3] = Td0[(s3 ) & 0xff] ^
950 Td1[(s2 >> 8) & 0xff] ^
951 Td2[(s1 >> 16) & 0xff] ^
952 Td3[(s0 >> 24) ] ^
953 rk[7];
954#endif
955 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
956
957 /*
958 * Nr - 2 full rounds:
959 */
960 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
961#if defined(AES_COMPACT_IN_INNER_ROUNDS)
962 t[0] = Td4[(s0 ) & 0xff] ^
963 Td4[(s3 >> 8) & 0xff] << 8 ^
964 Td4[(s2 >> 16) & 0xff] << 16 ^
965 Td4[(s1 >> 24) ] << 24;
966 t[1] = Td4[(s1 ) & 0xff] ^
967 Td4[(s0 >> 8) & 0xff] << 8 ^
968 Td4[(s3 >> 16) & 0xff] << 16 ^
969 Td4[(s2 >> 24) ] << 24;
970 t[2] = Td4[(s2 ) & 0xff] ^
971 Td4[(s1 >> 8) & 0xff] << 8 ^
972 Td4[(s0 >> 16) & 0xff] << 16 ^
973 Td4[(s3 >> 24) ] << 24;
974 t[3] = Td4[(s3 ) & 0xff] ^
975 Td4[(s2 >> 8) & 0xff] << 8 ^
976 Td4[(s1 >> 16) & 0xff] << 16 ^
977 Td4[(s0 >> 24) ] << 24;
978
979 /* now do the linear transform using words */
980 { int i;
981 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
982
983 for (i = 0; i < 4; i++) {
984 tp1 = t[i];
985 m = tp1 & 0x80808080;
986 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
987 ((m - (m >> 7)) & 0x1b1b1b1b);
988 m = tp2 & 0x80808080;
989 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
990 ((m - (m >> 7)) & 0x1b1b1b1b);
991 m = tp4 & 0x80808080;
992 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
993 ((m - (m >> 7)) & 0x1b1b1b1b);
994 tp9 = tp8 ^ tp1;
995 tpb = tp9 ^ tp2;
996 tpd = tp9 ^ tp4;
997 tpe = tp8 ^ tp4 ^ tp2;
998#if defined(ROTATE)
999 t[i] = tpe ^ ROTATE(tpd,16) ^
1000 ROTATE(tp9,8) ^ ROTATE(tpb,24);
1001#else
1002 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1003 (tp9 >> 24) ^ (tp9 << 8) ^
1004 (tpb >> 8) ^ (tpb << 24);
1005#endif
1006 t[i] ^= rk[i];
1007 }
1008 }
1009#else
1010 t[0] = Td0[(s0 ) & 0xff] ^
1011 Td1[(s3 >> 8) & 0xff] ^
1012 Td2[(s2 >> 16) & 0xff] ^
1013 Td3[(s1 >> 24) ] ^
1014 rk[0];
1015 t[1] = Td0[(s1 ) & 0xff] ^
1016 Td1[(s0 >> 8) & 0xff] ^
1017 Td2[(s3 >> 16) & 0xff] ^
1018 Td3[(s2 >> 24) ] ^
1019 rk[1];
1020 t[2] = Td0[(s2 ) & 0xff] ^
1021 Td1[(s1 >> 8) & 0xff] ^
1022 Td2[(s0 >> 16) & 0xff] ^
1023 Td3[(s3 >> 24) ] ^
1024 rk[2];
1025 t[3] = Td0[(s3 ) & 0xff] ^
1026 Td1[(s2 >> 8) & 0xff] ^
1027 Td2[(s1 >> 16) & 0xff] ^
1028 Td3[(s0 >> 24) ] ^
1029 rk[3];
1030#endif
1031 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
1032 }
1033 /*
1034 * apply last round and
1035 * map cipher state to byte array block:
1036 */
1037 prefetch256(Td4);
1038
1039 *(u32*)(out+0) =
1040 (Td4[(s0 ) & 0xff]) ^
1041 (Td4[(s3 >> 8) & 0xff] << 8) ^
1042 (Td4[(s2 >> 16) & 0xff] << 16) ^
1043 (Td4[(s1 >> 24) ] << 24) ^
1044 rk[0];
1045 *(u32*)(out+4) =
1046 (Td4[(s1 ) & 0xff]) ^
1047 (Td4[(s0 >> 8) & 0xff] << 8) ^
1048 (Td4[(s3 >> 16) & 0xff] << 16) ^
1049 (Td4[(s2 >> 24) ] << 24) ^
1050 rk[1];
1051 *(u32*)(out+8) =
1052 (Td4[(s2 ) & 0xff]) ^
1053 (Td4[(s1 >> 8) & 0xff] << 8) ^
1054 (Td4[(s0 >> 16) & 0xff] << 16) ^
1055 (Td4[(s3 >> 24) ] << 24) ^
1056 rk[2];
1057 *(u32*)(out+12) =
1058 (Td4[(s3 ) & 0xff]) ^
1059 (Td4[(s2 >> 8) & 0xff] << 8) ^
1060 (Td4[(s1 >> 16) & 0xff] << 16) ^
1061 (Td4[(s0 >> 24) ] << 24) ^
1062 rk[3];
1063}
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
deleted file mode 100644
index e771e83953..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ /dev/null
@@ -1,1533 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 3.6.
10#
11# You might fail to appreciate this module performance from the first
12# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered
13# to be *the* best Intel C compiler without -KPIC, performance appears
14# to be virtually identical... But try to re-configure with shared
15# library support... Aha! Intel compiler "suddenly" lags behind by 30%
16# [on P4, more on others]:-) And if compared to position-independent
17# code generated by GNU C, this code performs *more* than *twice* as
18# fast! Yes, all this buzz about PIC means that unlike other hand-
19# coded implementations, this one was explicitly designed to be safe
20# to use even in shared library context... This also means that this
21# code isn't necessarily absolutely fastest "ever," because in order
22# to achieve position independence an extra register has to be
23# off-loaded to stack, which affects the benchmark result.
24#
25# Special note about instruction choice. Do you recall RC4_INT code
26# performing poorly on P4? It might be the time to figure out why.
27# RC4_INT code implies effective address calculations in base+offset*4
28# form. Trouble is that it seems that offset scaling turned to be
29# critical path... At least eliminating scaling resulted in 2.8x RC4
30# performance improvement [as you might recall]. As AES code is hungry
31# for scaling too, I [try to] avoid the latter by favoring off-by-2
32# shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF.
33#
34# As was shown by Dean Gaudet <dean@arctic.org>, the above note turned
35# void. Performance improvement with off-by-2 shifts was observed on
36# intermediate implementation, which was spilling yet another register
37# to stack... Final offset*4 code below runs just a tad faster on P4,
38# but exhibits up to 10% improvement on other cores.
39#
40# Second version is "monolithic" replacement for aes_core.c, which in
41# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key.
42# This made it possible to implement little-endian variant of the
43# algorithm without modifying the base C code. Motivating factor for
44# the undertaken effort was that it appeared that in tight IA-32
45# register window little-endian flavor could achieve slightly higher
46# Instruction Level Parallelism, and it indeed resulted in up to 15%
47# better performance on most recent µ-archs...
48#
49# Third version adds AES_cbc_encrypt implementation, which resulted in
50# up to 40% performance improvement of CBC benchmark results. 40% was
51# observed on P4 core, where "overall" improvement coefficient, i.e. if
52# compared to PIC generated by GCC and in CBC mode, was observed to be
53# as large as 4x:-) CBC performance is virtually identical to ECB now
54# and on some platforms even better, e.g. 17.6 "small" cycles/byte on
55# Opteron, because certain function prologues and epilogues are
56# effectively taken out of the loop...
57#
58# Version 3.2 implements compressed tables and prefetch of these tables
59# in CBC[!] mode. Former means that 3/4 of table references are now
60# misaligned, which unfortunately has negative impact on elder IA-32
61# implementations, Pentium suffered 30% penalty, PIII - 10%.
62#
63# Version 3.3 avoids L1 cache aliasing between stack frame and
64# S-boxes, and 3.4 - L1 cache aliasing even between key schedule. The
65# latter is achieved by copying the key schedule to controlled place in
66# stack. This unfortunately has rather strong impact on small block CBC
67# performance, ~2x deterioration on 16-byte block if compared to 3.3.
68#
69# Version 3.5 checks if there is L1 cache aliasing between user-supplied
70# key schedule and S-boxes and abstains from copying the former if
71# there is no. This allows end-user to consciously retain small block
72# performance by aligning key schedule in specific manner.
73#
74# Version 3.6 compresses Td4 to 256 bytes and prefetches it in ECB.
75#
76# Current ECB performance numbers for 128-bit key in CPU cycles per
77# processed byte [measure commonly used by AES benchmarkers] are:
78#
79# small footprint fully unrolled
80# P4 24 22
81# AMD K8 20 19
82# PIII 25 23
83# Pentium 81 78
84
# Locate and load the perlasm x86 back end, then initialise it.
85push(@INC,"perlasm","../../perlasm");
86require "x86asm.pl";
87
# NOTE(review): the third argument is true when the last command-line
# argument is "386"; presumably it restricts the output to 386-compatible
# instructions -- confirm in x86asm.pl.
88&asm_init($ARGV[0],"aes-586.pl",$ARGV[$#ARGV] eq "386");
89
# Register roles used by the generators below.
90$s0="eax"; # state word 0
91$s1="ebx"; # state word 1
92$s2="ecx"; # state word 2
93$s3="edx"; # state word 3
94$key="edi"; # key schedule pointer
95$acc="esi"; # scratch/accumulator
96
97$compromise=0; # $compromise=128 abstains from copying key
98 # schedule to stack when encrypting inputs
99 # shorter than 128 bytes at the cost of
100 # risking aliasing with S-boxes. In return
101 # you get way better, up to +70%, small block
102 # performance.
103$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
104 # recent µ-archs], but ~5 times smaller!
105 # I favor compact code to minimize cache
106 # contention and in hope to "collect" 5% back
107 # in real-life applications...
108$vertical_spin=0; # shift "vertically" defaults to 0, because of
109 # its proof-of-concept status...
110
111# Note that there is no decvert(), as well as last encryption round is
112# performed with "horizontal" shifts. This is because this "vertical"
113# implementation [one which groups shifts on a given $s[i] to form a
114# "column," unlike "horizontal" one, which groups shifts on different
115# $s[i] to form a "row"] is work in progress. It was observed to run
116# few percents faster on Intel cores, but not AMD. On AMD K8 core it's
117# whole 12% slower:-( So we face a trade-off... Shall it be resolved
118# some day? Till then the code is considered experimental and by
119# default remains dormant...
120
# encvert($te,@s): emit one encryption round in the experimental
# "vertical" form, in which all four lookups driven by one state word are
# grouped together.  $te is the register holding the table base and @s
# the four state registers.  Stack slots 4(%esp) and 8(%esp) are used to
# spill s2 and s1, and $key is reloaded from 12(%esp) on the way out, so
# the caller must already have set up that frame.
121sub encvert()
122{ my ($te,@s) = @_;
# List-assign so that BOTH temporaries are lexical; the former
# "my $v0 = $acc, $v1 = $key;" declared only $v0 and silently wrote the
# package global $v1.
123 my ($v0,$v1) = ($acc,$key);
124
125 &mov ($v0,$s[3]); # copy s3
126 &mov (&DWP(4,"esp"),$s[2]); # save s2
127 &mov ($v1,$s[0]); # copy s0
128 &mov (&DWP(8,"esp"),$s[1]); # save s1
129
130 &movz ($s[2],&HB($s[0]));
131 &and ($s[0],0xFF);
132 &mov ($s[0],&DWP(0,$te,$s[0],8)); # s0>>0
133 &shr ($v1,16);
134 &mov ($s[3],&DWP(3,$te,$s[2],8)); # s0>>8
135 &movz ($s[1],&HB($v1));
136 &and ($v1,0xFF);
137 &mov ($s[2],&DWP(2,$te,$v1,8)); # s0>>16
138 &mov ($v1,$v0);
139 &mov ($s[1],&DWP(1,$te,$s[1],8)); # s0>>24
140
141 &and ($v0,0xFF);
142 &xor ($s[3],&DWP(0,$te,$v0,8)); # s3>>0
143 &movz ($v0,&HB($v1));
144 &shr ($v1,16);
145 &xor ($s[2],&DWP(3,$te,$v0,8)); # s3>>8
146 &movz ($v0,&HB($v1));
147 &and ($v1,0xFF);
148 &xor ($s[1],&DWP(2,$te,$v1,8)); # s3>>16
149 &mov ($v1,&DWP(4,"esp")); # restore s2
150 &xor ($s[0],&DWP(1,$te,$v0,8)); # s3>>24
151
152 &mov ($v0,$v1);
153 &and ($v1,0xFF);
154 &xor ($s[2],&DWP(0,$te,$v1,8)); # s2>>0
155 &movz ($v1,&HB($v0));
156 &shr ($v0,16);
157 &xor ($s[1],&DWP(3,$te,$v1,8)); # s2>>8
158 &movz ($v1,&HB($v0));
159 &and ($v0,0xFF);
160 &xor ($s[0],&DWP(2,$te,$v0,8)); # s2>>16
161 &mov ($v0,&DWP(8,"esp")); # restore s1
162 &xor ($s[3],&DWP(1,$te,$v1,8)); # s2>>24
163
164 &mov ($v1,$v0);
165 &and ($v0,0xFF);
166 &xor ($s[1],&DWP(0,$te,$v0,8)); # s1>>0
167 &movz ($v0,&HB($v1));
168 &shr ($v1,16);
169 &xor ($s[0],&DWP(3,$te,$v0,8)); # s1>>8
170 &movz ($v0,&HB($v1));
171 &and ($v1,0xFF);
172 &xor ($s[3],&DWP(2,$te,$v1,8)); # s1>>16
173 &mov ($key,&DWP(12,"esp")); # reincarnate v1 as key
174 &xor ($s[2],&DWP(1,$te,$v0,8)); # s1>>24
175}
176
# encstep($i,$te,@s): emit the code that computes output word $i (0..3)
# of one encryption round.  Four entries are fetched from the table at
# $te (index scaled by 8, byte offsets 0, 3, 2 and 1) using successive
# bytes of $s[0], $s[1], $s[2] and $s[3], and xor-ed into $out.
#
# The four calls of a round are NOT independent: they must be issued in
# the order $i=0,1,2,3 with the register list rotated each time, because
# some calls pre-shift or pre-mask a register on behalf of a later call
# (the lines tagged #%e?x[i]).  Words 0 and 1 are parked in 4(%esp) and
# 8(%esp), word 2 stays in $acc, and the $i==3 call reloads all of them
# into registers and restores $key from 12(%esp).
177sub encstep()
178{ my ($i,$te,@s) = @_;
179 my $tmp = $key;
180 my $out = $i==3?$s[0]:$acc;
181
182 # lines marked with #%e?x[i] denote "reordered" instructions...
183 if ($i==3) { &mov ($key,&DWP(12,"esp")); }##%edx
184 else { &mov ($out,$s[0]);
185 &and ($out,0xFF); }
186 if ($i==1) { &shr ($s[0],16); }#%ebx[1]
187 if ($i==2) { &shr ($s[0],24); }#%ecx[2]
188 &mov ($out,&DWP(0,$te,$out,8));
189
190 if ($i==3) { $tmp=$s[1]; }##%eax
191 &movz ($tmp,&HB($s[1]));
192 &xor ($out,&DWP(3,$te,$tmp,8));
193
194 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); }##%ebx
195 else { &mov ($tmp,$s[2]);
196 &shr ($tmp,16); }
197 if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
198 &and ($tmp,0xFF);
199 &xor ($out,&DWP(2,$te,$tmp,8));
200
201 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }##%ecx
202 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
203 else { &mov ($tmp,$s[3]);
204 &shr ($tmp,24) }
205 &xor ($out,&DWP(1,$te,$tmp,8));
# park words 0 and 1 on the stack; the $i==3 call reloads them
206 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
# word 2 was accumulated in $acc by the $i==2 call
207 if ($i==3) { &mov ($s[3],$acc); }
208 &comment();
209}
210
# enclast($i,$te,@s): emit the code that computes output word $i (0..3)
# of the final encryption round.  Each of the four bytes is obtained by
# loading a full 32-bit word from the table at $te and masking it down to
# one byte lane (0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000) before
# it is xor-ed into $out.  Register juggling and stack usage mirror
# encstep(): the calls must be issued in the order $i=0,1,2,3 with the
# register list rotated, words 0 and 1 are parked in 4(%esp)/8(%esp) and
# the $i==3 call reloads them and restores $key from 12(%esp).
211sub enclast()
212{ my ($i,$te,@s)=@_;
213 my $tmp = $key;
214 my $out = $i==3?$s[0]:$acc;
215
216 if ($i==3) { &mov ($key,&DWP(12,"esp")); }##%edx
217 else { &mov ($out,$s[0]); }
218 &and ($out,0xFF);
219 if ($i==1) { &shr ($s[0],16); }#%ebx[1]
220 if ($i==2) { &shr ($s[0],24); }#%ecx[2]
221 &mov ($out,&DWP(2,$te,$out,8));
222 &and ($out,0x000000ff);
223
224 if ($i==3) { $tmp=$s[1]; }##%eax
225 &movz ($tmp,&HB($s[1]));
226 &mov ($tmp,&DWP(0,$te,$tmp,8));
227 &and ($tmp,0x0000ff00);
228 &xor ($out,$tmp);
229
230 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); }##%ebx
# was a bare "mov (...)"; spelled &mov like every other emitter call
231 else { &mov ($tmp,$s[2]);
232 &shr ($tmp,16); }
233 if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
234 &and ($tmp,0xFF);
235 &mov ($tmp,&DWP(0,$te,$tmp,8));
236 &and ($tmp,0x00ff0000);
237 &xor ($out,$tmp);
238
239 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }##%ecx
240 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
241 else { &mov ($tmp,$s[3]);
242 &shr ($tmp,24); }
243 &mov ($tmp,&DWP(2,$te,$tmp,8));
244 &and ($tmp,0xff000000);
245 &xor ($out,$tmp);
246 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
247 if ($i==3) { &mov ($s[3],$acc); }
248}
249
# _data_word(LIST): hand every argument to &data_word() twice in a row,
# so each table entry is emitted as a pair of identical words.  Stops at
# the first undefined argument.
250sub _data_word()
{ for (my $w=shift; defined($w); $w=shift) { &data_word($w,$w); } }
251
# Generate _x86_AES_encrypt, the internal block-encryption routine.
# As generated here it expects the four state words in $s0..$s3, the key
# schedule pointer in $key and the lookup-table base in %ebp (the $te
# argument handed to every enc* helper).  It uses stack slots relative to
# %esp that the caller must have reserved, and leaves the result in
# $s0..$s3.
252&public_label("AES_Te");
253&function_begin_C("_x86_AES_encrypt");
254 if ($vertical_spin) {
255 # I need high parts of volatile registers to be accessible...
256 &exch ($s1="edi",$key="ebx");
257 &mov ($s2="esi",$acc="ecx");
258 }
259
260 # note that caller is expected to allocate stack frame for me!
261 &mov (&DWP(12,"esp"),$key); # save key
262
263 &xor ($s0,&DWP(0,$key)); # xor with key
264 &xor ($s1,&DWP(4,$key));
265 &xor ($s2,&DWP(8,$key));
266 &xor ($s3,&DWP(12,$key));
267
268 &mov ($acc,&DWP(240,$key)); # load key->rounds
269
270 if ($small_footprint) {
 # Looped build: $acc = 2*rounds-2, then $acc = $key+8*$acc, i.e.
 # $key+16*(rounds-1); the loop below runs until $key reaches it.
 # NOTE(review): relies on &DWP treating an omitted scale as 1 --
 # confirm in x86asm.pl.
271 &lea ($acc,&DWP(-2,$acc,$acc));
272 &lea ($acc,&DWP(0,$key,$acc,8));
273 &mov (&DWP(16,"esp"),$acc); # end of key schedule
274 &align (4);
275 &set_label("loop");
276 if ($vertical_spin) {
277 &encvert("ebp",$s0,$s1,$s2,$s3);
278 } else {
279 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
280 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
281 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
282 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
283 }
284 &add ($key,16); # advance rd_key
285 &xor ($s0,&DWP(0,$key));
286 &xor ($s1,&DWP(4,$key));
287 &xor ($s2,&DWP(8,$key));
288 &xor ($s3,&DWP(12,$key));
289 &cmp ($key,&DWP(16,"esp"));
290 &mov (&DWP(12,"esp"),$key);
291 &jb (&label("loop"));
292 }
293 else {
 # Unrolled build: dispatch on key->rounds.  The three sections fall
 # through into one another, so a 14-round key executes 2+2+9 table
 # rounds, a 12-round key 2+9 and a 10-round key 9.
294 &cmp ($acc,10);
295 &jle (&label("10rounds"));
296 &cmp ($acc,12);
297 &jle (&label("12rounds"));
298
299 &set_label("14rounds");
300 for ($i=1;$i<3;$i++) {
301 if ($vertical_spin) {
302 &encvert("ebp",$s0,$s1,$s2,$s3);
303 } else {
304 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
305 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
306 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
307 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
308 }
309 &xor ($s0,&DWP(16*$i+0,$key));
310 &xor ($s1,&DWP(16*$i+4,$key));
311 &xor ($s2,&DWP(16*$i+8,$key));
312 &xor ($s3,&DWP(16*$i+12,$key));
313 }
314 &add ($key,32);
315 &mov (&DWP(12,"esp"),$key); # advance rd_key
316 &set_label("12rounds");
317 for ($i=1;$i<3;$i++) {
318 if ($vertical_spin) {
319 &encvert("ebp",$s0,$s1,$s2,$s3);
320 } else {
321 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
322 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
323 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
324 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
325 }
326 &xor ($s0,&DWP(16*$i+0,$key));
327 &xor ($s1,&DWP(16*$i+4,$key));
328 &xor ($s2,&DWP(16*$i+8,$key));
329 &xor ($s3,&DWP(16*$i+12,$key));
330 }
331 &add ($key,32);
332 &mov (&DWP(12,"esp"),$key); # advance rd_key
333 &set_label("10rounds");
334 for ($i=1;$i<10;$i++) {
335 if ($vertical_spin) {
336 &encvert("ebp",$s0,$s1,$s2,$s3);
337 } else {
338 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
339 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
340 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
341 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
342 }
343 &xor ($s0,&DWP(16*$i+0,$key));
344 &xor ($s1,&DWP(16*$i+4,$key));
345 &xor ($s2,&DWP(16*$i+8,$key));
346 &xor ($s3,&DWP(16*$i+12,$key));
347 }
348 }
349
350 if ($vertical_spin) {
351 # "reincarnate" some registers for "horizontal" spin...
352 &mov ($s1="ebx",$key="edi");
353 &mov ($s2="ecx",$acc="esi");
354 }
355 &enclast(0,"ebp",$s0,$s1,$s2,$s3);
356 &enclast(1,"ebp",$s1,$s2,$s3,$s0);
357 &enclast(2,"ebp",$s2,$s3,$s0,$s1);
358 &enclast(3,"ebp",$s3,$s0,$s1,$s2);
359
 # Step to the final round key: one slot further in the looped build;
 # 160 bytes in the unrolled build, where $key was not advanced across
 # the "10rounds" section.
360 &add ($key,$small_footprint?16:160);
361 &xor ($s0,&DWP(0,$key));
362 &xor ($s1,&DWP(4,$key));
363 &xor ($s2,&DWP(8,$key));
364 &xor ($s3,&DWP(12,$key));
365
366 &ret ();
367
368&set_label("AES_Te",64); # Yes! I keep it in the code segment!
369 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
370 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
371 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
372 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
373 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
374 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
375 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
376 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
377 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
378 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
379 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
380 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
381 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
382 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
383 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
384 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
385 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
386 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
387 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
388 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
389 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
390 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
391 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
392 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
393 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
394 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
395 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
396 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
397 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
398 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
399 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
400 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
401 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
402 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
403 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
404 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
405 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
406 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
407 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
408 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
409 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
410 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
411 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
412 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
413 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
414 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
415 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
416 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
417 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
418 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
419 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
420 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
421 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
422 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
423 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
424 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
425 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
426 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
427 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
428 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
429 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
430 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
431 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
432 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
433#rcon:
434 &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008);
435 &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
436 &data_word(0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0);
437&function_end_B("_x86_AES_encrypt");
438
439# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
# Public entry point: loads the 16-byte input block into $s0..$s3, points
# %ebp at AES_Te, calls _x86_AES_encrypt and stores the four result words
# to the output buffer.
440&public_label("AES_Te");
441&function_begin("AES_encrypt");
442 &mov ($acc,&wparam(0)); # load inp
443 &mov ($key,&wparam(2)); # load key
444
 # Build a private frame for the callee: remember the incoming %esp in
 # $s0, move %esp down, round it to a 64-byte boundary and stash the
 # old value at 16(%esp).
 # NOTE(review): the +4 presumably compensates for the return address
 # pushed by the call below -- confirm against the frame layout.
445 &mov ($s0,"esp");
446 &sub ("esp",24);
447 &and ("esp",-64);
448 &add ("esp",4);
449 &mov (&DWP(16,"esp"),$s0);
450
 # call/pop yields the run-time address of pic_point in %ebp; adding
 # the link-time distance to AES_Te turns it into the table address.
451 &call (&label("pic_point")); # make it PIC!
452 &set_label("pic_point");
453 &blindpop("ebp");
454 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
455
456 &mov ($s0,&DWP(0,$acc)); # load input data
457 &mov ($s1,&DWP(4,$acc));
458 &mov ($s2,&DWP(8,$acc));
459 &mov ($s3,&DWP(12,$acc));
460
461 &call ("_x86_AES_encrypt");
462
 # restore the stack pointer saved at 16(%esp) above
463 &mov ("esp",&DWP(16,"esp"));
464
465 &mov ($acc,&wparam(1)); # load out
466 &mov (&DWP(0,$acc),$s0); # write output data
467 &mov (&DWP(4,$acc),$s1);
468 &mov (&DWP(8,$acc),$s2);
469 &mov (&DWP(12,$acc),$s3);
470&function_end("AES_encrypt");
471
472#------------------------------------------------------------------#
473
# decstep($i,$td,@s): emit the code that computes output word $i (0..3)
# of one decryption round.  Four entries are fetched from the table at
# $td (index scaled by 8, byte offsets 0, 3, 2 and 1) using byte 0 of
# $s[0], byte 1 of $s[1], byte 2 of $s[2] and byte 3 of $s[3], and xor-ed
# into $out.  Words 0 and 1 are parked in 4(%esp) and 8(%esp), word 2
# stays in $acc, and the $i==3 call reloads all three into registers and
# restores $key from 12(%esp).
474sub decstep()
475{ my ($i,$td,@s) = @_;
476 my $tmp = $key;
477 my $out = $i==3?$s[0]:$acc;
478
479 # no instructions are reordered, as performance appears
480 # optimal... or rather that all attempts to reorder didn't
481 # result in better performance [which by the way is not a
482 # bit lower than encryption].
483 if($i==3) { &mov ($key,&DWP(12,"esp")); }
484 else { &mov ($out,$s[0]); }
485 &and ($out,0xFF);
486 &mov ($out,&DWP(0,$td,$out,8));
487
488 if ($i==3) { $tmp=$s[1]; }
489 &movz ($tmp,&HB($s[1]));
490 &xor ($out,&DWP(3,$td,$tmp,8));
491
492 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
493 else { &mov ($tmp,$s[2]); }
494 &shr ($tmp,16);
495 &and ($tmp,0xFF);
496 &xor ($out,&DWP(2,$td,$tmp,8));
497
498 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
499 else { &mov ($tmp,$s[3]); }
500 &shr ($tmp,24);
501 &xor ($out,&DWP(1,$td,$tmp,8));
502 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
503 if ($i==3) { &mov ($s[3],&DWP(4,"esp")); }
504 &comment();
505}
506
# declast($i,$td,@s): emit the code that computes output word $i (0..3)
# of the final decryption round.  Each byte is looked up with a
# zero-extending byte load from the 256-entry byte table located 2048
# bytes past $td (labelled Td4 in the data below), shifted into its lane
# (0, 8, 16, 24 bits) and xor-ed into $out.  Stack and register handling
# follow decstep(): words 0 and 1 go to 4(%esp)/8(%esp), word 2 stays in
# $acc, and the $i==3 call reloads them and restores $key from 12(%esp).
507sub declast()
508{ my ($i,$td,@s)=@_;
509 my $tmp = $key;
510 my $out = $i==3?$s[0]:$acc;
511
512 if($i==3) { &mov ($key,&DWP(12,"esp")); }
513 else { &mov ($out,$s[0]); }
514 &and ($out,0xFF);
515 &movz ($out,&BP(2048,$td,$out,1));
516
517 if ($i==3) { $tmp=$s[1]; }
518 &movz ($tmp,&HB($s[1]));
519 &movz ($tmp,&BP(2048,$td,$tmp,1));
520 &shl ($tmp,8);
521 &xor ($out,$tmp);
522
523 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
# was a bare "mov (...)"; spelled &mov like every other emitter call
524 else { &mov ($tmp,$s[2]); }
525 &shr ($tmp,16);
526 &and ($tmp,0xFF);
527 &movz ($tmp,&BP(2048,$td,$tmp,1));
528 &shl ($tmp,16);
529 &xor ($out,$tmp);
530
531 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
532 else { &mov ($tmp,$s[3]); }
533 &shr ($tmp,24);
534 &movz ($tmp,&BP(2048,$td,$tmp,1));
535 &shl ($tmp,24);
536 &xor ($out,$tmp);
537 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
538 if ($i==3) { &mov ($s[3],&DWP(4,"esp")); }
539}
540
# Generate _x86_AES_decrypt, the internal block-decryption routine.
# As generated here it expects the four state words in $s0..$s3, the key
# schedule pointer in $key and the lookup-table base in %ebp (the $td
# argument handed to every dec* helper).  It uses stack slots relative to
# %esp that the caller must have reserved, and leaves the result in
# $s0..$s3.  Note the register list passed to the helpers rotates in the
# opposite direction to the encryption generator.
541&public_label("AES_Td");
542&function_begin_C("_x86_AES_decrypt");
543 # note that caller is expected to allocate stack frame for me!
544 &mov (&DWP(12,"esp"),$key); # save key
545
546 &xor ($s0,&DWP(0,$key)); # xor with key
547 &xor ($s1,&DWP(4,$key));
548 &xor ($s2,&DWP(8,$key));
549 &xor ($s3,&DWP(12,$key));
550
551 &mov ($acc,&DWP(240,$key)); # load key->rounds
552
553 if ($small_footprint) {
 # Looped build: $acc = 2*rounds-2, then $acc = $key+8*$acc, i.e.
 # $key+16*(rounds-1); the loop below runs until $key reaches it.
 # NOTE(review): relies on &DWP treating an omitted scale as 1 --
 # confirm in x86asm.pl.
554 &lea ($acc,&DWP(-2,$acc,$acc));
555 &lea ($acc,&DWP(0,$key,$acc,8));
556 &mov (&DWP(16,"esp"),$acc); # end of key schedule
557 &align (4);
558 &set_label("loop");
559 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
560 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
561 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
562 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
563 &add ($key,16); # advance rd_key
564 &xor ($s0,&DWP(0,$key));
565 &xor ($s1,&DWP(4,$key));
566 &xor ($s2,&DWP(8,$key));
567 &xor ($s3,&DWP(12,$key));
568 &cmp ($key,&DWP(16,"esp"));
569 &mov (&DWP(12,"esp"),$key);
570 &jb (&label("loop"));
571 }
572 else {
 # Unrolled build: dispatch on key->rounds.  The three sections fall
 # through into one another, so a 14-round key executes 2+2+9 table
 # rounds, a 12-round key 2+9 and a 10-round key 9.
573 &cmp ($acc,10);
574 &jle (&label("10rounds"));
575 &cmp ($acc,12);
576 &jle (&label("12rounds"));
577
578 &set_label("14rounds");
579 for ($i=1;$i<3;$i++) {
580 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
581 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
582 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
583 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
584 &xor ($s0,&DWP(16*$i+0,$key));
585 &xor ($s1,&DWP(16*$i+4,$key));
586 &xor ($s2,&DWP(16*$i+8,$key));
587 &xor ($s3,&DWP(16*$i+12,$key));
588 }
589 &add ($key,32);
590 &mov (&DWP(12,"esp"),$key); # advance rd_key
591 &set_label("12rounds");
592 for ($i=1;$i<3;$i++) {
593 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
594 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
595 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
596 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
597 &xor ($s0,&DWP(16*$i+0,$key));
598 &xor ($s1,&DWP(16*$i+4,$key));
599 &xor ($s2,&DWP(16*$i+8,$key));
600 &xor ($s3,&DWP(16*$i+12,$key));
601 }
602 &add ($key,32);
603 &mov (&DWP(12,"esp"),$key); # advance rd_key
604 &set_label("10rounds");
605 for ($i=1;$i<10;$i++) {
606 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
607 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
608 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
609 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
610 &xor ($s0,&DWP(16*$i+0,$key));
611 &xor ($s1,&DWP(16*$i+4,$key));
612 &xor ($s2,&DWP(16*$i+8,$key));
613 &xor ($s3,&DWP(16*$i+12,$key));
614 }
615 }
616
617 &declast(0,"ebp",$s0,$s3,$s2,$s1);
618 &declast(1,"ebp",$s1,$s0,$s3,$s2);
619 &declast(2,"ebp",$s2,$s1,$s0,$s3);
620 &declast(3,"ebp",$s3,$s2,$s1,$s0);
621
 # Step to the final round key: one slot further in the looped build;
 # 160 bytes in the unrolled build, where $key was not advanced across
 # the "10rounds" section.
622 &add ($key,$small_footprint?16:160);
623 &xor ($s0,&DWP(0,$key));
624 &xor ($s1,&DWP(4,$key));
625 &xor ($s2,&DWP(8,$key));
626 &xor ($s3,&DWP(12,$key));
627
628 &ret ();
629
630&set_label("AES_Td",64); # Yes! I keep it in the code segment!
631 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
632 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
633 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
634 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
635 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
636 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
637 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
638 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
639 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
640 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
641 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
642 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
643 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
644 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
645 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
646 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
647 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
648 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
649 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
650 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
651 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
652 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
653 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
654 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
655 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
656 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
657 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
658 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
659 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
660 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
661 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
662 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
663 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
664 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
665 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
666 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
667 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
668 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
669 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
670 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
671 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
672 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
673 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
674 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
675 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
676 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
677 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
678 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
679 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
680 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
681 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
682 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
683 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
684 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
685 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
686 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
687 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
688 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
689 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
690 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
691 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
692 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
693 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
694 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
695#Td4:
696 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
697 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
698 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
699 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
700 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
701 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
702 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
703 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
704 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
705 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
706 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
707 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
708 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
709 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
710 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
711 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
712 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
713 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
714 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
715 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
716 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
717 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
718 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
719 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
720 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
721 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
722 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
723 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
724 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
725 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
726 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
727 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
728&function_end_B("_x86_AES_decrypt");
729
730# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
# Public entry point: points %ebp at AES_Td, touches the Td4 byte table,
# loads the 16-byte input block into $s0..$s3, calls _x86_AES_decrypt and
# stores the four result words to the output buffer.
731&public_label("AES_Td");
732&function_begin("AES_decrypt");
733 &mov ($acc,&wparam(0)); # load inp
734 &mov ($key,&wparam(2)); # load key
735
 # Build a private frame for the callee: remember the incoming %esp in
 # $s0, move %esp down, round it to a 64-byte boundary and stash the
 # old value at 16(%esp).
 # NOTE(review): the +4 presumably compensates for the return address
 # pushed by the call below -- confirm against the frame layout.
736 &mov ($s0,"esp");
737 &sub ("esp",24);
738 &and ("esp",-64);
739 &add ("esp",4);
740 &mov (&DWP(16,"esp"),$s0);
741
 # call/pop yields the run-time address of pic_point in %ebp; adding
 # the link-time distance to AES_Td turns it into the table address.
742 &call (&label("pic_point")); # make it PIC!
743 &set_label("pic_point");
744 &blindpop("ebp");
745 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
746
 # Eight loads 32 bytes apart cover the 256 bytes that start 2048 bytes
 # past AES_Td.  The loaded values are discarded ($s0..$s3 are
 # overwritten with the input just below); %ebp is biased by 2048+128
 # for the duration and restored afterwards.
 # NOTE(review): the 32-byte stride presumably matches the smallest
 # cache line size targeted -- confirm.
747 # prefetch Td4
748 &lea ("ebp",&DWP(2048+128,"ebp"));
749 &mov ($s0,&DWP(0-128,"ebp"));
750 &mov ($s1,&DWP(32-128,"ebp"));
751 &mov ($s2,&DWP(64-128,"ebp"));
752 &mov ($s3,&DWP(96-128,"ebp"));
753 &mov ($s0,&DWP(128-128,"ebp"));
754 &mov ($s1,&DWP(160-128,"ebp"));
755 &mov ($s2,&DWP(192-128,"ebp"));
756 &mov ($s3,&DWP(224-128,"ebp"));
757 &lea ("ebp",&DWP(-2048-128,"ebp"));
758
759 &mov ($s0,&DWP(0,$acc)); # load input data
760 &mov ($s1,&DWP(4,$acc));
761 &mov ($s2,&DWP(8,$acc));
762 &mov ($s3,&DWP(12,$acc));
763
764 &call ("_x86_AES_decrypt");
765
 # restore the stack pointer saved at 16(%esp) above
766 &mov ("esp",&DWP(16,"esp"));
767
768 &mov ($acc,&wparam(1)); # load out
769 &mov (&DWP(0,$acc),$s0); # write output data
770 &mov (&DWP(4,$acc),$s1);
771 &mov (&DWP(8,$acc),$s2);
772 &mov (&DWP(12,$acc),$s3);
773&function_end("AES_decrypt");
774
775# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
776# size_t length, const AES_KEY *key,
777# unsigned char *ivp,const int enc);
778{
# AES_cbc_encrypt: CBC-mode driver around _x86_AES_encrypt and
# _x86_AES_decrypt.  Arguments, as read below: wparam(0)=inp,
# wparam(1)=out, wparam(2)=len, wparam(3)=key, wparam(4)=ivp,
# wparam(5)=enc (zero selects the DECRYPT path).  A zero len returns
# immediately.  Otherwise a 64-byte aligned scratch frame is carved out
# below the caller's %esp, the arguments are cached in it, and the key
# schedule may be copied into it; 240 bytes of that copy are zeroed
# before the original %esp is restored.
779# stack frame layout
780# -4(%esp) 0(%esp) return address
781# 0(%esp) 4(%esp) tmp1
782# 4(%esp) 8(%esp) tmp2
783# 8(%esp) 12(%esp) key
784# 12(%esp) 16(%esp) end of key schedule
785my $_esp=&DWP(16,"esp"); #saved %esp
786my $_inp=&DWP(20,"esp"); #copy of wparam(0)
787my $_out=&DWP(24,"esp"); #copy of wparam(1)
788my $_len=&DWP(28,"esp"); #copy of wparam(2)
789my $_key=&DWP(32,"esp"); #copy of wparam(3)
790my $_ivp=&DWP(36,"esp"); #copy of wparam(4)
791my $_tmp=&DWP(40,"esp"); #volatile variable
792my $ivec=&DWP(44,"esp"); #ivec[16]
793my $aes_key=&DWP(60,"esp"); #copy of aes_key
794my $mark=&DWP(60+240,"esp"); #copy of aes_key->rounds
795
796&public_label("AES_Te");
797&public_label("AES_Td");
798&function_begin("AES_cbc_encrypt");
799 &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len
800 &cmp ($s2,0);
801 &je (&label("enc_out")); # len==0: nothing to do
802
803 &call (&label("pic_point")); # make it PIC!
804 &set_label("pic_point");
805 &blindpop("ebp");
806
807 &pushf ();
808 &cld ();
809
810 &cmp (&wparam(5),0);
811 &je (&label("DECRYPT")); # enc==0 selects decryption
812
813 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
814
815 # allocate aligned stack frame...
816 &lea ($key,&DWP(-64-244,"esp"));
817 &and ($key,-64);
818
819 # ... and make sure it doesn't alias with AES_Te modulo 4096
820 &mov ($s0,"ebp");
821 &lea ($s1,&DWP(2048,"ebp"));
822 &mov ($s3,$key);
823 &and ($s0,0xfff); # s = %ebp&0xfff
824 &and ($s1,0xfff); # e = (%ebp+2048)&0xfff
825 &and ($s3,0xfff); # p = %esp&0xfff
826
827 &cmp ($s3,$s1); # if (p>=e) %esp -= (p-e);
828 &jb (&label("te_break_out"));
829 &sub ($s3,$s1);
830 &sub ($key,$s3);
831 &jmp (&label("te_ok"));
832 &set_label("te_break_out"); # else %esp -= (p-s)&0xfff + framesz;
833 &sub ($s3,$s0);
834 &and ($s3,0xfff);
835 &add ($s3,64+256);
836 &sub ($key,$s3);
837 &align (4);
838 &set_label("te_ok");
839
840 &mov ($s0,&wparam(0)); # load inp
841 &mov ($s1,&wparam(1)); # load out
842 &mov ($s3,&wparam(3)); # load key
843 &mov ($acc,&wparam(4)); # load ivp
844
845 &exch ("esp",$key);
846 &add ("esp",4); # reserve for return address!
847 &mov ($_esp,$key); # save %esp
848
849 &mov ($_inp,$s0); # save copy of inp
850 &mov ($_out,$s1); # save copy of out
851 &mov ($_len,$s2); # save copy of len
852 &mov ($_key,$s3); # save copy of key
853 &mov ($_ivp,$acc); # save copy of ivp
854
855 &mov ($mark,0); # copy of aes_key->rounds = 0;
856 if ($compromise) {
857 &cmp ($s2,$compromise);
858 &jb (&label("skip_ecopy"));
859 }
860 # do we copy key schedule to stack?
861 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
862 &mov ($s2 eq "ecx" ? $s2 : "",244/4);
863 &sub ($s1,"ebp");
864 &mov ("esi",$s3);
865 &and ($s1,0xfff); # (key-%ebp)&0xfff
866 &lea ("edi",$aes_key);
867 &cmp ($s1,2048);
868 &jb (&label("do_ecopy"));
869 &cmp ($s1,4096-244);
870 &jb (&label("skip_ecopy"));
871 &align (4);
872 &set_label("do_ecopy");
873 &mov ($_key,"edi"); # use the stack copy from now on
874 &data_word(0xA5F3F689); # rep movsd
875 &set_label("skip_ecopy");
876
877 &mov ($acc,$s0);
878 &mov ($key,16); # 16 iterations * 128 bytes = 2048 bytes touched
879 &align (4);
880 &set_label("prefetch_te");
881 &mov ($s0,&DWP(0,"ebp"));
882 &mov ($s1,&DWP(32,"ebp"));
883 &mov ($s2,&DWP(64,"ebp"));
884 &mov ($s3,&DWP(96,"ebp"));
885 &lea ("ebp",&DWP(128,"ebp"));
886 &dec ($key);
887 &jnz (&label("prefetch_te"));
888 &sub ("ebp",2048); # rewind %ebp to the table start
889
890 &mov ($s2,$_len);
891 &mov ($key,$_ivp);
892 &test ($s2,0xFFFFFFF0);
893 &jz (&label("enc_tail")); # short input...
894
895 &mov ($s0,&DWP(0,$key)); # load iv
896 &mov ($s1,&DWP(4,$key));
897
898 &align (4);
899 &set_label("enc_loop");
900 &mov ($s2,&DWP(8,$key));
901 &mov ($s3,&DWP(12,$key));
902
903 &xor ($s0,&DWP(0,$acc)); # xor input data
904 &xor ($s1,&DWP(4,$acc));
905 &xor ($s2,&DWP(8,$acc));
906 &xor ($s3,&DWP(12,$acc));
907
908 &mov ($key,$_key); # load key
909 &call ("_x86_AES_encrypt");
910
911 &mov ($acc,$_inp); # load inp
912 &mov ($key,$_out); # load out
913
914 &mov (&DWP(0,$key),$s0); # save output data
915 &mov (&DWP(4,$key),$s1);
916 &mov (&DWP(8,$key),$s2);
917 &mov (&DWP(12,$key),$s3);
918
919 &mov ($s2,$_len); # load len
920
921 &lea ($acc,&DWP(16,$acc));
922 &mov ($_inp,$acc); # save inp
923
924 &lea ($s3,&DWP(16,$key));
925 &mov ($_out,$s3); # save out
926
927 &sub ($s2,16);
928 &test ($s2,0xFFFFFFF0);
929 &mov ($_len,$s2); # save len
930 &jnz (&label("enc_loop")); # flags are from the test above
931 &test ($s2,15);
932 &jnz (&label("enc_tail"));
933 &mov ($acc,$_ivp); # load ivp
934 &mov ($s2,&DWP(8,$key)); # restore last dwords
935 &mov ($s3,&DWP(12,$key));
936 &mov (&DWP(0,$acc),$s0); # save ivec
937 &mov (&DWP(4,$acc),$s1);
938 &mov (&DWP(8,$acc),$s2);
939 &mov (&DWP(12,$acc),$s3);
940
941 &cmp ($mark,0); # was the key schedule copied?
942 &mov ("edi",$_key);
943 &je (&label("skip_ezero"));
944 # zero copy of key schedule
945 &mov ("ecx",240/4);
946 &xor ("eax","eax");
947 &align (4);
948 &data_word(0xABF3F689); # rep stosd
949 &set_label("skip_ezero");
950 &mov ("esp",$_esp);
951 &popf ();
952 &set_label("enc_out");
953 &function_end_A();
954 &pushf (); # kludge, never executed
955
956 &align (4);
957 &set_label("enc_tail"); # fewer than 16 bytes left: copy, zero-pad, encrypt
958 &mov ($s0,$key eq "edi" ? $key : "");
959 &mov ($key,$_out); # load out
960 &push ($s0); # push ivp
961 &mov ($s1,16);
962 &sub ($s1,$s2); # number of padding bytes = 16-len
963 &cmp ($key,$acc); # compare with inp
964 &je (&label("enc_in_place"));
965 &align (4);
966 &data_word(0xA4F3F689); # rep movsb # copy input
967 &jmp (&label("enc_skip_in_place"));
968 &set_label("enc_in_place");
969 &lea ($key,&DWP(0,$key,$s2));
970 &set_label("enc_skip_in_place");
971 &mov ($s2,$s1);
972 &xor ($s0,$s0);
973 &align (4);
974 &data_word(0xAAF3F689); # rep stosb # zero tail
975 &pop ($key); # pop ivp
976
977 &mov ($acc,$_out); # output as input
978 &mov ($s0,&DWP(0,$key));
979 &mov ($s1,&DWP(4,$key));
980 &mov ($_len,16); # len=16
981 &jmp (&label("enc_loop")); # one more spin...
982
983#----------------------------- DECRYPT -----------------------------#
984&align (4);
985&set_label("DECRYPT");
986 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
987
988 # allocate aligned stack frame...
989 &lea ($key,&DWP(-64-244,"esp"));
990 &and ($key,-64);
991
992 # ... and make sure it doesn't alias with AES_Td modulo 4096
993 &mov ($s0,"ebp");
994 &lea ($s1,&DWP(2048+256,"ebp"));
995 &mov ($s3,$key);
996 &and ($s0,0xfff); # s = %ebp&0xfff
997 &and ($s1,0xfff); # e = (%ebp+2048+256)&0xfff
998 &and ($s3,0xfff); # p = %esp&0xfff
999
1000 &cmp ($s3,$s1); # if (p>=e) %esp -= (p-e);
1001 &jb (&label("td_break_out"));
1002 &sub ($s3,$s1);
1003 &sub ($key,$s3);
1004 &jmp (&label("td_ok"));
1005 &set_label("td_break_out"); # else %esp -= (p-s)&0xfff + framesz;
1006 &sub ($s3,$s0);
1007 &and ($s3,0xfff);
1008 &add ($s3,64+256);
1009 &sub ($key,$s3);
1010 &align (4);
1011 &set_label("td_ok");
1012
1013 &mov ($s0,&wparam(0)); # load inp
1014 &mov ($s1,&wparam(1)); # load out
1015 &mov ($s3,&wparam(3)); # load key
1016 &mov ($acc,&wparam(4)); # load ivp
1017
1018 &exch ("esp",$key);
1019 &add ("esp",4); # reserve for return address!
1020 &mov ($_esp,$key); # save %esp
1021
1022 &mov ($_inp,$s0); # save copy of inp
1023 &mov ($_out,$s1); # save copy of out
1024 &mov ($_len,$s2); # save copy of len
1025 &mov ($_key,$s3); # save copy of key
1026 &mov ($_ivp,$acc); # save copy of ivp
1027
1028 &mov ($mark,0); # copy of aes_key->rounds = 0;
1029 if ($compromise) {
1030 &cmp ($s2,$compromise);
1031 &jb (&label("skip_dcopy"));
1032 }
1033 # do we copy key schedule to stack?
1034 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
1035 &mov ($s2 eq "ecx" ? $s2 : "",244/4);
1036 &sub ($s1,"ebp");
1037 &mov ("esi",$s3);
1038 &and ($s1,0xfff); # (key-%ebp)&0xfff
1039 &lea ("edi",$aes_key);
1040 &cmp ($s1,2048+256);
1041 &jb (&label("do_dcopy"));
1042 &cmp ($s1,4096-244);
1043 &jb (&label("skip_dcopy"));
1044 &align (4);
1045 &set_label("do_dcopy");
1046 &mov ($_key,"edi"); # use the stack copy from now on
1047 &data_word(0xA5F3F689); # rep movsd
1048 &set_label("skip_dcopy");
1049
1050 &mov ($acc,$s0);
1051 &mov ($key,18); # 18 iterations * 128 bytes = 2048+256 bytes touched
1052 &align (4);
1053 &set_label("prefetch_td");
1054 &mov ($s0,&DWP(0,"ebp"));
1055 &mov ($s1,&DWP(32,"ebp"));
1056 &mov ($s2,&DWP(64,"ebp"));
1057 &mov ($s3,&DWP(96,"ebp"));
1058 &lea ("ebp",&DWP(128,"ebp"));
1059 &dec ($key);
1060 &jnz (&label("prefetch_td"));
1061 &sub ("ebp",2048+256); # rewind %ebp to the table start
1062
1063 &cmp ($acc,$_out);
1064 &je (&label("dec_in_place")); # in-place processing...
1065
1066 &mov ($key,$_ivp); # load ivp
1067 &mov ($_tmp,$key); # _tmp tracks the block to xor with next
1068
1069 &align (4);
1070 &set_label("dec_loop");
1071 &mov ($s0,&DWP(0,$acc)); # read input
1072 &mov ($s1,&DWP(4,$acc));
1073 &mov ($s2,&DWP(8,$acc));
1074 &mov ($s3,&DWP(12,$acc));
1075
1076 &mov ($key,$_key); # load key
1077 &call ("_x86_AES_decrypt");
1078
1079 &mov ($key,$_tmp); # load ivp
1080 &mov ($acc,$_len); # load len
1081 &xor ($s0,&DWP(0,$key)); # xor iv
1082 &xor ($s1,&DWP(4,$key));
1083 &xor ($s2,&DWP(8,$key));
1084 &xor ($s3,&DWP(12,$key));
1085
1086 &sub ($acc,16);
1087 &jc (&label("dec_partial")); # fewer than 16 bytes were left
1088 &mov ($_len,$acc); # save len
1089 &mov ($acc,$_inp); # load inp
1090 &mov ($key,$_out); # load out
1091
1092 &mov (&DWP(0,$key),$s0); # write output
1093 &mov (&DWP(4,$key),$s1);
1094 &mov (&DWP(8,$key),$s2);
1095 &mov (&DWP(12,$key),$s3);
1096
1097 &mov ($_tmp,$acc); # save ivp
1098 &lea ($acc,&DWP(16,$acc));
1099 &mov ($_inp,$acc); # save inp
1100
1101 &lea ($key,&DWP(16,$key));
1102 &mov ($_out,$key); # save out
1103
1104 &jnz (&label("dec_loop")); # flags still from the sub above (mov/lea leave them alone)
1105 &mov ($key,$_tmp); # load temp ivp
1106 &set_label("dec_end");
1107 &mov ($acc,$_ivp); # load user ivp
1108 &mov ($s0,&DWP(0,$key)); # load iv
1109 &mov ($s1,&DWP(4,$key));
1110 &mov ($s2,&DWP(8,$key));
1111 &mov ($s3,&DWP(12,$key));
1112 &mov (&DWP(0,$acc),$s0); # copy back to user
1113 &mov (&DWP(4,$acc),$s1);
1114 &mov (&DWP(8,$acc),$s2);
1115 &mov (&DWP(12,$acc),$s3);
1116 &jmp (&label("dec_out"));
1117
1118 &align (4);
1119 &set_label("dec_partial");
1120 &lea ($key,$ivec);
1121 &mov (&DWP(0,$key),$s0); # dump output to stack
1122 &mov (&DWP(4,$key),$s1);
1123 &mov (&DWP(8,$key),$s2);
1124 &mov (&DWP(12,$key),$s3);
1125 &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$acc)); # ecx = bytes left (acc holds len-16 here)
1126 &mov ($acc eq "esi" ? $acc : "",$key);
1127 &mov ($key eq "edi" ? $key : "",$_out); # load out
1128 &data_word(0xA4F3F689); # rep movsb # copy output
1129 &mov ($key,$_inp); # use inp as temp ivp
1130 &jmp (&label("dec_end"));
1131
1132 &align (4);
1133 &set_label("dec_in_place");
1134 &set_label("dec_in_place_loop");
1135 &lea ($key,$ivec);
1136 &mov ($s0,&DWP(0,$acc)); # read input
1137 &mov ($s1,&DWP(4,$acc));
1138 &mov ($s2,&DWP(8,$acc));
1139 &mov ($s3,&DWP(12,$acc));
1140
1141 &mov (&DWP(0,$key),$s0); # copy to temp
1142 &mov (&DWP(4,$key),$s1);
1143 &mov (&DWP(8,$key),$s2);
1144 &mov (&DWP(12,$key),$s3);
1145
1146 &mov ($key,$_key); # load key
1147 &call ("_x86_AES_decrypt");
1148
1149 &mov ($key,$_ivp); # load ivp
1150 &mov ($acc,$_out); # load out
1151 &xor ($s0,&DWP(0,$key)); # xor iv
1152 &xor ($s1,&DWP(4,$key));
1153 &xor ($s2,&DWP(8,$key));
1154 &xor ($s3,&DWP(12,$key));
1155
1156 &mov (&DWP(0,$acc),$s0); # write output
1157 &mov (&DWP(4,$acc),$s1);
1158 &mov (&DWP(8,$acc),$s2);
1159 &mov (&DWP(12,$acc),$s3);
1160
1161 &lea ($acc,&DWP(16,$acc));
1162 &mov ($_out,$acc); # save out
1163
1164 &lea ($acc,$ivec);
1165 &mov ($s0,&DWP(0,$acc)); # read temp
1166 &mov ($s1,&DWP(4,$acc));
1167 &mov ($s2,&DWP(8,$acc));
1168 &mov ($s3,&DWP(12,$acc));
1169
1170 &mov (&DWP(0,$key),$s0); # copy iv
1171 &mov (&DWP(4,$key),$s1);
1172 &mov (&DWP(8,$key),$s2);
1173 &mov (&DWP(12,$key),$s3);
1174
1175 &mov ($acc,$_inp); # load inp
1176
1177 &lea ($acc,&DWP(16,$acc));
1178 &mov ($_inp,$acc); # save inp
1179
1180 &mov ($s2,$_len); # load len
1181 &sub ($s2,16);
1182 &jc (&label("dec_in_place_partial"));
1183 &mov ($_len,$s2); # save len
1184 &jnz (&label("dec_in_place_loop")); # flags still from the sub above
1185 &jmp (&label("dec_out"));
1186
1187 &align (4);
1188 &set_label("dec_in_place_partial");
1189 # one can argue if this is actually required...
1190 &mov ($key eq "edi" ? $key : "",$_out);
1191 &lea ($acc eq "esi" ? $acc : "",$ivec);
1192 &lea ($key,&DWP(0,$key,$s2)); # s2 is len-16 (negative) here
1193 &lea ($acc,&DWP(16,$acc,$s2));
1194 &neg ($s2 eq "ecx" ? $s2 : ""); # ecx = 16-len bytes to copy
1195 &data_word(0xA4F3F689); # rep movsb # restore tail
1196
1197 &align (4);
1198 &set_label("dec_out");
1199 &cmp ($mark,0); # was the key schedule copied?
1200 &mov ("edi",$_key);
1201 &je (&label("skip_dzero"));
1202 # zero copy of key schedule
1203 &mov ("ecx",240/4);
1204 &xor ("eax","eax");
1205 &align (4);
1206 &data_word(0xABF3F689); # rep stosd
1207 &set_label("skip_dzero");
1208 &mov ("esp",$_esp);
1209 &popf ();
1210&function_end("AES_cbc_encrypt");
1211}
1212
1213#------------------------------------------------------------------#
1214
# enckey: emits one key-expansion step.  On entry to the emitted code
# %eax holds the word being updated, %edx the word whose bytes select the
# table entries, %ecx the rcon index and %ebp the table base (callers in
# this file point it at AES_Te).  Four masked table loads are xor-ed into
# %eax, followed by the dword at 2048(%ebp,%ecx,4).  %ebx and %esi are
# clobbered and %edx is left shifted right by 16.
# NOTE(review): the 8-byte index scale and the 0/2 byte displacements
# depend on the table layout, which is defined outside this view.
1215sub enckey()
1216{
1217 &movz ("esi",&LB("edx")); # rk[i]>>0
1218 &mov ("ebx",&DWP(2,"ebp","esi",8));
1219 &movz ("esi",&HB("edx")); # rk[i]>>8
1220 &and ("ebx",0xFF000000);
1221 &xor ("eax","ebx");
1222
1223 &mov ("ebx",&DWP(2,"ebp","esi",8));
1224 &shr ("edx",16); # expose the upper two bytes as %dl/%dh
1225 &and ("ebx",0x000000FF);
1226 &movz ("esi",&LB("edx")); # rk[i]>>16
1227 &xor ("eax","ebx");
1228
1229 &mov ("ebx",&DWP(0,"ebp","esi",8));
1230 &movz ("esi",&HB("edx")); # rk[i]>>24
1231 &and ("ebx",0x0000FF00);
1232 &xor ("eax","ebx");
1233
1234 &mov ("ebx",&DWP(0,"ebp","esi",8));
1235 &and ("ebx",0x00FF0000);
1236 &xor ("eax","ebx");
1237
1238 &xor ("eax",&DWP(2048,"ebp","ecx",4)); # rcon
1239}
1240
1241# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1242# AES_KEY *key)
# Expands userKey into the schedule at key.  %eax on exit is 0 on
# success, -1 if userKey or key is NULL, and -2 if bits is not 128, 192
# or 256.  The round count (10, 12 or 14) is stored at byte offset 240 of
# the schedule.
# NOTE(review): the third function_begin argument looks like an internal
# alias; AES_set_decrypt_key calls "_x86_AES_set_encrypt_key" directly.
1243&public_label("AES_Te");
1244&function_begin("AES_set_encrypt_key", "", "_x86_AES_set_encrypt_key");
1245 &mov ("esi",&wparam(0)); # user supplied key
1246 &mov ("edi",&wparam(2)); # private key schedule
1247
1248 &test ("esi",-1); # NULL check on userKey
1249 &jz (&label("badpointer"));
1250 &test ("edi",-1); # NULL check on key
1251 &jz (&label("badpointer"));
1252
1253 &call (&label("pic_point"));
1254 &set_label("pic_point");
1255 &blindpop("ebp");
1256 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
1257
1258 &mov ("ecx",&wparam(1)); # number of bits in key
1259 &cmp ("ecx",128);
1260 &je (&label("10rounds"));
1261 &cmp ("ecx",192);
1262 &je (&label("12rounds"));
1263 &cmp ("ecx",256);
1264 &je (&label("14rounds"));
1265 &mov ("eax",-2); # invalid number of bits
1266 &jmp (&label("exit"));
1267
1268 &set_label("10rounds");
1269 &mov ("eax",&DWP(0,"esi")); # copy first 4 dwords
1270 &mov ("ebx",&DWP(4,"esi"));
1271 &mov ("ecx",&DWP(8,"esi"));
1272 &mov ("edx",&DWP(12,"esi"));
1273 &mov (&DWP(0,"edi"),"eax");
1274 &mov (&DWP(4,"edi"),"ebx");
1275 &mov (&DWP(8,"edi"),"ecx");
1276 &mov (&DWP(12,"edi"),"edx");
1277
1278 &xor ("ecx","ecx"); # ecx = iteration counter / rcon index
1279 &jmp (&label("10shortcut")); # eax and edx already hold rk[0] and rk[3]
1280
1281 &align (4);
1282 &set_label("10loop");
1283 &mov ("eax",&DWP(0,"edi")); # rk[0]
1284 &mov ("edx",&DWP(12,"edi")); # rk[3]
1285 &set_label("10shortcut");
1286 &enckey ();
1287
1288 &mov (&DWP(16,"edi"),"eax"); # rk[4]
1289 &xor ("eax",&DWP(4,"edi"));
1290 &mov (&DWP(20,"edi"),"eax"); # rk[5]
1291 &xor ("eax",&DWP(8,"edi"));
1292 &mov (&DWP(24,"edi"),"eax"); # rk[6]
1293 &xor ("eax",&DWP(12,"edi"));
1294 &mov (&DWP(28,"edi"),"eax"); # rk[7]
1295 &inc ("ecx");
1296 &add ("edi",16);
1297 &cmp ("ecx",10);
1298 &jl (&label("10loop"));
1299
1300 &mov (&DWP(80,"edi"),10); # setup number of rounds (edi advanced 160, so offset 240)
1301 &xor ("eax","eax");
1302 &jmp (&label("exit"));
1303
1304 &set_label("12rounds");
1305 &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords
1306 &mov ("ebx",&DWP(4,"esi"));
1307 &mov ("ecx",&DWP(8,"esi"));
1308 &mov ("edx",&DWP(12,"esi"));
1309 &mov (&DWP(0,"edi"),"eax");
1310 &mov (&DWP(4,"edi"),"ebx");
1311 &mov (&DWP(8,"edi"),"ecx");
1312 &mov (&DWP(12,"edi"),"edx");
1313 &mov ("ecx",&DWP(16,"esi"));
1314 &mov ("edx",&DWP(20,"esi"));
1315 &mov (&DWP(16,"edi"),"ecx");
1316 &mov (&DWP(20,"edi"),"edx");
1317
1318 &xor ("ecx","ecx"); # ecx = iteration counter / rcon index
1319 &jmp (&label("12shortcut")); # eax and edx already hold rk[0] and rk[5]
1320
1321 &align (4);
1322 &set_label("12loop");
1323 &mov ("eax",&DWP(0,"edi")); # rk[0]
1324 &mov ("edx",&DWP(20,"edi")); # rk[5]
1325 &set_label("12shortcut");
1326 &enckey ();
1327
1328 &mov (&DWP(24,"edi"),"eax"); # rk[6]
1329 &xor ("eax",&DWP(4,"edi"));
1330 &mov (&DWP(28,"edi"),"eax"); # rk[7]
1331 &xor ("eax",&DWP(8,"edi"));
1332 &mov (&DWP(32,"edi"),"eax"); # rk[8]
1333 &xor ("eax",&DWP(12,"edi"));
1334 &mov (&DWP(36,"edi"),"eax"); # rk[9]
1335
1336 &cmp ("ecx",7); # the last pass stops after rk[9]
1337 &je (&label("12break"));
1338 &inc ("ecx");
1339
1340 &xor ("eax",&DWP(16,"edi"));
1341 &mov (&DWP(40,"edi"),"eax"); # rk[10]
1342 &xor ("eax",&DWP(20,"edi"));
1343 &mov (&DWP(44,"edi"),"eax"); # rk[11]
1344
1345 &add ("edi",24);
1346 &jmp (&label("12loop"));
1347
1348 &set_label("12break");
1349 &mov (&DWP(72,"edi"),12); # setup number of rounds (edi advanced 168, so offset 240)
1350 &xor ("eax","eax");
1351 &jmp (&label("exit"));
1352
1353 &set_label("14rounds");
1354 &mov ("eax",&DWP(0,"esi")); # copy first 8 dwords
1355 &mov ("ebx",&DWP(4,"esi"));
1356 &mov ("ecx",&DWP(8,"esi"));
1357 &mov ("edx",&DWP(12,"esi"));
1358 &mov (&DWP(0,"edi"),"eax");
1359 &mov (&DWP(4,"edi"),"ebx");
1360 &mov (&DWP(8,"edi"),"ecx");
1361 &mov (&DWP(12,"edi"),"edx");
1362 &mov ("eax",&DWP(16,"esi"));
1363 &mov ("ebx",&DWP(20,"esi"));
1364 &mov ("ecx",&DWP(24,"esi"));
1365 &mov ("edx",&DWP(28,"esi"));
1366 &mov (&DWP(16,"edi"),"eax");
1367 &mov (&DWP(20,"edi"),"ebx");
1368 &mov (&DWP(24,"edi"),"ecx");
1369 &mov (&DWP(28,"edi"),"edx");
1370
1371 &xor ("ecx","ecx"); # ecx = iteration counter / rcon index
1372 &jmp (&label("14shortcut")); # edx already holds rk[7]
1373
1374 &align (4);
1375 &set_label("14loop");
1376 &mov ("edx",&DWP(28,"edi")); # rk[7]
1377 &set_label("14shortcut");
1378 &mov ("eax",&DWP(0,"edi")); # rk[0]
1379
1380 &enckey ();
1381
1382 &mov (&DWP(32,"edi"),"eax"); # rk[8]
1383 &xor ("eax",&DWP(4,"edi"));
1384 &mov (&DWP(36,"edi"),"eax"); # rk[9]
1385 &xor ("eax",&DWP(8,"edi"));
1386 &mov (&DWP(40,"edi"),"eax"); # rk[10]
1387 &xor ("eax",&DWP(12,"edi"));
1388 &mov (&DWP(44,"edi"),"eax"); # rk[11]
1389
1390 &cmp ("ecx",6); # the last pass stops after rk[11]
1391 &je (&label("14break"));
1392 &inc ("ecx");
1393
# second half of the pass: same table lookups as enckey, but with the
# byte masks in unrotated order and no rcon term
1394 &mov ("edx","eax");
1395 &mov ("eax",&DWP(16,"edi")); # rk[4]
1396 &movz ("esi",&LB("edx")); # rk[11]>>0
1397 &mov ("ebx",&DWP(2,"ebp","esi",8));
1398 &movz ("esi",&HB("edx")); # rk[11]>>8
1399 &and ("ebx",0x000000FF);
1400 &xor ("eax","ebx");
1401
1402 &mov ("ebx",&DWP(0,"ebp","esi",8));
1403 &shr ("edx",16);
1404 &and ("ebx",0x0000FF00);
1405 &movz ("esi",&LB("edx")); # rk[11]>>16
1406 &xor ("eax","ebx");
1407
1408 &mov ("ebx",&DWP(0,"ebp","esi",8));
1409 &movz ("esi",&HB("edx")); # rk[11]>>24
1410 &and ("ebx",0x00FF0000);
1411 &xor ("eax","ebx");
1412
1413 &mov ("ebx",&DWP(2,"ebp","esi",8));
1414 &and ("ebx",0xFF000000);
1415 &xor ("eax","ebx");
1416
1417 &mov (&DWP(48,"edi"),"eax"); # rk[12]
1418 &xor ("eax",&DWP(20,"edi"));
1419 &mov (&DWP(52,"edi"),"eax"); # rk[13]
1420 &xor ("eax",&DWP(24,"edi"));
1421 &mov (&DWP(56,"edi"),"eax"); # rk[14]
1422 &xor ("eax",&DWP(28,"edi"));
1423 &mov (&DWP(60,"edi"),"eax"); # rk[15]
1424
1425 &add ("edi",32);
1426 &jmp (&label("14loop"));
1427
1428 &set_label("14break");
1429 &mov (&DWP(48,"edi"),14); # setup number of rounds (edi advanced 192, so offset 240)
1430 &xor ("eax","eax");
1431 &jmp (&label("exit"));
1432
1433 &set_label("badpointer");
1434 &mov ("eax",-1); # NULL userKey or key
1435 &set_label("exit");
1436&function_end("AES_set_encrypt_key");
1437
# deckey($i,$ptr,$te,$td): emits code that transforms, in place, the
# 32-bit word at byte offset $i from register $ptr.  Each of its four
# bytes is first mapped through a byte load at 2($te,index,8); the mapped
# bytes then index dword loads at displacements 0, 3, 2 and 1 from
# ($td,index,8), which are xor-ed together and stored back.  $te and $td
# are registers holding table bases (AES_Te and AES_Td at the call sites
# in this file).  The emitted code clobbers %eax, %ebx and %edx.
# Declared without a prototype: the sub takes four arguments, so an
# empty "()" prototype would be wrong for any call made without "&".
1438sub deckey
1439{ my ($i,$ptr,$te,$td) = @_;
1440
1441 &mov ("eax",&DWP($i,$ptr));
1442 &mov ("edx","eax");
1443 &movz ("ebx",&HB("eax")); # byte 1
1444 &shr ("edx",16); # bytes 2 and 3 move down into %dl/%dh
1445 &and ("eax",0xFF); # byte 0
1446 &movz ("eax",&BP(2,$te,"eax",8));
1447 &movz ("ebx",&BP(2,$te,"ebx",8));
1448 &mov ("eax",&DWP(0,$td,"eax",8));
1449 &xor ("eax",&DWP(3,$td,"ebx",8));
1450 &movz ("ebx",&HB("edx")); # byte 3
1451 &and ("edx",0xFF); # byte 2
1452 &movz ("edx",&BP(2,$te,"edx",8));
1453 &movz ("ebx",&BP(2,$te,"ebx",8));
1454 &xor ("eax",&DWP(2,$td,"edx",8));
1455 &xor ("eax",&DWP(1,$td,"ebx",8));
1456 &mov (&DWP($i,$ptr),"eax");
1457}
1458
1459# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1460# AES_KEY *key)
# Builds a decryption schedule in three steps:
#   1. call _x86_AES_set_encrypt_key with the same three arguments; a
#      non-zero status is returned to the caller unchanged;
#   2. reverse the order of the 16-byte round-key chunks in place;
#   3. run deckey over every dword of chunks 1 .. rounds-1 (the first
#      and last chunks are left untouched).
# %eax is 0 on success.
1461&public_label("AES_Td");
1462&public_label("AES_Te");
1463&function_begin_B("AES_set_decrypt_key");
1464 &mov ("eax",&wparam(0));
1465 &mov ("ecx",&wparam(1));
1466 &mov ("edx",&wparam(2));
1467 &sub ("esp",12); # room for the three forwarded arguments
1468 &mov (&DWP(0,"esp"),"eax");
1469 &mov (&DWP(4,"esp"),"ecx");
1470 &mov (&DWP(8,"esp"),"edx");
1471 &call ("_x86_AES_set_encrypt_key");
1472 &add ("esp",12);
1473 &cmp ("eax",0);
1474 &je (&label("proceed"));
1475 &ret (); # propagate the error code in %eax
1476
1477 &set_label("proceed");
1478 &push ("ebp");
1479 &push ("ebx");
1480 &push ("esi");
1481 &push ("edi");
1482
1483 &mov ("esi",&wparam(2));
1484 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
1485 &lea ("ecx",&DWP(0,"","ecx",4)); # ecx = rounds*4
1486 &lea ("edi",&DWP(0,"esi","ecx",4)); # pointer to last chunk
1487
1488 &align (4);
1489 &set_label("invert"); # invert order of chunks
1490 &mov ("eax",&DWP(0,"esi"));
1491 &mov ("ebx",&DWP(4,"esi"));
1492 &mov ("ecx",&DWP(0,"edi"));
1493 &mov ("edx",&DWP(4,"edi"));
1494 &mov (&DWP(0,"edi"),"eax");
1495 &mov (&DWP(4,"edi"),"ebx");
1496 &mov (&DWP(0,"esi"),"ecx");
1497 &mov (&DWP(4,"esi"),"edx");
1498 &mov ("eax",&DWP(8,"esi"));
1499 &mov ("ebx",&DWP(12,"esi"));
1500 &mov ("ecx",&DWP(8,"edi"));
1501 &mov ("edx",&DWP(12,"edi"));
1502 &mov (&DWP(8,"edi"),"eax");
1503 &mov (&DWP(12,"edi"),"ebx");
1504 &mov (&DWP(8,"esi"),"ecx");
1505 &mov (&DWP(12,"esi"),"edx");
1506 &add ("esi",16);
1507 &sub ("edi",16);
1508 &cmp ("esi","edi"); # pointers meet on the middle chunk (rounds is 10, 12 or 14)
1509 &jne (&label("invert"));
1510
1511 &call (&label("pic_point"));
1512 &set_label("pic_point");
1513 &blindpop("ebp");
1514 &lea ("edi",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
1515 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
1516
1517 &mov ("esi",&wparam(2));
1518 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
1519 &dec ("ecx"); # rounds-1 chunks get transformed
1520 &align (4);
1521 &set_label("permute"); # permute the key schedule
1522 &add ("esi",16); # advance first, so chunk 0 is skipped
1523 &deckey (0,"esi","ebp","edi");
1524 &deckey (4,"esi","ebp","edi");
1525 &deckey (8,"esi","ebp","edi");
1526 &deckey (12,"esi","ebp","edi");
1527 &dec ("ecx");
1528 &jnz (&label("permute"));
1529
1530 &xor ("eax","eax"); # return success
1531&function_end("AES_set_decrypt_key");
1532
1533&asm_finish(); # NOTE(review): presumably finalizes/emits the generated assembly; the helper is defined outside this view
diff --git a/src/lib/libcrypto/aes/asm/aes-armv4.pl b/src/lib/libcrypto/aes/asm/aes-armv4.pl
deleted file mode 100644
index 15742c1ec5..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-armv4.pl
+++ /dev/null
@@ -1,1030 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses a single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
16# allows merging a logical or arithmetic operation with a shift or rotate
17# in one instruction and emitting the combined result every cycle. The module
18# is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
19# key.
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
# Register aliases interpolated into the assembly text below.
25$s0="r0"; # s0..s3: the four 32-bit words of the block being processed
26$s1="r1";
27$s2="r2";
28$s3="r3";
29$t1="r4"; # t1..t3: temporaries (input bytes, table words, key words)
30$t2="r5";
31$t3="r6";
32$i1="r7"; # i1..i3: extracted byte indices, then the looked-up values
33$i2="r8";
34$i3="r9";
35
36$tbl="r10"; # lookup table base (set to AES_Te in AES_encrypt)
37$key="r11"; # key schedule pointer
38$rounds="r12"; # round counter; AES_encrypt also reuses it as the inp/out pointer
39
40$code=<<___;
41.text
42.code 32
43
44.type AES_Te,%object
45.align 5
46AES_Te:
47.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
48.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
49.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
50.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
51.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
52.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
53.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
54.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
55.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
56.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
57.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
58.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
59.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
60.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
61.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
62.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
63.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
64.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
65.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
66.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
67.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
68.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
69.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
70.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
71.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
72.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
73.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
74.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
75.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
76.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
77.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
78.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
79.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
80.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
81.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
82.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
83.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
84.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
85.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
86.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
87.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
88.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
89.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
90.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
91.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
92.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
93.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
94.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
95.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
96.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
97.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
98.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
99.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
100.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
101.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
102.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
103.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
104.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
105.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
106.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
107.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
108.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
109.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
110.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
111@ Te4[256]
112.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
113.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
114.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
115.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
116.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
117.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
118.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
119.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
120.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
121.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
122.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
123.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
124.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
125.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
126.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
127.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
128.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
129.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
130.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
131.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
132.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
133.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
134.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
135.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
136.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
137.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
138.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
139.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
140.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
141.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
142.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
143.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
144@ rcon[]
145.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
146.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
147.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
148.size AES_Te,.-AES_Te
149
150@ void AES_encrypt(const unsigned char *in, unsigned char *out,
151@ const AES_KEY *key) {
152.global AES_encrypt
153.type AES_encrypt,%function
154.align 5
155AES_encrypt:
156 sub r3,pc,#8 @ AES_encrypt
157 stmdb sp!,{r1,r4-r12,lr}
158 mov $rounds,r0 @ inp
159 mov $key,r2
160 sub $tbl,r3,#AES_encrypt-AES_Te @ Te
161
162 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
163 ldrb $t1,[$rounds,#2] @ manner...
164 ldrb $t2,[$rounds,#1]
165 ldrb $t3,[$rounds,#0]
166 orr $s0,$s0,$t1,lsl#8
167 orr $s0,$s0,$t2,lsl#16
168 orr $s0,$s0,$t3,lsl#24
169 ldrb $s1,[$rounds,#7]
170 ldrb $t1,[$rounds,#6]
171 ldrb $t2,[$rounds,#5]
172 ldrb $t3,[$rounds,#4]
173 orr $s1,$s1,$t1,lsl#8
174 orr $s1,$s1,$t2,lsl#16
175 orr $s1,$s1,$t3,lsl#24
176 ldrb $s2,[$rounds,#11]
177 ldrb $t1,[$rounds,#10]
178 ldrb $t2,[$rounds,#9]
179 ldrb $t3,[$rounds,#8]
180 orr $s2,$s2,$t1,lsl#8
181 orr $s2,$s2,$t2,lsl#16
182 orr $s2,$s2,$t3,lsl#24
183 ldrb $s3,[$rounds,#15]
184 ldrb $t1,[$rounds,#14]
185 ldrb $t2,[$rounds,#13]
186 ldrb $t3,[$rounds,#12]
187 orr $s3,$s3,$t1,lsl#8
188 orr $s3,$s3,$t2,lsl#16
189 orr $s3,$s3,$t3,lsl#24
190
191 bl _armv4_AES_encrypt
192
193 ldr $rounds,[sp],#4 @ pop out
194 mov $t1,$s0,lsr#24 @ write output in endian-neutral
195 mov $t2,$s0,lsr#16 @ manner...
196 mov $t3,$s0,lsr#8
197 strb $t1,[$rounds,#0]
198 strb $t2,[$rounds,#1]
199 strb $t3,[$rounds,#2]
200 strb $s0,[$rounds,#3]
201 mov $t1,$s1,lsr#24
202 mov $t2,$s1,lsr#16
203 mov $t3,$s1,lsr#8
204 strb $t1,[$rounds,#4]
205 strb $t2,[$rounds,#5]
206 strb $t3,[$rounds,#6]
207 strb $s1,[$rounds,#7]
208 mov $t1,$s2,lsr#24
209 mov $t2,$s2,lsr#16
210 mov $t3,$s2,lsr#8
211 strb $t1,[$rounds,#8]
212 strb $t2,[$rounds,#9]
213 strb $t3,[$rounds,#10]
214 strb $s2,[$rounds,#11]
215 mov $t1,$s3,lsr#24
216 mov $t2,$s3,lsr#16
217 mov $t3,$s3,lsr#8
218 strb $t1,[$rounds,#12]
219 strb $t2,[$rounds,#13]
220 strb $t3,[$rounds,#14]
221 strb $s3,[$rounds,#15]
222
223 ldmia sp!,{r4-r12,lr}
224 tst lr,#1
225 moveq pc,lr @ be binary compatible with V4, yet
226 bx lr @ interoperable with Thumb ISA:-)
227.size AES_encrypt,.-AES_encrypt
228
@ _armv4_AES_encrypt: internal single-block encryption core.
@ In:  state words in s0-s3, key schedule pointer in key, lookup table
@      base in tbl.  The round count is read from byte offset 240 of the
@      key schedule.
@ Out: s0-s3 hold the transformed state; tbl is back at its entry value;
@      key has been advanced through the schedule.
@ Scratch: t1-t3, i1-i3 and rounds are overwritten.
@ lr doubles as the 0xff byte mask, so it is saved on the stack first.
229.type _armv4_AES_encrypt,%function
230.align 2
231_armv4_AES_encrypt:
232 str lr,[sp,#-4]! @ push lr
@ xor the state with the first four key words
233 ldr $t1,[$key],#16
234 ldr $t2,[$key,#-12]
235 ldr $t3,[$key,#-8]
236 ldr $i1,[$key,#-4]
237 ldr $rounds,[$key,#240-16]
238 eor $s0,$s0,$t1
239 eor $s1,$s1,$t2
240 eor $s2,$s2,$t3
241 eor $s3,$s3,$i1
@ the last round is handled separately after the loop
242 sub $rounds,$rounds,#1
243 mov lr,#255
244
@ Main rounds: every state byte indexes the same table of 32-bit words;
@ the rotations applied in the eor instructions produce the variants
@ that the comments name Te0-Te3.
245.Lenc_loop:
246 and $i2,lr,$s0,lsr#8
247 and $i3,lr,$s0,lsr#16
248 and $i1,lr,$s0
249 mov $s0,$s0,lsr#24
250 ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0]
251 ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24]
252 ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8]
253 ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16]
254
255 and $i1,lr,$s1,lsr#16 @ i0
256 and $i2,lr,$s1
257 and $i3,lr,$s1,lsr#8
258 mov $s1,$s1,lsr#24
259 ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16]
260 ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24]
261 ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0]
262 ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8]
263 eor $s0,$s0,$i1,ror#8
264 eor $s1,$s1,$t1,ror#24
265 eor $t2,$t2,$i2,ror#8
266 eor $t3,$t3,$i3,ror#8
267
268 and $i1,lr,$s2,lsr#8 @ i0
269 and $i2,lr,$s2,lsr#16 @ i1
270 and $i3,lr,$s2
271 mov $s2,$s2,lsr#24
272 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
273 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
274 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
275 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
276 eor $s0,$s0,$i1,ror#16
277 eor $s1,$s1,$i2,ror#8
278 eor $s2,$s2,$t2,ror#16
279 eor $t3,$t3,$i3,ror#16
280
281 and $i1,lr,$s3 @ i0
282 and $i2,lr,$s3,lsr#8 @ i1
283 and $i3,lr,$s3,lsr#16 @ i2
284 mov $s3,$s3,lsr#24
285 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
286 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
287 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
288 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
289 eor $s0,$s0,$i1,ror#24
290 eor $s1,$s1,$i2,ror#16
291 eor $s2,$s2,$i3,ror#8
292 eor $s3,$s3,$t3,ror#8
293
@ add the next round key (key advances by 16 bytes per round)
294 ldr $t1,[$key],#16
295 ldr $t2,[$key,#-12]
296 ldr $t3,[$key,#-8]
297 ldr $i1,[$key,#-4]
298 eor $s0,$s0,$t1
299 eor $s1,$s1,$t2
300 eor $s2,$s2,$t3
301 eor $s3,$s3,$i1
302
303 subs $rounds,$rounds,#1
304 bne .Lenc_loop
305
@ Last round: byte loads at offset 2 of each 4-byte table entry supply
@ the values labelled Te4 below; they are merged with shifts instead of
@ rotations.
306 add $tbl,$tbl,#2
307
308 and $i1,lr,$s0
309 and $i2,lr,$s0,lsr#8
310 and $i3,lr,$s0,lsr#16
311 mov $s0,$s0,lsr#24
312 ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0]
313 ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24]
314 ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8]
315 ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16]
316
317 and $i1,lr,$s1,lsr#16 @ i0
318 and $i2,lr,$s1
319 and $i3,lr,$s1,lsr#8
320 mov $s1,$s1,lsr#24
321 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16]
322 ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24]
323 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0]
324 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8]
325 eor $s0,$i1,$s0,lsl#8
326 eor $s1,$t1,$s1,lsl#24
327 eor $t2,$i2,$t2,lsl#8
328 eor $t3,$i3,$t3,lsl#8
329
330 and $i1,lr,$s2,lsr#8 @ i0
331 and $i2,lr,$s2,lsr#16 @ i1
332 and $i3,lr,$s2
333 mov $s2,$s2,lsr#24
334 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
335 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
336 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
337 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
338 eor $s0,$i1,$s0,lsl#8
339 eor $s1,$s1,$i2,lsl#16
340 eor $s2,$t2,$s2,lsl#24
341 eor $t3,$i3,$t3,lsl#8
342
343 and $i1,lr,$s3 @ i0
344 and $i2,lr,$s3,lsr#8 @ i1
345 and $i3,lr,$s3,lsr#16 @ i2
346 mov $s3,$s3,lsr#24
347 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
348 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
349 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
350 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
351 eor $s0,$i1,$s0,lsl#8
352 eor $s1,$s1,$i2,lsl#8
353 eor $s2,$s2,$i3,lsl#16
354 eor $s3,$t3,$s3,lsl#24
355
@ the byte mask is no longer needed: restore lr, then add the final key
356 ldr lr,[sp],#4 @ pop lr
357 ldr $t1,[$key,#0]
358 ldr $t2,[$key,#4]
359 ldr $t3,[$key,#8]
360 ldr $i1,[$key,#12]
361 eor $s0,$s0,$t1
362 eor $s1,$s1,$t2
363 eor $s2,$s2,$t3
364 eor $s3,$s3,$i1
365
@ undo the +2 bias so tbl has its entry value again
366 sub $tbl,$tbl,#2
367 mov pc,lr @ return
368.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
369
@ AES_set_encrypt_key: expand a user key into the encryption key schedule.
@ In:  r0 = user key bytes, r1 = key length in bits, r2 = key schedule
@      to fill.
@ Out: r0 = 0 on success, -1 if r0 or r2 is NULL or r1 is not 128, 192
@      or 256.  On success r2 again points at the start of the schedule
@      and the round count (10, 12 or 14) is stored at its byte
@      offset 240.
370.global AES_set_encrypt_key
371.type AES_set_encrypt_key,%function
372.align 5
373AES_set_encrypt_key:
374 sub r3,pc,#8 @ AES_set_encrypt_key
@ reject a NULL user key (r0) or a NULL schedule pointer (r2)
375 teq r0,#0
376 moveq r0,#-1
377 beq .Labrt
378 teq r2,#0
379 moveq r0,#-1
380 beq .Labrt
381
@ only 128-, 192- and 256-bit keys are accepted
382 teq r1,#128
383 beq .Lok
384 teq r1,#192
385 beq .Lok
386 teq r1,#256
387 movne r0,#-1
388 bne .Labrt
389
390.Lok: stmdb sp!,{r4-r12,lr}
@ r3 holds the address of this function, so tbl becomes AES_Te+1024
391 sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4
392
393 mov $rounds,r0 @ inp
394 mov lr,r1 @ bits
395 mov $key,r2 @ key
396
397 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
398 ldrb $t1,[$rounds,#2] @ manner...
399 ldrb $t2,[$rounds,#1]
400 ldrb $t3,[$rounds,#0]
401 orr $s0,$s0,$t1,lsl#8
402 orr $s0,$s0,$t2,lsl#16
403 orr $s0,$s0,$t3,lsl#24
404 ldrb $s1,[$rounds,#7]
405 ldrb $t1,[$rounds,#6]
406 ldrb $t2,[$rounds,#5]
407 ldrb $t3,[$rounds,#4]
408 orr $s1,$s1,$t1,lsl#8
409 orr $s1,$s1,$t2,lsl#16
410 orr $s1,$s1,$t3,lsl#24
411 ldrb $s2,[$rounds,#11]
412 ldrb $t1,[$rounds,#10]
413 ldrb $t2,[$rounds,#9]
414 ldrb $t3,[$rounds,#8]
415 orr $s2,$s2,$t1,lsl#8
416 orr $s2,$s2,$t2,lsl#16
417 orr $s2,$s2,$t3,lsl#24
418 ldrb $s3,[$rounds,#15]
419 ldrb $t1,[$rounds,#14]
420 ldrb $t2,[$rounds,#13]
421 ldrb $t3,[$rounds,#12]
422 orr $s3,$s3,$t1,lsl#8
423 orr $s3,$s3,$t2,lsl#16
424 orr $s3,$s3,$t3,lsl#24
@ the first four schedule words are the first 16 key bytes
425 str $s0,[$key],#16
426 str $s1,[$key,#-12]
427 str $s2,[$key,#-8]
428 str $s3,[$key,#-4]
429
430 teq lr,#128
431 bne .Lnot128
@ 128-bit key: store round count 10 at schedule offset 240; the same
@ register then counts the 10 expansion passes
432 mov $rounds,#10
433 str $rounds,[$key,#240-16]
434 add $t3,$tbl,#256 @ rcon
435 mov lr,#255
436
@ Each pass derives four new words: the previous last word is rotated
@ by one byte, substituted bytewise through the table at tbl and xored
@ with the next rcon word.
437.L128_loop:
438 and $t2,lr,$s3,lsr#24
439 and $i1,lr,$s3,lsr#16
440 and $i2,lr,$s3,lsr#8
441 and $i3,lr,$s3
442 ldrb $t2,[$tbl,$t2]
443 ldrb $i1,[$tbl,$i1]
444 ldrb $i2,[$tbl,$i2]
445 ldrb $i3,[$tbl,$i3]
446 ldr $t1,[$t3],#4 @ rcon[i++]
447 orr $t2,$t2,$i1,lsl#24
448 orr $t2,$t2,$i2,lsl#16
449 orr $t2,$t2,$i3,lsl#8
450 eor $t2,$t2,$t1
451 eor $s0,$s0,$t2 @ rk[4]=rk[0]^...
452 eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4]
453 eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5]
454 eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6]
455 str $s0,[$key],#16
456 str $s1,[$key,#-12]
457 str $s2,[$key,#-8]
458 str $s3,[$key,#-4]
459
460 subs $rounds,$rounds,#1
461 bne .L128_loop
@ key has advanced 16+10*16 bytes: step back to the schedule start
462 sub r2,$key,#176
463 b .Ldone
464
@ 192/256-bit keys: key bytes 16-23 become schedule words 4 and 5
465.Lnot128:
466 ldrb $i2,[$rounds,#19]
467 ldrb $t1,[$rounds,#18]
468 ldrb $t2,[$rounds,#17]
469 ldrb $t3,[$rounds,#16]
470 orr $i2,$i2,$t1,lsl#8
471 orr $i2,$i2,$t2,lsl#16
472 orr $i2,$i2,$t3,lsl#24
473 ldrb $i3,[$rounds,#23]
474 ldrb $t1,[$rounds,#22]
475 ldrb $t2,[$rounds,#21]
476 ldrb $t3,[$rounds,#20]
477 orr $i3,$i3,$t1,lsl#8
478 orr $i3,$i3,$t2,lsl#16
479 orr $i3,$i3,$t3,lsl#24
480 str $i2,[$key],#8
481 str $i3,[$key,#-4]
482
483 teq lr,#192
484 bne .Lnot192
@ 192-bit key: store round count 12 at schedule offset 240, then run
@ 8 passes; the last pass leaves right after its first four words
485 mov $rounds,#12
486 str $rounds,[$key,#240-24]
487 add $t3,$tbl,#256 @ rcon
488 mov lr,#255
489 mov $rounds,#8
490
491.L192_loop:
492 and $t2,lr,$i3,lsr#24
493 and $i1,lr,$i3,lsr#16
494 and $i2,lr,$i3,lsr#8
495 and $i3,lr,$i3
496 ldrb $t2,[$tbl,$t2]
497 ldrb $i1,[$tbl,$i1]
498 ldrb $i2,[$tbl,$i2]
499 ldrb $i3,[$tbl,$i3]
500 ldr $t1,[$t3],#4 @ rcon[i++]
501 orr $t2,$t2,$i1,lsl#24
502 orr $t2,$t2,$i2,lsl#16
503 orr $t2,$t2,$i3,lsl#8
504 eor $i3,$t2,$t1
505 eor $s0,$s0,$i3 @ rk[6]=rk[0]^...
506 eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6]
507 eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7]
508 eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8]
509 str $s0,[$key],#24
510 str $s1,[$key,#-20]
511 str $s2,[$key,#-16]
512 str $s3,[$key,#-12]
513
514 subs $rounds,$rounds,#1
@ on exit key has advanced 24+8*24 bytes: step back to the schedule start
515 subeq r2,$key,#216
516 beq .Ldone
517
518 ldr $i1,[$key,#-32]
519 ldr $i2,[$key,#-28]
520 eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9]
521 eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10]
522 str $i1,[$key,#-8]
523 str $i3,[$key,#-4]
524 b .L192_loop
525
@ 256-bit key: key bytes 24-31 become schedule words 6 and 7
526.Lnot192:
527 ldrb $i2,[$rounds,#27]
528 ldrb $t1,[$rounds,#26]
529 ldrb $t2,[$rounds,#25]
530 ldrb $t3,[$rounds,#24]
531 orr $i2,$i2,$t1,lsl#8
532 orr $i2,$i2,$t2,lsl#16
533 orr $i2,$i2,$t3,lsl#24
534 ldrb $i3,[$rounds,#31]
535 ldrb $t1,[$rounds,#30]
536 ldrb $t2,[$rounds,#29]
537 ldrb $t3,[$rounds,#28]
538 orr $i3,$i3,$t1,lsl#8
539 orr $i3,$i3,$t2,lsl#16
540 orr $i3,$i3,$t3,lsl#24
541 str $i2,[$key],#8
542 str $i3,[$key,#-4]
543
@ store round count 14 at schedule offset 240, then run 7 passes; the
@ last pass leaves right after its first four words
544 mov $rounds,#14
545 str $rounds,[$key,#240-32]
546 add $t3,$tbl,#256 @ rcon
547 mov lr,#255
548 mov $rounds,#7
549
550.L256_loop:
551 and $t2,lr,$i3,lsr#24
552 and $i1,lr,$i3,lsr#16
553 and $i2,lr,$i3,lsr#8
554 and $i3,lr,$i3
555 ldrb $t2,[$tbl,$t2]
556 ldrb $i1,[$tbl,$i1]
557 ldrb $i2,[$tbl,$i2]
558 ldrb $i3,[$tbl,$i3]
559 ldr $t1,[$t3],#4 @ rcon[i++]
560 orr $t2,$t2,$i1,lsl#24
561 orr $t2,$t2,$i2,lsl#16
562 orr $t2,$t2,$i3,lsl#8
563 eor $i3,$t2,$t1
564 eor $s0,$s0,$i3 @ rk[8]=rk[0]^...
565 eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8]
566 eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9]
567 eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10]
568 str $s0,[$key],#32
569 str $s1,[$key,#-28]
570 str $s2,[$key,#-24]
571 str $s3,[$key,#-20]
572
573 subs $rounds,$rounds,#1
@ on exit key has advanced 32+7*32 bytes: step back to the schedule start
574 subeq r2,$key,#256
575 beq .Ldone
576
@ second half of the pass: the new last word is substituted bytewise
@ with no byte rotation and no rcon
577 and $t2,lr,$s3
578 and $i1,lr,$s3,lsr#8
579 and $i2,lr,$s3,lsr#16
580 and $i3,lr,$s3,lsr#24
581 ldrb $t2,[$tbl,$t2]
582 ldrb $i1,[$tbl,$i1]
583 ldrb $i2,[$tbl,$i2]
584 ldrb $i3,[$tbl,$i3]
585 orr $t2,$t2,$i1,lsl#8
586 orr $t2,$t2,$i2,lsl#16
587 orr $t2,$t2,$i3,lsl#24
588
589 ldr $t1,[$key,#-48]
590 ldr $i1,[$key,#-44]
591 ldr $i2,[$key,#-40]
592 ldr $i3,[$key,#-36]
593 eor $t1,$t1,$t2 @ rk[12]=rk[4]^...
594 eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12]
595 eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13]
596 eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14]
597 str $t1,[$key,#-16]
598 str $i1,[$key,#-12]
599 str $i2,[$key,#-8]
600 str $i3,[$key,#-4]
601 b .L256_loop
602
@ success: return 0 and restore the registers saved at .Lok
603.Ldone: mov r0,#0
604 ldmia sp!,{r4-r12,lr}
@ common exit (also reached by the early error paths, which have not
@ pushed anything): the return instruction is chosen by bit 0 of lr
605.Labrt: tst lr,#1
606 moveq pc,lr @ be binary compatible with V4, yet
607 bx lr @ interoperable with Thumb ISA:-)
608.size AES_set_encrypt_key,.-AES_set_encrypt_key
609
@ AES_set_decrypt_key: build the decryption key schedule.
@ Takes the same r0/r1/r2 arguments as AES_set_encrypt_key, which it
@ calls first.  On success it then
@   1) swaps the 16-byte round keys end-for-end (.Linv), and
@   2) rewrites every word of round keys 1..rounds-1 in place (.Lmix).
@ Returns 0 in r0, or leaves through .Labrt with the non-zero result of
@ AES_set_encrypt_key.
610.global AES_set_decrypt_key
611.type AES_set_decrypt_key,%function
612.align 5
613AES_set_decrypt_key:
614 str lr,[sp,#-4]! @ push lr
615 bl AES_set_encrypt_key
616 teq r0,#0
617 ldrne lr,[sp],#4 @ pop lr
618 bne .Labrt
619
@ r4-r12 go below the lr pushed at entry; the final ldmia pops both
620 stmdb sp!,{r4-r12}
621
622 ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
623 mov $key,r2 @ which is AES_KEY *key
@ i1 walks up from the first round key, i2 walks down from the last
624 mov $i1,r2
625 add $i2,r2,$rounds,lsl#4
626
@ swap two 16-byte round keys per pass until the pointers meet
627.Linv: ldr $s0,[$i1]
628 ldr $s1,[$i1,#4]
629 ldr $s2,[$i1,#8]
630 ldr $s3,[$i1,#12]
631 ldr $t1,[$i2]
632 ldr $t2,[$i2,#4]
633 ldr $t3,[$i2,#8]
634 ldr $i3,[$i2,#12]
635 str $s0,[$i2],#-16
636 str $s1,[$i2,#16+4]
637 str $s2,[$i2,#16+8]
638 str $s3,[$i2,#16+12]
639 str $t1,[$i1],#16
640 str $t2,[$i1,#-12]
641 str $t3,[$i1,#-8]
642 str $i3,[$i1,#-4]
643 teq $i1,$i2
644 bne .Linv
645___
# The index registers are not needed after .Linv, so they are reused
# under new names for the three byte-replicated masks built below.
646$mask80=$i1;
647$mask1b=$i2;
648$mask7f=$i3;
649$code.=<<___;
@ key is advanced to round key 1; masks become 0x80808080, 0x1b1b1b1b
@ and 0x7f7f7f7f
650 ldr $s0,[$key,#16]! @ prefetch tp1
651 mov $mask80,#0x80
652 mov $mask1b,#0x1b
653 orr $mask80,$mask80,#0x8000
654 orr $mask1b,$mask1b,#0x1b00
655 orr $mask80,$mask80,$mask80,lsl#16
656 orr $mask1b,$mask1b,$mask1b,lsl#16
657 sub $rounds,$rounds,#1
658 mvn $mask7f,$mask80
659 mov $rounds,$rounds,lsl#2 @ (rounds-1)*4
660
@ One schedule word per pass.  Each of the three groups below doubles
@ all four bytes of a word at once: the low seven bits are shifted left
@ and bytes whose top bit was set are xored with 0x1b.
661.Lmix: and $t1,$s0,$mask80
662 and $s1,$s0,$mask7f
663 sub $t1,$t1,$t1,lsr#7
664 and $t1,$t1,$mask1b
665 eor $s1,$t1,$s1,lsl#1 @ tp2
666
667 and $t1,$s1,$mask80
668 and $s2,$s1,$mask7f
669 sub $t1,$t1,$t1,lsr#7
670 and $t1,$t1,$mask1b
671 eor $s2,$t1,$s2,lsl#1 @ tp4
672
673 and $t1,$s2,$mask80
674 and $s3,$s2,$mask7f
675 sub $t1,$t1,$t1,lsr#7
676 and $t1,$t1,$mask1b
677 eor $s3,$t1,$s3,lsl#1 @ tp8
678
@ combine the multiples and their byte rotations into the new word
679 eor $t1,$s1,$s2
680 eor $t2,$s0,$s3 @ tp9
681 eor $t1,$t1,$s3 @ tpe
682 eor $t1,$t1,$s1,ror#24
683 eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
684 eor $t1,$t1,$s2,ror#16
685 eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
686 eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24)
687
@ fetch the next input word before overwriting the current one
688 ldr $s0,[$key,#4] @ prefetch tp1
689 str $t1,[$key],#4
690 subs $rounds,$rounds,#1
691 bne .Lmix
692
693 mov r0,#0
694 ldmia sp!,{r4-r12,lr}
695 tst lr,#1
696 moveq pc,lr @ be binary compatible with V4, yet
697 bx lr @ interoperable with Thumb ISA:-)
698.size AES_set_decrypt_key,.-AES_set_decrypt_key
699
700.type AES_Td,%object
701.align 5
702AES_Td:
703.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
704.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
705.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
706.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
707.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
708.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
709.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
710.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
711.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
712.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
713.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
714.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
715.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
716.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
717.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
718.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
719.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
720.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
721.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
722.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
723.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
724.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
725.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
726.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
727.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
728.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
729.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
730.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
731.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
732.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
733.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
734.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
735.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
736.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
737.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
738.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
739.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
740.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
741.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
742.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
743.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
744.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
745.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
746.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
747.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
748.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
749.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
750.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
751.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
752.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
753.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
754.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
755.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
756.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
757.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
758.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
759.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
760.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
761.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
762.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
763.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
764.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
765.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
766.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
767@ Td4[256]
768.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
769.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
770.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
771.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
772.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
773.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
774.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
775.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
776.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
777.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
778.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
779.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
780.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
781.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
782.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
783.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
784.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
785.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
786.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
787.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
788.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
789.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
790.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
791.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
792.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
793.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
794.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
795.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
796.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
797.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
798.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
799.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
800.size AES_Td,.-AES_Td
801
802@ void AES_decrypt(const unsigned char *in, unsigned char *out,
803@ const AES_KEY *key) {
@ Public entry point: loads 16 input bytes from r0 into s0-s3 (byte 0
@ ends up in the top 8 bits of s0), calls _armv4_AES_decrypt with the
@ key schedule from r2 and tbl pointing at AES_Td, then stores s0-s3
@ byte by byte to the output pointer passed in r1.
804.global AES_decrypt
805.type AES_decrypt,%function
806.align 5
807AES_decrypt:
808 sub r3,pc,#8 @ AES_decrypt
@ r1 (out) is pushed with the saved registers because the register set
@ is reused below; it is popped again after the core returns
809 stmdb sp!,{r1,r4-r12,lr}
810 mov $rounds,r0 @ inp
811 mov $key,r2
812 sub $tbl,r3,#AES_decrypt-AES_Td @ Td
813
814 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
815 ldrb $t1,[$rounds,#2] @ manner...
816 ldrb $t2,[$rounds,#1]
817 ldrb $t3,[$rounds,#0]
818 orr $s0,$s0,$t1,lsl#8
819 orr $s0,$s0,$t2,lsl#16
820 orr $s0,$s0,$t3,lsl#24
821 ldrb $s1,[$rounds,#7]
822 ldrb $t1,[$rounds,#6]
823 ldrb $t2,[$rounds,#5]
824 ldrb $t3,[$rounds,#4]
825 orr $s1,$s1,$t1,lsl#8
826 orr $s1,$s1,$t2,lsl#16
827 orr $s1,$s1,$t3,lsl#24
828 ldrb $s2,[$rounds,#11]
829 ldrb $t1,[$rounds,#10]
830 ldrb $t2,[$rounds,#9]
831 ldrb $t3,[$rounds,#8]
832 orr $s2,$s2,$t1,lsl#8
833 orr $s2,$s2,$t2,lsl#16
834 orr $s2,$s2,$t3,lsl#24
835 ldrb $s3,[$rounds,#15]
836 ldrb $t1,[$rounds,#14]
837 ldrb $t2,[$rounds,#13]
838 ldrb $t3,[$rounds,#12]
839 orr $s3,$s3,$t1,lsl#8
840 orr $s3,$s3,$t2,lsl#16
841 orr $s3,$s3,$t3,lsl#24
842
843 bl _armv4_AES_decrypt
844
845 ldr $rounds,[sp],#4 @ pop out
846 mov $t1,$s0,lsr#24 @ write output in endian-neutral
847 mov $t2,$s0,lsr#16 @ manner...
848 mov $t3,$s0,lsr#8
849 strb $t1,[$rounds,#0]
850 strb $t2,[$rounds,#1]
851 strb $t3,[$rounds,#2]
852 strb $s0,[$rounds,#3]
853 mov $t1,$s1,lsr#24
854 mov $t2,$s1,lsr#16
855 mov $t3,$s1,lsr#8
856 strb $t1,[$rounds,#4]
857 strb $t2,[$rounds,#5]
858 strb $t3,[$rounds,#6]
859 strb $s1,[$rounds,#7]
860 mov $t1,$s2,lsr#24
861 mov $t2,$s2,lsr#16
862 mov $t3,$s2,lsr#8
863 strb $t1,[$rounds,#8]
864 strb $t2,[$rounds,#9]
865 strb $t3,[$rounds,#10]
866 strb $s2,[$rounds,#11]
867 mov $t1,$s3,lsr#24
868 mov $t2,$s3,lsr#16
869 mov $t3,$s3,lsr#8
870 strb $t1,[$rounds,#12]
871 strb $t2,[$rounds,#13]
872 strb $t3,[$rounds,#14]
873 strb $s3,[$rounds,#15]
874
@ r1 was already popped above, so only r4-r12 and lr remain on the stack
875 ldmia sp!,{r4-r12,lr}
876 tst lr,#1
877 moveq pc,lr @ be binary compatible with V4, yet
878 bx lr @ interoperable with Thumb ISA:-)
879.size AES_decrypt,.-AES_decrypt
880
@ _armv4_AES_decrypt: internal single-block decryption core.
@ In:  state words in s0-s3, key schedule pointer in key, lookup table
@      base in tbl.  The round count is read from byte offset 240 of the
@      key schedule.
@ Out: s0-s3 hold the transformed state; tbl is back at its entry value;
@      key has been advanced through the schedule.
@ Scratch: t1-t3, i1-i3 and rounds are overwritten.
@ lr doubles as the 0xff byte mask, so it is saved on the stack first.
881.type _armv4_AES_decrypt,%function
882.align 2
883_armv4_AES_decrypt:
884 str lr,[sp,#-4]! @ push lr
@ xor the state with the first four key words
885 ldr $t1,[$key],#16
886 ldr $t2,[$key,#-12]
887 ldr $t3,[$key,#-8]
888 ldr $i1,[$key,#-4]
889 ldr $rounds,[$key,#240-16]
890 eor $s0,$s0,$t1
891 eor $s1,$s1,$t2
892 eor $s2,$s2,$t3
893 eor $s3,$s3,$i1
@ the last round is handled separately after the loop
894 sub $rounds,$rounds,#1
895 mov lr,#255
896
@ Main rounds: every state byte indexes the same table of 32-bit words;
@ the rotations applied in the eor instructions produce the variants
@ that the comments name Td0-Td3.
897.Ldec_loop:
898 and $i1,lr,$s0,lsr#16
899 and $i2,lr,$s0,lsr#8
900 and $i3,lr,$s0
901 mov $s0,$s0,lsr#24
902 ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16]
903 ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24]
904 ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8]
905 ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0]
906
907 and $i1,lr,$s1 @ i0
908 and $i2,lr,$s1,lsr#16
909 and $i3,lr,$s1,lsr#8
910 mov $s1,$s1,lsr#24
911 ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0]
912 ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24]
913 ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16]
914 ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8]
915 eor $s0,$s0,$i1,ror#24
916 eor $s1,$s1,$t1,ror#8
917 eor $t2,$i2,$t2,ror#8
918 eor $t3,$i3,$t3,ror#8
919
920 and $i1,lr,$s2,lsr#8 @ i0
921 and $i2,lr,$s2 @ i1
922 and $i3,lr,$s2,lsr#16
923 mov $s2,$s2,lsr#24
924 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
925 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
926 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
927 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
928 eor $s0,$s0,$i1,ror#16
929 eor $s1,$s1,$i2,ror#24
930 eor $s2,$s2,$t2,ror#8
931 eor $t3,$i3,$t3,ror#8
932
933 and $i1,lr,$s3,lsr#16 @ i0
934 and $i2,lr,$s3,lsr#8 @ i1
935 and $i3,lr,$s3 @ i2
936 mov $s3,$s3,lsr#24
937 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
938 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
939 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
940 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
941 eor $s0,$s0,$i1,ror#8
942 eor $s1,$s1,$i2,ror#16
943 eor $s2,$s2,$i3,ror#24
944 eor $s3,$s3,$t3,ror#8
945
@ add the next round key (key advances by 16 bytes per round)
946 ldr $t1,[$key],#16
947 ldr $t2,[$key,#-12]
948 ldr $t3,[$key,#-8]
949 ldr $i1,[$key,#-4]
950 eor $s0,$s0,$t1
951 eor $s1,$s1,$t2
952 eor $s2,$s2,$t3
953 eor $s3,$s3,$i1
954
955 subs $rounds,$rounds,#1
956 bne .Ldec_loop
957
@ Last round: step over the 1024-byte word table to the byte table
@ that the comments below call Td4.
958 add $tbl,$tbl,#1024
959
@ touch the 256-byte table at 32-byte steps; the loaded values are
@ discarded (every destination register is overwritten below)
960 ldr $t1,[$tbl,#0] @ prefetch Td4
961 ldr $t2,[$tbl,#32]
962 ldr $t3,[$tbl,#64]
963 ldr $i1,[$tbl,#96]
964 ldr $i2,[$tbl,#128]
965 ldr $i3,[$tbl,#160]
966 ldr $t1,[$tbl,#192]
967 ldr $t2,[$tbl,#224]
968
969 and $i1,lr,$s0,lsr#16
970 and $i2,lr,$s0,lsr#8
971 and $i3,lr,$s0
972 ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24]
973 ldrb $t1,[$tbl,$i1] @ Td4[s0>>16]
974 ldrb $t2,[$tbl,$i2] @ Td4[s0>>8]
975 ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
976
977 and $i1,lr,$s1 @ i0
978 and $i2,lr,$s1,lsr#16
979 and $i3,lr,$s1,lsr#8
980 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
981 ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
982 ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
983 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
984 eor $s0,$i1,$s0,lsl#24
985 eor $s1,$t1,$s1,lsl#8
986 eor $t2,$t2,$i2,lsl#8
987 eor $t3,$t3,$i3,lsl#8
988
989 and $i1,lr,$s2,lsr#8 @ i0
990 and $i2,lr,$s2 @ i1
991 and $i3,lr,$s2,lsr#16
992 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
993 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
994 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
995 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
996 eor $s0,$s0,$i1,lsl#8
997 eor $s1,$i2,$s1,lsl#16
998 eor $s2,$t2,$s2,lsl#16
999 eor $t3,$t3,$i3,lsl#16
1000
1001 and $i1,lr,$s3,lsr#16 @ i0
1002 and $i2,lr,$s3,lsr#8 @ i1
1003 and $i3,lr,$s3 @ i2
1004 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
1005 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1006 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
1007 ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
1008 eor $s0,$s0,$i1,lsl#16
1009 eor $s1,$s1,$i2,lsl#8
1010 eor $s2,$i3,$s2,lsl#8
1011 eor $s3,$t3,$s3,lsl#24
1012
@ the byte mask is no longer needed: restore lr, then add the final key
1013 ldr lr,[sp],#4 @ pop lr
1014 ldr $t1,[$key,#0]
1015 ldr $t2,[$key,#4]
1016 ldr $t3,[$key,#8]
1017 ldr $i1,[$key,#12]
1018 eor $s0,$s0,$t1
1019 eor $s1,$s1,$t2
1020 eor $s2,$s2,$t3
1021 eor $s3,$s3,$i1
1022
@ undo the +1024 so tbl has its entry value again
1023 sub $tbl,$tbl,#1024
1024 mov pc,lr @ return
1025.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
1026.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1027___
1028
# Every "bx lr" in the generated text is replaced by a literal .word
# before the result is written to standard output.
1029$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
1030print $code;
diff --git a/src/lib/libcrypto/aes/asm/aes-ia64.S b/src/lib/libcrypto/aes/asm/aes-ia64.S
deleted file mode 100644
index 7f6c4c3662..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-ia64.S
+++ /dev/null
@@ -1,1123 +0,0 @@
1// ====================================================================
2// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
3// project. Rights for redistribution and usage in source and binary
4// forms are granted according to the OpenSSL license.
5// ====================================================================
6//
7// What's wrong with compiler-generated code? The compiler never uses
8// variable 'shr', which is pairable with 'extr'/'dep' instructions.
9// Then it uses 'zxt', which is an I-type, but can be replaced with
10// 'and', which in turn can be assigned to an M-port [there are twice
11// as many M-ports as I-ports on Itanium 2]. By sacrificing a few
12// registers for small constants (255, 24 and 16) to be used with
13// 'shr' and 'and' instructions I can achieve better ILP, Instruction
14// Level Parallelism, and performance. This code outperforms GCC 3.3
15// generated code by over a factor of 2 (two), GCC 3.4 - by 70% and
16// HP C - by 40%. The measured best-case scenario, i.e. aligned
17// big-endian input, ECB timing on Itanium 2, is (18 + 13*rounds)
18// ticks per block, or 9.25 CPU cycles per byte for a 128-bit key.
19
20// Version 1.2 mitigates the hazard of cache-timing attacks by
21// a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling
22// references to S-boxes for L2 cache latency, c) prefetching T[ed]4
23// prior to the last round. As a result performance dropped to
24// (26 + 15*rounds) ticks per block, or 11 cycles per byte processed
25// with a 128-bit key. This is ~16% deterioration. For reference, the
26// Itanium 2 L1 cache has a 64-byte line size and L2 - 128 bytes...
27
28.ident "aes-ia64.S, version 1.2"
29.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
30.explicit
31.text
32
// Two key-schedule pointers: the routines below fetch key words through
// both, each stepping by 2*KSZ.
33rk0=r8; rk1=r9;
34
// Save slots for ar.pfs, ar.lc and the predicate registers, and the
// registers that hold the constants 0xff, 24 and 16.
35pfssave=r2;
36lcsave=r10;
37prsave=r3;
38maskff=r11;
39twenty4=r14;
40sixteen=r15;
41
// teXY: table-lookup temporaries.  In the main loop of _ia64_AES_encrypt
// X selects the table pointer (te0-te3) and Y the state word (s0-s3)
// that supplies the index byte.
42te00=r16; te11=r17; te22=r18; te33=r19;
43te01=r20; te12=r21; te23=r22; te30=r23;
44te02=r24; te13=r25; te20=r26; te31=r27;
45te03=r28; te10=r29; te21=r30; te32=r31;
46
47// these are rotating...
48t0=r32; s0=r33;
49t1=r34; s1=r35;
50t2=r36; s2=r37;
51t3=r38; s3=r39;
52
// Table base pointers; te1-te3 are formed as te0 plus TE1-TE3 below.
53te0=r40; te1=r41; te2=r42; te3=r43;
54
// ADDP: addp4 on HP-UX builds without _LP64, a plain add otherwise.
55#if defined(_HPUX_SOURCE) && !defined(_LP64)
56# define ADDP addp4
57#else
58# define ADDP add
59#endif
60
61// Offsets from Te0
// (TE1 and TE3 trade places on HP-UX / big-endian builds)
62#define TE0 0
63#define TE2 2
64#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
65#define TE1 3
66#define TE3 1
67#else
68#define TE1 1
69#define TE3 3
70#endif
71
72// This implies that AES_KEY comprises 32-bit key schedule elements
73// even on LP64 platforms.
// Both KSZ and LDKEY take these defaults unless KSZ is already defined.
74#ifndef KSZ
75# define KSZ 4
76# define LDKEY ld4
77#endif
78
// _ia64_AES_encrypt: internal block-encryption core.  It is entered with
// the return address in b6 and returns through b6.  The body is a
// counted loop (.Le_top, closed by br.ctop) followed by a straight-line
// last round that uses single-byte table loads.
79.proc _ia64_AES_encrypt#
80// Input: rk0-rk1
81// te0
82// te3 as AES_KEY->rounds!!!
83// s0-s3
84// maskff,twenty4,sixteen
85// Output: r16,r20,r24,r28 as s0-s3
86// Clobber: r16-r31,rk0-rk1,r32-r43
87.align 32
88_ia64_AES_encrypt:
89 .prologue
90 .altrp b6
91 .body
// te3 arrives holding the round count: rounds-3 is computed here and
// moved to ar.lc below, after which te3 is re-pointed at the table.
92{ .mmi; alloc r16=ar.pfs,12,0,0,8
93 LDKEY t0=[rk0],2*KSZ
94 mov pr.rot=1<<16 }
95{ .mmi; LDKEY t1=[rk1],2*KSZ
96 add te1=TE1,te0
97 add te3=-3,te3 };;
98{ .mib; LDKEY t2=[rk0],2*KSZ
99 mov ar.ec=2 }
100{ .mib; LDKEY t3=[rk1],2*KSZ
101 add te2=TE2,te0
102 brp.loop.imp .Le_top,.Le_end-16 };;
103
// xor the state with the first four key words
104{ .mmi; xor s0=s0,t0
105 xor s1=s1,t1
106 mov ar.lc=te3 }
107{ .mmi; xor s2=s2,t2
108 xor s3=s3,t3
109 add te3=TE3,te0 };;
110
// Main loop.  Comment prefix "c/w:" is the cycle within the iteration
// and the output word being computed.  Table entries are 8 bytes apart
// (shladd ...,3,...) and each lookup loads 4 bytes.
111.align 32
112.Le_top:
113{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
114 (p0) and te33=s3,maskff // 0/0:s3&0xff
115 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
116{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
117 (p0) and te30=s0,maskff // 0/1:s0&0xff
118 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
119{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
120 (p0) shladd te33=te33,3,te3 // 1/0:te3+s3&0xff
121 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
122{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
123 (p0) shladd te30=te30,3,te3 // 1/1:te3+s0
124 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
125{ .mmi; (p0) ld4 te33=[te33] // 2/0:te3[s3&0xff]
126 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
127 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
128{ .mmi; (p0) ld4 te30=[te30] // 2/1:te3[s0]
129 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
130 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
131{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
132 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
133 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
134{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
135 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
136 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
137{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
138 (p0) shladd te21=te21,3,te2 // 4/3:te2+s1>>8
139 (p0) extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
140{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
141 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
142 (p0) shr.u te13=s3,sixteen };; // 4/2:s3>>16
143{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
144 (p0) shladd te11=te11,3,te1 // 5/0:te1+s1>>16
145 (p0) extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
146{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
147 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
148 (p0) and te31=s1,maskff };; // 5/2:s1&0xff
149{ .mmi; (p0) ld4 te11=[te11] // 6/0:te1[s1>>16]
150 (p0) shladd te12=te12,3,te1 // 6/1:te1+s2>>16
151 (p0) extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
152{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
153 (p0) shladd te03=te03,3,te0 // 6/3:te0+s3>>24
154 (p0) and te32=s2,maskff };; // 6/3:s2&0xff
155
156{ .mmi; (p0) ld4 te12=[te12] // 7/1:te1[s2>>16]
157 (p0) shladd te31=te31,3,te3 // 7/2:te3+s1&0xff
158 (p0) and te13=te13,maskff} // 7/2:s3>>16&0xff
159{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
160 (p0) shladd te32=te32,3,te3 // 7/3:te3+s2
161 (p0) xor t0=t0,te33 };; // 7/0:
162{ .mmi; (p0) ld4 te31=[te31] // 8/2:te3[s1]
163 (p0) shladd te13=te13,3,te1 // 8/2:te1+s3>>16
164 (p0) xor t0=t0,te22 } // 8/0:
165{ .mmi; (p0) ld4 te32=[te32] // 8/3:te3[s2]
166 (p0) shladd te10=te10,3,te1 // 8/3:te1+s0>>16
167 (p0) xor t1=t1,te30 };; // 8/1:
168{ .mmi; (p0) ld4 te13=[te13] // 9/2:te1[s3>>16]
169 (p0) ld4 te10=[te10] // 9/3:te1[s0>>16]
170 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
171{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
172 (p0) xor t2=t2,te20 // 10[9]/2:
173 (p0) xor t3=t3,te21 };; // 10[9]/3:
174{ .mmi; (p0) xor t0=t0,te11 // 11[10]/0:done!
175 (p0) xor t1=t1,te01 // 11[10]/1:
176 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
177{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
178 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
179{ .mmi; (p0) xor t1=t1,te12 // 13[11]/1:done!
180 (p0) xor t2=t2,te31 // 13[11]/2:
181 (p0) xor t3=t3,te32 } // 13[11]/3:
// The (p17) adds below move te0-te3 to 2048, 2048+64, 2048+128 and
// 2048+192 bytes past the original te0, ready for the prefetch after
// the loop.
182{ .mmi; (p17) add te0=2048,te0 // 13[11]/
183 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
184{ .mib; (p0) xor t2=t2,te13 // 14[12]/2:done!
185 (p17) add te2=2048+128-TE2,te2} // 14[12]/
186{ .mib; (p0) xor t3=t3,te10 // 14[12]/3:done!
187 (p17) add te3=2048+192-TE3,te3 // 14[12]/
188 br.ctop.sptk .Le_top };;
189.Le_end:
190
191
// Last round: every lookup is a one-byte load relative to te0, and the
// bytes are merged into words with dep/shl before the final key xor.
192{ .mmi; ld8 te12=[te0] // prefetch Te4
193 ld8 te31=[te1] }
194{ .mmi; ld8 te10=[te2]
195 ld8 te32=[te3] }
196
197{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
198 and te33=s3,maskff // 0/0:s3&0xff
199 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
200{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
201 and te30=s0,maskff // 0/1:s0&0xff
202 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
203{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
204 add te33=te33,te0 // 1/0:te0+s3&0xff
205 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
206{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
207 add te30=te30,te0 // 1/1:te0+s0
208 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
209{ .mmi; ld1 te33=[te33] // 2/0:te0[s3&0xff]
210 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
211 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
212{ .mmi; ld1 te30=[te30] // 2/1:te0[s0]
213 add te23=te23,te0 // 2/1:te0+s3>>8
214 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
215{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
216 add te20=te20,te0 // 3/2:te0+s0>>8
217 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
218{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
219 add te00=te00,te0 // 3/0:te0+s0>>24
220 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
221{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
222 add te21=te21,te0 // 4/3:te0+s1>>8
223 extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
224{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
225 add te01=te01,te0 // 4/1:te0+s1>>24
226 shr.u te13=s3,sixteen };; // 4/2:s3>>16
227{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
228 add te11=te11,te0 // 5/0:te0+s1>>16
229 extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
230{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
231 add te02=te02,te0 // 5/2:te0+s2>>24
232 and te31=s1,maskff };; // 5/2:s1&0xff
233{ .mmi; ld1 te11=[te11] // 6/0:te0[s1>>16]
234 add te12=te12,te0 // 6/1:te0+s2>>16
235 extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
236{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
237 add te03=te03,te0 // 6/3:te0+s3>>24
238 and te32=s2,maskff };; // 6/3:s2&0xff
239
240{ .mmi; ld1 te12=[te12] // 7/1:te0[s2>>16]
241 add te31=te31,te0 // 7/2:te0+s1&0xff
242 dep te33=te22,te33,8,8} // 7/0:
243{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
244 add te32=te32,te0 // 7/3:te0+s2
245 and te13=te13,maskff};; // 7/2:s3>>16&0xff
246{ .mmi; ld1 te31=[te31] // 8/2:te0[s1]
247 add te13=te13,te0 // 8/2:te0+s3>>16
248 dep te30=te23,te30,8,8} // 8/1:
249{ .mmi; ld1 te32=[te32] // 8/3:te0[s2]
250 add te10=te10,te0 // 8/3:te0+s0>>16
251 shl te00=te00,twenty4};; // 8/0:
252{ .mii; ld1 te13=[te13] // 9/2:te0[s3>>16]
253 dep te33=te11,te33,16,8 // 9/0:
254 shl te01=te01,twenty4};; // 9/1:
255{ .mii; ld1 te10=[te10] // 10/3:te0[s0>>16]
256 dep te31=te20,te31,8,8 // 10/2:
257 shl te02=te02,twenty4};; // 10/2:
258{ .mii; xor t0=t0,te33 // 11/0:
259 dep te32=te21,te32,8,8 // 11/3:
260 shl te12=te12,sixteen};; // 11/1:
// results are written to r16, r20, r24 and r28, as listed under Output
261{ .mii; xor r16=t0,te00 // 12/0:done!
262 dep te31=te13,te31,16,8 // 12/2:
263 shl te03=te03,twenty4};; // 12/3:
264{ .mmi; xor t1=t1,te01 // 13/1:
265 xor t2=t2,te02 // 13/2:
266 dep te32=te10,te32,16,8};; // 13/3:
267{ .mmi; xor t1=t1,te30 // 14/1:
268 xor r24=t2,te31 // 14/2:done!
269 xor t3=t3,te32 };; // 14/3:
270{ .mib; xor r20=t1,te12 // 15/1:done!
271 xor r28=t3,te03 // 15/3:done!
272 br.ret.sptk b6 };;
273.endp _ia64_AES_encrypt#
274
275// void AES_encrypt (const void *in,void *out,const AES_KEY *key);
//
// Public wrapper around _ia64_AES_encrypt. It loads the 16-byte block at
// `in` into out1/out3/out5/out7 (labelled s0-s3), points rk0/rk1 at key and
// key+KSZ, passes the address of AES_Te in out8 and the 32-bit value loaded
// from key+KSZ*60 (labelled AES_KEY->rounds) in out11, calls the core
// routine through b6, and stores r16/r20/r24/r28 to `out`.
//
// Two data paths exist:
//  - word path (ld4/st4), assembled only when _HPUX_SOURCE is defined and
//    taken only when the respective pointer is 4-byte aligned;
//  - byte path (.Le_i_unaligned/.Le_o_unaligned), which assembles and splits
//    each word most-significant byte first. Without _HPUX_SOURCE the
//    prologue falls straight into .Le_i_unaligned and the call returns
//    straight into .Le_o_unaligned.
//
// ar.pfs, pr, ar.lc and psr.um are saved in the prologue and restored on
// both exit paths; um.ac is cleared (rum 1<<3) in between.
276.global AES_encrypt#
277.proc AES_encrypt#
278.align 32
279AES_encrypt:
280 .prologue
281 .save ar.pfs,pfssave
282{ .mmi; alloc pfssave=ar.pfs,3,1,12,0 // 3 inputs, 1 local, 12 outputs
283 and out0=3,in0 // low two bits of in: alignment test
284 mov r3=ip } // r3 = address of this (first) bundle
285{ .mmi; ADDP in0=0,in0
286 mov loc0=psr.um // save user mask, restored before return
287 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
288
289{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
290 add out8=(AES_Te#-AES_encrypt#),r3 // Te0 = &AES_Te, ip-relative
291 .save pr,prsave
292 mov prsave=pr }
293{ .mmi; rum 1<<3 // clear um.ac
294 .save ar.lc,lcsave
295 mov lcsave=ar.lc };;
296
297 .body
298#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
299{ .mib; cmp.ne p6,p0=out0,r0 // p6 = in is not 4-byte aligned
300 add out0=4,in0 // second load pointer: in+4
301(p6) br.dpnt.many .Le_i_unaligned };;
302
303{ .mmi; ld4 out1=[in0],8 // s0
304 and out9=3,in1 // low two bits of out: alignment test
305 mov twenty4=24 }
306{ .mmi; ld4 out3=[out0],8 // s1
307 ADDP rk0=0,in2 // rk0 = key
308 mov sixteen=16 };;
309{ .mmi; ld4 out5=[in0] // s2
310 cmp.ne p6,p0=out9,r0 // p6 = out is not 4-byte aligned
311 mov maskff=0xff }
312{ .mmb; ld4 out7=[out0] // s3
313 ADDP rk1=KSZ,in2 // rk1 = key+KSZ
314 br.call.sptk.many b6=_ia64_AES_encrypt };;
315
316{ .mib; ADDP in0=4,in1 // in0 reused as second store pointer: out+4
317 ADDP in1=0,in1
318(p6) br.spnt .Le_o_unaligned };;
319
320{ .mii; mov psr.um=loc0
321 mov ar.pfs=pfssave
322 mov ar.lc=lcsave };;
323{ .mmi; st4 [in1]=r16,8 // s0
324 st4 [in0]=r20,8 // s1
325 mov pr=prsave,0x1ffff };;
326{ .mmb; st4 [in1]=r24 // s2
327 st4 [in0]=r28 // s3
328 br.ret.sptk.many b0 };;
329#endif
330
331.align 32
332.Le_i_unaligned:
// Byte-wise input: in0/out0/out2/out4 walk in+0..in+3 in steps of 4, so each
// group of four ld1 fetches the four bytes of one word.
333{ .mmi; add out0=1,in0
334 add out2=2,in0
335 add out4=3,in0 };;
336{ .mmi; ld1 r16=[in0],4
337 ld1 r17=[out0],4 }//;;
338{ .mmi; ld1 r18=[out2],4
339 ld1 out1=[out4],4 };; // s0
340{ .mmi; ld1 r20=[in0],4
341 ld1 r21=[out0],4 }//;;
342{ .mmi; ld1 r22=[out2],4
343 ld1 out3=[out4],4 };; // s1
344{ .mmi; ld1 r24=[in0],4
345 ld1 r25=[out0],4 }//;;
346{ .mmi; ld1 r26=[out2],4
347 ld1 out5=[out4],4 };; // s2
348{ .mmi; ld1 r28=[in0]
349 ld1 r29=[out0] }//;;
350{ .mmi; ld1 r30=[out2]
351 ld1 out7=[out4] };; // s3
352
// Merge the bytes: byte 0 of each word goes to bits 31..24, byte 1 to
// 23..16, byte 2 to 15..8; byte 3 is already in bits 7..0 of the target.
353{ .mii;
354 dep out1=r16,out1,24,8 //;;
355 dep out3=r20,out3,24,8 }//;;
356{ .mii; ADDP rk0=0,in2 // rk0 = key
357 dep out5=r24,out5,24,8 //;;
358 dep out7=r28,out7,24,8 };;
359{ .mii; ADDP rk1=KSZ,in2 // rk1 = key+KSZ
360 dep out1=r17,out1,16,8 //;;
361 dep out3=r21,out3,16,8 }//;;
362{ .mii; mov twenty4=24
363 dep out5=r25,out5,16,8 //;;
364 dep out7=r29,out7,16,8 };;
365{ .mii; mov sixteen=16
366 dep out1=r18,out1,8,8 //;;
367 dep out3=r22,out3,8,8 }//;;
368{ .mii; mov maskff=0xff
369 dep out5=r26,out5,8,8 //;;
370 dep out7=r30,out7,8,8 };;
371
372{ .mib; br.call.sptk.many b6=_ia64_AES_encrypt };;
373
374.Le_o_unaligned:
// Byte-wise output: out0..out3 walk out+0..out+3 in steps of 4. For each
// result word the byte at bits 31..24 is stored first in memory and the
// byte at bits 7..0 last (st1 writes only the low byte of its source).
375{ .mii; ADDP out0=0,in1
376 extr.u r17=r16,8,8 // s0
377 shr.u r19=r16,twenty4 }//;;
378{ .mii; ADDP out1=1,in1
379 extr.u r18=r16,16,8
380 shr.u r23=r20,twenty4 }//;; // s1
381{ .mii; ADDP out2=2,in1
382 extr.u r21=r20,8,8
383 shr.u r22=r20,sixteen }//;;
384{ .mii; ADDP out3=3,in1
385 extr.u r25=r24,8,8 // s2
386 shr.u r27=r24,twenty4 };;
387{ .mii; st1 [out3]=r16,4
388 extr.u r26=r24,16,8
389 shr.u r31=r28,twenty4 }//;; // s3
390{ .mii; st1 [out2]=r17,4
391 extr.u r29=r28,8,8
392 shr.u r30=r28,sixteen }//;;
393
394{ .mmi; st1 [out1]=r18,4
395 st1 [out0]=r19,4 };;
396{ .mmi; st1 [out3]=r20,4
397 st1 [out2]=r21,4 }//;;
398{ .mmi; st1 [out1]=r22,4
399 st1 [out0]=r23,4 };;
400{ .mmi; st1 [out3]=r24,4
401 st1 [out2]=r25,4
402 mov pr=prsave,0x1ffff }//;;
403{ .mmi; st1 [out1]=r26,4
404 st1 [out0]=r27,4
405 mov ar.pfs=pfssave };;
406{ .mmi; st1 [out3]=r28
407 st1 [out2]=r29
408 mov ar.lc=lcsave }//;;
409{ .mmi; st1 [out1]=r30
410 st1 [out0]=r31 }
411{ .mfb; mov psr.um=loc0 // restore user mask
412 br.ret.sptk.many b0 };;
413.endp AES_encrypt#
414
415// *AES_decrypt are autogenerated by the following script:
416#if 0
417#!/usr/bin/env perl
# Copy this script into the output, wrapped so that the C preprocessor
# skips it.
418print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
419open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);
420print "#endif\n";
# Transform the input from the _ia64_AES_encrypt .proc line onward; earlier
# input lines are dropped.
421while(<>) {
422 $process=1 if (/\.proc\s+_ia64_AES_encrypt/);
423 next if (!$process);
424
# Rename the table-1 and table-3 temporaries together with the state word
# each one is taken from. The commented-out rules would map a name to
# itself. The temporary td prefix keeps an already renamed register from
# being matched again by a later rule.
425 #s/te00=s0/td00=s0/; s/te00/td00/g;
426 s/te11=s1/td13=s3/; s/te11/td13/g;
427 #s/te22=s2/td22=s2/; s/te22/td22/g;
428 s/te33=s3/td31=s1/; s/te33/td31/g;
429
430 #s/te01=s1/td01=s1/; s/te01/td01/g;
431 s/te12=s2/td10=s0/; s/te12/td10/g;
432 #s/te23=s3/td23=s3/; s/te23/td23/g;
433 s/te30=s0/td32=s2/; s/te30/td32/g;
434
435 #s/te02=s2/td02=s2/; s/te02/td02/g;
436 s/te13=s3/td11=s1/; s/te13/td11/g;
437 #s/te20=s0/td20=s0/; s/te20/td20/g;
438 s/te31=s1/td33=s3/; s/te31/td33/g;
439
440 #s/te03=s3/td03=s3/; s/te03/td03/g;
441 s/te10=s0/td12=s2/; s/te10/td12/g;
442 #s/te21=s1/td21=s1/; s/te21/td21/g;
443 s/te32=s2/td30=s0/; s/te32/td30/g;
444
# Fold the temporary prefix back to te.
445 s/td/te/g;
446
# Rename routine names, local labels and the table symbol. Comment text that
# matches none of the patterns above is copied through unchanged.
447 s/AES_encrypt/AES_decrypt/g;
448 s/\.Le_/.Ld_/g;
449 s/AES_Te#/AES_Td#/g;
450
451 print;
452
# Stop once the .endp line of the public routine has been printed.
453 exit if (/\.endp\s+AES_decrypt/);
454}
455#endif
456.proc _ia64_AES_decrypt#
457// Input: rk0-rk1
458// te0
459// te3 as AES_KEY->rounds!!!
460// s0-s3
461// maskff,twenty4,sixteen
462// Output: r16,r20,r24,r28 as s0-s3
463// Clobber: r16-r31,rk0-rk1,r32-r43
//
// Core routine, called with br.call b6=... and returning through b6.
//
// Naming: a temporary teXY is the lookup made through table register teX
// with an index byte taken from state word sY. Index bytes are taken as
// >>24 for te0, >>16&0xff for te1, >>8&0xff for te2 and &0xff for te3.
// Source words per output, in te0/te1/te2/te3 order:
//   t0: s0,s3,s2,s1   t1: s1,s0,s3,s2   t2: s2,s1,s0,s3   t3: s3,s2,s1,s0
//
// Structure:
//  1. xor s0-s3 with the first four key words;
//  2. .Ld_top loop: ld4 lookups at teX + index*8 (shladd ...,3,...), all
//     xor-ed into t0-t3 together with four fresh key words;
//  3. after .Ld_end: byte lookups (ld1) through te0 only, reassembled with
//     dep/shl and xor-ed with four more key words into r16/r20/r24/r28.
//
// The operand part of each per-line comment below describes the instruction
// on that line; the leading "N/M:" tags are kept exactly as generated.
// NOTE(review): the hand-off of t0-t3 to s0-s3 between loop passes
// presumably relies on register rotation (8 rotating registers are
// allocated); the register #defines are outside this view - confirm.
464.align 32
465_ia64_AES_decrypt:
466 .prologue
467 .altrp b6 // return address lives in b6, not b0
468 .body
469{ .mmi; alloc r16=ar.pfs,12,0,0,8 // 12-register frame, 8 rotating
470 LDKEY t0=[rk0],2*KSZ
471 mov pr.rot=1<<16 } // p16=1, remaining rotating predicates 0
472{ .mmi; LDKEY t1=[rk1],2*KSZ
473 add te1=TE1,te0
474 add te3=-3,te3 };; // te3 = rounds-3, becomes ar.lc below
475{ .mib; LDKEY t2=[rk0],2*KSZ
476 mov ar.ec=2 }
477{ .mib; LDKEY t3=[rk1],2*KSZ
478 add te2=TE2,te0
479 brp.loop.imp .Ld_top,.Ld_end-16 };;
480
481{ .mmi; xor s0=s0,t0
482 xor s1=s1,t1
483 mov ar.lc=te3 } // loop count = rounds-3
484{ .mmi; xor s2=s2,t2
485 xor s3=s3,t3
486 add te3=TE3,te0 };; // te3 is a table pointer from here on
487
488.align 32
489.Ld_top:
490{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
491 (p0) and te31=s1,maskff // 0/0:s1&0xff
492 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
493{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
494 (p0) and te32=s2,maskff // 0/1:s2&0xff
495 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
496{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
497 (p0) shladd te31=te31,3,te3 // 1/0:te3+s1&0xff
498 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
499{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
500 (p0) shladd te32=te32,3,te3 // 1/1:te3+s2&0xff
501 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
502{ .mmi; (p0) ld4 te31=[te31] // 2/0:te3[s1&0xff]
503 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
504 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
505{ .mmi; (p0) ld4 te32=[te32] // 2/1:te3[s2&0xff]
506 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
507 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
508{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
509 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
510 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
511{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
512 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
513 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
514{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
515 (p0) shladd te21=te21,3,te2 // 4/3:te2+s1>>8
516 (p0) extr.u te13=s3,16,8 } // 4/0:s3>>16&0xff
517{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
518 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
519 (p0) shr.u te11=s1,sixteen };; // 4/2:s1>>16
520{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
521 (p0) shladd te13=te13,3,te1 // 5/0:te1+s3>>16
522 (p0) extr.u te10=s0,16,8 } // 5/1:s0>>16&0xff
523{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
524 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
525 (p0) and te33=s3,maskff };; // 5/2:s3&0xff
526{ .mmi; (p0) ld4 te13=[te13] // 6/0:te1[s3>>16]
527 (p0) shladd te10=te10,3,te1 // 6/1:te1+s0>>16
528 (p0) extr.u te12=s2,16,8 } // 6/3:s2>>16&0xff
529{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
530 (p0) shladd te03=te03,3,te0 // 6/3:te0+s3>>24
531 (p0) and te30=s0,maskff };; // 6/3:s0&0xff
532
533{ .mmi; (p0) ld4 te10=[te10] // 7/1:te1[s0>>16]
534 (p0) shladd te33=te33,3,te3 // 7/2:te3+s3&0xff
535 (p0) and te11=te11,maskff} // 7/2:s1>>16&0xff
536{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
537 (p0) shladd te30=te30,3,te3 // 7/3:te3+s0&0xff
538 (p0) xor t0=t0,te31 };; // 7/0:
539{ .mmi; (p0) ld4 te33=[te33] // 8/2:te3[s3&0xff]
540 (p0) shladd te11=te11,3,te1 // 8/2:te1+s1>>16
541 (p0) xor t0=t0,te22 } // 8/0:
542{ .mmi; (p0) ld4 te30=[te30] // 8/3:te3[s0&0xff]
543 (p0) shladd te12=te12,3,te1 // 8/3:te1+s2>>16
544 (p0) xor t1=t1,te32 };; // 8/1:
545{ .mmi; (p0) ld4 te11=[te11] // 9/2:te1[s1>>16]
546 (p0) ld4 te12=[te12] // 9/3:te1[s2>>16]
547 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
548{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
549 (p0) xor t2=t2,te20 // 10[9]/2:
550 (p0) xor t3=t3,te21 };; // 10[9]/3:
551{ .mmi; (p0) xor t0=t0,te13 // 11[10]/0:done!
552 (p0) xor t1=t1,te01 // 11[10]/1:
553 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
554{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
555 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
556{ .mmi; (p0) xor t1=t1,te10 // 13[11]/1:done!
557 (p0) xor t2=t2,te33 // 13[11]/2:
558 (p0) xor t3=t3,te30 } // 13[11]/3:
// The (p17) adds below run only on a pass where the (p16) cmp above did not
// clear p17. They leave te0..te3 at table base +2048, +2048+64, +2048+128
// and +2048+192 (te1..te3 were base+TE1..TE3), i.e. inside the byte table
// that follows the 2048 bytes of 32-bit entries.
559{ .mmi; (p17) add te0=2048,te0 // 13[11]/
560 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
561{ .mib; (p0) xor t2=t2,te11 // 14[12]/2:done!
562 (p17) add te2=2048+128-TE2,te2} // 14[12]/
563{ .mib; (p0) xor t3=t3,te12 // 14[12]/3:done!
564 (p17) add te3=2048+192-TE3,te3 // 14[12]/
565 br.ctop.sptk .Ld_top };;
566.Ld_end:
567
568
569{ .mmi; ld8 te10=[te0] // prefetch Td4
570 ld8 te33=[te1] }
571{ .mmi; ld8 te12=[te2]
572 ld8 te30=[te3] }
573
// Last round: every lookup is a single byte (ld1) through te0. The bytes
// are reassembled by position: te0Y -> bits 31..24 (shl twenty4),
// te1Y -> bits 23..16, te2Y -> bits 15..8, te3Y -> bits 7..0, then xor-ed
// with the key words into r16/r20/r24/r28.
574{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
575 and te31=s1,maskff // 0/0:s1&0xff
576 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
577{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
578 and te32=s2,maskff // 0/1:s2&0xff
579 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
580{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
581 add te31=te31,te0 // 1/0:te0+s1&0xff
582 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
583{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
584 add te32=te32,te0 // 1/1:te0+s2&0xff
585 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
586{ .mmi; ld1 te31=[te31] // 2/0:te0[s1&0xff]
587 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
588 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
589{ .mmi; ld1 te32=[te32] // 2/1:te0[s2&0xff]
590 add te23=te23,te0 // 2/1:te0+s3>>8
591 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
592{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
593 add te20=te20,te0 // 3/2:te0+s0>>8
594 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
595{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
596 add te00=te00,te0 // 3/0:te0+s0>>24
597 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
598{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
599 add te21=te21,te0 // 4/3:te0+s1>>8
600 extr.u te13=s3,16,8 } // 4/0:s3>>16&0xff
601{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
602 add te01=te01,te0 // 4/1:te0+s1>>24
603 shr.u te11=s1,sixteen };; // 4/2:s1>>16
604{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
605 add te13=te13,te0 // 5/0:te0+s3>>16
606 extr.u te10=s0,16,8 } // 5/1:s0>>16&0xff
607{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
608 add te02=te02,te0 // 5/2:te0+s2>>24
609 and te33=s3,maskff };; // 5/2:s3&0xff
610{ .mmi; ld1 te13=[te13] // 6/0:te0[s3>>16]
611 add te10=te10,te0 // 6/1:te0+s0>>16
612 extr.u te12=s2,16,8 } // 6/3:s2>>16&0xff
613{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
614 add te03=te03,te0 // 6/3:te0+s3>>24
615 and te30=s0,maskff };; // 6/3:s0&0xff
616
617{ .mmi; ld1 te10=[te10] // 7/1:te0[s0>>16]
618 add te33=te33,te0 // 7/2:te0+s3&0xff
619 dep te31=te22,te31,8,8} // 7/0:
620{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
621 add te30=te30,te0 // 7/3:te0+s0&0xff
622 and te11=te11,maskff};; // 7/2:s1>>16&0xff
623{ .mmi; ld1 te33=[te33] // 8/2:te0[s3&0xff]
624 add te11=te11,te0 // 8/2:te0+s1>>16
625 dep te32=te23,te32,8,8} // 8/1:
626{ .mmi; ld1 te30=[te30] // 8/3:te0[s0&0xff]
627 add te12=te12,te0 // 8/3:te0+s2>>16
628 shl te00=te00,twenty4};; // 8/0:
629{ .mii; ld1 te11=[te11] // 9/2:te0[s1>>16]
630 dep te31=te13,te31,16,8 // 9/0:
631 shl te01=te01,twenty4};; // 9/1:
632{ .mii; ld1 te12=[te12] // 10/3:te0[s2>>16]
633 dep te33=te20,te33,8,8 // 10/2:
634 shl te02=te02,twenty4};; // 10/2:
635{ .mii; xor t0=t0,te31 // 11/0:
636 dep te30=te21,te30,8,8 // 11/3:
637 shl te10=te10,sixteen};; // 11/1:
638{ .mii; xor r16=t0,te00 // 12/0:done!
639 dep te33=te11,te33,16,8 // 12/2:
640 shl te03=te03,twenty4};; // 12/3:
641{ .mmi; xor t1=t1,te01 // 13/1:
642 xor t2=t2,te02 // 13/2:
643 dep te30=te12,te30,16,8};; // 13/3:
644{ .mmi; xor t1=t1,te32 // 14/1:
645 xor r24=t2,te33 // 14/2:done!
646 xor t3=t3,te30 };; // 14/3:
647{ .mib; xor r20=t1,te10 // 15/1:done!
648 xor r28=t3,te03 // 15/3:done!
649 br.ret.sptk b6 };;
650.endp _ia64_AES_decrypt#
651
652// void AES_decrypt (const void *in,void *out,const AES_KEY *key);
//
// Public wrapper around _ia64_AES_decrypt. It loads the 16-byte block at
// `in` into out1/out3/out5/out7 (labelled s0-s3), points rk0/rk1 at key and
// key+KSZ, passes the address of AES_Td in out8 and the 32-bit value loaded
// from key+KSZ*60 (labelled AES_KEY->rounds) in out11, calls the core
// routine through b6, and stores r16/r20/r24/r28 to `out`.
//
// Two data paths exist:
//  - word path (ld4/st4), assembled only when _HPUX_SOURCE is defined and
//    taken only when the respective pointer is 4-byte aligned;
//  - byte path (.Ld_i_unaligned/.Ld_o_unaligned), which assembles and splits
//    each word most-significant byte first. Without _HPUX_SOURCE the
//    prologue falls straight into .Ld_i_unaligned and the call returns
//    straight into .Ld_o_unaligned.
//
// ar.pfs, pr, ar.lc and psr.um are saved in the prologue and restored on
// both exit paths; um.ac is cleared (rum 1<<3) in between.
653.global AES_decrypt#
654.proc AES_decrypt#
655.align 32
656AES_decrypt:
657 .prologue
658 .save ar.pfs,pfssave
659{ .mmi; alloc pfssave=ar.pfs,3,1,12,0 // 3 inputs, 1 local, 12 outputs
660 and out0=3,in0 // low two bits of in: alignment test
661 mov r3=ip } // r3 = address of this (first) bundle
662{ .mmi; ADDP in0=0,in0
663 mov loc0=psr.um // save user mask, restored before return
664 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
665
666{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
667 add out8=(AES_Td#-AES_decrypt#),r3 // Td0 = &AES_Td, ip-relative
668 .save pr,prsave
669 mov prsave=pr }
670{ .mmi; rum 1<<3 // clear um.ac
671 .save ar.lc,lcsave
672 mov lcsave=ar.lc };;
673
674 .body
675#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
676{ .mib; cmp.ne p6,p0=out0,r0 // p6 = in is not 4-byte aligned
677 add out0=4,in0 // second load pointer: in+4
678(p6) br.dpnt.many .Ld_i_unaligned };;
679
680{ .mmi; ld4 out1=[in0],8 // s0
681 and out9=3,in1 // low two bits of out: alignment test
682 mov twenty4=24 }
683{ .mmi; ld4 out3=[out0],8 // s1
684 ADDP rk0=0,in2 // rk0 = key
685 mov sixteen=16 };;
686{ .mmi; ld4 out5=[in0] // s2
687 cmp.ne p6,p0=out9,r0 // p6 = out is not 4-byte aligned
688 mov maskff=0xff }
689{ .mmb; ld4 out7=[out0] // s3
690 ADDP rk1=KSZ,in2 // rk1 = key+KSZ
691 br.call.sptk.many b6=_ia64_AES_decrypt };;
692
693{ .mib; ADDP in0=4,in1 // in0 reused as second store pointer: out+4
694 ADDP in1=0,in1
695(p6) br.spnt .Ld_o_unaligned };;
696
697{ .mii; mov psr.um=loc0
698 mov ar.pfs=pfssave
699 mov ar.lc=lcsave };;
700{ .mmi; st4 [in1]=r16,8 // s0
701 st4 [in0]=r20,8 // s1
702 mov pr=prsave,0x1ffff };;
703{ .mmb; st4 [in1]=r24 // s2
704 st4 [in0]=r28 // s3
705 br.ret.sptk.many b0 };;
706#endif
707
708.align 32
709.Ld_i_unaligned:
// Byte-wise input: in0/out0/out2/out4 walk in+0..in+3 in steps of 4, so each
// group of four ld1 fetches the four bytes of one word.
710{ .mmi; add out0=1,in0
711 add out2=2,in0
712 add out4=3,in0 };;
713{ .mmi; ld1 r16=[in0],4
714 ld1 r17=[out0],4 }//;;
715{ .mmi; ld1 r18=[out2],4
716 ld1 out1=[out4],4 };; // s0
717{ .mmi; ld1 r20=[in0],4
718 ld1 r21=[out0],4 }//;;
719{ .mmi; ld1 r22=[out2],4
720 ld1 out3=[out4],4 };; // s1
721{ .mmi; ld1 r24=[in0],4
722 ld1 r25=[out0],4 }//;;
723{ .mmi; ld1 r26=[out2],4
724 ld1 out5=[out4],4 };; // s2
725{ .mmi; ld1 r28=[in0]
726 ld1 r29=[out0] }//;;
727{ .mmi; ld1 r30=[out2]
728 ld1 out7=[out4] };; // s3
729
// Merge the bytes: byte 0 of each word goes to bits 31..24, byte 1 to
// 23..16, byte 2 to 15..8; byte 3 is already in bits 7..0 of the target.
730{ .mii;
731 dep out1=r16,out1,24,8 //;;
732 dep out3=r20,out3,24,8 }//;;
733{ .mii; ADDP rk0=0,in2 // rk0 = key
734 dep out5=r24,out5,24,8 //;;
735 dep out7=r28,out7,24,8 };;
736{ .mii; ADDP rk1=KSZ,in2 // rk1 = key+KSZ
737 dep out1=r17,out1,16,8 //;;
738 dep out3=r21,out3,16,8 }//;;
739{ .mii; mov twenty4=24
740 dep out5=r25,out5,16,8 //;;
741 dep out7=r29,out7,16,8 };;
742{ .mii; mov sixteen=16
743 dep out1=r18,out1,8,8 //;;
744 dep out3=r22,out3,8,8 }//;;
745{ .mii; mov maskff=0xff
746 dep out5=r26,out5,8,8 //;;
747 dep out7=r30,out7,8,8 };;
748
749{ .mib; br.call.sptk.many b6=_ia64_AES_decrypt };;
750
751.Ld_o_unaligned:
// Byte-wise output: out0..out3 walk out+0..out+3 in steps of 4. For each
// result word the byte at bits 31..24 is stored first in memory and the
// byte at bits 7..0 last (st1 writes only the low byte of its source).
752{ .mii; ADDP out0=0,in1
753 extr.u r17=r16,8,8 // s0
754 shr.u r19=r16,twenty4 }//;;
755{ .mii; ADDP out1=1,in1
756 extr.u r18=r16,16,8
757 shr.u r23=r20,twenty4 }//;; // s1
758{ .mii; ADDP out2=2,in1
759 extr.u r21=r20,8,8
760 shr.u r22=r20,sixteen }//;;
761{ .mii; ADDP out3=3,in1
762 extr.u r25=r24,8,8 // s2
763 shr.u r27=r24,twenty4 };;
764{ .mii; st1 [out3]=r16,4
765 extr.u r26=r24,16,8
766 shr.u r31=r28,twenty4 }//;; // s3
767{ .mii; st1 [out2]=r17,4
768 extr.u r29=r28,8,8
769 shr.u r30=r28,sixteen }//;;
770
771{ .mmi; st1 [out1]=r18,4
772 st1 [out0]=r19,4 };;
773{ .mmi; st1 [out3]=r20,4
774 st1 [out2]=r21,4 }//;;
775{ .mmi; st1 [out1]=r22,4
776 st1 [out0]=r23,4 };;
777{ .mmi; st1 [out3]=r24,4
778 st1 [out2]=r25,4
779 mov pr=prsave,0x1ffff }//;;
780{ .mmi; st1 [out1]=r26,4
781 st1 [out0]=r27,4
782 mov ar.pfs=pfssave };;
783{ .mmi; st1 [out3]=r28
784 st1 [out2]=r29
785 mov ar.lc=lcsave }//;;
786{ .mmi; st1 [out1]=r30
787 st1 [out0]=r31 }
788{ .mfb; mov psr.um=loc0 // restore user mask
789 br.ret.sptk.many b0 };;
790.endp AES_decrypt#
791
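792// Keep the lookup tables in the .text segment: AES_encrypt and AES_decrypt locate them ip-relative, as a link-time offset from their own entry label.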
793.align 64
794.global AES_Te#
795.type AES_Te#,@object
796AES_Te: data4 0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
797 data4 0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
798 data4 0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
799 data4 0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
800 data4 0x60303050,0x60303050, 0x02010103,0x02010103
801 data4 0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
802 data4 0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
803 data4 0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
804 data4 0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
805 data4 0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
806 data4 0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
807 data4 0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
808 data4 0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
809 data4 0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
810 data4 0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
811 data4 0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
812 data4 0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
813 data4 0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
814 data4 0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
815 data4 0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
816 data4 0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
817 data4 0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108
818 data4 0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
819 data4 0x62313153,0x62313153, 0x2a15153f,0x2a15153f
820 data4 0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
821 data4 0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
822 data4 0x30181828,0x30181828, 0x379696a1,0x379696a1
823 data4 0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
824 data4 0x0e070709,0x0e070709, 0x24121236,0x24121236
825 data4 0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
826 data4 0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
827 data4 0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
828 data4 0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
829 data4 0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
830 data4 0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
831 data4 0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
832 data4 0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
833 data4 0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
834 data4 0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
835 data4 0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
836 data4 0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
837 data4 0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
838 data4 0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
839 data4 0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
840 data4 0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
841 data4 0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
842 data4 0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
843 data4 0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
844 data4 0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
845 data4 0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
846 data4 0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
847 data4 0x66333355,0x66333355, 0x11858594,0x11858594
848 data4 0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
849 data4 0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
850 data4 0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
851 data4 0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
852 data4 0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
853 data4 0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
854 data4 0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
855 data4 0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
856 data4 0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
857 data4 0xafdada75,0xafdada75, 0x42212163,0x42212163
858 data4 0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
859 data4 0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
860 data4 0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
861 data4 0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
862 data4 0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
863 data4 0x884444cc,0x884444cc, 0x2e171739,0x2e171739
864 data4 0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
865 data4 0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
866 data4 0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
867 data4 0x3219192b,0x3219192b, 0xe6737395,0xe6737395
868 data4 0xc06060a0,0xc06060a0, 0x19818198,0x19818198
869 data4 0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
870 data4 0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
871 data4 0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
872 data4 0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
873 data4 0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
874 data4 0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
875 data4 0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
876 data4 0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
877 data4 0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
878 data4 0x924949db,0x924949db, 0x0c06060a,0x0c06060a
879 data4 0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
880 data4 0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
881 data4 0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
882 data4 0x399191a8,0x399191a8, 0x319595a4,0x319595a4
883 data4 0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
884 data4 0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
885 data4 0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
886 data4 0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
887 data4 0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
888 data4 0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
889 data4 0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
890 data4 0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
891 data4 0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
892 data4 0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
893 data4 0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
894 data4 0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
895 data4 0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
896 data4 0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
897 data4 0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
898 data4 0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
899 data4 0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
900 data4 0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
901 data4 0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
902 data4 0x904848d8,0x904848d8, 0x06030305,0x06030305
903 data4 0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
904 data4 0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
905 data4 0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
906 data4 0x17868691,0x17868691, 0x99c1c158,0x99c1c158
907 data4 0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
908 data4 0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
909 data4 0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
910 data4 0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
911 data4 0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
912 data4 0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
913 data4 0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
914 data4 0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
915 data4 0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
916 data4 0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
917 data4 0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
918 data4 0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
919 data4 0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
920 data4 0x824141c3,0x824141c3, 0x299999b0,0x299999b0
921 data4 0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
922 data4 0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
923 data4 0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
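924// Te4: 256 one-byte entries, stored directly after the 2048 bytes of 32-bit entries above (hence the 2048+256 in .size).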
925 data1 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
926 data1 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
927 data1 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
928 data1 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
929 data1 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
930 data1 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
931 data1 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
932 data1 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
933 data1 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
934 data1 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
935 data1 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
936 data1 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
937 data1 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
938 data1 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
939 data1 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
940 data1 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
941 data1 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
942 data1 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
943 data1 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
944 data1 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
945 data1 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
946 data1 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
947 data1 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
948 data1 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
949 data1 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
950 data1 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
951 data1 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
952 data1 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
953 data1 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
954 data1 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
955 data1 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
956 data1 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
957.size AES_Te#,2048+256 // HP-UX assembler fails to ".-AES_Te#"
958
959.align 64
960.global AES_Td#
961.type AES_Td#,@object
962AES_Td: data4 0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
963 data4 0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
964 data4 0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
965 data4 0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
966 data4 0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
967 data4 0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
968 data4 0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
969 data4 0x26354480,0x26354480, 0xb562a38f,0xb562a38f
970 data4 0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
971 data4 0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
972 data4 0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
973 data4 0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
974 data4 0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
975 data4 0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
976 data4 0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
977 data4 0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
978 data4 0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
979 data4 0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
980 data4 0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
981 data4 0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
982 data4 0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
983 data4 0x97513360,0x97513360, 0x62537f45,0x62537f45
984 data4 0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
985 data4 0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
986 data4 0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
987 data4 0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
988 data4 0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
989 data4 0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
990 data4 0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
991 data4 0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
992 data4 0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
993 data4 0x02036aba,0x02036aba, 0xed16825c,0xed16825c
994 data4 0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
995 data4 0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
996 data4 0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
997 data4 0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
998 data4 0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
999 data4 0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
1000 data4 0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
1001 data4 0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
1002 data4 0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
1003 data4 0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
1004 data4 0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
1005 data4 0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
1006 data4 0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
1007 data4 0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
1008 data4 0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
1009 data4 0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
1010 data4 0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
1011 data4 0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
1012 data4 0x09808683,0x09808683, 0x322bed48,0x322bed48
1013 data4 0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
1014 data4 0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
1015 data4 0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
1016 data4 0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
1017 data4 0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
1018 data4 0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
1019 data4 0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
1020 data4 0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
1021 data4 0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
1022 data4 0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
1023 data4 0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
1024 data4 0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
1025 data4 0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
1026 data4 0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
1027 data4 0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
1028 data4 0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
1029 data4 0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
1030 data4 0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
1031 data4 0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
1032 data4 0xd731dcca,0xd731dcca, 0x42638510,0x42638510
1033 data4 0x13972240,0x13972240, 0x84c61120,0x84c61120
1034 data4 0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
1035 data4 0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
1036 data4 0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
1037 data4 0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
1038 data4 0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
1039 data4 0x119448fa,0x119448fa, 0x47e96422,0x47e96422
1040 data4 0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
1041 data4 0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
1042 data4 0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
1043 data4 0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
1044 data4 0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
1045 data4 0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
1046 data4 0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
1047 data4 0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
1048 data4 0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
1049 data4 0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
1050 data4 0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
1051 data4 0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
1052 data4 0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
1053 data4 0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
1054 data4 0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
1055 data4 0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
1056 data4 0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
1057 data4 0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
1058 data4 0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
1059 data4 0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
1060 data4 0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
1061 data4 0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
1062 data4 0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
1063 data4 0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
1064 data4 0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
1065 data4 0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
1066 data4 0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
1067 data4 0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
1068 data4 0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
1069 data4 0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
1070 data4 0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
1071 data4 0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
1072 data4 0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
1073 data4 0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
1074 data4 0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
1075 data4 0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
1076 data4 0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
1077 data4 0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
1078 data4 0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
1079 data4 0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
1080 data4 0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
1081 data4 0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
1082 data4 0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
1083 data4 0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
1084 data4 0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
1085 data4 0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
1086 data4 0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
1087 data4 0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
1088 data4 0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
1089 data4 0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
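1090// Td4: 256 one-byte entries, stored directly after the 2048 bytes of 32-bit entries above (hence the 2048+256 in .size).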
1091 data1 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1092 data1 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1093 data1 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1094 data1 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1095 data1 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1096 data1 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1097 data1 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1098 data1 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1099 data1 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1100 data1 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1101 data1 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1102 data1 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1103 data1 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1104 data1 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1105 data1 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1106 data1 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1107 data1 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1108 data1 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1109 data1 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1110 data1 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1111 data1 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1112 data1 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1113 data1 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1114 data1 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1115 data1 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1116 data1 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1117 data1 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1118 data1 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1119 data1 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1120 data1 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1121 data1 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1122 data1 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1123.size AES_Td#,2048+256 // HP-UX assembler fails to ".-AES_Td#"
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl
deleted file mode 100644
index ce427655ef..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-ppc.pl
+++ /dev/null
@@ -1,1176 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, page boundaries, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
15# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
18
19$flavour = shift; # first command-line argument selects the target flavour
20
21if ($flavour =~ /64/) { # flavour string contains "64": 8-byte slots, doubleword mnemonics
22 $SIZE_T =8;
23 $STU ="stdu"; # substituted for $STU where the routines allocate their stack frame
24 $POP ="ld"; # substituted for $POP where saved registers are reloaded
25 $PUSH ="std"; # substituted for $PUSH where registers are saved
26} elsif ($flavour =~ /32/) { # flavour string contains "32": 4-byte slots, word mnemonics
27 $SIZE_T =4;
28 $STU ="stwu";
29 $POP ="lwz";
30 $PUSH ="stw";
31} else { die "nonsense $flavour"; } # neither "64" nor "32" in the flavour: abort
32
33$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's path, separator included
34( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or # look for the translator beside this script first ...
35( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or # ... then under ../../perlasm/
36die "can't locate ppc-xlate.pl";
37# Everything printed on STDOUT from here on is piped through the translator, which also receives the flavour and the next command-line argument.
38open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # low-precedence "or" tests open's result; "||" would bind to the always-true command string and never die
39
40$FRAME=32*$SIZE_T; # stack frame size used by the entry points: 32 slots of $SIZE_T bytes
41
42sub _data_word() # append one ".long w,w" line to $code per argument, so every word is emitted twice
43{ my $word;
44 $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word) while(defined($word=shift)); # stops at the first undefined argument
45}
46
47$sp="r1"; # stack pointer: the $STU/$PUSH/$POP frame code addresses everything off it
48$toc="r2"; # saved and restored by .AES_encrypt/.AES_decrypt (same register as $Tbl3 below)
49$inp="r3"; # .AES_encrypt/.AES_decrypt load the four input words through this register
50$out="r4"; # ... and store the four result words through this one
51$key="r5"; # key words are read at offsets 0..12 and the pointer advances by 16; the word at offset 240 seeds the loop counter
52
53$Tbl0="r3"; # same register as $inp; LAES_Te/LAES_Td leave the computed table address here
54$Tbl1="r6";
55$Tbl2="r7";
56$Tbl3="r2";
57
58$s0="r8"; # $s0-$s3: the four 32-bit words being transformed
59$s1="r9";
60$s2="r10";
61$s3="r11";
62
63$t0="r12"; # $t0-$t3: key words loaded from ($key)
64$t1="r13";
65$t2="r14";
66$t3="r15";
67
68$acc00="r16"; # $acc00-$acc15: scratch registers for extracted bytes and table lookups
69$acc01="r17";
70$acc02="r18";
71$acc03="r19";
72
73$acc04="r20";
74$acc05="r21";
75$acc06="r22";
76$acc07="r23";
77
78$acc08="r24";
79$acc09="r25";
80$acc10="r26";
81$acc11="r27";
82
83$acc12="r28";
84$acc13="r29";
85$acc14="r30";
86$acc15="r31";
87
88# stay away from TLS pointer
89if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } # 64-bit flavour: $t1 moves from r13 to r0
90else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } # 32-bit flavour: $Tbl3 takes over r12 and $t0 moves to r0
91$mask80=$Tbl2; # the compact routines build their 0x8080.. mask in this table register ...
92$mask1b=$Tbl3; # ... and their 0x1b1b.. mask in this one
93
94$code.=<<___;
95.machine "any"
96.text
97
98.align 7
99LAES_Te:
100 mflr r0
101 bcl 20,31,\$+4
102 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
103 addi $Tbl0,$Tbl0,`128-8`
104 mtlr r0
105 blr
106 .space `32-24`
107LAES_Td:
108 mflr r0
109 bcl 20,31,\$+4
110 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
111 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
112 mtlr r0
113 blr
114 .space `128-32-24`
115___
116&_data_word(
117 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
181$code.=<<___;
182.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214___
215&_data_word(
216 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
280$code.=<<___;
281.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
315.globl .AES_encrypt
316.align 7
317.AES_encrypt:
318 mflr r0
319 $STU $sp,-$FRAME($sp)
320
321 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
322 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
323 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
324 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
325 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
326 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
327 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
328 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
329 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
330 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
331 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
332 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
333 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
334 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
335 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
336 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
337 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
338 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
339 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
340 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
341 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
342
343 lwz $s0,0($inp)
344 lwz $s1,4($inp)
345 lwz $s2,8($inp)
346 lwz $s3,12($inp)
347 bl LAES_Te
348 bl Lppc_AES_encrypt_compact
349 stw $s0,0($out)
350 stw $s1,4($out)
351 stw $s2,8($out)
352 stw $s3,12($out)
353
354 $POP r0,`$FRAME-$SIZE_T*21`($sp)
355 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
356 $POP r13,`$FRAME-$SIZE_T*19`($sp)
357 $POP r14,`$FRAME-$SIZE_T*18`($sp)
358 $POP r15,`$FRAME-$SIZE_T*17`($sp)
359 $POP r16,`$FRAME-$SIZE_T*16`($sp)
360 $POP r17,`$FRAME-$SIZE_T*15`($sp)
361 $POP r18,`$FRAME-$SIZE_T*14`($sp)
362 $POP r19,`$FRAME-$SIZE_T*13`($sp)
363 $POP r20,`$FRAME-$SIZE_T*12`($sp)
364 $POP r21,`$FRAME-$SIZE_T*11`($sp)
365 $POP r22,`$FRAME-$SIZE_T*10`($sp)
366 $POP r23,`$FRAME-$SIZE_T*9`($sp)
367 $POP r24,`$FRAME-$SIZE_T*8`($sp)
368 $POP r25,`$FRAME-$SIZE_T*7`($sp)
369 $POP r26,`$FRAME-$SIZE_T*6`($sp)
370 $POP r27,`$FRAME-$SIZE_T*5`($sp)
371 $POP r28,`$FRAME-$SIZE_T*4`($sp)
372 $POP r29,`$FRAME-$SIZE_T*3`($sp)
373 $POP r30,`$FRAME-$SIZE_T*2`($sp)
374 $POP r31,`$FRAME-$SIZE_T*1`($sp)
375 mtlr r0
376 addi $sp,$sp,$FRAME
377 blr
378
379.align 4
380Lppc_AES_encrypt:
381 lwz $acc00,240($key)
382 lwz $t0,0($key)
383 lwz $t1,4($key)
384 lwz $t2,8($key)
385 lwz $t3,12($key)
386 addi $Tbl1,$Tbl0,3
387 addi $Tbl2,$Tbl0,2
388 addi $Tbl3,$Tbl0,1
389 addi $acc00,$acc00,-1
390 addi $key,$key,16
391 xor $s0,$s0,$t0
392 xor $s1,$s1,$t1
393 xor $s2,$s2,$t2
394 xor $s3,$s3,$t3
395 mtctr $acc00
396.align 4
397Lenc_loop:
398 rlwinm $acc00,$s0,`32-24+3`,21,28
399 rlwinm $acc01,$s1,`32-24+3`,21,28
400 lwz $t0,0($key)
401 lwz $t1,4($key)
402 rlwinm $acc02,$s2,`32-24+3`,21,28
403 rlwinm $acc03,$s3,`32-24+3`,21,28
404 lwz $t2,8($key)
405 lwz $t3,12($key)
406 rlwinm $acc04,$s1,`32-16+3`,21,28
407 rlwinm $acc05,$s2,`32-16+3`,21,28
408 lwzx $acc00,$Tbl0,$acc00
409 lwzx $acc01,$Tbl0,$acc01
410 rlwinm $acc06,$s3,`32-16+3`,21,28
411 rlwinm $acc07,$s0,`32-16+3`,21,28
412 lwzx $acc02,$Tbl0,$acc02
413 lwzx $acc03,$Tbl0,$acc03
414 rlwinm $acc08,$s2,`32-8+3`,21,28
415 rlwinm $acc09,$s3,`32-8+3`,21,28
416 lwzx $acc04,$Tbl1,$acc04
417 lwzx $acc05,$Tbl1,$acc05
418 rlwinm $acc10,$s0,`32-8+3`,21,28
419 rlwinm $acc11,$s1,`32-8+3`,21,28
420 lwzx $acc06,$Tbl1,$acc06
421 lwzx $acc07,$Tbl1,$acc07
422 rlwinm $acc12,$s3,`0+3`,21,28
423 rlwinm $acc13,$s0,`0+3`,21,28
424 lwzx $acc08,$Tbl2,$acc08
425 lwzx $acc09,$Tbl2,$acc09
426 rlwinm $acc14,$s1,`0+3`,21,28
427 rlwinm $acc15,$s2,`0+3`,21,28
428 lwzx $acc10,$Tbl2,$acc10
429 lwzx $acc11,$Tbl2,$acc11
430 xor $t0,$t0,$acc00
431 xor $t1,$t1,$acc01
432 lwzx $acc12,$Tbl3,$acc12
433 lwzx $acc13,$Tbl3,$acc13
434 xor $t2,$t2,$acc02
435 xor $t3,$t3,$acc03
436 lwzx $acc14,$Tbl3,$acc14
437 lwzx $acc15,$Tbl3,$acc15
438 xor $t0,$t0,$acc04
439 xor $t1,$t1,$acc05
440 xor $t2,$t2,$acc06
441 xor $t3,$t3,$acc07
442 xor $t0,$t0,$acc08
443 xor $t1,$t1,$acc09
444 xor $t2,$t2,$acc10
445 xor $t3,$t3,$acc11
446 xor $s0,$t0,$acc12
447 xor $s1,$t1,$acc13
448 xor $s2,$t2,$acc14
449 xor $s3,$t3,$acc15
450 addi $key,$key,16
451 bdnz- Lenc_loop
452
453 addi $Tbl2,$Tbl0,2048
454 nop
455 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
456 lwz $acc09,`2048+32`($Tbl0)
457 lwz $acc10,`2048+64`($Tbl0)
458 lwz $acc11,`2048+96`($Tbl0)
459 lwz $acc08,`2048+128`($Tbl0)
460 lwz $acc09,`2048+160`($Tbl0)
461 lwz $acc10,`2048+192`($Tbl0)
462 lwz $acc11,`2048+224`($Tbl0)
463 rlwinm $acc00,$s0,`32-24`,24,31
464 rlwinm $acc01,$s1,`32-24`,24,31
465 lwz $t0,0($key)
466 lwz $t1,4($key)
467 rlwinm $acc02,$s2,`32-24`,24,31
468 rlwinm $acc03,$s3,`32-24`,24,31
469 lwz $t2,8($key)
470 lwz $t3,12($key)
471 rlwinm $acc04,$s1,`32-16`,24,31
472 rlwinm $acc05,$s2,`32-16`,24,31
473 lbzx $acc00,$Tbl2,$acc00
474 lbzx $acc01,$Tbl2,$acc01
475 rlwinm $acc06,$s3,`32-16`,24,31
476 rlwinm $acc07,$s0,`32-16`,24,31
477 lbzx $acc02,$Tbl2,$acc02
478 lbzx $acc03,$Tbl2,$acc03
479 rlwinm $acc08,$s2,`32-8`,24,31
480 rlwinm $acc09,$s3,`32-8`,24,31
481 lbzx $acc04,$Tbl2,$acc04
482 lbzx $acc05,$Tbl2,$acc05
483 rlwinm $acc10,$s0,`32-8`,24,31
484 rlwinm $acc11,$s1,`32-8`,24,31
485 lbzx $acc06,$Tbl2,$acc06
486 lbzx $acc07,$Tbl2,$acc07
487 rlwinm $acc12,$s3,`0`,24,31
488 rlwinm $acc13,$s0,`0`,24,31
489 lbzx $acc08,$Tbl2,$acc08
490 lbzx $acc09,$Tbl2,$acc09
491 rlwinm $acc14,$s1,`0`,24,31
492 rlwinm $acc15,$s2,`0`,24,31
493 lbzx $acc10,$Tbl2,$acc10
494 lbzx $acc11,$Tbl2,$acc11
495 rlwinm $s0,$acc00,24,0,7
496 rlwinm $s1,$acc01,24,0,7
497 lbzx $acc12,$Tbl2,$acc12
498 lbzx $acc13,$Tbl2,$acc13
499 rlwinm $s2,$acc02,24,0,7
500 rlwinm $s3,$acc03,24,0,7
501 lbzx $acc14,$Tbl2,$acc14
502 lbzx $acc15,$Tbl2,$acc15
503 rlwimi $s0,$acc04,16,8,15
504 rlwimi $s1,$acc05,16,8,15
505 rlwimi $s2,$acc06,16,8,15
506 rlwimi $s3,$acc07,16,8,15
507 rlwimi $s0,$acc08,8,16,23
508 rlwimi $s1,$acc09,8,16,23
509 rlwimi $s2,$acc10,8,16,23
510 rlwimi $s3,$acc11,8,16,23
511 or $s0,$s0,$acc12
512 or $s1,$s1,$acc13
513 or $s2,$s2,$acc14
514 or $s3,$s3,$acc15
515 xor $s0,$s0,$t0
516 xor $s1,$s1,$t1
517 xor $s2,$s2,$t2
518 xor $s3,$s3,$t3
519 blr
520
521.align 4
522Lppc_AES_encrypt_compact:
523 lwz $acc00,240($key)
524 lwz $t0,0($key)
525 lwz $t1,4($key)
526 lwz $t2,8($key)
527 lwz $t3,12($key)
528 addi $Tbl1,$Tbl0,2048
529 lis $mask80,0x8080
530 lis $mask1b,0x1b1b
531 addi $key,$key,16
532 ori $mask80,$mask80,0x8080
533 ori $mask1b,$mask1b,0x1b1b
534 mtctr $acc00
535.align 4
536Lenc_compact_loop:
537 xor $s0,$s0,$t0
538 xor $s1,$s1,$t1
539 xor $s2,$s2,$t2
540 xor $s3,$s3,$t3
541 rlwinm $acc00,$s0,`32-24`,24,31
542 rlwinm $acc01,$s1,`32-24`,24,31
543 rlwinm $acc02,$s2,`32-24`,24,31
544 rlwinm $acc03,$s3,`32-24`,24,31
545 lbzx $acc00,$Tbl1,$acc00
546 lbzx $acc01,$Tbl1,$acc01
547 rlwinm $acc04,$s1,`32-16`,24,31
548 rlwinm $acc05,$s2,`32-16`,24,31
549 lbzx $acc02,$Tbl1,$acc02
550 lbzx $acc03,$Tbl1,$acc03
551 rlwinm $acc06,$s3,`32-16`,24,31
552 rlwinm $acc07,$s0,`32-16`,24,31
553 lbzx $acc04,$Tbl1,$acc04
554 lbzx $acc05,$Tbl1,$acc05
555 rlwinm $acc08,$s2,`32-8`,24,31
556 rlwinm $acc09,$s3,`32-8`,24,31
557 lbzx $acc06,$Tbl1,$acc06
558 lbzx $acc07,$Tbl1,$acc07
559 rlwinm $acc10,$s0,`32-8`,24,31
560 rlwinm $acc11,$s1,`32-8`,24,31
561 lbzx $acc08,$Tbl1,$acc08
562 lbzx $acc09,$Tbl1,$acc09
563 rlwinm $acc12,$s3,`0`,24,31
564 rlwinm $acc13,$s0,`0`,24,31
565 lbzx $acc10,$Tbl1,$acc10
566 lbzx $acc11,$Tbl1,$acc11
567 rlwinm $acc14,$s1,`0`,24,31
568 rlwinm $acc15,$s2,`0`,24,31
569 lbzx $acc12,$Tbl1,$acc12
570 lbzx $acc13,$Tbl1,$acc13
571 rlwinm $s0,$acc00,24,0,7
572 rlwinm $s1,$acc01,24,0,7
573 lbzx $acc14,$Tbl1,$acc14
574 lbzx $acc15,$Tbl1,$acc15
575 rlwinm $s2,$acc02,24,0,7
576 rlwinm $s3,$acc03,24,0,7
577 rlwimi $s0,$acc04,16,8,15
578 rlwimi $s1,$acc05,16,8,15
579 rlwimi $s2,$acc06,16,8,15
580 rlwimi $s3,$acc07,16,8,15
581 rlwimi $s0,$acc08,8,16,23
582 rlwimi $s1,$acc09,8,16,23
583 rlwimi $s2,$acc10,8,16,23
584 rlwimi $s3,$acc11,8,16,23
585 lwz $t0,0($key)
586 lwz $t1,4($key)
587 or $s0,$s0,$acc12
588 or $s1,$s1,$acc13
589 lwz $t2,8($key)
590 lwz $t3,12($key)
591 or $s2,$s2,$acc14
592 or $s3,$s3,$acc15
593
594 addi $key,$key,16
595 bdz Lenc_compact_done
596
597 and $acc00,$s0,$mask80 # r1=r0&0x80808080
598 and $acc01,$s1,$mask80
599 and $acc02,$s2,$mask80
600 and $acc03,$s3,$mask80
601 srwi $acc04,$acc00,7 # r1>>7
602 srwi $acc05,$acc01,7
603 srwi $acc06,$acc02,7
604 srwi $acc07,$acc03,7
605 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
606 andc $acc09,$s1,$mask80
607 andc $acc10,$s2,$mask80
608 andc $acc11,$s3,$mask80
609 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
610 sub $acc01,$acc01,$acc05
611 sub $acc02,$acc02,$acc06
612 sub $acc03,$acc03,$acc07
613 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
614 add $acc09,$acc09,$acc09
615 add $acc10,$acc10,$acc10
616 add $acc11,$acc11,$acc11
617 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
618 and $acc01,$acc01,$mask1b
619 and $acc02,$acc02,$mask1b
620 and $acc03,$acc03,$mask1b
621 xor $acc00,$acc00,$acc08 # r2
622 xor $acc01,$acc01,$acc09
623 xor $acc02,$acc02,$acc10
624 xor $acc03,$acc03,$acc11
625
626 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
627 rotlwi $acc13,$s1,16
628 rotlwi $acc14,$s2,16
629 rotlwi $acc15,$s3,16
630 xor $s0,$s0,$acc00 # r0^r2
631 xor $s1,$s1,$acc01
632 xor $s2,$s2,$acc02
633 xor $s3,$s3,$acc03
634 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
635 rotrwi $s1,$s1,24
636 rotrwi $s2,$s2,24
637 rotrwi $s3,$s3,24
638 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
639 xor $s1,$s1,$acc01
640 xor $s2,$s2,$acc02
641 xor $s3,$s3,$acc03
642 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
643 rotlwi $acc09,$acc13,8
644 rotlwi $acc10,$acc14,8
645 rotlwi $acc11,$acc15,8
646 xor $s0,$s0,$acc12 #
647 xor $s1,$s1,$acc13
648 xor $s2,$s2,$acc14
649 xor $s3,$s3,$acc15
650 xor $s0,$s0,$acc08 #
651 xor $s1,$s1,$acc09
652 xor $s2,$s2,$acc10
653 xor $s3,$s3,$acc11
654
655 b Lenc_compact_loop
656.align 4
657Lenc_compact_done:
658 xor $s0,$s0,$t0
659 xor $s1,$s1,$t1
660 xor $s2,$s2,$t2
661 xor $s3,$s3,$t3
662 blr
663
664.globl .AES_decrypt
665.align 7
666.AES_decrypt:
667 mflr r0
668 $STU $sp,-$FRAME($sp)
669
670 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
671 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
672 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
673 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
674 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
675 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
676 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
677 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
678 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
679 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
680 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
681 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
682 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
683 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
684 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
685 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
686 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
687 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
688 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
689 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
690 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
691
692 lwz $s0,0($inp)
693 lwz $s1,4($inp)
694 lwz $s2,8($inp)
695 lwz $s3,12($inp)
696 bl LAES_Td
697 bl Lppc_AES_decrypt_compact
698 stw $s0,0($out)
699 stw $s1,4($out)
700 stw $s2,8($out)
701 stw $s3,12($out)
702
703 $POP r0,`$FRAME-$SIZE_T*21`($sp)
704 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
705 $POP r13,`$FRAME-$SIZE_T*19`($sp)
706 $POP r14,`$FRAME-$SIZE_T*18`($sp)
707 $POP r15,`$FRAME-$SIZE_T*17`($sp)
708 $POP r16,`$FRAME-$SIZE_T*16`($sp)
709 $POP r17,`$FRAME-$SIZE_T*15`($sp)
710 $POP r18,`$FRAME-$SIZE_T*14`($sp)
711 $POP r19,`$FRAME-$SIZE_T*13`($sp)
712 $POP r20,`$FRAME-$SIZE_T*12`($sp)
713 $POP r21,`$FRAME-$SIZE_T*11`($sp)
714 $POP r22,`$FRAME-$SIZE_T*10`($sp)
715 $POP r23,`$FRAME-$SIZE_T*9`($sp)
716 $POP r24,`$FRAME-$SIZE_T*8`($sp)
717 $POP r25,`$FRAME-$SIZE_T*7`($sp)
718 $POP r26,`$FRAME-$SIZE_T*6`($sp)
719 $POP r27,`$FRAME-$SIZE_T*5`($sp)
720 $POP r28,`$FRAME-$SIZE_T*4`($sp)
721 $POP r29,`$FRAME-$SIZE_T*3`($sp)
722 $POP r30,`$FRAME-$SIZE_T*2`($sp)
723 $POP r31,`$FRAME-$SIZE_T*1`($sp)
724 mtlr r0
725 addi $sp,$sp,$FRAME
726 blr
727
728.align 4
729Lppc_AES_decrypt:
730 lwz $acc00,240($key)
731 lwz $t0,0($key)
732 lwz $t1,4($key)
733 lwz $t2,8($key)
734 lwz $t3,12($key)
735 addi $Tbl1,$Tbl0,3
736 addi $Tbl2,$Tbl0,2
737 addi $Tbl3,$Tbl0,1
738 addi $acc00,$acc00,-1
739 addi $key,$key,16
740 xor $s0,$s0,$t0
741 xor $s1,$s1,$t1
742 xor $s2,$s2,$t2
743 xor $s3,$s3,$t3
744 mtctr $acc00
745.align 4
746Ldec_loop:
747 rlwinm $acc00,$s0,`32-24+3`,21,28
748 rlwinm $acc01,$s1,`32-24+3`,21,28
749 lwz $t0,0($key)
750 lwz $t1,4($key)
751 rlwinm $acc02,$s2,`32-24+3`,21,28
752 rlwinm $acc03,$s3,`32-24+3`,21,28
753 lwz $t2,8($key)
754 lwz $t3,12($key)
755 rlwinm $acc04,$s3,`32-16+3`,21,28
756 rlwinm $acc05,$s0,`32-16+3`,21,28
757 lwzx $acc00,$Tbl0,$acc00
758 lwzx $acc01,$Tbl0,$acc01
759 rlwinm $acc06,$s1,`32-16+3`,21,28
760 rlwinm $acc07,$s2,`32-16+3`,21,28
761 lwzx $acc02,$Tbl0,$acc02
762 lwzx $acc03,$Tbl0,$acc03
763 rlwinm $acc08,$s2,`32-8+3`,21,28
764 rlwinm $acc09,$s3,`32-8+3`,21,28
765 lwzx $acc04,$Tbl1,$acc04
766 lwzx $acc05,$Tbl1,$acc05
767 rlwinm $acc10,$s0,`32-8+3`,21,28
768 rlwinm $acc11,$s1,`32-8+3`,21,28
769 lwzx $acc06,$Tbl1,$acc06
770 lwzx $acc07,$Tbl1,$acc07
771 rlwinm $acc12,$s1,`0+3`,21,28
772 rlwinm $acc13,$s2,`0+3`,21,28
773 lwzx $acc08,$Tbl2,$acc08
774 lwzx $acc09,$Tbl2,$acc09
775 rlwinm $acc14,$s3,`0+3`,21,28
776 rlwinm $acc15,$s0,`0+3`,21,28
777 lwzx $acc10,$Tbl2,$acc10
778 lwzx $acc11,$Tbl2,$acc11
779 xor $t0,$t0,$acc00
780 xor $t1,$t1,$acc01
781 lwzx $acc12,$Tbl3,$acc12
782 lwzx $acc13,$Tbl3,$acc13
783 xor $t2,$t2,$acc02
784 xor $t3,$t3,$acc03
785 lwzx $acc14,$Tbl3,$acc14
786 lwzx $acc15,$Tbl3,$acc15
787 xor $t0,$t0,$acc04
788 xor $t1,$t1,$acc05
789 xor $t2,$t2,$acc06
790 xor $t3,$t3,$acc07
791 xor $t0,$t0,$acc08
792 xor $t1,$t1,$acc09
793 xor $t2,$t2,$acc10
794 xor $t3,$t3,$acc11
795 xor $s0,$t0,$acc12
796 xor $s1,$t1,$acc13
797 xor $s2,$t2,$acc14
798 xor $s3,$t3,$acc15
799 addi $key,$key,16
800 bdnz- Ldec_loop
801
802 addi $Tbl2,$Tbl0,2048
803 nop
804 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
805 lwz $acc09,`2048+32`($Tbl0)
806 lwz $acc10,`2048+64`($Tbl0)
807 lwz $acc11,`2048+96`($Tbl0)
808 lwz $acc08,`2048+128`($Tbl0)
809 lwz $acc09,`2048+160`($Tbl0)
810 lwz $acc10,`2048+192`($Tbl0)
811 lwz $acc11,`2048+224`($Tbl0)
812 rlwinm $acc00,$s0,`32-24`,24,31
813 rlwinm $acc01,$s1,`32-24`,24,31
814 lwz $t0,0($key)
815 lwz $t1,4($key)
816 rlwinm $acc02,$s2,`32-24`,24,31
817 rlwinm $acc03,$s3,`32-24`,24,31
818 lwz $t2,8($key)
819 lwz $t3,12($key)
820 rlwinm $acc04,$s3,`32-16`,24,31
821 rlwinm $acc05,$s0,`32-16`,24,31
822 lbzx $acc00,$Tbl2,$acc00
823 lbzx $acc01,$Tbl2,$acc01
824 rlwinm $acc06,$s1,`32-16`,24,31
825 rlwinm $acc07,$s2,`32-16`,24,31
826 lbzx $acc02,$Tbl2,$acc02
827 lbzx $acc03,$Tbl2,$acc03
828 rlwinm $acc08,$s2,`32-8`,24,31
829 rlwinm $acc09,$s3,`32-8`,24,31
830 lbzx $acc04,$Tbl2,$acc04
831 lbzx $acc05,$Tbl2,$acc05
832 rlwinm $acc10,$s0,`32-8`,24,31
833 rlwinm $acc11,$s1,`32-8`,24,31
834 lbzx $acc06,$Tbl2,$acc06
835 lbzx $acc07,$Tbl2,$acc07
836 rlwinm $acc12,$s1,`0`,24,31
837 rlwinm $acc13,$s2,`0`,24,31
838 lbzx $acc08,$Tbl2,$acc08
839 lbzx $acc09,$Tbl2,$acc09
840 rlwinm $acc14,$s3,`0`,24,31
841 rlwinm $acc15,$s0,`0`,24,31
842 lbzx $acc10,$Tbl2,$acc10
843 lbzx $acc11,$Tbl2,$acc11
844 rlwinm $s0,$acc00,24,0,7
845 rlwinm $s1,$acc01,24,0,7
846 lbzx $acc12,$Tbl2,$acc12
847 lbzx $acc13,$Tbl2,$acc13
848 rlwinm $s2,$acc02,24,0,7
849 rlwinm $s3,$acc03,24,0,7
850 lbzx $acc14,$Tbl2,$acc14
851 lbzx $acc15,$Tbl2,$acc15
852 rlwimi $s0,$acc04,16,8,15
853 rlwimi $s1,$acc05,16,8,15
854 rlwimi $s2,$acc06,16,8,15
855 rlwimi $s3,$acc07,16,8,15
856 rlwimi $s0,$acc08,8,16,23
857 rlwimi $s1,$acc09,8,16,23
858 rlwimi $s2,$acc10,8,16,23
859 rlwimi $s3,$acc11,8,16,23
860 or $s0,$s0,$acc12
861 or $s1,$s1,$acc13
862 or $s2,$s2,$acc14
863 or $s3,$s3,$acc15
864 xor $s0,$s0,$t0
865 xor $s1,$s1,$t1
866 xor $s2,$s2,$t2
867 xor $s3,$s3,$t3
868 blr
869
870.align 4
871Lppc_AES_decrypt_compact:
872 lwz $acc00,240($key)
873 lwz $t0,0($key)
874 lwz $t1,4($key)
875 lwz $t2,8($key)
876 lwz $t3,12($key)
877 addi $Tbl1,$Tbl0,2048
878 lis $mask80,0x8080
879 lis $mask1b,0x1b1b
880 addi $key,$key,16
881 ori $mask80,$mask80,0x8080
882 ori $mask1b,$mask1b,0x1b1b
883___
884$code.=<<___ if ($SIZE_T==8);
885 insrdi $mask80,$mask80,32,0
886 insrdi $mask1b,$mask1b,32,0
887___
888$code.=<<___;
889 mtctr $acc00
890.align 4
891Ldec_compact_loop:
892 xor $s0,$s0,$t0
893 xor $s1,$s1,$t1
894 xor $s2,$s2,$t2
895 xor $s3,$s3,$t3
896 rlwinm $acc00,$s0,`32-24`,24,31
897 rlwinm $acc01,$s1,`32-24`,24,31
898 rlwinm $acc02,$s2,`32-24`,24,31
899 rlwinm $acc03,$s3,`32-24`,24,31
900 lbzx $acc00,$Tbl1,$acc00
901 lbzx $acc01,$Tbl1,$acc01
902 rlwinm $acc04,$s3,`32-16`,24,31
903 rlwinm $acc05,$s0,`32-16`,24,31
904 lbzx $acc02,$Tbl1,$acc02
905 lbzx $acc03,$Tbl1,$acc03
906 rlwinm $acc06,$s1,`32-16`,24,31
907 rlwinm $acc07,$s2,`32-16`,24,31
908 lbzx $acc04,$Tbl1,$acc04
909 lbzx $acc05,$Tbl1,$acc05
910 rlwinm $acc08,$s2,`32-8`,24,31
911 rlwinm $acc09,$s3,`32-8`,24,31
912 lbzx $acc06,$Tbl1,$acc06
913 lbzx $acc07,$Tbl1,$acc07
914 rlwinm $acc10,$s0,`32-8`,24,31
915 rlwinm $acc11,$s1,`32-8`,24,31
916 lbzx $acc08,$Tbl1,$acc08
917 lbzx $acc09,$Tbl1,$acc09
918 rlwinm $acc12,$s1,`0`,24,31
919 rlwinm $acc13,$s2,`0`,24,31
920 lbzx $acc10,$Tbl1,$acc10
921 lbzx $acc11,$Tbl1,$acc11
922 rlwinm $acc14,$s3,`0`,24,31
923 rlwinm $acc15,$s0,`0`,24,31
924 lbzx $acc12,$Tbl1,$acc12
925 lbzx $acc13,$Tbl1,$acc13
926 rlwinm $s0,$acc00,24,0,7
927 rlwinm $s1,$acc01,24,0,7
928 lbzx $acc14,$Tbl1,$acc14
929 lbzx $acc15,$Tbl1,$acc15
930 rlwinm $s2,$acc02,24,0,7
931 rlwinm $s3,$acc03,24,0,7
932 rlwimi $s0,$acc04,16,8,15
933 rlwimi $s1,$acc05,16,8,15
934 rlwimi $s2,$acc06,16,8,15
935 rlwimi $s3,$acc07,16,8,15
936 rlwimi $s0,$acc08,8,16,23
937 rlwimi $s1,$acc09,8,16,23
938 rlwimi $s2,$acc10,8,16,23
939 rlwimi $s3,$acc11,8,16,23
940 lwz $t0,0($key)
941 lwz $t1,4($key)
942 or $s0,$s0,$acc12
943 or $s1,$s1,$acc13
944 lwz $t2,8($key)
945 lwz $t3,12($key)
946 or $s2,$s2,$acc14
947 or $s3,$s3,$acc15
948
949 addi $key,$key,16
950 bdz Ldec_compact_done
951___
952$code.=<<___ if ($SIZE_T==8);
953 # vectorized permutation improves decrypt performance by 10%
954 insrdi $s0,$s1,32,0
955 insrdi $s2,$s3,32,0
956
957 and $acc00,$s0,$mask80 # r1=r0&0x80808080
958 and $acc02,$s2,$mask80
959 srdi $acc04,$acc00,7 # r1>>7
960 srdi $acc06,$acc02,7
961 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
962 andc $acc10,$s2,$mask80
963 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
964 sub $acc02,$acc02,$acc06
965 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
966 add $acc10,$acc10,$acc10
967 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
968 and $acc02,$acc02,$mask1b
969 xor $acc00,$acc00,$acc08 # r2
970 xor $acc02,$acc02,$acc10
971
972 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
973 and $acc06,$acc02,$mask80
974 srdi $acc08,$acc04,7 # r1>>7
975 srdi $acc10,$acc06,7
976 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
977 andc $acc14,$acc02,$mask80
978 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
979 sub $acc06,$acc06,$acc10
980 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
981 add $acc14,$acc14,$acc14
982 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
983 and $acc06,$acc06,$mask1b
984 xor $acc04,$acc04,$acc12 # r4
985 xor $acc06,$acc06,$acc14
986
987 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
988 and $acc10,$acc06,$mask80
989 srdi $acc12,$acc08,7 # r1>>7
990 srdi $acc14,$acc10,7
991 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
992 sub $acc10,$acc10,$acc14
993 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
994 andc $acc14,$acc06,$mask80
995 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
996 add $acc14,$acc14,$acc14
997 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
998 and $acc10,$acc10,$mask1b
999 xor $acc08,$acc08,$acc12 # r8
1000 xor $acc10,$acc10,$acc14
1001
1002 xor $acc00,$acc00,$s0 # r2^r0
1003 xor $acc02,$acc02,$s2
1004 xor $acc04,$acc04,$s0 # r4^r0
1005 xor $acc06,$acc06,$s2
1006
1007 extrdi $acc01,$acc00,32,0
1008 extrdi $acc03,$acc02,32,0
1009 extrdi $acc05,$acc04,32,0
1010 extrdi $acc07,$acc06,32,0
1011 extrdi $acc09,$acc08,32,0
1012 extrdi $acc11,$acc10,32,0
1013___
1014$code.=<<___ if ($SIZE_T==4);
1015 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1016 and $acc01,$s1,$mask80
1017 and $acc02,$s2,$mask80
1018 and $acc03,$s3,$mask80
1019 srwi $acc04,$acc00,7 # r1>>7
1020 srwi $acc05,$acc01,7
1021 srwi $acc06,$acc02,7
1022 srwi $acc07,$acc03,7
1023 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1024 andc $acc09,$s1,$mask80
1025 andc $acc10,$s2,$mask80
1026 andc $acc11,$s3,$mask80
1027 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1028 sub $acc01,$acc01,$acc05
1029 sub $acc02,$acc02,$acc06
1030 sub $acc03,$acc03,$acc07
1031 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1032 add $acc09,$acc09,$acc09
1033 add $acc10,$acc10,$acc10
1034 add $acc11,$acc11,$acc11
1035 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1036 and $acc01,$acc01,$mask1b
1037 and $acc02,$acc02,$mask1b
1038 and $acc03,$acc03,$mask1b
1039 xor $acc00,$acc00,$acc08 # r2
1040 xor $acc01,$acc01,$acc09
1041 xor $acc02,$acc02,$acc10
1042 xor $acc03,$acc03,$acc11
1043
1044 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1045 and $acc05,$acc01,$mask80
1046 and $acc06,$acc02,$mask80
1047 and $acc07,$acc03,$mask80
1048 srwi $acc08,$acc04,7 # r1>>7
1049 srwi $acc09,$acc05,7
1050 srwi $acc10,$acc06,7
1051 srwi $acc11,$acc07,7
1052 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1053 andc $acc13,$acc01,$mask80
1054 andc $acc14,$acc02,$mask80
1055 andc $acc15,$acc03,$mask80
1056 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1057 sub $acc05,$acc05,$acc09
1058 sub $acc06,$acc06,$acc10
1059 sub $acc07,$acc07,$acc11
1060 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1061 add $acc13,$acc13,$acc13
1062 add $acc14,$acc14,$acc14
1063 add $acc15,$acc15,$acc15
1064 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1065 and $acc05,$acc05,$mask1b
1066 and $acc06,$acc06,$mask1b
1067 and $acc07,$acc07,$mask1b
1068 xor $acc04,$acc04,$acc12 # r4
1069 xor $acc05,$acc05,$acc13
1070 xor $acc06,$acc06,$acc14
1071 xor $acc07,$acc07,$acc15
1072
1073 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1074 and $acc09,$acc05,$mask80
1075 and $acc10,$acc06,$mask80
1076 and $acc11,$acc07,$mask80
1077 srwi $acc12,$acc08,7 # r1>>7
1078 srwi $acc13,$acc09,7
1079 srwi $acc14,$acc10,7
1080 srwi $acc15,$acc11,7
1081 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1082 sub $acc09,$acc09,$acc13
1083 sub $acc10,$acc10,$acc14
1084 sub $acc11,$acc11,$acc15
1085 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1086 andc $acc13,$acc05,$mask80
1087 andc $acc14,$acc06,$mask80
1088 andc $acc15,$acc07,$mask80
1089 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1090 add $acc13,$acc13,$acc13
1091 add $acc14,$acc14,$acc14
1092 add $acc15,$acc15,$acc15
1093 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1094 and $acc09,$acc09,$mask1b
1095 and $acc10,$acc10,$mask1b
1096 and $acc11,$acc11,$mask1b
1097 xor $acc08,$acc08,$acc12 # r8
1098 xor $acc09,$acc09,$acc13
1099 xor $acc10,$acc10,$acc14
1100 xor $acc11,$acc11,$acc15
1101
1102 xor $acc00,$acc00,$s0 # r2^r0
1103 xor $acc01,$acc01,$s1
1104 xor $acc02,$acc02,$s2
1105 xor $acc03,$acc03,$s3
1106 xor $acc04,$acc04,$s0 # r4^r0
1107 xor $acc05,$acc05,$s1
1108 xor $acc06,$acc06,$s2
1109 xor $acc07,$acc07,$s3
1110___
1111$code.=<<___;
1112 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1113 rotrwi $s1,$s1,8
1114 rotrwi $s2,$s2,8
1115 rotrwi $s3,$s3,8
1116 xor $s0,$s0,$acc00 # ^= r2^r0
1117 xor $s1,$s1,$acc01
1118 xor $s2,$s2,$acc02
1119 xor $s3,$s3,$acc03
1120 xor $acc00,$acc00,$acc08
1121 xor $acc01,$acc01,$acc09
1122 xor $acc02,$acc02,$acc10
1123 xor $acc03,$acc03,$acc11
1124 xor $s0,$s0,$acc04 # ^= r4^r0
1125 xor $s1,$s1,$acc05
1126 xor $s2,$s2,$acc06
1127 xor $s3,$s3,$acc07
1128 rotrwi $acc00,$acc00,24
1129 rotrwi $acc01,$acc01,24
1130 rotrwi $acc02,$acc02,24
1131 rotrwi $acc03,$acc03,24
1132 xor $acc04,$acc04,$acc08
1133 xor $acc05,$acc05,$acc09
1134 xor $acc06,$acc06,$acc10
1135 xor $acc07,$acc07,$acc11
1136 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1137 xor $s1,$s1,$acc09
1138 xor $s2,$s2,$acc10
1139 xor $s3,$s3,$acc11
1140 rotrwi $acc04,$acc04,16
1141 rotrwi $acc05,$acc05,16
1142 rotrwi $acc06,$acc06,16
1143 rotrwi $acc07,$acc07,16
1144 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1145 xor $s1,$s1,$acc01
1146 xor $s2,$s2,$acc02
1147 xor $s3,$s3,$acc03
1148 rotrwi $acc08,$acc08,8
1149 rotrwi $acc09,$acc09,8
1150 rotrwi $acc10,$acc10,8
1151 rotrwi $acc11,$acc11,8
1152 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1153 xor $s1,$s1,$acc05
1154 xor $s2,$s2,$acc06
1155 xor $s3,$s3,$acc07
1156 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1157 xor $s1,$s1,$acc09
1158 xor $s2,$s2,$acc10
1159 xor $s3,$s3,$acc11
1160
1161 b Ldec_compact_loop
1162.align 4
1163Ldec_compact_done:
1164 xor $s0,$s0,$t0
1165 xor $s1,$s1,$t1
1166 xor $s2,$s2,$t2
1167 xor $s3,$s3,$t3
1168 blr
1169.long 0
1170.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1171.align 7
1172___
1173
1174$code =~ s/\`([^\`]*)\`/eval $1/gem;
1175print $code;
1176close STDOUT;
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl
deleted file mode 100644
index 4b27afd92f..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-s390x.pl
+++ /dev/null
@@ -1,1333 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for s390x.
11
12# April 2007.
13#
14# Software performance improvement over gcc-generated code is ~70% and
15# in absolute terms is ~73 cycles per byte processed with 128-bit key.
16# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17# *strictly* in-order execution and issued instruction [in this case
18# load value from memory is critical] has to complete before execution
19# flow proceeds. S-boxes are compressed to 2KB[+256B].
20#
21# As for hardware acceleration support. It's basically a "teaser," as
22# it can and should be improved in several ways. Most notably support
23# for CBC is not utilized, nor multiple blocks are ever processed.
24# Then software key schedule can be postponed till hardware support
25# detection... Performance improvement over assembler is reportedly
26# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27# support is implemented.
28
29# May 2007.
30#
31# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32# for 128-bit keys, if hardware support is detected.
33
34# January 2009.
35#
36# Add support for hardware AES192/256 and reschedule instructions to
37# minimize/avoid Address Generation Interlock hazard and to favour
38# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39# almost 50% on z9. The gain is smaller on z10, because being dual-
40# issue z10 makes it impossible to eliminate the interlock condition:
41# critical path is not long enough. Yet it spends ~24 cycles per byte
42# processed with 128-bit key.
43#
44# Unlike previous version hardware support detection takes place only
45# at the moment of key schedule setup, which is denoted in key->rounds.
46# This is done, because deferred key setup can't be made MT-safe, not
47# for key lengths longer than 128 bits.
48#
49# Add AES_cbc_encrypt, which gives incredible performance improvement,
50# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51# because software implementation was optimized.
52
53$softonly=0; # allow hardware support
54
55$t0="%r0"; $mask="%r0";
56$t1="%r1";
57$t2="%r2"; $inp="%r2";
58$t3="%r3"; $out="%r3"; $bits="%r3";
59$key="%r4";
60$i1="%r5";
61$i2="%r6";
62$i3="%r7";
63$s0="%r8";
64$s1="%r9";
65$s2="%r10";
66$s3="%r11";
67$tbl="%r12";
68$rounds="%r13";
69$ra="%r14";
70$sp="%r15";
71
72sub _data_word()
73{ my $i;
74 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
75}
76
77$code=<<___;
78.text
79
80.type AES_Te,\@object
81.align 256
82AES_Te:
83___
84&_data_word(
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
149$code.=<<___;
150# Te4[256]
151.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
152.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
153.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
154.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
155.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
156.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
157.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
158.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
159.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
160.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
161.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
162.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
163.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
164.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
165.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
166.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
167.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
168.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
169.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
170.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
171.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
172.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
173.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
174.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
175.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
176.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
177.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
178.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
179.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
180.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
181.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
182.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183# rcon[]
184.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
185.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
186.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
187.align 256
188.size AES_Te,.-AES_Te
189
190# void AES_encrypt(const unsigned char *inp, unsigned char *out,
191# const AES_KEY *key) {
192.globl AES_encrypt
193.type AES_encrypt,\@function
194AES_encrypt:
195___
196$code.=<<___ if (!$softonly);
197 l %r0,240($key)
198 lhi %r1,16
199 clr %r0,%r1
200 jl .Lesoft
201
202 la %r1,0($key)
203 #la %r2,0($inp)
204 la %r4,0($out)
205 lghi %r3,16 # single block length
206 .long 0xb92e0042 # km %r4,%r2
207 brc 1,.-4 # can this happen?
208 br %r14
209.align 64
210.Lesoft:
211___
212$code.=<<___;
213 stmg %r3,$ra,24($sp)
214
215 llgf $s0,0($inp)
216 llgf $s1,4($inp)
217 llgf $s2,8($inp)
218 llgf $s3,12($inp)
219
220 larl $tbl,AES_Te
221 bras $ra,_s390x_AES_encrypt
222
223 lg $out,24($sp)
224 st $s0,0($out)
225 st $s1,4($out)
226 st $s2,8($out)
227 st $s3,12($out)
228
229 lmg %r6,$ra,48($sp)
230 br $ra
231.size AES_encrypt,.-AES_encrypt
232
233.type _s390x_AES_encrypt,\@function
234.align 16
235_s390x_AES_encrypt:
236 stg $ra,152($sp)
237 x $s0,0($key)
238 x $s1,4($key)
239 x $s2,8($key)
240 x $s3,12($key)
241 l $rounds,240($key)
242 llill $mask,`0xff<<3`
243 aghi $rounds,-1
244 j .Lenc_loop
245.align 16
246.Lenc_loop:
247 sllg $t1,$s0,`0+3`
248 srlg $t2,$s0,`8-3`
249 srlg $t3,$s0,`16-3`
250 srl $s0,`24-3`
251 nr $s0,$mask
252 ngr $t1,$mask
253 nr $t2,$mask
254 nr $t3,$mask
255
256 srlg $i1,$s1,`16-3` # i0
257 sllg $i2,$s1,`0+3`
258 srlg $i3,$s1,`8-3`
259 srl $s1,`24-3`
260 nr $i1,$mask
261 nr $s1,$mask
262 ngr $i2,$mask
263 nr $i3,$mask
264
265 l $s0,0($s0,$tbl) # Te0[s0>>24]
266 l $t1,1($t1,$tbl) # Te3[s0>>0]
267 l $t2,2($t2,$tbl) # Te2[s0>>8]
268 l $t3,3($t3,$tbl) # Te1[s0>>16]
269
270 x $s0,3($i1,$tbl) # Te1[s1>>16]
271 l $s1,0($s1,$tbl) # Te0[s1>>24]
272 x $t2,1($i2,$tbl) # Te3[s1>>0]
273 x $t3,2($i3,$tbl) # Te2[s1>>8]
274
275 srlg $i1,$s2,`8-3` # i0
276 srlg $i2,$s2,`16-3` # i1
277 nr $i1,$mask
278 nr $i2,$mask
279 sllg $i3,$s2,`0+3`
280 srl $s2,`24-3`
281 nr $s2,$mask
282 ngr $i3,$mask
283
284 xr $s1,$t1
285 srlg $ra,$s3,`8-3` # i1
286 sllg $t1,$s3,`0+3` # i0
287 nr $ra,$mask
288 la $key,16($key)
289 ngr $t1,$mask
290
291 x $s0,2($i1,$tbl) # Te2[s2>>8]
292 x $s1,3($i2,$tbl) # Te1[s2>>16]
293 l $s2,0($s2,$tbl) # Te0[s2>>24]
294 x $t3,1($i3,$tbl) # Te3[s2>>0]
295
296 srlg $i3,$s3,`16-3` # i2
297 xr $s2,$t2
298 srl $s3,`24-3`
299 nr $i3,$mask
300 nr $s3,$mask
301
302 x $s0,0($key)
303 x $s1,4($key)
304 x $s2,8($key)
305 x $t3,12($key)
306
307 x $s0,1($t1,$tbl) # Te3[s3>>0]
308 x $s1,2($ra,$tbl) # Te2[s3>>8]
309 x $s2,3($i3,$tbl) # Te1[s3>>16]
310 l $s3,0($s3,$tbl) # Te0[s3>>24]
311 xr $s3,$t3
312
313 brct $rounds,.Lenc_loop
314 .align 16
315
316 sllg $t1,$s0,`0+3`
317 srlg $t2,$s0,`8-3`
318 ngr $t1,$mask
319 srlg $t3,$s0,`16-3`
320 srl $s0,`24-3`
321 nr $s0,$mask
322 nr $t2,$mask
323 nr $t3,$mask
324
325 srlg $i1,$s1,`16-3` # i0
326 sllg $i2,$s1,`0+3`
327 ngr $i2,$mask
328 srlg $i3,$s1,`8-3`
329 srl $s1,`24-3`
330 nr $i1,$mask
331 nr $s1,$mask
332 nr $i3,$mask
333
334 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
335 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
336 sll $s0,24
337 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
338 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
339 sll $t2,8
340 sll $t3,16
341
342 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
343 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
344 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
345 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
346 sll $i1,16
347 sll $s1,24
348 sll $i3,8
349 or $s0,$i1
350 or $s1,$t1
351 or $t2,$i2
352 or $t3,$i3
353
354 srlg $i1,$s2,`8-3` # i0
355 srlg $i2,$s2,`16-3` # i1
356 nr $i1,$mask
357 nr $i2,$mask
358 sllg $i3,$s2,`0+3`
359 srl $s2,`24-3`
360 ngr $i3,$mask
361 nr $s2,$mask
362
363 sllg $t1,$s3,`0+3` # i0
364 srlg $ra,$s3,`8-3` # i1
365 ngr $t1,$mask
366
367 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
368 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
369 sll $i1,8
370 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
371 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
372 sll $i2,16
373 nr $ra,$mask
374 sll $s2,24
375 or $s0,$i1
376 or $s1,$i2
377 or $s2,$t2
378 or $t3,$i3
379
380 srlg $i3,$s3,`16-3` # i2
381 srl $s3,`24-3`
382 nr $i3,$mask
383 nr $s3,$mask
384
385 l $t0,16($key)
386 l $t2,20($key)
387
388 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
389 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
390 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
391 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
392 sll $i2,8
393 sll $i3,16
394 sll $s3,24
395 or $s0,$i1
396 or $s1,$i2
397 or $s2,$i3
398 or $s3,$t3
399
400 lg $ra,152($sp)
401 xr $s0,$t0
402 xr $s1,$t2
403 x $s2,24($key)
404 x $s3,28($key)
405
406 br $ra
407.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
408___
409
410$code.=<<___;
411.type AES_Td,\@object
412.align 256
413AES_Td:
414___
415&_data_word(
416 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
417 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
418 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
419 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
420 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
421 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
422 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
423 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
424 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
425 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
426 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
427 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
428 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
429 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
430 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
431 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
432 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
433 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
434 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
435 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
436 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
437 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
438 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
439 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
440 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
441 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
442 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
443 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
444 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
445 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
446 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
447 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
448 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
449 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
450 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
451 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
452 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
453 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
454 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
455 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
456 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
457 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
458 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
459 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
460 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
461 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
462 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
463 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
464 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
465 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
466 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
467 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
468 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
469 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
470 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
471 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
472 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
473 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
474 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
475 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
476 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
477 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
478 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
479 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
480$code.=<<___;
481# Td4[256]
482.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
483.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
484.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
485.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
486.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
487.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
488.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
489.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
490.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
491.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
492.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
493.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
494.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
495.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
496.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
497.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
498.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
499.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
500.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
501.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
502.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
503.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
504.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
505.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
506.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
507.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
508.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
509.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
510.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
511.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
512.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
513.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
514.size AES_Td,.-AES_Td
515
516# void AES_decrypt(const unsigned char *inp, unsigned char *out,
517# const AES_KEY *key) {
518.globl AES_decrypt
519.type AES_decrypt,\@function
520AES_decrypt:
521___
522$code.=<<___ if (!$softonly);
523 l %r0,240($key)
524 lhi %r1,16
525 clr %r0,%r1
526 jl .Ldsoft
527
528 la %r1,0($key)
529 #la %r2,0($inp)
530 la %r4,0($out)
531 lghi %r3,16 # single block length
532 .long 0xb92e0042 # km %r4,%r2
533 brc 1,.-4 # can this happen?
534 br %r14
535.align 64
536.Ldsoft:
537___
538$code.=<<___;
539 stmg %r3,$ra,24($sp)
540
541 llgf $s0,0($inp)
542 llgf $s1,4($inp)
543 llgf $s2,8($inp)
544 llgf $s3,12($inp)
545
546 larl $tbl,AES_Td
547 bras $ra,_s390x_AES_decrypt
548
549 lg $out,24($sp)
550 st $s0,0($out)
551 st $s1,4($out)
552 st $s2,8($out)
553 st $s3,12($out)
554
555 lmg %r6,$ra,48($sp)
556 br $ra
557.size AES_decrypt,.-AES_decrypt
558
559.type _s390x_AES_decrypt,\@function
560.align 16
561_s390x_AES_decrypt:
562 stg $ra,152($sp)
563 x $s0,0($key)
564 x $s1,4($key)
565 x $s2,8($key)
566 x $s3,12($key)
567 l $rounds,240($key)
568 llill $mask,`0xff<<3`
569 aghi $rounds,-1
570 j .Ldec_loop
571.align 16
572.Ldec_loop:
573 srlg $t1,$s0,`16-3`
574 srlg $t2,$s0,`8-3`
575 sllg $t3,$s0,`0+3`
576 srl $s0,`24-3`
577 nr $s0,$mask
578 nr $t1,$mask
579 nr $t2,$mask
580 ngr $t3,$mask
581
582 sllg $i1,$s1,`0+3` # i0
583 srlg $i2,$s1,`16-3`
584 srlg $i3,$s1,`8-3`
585 srl $s1,`24-3`
586 ngr $i1,$mask
587 nr $s1,$mask
588 nr $i2,$mask
589 nr $i3,$mask
590
591 l $s0,0($s0,$tbl) # Td0[s0>>24]
592 l $t1,3($t1,$tbl) # Td1[s0>>16]
593 l $t2,2($t2,$tbl) # Td2[s0>>8]
594 l $t3,1($t3,$tbl) # Td3[s0>>0]
595
596 x $s0,1($i1,$tbl) # Td3[s1>>0]
597 l $s1,0($s1,$tbl) # Td0[s1>>24]
598 x $t2,3($i2,$tbl) # Td1[s1>>16]
599 x $t3,2($i3,$tbl) # Td2[s1>>8]
600
601 srlg $i1,$s2,`8-3` # i0
602 sllg $i2,$s2,`0+3` # i1
603 srlg $i3,$s2,`16-3`
604 srl $s2,`24-3`
605 nr $i1,$mask
606 ngr $i2,$mask
607 nr $s2,$mask
608 nr $i3,$mask
609
610 xr $s1,$t1
611 srlg $ra,$s3,`8-3` # i1
612 srlg $t1,$s3,`16-3` # i0
613 nr $ra,$mask
614 la $key,16($key)
615 nr $t1,$mask
616
617 x $s0,2($i1,$tbl) # Td2[s2>>8]
618 x $s1,1($i2,$tbl) # Td3[s2>>0]
619 l $s2,0($s2,$tbl) # Td0[s2>>24]
620 x $t3,3($i3,$tbl) # Td1[s2>>16]
621
622 sllg $i3,$s3,`0+3` # i2
623 srl $s3,`24-3`
624 ngr $i3,$mask
625 nr $s3,$mask
626
627 xr $s2,$t2
628 x $s0,0($key)
629 x $s1,4($key)
630 x $s2,8($key)
631 x $t3,12($key)
632
633 x $s0,3($t1,$tbl) # Td1[s3>>16]
634 x $s1,2($ra,$tbl) # Td2[s3>>8]
635 x $s2,1($i3,$tbl) # Td3[s3>>0]
636 l $s3,0($s3,$tbl) # Td0[s3>>24]
637 xr $s3,$t3
638
639 brct $rounds,.Ldec_loop
640 .align 16
641
642 l $t1,`2048+0`($tbl) # prefetch Td4
643 l $t2,`2048+64`($tbl)
644 l $t3,`2048+128`($tbl)
645 l $i1,`2048+192`($tbl)
646 llill $mask,0xff
647
648 srlg $i3,$s0,24 # i0
649 srlg $t1,$s0,16
650 srlg $t2,$s0,8
651 nr $s0,$mask # i3
652 nr $t1,$mask
653
654 srlg $i1,$s1,24
655 nr $t2,$mask
656 srlg $i2,$s1,16
657 srlg $ra,$s1,8
658 nr $s1,$mask # i0
659 nr $i2,$mask
660 nr $ra,$mask
661
662 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
663 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
664 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
665 sll $t1,16
666 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
667 sllg $s0,$i3,24
668 sll $t2,8
669
670 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
671 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
672 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
673 sll $i1,24
674 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
675 sll $i2,16
676 sll $i3,8
677 or $s0,$s1
678 or $t1,$i1
679 or $t2,$i2
680 or $t3,$i3
681
682 srlg $i1,$s2,8 # i0
683 srlg $i2,$s2,24
684 srlg $i3,$s2,16
685 nr $s2,$mask # i1
686 nr $i1,$mask
687 nr $i3,$mask
688 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
689 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
690 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
691 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
692 sll $i1,8
693 sll $i2,24
694 or $s0,$i1
695 sll $i3,16
696 or $t2,$i2
697 or $t3,$i3
698
699 srlg $i1,$s3,16 # i0
700 srlg $i2,$s3,8 # i1
701 srlg $i3,$s3,24
702 nr $s3,$mask # i2
703 nr $i1,$mask
704 nr $i2,$mask
705
706 lg $ra,152($sp)
707 or $s1,$t1
708 l $t0,16($key)
709 l $t1,20($key)
710
711 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
712 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
713 sll $i1,16
714 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
715 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
716 sll $i2,8
717 sll $s3,24
718 or $s0,$i1
719 or $s1,$i2
720 or $s2,$t2
721 or $s3,$t3
722
723 xr $s0,$t0
724 xr $s1,$t1
725 x $s2,24($key)
726 x $s3,28($key)
727
728 br $ra
729.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
730___
731
732$code.=<<___;
733# void AES_set_encrypt_key(const unsigned char *in, int bits,
734# AES_KEY *key) {
735.globl AES_set_encrypt_key
736.type AES_set_encrypt_key,\@function
737.align 16
738AES_set_encrypt_key:
739 lghi $t0,0
740 clgr $inp,$t0
741 je .Lminus1
742 clgr $key,$t0
743 je .Lminus1
744
745 lghi $t0,128
746 clr $bits,$t0
747 je .Lproceed
748 lghi $t0,192
749 clr $bits,$t0
750 je .Lproceed
751 lghi $t0,256
752 clr $bits,$t0
753 je .Lproceed
754 lghi %r2,-2
755 br %r14
756
757.align 16
758.Lproceed:
759___
760$code.=<<___ if (!$softonly);
761 # convert bits to km code, [128,192,256]->[18,19,20]
762 lhi %r5,-128
763 lhi %r0,18
764 ar %r5,$bits
765 srl %r5,6
766 ar %r5,%r0
767
768 lghi %r0,0 # query capability vector
769 la %r1,16($sp)
770 .long 0xb92f0042 # kmc %r4,%r2
771
772 llihh %r1,0x8000
773 srlg %r1,%r1,0(%r5)
774 ng %r1,16($sp)
775 jz .Lekey_internal
776
777 lmg %r0,%r1,0($inp) # just copy 128 bits...
778 stmg %r0,%r1,0($key)
779 lhi %r0,192
780 cr $bits,%r0
781 jl 1f
782 lg %r1,16($inp)
783 stg %r1,16($key)
784 je 1f
785 lg %r1,24($inp)
786 stg %r1,24($key)
7871: st $bits,236($key) # save bits
788 st %r5,240($key) # save km code
789 lghi %r2,0
790 br %r14
791___
792$code.=<<___;
793.align 16
794.Lekey_internal:
795 stmg %r6,%r13,48($sp) # all non-volatile regs
796
797 larl $tbl,AES_Te+2048
798
799 llgf $s0,0($inp)
800 llgf $s1,4($inp)
801 llgf $s2,8($inp)
802 llgf $s3,12($inp)
803 st $s0,0($key)
804 st $s1,4($key)
805 st $s2,8($key)
806 st $s3,12($key)
807 lghi $t0,128
808 cr $bits,$t0
809 jne .Lnot128
810
811 llill $mask,0xff
812 lghi $t3,0 # i=0
813 lghi $rounds,10
814 st $rounds,240($key)
815
816 llgfr $t2,$s3 # temp=rk[3]
817 srlg $i1,$s3,8
818 srlg $i2,$s3,16
819 srlg $i3,$s3,24
820 nr $t2,$mask
821 nr $i1,$mask
822 nr $i2,$mask
823
824.align 16
825.L128_loop:
826 la $t2,0($t2,$tbl)
827 la $i1,0($i1,$tbl)
828 la $i2,0($i2,$tbl)
829 la $i3,0($i3,$tbl)
830 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
831 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
832 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
833 icm $t2,1,0($i3) # Te4[rk[3]>>24]
834 x $t2,256($t3,$tbl) # rcon[i]
835 xr $s0,$t2 # rk[4]=rk[0]^...
836 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
837 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
838 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
839
840 llgfr $t2,$s3 # temp=rk[3]
841 srlg $i1,$s3,8
842 srlg $i2,$s3,16
843 nr $t2,$mask
844 nr $i1,$mask
845 srlg $i3,$s3,24
846 nr $i2,$mask
847
848 st $s0,16($key)
849 st $s1,20($key)
850 st $s2,24($key)
851 st $s3,28($key)
852 la $key,16($key) # key+=4
853 la $t3,4($t3) # i++
854 brct $rounds,.L128_loop
855 lghi %r2,0
856 lmg %r6,%r13,48($sp)
857 br $ra
858
859.align 16
860.Lnot128:
861 llgf $t0,16($inp)
862 llgf $t1,20($inp)
863 st $t0,16($key)
864 st $t1,20($key)
865 lghi $t0,192
866 cr $bits,$t0
867 jne .Lnot192
868
869 llill $mask,0xff
870 lghi $t3,0 # i=0
871 lghi $rounds,12
872 st $rounds,240($key)
873 lghi $rounds,8
874
875 srlg $i1,$t1,8
876 srlg $i2,$t1,16
877 srlg $i3,$t1,24
878 nr $t1,$mask
879 nr $i1,$mask
880 nr $i2,$mask
881
882.align 16
883.L192_loop:
884 la $t1,0($t1,$tbl)
885 la $i1,0($i1,$tbl)
886 la $i2,0($i2,$tbl)
887 la $i3,0($i3,$tbl)
888 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
889 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
890 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
891 icm $t1,1,0($i3) # Te4[rk[5]>>24]
892 x $t1,256($t3,$tbl) # rcon[i]
893 xr $s0,$t1 # rk[6]=rk[0]^...
894 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
895 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
896 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
897
898 st $s0,24($key)
899 st $s1,28($key)
900 st $s2,32($key)
901 st $s3,36($key)
902 brct $rounds,.L192_continue
903 lghi %r2,0
904 lmg %r6,%r13,48($sp)
905 br $ra
906
907.align 16
908.L192_continue:
909 lgr $t1,$s3
910 x $t1,16($key) # rk[10]=rk[4]^rk[9]
911 st $t1,40($key)
912 x $t1,20($key) # rk[11]=rk[5]^rk[10]
913 st $t1,44($key)
914
915 srlg $i1,$t1,8
916 srlg $i2,$t1,16
917 srlg $i3,$t1,24
918 nr $t1,$mask
919 nr $i1,$mask
920 nr $i2,$mask
921
922 la $key,24($key) # key+=6
923 la $t3,4($t3) # i++
924 j .L192_loop
925
926.align 16
927.Lnot192:
928 llgf $t0,24($inp)
929 llgf $t1,28($inp)
930 st $t0,24($key)
931 st $t1,28($key)
932 llill $mask,0xff
933 lghi $t3,0 # i=0
934 lghi $rounds,14
935 st $rounds,240($key)
936 lghi $rounds,7
937
938 srlg $i1,$t1,8
939 srlg $i2,$t1,16
940 srlg $i3,$t1,24
941 nr $t1,$mask
942 nr $i1,$mask
943 nr $i2,$mask
944
945.align 16
946.L256_loop:
947 la $t1,0($t1,$tbl)
948 la $i1,0($i1,$tbl)
949 la $i2,0($i2,$tbl)
950 la $i3,0($i3,$tbl)
951 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
952 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
953 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
954 icm $t1,1,0($i3) # Te4[rk[7]>>24]
955 x $t1,256($t3,$tbl) # rcon[i]
956 xr $s0,$t1 # rk[8]=rk[0]^...
957 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
958 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
959 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
960 st $s0,32($key)
961 st $s1,36($key)
962 st $s2,40($key)
963 st $s3,44($key)
964 brct $rounds,.L256_continue
965 lghi %r2,0
966 lmg %r6,%r13,48($sp)
967 br $ra
968
969.align 16
970.L256_continue:
971 lgr $t1,$s3 # temp=rk[11]
972 srlg $i1,$s3,8
973 srlg $i2,$s3,16
974 srlg $i3,$s3,24
975 nr $t1,$mask
976 nr $i1,$mask
977 nr $i2,$mask
978 la $t1,0($t1,$tbl)
979 la $i1,0($i1,$tbl)
980 la $i2,0($i2,$tbl)
981 la $i3,0($i3,$tbl)
982 llgc $t1,0($t1) # Te4[rk[11]>>0]
983 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
984 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
985 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
986 x $t1,16($key) # rk[12]=rk[4]^...
987 st $t1,48($key)
988 x $t1,20($key) # rk[13]=rk[5]^rk[12]
989 st $t1,52($key)
990 x $t1,24($key) # rk[14]=rk[6]^rk[13]
991 st $t1,56($key)
992 x $t1,28($key) # rk[15]=rk[7]^rk[14]
993 st $t1,60($key)
994
995 srlg $i1,$t1,8
996 srlg $i2,$t1,16
997 srlg $i3,$t1,24
998 nr $t1,$mask
999 nr $i1,$mask
1000 nr $i2,$mask
1001
1002 la $key,32($key) # key+=8
1003 la $t3,4($t3) # i++
1004 j .L256_loop
1005
1006.Lminus1:
1007 lghi %r2,-1
1008 br $ra
1009.size AES_set_encrypt_key,.-AES_set_encrypt_key
1010
1011# void AES_set_decrypt_key(const unsigned char *in, int bits,
1012# AES_KEY *key) {
1013.globl AES_set_decrypt_key
1014.type AES_set_decrypt_key,\@function
1015.align 16
1016AES_set_decrypt_key: # expand the key via AES_set_encrypt_key, then convert the schedule in place for decryption
1017 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1018 stg $ra,112($sp) # save non-volatile registers!
1019 bras $ra,AES_set_encrypt_key
1020 lg $key,32($sp) # reload the key pointer and return address stored above
1021 lg $ra,112($sp)
1022 ltgr %r2,%r2 # test the status AES_set_encrypt_key left in %r2
1023 bnzr $ra # non-zero: return it to the caller unchanged
1024___
1025$code.=<<___ if (!$softonly);
1026 l $t0,240($key) # word at key+240: round count or hardware function code
1027 lhi $t1,16
1028 cr $t0,$t1
1029 jl .Lgo # below 16: software schedule, convert it at .Lgo
1030 oill $t0,0x80 # set "decrypt" bit
1031 st $t0,240($key)
1032 br $ra
1033
1034.align 16
1035.Ldkey_internal: # internal entry: same conversion, starting from .Lekey_internal
1036 stg $key,32($sp)
1037 stg $ra,40($sp)
1038 bras $ra,.Lekey_internal
1039 lg $key,32($sp)
1040 lg $ra,40($sp)
1041___
1042$code.=<<___;
1043
1044.Lgo: llgf $rounds,240($key)
1045 la $i1,0($key) # i1 -> start of the schedule
1046 sllg $i2,$rounds,4
1047 la $i2,0($i2,$key) # i2 -> key + 16*rounds, the last round key
1048 srl $rounds,1 # rounds/2 swaps are enough to reverse the order
1049 lghi $t1,-16
1050
1051.align 16
1052.Linv: lmg $s0,$s1,0($i1) # exchange the entries at i1 and i2
1053 lmg $s2,$s3,0($i2)
1054 stmg $s0,$s1,0($i2)
1055 stmg $s2,$s3,0($i1)
1056 la $i1,16($i1) # i1 += 16
1057 la $i2,0($t1,$i2) # i2 -= 16 (t1 holds -16)
1058 brct $rounds,.Linv
1059___
1060$mask80=$i1; # i1..i3 are no longer needed as pointers; reuse them for the byte masks
1061$mask1b=$i2;
1062$maskfe=$i3;
1063$code.=<<___;
1064 llgf $rounds,240($key)
1065 aghi $rounds,-1
1066 sll $rounds,2 # (rounds-1)*4
1067 llilh $mask80,0x8080 # with the oill below: 0x80808080
1068 llilh $mask1b,0x1b1b # 0x1b1b1b1b
1069 llilh $maskfe,0xfefe # 0xfefefefe
1070 oill $mask80,0x8080
1071 oill $mask1b,0x1b1b
1072 oill $maskfe,0xfefe
1073
1074.align 16
1075.Lmix: l $s0,16($key) # tp1
1076 lr $s1,$s0
1077 ngr $s1,$mask80 # keep the top bit of each byte
1078 srlg $t1,$s1,7
1079 slr $s1,$t1 # 0x7f in every byte whose top bit was set
1080 nr $s1,$mask1b # ... reduced to 0x1b for those bytes
1081 sllg $t1,$s0,1
1082 nr $t1,$maskfe # each byte of tp1 shifted left by one
1083 xr $s1,$t1 # tp2
1084
1085 lr $s2,$s1
1086 ngr $s2,$mask80
1087 srlg $t1,$s2,7
1088 slr $s2,$t1
1089 nr $s2,$mask1b
1090 sllg $t1,$s1,1
1091 nr $t1,$maskfe
1092 xr $s2,$t1 # tp4
1093
1094 lr $s3,$s2
1095 ngr $s3,$mask80
1096 srlg $t1,$s3,7
1097 slr $s3,$t1
1098 nr $s3,$mask1b
1099 sllg $t1,$s2,1
1100 nr $t1,$maskfe
1101 xr $s3,$t1 # tp8
1102
1103 xr $s1,$s0 # tp2^tp1
1104 xr $s2,$s0 # tp4^tp1
1105 rll $s0,$s0,24 # = ROTATE(tp1,8)
1106 xr $s2,$s3 # ^=tp8
1107 xr $s0,$s1 # ^=tp2^tp1
1108 xr $s1,$s3 # tp2^tp1^tp8
1109 xr $s0,$s2 # ^=tp4^tp1^tp8
1110 rll $s1,$s1,8
1111 rll $s2,$s2,16
1112 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1113 rll $s3,$s3,24
1114 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1115 xr $s0,$s3 # ^= ROTATE(tp8,8)
1116
1117 st $s0,16($key) # write the transformed word back in place
1118 la $key,4($key) # next schedule word
1119 brct $rounds,.Lmix
1120
1121 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1122 lghi %r2,0 # return 0
1123 br $ra
1124.size AES_set_decrypt_key,.-AES_set_decrypt_key
1125___
1126
1127#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1128# size_t length, const AES_KEY *key,
1129# unsigned char *ivec, const int enc)
1130{ # scope for the register names used by AES_cbc_encrypt
1131my $inp="%r2";
1132my $out="%r4"; # length and out are swapped
1133my $len="%r3";
1134my $key="%r5";
1135my $ivp="%r6";
1136
1137$code.=<<___;
1138.globl AES_cbc_encrypt
1139.type AES_cbc_encrypt,\@function
1140.align 16
1141AES_cbc_encrypt: # CBC en/decryption: kmc instruction when key+240 holds a function code, software rounds otherwise
1142 xgr %r3,%r4 # flip %r3 and %r4, out and len
1143 xgr %r4,%r3
1144 xgr %r3,%r4
1145___
1146$code.=<<___ if (!$softonly);
1147 lhi %r0,16
1148 cl %r0,240($key)
1149 jh .Lcbc_software # word at key+240 is below 16: take the software path
1150
1151 lg %r0,0($ivp) # copy ivec
1152 lg %r1,8($ivp)
1153 stmg %r0,%r1,16($sp)
1154 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1155 stmg %r0,%r1,32($sp)
1156 lmg %r0,%r1,16($key)
1157 stmg %r0,%r1,48($sp)
1158 l %r0,240($key) # load kmc code
1159 lghi $key,15 # res=len%16, len-=res;
1160 ngr $key,$len
1161 slgr $len,$key
1162 la %r1,16($sp) # parameter block - ivec || key
1163 jz .Lkmc_truncated # NOTE(review): elsewhere in this routine a zero slgr result is tested with brc 2; check that jz can trigger here
1164 .long 0xb92f0042 # kmc %r4,%r2
1165 brc 1,.-4 # pay attention to "partial completion"
1166 ltr $key,$key # any residue bytes (len%16) left over?
1167 jnz .Lkmc_truncated
1168.Lkmc_done:
1169 lmg %r0,%r1,16($sp) # copy ivec to caller
1170 stg %r0,0($ivp)
1171 stg %r1,8($ivp)
1172 br $ra
1173.align 16
1174.Lkmc_truncated:
1175 ahi $key,-1 # it's the way it's encoded in mvc
1176 tmll %r0,0x80 # "decrypt" bit set in the function code?
1177 jnz .Lkmc_truncated_dec
1178 lghi %r1,0
1179 stg %r1,128($sp) # zero a 16-byte scratch block at 128(sp)
1180 stg %r1,136($sp)
1181 bras %r1,1f # %r1 = address of the mvc template below, which is skipped
1182 mvc 128(1,$sp),0($inp)
11831: ex $key,0(%r1) # run the mvc with the residue length: copy the tail into the zeroed block
1184 la %r1,16($sp) # restore parameter block
1185 la $inp,128($sp) # process the padded scratch block as one full block
1186 lghi $len,16
1187 .long 0xb92f0042 # kmc %r4,%r2
1188 j .Lkmc_done
1189.align 16
1190.Lkmc_truncated_dec:
1191 stg $out,64($sp) # remember the real output pointer
1192 la $out,128($sp) # process one full block into the scratch area instead
1193 lghi $len,16
1194 .long 0xb92f0042 # kmc %r4,%r2
1195 lg $out,64($sp)
1196 bras %r1,2f # %r1 = address of the mvc template below, which is skipped
1197 mvc 0(1,$out),128($sp)
11982: ex $key,0(%r1) # copy only the residue bytes to the caller's buffer
1199 j .Lkmc_done
1200.align 16
1201.Lcbc_software:
1202___
1203$code.=<<___;
1204 stmg $key,$ra,40($sp) # save the registers from key through ra
1205 lhi %r0,0
1206 cl %r0,164($sp) # presumably the enc argument, passed on the stack - confirm against the ABI
1207 je .Lcbc_decrypt # zero selects the decrypt path
1208
1209 larl $tbl,AES_Te
1210
1211 llgf $s0,0($ivp) # chaining value starts as the ivec
1212 llgf $s1,4($ivp)
1213 llgf $s2,8($ivp)
1214 llgf $s3,12($ivp)
1215
1216 lghi $t0,16
1217 slgr $len,$t0 # len -= 16
1218 brc 4,.Lcbc_enc_tail # if borrow
1219.Lcbc_enc_loop:
1220 stmg $inp,$out,16($sp) # save inp, len and out across the call
1221 x $s0,0($inp) # xor the input block into the chaining value
1222 x $s1,4($inp)
1223 x $s2,8($inp)
1224 x $s3,12($inp)
1225 lgr %r4,$key # presumably where _s390x_AES_encrypt expects the key pointer
1226
1227 bras $ra,_s390x_AES_encrypt
1228
1229 lmg $inp,$key,16($sp) # reload inp, len, out and key (key was saved at 40(sp) above)
1230 st $s0,0($out)
1231 st $s1,4($out)
1232 st $s2,8($out)
1233 st $s3,12($out)
1234
1235 la $inp,16($inp)
1236 la $out,16($out)
1237 lghi $t0,16
1238 ltgr $len,$len # nothing left after this block?
1239 jz .Lcbc_enc_done
1240 slgr $len,$t0
1241 brc 4,.Lcbc_enc_tail # if borrow
1242 j .Lcbc_enc_loop
1243.align 16
1244.Lcbc_enc_done:
1245 lg $ivp,48($sp) # reload ivp from the save area
1246 st $s0,0($ivp) # last output block becomes the caller's new ivec
1247 st $s1,4($ivp)
1248 st $s2,8($ivp)
1249 st $s3,12($ivp)
1250
1251 lmg %r7,$ra,56($sp)
1252 br $ra
1253
1254.align 16
1255.Lcbc_enc_tail:
1256 aghi $len,15 # residue-1, the mvc length encoding; NOTE(review): with len == 0 on entry this is -1, i.e. a 256-byte mvc - confirm callers never pass 0
1257 lghi $t0,0
1258 stg $t0,128($sp) # zero a 16-byte scratch block at 128(sp)
1259 stg $t0,136($sp)
1260 bras $t1,3f # t1 = address of the mvc template below, which is skipped
1261 mvc 128(1,$sp),0($inp)
12623: ex $len,0($t1) # copy the residue bytes into the zeroed block
1263 lghi $len,0 # this padded block is the last one
1264 la $inp,128($sp)
1265 j .Lcbc_enc_loop
1266
1267.align 16
1268.Lcbc_decrypt:
1269 larl $tbl,AES_Td
1270
1271 lg $t0,0($ivp) # keep the chaining value at 128(sp)
1272 lg $t1,8($ivp)
1273 stmg $t0,$t1,128($sp)
1274
1275.Lcbc_dec_loop:
1276 stmg $inp,$out,16($sp) # save inp, len and out across the call
1277 llgf $s0,0($inp)
1278 llgf $s1,4($inp)
1279 llgf $s2,8($inp)
1280 llgf $s3,12($inp)
1281 lgr %r4,$key # presumably where _s390x_AES_decrypt expects the key pointer
1282
1283 bras $ra,_s390x_AES_decrypt
1284
1285 lmg $inp,$key,16($sp) # reload inp, len, out and key
1286 sllg $s0,$s0,32 # pack s0:s1 and s2:s3 into two 64-bit values
1287 sllg $s2,$s2,32
1288 lr $s0,$s1
1289 lr $s2,$s3
1290
1291 lg $t0,0($inp) # this input block is the next chaining value
1292 lg $t1,8($inp)
1293 xg $s0,128($sp) # xor with the previous chaining value
1294 xg $s2,136($sp)
1295 lghi $s1,16
1296 slgr $len,$s1 # len -= 16
1297 brc 4,.Lcbc_dec_tail # if borrow
1298 brc 2,.Lcbc_dec_done # if zero
1299 stg $s0,0($out)
1300 stg $s2,8($out)
1301 stmg $t0,$t1,128($sp) # update the stored chaining value
1302
1303 la $inp,16($inp)
1304 la $out,16($out)
1305 j .Lcbc_dec_loop
1306
1307.Lcbc_dec_done:
1308 stg $s0,0($out)
1309 stg $s2,8($out)
1310.Lcbc_dec_exit:
1311 lmg $ivp,$ra,48($sp) # restore ivp through ra from the save area
1312 stmg $t0,$t1,0($ivp) # last input block becomes the caller's new ivec
1313
1314 br $ra
1315
1316.align 16
1317.Lcbc_dec_tail:
1318 aghi $len,15 # residue-1, the mvc length encoding; NOTE(review): with len == 0 on entry this is -1 (256-byte mvc) - confirm callers never pass 0
1319 stg $s0,128($sp) # park the full output block in scratch
1320 stg $s2,136($sp)
1321 bras $s1,4f # s1 = address of the mvc template below, which is skipped
1322 mvc 0(1,$out),128($sp)
13234: ex $len,0($s1) # copy only the residue bytes to the caller's buffer
1324 j .Lcbc_dec_exit
1325.size AES_cbc_encrypt,.-AES_cbc_encrypt
1326___
1327}
1328$code.=<<___; # append the identification string to the generated assembly
1329.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1330___
1331
1332$code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every backtick-quoted span in the generated text with the result of evaluating it as Perl
1333print $code; # emit the finished assembly on standard output
diff --git a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl b/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
deleted file mode 100755
index c57b3a2d6d..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
+++ /dev/null
@@ -1,1181 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.1
10#
11# The major reason for undertaking this effort was to mitigate the hazard of
12# cache-timing attacks. This is [currently and initially!] addressed in
13# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14# 2. References to them are scheduled for L2 cache latency, meaning
15# that the tables don't have to reside in L1 cache. Once again, this
16# is an initial draft and one should expect more countermeasures to
17# be implemented...
18#
19# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
20# round.
21#
22# Even though performance was not the primary goal [on the contrary,
23# extra shifts "induced" by compressed S-box and longer loop epilogue
24# "induced" by scheduling for L2 have negative effect on performance],
25# the code turned out to run in ~23 cycles per processed byte en-/
26# decrypted with 128-bit key. This is a pretty good result for code
27# with the mentioned qualities on an UltraSPARC core. Compared to Sun C
28# generated code my encrypt procedure runs just a few percent faster,
29# while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
30# optimal decrypt procedure]. Compared to GNU C generated code both
31# procedures are more than 60% faster:-)
32
33$bits=32; # default: 32-bit build
34for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } # -m64 or -xarch=v9 on the command line selects the 64-bit build
35if ($bits==64) { $bias=2047; $frame=192; } # stack-pointer bias and frame size used by the 64-bit build
36else { $bias=0; $frame=112; } # same two values for the 32-bit build
37$locals=16; # extra stack bytes reserved by the internal routines; the return address is parked there
38
39$acc0="%l0"; # acc0..acc15: table offsets first, then the values loaded from the table
40$acc1="%o0";
41$acc2="%o1";
42$acc3="%o2";
43
44$acc4="%l1";
45$acc5="%o3";
46$acc6="%o4";
47$acc7="%o5";
48
49$acc8="%l2";
50$acc9="%o7";
51$acc10="%g1";
52$acc11="%g2";
53
54$acc12="%l3";
55$acc13="%g3";
56$acc14="%g4";
57$acc15="%g5";
58
59$t0="%l4"; # t0..t3: round-key words, and the alternate copy of the state between rounds
60$t1="%l5";
61$t2="%l6";
62$t3="%l7";
63
64$s0="%i0"; # s0..s3: the four 32-bit state words
65$s1="%i1";
66$s2="%i2";
67$s3="%i3";
68$tbl="%i4"; # lookup table base
69$key="%i5"; # key schedule pointer
70$rounds="%i7"; # aliases with return address, which is off-loaded to stack
72sub _data_word() # append each argument to $code as a .long pair holding the same 32-bit value twice
73{ my $i;
74 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } # the doubled entry is what lets the round code fetch 8 bytes and shift right by 8/16/24 to get a rotated word
75}
76
77$code.=<<___ if ($bits==64); # 64-bit build only: acc11/acc13 live in %g2/%g3, so declare those registers as scratch
78.register %g2,#scratch
79.register %g3,#scratch
80___
81$code.=<<___;
82.section ".text",#alloc,#execinstr
83
84.align 256
85AES_Te: ! 256 doubled 32-bit entries (2048 bytes) followed by a 256-byte table
86___
87&_data_word(
88 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
89 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
90 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
91 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
92 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
93 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
94 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
95 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
96 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
97 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
98 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
99 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
100 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
101 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
102 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
103 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
104 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
105 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
106 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
145 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
146 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
147 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
148 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
149 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
150 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
151 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
152$code.=<<___;
153 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
154 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
155 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
156 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
157 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
158 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
159 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
160 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
161 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
162 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
163 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
164 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
165 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
166 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
167 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
168 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
169 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
170 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
171 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
172 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
173 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
174 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
175 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
176 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
177 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
178 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
179 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
180 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
181 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
182 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
183 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
184 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
185.type AES_Te,#object
186.size AES_Te,(.-AES_Te)
187
188.align 64
189.skip 16
190_sparcv9_AES_encrypt: ! internal block encryption: state in the caller's %o0-%o3, table in %o4, key in %o5; result returned in %o0-%o3
191 save %sp,-$frame-$locals,%sp
192 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
193 ld [$key+240],$rounds ! round count is kept at key+240
194 ld [$key+0],$t0
195 ld [$key+4],$t1 !
196 ld [$key+8],$t2
197 srl $rounds,1,$rounds ! rounds/2 passes: each pass of the loop below covers two rounds
198 xor $t0,$s0,$s0 ! state ^= first four key words
199 ld [$key+12],$t3
200 srl $s0,21,$acc0 ! with the and 2040 below: (top byte of s0)*8, offset of an 8-byte table entry
201 xor $t1,$s1,$s1
202 ld [$key+16],$t0
203 srl $s1,13,$acc1 !
204 xor $t2,$s2,$s2
205 ld [$key+20],$t1
206 xor $t3,$s3,$s3
207 ld [$key+24],$t2
208 and $acc0,2040,$acc0
209 ld [$key+28],$t3
210 nop
211.Lenc_loop: ! first round of the pass: s0-s3 -> t0-t3
212 srl $s2,5,$acc2 !
213 and $acc1,2040,$acc1
214 ldx [$tbl+$acc0],$acc0 ! fetch the 8-byte entry; acc0 now holds table data
215 sll $s3,3,$acc3
216 and $acc2,2040,$acc2
217 ldx [$tbl+$acc1],$acc1
218 srl $s1,21,$acc4
219 and $acc3,2040,$acc3
220 ldx [$tbl+$acc2],$acc2 !
221 srl $s2,13,$acc5
222 and $acc4,2040,$acc4
223 ldx [$tbl+$acc3],$acc3
224 srl $s3,5,$acc6
225 and $acc5,2040,$acc5
226 ldx [$tbl+$acc4],$acc4
227 fmovs %f0,%f0 ! copies %f0 onto itself; presumably an instruction-grouping filler - confirm
228 sll $s0,3,$acc7 !
229 and $acc6,2040,$acc6
230 ldx [$tbl+$acc5],$acc5
231 srl $s2,21,$acc8
232 and $acc7,2040,$acc7
233 ldx [$tbl+$acc6],$acc6
234 srl $s3,13,$acc9
235 and $acc8,2040,$acc8
236 ldx [$tbl+$acc7],$acc7 !
237 srl $s0,5,$acc10
238 and $acc9,2040,$acc9
239 ldx [$tbl+$acc8],$acc8
240 sll $s1,3,$acc11
241 and $acc10,2040,$acc10
242 ldx [$tbl+$acc9],$acc9
243 fmovs %f0,%f0
244 srl $s3,21,$acc12 !
245 and $acc11,2040,$acc11
246 ldx [$tbl+$acc10],$acc10
247 srl $s0,13,$acc13
248 and $acc12,2040,$acc12
249 ldx [$tbl+$acc11],$acc11
250 srl $s1,5,$acc14
251 and $acc13,2040,$acc13
252 ldx [$tbl+$acc12],$acc12 !
253 sll $s2,3,$acc15
254 and $acc14,2040,$acc14
255 ldx [$tbl+$acc13],$acc13
256 and $acc15,2040,$acc15
257 add $key,32,$key ! advance the key pointer by two round keys
258 ldx [$tbl+$acc14],$acc14
259 fmovs %f0,%f0
260 subcc $rounds,1,$rounds !
261 ldx [$tbl+$acc15],$acc15
262 bz,a,pn %icc,.Lenc_last ! counter reached zero: finish with the byte table
263 add $tbl,2048,$rounds ! annulled delay slot, runs only when the branch is taken: rounds = tbl+2048
264
265 srlx $acc1,8,$acc1 ! doubled entry >> 8: the low 32 bits are the entry rotated right by 8
266 xor $acc0,$t0,$t0
267 ld [$key+0],$s0
268 fmovs %f0,%f0
269 srlx $acc2,16,$acc2 !
270 xor $acc1,$t0,$t0
271 ld [$key+4],$s1
272 srlx $acc3,24,$acc3
273 xor $acc2,$t0,$t0
274 ld [$key+8],$s2
275 srlx $acc5,8,$acc5
276 xor $acc3,$t0,$t0
277 ld [$key+12],$s3 !
278 srlx $acc6,16,$acc6
279 xor $acc4,$t1,$t1
280 fmovs %f0,%f0
281 srlx $acc7,24,$acc7
282 xor $acc5,$t1,$t1
283 srlx $acc9,8,$acc9
284 xor $acc6,$t1,$t1
285 srlx $acc10,16,$acc10 !
286 xor $acc7,$t1,$t1
287 srlx $acc11,24,$acc11
288 xor $acc8,$t2,$t2
289 srlx $acc13,8,$acc13
290 xor $acc9,$t2,$t2
291 srlx $acc14,16,$acc14
292 xor $acc10,$t2,$t2
293 srlx $acc15,24,$acc15 !
294 xor $acc11,$t2,$t2
295 xor $acc12,$acc14,$acc14
296 xor $acc13,$t3,$t3
297 srl $t0,21,$acc0 ! second round of the pass: t0-t3 -> s0-s3
298 xor $acc14,$t3,$t3
299 srl $t1,13,$acc1
300 xor $acc15,$t3,$t3
301
302 and $acc0,2040,$acc0 !
303 srl $t2,5,$acc2
304 and $acc1,2040,$acc1
305 ldx [$tbl+$acc0],$acc0
306 sll $t3,3,$acc3
307 and $acc2,2040,$acc2
308 ldx [$tbl+$acc1],$acc1
309 fmovs %f0,%f0
310 srl $t1,21,$acc4 !
311 and $acc3,2040,$acc3
312 ldx [$tbl+$acc2],$acc2
313 srl $t2,13,$acc5
314 and $acc4,2040,$acc4
315 ldx [$tbl+$acc3],$acc3
316 srl $t3,5,$acc6
317 and $acc5,2040,$acc5
318 ldx [$tbl+$acc4],$acc4 !
319 sll $t0,3,$acc7
320 and $acc6,2040,$acc6
321 ldx [$tbl+$acc5],$acc5
322 srl $t2,21,$acc8
323 and $acc7,2040,$acc7
324 ldx [$tbl+$acc6],$acc6
325 fmovs %f0,%f0
326 srl $t3,13,$acc9 !
327 and $acc8,2040,$acc8
328 ldx [$tbl+$acc7],$acc7
329 srl $t0,5,$acc10
330 and $acc9,2040,$acc9
331 ldx [$tbl+$acc8],$acc8
332 sll $t1,3,$acc11
333 and $acc10,2040,$acc10
334 ldx [$tbl+$acc9],$acc9 !
335 srl $t3,21,$acc12
336 and $acc11,2040,$acc11
337 ldx [$tbl+$acc10],$acc10
338 srl $t0,13,$acc13
339 and $acc12,2040,$acc12
340 ldx [$tbl+$acc11],$acc11
341 fmovs %f0,%f0
342 srl $t1,5,$acc14 !
343 and $acc13,2040,$acc13
344 ldx [$tbl+$acc12],$acc12
345 sll $t2,3,$acc15
346 and $acc14,2040,$acc14
347 ldx [$tbl+$acc13],$acc13
348 srlx $acc1,8,$acc1
349 and $acc15,2040,$acc15
350 ldx [$tbl+$acc14],$acc14 !
351
352 srlx $acc2,16,$acc2
353 xor $acc0,$s0,$s0
354 ldx [$tbl+$acc15],$acc15
355 srlx $acc3,24,$acc3
356 xor $acc1,$s0,$s0
357 ld [$key+16],$t0 ! preload the key words for the next pass
358 fmovs %f0,%f0
359 srlx $acc5,8,$acc5 !
360 xor $acc2,$s0,$s0
361 ld [$key+20],$t1
362 srlx $acc6,16,$acc6
363 xor $acc3,$s0,$s0
364 ld [$key+24],$t2
365 srlx $acc7,24,$acc7
366 xor $acc4,$s1,$s1
367 ld [$key+28],$t3 !
368 srlx $acc9,8,$acc9
369 xor $acc5,$s1,$s1
370 ldx [$tbl+2048+0],%g0 ! prefetch te4
371 srlx $acc10,16,$acc10
372 xor $acc6,$s1,$s1
373 ldx [$tbl+2048+32],%g0 ! prefetch te4
374 srlx $acc11,24,$acc11
375 xor $acc7,$s1,$s1
376 ldx [$tbl+2048+64],%g0 ! prefetch te4
377 srlx $acc13,8,$acc13
378 xor $acc8,$s2,$s2
379 ldx [$tbl+2048+96],%g0 ! prefetch te4
380 srlx $acc14,16,$acc14 !
381 xor $acc9,$s2,$s2
382 ldx [$tbl+2048+128],%g0 ! prefetch te4
383 srlx $acc15,24,$acc15
384 xor $acc10,$s2,$s2
385 ldx [$tbl+2048+160],%g0 ! prefetch te4
386 srl $s0,21,$acc0
387 xor $acc11,$s2,$s2
388 ldx [$tbl+2048+192],%g0 ! prefetch te4
389 xor $acc12,$acc14,$acc14
390 xor $acc13,$s3,$s3
391 ldx [$tbl+2048+224],%g0 ! prefetch te4
392 srl $s1,13,$acc1 !
393 xor $acc14,$s3,$s3
394 xor $acc15,$s3,$s3
395 ba .Lenc_loop
396 and $acc0,2040,$acc0 ! (delay slot) finish the acc0 offset for the next pass
397
398.align 32
399.Lenc_last: ! last round: complete t0-t3, then substitute single bytes through the table at tbl+2048
400 srlx $acc1,8,$acc1 !
401 xor $acc0,$t0,$t0
402 ld [$key+0],$s0 ! s0-s3 start out as the final four key words
403 srlx $acc2,16,$acc2
404 xor $acc1,$t0,$t0
405 ld [$key+4],$s1
406 srlx $acc3,24,$acc3
407 xor $acc2,$t0,$t0
408 ld [$key+8],$s2 !
409 srlx $acc5,8,$acc5
410 xor $acc3,$t0,$t0
411 ld [$key+12],$s3
412 srlx $acc6,16,$acc6
413 xor $acc4,$t1,$t1
414 srlx $acc7,24,$acc7
415 xor $acc5,$t1,$t1
416 srlx $acc9,8,$acc9 !
417 xor $acc6,$t1,$t1
418 srlx $acc10,16,$acc10
419 xor $acc7,$t1,$t1
420 srlx $acc11,24,$acc11
421 xor $acc8,$t2,$t2
422 srlx $acc13,8,$acc13
423 xor $acc9,$t2,$t2
424 srlx $acc14,16,$acc14 !
425 xor $acc10,$t2,$t2
426 srlx $acc15,24,$acc15
427 xor $acc11,$t2,$t2
428 xor $acc12,$acc14,$acc14
429 xor $acc13,$t3,$t3
430 srl $t0,24,$acc0 ! plain byte indices from here on, not scaled offsets
431 xor $acc14,$t3,$t3
432 srl $t1,16,$acc1 !
433 xor $acc15,$t3,$t3
434
435 srl $t2,8,$acc2
436 and $acc1,255,$acc1
437 ldub [$rounds+$acc0],$acc0 ! rounds holds tbl+2048 here (set in the branch delay slot)
438 srl $t1,24,$acc4
439 and $acc2,255,$acc2
440 ldub [$rounds+$acc1],$acc1
441 srl $t2,16,$acc5 !
442 and $t3,255,$acc3
443 ldub [$rounds+$acc2],$acc2
444 ldub [$rounds+$acc3],$acc3
445 srl $t3,8,$acc6
446 and $acc5,255,$acc5
447 ldub [$rounds+$acc4],$acc4
448 fmovs %f0,%f0
449 srl $t2,24,$acc8 !
450 and $acc6,255,$acc6
451 ldub [$rounds+$acc5],$acc5
452 srl $t3,16,$acc9
453 and $t0,255,$acc7
454 ldub [$rounds+$acc6],$acc6
455 ldub [$rounds+$acc7],$acc7
456 fmovs %f0,%f0
457 srl $t0,8,$acc10 !
458 and $acc9,255,$acc9
459 ldub [$rounds+$acc8],$acc8
460 srl $t3,24,$acc12
461 and $acc10,255,$acc10
462 ldub [$rounds+$acc9],$acc9
463 srl $t0,16,$acc13
464 and $t1,255,$acc11
465 ldub [$rounds+$acc10],$acc10 !
466 srl $t1,8,$acc14
467 and $acc13,255,$acc13
468 ldub [$rounds+$acc11],$acc11
469 ldub [$rounds+$acc12],$acc12
470 and $acc14,255,$acc14
471 ldub [$rounds+$acc13],$acc13
472 and $t2,255,$acc15
473 ldub [$rounds+$acc14],$acc14 !
474
475 sll $acc0,24,$acc0 ! move each substituted byte back to its position in the word
476 xor $acc3,$s0,$s0
477 ldub [$rounds+$acc15],$acc15
478 sll $acc1,16,$acc1
479 xor $acc0,$s0,$s0
480 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
481 fmovs %f0,%f0
482 sll $acc2,8,$acc2 !
483 xor $acc1,$s0,$s0
484 sll $acc4,24,$acc4
485 xor $acc2,$s0,$s0
486 sll $acc5,16,$acc5
487 xor $acc7,$s1,$s1
488 sll $acc6,8,$acc6
489 xor $acc4,$s1,$s1
490 sll $acc8,24,$acc8 !
491 xor $acc5,$s1,$s1
492 sll $acc9,16,$acc9
493 xor $acc11,$s2,$s2
494 sll $acc10,8,$acc10
495 xor $acc6,$s1,$s1
496 sll $acc12,24,$acc12
497 xor $acc8,$s2,$s2
498 sll $acc13,16,$acc13 !
499 xor $acc9,$s2,$s2
500 sll $acc14,8,$acc14
501 xor $acc10,$s2,$s2
502 xor $acc12,$acc14,$acc14
503 xor $acc13,$s3,$s3
504 xor $acc14,$s3,$s3
505 xor $acc15,$s3,$s3
506
507 ret
508 restore ! (delay slot) pop the register window; s0-s3 become the caller's %o0-%o3
509.type _sparcv9_AES_encrypt,#function
510.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
511
512.align 32
513.globl AES_encrypt
514AES_encrypt: ! public entry: encrypts the 16 bytes at %o0 into %o1 using the key schedule at %o2
515 or %o0,%o1,%g1
516 andcc %g1,3,%g0 ! are both the input and output pointers 4-byte aligned?
517 bnz,pn %xcc,.Lunaligned_enc
518 save %sp,-$frame,%sp ! (delay slot, not annulled) new window on either path
519
520 ld [%i0+0],%o0 ! aligned path: load the block as four words
521 ld [%i0+4],%o1
522 ld [%i0+8],%o2
523 ld [%i0+12],%o3
524
5251: call .+8 ! leaves the address of this call (label 1) in %o7
526 add %o7,AES_Te-1b,%o4 ! (delay slot) %o4 = run-time address of AES_Te
527 call _sparcv9_AES_encrypt
528 mov %i2,%o5 ! (delay slot) %o5 = key schedule pointer
529
530 st %o0,[%i1+0] ! store the result words
531 st %o1,[%i1+4]
532 st %o2,[%i1+8]
533 st %o3,[%i1+12]
534
535 ret
536 restore
537
538.align 32
539.Lunaligned_enc: ! unaligned path: assemble each word from single bytes, most significant first
540 ldub [%i0+0],%l0
541 ldub [%i0+1],%l1
542 ldub [%i0+2],%l2
543
544 sll %l0,24,%l0
545 ldub [%i0+3],%l3
546 sll %l1,16,%l1
547 ldub [%i0+4],%l4
548 sll %l2,8,%l2
549 or %l1,%l0,%l0
550 ldub [%i0+5],%l5
551 sll %l4,24,%l4
552 or %l3,%l2,%l2
553 ldub [%i0+6],%l6
554 sll %l5,16,%l5
555 or %l0,%l2,%o0 ! %o0 = input bytes 0..3
556 ldub [%i0+7],%l7
557
558 sll %l6,8,%l6
559 or %l5,%l4,%l4
560 ldub [%i0+8],%l0
561 or %l7,%l6,%l6
562 ldub [%i0+9],%l1
563 or %l4,%l6,%o1 ! %o1 = input bytes 4..7
564 ldub [%i0+10],%l2
565
566 sll %l0,24,%l0
567 ldub [%i0+11],%l3
568 sll %l1,16,%l1
569 ldub [%i0+12],%l4
570 sll %l2,8,%l2
571 or %l1,%l0,%l0
572 ldub [%i0+13],%l5
573 sll %l4,24,%l4
574 or %l3,%l2,%l2
575 ldub [%i0+14],%l6
576 sll %l5,16,%l5
577 or %l0,%l2,%o2 ! %o2 = input bytes 8..11
578 ldub [%i0+15],%l7
579
580 sll %l6,8,%l6
581 or %l5,%l4,%l4
582 or %l7,%l6,%l6
583 or %l4,%l6,%o3 ! %o3 = input bytes 12..15
584
5851: call .+8 ! leaves the address of this call (label 1) in %o7
586 add %o7,AES_Te-1b,%o4 ! (delay slot) %o4 = run-time address of AES_Te
587 call _sparcv9_AES_encrypt
588 mov %i2,%o5 ! (delay slot) %o5 = key schedule pointer
589
590 srl %o0,24,%l0 ! write the result back one byte at a time
591 srl %o0,16,%l1
592 stb %l0,[%i1+0]
593 srl %o0,8,%l2
594 stb %l1,[%i1+1]
595 stb %l2,[%i1+2]
596 srl %o1,24,%l4
597 stb %o0,[%i1+3]
598
599 srl %o1,16,%l5
600 stb %l4,[%i1+4]
601 srl %o1,8,%l6
602 stb %l5,[%i1+5]
603 stb %l6,[%i1+6]
604 srl %o2,24,%l0
605 stb %o1,[%i1+7]
606
607 srl %o2,16,%l1
608 stb %l0,[%i1+8]
609 srl %o2,8,%l2
610 stb %l1,[%i1+9]
611 stb %l2,[%i1+10]
612 srl %o3,24,%l4
613 stb %o2,[%i1+11]
614
615 srl %o3,16,%l5
616 stb %l4,[%i1+12]
617 srl %o3,8,%l6
618 stb %l5,[%i1+13]
619 stb %l6,[%i1+14]
620 stb %o3,[%i1+15]
621
622 ret
623 restore
624.type AES_encrypt,#function
625.size AES_encrypt,(.-AES_encrypt)
627___
628
629$code.=<<___;
630.align 256
631AES_Td: ! 256 doubled 32-bit entries (2048 bytes) followed by a 256-byte table
632___
633&_data_word(
634 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
635 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
636 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
637 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
638 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
639 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
640 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
641 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
642 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
643 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
644 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
645 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
646 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
647 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
648 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
649 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
650 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
651 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
652 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
653 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
654 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
655 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
656 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
657 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
658 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
659 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
660 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
661 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
662 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
663 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
664 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
665 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
666 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
667 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
668 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
669 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
670 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
671 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
672 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
673 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
674 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
675 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
676 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
677 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
678 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
679 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
680 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
681 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
682 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
683 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
684 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
685 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
686 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
687 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
688 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
689 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
690 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
691 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
692 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
693 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
694 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
695 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
696 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
697 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Append the next piece of SPARC assembler text to $code.  It contains:
#  - 256 table bytes (32 rows of 8) followed by the .type/.size directives
#    that close the AES_Td object; the code below addresses these bytes at
#    table base + 2048 and its own comments call them "td4";
#  - _sparcv9_AES_decrypt, the internal block routine.  It reads the round
#    count from [$key+240], halves it, and runs .Ldec_loop, which consumes
#    32 bytes of key schedule per pass.  On the final pass $rounds is reused
#    as a pointer to table base + 2048 and .Ldec_last performs the last
#    round with byte loads (ldub) through it;
#  - AES_decrypt, the exported entry point.  It tests (%o0|%o1)&3 and takes
#    .Lunaligned_dec, which moves the block byte by byte, when either pointer
#    is not 4-byte aligned; otherwise it uses word loads and stores.  Both
#    paths pass the address of AES_Td in %o4 and the third argument in %o5.
# NOTE(review): $frame, $locals, $bias, $key, $tbl, $rounds and the
# $s*/$t*/$acc* names are interpolated from variables defined outside this
# chunk -- confirm their register assignments there.
698$code.=<<___;
699 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
700 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
701 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
702 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
703 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
704 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
705 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
706 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
707 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
708 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
709 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
710 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
711 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
712 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
713 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
714 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
715 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
716 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
717 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
718 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
719 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
720 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
721 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
722 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
723 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
724 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
725 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
726 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
727 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
728 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
729 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
730 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
731.type AES_Td,#object
732.size AES_Td,(.-AES_Td)
733
734.align 64
735.skip 16
736_sparcv9_AES_decrypt:
737 save %sp,-$frame-$locals,%sp
738 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
739 ld [$key+240],$rounds
740 ld [$key+0],$t0
741 ld [$key+4],$t1 !
742 ld [$key+8],$t2
743 ld [$key+12],$t3
744 srl $rounds,1,$rounds
745 xor $t0,$s0,$s0
746 ld [$key+16],$t0
747 xor $t1,$s1,$s1
748 ld [$key+20],$t1
749 srl $s0,21,$acc0 !
750 xor $t2,$s2,$s2
751 ld [$key+24],$t2
752 xor $t3,$s3,$s3
753 and $acc0,2040,$acc0
754 ld [$key+28],$t3
755 srl $s3,13,$acc1
756 nop
757.Ldec_loop:
758 srl $s2,5,$acc2 !
759 and $acc1,2040,$acc1
760 ldx [$tbl+$acc0],$acc0
761 sll $s1,3,$acc3
762 and $acc2,2040,$acc2
763 ldx [$tbl+$acc1],$acc1
764 srl $s1,21,$acc4
765 and $acc3,2040,$acc3
766 ldx [$tbl+$acc2],$acc2 !
767 srl $s0,13,$acc5
768 and $acc4,2040,$acc4
769 ldx [$tbl+$acc3],$acc3
770 srl $s3,5,$acc6
771 and $acc5,2040,$acc5
772 ldx [$tbl+$acc4],$acc4
773 fmovs %f0,%f0
774 sll $s2,3,$acc7 !
775 and $acc6,2040,$acc6
776 ldx [$tbl+$acc5],$acc5
777 srl $s2,21,$acc8
778 and $acc7,2040,$acc7
779 ldx [$tbl+$acc6],$acc6
780 srl $s1,13,$acc9
781 and $acc8,2040,$acc8
782 ldx [$tbl+$acc7],$acc7 !
783 srl $s0,5,$acc10
784 and $acc9,2040,$acc9
785 ldx [$tbl+$acc8],$acc8
786 sll $s3,3,$acc11
787 and $acc10,2040,$acc10
788 ldx [$tbl+$acc9],$acc9
789 fmovs %f0,%f0
790 srl $s3,21,$acc12 !
791 and $acc11,2040,$acc11
792 ldx [$tbl+$acc10],$acc10
793 srl $s2,13,$acc13
794 and $acc12,2040,$acc12
795 ldx [$tbl+$acc11],$acc11
796 srl $s1,5,$acc14
797 and $acc13,2040,$acc13
798 ldx [$tbl+$acc12],$acc12 !
799 sll $s0,3,$acc15
800 and $acc14,2040,$acc14
801 ldx [$tbl+$acc13],$acc13
802 and $acc15,2040,$acc15
803 add $key,32,$key
804 ldx [$tbl+$acc14],$acc14
805 fmovs %f0,%f0
806 subcc $rounds,1,$rounds !
807 ldx [$tbl+$acc15],$acc15
808 bz,a,pn %icc,.Ldec_last
809 add $tbl,2048,$rounds
810
811 srlx $acc1,8,$acc1
812 xor $acc0,$t0,$t0
813 ld [$key+0],$s0
814 fmovs %f0,%f0
815 srlx $acc2,16,$acc2 !
816 xor $acc1,$t0,$t0
817 ld [$key+4],$s1
818 srlx $acc3,24,$acc3
819 xor $acc2,$t0,$t0
820 ld [$key+8],$s2
821 srlx $acc5,8,$acc5
822 xor $acc3,$t0,$t0
823 ld [$key+12],$s3 !
824 srlx $acc6,16,$acc6
825 xor $acc4,$t1,$t1
826 fmovs %f0,%f0
827 srlx $acc7,24,$acc7
828 xor $acc5,$t1,$t1
829 srlx $acc9,8,$acc9
830 xor $acc6,$t1,$t1
831 srlx $acc10,16,$acc10 !
832 xor $acc7,$t1,$t1
833 srlx $acc11,24,$acc11
834 xor $acc8,$t2,$t2
835 srlx $acc13,8,$acc13
836 xor $acc9,$t2,$t2
837 srlx $acc14,16,$acc14
838 xor $acc10,$t2,$t2
839 srlx $acc15,24,$acc15 !
840 xor $acc11,$t2,$t2
841 xor $acc12,$acc14,$acc14
842 xor $acc13,$t3,$t3
843 srl $t0,21,$acc0
844 xor $acc14,$t3,$t3
845 xor $acc15,$t3,$t3
846 srl $t3,13,$acc1
847
848 and $acc0,2040,$acc0 !
849 srl $t2,5,$acc2
850 and $acc1,2040,$acc1
851 ldx [$tbl+$acc0],$acc0
852 sll $t1,3,$acc3
853 and $acc2,2040,$acc2
854 ldx [$tbl+$acc1],$acc1
855 fmovs %f0,%f0
856 srl $t1,21,$acc4 !
857 and $acc3,2040,$acc3
858 ldx [$tbl+$acc2],$acc2
859 srl $t0,13,$acc5
860 and $acc4,2040,$acc4
861 ldx [$tbl+$acc3],$acc3
862 srl $t3,5,$acc6
863 and $acc5,2040,$acc5
864 ldx [$tbl+$acc4],$acc4 !
865 sll $t2,3,$acc7
866 and $acc6,2040,$acc6
867 ldx [$tbl+$acc5],$acc5
868 srl $t2,21,$acc8
869 and $acc7,2040,$acc7
870 ldx [$tbl+$acc6],$acc6
871 fmovs %f0,%f0
872 srl $t1,13,$acc9 !
873 and $acc8,2040,$acc8
874 ldx [$tbl+$acc7],$acc7
875 srl $t0,5,$acc10
876 and $acc9,2040,$acc9
877 ldx [$tbl+$acc8],$acc8
878 sll $t3,3,$acc11
879 and $acc10,2040,$acc10
880 ldx [$tbl+$acc9],$acc9 !
881 srl $t3,21,$acc12
882 and $acc11,2040,$acc11
883 ldx [$tbl+$acc10],$acc10
884 srl $t2,13,$acc13
885 and $acc12,2040,$acc12
886 ldx [$tbl+$acc11],$acc11
887 fmovs %f0,%f0
888 srl $t1,5,$acc14 !
889 and $acc13,2040,$acc13
890 ldx [$tbl+$acc12],$acc12
891 sll $t0,3,$acc15
892 and $acc14,2040,$acc14
893 ldx [$tbl+$acc13],$acc13
894 srlx $acc1,8,$acc1
895 and $acc15,2040,$acc15
896 ldx [$tbl+$acc14],$acc14 !
897
898 srlx $acc2,16,$acc2
899 xor $acc0,$s0,$s0
900 ldx [$tbl+$acc15],$acc15
901 srlx $acc3,24,$acc3
902 xor $acc1,$s0,$s0
903 ld [$key+16],$t0
904 fmovs %f0,%f0
905 srlx $acc5,8,$acc5 !
906 xor $acc2,$s0,$s0
907 ld [$key+20],$t1
908 srlx $acc6,16,$acc6
909 xor $acc3,$s0,$s0
910 ld [$key+24],$t2
911 srlx $acc7,24,$acc7
912 xor $acc4,$s1,$s1
913 ld [$key+28],$t3 !
914 srlx $acc9,8,$acc9
915 xor $acc5,$s1,$s1
916 ldx [$tbl+2048+0],%g0 ! prefetch td4
917 srlx $acc10,16,$acc10
918 xor $acc6,$s1,$s1
919 ldx [$tbl+2048+32],%g0 ! prefetch td4
920 srlx $acc11,24,$acc11
921 xor $acc7,$s1,$s1
922 ldx [$tbl+2048+64],%g0 ! prefetch td4
923 srlx $acc13,8,$acc13
924 xor $acc8,$s2,$s2
925 ldx [$tbl+2048+96],%g0 ! prefetch td4
926 srlx $acc14,16,$acc14 !
927 xor $acc9,$s2,$s2
928 ldx [$tbl+2048+128],%g0 ! prefetch td4
929 srlx $acc15,24,$acc15
930 xor $acc10,$s2,$s2
931 ldx [$tbl+2048+160],%g0 ! prefetch td4
932 srl $s0,21,$acc0
933 xor $acc11,$s2,$s2
934 ldx [$tbl+2048+192],%g0 ! prefetch td4
935 xor $acc12,$acc14,$acc14
936 xor $acc13,$s3,$s3
937 ldx [$tbl+2048+224],%g0 ! prefetch td4
938 and $acc0,2040,$acc0 !
939 xor $acc14,$s3,$s3
940 xor $acc15,$s3,$s3
941 ba .Ldec_loop
942 srl $s3,13,$acc1
943
944.align 32
945.Ldec_last:
946 srlx $acc1,8,$acc1 !
947 xor $acc0,$t0,$t0
948 ld [$key+0],$s0
949 srlx $acc2,16,$acc2
950 xor $acc1,$t0,$t0
951 ld [$key+4],$s1
952 srlx $acc3,24,$acc3
953 xor $acc2,$t0,$t0
954 ld [$key+8],$s2 !
955 srlx $acc5,8,$acc5
956 xor $acc3,$t0,$t0
957 ld [$key+12],$s3
958 srlx $acc6,16,$acc6
959 xor $acc4,$t1,$t1
960 srlx $acc7,24,$acc7
961 xor $acc5,$t1,$t1
962 srlx $acc9,8,$acc9 !
963 xor $acc6,$t1,$t1
964 srlx $acc10,16,$acc10
965 xor $acc7,$t1,$t1
966 srlx $acc11,24,$acc11
967 xor $acc8,$t2,$t2
968 srlx $acc13,8,$acc13
969 xor $acc9,$t2,$t2
970 srlx $acc14,16,$acc14 !
971 xor $acc10,$t2,$t2
972 srlx $acc15,24,$acc15
973 xor $acc11,$t2,$t2
974 xor $acc12,$acc14,$acc14
975 xor $acc13,$t3,$t3
976 srl $t0,24,$acc0
977 xor $acc14,$t3,$t3
978 xor $acc15,$t3,$t3 !
979 srl $t3,16,$acc1
980
981 srl $t2,8,$acc2
982 and $acc1,255,$acc1
983 ldub [$rounds+$acc0],$acc0
984 srl $t1,24,$acc4
985 and $acc2,255,$acc2
986 ldub [$rounds+$acc1],$acc1
987 srl $t0,16,$acc5 !
988 and $t1,255,$acc3
989 ldub [$rounds+$acc2],$acc2
990 ldub [$rounds+$acc3],$acc3
991 srl $t3,8,$acc6
992 and $acc5,255,$acc5
993 ldub [$rounds+$acc4],$acc4
994 fmovs %f0,%f0
995 srl $t2,24,$acc8 !
996 and $acc6,255,$acc6
997 ldub [$rounds+$acc5],$acc5
998 srl $t1,16,$acc9
999 and $t2,255,$acc7
1000 ldub [$rounds+$acc6],$acc6
1001 ldub [$rounds+$acc7],$acc7
1002 fmovs %f0,%f0
1003 srl $t0,8,$acc10 !
1004 and $acc9,255,$acc9
1005 ldub [$rounds+$acc8],$acc8
1006 srl $t3,24,$acc12
1007 and $acc10,255,$acc10
1008 ldub [$rounds+$acc9],$acc9
1009 srl $t2,16,$acc13
1010 and $t3,255,$acc11
1011 ldub [$rounds+$acc10],$acc10 !
1012 srl $t1,8,$acc14
1013 and $acc13,255,$acc13
1014 ldub [$rounds+$acc11],$acc11
1015 ldub [$rounds+$acc12],$acc12
1016 and $acc14,255,$acc14
1017 ldub [$rounds+$acc13],$acc13
1018 and $t0,255,$acc15
1019 ldub [$rounds+$acc14],$acc14 !
1020
1021 sll $acc0,24,$acc0
1022 xor $acc3,$s0,$s0
1023 ldub [$rounds+$acc15],$acc15
1024 sll $acc1,16,$acc1
1025 xor $acc0,$s0,$s0
1026 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1027 fmovs %f0,%f0
1028 sll $acc2,8,$acc2 !
1029 xor $acc1,$s0,$s0
1030 sll $acc4,24,$acc4
1031 xor $acc2,$s0,$s0
1032 sll $acc5,16,$acc5
1033 xor $acc7,$s1,$s1
1034 sll $acc6,8,$acc6
1035 xor $acc4,$s1,$s1
1036 sll $acc8,24,$acc8 !
1037 xor $acc5,$s1,$s1
1038 sll $acc9,16,$acc9
1039 xor $acc11,$s2,$s2
1040 sll $acc10,8,$acc10
1041 xor $acc6,$s1,$s1
1042 sll $acc12,24,$acc12
1043 xor $acc8,$s2,$s2
1044 sll $acc13,16,$acc13 !
1045 xor $acc9,$s2,$s2
1046 sll $acc14,8,$acc14
1047 xor $acc10,$s2,$s2
1048 xor $acc12,$acc14,$acc14
1049 xor $acc13,$s3,$s3
1050 xor $acc14,$s3,$s3
1051 xor $acc15,$s3,$s3
1052
1053 ret
1054 restore
1055.type _sparcv9_AES_decrypt,#function
1056.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1057
1058.align 32
1059.globl AES_decrypt
1060AES_decrypt:
1061 or %o0,%o1,%g1
1062 andcc %g1,3,%g0
1063 bnz,pn %xcc,.Lunaligned_dec
1064 save %sp,-$frame,%sp
1065
1066 ld [%i0+0],%o0
1067 ld [%i0+4],%o1
1068 ld [%i0+8],%o2
1069 ld [%i0+12],%o3
1070
10711: call .+8
1072 add %o7,AES_Td-1b,%o4
1073 call _sparcv9_AES_decrypt
1074 mov %i2,%o5
1075
1076 st %o0,[%i1+0]
1077 st %o1,[%i1+4]
1078 st %o2,[%i1+8]
1079 st %o3,[%i1+12]
1080
1081 ret
1082 restore
1083
1084.align 32
1085.Lunaligned_dec:
1086 ldub [%i0+0],%l0
1087 ldub [%i0+1],%l1
1088 ldub [%i0+2],%l2
1089
1090 sll %l0,24,%l0
1091 ldub [%i0+3],%l3
1092 sll %l1,16,%l1
1093 ldub [%i0+4],%l4
1094 sll %l2,8,%l2
1095 or %l1,%l0,%l0
1096 ldub [%i0+5],%l5
1097 sll %l4,24,%l4
1098 or %l3,%l2,%l2
1099 ldub [%i0+6],%l6
1100 sll %l5,16,%l5
1101 or %l0,%l2,%o0
1102 ldub [%i0+7],%l7
1103
1104 sll %l6,8,%l6
1105 or %l5,%l4,%l4
1106 ldub [%i0+8],%l0
1107 or %l7,%l6,%l6
1108 ldub [%i0+9],%l1
1109 or %l4,%l6,%o1
1110 ldub [%i0+10],%l2
1111
1112 sll %l0,24,%l0
1113 ldub [%i0+11],%l3
1114 sll %l1,16,%l1
1115 ldub [%i0+12],%l4
1116 sll %l2,8,%l2
1117 or %l1,%l0,%l0
1118 ldub [%i0+13],%l5
1119 sll %l4,24,%l4
1120 or %l3,%l2,%l2
1121 ldub [%i0+14],%l6
1122 sll %l5,16,%l5
1123 or %l0,%l2,%o2
1124 ldub [%i0+15],%l7
1125
1126 sll %l6,8,%l6
1127 or %l5,%l4,%l4
1128 or %l7,%l6,%l6
1129 or %l4,%l6,%o3
1130
11311: call .+8
1132 add %o7,AES_Td-1b,%o4
1133 call _sparcv9_AES_decrypt
1134 mov %i2,%o5
1135
1136 srl %o0,24,%l0
1137 srl %o0,16,%l1
1138 stb %l0,[%i1+0]
1139 srl %o0,8,%l2
1140 stb %l1,[%i1+1]
1141 stb %l2,[%i1+2]
1142 srl %o1,24,%l4
1143 stb %o0,[%i1+3]
1144
1145 srl %o1,16,%l5
1146 stb %l4,[%i1+4]
1147 srl %o1,8,%l6
1148 stb %l5,[%i1+5]
1149 stb %l6,[%i1+6]
1150 srl %o2,24,%l0
1151 stb %o1,[%i1+7]
1152
1153 srl %o2,16,%l1
1154 stb %l0,[%i1+8]
1155 srl %o2,8,%l2
1156 stb %l1,[%i1+9]
1157 stb %l2,[%i1+10]
1158 srl %o3,24,%l4
1159 stb %o2,[%i1+11]
1160
1161 srl %o3,16,%l5
1162 stb %l4,[%i1+12]
1163 srl %o3,8,%l6
1164 stb %l5,[%i1+13]
1165 stb %l6,[%i1+14]
1166 stb %o3,[%i1+15]
1167
1168 ret
1169 restore
1170.type AES_decrypt,#function
1171.size AES_decrypt,(.-AES_decrypt)
1172___
1173
1174# The fmovs instructions, which stand in for FP nops, were originally added
1175# to meet specific instruction alignment requirements and maximize ILP.
1176# As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have an
1177# undesired effect, so just omit them and sacrifice a fraction of a
1178# percent in performance...
# Delete every "fmovs" and the rest of its line from the generated text
# (/m lets $ match at each line end, /g repeats over the whole string).
1179$code =~ s/fmovs.*$//gem;
1180
# Write the accumulated assembler text to STDOUT.
1181print $code;
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
deleted file mode 100755
index f616f1751f..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl
+++ /dev/null
@@ -1,1579 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.2.
10#
11# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
12# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
13# [you'll notice a lot of resemblance], such as compressed S-boxes
14# in little-endian byte order, prefetch of these tables in CBC mode,
15# as well as avoiding L1 cache aliasing between stack frame and key
16# schedule and already mentioned tables, compressed Td4...
17#
18# Performance in number of cycles per processed byte for 128-bit key:
19#
20# ECB CBC encrypt
21# AMD64 13.7 13.0(*)
22# EM64T 20.2 18.6(*)
23#
24# (*) CBC benchmarks are better than ECB thanks to custom ABI used
25# by the private block encryption function.
26
# Code-generation switch: when true, the *vert() round generators are used
# instead of the per-word encstep()/decstep() style generators.
27$verticalspin=1; # unlike 32-bit version $verticalspin performs
28 # ~15% better on both AMD and Intel cores
# First command-line argument; it is handed to the translator below.
29$output=shift;
# Pipe everything printed on STDOUT through x86_64-xlate.pl, run with the
# current perl interpreter ($^X).
# NOTE(review): the result of open is not checked, and the script path is
# relative to the current working directory.
30open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output";
31
# All assembler text is accumulated in $code.
32$code=".text\n";
33
# Register assignments shared by the generators below: $s0..$s3 hold the
# state words, $acc0..$acc2 are scratch/index registers, $t0..$t2 are
# accumulators, $rnds is the round counter, $sbox the table base and $key
# the key-schedule pointer.
34$s0="%eax";
35$s1="%ebx";
36$s2="%ecx";
37$s3="%edx";
38$acc0="%esi";
39$acc1="%edi";
40$acc2="%ebp";
41$inp="%r8";
42$out="%r9";
43$t0="%r10d";
44$t1="%r11d";
45$t2="%r12d";
46$rnds="%r13d";
47$sbox="%r14";
48$key="%r15";
49
# Return the high-byte form of a register name: %eax/%rax .. %edx/%rdx become
# %ah .. %dh.  A name that does not match that pattern is returned unchanged.
50sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; }
# Return the low-byte form of a register name.  Three rewrites are tried in
# turn: %e?x/%r?x (a-d) -> %?l, %esi/%edi/%rsi/%rdi -> %sil/%dil, and
# %rN or %rNd -> %rNb.  A name matching none of them is returned unchanged.
51sub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/;
52 $r =~ s/%[er]([sd]i)/%\1l/;
53 $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; }
# Append one ".long" line to $code for each argument, with the value written
# twice on the line (so every table entry occupies two 32-bit words).
54sub _data_word()
55{ my $i;
56 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
57}
# Append a single ".long" line to $code listing all arguments as 8-digit hex
# values.  The last argument is popped first so that it can be written
# without a trailing comma and followed by the newline.
58sub data_word()
59{ my $i;
60 my $last=pop(@_);
61 $code.=".long\t";
62 while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; }
63 $code.=sprintf"0x%08x\n",$last;
64}
65
# Append a single ".byte" line to $code listing all arguments as 2-digit hex
# values; each argument is masked with 0xff.  The last argument is popped
# first so that it is written without a trailing comma.
66sub data_byte()
67{ my $i;
68 my $last=pop(@_);
69 $code.=".byte\t";
70 while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; }
71 $code.=sprintf"0x%02x\n",$last&0xff;
72}
73
# Emit one full encryption round in the "vertical" instruction ordering; it
# is used as the .Lenc_loop body when $verticalspin is set.  The generated
# code indexes the table at $sbox with each state byte scaled by 8, at byte
# offsets 0, 3, 2 and 1, accumulating into $t0..$t2 and %r8d.  It advances
# $key by 16 and leaves the new state, XORed with the key words at
# 0..12($key), in $s0..$s3.  The state registers are shifted right by 16
# along the way.  %r8d is the low half of $inp (%r8), hence "zaps $inp".
74sub encvert()
75{ my $t3="%r8d"; # zaps $inp!
76
77$code.=<<___;
78 # favor 3-way issue Opteron pipeline...
79 movzb `&lo("$s0")`,$acc0
80 movzb `&lo("$s1")`,$acc1
81 movzb `&lo("$s2")`,$acc2
82 mov 0($sbox,$acc0,8),$t0
83 mov 0($sbox,$acc1,8),$t1
84 mov 0($sbox,$acc2,8),$t2
85
86 movzb `&hi("$s1")`,$acc0
87 movzb `&hi("$s2")`,$acc1
88 movzb `&lo("$s3")`,$acc2
89 xor 3($sbox,$acc0,8),$t0
90 xor 3($sbox,$acc1,8),$t1
91 mov 0($sbox,$acc2,8),$t3
92
93 movzb `&hi("$s3")`,$acc0
94 shr \$16,$s2
95 movzb `&hi("$s0")`,$acc2
96 xor 3($sbox,$acc0,8),$t2
97 shr \$16,$s3
98 xor 3($sbox,$acc2,8),$t3
99
100 shr \$16,$s1
101 lea 16($key),$key
102 shr \$16,$s0
103
104 movzb `&lo("$s2")`,$acc0
105 movzb `&lo("$s3")`,$acc1
106 movzb `&lo("$s0")`,$acc2
107 xor 2($sbox,$acc0,8),$t0
108 xor 2($sbox,$acc1,8),$t1
109 xor 2($sbox,$acc2,8),$t2
110
111 movzb `&hi("$s3")`,$acc0
112 movzb `&hi("$s0")`,$acc1
113 movzb `&lo("$s1")`,$acc2
114 xor 1($sbox,$acc0,8),$t0
115 xor 1($sbox,$acc1,8),$t1
116 xor 2($sbox,$acc2,8),$t3
117
118 mov 12($key),$s3
119 movzb `&hi("$s1")`,$acc1
120 movzb `&hi("$s2")`,$acc2
121 mov 0($key),$s0
122 xor 1($sbox,$acc1,8),$t2
123 xor 1($sbox,$acc2,8),$t3
124
125 mov 4($key),$s1
126 mov 8($key),$s2
127 xor $t0,$s0
128 xor $t1,$s1
129 xor $t2,$s2
130 xor $t3,$s3
131___
132}
133
# Emit the final encryption round in the "vertical" ordering.  Each table
# word is loaded from offset 0 or 2 of an 8-byte entry and masked down to one
# byte lane (0x000000ff, 0x0000ff00, 0x00ff0000 or 0xff000000) before being
# combined.  The result is XORed with the key words at 16+0..16+12($key) and
# left in $s0..$s3; $key itself is not advanced here.  %r8d serves as the
# fourth accumulator, which overwrites $inp.
134sub enclastvert()
135{ my $t3="%r8d"; # zaps $inp!
136
137$code.=<<___;
138 movzb `&lo("$s0")`,$acc0
139 movzb `&lo("$s1")`,$acc1
140 movzb `&lo("$s2")`,$acc2
141 mov 2($sbox,$acc0,8),$t0
142 mov 2($sbox,$acc1,8),$t1
143 mov 2($sbox,$acc2,8),$t2
144
145 and \$0x000000ff,$t0
146 and \$0x000000ff,$t1
147 and \$0x000000ff,$t2
148
149 movzb `&lo("$s3")`,$acc0
150 movzb `&hi("$s1")`,$acc1
151 movzb `&hi("$s2")`,$acc2
152 mov 2($sbox,$acc0,8),$t3
153 mov 0($sbox,$acc1,8),$acc1 #$t0
154 mov 0($sbox,$acc2,8),$acc2 #$t1
155
156 and \$0x000000ff,$t3
157 and \$0x0000ff00,$acc1
158 and \$0x0000ff00,$acc2
159
160 xor $acc1,$t0
161 xor $acc2,$t1
162 shr \$16,$s2
163
164 movzb `&hi("$s3")`,$acc0
165 movzb `&hi("$s0")`,$acc1
166 shr \$16,$s3
167 mov 0($sbox,$acc0,8),$acc0 #$t2
168 mov 0($sbox,$acc1,8),$acc1 #$t3
169
170 and \$0x0000ff00,$acc0
171 and \$0x0000ff00,$acc1
172 shr \$16,$s1
173 xor $acc0,$t2
174 xor $acc1,$t3
175 shr \$16,$s0
176
177 movzb `&lo("$s2")`,$acc0
178 movzb `&lo("$s3")`,$acc1
179 movzb `&lo("$s0")`,$acc2
180 mov 0($sbox,$acc0,8),$acc0 #$t0
181 mov 0($sbox,$acc1,8),$acc1 #$t1
182 mov 0($sbox,$acc2,8),$acc2 #$t2
183
184 and \$0x00ff0000,$acc0
185 and \$0x00ff0000,$acc1
186 and \$0x00ff0000,$acc2
187
188 xor $acc0,$t0
189 xor $acc1,$t1
190 xor $acc2,$t2
191
192 movzb `&lo("$s1")`,$acc0
193 movzb `&hi("$s3")`,$acc1
194 movzb `&hi("$s0")`,$acc2
195 mov 0($sbox,$acc0,8),$acc0 #$t3
196 mov 2($sbox,$acc1,8),$acc1 #$t0
197 mov 2($sbox,$acc2,8),$acc2 #$t1
198
199 and \$0x00ff0000,$acc0
200 and \$0xff000000,$acc1
201 and \$0xff000000,$acc2
202
203 xor $acc0,$t3
204 xor $acc1,$t0
205 xor $acc2,$t1
206
207 movzb `&hi("$s1")`,$acc0
208 movzb `&hi("$s2")`,$acc1
209 mov 16+12($key),$s3
210 mov 2($sbox,$acc0,8),$acc0 #$t2
211 mov 2($sbox,$acc1,8),$acc1 #$t3
212 mov 16+0($key),$s0
213
214 and \$0xff000000,$acc0
215 and \$0xff000000,$acc1
216
217 xor $acc0,$t2
218 xor $acc1,$t3
219
220 mov 16+4($key),$s1
221 mov 16+8($key),$s2
222 xor $t0,$s0
223 xor $t1,$s1
224 xor $t2,$s2
225 xor $t3,$s3
226___
227}
228
# Emit the code that computes one output word of an encryption round.
#   $i - index of the output word (0..3); it selects the destination
#        register from ($t0,$t1,$t2,$s[0])
#   @s - the four state registers in the order chosen by the caller:
#        $s[0] supplies bits 0-7, $s[1] bits 8-15, $s[2] bits 16-23 and
#        $s[3] bits 24-31 of the table indices
# For $i==0 the emitted code also advances $key by 16.  The key word at
# 4*$i($key) is XORed into the result.  For $i==3 the scratch registers are
# $s[1..3] themselves (no copies are made), and $t0..$t2 are finally moved
# back into $s[1..3].
229sub encstep()
230{ my ($i,@s) = @_;
231 my $tmp0=$acc0;
232 my $tmp1=$acc1;
233 my $tmp2=$acc2;
234 my $out=($t0,$t1,$t2,$s[0])[$i];
235
236 if ($i==3) {
237 $tmp0=$s[1];
238 $tmp1=$s[2];
239 $tmp2=$s[3];
240 }
241 $code.=" movzb ".&lo($s[0]).",$out\n";
242 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
243 $code.=" lea 16($key),$key\n" if ($i==0);
244
245 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
246 $code.=" mov 0($sbox,$out,8),$out\n";
247
248 $code.=" shr \$16,$tmp1\n";
249 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
250 $code.=" xor 3($sbox,$tmp0,8),$out\n";
251
252 $code.=" movzb ".&lo($tmp1).",$tmp1\n";
253 $code.=" shr \$24,$tmp2\n";
254 $code.=" xor 4*$i($key),$out\n";
255
256 $code.=" xor 2($sbox,$tmp1,8),$out\n";
257 $code.=" xor 1($sbox,$tmp2,8),$out\n";
258
259 $code.=" mov $t0,$s[1]\n" if ($i==3);
260 $code.=" mov $t1,$s[2]\n" if ($i==3);
261 $code.=" mov $t2,$s[3]\n" if ($i==3);
262 $code.="\n";
263}
264
# Emit the code that computes one output word of the final encryption round.
#   $i - index of the output word (0..3); it selects the destination
#        register from ($t0,$t1,$t2,$s[0])
#   @s - the four state registers in the order chosen by the caller
# Each table word is masked to a single byte lane before being combined.
# No key word is XORed in and $key is not advanced here.  For $i==3 the
# scratch registers are $s[1..3] themselves, and $t0..$t2 are moved back
# into $s[1..3] at the end.
265sub enclast()
266{ my ($i,@s)=@_;
267 my $tmp0=$acc0;
268 my $tmp1=$acc1;
269 my $tmp2=$acc2;
270 my $out=($t0,$t1,$t2,$s[0])[$i];
271
272 if ($i==3) {
273 $tmp0=$s[1];
274 $tmp1=$s[2];
275 $tmp2=$s[3];
276 }
277 $code.=" movzb ".&lo($s[0]).",$out\n";
278 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
279
280 $code.=" mov 2($sbox,$out,8),$out\n";
281 $code.=" shr \$16,$tmp1\n";
282 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
283
284 $code.=" and \$0x000000ff,$out\n";
285 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
286 $code.=" movzb ".&lo($tmp1).",$tmp1\n";
287 $code.=" shr \$24,$tmp2\n";
288
289 $code.=" mov 0($sbox,$tmp0,8),$tmp0\n";
290 $code.=" mov 0($sbox,$tmp1,8),$tmp1\n";
291 $code.=" mov 2($sbox,$tmp2,8),$tmp2\n";
292
293 $code.=" and \$0x0000ff00,$tmp0\n";
294 $code.=" and \$0x00ff0000,$tmp1\n";
295 $code.=" and \$0xff000000,$tmp2\n";
296
297 $code.=" xor $tmp0,$out\n";
298 $code.=" mov $t0,$s[1]\n" if ($i==3);
299 $code.=" xor $tmp1,$out\n";
300 $code.=" mov $t1,$s[2]\n" if ($i==3);
301 $code.=" xor $tmp2,$out\n";
302 $code.=" mov $t2,$s[3]\n" if ($i==3);
303 $code.="\n";
304}
305
# Emit _x86_64_AES_encrypt, the internal block routine called by AES_encrypt.
# Generated code: XOR the state in $s0..$s3 with the first four key words,
# load the round count from 240($key) and subtract 1, then run the
# .Lenc_loop body until the counter reaches zero (rounds-1 passes), and
# finish with the last-round code.  The routine returns through the two-byte
# "rep ret" encoding.
306$code.=<<___;
307.type _x86_64_AES_encrypt,\@abi-omnipotent
308.align 16
309_x86_64_AES_encrypt:
310 xor 0($key),$s0 # xor with key
311 xor 4($key),$s1
312 xor 8($key),$s2
313 xor 12($key),$s3
314
315 mov 240($key),$rnds # load key->rounds
316 sub \$1,$rnds
317 jmp .Lenc_loop
318.align 16
319.Lenc_loop:
320___
# Loop body: one full round, in either the vertical or the per-word form.
321 if ($verticalspin) { &encvert(); }
322 else { &encstep(0,$s0,$s1,$s2,$s3);
323 &encstep(1,$s1,$s2,$s3,$s0);
324 &encstep(2,$s2,$s3,$s0,$s1);
325 &encstep(3,$s3,$s0,$s1,$s2);
326 }
327$code.=<<___;
328 sub \$1,$rnds
329 jnz .Lenc_loop
330___
# Last round.  enclast() does not add the round key, so the per-word variant
# appends the four key XORs itself; enclastvert() already includes them.
331 if ($verticalspin) { &enclastvert(); }
332 else { &enclast(0,$s0,$s1,$s2,$s3);
333 &enclast(1,$s1,$s2,$s3,$s0);
334 &enclast(2,$s2,$s3,$s0,$s1);
335 &enclast(3,$s3,$s0,$s1,$s2);
336 $code.=<<___;
337 xor 16+0($key),$s0 # xor with key
338 xor 16+4($key),$s1
339 xor 16+8($key),$s2
340 xor 16+12($key),$s3
341___
342 }
343$code.=<<___;
344 .byte 0xf3,0xc3 # rep ret
345.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
346___
347
348# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
# Emit the exported AES_encrypt.  The generated function saves %rbx, %rbp
# and %r12-%r15, copies its three arguments into $key/$inp/$out, points
# $sbox at AES_Te, loads the 16-byte input block as four 32-bit words into
# $s0..$s3, calls _x86_64_AES_encrypt, stores the four result words through
# $out and restores the saved registers.
# NOTE(review): .picmeup is not defined in this file; presumably it is a
# directive expanded by x86_64-xlate.pl -- confirm there.
349$code.=<<___;
350.globl AES_encrypt
351.type AES_encrypt,\@function,3
352.align 16
353AES_encrypt:
354 push %rbx
355 push %rbp
356 push %r12
357 push %r13
358 push %r14
359 push %r15
360
361 mov %rdx,$key
362 mov %rdi,$inp
363 mov %rsi,$out
364
365 .picmeup $sbox
366 lea AES_Te-.($sbox),$sbox
367
368 mov 0($inp),$s0
369 mov 4($inp),$s1
370 mov 8($inp),$s2
371 mov 12($inp),$s3
372
373 call _x86_64_AES_encrypt
374
375 mov $s0,0($out)
376 mov $s1,4($out)
377 mov $s2,8($out)
378 mov $s3,12($out)
379
380 pop %r15
381 pop %r14
382 pop %r13
383 pop %r12
384 pop %rbp
385 pop %rbx
386 ret
387.size AES_encrypt,.-AES_encrypt
388___
389
390#------------------------------------------------------------------#
391
# Emit one full decryption round in the "vertical" instruction ordering; it
# is used as the .Ldec_loop body when $verticalspin is set.  The generated
# code indexes the table at $sbox with each state byte scaled by 8, at byte
# offsets 0, 3, 2 and 1, accumulating into $t0..$t2 and %r8d.  It advances
# $key by 16 and leaves the new state, XORed with the key words at
# 0..12($key), in $s0..$s3.  %r8d is the low half of $inp (%r8), hence
# "zaps $inp".
392sub decvert()
393{ my $t3="%r8d"; # zaps $inp!
394
395$code.=<<___;
396 # favor 3-way issue Opteron pipeline...
397 movzb `&lo("$s0")`,$acc0
398 movzb `&lo("$s1")`,$acc1
399 movzb `&lo("$s2")`,$acc2
400 mov 0($sbox,$acc0,8),$t0
401 mov 0($sbox,$acc1,8),$t1
402 mov 0($sbox,$acc2,8),$t2
403
404 movzb `&hi("$s3")`,$acc0
405 movzb `&hi("$s0")`,$acc1
406 movzb `&lo("$s3")`,$acc2
407 xor 3($sbox,$acc0,8),$t0
408 xor 3($sbox,$acc1,8),$t1
409 mov 0($sbox,$acc2,8),$t3
410
411 movzb `&hi("$s1")`,$acc0
412 shr \$16,$s0
413 movzb `&hi("$s2")`,$acc2
414 xor 3($sbox,$acc0,8),$t2
415 shr \$16,$s3
416 xor 3($sbox,$acc2,8),$t3
417
418 shr \$16,$s1
419 lea 16($key),$key
420 shr \$16,$s2
421
422 movzb `&lo("$s2")`,$acc0
423 movzb `&lo("$s3")`,$acc1
424 movzb `&lo("$s0")`,$acc2
425 xor 2($sbox,$acc0,8),$t0
426 xor 2($sbox,$acc1,8),$t1
427 xor 2($sbox,$acc2,8),$t2
428
429 movzb `&hi("$s1")`,$acc0
430 movzb `&hi("$s2")`,$acc1
431 movzb `&lo("$s1")`,$acc2
432 xor 1($sbox,$acc0,8),$t0
433 xor 1($sbox,$acc1,8),$t1
434 xor 2($sbox,$acc2,8),$t3
435
436 movzb `&hi("$s3")`,$acc0
437 mov 12($key),$s3
438 movzb `&hi("$s0")`,$acc2
439 xor 1($sbox,$acc0,8),$t2
440 mov 0($key),$s0
441 xor 1($sbox,$acc2,8),$t3
442
443 xor $t0,$s0
444 mov 4($key),$s1
445 mov 8($key),$s2
446 xor $t2,$s2
447 xor $t1,$s1
448 xor $t3,$s3
449___
450}
451
# Emit the final decryption round in the "vertical" ordering.  Every lookup
# is a zero-extended byte load from 2048($sbox,index,1), i.e. from a byte
# table located 2048 bytes past $sbox; the bytes are shifted left by 8, 16 or
# 24 into their lanes and combined.  The result is XORed with the key words
# at 16+0..16+12($key) and left in $s0..$s3; $key is not advanced here.
# %r8d serves as the fourth accumulator, which overwrites $inp.
452sub declastvert()
453{ my $t3="%r8d"; # zaps $inp!
454
455$code.=<<___;
456 movzb `&lo("$s0")`,$acc0
457 movzb `&lo("$s1")`,$acc1
458 movzb `&lo("$s2")`,$acc2
459 movzb 2048($sbox,$acc0,1),$t0
460 movzb 2048($sbox,$acc1,1),$t1
461 movzb 2048($sbox,$acc2,1),$t2
462
463 movzb `&lo("$s3")`,$acc0
464 movzb `&hi("$s3")`,$acc1
465 movzb `&hi("$s0")`,$acc2
466 movzb 2048($sbox,$acc0,1),$t3
467 movzb 2048($sbox,$acc1,1),$acc1 #$t0
468 movzb 2048($sbox,$acc2,1),$acc2 #$t1
469
470 shl \$8,$acc1
471 shl \$8,$acc2
472
473 xor $acc1,$t0
474 xor $acc2,$t1
475 shr \$16,$s3
476
477 movzb `&hi("$s1")`,$acc0
478 movzb `&hi("$s2")`,$acc1
479 shr \$16,$s0
480 movzb 2048($sbox,$acc0,1),$acc0 #$t2
481 movzb 2048($sbox,$acc1,1),$acc1 #$t3
482
483 shl \$8,$acc0
484 shl \$8,$acc1
485 shr \$16,$s1
486 xor $acc0,$t2
487 xor $acc1,$t3
488 shr \$16,$s2
489
490 movzb `&lo("$s2")`,$acc0
491 movzb `&lo("$s3")`,$acc1
492 movzb `&lo("$s0")`,$acc2
493 movzb 2048($sbox,$acc0,1),$acc0 #$t0
494 movzb 2048($sbox,$acc1,1),$acc1 #$t1
495 movzb 2048($sbox,$acc2,1),$acc2 #$t2
496
497 shl \$16,$acc0
498 shl \$16,$acc1
499 shl \$16,$acc2
500
501 xor $acc0,$t0
502 xor $acc1,$t1
503 xor $acc2,$t2
504
505 movzb `&lo("$s1")`,$acc0
506 movzb `&hi("$s1")`,$acc1
507 movzb `&hi("$s2")`,$acc2
508 movzb 2048($sbox,$acc0,1),$acc0 #$t3
509 movzb 2048($sbox,$acc1,1),$acc1 #$t0
510 movzb 2048($sbox,$acc2,1),$acc2 #$t1
511
512 shl \$16,$acc0
513 shl \$24,$acc1
514 shl \$24,$acc2
515
516 xor $acc0,$t3
517 xor $acc1,$t0
518 xor $acc2,$t1
519
520 movzb `&hi("$s3")`,$acc0
521 movzb `&hi("$s0")`,$acc1
522 mov 16+12($key),$s3
523 movzb 2048($sbox,$acc0,1),$acc0 #$t2
524 movzb 2048($sbox,$acc1,1),$acc1 #$t3
525 mov 16+0($key),$s0
526
527 shl \$24,$acc0
528 shl \$24,$acc1
529
530 xor $acc0,$t2
531 xor $acc1,$t3
532
533 mov 16+4($key),$s1
534 mov 16+8($key),$s2
535 xor $t0,$s0
536 xor $t1,$s1
537 xor $t2,$s2
538 xor $t3,$s3
539___
540}
541
# Emit the code that computes one output word of a decryption round.
#   $i - index of the output word (0..3); it selects the destination
#        register from ($t0,$t1,$t2,$s[0])
#   @s - the four state registers in the order chosen by the caller:
#        $s[0] supplies bits 0-7, $s[1] bits 8-15, $s[2] bits 16-23 and
#        $s[3] bits 24-31 of the table indices
# No key word is XORed in and $key is not advanced here.  For $i==3 the
# scratch registers are $s[1..3] themselves, and at the end $t2, $t1 and $t0
# are moved into $s[1], $s[2] and $s[3] respectively.
542sub decstep()
543{ my ($i,@s) = @_;
544 my $tmp0=$acc0;
545 my $tmp1=$acc1;
546 my $tmp2=$acc2;
547 my $out=($t0,$t1,$t2,$s[0])[$i];
548
549 $code.=" mov $s[0],$out\n" if ($i!=3);
550 $tmp1=$s[2] if ($i==3);
551 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
552 $code.=" and \$0xFF,$out\n";
553
554 $code.=" mov 0($sbox,$out,8),$out\n";
555 $code.=" shr \$16,$tmp1\n";
556 $tmp2=$s[3] if ($i==3);
557 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
558
559 $tmp0=$s[1] if ($i==3);
560 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
561 $code.=" and \$0xFF,$tmp1\n";
562 $code.=" shr \$24,$tmp2\n";
563
564 $code.=" xor 3($sbox,$tmp0,8),$out\n";
565 $code.=" xor 2($sbox,$tmp1,8),$out\n";
566 $code.=" xor 1($sbox,$tmp2,8),$out\n";
567
568 $code.=" mov $t2,$s[1]\n" if ($i==3);
569 $code.=" mov $t1,$s[2]\n" if ($i==3);
570 $code.=" mov $t0,$s[3]\n" if ($i==3);
571 $code.="\n";
572}
573
# Emit the code that computes one output word of the final decryption round.
#   $i - index of the output word (0..3); it selects the destination
#        register from ($t0,$t1,$t2,$s[0])
#   @s - the four state registers in the order chosen by the caller
# All four lookups are zero-extended byte loads from 2048($sbox,index,1);
# three of them are shifted left by 8, 16 and 24 before being XORed into the
# result.  No key word is XORed in here.  For $i==3 the scratch registers
# are $s[1..3] themselves, and $t2, $t1 and $t0 are moved into $s[1], $s[2]
# and $s[3] at the end.
574sub declast()
575{ my ($i,@s)=@_;
576 my $tmp0=$acc0;
577 my $tmp1=$acc1;
578 my $tmp2=$acc2;
579 my $out=($t0,$t1,$t2,$s[0])[$i];
580
581 $code.=" mov $s[0],$out\n" if ($i!=3);
582 $tmp1=$s[2] if ($i==3);
583 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
584 $code.=" and \$0xFF,$out\n";
585
586 $code.=" movzb 2048($sbox,$out,1),$out\n";
587 $code.=" shr \$16,$tmp1\n";
588 $tmp2=$s[3] if ($i==3);
589 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
590
591 $tmp0=$s[1] if ($i==3);
592 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
593 $code.=" and \$0xFF,$tmp1\n";
594 $code.=" shr \$24,$tmp2\n";
595
596 $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n";
597 $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n";
598 $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n";
599
600 $code.=" shl \$8,$tmp0\n";
601 $code.=" shl \$16,$tmp1\n";
602 $code.=" shl \$24,$tmp2\n";
603
604 $code.=" xor $tmp0,$out\n";
605 $code.=" mov $t2,$s[1]\n" if ($i==3);
606 $code.=" xor $tmp1,$out\n";
607 $code.=" mov $t1,$s[2]\n" if ($i==3);
608 $code.=" xor $tmp2,$out\n";
609 $code.=" mov $t0,$s[3]\n" if ($i==3);
610 $code.="\n";
611}
612
# Emit _x86_64_AES_decrypt, the internal block routine called by AES_decrypt.
# Generated code: XOR the state in $s0..$s3 with the first four key words,
# load the round count from 240($key) and subtract 1, then run the
# .Ldec_loop body until the counter reaches zero (rounds-1 passes), and
# finish with the last-round code.  The routine returns through the two-byte
# "rep ret" encoding.
613$code.=<<___;
614.type _x86_64_AES_decrypt,\@abi-omnipotent
615.align 16
616_x86_64_AES_decrypt:
617 xor 0($key),$s0 # xor with key
618 xor 4($key),$s1
619 xor 8($key),$s2
620 xor 12($key),$s3
621
622 mov 240($key),$rnds # load key->rounds
623 sub \$1,$rnds
624 jmp .Ldec_loop
625.align 16
626.Ldec_loop:
627___
# Loop body.  decstep() neither advances $key nor adds the round key, so the
# per-word variant appends that itself; decvert() already includes both.
628 if ($verticalspin) { &decvert(); }
629 else { &decstep(0,$s0,$s3,$s2,$s1);
630 &decstep(1,$s1,$s0,$s3,$s2);
631 &decstep(2,$s2,$s1,$s0,$s3);
632 &decstep(3,$s3,$s2,$s1,$s0);
633 $code.=<<___;
634 lea 16($key),$key
635 xor 0($key),$s0 # xor with key
636 xor 4($key),$s1
637 xor 8($key),$s2
638 xor 12($key),$s3
639___
640 }
641$code.=<<___;
642 sub \$1,$rnds
643 jnz .Ldec_loop
644___
# Last round; again only the per-word variant needs the explicit key XORs.
645 if ($verticalspin) { &declastvert(); }
646 else { &declast(0,$s0,$s3,$s2,$s1);
647 &declast(1,$s1,$s0,$s3,$s2);
648 &declast(2,$s2,$s1,$s0,$s3);
649 &declast(3,$s3,$s2,$s1,$s0);
650 $code.=<<___;
651 xor 16+0($key),$s0 # xor with key
652 xor 16+4($key),$s1
653 xor 16+8($key),$s2
654 xor 16+12($key),$s3
655___
656 }
657$code.=<<___;
658 .byte 0xf3,0xc3 # rep ret
659.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
660___
661
662# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
# Emit the exported AES_decrypt.  The generated function saves %rbx, %rbp
# and %r12-%r15, copies its three arguments into $key/$inp/$out and points
# $sbox at AES_Td.  It then touches the 256 bytes starting 2048 bytes past
# $sbox with eight loads at a 32-byte stride ("prefetch Td4"); the loaded
# values are discarded, because $s0..$s3 are overwritten by the input block
# right afterwards, and $sbox is restored to the table base.  Finally it
# calls _x86_64_AES_decrypt, stores the four result words through $out and
# restores the saved registers.
# NOTE(review): .picmeup is not defined in this file; presumably it is a
# directive expanded by x86_64-xlate.pl -- confirm there.
663$code.=<<___;
664.globl AES_decrypt
665.type AES_decrypt,\@function,3
666.align 16
667AES_decrypt:
668 push %rbx
669 push %rbp
670 push %r12
671 push %r13
672 push %r14
673 push %r15
674
675 mov %rdx,$key
676 mov %rdi,$inp
677 mov %rsi,$out
678
679 .picmeup $sbox
680 lea AES_Td-.($sbox),$sbox
681
682 # prefetch Td4
683 lea 2048+128($sbox),$sbox;
684 mov 0-128($sbox),$s0
685 mov 32-128($sbox),$s1
686 mov 64-128($sbox),$s2
687 mov 96-128($sbox),$s3
688 mov 128-128($sbox),$s0
689 mov 160-128($sbox),$s1
690 mov 192-128($sbox),$s2
691 mov 224-128($sbox),$s3
692 lea -2048-128($sbox),$sbox;
693
694 mov 0($inp),$s0
695 mov 4($inp),$s1
696 mov 8($inp),$s2
697 mov 12($inp),$s3
698
699 call _x86_64_AES_decrypt
700
701 mov $s0,0($out)
702 mov $s1,4($out)
703 mov $s2,8($out)
704 mov $s3,12($out)
705
706 pop %r15
707 pop %r14
708 pop %r13
709 pop %r12
710 pop %rbp
711 pop %rbx
712 ret
713.size AES_decrypt,.-AES_decrypt
714___
715#------------------------------------------------------------------#
716
# Emit the key-expansion core shared by the three key sizes.  The generated
# code takes the previous key word in %edx and the word being updated in
# %eax.  For each of the four bytes of %edx it loads a table word from
# offset 0 or 2 of the 8-byte entry at (%rbp,byte,8), masks it to a single
# byte lane and XORs it into %eax; it then XORs in the 32-bit value at
# 2048(%rbp,%rcx,4), which the inline comment labels "rcon".
# It overwrites %ebx and %esi and shifts %edx right by 16.
717sub enckey()
718{
719$code.=<<___;
720 movz %dl,%esi # rk[i]>>0
721 mov 2(%rbp,%rsi,8),%ebx
722 movz %dh,%esi # rk[i]>>8
723 and \$0xFF000000,%ebx
724 xor %ebx,%eax
725
726 mov 2(%rbp,%rsi,8),%ebx
727 shr \$16,%edx
728 and \$0x000000FF,%ebx
729 movz %dl,%esi # rk[i]>>16
730 xor %ebx,%eax
731
732 mov 0(%rbp,%rsi,8),%ebx
733 movz %dh,%esi # rk[i]>>24
734 and \$0x0000FF00,%ebx
735 xor %ebx,%eax
736
737 mov 0(%rbp,%rsi,8),%ebx
738 and \$0x00FF0000,%ebx
739 xor %ebx,%eax
740
741 xor 2048(%rbp,%rcx,4),%eax # rcon
742___
743}
744
745# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
746# AES_KEY *key)
# Emit the exported AES_set_encrypt_key.  Return value of the generated code
# (in %rax): -1 if userKey or key is zero, -2 if bits is not 128, 192 or
# 256, and 0 on success.  On success the user key is copied to the start of
# the schedule, the remaining words are derived with the enckey() sequence,
# and the round count (10, 12 or 14) is stored.  %rdi is advanced while the
# schedule is filled (10*16, 7*24 and 6*32 bytes respectively), so the three
# movl offsets 80, 72 and 48 all address byte 240 of the schedule -- the same
# offset the block routines read the round count from.
# NOTE(review): inside the heredoc, the comment on "mov 28(%rdi),%edx" says
# rk[4], but byte offset 28 is word index 7.
747$code.=<<___;
748.globl AES_set_encrypt_key
749.type AES_set_encrypt_key,\@function,3
750.align 16
751AES_set_encrypt_key:
752 push %rbx
753 push %rbp
754
755 mov %esi,%ecx # %ecx=bits
756 mov %rdi,%rsi # %rsi=userKey
757 mov %rdx,%rdi # %rdi=key
758
759 test \$-1,%rsi
760 jz .Lbadpointer
761 test \$-1,%rdi
762 jz .Lbadpointer
763
764 .picmeup %rbp
765 lea AES_Te-.(%rbp),%rbp
766
767 cmp \$128,%ecx
768 je .L10rounds
769 cmp \$192,%ecx
770 je .L12rounds
771 cmp \$256,%ecx
772 je .L14rounds
773 mov \$-2,%rax # invalid number of bits
774 jmp .Lexit
775
776.L10rounds:
777 mov 0(%rsi),%eax # copy first 4 dwords
778 mov 4(%rsi),%ebx
779 mov 8(%rsi),%ecx
780 mov 12(%rsi),%edx
781 mov %eax,0(%rdi)
782 mov %ebx,4(%rdi)
783 mov %ecx,8(%rdi)
784 mov %edx,12(%rdi)
785
786 xor %ecx,%ecx
787 jmp .L10shortcut
788.align 4
789.L10loop:
790 mov 0(%rdi),%eax # rk[0]
791 mov 12(%rdi),%edx # rk[3]
792.L10shortcut:
793___
794 &enckey ();
795$code.=<<___;
796 mov %eax,16(%rdi) # rk[4]
797 xor 4(%rdi),%eax
798 mov %eax,20(%rdi) # rk[5]
799 xor 8(%rdi),%eax
800 mov %eax,24(%rdi) # rk[6]
801 xor 12(%rdi),%eax
802 mov %eax,28(%rdi) # rk[7]
803 add \$1,%ecx
804 lea 16(%rdi),%rdi
805 cmp \$10,%ecx
806 jl .L10loop
807
808 movl \$10,80(%rdi) # setup number of rounds
809 xor %rax,%rax
810 jmp .Lexit
811
812.L12rounds:
813 mov 0(%rsi),%eax # copy first 6 dwords
814 mov 4(%rsi),%ebx
815 mov 8(%rsi),%ecx
816 mov 12(%rsi),%edx
817 mov %eax,0(%rdi)
818 mov %ebx,4(%rdi)
819 mov %ecx,8(%rdi)
820 mov %edx,12(%rdi)
821 mov 16(%rsi),%ecx
822 mov 20(%rsi),%edx
823 mov %ecx,16(%rdi)
824 mov %edx,20(%rdi)
825
826 xor %ecx,%ecx
827 jmp .L12shortcut
828.align 4
829.L12loop:
830 mov 0(%rdi),%eax # rk[0]
831 mov 20(%rdi),%edx # rk[5]
832.L12shortcut:
833___
834 &enckey ();
835$code.=<<___;
836 mov %eax,24(%rdi) # rk[6]
837 xor 4(%rdi),%eax
838 mov %eax,28(%rdi) # rk[7]
839 xor 8(%rdi),%eax
840 mov %eax,32(%rdi) # rk[8]
841 xor 12(%rdi),%eax
842 mov %eax,36(%rdi) # rk[9]
843
844 cmp \$7,%ecx
845 je .L12break
846 add \$1,%ecx
847
848 xor 16(%rdi),%eax
849 mov %eax,40(%rdi) # rk[10]
850 xor 20(%rdi),%eax
851 mov %eax,44(%rdi) # rk[11]
852
853 lea 24(%rdi),%rdi
854 jmp .L12loop
855.L12break:
856 movl \$12,72(%rdi) # setup number of rounds
857 xor %rax,%rax
858 jmp .Lexit
859
860.L14rounds:
861 mov 0(%rsi),%eax # copy first 8 dwords
862 mov 4(%rsi),%ebx
863 mov 8(%rsi),%ecx
864 mov 12(%rsi),%edx
865 mov %eax,0(%rdi)
866 mov %ebx,4(%rdi)
867 mov %ecx,8(%rdi)
868 mov %edx,12(%rdi)
869 mov 16(%rsi),%eax
870 mov 20(%rsi),%ebx
871 mov 24(%rsi),%ecx
872 mov 28(%rsi),%edx
873 mov %eax,16(%rdi)
874 mov %ebx,20(%rdi)
875 mov %ecx,24(%rdi)
876 mov %edx,28(%rdi)
877
878 xor %ecx,%ecx
879 jmp .L14shortcut
880.align 4
881.L14loop:
882 mov 28(%rdi),%edx # rk[4]
883.L14shortcut:
884 mov 0(%rdi),%eax # rk[0]
885___
886 &enckey ();
887$code.=<<___;
888 mov %eax,32(%rdi) # rk[8]
889 xor 4(%rdi),%eax
890 mov %eax,36(%rdi) # rk[9]
891 xor 8(%rdi),%eax
892 mov %eax,40(%rdi) # rk[10]
893 xor 12(%rdi),%eax
894 mov %eax,44(%rdi) # rk[11]
895
896 cmp \$6,%ecx
897 je .L14break
898 add \$1,%ecx
899
900 mov %eax,%edx
901 mov 16(%rdi),%eax # rk[4]
902 movz %dl,%esi # rk[11]>>0
903 mov 2(%rbp,%rsi,8),%ebx
904 movz %dh,%esi # rk[11]>>8
905 and \$0x000000FF,%ebx
906 xor %ebx,%eax
907
908 mov 0(%rbp,%rsi,8),%ebx
909 shr \$16,%edx
910 and \$0x0000FF00,%ebx
911 movz %dl,%esi # rk[11]>>16
912 xor %ebx,%eax
913
914 mov 0(%rbp,%rsi,8),%ebx
915 movz %dh,%esi # rk[11]>>24
916 and \$0x00FF0000,%ebx
917 xor %ebx,%eax
918
919 mov 2(%rbp,%rsi,8),%ebx
920 and \$0xFF000000,%ebx
921 xor %ebx,%eax
922
923 mov %eax,48(%rdi) # rk[12]
924 xor 20(%rdi),%eax
925 mov %eax,52(%rdi) # rk[13]
926 xor 24(%rdi),%eax
927 mov %eax,56(%rdi) # rk[14]
928 xor 28(%rdi),%eax
929 mov %eax,60(%rdi) # rk[15]
930
931 lea 32(%rdi),%rdi
932 jmp .L14loop
933.L14break:
934 movl \$14,48(%rdi) # setup number of rounds
935 xor %rax,%rax
936 jmp .Lexit
937
938.Lbadpointer:
939 mov \$-1,%rax
940.Lexit:
941 pop %rbp
942 pop %rbx
943 ret
944.size AES_set_encrypt_key,.-AES_set_encrypt_key
945___
946
947sub deckey() # emit asm that rewrites one 32-bit key-schedule word in place; clobbers rax, rbx, rdx
948{ my ($i,$ptr,$te,$td) = @_; # byte offset of the word, pointer register, Te base register, Td base register
949$code.=<<___;
950 mov $i($ptr),%eax # load the word to transform
951 mov %eax,%edx
952 movz %ah,%ebx # byte 1 of the word
953 shr \$16,%edx # upper half of the word now sits in dl/dh
954 and \$0xFF,%eax # byte 0 of the word
955 movzb 2($te,%rax,8),%rax # replace byte 0 with the byte at offset 2 of its 8-byte-strided Te entry
956 movzb 2($te,%rbx,8),%rbx # same substitution for byte 1
957 mov 0($td,%rax,8),%eax # start the result from the Td entry read at offset 0
958 xor 3($td,%rbx,8),%eax # fold in the Td entry read at byte offset 3
959 movzb %dh,%ebx # byte 3 of the word
960 and \$0xFF,%edx # byte 2 of the word
961 movzb 2($te,%rdx,8),%rdx # substitute byte 2 through Te
962 movzb 2($te,%rbx,8),%rbx # substitute byte 3 through Te
963 xor 2($td,%rdx,8),%eax # fold in the Td entry read at byte offset 2
964 xor 1($td,%rbx,8),%eax # fold in the Td entry read at byte offset 1
965 mov %eax,$i($ptr) # store the transformed word back
966___
967}
968
969# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
970# AES_KEY *key)
971$code.=<<___; # builds the encryption schedule, reverses its 16-byte round keys, then runs deckey over the inner ones
972.globl AES_set_decrypt_key
973.type AES_set_decrypt_key,\@function,3
974.align 16
975AES_set_decrypt_key:
976 push %rdx # keep the key schedule pointer across the call
977 call AES_set_encrypt_key
978 cmp \$0,%eax
979 je .Lproceed
980 lea 8(%rsp),%rsp # failure: drop only the saved pointer, the return address is next on the stack
981 ret # returns the non-zero result of AES_set_encrypt_key
982.Lproceed:
983 mov (%rsp),%r8 # restore key schedule
984 mov %rbx,(%rsp) # reuse the slot to preserve rbx, popped before the final ret
985
986 mov 240(%r8),%ecx # pull number of rounds
987 xor %rdi,%rdi
988 lea (%rdi,%rcx,4),%rcx # rounds*4
989 mov %r8,%rsi # first 16-byte chunk
990 lea (%r8,%rcx,4),%rdi # pointer to last chunk
991.align 4
992.Linvert: # swap 16-byte chunks from both ends until the pointers meet
993 mov 0(%rsi),%rax
994 mov 8(%rsi),%rbx
995 mov 0(%rdi),%rcx
996 mov 8(%rdi),%rdx
997 mov %rax,0(%rdi)
998 mov %rbx,8(%rdi)
999 mov %rcx,0(%rsi)
1000 mov %rdx,8(%rsi)
1001 lea 16(%rsi),%rsi
1002 lea -16(%rdi),%rdi
1003 cmp %rsi,%rdi
1004 jne .Linvert
1005
1006 .picmeup %r9
1007 lea AES_Td-.(%r9),%rdi # rdi = AES_Td
1008 lea AES_Te-AES_Td(%rdi),%r9 # r9 = AES_Te
1009
1010 mov %r8,%rsi
1011 mov 240(%r8),%ecx # pull number of rounds
1012 sub \$1,%ecx # rounds-1 chunks are transformed, starting at the second one
1013.align 4
1014.Lpermute:
1015 lea 16(%rsi),%rsi
1016___
1017 &deckey (0,"%rsi","%r9","%rdi");
1018 &deckey (4,"%rsi","%r9","%rdi");
1019 &deckey (8,"%rsi","%r9","%rdi");
1020 &deckey (12,"%rsi","%r9","%rdi");
1021$code.=<<___;
1022 sub \$1,%ecx
1023 jnz .Lpermute
1024
1025 xor %rax,%rax # success: return 0
1026 pop %rbx
1027 ret
1028.size AES_set_decrypt_key,.-AES_set_decrypt_key
1029___
1030
1031# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
1032# size_t length, const AES_KEY *key,
1033# unsigned char *ivp,const int enc);
1034{ # CBC mode: enc != 0 encrypts via _x86_64_AES_encrypt, enc == 0 decrypts via _x86_64_AES_decrypt; returns at once when length is 0
1035# stack frame layout
1036# -8(%rsp) return address
1037my $_rsp="0(%rsp)"; # saved %rsp
1038my $_len="8(%rsp)"; # copy of 3rd parameter, length
1039my $_key="16(%rsp)"; # copy of 4th parameter, key
1040my $_ivp="24(%rsp)"; # copy of 5th parameter, ivp
1041my $keyp="32(%rsp)"; # one to pass as $key
1042my $ivec="40(%rsp)"; # ivec[16]
1043my $aes_key="56(%rsp)"; # copy of aes_key
1044my $mark="56+240(%rsp)"; # copy of aes_key->rounds
1045
1046$code.=<<___;
1047.globl AES_cbc_encrypt
1048.type AES_cbc_encrypt,\@function,6
1049.align 16
1050AES_cbc_encrypt:
1051 cmp \$0,%rdx # check length
1052 je .Lcbc_just_ret
1053 push %rbx
1054 push %rbp
1055 push %r12
1056 push %r13
1057 push %r14
1058 push %r15
1059 pushfq
1060 cld # string instructions below must run forward; flags are restored by popfq
1061 mov %r9d,%r9d # clear upper half of enc
1062
1063 .picmeup $sbox
1064.Lcbc_pic_point:
1065
1066 cmp \$0,%r9
1067 je .LDECRYPT
1068
1069 lea AES_Te-.Lcbc_pic_point($sbox),$sbox
1070
1071 # allocate aligned stack frame...
1072 lea -64-248(%rsp),$key
1073 and \$-64,$key
1074
1075 # ... and make sure it doesn't alias with AES_Te modulo 4096
1076 mov $sbox,%r10
1077 lea 2048($sbox),%r11
1078 mov $key,%r12
1079 and \$0xFFF,%r10 # s = $sbox&0xfff
1080 and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff
1081 and \$0xFFF,%r12 # p = %rsp&0xfff
1082
1083 cmp %r11,%r12 # if (p>=e) %rsp -= (p-e);
1084 jb .Lcbc_te_break_out
1085 sub %r11,%r12
1086 sub %r12,$key
1087 jmp .Lcbc_te_ok
1088.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz
1089 sub %r10,%r12
1090 and \$0xFFF,%r12
1091 add \$320,%r12
1092 sub %r12,$key
1093.align 4
1094.Lcbc_te_ok:
1095
1096 xchg %rsp,$key
1097 add \$8,%rsp # reserve for return address!
1098 mov $key,$_rsp # save %rsp
1099 mov %rdx,$_len # save copy of len
1100 mov %rcx,$_key # save copy of key
1101 mov %r8,$_ivp # save copy of ivp
1102 movl \$0,$mark # copy of aes_key->rounds = 0;
1103 mov %r8,%rbp # rearrange input arguments
1104 mov %rsi,$out
1105 mov %rdi,$inp
1106 mov %rcx,$key
1107
1108 # do we copy key schedule to stack?
1109 mov $key,%r10
1110 sub $sbox,%r10
1111 and \$0xfff,%r10
1112 cmp \$2048,%r10
1113 jb .Lcbc_do_ecopy
1114 cmp \$4096-248,%r10
1115 jb .Lcbc_skip_ecopy
1116.align 4
1117.Lcbc_do_ecopy:
1118 mov $key,%rsi
1119 lea $aes_key,%rdi
1120 lea $aes_key,$key
1121 mov \$240/8,%ecx
1122 .long 0x90A548F3 # rep movsq
1123 mov (%rsi),%eax # copy aes_key->rounds
1124 mov %eax,(%rdi)
1125.Lcbc_skip_ecopy:
1126 mov $key,$keyp # save key pointer
1127
1128 mov \$16,%ecx
1129.align 4
1130.Lcbc_prefetch_te:
1131 mov 0($sbox),%r10
1132 mov 32($sbox),%r11
1133 mov 64($sbox),%r12
1134 mov 96($sbox),%r13
1135 lea 128($sbox),$sbox
1136 sub \$1,%ecx
1137 jnz .Lcbc_prefetch_te
1138 sub \$2048,$sbox
1139
1140 test \$-16,%rdx # check upon length
1141 mov %rdx,%r10
1142 mov 0(%rbp),$s0 # load iv
1143 mov 4(%rbp),$s1
1144 mov 8(%rbp),$s2
1145 mov 12(%rbp),$s3
1146 jz .Lcbc_enc_tail # short input...
1147
1148.align 4
1149.Lcbc_enc_loop:
1150 xor 0($inp),$s0
1151 xor 4($inp),$s1
1152 xor 8($inp),$s2
1153 xor 12($inp),$s3
1154 mov $inp,$ivec # if ($verticalspin) save inp
1155
1156 mov $keyp,$key # restore key
1157 call _x86_64_AES_encrypt
1158
1159 mov $ivec,$inp # if ($verticalspin) restore inp
1160 mov $s0,0($out)
1161 mov $s1,4($out)
1162 mov $s2,8($out)
1163 mov $s3,12($out)
1164
1165 mov $_len,%r10
1166 lea 16($inp),$inp
1167 lea 16($out),$out
1168 sub \$16,%r10
1169 test \$-16,%r10
1170 mov %r10,$_len
1171 jnz .Lcbc_enc_loop
1172 test \$15,%r10
1173 jnz .Lcbc_enc_tail
1174 mov $_ivp,%rbp # restore ivp
1175 mov $s0,0(%rbp) # save ivec
1176 mov $s1,4(%rbp)
1177 mov $s2,8(%rbp)
1178 mov $s3,12(%rbp)
1179
1180.align 4
1181.Lcbc_cleanup:
1182 cmpl \$0,$mark # was the key schedule copied?
1183 lea $aes_key,%rdi
1184 mov $_rsp,%rsp
1185 je .Lcbc_exit
1186 mov \$240/8,%ecx
1187 xor %rax,%rax
1188 .long 0x90AB48F3 # rep stosq
1189.Lcbc_exit:
1190 popfq
1191 pop %r15
1192 pop %r14
1193 pop %r13
1194 pop %r12
1195 pop %rbp
1196 pop %rbx
1197.Lcbc_just_ret:
1198 ret
1199.align 4
1200.Lcbc_enc_tail:
1201 mov %rax,%r11
1202 mov %rcx,%r12
1203 mov %r10,%rcx
1204 mov $inp,%rsi
1205 mov $out,%rdi
1206 .long 0xF689A4F3 # rep movsb
1207 mov \$16,%rcx # zero tail
1208 sub %r10,%rcx
1209 xor %rax,%rax
1210 .long 0xF689AAF3 # rep stosb
1211 mov $out,$inp # this is not a mistake!
1212 movq \$16,$_len # len=16
1213 mov %r11,%rax
1214 mov %r12,%rcx
1215 jmp .Lcbc_enc_loop # one more spin...
1216#----------------------------- DECRYPT -----------------------------#
1217.align 16
1218.LDECRYPT:
1219 lea AES_Td-.Lcbc_pic_point($sbox),$sbox
1220
1221 # allocate aligned stack frame...
1222 lea -64-248(%rsp),$key
1223 and \$-64,$key
1224
1225 # ... and make sure it doesn't alias with AES_Td modulo 4096
1226 mov $sbox,%r10
1227 lea 2304($sbox),%r11
1228 mov $key,%r12
1229 and \$0xFFF,%r10 # s = $sbox&0xfff
1230 and \$0xFFF,%r11 # e = ($sbox+2048+256)&0xfff
1231 and \$0xFFF,%r12 # p = %rsp&0xfff
1232
1233 cmp %r11,%r12 # if (p>=e) %rsp -= (p-e);
1234 jb .Lcbc_td_break_out
1235 sub %r11,%r12
1236 sub %r12,$key
1237 jmp .Lcbc_td_ok
1238.Lcbc_td_break_out: # else %rsp -= (p-s)&0xfff + framesz
1239 sub %r10,%r12
1240 and \$0xFFF,%r12
1241 add \$320,%r12
1242 sub %r12,$key
1243.align 4
1244.Lcbc_td_ok:
1245
1246 xchg %rsp,$key
1247 add \$8,%rsp # reserve for return address!
1248 mov $key,$_rsp # save %rsp
1249 mov %rdx,$_len # save copy of len
1250 mov %rcx,$_key # save copy of key
1251 mov %r8,$_ivp # save copy of ivp
1252 movl \$0,$mark # copy of aes_key->rounds = 0;
1253 mov %r8,%rbp # rearrange input arguments
1254 mov %rsi,$out
1255 mov %rdi,$inp
1256 mov %rcx,$key
1257
1258 # do we copy key schedule to stack?
1259 mov $key,%r10
1260 sub $sbox,%r10
1261 and \$0xfff,%r10
1262 cmp \$2304,%r10
1263 jb .Lcbc_do_dcopy
1264 cmp \$4096-248,%r10
1265 jb .Lcbc_skip_dcopy
1266.align 4
1267.Lcbc_do_dcopy:
1268 mov $key,%rsi
1269 lea $aes_key,%rdi
1270 lea $aes_key,$key
1271 mov \$240/8,%ecx
1272 .long 0x90A548F3 # rep movsq
1273 mov (%rsi),%eax # copy aes_key->rounds
1274 mov %eax,(%rdi)
1275.Lcbc_skip_dcopy:
1276 mov $key,$keyp # save key pointer
1277
1278 mov \$18,%ecx
1279.align 4
1280.Lcbc_prefetch_td:
1281 mov 0($sbox),%r10
1282 mov 32($sbox),%r11
1283 mov 64($sbox),%r12
1284 mov 96($sbox),%r13
1285 lea 128($sbox),$sbox
1286 sub \$1,%ecx
1287 jnz .Lcbc_prefetch_td
1288 sub \$2304,$sbox
1289
1290 cmp $inp,$out
1291 je .Lcbc_dec_in_place
1292
1293 mov %rbp,$ivec
1294.align 4
1295.Lcbc_dec_loop:
1296 mov 0($inp),$s0 # read input
1297 mov 4($inp),$s1
1298 mov 8($inp),$s2
1299 mov 12($inp),$s3
1300 mov $inp,8+$ivec # if ($verticalspin) save inp
1301
1302 mov $keyp,$key # restore key
1303 call _x86_64_AES_decrypt
1304
1305 mov $ivec,%rbp # load ivp
1306 mov 8+$ivec,$inp # if ($verticalspin) restore inp
1307 xor 0(%rbp),$s0 # xor iv
1308 xor 4(%rbp),$s1
1309 xor 8(%rbp),$s2
1310 xor 12(%rbp),$s3
1311 mov $inp,%rbp # current input, next iv
1312
1313 mov $_len,%r10 # load len
1314 sub \$16,%r10
1315 jc .Lcbc_dec_partial
1316 mov %r10,$_len # update len
1317 mov %rbp,$ivec # update ivp
1318
1319 mov $s0,0($out) # write output
1320 mov $s1,4($out)
1321 mov $s2,8($out)
1322 mov $s3,12($out)
1323
1324 lea 16($inp),$inp
1325 lea 16($out),$out
1326 jnz .Lcbc_dec_loop
1327.Lcbc_dec_end:
1328 mov $_ivp,%r12 # load user ivp
1329 mov 0(%rbp),%r10 # load iv
1330 mov 8(%rbp),%r11
1331 mov %r10,0(%r12) # copy back to user
1332 mov %r11,8(%r12)
1333 jmp .Lcbc_cleanup
1334
1335.align 4
1336.Lcbc_dec_partial:
1337 mov $s0,0+$ivec # dump output to stack
1338 mov $s1,4+$ivec
1339 mov $s2,8+$ivec
1340 mov $s3,12+$ivec
1341 mov $out,%rdi
1342 lea $ivec,%rsi
1343 mov \$16,%rcx
1344 add %r10,%rcx # number of bytes to copy
1345 .long 0xF689A4F3 # rep movsb
1346 jmp .Lcbc_dec_end
1347
1348.align 16
1349.Lcbc_dec_in_place:
1350 mov 0($inp),$s0 # load input
1351 mov 4($inp),$s1
1352 mov 8($inp),$s2
1353 mov 12($inp),$s3
1354
1355 mov $inp,$ivec # if ($verticalspin) save inp
1356 mov $keyp,$key
1357 call _x86_64_AES_decrypt
1358
1359 mov $ivec,$inp # if ($verticalspin) restore inp
1360 mov $_ivp,%rbp
1361 xor 0(%rbp),$s0
1362 xor 4(%rbp),$s1
1363 xor 8(%rbp),$s2
1364 xor 12(%rbp),$s3
1365
1366 mov 0($inp),%r10 # copy input to iv
1367 mov 8($inp),%r11
1368 mov %r10,0(%rbp)
1369 mov %r11,8(%rbp)
1370
1371 mov $s0,0($out) # save output [zaps input]
1372 mov $s1,4($out)
1373 mov $s2,8($out)
1374 mov $s3,12($out)
1375
1376 mov $_len,%rcx
1377 lea 16($inp),$inp
1378 lea 16($out),$out
1379 sub \$16,%rcx
1380 jc .Lcbc_dec_in_place_partial
1381 mov %rcx,$_len
1382 jnz .Lcbc_dec_in_place
1383 jmp .Lcbc_cleanup
1384
1385.align 4
1386.Lcbc_dec_in_place_partial:
1387 # one can argue if this is actually required
1388 lea ($out,%rcx),%rdi # %rcx is len-16 (negative) and out was already advanced by 16
1389 lea 16(%rbp,%rcx),%rsi # matching tail of the input block saved in the iv buffer
1390 neg %rcx
1391 .long 0xF689A4F3 # rep movsb # restore tail
1392 jmp .Lcbc_cleanup
1393.size AES_cbc_encrypt,.-AES_cbc_encrypt
1394___
1395}
1396
1397$code.=<<___; # open the exported AES_Te table; the _data_word rows that follow supply four words each
1398.globl AES_Te
1399.align 64
1400AES_Te:
1401___
1402 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
1403 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
1404 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
1405 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
1406 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
1407 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
1408 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
1409 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
1410 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
1411 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
1412 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
1413 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
1414 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
1415 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
1416 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
1417 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
1418 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
1419 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
1420 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
1421 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
1422 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
1423 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
1424 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
1425 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
1426 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
1427 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
1428 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
1429 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
1430 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
1431 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
1432 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
1433 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
1434 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
1435 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
1436 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
1437 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
1438 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
1439 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
1440 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
1441 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
1442 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
1443 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
1444 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
1445 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
1446 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
1447 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
1448 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
1449 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
1450 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
1451 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
1452 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
1453 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
1454 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
1455 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
1456 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
1457 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
1458 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
1459 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
1460 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
1461 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
1462 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
1463 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
1464 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
1465 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
1466#rcon: ten round-constant words placed directly after the AES_Te rows, followed by six zero words
1467$code.=<<___;
1468 .long 0x00000001, 0x00000002, 0x00000004, 0x00000008
1469 .long 0x00000010, 0x00000020, 0x00000040, 0x00000080
1470 .long 0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0
1471___
1472$code.=<<___; # open the exported AES_Td table; its _data_word rows follow
1473.globl AES_Td
1474.align 64
1475AES_Td:
1476___
1477 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
1478 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
1479 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
1480 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
1481 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
1482 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
1483 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
1484 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
1485 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
1486 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
1487 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
1488 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
1489 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
1490 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
1491 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
1492 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
1493 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
1494 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
1495 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
1496 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
1497 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
1498 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
1499 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
1500 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
1501 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
1502 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
1503 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
1504 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
1505 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
1506 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
1507 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
1508 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
1509 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
1510 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
1511 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
1512 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
1513 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
1514 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
1515 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
1516 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
1517 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
1518 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
1519 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
1520 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
1521 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
1522 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
1523 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
1524 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
1525 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
1526 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
1527 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
1528 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
1529 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
1530 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
1531 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
1532 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
1533 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
1534 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
1535 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
1536 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
1537 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
1538 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
1539 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
1540 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
1541#Td4:
1542 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
1543 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
1544 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
1545 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
1546 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
1547 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
1548 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
1549 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
1550 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
1551 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
1552 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
1553 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
1554 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
1555 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
1556 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
1557 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
1558 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
1559 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
1560 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
1561 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
1562 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
1563 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
1564 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
1565 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
1566 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
1567 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
1568 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
1569 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
1570 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
1571 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
1572 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
1573 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
1574
1575$code =~ s/\`([^\`]*)\`/eval($1)/gem; # replace every backquoted span with the result of evaluating it as Perl
1576
1577print $code; # write out the accumulated code
1578
1579close STDOUT;