summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes
diff options
context:
space:
mode:
authordjm <>2008-09-06 12:15:56 +0000
committerdjm <>2008-09-06 12:15:56 +0000
commit12867252827c8efaa8ddd1fa3b3d6e321e2bcdef (patch)
treeb7a1f167ae5aeff4cfd8a18b598b68fe98a066fd /src/lib/libcrypto/aes
parentf519f07de9bfb123f2b32aa3965e6f73c8364b80 (diff)
parent5a3c0a05c7f2c5d3c584b7c8d6aec836dd724c80 (diff)
downloadopenbsd-12867252827c8efaa8ddd1fa3b3d6e321e2bcdef.tar.gz
openbsd-12867252827c8efaa8ddd1fa3b3d6e321e2bcdef.tar.bz2
openbsd-12867252827c8efaa8ddd1fa3b3d6e321e2bcdef.zip
This commit was generated by cvs2git to track changes on a CVS vendor
branch.
Diffstat (limited to 'src/lib/libcrypto/aes')
-rw-r--r--src/lib/libcrypto/aes/aes_ige.c323
-rw-r--r--src/lib/libcrypto/aes/aes_wrap.c259
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ia64.S1123
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-x86_64.pl1578
4 files changed, 3283 insertions, 0 deletions
diff --git a/src/lib/libcrypto/aes/aes_ige.c b/src/lib/libcrypto/aes/aes_ige.c
new file mode 100644
index 0000000000..45d7096181
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_ige.c
@@ -0,0 +1,323 @@
1/* crypto/aes/aes_ige.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include "cryptlib.h"
53
54#include <openssl/aes.h>
55#include "aes_locl.h"
56
57#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
58typedef struct {
59 unsigned long data[N_WORDS];
60} aes_block_t;
61
62/* XXX: probably some better way to do this */
63#if defined(__i386__) || defined(__x86_64__)
64#define UNALIGNED_MEMOPS_ARE_FAST 1
65#else
66#define UNALIGNED_MEMOPS_ARE_FAST 0
67#endif
68
69#if UNALIGNED_MEMOPS_ARE_FAST
70#define load_block(d, s) (d) = *(const aes_block_t *)(s)
71#define store_block(d, s) *(aes_block_t *)(d) = (s)
72#else
73#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE)
74#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE)
75#endif
76
77/* N.B. The IV for this mode is _twice_ the block size */
78
79void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
80 const unsigned long length, const AES_KEY *key,
81 unsigned char *ivec, const int enc)
82 {
83 unsigned long n;
84 unsigned long len;
85
86 OPENSSL_assert(in && out && key && ivec);
87 OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
88 OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
89
90 len = length / AES_BLOCK_SIZE;
91
92 if (AES_ENCRYPT == enc)
93 {
94 if (in != out &&
95 (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
96 {
97 aes_block_t *ivp = (aes_block_t *)ivec;
98 aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
99
100 while (len)
101 {
102 aes_block_t *inp = (aes_block_t *)in;
103 aes_block_t *outp = (aes_block_t *)out;
104
105 for(n=0 ; n < N_WORDS; ++n)
106 outp->data[n] = inp->data[n] ^ ivp->data[n];
107 AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
108 for(n=0 ; n < N_WORDS; ++n)
109 outp->data[n] ^= iv2p->data[n];
110 ivp = outp;
111 iv2p = inp;
112 --len;
113 in += AES_BLOCK_SIZE;
114 out += AES_BLOCK_SIZE;
115 }
116 memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
117 memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
118 }
119 else
120 {
121 aes_block_t tmp, tmp2;
122 aes_block_t iv;
123 aes_block_t iv2;
124
125 load_block(iv, ivec);
126 load_block(iv2, ivec + AES_BLOCK_SIZE);
127
128 while (len)
129 {
130 load_block(tmp, in);
131 for(n=0 ; n < N_WORDS; ++n)
132 tmp2.data[n] = tmp.data[n] ^ iv.data[n];
133 AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
134 for(n=0 ; n < N_WORDS; ++n)
135 tmp2.data[n] ^= iv2.data[n];
136 store_block(out, tmp2);
137 iv = tmp2;
138 iv2 = tmp;
139 --len;
140 in += AES_BLOCK_SIZE;
141 out += AES_BLOCK_SIZE;
142 }
143 memcpy(ivec, iv.data, AES_BLOCK_SIZE);
144 memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
145 }
146 }
147 else
148 {
149 if (in != out &&
150 (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
151 {
152 aes_block_t *ivp = (aes_block_t *)ivec;
153 aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
154
155 while (len)
156 {
157 aes_block_t tmp;
158 aes_block_t *inp = (aes_block_t *)in;
159 aes_block_t *outp = (aes_block_t *)out;
160
161 for(n=0 ; n < N_WORDS; ++n)
162 tmp.data[n] = inp->data[n] ^ iv2p->data[n];
163 AES_decrypt((unsigned char *)tmp.data, (unsigned char *)outp->data, key);
164 for(n=0 ; n < N_WORDS; ++n)
165 outp->data[n] ^= ivp->data[n];
166 ivp = inp;
167 iv2p = outp;
168 --len;
169 in += AES_BLOCK_SIZE;
170 out += AES_BLOCK_SIZE;
171 }
172 memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
173 memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
174 }
175 else
176 {
177 aes_block_t tmp, tmp2;
178 aes_block_t iv;
179 aes_block_t iv2;
180
181 load_block(iv, ivec);
182 load_block(iv2, ivec + AES_BLOCK_SIZE);
183
184 while (len)
185 {
186 load_block(tmp, in);
187 tmp2 = tmp;
188 for(n=0 ; n < N_WORDS; ++n)
189 tmp.data[n] ^= iv2.data[n];
190 AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
191 for(n=0 ; n < N_WORDS; ++n)
192 tmp.data[n] ^= iv.data[n];
193 store_block(out, tmp);
194 iv = tmp2;
195 iv2 = tmp;
196 --len;
197 in += AES_BLOCK_SIZE;
198 out += AES_BLOCK_SIZE;
199 }
200 memcpy(ivec, iv.data, AES_BLOCK_SIZE);
201 memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
202 }
203 }
204 }
205
206/*
207 * Note that its effectively impossible to do biIGE in anything other
208 * than a single pass, so no provision is made for chaining.
209 */
210
211/* N.B. The IV for this mode is _four times_ the block size */
212
213void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
214 const unsigned long length, const AES_KEY *key,
215 const AES_KEY *key2, const unsigned char *ivec,
216 const int enc)
217 {
218 unsigned long n;
219 unsigned long len = length;
220 unsigned char tmp[AES_BLOCK_SIZE];
221 unsigned char tmp2[AES_BLOCK_SIZE];
222 unsigned char tmp3[AES_BLOCK_SIZE];
223 unsigned char prev[AES_BLOCK_SIZE];
224 const unsigned char *iv;
225 const unsigned char *iv2;
226
227 OPENSSL_assert(in && out && key && ivec);
228 OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
229 OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
230
231 if (AES_ENCRYPT == enc)
232 {
233 /* XXX: Do a separate case for when in != out (strictly should
234 check for overlap, too) */
235
236 /* First the forward pass */
237 iv = ivec;
238 iv2 = ivec + AES_BLOCK_SIZE;
239 while (len >= AES_BLOCK_SIZE)
240 {
241 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
242 out[n] = in[n] ^ iv[n];
243 AES_encrypt(out, out, key);
244 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
245 out[n] ^= iv2[n];
246 iv = out;
247 memcpy(prev, in, AES_BLOCK_SIZE);
248 iv2 = prev;
249 len -= AES_BLOCK_SIZE;
250 in += AES_BLOCK_SIZE;
251 out += AES_BLOCK_SIZE;
252 }
253
254 /* And now backwards */
255 iv = ivec + AES_BLOCK_SIZE*2;
256 iv2 = ivec + AES_BLOCK_SIZE*3;
257 len = length;
258 while(len >= AES_BLOCK_SIZE)
259 {
260 out -= AES_BLOCK_SIZE;
261 /* XXX: reduce copies by alternating between buffers */
262 memcpy(tmp, out, AES_BLOCK_SIZE);
263 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
264 out[n] ^= iv[n];
265 /* hexdump(stdout, "out ^ iv", out, AES_BLOCK_SIZE); */
266 AES_encrypt(out, out, key);
267 /* hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
268 /* hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
269 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
270 out[n] ^= iv2[n];
271 /* hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
272 iv = out;
273 memcpy(prev, tmp, AES_BLOCK_SIZE);
274 iv2 = prev;
275 len -= AES_BLOCK_SIZE;
276 }
277 }
278 else
279 {
280 /* First backwards */
281 iv = ivec + AES_BLOCK_SIZE*2;
282 iv2 = ivec + AES_BLOCK_SIZE*3;
283 in += length;
284 out += length;
285 while (len >= AES_BLOCK_SIZE)
286 {
287 in -= AES_BLOCK_SIZE;
288 out -= AES_BLOCK_SIZE;
289 memcpy(tmp, in, AES_BLOCK_SIZE);
290 memcpy(tmp2, in, AES_BLOCK_SIZE);
291 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
292 tmp[n] ^= iv2[n];
293 AES_decrypt(tmp, out, key);
294 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
295 out[n] ^= iv[n];
296 memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
297 iv = tmp3;
298 iv2 = out;
299 len -= AES_BLOCK_SIZE;
300 }
301
302 /* And now forwards */
303 iv = ivec;
304 iv2 = ivec + AES_BLOCK_SIZE;
305 len = length;
306 while (len >= AES_BLOCK_SIZE)
307 {
308 memcpy(tmp, out, AES_BLOCK_SIZE);
309 memcpy(tmp2, out, AES_BLOCK_SIZE);
310 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
311 tmp[n] ^= iv2[n];
312 AES_decrypt(tmp, out, key);
313 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
314 out[n] ^= iv[n];
315 memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
316 iv = tmp3;
317 iv2 = out;
318 len -= AES_BLOCK_SIZE;
319 in += AES_BLOCK_SIZE;
320 out += AES_BLOCK_SIZE;
321 }
322 }
323 }
diff --git a/src/lib/libcrypto/aes/aes_wrap.c b/src/lib/libcrypto/aes/aes_wrap.c
new file mode 100644
index 0000000000..9feacd65d8
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_wrap.c
@@ -0,0 +1,259 @@
1/* crypto/aes/aes_wrap.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54#include "cryptlib.h"
55#include <openssl/aes.h>
56#include <openssl/bio.h>
57
58static const unsigned char default_iv[] = {
59 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
60};
61
62int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
63 unsigned char *out,
64 const unsigned char *in, unsigned int inlen)
65 {
66 unsigned char *A, B[16], *R;
67 unsigned int i, j, t;
68 if ((inlen & 0x7) || (inlen < 8))
69 return -1;
70 A = B;
71 t = 1;
72 memcpy(out + 8, in, inlen);
73 if (!iv)
74 iv = default_iv;
75
76 memcpy(A, iv, 8);
77
78 for (j = 0; j < 6; j++)
79 {
80 R = out + 8;
81 for (i = 0; i < inlen; i += 8, t++, R += 8)
82 {
83 memcpy(B + 8, R, 8);
84 AES_encrypt(B, B, key);
85 A[7] ^= (unsigned char)(t & 0xff);
86 if (t > 0xff)
87 {
88 A[6] ^= (unsigned char)((t & 0xff) >> 8);
89 A[5] ^= (unsigned char)((t & 0xff) >> 16);
90 A[4] ^= (unsigned char)((t & 0xff) >> 24);
91 }
92 memcpy(R, B + 8, 8);
93 }
94 }
95 memcpy(out, A, 8);
96 return inlen + 8;
97 }
98
99int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
100 unsigned char *out,
101 const unsigned char *in, unsigned int inlen)
102 {
103 unsigned char *A, B[16], *R;
104 unsigned int i, j, t;
105 inlen -= 8;
106 if (inlen & 0x7)
107 return -1;
108 if (inlen < 8)
109 return -1;
110 A = B;
111 t = 6 * (inlen >> 3);
112 memcpy(A, in, 8);
113 memcpy(out, in + 8, inlen);
114 for (j = 0; j < 6; j++)
115 {
116 R = out + inlen - 8;
117 for (i = 0; i < inlen; i += 8, t--, R -= 8)
118 {
119 A[7] ^= (unsigned char)(t & 0xff);
120 if (t > 0xff)
121 {
122 A[6] ^= (unsigned char)((t & 0xff) >> 8);
123 A[5] ^= (unsigned char)((t & 0xff) >> 16);
124 A[4] ^= (unsigned char)((t & 0xff) >> 24);
125 }
126 memcpy(B + 8, R, 8);
127 AES_decrypt(B, B, key);
128 memcpy(R, B + 8, 8);
129 }
130 }
131 if (!iv)
132 iv = default_iv;
133 if (memcmp(A, iv, 8))
134 {
135 OPENSSL_cleanse(out, inlen);
136 return 0;
137 }
138 return inlen;
139 }
140
141#ifdef AES_WRAP_TEST
142
143int AES_wrap_unwrap_test(const unsigned char *kek, int keybits,
144 const unsigned char *iv,
145 const unsigned char *eout,
146 const unsigned char *key, int keylen)
147 {
148 unsigned char *otmp = NULL, *ptmp = NULL;
149 int r, ret = 0;
150 AES_KEY wctx;
151 otmp = OPENSSL_malloc(keylen + 8);
152 ptmp = OPENSSL_malloc(keylen);
153 if (!otmp || !ptmp)
154 return 0;
155 if (AES_set_encrypt_key(kek, keybits, &wctx))
156 goto err;
157 r = AES_wrap_key(&wctx, iv, otmp, key, keylen);
158 if (r <= 0)
159 goto err;
160
161 if (eout && memcmp(eout, otmp, keylen))
162 goto err;
163
164 if (AES_set_decrypt_key(kek, keybits, &wctx))
165 goto err;
166 r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r);
167
168 if (memcmp(key, ptmp, keylen))
169 goto err;
170
171 ret = 1;
172
173 err:
174 if (otmp)
175 OPENSSL_free(otmp);
176 if (ptmp)
177 OPENSSL_free(ptmp);
178
179 return ret;
180
181 }
182
183
184
185int main(int argc, char **argv)
186{
187
188static const unsigned char kek[] = {
189 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
190 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
191 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
192 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
193};
194
195static const unsigned char key[] = {
196 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
197 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
199 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
200};
201
202static const unsigned char e1[] = {
203 0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47,
204 0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82,
205 0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5
206};
207
208static const unsigned char e2[] = {
209 0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35,
210 0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2,
211 0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d
212};
213
214static const unsigned char e3[] = {
215 0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2,
216 0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a,
217 0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7
218};
219
220static const unsigned char e4[] = {
221 0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32,
222 0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc,
223 0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93,
224 0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2
225};
226
227static const unsigned char e5[] = {
228 0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f,
229 0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4,
230 0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95,
231 0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1
232};
233
234static const unsigned char e6[] = {
235 0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4,
236 0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26,
237 0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26,
238 0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b,
239 0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21
240};
241
242 AES_KEY wctx, xctx;
243 int ret;
244 ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16);
245 fprintf(stderr, "Key test result %d\n", ret);
246 ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16);
247 fprintf(stderr, "Key test result %d\n", ret);
248 ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16);
249 fprintf(stderr, "Key test result %d\n", ret);
250 ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24);
251 fprintf(stderr, "Key test result %d\n", ret);
252 ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24);
253 fprintf(stderr, "Key test result %d\n", ret);
254 ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32);
255 fprintf(stderr, "Key test result %d\n", ret);
256}
257
258
259#endif
diff --git a/src/lib/libcrypto/aes/asm/aes-ia64.S b/src/lib/libcrypto/aes/asm/aes-ia64.S
new file mode 100644
index 0000000000..7f6c4c3662
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-ia64.S
@@ -0,0 +1,1123 @@
1// ====================================================================
2// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
3// project. Rights for redistribution and usage in source and binary
4// forms are granted according to the OpenSSL license.
5// ====================================================================
6//
7// What's wrong with compiler generated code? Compiler never uses
8// variable 'shr' which is pairable with 'extr'/'dep' instructions.
9// Then it uses 'zxt' which is an I-type, but can be replaced with
10// 'and' which in turn can be assigned to M-port [there're double as
11// much M-ports as there're I-ports on Itanium 2]. By sacrificing few
12// registers for small constants (255, 24 and 16) to be used with
13// 'shr' and 'and' instructions I can achieve better ILP, Intruction
14// Level Parallelism, and performance. This code outperforms GCC 3.3
15// generated code by over factor of 2 (two), GCC 3.4 - by 70% and
16// HP C - by 40%. Measured best-case scenario, i.e. aligned
17// big-endian input, ECB timing on Itanium 2 is (18 + 13*rounds)
18// ticks per block, or 9.25 CPU cycles per byte for 128 bit key.
19
20// Version 1.2 mitigates the hazard of cache-timing attacks by
21// a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling
22// references to S-boxes for L2 cache latency, c) prefetching T[ed]4
23// prior last round. As result performance dropped to (26 + 15*rounds)
24// ticks per block or 11 cycles per byte processed with 128-bit key.
25// This is ~16% deterioration. For reference Itanium 2 L1 cache has
26// 64 bytes line size and L2 - 128 bytes...
27
28.ident "aes-ia64.S, version 1.2"
29.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
30.explicit
31.text
32
33rk0=r8; rk1=r9;
34
35pfssave=r2;
36lcsave=r10;
37prsave=r3;
38maskff=r11;
39twenty4=r14;
40sixteen=r15;
41
42te00=r16; te11=r17; te22=r18; te33=r19;
43te01=r20; te12=r21; te23=r22; te30=r23;
44te02=r24; te13=r25; te20=r26; te31=r27;
45te03=r28; te10=r29; te21=r30; te32=r31;
46
47// these are rotating...
48t0=r32; s0=r33;
49t1=r34; s1=r35;
50t2=r36; s2=r37;
51t3=r38; s3=r39;
52
53te0=r40; te1=r41; te2=r42; te3=r43;
54
55#if defined(_HPUX_SOURCE) && !defined(_LP64)
56# define ADDP addp4
57#else
58# define ADDP add
59#endif
60
61// Offsets from Te0
62#define TE0 0
63#define TE2 2
64#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
65#define TE1 3
66#define TE3 1
67#else
68#define TE1 1
69#define TE3 3
70#endif
71
72// This implies that AES_KEY comprises 32-bit key schedule elements
73// even on LP64 platforms.
74#ifndef KSZ
75# define KSZ 4
76# define LDKEY ld4
77#endif
78
79.proc _ia64_AES_encrypt#
80// Input: rk0-rk1
81// te0
82// te3 as AES_KEY->rounds!!!
83// s0-s3
84// maskff,twenty4,sixteen
85// Output: r16,r20,r24,r28 as s0-s3
86// Clobber: r16-r31,rk0-rk1,r32-r43
87.align 32
88_ia64_AES_encrypt:
89 .prologue
90 .altrp b6
91 .body
92{ .mmi; alloc r16=ar.pfs,12,0,0,8
93 LDKEY t0=[rk0],2*KSZ
94 mov pr.rot=1<<16 }
95{ .mmi; LDKEY t1=[rk1],2*KSZ
96 add te1=TE1,te0
97 add te3=-3,te3 };;
98{ .mib; LDKEY t2=[rk0],2*KSZ
99 mov ar.ec=2 }
100{ .mib; LDKEY t3=[rk1],2*KSZ
101 add te2=TE2,te0
102 brp.loop.imp .Le_top,.Le_end-16 };;
103
104{ .mmi; xor s0=s0,t0
105 xor s1=s1,t1
106 mov ar.lc=te3 }
107{ .mmi; xor s2=s2,t2
108 xor s3=s3,t3
109 add te3=TE3,te0 };;
110
111.align 32
112.Le_top:
113{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
114 (p0) and te33=s3,maskff // 0/0:s3&0xff
115 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
116{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
117 (p0) and te30=s0,maskff // 0/1:s0&0xff
118 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
119{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
120 (p0) shladd te33=te33,3,te3 // 1/0:te0+s0>>24
121 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
122{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
123 (p0) shladd te30=te30,3,te3 // 1/1:te3+s0
124 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
125{ .mmi; (p0) ld4 te33=[te33] // 2/0:te3[s3&0xff]
126 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
127 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
128{ .mmi; (p0) ld4 te30=[te30] // 2/1:te3[s0]
129 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
130 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
131{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
132 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
133 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
134{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
135 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
136 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
137{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
138 (p0) shladd te21=te21,3,te2 // 4/3:te3+s2
139 (p0) extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
140{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
141 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
142 (p0) shr.u te13=s3,sixteen };; // 4/2:s3>>16
143{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
144 (p0) shladd te11=te11,3,te1 // 5/0:te1+s1>>16
145 (p0) extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
146{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
147 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
148 (p0) and te31=s1,maskff };; // 5/2:s1&0xff
149{ .mmi; (p0) ld4 te11=[te11] // 6/0:te1[s1>>16]
150 (p0) shladd te12=te12,3,te1 // 6/1:te1+s2>>16
151 (p0) extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
152{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
153 (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16
154 (p0) and te32=s2,maskff };; // 6/3:s2&0xff
155
156{ .mmi; (p0) ld4 te12=[te12] // 7/1:te1[s2>>16]
157 (p0) shladd te31=te31,3,te3 // 7/2:te3+s1&0xff
158 (p0) and te13=te13,maskff} // 7/2:s3>>16&0xff
159{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
160 (p0) shladd te32=te32,3,te3 // 7/3:te3+s2
161 (p0) xor t0=t0,te33 };; // 7/0:
162{ .mmi; (p0) ld4 te31=[te31] // 8/2:te3[s1]
163 (p0) shladd te13=te13,3,te1 // 8/2:te1+s3>>16
164 (p0) xor t0=t0,te22 } // 8/0:
165{ .mmi; (p0) ld4 te32=[te32] // 8/3:te3[s2]
166 (p0) shladd te10=te10,3,te1 // 8/3:te1+s0>>16
167 (p0) xor t1=t1,te30 };; // 8/1:
168{ .mmi; (p0) ld4 te13=[te13] // 9/2:te1[s3>>16]
169 (p0) ld4 te10=[te10] // 9/3:te1[s0>>16]
170 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
171{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
172 (p0) xor t2=t2,te20 // 10[9]/2:
173 (p0) xor t3=t3,te21 };; // 10[9]/3:
174{ .mmi; (p0) xor t0=t0,te11 // 11[10]/0:done!
175 (p0) xor t1=t1,te01 // 11[10]/1:
176 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
177{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
178 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
179{ .mmi; (p0) xor t1=t1,te12 // 13[11]/1:done!
180 (p0) xor t2=t2,te31 // 13[11]/2:
181 (p0) xor t3=t3,te32 } // 13[11]/3:
182{ .mmi; (p17) add te0=2048,te0 // 13[11]/
183 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
184{ .mib; (p0) xor t2=t2,te13 // 14[12]/2:done!
185 (p17) add te2=2048+128-TE2,te2} // 14[12]/
186{ .mib; (p0) xor t3=t3,te10 // 14[12]/3:done!
187 (p17) add te3=2048+192-TE3,te3 // 14[12]/
188 br.ctop.sptk .Le_top };;
189.Le_end:
190
191
192{ .mmi; ld8 te12=[te0] // prefetch Te4
193 ld8 te31=[te1] }
194{ .mmi; ld8 te10=[te2]
195 ld8 te32=[te3] }
196
197{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
198 and te33=s3,maskff // 0/0:s3&0xff
199 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
200{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
201 and te30=s0,maskff // 0/1:s0&0xff
202 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
203{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
204 add te33=te33,te0 // 1/0:te0+s0>>24
205 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
206{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
207 add te30=te30,te0 // 1/1:te0+s0
208 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
209{ .mmi; ld1 te33=[te33] // 2/0:te0[s3&0xff]
210 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
211 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
212{ .mmi; ld1 te30=[te30] // 2/1:te0[s0]
213 add te23=te23,te0 // 2/1:te0+s3>>8
214 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
215{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
216 add te20=te20,te0 // 3/2:te0+s0>>8
217 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
218{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
219 add te00=te00,te0 // 3/0:te0+s0>>24
220 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
221{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
222 add te21=te21,te0 // 4/3:te0+s2
223 extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
224{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
225 add te01=te01,te0 // 4/1:te0+s1>>24
226 shr.u te13=s3,sixteen };; // 4/2:s3>>16
227{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
228 add te11=te11,te0 // 5/0:te0+s1>>16
229 extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
230{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
231 add te02=te02,te0 // 5/2:te0+s2>>24
232 and te31=s1,maskff };; // 5/2:s1&0xff
233{ .mmi; ld1 te11=[te11] // 6/0:te0[s1>>16]
234 add te12=te12,te0 // 6/1:te0+s2>>16
235 extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
236{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
237 add te03=te03,te0 // 6/3:te0+s0>>16
238 and te32=s2,maskff };; // 6/3:s2&0xff
239
240{ .mmi; ld1 te12=[te12] // 7/1:te0[s2>>16]
241 add te31=te31,te0 // 7/2:te0+s1&0xff
242 dep te33=te22,te33,8,8} // 7/0:
243{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
244 add te32=te32,te0 // 7/3:te0+s2
245 and te13=te13,maskff};; // 7/2:s3>>16&0xff
246{ .mmi; ld1 te31=[te31] // 8/2:te0[s1]
247 add te13=te13,te0 // 8/2:te0+s3>>16
248 dep te30=te23,te30,8,8} // 8/1:
249{ .mmi; ld1 te32=[te32] // 8/3:te0[s2]
250 add te10=te10,te0 // 8/3:te0+s0>>16
251 shl te00=te00,twenty4};; // 8/0:
252{ .mii; ld1 te13=[te13] // 9/2:te0[s3>>16]
253 dep te33=te11,te33,16,8 // 9/0:
254 shl te01=te01,twenty4};; // 9/1:
255{ .mii; ld1 te10=[te10] // 10/3:te0[s0>>16]
256 dep te31=te20,te31,8,8 // 10/2:
257 shl te02=te02,twenty4};; // 10/2:
258{ .mii; xor t0=t0,te33 // 11/0:
259 dep te32=te21,te32,8,8 // 11/3:
260 shl te12=te12,sixteen};; // 11/1:
261{ .mii; xor r16=t0,te00 // 12/0:done!
262 dep te31=te13,te31,16,8 // 12/2:
263 shl te03=te03,twenty4};; // 12/3:
264{ .mmi; xor t1=t1,te01 // 13/1:
265 xor t2=t2,te02 // 13/2:
266 dep te32=te10,te32,16,8};; // 13/3:
267{ .mmi; xor t1=t1,te30 // 14/1:
268 xor r24=t2,te31 // 14/2:done!
269 xor t3=t3,te32 };; // 14/3:
270{ .mib; xor r20=t1,te12 // 15/1:done!
271 xor r28=t3,te03 // 15/3:done!
272 br.ret.sptk b6 };;
273.endp _ia64_AES_encrypt#
274
275// void AES_encrypt (const void *in,void *out,const AES_KEY *key);
276.global AES_encrypt#
277.proc AES_encrypt#
278.align 32
279AES_encrypt:
280 .prologue
281 .save ar.pfs,pfssave
282{ .mmi; alloc pfssave=ar.pfs,3,1,12,0
283 and out0=3,in0
284 mov r3=ip }
285{ .mmi; ADDP in0=0,in0
286 mov loc0=psr.um
287 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
288
289{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
290 add out8=(AES_Te#-AES_encrypt#),r3 // Te0
291 .save pr,prsave
292 mov prsave=pr }
293{ .mmi; rum 1<<3 // clear um.ac
294 .save ar.lc,lcsave
295 mov lcsave=ar.lc };;
296
297 .body
298#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
299{ .mib; cmp.ne p6,p0=out0,r0
300 add out0=4,in0
301(p6) br.dpnt.many .Le_i_unaligned };;
302
303{ .mmi; ld4 out1=[in0],8 // s0
304 and out9=3,in1
305 mov twenty4=24 }
306{ .mmi; ld4 out3=[out0],8 // s1
307 ADDP rk0=0,in2
308 mov sixteen=16 };;
309{ .mmi; ld4 out5=[in0] // s2
310 cmp.ne p6,p0=out9,r0
311 mov maskff=0xff }
312{ .mmb; ld4 out7=[out0] // s3
313 ADDP rk1=KSZ,in2
314 br.call.sptk.many b6=_ia64_AES_encrypt };;
315
316{ .mib; ADDP in0=4,in1
317 ADDP in1=0,in1
318(p6) br.spnt .Le_o_unaligned };;
319
320{ .mii; mov psr.um=loc0
321 mov ar.pfs=pfssave
322 mov ar.lc=lcsave };;
323{ .mmi; st4 [in1]=r16,8 // s0
324 st4 [in0]=r20,8 // s1
325 mov pr=prsave,0x1ffff };;
326{ .mmb; st4 [in1]=r24 // s2
327 st4 [in0]=r28 // s3
328 br.ret.sptk.many b0 };;
329#endif
330
331.align 32
332.Le_i_unaligned:
333{ .mmi; add out0=1,in0
334 add out2=2,in0
335 add out4=3,in0 };;
336{ .mmi; ld1 r16=[in0],4
337 ld1 r17=[out0],4 }//;;
338{ .mmi; ld1 r18=[out2],4
339 ld1 out1=[out4],4 };; // s0
340{ .mmi; ld1 r20=[in0],4
341 ld1 r21=[out0],4 }//;;
342{ .mmi; ld1 r22=[out2],4
343 ld1 out3=[out4],4 };; // s1
344{ .mmi; ld1 r24=[in0],4
345 ld1 r25=[out0],4 }//;;
346{ .mmi; ld1 r26=[out2],4
347 ld1 out5=[out4],4 };; // s2
348{ .mmi; ld1 r28=[in0]
349 ld1 r29=[out0] }//;;
350{ .mmi; ld1 r30=[out2]
351 ld1 out7=[out4] };; // s3
352
353{ .mii;
354 dep out1=r16,out1,24,8 //;;
355 dep out3=r20,out3,24,8 }//;;
356{ .mii; ADDP rk0=0,in2
357 dep out5=r24,out5,24,8 //;;
358 dep out7=r28,out7,24,8 };;
359{ .mii; ADDP rk1=KSZ,in2
360 dep out1=r17,out1,16,8 //;;
361 dep out3=r21,out3,16,8 }//;;
362{ .mii; mov twenty4=24
363 dep out5=r25,out5,16,8 //;;
364 dep out7=r29,out7,16,8 };;
365{ .mii; mov sixteen=16
366 dep out1=r18,out1,8,8 //;;
367 dep out3=r22,out3,8,8 }//;;
368{ .mii; mov maskff=0xff
369 dep out5=r26,out5,8,8 //;;
370 dep out7=r30,out7,8,8 };;
371
372{ .mib; br.call.sptk.many b6=_ia64_AES_encrypt };;
373
374.Le_o_unaligned:
375{ .mii; ADDP out0=0,in1
376 extr.u r17=r16,8,8 // s0
377 shr.u r19=r16,twenty4 }//;;
378{ .mii; ADDP out1=1,in1
379 extr.u r18=r16,16,8
380 shr.u r23=r20,twenty4 }//;; // s1
381{ .mii; ADDP out2=2,in1
382 extr.u r21=r20,8,8
383 shr.u r22=r20,sixteen }//;;
384{ .mii; ADDP out3=3,in1
385 extr.u r25=r24,8,8 // s2
386 shr.u r27=r24,twenty4 };;
387{ .mii; st1 [out3]=r16,4
388 extr.u r26=r24,16,8
389 shr.u r31=r28,twenty4 }//;; // s3
390{ .mii; st1 [out2]=r17,4
391 extr.u r29=r28,8,8
392 shr.u r30=r28,sixteen }//;;
393
394{ .mmi; st1 [out1]=r18,4
395 st1 [out0]=r19,4 };;
396{ .mmi; st1 [out3]=r20,4
397 st1 [out2]=r21,4 }//;;
398{ .mmi; st1 [out1]=r22,4
399 st1 [out0]=r23,4 };;
400{ .mmi; st1 [out3]=r24,4
401 st1 [out2]=r25,4
402 mov pr=prsave,0x1ffff }//;;
403{ .mmi; st1 [out1]=r26,4
404 st1 [out0]=r27,4
405 mov ar.pfs=pfssave };;
406{ .mmi; st1 [out3]=r28
407 st1 [out2]=r29
408 mov ar.lc=lcsave }//;;
409{ .mmi; st1 [out1]=r30
410 st1 [out0]=r31 }
411{ .mfb; mov psr.um=loc0 // restore user mask
412 br.ret.sptk.many b0 };;
413.endp AES_encrypt#
414
415// *AES_decrypt are autogenerated by the following script:
416#if 0
417#!/usr/bin/env perl
418print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
419open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);
420print "#endif\n";
421while(<>) {
422 $process=1 if (/\.proc\s+_ia64_AES_encrypt/);
423 next if (!$process);
424
425 #s/te00=s0/td00=s0/; s/te00/td00/g;
426 s/te11=s1/td13=s3/; s/te11/td13/g;
427 #s/te22=s2/td22=s2/; s/te22/td22/g;
428 s/te33=s3/td31=s1/; s/te33/td31/g;
429
430 #s/te01=s1/td01=s1/; s/te01/td01/g;
431 s/te12=s2/td10=s0/; s/te12/td10/g;
432 #s/te23=s3/td23=s3/; s/te23/td23/g;
433 s/te30=s0/td32=s2/; s/te30/td32/g;
434
435 #s/te02=s2/td02=s2/; s/te02/td02/g;
436 s/te13=s3/td11=s1/; s/te13/td11/g;
437 #s/te20=s0/td20=s0/; s/te20/td20/g;
438 s/te31=s1/td33=s3/; s/te31/td33/g;
439
440 #s/te03=s3/td03=s3/; s/te03/td03/g;
441 s/te10=s0/td12=s2/; s/te10/td12/g;
442 #s/te21=s1/td21=s1/; s/te21/td21/g;
443 s/te32=s2/td30=s0/; s/te32/td30/g;
444
445 s/td/te/g;
446
447 s/AES_encrypt/AES_decrypt/g;
448 s/\.Le_/.Ld_/g;
449 s/AES_Te#/AES_Td#/g;
450
451 print;
452
453 exit if (/\.endp\s+AES_decrypt/);
454}
455#endif
456.proc _ia64_AES_decrypt#
457// Input: rk0-rk1
458// te0
459// te3 as AES_KEY->rounds!!!
460// s0-s3
461// maskff,twenty4,sixteen
462// Output: r16,r20,r24,r28 as s0-s3
463// Clobber: r16-r31,rk0-rk1,r32-r43
464.align 32
465_ia64_AES_decrypt:
466 .prologue
467 .altrp b6
468 .body
469{ .mmi; alloc r16=ar.pfs,12,0,0,8
470 LDKEY t0=[rk0],2*KSZ
471 mov pr.rot=1<<16 }
472{ .mmi; LDKEY t1=[rk1],2*KSZ
473 add te1=TE1,te0
474 add te3=-3,te3 };;
475{ .mib; LDKEY t2=[rk0],2*KSZ
476 mov ar.ec=2 }
477{ .mib; LDKEY t3=[rk1],2*KSZ
478 add te2=TE2,te0
479 brp.loop.imp .Ld_top,.Ld_end-16 };;
480
481{ .mmi; xor s0=s0,t0
482 xor s1=s1,t1
483 mov ar.lc=te3 }
484{ .mmi; xor s2=s2,t2
485 xor s3=s3,t3
486 add te3=TE3,te0 };;
487
488.align 32
489.Ld_top:
490{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
491 (p0) and te31=s1,maskff // 0/0:s3&0xff
492 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
493{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
494 (p0) and te32=s2,maskff // 0/1:s0&0xff
495 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
496{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
497 (p0) shladd te31=te31,3,te3 // 1/0:te0+s0>>24
498 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
499{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
500 (p0) shladd te32=te32,3,te3 // 1/1:te3+s0
501 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
502{ .mmi; (p0) ld4 te31=[te31] // 2/0:te3[s3&0xff]
503 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
504 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
505{ .mmi; (p0) ld4 te32=[te32] // 2/1:te3[s0]
506 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
507 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
508{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
509 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
510 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
511{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
512 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
513 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
514{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
515 (p0) shladd te21=te21,3,te2 // 4/3:te3+s2
516 (p0) extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff
517{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
518 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
519 (p0) shr.u te11=s1,sixteen };; // 4/2:s3>>16
520{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
521 (p0) shladd te13=te13,3,te1 // 5/0:te1+s1>>16
522 (p0) extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff
523{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
524 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
525 (p0) and te33=s3,maskff };; // 5/2:s1&0xff
526{ .mmi; (p0) ld4 te13=[te13] // 6/0:te1[s1>>16]
527 (p0) shladd te10=te10,3,te1 // 6/1:te1+s2>>16
528 (p0) extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff
529{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
530 (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16
531 (p0) and te30=s0,maskff };; // 6/3:s2&0xff
532
533{ .mmi; (p0) ld4 te10=[te10] // 7/1:te1[s2>>16]
534 (p0) shladd te33=te33,3,te3 // 7/2:te3+s1&0xff
535 (p0) and te11=te11,maskff} // 7/2:s3>>16&0xff
536{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
537 (p0) shladd te30=te30,3,te3 // 7/3:te3+s2
538 (p0) xor t0=t0,te31 };; // 7/0:
539{ .mmi; (p0) ld4 te33=[te33] // 8/2:te3[s1]
540 (p0) shladd te11=te11,3,te1 // 8/2:te1+s3>>16
541 (p0) xor t0=t0,te22 } // 8/0:
542{ .mmi; (p0) ld4 te30=[te30] // 8/3:te3[s2]
543 (p0) shladd te12=te12,3,te1 // 8/3:te1+s0>>16
544 (p0) xor t1=t1,te32 };; // 8/1:
545{ .mmi; (p0) ld4 te11=[te11] // 9/2:te1[s3>>16]
546 (p0) ld4 te12=[te12] // 9/3:te1[s0>>16]
547 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
548{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
549 (p0) xor t2=t2,te20 // 10[9]/2:
550 (p0) xor t3=t3,te21 };; // 10[9]/3:
551{ .mmi; (p0) xor t0=t0,te13 // 11[10]/0:done!
552 (p0) xor t1=t1,te01 // 11[10]/1:
553 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
554{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
555 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
556{ .mmi; (p0) xor t1=t1,te10 // 13[11]/1:done!
557 (p0) xor t2=t2,te33 // 13[11]/2:
558 (p0) xor t3=t3,te30 } // 13[11]/3:
559{ .mmi; (p17) add te0=2048,te0 // 13[11]/
560 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
561{ .mib; (p0) xor t2=t2,te11 // 14[12]/2:done!
562 (p17) add te2=2048+128-TE2,te2} // 14[12]/
563{ .mib; (p0) xor t3=t3,te12 // 14[12]/3:done!
564 (p17) add te3=2048+192-TE3,te3 // 14[12]/
565 br.ctop.sptk .Ld_top };;
566.Ld_end:
567
568
569{ .mmi; ld8 te10=[te0] // prefetch Td4
570 ld8 te33=[te1] }
571{ .mmi; ld8 te12=[te2]
572 ld8 te30=[te3] }
573
574{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
575 and te31=s1,maskff // 0/0:s3&0xff
576 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
577{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
578 and te32=s2,maskff // 0/1:s0&0xff
579 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
580{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
581 add te31=te31,te0 // 1/0:te0+s0>>24
582 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
583{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
584 add te32=te32,te0 // 1/1:te0+s0
585 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
586{ .mmi; ld1 te31=[te31] // 2/0:te0[s3&0xff]
587 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
588 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
589{ .mmi; ld1 te32=[te32] // 2/1:te0[s0]
590 add te23=te23,te0 // 2/1:te0+s3>>8
591 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
592{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
593 add te20=te20,te0 // 3/2:te0+s0>>8
594 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
595{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
596 add te00=te00,te0 // 3/0:te0+s0>>24
597 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
598{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
599 add te21=te21,te0 // 4/3:te0+s2
600 extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff
601{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
602 add te01=te01,te0 // 4/1:te0+s1>>24
603 shr.u te11=s1,sixteen };; // 4/2:s3>>16
604{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
605 add te13=te13,te0 // 5/0:te0+s1>>16
606 extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff
607{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
608 add te02=te02,te0 // 5/2:te0+s2>>24
609 and te33=s3,maskff };; // 5/2:s1&0xff
610{ .mmi; ld1 te13=[te13] // 6/0:te0[s1>>16]
611 add te10=te10,te0 // 6/1:te0+s2>>16
612 extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff
613{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
614 add te03=te03,te0 // 6/3:te0+s0>>16
615 and te30=s0,maskff };; // 6/3:s2&0xff
616
617{ .mmi; ld1 te10=[te10] // 7/1:te0[s2>>16]
618 add te33=te33,te0 // 7/2:te0+s1&0xff
619 dep te31=te22,te31,8,8} // 7/0:
620{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
621 add te30=te30,te0 // 7/3:te0+s2
622 and te11=te11,maskff};; // 7/2:s3>>16&0xff
623{ .mmi; ld1 te33=[te33] // 8/2:te0[s1]
624 add te11=te11,te0 // 8/2:te0+s3>>16
625 dep te32=te23,te32,8,8} // 8/1:
626{ .mmi; ld1 te30=[te30] // 8/3:te0[s2]
627 add te12=te12,te0 // 8/3:te0+s0>>16
628 shl te00=te00,twenty4};; // 8/0:
629{ .mii; ld1 te11=[te11] // 9/2:te0[s3>>16]
630 dep te31=te13,te31,16,8 // 9/0:
631 shl te01=te01,twenty4};; // 9/1:
632{ .mii; ld1 te12=[te12] // 10/3:te0[s0>>16]
633 dep te33=te20,te33,8,8 // 10/2:
634 shl te02=te02,twenty4};; // 10/2:
635{ .mii; xor t0=t0,te31 // 11/0:
636 dep te30=te21,te30,8,8 // 11/3:
637 shl te10=te10,sixteen};; // 11/1:
638{ .mii; xor r16=t0,te00 // 12/0:done!
639 dep te33=te11,te33,16,8 // 12/2:
640 shl te03=te03,twenty4};; // 12/3:
641{ .mmi; xor t1=t1,te01 // 13/1:
642 xor t2=t2,te02 // 13/2:
643 dep te30=te12,te30,16,8};; // 13/3:
644{ .mmi; xor t1=t1,te32 // 14/1:
645 xor r24=t2,te33 // 14/2:done!
646 xor t3=t3,te30 };; // 14/3:
647{ .mib; xor r20=t1,te10 // 15/1:done!
648 xor r28=t3,te03 // 15/3:done!
649 br.ret.sptk b6 };;
650.endp _ia64_AES_decrypt#
651
652// void AES_decrypt (const void *in,void *out,const AES_KEY *key);
653.global AES_decrypt#
654.proc AES_decrypt#
655.align 32
656AES_decrypt:
657 .prologue
658 .save ar.pfs,pfssave
659{ .mmi; alloc pfssave=ar.pfs,3,1,12,0
660 and out0=3,in0
661 mov r3=ip }
662{ .mmi; ADDP in0=0,in0
663 mov loc0=psr.um
664 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
665
666{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
667 add out8=(AES_Td#-AES_decrypt#),r3 // Te0
668 .save pr,prsave
669 mov prsave=pr }
670{ .mmi; rum 1<<3 // clear um.ac
671 .save ar.lc,lcsave
672 mov lcsave=ar.lc };;
673
674 .body
675#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
676{ .mib; cmp.ne p6,p0=out0,r0
677 add out0=4,in0
678(p6) br.dpnt.many .Ld_i_unaligned };;
679
680{ .mmi; ld4 out1=[in0],8 // s0
681 and out9=3,in1
682 mov twenty4=24 }
683{ .mmi; ld4 out3=[out0],8 // s1
684 ADDP rk0=0,in2
685 mov sixteen=16 };;
686{ .mmi; ld4 out5=[in0] // s2
687 cmp.ne p6,p0=out9,r0
688 mov maskff=0xff }
689{ .mmb; ld4 out7=[out0] // s3
690 ADDP rk1=KSZ,in2
691 br.call.sptk.many b6=_ia64_AES_decrypt };;
692
693{ .mib; ADDP in0=4,in1
694 ADDP in1=0,in1
695(p6) br.spnt .Ld_o_unaligned };;
696
697{ .mii; mov psr.um=loc0
698 mov ar.pfs=pfssave
699 mov ar.lc=lcsave };;
700{ .mmi; st4 [in1]=r16,8 // s0
701 st4 [in0]=r20,8 // s1
702 mov pr=prsave,0x1ffff };;
703{ .mmb; st4 [in1]=r24 // s2
704 st4 [in0]=r28 // s3
705 br.ret.sptk.many b0 };;
706#endif
707
708.align 32
709.Ld_i_unaligned:
710{ .mmi; add out0=1,in0
711 add out2=2,in0
712 add out4=3,in0 };;
713{ .mmi; ld1 r16=[in0],4
714 ld1 r17=[out0],4 }//;;
715{ .mmi; ld1 r18=[out2],4
716 ld1 out1=[out4],4 };; // s0
717{ .mmi; ld1 r20=[in0],4
718 ld1 r21=[out0],4 }//;;
719{ .mmi; ld1 r22=[out2],4
720 ld1 out3=[out4],4 };; // s1
721{ .mmi; ld1 r24=[in0],4
722 ld1 r25=[out0],4 }//;;
723{ .mmi; ld1 r26=[out2],4
724 ld1 out5=[out4],4 };; // s2
725{ .mmi; ld1 r28=[in0]
726 ld1 r29=[out0] }//;;
727{ .mmi; ld1 r30=[out2]
728 ld1 out7=[out4] };; // s3
729
730{ .mii;
731 dep out1=r16,out1,24,8 //;;
732 dep out3=r20,out3,24,8 }//;;
733{ .mii; ADDP rk0=0,in2
734 dep out5=r24,out5,24,8 //;;
735 dep out7=r28,out7,24,8 };;
736{ .mii; ADDP rk1=KSZ,in2
737 dep out1=r17,out1,16,8 //;;
738 dep out3=r21,out3,16,8 }//;;
739{ .mii; mov twenty4=24
740 dep out5=r25,out5,16,8 //;;
741 dep out7=r29,out7,16,8 };;
742{ .mii; mov sixteen=16
743 dep out1=r18,out1,8,8 //;;
744 dep out3=r22,out3,8,8 }//;;
745{ .mii; mov maskff=0xff
746 dep out5=r26,out5,8,8 //;;
747 dep out7=r30,out7,8,8 };;
748
749{ .mib; br.call.sptk.many b6=_ia64_AES_decrypt };;
750
751.Ld_o_unaligned:
752{ .mii; ADDP out0=0,in1
753 extr.u r17=r16,8,8 // s0
754 shr.u r19=r16,twenty4 }//;;
755{ .mii; ADDP out1=1,in1
756 extr.u r18=r16,16,8
757 shr.u r23=r20,twenty4 }//;; // s1
758{ .mii; ADDP out2=2,in1
759 extr.u r21=r20,8,8
760 shr.u r22=r20,sixteen }//;;
761{ .mii; ADDP out3=3,in1
762 extr.u r25=r24,8,8 // s2
763 shr.u r27=r24,twenty4 };;
764{ .mii; st1 [out3]=r16,4
765 extr.u r26=r24,16,8
766 shr.u r31=r28,twenty4 }//;; // s3
767{ .mii; st1 [out2]=r17,4
768 extr.u r29=r28,8,8
769 shr.u r30=r28,sixteen }//;;
770
771{ .mmi; st1 [out1]=r18,4
772 st1 [out0]=r19,4 };;
773{ .mmi; st1 [out3]=r20,4
774 st1 [out2]=r21,4 }//;;
775{ .mmi; st1 [out1]=r22,4
776 st1 [out0]=r23,4 };;
777{ .mmi; st1 [out3]=r24,4
778 st1 [out2]=r25,4
779 mov pr=prsave,0x1ffff }//;;
780{ .mmi; st1 [out1]=r26,4
781 st1 [out0]=r27,4
782 mov ar.pfs=pfssave };;
783{ .mmi; st1 [out3]=r28
784 st1 [out2]=r29
785 mov ar.lc=lcsave }//;;
786{ .mmi; st1 [out1]=r30
787 st1 [out0]=r31 }
788{ .mfb; mov psr.um=loc0 // restore user mask
789 br.ret.sptk.many b0 };;
790.endp AES_decrypt#
791
792// leave it in .text segment...
793.align 64
794.global AES_Te#
795.type AES_Te#,@object
796AES_Te: data4 0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
797 data4 0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
798 data4 0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
799 data4 0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
800 data4 0x60303050,0x60303050, 0x02010103,0x02010103
801 data4 0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
802 data4 0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
803 data4 0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
804 data4 0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
805 data4 0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
806 data4 0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
807 data4 0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
808 data4 0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
809 data4 0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
810 data4 0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
811 data4 0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
812 data4 0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
813 data4 0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
814 data4 0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
815 data4 0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
816 data4 0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
817 data4 0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108
818 data4 0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
819 data4 0x62313153,0x62313153, 0x2a15153f,0x2a15153f
820 data4 0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
821 data4 0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
822 data4 0x30181828,0x30181828, 0x379696a1,0x379696a1
823 data4 0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
824 data4 0x0e070709,0x0e070709, 0x24121236,0x24121236
825 data4 0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
826 data4 0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
827 data4 0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
828 data4 0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
829 data4 0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
830 data4 0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
831 data4 0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
832 data4 0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
833 data4 0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
834 data4 0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
835 data4 0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
836 data4 0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
837 data4 0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
838 data4 0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
839 data4 0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
840 data4 0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
841 data4 0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
842 data4 0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
843 data4 0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
844 data4 0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
845 data4 0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
846 data4 0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
847 data4 0x66333355,0x66333355, 0x11858594,0x11858594
848 data4 0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
849 data4 0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
850 data4 0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
851 data4 0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
852 data4 0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
853 data4 0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
854 data4 0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
855 data4 0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
856 data4 0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
857 data4 0xafdada75,0xafdada75, 0x42212163,0x42212163
858 data4 0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
859 data4 0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
860 data4 0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
861 data4 0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
862 data4 0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
863 data4 0x884444cc,0x884444cc, 0x2e171739,0x2e171739
864 data4 0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
865 data4 0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
866 data4 0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
867 data4 0x3219192b,0x3219192b, 0xe6737395,0xe6737395
868 data4 0xc06060a0,0xc06060a0, 0x19818198,0x19818198
869 data4 0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
870 data4 0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
871 data4 0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
872 data4 0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
873 data4 0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
874 data4 0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
875 data4 0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
876 data4 0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
877 data4 0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
878 data4 0x924949db,0x924949db, 0x0c06060a,0x0c06060a
879 data4 0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
880 data4 0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
881 data4 0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
882 data4 0x399191a8,0x399191a8, 0x319595a4,0x319595a4
883 data4 0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
884 data4 0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
885 data4 0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
886 data4 0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
887 data4 0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
888 data4 0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
889 data4 0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
890 data4 0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
891 data4 0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
892 data4 0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
893 data4 0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
894 data4 0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
895 data4 0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
896 data4 0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
897 data4 0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
898 data4 0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
899 data4 0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
900 data4 0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
901 data4 0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
902 data4 0x904848d8,0x904848d8, 0x06030305,0x06030305
903 data4 0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
904 data4 0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
905 data4 0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
906 data4 0x17868691,0x17868691, 0x99c1c158,0x99c1c158
907 data4 0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
908 data4 0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
909 data4 0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
910 data4 0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
911 data4 0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
912 data4 0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
913 data4 0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
914 data4 0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
915 data4 0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
916 data4 0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
917 data4 0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
918 data4 0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
919 data4 0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
920 data4 0x824141c3,0x824141c3, 0x299999b0,0x299999b0
921 data4 0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
922 data4 0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
923 data4 0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
924// Te4:
925 data1 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
926 data1 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
927 data1 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
928 data1 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
929 data1 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
930 data1 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
931 data1 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
932 data1 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
933 data1 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
934 data1 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
935 data1 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
936 data1 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
937 data1 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
938 data1 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
939 data1 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
940 data1 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
941 data1 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
942 data1 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
943 data1 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
944 data1 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
945 data1 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
946 data1 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
947 data1 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
948 data1 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
949 data1 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
950 data1 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
951 data1 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
952 data1 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
953 data1 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
954 data1 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
955 data1 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
956 data1 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
957.size AES_Te#,2048+256 // HP-UX assembler fails to ".-AES_Te#"
958
959.align 64
960.global AES_Td#
961.type AES_Td#,@object
962AES_Td: data4 0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
963 data4 0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
964 data4 0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
965 data4 0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
966 data4 0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
967 data4 0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
968 data4 0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
969 data4 0x26354480,0x26354480, 0xb562a38f,0xb562a38f
970 data4 0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
971 data4 0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
972 data4 0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
973 data4 0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
974 data4 0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
975 data4 0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
976 data4 0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
977 data4 0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
978 data4 0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
979 data4 0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
980 data4 0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
981 data4 0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
982 data4 0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
983 data4 0x97513360,0x97513360, 0x62537f45,0x62537f45
984 data4 0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
985 data4 0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
986 data4 0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
987 data4 0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
988 data4 0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
989 data4 0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
990 data4 0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
991 data4 0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
992 data4 0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
993 data4 0x02036aba,0x02036aba, 0xed16825c,0xed16825c
994 data4 0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
995 data4 0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
996 data4 0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
997 data4 0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
998 data4 0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
999 data4 0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
1000 data4 0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
1001 data4 0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
1002 data4 0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
1003 data4 0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
1004 data4 0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
1005 data4 0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
1006 data4 0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
1007 data4 0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
1008 data4 0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
1009 data4 0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
1010 data4 0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
1011 data4 0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
1012 data4 0x09808683,0x09808683, 0x322bed48,0x322bed48
1013 data4 0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
1014 data4 0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
1015 data4 0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
1016 data4 0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
1017 data4 0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
1018 data4 0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
1019 data4 0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
1020 data4 0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
1021 data4 0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
1022 data4 0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
1023 data4 0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
1024 data4 0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
1025 data4 0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
1026 data4 0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
1027 data4 0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
1028 data4 0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
1029 data4 0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
1030 data4 0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
1031 data4 0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
1032 data4 0xd731dcca,0xd731dcca, 0x42638510,0x42638510
1033 data4 0x13972240,0x13972240, 0x84c61120,0x84c61120
1034 data4 0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
1035 data4 0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
1036 data4 0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
1037 data4 0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
1038 data4 0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
1039 data4 0x119448fa,0x119448fa, 0x47e96422,0x47e96422
1040 data4 0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
1041 data4 0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
1042 data4 0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
1043 data4 0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
1044 data4 0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
1045 data4 0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
1046 data4 0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
1047 data4 0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
1048 data4 0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
1049 data4 0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
1050 data4 0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
1051 data4 0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
1052 data4 0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
1053 data4 0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
1054 data4 0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
1055 data4 0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
1056 data4 0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
1057 data4 0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
1058 data4 0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
1059 data4 0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
1060 data4 0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
1061 data4 0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
1062 data4 0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
1063 data4 0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
1064 data4 0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
1065 data4 0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
1066 data4 0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
1067 data4 0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
1068 data4 0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
1069 data4 0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
1070 data4 0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
1071 data4 0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
1072 data4 0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
1073 data4 0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
1074 data4 0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
1075 data4 0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
1076 data4 0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
1077 data4 0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
1078 data4 0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
1079 data4 0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
1080 data4 0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
1081 data4 0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
1082 data4 0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
1083 data4 0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
1084 data4 0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
1085 data4 0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
1086 data4 0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
1087 data4 0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
1088 data4 0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
1089 data4 0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
1090// Td4:
1091 data1 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1092 data1 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1093 data1 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1094 data1 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1095 data1 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1096 data1 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1097 data1 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1098 data1 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1099 data1 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1100 data1 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1101 data1 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1102 data1 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1103 data1 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1104 data1 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1105 data1 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1106 data1 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1107 data1 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1108 data1 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1109 data1 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1110 data1 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1111 data1 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1112 data1 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1113 data1 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1114 data1 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1115 data1 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1116 data1 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1117 data1 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1118 data1 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1119 data1 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1120 data1 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1121 data1 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1122 data1 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1123.size AES_Td#,2048+256 // HP-UX assembler fails to ".-AES_Td#"
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
new file mode 100755
index 0000000000..44e0bf8cae
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
@@ -0,0 +1,1578 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.2.
10#
11# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
12# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
13# [you'll notice a lot of resemblance], such as compressed S-boxes
14# in little-endian byte order, prefetch of these tables in CBC mode,
15# as well as avoiding L1 cache aliasing between stack frame and key
16# schedule and already mentioned tables, compressed Td4...
17#
18# Performance in number of cycles per processed byte for 128-bit key:
19#
20# ECB CBC encrypt
21# AMD64 13.7 13.0(*)
22# EM64T 20.2 18.6(*)
23#
24# (*) CBC benchmarks are better than ECB thanks to custom ABI used
25# by the private block encryption function.
26
27$verticalspin=1; # unlike 32-bit version $verticalspin performs
28 # ~15% better on both AMD and Intel cores
29$output=shift;
30open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output";
31
32$code=".text\n";
33
34$s0="%eax";
35$s1="%ebx";
36$s2="%ecx";
37$s3="%edx";
38$acc0="%esi";
39$acc1="%edi";
40$acc2="%ebp";
41$inp="%r8";
42$out="%r9";
43$t0="%r10d";
44$t1="%r11d";
45$t2="%r12d";
46$rnds="%r13d";
47$sbox="%r14";
48$key="%r15";
49
50sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; }
51sub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/;
52 $r =~ s/%[er]([sd]i)/%\1l/;
53 $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; }
54sub _data_word()
55{ my $i;
56 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
57}
58sub data_word()
59{ my $i;
60 my $last=pop(@_);
61 $code.=".long\t";
62 while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; }
63 $code.=sprintf"0x%08x\n",$last;
64}
65
66sub data_byte()
67{ my $i;
68 my $last=pop(@_);
69 $code.=".byte\t";
70 while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; }
71 $code.=sprintf"0x%02x\n",$last&0xff;
72}
73
74sub encvert()
75{ my $t3="%r8d"; # zaps $inp!
76
77$code.=<<___;
78 # favor 3-way issue Opteron pipeline...
79 movzb `&lo("$s0")`,$acc0
80 movzb `&lo("$s1")`,$acc1
81 movzb `&lo("$s2")`,$acc2
82 mov 0($sbox,$acc0,8),$t0
83 mov 0($sbox,$acc1,8),$t1
84 mov 0($sbox,$acc2,8),$t2
85
86 movzb `&hi("$s1")`,$acc0
87 movzb `&hi("$s2")`,$acc1
88 movzb `&lo("$s3")`,$acc2
89 xor 3($sbox,$acc0,8),$t0
90 xor 3($sbox,$acc1,8),$t1
91 mov 0($sbox,$acc2,8),$t3
92
93 movzb `&hi("$s3")`,$acc0
94 shr \$16,$s2
95 movzb `&hi("$s0")`,$acc2
96 xor 3($sbox,$acc0,8),$t2
97 shr \$16,$s3
98 xor 3($sbox,$acc2,8),$t3
99
100 shr \$16,$s1
101 lea 16($key),$key
102 shr \$16,$s0
103
104 movzb `&lo("$s2")`,$acc0
105 movzb `&lo("$s3")`,$acc1
106 movzb `&lo("$s0")`,$acc2
107 xor 2($sbox,$acc0,8),$t0
108 xor 2($sbox,$acc1,8),$t1
109 xor 2($sbox,$acc2,8),$t2
110
111 movzb `&hi("$s3")`,$acc0
112 movzb `&hi("$s0")`,$acc1
113 movzb `&lo("$s1")`,$acc2
114 xor 1($sbox,$acc0,8),$t0
115 xor 1($sbox,$acc1,8),$t1
116 xor 2($sbox,$acc2,8),$t3
117
118 mov 12($key),$s3
119 movzb `&hi("$s1")`,$acc1
120 movzb `&hi("$s2")`,$acc2
121 mov 0($key),$s0
122 xor 1($sbox,$acc1,8),$t2
123 xor 1($sbox,$acc2,8),$t3
124
125 mov 4($key),$s1
126 mov 8($key),$s2
127 xor $t0,$s0
128 xor $t1,$s1
129 xor $t2,$s2
130 xor $t3,$s3
131___
132}
133
134sub enclastvert()
135{ my $t3="%r8d"; # zaps $inp!
136
137$code.=<<___;
138 movzb `&lo("$s0")`,$acc0
139 movzb `&lo("$s1")`,$acc1
140 movzb `&lo("$s2")`,$acc2
141 mov 2($sbox,$acc0,8),$t0
142 mov 2($sbox,$acc1,8),$t1
143 mov 2($sbox,$acc2,8),$t2
144
145 and \$0x000000ff,$t0
146 and \$0x000000ff,$t1
147 and \$0x000000ff,$t2
148
149 movzb `&lo("$s3")`,$acc0
150 movzb `&hi("$s1")`,$acc1
151 movzb `&hi("$s2")`,$acc2
152 mov 2($sbox,$acc0,8),$t3
153 mov 0($sbox,$acc1,8),$acc1 #$t0
154 mov 0($sbox,$acc2,8),$acc2 #$t1
155
156 and \$0x000000ff,$t3
157 and \$0x0000ff00,$acc1
158 and \$0x0000ff00,$acc2
159
160 xor $acc1,$t0
161 xor $acc2,$t1
162 shr \$16,$s2
163
164 movzb `&hi("$s3")`,$acc0
165 movzb `&hi("$s0")`,$acc1
166 shr \$16,$s3
167 mov 0($sbox,$acc0,8),$acc0 #$t2
168 mov 0($sbox,$acc1,8),$acc1 #$t3
169
170 and \$0x0000ff00,$acc0
171 and \$0x0000ff00,$acc1
172 shr \$16,$s1
173 xor $acc0,$t2
174 xor $acc1,$t3
175 shr \$16,$s0
176
177 movzb `&lo("$s2")`,$acc0
178 movzb `&lo("$s3")`,$acc1
179 movzb `&lo("$s0")`,$acc2
180 mov 0($sbox,$acc0,8),$acc0 #$t0
181 mov 0($sbox,$acc1,8),$acc1 #$t1
182 mov 0($sbox,$acc2,8),$acc2 #$t2
183
184 and \$0x00ff0000,$acc0
185 and \$0x00ff0000,$acc1
186 and \$0x00ff0000,$acc2
187
188 xor $acc0,$t0
189 xor $acc1,$t1
190 xor $acc2,$t2
191
192 movzb `&lo("$s1")`,$acc0
193 movzb `&hi("$s3")`,$acc1
194 movzb `&hi("$s0")`,$acc2
195 mov 0($sbox,$acc0,8),$acc0 #$t3
196 mov 2($sbox,$acc1,8),$acc1 #$t0
197 mov 2($sbox,$acc2,8),$acc2 #$t1
198
199 and \$0x00ff0000,$acc0
200 and \$0xff000000,$acc1
201 and \$0xff000000,$acc2
202
203 xor $acc0,$t3
204 xor $acc1,$t0
205 xor $acc2,$t1
206
207 movzb `&hi("$s1")`,$acc0
208 movzb `&hi("$s2")`,$acc1
209 mov 16+12($key),$s3
210 mov 2($sbox,$acc0,8),$acc0 #$t2
211 mov 2($sbox,$acc1,8),$acc1 #$t3
212 mov 16+0($key),$s0
213
214 and \$0xff000000,$acc0
215 and \$0xff000000,$acc1
216
217 xor $acc0,$t2
218 xor $acc1,$t3
219
220 mov 16+4($key),$s1
221 mov 16+8($key),$s2
222 xor $t0,$s0
223 xor $t1,$s1
224 xor $t2,$s2
225 xor $t3,$s3
226___
227}
228
229sub encstep()
230{ my ($i,@s) = @_;
231 my $tmp0=$acc0;
232 my $tmp1=$acc1;
233 my $tmp2=$acc2;
234 my $out=($t0,$t1,$t2,$s[0])[$i];
235
236 if ($i==3) {
237 $tmp0=$s[1];
238 $tmp1=$s[2];
239 $tmp2=$s[3];
240 }
241 $code.=" movzb ".&lo($s[0]).",$out\n";
242 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
243 $code.=" lea 16($key),$key\n" if ($i==0);
244
245 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
246 $code.=" mov 0($sbox,$out,8),$out\n";
247
248 $code.=" shr \$16,$tmp1\n";
249 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
250 $code.=" xor 3($sbox,$tmp0,8),$out\n";
251
252 $code.=" movzb ".&lo($tmp1).",$tmp1\n";
253 $code.=" shr \$24,$tmp2\n";
254 $code.=" xor 4*$i($key),$out\n";
255
256 $code.=" xor 2($sbox,$tmp1,8),$out\n";
257 $code.=" xor 1($sbox,$tmp2,8),$out\n";
258
259 $code.=" mov $t0,$s[1]\n" if ($i==3);
260 $code.=" mov $t1,$s[2]\n" if ($i==3);
261 $code.=" mov $t2,$s[3]\n" if ($i==3);
262 $code.="\n";
263}
264
265sub enclast()
266{ my ($i,@s)=@_;
267 my $tmp0=$acc0;
268 my $tmp1=$acc1;
269 my $tmp2=$acc2;
270 my $out=($t0,$t1,$t2,$s[0])[$i];
271
272 if ($i==3) {
273 $tmp0=$s[1];
274 $tmp1=$s[2];
275 $tmp2=$s[3];
276 }
277 $code.=" movzb ".&lo($s[0]).",$out\n";
278 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
279
280 $code.=" mov 2($sbox,$out,8),$out\n";
281 $code.=" shr \$16,$tmp1\n";
282 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
283
284 $code.=" and \$0x000000ff,$out\n";
285 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
286 $code.=" movzb ".&lo($tmp1).",$tmp1\n";
287 $code.=" shr \$24,$tmp2\n";
288
289 $code.=" mov 0($sbox,$tmp0,8),$tmp0\n";
290 $code.=" mov 0($sbox,$tmp1,8),$tmp1\n";
291 $code.=" mov 2($sbox,$tmp2,8),$tmp2\n";
292
293 $code.=" and \$0x0000ff00,$tmp0\n";
294 $code.=" and \$0x00ff0000,$tmp1\n";
295 $code.=" and \$0xff000000,$tmp2\n";
296
297 $code.=" xor $tmp0,$out\n";
298 $code.=" mov $t0,$s[1]\n" if ($i==3);
299 $code.=" xor $tmp1,$out\n";
300 $code.=" mov $t1,$s[2]\n" if ($i==3);
301 $code.=" xor $tmp2,$out\n";
302 $code.=" mov $t2,$s[3]\n" if ($i==3);
303 $code.="\n";
304}
305
306$code.=<<___;
307.type _x86_64_AES_encrypt,\@abi-omnipotent
308.align 16
309_x86_64_AES_encrypt:
310 xor 0($key),$s0 # xor with key
311 xor 4($key),$s1
312 xor 8($key),$s2
313 xor 12($key),$s3
314
315 mov 240($key),$rnds # load key->rounds
316 sub \$1,$rnds
317 jmp .Lenc_loop
318.align 16
319.Lenc_loop:
320___
321 if ($verticalspin) { &encvert(); }
322 else { &encstep(0,$s0,$s1,$s2,$s3);
323 &encstep(1,$s1,$s2,$s3,$s0);
324 &encstep(2,$s2,$s3,$s0,$s1);
325 &encstep(3,$s3,$s0,$s1,$s2);
326 }
327$code.=<<___;
328 sub \$1,$rnds
329 jnz .Lenc_loop
330___
331 if ($verticalspin) { &enclastvert(); }
332 else { &enclast(0,$s0,$s1,$s2,$s3);
333 &enclast(1,$s1,$s2,$s3,$s0);
334 &enclast(2,$s2,$s3,$s0,$s1);
335 &enclast(3,$s3,$s0,$s1,$s2);
336 $code.=<<___;
337 xor 16+0($key),$s0 # xor with key
338 xor 16+4($key),$s1
339 xor 16+8($key),$s2
340 xor 16+12($key),$s3
341___
342 }
343$code.=<<___;
344 .byte 0xf3,0xc3 # rep ret
345.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
346___
347
348# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
349$code.=<<___;
350.globl AES_encrypt
351.type AES_encrypt,\@function,3
352.align 16
353AES_encrypt:
354 push %rbx
355 push %rbp
356 push %r12
357 push %r13
358 push %r14
359 push %r15
360
361 mov %rdx,$key
362 mov %rdi,$inp
363 mov %rsi,$out
364
365 .picmeup $sbox
366 lea AES_Te-.($sbox),$sbox
367
368 mov 0($inp),$s0
369 mov 4($inp),$s1
370 mov 8($inp),$s2
371 mov 12($inp),$s3
372
373 call _x86_64_AES_encrypt
374
375 mov $s0,0($out)
376 mov $s1,4($out)
377 mov $s2,8($out)
378 mov $s3,12($out)
379
380 pop %r15
381 pop %r14
382 pop %r13
383 pop %r12
384 pop %rbp
385 pop %rbx
386 ret
387.size AES_encrypt,.-AES_encrypt
388___
389
390#------------------------------------------------------------------#
391
392sub decvert()
393{ my $t3="%r8d"; # zaps $inp!
394
395$code.=<<___;
396 # favor 3-way issue Opteron pipeline...
397 movzb `&lo("$s0")`,$acc0
398 movzb `&lo("$s1")`,$acc1
399 movzb `&lo("$s2")`,$acc2
400 mov 0($sbox,$acc0,8),$t0
401 mov 0($sbox,$acc1,8),$t1
402 mov 0($sbox,$acc2,8),$t2
403
404 movzb `&hi("$s3")`,$acc0
405 movzb `&hi("$s0")`,$acc1
406 movzb `&lo("$s3")`,$acc2
407 xor 3($sbox,$acc0,8),$t0
408 xor 3($sbox,$acc1,8),$t1
409 mov 0($sbox,$acc2,8),$t3
410
411 movzb `&hi("$s1")`,$acc0
412 shr \$16,$s0
413 movzb `&hi("$s2")`,$acc2
414 xor 3($sbox,$acc0,8),$t2
415 shr \$16,$s3
416 xor 3($sbox,$acc2,8),$t3
417
418 shr \$16,$s1
419 lea 16($key),$key
420 shr \$16,$s2
421
422 movzb `&lo("$s2")`,$acc0
423 movzb `&lo("$s3")`,$acc1
424 movzb `&lo("$s0")`,$acc2
425 xor 2($sbox,$acc0,8),$t0
426 xor 2($sbox,$acc1,8),$t1
427 xor 2($sbox,$acc2,8),$t2
428
429 movzb `&hi("$s1")`,$acc0
430 movzb `&hi("$s2")`,$acc1
431 movzb `&lo("$s1")`,$acc2
432 xor 1($sbox,$acc0,8),$t0
433 xor 1($sbox,$acc1,8),$t1
434 xor 2($sbox,$acc2,8),$t3
435
436 movzb `&hi("$s3")`,$acc0
437 mov 12($key),$s3
438 movzb `&hi("$s0")`,$acc2
439 xor 1($sbox,$acc0,8),$t2
440 mov 0($key),$s0
441 xor 1($sbox,$acc2,8),$t3
442
443 xor $t0,$s0
444 mov 4($key),$s1
445 mov 8($key),$s2
446 xor $t2,$s2
447 xor $t1,$s1
448 xor $t3,$s3
449___
450}
451
452sub declastvert()
453{ my $t3="%r8d"; # zaps $inp!
454
455$code.=<<___;
456 movzb `&lo("$s0")`,$acc0
457 movzb `&lo("$s1")`,$acc1
458 movzb `&lo("$s2")`,$acc2
459 movzb 2048($sbox,$acc0,1),$t0
460 movzb 2048($sbox,$acc1,1),$t1
461 movzb 2048($sbox,$acc2,1),$t2
462
463 movzb `&lo("$s3")`,$acc0
464 movzb `&hi("$s3")`,$acc1
465 movzb `&hi("$s0")`,$acc2
466 movzb 2048($sbox,$acc0,1),$t3
467 movzb 2048($sbox,$acc1,1),$acc1 #$t0
468 movzb 2048($sbox,$acc2,1),$acc2 #$t1
469
470 shl \$8,$acc1
471 shl \$8,$acc2
472
473 xor $acc1,$t0
474 xor $acc2,$t1
475 shr \$16,$s3
476
477 movzb `&hi("$s1")`,$acc0
478 movzb `&hi("$s2")`,$acc1
479 shr \$16,$s0
480 movzb 2048($sbox,$acc0,1),$acc0 #$t2
481 movzb 2048($sbox,$acc1,1),$acc1 #$t3
482
483 shl \$8,$acc0
484 shl \$8,$acc1
485 shr \$16,$s1
486 xor $acc0,$t2
487 xor $acc1,$t3
488 shr \$16,$s2
489
490 movzb `&lo("$s2")`,$acc0
491 movzb `&lo("$s3")`,$acc1
492 movzb `&lo("$s0")`,$acc2
493 movzb 2048($sbox,$acc0,1),$acc0 #$t0
494 movzb 2048($sbox,$acc1,1),$acc1 #$t1
495 movzb 2048($sbox,$acc2,1),$acc2 #$t2
496
497 shl \$16,$acc0
498 shl \$16,$acc1
499 shl \$16,$acc2
500
501 xor $acc0,$t0
502 xor $acc1,$t1
503 xor $acc2,$t2
504
505 movzb `&lo("$s1")`,$acc0
506 movzb `&hi("$s1")`,$acc1
507 movzb `&hi("$s2")`,$acc2
508 movzb 2048($sbox,$acc0,1),$acc0 #$t3
509 movzb 2048($sbox,$acc1,1),$acc1 #$t0
510 movzb 2048($sbox,$acc2,1),$acc2 #$t1
511
512 shl \$16,$acc0
513 shl \$24,$acc1
514 shl \$24,$acc2
515
516 xor $acc0,$t3
517 xor $acc1,$t0
518 xor $acc2,$t1
519
520 movzb `&hi("$s3")`,$acc0
521 movzb `&hi("$s0")`,$acc1
522 mov 16+12($key),$s3
523 movzb 2048($sbox,$acc0,1),$acc0 #$t2
524 movzb 2048($sbox,$acc1,1),$acc1 #$t3
525 mov 16+0($key),$s0
526
527 shl \$24,$acc0
528 shl \$24,$acc1
529
530 xor $acc0,$t2
531 xor $acc1,$t3
532
533 mov 16+4($key),$s1
534 mov 16+8($key),$s2
535 xor $t0,$s0
536 xor $t1,$s1
537 xor $t2,$s2
538 xor $t3,$s3
539___
540}
541
542sub decstep()
543{ my ($i,@s) = @_;
544 my $tmp0=$acc0;
545 my $tmp1=$acc1;
546 my $tmp2=$acc2;
547 my $out=($t0,$t1,$t2,$s[0])[$i];
548
549 $code.=" mov $s[0],$out\n" if ($i!=3);
550 $tmp1=$s[2] if ($i==3);
551 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
552 $code.=" and \$0xFF,$out\n";
553
554 $code.=" mov 0($sbox,$out,8),$out\n";
555 $code.=" shr \$16,$tmp1\n";
556 $tmp2=$s[3] if ($i==3);
557 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
558
559 $tmp0=$s[1] if ($i==3);
560 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
561 $code.=" and \$0xFF,$tmp1\n";
562 $code.=" shr \$24,$tmp2\n";
563
564 $code.=" xor 3($sbox,$tmp0,8),$out\n";
565 $code.=" xor 2($sbox,$tmp1,8),$out\n";
566 $code.=" xor 1($sbox,$tmp2,8),$out\n";
567
568 $code.=" mov $t2,$s[1]\n" if ($i==3);
569 $code.=" mov $t1,$s[2]\n" if ($i==3);
570 $code.=" mov $t0,$s[3]\n" if ($i==3);
571 $code.="\n";
572}
573
574sub declast()
575{ my ($i,@s)=@_;
576 my $tmp0=$acc0;
577 my $tmp1=$acc1;
578 my $tmp2=$acc2;
579 my $out=($t0,$t1,$t2,$s[0])[$i];
580
581 $code.=" mov $s[0],$out\n" if ($i!=3);
582 $tmp1=$s[2] if ($i==3);
583 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
584 $code.=" and \$0xFF,$out\n";
585
586 $code.=" movzb 2048($sbox,$out,1),$out\n";
587 $code.=" shr \$16,$tmp1\n";
588 $tmp2=$s[3] if ($i==3);
589 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
590
591 $tmp0=$s[1] if ($i==3);
592 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
593 $code.=" and \$0xFF,$tmp1\n";
594 $code.=" shr \$24,$tmp2\n";
595
596 $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n";
597 $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n";
598 $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n";
599
600 $code.=" shl \$8,$tmp0\n";
601 $code.=" shl \$16,$tmp1\n";
602 $code.=" shl \$24,$tmp2\n";
603
604 $code.=" xor $tmp0,$out\n";
605 $code.=" mov $t2,$s[1]\n" if ($i==3);
606 $code.=" xor $tmp1,$out\n";
607 $code.=" mov $t1,$s[2]\n" if ($i==3);
608 $code.=" xor $tmp2,$out\n";
609 $code.=" mov $t0,$s[3]\n" if ($i==3);
610 $code.="\n";
611}
612
613$code.=<<___;
614.type _x86_64_AES_decrypt,\@abi-omnipotent
615.align 16
616_x86_64_AES_decrypt:
617 xor 0($key),$s0 # xor with key
618 xor 4($key),$s1
619 xor 8($key),$s2
620 xor 12($key),$s3
621
622 mov 240($key),$rnds # load key->rounds
623 sub \$1,$rnds
624 jmp .Ldec_loop
625.align 16
626.Ldec_loop:
627___
628 if ($verticalspin) { &decvert(); }
629 else { &decstep(0,$s0,$s3,$s2,$s1);
630 &decstep(1,$s1,$s0,$s3,$s2);
631 &decstep(2,$s2,$s1,$s0,$s3);
632 &decstep(3,$s3,$s2,$s1,$s0);
633 $code.=<<___;
634 lea 16($key),$key
635 xor 0($key),$s0 # xor with key
636 xor 4($key),$s1
637 xor 8($key),$s2
638 xor 12($key),$s3
639___
640 }
641$code.=<<___;
642 sub \$1,$rnds
643 jnz .Ldec_loop
644___
645 if ($verticalspin) { &declastvert(); }
646 else { &declast(0,$s0,$s3,$s2,$s1);
647 &declast(1,$s1,$s0,$s3,$s2);
648 &declast(2,$s2,$s1,$s0,$s3);
649 &declast(3,$s3,$s2,$s1,$s0);
650 $code.=<<___;
651 xor 16+0($key),$s0 # xor with key
652 xor 16+4($key),$s1
653 xor 16+8($key),$s2
654 xor 16+12($key),$s3
655___
656 }
657$code.=<<___;
658 .byte 0xf3,0xc3 # rep ret
659.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
660___
661
662# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
663$code.=<<___;
664.globl AES_decrypt
665.type AES_decrypt,\@function,3
666.align 16
667AES_decrypt:
668 push %rbx
669 push %rbp
670 push %r12
671 push %r13
672 push %r14
673 push %r15
674
675 mov %rdx,$key
676 mov %rdi,$inp
677 mov %rsi,$out
678
679 .picmeup $sbox
680 lea AES_Td-.($sbox),$sbox
681
682 # prefetch Td4
683 lea 2048+128($sbox),$sbox;
684 mov 0-128($sbox),$s0
685 mov 32-128($sbox),$s1
686 mov 64-128($sbox),$s2
687 mov 96-128($sbox),$s3
688 mov 128-128($sbox),$s0
689 mov 160-128($sbox),$s1
690 mov 192-128($sbox),$s2
691 mov 224-128($sbox),$s3
692 lea -2048-128($sbox),$sbox;
693
694 mov 0($inp),$s0
695 mov 4($inp),$s1
696 mov 8($inp),$s2
697 mov 12($inp),$s3
698
699 call _x86_64_AES_decrypt
700
701 mov $s0,0($out)
702 mov $s1,4($out)
703 mov $s2,8($out)
704 mov $s3,12($out)
705
706 pop %r15
707 pop %r14
708 pop %r13
709 pop %r12
710 pop %rbp
711 pop %rbx
712 ret
713.size AES_decrypt,.-AES_decrypt
714___
715#------------------------------------------------------------------#
716
717sub enckey()
718{
719$code.=<<___;
720 movz %dl,%esi # rk[i]>>0
721 mov 2(%rbp,%rsi,8),%ebx
722 movz %dh,%esi # rk[i]>>8
723 and \$0xFF000000,%ebx
724 xor %ebx,%eax
725
726 mov 2(%rbp,%rsi,8),%ebx
727 shr \$16,%edx
728 and \$0x000000FF,%ebx
729 movz %dl,%esi # rk[i]>>16
730 xor %ebx,%eax
731
732 mov 0(%rbp,%rsi,8),%ebx
733 movz %dh,%esi # rk[i]>>24
734 and \$0x0000FF00,%ebx
735 xor %ebx,%eax
736
737 mov 0(%rbp,%rsi,8),%ebx
738 and \$0x00FF0000,%ebx
739 xor %ebx,%eax
740
741 xor 2048(%rbp,%rcx,4),%eax # rcon
742___
743}
744
745# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
746# AES_KEY *key)
747$code.=<<___;
748.globl AES_set_encrypt_key
749.type AES_set_encrypt_key,\@function,3
750.align 16
751AES_set_encrypt_key:
752 push %rbx
753 push %rbp
754
755 mov %esi,%ecx # %ecx=bits
756 mov %rdi,%rsi # %rsi=userKey
757 mov %rdx,%rdi # %rdi=key
758
759 test \$-1,%rsi
760 jz .Lbadpointer
761 test \$-1,%rdi
762 jz .Lbadpointer
763
764 .picmeup %rbp
765 lea AES_Te-.(%rbp),%rbp
766
767 cmp \$128,%ecx
768 je .L10rounds
769 cmp \$192,%ecx
770 je .L12rounds
771 cmp \$256,%ecx
772 je .L14rounds
773 mov \$-2,%rax # invalid number of bits
774 jmp .Lexit
775
776.L10rounds:
777 mov 0(%rsi),%eax # copy first 4 dwords
778 mov 4(%rsi),%ebx
779 mov 8(%rsi),%ecx
780 mov 12(%rsi),%edx
781 mov %eax,0(%rdi)
782 mov %ebx,4(%rdi)
783 mov %ecx,8(%rdi)
784 mov %edx,12(%rdi)
785
786 xor %ecx,%ecx
787 jmp .L10shortcut
788.align 4
789.L10loop:
790 mov 0(%rdi),%eax # rk[0]
791 mov 12(%rdi),%edx # rk[3]
792.L10shortcut:
793___
794 &enckey ();
795$code.=<<___;
796 mov %eax,16(%rdi) # rk[4]
797 xor 4(%rdi),%eax
798 mov %eax,20(%rdi) # rk[5]
799 xor 8(%rdi),%eax
800 mov %eax,24(%rdi) # rk[6]
801 xor 12(%rdi),%eax
802 mov %eax,28(%rdi) # rk[7]
803 add \$1,%ecx
804 lea 16(%rdi),%rdi
805 cmp \$10,%ecx
806 jl .L10loop
807
808 movl \$10,80(%rdi) # setup number of rounds
809 xor %rax,%rax
810 jmp .Lexit
811
812.L12rounds:
813 mov 0(%rsi),%eax # copy first 6 dwords
814 mov 4(%rsi),%ebx
815 mov 8(%rsi),%ecx
816 mov 12(%rsi),%edx
817 mov %eax,0(%rdi)
818 mov %ebx,4(%rdi)
819 mov %ecx,8(%rdi)
820 mov %edx,12(%rdi)
821 mov 16(%rsi),%ecx
822 mov 20(%rsi),%edx
823 mov %ecx,16(%rdi)
824 mov %edx,20(%rdi)
825
826 xor %ecx,%ecx
827 jmp .L12shortcut
828.align 4
829.L12loop:
830 mov 0(%rdi),%eax # rk[0]
831 mov 20(%rdi),%edx # rk[5]
832.L12shortcut:
833___
834 &enckey ();
835$code.=<<___;
836 mov %eax,24(%rdi) # rk[6]
837 xor 4(%rdi),%eax
838 mov %eax,28(%rdi) # rk[7]
839 xor 8(%rdi),%eax
840 mov %eax,32(%rdi) # rk[8]
841 xor 12(%rdi),%eax
842 mov %eax,36(%rdi) # rk[9]
843
844 cmp \$7,%ecx
845 je .L12break
846 add \$1,%ecx
847
848 xor 16(%rdi),%eax
849 mov %eax,40(%rdi) # rk[10]
850 xor 20(%rdi),%eax
851 mov %eax,44(%rdi) # rk[11]
852
853 lea 24(%rdi),%rdi
854 jmp .L12loop
855.L12break:
856 movl \$12,72(%rdi) # setup number of rounds
857 xor %rax,%rax
858 jmp .Lexit
859
860.L14rounds:
861 mov 0(%rsi),%eax # copy first 8 dwords
862 mov 4(%rsi),%ebx
863 mov 8(%rsi),%ecx
864 mov 12(%rsi),%edx
865 mov %eax,0(%rdi)
866 mov %ebx,4(%rdi)
867 mov %ecx,8(%rdi)
868 mov %edx,12(%rdi)
869 mov 16(%rsi),%eax
870 mov 20(%rsi),%ebx
871 mov 24(%rsi),%ecx
872 mov 28(%rsi),%edx
873 mov %eax,16(%rdi)
874 mov %ebx,20(%rdi)
875 mov %ecx,24(%rdi)
876 mov %edx,28(%rdi)
877
878 xor %ecx,%ecx
879 jmp .L14shortcut
880.align 4
881.L14loop:
882 mov 28(%rdi),%edx # rk[4]
883.L14shortcut:
884 mov 0(%rdi),%eax # rk[0]
885___
886 &enckey ();
887$code.=<<___;
888 mov %eax,32(%rdi) # rk[8]
889 xor 4(%rdi),%eax
890 mov %eax,36(%rdi) # rk[9]
891 xor 8(%rdi),%eax
892 mov %eax,40(%rdi) # rk[10]
893 xor 12(%rdi),%eax
894 mov %eax,44(%rdi) # rk[11]
895
896 cmp \$6,%ecx
897 je .L14break
898 add \$1,%ecx
899
900 mov %eax,%edx
901 mov 16(%rdi),%eax # rk[4]
902 movz %dl,%esi # rk[11]>>0
903 mov 2(%rbp,%rsi,8),%ebx
904 movz %dh,%esi # rk[11]>>8
905 and \$0x000000FF,%ebx
906 xor %ebx,%eax
907
908 mov 0(%rbp,%rsi,8),%ebx
909 shr \$16,%edx
910 and \$0x0000FF00,%ebx
911 movz %dl,%esi # rk[11]>>16
912 xor %ebx,%eax
913
914 mov 0(%rbp,%rsi,8),%ebx
915 movz %dh,%esi # rk[11]>>24
916 and \$0x00FF0000,%ebx
917 xor %ebx,%eax
918
919 mov 2(%rbp,%rsi,8),%ebx
920 and \$0xFF000000,%ebx
921 xor %ebx,%eax
922
923 mov %eax,48(%rdi) # rk[12]
924 xor 20(%rdi),%eax
925 mov %eax,52(%rdi) # rk[13]
926 xor 24(%rdi),%eax
927 mov %eax,56(%rdi) # rk[14]
928 xor 28(%rdi),%eax
929 mov %eax,60(%rdi) # rk[15]
930
931 lea 32(%rdi),%rdi
932 jmp .L14loop
933.L14break:
934 movl \$14,48(%rdi) # setup number of rounds
935 xor %rax,%rax
936 jmp .Lexit
937
938.Lbadpointer:
939 mov \$-1,%rax
940.Lexit:
941 pop %rbp
942 pop %rbx
943 ret
944.size AES_set_encrypt_key,.-AES_set_encrypt_key
945___
946
947sub deckey()
948{ my ($i,$ptr,$te,$td) = @_;
949$code.=<<___;
950 mov $i($ptr),%eax
951 mov %eax,%edx
952 movz %ah,%ebx
953 shr \$16,%edx
954 and \$0xFF,%eax
955 movzb 2($te,%rax,8),%rax
956 movzb 2($te,%rbx,8),%rbx
957 mov 0($td,%rax,8),%eax
958 xor 3($td,%rbx,8),%eax
959 movzb %dh,%ebx
960 and \$0xFF,%edx
961 movzb 2($te,%rdx,8),%rdx
962 movzb 2($te,%rbx,8),%rbx
963 xor 2($td,%rdx,8),%eax
964 xor 1($td,%rbx,8),%eax
965 mov %eax,$i($ptr)
966___
967}
968
969# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
970# AES_KEY *key)
971$code.=<<___;
972.globl AES_set_decrypt_key
973.type AES_set_decrypt_key,\@function,3
974.align 16
975AES_set_decrypt_key:
976 push %rdx
977 call AES_set_encrypt_key
978 cmp \$0,%eax
979 je .Lproceed
980 lea 24(%rsp),%rsp
981 ret
982.Lproceed:
983 mov (%rsp),%r8 # restore key schedule
984 mov %rbx,(%rsp)
985
986 mov 240(%r8),%ecx # pull number of rounds
987 xor %rdi,%rdi
988 lea (%rdi,%rcx,4),%rcx
989 mov %r8,%rsi
990 lea (%r8,%rcx,4),%rdi # pointer to last chunk
991.align 4
992.Linvert:
993 mov 0(%rsi),%rax
994 mov 8(%rsi),%rbx
995 mov 0(%rdi),%rcx
996 mov 8(%rdi),%rdx
997 mov %rax,0(%rdi)
998 mov %rbx,8(%rdi)
999 mov %rcx,0(%rsi)
1000 mov %rdx,8(%rsi)
1001 lea 16(%rsi),%rsi
1002 lea -16(%rdi),%rdi
1003 cmp %rsi,%rdi
1004 jne .Linvert
1005
1006 .picmeup %r9
1007 lea AES_Td-.(%r9),%rdi
1008 lea AES_Te-AES_Td(%rdi),%r9
1009
1010 mov %r8,%rsi
1011 mov 240(%r8),%ecx # pull number of rounds
1012 sub \$1,%ecx
1013.align 4
1014.Lpermute:
1015 lea 16(%rsi),%rsi
1016___
1017 &deckey (0,"%rsi","%r9","%rdi");
1018 &deckey (4,"%rsi","%r9","%rdi");
1019 &deckey (8,"%rsi","%r9","%rdi");
1020 &deckey (12,"%rsi","%r9","%rdi");
1021$code.=<<___;
1022 sub \$1,%ecx
1023 jnz .Lpermute
1024
1025 xor %rax,%rax
1026 pop %rbx
1027 ret
1028.size AES_set_decrypt_key,.-AES_set_decrypt_key
1029___
1030
1031# void AES_cbc_encrypt (const void char *inp, unsigned char *out,
1032# size_t length, const AES_KEY *key,
1033# unsigned char *ivp,const int enc);
1034{
1035# stack frame layout
1036# -8(%rsp) return address
1037my $_rsp="0(%rsp)"; # saved %rsp
1038my $_len="8(%rsp)"; # copy of 3rd parameter, length
1039my $_key="16(%rsp)"; # copy of 4th parameter, key
1040my $_ivp="24(%rsp)"; # copy of 5th parameter, ivp
1041my $keyp="32(%rsp)"; # one to pass as $key
1042my $ivec="40(%rsp)"; # ivec[16]
1043my $aes_key="56(%rsp)"; # copy of aes_key
1044my $mark="56+240(%rsp)"; # copy of aes_key->rounds
1045
1046$code.=<<___;
1047.globl AES_cbc_encrypt
1048.type AES_cbc_encrypt,\@function,6
1049.align 16
1050AES_cbc_encrypt:
1051 cmp \$0,%rdx # check length
1052 je .Lcbc_just_ret
1053 push %rbx
1054 push %rbp
1055 push %r12
1056 push %r13
1057 push %r14
1058 push %r15
1059 pushfq
1060 cld
1061 mov %r9d,%r9d # clear upper half of enc
1062
1063 .picmeup $sbox
1064.Lcbc_pic_point:
1065
1066 cmp \$0,%r9
1067 je .LDECRYPT
1068
1069 lea AES_Te-.Lcbc_pic_point($sbox),$sbox
1070
1071 # allocate aligned stack frame...
1072 lea -64-248(%rsp),$key
1073 and \$-64,$key
1074
1075 # ... and make it doesn't alias with AES_Te modulo 4096
1076 mov $sbox,%r10
1077 lea 2048($sbox),%r11
1078 mov $key,%r12
1079 and \$0xFFF,%r10 # s = $sbox&0xfff
1080 and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff
1081 and \$0xFFF,%r12 # p = %rsp&0xfff
1082
1083 cmp %r11,%r12 # if (p=>e) %rsp =- (p-e);
1084 jb .Lcbc_te_break_out
1085 sub %r11,%r12
1086 sub %r12,$key
1087 jmp .Lcbc_te_ok
1088.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz
1089 sub %r10,%r12
1090 and \$0xFFF,%r12
1091 add \$320,%r12
1092 sub %r12,$key
1093.align 4
1094.Lcbc_te_ok:
1095
1096 xchg %rsp,$key
1097 add \$8,%rsp # reserve for return address!
1098 mov $key,$_rsp # save %rsp
1099 mov %rdx,$_len # save copy of len
1100 mov %rcx,$_key # save copy of key
1101 mov %r8,$_ivp # save copy of ivp
1102 movl \$0,$mark # copy of aes_key->rounds = 0;
1103 mov %r8,%rbp # rearrange input arguments
1104 mov %rsi,$out
1105 mov %rdi,$inp
1106 mov %rcx,$key
1107
1108 # do we copy key schedule to stack?
1109 mov $key,%r10
1110 sub $sbox,%r10
1111 and \$0xfff,%r10
1112 cmp \$2048,%r10
1113 jb .Lcbc_do_ecopy
1114 cmp \$4096-248,%r10
1115 jb .Lcbc_skip_ecopy
1116.align 4
1117.Lcbc_do_ecopy:
1118 mov $key,%rsi
1119 lea $aes_key,%rdi
1120 lea $aes_key,$key
1121 mov \$240/8,%ecx
1122 .long 0x90A548F3 # rep movsq
1123 mov (%rsi),%eax # copy aes_key->rounds
1124 mov %eax,(%rdi)
1125.Lcbc_skip_ecopy:
1126 mov $key,$keyp # save key pointer
1127
1128 mov \$16,%ecx
1129.align 4
1130.Lcbc_prefetch_te:
1131 mov 0($sbox),%r10
1132 mov 32($sbox),%r11
1133 mov 64($sbox),%r12
1134 mov 96($sbox),%r13
1135 lea 128($sbox),$sbox
1136 sub \$1,%ecx
1137 jnz .Lcbc_prefetch_te
1138 sub \$2048,$sbox
1139
1140 test \$-16,%rdx # check upon length
1141 mov %rdx,%r10
1142 mov 0(%rbp),$s0 # load iv
1143 mov 4(%rbp),$s1
1144 mov 8(%rbp),$s2
1145 mov 12(%rbp),$s3
1146 jz .Lcbc_enc_tail # short input...
1147
1148.align 4
1149.Lcbc_enc_loop:
1150 xor 0($inp),$s0
1151 xor 4($inp),$s1
1152 xor 8($inp),$s2
1153 xor 12($inp),$s3
1154 mov $inp,$ivec # if ($verticalspin) save inp
1155
1156 mov $keyp,$key # restore key
1157 call _x86_64_AES_encrypt
1158
1159 mov $ivec,$inp # if ($verticalspin) restore inp
1160 mov $s0,0($out)
1161 mov $s1,4($out)
1162 mov $s2,8($out)
1163 mov $s3,12($out)
1164
1165 mov $_len,%r10
1166 lea 16($inp),$inp
1167 lea 16($out),$out
1168 sub \$16,%r10
1169 test \$-16,%r10
1170 mov %r10,$_len
1171 jnz .Lcbc_enc_loop
1172 test \$15,%r10
1173 jnz .Lcbc_enc_tail
1174 mov $_ivp,%rbp # restore ivp
1175 mov $s0,0(%rbp) # save ivec
1176 mov $s1,4(%rbp)
1177 mov $s2,8(%rbp)
1178 mov $s3,12(%rbp)
1179
1180.align 4
1181.Lcbc_cleanup:
1182 cmpl \$0,$mark # was the key schedule copied?
1183 lea $aes_key,%rdi
1184 mov $_rsp,%rsp
1185 je .Lcbc_exit
1186 mov \$240/8,%ecx
1187 xor %rax,%rax
1188 .long 0x90AB48F3 # rep stosq
1189.Lcbc_exit:
1190 popfq
1191 pop %r15
1192 pop %r14
1193 pop %r13
1194 pop %r12
1195 pop %rbp
1196 pop %rbx
1197.Lcbc_just_ret:
1198 ret
1199.align 4
1200.Lcbc_enc_tail:
1201 cmp $inp,$out
1202 je .Lcbc_enc_in_place
1203 mov %r10,%rcx
1204 mov $inp,%rsi
1205 mov $out,%rdi
1206 .long 0xF689A4F3 # rep movsb
1207.Lcbc_enc_in_place:
1208 mov \$16,%rcx # zero tail
1209 sub %r10,%rcx
1210 xor %rax,%rax
1211 .long 0xF689AAF3 # rep stosb
1212 mov $out,$inp # this is not a mistake!
1213 movq \$16,$_len # len=16
1214 jmp .Lcbc_enc_loop # one more spin...
1215#----------------------------- DECRYPT -----------------------------#
1216.align 16
1217.LDECRYPT:
1218 lea AES_Td-.Lcbc_pic_point($sbox),$sbox
1219
1220 # allocate aligned stack frame...
1221 lea -64-248(%rsp),$key
1222 and \$-64,$key
1223
1224 # ... and make it doesn't alias with AES_Td modulo 4096
1225 mov $sbox,%r10
1226 lea 2304($sbox),%r11
1227 mov $key,%r12
1228 and \$0xFFF,%r10 # s = $sbox&0xfff
1229 and \$0xFFF,%r11 # e = ($sbox+2048+256)&0xfff
1230 and \$0xFFF,%r12 # p = %rsp&0xfff
1231
1232 cmp %r11,%r12 # if (p=>e) %rsp =- (p-e);
1233 jb .Lcbc_td_break_out
1234 sub %r11,%r12
1235 sub %r12,$key
1236 jmp .Lcbc_td_ok
1237.Lcbc_td_break_out: # else %rsp -= (p-s)&0xfff + framesz
1238 sub %r10,%r12
1239 and \$0xFFF,%r12
1240 add \$320,%r12
1241 sub %r12,$key
1242.align 4
1243.Lcbc_td_ok:
1244
1245 xchg %rsp,$key
1246 add \$8,%rsp # reserve for return address!
1247 mov $key,$_rsp # save %rsp
1248 mov %rdx,$_len # save copy of len
1249 mov %rcx,$_key # save copy of key
1250 mov %r8,$_ivp # save copy of ivp
1251 movl \$0,$mark # copy of aes_key->rounds = 0;
1252 mov %r8,%rbp # rearrange input arguments
1253 mov %rsi,$out
1254 mov %rdi,$inp
1255 mov %rcx,$key
1256
1257 # do we copy key schedule to stack?
1258 mov $key,%r10
1259 sub $sbox,%r10
1260 and \$0xfff,%r10
1261 cmp \$2304,%r10
1262 jb .Lcbc_do_dcopy
1263 cmp \$4096-248,%r10
1264 jb .Lcbc_skip_dcopy
1265.align 4
1266.Lcbc_do_dcopy:
1267 mov $key,%rsi
1268 lea $aes_key,%rdi
1269 lea $aes_key,$key
1270 mov \$240/8,%ecx
1271 .long 0x90A548F3 # rep movsq
1272 mov (%rsi),%eax # copy aes_key->rounds
1273 mov %eax,(%rdi)
1274.Lcbc_skip_dcopy:
1275 mov $key,$keyp # save key pointer
1276
1277 mov \$18,%ecx
1278.align 4
1279.Lcbc_prefetch_td:
1280 mov 0($sbox),%r10
1281 mov 32($sbox),%r11
1282 mov 64($sbox),%r12
1283 mov 96($sbox),%r13
1284 lea 128($sbox),$sbox
1285 sub \$1,%ecx
1286 jnz .Lcbc_prefetch_td
1287 sub \$2304,$sbox
1288
1289 cmp $inp,$out
1290 je .Lcbc_dec_in_place
1291
1292 mov %rbp,$ivec
1293.align 4
1294.Lcbc_dec_loop:
1295 mov 0($inp),$s0 # read input
1296 mov 4($inp),$s1
1297 mov 8($inp),$s2
1298 mov 12($inp),$s3
1299 mov $inp,8+$ivec # if ($verticalspin) save inp
1300
1301 mov $keyp,$key # restore key
1302 call _x86_64_AES_decrypt
1303
1304 mov $ivec,%rbp # load ivp
1305 mov 8+$ivec,$inp # if ($verticalspin) restore inp
1306 xor 0(%rbp),$s0 # xor iv
1307 xor 4(%rbp),$s1
1308 xor 8(%rbp),$s2
1309 xor 12(%rbp),$s3
1310 mov $inp,%rbp # current input, next iv
1311
1312 mov $_len,%r10 # load len
1313 sub \$16,%r10
1314 jc .Lcbc_dec_partial
1315 mov %r10,$_len # update len
1316 mov %rbp,$ivec # update ivp
1317
1318 mov $s0,0($out) # write output
1319 mov $s1,4($out)
1320 mov $s2,8($out)
1321 mov $s3,12($out)
1322
1323 lea 16($inp),$inp
1324 lea 16($out),$out
1325 jnz .Lcbc_dec_loop
1326.Lcbc_dec_end:
1327 mov $_ivp,%r12 # load user ivp
1328 mov 0(%rbp),%r10 # load iv
1329 mov 8(%rbp),%r11
1330 mov %r10,0(%r12) # copy back to user
1331 mov %r11,8(%r12)
1332 jmp .Lcbc_cleanup
1333
1334.align 4
1335.Lcbc_dec_partial:
1336 mov $s0,0+$ivec # dump output to stack
1337 mov $s1,4+$ivec
1338 mov $s2,8+$ivec
1339 mov $s3,12+$ivec
1340 mov $out,%rdi
1341 lea $ivec,%rsi
1342 mov \$16,%rcx
1343 add %r10,%rcx # number of bytes to copy
1344 .long 0xF689A4F3 # rep movsb
1345 jmp .Lcbc_dec_end
1346
1347.align 16
1348.Lcbc_dec_in_place:
1349 mov 0($inp),$s0 # load input
1350 mov 4($inp),$s1
1351 mov 8($inp),$s2
1352 mov 12($inp),$s3
1353
1354 mov $inp,$ivec # if ($verticalspin) save inp
1355 mov $keyp,$key
1356 call _x86_64_AES_decrypt
1357
1358 mov $ivec,$inp # if ($verticalspin) restore inp
1359 mov $_ivp,%rbp
1360 xor 0(%rbp),$s0
1361 xor 4(%rbp),$s1
1362 xor 8(%rbp),$s2
1363 xor 12(%rbp),$s3
1364
1365 mov 0($inp),%r10 # copy input to iv
1366 mov 8($inp),%r11
1367 mov %r10,0(%rbp)
1368 mov %r11,8(%rbp)
1369
1370 mov $s0,0($out) # save output [zaps input]
1371 mov $s1,4($out)
1372 mov $s2,8($out)
1373 mov $s3,12($out)
1374
1375 mov $_len,%rcx
1376 lea 16($inp),$inp
1377 lea 16($out),$out
1378 sub \$16,%rcx
1379 jc .Lcbc_dec_in_place_partial
1380 mov %rcx,$_len
1381 jnz .Lcbc_dec_in_place
1382 jmp .Lcbc_cleanup
1383
1384.align 4
1385.Lcbc_dec_in_place_partial:
1386 # one can argue if this is actually required
1387 lea ($out,%rcx),%rdi
1388 lea (%rbp,%rcx),%rsi
1389 neg %rcx
1390 .long 0xF689A4F3 # rep movsb # restore tail
1391 jmp .Lcbc_cleanup
1392.size AES_cbc_encrypt,.-AES_cbc_encrypt
1393___
1394}
1395
1396$code.=<<___;
1397.globl AES_Te
1398.align 64
1399AES_Te:
1400___
1401 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
1402 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
1403 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
1404 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
1405 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
1406 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
1407 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
1408 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
1409 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
1410 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
1411 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
1412 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
1413 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
1414 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
1415 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
1416 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
1417 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
1418 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
1419 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
1420 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
1421 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
1422 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
1423 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
1424 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
1425 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
1426 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
1427 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
1428 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
1429 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
1430 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
1431 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
1432 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
1433 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
1434 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
1435 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
1436 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
1437 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
1438 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
1439 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
1440 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
1441 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
1442 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
1443 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
1444 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
1445 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
1446 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
1447 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
1448 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
1449 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
1450 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
1451 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
1452 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
1453 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
1454 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
1455 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
1456 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
1457 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
1458 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
1459 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
1460 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
1461 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
1462 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
1463 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
1464 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
1465#rcon:
1466$code.=<<___;
1467 .long 0x00000001, 0x00000002, 0x00000004, 0x00000008
1468 .long 0x00000010, 0x00000020, 0x00000040, 0x00000080
1469 .long 0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0
1470___
1471$code.=<<___;
1472.globl AES_Td
1473.align 64
1474AES_Td:
1475___
1476 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
1477 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
1478 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
1479 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
1480 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
1481 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
1482 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
1483 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
1484 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
1485 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
1486 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
1487 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
1488 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
1489 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
1490 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
1491 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
1492 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
1493 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
1494 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
1495 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
1496 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
1497 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
1498 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
1499 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
1500 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
1501 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
1502 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
1503 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
1504 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
1505 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
1506 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
1507 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
1508 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
1509 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
1510 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
1511 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
1512 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
1513 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
1514 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
1515 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
1516 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
1517 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
1518 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
1519 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
1520 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
1521 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
1522 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
1523 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
1524 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
1525 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
1526 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
1527 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
1528 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
1529 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
1530 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
1531 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
1532 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
1533 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
1534 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
1535 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
1536 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
1537 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
1538 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
1539 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
1540#Td4:
1541 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
1542 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
1543 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
1544 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
1545 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
1546 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
1547 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
1548 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
1549 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
1550 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
1551 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
1552 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
1553 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
1554 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
1555 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
1556 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
1557 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
1558 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
1559 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
1560 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
1561 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
1562 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
1563 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
1564 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
1565 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
1566 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
1567 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
1568 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
1569 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
1570 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
1571 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
1572 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
1573
1574$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1575
1576print $code;
1577
1578close STDOUT;