summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/aes')
-rw-r--r--src/lib/libcrypto/aes/Makefile130
-rw-r--r--src/lib/libcrypto/aes/README3
-rw-r--r--src/lib/libcrypto/aes/aes.h144
-rw-r--r--src/lib/libcrypto/aes/aes_cbc.c131
-rw-r--r--src/lib/libcrypto/aes/aes_cfb.c225
-rw-r--r--src/lib/libcrypto/aes/aes_core.c1159
-rw-r--r--src/lib/libcrypto/aes/aes_ctr.c139
-rw-r--r--src/lib/libcrypto/aes/aes_ecb.c73
-rw-r--r--src/lib/libcrypto/aes/aes_ige.c323
-rw-r--r--src/lib/libcrypto/aes/aes_locl.h89
-rw-r--r--src/lib/libcrypto/aes/aes_misc.c64
-rw-r--r--src/lib/libcrypto/aes/aes_ofb.c142
-rw-r--r--src/lib/libcrypto/aes/aes_wrap.c259
-rw-r--r--src/lib/libcrypto/aes/asm/aes-586.pl1532
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ia64.S1123
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-x86_64.pl1578
16 files changed, 7114 insertions, 0 deletions
diff --git a/src/lib/libcrypto/aes/Makefile b/src/lib/libcrypto/aes/Makefile
new file mode 100644
index 0000000000..22c7203dbb
--- /dev/null
+++ b/src/lib/libcrypto/aes/Makefile
@@ -0,0 +1,130 @@
1#
2# crypto/aes/Makefile
3#
4
5DIR= aes
6TOP= ../..
7CC= cc
8CPP= $(CC) -E
9INCLUDES=
10CFLAG=-g
11MAKEFILE= Makefile
12AR= ar r
13
14AES_ASM_OBJ=aes_core.o aes_cbc.o
15
16CFLAGS= $(INCLUDES) $(CFLAG)
17ASFLAGS= $(INCLUDES) $(ASFLAG)
18AFLAGS= $(ASFLAGS)
19
20GENERAL=Makefile
21#TEST=aestest.c
22TEST=
23APPS=
24
25LIB=$(TOP)/libcrypto.a
26LIBSRC=aes_core.c aes_misc.c aes_ecb.c aes_cbc.c aes_cfb.c aes_ofb.c \
27 aes_ctr.c aes_ige.c aes_wrap.c
28LIBOBJ=aes_misc.o aes_ecb.o aes_cfb.o aes_ofb.o aes_ctr.o aes_ige.o aes_wrap.o \
29 $(AES_ASM_OBJ)
30
31SRC= $(LIBSRC)
32
33EXHEADER= aes.h
34HEADER= aes_locl.h $(EXHEADER)
35
36ALL= $(GENERAL) $(SRC) $(HEADER)
37
# Build this directory through the top-level Makefile.
# The cd and the recursive make are joined with && (not ;) so that a failed
# cd can never start a build in whatever directory the shell happens to be in.
top:
	(cd ../.. && $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
40
41all: lib
42
43lib: $(LIBOBJ)
44 $(AR) $(LIB) $(LIBOBJ)
45 $(RANLIB) $(LIB) || echo Never mind.
46 @touch lib
47
48$(LIBOBJ): $(LIBSRC)
49
50aes-ia64.s: asm/aes-ia64.S
51 $(CC) $(CFLAGS) -E asm/aes-ia64.S > $@
52
# Generate the x86 assembler source for each object format (elf, coff, a.out)
# from the perlasm script.  The recipe changes into asm/ and redirects the
# output to ../$@; "cd asm &&" (rather than "cd asm;") aborts the recipe when
# the directory change fails, so the redirect can never write a file outside
# this directory.
ax86-elf.s: asm/aes-586.pl ../perlasm/x86asm.pl
	(cd asm && $(PERL) aes-586.pl elf $(CFLAGS) $(PROCESSOR) > ../$@)
ax86-cof.s: asm/aes-586.pl ../perlasm/x86asm.pl
	(cd asm && $(PERL) aes-586.pl coff $(CFLAGS) $(PROCESSOR) > ../$@)
ax86-out.s: asm/aes-586.pl ../perlasm/x86asm.pl
	(cd asm && $(PERL) aes-586.pl a.out $(CFLAGS) $(PROCESSOR) > ../$@)
59
60aes-x86_64.s: asm/aes-x86_64.pl
61 $(PERL) asm/aes-x86_64.pl $@
62
63files:
64 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
65
66links:
67 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
68 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
69 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
70
# Copy every exported header into the installed include/openssl directory
# and make it world-readable.
# chmod only runs when cp succeeded, and the first failure aborts the loop
# with a non-zero status so that make reports a broken install instead of
# silently continuing.
install:
	@[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
	@headerlist="$(EXHEADER)"; for i in $$headerlist ; \
	do \
	(cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i && \
	chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ) \
	|| exit 1; \
	done
78
79tags:
80 ctags $(SRC)
81
82tests:
83
84lint:
85 lint -DLINT $(INCLUDES) $(SRC)>fluff
86
87depend:
88 @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
89 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
90
91dclean:
92 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
93 mv -f Makefile.new $(MAKEFILE)
94
95clean:
96 rm -f *.s *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
97
98# DO NOT DELETE THIS LINE -- make depend depends on it.
99
100aes_cbc.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
101aes_cbc.o: ../../include/openssl/opensslconf.h aes_cbc.c aes_locl.h
102aes_cfb.o: ../../e_os.h ../../include/openssl/aes.h
103aes_cfb.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
104aes_cfb.o: aes_cfb.c aes_locl.h
105aes_core.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
106aes_core.o: ../../include/openssl/opensslconf.h aes_core.c aes_locl.h
107aes_ctr.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
108aes_ctr.o: ../../include/openssl/opensslconf.h aes_ctr.c aes_locl.h
109aes_ecb.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
110aes_ecb.o: ../../include/openssl/opensslconf.h aes_ecb.c aes_locl.h
111aes_ige.o: ../../e_os.h ../../include/openssl/aes.h ../../include/openssl/bio.h
112aes_ige.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
113aes_ige.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
114aes_ige.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
115aes_ige.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
116aes_ige.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
117aes_ige.o: ../../include/openssl/symhacks.h ../cryptlib.h aes_ige.c aes_locl.h
118aes_misc.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
119aes_misc.o: ../../include/openssl/opensslconf.h
120aes_misc.o: ../../include/openssl/opensslv.h aes_locl.h aes_misc.c
121aes_ofb.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
122aes_ofb.o: ../../include/openssl/opensslconf.h aes_locl.h aes_ofb.c
123aes_wrap.o: ../../e_os.h ../../include/openssl/aes.h
124aes_wrap.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
125aes_wrap.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
126aes_wrap.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
127aes_wrap.o: ../../include/openssl/opensslconf.h
128aes_wrap.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
129aes_wrap.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
130aes_wrap.o: ../../include/openssl/symhacks.h ../cryptlib.h aes_wrap.c
diff --git a/src/lib/libcrypto/aes/README b/src/lib/libcrypto/aes/README
new file mode 100644
index 0000000000..0f9620a80e
--- /dev/null
+++ b/src/lib/libcrypto/aes/README
@@ -0,0 +1,3 @@
1This is an OpenSSL-compatible version of AES (also called Rijndael).
2aes_core.c is basically the same as rijndael-alg-fst.c but with an
3API that looks like the rest of the OpenSSL symmetric cipher suite.
diff --git a/src/lib/libcrypto/aes/aes.h b/src/lib/libcrypto/aes/aes.h
new file mode 100644
index 0000000000..baf0222d49
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes.h
@@ -0,0 +1,144 @@
1/* crypto/aes/aes.h -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef HEADER_AES_H
53#define HEADER_AES_H
54
55#include <openssl/opensslconf.h>
56
57#ifdef OPENSSL_NO_AES
58#error AES is disabled.
59#endif
60
61#define AES_ENCRYPT 1
62#define AES_DECRYPT 0
63
64/* Because array size can't be a const in C, the following two are macros.
65 AES_MAXNR is the maximum number of rounds; AES_BLOCK_SIZE is in bytes. */
66#define AES_MAXNR 14
67#define AES_BLOCK_SIZE 16
68
69#ifdef __cplusplus
70extern "C" {
71#endif
72
73/* This should be a hidden type, but EVP requires that the size be known */
74struct aes_key_st {
75#ifdef AES_LONG
76 unsigned long rd_key[4 *(AES_MAXNR + 1)];
77#else
78 unsigned int rd_key[4 *(AES_MAXNR + 1)];
79#endif
80 int rounds;
81};
82typedef struct aes_key_st AES_KEY;
83
84const char *AES_options(void);
85
86int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
87 AES_KEY *key);
88int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
89 AES_KEY *key);
90
91void AES_encrypt(const unsigned char *in, unsigned char *out,
92 const AES_KEY *key);
93void AES_decrypt(const unsigned char *in, unsigned char *out,
94 const AES_KEY *key);
95
96void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
97 const AES_KEY *key, const int enc);
98void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
99 const unsigned long length, const AES_KEY *key,
100 unsigned char *ivec, const int enc);
101void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
102 const unsigned long length, const AES_KEY *key,
103 unsigned char *ivec, int *num, const int enc);
104void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
105 const unsigned long length, const AES_KEY *key,
106 unsigned char *ivec, int *num, const int enc);
107void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
108 const unsigned long length, const AES_KEY *key,
109 unsigned char *ivec, int *num, const int enc);
110void AES_cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
111 const int nbits,const AES_KEY *key,
112 unsigned char *ivec,const int enc);
113void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
114 const unsigned long length, const AES_KEY *key,
115 unsigned char *ivec, int *num);
116void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
117 const unsigned long length, const AES_KEY *key,
118 unsigned char ivec[AES_BLOCK_SIZE],
119 unsigned char ecount_buf[AES_BLOCK_SIZE],
120 unsigned int *num);
121
122/* For IGE, see also http://www.links.org/files/openssl-ige.pdf */
123/* NB: the IV is _two_ blocks long */
124void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
125 const unsigned long length, const AES_KEY *key,
126 unsigned char *ivec, const int enc);
127/* NB: the IV is _four_ blocks long */
128void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
129 const unsigned long length, const AES_KEY *key,
130 const AES_KEY *key2, const unsigned char *ivec,
131 const int enc);
132
133int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
134 unsigned char *out,
135 const unsigned char *in, unsigned int inlen);
136int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
137 unsigned char *out,
138 const unsigned char *in, unsigned int inlen);
139
140#ifdef __cplusplus
141}
142#endif
143
144#endif /* !HEADER_AES_H */
diff --git a/src/lib/libcrypto/aes/aes_cbc.c b/src/lib/libcrypto/aes/aes_cbc.c
new file mode 100644
index 0000000000..d2ba6bcdb4
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_cbc.c
@@ -0,0 +1,131 @@
1/* crypto/aes/aes_cbc.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef AES_DEBUG
53# ifndef NDEBUG
54# define NDEBUG
55# endif
56#endif
57#include <assert.h>
58
59#include <openssl/aes.h>
60#include "aes_locl.h"
61
62void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
63 const unsigned long length, const AES_KEY *key,
64 unsigned char *ivec, const int enc) {
65
66 unsigned long n;
67 unsigned long len = length;
68 unsigned char tmp[AES_BLOCK_SIZE];
69 const unsigned char *iv = ivec;
70
71 assert(in && out && key && ivec);
72 assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
73
74 if (AES_ENCRYPT == enc) {
75 while (len >= AES_BLOCK_SIZE) {
76 for(n=0; n < AES_BLOCK_SIZE; ++n)
77 out[n] = in[n] ^ iv[n];
78 AES_encrypt(out, out, key);
79 iv = out;
80 len -= AES_BLOCK_SIZE;
81 in += AES_BLOCK_SIZE;
82 out += AES_BLOCK_SIZE;
83 }
84 if (len) {
85 for(n=0; n < len; ++n)
86 out[n] = in[n] ^ iv[n];
87 for(n=len; n < AES_BLOCK_SIZE; ++n)
88 out[n] = iv[n];
89 AES_encrypt(out, out, key);
90 iv = out;
91 }
92 memcpy(ivec,iv,AES_BLOCK_SIZE);
93 } else if (in != out) {
94 while (len >= AES_BLOCK_SIZE) {
95 AES_decrypt(in, out, key);
96 for(n=0; n < AES_BLOCK_SIZE; ++n)
97 out[n] ^= iv[n];
98 iv = in;
99 len -= AES_BLOCK_SIZE;
100 in += AES_BLOCK_SIZE;
101 out += AES_BLOCK_SIZE;
102 }
103 if (len) {
104 AES_decrypt(in,tmp,key);
105 for(n=0; n < len; ++n)
106 out[n] = tmp[n] ^ iv[n];
107 iv = in;
108 }
109 memcpy(ivec,iv,AES_BLOCK_SIZE);
110 } else {
111 while (len >= AES_BLOCK_SIZE) {
112 memcpy(tmp, in, AES_BLOCK_SIZE);
113 AES_decrypt(in, out, key);
114 for(n=0; n < AES_BLOCK_SIZE; ++n)
115 out[n] ^= ivec[n];
116 memcpy(ivec, tmp, AES_BLOCK_SIZE);
117 len -= AES_BLOCK_SIZE;
118 in += AES_BLOCK_SIZE;
119 out += AES_BLOCK_SIZE;
120 }
121 if (len) {
122 memcpy(tmp, in, AES_BLOCK_SIZE);
123 AES_decrypt(tmp, out, key);
124 for(n=0; n < len; ++n)
125 out[n] ^= ivec[n];
126 for(n=len; n < AES_BLOCK_SIZE; ++n)
127 out[n] = tmp[n];
128 memcpy(ivec, tmp, AES_BLOCK_SIZE);
129 }
130 }
131}
diff --git a/src/lib/libcrypto/aes/aes_cfb.c b/src/lib/libcrypto/aes/aes_cfb.c
new file mode 100644
index 0000000000..49f0411010
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_cfb.c
@@ -0,0 +1,225 @@
1/* crypto/aes/aes_cfb.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
52 * All rights reserved.
53 *
54 * This package is an SSL implementation written
55 * by Eric Young (eay@cryptsoft.com).
56 * The implementation was written so as to conform with Netscapes SSL.
57 *
58 * This library is free for commercial and non-commercial use as long as
59 * the following conditions are aheared to. The following conditions
60 * apply to all code found in this distribution, be it the RC4, RSA,
61 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
62 * included with this distribution is covered by the same copyright terms
63 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
64 *
65 * Copyright remains Eric Young's, and as such any Copyright notices in
66 * the code are not to be removed.
67 * If this package is used in a product, Eric Young should be given attribution
68 * as the author of the parts of the library used.
69 * This can be in the form of a textual message at program startup or
70 * in documentation (online or textual) provided with the package.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 * must display the following acknowledgement:
82 * "This product includes cryptographic software written by
83 * Eric Young (eay@cryptsoft.com)"
84 * The word 'cryptographic' can be left out if the rouines from the library
85 * being used are not cryptographic related :-).
86 * 4. If you include any Windows specific code (or a derivative thereof) from
87 * the apps directory (application code) you must include an acknowledgement:
88 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
89 *
90 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
91 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
94 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100 * SUCH DAMAGE.
101 *
102 * The licence and distribution terms for any publically available version or
103 * derivative of this code cannot be changed. i.e. this code cannot simply be
104 * copied and put under another distribution licence
105 * [including the GNU Public Licence.]
106 */
107
108#ifndef AES_DEBUG
109# ifndef NDEBUG
110# define NDEBUG
111# endif
112#endif
113#include <assert.h>
114
115#include <openssl/aes.h>
116#include "aes_locl.h"
117#include "e_os.h"
118
119/* The input and output encrypted as though 128bit cfb mode is being
120 * used. The extra state information to record how much of the
121 * 128bit block we have used is contained in *num;
122 */
123
124void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
125 const unsigned long length, const AES_KEY *key,
126 unsigned char *ivec, int *num, const int enc) {
127
128 unsigned int n;
129 unsigned long l = length;
130 unsigned char c;
131
132 assert(in && out && key && ivec && num);
133
134 n = *num;
135
136 if (enc) {
137 while (l--) {
138 if (n == 0) {
139 AES_encrypt(ivec, ivec, key);
140 }
141 ivec[n] = *(out++) = *(in++) ^ ivec[n];
142 n = (n+1) % AES_BLOCK_SIZE;
143 }
144 } else {
145 while (l--) {
146 if (n == 0) {
147 AES_encrypt(ivec, ivec, key);
148 }
149 c = *(in);
150 *(out++) = *(in++) ^ ivec[n];
151 ivec[n] = c;
152 n = (n+1) % AES_BLOCK_SIZE;
153 }
154 }
155
156 *num=n;
157}
158
159/* This expects a single block of size nbits for both in and out. Note that
160 it corrupts any extra bits in the last byte of out */
161void AES_cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
162 const int nbits,const AES_KEY *key,
163 unsigned char *ivec,const int enc)
164 {
165 int n,rem,num;
166 unsigned char ovec[AES_BLOCK_SIZE*2];
167
168 if (nbits<=0 || nbits>128) return;
169
170 /* fill in the first half of the new IV with the current IV */
171 memcpy(ovec,ivec,AES_BLOCK_SIZE);
172 /* construct the new IV */
173 AES_encrypt(ivec,ivec,key);
174 num = (nbits+7)/8;
175 if (enc) /* encrypt the input */
176 for(n=0 ; n < num ; ++n)
177 out[n] = (ovec[AES_BLOCK_SIZE+n] = in[n] ^ ivec[n]);
178 else /* decrypt the input */
179 for(n=0 ; n < num ; ++n)
180 out[n] = (ovec[AES_BLOCK_SIZE+n] = in[n]) ^ ivec[n];
181 /* shift ovec left... */
182 rem = nbits%8;
183 num = nbits/8;
184 if(rem==0)
185 memcpy(ivec,ovec+num,AES_BLOCK_SIZE);
186 else
187 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
188 ivec[n] = ovec[n+num]<<rem | ovec[n+num+1]>>(8-rem);
189
190 /* it is not necessary to cleanse ovec, since the IV is not secret */
191 }
192
193/* N.B. This expects the input to be packed, MS bit first */
194void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
195 const unsigned long length, const AES_KEY *key,
196 unsigned char *ivec, int *num, const int enc)
197 {
198 unsigned int n;
199 unsigned char c[1],d[1];
200
201 assert(in && out && key && ivec && num);
202 assert(*num == 0);
203
204 memset(out,0,(length+7)/8);
205 for(n=0 ; n < length ; ++n)
206 {
207 c[0]=(in[n/8]&(1 << (7-n%8))) ? 0x80 : 0;
208 AES_cfbr_encrypt_block(c,d,1,key,ivec,enc);
209 out[n/8]=(out[n/8]&~(1 << (7-n%8)))|((d[0]&0x80) >> (n%8));
210 }
211 }
212
213void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
214 const unsigned long length, const AES_KEY *key,
215 unsigned char *ivec, int *num, const int enc)
216 {
217 unsigned int n;
218
219 assert(in && out && key && ivec && num);
220 assert(*num == 0);
221
222 for(n=0 ; n < length ; ++n)
223 AES_cfbr_encrypt_block(&in[n],&out[n],8,key,ivec,enc);
224 }
225
diff --git a/src/lib/libcrypto/aes/aes_core.c b/src/lib/libcrypto/aes/aes_core.c
new file mode 100644
index 0000000000..3a80e18b0a
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_core.c
@@ -0,0 +1,1159 @@
1/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2/**
3 * rijndael-alg-fst.c
4 *
5 * @version 3.0 (December 2000)
6 *
7 * Optimised ANSI C code for the Rijndael cipher (now AES)
8 *
9 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11 * @author Paulo Barreto <paulo.barreto@terra.com.br>
12 *
13 * This code is hereby placed in the public domain.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* Note: rewritten a little bit to provide error control and an OpenSSL-
29 compatible API */
30
31#ifndef AES_DEBUG
32# ifndef NDEBUG
33# define NDEBUG
34# endif
35#endif
36#include <assert.h>
37
38#include <stdlib.h>
39#include <openssl/aes.h>
40#include "aes_locl.h"
41
42/* Each u32 packs S[x] (Si[x] for Td*) times the listed GF(2^8) factors, MS byte first:
43Te0[x] = S [x].[02, 01, 01, 03];
44Te1[x] = S [x].[03, 02, 01, 01];
45Te2[x] = S [x].[01, 03, 02, 01];
46Te3[x] = S [x].[01, 01, 03, 02];
47
48Td0[x] = Si[x].[0e, 09, 0d, 0b];
49Td1[x] = Si[x].[0b, 0e, 09, 0d];
50Td2[x] = Si[x].[0d, 0b, 0e, 09];
51Td3[x] = Si[x].[09, 0d, 0b, 0e];
52Td4[x] = Si[x].[01];   (a plain byte table of the inverse S-box)
53*/
54
55static const u32 Te0[256] = {
56 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
57 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
58 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
59 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
60 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
61 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
62 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
63 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
64 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
65 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
66 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
67 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
68 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
69 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
70 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
71 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
72 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
73 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
74 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
75 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
76 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
77 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
78 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
79 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
80 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
81 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
82 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
83 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
84 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
85 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
86 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
87 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
88 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
89 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
90 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
91 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
92 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
93 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
94 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
95 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
96 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
97 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
98 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
99 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
100 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
101 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
102 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
103 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
104 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
105 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
106 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
107 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
108 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
109 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
110 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
111 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
112 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
113 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
114 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
115 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
116 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
117 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
118 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
119 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
120};
121static const u32 Te1[256] = {
122 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
123 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
124 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
125 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
126 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
127 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
128 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
129 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
130 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
131 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
132 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
133 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
134 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
135 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
136 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
137 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
138 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
139 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
140 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
141 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
142 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
143 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
144 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
145 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
146 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
147 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
148 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
149 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
150 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
151 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
152 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
153 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
154 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
155 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
156 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
157 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
158 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
159 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
160 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
161 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
162 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
163 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
164 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
165 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
166 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
167 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
168 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
169 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
170 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
171 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
172 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
173 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
174 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
175 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
176 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
177 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
178 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
179 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
180 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
181 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
182 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
183 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
184 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
185 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
186};
187static const u32 Te2[256] = {
188 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
189 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
190 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
191 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
192 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
193 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
194 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
195 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
196 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
197 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
198 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
199 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
200 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
201 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
202 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
203 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
204 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
205 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
206 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
207 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
208 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
209 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
210 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
211 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
212 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
213 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
214 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
215 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
216 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
217 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
218 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
219 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
220 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
221 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
222 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
223 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
224 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
225 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
226 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
227 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
228 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
229 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
230 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
231 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
232 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
233 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
234 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
235 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
236 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
237 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
238 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
239 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
240 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
241 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
242 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
243 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
244 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
245 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
246 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
247 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
248 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
249 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
250 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
251 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
252};
253static const u32 Te3[256] = {
254 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
255 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
256 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
257 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
258 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
259 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
260 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
261 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
262 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
263 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
264 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
265 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
266 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
267 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
268 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
269 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
270 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
271 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
272 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
273 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
274 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
275 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
276 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
277 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
278 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
279 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
280 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
281 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
282 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
283 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
284 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
285 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
286 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
287 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
288 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
289 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
290 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
291 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
292 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
293 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
294 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
295 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
296 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
297 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
298 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
299 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
300 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
301 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
302 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
303 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
304 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
305 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
306 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
307 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
308 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
309 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
310 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
311 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
312 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
313 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
314 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
315 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
316 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
317 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
318};
319
320static const u32 Td0[256] = {
321 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
322 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
323 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
324 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
325 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
326 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
327 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
328 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
329 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
330 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
331 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
332 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
333 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
334 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
335 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
336 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
337 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
338 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
339 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
340 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
341 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
342 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
343 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
344 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
345 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
346 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
347 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
348 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
349 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
350 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
351 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
352 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
353 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
354 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
355 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
356 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
357 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
358 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
359 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
360 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
361 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
362 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
363 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
364 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
365 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
366 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
367 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
368 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
369 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
370 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
371 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
372 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
373 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
374 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
375 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
376 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
377 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
378 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
379 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
380 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
381 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
382 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
383 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
384 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
385};
386static const u32 Td1[256] = {
387 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
388 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
389 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
390 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
391 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
392 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
393 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
394 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
395 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
396 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
397 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
398 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
399 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
400 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
401 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
402 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
403 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
404 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
405 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
406 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
407 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
408 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
409 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
410 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
411 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
412 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
413 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
414 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
415 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
416 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
417 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
418 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
419 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
420 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
421 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
422 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
423 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
424 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
425 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
426 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
427 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
428 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
429 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
430 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
431 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
432 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
433 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
434 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
435 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
436 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
437 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
438 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
439 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
440 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
441 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
442 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
443 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
444 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
445 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
446 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
447 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
448 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
449 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
450 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
451};
452static const u32 Td2[256] = {
453 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
454 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
455 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
456 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
457 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
458 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
459 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
460 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
461 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
462 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
463 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
464 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
465 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
466 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
467 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
468 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
469 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
470 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
471 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
472 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
473 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
474 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
475 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
476 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
477 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
478 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
479 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
480 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
481 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
482 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
483 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
484 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
485 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
486 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
487 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
488 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
489 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
490 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
491 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
492 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
493 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
494 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
495 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
496 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
497 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
498 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
499 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
500 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
501 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
502 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
503 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
504 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
505 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
506 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
507 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
508 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
509 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
510 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
511 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
512 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
513 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
514 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
515 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
516 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
517};
518static const u32 Td3[256] = {
519 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
520 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
521 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
522 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
523 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
524 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
525 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
526 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
527 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
528 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
529 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
530 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
531 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
532 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
533 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
534 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
535 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
536 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
537 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
538 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
539 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
540 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
541 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
542 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
543 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
544 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
545 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
546 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
547 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
548 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
549 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
550 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
551 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
552 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
553 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
554 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
555 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
556 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
557 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
558 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
559 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
560 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
561 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
562 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
563 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
564 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
565 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
566 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
567 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
568 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
569 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
570 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
571 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
572 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
573 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
574 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
575 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
576 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
577 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
578 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
579 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
580 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
581 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
582 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
583};
/*
 * Td4: 256-entry byte table, indexed by a single state byte.
 * AES_decrypt uses it in the final round only, looking up one entry per
 * output byte and shifting it into position.
 * NOTE(review): the values look like the standard AES inverse S-box
 * (first entry 0x52) -- confirm against FIPS-197 before relying on it.
 */
584static const u8 Td4[256] = {
585 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
586 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
587 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
588 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
589 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
590 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
591 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
592 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
593 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
594 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
595 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
596 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
597 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
598 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
599 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
600 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
601 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
602 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
603 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
604 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
605 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
606 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
607 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
608 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
609 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
610 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
611 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
612 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
613 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
614 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
615 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
616 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
617};
/*
 * Round constants for the key expansion.  AES_set_encrypt_key XORs
 * rcon[i] into the first word produced by each expansion step, where i is
 * its step counter; the 128-bit case consumes all ten entries, the 192-
 * and 256-bit cases stop earlier.  Each constant occupies the top byte of
 * the word.
 */
618static const u32 rcon[] = {
619 0x01000000, 0x02000000, 0x04000000, 0x08000000,
620 0x10000000, 0x20000000, 0x40000000, 0x80000000,
621 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
622};
623
624/**
625 * Expand the cipher key into the encryption key schedule.
626 */
627int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
628 AES_KEY *key) {
629
630 u32 *rk;
631 int i = 0;
632 u32 temp;
633
634 if (!userKey || !key)
635 return -1;
636 if (bits != 128 && bits != 192 && bits != 256)
637 return -2;
638
639 rk = key->rd_key;
640
641 if (bits==128)
642 key->rounds = 10;
643 else if (bits==192)
644 key->rounds = 12;
645 else
646 key->rounds = 14;
647
648 rk[0] = GETU32(userKey );
649 rk[1] = GETU32(userKey + 4);
650 rk[2] = GETU32(userKey + 8);
651 rk[3] = GETU32(userKey + 12);
652 if (bits == 128) {
653 while (1) {
654 temp = rk[3];
655 rk[4] = rk[0] ^
656 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
657 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
658 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
659 (Te1[(temp >> 24) ] & 0x000000ff) ^
660 rcon[i];
661 rk[5] = rk[1] ^ rk[4];
662 rk[6] = rk[2] ^ rk[5];
663 rk[7] = rk[3] ^ rk[6];
664 if (++i == 10) {
665 return 0;
666 }
667 rk += 4;
668 }
669 }
670 rk[4] = GETU32(userKey + 16);
671 rk[5] = GETU32(userKey + 20);
672 if (bits == 192) {
673 while (1) {
674 temp = rk[ 5];
675 rk[ 6] = rk[ 0] ^
676 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
677 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
678 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
679 (Te1[(temp >> 24) ] & 0x000000ff) ^
680 rcon[i];
681 rk[ 7] = rk[ 1] ^ rk[ 6];
682 rk[ 8] = rk[ 2] ^ rk[ 7];
683 rk[ 9] = rk[ 3] ^ rk[ 8];
684 if (++i == 8) {
685 return 0;
686 }
687 rk[10] = rk[ 4] ^ rk[ 9];
688 rk[11] = rk[ 5] ^ rk[10];
689 rk += 6;
690 }
691 }
692 rk[6] = GETU32(userKey + 24);
693 rk[7] = GETU32(userKey + 28);
694 if (bits == 256) {
695 while (1) {
696 temp = rk[ 7];
697 rk[ 8] = rk[ 0] ^
698 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
699 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
700 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
701 (Te1[(temp >> 24) ] & 0x000000ff) ^
702 rcon[i];
703 rk[ 9] = rk[ 1] ^ rk[ 8];
704 rk[10] = rk[ 2] ^ rk[ 9];
705 rk[11] = rk[ 3] ^ rk[10];
706 if (++i == 7) {
707 return 0;
708 }
709 temp = rk[11];
710 rk[12] = rk[ 4] ^
711 (Te2[(temp >> 24) ] & 0xff000000) ^
712 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
713 (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
714 (Te1[(temp ) & 0xff] & 0x000000ff);
715 rk[13] = rk[ 5] ^ rk[12];
716 rk[14] = rk[ 6] ^ rk[13];
717 rk[15] = rk[ 7] ^ rk[14];
718
719 rk += 8;
720 }
721 }
722 return 0;
723}
724
725/**
726 * Expand the cipher key into the decryption key schedule.
727 */
728int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
729 AES_KEY *key) {
730
731 u32 *rk;
732 int i, j, status;
733 u32 temp;
734
735 /* first, start with an encryption schedule */
736 status = AES_set_encrypt_key(userKey, bits, key);
737 if (status < 0)
738 return status;
739
740 rk = key->rd_key;
741
742 /* invert the order of the round keys: */
743 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
744 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
745 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
746 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
747 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
748 }
749 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
750 for (i = 1; i < (key->rounds); i++) {
751 rk += 4;
752 rk[0] =
753 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
754 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
755 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
756 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
757 rk[1] =
758 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
759 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
760 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
761 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
762 rk[2] =
763 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
764 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
765 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
766 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
767 rk[3] =
768 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
769 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
770 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
771 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
772 }
773 return 0;
774}
775
776#ifndef AES_ASM
777/*
778 * Encrypt a single block
779 * in and out can overlap
780 */
781void AES_encrypt(const unsigned char *in, unsigned char *out,
782 const AES_KEY *key) {
783
784 const u32 *rk;
785 u32 s0, s1, s2, s3, t0, t1, t2, t3;
786#ifndef FULL_UNROLL
787 int r;
788#endif /* ?FULL_UNROLL */
789
790 assert(in && out && key);
791 rk = key->rd_key;
792
793 /*
794 * map byte array block to cipher state
795 * and add initial round key:
796 */
797 s0 = GETU32(in ) ^ rk[0];
798 s1 = GETU32(in + 4) ^ rk[1];
799 s2 = GETU32(in + 8) ^ rk[2];
800 s3 = GETU32(in + 12) ^ rk[3];
801#ifdef FULL_UNROLL
802 /* round 1: */
803 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
804 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
805 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
806 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
807 /* round 2: */
808 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
809 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
810 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
811 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
812 /* round 3: */
813 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
814 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
815 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
816 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
817 /* round 4: */
818 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
819 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
820 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
821 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
822 /* round 5: */
823 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
824 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
825 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
826 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
827 /* round 6: */
828 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
829 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
830 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
831 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
832 /* round 7: */
833 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
834 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
835 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
836 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
837 /* round 8: */
838 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
839 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
840 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
841 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
842 /* round 9: */
843 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
844 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
845 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
846 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
847 if (key->rounds > 10) {
848 /* round 10: */
849 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
850 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
851 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
852 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
853 /* round 11: */
854 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
855 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
856 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
857 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
858 if (key->rounds > 12) {
859 /* round 12: */
860 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
861 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
862 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
863 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
864 /* round 13: */
865 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
866 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
867 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
868 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
869 }
870 }
871 rk += key->rounds << 2;
872#else /* !FULL_UNROLL */
873 /*
874 * Nr - 1 full rounds:
875 */
876 r = key->rounds >> 1;
877 for (;;) {
878 t0 =
879 Te0[(s0 >> 24) ] ^
880 Te1[(s1 >> 16) & 0xff] ^
881 Te2[(s2 >> 8) & 0xff] ^
882 Te3[(s3 ) & 0xff] ^
883 rk[4];
884 t1 =
885 Te0[(s1 >> 24) ] ^
886 Te1[(s2 >> 16) & 0xff] ^
887 Te2[(s3 >> 8) & 0xff] ^
888 Te3[(s0 ) & 0xff] ^
889 rk[5];
890 t2 =
891 Te0[(s2 >> 24) ] ^
892 Te1[(s3 >> 16) & 0xff] ^
893 Te2[(s0 >> 8) & 0xff] ^
894 Te3[(s1 ) & 0xff] ^
895 rk[6];
896 t3 =
897 Te0[(s3 >> 24) ] ^
898 Te1[(s0 >> 16) & 0xff] ^
899 Te2[(s1 >> 8) & 0xff] ^
900 Te3[(s2 ) & 0xff] ^
901 rk[7];
902
903 rk += 8;
904 if (--r == 0) {
905 break;
906 }
907
908 s0 =
909 Te0[(t0 >> 24) ] ^
910 Te1[(t1 >> 16) & 0xff] ^
911 Te2[(t2 >> 8) & 0xff] ^
912 Te3[(t3 ) & 0xff] ^
913 rk[0];
914 s1 =
915 Te0[(t1 >> 24) ] ^
916 Te1[(t2 >> 16) & 0xff] ^
917 Te2[(t3 >> 8) & 0xff] ^
918 Te3[(t0 ) & 0xff] ^
919 rk[1];
920 s2 =
921 Te0[(t2 >> 24) ] ^
922 Te1[(t3 >> 16) & 0xff] ^
923 Te2[(t0 >> 8) & 0xff] ^
924 Te3[(t1 ) & 0xff] ^
925 rk[2];
926 s3 =
927 Te0[(t3 >> 24) ] ^
928 Te1[(t0 >> 16) & 0xff] ^
929 Te2[(t1 >> 8) & 0xff] ^
930 Te3[(t2 ) & 0xff] ^
931 rk[3];
932 }
933#endif /* ?FULL_UNROLL */
934 /*
935 * apply last round and
936 * map cipher state to byte array block:
937 */
938 s0 =
939 (Te2[(t0 >> 24) ] & 0xff000000) ^
940 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
941 (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
942 (Te1[(t3 ) & 0xff] & 0x000000ff) ^
943 rk[0];
944 PUTU32(out , s0);
945 s1 =
946 (Te2[(t1 >> 24) ] & 0xff000000) ^
947 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
948 (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
949 (Te1[(t0 ) & 0xff] & 0x000000ff) ^
950 rk[1];
951 PUTU32(out + 4, s1);
952 s2 =
953 (Te2[(t2 >> 24) ] & 0xff000000) ^
954 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
955 (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
956 (Te1[(t1 ) & 0xff] & 0x000000ff) ^
957 rk[2];
958 PUTU32(out + 8, s2);
959 s3 =
960 (Te2[(t3 >> 24) ] & 0xff000000) ^
961 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
962 (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
963 (Te1[(t2 ) & 0xff] & 0x000000ff) ^
964 rk[3];
965 PUTU32(out + 12, s3);
966}
967
968/*
969 * Decrypt a single block
970 * in and out can overlap
971 */
972void AES_decrypt(const unsigned char *in, unsigned char *out,
973 const AES_KEY *key) {
974
975 const u32 *rk;
976 u32 s0, s1, s2, s3, t0, t1, t2, t3;
977#ifndef FULL_UNROLL
978 int r;
979#endif /* ?FULL_UNROLL */
980
981 assert(in && out && key);
982 rk = key->rd_key;
983
984 /*
985 * map byte array block to cipher state
986 * and add initial round key:
987 */
988 s0 = GETU32(in ) ^ rk[0];
989 s1 = GETU32(in + 4) ^ rk[1];
990 s2 = GETU32(in + 8) ^ rk[2];
991 s3 = GETU32(in + 12) ^ rk[3];
992#ifdef FULL_UNROLL
993 /* round 1: */
994 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
995 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
996 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
997 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
998 /* round 2: */
999 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1000 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1001 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1002 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1003 /* round 3: */
1004 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1005 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1006 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1007 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1008 /* round 4: */
1009 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1010 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1011 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1012 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1013 /* round 5: */
1014 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1015 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1016 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1017 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1018 /* round 6: */
1019 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1020 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1021 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1022 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1023 /* round 7: */
1024 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1025 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1026 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1027 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1028 /* round 8: */
1029 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1030 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1031 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1032 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1033 /* round 9: */
1034 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1035 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1036 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1037 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1038 if (key->rounds > 10) {
1039 /* round 10: */
1040 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1041 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1042 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1043 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1044 /* round 11: */
1045 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1046 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1047 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1048 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1049 if (key->rounds > 12) {
1050 /* round 12: */
1051 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1052 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1053 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1054 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1055 /* round 13: */
1056 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1057 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1058 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1059 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1060 }
1061 }
1062 rk += key->rounds << 2;
1063#else /* !FULL_UNROLL */
1064 /*
1065 * Nr - 1 full rounds:
1066 */
1067 r = key->rounds >> 1;
1068 for (;;) {
1069 t0 =
1070 Td0[(s0 >> 24) ] ^
1071 Td1[(s3 >> 16) & 0xff] ^
1072 Td2[(s2 >> 8) & 0xff] ^
1073 Td3[(s1 ) & 0xff] ^
1074 rk[4];
1075 t1 =
1076 Td0[(s1 >> 24) ] ^
1077 Td1[(s0 >> 16) & 0xff] ^
1078 Td2[(s3 >> 8) & 0xff] ^
1079 Td3[(s2 ) & 0xff] ^
1080 rk[5];
1081 t2 =
1082 Td0[(s2 >> 24) ] ^
1083 Td1[(s1 >> 16) & 0xff] ^
1084 Td2[(s0 >> 8) & 0xff] ^
1085 Td3[(s3 ) & 0xff] ^
1086 rk[6];
1087 t3 =
1088 Td0[(s3 >> 24) ] ^
1089 Td1[(s2 >> 16) & 0xff] ^
1090 Td2[(s1 >> 8) & 0xff] ^
1091 Td3[(s0 ) & 0xff] ^
1092 rk[7];
1093
1094 rk += 8;
1095 if (--r == 0) {
1096 break;
1097 }
1098
1099 s0 =
1100 Td0[(t0 >> 24) ] ^
1101 Td1[(t3 >> 16) & 0xff] ^
1102 Td2[(t2 >> 8) & 0xff] ^
1103 Td3[(t1 ) & 0xff] ^
1104 rk[0];
1105 s1 =
1106 Td0[(t1 >> 24) ] ^
1107 Td1[(t0 >> 16) & 0xff] ^
1108 Td2[(t3 >> 8) & 0xff] ^
1109 Td3[(t2 ) & 0xff] ^
1110 rk[1];
1111 s2 =
1112 Td0[(t2 >> 24) ] ^
1113 Td1[(t1 >> 16) & 0xff] ^
1114 Td2[(t0 >> 8) & 0xff] ^
1115 Td3[(t3 ) & 0xff] ^
1116 rk[2];
1117 s3 =
1118 Td0[(t3 >> 24) ] ^
1119 Td1[(t2 >> 16) & 0xff] ^
1120 Td2[(t1 >> 8) & 0xff] ^
1121 Td3[(t0 ) & 0xff] ^
1122 rk[3];
1123 }
1124#endif /* ?FULL_UNROLL */
1125 /*
1126 * apply last round and
1127 * map cipher state to byte array block:
1128 */
1129 s0 =
1130 (Td4[(t0 >> 24) ] << 24) ^
1131 (Td4[(t3 >> 16) & 0xff] << 16) ^
1132 (Td4[(t2 >> 8) & 0xff] << 8) ^
1133 (Td4[(t1 ) & 0xff]) ^
1134 rk[0];
1135 PUTU32(out , s0);
1136 s1 =
1137 (Td4[(t1 >> 24) ] << 24) ^
1138 (Td4[(t0 >> 16) & 0xff] << 16) ^
1139 (Td4[(t3 >> 8) & 0xff] << 8) ^
1140 (Td4[(t2 ) & 0xff]) ^
1141 rk[1];
1142 PUTU32(out + 4, s1);
1143 s2 =
1144 (Td4[(t2 >> 24) ] << 24) ^
1145 (Td4[(t1 >> 16) & 0xff] << 16) ^
1146 (Td4[(t0 >> 8) & 0xff] << 8) ^
1147 (Td4[(t3 ) & 0xff]) ^
1148 rk[2];
1149 PUTU32(out + 8, s2);
1150 s3 =
1151 (Td4[(t3 >> 24) ] << 24) ^
1152 (Td4[(t2 >> 16) & 0xff] << 16) ^
1153 (Td4[(t1 >> 8) & 0xff] << 8) ^
1154 (Td4[(t0 ) & 0xff]) ^
1155 rk[3];
1156 PUTU32(out + 12, s3);
1157}
1158
1159#endif /* AES_ASM */
diff --git a/src/lib/libcrypto/aes/aes_ctr.c b/src/lib/libcrypto/aes/aes_ctr.c
new file mode 100644
index 0000000000..f36982be1e
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_ctr.c
@@ -0,0 +1,139 @@
1/* crypto/aes/aes_ctr.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef AES_DEBUG
53# ifndef NDEBUG
54# define NDEBUG
55# endif
56#endif
57#include <assert.h>
58
59#include <openssl/aes.h>
60#include "aes_locl.h"
61
62/* NOTE: the IV/counter in CTR mode is big-endian. The rest of the AES code
63 * is endian-neutral. */
64
/*
 * Increment the 128-bit counter by 1.
 *
 * The counter is handled as four 32-bit words read and written with
 * GETU32/PUTU32, starting with the word at byte offset 12 and moving
 * towards offset 0.  A word that wraps to zero carries into the next one;
 * the walk stops at the first word that does not wrap.  A carry out of the
 * word at offset 0 is dropped.
 */
static void AES_ctr128_inc(unsigned char *counter) {
	unsigned long word;
	int off;

	for (off = 12; off >= 0; off -= 4) {
		word = GETU32(counter + off);
		/* unsigned long may be wider than 32 bits, so wrap explicitly */
		word = (word + 1) & 0xFFFFFFFF;
		PUTU32(counter + off, word);

		/* non-zero result: no carry into the next word, so we are done */
		if (word)
			return;
	}
}
101
102/* The input encrypted as though 128bit counter mode is being
103 * used. The extra state information to record how much of the
104 * 128bit block we have used is contained in *num, and the
105 * encrypted counter is kept in ecount_buf. Both *num and
106 * ecount_buf must be initialised with zeros before the first
107 * call to AES_ctr128_encrypt().
108 *
109 * This algorithm assumes that the counter is in the x lower bits
110 * of the IV (ivec), and that the application has full control over
111 * overflow and the rest of the IV. This implementation takes NO
112 * responsability for checking that the counter doesn't overflow
113 * into the rest of the IV when incremented.
114 */
115void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
116 const unsigned long length, const AES_KEY *key,
117 unsigned char ivec[AES_BLOCK_SIZE],
118 unsigned char ecount_buf[AES_BLOCK_SIZE],
119 unsigned int *num) {
120
121 unsigned int n;
122 unsigned long l=length;
123
124 assert(in && out && key && counter && num);
125 assert(*num < AES_BLOCK_SIZE);
126
127 n = *num;
128
129 while (l--) {
130 if (n == 0) {
131 AES_encrypt(ivec, ecount_buf, key);
132 AES_ctr128_inc(ivec);
133 }
134 *(out++) = *(in++) ^ ecount_buf[n];
135 n = (n+1) % AES_BLOCK_SIZE;
136 }
137
138 *num=n;
139}
diff --git a/src/lib/libcrypto/aes/aes_ecb.c b/src/lib/libcrypto/aes/aes_ecb.c
new file mode 100644
index 0000000000..28aa561c2d
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_ecb.c
@@ -0,0 +1,73 @@
1/* crypto/aes/aes_ecb.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef AES_DEBUG
53# ifndef NDEBUG
54# define NDEBUG
55# endif
56#endif
57#include <assert.h>
58
59#include <openssl/aes.h>
60#include "aes_locl.h"
61
62void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
63 const AES_KEY *key, const int enc) {
64
65 assert(in && out && key);
66 assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
67
68 if (AES_ENCRYPT == enc)
69 AES_encrypt(in, out, key);
70 else
71 AES_decrypt(in, out, key);
72}
73
diff --git a/src/lib/libcrypto/aes/aes_ige.c b/src/lib/libcrypto/aes/aes_ige.c
new file mode 100644
index 0000000000..45d7096181
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_ige.c
@@ -0,0 +1,323 @@
1/* crypto/aes/aes_ige.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include "cryptlib.h"
53
54#include <openssl/aes.h>
55#include "aes_locl.h"
56
/* Number of unsigned longs needed to hold one AES block. */
57#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
/* One AES block viewed as an array of unsigned longs, so that blocks can be
 * XORed a word at a time and copied by struct assignment. */
58typedef struct {
59 unsigned long data[N_WORDS];
60} aes_block_t;
61
62/* XXX: probably some better way to do this */
/* Set to 1 only when compiling for i386 or x86_64; 0 everywhere else. */
63#if defined(__i386__) || defined(__x86_64__)
64#define UNALIGNED_MEMOPS_ARE_FAST 1
65#else
66#define UNALIGNED_MEMOPS_ARE_FAST 0
67#endif
68
/*
 * load_block(d, s):  copy one block from byte pointer s into aes_block_t d.
 * store_block(d, s): copy aes_block_t s out to byte pointer d.
 *
 * When unaligned accesses are flagged as fast, both are a struct assignment
 * through a cast of the byte pointer; otherwise they fall back to memcpy(),
 * which makes no alignment assumption about the byte pointer.
 */
69#if UNALIGNED_MEMOPS_ARE_FAST
70#define load_block(d, s) (d) = *(const aes_block_t *)(s)
71#define store_block(d, s) *(aes_block_t *)(d) = (s)
72#else
73#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE)
74#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE)
75#endif
76
77/* N.B. The IV for this mode is _twice_ the block size */
78
/*
 * AES_ige_encrypt - process `length` bytes from `in` to `out`, chaining each
 * block with the previous output block and the previous input block.
 *
 *   in, out  input and output buffers; `length` must be a multiple of
 *            AES_BLOCK_SIZE (checked with OPENSSL_assert).
 *   key      key schedule handed to AES_encrypt()/AES_decrypt().
 *   ivec     two blocks: ivec[0..BLOCK) and ivec[BLOCK..2*BLOCK).  Updated on
 *            return so that a following call continues the chain.
 *   enc      AES_ENCRYPT or AES_DECRYPT (checked with OPENSSL_assert).
 *
 * Per block, with iv = first half of the chaining state and iv2 = second:
 *   encrypt:  out = AES_encrypt(in ^ iv) ^ iv2;   then iv = out, iv2 = in
 *   decrypt:  out = AES_decrypt(in ^ iv2) ^ iv;   then iv = in,  iv2 = out
 *
 * Each direction has two code paths.  The first works directly on the
 * caller's buffers through aes_block_t pointers and is taken only when
 * in != out and either UNALIGNED_MEMOPS_ARE_FAST is set or in, out and ivec
 * are all multiples of sizeof(long).  The second works on local copies, so
 * every input block is read before the matching output block is written.
 *
 * NOTE(review): the in != out test only detects identical pointers; buffers
 * that overlap at an offset still take the first path -- confirm callers
 * never do that.
 */
79void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
80 const unsigned long length, const AES_KEY *key,
81 unsigned char *ivec, const int enc)
82 {
83 unsigned long n;
84 unsigned long len;
85
86 OPENSSL_assert(in && out && key && ivec);
87 OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
88 OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
89
 /* From here on len counts whole blocks, not bytes. */
90 len = length / AES_BLOCK_SIZE;
91
92 if (AES_ENCRYPT == enc)
93 {
94 if (in != out &&
95 (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
96 {
 /* Direct path: chaining pointers start in ivec, then walk the buffers. */
97 aes_block_t *ivp = (aes_block_t *)ivec;
98 aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
99
100 while (len)
101 {
102 aes_block_t *inp = (aes_block_t *)in;
103 aes_block_t *outp = (aes_block_t *)out;
104
105 for(n=0 ; n < N_WORDS; ++n)
106 outp->data[n] = inp->data[n] ^ ivp->data[n];
107 AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
108 for(n=0 ; n < N_WORDS; ++n)
109 outp->data[n] ^= iv2p->data[n];
 /* Next block chains on this ciphertext block and this plaintext block. */
110 ivp = outp;
111 iv2p = inp;
112 --len;
113 in += AES_BLOCK_SIZE;
114 out += AES_BLOCK_SIZE;
115 }
 /* Save the final chaining state.  With length == 0 the loop never ran, so
  * ivp/iv2p still point into ivec and source equals destination here. */
116 memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
117 memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
118 }
119 else
120 {
 /* Copying path: all chaining state lives in locals. */
121 aes_block_t tmp, tmp2;
122 aes_block_t iv;
123 aes_block_t iv2;
124
125 load_block(iv, ivec);
126 load_block(iv2, ivec + AES_BLOCK_SIZE);
127
128 while (len)
129 {
 /* tmp keeps the plaintext block; tmp2 becomes the ciphertext block. */
130 load_block(tmp, in);
131 for(n=0 ; n < N_WORDS; ++n)
132 tmp2.data[n] = tmp.data[n] ^ iv.data[n];
133 AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
134 for(n=0 ; n < N_WORDS; ++n)
135 tmp2.data[n] ^= iv2.data[n];
136 store_block(out, tmp2);
137 iv = tmp2;
138 iv2 = tmp;
139 --len;
140 in += AES_BLOCK_SIZE;
141 out += AES_BLOCK_SIZE;
142 }
143 memcpy(ivec, iv.data, AES_BLOCK_SIZE);
144 memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
145 }
146 }
147 else
148 {
149 if (in != out &&
150 (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
151 {
 /* Direct decrypt path; mirrors the direct encrypt path above. */
152 aes_block_t *ivp = (aes_block_t *)ivec;
153 aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
154
155 while (len)
156 {
157 aes_block_t tmp;
158 aes_block_t *inp = (aes_block_t *)in;
159 aes_block_t *outp = (aes_block_t *)out;
160
 /* The XORed input goes into a local so the caller's input is not modified. */
161 for(n=0 ; n < N_WORDS; ++n)
162 tmp.data[n] = inp->data[n] ^ iv2p->data[n];
163 AES_decrypt((unsigned char *)tmp.data, (unsigned char *)outp->data, key);
164 for(n=0 ; n < N_WORDS; ++n)
165 outp->data[n] ^= ivp->data[n];
 /* Next block chains on this ciphertext (input) and this plaintext (output). */
166 ivp = inp;
167 iv2p = outp;
168 --len;
169 in += AES_BLOCK_SIZE;
170 out += AES_BLOCK_SIZE;
171 }
172 memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
173 memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
174 }
175 else
176 {
 /* Copying decrypt path. */
177 aes_block_t tmp, tmp2;
178 aes_block_t iv;
179 aes_block_t iv2;
180
181 load_block(iv, ivec);
182 load_block(iv2, ivec + AES_BLOCK_SIZE);
183
184 while (len)
185 {
 /* tmp2 keeps the untouched ciphertext block; tmp becomes the plaintext. */
186 load_block(tmp, in);
187 tmp2 = tmp;
188 for(n=0 ; n < N_WORDS; ++n)
189 tmp.data[n] ^= iv2.data[n];
190 AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
191 for(n=0 ; n < N_WORDS; ++n)
192 tmp.data[n] ^= iv.data[n];
193 store_block(out, tmp);
194 iv = tmp2;
195 iv2 = tmp;
196 --len;
197 in += AES_BLOCK_SIZE;
198 out += AES_BLOCK_SIZE;
199 }
200 memcpy(ivec, iv.data, AES_BLOCK_SIZE);
201 memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
202 }
203 }
204 }
205
206/*
207 * Note that it's effectively impossible to do biIGE in anything other
208 * than a single pass, so no provision is made for chaining.
209 */
210
211/* N.B. The IV for this mode is _four times_ the block size */
212
/*
 * AES_bi_ige_encrypt - process the whole message twice, once in each
 * direction, chaining as in AES_ige_encrypt on both passes.
 *
 *   in, out  input and output buffers of `length` bytes; `length` must be a
 *            multiple of AES_BLOCK_SIZE (checked with OPENSSL_assert).
 *   key      key schedule used for every AES_encrypt()/AES_decrypt() call.
 *   key2     NOTE(review): accepted but never referenced in this body; both
 *            passes use `key` -- confirm whether that is intended.
 *   ivec     four blocks, read only: blocks 0 and 1 seed the forward pass,
 *            blocks 2 and 3 seed the backward pass.  Not updated.
 *   enc      AES_ENCRYPT or AES_DECRYPT (checked with OPENSSL_assert).
 *
 * Encrypt: forward pass in -> out, then a backward pass over `out` in place.
 * Decrypt: backward pass in -> out, then a forward pass over `out` in place.
 */
213void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
214 const unsigned long length, const AES_KEY *key,
215 const AES_KEY *key2, const unsigned char *ivec,
216 const int enc)
217 {
218 unsigned long n;
219 unsigned long len = length;
220 unsigned char tmp[AES_BLOCK_SIZE];
221 unsigned char tmp2[AES_BLOCK_SIZE];
222 unsigned char tmp3[AES_BLOCK_SIZE];
223 unsigned char prev[AES_BLOCK_SIZE];
224 const unsigned char *iv;
225 const unsigned char *iv2;
226
227 OPENSSL_assert(in && out && key && ivec);
228 OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
229 OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
230
231 if (AES_ENCRYPT == enc)
232 {
233 /* XXX: Do a separate case for when in != out (strictly should
234 check for overlap, too) */
235
236 /* First the forward pass */
237 iv = ivec;
238 iv2 = ivec + AES_BLOCK_SIZE;
239 while (len >= AES_BLOCK_SIZE)
240 {
241 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
242 out[n] = in[n] ^ iv[n];
243 AES_encrypt(out, out, key);
244 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
245 out[n] ^= iv2[n];
 /* iv now points at the block just written; prev holds a copy of the input
  * block.  If in == out, that block has already been overwritten by the
  * stores above, so prev receives the output instead. */
246 iv = out;
247 memcpy(prev, in, AES_BLOCK_SIZE);
248 iv2 = prev;
249 len -= AES_BLOCK_SIZE;
250 in += AES_BLOCK_SIZE;
251 out += AES_BLOCK_SIZE;
252 }
253
254 /* And now backwards */
 /* out is one past the last block written here; it is stepped back a block
  * at a time and each block is re-encrypted in place. */
255 iv = ivec + AES_BLOCK_SIZE*2;
256 iv2 = ivec + AES_BLOCK_SIZE*3;
257 len = length;
258 while(len >= AES_BLOCK_SIZE)
259 {
260 out -= AES_BLOCK_SIZE;
261 /* XXX: reduce copies by alternating between buffers */
 /* tmp saves this block's pre-pass value; it becomes iv2 for the next block. */
262 memcpy(tmp, out, AES_BLOCK_SIZE);
263 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
264 out[n] ^= iv[n];
265 /* hexdump(stdout, "out ^ iv", out, AES_BLOCK_SIZE); */
266 AES_encrypt(out, out, key);
267 /* hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
268 /* hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
269 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
270 out[n] ^= iv2[n];
271 /* hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
272 iv = out;
273 memcpy(prev, tmp, AES_BLOCK_SIZE);
274 iv2 = prev;
275 len -= AES_BLOCK_SIZE;
276 }
277 }
278 else
279 {
280 /* First backwards */
 /* Start one past the end of both buffers and walk towards the front. */
281 iv = ivec + AES_BLOCK_SIZE*2;
282 iv2 = ivec + AES_BLOCK_SIZE*3;
283 in += length;
284 out += length;
285 while (len >= AES_BLOCK_SIZE)
286 {
287 in -= AES_BLOCK_SIZE;
288 out -= AES_BLOCK_SIZE;
 /* tmp is the working copy; tmp2 keeps the untouched input block so it can
  * serve as iv for the next block (via tmp3). */
289 memcpy(tmp, in, AES_BLOCK_SIZE);
290 memcpy(tmp2, in, AES_BLOCK_SIZE);
291 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
292 tmp[n] ^= iv2[n];
293 AES_decrypt(tmp, out, key);
294 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
295 out[n] ^= iv[n];
296 memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
297 iv = tmp3;
298 iv2 = out;
299 len -= AES_BLOCK_SIZE;
300 }
301
302 /* And now forwards */
 /* out is back at the first block; this pass reads and rewrites `out` in
  * place.  `in` is advanced below but no longer read. */
303 iv = ivec;
304 iv2 = ivec + AES_BLOCK_SIZE;
305 len = length;
306 while (len >= AES_BLOCK_SIZE)
307 {
308 memcpy(tmp, out, AES_BLOCK_SIZE);
309 memcpy(tmp2, out, AES_BLOCK_SIZE);
310 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
311 tmp[n] ^= iv2[n];
312 AES_decrypt(tmp, out, key);
313 for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
314 out[n] ^= iv[n];
315 memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
316 iv = tmp3;
317 iv2 = out;
318 len -= AES_BLOCK_SIZE;
319 in += AES_BLOCK_SIZE;
320 out += AES_BLOCK_SIZE;
321 }
322 }
323 }
diff --git a/src/lib/libcrypto/aes/aes_locl.h b/src/lib/libcrypto/aes/aes_locl.h
new file mode 100644
index 0000000000..054b442d41
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_locl.h
@@ -0,0 +1,89 @@
1/* crypto/aes/aes_locl.h -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#ifndef HEADER_AES_LOCL_H
53#define HEADER_AES_LOCL_H
54
55#include <openssl/e_os2.h>
56
57#ifdef OPENSSL_NO_AES
58#error AES is disabled.
59#endif
60
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64
/*
 * GETU32(p)      read four bytes at p as one u32, p[0] being the most
 *                significant byte.
 * PUTU32(ct, st) store u32 st to four bytes at ct, most significant first.
 *
 * On MSVC for x86/x64 the value is loaded or stored through a u32 pointer
 * and byte-reversed by SWAP() (two 8-bit rotates plus masks); elsewhere the
 * bytes are accessed one at a time, so no alignment or endianness assumption
 * is made about the pointer.
 */
65#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
66# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
67# define GETU32(p) SWAP(*((u32 *)(p)))
68# define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
69#else
70# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
71# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
72#endif
73
/* u32 is unsigned long when AES_LONG is defined, unsigned int otherwise.
 * The macros above use u32/u8 before these typedefs appear; that is fine
 * because macros are only expanded where they are used. */
74#ifdef AES_LONG
75typedef unsigned long u32;
76#else
77typedef unsigned int u32;
78#endif
79typedef unsigned short u16;
80typedef unsigned char u8;
81
/* Upper bounds for a 256-bit key: 8 and 32 respectively.  Presumably MAXKC
 * counts 32-bit key words and MAXKB key bytes, with MAXNR the largest round
 * count -- TODO confirm against aes_core.c. */
82#define MAXKC (256/32)
83#define MAXKB (256/8)
84#define MAXNR 14
85
86/* This controls loop-unrolling in aes_core.c */
87#undef FULL_UNROLL
88
89#endif /* !HEADER_AES_LOCL_H */
diff --git a/src/lib/libcrypto/aes/aes_misc.c b/src/lib/libcrypto/aes/aes_misc.c
new file mode 100644
index 0000000000..4fead1b4c7
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_misc.c
@@ -0,0 +1,64 @@
1/* crypto/aes/aes_misc.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include <openssl/opensslv.h>
53#include <openssl/aes.h>
54#include "aes_locl.h"
55
/* Version string: the literal "AES" immediately followed by
 * OPENSSL_VERSION_PTEXT (adjacent string literals are concatenated). */
56const char AES_version[]="AES" OPENSSL_VERSION_PTEXT;
57
/*
 * AES_options - report which build-time variant of the AES core was chosen.
 *
 * Returns a pointer to a static string: "aes(full)" when FULL_UNROLL is
 * defined at compile time, "aes(partial)" otherwise.  The caller must not
 * modify or free it.
 */
const char *AES_options(void)
{
	const char *variant;

#ifdef FULL_UNROLL
	variant = "aes(full)";
#else
	variant = "aes(partial)";
#endif

	return variant;
}
diff --git a/src/lib/libcrypto/aes/aes_ofb.c b/src/lib/libcrypto/aes/aes_ofb.c
new file mode 100644
index 0000000000..f358bb39e2
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_ofb.c
@@ -0,0 +1,142 @@
1/* crypto/aes/aes_ofb.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
52 * All rights reserved.
53 *
54 * This package is an SSL implementation written
55 * by Eric Young (eay@cryptsoft.com).
56 * The implementation was written so as to conform with Netscapes SSL.
57 *
58 * This library is free for commercial and non-commercial use as long as
59 * the following conditions are aheared to. The following conditions
60 * apply to all code found in this distribution, be it the RC4, RSA,
61 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
62 * included with this distribution is covered by the same copyright terms
63 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
64 *
65 * Copyright remains Eric Young's, and as such any Copyright notices in
66 * the code are not to be removed.
67 * If this package is used in a product, Eric Young should be given attribution
68 * as the author of the parts of the library used.
69 * This can be in the form of a textual message at program startup or
70 * in documentation (online or textual) provided with the package.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 * must display the following acknowledgement:
82 * "This product includes cryptographic software written by
83 * Eric Young (eay@cryptsoft.com)"
84 * The word 'cryptographic' can be left out if the rouines from the library
85 * being used are not cryptographic related :-).
86 * 4. If you include any Windows specific code (or a derivative thereof) from
87 * the apps directory (application code) you must include an acknowledgement:
88 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
89 *
90 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
91 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
94 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100 * SUCH DAMAGE.
101 *
102 * The licence and distribution terms for any publically available version or
103 * derivative of this code cannot be changed. i.e. this code cannot simply be
104 * copied and put under another distribution licence
105 * [including the GNU Public Licence.]
106 */
107
108#ifndef AES_DEBUG
109# ifndef NDEBUG
110# define NDEBUG
111# endif
112#endif
113#include <assert.h>
114
115#include <openssl/aes.h>
116#include "aes_locl.h"
117
118/* The input and output encrypted as though 128bit ofb mode is being
119 * used. The extra state information to record how much of the
120 * 128bit block we have used is contained in *num;
121 */
122void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
123 const unsigned long length, const AES_KEY *key,
124 unsigned char *ivec, int *num) {
125
126 unsigned int n;
127 unsigned long l=length;
128
129 assert(in && out && key && ivec && num);
130
131 n = *num;
132
133 while (l--) {
134 if (n == 0) {
135 AES_encrypt(ivec, ivec, key);
136 }
137 *(out++) = *(in++) ^ ivec[n];
138 n = (n+1) % AES_BLOCK_SIZE;
139 }
140
141 *num=n;
142}
diff --git a/src/lib/libcrypto/aes/aes_wrap.c b/src/lib/libcrypto/aes/aes_wrap.c
new file mode 100644
index 0000000000..9feacd65d8
--- /dev/null
+++ b/src/lib/libcrypto/aes/aes_wrap.c
@@ -0,0 +1,259 @@
1/* crypto/aes/aes_wrap.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54#include "cryptlib.h"
55#include <openssl/aes.h>
56#include <openssl/bio.h>
57
/* Eight bytes of 0xA6, used by AES_wrap_key() and AES_unwrap_key() below
 * whenever the caller passes a NULL iv. */
58static const unsigned char default_iv[] = {
59 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
60};
61
62int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
63 unsigned char *out,
64 const unsigned char *in, unsigned int inlen)
65 {
66 unsigned char *A, B[16], *R;
67 unsigned int i, j, t;
68 if ((inlen & 0x7) || (inlen < 8))
69 return -1;
70 A = B;
71 t = 1;
72 memcpy(out + 8, in, inlen);
73 if (!iv)
74 iv = default_iv;
75
76 memcpy(A, iv, 8);
77
78 for (j = 0; j < 6; j++)
79 {
80 R = out + 8;
81 for (i = 0; i < inlen; i += 8, t++, R += 8)
82 {
83 memcpy(B + 8, R, 8);
84 AES_encrypt(B, B, key);
85 A[7] ^= (unsigned char)(t & 0xff);
86 if (t > 0xff)
87 {
88 A[6] ^= (unsigned char)((t & 0xff) >> 8);
89 A[5] ^= (unsigned char)((t & 0xff) >> 16);
90 A[4] ^= (unsigned char)((t & 0xff) >> 24);
91 }
92 memcpy(R, B + 8, 8);
93 }
94 }
95 memcpy(out, A, 8);
96 return inlen + 8;
97 }
98
99int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
100 unsigned char *out,
101 const unsigned char *in, unsigned int inlen)
102 {
103 unsigned char *A, B[16], *R;
104 unsigned int i, j, t;
105 inlen -= 8;
106 if (inlen & 0x7)
107 return -1;
108 if (inlen < 8)
109 return -1;
110 A = B;
111 t = 6 * (inlen >> 3);
112 memcpy(A, in, 8);
113 memcpy(out, in + 8, inlen);
114 for (j = 0; j < 6; j++)
115 {
116 R = out + inlen - 8;
117 for (i = 0; i < inlen; i += 8, t--, R -= 8)
118 {
119 A[7] ^= (unsigned char)(t & 0xff);
120 if (t > 0xff)
121 {
122 A[6] ^= (unsigned char)((t & 0xff) >> 8);
123 A[5] ^= (unsigned char)((t & 0xff) >> 16);
124 A[4] ^= (unsigned char)((t & 0xff) >> 24);
125 }
126 memcpy(B + 8, R, 8);
127 AES_decrypt(B, B, key);
128 memcpy(R, B + 8, 8);
129 }
130 }
131 if (!iv)
132 iv = default_iv;
133 if (memcmp(A, iv, 8))
134 {
135 OPENSSL_cleanse(out, inlen);
136 return 0;
137 }
138 return inlen;
139 }
140
141#ifdef AES_WRAP_TEST
142
143int AES_wrap_unwrap_test(const unsigned char *kek, int keybits,
144 const unsigned char *iv,
145 const unsigned char *eout,
146 const unsigned char *key, int keylen)
147 {
148 unsigned char *otmp = NULL, *ptmp = NULL;
149 int r, ret = 0;
150 AES_KEY wctx;
151 otmp = OPENSSL_malloc(keylen + 8);
152 ptmp = OPENSSL_malloc(keylen);
153 if (!otmp || !ptmp)
154 return 0;
155 if (AES_set_encrypt_key(kek, keybits, &wctx))
156 goto err;
157 r = AES_wrap_key(&wctx, iv, otmp, key, keylen);
158 if (r <= 0)
159 goto err;
160
161 if (eout && memcmp(eout, otmp, keylen))
162 goto err;
163
164 if (AES_set_decrypt_key(kek, keybits, &wctx))
165 goto err;
166 r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r);
167
168 if (memcmp(key, ptmp, keylen))
169 goto err;
170
171 ret = 1;
172
173 err:
174 if (otmp)
175 OPENSSL_free(otmp);
176 if (ptmp)
177 OPENSSL_free(ptmp);
178
179 return ret;
180
181 }
182
183
184
/*
 * Test driver (built only with AES_WRAP_TEST): runs six wrap/unwrap round
 * trips against fixed expected outputs and prints each result to stderr.
 *
 * Returns 0 if every case reported success and 1 otherwise, so the outcome
 * is visible in the process exit status.
 */
int main(int argc, char **argv)
{

static const unsigned char kek[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
};

static const unsigned char key[] = {
  0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
  0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};

static const unsigned char e1[] = {
  0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47,
  0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82,
  0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5
};

static const unsigned char e2[] = {
  0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35,
  0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2,
  0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d
};

static const unsigned char e3[] = {
  0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2,
  0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a,
  0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7
};

static const unsigned char e4[] = {
  0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32,
  0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc,
  0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93,
  0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2
};

static const unsigned char e5[] = {
  0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f,
  0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4,
  0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95,
  0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1
};

static const unsigned char e6[] = {
  0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4,
  0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26,
  0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26,
  0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b,
  0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21
};

	int ret;
	int failed = 0;	/* becomes non-zero once any case fails */

	/* The parameters are not used; the casts only silence warnings. */
	(void)argc;
	(void)argv;

	ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;
	ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32);
	fprintf(stderr, "Key test result %d\n", ret);
	failed |= !ret;

	return failed ? 1 : 0;
}
257
258
259#endif
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
new file mode 100644
index 0000000000..3da307bef9
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -0,0 +1,1532 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 3.6.
10#
11# You might fail to appreciate this module performance from the first
12# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered
13# to be *the* best Intel C compiler without -KPIC, performance appears
14# to be virtually identical... But try to re-configure with shared
15# library support... Aha! Intel compiler "suddenly" lags behind by 30%
16# [on P4, more on others]:-) And if compared to position-independent
17# code generated by GNU C, this code performs *more* than *twice* as
18# fast! Yes, all this buzz about PIC means that unlike other hand-
19# coded implementations, this one was explicitly designed to be safe
20# to use even in shared library context... This also means that this
21# code isn't necessarily absolutely fastest "ever," because in order
22# to achieve position independence an extra register has to be
23# off-loaded to stack, which affects the benchmark result.
24#
25# Special note about instruction choice. Do you recall RC4_INT code
26# performing poorly on P4? It might be the time to figure out why.
27# RC4_INT code implies effective address calculations in base+offset*4
28# form. Trouble is that it seems that offset scaling turned to be
29# critical path... At least eliminating scaling resulted in 2.8x RC4
30# performance improvement [as you might recall]. As AES code is hungry
31# for scaling too, I [try to] avoid the latter by favoring off-by-2
32# shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF.
33#
34# As was shown by Dean Gaudet <dean@arctic.org>, the above note turned
35# void. Performance improvement with off-by-2 shifts was observed on
36# intermediate implementation, which was spilling yet another register
37# to stack... Final offset*4 code below runs just a tad faster on P4,
38# but exhibits up to 10% improvement on other cores.
39#
40# Second version is "monolithic" replacement for aes_core.c, which in
41# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key.
42# This made it possible to implement little-endian variant of the
43# algorithm without modifying the base C code. Motivating factor for
44# the undertaken effort was that it appeared that in tight IA-32
45# register window little-endian flavor could achieve slightly higher
46# Instruction Level Parallelism, and it indeed resulted in up to 15%
47# better performance on most recent µ-archs...
48#
49# Third version adds AES_cbc_encrypt implementation, which resulted in
50# up to 40% performance improvement of CBC benchmark results. 40% was
51# observed on P4 core, where "overall" improvement coefficient, i.e. if
52# compared to PIC generated by GCC and in CBC mode, was observed to be
53# as large as 4x:-) CBC performance is virtually identical to ECB now
54# and on some platforms even better, e.g. 17.6 "small" cycles/byte on
55# Opteron, because certain function prologues and epilogues are
56# effectively taken out of the loop...
57#
58# Version 3.2 implements compressed tables and prefetch of these tables
59# in CBC[!] mode. Former means that 3/4 of table references are now
60# misaligned, which unfortunately has negative impact on elder IA-32
61# implementations, Pentium suffered 30% penalty, PIII - 10%.
62#
63# Version 3.3 avoids L1 cache aliasing between stack frame and
64# S-boxes, and 3.4 - L1 cache aliasing even between key schedule. The
65# latter is achieved by copying the key schedule to controlled place in
66# stack. This unfortunately has rather strong impact on small block CBC
67# performance, ~2x deterioration on 16-byte block if compared to 3.3.
68#
69# Version 3.5 checks if there is L1 cache aliasing between user-supplied
70# key schedule and S-boxes and abstains from copying the former if
71# there is no. This allows end-user to consciously retain small block
72# performance by aligning key schedule in specific manner.
73#
74# Version 3.6 compresses Td4 to 256 bytes and prefetches it in ECB.
75#
76# Current ECB performance numbers for 128-bit key in CPU cycles per
77# processed byte [measure commonly used by AES benchmarkers] are:
78#
79# small footprint fully unrolled
80# P4 24 22
81# AMD K8 20 19
82# PIII 25 23
83# Pentium 81 78
84
# Pull in the perlasm x86 back end and initialise it from the command line.
85push(@INC,"perlasm","../../perlasm");
86require "x86asm.pl";
87
88&asm_init($ARGV[0],"aes-586.pl",$ARGV[$#ARGV] eq "386");
89
# Register allocation shared by every generator below.
90$s0="eax"; # state word 0
91$s1="ebx"; # state word 1
92$s2="ecx"; # state word 2
93$s3="edx"; # state word 3
94$key="edi"; # pointer to the current round key; doubles as scratch inside a round
95$acc="esi"; # accumulator/scratch
96
97$compromise=0; # $compromise=128 abstains from copying key
98 # schedule to stack when encrypting inputs
99 # shorter than 128 bytes at the cost of
100 # risking aliasing with S-boxes. In return
101 # you get way better, up to +70%, small block
102 # performance.
103$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
104 # recent µ-archs], but ~5 times smaller!
105 # I favor compact code to minimize cache
106 # contention and in hope to "collect" 5% back
107 # in real-life applications...
108$vertical_spin=0; # shift "vertically" defaults to 0, because of
109 # its proof-of-concept status...
110
111# Note that there is no decvert(), as well as last encryption round is
112# performed with "horizontal" shifts. This is because this "vertical"
113# implementation [one which groups shifts on a given $s[i] to form a
114# "column," unlike "horizontal" one, which groups shifts on different
115# $s[i] to form a "row"] is work in progress. It was observed to run
116# few percents faster on Intel cores, but not AMD. On AMD K8 core it's
117# whole 12% slower:-( So we face a trade-off... Shall it be resolved
118# some day? Till then the code is considered experimental and by
119# default remains dormant...
120
# encvert($te,@s) - emit one full encryption round in "vertical" form.
#
#   $te  register holding the base address of the Te table
#   @s   the four state registers (s0..s3)
#
# All four byte lookups of one input word are grouped together, in the
# order s0, s3, s2, s1. $acc and $key serve as scratch ($v0/$v1); s2 and
# s1 are spilled to 4(%esp)/8(%esp) and the key pointer is reloaded from
# 12(%esp) at the end. Table entries are addressed with an 8-byte stride
# and a byte offset of 0..3.
121sub encvert()
122{ my ($te,@s) = @_;
# Both scratch names are declared lexical here; the former
# "my $v0 = $acc, $v1 = $key;" only declared $v0 and assigned $v1 as a
# package global.
123 my ($v0,$v1) = ($acc,$key);
124
125 &mov ($v0,$s[3]); # copy s3
126 &mov (&DWP(4,"esp"),$s[2]); # save s2
127 &mov ($v1,$s[0]); # copy s0
128 &mov (&DWP(8,"esp"),$s[1]); # save s1
129
130 &movz ($s[2],&HB($s[0]));
131 &and ($s[0],0xFF);
132 &mov ($s[0],&DWP(0,$te,$s[0],8)); # s0>>0
133 &shr ($v1,16);
134 &mov ($s[3],&DWP(3,$te,$s[2],8)); # s0>>8
135 &movz ($s[1],&HB($v1));
136 &and ($v1,0xFF);
137 &mov ($s[2],&DWP(2,$te,$v1,8)); # s0>>16
138 &mov ($v1,$v0);
139 &mov ($s[1],&DWP(1,$te,$s[1],8)); # s0>>24
140
141 &and ($v0,0xFF);
142 &xor ($s[3],&DWP(0,$te,$v0,8)); # s3>>0
143 &movz ($v0,&HB($v1));
144 &shr ($v1,16);
145 &xor ($s[2],&DWP(3,$te,$v0,8)); # s3>>8
146 &movz ($v0,&HB($v1));
147 &and ($v1,0xFF);
148 &xor ($s[1],&DWP(2,$te,$v1,8)); # s3>>16
149 &mov ($v1,&DWP(4,"esp")); # restore s2
150 &xor ($s[0],&DWP(1,$te,$v0,8)); # s3>>24
151
152 &mov ($v0,$v1);
153 &and ($v1,0xFF);
154 &xor ($s[2],&DWP(0,$te,$v1,8)); # s2>>0
155 &movz ($v1,&HB($v0));
156 &shr ($v0,16);
157 &xor ($s[1],&DWP(3,$te,$v1,8)); # s2>>8
158 &movz ($v1,&HB($v0));
159 &and ($v0,0xFF);
160 &xor ($s[0],&DWP(2,$te,$v0,8)); # s2>>16
161 &mov ($v0,&DWP(8,"esp")); # restore s1
162 &xor ($s[3],&DWP(1,$te,$v1,8)); # s2>>24
163
164 &mov ($v1,$v0);
165 &and ($v0,0xFF);
166 &xor ($s[1],&DWP(0,$te,$v0,8)); # s1>>0
167 &movz ($v0,&HB($v1));
168 &shr ($v1,16);
169 &xor ($s[0],&DWP(3,$te,$v0,8)); # s1>>8
170 &movz ($v0,&HB($v1));
171 &and ($v1,0xFF);
172 &xor ($s[3],&DWP(2,$te,$v1,8)); # s1>>16
173 &mov ($key,&DWP(12,"esp")); # reincarnate v1 as key
174 &xor ($s[2],&DWP(1,$te,$v0,8)); # s1>>24
175}
176
# encstep($i,$te,@s) - emit the code producing output word $i (0..3) of
# one "horizontal" encryption round.
#
#   $i   index of the output word being computed
#   $te  register holding the base address of the Te table
#   @s   state registers, rotated by the caller so that $s[0] supplies
#        the low byte, $s[1] bits 8..15, $s[2] bits 16..23 and $s[3]
#        bits 24..31
#
# The word is accumulated in $acc, except for $i==3 where it is built in
# $s[0] itself. Words 0 and 1 are parked at 4(%esp) and 8(%esp); step 3
# reloads them into $s[1]/$s[2], moves word 2 from $acc into $s[3] and
# restores $key from 12(%esp), because $key is used as the temporary in
# steps 0..2.
177sub encstep()
178{ my ($i,$te,@s) = @_;
179 my $tmp = $key;
180 my $out = $i==3?$s[0]:$acc;
181
182 # lines marked with #%e?x[i] denote "reordered" instructions...
183 if ($i==3) { &mov ($key,&DWP(12,"esp")); }##%edx
184 else { &mov ($out,$s[0]);
185 &and ($out,0xFF); }
186 if ($i==1) { &shr ($s[0],16); }#%ebx[1]
187 if ($i==2) { &shr ($s[0],24); }#%ecx[2]
188 &mov ($out,&DWP(0,$te,$out,8));
189
190 if ($i==3) { $tmp=$s[1]; }##%eax
191 &movz ($tmp,&HB($s[1]));
192 &xor ($out,&DWP(3,$te,$tmp,8));
193
194 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); }##%ebx
195 else { &mov ($tmp,$s[2]);
196 &shr ($tmp,16); }
197 if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
198 &and ($tmp,0xFF);
199 &xor ($out,&DWP(2,$te,$tmp,8));
200
201 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }##%ecx
202 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
203 else { &mov ($tmp,$s[3]);
204 &shr ($tmp,24) }
205 &xor ($out,&DWP(1,$te,$tmp,8));
206 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
207 if ($i==3) { &mov ($s[3],$acc); }
208 &comment();
209}
210
# enclast($i,$te,@s) - emit the code producing output word $i (0..3) of
# the final encryption round.
#
#   $i   index of the output word being computed
#   $te  register holding the base address of the Te table
#   @s   state registers, rotated by the caller exactly as for encstep()
#
# Same register and stack choreography as encstep(), but each table
# lookup is masked down to a single byte lane (0xff, 0xff00, 0xff0000,
# 0xff000000) before being merged into the result, instead of XOR-ing
# whole table words.
211sub enclast()
212{ my ($i,$te,@s)=@_;
213 my $tmp = $key;
214 my $out = $i==3?$s[0]:$acc;
215
216 if ($i==3) { &mov ($key,&DWP(12,"esp")); }##%edx
217 else { &mov ($out,$s[0]); }
218 &and ($out,0xFF);
219 if ($i==1) { &shr ($s[0],16); }#%ebx[1]
220 if ($i==2) { &shr ($s[0],24); }#%ecx[2]
221 &mov ($out,&DWP(2,$te,$out,8));
222 &and ($out,0x000000ff);
223
224 if ($i==3) { $tmp=$s[1]; }##%eax
225 &movz ($tmp,&HB($s[1]));
226 &mov ($tmp,&DWP(0,$te,$tmp,8));
227 &and ($tmp,0x0000ff00);
228 &xor ($out,$tmp);
229
230 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); }##%ebx
231 else { &mov ($tmp,$s[2]);
232 &shr ($tmp,16); }
233 if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
234 &and ($tmp,0xFF);
235 &mov ($tmp,&DWP(0,$te,$tmp,8));
236 &and ($tmp,0x00ff0000);
237 &xor ($out,$tmp);
238
239 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }##%ecx
240 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
241 else { &mov ($tmp,$s[3]);
242 &shr ($tmp,24); }
243 &mov ($tmp,&DWP(2,$te,$tmp,8));
244 &and ($tmp,0xff000000);
245 &xor ($out,$tmp);
246 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
247 if ($i==3) { &mov ($s[3],$acc); }
248}
249
# _data_word(LIST) - emit every word of LIST twice via &data_word, so
# each table entry occupies 8 bytes (the stride the round code indexes
# with). Emission stops at the first undefined argument.
250sub _data_word() { my @words=@_; foreach my $w (@words) { last if (!defined($w)); &data_word($w,$w); } }
251
# _x86_AES_encrypt - internal block-encryption routine.
#
# On entry the four state words are in $s0..$s3, $key points at the key
# schedule and "ebp" holds the address of AES_Te; the caller must already
# have set up the stack frame. Frame slots used here: 4(%esp)/8(%esp)
# temporaries, 12(%esp) current round-key pointer, 16(%esp) end-of-loop
# marker. The result is left in $s0..$s3.
#
# With $small_footprint the middle rounds are one loop; otherwise they
# are fully unrolled with entry points for 10, 12 and 14 rounds.
252&public_label("AES_Te");
253&function_begin_C("_x86_AES_encrypt");
254 if ($vertical_spin) {
255 # I need high parts of volatile registers to be accessible...
256 &exch ($s1="edi",$key="ebx");
257 &mov ($s2="esi",$acc="ecx");
258 }
259
260 # note that caller is expected to allocate stack frame for me!
261 &mov (&DWP(12,"esp"),$key); # save key
262
263 &xor ($s0,&DWP(0,$key)); # xor with key
264 &xor ($s1,&DWP(4,$key));
265 &xor ($s2,&DWP(8,$key));
266 &xor ($s3,&DWP(12,$key));
267
268 &mov ($acc,&DWP(240,$key)); # load key->rounds
269
270 if ($small_footprint) {
271 &lea ($acc,&DWP(-2,$acc,$acc)); # acc = 2*rounds-2
272 &lea ($acc,&DWP(0,$key,$acc,8)); # acc = key+16*(rounds-1)
273 &mov (&DWP(16,"esp"),$acc); # end of key schedule
274 &align (4);
275 &set_label("loop");
276 if ($vertical_spin) {
277 &encvert("ebp",$s0,$s1,$s2,$s3);
278 } else {
279 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
280 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
281 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
282 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
283 }
284 &add ($key,16); # advance rd_key
285 &xor ($s0,&DWP(0,$key));
286 &xor ($s1,&DWP(4,$key));
287 &xor ($s2,&DWP(8,$key));
288 &xor ($s3,&DWP(12,$key));
289 &cmp ($key,&DWP(16,"esp")); # flags from this compare feed the jb below
290 &mov (&DWP(12,"esp"),$key);
291 &jb (&label("loop"));
292 }
293 else {
294 &cmp ($acc,10);
295 &jle (&label("10rounds"));
296 &cmp ($acc,12);
297 &jle (&label("12rounds"));
298
299 &set_label("14rounds");
300 for ($i=1;$i<3;$i++) {
301 if ($vertical_spin) {
302 &encvert("ebp",$s0,$s1,$s2,$s3);
303 } else {
304 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
305 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
306 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
307 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
308 }
309 &xor ($s0,&DWP(16*$i+0,$key));
310 &xor ($s1,&DWP(16*$i+4,$key));
311 &xor ($s2,&DWP(16*$i+8,$key));
312 &xor ($s3,&DWP(16*$i+12,$key));
313 }
314 &add ($key,32);
315 &mov (&DWP(12,"esp"),$key); # advance rd_key
316 &set_label("12rounds");
317 for ($i=1;$i<3;$i++) {
318 if ($vertical_spin) {
319 &encvert("ebp",$s0,$s1,$s2,$s3);
320 } else {
321 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
322 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
323 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
324 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
325 }
326 &xor ($s0,&DWP(16*$i+0,$key));
327 &xor ($s1,&DWP(16*$i+4,$key));
328 &xor ($s2,&DWP(16*$i+8,$key));
329 &xor ($s3,&DWP(16*$i+12,$key));
330 }
331 &add ($key,32);
332 &mov (&DWP(12,"esp"),$key); # advance rd_key
333 &set_label("10rounds");
334 for ($i=1;$i<10;$i++) {
335 if ($vertical_spin) {
336 &encvert("ebp",$s0,$s1,$s2,$s3);
337 } else {
338 &encstep(0,"ebp",$s0,$s1,$s2,$s3);
339 &encstep(1,"ebp",$s1,$s2,$s3,$s0);
340 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
341 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
342 }
343 &xor ($s0,&DWP(16*$i+0,$key));
344 &xor ($s1,&DWP(16*$i+4,$key));
345 &xor ($s2,&DWP(16*$i+8,$key));
346 &xor ($s3,&DWP(16*$i+12,$key));
347 }
348 }
349
350 if ($vertical_spin) {
351 # "reincarnate" some registers for "horizontal" spin...
352 &mov ($s1="ebx",$key="edi");
353 &mov ($s2="ecx",$acc="esi");
354 }
# final round, followed by the last round-key addition
355 &enclast(0,"ebp",$s0,$s1,$s2,$s3);
356 &enclast(1,"ebp",$s1,$s2,$s3,$s0);
357 &enclast(2,"ebp",$s2,$s3,$s0,$s1);
358 &enclast(3,"ebp",$s3,$s0,$s1,$s2);
359
360 &add ($key,$small_footprint?16:160);
361 &xor ($s0,&DWP(0,$key));
362 &xor ($s1,&DWP(4,$key));
363 &xor ($s2,&DWP(8,$key));
364 &xor ($s3,&DWP(12,$key));
365
366 &ret ();
368&set_label("AES_Te",64); # Yes! I keep it in the code segment!
369 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
370 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
371 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
372 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
373 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
374 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
375 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
376 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
377 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
378 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
379 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
380 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
381 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
382 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
383 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
384 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
385 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
386 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
387 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
388 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
389 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
390 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
391 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
392 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
393 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
394 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
395 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
396 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
397 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
398 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
399 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
400 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
401 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
402 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
403 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
404 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
405 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
406 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
407 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
408 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
409 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
410 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
411 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
412 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
413 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
414 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
415 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
416 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
417 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
418 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
419 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
420 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
421 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
422 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
423 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
424 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
425 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
426 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
427 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
428 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
429 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
430 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
431 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
432 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
433#rcon:
434 &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008);
435 &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
436 &data_word(0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0);
437&function_end_B("_x86_AES_encrypt");
438
439# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public entry point: builds an aligned scratch frame, points "ebp" at
# AES_Te in a position-independent way, loads the 16-byte input block
# into $s0..$s3, calls _x86_AES_encrypt and stores the result to out.
440&public_label("AES_Te");
441&function_begin("AES_encrypt");
442 &mov ($acc,&wparam(0)); # load inp
443 &mov ($key,&wparam(2)); # load key
444
# Carve out the frame: round %esp down to a 64-byte boundary, then bias
# it by 4 so that %esp is 64-byte aligned again once the call below has
# pushed its return address. The caller's %esp is kept at 16(%esp).
445 &mov ($s0,"esp");
446 &sub ("esp",24);
447 &and ("esp",-64);
448 &add ("esp",4);
449 &mov (&DWP(16,"esp"),$s0);
450
451 &call (&label("pic_point")); # make it PIC!
452 &set_label("pic_point");
453 &blindpop("ebp");
454 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
455
456 &mov ($s0,&DWP(0,$acc)); # load input data
457 &mov ($s1,&DWP(4,$acc));
458 &mov ($s2,&DWP(8,$acc));
459 &mov ($s3,&DWP(12,$acc));
460
461 &call ("_x86_AES_encrypt");
462
463 &mov ("esp",&DWP(16,"esp")); # restore the caller's %esp
464
465 &mov ($acc,&wparam(1)); # load out
466 &mov (&DWP(0,$acc),$s0); # write output data
467 &mov (&DWP(4,$acc),$s1);
468 &mov (&DWP(8,$acc),$s2);
469 &mov (&DWP(12,$acc),$s3);
470&function_end("AES_encrypt");
471
472#------------------------------------------------------------------#
473
# decstep($i,$td,@s) - emit the code producing output word $i (0..3) of
# one decryption round.
#
#   $i   index of the output word being computed
#   $td  register holding the base address of the Td table
#   @s   state registers, rotated by the caller so that $s[0] supplies
#        the low byte, $s[1] bits 8..15, $s[2] bits 16..23 and $s[3]
#        bits 24..31
#
# The word is accumulated in $acc, except for $i==3 where it is built in
# $s[0]. Words 0 and 1 are parked at 4(%esp) and 8(%esp); step 3 moves
# word 2 from $acc into $s[1], reloads word 1 into $s[2] and word 0 into
# $s[3], and restores $key from 12(%esp).
474sub decstep()
475{ my ($i,$td,@s) = @_;
476 my $tmp = $key;
477 my $out = $i==3?$s[0]:$acc;
478
479 # no instructions are reordered, as performance appears
480 # optimal... or rather that all attempts to reorder didn't
481 # result in better performance [which by the way is not a
482 # bit lower than encryption].
483 if($i==3) { &mov ($key,&DWP(12,"esp")); }
484 else { &mov ($out,$s[0]); }
485 &and ($out,0xFF);
486 &mov ($out,&DWP(0,$td,$out,8));
487
488 if ($i==3) { $tmp=$s[1]; }
489 &movz ($tmp,&HB($s[1]));
490 &xor ($out,&DWP(3,$td,$tmp,8));
491
492 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
493 else { &mov ($tmp,$s[2]); }
494 &shr ($tmp,16);
495 &and ($tmp,0xFF);
496 &xor ($out,&DWP(2,$td,$tmp,8));
497
498 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
499 else { &mov ($tmp,$s[3]); }
500 &shr ($tmp,24);
501 &xor ($out,&DWP(1,$td,$tmp,8));
502 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
503 if ($i==3) { &mov ($s[3],&DWP(4,"esp")); }
504 &comment();
505}
506
# declast($i,$td,@s) - emit the code producing output word $i (0..3) of
# the final decryption round.
#
#   $i   index of the output word being computed
#   $td  register holding the base address of the Td table
#   @s   state registers, rotated by the caller exactly as for decstep()
#
# Instead of the 8-byte-stride word table, each byte is looked up in the
# byte table that starts 2048 bytes past $td, then shifted into its lane
# (0, 8, 16, 24) and merged into the result. Register and stack handling
# otherwise mirrors decstep().
507sub declast()
508{ my ($i,$td,@s)=@_;
509 my $tmp = $key;
510 my $out = $i==3?$s[0]:$acc;
511
512 if($i==3) { &mov ($key,&DWP(12,"esp")); }
513 else { &mov ($out,$s[0]); }
514 &and ($out,0xFF);
515 &movz ($out,&BP(2048,$td,$out,1));
516
517 if ($i==3) { $tmp=$s[1]; }
518 &movz ($tmp,&HB($s[1]));
519 &movz ($tmp,&BP(2048,$td,$tmp,1));
520 &shl ($tmp,8);
521 &xor ($out,$tmp);
522
523 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
524 else { &mov ($tmp,$s[2]); }
525 &shr ($tmp,16);
526 &and ($tmp,0xFF);
527 &movz ($tmp,&BP(2048,$td,$tmp,1));
528 &shl ($tmp,16);
529 &xor ($out,$tmp);
530
531 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
532 else { &mov ($tmp,$s[3]); }
533 &shr ($tmp,24);
534 &movz ($tmp,&BP(2048,$td,$tmp,1));
535 &shl ($tmp,24);
536 &xor ($out,$tmp);
537 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
538 if ($i==3) { &mov ($s[3],&DWP(4,"esp")); }
539}
540
# _x86_AES_decrypt - internal block-decryption routine.
#
# On entry the four state words are in $s0..$s3, $key points at the key
# schedule and "ebp" holds the address of AES_Td; the caller must already
# have set up the stack frame. Frame slots used here: 4(%esp)/8(%esp)
# temporaries, 12(%esp) current round-key pointer, 16(%esp) end-of-loop
# marker. The result is left in $s0..$s3.
#
# With $small_footprint the middle rounds are one loop; otherwise they
# are fully unrolled with entry points for 10, 12 and 14 rounds.
541&public_label("AES_Td");
542&function_begin_C("_x86_AES_decrypt");
543 # note that caller is expected to allocate stack frame for me!
544 &mov (&DWP(12,"esp"),$key); # save key
545
546 &xor ($s0,&DWP(0,$key)); # xor with key
547 &xor ($s1,&DWP(4,$key));
548 &xor ($s2,&DWP(8,$key));
549 &xor ($s3,&DWP(12,$key));
550
551 &mov ($acc,&DWP(240,$key)); # load key->rounds
552
553 if ($small_footprint) {
554 &lea ($acc,&DWP(-2,$acc,$acc)); # acc = 2*rounds-2
555 &lea ($acc,&DWP(0,$key,$acc,8)); # acc = key+16*(rounds-1)
556 &mov (&DWP(16,"esp"),$acc); # end of key schedule
557 &align (4);
558 &set_label("loop");
559 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
560 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
561 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
562 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
563 &add ($key,16); # advance rd_key
564 &xor ($s0,&DWP(0,$key));
565 &xor ($s1,&DWP(4,$key));
566 &xor ($s2,&DWP(8,$key));
567 &xor ($s3,&DWP(12,$key));
568 &cmp ($key,&DWP(16,"esp")); # flags from this compare feed the jb below
569 &mov (&DWP(12,"esp"),$key);
570 &jb (&label("loop"));
571 }
572 else {
573 &cmp ($acc,10);
574 &jle (&label("10rounds"));
575 &cmp ($acc,12);
576 &jle (&label("12rounds"));
577
578 &set_label("14rounds");
579 for ($i=1;$i<3;$i++) {
580 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
581 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
582 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
583 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
584 &xor ($s0,&DWP(16*$i+0,$key));
585 &xor ($s1,&DWP(16*$i+4,$key));
586 &xor ($s2,&DWP(16*$i+8,$key));
587 &xor ($s3,&DWP(16*$i+12,$key));
588 }
589 &add ($key,32);
590 &mov (&DWP(12,"esp"),$key); # advance rd_key
591 &set_label("12rounds");
592 for ($i=1;$i<3;$i++) {
593 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
594 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
595 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
596 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
597 &xor ($s0,&DWP(16*$i+0,$key));
598 &xor ($s1,&DWP(16*$i+4,$key));
599 &xor ($s2,&DWP(16*$i+8,$key));
600 &xor ($s3,&DWP(16*$i+12,$key));
601 }
602 &add ($key,32);
603 &mov (&DWP(12,"esp"),$key); # advance rd_key
604 &set_label("10rounds");
605 for ($i=1;$i<10;$i++) {
606 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
607 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
608 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
609 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
610 &xor ($s0,&DWP(16*$i+0,$key));
611 &xor ($s1,&DWP(16*$i+4,$key));
612 &xor ($s2,&DWP(16*$i+8,$key));
613 &xor ($s3,&DWP(16*$i+12,$key));
614 }
615 }
616
# final round, followed by the last round-key addition
617 &declast(0,"ebp",$s0,$s3,$s2,$s1);
618 &declast(1,"ebp",$s1,$s0,$s3,$s2);
619 &declast(2,"ebp",$s2,$s1,$s0,$s3);
620 &declast(3,"ebp",$s3,$s2,$s1,$s0);
621
622 &add ($key,$small_footprint?16:160);
623 &xor ($s0,&DWP(0,$key));
624 &xor ($s1,&DWP(4,$key));
625 &xor ($s2,&DWP(8,$key));
626 &xor ($s3,&DWP(12,$key));
627
628 &ret ();
629
630&set_label("AES_Td",64); # Yes! I keep it in the code segment!
631 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
632 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
633 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
634 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
635 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
636 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
637 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
638 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
639 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
640 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
641 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
642 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
643 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
644 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
645 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
646 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
647 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
648 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
649 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
650 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
651 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
652 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
653 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
654 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
655 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
656 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
657 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
658 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
659 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
660 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
661 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
662 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
663 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
664 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
665 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
666 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
667 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
668 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
669 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
670 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
671 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
672 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
673 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
674 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
675 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
676 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
677 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
678 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
679 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
680 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
681 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
682 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
683 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
684 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
685 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
686 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
687 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
688 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
689 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
690 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
691 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
692 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
693 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
694 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
695#Td4:
696 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
697 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
698 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
699 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
700 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
701 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
702 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
703 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
704 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
705 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
706 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
707 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
708 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
709 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
710 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
711 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
712 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
713 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
714 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
715 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
716 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
717 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
718 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
719 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
720 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
721 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
722 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
723 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
724 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
725 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
726 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
727 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
728&function_end_B("_x86_AES_decrypt");
729
730# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public entry point: builds an aligned scratch frame, points "ebp" at
# AES_Td in a position-independent way, touches the byte table behind
# it, loads the 16-byte input block into $s0..$s3, calls
# _x86_AES_decrypt and stores the result to out.
731&public_label("AES_Td");
732&function_begin("AES_decrypt");
733 &mov ($acc,&wparam(0)); # load inp
734 &mov ($key,&wparam(2)); # load key
735
# Carve out the frame: round %esp down to a 64-byte boundary, then bias
# it by 4 so that %esp is 64-byte aligned again once the call below has
# pushed its return address. The caller's %esp is kept at 16(%esp).
736 &mov ($s0,"esp");
737 &sub ("esp",24);
738 &and ("esp",-64);
739 &add ("esp",4);
740 &mov (&DWP(16,"esp"),$s0);
741
742 &call (&label("pic_point")); # make it PIC!
743 &set_label("pic_point");
744 &blindpop("ebp");
745 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
746
747 # prefetch Td4
# Eight loads at a 32-byte stride cover the 256 bytes starting at
# AES_Td+2048; the loaded values are discarded (the registers are
# overwritten by the input block right after). "ebp" is moved to the
# middle of that range for the loads and then put back.
748 &lea ("ebp",&DWP(2048+128,"ebp"));
749 &mov ($s0,&DWP(0-128,"ebp"));
750 &mov ($s1,&DWP(32-128,"ebp"));
751 &mov ($s2,&DWP(64-128,"ebp"));
752 &mov ($s3,&DWP(96-128,"ebp"));
753 &mov ($s0,&DWP(128-128,"ebp"));
754 &mov ($s1,&DWP(160-128,"ebp"));
755 &mov ($s2,&DWP(192-128,"ebp"));
756 &mov ($s3,&DWP(224-128,"ebp"));
757 &lea ("ebp",&DWP(-2048-128,"ebp"));
758
759 &mov ($s0,&DWP(0,$acc)); # load input data
760 &mov ($s1,&DWP(4,$acc));
761 &mov ($s2,&DWP(8,$acc));
762 &mov ($s3,&DWP(12,$acc));
763
764 &call ("_x86_AES_decrypt");
765
766 &mov ("esp",&DWP(16,"esp")); # restore the caller's %esp
767
768 &mov ($acc,&wparam(1)); # load out
769 &mov (&DWP(0,$acc),$s0); # write output data
770 &mov (&DWP(4,$acc),$s1);
771 &mov (&DWP(8,$acc),$s2);
772 &mov (&DWP(12,$acc),$s3);
773&function_end("AES_decrypt");
774
775# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
776# size_t length, const AES_KEY *key,
777# unsigned char *ivp,const int enc);
778{
779# stack frame layout; NOTE(review): the two offset columns presumably are this routine's view and the callee's view (shifted by the pushed return address) -- confirm
780# -4(%esp) 0(%esp) return address
781# 0(%esp) 4(%esp) tmp1
782# 4(%esp) 8(%esp) tmp2
783# 8(%esp) 12(%esp) key
784# 12(%esp) 16(%esp) end of key schedule
785my $_esp=&DWP(16,"esp"); #saved %esp (caller's stack pointer, reloaded on exit)
786my $_inp=&DWP(20,"esp"); #copy of wparam(0)
787my $_out=&DWP(24,"esp"); #copy of wparam(1)
788my $_len=&DWP(28,"esp"); #copy of wparam(2)
789my $_key=&DWP(32,"esp"); #copy of wparam(3), or pointer to the stack copy of the schedule
790my $_ivp=&DWP(36,"esp"); #copy of wparam(4)
791my $_tmp=&DWP(40,"esp"); #volatile variable (the decrypt loop keeps its running iv pointer here)
792my $ivec=&DWP(44,"esp"); #ivec[16], 16-byte scratch block
793my $aes_key=&DWP(60,"esp"); #copy of aes_key (244 bytes are copied here when the schedule is moved to the stack)
794my $mark=&DWP(60+240,"esp"); #copy of aes_key->rounds; stays zero when the schedule was not copied
795
796&public_label("AES_Te");
797&public_label("AES_Td");
798&function_begin("AES_cbc_encrypt");
799 &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len
800 &cmp ($s2,0);
801 &je (&label("enc_out")); # zero length: leave before any frame or flags are touched
802
803 &call (&label("pic_point")); # make it PIC!
804 &set_label("pic_point");
805 &blindpop("ebp"); # %ebp = address of pic_point
806
807 &pushf (); # save flags; both exit paths end with a matching popf
808 &cld (); # clear the direction flag for the rep string instructions used below
809
810 &cmp (&wparam(5),0); # enc argument
811 &je (&label("DECRYPT")); # enc == 0 selects the decrypt path
812
813 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp")); # %ebp = address of AES_Te
814
815 # allocate aligned stack frame...
816 &lea ($key,&DWP(-64-244,"esp")); # candidate frame base, 64+244 bytes below %esp
817 &and ($key,-64); # round down to a 64-byte boundary
818
819 # ... and make sure it doesn't alias with AES_Te modulo 4096
820 &mov ($s0,"ebp");
821 &lea ($s1,&DWP(2048,"ebp"));
822 &mov ($s3,$key);
823 &and ($s0,0xfff); # s = %ebp&0xfff
824 &and ($s1,0xfff); # e = (%ebp+2048)&0xfff
825 &and ($s3,0xfff); # p = frame&0xfff (the frame base in $key becomes %esp below)
826
827 &cmp ($s3,$s1); # if (p>=e) %esp -= (p-e);
828 &jb (&label("te_break_out"));
829 &sub ($s3,$s1);
830 &sub ($key,$s3);
831 &jmp (&label("te_ok"));
832 &set_label("te_break_out"); # else %esp -= (p-s)&0xfff + framesz;
833 &sub ($s3,$s0);
834 &and ($s3,0xfff);
835 &add ($s3,64+256); # framesz
836 &sub ($key,$s3);
837 &align (4);
838 &set_label("te_ok");
839
840 &mov ($s0,&wparam(0)); # load inp
841 &mov ($s1,&wparam(1)); # load out
842 &mov ($s3,&wparam(3)); # load key
843 &mov ($acc,&wparam(4)); # load ivp
844
845 &exch ("esp",$key); # switch to the new frame; $key now holds the caller's %esp
846 &add ("esp",4); # reserve for return address!
847 &mov ($_esp,$key); # save %esp
848
849 &mov ($_inp,$s0); # save copy of inp
850 &mov ($_out,$s1); # save copy of out
851 &mov ($_len,$s2); # save copy of len
852 &mov ($_key,$s3); # save copy of key
853 &mov ($_ivp,$acc); # save copy of ivp
854
855 &mov ($mark,0); # copy of aes_key->rounds = 0;
856 if ($compromise) {
857 &cmp ($s2,$compromise); # inputs shorter than $compromise bytes never get a key copy
858 &jb (&label("skip_ecopy"));
859 }
860 # do we copy key schedule to stack?
861 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
862 &mov ($s2 eq "ecx" ? $s2 : "",244/4); # dword count for the rep movsd below
863 &sub ($s1,"ebp");
864 &mov ("esi",$s3); # copy source: caller's key schedule
865 &and ($s1,0xfff); # (key - AES_Te) modulo 4096
866 &lea ("edi",$aes_key); # copy destination: $aes_key slot in the frame
867 &cmp ($s1,2048); # copy when the key lies less than 2048 bytes past the table base (mod 4096)...
868 &jb (&label("do_ecopy"));
869 &cmp ($s1,4096-244); # ...or less than 244 bytes before it (mod 4096); otherwise use it in place
870 &jb (&label("skip_ecopy"));
871 &align (4);
872 &set_label("do_ecopy");
873 &mov ($_key,"edi"); # from now on the stack copy is the key
874 &data_word(0xA5F3F689); # rep movsd (244 bytes, so the rounds word lands in $mark)
875 &set_label("skip_ecopy");
876
877 &mov ($acc,$s0); # $acc = inp
878 &mov ($key,16); # 16 passes of 128 bytes = 2048 bytes of AES_Te
879 &align (4);
880 &set_label("prefetch_te");
881 &mov ($s0,&DWP(0,"ebp")); # loaded values are discarded; the loads only touch the table
882 &mov ($s1,&DWP(32,"ebp"));
883 &mov ($s2,&DWP(64,"ebp"));
884 &mov ($s3,&DWP(96,"ebp"));
885 &lea ("ebp",&DWP(128,"ebp"));
886 &dec ($key);
887 &jnz (&label("prefetch_te"));
888 &sub ("ebp",2048); # rewind %ebp to AES_Te
889
890 &mov ($s2,$_len);
891 &mov ($key,$_ivp);
892 &test ($s2,0xFFFFFFF0);
893 &jz (&label("enc_tail")); # short input...
894
895 &mov ($s0,&DWP(0,$key)); # load iv
896 &mov ($s1,&DWP(4,$key));
897
898 &align (4);
899 &set_label("enc_loop"); # expects $s0,$s1 = iv words 0-1, $key = pointer to the iv, $acc = inp
900 &mov ($s2,&DWP(8,$key));
901 &mov ($s3,&DWP(12,$key));
902
903 &xor ($s0,&DWP(0,$acc)); # xor input data
904 &xor ($s1,&DWP(4,$acc));
905 &xor ($s2,&DWP(8,$acc));
906 &xor ($s3,&DWP(12,$acc));
907
908 &mov ($key,$_key); # load key
909 &call ("_x86_AES_encrypt");
910
911 &mov ($acc,$_inp); # load inp
912 &mov ($key,$_out); # load out
913
914 &mov (&DWP(0,$key),$s0); # save output data
915 &mov (&DWP(4,$key),$s1);
916 &mov (&DWP(8,$key),$s2);
917 &mov (&DWP(12,$key),$s3);
918
919 &mov ($s2,$_len); # load len
920
921 &lea ($acc,&DWP(16,$acc));
922 &mov ($_inp,$acc); # save inp
923
924 &lea ($s3,&DWP(16,$key)); # $key itself keeps pointing at the block just written (the next iv)
925 &mov ($_out,$s3); # save out
926
927 &sub ($s2,16);
928 &test ($s2,0xFFFFFFF0);
929 &mov ($_len,$s2); # save len
930 &jnz (&label("enc_loop")); # at least one more full block
931 &test ($s2,15);
932 &jnz (&label("enc_tail")); # 1..15 bytes left over
933 &mov ($acc,$_ivp); # load ivp
934 &mov ($s2,&DWP(8,$key)); # restore last dwords
935 &mov ($s3,&DWP(12,$key));
936 &mov (&DWP(0,$acc),$s0); # save ivec
937 &mov (&DWP(4,$acc),$s1);
938 &mov (&DWP(8,$acc),$s2);
939 &mov (&DWP(12,$acc),$s3);
940
941 &cmp ($mark,0); # was the key schedule copied?
942 &mov ("edi",$_key); # mov leaves the flags of the cmp above intact
943 &je (&label("skip_ezero"));
944 # zero copy of key schedule
945 &mov ("ecx",240/4); # 240 bytes of round keys, as dwords
946 &xor ("eax","eax");
947 &align (4);
948 &data_word(0xABF3F689); # rep stosd
949 &set_label("skip_ezero"); # statement terminated explicitly; without the ';' Perl folds this call and the next into one bitwise-and expression
950 &mov ("esp",$_esp); # back to the caller's stack
951 &popf (); # matches the pushf at function entry
952 &set_label("enc_out");
953 &function_end_A();
954 &pushf (); # kludge, never executed; NOTE(review): presumably rebalances the generator's stack bookkeeping after the popf above -- confirm
955
956 &align (4);
957 &set_label("enc_tail"); # zero-pad a trailing partial block and encrypt it; on entry $key = iv pointer, $acc = inp, $s2 = bytes left (1..15)
958 &mov ($_tmp,$key); # park ivp in the scratch slot; a push here would move %esp and make the %esp-relative $_out below address the $_inp slot
959 &mov ($key eq "edi" ? $key : "",$_out); # load out
960 &mov ($s1,16);
961 &sub ($s1,$s2); # $s1 = 16-len, the number of padding bytes
962 &cmp ($key,$acc); # compare with inp
963 &je (&label("enc_in_place"));
964 &align (4);
965 &data_word(0xA4F3F689); # rep movsb # copy input
966 &jmp (&label("enc_skip_in_place"));
967 &set_label("enc_in_place");
968 &lea ($key,&DWP(0,$key,$s2)); # in place: the data is already there, just step past it
969 &set_label("enc_skip_in_place");
970 &mov ($s2,$s1);
971 &xor ($s0,$s0);
972 &align (4);
973 &data_word(0xAAF3F689); # rep stosb # zero tail
974 &mov ($key,$_tmp); # restore ivp
975
976 &mov ($acc,$_out); # output as input
977 &mov ($s0,&DWP(0,$key)); # reload iv words 0-1; enc_loop loads words 2-3 itself
978 &mov ($s1,&DWP(4,$key));
979 &mov ($_len,16); # len=16
980 &jmp (&label("enc_loop")); # one more spin...
981
982#----------------------------- DECRYPT -----------------------------#
983&align (4);
984&set_label("DECRYPT");
985 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp")); # %ebp = address of AES_Td
986
987 # allocate aligned stack frame...
988 &lea ($key,&DWP(-64-244,"esp")); # candidate frame base, 64+244 bytes below %esp
989 &and ($key,-64); # round down to a 64-byte boundary
990
991 # ... and make sure it doesn't alias with AES_Td modulo 4096
992 &mov ($s0,"ebp");
993 &lea ($s1,&DWP(2048+256,"ebp"));
994 &mov ($s3,$key);
995 &and ($s0,0xfff); # s = %ebp&0xfff
996 &and ($s1,0xfff); # e = (%ebp+2048+256)&0xfff
997 &and ($s3,0xfff); # p = frame&0xfff (the frame base in $key becomes %esp below)
998
999 &cmp ($s3,$s1); # if (p>=e) %esp -= (p-e);
1000 &jb (&label("td_break_out"));
1001 &sub ($s3,$s1);
1002 &sub ($key,$s3);
1003 &jmp (&label("td_ok"));
1004 &set_label("td_break_out"); # else %esp -= (p-s)&0xfff + framesz;
1005 &sub ($s3,$s0);
1006 &and ($s3,0xfff);
1007 &add ($s3,64+256); # framesz
1008 &sub ($key,$s3);
1009 &align (4);
1010 &set_label("td_ok");
1011
1012 &mov ($s0,&wparam(0)); # load inp
1013 &mov ($s1,&wparam(1)); # load out
1014 &mov ($s3,&wparam(3)); # load key
1015 &mov ($acc,&wparam(4)); # load ivp
1016
1017 &exch ("esp",$key); # switch to the new frame; $key now holds the caller's %esp
1018 &add ("esp",4); # reserve for return address!
1019 &mov ($_esp,$key); # save %esp
1020
1021 &mov ($_inp,$s0); # save copy of inp
1022 &mov ($_out,$s1); # save copy of out
1023 &mov ($_len,$s2); # save copy of len
1024 &mov ($_key,$s3); # save copy of key
1025 &mov ($_ivp,$acc); # save copy of ivp
1026
1027 &mov ($mark,0); # copy of aes_key->rounds = 0;
1028 if ($compromise) {
1029 &cmp ($s2,$compromise); # inputs shorter than $compromise bytes never get a key copy
1030 &jb (&label("skip_dcopy"));
1031 }
1032 # do we copy key schedule to stack?
1033 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
1034 &mov ($s2 eq "ecx" ? $s2 : "",244/4); # dword count for the rep movsd below
1035 &sub ($s1,"ebp");
1036 &mov ("esi",$s3); # copy source: caller's key schedule
1037 &and ($s1,0xfff); # (key - AES_Td) modulo 4096
1038 &lea ("edi",$aes_key); # copy destination: $aes_key slot in the frame
1039 &cmp ($s1,2048+256); # copy when the key lies less than 2048+256 bytes past the table base (mod 4096)...
1040 &jb (&label("do_dcopy"));
1041 &cmp ($s1,4096-244); # ...or less than 244 bytes before it (mod 4096); otherwise use it in place
1042 &jb (&label("skip_dcopy"));
1043 &align (4);
1044 &set_label("do_dcopy");
1045 &mov ($_key,"edi"); # from now on the stack copy is the key
1046 &data_word(0xA5F3F689); # rep movsd (244 bytes, so the rounds word lands in $mark)
1047 &set_label("skip_dcopy");
1048
1049 &mov ($acc,$s0); # $acc = inp
1050 &mov ($key,18); # 18 passes of 128 bytes = 2048+256 bytes of AES_Td
1051 &align (4);
1052 &set_label("prefetch_td");
1053 &mov ($s0,&DWP(0,"ebp")); # loaded values are discarded; the loads only touch the table
1054 &mov ($s1,&DWP(32,"ebp"));
1055 &mov ($s2,&DWP(64,"ebp"));
1056 &mov ($s3,&DWP(96,"ebp"));
1057 &lea ("ebp",&DWP(128,"ebp"));
1058 &dec ($key);
1059 &jnz (&label("prefetch_td"));
1060 &sub ("ebp",2048+256); # rewind %ebp to AES_Td
1061
1062 &cmp ($acc,$_out); # inp == out?
1063 &je (&label("dec_in_place")); # in-place processing...
1064
1065 &mov ($key,$_ivp); # load ivp
1066 &mov ($_tmp,$key); # $_tmp tracks the iv for the block being decrypted
1067
1068 &align (4);
1069 &set_label("dec_loop");
1070 &mov ($s0,&DWP(0,$acc)); # read input
1071 &mov ($s1,&DWP(4,$acc));
1072 &mov ($s2,&DWP(8,$acc));
1073 &mov ($s3,&DWP(12,$acc));
1074
1075 &mov ($key,$_key); # load key
1076 &call ("_x86_AES_decrypt");
1077
1078 &mov ($key,$_tmp); # load ivp
1079 &mov ($acc,$_len); # load len
1080 &xor ($s0,&DWP(0,$key)); # xor iv
1081 &xor ($s1,&DWP(4,$key));
1082 &xor ($s2,&DWP(8,$key));
1083 &xor ($s3,&DWP(12,$key));
1084
1085 &sub ($acc,16); # len -= 16; these flags also feed the jnz at the bottom of the loop
1086 &jc (&label("dec_partial")); # fewer than 16 bytes were left
1087 &mov ($_len,$acc); # save len
1088 &mov ($acc,$_inp); # load inp
1089 &mov ($key,$_out); # load out
1090
1091 &mov (&DWP(0,$key),$s0); # write output
1092 &mov (&DWP(4,$key),$s1);
1093 &mov (&DWP(8,$key),$s2);
1094 &mov (&DWP(12,$key),$s3);
1095
1096 &mov ($_tmp,$acc); # save ivp (the input block just consumed is the next iv)
1097 &lea ($acc,&DWP(16,$acc));
1098 &mov ($_inp,$acc); # save inp
1099
1100 &lea ($key,&DWP(16,$key));
1101 &mov ($_out,$key); # save out
1102
1103 &jnz (&label("dec_loop")); # flags are still those of the sub above (mov/lea leave them alone)
1104 &mov ($key,$_tmp); # load temp ivp
1105 &set_label("dec_end");
1106 &mov ($acc,$_ivp); # load user ivp
1107 &mov ($s0,&DWP(0,$key)); # load iv
1108 &mov ($s1,&DWP(4,$key));
1109 &mov ($s2,&DWP(8,$key));
1110 &mov ($s3,&DWP(12,$key));
1111 &mov (&DWP(0,$acc),$s0); # copy back to user
1112 &mov (&DWP(4,$acc),$s1);
1113 &mov (&DWP(8,$acc),$s2);
1114 &mov (&DWP(12,$acc),$s3);
1115 &jmp (&label("dec_out"));
1116
1117 &align (4);
1118 &set_label("dec_partial"); # entered with $acc = len-16 (negative) and the decrypted block in $s0-$s3
1119 &lea ($key,$ivec);
1120 &mov (&DWP(0,$key),$s0); # dump output to stack
1121 &mov (&DWP(4,$key),$s1);
1122 &mov (&DWP(8,$key),$s2);
1123 &mov (&DWP(12,$key),$s3);
1124 &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$acc)); # byte count = the len that was left
1125 &mov ($acc eq "esi" ? $acc : "",$key); # source = stack copy of the block
1126 &mov ($key eq "edi" ? $key : "",$_out); # load out
1127 &data_word(0xA4F3F689); # rep movsb # copy output
1128 &mov ($key,$_inp); # use inp as temp ivp
1129 &jmp (&label("dec_end"));
1130
1131 &align (4);
1132 &set_label("dec_in_place"); # inp == out: each input block is saved in $ivec before it is overwritten
1133 &set_label("dec_in_place_loop");
1134 &lea ($key,$ivec);
1135 &mov ($s0,&DWP(0,$acc)); # read input
1136 &mov ($s1,&DWP(4,$acc));
1137 &mov ($s2,&DWP(8,$acc));
1138 &mov ($s3,&DWP(12,$acc));
1139
1140 &mov (&DWP(0,$key),$s0); # copy to temp
1141 &mov (&DWP(4,$key),$s1);
1142 &mov (&DWP(8,$key),$s2);
1143 &mov (&DWP(12,$key),$s3);
1144
1145 &mov ($key,$_key); # load key
1146 &call ("_x86_AES_decrypt");
1147
1148 &mov ($key,$_ivp); # load ivp
1149 &mov ($acc,$_out); # load out
1150 &xor ($s0,&DWP(0,$key)); # xor iv
1151 &xor ($s1,&DWP(4,$key));
1152 &xor ($s2,&DWP(8,$key));
1153 &xor ($s3,&DWP(12,$key));
1154
1155 &mov (&DWP(0,$acc),$s0); # write output
1156 &mov (&DWP(4,$acc),$s1);
1157 &mov (&DWP(8,$acc),$s2);
1158 &mov (&DWP(12,$acc),$s3);
1159
1160 &lea ($acc,&DWP(16,$acc));
1161 &mov ($_out,$acc); # save out
1162
1163 &lea ($acc,$ivec);
1164 &mov ($s0,&DWP(0,$acc)); # read temp
1165 &mov ($s1,&DWP(4,$acc));
1166 &mov ($s2,&DWP(8,$acc));
1167 &mov ($s3,&DWP(12,$acc));
1168
1169 &mov (&DWP(0,$key),$s0); # copy iv (the saved input block goes straight into the user's ivec)
1170 &mov (&DWP(4,$key),$s1);
1171 &mov (&DWP(8,$key),$s2);
1172 &mov (&DWP(12,$key),$s3);
1173
1174 &mov ($acc,$_inp); # load inp
1175
1176 &lea ($acc,&DWP(16,$acc));
1177 &mov ($_inp,$acc); # save inp
1178
1179 &mov ($s2,$_len); # load len
1180 &sub ($s2,16);
1181 &jc (&label("dec_in_place_partial")); # fewer than 16 bytes were left
1182 &mov ($_len,$s2); # save len
1183 &jnz (&label("dec_in_place_loop")); # flags are still those of the sub above
1184 &jmp (&label("dec_out"));
1185
1186 &align (4);
1187 &set_label("dec_in_place_partial"); # entered with $s2 = len-16 (negative); $_out already points 16 bytes past the block
1188 # one can argue if this is actually required...
1189 &mov ($key eq "edi" ? $key : "",$_out);
1190 &lea ($acc eq "esi" ? $acc : "",$ivec);
1191 &lea ($key,&DWP(0,$key,$s2)); # destination: first byte past the valid output
1192 &lea ($acc,&DWP(16,$acc,$s2)); # source: the matching byte of the saved input block
1193 &neg ($s2 eq "ecx" ? $s2 : ""); # byte count = 16-len
1194 &data_word(0xA4F3F689); # rep movsb # restore tail
1195
1196 &align (4);
1197 &set_label("dec_out"); # common exit of the decrypt path
1198 &cmp ($mark,0); # was the key schedule copied?
1199 &mov ("edi",$_key); # mov leaves the flags of the cmp above intact
1200 &je (&label("skip_dzero"));
1201 # zero copy of key schedule
1202 &mov ("ecx",240/4); # 240 bytes of round keys, as dwords
1203 &xor ("eax","eax");
1204 &align (4);
1205 &data_word(0xABF3F689); # rep stosd
1206 &set_label("skip_dzero"); # statement terminated explicitly; without the ';' Perl folds this call and the next into one bitwise-and expression
1207 &mov ("esp",$_esp); # back to the caller's stack
1208 &popf (); # matches the pushf at function entry
1209&function_end("AES_cbc_encrypt");
1210}
1211
1212#------------------------------------------------------------------#
1213
1214sub enckey() # emits one key-expansion step: eax ^= four masked table bytes selected by the bytes of edx, then ^= rcon[ecx]
1215{ # register contract of the emitted code: in eax, edx, ecx, ebp (table base); clobbers ebx, edx, esi
1216 &movz ("esi",&LB("edx")); # rk[i]>>0
1217 &mov ("ebx",&DWP(2,"ebp","esi",8));
1218 &movz ("esi",&HB("edx")); # rk[i]>>8
1219 &and ("ebx",0xFF000000); # keep only the top byte of the looked-up word
1220 &xor ("eax","ebx");
1221
1222 &mov ("ebx",&DWP(2,"ebp","esi",8));
1223 &shr ("edx",16); # bring the upper half of the word into dl/dh
1224 &and ("ebx",0x000000FF);
1225 &movz ("esi",&LB("edx")); # rk[i]>>16
1226 &xor ("eax","ebx");
1227
1228 &mov ("ebx",&DWP(0,"ebp","esi",8));
1229 &movz ("esi",&HB("edx")); # rk[i]>>24
1230 &and ("ebx",0x0000FF00);
1231 &xor ("eax","ebx");
1232
1233 &mov ("ebx",&DWP(0,"ebp","esi",8));
1234 &and ("ebx",0x00FF0000);
1235 &xor ("eax","ebx");
1236
1237 &xor ("eax",&DWP(2048,"ebp","ecx",4)); # rcon
1238}
1239
1240# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1241# AES_KEY *key)
1242&public_label("AES_Te");
1243&function_begin("AES_set_encrypt_key", "", "_x86_AES_set_encrypt_key");
1244 &mov ("esi",&wparam(0)); # user supplied key
1245 &mov ("edi",&wparam(2)); # private key schedule
1246
1247 &test ("esi",-1); # NULL userKey?
1248 &jz (&label("badpointer"));
1249 &test ("edi",-1); # NULL key?
1250 &jz (&label("badpointer"));
1251
1252 &call (&label("pic_point"));
1253 &set_label("pic_point");
1254 &blindpop("ebp"); # %ebp = address of pic_point
1255 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp")); # %ebp = address of AES_Te
1256
1257 &mov ("ecx",&wparam(1)); # number of bits in key
1258 &cmp ("ecx",128);
1259 &je (&label("10rounds"));
1260 &cmp ("ecx",192);
1261 &je (&label("12rounds"));
1262 &cmp ("ecx",256);
1263 &je (&label("14rounds"));
1264 &mov ("eax",-2); # invalid number of bits
1265 &jmp (&label("exit")); # returns -2
1266
1267 &set_label("10rounds"); # 128-bit key
1268 &mov ("eax",&DWP(0,"esi")); # copy first 4 dwords
1269 &mov ("ebx",&DWP(4,"esi"));
1270 &mov ("ecx",&DWP(8,"esi"));
1271 &mov ("edx",&DWP(12,"esi"));
1272 &mov (&DWP(0,"edi"),"eax");
1273 &mov (&DWP(4,"edi"),"ebx");
1274 &mov (&DWP(8,"edi"),"ecx");
1275 &mov (&DWP(12,"edi"),"edx");
1276
1277 &xor ("ecx","ecx"); # iteration counter, also the rcon index used by enckey
1278 &jmp (&label("10shortcut")); # eax and edx already hold rk[0] and rk[3]
1279
1280 &align (4);
1281 &set_label("10loop");
1282 &mov ("eax",&DWP(0,"edi")); # rk[0]
1283 &mov ("edx",&DWP(12,"edi")); # rk[3]
1284 &set_label("10shortcut");
1285 &enckey ();
1286
1287 &mov (&DWP(16,"edi"),"eax"); # rk[4]
1288 &xor ("eax",&DWP(4,"edi"));
1289 &mov (&DWP(20,"edi"),"eax"); # rk[5]
1290 &xor ("eax",&DWP(8,"edi"));
1291 &mov (&DWP(24,"edi"),"eax"); # rk[6]
1292 &xor ("eax",&DWP(12,"edi"));
1293 &mov (&DWP(28,"edi"),"eax"); # rk[7]
1294 &inc ("ecx");
1295 &add ("edi",16); # slide the window forward by 4 words
1296 &cmp ("ecx",10);
1297 &jl (&label("10loop"));
1298
1299 &mov (&DWP(80,"edi"),10); # setup number of rounds (edi advanced 10*16 bytes, so this is offset 240 of the schedule)
1300 &xor ("eax","eax"); # return 0
1301 &jmp (&label("exit"));
1302
1303 &set_label("12rounds"); # 192-bit key
1304 &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords
1305 &mov ("ebx",&DWP(4,"esi"));
1306 &mov ("ecx",&DWP(8,"esi"));
1307 &mov ("edx",&DWP(12,"esi"));
1308 &mov (&DWP(0,"edi"),"eax");
1309 &mov (&DWP(4,"edi"),"ebx");
1310 &mov (&DWP(8,"edi"),"ecx");
1311 &mov (&DWP(12,"edi"),"edx");
1312 &mov ("ecx",&DWP(16,"esi"));
1313 &mov ("edx",&DWP(20,"esi"));
1314 &mov (&DWP(16,"edi"),"ecx");
1315 &mov (&DWP(20,"edi"),"edx");
1316
1317 &xor ("ecx","ecx"); # iteration counter, also the rcon index used by enckey
1318 &jmp (&label("12shortcut")); # eax and edx already hold rk[0] and rk[5]
1319
1320 &align (4);
1321 &set_label("12loop");
1322 &mov ("eax",&DWP(0,"edi")); # rk[0]
1323 &mov ("edx",&DWP(20,"edi")); # rk[5]
1324 &set_label("12shortcut");
1325 &enckey ();
1326
1327 &mov (&DWP(24,"edi"),"eax"); # rk[6]
1328 &xor ("eax",&DWP(4,"edi"));
1329 &mov (&DWP(28,"edi"),"eax"); # rk[7]
1330 &xor ("eax",&DWP(8,"edi"));
1331 &mov (&DWP(32,"edi"),"eax"); # rk[8]
1332 &xor ("eax",&DWP(12,"edi"));
1333 &mov (&DWP(36,"edi"),"eax"); # rk[9]
1334
1335 &cmp ("ecx",7); # the eighth pass stops after four words
1336 &je (&label("12break"));
1337 &inc ("ecx");
1338
1339 &xor ("eax",&DWP(16,"edi"));
1340 &mov (&DWP(40,"edi"),"eax"); # rk[10]
1341 &xor ("eax",&DWP(20,"edi"));
1342 &mov (&DWP(44,"edi"),"eax"); # rk[11]
1343
1344 &add ("edi",24); # slide the window forward by 6 words
1345 &jmp (&label("12loop"));
1346
1347 &set_label("12break");
1348 &mov (&DWP(72,"edi"),12); # setup number of rounds (edi advanced 7*24 bytes, so this is offset 240 of the schedule)
1349 &xor ("eax","eax"); # return 0
1350 &jmp (&label("exit"));
1351
1352 &set_label("14rounds"); # 256-bit key
1353 &mov ("eax",&DWP(0,"esi")); # copy first 8 dwords
1354 &mov ("ebx",&DWP(4,"esi"));
1355 &mov ("ecx",&DWP(8,"esi"));
1356 &mov ("edx",&DWP(12,"esi"));
1357 &mov (&DWP(0,"edi"),"eax");
1358 &mov (&DWP(4,"edi"),"ebx");
1359 &mov (&DWP(8,"edi"),"ecx");
1360 &mov (&DWP(12,"edi"),"edx");
1361 &mov ("eax",&DWP(16,"esi"));
1362 &mov ("ebx",&DWP(20,"esi"));
1363 &mov ("ecx",&DWP(24,"esi"));
1364 &mov ("edx",&DWP(28,"esi"));
1365 &mov (&DWP(16,"edi"),"eax");
1366 &mov (&DWP(20,"edi"),"ebx");
1367 &mov (&DWP(24,"edi"),"ecx");
1368 &mov (&DWP(28,"edi"),"edx");
1369
1370 &xor ("ecx","ecx"); # iteration counter, also the rcon index used by enckey
1371 &jmp (&label("14shortcut")); # edx already holds rk[7]
1372
1373 &align (4);
1374 &set_label("14loop");
1375 &mov ("edx",&DWP(28,"edi")); # rk[7]
1376 &set_label("14shortcut");
1377 &mov ("eax",&DWP(0,"edi")); # rk[0]
1378
1379 &enckey ();
1380
1381 &mov (&DWP(32,"edi"),"eax"); # rk[8]
1382 &xor ("eax",&DWP(4,"edi"));
1383 &mov (&DWP(36,"edi"),"eax"); # rk[9]
1384 &xor ("eax",&DWP(8,"edi"));
1385 &mov (&DWP(40,"edi"),"eax"); # rk[10]
1386 &xor ("eax",&DWP(12,"edi"));
1387 &mov (&DWP(44,"edi"),"eax"); # rk[11]
1388
1389 &cmp ("ecx",6); # the seventh pass stops after four words
1390 &je (&label("14break"));
1391 &inc ("ecx");
1392
1393 &mov ("edx","eax"); # edx = rk[11]; second half-step: same byte lookups as enckey, but each byte keeps its own position and no rcon is added
1394 &mov ("eax",&DWP(16,"edi")); # rk[4]
1395 &movz ("esi",&LB("edx")); # rk[11]>>0
1396 &mov ("ebx",&DWP(2,"ebp","esi",8));
1397 &movz ("esi",&HB("edx")); # rk[11]>>8
1398 &and ("ebx",0x000000FF);
1399 &xor ("eax","ebx");
1400
1401 &mov ("ebx",&DWP(0,"ebp","esi",8));
1402 &shr ("edx",16);
1403 &and ("ebx",0x0000FF00);
1404 &movz ("esi",&LB("edx")); # rk[11]>>16
1405 &xor ("eax","ebx");
1406
1407 &mov ("ebx",&DWP(0,"ebp","esi",8));
1408 &movz ("esi",&HB("edx")); # rk[11]>>24
1409 &and ("ebx",0x00FF0000);
1410 &xor ("eax","ebx");
1411
1412 &mov ("ebx",&DWP(2,"ebp","esi",8));
1413 &and ("ebx",0xFF000000);
1414 &xor ("eax","ebx");
1415
1416 &mov (&DWP(48,"edi"),"eax"); # rk[12]
1417 &xor ("eax",&DWP(20,"edi"));
1418 &mov (&DWP(52,"edi"),"eax"); # rk[13]
1419 &xor ("eax",&DWP(24,"edi"));
1420 &mov (&DWP(56,"edi"),"eax"); # rk[14]
1421 &xor ("eax",&DWP(28,"edi"));
1422 &mov (&DWP(60,"edi"),"eax"); # rk[15]
1423
1424 &add ("edi",32); # slide the window forward by 8 words
1425 &jmp (&label("14loop"));
1426
1427 &set_label("14break");
1428 &mov (&DWP(48,"edi"),14); # setup number of rounds (edi advanced 6*32 bytes, so this is offset 240 of the schedule)
1429 &xor ("eax","eax"); # return 0
1430 &jmp (&label("exit"));
1431
1432 &set_label("badpointer");
1433 &mov ("eax",-1); # NULL userKey or key: return -1
1434 &set_label("exit");
1435&function_end("AES_set_encrypt_key");
1436
1437sub deckey() # emits the in-place transform of one round-key word: each byte goes through a byte lookup in the first table, then indexes the second table
1438{ my ($off,$rk,$fwd,$inv) = @_; # word offset, round-key pointer register, first table register, second table register
1439
1440 &mov ("eax",&DWP($off,$rk)); # fetch the word
1441 &mov ("edx","eax"); # keep a copy for the upper two bytes
1442 &movz ("ebx",&HB("eax")); # byte 1
1443 &shr ("edx",16); # upper half now in dl/dh
1444 &and ("eax",0xFF); # byte 0
1445 &movz ("eax",&BP(2,$fwd,"eax",8)); # substitute byte 0
1446 &movz ("ebx",&BP(2,$fwd,"ebx",8)); # substitute byte 1
1447 &mov ("eax",&DWP(0,$inv,"eax",8)); # start the result with the byte-0 term
1448 &xor ("eax",&DWP(3,$inv,"ebx",8)); # fold in the byte-1 term
1449 &movz ("ebx",&HB("edx")); # byte 3
1450 &and ("edx",0xFF); # byte 2
1451 &movz ("edx",&BP(2,$fwd,"edx",8)); # substitute byte 2
1452 &movz ("ebx",&BP(2,$fwd,"ebx",8)); # substitute byte 3
1453 &xor ("eax",&DWP(2,$inv,"edx",8)); # fold in the byte-2 term
1454 &xor ("eax",&DWP(1,$inv,"ebx",8)); # fold in the byte-3 term
1455 &mov (&DWP($off,$rk),"eax"); # store the word back
1456}
1457
1458# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1459# AES_KEY *key)
1460&public_label("AES_Td");
1461&public_label("AES_Te");
1462&function_begin_B("AES_set_decrypt_key");
1463 &mov ("eax",&wparam(0));
1464 &mov ("ecx",&wparam(1));
1465 &mov ("edx",&wparam(2));
1466 &sub ("esp",12); # room for a copy of the three arguments
1467 &mov (&DWP(0,"esp"),"eax");
1468 &mov (&DWP(4,"esp"),"ecx");
1469 &mov (&DWP(8,"esp"),"edx");
1470 &call ("_x86_AES_set_encrypt_key"); # build the encryption schedule first
1471 &add ("esp",12);
1472 &cmp ("eax",0);
1473 &je (&label("proceed"));
1474 &ret (); # pass a non-zero error code straight back
1475
1476 &set_label("proceed");
1477 &push ("ebp");
1478 &push ("ebx");
1479 &push ("esi");
1480 &push ("edi");
1481
1482 &mov ("esi",&wparam(2));
1483 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
1484 &lea ("ecx",&DWP(0,"","ecx",4));
1485 &lea ("edi",&DWP(0,"esi","ecx",4)); # pointer to last chunk
1486
1487 &align (4);
1488 &set_label("invert"); # invert order of chunks
1489 &mov ("eax",&DWP(0,"esi")); # swap the 16-byte chunks at esi and edi, two dwords at a time
1490 &mov ("ebx",&DWP(4,"esi"));
1491 &mov ("ecx",&DWP(0,"edi"));
1492 &mov ("edx",&DWP(4,"edi"));
1493 &mov (&DWP(0,"edi"),"eax");
1494 &mov (&DWP(4,"edi"),"ebx");
1495 &mov (&DWP(0,"esi"),"ecx");
1496 &mov (&DWP(4,"esi"),"edx");
1497 &mov ("eax",&DWP(8,"esi"));
1498 &mov ("ebx",&DWP(12,"esi"));
1499 &mov ("ecx",&DWP(8,"edi"));
1500 &mov ("edx",&DWP(12,"edi"));
1501 &mov (&DWP(8,"edi"),"eax");
1502 &mov (&DWP(12,"edi"),"ebx");
1503 &mov (&DWP(8,"esi"),"ecx");
1504 &mov (&DWP(12,"esi"),"edx");
1505 &add ("esi",16);
1506 &sub ("edi",16);
1507 &cmp ("esi","edi"); # the two pointers meet at the middle chunk
1508 &jne (&label("invert"));
1509
1510 &call (&label("pic_point"));
1511 &set_label("pic_point");
1512 &blindpop("ebp"); # called with the & sigil like every other generator call in this file
1513 &lea ("edi",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp")); # %edi = address of AES_Td
1514 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp")); # %ebp = address of AES_Te
1515
1516 &mov ("esi",&wparam(2));
1517 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
1518 &dec ("ecx"); # rounds-1 chunks are transformed; the first and last are left as they are
1519 &align (4);
1520 &set_label("permute"); # permute the key schedule
1521 &add ("esi",16);
1522 &deckey (0,"esi","ebp","edi");
1523 &deckey (4,"esi","ebp","edi");
1524 &deckey (8,"esi","ebp","edi");
1525 &deckey (12,"esi","ebp","edi");
1526 &dec ("ecx");
1527 &jnz (&label("permute"));
1528
1529 &xor ("eax","eax"); # return success
1530&function_end("AES_set_decrypt_key");
1531
1532&asm_finish();
diff --git a/src/lib/libcrypto/aes/asm/aes-ia64.S b/src/lib/libcrypto/aes/asm/aes-ia64.S
new file mode 100644
index 0000000000..7f6c4c3662
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-ia64.S
@@ -0,0 +1,1123 @@
1// ====================================================================
2// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
3// project. Rights for redistribution and usage in source and binary
4// forms are granted according to the OpenSSL license.
5// ====================================================================
6//
7// What's wrong with compiler generated code? Compiler never uses
8// variable 'shr' which is pairable with 'extr'/'dep' instructions.
9// Then it uses 'zxt' which is an I-type, but can be replaced with
10// 'and' which in turn can be assigned to M-port [there are twice as
11// many M-ports as there are I-ports on Itanium 2]. By sacrificing a few
12// registers for small constants (255, 24 and 16) to be used with
13// 'shr' and 'and' instructions I can achieve better ILP, Instruction
14// Level Parallelism, and performance. This code outperforms GCC 3.3
15// generated code by over factor of 2 (two), GCC 3.4 - by 70% and
16// HP C - by 40%. Measured best-case scenario, i.e. aligned
17// big-endian input, ECB timing on Itanium 2 is (18 + 13*rounds)
18// ticks per block, or 9.25 CPU cycles per byte for 128 bit key.
19
20// Version 1.2 mitigates the hazard of cache-timing attacks by
21// a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling
22// references to S-boxes for L2 cache latency, c) prefetching T[ed]4
23// prior to the last round. As a result performance dropped to (26 + 15*rounds)
24// ticks per block or 11 cycles per byte processed with 128-bit key.
25// This is ~16% deterioration. For reference Itanium 2 L1 cache has
26// 64 bytes line size and L2 - 128 bytes...
27
28.ident "aes-ia64.S, version 1.2"
29.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
30.explicit
31.text
32
33rk0=r8; rk1=r9;
34
35pfssave=r2;
36lcsave=r10;
37prsave=r3;
38maskff=r11;
39twenty4=r14;
40sixteen=r15;
41
42te00=r16; te11=r17; te22=r18; te33=r19;
43te01=r20; te12=r21; te23=r22; te30=r23;
44te02=r24; te13=r25; te20=r26; te31=r27;
45te03=r28; te10=r29; te21=r30; te32=r31;
46
47// these are rotating...
48t0=r32; s0=r33;
49t1=r34; s1=r35;
50t2=r36; s2=r37;
51t3=r38; s3=r39;
52
53te0=r40; te1=r41; te2=r42; te3=r43;
54
55#if defined(_HPUX_SOURCE) && !defined(_LP64)
56# define ADDP addp4
57#else
58# define ADDP add
59#endif
60
61// Offsets from Te0
62#define TE0 0
63#define TE2 2
64#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
65#define TE1 3
66#define TE3 1
67#else
68#define TE1 1
69#define TE3 3
70#endif
71
72// This implies that AES_KEY comprises 32-bit key schedule elements
73// even on LP64 platforms.
74#ifndef KSZ
75# define KSZ 4
76# define LDKEY ld4
77#endif
78
79.proc _ia64_AES_encrypt#
80// Input: rk0-rk1
81// te0
82// te3 as AES_KEY->rounds!!!
83// s0-s3
84// maskff,twenty4,sixteen
85// Output: r16,r20,r24,r28 as s0-s3
86// Clobber: r16-r31,rk0-rk1,r32-r43
87.align 32
88_ia64_AES_encrypt:
89 .prologue
90 .altrp b6
91 .body
92{ .mmi; alloc r16=ar.pfs,12,0,0,8
93 LDKEY t0=[rk0],2*KSZ
94 mov pr.rot=1<<16 }
95{ .mmi; LDKEY t1=[rk1],2*KSZ
96 add te1=TE1,te0
97 add te3=-3,te3 };; // rounds-3 becomes the loop count loaded into ar.lc below
98{ .mib; LDKEY t2=[rk0],2*KSZ
99 mov ar.ec=2 }
100{ .mib; LDKEY t3=[rk1],2*KSZ
101 add te2=TE2,te0
102 brp.loop.imp .Le_top,.Le_end-16 };;
103
104{ .mmi; xor s0=s0,t0
105 xor s1=s1,t1
106 mov ar.lc=te3 }
107{ .mmi; xor s2=s2,t2
108 xor s3=s3,t3
109 add te3=TE3,te0 };; // te3 is a table pointer from here on
110
111.align 32
112.Le_top:
113{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
114 (p0) and te33=s3,maskff // 0/0:s3&0xff
115 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
116{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
117 (p0) and te30=s0,maskff // 0/1:s0&0xff
118 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
119{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
120 (p0) shladd te33=te33,3,te3 // 1/0:te3+s3&0xff
121 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
122{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
123 (p0) shladd te30=te30,3,te3 // 1/1:te3+s0
124 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
125{ .mmi; (p0) ld4 te33=[te33] // 2/0:te3[s3&0xff]
126 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
127 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
128{ .mmi; (p0) ld4 te30=[te30] // 2/1:te3[s0]
129 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
130 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
131{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
132 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
133 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
134{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
135 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
136 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
137{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
138 (p0) shladd te21=te21,3,te2 // 4/3:te2+s1>>8
139 (p0) extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
140{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
141 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
142 (p0) shr.u te13=s3,sixteen };; // 4/2:s3>>16
143{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
144 (p0) shladd te11=te11,3,te1 // 5/0:te1+s1>>16
145 (p0) extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
146{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
147 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
148 (p0) and te31=s1,maskff };; // 5/2:s1&0xff
149{ .mmi; (p0) ld4 te11=[te11] // 6/0:te1[s1>>16]
150 (p0) shladd te12=te12,3,te1 // 6/1:te1+s2>>16
151 (p0) extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
152{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
153 (p0) shladd te03=te03,3,te0 // 6/3:te0+s3>>24
154 (p0) and te32=s2,maskff };; // 6/3:s2&0xff
155
156{ .mmi; (p0) ld4 te12=[te12] // 7/1:te1[s2>>16]
157 (p0) shladd te31=te31,3,te3 // 7/2:te3+s1&0xff
158 (p0) and te13=te13,maskff} // 7/2:s3>>16&0xff
159{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
160 (p0) shladd te32=te32,3,te3 // 7/3:te3+s2
161 (p0) xor t0=t0,te33 };; // 7/0:
162{ .mmi; (p0) ld4 te31=[te31] // 8/2:te3[s1]
163 (p0) shladd te13=te13,3,te1 // 8/2:te1+s3>>16
164 (p0) xor t0=t0,te22 } // 8/0:
165{ .mmi; (p0) ld4 te32=[te32] // 8/3:te3[s2]
166 (p0) shladd te10=te10,3,te1 // 8/3:te1+s0>>16
167 (p0) xor t1=t1,te30 };; // 8/1:
168{ .mmi; (p0) ld4 te13=[te13] // 9/2:te1[s3>>16]
169 (p0) ld4 te10=[te10] // 9/3:te1[s0>>16]
170 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
171{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
172 (p0) xor t2=t2,te20 // 10[9]/2:
173 (p0) xor t3=t3,te21 };; // 10[9]/3:
174{ .mmi; (p0) xor t0=t0,te11 // 11[10]/0:done!
175 (p0) xor t1=t1,te01 // 11[10]/1:
176 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
177{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
178 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
179{ .mmi; (p0) xor t1=t1,te12 // 13[11]/1:done!
180 (p0) xor t2=t2,te31 // 13[11]/2:
181 (p0) xor t3=t3,te32 } // 13[11]/3:
182{ .mmi; (p17) add te0=2048,te0 // 13[11]/
183 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
184{ .mib; (p0) xor t2=t2,te13 // 14[12]/2:done!
185 (p17) add te2=2048+128-TE2,te2} // 14[12]/
186{ .mib; (p0) xor t3=t3,te10 // 14[12]/3:done!
187 (p17) add te3=2048+192-TE3,te3 // 14[12]/
188 br.ctop.sptk .Le_top };;
189.Le_end:
190// last round: single-byte loads (ld1) indexed off te0, assembled into words with dep/shl
191
192{ .mmi; ld8 te12=[te0] // prefetch Te4
193 ld8 te31=[te1] }
194{ .mmi; ld8 te10=[te2]
195 ld8 te32=[te3] }
196
197{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
198 and te33=s3,maskff // 0/0:s3&0xff
199 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
200{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
201 and te30=s0,maskff // 0/1:s0&0xff
202 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
203{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
204 add te33=te33,te0 // 1/0:te0+s3&0xff
205 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
206{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
207 add te30=te30,te0 // 1/1:te0+s0
208 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
209{ .mmi; ld1 te33=[te33] // 2/0:te0[s3&0xff]
210 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
211 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
212{ .mmi; ld1 te30=[te30] // 2/1:te0[s0]
213 add te23=te23,te0 // 2/1:te0+s3>>8
214 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
215{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
216 add te20=te20,te0 // 3/2:te0+s0>>8
217 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
218{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
219 add te00=te00,te0 // 3/0:te0+s0>>24
220 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
221{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
222 add te21=te21,te0 // 4/3:te0+s1>>8
223 extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
224{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
225 add te01=te01,te0 // 4/1:te0+s1>>24
226 shr.u te13=s3,sixteen };; // 4/2:s3>>16
227{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
228 add te11=te11,te0 // 5/0:te0+s1>>16
229 extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
230{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
231 add te02=te02,te0 // 5/2:te0+s2>>24
232 and te31=s1,maskff };; // 5/2:s1&0xff
233{ .mmi; ld1 te11=[te11] // 6/0:te0[s1>>16]
234 add te12=te12,te0 // 6/1:te0+s2>>16
235 extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
236{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
237 add te03=te03,te0 // 6/3:te0+s3>>24
238 and te32=s2,maskff };; // 6/3:s2&0xff
239
240{ .mmi; ld1 te12=[te12] // 7/1:te0[s2>>16]
241 add te31=te31,te0 // 7/2:te0+s1&0xff
242 dep te33=te22,te33,8,8} // 7/0:
243{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
244 add te32=te32,te0 // 7/3:te0+s2
245 and te13=te13,maskff};; // 7/2:s3>>16&0xff
246{ .mmi; ld1 te31=[te31] // 8/2:te0[s1]
247 add te13=te13,te0 // 8/2:te0+s3>>16
248 dep te30=te23,te30,8,8} // 8/1:
249{ .mmi; ld1 te32=[te32] // 8/3:te0[s2]
250 add te10=te10,te0 // 8/3:te0+s0>>16
251 shl te00=te00,twenty4};; // 8/0:
252{ .mii; ld1 te13=[te13] // 9/2:te0[s3>>16]
253 dep te33=te11,te33,16,8 // 9/0:
254 shl te01=te01,twenty4};; // 9/1:
255{ .mii; ld1 te10=[te10] // 10/3:te0[s0>>16]
256 dep te31=te20,te31,8,8 // 10/2:
257 shl te02=te02,twenty4};; // 10/2:
258{ .mii; xor t0=t0,te33 // 11/0:
259 dep te32=te21,te32,8,8 // 11/3:
260 shl te12=te12,sixteen};; // 11/1:
261{ .mii; xor r16=t0,te00 // 12/0:done!
262 dep te31=te13,te31,16,8 // 12/2:
263 shl te03=te03,twenty4};; // 12/3:
264{ .mmi; xor t1=t1,te01 // 13/1:
265 xor t2=t2,te02 // 13/2:
266 dep te32=te10,te32,16,8};; // 13/3:
267{ .mmi; xor t1=t1,te30 // 14/1:
268 xor r24=t2,te31 // 14/2:done!
269 xor t3=t3,te32 };; // 14/3:
270{ .mib; xor r20=t1,te12 // 15/1:done!
271 xor r28=t3,te03 // 15/3:done!
272 br.ret.sptk b6 };;
273.endp _ia64_AES_encrypt#
274
275// void AES_encrypt (const void *in,void *out,const AES_KEY *key);
//
// Public wrapper around _ia64_AES_encrypt. in0/in1/in2 carry in/out/key.
// The 16 input bytes are gathered into four 32-bit state words s0-s3
// (out1/out3/out5/out7), the core is called with b6 as the return link,
// and the words it returns in r16/r20/r24/r28 are written to the output
// buffer.
// ADDP and KSZ are macros defined earlier in this file (outside this
// excerpt). out8 and out11 are presumably aliased to te0/te3 of the core
// (table base and round count) -- confirm against the register aliases.
// A trailing "//;;" after a bundle is a stop that has been commented out.
276.global AES_encrypt#
277.proc AES_encrypt#
278.align 32
279AES_encrypt:
280 .prologue
281 .save ar.pfs,pfssave
282{ .mmi; alloc pfssave=ar.pfs,3,1,12,0
283 and out0=3,in0 // low two bits of in (alignment)
284 mov r3=ip } // base for the ip-relative table address
285{ .mmi; ADDP in0=0,in0
286 mov loc0=psr.um // saved user mask, restored on exit
287 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
288
289{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
290 add out8=(AES_Te#-AES_encrypt#),r3 // Te0
291 .save pr,prsave
292 mov prsave=pr }
293{ .mmi; rum 1<<3 // clear um.ac
294 .save ar.lc,lcsave
295 mov lcsave=ar.lc };;
296
297 .body
// Word-at-a-time path, assembled only when _HPUX_SOURCE is defined.
// Without it the prologue falls straight into .Le_i_unaligned below.
298#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
299{ .mib; cmp.ne p6,p0=out0,r0 // p6: input not 4-byte aligned
300 add out0=4,in0
301(p6) br.dpnt.many .Le_i_unaligned };;
302
303{ .mmi; ld4 out1=[in0],8 // s0
304 and out9=3,in1 // low two bits of out (alignment)
305 mov twenty4=24 }
306{ .mmi; ld4 out3=[out0],8 // s1
307 ADDP rk0=0,in2
308 mov sixteen=16 };;
309{ .mmi; ld4 out5=[in0] // s2
310 cmp.ne p6,p0=out9,r0 // p6: output not 4-byte aligned
311 mov maskff=0xff }
312{ .mmb; ld4 out7=[out0] // s3
313 ADDP rk1=KSZ,in2
314 br.call.sptk.many b6=_ia64_AES_encrypt };;
315
316{ .mib; ADDP in0=4,in1
317 ADDP in1=0,in1
318(p6) br.spnt .Le_o_unaligned };;
319
320{ .mii; mov psr.um=loc0
321 mov ar.pfs=pfssave
322 mov ar.lc=lcsave };;
323{ .mmi; st4 [in1]=r16,8 // s0
324 st4 [in0]=r20,8 // s1
325 mov pr=prsave,0x1ffff };;
326{ .mmb; st4 [in1]=r24 // s2
327 st4 [in0]=r28 // s3
328 br.ret.sptk.many b0 };;
329#endif
330
331.align 32
// Byte-wise input: the byte at the lowest address lands in bits 24-31 of
// each state word, so the words are assembled most-significant byte first.
332.Le_i_unaligned:
333{ .mmi; add out0=1,in0
334 add out2=2,in0
335 add out4=3,in0 };;
336{ .mmi; ld1 r16=[in0],4
337 ld1 r17=[out0],4 }//;;
338{ .mmi; ld1 r18=[out2],4
339 ld1 out1=[out4],4 };; // s0
340{ .mmi; ld1 r20=[in0],4
341 ld1 r21=[out0],4 }//;;
342{ .mmi; ld1 r22=[out2],4
343 ld1 out3=[out4],4 };; // s1
344{ .mmi; ld1 r24=[in0],4
345 ld1 r25=[out0],4 }//;;
346{ .mmi; ld1 r26=[out2],4
347 ld1 out5=[out4],4 };; // s2
348{ .mmi; ld1 r28=[in0]
349 ld1 r29=[out0] }//;;
350{ .mmi; ld1 r30=[out2]
351 ld1 out7=[out4] };; // s3
352
353{ .mii;
354 dep out1=r16,out1,24,8 //;;
355 dep out3=r20,out3,24,8 }//;;
356{ .mii; ADDP rk0=0,in2
357 dep out5=r24,out5,24,8 //;;
358 dep out7=r28,out7,24,8 };;
359{ .mii; ADDP rk1=KSZ,in2
360 dep out1=r17,out1,16,8 //;;
361 dep out3=r21,out3,16,8 }//;;
362{ .mii; mov twenty4=24
363 dep out5=r25,out5,16,8 //;;
364 dep out7=r29,out7,16,8 };;
365{ .mii; mov sixteen=16
366 dep out1=r18,out1,8,8 //;;
367 dep out3=r22,out3,8,8 }//;;
368{ .mii; mov maskff=0xff
369 dep out5=r26,out5,8,8 //;;
370 dep out7=r30,out7,8,8 };;
371
372{ .mib; br.call.sptk.many b6=_ia64_AES_encrypt };;
373
// Byte-wise output. Reached by falling through from the call above or by
// the (p6) branch in the HP-UX path. Each word is stored most-significant
// byte first (st1 writes only the low byte of its source register), then
// pr, ar.pfs, ar.lc and the user mask are restored.
374.Le_o_unaligned:
375{ .mii; ADDP out0=0,in1
376 extr.u r17=r16,8,8 // s0
377 shr.u r19=r16,twenty4 }//;;
378{ .mii; ADDP out1=1,in1
379 extr.u r18=r16,16,8
380 shr.u r23=r20,twenty4 }//;; // s1
381{ .mii; ADDP out2=2,in1
382 extr.u r21=r20,8,8
383 shr.u r22=r20,sixteen }//;;
384{ .mii; ADDP out3=3,in1
385 extr.u r25=r24,8,8 // s2
386 shr.u r27=r24,twenty4 };;
387{ .mii; st1 [out3]=r16,4
388 extr.u r26=r24,16,8
389 shr.u r31=r28,twenty4 }//;; // s3
390{ .mii; st1 [out2]=r17,4
391 extr.u r29=r28,8,8
392 shr.u r30=r28,sixteen }//;;
393
394{ .mmi; st1 [out1]=r18,4
395 st1 [out0]=r19,4 };;
396{ .mmi; st1 [out3]=r20,4
397 st1 [out2]=r21,4 }//;;
398{ .mmi; st1 [out1]=r22,4
399 st1 [out0]=r23,4 };;
400{ .mmi; st1 [out3]=r24,4
401 st1 [out2]=r25,4
402 mov pr=prsave,0x1ffff }//;;
403{ .mmi; st1 [out1]=r26,4
404 st1 [out0]=r27,4
405 mov ar.pfs=pfssave };;
406{ .mmi; st1 [out3]=r28
407 st1 [out2]=r29
408 mov ar.lc=lcsave }//;;
409{ .mmi; st1 [out1]=r30
410 st1 [out0]=r31 }
411{ .mfb; mov psr.um=loc0 // restore user mask
412 br.ret.sptk.many b0 };;
413.endp AES_encrypt#
414
415// *AES_decrypt are autogenerated by the following script:
416#if 0
417#!/usr/bin/env perl
# Prints the header comment and a copy of the script file itself, then
# rewrites the encrypt routines read from the input into their decrypt
# counterparts.
418print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
419open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);
420print "#endif\n";
421while(<>) {
# Skip all input before the .proc line of the core encrypt routine.
422 $process=1 if (/\.proc\s+_ia64_AES_encrypt/);
423 next if (!$process);
424
# Rename the teNN temporaries together with the state word each one is
# first computed from. New names carry a td prefix so that a name which
# has already been rewritten is not matched again by a later rule; the
# prefix is folded back to te below. Rules that are commented out would
# map a name to itself.
425 #s/te00=s0/td00=s0/; s/te00/td00/g;
426 s/te11=s1/td13=s3/; s/te11/td13/g;
427 #s/te22=s2/td22=s2/; s/te22/td22/g;
428 s/te33=s3/td31=s1/; s/te33/td31/g;
429
430 #s/te01=s1/td01=s1/; s/te01/td01/g;
431 s/te12=s2/td10=s0/; s/te12/td10/g;
432 #s/te23=s3/td23=s3/; s/te23/td23/g;
433 s/te30=s0/td32=s2/; s/te30/td32/g;
434
435 #s/te02=s2/td02=s2/; s/te02/td02/g;
436 s/te13=s3/td11=s1/; s/te13/td11/g;
437 #s/te20=s0/td20=s0/; s/te20/td20/g;
438 s/te31=s1/td33=s3/; s/te31/td33/g;
439
440 #s/te03=s3/td03=s3/; s/te03/td03/g;
441 s/te10=s0/td12=s2/; s/te10/td12/g;
442 #s/te21=s1/td21=s1/; s/te21/td21/g;
443 s/te32=s2/td30=s0/; s/te32/td30/g;
444
# Fold the temporary td prefix back to te.
445 s/td/te/g;
446
# Rename the procedures, the local .Le_ labels and the table symbol.
447 s/AES_encrypt/AES_decrypt/g;
448 s/\.Le_/.Ld_/g;
449 s/AES_Te#/AES_Td#/g;
450
451 print;
452
# Stop once the .endp line of the renamed public wrapper has been printed.
453 exit if (/\.endp\s+AES_decrypt/);
454}
455#endif
456.proc _ia64_AES_decrypt#
457// Input: rk0-rk1
458// te0
459// te3 as AES_KEY->rounds!!!
460// s0-s3
461// maskff,twenty4,sixteen
462// Output: r16,r20,r24,r28 as s0-s3
463// Clobber: r16-r31,rk0-rk1,r32-r43
//
// Generated from _ia64_AES_encrypt by the script above, so the per-line
// comments still describe the encrypt data flow and may name a different
// state word or table than the instruction actually uses.
// Flow: add the first round key to s0-s3, run the table-lookup rounds in
// a br.ctop loop (ar.lc = rounds-3, ar.ec = 2), then do the last round
// with byte loads from the table that starts 2048 bytes after te0.
// Returns through b6. LDKEY, KSZ and TE1-TE3 are defined earlier in this
// file (outside this excerpt).
464.align 32
465_ia64_AES_decrypt:
466 .prologue
467 .altrp b6
468 .body
469{ .mmi; alloc r16=ar.pfs,12,0,0,8
470 LDKEY t0=[rk0],2*KSZ
471 mov pr.rot=1<<16 }
472{ .mmi; LDKEY t1=[rk1],2*KSZ
473 add te1=TE1,te0
474 add te3=-3,te3 };;
475{ .mib; LDKEY t2=[rk0],2*KSZ
476 mov ar.ec=2 }
477{ .mib; LDKEY t3=[rk1],2*KSZ
478 add te2=TE2,te0
479 brp.loop.imp .Ld_top,.Ld_end-16 };;
480
// Initial round-key addition; te3 (rounds-3) becomes the loop count and
// is then reused as a table pointer.
481{ .mmi; xor s0=s0,t0
482 xor s1=s1,t1
483 mov ar.lc=te3 }
484{ .mmi; xor s2=s2,t2
485 xor s3=s3,t3
486 add te3=TE3,te0 };;
487
488.align 32
489.Ld_top:
490{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
491 (p0) and te31=s1,maskff // 0/0:s3&0xff
492 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
493{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
494 (p0) and te32=s2,maskff // 0/1:s0&0xff
495 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
496{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
497 (p0) shladd te31=te31,3,te3 // 1/0:te0+s0>>24
498 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
499{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
500 (p0) shladd te32=te32,3,te3 // 1/1:te3+s0
501 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
502{ .mmi; (p0) ld4 te31=[te31] // 2/0:te3[s3&0xff]
503 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
504 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
505{ .mmi; (p0) ld4 te32=[te32] // 2/1:te3[s0]
506 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
507 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
508{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
509 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
510 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
511{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
512 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
513 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
514{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
515 (p0) shladd te21=te21,3,te2 // 4/3:te3+s2
516 (p0) extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff
517{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
518 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
519 (p0) shr.u te11=s1,sixteen };; // 4/2:s3>>16
520{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
521 (p0) shladd te13=te13,3,te1 // 5/0:te1+s1>>16
522 (p0) extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff
523{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
524 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
525 (p0) and te33=s3,maskff };; // 5/2:s1&0xff
526{ .mmi; (p0) ld4 te13=[te13] // 6/0:te1[s1>>16]
527 (p0) shladd te10=te10,3,te1 // 6/1:te1+s2>>16
528 (p0) extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff
529{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
530 (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16
531 (p0) and te30=s0,maskff };; // 6/3:s2&0xff
532
533{ .mmi; (p0) ld4 te10=[te10] // 7/1:te1[s2>>16]
534 (p0) shladd te33=te33,3,te3 // 7/2:te3+s1&0xff
535 (p0) and te11=te11,maskff} // 7/2:s3>>16&0xff
536{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
537 (p0) shladd te30=te30,3,te3 // 7/3:te3+s2
538 (p0) xor t0=t0,te31 };; // 7/0:
539{ .mmi; (p0) ld4 te33=[te33] // 8/2:te3[s1]
540 (p0) shladd te11=te11,3,te1 // 8/2:te1+s3>>16
541 (p0) xor t0=t0,te22 } // 8/0:
542{ .mmi; (p0) ld4 te30=[te30] // 8/3:te3[s2]
543 (p0) shladd te12=te12,3,te1 // 8/3:te1+s0>>16
544 (p0) xor t1=t1,te32 };; // 8/1:
545{ .mmi; (p0) ld4 te11=[te11] // 9/2:te1[s3>>16]
546 (p0) ld4 te12=[te12] // 9/3:te1[s0>>16]
547 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
548{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
549 (p0) xor t2=t2,te20 // 10[9]/2:
550 (p0) xor t3=t3,te21 };; // 10[9]/3:
551{ .mmi; (p0) xor t0=t0,te13 // 11[10]/0:done!
552 (p0) xor t1=t1,te01 // 11[10]/1:
553 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
554{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
555 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
556{ .mmi; (p0) xor t1=t1,te10 // 13[11]/1:done!
557 (p0) xor t2=t2,te33 // 13[11]/2:
558 (p0) xor t3=t3,te30 } // 13[11]/3:
// p17 is forced clear on every pass where p16 is set, so the (p17) adds
// below take effect only on the final pass through the loop: te0 moves to
// the byte table 2048 bytes further on and te1-te3 to 64/128/192 bytes
// past the new te0.
559{ .mmi; (p17) add te0=2048,te0 // 13[11]/
560 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
561{ .mib; (p0) xor t2=t2,te11 // 14[12]/2:done!
562 (p17) add te2=2048+128-TE2,te2} // 14[12]/
563{ .mib; (p0) xor t3=t3,te12 // 14[12]/3:done!
564 (p17) add te3=2048+192-TE3,te3 // 14[12]/
565 br.ctop.sptk .Ld_top };;
566.Ld_end:
567
568
569{ .mmi; ld8 te10=[te0] // prefetch Td4
570 ld8 te33=[te1] }
571{ .mmi; ld8 te12=[te2]
572 ld8 te30=[te3] }
573
// Last round: one-byte table loads (ld1) merged with dep/shl and xored
// with the final round key; results are left in r16/r20/r24/r28.
574{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
575 and te31=s1,maskff // 0/0:s3&0xff
576 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
577{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
578 and te32=s2,maskff // 0/1:s0&0xff
579 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
580{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
581 add te31=te31,te0 // 1/0:te0+s0>>24
582 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
583{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
584 add te32=te32,te0 // 1/1:te0+s0
585 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
586{ .mmi; ld1 te31=[te31] // 2/0:te0[s3&0xff]
587 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
588 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
589{ .mmi; ld1 te32=[te32] // 2/1:te0[s0]
590 add te23=te23,te0 // 2/1:te0+s3>>8
591 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
592{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
593 add te20=te20,te0 // 3/2:te0+s0>>8
594 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
595{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
596 add te00=te00,te0 // 3/0:te0+s0>>24
597 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
598{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
599 add te21=te21,te0 // 4/3:te0+s2
600 extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff
601{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
602 add te01=te01,te0 // 4/1:te0+s1>>24
603 shr.u te11=s1,sixteen };; // 4/2:s3>>16
604{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
605 add te13=te13,te0 // 5/0:te0+s1>>16
606 extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff
607{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
608 add te02=te02,te0 // 5/2:te0+s2>>24
609 and te33=s3,maskff };; // 5/2:s1&0xff
610{ .mmi; ld1 te13=[te13] // 6/0:te0[s1>>16]
611 add te10=te10,te0 // 6/1:te0+s2>>16
612 extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff
613{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
614 add te03=te03,te0 // 6/3:te0+s0>>16
615 and te30=s0,maskff };; // 6/3:s2&0xff
616
617{ .mmi; ld1 te10=[te10] // 7/1:te0[s2>>16]
618 add te33=te33,te0 // 7/2:te0+s1&0xff
619 dep te31=te22,te31,8,8} // 7/0:
620{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
621 add te30=te30,te0 // 7/3:te0+s2
622 and te11=te11,maskff};; // 7/2:s3>>16&0xff
623{ .mmi; ld1 te33=[te33] // 8/2:te0[s1]
624 add te11=te11,te0 // 8/2:te0+s3>>16
625 dep te32=te23,te32,8,8} // 8/1:
626{ .mmi; ld1 te30=[te30] // 8/3:te0[s2]
627 add te12=te12,te0 // 8/3:te0+s0>>16
628 shl te00=te00,twenty4};; // 8/0:
629{ .mii; ld1 te11=[te11] // 9/2:te0[s3>>16]
630 dep te31=te13,te31,16,8 // 9/0:
631 shl te01=te01,twenty4};; // 9/1:
632{ .mii; ld1 te12=[te12] // 10/3:te0[s0>>16]
633 dep te33=te20,te33,8,8 // 10/2:
634 shl te02=te02,twenty4};; // 10/2:
635{ .mii; xor t0=t0,te31 // 11/0:
636 dep te30=te21,te30,8,8 // 11/3:
637 shl te10=te10,sixteen};; // 11/1:
638{ .mii; xor r16=t0,te00 // 12/0:done!
639 dep te33=te11,te33,16,8 // 12/2:
640 shl te03=te03,twenty4};; // 12/3:
641{ .mmi; xor t1=t1,te01 // 13/1:
642 xor t2=t2,te02 // 13/2:
643 dep te30=te12,te30,16,8};; // 13/3:
644{ .mmi; xor t1=t1,te32 // 14/1:
645 xor r24=t2,te33 // 14/2:done!
646 xor t3=t3,te30 };; // 14/3:
647{ .mib; xor r20=t1,te10 // 15/1:done!
648 xor r28=t3,te03 // 15/3:done!
649 br.ret.sptk b6 };;
650.endp _ia64_AES_decrypt#
651
652// void AES_decrypt (const void *in,void *out,const AES_KEY *key);
//
// Public wrapper around _ia64_AES_decrypt; generated from AES_encrypt by
// the script above. in0/in1/in2 carry in/out/key. The 16 input bytes are
// gathered into four 32-bit state words s0-s3 (out1/out3/out5/out7), the
// core is called with b6 as the return link, and the words it returns in
// r16/r20/r24/r28 are written to the output buffer.
// ADDP and KSZ are macros defined earlier in this file (outside this
// excerpt). out8 and out11 are presumably aliased to te0/te3 of the core
// (table base and round count) -- confirm against the register aliases.
// A trailing "//;;" after a bundle is a stop that has been commented out.
653.global AES_decrypt#
654.proc AES_decrypt#
655.align 32
656AES_decrypt:
657 .prologue
658 .save ar.pfs,pfssave
659{ .mmi; alloc pfssave=ar.pfs,3,1,12,0
660 and out0=3,in0 // low two bits of in (alignment)
661 mov r3=ip } // base for the ip-relative table address
662{ .mmi; ADDP in0=0,in0
663 mov loc0=psr.um // saved user mask, restored on exit
664 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
665
666{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
667 add out8=(AES_Td#-AES_decrypt#),r3 // Td0
668 .save pr,prsave
669 mov prsave=pr }
670{ .mmi; rum 1<<3 // clear um.ac
671 .save ar.lc,lcsave
672 mov lcsave=ar.lc };;
673
674 .body
// Word-at-a-time path, assembled only when _HPUX_SOURCE is defined.
// Without it the prologue falls straight into .Ld_i_unaligned below.
675#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
676{ .mib; cmp.ne p6,p0=out0,r0 // p6: input not 4-byte aligned
677 add out0=4,in0
678(p6) br.dpnt.many .Ld_i_unaligned };;
679
680{ .mmi; ld4 out1=[in0],8 // s0
681 and out9=3,in1 // low two bits of out (alignment)
682 mov twenty4=24 }
683{ .mmi; ld4 out3=[out0],8 // s1
684 ADDP rk0=0,in2
685 mov sixteen=16 };;
686{ .mmi; ld4 out5=[in0] // s2
687 cmp.ne p6,p0=out9,r0 // p6: output not 4-byte aligned
688 mov maskff=0xff }
689{ .mmb; ld4 out7=[out0] // s3
690 ADDP rk1=KSZ,in2
691 br.call.sptk.many b6=_ia64_AES_decrypt };;
692
693{ .mib; ADDP in0=4,in1
694 ADDP in1=0,in1
695(p6) br.spnt .Ld_o_unaligned };;
696
697{ .mii; mov psr.um=loc0
698 mov ar.pfs=pfssave
699 mov ar.lc=lcsave };;
700{ .mmi; st4 [in1]=r16,8 // s0
701 st4 [in0]=r20,8 // s1
702 mov pr=prsave,0x1ffff };;
703{ .mmb; st4 [in1]=r24 // s2
704 st4 [in0]=r28 // s3
705 br.ret.sptk.many b0 };;
706#endif
707
708.align 32
// Byte-wise input: the byte at the lowest address lands in bits 24-31 of
// each state word, so the words are assembled most-significant byte first.
709.Ld_i_unaligned:
710{ .mmi; add out0=1,in0
711 add out2=2,in0
712 add out4=3,in0 };;
713{ .mmi; ld1 r16=[in0],4
714 ld1 r17=[out0],4 }//;;
715{ .mmi; ld1 r18=[out2],4
716 ld1 out1=[out4],4 };; // s0
717{ .mmi; ld1 r20=[in0],4
718 ld1 r21=[out0],4 }//;;
719{ .mmi; ld1 r22=[out2],4
720 ld1 out3=[out4],4 };; // s1
721{ .mmi; ld1 r24=[in0],4
722 ld1 r25=[out0],4 }//;;
723{ .mmi; ld1 r26=[out2],4
724 ld1 out5=[out4],4 };; // s2
725{ .mmi; ld1 r28=[in0]
726 ld1 r29=[out0] }//;;
727{ .mmi; ld1 r30=[out2]
728 ld1 out7=[out4] };; // s3
729
730{ .mii;
731 dep out1=r16,out1,24,8 //;;
732 dep out3=r20,out3,24,8 }//;;
733{ .mii; ADDP rk0=0,in2
734 dep out5=r24,out5,24,8 //;;
735 dep out7=r28,out7,24,8 };;
736{ .mii; ADDP rk1=KSZ,in2
737 dep out1=r17,out1,16,8 //;;
738 dep out3=r21,out3,16,8 }//;;
739{ .mii; mov twenty4=24
740 dep out5=r25,out5,16,8 //;;
741 dep out7=r29,out7,16,8 };;
742{ .mii; mov sixteen=16
743 dep out1=r18,out1,8,8 //;;
744 dep out3=r22,out3,8,8 }//;;
745{ .mii; mov maskff=0xff
746 dep out5=r26,out5,8,8 //;;
747 dep out7=r30,out7,8,8 };;
748
749{ .mib; br.call.sptk.many b6=_ia64_AES_decrypt };;
750
// Byte-wise output. Reached by falling through from the call above or by
// the (p6) branch in the HP-UX path. Each word is stored most-significant
// byte first (st1 writes only the low byte of its source register), then
// pr, ar.pfs, ar.lc and the user mask are restored.
751.Ld_o_unaligned:
752{ .mii; ADDP out0=0,in1
753 extr.u r17=r16,8,8 // s0
754 shr.u r19=r16,twenty4 }//;;
755{ .mii; ADDP out1=1,in1
756 extr.u r18=r16,16,8
757 shr.u r23=r20,twenty4 }//;; // s1
758{ .mii; ADDP out2=2,in1
759 extr.u r21=r20,8,8
760 shr.u r22=r20,sixteen }//;;
761{ .mii; ADDP out3=3,in1
762 extr.u r25=r24,8,8 // s2
763 shr.u r27=r24,twenty4 };;
764{ .mii; st1 [out3]=r16,4
765 extr.u r26=r24,16,8
766 shr.u r31=r28,twenty4 }//;; // s3
767{ .mii; st1 [out2]=r17,4
768 extr.u r29=r28,8,8
769 shr.u r30=r28,sixteen }//;;
770
771{ .mmi; st1 [out1]=r18,4
772 st1 [out0]=r19,4 };;
773{ .mmi; st1 [out3]=r20,4
774 st1 [out2]=r21,4 }//;;
775{ .mmi; st1 [out1]=r22,4
776 st1 [out0]=r23,4 };;
777{ .mmi; st1 [out3]=r24,4
778 st1 [out2]=r25,4
779 mov pr=prsave,0x1ffff }//;;
780{ .mmi; st1 [out1]=r26,4
781 st1 [out0]=r27,4
782 mov ar.pfs=pfssave };;
783{ .mmi; st1 [out3]=r28
784 st1 [out2]=r29
785 mov ar.lc=lcsave }//;;
786{ .mmi; st1 [out1]=r30
787 st1 [out0]=r31 }
788{ .mfb; mov psr.um=loc0 // restore user mask
789 br.ret.sptk.many b0 };;
790.endp AES_decrypt#
791
792// Keep the tables in the .text segment: AES_encrypt and AES_decrypt locate them as an offset from their own address (ip).
793.align 64
794.global AES_Te#
795.type AES_Te#,@object
796AES_Te: data4 0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
797 data4 0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
798 data4 0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
799 data4 0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
800 data4 0x60303050,0x60303050, 0x02010103,0x02010103
801 data4 0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
802 data4 0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
803 data4 0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
804 data4 0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
805 data4 0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
806 data4 0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
807 data4 0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
808 data4 0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
809 data4 0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
810 data4 0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
811 data4 0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
812 data4 0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
813 data4 0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
814 data4 0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
815 data4 0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
816 data4 0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
817 data4 0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108
818 data4 0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
819 data4 0x62313153,0x62313153, 0x2a15153f,0x2a15153f
820 data4 0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
821 data4 0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
822 data4 0x30181828,0x30181828, 0x379696a1,0x379696a1
823 data4 0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
824 data4 0x0e070709,0x0e070709, 0x24121236,0x24121236
825 data4 0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
826 data4 0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
827 data4 0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
828 data4 0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
829 data4 0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
830 data4 0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
831 data4 0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
832 data4 0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
833 data4 0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
834 data4 0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
835 data4 0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
836 data4 0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
837 data4 0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
838 data4 0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
839 data4 0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
840 data4 0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
841 data4 0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
842 data4 0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
843 data4 0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
844 data4 0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
845 data4 0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
846 data4 0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
847 data4 0x66333355,0x66333355, 0x11858594,0x11858594
848 data4 0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
849 data4 0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
850 data4 0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
851 data4 0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
852 data4 0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
853 data4 0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
854 data4 0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
855 data4 0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
856 data4 0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
857 data4 0xafdada75,0xafdada75, 0x42212163,0x42212163
858 data4 0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
859 data4 0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
860 data4 0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
861 data4 0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
862 data4 0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
863 data4 0x884444cc,0x884444cc, 0x2e171739,0x2e171739
864 data4 0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
865 data4 0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
866 data4 0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
867 data4 0x3219192b,0x3219192b, 0xe6737395,0xe6737395
868 data4 0xc06060a0,0xc06060a0, 0x19818198,0x19818198
869 data4 0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
870 data4 0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
871 data4 0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
872 data4 0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
873 data4 0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
874 data4 0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
875 data4 0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
876 data4 0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
877 data4 0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
878 data4 0x924949db,0x924949db, 0x0c06060a,0x0c06060a
879 data4 0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
880 data4 0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
881 data4 0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
882 data4 0x399191a8,0x399191a8, 0x319595a4,0x319595a4
883 data4 0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
884 data4 0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
885 data4 0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
886 data4 0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
887 data4 0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
888 data4 0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
889 data4 0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
890 data4 0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
891 data4 0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
892 data4 0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
893 data4 0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
894 data4 0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
895 data4 0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
896 data4 0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
897 data4 0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
898 data4 0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
899 data4 0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
900 data4 0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
901 data4 0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
902 data4 0x904848d8,0x904848d8, 0x06030305,0x06030305
903 data4 0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
904 data4 0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
905 data4 0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
906 data4 0x17868691,0x17868691, 0x99c1c158,0x99c1c158
907 data4 0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
908 data4 0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
909 data4 0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
910 data4 0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
911 data4 0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
912 data4 0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
913 data4 0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
914 data4 0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
915 data4 0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
916 data4 0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
917 data4 0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
918 data4 0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
919 data4 0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
920 data4 0x824141c3,0x824141c3, 0x299999b0,0x299999b0
921 data4 0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
922 data4 0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
923 data4 0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
924// Te4: byte table (256 one-byte entries) starting at AES_Te+2048
925 data1 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
926 data1 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
927 data1 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
928 data1 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
929 data1 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
930 data1 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
931 data1 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
932 data1 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
933 data1 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
934 data1 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
935 data1 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
936 data1 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
937 data1 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
938 data1 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
939 data1 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
940 data1 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
941 data1 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
942 data1 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
943 data1 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
944 data1 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
945 data1 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
946 data1 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
947 data1 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
948 data1 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
949 data1 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
950 data1 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
951 data1 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
952 data1 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
953 data1 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
954 data1 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
955 data1 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
956 data1 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
957.size AES_Te#,2048+256 // HP-UX assembler fails to ".-AES_Te#"
958
959.align 64
960.global AES_Td#
961.type AES_Td#,@object
962AES_Td: data4 0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
963 data4 0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
964 data4 0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
965 data4 0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
966 data4 0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
967 data4 0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
968 data4 0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
969 data4 0x26354480,0x26354480, 0xb562a38f,0xb562a38f
970 data4 0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
971 data4 0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
972 data4 0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
973 data4 0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
974 data4 0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
975 data4 0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
976 data4 0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
977 data4 0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
978 data4 0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
979 data4 0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
980 data4 0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
981 data4 0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
982 data4 0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
983 data4 0x97513360,0x97513360, 0x62537f45,0x62537f45
984 data4 0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
985 data4 0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
986 data4 0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
987 data4 0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
988 data4 0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
989 data4 0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
990 data4 0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
991 data4 0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
992 data4 0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
993 data4 0x02036aba,0x02036aba, 0xed16825c,0xed16825c
994 data4 0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
995 data4 0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
996 data4 0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
997 data4 0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
998 data4 0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
999 data4 0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
1000 data4 0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
1001 data4 0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
1002 data4 0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
1003 data4 0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
1004 data4 0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
1005 data4 0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
1006 data4 0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
1007 data4 0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
1008 data4 0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
1009 data4 0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
1010 data4 0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
1011 data4 0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
1012 data4 0x09808683,0x09808683, 0x322bed48,0x322bed48
1013 data4 0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
1014 data4 0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
1015 data4 0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
1016 data4 0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
1017 data4 0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
1018 data4 0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
1019 data4 0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
1020 data4 0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
1021 data4 0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
1022 data4 0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
1023 data4 0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
1024 data4 0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
1025 data4 0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
1026 data4 0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
1027 data4 0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
1028 data4 0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
1029 data4 0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
1030 data4 0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
1031 data4 0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
1032 data4 0xd731dcca,0xd731dcca, 0x42638510,0x42638510
1033 data4 0x13972240,0x13972240, 0x84c61120,0x84c61120
1034 data4 0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
1035 data4 0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
1036 data4 0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
1037 data4 0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
1038 data4 0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
1039 data4 0x119448fa,0x119448fa, 0x47e96422,0x47e96422
1040 data4 0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
1041 data4 0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
1042 data4 0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
1043 data4 0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
1044 data4 0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
1045 data4 0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
1046 data4 0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
1047 data4 0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
1048 data4 0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
1049 data4 0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
1050 data4 0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
1051 data4 0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
1052 data4 0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
1053 data4 0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
1054 data4 0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
1055 data4 0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
1056 data4 0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
1057 data4 0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
1058 data4 0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
1059 data4 0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
1060 data4 0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
1061 data4 0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
1062 data4 0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
1063 data4 0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
1064 data4 0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
1065 data4 0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
1066 data4 0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
1067 data4 0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
1068 data4 0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
1069 data4 0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
1070 data4 0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
1071 data4 0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
1072 data4 0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
1073 data4 0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
1074 data4 0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
1075 data4 0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
1076 data4 0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
1077 data4 0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
1078 data4 0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
1079 data4 0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
1080 data4 0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
1081 data4 0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
1082 data4 0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
1083 data4 0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
1084 data4 0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
1085 data4 0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
1086 data4 0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
1087 data4 0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
1088 data4 0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
1089 data4 0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
1090// Td4: byte table (256 one-byte entries) starting at AES_Td+2048
1091 data1 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1092 data1 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1093 data1 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1094 data1 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1095 data1 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1096 data1 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1097 data1 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1098 data1 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1099 data1 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1100 data1 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1101 data1 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1102 data1 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1103 data1 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1104 data1 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1105 data1 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1106 data1 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1107 data1 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1108 data1 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1109 data1 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1110 data1 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1111 data1 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1112 data1 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1113 data1 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1114 data1 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1115 data1 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1116 data1 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1117 data1 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1118 data1 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1119 data1 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1120 data1 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1121 data1 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1122 data1 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1123.size AES_Td#,2048+256 // HP-UX assembler fails to ".-AES_Td#"
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
new file mode 100755
index 0000000000..44e0bf8cae
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
@@ -0,0 +1,1578 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.2.
10#
11# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
12# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
13# [you'll notice a lot of resemblance], such as compressed S-boxes
14# in little-endian byte order, prefetch of these tables in CBC mode,
15# as well as avoiding L1 cache aliasing between stack frame and key
16# schedule and already mentioned tables, compressed Td4...
17#
18# Performance in number of cycles per processed byte for 128-bit key:
19#
20# ECB CBC encrypt
21# AMD64 13.7 13.0(*)
22# EM64T 20.2 18.6(*)
23#
24# (*) CBC benchmarks are better than ECB thanks to custom ABI used
25# by the private block encryption function.
26
$verticalspin=1;	# unlike 32-bit version $verticalspin performs
			# ~15% better on both AMD and Intel cores
$output=shift;
# Everything printed to STDOUT is piped through the perlasm translator.
# Fail loudly if the pipe cannot be created instead of silently
# producing no assembly at all.
open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output"
	or die "can't call ../perlasm/x86_64-xlate.pl: $!";

# accumulated assembly text
$code=".text\n";

# register assignment shared by all emitters below
$s0="%eax";		# $s0..$s3: the four 32-bit state words
$s1="%ebx";
$s2="%ecx";
$s3="%edx";
$acc0="%esi";		# $acc0..$acc2: scratch/index registers
$acc1="%edi";
$acc2="%ebp";
$inp="%r8";		# input pointer
$out="%r9";		# output pointer
$t0="%r10d";		# $t0..$t2: per-round temporaries
$t1="%r11d";
$t2="%r12d";
$rnds="%r13d";		# round counter
$sbox="%r14";		# base of the lookup table in use
$key="%r15";		# key schedule pointer
# hi(REG): return the high-byte alias (%ah..%dh) of %e[a-d]x/%r[a-d]x;
# any other register name is returned unchanged.
# Note: always invoked as &hi(...), so no prototype is declared.
sub hi { my $r=shift; $r =~ s/%[er]([a-d])x/%${1}h/; $r; }

# lo(REG): return the low-byte alias of a register name:
#	%e[a-d]x/%r[a-d]x -> %al..%dl
#	%[er]si/%[er]di   -> %sil/%dil
#	%rN/%rNd	  -> %rNb
# Names matching none of the patterns are returned unchanged.
sub lo { my $r=shift;
	 $r =~ s/%[er]([a-d])x/%${1}l/;
	 $r =~ s/%[er]([sd]i)/%${1}l/;
	 $r =~ s/%(r[0-9]+)[d]?/%${1}b/;
	 $r; }
# _data_word(LIST): append one ".long" line per argument to $code, each
# carrying the 32-bit value twice (8 bytes per table entry).  Emission
# stops at the first undefined argument.
sub _data_word()
{ foreach my $word (@_) {
	last unless (defined($word));
	$code.=sprintf(".long\t0x%08x,0x%08x\n",$word,$word);
  }
}
# data_word(LIST): append a single ".long" line to $code holding all
# arguments as comma-separated 32-bit hex values.
sub data_word()
{ my @head=@_;
  my $tail=pop(@head);		# last value is printed without a comma
  my $line=".long\t";
  foreach my $word (@head) {
	last unless (defined($word));
	$line.=sprintf("0x%08x,",$word);
  }
  $code.=$line;
  $code.=sprintf("0x%08x\n",$tail);
}
65
# data_byte(LIST): append a single ".byte" line to $code; every value
# is truncated to its low 8 bits before being printed.
sub data_byte()
{ my @head=@_;
  my $tail=pop(@head);		# last value is printed without a comma
  my $line=".byte\t";
  foreach my $byte (@head) {
	last unless (defined($byte));
	$line.=sprintf("0x%02x,",$byte&0xff);
  }
  $code.=$line;
  $code.=sprintf("0x%02x\n",$tail&0xff);
}
73
# encvert(): append the instruction sequence for one regular encryption
# round to $code, processing all four state words in an interleaved
# fashion.  Each state byte indexes the table at $sbox (8-byte stride)
# and the words at byte offsets 0..3 of the entry are combined into
# $t0..$t3; $key is advanced by 16 and the round key at 0..12($key) is
# xored in, leaving the new state in $s0..$s3.
# The `...` expressions inside the text are not evaluated here.
# NOTE(review): presumably expanded by a later pass over $code - confirm.
sub encvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	# favor 3-way issue Opteron pipeline...
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	mov	0($sbox,$acc0,8),$t0
	mov	0($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t2

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	movzb	`&lo("$s3")`,$acc2
	xor	3($sbox,$acc0,8),$t0
	xor	3($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t3

	movzb	`&hi("$s3")`,$acc0
	shr	\$16,$s2
	movzb	`&hi("$s0")`,$acc2
	xor	3($sbox,$acc0,8),$t2
	shr	\$16,$s3
	xor	3($sbox,$acc2,8),$t3

	shr	\$16,$s1
	lea	16($key),$key
	shr	\$16,$s0

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	xor	2($sbox,$acc0,8),$t0
	xor	2($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t2

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	movzb	`&lo("$s1")`,$acc2
	xor	1($sbox,$acc0,8),$t0
	xor	1($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t3

	mov	12($key),$s3
	movzb	`&hi("$s1")`,$acc1
	movzb	`&hi("$s2")`,$acc2
	mov	0($key),$s0
	xor	1($sbox,$acc1,8),$t2
	xor	1($sbox,$acc2,8),$t3

	mov	4($key),$s1
	mov	8($key),$s2
	xor	$t0,$s0
	xor	$t1,$s1
	xor	$t2,$s2
	xor	$t3,$s3
___
}
133
# enclastvert(): append the final encryption round to $code.  Unlike
# encvert() every table word (loaded at offset 0 or 2 of the 8-byte
# entry) is masked down to a single byte lane (0x000000ff, 0x0000ff00,
# 0x00ff0000 or 0xff000000) before being merged into $t0..$t3.  The
# result is xored with the 16 bytes at 16($key) and left in $s0..$s3;
# $key itself is not advanced.
sub enclastvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	mov	2($sbox,$acc0,8),$t0
	mov	2($sbox,$acc1,8),$t1
	mov	2($sbox,$acc2,8),$t2

	and	\$0x000000ff,$t0
	and	\$0x000000ff,$t1
	and	\$0x000000ff,$t2

	movzb	`&lo("$s3")`,$acc0
	movzb	`&hi("$s1")`,$acc1
	movzb	`&hi("$s2")`,$acc2
	mov	2($sbox,$acc0,8),$t3
	mov	0($sbox,$acc1,8),$acc1	#$t0
	mov	0($sbox,$acc2,8),$acc2	#$t1

	and	\$0x000000ff,$t3
	and	\$0x0000ff00,$acc1
	and	\$0x0000ff00,$acc2

	xor	$acc1,$t0
	xor	$acc2,$t1
	shr	\$16,$s2

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	shr	\$16,$s3
	mov	0($sbox,$acc0,8),$acc0	#$t2
	mov	0($sbox,$acc1,8),$acc1	#$t3

	and	\$0x0000ff00,$acc0
	and	\$0x0000ff00,$acc1
	shr	\$16,$s1
	xor	$acc0,$t2
	xor	$acc1,$t3
	shr	\$16,$s0

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	mov	0($sbox,$acc0,8),$acc0	#$t0
	mov	0($sbox,$acc1,8),$acc1	#$t1
	mov	0($sbox,$acc2,8),$acc2	#$t2

	and	\$0x00ff0000,$acc0
	and	\$0x00ff0000,$acc1
	and	\$0x00ff0000,$acc2

	xor	$acc0,$t0
	xor	$acc1,$t1
	xor	$acc2,$t2

	movzb	`&lo("$s1")`,$acc0
	movzb	`&hi("$s3")`,$acc1
	movzb	`&hi("$s0")`,$acc2
	mov	0($sbox,$acc0,8),$acc0	#$t3
	mov	2($sbox,$acc1,8),$acc1	#$t0
	mov	2($sbox,$acc2,8),$acc2	#$t1

	and	\$0x00ff0000,$acc0
	and	\$0xff000000,$acc1
	and	\$0xff000000,$acc2

	xor	$acc0,$t3
	xor	$acc1,$t0
	xor	$acc2,$t1

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	mov	16+12($key),$s3
	mov	2($sbox,$acc0,8),$acc0	#$t2
	mov	2($sbox,$acc1,8),$acc1	#$t3
	mov	16+0($key),$s0

	and	\$0xff000000,$acc0
	and	\$0xff000000,$acc1

	xor	$acc0,$t2
	xor	$acc1,$t3

	mov	16+4($key),$s1
	mov	16+8($key),$s2
	xor	$t0,$s0
	xor	$t1,$s1
	xor	$t2,$s2
	xor	$t3,$s3
___
}
228
# encstep(I,S0,S1,S2,S3): append to $code the code computing output
# column I (0..3) of a regular encryption round from the given rotation
# of the state registers.  Columns 0..2 are produced in $t0..$t2 using
# $acc0..$acc2 as scratch; column 3 is produced in place, reuses the
# remaining state registers as scratch and finally moves $t0..$t2 back
# into the state.  Column 0 also advances $key by 16.
sub encstep()
{ my ($col,@st) = @_;
  my $final = ($col==3);
  my ($b1,$b2,$b3) = $final ? @st[1..3] : ($acc0,$acc1,$acc2);
  my $dst = ($t0,$t1,$t2,$st[0])[$col];
  my @asm;

	push(@asm,"movzb\t".&lo($st[0]).",$dst");
	push(@asm,"mov\t$st[2],$b2")		unless ($final);
	push(@asm,"lea\t16($key),$key")		if ($col==0);

	push(@asm,"movzb\t".&hi($st[1]).",$b1");
	push(@asm,"mov\t0($sbox,$dst,8),$dst");

	push(@asm,"shr\t\$16,$b2");
	push(@asm,"mov\t$st[3],$b3")		unless ($final);
	push(@asm,"xor\t3($sbox,$b1,8),$dst");

	push(@asm,"movzb\t".&lo($b2).",$b2");
	push(@asm,"shr\t\$24,$b3");
	push(@asm,"xor\t4*$col($key),$dst");

	push(@asm,"xor\t2($sbox,$b2,8),$dst");
	push(@asm,"xor\t1($sbox,$b3,8),$dst");

	if ($final) {		# hand the first three columns back
	    push(@asm,"mov\t$t0,$st[1]");
	    push(@asm,"mov\t$t1,$st[2]");
	    push(@asm,"mov\t$t2,$st[3]");
	}
	$code.=join("",map { "\t$_\n" } @asm)."\n";
}
264
# enclast(I,S0,S1,S2,S3): final-round counterpart of encstep().  Each
# looked-up table word is masked to one byte lane before being merged
# into the output column; no round key is applied and $key is not
# touched here.
sub enclast()
{ my ($col,@st)=@_;
  my $final = ($col==3);
  my ($b1,$b2,$b3) = $final ? @st[1..3] : ($acc0,$acc1,$acc2);
  my $dst = ($t0,$t1,$t2,$st[0])[$col];
  my @asm;

	push(@asm,"movzb\t".&lo($st[0]).",$dst");
	push(@asm,"mov\t$st[2],$b2")		unless ($final);

	push(@asm,"mov\t2($sbox,$dst,8),$dst");
	push(@asm,"shr\t\$16,$b2");
	push(@asm,"mov\t$st[3],$b3")		unless ($final);

	push(@asm,"and\t\$0x000000ff,$dst");
	push(@asm,"movzb\t".&hi($st[1]).",$b1");
	push(@asm,"movzb\t".&lo($b2).",$b2");
	push(@asm,"shr\t\$24,$b3");

	push(@asm,"mov\t0($sbox,$b1,8),$b1");
	push(@asm,"mov\t0($sbox,$b2,8),$b2");
	push(@asm,"mov\t2($sbox,$b3,8),$b3");

	push(@asm,"and\t\$0x0000ff00,$b1");
	push(@asm,"and\t\$0x00ff0000,$b2");
	push(@asm,"and\t\$0xff000000,$b3");

	# on the last column the write-backs of $t0..$t2 are interleaved
	# with the final xors
	push(@asm,"xor\t$b1,$dst");
	push(@asm,"mov\t$t0,$st[1]")		if ($final);
	push(@asm,"xor\t$b2,$dst");
	push(@asm,"mov\t$t1,$st[2]")		if ($final);
	push(@asm,"xor\t$b3,$dst");
	push(@asm,"mov\t$t2,$st[3]")		if ($final);

	$code.=join("",map { "\t$_\n" } @asm)."\n";
}
305
# Emit _x86_64_AES_encrypt, the private block routine: state in
# $s0..$s3, key schedule in $key, table base in $sbox.  It xors in the
# first round key, runs key->rounds-1 regular rounds in .Lenc_loop and
# finishes with the last round.
$code.=<<___;
.type	_x86_64_AES_encrypt,\@abi-omnipotent
.align	16
_x86_64_AES_encrypt:
	xor	0($key),$s0			# xor with key
	xor	4($key),$s1
	xor	8($key),$s2
	xor	12($key),$s3

	mov	240($key),$rnds			# load key->rounds
	sub	\$1,$rnds
	jmp	.Lenc_loop
.align	16
.Lenc_loop:
___
	# loop body: one regular round
	if (!$verticalspin) {
	    my @st=($s0,$s1,$s2,$s3);
	    # column $i sees the state rotated left by $i
	    foreach my $i (0..3) { &encstep($i,@st[$i..3],@st[0..$i-1]); }
	} else {
	    &encvert();
	}
$code.=<<___;
	sub	\$1,$rnds
	jnz	.Lenc_loop
___
	# last round
	if (!$verticalspin) {
	    my @st=($s0,$s1,$s2,$s3);
	    foreach my $i (0..3) { &enclast($i,@st[$i..3],@st[0..$i-1]); }
	    $code.=<<___;
	xor	16+0($key),$s0		# xor with key
	xor	16+4($key),$s1
	xor	16+8($key),$s2
	xor	16+12($key),$s3
___
	} else {
	    &enclastvert();
	}
$code.=<<___;
	.byte	0xf3,0xc3			# rep ret
.size	_x86_64_AES_encrypt,.-_x86_64_AES_encrypt
___
347
# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public single-block entry point.  The emitted code saves %rbx, %rbp
# and %r12-%r15, moves the three arguments into $key/$inp/$out, points
# $sbox at AES_Te, loads the 16 input bytes into $s0..$s3, calls
# _x86_64_AES_encrypt and stores $s0..$s3 to the output buffer.
$code.=<<___;
.globl	AES_encrypt
.type	AES_encrypt,\@function,3
.align	16
AES_encrypt:
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	mov	%rdx,$key
	mov	%rdi,$inp
	mov	%rsi,$out

	.picmeup	$sbox
	lea	AES_Te-.($sbox),$sbox

	mov	0($inp),$s0
	mov	4($inp),$s1
	mov	8($inp),$s2
	mov	12($inp),$s3

	call	_x86_64_AES_encrypt

	mov	$s0,0($out)
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
.size	AES_encrypt,.-AES_encrypt
___
389
390#------------------------------------------------------------------#
391
# decvert(): append the instruction sequence for one regular decryption
# round to $code.  Same structure as encvert() but the state bytes are
# picked in the order required for decryption; $key is advanced by 16
# and the round key at 0..12($key) is xored in, leaving the new state
# in $s0..$s3.
sub decvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	# favor 3-way issue Opteron pipeline...
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	mov	0($sbox,$acc0,8),$t0
	mov	0($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t2

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	movzb	`&lo("$s3")`,$acc2
	xor	3($sbox,$acc0,8),$t0
	xor	3($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t3

	movzb	`&hi("$s1")`,$acc0
	shr	\$16,$s0
	movzb	`&hi("$s2")`,$acc2
	xor	3($sbox,$acc0,8),$t2
	shr	\$16,$s3
	xor	3($sbox,$acc2,8),$t3

	shr	\$16,$s1
	lea	16($key),$key
	shr	\$16,$s2

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	xor	2($sbox,$acc0,8),$t0
	xor	2($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t2

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	movzb	`&lo("$s1")`,$acc2
	xor	1($sbox,$acc0,8),$t0
	xor	1($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t3

	movzb	`&hi("$s3")`,$acc0
	mov	12($key),$s3
	movzb	`&hi("$s0")`,$acc2
	xor	1($sbox,$acc0,8),$t2
	mov	0($key),$s0
	xor	1($sbox,$acc2,8),$t3

	xor	$t0,$s0
	mov	4($key),$s1
	mov	8($key),$s2
	xor	$t2,$s2
	xor	$t1,$s1
	xor	$t3,$s3
___
}
451
# declastvert(): append the final decryption round to $code.  Every
# state byte is replaced by a single byte loaded from the 256-byte
# table at 2048($sbox) and shifted into its lane (<<8, <<16, <<24);
# the assembled words are xored with the 16 bytes at 16($key) and left
# in $s0..$s3.  $key itself is not advanced.
sub declastvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	movzb	2048($sbox,$acc0,1),$t0
	movzb	2048($sbox,$acc1,1),$t1
	movzb	2048($sbox,$acc2,1),$t2

	movzb	`&lo("$s3")`,$acc0
	movzb	`&hi("$s3")`,$acc1
	movzb	`&hi("$s0")`,$acc2
	movzb	2048($sbox,$acc0,1),$t3
	movzb	2048($sbox,$acc1,1),$acc1	#$t0
	movzb	2048($sbox,$acc2,1),$acc2	#$t1

	shl	\$8,$acc1
	shl	\$8,$acc2

	xor	$acc1,$t0
	xor	$acc2,$t1
	shr	\$16,$s3

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	shr	\$16,$s0
	movzb	2048($sbox,$acc0,1),$acc0	#$t2
	movzb	2048($sbox,$acc1,1),$acc1	#$t3

	shl	\$8,$acc0
	shl	\$8,$acc1
	shr	\$16,$s1
	xor	$acc0,$t2
	xor	$acc1,$t3
	shr	\$16,$s2

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	movzb	2048($sbox,$acc0,1),$acc0	#$t0
	movzb	2048($sbox,$acc1,1),$acc1	#$t1
	movzb	2048($sbox,$acc2,1),$acc2	#$t2

	shl	\$16,$acc0
	shl	\$16,$acc1
	shl	\$16,$acc2

	xor	$acc0,$t0
	xor	$acc1,$t1
	xor	$acc2,$t2

	movzb	`&lo("$s1")`,$acc0
	movzb	`&hi("$s1")`,$acc1
	movzb	`&hi("$s2")`,$acc2
	movzb	2048($sbox,$acc0,1),$acc0	#$t3
	movzb	2048($sbox,$acc1,1),$acc1	#$t0
	movzb	2048($sbox,$acc2,1),$acc2	#$t1

	shl	\$16,$acc0
	shl	\$24,$acc1
	shl	\$24,$acc2

	xor	$acc0,$t3
	xor	$acc1,$t0
	xor	$acc2,$t1

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	mov	16+12($key),$s3
	movzb	2048($sbox,$acc0,1),$acc0	#$t2
	movzb	2048($sbox,$acc1,1),$acc1	#$t3
	mov	16+0($key),$s0

	shl	\$24,$acc0
	shl	\$24,$acc1

	xor	$acc0,$t2
	xor	$acc1,$t3

	mov	16+4($key),$s1
	mov	16+8($key),$s2
	xor	$t0,$s0
	xor	$t1,$s1
	xor	$t2,$s2
	xor	$t3,$s3
___
}
541
# decstep(I,S0,S1,S2,S3): append to $code the code computing output
# column I (0..3) of a regular decryption round from the given rotation
# of the state registers.  Columns 0..2 go to $t0..$t2 with
# $acc0..$acc2 as scratch; column 3 is computed in place, uses the
# other state registers as scratch and then moves $t2,$t1,$t0 back
# into the state.  The round key is not applied here.
sub decstep()
{ my ($col,@st) = @_;
  my $final = ($col==3);
  my ($b1,$b2,$b3) = $final ? @st[1..3] : ($acc0,$acc1,$acc2);
  my $dst = ($t0,$t1,$t2,$st[0])[$col];
  my @asm;

	push(@asm,"mov\t$st[0],$dst")		unless ($final);
	push(@asm,"mov\t$st[2],$b2")		unless ($final);
	push(@asm,"and\t\$0xFF,$dst");

	push(@asm,"mov\t0($sbox,$dst,8),$dst");
	push(@asm,"shr\t\$16,$b2");
	push(@asm,"mov\t$st[3],$b3")		unless ($final);

	push(@asm,"movzb\t".&hi($st[1]).",$b1");
	push(@asm,"and\t\$0xFF,$b2");
	push(@asm,"shr\t\$24,$b3");

	push(@asm,"xor\t3($sbox,$b1,8),$dst");
	push(@asm,"xor\t2($sbox,$b2,8),$dst");
	push(@asm,"xor\t1($sbox,$b3,8),$dst");

	if ($final) {		# hand the first three columns back
	    push(@asm,"mov\t$t2,$st[1]");
	    push(@asm,"mov\t$t1,$st[2]");
	    push(@asm,"mov\t$t0,$st[3]");
	}
	$code.=join("",map { "\t$_\n" } @asm)."\n";
}
573
# declast(I,S0,S1,S2,S3): final-round counterpart of decstep().  Each
# state byte is replaced by one byte loaded from the table at
# 2048($sbox) and shifted into its lane before being merged into the
# output column; no round key is applied here.
sub declast()
{ my ($col,@st)=@_;
  my $final = ($col==3);
  my ($b1,$b2,$b3) = $final ? @st[1..3] : ($acc0,$acc1,$acc2);
  my $dst = ($t0,$t1,$t2,$st[0])[$col];
  my @asm;

	push(@asm,"mov\t$st[0],$dst")		unless ($final);
	push(@asm,"mov\t$st[2],$b2")		unless ($final);
	push(@asm,"and\t\$0xFF,$dst");

	push(@asm,"movzb\t2048($sbox,$dst,1),$dst");
	push(@asm,"shr\t\$16,$b2");
	push(@asm,"mov\t$st[3],$b3")		unless ($final);

	push(@asm,"movzb\t".&hi($st[1]).",$b1");
	push(@asm,"and\t\$0xFF,$b2");
	push(@asm,"shr\t\$24,$b3");

	push(@asm,"movzb\t2048($sbox,$b1,1),$b1");
	push(@asm,"movzb\t2048($sbox,$b2,1),$b2");
	push(@asm,"movzb\t2048($sbox,$b3,1),$b3");

	push(@asm,"shl\t\$8,$b1");
	push(@asm,"shl\t\$16,$b2");
	push(@asm,"shl\t\$24,$b3");

	# on the last column the write-backs of $t2,$t1,$t0 are
	# interleaved with the final xors
	push(@asm,"xor\t$b1,$dst");
	push(@asm,"mov\t$t2,$st[1]")		if ($final);
	push(@asm,"xor\t$b2,$dst");
	push(@asm,"mov\t$t1,$st[2]")		if ($final);
	push(@asm,"xor\t$b3,$dst");
	push(@asm,"mov\t$t0,$st[3]")		if ($final);

	$code.=join("",map { "\t$_\n" } @asm)."\n";
}
612
# Emit _x86_64_AES_decrypt, the private block routine: state in
# $s0..$s3, key schedule in $key, table base in $sbox.  It xors in the
# first round key, runs key->rounds-1 regular rounds in .Ldec_loop and
# finishes with the last round.
$code.=<<___;
.type	_x86_64_AES_decrypt,\@abi-omnipotent
.align	16
_x86_64_AES_decrypt:
	xor	0($key),$s0			# xor with key
	xor	4($key),$s1
	xor	8($key),$s2
	xor	12($key),$s3

	mov	240($key),$rnds			# load key->rounds
	sub	\$1,$rnds
	jmp	.Ldec_loop
.align	16
.Ldec_loop:
___
	# loop body: one regular round
	if (!$verticalspin) {
	    my @st=($s0,$s1,$s2,$s3);
	    # column $i sees the state rotated right by $i
	    foreach my $i (0..3) {
		&decstep($i,@st[map { ($i-$_)%4 } (0..3)]);
	    }
	    $code.=<<___;
	lea	16($key),$key
	xor	0($key),$s0			# xor with key
	xor	4($key),$s1
	xor	8($key),$s2
	xor	12($key),$s3
___
	} else {
	    &decvert();
	}
$code.=<<___;
	sub	\$1,$rnds
	jnz	.Ldec_loop
___
	# last round
	if (!$verticalspin) {
	    my @st=($s0,$s1,$s2,$s3);
	    foreach my $i (0..3) {
		&declast($i,@st[map { ($i-$_)%4 } (0..3)]);
	    }
	    $code.=<<___;
	xor	16+0($key),$s0		# xor with key
	xor	16+4($key),$s1
	xor	16+8($key),$s2
	xor	16+12($key),$s3
___
	} else {
	    &declastvert();
	}
$code.=<<___;
	.byte	0xf3,0xc3			# rep ret
.size	_x86_64_AES_decrypt,.-_x86_64_AES_decrypt
___
661
# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public single-block entry point.  The emitted code saves %rbx, %rbp
# and %r12-%r15, moves the three arguments into $key/$inp/$out, points
# $sbox at AES_Td, touches the 256 bytes at AES_Td+2048 in 32-byte
# steps (the "prefetch Td4" sequence; the loaded values are discarded),
# loads the 16 input bytes into $s0..$s3, calls _x86_64_AES_decrypt and
# stores $s0..$s3 to the output buffer.
$code.=<<___;
.globl	AES_decrypt
.type	AES_decrypt,\@function,3
.align	16
AES_decrypt:
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	mov	%rdx,$key
	mov	%rdi,$inp
	mov	%rsi,$out

	.picmeup	$sbox
	lea	AES_Td-.($sbox),$sbox

	# prefetch Td4
	lea	2048+128($sbox),$sbox;
	mov	0-128($sbox),$s0
	mov	32-128($sbox),$s1
	mov	64-128($sbox),$s2
	mov	96-128($sbox),$s3
	mov	128-128($sbox),$s0
	mov	160-128($sbox),$s1
	mov	192-128($sbox),$s2
	mov	224-128($sbox),$s3
	lea	-2048-128($sbox),$sbox;

	mov	0($inp),$s0
	mov	4($inp),$s1
	mov	8($inp),$s2
	mov	12($inp),$s3

	call	_x86_64_AES_decrypt

	mov	$s0,0($out)
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
.size	AES_decrypt,.-AES_decrypt
___
715#------------------------------------------------------------------#
716
# enckey(): append the core step of the encryption key schedule to
# $code.  On entry of the emitted code %edx holds a round-key word,
# %eax the word to be updated, %rbp the table base and %rcx the rcon
# index.  The four bytes of %edx each select one table entry; one byte
# lane of every entry is masked out and xored into %eax, followed by
# the rcon word at 2048(%rbp,%rcx,4).  %ebx, %esi and %edx are
# clobbered.
sub enckey()
{
$code.=<<___;
	movz	%dl,%esi		# rk[i]>>0
	mov	2(%rbp,%rsi,8),%ebx
	movz	%dh,%esi		# rk[i]>>8
	and	\$0xFF000000,%ebx
	xor	%ebx,%eax

	mov	2(%rbp,%rsi,8),%ebx
	shr	\$16,%edx
	and	\$0x000000FF,%ebx
	movz	%dl,%esi		# rk[i]>>16
	xor	%ebx,%eax

	mov	0(%rbp,%rsi,8),%ebx
	movz	%dh,%esi		# rk[i]>>24
	and	\$0x0000FF00,%ebx
	xor	%ebx,%eax

	mov	0(%rbp,%rsi,8),%ebx
	and	\$0x00FF0000,%ebx
	xor	%ebx,%eax

	xor	2048(%rbp,%rcx,4),%eax		# rcon
___
}
744
# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
#                        AES_KEY *key)
#
# Emitted code returns 0 on success, -1 if userKey or key is NULL and
# -2 if bits is not 128, 192 or 256.  The user key is copied to the
# start of the schedule, the remaining round keys are derived in a
# per-key-size loop built around enckey(), and the number of rounds
# (10, 12 or 14) is stored at byte offset 240 of the schedule.
# %rbx and %rbp are saved and restored; %rbp serves as the AES_Te base.
$code.=<<___;
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,\@function,3
.align	16
AES_set_encrypt_key:
	push	%rbx
	push	%rbp

	mov	%esi,%ecx			# %ecx=bits
	mov	%rdi,%rsi			# %rsi=userKey
	mov	%rdx,%rdi			# %rdi=key

	test	\$-1,%rsi
	jz	.Lbadpointer
	test	\$-1,%rdi
	jz	.Lbadpointer

	.picmeup %rbp
	lea	AES_Te-.(%rbp),%rbp

	cmp	\$128,%ecx
	je	.L10rounds
	cmp	\$192,%ecx
	je	.L12rounds
	cmp	\$256,%ecx
	je	.L14rounds
	mov	\$-2,%rax			# invalid number of bits
	jmp	.Lexit

.L10rounds:
	mov	0(%rsi),%eax			# copy first 4 dwords
	mov	4(%rsi),%ebx
	mov	8(%rsi),%ecx
	mov	12(%rsi),%edx
	mov	%eax,0(%rdi)
	mov	%ebx,4(%rdi)
	mov	%ecx,8(%rdi)
	mov	%edx,12(%rdi)

	xor	%ecx,%ecx
	jmp	.L10shortcut
.align	4
.L10loop:
		mov	0(%rdi),%eax			# rk[0]
		mov	12(%rdi),%edx			# rk[3]
.L10shortcut:
___
		# 128-bit key: %eax=rk[0], %edx=rk[3], %ecx=iteration/rcon index
		&enckey	();
$code.=<<___;
		mov	%eax,16(%rdi)			# rk[4]
		xor	4(%rdi),%eax
		mov	%eax,20(%rdi)			# rk[5]
		xor	8(%rdi),%eax
		mov	%eax,24(%rdi)			# rk[6]
		xor	12(%rdi),%eax
		mov	%eax,28(%rdi)			# rk[7]
		add	\$1,%ecx
		lea	16(%rdi),%rdi
		cmp	\$10,%ecx
	jl	.L10loop

	movl	\$10,80(%rdi)			# setup number of rounds
	xor	%rax,%rax
	jmp	.Lexit

.L12rounds:
	mov	0(%rsi),%eax			# copy first 6 dwords
	mov	4(%rsi),%ebx
	mov	8(%rsi),%ecx
	mov	12(%rsi),%edx
	mov	%eax,0(%rdi)
	mov	%ebx,4(%rdi)
	mov	%ecx,8(%rdi)
	mov	%edx,12(%rdi)
	mov	16(%rsi),%ecx
	mov	20(%rsi),%edx
	mov	%ecx,16(%rdi)
	mov	%edx,20(%rdi)

	xor	%ecx,%ecx
	jmp	.L12shortcut
.align	4
.L12loop:
		mov	0(%rdi),%eax			# rk[0]
		mov	20(%rdi),%edx			# rk[5]
.L12shortcut:
___
		# 192-bit key: %eax=rk[0], %edx=rk[5], %ecx=iteration/rcon index
		&enckey	();
$code.=<<___;
		mov	%eax,24(%rdi)			# rk[6]
		xor	4(%rdi),%eax
		mov	%eax,28(%rdi)			# rk[7]
		xor	8(%rdi),%eax
		mov	%eax,32(%rdi)			# rk[8]
		xor	12(%rdi),%eax
		mov	%eax,36(%rdi)			# rk[9]

		cmp	\$7,%ecx
		je	.L12break
		add	\$1,%ecx

		xor	16(%rdi),%eax
		mov	%eax,40(%rdi)			# rk[10]
		xor	20(%rdi),%eax
		mov	%eax,44(%rdi)			# rk[11]

		lea	24(%rdi),%rdi
	jmp	.L12loop
.L12break:
	movl	\$12,72(%rdi)		# setup number of rounds
	xor	%rax,%rax
	jmp	.Lexit

.L14rounds:
	mov	0(%rsi),%eax			# copy first 8 dwords
	mov	4(%rsi),%ebx
	mov	8(%rsi),%ecx
	mov	12(%rsi),%edx
	mov	%eax,0(%rdi)
	mov	%ebx,4(%rdi)
	mov	%ecx,8(%rdi)
	mov	%edx,12(%rdi)
	mov	16(%rsi),%eax
	mov	20(%rsi),%ebx
	mov	24(%rsi),%ecx
	mov	28(%rsi),%edx
	mov	%eax,16(%rdi)
	mov	%ebx,20(%rdi)
	mov	%ecx,24(%rdi)
	mov	%edx,28(%rdi)

	xor	%ecx,%ecx
	jmp	.L14shortcut
.align	4
.L14loop:
		mov	28(%rdi),%edx			# rk[4]
.L14shortcut:
		mov	0(%rdi),%eax			# rk[0]
___
		# 256-bit key: %eax=rk[0], %edx=word at 28(%rdi),
		# %ecx=iteration/rcon index
		&enckey	();
$code.=<<___;
		mov	%eax,32(%rdi)			# rk[8]
		xor	4(%rdi),%eax
		mov	%eax,36(%rdi)			# rk[9]
		xor	8(%rdi),%eax
		mov	%eax,40(%rdi)			# rk[10]
		xor	12(%rdi),%eax
		mov	%eax,44(%rdi)			# rk[11]

		cmp	\$6,%ecx
		je	.L14break
		add	\$1,%ecx

		mov	%eax,%edx
		mov	16(%rdi),%eax			# rk[4]
		movz	%dl,%esi			# rk[11]>>0
		mov	2(%rbp,%rsi,8),%ebx
		movz	%dh,%esi			# rk[11]>>8
		and	\$0x000000FF,%ebx
		xor	%ebx,%eax

		mov	0(%rbp,%rsi,8),%ebx
		shr	\$16,%edx
		and	\$0x0000FF00,%ebx
		movz	%dl,%esi			# rk[11]>>16
		xor	%ebx,%eax

		mov	0(%rbp,%rsi,8),%ebx
		movz	%dh,%esi			# rk[11]>>24
		and	\$0x00FF0000,%ebx
		xor	%ebx,%eax

		mov	2(%rbp,%rsi,8),%ebx
		and	\$0xFF000000,%ebx
		xor	%ebx,%eax

		mov	%eax,48(%rdi)			# rk[12]
		xor	20(%rdi),%eax
		mov	%eax,52(%rdi)			# rk[13]
		xor	24(%rdi),%eax
		mov	%eax,56(%rdi)			# rk[14]
		xor	28(%rdi),%eax
		mov	%eax,60(%rdi)			# rk[15]

		lea	32(%rdi),%rdi
	jmp	.L14loop
.L14break:
	movl	\$14,48(%rdi)		# setup number of rounds
	xor	%rax,%rax
	jmp	.Lexit

.Lbadpointer:
	mov	\$-1,%rax
.Lexit:
	pop	%rbp
	pop	%rbx
	ret
.size	AES_set_encrypt_key,.-AES_set_encrypt_key
___
946
# deckey(I,PTR,TE,TD): append to $code the in-place transformation of
# the 32-bit word at I(PTR).  Each of its four bytes is first mapped
# through a byte read from the table based at TE (offset 2 of the
# 8-byte entry), the result then indexes the table based at TD, and
# the words at byte offsets 0,3,2,1 of those entries are xored
# together and stored back to I(PTR).
# The emitted code clobbers %rax, %rbx and %rdx.
sub deckey()
{ my ($i,$ptr,$te,$td) = @_;
$code.=<<___;
	mov	$i($ptr),%eax
	mov	%eax,%edx
	movz	%ah,%ebx
	shr	\$16,%edx
	and	\$0xFF,%eax
	movzb	2($te,%rax,8),%rax
	movzb	2($te,%rbx,8),%rbx
	mov	0($td,%rax,8),%eax
	xor	3($td,%rbx,8),%eax
	movzb	%dh,%ebx
	and	\$0xFF,%edx
	movzb	2($te,%rdx,8),%rdx
	movzb	2($te,%rbx,8),%rbx
	xor	2($td,%rdx,8),%eax
	xor	1($td,%rbx,8),%eax
	mov	%eax,$i($ptr)
___
}
968
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
#                        AES_KEY *key)
#
# The emitted code first builds the encryption schedule by calling
# AES_set_encrypt_key and hands its non-zero return value straight back
# on failure.  Otherwise it reverses the order of the 16-byte round
# keys (.Linvert) and then transforms every round key except the first
# and the last one with deckey() (.Lpermute).
#
# Stack discipline: exactly one 8-byte slot (the key pointer) is pushed
# before the call, so the failure path must release exactly 8 bytes
# before "ret".  Releasing more would make "ret" pop a bogus return
# address.  On the success path the same slot is recycled to preserve
# %rbx, which is popped right before the final "ret".
$code.=<<___;
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function,3
.align	16
AES_set_decrypt_key:
	push	%rdx
	call	AES_set_encrypt_key
	cmp	\$0,%eax
	je	.Lproceed
	lea	8(%rsp),%rsp		# drop the saved key pointer
	ret
.Lproceed:
	mov	(%rsp),%r8		# restore key schedule
	mov	%rbx,(%rsp)

	mov	240(%r8),%ecx		# pull number of rounds
	xor	%rdi,%rdi
	lea	(%rdi,%rcx,4),%rcx
	mov	%r8,%rsi
	lea	(%r8,%rcx,4),%rdi	# pointer to last chunk
.align	4
.Linvert:
		mov	0(%rsi),%rax
		mov	8(%rsi),%rbx
		mov	0(%rdi),%rcx
		mov	8(%rdi),%rdx
		mov	%rax,0(%rdi)
		mov	%rbx,8(%rdi)
		mov	%rcx,0(%rsi)
		mov	%rdx,8(%rsi)
		lea	16(%rsi),%rsi
		lea	-16(%rdi),%rdi
		cmp	%rsi,%rdi
	jne	.Linvert

	.picmeup %r9
	lea	AES_Td-.(%r9),%rdi
	lea	AES_Te-AES_Td(%rdi),%r9

	mov	%r8,%rsi
	mov	240(%r8),%ecx		# pull number of rounds
	sub	\$1,%ecx
.align	4
.Lpermute:
		lea	16(%rsi),%rsi
___
1017 &deckey (0,"%rsi","%r9","%rdi");
1018 &deckey (4,"%rsi","%r9","%rdi");
1019 &deckey (8,"%rsi","%r9","%rdi");
1020 &deckey (12,"%rsi","%r9","%rdi");
1021$code.=<<___;
1022 sub \$1,%ecx
1023 jnz .Lpermute
1024
1025 xor %rax,%rax
1026 pop %rbx
1027 ret
1028.size AES_set_decrypt_key,.-AES_set_decrypt_key
1029___
1030
# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#			size_t length, const AES_KEY *key,
#			unsigned char *ivp,const int enc);
#
# The emitted code returns immediately for length==0.  Otherwise it
# builds a 64-byte aligned private stack frame (layout below) placed so
# that it does not alias the lookup table modulo 4096, copies the key
# schedule into that frame when the caller's schedule would alias the
# table, touches the whole table once and then runs the CBC loop on top
# of _x86_64_AES_encrypt/_x86_64_AES_decrypt.  A stack copy of the key
# schedule is zeroed again in .Lcbc_cleanup.
#
# Encryption of a trailing partial block: the tail is copied to the
# output buffer (unless the operation is in place), padded with zeros
# up to 16 bytes and encrypted from there.  %rdi must point at
# out+remaining for the zero fill in *both* cases; in the in-place case
# nothing else initializes it (it is used as scratch by the block
# routine), so it is set explicitly at .Lcbc_enc_tail.
{
# stack frame layout
# -8(%rsp)		return address
my $_rsp="0(%rsp)";		# saved %rsp
my $_len="8(%rsp)";		# copy of 3rd parameter, length
my $_key="16(%rsp)";		# copy of 4th parameter, key
my $_ivp="24(%rsp)";		# copy of 5th parameter, ivp
my $keyp="32(%rsp)";		# one to pass as $key
my $ivec="40(%rsp)";		# ivec[16]
my $aes_key="56(%rsp)";		# copy of aes_key
my $mark="56+240(%rsp)";	# copy of aes_key->rounds

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function,6
.align	16
AES_cbc_encrypt:
	cmp	\$0,%rdx	# check length
	je	.Lcbc_just_ret
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	cld
	mov	%r9d,%r9d	# clear upper half of enc

	.picmeup $sbox
.Lcbc_pic_point:

	cmp	\$0,%r9
	je	.LDECRYPT

	lea	AES_Te-.Lcbc_pic_point($sbox),$sbox

	# allocate aligned stack frame...
	lea	-64-248(%rsp),$key
	and	\$-64,$key

	# ... and make it doesn't alias with AES_Te modulo 4096
	mov	$sbox,%r10
	lea	2048($sbox),%r11
	mov	$key,%r12
	and	\$0xFFF,%r10	# s = $sbox&0xfff
	and	\$0xFFF,%r11	# e = ($sbox+2048)&0xfff
	and	\$0xFFF,%r12	# p = %rsp&0xfff

	cmp	%r11,%r12	# if (p=>e) %rsp =- (p-e);
	jb	.Lcbc_te_break_out
	sub	%r11,%r12
	sub	%r12,$key
	jmp	.Lcbc_te_ok
.Lcbc_te_break_out:		# else %rsp -= (p-s)&0xfff + framesz
	sub	%r10,%r12
	and	\$0xFFF,%r12
	add	\$320,%r12
	sub	%r12,$key
.align	4
.Lcbc_te_ok:

	xchg	%rsp,$key
	add	\$8,%rsp	# reserve for return address!
	mov	$key,$_rsp	# save %rsp
	mov	%rdx,$_len	# save copy of len
	mov	%rcx,$_key	# save copy of key
	mov	%r8,$_ivp	# save copy of ivp
	movl	\$0,$mark	# copy of aes_key->rounds = 0;
	mov	%r8,%rbp	# rearrange input arguments
	mov	%rsi,$out
	mov	%rdi,$inp
	mov	%rcx,$key

	# do we copy key schedule to stack?
	mov	$key,%r10
	sub	$sbox,%r10
	and	\$0xfff,%r10
	cmp	\$2048,%r10
	jb	.Lcbc_do_ecopy
	cmp	\$4096-248,%r10
	jb	.Lcbc_skip_ecopy
.align	4
.Lcbc_do_ecopy:
		mov	$key,%rsi
		lea	$aes_key,%rdi
		lea	$aes_key,$key
		mov	\$240/8,%ecx
		.long	0x90A548F3	# rep movsq
		mov	(%rsi),%eax	# copy aes_key->rounds
		mov	%eax,(%rdi)
.Lcbc_skip_ecopy:
	mov	$key,$keyp	# save key pointer

	mov	\$16,%ecx
.align	4
.Lcbc_prefetch_te:
		mov	0($sbox),%r10
		mov	32($sbox),%r11
		mov	64($sbox),%r12
		mov	96($sbox),%r13
		lea	128($sbox),$sbox
		sub	\$1,%ecx
	jnz	.Lcbc_prefetch_te
	sub	\$2048,$sbox

	test	\$-16,%rdx		# check upon length
	mov	%rdx,%r10
	mov	0(%rbp),$s0		# load iv
	mov	4(%rbp),$s1
	mov	8(%rbp),$s2
	mov	12(%rbp),$s3
	jz	.Lcbc_enc_tail		# short input...

.align	4
.Lcbc_enc_loop:
		xor	0($inp),$s0
		xor	4($inp),$s1
		xor	8($inp),$s2
		xor	12($inp),$s3
		mov	$inp,$ivec	# if ($verticalspin) save inp

		mov	$keyp,$key	# restore key
		call	_x86_64_AES_encrypt

		mov	$ivec,$inp	# if ($verticalspin) restore inp
		mov	$s0,0($out)
		mov	$s1,4($out)
		mov	$s2,8($out)
		mov	$s3,12($out)

		mov	$_len,%r10
		lea	16($inp),$inp
		lea	16($out),$out
		sub	\$16,%r10
		test	\$-16,%r10
		mov	%r10,$_len
	jnz	.Lcbc_enc_loop
	test	\$15,%r10
	jnz	.Lcbc_enc_tail
	mov	$_ivp,%rbp	# restore ivp
	mov	$s0,0(%rbp)	# save ivec
	mov	$s1,4(%rbp)
	mov	$s2,8(%rbp)
	mov	$s3,12(%rbp)

.align	4
.Lcbc_cleanup:
	cmpl	\$0,$mark	# was the key schedule copied?
	lea	$aes_key,%rdi
	mov	$_rsp,%rsp
	je	.Lcbc_exit
		mov	\$240/8,%ecx
		xor	%rax,%rax
		.long	0x90AB48F3	# rep stosq
.Lcbc_exit:
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
.Lcbc_just_ret:
	ret
.align	4
.Lcbc_enc_tail:
	lea	($out,%r10),%rdi	# in place: zero fill starts at out+len
	cmp	$inp,$out
	je	.Lcbc_enc_in_place
	mov	%r10,%rcx
	mov	$inp,%rsi
	mov	$out,%rdi
	.long	0xF689A4F3		# rep movsb
.Lcbc_enc_in_place:
	mov	\$16,%rcx		# zero tail
	sub	%r10,%rcx
	xor	%rax,%rax
	.long	0xF689AAF3		# rep stosb
	mov	$out,$inp		# this is not a mistake!
	movq	\$16,$_len		# len=16
	jmp	.Lcbc_enc_loop		# one more spin...
#----------------------------- DECRYPT -----------------------------#
.align	16
.LDECRYPT:
	lea	AES_Td-.Lcbc_pic_point($sbox),$sbox

	# allocate aligned stack frame...
	lea	-64-248(%rsp),$key
	and	\$-64,$key

	# ... and make it doesn't alias with AES_Td modulo 4096
	mov	$sbox,%r10
	lea	2304($sbox),%r11
	mov	$key,%r12
	and	\$0xFFF,%r10	# s = $sbox&0xfff
	and	\$0xFFF,%r11	# e = ($sbox+2048+256)&0xfff
	and	\$0xFFF,%r12	# p = %rsp&0xfff

	cmp	%r11,%r12	# if (p=>e) %rsp =- (p-e);
	jb	.Lcbc_td_break_out
	sub	%r11,%r12
	sub	%r12,$key
	jmp	.Lcbc_td_ok
.Lcbc_td_break_out:		# else %rsp -= (p-s)&0xfff + framesz
	sub	%r10,%r12
	and	\$0xFFF,%r12
	add	\$320,%r12
	sub	%r12,$key
.align	4
.Lcbc_td_ok:

	xchg	%rsp,$key
	add	\$8,%rsp	# reserve for return address!
	mov	$key,$_rsp	# save %rsp
	mov	%rdx,$_len	# save copy of len
	mov	%rcx,$_key	# save copy of key
	mov	%r8,$_ivp	# save copy of ivp
	movl	\$0,$mark	# copy of aes_key->rounds = 0;
	mov	%r8,%rbp	# rearrange input arguments
	mov	%rsi,$out
	mov	%rdi,$inp
	mov	%rcx,$key

	# do we copy key schedule to stack?
	mov	$key,%r10
	sub	$sbox,%r10
	and	\$0xfff,%r10
	cmp	\$2304,%r10
	jb	.Lcbc_do_dcopy
	cmp	\$4096-248,%r10
	jb	.Lcbc_skip_dcopy
.align	4
.Lcbc_do_dcopy:
		mov	$key,%rsi
		lea	$aes_key,%rdi
		lea	$aes_key,$key
		mov	\$240/8,%ecx
		.long	0x90A548F3	# rep movsq
		mov	(%rsi),%eax	# copy aes_key->rounds
		mov	%eax,(%rdi)
.Lcbc_skip_dcopy:
	mov	$key,$keyp	# save key pointer

	mov	\$18,%ecx
.align	4
.Lcbc_prefetch_td:
		mov	0($sbox),%r10
		mov	32($sbox),%r11
		mov	64($sbox),%r12
		mov	96($sbox),%r13
		lea	128($sbox),$sbox
		sub	\$1,%ecx
	jnz	.Lcbc_prefetch_td
	sub	\$2304,$sbox

	cmp	$inp,$out
	je	.Lcbc_dec_in_place

	mov	%rbp,$ivec
.align	4
.Lcbc_dec_loop:
		mov	0($inp),$s0		# read input
		mov	4($inp),$s1
		mov	8($inp),$s2
		mov	12($inp),$s3
		mov	$inp,8+$ivec	# if ($verticalspin) save inp

		mov	$keyp,$key	# restore key
		call	_x86_64_AES_decrypt

		mov	$ivec,%rbp	# load ivp
		mov	8+$ivec,$inp	# if ($verticalspin) restore inp
		xor	0(%rbp),$s0	# xor iv
		xor	4(%rbp),$s1
		xor	8(%rbp),$s2
		xor	12(%rbp),$s3
		mov	$inp,%rbp	# current input, next iv

		mov	$_len,%r10	# load len
		sub	\$16,%r10
		jc	.Lcbc_dec_partial
		mov	%r10,$_len	# update len
		mov	%rbp,$ivec	# update ivp

		mov	$s0,0($out)	# write output
		mov	$s1,4($out)
		mov	$s2,8($out)
		mov	$s3,12($out)

		lea	16($inp),$inp
		lea	16($out),$out
	jnz	.Lcbc_dec_loop
.Lcbc_dec_end:
	mov	$_ivp,%r12		# load user ivp
	mov	0(%rbp),%r10		# load iv
	mov	8(%rbp),%r11
	mov	%r10,0(%r12)		# copy back to user
	mov	%r11,8(%r12)
	jmp	.Lcbc_cleanup

.align	4
.Lcbc_dec_partial:
	mov	$s0,0+$ivec		# dump output to stack
	mov	$s1,4+$ivec
	mov	$s2,8+$ivec
	mov	$s3,12+$ivec
	mov	$out,%rdi
	lea	$ivec,%rsi
	mov	\$16,%rcx
	add	%r10,%rcx		# number of bytes to copy
	.long	0xF689A4F3		# rep movsb
	jmp	.Lcbc_dec_end

.align	16
.Lcbc_dec_in_place:
		mov	0($inp),$s0	# load input
		mov	4($inp),$s1
		mov	8($inp),$s2
		mov	12($inp),$s3

		mov	$inp,$ivec	# if ($verticalspin) save inp
		mov	$keyp,$key
		call	_x86_64_AES_decrypt

		mov	$ivec,$inp	# if ($verticalspin) restore inp
		mov	$_ivp,%rbp
		xor	0(%rbp),$s0
		xor	4(%rbp),$s1
		xor	8(%rbp),$s2
		xor	12(%rbp),$s3

		mov	0($inp),%r10	# copy input to iv
		mov	8($inp),%r11
		mov	%r10,0(%rbp)
		mov	%r11,8(%rbp)

		mov	$s0,0($out)	# save output [zaps input]
		mov	$s1,4($out)
		mov	$s2,8($out)
		mov	$s3,12($out)

		mov	$_len,%rcx
		lea	16($inp),$inp
		lea	16($out),$out
		sub	\$16,%rcx
		jc	.Lcbc_dec_in_place_partial
		mov	%rcx,$_len
	jnz	.Lcbc_dec_in_place
	jmp	.Lcbc_cleanup

.align	4
.Lcbc_dec_in_place_partial:
	# one can argue if this is actually required
	lea	($out,%rcx),%rdi
	lea	(%rbp,%rcx),%rsi
	neg	%rcx
	.long	0xF689A4F3	# rep movsb	# restore tail
	jmp	.Lcbc_cleanup
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
1395
# Append the AES_Te lookup table to the generated assembly: a global label,
# preceded by a `.align 64` directive, followed by 256 32-bit constants
# (64 rows of four) and then the round-constant words.
1396$code.=<<___;
1397.globl AES_Te
1398.align 64
1399AES_Te:
1400___
# Table body.  The values appear to be the standard AES encryption T-table:
# e.g. the first entry 0xa56363c6 packs the bytes 3*0x63, 0x63, 0x63, 2*0x63
# (multiplication in GF(2^8)).
# NOTE(review): _data_word is defined outside this part of the file; how each
# value is laid out in the output (width, repetition) is decided there.
1401 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
1402 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
1403 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
1404 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
1405 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
1406 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
1407 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
1408 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
1409 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
1410 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
1411 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
1412 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
1413 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
1414 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
1415 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
1416 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
1417 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
1418 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
1419 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
1420 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
1421 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
1422 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
1423 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
1424 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
1425 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
1426 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
1427 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
1428 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
1429 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
1430 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
1431 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
1432 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
1433 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
1434 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
1435 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
1436 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
1437 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
1438 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
1439 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
1440 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
1441 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
1442 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
1443 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
1444 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
1445 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
1446 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
1447 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
1448 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
1449 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
1450 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
1451 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
1452 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
1453 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
1454 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
1455 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
1456 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
1457 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
1458 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
1459 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
1460 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
1461 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
1462 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
1463 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
1464 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
# Round constants: ten non-zero words (0x01 .. 0x80, 0x1b, 0x36) followed by
# six zero words, 16 words in total.  The "rcon" name below is only a Perl
# comment, so no assembler label is emitted for this data; presumably it is
# reached as an offset from AES_Te -- confirm against the key-setup code.
1465#rcon:
1466$code.=<<___;
1467 .long 0x00000001, 0x00000002, 0x00000004, 0x00000008
1468 .long 0x00000010, 0x00000020, 0x00000040, 0x00000080
1469 .long 0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0
1470___
# Append the AES_Td lookup table to the generated assembly: a global label,
# preceded by a `.align 64` directive, followed by 256 32-bit constants
# (64 rows of four) and then 256 single-byte values.
1471$code.=<<___;
1472.globl AES_Td
1473.align 64
1474AES_Td:
1475___
# Table body.  The values appear to be the standard AES decryption T-table:
# e.g. the first entry 0x50a7f451 packs 0x52 multiplied by 0x0b, 0x0d, 0x09
# and 0x0e in GF(2^8), 0x52 being the first inverse S-box byte listed below.
# NOTE(review): _data_word is defined outside this part of the file; how each
# value is laid out in the output (width, repetition) is decided there.
1476 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
1477 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
1478 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
1479 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
1480 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
1481 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
1482 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
1483 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
1484 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
1485 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
1486 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
1487 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
1488 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
1489 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
1490 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
1491 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
1492 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
1493 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
1494 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
1495 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
1496 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
1497 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
1498 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
1499 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
1500 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
1501 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
1502 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
1503 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
1504 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
1505 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
1506 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
1507 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
1508 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
1509 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
1510 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
1511 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
1512 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
1513 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
1514 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
1515 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
1516 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
1517 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
1518 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
1519 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
1520 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
1521 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
1522 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
1523 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
1524 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
1525 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
1526 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
1527 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
1528 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
1529 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
1530 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
1531 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
1532 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
1533 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
1534 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
1535 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
1536 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
1537 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
1538 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
1539 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
# Byte table: 32 rows of eight = 256 one-byte values appended right after the
# word table.  The bytes match the AES inverse S-box (0x52, 0x09, 0x6a, ...).
# The "Td4" name below is only a Perl comment, so no assembler label is
# emitted; presumably the data is reached as an offset from AES_Td -- confirm
# against the decrypt routine.
# NOTE(review): data_byte (no leading underscore, unlike _data_word) is
# defined outside this part of the file.
1540#Td4:
1541 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
1542 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
1543 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
1544 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
1545 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
1546 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
1547 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
1548 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
1549 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
1550 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
1551 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
1552 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
1553 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
1554 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
1555 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
1556 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
1557 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
1558 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
1559 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
1560 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
1561 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
1562 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
1563 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
1564 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
1565 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
1566 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
1567 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
1568 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
1569 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
1570 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
1571 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
1572 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
1573
# Final pass over the generated text: every `...` span in $code is replaced
# by the result of eval'ing the enclosed text as a Perl expression
# (/e evaluates the replacement, /g applies it to every occurrence).
1574$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1575
# Write the finished text to STDOUT.
1576print $code;
1577
# close() flushes any buffered output and returns false if that fails, so
# its result must be checked: otherwise a write error would leave truncated
# output behind while the script still exits successfully.
# NOTE(review): how STDOUT was opened is not visible in this part of the
# file; if it is a pipe to another program, close() also returns false when
# that program exits non-zero ($! is then 0 and $? holds its status), which
# is why both are reported.
1578close STDOUT or die "error closing STDOUT: $! (status $?)";