summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes
diff options
context:
space:
mode:
authordjm <>2008-09-06 12:17:54 +0000
committerdjm <>2008-09-06 12:17:54 +0000
commit38ce604e3cc97706b876b0525ddff0121115456d (patch)
tree7ccc28afe1789ea3dbedf72365f955d5b8e105b5 /src/lib/libcrypto/aes
parent12867252827c8efaa8ddd1fa3b3d6e321e2bcdef (diff)
downloadopenbsd-38ce604e3cc97706b876b0525ddff0121115456d.tar.gz
openbsd-38ce604e3cc97706b876b0525ddff0121115456d.tar.bz2
openbsd-38ce604e3cc97706b876b0525ddff0121115456d.zip
resolve conflicts
Diffstat (limited to 'src/lib/libcrypto/aes')
-rw-r--r--src/lib/libcrypto/aes/aes.h27
-rw-r--r--src/lib/libcrypto/aes/aes_cbc.c2
-rw-r--r--src/lib/libcrypto/aes/aes_core.c304
-rw-r--r--src/lib/libcrypto/aes/aes_locl.h6
-rw-r--r--src/lib/libcrypto/aes/aes_misc.c2
-rw-r--r--src/lib/libcrypto/aes/asm/aes-586.pl1657
6 files changed, 953 insertions, 1045 deletions
diff --git a/src/lib/libcrypto/aes/aes.h b/src/lib/libcrypto/aes/aes.h
index 8a3ea0b883..baf0222d49 100644
--- a/src/lib/libcrypto/aes/aes.h
+++ b/src/lib/libcrypto/aes/aes.h
@@ -52,7 +52,7 @@
52#ifndef HEADER_AES_H 52#ifndef HEADER_AES_H
53#define HEADER_AES_H 53#define HEADER_AES_H
54 54
55#include <openssl/e_os2.h> 55#include <openssl/opensslconf.h>
56 56
57#ifdef OPENSSL_NO_AES 57#ifdef OPENSSL_NO_AES
58#error AES is disabled. 58#error AES is disabled.
@@ -66,17 +66,17 @@
66#define AES_MAXNR 14 66#define AES_MAXNR 14
67#define AES_BLOCK_SIZE 16 67#define AES_BLOCK_SIZE 16
68 68
69#if defined(OPENSSL_FIPS)
70#define FIPS_AES_SIZE_T int
71#endif
72
73#ifdef __cplusplus 69#ifdef __cplusplus
74extern "C" { 70extern "C" {
75#endif 71#endif
76 72
77/* This should be a hidden type, but EVP requires that the size be known */ 73/* This should be a hidden type, but EVP requires that the size be known */
78struct aes_key_st { 74struct aes_key_st {
75#ifdef AES_LONG
79 unsigned long rd_key[4 *(AES_MAXNR + 1)]; 76 unsigned long rd_key[4 *(AES_MAXNR + 1)];
77#else
78 unsigned int rd_key[4 *(AES_MAXNR + 1)];
79#endif
80 int rounds; 80 int rounds;
81}; 81};
82typedef struct aes_key_st AES_KEY; 82typedef struct aes_key_st AES_KEY;
@@ -119,6 +119,23 @@ void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
119 unsigned char ecount_buf[AES_BLOCK_SIZE], 119 unsigned char ecount_buf[AES_BLOCK_SIZE],
120 unsigned int *num); 120 unsigned int *num);
121 121
122/* For IGE, see also http://www.links.org/files/openssl-ige.pdf */
123/* NB: the IV is _two_ blocks long */
124void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
125 const unsigned long length, const AES_KEY *key,
126 unsigned char *ivec, const int enc);
127/* NB: the IV is _four_ blocks long */
128void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
129 const unsigned long length, const AES_KEY *key,
130 const AES_KEY *key2, const unsigned char *ivec,
131 const int enc);
132
133int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
134 unsigned char *out,
135 const unsigned char *in, unsigned int inlen);
136int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
137 unsigned char *out,
138 const unsigned char *in, unsigned int inlen);
122 139
123#ifdef __cplusplus 140#ifdef __cplusplus
124} 141}
diff --git a/src/lib/libcrypto/aes/aes_cbc.c b/src/lib/libcrypto/aes/aes_cbc.c
index 373864cd4b..d2ba6bcdb4 100644
--- a/src/lib/libcrypto/aes/aes_cbc.c
+++ b/src/lib/libcrypto/aes/aes_cbc.c
@@ -59,7 +59,6 @@
59#include <openssl/aes.h> 59#include <openssl/aes.h>
60#include "aes_locl.h" 60#include "aes_locl.h"
61 61
62#if !defined(OPENSSL_FIPS_AES_ASM)
63void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, 62void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
64 const unsigned long length, const AES_KEY *key, 63 const unsigned long length, const AES_KEY *key,
65 unsigned char *ivec, const int enc) { 64 unsigned char *ivec, const int enc) {
@@ -130,4 +129,3 @@ void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
130 } 129 }
131 } 130 }
132} 131}
133#endif
diff --git a/src/lib/libcrypto/aes/aes_core.c b/src/lib/libcrypto/aes/aes_core.c
index ed566a8123..3a80e18b0a 100644
--- a/src/lib/libcrypto/aes/aes_core.c
+++ b/src/lib/libcrypto/aes/aes_core.c
@@ -37,23 +37,19 @@
37 37
38#include <stdlib.h> 38#include <stdlib.h>
39#include <openssl/aes.h> 39#include <openssl/aes.h>
40#include <openssl/fips.h>
41#include "aes_locl.h" 40#include "aes_locl.h"
42 41
43#ifndef OPENSSL_FIPS
44
45/* 42/*
46Te0[x] = S [x].[02, 01, 01, 03]; 43Te0[x] = S [x].[02, 01, 01, 03];
47Te1[x] = S [x].[03, 02, 01, 01]; 44Te1[x] = S [x].[03, 02, 01, 01];
48Te2[x] = S [x].[01, 03, 02, 01]; 45Te2[x] = S [x].[01, 03, 02, 01];
49Te3[x] = S [x].[01, 01, 03, 02]; 46Te3[x] = S [x].[01, 01, 03, 02];
50Te4[x] = S [x].[01, 01, 01, 01];
51 47
52Td0[x] = Si[x].[0e, 09, 0d, 0b]; 48Td0[x] = Si[x].[0e, 09, 0d, 0b];
53Td1[x] = Si[x].[0b, 0e, 09, 0d]; 49Td1[x] = Si[x].[0b, 0e, 09, 0d];
54Td2[x] = Si[x].[0d, 0b, 0e, 09]; 50Td2[x] = Si[x].[0d, 0b, 0e, 09];
55Td3[x] = Si[x].[09, 0d, 0b, 0e]; 51Td3[x] = Si[x].[09, 0d, 0b, 0e];
56Td4[x] = Si[x].[01, 01, 01, 01]; 52Td4[x] = Si[x].[01];
57*/ 53*/
58 54
59static const u32 Te0[256] = { 55static const u32 Te0[256] = {
@@ -255,7 +251,6 @@ static const u32 Te2[256] = {
255 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, 251 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
256}; 252};
257static const u32 Te3[256] = { 253static const u32 Te3[256] = {
258
259 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 254 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
260 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, 255 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
261 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 256 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
@@ -321,72 +316,7 @@ static const u32 Te3[256] = {
321 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 316 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
322 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, 317 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
323}; 318};
324static const u32 Te4[256] = { 319
325 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
326 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
327 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
328 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
329 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
330 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
331 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
332 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
333 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
334 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
335 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
336 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
337 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
338 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
339 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
340 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
341 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
342 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
343 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
344 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
345 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
346 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
347 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
348 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
349 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
350 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
351 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
352 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
353 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
354 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
355 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
356 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
357 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
358 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
359 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
360 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
361 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
362 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
363 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
364 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
365 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
366 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
367 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
368 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
369 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
370 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
371 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
372 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
373 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
374 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
375 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
376 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
377 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
378 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
379 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
380 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
381 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
382 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
383 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
384 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
385 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
386 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
387 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
388 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
389};
390static const u32 Td0[256] = { 320static const u32 Td0[256] = {
391 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 321 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
392 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 322 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
@@ -540,7 +470,6 @@ static const u32 Td2[256] = {
540 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, 470 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
541 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 471 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
542 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, 472 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
543
544 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, 473 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
545 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, 474 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
546 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 475 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
@@ -652,71 +581,39 @@ static const u32 Td3[256] = {
652 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 581 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
653 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, 582 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
654}; 583};
655static const u32 Td4[256] = { 584static const u8 Td4[256] = {
656 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, 585 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
657 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, 586 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
658 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, 587 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
659 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, 588 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
660 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, 589 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
661 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, 590 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
662 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, 591 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
663 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, 592 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
664 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, 593 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
665 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, 594 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
666 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, 595 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
667 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, 596 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
668 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, 597 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
669 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, 598 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
670 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, 599 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
671 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, 600 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
672 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, 601 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
673 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, 602 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
674 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, 603 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
675 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, 604 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
676 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, 605 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
677 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, 606 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
678 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, 607 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
679 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, 608 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
680 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, 609 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
681 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, 610 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
682 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, 611 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
683 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, 612 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
684 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, 613 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
685 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, 614 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
686 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, 615 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
687 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, 616 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
688 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
689 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
690 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
691 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
692 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
693 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
694 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
695 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
696 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
697 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
698 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
699 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
700 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
701 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
702 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
703 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
704 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
705 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
706 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
707 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
708 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
709 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
710 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
711 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
712 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
713 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
714 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
715 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
716 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
717 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
718 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
719 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
720}; 617};
721static const u32 rcon[] = { 618static const u32 rcon[] = {
722 0x01000000, 0x02000000, 0x04000000, 0x08000000, 619 0x01000000, 0x02000000, 0x04000000, 0x08000000,
@@ -756,10 +653,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
756 while (1) { 653 while (1) {
757 temp = rk[3]; 654 temp = rk[3];
758 rk[4] = rk[0] ^ 655 rk[4] = rk[0] ^
759 (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ 656 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
760 (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ 657 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
761 (Te4[(temp ) & 0xff] & 0x0000ff00) ^ 658 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
762 (Te4[(temp >> 24) ] & 0x000000ff) ^ 659 (Te1[(temp >> 24) ] & 0x000000ff) ^
763 rcon[i]; 660 rcon[i];
764 rk[5] = rk[1] ^ rk[4]; 661 rk[5] = rk[1] ^ rk[4];
765 rk[6] = rk[2] ^ rk[5]; 662 rk[6] = rk[2] ^ rk[5];
@@ -776,10 +673,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
776 while (1) { 673 while (1) {
777 temp = rk[ 5]; 674 temp = rk[ 5];
778 rk[ 6] = rk[ 0] ^ 675 rk[ 6] = rk[ 0] ^
779 (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ 676 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
780 (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ 677 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
781 (Te4[(temp ) & 0xff] & 0x0000ff00) ^ 678 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
782 (Te4[(temp >> 24) ] & 0x000000ff) ^ 679 (Te1[(temp >> 24) ] & 0x000000ff) ^
783 rcon[i]; 680 rcon[i];
784 rk[ 7] = rk[ 1] ^ rk[ 6]; 681 rk[ 7] = rk[ 1] ^ rk[ 6];
785 rk[ 8] = rk[ 2] ^ rk[ 7]; 682 rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -798,10 +695,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
798 while (1) { 695 while (1) {
799 temp = rk[ 7]; 696 temp = rk[ 7];
800 rk[ 8] = rk[ 0] ^ 697 rk[ 8] = rk[ 0] ^
801 (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ 698 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
802 (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ 699 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
803 (Te4[(temp ) & 0xff] & 0x0000ff00) ^ 700 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
804 (Te4[(temp >> 24) ] & 0x000000ff) ^ 701 (Te1[(temp >> 24) ] & 0x000000ff) ^
805 rcon[i]; 702 rcon[i];
806 rk[ 9] = rk[ 1] ^ rk[ 8]; 703 rk[ 9] = rk[ 1] ^ rk[ 8];
807 rk[10] = rk[ 2] ^ rk[ 9]; 704 rk[10] = rk[ 2] ^ rk[ 9];
@@ -811,10 +708,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
811 } 708 }
812 temp = rk[11]; 709 temp = rk[11];
813 rk[12] = rk[ 4] ^ 710 rk[12] = rk[ 4] ^
814 (Te4[(temp >> 24) ] & 0xff000000) ^ 711 (Te2[(temp >> 24) ] & 0xff000000) ^
815 (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ 712 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
816 (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ 713 (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
817 (Te4[(temp ) & 0xff] & 0x000000ff); 714 (Te1[(temp ) & 0xff] & 0x000000ff);
818 rk[13] = rk[ 5] ^ rk[12]; 715 rk[13] = rk[ 5] ^ rk[12];
819 rk[14] = rk[ 6] ^ rk[13]; 716 rk[14] = rk[ 6] ^ rk[13];
820 rk[15] = rk[ 7] ^ rk[14]; 717 rk[15] = rk[ 7] ^ rk[14];
@@ -853,29 +750,30 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
853 for (i = 1; i < (key->rounds); i++) { 750 for (i = 1; i < (key->rounds); i++) {
854 rk += 4; 751 rk += 4;
855 rk[0] = 752 rk[0] =
856 Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ 753 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
857 Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ 754 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
858 Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ 755 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
859 Td3[Te4[(rk[0] ) & 0xff] & 0xff]; 756 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
860 rk[1] = 757 rk[1] =
861 Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ 758 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
862 Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ 759 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
863 Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ 760 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
864 Td3[Te4[(rk[1] ) & 0xff] & 0xff]; 761 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
865 rk[2] = 762 rk[2] =
866 Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ 763 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
867 Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ 764 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
868 Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ 765 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
869 Td3[Te4[(rk[2] ) & 0xff] & 0xff]; 766 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
870 rk[3] = 767 rk[3] =
871 Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ 768 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
872 Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ 769 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
873 Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ 770 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
874 Td3[Te4[(rk[3] ) & 0xff] & 0xff]; 771 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
875 } 772 }
876 return 0; 773 return 0;
877} 774}
878 775
776#ifndef AES_ASM
879/* 777/*
880 * Encrypt a single block 778 * Encrypt a single block
881 * in and out can overlap 779 * in and out can overlap
@@ -1038,31 +936,31 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
1038 * map cipher state to byte array block: 936 * map cipher state to byte array block:
1039 */ 937 */
1040 s0 = 938 s0 =
1041 (Te4[(t0 >> 24) ] & 0xff000000) ^ 939 (Te2[(t0 >> 24) ] & 0xff000000) ^
1042 (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ 940 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
1043 (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ 941 (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
1044 (Te4[(t3 ) & 0xff] & 0x000000ff) ^ 942 (Te1[(t3 ) & 0xff] & 0x000000ff) ^
1045 rk[0]; 943 rk[0];
1046 PUTU32(out , s0); 944 PUTU32(out , s0);
1047 s1 = 945 s1 =
1048 (Te4[(t1 >> 24) ] & 0xff000000) ^ 946 (Te2[(t1 >> 24) ] & 0xff000000) ^
1049 (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ 947 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
1050 (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ 948 (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
1051 (Te4[(t0 ) & 0xff] & 0x000000ff) ^ 949 (Te1[(t0 ) & 0xff] & 0x000000ff) ^
1052 rk[1]; 950 rk[1];
1053 PUTU32(out + 4, s1); 951 PUTU32(out + 4, s1);
1054 s2 = 952 s2 =
1055 (Te4[(t2 >> 24) ] & 0xff000000) ^ 953 (Te2[(t2 >> 24) ] & 0xff000000) ^
1056 (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ 954 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
1057 (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ 955 (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
1058 (Te4[(t1 ) & 0xff] & 0x000000ff) ^ 956 (Te1[(t1 ) & 0xff] & 0x000000ff) ^
1059 rk[2]; 957 rk[2];
1060 PUTU32(out + 8, s2); 958 PUTU32(out + 8, s2);
1061 s3 = 959 s3 =
1062 (Te4[(t3 >> 24) ] & 0xff000000) ^ 960 (Te2[(t3 >> 24) ] & 0xff000000) ^
1063 (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ 961 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
1064 (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ 962 (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
1065 (Te4[(t2 ) & 0xff] & 0x000000ff) ^ 963 (Te1[(t2 ) & 0xff] & 0x000000ff) ^
1066 rk[3]; 964 rk[3];
1067 PUTU32(out + 12, s3); 965 PUTU32(out + 12, s3);
1068} 966}
@@ -1229,33 +1127,33 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
1229 * map cipher state to byte array block: 1127 * map cipher state to byte array block:
1230 */ 1128 */
1231 s0 = 1129 s0 =
1232 (Td4[(t0 >> 24) ] & 0xff000000) ^ 1130 (Td4[(t0 >> 24) ] << 24) ^
1233 (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ 1131 (Td4[(t3 >> 16) & 0xff] << 16) ^
1234 (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ 1132 (Td4[(t2 >> 8) & 0xff] << 8) ^
1235 (Td4[(t1 ) & 0xff] & 0x000000ff) ^ 1133 (Td4[(t1 ) & 0xff]) ^
1236 rk[0]; 1134 rk[0];
1237 PUTU32(out , s0); 1135 PUTU32(out , s0);
1238 s1 = 1136 s1 =
1239 (Td4[(t1 >> 24) ] & 0xff000000) ^ 1137 (Td4[(t1 >> 24) ] << 24) ^
1240 (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ 1138 (Td4[(t0 >> 16) & 0xff] << 16) ^
1241 (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ 1139 (Td4[(t3 >> 8) & 0xff] << 8) ^
1242 (Td4[(t2 ) & 0xff] & 0x000000ff) ^ 1140 (Td4[(t2 ) & 0xff]) ^
1243 rk[1]; 1141 rk[1];
1244 PUTU32(out + 4, s1); 1142 PUTU32(out + 4, s1);
1245 s2 = 1143 s2 =
1246 (Td4[(t2 >> 24) ] & 0xff000000) ^ 1144 (Td4[(t2 >> 24) ] << 24) ^
1247 (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ 1145 (Td4[(t1 >> 16) & 0xff] << 16) ^
1248 (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ 1146 (Td4[(t0 >> 8) & 0xff] << 8) ^
1249 (Td4[(t3 ) & 0xff] & 0x000000ff) ^ 1147 (Td4[(t3 ) & 0xff]) ^
1250 rk[2]; 1148 rk[2];
1251 PUTU32(out + 8, s2); 1149 PUTU32(out + 8, s2);
1252 s3 = 1150 s3 =
1253 (Td4[(t3 >> 24) ] & 0xff000000) ^ 1151 (Td4[(t3 >> 24) ] << 24) ^
1254 (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ 1152 (Td4[(t2 >> 16) & 0xff] << 16) ^
1255 (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ 1153 (Td4[(t1 >> 8) & 0xff] << 8) ^
1256 (Td4[(t0 ) & 0xff] & 0x000000ff) ^ 1154 (Td4[(t0 ) & 0xff]) ^
1257 rk[3]; 1155 rk[3];
1258 PUTU32(out + 12, s3); 1156 PUTU32(out + 12, s3);
1259} 1157}
1260 1158
1261#endif /* ndef OPENSSL_FIPS */ 1159#endif /* AES_ASM */
diff --git a/src/lib/libcrypto/aes/aes_locl.h b/src/lib/libcrypto/aes/aes_locl.h
index 4184729e34..054b442d41 100644
--- a/src/lib/libcrypto/aes/aes_locl.h
+++ b/src/lib/libcrypto/aes/aes_locl.h
@@ -62,7 +62,7 @@
62#include <stdlib.h> 62#include <stdlib.h>
63#include <string.h> 63#include <string.h>
64 64
65#if defined(_MSC_VER) && !defined(_M_IA64) && !defined(OPENSSL_SYS_WINCE) 65#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
66# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) 66# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
67# define GETU32(p) SWAP(*((u32 *)(p))) 67# define GETU32(p) SWAP(*((u32 *)(p)))
68# define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); } 68# define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
@@ -71,7 +71,11 @@
71# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } 71# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
72#endif 72#endif
73 73
74#ifdef AES_LONG
74typedef unsigned long u32; 75typedef unsigned long u32;
76#else
77typedef unsigned int u32;
78#endif
75typedef unsigned short u16; 79typedef unsigned short u16;
76typedef unsigned char u8; 80typedef unsigned char u8;
77 81
diff --git a/src/lib/libcrypto/aes/aes_misc.c b/src/lib/libcrypto/aes/aes_misc.c
index 090def25d5..4fead1b4c7 100644
--- a/src/lib/libcrypto/aes/aes_misc.c
+++ b/src/lib/libcrypto/aes/aes_misc.c
@@ -53,7 +53,7 @@
53#include <openssl/aes.h> 53#include <openssl/aes.h>
54#include "aes_locl.h" 54#include "aes_locl.h"
55 55
56const char *AES_version="AES" OPENSSL_VERSION_PTEXT; 56const char AES_version[]="AES" OPENSSL_VERSION_PTEXT;
57 57
58const char *AES_options(void) { 58const char *AES_options(void) {
59#ifdef FULL_UNROLL 59#ifdef FULL_UNROLL
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
index 688fda21ff..89fa261794 100644
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -6,7 +6,7 @@
6# forms are granted according to the OpenSSL license. 6# forms are granted according to the OpenSSL license.
7# ==================================================================== 7# ====================================================================
8# 8#
9# Version 2.0. 9# Version 3.6.
10# 10#
11# You might fail to appreciate this module performance from the first 11# You might fail to appreciate this module performance from the first
12# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered 12# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered
@@ -46,43 +46,68 @@
46# Instruction Level Parallelism, and it indeed resulted in up to 15% 46# Instruction Level Parallelism, and it indeed resulted in up to 15%
47# better performance on most recent µ-archs... 47# better performance on most recent µ-archs...
48# 48#
49# Current ECB performance numbers for 128-bit key in cycles per byte 49# Third version adds AES_cbc_encrypt implementation, which resulted in
50# [measure commonly used by AES benchmarkers] are: 50# up to 40% performance improvement of CBC benchmark results. 40% was
51# observed on P4 core, where "overall" improvement coefficient, i.e. if
52# compared to PIC generated by GCC and in CBC mode, was observed to be
53# as large as 4x:-) CBC performance is virtually identical to ECB now
54# and on some platforms even better, e.g. 17.6 "small" cycles/byte on
55# Opteron, because certain function prologues and epilogues are
56# effectively taken out of the loop...
51# 57#
52# small footprint fully unrolled 58# Version 3.2 implements compressed tables and prefetch of these tables
53# P4[-3] 23[24] 22[23] 59# in CBC[!] mode. Former means that 3/4 of table references are now
54# AMD K8 19 18 60# misaligned, which unfortunately has negative impact on elder IA-32
55# PIII 26(*) 23 61# implementations, Pentium suffered 30% penalty, PIII - 10%.
56# Pentium 63(*) 52 62#
63# Version 3.3 avoids L1 cache aliasing between stack frame and
64# S-boxes, and 3.4 - L1 cache aliasing even between key schedule. The
65# latter is achieved by copying the key schedule to controlled place in
66# stack. This unfortunately has rather strong impact on small block CBC
67# performance, ~2x deterioration on 16-byte block if compared to 3.3.
68#
69# Version 3.5 checks if there is L1 cache aliasing between user-supplied
70# key schedule and S-boxes and abstains from copying the former if
71# there is no. This allows end-user to consciously retain small block
72# performance by aligning key schedule in specific manner.
57# 73#
58# (*) Performance difference between small footprint code and fully 74# Version 3.6 compresses Td4 to 256 bytes and prefetches it in ECB.
59# unrolled in more commonly used CBC mode is not as big, 7% for 75#
60# PIII and 15% for Pentium, which I consider tolerable. 76# Current ECB performance numbers for 128-bit key in CPU cycles per
77# processed byte [measure commonly used by AES benchmarkers] are:
78#
79# small footprint fully unrolled
80# P4 24 22
81# AMD K8 20 19
82# PIII 25 23
83# Pentium 81 78
61 84
62push(@INC,"perlasm","../../perlasm"); 85push(@INC,"perlasm","../../perlasm");
63require "x86asm.pl"; 86require "x86asm.pl";
64 87
65&asm_init($ARGV[0],"aes-586.pl",$ARGV[$#ARGV] eq "386"); 88&asm_init($ARGV[0],"aes-586.pl",$ARGV[$#ARGV] eq "386");
66 89
67$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
68 # recent µ-archs], but ~5 times smaller!
69 # I favor compact code, because it minimizes
70 # cache contention...
71$vertical_spin=0; # shift "verticaly" defaults to 0, because of
72 # its proof-of-concept status, see below...
73
74$s0="eax"; 90$s0="eax";
75$s1="ebx"; 91$s1="ebx";
76$s2="ecx"; 92$s2="ecx";
77$s3="edx"; 93$s3="edx";
78$key="esi"; 94$key="edi";
79$acc="edi"; 95$acc="esi";
96
97$compromise=0; # $compromise=128 abstains from copying key
98 # schedule to stack when encrypting inputs
99 # shorter than 128 bytes at the cost of
100 # risking aliasing with S-boxes. In return
101 # you get way better, up to +70%, small block
102 # performance.
103$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
104 # recent µ-archs], but ~5 times smaller!
105 # I favor compact code to minimize cache
106 # contention and in hope to "collect" 5% back
107 # in real-life applications...
108$vertical_spin=0; # shift "vertically" defaults to 0, because of
109 # its proof-of-concept status...
80 110
81if ($vertical_spin) {
82 # I need high parts of volatile registers to be accessible...
83 $s1="esi"; $key="ebx";
84 $s2="edi"; $acc="ecx";
85}
86# Note that there is no decvert(), as well as last encryption round is 111# Note that there is no decvert(), as well as last encryption round is
87# performed with "horizontal" shifts. This is because this "vertical" 112# performed with "horizontal" shifts. This is because this "vertical"
88# implementation [one which groups shifts on a given $s[i] to form a 113# implementation [one which groups shifts on a given $s[i] to form a
@@ -98,55 +123,55 @@ sub encvert()
98 my $v0 = $acc, $v1 = $key; 123 my $v0 = $acc, $v1 = $key;
99 124
100 &mov ($v0,$s[3]); # copy s3 125 &mov ($v0,$s[3]); # copy s3
101 &mov (&DWP(0,"esp"),$s[2]); # save s2 126 &mov (&DWP(4,"esp"),$s[2]); # save s2
102 &mov ($v1,$s[0]); # copy s0 127 &mov ($v1,$s[0]); # copy s0
103 &mov (&DWP(4,"esp"),$s[1]); # save s1 128 &mov (&DWP(8,"esp"),$s[1]); # save s1
104 129
105 &movz ($s[2],&HB($s[0])); 130 &movz ($s[2],&HB($s[0]));
106 &and ($s[0],0xFF); 131 &and ($s[0],0xFF);
107 &mov ($s[0],&DWP(1024*0,$te,$s[0],4)); # s0>>0 132 &mov ($s[0],&DWP(0,$te,$s[0],8)); # s0>>0
108 &shr ($v1,16); 133 &shr ($v1,16);
109 &mov ($s[3],&DWP(1024*1,$te,$s[2],4)); # s0>>8 134 &mov ($s[3],&DWP(3,$te,$s[2],8)); # s0>>8
110 &movz ($s[1],&HB($v1)); 135 &movz ($s[1],&HB($v1));
111 &and ($v1,0xFF); 136 &and ($v1,0xFF);
112 &mov ($s[2],&DWP(1024*2,$te,$v1,4)); # s0>>16 137 &mov ($s[2],&DWP(2,$te,$v1,8)); # s0>>16
113 &mov ($v1,$v0); 138 &mov ($v1,$v0);
114 &mov ($s[1],&DWP(1024*3,$te,$s[1],4)); # s0>>24 139 &mov ($s[1],&DWP(1,$te,$s[1],8)); # s0>>24
115 140
116 &and ($v0,0xFF); 141 &and ($v0,0xFF);
117 &xor ($s[3],&DWP(1024*0,$te,$v0,4)); # s3>>0 142 &xor ($s[3],&DWP(0,$te,$v0,8)); # s3>>0
118 &movz ($v0,&HB($v1)); 143 &movz ($v0,&HB($v1));
119 &shr ($v1,16); 144 &shr ($v1,16);
120 &xor ($s[2],&DWP(1024*1,$te,$v0,4)); # s3>>8 145 &xor ($s[2],&DWP(3,$te,$v0,8)); # s3>>8
121 &movz ($v0,&HB($v1)); 146 &movz ($v0,&HB($v1));
122 &and ($v1,0xFF); 147 &and ($v1,0xFF);
123 &xor ($s[1],&DWP(1024*2,$te,$v1,4)); # s3>>16 148 &xor ($s[1],&DWP(2,$te,$v1,8)); # s3>>16
124 &mov ($v1,&DWP(0,"esp")); # restore s2 149 &mov ($v1,&DWP(4,"esp")); # restore s2
125 &xor ($s[0],&DWP(1024*3,$te,$v0,4)); # s3>>24 150 &xor ($s[0],&DWP(1,$te,$v0,8)); # s3>>24
126 151
127 &mov ($v0,$v1); 152 &mov ($v0,$v1);
128 &and ($v1,0xFF); 153 &and ($v1,0xFF);
129 &xor ($s[2],&DWP(1024*0,$te,$v1,4)); # s2>>0 154 &xor ($s[2],&DWP(0,$te,$v1,8)); # s2>>0
130 &movz ($v1,&HB($v0)); 155 &movz ($v1,&HB($v0));
131 &shr ($v0,16); 156 &shr ($v0,16);
132 &xor ($s[1],&DWP(1024*1,$te,$v1,4)); # s2>>8 157 &xor ($s[1],&DWP(3,$te,$v1,8)); # s2>>8
133 &movz ($v1,&HB($v0)); 158 &movz ($v1,&HB($v0));
134 &and ($v0,0xFF); 159 &and ($v0,0xFF);
135 &xor ($s[0],&DWP(1024*2,$te,$v0,4)); # s2>>16 160 &xor ($s[0],&DWP(2,$te,$v0,8)); # s2>>16
136 &mov ($v0,&DWP(4,"esp")); # restore s1 161 &mov ($v0,&DWP(8,"esp")); # restore s1
137 &xor ($s[3],&DWP(1024*3,$te,$v1,4)); # s2>>24 162 &xor ($s[3],&DWP(1,$te,$v1,8)); # s2>>24
138 163
139 &mov ($v1,$v0); 164 &mov ($v1,$v0);
140 &and ($v0,0xFF); 165 &and ($v0,0xFF);
141 &xor ($s[1],&DWP(1024*0,$te,$v0,4)); # s1>>0 166 &xor ($s[1],&DWP(0,$te,$v0,8)); # s1>>0
142 &movz ($v0,&HB($v1)); 167 &movz ($v0,&HB($v1));
143 &shr ($v1,16); 168 &shr ($v1,16);
144 &xor ($s[0],&DWP(1024*1,$te,$v0,4)); # s1>>8 169 &xor ($s[0],&DWP(3,$te,$v0,8)); # s1>>8
145 &movz ($v0,&HB($v1)); 170 &movz ($v0,&HB($v1));
146 &and ($v1,0xFF); 171 &and ($v1,0xFF);
147 &xor ($s[3],&DWP(1024*2,$te,$v1,4)); # s1>>16 172 &xor ($s[3],&DWP(2,$te,$v1,8)); # s1>>16
148 &mov ($key,&DWP(12,"esp")); # reincarnate v1 as key 173 &mov ($key,&DWP(12,"esp")); # reincarnate v1 as key
149 &xor ($s[2],&DWP(1024*3,$te,$v0,4)); # s1>>24 174 &xor ($s[2],&DWP(1,$te,$v0,8)); # s1>>24
150} 175}
151 176
152sub encstep() 177sub encstep()
@@ -160,25 +185,25 @@ sub encstep()
160 &and ($out,0xFF); } 185 &and ($out,0xFF); }
161 if ($i==1) { &shr ($s[0],16); }#%ebx[1] 186 if ($i==1) { &shr ($s[0],16); }#%ebx[1]
162 if ($i==2) { &shr ($s[0],24); }#%ecx[2] 187 if ($i==2) { &shr ($s[0],24); }#%ecx[2]
163 &mov ($out,&DWP(1024*0,$te,$out,4)); 188 &mov ($out,&DWP(0,$te,$out,8));
164 189
165 if ($i==3) { $tmp=$s[1]; }##%eax 190 if ($i==3) { $tmp=$s[1]; }##%eax
166 &movz ($tmp,&HB($s[1])); 191 &movz ($tmp,&HB($s[1]));
167 &xor ($out,&DWP(1024*1,$te,$tmp,4)); 192 &xor ($out,&DWP(3,$te,$tmp,8));
168 193
169 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(0,"esp")); }##%ebx 194 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); }##%ebx
170 else { &mov ($tmp,$s[2]); 195 else { &mov ($tmp,$s[2]);
171 &shr ($tmp,16); } 196 &shr ($tmp,16); }
172 if ($i==2) { &and ($s[1],0xFF); }#%edx[2] 197 if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
173 &and ($tmp,0xFF); 198 &and ($tmp,0xFF);
174 &xor ($out,&DWP(1024*2,$te,$tmp,4)); 199 &xor ($out,&DWP(2,$te,$tmp,8));
175 200
176 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(4,"esp")); }##%ecx 201 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }##%ecx
177 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2] 202 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
178 else { &mov ($tmp,$s[3]); 203 else { &mov ($tmp,$s[3]);
179 &shr ($tmp,24) } 204 &shr ($tmp,24) }
180 &xor ($out,&DWP(1024*3,$te,$tmp,4)); 205 &xor ($out,&DWP(1,$te,$tmp,8));
181 if ($i<2) { &mov (&DWP(4*$i,"esp"),$out); } 206 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
182 if ($i==3) { &mov ($s[3],$acc); } 207 if ($i==3) { &mov ($s[3],$acc); }
183 &comment(); 208 &comment();
184} 209}
@@ -193,60 +218,49 @@ sub enclast()
193 &and ($out,0xFF); 218 &and ($out,0xFF);
194 if ($i==1) { &shr ($s[0],16); }#%ebx[1] 219 if ($i==1) { &shr ($s[0],16); }#%ebx[1]
195 if ($i==2) { &shr ($s[0],24); }#%ecx[2] 220 if ($i==2) { &shr ($s[0],24); }#%ecx[2]
196 &mov ($out,&DWP(1024*0,$te,$out,4)); 221 &mov ($out,&DWP(2,$te,$out,8));
197 &and ($out,0x000000ff); 222 &and ($out,0x000000ff);
198 223
199 if ($i==3) { $tmp=$s[1]; }##%eax 224 if ($i==3) { $tmp=$s[1]; }##%eax
200 &movz ($tmp,&HB($s[1])); 225 &movz ($tmp,&HB($s[1]));
201 &mov ($tmp,&DWP(0,$te,$tmp,4)); 226 &mov ($tmp,&DWP(0,$te,$tmp,8));
202 &and ($tmp,0x0000ff00); 227 &and ($tmp,0x0000ff00);
203 &xor ($out,$tmp); 228 &xor ($out,$tmp);
204 229
205 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(0,"esp")); }##%ebx 230 if ($i==3) { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); }##%ebx
206 else { mov ($tmp,$s[2]); 231 else { mov ($tmp,$s[2]);
207 &shr ($tmp,16); } 232 &shr ($tmp,16); }
208 if ($i==2) { &and ($s[1],0xFF); }#%edx[2] 233 if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
209 &and ($tmp,0xFF); 234 &and ($tmp,0xFF);
210 &mov ($tmp,&DWP(0,$te,$tmp,4)); 235 &mov ($tmp,&DWP(0,$te,$tmp,8));
211 &and ($tmp,0x00ff0000); 236 &and ($tmp,0x00ff0000);
212 &xor ($out,$tmp); 237 &xor ($out,$tmp);
213 238
214 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(4,"esp")); }##%ecx 239 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }##%ecx
215 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2] 240 elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
216 else { &mov ($tmp,$s[3]); 241 else { &mov ($tmp,$s[3]);
217 &shr ($tmp,24); } 242 &shr ($tmp,24); }
218 &mov ($tmp,&DWP(0,$te,$tmp,4)); 243 &mov ($tmp,&DWP(2,$te,$tmp,8));
219 &and ($tmp,0xff000000); 244 &and ($tmp,0xff000000);
220 &xor ($out,$tmp); 245 &xor ($out,$tmp);
221 if ($i<2) { &mov (&DWP(4*$i,"esp"),$out); } 246 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
222 if ($i==3) { &mov ($s[3],$acc); } 247 if ($i==3) { &mov ($s[3],$acc); }
223} 248}
224 249
225# void AES_encrypt (const void *inp,void *out,const AES_KEY *key); 250sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } }
226&public_label("AES_Te");
227&function_begin("AES_encrypt");
228 &mov ($acc,&wparam(0)); # load inp
229 &mov ($key,&wparam(2)); # load key
230
231 &call (&label("pic_point")); # make it PIC!
232 &set_label("pic_point");
233 &blindpop("ebp");
234 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
235 251
236 # allocate aligned stack frame 252&public_label("AES_Te");
237 &mov ($s0,"esp"); 253&function_begin_B("_x86_AES_encrypt");
238 &sub ("esp",20); 254 if ($vertical_spin) {
239 &and ("esp",-16); 255 # I need high parts of volatile registers to be accessible...
256 &exch ($s1="edi",$key="ebx");
257 &mov ($s2="esi",$acc="ecx");
258 }
240 259
260 # note that caller is expected to allocate stack frame for me!
241 &mov (&DWP(12,"esp"),$key); # save key 261 &mov (&DWP(12,"esp"),$key); # save key
242 &mov (&DWP(16,"esp"),$s0); # save %esp
243 262
244 &mov ($s0,&DWP(0,$acc)); # load input data 263 &xor ($s0,&DWP(0,$key)); # xor with key
245 &mov ($s1,&DWP(4,$acc));
246 &mov ($s2,&DWP(8,$acc));
247 &mov ($s3,&DWP(12,$acc));
248
249 &xor ($s0,&DWP(0,$key));
250 &xor ($s1,&DWP(4,$key)); 264 &xor ($s1,&DWP(4,$key));
251 &xor ($s2,&DWP(8,$key)); 265 &xor ($s2,&DWP(8,$key));
252 &xor ($s3,&DWP(12,$key)); 266 &xor ($s3,&DWP(12,$key));
@@ -256,7 +270,7 @@ sub enclast()
256 if ($small_footprint) { 270 if ($small_footprint) {
257 &lea ($acc,&DWP(-2,$acc,$acc)); 271 &lea ($acc,&DWP(-2,$acc,$acc));
258 &lea ($acc,&DWP(0,$key,$acc,8)); 272 &lea ($acc,&DWP(0,$key,$acc,8));
259 &mov (&DWP(8,"esp"),$acc); # end of key schedule 273 &mov (&DWP(16,"esp"),$acc); # end of key schedule
260 &align (4); 274 &align (4);
261 &set_label("loop"); 275 &set_label("loop");
262 if ($vertical_spin) { 276 if ($vertical_spin) {
@@ -267,12 +281,12 @@ sub enclast()
267 &encstep(2,"ebp",$s2,$s3,$s0,$s1); 281 &encstep(2,"ebp",$s2,$s3,$s0,$s1);
268 &encstep(3,"ebp",$s3,$s0,$s1,$s2); 282 &encstep(3,"ebp",$s3,$s0,$s1,$s2);
269 } 283 }
270 &add ($key,16); # advance rd_key 284 &add ($key,16); # advance rd_key
271 &xor ($s0,&DWP(0,$key)); 285 &xor ($s0,&DWP(0,$key));
272 &xor ($s1,&DWP(4,$key)); 286 &xor ($s1,&DWP(4,$key));
273 &xor ($s2,&DWP(8,$key)); 287 &xor ($s2,&DWP(8,$key));
274 &xor ($s3,&DWP(12,$key)); 288 &xor ($s3,&DWP(12,$key));
275 &cmp ($key,&DWP(8,"esp")); 289 &cmp ($key,&DWP(16,"esp"));
276 &mov (&DWP(12,"esp"),$key); 290 &mov (&DWP(12,"esp"),$key);
277 &jb (&label("loop")); 291 &jb (&label("loop"));
278 } 292 }
@@ -298,7 +312,7 @@ sub enclast()
298 &xor ($s3,&DWP(16*$i+12,$key)); 312 &xor ($s3,&DWP(16*$i+12,$key));
299 } 313 }
300 &add ($key,32); 314 &add ($key,32);
301 &mov (&DWP(12,"esp"),$key); # advance rd_key 315 &mov (&DWP(12,"esp"),$key); # advance rd_key
302 &set_label("12rounds"); 316 &set_label("12rounds");
303 for ($i=1;$i<3;$i++) { 317 for ($i=1;$i<3;$i++) {
304 if ($vertical_spin) { 318 if ($vertical_spin) {
@@ -315,7 +329,7 @@ sub enclast()
315 &xor ($s3,&DWP(16*$i+12,$key)); 329 &xor ($s3,&DWP(16*$i+12,$key));
316 } 330 }
317 &add ($key,32); 331 &add ($key,32);
318 &mov (&DWP(12,"esp"),$key); # advance rd_key 332 &mov (&DWP(12,"esp"),$key); # advance rd_key
319 &set_label("10rounds"); 333 &set_label("10rounds");
320 for ($i=1;$i<10;$i++) { 334 for ($i=1;$i<10;$i++) {
321 if ($vertical_spin) { 335 if ($vertical_spin) {
@@ -333,375 +347,129 @@ sub enclast()
333 } 347 }
334 } 348 }
335 349
336 &add ("ebp",4*1024); # skip to Te4
337 if ($vertical_spin) { 350 if ($vertical_spin) {
338 # "reincarnate" some registers for "horizontal" spin... 351 # "reincarnate" some registers for "horizontal" spin...
339 &mov ($s1="ebx",$key="esi"); 352 &mov ($s1="ebx",$key="edi");
340 &mov ($s2="ecx",$acc="edi"); 353 &mov ($s2="ecx",$acc="esi");
341 } 354 }
342 &enclast(0,"ebp",$s0,$s1,$s2,$s3); 355 &enclast(0,"ebp",$s0,$s1,$s2,$s3);
343 &enclast(1,"ebp",$s1,$s2,$s3,$s0); 356 &enclast(1,"ebp",$s1,$s2,$s3,$s0);
344 &enclast(2,"ebp",$s2,$s3,$s0,$s1); 357 &enclast(2,"ebp",$s2,$s3,$s0,$s1);
345 &enclast(3,"ebp",$s3,$s0,$s1,$s2); 358 &enclast(3,"ebp",$s3,$s0,$s1,$s2);
346 359
347 &mov ("esp",&DWP(16,"esp")); # restore %esp
348 &add ($key,$small_footprint?16:160); 360 &add ($key,$small_footprint?16:160);
349 &xor ($s0,&DWP(0,$key)); 361 &xor ($s0,&DWP(0,$key));
350 &xor ($s1,&DWP(4,$key)); 362 &xor ($s1,&DWP(4,$key));
351 &xor ($s2,&DWP(8,$key)); 363 &xor ($s2,&DWP(8,$key));
352 &xor ($s3,&DWP(12,$key)); 364 &xor ($s3,&DWP(12,$key));
353 365
354 &mov ($acc,&wparam(1)); # load out
355 &mov (&DWP(0,$acc),$s0); # write output data
356 &mov (&DWP(4,$acc),$s1);
357 &mov (&DWP(8,$acc),$s2);
358 &mov (&DWP(12,$acc),$s3);
359
360 &pop ("edi");
361 &pop ("esi");
362 &pop ("ebx");
363 &pop ("ebp");
364 &ret (); 366 &ret ();
365 367
366&set_label("AES_Te",64); # Yes! I keep it in the code segment! 368&set_label("AES_Te",64); # Yes! I keep it in the code segment!
367 &data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); 369 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
368 &data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); 370 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
369 &data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); 371 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
370 &data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec); 372 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
371 &data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa); 373 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
372 &data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb); 374 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
373 &data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45); 375 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
374 &data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b); 376 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
375 &data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c); 377 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
376 &data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83); 378 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
377 &data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9); 379 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
378 &data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a); 380 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
379 &data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d); 381 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
380 &data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f); 382 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
381 &data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df); 383 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
382 &data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea); 384 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
383 &data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34); 385 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
384 &data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b); 386 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
385 &data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d); 387 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
386 &data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413); 388 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
387 &data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1); 389 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
388 &data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6); 390 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
389 &data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972); 391 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
390 &data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85); 392 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
391 &data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed); 393 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
392 &data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511); 394 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
393 &data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe); 395 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
394 &data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b); 396 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
395 &data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05); 397 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
396 &data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1); 398 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
397 &data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142); 399 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
398 &data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf); 400 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
399 &data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3); 401 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
400 &data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e); 402 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
401 &data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a); 403 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
402 &data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6); 404 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
403 &data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3); 405 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
404 &data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b); 406 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
405 &data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428); 407 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
406 &data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad); 408 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
407 &data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14); 409 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
408 &data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8); 410 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
409 &data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4); 411 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
410 &data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2); 412 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
411 &data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda); 413 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
412 &data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949); 414 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
413 &data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf); 415 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
414 &data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810); 416 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
415 &data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c); 417 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
416 &data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697); 418 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
417 &data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e); 419 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
418 &data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f); 420 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
419 &data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc); 421 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
420 &data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c); 422 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
421 &data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969); 423 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
422 &data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27); 424 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
423 &data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122); 425 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
424 &data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433); 426 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
425 &data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9); 427 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
426 &data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5); 428 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
427 &data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a); 429 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
428 &data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0); 430 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
429 &data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e); 431 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
430 &data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c); 432 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
431#Te1:
432 &data_word(0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d);
433 &data_word(0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154);
434 &data_word(0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d);
435 &data_word(0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a);
436 &data_word(0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87);
437 &data_word(0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b);
438 &data_word(0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea);
439 &data_word(0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b);
440 &data_word(0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a);
441 &data_word(0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f);
442 &data_word(0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908);
443 &data_word(0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f);
444 &data_word(0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e);
445 &data_word(0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5);
446 &data_word(0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d);
447 &data_word(0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f);
448 &data_word(0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e);
449 &data_word(0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb);
450 &data_word(0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce);
451 &data_word(0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397);
452 &data_word(0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c);
453 &data_word(0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed);
454 &data_word(0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b);
455 &data_word(0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a);
456 &data_word(0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16);
457 &data_word(0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194);
458 &data_word(0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81);
459 &data_word(0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3);
460 &data_word(0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a);
461 &data_word(0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104);
462 &data_word(0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263);
463 &data_word(0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d);
464 &data_word(0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f);
465 &data_word(0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39);
466 &data_word(0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47);
467 &data_word(0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695);
468 &data_word(0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f);
469 &data_word(0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83);
470 &data_word(0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c);
471 &data_word(0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76);
472 &data_word(0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e);
473 &data_word(0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4);
474 &data_word(0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6);
475 &data_word(0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b);
476 &data_word(0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7);
477 &data_word(0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0);
478 &data_word(0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25);
479 &data_word(0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018);
480 &data_word(0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72);
481 &data_word(0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751);
482 &data_word(0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21);
483 &data_word(0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85);
484 &data_word(0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa);
485 &data_word(0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12);
486 &data_word(0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0);
487 &data_word(0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9);
488 &data_word(0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233);
489 &data_word(0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7);
490 &data_word(0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920);
491 &data_word(0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a);
492 &data_word(0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17);
493 &data_word(0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8);
494 &data_word(0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11);
495 &data_word(0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a);
496#Te2:
497 &data_word(0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b);
498 &data_word(0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5);
499 &data_word(0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b);
500 &data_word(0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76);
501 &data_word(0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d);
502 &data_word(0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0);
503 &data_word(0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf);
504 &data_word(0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0);
505 &data_word(0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26);
506 &data_word(0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc);
507 &data_word(0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1);
508 &data_word(0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15);
509 &data_word(0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3);
510 &data_word(0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a);
511 &data_word(0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2);
512 &data_word(0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75);
513 &data_word(0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a);
514 &data_word(0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0);
515 &data_word(0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3);
516 &data_word(0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784);
517 &data_word(0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced);
518 &data_word(0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b);
519 &data_word(0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39);
520 &data_word(0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf);
521 &data_word(0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb);
522 &data_word(0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485);
523 &data_word(0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f);
524 &data_word(0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8);
525 &data_word(0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f);
526 &data_word(0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5);
527 &data_word(0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321);
528 &data_word(0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2);
529 &data_word(0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec);
530 &data_word(0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917);
531 &data_word(0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d);
532 &data_word(0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573);
533 &data_word(0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc);
534 &data_word(0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388);
535 &data_word(0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14);
536 &data_word(0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db);
537 &data_word(0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a);
538 &data_word(0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c);
539 &data_word(0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662);
540 &data_word(0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79);
541 &data_word(0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d);
542 &data_word(0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9);
543 &data_word(0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea);
544 &data_word(0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808);
545 &data_word(0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e);
546 &data_word(0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6);
547 &data_word(0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f);
548 &data_word(0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a);
549 &data_word(0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66);
550 &data_word(0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e);
551 &data_word(0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9);
552 &data_word(0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e);
553 &data_word(0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311);
554 &data_word(0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794);
555 &data_word(0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9);
556 &data_word(0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf);
557 &data_word(0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d);
558 &data_word(0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868);
559 &data_word(0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f);
560 &data_word(0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16);
561#Te3:
562 &data_word(0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b);
563 &data_word(0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5);
564 &data_word(0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b);
565 &data_word(0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676);
566 &data_word(0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d);
567 &data_word(0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0);
568 &data_word(0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf);
569 &data_word(0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0);
570 &data_word(0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626);
571 &data_word(0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc);
572 &data_word(0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1);
573 &data_word(0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515);
574 &data_word(0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3);
575 &data_word(0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a);
576 &data_word(0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2);
577 &data_word(0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575);
578 &data_word(0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a);
579 &data_word(0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0);
580 &data_word(0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3);
581 &data_word(0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484);
582 &data_word(0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded);
583 &data_word(0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b);
584 &data_word(0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939);
585 &data_word(0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf);
586 &data_word(0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb);
587 &data_word(0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585);
588 &data_word(0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f);
589 &data_word(0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8);
590 &data_word(0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f);
591 &data_word(0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5);
592 &data_word(0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121);
593 &data_word(0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2);
594 &data_word(0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec);
595 &data_word(0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717);
596 &data_word(0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d);
597 &data_word(0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373);
598 &data_word(0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc);
599 &data_word(0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888);
600 &data_word(0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414);
601 &data_word(0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb);
602 &data_word(0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a);
603 &data_word(0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c);
604 &data_word(0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262);
605 &data_word(0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979);
606 &data_word(0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d);
607 &data_word(0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9);
608 &data_word(0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea);
609 &data_word(0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808);
610 &data_word(0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e);
611 &data_word(0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6);
612 &data_word(0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f);
613 &data_word(0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a);
614 &data_word(0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666);
615 &data_word(0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e);
616 &data_word(0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9);
617 &data_word(0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e);
618 &data_word(0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111);
619 &data_word(0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494);
620 &data_word(0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9);
621 &data_word(0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf);
622 &data_word(0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d);
623 &data_word(0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868);
624 &data_word(0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f);
625 &data_word(0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616);
626#Te4:
627 &data_word(0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b);
628 &data_word(0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5);
629 &data_word(0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b);
630 &data_word(0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676);
631 &data_word(0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d);
632 &data_word(0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0);
633 &data_word(0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf);
634 &data_word(0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0);
635 &data_word(0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626);
636 &data_word(0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc);
637 &data_word(0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1);
638 &data_word(0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515);
639 &data_word(0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3);
640 &data_word(0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a);
641 &data_word(0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2);
642 &data_word(0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575);
643 &data_word(0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a);
644 &data_word(0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0);
645 &data_word(0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3);
646 &data_word(0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484);
647 &data_word(0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed);
648 &data_word(0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b);
649 &data_word(0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939);
650 &data_word(0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf);
651 &data_word(0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb);
652 &data_word(0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585);
653 &data_word(0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f);
654 &data_word(0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8);
655 &data_word(0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f);
656 &data_word(0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5);
657 &data_word(0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121);
658 &data_word(0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2);
659 &data_word(0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec);
660 &data_word(0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717);
661 &data_word(0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d);
662 &data_word(0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373);
663 &data_word(0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc);
664 &data_word(0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888);
665 &data_word(0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414);
666 &data_word(0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb);
667 &data_word(0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a);
668 &data_word(0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c);
669 &data_word(0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262);
670 &data_word(0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979);
671 &data_word(0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d);
672 &data_word(0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9);
673 &data_word(0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea);
674 &data_word(0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808);
675 &data_word(0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e);
676 &data_word(0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6);
677 &data_word(0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f);
678 &data_word(0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a);
679 &data_word(0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666);
680 &data_word(0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e);
681 &data_word(0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9);
682 &data_word(0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e);
683 &data_word(0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111);
684 &data_word(0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494);
685 &data_word(0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9);
686 &data_word(0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf);
687 &data_word(0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d);
688 &data_word(0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868);
689 &data_word(0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f);
690 &data_word(0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616);
691#rcon: 433#rcon:
692 &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008); 434 &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008);
693 &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); 435 &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
694 &data_word(0x0000001b, 0x00000036); 436 &data_word(0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0);
695&function_end_B("AES_encrypt"); 437&function_end_B("_x86_AES_encrypt");
696 438
697#------------------------------------------------------------------# 439# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
440&public_label("AES_Te");
441&function_begin("AES_encrypt");
442 &mov ($acc,&wparam(0)); # load inp
443 &mov ($key,&wparam(2)); # load key
698 444
699$s0="eax"; 445 &mov ($s0,"esp");
700$s1="ebx"; 446 &sub ("esp",24);
701$s2="ecx"; 447 &and ("esp",-64);
702$s3="edx"; 448 &add ("esp",4);
703$key="edi"; 449 &mov (&DWP(16,"esp"),$s0);
704$acc="esi"; 450
451 &call (&label("pic_point")); # make it PIC!
452 &set_label("pic_point");
453 &blindpop("ebp");
454 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
455
456 &mov ($s0,&DWP(0,$acc)); # load input data
457 &mov ($s1,&DWP(4,$acc));
458 &mov ($s2,&DWP(8,$acc));
459 &mov ($s3,&DWP(12,$acc));
460
461 &call ("_x86_AES_encrypt");
462
463 &mov ("esp",&DWP(16,"esp"));
464
465 &mov ($acc,&wparam(1)); # load out
466 &mov (&DWP(0,$acc),$s0); # write output data
467 &mov (&DWP(4,$acc),$s1);
468 &mov (&DWP(8,$acc),$s2);
469 &mov (&DWP(12,$acc),$s3);
470&function_end("AES_encrypt");
471
472#------------------------------------------------------------------#
705 473
706sub decstep() 474sub decstep()
707{ my ($i,$td,@s) = @_; 475{ my ($i,$td,@s) = @_;
@@ -715,24 +483,24 @@ sub decstep()
715 if($i==3) { &mov ($key,&DWP(12,"esp")); } 483 if($i==3) { &mov ($key,&DWP(12,"esp")); }
716 else { &mov ($out,$s[0]); } 484 else { &mov ($out,$s[0]); }
717 &and ($out,0xFF); 485 &and ($out,0xFF);
718 &mov ($out,&DWP(1024*0,$td,$out,4)); 486 &mov ($out,&DWP(0,$td,$out,8));
719 487
720 if ($i==3) { $tmp=$s[1]; } 488 if ($i==3) { $tmp=$s[1]; }
721 &movz ($tmp,&HB($s[1])); 489 &movz ($tmp,&HB($s[1]));
722 &xor ($out,&DWP(1024*1,$td,$tmp,4)); 490 &xor ($out,&DWP(3,$td,$tmp,8));
723 491
724 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } 492 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
725 else { &mov ($tmp,$s[2]); } 493 else { &mov ($tmp,$s[2]); }
726 &shr ($tmp,16); 494 &shr ($tmp,16);
727 &and ($tmp,0xFF); 495 &and ($tmp,0xFF);
728 &xor ($out,&DWP(1024*2,$td,$tmp,4)); 496 &xor ($out,&DWP(2,$td,$tmp,8));
729 497
730 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(4,"esp")); } 498 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
731 else { &mov ($tmp,$s[3]); } 499 else { &mov ($tmp,$s[3]); }
732 &shr ($tmp,24); 500 &shr ($tmp,24);
733 &xor ($out,&DWP(1024*3,$td,$tmp,4)); 501 &xor ($out,&DWP(1,$td,$tmp,8));
734 if ($i<2) { &mov (&DWP(4*$i,"esp"),$out); } 502 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
735 if ($i==3) { &mov ($s[3],&DWP(0,"esp")); } 503 if ($i==3) { &mov ($s[3],&DWP(4,"esp")); }
736 &comment(); 504 &comment();
737} 505}
738 506
@@ -744,58 +512,38 @@ sub declast()
744 if($i==3) { &mov ($key,&DWP(12,"esp")); } 512 if($i==3) { &mov ($key,&DWP(12,"esp")); }
745 else { &mov ($out,$s[0]); } 513 else { &mov ($out,$s[0]); }
746 &and ($out,0xFF); 514 &and ($out,0xFF);
747 &mov ($out,&DWP(0,$td,$out,4)); 515 &movz ($out,&BP(2048,$td,$out,1));
748 &and ($out,0x000000ff);
749 516
750 if ($i==3) { $tmp=$s[1]; } 517 if ($i==3) { $tmp=$s[1]; }
751 &movz ($tmp,&HB($s[1])); 518 &movz ($tmp,&HB($s[1]));
752 &mov ($tmp,&DWP(0,$td,$tmp,4)); 519 &movz ($tmp,&BP(2048,$td,$tmp,1));
753 &and ($tmp,0x0000ff00); 520 &shl ($tmp,8);
754 &xor ($out,$tmp); 521 &xor ($out,$tmp);
755 522
756 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } 523 if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
757 else { mov ($tmp,$s[2]); } 524 else { mov ($tmp,$s[2]); }
758 &shr ($tmp,16); 525 &shr ($tmp,16);
759 &and ($tmp,0xFF); 526 &and ($tmp,0xFF);
760 &mov ($tmp,&DWP(0,$td,$tmp,4)); 527 &movz ($tmp,&BP(2048,$td,$tmp,1));
761 &and ($tmp,0x00ff0000); 528 &shl ($tmp,16);
762 &xor ($out,$tmp); 529 &xor ($out,$tmp);
763 530
764 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(4,"esp")); } 531 if ($i==3) { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
765 else { &mov ($tmp,$s[3]); } 532 else { &mov ($tmp,$s[3]); }
766 &shr ($tmp,24); 533 &shr ($tmp,24);
767 &mov ($tmp,&DWP(0,$td,$tmp,4)); 534 &movz ($tmp,&BP(2048,$td,$tmp,1));
768 &and ($tmp,0xff000000); 535 &shl ($tmp,24);
769 &xor ($out,$tmp); 536 &xor ($out,$tmp);
770 if ($i<2) { &mov (&DWP(4*$i,"esp"),$out); } 537 if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
771 if ($i==3) { &mov ($s[3],&DWP(0,"esp")); } 538 if ($i==3) { &mov ($s[3],&DWP(4,"esp")); }
772} 539}
773 540
774# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
775&public_label("AES_Td"); 541&public_label("AES_Td");
776&function_begin("AES_decrypt"); 542&function_begin_B("_x86_AES_decrypt");
777 &mov ($acc,&wparam(0)); # load inp 543 # note that caller is expected to allocate stack frame for me!
778 &mov ($key,&wparam(2)); # load key
779
780 &call (&label("pic_point")); # make it PIC!
781 &set_label("pic_point");
782 &blindpop("ebp");
783 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
784
785 # allocate aligned stack frame
786 &mov ($s0,"esp");
787 &sub ("esp",20);
788 &and ("esp",-16);
789
790 &mov (&DWP(12,"esp"),$key); # save key 544 &mov (&DWP(12,"esp"),$key); # save key
791 &mov (&DWP(16,"esp"),$s0); # save %esp
792
793 &mov ($s0,&DWP(0,$acc)); # load input data
794 &mov ($s1,&DWP(4,$acc));
795 &mov ($s2,&DWP(8,$acc));
796 &mov ($s3,&DWP(12,$acc));
797 545
798 &xor ($s0,&DWP(0,$key)); 546 &xor ($s0,&DWP(0,$key)); # xor with key
799 &xor ($s1,&DWP(4,$key)); 547 &xor ($s1,&DWP(4,$key));
800 &xor ($s2,&DWP(8,$key)); 548 &xor ($s2,&DWP(8,$key));
801 &xor ($s3,&DWP(12,$key)); 549 &xor ($s3,&DWP(12,$key));
@@ -805,19 +553,19 @@ sub declast()
805 if ($small_footprint) { 553 if ($small_footprint) {
806 &lea ($acc,&DWP(-2,$acc,$acc)); 554 &lea ($acc,&DWP(-2,$acc,$acc));
807 &lea ($acc,&DWP(0,$key,$acc,8)); 555 &lea ($acc,&DWP(0,$key,$acc,8));
808 &mov (&DWP(8,"esp"),$acc); # end of key schedule 556 &mov (&DWP(16,"esp"),$acc); # end of key schedule
809 &align (4); 557 &align (4);
810 &set_label("loop"); 558 &set_label("loop");
811 &decstep(0,"ebp",$s0,$s3,$s2,$s1); 559 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
812 &decstep(1,"ebp",$s1,$s0,$s3,$s2); 560 &decstep(1,"ebp",$s1,$s0,$s3,$s2);
813 &decstep(2,"ebp",$s2,$s1,$s0,$s3); 561 &decstep(2,"ebp",$s2,$s1,$s0,$s3);
814 &decstep(3,"ebp",$s3,$s2,$s1,$s0); 562 &decstep(3,"ebp",$s3,$s2,$s1,$s0);
815 &add ($key,16); # advance rd_key 563 &add ($key,16); # advance rd_key
816 &xor ($s0,&DWP(0,$key)); 564 &xor ($s0,&DWP(0,$key));
817 &xor ($s1,&DWP(4,$key)); 565 &xor ($s1,&DWP(4,$key));
818 &xor ($s2,&DWP(8,$key)); 566 &xor ($s2,&DWP(8,$key));
819 &xor ($s3,&DWP(12,$key)); 567 &xor ($s3,&DWP(12,$key));
820 &cmp ($key,&DWP(8,"esp")); 568 &cmp ($key,&DWP(16,"esp"));
821 &mov (&DWP(12,"esp"),$key); 569 &mov (&DWP(12,"esp"),$key);
822 &jb (&label("loop")); 570 &jb (&label("loop"));
823 } 571 }
@@ -839,7 +587,7 @@ sub declast()
839 &xor ($s3,&DWP(16*$i+12,$key)); 587 &xor ($s3,&DWP(16*$i+12,$key));
840 } 588 }
841 &add ($key,32); 589 &add ($key,32);
842 &mov (&DWP(12,"esp"),$key); # advance rd_key 590 &mov (&DWP(12,"esp"),$key); # advance rd_key
843 &set_label("12rounds"); 591 &set_label("12rounds");
844 for ($i=1;$i<3;$i++) { 592 for ($i=1;$i<3;$i++) {
845 &decstep(0,"ebp",$s0,$s3,$s2,$s1); 593 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
@@ -852,7 +600,7 @@ sub declast()
852 &xor ($s3,&DWP(16*$i+12,$key)); 600 &xor ($s3,&DWP(16*$i+12,$key));
853 } 601 }
854 &add ($key,32); 602 &add ($key,32);
855 &mov (&DWP(12,"esp"),$key); # advance rd_key 603 &mov (&DWP(12,"esp"),$key); # advance rd_key
856 &set_label("10rounds"); 604 &set_label("10rounds");
857 for ($i=1;$i<10;$i++) { 605 for ($i=1;$i<10;$i++) {
858 &decstep(0,"ebp",$s0,$s3,$s2,$s1); 606 &decstep(0,"ebp",$s0,$s3,$s2,$s1);
@@ -866,382 +614,627 @@ sub declast()
866 } 614 }
867 } 615 }
868 616
869 &add ("ebp",4*1024); # skip to Te4
870 &declast(0,"ebp",$s0,$s3,$s2,$s1); 617 &declast(0,"ebp",$s0,$s3,$s2,$s1);
871 &declast(1,"ebp",$s1,$s0,$s3,$s2); 618 &declast(1,"ebp",$s1,$s0,$s3,$s2);
872 &declast(2,"ebp",$s2,$s1,$s0,$s3); 619 &declast(2,"ebp",$s2,$s1,$s0,$s3);
873 &declast(3,"ebp",$s3,$s2,$s1,$s0); 620 &declast(3,"ebp",$s3,$s2,$s1,$s0);
874 621
875 &mov ("esp",&DWP(16,"esp")); # restore %esp
876 &add ($key,$small_footprint?16:160); 622 &add ($key,$small_footprint?16:160);
877 &xor ($s0,&DWP(0,$key)); 623 &xor ($s0,&DWP(0,$key));
878 &xor ($s1,&DWP(4,$key)); 624 &xor ($s1,&DWP(4,$key));
879 &xor ($s2,&DWP(8,$key)); 625 &xor ($s2,&DWP(8,$key));
880 &xor ($s3,&DWP(12,$key)); 626 &xor ($s3,&DWP(12,$key));
881 627
882 &mov ($key,&wparam(1)); # load out 628 &ret ();
883 &mov (&DWP(0,$key),$s0); # write output data 629
630&set_label("AES_Td",64); # Yes! I keep it in the code segment!
631 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
632 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
633 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
634 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
635 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
636 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
637 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
638 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
639 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
640 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
641 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
642 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
643 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
644 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
645 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
646 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
647 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
648 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
649 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
650 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
651 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
652 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
653 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
654 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
655 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
656 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
657 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
658 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
659 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
660 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
661 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
662 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
663 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
664 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
665 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
666 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
667 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
668 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
669 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
670 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
671 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
672 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
673 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
674 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
675 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
676 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
677 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
678 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
679 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
680 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
681 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
682 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
683 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
684 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
685 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
686 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
687 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
688 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
689 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
690 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
691 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
692 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
693 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
694 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
695#Td4:
696 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
697 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
698 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
699 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
700 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
701 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
702 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
703 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
704 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
705 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
706 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
707 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
708 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
709 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
710 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
711 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
712 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
713 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
714 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
715 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
716 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
717 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
718 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
719 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
720 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
721 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
722 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
723 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
724 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
725 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
726 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
727 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
728&function_end_B("_x86_AES_decrypt");
729
730# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
731&public_label("AES_Td");
732&function_begin("AES_decrypt");
733 &mov ($acc,&wparam(0)); # load inp
734 &mov ($key,&wparam(2)); # load key
735
736 &mov ($s0,"esp");
737 &sub ("esp",24);
738 &and ("esp",-64);
739 &add ("esp",4);
740 &mov (&DWP(16,"esp"),$s0);
741
742 &call (&label("pic_point")); # make it PIC!
743 &set_label("pic_point");
744 &blindpop("ebp");
745 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
746
747 # prefetch Td4
748 &lea ("ebp",&DWP(2048+128,"ebp"));
749 &mov ($s0,&DWP(0-128,"ebp"));
750 &mov ($s1,&DWP(32-128,"ebp"));
751 &mov ($s2,&DWP(64-128,"ebp"));
752 &mov ($s3,&DWP(96-128,"ebp"));
753 &mov ($s0,&DWP(128-128,"ebp"));
754 &mov ($s1,&DWP(160-128,"ebp"));
755 &mov ($s2,&DWP(192-128,"ebp"));
756 &mov ($s3,&DWP(224-128,"ebp"));
757 &lea ("ebp",&DWP(-2048-128,"ebp"));
758
759 &mov ($s0,&DWP(0,$acc)); # load input data
760 &mov ($s1,&DWP(4,$acc));
761 &mov ($s2,&DWP(8,$acc));
762 &mov ($s3,&DWP(12,$acc));
763
764 &call ("_x86_AES_decrypt");
765
766 &mov ("esp",&DWP(16,"esp"));
767
768 &mov ($acc,&wparam(1)); # load out
769 &mov (&DWP(0,$acc),$s0); # write output data
770 &mov (&DWP(4,$acc),$s1);
771 &mov (&DWP(8,$acc),$s2);
772 &mov (&DWP(12,$acc),$s3);
773&function_end("AES_decrypt");
774
775# void AES_cbc_encrypt (const void char *inp, unsigned char *out,
776# size_t length, const AES_KEY *key,
777# unsigned char *ivp,const int enc);
778{
779# stack frame layout
780# -4(%esp) 0(%esp) return address
781# 0(%esp) 4(%esp) tmp1
782# 4(%esp) 8(%esp) tmp2
783# 8(%esp) 12(%esp) key
784# 12(%esp) 16(%esp) end of key schedule
785my $_esp=&DWP(16,"esp"); #saved %esp
786my $_inp=&DWP(20,"esp"); #copy of wparam(0)
787my $_out=&DWP(24,"esp"); #copy of wparam(1)
788my $_len=&DWP(28,"esp"); #copy of wparam(2)
789my $_key=&DWP(32,"esp"); #copy of wparam(3)
790my $_ivp=&DWP(36,"esp"); #copy of wparam(4)
791my $_tmp=&DWP(40,"esp"); #volatile variable
792my $ivec=&DWP(44,"esp"); #ivec[16]
793my $aes_key=&DWP(60,"esp"); #copy of aes_key
794my $mark=&DWP(60+240,"esp"); #copy of aes_key->rounds
795
796&public_label("AES_Te");
797&public_label("AES_Td");
798&function_begin("AES_cbc_encrypt");
799 &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len
800 &cmp ($s2,0);
801 &je (&label("enc_out"));
802
803 &call (&label("pic_point")); # make it PIC!
804 &set_label("pic_point");
805 &blindpop("ebp");
806
807 &pushf ();
808 &cld ();
809
810 &cmp (&wparam(5),0);
811 &je (&label("DECRYPT"));
812
813 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
814
815 # allocate aligned stack frame...
816 &lea ($key,&DWP(-64-244,"esp"));
817 &and ($key,-64);
818
819 # ... and make sure it doesn't alias with AES_Te modulo 4096
820 &mov ($s0,"ebp");
821 &lea ($s1,&DWP(2048,"ebp"));
822 &mov ($s3,$key);
823 &and ($s0,0xfff); # s = %ebp&0xfff
824 &and ($s1,0xfff); # e = (%ebp+2048)&0xfff
825 &and ($s3,0xfff); # p = %esp&0xfff
826
827 &cmp ($s3,$s1); # if (p>=e) %esp =- (p-e);
828 &jb (&label("te_break_out"));
829 &sub ($s3,$s1);
830 &sub ($key,$s3);
831 &jmp (&label("te_ok"));
832 &set_label("te_break_out"); # else %esp -= (p-s)&0xfff + framesz;
833 &sub ($s3,$s0);
834 &and ($s3,0xfff);
835 &add ($s3,64+256);
836 &sub ($key,$s3);
837 &align (4);
838 &set_label("te_ok");
839
840 &mov ($s0,&wparam(0)); # load inp
841 &mov ($s1,&wparam(1)); # load out
842 &mov ($s3,&wparam(3)); # load key
843 &mov ($acc,&wparam(4)); # load ivp
844
845 &exch ("esp",$key);
846 &add ("esp",4); # reserve for return address!
847 &mov ($_esp,$key); # save %esp
848
849 &mov ($_inp,$s0); # save copy of inp
850 &mov ($_out,$s1); # save copy of out
851 &mov ($_len,$s2); # save copy of len
852 &mov ($_key,$s3); # save copy of key
853 &mov ($_ivp,$acc); # save copy of ivp
854
855 &mov ($mark,0); # copy of aes_key->rounds = 0;
856 if ($compromise) {
857 &cmp ($s2,$compromise);
858 &jb (&label("skip_ecopy"));
859 }
860 # do we copy key schedule to stack?
861 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
862 &mov ($s2 eq "ecx" ? $s2 : "",244/4);
863 &sub ($s1,"ebp");
864 &mov ("esi",$s3);
865 &and ($s1,0xfff);
866 &lea ("edi",$aes_key);
867 &cmp ($s1,2048);
868 &jb (&label("do_ecopy"));
869 &cmp ($s1,4096-244);
870 &jb (&label("skip_ecopy"));
871 &align (4);
872 &set_label("do_ecopy");
873 &mov ($_key,"edi");
874 &data_word(0xA5F3F689); # rep movsd
875 &set_label("skip_ecopy");
876
877 &mov ($acc,$s0);
878 &mov ($key,16);
879 &align (4);
880 &set_label("prefetch_te");
881 &mov ($s0,&DWP(0,"ebp"));
882 &mov ($s1,&DWP(32,"ebp"));
883 &mov ($s2,&DWP(64,"ebp"));
884 &mov ($s3,&DWP(96,"ebp"));
885 &lea ("ebp",&DWP(128,"ebp"));
886 &dec ($key);
887 &jnz (&label("prefetch_te"));
888 &sub ("ebp",2048);
889
890 &mov ($s2,$_len);
891 &mov ($key,$_ivp);
892 &test ($s2,0xFFFFFFF0);
893 &jz (&label("enc_tail")); # short input...
894
895 &mov ($s0,&DWP(0,$key)); # load iv
896 &mov ($s1,&DWP(4,$key));
897
898 &align (4);
899 &set_label("enc_loop");
900 &mov ($s2,&DWP(8,$key));
901 &mov ($s3,&DWP(12,$key));
902
903 &xor ($s0,&DWP(0,$acc)); # xor input data
904 &xor ($s1,&DWP(4,$acc));
905 &xor ($s2,&DWP(8,$acc));
906 &xor ($s3,&DWP(12,$acc));
907
908 &mov ($key,$_key); # load key
909 &call ("_x86_AES_encrypt");
910
911 &mov ($acc,$_inp); # load inp
912 &mov ($key,$_out); # load out
913
914 &mov (&DWP(0,$key),$s0); # save output data
915 &mov (&DWP(4,$key),$s1);
916 &mov (&DWP(8,$key),$s2);
917 &mov (&DWP(12,$key),$s3);
918
919 &mov ($s2,$_len); # load len
920
921 &lea ($acc,&DWP(16,$acc));
922 &mov ($_inp,$acc); # save inp
923
924 &lea ($s3,&DWP(16,$key));
925 &mov ($_out,$s3); # save out
926
927 &sub ($s2,16);
928 &test ($s2,0xFFFFFFF0);
929 &mov ($_len,$s2); # save len
930 &jnz (&label("enc_loop"));
931 &test ($s2,15);
932 &jnz (&label("enc_tail"));
933 &mov ($acc,$_ivp); # load ivp
934 &mov ($s2,&DWP(8,$key)); # restore last dwords
935 &mov ($s3,&DWP(12,$key));
936 &mov (&DWP(0,$acc),$s0); # save ivec
937 &mov (&DWP(4,$acc),$s1);
938 &mov (&DWP(8,$acc),$s2);
939 &mov (&DWP(12,$acc),$s3);
940
941 &cmp ($mark,0); # was the key schedule copied?
942 &mov ("edi",$_key);
943 &je (&label("skip_ezero"));
944 # zero copy of key schedule
945 &mov ("ecx",240/4);
946 &xor ("eax","eax");
947 &align (4);
948 &data_word(0xABF3F689); # rep stosd
949 &set_label("skip_ezero")
950 &mov ("esp",$_esp);
951 &popf ();
952 &set_label("enc_out");
953 &function_end_A();
954 &pushf (); # kludge, never executed
955
956 &align (4);
957 &set_label("enc_tail");
958 &push ($key eq "edi" ? $key : ""); # push ivp
959 &mov ($key,$_out); # load out
960 &mov ($s1,16);
961 &sub ($s1,$s2);
962 &cmp ($key,$acc); # compare with inp
963 &je (&label("enc_in_place"));
964 &align (4);
965 &data_word(0xA4F3F689); # rep movsb # copy input
966 &jmp (&label("enc_skip_in_place"));
967 &set_label("enc_in_place");
968 &lea ($key,&DWP(0,$key,$s2));
969 &set_label("enc_skip_in_place");
970 &mov ($s2,$s1);
971 &xor ($s0,$s0);
972 &align (4);
973 &data_word(0xAAF3F689); # rep stosb # zero tail
974 &pop ($key); # pop ivp
975
976 &mov ($acc,$_out); # output as input
977 &mov ($s0,&DWP(0,$key));
978 &mov ($s1,&DWP(4,$key));
979 &mov ($_len,16); # len=16
980 &jmp (&label("enc_loop")); # one more spin...
981
982#----------------------------- DECRYPT -----------------------------#
983&align (4);
984&set_label("DECRYPT");
985 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
986
987 # allocate aligned stack frame...
988 &lea ($key,&DWP(-64-244,"esp"));
989 &and ($key,-64);
990
991 # ... and make sure it doesn't alias with AES_Td modulo 4096
992 &mov ($s0,"ebp");
993 &lea ($s1,&DWP(2048+256,"ebp"));
994 &mov ($s3,$key);
995 &and ($s0,0xfff); # s = %ebp&0xfff
996 &and ($s1,0xfff); # e = (%ebp+2048+256)&0xfff
997 &and ($s3,0xfff); # p = %esp&0xfff
998
999 &cmp ($s3,$s1); # if (p>=e) %esp =- (p-e);
1000 &jb (&label("td_break_out"));
1001 &sub ($s3,$s1);
1002 &sub ($key,$s3);
1003 &jmp (&label("td_ok"));
1004 &set_label("td_break_out"); # else %esp -= (p-s)&0xfff + framesz;
1005 &sub ($s3,$s0);
1006 &and ($s3,0xfff);
1007 &add ($s3,64+256);
1008 &sub ($key,$s3);
1009 &align (4);
1010 &set_label("td_ok");
1011
1012 &mov ($s0,&wparam(0)); # load inp
1013 &mov ($s1,&wparam(1)); # load out
1014 &mov ($s3,&wparam(3)); # load key
1015 &mov ($acc,&wparam(4)); # load ivp
1016
1017 &exch ("esp",$key);
1018 &add ("esp",4); # reserve for return address!
1019 &mov ($_esp,$key); # save %esp
1020
1021 &mov ($_inp,$s0); # save copy of inp
1022 &mov ($_out,$s1); # save copy of out
1023 &mov ($_len,$s2); # save copy of len
1024 &mov ($_key,$s3); # save copy of key
1025 &mov ($_ivp,$acc); # save copy of ivp
1026
1027 &mov ($mark,0); # copy of aes_key->rounds = 0;
1028 if ($compromise) {
1029 &cmp ($s2,$compromise);
1030 &jb (&label("skip_dcopy"));
1031 }
1032 # do we copy key schedule to stack?
1033 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
1034 &mov ($s2 eq "ecx" ? $s2 : "",244/4);
1035 &sub ($s1,"ebp");
1036 &mov ("esi",$s3);
1037 &and ($s1,0xfff);
1038 &lea ("edi",$aes_key);
1039 &cmp ($s1,2048+256);
1040 &jb (&label("do_dcopy"));
1041 &cmp ($s1,4096-244);
1042 &jb (&label("skip_dcopy"));
1043 &align (4);
1044 &set_label("do_dcopy");
1045 &mov ($_key,"edi");
1046 &data_word(0xA5F3F689); # rep movsd
1047 &set_label("skip_dcopy");
1048
1049 &mov ($acc,$s0);
1050 &mov ($key,18);
1051 &align (4);
1052 &set_label("prefetch_td");
1053 &mov ($s0,&DWP(0,"ebp"));
1054 &mov ($s1,&DWP(32,"ebp"));
1055 &mov ($s2,&DWP(64,"ebp"));
1056 &mov ($s3,&DWP(96,"ebp"));
1057 &lea ("ebp",&DWP(128,"ebp"));
1058 &dec ($key);
1059 &jnz (&label("prefetch_td"));
1060 &sub ("ebp",2048+256);
1061
1062 &cmp ($acc,$_out);
1063 &je (&label("dec_in_place")); # in-place processing...
1064
1065 &mov ($key,$_ivp); # load ivp
1066 &mov ($_tmp,$key);
1067
1068 &align (4);
1069 &set_label("dec_loop");
1070 &mov ($s0,&DWP(0,$acc)); # read input
1071 &mov ($s1,&DWP(4,$acc));
1072 &mov ($s2,&DWP(8,$acc));
1073 &mov ($s3,&DWP(12,$acc));
1074
1075 &mov ($key,$_key); # load key
1076 &call ("_x86_AES_decrypt");
1077
1078 &mov ($key,$_tmp); # load ivp
1079 &mov ($acc,$_len); # load len
1080 &xor ($s0,&DWP(0,$key)); # xor iv
1081 &xor ($s1,&DWP(4,$key));
1082 &xor ($s2,&DWP(8,$key));
1083 &xor ($s3,&DWP(12,$key));
1084
1085 &sub ($acc,16);
1086 &jc (&label("dec_partial"));
1087 &mov ($_len,$acc); # save len
1088 &mov ($acc,$_inp); # load inp
1089 &mov ($key,$_out); # load out
1090
1091 &mov (&DWP(0,$key),$s0); # write output
1092 &mov (&DWP(4,$key),$s1);
1093 &mov (&DWP(8,$key),$s2);
1094 &mov (&DWP(12,$key),$s3);
1095
1096 &mov ($_tmp,$acc); # save ivp
1097 &lea ($acc,&DWP(16,$acc));
1098 &mov ($_inp,$acc); # save inp
1099
1100 &lea ($key,&DWP(16,$key));
1101 &mov ($_out,$key); # save out
1102
1103 &jnz (&label("dec_loop"));
1104 &mov ($key,$_tmp); # load temp ivp
1105 &set_label("dec_end");
1106 &mov ($acc,$_ivp); # load user ivp
1107 &mov ($s0,&DWP(0,$key)); # load iv
1108 &mov ($s1,&DWP(4,$key));
1109 &mov ($s2,&DWP(8,$key));
1110 &mov ($s3,&DWP(12,$key));
1111 &mov (&DWP(0,$acc),$s0); # copy back to user
1112 &mov (&DWP(4,$acc),$s1);
1113 &mov (&DWP(8,$acc),$s2);
1114 &mov (&DWP(12,$acc),$s3);
1115 &jmp (&label("dec_out"));
1116
1117 &align (4);
1118 &set_label("dec_partial");
1119 &lea ($key,$ivec);
1120 &mov (&DWP(0,$key),$s0); # dump output to stack
884 &mov (&DWP(4,$key),$s1); 1121 &mov (&DWP(4,$key),$s1);
885 &mov (&DWP(8,$key),$s2); 1122 &mov (&DWP(8,$key),$s2);
886 &mov (&DWP(12,$key),$s3); 1123 &mov (&DWP(12,$key),$s3);
1124 &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$acc));
1125 &mov ($acc eq "esi" ? $acc : "",$key);
1126 &mov ($key eq "edi" ? $key : "",$_out); # load out
1127 &data_word(0xA4F3F689); # rep movsb # copy output
1128 &mov ($key,$_inp); # use inp as temp ivp
1129 &jmp (&label("dec_end"));
1130
1131 &align (4);
1132 &set_label("dec_in_place");
1133 &set_label("dec_in_place_loop");
1134 &lea ($key,$ivec);
1135 &mov ($s0,&DWP(0,$acc)); # read input
1136 &mov ($s1,&DWP(4,$acc));
1137 &mov ($s2,&DWP(8,$acc));
1138 &mov ($s3,&DWP(12,$acc));
1139
1140 &mov (&DWP(0,$key),$s0); # copy to temp
1141 &mov (&DWP(4,$key),$s1);
1142 &mov (&DWP(8,$key),$s2);
1143 &mov (&DWP(12,$key),$s3);
1144
1145 &mov ($key,$_key); # load key
1146 &call ("_x86_AES_decrypt");
1147
1148 &mov ($key,$_ivp); # load ivp
1149 &mov ($acc,$_out); # load out
1150 &xor ($s0,&DWP(0,$key)); # xor iv
1151 &xor ($s1,&DWP(4,$key));
1152 &xor ($s2,&DWP(8,$key));
1153 &xor ($s3,&DWP(12,$key));
887 1154
888 &pop ("edi"); 1155 &mov (&DWP(0,$acc),$s0); # write output
889 &pop ("esi"); 1156 &mov (&DWP(4,$acc),$s1);
890 &pop ("ebx"); 1157 &mov (&DWP(8,$acc),$s2);
891 &pop ("ebp"); 1158 &mov (&DWP(12,$acc),$s3);
892 &ret (); 1159
1160 &lea ($acc,&DWP(16,$acc));
1161 &mov ($_out,$acc); # save out
1162
1163 &lea ($acc,$ivec);
1164 &mov ($s0,&DWP(0,$acc)); # read temp
1165 &mov ($s1,&DWP(4,$acc));
1166 &mov ($s2,&DWP(8,$acc));
1167 &mov ($s3,&DWP(12,$acc));
1168
1169 &mov (&DWP(0,$key),$s0); # copy iv
1170 &mov (&DWP(4,$key),$s1);
1171 &mov (&DWP(8,$key),$s2);
1172 &mov (&DWP(12,$key),$s3);
1173
1174 &mov ($acc,$_inp); # load inp
1175
1176 &lea ($acc,&DWP(16,$acc));
1177 &mov ($_inp,$acc); # save inp
1178
1179 &mov ($s2,$_len); # load len
1180 &sub ($s2,16);
1181 &jc (&label("dec_in_place_partial"));
1182 &mov ($_len,$s2); # save len
1183 &jnz (&label("dec_in_place_loop"));
1184 &jmp (&label("dec_out"));
1185
1186 &align (4);
1187 &set_label("dec_in_place_partial");
1188 # one can argue if this is actually required...
1189 &mov ($key eq "edi" ? $key : "",$_out);
1190 &lea ($acc eq "esi" ? $acc : "",$ivec);
1191 &lea ($key,&DWP(0,$key,$s2));
1192 &lea ($acc,&DWP(16,$acc,$s2));
1193 &neg ($s2 eq "ecx" ? $s2 : "");
1194 &data_word(0xA4F3F689); # rep movsb # restore tail
1195
1196 &align (4);
1197 &set_label("dec_out");
1198 &cmp ($mark,0); # was the key schedule copied?
1199 &mov ("edi",$_key);
1200 &je (&label("skip_dzero"));
1201 # zero copy of key schedule
1202 &mov ("ecx",240/4);
1203 &xor ("eax","eax");
1204 &align (4);
1205 &data_word(0xABF3F689); # rep stosd
1206 &set_label("skip_dzero")
1207 &mov ("esp",$_esp);
1208 &popf ();
1209&function_end("AES_cbc_encrypt");
1210}
893 1211
894&set_label("AES_Td",64); # Yes! I keep it in the code segment! 1212#------------------------------------------------------------------#
895 &data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
896 &data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
897 &data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
898 &data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
899 &data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
900 &data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
901 &data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
902 &data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
903 &data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
904 &data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
905 &data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
906 &data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
907 &data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
908 &data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
909 &data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
910 &data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
911 &data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
912 &data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
913 &data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
914 &data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
915 &data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
916 &data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
917 &data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
918 &data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
919 &data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
920 &data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
921 &data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
922 &data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
923 &data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
924 &data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
925 &data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
926 &data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
927 &data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
928 &data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
929 &data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
930 &data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
931 &data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
932 &data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
933 &data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
934 &data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
935 &data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
936 &data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
937 &data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
938 &data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
939 &data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
940 &data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
941 &data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
942 &data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
943 &data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
944 &data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
945 &data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
946 &data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
947 &data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
948 &data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
949 &data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
950 &data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
951 &data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
952 &data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
953 &data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
954 &data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
955 &data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
956 &data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
957 &data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
958 &data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
959#Td1:
960 &data_word(0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96);
961 &data_word(0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93);
962 &data_word(0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525);
963 &data_word(0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f);
964 &data_word(0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1);
965 &data_word(0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6);
966 &data_word(0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da);
967 &data_word(0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44);
968 &data_word(0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd);
969 &data_word(0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4);
970 &data_word(0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245);
971 &data_word(0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994);
972 &data_word(0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7);
973 &data_word(0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a);
974 &data_word(0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5);
975 &data_word(0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c);
976 &data_word(0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1);
977 &data_word(0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a);
978 &data_word(0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475);
979 &data_word(0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51);
980 &data_word(0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46);
981 &data_word(0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff);
982 &data_word(0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777);
983 &data_word(0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db);
984 &data_word(0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000);
985 &data_word(0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e);
986 &data_word(0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627);
987 &data_word(0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a);
988 &data_word(0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e);
989 &data_word(0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16);
990 &data_word(0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d);
991 &data_word(0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8);
992 &data_word(0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd);
993 &data_word(0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34);
994 &data_word(0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863);
995 &data_word(0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420);
996 &data_word(0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d);
997 &data_word(0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0);
998 &data_word(0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722);
999 &data_word(0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef);
1000 &data_word(0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836);
1001 &data_word(0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4);
1002 &data_word(0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462);
1003 &data_word(0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5);
1004 &data_word(0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3);
1005 &data_word(0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b);
1006 &data_word(0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8);
1007 &data_word(0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6);
1008 &data_word(0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6);
1009 &data_word(0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0);
1010 &data_word(0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315);
1011 &data_word(0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f);
1012 &data_word(0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df);
1013 &data_word(0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f);
1014 &data_word(0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e);
1015 &data_word(0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13);
1016 &data_word(0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89);
1017 &data_word(0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c);
1018 &data_word(0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf);
1019 &data_word(0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886);
1020 &data_word(0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f);
1021 &data_word(0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41);
1022 &data_word(0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490);
1023 &data_word(0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042);
1024#Td2:
1025 &data_word(0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e);
1026 &data_word(0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303);
1027 &data_word(0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c);
1028 &data_word(0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3);
1029 &data_word(0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0);
1030 &data_word(0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9);
1031 &data_word(0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59);
1032 &data_word(0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8);
1033 &data_word(0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71);
1034 &data_word(0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a);
1035 &data_word(0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f);
1036 &data_word(0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b);
1037 &data_word(0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8);
1038 &data_word(0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab);
1039 &data_word(0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508);
1040 &data_word(0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82);
1041 &data_word(0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2);
1042 &data_word(0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe);
1043 &data_word(0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb);
1044 &data_word(0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110);
1045 &data_word(0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd);
1046 &data_word(0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15);
1047 &data_word(0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e);
1048 &data_word(0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee);
1049 &data_word(0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000);
1050 &data_word(0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72);
1051 &data_word(0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739);
1052 &data_word(0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e);
1053 &data_word(0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91);
1054 &data_word(0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a);
1055 &data_word(0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17);
1056 &data_word(0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9);
1057 &data_word(0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60);
1058 &data_word(0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e);
1059 &data_word(0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1);
1060 &data_word(0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011);
1061 &data_word(0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1);
1062 &data_word(0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3);
1063 &data_word(0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264);
1064 &data_word(0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90);
1065 &data_word(0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b);
1066 &data_word(0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf);
1067 &data_word(0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246);
1068 &data_word(0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af);
1069 &data_word(0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312);
1070 &data_word(0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb);
1071 &data_word(0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a);
1072 &data_word(0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8);
1073 &data_word(0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c);
1074 &data_word(0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066);
1075 &data_word(0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8);
1076 &data_word(0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6);
1077 &data_word(0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04);
1078 &data_word(0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51);
1079 &data_word(0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41);
1080 &data_word(0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347);
1081 &data_word(0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c);
1082 &data_word(0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1);
1083 &data_word(0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37);
1084 &data_word(0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db);
1085 &data_word(0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40);
1086 &data_word(0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195);
1087 &data_word(0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1);
1088 &data_word(0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257);
1089#Td3:
1090 &data_word(0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27);
1091 &data_word(0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3);
1092 &data_word(0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02);
1093 &data_word(0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362);
1094 &data_word(0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe);
1095 &data_word(0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3);
1096 &data_word(0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952);
1097 &data_word(0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9);
1098 &data_word(0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9);
1099 &data_word(0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace);
1100 &data_word(0x63184adf, 0xe582311a, 0x97603351, 0x62457f53);
1101 &data_word(0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08);
1102 &data_word(0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b);
1103 &data_word(0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55);
1104 &data_word(0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837);
1105 &data_word(0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216);
1106 &data_word(0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269);
1107 &data_word(0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6);
1108 &data_word(0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6);
1109 &data_word(0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e);
1110 &data_word(0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6);
1111 &data_word(0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550);
1112 &data_word(0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9);
1113 &data_word(0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8);
1114 &data_word(0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000);
1115 &data_word(0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a);
1116 &data_word(0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d);
1117 &data_word(0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36);
1118 &data_word(0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b);
1119 &data_word(0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12);
1120 &data_word(0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b);
1121 &data_word(0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e);
1122 &data_word(0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f);
1123 &data_word(0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb);
1124 &data_word(0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4);
1125 &data_word(0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6);
1126 &data_word(0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129);
1127 &data_word(0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1);
1128 &data_word(0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9);
1129 &data_word(0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033);
1130 &data_word(0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4);
1131 &data_word(0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad);
1132 &data_word(0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e);
1133 &data_word(0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3);
1134 &data_word(0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225);
1135 &data_word(0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b);
1136 &data_word(0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f);
1137 &data_word(0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815);
1138 &data_word(0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0);
1139 &data_word(0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2);
1140 &data_word(0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7);
1141 &data_word(0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691);
1142 &data_word(0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496);
1143 &data_word(0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165);
1144 &data_word(0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b);
1145 &data_word(0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6);
1146 &data_word(0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13);
1147 &data_word(0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147);
1148 &data_word(0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7);
1149 &data_word(0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44);
1150 &data_word(0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3);
1151 &data_word(0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d);
1152 &data_word(0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156);
1153 &data_word(0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8);
1154#Td4:
1155 &data_word(0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5);
1156 &data_word(0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838);
1157 &data_word(0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e);
1158 &data_word(0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb);
1159 &data_word(0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282);
1160 &data_word(0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787);
1161 &data_word(0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444);
1162 &data_word(0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb);
1163 &data_word(0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232);
1164 &data_word(0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d);
1165 &data_word(0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b);
1166 &data_word(0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e);
1167 &data_word(0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666);
1168 &data_word(0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2);
1169 &data_word(0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949);
1170 &data_word(0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525);
1171 &data_word(0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464);
1172 &data_word(0x86868686, 0x68686868, 0x98989898, 0x16161616);
1173 &data_word(0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc);
1174 &data_word(0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292);
1175 &data_word(0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050);
1176 &data_word(0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada);
1177 &data_word(0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757);
1178 &data_word(0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484);
1179 &data_word(0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000);
1180 &data_word(0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a);
1181 &data_word(0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505);
1182 &data_word(0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606);
1183 &data_word(0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f);
1184 &data_word(0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202);
1185 &data_word(0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303);
1186 &data_word(0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b);
1187 &data_word(0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141);
1188 &data_word(0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea);
1189 &data_word(0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece);
1190 &data_word(0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373);
1191 &data_word(0x96969696, 0xacacacac, 0x74747474, 0x22222222);
1192 &data_word(0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585);
1193 &data_word(0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8);
1194 &data_word(0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e);
1195 &data_word(0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171);
1196 &data_word(0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989);
1197 &data_word(0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e);
1198 &data_word(0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b);
1199 &data_word(0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b);
1200 &data_word(0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020);
1201 &data_word(0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe);
1202 &data_word(0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4);
1203 &data_word(0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333);
1204 &data_word(0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131);
1205 &data_word(0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959);
1206 &data_word(0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f);
1207 &data_word(0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9);
1208 &data_word(0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d);
1209 &data_word(0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f);
1210 &data_word(0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef);
1211 &data_word(0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d);
1212 &data_word(0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0);
1213 &data_word(0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c);
1214 &data_word(0x83838383, 0x53535353, 0x99999999, 0x61616161);
1215 &data_word(0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e);
1216 &data_word(0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626);
1217 &data_word(0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363);
1218 &data_word(0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d);
1219&function_end_B("AES_decrypt");
1220 1213
1221sub enckey() 1214sub enckey()
1222{ 1215{
1223 &movz ("esi",&LB("edx")); # rk[i]>>0 1216 &movz ("esi",&LB("edx")); # rk[i]>>0
1224 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1217 &mov ("ebx",&DWP(2,"ebp","esi",8));
1225 &movz ("esi",&HB("edx")); # rk[i]>>8 1218 &movz ("esi",&HB("edx")); # rk[i]>>8
1226 &and ("ebx",0xFF000000); 1219 &and ("ebx",0xFF000000);
1227 &xor ("eax","ebx"); 1220 &xor ("eax","ebx");
1228 1221
1229 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1222 &mov ("ebx",&DWP(2,"ebp","esi",8));
1230 &shr ("edx",16); 1223 &shr ("edx",16);
1231 &and ("ebx",0x000000FF); 1224 &and ("ebx",0x000000FF);
1232 &movz ("esi",&LB("edx")); # rk[i]>>16 1225 &movz ("esi",&LB("edx")); # rk[i]>>16
1233 &xor ("eax","ebx"); 1226 &xor ("eax","ebx");
1234 1227
1235 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1228 &mov ("ebx",&DWP(0,"ebp","esi",8));
1236 &movz ("esi",&HB("edx")); # rk[i]>>24 1229 &movz ("esi",&HB("edx")); # rk[i]>>24
1237 &and ("ebx",0x0000FF00); 1230 &and ("ebx",0x0000FF00);
1238 &xor ("eax","ebx"); 1231 &xor ("eax","ebx");
1239 1232
1240 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1233 &mov ("ebx",&DWP(0,"ebp","esi",8));
1241 &and ("ebx",0x00FF0000); 1234 &and ("ebx",0x00FF0000);
1242 &xor ("eax","ebx"); 1235 &xor ("eax","ebx");
1243 1236
1244 &xor ("eax",&DWP(1024,"ebp","ecx",4)); # rcon 1237 &xor ("eax",&DWP(2048,"ebp","ecx",4)); # rcon
1245} 1238}
1246 1239
1247# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 1240# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
@@ -1260,7 +1253,6 @@ sub enckey()
1260 &set_label("pic_point"); 1253 &set_label("pic_point");
1261 &blindpop("ebp"); 1254 &blindpop("ebp");
1262 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp")); 1255 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
1263 &add ("ebp",1024*4); # skip to Te4
1264 1256
1265 &mov ("ecx",&wparam(1)); # number of bits in key 1257 &mov ("ecx",&wparam(1)); # number of bits in key
1266 &cmp ("ecx",128); 1258 &cmp ("ecx",128);
@@ -1401,23 +1393,23 @@ sub enckey()
1401 &mov ("edx","eax"); 1393 &mov ("edx","eax");
1402 &mov ("eax",&DWP(16,"edi")); # rk[4] 1394 &mov ("eax",&DWP(16,"edi")); # rk[4]
1403 &movz ("esi",&LB("edx")); # rk[11]>>0 1395 &movz ("esi",&LB("edx")); # rk[11]>>0
1404 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1396 &mov ("ebx",&DWP(2,"ebp","esi",8));
1405 &movz ("esi",&HB("edx")); # rk[11]>>8 1397 &movz ("esi",&HB("edx")); # rk[11]>>8
1406 &and ("ebx",0x000000FF); 1398 &and ("ebx",0x000000FF);
1407 &xor ("eax","ebx"); 1399 &xor ("eax","ebx");
1408 1400
1409 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1401 &mov ("ebx",&DWP(0,"ebp","esi",8));
1410 &shr ("edx",16); 1402 &shr ("edx",16);
1411 &and ("ebx",0x0000FF00); 1403 &and ("ebx",0x0000FF00);
1412 &movz ("esi",&LB("edx")); # rk[11]>>16 1404 &movz ("esi",&LB("edx")); # rk[11]>>16
1413 &xor ("eax","ebx"); 1405 &xor ("eax","ebx");
1414 1406
1415 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1407 &mov ("ebx",&DWP(0,"ebp","esi",8));
1416 &movz ("esi",&HB("edx")); # rk[11]>>24 1408 &movz ("esi",&HB("edx")); # rk[11]>>24
1417 &and ("ebx",0x00FF0000); 1409 &and ("ebx",0x00FF0000);
1418 &xor ("eax","ebx"); 1410 &xor ("eax","ebx");
1419 1411
1420 &mov ("ebx",&DWP(0,"ebp","esi",4)); 1412 &mov ("ebx",&DWP(2,"ebp","esi",8));
1421 &and ("ebx",0xFF000000); 1413 &and ("ebx",0xFF000000);
1422 &xor ("eax","ebx"); 1414 &xor ("eax","ebx");
1423 1415
@@ -1443,23 +1435,23 @@ sub enckey()
1443&function_end("AES_set_encrypt_key"); 1435&function_end("AES_set_encrypt_key");
1444 1436
1445sub deckey() 1437sub deckey()
1446{ my ($i,$ptr,$te4,$td) = @_; 1438{ my ($i,$ptr,$te,$td) = @_;
1447 1439
1448 &mov ("eax",&DWP($i,$ptr)); 1440 &mov ("eax",&DWP($i,$ptr));
1449 &mov ("edx","eax"); 1441 &mov ("edx","eax");
1450 &movz ("ebx",&HB("eax")); 1442 &movz ("ebx",&HB("eax"));
1451 &shr ("edx",16); 1443 &shr ("edx",16);
1452 &and ("eax",0xFF); 1444 &and ("eax",0xFF);
1453 &movz ("eax",&BP(0,$te4,"eax",4)); 1445 &movz ("eax",&BP(2,$te,"eax",8));
1454 &movz ("ebx",&BP(0,$te4,"ebx",4)); 1446 &movz ("ebx",&BP(2,$te,"ebx",8));
1455 &mov ("eax",&DWP(1024*0,$td,"eax",4)); 1447 &mov ("eax",&DWP(0,$td,"eax",8));
1456 &xor ("eax",&DWP(1024*1,$td,"ebx",4)); 1448 &xor ("eax",&DWP(3,$td,"ebx",8));
1457 &movz ("ebx",&HB("edx")); 1449 &movz ("ebx",&HB("edx"));
1458 &and ("edx",0xFF); 1450 &and ("edx",0xFF);
1459 &movz ("edx",&BP(0,$te4,"edx",4)); 1451 &movz ("edx",&BP(2,$te,"edx",8));
1460 &movz ("ebx",&BP(0,$te4,"ebx",4)); 1452 &movz ("ebx",&BP(2,$te,"ebx",8));
1461 &xor ("eax",&DWP(1024*2,$td,"edx",4)); 1453 &xor ("eax",&DWP(2,$td,"edx",8));
1462 &xor ("eax",&DWP(1024*3,$td,"ebx",4)); 1454 &xor ("eax",&DWP(1,$td,"ebx",8));
1463 &mov (&DWP($i,$ptr),"eax"); 1455 &mov (&DWP($i,$ptr),"eax");
1464} 1456}
1465 1457
@@ -1520,7 +1512,6 @@ sub deckey()
1520 blindpop("ebp"); 1512 blindpop("ebp");
1521 &lea ("edi",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp")); 1513 &lea ("edi",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
1522 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp")); 1514 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
1523 &add ("ebp",1024*4); # skip to Te4
1524 1515
1525 &mov ("esi",&wparam(2)); 1516 &mov ("esi",&wparam(2));
1526 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds 1517 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds