From 5a3c0a05c7f2c5d3c584b7c8d6aec836dd724c80 Mon Sep 17 00:00:00 2001 From: djm <> Date: Sat, 6 Sep 2008 12:15:56 +0000 Subject: import of OpenSSL 0.9.8h --- src/lib/libcrypto/md5/asm/md5-586.pl | 2 +- src/lib/libcrypto/md5/asm/md5-x86_64.pl | 245 ++++++++++++++++++++++++++++++++ src/lib/libcrypto/md5/md5.h | 10 +- src/lib/libcrypto/md5/md5_dgst.c | 112 +-------------- src/lib/libcrypto/md5/md5_locl.h | 52 +------ src/lib/libcrypto/md5/md5_one.c | 2 +- 6 files changed, 258 insertions(+), 165 deletions(-) create mode 100755 src/lib/libcrypto/md5/asm/md5-x86_64.pl (limited to 'src/lib/libcrypto/md5') diff --git a/src/lib/libcrypto/md5/asm/md5-586.pl b/src/lib/libcrypto/md5/asm/md5-586.pl index fa3fa3bed5..76ac235f7d 100644 --- a/src/lib/libcrypto/md5/asm/md5-586.pl +++ b/src/lib/libcrypto/md5/asm/md5-586.pl @@ -29,7 +29,7 @@ $X="esi"; 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 ); -&md5_block("md5_block_asm_host_order"); +&md5_block("md5_block_asm_data_order"); &asm_finish(); sub Np diff --git a/src/lib/libcrypto/md5/asm/md5-x86_64.pl b/src/lib/libcrypto/md5/asm/md5-x86_64.pl new file mode 100755 index 0000000000..9a6fa67224 --- /dev/null +++ b/src/lib/libcrypto/md5/asm/md5-x86_64.pl @@ -0,0 +1,245 @@ +#!/usr/bin/perl -w +# +# MD5 optimized for AMD64. +# +# Author: Marc Bevand +# Licence: I hereby disclaim the copyright on this code and place it +# in the public domain. +# + +use strict; + +my $code; + +# round1_step() does: +# dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s) +# %r10d = X[k_next] +# %r11d = z' (copy of z for the next step) +# Each round1_step() takes about 5.71 clocks (9 instructions, 1.58 IPC) +sub round1_step +{ + my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; + $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); + $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); + $code .= <A + mov 1*4(%rbp), %ebx # ebx = ctx->B + mov 2*4(%rbp), %ecx # ecx = ctx->C + mov 3*4(%rbp), %edx # edx = ctx->D + # end is 'rdi' + # ptr is 'rsi' + # A is 'eax' + # B is 'ebx' + # C is 'ecx' + # D is 'edx' + + cmp %rdi, %rsi # cmp end with ptr + je .Lend # jmp if ptr == end + + # BEGIN of loop over 16-word blocks +.Lloop: # save old values of A, B, C, D + mov %eax, %r8d + mov %ebx, %r9d + mov %ecx, %r14d + mov %edx, %r15d +EOF +round1_step(-1,'%eax','%ebx','%ecx','%edx', '1','0xd76aa478', '7'); +round1_step( 0,'%edx','%eax','%ebx','%ecx', '2','0xe8c7b756','12'); +round1_step( 0,'%ecx','%edx','%eax','%ebx', '3','0x242070db','17'); +round1_step( 0,'%ebx','%ecx','%edx','%eax', '4','0xc1bdceee','22'); +round1_step( 0,'%eax','%ebx','%ecx','%edx', '5','0xf57c0faf', '7'); +round1_step( 0,'%edx','%eax','%ebx','%ecx', '6','0x4787c62a','12'); +round1_step( 0,'%ecx','%edx','%eax','%ebx', '7','0xa8304613','17'); +round1_step( 0,'%ebx','%ecx','%edx','%eax', '8','0xfd469501','22'); +round1_step( 0,'%eax','%ebx','%ecx','%edx', '9','0x698098d8', '7'); +round1_step( 0,'%edx','%eax','%ebx','%ecx','10','0x8b44f7af','12'); +round1_step( 0,'%ecx','%edx','%eax','%ebx','11','0xffff5bb1','17'); +round1_step( 0,'%ebx','%ecx','%edx','%eax','12','0x895cd7be','22'); +round1_step( 0,'%eax','%ebx','%ecx','%edx','13','0x6b901122', '7'); +round1_step( 0,'%edx','%eax','%ebx','%ecx','14','0xfd987193','12'); +round1_step( 0,'%ecx','%edx','%eax','%ebx','15','0xa679438e','17'); +round1_step( 1,'%ebx','%ecx','%edx','%eax', '0','0x49b40821','22'); + +round2_step(-1,'%eax','%ebx','%ecx','%edx', '6','0xf61e2562', '5'); +round2_step( 0,'%edx','%eax','%ebx','%ecx','11','0xc040b340', '9'); +round2_step( 0,'%ecx','%edx','%eax','%ebx', '0','0x265e5a51','14'); +round2_step( 0,'%ebx','%ecx','%edx','%eax', '5','0xe9b6c7aa','20'); +round2_step( 0,'%eax','%ebx','%ecx','%edx','10','0xd62f105d', '5'); +round2_step( 0,'%edx','%eax','%ebx','%ecx','15', '0x2441453', '9'); +round2_step( 0,'%ecx','%edx','%eax','%ebx', '4','0xd8a1e681','14'); +round2_step( 0,'%ebx','%ecx','%edx','%eax', '9','0xe7d3fbc8','20'); +round2_step( 0,'%eax','%ebx','%ecx','%edx','14','0x21e1cde6', '5'); +round2_step( 0,'%edx','%eax','%ebx','%ecx', '3','0xc33707d6', '9'); +round2_step( 0,'%ecx','%edx','%eax','%ebx', '8','0xf4d50d87','14'); +round2_step( 0,'%ebx','%ecx','%edx','%eax','13','0x455a14ed','20'); +round2_step( 0,'%eax','%ebx','%ecx','%edx', '2','0xa9e3e905', '5'); +round2_step( 0,'%edx','%eax','%ebx','%ecx', '7','0xfcefa3f8', '9'); +round2_step( 0,'%ecx','%edx','%eax','%ebx','12','0x676f02d9','14'); +round2_step( 1,'%ebx','%ecx','%edx','%eax', '0','0x8d2a4c8a','20'); + +round3_step(-1,'%eax','%ebx','%ecx','%edx', '8','0xfffa3942', '4'); +round3_step( 0,'%edx','%eax','%ebx','%ecx','11','0x8771f681','11'); +round3_step( 0,'%ecx','%edx','%eax','%ebx','14','0x6d9d6122','16'); +round3_step( 0,'%ebx','%ecx','%edx','%eax', '1','0xfde5380c','23'); +round3_step( 0,'%eax','%ebx','%ecx','%edx', '4','0xa4beea44', '4'); +round3_step( 0,'%edx','%eax','%ebx','%ecx', '7','0x4bdecfa9','11'); +round3_step( 0,'%ecx','%edx','%eax','%ebx','10','0xf6bb4b60','16'); +round3_step( 0,'%ebx','%ecx','%edx','%eax','13','0xbebfbc70','23'); +round3_step( 0,'%eax','%ebx','%ecx','%edx', '0','0x289b7ec6', '4'); +round3_step( 0,'%edx','%eax','%ebx','%ecx', '3','0xeaa127fa','11'); +round3_step( 0,'%ecx','%edx','%eax','%ebx', '6','0xd4ef3085','16'); +round3_step( 0,'%ebx','%ecx','%edx','%eax', '9', '0x4881d05','23'); +round3_step( 0,'%eax','%ebx','%ecx','%edx','12','0xd9d4d039', '4'); +round3_step( 0,'%edx','%eax','%ebx','%ecx','15','0xe6db99e5','11'); +round3_step( 0,'%ecx','%edx','%eax','%ebx', '2','0x1fa27cf8','16'); +round3_step( 1,'%ebx','%ecx','%edx','%eax', '0','0xc4ac5665','23'); + +round4_step(-1,'%eax','%ebx','%ecx','%edx', '7','0xf4292244', '6'); +round4_step( 0,'%edx','%eax','%ebx','%ecx','14','0x432aff97','10'); +round4_step( 0,'%ecx','%edx','%eax','%ebx', '5','0xab9423a7','15'); +round4_step( 0,'%ebx','%ecx','%edx','%eax','12','0xfc93a039','21'); +round4_step( 0,'%eax','%ebx','%ecx','%edx', '3','0x655b59c3', '6'); +round4_step( 0,'%edx','%eax','%ebx','%ecx','10','0x8f0ccc92','10'); +round4_step( 0,'%ecx','%edx','%eax','%ebx', '1','0xffeff47d','15'); +round4_step( 0,'%ebx','%ecx','%edx','%eax', '8','0x85845dd1','21'); +round4_step( 0,'%eax','%ebx','%ecx','%edx','15','0x6fa87e4f', '6'); +round4_step( 0,'%edx','%eax','%ebx','%ecx', '6','0xfe2ce6e0','10'); +round4_step( 0,'%ecx','%edx','%eax','%ebx','13','0xa3014314','15'); +round4_step( 0,'%ebx','%ecx','%edx','%eax', '4','0x4e0811a1','21'); +round4_step( 0,'%eax','%ebx','%ecx','%edx','11','0xf7537e82', '6'); +round4_step( 0,'%edx','%eax','%ebx','%ecx', '2','0xbd3af235','10'); +round4_step( 0,'%ecx','%edx','%eax','%ebx', '9','0x2ad7d2bb','15'); +round4_step( 1,'%ebx','%ecx','%edx','%eax', '0','0xeb86d391','21'); +$code .= <A = A + mov %ebx, 1*4(%rbp) # ctx->B = B + mov %ecx, 2*4(%rbp) # ctx->C = C + mov %edx, 3*4(%rbp) # ctx->D = D + + pop %r15 + pop %r14 + pop %rbx + pop %rbp + ret +.size md5_block_asm_data_order,.-md5_block_asm_data_order +EOF + +print $code; + +close STDOUT; diff --git a/src/lib/libcrypto/md5/md5.h b/src/lib/libcrypto/md5/md5.h index c663dd1816..dbdc0e1abc 100644 --- a/src/lib/libcrypto/md5/md5.h +++ b/src/lib/libcrypto/md5/md5.h @@ -60,6 +60,7 @@ #define HEADER_MD5_H #include +#include #ifdef __cplusplus extern "C" { @@ -101,16 +102,13 @@ typedef struct MD5state_st MD5_LONG A,B,C,D; MD5_LONG Nl,Nh; MD5_LONG data[MD5_LBLOCK]; - int num; + unsigned int num; } MD5_CTX; -#ifdef OPENSSL_FIPS -int private_MD5_Init(MD5_CTX *c); -#endif int MD5_Init(MD5_CTX *c); -int MD5_Update(MD5_CTX *c, const void *data, unsigned long len); +int MD5_Update(MD5_CTX *c, const void *data, size_t len); int MD5_Final(unsigned char *md, MD5_CTX *c); -unsigned char *MD5(const unsigned char *d, unsigned long n, unsigned char *md); +unsigned char *MD5(const unsigned char *d, size_t n, unsigned char *md); void MD5_Transform(MD5_CTX *c, const unsigned char *b); #ifdef __cplusplus } diff --git a/src/lib/libcrypto/md5/md5_dgst.c b/src/lib/libcrypto/md5/md5_dgst.c index 54b33c6509..b96e332ba4 100644 --- a/src/lib/libcrypto/md5/md5_dgst.c +++ b/src/lib/libcrypto/md5/md5_dgst.c @@ -60,7 +60,7 @@ #include "md5_locl.h" #include -const char *MD5_version="MD5" OPENSSL_VERSION_PTEXT; +const char MD5_version[]="MD5" OPENSSL_VERSION_PTEXT; /* Implemented from RFC1321 The MD5 Message-Digest Algorithm */ @@ -70,7 +70,7 @@ const char *MD5_version="MD5" OPENSSL_VERSION_PTEXT; #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L -FIPS_NON_FIPS_MD_Init(MD5) +int MD5_Init(MD5_CTX *c) { c->A=INIT_DATA_A; c->B=INIT_DATA_B; @@ -82,101 +82,11 @@ FIPS_NON_FIPS_MD_Init(MD5) return 1; } -#ifndef md5_block_host_order -void md5_block_host_order (MD5_CTX *c, const void *data, int num) - { - const MD5_LONG *X=data; - register unsigned MD32_REG_T A,B,C,D; - - A=c->A; - B=c->B; - C=c->C; - D=c->D; - - for (;num--;X+=HASH_LBLOCK) - { - /* Round 0 */ - R0(A,B,C,D,X[ 0], 7,0xd76aa478L); - R0(D,A,B,C,X[ 1],12,0xe8c7b756L); - R0(C,D,A,B,X[ 2],17,0x242070dbL); - R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); - R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); - R0(D,A,B,C,X[ 5],12,0x4787c62aL); - R0(C,D,A,B,X[ 6],17,0xa8304613L); - R0(B,C,D,A,X[ 7],22,0xfd469501L); - R0(A,B,C,D,X[ 8], 7,0x698098d8L); - R0(D,A,B,C,X[ 9],12,0x8b44f7afL); - R0(C,D,A,B,X[10],17,0xffff5bb1L); - R0(B,C,D,A,X[11],22,0x895cd7beL); - R0(A,B,C,D,X[12], 7,0x6b901122L); - R0(D,A,B,C,X[13],12,0xfd987193L); - R0(C,D,A,B,X[14],17,0xa679438eL); - R0(B,C,D,A,X[15],22,0x49b40821L); - /* Round 1 */ - R1(A,B,C,D,X[ 1], 5,0xf61e2562L); - R1(D,A,B,C,X[ 6], 9,0xc040b340L); - R1(C,D,A,B,X[11],14,0x265e5a51L); - R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL); - R1(A,B,C,D,X[ 5], 5,0xd62f105dL); - R1(D,A,B,C,X[10], 9,0x02441453L); - R1(C,D,A,B,X[15],14,0xd8a1e681L); - R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L); - R1(A,B,C,D,X[ 9], 5,0x21e1cde6L); - R1(D,A,B,C,X[14], 9,0xc33707d6L); - R1(C,D,A,B,X[ 3],14,0xf4d50d87L); - R1(B,C,D,A,X[ 8],20,0x455a14edL); - R1(A,B,C,D,X[13], 5,0xa9e3e905L); - R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L); - R1(C,D,A,B,X[ 7],14,0x676f02d9L); - R1(B,C,D,A,X[12],20,0x8d2a4c8aL); - /* Round 2 */ - R2(A,B,C,D,X[ 5], 4,0xfffa3942L); - R2(D,A,B,C,X[ 8],11,0x8771f681L); - R2(C,D,A,B,X[11],16,0x6d9d6122L); - R2(B,C,D,A,X[14],23,0xfde5380cL); - R2(A,B,C,D,X[ 1], 4,0xa4beea44L); - R2(D,A,B,C,X[ 4],11,0x4bdecfa9L); - R2(C,D,A,B,X[ 7],16,0xf6bb4b60L); - R2(B,C,D,A,X[10],23,0xbebfbc70L); - R2(A,B,C,D,X[13], 4,0x289b7ec6L); - R2(D,A,B,C,X[ 0],11,0xeaa127faL); - R2(C,D,A,B,X[ 3],16,0xd4ef3085L); - R2(B,C,D,A,X[ 6],23,0x04881d05L); - R2(A,B,C,D,X[ 9], 4,0xd9d4d039L); - R2(D,A,B,C,X[12],11,0xe6db99e5L); - R2(C,D,A,B,X[15],16,0x1fa27cf8L); - R2(B,C,D,A,X[ 2],23,0xc4ac5665L); - /* Round 3 */ - R3(A,B,C,D,X[ 0], 6,0xf4292244L); - R3(D,A,B,C,X[ 7],10,0x432aff97L); - R3(C,D,A,B,X[14],15,0xab9423a7L); - R3(B,C,D,A,X[ 5],21,0xfc93a039L); - R3(A,B,C,D,X[12], 6,0x655b59c3L); - R3(D,A,B,C,X[ 3],10,0x8f0ccc92L); - R3(C,D,A,B,X[10],15,0xffeff47dL); - R3(B,C,D,A,X[ 1],21,0x85845dd1L); - R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL); - R3(D,A,B,C,X[15],10,0xfe2ce6e0L); - R3(C,D,A,B,X[ 6],15,0xa3014314L); - R3(B,C,D,A,X[13],21,0x4e0811a1L); - R3(A,B,C,D,X[ 4], 6,0xf7537e82L); - R3(D,A,B,C,X[11],10,0xbd3af235L); - R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL); - R3(B,C,D,A,X[ 9],21,0xeb86d391L); - - A = c->A += A; - B = c->B += B; - C = c->C += C; - D = c->D += D; - } - } -#endif - #ifndef md5_block_data_order #ifdef X #undef X #endif -void md5_block_data_order (MD5_CTX *c, const void *data_, int num) +void md5_block_data_order (MD5_CTX *c, const void *data_, size_t num) { const unsigned char *data=data_; register unsigned MD32_REG_T A,B,C,D,l; @@ -274,19 +184,3 @@ void md5_block_data_order (MD5_CTX *c, const void *data_, int num) } } #endif - -#ifdef undef -int printit(unsigned long *l) - { - int i,ii; - - for (i=0; i<2; i++) - { - for (ii=0; ii<8; ii++) - { - fprintf(stderr,"%08lx ",l[i*8+ii]); - } - fprintf(stderr,"\n"); - } - } -#endif diff --git a/src/lib/libcrypto/md5/md5_locl.h b/src/lib/libcrypto/md5/md5_locl.h index 9e360da732..84e81b960d 100644 --- a/src/lib/libcrypto/md5/md5_locl.h +++ b/src/lib/libcrypto/md5/md5_locl.h @@ -66,49 +66,19 @@ #endif #ifdef MD5_ASM -# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) -# define md5_block_host_order md5_block_asm_host_order -# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC) - void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num); -# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned +# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) +# define md5_block_data_order md5_block_asm_data_order # endif #endif -void md5_block_host_order (MD5_CTX *c, const void *p,int num); -void md5_block_data_order (MD5_CTX *c, const void *p,int num); - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) -/* - * *_block_host_order is expected to handle aligned data while - * *_block_data_order - unaligned. As algorithm and host (x86) - * are in this case of the same "endianness" these two are - * otherwise indistinguishable. But normally you don't want to - * call the same function because unaligned access in places - * where alignment is expected is usually a "Bad Thing". Indeed, - * on RISCs you get punished with BUS ERROR signal or *severe* - * performance degradation. Intel CPUs are in turn perfectly - * capable of loading unaligned data without such drastic side - * effect. Yes, they say it's slower than aligned load, but no - * exception is generated and therefore performance degradation - * is *incomparable* with RISCs. What we should weight here is - * costs of unaligned access against costs of aligning data. - * According to my measurements allowing unaligned access results - * in ~9% performance improvement on Pentium II operating at - * 266MHz. I won't be surprised if the difference will be higher - * on faster systems:-) - * - * - */ -#define md5_block_data_order md5_block_host_order -#endif +void md5_block_data_order (MD5_CTX *c, const void *p,size_t num); #define DATA_ORDER_IS_LITTLE_ENDIAN #define HASH_LONG MD5_LONG -#define HASH_LONG_LOG2 MD5_LONG_LOG2 #define HASH_CTX MD5_CTX #define HASH_CBLOCK MD5_CBLOCK -#define HASH_LBLOCK MD5_LBLOCK #define HASH_UPDATE MD5_Update #define HASH_TRANSFORM MD5_Transform #define HASH_FINAL MD5_Final @@ -119,21 +89,7 @@ void md5_block_data_order (MD5_CTX *c, const void *p,int num); ll=(c)->C; HOST_l2c(ll,(s)); \ ll=(c)->D; HOST_l2c(ll,(s)); \ } while (0) -#define HASH_BLOCK_HOST_ORDER md5_block_host_order -#if !defined(L_ENDIAN) || defined(md5_block_data_order) #define HASH_BLOCK_DATA_ORDER md5_block_data_order -/* - * Little-endians (Intel and Alpha) feel better without this. - * It looks like memcpy does better job than generic - * md5_block_data_order on copying-n-aligning input data. - * But frankly speaking I didn't expect such result on Alpha. - * On the other hand I've got this with egcs-1.0.2 and if - * program is compiled with another (better?) compiler it - * might turn out other way around. - * - * - */ -#endif #include "md32_common.h" diff --git a/src/lib/libcrypto/md5/md5_one.c b/src/lib/libcrypto/md5/md5_one.c index 44c6c455d1..43fee89379 100644 --- a/src/lib/libcrypto/md5/md5_one.c +++ b/src/lib/libcrypto/md5/md5_one.c @@ -65,7 +65,7 @@ #include #endif -unsigned char *MD5(const unsigned char *d, unsigned long n, unsigned char *md) +unsigned char *MD5(const unsigned char *d, size_t n, unsigned char *md) { MD5_CTX c; static unsigned char m[MD5_DIGEST_LENGTH]; -- cgit v1.2.3-55-g6feb