Diffstat (limited to 'src/lib/libcrypto/md5/md5_locl.h')
-rw-r--r--  src/lib/libcrypto/md5/md5_locl.h  167
1 file changed, 72 insertions(+), 95 deletions(-)
diff --git a/src/lib/libcrypto/md5/md5_locl.h b/src/lib/libcrypto/md5/md5_locl.h
index dbbe1b71ca..34c5257306 100644
--- a/src/lib/libcrypto/md5/md5_locl.h
+++ b/src/lib/libcrypto/md5/md5_locl.h
@@ -56,109 +56,94 @@
  * [including the GNU Public Licence.]
  */

-/* On sparc, this actually slows things down :-( */
-#if defined(sun)
-#undef B_ENDIAN
-#endif
-
 #include <stdlib.h>
 #include <string.h>
-#include "md5.h"
-
-#define ULONG unsigned long
-#define UCHAR unsigned char
-#define UINT unsigned int
+#include <openssl/opensslconf.h>
+#include <openssl/md5.h>

-#if defined(NOCONST)
-#define const
+#ifndef MD5_LONG_LOG2
+#define MD5_LONG_LOG2	2 /* default to 32 bits */
 #endif

-#undef c2l
-#define c2l(c,l)	(l = ((unsigned long)(*((c)++)))     , \
-			 l|=(((unsigned long)(*((c)++)))<< 8), \
-			 l|=(((unsigned long)(*((c)++)))<<16), \
-			 l|=(((unsigned long)(*((c)++)))<<24))
-
-#undef p_c2l
-#define p_c2l(c,l,n)	{ \
-			switch (n) { \
-			case 0: l =((unsigned long)(*((c)++))); \
-			case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-			case 2: l|=((unsigned long)(*((c)++)))<<16; \
-			case 3: l|=((unsigned long)(*((c)++)))<<24; \
-				} \
-			}
+#ifdef MD5_ASM
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
+#  define md5_block_host_order md5_block_asm_host_order
+# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
+   void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num);
+#  define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+# endif
+#endif

-/* NOTE the pointer is not incremented at the end of this */
-#undef c2l_p
-#define c2l_p(c,l,n)	{ \
-			l=0; \
-			(c)+=n; \
-			switch (n) { \
-			case 3: l =((unsigned long)(*(--(c))))<<16; \
-			case 2: l|=((unsigned long)(*(--(c))))<< 8; \
-			case 1: l|=((unsigned long)(*(--(c))))    ; \
-				} \
-			}
+void md5_block_host_order (MD5_CTX *c, const void *p,int num);
+void md5_block_data_order (MD5_CTX *c, const void *p,int num);

-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
-			switch (sc) \
-				{ \
-			case 0: l =((unsigned long)(*((c)++))); \
-				if (--len == 0) break; \
-			case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-				if (--len == 0) break; \
-			case 2: l|=((unsigned long)(*((c)++)))<<16; \
-				} \
-			}
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
+/*
+ * *_block_host_order is expected to handle aligned data while
+ * *_block_data_order - unaligned. As algorithm and host (x86)
+ * are in this case of the same "endianness" these two are
+ * otherwise indistinguishable. But normally you don't want to
+ * call the same function because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs are in turn perfectly
+ * capable of loading unaligned data without such drastic side
+ * effect. Yes, they say it's slower than aligned load, but no
+ * exception is generated and therefore performance degradation
+ * is *incomparable* with RISCs. What we should weight here is
+ * costs of unaligned access against costs of aligning data.
+ * According to my measurements allowing unaligned access results
+ * in ~9% performance improvement on Pentium II operating at
+ * 266MHz. I won't be surprised if the difference will be higher
+ * on faster systems:-)
+ *
+ *				<appro@fy.chalmers.se>
+ */
+#define md5_block_data_order md5_block_host_order
+#endif

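The x86 branch added above aliases md5_block_data_order to md5_block_host_order because, as the comment explains, unaligned little-endian loads are cheap on Intel CPUs. A minimal standalone sketch of the two load styles being compared, assuming a little-endian host (the helper names below are illustrative and do not exist in this tree):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Byte-wise little-endian load: portable to any host and any alignment,
 * the style a generic *_block_data_order routine has to use. */
static uint32_t load_le32_bytes(const unsigned char *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Whole-word load via memcpy: on x86 this compiles to a single load,
 * aligned or not, which is the "host order" fast path. */
static uint32_t load_host32(const unsigned char *p)
{
	uint32_t v;
	memcpy(&v, p, sizeof(v));
	return v;
}

int main(void)
{
	unsigned char buf[8] = { 0, 0x78, 0x56, 0x34, 0x12, 0, 0, 0 };

	/* On a little-endian host the two loads agree even at an odd
	 * offset, which is why the data-order and host-order entry
	 * points can be the same function on x86. */
	assert(load_le32_bytes(buf + 1) == load_host32(buf + 1));
	return 0;
}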
-#undef l2c
-#define l2c(l,c)	(*((c)++)=(unsigned char)(((l)     )&0xff), \
-			 *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
-			 *((c)++)=(unsigned char)(((l)>>16)&0xff), \
-			 *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#define DATA_ORDER_IS_LITTLE_ENDIAN
+
+#define HASH_LONG		MD5_LONG
+#define HASH_LONG_LOG2		MD5_LONG_LOG2
+#define HASH_CTX		MD5_CTX
+#define HASH_CBLOCK		MD5_CBLOCK
+#define HASH_LBLOCK		MD5_LBLOCK
+#define HASH_UPDATE		MD5_Update
+#define HASH_TRANSFORM		MD5_Transform
+#define HASH_FINAL		MD5_Final
+#define HASH_MAKE_STRING(c,s)	do {	\
+	unsigned long ll;		\
+	ll=(c)->A; HOST_l2c(ll,(s));	\
+	ll=(c)->B; HOST_l2c(ll,(s));	\
+	ll=(c)->C; HOST_l2c(ll,(s));	\
+	ll=(c)->D; HOST_l2c(ll,(s));	\
+	} while (0)
+#define HASH_BLOCK_HOST_ORDER	md5_block_host_order
+#if !defined(L_ENDIAN) || defined(md5_block_data_order)
+#define HASH_BLOCK_DATA_ORDER	md5_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does better job than generic
+ * md5_block_data_order on copying-n-aligning input data.
+ * But frankly speaking I didn't expect such result on Alpha.
+ * On the other hand I've got this with egcs-1.0.2 and if
+ * program is compiled with another (better?) compiler it
+ * might turn out other way around.
+ *
+ *				<appro@fy.chalmers.se>
+ */
+#endif

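HASH_MAKE_STRING above serializes the four state words A..D into the 16-byte digest with HOST_l2c from md32_common.h; with DATA_ORDER_IS_LITTLE_ENDIAN defined that is a little-endian byte store, much like the removed l2c macro above. A rough sketch of that store (the helper name is made up for illustration):

#include <stdio.h>

/* Emit a 32-bit word as four little-endian bytes and advance the
 * output pointer, in the spirit of HOST_l2c / the old l2c. */
static unsigned char *store_le32(unsigned long v, unsigned char *out)
{
	*out++ = (unsigned char)( v        & 0xff);
	*out++ = (unsigned char)((v >>  8) & 0xff);
	*out++ = (unsigned char)((v >> 16) & 0xff);
	*out++ = (unsigned char)((v >> 24) & 0xff);
	return out;
}

int main(void)
{
	/* MD5's initial state A,B,C,D laid out the way HASH_MAKE_STRING
	 * would emit a digest. */
	unsigned long state[4] = { 0x67452301UL, 0xefcdab89UL,
	                           0x98badcfeUL, 0x10325476UL };
	unsigned char md[16], *p = md;
	int i;

	for (i = 0; i < 4; i++)
		p = store_le32(state[i], p);
	for (i = 0; i < 16; i++)
		printf("%02x", md[i]);
	printf("\n");
	return 0;
}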
-/* NOTE - c is not incremented as per l2c */
-#undef l2cn
-#define l2cn(l1,l2,c,n)	{ \
-			c+=n; \
-			switch (n) { \
-			case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
-			case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
-			case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
-			case 5: *(--(c))=(unsigned char)(((l2)    )&0xff); \
-			case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
-			case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
-			case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
-			case 1: *(--(c))=(unsigned char)(((l1)    )&0xff); \
-				} \
-			}
+#include "md32_common.h"

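Once md32_common.h is pulled in with the macros above, it expands into the MD5_Update, MD5_Transform and MD5_Final bodies declared in <openssl/md5.h>. A short caller's-eye usage sketch of the resulting public API (ordinary application code, not part of this header):

#include <stdio.h>
#include <string.h>
#include <openssl/md5.h>

int main(void)
{
	MD5_CTX ctx;
	unsigned char md[MD5_DIGEST_LENGTH];
	const char *msg = "abc";
	int i;

	MD5_Init(&ctx);
	MD5_Update(&ctx, msg, strlen(msg));
	MD5_Final(md, &ctx);

	/* RFC 1321 test vector: MD5("abc") = 900150983cd24fb0d6963f7d28e17f72 */
	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
		printf("%02x", md[i]);
	printf("\n");
	return 0;
}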
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#if defined(WIN32)
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
-	{ \
-	unsigned long l=(a); \
-	(a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
-	}
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
-	{ \
-	unsigned long l=(a); \
-	l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
-	(a)=ROTATE(l,16L); \
-	}
-#endif
 /*
 #define F(x,y,z)	(((x) & (y)) | ((~(x)) & (z)))
 #define G(x,y,z)	(((x) & (z)) | ((y) & (~(z))))
 */

 /* As pointed out by Wei Dai <weidai@eskimo.com>, the above can be
- * simplified to the code below.  Wei attributes these optimisations
+ * simplified to the code below.  Wei attributes these optimizations
  * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel.
  */
 #define F(b,c,d)	((((c) ^ (d)) & (b)) ^ (d))
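The F used above computes the same boolean function as the commented-out textbook form, just with fewer operations. A quick standalone equivalence check (a sketch; not part of the header):

#include <assert.h>
#include <stdint.h>

/* Textbook form, as shown commented out in the header. */
#define F_REF(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
/* Schroeppel/Gutmann form actually used. */
#define F_OPT(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))

int main(void)
{
	uint32_t v[4] = { 0x00000000u, 0xffffffffu,
	                  0xdeadbeefu, 0x12345678u };
	int i, j, k;

	for (i = 0; i < 4; i++)
		for (j = 0; j < 4; j++)
			for (k = 0; k < 4; k++)
				assert(F_REF(v[i], v[j], v[k]) ==
				       F_OPT(v[i], v[j], v[k]));
	return 0;
}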
@@ -166,14 +151,6 @@
 #define H(b,c,d)	((b) ^ (c) ^ (d))
 #define I(b,c,d)	(((~(d)) | (b)) ^ (c))

-#undef ROTATE
-#if defined(WIN32)
-#define ROTATE(a,n)	_lrotl(a,n)
-#else
-#define ROTATE(a,n)	(((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#endif
-
-
 #define R0(a,b,c,d,k,s,t) { \
 	a+=((k)+(t)+F((b),(c),(d))); \
 	a=ROTATE(a,s); \
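R0 above is one step of MD5 round 0: add the message word passed as k, the additive constant t and F(b,c,d) into a, rotate left by s, then add b (the final a+=b of the macro falls just past this excerpt). A minimal standalone sketch of the same step, with a local rotate standing in for the ROTATE now supplied by md32_common.h:

#include <stdint.h>
#include <stdio.h>

#define F(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))

/* Plain 32-bit rotate-left; only called here with 0 < n < 32. */
static uint32_t rotl32(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

/* One round-0 step, mirroring the R0 macro:
 * a = b + ((a + F(b,c,d) + X[k] + t) <<< s). */
static uint32_t round0_step(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
    uint32_t xk, int s, uint32_t t)
{
	a += xk + t + F(b, c, d);
	a = rotl32(a, s);
	return a + b;
}

int main(void)
{
	/* First MD5 step on an all-zero block: k = 0, s = 7, t = 0xd76aa478. */
	uint32_t a = 0x67452301u, b = 0xefcdab89u;
	uint32_t c = 0x98badcfeu, d = 0x10325476u;

	printf("new a = %08x\n", round0_step(a, b, c, d, 0, 7, 0xd76aa478u));
	return 0;
}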