1 files changed, 43 insertions, 1 deletions
diff --git a/src/lib/libcrypto/md4/md4_locl.h b/src/lib/libcrypto/md4/md4_locl.h
index c8085b0ead..a8d31d7a73 100644
--- a/src/lib/libcrypto/md4/md4_locl.h
+++ b/src/lib/libcrypto/md4/md4_locl.h
@@ -65,13 +65,41 @@
 #define MD4_LONG_LOG2 2 /* default to 32 bits */
 #endif
-void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
+void md4_block_host_order (MD4_CTX *c, const void *p,int num);
+void md4_block_data_order (MD4_CTX *c, const void *p,int num);
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
+/*
+ * *_block_host_order is expected to handle aligned data while
+ * *_block_data_order - unaligned. As algorithm and host (x86)
+ * are in this case of the same "endianness" these two are
+ * otherwise indistinguishable. But normally you don't want to
+ * call the same function because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs are in turn perfectly
+ * capable of loading unaligned data without such drastic side
+ * effect. Yes, they say it's slower than aligned load, but no
+ * exception is generated and therefore performance degradation
+ * is *incomparable* with RISCs. What we should weigh here is
+ * the cost of unaligned access against the cost of aligning data.
+ * According to my measurements allowing unaligned access results
+ * in ~9% performance improvement on Pentium II operating at
+ * 266MHz. I won't be surprised if the difference will be higher
+ * on faster systems:-)
+ *
+ *                              <appro@fy.chalmers.se>
+ */
+#define md4_block_data_order md4_block_host_order
+#endif
 #define DATA_ORDER_IS_LITTLE_ENDIAN
 #define HASH_LONG               MD4_LONG
+#define HASH_LONG_LOG2          MD4_LONG_LOG2
 #define HASH_CTX                MD4_CTX
 #define HASH_CBLOCK             MD4_CBLOCK
+#define HASH_LBLOCK             MD4_LBLOCK
 #define HASH_UPDATE             MD4_Update
 #define HASH_TRANSFORM          MD4_Transform
 #define HASH_FINAL              MD4_Final
@@ -82,7 +110,21 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
        ll=(c)->C; HOST_l2c(ll,(s));    \
        ll=(c)->D; HOST_l2c(ll,(s));    \
        } while (0)
+#define HASH_BLOCK_HOST_ORDER   md4_block_host_order
+#if !defined(L_ENDIAN) || defined(md4_block_data_order)
 #define HASH_BLOCK_DATA_ORDER   md4_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does a better job than the generic
+ * md4_block_data_order on copying-n-aligning input data.
+ * But frankly speaking I didn't expect such result on Alpha.
+ * On the other hand I've got this with egcs-1.0.2 and if
+ * the program is compiled with another (better?) compiler it
+ * might turn out the other way around.
+ *
+ *                              <appro@fy.chalmers.se>
+ */
+#endif
 #include "md32_common.h"

diff --git a/src/lib/libcrypto/md4/md4_locl.h b/src/lib/libcrypto/md4/md4_locl.h index c8085b0ead..a8d31d7a73 100644 --- a/src/lib/libcrypto/md4/md4_locl.h +++ b/src/lib/libcrypto/md4/md4_locl.h
@@ -65,13 +65,41 @@
65	#define MD4_LONG_LOG2 2 /* default to 32 bits */	65	#define MD4_LONG_LOG2 2 /* default to 32 bits */
66	#endif	66	#endif
67		67
68	void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);	68	void md4_block_host_order (MD4_CTX *c, const void *p,int num);
		69	void md4_block_data_order (MD4_CTX *c, const void *p,int num);
		70
		71	#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
		72	/*
		73	* *_block_host_order is expected to handle aligned data while
		74	* *_block_data_order - unaligned. As algorithm and host (x86)
		75	* are in this case of the same "endianness" these two are
		76	* otherwise indistinguishable. But normally you don't want to
		77	* call the same function because unaligned access in places
		78	* where alignment is expected is usually a "Bad Thing". Indeed,
		79	* on RISCs you get punished with BUS ERROR signal or *severe*
		80	* performance degradation. Intel CPUs are in turn perfectly
		81	* capable of loading unaligned data without such drastic side
		82	* effect. Yes, they say it's slower than aligned load, but no
		83	* exception is generated and therefore performance degradation
		84	* is *incomparable* with RISCs. What we should weigh here is
		85	* the cost of unaligned access against the cost of aligning data.
		86	* According to my measurements allowing unaligned access results
		87	* in ~9% performance improvement on Pentium II operating at
		88	* 266MHz. I won't be surprised if the difference will be higher
		89	* on faster systems:-)
		90	*
		91	* <appro@fy.chalmers.se>
		92	*/
		93	#define md4_block_data_order md4_block_host_order
		94	#endif
69		95
70	#define DATA_ORDER_IS_LITTLE_ENDIAN	96	#define DATA_ORDER_IS_LITTLE_ENDIAN
71		97
72	#define HASH_LONG MD4_LONG	98	#define HASH_LONG MD4_LONG
		99	#define HASH_LONG_LOG2 MD4_LONG_LOG2
73	#define HASH_CTX MD4_CTX	100	#define HASH_CTX MD4_CTX
74	#define HASH_CBLOCK MD4_CBLOCK	101	#define HASH_CBLOCK MD4_CBLOCK
		102	#define HASH_LBLOCK MD4_LBLOCK
75	#define HASH_UPDATE MD4_Update	103	#define HASH_UPDATE MD4_Update
76	#define HASH_TRANSFORM MD4_Transform	104	#define HASH_TRANSFORM MD4_Transform
77	#define HASH_FINAL MD4_Final	105	#define HASH_FINAL MD4_Final
@@ -82,7 +110,21 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
82	ll=(c)->C; HOST_l2c(ll,(s)); \	110	ll=(c)->C; HOST_l2c(ll,(s)); \
83	ll=(c)->D; HOST_l2c(ll,(s)); \	111	ll=(c)->D; HOST_l2c(ll,(s)); \
84	} while (0)	112	} while (0)
		113	#define HASH_BLOCK_HOST_ORDER md4_block_host_order
		114	#if !defined(L_ENDIAN) || defined(md4_block_data_order)
85	#define HASH_BLOCK_DATA_ORDER md4_block_data_order	115	#define HASH_BLOCK_DATA_ORDER md4_block_data_order
		116	/*
		117	* Little-endians (Intel and Alpha) feel better without this.
		118	* It looks like memcpy does a better job than the generic
		119	* md4_block_data_order on copying-n-aligning input data.
		120	* But frankly speaking I didn't expect such result on Alpha.
		121	* On the other hand I've got this with egcs-1.0.2 and if
		122	* the program is compiled with another (better?) compiler it
		123	* might turn out the other way around.
		124	*
		125	* <appro@fy.chalmers.se>
		126	*/
		127	#endif
86		128
87	#include "md32_common.h"	129	#include "md32_common.h"
88		130