From 38ce604e3cc97706b876b0525ddff0121115456d Mon Sep 17 00:00:00 2001
From: djm <>
Date: Sat, 6 Sep 2008 12:17:54 +0000
Subject: resolve conflicts

---
 src/lib/libcrypto/rc4/rc4_skey.c | 55 +++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 12 deletions(-)

(limited to 'src/lib/libcrypto/rc4/rc4_skey.c')

diff --git a/src/lib/libcrypto/rc4/rc4_skey.c b/src/lib/libcrypto/rc4/rc4_skey.c
index 60510624fd..46b77ec321 100644
--- a/src/lib/libcrypto/rc4/rc4_skey.c
+++ b/src/lib/libcrypto/rc4/rc4_skey.c
@@ -57,12 +57,10 @@
  */
 
 #include <openssl/rc4.h>
-#include <openssl/crypto.h>
-#include <openssl/fips.h>
 #include "rc4_locl.h"
 #include <openssl/opensslv.h>
 
-const char *RC4_version="RC4" OPENSSL_VERSION_PTEXT;
+const char RC4_version[]="RC4" OPENSSL_VERSION_PTEXT;
 
 const char *RC4_options(void)
 	{
@@ -87,7 +85,7 @@ const char *RC4_options(void)
  * Date: Wed, 14 Sep 1994 06:35:31 GMT
  */
 
-FIPS_NON_FIPS_VCIPHER_Init(RC4)
+void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
 	{
         register RC4_INT tmp;
         register int id1,id2;
@@ -95,26 +93,59 @@ FIPS_NON_FIPS_VCIPHER_Init(RC4)
         unsigned int i;
         
         d= &(key->data[0]);
-
-	for (i=0; i<256; i++)
-		d[i]=i;
         key->x = 0;     
         key->y = 0;     
         id1=id2=0;     
 
-#define SK_LOOP(n) { \
+#define SK_LOOP(d,n) { \
 		tmp=d[(n)]; \
 		id2 = (data[id1] + tmp + id2) & 0xff; \
 		if (++id1 == len) id1=0; \
 		d[(n)]=d[id2]; \
 		d[id2]=tmp; }
 
+#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
+# if	defined(__i386)   || defined(__i386__)   || defined(_M_IX86) || \
+	defined(__INTEL__) || \
+	defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64)
+	if (sizeof(RC4_INT) > 1) {
+		/*
+		 * Unlike all other x86 [and x86_64] implementations,
+		 * Intel P4 core [including EM64T] was found to perform
+		 * poorly with wider RC4_INT. Performance improvement
+		 * for IA-32 hand-coded assembler turned out to be 2.8x
+		 * if re-coded for RC4_CHAR! It's however inappropriate
+		 * to just switch to RC4_CHAR for x86[_64], as non-P4
+		 * implementations suffer from significant performance
+		 * losses then, e.g. PIII exhibits >2x deterioration,
+		 * and so does Opteron. In order to assure optimal
+		 * all-round performance, we detect P4 at run-time by
+		 * checking upon reserved bit 20 in CPU capability
+		 * vector and set up compressed key schedule, which is
+		 * recognized by correspondingly updated assembler
+		 * module... Bit 20 is set up by OPENSSL_ia32_cpuid.
+		 *
+		 *				<appro@fy.chalmers.se>
+		 */
+		if (OPENSSL_ia32cap_P & (1<<20)) {
+			unsigned char *cp=(unsigned char *)d;
+
+			for (i=0;i<256;i++) cp[i]=i;
+			for (i=0;i<256;i++) SK_LOOP(cp,i);
+			/* mark schedule as compressed! */
+			d[256/sizeof(RC4_INT)]=-1;
+			return;
+		}
+	}
+# endif
+#endif
+	for (i=0; i < 256; i++) d[i]=i;
 	for (i=0; i < 256; i+=4)
 		{
-		SK_LOOP(i+0);
-		SK_LOOP(i+1);
-		SK_LOOP(i+2);
-		SK_LOOP(i+3);
+		SK_LOOP(d,i+0);
+		SK_LOOP(d,i+1);
+		SK_LOOP(d,i+2);
+		SK_LOOP(d,i+3);
 		}
 	}
     
-- 
cgit v1.2.3-55-g6feb