From 796d609550df3a33fc11468741c5d2f6d3df4c11 Mon Sep 17 00:00:00 2001 From: beck <> Date: Sun, 19 Mar 2000 11:13:58 +0000 Subject: OpenSSL 0.9.5 merge *warning* this bumps shared lib minors for libssl and libcrypto from 2.1 to 2.2 if you are using the ssl26 packages for ssh and other things to work you will need to get new ones (see ~beck/libsslsnap/) on cvs or ~beck/src-patent.tar.gz on cvs --- src/lib/libcrypto/rc4/rc4_enc.c | 186 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 1 deletion(-) (limited to 'src/lib/libcrypto/rc4/rc4_enc.c') diff --git a/src/lib/libcrypto/rc4/rc4_enc.c b/src/lib/libcrypto/rc4/rc4_enc.c index 3256bea8cc..d5f18a3a70 100644 --- a/src/lib/libcrypto/rc4/rc4_enc.c +++ b/src/lib/libcrypto/rc4/rc4_enc.c @@ -67,7 +67,7 @@ * Date: Wed, 14 Sep 1994 06:35:31 GMT */ -void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, +void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, unsigned char *outdata) { register RC4_INT *d; @@ -78,6 +78,190 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, y=key->y; d=key->data; +#if defined(RC4_CHUNK) + /* + * The original reason for implementing this(*) was the fact that + * pre-21164a Alpha CPUs don't have byte load/store instructions + * and e.g. a byte store has to be done with 64-bit load, shift, + * and, or and finally 64-bit store. Peaking data and operating + * at natural word size made it possible to reduce amount of + * instructions as well as to perform early read-ahead without + * suffering from RAW (read-after-write) hazard. This resulted + * in ~40%(**) performance improvement on 21064 box with gcc. + * But it's not only Alpha users who win here:-) Thanks to the + * early-n-wide read-ahead this implementation also exhibits + * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending + * on sizeof(RC4_INT)). + * + * (*) "this" means code which recognizes the case when input + * and output pointers appear to be aligned at natural CPU + * word boundary + * (**) i.e. according to 'apps/openssl speed rc4' benchmark, + * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... + * + * Cavets. + * + * - RC4_CHUNK="unsigned long long" should be a #1 choice for + * UltraSPARC. Unfortunately gcc generates very slow code + * (2.5-3 times slower than one generated by Sun's WorkShop + * C) and therefore gcc (at least 2.95 and earlier) should + * always be told that RC4_CHUNK="unsigned long". + * + * + */ + +# define RC4_STEP ( \ + x=(x+1) &0xff, \ + tx=d[x], \ + y=(tx+y)&0xff, \ + ty=d[y], \ + d[y]=tx, \ + d[x]=ty, \ + (RC4_CHUNK)d[(tx+ty)&0xff]\ + ) + + if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | + ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 ) + { + RC4_CHUNK ichunk,otp; + const union { long one; char little; } is_endian = {1}; + + /* + * I reckon we can afford to implement both endian + * cases and to decide which way to take at run-time + * because the machine code appears to be very compact + * and redundant 1-2KB is perfectly tolerable (i.e. + * in case the compiler fails to eliminate it:-). By + * suggestion from Terrel Larson + * who also stands for the is_endian union:-) + * + * Special notes. + * + * - is_endian is declared automatic as doing otherwise + * (declaring static) prevents gcc from eliminating + * the redundant code; + * - compilers (those I've tried) don't seem to have + * problems eliminating either the operators guarded + * by "if (sizeof(RC4_CHUNK)==8)" or the condition + * expressions themselves so I've got 'em to replace + * corresponding #ifdefs from the previous version; + * - I chose to let the redundant switch cases when + * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed + * before); + * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in + * [LB]ESHFT guards against "shift is out of range" + * warnings when sizeof(RC4_CHUNK)!=8 + * + * + */ + if (!is_endian.little) + { /* BIG-ENDIAN CASE */ +# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) + for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) + { + ichunk = *(RC4_CHUNK *)indata; + otp = RC4_STEP<x=x; + key->y=y; + return; + } + else + { /* LITTLE-ENDIAN CASE */ +# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) + for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) + { + ichunk = *(RC4_CHUNK *)indata; + otp = RC4_STEP; + otp |= RC4_STEP<<8; + otp |= RC4_STEP<<16; + otp |= RC4_STEP<<24; + if (sizeof(RC4_CHUNK)==8) + { + otp |= RC4_STEP<>= (sizeof(RC4_CHUNK)-len)<<3; + switch (len&(sizeof(RC4_CHUNK)-1)) + { + case 7: otp = RC4_STEP, i+=8; + case 6: otp |= RC4_STEP<x=x; + key->y=y; + return; + } + } +#endif #define LOOP(in,out) \ x=((x+1)&0xff); \ tx=d[x]; \ -- cgit v1.2.3-55-g6feb