diff options
Diffstat (limited to 'src/lib/libcrypto/rc4/rc4_enc.c')
| -rw-r--r-- | src/lib/libcrypto/rc4/rc4_enc.c | 194 |
1 files changed, 187 insertions, 7 deletions
diff --git a/src/lib/libcrypto/rc4/rc4_enc.c b/src/lib/libcrypto/rc4/rc4_enc.c index ab8a111b52..d5f18a3a70 100644 --- a/src/lib/libcrypto/rc4/rc4_enc.c +++ b/src/lib/libcrypto/rc4/rc4_enc.c | |||
| @@ -56,7 +56,7 @@ | |||
| 56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
| 57 | */ | 57 | */ |
| 58 | 58 | ||
| 59 | #include "rc4.h" | 59 | #include <openssl/rc4.h> |
| 60 | #include "rc4_locl.h" | 60 | #include "rc4_locl.h" |
| 61 | 61 | ||
| 62 | /* RC4 as implemented from a posting from | 62 | /* RC4 as implemented from a posting from |
| @@ -67,11 +67,8 @@ | |||
| 67 | * Date: Wed, 14 Sep 1994 06:35:31 GMT | 67 | * Date: Wed, 14 Sep 1994 06:35:31 GMT |
| 68 | */ | 68 | */ |
| 69 | 69 | ||
| 70 | void RC4(key, len, indata, outdata) | 70 | void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, |
| 71 | RC4_KEY *key; | 71 | unsigned char *outdata) |
| 72 | unsigned long len; | ||
| 73 | unsigned char *indata; | ||
| 74 | unsigned char *outdata; | ||
| 75 | { | 72 | { |
| 76 | register RC4_INT *d; | 73 | register RC4_INT *d; |
| 77 | register RC4_INT x,y,tx,ty; | 74 | register RC4_INT x,y,tx,ty; |
| @@ -81,6 +78,190 @@ unsigned char *outdata; | |||
| 81 | y=key->y; | 78 | y=key->y; |
| 82 | d=key->data; | 79 | d=key->data; |
| 83 | 80 | ||
| 81 | #if defined(RC4_CHUNK) | ||
| 82 | /* | ||
| 83 | * The original reason for implementing this(*) was the fact that | ||
| 84 | * pre-21164a Alpha CPUs don't have byte load/store instructions | ||
| 85 | * and e.g. a byte store has to be done with 64-bit load, shift, | ||
| 86 | * and, or and finally 64-bit store. Peeking data and operating | ||
| 87 | * at natural word size made it possible to reduce the number of | ||
| 88 | * instructions as well as to perform early read-ahead without | ||
| 89 | * suffering from RAW (read-after-write) hazard. This resulted | ||
| 90 | * in ~40%(**) performance improvement on 21064 box with gcc. | ||
| 91 | * But it's not only Alpha users who win here:-) Thanks to the | ||
| 92 | * early-n-wide read-ahead this implementation also exhibits | ||
| 93 | * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending | ||
| 94 | * on sizeof(RC4_INT)). | ||
| 95 | * | ||
| 96 | * (*) "this" means code which recognizes the case when input | ||
| 97 | * and output pointers appear to be aligned at natural CPU | ||
| 98 | * word boundary | ||
| 99 | * (**) i.e. according to 'apps/openssl speed rc4' benchmark, | ||
| 100 | * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... | ||
| 101 | * | ||
| 102 | * Caveats. | ||
| 103 | * | ||
| 104 | * - RC4_CHUNK="unsigned long long" should be a #1 choice for | ||
| 105 | * UltraSPARC. Unfortunately gcc generates very slow code | ||
| 106 | * (2.5-3 times slower than one generated by Sun's WorkShop | ||
| 107 | * C) and therefore gcc (at least 2.95 and earlier) should | ||
| 108 | * always be told that RC4_CHUNK="unsigned long". | ||
| 109 | * | ||
| 110 | * <appro@fy.chalmers.se> | ||
| 111 | */ | ||
| 112 | |||
| 113 | # define RC4_STEP ( \ | ||
| 114 | x=(x+1) &0xff, \ | ||
| 115 | tx=d[x], \ | ||
| 116 | y=(tx+y)&0xff, \ | ||
| 117 | ty=d[y], \ | ||
| 118 | d[y]=tx, \ | ||
| 119 | d[x]=ty, \ | ||
| 120 | (RC4_CHUNK)d[(tx+ty)&0xff]\ | ||
| 121 | ) | ||
| 122 | |||
| 123 | if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | | ||
| 124 | ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 ) | ||
| 125 | { | ||
| 126 | RC4_CHUNK ichunk,otp; | ||
| 127 | const union { long one; char little; } is_endian = {1}; | ||
| 128 | |||
| 129 | /* | ||
| 130 | * I reckon we can afford to implement both endian | ||
| 131 | * cases and to decide which way to take at run-time | ||
| 132 | * because the machine code appears to be very compact | ||
| 133 | * and redundant 1-2KB is perfectly tolerable (i.e. | ||
| 134 | * in case the compiler fails to eliminate it:-). By | ||
| 135 | * suggestion from Terrel Larson <terr@terralogic.net> | ||
| 136 | * who also stands for the is_endian union:-) | ||
| 137 | * | ||
| 138 | * Special notes. | ||
| 139 | * | ||
| 140 | * - is_endian is declared automatic as doing otherwise | ||
| 141 | * (declaring static) prevents gcc from eliminating | ||
| 142 | * the redundant code; | ||
| 143 | * - compilers (those I've tried) don't seem to have | ||
| 144 | * problems eliminating either the operators guarded | ||
| 145 | * by "if (sizeof(RC4_CHUNK)==8)" or the condition | ||
| 146 | * expressions themselves so I've got 'em to replace | ||
| 147 | * corresponding #ifdefs from the previous version; | ||
| 148 | * - I chose to leave the redundant switch cases in place | ||
| 149 | * when sizeof(RC4_CHUNK)!=8 (they were also #ifdefed | ||
| 150 | * before); | ||
| 151 | * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in | ||
| 152 | * [LB]ESHFT guards against "shift is out of range" | ||
| 153 | * warnings when sizeof(RC4_CHUNK)!=8 | ||
| 154 | * | ||
| 155 | * <appro@fy.chalmers.se> | ||
| 156 | */ | ||
| 157 | if (!is_endian.little) | ||
| 158 | { /* BIG-ENDIAN CASE */ | ||
| 159 | # define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) | ||
| 160 | for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) | ||
| 161 | { | ||
| 162 | ichunk = *(RC4_CHUNK *)indata; | ||
| 163 | otp = RC4_STEP<<BESHFT(0); | ||
| 164 | otp |= RC4_STEP<<BESHFT(1); | ||
| 165 | otp |= RC4_STEP<<BESHFT(2); | ||
| 166 | otp |= RC4_STEP<<BESHFT(3); | ||
| 167 | if (sizeof(RC4_CHUNK)==8) | ||
| 168 | { | ||
| 169 | otp |= RC4_STEP<<BESHFT(4); | ||
| 170 | otp |= RC4_STEP<<BESHFT(5); | ||
| 171 | otp |= RC4_STEP<<BESHFT(6); | ||
| 172 | otp |= RC4_STEP<<BESHFT(7); | ||
| 173 | } | ||
| 174 | *(RC4_CHUNK *)outdata = otp^ichunk; | ||
| 175 | indata += sizeof(RC4_CHUNK); | ||
| 176 | outdata += sizeof(RC4_CHUNK); | ||
| 177 | } | ||
| 178 | if (len) | ||
| 179 | { | ||
| 180 | RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk; | ||
| 181 | |||
| 182 | ichunk = *(RC4_CHUNK *)indata; | ||
| 183 | ochunk = *(RC4_CHUNK *)outdata; | ||
| 184 | otp = 0; | ||
| 185 | i = BESHFT(0); | ||
| 186 | mask <<= (sizeof(RC4_CHUNK)-len)<<3; | ||
| 187 | switch (len&(sizeof(RC4_CHUNK)-1)) | ||
| 188 | { | ||
| 189 | case 7: otp = RC4_STEP<<i, i-=8; | ||
| 190 | case 6: otp |= RC4_STEP<<i, i-=8; | ||
| 191 | case 5: otp |= RC4_STEP<<i, i-=8; | ||
| 192 | case 4: otp |= RC4_STEP<<i, i-=8; | ||
| 193 | case 3: otp |= RC4_STEP<<i, i-=8; | ||
| 194 | case 2: otp |= RC4_STEP<<i, i-=8; | ||
| 195 | case 1: otp |= RC4_STEP<<i, i-=8; | ||
| 196 | case 0: ; /* | ||
| 197 | * it's never the case, | ||
| 198 | * but it has to be here | ||
| 199 | * for ultrix? | ||
| 200 | */ | ||
| 201 | } | ||
| 202 | ochunk &= ~mask; | ||
| 203 | ochunk |= (otp^ichunk) & mask; | ||
| 204 | *(RC4_CHUNK *)outdata = ochunk; | ||
| 205 | } | ||
| 206 | key->x=x; | ||
| 207 | key->y=y; | ||
| 208 | return; | ||
| 209 | } | ||
| 210 | else | ||
| 211 | { /* LITTLE-ENDIAN CASE */ | ||
| 212 | # define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) | ||
| 213 | for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) | ||
| 214 | { | ||
| 215 | ichunk = *(RC4_CHUNK *)indata; | ||
| 216 | otp = RC4_STEP; | ||
| 217 | otp |= RC4_STEP<<8; | ||
| 218 | otp |= RC4_STEP<<16; | ||
| 219 | otp |= RC4_STEP<<24; | ||
| 220 | if (sizeof(RC4_CHUNK)==8) | ||
| 221 | { | ||
| 222 | otp |= RC4_STEP<<LESHFT(4); | ||
| 223 | otp |= RC4_STEP<<LESHFT(5); | ||
| 224 | otp |= RC4_STEP<<LESHFT(6); | ||
| 225 | otp |= RC4_STEP<<LESHFT(7); | ||
| 226 | } | ||
| 227 | *(RC4_CHUNK *)outdata = otp^ichunk; | ||
| 228 | indata += sizeof(RC4_CHUNK); | ||
| 229 | outdata += sizeof(RC4_CHUNK); | ||
| 230 | } | ||
| 231 | if (len) | ||
| 232 | { | ||
| 233 | RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk; | ||
| 234 | |||
| 235 | ichunk = *(RC4_CHUNK *)indata; | ||
| 236 | ochunk = *(RC4_CHUNK *)outdata; | ||
| 237 | otp = 0; | ||
| 238 | i = 0; | ||
| 239 | mask >>= (sizeof(RC4_CHUNK)-len)<<3; | ||
| 240 | switch (len&(sizeof(RC4_CHUNK)-1)) | ||
| 241 | { | ||
| 242 | case 7: otp = RC4_STEP, i+=8; | ||
| 243 | case 6: otp |= RC4_STEP<<i, i+=8; | ||
| 244 | case 5: otp |= RC4_STEP<<i, i+=8; | ||
| 245 | case 4: otp |= RC4_STEP<<i, i+=8; | ||
| 246 | case 3: otp |= RC4_STEP<<i, i+=8; | ||
| 247 | case 2: otp |= RC4_STEP<<i, i+=8; | ||
| 248 | case 1: otp |= RC4_STEP<<i, i+=8; | ||
| 249 | case 0: ; /* | ||
| 250 | * it's never the case, | ||
| 251 | * but it has to be here | ||
| 252 | * for ultrix? | ||
| 253 | */ | ||
| 254 | } | ||
| 255 | ochunk &= ~mask; | ||
| 256 | ochunk |= (otp^ichunk) & mask; | ||
| 257 | *(RC4_CHUNK *)outdata = ochunk; | ||
| 258 | } | ||
| 259 | key->x=x; | ||
| 260 | key->y=y; | ||
| 261 | return; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | #endif | ||
| 84 | #define LOOP(in,out) \ | 265 | #define LOOP(in,out) \ |
| 85 | x=((x+1)&0xff); \ | 266 | x=((x+1)&0xff); \ |
| 86 | tx=d[x]; \ | 267 | tx=d[x]; \ |
| @@ -95,7 +276,6 @@ unsigned char *outdata; | |||
| 95 | #define RC4_LOOP(a,b,i) LOOP(a[i],b[i]) | 276 | #define RC4_LOOP(a,b,i) LOOP(a[i],b[i]) |
| 96 | #endif | 277 | #endif |
| 97 | 278 | ||
| 98 | i= -(int)len; | ||
| 99 | i=(int)(len>>3L); | 279 | i=(int)(len>>3L); |
| 100 | if (i) | 280 | if (i) |
| 101 | { | 281 | { |
