diff options
author | beck <> | 2000-03-19 11:13:58 +0000 |
---|---|---|
committer | beck <> | 2000-03-19 11:13:58 +0000 |
commit | 796d609550df3a33fc11468741c5d2f6d3df4c11 (patch) | |
tree | 6c6d539061caa20372dad0ac4ddb1dfae2fbe7fe /src/lib/libcrypto/rc4/rc4_enc.c | |
parent | 5be3114c1fd7e0dfea1e38d3abb4cbba75244419 (diff) | |
download | openbsd-796d609550df3a33fc11468741c5d2f6d3df4c11.tar.gz openbsd-796d609550df3a33fc11468741c5d2f6d3df4c11.tar.bz2 openbsd-796d609550df3a33fc11468741c5d2f6d3df4c11.zip |
OpenSSL 0.9.5 merge
*warning* this bumps shared lib minors for libssl and libcrypto from 2.1 to 2.2
if you are using the ssl26 packages for ssh and other things to work you will
need to get new ones (see ~beck/libsslsnap/<arch>) on cvs or ~beck/src-patent.tar.gz on cvs
Diffstat (limited to 'src/lib/libcrypto/rc4/rc4_enc.c')
-rw-r--r-- | src/lib/libcrypto/rc4/rc4_enc.c | 186 |
1 files changed, 185 insertions, 1 deletions
diff --git a/src/lib/libcrypto/rc4/rc4_enc.c b/src/lib/libcrypto/rc4/rc4_enc.c index 3256bea8cc..d5f18a3a70 100644 --- a/src/lib/libcrypto/rc4/rc4_enc.c +++ b/src/lib/libcrypto/rc4/rc4_enc.c | |||
@@ -67,7 +67,7 @@ | |||
67 | * Date: Wed, 14 Sep 1994 06:35:31 GMT | 67 | * Date: Wed, 14 Sep 1994 06:35:31 GMT |
68 | */ | 68 | */ |
69 | 69 | ||
70 | void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, | 70 | void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, |
71 | unsigned char *outdata) | 71 | unsigned char *outdata) |
72 | { | 72 | { |
73 | register RC4_INT *d; | 73 | register RC4_INT *d; |
@@ -78,6 +78,190 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, | |||
78 | y=key->y; | 78 | y=key->y; |
79 | d=key->data; | 79 | d=key->data; |
80 | 80 | ||
81 | #if defined(RC4_CHUNK) | ||
82 | /* | ||
83 | * The original reason for implementing this(*) was the fact that | ||
84 | * pre-21164a Alpha CPUs don't have byte load/store instructions | ||
85 | * and e.g. a byte store has to be done with 64-bit load, shift, | ||
86 | * and, or and finally 64-bit store. Peeking at data and operating | |
87 | * at natural word size made it possible to reduce the number of | |
88 | * instructions as well as to perform early read-ahead without | ||
89 | * suffering from RAW (read-after-write) hazard. This resulted | ||
90 | * in ~40%(**) performance improvement on 21064 box with gcc. | ||
91 | * But it's not only Alpha users who win here:-) Thanks to the | ||
92 | * early-n-wide read-ahead this implementation also exhibits | ||
93 | * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending | ||
94 | * on sizeof(RC4_INT)). | ||
95 | * | ||
96 | * (*) "this" means code which recognizes the case when input | ||
97 | * and output pointers appear to be aligned at natural CPU | ||
98 | * word boundary | ||
99 | * (**) i.e. according to 'apps/openssl speed rc4' benchmark, | ||
100 | * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... | ||
101 | * | ||
102 | * Caveats. | |
103 | * | ||
104 | * - RC4_CHUNK="unsigned long long" should be a #1 choice for | ||
105 | * UltraSPARC. Unfortunately gcc generates very slow code | ||
106 | * (2.5-3 times slower than one generated by Sun's WorkShop | ||
107 | * C) and therefore gcc (at least 2.95 and earlier) should | ||
108 | * always be told that RC4_CHUNK="unsigned long". | ||
109 | * | ||
110 | * <appro@fy.chalmers.se> | ||
111 | */ | ||
112 | |||
113 | # define RC4_STEP ( \ | ||
114 | x=(x+1) &0xff, \ | ||
115 | tx=d[x], \ | ||
116 | y=(tx+y)&0xff, \ | ||
117 | ty=d[y], \ | ||
118 | d[y]=tx, \ | ||
119 | d[x]=ty, \ | ||
120 | (RC4_CHUNK)d[(tx+ty)&0xff]\ | ||
121 | ) | ||
122 | |||
123 | if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | | ||
124 | ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 ) | ||
125 | { | ||
126 | RC4_CHUNK ichunk,otp; | ||
127 | const union { long one; char little; } is_endian = {1}; | ||
128 | |||
129 | /* | ||
130 | * I reckon we can afford to implement both endian | ||
131 | * cases and to decide which way to take at run-time | ||
132 | * because the machine code appears to be very compact | ||
133 | * and redundant 1-2KB is perfectly tolerable (i.e. | ||
134 | * in case the compiler fails to eliminate it:-). By | ||
135 | * suggestion from Terrel Larson <terr@terralogic.net> | ||
136 | * who also stands for the is_endian union:-) | ||
137 | * | ||
138 | * Special notes. | ||
139 | * | ||
140 | * - is_endian is declared automatic as doing otherwise | ||
141 | * (declaring static) prevents gcc from eliminating | ||
142 | * the redundant code; | ||
143 | * - compilers (those I've tried) don't seem to have | ||
144 | * problems eliminating either the operators guarded | ||
145 | * by "if (sizeof(RC4_CHUNK)==8)" or the condition | ||
146 | * expressions themselves so I've got 'em to replace | ||
147 | * corresponding #ifdefs from the previous version; | ||
148 | * - I chose to leave the redundant switch cases in | |
149 | * place when sizeof(RC4_CHUNK)!=8 (they were also | |
150 | * #ifdefed before); | |
151 | * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in | ||
152 | * [LB]ESHFT guards against "shift is out of range" | ||
153 | * warnings when sizeof(RC4_CHUNK)!=8 | ||
154 | * | ||
155 | * <appro@fy.chalmers.se> | ||
156 | */ | ||
157 | if (!is_endian.little) | ||
158 | { /* BIG-ENDIAN CASE */ | ||
159 | # define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) | ||
160 | for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) | ||
161 | { | ||
162 | ichunk = *(RC4_CHUNK *)indata; | ||
163 | otp = RC4_STEP<<BESHFT(0); | ||
164 | otp |= RC4_STEP<<BESHFT(1); | ||
165 | otp |= RC4_STEP<<BESHFT(2); | ||
166 | otp |= RC4_STEP<<BESHFT(3); | ||
167 | if (sizeof(RC4_CHUNK)==8) | ||
168 | { | ||
169 | otp |= RC4_STEP<<BESHFT(4); | ||
170 | otp |= RC4_STEP<<BESHFT(5); | ||
171 | otp |= RC4_STEP<<BESHFT(6); | ||
172 | otp |= RC4_STEP<<BESHFT(7); | ||
173 | } | ||
174 | *(RC4_CHUNK *)outdata = otp^ichunk; | ||
175 | indata += sizeof(RC4_CHUNK); | ||
176 | outdata += sizeof(RC4_CHUNK); | ||
177 | } | ||
178 | if (len) | ||
179 | { | ||
180 | RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk; | ||
181 | |||
182 | ichunk = *(RC4_CHUNK *)indata; | ||
183 | ochunk = *(RC4_CHUNK *)outdata; | ||
184 | otp = 0; | ||
185 | i = BESHFT(0); | ||
186 | mask <<= (sizeof(RC4_CHUNK)-len)<<3; | ||
187 | switch (len&(sizeof(RC4_CHUNK)-1)) | ||
188 | { | ||
189 | case 7: otp = RC4_STEP<<i, i-=8; | ||
190 | case 6: otp |= RC4_STEP<<i, i-=8; | ||
191 | case 5: otp |= RC4_STEP<<i, i-=8; | ||
192 | case 4: otp |= RC4_STEP<<i, i-=8; | ||
193 | case 3: otp |= RC4_STEP<<i, i-=8; | ||
194 | case 2: otp |= RC4_STEP<<i, i-=8; | ||
195 | case 1: otp |= RC4_STEP<<i, i-=8; | ||
196 | case 0: ; /* | ||
197 | * it's never the case, | ||
198 | * but it has to be here | ||
199 | * for ultrix? | ||
200 | */ | ||
201 | } | ||
202 | ochunk &= ~mask; | ||
203 | ochunk |= (otp^ichunk) & mask; | ||
204 | *(RC4_CHUNK *)outdata = ochunk; | ||
205 | } | ||
206 | key->x=x; | ||
207 | key->y=y; | ||
208 | return; | ||
209 | } | ||
210 | else | ||
211 | { /* LITTLE-ENDIAN CASE */ | ||
212 | # define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) | ||
213 | for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) | ||
214 | { | ||
215 | ichunk = *(RC4_CHUNK *)indata; | ||
216 | otp = RC4_STEP; | ||
217 | otp |= RC4_STEP<<8; | ||
218 | otp |= RC4_STEP<<16; | ||
219 | otp |= RC4_STEP<<24; | ||
220 | if (sizeof(RC4_CHUNK)==8) | ||
221 | { | ||
222 | otp |= RC4_STEP<<LESHFT(4); | ||
223 | otp |= RC4_STEP<<LESHFT(5); | ||
224 | otp |= RC4_STEP<<LESHFT(6); | ||
225 | otp |= RC4_STEP<<LESHFT(7); | ||
226 | } | ||
227 | *(RC4_CHUNK *)outdata = otp^ichunk; | ||
228 | indata += sizeof(RC4_CHUNK); | ||
229 | outdata += sizeof(RC4_CHUNK); | ||
230 | } | ||
231 | if (len) | ||
232 | { | ||
233 | RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk; | ||
234 | |||
235 | ichunk = *(RC4_CHUNK *)indata; | ||
236 | ochunk = *(RC4_CHUNK *)outdata; | ||
237 | otp = 0; | ||
238 | i = 0; | ||
239 | mask >>= (sizeof(RC4_CHUNK)-len)<<3; | ||
240 | switch (len&(sizeof(RC4_CHUNK)-1)) | ||
241 | { | ||
242 | case 7: otp = RC4_STEP, i+=8; | ||
243 | case 6: otp |= RC4_STEP<<i, i+=8; | ||
244 | case 5: otp |= RC4_STEP<<i, i+=8; | ||
245 | case 4: otp |= RC4_STEP<<i, i+=8; | ||
246 | case 3: otp |= RC4_STEP<<i, i+=8; | ||
247 | case 2: otp |= RC4_STEP<<i, i+=8; | ||
248 | case 1: otp |= RC4_STEP<<i, i+=8; | ||
249 | case 0: ; /* | ||
250 | * it's never the case, | ||
251 | * but it has to be here | ||
252 | * for ultrix? | ||
253 | */ | ||
254 | } | ||
255 | ochunk &= ~mask; | ||
256 | ochunk |= (otp^ichunk) & mask; | ||
257 | *(RC4_CHUNK *)outdata = ochunk; | ||
258 | } | ||
259 | key->x=x; | ||
260 | key->y=y; | ||
261 | return; | ||
262 | } | ||
263 | } | ||
264 | #endif | ||
81 | #define LOOP(in,out) \ | 265 | #define LOOP(in,out) \ |
82 | x=((x+1)&0xff); \ | 266 | x=((x+1)&0xff); \ |
83 | tx=d[x]; \ | 267 | tx=d[x]; \ |