summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/rc4/rc4_enc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/rc4/rc4_enc.c')
-rw-r--r--src/lib/libcrypto/rc4/rc4_enc.c186
1 files changed, 185 insertions, 1 deletions
diff --git a/src/lib/libcrypto/rc4/rc4_enc.c b/src/lib/libcrypto/rc4/rc4_enc.c
index 3256bea8cc..d5f18a3a70 100644
--- a/src/lib/libcrypto/rc4/rc4_enc.c
+++ b/src/lib/libcrypto/rc4/rc4_enc.c
@@ -67,7 +67,7 @@
67 * Date: Wed, 14 Sep 1994 06:35:31 GMT 67 * Date: Wed, 14 Sep 1994 06:35:31 GMT
68 */ 68 */
69 69
70void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, 70void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata,
71 unsigned char *outdata) 71 unsigned char *outdata)
72 { 72 {
73 register RC4_INT *d; 73 register RC4_INT *d;
@@ -78,6 +78,190 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata,
78 y=key->y; 78 y=key->y;
79 d=key->data; 79 d=key->data;
80 80
81#if defined(RC4_CHUNK)
82 /*
83 * The original reason for implementing this(*) was the fact that
84 * pre-21164a Alpha CPUs don't have byte load/store instructions
85 * and e.g. a byte store has to be done with 64-bit load, shift,
86 * and, or and finally 64-bit store. Peaking data and operating
87 * at natural word size made it possible to reduce amount of
88 * instructions as well as to perform early read-ahead without
89 * suffering from RAW (read-after-write) hazard. This resulted
90 * in ~40%(**) performance improvement on 21064 box with gcc.
91 * But it's not only Alpha users who win here:-) Thanks to the
92 * early-n-wide read-ahead this implementation also exhibits
93 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
94 * on sizeof(RC4_INT)).
95 *
96 * (*) "this" means code which recognizes the case when input
97 * and output pointers appear to be aligned at natural CPU
98 * word boundary
99 * (**) i.e. according to 'apps/openssl speed rc4' benchmark,
100 * crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
101 *
102 * Cavets.
103 *
104 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
105 * UltraSPARC. Unfortunately gcc generates very slow code
106 * (2.5-3 times slower than one generated by Sun's WorkShop
107 * C) and therefore gcc (at least 2.95 and earlier) should
108 * always be told that RC4_CHUNK="unsigned long".
109 *
110 * <appro@fy.chalmers.se>
111 */
112
113# define RC4_STEP ( \
114 x=(x+1) &0xff, \
115 tx=d[x], \
116 y=(tx+y)&0xff, \
117 ty=d[y], \
118 d[y]=tx, \
119 d[x]=ty, \
120 (RC4_CHUNK)d[(tx+ty)&0xff]\
121 )
122
123 if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) |
124 ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 )
125 {
126 RC4_CHUNK ichunk,otp;
127 const union { long one; char little; } is_endian = {1};
128
129 /*
130 * I reckon we can afford to implement both endian
131 * cases and to decide which way to take at run-time
132 * because the machine code appears to be very compact
133 * and redundant 1-2KB is perfectly tolerable (i.e.
134 * in case the compiler fails to eliminate it:-). By
135 * suggestion from Terrel Larson <terr@terralogic.net>
136 * who also stands for the is_endian union:-)
137 *
138 * Special notes.
139 *
140 * - is_endian is declared automatic as doing otherwise
141 * (declaring static) prevents gcc from eliminating
142 * the redundant code;
143 * - compilers (those I've tried) don't seem to have
144 * problems eliminating either the operators guarded
145 * by "if (sizeof(RC4_CHUNK)==8)" or the condition
146 * expressions themselves so I've got 'em to replace
147 * corresponding #ifdefs from the previous version;
148 * - I chose to let the redundant switch cases when
149 * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
150 * before);
151 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
152 * [LB]ESHFT guards against "shift is out of range"
153 * warnings when sizeof(RC4_CHUNK)!=8
154 *
155 * <appro@fy.chalmers.se>
156 */
157 if (!is_endian.little)
158 { /* BIG-ENDIAN CASE */
159# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
160 for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK))
161 {
162 ichunk = *(RC4_CHUNK *)indata;
163 otp = RC4_STEP<<BESHFT(0);
164 otp |= RC4_STEP<<BESHFT(1);
165 otp |= RC4_STEP<<BESHFT(2);
166 otp |= RC4_STEP<<BESHFT(3);
167 if (sizeof(RC4_CHUNK)==8)
168 {
169 otp |= RC4_STEP<<BESHFT(4);
170 otp |= RC4_STEP<<BESHFT(5);
171 otp |= RC4_STEP<<BESHFT(6);
172 otp |= RC4_STEP<<BESHFT(7);
173 }
174 *(RC4_CHUNK *)outdata = otp^ichunk;
175 indata += sizeof(RC4_CHUNK);
176 outdata += sizeof(RC4_CHUNK);
177 }
178 if (len)
179 {
180 RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
181
182 ichunk = *(RC4_CHUNK *)indata;
183 ochunk = *(RC4_CHUNK *)outdata;
184 otp = 0;
185 i = BESHFT(0);
186 mask <<= (sizeof(RC4_CHUNK)-len)<<3;
187 switch (len&(sizeof(RC4_CHUNK)-1))
188 {
189 case 7: otp = RC4_STEP<<i, i-=8;
190 case 6: otp |= RC4_STEP<<i, i-=8;
191 case 5: otp |= RC4_STEP<<i, i-=8;
192 case 4: otp |= RC4_STEP<<i, i-=8;
193 case 3: otp |= RC4_STEP<<i, i-=8;
194 case 2: otp |= RC4_STEP<<i, i-=8;
195 case 1: otp |= RC4_STEP<<i, i-=8;
196 case 0: ; /*
197 * it's never the case,
198 * but it has to be here
199 * for ultrix?
200 */
201 }
202 ochunk &= ~mask;
203 ochunk |= (otp^ichunk) & mask;
204 *(RC4_CHUNK *)outdata = ochunk;
205 }
206 key->x=x;
207 key->y=y;
208 return;
209 }
210 else
211 { /* LITTLE-ENDIAN CASE */
212# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
213 for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK))
214 {
215 ichunk = *(RC4_CHUNK *)indata;
216 otp = RC4_STEP;
217 otp |= RC4_STEP<<8;
218 otp |= RC4_STEP<<16;
219 otp |= RC4_STEP<<24;
220 if (sizeof(RC4_CHUNK)==8)
221 {
222 otp |= RC4_STEP<<LESHFT(4);
223 otp |= RC4_STEP<<LESHFT(5);
224 otp |= RC4_STEP<<LESHFT(6);
225 otp |= RC4_STEP<<LESHFT(7);
226 }
227 *(RC4_CHUNK *)outdata = otp^ichunk;
228 indata += sizeof(RC4_CHUNK);
229 outdata += sizeof(RC4_CHUNK);
230 }
231 if (len)
232 {
233 RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
234
235 ichunk = *(RC4_CHUNK *)indata;
236 ochunk = *(RC4_CHUNK *)outdata;
237 otp = 0;
238 i = 0;
239 mask >>= (sizeof(RC4_CHUNK)-len)<<3;
240 switch (len&(sizeof(RC4_CHUNK)-1))
241 {
242 case 7: otp = RC4_STEP, i+=8;
243 case 6: otp |= RC4_STEP<<i, i+=8;
244 case 5: otp |= RC4_STEP<<i, i+=8;
245 case 4: otp |= RC4_STEP<<i, i+=8;
246 case 3: otp |= RC4_STEP<<i, i+=8;
247 case 2: otp |= RC4_STEP<<i, i+=8;
248 case 1: otp |= RC4_STEP<<i, i+=8;
249 case 0: ; /*
250 * it's never the case,
251 * but it has to be here
252 * for ultrix?
253 */
254 }
255 ochunk &= ~mask;
256 ochunk |= (otp^ichunk) & mask;
257 *(RC4_CHUNK *)outdata = ochunk;
258 }
259 key->x=x;
260 key->y=y;
261 return;
262 }
263 }
264#endif
81#define LOOP(in,out) \ 265#define LOOP(in,out) \
82 x=((x+1)&0xff); \ 266 x=((x+1)&0xff); \
83 tx=d[x]; \ 267 tx=d[x]; \