summary refs log tree commit diff
path: root/src/lib/libcrypto/rc4
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/rc4')
-rw-r--r-- src/lib/libcrypto/rc4/Makefile 115
-rw-r--r-- src/lib/libcrypto/rc4/asm/rc4-586.pl 162
-rwxr-xr-x src/lib/libcrypto/rc4/asm/rc4-x86_64.pl 294
-rw-r--r-- src/lib/libcrypto/rc4/rc4.c 193
-rw-r--r-- src/lib/libcrypto/rc4/rc4.h 1
-rw-r--r-- src/lib/libcrypto/rc4/rc4_skey.c 36
-rw-r--r-- src/lib/libcrypto/rc4/rc4s.cpp 73
-rw-r--r-- src/lib/libcrypto/rc4/rc4speed.c 253
-rw-r--r-- src/lib/libcrypto/rc4/rc4test.c 236
-rw-r--r-- src/lib/libcrypto/rc4/rrc4.doc 278
10 files changed, 1255 insertions, 386 deletions
diff --git a/src/lib/libcrypto/rc4/Makefile b/src/lib/libcrypto/rc4/Makefile
new file mode 100644
index 0000000000..264451a213
--- /dev/null
+++ b/src/lib/libcrypto/rc4/Makefile
@@ -0,0 +1,115 @@
1#
2# OpenSSL/crypto/rc4/Makefile
3#
4
5DIR= rc4
6TOP= ../..
7CC= cc
8CPP= $(CC) -E
9INCLUDES=
10CFLAG=-g
11AR= ar r
12
13RC4_ENC=rc4_enc.o rc4_skey.o
14
15CFLAGS= $(INCLUDES) $(CFLAG)
16ASFLAGS= $(INCLUDES) $(ASFLAG)
17AFLAGS= $(ASFLAGS)
18
19GENERAL=Makefile
20TEST=rc4test.c
21APPS=
22
23LIB=$(TOP)/libcrypto.a
24LIBSRC=rc4_skey.c rc4_enc.c
25LIBOBJ=$(RC4_ENC)
26
27SRC= $(LIBSRC)
28
29EXHEADER= rc4.h
30HEADER= $(EXHEADER) rc4_locl.h
31
32ALL= $(GENERAL) $(SRC) $(HEADER)
33
34top:
35 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
36
37all: lib
38
39lib: $(LIBOBJ)
40 $(AR) $(LIB) $(LIBOBJ)
41 $(RANLIB) $(LIB) || echo Never mind.
42 @touch lib
43
44rc4-586.s: asm/rc4-586.pl ../perlasm/x86asm.pl
45 $(PERL) asm/rc4-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
46
47rc4-x86_64.s: asm/rc4-x86_64.pl
48 $(PERL) asm/rc4-x86_64.pl $(PERLASM_SCHEME) > $@
49
50rc4-ia64.S: asm/rc4-ia64.pl
51 $(PERL) asm/rc4-ia64.pl $(CFLAGS) > $@
52
53rc4-s390x.s: asm/rc4-s390x.pl
54 $(PERL) asm/rc4-s390x.pl > $@
55
56rc4-ia64.s: rc4-ia64.S
57 @case `awk '/^#define RC4_INT/{print$$NF}' $(TOP)/include/openssl/opensslconf.h` in \
58 int) set -x; $(CC) $(CFLAGS) -DSZ=4 -E rc4-ia64.S > $@ ;; \
59 char) set -x; $(CC) $(CFLAGS) -DSZ=1 -E rc4-ia64.S > $@ ;; \
60 *) exit 1 ;; \
61 esac
62
63files:
64 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
65
66links:
67 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
68 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
69 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
70
71install:
72 @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
73 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
74 do \
75 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
76 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
77 done;
78
79tags:
80 ctags $(SRC)
81
82tests:
83
84lint:
85 lint -DLINT $(INCLUDES) $(SRC)>fluff
86
87depend:
88 @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
89 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
90
91dclean:
92 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
93 mv -f Makefile.new $(MAKEFILE)
94
95clean:
96 rm -f *.s *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
97
98# DO NOT DELETE THIS LINE -- make depend depends on it.
99
100rc4_enc.o: ../../e_os.h ../../include/openssl/bio.h
101rc4_enc.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
102rc4_enc.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
103rc4_enc.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
104rc4_enc.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
105rc4_enc.o: ../../include/openssl/rc4.h ../../include/openssl/safestack.h
106rc4_enc.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
107rc4_enc.o: ../cryptlib.h rc4_enc.c rc4_locl.h
108rc4_skey.o: ../../e_os.h ../../include/openssl/bio.h
109rc4_skey.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
110rc4_skey.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
111rc4_skey.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
112rc4_skey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
113rc4_skey.o: ../../include/openssl/rc4.h ../../include/openssl/safestack.h
114rc4_skey.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
115rc4_skey.o: ../cryptlib.h rc4_locl.h rc4_skey.c
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl
index 5c9ac6ad28..38a44a70ef 100644
--- a/src/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -28,34 +28,6 @@
28# 28#
29# <appro@fy.chalmers.se> 29# <appro@fy.chalmers.se>
30 30
31# May 2011
32#
33# Optimize for Core2 and Westmere [and incidentally Opteron]. Current
34# performance in cycles per processed byte (less is better) and
35# improvement relative to previous version of this module is:
36#
37# Pentium 10.2 # original numbers
38# Pentium III 7.8(*)
39# Intel P4 7.5
40#
41# Opteron 6.1/+20% # new MMX numbers
42# Core2 5.3/+67%(**)
43# Westmere 5.1/+94%(**)
44# Sandy Bridge 5.0/+8%
45# Atom 12.6/+6%
46#
47# (*) PIII can actually deliver 6.6 cycles per byte with MMX code,
48# but this specific code performs poorly on Core2. And vice
49# versa, below MMX/SSE code delivering 5.8/7.1 on Core2 performs
50# poorly on PIII, at 8.0/14.5:-( As PIII is not a "hot" CPU
51# [anymore], I chose to discard PIII-specific code path and opt
52# for original IALU-only code, which is why MMX/SSE code path
53# is guarded by SSE2 bit (see below), not MMX/SSE.
54# (**) Performance vs. block size on Core2 and Westmere had a maximum
55# at ... 64 bytes block size. And it was quite a maximum, 40-60%
56# in comparison to largest 8KB block size. Above improvement
57# coefficients are for the largest block size.
58
59$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 31$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
60push(@INC,"${dir}","${dir}../../perlasm"); 32push(@INC,"${dir}","${dir}../../perlasm");
61require "x86asm.pl"; 33require "x86asm.pl";
@@ -90,68 +62,6 @@ sub RC4_loop {
90 &$func ($out,&DWP(0,$dat,$ty,4)); 62 &$func ($out,&DWP(0,$dat,$ty,4));
91} 63}
92 64
93if ($alt=0) {
94 # >20% faster on Atom and Sandy Bridge[!], 8% faster on Opteron,
95 # but ~40% slower on Core2 and Westmere... Attempt to add movz
96 # brings down Opteron by 25%, Atom and Sandy Bridge by 15%, yet
97 # on Core2 with movz it's almost 20% slower than below alternative
98 # code... Yes, it's a total mess...
99 my @XX=($xx,$out);
100 $RC4_loop_mmx = sub { # SSE actually...
101 my $i=shift;
102 my $j=$i<=0?0:$i>>1;
103 my $mm=$i<=0?"mm0":"mm".($i&1);
104
105 &add (&LB($yy),&LB($tx));
106 &lea (@XX[1],&DWP(1,@XX[0]));
107 &pxor ("mm2","mm0") if ($i==0);
108 &psllq ("mm1",8) if ($i==0);
109 &and (@XX[1],0xff);
110 &pxor ("mm0","mm0") if ($i<=0);
111 &mov ($ty,&DWP(0,$dat,$yy,4));
112 &mov (&DWP(0,$dat,$yy,4),$tx);
113 &pxor ("mm1","mm2") if ($i==0);
114 &mov (&DWP(0,$dat,$XX[0],4),$ty);
115 &add (&LB($ty),&LB($tx));
116 &movd (@XX[0],"mm7") if ($i==0);
117 &mov ($tx,&DWP(0,$dat,@XX[1],4));
118 &pxor ("mm1","mm1") if ($i==1);
119 &movq ("mm2",&QWP(0,$inp)) if ($i==1);
120 &movq (&QWP(-8,(@XX[0],$inp)),"mm1") if ($i==0);
121 &pinsrw ($mm,&DWP(0,$dat,$ty,4),$j);
122
123 push (@XX,shift(@XX)) if ($i>=0);
124 }
125} else {
126 # Using pinsrw here improves performance on Intel CPUs by 2-3%, but
127 # brings down AMD by 7%...
128 $RC4_loop_mmx = sub {
129 my $i=shift;
130
131 &add (&LB($yy),&LB($tx));
132 &psllq ("mm1",8*(($i-1)&7)) if (abs($i)!=1);
133 &mov ($ty,&DWP(0,$dat,$yy,4));
134 &mov (&DWP(0,$dat,$yy,4),$tx);
135 &mov (&DWP(0,$dat,$xx,4),$ty);
136 &inc ($xx);
137 &add ($ty,$tx);
138 &movz ($xx,&LB($xx)); # (*)
139 &movz ($ty,&LB($ty)); # (*)
140 &pxor ("mm2",$i==1?"mm0":"mm1") if ($i>=0);
141 &movq ("mm0",&QWP(0,$inp)) if ($i<=0);
142 &movq (&QWP(-8,($out,$inp)),"mm2") if ($i==0);
143 &mov ($tx,&DWP(0,$dat,$xx,4));
144 &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4));
145
146 # (*) This is the key to Core2 and Westmere performance.
147 # Without movz out-of-order execution logic confuses
148 # itself and fails to reorder loads and stores. Problem
149 # appears to be fixed in Sandy Bridge...
150 }
151}
152
153&external_label("OPENSSL_ia32cap_P");
154
155# void RC4(RC4_KEY *key,size_t len,const unsigned char *inp,unsigned char *out); 65# void RC4(RC4_KEY *key,size_t len,const unsigned char *inp,unsigned char *out);
156&function_begin("RC4"); 66&function_begin("RC4");
157 &mov ($dat,&wparam(0)); # load key schedule pointer 67 &mov ($dat,&wparam(0)); # load key schedule pointer
@@ -184,56 +94,11 @@ if ($alt=0) {
184 &and ($ty,-4); # how many 4-byte chunks? 94 &and ($ty,-4); # how many 4-byte chunks?
185 &jz (&label("loop1")); 95 &jz (&label("loop1"));
186 96
187 &test ($ty,-8);
188 &mov (&wparam(3),$out); # $out as accumulator in these loops
189 &jz (&label("go4loop4"));
190
191 &picmeup($out,"OPENSSL_ia32cap_P");
192 &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX]
193 &jnc (&label("go4loop4"));
194
195 &mov ($out,&wparam(3)) if (!$alt);
196 &movd ("mm7",&wparam(3)) if ($alt);
197 &and ($ty,-8);
198 &lea ($ty,&DWP(-8,$inp,$ty));
199 &mov (&DWP(-4,$dat),$ty); # save input+(len/8)*8-8
200
201 &$RC4_loop_mmx(-1);
202 &jmp(&label("loop_mmx_enter"));
203
204 &set_label("loop_mmx",16);
205 &$RC4_loop_mmx(0);
206 &set_label("loop_mmx_enter");
207 for ($i=1;$i<8;$i++) { &$RC4_loop_mmx($i); }
208 &mov ($ty,$yy);
209 &xor ($yy,$yy); # this is second key to Core2
210 &mov (&LB($yy),&LB($ty)); # and Westmere performance...
211 &cmp ($inp,&DWP(-4,$dat));
212 &lea ($inp,&DWP(8,$inp));
213 &jb (&label("loop_mmx"));
214
215 if ($alt) {
216 &movd ($out,"mm7");
217 &pxor ("mm2","mm0");
218 &psllq ("mm1",8);
219 &pxor ("mm1","mm2");
220 &movq (&QWP(-8,$out,$inp),"mm1");
221 } else {
222 &psllq ("mm1",56);
223 &pxor ("mm2","mm1");
224 &movq (&QWP(-8,$out,$inp),"mm2");
225 }
226 &emms ();
227
228 &cmp ($inp,&wparam(1)); # compare to input+len
229 &je (&label("done"));
230 &jmp (&label("loop1"));
231
232&set_label("go4loop4",16);
233 &lea ($ty,&DWP(-4,$inp,$ty)); 97 &lea ($ty,&DWP(-4,$inp,$ty));
234 &mov (&wparam(2),$ty); # save input+(len/4)*4-4 98 &mov (&wparam(2),$ty); # save input+(len/4)*4-4
99 &mov (&wparam(3),$out); # $out as accumulator in this loop
235 100
236 &set_label("loop4"); 101 &set_label("loop4",16);
237 for ($i=0;$i<4;$i++) { RC4_loop($i); } 102 for ($i=0;$i<4;$i++) { RC4_loop($i); }
238 &ror ($out,8); 103 &ror ($out,8);
239 &xor ($out,&DWP(0,$inp)); 104 &xor ($out,&DWP(0,$inp));
@@ -286,7 +151,7 @@ if ($alt=0) {
286 151
287&set_label("done"); 152&set_label("done");
288 &dec (&LB($xx)); 153 &dec (&LB($xx));
289 &mov (&DWP(-4,$dat),$yy); # save key->y 154 &mov (&BP(-4,$dat),&LB($yy)); # save key->y
290 &mov (&BP(-8,$dat),&LB($xx)); # save key->x 155 &mov (&BP(-8,$dat),&LB($xx)); # save key->x
291&set_label("abort"); 156&set_label("abort");
292&function_end("RC4"); 157&function_end("RC4");
@@ -299,8 +164,10 @@ $idi="ebp";
299$ido="ecx"; 164$ido="ecx";
300$idx="edx"; 165$idx="edx";
301 166
167&external_label("OPENSSL_ia32cap_P");
168
302# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data); 169# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data);
303&function_begin("private_RC4_set_key"); 170&function_begin("RC4_set_key");
304 &mov ($out,&wparam(0)); # load key 171 &mov ($out,&wparam(0)); # load key
305 &mov ($idi,&wparam(1)); # load len 172 &mov ($idi,&wparam(1)); # load len
306 &mov ($inp,&wparam(2)); # load data 173 &mov ($inp,&wparam(2)); # load data
@@ -378,7 +245,7 @@ $idx="edx";
378 &xor ("eax","eax"); 245 &xor ("eax","eax");
379 &mov (&DWP(-8,$out),"eax"); # key->x=0; 246 &mov (&DWP(-8,$out),"eax"); # key->x=0;
380 &mov (&DWP(-4,$out),"eax"); # key->y=0; 247 &mov (&DWP(-4,$out),"eax"); # key->y=0;
381&function_end("private_RC4_set_key"); 248&function_end("RC4_set_key");
382 249
383# const char *RC4_options(void); 250# const char *RC4_options(void);
384&function_begin_B("RC4_options"); 251&function_begin_B("RC4_options");
@@ -387,21 +254,14 @@ $idx="edx";
387 &blindpop("eax"); 254 &blindpop("eax");
388 &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); 255 &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
389 &picmeup("edx","OPENSSL_ia32cap_P"); 256 &picmeup("edx","OPENSSL_ia32cap_P");
390 &mov ("edx",&DWP(0,"edx")); 257 &bt (&DWP(0,"edx"),20);
391 &bt ("edx",20); 258 &jnc (&label("skip"));
392 &jc (&label("1xchar")); 259 &add ("eax",12);
393 &bt ("edx",26); 260 &set_label("skip");
394 &jnc (&label("ret"));
395 &add ("eax",25);
396 &ret ();
397&set_label("1xchar");
398 &add ("eax",12);
399&set_label("ret");
400 &ret (); 261 &ret ();
401&set_label("opts",64); 262&set_label("opts",64);
402&asciz ("rc4(4x,int)"); 263&asciz ("rc4(4x,int)");
403&asciz ("rc4(1x,char)"); 264&asciz ("rc4(1x,char)");
404&asciz ("rc4(8x,mmx)");
405&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>"); 265&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>");
406&align (64); 266&align (64);
407&function_end_B("RC4_options"); 267&function_end_B("RC4_options");
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
index d6eac205e9..544386bf53 100755
--- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -7,8 +7,6 @@
7# details see http://www.openssl.org/~appro/cryptogams/. 7# details see http://www.openssl.org/~appro/cryptogams/.
8# ==================================================================== 8# ====================================================================
9# 9#
10# July 2004
11#
12# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in 10# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in
13# "hand-coded assembler"] doesn't stand for the whole improvement 11# "hand-coded assembler"] doesn't stand for the whole improvement
14# coefficient. It turned out that eliminating RC4_CHAR from config 12# coefficient. It turned out that eliminating RC4_CHAR from config
@@ -21,8 +19,6 @@
21# to operate on partial registers, it turned out to be the best bet. 19# to operate on partial registers, it turned out to be the best bet.
22# At least for AMD... How IA32E would perform remains to be seen... 20# At least for AMD... How IA32E would perform remains to be seen...
23 21
24# November 2004
25#
26# As was shown by Marc Bevand reordering of couple of load operations 22# As was shown by Marc Bevand reordering of couple of load operations
27# results in even higher performance gain of 3.3x:-) At least on 23# results in even higher performance gain of 3.3x:-) At least on
28# Opteron... For reference, 1x in this case is RC4_CHAR C-code 24# Opteron... For reference, 1x in this case is RC4_CHAR C-code
@@ -30,8 +26,6 @@
30# Latter means that if you want to *estimate* what to expect from 26# Latter means that if you want to *estimate* what to expect from
31# *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz. 27# *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz.
32 28
33# November 2004
34#
35# Intel P4 EM64T core was found to run the AMD64 code really slow... 29# Intel P4 EM64T core was found to run the AMD64 code really slow...
36# The only way to achieve comparable performance on P4 was to keep 30# The only way to achieve comparable performance on P4 was to keep
37# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to 31# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
@@ -39,14 +33,10 @@
39# on either AMD and Intel platforms, I implement both cases. See 33# on either AMD and Intel platforms, I implement both cases. See
40# rc4_skey.c for further details... 34# rc4_skey.c for further details...
41 35
42# April 2005
43#
44# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing 36# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing
45# those with add/sub results in 50% performance improvement of folded 37# those with add/sub results in 50% performance improvement of folded
46# loop... 38# loop...
47 39
48# May 2005
49#
50# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T 40# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
51# performance by >30% [unlike P4 32-bit case that is]. But this is 41# performance by >30% [unlike P4 32-bit case that is]. But this is
52# provided that loads are reordered even more aggressively! Both code 42# provided that loads are reordered even more aggressively! Both code
@@ -60,8 +50,6 @@
60# is not implemented, then this final RC4_CHAR code-path should be 50# is not implemented, then this final RC4_CHAR code-path should be
61# preferred, as it provides better *all-round* performance]. 51# preferred, as it provides better *all-round* performance].
62 52
63# March 2007
64#
65# Intel Core2 was observed to perform poorly on both code paths:-( It 53# Intel Core2 was observed to perform poorly on both code paths:-( It
66# apparently suffers from some kind of partial register stall, which 54# apparently suffers from some kind of partial register stall, which
67# occurs in 64-bit mode only [as virtually identical 32-bit loop was 55# occurs in 64-bit mode only [as virtually identical 32-bit loop was
@@ -70,37 +58,6 @@
70# fit for Core2 and therefore the code was modified to skip cloop8 on 58# fit for Core2 and therefore the code was modified to skip cloop8 on
71# this CPU. 59# this CPU.
72 60
73# May 2010
74#
75# Intel Westmere was observed to perform suboptimally. Adding yet
76# another movzb to cloop1 improved performance by almost 50%! Core2
77# performance is improved too, but nominally...
78
79# May 2011
80#
81# The only code path that was not modified is P4-specific one. Non-P4
82# Intel code path optimization is heavily based on submission by Maxim
83# Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used
84# some of the ideas even in attempt to optimize the original RC4_INT
85# code path... Current performance in cycles per processed byte (less
86# is better) and improvement coefficients relative to previous
87# version of this module are:
88#
89# Opteron 5.3/+0%(*)
90# P4 6.5
91# Core2 6.2/+15%(**)
92# Westmere 4.2/+60%
93# Sandy Bridge 4.2/+120%
94# Atom 9.3/+80%
95#
96# (*) But corresponding loop has less instructions, which should have
97# positive effect on upcoming Bulldozer, which has one less ALU.
98# For reference, Intel code runs at 6.8 cpb rate on Opteron.
99# (**) Note that Core2 result is ~15% lower than corresponding result
100# for 32-bit code, meaning that it's possible to improve it,
101# but more than likely at the cost of the others (see rc4-586.pl
102# to get the idea)...
103
104$flavour = shift; 61$flavour = shift;
105$output = shift; 62$output = shift;
106if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } 63if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -119,10 +76,13 @@ $len="%rsi"; # arg2
119$inp="%rdx"; # arg3 76$inp="%rdx"; # arg3
120$out="%rcx"; # arg4 77$out="%rcx"; # arg4
121 78
122{ 79@XX=("%r8","%r10");
80@TX=("%r9","%r11");
81$YY="%r12";
82$TY="%r13";
83
123$code=<<___; 84$code=<<___;
124.text 85.text
125.extern OPENSSL_ia32cap_P
126 86
127.globl RC4 87.globl RC4
128.type RC4,\@function,4 88.type RC4,\@function,4
@@ -135,173 +95,48 @@ RC4: or $len,$len
135 push %r12 95 push %r12
136 push %r13 96 push %r13
137.Lprologue: 97.Lprologue:
138 mov $len,%r11
139 mov $inp,%r12
140 mov $out,%r13
141___
142my $len="%r11"; # reassign input arguments
143my $inp="%r12";
144my $out="%r13";
145 98
146my @XX=("%r10","%rsi"); 99 add \$8,$dat
147my @TX=("%rax","%rbx"); 100 movl -8($dat),$XX[0]#d
148my $YY="%rcx"; 101 movl -4($dat),$YY#d
149my $TY="%rdx";
150
151$code.=<<___;
152 xor $XX[0],$XX[0]
153 xor $YY,$YY
154
155 lea 8($dat),$dat
156 mov -8($dat),$XX[0]#b
157 mov -4($dat),$YY#b
158 cmpl \$-1,256($dat) 102 cmpl \$-1,256($dat)
159 je .LRC4_CHAR 103 je .LRC4_CHAR
160 mov OPENSSL_ia32cap_P(%rip),%r8d
161 xor $TX[1],$TX[1]
162 inc $XX[0]#b 104 inc $XX[0]#b
163 sub $XX[0],$TX[1]
164 sub $inp,$out
165 movl ($dat,$XX[0],4),$TX[0]#d 105 movl ($dat,$XX[0],4),$TX[0]#d
166 test \$-16,$len 106 test \$-8,$len
167 jz .Lloop1 107 jz .Lloop1
168 bt \$30,%r8d # Intel CPU? 108 jmp .Lloop8
169 jc .Lintel
170 and \$7,$TX[1]
171 lea 1($XX[0]),$XX[1]
172 jz .Loop8
173 sub $TX[1],$len
174.Loop8_warmup:
175 add $TX[0]#b,$YY#b
176 movl ($dat,$YY,4),$TY#d
177 movl $TX[0]#d,($dat,$YY,4)
178 movl $TY#d,($dat,$XX[0],4)
179 add $TY#b,$TX[0]#b
180 inc $XX[0]#b
181 movl ($dat,$TX[0],4),$TY#d
182 movl ($dat,$XX[0],4),$TX[0]#d
183 xorb ($inp),$TY#b
184 movb $TY#b,($out,$inp)
185 lea 1($inp),$inp
186 dec $TX[1]
187 jnz .Loop8_warmup
188
189 lea 1($XX[0]),$XX[1]
190 jmp .Loop8
191.align 16 109.align 16
192.Loop8: 110.Lloop8:
193___ 111___
194for ($i=0;$i<8;$i++) { 112for ($i=0;$i<8;$i++) {
195$code.=<<___ if ($i==7);
196 add \$8,$XX[1]#b
197___
198$code.=<<___; 113$code.=<<___;
199 add $TX[0]#b,$YY#b 114 add $TX[0]#b,$YY#b
115 mov $XX[0],$XX[1]
200 movl ($dat,$YY,4),$TY#d 116 movl ($dat,$YY,4),$TY#d
117 ror \$8,%rax # ror is redundant when $i=0
118 inc $XX[1]#b
119 movl ($dat,$XX[1],4),$TX[1]#d
120 cmp $XX[1],$YY
201 movl $TX[0]#d,($dat,$YY,4) 121 movl $TX[0]#d,($dat,$YY,4)
202 movl `4*($i==7?-1:$i)`($dat,$XX[1],4),$TX[1]#d 122 cmove $TX[0],$TX[1]
203 ror \$8,%r8 # ror is redundant when $i=0 123 movl $TY#d,($dat,$XX[0],4)
204 movl $TY#d,4*$i($dat,$XX[0],4)
205 add $TX[0]#b,$TY#b 124 add $TX[0]#b,$TY#b
206 movb ($dat,$TY,4),%r8b 125 movb ($dat,$TY,4),%al
207___ 126___
208push(@TX,shift(@TX)); #push(@XX,shift(@XX)); # "rotate" registers 127push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
209} 128}
210$code.=<<___; 129$code.=<<___;
211 add \$8,$XX[0]#b 130 ror \$8,%rax
212 ror \$8,%r8
213 sub \$8,$len 131 sub \$8,$len
214 132
215 xor ($inp),%r8 133 xor ($inp),%rax
216 mov %r8,($out,$inp) 134 add \$8,$inp
217 lea 8($inp),$inp 135 mov %rax,($out)
136 add \$8,$out
218 137
219 test \$-8,$len 138 test \$-8,$len
220 jnz .Loop8 139 jnz .Lloop8
221 cmp \$0,$len
222 jne .Lloop1
223 jmp .Lexit
224
225.align 16
226.Lintel:
227 test \$-32,$len
228 jz .Lloop1
229 and \$15,$TX[1]
230 jz .Loop16_is_hot
231 sub $TX[1],$len
232.Loop16_warmup:
233 add $TX[0]#b,$YY#b
234 movl ($dat,$YY,4),$TY#d
235 movl $TX[0]#d,($dat,$YY,4)
236 movl $TY#d,($dat,$XX[0],4)
237 add $TY#b,$TX[0]#b
238 inc $XX[0]#b
239 movl ($dat,$TX[0],4),$TY#d
240 movl ($dat,$XX[0],4),$TX[0]#d
241 xorb ($inp),$TY#b
242 movb $TY#b,($out,$inp)
243 lea 1($inp),$inp
244 dec $TX[1]
245 jnz .Loop16_warmup
246
247 mov $YY,$TX[1]
248 xor $YY,$YY
249 mov $TX[1]#b,$YY#b
250
251.Loop16_is_hot:
252 lea ($dat,$XX[0],4),$XX[1]
253___
254sub RC4_loop {
255 my $i=shift;
256 my $j=$i<0?0:$i;
257 my $xmm="%xmm".($j&1);
258
259 $code.=" add \$16,$XX[0]#b\n" if ($i==15);
260 $code.=" movdqu ($inp),%xmm2\n" if ($i==15);
261 $code.=" add $TX[0]#b,$YY#b\n" if ($i<=0);
262 $code.=" movl ($dat,$YY,4),$TY#d\n";
263 $code.=" pxor %xmm0,%xmm2\n" if ($i==0);
264 $code.=" psllq \$8,%xmm1\n" if ($i==0);
265 $code.=" pxor $xmm,$xmm\n" if ($i<=1);
266 $code.=" movl $TX[0]#d,($dat,$YY,4)\n";
267 $code.=" add $TY#b,$TX[0]#b\n";
268 $code.=" movl `4*($j+1)`($XX[1]),$TX[1]#d\n" if ($i<15);
269 $code.=" movz $TX[0]#b,$TX[0]#d\n";
270 $code.=" movl $TY#d,4*$j($XX[1])\n";
271 $code.=" pxor %xmm1,%xmm2\n" if ($i==0);
272 $code.=" lea ($dat,$XX[0],4),$XX[1]\n" if ($i==15);
273 $code.=" add $TX[1]#b,$YY#b\n" if ($i<15);
274 $code.=" pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n";
275 $code.=" movdqu %xmm2,($out,$inp)\n" if ($i==0);
276 $code.=" lea 16($inp),$inp\n" if ($i==0);
277 $code.=" movl ($XX[1]),$TX[1]#d\n" if ($i==15);
278}
279 RC4_loop(-1);
280$code.=<<___;
281 jmp .Loop16_enter
282.align 16
283.Loop16:
284___
285
286for ($i=0;$i<16;$i++) {
287 $code.=".Loop16_enter:\n" if ($i==1);
288 RC4_loop($i);
289 push(@TX,shift(@TX)); # "rotate" registers
290}
291$code.=<<___;
292 mov $YY,$TX[1]
293 xor $YY,$YY # keyword to partial register
294 sub \$16,$len
295 mov $TX[1]#b,$YY#b
296 test \$-16,$len
297 jnz .Loop16
298
299 psllq \$8,%xmm1
300 pxor %xmm0,%xmm2
301 pxor %xmm1,%xmm2
302 movdqu %xmm2,($out,$inp)
303 lea 16($inp),$inp
304
305 cmp \$0,$len 140 cmp \$0,$len
306 jne .Lloop1 141 jne .Lloop1
307 jmp .Lexit 142 jmp .Lexit
@@ -317,8 +152,9 @@ $code.=<<___;
317 movl ($dat,$TX[0],4),$TY#d 152 movl ($dat,$TX[0],4),$TY#d
318 movl ($dat,$XX[0],4),$TX[0]#d 153 movl ($dat,$XX[0],4),$TX[0]#d
319 xorb ($inp),$TY#b 154 xorb ($inp),$TY#b
320 movb $TY#b,($out,$inp) 155 inc $inp
321 lea 1($inp),$inp 156 movb $TY#b,($out)
157 inc $out
322 dec $len 158 dec $len
323 jnz .Lloop1 159 jnz .Lloop1
324 jmp .Lexit 160 jmp .Lexit
@@ -329,11 +165,13 @@ $code.=<<___;
329 movzb ($dat,$XX[0]),$TX[0]#d 165 movzb ($dat,$XX[0]),$TX[0]#d
330 test \$-8,$len 166 test \$-8,$len
331 jz .Lcloop1 167 jz .Lcloop1
168 cmpl \$0,260($dat)
169 jnz .Lcloop1
332 jmp .Lcloop8 170 jmp .Lcloop8
333.align 16 171.align 16
334.Lcloop8: 172.Lcloop8:
335 mov ($inp),%r8d 173 mov ($inp),%eax
336 mov 4($inp),%r9d 174 mov 4($inp),%ebx
337___ 175___
338# unroll 2x4-wise, because 64-bit rotates kill Intel P4... 176# unroll 2x4-wise, because 64-bit rotates kill Intel P4...
339for ($i=0;$i<4;$i++) { 177for ($i=0;$i<4;$i++) {
@@ -350,8 +188,8 @@ $code.=<<___;
350 mov $TX[0],$TX[1] 188 mov $TX[0],$TX[1]
351.Lcmov$i: 189.Lcmov$i:
352 add $TX[0]#b,$TY#b 190 add $TX[0]#b,$TY#b
353 xor ($dat,$TY),%r8b 191 xor ($dat,$TY),%al
354 ror \$8,%r8d 192 ror \$8,%eax
355___ 193___
356push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers 194push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
357} 195}
@@ -369,16 +207,16 @@ $code.=<<___;
369 mov $TX[0],$TX[1] 207 mov $TX[0],$TX[1]
370.Lcmov$i: 208.Lcmov$i:
371 add $TX[0]#b,$TY#b 209 add $TX[0]#b,$TY#b
372 xor ($dat,$TY),%r9b 210 xor ($dat,$TY),%bl
373 ror \$8,%r9d 211 ror \$8,%ebx
374___ 212___
375push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers 213push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
376} 214}
377$code.=<<___; 215$code.=<<___;
378 lea -8($len),$len 216 lea -8($len),$len
379 mov %r8d,($out) 217 mov %eax,($out)
380 lea 8($inp),$inp 218 lea 8($inp),$inp
381 mov %r9d,4($out) 219 mov %ebx,4($out)
382 lea 8($out),$out 220 lea 8($out),$out
383 221
384 test \$-8,$len 222 test \$-8,$len
@@ -391,7 +229,6 @@ $code.=<<___;
391.align 16 229.align 16
392.Lcloop1: 230.Lcloop1:
393 add $TX[0]#b,$YY#b 231 add $TX[0]#b,$YY#b
394 movzb $YY#b,$YY#d
395 movzb ($dat,$YY),$TY#d 232 movzb ($dat,$YY),$TY#d
396 movb $TX[0]#b,($dat,$YY) 233 movb $TX[0]#b,($dat,$YY)
397 movb $TY#b,($dat,$XX[0]) 234 movb $TY#b,($dat,$XX[0])
@@ -423,16 +260,16 @@ $code.=<<___;
423 ret 260 ret
424.size RC4,.-RC4 261.size RC4,.-RC4
425___ 262___
426}
427 263
428$idx="%r8"; 264$idx="%r8";
429$ido="%r9"; 265$ido="%r9";
430 266
431$code.=<<___; 267$code.=<<___;
432.globl private_RC4_set_key 268.extern OPENSSL_ia32cap_P
433.type private_RC4_set_key,\@function,3 269.globl RC4_set_key
270.type RC4_set_key,\@function,3
434.align 16 271.align 16
435private_RC4_set_key: 272RC4_set_key:
436 lea 8($dat),$dat 273 lea 8($dat),$dat
437 lea ($inp,$len),$inp 274 lea ($inp,$len),$inp
438 neg $len 275 neg $len
@@ -442,10 +279,13 @@ private_RC4_set_key:
442 xor %r10,%r10 279 xor %r10,%r10
443 xor %r11,%r11 280 xor %r11,%r11
444 281
445 mov OPENSSL_ia32cap_P(%rip),$idx#d 282 mov PIC_GOT(OPENSSL_ia32cap_P),$idx#d
446 bt \$20,$idx#d # RC4_CHAR? 283 bt \$20,$idx#d
447 jc .Lc1stloop 284 jnc .Lw1stloop
448 jmp .Lw1stloop 285 bt \$30,$idx#d
286 setc $ido#b
287 mov $ido#d,260($dat)
288 jmp .Lc1stloop
449 289
450.align 16 290.align 16
451.Lw1stloop: 291.Lw1stloop:
@@ -499,29 +339,27 @@ private_RC4_set_key:
499 mov %eax,-8($dat) 339 mov %eax,-8($dat)
500 mov %eax,-4($dat) 340 mov %eax,-4($dat)
501 ret 341 ret
502.size private_RC4_set_key,.-private_RC4_set_key 342.size RC4_set_key,.-RC4_set_key
503 343
504.globl RC4_options 344.globl RC4_options
505.type RC4_options,\@abi-omnipotent 345.type RC4_options,\@abi-omnipotent
506.align 16 346.align 16
507RC4_options: 347RC4_options:
508 lea .Lopts(%rip),%rax 348 lea .Lopts(%rip),%rax
509 mov OPENSSL_ia32cap_P(%rip),%edx 349 mov PIC_GOT(OPENSSL_ia32cap_P),%edx
510 bt \$20,%edx 350 bt \$20,%edx
511 jc .L8xchar
512 bt \$30,%edx
513 jnc .Ldone 351 jnc .Ldone
514 add \$25,%rax
515 ret
516.L8xchar:
517 add \$12,%rax 352 add \$12,%rax
353 bt \$30,%edx
354 jnc .Ldone
355 add \$13,%rax
518.Ldone: 356.Ldone:
519 ret 357 ret
520.align 64 358.align 64
521.Lopts: 359.Lopts:
522.asciz "rc4(8x,int)" 360.asciz "rc4(8x,int)"
523.asciz "rc4(8x,char)" 361.asciz "rc4(8x,char)"
524.asciz "rc4(16x,int)" 362.asciz "rc4(1x,char)"
525.asciz "RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" 363.asciz "RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
526.align 64 364.align 64
527.size RC4_options,.-RC4_options 365.size RC4_options,.-RC4_options
@@ -644,32 +482,22 @@ key_se_handler:
644 .rva .LSEH_end_RC4 482 .rva .LSEH_end_RC4
645 .rva .LSEH_info_RC4 483 .rva .LSEH_info_RC4
646 484
647 .rva .LSEH_begin_private_RC4_set_key 485 .rva .LSEH_begin_RC4_set_key
648 .rva .LSEH_end_private_RC4_set_key 486 .rva .LSEH_end_RC4_set_key
649 .rva .LSEH_info_private_RC4_set_key 487 .rva .LSEH_info_RC4_set_key
650 488
651.section .xdata 489.section .xdata
652.align 8 490.align 8
653.LSEH_info_RC4: 491.LSEH_info_RC4:
654 .byte 9,0,0,0 492 .byte 9,0,0,0
655 .rva stream_se_handler 493 .rva stream_se_handler
656.LSEH_info_private_RC4_set_key: 494.LSEH_info_RC4_set_key:
657 .byte 9,0,0,0 495 .byte 9,0,0,0
658 .rva key_se_handler 496 .rva key_se_handler
659___ 497___
660} 498}
661 499
662sub reg_part { 500$code =~ s/#([bwd])/$1/gm;
663my ($reg,$conv)=@_;
664 if ($reg =~ /%r[0-9]+/) { $reg .= $conv; }
665 elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; }
666 elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; }
667 elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; }
668 return $reg;
669}
670
671$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
672$code =~ s/\`([^\`]*)\`/eval $1/gem;
673 501
674print $code; 502print $code;
675 503
diff --git a/src/lib/libcrypto/rc4/rc4.c b/src/lib/libcrypto/rc4/rc4.c
new file mode 100644
index 0000000000..c900b26055
--- /dev/null
+++ b/src/lib/libcrypto/rc4/rc4.c
@@ -0,0 +1,193 @@
1/* crypto/rc4/rc4.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62#include <openssl/rc4.h>
63#include <openssl/evp.h>
64
65char *usage[]={
66"usage: rc4 args\n",
67"\n",
68" -in arg - input file - default stdin\n",
69" -out arg - output file - default stdout\n",
70" -key key - password\n",
71NULL
72};
73
74int main(int argc, char *argv[])
75 {
76 FILE *in=NULL,*out=NULL;
77 char *infile=NULL,*outfile=NULL,*keystr=NULL;
78 RC4_KEY key;
79 char buf[BUFSIZ];
80 int badops=0,i;
81 char **pp;
82 unsigned char md[MD5_DIGEST_LENGTH];
83
84 argc--;
85 argv++;
86 while (argc >= 1)
87 {
88 if (strcmp(*argv,"-in") == 0)
89 {
90 if (--argc < 1) goto bad;
91 infile= *(++argv);
92 }
93 else if (strcmp(*argv,"-out") == 0)
94 {
95 if (--argc < 1) goto bad;
96 outfile= *(++argv);
97 }
98 else if (strcmp(*argv,"-key") == 0)
99 {
100 if (--argc < 1) goto bad;
101 keystr= *(++argv);
102 }
103 else
104 {
105 fprintf(stderr,"unknown option %s\n",*argv);
106 badops=1;
107 break;
108 }
109 argc--;
110 argv++;
111 }
112
113 if (badops)
114 {
115bad:
116 for (pp=usage; (*pp != NULL); pp++)
117 fprintf(stderr,"%s",*pp);
118 exit(1);
119 }
120
121 if (infile == NULL)
122 in=stdin;
123 else
124 {
125 in=fopen(infile,"r");
126 if (in == NULL)
127 {
128 perror("open");
129 exit(1);
130 }
131
132 }
133 if (outfile == NULL)
134 out=stdout;
135 else
136 {
137 out=fopen(outfile,"w");
138 if (out == NULL)
139 {
140 perror("open");
141 exit(1);
142 }
143 }
144
145#ifdef OPENSSL_SYS_MSDOS
146 /* This should set the file to binary mode. */
147 {
148#include <fcntl.h>
149 setmode(fileno(in),O_BINARY);
150 setmode(fileno(out),O_BINARY);
151 }
152#endif
153
154 if (keystr == NULL)
155 { /* get key */
156 i=EVP_read_pw_string(buf,BUFSIZ,"Enter RC4 password:",0);
157 if (i != 0)
158 {
159 OPENSSL_cleanse(buf,BUFSIZ);
160 fprintf(stderr,"bad password read\n");
161 exit(1);
162 }
163 keystr=buf;
164 }
165
166 EVP_Digest((unsigned char *)keystr,strlen(keystr),md,NULL,EVP_md5(),NULL);
167 OPENSSL_cleanse(keystr,strlen(keystr));
168 RC4_set_key(&key,MD5_DIGEST_LENGTH,md);
169
170 for(;;)
171 {
172 i=fread(buf,1,BUFSIZ,in);
173 if (i == 0) break;
174 if (i < 0)
175 {
176 perror("read");
177 exit(1);
178 }
179 RC4(&key,(unsigned int)i,(unsigned char *)buf,
180 (unsigned char *)buf);
181 i=fwrite(buf,(unsigned int)i,1,out);
182 if (i != 1)
183 {
184 perror("write");
185 exit(1);
186 }
187 }
188 fclose(out);
189 fclose(in);
190 exit(0);
191 return(1);
192 }
193
diff --git a/src/lib/libcrypto/rc4/rc4.h b/src/lib/libcrypto/rc4/rc4.h
index 88ceb46bc5..29d1acccf5 100644
--- a/src/lib/libcrypto/rc4/rc4.h
+++ b/src/lib/libcrypto/rc4/rc4.h
@@ -79,7 +79,6 @@ typedef struct rc4_key_st
79 79
80const char *RC4_options(void); 80const char *RC4_options(void);
81void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data); 81void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data);
82void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data);
83void RC4(RC4_KEY *key, size_t len, const unsigned char *indata, 82void RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
84 unsigned char *outdata); 83 unsigned char *outdata);
85 84
diff --git a/src/lib/libcrypto/rc4/rc4_skey.c b/src/lib/libcrypto/rc4/rc4_skey.c
index fda27636e7..b22c40b0bd 100644
--- a/src/lib/libcrypto/rc4/rc4_skey.c
+++ b/src/lib/libcrypto/rc4/rc4_skey.c
@@ -85,7 +85,7 @@ const char *RC4_options(void)
85 * Date: Wed, 14 Sep 1994 06:35:31 GMT 85 * Date: Wed, 14 Sep 1994 06:35:31 GMT
86 */ 86 */
87 87
88void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) 88void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
89 { 89 {
90 register RC4_INT tmp; 90 register RC4_INT tmp;
91 register int id1,id2; 91 register int id1,id2;
@@ -104,6 +104,40 @@ void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
104 d[(n)]=d[id2]; \ 104 d[(n)]=d[id2]; \
105 d[id2]=tmp; } 105 d[id2]=tmp; }
106 106
107#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
108# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
109 defined(__INTEL__) || \
110 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64)
111 if (sizeof(RC4_INT) > 1) {
112 /*
113 * Unlike all other x86 [and x86_64] implementations,
114 * Intel P4 core [including EM64T] was found to perform
115 * poorly with wider RC4_INT. Performance improvement
116 * for IA-32 hand-coded assembler turned out to be 2.8x
117 * if re-coded for RC4_CHAR! It's however inappropriate
118 * to just switch to RC4_CHAR for x86[_64], as non-P4
119 * implementations suffer from significant performance
120 * losses then, e.g. PIII exhibits >2x deterioration,
121 * and so does Opteron. In order to assure optimal
122 * all-round performance, let us [try to] detect P4 at
123 * run-time by checking upon HTT bit in CPU capability
124 * vector and set up compressed key schedule, which is
125 * recognized by correspondingly updated assembler
126 * module...
127 * <appro@fy.chalmers.se>
128 */
129 if (OPENSSL_ia32cap_P & (1<<28)) {
130 unsigned char *cp=(unsigned char *)d;
131
132 for (i=0;i<256;i++) cp[i]=i;
133 for (i=0;i<256;i++) SK_LOOP(cp,i);
134 /* mark schedule as compressed! */
135 d[256/sizeof(RC4_INT)]=-1;
136 return;
137 }
138 }
139# endif
140#endif
107 for (i=0; i < 256; i++) d[i]=i; 141 for (i=0; i < 256; i++) d[i]=i;
108 for (i=0; i < 256; i+=4) 142 for (i=0; i < 256; i+=4)
109 { 143 {
diff --git a/src/lib/libcrypto/rc4/rc4s.cpp b/src/lib/libcrypto/rc4/rc4s.cpp
new file mode 100644
index 0000000000..3814fde997
--- /dev/null
+++ b/src/lib/libcrypto/rc4/rc4s.cpp
@@ -0,0 +1,73 @@
1//
2// gettsc.inl
3//
4// gives access to the Pentium's (secret) cycle counter
5//
6// This software was written by Leonard Janke (janke@unixg.ubc.ca)
7// in 1996-7 and is entered, by him, into the public domain.
8
// GetTSC(tsc): execute RDTSC (opcode 0x0f 0x31) and store the low
// 32 bits of the counter (the EAX half of the result) in tsc.  The
// high half returned in EDX is discarded by every variant below.
#if defined(__WATCOMC__)
void GetTSC(unsigned long&);
#pragma aux GetTSC = 0x0f 0x31 "mov [edi], eax" parm [edi] modify [edx eax];
#elif defined(__GNUC__)
inline
void GetTSC(unsigned long& tsc)
{
  // RDTSC writes EAX and EDX.  Both are declared as outputs ("=a",
  // "=d") so the compiler knows they change; naming a register as an
  // output and as a clobber at the same time is rejected by GCC.
  unsigned int lo, hi;
  asm volatile(".byte 15, 49\n\t"
	       : "=a" (lo), "=d" (hi));
  (void)hi;
  tsc=lo;
}
#elif defined(_MSC_VER)
inline
void GetTSC(unsigned long& tsc)
{
  unsigned long a;
  __asm _emit 0fh
  __asm _emit 31h
  __asm mov a, eax;
  tsc=a;
}
#endif
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <openssl/rc4.h>
36
37void main(int argc,char *argv[])
38 {
39 unsigned char buffer[1024];
40 RC4_KEY ctx;
41 unsigned long s1,s2,e1,e2;
42 unsigned char k[16];
43 unsigned long data[2];
44 unsigned char iv[8];
45 int i,num=64,numm;
46 int j=0;
47
48 if (argc >= 2)
49 num=atoi(argv[1]);
50
51 if (num == 0) num=256;
52 if (num > 1024-16) num=1024-16;
53 numm=num+8;
54
55 for (j=0; j<6; j++)
56 {
57 for (i=0; i<10; i++) /**/
58 {
59 RC4(&ctx,numm,buffer,buffer);
60 GetTSC(s1);
61 RC4(&ctx,numm,buffer,buffer);
62 GetTSC(e1);
63 GetTSC(s2);
64 RC4(&ctx,num,buffer,buffer);
65 GetTSC(e2);
66 RC4(&ctx,num,buffer,buffer);
67 }
68
69 printf("RC4 (%d bytes) %d %d (%d) - 8 bytes\n",num,
70 e1-s1,e2-s2,(e1-s1)-(e2-s2));
71 }
72 }
73
diff --git a/src/lib/libcrypto/rc4/rc4speed.c b/src/lib/libcrypto/rc4/rc4speed.c
new file mode 100644
index 0000000000..0ebd38123d
--- /dev/null
+++ b/src/lib/libcrypto/rc4/rc4speed.c
@@ -0,0 +1,253 @@
1/* crypto/rc4/rc4speed.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/* 11-Sep-92 Andrew Daviel Support for Silicon Graphics IRIX added */
60/* 06-Apr-92 Luke Brennan Support for VMS and add extra signal calls */
61
62#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
63#define TIMES
64#endif
65
66#include <stdio.h>
67
68#include <openssl/e_os2.h>
69#include OPENSSL_UNISTD_IO
70OPENSSL_DECLARE_EXIT
71
72#ifndef OPENSSL_SYS_NETWARE
73#include <signal.h>
74#endif
75
76#ifndef _IRIX
77#include <time.h>
78#endif
79#ifdef TIMES
80#include <sys/types.h>
81#include <sys/times.h>
82#endif
83
84/* Depending on the VMS version, the tms structure is perhaps defined.
85 The __TMS macro will show if it was. If it wasn't defined, we should
86 undefine TIMES, since that tells the rest of the program how things
87 should be handled. -- Richard Levitte */
88#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
89#undef TIMES
90#endif
91
92#ifndef TIMES
93#include <sys/timeb.h>
94#endif
95
96#if defined(sun) || defined(__ultrix)
97#define _POSIX_SOURCE
98#include <limits.h>
99#include <sys/param.h>
100#endif
101
102#include <openssl/rc4.h>
103
104/* The following is from the times(3) man page. It may need to be changed */
105#ifndef HZ
106#ifndef CLK_TCK
107#define HZ 100.0
108#else /* CLK_TCK */
109#define HZ ((double)CLK_TCK)
110#endif
111#endif
112
113#define BUFSIZE ((long)1024)
114long run=0;
115
116double Time_F(int s);
117#ifdef SIGALRM
118#if defined(__STDC__) || defined(sgi) || defined(_AIX)
119#define SIGRETTYPE void
120#else
121#define SIGRETTYPE int
122#endif
123
124SIGRETTYPE sig_done(int sig);
125SIGRETTYPE sig_done(int sig)
126 {
127 signal(SIGALRM,sig_done);
128 run=0;
129#ifdef LINT
130 sig=sig;
131#endif
132 }
133#endif
134
135#define START 0
136#define STOP 1
137
138double Time_F(int s)
139 {
140 double ret;
141#ifdef TIMES
142 static struct tms tstart,tend;
143
144 if (s == START)
145 {
146 times(&tstart);
147 return(0);
148 }
149 else
150 {
151 times(&tend);
152 ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
153 return((ret == 0.0)?1e-6:ret);
154 }
155#else /* !times() */
156 static struct timeb tstart,tend;
157 long i;
158
159 if (s == START)
160 {
161 ftime(&tstart);
162 return(0);
163 }
164 else
165 {
166 ftime(&tend);
167 i=(long)tend.millitm-(long)tstart.millitm;
168 ret=((double)(tend.time-tstart.time))+((double)i)/1e3;
169 return((ret == 0.0)?1e-6:ret);
170 }
171#endif
172 }
173
174int main(int argc, char **argv)
175 {
176 long count;
177 static unsigned char buf[BUFSIZE];
178 static unsigned char key[] ={
179 0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,
180 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10,
181 };
182 RC4_KEY sch;
183 double a,b,c,d;
184#ifndef SIGALRM
185 long ca,cb,cc;
186#endif
187
188#ifndef TIMES
189 printf("To get the most accurate results, try to run this\n");
190 printf("program when this computer is idle.\n");
191#endif
192
193#ifndef SIGALRM
194 printf("First we calculate the approximate speed ...\n");
195 RC4_set_key(&sch,16,key);
196 count=10;
197 do {
198 long i;
199 unsigned long data[2];
200
201 count*=2;
202 Time_F(START);
203 for (i=count; i; i--)
204 RC4(&sch,8,buf,buf);
205 d=Time_F(STOP);
206 } while (d < 3.0);
207 ca=count/512;
208 cc=count*8/BUFSIZE+1;
209 printf("Doing RC4_set_key %ld times\n",ca);
210#define COND(d) (count != (d))
211#define COUNT(d) (d)
212#else
213#define COND(c) (run)
214#define COUNT(d) (count)
215 signal(SIGALRM,sig_done);
216 printf("Doing RC4_set_key for 10 seconds\n");
217 alarm(10);
218#endif
219
220 Time_F(START);
221 for (count=0,run=1; COND(ca); count+=4)
222 {
223 RC4_set_key(&sch,16,key);
224 RC4_set_key(&sch,16,key);
225 RC4_set_key(&sch,16,key);
226 RC4_set_key(&sch,16,key);
227 }
228 d=Time_F(STOP);
229 printf("%ld RC4_set_key's in %.2f seconds\n",count,d);
230 a=((double)COUNT(ca))/d;
231
232#ifdef SIGALRM
233 printf("Doing RC4 on %ld byte blocks for 10 seconds\n",BUFSIZE);
234 alarm(10);
235#else
236 printf("Doing RC4 %ld times on %ld byte blocks\n",cc,BUFSIZE);
237#endif
238 Time_F(START);
239 for (count=0,run=1; COND(cc); count++)
240 RC4(&sch,BUFSIZE,buf,buf);
241 d=Time_F(STOP);
242 printf("%ld RC4's of %ld byte blocks in %.2f second\n",
243 count,BUFSIZE,d);
244 c=((double)COUNT(cc)*BUFSIZE)/d;
245
246 printf("RC4 set_key per sec = %12.2f (%9.3fuS)\n",a,1.0e6/a);
247 printf("RC4 bytes per sec = %12.2f (%9.3fuS)\n",c,8.0e6/c);
248 exit(0);
249#if defined(LINT) || defined(OPENSSL_SYS_MSDOS)
250 return(0);
251#endif
252 }
253
diff --git a/src/lib/libcrypto/rc4/rc4test.c b/src/lib/libcrypto/rc4/rc4test.c
new file mode 100644
index 0000000000..633a79e758
--- /dev/null
+++ b/src/lib/libcrypto/rc4/rc4test.c
@@ -0,0 +1,236 @@
1/* crypto/rc4/rc4test.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62
63#include "../e_os.h"
64
65#ifdef OPENSSL_NO_RC4
/* Stub used when RC4 is compiled out: print a notice and return 0. */
int main(int argc, char *argv[])
{
	fputs("No RC4 support\n",stdout);
	return 0;
}
71#else
72#include <openssl/rc4.h>
73#include <openssl/sha.h>
74
75static unsigned char keys[7][30]={
76 {8,0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef},
77 {8,0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef},
78 {8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
79 {4,0xef,0x01,0x23,0x45},
80 {8,0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef},
81 {4,0xef,0x01,0x23,0x45},
82 };
83
84static unsigned char data_len[7]={8,8,8,20,28,10};
85static unsigned char data[7][30]={
86 {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xff},
87 {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff},
88 {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff},
89 {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
90 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
91 0x00,0x00,0x00,0x00,0xff},
92 {0x12,0x34,0x56,0x78,0x9A,0xBC,0xDE,0xF0,
93 0x12,0x34,0x56,0x78,0x9A,0xBC,0xDE,0xF0,
94 0x12,0x34,0x56,0x78,0x9A,0xBC,0xDE,0xF0,
95 0x12,0x34,0x56,0x78,0xff},
96 {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff},
97 {0},
98 };
99
100static unsigned char output[7][30]={
101 {0x75,0xb7,0x87,0x80,0x99,0xe0,0xc5,0x96,0x00},
102 {0x74,0x94,0xc2,0xe7,0x10,0x4b,0x08,0x79,0x00},
103 {0xde,0x18,0x89,0x41,0xa3,0x37,0x5d,0x3a,0x00},
104 {0xd6,0xa1,0x41,0xa7,0xec,0x3c,0x38,0xdf,
105 0xbd,0x61,0x5a,0x11,0x62,0xe1,0xc7,0xba,
106 0x36,0xb6,0x78,0x58,0x00},
107 {0x66,0xa0,0x94,0x9f,0x8a,0xf7,0xd6,0x89,
108 0x1f,0x7f,0x83,0x2b,0xa8,0x33,0xc0,0x0c,
109 0x89,0x2e,0xbe,0x30,0x14,0x3c,0xe2,0x87,
110 0x40,0x01,0x1e,0xcf,0x00},
111 {0xd6,0xa1,0x41,0xa7,0xec,0x3c,0x38,0xdf,0xbd,0x61,0x00},
112 {0},
113 };
114
115int main(int argc, char *argv[])
116 {
117 int i,err=0;
118 int j;
119 unsigned char *p;
120 RC4_KEY key;
121 unsigned char obuf[512];
122
123 for (i=0; i<6; i++)
124 {
125 RC4_set_key(&key,keys[i][0],&(keys[i][1]));
126 memset(obuf,0x00,sizeof(obuf));
127 RC4(&key,data_len[i],&(data[i][0]),obuf);
128 if (memcmp(obuf,output[i],data_len[i]+1) != 0)
129 {
130 printf("error calculating RC4\n");
131 printf("output:");
132 for (j=0; j<data_len[i]+1; j++)
133 printf(" %02x",obuf[j]);
134 printf("\n");
135 printf("expect:");
136 p= &(output[i][0]);
137 for (j=0; j<data_len[i]+1; j++)
138 printf(" %02x",*(p++));
139 printf("\n");
140 err++;
141 }
142 else
143 printf("test %d ok\n",i);
144 }
145 printf("test end processing ");
146 for (i=0; i<data_len[3]; i++)
147 {
148 RC4_set_key(&key,keys[3][0],&(keys[3][1]));
149 memset(obuf,0x00,sizeof(obuf));
150 RC4(&key,i,&(data[3][0]),obuf);
151 if ((memcmp(obuf,output[3],i) != 0) || (obuf[i] != 0))
152 {
153 printf("error in RC4 length processing\n");
154 printf("output:");
155 for (j=0; j<i+1; j++)
156 printf(" %02x",obuf[j]);
157 printf("\n");
158 printf("expect:");
159 p= &(output[3][0]);
160 for (j=0; j<i; j++)
161 printf(" %02x",*(p++));
162 printf(" 00\n");
163 err++;
164 }
165 else
166 {
167 printf(".");
168 fflush(stdout);
169 }
170 }
171 printf("done\n");
172 printf("test multi-call ");
173 for (i=0; i<data_len[3]; i++)
174 {
175 RC4_set_key(&key,keys[3][0],&(keys[3][1]));
176 memset(obuf,0x00,sizeof(obuf));
177 RC4(&key,i,&(data[3][0]),obuf);
178 RC4(&key,data_len[3]-i,&(data[3][i]),&(obuf[i]));
179 if (memcmp(obuf,output[3],data_len[3]+1) != 0)
180 {
181 printf("error in RC4 multi-call processing\n");
182 printf("output:");
183 for (j=0; j<data_len[3]+1; j++)
184 printf(" %02x",obuf[j]);
185 printf("\n");
186 printf("expect:");
187 p= &(output[3][0]);
188 for (j=0; j<data_len[3]+1; j++)
189 printf(" %02x",*(p++));
190 err++;
191 }
192 else
193 {
194 printf(".");
195 fflush(stdout);
196 }
197 }
198 printf("done\n");
199 printf("bulk test ");
200 { unsigned char buf[513];
201 SHA_CTX c;
202 unsigned char md[SHA_DIGEST_LENGTH];
203 static unsigned char expected[]={
204 0xa4,0x7b,0xcc,0x00,0x3d,0xd0,0xbd,0xe1,0xac,0x5f,
205 0x12,0x1e,0x45,0xbc,0xfb,0x1a,0xa1,0xf2,0x7f,0xc5 };
206
207 RC4_set_key(&key,keys[0][0],&(keys[3][1]));
208 memset(buf,'\0',sizeof(buf));
209 SHA1_Init(&c);
210 for (i=0;i<2571;i++) {
211 RC4(&key,sizeof(buf),buf,buf);
212 SHA1_Update(&c,buf,sizeof(buf));
213 }
214 SHA1_Final(md,&c);
215
216 if (memcmp(md,expected,sizeof(md))) {
217 printf("error in RC4 bulk test\n");
218 printf("output:");
219 for (j=0; j<(int)sizeof(md); j++)
220 printf(" %02x",md[j]);
221 printf("\n");
222 printf("expect:");
223 for (j=0; j<(int)sizeof(md); j++)
224 printf(" %02x",expected[j]);
225 printf("\n");
226 err++;
227 }
228 else printf("ok\n");
229 }
230#ifdef OPENSSL_SYS_NETWARE
231 if (err) printf("ERROR: %d\n", err);
232#endif
233 EXIT(err);
234 return(0);
235 }
236#endif
diff --git a/src/lib/libcrypto/rc4/rrc4.doc b/src/lib/libcrypto/rc4/rrc4.doc
new file mode 100644
index 0000000000..2f9a953c12
--- /dev/null
+++ b/src/lib/libcrypto/rc4/rrc4.doc
@@ -0,0 +1,278 @@
1Newsgroups: sci.crypt,alt.security,comp.security.misc,alt.privacy
2Path: ghost.dsi.unimi.it!univ-lyon1.fr!jussieu.fr!zaphod.crihan.fr!warwick!clyde.open.ac.uk!strath-cs!bnr.co.uk!bt!pipex!howland.reston.ans.net!europa.eng.gtefsd.com!MathWorks.Com!yeshua.marcam.com!charnel.ecst.csuchico.edu!csusac!csus.edu!netcom.com!sterndark
3From: sterndark@netcom.com (David Sterndark)
4Subject: RC4 Algorithm revealed.
5Message-ID: <sternCvKL4B.Hyy@netcom.com>
6Sender: sterndark@netcom.com
7Organization: NETCOM On-line Communication Services (408 261-4700 guest)
8X-Newsreader: TIN [version 1.2 PL1]
9Date: Wed, 14 Sep 1994 06:35:31 GMT
10Lines: 263
11Xref: ghost.dsi.unimi.it sci.crypt:27332 alt.security:14732 comp.security.misc:11701 alt.privacy:16026
12
13I am shocked, shocked, I tell you, shocked, to discover
14that the cypherpunks have illegally and criminally revealed
15a crucial RSA trade secret and harmed the security of
16America by reverse engineering the RC4 algorithm and
17publishing it to the world.
18
19On Saturday morning an anonymous cypherpunk wrote:
20
21
22 SUBJECT: RC4 Source Code
23
24
25 I've tested this. It is compatible with the RC4 object module
26 that comes in the various RSA toolkits.
27
28 /* rc4.h */
29 typedef struct rc4_key
30 {
31 unsigned char state[256];
32 unsigned char x;
33 unsigned char y;
34 } rc4_key;
35 void prepare_key(unsigned char *key_data_ptr,int key_data_len,
36 rc4_key *key);
37 void rc4(unsigned char *buffer_ptr,int buffer_len,rc4_key * key);
38
39
40 /*rc4.c */
41 #include "rc4.h"
42 static void swap_byte(unsigned char *a, unsigned char *b);
43 void prepare_key(unsigned char *key_data_ptr, int key_data_len,
44 rc4_key *key)
45 {
46 unsigned char swapByte;
47 unsigned char index1;
48 unsigned char index2;
49 unsigned char* state;
50 short counter;
51
52 state = &key->state[0];
53 for(counter = 0; counter < 256; counter++)
54 state[counter] = counter;
55 key->x = 0;
56 key->y = 0;
57 index1 = 0;
58 index2 = 0;
59 for(counter = 0; counter < 256; counter++)
60 {
61 index2 = (key_data_ptr[index1] + state[counter] +
62 index2) % 256;
63 swap_byte(&state[counter], &state[index2]);
64
65 index1 = (index1 + 1) % key_data_len;
66 }
67 }
68
69 void rc4(unsigned char *buffer_ptr, int buffer_len, rc4_key *key)
70 {
71 unsigned char x;
72 unsigned char y;
73 unsigned char* state;
74 unsigned char xorIndex;
75 short counter;
76
77 x = key->x;
78 y = key->y;
79
80 state = &key->state[0];
81 for(counter = 0; counter < buffer_len; counter ++)
82 {
83 x = (x + 1) % 256;
84 y = (state[x] + y) % 256;
85 swap_byte(&state[x], &state[y]);
86
87 xorIndex = (state[x] + state[y]) % 256;
88
89 buffer_ptr[counter] ^= state[xorIndex];
90 }
91 key->x = x;
92 key->y = y;
93 }
94
95 static void swap_byte(unsigned char *a, unsigned char *b)
96 {
97 unsigned char swapByte;
98
99 swapByte = *a;
100 *a = *b;
101 *b = swapByte;
102 }
103
104
105
106Another cypherpunk, this one not anonymous, tested the
107output from this algorithm against the output from
108official RC4 object code:
109
110
111 Date: Tue, 13 Sep 94 18:37:56 PDT
112 From: ekr@eit.COM (Eric Rescorla)
113 Message-Id: <9409140137.AA17743@eitech.eit.com>
114 Subject: RC4 compatibility testing
115 Cc: cypherpunks@toad.com
116
117 One data point:
118
119 I can't say anything about the internals of RC4 versus the
120 algorithm that Bill Sommerfeld is rightly calling 'Alleged RC4',
121 since I don't know anything about RC4's internals.
122
123 However, I do have a (legitimately acquired) copy of BSAFE2 and
124 so I'm able to compare the output of this algorithm to the output
125 of genuine RC4 as found in BSAFE. I chose a set of test vectors
126 and ran them through both algorithms. The algorithms appear to
127 give identical results, at least with these key/plaintext pairs.
128
129 I note that this is the algorithm _without_ Hal Finney's
130 proposed modification
131
132 (see <199409130605.XAA24133@jobe.shell.portal.com>).
133
134 The vectors I used (together with the ciphertext they produce)
135 follow at the end of this message.
136
137 -Ekr
138
139 Disclaimer: This posting does not reflect the opinions of EIT.
140
141 --------------------results follow--------------
142 Test vector 0
143 Key: 0x01 0x23 0x45 0x67 0x89 0xab 0xcd 0xef
144 Input: 0x01 0x23 0x45 0x67 0x89 0xab 0xcd 0xef
145 0 Output: 0x75 0xb7 0x87 0x80 0x99 0xe0 0xc5 0x96
146
147 Test vector 1
148 Key: 0x01 0x23 0x45 0x67 0x89 0xab 0xcd 0xef
149 Input: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
150 0 Output: 0x74 0x94 0xc2 0xe7 0x10 0x4b 0x08 0x79
151
152 Test vector 2
153 Key: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
154 Input: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
155 0 Output: 0xde 0x18 0x89 0x41 0xa3 0x37 0x5d 0x3a
156
157 Test vector 3
158 Key: 0xef 0x01 0x23 0x45
159 Input: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
160 0 Output: 0xd6 0xa1 0x41 0xa7 0xec 0x3c 0x38 0xdf 0xbd 0x61
161
162 Test vector 4
163 Key: 0x01 0x23 0x45 0x67 0x89 0xab 0xcd 0xef
164 Input: 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
165 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
166 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
167 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
168 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
169 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
170 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
171 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
172 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
173 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
174 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
175 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
176 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
177 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
178 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
179 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
180 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
181 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
182 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
183 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
184 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
185 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
186 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
187 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
188 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
189 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
190 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
191 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
192 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
193 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
194 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
195 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
196 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
197 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
198 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
199 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
200 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
201 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
202 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
203 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
204 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
205 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
206 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
207 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
208 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
209 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
210 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
211 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
212 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
213 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
214 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01
215 0x01
216 0 Output: 0x75 0x95 0xc3 0xe6 0x11 0x4a 0x09 0x78 0x0c 0x4a 0xd4
217 0x52 0x33 0x8e 0x1f 0xfd 0x9a 0x1b 0xe9 0x49 0x8f
218 0x81 0x3d 0x76 0x53 0x34 0x49 0xb6 0x77 0x8d 0xca
219 0xd8 0xc7 0x8a 0x8d 0x2b 0xa9 0xac 0x66 0x08 0x5d
220 0x0e 0x53 0xd5 0x9c 0x26 0xc2 0xd1 0xc4 0x90 0xc1
221 0xeb 0xbe 0x0c 0xe6 0x6d 0x1b 0x6b 0x1b 0x13 0xb6
222 0xb9 0x19 0xb8 0x47 0xc2 0x5a 0x91 0x44 0x7a 0x95
223 0xe7 0x5e 0x4e 0xf1 0x67 0x79 0xcd 0xe8 0xbf 0x0a
224 0x95 0x85 0x0e 0x32 0xaf 0x96 0x89 0x44 0x4f 0xd3
225 0x77 0x10 0x8f 0x98 0xfd 0xcb 0xd4 0xe7 0x26 0x56
226 0x75 0x00 0x99 0x0b 0xcc 0x7e 0x0c 0xa3 0xc4 0xaa
227 0xa3 0x04 0xa3 0x87 0xd2 0x0f 0x3b 0x8f 0xbb 0xcd
228 0x42 0xa1 0xbd 0x31 0x1d 0x7a 0x43 0x03 0xdd 0xa5
229 0xab 0x07 0x88 0x96 0xae 0x80 0xc1 0x8b 0x0a 0xf6
230 0x6d 0xff 0x31 0x96 0x16 0xeb 0x78 0x4e 0x49 0x5a
231 0xd2 0xce 0x90 0xd7 0xf7 0x72 0xa8 0x17 0x47 0xb6
232 0x5f 0x62 0x09 0x3b 0x1e 0x0d 0xb9 0xe5 0xba 0x53
233 0x2f 0xaf 0xec 0x47 0x50 0x83 0x23 0xe6 0x71 0x32
234 0x7d 0xf9 0x44 0x44 0x32 0xcb 0x73 0x67 0xce 0xc8
235 0x2f 0x5d 0x44 0xc0 0xd0 0x0b 0x67 0xd6 0x50 0xa0
236 0x75 0xcd 0x4b 0x70 0xde 0xdd 0x77 0xeb 0x9b 0x10
237 0x23 0x1b 0x6b 0x5b 0x74 0x13 0x47 0x39 0x6d 0x62
238 0x89 0x74 0x21 0xd4 0x3d 0xf9 0xb4 0x2e 0x44 0x6e
239 0x35 0x8e 0x9c 0x11 0xa9 0xb2 0x18 0x4e 0xcb 0xef
240 0x0c 0xd8 0xe7 0xa8 0x77 0xef 0x96 0x8f 0x13 0x90
241 0xec 0x9b 0x3d 0x35 0xa5 0x58 0x5c 0xb0 0x09 0x29
242 0x0e 0x2f 0xcd 0xe7 0xb5 0xec 0x66 0xd9 0x08 0x4b
243 0xe4 0x40 0x55 0xa6 0x19 0xd9 0xdd 0x7f 0xc3 0x16
244 0x6f 0x94 0x87 0xf7 0xcb 0x27 0x29 0x12 0x42 0x64
245 0x45 0x99 0x85 0x14 0xc1 0x5d 0x53 0xa1 0x8c 0x86
246 0x4c 0xe3 0xa2 0xb7 0x55 0x57 0x93 0x98 0x81 0x26
247 0x52 0x0e 0xac 0xf2 0xe3 0x06 0x6e 0x23 0x0c 0x91
248 0xbe 0xe4 0xdd 0x53 0x04 0xf5 0xfd 0x04 0x05 0xb3
249 0x5b 0xd9 0x9c 0x73 0x13 0x5d 0x3d 0x9b 0xc3 0x35
250 0xee 0x04 0x9e 0xf6 0x9b 0x38 0x67 0xbf 0x2d 0x7b
251 0xd1 0xea 0xa5 0x95 0xd8 0xbf 0xc0 0x06 0x6f 0xf8
252 0xd3 0x15 0x09 0xeb 0x0c 0x6c 0xaa 0x00 0x6c 0x80
253 0x7a 0x62 0x3e 0xf8 0x4c 0x3d 0x33 0xc1 0x95 0xd2
254 0x3e 0xe3 0x20 0xc4 0x0d 0xe0 0x55 0x81 0x57 0xc8
255 0x22 0xd4 0xb8 0xc5 0x69 0xd8 0x49 0xae 0xd5 0x9d
256 0x4e 0x0f 0xd7 0xf3 0x79 0x58 0x6b 0x4b 0x7f 0xf6
257 0x84 0xed 0x6a 0x18 0x9f 0x74 0x86 0xd4 0x9b 0x9c
258 0x4b 0xad 0x9b 0xa2 0x4b 0x96 0xab 0xf9 0x24 0x37
259 0x2c 0x8a 0x8f 0xff 0xb1 0x0d 0x55 0x35 0x49 0x00
260 0xa7 0x7a 0x3d 0xb5 0xf2 0x05 0xe1 0xb9 0x9f 0xcd
261 0x86 0x60 0x86 0x3a 0x15 0x9a 0xd4 0xab 0xe4 0x0f
262 0xa4 0x89 0x34 0x16 0x3d 0xdd 0xe5 0x42 0xa6 0x58
263 0x55 0x40 0xfd 0x68 0x3c 0xbf 0xd8 0xc0 0x0f 0x12
264 0x12 0x9a 0x28 0x4d 0xea 0xcc 0x4c 0xde 0xfe 0x58
265 0xbe 0x71 0x37 0x54 0x1c 0x04 0x71 0x26 0xc8 0xd4
266 0x9e 0x27 0x55 0xab 0x18 0x1a 0xb7 0xe9 0x40 0xb0
267 0xc0
268
269
270
271--
272 ---------------------------------------------------------------------
273We have the right to defend ourselves and our
274property, because of the kind of animals that we James A. Donald
275are. True law derives from this right, not from
276the arbitrary power of the omnipotent state. jamesd@netcom.com
277
278