summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/rc4
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/rc4')
-rw-r--r--src/lib/libcrypto/rc4/asm/rc4-586.pl114
-rw-r--r--src/lib/libcrypto/rc4/rc4.h7
-rw-r--r--src/lib/libcrypto/rc4/rc4_enc.c4
-rw-r--r--src/lib/libcrypto/rc4/rc4_locl.h1
-rw-r--r--src/lib/libcrypto/rc4/rc4_skey.c8
5 files changed, 104 insertions, 30 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl
index 7ef889e5a1..d6e98f0811 100644
--- a/src/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -1,16 +1,37 @@
1#!/usr/local/bin/perl 1#!/usr/local/bin/perl
2 2
3# define for pentium pro friendly version 3# At some point it became apparent that the original SSLeay RC4
4# assembler implementation performs suboptimaly on latest IA-32
5# microarchitectures. After re-tuning performance has changed as
6# following:
7#
8# Pentium +0%
9# Pentium III +17%
10# AMD +52%(*)
11# P4 +180%(**)
12#
13# (*) This number is actually a trade-off:-) It's possible to
14# achieve +72%, but at the cost of -48% off PIII performance.
15# In other words code performing further 13% faster on AMD
16# would perform almost 2 times slower on Intel PIII...
17# For reference! This code delivers ~80% of rc4-amd64.pl
18# performance on the same Opteron machine.
19# (**) This number requires compressed key schedule set up by
20# RC4_set_key and therefore doesn't apply to 0.9.7 [option for
21# compressed key schedule is implemented in 0.9.8 and later,
22# see commentary section in rc4_skey.c for further details].
23#
24# <appro@fy.chalmers.se>
4 25
5push(@INC,"perlasm","../../perlasm"); 26push(@INC,"perlasm","../../perlasm");
6require "x86asm.pl"; 27require "x86asm.pl";
7 28
8&asm_init($ARGV[0],"rc4-586.pl"); 29&asm_init($ARGV[0],"rc4-586.pl");
9 30
10$tx="eax"; 31$x="eax";
11$ty="ebx"; 32$y="ebx";
12$x="ecx"; 33$tx="ecx";
13$y="edx"; 34$ty="edx";
14$in="esi"; 35$in="esi";
15$out="edi"; 36$out="edi";
16$d="ebp"; 37$d="ebp";
@@ -31,7 +52,7 @@ sub RC4_loop
31 { 52 {
32 &mov($ty, &swtmp(2)); 53 &mov($ty, &swtmp(2));
33 &cmp($ty, $in); 54 &cmp($ty, $in);
34 &jle(&label("finished")); 55 &jbe(&label("finished"));
35 &inc($in); 56 &inc($in);
36 } 57 }
37 else 58 else
@@ -39,27 +60,23 @@ sub RC4_loop
39 &add($ty, 8); 60 &add($ty, 8);
40 &inc($in); 61 &inc($in);
41 &cmp($ty, $in); 62 &cmp($ty, $in);
42 &jl(&label("finished")); 63 &jb(&label("finished"));
43 &mov(&swtmp(2), $ty); 64 &mov(&swtmp(2), $ty);
44 } 65 }
45 } 66 }
46 # Moved out 67 # Moved out
47 # &mov( $tx, &DWP(0,$d,$x,4)) if $p < 0; 68 # &mov( $tx, &DWP(0,$d,$x,4)) if $p < 0;
48 69
49 &add( $y, $tx); 70 &add( &LB($y), &LB($tx));
50 &and( $y, 0xff);
51 &inc( $x); # NEXT ROUND
52 &mov( $ty, &DWP(0,$d,$y,4)); 71 &mov( $ty, &DWP(0,$d,$y,4));
53 # XXX 72 # XXX
54 &mov( &DWP(-4,$d,$x,4),$ty); # AGI 73 &mov( &DWP(0,$d,$x,4),$ty);
55 &add( $ty, $tx); 74 &add( $ty, $tx);
56 &and( $x, 0xff); # NEXT ROUND
57 &and( $ty, 0xff);
58 &mov( &DWP(0,$d,$y,4),$tx); 75 &mov( &DWP(0,$d,$y,4),$tx);
59 &nop(); 76 &and( $ty, 0xff);
60 &mov( $ty, &DWP(0,$d,$ty,4)); 77 &inc( &LB($x)); # NEXT ROUND
61 &mov( $tx, &DWP(0,$d,$x,4)) if $p < 1; # NEXT ROUND 78 &mov( $tx, &DWP(0,$d,$x,4)) if $p < 1; # NEXT ROUND
62 # XXX 79 &mov( $ty, &DWP(0,$d,$ty,4));
63 80
64 if (!$char) 81 if (!$char)
65 { 82 {
@@ -88,35 +105,47 @@ sub RC4
88 105
89 &function_begin_B($name,""); 106 &function_begin_B($name,"");
90 107
108 &mov($ty,&wparam(1)); # len
109 &cmp($ty,0);
110 &jne(&label("proceed"));
111 &ret();
112 &set_label("proceed");
113
91 &comment(""); 114 &comment("");
92 115
93 &push("ebp"); 116 &push("ebp");
94 &push("ebx"); 117 &push("ebx");
95 &mov( $d, &wparam(0)); # key
96 &mov( $ty, &wparam(1)); # num
97 &push("esi"); 118 &push("esi");
98 &push("edi"); 119 &xor( $x, $x); # avoid partial register stalls
120 &push("edi");
121 &xor( $y, $y); # avoid partial register stalls
122 &mov( $d, &wparam(0)); # key
123 &mov( $in, &wparam(2));
99 124
100 &mov( $x, &DWP(0,$d,"",1)); 125 &movb( &LB($x), &BP(0,$d,"",1));
101 &mov( $y, &DWP(4,$d,"",1)); 126 &movb( &LB($y), &BP(4,$d,"",1));
102 127
103 &mov( $in, &wparam(2)); 128 &mov( $out, &wparam(3));
104 &inc( $x); 129 &inc( &LB($x));
105 130
106 &stack_push(3); # 3 temp variables 131 &stack_push(3); # 3 temp variables
107 &add( $d, 8); 132 &add( $d, 8);
108 &and( $x, 0xff); 133
134 # detect compressed schedule, see commentary section in rc4_skey.c...
135 # in 0.9.7 context ~50 bytes below RC4_CHAR label remain redundant,
136 # as compressed key schedule is set up in 0.9.8 and later.
137 &cmp(&DWP(256,$d),-1);
138 &je(&label("RC4_CHAR"));
109 139
110 &lea( $ty, &DWP(-8,$ty,$in)); 140 &lea( $ty, &DWP(-8,$ty,$in));
111 141
112 # check for 0 length input 142 # check for 0 length input
113 143
114 &mov( $out, &wparam(3));
115 &mov( &swtmp(2), $ty); # this is now address to exit at 144 &mov( &swtmp(2), $ty); # this is now address to exit at
116 &mov( $tx, &DWP(0,$d,$x,4)); 145 &mov( $tx, &DWP(0,$d,$x,4));
117 146
118 &cmp( $ty, $in); 147 &cmp( $ty, $in);
119 &jl( &label("end")); # less than 8 bytes 148 &jb( &label("end")); # less than 8 bytes
120 149
121 &set_label("start"); 150 &set_label("start");
122 151
@@ -148,7 +177,7 @@ sub RC4
148 &mov( &DWP(-4,$out,"",0), $tx); 177 &mov( &DWP(-4,$out,"",0), $tx);
149 &mov( $tx, &DWP(0,$d,$x,4)); 178 &mov( $tx, &DWP(0,$d,$x,4));
150 &cmp($in, $ty); 179 &cmp($in, $ty);
151 &jle(&label("start")); 180 &jbe(&label("start"));
152 181
153 &set_label("end"); 182 &set_label("end");
154 183
@@ -162,10 +191,37 @@ sub RC4
162 &RC4_loop(5,0,1); 191 &RC4_loop(5,0,1);
163 &RC4_loop(6,1,1); 192 &RC4_loop(6,1,1);
164 193
194 &jmp(&label("finished"));
195
196 &align(16);
197 # this is essentially Intel P4 specific codepath, see rc4_skey.c,
198 # and is engaged in 0.9.8 and later context...
199 &set_label("RC4_CHAR");
200
201 &lea ($ty,&DWP(0,$in,$ty));
202 &mov (&swtmp(2),$ty);
203
204 # strangely enough unrolled loop performs over 20% slower...
205 &set_label("RC4_CHAR_loop");
206 &movz ($tx,&BP(0,$d,$x));
207 &add (&LB($y),&LB($tx));
208 &movz ($ty,&BP(0,$d,$y));
209 &movb (&BP(0,$d,$y),&LB($tx));
210 &movb (&BP(0,$d,$x),&LB($ty));
211 &add (&LB($ty),&LB($tx));
212 &movz ($ty,&BP(0,$d,$ty));
213 &xorb (&LB($ty),&BP(0,$in));
214 &movb (&BP(0,$out),&LB($ty));
215 &inc (&LB($x));
216 &inc ($in);
217 &inc ($out);
218 &cmp ($in,&swtmp(2));
219 &jb (&label("RC4_CHAR_loop"));
220
165 &set_label("finished"); 221 &set_label("finished");
166 &dec( $x); 222 &dec( $x);
167 &stack_pop(3); 223 &stack_pop(3);
168 &mov( &DWP(-4,$d,"",0),$y); 224 &movb( &BP(-4,$d,"",0),&LB($y));
169 &movb( &BP(-8,$d,"",0),&LB($x)); 225 &movb( &BP(-8,$d,"",0),&LB($x));
170 226
171 &function_end($name); 227 &function_end($name);
diff --git a/src/lib/libcrypto/rc4/rc4.h b/src/lib/libcrypto/rc4/rc4.h
index 8722091f2e..dd90d9fde0 100644
--- a/src/lib/libcrypto/rc4/rc4.h
+++ b/src/lib/libcrypto/rc4/rc4.h
@@ -73,10 +73,17 @@ typedef struct rc4_key_st
73 { 73 {
74 RC4_INT x,y; 74 RC4_INT x,y;
75 RC4_INT data[256]; 75 RC4_INT data[256];
76#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
77 /* see crypto/rc4/asm/rc4-ia64.S for further details... */
78 RC4_INT pad[512-256-2];
79#endif
76 } RC4_KEY; 80 } RC4_KEY;
77 81
78 82
79const char *RC4_options(void); 83const char *RC4_options(void);
84#ifdef OPENSSL_FIPS
85void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data);
86#endif
80void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data); 87void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data);
81void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, 88void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata,
82 unsigned char *outdata); 89 unsigned char *outdata);
diff --git a/src/lib/libcrypto/rc4/rc4_enc.c b/src/lib/libcrypto/rc4/rc4_enc.c
index d5f18a3a70..81a97ea3b7 100644
--- a/src/lib/libcrypto/rc4/rc4_enc.c
+++ b/src/lib/libcrypto/rc4/rc4_enc.c
@@ -77,6 +77,10 @@ void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata,
77 x=key->x; 77 x=key->x;
78 y=key->y; 78 y=key->y;
79 d=key->data; 79 d=key->data;
80#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
81 /* see crypto/rc4/asm/rc4-ia64.S for further details... */
82 d=(RC4_INT *)(((size_t)(d+255))&~(sizeof(key->data)-1));
83#endif
80 84
81#if defined(RC4_CHUNK) 85#if defined(RC4_CHUNK)
82 /* 86 /*
diff --git a/src/lib/libcrypto/rc4/rc4_locl.h b/src/lib/libcrypto/rc4/rc4_locl.h
index 3bb80b6ce9..c712e1632e 100644
--- a/src/lib/libcrypto/rc4/rc4_locl.h
+++ b/src/lib/libcrypto/rc4/rc4_locl.h
@@ -1,4 +1,5 @@
1#ifndef HEADER_RC4_LOCL_H 1#ifndef HEADER_RC4_LOCL_H
2#define HEADER_RC4_LOCL_H 2#define HEADER_RC4_LOCL_H
3#include <openssl/opensslconf.h> 3#include <openssl/opensslconf.h>
4#include <cryptlib.h>
4#endif 5#endif
diff --git a/src/lib/libcrypto/rc4/rc4_skey.c b/src/lib/libcrypto/rc4/rc4_skey.c
index bb10c1ebe2..07234f061a 100644
--- a/src/lib/libcrypto/rc4/rc4_skey.c
+++ b/src/lib/libcrypto/rc4/rc4_skey.c
@@ -57,6 +57,7 @@
57 */ 57 */
58 58
59#include <openssl/rc4.h> 59#include <openssl/rc4.h>
60#include <openssl/crypto.h>
60#include "rc4_locl.h" 61#include "rc4_locl.h"
61#include <openssl/opensslv.h> 62#include <openssl/opensslv.h>
62 63
@@ -85,7 +86,7 @@ const char *RC4_options(void)
85 * Date: Wed, 14 Sep 1994 06:35:31 GMT 86 * Date: Wed, 14 Sep 1994 06:35:31 GMT
86 */ 87 */
87 88
88void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) 89FIPS_NON_FIPS_VCIPHER_Init(RC4)
89 { 90 {
90 register RC4_INT tmp; 91 register RC4_INT tmp;
91 register int id1,id2; 92 register int id1,id2;
@@ -93,6 +94,11 @@ void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
93 unsigned int i; 94 unsigned int i;
94 95
95 d= &(key->data[0]); 96 d= &(key->data[0]);
97#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
98 /* see crypto/rc4/asm/rc4-ia64.S for further details... */
99 d=(RC4_INT *)(((size_t)(d+255))&~(sizeof(key->data)-1));
100#endif
101
96 for (i=0; i<256; i++) 102 for (i=0; i<256; i++)
97 d[i]=i; 103 d[i]=i;
98 key->x = 0; 104 key->x = 0;