diff options
author | djm <> | 2006-06-27 05:05:42 +0000 |
---|---|---|
committer | djm <> | 2006-06-27 05:05:42 +0000 |
commit | f6198d4d0ab97685dc56be2d48715ed39fcc74b9 (patch) | |
tree | 6e28360095ed5ba5ef1760a419c43eef4ef6946b /src/lib/libcrypto/rc4 | |
parent | 0ff0f9d99c40072de315264b0f602bd639e7f662 (diff) | |
download | openbsd-f6198d4d0ab97685dc56be2d48715ed39fcc74b9.tar.gz openbsd-f6198d4d0ab97685dc56be2d48715ed39fcc74b9.tar.bz2 openbsd-f6198d4d0ab97685dc56be2d48715ed39fcc74b9.zip |
import of openssl-0.9.7j
Diffstat (limited to 'src/lib/libcrypto/rc4')
-rwxr-xr-x | src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | 150 | ||||
-rw-r--r-- | src/lib/libcrypto/rc4/rc4.h | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/rc4/rc4_enc.c | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/rc4/rc4_skey.c | 5 |
4 files changed, 151 insertions, 12 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl new file mode 100755 index 0000000000..b628daca70 --- /dev/null +++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | |||
@@ -0,0 +1,150 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. Rights for redistribution and usage in source and binary | ||
6 | # forms are granted according to the OpenSSL license. | ||
7 | # ==================================================================== | ||
8 | # | ||
9 | # Unlike 0.9.7f this code expects RC4_CHAR back in config line! See | ||
10 | # commentary section in corresponding script in development branch | ||
11 | # for background information about this option carousel. For those | ||
12 | # who don't have energy to figure out these gory details, here is | ||
13 | # basis in form of performance matrix relative to the original | ||
14 | # 0.9.7e C code-base: | ||
15 | # | ||
16 | # 0.9.7e 0.9.7f this | ||
17 | # AMD64 1x 3.3x 2.4x | ||
18 | # EM64T 1x 0.8x 1.5x | ||
19 | # | ||
20 | # In other words idea is to trade -25% AMD64 performance to compensate | ||
21 | # for deterioration and gain +90% on EM64T core. Development branch | ||
22 | # maintains best performance for either target, i.e. 3.3x for AMD64 | ||
23 | # and 1.5x for EM64T. | ||
24 | |||
25 | $output=shift; # output file name: first command-line argument | ||
26 | |||
27 | open STDOUT,">$output" or die "can't open $output: $!"; # 'or', not '||': '||' binds to the ">$output" string (always true), so die could never fire | ||
28 | |||
29 | $dat="%rdi"; # arg1: base pointer; after the prologue's add of 2 the x/y bytes sit at -2/-1 and the table is indexed from 0 | ||
30 | $len="%rsi"; # arg2: number of bytes still to process | ||
31 | $inp="%rdx"; # arg3: input pointer | ||
32 | $out="%rcx"; # arg4: output pointer | ||
33 | |||
34 | @XX=("%r8","%r10"); # index x: current and next; the pair is rotated after each unrolled step | ||
35 | @TX=("%r9","%r11"); # table byte loaded at index x: current and next; rotated together with @XX | ||
36 | $YY="%r12"; # index y (register is pushed on entry and popped at exit) | ||
37 | $TY="%r13"; # table byte loaded at index y, then reused as the index of the keystream byte (pushed/popped as well) | ||
38 | |||
39 | $code=<<___;; | ||
40 | .text | ||
41 | |||
42 | .globl RC4 | ||
43 | .type RC4,\@function | ||
44 | .align 16 | ||
45 | RC4: or $len,$len | ||
46 | jne .Lentry | ||
47 | repret | ||
48 | .Lentry: | ||
49 | push %r12 | ||
50 | push %r13 | ||
51 | |||
52 | add \$2,$dat | ||
53 | movzb -2($dat),$XX[0]#d | ||
54 | movzb -1($dat),$YY#d | ||
55 | |||
56 | add \$1,$XX[0]#b | ||
57 | movzb ($dat,$XX[0]),$TX[0]#d | ||
58 | test \$-8,$len | ||
59 | jz .Lcloop1 | ||
60 | push %rbx | ||
61 | .align 16 # incidentally aligned already | ||
62 | .Lcloop8: | ||
63 | mov ($inp),%eax | ||
64 | mov 4($inp),%ebx | ||
65 | ___ | ||
66 | # unroll 2x4-wise, because 64-bit rotates kill Intel P4...; each pass of .Lcloop8 emits eight steps, four accumulated in %eax and four in %ebx | ||
67 | for ($i=0;$i<4;$i++) { # steps 0-3: keystream byte is XORed into %al, then %eax is rotated right by 8 | ||
68 | $code.=<<___; | ||
69 | add $TX[0]#b,$YY#b | ||
70 | lea 1($XX[0]),$XX[1] | ||
71 | movzb ($dat,$YY),$TY#d | ||
72 | movzb $XX[1]#b,$XX[1]#d | ||
73 | movzb ($dat,$XX[1]),$TX[1]#d | ||
74 | movb $TX[0]#b,($dat,$YY) | ||
75 | cmp $XX[1],$YY | ||
76 | movb $TY#b,($dat,$XX[0]) | ||
77 | jne .Lcmov$i # Intel cmov is sloooow... | ||
78 | mov $TX[0],$TX[1] | ||
79 | .Lcmov$i: | ||
80 | add $TX[0]#b,$TY#b | ||
81 | xor ($dat,$TY),%al | ||
82 | ror \$8,%eax | ||
83 | ___ | ||
84 | push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers: the next x / table-byte pair becomes the current one for the following step | ||
85 | } | ||
86 | for ($i=4;$i<8;$i++) { # steps 4-7: same sequence, but accumulating into %bl / %ebx; $i keeps the .Lcmov labels unique | ||
87 | $code.=<<___; | ||
88 | add $TX[0]#b,$YY#b | ||
89 | lea 1($XX[0]),$XX[1] | ||
90 | movzb ($dat,$YY),$TY#d | ||
91 | movzb $XX[1]#b,$XX[1]#d | ||
92 | movzb ($dat,$XX[1]),$TX[1]#d | ||
93 | movb $TX[0]#b,($dat,$YY) | ||
94 | cmp $XX[1],$YY | ||
95 | movb $TY#b,($dat,$XX[0]) | ||
96 | jne .Lcmov$i # Intel cmov is sloooow... | ||
97 | mov $TX[0],$TX[1] | ||
98 | .Lcmov$i: | ||
99 | add $TX[0]#b,$TY#b | ||
100 | xor ($dat,$TY),%bl | ||
101 | ror \$8,%ebx | ||
102 | ___ | ||
103 | push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers: the next x / table-byte pair becomes the current one for the following step | ||
104 | } | ||
105 | $code.=<<___; | ||
106 | lea -8($len),$len | ||
107 | mov %eax,($out) | ||
108 | lea 8($inp),$inp | ||
109 | mov %ebx,4($out) | ||
110 | lea 8($out),$out | ||
111 | |||
112 | test \$-8,$len | ||
113 | jnz .Lcloop8 | ||
114 | pop %rbx | ||
115 | cmp \$0,$len | ||
116 | jne .Lcloop1 | ||
117 | .Lexit: | ||
118 | sub \$1,$XX[0]#b | ||
119 | movb $XX[0]#b,-2($dat) | ||
120 | movb $YY#b,-1($dat) | ||
121 | |||
122 | pop %r13 | ||
123 | pop %r12 | ||
124 | repret | ||
125 | |||
126 | .align 16 | ||
127 | .Lcloop1: | ||
128 | add $TX[0]#b,$YY#b | ||
129 | movzb ($dat,$YY),$TY#d | ||
130 | movb $TX[0]#b,($dat,$YY) | ||
131 | movb $TY#b,($dat,$XX[0]) | ||
132 | add $TX[0]#b,$TY#b | ||
133 | add \$1,$XX[0]#b | ||
134 | movzb ($dat,$TY),$TY#d | ||
135 | movzb ($dat,$XX[0]),$TX[0]#d | ||
136 | xorb ($inp),$TY#b | ||
137 | lea 1($inp),$inp | ||
138 | movb $TY#b,($out) | ||
139 | lea 1($out),$out | ||
140 | sub \$1,$len | ||
141 | jnz .Lcloop1 | ||
142 | jmp .Lexit | ||
143 | .size RC4,.-RC4 | ||
144 | ___ | ||
145 | |||
146 | $code =~ s/#([bwd])/$1/gm; # drop the '#' separator before b/w/d size suffixes, so a register written as NAME#d comes out as NAMEd | ||
147 | |||
148 | $code =~ s/repret/.byte\t0xF3,0xC3/gm; # replace every "repret" placeholder with a .byte directive carrying the raw bytes 0xF3,0xC3 | ||
149 | |||
150 | print $code; # write the generated assembly to STDOUT (reopened onto $output at the top of the script) | ||
diff --git a/src/lib/libcrypto/rc4/rc4.h b/src/lib/libcrypto/rc4/rc4.h index dd90d9fde0..ae0cea75b8 100644 --- a/src/lib/libcrypto/rc4/rc4.h +++ b/src/lib/libcrypto/rc4/rc4.h | |||
@@ -73,10 +73,6 @@ typedef struct rc4_key_st | |||
73 | { | 73 | { |
74 | RC4_INT x,y; | 74 | RC4_INT x,y; |
75 | RC4_INT data[256]; | 75 | RC4_INT data[256]; |
76 | #if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) | ||
77 | /* see crypto/rc4/asm/rc4-ia64.S for further details... */ | ||
78 | RC4_INT pad[512-256-2]; | ||
79 | #endif | ||
80 | } RC4_KEY; | 76 | } RC4_KEY; |
81 | 77 | ||
82 | 78 | ||
diff --git a/src/lib/libcrypto/rc4/rc4_enc.c b/src/lib/libcrypto/rc4/rc4_enc.c index 81a97ea3b7..d5f18a3a70 100644 --- a/src/lib/libcrypto/rc4/rc4_enc.c +++ b/src/lib/libcrypto/rc4/rc4_enc.c | |||
@@ -77,10 +77,6 @@ void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata, | |||
77 | x=key->x; | 77 | x=key->x; |
78 | y=key->y; | 78 | y=key->y; |
79 | d=key->data; | 79 | d=key->data; |
80 | #if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) | ||
81 | /* see crypto/rc4/asm/rc4-ia64.S for further details... */ | ||
82 | d=(RC4_INT *)(((size_t)(d+255))&~(sizeof(key->data)-1)); | ||
83 | #endif | ||
84 | 80 | ||
85 | #if defined(RC4_CHUNK) | 81 | #if defined(RC4_CHUNK) |
86 | /* | 82 | /* |
diff --git a/src/lib/libcrypto/rc4/rc4_skey.c b/src/lib/libcrypto/rc4/rc4_skey.c index 07234f061a..60510624fd 100644 --- a/src/lib/libcrypto/rc4/rc4_skey.c +++ b/src/lib/libcrypto/rc4/rc4_skey.c | |||
@@ -58,6 +58,7 @@ | |||
58 | 58 | ||
59 | #include <openssl/rc4.h> | 59 | #include <openssl/rc4.h> |
60 | #include <openssl/crypto.h> | 60 | #include <openssl/crypto.h> |
61 | #include <openssl/fips.h> | ||
61 | #include "rc4_locl.h" | 62 | #include "rc4_locl.h" |
62 | #include <openssl/opensslv.h> | 63 | #include <openssl/opensslv.h> |
63 | 64 | ||
@@ -94,10 +95,6 @@ FIPS_NON_FIPS_VCIPHER_Init(RC4) | |||
94 | unsigned int i; | 95 | unsigned int i; |
95 | 96 | ||
96 | d= &(key->data[0]); | 97 | d= &(key->data[0]); |
97 | #if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) | ||
98 | /* see crypto/rc4/asm/rc4-ia64.S for further details... */ | ||
99 | d=(RC4_INT *)(((size_t)(d+255))&~(sizeof(key->data)-1)); | ||
100 | #endif | ||
101 | 98 | ||
102 | for (i=0; i<256; i++) | 99 | for (i=0; i<256; i++) |
103 | d[i]=i; | 100 | d[i]=i; |