diff options
author | djm <> | 2006-06-27 05:05:42 +0000 |
---|---|---|
committer | djm <> | 2006-06-27 05:05:42 +0000 |
commit | 3f764f48d2626a43b6eeef7652c28303269d1204 (patch) | |
tree | 764d513589e09d2d10dbe70039b5f3bf58a36803 /src/lib/libcrypto/rc4 | |
parent | 0d2f07cb82812dd6f9e33c493104f4c24e5b13a3 (diff) | |
parent | f6198d4d0ab97685dc56be2d48715ed39fcc74b9 (diff) | |
download | openbsd-3f764f48d2626a43b6eeef7652c28303269d1204.tar.gz openbsd-3f764f48d2626a43b6eeef7652c28303269d1204.tar.bz2 openbsd-3f764f48d2626a43b6eeef7652c28303269d1204.zip |
This commit was generated by cvs2git to track changes on a CVS vendor
branch.
Diffstat (limited to 'src/lib/libcrypto/rc4')
-rwxr-xr-x | src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl new file mode 100755 index 0000000000..b628daca70 --- /dev/null +++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | |||
@@ -0,0 +1,150 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. Rights for redistribution and usage in source and binary | ||
6 | # forms are granted according to the OpenSSL license. | ||
7 | # ==================================================================== | ||
8 | # | ||
9 | # Unlike 0.9.7f, this code expects RC4_CHAR back in the config line! See | |
10 | # the commentary section of the corresponding script in the development | |
11 | # branch for background information about this option carousel. For | |
12 | # those who don't have the energy to figure out the gory details, here | |
13 | # is the rationale in the form of a performance matrix relative to the | |
14 | # original 0.9.7e C code base: | |
15 | # | ||
16 | # 0.9.7e 0.9.7f this | ||
17 | # AMD64 1x 3.3x 2.4x | ||
18 | # EM64T 1x 0.8x 1.5x | ||
19 | # | ||
20 | # In other words, the idea is to trade -25% of AMD64 performance to | |
21 | # compensate for the deterioration and gain +90% on the EM64T core. The | |
22 | # development branch maintains the best performance for either target, | |
23 | # i.e. 3.3x for AMD64 and 1.5x for EM64T. | |
24 | |||
25 | $output=shift; # output file name: first command-line argument | |
26 | # "or", not "||": "||" bound to the filename string, so die never ran | |
27 | open STDOUT,">$output" or die "can't open $output: $!"; | |
28 | |||
29 | # arguments 1-4 of the generated RC4 routine, in order | |
30 | ($dat,$len,$inp,$out)=qw(%rdi %rsi %rdx %rcx); | |
31 | |||
32 | # two-entry rings: [0] is used by the current step, [1] by the next | |
33 | @XX=qw(%r8 %r10); # index x | |
34 | @TX=qw(%r9 %r11); # byte loaded from ($dat,x) | |
35 | |||
36 | # index y and the byte loaded from ($dat,y) | |
37 | ($YY,$TY)=qw(%r12 %r13); | |
38 | |||
# Start of the generated assembly: function prologue and head of the 8-byte
# loop. Inside the heredoc "\$" and "\@" emit a literal "$" / "@", while
# $dat, $len, $XX[0] etc. interpolate the register names assigned earlier.
# The "#b"/"#d" suffixes and "repret" are placeholders that the
# substitutions at the end of the script rewrite.
# Emitted code: return at once when $len is zero; save %r12/%r13; advance
# $dat by 2 and load the bytes at -2/-1($dat) into $XX[0] and $YY
# (presumably the RC4 x and y indices stored in front of the table --
# confirm against the RC4_KEY layout); increment x and preload the table
# byte at ($dat,x) into $TX[0]; with fewer than 8 bytes to do jump to
# .Lcloop1, otherwise save %rbx and, at .Lcloop8, load 8 input bytes into
# %eax and %ebx.
39 | $code=<<___;; | |
40 | .text | |
41 | |||
42 | .globl RC4 | |
43 | .type RC4,\@function | |
44 | .align 16 | |
45 | RC4: or $len,$len | |
46 | jne .Lentry | |
47 | repret | |
48 | .Lentry: | |
49 | push %r12 | |
50 | push %r13 | |
51 | |||
52 | add \$2,$dat | |
53 | movzb -2($dat),$XX[0]#d | |
54 | movzb -1($dat),$YY#d | |
55 | |||
56 | add \$1,$XX[0]#b | |
57 | movzb ($dat,$XX[0]),$TX[0]#d | |
58 | test \$-8,$len | |
59 | jz .Lcloop1 | |
60 | push %rbx | |
61 | .align 16 # incidentally aligned already | |
62 | .Lcloop8: | |
63 | mov ($inp),%eax | |
64 | mov 4($inp),%ebx | |
65 | ___ | |
66 | # unroll 2x4-wise, because 64-bit rotates kill Intel P4... | |
# First half of the unrolled .Lcloop8 body: four copies of the one-byte
# step, each with its own .Lcmov$i label. Writing S[i] for the byte at
# ($dat,i), one step does: y += S[x] (byte add); x' = (x+1) & 0xff, with
# S[x'] preloaded into $TX[1]; S[x] and S[y] are swapped; if y == x' the
# preloaded value is stale, because that slot has just been overwritten
# with the old S[x], so $TX[1] is replaced by $TX[0]; finally the byte at
# S[(S[x]+S[y]) & 0xff] is XORed into %al and %eax is rotated right by 8.
# After four steps %eax has rotated a full 32 bits, so it holds the four
# processed bytes in their original positions.
67 | for ($i=0;$i<4;$i++) { | |
68 | $code.=<<___; | |
69 | add $TX[0]#b,$YY#b | |
70 | lea 1($XX[0]),$XX[1] | |
71 | movzb ($dat,$YY),$TY#d | |
72 | movzb $XX[1]#b,$XX[1]#d | |
73 | movzb ($dat,$XX[1]),$TX[1]#d | |
74 | movb $TX[0]#b,($dat,$YY) | |
75 | cmp $XX[1],$YY | |
76 | movb $TY#b,($dat,$XX[0]) | |
77 | jne .Lcmov$i # Intel cmov is sloooow... | |
78 | mov $TX[0],$TX[1] | |
79 | .Lcmov$i: | |
80 | add $TX[0]#b,$TY#b | |
81 | xor ($dat,$TY),%al | |
82 | ror \$8,%eax | |
83 | ___ | |
84 | push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers: this step's [1] becomes the next step's [0] | |
85 | } | |
# Second half of the unrolled .Lcloop8 body: the same one-byte step is
# generated four more times (labels .Lcmov4 to .Lcmov7), but the table byte
# is XORed into %bl and %ebx is rotated, i.e. these steps process the
# second four input bytes that .Lcloop8 loaded from 4($inp).
86 | for ($i=4;$i<8;$i++) { | |
87 | $code.=<<___; | |
88 | add $TX[0]#b,$YY#b | |
89 | lea 1($XX[0]),$XX[1] | |
90 | movzb ($dat,$YY),$TY#d | |
91 | movzb $XX[1]#b,$XX[1]#d | |
92 | movzb ($dat,$XX[1]),$TX[1]#d | |
93 | movb $TX[0]#b,($dat,$YY) | |
94 | cmp $XX[1],$YY | |
95 | movb $TY#b,($dat,$XX[0]) | |
96 | jne .Lcmov$i # Intel cmov is sloooow... | |
97 | mov $TX[0],$TX[1] | |
98 | .Lcmov$i: | |
99 | add $TX[0]#b,$TY#b | |
100 | xor ($dat,$TY),%bl | |
101 | ror \$8,%ebx | |
102 | ___ | |
103 | push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers; after 8 rotations in total @XX/@TX are back in their initial order | |
104 | } | |
# Tail of .Lcloop8, the common exit path and the byte-at-a-time loop.
# After each 8-byte pass $len is reduced by 8, %eax/%ebx are stored to
# ($out) and 4($out), and $inp/$out advance by 8. The loop repeats while
# $len is at least 8; then %rbx is restored and any remaining bytes are
# handled by .Lcloop1. .Lexit undoes the pre-increment of x, writes x and y
# back to -2($dat) and -1($dat), restores %r13/%r12 and returns. .Lcloop1
# performs the same table update one byte per iteration; it loads the next
# table byte only after the swap, so it needs no stale-preload fix-up, and
# it XORs the input byte straight from ($inp) before storing to ($out).
105 | $code.=<<___; | |
106 | lea -8($len),$len | |
107 | mov %eax,($out) | |
108 | lea 8($inp),$inp | |
109 | mov %ebx,4($out) | |
110 | lea 8($out),$out | |
111 | |||
112 | test \$-8,$len | |
113 | jnz .Lcloop8 | |
114 | pop %rbx | |
115 | cmp \$0,$len | |
116 | jne .Lcloop1 | |
117 | .Lexit: | |
118 | sub \$1,$XX[0]#b | |
119 | movb $XX[0]#b,-2($dat) | |
120 | movb $YY#b,-1($dat) | |
121 | |||
122 | pop %r13 | |
123 | pop %r12 | |
124 | repret | |
125 | |||
126 | .align 16 | |
127 | .Lcloop1: | |
128 | add $TX[0]#b,$TY#b | |
129 | movzb ($dat,$YY),$TY#d | |
130 | movb $TX[0]#b,($dat,$YY) | |
131 | movb $TY#b,($dat,$XX[0]) | |
132 | add $TX[0]#b,$TY#b | |
133 | add \$1,$XX[0]#b | |
134 | movzb ($dat,$TY),$TY#d | |
135 | movzb ($dat,$XX[0]),$TX[0]#d | |
136 | xorb ($inp),$TY#b | |
137 | lea 1($inp),$inp | |
138 | movb $TY#b,($out) | |
139 | lea 1($out),$out | |
140 | sub \$1,$len | |
141 | jnz .Lcloop1 | |
142 | jmp .Lexit | |
143 | .size RC4,.-RC4 | |
144 | ___ | |
145 | |||
146 | # resolve the "#b"/"#w"/"#d" register-size suffixes and the "repret" pseudo-op, then emit | |
147 | for ($code) { | |
148 | s/#([bwd])/$1/gm; s/repret/.byte\t0xF3,0xC3/gm; | |
149 | } | |
150 | print $code; | |