summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/rc4
diff options
context:
space:
mode:
authordjm <>2006-06-27 05:05:42 +0000
committerdjm <>2006-06-27 05:05:42 +0000
commit3f764f48d2626a43b6eeef7652c28303269d1204 (patch)
tree764d513589e09d2d10dbe70039b5f3bf58a36803 /src/lib/libcrypto/rc4
parent0d2f07cb82812dd6f9e33c493104f4c24e5b13a3 (diff)
parentf6198d4d0ab97685dc56be2d48715ed39fcc74b9 (diff)
downloadopenbsd-3f764f48d2626a43b6eeef7652c28303269d1204.tar.gz
openbsd-3f764f48d2626a43b6eeef7652c28303269d1204.tar.bz2
openbsd-3f764f48d2626a43b6eeef7652c28303269d1204.zip
This commit was generated by cvs2git to track changes on a CVS vendor
branch.
Diffstat (limited to 'src/lib/libcrypto/rc4')
-rwxr-xr-xsrc/lib/libcrypto/rc4/asm/rc4-x86_64.pl150
1 files changed, 150 insertions, 0 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
new file mode 100755
index 0000000000..b628daca70
--- /dev/null
+++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -0,0 +1,150 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Unlike 0.9.7f this code expects RC4_CHAR back in config line! See
10# commentary section in corresponding script in development branch
11# for background information about this option carousel. For those
12# who don't have energy to figure out these gory details, here is
13# basis in form of performance matrix relative to the original
14# 0.9.7e C code-base:
15#
16# 0.9.7e 0.9.7f this
17# AMD64 1x 3.3x 2.4x
18# EM64T 1x 0.8x 1.5x
19#
20# In other words idea is to trade -25% AMD64 performance to compensate
21# for deterioration and gain +90% on EM64T core. Development branch
22# maintains best performance for either target, i.e. 3.3x for AMD64
23# and 1.5x for EM64T.
24
25$output=shift;
26
27open STDOUT,">$output" || die "can't open $output: $!";
28
29$dat="%rdi"; # arg1
30$len="%rsi"; # arg2
31$inp="%rdx"; # arg3
32$out="%rcx"; # arg4
33
34@XX=("%r8","%r10");
35@TX=("%r9","%r11");
36$YY="%r12";
37$TY="%r13";
38
39$code=<<___;;
40.text
41
42.globl RC4
43.type RC4,\@function
44.align 16
45RC4: or $len,$len
46 jne .Lentry
47 repret
48.Lentry:
49 push %r12
50 push %r13
51
52 add \$2,$dat
53 movzb -2($dat),$XX[0]#d
54 movzb -1($dat),$YY#d
55
56 add \$1,$XX[0]#b
57 movzb ($dat,$XX[0]),$TX[0]#d
58 test \$-8,$len
59 jz .Lcloop1
60 push %rbx
61.align 16 # incidentally aligned already
62.Lcloop8:
63 mov ($inp),%eax
64 mov 4($inp),%ebx
65___
66# unroll 2x4-wise, because 64-bit rotates kill Intel P4...
67for ($i=0;$i<4;$i++) {
68$code.=<<___;
69 add $TX[0]#b,$YY#b
70 lea 1($XX[0]),$XX[1]
71 movzb ($dat,$YY),$TY#d
72 movzb $XX[1]#b,$XX[1]#d
73 movzb ($dat,$XX[1]),$TX[1]#d
74 movb $TX[0]#b,($dat,$YY)
75 cmp $XX[1],$YY
76 movb $TY#b,($dat,$XX[0])
77 jne .Lcmov$i # Intel cmov is sloooow...
78 mov $TX[0],$TX[1]
79.Lcmov$i:
80 add $TX[0]#b,$TY#b
81 xor ($dat,$TY),%al
82 ror \$8,%eax
83___
84push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
85}
86for ($i=4;$i<8;$i++) {
87$code.=<<___;
88 add $TX[0]#b,$YY#b
89 lea 1($XX[0]),$XX[1]
90 movzb ($dat,$YY),$TY#d
91 movzb $XX[1]#b,$XX[1]#d
92 movzb ($dat,$XX[1]),$TX[1]#d
93 movb $TX[0]#b,($dat,$YY)
94 cmp $XX[1],$YY
95 movb $TY#b,($dat,$XX[0])
96 jne .Lcmov$i # Intel cmov is sloooow...
97 mov $TX[0],$TX[1]
98.Lcmov$i:
99 add $TX[0]#b,$TY#b
100 xor ($dat,$TY),%bl
101 ror \$8,%ebx
102___
103push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
104}
105$code.=<<___;
106 lea -8($len),$len
107 mov %eax,($out)
108 lea 8($inp),$inp
109 mov %ebx,4($out)
110 lea 8($out),$out
111
112 test \$-8,$len
113 jnz .Lcloop8
114 pop %rbx
115 cmp \$0,$len
116 jne .Lcloop1
117.Lexit:
118 sub \$1,$XX[0]#b
119 movb $XX[0]#b,-2($dat)
120 movb $YY#b,-1($dat)
121
122 pop %r13
123 pop %r12
124 repret
125
126.align 16
127.Lcloop1:
128 add $TX[0]#b,$YY#b
129 movzb ($dat,$YY),$TY#d
130 movb $TX[0]#b,($dat,$YY)
131 movb $TY#b,($dat,$XX[0])
132 add $TX[0]#b,$TY#b
133 add \$1,$XX[0]#b
134 movzb ($dat,$TY),$TY#d
135 movzb ($dat,$XX[0]),$TX[0]#d
136 xorb ($inp),$TY#b
137 lea 1($inp),$inp
138 movb $TY#b,($out)
139 lea 1($out),$out
140 sub \$1,$len
141 jnz .Lcloop1
142 jmp .Lexit
143.size RC4,.-RC4
144___
145
146$code =~ s/#([bwd])/$1/gm;
147
148$code =~ s/repret/.byte\t0xF3,0xC3/gm;
149
150print $code;