diff options
Diffstat (limited to 'src/lib/libcrypto/rc4')
-rw-r--r-- | src/lib/libcrypto/rc4/asm/rc4-md5-x86_64.pl | 515 |
1 files changed, 0 insertions, 515 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-md5-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-md5-x86_64.pl deleted file mode 100644 index e5e8aa08a1..0000000000 --- a/src/lib/libcrypto/rc4/asm/rc4-md5-x86_64.pl +++ /dev/null | |||
@@ -1,515 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # June 2011 | ||
11 | # | ||
12 | # This is RC4+MD5 "stitch" implementation. The idea, as spelled in | ||
13 | # http://download.intel.com/design/intarch/papers/323686.pdf, is that | ||
14 | # since both algorithms exhibit instruction-level parallelism, ILP, | ||
15 | # below theoretical maximum, interleaving them would allow processor | ||
16 | # resources to be utilized better and achieve better performance. RC4 | ||
17 | # instruction sequence is virtually identical to rc4-x86_64.pl, which | ||
18 | # is heavily based on submission by Maxim Perminov, Maxim Locktyukhin | ||
19 | # and Jim Guilford of Intel. MD5 is fresh implementation aiming to | ||
20 | # minimize register usage, which was used as "main thread" with RC4 | ||
21 | # weaved into it, one RC4 round per one MD5 round. In addition to the | ||
22 | # stitched subroutine the script can generate standalone replacement | ||
23 | # md5_block_asm_data_order and RC4. Below are performance numbers in | ||
24 | # cycles per processed byte, less is better, for the standalone | ||
25 | # subroutines, sum of them, and stitched one: | ||
26 | # | ||
27 | # RC4 MD5 RC4+MD5 stitch gain | ||
28 | # Opteron 6.5(*) 5.4 11.9 7.0 +70%(*) | ||
29 | # Core2 6.5 5.8 12.3 7.7 +60% | ||
30 | # Westmere 4.3 5.2 9.5 7.0 +36% | ||
31 | # Sandy Bridge 4.2 5.5 9.7 6.8 +43% | ||
32 | # Atom 9.3 6.5 15.8 11.1 +42% | ||
33 | # | ||
34 | # (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement | ||
35 | # is +53%... | ||
36 | |||
# Build-time configuration and output plumbing: select which algorithms are
# emitted, read the optional flavour/output arguments, locate
# x86_64-xlate.pl next to this script or under ../../perlasm/, and route
# everything printed to STDOUT through that translator.
#
# $D is initialised unconditionally: "my $x = ... if COND" is documented in
# perlsyn as having undefined behaviour.  An empty string interpolates and
# tests exactly like the previous undefined value.
37 | my ($rc4,$md5)=(1,1); # what to generate? | ||
38 | my $D=$md5?"":"#"; # if set to "#", MD5 is stitched into RC4(), | ||
39 | # but its result is discarded. Idea here is | ||
40 | # to be able to use 'openssl speed rc4' for | ||
41 | # benchmarking the stitched subroutine... | ||
42 | |||
43 | my $flavour = shift; | ||
44 | my $output = shift; | ||
45 | if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | ||
46 | |||
47 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; | ||
48 | ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | ||
49 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | ||
50 | die "can't locate x86_64-xlate.pl"; | ||
51 | |||
# Fail loudly if the translator pipe cannot be started instead of silently
# producing no output.
52 | open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!"; | ||
53 | *STDOUT=*OUT; | ||
54 | |||
# Choose the generated function's name, its argument count and the registers
# in which each argument arrives, depending on which algorithms are enabled:
# RC4 only, MD5 only, or the stitched rc4_md5_enc.
55 | my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs); | ||
56 | |||
57 | if ($rc4 && !$md5) { | ||
58 | ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx"); | ||
59 | $func="RC4"; $nargs=4; | ||
60 | } elsif ($md5 && !$rc4) { | ||
61 | ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx"); | ||
62 | $func="md5_block_asm_data_order"; $nargs=3; | ||
63 | } else { | ||
64 | ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); | ||
65 | $func="rc4_md5_enc"; $nargs=6; | ||
66 | # void rc4_md5_enc( | ||
67 | # RC4_KEY *key, # | ||
68 | # const void *in0, # RC4 input | ||
69 | # void *out, # RC4 output | ||
70 | # MD5_CTX *ctx, # | ||
71 | # const void *inp, # MD5 input | ||
72 | # size_t len); # number of 64-byte blocks | ||
73 | } | ||
74 | |||
# @K holds the 64 per-round additive constants; round $i of R0..R3 emits
# "add \$$K[$i]" on its MD5 path.  The blank lines separate the four groups
# of 16 rounds.
75 | my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee, | ||
76 | 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501, | ||
77 | 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be, | ||
78 | 0x6b901122,0xfd987193,0xa679438e,0x49b40821, | ||
79 | |||
80 | 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa, | ||
81 | 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8, | ||
82 | 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed, | ||
83 | 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a, | ||
84 | |||
85 | 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c, | ||
86 | 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70, | ||
87 | 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05, | ||
88 | 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665, | ||
89 | |||
90 | 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039, | ||
91 | 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1, | ||
92 | 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1, | ||
93 | 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391 ); | ||
94 | |||
# Register allocation used by the round generators.  @V is rotated and @TX is
# swapped after every emitted round by the driver loops further down.
95 | my @V=("%r8d","%r9d","%r10d","%r11d"); # MD5 registers | ||
96 | my $tmp="%r12d"; | ||
97 | |||
98 | my @XX=("%rbp","%rsi"); # RC4 registers | ||
99 | my @TX=("%rax","%rbx"); | ||
100 | my $YY="%rcx"; | ||
101 | my $TY="%rdx"; | ||
102 | |||
# Every $MOD-th round (i % $MOD == $MOD-1) the generators advance $XX[0] by
# $MOD and, in R0..R2, recompute the $XX[1] pointer from it.
103 | my $MOD=32; # 16, 32 or 64 | ||
104 | |||
# Emit the function prologue: return immediately when the length argument is
# zero, push the callee-saved registers and reserve 40 bytes of stack.
105 | $code.=<<___; | ||
106 | .text | ||
107 | .align 16 | ||
108 | |||
109 | .globl $func | ||
110 | .type $func,\@function,$nargs | ||
111 | $func: | ||
112 | _CET_ENDBR | ||
113 | cmp \$0,$len | ||
114 | je .Labort | ||
115 | push %rbx | ||
116 | push %rbp | ||
117 | push %r12 | ||
118 | push %r13 | ||
119 | push %r14 | ||
120 | push %r15 | ||
121 | sub \$40,%rsp | ||
122 | .Lbody: | ||
123 | ___ | ||
# RC4 builds: move the arguments into %r11-%r15, then load the two byte-sized
# RC4 indices stored just in front of the key table.
124 | if ($rc4) { | ||
125 | $code.=<<___; | ||
126 | $D#md5# mov $ctx,%r11 # reassign arguments | ||
127 | mov $len,%r12 | ||
128 | mov $in0,%r13 | ||
129 | mov $out,%r14 | ||
130 | $D#md5# mov $inp,%r15 | ||
131 | ___ | ||
# From here on the Perl variables name the relocated registers.
132 | $ctx="%r11" if ($md5); # reassign arguments | ||
133 | $len="%r12"; | ||
134 | $in0="%r13"; | ||
135 | $out="%r14"; | ||
136 | $inp="%r15" if ($md5); | ||
137 | $inp=$in0 if (!$md5); | ||
138 | $code.=<<___; | ||
139 | xor $XX[0],$XX[0] | ||
140 | xor $YY,$YY | ||
141 | |||
142 | lea 8($dat),$dat | ||
143 | mov -8($dat),$XX[0]#b | ||
144 | mov -4($dat),$YY#b | ||
145 | |||
146 | inc $XX[0]#b | ||
147 | sub $in0,$out | ||
148 | movl ($dat,$XX[0],4),$TX[0]#d | ||
149 | ___ | ||
# Standalone RC4 only: byte-at-a-time warm-up loop that runs until the RC4
# index is a multiple of $MOD, then the length is converted to a count of
# 64-byte blocks (the original length is kept at 32(%rsp)).
150 | $code.=<<___ if (!$md5); | ||
151 | xor $TX[1],$TX[1] | ||
152 | test \$-128,$len | ||
153 | jz .Loop1 | ||
154 | sub $XX[0],$TX[1] | ||
155 | and \$`$MOD-1`,$TX[1] | ||
156 | jz .Loop${MOD}_is_hot | ||
157 | sub $TX[1],$len | ||
158 | .Loop${MOD}_warmup: | ||
159 | add $TX[0]#b,$YY#b | ||
160 | movl ($dat,$YY,4),$TY#d | ||
161 | movl $TX[0]#d,($dat,$YY,4) | ||
162 | movl $TY#d,($dat,$XX[0],4) | ||
163 | add $TY#b,$TX[0]#b | ||
164 | inc $XX[0]#b | ||
165 | movl ($dat,$TX[0],4),$TY#d | ||
166 | movl ($dat,$XX[0],4),$TX[0]#d | ||
167 | xorb ($in0),$TY#b | ||
168 | movb $TY#b,($out,$in0) | ||
169 | lea 1($in0),$in0 | ||
170 | dec $TX[1] | ||
171 | jnz .Loop${MOD}_warmup | ||
172 | |||
173 | mov $YY,$TX[1] | ||
174 | xor $YY,$YY | ||
175 | mov $TX[1]#b,$YY#b | ||
176 | |||
177 | .Loop${MOD}_is_hot: | ||
178 | mov $len,32(%rsp) # save original $len | ||
179 | shr \$6,$len # number of 64-byte blocks | ||
180 | ___ | ||
# RC4-only build with $D set: enable the MD5 path anyway, using the stack
# scratch area as its context and the RC4 input as its data, so the stitched
# code runs but the hash result is thrown away.
181 | if ($D && !$md5) { # stitch in dummy MD5 | ||
182 | $md5=1; | ||
183 | $ctx="%r11"; | ||
184 | $inp="%r15"; | ||
185 | $code.=<<___; | ||
186 | mov %rsp,$ctx | ||
187 | mov $in0,$inp | ||
188 | ___ | ||
189 | } | ||
190 | } | ||
191 | $code.=<<___; | ||
192 | #rc4# add $TX[0]#b,$YY#b | ||
193 | #rc4# lea ($dat,$XX[0],4),$XX[1] | ||
194 | shl \$6,$len | ||
195 | add $inp,$len # pointer to the end of input | ||
196 | mov $len,16(%rsp) | ||
197 | |||
198 | #md5# mov $ctx,24(%rsp) # save pointer to MD5_CTX | ||
199 | #md5# mov 0*4($ctx),$V[0] # load current hash value from MD5_CTX | ||
200 | #md5# mov 1*4($ctx),$V[1] | ||
201 | #md5# mov 2*4($ctx),$V[2] | ||
202 | #md5# mov 3*4($ctx),$V[3] | ||
203 | jmp .Loop | ||
204 | |||
205 | .align 16 | ||
206 | .Loop: | ||
207 | #md5# mov $V[0],0*4(%rsp) # put aside current hash value | ||
208 | #md5# mov $V[1],1*4(%rsp) | ||
209 | #md5# mov $V[2],2*4(%rsp) | ||
210 | #md5# mov $V[3],$tmp # forward reference | ||
211 | #md5# mov $V[3],3*4(%rsp) | ||
212 | ___ | ||
213 | |||
# R0 - append to $code one round of the first group of 16 rounds, with one
# RC4 round interleaved instruction by instruction.
#   $i           round number; selects $K[$i] and, via $i%16, the input word
#                and the rotation count from @rot0
#   $a,$b,$c,$d  the four MD5 state registers in their order for this round
# $tmp is expected to have been pre-loaded by the previous round (the
# "forward reference" moves); this round pre-loads it for the next one.
# When RC4 is enabled, each RC4 output is inserted into %xmm0 or %xmm1
# (chosen by the parity of $i%16); on the 16th round of the group the first
# 16 input bytes are loaded into %xmm2 and XORed with both.
214 | sub R0 { | ||
215 | my ($i,$a,$b,$c,$d)=@_; | ||
216 | my @rot0=(7,12,17,22); | ||
217 | my $j=$i%16; | ||
218 | my $k=$i%$MOD; | ||
219 | my $xmm="%xmm".($j&1); | ||
220 | $code.=" movdqu ($in0),%xmm2\n" if ($rc4 && $j==15); | ||
221 | $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); | ||
222 | $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); | ||
223 | $code.=<<___; | ||
224 | #rc4# movl ($dat,$YY,4),$TY#d | ||
225 | #md5# xor $c,$tmp | ||
226 | #rc4# movl $TX[0]#d,($dat,$YY,4) | ||
227 | #md5# and $b,$tmp | ||
228 | #md5# add 4*`$j`($inp),$a | ||
229 | #rc4# add $TY#b,$TX[0]#b | ||
230 | #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d | ||
231 | #md5# add \$$K[$i],$a | ||
232 | #md5# xor $d,$tmp | ||
233 | #rc4# movz $TX[0]#b,$TX[0]#d | ||
234 | #rc4# movl $TY#d,4*$k($XX[1]) | ||
235 | #md5# add $tmp,$a | ||
236 | #rc4# add $TX[1]#b,$YY#b | ||
237 | #md5# rol \$$rot0[$j%4],$a | ||
238 | #md5# mov `$j==15?"$b":"$c"`,$tmp # forward reference | ||
239 | #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n | ||
240 | #md5# add $b,$a | ||
241 | ___ | ||
242 | $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); | ||
243 | mov $YY,$XX[1] | ||
244 | xor $YY,$YY # keyword to partial register | ||
245 | mov $XX[1]#b,$YY#b | ||
246 | lea ($dat,$XX[0],4),$XX[1] | ||
247 | ___ | ||
248 | $code.=<<___ if ($rc4 && $j==15); | ||
249 | psllq \$8,%xmm1 | ||
250 | pxor %xmm0,%xmm2 | ||
251 | pxor %xmm1,%xmm2 | ||
252 | ___ | ||
253 | } | ||
# R1 - append to $code one round of the second group of 16 rounds, with one
# RC4 round interleaved.
#   $i           round number; selects $K[$i], the rotation from @rot1 and
#                the input word index (1+5*($i%16))%16
#   $a,$b,$c,$d  the four MD5 state registers in their order for this round
# $tmp arrives pre-loaded from the previous round and is pre-loaded for the
# next one at the end.  When RC4 is enabled, the 16th round of the group
# loads input bytes 16..31 into %xmm3 and XORs %xmm0 and %xmm1 into it.
254 | sub R1 { | ||
255 | my ($i,$a,$b,$c,$d)=@_; | ||
256 | my @rot1=(5,9,14,20); | ||
257 | my $j=$i%16; | ||
258 | my $k=$i%$MOD; | ||
259 | my $xmm="%xmm".($j&1); | ||
260 | $code.=" movdqu 16($in0),%xmm3\n" if ($rc4 && $j==15); | ||
261 | $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); | ||
262 | $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); | ||
263 | $code.=<<___; | ||
264 | #rc4# movl ($dat,$YY,4),$TY#d | ||
265 | #md5# xor $b,$tmp | ||
266 | #rc4# movl $TX[0]#d,($dat,$YY,4) | ||
267 | #md5# and $d,$tmp | ||
268 | #md5# add 4*`((1+5*$j)%16)`($inp),$a | ||
269 | #rc4# add $TY#b,$TX[0]#b | ||
270 | #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d | ||
271 | #md5# add \$$K[$i],$a | ||
272 | #md5# xor $c,$tmp | ||
273 | #rc4# movz $TX[0]#b,$TX[0]#d | ||
274 | #rc4# movl $TY#d,4*$k($XX[1]) | ||
275 | #md5# add $tmp,$a | ||
276 | #rc4# add $TX[1]#b,$YY#b | ||
277 | #md5# rol \$$rot1[$j%4],$a | ||
278 | #md5# mov `$j==15?"$c":"$b"`,$tmp # forward reference | ||
279 | #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n | ||
280 | #md5# add $b,$a | ||
281 | ___ | ||
282 | $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); | ||
283 | mov $YY,$XX[1] | ||
284 | xor $YY,$YY # keyword to partial register | ||
285 | mov $XX[1]#b,$YY#b | ||
286 | lea ($dat,$XX[0],4),$XX[1] | ||
287 | ___ | ||
288 | $code.=<<___ if ($rc4 && $j==15); | ||
289 | psllq \$8,%xmm1 | ||
290 | pxor %xmm0,%xmm3 | ||
291 | pxor %xmm1,%xmm3 | ||
292 | ___ | ||
293 | } | ||
# R2 - append to $code one round of the third group of 16 rounds, with one
# RC4 round interleaved.
#   $i           round number; selects $K[$i], the rotation from @rot2 and
#                the input word index (5+3*($i%16))%16
#   $a,$b,$c,$d  the four MD5 state registers in their order for this round
# $tmp arrives pre-loaded; the last round of the group loads -1 into it for
# the first R3 round.  When RC4 is enabled, the 16th round of the group
# loads input bytes 32..47 into %xmm4 and XORs %xmm0 and %xmm1 into it.
294 | sub R2 { | ||
295 | my ($i,$a,$b,$c,$d)=@_; | ||
296 | my @rot2=(4,11,16,23); | ||
297 | my $j=$i%16; | ||
298 | my $k=$i%$MOD; | ||
299 | my $xmm="%xmm".($j&1); | ||
300 | $code.=" movdqu 32($in0),%xmm4\n" if ($rc4 && $j==15); | ||
301 | $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); | ||
302 | $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); | ||
303 | $code.=<<___; | ||
304 | #rc4# movl ($dat,$YY,4),$TY#d | ||
305 | #md5# xor $c,$tmp | ||
306 | #rc4# movl $TX[0]#d,($dat,$YY,4) | ||
307 | #md5# xor $b,$tmp | ||
308 | #md5# add 4*`((5+3*$j)%16)`($inp),$a | ||
309 | #rc4# add $TY#b,$TX[0]#b | ||
310 | #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d | ||
311 | #md5# add \$$K[$i],$a | ||
312 | #rc4# movz $TX[0]#b,$TX[0]#d | ||
313 | #md5# add $tmp,$a | ||
314 | #rc4# movl $TY#d,4*$k($XX[1]) | ||
315 | #rc4# add $TX[1]#b,$YY#b | ||
316 | #md5# rol \$$rot2[$j%4],$a | ||
317 | #md5# mov `$j==15?"\\\$-1":"$c"`,$tmp # forward reference | ||
318 | #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n | ||
319 | #md5# add $b,$a | ||
320 | ___ | ||
321 | $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); | ||
322 | mov $YY,$XX[1] | ||
323 | xor $YY,$YY # keyword to partial register | ||
324 | mov $XX[1]#b,$YY#b | ||
325 | lea ($dat,$XX[0],4),$XX[1] | ||
326 | ___ | ||
327 | $code.=<<___ if ($rc4 && $j==15); | ||
328 | psllq \$8,%xmm1 | ||
329 | pxor %xmm0,%xmm4 | ||
330 | pxor %xmm1,%xmm4 | ||
331 | ___ | ||
332 | } | ||
# R3 - append to $code one round of the fourth group of 16 rounds, with one
# RC4 round interleaved.
#   $i           round number; selects $K[$i], the rotation from @rot3 and
#                the input word index (7*($i%16))%16
#   $a,$b,$c,$d  the four MD5 state registers in their order for this round
# $tmp is expected to hold -1 on entry and is reset to -1 on exit.  When RC4
# is enabled, the 16th round of the group loads input bytes 48..63 into
# %xmm5, XORs %xmm0 and %xmm1 into it, and - unlike R0..R2, independently of
# $MOD - reduces both RC4 index registers to their low byte and recomputes
# $XX[1].
333 | sub R3 { | ||
334 | my ($i,$a,$b,$c,$d)=@_; | ||
335 | my @rot3=(6,10,15,21); | ||
336 | my $j=$i%16; | ||
337 | my $k=$i%$MOD; | ||
338 | my $xmm="%xmm".($j&1); | ||
339 | $code.=" movdqu 48($in0),%xmm5\n" if ($rc4 && $j==15); | ||
340 | $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); | ||
341 | $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); | ||
342 | $code.=<<___; | ||
343 | #rc4# movl ($dat,$YY,4),$TY#d | ||
344 | #md5# xor $d,$tmp | ||
345 | #rc4# movl $TX[0]#d,($dat,$YY,4) | ||
346 | #md5# or $b,$tmp | ||
347 | #md5# add 4*`((7*$j)%16)`($inp),$a | ||
348 | #rc4# add $TY#b,$TX[0]#b | ||
349 | #rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d | ||
350 | #md5# add \$$K[$i],$a | ||
351 | #rc4# movz $TX[0]#b,$TX[0]#d | ||
352 | #md5# xor $c,$tmp | ||
353 | #rc4# movl $TY#d,4*$k($XX[1]) | ||
354 | #md5# add $tmp,$a | ||
355 | #rc4# add $TX[1]#b,$YY#b | ||
356 | #md5# rol \$$rot3[$j%4],$a | ||
357 | #md5# mov \$-1,$tmp # forward reference | ||
358 | #rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n | ||
359 | #md5# add $b,$a | ||
360 | ___ | ||
361 | $code.=<<___ if ($rc4 && $j==15); | ||
362 | mov $XX[0],$XX[1] | ||
363 | xor $XX[0],$XX[0] # keyword to partial register | ||
364 | mov $XX[1]#b,$XX[0]#b | ||
365 | mov $YY,$XX[1] | ||
366 | xor $YY,$YY # keyword to partial register | ||
367 | mov $XX[1]#b,$YY#b | ||
368 | lea ($dat,$XX[0],4),$XX[1] | ||
369 | psllq \$8,%xmm1 | ||
370 | pxor %xmm0,%xmm5 | ||
371 | pxor %xmm1,%xmm5 | ||
372 | ___ | ||
373 | } | ||
374 | |||
# Emit all 64 rounds, 16 per generator.  After each round @V is rotated right
# by one (the last register moves to the front) and the two entries of @TX
# are swapped, so the next round sees the registers in their new roles.
375 | my $i=0; | ||
376 | for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } | ||
377 | for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } | ||
378 | for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } | ||
379 | for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } | ||
380 | |||
# Emit the tail of the main loop and the function epilogue: add the hash
# words saved on the stack back into the state, store the four 16-byte RC4
# output blocks, advance the pointers by 64 and loop until $inp reaches the
# end pointer kept at 16(%rsp).  After the loop the MD5 state is written
# back through the context pointer saved at 24(%rsp).
381 | $code.=<<___; | ||
382 | #md5# add 0*4(%rsp),$V[0] # accumulate hash value | ||
383 | #md5# add 1*4(%rsp),$V[1] | ||
384 | #md5# add 2*4(%rsp),$V[2] | ||
385 | #md5# add 3*4(%rsp),$V[3] | ||
386 | |||
387 | #rc4# movdqu %xmm2,($out,$in0) # write RC4 output | ||
388 | #rc4# movdqu %xmm3,16($out,$in0) | ||
389 | #rc4# movdqu %xmm4,32($out,$in0) | ||
390 | #rc4# movdqu %xmm5,48($out,$in0) | ||
391 | #md5# lea 64($inp),$inp | ||
392 | #rc4# lea 64($in0),$in0 | ||
393 | cmp 16(%rsp),$inp # are we done? | ||
394 | jb .Loop | ||
395 | |||
396 | #md5# mov 24(%rsp),$len # restore pointer to MD5_CTX | ||
397 | #rc4# sub $TX[0]#b,$YY#b # correct $YY | ||
398 | #md5# mov $V[0],0*4($len) # write MD5_CTX | ||
399 | #md5# mov $V[1],1*4($len) | ||
400 | #md5# mov $V[2],2*4($len) | ||
401 | #md5# mov $V[3],3*4($len) | ||
402 | ___ | ||
# Standalone-RC4 builds only: handle the remaining (length mod 64) bytes one
# at a time in .Loop1.
403 | $code.=<<___ if ($rc4 && (!$md5 || $D)); | ||
404 | mov 32(%rsp),$len # restore original $len | ||
405 | and \$63,$len # remaining bytes | ||
406 | jnz .Loop1 | ||
407 | jmp .Ldone | ||
408 | |||
409 | .align 16 | ||
410 | .Loop1: | ||
411 | add $TX[0]#b,$YY#b | ||
412 | movl ($dat,$YY,4),$TY#d | ||
413 | movl $TX[0]#d,($dat,$YY,4) | ||
414 | movl $TY#d,($dat,$XX[0],4) | ||
415 | add $TY#b,$TX[0]#b | ||
416 | inc $XX[0]#b | ||
417 | movl ($dat,$TX[0],4),$TY#d | ||
418 | movl ($dat,$XX[0],4),$TX[0]#d | ||
419 | xorb ($in0),$TY#b | ||
420 | movb $TY#b,($out,$in0) | ||
421 | lea 1($in0),$in0 | ||
422 | dec $len | ||
423 | jnz .Loop1 | ||
424 | |||
425 | .Ldone: | ||
426 | ___ | ||
# Store the RC4 indices back in front of the key table, then restore the six
# callee-saved registers from above the 40-byte scratch area and return.
427 | $code.=<<___; | ||
428 | #rc4# sub \$1,$XX[0]#b | ||
429 | #rc4# movl $XX[0]#d,-8($dat) | ||
430 | #rc4# movl $YY#d,-4($dat) | ||
431 | |||
432 | mov 40(%rsp),%r15 | ||
433 | mov 48(%rsp),%r14 | ||
434 | mov 56(%rsp),%r13 | ||
435 | mov 64(%rsp),%r12 | ||
436 | mov 72(%rsp),%rbp | ||
437 | mov 80(%rsp),%rbx | ||
438 | lea 88(%rsp),%rsp | ||
439 | .Lepilogue: | ||
440 | .Labort: | ||
441 | ret | ||
442 | .size $func,.-$func | ||
443 | ___ | ||
444 | |||
# Optionally emit RC4_set_key, taking the key structure in %rdi, the key
# length in %rsi and the key bytes in %rdx.  The first loop fills the table
# with 0..255 as 32-bit entries; the second loop walks the table once,
# swapping entries under control of the key bytes (the key index wraps back
# to the start via cmovz); finally both stored indices are zeroed.
445 | if ($rc4 && $D) { # sole purpose of this section is to provide | ||
446 | # option to use the generated module as drop-in | ||
447 | # replacement for rc4-x86_64.pl for debugging | ||
448 | # and testing purposes... | ||
449 | my ($idx,$ido)=("%r8","%r9"); | ||
450 | my ($dat,$len,$inp)=("%rdi","%rsi","%rdx"); | ||
451 | |||
452 | $code.=<<___; | ||
453 | .globl RC4_set_key | ||
454 | .type RC4_set_key,\@function,3 | ||
455 | .align 16 | ||
456 | RC4_set_key: | ||
457 | _CET_ENDBR | ||
458 | lea 8($dat),$dat | ||
459 | lea ($inp,$len),$inp | ||
460 | neg $len | ||
461 | mov $len,%rcx | ||
462 | xor %eax,%eax | ||
463 | xor $ido,$ido | ||
464 | xor %r10,%r10 | ||
465 | xor %r11,%r11 | ||
466 | jmp .Lw1stloop | ||
467 | |||
468 | .align 16 | ||
469 | .Lw1stloop: | ||
470 | mov %eax,($dat,%rax,4) | ||
471 | add \$1,%al | ||
472 | jnc .Lw1stloop | ||
473 | |||
474 | xor $ido,$ido | ||
475 | xor $idx,$idx | ||
476 | .align 16 | ||
477 | .Lw2ndloop: | ||
478 | mov ($dat,$ido,4),%r10d | ||
479 | add ($inp,$len,1),$idx#b | ||
480 | add %r10b,$idx#b | ||
481 | add \$1,$len | ||
482 | mov ($dat,$idx,4),%r11d | ||
483 | cmovz %rcx,$len | ||
484 | mov %r10d,($dat,$idx,4) | ||
485 | mov %r11d,($dat,$ido,4) | ||
486 | add \$1,$ido#b | ||
487 | jnc .Lw2ndloop | ||
488 | |||
489 | xor %eax,%eax | ||
490 | mov %eax,-8($dat) | ||
491 | mov %eax,-4($dat) | ||
492 | ret | ||
493 | .size RC4_set_key,.-RC4_set_key | ||
494 | ___ | ||
495 | } | ||
496 | |||
# reg_part - map a register name plus a size suffix to the matching
# sub-register name.
#   $reg   register name as written in the templates, e.g. "%rax" or "%r8"
#   $conv  "b", "w" or "d"
# Numbered registers (%r8, %r12, ...) simply get the suffix appended
# ("%r8" + "d" -> "%r8d").  Other names are rewritten: "%rax" becomes "%al"
# for "b", "%ax" for "w" and "%eax" for "d"; "%rbp" with "b" becomes "%bpl".
# Returns the resulting name; an unrecognised suffix leaves it unchanged.
497 | sub reg_part { | ||
498 | my ($reg,$conv)=@_; | ||
499 | if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } | ||
500 | elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } | ||
501 | elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } | ||
502 | elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } | ||
503 | return $reg; | ||
504 | } | ||
505 | |||
# Post-process the accumulated template and emit it:
#   1. resolve "%reg#b/#w/#d" size suffixes through reg_part();
#   2. evaluate every `...` expression embedded in the template;
#   3. turn "pinsrw $0," into "movd";
#   4. strip the "#md5#" / "#rc4#" markers of the enabled algorithms, leaving
#      the lines of a disabled algorithm prefixed with "#".
# STDOUT was aliased earlier in the script to the pipe feeding
# x86_64-xlate.pl.  close() on a piped handle returns false when the child
# exits with a non-zero status, so it is checked here; otherwise a failed
# translation would go unnoticed.  $! is 0 when the only problem was the
# child's exit status, hence $? is reported as well.
506 | $code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; | ||
507 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | ||
508 | $code =~ s/pinsrw\s+\$0,/movd /gm; | ||
509 | |||
510 | $code =~ s/#md5#//gm if ($md5); | ||
511 | $code =~ s/#rc4#//gm if ($rc4); | ||
512 | |||
513 | print $code; | ||
514 | |||
515 | close STDOUT or die "error closing STDOUT: $! (child status $?)"; | ||