summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/md5/asm/md5-x86_64.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/md5/asm/md5-x86_64.pl')
-rwxr-xr-xsrc/lib/libcrypto/md5/asm/md5-x86_64.pl156
1 files changed, 140 insertions, 16 deletions
diff --git a/src/lib/libcrypto/md5/asm/md5-x86_64.pl b/src/lib/libcrypto/md5/asm/md5-x86_64.pl
index 9a6fa67224..867885435e 100755
--- a/src/lib/libcrypto/md5/asm/md5-x86_64.pl
+++ b/src/lib/libcrypto/md5/asm/md5-x86_64.pl
@@ -15,7 +15,7 @@ my $code;
15# dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s) 15# dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s)
16# %r10d = X[k_next] 16# %r10d = X[k_next]
17# %r11d = z' (copy of z for the next step) 17# %r11d = z' (copy of z for the next step)
18# Each round1_step() takes about 5.71 clocks (9 instructions, 1.58 IPC) 18# Each round1_step() takes about 5.3 clocks (9 instructions, 1.7 IPC)
19sub round1_step 19sub round1_step
20{ 20{
21 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 21 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
@@ -37,22 +37,26 @@ EOF
37# round2_step() does: 37# round2_step() does:
38# dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s) 38# dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s)
39# %r10d = X[k_next] 39# %r10d = X[k_next]
40# %r11d = y' (copy of y for the next step) 40# %r11d = z' (copy of z for the next step)
41# Each round2_step() takes about 6.22 clocks (9 instructions, 1.45 IPC) 41# %r12d = z' (copy of z for the next step)
42# Each round2_step() takes about 5.4 clocks (11 instructions, 2.0 IPC)
42sub round2_step 43sub round2_step
43{ 44{
44 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 45 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
45 $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1); 46 $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1);
46 $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); 47 $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
48 $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
47 $code .= <<EOF; 49 $code .= <<EOF;
48 xor $x, %r11d /* x ^ ... */ 50 not %r11d /* not z */
49 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ 51 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
50 and $z, %r11d /* z & ... */ 52 and $x, %r12d /* x & z */
51 xor $y, %r11d /* y ^ ... */ 53 and $y, %r11d /* y & (not z) */
52 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ 54 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
53 add %r11d, $dst /* dst += ... */ 55 or %r11d, %r12d /* (y & (not z)) | (x & z) */
56 mov $y, %r11d /* (NEXT STEP) z' = $y */
57 add %r12d, $dst /* dst += ... */
58 mov $y, %r12d /* (NEXT STEP) z' = $y */
54 rol \$$s, $dst /* dst <<< s */ 59 rol \$$s, $dst /* dst <<< s */
55 mov $x, %r11d /* (NEXT STEP) y' = $x */
56 add $x, $dst /* dst += x */ 60 add $x, $dst /* dst += x */
57EOF 61EOF
58} 62}
@@ -61,7 +65,7 @@ EOF
61# dst = x + ((dst + H(x,y,z) + X[k] + T_i) <<< s) 65# dst = x + ((dst + H(x,y,z) + X[k] + T_i) <<< s)
62# %r10d = X[k_next] 66# %r10d = X[k_next]
63# %r11d = y' (copy of y for the next step) 67# %r11d = y' (copy of y for the next step)
64# Each round3_step() takes about 4.26 clocks (8 instructions, 1.88 IPC) 68# Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC)
65sub round3_step 69sub round3_step
66{ 70{
67 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 71 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
@@ -83,7 +87,7 @@ EOF
83# dst = x + ((dst + I(x,y,z) + X[k] + T_i) <<< s) 87# dst = x + ((dst + I(x,y,z) + X[k] + T_i) <<< s)
84# %r10d = X[k_next] 88# %r10d = X[k_next]
85# %r11d = not z' (copy of not z for the next step) 89# %r11d = not z' (copy of not z for the next step)
86# Each round4_step() takes about 5.27 clocks (9 instructions, 1.71 IPC) 90# Each round4_step() takes about 5.2 clocks (9 instructions, 1.7 IPC)
87sub round4_step 91sub round4_step
88{ 92{
89 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 93 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
@@ -104,8 +108,19 @@ sub round4_step
104EOF 108EOF
105} 109}
106 110
107my $output = shift; 111my $flavour = shift;
108open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output"; 112my $output = shift;
113if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
114
115my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
116
117$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
118( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
119( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
120die "can't locate x86_64-xlate.pl";
121
122no warnings qw(uninitialized);
123open STDOUT,"| $^X $xlate $flavour $output";
109 124
110$code .= <<EOF; 125$code .= <<EOF;
111.text 126.text
@@ -116,8 +131,10 @@ $code .= <<EOF;
116md5_block_asm_data_order: 131md5_block_asm_data_order:
117 push %rbp 132 push %rbp
118 push %rbx 133 push %rbx
134 push %r12
119 push %r14 135 push %r14
120 push %r15 136 push %r15
137.Lprologue:
121 138
122 # rdi = arg #1 (ctx, MD5_CTX pointer) 139 # rdi = arg #1 (ctx, MD5_CTX pointer)
123 # rsi = arg #2 (ptr, data pointer) 140 # rsi = arg #2 (ptr, data pointer)
@@ -232,13 +249,120 @@ $code .= <<EOF;
232 mov %ecx, 2*4(%rbp) # ctx->C = C 249 mov %ecx, 2*4(%rbp) # ctx->C = C
233 mov %edx, 3*4(%rbp) # ctx->D = D 250 mov %edx, 3*4(%rbp) # ctx->D = D
234 251
252 mov (%rsp),%r15
253 mov 8(%rsp),%r14
254 mov 16(%rsp),%r12
255 mov 24(%rsp),%rbx
256 mov 32(%rsp),%rbp
257 add \$40,%rsp
258.Lepilogue:
259 ret
260.size md5_block_asm_data_order,.-md5_block_asm_data_order
261EOF
262
263# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
264# CONTEXT *context,DISPATCHER_CONTEXT *disp)
265if ($win64) {
266my $rec="%rcx";
267my $frame="%rdx";
268my $context="%r8";
269my $disp="%r9";
270
271$code.=<<___;
272.extern __imp_RtlVirtualUnwind
273.type se_handler,\@abi-omnipotent
274.align 16
275se_handler:
276 push %rsi
277 push %rdi
278 push %rbx
279 push %rbp
280 push %r12
281 push %r13
282 push %r14
283 push %r15
284 pushfq
285 sub \$64,%rsp
286
287 mov 120($context),%rax # pull context->Rax
288 mov 248($context),%rbx # pull context->Rip
289
290 lea .Lprologue(%rip),%r10
291 cmp %r10,%rbx # context->Rip<.Lprologue
292 jb .Lin_prologue
293
294 mov 152($context),%rax # pull context->Rsp
295
296 lea .Lepilogue(%rip),%r10
297 cmp %r10,%rbx # context->Rip>=.Lepilogue
298 jae .Lin_prologue
299
300 lea 40(%rax),%rax
301
302 mov -8(%rax),%rbp
303 mov -16(%rax),%rbx
304 mov -24(%rax),%r12
305 mov -32(%rax),%r14
306 mov -40(%rax),%r15
307 mov %rbx,144($context) # restore context->Rbx
308 mov %rbp,160($context) # restore context->Rbp
309 mov %r12,216($context) # restore context->R12
310 mov %r14,232($context) # restore context->R14
311 mov %r15,240($context) # restore context->R15
312
313.Lin_prologue:
314 mov 8(%rax),%rdi
315 mov 16(%rax),%rsi
316 mov %rax,152($context) # restore context->Rsp
317 mov %rsi,168($context) # restore context->Rsi
318 mov %rdi,176($context) # restore context->Rdi
319
320 mov 40($disp),%rdi # disp->ContextRecord
321 mov $context,%rsi # context
322 mov \$154,%ecx # sizeof(CONTEXT)
323 .long 0xa548f3fc # cld; rep movsq
324
325 mov $disp,%rsi
326 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
327 mov 8(%rsi),%rdx # arg2, disp->ImageBase
328 mov 0(%rsi),%r8 # arg3, disp->ControlPc
329 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
330 mov 40(%rsi),%r10 # disp->ContextRecord
331 lea 56(%rsi),%r11 # &disp->HandlerData
332 lea 24(%rsi),%r12 # &disp->EstablisherFrame
333 mov %r10,32(%rsp) # arg5
334 mov %r11,40(%rsp) # arg6
335 mov %r12,48(%rsp) # arg7
336 mov %rcx,56(%rsp) # arg8, (NULL)
337 call *__imp_RtlVirtualUnwind(%rip)
338
339 mov \$1,%eax # ExceptionContinueSearch
340 add \$64,%rsp
341 popfq
235 pop %r15 342 pop %r15
236 pop %r14 343 pop %r14
237 pop %rbx 344 pop %r13
345 pop %r12
238 pop %rbp 346 pop %rbp
347 pop %rbx
348 pop %rdi
349 pop %rsi
239 ret 350 ret
240.size md5_block_asm_data_order,.-md5_block_asm_data_order 351.size se_handler,.-se_handler
241EOF 352
353.section .pdata
354.align 4
355 .rva .LSEH_begin_md5_block_asm_data_order
356 .rva .LSEH_end_md5_block_asm_data_order
357 .rva .LSEH_info_md5_block_asm_data_order
358
359.section .xdata
360.align 8
361.LSEH_info_md5_block_asm_data_order:
362 .byte 9,0,0,0
363 .rva se_handler
364___
365}
242 366
243print $code; 367print $code;
244 368