summary refs log tree commit diff
path: root/src/lib/libcrypto/bn/asm/x86_64-mont.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/x86_64-mont.pl')
-rwxr-xr-x src/lib/libcrypto/bn/asm/x86_64-mont.pl 136
1 files changed, 126 insertions, 10 deletions
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
index c43b69592a..3b7a6f243f 100755
--- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
@@ -15,14 +15,18 @@
15# respectful 50%. It remains to be seen if loop unrolling and 15# respectful 50%. It remains to be seen if loop unrolling and
16# dedicated squaring routine can provide further improvement... 16# dedicated squaring routine can provide further improvement...
17 17
18$output=shift; 18$flavour = shift;
19$output = shift;
20if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
21
22$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
19 23
20$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 24$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
21( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or 25( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
22( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or 26( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
23die "can't locate x86_64-xlate.pl"; 27die "can't locate x86_64-xlate.pl";
24 28
25open STDOUT,"| $^X $xlate $output"; 29open STDOUT,"| $^X $xlate $flavour $output";
26 30
27# int bn_mul_mont( 31# int bn_mul_mont(
28$rp="%rdi"; # BN_ULONG *rp, 32$rp="%rdi"; # BN_ULONG *rp,
@@ -55,13 +59,14 @@ bn_mul_mont:
55 push %r15 59 push %r15
56 60
57 mov ${num}d,${num}d 61 mov ${num}d,${num}d
58 lea 2($num),%rax 62 lea 2($num),%r10
59 mov %rsp,%rbp 63 mov %rsp,%r11
60 neg %rax 64 neg %r10
61 lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2)) 65 lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2))
62 and \$-1024,%rsp # minimize TLB usage 66 and \$-1024,%rsp # minimize TLB usage
63 67
64 mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp 68 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
69.Lprologue:
65 mov %rdx,$bp # $bp reassigned, remember? 70 mov %rdx,$bp # $bp reassigned, remember?
66 71
67 mov ($n0),$n0 # pull n0[0] value 72 mov ($n0),$n0 # pull n0[0] value
@@ -197,18 +202,129 @@ bn_mul_mont:
197 dec $j 202 dec $j
198 jge .Lcopy 203 jge .Lcopy
199 204
200 mov 8(%rsp,$num,8),%rsp # restore %rsp 205 mov 8(%rsp,$num,8),%rsi # restore %rsp
201 mov \$1,%rax 206 mov \$1,%rax
207 mov (%rsi),%r15
208 mov 8(%rsi),%r14
209 mov 16(%rsi),%r13
210 mov 24(%rsi),%r12
211 mov 32(%rsi),%rbp
212 mov 40(%rsi),%rbx
213 lea 48(%rsi),%rsp
214.Lepilogue:
215 ret
216.size bn_mul_mont,.-bn_mul_mont
217.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
218.align 16
219___
220
221# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
222# CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64-only block: emits se_handler and the .pdata/.xdata records that
# attach it to bn_mul_mont.  bn_mul_mont moves %rsp to a computed,
# 1024-byte-aligned location and stores the previous value at
# 8(%rsp,$num,8); the handler reads that slot back to recover the frame.
#
# What the emitted code does, in order:
#  1. Pushes rsi, rdi, rbx, rbp, r12-r15 and the flags, then reserves 64
#     bytes of stack; offsets 32..56 of that area later carry arguments
#     5-8 of the RtlVirtualUnwind call.
#  2. Picks the stack pointer value to report, kept in %rax:
#     - Rip below .Lprologue: context->Rax is used as-is (presumably the
#       translated Win64 prologue leaves the entry %rsp in %rax -- that
#       code comes from x86_64-xlate.pl and is not visible here).
#     - Rip at or beyond .Lepilogue: context->Rsp is used as-is.
#     - otherwise: $num is taken from context offset 192, the pointer
#       bn_mul_mont stored at 8(%rsp,$num,8) is loaded, 48 is added to
#       step over the six registers that bn_mul_mont's epilogue reloads
#       from offsets 0..40 of that pointer, and rbx, rbp, r12-r15 are
#       read from just below the result and written into the context.
#  3. At .Lin_prologue, the quadwords at 8(%rax) and 16(%rax) become
#     context->Rdi and context->Rsi, and %rax becomes context->Rsp.
#  4. Copies the context into disp->ContextRecord with rep movsq.  The
#     count 154 is in 8-byte units (1232 bytes), although the inline
#     comment labels it sizeof(CONTEXT).
#  5. Calls RtlVirtualUnwind through its import pointer, restores the
#     registers saved in step 1 and returns 1 (ExceptionContinueSearch).
#
# NOTE(review): the numeric CONTEXT and DISPATCHER_CONTEXT offsets are
# hard-coded; their meanings here are taken from the inline comments only.
# NOTE(review): ".byte 9,0,0,0" in .xdata is presumably an UNWIND_INFO
# header (version 1 with the exception-handler flag) -- confirm against
# the Win64 unwind data documentation.
# NOTE(review): .LSEH_begin_bn_mul_mont and .LSEH_end_bn_mul_mont are not
# defined in the visible part of the file; presumably x86_64-xlate.pl
# generates them -- confirm.
223if ($win64) {
224$rec="%rcx"; # 1st handler argument: EXCEPTION_RECORD *rec
225$frame="%rdx"; # 2nd handler argument: ULONG64 frame
226$context="%r8"; # 3rd handler argument: CONTEXT *context
227$disp="%r9"; # 4th handler argument: DISPATCHER_CONTEXT *disp
228
229$code.=<<___;
230.extern __imp_RtlVirtualUnwind
231.type se_handler,\@abi-omnipotent
232.align 16
233se_handler:
234 push %rsi
235 push %rdi
236 push %rbx
237 push %rbp
238 push %r12
239 push %r13
240 push %r14
241 push %r15
242 pushfq
243 sub \$64,%rsp
244
245 mov 120($context),%rax # pull context->Rax
246 mov 248($context),%rbx # pull context->Rip
247
248 lea .Lprologue(%rip),%r10
249 cmp %r10,%rbx # context->Rip<.Lprologue
250 jb .Lin_prologue
251
252 mov 152($context),%rax # pull context->Rsp
253
254 lea .Lepilogue(%rip),%r10
255 cmp %r10,%rbx # context->Rip>=.Lepilogue
256 jae .Lin_prologue
257
258 mov 192($context),%r10 # pull $num
259 mov 8(%rax,%r10,8),%rax # pull saved stack pointer
260 lea 48(%rax),%rax
261
262 mov -8(%rax),%rbx
263 mov -16(%rax),%rbp
264 mov -24(%rax),%r12
265 mov -32(%rax),%r13
266 mov -40(%rax),%r14
267 mov -48(%rax),%r15
268 mov %rbx,144($context) # restore context->Rbx
269 mov %rbp,160($context) # restore context->Rbp
270 mov %r12,216($context) # restore context->R12
271 mov %r13,224($context) # restore context->R13
272 mov %r14,232($context) # restore context->R14
273 mov %r15,240($context) # restore context->R15
274
275.Lin_prologue:
276 mov 8(%rax),%rdi
277 mov 16(%rax),%rsi
278 mov %rax,152($context) # restore context->Rsp
279 mov %rsi,168($context) # restore context->Rsi
280 mov %rdi,176($context) # restore context->Rdi
281
282 mov 40($disp),%rdi # disp->ContextRecord
283 mov $context,%rsi # context
284 mov \$154,%ecx # sizeof(CONTEXT)
285 .long 0xa548f3fc # cld; rep movsq
286
287 mov $disp,%rsi
288 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
289 mov 8(%rsi),%rdx # arg2, disp->ImageBase
290 mov 0(%rsi),%r8 # arg3, disp->ControlPc
291 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
292 mov 40(%rsi),%r10 # disp->ContextRecord
293 lea 56(%rsi),%r11 # &disp->HandlerData
294 lea 24(%rsi),%r12 # &disp->EstablisherFrame
295 mov %r10,32(%rsp) # arg5
296 mov %r11,40(%rsp) # arg6
297 mov %r12,48(%rsp) # arg7
298 mov %rcx,56(%rsp) # arg8, (NULL)
299 call *__imp_RtlVirtualUnwind(%rip)
300
301 mov \$1,%eax # ExceptionContinueSearch
302 add \$64,%rsp
303 popfq
202 pop %r15 304 pop %r15
203 pop %r14 305 pop %r14
204 pop %r13 306 pop %r13
205 pop %r12 307 pop %r12
206 pop %rbp 308 pop %rbp
207 pop %rbx 309 pop %rbx
310 pop %rdi
311 pop %rsi
208 ret 312 ret
209.size bn_mul_mont,.-bn_mul_mont 313.size se_handler,.-se_handler
210.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" 314
315.section .pdata
316.align 4
317 .rva .LSEH_begin_bn_mul_mont
318 .rva .LSEH_end_bn_mul_mont
319 .rva .LSEH_info_bn_mul_mont
320
321.section .xdata
322.align 8
323.LSEH_info_bn_mul_mont:
324 .byte 9,0,0,0
325 .rva se_handler
211___ 326___
327}
212 328
213print $code; 329print $code;
214close STDOUT; 330close STDOUT;