From 1a12fc8399638223feca8f853e2ac2cc22eeb471 Mon Sep 17 00:00:00 2001
From: miod <>
Date: Fri, 4 Nov 2016 17:30:30 +0000
Subject: Replace all uses of magic numbers when operating on
 OPENSSL_ia32cap_P[] by meaningful constants in a private header file, so
 that reviewers can actually get a chance to figure out what the code is
 attempting to do without knowing all cpuid bits.

While there, turn it from an array of two 32-bit ints into a properly
aligned 64-bit int.

Use of OPENSSL_ia32cap_P is now restricted to the assembler parts. C code
will now always use OPENSSL_cpu_caps() and check for the proper bits in
the whole 64-bit word it returns.

i386 tests and ok jsing@
---
 src/lib/libcrypto/aes/asm/aes-586.pl           | 10 +++++-----
 src/lib/libcrypto/aes/asm/aes-x86_64.pl        |  6 ++++--
 src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl |  7 ++++---
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'src/lib/libcrypto/aes')

diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
index aab40e6f1c..3ba8a26eaa 100644
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -1187,7 +1187,7 @@ sub enclast()
 	&lea	($tbl,&DWP(2048+128,$tbl,$s1));
 
 					if (!$x86only) {
-	&bt	(&DWP(0,$s0),25);	# check for SSE bit
+	&bt	(&DWP(0,$s0),"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("x86"));
 
 	&movq	("mm0",&QWP(0,$acc));
@@ -1976,7 +1976,7 @@ sub declast()
 	&lea	($tbl,&DWP(2048+128,$tbl,$s1));
 
 					if (!$x86only) {
-	&bt	(&DWP(0,$s0),25);	# check for SSE bit
+	&bt	(&DWP(0,$s0),"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("x86"));
 
 	&movq	("mm0",&QWP(0,$acc));
@@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 	&test	($s2,15);
 	&jnz	(&label("slow_way"));
 					if (!$x86only) {
-	&bt	(&DWP(0,$s0),28);	# check for hyper-threading bit
+	&bt	(&DWP(0,$s0),"\$IA32CAP_BIT0_HT");	# check for hyper-threading bit
 	&jc	(&label("slow_way"));
 					}
 	# pre-allocate aligned stack frame...
@@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 	&jb	(&label("slow_enc_tail"));
 
 					if (!$x86only) {
-	&bt	($_tmp,25);	# check for SSE bit
+	&bt	($_tmp,"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("slow_enc_x86"));
 
 	&movq	("mm0",&QWP(0,$key));	# load iv
@@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 #--------------------------- SLOW DECRYPT ---------------------------#
 &set_label("slow_decrypt",16);
 					if (!$x86only) {
-	&bt	($_tmp,25);	# check for SSE bit
+	&bt	($_tmp,"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("slow_dec_loop_x86"));
 
 &set_label("slow_dec_loop_sse",4);
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
index f75e90ba87..c37fd55648 100755
--- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
@@ -1655,6 +1655,7 @@ $code.=<<___;
 .type	AES_cbc_encrypt,\@function,6
 .align	16
 .extern	OPENSSL_ia32cap_P
+.hidden	OPENSSL_ia32cap_P
 .globl	asm_AES_cbc_encrypt
 .hidden	asm_AES_cbc_encrypt
 asm_AES_cbc_encrypt:
@@ -1684,7 +1685,7 @@ AES_cbc_encrypt:
 	jb	.Lcbc_slow_prologue
 	test	\$15,%rdx
 	jnz	.Lcbc_slow_prologue
-	bt	\$28,%r10d
+	bt	\$IA32CAP_BIT0_HT,%r10d
 	jc	.Lcbc_slow_prologue
 
 	# allocate aligned stack frame...
@@ -1944,7 +1945,7 @@ AES_cbc_encrypt:
 	lea	($key,%rax),%rax
 	mov	%rax,$keyend
 
-	# pick Te4 copy which can't "overlap" with stack frame or key scdedule
+	# pick Te4 copy which can't "overlap" with stack frame or key schedule
 	lea	2048($sbox),$sbox
 	lea	768-8(%rsp),%rax
 	sub	$sbox,%rax
@@ -2814,6 +2815,7 @@ ___
 
 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
 
+print "#include \"x86_arch.h\"\n";
 print $code;
 
 close STDOUT;
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index 39b504cbe5..bc6c8f3fc0 100644
--- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
 $code.=<<___;
 .text
 .extern	OPENSSL_ia32cap_P
+.hidden	OPENSSL_ia32cap_P
 
 .globl	aesni_cbc_sha1_enc
 .type	aesni_cbc_sha1_enc,\@abi-omnipotent
@@ -93,10 +94,10 @@ aesni_cbc_sha1_enc:
 	mov	OPENSSL_ia32cap_P+4(%rip),%r11d
 ___
 $code.=<<___ if ($avx);
-	and	\$`1<<28`,%r11d		# mask AVX bit
-	and	\$`1<<30`,%r10d		# mask "Intel CPU" bit
+	and	\$IA32CAP_MASK1_AVX,%r11d	# mask AVX bit
+	and	\$IA32CAP_MASK0_INTEL,%r10d	# mask "Intel CPU" bit
 	or	%r11d,%r10d
-	cmp	\$`1<<28|1<<30`,%r10d
+	cmp	\$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d
 	je	aesni_cbc_sha1_enc_avx
 ___
 $code.=<<___;
-- 
cgit v1.2.3-55-g6feb
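
The private x86_arch.h header introduced by this series is not part of the diff above (the view is limited to src/lib/libcrypto/aes), so the following is only a rough sketch of what the constants and the C-side idiom described in the commit message might look like. The values are inferred from the magic numbers being replaced (word 0 bit 25 = SSE, bit 28 = HT, bit 30 = the synthetic "Intel CPU" flag; word 1 bit 28 = AVX), and the cpu_has_avx_on_intel() helper is hypothetical, assuming OPENSSL_cpu_caps() returns OPENSSL_ia32cap_P[0] in its low 32 bits and OPENSSL_ia32cap_P[1] in its high 32 bits.

/*
 * Sketch only: inferred from the magic numbers replaced above,
 * not copied from the actual x86_arch.h.
 */
#include <stdint.h>

#define IA32CAP_BIT0_SSE	25		/* CPUID.1:EDX bit 25 (SSE), bit index for bt */
#define IA32CAP_BIT0_HT		28		/* CPUID.1:EDX bit 28 (HTT), bit index for bt */

#define IA32CAP_MASK0_INTEL	(1 << 30)	/* synthetic "Intel CPU" flag in word 0 */
#define IA32CAP_MASK1_AVX	(1 << 28)	/* CPUID.1:ECX bit 28 (AVX) in word 1 */

/* Returns the whole 64-bit capability word, per the commit message. */
uint64_t OPENSSL_cpu_caps(void);

/*
 * Hypothetical helper mirroring the AVX + "Intel CPU" test performed in
 * aesni_cbc_sha1_enc above, expressed with OPENSSL_cpu_caps().
 */
static inline int
cpu_has_avx_on_intel(void)
{
	uint64_t caps = OPENSSL_cpu_caps();

	return (caps & IA32CAP_MASK0_INTEL) != 0 &&
	    (caps & ((uint64_t)IA32CAP_MASK1_AVX << 32)) != 0;
}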