diff options
26 files changed, 245 insertions, 146 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index aab40e6f1c..3ba8a26eaa 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl | |||
| @@ -1187,7 +1187,7 @@ sub enclast() | |||
| 1187 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); | 1187 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); |
| 1188 | 1188 | ||
| 1189 | if (!$x86only) { | 1189 | if (!$x86only) { |
| 1190 | &bt (&DWP(0,$s0),25); # check for SSE bit | 1190 | &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
| 1191 | &jnc (&label("x86")); | 1191 | &jnc (&label("x86")); |
| 1192 | 1192 | ||
| 1193 | &movq ("mm0",&QWP(0,$acc)); | 1193 | &movq ("mm0",&QWP(0,$acc)); |
| @@ -1976,7 +1976,7 @@ sub declast() | |||
| 1976 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); | 1976 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); |
| 1977 | 1977 | ||
| 1978 | if (!$x86only) { | 1978 | if (!$x86only) { |
| 1979 | &bt (&DWP(0,$s0),25); # check for SSE bit | 1979 | &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
| 1980 | &jnc (&label("x86")); | 1980 | &jnc (&label("x86")); |
| 1981 | 1981 | ||
| 1982 | &movq ("mm0",&QWP(0,$acc)); | 1982 | &movq ("mm0",&QWP(0,$acc)); |
| @@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
| 2054 | &test ($s2,15); | 2054 | &test ($s2,15); |
| 2055 | &jnz (&label("slow_way")); | 2055 | &jnz (&label("slow_way")); |
| 2056 | if (!$x86only) { | 2056 | if (!$x86only) { |
| 2057 | &bt (&DWP(0,$s0),28); # check for hyper-threading bit | 2057 | &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_HT"); # check for hyper-threading bit |
| 2058 | &jc (&label("slow_way")); | 2058 | &jc (&label("slow_way")); |
| 2059 | } | 2059 | } |
| 2060 | # pre-allocate aligned stack frame... | 2060 | # pre-allocate aligned stack frame... |
| @@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
| 2364 | &jb (&label("slow_enc_tail")); | 2364 | &jb (&label("slow_enc_tail")); |
| 2365 | 2365 | ||
| 2366 | if (!$x86only) { | 2366 | if (!$x86only) { |
| 2367 | &bt ($_tmp,25); # check for SSE bit | 2367 | &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
| 2368 | &jnc (&label("slow_enc_x86")); | 2368 | &jnc (&label("slow_enc_x86")); |
| 2369 | 2369 | ||
| 2370 | &movq ("mm0",&QWP(0,$key)); # load iv | 2370 | &movq ("mm0",&QWP(0,$key)); # load iv |
| @@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
| 2479 | #--------------------------- SLOW DECRYPT ---------------------------# | 2479 | #--------------------------- SLOW DECRYPT ---------------------------# |
| 2480 | &set_label("slow_decrypt",16); | 2480 | &set_label("slow_decrypt",16); |
| 2481 | if (!$x86only) { | 2481 | if (!$x86only) { |
| 2482 | &bt ($_tmp,25); # check for SSE bit | 2482 | &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
| 2483 | &jnc (&label("slow_dec_loop_x86")); | 2483 | &jnc (&label("slow_dec_loop_x86")); |
| 2484 | 2484 | ||
| 2485 | &set_label("slow_dec_loop_sse",4); | 2485 | &set_label("slow_dec_loop_sse",4); |
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl index f75e90ba87..c37fd55648 100755 --- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl | |||
| @@ -1655,6 +1655,7 @@ $code.=<<___; | |||
| 1655 | .type AES_cbc_encrypt,\@function,6 | 1655 | .type AES_cbc_encrypt,\@function,6 |
| 1656 | .align 16 | 1656 | .align 16 |
| 1657 | .extern OPENSSL_ia32cap_P | 1657 | .extern OPENSSL_ia32cap_P |
| 1658 | .hidden OPENSSL_ia32cap_P | ||
| 1658 | .globl asm_AES_cbc_encrypt | 1659 | .globl asm_AES_cbc_encrypt |
| 1659 | .hidden asm_AES_cbc_encrypt | 1660 | .hidden asm_AES_cbc_encrypt |
| 1660 | asm_AES_cbc_encrypt: | 1661 | asm_AES_cbc_encrypt: |
| @@ -1684,7 +1685,7 @@ AES_cbc_encrypt: | |||
| 1684 | jb .Lcbc_slow_prologue | 1685 | jb .Lcbc_slow_prologue |
| 1685 | test \$15,%rdx | 1686 | test \$15,%rdx |
| 1686 | jnz .Lcbc_slow_prologue | 1687 | jnz .Lcbc_slow_prologue |
| 1687 | bt \$28,%r10d | 1688 | bt \$IA32CAP_BIT0_HT,%r10d |
| 1688 | jc .Lcbc_slow_prologue | 1689 | jc .Lcbc_slow_prologue |
| 1689 | 1690 | ||
| 1690 | # allocate aligned stack frame... | 1691 | # allocate aligned stack frame... |
| @@ -1944,7 +1945,7 @@ AES_cbc_encrypt: | |||
| 1944 | lea ($key,%rax),%rax | 1945 | lea ($key,%rax),%rax |
| 1945 | mov %rax,$keyend | 1946 | mov %rax,$keyend |
| 1946 | 1947 | ||
| 1947 | # pick Te4 copy which can't "overlap" with stack frame or key scdedule | 1948 | # pick Te4 copy which can't "overlap" with stack frame or key schedule |
| 1948 | lea 2048($sbox),$sbox | 1949 | lea 2048($sbox),$sbox |
| 1949 | lea 768-8(%rsp),%rax | 1950 | lea 768-8(%rsp),%rax |
| 1950 | sub $sbox,%rax | 1951 | sub $sbox,%rax |
| @@ -2814,6 +2815,7 @@ ___ | |||
| 2814 | 2815 | ||
| 2815 | $code =~ s/\`([^\`]*)\`/eval($1)/gem; | 2816 | $code =~ s/\`([^\`]*)\`/eval($1)/gem; |
| 2816 | 2817 | ||
| 2818 | print "#include \"x86_arch.h\"\n"; | ||
| 2817 | print $code; | 2819 | print $code; |
| 2818 | 2820 | ||
| 2819 | close STDOUT; | 2821 | close STDOUT; |
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index 39b504cbe5..bc6c8f3fc0 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | |||
| @@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output"; | |||
| 83 | $code.=<<___; | 83 | $code.=<<___; |
| 84 | .text | 84 | .text |
| 85 | .extern OPENSSL_ia32cap_P | 85 | .extern OPENSSL_ia32cap_P |
| 86 | .hidden OPENSSL_ia32cap_P | ||
| 86 | 87 | ||
| 87 | .globl aesni_cbc_sha1_enc | 88 | .globl aesni_cbc_sha1_enc |
| 88 | .type aesni_cbc_sha1_enc,\@abi-omnipotent | 89 | .type aesni_cbc_sha1_enc,\@abi-omnipotent |
| @@ -93,10 +94,10 @@ aesni_cbc_sha1_enc: | |||
| 93 | mov OPENSSL_ia32cap_P+4(%rip),%r11d | 94 | mov OPENSSL_ia32cap_P+4(%rip),%r11d |
| 94 | ___ | 95 | ___ |
| 95 | $code.=<<___ if ($avx); | 96 | $code.=<<___ if ($avx); |
| 96 | and \$`1<<28`,%r11d # mask AVX bit | 97 | and \$IA32CAP_MASK1_AVX,%r11d # mask AVX bit |
| 97 | and \$`1<<30`,%r10d # mask "Intel CPU" bit | 98 | and \$IA32CAP_MASK0_INTEL,%r10d # mask "Intel CPU" bit |
| 98 | or %r11d,%r10d | 99 | or %r11d,%r10d |
| 99 | cmp \$`1<<28|1<<30`,%r10d | 100 | cmp \$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d |
| 100 | je aesni_cbc_sha1_enc_avx | 101 | je aesni_cbc_sha1_enc_avx |
| 101 | ___ | 102 | ___ |
| 102 | $code.=<<___; | 103 | $code.=<<___; |
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 332ef3e91d..c4e2baa6c5 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl | |||
| @@ -25,7 +25,7 @@ sub bn_mul_add_words | |||
| 25 | { | 25 | { |
| 26 | local($name)=@_; | 26 | local($name)=@_; |
| 27 | 27 | ||
| 28 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 28 | &function_begin_B($name,""); |
| 29 | 29 | ||
| 30 | $r="eax"; | 30 | $r="eax"; |
| 31 | $a="edx"; | 31 | $a="edx"; |
| @@ -33,7 +33,7 @@ sub bn_mul_add_words | |||
| 33 | 33 | ||
| 34 | if ($sse2) { | 34 | if ($sse2) { |
| 35 | &picmeup("eax","OPENSSL_ia32cap_P"); | 35 | &picmeup("eax","OPENSSL_ia32cap_P"); |
| 36 | &bt(&DWP(0,"eax"),26); | 36 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 37 | &jnc(&label("maw_non_sse2")); | 37 | &jnc(&label("maw_non_sse2")); |
| 38 | 38 | ||
| 39 | &mov($r,&wparam(0)); | 39 | &mov($r,&wparam(0)); |
| @@ -211,7 +211,7 @@ sub bn_mul_words | |||
| 211 | { | 211 | { |
| 212 | local($name)=@_; | 212 | local($name)=@_; |
| 213 | 213 | ||
| 214 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 214 | &function_begin_B($name,""); |
| 215 | 215 | ||
| 216 | $r="eax"; | 216 | $r="eax"; |
| 217 | $a="edx"; | 217 | $a="edx"; |
| @@ -219,7 +219,7 @@ sub bn_mul_words | |||
| 219 | 219 | ||
| 220 | if ($sse2) { | 220 | if ($sse2) { |
| 221 | &picmeup("eax","OPENSSL_ia32cap_P"); | 221 | &picmeup("eax","OPENSSL_ia32cap_P"); |
| 222 | &bt(&DWP(0,"eax"),26); | 222 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 223 | &jnc(&label("mw_non_sse2")); | 223 | &jnc(&label("mw_non_sse2")); |
| 224 | 224 | ||
| 225 | &mov($r,&wparam(0)); | 225 | &mov($r,&wparam(0)); |
| @@ -322,7 +322,7 @@ sub bn_sqr_words | |||
| 322 | { | 322 | { |
| 323 | local($name)=@_; | 323 | local($name)=@_; |
| 324 | 324 | ||
| 325 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 325 | &function_begin_B($name,""); |
| 326 | 326 | ||
| 327 | $r="eax"; | 327 | $r="eax"; |
| 328 | $a="edx"; | 328 | $a="edx"; |
| @@ -330,7 +330,7 @@ sub bn_sqr_words | |||
| 330 | 330 | ||
| 331 | if ($sse2) { | 331 | if ($sse2) { |
| 332 | &picmeup("eax","OPENSSL_ia32cap_P"); | 332 | &picmeup("eax","OPENSSL_ia32cap_P"); |
| 333 | &bt(&DWP(0,"eax"),26); | 333 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 334 | &jnc(&label("sqr_non_sse2")); | 334 | &jnc(&label("sqr_non_sse2")); |
| 335 | 335 | ||
| 336 | &mov($r,&wparam(0)); | 336 | &mov($r,&wparam(0)); |
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl index 808a1e5969..97d9136260 100644 --- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl | |||
| @@ -203,12 +203,12 @@ if (!$x86only) { | |||
| 203 | &picmeup("edx","OPENSSL_ia32cap_P"); | 203 | &picmeup("edx","OPENSSL_ia32cap_P"); |
| 204 | &mov ("eax",&DWP(0,"edx")); | 204 | &mov ("eax",&DWP(0,"edx")); |
| 205 | &mov ("edx",&DWP(4,"edx")); | 205 | &mov ("edx",&DWP(4,"edx")); |
| 206 | &test ("eax",1<<23); # check MMX bit | 206 | &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit |
| 207 | &jz (&label("ialu")); | 207 | &jz (&label("ialu")); |
| 208 | if ($sse2) { | 208 | if ($sse2) { |
| 209 | &test ("eax",1<<24); # check FXSR bit | 209 | &test ("eax","\$IA32CAP_MASK0_FXSR"); # check FXSR bit |
| 210 | &jz (&label("mmx")); | 210 | &jz (&label("mmx")); |
| 211 | &test ("edx",1<<1); # check PCLMULQDQ bit | 211 | &test ("edx","\$IA32CAP_MASK1_PCLMUL"); # check PCLMULQDQ bit |
| 212 | &jz (&label("mmx")); | 212 | &jz (&label("mmx")); |
| 213 | 213 | ||
| 214 | &movups ("xmm0",&QWP(8,"esp")); | 214 | &movups ("xmm0",&QWP(8,"esp")); |
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index e8f6b05084..a0bdd5787e 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl | |||
| @@ -114,7 +114,7 @@ $temp="mm6"; | |||
| 114 | $mask="mm7"; | 114 | $mask="mm7"; |
| 115 | 115 | ||
| 116 | &picmeup("eax","OPENSSL_ia32cap_P"); | 116 | &picmeup("eax","OPENSSL_ia32cap_P"); |
| 117 | &bt (&DWP(0,"eax"),26); | 117 | &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 118 | &jnc (&label("non_sse2")); | 118 | &jnc (&label("non_sse2")); |
| 119 | 119 | ||
| 120 | &mov ("eax",-1); | 120 | &mov ("eax",-1); |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl index 8e45c7479b..3ecb425dad 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl | |||
| @@ -163,12 +163,13 @@ ___ | |||
| 163 | 163 | ||
| 164 | $code.=<<___; | 164 | $code.=<<___; |
| 165 | .extern OPENSSL_ia32cap_P | 165 | .extern OPENSSL_ia32cap_P |
| 166 | .hidden OPENSSL_ia32cap_P | ||
| 166 | .globl bn_GF2m_mul_2x2 | 167 | .globl bn_GF2m_mul_2x2 |
| 167 | .type bn_GF2m_mul_2x2,\@abi-omnipotent | 168 | .type bn_GF2m_mul_2x2,\@abi-omnipotent |
| 168 | .align 16 | 169 | .align 16 |
| 169 | bn_GF2m_mul_2x2: | 170 | bn_GF2m_mul_2x2: |
| 170 | mov OPENSSL_ia32cap_P(%rip),%rax | 171 | mov OPENSSL_ia32cap_P+4(%rip),%eax |
| 171 | bt \$33,%rax | 172 | bt \$IA32CAP_BIT1_PCLMUL,%eax |
| 172 | jnc .Lvanilla_mul_2x2 | 173 | jnc .Lvanilla_mul_2x2 |
| 173 | 174 | ||
| 174 | movd $a1,%xmm0 | 175 | movd $a1,%xmm0 |
diff --git a/src/lib/libcrypto/cryptlib.c b/src/lib/libcrypto/cryptlib.c index fa091fbaea..8dec9caa93 100644 --- a/src/lib/libcrypto/cryptlib.c +++ b/src/lib/libcrypto/cryptlib.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: cryptlib.c,v 1.38 2016/11/04 13:56:04 miod Exp $ */ | 1 | /* $OpenBSD: cryptlib.c,v 1.39 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| @@ -627,47 +627,30 @@ CRYPTO_get_lock_name(int type) | |||
| 627 | defined(__INTEL__) || \ | 627 | defined(__INTEL__) || \ |
| 628 | defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) | 628 | defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) |
| 629 | 629 | ||
| 630 | unsigned int OPENSSL_ia32cap_P[2]; | 630 | uint64_t OPENSSL_ia32cap_P; |
| 631 | 631 | ||
| 632 | uint64_t | 632 | uint64_t |
| 633 | OPENSSL_cpu_caps(void) | 633 | OPENSSL_cpu_caps(void) |
| 634 | { | 634 | { |
| 635 | return *(uint64_t *)OPENSSL_ia32cap_P; | 635 | return OPENSSL_ia32cap_P; |
| 636 | } | 636 | } |
| 637 | 637 | ||
| 638 | #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) | 638 | #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) |
| 639 | #define OPENSSL_CPUID_SETUP | 639 | #define OPENSSL_CPUID_SETUP |
| 640 | typedef unsigned long long IA32CAP; | ||
| 641 | void | 640 | void |
| 642 | OPENSSL_cpuid_setup(void) | 641 | OPENSSL_cpuid_setup(void) |
| 643 | { | 642 | { |
| 644 | static int trigger = 0; | 643 | static int trigger = 0; |
| 645 | IA32CAP OPENSSL_ia32_cpuid(void); | 644 | uint64_t OPENSSL_ia32_cpuid(void); |
| 646 | IA32CAP vec; | ||
| 647 | 645 | ||
| 648 | if (trigger) | 646 | if (trigger) |
| 649 | return; | 647 | return; |
| 650 | trigger = 1; | 648 | trigger = 1; |
| 651 | 649 | OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid(); | |
| 652 | vec = OPENSSL_ia32_cpuid(); | ||
| 653 | |||
| 654 | /* | ||
| 655 | * |(1<<10) sets a reserved bit to signal that variable | ||
| 656 | * was initialized already... This is to avoid interference | ||
| 657 | * with cpuid snippets in ELF .init segment. | ||
| 658 | */ | ||
| 659 | OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10); | ||
| 660 | OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32); | ||
| 661 | } | 650 | } |
| 662 | #endif | 651 | #endif |
| 663 | 652 | ||
| 664 | #else | 653 | #else |
| 665 | unsigned long * | ||
| 666 | OPENSSL_ia32cap_loc(void) | ||
| 667 | { | ||
| 668 | return NULL; | ||
| 669 | } | ||
| 670 | |||
| 671 | uint64_t | 654 | uint64_t |
| 672 | OPENSSL_cpu_caps(void) | 655 | OPENSSL_cpu_caps(void) |
| 673 | { | 656 | { |
diff --git a/src/lib/libcrypto/cryptlib.h b/src/lib/libcrypto/cryptlib.h index ad679dfa8d..d44738bf3c 100644 --- a/src/lib/libcrypto/cryptlib.h +++ b/src/lib/libcrypto/cryptlib.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: cryptlib.h,v 1.24 2014/07/11 08:44:47 jsing Exp $ */ | 1 | /* $OpenBSD: cryptlib.h,v 1.25 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -73,7 +73,6 @@ extern "C" { | |||
| 73 | #define X509_CERT_FILE_EVP "SSL_CERT_FILE" | 73 | #define X509_CERT_FILE_EVP "SSL_CERT_FILE" |
| 74 | 74 | ||
| 75 | void OPENSSL_cpuid_setup(void); | 75 | void OPENSSL_cpuid_setup(void); |
| 76 | extern unsigned int OPENSSL_ia32cap_P[]; | ||
| 77 | 76 | ||
| 78 | #ifdef __cplusplus | 77 | #ifdef __cplusplus |
| 79 | } | 78 | } |
diff --git a/src/lib/libcrypto/engine/eng_aesni.c b/src/lib/libcrypto/engine/eng_aesni.c index 5f9a36236a..92794f6086 100644 --- a/src/lib/libcrypto/engine/eng_aesni.c +++ b/src/lib/libcrypto/engine/eng_aesni.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: eng_aesni.c,v 1.8 2015/02/10 09:46:30 miod Exp $ */ | 1 | /* $OpenBSD: eng_aesni.c,v 1.9 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Support for Intel AES-NI intruction set | 3 | * Support for Intel AES-NI intruction set |
| 4 | * Author: Huang Ying <ying.huang@intel.com> | 4 | * Author: Huang Ying <ying.huang@intel.com> |
| @@ -93,10 +93,11 @@ | |||
| 93 | defined(_M_AMD64) || defined(_M_X64) || \ | 93 | defined(_M_AMD64) || defined(_M_X64) || \ |
| 94 | defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__) | 94 | defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__) |
| 95 | #define COMPILE_HW_AESNI | 95 | #define COMPILE_HW_AESNI |
| 96 | #include "x86_arch.h" | ||
| 96 | #endif | 97 | #endif |
| 97 | static ENGINE *ENGINE_aesni (void); | 98 | static ENGINE *ENGINE_aesni(void); |
| 98 | 99 | ||
| 99 | void ENGINE_load_aesni (void) | 100 | void ENGINE_load_aesni(void) |
| 100 | { | 101 | { |
| 101 | /* On non-x86 CPUs it just returns. */ | 102 | /* On non-x86 CPUs it just returns. */ |
| 102 | #ifdef COMPILE_HW_AESNI | 103 | #ifdef COMPILE_HW_AESNI |
| @@ -302,20 +303,13 @@ aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out, | |||
| 302 | } | 303 | } |
| 303 | /* ===== Engine "management" functions ===== */ | 304 | /* ===== Engine "management" functions ===== */ |
| 304 | 305 | ||
| 305 | typedef unsigned long long IA32CAP; | ||
| 306 | |||
| 307 | /* Prepare the ENGINE structure for registration */ | 306 | /* Prepare the ENGINE structure for registration */ |
| 308 | static int | 307 | static int |
| 309 | aesni_bind_helper(ENGINE *e) | 308 | aesni_bind_helper(ENGINE *e) |
| 310 | { | 309 | { |
| 311 | int engage; | 310 | int engage; |
| 312 | 311 | ||
| 313 | if (sizeof(OPENSSL_ia32cap_P) > 4) { | 312 | engage = (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) != 0; |
| 314 | engage = ((IA32CAP)OPENSSL_ia32cap_P >> 57) & 1; | ||
| 315 | } else { | ||
| 316 | IA32CAP OPENSSL_ia32_cpuid(void); | ||
| 317 | engage = (OPENSSL_ia32_cpuid() >> 57) & 1; | ||
| 318 | } | ||
| 319 | 313 | ||
| 320 | /* Register everything or return with an error */ | 314 | /* Register everything or return with an error */ |
| 321 | if (!ENGINE_set_id(e, aesni_id) || | 315 | if (!ENGINE_set_id(e, aesni_id) || |
diff --git a/src/lib/libcrypto/evp/e_aes.c b/src/lib/libcrypto/evp/e_aes.c index 25199dca36..b20543a90c 100644 --- a/src/lib/libcrypto/evp/e_aes.c +++ b/src/lib/libcrypto/evp/e_aes.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: e_aes.c,v 1.30 2016/11/04 13:56:05 miod Exp $ */ | 1 | /* $OpenBSD: e_aes.c,v 1.31 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| @@ -150,10 +150,10 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len, | |||
| 150 | defined(_M_AMD64) || defined(_M_X64) || \ | 150 | defined(_M_AMD64) || defined(_M_X64) || \ |
| 151 | defined(__INTEL__) ) | 151 | defined(__INTEL__) ) |
| 152 | 152 | ||
| 153 | extern unsigned int OPENSSL_ia32cap_P[]; | 153 | #include "x86_arch.h" |
| 154 | 154 | ||
| 155 | #ifdef VPAES_ASM | 155 | #ifdef VPAES_ASM |
| 156 | #define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) | 156 | #define VPAES_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_SSSE3) |
| 157 | #endif | 157 | #endif |
| 158 | #ifdef BSAES_ASM | 158 | #ifdef BSAES_ASM |
| 159 | #define BSAES_CAPABLE VPAES_CAPABLE | 159 | #define BSAES_CAPABLE VPAES_CAPABLE |
| @@ -161,7 +161,7 @@ extern unsigned int OPENSSL_ia32cap_P[]; | |||
| 161 | /* | 161 | /* |
| 162 | * AES-NI section | 162 | * AES-NI section |
| 163 | */ | 163 | */ |
| 164 | #define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) | 164 | #define AESNI_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) |
| 165 | 165 | ||
| 166 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, | 166 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, |
| 167 | AES_KEY *key); | 167 | AES_KEY *key); |
diff --git a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c index 8574823aed..3f82cf5967 100644 --- a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c +++ b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.12 2016/05/04 15:01:33 tedu Exp $ */ | 1 | /* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| @@ -87,13 +87,12 @@ typedef struct { | |||
| 87 | defined(_M_AMD64) || defined(_M_X64) || \ | 87 | defined(_M_AMD64) || defined(_M_X64) || \ |
| 88 | defined(__INTEL__) ) | 88 | defined(__INTEL__) ) |
| 89 | 89 | ||
| 90 | #include "x86_arch.h" | ||
| 91 | |||
| 90 | #if defined(__GNUC__) && __GNUC__>=2 | 92 | #if defined(__GNUC__) && __GNUC__>=2 |
| 91 | # define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) | 93 | # define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) |
| 92 | #endif | 94 | #endif |
| 93 | 95 | ||
| 94 | extern unsigned int OPENSSL_ia32cap_P[2]; | ||
| 95 | #define AESNI_CAPABLE (1<<(57-32)) | ||
| 96 | |||
| 97 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); | 96 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); |
| 98 | int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); | 97 | int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); |
| 99 | 98 | ||
| @@ -578,14 +577,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = { | |||
| 578 | const EVP_CIPHER * | 577 | const EVP_CIPHER * |
| 579 | EVP_aes_128_cbc_hmac_sha1(void) | 578 | EVP_aes_128_cbc_hmac_sha1(void) |
| 580 | { | 579 | { |
| 581 | return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? | 580 | return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ? |
| 582 | &aesni_128_cbc_hmac_sha1_cipher : NULL; | 581 | &aesni_128_cbc_hmac_sha1_cipher : NULL; |
| 583 | } | 582 | } |
| 584 | 583 | ||
| 585 | const EVP_CIPHER * | 584 | const EVP_CIPHER * |
| 586 | EVP_aes_256_cbc_hmac_sha1(void) | 585 | EVP_aes_256_cbc_hmac_sha1(void) |
| 587 | { | 586 | { |
| 588 | return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? | 587 | return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ? |
| 589 | &aesni_256_cbc_hmac_sha1_cipher : NULL; | 588 | &aesni_256_cbc_hmac_sha1_cipher : NULL; |
| 590 | } | 589 | } |
| 591 | #else | 590 | #else |
diff --git a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c index 1f085af403..39527cafe6 100644 --- a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c +++ b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: e_rc4_hmac_md5.c,v 1.5 2014/08/11 13:29:43 bcook Exp $ */ | 1 | /* $OpenBSD: e_rc4_hmac_md5.c,v 1.6 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 2011 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2011 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| @@ -105,6 +105,7 @@ rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *inkey, | |||
| 105 | defined(__INTEL__) ) && \ | 105 | defined(__INTEL__) ) && \ |
| 106 | !(defined(__APPLE__) && defined(__MACH__)) | 106 | !(defined(__APPLE__) && defined(__MACH__)) |
| 107 | #define STITCHED_CALL | 107 | #define STITCHED_CALL |
| 108 | #include "x86_arch.h" | ||
| 108 | #endif | 109 | #endif |
| 109 | 110 | ||
| 110 | #if !defined(STITCHED_CALL) | 111 | #if !defined(STITCHED_CALL) |
| @@ -122,7 +123,6 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | |||
| 122 | md5_off = MD5_CBLOCK - key->md.num, | 123 | md5_off = MD5_CBLOCK - key->md.num, |
| 123 | blocks; | 124 | blocks; |
| 124 | unsigned int l; | 125 | unsigned int l; |
| 125 | extern unsigned int OPENSSL_ia32cap_P[]; | ||
| 126 | #endif | 126 | #endif |
| 127 | size_t plen = key->payload_length; | 127 | size_t plen = key->payload_length; |
| 128 | 128 | ||
| @@ -139,7 +139,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | |||
| 139 | 139 | ||
| 140 | if (plen > md5_off && | 140 | if (plen > md5_off && |
| 141 | (blocks = (plen - md5_off) / MD5_CBLOCK) && | 141 | (blocks = (plen - md5_off) / MD5_CBLOCK) && |
| 142 | (OPENSSL_ia32cap_P[0]&(1 << 20)) == 0) { | 142 | (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) { |
| 143 | MD5_Update(&key->md, in, md5_off); | 143 | MD5_Update(&key->md, in, md5_off); |
| 144 | RC4(&key->ks, rc4_off, in, out); | 144 | RC4(&key->ks, rc4_off, in, out); |
| 145 | 145 | ||
| @@ -187,7 +187,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | |||
| 187 | rc4_off += MD5_CBLOCK; | 187 | rc4_off += MD5_CBLOCK; |
| 188 | 188 | ||
| 189 | if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) && | 189 | if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) && |
| 190 | (OPENSSL_ia32cap_P[0] & (1 << 20)) == 0) { | 190 | (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) { |
| 191 | RC4(&key->ks, rc4_off, in, out); | 191 | RC4(&key->ks, rc4_off, in, out); |
| 192 | MD5_Update(&key->md, out, md5_off); | 192 | MD5_Update(&key->md, out, md5_off); |
| 193 | 193 | ||
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c index 6f8a8dd7f4..95ee755f83 100644 --- a/src/lib/libcrypto/modes/gcm128.c +++ b/src/lib/libcrypto/modes/gcm128.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: gcm128.c,v 1.14 2016/11/04 13:56:05 miod Exp $ */ | 1 | /* $OpenBSD: gcm128.c,v 1.15 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| @@ -637,13 +637,19 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) | |||
| 637 | 637 | ||
| 638 | #endif | 638 | #endif |
| 639 | 639 | ||
| 640 | #if defined(GHASH_ASM) && \ | ||
| 641 | (defined(__i386) || defined(__i386__) || \ | ||
| 642 | defined(__x86_64) || defined(__x86_64__) || \ | ||
| 643 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) | ||
| 644 | #include "x86_arch.h" | ||
| 645 | #endif | ||
| 646 | |||
| 640 | #if TABLE_BITS==4 && defined(GHASH_ASM) | 647 | #if TABLE_BITS==4 && defined(GHASH_ASM) |
| 641 | # if (defined(__i386) || defined(__i386__) || \ | 648 | # if (defined(__i386) || defined(__i386__) || \ |
| 642 | defined(__x86_64) || defined(__x86_64__) || \ | 649 | defined(__x86_64) || defined(__x86_64__) || \ |
| 643 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) | 650 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) |
| 644 | # define GHASH_ASM_X86_OR_64 | 651 | # define GHASH_ASM_X86_OR_64 |
| 645 | # define GCM_FUNCREF_4BIT | 652 | # define GCM_FUNCREF_4BIT |
| 646 | extern unsigned int OPENSSL_ia32cap_P[2]; | ||
| 647 | 653 | ||
| 648 | void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); | 654 | void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); |
| 649 | void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); | 655 | void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); |
| @@ -705,8 +711,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |||
| 705 | #elif TABLE_BITS==4 | 711 | #elif TABLE_BITS==4 |
| 706 | # if defined(GHASH_ASM_X86_OR_64) | 712 | # if defined(GHASH_ASM_X86_OR_64) |
| 707 | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) | 713 | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) |
| 708 | if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */ | 714 | /* check FXSR and PCLMULQDQ bits */ |
| 709 | OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */ | 715 | if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) == |
| 716 | (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) { | ||
| 710 | gcm_init_clmul(ctx->Htable,ctx->H.u); | 717 | gcm_init_clmul(ctx->Htable,ctx->H.u); |
| 711 | ctx->gmult = gcm_gmult_clmul; | 718 | ctx->gmult = gcm_gmult_clmul; |
| 712 | ctx->ghash = gcm_ghash_clmul; | 719 | ctx->ghash = gcm_ghash_clmul; |
| @@ -716,9 +723,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |||
| 716 | gcm_init_4bit(ctx->Htable,ctx->H.u); | 723 | gcm_init_4bit(ctx->Htable,ctx->H.u); |
| 717 | # if defined(GHASH_ASM_X86) /* x86 only */ | 724 | # if defined(GHASH_ASM_X86) /* x86 only */ |
| 718 | # if defined(OPENSSL_IA32_SSE2) | 725 | # if defined(OPENSSL_IA32_SSE2) |
| 719 | if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */ | 726 | if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */ |
| 720 | # else | 727 | # else |
| 721 | if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */ | 728 | if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */ |
| 722 | # endif | 729 | # endif |
| 723 | ctx->gmult = gcm_gmult_4bit_mmx; | 730 | ctx->gmult = gcm_gmult_4bit_mmx; |
| 724 | ctx->ghash = gcm_ghash_4bit_mmx; | 731 | ctx->ghash = gcm_ghash_4bit_mmx; |
diff --git a/src/lib/libcrypto/perlasm/x86_64-xlate.pl b/src/lib/libcrypto/perlasm/x86_64-xlate.pl index 4bd53da33d..a8393d2730 100755 --- a/src/lib/libcrypto/perlasm/x86_64-xlate.pl +++ b/src/lib/libcrypto/perlasm/x86_64-xlate.pl | |||
| @@ -393,7 +393,7 @@ my %globals; | |||
| 393 | } | 393 | } |
| 394 | } | 394 | } |
| 395 | } | 395 | } |
| 396 | { package expr; # pick up expressioins | 396 | { package expr; # pick up expressions |
| 397 | sub re { | 397 | sub re { |
| 398 | my $self = shift; # single instance is enough... | 398 | my $self = shift; # single instance is enough... |
| 399 | local *line = shift; | 399 | local *line = shift; |
| @@ -777,6 +777,8 @@ ___ | |||
| 777 | OPTION DOTNAME | 777 | OPTION DOTNAME |
| 778 | ___ | 778 | ___ |
| 779 | } | 779 | } |
| 780 | print "#include \"x86_arch.h\"\n"; | ||
| 781 | |||
| 780 | while($line=<>) { | 782 | while($line=<>) { |
| 781 | 783 | ||
| 782 | chomp($line); | 784 | chomp($line); |
diff --git a/src/lib/libcrypto/perlasm/x86asm.pl b/src/lib/libcrypto/perlasm/x86asm.pl index 5916ea4f89..e039382e00 100644 --- a/src/lib/libcrypto/perlasm/x86asm.pl +++ b/src/lib/libcrypto/perlasm/x86asm.pl | |||
| @@ -248,6 +248,7 @@ EOF | |||
| 248 | $pic=0; | 248 | $pic=0; |
| 249 | for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } | 249 | for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } |
| 250 | 250 | ||
| 251 | ::emitraw("#include \"x86_arch.h\"\n"); | ||
| 251 | ::emitraw("#include <machine/asm.h>\n") if $openbsd; | 252 | ::emitraw("#include <machine/asm.h>\n") if $openbsd; |
| 252 | $filename =~ s/\.pl$//; | 253 | $filename =~ s/\.pl$//; |
| 253 | &file($filename); | 254 | &file($filename); |
diff --git a/src/lib/libcrypto/perlasm/x86gas.pl b/src/lib/libcrypto/perlasm/x86gas.pl index d4baea514b..84d24edbbd 100644 --- a/src/lib/libcrypto/perlasm/x86gas.pl +++ b/src/lib/libcrypto/perlasm/x86gas.pl | |||
| @@ -157,10 +157,8 @@ sub ::file_end | |||
| 157 | } | 157 | } |
| 158 | } | 158 | } |
| 159 | if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { | 159 | if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { |
| 160 | my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; | 160 | push (@out, ".extern\t${nmdecor}OPENSSL_ia32cap_P\n"); |
| 161 | if ($::macosx) { push (@out,"$tmp,2\n"); } | 161 | push (@out, ".hidden\t${nmdecor}OPENSSL_ia32cap_P\n"); |
| 162 | elsif ($::elf) { push (@out,"$tmp,4\n"); } | ||
| 163 | else { push (@out,"$tmp\n"); } | ||
| 164 | } | 162 | } |
| 165 | push(@out,$initseg) if ($initseg); | 163 | push(@out,$initseg) if ($initseg); |
| 166 | } | 164 | } |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl index 84f1a798cb..03f0cff467 100644 --- a/src/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl | |||
| @@ -189,7 +189,8 @@ if ($alt=0) { | |||
| 189 | &jz (&label("go4loop4")); | 189 | &jz (&label("go4loop4")); |
| 190 | 190 | ||
| 191 | &picmeup($out,"OPENSSL_ia32cap_P"); | 191 | &picmeup($out,"OPENSSL_ia32cap_P"); |
| 192 | &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX] | 192 | # check SSE2 bit [could have been MMX] |
| 193 | &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); | ||
| 193 | &jnc (&label("go4loop4")); | 194 | &jnc (&label("go4loop4")); |
| 194 | 195 | ||
| 195 | &mov ($out,&wparam(3)) if (!$alt); | 196 | &mov ($out,&wparam(3)) if (!$alt); |
| @@ -312,7 +313,7 @@ $idx="edx"; | |||
| 312 | &xor ("eax","eax"); | 313 | &xor ("eax","eax"); |
| 313 | &mov (&DWP(-4,$out),$idi); # borrow key->y | 314 | &mov (&DWP(-4,$out),$idi); # borrow key->y |
| 314 | 315 | ||
| 315 | &bt (&DWP(0,$idx),20); # check for bit#20 | 316 | &bt (&DWP(0,$idx),"\$IA32CAP_BIT0_INTELP4"); |
| 316 | &jc (&label("c1stloop")); | 317 | &jc (&label("c1stloop")); |
| 317 | 318 | ||
| 318 | &set_label("w1stloop",16); | 319 | &set_label("w1stloop",16); |
| @@ -388,9 +389,9 @@ $idx="edx"; | |||
| 388 | &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); | 389 | &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); |
| 389 | &picmeup("edx","OPENSSL_ia32cap_P"); | 390 | &picmeup("edx","OPENSSL_ia32cap_P"); |
| 390 | &mov ("edx",&DWP(0,"edx")); | 391 | &mov ("edx",&DWP(0,"edx")); |
| 391 | &bt ("edx",20); | 392 | &bt ("edx","\$IA32CAP_BIT0_INTELP4"); |
| 392 | &jc (&label("1xchar")); | 393 | &jc (&label("1xchar")); |
| 393 | &bt ("edx",26); | 394 | &bt ("edx","\$IA32CAP_BIT0_SSE2"); |
| 394 | &jnc (&label("ret")); | 395 | &jnc (&label("ret")); |
| 395 | &add ("eax",25); | 396 | &add ("eax",25); |
| 396 | &ret (); | 397 | &ret (); |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl index 197749dda7..2135b38ef8 100755 --- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | |||
| @@ -122,6 +122,7 @@ $out="%rcx"; # arg4 | |||
| 122 | $code=<<___; | 122 | $code=<<___; |
| 123 | .text | 123 | .text |
| 124 | .extern OPENSSL_ia32cap_P | 124 | .extern OPENSSL_ia32cap_P |
| 125 | .hidden OPENSSL_ia32cap_P | ||
| 125 | 126 | ||
| 126 | .globl RC4 | 127 | .globl RC4 |
| 127 | .type RC4,\@function,4 | 128 | .type RC4,\@function,4 |
| @@ -164,7 +165,7 @@ $code.=<<___; | |||
| 164 | movl ($dat,$XX[0],4),$TX[0]#d | 165 | movl ($dat,$XX[0],4),$TX[0]#d |
| 165 | test \$-16,$len | 166 | test \$-16,$len |
| 166 | jz .Lloop1 | 167 | jz .Lloop1 |
| 167 | bt \$30,%r8d # Intel CPU? | 168 | bt \$IA32CAP_BIT0_INTEL,%r8d # Intel CPU? |
| 168 | jc .Lintel | 169 | jc .Lintel |
| 169 | and \$7,$TX[1] | 170 | and \$7,$TX[1] |
| 170 | lea 1($XX[0]),$XX[1] | 171 | lea 1($XX[0]),$XX[1] |
| @@ -442,7 +443,7 @@ RC4_set_key: | |||
| 442 | xor %r11,%r11 | 443 | xor %r11,%r11 |
| 443 | 444 | ||
| 444 | mov OPENSSL_ia32cap_P(%rip),$idx#d | 445 | mov OPENSSL_ia32cap_P(%rip),$idx#d |
| 445 | bt \$20,$idx#d # RC4_CHAR? | 446 | bt \$IA32CAP_BIT0_INTELP4,$idx#d # RC4_CHAR? |
| 446 | jc .Lc1stloop | 447 | jc .Lc1stloop |
| 447 | jmp .Lw1stloop | 448 | jmp .Lw1stloop |
| 448 | 449 | ||
| @@ -506,9 +507,9 @@ RC4_set_key: | |||
| 506 | RC4_options: | 507 | RC4_options: |
| 507 | lea .Lopts(%rip),%rax | 508 | lea .Lopts(%rip),%rax |
| 508 | mov OPENSSL_ia32cap_P(%rip),%edx | 509 | mov OPENSSL_ia32cap_P(%rip),%edx |
| 509 | bt \$20,%edx | 510 | bt \$IA32CAP_BIT0_INTELP4,%edx |
| 510 | jc .L8xchar | 511 | jc .L8xchar |
| 511 | bt \$30,%edx | 512 | bt \$IA32CAP_BIT0_INTEL,%edx |
| 512 | jnc .Ldone | 513 | jnc .Ldone |
| 513 | add \$25,%rax | 514 | add \$25,%rax |
| 514 | ret | 515 | ret |
diff --git a/src/lib/libcrypto/sha/asm/sha1-586.pl b/src/lib/libcrypto/sha/asm/sha1-586.pl index 6fbea34d78..d29ed84706 100644 --- a/src/lib/libcrypto/sha/asm/sha1-586.pl +++ b/src/lib/libcrypto/sha/asm/sha1-586.pl | |||
| @@ -303,15 +303,15 @@ if ($xmm) { | |||
| 303 | 303 | ||
| 304 | &mov ($A,&DWP(0,$T)); | 304 | &mov ($A,&DWP(0,$T)); |
| 305 | &mov ($D,&DWP(4,$T)); | 305 | &mov ($D,&DWP(4,$T)); |
| 306 | &test ($D,1<<9); # check SSSE3 bit | 306 | &test ($D,"\$IA32CAP_MASK1_SSSE3"); # check SSSE3 bit |
| 307 | &jz (&label("x86")); | 307 | &jz (&label("x86")); |
| 308 | &test ($A,1<<24); # check FXSR bit | 308 | &test ($A,"\$IA32CAP_MASK0_FXSR"); # check FXSR bit |
| 309 | &jz (&label("x86")); | 309 | &jz (&label("x86")); |
| 310 | if ($ymm) { | 310 | if ($ymm) { |
| 311 | &and ($D,1<<28); # mask AVX bit | 311 | &and ($D,"\$IA32CAP_MASK1_AVX"); # mask AVX bit |
| 312 | &and ($A,1<<30); # mask "Intel CPU" bit | 312 | &and ($A,"\$IA32CAP_MASK0_INTEL"); # mask "Intel CPU" bit |
| 313 | &or ($A,$D); | 313 | &or ($A,$D); |
| 314 | &cmp ($A,1<<28|1<<30); | 314 | &cmp ($A,"\$(IA32CAP_MASK1_AVX | IA32CAP_MASK0_INTEL)"); |
| 315 | &je (&label("avx_shortcut")); | 315 | &je (&label("avx_shortcut")); |
| 316 | } | 316 | } |
| 317 | &jmp (&label("ssse3_shortcut")); | 317 | &jmp (&label("ssse3_shortcut")); |
diff --git a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl index f15c7ec39b..147d21570b 100755 --- a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl +++ b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl | |||
| @@ -216,6 +216,7 @@ unshift(@xi,pop(@xi)); | |||
| 216 | $code.=<<___; | 216 | $code.=<<___; |
| 217 | .text | 217 | .text |
| 218 | .extern OPENSSL_ia32cap_P | 218 | .extern OPENSSL_ia32cap_P |
| 219 | .hidden OPENSSL_ia32cap_P | ||
| 219 | 220 | ||
| 220 | .globl sha1_block_data_order | 221 | .globl sha1_block_data_order |
| 221 | .type sha1_block_data_order,\@function,3 | 222 | .type sha1_block_data_order,\@function,3 |
| @@ -223,14 +224,14 @@ $code.=<<___; | |||
| 223 | sha1_block_data_order: | 224 | sha1_block_data_order: |
| 224 | mov OPENSSL_ia32cap_P+0(%rip),%r9d | 225 | mov OPENSSL_ia32cap_P+0(%rip),%r9d |
| 225 | mov OPENSSL_ia32cap_P+4(%rip),%r8d | 226 | mov OPENSSL_ia32cap_P+4(%rip),%r8d |
| 226 | test \$`1<<9`,%r8d # check SSSE3 bit | 227 | test \$IA32CAP_MASK1_SSSE3,%r8d # check SSSE3 bit |
| 227 | jz .Lialu | 228 | jz .Lialu |
| 228 | ___ | 229 | ___ |
| 229 | $code.=<<___ if ($avx); | 230 | $code.=<<___ if ($avx); |
| 230 | and \$`1<<28`,%r8d # mask AVX bit | 231 | and \$IA32CAP_MASK1_AVX,%r8d # mask AVX bit |
| 231 | and \$`1<<30`,%r9d # mask "Intel CPU" bit | 232 | and \$IA32CAP_MASK0_INTEL,%r9d # mask "Intel CPU" bit |
| 232 | or %r9d,%r8d | 233 | or %r9d,%r8d |
| 233 | cmp \$`1<<28|1<<30`,%r8d | 234 | cmp \$(IA32CAP_MASK0_INTEL | IA32CAP_MASK1_AVX),%r8d |
| 234 | je _avx_shortcut | 235 | je _avx_shortcut |
| 235 | ___ | 236 | ___ |
| 236 | $code.=<<___; | 237 | $code.=<<___; |
diff --git a/src/lib/libcrypto/sha/asm/sha512-586.pl b/src/lib/libcrypto/sha/asm/sha512-586.pl index 7eab6a5b88..163361ebe9 100644 --- a/src/lib/libcrypto/sha/asm/sha512-586.pl +++ b/src/lib/libcrypto/sha/asm/sha512-586.pl | |||
| @@ -284,7 +284,7 @@ sub BODY_00_15_x86 { | |||
| 284 | 284 | ||
| 285 | if ($sse2) { | 285 | if ($sse2) { |
| 286 | &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); | 286 | &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); |
| 287 | &bt (&DWP(0,"edx"),26); | 287 | &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); |
| 288 | &jnc (&label("loop_x86")); | 288 | &jnc (&label("loop_x86")); |
| 289 | 289 | ||
| 290 | # load ctx->h[0-7] | 290 | # load ctx->h[0-7] |
diff --git a/src/lib/libcrypto/whrlpool/wp_block.c b/src/lib/libcrypto/whrlpool/wp_block.c index d8c1b89ba3..1e00a01330 100644 --- a/src/lib/libcrypto/whrlpool/wp_block.c +++ b/src/lib/libcrypto/whrlpool/wp_block.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: wp_block.c,v 1.12 2016/09/04 14:06:46 jsing Exp $ */ | 1 | /* $OpenBSD: wp_block.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */ |
| 2 | /** | 2 | /** |
| 3 | * The Whirlpool hashing function. | 3 | * The Whirlpool hashing function. |
| 4 | * | 4 | * |
| @@ -36,10 +36,12 @@ | |||
| 36 | * | 36 | * |
| 37 | */ | 37 | */ |
| 38 | 38 | ||
| 39 | #include "wp_locl.h" | ||
| 40 | #include <string.h> | 39 | #include <string.h> |
| 40 | #include <openssl/crypto.h> | ||
| 41 | #include <machine/endian.h> | 41 | #include <machine/endian.h> |
| 42 | 42 | ||
| 43 | #include "wp_locl.h" | ||
| 44 | |||
| 43 | typedef unsigned char u8; | 45 | typedef unsigned char u8; |
| 44 | #if defined(_LP64) | 46 | #if defined(_LP64) |
| 45 | typedef unsigned long u64; | 47 | typedef unsigned long u64; |
| @@ -57,12 +59,15 @@ typedef unsigned long long u64; | |||
| 57 | # define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX | 59 | # define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX |
| 58 | CPUs this is actually faster! */ | 60 | CPUs this is actually faster! */ |
| 59 | # endif | 61 | # endif |
| 60 | # define GO_FOR_MMX(ctx,inp,num) do { \ | 62 | #include "x86_arch.h" |
| 61 | extern unsigned int OPENSSL_ia32cap_P[]; \ | 63 | # define GO_FOR_MMX(ctx,inp,num) \ |
| 64 | do { \ | ||
| 62 | void whirlpool_block_mmx(void *,const void *,size_t); \ | 65 | void whirlpool_block_mmx(void *,const void *,size_t); \ |
| 63 | if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \ | 66 | if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0) \ |
| 64 | whirlpool_block_mmx(ctx->H.c,inp,num); return; \ | 67 | break; \ |
| 65 | } while (0) | 68 | whirlpool_block_mmx(ctx->H.c,inp,num); \ |
| 69 | return; \ | ||
| 70 | } while (0) | ||
| 66 | # endif | 71 | # endif |
| 67 | #elif defined(__arm__) | 72 | #elif defined(__arm__) |
| 68 | # define SMALL_REGISTER_BANK | 73 | # define SMALL_REGISTER_BANK |
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl index b36d3f7dc5..6558dedb6b 100644 --- a/src/lib/libcrypto/x86_64cpuid.pl +++ b/src/lib/libcrypto/x86_64cpuid.pl | |||
| @@ -20,8 +20,8 @@ print<<___; | |||
| 20 | .section .init | 20 | .section .init |
| 21 | call OPENSSL_cpuid_setup | 21 | call OPENSSL_cpuid_setup |
| 22 | 22 | ||
| 23 | .extern OPENSSL_ia32cap_P | ||
| 23 | .hidden OPENSSL_ia32cap_P | 24 | .hidden OPENSSL_ia32cap_P |
| 24 | .comm OPENSSL_ia32cap_P,8,4 | ||
| 25 | 25 | ||
| 26 | .text | 26 | .text |
| 27 | 27 | ||
| @@ -80,8 +80,8 @@ OPENSSL_ia32_cpuid: | |||
| 80 | mov %eax,%r10d | 80 | mov %eax,%r10d |
| 81 | mov \$0x80000001,%eax | 81 | mov \$0x80000001,%eax |
| 82 | cpuid | 82 | cpuid |
| 83 | or %ecx,%r9d | 83 | and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP bit |
| 84 | and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 | 84 | or \$1,%r9d # make sure %r9d is not zero |
| 85 | 85 | ||
| 86 | cmp \$0x80000008,%r10d | 86 | cmp \$0x80000008,%r10d |
| 87 | jb .Lintel | 87 | jb .Lintel |
| @@ -93,12 +93,12 @@ OPENSSL_ia32_cpuid: | |||
| 93 | 93 | ||
| 94 | mov \$1,%eax | 94 | mov \$1,%eax |
| 95 | cpuid | 95 | cpuid |
| 96 | bt \$28,%edx # test hyper-threading bit | 96 | bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit |
| 97 | jnc .Lgeneric | 97 | jnc .Lgeneric |
| 98 | shr \$16,%ebx # number of logical processors | 98 | shr \$16,%ebx # number of logical processors |
| 99 | cmp %r10b,%bl | 99 | cmp %r10b,%bl |
| 100 | ja .Lgeneric | 100 | ja .Lgeneric |
| 101 | and \$0xefffffff,%edx # ~(1<<28) | 101 | xor \$IA32CAP_MASK0_HT,%edx |
| 102 | jmp .Lgeneric | 102 | jmp .Lgeneric |
| 103 | 103 | ||
| 104 | .Lintel: | 104 | .Lintel: |
| @@ -116,33 +116,37 @@ OPENSSL_ia32_cpuid: | |||
| 116 | .Lnocacheinfo: | 116 | .Lnocacheinfo: |
| 117 | mov \$1,%eax | 117 | mov \$1,%eax |
| 118 | cpuid | 118 | cpuid |
| 119 | and \$0xbfefffff,%edx # force reserved bits to 0 | 119 | # force reserved bits to 0 |
| 120 | and \$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx | ||
| 120 | cmp \$0,%r9d | 121 | cmp \$0,%r9d |
| 121 | jne .Lnotintel | 122 | jne .Lnotintel |
| 122 | or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs | 123 | # set reserved bit#30 on Intel CPUs |
| 124 | or \$IA32CAP_MASK0_INTEL,%edx | ||
| 123 | and \$15,%ah | 125 | and \$15,%ah |
| 124 | cmp \$15,%ah # examine Family ID | 126 | cmp \$15,%ah # examine Family ID |
| 125 | jne .Lnotintel | 127 | jne .Lnotintel |
| 126 | or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR | 128 | # set reserved bit#20 to engage RC4_CHAR |
| 129 | or \$IA32CAP_MASK0_INTELP4,%edx | ||
| 127 | .Lnotintel: | 130 | .Lnotintel: |
| 128 | bt \$28,%edx # test hyper-threading bit | 131 | bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit |
| 129 | jnc .Lgeneric | 132 | jnc .Lgeneric |
| 130 | and \$0xefffffff,%edx # ~(1<<28) | 133 | xor \$IA32CAP_MASK0_HT,%edx |
| 131 | cmp \$0,%r10d | 134 | cmp \$0,%r10d |
| 132 | je .Lgeneric | 135 | je .Lgeneric |
| 133 | 136 | ||
| 134 | or \$0x10000000,%edx # 1<<28 | 137 | or \$IA32CAP_MASK0_HT,%edx |
| 135 | shr \$16,%ebx | 138 | shr \$16,%ebx |
| 136 | cmp \$1,%bl # see if cache is shared | 139 | cmp \$1,%bl # see if cache is shared |
| 137 | ja .Lgeneric | 140 | ja .Lgeneric |
| 138 | and \$0xefffffff,%edx # ~(1<<28) | 141 | xor \$IA32CAP_MASK0_HT,%edx # clear hyper-threading bit if not |
| 142 | |||
| 139 | .Lgeneric: | 143 | .Lgeneric: |
| 140 | and \$0x00000800,%r9d # isolate AMD XOP flag | 144 | and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP flag |
| 141 | and \$0xfffff7ff,%ecx | 145 | and \$(~IA32CAP_MASK1_AMD_XOP),%ecx |
| 142 | or %ecx,%r9d # merge AMD XOP flag | 146 | or %ecx,%r9d # merge AMD XOP flag |
| 143 | 147 | ||
| 144 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx | 148 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx |
| 145 | bt \$27,%r9d # check OSXSAVE bit | 149 | bt \$IA32CAP_BIT1_OSXSAVE,%r9d # check OSXSAVE bit |
| 146 | jnc .Lclear_avx | 150 | jnc .Lclear_avx |
| 147 | xor %ecx,%ecx # XCR0 | 151 | xor %ecx,%ecx # XCR0 |
| 148 | .byte 0x0f,0x01,0xd0 # xgetbv | 152 | .byte 0x0f,0x01,0xd0 # xgetbv |
| @@ -150,7 +154,7 @@ OPENSSL_ia32_cpuid: | |||
| 150 | cmp \$6,%eax | 154 | cmp \$6,%eax |
| 151 | je .Ldone | 155 | je .Ldone |
| 152 | .Lclear_avx: | 156 | .Lclear_avx: |
| 153 | mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) | 157 | mov \$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax |
| 154 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits | 158 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits |
| 155 | .Ldone: | 159 | .Ldone: |
| 156 | shl \$32,%r9 | 160 | shl \$32,%r9 |
diff --git a/src/lib/libcrypto/x86_arch.h b/src/lib/libcrypto/x86_arch.h new file mode 100644 index 0000000000..5b2cf97546 --- /dev/null +++ b/src/lib/libcrypto/x86_arch.h | |||
| @@ -0,0 +1,90 @@ | |||
| 1 | /* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */ | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2016 Miodrag Vallat. | ||
| 4 | * | ||
| 5 | * Permission to use, copy, modify, and distribute this software for any | ||
| 6 | * purpose with or without fee is hereby granted, provided that the above | ||
| 7 | * copyright notice and this permission notice appear in all copies. | ||
| 8 | * | ||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
| 16 | */ | ||
| 17 | |||
| 18 | /* | ||
| 19 | * The knowledge of the layout of OPENSSL_ia32cap_P is internal to libcrypto | ||
| 20 | * (and, to some extent, to libssl), and may change in the future without | ||
| 21 | * notice. | ||
| 22 | */ | ||
| 23 | |||
| 24 | /* | ||
| 25 | * OPENSSL_ia32cap_P is computed at runtime by OPENSSL_ia32_cpuid(). | ||
| 26 | * | ||
| 27 | * On processors which lack the cpuid instruction, the value is always | ||
| 28 | * zero (this only matters on 32-bit processors, of course). | ||
| 29 | * | ||
| 30 | * On processors which support the cpuid instruction, after running | ||
| 31 | * "cpuid 1", the value of %edx is written to the low word of OPENSSL_ia32cap_P, | ||
| 32 | * and the value of %ecx is written to its high word. | ||
| 33 | * | ||
| 34 | * Further processing is done to set or clear specific bits, depending | ||
| 35 | * upon the exact processor type. | ||
| 36 | * | ||
| 37 | * Assembly routines usually address OPENSSL_ia32cap_P as two 32-bit words, | ||
| 38 | * hence two sets of bit numbers and masks. OPENSSL_cpu_caps() returns the | ||
| 39 | * complete 64-bit word. | ||
| 40 | */ | ||
| 41 | |||
| 42 | /* bit numbers for the low word */ | ||
| 43 | #define IA32CAP_BIT0_FPU 0 | ||
| 44 | #define IA32CAP_BIT0_MMX 23 | ||
| 45 | #define IA32CAP_BIT0_FXSR 24 | ||
| 46 | #define IA32CAP_BIT0_SSE 25 | ||
| 47 | #define IA32CAP_BIT0_SSE2 26 | ||
| 48 | #define IA32CAP_BIT0_HT 28 | ||
| 49 | |||
| 50 | /* the following bits are not obtained from cpuid */ | ||
| 51 | #define IA32CAP_BIT0_INTELP4 20 | ||
| 52 | #define IA32CAP_BIT0_INTEL 30 | ||
| 53 | |||
| 54 | /* bit numbers for the high word */ | ||
| 55 | #define IA32CAP_BIT1_PCLMUL 1 | ||
| 56 | #define IA32CAP_BIT1_SSSE3 9 | ||
| 57 | #define IA32CAP_BIT1_FMA3 12 | ||
| 58 | #define IA32CAP_BIT1_AESNI 25 | ||
| 59 | #define IA32CAP_BIT1_OSXSAVE 27 | ||
| 60 | #define IA32CAP_BIT1_AVX 28 | ||
| 61 | |||
| 62 | #define IA32CAP_BIT1_AMD_XOP 11 | ||
| 63 | |||
| 64 | /* bit masks for the low word */ | ||
| 65 | #define IA32CAP_MASK0_MMX (1 << IA32CAP_BIT0_MMX) | ||
| 66 | #define IA32CAP_MASK0_FXSR (1 << IA32CAP_BIT0_FXSR) | ||
| 67 | #define IA32CAP_MASK0_SSE (1 << IA32CAP_BIT0_SSE) | ||
| 68 | #define IA32CAP_MASK0_SSE2 (1 << IA32CAP_BIT0_SSE2) | ||
| 69 | #define IA32CAP_MASK0_HT (1 << IA32CAP_BIT0_HT) | ||
| 70 | |||
| 71 | #define IA32CAP_MASK0_INTELP4 (1 << IA32CAP_BIT0_INTELP4) | ||
| 72 | #define IA32CAP_MASK0_INTEL (1 << IA32CAP_BIT0_INTEL) | ||
| 73 | |||
| 74 | /* bit masks for the high word */ | ||
| 75 | #define IA32CAP_MASK1_PCLMUL (1 << IA32CAP_BIT1_PCLMUL) | ||
| 76 | #define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3) | ||
| 77 | #define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3) | ||
| 78 | #define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI) | ||
| 79 | #define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX) | ||
| 80 | |||
| 81 | #define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP) | ||
| 82 | |||
| 83 | /* bit masks for OPENSSL_cpu_caps() */ | ||
| 84 | #define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX | ||
| 85 | #define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR | ||
| 86 | #define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE | ||
| 87 | #define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4 | ||
| 88 | #define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL)) | ||
| 89 | #define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3)) | ||
| 90 | #define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI)) | ||
diff --git a/src/lib/libcrypto/x86cpuid.pl b/src/lib/libcrypto/x86cpuid.pl index 7918629f64..8b9570fc72 100644 --- a/src/lib/libcrypto/x86cpuid.pl +++ b/src/lib/libcrypto/x86cpuid.pl | |||
| @@ -56,8 +56,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 56 | &mov ("esi","eax"); | 56 | &mov ("esi","eax"); |
| 57 | &mov ("eax",0x80000001); | 57 | &mov ("eax",0x80000001); |
| 58 | &cpuid (); | 58 | &cpuid (); |
| 59 | &or ("ebp","ecx"); | 59 | &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit |
| 60 | &and ("ebp",1<<11|1); # isolate XOP bit | 60 | &or ("ecx",1); # make sure ecx is not zero |
| 61 | &mov ("ebp","ecx"); | ||
| 62 | |||
| 61 | &cmp ("esi",0x80000008); | 63 | &cmp ("esi",0x80000008); |
| 62 | &jb (&label("intel")); | 64 | &jb (&label("intel")); |
| 63 | 65 | ||
| @@ -69,13 +71,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 69 | &mov ("eax",1); | 71 | &mov ("eax",1); |
| 70 | &xor ("ecx","ecx"); | 72 | &xor ("ecx","ecx"); |
| 71 | &cpuid (); | 73 | &cpuid (); |
| 72 | &bt ("edx",28); | 74 | &bt ("edx","\$IA32CAP_BIT0_HT"); |
| 73 | &jnc (&label("generic")); | 75 | &jnc (&label("generic")); |
| 74 | &shr ("ebx",16); | 76 | &shr ("ebx",16); |
| 75 | &and ("ebx",0xff); | 77 | &and ("ebx",0xff); |
| 76 | &cmp ("ebx","esi"); | 78 | &cmp ("ebx","esi"); |
| 77 | &ja (&label("generic")); | 79 | &ja (&label("generic")); |
| 78 | &and ("edx",0xefffffff); # clear hyper-threading bit | 80 | &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit |
| 79 | &jmp (&label("generic")); | 81 | &jmp (&label("generic")); |
| 80 | 82 | ||
| 81 | &set_label("intel"); | 83 | &set_label("intel"); |
| @@ -94,34 +96,38 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 94 | &mov ("eax",1); | 96 | &mov ("eax",1); |
| 95 | &xor ("ecx","ecx"); | 97 | &xor ("ecx","ecx"); |
| 96 | &cpuid (); | 98 | &cpuid (); |
| 97 | &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 | 99 | # force reserved bits to 0. |
| 100 | &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)"); | ||
| 98 | &cmp ("ebp",0); | 101 | &cmp ("ebp",0); |
| 99 | &jne (&label("notintel")); | 102 | &jne (&label("notintel")); |
| 100 | &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs | 103 | # set reserved bit#30 on Intel CPUs |
| 101 | &and (&HB("eax"),15); # familiy ID | 104 | &or ("edx","\$IA32CAP_MASK0_INTEL"); |
| 105 | &and (&HB("eax"),15); # family ID | ||
| 102 | &cmp (&HB("eax"),15); # P4? | 106 | &cmp (&HB("eax"),15); # P4? |
| 103 | &jne (&label("notintel")); | 107 | &jne (&label("notintel")); |
| 104 | &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR | 108 | # set reserved bit#20 to engage RC4_CHAR |
| 109 | &or ("edx","\$IA32CAP_MASK0_INTELP4"); | ||
| 105 | &set_label("notintel"); | 110 | &set_label("notintel"); |
| 106 | &bt ("edx",28); # test hyper-threading bit | 111 | &bt ("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit |
| 107 | &jnc (&label("generic")); | 112 | &jnc (&label("generic")); |
| 108 | &and ("edx",0xefffffff); | 113 | &xor ("edx","\$IA32CAP_MASK0_HT"); |
| 109 | &cmp ("edi",0); | 114 | &cmp ("edi",0); |
| 110 | &je (&label("generic")); | 115 | &je (&label("generic")); |
| 111 | 116 | ||
| 112 | &or ("edx",0x10000000); | 117 | &or ("edx","\$IA32CAP_MASK0_HT"); |
| 113 | &shr ("ebx",16); | 118 | &shr ("ebx",16); |
| 114 | &cmp (&LB("ebx"),1); | 119 | &cmp (&LB("ebx"),1); # see if cache is shared |
| 115 | &ja (&label("generic")); | 120 | &ja (&label("generic")); |
| 116 | &and ("edx",0xefffffff); # clear hyper-threading bit if not | 121 | &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not |
| 117 | 122 | ||
| 118 | &set_label("generic"); | 123 | &set_label("generic"); |
| 119 | &and ("ebp",1<<11); # isolate AMD XOP flag | 124 | &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag |
| 120 | &and ("ecx",0xfffff7ff); # force 11th bit to 0 | 125 | # force reserved bits to 0. |
| 126 | &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP"); | ||
| 121 | &mov ("esi","edx"); | 127 | &mov ("esi","edx"); |
| 122 | &or ("ebp","ecx"); # merge AMD XOP flag | 128 | &or ("ebp","ecx"); # merge AMD XOP flag |
| 123 | 129 | ||
| 124 | &bt ("ecx",27); # check OSXSAVE bit | 130 | &bt ("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit |
| 125 | &jnc (&label("clear_avx")); | 131 | &jnc (&label("clear_avx")); |
| 126 | &xor ("ecx","ecx"); | 132 | &xor ("ecx","ecx"); |
| 127 | &data_byte(0x0f,0x01,0xd0); # xgetbv | 133 | &data_byte(0x0f,0x01,0xd0); # xgetbv |
| @@ -131,10 +137,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 131 | &cmp ("eax",2); | 137 | &cmp ("eax",2); |
| 132 | &je (&label("clear_avx")); | 138 | &je (&label("clear_avx")); |
| 133 | &set_label("clear_xmm"); | 139 | &set_label("clear_xmm"); |
| 134 | &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits | 140 | # clear AESNI and PCLMULQDQ bits. |
| 135 | &and ("esi",0xfeffffff); # clear FXSR | 141 | &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)"); |
| 142 | # clear FXSR. | ||
| 143 | &and ("esi","\$~IA32CAP_MASK0_FXSR"); | ||
| 136 | &set_label("clear_avx"); | 144 | &set_label("clear_avx"); |
| 137 | &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits | 145 | # clear AVX, FMA3 and AMD XOP bits. |
| 146 | &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)"); | ||
| 138 | &set_label("done"); | 147 | &set_label("done"); |
| 139 | &mov ("eax","esi"); | 148 | &mov ("eax","esi"); |
| 140 | &mov ("edx","ebp"); | 149 | &mov ("edx","ebp"); |
| @@ -143,16 +152,17 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 143 | 152 | ||
| 144 | &external_label("OPENSSL_ia32cap_P"); | 153 | &external_label("OPENSSL_ia32cap_P"); |
| 145 | 154 | ||
| 146 | &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); | 155 | &function_begin_B("OPENSSL_wipe_cpu",""); |
| 147 | &xor ("eax","eax"); | 156 | &xor ("eax","eax"); |
| 148 | &xor ("edx","edx"); | 157 | &xor ("edx","edx"); |
| 149 | &picmeup("ecx","OPENSSL_ia32cap_P"); | 158 | &picmeup("ecx","OPENSSL_ia32cap_P"); |
| 150 | &mov ("ecx",&DWP(0,"ecx")); | 159 | &mov ("ecx",&DWP(0,"ecx")); |
| 151 | &bt (&DWP(0,"ecx"),0); | 160 | &bt (&DWP(0,"ecx"),"\$IA32CAP_BIT0_FPU"); |
| 152 | &jnc (&label("no_x87")); | 161 | &jnc (&label("no_x87")); |
| 153 | if ($sse2) { | 162 | if ($sse2) { |
| 154 | &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits | 163 | # Check SSE2 and FXSR bits. |
| 155 | &cmp ("ecx",1<<26|1<<24); | 164 | &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); |
| 165 | &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); | ||
| 156 | &jne (&label("no_sse2")); | 166 | &jne (&label("no_sse2")); |
| 157 | &pxor ("xmm0","xmm0"); | 167 | &pxor ("xmm0","xmm0"); |
| 158 | &pxor ("xmm1","xmm1"); | 168 | &pxor ("xmm1","xmm1"); |
