diff options
author | miod <> | 2016-11-04 17:30:30 +0000 |
---|---|---|
committer | miod <> | 2016-11-04 17:30:30 +0000 |
commit | 723502d9588ba0e1cc08af1b12654917da74d440 (patch) | |
tree | 77b413175d422148cfb0ef7b2062340230aa5413 /src/lib | |
parent | 391f8ce21bb7929810460a73e2fde2c80540848d (diff) | |
download | openbsd-723502d9588ba0e1cc08af1b12654917da74d440.tar.gz openbsd-723502d9588ba0e1cc08af1b12654917da74d440.tar.bz2 openbsd-723502d9588ba0e1cc08af1b12654917da74d440.zip |
Replace all uses of magic numbers when operating on OPENSSL_ia32cap_P[] by
meaningful constants in a private header file, so that reviewers can actually
get a chance to figure out what the code is attempting to do without knowing
all cpuid bits.
While there, turn it from an array of two 32-bit ints into a properly aligned
64-bit int.
Use of OPENSSL_ia32cap_P is now restricted to the assembler parts. C code will
now always use OPENSSL_cpu_caps() and check for the proper bits in the
whole 64-bit word it returns.
i386 tests and ok jsing@
Diffstat (limited to 'src/lib')
26 files changed, 245 insertions, 146 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index aab40e6f1c..3ba8a26eaa 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl | |||
@@ -1187,7 +1187,7 @@ sub enclast() | |||
1187 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); | 1187 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); |
1188 | 1188 | ||
1189 | if (!$x86only) { | 1189 | if (!$x86only) { |
1190 | &bt (&DWP(0,$s0),25); # check for SSE bit | 1190 | &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
1191 | &jnc (&label("x86")); | 1191 | &jnc (&label("x86")); |
1192 | 1192 | ||
1193 | &movq ("mm0",&QWP(0,$acc)); | 1193 | &movq ("mm0",&QWP(0,$acc)); |
@@ -1976,7 +1976,7 @@ sub declast() | |||
1976 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); | 1976 | &lea ($tbl,&DWP(2048+128,$tbl,$s1)); |
1977 | 1977 | ||
1978 | if (!$x86only) { | 1978 | if (!$x86only) { |
1979 | &bt (&DWP(0,$s0),25); # check for SSE bit | 1979 | &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
1980 | &jnc (&label("x86")); | 1980 | &jnc (&label("x86")); |
1981 | 1981 | ||
1982 | &movq ("mm0",&QWP(0,$acc)); | 1982 | &movq ("mm0",&QWP(0,$acc)); |
@@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
2054 | &test ($s2,15); | 2054 | &test ($s2,15); |
2055 | &jnz (&label("slow_way")); | 2055 | &jnz (&label("slow_way")); |
2056 | if (!$x86only) { | 2056 | if (!$x86only) { |
2057 | &bt (&DWP(0,$s0),28); # check for hyper-threading bit | 2057 | &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_HT"); # check for hyper-threading bit |
2058 | &jc (&label("slow_way")); | 2058 | &jc (&label("slow_way")); |
2059 | } | 2059 | } |
2060 | # pre-allocate aligned stack frame... | 2060 | # pre-allocate aligned stack frame... |
@@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
2364 | &jb (&label("slow_enc_tail")); | 2364 | &jb (&label("slow_enc_tail")); |
2365 | 2365 | ||
2366 | if (!$x86only) { | 2366 | if (!$x86only) { |
2367 | &bt ($_tmp,25); # check for SSE bit | 2367 | &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
2368 | &jnc (&label("slow_enc_x86")); | 2368 | &jnc (&label("slow_enc_x86")); |
2369 | 2369 | ||
2370 | &movq ("mm0",&QWP(0,$key)); # load iv | 2370 | &movq ("mm0",&QWP(0,$key)); # load iv |
@@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
2479 | #--------------------------- SLOW DECRYPT ---------------------------# | 2479 | #--------------------------- SLOW DECRYPT ---------------------------# |
2480 | &set_label("slow_decrypt",16); | 2480 | &set_label("slow_decrypt",16); |
2481 | if (!$x86only) { | 2481 | if (!$x86only) { |
2482 | &bt ($_tmp,25); # check for SSE bit | 2482 | &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit |
2483 | &jnc (&label("slow_dec_loop_x86")); | 2483 | &jnc (&label("slow_dec_loop_x86")); |
2484 | 2484 | ||
2485 | &set_label("slow_dec_loop_sse",4); | 2485 | &set_label("slow_dec_loop_sse",4); |
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl index f75e90ba87..c37fd55648 100755 --- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl | |||
@@ -1655,6 +1655,7 @@ $code.=<<___; | |||
1655 | .type AES_cbc_encrypt,\@function,6 | 1655 | .type AES_cbc_encrypt,\@function,6 |
1656 | .align 16 | 1656 | .align 16 |
1657 | .extern OPENSSL_ia32cap_P | 1657 | .extern OPENSSL_ia32cap_P |
1658 | .hidden OPENSSL_ia32cap_P | ||
1658 | .globl asm_AES_cbc_encrypt | 1659 | .globl asm_AES_cbc_encrypt |
1659 | .hidden asm_AES_cbc_encrypt | 1660 | .hidden asm_AES_cbc_encrypt |
1660 | asm_AES_cbc_encrypt: | 1661 | asm_AES_cbc_encrypt: |
@@ -1684,7 +1685,7 @@ AES_cbc_encrypt: | |||
1684 | jb .Lcbc_slow_prologue | 1685 | jb .Lcbc_slow_prologue |
1685 | test \$15,%rdx | 1686 | test \$15,%rdx |
1686 | jnz .Lcbc_slow_prologue | 1687 | jnz .Lcbc_slow_prologue |
1687 | bt \$28,%r10d | 1688 | bt \$IA32CAP_BIT0_HT,%r10d |
1688 | jc .Lcbc_slow_prologue | 1689 | jc .Lcbc_slow_prologue |
1689 | 1690 | ||
1690 | # allocate aligned stack frame... | 1691 | # allocate aligned stack frame... |
@@ -1944,7 +1945,7 @@ AES_cbc_encrypt: | |||
1944 | lea ($key,%rax),%rax | 1945 | lea ($key,%rax),%rax |
1945 | mov %rax,$keyend | 1946 | mov %rax,$keyend |
1946 | 1947 | ||
1947 | # pick Te4 copy which can't "overlap" with stack frame or key scdedule | 1948 | # pick Te4 copy which can't "overlap" with stack frame or key schedule |
1948 | lea 2048($sbox),$sbox | 1949 | lea 2048($sbox),$sbox |
1949 | lea 768-8(%rsp),%rax | 1950 | lea 768-8(%rsp),%rax |
1950 | sub $sbox,%rax | 1951 | sub $sbox,%rax |
@@ -2814,6 +2815,7 @@ ___ | |||
2814 | 2815 | ||
2815 | $code =~ s/\`([^\`]*)\`/eval($1)/gem; | 2816 | $code =~ s/\`([^\`]*)\`/eval($1)/gem; |
2816 | 2817 | ||
2818 | print "#include \"x86_arch.h\"\n"; | ||
2817 | print $code; | 2819 | print $code; |
2818 | 2820 | ||
2819 | close STDOUT; | 2821 | close STDOUT; |
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index 39b504cbe5..bc6c8f3fc0 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | |||
@@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output"; | |||
83 | $code.=<<___; | 83 | $code.=<<___; |
84 | .text | 84 | .text |
85 | .extern OPENSSL_ia32cap_P | 85 | .extern OPENSSL_ia32cap_P |
86 | .hidden OPENSSL_ia32cap_P | ||
86 | 87 | ||
87 | .globl aesni_cbc_sha1_enc | 88 | .globl aesni_cbc_sha1_enc |
88 | .type aesni_cbc_sha1_enc,\@abi-omnipotent | 89 | .type aesni_cbc_sha1_enc,\@abi-omnipotent |
@@ -93,10 +94,10 @@ aesni_cbc_sha1_enc: | |||
93 | mov OPENSSL_ia32cap_P+4(%rip),%r11d | 94 | mov OPENSSL_ia32cap_P+4(%rip),%r11d |
94 | ___ | 95 | ___ |
95 | $code.=<<___ if ($avx); | 96 | $code.=<<___ if ($avx); |
96 | and \$`1<<28`,%r11d # mask AVX bit | 97 | and \$IA32CAP_MASK1_AVX,%r11d # mask AVX bit |
97 | and \$`1<<30`,%r10d # mask "Intel CPU" bit | 98 | and \$IA32CAP_MASK0_INTEL,%r10d # mask "Intel CPU" bit |
98 | or %r11d,%r10d | 99 | or %r11d,%r10d |
99 | cmp \$`1<<28|1<<30`,%r10d | 100 | cmp \$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d |
100 | je aesni_cbc_sha1_enc_avx | 101 | je aesni_cbc_sha1_enc_avx |
101 | ___ | 102 | ___ |
102 | $code.=<<___; | 103 | $code.=<<___; |
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 332ef3e91d..c4e2baa6c5 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl | |||
@@ -25,7 +25,7 @@ sub bn_mul_add_words | |||
25 | { | 25 | { |
26 | local($name)=@_; | 26 | local($name)=@_; |
27 | 27 | ||
28 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 28 | &function_begin_B($name,""); |
29 | 29 | ||
30 | $r="eax"; | 30 | $r="eax"; |
31 | $a="edx"; | 31 | $a="edx"; |
@@ -33,7 +33,7 @@ sub bn_mul_add_words | |||
33 | 33 | ||
34 | if ($sse2) { | 34 | if ($sse2) { |
35 | &picmeup("eax","OPENSSL_ia32cap_P"); | 35 | &picmeup("eax","OPENSSL_ia32cap_P"); |
36 | &bt(&DWP(0,"eax"),26); | 36 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
37 | &jnc(&label("maw_non_sse2")); | 37 | &jnc(&label("maw_non_sse2")); |
38 | 38 | ||
39 | &mov($r,&wparam(0)); | 39 | &mov($r,&wparam(0)); |
@@ -211,7 +211,7 @@ sub bn_mul_words | |||
211 | { | 211 | { |
212 | local($name)=@_; | 212 | local($name)=@_; |
213 | 213 | ||
214 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 214 | &function_begin_B($name,""); |
215 | 215 | ||
216 | $r="eax"; | 216 | $r="eax"; |
217 | $a="edx"; | 217 | $a="edx"; |
@@ -219,7 +219,7 @@ sub bn_mul_words | |||
219 | 219 | ||
220 | if ($sse2) { | 220 | if ($sse2) { |
221 | &picmeup("eax","OPENSSL_ia32cap_P"); | 221 | &picmeup("eax","OPENSSL_ia32cap_P"); |
222 | &bt(&DWP(0,"eax"),26); | 222 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
223 | &jnc(&label("mw_non_sse2")); | 223 | &jnc(&label("mw_non_sse2")); |
224 | 224 | ||
225 | &mov($r,&wparam(0)); | 225 | &mov($r,&wparam(0)); |
@@ -322,7 +322,7 @@ sub bn_sqr_words | |||
322 | { | 322 | { |
323 | local($name)=@_; | 323 | local($name)=@_; |
324 | 324 | ||
325 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 325 | &function_begin_B($name,""); |
326 | 326 | ||
327 | $r="eax"; | 327 | $r="eax"; |
328 | $a="edx"; | 328 | $a="edx"; |
@@ -330,7 +330,7 @@ sub bn_sqr_words | |||
330 | 330 | ||
331 | if ($sse2) { | 331 | if ($sse2) { |
332 | &picmeup("eax","OPENSSL_ia32cap_P"); | 332 | &picmeup("eax","OPENSSL_ia32cap_P"); |
333 | &bt(&DWP(0,"eax"),26); | 333 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
334 | &jnc(&label("sqr_non_sse2")); | 334 | &jnc(&label("sqr_non_sse2")); |
335 | 335 | ||
336 | &mov($r,&wparam(0)); | 336 | &mov($r,&wparam(0)); |
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl index 808a1e5969..97d9136260 100644 --- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl | |||
@@ -203,12 +203,12 @@ if (!$x86only) { | |||
203 | &picmeup("edx","OPENSSL_ia32cap_P"); | 203 | &picmeup("edx","OPENSSL_ia32cap_P"); |
204 | &mov ("eax",&DWP(0,"edx")); | 204 | &mov ("eax",&DWP(0,"edx")); |
205 | &mov ("edx",&DWP(4,"edx")); | 205 | &mov ("edx",&DWP(4,"edx")); |
206 | &test ("eax",1<<23); # check MMX bit | 206 | &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit |
207 | &jz (&label("ialu")); | 207 | &jz (&label("ialu")); |
208 | if ($sse2) { | 208 | if ($sse2) { |
209 | &test ("eax",1<<24); # check FXSR bit | 209 | &test ("eax","\$IA32CAP_MASK0_FXSR"); # check FXSR bit |
210 | &jz (&label("mmx")); | 210 | &jz (&label("mmx")); |
211 | &test ("edx",1<<1); # check PCLMULQDQ bit | 211 | &test ("edx","\$IA32CAP_MASK1_PCLMUL"); # check PCLMULQDQ bit |
212 | &jz (&label("mmx")); | 212 | &jz (&label("mmx")); |
213 | 213 | ||
214 | &movups ("xmm0",&QWP(8,"esp")); | 214 | &movups ("xmm0",&QWP(8,"esp")); |
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index e8f6b05084..a0bdd5787e 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl | |||
@@ -114,7 +114,7 @@ $temp="mm6"; | |||
114 | $mask="mm7"; | 114 | $mask="mm7"; |
115 | 115 | ||
116 | &picmeup("eax","OPENSSL_ia32cap_P"); | 116 | &picmeup("eax","OPENSSL_ia32cap_P"); |
117 | &bt (&DWP(0,"eax"),26); | 117 | &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
118 | &jnc (&label("non_sse2")); | 118 | &jnc (&label("non_sse2")); |
119 | 119 | ||
120 | &mov ("eax",-1); | 120 | &mov ("eax",-1); |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl index 8e45c7479b..3ecb425dad 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl | |||
@@ -163,12 +163,13 @@ ___ | |||
163 | 163 | ||
164 | $code.=<<___; | 164 | $code.=<<___; |
165 | .extern OPENSSL_ia32cap_P | 165 | .extern OPENSSL_ia32cap_P |
166 | .hidden OPENSSL_ia32cap_P | ||
166 | .globl bn_GF2m_mul_2x2 | 167 | .globl bn_GF2m_mul_2x2 |
167 | .type bn_GF2m_mul_2x2,\@abi-omnipotent | 168 | .type bn_GF2m_mul_2x2,\@abi-omnipotent |
168 | .align 16 | 169 | .align 16 |
169 | bn_GF2m_mul_2x2: | 170 | bn_GF2m_mul_2x2: |
170 | mov OPENSSL_ia32cap_P(%rip),%rax | 171 | mov OPENSSL_ia32cap_P+4(%rip),%eax |
171 | bt \$33,%rax | 172 | bt \$IA32CAP_BIT1_PCLMUL,%eax |
172 | jnc .Lvanilla_mul_2x2 | 173 | jnc .Lvanilla_mul_2x2 |
173 | 174 | ||
174 | movd $a1,%xmm0 | 175 | movd $a1,%xmm0 |
diff --git a/src/lib/libcrypto/cryptlib.c b/src/lib/libcrypto/cryptlib.c index fa091fbaea..8dec9caa93 100644 --- a/src/lib/libcrypto/cryptlib.c +++ b/src/lib/libcrypto/cryptlib.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: cryptlib.c,v 1.38 2016/11/04 13:56:04 miod Exp $ */ | 1 | /* $OpenBSD: cryptlib.c,v 1.39 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* ==================================================================== | 2 | /* ==================================================================== |
3 | * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. |
4 | * | 4 | * |
@@ -627,47 +627,30 @@ CRYPTO_get_lock_name(int type) | |||
627 | defined(__INTEL__) || \ | 627 | defined(__INTEL__) || \ |
628 | defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) | 628 | defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) |
629 | 629 | ||
630 | unsigned int OPENSSL_ia32cap_P[2]; | 630 | uint64_t OPENSSL_ia32cap_P; |
631 | 631 | ||
632 | uint64_t | 632 | uint64_t |
633 | OPENSSL_cpu_caps(void) | 633 | OPENSSL_cpu_caps(void) |
634 | { | 634 | { |
635 | return *(uint64_t *)OPENSSL_ia32cap_P; | 635 | return OPENSSL_ia32cap_P; |
636 | } | 636 | } |
637 | 637 | ||
638 | #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) | 638 | #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) |
639 | #define OPENSSL_CPUID_SETUP | 639 | #define OPENSSL_CPUID_SETUP |
640 | typedef unsigned long long IA32CAP; | ||
641 | void | 640 | void |
642 | OPENSSL_cpuid_setup(void) | 641 | OPENSSL_cpuid_setup(void) |
643 | { | 642 | { |
644 | static int trigger = 0; | 643 | static int trigger = 0; |
645 | IA32CAP OPENSSL_ia32_cpuid(void); | 644 | uint64_t OPENSSL_ia32_cpuid(void); |
646 | IA32CAP vec; | ||
647 | 645 | ||
648 | if (trigger) | 646 | if (trigger) |
649 | return; | 647 | return; |
650 | trigger = 1; | 648 | trigger = 1; |
651 | 649 | OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid(); | |
652 | vec = OPENSSL_ia32_cpuid(); | ||
653 | |||
654 | /* | ||
655 | * |(1<<10) sets a reserved bit to signal that variable | ||
656 | * was initialized already... This is to avoid interference | ||
657 | * with cpuid snippets in ELF .init segment. | ||
658 | */ | ||
659 | OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10); | ||
660 | OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32); | ||
661 | } | 650 | } |
662 | #endif | 651 | #endif |
663 | 652 | ||
664 | #else | 653 | #else |
665 | unsigned long * | ||
666 | OPENSSL_ia32cap_loc(void) | ||
667 | { | ||
668 | return NULL; | ||
669 | } | ||
670 | |||
671 | uint64_t | 654 | uint64_t |
672 | OPENSSL_cpu_caps(void) | 655 | OPENSSL_cpu_caps(void) |
673 | { | 656 | { |
diff --git a/src/lib/libcrypto/cryptlib.h b/src/lib/libcrypto/cryptlib.h index ad679dfa8d..d44738bf3c 100644 --- a/src/lib/libcrypto/cryptlib.h +++ b/src/lib/libcrypto/cryptlib.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: cryptlib.h,v 1.24 2014/07/11 08:44:47 jsing Exp $ */ | 1 | /* $OpenBSD: cryptlib.h,v 1.25 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -73,7 +73,6 @@ extern "C" { | |||
73 | #define X509_CERT_FILE_EVP "SSL_CERT_FILE" | 73 | #define X509_CERT_FILE_EVP "SSL_CERT_FILE" |
74 | 74 | ||
75 | void OPENSSL_cpuid_setup(void); | 75 | void OPENSSL_cpuid_setup(void); |
76 | extern unsigned int OPENSSL_ia32cap_P[]; | ||
77 | 76 | ||
78 | #ifdef __cplusplus | 77 | #ifdef __cplusplus |
79 | } | 78 | } |
diff --git a/src/lib/libcrypto/engine/eng_aesni.c b/src/lib/libcrypto/engine/eng_aesni.c index 5f9a36236a..92794f6086 100644 --- a/src/lib/libcrypto/engine/eng_aesni.c +++ b/src/lib/libcrypto/engine/eng_aesni.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: eng_aesni.c,v 1.8 2015/02/10 09:46:30 miod Exp $ */ | 1 | /* $OpenBSD: eng_aesni.c,v 1.9 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* | 2 | /* |
3 | * Support for Intel AES-NI intruction set | 3 | * Support for Intel AES-NI intruction set |
4 | * Author: Huang Ying <ying.huang@intel.com> | 4 | * Author: Huang Ying <ying.huang@intel.com> |
@@ -93,10 +93,11 @@ | |||
93 | defined(_M_AMD64) || defined(_M_X64) || \ | 93 | defined(_M_AMD64) || defined(_M_X64) || \ |
94 | defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__) | 94 | defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__) |
95 | #define COMPILE_HW_AESNI | 95 | #define COMPILE_HW_AESNI |
96 | #include "x86_arch.h" | ||
96 | #endif | 97 | #endif |
97 | static ENGINE *ENGINE_aesni (void); | 98 | static ENGINE *ENGINE_aesni(void); |
98 | 99 | ||
99 | void ENGINE_load_aesni (void) | 100 | void ENGINE_load_aesni(void) |
100 | { | 101 | { |
101 | /* On non-x86 CPUs it just returns. */ | 102 | /* On non-x86 CPUs it just returns. */ |
102 | #ifdef COMPILE_HW_AESNI | 103 | #ifdef COMPILE_HW_AESNI |
@@ -302,20 +303,13 @@ aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out, | |||
302 | } | 303 | } |
303 | /* ===== Engine "management" functions ===== */ | 304 | /* ===== Engine "management" functions ===== */ |
304 | 305 | ||
305 | typedef unsigned long long IA32CAP; | ||
306 | |||
307 | /* Prepare the ENGINE structure for registration */ | 306 | /* Prepare the ENGINE structure for registration */ |
308 | static int | 307 | static int |
309 | aesni_bind_helper(ENGINE *e) | 308 | aesni_bind_helper(ENGINE *e) |
310 | { | 309 | { |
311 | int engage; | 310 | int engage; |
312 | 311 | ||
313 | if (sizeof(OPENSSL_ia32cap_P) > 4) { | 312 | engage = (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) != 0; |
314 | engage = ((IA32CAP)OPENSSL_ia32cap_P >> 57) & 1; | ||
315 | } else { | ||
316 | IA32CAP OPENSSL_ia32_cpuid(void); | ||
317 | engage = (OPENSSL_ia32_cpuid() >> 57) & 1; | ||
318 | } | ||
319 | 313 | ||
320 | /* Register everything or return with an error */ | 314 | /* Register everything or return with an error */ |
321 | if (!ENGINE_set_id(e, aesni_id) || | 315 | if (!ENGINE_set_id(e, aesni_id) || |
diff --git a/src/lib/libcrypto/evp/e_aes.c b/src/lib/libcrypto/evp/e_aes.c index 25199dca36..b20543a90c 100644 --- a/src/lib/libcrypto/evp/e_aes.c +++ b/src/lib/libcrypto/evp/e_aes.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: e_aes.c,v 1.30 2016/11/04 13:56:05 miod Exp $ */ | 1 | /* $OpenBSD: e_aes.c,v 1.31 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* ==================================================================== | 2 | /* ==================================================================== |
3 | * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. |
4 | * | 4 | * |
@@ -150,10 +150,10 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len, | |||
150 | defined(_M_AMD64) || defined(_M_X64) || \ | 150 | defined(_M_AMD64) || defined(_M_X64) || \ |
151 | defined(__INTEL__) ) | 151 | defined(__INTEL__) ) |
152 | 152 | ||
153 | extern unsigned int OPENSSL_ia32cap_P[]; | 153 | #include "x86_arch.h" |
154 | 154 | ||
155 | #ifdef VPAES_ASM | 155 | #ifdef VPAES_ASM |
156 | #define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) | 156 | #define VPAES_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_SSSE3) |
157 | #endif | 157 | #endif |
158 | #ifdef BSAES_ASM | 158 | #ifdef BSAES_ASM |
159 | #define BSAES_CAPABLE VPAES_CAPABLE | 159 | #define BSAES_CAPABLE VPAES_CAPABLE |
@@ -161,7 +161,7 @@ extern unsigned int OPENSSL_ia32cap_P[]; | |||
161 | /* | 161 | /* |
162 | * AES-NI section | 162 | * AES-NI section |
163 | */ | 163 | */ |
164 | #define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) | 164 | #define AESNI_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) |
165 | 165 | ||
166 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, | 166 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, |
167 | AES_KEY *key); | 167 | AES_KEY *key); |
diff --git a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c index 8574823aed..3f82cf5967 100644 --- a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c +++ b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.12 2016/05/04 15:01:33 tedu Exp $ */ | 1 | /* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* ==================================================================== | 2 | /* ==================================================================== |
3 | * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
4 | * | 4 | * |
@@ -87,13 +87,12 @@ typedef struct { | |||
87 | defined(_M_AMD64) || defined(_M_X64) || \ | 87 | defined(_M_AMD64) || defined(_M_X64) || \ |
88 | defined(__INTEL__) ) | 88 | defined(__INTEL__) ) |
89 | 89 | ||
90 | #include "x86_arch.h" | ||
91 | |||
90 | #if defined(__GNUC__) && __GNUC__>=2 | 92 | #if defined(__GNUC__) && __GNUC__>=2 |
91 | # define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) | 93 | # define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) |
92 | #endif | 94 | #endif |
93 | 95 | ||
94 | extern unsigned int OPENSSL_ia32cap_P[2]; | ||
95 | #define AESNI_CAPABLE (1<<(57-32)) | ||
96 | |||
97 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); | 96 | int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); |
98 | int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); | 97 | int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); |
99 | 98 | ||
@@ -578,14 +577,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = { | |||
578 | const EVP_CIPHER * | 577 | const EVP_CIPHER * |
579 | EVP_aes_128_cbc_hmac_sha1(void) | 578 | EVP_aes_128_cbc_hmac_sha1(void) |
580 | { | 579 | { |
581 | return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? | 580 | return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ? |
582 | &aesni_128_cbc_hmac_sha1_cipher : NULL; | 581 | &aesni_128_cbc_hmac_sha1_cipher : NULL; |
583 | } | 582 | } |
584 | 583 | ||
585 | const EVP_CIPHER * | 584 | const EVP_CIPHER * |
586 | EVP_aes_256_cbc_hmac_sha1(void) | 585 | EVP_aes_256_cbc_hmac_sha1(void) |
587 | { | 586 | { |
588 | return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? | 587 | return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ? |
589 | &aesni_256_cbc_hmac_sha1_cipher : NULL; | 588 | &aesni_256_cbc_hmac_sha1_cipher : NULL; |
590 | } | 589 | } |
591 | #else | 590 | #else |
diff --git a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c index 1f085af403..39527cafe6 100644 --- a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c +++ b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: e_rc4_hmac_md5.c,v 1.5 2014/08/11 13:29:43 bcook Exp $ */ | 1 | /* $OpenBSD: e_rc4_hmac_md5.c,v 1.6 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* ==================================================================== | 2 | /* ==================================================================== |
3 | * Copyright (c) 2011 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2011 The OpenSSL Project. All rights reserved. |
4 | * | 4 | * |
@@ -105,6 +105,7 @@ rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *inkey, | |||
105 | defined(__INTEL__) ) && \ | 105 | defined(__INTEL__) ) && \ |
106 | !(defined(__APPLE__) && defined(__MACH__)) | 106 | !(defined(__APPLE__) && defined(__MACH__)) |
107 | #define STITCHED_CALL | 107 | #define STITCHED_CALL |
108 | #include "x86_arch.h" | ||
108 | #endif | 109 | #endif |
109 | 110 | ||
110 | #if !defined(STITCHED_CALL) | 111 | #if !defined(STITCHED_CALL) |
@@ -122,7 +123,6 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | |||
122 | md5_off = MD5_CBLOCK - key->md.num, | 123 | md5_off = MD5_CBLOCK - key->md.num, |
123 | blocks; | 124 | blocks; |
124 | unsigned int l; | 125 | unsigned int l; |
125 | extern unsigned int OPENSSL_ia32cap_P[]; | ||
126 | #endif | 126 | #endif |
127 | size_t plen = key->payload_length; | 127 | size_t plen = key->payload_length; |
128 | 128 | ||
@@ -139,7 +139,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | |||
139 | 139 | ||
140 | if (plen > md5_off && | 140 | if (plen > md5_off && |
141 | (blocks = (plen - md5_off) / MD5_CBLOCK) && | 141 | (blocks = (plen - md5_off) / MD5_CBLOCK) && |
142 | (OPENSSL_ia32cap_P[0]&(1 << 20)) == 0) { | 142 | (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) { |
143 | MD5_Update(&key->md, in, md5_off); | 143 | MD5_Update(&key->md, in, md5_off); |
144 | RC4(&key->ks, rc4_off, in, out); | 144 | RC4(&key->ks, rc4_off, in, out); |
145 | 145 | ||
@@ -187,7 +187,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | |||
187 | rc4_off += MD5_CBLOCK; | 187 | rc4_off += MD5_CBLOCK; |
188 | 188 | ||
189 | if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) && | 189 | if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) && |
190 | (OPENSSL_ia32cap_P[0] & (1 << 20)) == 0) { | 190 | (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) { |
191 | RC4(&key->ks, rc4_off, in, out); | 191 | RC4(&key->ks, rc4_off, in, out); |
192 | MD5_Update(&key->md, out, md5_off); | 192 | MD5_Update(&key->md, out, md5_off); |
193 | 193 | ||
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c index 6f8a8dd7f4..95ee755f83 100644 --- a/src/lib/libcrypto/modes/gcm128.c +++ b/src/lib/libcrypto/modes/gcm128.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: gcm128.c,v 1.14 2016/11/04 13:56:05 miod Exp $ */ | 1 | /* $OpenBSD: gcm128.c,v 1.15 2016/11/04 17:30:30 miod Exp $ */ |
2 | /* ==================================================================== | 2 | /* ==================================================================== |
3 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. |
4 | * | 4 | * |
@@ -637,13 +637,19 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) | |||
637 | 637 | ||
638 | #endif | 638 | #endif |
639 | 639 | ||
640 | #if defined(GHASH_ASM) && \ | ||
641 | (defined(__i386) || defined(__i386__) || \ | ||
642 | defined(__x86_64) || defined(__x86_64__) || \ | ||
643 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) | ||
644 | #include "x86_arch.h" | ||
645 | #endif | ||
646 | |||
640 | #if TABLE_BITS==4 && defined(GHASH_ASM) | 647 | #if TABLE_BITS==4 && defined(GHASH_ASM) |
641 | # if (defined(__i386) || defined(__i386__) || \ | 648 | # if (defined(__i386) || defined(__i386__) || \ |
642 | defined(__x86_64) || defined(__x86_64__) || \ | 649 | defined(__x86_64) || defined(__x86_64__) || \ |
643 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) | 650 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) |
644 | # define GHASH_ASM_X86_OR_64 | 651 | # define GHASH_ASM_X86_OR_64 |
645 | # define GCM_FUNCREF_4BIT | 652 | # define GCM_FUNCREF_4BIT |
646 | extern unsigned int OPENSSL_ia32cap_P[2]; | ||
647 | 653 | ||
648 | void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); | 654 | void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); |
649 | void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); | 655 | void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); |
@@ -705,8 +711,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |||
705 | #elif TABLE_BITS==4 | 711 | #elif TABLE_BITS==4 |
706 | # if defined(GHASH_ASM_X86_OR_64) | 712 | # if defined(GHASH_ASM_X86_OR_64) |
707 | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) | 713 | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) |
708 | if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */ | 714 | /* check FXSR and PCLMULQDQ bits */ |
709 | OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */ | 715 | if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) == |
716 | (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) { | ||
710 | gcm_init_clmul(ctx->Htable,ctx->H.u); | 717 | gcm_init_clmul(ctx->Htable,ctx->H.u); |
711 | ctx->gmult = gcm_gmult_clmul; | 718 | ctx->gmult = gcm_gmult_clmul; |
712 | ctx->ghash = gcm_ghash_clmul; | 719 | ctx->ghash = gcm_ghash_clmul; |
@@ -716,9 +723,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |||
716 | gcm_init_4bit(ctx->Htable,ctx->H.u); | 723 | gcm_init_4bit(ctx->Htable,ctx->H.u); |
717 | # if defined(GHASH_ASM_X86) /* x86 only */ | 724 | # if defined(GHASH_ASM_X86) /* x86 only */ |
718 | # if defined(OPENSSL_IA32_SSE2) | 725 | # if defined(OPENSSL_IA32_SSE2) |
719 | if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */ | 726 | if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */ |
720 | # else | 727 | # else |
721 | if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */ | 728 | if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */ |
722 | # endif | 729 | # endif |
723 | ctx->gmult = gcm_gmult_4bit_mmx; | 730 | ctx->gmult = gcm_gmult_4bit_mmx; |
724 | ctx->ghash = gcm_ghash_4bit_mmx; | 731 | ctx->ghash = gcm_ghash_4bit_mmx; |
diff --git a/src/lib/libcrypto/perlasm/x86_64-xlate.pl b/src/lib/libcrypto/perlasm/x86_64-xlate.pl index 4bd53da33d..a8393d2730 100755 --- a/src/lib/libcrypto/perlasm/x86_64-xlate.pl +++ b/src/lib/libcrypto/perlasm/x86_64-xlate.pl | |||
@@ -393,7 +393,7 @@ my %globals; | |||
393 | } | 393 | } |
394 | } | 394 | } |
395 | } | 395 | } |
396 | { package expr; # pick up expressioins | 396 | { package expr; # pick up expressions |
397 | sub re { | 397 | sub re { |
398 | my $self = shift; # single instance is enough... | 398 | my $self = shift; # single instance is enough... |
399 | local *line = shift; | 399 | local *line = shift; |
@@ -777,6 +777,8 @@ ___ | |||
777 | OPTION DOTNAME | 777 | OPTION DOTNAME |
778 | ___ | 778 | ___ |
779 | } | 779 | } |
780 | print "#include \"x86_arch.h\"\n"; | ||
781 | |||
780 | while($line=<>) { | 782 | while($line=<>) { |
781 | 783 | ||
782 | chomp($line); | 784 | chomp($line); |
diff --git a/src/lib/libcrypto/perlasm/x86asm.pl b/src/lib/libcrypto/perlasm/x86asm.pl index 5916ea4f89..e039382e00 100644 --- a/src/lib/libcrypto/perlasm/x86asm.pl +++ b/src/lib/libcrypto/perlasm/x86asm.pl | |||
@@ -248,6 +248,7 @@ EOF | |||
248 | $pic=0; | 248 | $pic=0; |
249 | for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } | 249 | for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } |
250 | 250 | ||
251 | ::emitraw("#include \"x86_arch.h\"\n"); | ||
251 | ::emitraw("#include <machine/asm.h>\n") if $openbsd; | 252 | ::emitraw("#include <machine/asm.h>\n") if $openbsd; |
252 | $filename =~ s/\.pl$//; | 253 | $filename =~ s/\.pl$//; |
253 | &file($filename); | 254 | &file($filename); |
diff --git a/src/lib/libcrypto/perlasm/x86gas.pl b/src/lib/libcrypto/perlasm/x86gas.pl index d4baea514b..84d24edbbd 100644 --- a/src/lib/libcrypto/perlasm/x86gas.pl +++ b/src/lib/libcrypto/perlasm/x86gas.pl | |||
@@ -157,10 +157,8 @@ sub ::file_end | |||
157 | } | 157 | } |
158 | } | 158 | } |
159 | if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { | 159 | if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { |
160 | my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; | 160 | push (@out, ".extern\t${nmdecor}OPENSSL_ia32cap_P\n"); |
161 | if ($::macosx) { push (@out,"$tmp,2\n"); } | 161 | push (@out, ".hidden\t${nmdecor}OPENSSL_ia32cap_P\n"); |
162 | elsif ($::elf) { push (@out,"$tmp,4\n"); } | ||
163 | else { push (@out,"$tmp\n"); } | ||
164 | } | 162 | } |
165 | push(@out,$initseg) if ($initseg); | 163 | push(@out,$initseg) if ($initseg); |
166 | } | 164 | } |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl index 84f1a798cb..03f0cff467 100644 --- a/src/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl | |||
@@ -189,7 +189,8 @@ if ($alt=0) { | |||
189 | &jz (&label("go4loop4")); | 189 | &jz (&label("go4loop4")); |
190 | 190 | ||
191 | &picmeup($out,"OPENSSL_ia32cap_P"); | 191 | &picmeup($out,"OPENSSL_ia32cap_P"); |
192 | &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX] | 192 | # check SSE2 bit [could have been MMX] |
193 | &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); | ||
193 | &jnc (&label("go4loop4")); | 194 | &jnc (&label("go4loop4")); |
194 | 195 | ||
195 | &mov ($out,&wparam(3)) if (!$alt); | 196 | &mov ($out,&wparam(3)) if (!$alt); |
@@ -312,7 +313,7 @@ $idx="edx"; | |||
312 | &xor ("eax","eax"); | 313 | &xor ("eax","eax"); |
313 | &mov (&DWP(-4,$out),$idi); # borrow key->y | 314 | &mov (&DWP(-4,$out),$idi); # borrow key->y |
314 | 315 | ||
315 | &bt (&DWP(0,$idx),20); # check for bit#20 | 316 | &bt (&DWP(0,$idx),"\$IA32CAP_BIT0_INTELP4"); |
316 | &jc (&label("c1stloop")); | 317 | &jc (&label("c1stloop")); |
317 | 318 | ||
318 | &set_label("w1stloop",16); | 319 | &set_label("w1stloop",16); |
@@ -388,9 +389,9 @@ $idx="edx"; | |||
388 | &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); | 389 | &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); |
389 | &picmeup("edx","OPENSSL_ia32cap_P"); | 390 | &picmeup("edx","OPENSSL_ia32cap_P"); |
390 | &mov ("edx",&DWP(0,"edx")); | 391 | &mov ("edx",&DWP(0,"edx")); |
391 | &bt ("edx",20); | 392 | &bt ("edx","\$IA32CAP_BIT0_INTELP4"); |
392 | &jc (&label("1xchar")); | 393 | &jc (&label("1xchar")); |
393 | &bt ("edx",26); | 394 | &bt ("edx","\$IA32CAP_BIT0_SSE2"); |
394 | &jnc (&label("ret")); | 395 | &jnc (&label("ret")); |
395 | &add ("eax",25); | 396 | &add ("eax",25); |
396 | &ret (); | 397 | &ret (); |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl index 197749dda7..2135b38ef8 100755 --- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | |||
@@ -122,6 +122,7 @@ $out="%rcx"; # arg4 | |||
122 | $code=<<___; | 122 | $code=<<___; |
123 | .text | 123 | .text |
124 | .extern OPENSSL_ia32cap_P | 124 | .extern OPENSSL_ia32cap_P |
125 | .hidden OPENSSL_ia32cap_P | ||
125 | 126 | ||
126 | .globl RC4 | 127 | .globl RC4 |
127 | .type RC4,\@function,4 | 128 | .type RC4,\@function,4 |
@@ -164,7 +165,7 @@ $code.=<<___; | |||
164 | movl ($dat,$XX[0],4),$TX[0]#d | 165 | movl ($dat,$XX[0],4),$TX[0]#d |
165 | test \$-16,$len | 166 | test \$-16,$len |
166 | jz .Lloop1 | 167 | jz .Lloop1 |
167 | bt \$30,%r8d # Intel CPU? | 168 | bt \$IA32CAP_BIT0_INTEL,%r8d # Intel CPU? |
168 | jc .Lintel | 169 | jc .Lintel |
169 | and \$7,$TX[1] | 170 | and \$7,$TX[1] |
170 | lea 1($XX[0]),$XX[1] | 171 | lea 1($XX[0]),$XX[1] |
@@ -442,7 +443,7 @@ RC4_set_key: | |||
442 | xor %r11,%r11 | 443 | xor %r11,%r11 |
443 | 444 | ||
444 | mov OPENSSL_ia32cap_P(%rip),$idx#d | 445 | mov OPENSSL_ia32cap_P(%rip),$idx#d |
445 | bt \$20,$idx#d # RC4_CHAR? | 446 | bt \$IA32CAP_BIT0_INTELP4,$idx#d # RC4_CHAR? |
446 | jc .Lc1stloop | 447 | jc .Lc1stloop |
447 | jmp .Lw1stloop | 448 | jmp .Lw1stloop |
448 | 449 | ||
@@ -506,9 +507,9 @@ RC4_set_key: | |||
506 | RC4_options: | 507 | RC4_options: |
507 | lea .Lopts(%rip),%rax | 508 | lea .Lopts(%rip),%rax |
508 | mov OPENSSL_ia32cap_P(%rip),%edx | 509 | mov OPENSSL_ia32cap_P(%rip),%edx |
509 | bt \$20,%edx | 510 | bt \$IA32CAP_BIT0_INTELP4,%edx |
510 | jc .L8xchar | 511 | jc .L8xchar |
511 | bt \$30,%edx | 512 | bt \$IA32CAP_BIT0_INTEL,%edx |
512 | jnc .Ldone | 513 | jnc .Ldone |
513 | add \$25,%rax | 514 | add \$25,%rax |
514 | ret | 515 | ret |
diff --git a/src/lib/libcrypto/sha/asm/sha1-586.pl b/src/lib/libcrypto/sha/asm/sha1-586.pl index 6fbea34d78..d29ed84706 100644 --- a/src/lib/libcrypto/sha/asm/sha1-586.pl +++ b/src/lib/libcrypto/sha/asm/sha1-586.pl | |||
@@ -303,15 +303,15 @@ if ($xmm) { | |||
303 | 303 | ||
304 | &mov ($A,&DWP(0,$T)); | 304 | &mov ($A,&DWP(0,$T)); |
305 | &mov ($D,&DWP(4,$T)); | 305 | &mov ($D,&DWP(4,$T)); |
306 | &test ($D,1<<9); # check SSSE3 bit | 306 | &test ($D,"\$IA32CAP_MASK1_SSSE3"); # check SSSE3 bit |
307 | &jz (&label("x86")); | 307 | &jz (&label("x86")); |
308 | &test ($A,1<<24); # check FXSR bit | 308 | &test ($A,"\$IA32CAP_MASK0_FXSR"); # check FXSR bit |
309 | &jz (&label("x86")); | 309 | &jz (&label("x86")); |
310 | if ($ymm) { | 310 | if ($ymm) { |
311 | &and ($D,1<<28); # mask AVX bit | 311 | &and ($D,"\$IA32CAP_MASK1_AVX"); # mask AVX bit |
312 | &and ($A,1<<30); # mask "Intel CPU" bit | 312 | &and ($A,"\$IA32CAP_MASK0_INTEL"); # mask "Intel CPU" bit |
313 | &or ($A,$D); | 313 | &or ($A,$D); |
314 | &cmp ($A,1<<28|1<<30); | 314 | &cmp ($A,"\$(IA32CAP_MASK1_AVX | IA32CAP_MASK0_INTEL)"); |
315 | &je (&label("avx_shortcut")); | 315 | &je (&label("avx_shortcut")); |
316 | } | 316 | } |
317 | &jmp (&label("ssse3_shortcut")); | 317 | &jmp (&label("ssse3_shortcut")); |
diff --git a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl index f15c7ec39b..147d21570b 100755 --- a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl +++ b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl | |||
@@ -216,6 +216,7 @@ unshift(@xi,pop(@xi)); | |||
216 | $code.=<<___; | 216 | $code.=<<___; |
217 | .text | 217 | .text |
218 | .extern OPENSSL_ia32cap_P | 218 | .extern OPENSSL_ia32cap_P |
219 | .hidden OPENSSL_ia32cap_P | ||
219 | 220 | ||
220 | .globl sha1_block_data_order | 221 | .globl sha1_block_data_order |
221 | .type sha1_block_data_order,\@function,3 | 222 | .type sha1_block_data_order,\@function,3 |
@@ -223,14 +224,14 @@ $code.=<<___; | |||
223 | sha1_block_data_order: | 224 | sha1_block_data_order: |
224 | mov OPENSSL_ia32cap_P+0(%rip),%r9d | 225 | mov OPENSSL_ia32cap_P+0(%rip),%r9d |
225 | mov OPENSSL_ia32cap_P+4(%rip),%r8d | 226 | mov OPENSSL_ia32cap_P+4(%rip),%r8d |
226 | test \$`1<<9`,%r8d # check SSSE3 bit | 227 | test \$IA32CAP_MASK1_SSSE3,%r8d # check SSSE3 bit |
227 | jz .Lialu | 228 | jz .Lialu |
228 | ___ | 229 | ___ |
229 | $code.=<<___ if ($avx); | 230 | $code.=<<___ if ($avx); |
230 | and \$`1<<28`,%r8d # mask AVX bit | 231 | and \$IA32CAP_MASK1_AVX,%r8d # mask AVX bit |
231 | and \$`1<<30`,%r9d # mask "Intel CPU" bit | 232 | and \$IA32CAP_MASK0_INTEL,%r9d # mask "Intel CPU" bit |
232 | or %r9d,%r8d | 233 | or %r9d,%r8d |
233 | cmp \$`1<<28|1<<30`,%r8d | 234 | cmp \$(IA32CAP_MASK0_INTEL | IA32CAP_MASK1_AVX),%r8d |
234 | je _avx_shortcut | 235 | je _avx_shortcut |
235 | ___ | 236 | ___ |
236 | $code.=<<___; | 237 | $code.=<<___; |
diff --git a/src/lib/libcrypto/sha/asm/sha512-586.pl b/src/lib/libcrypto/sha/asm/sha512-586.pl index 7eab6a5b88..163361ebe9 100644 --- a/src/lib/libcrypto/sha/asm/sha512-586.pl +++ b/src/lib/libcrypto/sha/asm/sha512-586.pl | |||
@@ -284,7 +284,7 @@ sub BODY_00_15_x86 { | |||
284 | 284 | ||
285 | if ($sse2) { | 285 | if ($sse2) { |
286 | &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); | 286 | &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); |
287 | &bt (&DWP(0,"edx"),26); | 287 | &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); |
288 | &jnc (&label("loop_x86")); | 288 | &jnc (&label("loop_x86")); |
289 | 289 | ||
290 | # load ctx->h[0-7] | 290 | # load ctx->h[0-7] |
diff --git a/src/lib/libcrypto/whrlpool/wp_block.c b/src/lib/libcrypto/whrlpool/wp_block.c index d8c1b89ba3..1e00a01330 100644 --- a/src/lib/libcrypto/whrlpool/wp_block.c +++ b/src/lib/libcrypto/whrlpool/wp_block.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: wp_block.c,v 1.12 2016/09/04 14:06:46 jsing Exp $ */ | 1 | /* $OpenBSD: wp_block.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */ |
2 | /** | 2 | /** |
3 | * The Whirlpool hashing function. | 3 | * The Whirlpool hashing function. |
4 | * | 4 | * |
@@ -36,10 +36,12 @@ | |||
36 | * | 36 | * |
37 | */ | 37 | */ |
38 | 38 | ||
39 | #include "wp_locl.h" | ||
40 | #include <string.h> | 39 | #include <string.h> |
40 | #include <openssl/crypto.h> | ||
41 | #include <machine/endian.h> | 41 | #include <machine/endian.h> |
42 | 42 | ||
43 | #include "wp_locl.h" | ||
44 | |||
43 | typedef unsigned char u8; | 45 | typedef unsigned char u8; |
44 | #if defined(_LP64) | 46 | #if defined(_LP64) |
45 | typedef unsigned long u64; | 47 | typedef unsigned long u64; |
@@ -57,12 +59,15 @@ typedef unsigned long long u64; | |||
57 | # define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX | 59 | # define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX |
58 | CPUs this is actually faster! */ | 60 | CPUs this is actually faster! */ |
59 | # endif | 61 | # endif |
60 | # define GO_FOR_MMX(ctx,inp,num) do { \ | 62 | #include "x86_arch.h" |
61 | extern unsigned int OPENSSL_ia32cap_P[]; \ | 63 | # define GO_FOR_MMX(ctx,inp,num) \ |
64 | do { \ | ||
62 | void whirlpool_block_mmx(void *,const void *,size_t); \ | 65 | void whirlpool_block_mmx(void *,const void *,size_t); \ |
63 | if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \ | 66 | if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0) \ |
64 | whirlpool_block_mmx(ctx->H.c,inp,num); return; \ | 67 | break; \ |
65 | } while (0) | 68 | whirlpool_block_mmx(ctx->H.c,inp,num); \ |
69 | return; \ | ||
70 | } while (0) | ||
66 | # endif | 71 | # endif |
67 | #elif defined(__arm__) | 72 | #elif defined(__arm__) |
68 | # define SMALL_REGISTER_BANK | 73 | # define SMALL_REGISTER_BANK |
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl index b36d3f7dc5..6558dedb6b 100644 --- a/src/lib/libcrypto/x86_64cpuid.pl +++ b/src/lib/libcrypto/x86_64cpuid.pl | |||
@@ -20,8 +20,8 @@ print<<___; | |||
20 | .section .init | 20 | .section .init |
21 | call OPENSSL_cpuid_setup | 21 | call OPENSSL_cpuid_setup |
22 | 22 | ||
23 | .extern OPENSSL_ia32cap_P | ||
23 | .hidden OPENSSL_ia32cap_P | 24 | .hidden OPENSSL_ia32cap_P |
24 | .comm OPENSSL_ia32cap_P,8,4 | ||
25 | 25 | ||
26 | .text | 26 | .text |
27 | 27 | ||
@@ -80,8 +80,8 @@ OPENSSL_ia32_cpuid: | |||
80 | mov %eax,%r10d | 80 | mov %eax,%r10d |
81 | mov \$0x80000001,%eax | 81 | mov \$0x80000001,%eax |
82 | cpuid | 82 | cpuid |
83 | or %ecx,%r9d | 83 | and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP bit |
84 | and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 | 84 | or \$1,%r9d # make sure %r9d is not zero |
85 | 85 | ||
86 | cmp \$0x80000008,%r10d | 86 | cmp \$0x80000008,%r10d |
87 | jb .Lintel | 87 | jb .Lintel |
@@ -93,12 +93,12 @@ OPENSSL_ia32_cpuid: | |||
93 | 93 | ||
94 | mov \$1,%eax | 94 | mov \$1,%eax |
95 | cpuid | 95 | cpuid |
96 | bt \$28,%edx # test hyper-threading bit | 96 | bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit |
97 | jnc .Lgeneric | 97 | jnc .Lgeneric |
98 | shr \$16,%ebx # number of logical processors | 98 | shr \$16,%ebx # number of logical processors |
99 | cmp %r10b,%bl | 99 | cmp %r10b,%bl |
100 | ja .Lgeneric | 100 | ja .Lgeneric |
101 | and \$0xefffffff,%edx # ~(1<<28) | 101 | xor \$IA32CAP_MASK0_HT,%edx |
102 | jmp .Lgeneric | 102 | jmp .Lgeneric |
103 | 103 | ||
104 | .Lintel: | 104 | .Lintel: |
@@ -116,33 +116,37 @@ OPENSSL_ia32_cpuid: | |||
116 | .Lnocacheinfo: | 116 | .Lnocacheinfo: |
117 | mov \$1,%eax | 117 | mov \$1,%eax |
118 | cpuid | 118 | cpuid |
119 | and \$0xbfefffff,%edx # force reserved bits to 0 | 119 | # force reserved bits to 0 |
120 | and \$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx | ||
120 | cmp \$0,%r9d | 121 | cmp \$0,%r9d |
121 | jne .Lnotintel | 122 | jne .Lnotintel |
122 | or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs | 123 | # set reserved bit#30 on Intel CPUs |
124 | or \$IA32CAP_MASK0_INTEL,%edx | ||
123 | and \$15,%ah | 125 | and \$15,%ah |
124 | cmp \$15,%ah # examine Family ID | 126 | cmp \$15,%ah # examine Family ID |
125 | jne .Lnotintel | 127 | jne .Lnotintel |
126 | or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR | 128 | # set reserved bit#20 to engage RC4_CHAR |
129 | or \$IA32CAP_MASK0_INTELP4,%edx | ||
127 | .Lnotintel: | 130 | .Lnotintel: |
128 | bt \$28,%edx # test hyper-threading bit | 131 | bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit |
129 | jnc .Lgeneric | 132 | jnc .Lgeneric |
130 | and \$0xefffffff,%edx # ~(1<<28) | 133 | xor \$IA32CAP_MASK0_HT,%edx |
131 | cmp \$0,%r10d | 134 | cmp \$0,%r10d |
132 | je .Lgeneric | 135 | je .Lgeneric |
133 | 136 | ||
134 | or \$0x10000000,%edx # 1<<28 | 137 | or \$IA32CAP_MASK0_HT,%edx |
135 | shr \$16,%ebx | 138 | shr \$16,%ebx |
136 | cmp \$1,%bl # see if cache is shared | 139 | cmp \$1,%bl # see if cache is shared |
137 | ja .Lgeneric | 140 | ja .Lgeneric |
138 | and \$0xefffffff,%edx # ~(1<<28) | 141 | xor \$IA32CAP_MASK0_HT,%edx # clear hyper-threading bit if not |
142 | |||
139 | .Lgeneric: | 143 | .Lgeneric: |
140 | and \$0x00000800,%r9d # isolate AMD XOP flag | 144 | and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP flag |
141 | and \$0xfffff7ff,%ecx | 145 | and \$(~IA32CAP_MASK1_AMD_XOP),%ecx |
142 | or %ecx,%r9d # merge AMD XOP flag | 146 | or %ecx,%r9d # merge AMD XOP flag |
143 | 147 | ||
144 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx | 148 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx |
145 | bt \$27,%r9d # check OSXSAVE bit | 149 | bt \$IA32CAP_BIT1_OSXSAVE,%r9d # check OSXSAVE bit |
146 | jnc .Lclear_avx | 150 | jnc .Lclear_avx |
147 | xor %ecx,%ecx # XCR0 | 151 | xor %ecx,%ecx # XCR0 |
148 | .byte 0x0f,0x01,0xd0 # xgetbv | 152 | .byte 0x0f,0x01,0xd0 # xgetbv |
@@ -150,7 +154,7 @@ OPENSSL_ia32_cpuid: | |||
150 | cmp \$6,%eax | 154 | cmp \$6,%eax |
151 | je .Ldone | 155 | je .Ldone |
152 | .Lclear_avx: | 156 | .Lclear_avx: |
153 | mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) | 157 | mov \$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax |
154 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits | 158 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits |
155 | .Ldone: | 159 | .Ldone: |
156 | shl \$32,%r9 | 160 | shl \$32,%r9 |
diff --git a/src/lib/libcrypto/x86_arch.h b/src/lib/libcrypto/x86_arch.h new file mode 100644 index 0000000000..5b2cf97546 --- /dev/null +++ b/src/lib/libcrypto/x86_arch.h | |||
@@ -0,0 +1,90 @@ | |||
1 | /* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */ | ||
2 | /* | ||
3 | * Copyright (c) 2016 Miodrag Vallat. | ||
4 | * | ||
5 | * Permission to use, copy, modify, and distribute this software for any | ||
6 | * purpose with or without fee is hereby granted, provided that the above | ||
7 | * copyright notice and this permission notice appear in all copies. | ||
8 | * | ||
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | */ | ||
17 | |||
18 | /* | ||
19 | * The knowledge of the layout of OPENSSL_ia32cap_P is internal to libcrypto | ||
20 | * (and, to some extent, to libssl), and may change in the future without | ||
21 | * notice. | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * OPENSSL_ia32cap_P is computed at runtime by OPENSSL_ia32_cpuid(). | ||
26 | * | ||
27 | * On processors which lack the cpuid instruction, the value is always | ||
28 | * zero (this only matters on 32-bit processors, of course). | ||
29 | * | ||
30 | * On processors which support the cpuid instruction, after running | ||
31 | * "cpuid 1", the value of %edx is written to the low word of OPENSSL_ia32cap_P, | ||
32 | * and the value of %ecx is written to its high word. | ||
33 | * | ||
34 | * Further processing is done to set or clear specific bits, depending | ||
35 | * upon the exact processor type. | ||
36 | * | ||
37 | * Assembly routines usually address OPENSSL_ia32cap_P as two 32-bit words, | ||
38 | * hence two sets of bit numbers and masks. OPENSSL_cpu_caps() returns the | ||
39 | * complete 64-bit word. | ||
40 | */ | ||
41 | |||
42 | /* bit numbers for the low word */ | ||
43 | #define IA32CAP_BIT0_FPU 0 | ||
44 | #define IA32CAP_BIT0_MMX 23 | ||
45 | #define IA32CAP_BIT0_FXSR 24 | ||
46 | #define IA32CAP_BIT0_SSE 25 | ||
47 | #define IA32CAP_BIT0_SSE2 26 | ||
48 | #define IA32CAP_BIT0_HT 28 | ||
49 | |||
50 | /* the following bits are not obtained from cpuid */ | ||
51 | #define IA32CAP_BIT0_INTELP4 20 | ||
52 | #define IA32CAP_BIT0_INTEL 30 | ||
53 | |||
54 | /* bit numbers for the high word */ | ||
55 | #define IA32CAP_BIT1_PCLMUL 1 | ||
56 | #define IA32CAP_BIT1_SSSE3 9 | ||
57 | #define IA32CAP_BIT1_FMA3 12 | ||
58 | #define IA32CAP_BIT1_AESNI 25 | ||
59 | #define IA32CAP_BIT1_OSXSAVE 27 | ||
60 | #define IA32CAP_BIT1_AVX 28 | ||
61 | |||
62 | #define IA32CAP_BIT1_AMD_XOP 11 | ||
63 | |||
64 | /* bit masks for the low word */ | ||
65 | #define IA32CAP_MASK0_MMX (1 << IA32CAP_BIT0_MMX) | ||
66 | #define IA32CAP_MASK0_FXSR (1 << IA32CAP_BIT0_FXSR) | ||
67 | #define IA32CAP_MASK0_SSE (1 << IA32CAP_BIT0_SSE) | ||
68 | #define IA32CAP_MASK0_SSE2 (1 << IA32CAP_BIT0_SSE2) | ||
69 | #define IA32CAP_MASK0_HT (1 << IA32CAP_BIT0_HT) | ||
70 | |||
71 | #define IA32CAP_MASK0_INTELP4 (1 << IA32CAP_BIT0_INTELP4) | ||
72 | #define IA32CAP_MASK0_INTEL (1 << IA32CAP_BIT0_INTEL) | ||
73 | |||
74 | /* bit masks for the high word */ | ||
75 | #define IA32CAP_MASK1_PCLMUL (1 << IA32CAP_BIT1_PCLMUL) | ||
76 | #define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3) | ||
77 | #define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3) | ||
78 | #define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI) | ||
79 | #define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX) | ||
80 | |||
81 | #define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP) | ||
82 | |||
83 | /* bit masks for OPENSSL_cpu_caps() */ | ||
84 | #define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX | ||
85 | #define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR | ||
86 | #define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE | ||
87 | #define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4 | ||
88 | #define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL)) | ||
89 | #define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3)) | ||
90 | #define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI)) | ||
diff --git a/src/lib/libcrypto/x86cpuid.pl b/src/lib/libcrypto/x86cpuid.pl index 7918629f64..8b9570fc72 100644 --- a/src/lib/libcrypto/x86cpuid.pl +++ b/src/lib/libcrypto/x86cpuid.pl | |||
@@ -56,8 +56,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
56 | &mov ("esi","eax"); | 56 | &mov ("esi","eax"); |
57 | &mov ("eax",0x80000001); | 57 | &mov ("eax",0x80000001); |
58 | &cpuid (); | 58 | &cpuid (); |
59 | &or ("ebp","ecx"); | 59 | &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit |
60 | &and ("ebp",1<<11|1); # isolate XOP bit | 60 | &or ("ecx",1); # make sure ecx is not zero |
61 | &mov ("ebp","ecx"); | ||
62 | |||
61 | &cmp ("esi",0x80000008); | 63 | &cmp ("esi",0x80000008); |
62 | &jb (&label("intel")); | 64 | &jb (&label("intel")); |
63 | 65 | ||
@@ -69,13 +71,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
69 | &mov ("eax",1); | 71 | &mov ("eax",1); |
70 | &xor ("ecx","ecx"); | 72 | &xor ("ecx","ecx"); |
71 | &cpuid (); | 73 | &cpuid (); |
72 | &bt ("edx",28); | 74 | &bt ("edx","\$IA32CAP_BIT0_HT"); |
73 | &jnc (&label("generic")); | 75 | &jnc (&label("generic")); |
74 | &shr ("ebx",16); | 76 | &shr ("ebx",16); |
75 | &and ("ebx",0xff); | 77 | &and ("ebx",0xff); |
76 | &cmp ("ebx","esi"); | 78 | &cmp ("ebx","esi"); |
77 | &ja (&label("generic")); | 79 | &ja (&label("generic")); |
78 | &and ("edx",0xefffffff); # clear hyper-threading bit | 80 | &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit |
79 | &jmp (&label("generic")); | 81 | &jmp (&label("generic")); |
80 | 82 | ||
81 | &set_label("intel"); | 83 | &set_label("intel"); |
@@ -94,34 +96,38 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
94 | &mov ("eax",1); | 96 | &mov ("eax",1); |
95 | &xor ("ecx","ecx"); | 97 | &xor ("ecx","ecx"); |
96 | &cpuid (); | 98 | &cpuid (); |
97 | &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 | 99 | # force reserved bits to 0. |
100 | &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)"); | ||
98 | &cmp ("ebp",0); | 101 | &cmp ("ebp",0); |
99 | &jne (&label("notintel")); | 102 | &jne (&label("notintel")); |
100 | &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs | 103 | # set reserved bit#30 on Intel CPUs |
101 | &and (&HB("eax"),15); # familiy ID | 104 | &or ("edx","\$IA32CAP_MASK0_INTEL"); |
105 | &and (&HB("eax"),15); # family ID | ||
102 | &cmp (&HB("eax"),15); # P4? | 106 | &cmp (&HB("eax"),15); # P4? |
103 | &jne (&label("notintel")); | 107 | &jne (&label("notintel")); |
104 | &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR | 108 | # set reserved bit#20 to engage RC4_CHAR |
109 | &or ("edx","\$IA32CAP_MASK0_INTELP4"); | ||
105 | &set_label("notintel"); | 110 | &set_label("notintel"); |
106 | &bt ("edx",28); # test hyper-threading bit | 111 | &bt ("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit |
107 | &jnc (&label("generic")); | 112 | &jnc (&label("generic")); |
108 | &and ("edx",0xefffffff); | 113 | &xor ("edx","\$IA32CAP_MASK0_HT"); |
109 | &cmp ("edi",0); | 114 | &cmp ("edi",0); |
110 | &je (&label("generic")); | 115 | &je (&label("generic")); |
111 | 116 | ||
112 | &or ("edx",0x10000000); | 117 | &or ("edx","\$IA32CAP_MASK0_HT"); |
113 | &shr ("ebx",16); | 118 | &shr ("ebx",16); |
114 | &cmp (&LB("ebx"),1); | 119 | &cmp (&LB("ebx"),1); # see if cache is shared |
115 | &ja (&label("generic")); | 120 | &ja (&label("generic")); |
116 | &and ("edx",0xefffffff); # clear hyper-threading bit if not | 121 | &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not |
117 | 122 | ||
118 | &set_label("generic"); | 123 | &set_label("generic"); |
119 | &and ("ebp",1<<11); # isolate AMD XOP flag | 124 | &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag |
120 | &and ("ecx",0xfffff7ff); # force 11th bit to 0 | 125 | # force reserved bits to 0. |
126 | &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP"); | ||
121 | &mov ("esi","edx"); | 127 | &mov ("esi","edx"); |
122 | &or ("ebp","ecx"); # merge AMD XOP flag | 128 | &or ("ebp","ecx"); # merge AMD XOP flag |
123 | 129 | ||
124 | &bt ("ecx",27); # check OSXSAVE bit | 130 | &bt ("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit |
125 | &jnc (&label("clear_avx")); | 131 | &jnc (&label("clear_avx")); |
126 | &xor ("ecx","ecx"); | 132 | &xor ("ecx","ecx"); |
127 | &data_byte(0x0f,0x01,0xd0); # xgetbv | 133 | &data_byte(0x0f,0x01,0xd0); # xgetbv |
@@ -131,10 +137,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
131 | &cmp ("eax",2); | 137 | &cmp ("eax",2); |
132 | &je (&label("clear_avx")); | 138 | &je (&label("clear_avx")); |
133 | &set_label("clear_xmm"); | 139 | &set_label("clear_xmm"); |
134 | &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits | 140 | # clear AESNI and PCLMULQDQ bits. |
135 | &and ("esi",0xfeffffff); # clear FXSR | 141 | &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)"); |
142 | # clear FXSR. | ||
143 | &and ("esi","\$~IA32CAP_MASK0_FXSR"); | ||
136 | &set_label("clear_avx"); | 144 | &set_label("clear_avx"); |
137 | &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits | 145 | # clear AVX, FMA3 and AMD XOP bits. |
146 | &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)"); | ||
138 | &set_label("done"); | 147 | &set_label("done"); |
139 | &mov ("eax","esi"); | 148 | &mov ("eax","esi"); |
140 | &mov ("edx","ebp"); | 149 | &mov ("edx","ebp"); |
@@ -143,16 +152,17 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
143 | 152 | ||
144 | &external_label("OPENSSL_ia32cap_P"); | 153 | &external_label("OPENSSL_ia32cap_P"); |
145 | 154 | ||
146 | &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); | 155 | &function_begin_B("OPENSSL_wipe_cpu",""); |
147 | &xor ("eax","eax"); | 156 | &xor ("eax","eax"); |
148 | &xor ("edx","edx"); | 157 | &xor ("edx","edx"); |
149 | &picmeup("ecx","OPENSSL_ia32cap_P"); | 158 | &picmeup("ecx","OPENSSL_ia32cap_P"); |
150 | &mov ("ecx",&DWP(0,"ecx")); | 159 | &mov ("ecx",&DWP(0,"ecx")); |
151 | &bt (&DWP(0,"ecx"),0); | 160 | &bt (&DWP(0,"ecx"),"\$IA32CAP_BIT0_FPU"); |
152 | &jnc (&label("no_x87")); | 161 | &jnc (&label("no_x87")); |
153 | if ($sse2) { | 162 | if ($sse2) { |
154 | &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits | 163 | # Check SSE2 and FXSR bits. |
155 | &cmp ("ecx",1<<26|1<<24); | 164 | &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); |
165 | &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); | ||
156 | &jne (&label("no_sse2")); | 166 | &jne (&label("no_sse2")); |
157 | &pxor ("xmm0","xmm0"); | 167 | &pxor ("xmm0","xmm0"); |
158 | &pxor ("xmm1","xmm1"); | 168 | &pxor ("xmm1","xmm1"); |