summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
author miod <> 2016-11-04 17:30:30 +0000
committer miod <> 2016-11-04 17:30:30 +0000
commit 723502d9588ba0e1cc08af1b12654917da74d440 (patch)
tree 77b413175d422148cfb0ef7b2062340230aa5413 /src/lib
parent 391f8ce21bb7929810460a73e2fde2c80540848d (diff)
download openbsd-723502d9588ba0e1cc08af1b12654917da74d440.tar.gz
openbsd-723502d9588ba0e1cc08af1b12654917da74d440.tar.bz2
openbsd-723502d9588ba0e1cc08af1b12654917da74d440.zip
Replace all uses of magic numbers when operating on OPENSSL_ia32cap_P[] by
meaningful constants in a private header file, so that reviewers can actually get a chance to figure out what the code is attempting to do without knowing all cpuid bits. While there, turn it from an array of two 32-bit ints into a properly aligned 64-bit int. Use of OPENSSL_ia32cap_P is now restricted to the assembler parts. C code will now always use OPENSSL_cpu_caps() and check for the proper bits in the whole 64-bit word it returns. i386 tests and ok jsing@
Diffstat (limited to 'src/lib')
-rw-r--r--  src/lib/libcrypto/aes/asm/aes-586.pl  10
-rwxr-xr-x  src/lib/libcrypto/aes/asm/aes-x86_64.pl  6
-rw-r--r--  src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl  7
-rw-r--r--  src/lib/libcrypto/bn/asm/bn-586.pl  12
-rw-r--r--  src/lib/libcrypto/bn/asm/x86-gf2m.pl  6
-rwxr-xr-x  src/lib/libcrypto/bn/asm/x86-mont.pl  2
-rw-r--r--  src/lib/libcrypto/bn/asm/x86_64-gf2m.pl  5
-rw-r--r--  src/lib/libcrypto/cryptlib.c  27
-rw-r--r--  src/lib/libcrypto/cryptlib.h  3
-rw-r--r--  src/lib/libcrypto/engine/eng_aesni.c  16
-rw-r--r--  src/lib/libcrypto/evp/e_aes.c  8
-rw-r--r--  src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c  11
-rw-r--r--  src/lib/libcrypto/evp/e_rc4_hmac_md5.c  8
-rw-r--r--  src/lib/libcrypto/modes/gcm128.c  19
-rwxr-xr-x  src/lib/libcrypto/perlasm/x86_64-xlate.pl  4
-rw-r--r--  src/lib/libcrypto/perlasm/x86asm.pl  1
-rw-r--r--  src/lib/libcrypto/perlasm/x86gas.pl  6
-rw-r--r--  src/lib/libcrypto/rc4/asm/rc4-586.pl  9
-rwxr-xr-x  src/lib/libcrypto/rc4/asm/rc4-x86_64.pl  9
-rw-r--r--  src/lib/libcrypto/sha/asm/sha1-586.pl  10
-rwxr-xr-x  src/lib/libcrypto/sha/asm/sha1-x86_64.pl  9
-rw-r--r--  src/lib/libcrypto/sha/asm/sha512-586.pl  2
-rw-r--r--  src/lib/libcrypto/whrlpool/wp_block.c  19
-rw-r--r--  src/lib/libcrypto/x86_64cpuid.pl  36
-rw-r--r--  src/lib/libcrypto/x86_arch.h  90
-rw-r--r--  src/lib/libcrypto/x86cpuid.pl  56
26 files changed, 245 insertions, 146 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
index aab40e6f1c..3ba8a26eaa 100644
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -1187,7 +1187,7 @@ sub enclast()
1187 &lea ($tbl,&DWP(2048+128,$tbl,$s1)); 1187 &lea ($tbl,&DWP(2048+128,$tbl,$s1));
1188 1188
1189 if (!$x86only) { 1189 if (!$x86only) {
1190 &bt (&DWP(0,$s0),25); # check for SSE bit 1190 &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit
1191 &jnc (&label("x86")); 1191 &jnc (&label("x86"));
1192 1192
1193 &movq ("mm0",&QWP(0,$acc)); 1193 &movq ("mm0",&QWP(0,$acc));
@@ -1976,7 +1976,7 @@ sub declast()
1976 &lea ($tbl,&DWP(2048+128,$tbl,$s1)); 1976 &lea ($tbl,&DWP(2048+128,$tbl,$s1));
1977 1977
1978 if (!$x86only) { 1978 if (!$x86only) {
1979 &bt (&DWP(0,$s0),25); # check for SSE bit 1979 &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit
1980 &jnc (&label("x86")); 1980 &jnc (&label("x86"));
1981 1981
1982 &movq ("mm0",&QWP(0,$acc)); 1982 &movq ("mm0",&QWP(0,$acc));
@@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
2054 &test ($s2,15); 2054 &test ($s2,15);
2055 &jnz (&label("slow_way")); 2055 &jnz (&label("slow_way"));
2056 if (!$x86only) { 2056 if (!$x86only) {
2057 &bt (&DWP(0,$s0),28); # check for hyper-threading bit 2057 &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_HT"); # check for hyper-threading bit
2058 &jc (&label("slow_way")); 2058 &jc (&label("slow_way"));
2059 } 2059 }
2060 # pre-allocate aligned stack frame... 2060 # pre-allocate aligned stack frame...
@@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
2364 &jb (&label("slow_enc_tail")); 2364 &jb (&label("slow_enc_tail"));
2365 2365
2366 if (!$x86only) { 2366 if (!$x86only) {
2367 &bt ($_tmp,25); # check for SSE bit 2367 &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit
2368 &jnc (&label("slow_enc_x86")); 2368 &jnc (&label("slow_enc_x86"));
2369 2369
2370 &movq ("mm0",&QWP(0,$key)); # load iv 2370 &movq ("mm0",&QWP(0,$key)); # load iv
@@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
2479#--------------------------- SLOW DECRYPT ---------------------------# 2479#--------------------------- SLOW DECRYPT ---------------------------#
2480&set_label("slow_decrypt",16); 2480&set_label("slow_decrypt",16);
2481 if (!$x86only) { 2481 if (!$x86only) {
2482 &bt ($_tmp,25); # check for SSE bit 2482 &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit
2483 &jnc (&label("slow_dec_loop_x86")); 2483 &jnc (&label("slow_dec_loop_x86"));
2484 2484
2485 &set_label("slow_dec_loop_sse",4); 2485 &set_label("slow_dec_loop_sse",4);
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
index f75e90ba87..c37fd55648 100755
--- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
@@ -1655,6 +1655,7 @@ $code.=<<___;
1655.type AES_cbc_encrypt,\@function,6 1655.type AES_cbc_encrypt,\@function,6
1656.align 16 1656.align 16
1657.extern OPENSSL_ia32cap_P 1657.extern OPENSSL_ia32cap_P
1658.hidden OPENSSL_ia32cap_P
1658.globl asm_AES_cbc_encrypt 1659.globl asm_AES_cbc_encrypt
1659.hidden asm_AES_cbc_encrypt 1660.hidden asm_AES_cbc_encrypt
1660asm_AES_cbc_encrypt: 1661asm_AES_cbc_encrypt:
@@ -1684,7 +1685,7 @@ AES_cbc_encrypt:
1684 jb .Lcbc_slow_prologue 1685 jb .Lcbc_slow_prologue
1685 test \$15,%rdx 1686 test \$15,%rdx
1686 jnz .Lcbc_slow_prologue 1687 jnz .Lcbc_slow_prologue
1687 bt \$28,%r10d 1688 bt \$IA32CAP_BIT0_HT,%r10d
1688 jc .Lcbc_slow_prologue 1689 jc .Lcbc_slow_prologue
1689 1690
1690 # allocate aligned stack frame... 1691 # allocate aligned stack frame...
@@ -1944,7 +1945,7 @@ AES_cbc_encrypt:
1944 lea ($key,%rax),%rax 1945 lea ($key,%rax),%rax
1945 mov %rax,$keyend 1946 mov %rax,$keyend
1946 1947
1947 # pick Te4 copy which can't "overlap" with stack frame or key scdedule 1948 # pick Te4 copy which can't "overlap" with stack frame or key schedule
1948 lea 2048($sbox),$sbox 1949 lea 2048($sbox),$sbox
1949 lea 768-8(%rsp),%rax 1950 lea 768-8(%rsp),%rax
1950 sub $sbox,%rax 1951 sub $sbox,%rax
@@ -2814,6 +2815,7 @@ ___
2814 2815
2815$code =~ s/\`([^\`]*)\`/eval($1)/gem; 2816$code =~ s/\`([^\`]*)\`/eval($1)/gem;
2816 2817
2818print "#include \"x86_arch.h\"\n";
2817print $code; 2819print $code;
2818 2820
2819close STDOUT; 2821close STDOUT;
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index 39b504cbe5..bc6c8f3fc0 100644
--- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
83$code.=<<___; 83$code.=<<___;
84.text 84.text
85.extern OPENSSL_ia32cap_P 85.extern OPENSSL_ia32cap_P
86.hidden OPENSSL_ia32cap_P
86 87
87.globl aesni_cbc_sha1_enc 88.globl aesni_cbc_sha1_enc
88.type aesni_cbc_sha1_enc,\@abi-omnipotent 89.type aesni_cbc_sha1_enc,\@abi-omnipotent
@@ -93,10 +94,10 @@ aesni_cbc_sha1_enc:
93 mov OPENSSL_ia32cap_P+4(%rip),%r11d 94 mov OPENSSL_ia32cap_P+4(%rip),%r11d
94___ 95___
95$code.=<<___ if ($avx); 96$code.=<<___ if ($avx);
96 and \$`1<<28`,%r11d # mask AVX bit 97 and \$IA32CAP_MASK1_AVX,%r11d # mask AVX bit
97 and \$`1<<30`,%r10d # mask "Intel CPU" bit 98 and \$IA32CAP_MASK0_INTEL,%r10d # mask "Intel CPU" bit
98 or %r11d,%r10d 99 or %r11d,%r10d
99 cmp \$`1<<28|1<<30`,%r10d 100 cmp \$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d
100 je aesni_cbc_sha1_enc_avx 101 je aesni_cbc_sha1_enc_avx
101___ 102___
102$code.=<<___; 103$code.=<<___;
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 332ef3e91d..c4e2baa6c5 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -25,7 +25,7 @@ sub bn_mul_add_words
25 { 25 {
26 local($name)=@_; 26 local($name)=@_;
27 27
28 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); 28 &function_begin_B($name,"");
29 29
30 $r="eax"; 30 $r="eax";
31 $a="edx"; 31 $a="edx";
@@ -33,7 +33,7 @@ sub bn_mul_add_words
33 33
34 if ($sse2) { 34 if ($sse2) {
35 &picmeup("eax","OPENSSL_ia32cap_P"); 35 &picmeup("eax","OPENSSL_ia32cap_P");
36 &bt(&DWP(0,"eax"),26); 36 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
37 &jnc(&label("maw_non_sse2")); 37 &jnc(&label("maw_non_sse2"));
38 38
39 &mov($r,&wparam(0)); 39 &mov($r,&wparam(0));
@@ -211,7 +211,7 @@ sub bn_mul_words
211 { 211 {
212 local($name)=@_; 212 local($name)=@_;
213 213
214 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); 214 &function_begin_B($name,"");
215 215
216 $r="eax"; 216 $r="eax";
217 $a="edx"; 217 $a="edx";
@@ -219,7 +219,7 @@ sub bn_mul_words
219 219
220 if ($sse2) { 220 if ($sse2) {
221 &picmeup("eax","OPENSSL_ia32cap_P"); 221 &picmeup("eax","OPENSSL_ia32cap_P");
222 &bt(&DWP(0,"eax"),26); 222 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
223 &jnc(&label("mw_non_sse2")); 223 &jnc(&label("mw_non_sse2"));
224 224
225 &mov($r,&wparam(0)); 225 &mov($r,&wparam(0));
@@ -322,7 +322,7 @@ sub bn_sqr_words
322 { 322 {
323 local($name)=@_; 323 local($name)=@_;
324 324
325 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); 325 &function_begin_B($name,"");
326 326
327 $r="eax"; 327 $r="eax";
328 $a="edx"; 328 $a="edx";
@@ -330,7 +330,7 @@ sub bn_sqr_words
330 330
331 if ($sse2) { 331 if ($sse2) {
332 &picmeup("eax","OPENSSL_ia32cap_P"); 332 &picmeup("eax","OPENSSL_ia32cap_P");
333 &bt(&DWP(0,"eax"),26); 333 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
334 &jnc(&label("sqr_non_sse2")); 334 &jnc(&label("sqr_non_sse2"));
335 335
336 &mov($r,&wparam(0)); 336 &mov($r,&wparam(0));
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
index 808a1e5969..97d9136260 100644
--- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -203,12 +203,12 @@ if (!$x86only) {
203 &picmeup("edx","OPENSSL_ia32cap_P"); 203 &picmeup("edx","OPENSSL_ia32cap_P");
204 &mov ("eax",&DWP(0,"edx")); 204 &mov ("eax",&DWP(0,"edx"));
205 &mov ("edx",&DWP(4,"edx")); 205 &mov ("edx",&DWP(4,"edx"));
206 &test ("eax",1<<23); # check MMX bit 206 &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit
207 &jz (&label("ialu")); 207 &jz (&label("ialu"));
208if ($sse2) { 208if ($sse2) {
209 &test ("eax",1<<24); # check FXSR bit 209 &test ("eax","\$IA32CAP_MASK0_FXSR"); # check FXSR bit
210 &jz (&label("mmx")); 210 &jz (&label("mmx"));
211 &test ("edx",1<<1); # check PCLMULQDQ bit 211 &test ("edx","\$IA32CAP_MASK1_PCLMUL"); # check PCLMULQDQ bit
212 &jz (&label("mmx")); 212 &jz (&label("mmx"));
213 213
214 &movups ("xmm0",&QWP(8,"esp")); 214 &movups ("xmm0",&QWP(8,"esp"));
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
index e8f6b05084..a0bdd5787e 100755
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/src/lib/libcrypto/bn/asm/x86-mont.pl
@@ -114,7 +114,7 @@ $temp="mm6";
114$mask="mm7"; 114$mask="mm7";
115 115
116 &picmeup("eax","OPENSSL_ia32cap_P"); 116 &picmeup("eax","OPENSSL_ia32cap_P");
117 &bt (&DWP(0,"eax"),26); 117 &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
118 &jnc (&label("non_sse2")); 118 &jnc (&label("non_sse2"));
119 119
120 &mov ("eax",-1); 120 &mov ("eax",-1);
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
index 8e45c7479b..3ecb425dad 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
@@ -163,12 +163,13 @@ ___
163 163
164$code.=<<___; 164$code.=<<___;
165.extern OPENSSL_ia32cap_P 165.extern OPENSSL_ia32cap_P
166.hidden OPENSSL_ia32cap_P
166.globl bn_GF2m_mul_2x2 167.globl bn_GF2m_mul_2x2
167.type bn_GF2m_mul_2x2,\@abi-omnipotent 168.type bn_GF2m_mul_2x2,\@abi-omnipotent
168.align 16 169.align 16
169bn_GF2m_mul_2x2: 170bn_GF2m_mul_2x2:
170 mov OPENSSL_ia32cap_P(%rip),%rax 171 mov OPENSSL_ia32cap_P+4(%rip),%eax
171 bt \$33,%rax 172 bt \$IA32CAP_BIT1_PCLMUL,%eax
172 jnc .Lvanilla_mul_2x2 173 jnc .Lvanilla_mul_2x2
173 174
174 movd $a1,%xmm0 175 movd $a1,%xmm0
diff --git a/src/lib/libcrypto/cryptlib.c b/src/lib/libcrypto/cryptlib.c
index fa091fbaea..8dec9caa93 100644
--- a/src/lib/libcrypto/cryptlib.c
+++ b/src/lib/libcrypto/cryptlib.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: cryptlib.c,v 1.38 2016/11/04 13:56:04 miod Exp $ */ 1/* $OpenBSD: cryptlib.c,v 1.39 2016/11/04 17:30:30 miod Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -627,47 +627,30 @@ CRYPTO_get_lock_name(int type)
627 defined(__INTEL__) || \ 627 defined(__INTEL__) || \
628 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) 628 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
629 629
630unsigned int OPENSSL_ia32cap_P[2]; 630uint64_t OPENSSL_ia32cap_P;
631 631
632uint64_t 632uint64_t
633OPENSSL_cpu_caps(void) 633OPENSSL_cpu_caps(void)
634{ 634{
635 return *(uint64_t *)OPENSSL_ia32cap_P; 635 return OPENSSL_ia32cap_P;
636} 636}
637 637
638#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) 638#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
639#define OPENSSL_CPUID_SETUP 639#define OPENSSL_CPUID_SETUP
640typedef unsigned long long IA32CAP;
641void 640void
642OPENSSL_cpuid_setup(void) 641OPENSSL_cpuid_setup(void)
643{ 642{
644 static int trigger = 0; 643 static int trigger = 0;
645 IA32CAP OPENSSL_ia32_cpuid(void); 644 uint64_t OPENSSL_ia32_cpuid(void);
646 IA32CAP vec;
647 645
648 if (trigger) 646 if (trigger)
649 return; 647 return;
650 trigger = 1; 648 trigger = 1;
651 649 OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid();
652 vec = OPENSSL_ia32_cpuid();
653
654 /*
655 * |(1<<10) sets a reserved bit to signal that variable
656 * was initialized already... This is to avoid interference
657 * with cpuid snippets in ELF .init segment.
658 */
659 OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10);
660 OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
661} 650}
662#endif 651#endif
663 652
664#else 653#else
665unsigned long *
666OPENSSL_ia32cap_loc(void)
667{
668 return NULL;
669}
670
671uint64_t 654uint64_t
672OPENSSL_cpu_caps(void) 655OPENSSL_cpu_caps(void)
673{ 656{
diff --git a/src/lib/libcrypto/cryptlib.h b/src/lib/libcrypto/cryptlib.h
index ad679dfa8d..d44738bf3c 100644
--- a/src/lib/libcrypto/cryptlib.h
+++ b/src/lib/libcrypto/cryptlib.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: cryptlib.h,v 1.24 2014/07/11 08:44:47 jsing Exp $ */ 1/* $OpenBSD: cryptlib.h,v 1.25 2016/11/04 17:30:30 miod Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -73,7 +73,6 @@ extern "C" {
73#define X509_CERT_FILE_EVP "SSL_CERT_FILE" 73#define X509_CERT_FILE_EVP "SSL_CERT_FILE"
74 74
75void OPENSSL_cpuid_setup(void); 75void OPENSSL_cpuid_setup(void);
76extern unsigned int OPENSSL_ia32cap_P[];
77 76
78#ifdef __cplusplus 77#ifdef __cplusplus
79} 78}
diff --git a/src/lib/libcrypto/engine/eng_aesni.c b/src/lib/libcrypto/engine/eng_aesni.c
index 5f9a36236a..92794f6086 100644
--- a/src/lib/libcrypto/engine/eng_aesni.c
+++ b/src/lib/libcrypto/engine/eng_aesni.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: eng_aesni.c,v 1.8 2015/02/10 09:46:30 miod Exp $ */ 1/* $OpenBSD: eng_aesni.c,v 1.9 2016/11/04 17:30:30 miod Exp $ */
2/* 2/*
3 * Support for Intel AES-NI intruction set 3 * Support for Intel AES-NI intruction set
4 * Author: Huang Ying <ying.huang@intel.com> 4 * Author: Huang Ying <ying.huang@intel.com>
@@ -93,10 +93,11 @@
93 defined(_M_AMD64) || defined(_M_X64) || \ 93 defined(_M_AMD64) || defined(_M_X64) || \
94 defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__) 94 defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__)
95#define COMPILE_HW_AESNI 95#define COMPILE_HW_AESNI
96#include "x86_arch.h"
96#endif 97#endif
97static ENGINE *ENGINE_aesni (void); 98static ENGINE *ENGINE_aesni(void);
98 99
99void ENGINE_load_aesni (void) 100void ENGINE_load_aesni(void)
100{ 101{
101/* On non-x86 CPUs it just returns. */ 102/* On non-x86 CPUs it just returns. */
102#ifdef COMPILE_HW_AESNI 103#ifdef COMPILE_HW_AESNI
@@ -302,20 +303,13 @@ aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out,
302} 303}
303/* ===== Engine "management" functions ===== */ 304/* ===== Engine "management" functions ===== */
304 305
305typedef unsigned long long IA32CAP;
306
307/* Prepare the ENGINE structure for registration */ 306/* Prepare the ENGINE structure for registration */
308static int 307static int
309aesni_bind_helper(ENGINE *e) 308aesni_bind_helper(ENGINE *e)
310{ 309{
311 int engage; 310 int engage;
312 311
313 if (sizeof(OPENSSL_ia32cap_P) > 4) { 312 engage = (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) != 0;
314 engage = ((IA32CAP)OPENSSL_ia32cap_P >> 57) & 1;
315 } else {
316 IA32CAP OPENSSL_ia32_cpuid(void);
317 engage = (OPENSSL_ia32_cpuid() >> 57) & 1;
318 }
319 313
320 /* Register everything or return with an error */ 314 /* Register everything or return with an error */
321 if (!ENGINE_set_id(e, aesni_id) || 315 if (!ENGINE_set_id(e, aesni_id) ||
diff --git a/src/lib/libcrypto/evp/e_aes.c b/src/lib/libcrypto/evp/e_aes.c
index 25199dca36..b20543a90c 100644
--- a/src/lib/libcrypto/evp/e_aes.c
+++ b/src/lib/libcrypto/evp/e_aes.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: e_aes.c,v 1.30 2016/11/04 13:56:05 miod Exp $ */ 1/* $OpenBSD: e_aes.c,v 1.31 2016/11/04 17:30:30 miod Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -150,10 +150,10 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len,
150 defined(_M_AMD64) || defined(_M_X64) || \ 150 defined(_M_AMD64) || defined(_M_X64) || \
151 defined(__INTEL__) ) 151 defined(__INTEL__) )
152 152
153extern unsigned int OPENSSL_ia32cap_P[]; 153#include "x86_arch.h"
154 154
155#ifdef VPAES_ASM 155#ifdef VPAES_ASM
156#define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) 156#define VPAES_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_SSSE3)
157#endif 157#endif
158#ifdef BSAES_ASM 158#ifdef BSAES_ASM
159#define BSAES_CAPABLE VPAES_CAPABLE 159#define BSAES_CAPABLE VPAES_CAPABLE
@@ -161,7 +161,7 @@ extern unsigned int OPENSSL_ia32cap_P[];
161/* 161/*
162 * AES-NI section 162 * AES-NI section
163 */ 163 */
164#define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) 164#define AESNI_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI)
165 165
166int aesni_set_encrypt_key(const unsigned char *userKey, int bits, 166int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
167 AES_KEY *key); 167 AES_KEY *key);
diff --git a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c
index 8574823aed..3f82cf5967 100644
--- a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c
+++ b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.12 2016/05/04 15:01:33 tedu Exp $ */ 1/* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -87,13 +87,12 @@ typedef struct {
87 defined(_M_AMD64) || defined(_M_X64) || \ 87 defined(_M_AMD64) || defined(_M_X64) || \
88 defined(__INTEL__) ) 88 defined(__INTEL__) )
89 89
90#include "x86_arch.h"
91
90#if defined(__GNUC__) && __GNUC__>=2 92#if defined(__GNUC__) && __GNUC__>=2
91# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) 93# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
92#endif 94#endif
93 95
94extern unsigned int OPENSSL_ia32cap_P[2];
95#define AESNI_CAPABLE (1<<(57-32))
96
97int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); 96int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key);
98int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); 97int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key);
99 98
@@ -578,14 +577,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = {
578const EVP_CIPHER * 577const EVP_CIPHER *
579EVP_aes_128_cbc_hmac_sha1(void) 578EVP_aes_128_cbc_hmac_sha1(void)
580{ 579{
581 return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? 580 return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ?
582 &aesni_128_cbc_hmac_sha1_cipher : NULL; 581 &aesni_128_cbc_hmac_sha1_cipher : NULL;
583} 582}
584 583
585const EVP_CIPHER * 584const EVP_CIPHER *
586EVP_aes_256_cbc_hmac_sha1(void) 585EVP_aes_256_cbc_hmac_sha1(void)
587{ 586{
588 return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? 587 return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ?
589 &aesni_256_cbc_hmac_sha1_cipher : NULL; 588 &aesni_256_cbc_hmac_sha1_cipher : NULL;
590} 589}
591#else 590#else
diff --git a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c
index 1f085af403..39527cafe6 100644
--- a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c
+++ b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: e_rc4_hmac_md5.c,v 1.5 2014/08/11 13:29:43 bcook Exp $ */ 1/* $OpenBSD: e_rc4_hmac_md5.c,v 1.6 2016/11/04 17:30:30 miod Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -105,6 +105,7 @@ rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *inkey,
105 defined(__INTEL__) ) && \ 105 defined(__INTEL__) ) && \
106 !(defined(__APPLE__) && defined(__MACH__)) 106 !(defined(__APPLE__) && defined(__MACH__))
107#define STITCHED_CALL 107#define STITCHED_CALL
108#include "x86_arch.h"
108#endif 109#endif
109 110
110#if !defined(STITCHED_CALL) 111#if !defined(STITCHED_CALL)
@@ -122,7 +123,6 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
122 md5_off = MD5_CBLOCK - key->md.num, 123 md5_off = MD5_CBLOCK - key->md.num,
123 blocks; 124 blocks;
124 unsigned int l; 125 unsigned int l;
125 extern unsigned int OPENSSL_ia32cap_P[];
126#endif 126#endif
127 size_t plen = key->payload_length; 127 size_t plen = key->payload_length;
128 128
@@ -139,7 +139,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
139 139
140 if (plen > md5_off && 140 if (plen > md5_off &&
141 (blocks = (plen - md5_off) / MD5_CBLOCK) && 141 (blocks = (plen - md5_off) / MD5_CBLOCK) &&
142 (OPENSSL_ia32cap_P[0]&(1 << 20)) == 0) { 142 (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) {
143 MD5_Update(&key->md, in, md5_off); 143 MD5_Update(&key->md, in, md5_off);
144 RC4(&key->ks, rc4_off, in, out); 144 RC4(&key->ks, rc4_off, in, out);
145 145
@@ -187,7 +187,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
187 rc4_off += MD5_CBLOCK; 187 rc4_off += MD5_CBLOCK;
188 188
189 if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) && 189 if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) &&
190 (OPENSSL_ia32cap_P[0] & (1 << 20)) == 0) { 190 (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) {
191 RC4(&key->ks, rc4_off, in, out); 191 RC4(&key->ks, rc4_off, in, out);
192 MD5_Update(&key->md, out, md5_off); 192 MD5_Update(&key->md, out, md5_off);
193 193
diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c
index 6f8a8dd7f4..95ee755f83 100644
--- a/src/lib/libcrypto/modes/gcm128.c
+++ b/src/lib/libcrypto/modes/gcm128.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: gcm128.c,v 1.14 2016/11/04 13:56:05 miod Exp $ */ 1/* $OpenBSD: gcm128.c,v 1.15 2016/11/04 17:30:30 miod Exp $ */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * 4 *
@@ -637,13 +637,19 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
637 637
638#endif 638#endif
639 639
640#if defined(GHASH_ASM) && \
641 (defined(__i386) || defined(__i386__) || \
642 defined(__x86_64) || defined(__x86_64__) || \
643 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
644#include "x86_arch.h"
645#endif
646
640#if TABLE_BITS==4 && defined(GHASH_ASM) 647#if TABLE_BITS==4 && defined(GHASH_ASM)
641# if (defined(__i386) || defined(__i386__) || \ 648# if (defined(__i386) || defined(__i386__) || \
642 defined(__x86_64) || defined(__x86_64__) || \ 649 defined(__x86_64) || defined(__x86_64__) || \
643 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 650 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
644# define GHASH_ASM_X86_OR_64 651# define GHASH_ASM_X86_OR_64
645# define GCM_FUNCREF_4BIT 652# define GCM_FUNCREF_4BIT
646extern unsigned int OPENSSL_ia32cap_P[2];
647 653
648void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); 654void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
649void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); 655void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
@@ -705,8 +711,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
705#elif TABLE_BITS==4 711#elif TABLE_BITS==4
706# if defined(GHASH_ASM_X86_OR_64) 712# if defined(GHASH_ASM_X86_OR_64)
707# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) 713# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
708 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */ 714 /* check FXSR and PCLMULQDQ bits */
709 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */ 715 if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
716 (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
710 gcm_init_clmul(ctx->Htable,ctx->H.u); 717 gcm_init_clmul(ctx->Htable,ctx->H.u);
711 ctx->gmult = gcm_gmult_clmul; 718 ctx->gmult = gcm_gmult_clmul;
712 ctx->ghash = gcm_ghash_clmul; 719 ctx->ghash = gcm_ghash_clmul;
@@ -716,9 +723,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
716 gcm_init_4bit(ctx->Htable,ctx->H.u); 723 gcm_init_4bit(ctx->Htable,ctx->H.u);
717# if defined(GHASH_ASM_X86) /* x86 only */ 724# if defined(GHASH_ASM_X86) /* x86 only */
718# if defined(OPENSSL_IA32_SSE2) 725# if defined(OPENSSL_IA32_SSE2)
719 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */ 726 if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */
720# else 727# else
721 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */ 728 if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */
722# endif 729# endif
723 ctx->gmult = gcm_gmult_4bit_mmx; 730 ctx->gmult = gcm_gmult_4bit_mmx;
724 ctx->ghash = gcm_ghash_4bit_mmx; 731 ctx->ghash = gcm_ghash_4bit_mmx;
diff --git a/src/lib/libcrypto/perlasm/x86_64-xlate.pl b/src/lib/libcrypto/perlasm/x86_64-xlate.pl
index 4bd53da33d..a8393d2730 100755
--- a/src/lib/libcrypto/perlasm/x86_64-xlate.pl
+++ b/src/lib/libcrypto/perlasm/x86_64-xlate.pl
@@ -393,7 +393,7 @@ my %globals;
393 } 393 }
394 } 394 }
395} 395}
396{ package expr; # pick up expressioins 396{ package expr; # pick up expressions
397 sub re { 397 sub re {
398 my $self = shift; # single instance is enough... 398 my $self = shift; # single instance is enough...
399 local *line = shift; 399 local *line = shift;
@@ -777,6 +777,8 @@ ___
777OPTION DOTNAME 777OPTION DOTNAME
778___ 778___
779} 779}
780print "#include \"x86_arch.h\"\n";
781
780while($line=<>) { 782while($line=<>) {
781 783
782 chomp($line); 784 chomp($line);
diff --git a/src/lib/libcrypto/perlasm/x86asm.pl b/src/lib/libcrypto/perlasm/x86asm.pl
index 5916ea4f89..e039382e00 100644
--- a/src/lib/libcrypto/perlasm/x86asm.pl
+++ b/src/lib/libcrypto/perlasm/x86asm.pl
@@ -248,6 +248,7 @@ EOF
248 $pic=0; 248 $pic=0;
249 for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } 249 for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); }
250 250
251 ::emitraw("#include \"x86_arch.h\"\n");
251 ::emitraw("#include <machine/asm.h>\n") if $openbsd; 252 ::emitraw("#include <machine/asm.h>\n") if $openbsd;
252 $filename =~ s/\.pl$//; 253 $filename =~ s/\.pl$//;
253 &file($filename); 254 &file($filename);
diff --git a/src/lib/libcrypto/perlasm/x86gas.pl b/src/lib/libcrypto/perlasm/x86gas.pl
index d4baea514b..84d24edbbd 100644
--- a/src/lib/libcrypto/perlasm/x86gas.pl
+++ b/src/lib/libcrypto/perlasm/x86gas.pl
@@ -157,10 +157,8 @@ sub ::file_end
157 } 157 }
158 } 158 }
159 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { 159 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
160 my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; 160 push (@out, ".extern\t${nmdecor}OPENSSL_ia32cap_P\n");
161 if ($::macosx) { push (@out,"$tmp,2\n"); } 161 push (@out, ".hidden\t${nmdecor}OPENSSL_ia32cap_P\n");
162 elsif ($::elf) { push (@out,"$tmp,4\n"); }
163 else { push (@out,"$tmp\n"); }
164 } 162 }
165 push(@out,$initseg) if ($initseg); 163 push(@out,$initseg) if ($initseg);
166} 164}
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl
index 84f1a798cb..03f0cff467 100644
--- a/src/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -189,7 +189,8 @@ if ($alt=0) {
189 &jz (&label("go4loop4")); 189 &jz (&label("go4loop4"));
190 190
191 &picmeup($out,"OPENSSL_ia32cap_P"); 191 &picmeup($out,"OPENSSL_ia32cap_P");
192 &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX] 192 # check SSE2 bit [could have been MMX]
193 &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2");
193 &jnc (&label("go4loop4")); 194 &jnc (&label("go4loop4"));
194 195
195 &mov ($out,&wparam(3)) if (!$alt); 196 &mov ($out,&wparam(3)) if (!$alt);
@@ -312,7 +313,7 @@ $idx="edx";
312 &xor ("eax","eax"); 313 &xor ("eax","eax");
313 &mov (&DWP(-4,$out),$idi); # borrow key->y 314 &mov (&DWP(-4,$out),$idi); # borrow key->y
314 315
315 &bt (&DWP(0,$idx),20); # check for bit#20 316 &bt (&DWP(0,$idx),"\$IA32CAP_BIT0_INTELP4");
316 &jc (&label("c1stloop")); 317 &jc (&label("c1stloop"));
317 318
318&set_label("w1stloop",16); 319&set_label("w1stloop",16);
@@ -388,9 +389,9 @@ $idx="edx";
388 &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); 389 &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
389 &picmeup("edx","OPENSSL_ia32cap_P"); 390 &picmeup("edx","OPENSSL_ia32cap_P");
390 &mov ("edx",&DWP(0,"edx")); 391 &mov ("edx",&DWP(0,"edx"));
391 &bt ("edx",20); 392 &bt ("edx","\$IA32CAP_BIT0_INTELP4");
392 &jc (&label("1xchar")); 393 &jc (&label("1xchar"));
393 &bt ("edx",26); 394 &bt ("edx","\$IA32CAP_BIT0_SSE2");
394 &jnc (&label("ret")); 395 &jnc (&label("ret"));
395 &add ("eax",25); 396 &add ("eax",25);
396 &ret (); 397 &ret ();
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
index 197749dda7..2135b38ef8 100755
--- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -122,6 +122,7 @@ $out="%rcx"; # arg4
122$code=<<___; 122$code=<<___;
123.text 123.text
124.extern OPENSSL_ia32cap_P 124.extern OPENSSL_ia32cap_P
125.hidden OPENSSL_ia32cap_P
125 126
126.globl RC4 127.globl RC4
127.type RC4,\@function,4 128.type RC4,\@function,4
@@ -164,7 +165,7 @@ $code.=<<___;
164 movl ($dat,$XX[0],4),$TX[0]#d 165 movl ($dat,$XX[0],4),$TX[0]#d
165 test \$-16,$len 166 test \$-16,$len
166 jz .Lloop1 167 jz .Lloop1
167 bt \$30,%r8d # Intel CPU? 168 bt \$IA32CAP_BIT0_INTEL,%r8d # Intel CPU?
168 jc .Lintel 169 jc .Lintel
169 and \$7,$TX[1] 170 and \$7,$TX[1]
170 lea 1($XX[0]),$XX[1] 171 lea 1($XX[0]),$XX[1]
@@ -442,7 +443,7 @@ RC4_set_key:
442 xor %r11,%r11 443 xor %r11,%r11
443 444
444 mov OPENSSL_ia32cap_P(%rip),$idx#d 445 mov OPENSSL_ia32cap_P(%rip),$idx#d
445 bt \$20,$idx#d # RC4_CHAR? 446 bt \$IA32CAP_BIT0_INTELP4,$idx#d # RC4_CHAR?
446 jc .Lc1stloop 447 jc .Lc1stloop
447 jmp .Lw1stloop 448 jmp .Lw1stloop
448 449
@@ -506,9 +507,9 @@ RC4_set_key:
506RC4_options: 507RC4_options:
507 lea .Lopts(%rip),%rax 508 lea .Lopts(%rip),%rax
508 mov OPENSSL_ia32cap_P(%rip),%edx 509 mov OPENSSL_ia32cap_P(%rip),%edx
509 bt \$20,%edx 510 bt \$IA32CAP_BIT0_INTELP4,%edx
510 jc .L8xchar 511 jc .L8xchar
511 bt \$30,%edx 512 bt \$IA32CAP_BIT0_INTEL,%edx
512 jnc .Ldone 513 jnc .Ldone
513 add \$25,%rax 514 add \$25,%rax
514 ret 515 ret
diff --git a/src/lib/libcrypto/sha/asm/sha1-586.pl b/src/lib/libcrypto/sha/asm/sha1-586.pl
index 6fbea34d78..d29ed84706 100644
--- a/src/lib/libcrypto/sha/asm/sha1-586.pl
+++ b/src/lib/libcrypto/sha/asm/sha1-586.pl
@@ -303,15 +303,15 @@ if ($xmm) {
303 303
304 &mov ($A,&DWP(0,$T)); 304 &mov ($A,&DWP(0,$T));
305 &mov ($D,&DWP(4,$T)); 305 &mov ($D,&DWP(4,$T));
306 &test ($D,1<<9); # check SSSE3 bit 306 &test ($D,"\$IA32CAP_MASK1_SSSE3"); # check SSSE3 bit
307 &jz (&label("x86")); 307 &jz (&label("x86"));
308 &test ($A,1<<24); # check FXSR bit 308 &test ($A,"\$IA32CAP_MASK0_FXSR"); # check FXSR bit
309 &jz (&label("x86")); 309 &jz (&label("x86"));
310 if ($ymm) { 310 if ($ymm) {
311 &and ($D,1<<28); # mask AVX bit 311 &and ($D,"\$IA32CAP_MASK1_AVX"); # mask AVX bit
312 &and ($A,1<<30); # mask "Intel CPU" bit 312 &and ($A,"\$IA32CAP_MASK0_INTEL"); # mask "Intel CPU" bit
313 &or ($A,$D); 313 &or ($A,$D);
314 &cmp ($A,1<<28|1<<30); 314 &cmp ($A,"\$(IA32CAP_MASK1_AVX | IA32CAP_MASK0_INTEL)");
315 &je (&label("avx_shortcut")); 315 &je (&label("avx_shortcut"));
316 } 316 }
317 &jmp (&label("ssse3_shortcut")); 317 &jmp (&label("ssse3_shortcut"));
diff --git a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl
index f15c7ec39b..147d21570b 100755
--- a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl
+++ b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl
@@ -216,6 +216,7 @@ unshift(@xi,pop(@xi));
216$code.=<<___; 216$code.=<<___;
217.text 217.text
218.extern OPENSSL_ia32cap_P 218.extern OPENSSL_ia32cap_P
219.hidden OPENSSL_ia32cap_P
219 220
220.globl sha1_block_data_order 221.globl sha1_block_data_order
221.type sha1_block_data_order,\@function,3 222.type sha1_block_data_order,\@function,3
@@ -223,14 +224,14 @@ $code.=<<___;
223sha1_block_data_order: 224sha1_block_data_order:
224 mov OPENSSL_ia32cap_P+0(%rip),%r9d 225 mov OPENSSL_ia32cap_P+0(%rip),%r9d
225 mov OPENSSL_ia32cap_P+4(%rip),%r8d 226 mov OPENSSL_ia32cap_P+4(%rip),%r8d
226 test \$`1<<9`,%r8d # check SSSE3 bit 227 test \$IA32CAP_MASK1_SSSE3,%r8d # check SSSE3 bit
227 jz .Lialu 228 jz .Lialu
228___ 229___
229$code.=<<___ if ($avx); 230$code.=<<___ if ($avx);
230 and \$`1<<28`,%r8d # mask AVX bit 231 and \$IA32CAP_MASK1_AVX,%r8d # mask AVX bit
231 and \$`1<<30`,%r9d # mask "Intel CPU" bit 232 and \$IA32CAP_MASK0_INTEL,%r9d # mask "Intel CPU" bit
232 or %r9d,%r8d 233 or %r9d,%r8d
233 cmp \$`1<<28|1<<30`,%r8d 234 cmp \$(IA32CAP_MASK0_INTEL | IA32CAP_MASK1_AVX),%r8d
234 je _avx_shortcut 235 je _avx_shortcut
235___ 236___
236$code.=<<___; 237$code.=<<___;
diff --git a/src/lib/libcrypto/sha/asm/sha512-586.pl b/src/lib/libcrypto/sha/asm/sha512-586.pl
index 7eab6a5b88..163361ebe9 100644
--- a/src/lib/libcrypto/sha/asm/sha512-586.pl
+++ b/src/lib/libcrypto/sha/asm/sha512-586.pl
@@ -284,7 +284,7 @@ sub BODY_00_15_x86 {
284 284
285if ($sse2) { 285if ($sse2) {
286 &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); 286 &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
287 &bt (&DWP(0,"edx"),26); 287 &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2");
288 &jnc (&label("loop_x86")); 288 &jnc (&label("loop_x86"));
289 289
290 # load ctx->h[0-7] 290 # load ctx->h[0-7]
diff --git a/src/lib/libcrypto/whrlpool/wp_block.c b/src/lib/libcrypto/whrlpool/wp_block.c
index d8c1b89ba3..1e00a01330 100644
--- a/src/lib/libcrypto/whrlpool/wp_block.c
+++ b/src/lib/libcrypto/whrlpool/wp_block.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: wp_block.c,v 1.12 2016/09/04 14:06:46 jsing Exp $ */ 1/* $OpenBSD: wp_block.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */
2/** 2/**
3 * The Whirlpool hashing function. 3 * The Whirlpool hashing function.
4 * 4 *
@@ -36,10 +36,12 @@
36 * 36 *
37 */ 37 */
38 38
39#include "wp_locl.h"
40#include <string.h> 39#include <string.h>
40#include <openssl/crypto.h>
41#include <machine/endian.h> 41#include <machine/endian.h>
42 42
43#include "wp_locl.h"
44
43typedef unsigned char u8; 45typedef unsigned char u8;
44#if defined(_LP64) 46#if defined(_LP64)
45typedef unsigned long u64; 47typedef unsigned long u64;
@@ -57,12 +59,15 @@ typedef unsigned long long u64;
57# define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX 59# define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX
58 CPUs this is actually faster! */ 60 CPUs this is actually faster! */
59# endif 61# endif
60# define GO_FOR_MMX(ctx,inp,num) do { \ 62#include "x86_arch.h"
61 extern unsigned int OPENSSL_ia32cap_P[]; \ 63# define GO_FOR_MMX(ctx,inp,num) \
64do { \
62 void whirlpool_block_mmx(void *,const void *,size_t); \ 65 void whirlpool_block_mmx(void *,const void *,size_t); \
63 if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \ 66 if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0) \
64 whirlpool_block_mmx(ctx->H.c,inp,num); return; \ 67 break; \
65 } while (0) 68 whirlpool_block_mmx(ctx->H.c,inp,num); \
69 return; \
70} while (0)
66# endif 71# endif
67#elif defined(__arm__) 72#elif defined(__arm__)
68# define SMALL_REGISTER_BANK 73# define SMALL_REGISTER_BANK
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl
index b36d3f7dc5..6558dedb6b 100644
--- a/src/lib/libcrypto/x86_64cpuid.pl
+++ b/src/lib/libcrypto/x86_64cpuid.pl
@@ -20,8 +20,8 @@ print<<___;
20.section .init 20.section .init
21 call OPENSSL_cpuid_setup 21 call OPENSSL_cpuid_setup
22 22
23.extern OPENSSL_ia32cap_P
23.hidden OPENSSL_ia32cap_P 24.hidden OPENSSL_ia32cap_P
24.comm OPENSSL_ia32cap_P,8,4
25 25
26.text 26.text
27 27
@@ -80,8 +80,8 @@ OPENSSL_ia32_cpuid:
80 mov %eax,%r10d 80 mov %eax,%r10d
81 mov \$0x80000001,%eax 81 mov \$0x80000001,%eax
82 cpuid 82 cpuid
83 or %ecx,%r9d 83 and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP bit
84 and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 84 or \$1,%r9d # make sure %r9d is not zero
85 85
86 cmp \$0x80000008,%r10d 86 cmp \$0x80000008,%r10d
87 jb .Lintel 87 jb .Lintel
@@ -93,12 +93,12 @@ OPENSSL_ia32_cpuid:
93 93
94 mov \$1,%eax 94 mov \$1,%eax
95 cpuid 95 cpuid
96 bt \$28,%edx # test hyper-threading bit 96 bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit
97 jnc .Lgeneric 97 jnc .Lgeneric
98 shr \$16,%ebx # number of logical processors 98 shr \$16,%ebx # number of logical processors
99 cmp %r10b,%bl 99 cmp %r10b,%bl
100 ja .Lgeneric 100 ja .Lgeneric
101 and \$0xefffffff,%edx # ~(1<<28) 101 xor \$IA32CAP_MASK0_HT,%edx
102 jmp .Lgeneric 102 jmp .Lgeneric
103 103
104.Lintel: 104.Lintel:
@@ -116,33 +116,37 @@ OPENSSL_ia32_cpuid:
116.Lnocacheinfo: 116.Lnocacheinfo:
117 mov \$1,%eax 117 mov \$1,%eax
118 cpuid 118 cpuid
119 and \$0xbfefffff,%edx # force reserved bits to 0 119 # force reserved bits to 0
120 and \$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx
120 cmp \$0,%r9d 121 cmp \$0,%r9d
121 jne .Lnotintel 122 jne .Lnotintel
122 or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs 123 # set reserved bit#30 on Intel CPUs
124 or \$IA32CAP_MASK0_INTEL,%edx
123 and \$15,%ah 125 and \$15,%ah
124 cmp \$15,%ah # examine Family ID 126 cmp \$15,%ah # examine Family ID
125 jne .Lnotintel 127 jne .Lnotintel
126 or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR 128 # set reserved bit#20 to engage RC4_CHAR
129 or \$IA32CAP_MASK0_INTELP4,%edx
127.Lnotintel: 130.Lnotintel:
128 bt \$28,%edx # test hyper-threading bit 131 bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit
129 jnc .Lgeneric 132 jnc .Lgeneric
130 and \$0xefffffff,%edx # ~(1<<28) 133 xor \$IA32CAP_MASK0_HT,%edx
131 cmp \$0,%r10d 134 cmp \$0,%r10d
132 je .Lgeneric 135 je .Lgeneric
133 136
134 or \$0x10000000,%edx # 1<<28 137 or \$IA32CAP_MASK0_HT,%edx
135 shr \$16,%ebx 138 shr \$16,%ebx
136 cmp \$1,%bl # see if cache is shared 139 cmp \$1,%bl # see if cache is shared
137 ja .Lgeneric 140 ja .Lgeneric
138 and \$0xefffffff,%edx # ~(1<<28) 141 xor \$IA32CAP_MASK0_HT,%edx # clear hyper-threading bit if not
142
139.Lgeneric: 143.Lgeneric:
140 and \$0x00000800,%r9d # isolate AMD XOP flag 144 and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP flag
141 and \$0xfffff7ff,%ecx 145 and \$(~IA32CAP_MASK1_AMD_XOP),%ecx
142 or %ecx,%r9d # merge AMD XOP flag 146 or %ecx,%r9d # merge AMD XOP flag
143 147
144 mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx 148 mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx
145 bt \$27,%r9d # check OSXSAVE bit 149 bt \$IA32CAP_BIT1_OSXSAVE,%r9d # check OSXSAVE bit
146 jnc .Lclear_avx 150 jnc .Lclear_avx
147 xor %ecx,%ecx # XCR0 151 xor %ecx,%ecx # XCR0
148 .byte 0x0f,0x01,0xd0 # xgetbv 152 .byte 0x0f,0x01,0xd0 # xgetbv
@@ -150,7 +154,7 @@ OPENSSL_ia32_cpuid:
150 cmp \$6,%eax 154 cmp \$6,%eax
151 je .Ldone 155 je .Ldone
152.Lclear_avx: 156.Lclear_avx:
153 mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) 157 mov \$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax
154 and %eax,%r9d # clear AVX, FMA and AMD XOP bits 158 and %eax,%r9d # clear AVX, FMA and AMD XOP bits
155.Ldone: 159.Ldone:
156 shl \$32,%r9 160 shl \$32,%r9
diff --git a/src/lib/libcrypto/x86_arch.h b/src/lib/libcrypto/x86_arch.h
new file mode 100644
index 0000000000..5b2cf97546
--- /dev/null
+++ b/src/lib/libcrypto/x86_arch.h
@@ -0,0 +1,90 @@
1/* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */
2/*
3 * Copyright (c) 2016 Miodrag Vallat.
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/*
19 * The knowledge of the layout of OPENSSL_ia32cap_P is internal to libcrypto
20 * (and, to some extent, to libssl), and may change in the future without
21 * notice.
22 */
23
24/*
25 * OPENSSL_ia32cap_P is computed at runtime by OPENSSL_ia32_cpuid().
26 *
27 * On processors which lack the cpuid instruction, the value is always
28 * zero (this only matters on 32-bit processors, of course).
29 *
30 * On processors which support the cpuid instruction, after running
31 * "cpuid 1", the value of %edx is written to the low word of OPENSSL_ia32cap_P,
32 * and the value of %ecx is written to its high word.
33 *
34 * Further processing is done to set or clear specific bits, depending
35 * upon the exact processor type.
36 *
37 * Assembly routines usually address OPENSSL_ia32cap_P as two 32-bit words,
38 * hence two sets of bit numbers and masks. OPENSSL_cpu_caps() returns the
39 * complete 64-bit word.
40 */
41
42/* bit numbers for the low word */
43#define IA32CAP_BIT0_FPU 0
44#define IA32CAP_BIT0_MMX 23
45#define IA32CAP_BIT0_FXSR 24
46#define IA32CAP_BIT0_SSE 25
47#define IA32CAP_BIT0_SSE2 26
48#define IA32CAP_BIT0_HT 28
49
50/* the following bits are not obtained from cpuid */
51#define IA32CAP_BIT0_INTELP4 20
52#define IA32CAP_BIT0_INTEL 30
53
54/* bit numbers for the high word */
55#define IA32CAP_BIT1_PCLMUL 1
56#define IA32CAP_BIT1_SSSE3 9
57#define IA32CAP_BIT1_FMA3 12
58#define IA32CAP_BIT1_AESNI 25
59#define IA32CAP_BIT1_OSXSAVE 27
60#define IA32CAP_BIT1_AVX 28
61
62#define IA32CAP_BIT1_AMD_XOP 11
63
64/* bit masks for the low word */
65#define IA32CAP_MASK0_MMX (1 << IA32CAP_BIT0_MMX)
66#define IA32CAP_MASK0_FXSR (1 << IA32CAP_BIT0_FXSR)
67#define IA32CAP_MASK0_SSE (1 << IA32CAP_BIT0_SSE)
68#define IA32CAP_MASK0_SSE2 (1 << IA32CAP_BIT0_SSE2)
69#define IA32CAP_MASK0_HT (1 << IA32CAP_BIT0_HT)
70
71#define IA32CAP_MASK0_INTELP4 (1 << IA32CAP_BIT0_INTELP4)
72#define IA32CAP_MASK0_INTEL (1 << IA32CAP_BIT0_INTEL)
73
74/* bit masks for the high word */
75#define IA32CAP_MASK1_PCLMUL (1 << IA32CAP_BIT1_PCLMUL)
76#define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3)
77#define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3)
78#define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI)
79#define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX)
80
81#define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP)
82
83/* bit masks for OPENSSL_cpu_caps() */
84#define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX
85#define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR
86#define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE
87#define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4
88#define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL))
89#define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3))
90#define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI))
diff --git a/src/lib/libcrypto/x86cpuid.pl b/src/lib/libcrypto/x86cpuid.pl
index 7918629f64..8b9570fc72 100644
--- a/src/lib/libcrypto/x86cpuid.pl
+++ b/src/lib/libcrypto/x86cpuid.pl
@@ -56,8 +56,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
56 &mov ("esi","eax"); 56 &mov ("esi","eax");
57 &mov ("eax",0x80000001); 57 &mov ("eax",0x80000001);
58 &cpuid (); 58 &cpuid ();
59 &or ("ebp","ecx"); 59 &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit
60 &and ("ebp",1<<11|1); # isolate XOP bit 60 &or ("ecx",1); # make sure ecx is not zero
61 &mov ("ebp","ecx");
62
61 &cmp ("esi",0x80000008); 63 &cmp ("esi",0x80000008);
62 &jb (&label("intel")); 64 &jb (&label("intel"));
63 65
@@ -69,13 +71,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
69 &mov ("eax",1); 71 &mov ("eax",1);
70 &xor ("ecx","ecx"); 72 &xor ("ecx","ecx");
71 &cpuid (); 73 &cpuid ();
72 &bt ("edx",28); 74 &bt ("edx","\$IA32CAP_BIT0_HT");
73 &jnc (&label("generic")); 75 &jnc (&label("generic"));
74 &shr ("ebx",16); 76 &shr ("ebx",16);
75 &and ("ebx",0xff); 77 &and ("ebx",0xff);
76 &cmp ("ebx","esi"); 78 &cmp ("ebx","esi");
77 &ja (&label("generic")); 79 &ja (&label("generic"));
78 &and ("edx",0xefffffff); # clear hyper-threading bit 80 &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit
79 &jmp (&label("generic")); 81 &jmp (&label("generic"));
80 82
81&set_label("intel"); 83&set_label("intel");
@@ -94,34 +96,38 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
94 &mov ("eax",1); 96 &mov ("eax",1);
95 &xor ("ecx","ecx"); 97 &xor ("ecx","ecx");
96 &cpuid (); 98 &cpuid ();
97 &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 99 # force reserved bits to 0.
100 &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)");
98 &cmp ("ebp",0); 101 &cmp ("ebp",0);
99 &jne (&label("notintel")); 102 &jne (&label("notintel"));
100 &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs 103 # set reserved bit#30 on Intel CPUs
101 &and (&HB("eax"),15); # familiy ID 104 &or ("edx","\$IA32CAP_MASK0_INTEL");
105 &and (&HB("eax"),15); # family ID
102 &cmp (&HB("eax"),15); # P4? 106 &cmp (&HB("eax"),15); # P4?
103 &jne (&label("notintel")); 107 &jne (&label("notintel"));
104 &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR 108 # set reserved bit#20 to engage RC4_CHAR
109 &or ("edx","\$IA32CAP_MASK0_INTELP4");
105&set_label("notintel"); 110&set_label("notintel");
106 &bt ("edx",28); # test hyper-threading bit 111 &bt ("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit
107 &jnc (&label("generic")); 112 &jnc (&label("generic"));
108 &and ("edx",0xefffffff); 113 &xor ("edx","\$IA32CAP_MASK0_HT");
109 &cmp ("edi",0); 114 &cmp ("edi",0);
110 &je (&label("generic")); 115 &je (&label("generic"));
111 116
112 &or ("edx",0x10000000); 117 &or ("edx","\$IA32CAP_MASK0_HT");
113 &shr ("ebx",16); 118 &shr ("ebx",16);
114 &cmp (&LB("ebx"),1); 119 &cmp (&LB("ebx"),1); # see if cache is shared
115 &ja (&label("generic")); 120 &ja (&label("generic"));
116 &and ("edx",0xefffffff); # clear hyper-threading bit if not 121 &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not
117 122
118&set_label("generic"); 123&set_label("generic");
119 &and ("ebp",1<<11); # isolate AMD XOP flag 124 &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag
120 &and ("ecx",0xfffff7ff); # force 11th bit to 0 125 # force reserved bits to 0.
126 &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP");
121 &mov ("esi","edx"); 127 &mov ("esi","edx");
122 &or ("ebp","ecx"); # merge AMD XOP flag 128 &or ("ebp","ecx"); # merge AMD XOP flag
123 129
124 &bt ("ecx",27); # check OSXSAVE bit 130 &bt ("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit
125 &jnc (&label("clear_avx")); 131 &jnc (&label("clear_avx"));
126 &xor ("ecx","ecx"); 132 &xor ("ecx","ecx");
127 &data_byte(0x0f,0x01,0xd0); # xgetbv 133 &data_byte(0x0f,0x01,0xd0); # xgetbv
@@ -131,10 +137,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
131 &cmp ("eax",2); 137 &cmp ("eax",2);
132 &je (&label("clear_avx")); 138 &je (&label("clear_avx"));
133&set_label("clear_xmm"); 139&set_label("clear_xmm");
134 &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits 140 # clear AESNI and PCLMULQDQ bits.
135 &and ("esi",0xfeffffff); # clear FXSR 141 &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)");
142 # clear FXSR.
143 &and ("esi","\$~IA32CAP_MASK0_FXSR");
136&set_label("clear_avx"); 144&set_label("clear_avx");
137 &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits 145 # clear AVX, FMA3 and AMD XOP bits.
146 &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)");
138&set_label("done"); 147&set_label("done");
139 &mov ("eax","esi"); 148 &mov ("eax","esi");
140 &mov ("edx","ebp"); 149 &mov ("edx","ebp");
@@ -143,16 +152,17 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
143 152
144&external_label("OPENSSL_ia32cap_P"); 153&external_label("OPENSSL_ia32cap_P");
145 154
146&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); 155&function_begin_B("OPENSSL_wipe_cpu","");
147 &xor ("eax","eax"); 156 &xor ("eax","eax");
148 &xor ("edx","edx"); 157 &xor ("edx","edx");
149 &picmeup("ecx","OPENSSL_ia32cap_P"); 158 &picmeup("ecx","OPENSSL_ia32cap_P");
150 &mov ("ecx",&DWP(0,"ecx")); 159 &mov ("ecx",&DWP(0,"ecx"));
151 &bt (&DWP(0,"ecx"),0); 160 &bt (&DWP(0,"ecx"),"\$IA32CAP_BIT0_FPU");
152 &jnc (&label("no_x87")); 161 &jnc (&label("no_x87"));
153 if ($sse2) { 162 if ($sse2) {
154 &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits 163 # Check SSE2 and FXSR bits.
155 &cmp ("ecx",1<<26|1<<24); 164 &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)");
165 &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)");
156 &jne (&label("no_sse2")); 166 &jne (&label("no_sse2"));
157 &pxor ("xmm0","xmm0"); 167 &pxor ("xmm0","xmm0");
158 &pxor ("xmm1","xmm1"); 168 &pxor ("xmm1","xmm1");