From 723502d9588ba0e1cc08af1b12654917da74d440 Mon Sep 17 00:00:00 2001 From: miod <> Date: Fri, 4 Nov 2016 17:30:30 +0000 Subject: Replace all uses of magic numbers when operating on OPENSSL_ia32_P[] by meaningful constants in a private header file, so that reviewers can actually get a chance to figure out what the code is attempting to do without knowing all cpuid bits. While there, turn it from an array of two 32-bit ints into a properly aligned 64-bit int. Use of OPENSSL_ia32_P is now restricted to the assembler parts. C code will now always use OPENSSL_cpu_caps() and check for the proper bits in the whole 64-bit word it returns. i386 tests and ok jsing@ --- src/lib/libcrypto/aes/asm/aes-586.pl | 10 +-- src/lib/libcrypto/aes/asm/aes-x86_64.pl | 6 +- src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | 7 +- src/lib/libcrypto/bn/asm/bn-586.pl | 12 ++-- src/lib/libcrypto/bn/asm/x86-gf2m.pl | 6 +- src/lib/libcrypto/bn/asm/x86-mont.pl | 2 +- src/lib/libcrypto/bn/asm/x86_64-gf2m.pl | 5 +- src/lib/libcrypto/cryptlib.c | 27 ++------ src/lib/libcrypto/cryptlib.h | 3 +- src/lib/libcrypto/engine/eng_aesni.c | 16 ++--- src/lib/libcrypto/evp/e_aes.c | 8 +-- src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c | 11 ++-- src/lib/libcrypto/evp/e_rc4_hmac_md5.c | 8 +-- src/lib/libcrypto/modes/gcm128.c | 19 ++++-- src/lib/libcrypto/perlasm/x86_64-xlate.pl | 4 +- src/lib/libcrypto/perlasm/x86asm.pl | 1 + src/lib/libcrypto/perlasm/x86gas.pl | 6 +- src/lib/libcrypto/rc4/asm/rc4-586.pl | 9 +-- src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | 9 +-- src/lib/libcrypto/sha/asm/sha1-586.pl | 10 +-- src/lib/libcrypto/sha/asm/sha1-x86_64.pl | 9 +-- src/lib/libcrypto/sha/asm/sha512-586.pl | 2 +- src/lib/libcrypto/whrlpool/wp_block.c | 19 ++++-- src/lib/libcrypto/x86_64cpuid.pl | 36 ++++++----- src/lib/libcrypto/x86_arch.h | 90 ++++++++++++++++++++++++++ src/lib/libcrypto/x86cpuid.pl | 56 +++++++++------- 26 files changed, 245 insertions(+), 146 deletions(-) create mode 100644 src/lib/libcrypto/x86_arch.h 
(limited to 'src/lib') diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index aab40e6f1c..3ba8a26eaa 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl @@ -1187,7 +1187,7 @@ sub enclast() &lea ($tbl,&DWP(2048+128,$tbl,$s1)); if (!$x86only) { - &bt (&DWP(0,$s0),25); # check for SSE bit + &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit &jnc (&label("x86")); &movq ("mm0",&QWP(0,$acc)); @@ -1976,7 +1976,7 @@ sub declast() &lea ($tbl,&DWP(2048+128,$tbl,$s1)); if (!$x86only) { - &bt (&DWP(0,$s0),25); # check for SSE bit + &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_SSE"); # check for SSE bit &jnc (&label("x86")); &movq ("mm0",&QWP(0,$acc)); @@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &test ($s2,15); &jnz (&label("slow_way")); if (!$x86only) { - &bt (&DWP(0,$s0),28); # check for hyper-threading bit + &bt (&DWP(0,$s0),"\$IA32CAP_BIT0_HT"); # check for hyper-threading bit &jc (&label("slow_way")); } # pre-allocate aligned stack frame... 
@@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &jb (&label("slow_enc_tail")); if (!$x86only) { - &bt ($_tmp,25); # check for SSE bit + &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit &jnc (&label("slow_enc_x86")); &movq ("mm0",&QWP(0,$key)); # load iv @@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds #--------------------------- SLOW DECRYPT ---------------------------# &set_label("slow_decrypt",16); if (!$x86only) { - &bt ($_tmp,25); # check for SSE bit + &bt ($_tmp,"\$IA32CAP_BIT0_SSE"); # check for SSE bit &jnc (&label("slow_dec_loop_x86")); &set_label("slow_dec_loop_sse",4); diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl index f75e90ba87..c37fd55648 100755 --- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl @@ -1655,6 +1655,7 @@ $code.=<<___; .type AES_cbc_encrypt,\@function,6 .align 16 .extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P .globl asm_AES_cbc_encrypt .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: @@ -1684,7 +1685,7 @@ AES_cbc_encrypt: jb .Lcbc_slow_prologue test \$15,%rdx jnz .Lcbc_slow_prologue - bt \$28,%r10d + bt \$IA32CAP_BIT0_HT,%r10d jc .Lcbc_slow_prologue # allocate aligned stack frame... 
@@ -1944,7 +1945,7 @@ AES_cbc_encrypt: lea ($key,%rax),%rax mov %rax,$keyend - # pick Te4 copy which can't "overlap" with stack frame or key scdedule + # pick Te4 copy which can't "overlap" with stack frame or key schedule lea 2048($sbox),$sbox lea 768-8(%rsp),%rax sub $sbox,%rax @@ -2814,6 +2815,7 @@ ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; +print "#include \"x86_arch.h\"\n"; print $code; close STDOUT; diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index 39b504cbe5..bc6c8f3fc0 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl @@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output"; $code.=<<___; .text .extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P .globl aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc,\@abi-omnipotent @@ -93,10 +94,10 @@ aesni_cbc_sha1_enc: mov OPENSSL_ia32cap_P+4(%rip),%r11d ___ $code.=<<___ if ($avx); - and \$`1<<28`,%r11d # mask AVX bit - and \$`1<<30`,%r10d # mask "Intel CPU" bit + and \$IA32CAP_MASK1_AVX,%r11d # mask AVX bit + and \$IA32CAP_MASK0_INTEL,%r10d # mask "Intel CPU" bit or %r11d,%r10d - cmp \$`1<<28|1<<30`,%r10d + cmp \$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d je aesni_cbc_sha1_enc_avx ___ $code.=<<___; diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 332ef3e91d..c4e2baa6c5 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl @@ -25,7 +25,7 @@ sub bn_mul_add_words { local($name)=@_; - &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + &function_begin_B($name,""); $r="eax"; $a="edx"; @@ -33,7 +33,7 @@ sub bn_mul_add_words if ($sse2) { &picmeup("eax","OPENSSL_ia32cap_P"); - &bt(&DWP(0,"eax"),26); + &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc(&label("maw_non_sse2")); &mov($r,&wparam(0)); @@ -211,7 +211,7 @@ sub bn_mul_words { local($name)=@_; - 
&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + &function_begin_B($name,""); $r="eax"; $a="edx"; @@ -219,7 +219,7 @@ sub bn_mul_words if ($sse2) { &picmeup("eax","OPENSSL_ia32cap_P"); - &bt(&DWP(0,"eax"),26); + &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc(&label("mw_non_sse2")); &mov($r,&wparam(0)); @@ -322,7 +322,7 @@ sub bn_sqr_words { local($name)=@_; - &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + &function_begin_B($name,""); $r="eax"; $a="edx"; @@ -330,7 +330,7 @@ sub bn_sqr_words if ($sse2) { &picmeup("eax","OPENSSL_ia32cap_P"); - &bt(&DWP(0,"eax"),26); + &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc(&label("sqr_non_sse2")); &mov($r,&wparam(0)); diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl index 808a1e5969..97d9136260 100644 --- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl @@ -203,12 +203,12 @@ if (!$x86only) { &picmeup("edx","OPENSSL_ia32cap_P"); &mov ("eax",&DWP(0,"edx")); &mov ("edx",&DWP(4,"edx")); - &test ("eax",1<<23); # check MMX bit + &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit &jz (&label("ialu")); if ($sse2) { - &test ("eax",1<<24); # check FXSR bit + &test ("eax","\$IA32CAP_MASK0_FXSR"); # check FXSR bit &jz (&label("mmx")); - &test ("edx",1<<1); # check PCLMULQDQ bit + &test ("edx","\$IA32CAP_MASK1_PCLMUL"); # check PCLMULQDQ bit &jz (&label("mmx")); &movups ("xmm0",&QWP(8,"esp")); diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index e8f6b05084..a0bdd5787e 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl @@ -114,7 +114,7 @@ $temp="mm6"; $mask="mm7"; &picmeup("eax","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"eax"),26); + &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc (&label("non_sse2")); &mov ("eax",-1); diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl index 8e45c7479b..3ecb425dad 
100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl @@ -163,12 +163,13 @@ ___ $code.=<<___; .extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P .globl bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,\@abi-omnipotent .align 16 bn_GF2m_mul_2x2: - mov OPENSSL_ia32cap_P(%rip),%rax - bt \$33,%rax + mov OPENSSL_ia32cap_P+4(%rip),%eax + bt \$IA32CAP_BIT1_PCLMUL,%eax jnc .Lvanilla_mul_2x2 movd $a1,%xmm0 diff --git a/src/lib/libcrypto/cryptlib.c b/src/lib/libcrypto/cryptlib.c index fa091fbaea..8dec9caa93 100644 --- a/src/lib/libcrypto/cryptlib.c +++ b/src/lib/libcrypto/cryptlib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cryptlib.c,v 1.38 2016/11/04 13:56:04 miod Exp $ */ +/* $OpenBSD: cryptlib.c,v 1.39 2016/11/04 17:30:30 miod Exp $ */ /* ==================================================================== * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. * @@ -627,47 +627,30 @@ CRYPTO_get_lock_name(int type) defined(__INTEL__) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) -unsigned int OPENSSL_ia32cap_P[2]; +uint64_t OPENSSL_ia32cap_P; uint64_t OPENSSL_cpu_caps(void) { - return *(uint64_t *)OPENSSL_ia32cap_P; + return OPENSSL_ia32cap_P; } #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) #define OPENSSL_CPUID_SETUP -typedef unsigned long long IA32CAP; void OPENSSL_cpuid_setup(void) { static int trigger = 0; - IA32CAP OPENSSL_ia32_cpuid(void); - IA32CAP vec; + uint64_t OPENSSL_ia32_cpuid(void); if (trigger) return; trigger = 1; - - vec = OPENSSL_ia32_cpuid(); - - /* - * |(1<<10) sets a reserved bit to signal that variable - * was initialized already... This is to avoid interference - * with cpuid snippets in ELF .init segment. 
- */ - OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10); - OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32); + OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid(); } #endif #else -unsigned long * -OPENSSL_ia32cap_loc(void) -{ - return NULL; -} - uint64_t OPENSSL_cpu_caps(void) { diff --git a/src/lib/libcrypto/cryptlib.h b/src/lib/libcrypto/cryptlib.h index ad679dfa8d..d44738bf3c 100644 --- a/src/lib/libcrypto/cryptlib.h +++ b/src/lib/libcrypto/cryptlib.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cryptlib.h,v 1.24 2014/07/11 08:44:47 jsing Exp $ */ +/* $OpenBSD: cryptlib.h,v 1.25 2016/11/04 17:30:30 miod Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -73,7 +73,6 @@ extern "C" { #define X509_CERT_FILE_EVP "SSL_CERT_FILE" void OPENSSL_cpuid_setup(void); -extern unsigned int OPENSSL_ia32cap_P[]; #ifdef __cplusplus } diff --git a/src/lib/libcrypto/engine/eng_aesni.c b/src/lib/libcrypto/engine/eng_aesni.c index 5f9a36236a..92794f6086 100644 --- a/src/lib/libcrypto/engine/eng_aesni.c +++ b/src/lib/libcrypto/engine/eng_aesni.c @@ -1,4 +1,4 @@ -/* $OpenBSD: eng_aesni.c,v 1.8 2015/02/10 09:46:30 miod Exp $ */ +/* $OpenBSD: eng_aesni.c,v 1.9 2016/11/04 17:30:30 miod Exp $ */ /* * Support for Intel AES-NI intruction set * Author: Huang Ying @@ -93,10 +93,11 @@ defined(_M_AMD64) || defined(_M_X64) || \ defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) && !defined(__i386__) #define COMPILE_HW_AESNI +#include "x86_arch.h" #endif -static ENGINE *ENGINE_aesni (void); +static ENGINE *ENGINE_aesni(void); -void ENGINE_load_aesni (void) +void ENGINE_load_aesni(void) { /* On non-x86 CPUs it just returns. 
*/ #ifdef COMPILE_HW_AESNI @@ -302,20 +303,13 @@ aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out, } /* ===== Engine "management" functions ===== */ -typedef unsigned long long IA32CAP; - /* Prepare the ENGINE structure for registration */ static int aesni_bind_helper(ENGINE *e) { int engage; - if (sizeof(OPENSSL_ia32cap_P) > 4) { - engage = ((IA32CAP)OPENSSL_ia32cap_P >> 57) & 1; - } else { - IA32CAP OPENSSL_ia32_cpuid(void); - engage = (OPENSSL_ia32_cpuid() >> 57) & 1; - } + engage = (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) != 0; /* Register everything or return with an error */ if (!ENGINE_set_id(e, aesni_id) || diff --git a/src/lib/libcrypto/evp/e_aes.c b/src/lib/libcrypto/evp/e_aes.c index 25199dca36..b20543a90c 100644 --- a/src/lib/libcrypto/evp/e_aes.c +++ b/src/lib/libcrypto/evp/e_aes.c @@ -1,4 +1,4 @@ -/* $OpenBSD: e_aes.c,v 1.30 2016/11/04 13:56:05 miod Exp $ */ +/* $OpenBSD: e_aes.c,v 1.31 2016/11/04 17:30:30 miod Exp $ */ /* ==================================================================== * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. 
* @@ -150,10 +150,10 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len, defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -extern unsigned int OPENSSL_ia32cap_P[]; +#include "x86_arch.h" #ifdef VPAES_ASM -#define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) +#define VPAES_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_SSSE3) #endif #ifdef BSAES_ASM #define BSAES_CAPABLE VPAES_CAPABLE @@ -161,7 +161,7 @@ extern unsigned int OPENSSL_ia32cap_P[]; /* * AES-NI section */ -#define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) +#define AESNI_CAPABLE (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); diff --git a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c index 8574823aed..3f82cf5967 100644 --- a/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c +++ b/src/lib/libcrypto/evp/e_aes_cbc_hmac_sha1.c @@ -1,4 +1,4 @@ -/* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.12 2016/05/04 15:01:33 tedu Exp $ */ +/* $OpenBSD: e_aes_cbc_hmac_sha1.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */ /* ==================================================================== * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. * @@ -87,13 +87,12 @@ typedef struct { defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) +#include "x86_arch.h" + #if defined(__GNUC__) && __GNUC__>=2 # define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) #endif -extern unsigned int OPENSSL_ia32cap_P[2]; -#define AESNI_CAPABLE (1<<(57-32)) - int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); int aesni_set_decrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); @@ -578,14 +577,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = { const EVP_CIPHER * EVP_aes_128_cbc_hmac_sha1(void) { - return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? + return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ? 
&aesni_128_cbc_hmac_sha1_cipher : NULL; } const EVP_CIPHER * EVP_aes_256_cbc_hmac_sha1(void) { - return OPENSSL_ia32cap_P[1] & AESNI_CAPABLE ? + return (OPENSSL_cpu_caps() & CPUCAP_MASK_AESNI) ? &aesni_256_cbc_hmac_sha1_cipher : NULL; } #else diff --git a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c index 1f085af403..39527cafe6 100644 --- a/src/lib/libcrypto/evp/e_rc4_hmac_md5.c +++ b/src/lib/libcrypto/evp/e_rc4_hmac_md5.c @@ -1,4 +1,4 @@ -/* $OpenBSD: e_rc4_hmac_md5.c,v 1.5 2014/08/11 13:29:43 bcook Exp $ */ +/* $OpenBSD: e_rc4_hmac_md5.c,v 1.6 2016/11/04 17:30:30 miod Exp $ */ /* ==================================================================== * Copyright (c) 2011 The OpenSSL Project. All rights reserved. * @@ -105,6 +105,7 @@ rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *inkey, defined(__INTEL__) ) && \ !(defined(__APPLE__) && defined(__MACH__)) #define STITCHED_CALL +#include "x86_arch.h" #endif #if !defined(STITCHED_CALL) @@ -122,7 +123,6 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, md5_off = MD5_CBLOCK - key->md.num, blocks; unsigned int l; - extern unsigned int OPENSSL_ia32cap_P[]; #endif size_t plen = key->payload_length; @@ -139,7 +139,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (plen > md5_off && (blocks = (plen - md5_off) / MD5_CBLOCK) && - (OPENSSL_ia32cap_P[0]&(1 << 20)) == 0) { + (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) { MD5_Update(&key->md, in, md5_off); RC4(&key->ks, rc4_off, in, out); @@ -187,7 +187,7 @@ rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, rc4_off += MD5_CBLOCK; if (len > rc4_off && (blocks = (len - rc4_off) / MD5_CBLOCK) && - (OPENSSL_ia32cap_P[0] & (1 << 20)) == 0) { + (OPENSSL_cpu_caps() & CPUCAP_MASK_INTELP4) == 0) { RC4(&key->ks, rc4_off, in, out); MD5_Update(&key->md, out, md5_off); diff --git a/src/lib/libcrypto/modes/gcm128.c b/src/lib/libcrypto/modes/gcm128.c index 6f8a8dd7f4..95ee755f83 100644 --- 
a/src/lib/libcrypto/modes/gcm128.c +++ b/src/lib/libcrypto/modes/gcm128.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gcm128.c,v 1.14 2016/11/04 13:56:05 miod Exp $ */ +/* $OpenBSD: gcm128.c,v 1.15 2016/11/04 17:30:30 miod Exp $ */ /* ==================================================================== * Copyright (c) 2010 The OpenSSL Project. All rights reserved. * @@ -637,13 +637,19 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) #endif +#if defined(GHASH_ASM) && \ + (defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) +#include "x86_arch.h" +#endif + #if TABLE_BITS==4 && defined(GHASH_ASM) # if (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) # define GHASH_ASM_X86_OR_64 # define GCM_FUNCREF_4BIT -extern unsigned int OPENSSL_ia32cap_P[2]; void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); @@ -705,8 +711,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) #elif TABLE_BITS==4 # if defined(GHASH_ASM_X86_OR_64) # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) - if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */ - OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */ + /* check FXSR and PCLMULQDQ bits */ + if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) == + (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) { gcm_init_clmul(ctx->Htable,ctx->H.u); ctx->gmult = gcm_gmult_clmul; ctx->ghash = gcm_ghash_clmul; @@ -716,9 +723,9 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) gcm_init_4bit(ctx->Htable,ctx->H.u); # if defined(GHASH_ASM_X86) /* x86 only */ # if defined(OPENSSL_IA32_SSE2) - if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */ + if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */ # else - if (OPENSSL_ia32cap_P[0]&(1<<23)) 
{ /* check MMX bit */ + if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */ # endif ctx->gmult = gcm_gmult_4bit_mmx; ctx->ghash = gcm_ghash_4bit_mmx; diff --git a/src/lib/libcrypto/perlasm/x86_64-xlate.pl b/src/lib/libcrypto/perlasm/x86_64-xlate.pl index 4bd53da33d..a8393d2730 100755 --- a/src/lib/libcrypto/perlasm/x86_64-xlate.pl +++ b/src/lib/libcrypto/perlasm/x86_64-xlate.pl @@ -393,7 +393,7 @@ my %globals; } } } -{ package expr; # pick up expressioins +{ package expr; # pick up expressions sub re { my $self = shift; # single instance is enough... local *line = shift; @@ -777,6 +777,8 @@ ___ OPTION DOTNAME ___ } +print "#include \"x86_arch.h\"\n"; + while($line=<>) { chomp($line); diff --git a/src/lib/libcrypto/perlasm/x86asm.pl b/src/lib/libcrypto/perlasm/x86asm.pl index 5916ea4f89..e039382e00 100644 --- a/src/lib/libcrypto/perlasm/x86asm.pl +++ b/src/lib/libcrypto/perlasm/x86asm.pl @@ -248,6 +248,7 @@ EOF $pic=0; for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } + ::emitraw("#include \"x86_arch.h\"\n"); ::emitraw("#include <machine/asm.h>\n") if $openbsd; $filename =~ s/\.pl$//; &file($filename); diff --git a/src/lib/libcrypto/perlasm/x86gas.pl b/src/lib/libcrypto/perlasm/x86gas.pl index d4baea514b..84d24edbbd 100644 --- a/src/lib/libcrypto/perlasm/x86gas.pl +++ b/src/lib/libcrypto/perlasm/x86gas.pl @@ -157,10 +157,8 @@ sub ::file_end } } if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; - if ($::macosx) { push (@out,"$tmp,2\n"); } - elsif ($::elf) { push (@out,"$tmp,4\n"); } - else { push (@out,"$tmp\n"); } + push (@out, ".extern\t${nmdecor}OPENSSL_ia32cap_P\n"); + push (@out, ".hidden\t${nmdecor}OPENSSL_ia32cap_P\n"); } push(@out,$initseg) if ($initseg); } diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl index 84f1a798cb..03f0cff467 100644 --- a/src/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl @@ -189,7 +189,8 @@ if ($alt=0) { &jz 
(&label("go4loop4")); &picmeup($out,"OPENSSL_ia32cap_P"); - &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX] + # check SSE2 bit [could have been MMX] + &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); &jnc (&label("go4loop4")); &mov ($out,&wparam(3)) if (!$alt); @@ -312,7 +313,7 @@ $idx="edx"; &xor ("eax","eax"); &mov (&DWP(-4,$out),$idi); # borrow key->y - &bt (&DWP(0,$idx),20); # check for bit#20 + &bt (&DWP(0,$idx),"\$IA32CAP_BIT0_INTELP4"); &jc (&label("c1stloop")); &set_label("w1stloop",16); @@ -388,9 +389,9 @@ $idx="edx"; &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); &picmeup("edx","OPENSSL_ia32cap_P"); &mov ("edx",&DWP(0,"edx")); - &bt ("edx",20); + &bt ("edx","\$IA32CAP_BIT0_INTELP4"); &jc (&label("1xchar")); - &bt ("edx",26); + &bt ("edx","\$IA32CAP_BIT0_SSE2"); &jnc (&label("ret")); &add ("eax",25); &ret (); diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl index 197749dda7..2135b38ef8 100755 --- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl @@ -122,6 +122,7 @@ $out="%rcx"; # arg4 $code=<<___; .text .extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P .globl RC4 .type RC4,\@function,4 @@ -164,7 +165,7 @@ $code.=<<___; movl ($dat,$XX[0],4),$TX[0]#d test \$-16,$len jz .Lloop1 - bt \$30,%r8d # Intel CPU? + bt \$IA32CAP_BIT0_INTEL,%r8d # Intel CPU? jc .Lintel and \$7,$TX[1] lea 1($XX[0]),$XX[1] @@ -442,7 +443,7 @@ RC4_set_key: xor %r11,%r11 mov OPENSSL_ia32cap_P(%rip),$idx#d - bt \$20,$idx#d # RC4_CHAR? + bt \$IA32CAP_BIT0_INTELP4,$idx#d # RC4_CHAR? 
jc .Lc1stloop jmp .Lw1stloop @@ -506,9 +507,9 @@ RC4_set_key: RC4_options: lea .Lopts(%rip),%rax mov OPENSSL_ia32cap_P(%rip),%edx - bt \$20,%edx + bt \$IA32CAP_BIT0_INTELP4,%edx jc .L8xchar - bt \$30,%edx + bt \$IA32CAP_BIT0_INTEL,%edx jnc .Ldone add \$25,%rax ret diff --git a/src/lib/libcrypto/sha/asm/sha1-586.pl b/src/lib/libcrypto/sha/asm/sha1-586.pl index 6fbea34d78..d29ed84706 100644 --- a/src/lib/libcrypto/sha/asm/sha1-586.pl +++ b/src/lib/libcrypto/sha/asm/sha1-586.pl @@ -303,15 +303,15 @@ if ($xmm) { &mov ($A,&DWP(0,$T)); &mov ($D,&DWP(4,$T)); - &test ($D,1<<9); # check SSSE3 bit + &test ($D,"\$IA32CAP_MASK1_SSSE3"); # check SSSE3 bit &jz (&label("x86")); - &test ($A,1<<24); # check FXSR bit + &test ($A,"\$IA32CAP_MASK0_FXSR"); # check FXSR bit &jz (&label("x86")); if ($ymm) { - &and ($D,1<<28); # mask AVX bit - &and ($A,1<<30); # mask "Intel CPU" bit + &and ($D,"\$IA32CAP_MASK1_AVX"); # mask AVX bit + &and ($A,"\$IA32CAP_MASK0_INTEL"); # mask "Intel CPU" bit &or ($A,$D); - &cmp ($A,1<<28|1<<30); + &cmp ($A,"\$(IA32CAP_MASK1_AVX | IA32CAP_MASK0_INTEL)"); &je (&label("avx_shortcut")); } &jmp (&label("ssse3_shortcut")); diff --git a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl index f15c7ec39b..147d21570b 100755 --- a/src/lib/libcrypto/sha/asm/sha1-x86_64.pl +++ b/src/lib/libcrypto/sha/asm/sha1-x86_64.pl @@ -216,6 +216,7 @@ unshift(@xi,pop(@xi)); $code.=<<___; .text .extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P .globl sha1_block_data_order .type sha1_block_data_order,\@function,3 @@ -223,14 +224,14 @@ $code.=<<___; sha1_block_data_order: mov OPENSSL_ia32cap_P+0(%rip),%r9d mov OPENSSL_ia32cap_P+4(%rip),%r8d - test \$`1<<9`,%r8d # check SSSE3 bit + test \$IA32CAP_MASK1_SSSE3,%r8d # check SSSE3 bit jz .Lialu ___ $code.=<<___ if ($avx); - and \$`1<<28`,%r8d # mask AVX bit - and \$`1<<30`,%r9d # mask "Intel CPU" bit + and \$IA32CAP_MASK1_AVX,%r8d # mask AVX bit + and \$IA32CAP_MASK0_INTEL,%r9d # mask "Intel CPU" bit or 
%r9d,%r8d - cmp \$`1<<28|1<<30`,%r8d + cmp \$(IA32CAP_MASK0_INTEL | IA32CAP_MASK1_AVX),%r8d je _avx_shortcut ___ $code.=<<___; diff --git a/src/lib/libcrypto/sha/asm/sha512-586.pl b/src/lib/libcrypto/sha/asm/sha512-586.pl index 7eab6a5b88..163361ebe9 100644 --- a/src/lib/libcrypto/sha/asm/sha512-586.pl +++ b/src/lib/libcrypto/sha/asm/sha512-586.pl @@ -284,7 +284,7 @@ sub BODY_00_15_x86 { if ($sse2) { &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); - &bt (&DWP(0,"edx"),26); + &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); &jnc (&label("loop_x86")); # load ctx->h[0-7] diff --git a/src/lib/libcrypto/whrlpool/wp_block.c b/src/lib/libcrypto/whrlpool/wp_block.c index d8c1b89ba3..1e00a01330 100644 --- a/src/lib/libcrypto/whrlpool/wp_block.c +++ b/src/lib/libcrypto/whrlpool/wp_block.c @@ -1,4 +1,4 @@ -/* $OpenBSD: wp_block.c,v 1.12 2016/09/04 14:06:46 jsing Exp $ */ +/* $OpenBSD: wp_block.c,v 1.13 2016/11/04 17:30:30 miod Exp $ */ /** * The Whirlpool hashing function. * @@ -36,10 +36,12 @@ * */ -#include "wp_locl.h" #include +#include #include +#include "wp_locl.h" + typedef unsigned char u8; #if defined(_LP64) typedef unsigned long u64; @@ -57,12 +59,15 @@ typedef unsigned long long u64; # define OPENSSL_SMALL_FOOTPRINT /* it appears that for elder non-MMX CPUs this is actually faster! 
*/ # endif -# define GO_FOR_MMX(ctx,inp,num) do { \ - extern unsigned int OPENSSL_ia32cap_P[]; \ +#include "x86_arch.h" +# define GO_FOR_MMX(ctx,inp,num) \ +do { \ void whirlpool_block_mmx(void *,const void *,size_t); \ - if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \ - whirlpool_block_mmx(ctx->H.c,inp,num); return; \ - } while (0) + if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0) \ + break; \ + whirlpool_block_mmx(ctx->H.c,inp,num); \ + return; \ +} while (0) # endif #elif defined(__arm__) # define SMALL_REGISTER_BANK diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl index b36d3f7dc5..6558dedb6b 100644 --- a/src/lib/libcrypto/x86_64cpuid.pl +++ b/src/lib/libcrypto/x86_64cpuid.pl @@ -20,8 +20,8 @@ print<<___; .section .init call OPENSSL_cpuid_setup +.extern OPENSSL_ia32cap_P .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,8,4 .text @@ -80,8 +80,8 @@ OPENSSL_ia32_cpuid: mov %eax,%r10d mov \$0x80000001,%eax cpuid - or %ecx,%r9d - and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 + and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP bit + or \$1,%r9d # make sure %r9d is not zero cmp \$0x80000008,%r10d jb .Lintel @@ -93,12 +93,12 @@ OPENSSL_ia32_cpuid: mov \$1,%eax cpuid - bt \$28,%edx # test hyper-threading bit + bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit jnc .Lgeneric shr \$16,%ebx # number of logical processors cmp %r10b,%bl ja .Lgeneric - and \$0xefffffff,%edx # ~(1<<28) + xor \$IA32CAP_MASK0_HT,%edx jmp .Lgeneric .Lintel: @@ -116,33 +116,37 @@ OPENSSL_ia32_cpuid: .Lnocacheinfo: mov \$1,%eax cpuid - and \$0xbfefffff,%edx # force reserved bits to 0 + # force reserved bits to 0 + and \$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx cmp \$0,%r9d jne .Lnotintel - or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs + # set reserved bit#30 on Intel CPUs + or \$IA32CAP_MASK0_INTEL,%edx and \$15,%ah cmp \$15,%ah # examine Family ID jne .Lnotintel - or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR + # 
set reserved bit#20 to engage RC4_CHAR + or \$IA32CAP_MASK0_INTELP4,%edx .Lnotintel: - bt \$28,%edx # test hyper-threading bit + bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit jnc .Lgeneric - and \$0xefffffff,%edx # ~(1<<28) + xor \$IA32CAP_MASK0_HT,%edx cmp \$0,%r10d je .Lgeneric - or \$0x10000000,%edx # 1<<28 + or \$IA32CAP_MASK0_HT,%edx shr \$16,%ebx cmp \$1,%bl # see if cache is shared ja .Lgeneric - and \$0xefffffff,%edx # ~(1<<28) + xor \$IA32CAP_MASK0_HT,%edx # clear hyper-threading bit if not + .Lgeneric: - and \$0x00000800,%r9d # isolate AMD XOP flag - and \$0xfffff7ff,%ecx + and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP flag + and \$(~IA32CAP_MASK1_AMD_XOP),%ecx or %ecx,%r9d # merge AMD XOP flag mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx - bt \$27,%r9d # check OSXSAVE bit + bt \$IA32CAP_BIT1_OSXSAVE,%r9d # check OSXSAVE bit jnc .Lclear_avx xor %ecx,%ecx # XCR0 .byte 0x0f,0x01,0xd0 # xgetbv @@ -150,7 +154,7 @@ OPENSSL_ia32_cpuid: cmp \$6,%eax je .Ldone .Lclear_avx: - mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) + mov \$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax and %eax,%r9d # clear AVX, FMA and AMD XOP bits .Ldone: shl \$32,%r9 diff --git a/src/lib/libcrypto/x86_arch.h b/src/lib/libcrypto/x86_arch.h new file mode 100644 index 0000000000..5b2cf97546 --- /dev/null +++ b/src/lib/libcrypto/x86_arch.h @@ -0,0 +1,90 @@ +/* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */ +/* + * Copyright (c) 2016 Miodrag Vallat. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The knowledge of the layout of OPENSSL_ia32cap_P is internal to libcrypto + * (and, to some extent, to libssl), and may change in the future without + * notice. + */ + +/* + * OPENSSL_ia32cap_P is computed at runtime by OPENSSL_ia32_cpuid(). + * + * On processors which lack the cpuid instruction, the value is always + * zero (this only matters on 32-bit processors, of course). + * + * On processors which support the cpuid instruction, after running + * "cpuid 1", the value of %edx is written to the low word of OPENSSL_ia32cap_P, + * and the value of %ecx is written to its high word. + * + * Further processing is done to set or clear specific bits, depending + * upon the exact processor type. + * + * Assembly routines usually address OPENSSL_ia32cap_P as two 32-bit words, + * hence two sets of bit numbers and masks. OPENSSL_cpu_caps() returns the + * complete 64-bit word. 
+ */ + +/* bit numbers for the low word */ +#define IA32CAP_BIT0_FPU 0 +#define IA32CAP_BIT0_MMX 23 +#define IA32CAP_BIT0_FXSR 24 +#define IA32CAP_BIT0_SSE 25 +#define IA32CAP_BIT0_SSE2 26 +#define IA32CAP_BIT0_HT 28 + +/* the following bits are not obtained from cpuid */ +#define IA32CAP_BIT0_INTELP4 20 +#define IA32CAP_BIT0_INTEL 30 + +/* bit numbers for the high word */ +#define IA32CAP_BIT1_PCLMUL 1 +#define IA32CAP_BIT1_SSSE3 9 +#define IA32CAP_BIT1_FMA3 12 +#define IA32CAP_BIT1_AESNI 25 +#define IA32CAP_BIT1_OSXSAVE 27 +#define IA32CAP_BIT1_AVX 28 + +#define IA32CAP_BIT1_AMD_XOP 11 + +/* bit masks for the low word */ +#define IA32CAP_MASK0_MMX (1 << IA32CAP_BIT0_MMX) +#define IA32CAP_MASK0_FXSR (1 << IA32CAP_BIT0_FXSR) +#define IA32CAP_MASK0_SSE (1 << IA32CAP_BIT0_SSE) +#define IA32CAP_MASK0_SSE2 (1 << IA32CAP_BIT0_SSE2) +#define IA32CAP_MASK0_HT (1 << IA32CAP_BIT0_HT) + +#define IA32CAP_MASK0_INTELP4 (1 << IA32CAP_BIT0_INTELP4) +#define IA32CAP_MASK0_INTEL (1 << IA32CAP_BIT0_INTEL) + +/* bit masks for the high word */ +#define IA32CAP_MASK1_PCLMUL (1 << IA32CAP_BIT1_PCLMUL) +#define IA32CAP_MASK1_SSSE3 (1 << IA32CAP_BIT1_SSSE3) +#define IA32CAP_MASK1_FMA3 (1 << IA32CAP_BIT1_FMA3) +#define IA32CAP_MASK1_AESNI (1 << IA32CAP_BIT1_AESNI) +#define IA32CAP_MASK1_AVX (1 << IA32CAP_BIT1_AVX) + +#define IA32CAP_MASK1_AMD_XOP (1 << IA32CAP_BIT1_AMD_XOP) + +/* bit masks for OPENSSL_cpu_caps() */ +#define CPUCAP_MASK_MMX IA32CAP_MASK0_MMX +#define CPUCAP_MASK_FXSR IA32CAP_MASK0_FXSR +#define CPUCAP_MASK_SSE IA32CAP_MASK0_SSE +#define CPUCAP_MASK_INTELP4 IA32CAP_MASK0_INTELP4 +#define CPUCAP_MASK_PCLMUL (1ULL << (32 + IA32CAP_BIT1_PCLMUL)) +#define CPUCAP_MASK_SSSE3 (1ULL << (32 + IA32CAP_BIT1_SSSE3)) +#define CPUCAP_MASK_AESNI (1ULL << (32 + IA32CAP_BIT1_AESNI)) diff --git a/src/lib/libcrypto/x86cpuid.pl b/src/lib/libcrypto/x86cpuid.pl index 7918629f64..8b9570fc72 100644 --- a/src/lib/libcrypto/x86cpuid.pl +++ b/src/lib/libcrypto/x86cpuid.pl @@ -56,8 +56,10 @@ for 
(@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("esi","eax"); &mov ("eax",0x80000001); &cpuid (); - &or ("ebp","ecx"); - &and ("ebp",1<<11|1); # isolate XOP bit + &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit + &or ("ecx",1); # make sure ecx is not zero + &mov ("ebp","ecx"); + &cmp ("esi",0x80000008); &jb (&label("intel")); @@ -69,13 +71,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("eax",1); &xor ("ecx","ecx"); &cpuid (); - &bt ("edx",28); + &bt ("edx","\$IA32CAP_BIT0_HT"); &jnc (&label("generic")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); &ja (&label("generic")); - &and ("edx",0xefffffff); # clear hyper-threading bit + &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit &jmp (&label("generic")); &set_label("intel"); @@ -94,34 +96,38 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("eax",1); &xor ("ecx","ecx"); &cpuid (); - &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 + # force reserved bits to 0. + &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)"); &cmp ("ebp",0); &jne (&label("notintel")); - &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs - &and (&HB("eax"),15); # familiy ID + # set reserved bit#30 on Intel CPUs + &or ("edx","\$IA32CAP_MASK0_INTEL"); + &and (&HB("eax"),15); # family ID &cmp (&HB("eax"),15); # P4? 
&jne (&label("notintel")); - &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR + # set reserved bit#20 to engage RC4_CHAR + &or ("edx","\$IA32CAP_MASK0_INTELP4"); &set_label("notintel"); - &bt ("edx",28); # test hyper-threading bit + &bt ("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit &jnc (&label("generic")); - &and ("edx",0xefffffff); + &xor ("edx","\$IA32CAP_MASK0_HT"); &cmp ("edi",0); &je (&label("generic")); - &or ("edx",0x10000000); + &or ("edx","\$IA32CAP_MASK0_HT"); &shr ("ebx",16); - &cmp (&LB("ebx"),1); + &cmp (&LB("ebx"),1); # see if cache is shared &ja (&label("generic")); - &and ("edx",0xefffffff); # clear hyper-threading bit if not + &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not &set_label("generic"); - &and ("ebp",1<<11); # isolate AMD XOP flag - &and ("ecx",0xfffff7ff); # force 11th bit to 0 + &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag + # force reserved bits to 0. + &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP"); &mov ("esi","edx"); &or ("ebp","ecx"); # merge AMD XOP flag - &bt ("ecx",27); # check OSXSAVE bit + &bt ("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit &jnc (&label("clear_avx")); &xor ("ecx","ecx"); &data_byte(0x0f,0x01,0xd0); # xgetbv @@ -131,10 +137,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &cmp ("eax",2); &je (&label("clear_avx")); &set_label("clear_xmm"); - &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits - &and ("esi",0xfeffffff); # clear FXSR + # clear AESNI and PCLMULQDQ bits. + &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)"); + # clear FXSR. + &and ("esi","\$~IA32CAP_MASK0_FXSR"); &set_label("clear_avx"); - &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits + # clear AVX, FMA3 and AMD XOP bits. 
+ &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)"); &set_label("done"); &mov ("eax","esi"); &mov ("edx","ebp"); @@ -143,16 +152,17 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &external_label("OPENSSL_ia32cap_P"); -&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); +&function_begin_B("OPENSSL_wipe_cpu",""); &xor ("eax","eax"); &xor ("edx","edx"); &picmeup("ecx","OPENSSL_ia32cap_P"); &mov ("ecx",&DWP(0,"ecx")); - &bt (&DWP(0,"ecx"),0); + &bt (&DWP(0,"ecx"),"\$IA32CAP_BIT0_FPU"); &jnc (&label("no_x87")); if ($sse2) { - &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits - &cmp ("ecx",1<<26|1<<24); + # Check SSE2 and FXSR bits. + &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); + &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); &jne (&label("no_sse2")); &pxor ("xmm0","xmm0"); &pxor ("xmm1","xmm1"); -- cgit v1.2.3-55-g6feb