From 1a12fc8399638223feca8f853e2ac2cc22eeb471 Mon Sep 17 00:00:00 2001
From: miod <>
Date: Fri, 4 Nov 2016 17:30:30 +0000
Subject: Replace all uses of magic numbers when operating on
 OPENSSL_ia32cap_P[] by meaningful constants in a private header file, so
 that reviewers can actually get a chance to figure out what the code is
 attempting to do without knowing all cpuid bits.

While there, turn it from an array of two 32-bit ints into a properly
aligned 64-bit int.

Use of OPENSSL_ia32cap_P is now restricted to the assembler parts. C code
will now always use OPENSSL_cpu_caps() and check for the proper bits in
the whole 64-bit word it returns.

i386 tests and ok jsing@
---
 src/lib/libcrypto/aes/asm/aes-586.pl           | 10 +++++-----
 src/lib/libcrypto/aes/asm/aes-x86_64.pl        |  6 ++++--
 src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl |  7 ++++---
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'src/lib/libcrypto/aes')

diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
index aab40e6f1c..3ba8a26eaa 100644
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -1187,7 +1187,7 @@ sub enclast()
 	&lea	($tbl,&DWP(2048+128,$tbl,$s1));
 
 					if (!$x86only) {
-	&bt	(&DWP(0,$s0),25);	# check for SSE bit
+	&bt	(&DWP(0,$s0),"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("x86"));
 
 	&movq	("mm0",&QWP(0,$acc));
@@ -1976,7 +1976,7 @@ sub declast()
 	&lea	($tbl,&DWP(2048+128,$tbl,$s1));
 
 					if (!$x86only) {
-	&bt	(&DWP(0,$s0),25);	# check for SSE bit
+	&bt	(&DWP(0,$s0),"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("x86"));
 
 	&movq	("mm0",&QWP(0,$acc));
@@ -2054,7 +2054,7 @@ my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 	&test	($s2,15);
 	&jnz	(&label("slow_way"));
 					if (!$x86only) {
-	&bt	(&DWP(0,$s0),28);	# check for hyper-threading bit
+	&bt	(&DWP(0,$s0),"\$IA32CAP_BIT0_HT");	# check for hyper-threading bit
 	&jc	(&label("slow_way"));
 					}
 	# pre-allocate aligned stack frame...
@@ -2364,7 +2364,7 @@ my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 	&jb	(&label("slow_enc_tail"));
 
 					if (!$x86only) {
-	&bt	($_tmp,25);	# check for SSE bit
+	&bt	($_tmp,"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("slow_enc_x86"));
 
 	&movq	("mm0",&QWP(0,$key));	# load iv
@@ -2479,7 +2479,7 @@ my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 #--------------------------- SLOW DECRYPT ---------------------------#
 &set_label("slow_decrypt",16);
 					if (!$x86only) {
-	&bt	($_tmp,25);	# check for SSE bit
+	&bt	($_tmp,"\$IA32CAP_BIT0_SSE");	# check for SSE bit
 	&jnc	(&label("slow_dec_loop_x86"));
 
 &set_label("slow_dec_loop_sse",4);
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
index f75e90ba87..c37fd55648 100755
--- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
@@ -1655,6 +1655,7 @@ $code.=<<___;
 .type	AES_cbc_encrypt,\@function,6
 .align	16
 .extern	OPENSSL_ia32cap_P
+.hidden	OPENSSL_ia32cap_P
 .globl	asm_AES_cbc_encrypt
 .hidden	asm_AES_cbc_encrypt
 asm_AES_cbc_encrypt:
@@ -1684,7 +1685,7 @@ AES_cbc_encrypt:
 	jb	.Lcbc_slow_prologue
 	test	\$15,%rdx
 	jnz	.Lcbc_slow_prologue
-	bt	\$28,%r10d
+	bt	\$IA32CAP_BIT0_HT,%r10d
 	jc	.Lcbc_slow_prologue
 
 	# allocate aligned stack frame...
@@ -1944,7 +1945,7 @@ AES_cbc_encrypt:
 	lea	($key,%rax),%rax
 	mov	%rax,$keyend
 
-	# pick Te4 copy which can't "overlap" with stack frame or key scdedule
+	# pick Te4 copy which can't "overlap" with stack frame or key schedule
 	lea	2048($sbox),$sbox
 	lea	768-8(%rsp),%rax
 	sub	$sbox,%rax
@@ -2814,6 +2815,7 @@ ___
 
 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
 
+print "#include \"x86_arch.h\"\n";
 print $code;
 
 close STDOUT;
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index 39b504cbe5..bc6c8f3fc0 100644
--- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -83,6 +83,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
 $code.=<<___;
 .text
 .extern	OPENSSL_ia32cap_P
+.hidden	OPENSSL_ia32cap_P
 
 .globl	aesni_cbc_sha1_enc
 .type	aesni_cbc_sha1_enc,\@abi-omnipotent
@@ -93,10 +94,10 @@ aesni_cbc_sha1_enc:
 	mov	OPENSSL_ia32cap_P+4(%rip),%r11d
 ___
 $code.=<<___ if ($avx);
-	and	\$`1<<28`,%r11d		# mask AVX bit
-	and	\$`1<<30`,%r10d		# mask "Intel CPU" bit
+	and	\$IA32CAP_MASK1_AVX,%r11d	# mask AVX bit
+	and	\$IA32CAP_MASK0_INTEL,%r10d	# mask "Intel CPU" bit
 	or	%r11d,%r10d
-	cmp	\$`1<<28|1<<30`,%r10d
+	cmp	\$(IA32CAP_MASK1_AVX|IA32CAP_MASK0_INTEL),%r10d
 	je	aesni_cbc_sha1_enc_avx
 ___
 $code.=<<___;
-- 
cgit v1.2.3-55-g6feb
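
The private x86_arch.h header introduced by this series is not part of the diff above (the view is limited to src/lib/libcrypto/aes), so the following is only a rough sketch of what the constants and the C-side idiom described in the commit message might look like. The values are inferred from the magic numbers being replaced (word 0 bit 25 = SSE, bit 28 = HT, bit 30 = the synthetic "Intel CPU" flag; word 1 bit 28 = AVX), and the cpu_has_avx_on_intel() helper is hypothetical, assuming OPENSSL_cpu_caps() returns OPENSSL_ia32cap_P[0] in its low 32 bits and OPENSSL_ia32cap_P[1] in its high 32 bits.

/*
 * Sketch only: inferred from the magic numbers replaced above,
 * not copied from the actual x86_arch.h.
 */
#include <stdint.h>

#define IA32CAP_BIT0_SSE	25		/* CPUID.1:EDX bit 25 (SSE), bit index for bt */
#define IA32CAP_BIT0_HT		28		/* CPUID.1:EDX bit 28 (HTT), bit index for bt */

#define IA32CAP_MASK0_INTEL	(1 << 30)	/* synthetic "Intel CPU" flag in word 0 */
#define IA32CAP_MASK1_AVX	(1 << 28)	/* CPUID.1:ECX bit 28 (AVX) in word 1 */

/* Returns the whole 64-bit capability word, per the commit message. */
uint64_t OPENSSL_cpu_caps(void);

/*
 * Hypothetical helper mirroring the AVX + "Intel CPU" test performed in
 * aesni_cbc_sha1_enc above, expressed with OPENSSL_cpu_caps().
 */
static inline int
cpu_has_avx_on_intel(void)
{
	uint64_t caps = OPENSSL_cpu_caps();

	return (caps & IA32CAP_MASK0_INTEL) != 0 &&
	    (caps & ((uint64_t)IA32CAP_MASK1_AVX << 32)) != 0;
}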