summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbcook <>2015-09-11 12:17:57 +0000
committerbcook <>2015-09-11 12:17:57 +0000
commit656ef1e2adb75d20a81b8763a2088b2c8e6c3058 (patch)
tree03ee90a380bddd9dac65bed24366a6dc8e0abd01 /src
parent52f5d74dfdc68d06be5b27e3df8b2fbb6387acf6 (diff)
downloadopenbsd-656ef1e2adb75d20a81b8763a2088b2c8e6c3058.tar.gz
openbsd-656ef1e2adb75d20a81b8763a2088b2c8e6c3058.tar.bz2
openbsd-656ef1e2adb75d20a81b8763a2088b2c8e6c3058.zip
Fixup inter-bank movq/movd operations, emit bytes for pclmulqdq again.
Fixes builds with gcc + Apple's assembler; working on re-enabling builds with older OpenBSD releases. Based on OpenSSL commit: https://git.openssl.org/?p=openssl.git;a=commitdiff;h=902b30df193afc3417a96ba72a81ed390bd50de3 ok miod@
Diffstat (limited to 'src')
-rw-r--r--src/lib/libcrypto/bn/asm/x86_64-gf2m.pl12
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86_64-mont5.pl12
-rwxr-xr-xsrc/lib/libcrypto/perlasm/x86_64-xlate.pl16
-rw-r--r--src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl12
-rwxr-xr-xsrc/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl12
-rwxr-xr-xsrc/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl16
6 files changed, 54 insertions, 26 deletions
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
index 226c66c35e..8e45c7479b 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
@@ -143,12 +143,12 @@ $code.=<<___;
143 mov (%rsp,$i1,8),$t1 143 mov (%rsp,$i1,8),$t1
144 mov $t1,$t0 144 mov $t1,$t0
145 shl \$`8*$n-4`,$t1 145 shl \$`8*$n-4`,$t1
146 movq $R,$i0 146 movd $R,$i0
147 shr \$`64-(8*$n-4)`,$t0 147 shr \$`64-(8*$n-4)`,$t0
148 xor $t1,$lo 148 xor $t1,$lo
149 psrldq \$8,$R 149 psrldq \$8,$R
150 xor $t0,$hi 150 xor $t0,$hi
151 movq $R,$i1 151 movd $R,$i1
152 xor $i0,$lo 152 xor $i0,$lo
153 xor $i1,$hi 153 xor $i1,$hi
154 154
@@ -171,15 +171,15 @@ bn_GF2m_mul_2x2:
171 bt \$33,%rax 171 bt \$33,%rax
172 jnc .Lvanilla_mul_2x2 172 jnc .Lvanilla_mul_2x2
173 173
174 movq $a1,%xmm0 174 movd $a1,%xmm0
175 movq $b1,%xmm1 175 movd $b1,%xmm1
176 movq $a0,%xmm2 176 movd $a0,%xmm2
177___ 177___
178$code.=<<___ if ($win64); 178$code.=<<___ if ($win64);
179 movq 40(%rsp),%xmm3 179 movq 40(%rsp),%xmm3
180___ 180___
181$code.=<<___ if (!$win64); 181$code.=<<___ if (!$win64);
182 movq $b0,%xmm3 182 movd $b0,%xmm3
183___ 183___
184$code.=<<___; 184$code.=<<___;
185 movdqa %xmm0,%xmm4 185 movdqa %xmm0,%xmm4
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont5.pl b/src/lib/libcrypto/bn/asm/x86_64-mont5.pl
index 9c88884d42..81e5c53728 100755
--- a/src/lib/libcrypto/bn/asm/x86_64-mont5.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-mont5.pl
@@ -120,7 +120,7 @@ $code.=<<___;
120 lea $STRIDE($bp),$bp 120 lea $STRIDE($bp),$bp
121 por %xmm3,%xmm0 121 por %xmm3,%xmm0
122 122
123 movq %xmm0,$m0 # m0=bp[0] 123 movd %xmm0,$m0 # m0=bp[0]
124 124
125 mov ($n0),$n0 # pull n0[0] value 125 mov ($n0),$n0 # pull n0[0] value
126 mov ($ap),%rax 126 mov ($ap),%rax
@@ -183,7 +183,7 @@ $code.=<<___;
183 cmp $num,$j 183 cmp $num,$j
184 jl .L1st 184 jl .L1st
185 185
186 movq %xmm0,$m0 # bp[1] 186 movd %xmm0,$m0 # bp[1]
187 187
188 add %rax,$hi1 188 add %rax,$hi1
189 mov ($ap),%rax # ap[0] 189 mov ($ap),%rax # ap[0]
@@ -266,7 +266,7 @@ $code.=<<___;
266 cmp $num,$j 266 cmp $num,$j
267 jl .Linner 267 jl .Linner
268 268
269 movq %xmm0,$m0 # bp[i+1] 269 movd %xmm0,$m0 # bp[i+1]
270 270
271 add %rax,$hi1 271 add %rax,$hi1
272 mov ($ap),%rax # ap[0] 272 mov ($ap),%rax # ap[0]
@@ -403,7 +403,7 @@ $code.=<<___;
403 lea $STRIDE($bp),$bp 403 lea $STRIDE($bp),$bp
404 por %xmm3,%xmm0 404 por %xmm3,%xmm0
405 405
406 movq %xmm0,$m0 # m0=bp[0] 406 movd %xmm0,$m0 # m0=bp[0]
407 mov ($n0),$n0 # pull n0[0] value 407 mov ($n0),$n0 # pull n0[0] value
408 mov ($ap),%rax 408 mov ($ap),%rax
409 409
@@ -550,7 +550,7 @@ $code.=<<___;
550 mov $N[1],-16(%rsp,$j,8) # tp[j-1] 550 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
551 mov %rdx,$N[0] 551 mov %rdx,$N[0]
552 552
553 movq %xmm0,$m0 # bp[1] 553 movd %xmm0,$m0 # bp[1]
554 554
555 xor $N[1],$N[1] 555 xor $N[1],$N[1]
556 add $A[0],$N[0] 556 add $A[0],$N[0]
@@ -718,7 +718,7 @@ $code.=<<___;
718 mov $N[0],-24(%rsp,$j,8) # tp[j-1] 718 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
719 mov %rdx,$N[0] 719 mov %rdx,$N[0]
720 720
721 movq %xmm0,$m0 # bp[i+1] 721 movd %xmm0,$m0 # bp[i+1]
722 mov $N[1],-16(%rsp,$j,8) # tp[j-1] 722 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
723 723
724 xor $N[1],$N[1] 724 xor $N[1],$N[1]
diff --git a/src/lib/libcrypto/perlasm/x86_64-xlate.pl b/src/lib/libcrypto/perlasm/x86_64-xlate.pl
index ed1f3ed6ab..82992f41e5 100755
--- a/src/lib/libcrypto/perlasm/x86_64-xlate.pl
+++ b/src/lib/libcrypto/perlasm/x86_64-xlate.pl
@@ -121,7 +121,7 @@ my %globals;
121 $self->{sz} = ""; 121 $self->{sz} = "";
122 } elsif ($self->{op} =~ /^v/) { # VEX 122 } elsif ($self->{op} =~ /^v/) { # VEX
123 $self->{sz} = ""; 123 $self->{sz} = "";
124 } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { 124 } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) {
125 $self->{sz} = ""; 125 $self->{sz} = "";
126 } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { 126 } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
127 $self->{op} = $1; 127 $self->{op} = $1;
@@ -698,6 +698,20 @@ my $pinsrd = sub {
698 } 698 }
699}; 699};
700 700
701my $pclmulqdq = sub {
702 if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
703 my @opcode=(0x66);
704 rex(\@opcode,$3,$2);
705 push @opcode,0x0f,0x3a,0x44;
706 push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
707 my $c=$1;
708 push @opcode,$c=~/^0/?oct($c):$c;
709 @opcode;
710 } else {
711 ();
712 }
713};
714
701if ($nasm) { 715if ($nasm) {
702 print <<___; 716 print <<___;
703default rel 717default rel
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl
index 226c66c35e..8e45c7479b 100644
--- a/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl
@@ -143,12 +143,12 @@ $code.=<<___;
143 mov (%rsp,$i1,8),$t1 143 mov (%rsp,$i1,8),$t1
144 mov $t1,$t0 144 mov $t1,$t0
145 shl \$`8*$n-4`,$t1 145 shl \$`8*$n-4`,$t1
146 movq $R,$i0 146 movd $R,$i0
147 shr \$`64-(8*$n-4)`,$t0 147 shr \$`64-(8*$n-4)`,$t0
148 xor $t1,$lo 148 xor $t1,$lo
149 psrldq \$8,$R 149 psrldq \$8,$R
150 xor $t0,$hi 150 xor $t0,$hi
151 movq $R,$i1 151 movd $R,$i1
152 xor $i0,$lo 152 xor $i0,$lo
153 xor $i1,$hi 153 xor $i1,$hi
154 154
@@ -171,15 +171,15 @@ bn_GF2m_mul_2x2:
171 bt \$33,%rax 171 bt \$33,%rax
172 jnc .Lvanilla_mul_2x2 172 jnc .Lvanilla_mul_2x2
173 173
174 movq $a1,%xmm0 174 movd $a1,%xmm0
175 movq $b1,%xmm1 175 movd $b1,%xmm1
176 movq $a0,%xmm2 176 movd $a0,%xmm2
177___ 177___
178$code.=<<___ if ($win64); 178$code.=<<___ if ($win64);
179 movq 40(%rsp),%xmm3 179 movq 40(%rsp),%xmm3
180___ 180___
181$code.=<<___ if (!$win64); 181$code.=<<___ if (!$win64);
182 movq $b0,%xmm3 182 movd $b0,%xmm3
183___ 183___
184$code.=<<___; 184$code.=<<___;
185 movdqa %xmm0,%xmm4 185 movdqa %xmm0,%xmm4
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl
index 9c88884d42..81e5c53728 100755
--- a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl
@@ -120,7 +120,7 @@ $code.=<<___;
120 lea $STRIDE($bp),$bp 120 lea $STRIDE($bp),$bp
121 por %xmm3,%xmm0 121 por %xmm3,%xmm0
122 122
123 movq %xmm0,$m0 # m0=bp[0] 123 movd %xmm0,$m0 # m0=bp[0]
124 124
125 mov ($n0),$n0 # pull n0[0] value 125 mov ($n0),$n0 # pull n0[0] value
126 mov ($ap),%rax 126 mov ($ap),%rax
@@ -183,7 +183,7 @@ $code.=<<___;
183 cmp $num,$j 183 cmp $num,$j
184 jl .L1st 184 jl .L1st
185 185
186 movq %xmm0,$m0 # bp[1] 186 movd %xmm0,$m0 # bp[1]
187 187
188 add %rax,$hi1 188 add %rax,$hi1
189 mov ($ap),%rax # ap[0] 189 mov ($ap),%rax # ap[0]
@@ -266,7 +266,7 @@ $code.=<<___;
266 cmp $num,$j 266 cmp $num,$j
267 jl .Linner 267 jl .Linner
268 268
269 movq %xmm0,$m0 # bp[i+1] 269 movd %xmm0,$m0 # bp[i+1]
270 270
271 add %rax,$hi1 271 add %rax,$hi1
272 mov ($ap),%rax # ap[0] 272 mov ($ap),%rax # ap[0]
@@ -403,7 +403,7 @@ $code.=<<___;
403 lea $STRIDE($bp),$bp 403 lea $STRIDE($bp),$bp
404 por %xmm3,%xmm0 404 por %xmm3,%xmm0
405 405
406 movq %xmm0,$m0 # m0=bp[0] 406 movd %xmm0,$m0 # m0=bp[0]
407 mov ($n0),$n0 # pull n0[0] value 407 mov ($n0),$n0 # pull n0[0] value
408 mov ($ap),%rax 408 mov ($ap),%rax
409 409
@@ -550,7 +550,7 @@ $code.=<<___;
550 mov $N[1],-16(%rsp,$j,8) # tp[j-1] 550 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
551 mov %rdx,$N[0] 551 mov %rdx,$N[0]
552 552
553 movq %xmm0,$m0 # bp[1] 553 movd %xmm0,$m0 # bp[1]
554 554
555 xor $N[1],$N[1] 555 xor $N[1],$N[1]
556 add $A[0],$N[0] 556 add $A[0],$N[0]
@@ -718,7 +718,7 @@ $code.=<<___;
718 mov $N[0],-24(%rsp,$j,8) # tp[j-1] 718 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
719 mov %rdx,$N[0] 719 mov %rdx,$N[0]
720 720
721 movq %xmm0,$m0 # bp[i+1] 721 movd %xmm0,$m0 # bp[i+1]
722 mov $N[1],-16(%rsp,$j,8) # tp[j-1] 722 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
723 723
724 xor $N[1],$N[1] 724 xor $N[1],$N[1]
diff --git a/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl b/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl
index ed1f3ed6ab..82992f41e5 100755
--- a/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl
+++ b/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl
@@ -121,7 +121,7 @@ my %globals;
121 $self->{sz} = ""; 121 $self->{sz} = "";
122 } elsif ($self->{op} =~ /^v/) { # VEX 122 } elsif ($self->{op} =~ /^v/) { # VEX
123 $self->{sz} = ""; 123 $self->{sz} = "";
124 } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { 124 } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) {
125 $self->{sz} = ""; 125 $self->{sz} = "";
126 } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { 126 } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
127 $self->{op} = $1; 127 $self->{op} = $1;
@@ -698,6 +698,20 @@ my $pinsrd = sub {
698 } 698 }
699}; 699};
700 700
701my $pclmulqdq = sub {
702 if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
703 my @opcode=(0x66);
704 rex(\@opcode,$3,$2);
705 push @opcode,0x0f,0x3a,0x44;
706 push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
707 my $c=$1;
708 push @opcode,$c=~/^0/?oct($c):$c;
709 @opcode;
710 } else {
711 ();
712 }
713};
714
701if ($nasm) { 715if ($nasm) {
702 print <<___; 716 print <<___;
703default rel 717default rel