diff options
author | bcook <> | 2015-09-11 12:17:57 +0000 |
---|---|---|
committer | bcook <> | 2015-09-11 12:17:57 +0000 |
commit | 656ef1e2adb75d20a81b8763a2088b2c8e6c3058 (patch) | |
tree | 03ee90a380bddd9dac65bed24366a6dc8e0abd01 /src | |
parent | 52f5d74dfdc68d06be5b27e3df8b2fbb6387acf6 (diff) | |
download | openbsd-656ef1e2adb75d20a81b8763a2088b2c8e6c3058.tar.gz openbsd-656ef1e2adb75d20a81b8763a2088b2c8e6c3058.tar.bz2 openbsd-656ef1e2adb75d20a81b8763a2088b2c8e6c3058.zip |
Fixup inter-bank movq/movd operations, emit bytes for pclmulqdq again.
Fixes builds with gcc + Apple's assembler; working on re-enabling builds with older
OpenBSD releases.
based on OpenSSL commit:
https://git.openssl.org/?p=openssl.git;a=commitdiff;h=902b30df193afc3417a96ba72a81ed390bd50de3
ok miod@
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/asm/x86_64-gf2m.pl | 12 | ||||
-rwxr-xr-x | src/lib/libcrypto/bn/asm/x86_64-mont5.pl | 12 | ||||
-rwxr-xr-x | src/lib/libcrypto/perlasm/x86_64-xlate.pl | 16 | ||||
-rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl | 12 | ||||
-rwxr-xr-x | src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl | 12 | ||||
-rwxr-xr-x | src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl | 16 |
6 files changed, 54 insertions, 26 deletions
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl index 226c66c35e..8e45c7479b 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl | |||
@@ -143,12 +143,12 @@ $code.=<<___; | |||
143 | mov (%rsp,$i1,8),$t1 | 143 | mov (%rsp,$i1,8),$t1 |
144 | mov $t1,$t0 | 144 | mov $t1,$t0 |
145 | shl \$`8*$n-4`,$t1 | 145 | shl \$`8*$n-4`,$t1 |
146 | movq $R,$i0 | 146 | movd $R,$i0 |
147 | shr \$`64-(8*$n-4)`,$t0 | 147 | shr \$`64-(8*$n-4)`,$t0 |
148 | xor $t1,$lo | 148 | xor $t1,$lo |
149 | psrldq \$8,$R | 149 | psrldq \$8,$R |
150 | xor $t0,$hi | 150 | xor $t0,$hi |
151 | movq $R,$i1 | 151 | movd $R,$i1 |
152 | xor $i0,$lo | 152 | xor $i0,$lo |
153 | xor $i1,$hi | 153 | xor $i1,$hi |
154 | 154 | ||
@@ -171,15 +171,15 @@ bn_GF2m_mul_2x2: | |||
171 | bt \$33,%rax | 171 | bt \$33,%rax |
172 | jnc .Lvanilla_mul_2x2 | 172 | jnc .Lvanilla_mul_2x2 |
173 | 173 | ||
174 | movq $a1,%xmm0 | 174 | movd $a1,%xmm0 |
175 | movq $b1,%xmm1 | 175 | movd $b1,%xmm1 |
176 | movq $a0,%xmm2 | 176 | movd $a0,%xmm2 |
177 | ___ | 177 | ___ |
178 | $code.=<<___ if ($win64); | 178 | $code.=<<___ if ($win64); |
179 | movq 40(%rsp),%xmm3 | 179 | movq 40(%rsp),%xmm3 |
180 | ___ | 180 | ___ |
181 | $code.=<<___ if (!$win64); | 181 | $code.=<<___ if (!$win64); |
182 | movq $b0,%xmm3 | 182 | movd $b0,%xmm3 |
183 | ___ | 183 | ___ |
184 | $code.=<<___; | 184 | $code.=<<___; |
185 | movdqa %xmm0,%xmm4 | 185 | movdqa %xmm0,%xmm4 |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont5.pl b/src/lib/libcrypto/bn/asm/x86_64-mont5.pl index 9c88884d42..81e5c53728 100755 --- a/src/lib/libcrypto/bn/asm/x86_64-mont5.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-mont5.pl | |||
@@ -120,7 +120,7 @@ $code.=<<___; | |||
120 | lea $STRIDE($bp),$bp | 120 | lea $STRIDE($bp),$bp |
121 | por %xmm3,%xmm0 | 121 | por %xmm3,%xmm0 |
122 | 122 | ||
123 | movq %xmm0,$m0 # m0=bp[0] | 123 | movd %xmm0,$m0 # m0=bp[0] |
124 | 124 | ||
125 | mov ($n0),$n0 # pull n0[0] value | 125 | mov ($n0),$n0 # pull n0[0] value |
126 | mov ($ap),%rax | 126 | mov ($ap),%rax |
@@ -183,7 +183,7 @@ $code.=<<___; | |||
183 | cmp $num,$j | 183 | cmp $num,$j |
184 | jl .L1st | 184 | jl .L1st |
185 | 185 | ||
186 | movq %xmm0,$m0 # bp[1] | 186 | movd %xmm0,$m0 # bp[1] |
187 | 187 | ||
188 | add %rax,$hi1 | 188 | add %rax,$hi1 |
189 | mov ($ap),%rax # ap[0] | 189 | mov ($ap),%rax # ap[0] |
@@ -266,7 +266,7 @@ $code.=<<___; | |||
266 | cmp $num,$j | 266 | cmp $num,$j |
267 | jl .Linner | 267 | jl .Linner |
268 | 268 | ||
269 | movq %xmm0,$m0 # bp[i+1] | 269 | movd %xmm0,$m0 # bp[i+1] |
270 | 270 | ||
271 | add %rax,$hi1 | 271 | add %rax,$hi1 |
272 | mov ($ap),%rax # ap[0] | 272 | mov ($ap),%rax # ap[0] |
@@ -403,7 +403,7 @@ $code.=<<___; | |||
403 | lea $STRIDE($bp),$bp | 403 | lea $STRIDE($bp),$bp |
404 | por %xmm3,%xmm0 | 404 | por %xmm3,%xmm0 |
405 | 405 | ||
406 | movq %xmm0,$m0 # m0=bp[0] | 406 | movd %xmm0,$m0 # m0=bp[0] |
407 | mov ($n0),$n0 # pull n0[0] value | 407 | mov ($n0),$n0 # pull n0[0] value |
408 | mov ($ap),%rax | 408 | mov ($ap),%rax |
409 | 409 | ||
@@ -550,7 +550,7 @@ $code.=<<___; | |||
550 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] | 550 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] |
551 | mov %rdx,$N[0] | 551 | mov %rdx,$N[0] |
552 | 552 | ||
553 | movq %xmm0,$m0 # bp[1] | 553 | movd %xmm0,$m0 # bp[1] |
554 | 554 | ||
555 | xor $N[1],$N[1] | 555 | xor $N[1],$N[1] |
556 | add $A[0],$N[0] | 556 | add $A[0],$N[0] |
@@ -718,7 +718,7 @@ $code.=<<___; | |||
718 | mov $N[0],-24(%rsp,$j,8) # tp[j-1] | 718 | mov $N[0],-24(%rsp,$j,8) # tp[j-1] |
719 | mov %rdx,$N[0] | 719 | mov %rdx,$N[0] |
720 | 720 | ||
721 | movq %xmm0,$m0 # bp[i+1] | 721 | movd %xmm0,$m0 # bp[i+1] |
722 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] | 722 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] |
723 | 723 | ||
724 | xor $N[1],$N[1] | 724 | xor $N[1],$N[1] |
diff --git a/src/lib/libcrypto/perlasm/x86_64-xlate.pl b/src/lib/libcrypto/perlasm/x86_64-xlate.pl index ed1f3ed6ab..82992f41e5 100755 --- a/src/lib/libcrypto/perlasm/x86_64-xlate.pl +++ b/src/lib/libcrypto/perlasm/x86_64-xlate.pl | |||
@@ -121,7 +121,7 @@ my %globals; | |||
121 | $self->{sz} = ""; | 121 | $self->{sz} = ""; |
122 | } elsif ($self->{op} =~ /^v/) { # VEX | 122 | } elsif ($self->{op} =~ /^v/) { # VEX |
123 | $self->{sz} = ""; | 123 | $self->{sz} = ""; |
124 | } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { | 124 | } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { |
125 | $self->{sz} = ""; | 125 | $self->{sz} = ""; |
126 | } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { | 126 | } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { |
127 | $self->{op} = $1; | 127 | $self->{op} = $1; |
@@ -698,6 +698,20 @@ my $pinsrd = sub { | |||
698 | } | 698 | } |
699 | }; | 699 | }; |
700 | 700 | ||
701 | my $pclmulqdq = sub { | ||
702 | if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { | ||
703 | my @opcode=(0x66); | ||
704 | rex(\@opcode,$3,$2); | ||
705 | push @opcode,0x0f,0x3a,0x44; | ||
706 | push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M | ||
707 | my $c=$1; | ||
708 | push @opcode,$c=~/^0/?oct($c):$c; | ||
709 | @opcode; | ||
710 | } else { | ||
711 | (); | ||
712 | } | ||
713 | }; | ||
714 | |||
701 | if ($nasm) { | 715 | if ($nasm) { |
702 | print <<___; | 716 | print <<___; |
703 | default rel | 717 | default rel |
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl index 226c66c35e..8e45c7479b 100644 --- a/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl +++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl | |||
@@ -143,12 +143,12 @@ $code.=<<___; | |||
143 | mov (%rsp,$i1,8),$t1 | 143 | mov (%rsp,$i1,8),$t1 |
144 | mov $t1,$t0 | 144 | mov $t1,$t0 |
145 | shl \$`8*$n-4`,$t1 | 145 | shl \$`8*$n-4`,$t1 |
146 | movq $R,$i0 | 146 | movd $R,$i0 |
147 | shr \$`64-(8*$n-4)`,$t0 | 147 | shr \$`64-(8*$n-4)`,$t0 |
148 | xor $t1,$lo | 148 | xor $t1,$lo |
149 | psrldq \$8,$R | 149 | psrldq \$8,$R |
150 | xor $t0,$hi | 150 | xor $t0,$hi |
151 | movq $R,$i1 | 151 | movd $R,$i1 |
152 | xor $i0,$lo | 152 | xor $i0,$lo |
153 | xor $i1,$hi | 153 | xor $i1,$hi |
154 | 154 | ||
@@ -171,15 +171,15 @@ bn_GF2m_mul_2x2: | |||
171 | bt \$33,%rax | 171 | bt \$33,%rax |
172 | jnc .Lvanilla_mul_2x2 | 172 | jnc .Lvanilla_mul_2x2 |
173 | 173 | ||
174 | movq $a1,%xmm0 | 174 | movd $a1,%xmm0 |
175 | movq $b1,%xmm1 | 175 | movd $b1,%xmm1 |
176 | movq $a0,%xmm2 | 176 | movd $a0,%xmm2 |
177 | ___ | 177 | ___ |
178 | $code.=<<___ if ($win64); | 178 | $code.=<<___ if ($win64); |
179 | movq 40(%rsp),%xmm3 | 179 | movq 40(%rsp),%xmm3 |
180 | ___ | 180 | ___ |
181 | $code.=<<___ if (!$win64); | 181 | $code.=<<___ if (!$win64); |
182 | movq $b0,%xmm3 | 182 | movd $b0,%xmm3 |
183 | ___ | 183 | ___ |
184 | $code.=<<___; | 184 | $code.=<<___; |
185 | movdqa %xmm0,%xmm4 | 185 | movdqa %xmm0,%xmm4 |
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl index 9c88884d42..81e5c53728 100755 --- a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl +++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl | |||
@@ -120,7 +120,7 @@ $code.=<<___; | |||
120 | lea $STRIDE($bp),$bp | 120 | lea $STRIDE($bp),$bp |
121 | por %xmm3,%xmm0 | 121 | por %xmm3,%xmm0 |
122 | 122 | ||
123 | movq %xmm0,$m0 # m0=bp[0] | 123 | movd %xmm0,$m0 # m0=bp[0] |
124 | 124 | ||
125 | mov ($n0),$n0 # pull n0[0] value | 125 | mov ($n0),$n0 # pull n0[0] value |
126 | mov ($ap),%rax | 126 | mov ($ap),%rax |
@@ -183,7 +183,7 @@ $code.=<<___; | |||
183 | cmp $num,$j | 183 | cmp $num,$j |
184 | jl .L1st | 184 | jl .L1st |
185 | 185 | ||
186 | movq %xmm0,$m0 # bp[1] | 186 | movd %xmm0,$m0 # bp[1] |
187 | 187 | ||
188 | add %rax,$hi1 | 188 | add %rax,$hi1 |
189 | mov ($ap),%rax # ap[0] | 189 | mov ($ap),%rax # ap[0] |
@@ -266,7 +266,7 @@ $code.=<<___; | |||
266 | cmp $num,$j | 266 | cmp $num,$j |
267 | jl .Linner | 267 | jl .Linner |
268 | 268 | ||
269 | movq %xmm0,$m0 # bp[i+1] | 269 | movd %xmm0,$m0 # bp[i+1] |
270 | 270 | ||
271 | add %rax,$hi1 | 271 | add %rax,$hi1 |
272 | mov ($ap),%rax # ap[0] | 272 | mov ($ap),%rax # ap[0] |
@@ -403,7 +403,7 @@ $code.=<<___; | |||
403 | lea $STRIDE($bp),$bp | 403 | lea $STRIDE($bp),$bp |
404 | por %xmm3,%xmm0 | 404 | por %xmm3,%xmm0 |
405 | 405 | ||
406 | movq %xmm0,$m0 # m0=bp[0] | 406 | movd %xmm0,$m0 # m0=bp[0] |
407 | mov ($n0),$n0 # pull n0[0] value | 407 | mov ($n0),$n0 # pull n0[0] value |
408 | mov ($ap),%rax | 408 | mov ($ap),%rax |
409 | 409 | ||
@@ -550,7 +550,7 @@ $code.=<<___; | |||
550 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] | 550 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] |
551 | mov %rdx,$N[0] | 551 | mov %rdx,$N[0] |
552 | 552 | ||
553 | movq %xmm0,$m0 # bp[1] | 553 | movd %xmm0,$m0 # bp[1] |
554 | 554 | ||
555 | xor $N[1],$N[1] | 555 | xor $N[1],$N[1] |
556 | add $A[0],$N[0] | 556 | add $A[0],$N[0] |
@@ -718,7 +718,7 @@ $code.=<<___; | |||
718 | mov $N[0],-24(%rsp,$j,8) # tp[j-1] | 718 | mov $N[0],-24(%rsp,$j,8) # tp[j-1] |
719 | mov %rdx,$N[0] | 719 | mov %rdx,$N[0] |
720 | 720 | ||
721 | movq %xmm0,$m0 # bp[i+1] | 721 | movd %xmm0,$m0 # bp[i+1] |
722 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] | 722 | mov $N[1],-16(%rsp,$j,8) # tp[j-1] |
723 | 723 | ||
724 | xor $N[1],$N[1] | 724 | xor $N[1],$N[1] |
diff --git a/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl b/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl index ed1f3ed6ab..82992f41e5 100755 --- a/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl +++ b/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl | |||
@@ -121,7 +121,7 @@ my %globals; | |||
121 | $self->{sz} = ""; | 121 | $self->{sz} = ""; |
122 | } elsif ($self->{op} =~ /^v/) { # VEX | 122 | } elsif ($self->{op} =~ /^v/) { # VEX |
123 | $self->{sz} = ""; | 123 | $self->{sz} = ""; |
124 | } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { | 124 | } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { |
125 | $self->{sz} = ""; | 125 | $self->{sz} = ""; |
126 | } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { | 126 | } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { |
127 | $self->{op} = $1; | 127 | $self->{op} = $1; |
@@ -698,6 +698,20 @@ my $pinsrd = sub { | |||
698 | } | 698 | } |
699 | }; | 699 | }; |
700 | 700 | ||
701 | my $pclmulqdq = sub { | ||
702 | if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { | ||
703 | my @opcode=(0x66); | ||
704 | rex(\@opcode,$3,$2); | ||
705 | push @opcode,0x0f,0x3a,0x44; | ||
706 | push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M | ||
707 | my $c=$1; | ||
708 | push @opcode,$c=~/^0/?oct($c):$c; | ||
709 | @opcode; | ||
710 | } else { | ||
711 | (); | ||
712 | } | ||
713 | }; | ||
714 | |||
701 | if ($nasm) { | 715 | if ($nasm) { |
702 | print <<___; | 716 | print <<___; |
703 | default rel | 717 | default rel |