diff options
Diffstat (limited to 'src/lib/libcrypto/x86_64cpuid.pl')
| -rw-r--r-- | src/lib/libcrypto/x86_64cpuid.pl | 90 |
1 file changed, 20 insertions, 70 deletions
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl index 7b7b93b223..b771a8539d 100644 --- a/src/lib/libcrypto/x86_64cpuid.pl +++ b/src/lib/libcrypto/x86_64cpuid.pl | |||
| @@ -7,23 +7,15 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |||
| 7 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | 7 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
| 8 | 8 | ||
| 9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | 9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 10 | ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | 10 | open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; |
| 11 | ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or | ||
| 12 | die "can't locate x86_64-xlate.pl"; | ||
| 13 | |||
| 14 | open STDOUT,"| $^X $xlate $flavour $output"; | ||
| 15 | |||
| 16 | ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order | ||
| 17 | ("%rdi","%rsi","%rdx","%rcx"); # Unix order | ||
| 18 | 11 | ||
| 12 | if ($win64) { $arg1="%rcx"; $arg2="%rdx"; } | ||
| 13 | else { $arg1="%rdi"; $arg2="%rsi"; } | ||
| 19 | print<<___; | 14 | print<<___; |
| 15 | #include <machine/asm.h> | ||
| 20 | .extern OPENSSL_cpuid_setup | 16 | .extern OPENSSL_cpuid_setup |
| 21 | .hidden OPENSSL_cpuid_setup | ||
| 22 | .section .init | 17 | .section .init |
| 23 | call OPENSSL_cpuid_setup | 18 | call PIC_PLT(OPENSSL_cpuid_setup) |
| 24 | |||
| 25 | .hidden OPENSSL_ia32cap_P | ||
| 26 | .comm OPENSSL_ia32cap_P,8,4 | ||
| 27 | 19 | ||
| 28 | .text | 20 | .text |
| 29 | 21 | ||
| @@ -55,7 +47,7 @@ OPENSSL_rdtsc: | |||
| 55 | .type OPENSSL_ia32_cpuid,\@abi-omnipotent | 47 | .type OPENSSL_ia32_cpuid,\@abi-omnipotent |
| 56 | .align 16 | 48 | .align 16 |
| 57 | OPENSSL_ia32_cpuid: | 49 | OPENSSL_ia32_cpuid: |
| 58 | mov %rbx,%r8 # save %rbx | 50 | mov %rbx,%r8 |
| 59 | 51 | ||
| 60 | xor %eax,%eax | 52 | xor %eax,%eax |
| 61 | cpuid | 53 | cpuid |
| @@ -87,15 +79,7 @@ OPENSSL_ia32_cpuid: | |||
| 87 | # AMD specific | 79 | # AMD specific |
| 88 | mov \$0x80000000,%eax | 80 | mov \$0x80000000,%eax |
| 89 | cpuid | 81 | cpuid |
| 90 | cmp \$0x80000001,%eax | 82 | cmp \$0x80000008,%eax |
| 91 | jb .Lintel | ||
| 92 | mov %eax,%r10d | ||
| 93 | mov \$0x80000001,%eax | ||
| 94 | cpuid | ||
| 95 | or %ecx,%r9d | ||
| 96 | and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 | ||
| 97 | |||
| 98 | cmp \$0x80000008,%r10d | ||
| 99 | jb .Lintel | 83 | jb .Lintel |
| 100 | 84 | ||
| 101 | mov \$0x80000008,%eax | 85 | mov \$0x80000008,%eax |
| @@ -106,12 +90,12 @@ OPENSSL_ia32_cpuid: | |||
| 106 | mov \$1,%eax | 90 | mov \$1,%eax |
| 107 | cpuid | 91 | cpuid |
| 108 | bt \$28,%edx # test hyper-threading bit | 92 | bt \$28,%edx # test hyper-threading bit |
| 109 | jnc .Lgeneric | 93 | jnc .Ldone |
| 110 | shr \$16,%ebx # number of logical processors | 94 | shr \$16,%ebx # number of logical processors |
| 111 | cmp %r10b,%bl | 95 | cmp %r10b,%bl |
| 112 | ja .Lgeneric | 96 | ja .Ldone |
| 113 | and \$0xefffffff,%edx # ~(1<<28) | 97 | and \$0xefffffff,%edx # ~(1<<28) |
| 114 | jmp .Lgeneric | 98 | jmp .Ldone |
| 115 | 99 | ||
| 116 | .Lintel: | 100 | .Lintel: |
| 117 | cmp \$4,%r11d | 101 | cmp \$4,%r11d |
| @@ -128,47 +112,30 @@ OPENSSL_ia32_cpuid: | |||
| 128 | .Lnocacheinfo: | 112 | .Lnocacheinfo: |
| 129 | mov \$1,%eax | 113 | mov \$1,%eax |
| 130 | cpuid | 114 | cpuid |
| 131 | and \$0xbfefffff,%edx # force reserved bits to 0 | ||
| 132 | cmp \$0,%r9d | 115 | cmp \$0,%r9d |
| 133 | jne .Lnotintel | 116 | jne .Lnotintel |
| 134 | or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs | 117 | or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR |
| 135 | and \$15,%ah | 118 | and \$15,%ah |
| 136 | cmp \$15,%ah # examine Family ID | 119 | cmp \$15,%ah # examine Family ID |
| 137 | jne .Lnotintel | 120 | je .Lnotintel |
| 138 | or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR | 121 | or \$0x40000000,%edx # use reserved bit to skip unrolled loop |
| 139 | .Lnotintel: | 122 | .Lnotintel: |
| 140 | bt \$28,%edx # test hyper-threading bit | 123 | bt \$28,%edx # test hyper-threading bit |
| 141 | jnc .Lgeneric | 124 | jnc .Ldone |
| 142 | and \$0xefffffff,%edx # ~(1<<28) | 125 | and \$0xefffffff,%edx # ~(1<<28) |
| 143 | cmp \$0,%r10d | 126 | cmp \$0,%r10d |
| 144 | je .Lgeneric | 127 | je .Ldone |
| 145 | 128 | ||
| 146 | or \$0x10000000,%edx # 1<<28 | 129 | or \$0x10000000,%edx # 1<<28 |
| 147 | shr \$16,%ebx | 130 | shr \$16,%ebx |
| 148 | cmp \$1,%bl # see if cache is shared | 131 | cmp \$1,%bl # see if cache is shared |
| 149 | ja .Lgeneric | 132 | ja .Ldone |
| 150 | and \$0xefffffff,%edx # ~(1<<28) | 133 | and \$0xefffffff,%edx # ~(1<<28) |
| 151 | .Lgeneric: | ||
| 152 | and \$0x00000800,%r9d # isolate AMD XOP flag | ||
| 153 | and \$0xfffff7ff,%ecx | ||
| 154 | or %ecx,%r9d # merge AMD XOP flag | ||
| 155 | |||
| 156 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx | ||
| 157 | bt \$27,%r9d # check OSXSAVE bit | ||
| 158 | jnc .Lclear_avx | ||
| 159 | xor %ecx,%ecx # XCR0 | ||
| 160 | .byte 0x0f,0x01,0xd0 # xgetbv | ||
| 161 | and \$6,%eax # isolate XMM and YMM state support | ||
| 162 | cmp \$6,%eax | ||
| 163 | je .Ldone | ||
| 164 | .Lclear_avx: | ||
| 165 | mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) | ||
| 166 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits | ||
| 167 | .Ldone: | 134 | .Ldone: |
| 168 | shl \$32,%r9 | 135 | shl \$32,%rcx |
| 169 | mov %r10d,%eax | 136 | mov %edx,%eax |
| 170 | mov %r8,%rbx # restore %rbx | 137 | mov %r8,%rbx |
| 171 | or %r9,%rax | 138 | or %rcx,%rax |
| 172 | ret | 139 | ret |
| 173 | .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid | 140 | .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid |
| 174 | 141 | ||
| @@ -263,21 +230,4 @@ OPENSSL_wipe_cpu: | |||
| 263 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | 230 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
| 264 | ___ | 231 | ___ |
| 265 | 232 | ||
| 266 | print<<___; | ||
| 267 | .globl OPENSSL_ia32_rdrand | ||
| 268 | .type OPENSSL_ia32_rdrand,\@abi-omnipotent | ||
| 269 | .align 16 | ||
| 270 | OPENSSL_ia32_rdrand: | ||
| 271 | mov \$8,%ecx | ||
| 272 | .Loop_rdrand: | ||
| 273 | rdrand %rax | ||
| 274 | jc .Lbreak_rdrand | ||
| 275 | loop .Loop_rdrand | ||
| 276 | .Lbreak_rdrand: | ||
| 277 | cmp \$0,%rax | ||
| 278 | cmove %rcx,%rax | ||
| 279 | ret | ||
| 280 | .size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand | ||
| 281 | ___ | ||
| 282 | |||
| 283 | close STDOUT; # flush | 233 | close STDOUT; # flush |
