diff options
-rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-x86_64.pl | 97 |
1 files changed, 58 insertions, 39 deletions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl index c073667fcb..f0b30109ae 100644 --- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl | |||
@@ -1007,7 +1007,7 @@ ___ | |||
1007 | # does not update *ivec! (see engine/eng_aesni.c for details) | 1007 | # does not update *ivec! (see engine/eng_aesni.c for details) |
1008 | # | 1008 | # |
1009 | { | 1009 | { |
1010 | my $reserved = $win64?0:-0x28; | 1010 | my $frame_size = 0x20+($win64?160:0); |
1011 | my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); | 1011 | my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); |
1012 | my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); | 1012 | my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); |
1013 | my $bswap_mask="%xmm15"; | 1013 | my $bswap_mask="%xmm15"; |
@@ -1017,9 +1017,11 @@ $code.=<<___; | |||
1017 | .type aesni_ctr32_encrypt_blocks,\@function,5 | 1017 | .type aesni_ctr32_encrypt_blocks,\@function,5 |
1018 | .align 16 | 1018 | .align 16 |
1019 | aesni_ctr32_encrypt_blocks: | 1019 | aesni_ctr32_encrypt_blocks: |
1020 | lea (%rsp),%rax | ||
1021 | push %rbp | ||
1022 | sub \$$frame_size,%rsp | ||
1020 | ___ | 1023 | ___ |
1021 | $code.=<<___ if ($win64); | 1024 | $code.=<<___ if ($win64); |
1022 | lea -0xc8(%rsp),%rsp | ||
1023 | movaps %xmm6,0x20(%rsp) | 1025 | movaps %xmm6,0x20(%rsp) |
1024 | movaps %xmm7,0x30(%rsp) | 1026 | movaps %xmm7,0x30(%rsp) |
1025 | movaps %xmm8,0x40(%rsp) | 1027 | movaps %xmm8,0x40(%rsp) |
@@ -1033,6 +1035,7 @@ $code.=<<___ if ($win64); | |||
1033 | .Lctr32_body: | 1035 | .Lctr32_body: |
1034 | ___ | 1036 | ___ |
1035 | $code.=<<___; | 1037 | $code.=<<___; |
1038 | lea -8(%rax),%rbp | ||
1036 | cmp \$1,$len | 1039 | cmp \$1,$len |
1037 | je .Lctr32_one_shortcut | 1040 | je .Lctr32_one_shortcut |
1038 | 1041 | ||
@@ -1057,9 +1060,9 @@ $code.=<<___; | |||
1057 | pinsrd \$2,$rnds_,$iv0 | 1060 | pinsrd \$2,$rnds_,$iv0 |
1058 | inc $key_ | 1061 | inc $key_ |
1059 | pinsrd \$2,$key_,$iv1 | 1062 | pinsrd \$2,$key_,$iv1 |
1060 | movdqa $iv0,$reserved(%rsp) | 1063 | movdqa $iv0,0x00(%rsp) |
1061 | pshufb $bswap_mask,$iv0 | 1064 | pshufb $bswap_mask,$iv0 |
1062 | movdqa $iv1,`$reserved+0x10`(%rsp) | 1065 | movdqa $iv1,0x10(%rsp) |
1063 | pshufb $bswap_mask,$iv1 | 1066 | pshufb $bswap_mask,$iv1 |
1064 | 1067 | ||
1065 | pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword | 1068 | pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword |
@@ -1099,7 +1102,7 @@ $code.=<<___; | |||
1099 | movdqa .Lincrement32(%rip),$iv1 | 1102 | movdqa .Lincrement32(%rip),$iv1 |
1100 | pxor $rndkey0,$inout3 | 1103 | pxor $rndkey0,$inout3 |
1101 | aesenc $rndkey1,$inout2 | 1104 | aesenc $rndkey1,$inout2 |
1102 | movdqa $reserved(%rsp),$iv0 | 1105 | movdqa (%rsp),$iv0 |
1103 | pxor $rndkey0,$inout4 | 1106 | pxor $rndkey0,$inout4 |
1104 | aesenc $rndkey1,$inout3 | 1107 | aesenc $rndkey1,$inout3 |
1105 | pxor $rndkey0,$inout5 | 1108 | pxor $rndkey0,$inout5 |
@@ -1132,11 +1135,11 @@ $code.=<<___; | |||
1132 | aesenc $rndkey1,$inout0 | 1135 | aesenc $rndkey1,$inout0 |
1133 | paddd $iv1,$iv0 # increment counter vector | 1136 | paddd $iv1,$iv0 # increment counter vector |
1134 | aesenc $rndkey1,$inout1 | 1137 | aesenc $rndkey1,$inout1 |
1135 | paddd `$reserved+0x10`(%rsp),$iv1 | 1138 | paddd 0x10(%rsp),$iv1 |
1136 | aesenc $rndkey1,$inout2 | 1139 | aesenc $rndkey1,$inout2 |
1137 | movdqa $iv0,$reserved(%rsp) # save counter vector | 1140 | movdqa $iv0,0x00(%rsp) # save counter vector |
1138 | aesenc $rndkey1,$inout3 | 1141 | aesenc $rndkey1,$inout3 |
1139 | movdqa $iv1,`$reserved+0x10`(%rsp) | 1142 | movdqa $iv1,0x10(%rsp) |
1140 | aesenc $rndkey1,$inout4 | 1143 | aesenc $rndkey1,$inout4 |
1141 | pshufb $bswap_mask,$iv0 # byte swap | 1144 | pshufb $bswap_mask,$iv0 # byte swap |
1142 | aesenc $rndkey1,$inout5 | 1145 | aesenc $rndkey1,$inout5 |
@@ -1279,10 +1282,11 @@ $code.=<<___ if ($win64); | |||
1279 | movaps 0x90(%rsp),%xmm13 | 1282 | movaps 0x90(%rsp),%xmm13 |
1280 | movaps 0xa0(%rsp),%xmm14 | 1283 | movaps 0xa0(%rsp),%xmm14 |
1281 | movaps 0xb0(%rsp),%xmm15 | 1284 | movaps 0xb0(%rsp),%xmm15 |
1282 | lea 0xc8(%rsp),%rsp | ||
1283 | .Lctr32_ret: | ||
1284 | ___ | 1285 | ___ |
1285 | $code.=<<___; | 1286 | $code.=<<___; |
1287 | lea (%rbp),%rsp | ||
1288 | pop %rbp | ||
1289 | .Lctr32_ret: | ||
1286 | ret | 1290 | ret |
1287 | .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks | 1291 | .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks |
1288 | ___ | 1292 | ___ |
@@ -1297,14 +1301,16 @@ ___ | |||
1297 | my @tweak=map("%xmm$_",(10..15)); | 1301 | my @tweak=map("%xmm$_",(10..15)); |
1298 | my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); | 1302 | my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); |
1299 | my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); | 1303 | my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); |
1300 | my $frame_size = 0x68 + ($win64?160:0); | 1304 | my $frame_size = 0x60 + ($win64?160:0); |
1301 | 1305 | ||
1302 | $code.=<<___; | 1306 | $code.=<<___; |
1303 | .globl aesni_xts_encrypt | 1307 | .globl aesni_xts_encrypt |
1304 | .type aesni_xts_encrypt,\@function,6 | 1308 | .type aesni_xts_encrypt,\@function,6 |
1305 | .align 16 | 1309 | .align 16 |
1306 | aesni_xts_encrypt: | 1310 | aesni_xts_encrypt: |
1307 | lea -$frame_size(%rsp),%rsp | 1311 | lea (%rsp),%rax |
1312 | push %rbp | ||
1313 | sub \$$frame_size,%rsp | ||
1308 | ___ | 1314 | ___ |
1309 | $code.=<<___ if ($win64); | 1315 | $code.=<<___ if ($win64); |
1310 | movaps %xmm6,0x60(%rsp) | 1316 | movaps %xmm6,0x60(%rsp) |
@@ -1320,6 +1326,7 @@ $code.=<<___ if ($win64); | |||
1320 | .Lxts_enc_body: | 1326 | .Lxts_enc_body: |
1321 | ___ | 1327 | ___ |
1322 | $code.=<<___; | 1328 | $code.=<<___; |
1329 | lea -8(%rax),%rbp | ||
1323 | movups ($ivp),@tweak[5] # load clear-text tweak | 1330 | movups ($ivp),@tweak[5] # load clear-text tweak |
1324 | mov 240(%r8),$rounds # key2->rounds | 1331 | mov 240(%r8),$rounds # key2->rounds |
1325 | mov 240($key),$rnds_ # key1->rounds | 1332 | mov 240($key),$rnds_ # key1->rounds |
@@ -1683,7 +1690,8 @@ $code.=<<___ if ($win64); | |||
1683 | movaps 0xf0(%rsp),%xmm15 | 1690 | movaps 0xf0(%rsp),%xmm15 |
1684 | ___ | 1691 | ___ |
1685 | $code.=<<___; | 1692 | $code.=<<___; |
1686 | lea $frame_size(%rsp),%rsp | 1693 | lea (%rbp),%rsp |
1694 | pop %rbp | ||
1687 | .Lxts_enc_epilogue: | 1695 | .Lxts_enc_epilogue: |
1688 | ret | 1696 | ret |
1689 | .size aesni_xts_encrypt,.-aesni_xts_encrypt | 1697 | .size aesni_xts_encrypt,.-aesni_xts_encrypt |
@@ -1694,7 +1702,9 @@ $code.=<<___; | |||
1694 | .type aesni_xts_decrypt,\@function,6 | 1702 | .type aesni_xts_decrypt,\@function,6 |
1695 | .align 16 | 1703 | .align 16 |
1696 | aesni_xts_decrypt: | 1704 | aesni_xts_decrypt: |
1697 | lea -$frame_size(%rsp),%rsp | 1705 | lea (%rsp),%rax |
1706 | push %rbp | ||
1707 | sub \$$frame_size,%rsp | ||
1698 | ___ | 1708 | ___ |
1699 | $code.=<<___ if ($win64); | 1709 | $code.=<<___ if ($win64); |
1700 | movaps %xmm6,0x60(%rsp) | 1710 | movaps %xmm6,0x60(%rsp) |
@@ -1710,6 +1720,7 @@ $code.=<<___ if ($win64); | |||
1710 | .Lxts_dec_body: | 1720 | .Lxts_dec_body: |
1711 | ___ | 1721 | ___ |
1712 | $code.=<<___; | 1722 | $code.=<<___; |
1723 | lea -8(%rax),%rbp | ||
1713 | movups ($ivp),@tweak[5] # load clear-text tweak | 1724 | movups ($ivp),@tweak[5] # load clear-text tweak |
1714 | mov 240($key2),$rounds # key2->rounds | 1725 | mov 240($key2),$rounds # key2->rounds |
1715 | mov 240($key),$rnds_ # key1->rounds | 1726 | mov 240($key),$rnds_ # key1->rounds |
@@ -2109,7 +2120,8 @@ $code.=<<___ if ($win64); | |||
2109 | movaps 0xf0(%rsp),%xmm15 | 2120 | movaps 0xf0(%rsp),%xmm15 |
2110 | ___ | 2121 | ___ |
2111 | $code.=<<___; | 2122 | $code.=<<___; |
2112 | lea $frame_size(%rsp),%rsp | 2123 | lea (%rbp),%rsp |
2124 | pop %rbp | ||
2113 | .Lxts_dec_epilogue: | 2125 | .Lxts_dec_epilogue: |
2114 | ret | 2126 | ret |
2115 | .size aesni_xts_decrypt,.-aesni_xts_decrypt | 2127 | .size aesni_xts_decrypt,.-aesni_xts_decrypt |
@@ -2121,7 +2133,7 @@ ___ | |||
2121 | # size_t length, const AES_KEY *key, | 2133 | # size_t length, const AES_KEY *key, |
2122 | # unsigned char *ivp,const int enc); | 2134 | # unsigned char *ivp,const int enc); |
2123 | { | 2135 | { |
2124 | my $reserved = $win64?0x40:-0x18; # used in decrypt | 2136 | my $frame_size = 0x10 + ($win64?0x40:0); # used in decrypt |
2125 | $code.=<<___; | 2137 | $code.=<<___; |
2126 | .globl ${PREFIX}_cbc_encrypt | 2138 | .globl ${PREFIX}_cbc_encrypt |
2127 | .type ${PREFIX}_cbc_encrypt,\@function,6 | 2139 | .type ${PREFIX}_cbc_encrypt,\@function,6 |
@@ -2177,16 +2189,19 @@ $code.=<<___; | |||
2177 | #--------------------------- CBC DECRYPT ------------------------------# | 2189 | #--------------------------- CBC DECRYPT ------------------------------# |
2178 | .align 16 | 2190 | .align 16 |
2179 | .Lcbc_decrypt: | 2191 | .Lcbc_decrypt: |
2192 | lea (%rsp),%rax | ||
2193 | push %rbp | ||
2194 | sub \$$frame_size,%rsp | ||
2180 | ___ | 2195 | ___ |
2181 | $code.=<<___ if ($win64); | 2196 | $code.=<<___ if ($win64); |
2182 | lea -0x58(%rsp),%rsp | 2197 | movaps %xmm6,0x10(%rsp) |
2183 | movaps %xmm6,(%rsp) | 2198 | movaps %xmm7,0x20(%rsp) |
2184 | movaps %xmm7,0x10(%rsp) | 2199 | movaps %xmm8,0x30(%rsp) |
2185 | movaps %xmm8,0x20(%rsp) | 2200 | movaps %xmm9,0x40(%rsp) |
2186 | movaps %xmm9,0x30(%rsp) | ||
2187 | .Lcbc_decrypt_body: | 2201 | .Lcbc_decrypt_body: |
2188 | ___ | 2202 | ___ |
2189 | $code.=<<___; | 2203 | $code.=<<___; |
2204 | lea -8(%rax),%rbp | ||
2190 | movups ($ivp),$iv | 2205 | movups ($ivp),$iv |
2191 | mov $rnds_,$rounds | 2206 | mov $rnds_,$rounds |
2192 | cmp \$0x70,$len | 2207 | cmp \$0x70,$len |
@@ -2194,11 +2209,11 @@ $code.=<<___; | |||
2194 | shr \$1,$rnds_ | 2209 | shr \$1,$rnds_ |
2195 | sub \$0x70,$len | 2210 | sub \$0x70,$len |
2196 | mov $rnds_,$rounds | 2211 | mov $rnds_,$rounds |
2197 | movaps $iv,$reserved(%rsp) | 2212 | movaps $iv,(%rsp) |
2198 | jmp .Lcbc_dec_loop8_enter | 2213 | jmp .Lcbc_dec_loop8_enter |
2199 | .align 16 | 2214 | .align 16 |
2200 | .Lcbc_dec_loop8: | 2215 | .Lcbc_dec_loop8: |
2201 | movaps $rndkey0,$reserved(%rsp) # save IV | 2216 | movaps $rndkey0,(%rsp) # save IV |
2202 | movups $inout7,($out) | 2217 | movups $inout7,($out) |
2203 | lea 0x10($out),$out | 2218 | lea 0x10($out),$out |
2204 | .Lcbc_dec_loop8_enter: | 2219 | .Lcbc_dec_loop8_enter: |
@@ -2238,7 +2253,7 @@ $code.=<<___; | |||
2238 | 2253 | ||
2239 | movups ($inp),$rndkey1 # re-load input | 2254 | movups ($inp),$rndkey1 # re-load input |
2240 | movups 0x10($inp),$rndkey0 | 2255 | movups 0x10($inp),$rndkey0 |
2241 | xorps $reserved(%rsp),$inout0 # ^= IV | 2256 | xorps (%rsp),$inout0 # ^= IV |
2242 | xorps $rndkey1,$inout1 | 2257 | xorps $rndkey1,$inout1 |
2243 | movups 0x20($inp),$rndkey1 | 2258 | movups 0x20($inp),$rndkey1 |
2244 | xorps $rndkey0,$inout2 | 2259 | xorps $rndkey0,$inout2 |
@@ -2302,11 +2317,11 @@ $code.=<<___; | |||
2302 | jbe .Lcbc_dec_six | 2317 | jbe .Lcbc_dec_six |
2303 | 2318 | ||
2304 | movups 0x60($inp),$inout6 | 2319 | movups 0x60($inp),$inout6 |
2305 | movaps $iv,$reserved(%rsp) # save IV | 2320 | movaps $iv,(%rsp) # save IV |
2306 | call _aesni_decrypt8 | 2321 | call _aesni_decrypt8 |
2307 | movups ($inp),$rndkey1 | 2322 | movups ($inp),$rndkey1 |
2308 | movups 0x10($inp),$rndkey0 | 2323 | movups 0x10($inp),$rndkey0 |
2309 | xorps $reserved(%rsp),$inout0 # ^= IV | 2324 | xorps (%rsp),$inout0 # ^= IV |
2310 | xorps $rndkey1,$inout1 | 2325 | xorps $rndkey1,$inout1 |
2311 | movups 0x20($inp),$rndkey1 | 2326 | movups 0x20($inp),$rndkey1 |
2312 | xorps $rndkey0,$inout2 | 2327 | xorps $rndkey0,$inout2 |
@@ -2430,23 +2445,24 @@ $code.=<<___; | |||
2430 | jmp .Lcbc_dec_ret | 2445 | jmp .Lcbc_dec_ret |
2431 | .align 16 | 2446 | .align 16 |
2432 | .Lcbc_dec_tail_partial: | 2447 | .Lcbc_dec_tail_partial: |
2433 | movaps $inout0,$reserved(%rsp) | 2448 | movaps $inout0,(%rsp) |
2434 | mov \$16,%rcx | 2449 | mov \$16,%rcx |
2435 | mov $out,%rdi | 2450 | mov $out,%rdi |
2436 | sub $len,%rcx | 2451 | sub $len,%rcx |
2437 | lea $reserved(%rsp),%rsi | 2452 | lea (%rsp),%rsi |
2438 | .long 0x9066A4F3 # rep movsb | 2453 | .long 0x9066A4F3 # rep movsb |
2439 | 2454 | ||
2440 | .Lcbc_dec_ret: | 2455 | .Lcbc_dec_ret: |
2441 | ___ | 2456 | ___ |
2442 | $code.=<<___ if ($win64); | 2457 | $code.=<<___ if ($win64); |
2443 | movaps (%rsp),%xmm6 | 2458 | movaps 0x10(%rsp),%xmm6 |
2444 | movaps 0x10(%rsp),%xmm7 | 2459 | movaps 0x20(%rsp),%xmm7 |
2445 | movaps 0x20(%rsp),%xmm8 | 2460 | movaps 0x30(%rsp),%xmm8 |
2446 | movaps 0x30(%rsp),%xmm9 | 2461 | movaps 0x40(%rsp),%xmm9 |
2447 | lea 0x58(%rsp),%rsp | ||
2448 | ___ | 2462 | ___ |
2449 | $code.=<<___; | 2463 | $code.=<<___; |
2464 | lea (%rbp),%rsp | ||
2465 | pop %rbp | ||
2450 | .Lcbc_ret: | 2466 | .Lcbc_ret: |
2451 | ret | 2467 | ret |
2452 | .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt | 2468 | .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt |
@@ -2821,9 +2837,8 @@ ctr32_se_handler: | |||
2821 | lea 512($context),%rdi # &context.Xmm6 | 2837 | lea 512($context),%rdi # &context.Xmm6 |
2822 | mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) | 2838 | mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) |
2823 | .long 0xa548f3fc # cld; rep movsq | 2839 | .long 0xa548f3fc # cld; rep movsq |
2824 | lea 0xc8(%rax),%rax # adjust stack pointer | ||
2825 | 2840 | ||
2826 | jmp .Lcommon_seh_tail | 2841 | jmp .Lcommon_rbp_tail |
2827 | .size ctr32_se_handler,.-ctr32_se_handler | 2842 | .size ctr32_se_handler,.-ctr32_se_handler |
2828 | 2843 | ||
2829 | .type xts_se_handler,\@abi-omnipotent | 2844 | .type xts_se_handler,\@abi-omnipotent |
@@ -2862,9 +2877,8 @@ xts_se_handler: | |||
2862 | lea 512($context),%rdi # & context.Xmm6 | 2877 | lea 512($context),%rdi # & context.Xmm6 |
2863 | mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) | 2878 | mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) |
2864 | .long 0xa548f3fc # cld; rep movsq | 2879 | .long 0xa548f3fc # cld; rep movsq |
2865 | lea 0x68+160(%rax),%rax # adjust stack pointer | ||
2866 | 2880 | ||
2867 | jmp .Lcommon_seh_tail | 2881 | jmp .Lcommon_rbp_tail |
2868 | .size xts_se_handler,.-xts_se_handler | 2882 | .size xts_se_handler,.-xts_se_handler |
2869 | ___ | 2883 | ___ |
2870 | $code.=<<___; | 2884 | $code.=<<___; |
@@ -2897,11 +2911,16 @@ cbc_se_handler: | |||
2897 | cmp %r10,%rbx # context->Rip>="epilogue" label | 2911 | cmp %r10,%rbx # context->Rip>="epilogue" label |
2898 | jae .Lcommon_seh_tail | 2912 | jae .Lcommon_seh_tail |
2899 | 2913 | ||
2900 | lea 0(%rax),%rsi # top of stack | 2914 | lea 16(%rax),%rsi # %xmm save area |
2901 | lea 512($context),%rdi # &context.Xmm6 | 2915 | lea 512($context),%rdi # &context.Xmm6 |
2902 | mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) | 2916 | mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) |
2903 | .long 0xa548f3fc # cld; rep movsq | 2917 | .long 0xa548f3fc # cld; rep movsq |
2904 | lea 0x58(%rax),%rax # adjust stack pointer | 2918 | |
2919 | .Lcommon_rbp_tail: | ||
2920 | mov 160($context),%rax # pull context->Rbp | ||
2921 | mov (%rax),%rbp # restore saved %rbp | ||
2922 | lea 8(%rax),%rax # adjust stack pointer | ||
2923 | mov %rbp,160($context) # restore context->Rbp | ||
2905 | jmp .Lcommon_seh_tail | 2924 | jmp .Lcommon_seh_tail |
2906 | 2925 | ||
2907 | .Lrestore_cbc_rax: | 2926 | .Lrestore_cbc_rax: |