summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lib/libcrypto/aes/asm/aesni-x86_64.pl97
1 files changed, 58 insertions, 39 deletions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
index c073667fcb..f0b30109ae 100644
--- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -1007,7 +1007,7 @@ ___
1007# does not update *ivec! (see engine/eng_aesni.c for details) 1007# does not update *ivec! (see engine/eng_aesni.c for details)
1008# 1008#
1009{ 1009{
1010my $reserved = $win64?0:-0x28; 1010my $frame_size = 0x20+($win64?160:0);
1011my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); 1011my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11));
1012my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); 1012my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14");
1013my $bswap_mask="%xmm15"; 1013my $bswap_mask="%xmm15";
@@ -1017,9 +1017,11 @@ $code.=<<___;
1017.type aesni_ctr32_encrypt_blocks,\@function,5 1017.type aesni_ctr32_encrypt_blocks,\@function,5
1018.align 16 1018.align 16
1019aesni_ctr32_encrypt_blocks: 1019aesni_ctr32_encrypt_blocks:
1020 lea (%rsp),%rax
1021 push %rbp
1022 sub \$$frame_size,%rsp
1020___ 1023___
1021$code.=<<___ if ($win64); 1024$code.=<<___ if ($win64);
1022 lea -0xc8(%rsp),%rsp
1023 movaps %xmm6,0x20(%rsp) 1025 movaps %xmm6,0x20(%rsp)
1024 movaps %xmm7,0x30(%rsp) 1026 movaps %xmm7,0x30(%rsp)
1025 movaps %xmm8,0x40(%rsp) 1027 movaps %xmm8,0x40(%rsp)
@@ -1033,6 +1035,7 @@ $code.=<<___ if ($win64);
1033.Lctr32_body: 1035.Lctr32_body:
1034___ 1036___
1035$code.=<<___; 1037$code.=<<___;
1038 lea -8(%rax),%rbp
1036 cmp \$1,$len 1039 cmp \$1,$len
1037 je .Lctr32_one_shortcut 1040 je .Lctr32_one_shortcut
1038 1041
@@ -1057,9 +1060,9 @@ $code.=<<___;
1057 pinsrd \$2,$rnds_,$iv0 1060 pinsrd \$2,$rnds_,$iv0
1058 inc $key_ 1061 inc $key_
1059 pinsrd \$2,$key_,$iv1 1062 pinsrd \$2,$key_,$iv1
1060 movdqa $iv0,$reserved(%rsp) 1063 movdqa $iv0,0x00(%rsp)
1061 pshufb $bswap_mask,$iv0 1064 pshufb $bswap_mask,$iv0
1062 movdqa $iv1,`$reserved+0x10`(%rsp) 1065 movdqa $iv1,0x10(%rsp)
1063 pshufb $bswap_mask,$iv1 1066 pshufb $bswap_mask,$iv1
1064 1067
1065 pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword 1068 pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword
@@ -1099,7 +1102,7 @@ $code.=<<___;
1099 movdqa .Lincrement32(%rip),$iv1 1102 movdqa .Lincrement32(%rip),$iv1
1100 pxor $rndkey0,$inout3 1103 pxor $rndkey0,$inout3
1101 aesenc $rndkey1,$inout2 1104 aesenc $rndkey1,$inout2
1102 movdqa $reserved(%rsp),$iv0 1105 movdqa (%rsp),$iv0
1103 pxor $rndkey0,$inout4 1106 pxor $rndkey0,$inout4
1104 aesenc $rndkey1,$inout3 1107 aesenc $rndkey1,$inout3
1105 pxor $rndkey0,$inout5 1108 pxor $rndkey0,$inout5
@@ -1132,11 +1135,11 @@ $code.=<<___;
1132 aesenc $rndkey1,$inout0 1135 aesenc $rndkey1,$inout0
1133 paddd $iv1,$iv0 # increment counter vector 1136 paddd $iv1,$iv0 # increment counter vector
1134 aesenc $rndkey1,$inout1 1137 aesenc $rndkey1,$inout1
1135 paddd `$reserved+0x10`(%rsp),$iv1 1138 paddd 0x10(%rsp),$iv1
1136 aesenc $rndkey1,$inout2 1139 aesenc $rndkey1,$inout2
1137 movdqa $iv0,$reserved(%rsp) # save counter vector 1140 movdqa $iv0,0x00(%rsp) # save counter vector
1138 aesenc $rndkey1,$inout3 1141 aesenc $rndkey1,$inout3
1139 movdqa $iv1,`$reserved+0x10`(%rsp) 1142 movdqa $iv1,0x10(%rsp)
1140 aesenc $rndkey1,$inout4 1143 aesenc $rndkey1,$inout4
1141 pshufb $bswap_mask,$iv0 # byte swap 1144 pshufb $bswap_mask,$iv0 # byte swap
1142 aesenc $rndkey1,$inout5 1145 aesenc $rndkey1,$inout5
@@ -1279,10 +1282,11 @@ $code.=<<___ if ($win64);
1279 movaps 0x90(%rsp),%xmm13 1282 movaps 0x90(%rsp),%xmm13
1280 movaps 0xa0(%rsp),%xmm14 1283 movaps 0xa0(%rsp),%xmm14
1281 movaps 0xb0(%rsp),%xmm15 1284 movaps 0xb0(%rsp),%xmm15
1282 lea 0xc8(%rsp),%rsp
1283.Lctr32_ret:
1284___ 1285___
1285$code.=<<___; 1286$code.=<<___;
1287 lea (%rbp),%rsp
1288 pop %rbp
1289.Lctr32_ret:
1286 ret 1290 ret
1287.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks 1291.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1288___ 1292___
@@ -1297,14 +1301,16 @@ ___
1297my @tweak=map("%xmm$_",(10..15)); 1301my @tweak=map("%xmm$_",(10..15));
1298my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); 1302my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
1299my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); 1303my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
1300my $frame_size = 0x68 + ($win64?160:0); 1304my $frame_size = 0x60 + ($win64?160:0);
1301 1305
1302$code.=<<___; 1306$code.=<<___;
1303.globl aesni_xts_encrypt 1307.globl aesni_xts_encrypt
1304.type aesni_xts_encrypt,\@function,6 1308.type aesni_xts_encrypt,\@function,6
1305.align 16 1309.align 16
1306aesni_xts_encrypt: 1310aesni_xts_encrypt:
1307 lea -$frame_size(%rsp),%rsp 1311 lea (%rsp),%rax
1312 push %rbp
1313 sub \$$frame_size,%rsp
1308___ 1314___
1309$code.=<<___ if ($win64); 1315$code.=<<___ if ($win64);
1310 movaps %xmm6,0x60(%rsp) 1316 movaps %xmm6,0x60(%rsp)
@@ -1320,6 +1326,7 @@ $code.=<<___ if ($win64);
1320.Lxts_enc_body: 1326.Lxts_enc_body:
1321___ 1327___
1322$code.=<<___; 1328$code.=<<___;
1329 lea -8(%rax),%rbp
1323 movups ($ivp),@tweak[5] # load clear-text tweak 1330 movups ($ivp),@tweak[5] # load clear-text tweak
1324 mov 240(%r8),$rounds # key2->rounds 1331 mov 240(%r8),$rounds # key2->rounds
1325 mov 240($key),$rnds_ # key1->rounds 1332 mov 240($key),$rnds_ # key1->rounds
@@ -1683,7 +1690,8 @@ $code.=<<___ if ($win64);
1683 movaps 0xf0(%rsp),%xmm15 1690 movaps 0xf0(%rsp),%xmm15
1684___ 1691___
1685$code.=<<___; 1692$code.=<<___;
1686 lea $frame_size(%rsp),%rsp 1693 lea (%rbp),%rsp
1694 pop %rbp
1687.Lxts_enc_epilogue: 1695.Lxts_enc_epilogue:
1688 ret 1696 ret
1689.size aesni_xts_encrypt,.-aesni_xts_encrypt 1697.size aesni_xts_encrypt,.-aesni_xts_encrypt
@@ -1694,7 +1702,9 @@ $code.=<<___;
1694.type aesni_xts_decrypt,\@function,6 1702.type aesni_xts_decrypt,\@function,6
1695.align 16 1703.align 16
1696aesni_xts_decrypt: 1704aesni_xts_decrypt:
1697 lea -$frame_size(%rsp),%rsp 1705 lea (%rsp),%rax
1706 push %rbp
1707 sub \$$frame_size,%rsp
1698___ 1708___
1699$code.=<<___ if ($win64); 1709$code.=<<___ if ($win64);
1700 movaps %xmm6,0x60(%rsp) 1710 movaps %xmm6,0x60(%rsp)
@@ -1710,6 +1720,7 @@ $code.=<<___ if ($win64);
1710.Lxts_dec_body: 1720.Lxts_dec_body:
1711___ 1721___
1712$code.=<<___; 1722$code.=<<___;
1723 lea -8(%rax),%rbp
1713 movups ($ivp),@tweak[5] # load clear-text tweak 1724 movups ($ivp),@tweak[5] # load clear-text tweak
1714 mov 240($key2),$rounds # key2->rounds 1725 mov 240($key2),$rounds # key2->rounds
1715 mov 240($key),$rnds_ # key1->rounds 1726 mov 240($key),$rnds_ # key1->rounds
@@ -2109,7 +2120,8 @@ $code.=<<___ if ($win64);
2109 movaps 0xf0(%rsp),%xmm15 2120 movaps 0xf0(%rsp),%xmm15
2110___ 2121___
2111$code.=<<___; 2122$code.=<<___;
2112 lea $frame_size(%rsp),%rsp 2123 lea (%rbp),%rsp
2124 pop %rbp
2113.Lxts_dec_epilogue: 2125.Lxts_dec_epilogue:
2114 ret 2126 ret
2115.size aesni_xts_decrypt,.-aesni_xts_decrypt 2127.size aesni_xts_decrypt,.-aesni_xts_decrypt
@@ -2121,7 +2133,7 @@ ___
2121# size_t length, const AES_KEY *key, 2133# size_t length, const AES_KEY *key,
2122# unsigned char *ivp,const int enc); 2134# unsigned char *ivp,const int enc);
2123{ 2135{
2124my $reserved = $win64?0x40:-0x18; # used in decrypt 2136my $frame_size = 0x10 + ($win64?0x40:0); # used in decrypt
2125$code.=<<___; 2137$code.=<<___;
2126.globl ${PREFIX}_cbc_encrypt 2138.globl ${PREFIX}_cbc_encrypt
2127.type ${PREFIX}_cbc_encrypt,\@function,6 2139.type ${PREFIX}_cbc_encrypt,\@function,6
@@ -2177,16 +2189,19 @@ $code.=<<___;
2177 #--------------------------- CBC DECRYPT ------------------------------# 2189 #--------------------------- CBC DECRYPT ------------------------------#
2178.align 16 2190.align 16
2179.Lcbc_decrypt: 2191.Lcbc_decrypt:
2192 lea (%rsp),%rax
2193 push %rbp
2194 sub \$$frame_size,%rsp
2180___ 2195___
2181$code.=<<___ if ($win64); 2196$code.=<<___ if ($win64);
2182 lea -0x58(%rsp),%rsp 2197 movaps %xmm6,0x10(%rsp)
2183 movaps %xmm6,(%rsp) 2198 movaps %xmm7,0x20(%rsp)
2184 movaps %xmm7,0x10(%rsp) 2199 movaps %xmm8,0x30(%rsp)
2185 movaps %xmm8,0x20(%rsp) 2200 movaps %xmm9,0x40(%rsp)
2186 movaps %xmm9,0x30(%rsp)
2187.Lcbc_decrypt_body: 2201.Lcbc_decrypt_body:
2188___ 2202___
2189$code.=<<___; 2203$code.=<<___;
2204 lea -8(%rax),%rbp
2190 movups ($ivp),$iv 2205 movups ($ivp),$iv
2191 mov $rnds_,$rounds 2206 mov $rnds_,$rounds
2192 cmp \$0x70,$len 2207 cmp \$0x70,$len
@@ -2194,11 +2209,11 @@ $code.=<<___;
2194 shr \$1,$rnds_ 2209 shr \$1,$rnds_
2195 sub \$0x70,$len 2210 sub \$0x70,$len
2196 mov $rnds_,$rounds 2211 mov $rnds_,$rounds
2197 movaps $iv,$reserved(%rsp) 2212 movaps $iv,(%rsp)
2198 jmp .Lcbc_dec_loop8_enter 2213 jmp .Lcbc_dec_loop8_enter
2199.align 16 2214.align 16
2200.Lcbc_dec_loop8: 2215.Lcbc_dec_loop8:
2201 movaps $rndkey0,$reserved(%rsp) # save IV 2216 movaps $rndkey0,(%rsp) # save IV
2202 movups $inout7,($out) 2217 movups $inout7,($out)
2203 lea 0x10($out),$out 2218 lea 0x10($out),$out
2204.Lcbc_dec_loop8_enter: 2219.Lcbc_dec_loop8_enter:
@@ -2238,7 +2253,7 @@ $code.=<<___;
2238 2253
2239 movups ($inp),$rndkey1 # re-load input 2254 movups ($inp),$rndkey1 # re-load input
2240 movups 0x10($inp),$rndkey0 2255 movups 0x10($inp),$rndkey0
2241 xorps $reserved(%rsp),$inout0 # ^= IV 2256 xorps (%rsp),$inout0 # ^= IV
2242 xorps $rndkey1,$inout1 2257 xorps $rndkey1,$inout1
2243 movups 0x20($inp),$rndkey1 2258 movups 0x20($inp),$rndkey1
2244 xorps $rndkey0,$inout2 2259 xorps $rndkey0,$inout2
@@ -2302,11 +2317,11 @@ $code.=<<___;
2302 jbe .Lcbc_dec_six 2317 jbe .Lcbc_dec_six
2303 2318
2304 movups 0x60($inp),$inout6 2319 movups 0x60($inp),$inout6
2305 movaps $iv,$reserved(%rsp) # save IV 2320 movaps $iv,(%rsp) # save IV
2306 call _aesni_decrypt8 2321 call _aesni_decrypt8
2307 movups ($inp),$rndkey1 2322 movups ($inp),$rndkey1
2308 movups 0x10($inp),$rndkey0 2323 movups 0x10($inp),$rndkey0
2309 xorps $reserved(%rsp),$inout0 # ^= IV 2324 xorps (%rsp),$inout0 # ^= IV
2310 xorps $rndkey1,$inout1 2325 xorps $rndkey1,$inout1
2311 movups 0x20($inp),$rndkey1 2326 movups 0x20($inp),$rndkey1
2312 xorps $rndkey0,$inout2 2327 xorps $rndkey0,$inout2
@@ -2430,23 +2445,24 @@ $code.=<<___;
2430 jmp .Lcbc_dec_ret 2445 jmp .Lcbc_dec_ret
2431.align 16 2446.align 16
2432.Lcbc_dec_tail_partial: 2447.Lcbc_dec_tail_partial:
2433 movaps $inout0,$reserved(%rsp) 2448 movaps $inout0,(%rsp)
2434 mov \$16,%rcx 2449 mov \$16,%rcx
2435 mov $out,%rdi 2450 mov $out,%rdi
2436 sub $len,%rcx 2451 sub $len,%rcx
2437 lea $reserved(%rsp),%rsi 2452 lea (%rsp),%rsi
2438 .long 0x9066A4F3 # rep movsb 2453 .long 0x9066A4F3 # rep movsb
2439 2454
2440.Lcbc_dec_ret: 2455.Lcbc_dec_ret:
2441___ 2456___
2442$code.=<<___ if ($win64); 2457$code.=<<___ if ($win64);
2443 movaps (%rsp),%xmm6 2458 movaps 0x10(%rsp),%xmm6
2444 movaps 0x10(%rsp),%xmm7 2459 movaps 0x20(%rsp),%xmm7
2445 movaps 0x20(%rsp),%xmm8 2460 movaps 0x30(%rsp),%xmm8
2446 movaps 0x30(%rsp),%xmm9 2461 movaps 0x40(%rsp),%xmm9
2447 lea 0x58(%rsp),%rsp
2448___ 2462___
2449$code.=<<___; 2463$code.=<<___;
2464 lea (%rbp),%rsp
2465 pop %rbp
2450.Lcbc_ret: 2466.Lcbc_ret:
2451 ret 2467 ret
2452.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt 2468.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
@@ -2821,9 +2837,8 @@ ctr32_se_handler:
2821 lea 512($context),%rdi # &context.Xmm6 2837 lea 512($context),%rdi # &context.Xmm6
2822 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 2838 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2823 .long 0xa548f3fc # cld; rep movsq 2839 .long 0xa548f3fc # cld; rep movsq
2824 lea 0xc8(%rax),%rax # adjust stack pointer
2825 2840
2826 jmp .Lcommon_seh_tail 2841 jmp .Lcommon_rbp_tail
2827.size ctr32_se_handler,.-ctr32_se_handler 2842.size ctr32_se_handler,.-ctr32_se_handler
2828 2843
2829.type xts_se_handler,\@abi-omnipotent 2844.type xts_se_handler,\@abi-omnipotent
@@ -2862,9 +2877,8 @@ xts_se_handler:
2862 lea 512($context),%rdi # & context.Xmm6 2877 lea 512($context),%rdi # & context.Xmm6
2863 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 2878 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2864 .long 0xa548f3fc # cld; rep movsq 2879 .long 0xa548f3fc # cld; rep movsq
2865 lea 0x68+160(%rax),%rax # adjust stack pointer
2866 2880
2867 jmp .Lcommon_seh_tail 2881 jmp .Lcommon_rbp_tail
2868.size xts_se_handler,.-xts_se_handler 2882.size xts_se_handler,.-xts_se_handler
2869___ 2883___
2870$code.=<<___; 2884$code.=<<___;
@@ -2897,11 +2911,16 @@ cbc_se_handler:
2897 cmp %r10,%rbx # context->Rip>="epilogue" label 2911 cmp %r10,%rbx # context->Rip>="epilogue" label
2898 jae .Lcommon_seh_tail 2912 jae .Lcommon_seh_tail
2899 2913
2900 lea 0(%rax),%rsi # top of stack 2914 lea 16(%rax),%rsi # %xmm save area
2901 lea 512($context),%rdi # &context.Xmm6 2915 lea 512($context),%rdi # &context.Xmm6
2902 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) 2916 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
2903 .long 0xa548f3fc # cld; rep movsq 2917 .long 0xa548f3fc # cld; rep movsq
2904 lea 0x58(%rax),%rax # adjust stack pointer 2918
2919.Lcommon_rbp_tail:
2920 mov 160($context),%rax # pull context->Rbp
2921 mov (%rax),%rbp # restore saved %rbp
2922 lea 8(%rax),%rax # adjust stack pointer
2923 mov %rbp,160($context) # restore context->Rbp
2905 jmp .Lcommon_seh_tail 2924 jmp .Lcommon_seh_tail
2906 2925
2907.Lrestore_cbc_rax: 2926.Lrestore_cbc_rax: