summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: jsing <> 2022-07-30 13:51:31 +0000
committer: jsing <> 2022-07-30 13:51:31 +0000
commit: b501550b32ce2e071cf3b7543b3fbea7ec536a11 (patch)
tree: 3ba97f39872a992e0d8d5296e1139550cece1379
parent: 1cf7b34b847eab95bca1e8ca960305109169bf2d (diff)
download: openbsd-b501550b32ce2e071cf3b7543b3fbea7ec536a11.tar.gz
openbsd-b501550b32ce2e071cf3b7543b3fbea7ec536a11.tar.bz2
openbsd-b501550b32ce2e071cf3b7543b3fbea7ec536a11.zip
Add stack frames to AES-NI x86_64 assembly.
The current AES-NI x86_64 assembly does some strange, although valid, things, such as making internal function calls without creating stack frames. In this case, the return address lands in the red zone (which it allows for when making use of the stack) and everything works as expected.

However, this trips a false positive in valgrind, which seems to think that any data saved on the stack prior to the internal function call is now "undefined" once the function returns.

Avoid this by actually using stack frames - this brings in most of 6a40ebe86b4 from OpenSSL, omitting the unnecessary explicit stack alignment (which was apparently added so this code could be used in the Linux kernel with an incorrectly aligned stack).

Valgrind issue reported by Steffen Jaeckel (@sjaeckel), found via libstrophe unit tests.

ok tb@
-rw-r--r-- src/lib/libcrypto/aes/asm/aesni-x86_64.pl 97
1 file changed, 58 insertions, 39 deletions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
index c073667fcb..f0b30109ae 100644
--- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -1007,7 +1007,7 @@ ___
1007# does not update *ivec! (see engine/eng_aesni.c for details) 1007# does not update *ivec! (see engine/eng_aesni.c for details)
1008# 1008#
1009{ 1009{
1010my $reserved = $win64?0:-0x28; 1010my $frame_size = 0x20+($win64?160:0);
1011my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); 1011my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11));
1012my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); 1012my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14");
1013my $bswap_mask="%xmm15"; 1013my $bswap_mask="%xmm15";
@@ -1017,9 +1017,11 @@ $code.=<<___;
1017.type aesni_ctr32_encrypt_blocks,\@function,5 1017.type aesni_ctr32_encrypt_blocks,\@function,5
1018.align 16 1018.align 16
1019aesni_ctr32_encrypt_blocks: 1019aesni_ctr32_encrypt_blocks:
1020 lea (%rsp),%rax
1021 push %rbp
1022 sub \$$frame_size,%rsp
1020___ 1023___
1021$code.=<<___ if ($win64); 1024$code.=<<___ if ($win64);
1022 lea -0xc8(%rsp),%rsp
1023 movaps %xmm6,0x20(%rsp) 1025 movaps %xmm6,0x20(%rsp)
1024 movaps %xmm7,0x30(%rsp) 1026 movaps %xmm7,0x30(%rsp)
1025 movaps %xmm8,0x40(%rsp) 1027 movaps %xmm8,0x40(%rsp)
@@ -1033,6 +1035,7 @@ $code.=<<___ if ($win64);
1033.Lctr32_body: 1035.Lctr32_body:
1034___ 1036___
1035$code.=<<___; 1037$code.=<<___;
1038 lea -8(%rax),%rbp
1036 cmp \$1,$len 1039 cmp \$1,$len
1037 je .Lctr32_one_shortcut 1040 je .Lctr32_one_shortcut
1038 1041
@@ -1057,9 +1060,9 @@ $code.=<<___;
1057 pinsrd \$2,$rnds_,$iv0 1060 pinsrd \$2,$rnds_,$iv0
1058 inc $key_ 1061 inc $key_
1059 pinsrd \$2,$key_,$iv1 1062 pinsrd \$2,$key_,$iv1
1060 movdqa $iv0,$reserved(%rsp) 1063 movdqa $iv0,0x00(%rsp)
1061 pshufb $bswap_mask,$iv0 1064 pshufb $bswap_mask,$iv0
1062 movdqa $iv1,`$reserved+0x10`(%rsp) 1065 movdqa $iv1,0x10(%rsp)
1063 pshufb $bswap_mask,$iv1 1066 pshufb $bswap_mask,$iv1
1064 1067
1065 pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword 1068 pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword
@@ -1099,7 +1102,7 @@ $code.=<<___;
1099 movdqa .Lincrement32(%rip),$iv1 1102 movdqa .Lincrement32(%rip),$iv1
1100 pxor $rndkey0,$inout3 1103 pxor $rndkey0,$inout3
1101 aesenc $rndkey1,$inout2 1104 aesenc $rndkey1,$inout2
1102 movdqa $reserved(%rsp),$iv0 1105 movdqa (%rsp),$iv0
1103 pxor $rndkey0,$inout4 1106 pxor $rndkey0,$inout4
1104 aesenc $rndkey1,$inout3 1107 aesenc $rndkey1,$inout3
1105 pxor $rndkey0,$inout5 1108 pxor $rndkey0,$inout5
@@ -1132,11 +1135,11 @@ $code.=<<___;
1132 aesenc $rndkey1,$inout0 1135 aesenc $rndkey1,$inout0
1133 paddd $iv1,$iv0 # increment counter vector 1136 paddd $iv1,$iv0 # increment counter vector
1134 aesenc $rndkey1,$inout1 1137 aesenc $rndkey1,$inout1
1135 paddd `$reserved+0x10`(%rsp),$iv1 1138 paddd 0x10(%rsp),$iv1
1136 aesenc $rndkey1,$inout2 1139 aesenc $rndkey1,$inout2
1137 movdqa $iv0,$reserved(%rsp) # save counter vector 1140 movdqa $iv0,0x00(%rsp) # save counter vector
1138 aesenc $rndkey1,$inout3 1141 aesenc $rndkey1,$inout3
1139 movdqa $iv1,`$reserved+0x10`(%rsp) 1142 movdqa $iv1,0x10(%rsp)
1140 aesenc $rndkey1,$inout4 1143 aesenc $rndkey1,$inout4
1141 pshufb $bswap_mask,$iv0 # byte swap 1144 pshufb $bswap_mask,$iv0 # byte swap
1142 aesenc $rndkey1,$inout5 1145 aesenc $rndkey1,$inout5
@@ -1279,10 +1282,11 @@ $code.=<<___ if ($win64);
1279 movaps 0x90(%rsp),%xmm13 1282 movaps 0x90(%rsp),%xmm13
1280 movaps 0xa0(%rsp),%xmm14 1283 movaps 0xa0(%rsp),%xmm14
1281 movaps 0xb0(%rsp),%xmm15 1284 movaps 0xb0(%rsp),%xmm15
1282 lea 0xc8(%rsp),%rsp
1283.Lctr32_ret:
1284___ 1285___
1285$code.=<<___; 1286$code.=<<___;
1287 lea (%rbp),%rsp
1288 pop %rbp
1289.Lctr32_ret:
1286 ret 1290 ret
1287.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks 1291.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1288___ 1292___
@@ -1297,14 +1301,16 @@ ___
1297my @tweak=map("%xmm$_",(10..15)); 1301my @tweak=map("%xmm$_",(10..15));
1298my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); 1302my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
1299my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); 1303my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
1300my $frame_size = 0x68 + ($win64?160:0); 1304my $frame_size = 0x60 + ($win64?160:0);
1301 1305
1302$code.=<<___; 1306$code.=<<___;
1303.globl aesni_xts_encrypt 1307.globl aesni_xts_encrypt
1304.type aesni_xts_encrypt,\@function,6 1308.type aesni_xts_encrypt,\@function,6
1305.align 16 1309.align 16
1306aesni_xts_encrypt: 1310aesni_xts_encrypt:
1307 lea -$frame_size(%rsp),%rsp 1311 lea (%rsp),%rax
1312 push %rbp
1313 sub \$$frame_size,%rsp
1308___ 1314___
1309$code.=<<___ if ($win64); 1315$code.=<<___ if ($win64);
1310 movaps %xmm6,0x60(%rsp) 1316 movaps %xmm6,0x60(%rsp)
@@ -1320,6 +1326,7 @@ $code.=<<___ if ($win64);
1320.Lxts_enc_body: 1326.Lxts_enc_body:
1321___ 1327___
1322$code.=<<___; 1328$code.=<<___;
1329 lea -8(%rax),%rbp
1323 movups ($ivp),@tweak[5] # load clear-text tweak 1330 movups ($ivp),@tweak[5] # load clear-text tweak
1324 mov 240(%r8),$rounds # key2->rounds 1331 mov 240(%r8),$rounds # key2->rounds
1325 mov 240($key),$rnds_ # key1->rounds 1332 mov 240($key),$rnds_ # key1->rounds
@@ -1683,7 +1690,8 @@ $code.=<<___ if ($win64);
1683 movaps 0xf0(%rsp),%xmm15 1690 movaps 0xf0(%rsp),%xmm15
1684___ 1691___
1685$code.=<<___; 1692$code.=<<___;
1686 lea $frame_size(%rsp),%rsp 1693 lea (%rbp),%rsp
1694 pop %rbp
1687.Lxts_enc_epilogue: 1695.Lxts_enc_epilogue:
1688 ret 1696 ret
1689.size aesni_xts_encrypt,.-aesni_xts_encrypt 1697.size aesni_xts_encrypt,.-aesni_xts_encrypt
@@ -1694,7 +1702,9 @@ $code.=<<___;
1694.type aesni_xts_decrypt,\@function,6 1702.type aesni_xts_decrypt,\@function,6
1695.align 16 1703.align 16
1696aesni_xts_decrypt: 1704aesni_xts_decrypt:
1697 lea -$frame_size(%rsp),%rsp 1705 lea (%rsp),%rax
1706 push %rbp
1707 sub \$$frame_size,%rsp
1698___ 1708___
1699$code.=<<___ if ($win64); 1709$code.=<<___ if ($win64);
1700 movaps %xmm6,0x60(%rsp) 1710 movaps %xmm6,0x60(%rsp)
@@ -1710,6 +1720,7 @@ $code.=<<___ if ($win64);
1710.Lxts_dec_body: 1720.Lxts_dec_body:
1711___ 1721___
1712$code.=<<___; 1722$code.=<<___;
1723 lea -8(%rax),%rbp
1713 movups ($ivp),@tweak[5] # load clear-text tweak 1724 movups ($ivp),@tweak[5] # load clear-text tweak
1714 mov 240($key2),$rounds # key2->rounds 1725 mov 240($key2),$rounds # key2->rounds
1715 mov 240($key),$rnds_ # key1->rounds 1726 mov 240($key),$rnds_ # key1->rounds
@@ -2109,7 +2120,8 @@ $code.=<<___ if ($win64);
2109 movaps 0xf0(%rsp),%xmm15 2120 movaps 0xf0(%rsp),%xmm15
2110___ 2121___
2111$code.=<<___; 2122$code.=<<___;
2112 lea $frame_size(%rsp),%rsp 2123 lea (%rbp),%rsp
2124 pop %rbp
2113.Lxts_dec_epilogue: 2125.Lxts_dec_epilogue:
2114 ret 2126 ret
2115.size aesni_xts_decrypt,.-aesni_xts_decrypt 2127.size aesni_xts_decrypt,.-aesni_xts_decrypt
@@ -2121,7 +2133,7 @@ ___
2121# size_t length, const AES_KEY *key, 2133# size_t length, const AES_KEY *key,
2122# unsigned char *ivp,const int enc); 2134# unsigned char *ivp,const int enc);
2123{ 2135{
2124my $reserved = $win64?0x40:-0x18; # used in decrypt 2136my $frame_size = 0x10 + ($win64?0x40:0); # used in decrypt
2125$code.=<<___; 2137$code.=<<___;
2126.globl ${PREFIX}_cbc_encrypt 2138.globl ${PREFIX}_cbc_encrypt
2127.type ${PREFIX}_cbc_encrypt,\@function,6 2139.type ${PREFIX}_cbc_encrypt,\@function,6
@@ -2177,16 +2189,19 @@ $code.=<<___;
2177 #--------------------------- CBC DECRYPT ------------------------------# 2189 #--------------------------- CBC DECRYPT ------------------------------#
2178.align 16 2190.align 16
2179.Lcbc_decrypt: 2191.Lcbc_decrypt:
2192 lea (%rsp),%rax
2193 push %rbp
2194 sub \$$frame_size,%rsp
2180___ 2195___
2181$code.=<<___ if ($win64); 2196$code.=<<___ if ($win64);
2182 lea -0x58(%rsp),%rsp 2197 movaps %xmm6,0x10(%rsp)
2183 movaps %xmm6,(%rsp) 2198 movaps %xmm7,0x20(%rsp)
2184 movaps %xmm7,0x10(%rsp) 2199 movaps %xmm8,0x30(%rsp)
2185 movaps %xmm8,0x20(%rsp) 2200 movaps %xmm9,0x40(%rsp)
2186 movaps %xmm9,0x30(%rsp)
2187.Lcbc_decrypt_body: 2201.Lcbc_decrypt_body:
2188___ 2202___
2189$code.=<<___; 2203$code.=<<___;
2204 lea -8(%rax),%rbp
2190 movups ($ivp),$iv 2205 movups ($ivp),$iv
2191 mov $rnds_,$rounds 2206 mov $rnds_,$rounds
2192 cmp \$0x70,$len 2207 cmp \$0x70,$len
@@ -2194,11 +2209,11 @@ $code.=<<___;
2194 shr \$1,$rnds_ 2209 shr \$1,$rnds_
2195 sub \$0x70,$len 2210 sub \$0x70,$len
2196 mov $rnds_,$rounds 2211 mov $rnds_,$rounds
2197 movaps $iv,$reserved(%rsp) 2212 movaps $iv,(%rsp)
2198 jmp .Lcbc_dec_loop8_enter 2213 jmp .Lcbc_dec_loop8_enter
2199.align 16 2214.align 16
2200.Lcbc_dec_loop8: 2215.Lcbc_dec_loop8:
2201 movaps $rndkey0,$reserved(%rsp) # save IV 2216 movaps $rndkey0,(%rsp) # save IV
2202 movups $inout7,($out) 2217 movups $inout7,($out)
2203 lea 0x10($out),$out 2218 lea 0x10($out),$out
2204.Lcbc_dec_loop8_enter: 2219.Lcbc_dec_loop8_enter:
@@ -2238,7 +2253,7 @@ $code.=<<___;
2238 2253
2239 movups ($inp),$rndkey1 # re-load input 2254 movups ($inp),$rndkey1 # re-load input
2240 movups 0x10($inp),$rndkey0 2255 movups 0x10($inp),$rndkey0
2241 xorps $reserved(%rsp),$inout0 # ^= IV 2256 xorps (%rsp),$inout0 # ^= IV
2242 xorps $rndkey1,$inout1 2257 xorps $rndkey1,$inout1
2243 movups 0x20($inp),$rndkey1 2258 movups 0x20($inp),$rndkey1
2244 xorps $rndkey0,$inout2 2259 xorps $rndkey0,$inout2
@@ -2302,11 +2317,11 @@ $code.=<<___;
2302 jbe .Lcbc_dec_six 2317 jbe .Lcbc_dec_six
2303 2318
2304 movups 0x60($inp),$inout6 2319 movups 0x60($inp),$inout6
2305 movaps $iv,$reserved(%rsp) # save IV 2320 movaps $iv,(%rsp) # save IV
2306 call _aesni_decrypt8 2321 call _aesni_decrypt8
2307 movups ($inp),$rndkey1 2322 movups ($inp),$rndkey1
2308 movups 0x10($inp),$rndkey0 2323 movups 0x10($inp),$rndkey0
2309 xorps $reserved(%rsp),$inout0 # ^= IV 2324 xorps (%rsp),$inout0 # ^= IV
2310 xorps $rndkey1,$inout1 2325 xorps $rndkey1,$inout1
2311 movups 0x20($inp),$rndkey1 2326 movups 0x20($inp),$rndkey1
2312 xorps $rndkey0,$inout2 2327 xorps $rndkey0,$inout2
@@ -2430,23 +2445,24 @@ $code.=<<___;
2430 jmp .Lcbc_dec_ret 2445 jmp .Lcbc_dec_ret
2431.align 16 2446.align 16
2432.Lcbc_dec_tail_partial: 2447.Lcbc_dec_tail_partial:
2433 movaps $inout0,$reserved(%rsp) 2448 movaps $inout0,(%rsp)
2434 mov \$16,%rcx 2449 mov \$16,%rcx
2435 mov $out,%rdi 2450 mov $out,%rdi
2436 sub $len,%rcx 2451 sub $len,%rcx
2437 lea $reserved(%rsp),%rsi 2452 lea (%rsp),%rsi
2438 .long 0x9066A4F3 # rep movsb 2453 .long 0x9066A4F3 # rep movsb
2439 2454
2440.Lcbc_dec_ret: 2455.Lcbc_dec_ret:
2441___ 2456___
2442$code.=<<___ if ($win64); 2457$code.=<<___ if ($win64);
2443 movaps (%rsp),%xmm6 2458 movaps 0x10(%rsp),%xmm6
2444 movaps 0x10(%rsp),%xmm7 2459 movaps 0x20(%rsp),%xmm7
2445 movaps 0x20(%rsp),%xmm8 2460 movaps 0x30(%rsp),%xmm8
2446 movaps 0x30(%rsp),%xmm9 2461 movaps 0x40(%rsp),%xmm9
2447 lea 0x58(%rsp),%rsp
2448___ 2462___
2449$code.=<<___; 2463$code.=<<___;
2464 lea (%rbp),%rsp
2465 pop %rbp
2450.Lcbc_ret: 2466.Lcbc_ret:
2451 ret 2467 ret
2452.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt 2468.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
@@ -2821,9 +2837,8 @@ ctr32_se_handler:
2821 lea 512($context),%rdi # &context.Xmm6 2837 lea 512($context),%rdi # &context.Xmm6
2822 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 2838 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2823 .long 0xa548f3fc # cld; rep movsq 2839 .long 0xa548f3fc # cld; rep movsq
2824 lea 0xc8(%rax),%rax # adjust stack pointer
2825 2840
2826 jmp .Lcommon_seh_tail 2841 jmp .Lcommon_rbp_tail
2827.size ctr32_se_handler,.-ctr32_se_handler 2842.size ctr32_se_handler,.-ctr32_se_handler
2828 2843
2829.type xts_se_handler,\@abi-omnipotent 2844.type xts_se_handler,\@abi-omnipotent
@@ -2862,9 +2877,8 @@ xts_se_handler:
2862 lea 512($context),%rdi # & context.Xmm6 2877 lea 512($context),%rdi # & context.Xmm6
2863 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 2878 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2864 .long 0xa548f3fc # cld; rep movsq 2879 .long 0xa548f3fc # cld; rep movsq
2865 lea 0x68+160(%rax),%rax # adjust stack pointer
2866 2880
2867 jmp .Lcommon_seh_tail 2881 jmp .Lcommon_rbp_tail
2868.size xts_se_handler,.-xts_se_handler 2882.size xts_se_handler,.-xts_se_handler
2869___ 2883___
2870$code.=<<___; 2884$code.=<<___;
@@ -2897,11 +2911,16 @@ cbc_se_handler:
2897 cmp %r10,%rbx # context->Rip>="epilogue" label 2911 cmp %r10,%rbx # context->Rip>="epilogue" label
2898 jae .Lcommon_seh_tail 2912 jae .Lcommon_seh_tail
2899 2913
2900 lea 0(%rax),%rsi # top of stack 2914 lea 16(%rax),%rsi # %xmm save area
2901 lea 512($context),%rdi # &context.Xmm6 2915 lea 512($context),%rdi # &context.Xmm6
2902 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) 2916 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
2903 .long 0xa548f3fc # cld; rep movsq 2917 .long 0xa548f3fc # cld; rep movsq
2904 lea 0x58(%rax),%rax # adjust stack pointer 2918
2919.Lcommon_rbp_tail:
2920 mov 160($context),%rax # pull context->Rbp
2921 mov (%rax),%rbp # restore saved %rbp
2922 lea 8(%rax),%rax # adjust stack pointer
2923 mov %rbp,160($context) # restore context->Rbp
2905 jmp .Lcommon_seh_tail 2924 jmp .Lcommon_seh_tail
2906 2925
2907.Lrestore_cbc_rax: 2926.Lrestore_cbc_rax: