summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aesni-x86_64.pl')
-rw-r--r-- src/lib/libcrypto/aes/asm/aesni-x86_64.pl 30
1 files changed, 15 insertions, 15 deletions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
index f0b30109ae..a849073728 100644
--- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -52,7 +52,7 @@
52# nothing one can do and the result appears optimal. CCM result is 52# nothing one can do and the result appears optimal. CCM result is
53# identical to CBC, because CBC-MAC is essentially CBC encrypt without 53# identical to CBC, because CBC-MAC is essentially CBC encrypt without
54# saving output. CCM CTR "stays invisible," because it's neatly 54# saving output. CCM CTR "stays invisible," because it's neatly
55# interleaved wih CBC-MAC. This provides ~30% improvement over 55# interleaved with CBC-MAC. This provides ~30% improvement over
56# "straghtforward" CCM implementation with CTR and CBC-MAC performed 56# "straghtforward" CCM implementation with CTR and CBC-MAC performed
57# disjointly. Parallelizable modes practically achieve the theoretical 57# disjointly. Parallelizable modes practically achieve the theoretical
58# limit. 58# limit.
@@ -136,7 +136,7 @@
136# asymptotic, if it can be surpassed, isn't it? What happens there? 136# asymptotic, if it can be surpassed, isn't it? What happens there?
137# Rewind to CBC paragraph for the answer. Yes, out-of-order execution 137# Rewind to CBC paragraph for the answer. Yes, out-of-order execution
138# magic is responsible for this. Processor overlaps not only the 138# magic is responsible for this. Processor overlaps not only the
139# additional instructions with AES ones, but even AES instuctions 139# additional instructions with AES ones, but even AES instructions
140# processing adjacent triplets of independent blocks. In the 6x case 140# processing adjacent triplets of independent blocks. In the 6x case
141# additional instructions still claim disproportionally small amount 141# additional instructions still claim disproportionally small amount
142# of additional cycles, but in 8x case number of instructions must be 142# of additional cycles, but in 8x case number of instructions must be
@@ -1350,7 +1350,7 @@ ___
1350 movdqa @tweak[5],@tweak[$i] 1350 movdqa @tweak[5],@tweak[$i]
1351 paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1351 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1352 pand $twmask,$twres # isolate carry and residue 1352 pand $twmask,$twres # isolate carry and residue
1353 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1353 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1354 pxor $twres,@tweak[5] 1354 pxor $twres,@tweak[5]
1355___ 1355___
1356 } 1356 }
@@ -1456,7 +1456,7 @@ $code.=<<___;
1456 aesenc $rndkey0,$inout0 1456 aesenc $rndkey0,$inout0
1457 pand $twmask,$twres # isolate carry and residue 1457 pand $twmask,$twres # isolate carry and residue
1458 aesenc $rndkey0,$inout1 1458 aesenc $rndkey0,$inout1
1459 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1459 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1460 aesenc $rndkey0,$inout2 1460 aesenc $rndkey0,$inout2
1461 pxor $twres,@tweak[5] 1461 pxor $twres,@tweak[5]
1462 aesenc $rndkey0,$inout3 1462 aesenc $rndkey0,$inout3
@@ -1471,7 +1471,7 @@ $code.=<<___;
1471 aesenc $rndkey1,$inout0 1471 aesenc $rndkey1,$inout0
1472 pand $twmask,$twres # isolate carry and residue 1472 pand $twmask,$twres # isolate carry and residue
1473 aesenc $rndkey1,$inout1 1473 aesenc $rndkey1,$inout1
1474 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1474 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1475 aesenc $rndkey1,$inout2 1475 aesenc $rndkey1,$inout2
1476 pxor $twres,@tweak[5] 1476 pxor $twres,@tweak[5]
1477 aesenc $rndkey1,$inout3 1477 aesenc $rndkey1,$inout3
@@ -1485,7 +1485,7 @@ $code.=<<___;
1485 aesenclast $rndkey0,$inout0 1485 aesenclast $rndkey0,$inout0
1486 pand $twmask,$twres # isolate carry and residue 1486 pand $twmask,$twres # isolate carry and residue
1487 aesenclast $rndkey0,$inout1 1487 aesenclast $rndkey0,$inout1
1488 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1488 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1489 aesenclast $rndkey0,$inout2 1489 aesenclast $rndkey0,$inout2
1490 pxor $twres,@tweak[5] 1490 pxor $twres,@tweak[5]
1491 aesenclast $rndkey0,$inout3 1491 aesenclast $rndkey0,$inout3
@@ -1499,7 +1499,7 @@ $code.=<<___;
1499 xorps `16*0`(%rsp),$inout0 # output^=tweak 1499 xorps `16*0`(%rsp),$inout0 # output^=tweak
1500 pand $twmask,$twres # isolate carry and residue 1500 pand $twmask,$twres # isolate carry and residue
1501 xorps `16*1`(%rsp),$inout1 1501 xorps `16*1`(%rsp),$inout1
1502 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1502 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1503 pxor $twres,@tweak[5] 1503 pxor $twres,@tweak[5]
1504 1504
1505 xorps `16*2`(%rsp),$inout2 1505 xorps `16*2`(%rsp),$inout2
@@ -1750,7 +1750,7 @@ ___
1750 movdqa @tweak[5],@tweak[$i] 1750 movdqa @tweak[5],@tweak[$i]
1751 paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1751 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1752 pand $twmask,$twres # isolate carry and residue 1752 pand $twmask,$twres # isolate carry and residue
1753 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1753 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1754 pxor $twres,@tweak[5] 1754 pxor $twres,@tweak[5]
1755___ 1755___
1756 } 1756 }
@@ -1856,7 +1856,7 @@ $code.=<<___;
1856 aesdec $rndkey0,$inout0 1856 aesdec $rndkey0,$inout0
1857 pand $twmask,$twres # isolate carry and residue 1857 pand $twmask,$twres # isolate carry and residue
1858 aesdec $rndkey0,$inout1 1858 aesdec $rndkey0,$inout1
1859 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1859 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1860 aesdec $rndkey0,$inout2 1860 aesdec $rndkey0,$inout2
1861 pxor $twres,@tweak[5] 1861 pxor $twres,@tweak[5]
1862 aesdec $rndkey0,$inout3 1862 aesdec $rndkey0,$inout3
@@ -1871,7 +1871,7 @@ $code.=<<___;
1871 aesdec $rndkey1,$inout0 1871 aesdec $rndkey1,$inout0
1872 pand $twmask,$twres # isolate carry and residue 1872 pand $twmask,$twres # isolate carry and residue
1873 aesdec $rndkey1,$inout1 1873 aesdec $rndkey1,$inout1
1874 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1874 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1875 aesdec $rndkey1,$inout2 1875 aesdec $rndkey1,$inout2
1876 pxor $twres,@tweak[5] 1876 pxor $twres,@tweak[5]
1877 aesdec $rndkey1,$inout3 1877 aesdec $rndkey1,$inout3
@@ -1885,7 +1885,7 @@ $code.=<<___;
1885 aesdeclast $rndkey0,$inout0 1885 aesdeclast $rndkey0,$inout0
1886 pand $twmask,$twres # isolate carry and residue 1886 pand $twmask,$twres # isolate carry and residue
1887 aesdeclast $rndkey0,$inout1 1887 aesdeclast $rndkey0,$inout1
1888 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1888 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1889 aesdeclast $rndkey0,$inout2 1889 aesdeclast $rndkey0,$inout2
1890 pxor $twres,@tweak[5] 1890 pxor $twres,@tweak[5]
1891 aesdeclast $rndkey0,$inout3 1891 aesdeclast $rndkey0,$inout3
@@ -1899,7 +1899,7 @@ $code.=<<___;
1899 xorps `16*0`(%rsp),$inout0 # output^=tweak 1899 xorps `16*0`(%rsp),$inout0 # output^=tweak
1900 pand $twmask,$twres # isolate carry and residue 1900 pand $twmask,$twres # isolate carry and residue
1901 xorps `16*1`(%rsp),$inout1 1901 xorps `16*1`(%rsp),$inout1
1902 pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1902 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1903 pxor $twres,@tweak[5] 1903 pxor $twres,@tweak[5]
1904 1904
1905 xorps `16*2`(%rsp),$inout2 1905 xorps `16*2`(%rsp),$inout2
@@ -2520,7 +2520,7 @@ ___
2520# Vinodh Gopal <vinodh.gopal@intel.com> 2520# Vinodh Gopal <vinodh.gopal@intel.com>
2521# Kahraman Akdemir 2521# Kahraman Akdemir
2522# 2522#
2523# Agressively optimized in respect to aeskeygenassist's critical path 2523# Aggressively optimized in respect to aeskeygenassist's critical path
2524# and is contained in %xmm0-5 to meet Win64 ABI requirement. 2524# and is contained in %xmm0-5 to meet Win64 ABI requirement.
2525# 2525#
2526$code.=<<___; 2526$code.=<<___;
@@ -2602,7 +2602,7 @@ __aesni_set_encrypt_key:
2602 2602
2603.align 16 2603.align 16
2604.L14rounds: 2604.L14rounds:
2605 movups 16($inp),%xmm2 # remaning half of *userKey 2605 movups 16($inp),%xmm2 # remaining half of *userKey
2606 mov \$13,$bits # 14 rounds for 256 2606 mov \$13,$bits # 14 rounds for 256
2607 lea 16(%rax),%rax 2607 lea 16(%rax),%rax
2608 $movkey %xmm0,($key) # round 0 2608 $movkey %xmm0,($key) # round 0
@@ -2862,7 +2862,7 @@ xts_se_handler:
2862 mov 56($disp),%r11 # disp->HandlerData 2862 mov 56($disp),%r11 # disp->HandlerData
2863 2863
2864 mov 0(%r11),%r10d # HandlerData[0] 2864 mov 0(%r11),%r10d # HandlerData[0]
2865 lea (%rsi,%r10),%r10 # prologue lable 2865 lea (%rsi,%r10),%r10 # prologue label
2866 cmp %r10,%rbx # context->Rip<prologue label 2866 cmp %r10,%rbx # context->Rip<prologue label
2867 jb .Lcommon_seh_tail 2867 jb .Lcommon_seh_tail
2868 2868