diff options
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aesni-x86_64.pl')
-rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-x86_64.pl | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
index f0b30109ae..a849073728 100644
--- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -52,7 +52,7 @@ | |||
52 | # nothing one can do and the result appears optimal. CCM result is | 52 | # nothing one can do and the result appears optimal. CCM result is |
53 | # identical to CBC, because CBC-MAC is essentially CBC encrypt without | 53 | # identical to CBC, because CBC-MAC is essentially CBC encrypt without |
54 | # saving output. CCM CTR "stays invisible," because it's neatly | 54 | # saving output. CCM CTR "stays invisible," because it's neatly |
55 | # interleaved wih CBC-MAC. This provides ~30% improvement over | 55 | # interleaved with CBC-MAC. This provides ~30% improvement over |
56 | # "straghtforward" CCM implementation with CTR and CBC-MAC performed | 56 | # "straghtforward" CCM implementation with CTR and CBC-MAC performed |
57 | # disjointly. Parallelizable modes practically achieve the theoretical | 57 | # disjointly. Parallelizable modes practically achieve the theoretical |
58 | # limit. | 58 | # limit. |
@@ -136,7 +136,7 @@ | |||
136 | # asymptotic, if it can be surpassed, isn't it? What happens there? | 136 | # asymptotic, if it can be surpassed, isn't it? What happens there? |
137 | # Rewind to CBC paragraph for the answer. Yes, out-of-order execution | 137 | # Rewind to CBC paragraph for the answer. Yes, out-of-order execution |
138 | # magic is responsible for this. Processor overlaps not only the | 138 | # magic is responsible for this. Processor overlaps not only the |
139 | # additional instructions with AES ones, but even AES instuctions | 139 | # additional instructions with AES ones, but even AES instructions |
140 | # processing adjacent triplets of independent blocks. In the 6x case | 140 | # processing adjacent triplets of independent blocks. In the 6x case |
141 | # additional instructions still claim disproportionally small amount | 141 | # additional instructions still claim disproportionally small amount |
142 | # of additional cycles, but in 8x case number of instructions must be | 142 | # of additional cycles, but in 8x case number of instructions must be |
@@ -1350,7 +1350,7 @@ ___ | |||
1350 | movdqa @tweak[5],@tweak[$i] | 1350 | movdqa @tweak[5],@tweak[$i] |
1351 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak | 1351 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak |
1352 | pand $twmask,$twres # isolate carry and residue | 1352 | pand $twmask,$twres # isolate carry and residue |
1353 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1353 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1354 | pxor $twres,@tweak[5] | 1354 | pxor $twres,@tweak[5] |
1355 | ___ | 1355 | ___ |
1356 | } | 1356 | } |
@@ -1456,7 +1456,7 @@ $code.=<<___; | |||
1456 | aesenc $rndkey0,$inout0 | 1456 | aesenc $rndkey0,$inout0 |
1457 | pand $twmask,$twres # isolate carry and residue | 1457 | pand $twmask,$twres # isolate carry and residue |
1458 | aesenc $rndkey0,$inout1 | 1458 | aesenc $rndkey0,$inout1 |
1459 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1459 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1460 | aesenc $rndkey0,$inout2 | 1460 | aesenc $rndkey0,$inout2 |
1461 | pxor $twres,@tweak[5] | 1461 | pxor $twres,@tweak[5] |
1462 | aesenc $rndkey0,$inout3 | 1462 | aesenc $rndkey0,$inout3 |
@@ -1471,7 +1471,7 @@ $code.=<<___; | |||
1471 | aesenc $rndkey1,$inout0 | 1471 | aesenc $rndkey1,$inout0 |
1472 | pand $twmask,$twres # isolate carry and residue | 1472 | pand $twmask,$twres # isolate carry and residue |
1473 | aesenc $rndkey1,$inout1 | 1473 | aesenc $rndkey1,$inout1 |
1474 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1474 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1475 | aesenc $rndkey1,$inout2 | 1475 | aesenc $rndkey1,$inout2 |
1476 | pxor $twres,@tweak[5] | 1476 | pxor $twres,@tweak[5] |
1477 | aesenc $rndkey1,$inout3 | 1477 | aesenc $rndkey1,$inout3 |
@@ -1485,7 +1485,7 @@ $code.=<<___; | |||
1485 | aesenclast $rndkey0,$inout0 | 1485 | aesenclast $rndkey0,$inout0 |
1486 | pand $twmask,$twres # isolate carry and residue | 1486 | pand $twmask,$twres # isolate carry and residue |
1487 | aesenclast $rndkey0,$inout1 | 1487 | aesenclast $rndkey0,$inout1 |
1488 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1488 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1489 | aesenclast $rndkey0,$inout2 | 1489 | aesenclast $rndkey0,$inout2 |
1490 | pxor $twres,@tweak[5] | 1490 | pxor $twres,@tweak[5] |
1491 | aesenclast $rndkey0,$inout3 | 1491 | aesenclast $rndkey0,$inout3 |
@@ -1499,7 +1499,7 @@ $code.=<<___; | |||
1499 | xorps `16*0`(%rsp),$inout0 # output^=tweak | 1499 | xorps `16*0`(%rsp),$inout0 # output^=tweak |
1500 | pand $twmask,$twres # isolate carry and residue | 1500 | pand $twmask,$twres # isolate carry and residue |
1501 | xorps `16*1`(%rsp),$inout1 | 1501 | xorps `16*1`(%rsp),$inout1 |
1502 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1502 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1503 | pxor $twres,@tweak[5] | 1503 | pxor $twres,@tweak[5] |
1504 | 1504 | ||
1505 | xorps `16*2`(%rsp),$inout2 | 1505 | xorps `16*2`(%rsp),$inout2 |
@@ -1750,7 +1750,7 @@ ___ | |||
1750 | movdqa @tweak[5],@tweak[$i] | 1750 | movdqa @tweak[5],@tweak[$i] |
1751 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak | 1751 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak |
1752 | pand $twmask,$twres # isolate carry and residue | 1752 | pand $twmask,$twres # isolate carry and residue |
1753 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1753 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1754 | pxor $twres,@tweak[5] | 1754 | pxor $twres,@tweak[5] |
1755 | ___ | 1755 | ___ |
1756 | } | 1756 | } |
@@ -1856,7 +1856,7 @@ $code.=<<___; | |||
1856 | aesdec $rndkey0,$inout0 | 1856 | aesdec $rndkey0,$inout0 |
1857 | pand $twmask,$twres # isolate carry and residue | 1857 | pand $twmask,$twres # isolate carry and residue |
1858 | aesdec $rndkey0,$inout1 | 1858 | aesdec $rndkey0,$inout1 |
1859 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1859 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1860 | aesdec $rndkey0,$inout2 | 1860 | aesdec $rndkey0,$inout2 |
1861 | pxor $twres,@tweak[5] | 1861 | pxor $twres,@tweak[5] |
1862 | aesdec $rndkey0,$inout3 | 1862 | aesdec $rndkey0,$inout3 |
@@ -1871,7 +1871,7 @@ $code.=<<___; | |||
1871 | aesdec $rndkey1,$inout0 | 1871 | aesdec $rndkey1,$inout0 |
1872 | pand $twmask,$twres # isolate carry and residue | 1872 | pand $twmask,$twres # isolate carry and residue |
1873 | aesdec $rndkey1,$inout1 | 1873 | aesdec $rndkey1,$inout1 |
1874 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1874 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1875 | aesdec $rndkey1,$inout2 | 1875 | aesdec $rndkey1,$inout2 |
1876 | pxor $twres,@tweak[5] | 1876 | pxor $twres,@tweak[5] |
1877 | aesdec $rndkey1,$inout3 | 1877 | aesdec $rndkey1,$inout3 |
@@ -1885,7 +1885,7 @@ $code.=<<___; | |||
1885 | aesdeclast $rndkey0,$inout0 | 1885 | aesdeclast $rndkey0,$inout0 |
1886 | pand $twmask,$twres # isolate carry and residue | 1886 | pand $twmask,$twres # isolate carry and residue |
1887 | aesdeclast $rndkey0,$inout1 | 1887 | aesdeclast $rndkey0,$inout1 |
1888 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1888 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1889 | aesdeclast $rndkey0,$inout2 | 1889 | aesdeclast $rndkey0,$inout2 |
1890 | pxor $twres,@tweak[5] | 1890 | pxor $twres,@tweak[5] |
1891 | aesdeclast $rndkey0,$inout3 | 1891 | aesdeclast $rndkey0,$inout3 |
@@ -1899,7 +1899,7 @@ $code.=<<___; | |||
1899 | xorps `16*0`(%rsp),$inout0 # output^=tweak | 1899 | xorps `16*0`(%rsp),$inout0 # output^=tweak |
1900 | pand $twmask,$twres # isolate carry and residue | 1900 | pand $twmask,$twres # isolate carry and residue |
1901 | xorps `16*1`(%rsp),$inout1 | 1901 | xorps `16*1`(%rsp),$inout1 |
1902 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1902 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
1903 | pxor $twres,@tweak[5] | 1903 | pxor $twres,@tweak[5] |
1904 | 1904 | ||
1905 | xorps `16*2`(%rsp),$inout2 | 1905 | xorps `16*2`(%rsp),$inout2 |
@@ -2520,7 +2520,7 @@ ___ | |||
2520 | # Vinodh Gopal <vinodh.gopal@intel.com> | 2520 | # Vinodh Gopal <vinodh.gopal@intel.com> |
2521 | # Kahraman Akdemir | 2521 | # Kahraman Akdemir |
2522 | # | 2522 | # |
2523 | # Agressively optimized in respect to aeskeygenassist's critical path | 2523 | # Aggressively optimized in respect to aeskeygenassist's critical path |
2524 | # and is contained in %xmm0-5 to meet Win64 ABI requirement. | 2524 | # and is contained in %xmm0-5 to meet Win64 ABI requirement. |
2525 | # | 2525 | # |
2526 | $code.=<<___; | 2526 | $code.=<<___; |
@@ -2602,7 +2602,7 @@ __aesni_set_encrypt_key: | |||
2602 | 2602 | ||
2603 | .align 16 | 2603 | .align 16 |
2604 | .L14rounds: | 2604 | .L14rounds: |
2605 | movups 16($inp),%xmm2 # remaning half of *userKey | 2605 | movups 16($inp),%xmm2 # remaining half of *userKey |
2606 | mov \$13,$bits # 14 rounds for 256 | 2606 | mov \$13,$bits # 14 rounds for 256 |
2607 | lea 16(%rax),%rax | 2607 | lea 16(%rax),%rax |
2608 | $movkey %xmm0,($key) # round 0 | 2608 | $movkey %xmm0,($key) # round 0 |
@@ -2862,7 +2862,7 @@ xts_se_handler: | |||
2862 | mov 56($disp),%r11 # disp->HandlerData | 2862 | mov 56($disp),%r11 # disp->HandlerData |
2863 | 2863 | ||
2864 | mov 0(%r11),%r10d # HandlerData[0] | 2864 | mov 0(%r11),%r10d # HandlerData[0] |
2865 | lea (%rsi,%r10),%r10 # prologue lable | 2865 | lea (%rsi,%r10),%r10 # prologue label |
2866 | cmp %r10,%rbx # context->Rip<prologue label | 2866 | cmp %r10,%rbx # context->Rip<prologue label |
2867 | jb .Lcommon_seh_tail | 2867 | jb .Lcommon_seh_tail |
2868 | 2868 | ||