diff options
| author | jmc <> | 2022-12-26 07:18:53 +0000 |
|---|---|---|
| committer | jmc <> | 2022-12-26 07:18:53 +0000 |
| commit | 2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch) | |
| tree | 26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/aes | |
| parent | df59a12113ba6ec4c6faecd033d46176453f697e (diff) | |
| download | openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.gz openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.bz2 openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.zip | |
spelling fixes; from paul tagliamonte
i removed the arithmetics -> arithmetic changes, as i felt they
were not clearly correct
ok tb
Diffstat (limited to 'src/lib/libcrypto/aes')
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-586.pl | 6 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-mips.pl | 2 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-ppc.pl | 2 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | 4 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-x86_64.pl | 30 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/bsaes-x86_64.pl | 2 |
6 files changed, 23 insertions, 23 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index 3ba8a26eaa..c5ae3f6903 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl | |||
| @@ -48,8 +48,8 @@ | |||
| 48 | # better performance on most recent µ-archs... | 48 | # better performance on most recent µ-archs... |
| 49 | # | 49 | # |
| 50 | # Third version adds AES_cbc_encrypt implementation, which resulted in | 50 | # Third version adds AES_cbc_encrypt implementation, which resulted in |
| 51 | # up to 40% performance imrovement of CBC benchmark results. 40% was | 51 | # up to 40% performance improvement of CBC benchmark results. 40% was |
| 52 | # observed on P4 core, where "overall" imrovement coefficient, i.e. if | 52 | # observed on P4 core, where "overall" improvement coefficient, i.e. if |
| 53 | # compared to PIC generated by GCC and in CBC mode, was observed to be | 53 | # compared to PIC generated by GCC and in CBC mode, was observed to be |
| 54 | # as large as 4x:-) CBC performance is virtually identical to ECB now | 54 | # as large as 4x:-) CBC performance is virtually identical to ECB now |
| 55 | # and on some platforms even better, e.g. 17.6 "small" cycles/byte on | 55 | # and on some platforms even better, e.g. 17.6 "small" cycles/byte on |
| @@ -228,7 +228,7 @@ $small_footprint=1; # $small_footprint=1 code is ~5% slower [on | |||
| 228 | # contention and in hope to "collect" 5% back | 228 | # contention and in hope to "collect" 5% back |
| 229 | # in real-life applications... | 229 | # in real-life applications... |
| 230 | 230 | ||
| 231 | $vertical_spin=0; # shift "verticaly" defaults to 0, because of | 231 | $vertical_spin=0; # shift "vertically" defaults to 0, because of |
| 232 | # its proof-of-concept status... | 232 | # its proof-of-concept status... |
| 233 | # Note that there is no decvert(), as well as last encryption round is | 233 | # Note that there is no decvert(), as well as last encryption round is |
| 234 | # performed with "horizontal" shifts. This is because this "vertical" | 234 | # performed with "horizontal" shifts. This is because this "vertical" |
diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl index 2f6ff74ffe..b95d1afd5a 100644 --- a/src/lib/libcrypto/aes/asm/aes-mips.pl +++ b/src/lib/libcrypto/aes/asm/aes-mips.pl | |||
| @@ -106,7 +106,7 @@ my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); | |||
| 106 | my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); | 106 | my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); |
| 107 | my ($key0,$cnt)=($gp,$fp); | 107 | my ($key0,$cnt)=($gp,$fp); |
| 108 | 108 | ||
| 109 | # instuction ordering is "stolen" from output from MIPSpro assembler | 109 | # instruction ordering is "stolen" from output from MIPSpro assembler |
| 110 | # invoked with -mips3 -O3 arguments... | 110 | # invoked with -mips3 -O3 arguments... |
| 111 | $code.=<<___; | 111 | $code.=<<___; |
| 112 | .align 5 | 112 | .align 5 |
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl index 7c52cbe5f9..91a46f60ed 100644 --- a/src/lib/libcrypto/aes/asm/aes-ppc.pl +++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | # February 2010 | 19 | # February 2010 |
| 20 | # | 20 | # |
| 21 | # Rescheduling instructions to favour Power6 pipeline gave 10% | 21 | # Rescheduling instructions to favour Power6 pipeline gave 10% |
| 22 | # performance improvement on the platfrom in question (and marginal | 22 | # performance improvement on the platform in question (and marginal |
| 23 | # improvement even on others). It should be noted that Power6 fails | 23 | # improvement even on others). It should be noted that Power6 fails |
| 24 | # to process byte in 18 cycles, only in 23, because it fails to issue | 24 | # to process byte in 18 cycles, only in 23, because it fails to issue |
| 25 | # 4 load instructions in two cycles, only in 3. As result non-compact | 25 | # 4 load instructions in two cycles, only in 3. As result non-compact |
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index bc6c8f3fc0..880bcc2d58 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | |||
| @@ -250,7 +250,7 @@ ___ | |||
| 250 | $r++; unshift(@rndkey,pop(@rndkey)); | 250 | $r++; unshift(@rndkey,pop(@rndkey)); |
| 251 | }; | 251 | }; |
| 252 | 252 | ||
| 253 | sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 | 253 | sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4 |
| 254 | { use integer; | 254 | { use integer; |
| 255 | my $body = shift; | 255 | my $body = shift; |
| 256 | my @insns = (&$body,&$body,&$body,&$body); # 40 instructions | 256 | my @insns = (&$body,&$body,&$body,&$body); # 40 instructions |
| @@ -767,7 +767,7 @@ ___ | |||
| 767 | $r++; unshift(@rndkey,pop(@rndkey)); | 767 | $r++; unshift(@rndkey,pop(@rndkey)); |
| 768 | }; | 768 | }; |
| 769 | 769 | ||
| 770 | sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 | 770 | sub Xupdate_avx_16_31() # recall that $Xi starts with 4 |
| 771 | { use integer; | 771 | { use integer; |
| 772 | my $body = shift; | 772 | my $body = shift; |
| 773 | my @insns = (&$body,&$body,&$body,&$body); # 40 instructions | 773 | my @insns = (&$body,&$body,&$body,&$body); # 40 instructions |
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl index f0b30109ae..a849073728 100644 --- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl | |||
| @@ -52,7 +52,7 @@ | |||
| 52 | # nothing one can do and the result appears optimal. CCM result is | 52 | # nothing one can do and the result appears optimal. CCM result is |
| 53 | # identical to CBC, because CBC-MAC is essentially CBC encrypt without | 53 | # identical to CBC, because CBC-MAC is essentially CBC encrypt without |
| 54 | # saving output. CCM CTR "stays invisible," because it's neatly | 54 | # saving output. CCM CTR "stays invisible," because it's neatly |
| 55 | # interleaved wih CBC-MAC. This provides ~30% improvement over | 55 | # interleaved with CBC-MAC. This provides ~30% improvement over |
| 56 | # "straightforward" CCM implementation with CTR and CBC-MAC performed | 56 | # "straightforward" CCM implementation with CTR and CBC-MAC performed |
| 57 | # disjointly. Parallelizable modes practically achieve the theoretical | 57 | # disjointly. Parallelizable modes practically achieve the theoretical |
| 58 | # limit. | 58 | # limit. |
| @@ -136,7 +136,7 @@ | |||
| 136 | # asymptotic, if it can be surpassed, isn't it? What happens there? | 136 | # asymptotic, if it can be surpassed, isn't it? What happens there? |
| 137 | # Rewind to CBC paragraph for the answer. Yes, out-of-order execution | 137 | # Rewind to CBC paragraph for the answer. Yes, out-of-order execution |
| 138 | # magic is responsible for this. Processor overlaps not only the | 138 | # magic is responsible for this. Processor overlaps not only the |
| 139 | # additional instructions with AES ones, but even AES instuctions | 139 | # additional instructions with AES ones, but even AES instructions |
| 140 | # processing adjacent triplets of independent blocks. In the 6x case | 140 | # processing adjacent triplets of independent blocks. In the 6x case |
| 141 | # additional instructions still claim disproportionally small amount | 141 | # additional instructions still claim disproportionally small amount |
| 142 | # of additional cycles, but in 8x case number of instructions must be | 142 | # of additional cycles, but in 8x case number of instructions must be |
| @@ -1350,7 +1350,7 @@ ___ | |||
| 1350 | movdqa @tweak[5],@tweak[$i] | 1350 | movdqa @tweak[5],@tweak[$i] |
| 1351 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak | 1351 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak |
| 1352 | pand $twmask,$twres # isolate carry and residue | 1352 | pand $twmask,$twres # isolate carry and residue |
| 1353 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1353 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1354 | pxor $twres,@tweak[5] | 1354 | pxor $twres,@tweak[5] |
| 1355 | ___ | 1355 | ___ |
| 1356 | } | 1356 | } |
| @@ -1456,7 +1456,7 @@ $code.=<<___; | |||
| 1456 | aesenc $rndkey0,$inout0 | 1456 | aesenc $rndkey0,$inout0 |
| 1457 | pand $twmask,$twres # isolate carry and residue | 1457 | pand $twmask,$twres # isolate carry and residue |
| 1458 | aesenc $rndkey0,$inout1 | 1458 | aesenc $rndkey0,$inout1 |
| 1459 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1459 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1460 | aesenc $rndkey0,$inout2 | 1460 | aesenc $rndkey0,$inout2 |
| 1461 | pxor $twres,@tweak[5] | 1461 | pxor $twres,@tweak[5] |
| 1462 | aesenc $rndkey0,$inout3 | 1462 | aesenc $rndkey0,$inout3 |
| @@ -1471,7 +1471,7 @@ $code.=<<___; | |||
| 1471 | aesenc $rndkey1,$inout0 | 1471 | aesenc $rndkey1,$inout0 |
| 1472 | pand $twmask,$twres # isolate carry and residue | 1472 | pand $twmask,$twres # isolate carry and residue |
| 1473 | aesenc $rndkey1,$inout1 | 1473 | aesenc $rndkey1,$inout1 |
| 1474 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1474 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1475 | aesenc $rndkey1,$inout2 | 1475 | aesenc $rndkey1,$inout2 |
| 1476 | pxor $twres,@tweak[5] | 1476 | pxor $twres,@tweak[5] |
| 1477 | aesenc $rndkey1,$inout3 | 1477 | aesenc $rndkey1,$inout3 |
| @@ -1485,7 +1485,7 @@ $code.=<<___; | |||
| 1485 | aesenclast $rndkey0,$inout0 | 1485 | aesenclast $rndkey0,$inout0 |
| 1486 | pand $twmask,$twres # isolate carry and residue | 1486 | pand $twmask,$twres # isolate carry and residue |
| 1487 | aesenclast $rndkey0,$inout1 | 1487 | aesenclast $rndkey0,$inout1 |
| 1488 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1488 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1489 | aesenclast $rndkey0,$inout2 | 1489 | aesenclast $rndkey0,$inout2 |
| 1490 | pxor $twres,@tweak[5] | 1490 | pxor $twres,@tweak[5] |
| 1491 | aesenclast $rndkey0,$inout3 | 1491 | aesenclast $rndkey0,$inout3 |
| @@ -1499,7 +1499,7 @@ $code.=<<___; | |||
| 1499 | xorps `16*0`(%rsp),$inout0 # output^=tweak | 1499 | xorps `16*0`(%rsp),$inout0 # output^=tweak |
| 1500 | pand $twmask,$twres # isolate carry and residue | 1500 | pand $twmask,$twres # isolate carry and residue |
| 1501 | xorps `16*1`(%rsp),$inout1 | 1501 | xorps `16*1`(%rsp),$inout1 |
| 1502 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1502 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1503 | pxor $twres,@tweak[5] | 1503 | pxor $twres,@tweak[5] |
| 1504 | 1504 | ||
| 1505 | xorps `16*2`(%rsp),$inout2 | 1505 | xorps `16*2`(%rsp),$inout2 |
| @@ -1750,7 +1750,7 @@ ___ | |||
| 1750 | movdqa @tweak[5],@tweak[$i] | 1750 | movdqa @tweak[5],@tweak[$i] |
| 1751 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak | 1751 | paddq @tweak[5],@tweak[5] # psllq 1,$tweak |
| 1752 | pand $twmask,$twres # isolate carry and residue | 1752 | pand $twmask,$twres # isolate carry and residue |
| 1753 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1753 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1754 | pxor $twres,@tweak[5] | 1754 | pxor $twres,@tweak[5] |
| 1755 | ___ | 1755 | ___ |
| 1756 | } | 1756 | } |
| @@ -1856,7 +1856,7 @@ $code.=<<___; | |||
| 1856 | aesdec $rndkey0,$inout0 | 1856 | aesdec $rndkey0,$inout0 |
| 1857 | pand $twmask,$twres # isolate carry and residue | 1857 | pand $twmask,$twres # isolate carry and residue |
| 1858 | aesdec $rndkey0,$inout1 | 1858 | aesdec $rndkey0,$inout1 |
| 1859 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1859 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1860 | aesdec $rndkey0,$inout2 | 1860 | aesdec $rndkey0,$inout2 |
| 1861 | pxor $twres,@tweak[5] | 1861 | pxor $twres,@tweak[5] |
| 1862 | aesdec $rndkey0,$inout3 | 1862 | aesdec $rndkey0,$inout3 |
| @@ -1871,7 +1871,7 @@ $code.=<<___; | |||
| 1871 | aesdec $rndkey1,$inout0 | 1871 | aesdec $rndkey1,$inout0 |
| 1872 | pand $twmask,$twres # isolate carry and residue | 1872 | pand $twmask,$twres # isolate carry and residue |
| 1873 | aesdec $rndkey1,$inout1 | 1873 | aesdec $rndkey1,$inout1 |
| 1874 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1874 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1875 | aesdec $rndkey1,$inout2 | 1875 | aesdec $rndkey1,$inout2 |
| 1876 | pxor $twres,@tweak[5] | 1876 | pxor $twres,@tweak[5] |
| 1877 | aesdec $rndkey1,$inout3 | 1877 | aesdec $rndkey1,$inout3 |
| @@ -1885,7 +1885,7 @@ $code.=<<___; | |||
| 1885 | aesdeclast $rndkey0,$inout0 | 1885 | aesdeclast $rndkey0,$inout0 |
| 1886 | pand $twmask,$twres # isolate carry and residue | 1886 | pand $twmask,$twres # isolate carry and residue |
| 1887 | aesdeclast $rndkey0,$inout1 | 1887 | aesdeclast $rndkey0,$inout1 |
| 1888 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1888 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1889 | aesdeclast $rndkey0,$inout2 | 1889 | aesdeclast $rndkey0,$inout2 |
| 1890 | pxor $twres,@tweak[5] | 1890 | pxor $twres,@tweak[5] |
| 1891 | aesdeclast $rndkey0,$inout3 | 1891 | aesdeclast $rndkey0,$inout3 |
| @@ -1899,7 +1899,7 @@ $code.=<<___; | |||
| 1899 | xorps `16*0`(%rsp),$inout0 # output^=tweak | 1899 | xorps `16*0`(%rsp),$inout0 # output^=tweak |
| 1900 | pand $twmask,$twres # isolate carry and residue | 1900 | pand $twmask,$twres # isolate carry and residue |
| 1901 | xorps `16*1`(%rsp),$inout1 | 1901 | xorps `16*1`(%rsp),$inout1 |
| 1902 | pcmpgtd @tweak[5],$twtmp # broadcat upper bits | 1902 | pcmpgtd @tweak[5],$twtmp # broadcast upper bits |
| 1903 | pxor $twres,@tweak[5] | 1903 | pxor $twres,@tweak[5] |
| 1904 | 1904 | ||
| 1905 | xorps `16*2`(%rsp),$inout2 | 1905 | xorps `16*2`(%rsp),$inout2 |
| @@ -2520,7 +2520,7 @@ ___ | |||
| 2520 | # Vinodh Gopal <vinodh.gopal@intel.com> | 2520 | # Vinodh Gopal <vinodh.gopal@intel.com> |
| 2521 | # Kahraman Akdemir | 2521 | # Kahraman Akdemir |
| 2522 | # | 2522 | # |
| 2523 | # Agressively optimized in respect to aeskeygenassist's critical path | 2523 | # Aggressively optimized in respect to aeskeygenassist's critical path |
| 2524 | # and is contained in %xmm0-5 to meet Win64 ABI requirement. | 2524 | # and is contained in %xmm0-5 to meet Win64 ABI requirement. |
| 2525 | # | 2525 | # |
| 2526 | $code.=<<___; | 2526 | $code.=<<___; |
| @@ -2602,7 +2602,7 @@ __aesni_set_encrypt_key: | |||
| 2602 | 2602 | ||
| 2603 | .align 16 | 2603 | .align 16 |
| 2604 | .L14rounds: | 2604 | .L14rounds: |
| 2605 | movups 16($inp),%xmm2 # remaning half of *userKey | 2605 | movups 16($inp),%xmm2 # remaining half of *userKey |
| 2606 | mov \$13,$bits # 14 rounds for 256 | 2606 | mov \$13,$bits # 14 rounds for 256 |
| 2607 | lea 16(%rax),%rax | 2607 | lea 16(%rax),%rax |
| 2608 | $movkey %xmm0,($key) # round 0 | 2608 | $movkey %xmm0,($key) # round 0 |
| @@ -2862,7 +2862,7 @@ xts_se_handler: | |||
| 2862 | mov 56($disp),%r11 # disp->HandlerData | 2862 | mov 56($disp),%r11 # disp->HandlerData |
| 2863 | 2863 | ||
| 2864 | mov 0(%r11),%r10d # HandlerData[0] | 2864 | mov 0(%r11),%r10d # HandlerData[0] |
| 2865 | lea (%rsi,%r10),%r10 # prologue lable | 2865 | lea (%rsi,%r10),%r10 # prologue label |
| 2866 | cmp %r10,%rbx # context->Rip<prologue label | 2866 | cmp %r10,%rbx # context->Rip<prologue label |
| 2867 | jb .Lcommon_seh_tail | 2867 | jb .Lcommon_seh_tail |
| 2868 | 2868 | ||
diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl index 41b90f0844..14dc2c02e7 100644 --- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | # - code was made position-independent; | 20 | # - code was made position-independent; |
| 21 | # - rounds were folded into a loop resulting in >5x size reduction | 21 | # - rounds were folded into a loop resulting in >5x size reduction |
| 22 | # from 12.5KB to 2.2KB; | 22 | # from 12.5KB to 2.2KB; |
| 23 | # - above was possibile thanks to mixcolumns() modification that | 23 | # - above was possible thanks to mixcolumns() modification that |
| 24 | # allowed to feed its output back to aesenc[last], this was | 24 | # allowed to feed its output back to aesenc[last], this was |
| 25 | # achieved at cost of two additional inter-registers moves; | 25 | # achieved at cost of two additional inter-registers moves; |
| 26 | # - some instruction reordering and interleaving; | 26 | # - some instruction reordering and interleaving; |
