author     jmc <>  2022-12-26 07:18:53 +0000
committer  jmc <>  2022-12-26 07:18:53 +0000
commit     2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch)
tree       26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/aes
parent     df59a12113ba6ec4c6faecd033d46176453f697e (diff)
spelling fixes; from paul tagliamonte
i removed the arithmetics -> arithmetic changes, as i felt they were not clearly correct

ok tb
Diffstat (limited to 'src/lib/libcrypto/aes')
-rw-r--r--  src/lib/libcrypto/aes/asm/aes-586.pl             6
-rw-r--r--  src/lib/libcrypto/aes/asm/aes-mips.pl            2
-rw-r--r--  src/lib/libcrypto/aes/asm/aes-ppc.pl             2
-rw-r--r--  src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl   4
-rw-r--r--  src/lib/libcrypto/aes/asm/aesni-x86_64.pl       30
-rw-r--r--  src/lib/libcrypto/aes/asm/bsaes-x86_64.pl        2
6 files changed, 23 insertions, 23 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
index 3ba8a26eaa..c5ae3f6903 100644
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -48,8 +48,8 @@
 # better performance on most recent µ-archs...
 #
 # Third version adds AES_cbc_encrypt implementation, which resulted in
-# up to 40% performance imrovement of CBC benchmark results. 40% was
-# observed on P4 core, where "overall" imrovement coefficient, i.e. if
+# up to 40% performance improvement of CBC benchmark results. 40% was
+# observed on P4 core, where "overall" improvement coefficient, i.e. if
 # compared to PIC generated by GCC and in CBC mode, was observed to be
 # as large as 4x:-) CBC performance is virtually identical to ECB now
 # and on some platforms even better, e.g. 17.6 "small" cycles/byte on
@@ -228,7 +228,7 @@ $small_footprint=1; # $small_footprint=1 code is ~5% slower [on
 			# contention and in hope to "collect" 5% back
 			# in real-life applications...
 
-$vertical_spin=0;	# shift "verticaly" defaults to 0, because of
+$vertical_spin=0;	# shift "vertically" defaults to 0, because of
 			# its proof-of-concept status...
 # Note that there is no decvert(), as well as last encryption round is
 # performed with "horizontal" shifts. This is because this "vertical"
diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl
index 2f6ff74ffe..b95d1afd5a 100644
--- a/src/lib/libcrypto/aes/asm/aes-mips.pl
+++ b/src/lib/libcrypto/aes/asm/aes-mips.pl
@@ -106,7 +106,7 @@ my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
 my ($key0,$cnt)=($gp,$fp);
 
-# instuction ordering is "stolen" from output from MIPSpro assembler
+# instruction ordering is "stolen" from output from MIPSpro assembler
 # invoked with -mips3 -O3 arguments...
 $code.=<<___;
 .align	5
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl
index 7c52cbe5f9..91a46f60ed 100644
--- a/src/lib/libcrypto/aes/asm/aes-ppc.pl
+++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl
@@ -19,7 +19,7 @@
 # February 2010
 #
 # Rescheduling instructions to favour Power6 pipeline gave 10%
-# performance improvement on the platfrom in question (and marginal
+# performance improvement on the platform in question (and marginal
 # improvement even on others). It should be noted that Power6 fails
 # to process byte in 18 cycles, only in 23, because it fails to issue
 # 4 load instructions in two cycles, only in 3. As result non-compact
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index bc6c8f3fc0..880bcc2d58 100644
--- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -250,7 +250,7 @@ ___
     $r++;	unshift(@rndkey,pop(@rndkey));
 };
 
-sub Xupdate_ssse3_16_31()		# recall that $Xi starts wtih 4
+sub Xupdate_ssse3_16_31()		# recall that $Xi starts with 4
 { use integer;
   my $body = shift;
   my @insns = (&$body,&$body,&$body,&$body);	# 40 instructions
@@ -767,7 +767,7 @@ ___
     $r++;	unshift(@rndkey,pop(@rndkey));
 };
 
-sub Xupdate_avx_16_31()		# recall that $Xi starts wtih 4
+sub Xupdate_avx_16_31()		# recall that $Xi starts with 4
 { use integer;
   my $body = shift;
   my @insns = (&$body,&$body,&$body,&$body);	# 40 instructions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
index f0b30109ae..a849073728 100644
--- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -52,7 +52,7 @@
 # nothing one can do and the result appears optimal. CCM result is
 # identical to CBC, because CBC-MAC is essentially CBC encrypt without
 # saving output. CCM CTR "stays invisible," because it's neatly
-# interleaved wih CBC-MAC. This provides ~30% improvement over
+# interleaved with CBC-MAC. This provides ~30% improvement over
 # "straghtforward" CCM implementation with CTR and CBC-MAC performed
 # disjointly. Parallelizable modes practically achieve the theoretical
 # limit.
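
The paragraph above rests on the observation that CBC-MAC is CBC encryption with every ciphertext block discarded except as the next chaining value, which is what lets the CTR encryption be interleaved with it almost for free. A minimal scalar sketch of that observation follows; block_encrypt() is a hypothetical one-block primitive used only for illustration, and CCM's message formatting (flags/length header) is omitted.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical one-block cipher primitive, standing in for the AES rounds
 * emitted by this file; not an API of libcrypto. */
void block_encrypt(const uint8_t key[16], const uint8_t in[16], uint8_t out[16]);

/* CBC-MAC over whole 16-byte blocks: the dataflow is exactly CBC encryption,
 * except no ciphertext block is written out -- each one only feeds the next
 * chaining value, and the last one becomes the tag. */
void
cbc_mac(const uint8_t key[16], const uint8_t *msg, size_t nblocks,
    uint8_t tag[16])
{
	uint8_t chain[16] = { 0 };		/* zero IV */
	size_t i;
	int j;

	for (i = 0; i < nblocks; i++) {
		for (j = 0; j < 16; j++)
			chain[j] ^= msg[16 * i + j];	/* XOR in next block */
		block_encrypt(key, chain, chain);	/* "CBC encrypt without
							   saving output" */
	}
	memcpy(tag, chain, 16);
}
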
@@ -136,7 +136,7 @@
 # asymptotic, if it can be surpassed, isn't it? What happens there?
 # Rewind to CBC paragraph for the answer. Yes, out-of-order execution
 # magic is responsible for this. Processor overlaps not only the
-# additional instructions with AES ones, but even AES instuctions
+# additional instructions with AES ones, but even AES instructions
 # processing adjacent triplets of independent blocks. In the 6x case
 # additional instructions still claim disproportionally small amount
 # of additional cycles, but in 8x case number of instructions must be
@@ -1350,7 +1350,7 @@ ___
 	movdqa	@tweak[5],@tweak[$i]
 	paddq	@tweak[5],@tweak[5]		# psllq	1,$tweak
 	pand	$twmask,$twres			# isolate carry and residue
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	pxor	$twres,@tweak[5]
 ___
     }
@@ -1456,7 +1456,7 @@ $code.=<<___;
 	aesenc	$rndkey0,$inout0
 	pand	$twmask,$twres			# isolate carry and residue
 	aesenc	$rndkey0,$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	aesenc	$rndkey0,$inout2
 	pxor	$twres,@tweak[5]
 	aesenc	$rndkey0,$inout3
@@ -1471,7 +1471,7 @@ $code.=<<___;
 	aesenc	$rndkey1,$inout0
 	pand	$twmask,$twres			# isolate carry and residue
 	aesenc	$rndkey1,$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	aesenc	$rndkey1,$inout2
 	pxor	$twres,@tweak[5]
 	aesenc	$rndkey1,$inout3
@@ -1485,7 +1485,7 @@ $code.=<<___;
 	aesenclast	$rndkey0,$inout0
 	pand	$twmask,$twres			# isolate carry and residue
 	aesenclast	$rndkey0,$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	aesenclast	$rndkey0,$inout2
 	pxor	$twres,@tweak[5]
 	aesenclast	$rndkey0,$inout3
@@ -1499,7 +1499,7 @@ $code.=<<___;
 	xorps	`16*0`(%rsp),$inout0		# output^=tweak
 	pand	$twmask,$twres			# isolate carry and residue
 	xorps	`16*1`(%rsp),$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	pxor	$twres,@tweak[5]
 
 	xorps	`16*2`(%rsp),$inout2
@@ -1750,7 +1750,7 @@ ___
 	movdqa	@tweak[5],@tweak[$i]
 	paddq	@tweak[5],@tweak[5]		# psllq	1,$tweak
 	pand	$twmask,$twres			# isolate carry and residue
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	pxor	$twres,@tweak[5]
 ___
     }
@@ -1856,7 +1856,7 @@ $code.=<<___;
 	aesdec	$rndkey0,$inout0
 	pand	$twmask,$twres			# isolate carry and residue
 	aesdec	$rndkey0,$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	aesdec	$rndkey0,$inout2
 	pxor	$twres,@tweak[5]
 	aesdec	$rndkey0,$inout3
@@ -1871,7 +1871,7 @@ $code.=<<___;
 	aesdec	$rndkey1,$inout0
 	pand	$twmask,$twres			# isolate carry and residue
 	aesdec	$rndkey1,$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	aesdec	$rndkey1,$inout2
 	pxor	$twres,@tweak[5]
 	aesdec	$rndkey1,$inout3
@@ -1885,7 +1885,7 @@ $code.=<<___;
 	aesdeclast	$rndkey0,$inout0
 	pand	$twmask,$twres			# isolate carry and residue
 	aesdeclast	$rndkey0,$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	aesdeclast	$rndkey0,$inout2
 	pxor	$twres,@tweak[5]
 	aesdeclast	$rndkey0,$inout3
@@ -1899,7 +1899,7 @@ $code.=<<___;
 	xorps	`16*0`(%rsp),$inout0		# output^=tweak
 	pand	$twmask,$twres			# isolate carry and residue
 	xorps	`16*1`(%rsp),$inout1
-	pcmpgtd	@tweak[5],$twtmp		# broadcat upper bits
+	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
 	pxor	$twres,@tweak[5]
 
 	xorps	`16*2`(%rsp),$inout2
@@ -2520,7 +2520,7 @@ ___
 # Vinodh Gopal <vinodh.gopal@intel.com>
 # Kahraman Akdemir
 #
-# Agressively optimized in respect to aeskeygenassist's critical path
+# Aggressively optimized in respect to aeskeygenassist's critical path
 # and is contained in %xmm0-5 to meet Win64 ABI requirement.
 #
 $code.=<<___;
@@ -2602,7 +2602,7 @@ __aesni_set_encrypt_key:
 
 .align	16
 .L14rounds:
-	movups	16($inp),%xmm2			# remaning half of *userKey
+	movups	16($inp),%xmm2			# remaining half of *userKey
 	mov	\$13,$bits			# 14 rounds for 256
 	lea	16(%rax),%rax
 	$movkey	%xmm0,($key)			# round 0
@@ -2862,7 +2862,7 @@ xts_se_handler:
 	mov	56($disp),%r11		# disp->HandlerData
 
 	mov	0(%r11),%r10d		# HandlerData[0]
-	lea	(%rsi,%r10),%r10	# prologue lable
+	lea	(%rsi,%r10),%r10	# prologue label
 	cmp	%r10,%rbx		# context->Rip<prologue label
 	jb	.Lcommon_seh_tail
 
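
The "broadcast upper bits" / "isolate carry and residue" comments corrected throughout this file all annotate the same step: advancing the XTS tweak by multiplying it by x in GF(2^128) and reducing by x^128 + x^7 + x^2 + x + 1 (low byte 0x87). A scalar sketch of what that paddq/pcmpgtd/pand/pxor sequence computes, assuming a little-endian tweak split into two 64-bit limbs; the helper below is illustrative only, not part of the library.

#include <stdint.h>

/* Advance the XTS tweak: t <- t * x in GF(2^128), reduced modulo
 * x^128 + x^7 + x^2 + x + 1.  t[0] holds the low 64 bits, t[1] the high
 * 64 bits.  The SSE code reaches the same result branch-free: paddq
 * doubles both halves, pcmpgtd broadcasts the shifted-out top bits into
 * full-width masks, pand isolates the carry and the 0x87 residue, and
 * pxor folds them back into the tweak. */
static void
xts_double_tweak(uint64_t t[2])
{
	uint64_t carry_lo = t[0] >> 63;	/* bit carried into the high half */
	uint64_t carry_hi = t[1] >> 63;	/* bit carried out of the top */

	t[1] = (t[1] << 1) | carry_lo;
	t[0] = (t[0] << 1) ^ (carry_hi ? 0x87 : 0);	/* reduce */
}
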
diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
index 41b90f0844..14dc2c02e7 100644
--- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
@@ -20,7 +20,7 @@
 # - code was made position-independent;
 # - rounds were folded into a loop resulting in >5x size reduction
 #   from 12.5KB to 2.2KB;
-# - above was possibile thanks to mixcolumns() modification that
+# - above was possible thanks to mixcolumns() modification that
 #   allowed to feed its output back to aesenc[last], this was
 #   achieved at cost of two additional inter-registers moves;
 # - some instruction reordering and interleaving;