spelling fixes; from paul tagliamonte

i removed the arithmetics -> arithmetic changes, as i felt they were not clearly correct

ok tb
author: jmc <> 2022-12-26 07:18:53 +0000
committer: jmc <> 2022-12-26 07:18:53 +0000
commit: 2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch)
tree: 26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/aes
parent: df59a12113ba6ec4c6faecd033d46176453f697e (diff)
download: openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.gz
openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.bz2
openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.zip
6 files changed, 23 insertions, 23 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
index 3ba8a26eaa..c5ae3f6903 100644
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -48,8 +48,8 @@
 # better performance on most recent µ-archs...
 #
 # Third version adds AES_cbc_encrypt implementation, which resulted in
-# up to 40% performance imrovement of CBC benchmark results. 40% was
+# up to 40% performance improvement of CBC benchmark results. 40% was
-# observed on P4 core, where "overall" imrovement coefficient, i.e. if
+# observed on P4 core, where "overall" improvement coefficient, i.e. if
 # compared to PIC generated by GCC and in CBC mode, was observed to be
 # as large as 4x:-) CBC performance is virtually identical to ECB now
 # and on some platforms even better, e.g. 17.6 "small" cycles/byte on
@@ -228,7 +228,7 @@ $small_footprint=1;	# $small_footprint=1 code is ~5% slower [on
                        # contention and in hope to "collect" 5% back
                        # in real-life applications...
-$vertical_spin=0;       # shift "verticaly" defaults to 0, because of
+$vertical_spin=0;       # shift "vertically" defaults to 0, because of
                        # its proof-of-concept status...
 # Note that there is no decvert(), as well as last encryption round is
 # performed with "horizontal" shifts. This is because this "vertical"
diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl
index 2f6ff74ffe..b95d1afd5a 100644
--- a/src/lib/libcrypto/aes/asm/aes-mips.pl
+++ b/src/lib/libcrypto/aes/asm/aes-mips.pl
@@ -106,7 +106,7 @@ my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
 my ($key0,$cnt)=($gp,$fp);
-# instuction ordering is "stolen" from output from MIPSpro assembler
+# instruction ordering is "stolen" from output from MIPSpro assembler
 # invoked with -mips3 -O3 arguments...
 $code.=<<___;
 .align  5
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl
index 7c52cbe5f9..91a46f60ed 100644
--- a/src/lib/libcrypto/aes/asm/aes-ppc.pl
+++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl
@@ -19,7 +19,7 @@
 # February 2010
 #
 # Rescheduling instructions to favour Power6 pipeline gave 10%
-# performance improvement on the platfrom in question (and marginal
+# performance improvement on the platform in question (and marginal
 # improvement even on others). It should be noted that Power6 fails
 # to process byte in 18 cycles, only in 23, because it fails to issue
 # 4 load instructions in two cycles, only in 3. As result non-compact
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index bc6c8f3fc0..880bcc2d58 100644
--- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -250,7 +250,7 @@ ___
    $r++;       unshift(@rndkey,pop(@rndkey));
 };
-sub Xupdate_ssse3_16_31()               # recall that $Xi starts wtih 4
+sub Xupdate_ssse3_16_31()               # recall that $Xi starts with 4
 { use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);    # 40 instructions
@@ -767,7 +767,7 @@ ___
    $r++;       unshift(@rndkey,pop(@rndkey));
 };
-sub Xupdate_avx_16_31()         # recall that $Xi starts wtih 4
+sub Xupdate_avx_16_31()         # recall that $Xi starts with 4
 { use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);    # 40 instructions
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
index f0b30109ae..a849073728 100644
--- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -52,7 +52,7 @@
 # nothing one can do and the result appears optimal. CCM result is
 # identical to CBC, because CBC-MAC is essentially CBC encrypt without
 # saving output. CCM CTR "stays invisible," because it's neatly
-# interleaved wih CBC-MAC. This provides ~30% improvement over
+# interleaved with CBC-MAC. This provides ~30% improvement over
 # "straghtforward" CCM implementation with CTR and CBC-MAC performed
 # disjointly. Parallelizable modes practically achieve the theoretical
 # limit.
@@ -136,7 +136,7 @@
 # asymptotic, if it can be surpassed, isn't it? What happens there?
 # Rewind to CBC paragraph for the answer. Yes, out-of-order execution
 # magic is responsible for this. Processor overlaps not only the
-# additional instructions with AES ones, but even AES instuctions
+# additional instructions with AES ones, but even AES instructions
 # processing adjacent triplets of independent blocks. In the 6x case
 # additional instructions  still claim disproportionally small amount
 # of additional cycles, but in 8x case number of instructions must be
@@ -1350,7 +1350,7 @@ ___
        movdqa  @tweak[5],@tweak[$i]
        paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
        pand    $twmask,$twres                  # isolate carry and residue
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
        pxor    $twres,@tweak[5]
 ___
    }
@@ -1456,7 +1456,7 @@ $code.=<<___;
         aesenc         $rndkey0,$inout0
        pand    $twmask,$twres                  # isolate carry and residue
         aesenc         $rndkey0,$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
         aesenc         $rndkey0,$inout2
        pxor    $twres,@tweak[5]
         aesenc         $rndkey0,$inout3
@@ -1471,7 +1471,7 @@ $code.=<<___;
         aesenc         $rndkey1,$inout0
        pand    $twmask,$twres                  # isolate carry and residue
         aesenc         $rndkey1,$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
         aesenc         $rndkey1,$inout2
        pxor    $twres,@tweak[5]
         aesenc         $rndkey1,$inout3
@@ -1485,7 +1485,7 @@ $code.=<<___;
         aesenclast     $rndkey0,$inout0
        pand    $twmask,$twres                  # isolate carry and residue
         aesenclast     $rndkey0,$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
         aesenclast     $rndkey0,$inout2
        pxor    $twres,@tweak[5]
         aesenclast     $rndkey0,$inout3
@@ -1499,7 +1499,7 @@ $code.=<<___;
         xorps  `16*0`(%rsp),$inout0            # output^=tweak
        pand    $twmask,$twres                  # isolate carry and residue
         xorps  `16*1`(%rsp),$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
        pxor    $twres,@tweak[5]
        xorps   `16*2`(%rsp),$inout2
@@ -1750,7 +1750,7 @@ ___
        movdqa  @tweak[5],@tweak[$i]
        paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
        pand    $twmask,$twres                  # isolate carry and residue
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
        pxor    $twres,@tweak[5]
 ___
    }
@@ -1856,7 +1856,7 @@ $code.=<<___;
         aesdec         $rndkey0,$inout0
        pand    $twmask,$twres                  # isolate carry and residue
         aesdec         $rndkey0,$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
         aesdec         $rndkey0,$inout2
        pxor    $twres,@tweak[5]
         aesdec         $rndkey0,$inout3
@@ -1871,7 +1871,7 @@ $code.=<<___;
         aesdec         $rndkey1,$inout0
        pand    $twmask,$twres                  # isolate carry and residue
         aesdec         $rndkey1,$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
         aesdec         $rndkey1,$inout2
        pxor    $twres,@tweak[5]
         aesdec         $rndkey1,$inout3
@@ -1885,7 +1885,7 @@ $code.=<<___;
         aesdeclast     $rndkey0,$inout0
        pand    $twmask,$twres                  # isolate carry and residue
         aesdeclast     $rndkey0,$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
         aesdeclast     $rndkey0,$inout2
        pxor    $twres,@tweak[5]
         aesdeclast     $rndkey0,$inout3
@@ -1899,7 +1899,7 @@ $code.=<<___;
         xorps  `16*0`(%rsp),$inout0            # output^=tweak
        pand    $twmask,$twres                  # isolate carry and residue
         xorps  `16*1`(%rsp),$inout1
-        pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+        pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
        pxor    $twres,@tweak[5]
        xorps   `16*2`(%rsp),$inout2
@@ -2520,7 +2520,7 @@ ___
 #       Vinodh Gopal <vinodh.gopal@intel.com>
 #       Kahraman Akdemir
 #
-# Agressively optimized in respect to aeskeygenassist's critical path
+# Aggressively optimized in respect to aeskeygenassist's critical path
 # and is contained in %xmm0-5 to meet Win64 ABI requirement.
 #
 $code.=<<___;
@@ -2602,7 +2602,7 @@ __aesni_set_encrypt_key:
 .align  16
 .L14rounds:
-        movups  16($inp),%xmm2                  # remaning half of *userKey
+        movups  16($inp),%xmm2                  # remaining half of *userKey
        mov     \$13,$bits                      # 14 rounds for 256
        lea     16(%rax),%rax
        $movkey %xmm0,($key)                    # round 0
@@ -2862,7 +2862,7 @@ xts_se_handler:
        mov     56($disp),%r11          # disp->HandlerData
        mov     0(%r11),%r10d           # HandlerData[0]
-        lea     (%rsi,%r10),%r10        # prologue lable
+        lea     (%rsi,%r10),%r10        # prologue label
        cmp     %r10,%rbx               # context->Rip<prologue label
        jb      .Lcommon_seh_tail
diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
index 41b90f0844..14dc2c02e7 100644
--- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
@@ -20,7 +20,7 @@
 # - code was made position-independent;
 # - rounds were folded into a loop resulting in >5x size reduction
 #   from 12.5KB to 2.2KB;
-# - above was possibile thanks to mixcolumns() modification that
+# - above was possible thanks to mixcolumns() modification that
 #   allowed to feed its output back to aesenc[last], this was
 #   achieved at cost of two additional inter-registers moves;
 # - some instruction reordering and interleaving;
author	jmc <>	2022-12-26 07:18:53 +0000
committer	jmc <>	2022-12-26 07:18:53 +0000
commit	2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch)
tree	26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/aes
parent	df59a12113ba6ec4c6faecd033d46176453f697e (diff)
download	openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.gz openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.bz2 openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.zip

diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index 3ba8a26eaa..c5ae3f6903 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl
@@ -48,8 +48,8 @@
48	# better performance on most recent µ-archs...	48	# better performance on most recent µ-archs...
49	#	49	#
50	# Third version adds AES_cbc_encrypt implementation, which resulted in	50	# Third version adds AES_cbc_encrypt implementation, which resulted in
51	# up to 40% performance imrovement of CBC benchmark results. 40% was	51	# up to 40% performance improvement of CBC benchmark results. 40% was
52	# observed on P4 core, where "overall" imrovement coefficient, i.e. if	52	# observed on P4 core, where "overall" improvement coefficient, i.e. if
53	# compared to PIC generated by GCC and in CBC mode, was observed to be	53	# compared to PIC generated by GCC and in CBC mode, was observed to be
54	# as large as 4x:-) CBC performance is virtually identical to ECB now	54	# as large as 4x:-) CBC performance is virtually identical to ECB now
55	# and on some platforms even better, e.g. 17.6 "small" cycles/byte on	55	# and on some platforms even better, e.g. 17.6 "small" cycles/byte on
@@ -228,7 +228,7 @@ $small_footprint=1; # $small_footprint=1 code is ~5% slower [on
228	# contention and in hope to "collect" 5% back	228	# contention and in hope to "collect" 5% back
229	# in real-life applications...	229	# in real-life applications...
230		230
231	$vertical_spin=0; # shift "verticaly" defaults to 0, because of	231	$vertical_spin=0; # shift "vertically" defaults to 0, because of
232	# its proof-of-concept status...	232	# its proof-of-concept status...
233	# Note that there is no decvert(), as well as last encryption round is	233	# Note that there is no decvert(), as well as last encryption round is
234	# performed with "horizontal" shifts. This is because this "vertical"	234	# performed with "horizontal" shifts. This is because this "vertical"


diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl index 2f6ff74ffe..b95d1afd5a 100644 --- a/src/lib/libcrypto/aes/asm/aes-mips.pl +++ b/src/lib/libcrypto/aes/asm/aes-mips.pl
@@ -106,7 +106,7 @@ my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
106	my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));	106	my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
107	my ($key0,$cnt)=($gp,$fp);	107	my ($key0,$cnt)=($gp,$fp);
108		108
109	# instuction ordering is "stolen" from output from MIPSpro assembler	109	# instruction ordering is "stolen" from output from MIPSpro assembler
110	# invoked with -mips3 -O3 arguments...	110	# invoked with -mips3 -O3 arguments...
111	$code.=<<___;	111	$code.=<<___;
112	.align 5	112	.align 5


diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl index 7c52cbe5f9..91a46f60ed 100644 --- a/src/lib/libcrypto/aes/asm/aes-ppc.pl +++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl
@@ -19,7 +19,7 @@
19	# February 2010	19	# February 2010
20	#	20	#
21	# Rescheduling instructions to favour Power6 pipeline gave 10%	21	# Rescheduling instructions to favour Power6 pipeline gave 10%
22	# performance improvement on the platfrom in question (and marginal	22	# performance improvement on the platform in question (and marginal
23	# improvement even on others). It should be noted that Power6 fails	23	# improvement even on others). It should be noted that Power6 fails
24	# to process byte in 18 cycles, only in 23, because it fails to issue	24	# to process byte in 18 cycles, only in 23, because it fails to issue
25	# 4 load instructions in two cycles, only in 3. As result non-compact	25	# 4 load instructions in two cycles, only in 3. As result non-compact


diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index bc6c8f3fc0..880bcc2d58 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -250,7 +250,7 @@ ___
250	$r++; unshift(@rndkey,pop(@rndkey));	250	$r++; unshift(@rndkey,pop(@rndkey));
251	};	251	};
252		252
253	sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4	253	sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
254	{ use integer;	254	{ use integer;
255	my $body = shift;	255	my $body = shift;
256	my @insns = (&$body,&$body,&$body,&$body); # 40 instructions	256	my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
@@ -767,7 +767,7 @@ ___
767	$r++; unshift(@rndkey,pop(@rndkey));	767	$r++; unshift(@rndkey,pop(@rndkey));
768	};	768	};
769		769
770	sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4	770	sub Xupdate_avx_16_31() # recall that $Xi starts with 4
771	{ use integer;	771	{ use integer;
772	my $body = shift;	772	my $body = shift;
773	my @insns = (&$body,&$body,&$body,&$body); # 40 instructions	773	my @insns = (&$body,&$body,&$body,&$body); # 40 instructions


diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl index f0b30109ae..a849073728 100644 --- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl
@@ -52,7 +52,7 @@
52	# nothing one can do and the result appears optimal. CCM result is	52	# nothing one can do and the result appears optimal. CCM result is
53	# identical to CBC, because CBC-MAC is essentially CBC encrypt without	53	# identical to CBC, because CBC-MAC is essentially CBC encrypt without
54	# saving output. CCM CTR "stays invisible," because it's neatly	54	# saving output. CCM CTR "stays invisible," because it's neatly
55	# interleaved wih CBC-MAC. This provides ~30% improvement over	55	# interleaved with CBC-MAC. This provides ~30% improvement over
56	# "straghtforward" CCM implementation with CTR and CBC-MAC performed	56	# "straghtforward" CCM implementation with CTR and CBC-MAC performed
57	# disjointly. Parallelizable modes practically achieve the theoretical	57	# disjointly. Parallelizable modes practically achieve the theoretical
58	# limit.	58	# limit.
@@ -136,7 +136,7 @@
136	# asymptotic, if it can be surpassed, isn't it? What happens there?	136	# asymptotic, if it can be surpassed, isn't it? What happens there?
137	# Rewind to CBC paragraph for the answer. Yes, out-of-order execution	137	# Rewind to CBC paragraph for the answer. Yes, out-of-order execution
138	# magic is responsible for this. Processor overlaps not only the	138	# magic is responsible for this. Processor overlaps not only the
139	# additional instructions with AES ones, but even AES instuctions	139	# additional instructions with AES ones, but even AES instructions
140	# processing adjacent triplets of independent blocks. In the 6x case	140	# processing adjacent triplets of independent blocks. In the 6x case
141	# additional instructions still claim disproportionally small amount	141	# additional instructions still claim disproportionally small amount
142	# of additional cycles, but in 8x case number of instructions must be	142	# of additional cycles, but in 8x case number of instructions must be
@@ -1350,7 +1350,7 @@ ___
1350	movdqa @tweak[5],@tweak[$i]	1350	movdqa @tweak[5],@tweak[$i]
1351	paddq @tweak[5],@tweak[5] # psllq 1,$tweak	1351	paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1352	pand $twmask,$twres # isolate carry and residue	1352	pand $twmask,$twres # isolate carry and residue
1353	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1353	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1354	pxor $twres,@tweak[5]	1354	pxor $twres,@tweak[5]
1355	___	1355	___
1356	}	1356	}
@@ -1456,7 +1456,7 @@ $code.=<<___;
1456	aesenc $rndkey0,$inout0	1456	aesenc $rndkey0,$inout0
1457	pand $twmask,$twres # isolate carry and residue	1457	pand $twmask,$twres # isolate carry and residue
1458	aesenc $rndkey0,$inout1	1458	aesenc $rndkey0,$inout1
1459	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1459	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1460	aesenc $rndkey0,$inout2	1460	aesenc $rndkey0,$inout2
1461	pxor $twres,@tweak[5]	1461	pxor $twres,@tweak[5]
1462	aesenc $rndkey0,$inout3	1462	aesenc $rndkey0,$inout3
@@ -1471,7 +1471,7 @@ $code.=<<___;
1471	aesenc $rndkey1,$inout0	1471	aesenc $rndkey1,$inout0
1472	pand $twmask,$twres # isolate carry and residue	1472	pand $twmask,$twres # isolate carry and residue
1473	aesenc $rndkey1,$inout1	1473	aesenc $rndkey1,$inout1
1474	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1474	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1475	aesenc $rndkey1,$inout2	1475	aesenc $rndkey1,$inout2
1476	pxor $twres,@tweak[5]	1476	pxor $twres,@tweak[5]
1477	aesenc $rndkey1,$inout3	1477	aesenc $rndkey1,$inout3
@@ -1485,7 +1485,7 @@ $code.=<<___;
1485	aesenclast $rndkey0,$inout0	1485	aesenclast $rndkey0,$inout0
1486	pand $twmask,$twres # isolate carry and residue	1486	pand $twmask,$twres # isolate carry and residue
1487	aesenclast $rndkey0,$inout1	1487	aesenclast $rndkey0,$inout1
1488	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1488	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1489	aesenclast $rndkey0,$inout2	1489	aesenclast $rndkey0,$inout2
1490	pxor $twres,@tweak[5]	1490	pxor $twres,@tweak[5]
1491	aesenclast $rndkey0,$inout3	1491	aesenclast $rndkey0,$inout3
@@ -1499,7 +1499,7 @@ $code.=<<___;
1499	xorps `16*0`(%rsp),$inout0 # output^=tweak	1499	xorps `16*0`(%rsp),$inout0 # output^=tweak
1500	pand $twmask,$twres # isolate carry and residue	1500	pand $twmask,$twres # isolate carry and residue
1501	xorps `16*1`(%rsp),$inout1	1501	xorps `16*1`(%rsp),$inout1
1502	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1502	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1503	pxor $twres,@tweak[5]	1503	pxor $twres,@tweak[5]
1504		1504
1505	xorps `16*2`(%rsp),$inout2	1505	xorps `16*2`(%rsp),$inout2
@@ -1750,7 +1750,7 @@ ___
1750	movdqa @tweak[5],@tweak[$i]	1750	movdqa @tweak[5],@tweak[$i]
1751	paddq @tweak[5],@tweak[5] # psllq 1,$tweak	1751	paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1752	pand $twmask,$twres # isolate carry and residue	1752	pand $twmask,$twres # isolate carry and residue
1753	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1753	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1754	pxor $twres,@tweak[5]	1754	pxor $twres,@tweak[5]
1755	___	1755	___
1756	}	1756	}
@@ -1856,7 +1856,7 @@ $code.=<<___;
1856	aesdec $rndkey0,$inout0	1856	aesdec $rndkey0,$inout0
1857	pand $twmask,$twres # isolate carry and residue	1857	pand $twmask,$twres # isolate carry and residue
1858	aesdec $rndkey0,$inout1	1858	aesdec $rndkey0,$inout1
1859	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1859	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1860	aesdec $rndkey0,$inout2	1860	aesdec $rndkey0,$inout2
1861	pxor $twres,@tweak[5]	1861	pxor $twres,@tweak[5]
1862	aesdec $rndkey0,$inout3	1862	aesdec $rndkey0,$inout3
@@ -1871,7 +1871,7 @@ $code.=<<___;
1871	aesdec $rndkey1,$inout0	1871	aesdec $rndkey1,$inout0
1872	pand $twmask,$twres # isolate carry and residue	1872	pand $twmask,$twres # isolate carry and residue
1873	aesdec $rndkey1,$inout1	1873	aesdec $rndkey1,$inout1
1874	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1874	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1875	aesdec $rndkey1,$inout2	1875	aesdec $rndkey1,$inout2
1876	pxor $twres,@tweak[5]	1876	pxor $twres,@tweak[5]
1877	aesdec $rndkey1,$inout3	1877	aesdec $rndkey1,$inout3
@@ -1885,7 +1885,7 @@ $code.=<<___;
1885	aesdeclast $rndkey0,$inout0	1885	aesdeclast $rndkey0,$inout0
1886	pand $twmask,$twres # isolate carry and residue	1886	pand $twmask,$twres # isolate carry and residue
1887	aesdeclast $rndkey0,$inout1	1887	aesdeclast $rndkey0,$inout1
1888	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1888	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1889	aesdeclast $rndkey0,$inout2	1889	aesdeclast $rndkey0,$inout2
1890	pxor $twres,@tweak[5]	1890	pxor $twres,@tweak[5]
1891	aesdeclast $rndkey0,$inout3	1891	aesdeclast $rndkey0,$inout3
@@ -1899,7 +1899,7 @@ $code.=<<___;
1899	xorps `16*0`(%rsp),$inout0 # output^=tweak	1899	xorps `16*0`(%rsp),$inout0 # output^=tweak
1900	pand $twmask,$twres # isolate carry and residue	1900	pand $twmask,$twres # isolate carry and residue
1901	xorps `16*1`(%rsp),$inout1	1901	xorps `16*1`(%rsp),$inout1
1902	pcmpgtd @tweak[5],$twtmp # broadcat upper bits	1902	pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1903	pxor $twres,@tweak[5]	1903	pxor $twres,@tweak[5]
1904		1904
1905	xorps `16*2`(%rsp),$inout2	1905	xorps `16*2`(%rsp),$inout2
@@ -2520,7 +2520,7 @@ ___
2520	# Vinodh Gopal <vinodh.gopal@intel.com>	2520	# Vinodh Gopal <vinodh.gopal@intel.com>
2521	# Kahraman Akdemir	2521	# Kahraman Akdemir
2522	#	2522	#
2523	# Agressively optimized in respect to aeskeygenassist's critical path	2523	# Aggressively optimized in respect to aeskeygenassist's critical path
2524	# and is contained in %xmm0-5 to meet Win64 ABI requirement.	2524	# and is contained in %xmm0-5 to meet Win64 ABI requirement.
2525	#	2525	#
2526	$code.=<<___;	2526	$code.=<<___;
@@ -2602,7 +2602,7 @@ __aesni_set_encrypt_key:
2602		2602
2603	.align 16	2603	.align 16
2604	.L14rounds:	2604	.L14rounds:
2605	movups 16($inp),%xmm2 # remaning half of *userKey	2605	movups 16($inp),%xmm2 # remaining half of *userKey
2606	mov \$13,$bits # 14 rounds for 256	2606	mov \$13,$bits # 14 rounds for 256
2607	lea 16(%rax),%rax	2607	lea 16(%rax),%rax
2608	$movkey %xmm0,($key) # round 0	2608	$movkey %xmm0,($key) # round 0
@@ -2862,7 +2862,7 @@ xts_se_handler:
2862	mov 56($disp),%r11 # disp->HandlerData	2862	mov 56($disp),%r11 # disp->HandlerData
2863		2863
2864	mov 0(%r11),%r10d # HandlerData[0]	2864	mov 0(%r11),%r10d # HandlerData[0]
2865	lea (%rsi,%r10),%r10 # prologue lable	2865	lea (%rsi,%r10),%r10 # prologue label
2866	cmp %r10,%rbx # context->Rip<prologue label	2866	cmp %r10,%rbx # context->Rip<prologue label
2867	jb .Lcommon_seh_tail	2867	jb .Lcommon_seh_tail
2868		2868


diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl index 41b90f0844..14dc2c02e7 100644 --- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
@@ -20,7 +20,7 @@
20	# - code was made position-independent;	20	# - code was made position-independent;
21	# - rounds were folded into a loop resulting in >5x size reduction	21	# - rounds were folded into a loop resulting in >5x size reduction
22	# from 12.5KB to 2.2KB;	22	# from 12.5KB to 2.2KB;
23	# - above was possibile thanks to mixcolumns() modification that	23	# - above was possible thanks to mixcolumns() modification that
24	# allowed to feed its output back to aesenc[last], this was	24	# allowed to feed its output back to aesenc[last], this was
25	# achieved at cost of two additional inter-registers moves;	25	# achieved at cost of two additional inter-registers moves;
26	# - some instruction reordering and interleaving;	26	# - some instruction reordering and interleaving;