spelling fixes; from paul tagliamonte

i removed the arithmetics -> arithmetic changes, as i felt they were not clearly correct

ok tb
author: jmc <> 2022-12-26 07:18:53 +0000
committer: jmc <> 2022-12-26 07:18:53 +0000
commit: 2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch)
tree: 26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/rc4
parent: df59a12113ba6ec4c6faecd033d46176453f697e (diff)
download: openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.gz
openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.bz2
openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.zip
2 files changed, 4 insertions, 4 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl
index 03f0cff467..f3c3e117bc 100644
--- a/src/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -123,7 +123,7 @@ if ($alt=0) {
        push    (@XX,shift(@XX))                        if ($i>=0);
  }
 } else {
-  # Using pinsrw here improves performane on Intel CPUs by 2-3%, but
+  # Using pinsrw here improves performance on Intel CPUs by 2-3%, but
  # brings down AMD by 7%...
  $RC4_loop_mmx = sub {
    my $i=shift;
@@ -144,7 +144,7 @@ if ($alt=0) {
        &movd   ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4));
        # (*)   This is the key to Core2 and Westmere performance.
-        #       Whithout movz out-of-order execution logic confuses
+        #       Without movz out-of-order execution logic confuses
        #       itself and fails to reorder loads and stores. Problem
        #       appears to be fixed in Sandy Bridge...
  }
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
index 2135b38ef8..18a967e546 100755
--- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -50,7 +50,7 @@
 # As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
 # performance by >30% [unlike P4 32-bit case that is]. But this is
 # provided that loads are reordered even more aggressively! Both code
-# pathes, AMD64 and EM64T, reorder loads in essentially same manner
+# paths, AMD64 and EM64T, reorder loads in essentially same manner
 # as my IA-64 implementation. On Opteron this resulted in modest 5%
 # improvement [I had to test it], while final Intel P4 performance
 # achieves respectful 432MBps on 2.8GHz processor now. For reference.
@@ -81,7 +81,7 @@
 # The only code path that was not modified is P4-specific one. Non-P4
 # Intel code path optimization is heavily based on submission by Maxim
 # Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used
-# some of the ideas even in attempt to optmize the original RC4_INT
+# some of the ideas even in attempt to optimize the original RC4_INT
 # code path... Current performance in cycles per processed byte (less
 # is better) and improvement coefficients relative to previous
 # version of this module are:
author	jmc <>	2022-12-26 07:18:53 +0000
committer	jmc <>	2022-12-26 07:18:53 +0000
commit	2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch)
tree	26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/rc4
parent	df59a12113ba6ec4c6faecd033d46176453f697e (diff)
download	openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.gz openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.bz2 openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.zip

diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl index 03f0cff467..f3c3e117bc 100644 --- a/src/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -123,7 +123,7 @@ if ($alt=0) {
123	push (@XX,shift(@XX)) if ($i>=0);	123	push (@XX,shift(@XX)) if ($i>=0);
124	}	124	}
125	} else {	125	} else {
126	# Using pinsrw here improves performane on Intel CPUs by 2-3%, but	126	# Using pinsrw here improves performance on Intel CPUs by 2-3%, but
127	# brings down AMD by 7%...	127	# brings down AMD by 7%...
128	$RC4_loop_mmx = sub {	128	$RC4_loop_mmx = sub {
129	my $i=shift;	129	my $i=shift;
@@ -144,7 +144,7 @@ if ($alt=0) {
144	&movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4));	144	&movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4));
145		145
146	# (*) This is the key to Core2 and Westmere performance.	146	# (*) This is the key to Core2 and Westmere performance.
147	# Whithout movz out-of-order execution logic confuses	147	# Without movz out-of-order execution logic confuses
148	# itself and fails to reorder loads and stores. Problem	148	# itself and fails to reorder loads and stores. Problem
149	# appears to be fixed in Sandy Bridge...	149	# appears to be fixed in Sandy Bridge...
150	}	150	}


diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl index 2135b38ef8..18a967e546 100755 --- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -50,7 +50,7 @@
50	# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T	50	# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
51	# performance by >30% [unlike P4 32-bit case that is]. But this is	51	# performance by >30% [unlike P4 32-bit case that is]. But this is
52	# provided that loads are reordered even more aggressively! Both code	52	# provided that loads are reordered even more aggressively! Both code
53	# pathes, AMD64 and EM64T, reorder loads in essentially same manner	53	# paths, AMD64 and EM64T, reorder loads in essentially same manner
54	# as my IA-64 implementation. On Opteron this resulted in modest 5%	54	# as my IA-64 implementation. On Opteron this resulted in modest 5%
55	# improvement [I had to test it], while final Intel P4 performance	55	# improvement [I had to test it], while final Intel P4 performance
56	# achieves respectful 432MBps on 2.8GHz processor now. For reference.	56	# achieves respectful 432MBps on 2.8GHz processor now. For reference.
@@ -81,7 +81,7 @@
81	# The only code path that was not modified is P4-specific one. Non-P4	81	# The only code path that was not modified is P4-specific one. Non-P4
82	# Intel code path optimization is heavily based on submission by Maxim	82	# Intel code path optimization is heavily based on submission by Maxim
83	# Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used	83	# Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used
84	# some of the ideas even in attempt to optmize the original RC4_INT	84	# some of the ideas even in attempt to optimize the original RC4_INT
85	# code path... Current performance in cycles per processed byte (less	85	# code path... Current performance in cycles per processed byte (less
86	# is better) and improvement coefficients relative to previous	86	# is better) and improvement coefficients relative to previous
87	# version of this module are:	87	# version of this module are: