diff options
| author | jmc <> | 2022-12-26 07:18:53 +0000 |
|---|---|---|
| committer | jmc <> | 2022-12-26 07:18:53 +0000 |
| commit | 2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120 (patch) | |
| tree | 26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/rc4 | |
| parent | df59a12113ba6ec4c6faecd033d46176453f697e (diff) | |
| download | openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.gz openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.tar.bz2 openbsd-2eb7e5ff6bb69760f9dd4a43e7e3520ebb930120.zip | |
spelling fixes; from paul tagliamonte
i removed the arithmetics -> arithmetic changes, as i felt they
were not clearly correct
ok tb
Diffstat (limited to 'src/lib/libcrypto/rc4')
| -rw-r--r-- | src/lib/libcrypto/rc4/asm/rc4-586.pl | 4 | ||||
| -rwxr-xr-x | src/lib/libcrypto/rc4/asm/rc4-x86_64.pl | 4 |
2 files changed, 4 insertions, 4 deletions
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl
index 03f0cff467..f3c3e117bc 100644
--- a/src/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl
| @@ -123,7 +123,7 @@ if ($alt=0) { | |||
| 123 | push (@XX,shift(@XX)) if ($i>=0); | 123 | push (@XX,shift(@XX)) if ($i>=0); |
| 124 | } | 124 | } |
| 125 | } else { | 125 | } else { |
| 126 | # Using pinsrw here improves performane on Intel CPUs by 2-3%, but | 126 | # Using pinsrw here improves performance on Intel CPUs by 2-3%, but |
| 127 | # brings down AMD by 7%... | 127 | # brings down AMD by 7%... |
| 128 | $RC4_loop_mmx = sub { | 128 | $RC4_loop_mmx = sub { |
| 129 | my $i=shift; | 129 | my $i=shift; |
| @@ -144,7 +144,7 @@ if ($alt=0) { | |||
| 144 | &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4)); | 144 | &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4)); |
| 145 | 145 | ||
| 146 | # (*) This is the key to Core2 and Westmere performance. | 146 | # (*) This is the key to Core2 and Westmere performance. |
| 147 | # Whithout movz out-of-order execution logic confuses | 147 | # Without movz out-of-order execution logic confuses |
| 148 | # itself and fails to reorder loads and stores. Problem | 148 | # itself and fails to reorder loads and stores. Problem |
| 149 | # appears to be fixed in Sandy Bridge... | 149 | # appears to be fixed in Sandy Bridge... |
| 150 | } | 150 | } |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
index 2135b38ef8..18a967e546 100755
--- a/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
+++ b/src/lib/libcrypto/rc4/asm/rc4-x86_64.pl
| @@ -50,7 +50,7 @@ | |||
| 50 | # As was shown by Zou Nanhai loop unrolling can improve Intel EM64T | 50 | # As was shown by Zou Nanhai loop unrolling can improve Intel EM64T |
| 51 | # performance by >30% [unlike P4 32-bit case that is]. But this is | 51 | # performance by >30% [unlike P4 32-bit case that is]. But this is |
| 52 | # provided that loads are reordered even more aggressively! Both code | 52 | # provided that loads are reordered even more aggressively! Both code |
| 53 | # pathes, AMD64 and EM64T, reorder loads in essentially same manner | 53 | # paths, AMD64 and EM64T, reorder loads in essentially same manner |
| 54 | # as my IA-64 implementation. On Opteron this resulted in modest 5% | 54 | # as my IA-64 implementation. On Opteron this resulted in modest 5% |
| 55 | # improvement [I had to test it], while final Intel P4 performance | 55 | # improvement [I had to test it], while final Intel P4 performance |
| 56 | # achieves respectful 432MBps on 2.8GHz processor now. For reference. | 56 | # achieves respectful 432MBps on 2.8GHz processor now. For reference. |
| @@ -81,7 +81,7 @@ | |||
| 81 | # The only code path that was not modified is P4-specific one. Non-P4 | 81 | # The only code path that was not modified is P4-specific one. Non-P4 |
| 82 | # Intel code path optimization is heavily based on submission by Maxim | 82 | # Intel code path optimization is heavily based on submission by Maxim |
| 83 | # Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used | 83 | # Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used |
| 84 | # some of the ideas even in attempt to optmize the original RC4_INT | 84 | # some of the ideas even in attempt to optimize the original RC4_INT |
| 85 | # code path... Current performance in cycles per processed byte (less | 85 | # code path... Current performance in cycles per processed byte (less |
| 86 | # is better) and improvement coefficients relative to previous | 86 | # is better) and improvement coefficients relative to previous |
| 87 | # version of this module are: | 87 | # version of this module are: |
