spelling fixes; from paul tagliamonte

i removed the arithmetics -> arithmetic changes, as i felt they were not clearly correct

ok tb
author: jmc <> 2022-12-26 07:18:53 +0000
committer: jmc <> 2022-12-26 07:18:53 +0000
commit: 8144b51086b3c46594192ccbec62762e58d61200 (patch)
tree: 26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/bn/asm
parent: 54da696f897367a85e20e97a53d29b18b44cf8b7 (diff)
download: openbsd-8144b51086b3c46594192ccbec62762e58d61200.tar.gz
openbsd-8144b51086b3c46594192ccbec62762e58d61200.tar.bz2
openbsd-8144b51086b3c46594192ccbec62762e58d61200.zip
13 files changed, 21 insertions, 21 deletions
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
index 57101a6bd7..37d79cc0c1 100644
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ b/src/lib/libcrypto/bn/asm/co-586.pl
@@ -28,17 +28,17 @@ sub mul_add_c
        &mul("edx");
        &add($c0,"eax");
-         &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # laod next a
+         &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # load next a
         &mov("eax",&wparam(0)) if $pos > 0;                    # load r[]
         ###
        &adc($c1,"edx");
-         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;        # laod next b
+         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;        # load next b
-         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;        # laod next b
+         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;        # load next b
         ###
        &adc($c2,0);
         # is pos > 1, it means it is the last loop 
         &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;           # save r[];
-        &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;         # laod next a
+        &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;         # load next a
        }
 sub sqr_add_c
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl
index 215c9a7483..02d43e15b0 100644
--- a/src/lib/libcrypto/bn/asm/mips.pl
+++ b/src/lib/libcrypto/bn/asm/mips.pl
@@ -15,7 +15,7 @@
 # This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
 #
 # The module is designed to work with either of the "new" MIPS ABI(5),
-# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
+# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
 # IRIX 5.x not only because it doesn't support new ABIs but also
 # because 5.x kernels put R4x00 CPU into 32-bit mode and all those
 # 64-bit instructions (daddu, dmultu, etc.) found below gonna only
diff --git a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
index 4317282835..2e71a7f03d 100644
--- a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
+++ b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
@@ -1307,7 +1307,7 @@ end_main_loop_a3b:
         movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)
         call   mont_reduce
-        # If result > m, subract m
+        # If result > m, subtract m
        # load result into r15:r8
         mov    (+$pResult_offset)(%rsp), %rax
         mov    (+8*0)(%rax), %r8
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
index a99545754d..a91f3ea5af 100644
--- a/src/lib/libcrypto/bn/asm/pa-risc2W.s
+++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s
@@ -783,7 +783,7 @@ $00000012
    COPY    %r0,%r10                           ; ret = 0
    MTSARCM %r31                               ; i to shift
    DEPD,Z  %r3,%sar,64,%r3                    ; d <<= i;
-    SUBI    64,%r31,%r19                       ; 64 - i; redundent
+    SUBI    64,%r31,%r19                       ; 64 - i; redundant
    MTSAR   %r19                               ; (64 -i) to shift
    SHRPD   %r4,%r5,%sar,%r4                   ; l>> (64-i)
    MTSARCM %r31                               ; i to shift
diff --git a/src/lib/libcrypto/bn/asm/parisc-mont.pl b/src/lib/libcrypto/bn/asm/parisc-mont.pl
index fcfdee1f1f..6da9574adf 100644
--- a/src/lib/libcrypto/bn/asm/parisc-mont.pl
+++ b/src/lib/libcrypto/bn/asm/parisc-mont.pl
@@ -116,7 +116,7 @@ $fp="%r3";
 $hi1="%r2";
 $hi0="%r1";
-$xfer=$n0;      # accomodates [-16..15] offset in fld[dw]s
+$xfer=$n0;      # accommodates [-16..15] offset in fld[dw]s
 $fm0="%fr4";    $fti=$fm0;
 $fbi="%fr5L";
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
index 1249ce2299..34e38d8f6a 100644
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -34,7 +34,7 @@
 #dsa  512 bits   0.0087s   0.0106s    114.3     94.5
 #dsa 1024 bits   0.0256s   0.0313s     39.0     32.0    
 #
-#       Same bechmark with this assembler code:
+#       Same benchmark with this assembler code:
 #
 #rsa  512 bits   0.0056s   0.0005s    178.6   2049.2
 #rsa 1024 bits   0.0283s   0.0015s     35.3    674.1
diff --git a/src/lib/libcrypto/bn/asm/ppc64-mont.pl b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
index a14e769ad0..a9291f4bf7 100644
--- a/src/lib/libcrypto/bn/asm/ppc64-mont.pl
+++ b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
@@ -919,7 +919,7 @@ $code.=<<___;
        std     $t3,-16($tp)            ; tp[j-1]
        std     $t5,-8($tp)             ; tp[j]
-        add     $carry,$carry,$ovf      ; comsume upmost overflow
+        add     $carry,$carry,$ovf      ; consume upmost overflow
        add     $t6,$t6,$carry          ; can not overflow
        srdi    $carry,$t6,16
        add     $t7,$t7,$carry
diff --git a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
index b8fb1e8a25..fb44c01443 100644
--- a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
+++ b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
@@ -13,7 +13,7 @@
 # for undertaken effort are multiple. First of all, UltraSPARC is not
 # the whole SPARCv9 universe and other VIS-free implementations deserve
 # optimized code as much. Secondly, newly introduced UltraSPARC T1,
-# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes,
+# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
 # such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
 # several integrated RSA/DSA accelerator circuits accessible through
 # kernel driver [only(*)], but having decent user-land software
diff --git a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
index a14205f2f0..4ebe15c5c0 100755
--- a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
+++ b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
@@ -51,7 +51,7 @@
 #
 # Modulo-scheduled inner loops allow to interleave floating point and
 # integer instructions and minimize Read-After-Write penalties. This
-# results in *further* 20-50% perfromance improvement [depending on
+# results in *further* 20-50% performance improvement [depending on
 # key length, more for longer keys] on USI&II cores and 30-80% - on
 # USIII&IV.
@@ -867,7 +867,7 @@ ___
 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
 # Below substitution makes it possible to compile without demanding
-# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
+# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
 # dare to do this, because VIS capability is detected at run-time now
 # and this routine is not called on CPU not capable to execute it. Do
 # note that fzeros is not the only VIS dependency! Another dependency
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
index 97d9136260..9715b2158f 100644
--- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -142,7 +142,7 @@ $R="mm0";
         &xor   ($a4,$a2);              # a2=a4^a2^a4
         &mov   (&DWP(5*4,"esp"),$a1);  # a1^a4
         &xor   ($a4,$a1);              # a1^a2^a4
-        &sar    (@i[1],31);             # broardcast 30th bit
+        &sar    (@i[1],31);             # broadcast 30th bit
        &and    ($lo,$b);
         &mov   (&DWP(6*4,"esp"),$a2);  # a2^a4
        &and    (@i[1],$b);
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
index a0bdd5787e..e6c04739b1 100755
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/src/lib/libcrypto/bn/asm/x86-mont.pl
@@ -69,7 +69,7 @@ $frame=32;				# size of above frame rounded up to 16n
        &lea    ("esp",&DWP(-$frame,"esp","edi",4));    # alloca($frame+4*(num+2))
        &neg    ("edi");
-        # minimize cache contention by arraning 2K window between stack
+        # minimize cache contention by arranging 2K window between stack
        # pointer and ap argument [np is also position sensitive vector,
        # but it's assumed to be near ap, as it's allocated at ~same
        # time].
@@ -267,7 +267,7 @@ if (0) {
        &xor    ("eax","eax");  # signal "not fast enough [yet]"
        &jmp    (&label("just_leave"));
        # While the below code provides competitive performance for
-        # all key lengthes on modern Intel cores, it's still more
+        # all key lengths on modern Intel cores, it's still more
        # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
        # means compared to the original integer-only assembler.
        # 512-bit RSA sign is better by ~40%, but that's about all
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
index dc4ec97ff5..762412974a 100644
--- a/src/lib/libcrypto/bn/asm/x86/comba.pl
+++ b/src/lib/libcrypto/bn/asm/x86/comba.pl
@@ -16,17 +16,17 @@ sub mul_add_c
        &mul("edx");
        &add($c0,"eax");
-         &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # laod next a
+         &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # load next a
         &mov("eax",&wparam(0)) if $pos > 0;                    # load r[]
         ###
        &adc($c1,"edx");
-         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;        # laod next b
+         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;        # load next b
-         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;        # laod next b
+         &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;        # load next b
         ###
        &adc($c2,0);
         # is pos > 1, it means it is the last loop 
         &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;           # save r[];
-        &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;         # laod next a
+        &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;         # load next a
        }
 sub sqr_add_c
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
index 3ecb425dad..24dacb1e9c 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
@@ -59,7 +59,7 @@ _mul_1x1:
        sar     \$63,$i0                # broadcast 62nd bit
        lea     (,$a1,4),$a4
        and     $b,$a
-        sar     \$63,$i1                # boardcast 61st bit
+        sar     \$63,$i1                # broadcast 61st bit
        mov     $a,$hi                  # $a is $lo
        shl     \$63,$lo
        and     $b,$i0
author	jmc <>	2022-12-26 07:18:53 +0000
committer	jmc <>	2022-12-26 07:18:53 +0000
commit	8144b51086b3c46594192ccbec62762e58d61200 (patch)
tree	26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/bn/asm
parent	54da696f897367a85e20e97a53d29b18b44cf8b7 (diff)
download	openbsd-8144b51086b3c46594192ccbec62762e58d61200.tar.gz openbsd-8144b51086b3c46594192ccbec62762e58d61200.tar.bz2 openbsd-8144b51086b3c46594192ccbec62762e58d61200.zip

diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl index 57101a6bd7..37d79cc0c1 100644 --- a/src/lib/libcrypto/bn/asm/co-586.pl +++ b/src/lib/libcrypto/bn/asm/co-586.pl
@@ -28,17 +28,17 @@ sub mul_add_c
28		28
29	&mul("edx");	29	&mul("edx");
30	&add($c0,"eax");	30	&add($c0,"eax");
31	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a	31	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
32	&mov("eax",&wparam(0)) if $pos > 0; # load r[]	32	&mov("eax",&wparam(0)) if $pos > 0; # load r[]
33	###	33	###
34	&adc($c1,"edx");	34	&adc($c1,"edx");
35	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b	35	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
36	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b	36	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
37	###	37	###
38	&adc($c2,0);	38	&adc($c2,0);
39	# is pos > 1, it means it is the last loop	39	# is pos > 1, it means it is the last loop
40	&mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];	40	&mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
41	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a	41	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
42	}	42	}
43		43
44	sub sqr_add_c	44	sub sqr_add_c


diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl index 215c9a7483..02d43e15b0 100644 --- a/src/lib/libcrypto/bn/asm/mips.pl +++ b/src/lib/libcrypto/bn/asm/mips.pl
@@ -15,7 +15,7 @@
15	# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.	15	# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
16	#	16	#
17	# The module is designed to work with either of the "new" MIPS ABI(5),	17	# The module is designed to work with either of the "new" MIPS ABI(5),
18	# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under	18	# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
19	# IRIX 5.x not only because it doesn't support new ABIs but also	19	# IRIX 5.x not only because it doesn't support new ABIs but also
20	# because 5.x kernels put R4x00 CPU into 32-bit mode and all those	20	# because 5.x kernels put R4x00 CPU into 32-bit mode and all those
21	# 64-bit instructions (daddu, dmultu, etc.) found below gonna only	21	# 64-bit instructions (daddu, dmultu, etc.) found below gonna only


diff --git a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl index 4317282835..2e71a7f03d 100644 --- a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl +++ b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
@@ -1307,7 +1307,7 @@ end_main_loop_a3b:
1307	movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)	1307	movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)
1308	call mont_reduce	1308	call mont_reduce
1309		1309
1310	# If result > m, subract m	1310	# If result > m, subtract m
1311	# load result into r15:r8	1311	# load result into r15:r8
1312	mov (+$pResult_offset)(%rsp), %rax	1312	mov (+$pResult_offset)(%rsp), %rax
1313	mov (+8*0)(%rax), %r8	1313	mov (+8*0)(%rax), %r8


diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s index a99545754d..a91f3ea5af 100644 --- a/src/lib/libcrypto/bn/asm/pa-risc2W.s +++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s
@@ -783,7 +783,7 @@ $00000012
783	COPY %r0,%r10 ; ret = 0	783	COPY %r0,%r10 ; ret = 0
784	MTSARCM %r31 ; i to shift	784	MTSARCM %r31 ; i to shift
785	DEPD,Z %r3,%sar,64,%r3 ; d <<= i;	785	DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786	SUBI 64,%r31,%r19 ; 64 - i; redundent	786	SUBI 64,%r31,%r19 ; 64 - i; redundant
787	MTSAR %r19 ; (64 -i) to shift	787	MTSAR %r19 ; (64 -i) to shift
788	SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)	788	SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789	MTSARCM %r31 ; i to shift	789	MTSARCM %r31 ; i to shift


diff --git a/src/lib/libcrypto/bn/asm/parisc-mont.pl b/src/lib/libcrypto/bn/asm/parisc-mont.pl index fcfdee1f1f..6da9574adf 100644 --- a/src/lib/libcrypto/bn/asm/parisc-mont.pl +++ b/src/lib/libcrypto/bn/asm/parisc-mont.pl
@@ -116,7 +116,7 @@ $fp="%r3";
116	$hi1="%r2";	116	$hi1="%r2";
117	$hi0="%r1";	117	$hi0="%r1";
118		118
119	$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s	119	$xfer=$n0; # accommodates [-16..15] offset in fld[dw]s
120		120
121	$fm0="%fr4"; $fti=$fm0;	121	$fm0="%fr4"; $fti=$fm0;
122	$fbi="%fr5L";	122	$fbi="%fr5L";


diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl index 1249ce2299..34e38d8f6a 100644 --- a/src/lib/libcrypto/bn/asm/ppc.pl +++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -34,7 +34,7 @@
34	#dsa 512 bits 0.0087s 0.0106s 114.3 94.5	34	#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
35	#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0	35	#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
36	#	36	#
37	# Same bechmark with this assembler code:	37	# Same benchmark with this assembler code:
38	#	38	#
39	#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2	39	#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
40	#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1	40	#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1


diff --git a/src/lib/libcrypto/bn/asm/ppc64-mont.pl b/src/lib/libcrypto/bn/asm/ppc64-mont.pl index a14e769ad0..a9291f4bf7 100644 --- a/src/lib/libcrypto/bn/asm/ppc64-mont.pl +++ b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
@@ -919,7 +919,7 @@ $code.=<<___;
919	std $t3,-16($tp) ; tp[j-1]	919	std $t3,-16($tp) ; tp[j-1]
920	std $t5,-8($tp) ; tp[j]	920	std $t5,-8($tp) ; tp[j]
921		921
922	add $carry,$carry,$ovf ; comsume upmost overflow	922	add $carry,$carry,$ovf ; consume upmost overflow
923	add $t6,$t6,$carry ; can not overflow	923	add $t6,$t6,$carry ; can not overflow
924	srdi $carry,$t6,16	924	srdi $carry,$t6,16
925	add $t7,$t7,$carry	925	add $t7,$t7,$carry


diff --git a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl index b8fb1e8a25..fb44c01443 100644 --- a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl +++ b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
@@ -13,7 +13,7 @@
13	# for undertaken effort are multiple. First of all, UltraSPARC is not	13	# for undertaken effort are multiple. First of all, UltraSPARC is not
14	# the whole SPARCv9 universe and other VIS-free implementations deserve	14	# the whole SPARCv9 universe and other VIS-free implementations deserve
15	# optimized code as much. Secondly, newly introduced UltraSPARC T1,	15	# optimized code as much. Secondly, newly introduced UltraSPARC T1,
16	# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes,	16	# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
17	# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with	17	# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
18	# several integrated RSA/DSA accelerator circuits accessible through	18	# several integrated RSA/DSA accelerator circuits accessible through
19	# kernel driver [only(*)], but having decent user-land software	19	# kernel driver [only(*)], but having decent user-land software


diff --git a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl index a14205f2f0..4ebe15c5c0 100755 --- a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl +++ b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
@@ -51,7 +51,7 @@
51	#	51	#
52	# Modulo-scheduled inner loops allow to interleave floating point and	52	# Modulo-scheduled inner loops allow to interleave floating point and
53	# integer instructions and minimize Read-After-Write penalties. This	53	# integer instructions and minimize Read-After-Write penalties. This
54	# results in further 20-50% perfromance improvement [depending on	54	# results in further 20-50% performance improvement [depending on
55	# key length, more for longer keys] on USI&II cores and 30-80% - on	55	# key length, more for longer keys] on USI&II cores and 30-80% - on
56	# USIII&IV.	56	# USIII&IV.
57		57
@@ -867,7 +867,7 @@ ___
867	$code =~ s/\`([^\`]*)\`/eval($1)/gem;	867	$code =~ s/\`([^\`]*)\`/eval($1)/gem;
868		868
869	# Below substitution makes it possible to compile without demanding	869	# Below substitution makes it possible to compile without demanding
870	# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I	870	# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
871	# dare to do this, because VIS capability is detected at run-time now	871	# dare to do this, because VIS capability is detected at run-time now
872	# and this routine is not called on CPU not capable to execute it. Do	872	# and this routine is not called on CPU not capable to execute it. Do
873	# note that fzeros is not the only VIS dependency! Another dependency	873	# note that fzeros is not the only VIS dependency! Another dependency


diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl index 97d9136260..9715b2158f 100644 --- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -142,7 +142,7 @@ $R="mm0";
142	&xor ($a4,$a2); # a2=a4^a2^a4	142	&xor ($a4,$a2); # a2=a4^a2^a4
143	&mov (&DWP(5*4,"esp"),$a1); # a1^a4	143	&mov (&DWP(5*4,"esp"),$a1); # a1^a4
144	&xor ($a4,$a1); # a1^a2^a4	144	&xor ($a4,$a1); # a1^a2^a4
145	&sar (@i[1],31); # broardcast 30th bit	145	&sar (@i[1],31); # broadcast 30th bit
146	&and ($lo,$b);	146	&and ($lo,$b);
147	&mov (&DWP(6*4,"esp"),$a2); # a2^a4	147	&mov (&DWP(6*4,"esp"),$a2); # a2^a4
148	&and (@i[1],$b);	148	&and (@i[1],$b);


diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index a0bdd5787e..e6c04739b1 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl
@@ -69,7 +69,7 @@ $frame=32; # size of above frame rounded up to 16n
69	&lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))	69	&lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70	&neg ("edi");	70	&neg ("edi");
71		71
72	# minimize cache contention by arraning 2K window between stack	72	# minimize cache contention by arranging 2K window between stack
73	# pointer and ap argument [np is also position sensitive vector,	73	# pointer and ap argument [np is also position sensitive vector,
74	# but it's assumed to be near ap, as it's allocated at ~same	74	# but it's assumed to be near ap, as it's allocated at ~same
75	# time].	75	# time].
@@ -267,7 +267,7 @@ if (0) {
267	&xor ("eax","eax"); # signal "not fast enough [yet]"	267	&xor ("eax","eax"); # signal "not fast enough [yet]"
268	&jmp (&label("just_leave"));	268	&jmp (&label("just_leave"));
269	# While the below code provides competitive performance for	269	# While the below code provides competitive performance for
270	# all key lengthes on modern Intel cores, it's still more	270	# all key lengths on modern Intel cores, it's still more
271	# than 10% slower for 4096-bit key elsewhere:-( "Competitive"	271	# than 10% slower for 4096-bit key elsewhere:-( "Competitive"
272	# means compared to the original integer-only assembler.	272	# means compared to the original integer-only assembler.
273	# 512-bit RSA sign is better by ~40%, but that's about all	273	# 512-bit RSA sign is better by ~40%, but that's about all


diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl index dc4ec97ff5..762412974a 100644 --- a/src/lib/libcrypto/bn/asm/x86/comba.pl +++ b/src/lib/libcrypto/bn/asm/x86/comba.pl
@@ -16,17 +16,17 @@ sub mul_add_c
16		16
17	&mul("edx");	17	&mul("edx");
18	&add($c0,"eax");	18	&add($c0,"eax");
19	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a	19	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
20	&mov("eax",&wparam(0)) if $pos > 0; # load r[]	20	&mov("eax",&wparam(0)) if $pos > 0; # load r[]
21	###	21	###
22	&adc($c1,"edx");	22	&adc($c1,"edx");
23	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b	23	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
24	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b	24	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
25	###	25	###
26	&adc($c2,0);	26	&adc($c2,0);
27	# is pos > 1, it means it is the last loop	27	# is pos > 1, it means it is the last loop
28	&mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];	28	&mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
29	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a	29	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
30	}	30	}
31		31
32	sub sqr_add_c	32	sub sqr_add_c


diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl index 3ecb425dad..24dacb1e9c 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
@@ -59,7 +59,7 @@ _mul_1x1:
59	sar \$63,$i0 # broadcast 62nd bit	59	sar \$63,$i0 # broadcast 62nd bit
60	lea (,$a1,4),$a4	60	lea (,$a1,4),$a4
61	and $b,$a	61	and $b,$a
62	sar \$63,$i1 # boardcast 61st bit	62	sar \$63,$i1 # broadcast 61st bit
63	mov $a,$hi # $a is $lo	63	mov $a,$hi # $a is $lo
64	shl \$63,$lo	64	shl \$63,$lo
65	and $b,$i0	65	and $b,$i0