summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/bn/asm
diff options
context:
space:
mode:
author: jmc <> 2022-12-26 07:18:53 +0000
committer: jmc <> 2022-12-26 07:18:53 +0000
commit: 8144b51086b3c46594192ccbec62762e58d61200 (patch)
tree: 26f3d93398833b7449b8a97e9fe4af9904382dbf /src/lib/libcrypto/bn/asm
parent: 54da696f897367a85e20e97a53d29b18b44cf8b7 (diff)
download: openbsd-8144b51086b3c46594192ccbec62762e58d61200.tar.gz
download: openbsd-8144b51086b3c46594192ccbec62762e58d61200.tar.bz2
download: openbsd-8144b51086b3c46594192ccbec62762e58d61200.zip
spelling fixes; from paul tagliamonte
i removed the arithmetics -> arithmetic changes, as i felt they were not clearly correct

ok tb
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
-rw-r--r-- src/lib/libcrypto/bn/asm/co-586.pl 8
-rw-r--r-- src/lib/libcrypto/bn/asm/mips.pl 2
-rw-r--r-- src/lib/libcrypto/bn/asm/modexp512-x86_64.pl 2
-rw-r--r-- src/lib/libcrypto/bn/asm/pa-risc2W.s 2
-rw-r--r-- src/lib/libcrypto/bn/asm/parisc-mont.pl 2
-rw-r--r-- src/lib/libcrypto/bn/asm/ppc.pl 2
-rw-r--r-- src/lib/libcrypto/bn/asm/ppc64-mont.pl 2
-rw-r--r-- src/lib/libcrypto/bn/asm/sparcv9-mont.pl 2
-rwxr-xr-x src/lib/libcrypto/bn/asm/sparcv9a-mont.pl 4
-rw-r--r-- src/lib/libcrypto/bn/asm/x86-gf2m.pl 2
-rwxr-xr-x src/lib/libcrypto/bn/asm/x86-mont.pl 4
-rw-r--r-- src/lib/libcrypto/bn/asm/x86/comba.pl 8
-rw-r--r-- src/lib/libcrypto/bn/asm/x86_64-gf2m.pl 2
13 files changed, 21 insertions, 21 deletions
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
index 57101a6bd7..37d79cc0c1 100644
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ b/src/lib/libcrypto/bn/asm/co-586.pl
@@ -28,17 +28,17 @@ sub mul_add_c
28 28
29 &mul("edx"); 29 &mul("edx");
30 &add($c0,"eax"); 30 &add($c0,"eax");
31 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a 31 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
32 &mov("eax",&wparam(0)) if $pos > 0; # load r[] 32 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
33 ### 33 ###
34 &adc($c1,"edx"); 34 &adc($c1,"edx");
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b 35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
36 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b 36 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
37 ### 37 ###
38 &adc($c2,0); 38 &adc($c2,0);
39 # is pos > 1, it means it is the last loop 39 # is pos > 1, it means it is the last loop
40 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; 40 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
41 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a 41 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
42 } 42 }
43 43
44sub sqr_add_c 44sub sqr_add_c
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl
index 215c9a7483..02d43e15b0 100644
--- a/src/lib/libcrypto/bn/asm/mips.pl
+++ b/src/lib/libcrypto/bn/asm/mips.pl
@@ -15,7 +15,7 @@
15# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. 15# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
16# 16#
17# The module is designed to work with either of the "new" MIPS ABI(5), 17# The module is designed to work with either of the "new" MIPS ABI(5),
18# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under 18# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
19# IRIX 5.x not only because it doesn't support new ABIs but also 19# IRIX 5.x not only because it doesn't support new ABIs but also
20# because 5.x kernels put R4x00 CPU into 32-bit mode and all those 20# because 5.x kernels put R4x00 CPU into 32-bit mode and all those
21# 64-bit instructions (daddu, dmultu, etc.) found below gonna only 21# 64-bit instructions (daddu, dmultu, etc.) found below gonna only
diff --git a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
index 4317282835..2e71a7f03d 100644
--- a/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
+++ b/src/lib/libcrypto/bn/asm/modexp512-x86_64.pl
@@ -1307,7 +1307,7 @@ end_main_loop_a3b:
1307 movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp) 1307 movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)
1308 call mont_reduce 1308 call mont_reduce
1309 1309
1310 # If result > m, subract m 1310 # If result > m, subtract m
1311 # load result into r15:r8 1311 # load result into r15:r8
1312 mov (+$pResult_offset)(%rsp), %rax 1312 mov (+$pResult_offset)(%rsp), %rax
1313 mov (+8*0)(%rax), %r8 1313 mov (+8*0)(%rax), %r8
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
index a99545754d..a91f3ea5af 100644
--- a/src/lib/libcrypto/bn/asm/pa-risc2W.s
+++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s
@@ -783,7 +783,7 @@ $00000012
783 COPY %r0,%r10 ; ret = 0 783 COPY %r0,%r10 ; ret = 0
784 MTSARCM %r31 ; i to shift 784 MTSARCM %r31 ; i to shift
785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i; 785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786 SUBI 64,%r31,%r19 ; 64 - i; redundent 786 SUBI 64,%r31,%r19 ; 64 - i; redundant
787 MTSAR %r19 ; (64 -i) to shift 787 MTSAR %r19 ; (64 -i) to shift
788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) 788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789 MTSARCM %r31 ; i to shift 789 MTSARCM %r31 ; i to shift
diff --git a/src/lib/libcrypto/bn/asm/parisc-mont.pl b/src/lib/libcrypto/bn/asm/parisc-mont.pl
index fcfdee1f1f..6da9574adf 100644
--- a/src/lib/libcrypto/bn/asm/parisc-mont.pl
+++ b/src/lib/libcrypto/bn/asm/parisc-mont.pl
@@ -116,7 +116,7 @@ $fp="%r3";
116$hi1="%r2"; 116$hi1="%r2";
117$hi0="%r1"; 117$hi0="%r1";
118 118
119$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s 119$xfer=$n0; # accommodates [-16..15] offset in fld[dw]s
120 120
121$fm0="%fr4"; $fti=$fm0; 121$fm0="%fr4"; $fti=$fm0;
122$fbi="%fr5L"; 122$fbi="%fr5L";
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
index 1249ce2299..34e38d8f6a 100644
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -34,7 +34,7 @@
34#dsa 512 bits 0.0087s 0.0106s 114.3 94.5 34#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
35#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 35#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
36# 36#
37# Same bechmark with this assembler code: 37# Same benchmark with this assembler code:
38# 38#
39#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2 39#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
40#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1 40#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1
diff --git a/src/lib/libcrypto/bn/asm/ppc64-mont.pl b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
index a14e769ad0..a9291f4bf7 100644
--- a/src/lib/libcrypto/bn/asm/ppc64-mont.pl
+++ b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
@@ -919,7 +919,7 @@ $code.=<<___;
919 std $t3,-16($tp) ; tp[j-1] 919 std $t3,-16($tp) ; tp[j-1]
920 std $t5,-8($tp) ; tp[j] 920 std $t5,-8($tp) ; tp[j]
921 921
922 add $carry,$carry,$ovf ; comsume upmost overflow 922 add $carry,$carry,$ovf ; consume upmost overflow
923 add $t6,$t6,$carry ; can not overflow 923 add $t6,$t6,$carry ; can not overflow
924 srdi $carry,$t6,16 924 srdi $carry,$t6,16
925 add $t7,$t7,$carry 925 add $t7,$t7,$carry
diff --git a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
index b8fb1e8a25..fb44c01443 100644
--- a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
+++ b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
@@ -13,7 +13,7 @@
13# for undertaken effort are multiple. First of all, UltraSPARC is not 13# for undertaken effort are multiple. First of all, UltraSPARC is not
14# the whole SPARCv9 universe and other VIS-free implementations deserve 14# the whole SPARCv9 universe and other VIS-free implementations deserve
15# optimized code as much. Secondly, newly introduced UltraSPARC T1, 15# optimized code as much. Secondly, newly introduced UltraSPARC T1,
16# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes, 16# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
17# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with 17# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
18# several integrated RSA/DSA accelerator circuits accessible through 18# several integrated RSA/DSA accelerator circuits accessible through
19# kernel driver [only(*)], but having decent user-land software 19# kernel driver [only(*)], but having decent user-land software
diff --git a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
index a14205f2f0..4ebe15c5c0 100755
--- a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
+++ b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
@@ -51,7 +51,7 @@
51# 51#
52# Modulo-scheduled inner loops allow to interleave floating point and 52# Modulo-scheduled inner loops allow to interleave floating point and
53# integer instructions and minimize Read-After-Write penalties. This 53# integer instructions and minimize Read-After-Write penalties. This
54# results in *further* 20-50% perfromance improvement [depending on 54# results in *further* 20-50% performance improvement [depending on
55# key length, more for longer keys] on USI&II cores and 30-80% - on 55# key length, more for longer keys] on USI&II cores and 30-80% - on
56# USIII&IV. 56# USIII&IV.
57 57
@@ -867,7 +867,7 @@ ___
867$code =~ s/\`([^\`]*)\`/eval($1)/gem; 867$code =~ s/\`([^\`]*)\`/eval($1)/gem;
868 868
869# Below substitution makes it possible to compile without demanding 869# Below substitution makes it possible to compile without demanding
870# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I 870# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
871# dare to do this, because VIS capability is detected at run-time now 871# dare to do this, because VIS capability is detected at run-time now
872# and this routine is not called on CPU not capable to execute it. Do 872# and this routine is not called on CPU not capable to execute it. Do
873# note that fzeros is not the only VIS dependency! Another dependency 873# note that fzeros is not the only VIS dependency! Another dependency
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
index 97d9136260..9715b2158f 100644
--- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -142,7 +142,7 @@ $R="mm0";
142 &xor ($a4,$a2); # a2=a4^a2^a4 142 &xor ($a4,$a2); # a2=a4^a2^a4
143 &mov (&DWP(5*4,"esp"),$a1); # a1^a4 143 &mov (&DWP(5*4,"esp"),$a1); # a1^a4
144 &xor ($a4,$a1); # a1^a2^a4 144 &xor ($a4,$a1); # a1^a2^a4
145 &sar (@i[1],31); # broardcast 30th bit 145 &sar (@i[1],31); # broadcast 30th bit
146 &and ($lo,$b); 146 &and ($lo,$b);
147 &mov (&DWP(6*4,"esp"),$a2); # a2^a4 147 &mov (&DWP(6*4,"esp"),$a2); # a2^a4
148 &and (@i[1],$b); 148 &and (@i[1],$b);
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
index a0bdd5787e..e6c04739b1 100755
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/src/lib/libcrypto/bn/asm/x86-mont.pl
@@ -69,7 +69,7 @@ $frame=32; # size of above frame rounded up to 16n
69 &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2)) 69 &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70 &neg ("edi"); 70 &neg ("edi");
71 71
72 # minimize cache contention by arraning 2K window between stack 72 # minimize cache contention by arranging 2K window between stack
73 # pointer and ap argument [np is also position sensitive vector, 73 # pointer and ap argument [np is also position sensitive vector,
74 # but it's assumed to be near ap, as it's allocated at ~same 74 # but it's assumed to be near ap, as it's allocated at ~same
75 # time]. 75 # time].
@@ -267,7 +267,7 @@ if (0) {
267 &xor ("eax","eax"); # signal "not fast enough [yet]" 267 &xor ("eax","eax"); # signal "not fast enough [yet]"
268 &jmp (&label("just_leave")); 268 &jmp (&label("just_leave"));
269 # While the below code provides competitive performance for 269 # While the below code provides competitive performance for
270 # all key lengthes on modern Intel cores, it's still more 270 # all key lengths on modern Intel cores, it's still more
271 # than 10% slower for 4096-bit key elsewhere:-( "Competitive" 271 # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
272 # means compared to the original integer-only assembler. 272 # means compared to the original integer-only assembler.
273 # 512-bit RSA sign is better by ~40%, but that's about all 273 # 512-bit RSA sign is better by ~40%, but that's about all
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
index dc4ec97ff5..762412974a 100644
--- a/src/lib/libcrypto/bn/asm/x86/comba.pl
+++ b/src/lib/libcrypto/bn/asm/x86/comba.pl
@@ -16,17 +16,17 @@ sub mul_add_c
16 16
17 &mul("edx"); 17 &mul("edx");
18 &add($c0,"eax"); 18 &add($c0,"eax");
19 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a 19 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
20 &mov("eax",&wparam(0)) if $pos > 0; # load r[] 20 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
21 ### 21 ###
22 &adc($c1,"edx"); 22 &adc($c1,"edx");
23 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b 23 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
24 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b 24 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
25 ### 25 ###
26 &adc($c2,0); 26 &adc($c2,0);
27 # is pos > 1, it means it is the last loop 27 # is pos > 1, it means it is the last loop
28 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; 28 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
29 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a 29 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
30 } 30 }
31 31
32sub sqr_add_c 32sub sqr_add_c
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
index 3ecb425dad..24dacb1e9c 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-gf2m.pl
@@ -59,7 +59,7 @@ _mul_1x1:
59 sar \$63,$i0 # broadcast 62nd bit 59 sar \$63,$i0 # broadcast 62nd bit
60 lea (,$a1,4),$a4 60 lea (,$a1,4),$a4
61 and $b,$a 61 and $b,$a
62 sar \$63,$i1 # boardcast 61st bit 62 sar \$63,$i1 # broadcast 61st bit
63 mov $a,$hi # $a is $lo 63 mov $a,$hi # $a is $lo
64 shl \$63,$lo 64 shl \$63,$lo
65 and $b,$i0 65 and $b,$i0