summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/bn/asm
diff options
context:
space:
mode:
author: djm <> 2011-11-03 02:32:23 +0000
committer: djm <> 2011-11-03 02:32:23 +0000
commit: 113f799ec7d1728f0a5d7ab5b0e3b42e3de56407 (patch)
tree: 26d712b25a8fa580b8f2dfc6df470ba5ffea9eb7 /src/lib/libcrypto/bn/asm
parent: 829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2 (diff)
download: openbsd-113f799ec7d1728f0a5d7ab5b0e3b42e3de56407.tar.gz
openbsd-113f799ec7d1728f0a5d7ab5b0e3b42e3de56407.tar.bz2
openbsd-113f799ec7d1728f0a5d7ab5b0e3b42e3de56407.zip
import OpenSSL 1.0.0e
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
-rw-r--r-- src/lib/libcrypto/bn/asm/alpha-mont.pl 36
-rw-r--r-- src/lib/libcrypto/bn/asm/s390x-mont.pl 4
-rwxr-xr-x src/lib/libcrypto/bn/asm/s390x.S 86
3 files changed, 65 insertions, 61 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl
index f7e0ca1646..03596e2014 100644
--- a/src/lib/libcrypto/bn/asm/alpha-mont.pl
+++ b/src/lib/libcrypto/bn/asm/alpha-mont.pl
@@ -41,8 +41,12 @@ $j="s4";
41$m1="s5"; 41$m1="s5";
42 42
43$code=<<___; 43$code=<<___;
44#ifdef __linux__
45#include <asm/regdef.h>
46#else
44#include <asm.h> 47#include <asm.h>
45#include <regdef.h> 48#include <regdef.h>
49#endif
46 50
47.text 51.text
48 52
@@ -76,7 +80,7 @@ bn_mul_mont:
76 ldq $aj,8($ap) 80 ldq $aj,8($ap)
77 subq sp,AT,sp 81 subq sp,AT,sp
78 ldq $bi,0($bp) # bp[0] 82 ldq $bi,0($bp) # bp[0]
79 mov -4096,AT 83 lda AT,-4096(zero) # mov -4096,AT
80 ldq $n0,0($n0) 84 ldq $n0,0($n0)
81 and sp,AT,sp 85 and sp,AT,sp
82 86
@@ -106,9 +110,9 @@ bn_mul_mont:
106.align 4 110.align 4
107.L1st: 111.L1st:
108 .set noreorder 112 .set noreorder
109 ldq $aj,($aj) 113 ldq $aj,0($aj)
110 addl $j,1,$j 114 addl $j,1,$j
111 ldq $nj,($nj) 115 ldq $nj,0($nj)
112 lda $tp,8($tp) 116 lda $tp,8($tp)
113 117
114 addq $alo,$hi0,$lo0 118 addq $alo,$hi0,$lo0
@@ -159,12 +163,12 @@ bn_mul_mont:
159.align 4 163.align 4
160.Louter: 164.Louter:
161 s8addq $i,$bp,$bi 165 s8addq $i,$bp,$bi
162 ldq $hi0,($ap) 166 ldq $hi0,0($ap)
163 ldq $aj,8($ap) 167 ldq $aj,8($ap)
164 ldq $bi,($bi) 168 ldq $bi,0($bi)
165 ldq $hi1,($np) 169 ldq $hi1,0($np)
166 ldq $nj,8($np) 170 ldq $nj,8($np)
167 ldq $tj,(sp) 171 ldq $tj,0(sp)
168 172
169 mulq $hi0,$bi,$lo0 173 mulq $hi0,$bi,$lo0
170 umulh $hi0,$bi,$hi0 174 umulh $hi0,$bi,$hi0
@@ -195,10 +199,10 @@ bn_mul_mont:
195 .set noreorder 199 .set noreorder
196 ldq $tj,8($tp) #L0 200 ldq $tj,8($tp) #L0
197 nop #U1 201 nop #U1
198 ldq $aj,($aj) #L1 202 ldq $aj,0($aj) #L1
199 s8addq $j,$np,$nj #U0 203 s8addq $j,$np,$nj #U0
200 204
201 ldq $nj,($nj) #L0 205 ldq $nj,0($nj) #L0
202 nop #U1 206 nop #U1
203 addq $alo,$hi0,$lo0 #L1 207 addq $alo,$hi0,$lo0 #L1
204 lda $tp,8($tp) 208 lda $tp,8($tp)
@@ -247,7 +251,7 @@ bn_mul_mont:
247 addq $hi1,v0,$hi1 251 addq $hi1,v0,$hi1
248 252
249 addq $hi1,$hi0,$lo1 253 addq $hi1,$hi0,$lo1
250 stq $j,($tp) 254 stq $j,0($tp)
251 cmpult $lo1,$hi0,$hi1 255 cmpult $lo1,$hi0,$hi1
252 addq $lo1,$tj,$lo1 256 addq $lo1,$tj,$lo1
253 cmpult $lo1,$tj,AT 257 cmpult $lo1,$tj,AT
@@ -265,8 +269,8 @@ bn_mul_mont:
265 mov 0,$hi0 # clear borrow bit 269 mov 0,$hi0 # clear borrow bit
266 270
267.align 4 271.align 4
268.Lsub: ldq $lo0,($tp) 272.Lsub: ldq $lo0,0($tp)
269 ldq $lo1,($np) 273 ldq $lo1,0($np)
270 lda $tp,8($tp) 274 lda $tp,8($tp)
271 lda $np,8($np) 275 lda $np,8($np)
272 subq $lo0,$lo1,$lo1 # tp[i]-np[i] 276 subq $lo0,$lo1,$lo1 # tp[i]-np[i]
@@ -274,7 +278,7 @@ bn_mul_mont:
274 subq $lo1,$hi0,$lo0 278 subq $lo1,$hi0,$lo0
275 cmpult $lo1,$lo0,$hi0 279 cmpult $lo1,$lo0,$hi0
276 or $hi0,AT,$hi0 280 or $hi0,AT,$hi0
277 stq $lo0,($rp) 281 stq $lo0,0($rp)
278 cmpult $tp,$tj,v0 282 cmpult $tp,$tj,v0
279 lda $rp,8($rp) 283 lda $rp,8($rp)
280 bne v0,.Lsub 284 bne v0,.Lsub
@@ -288,7 +292,7 @@ bn_mul_mont:
288 bis $bp,$ap,$ap # ap=borrow?tp:rp 292 bis $bp,$ap,$ap # ap=borrow?tp:rp
289 293
290.align 4 294.align 4
291.Lcopy: ldq $aj,($ap) # copy or in-place refresh 295.Lcopy: ldq $aj,0($ap) # copy or in-place refresh
292 lda $tp,8($tp) 296 lda $tp,8($tp)
293 lda $rp,8($rp) 297 lda $rp,8($rp)
294 lda $ap,8($ap) 298 lda $ap,8($ap)
@@ -309,8 +313,8 @@ bn_mul_mont:
309 lda sp,48(sp) 313 lda sp,48(sp)
310 ret (ra) 314 ret (ra)
311.end bn_mul_mont 315.end bn_mul_mont
312.rdata 316.ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
313.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>" 317.align 2
314___ 318___
315 319
316print $code; 320print $code;
diff --git a/src/lib/libcrypto/bn/asm/s390x-mont.pl b/src/lib/libcrypto/bn/asm/s390x-mont.pl
index d23251033b..f61246f5b6 100644
--- a/src/lib/libcrypto/bn/asm/s390x-mont.pl
+++ b/src/lib/libcrypto/bn/asm/s390x-mont.pl
@@ -69,8 +69,8 @@ bn_mul_mont:
69 cghi $num,16 # 69 cghi $num,16 #
70 lghi %r2,0 # 70 lghi %r2,0 #
71 blr %r14 # if($num<16) return 0; 71 blr %r14 # if($num<16) return 0;
72 cghi $num,128 # 72 cghi $num,96 #
73 bhr %r14 # if($num>128) return 0; 73 bhr %r14 # if($num>96) return 0;
74 74
75 stmg %r3,%r15,24($sp) 75 stmg %r3,%r15,24($sp)
76 76
diff --git a/src/lib/libcrypto/bn/asm/s390x.S b/src/lib/libcrypto/bn/asm/s390x.S
index 8f45f5d513..43fcb79bc0 100755
--- a/src/lib/libcrypto/bn/asm/s390x.S
+++ b/src/lib/libcrypto/bn/asm/s390x.S
@@ -1,4 +1,4 @@
1.ident "s390x.S, version 1.0" 1.ident "s390x.S, version 1.1"
2// ==================================================================== 2// ====================================================================
3// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 3// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4// project. 4// project.
@@ -24,67 +24,67 @@ bn_mul_add_words:
24 bler %r14 // if (len<=0) return 0; 24 bler %r14 // if (len<=0) return 0;
25 25
26 stmg %r6,%r10,48(%r15) 26 stmg %r6,%r10,48(%r15)
27 lghi %r10,3
27 lghi %r8,0 // carry = 0 28 lghi %r8,0 // carry = 0
28 srag %r10,%r4,2 // cnt=len/4 29 nr %r10,%r4 // len%4
29 jz .Loop1_madd 30 sra %r4,2 // cnt=len/4
31 jz .Loop1_madd // carry is incidentally cleared if branch taken
32 algr zero,zero // clear carry
30 33
31.Loop4_madd: 34.Loop4_madd:
32 lg %r7,0(%r2,%r3) // ap[i] 35 lg %r7,0(%r2,%r3) // ap[i]
33 mlgr %r6,%r5 // *=w 36 mlgr %r6,%r5 // *=w
34 algr %r7,%r8 // +=carry 37 alcgr %r7,%r8 // +=carry
35 alcgr %r6,zero 38 alcgr %r6,zero
36 alg %r7,0(%r2,%r1) // +=rp[i] 39 alg %r7,0(%r2,%r1) // +=rp[i]
37 alcgr %r6,zero
38 stg %r7,0(%r2,%r1) // rp[i]= 40 stg %r7,0(%r2,%r1) // rp[i]=
39 41
40 lg %r9,8(%r2,%r3) 42 lg %r9,8(%r2,%r3)
41 mlgr %r8,%r5 43 mlgr %r8,%r5
42 algr %r9,%r6 44 alcgr %r9,%r6
43 alcgr %r8,zero 45 alcgr %r8,zero
44 alg %r9,8(%r2,%r1) 46 alg %r9,8(%r2,%r1)
45 alcgr %r8,zero
46 stg %r9,8(%r2,%r1) 47 stg %r9,8(%r2,%r1)
47 48
48 lg %r7,16(%r2,%r3) 49 lg %r7,16(%r2,%r3)
49 mlgr %r6,%r5 50 mlgr %r6,%r5
50 algr %r7,%r8 51 alcgr %r7,%r8
51 alcgr %r6,zero 52 alcgr %r6,zero
52 alg %r7,16(%r2,%r1) 53 alg %r7,16(%r2,%r1)
53 alcgr %r6,zero
54 stg %r7,16(%r2,%r1) 54 stg %r7,16(%r2,%r1)
55 55
56 lg %r9,24(%r2,%r3) 56 lg %r9,24(%r2,%r3)
57 mlgr %r8,%r5 57 mlgr %r8,%r5
58 algr %r9,%r6 58 alcgr %r9,%r6
59 alcgr %r8,zero 59 alcgr %r8,zero
60 alg %r9,24(%r2,%r1) 60 alg %r9,24(%r2,%r1)
61 alcgr %r8,zero
62 stg %r9,24(%r2,%r1) 61 stg %r9,24(%r2,%r1)
63 62
64 la %r2,32(%r2) // i+=4 63 la %r2,32(%r2) // i+=4
65 brct %r10,.Loop4_madd 64 brct %r4,.Loop4_madd
66 65
67 lghi %r10,3 66 la %r10,1(%r10) // see if len%4 is zero ...
68 nr %r4,%r10 // cnt=len%4 67 brct %r10,.Loop1_madd // without touching condition code:-)
69 jz .Lend_madd 68
69.Lend_madd:
70 alcgr %r8,zero // collect carry bit
71 lgr %r2,%r8
72 lmg %r6,%r10,48(%r15)
73 br %r14
70 74
71.Loop1_madd: 75.Loop1_madd:
72 lg %r7,0(%r2,%r3) // ap[i] 76 lg %r7,0(%r2,%r3) // ap[i]
73 mlgr %r6,%r5 // *=w 77 mlgr %r6,%r5 // *=w
74 algr %r7,%r8 // +=carry 78 alcgr %r7,%r8 // +=carry
75 alcgr %r6,zero 79 alcgr %r6,zero
76 alg %r7,0(%r2,%r1) // +=rp[i] 80 alg %r7,0(%r2,%r1) // +=rp[i]
77 alcgr %r6,zero
78 stg %r7,0(%r2,%r1) // rp[i]= 81 stg %r7,0(%r2,%r1) // rp[i]=
79 82
80 lgr %r8,%r6 83 lgr %r8,%r6
81 la %r2,8(%r2) // i++ 84 la %r2,8(%r2) // i++
82 brct %r4,.Loop1_madd 85 brct %r10,.Loop1_madd
83 86
84.Lend_madd: 87 j .Lend_madd
85 lgr %r2,%r8
86 lmg %r6,%r10,48(%r15)
87 br %r14
88.size bn_mul_add_words,.-bn_mul_add_words 88.size bn_mul_add_words,.-bn_mul_add_words
89 89
90// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); 90// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
@@ -99,57 +99,57 @@ bn_mul_words:
99 bler %r14 // if (len<=0) return 0; 99 bler %r14 // if (len<=0) return 0;
100 100
101 stmg %r6,%r10,48(%r15) 101 stmg %r6,%r10,48(%r15)
102 lghi %r10,3
102 lghi %r8,0 // carry = 0 103 lghi %r8,0 // carry = 0
103 srag %r10,%r4,2 // cnt=len/4 104 nr %r10,%r4 // len%4
104 jz .Loop1_mul 105 sra %r4,2 // cnt=len/4
106 jz .Loop1_mul // carry is incidentally cleared if branch taken
107 algr zero,zero // clear carry
105 108
106.Loop4_mul: 109.Loop4_mul:
107 lg %r7,0(%r2,%r3) // ap[i] 110 lg %r7,0(%r2,%r3) // ap[i]
108 mlgr %r6,%r5 // *=w 111 mlgr %r6,%r5 // *=w
109 algr %r7,%r8 // +=carry 112 alcgr %r7,%r8 // +=carry
110 alcgr %r6,zero
111 stg %r7,0(%r2,%r1) // rp[i]= 113 stg %r7,0(%r2,%r1) // rp[i]=
112 114
113 lg %r9,8(%r2,%r3) 115 lg %r9,8(%r2,%r3)
114 mlgr %r8,%r5 116 mlgr %r8,%r5
115 algr %r9,%r6 117 alcgr %r9,%r6
116 alcgr %r8,zero
117 stg %r9,8(%r2,%r1) 118 stg %r9,8(%r2,%r1)
118 119
119 lg %r7,16(%r2,%r3) 120 lg %r7,16(%r2,%r3)
120 mlgr %r6,%r5 121 mlgr %r6,%r5
121 algr %r7,%r8 122 alcgr %r7,%r8
122 alcgr %r6,zero
123 stg %r7,16(%r2,%r1) 123 stg %r7,16(%r2,%r1)
124 124
125 lg %r9,24(%r2,%r3) 125 lg %r9,24(%r2,%r3)
126 mlgr %r8,%r5 126 mlgr %r8,%r5
127 algr %r9,%r6 127 alcgr %r9,%r6
128 alcgr %r8,zero
129 stg %r9,24(%r2,%r1) 128 stg %r9,24(%r2,%r1)
130 129
131 la %r2,32(%r2) // i+=4 130 la %r2,32(%r2) // i+=4
132 brct %r10,.Loop4_mul 131 brct %r4,.Loop4_mul
133 132
134 lghi %r10,3 133 la %r10,1(%r10) // see if len%4 is zero ...
135 nr %r4,%r10 // cnt=len%4 134 brct %r10,.Loop1_mul // without touching condition code:-)
136 jz .Lend_mul 135
136.Lend_mul:
137 alcgr %r8,zero // collect carry bit
138 lgr %r2,%r8
139 lmg %r6,%r10,48(%r15)
140 br %r14
137 141
138.Loop1_mul: 142.Loop1_mul:
139 lg %r7,0(%r2,%r3) // ap[i] 143 lg %r7,0(%r2,%r3) // ap[i]
140 mlgr %r6,%r5 // *=w 144 mlgr %r6,%r5 // *=w
141 algr %r7,%r8 // +=carry 145 alcgr %r7,%r8 // +=carry
142 alcgr %r6,zero
143 stg %r7,0(%r2,%r1) // rp[i]= 146 stg %r7,0(%r2,%r1) // rp[i]=
144 147
145 lgr %r8,%r6 148 lgr %r8,%r6
146 la %r2,8(%r2) // i++ 149 la %r2,8(%r2) // i++
147 brct %r4,.Loop1_mul 150 brct %r10,.Loop1_mul
148 151
149.Lend_mul: 152 j .Lend_mul
150 lgr %r2,%r8
151 lmg %r6,%r10,48(%r15)
152 br %r14
153.size bn_mul_words,.-bn_mul_words 153.size bn_mul_words,.-bn_mul_words
154 154
155// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4) 155// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)