diff options
author | djm <> | 2011-11-03 02:32:23 +0000 |
---|---|---|
committer | djm <> | 2011-11-03 02:32:23 +0000 |
commit | fa3384eda96e42a23f6d3208998246abda2535c1 (patch) | |
tree | d7ee8f9bcd8062ffa41dd1e250e04de24159ee80 /src/lib/libcrypto/bn/asm | |
parent | 154527e9cde3004ed29ea1316880670ec73dcafa (diff) | |
parent | 113f799ec7d1728f0a5d7ab5b0e3b42e3de56407 (diff) | |
download | openbsd-fa3384eda96e42a23f6d3208998246abda2535c1.tar.gz openbsd-fa3384eda96e42a23f6d3208998246abda2535c1.tar.bz2 openbsd-fa3384eda96e42a23f6d3208998246abda2535c1.zip |
This commit was generated by cvs2git to track changes on a CVS vendor
branch.
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
-rw-r--r-- | src/lib/libcrypto/bn/asm/alpha-mont.pl | 36 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/s390x-mont.pl | 4 | ||||
-rwxr-xr-x | src/lib/libcrypto/bn/asm/s390x.S | 86 |
3 files changed, 65 insertions, 61 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl index f7e0ca1646..03596e2014 100644 --- a/src/lib/libcrypto/bn/asm/alpha-mont.pl +++ b/src/lib/libcrypto/bn/asm/alpha-mont.pl | |||
@@ -41,8 +41,12 @@ $j="s4"; | |||
41 | $m1="s5"; | 41 | $m1="s5"; |
42 | 42 | ||
43 | $code=<<___; | 43 | $code=<<___; |
44 | #ifdef __linux__ | ||
45 | #include <asm/regdef.h> | ||
46 | #else | ||
44 | #include <asm.h> | 47 | #include <asm.h> |
45 | #include <regdef.h> | 48 | #include <regdef.h> |
49 | #endif | ||
46 | 50 | ||
47 | .text | 51 | .text |
48 | 52 | ||
@@ -76,7 +80,7 @@ bn_mul_mont: | |||
76 | ldq $aj,8($ap) | 80 | ldq $aj,8($ap) |
77 | subq sp,AT,sp | 81 | subq sp,AT,sp |
78 | ldq $bi,0($bp) # bp[0] | 82 | ldq $bi,0($bp) # bp[0] |
79 | mov -4096,AT | 83 | lda AT,-4096(zero) # mov -4096,AT |
80 | ldq $n0,0($n0) | 84 | ldq $n0,0($n0) |
81 | and sp,AT,sp | 85 | and sp,AT,sp |
82 | 86 | ||
@@ -106,9 +110,9 @@ bn_mul_mont: | |||
106 | .align 4 | 110 | .align 4 |
107 | .L1st: | 111 | .L1st: |
108 | .set noreorder | 112 | .set noreorder |
109 | ldq $aj,($aj) | 113 | ldq $aj,0($aj) |
110 | addl $j,1,$j | 114 | addl $j,1,$j |
111 | ldq $nj,($nj) | 115 | ldq $nj,0($nj) |
112 | lda $tp,8($tp) | 116 | lda $tp,8($tp) |
113 | 117 | ||
114 | addq $alo,$hi0,$lo0 | 118 | addq $alo,$hi0,$lo0 |
@@ -159,12 +163,12 @@ bn_mul_mont: | |||
159 | .align 4 | 163 | .align 4 |
160 | .Louter: | 164 | .Louter: |
161 | s8addq $i,$bp,$bi | 165 | s8addq $i,$bp,$bi |
162 | ldq $hi0,($ap) | 166 | ldq $hi0,0($ap) |
163 | ldq $aj,8($ap) | 167 | ldq $aj,8($ap) |
164 | ldq $bi,($bi) | 168 | ldq $bi,0($bi) |
165 | ldq $hi1,($np) | 169 | ldq $hi1,0($np) |
166 | ldq $nj,8($np) | 170 | ldq $nj,8($np) |
167 | ldq $tj,(sp) | 171 | ldq $tj,0(sp) |
168 | 172 | ||
169 | mulq $hi0,$bi,$lo0 | 173 | mulq $hi0,$bi,$lo0 |
170 | umulh $hi0,$bi,$hi0 | 174 | umulh $hi0,$bi,$hi0 |
@@ -195,10 +199,10 @@ bn_mul_mont: | |||
195 | .set noreorder | 199 | .set noreorder |
196 | ldq $tj,8($tp) #L0 | 200 | ldq $tj,8($tp) #L0 |
197 | nop #U1 | 201 | nop #U1 |
198 | ldq $aj,($aj) #L1 | 202 | ldq $aj,0($aj) #L1 |
199 | s8addq $j,$np,$nj #U0 | 203 | s8addq $j,$np,$nj #U0 |
200 | 204 | ||
201 | ldq $nj,($nj) #L0 | 205 | ldq $nj,0($nj) #L0 |
202 | nop #U1 | 206 | nop #U1 |
203 | addq $alo,$hi0,$lo0 #L1 | 207 | addq $alo,$hi0,$lo0 #L1 |
204 | lda $tp,8($tp) | 208 | lda $tp,8($tp) |
@@ -247,7 +251,7 @@ bn_mul_mont: | |||
247 | addq $hi1,v0,$hi1 | 251 | addq $hi1,v0,$hi1 |
248 | 252 | ||
249 | addq $hi1,$hi0,$lo1 | 253 | addq $hi1,$hi0,$lo1 |
250 | stq $j,($tp) | 254 | stq $j,0($tp) |
251 | cmpult $lo1,$hi0,$hi1 | 255 | cmpult $lo1,$hi0,$hi1 |
252 | addq $lo1,$tj,$lo1 | 256 | addq $lo1,$tj,$lo1 |
253 | cmpult $lo1,$tj,AT | 257 | cmpult $lo1,$tj,AT |
@@ -265,8 +269,8 @@ bn_mul_mont: | |||
265 | mov 0,$hi0 # clear borrow bit | 269 | mov 0,$hi0 # clear borrow bit |
266 | 270 | ||
267 | .align 4 | 271 | .align 4 |
268 | .Lsub: ldq $lo0,($tp) | 272 | .Lsub: ldq $lo0,0($tp) |
269 | ldq $lo1,($np) | 273 | ldq $lo1,0($np) |
270 | lda $tp,8($tp) | 274 | lda $tp,8($tp) |
271 | lda $np,8($np) | 275 | lda $np,8($np) |
272 | subq $lo0,$lo1,$lo1 # tp[i]-np[i] | 276 | subq $lo0,$lo1,$lo1 # tp[i]-np[i] |
@@ -274,7 +278,7 @@ bn_mul_mont: | |||
274 | subq $lo1,$hi0,$lo0 | 278 | subq $lo1,$hi0,$lo0 |
275 | cmpult $lo1,$lo0,$hi0 | 279 | cmpult $lo1,$lo0,$hi0 |
276 | or $hi0,AT,$hi0 | 280 | or $hi0,AT,$hi0 |
277 | stq $lo0,($rp) | 281 | stq $lo0,0($rp) |
278 | cmpult $tp,$tj,v0 | 282 | cmpult $tp,$tj,v0 |
279 | lda $rp,8($rp) | 283 | lda $rp,8($rp) |
280 | bne v0,.Lsub | 284 | bne v0,.Lsub |
@@ -288,7 +292,7 @@ bn_mul_mont: | |||
288 | bis $bp,$ap,$ap # ap=borrow?tp:rp | 292 | bis $bp,$ap,$ap # ap=borrow?tp:rp |
289 | 293 | ||
290 | .align 4 | 294 | .align 4 |
291 | .Lcopy: ldq $aj,($ap) # copy or in-place refresh | 295 | .Lcopy: ldq $aj,0($ap) # copy or in-place refresh |
292 | lda $tp,8($tp) | 296 | lda $tp,8($tp) |
293 | lda $rp,8($rp) | 297 | lda $rp,8($rp) |
294 | lda $ap,8($ap) | 298 | lda $ap,8($ap) |
@@ -309,8 +313,8 @@ bn_mul_mont: | |||
309 | lda sp,48(sp) | 313 | lda sp,48(sp) |
310 | ret (ra) | 314 | ret (ra) |
311 | .end bn_mul_mont | 315 | .end bn_mul_mont |
312 | .rdata | 316 | .ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>" |
313 | .asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>" | 317 | .align 2 |
314 | ___ | 318 | ___ |
315 | 319 | ||
316 | print $code; | 320 | print $code; |
diff --git a/src/lib/libcrypto/bn/asm/s390x-mont.pl b/src/lib/libcrypto/bn/asm/s390x-mont.pl index d23251033b..f61246f5b6 100644 --- a/src/lib/libcrypto/bn/asm/s390x-mont.pl +++ b/src/lib/libcrypto/bn/asm/s390x-mont.pl | |||
@@ -69,8 +69,8 @@ bn_mul_mont: | |||
69 | cghi $num,16 # | 69 | cghi $num,16 # |
70 | lghi %r2,0 # | 70 | lghi %r2,0 # |
71 | blr %r14 # if($num<16) return 0; | 71 | blr %r14 # if($num<16) return 0; |
72 | cghi $num,128 # | 72 | cghi $num,96 # |
73 | bhr %r14 # if($num>128) return 0; | 73 | bhr %r14 # if($num>96) return 0; |
74 | 74 | ||
75 | stmg %r3,%r15,24($sp) | 75 | stmg %r3,%r15,24($sp) |
76 | 76 | ||
diff --git a/src/lib/libcrypto/bn/asm/s390x.S b/src/lib/libcrypto/bn/asm/s390x.S index 8f45f5d513..43fcb79bc0 100755 --- a/src/lib/libcrypto/bn/asm/s390x.S +++ b/src/lib/libcrypto/bn/asm/s390x.S | |||
@@ -1,4 +1,4 @@ | |||
1 | .ident "s390x.S, version 1.0" | 1 | .ident "s390x.S, version 1.1" |
2 | // ==================================================================== | 2 | // ==================================================================== |
3 | // Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | 3 | // Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
4 | // project. | 4 | // project. |
@@ -24,67 +24,67 @@ bn_mul_add_words: | |||
24 | bler %r14 // if (len<=0) return 0; | 24 | bler %r14 // if (len<=0) return 0; |
25 | 25 | ||
26 | stmg %r6,%r10,48(%r15) | 26 | stmg %r6,%r10,48(%r15) |
27 | lghi %r10,3 | ||
27 | lghi %r8,0 // carry = 0 | 28 | lghi %r8,0 // carry = 0 |
28 | srag %r10,%r4,2 // cnt=len/4 | 29 | nr %r10,%r4 // len%4 |
29 | jz .Loop1_madd | 30 | sra %r4,2 // cnt=len/4 |
31 | jz .Loop1_madd // carry is incidentally cleared if branch taken | ||
32 | algr zero,zero // clear carry | ||
30 | 33 | ||
31 | .Loop4_madd: | 34 | .Loop4_madd: |
32 | lg %r7,0(%r2,%r3) // ap[i] | 35 | lg %r7,0(%r2,%r3) // ap[i] |
33 | mlgr %r6,%r5 // *=w | 36 | mlgr %r6,%r5 // *=w |
34 | algr %r7,%r8 // +=carry | 37 | alcgr %r7,%r8 // +=carry |
35 | alcgr %r6,zero | 38 | alcgr %r6,zero |
36 | alg %r7,0(%r2,%r1) // +=rp[i] | 39 | alg %r7,0(%r2,%r1) // +=rp[i] |
37 | alcgr %r6,zero | ||
38 | stg %r7,0(%r2,%r1) // rp[i]= | 40 | stg %r7,0(%r2,%r1) // rp[i]= |
39 | 41 | ||
40 | lg %r9,8(%r2,%r3) | 42 | lg %r9,8(%r2,%r3) |
41 | mlgr %r8,%r5 | 43 | mlgr %r8,%r5 |
42 | algr %r9,%r6 | 44 | alcgr %r9,%r6 |
43 | alcgr %r8,zero | 45 | alcgr %r8,zero |
44 | alg %r9,8(%r2,%r1) | 46 | alg %r9,8(%r2,%r1) |
45 | alcgr %r8,zero | ||
46 | stg %r9,8(%r2,%r1) | 47 | stg %r9,8(%r2,%r1) |
47 | 48 | ||
48 | lg %r7,16(%r2,%r3) | 49 | lg %r7,16(%r2,%r3) |
49 | mlgr %r6,%r5 | 50 | mlgr %r6,%r5 |
50 | algr %r7,%r8 | 51 | alcgr %r7,%r8 |
51 | alcgr %r6,zero | 52 | alcgr %r6,zero |
52 | alg %r7,16(%r2,%r1) | 53 | alg %r7,16(%r2,%r1) |
53 | alcgr %r6,zero | ||
54 | stg %r7,16(%r2,%r1) | 54 | stg %r7,16(%r2,%r1) |
55 | 55 | ||
56 | lg %r9,24(%r2,%r3) | 56 | lg %r9,24(%r2,%r3) |
57 | mlgr %r8,%r5 | 57 | mlgr %r8,%r5 |
58 | algr %r9,%r6 | 58 | alcgr %r9,%r6 |
59 | alcgr %r8,zero | 59 | alcgr %r8,zero |
60 | alg %r9,24(%r2,%r1) | 60 | alg %r9,24(%r2,%r1) |
61 | alcgr %r8,zero | ||
62 | stg %r9,24(%r2,%r1) | 61 | stg %r9,24(%r2,%r1) |
63 | 62 | ||
64 | la %r2,32(%r2) // i+=4 | 63 | la %r2,32(%r2) // i+=4 |
65 | brct %r10,.Loop4_madd | 64 | brct %r4,.Loop4_madd |
66 | 65 | ||
67 | lghi %r10,3 | 66 | la %r10,1(%r10) // see if len%4 is zero ... |
68 | nr %r4,%r10 // cnt=len%4 | 67 | brct %r10,.Loop1_madd // without touching condition code:-) |
69 | jz .Lend_madd | 68 | |
69 | .Lend_madd: | ||
70 | alcgr %r8,zero // collect carry bit | ||
71 | lgr %r2,%r8 | ||
72 | lmg %r6,%r10,48(%r15) | ||
73 | br %r14 | ||
70 | 74 | ||
71 | .Loop1_madd: | 75 | .Loop1_madd: |
72 | lg %r7,0(%r2,%r3) // ap[i] | 76 | lg %r7,0(%r2,%r3) // ap[i] |
73 | mlgr %r6,%r5 // *=w | 77 | mlgr %r6,%r5 // *=w |
74 | algr %r7,%r8 // +=carry | 78 | alcgr %r7,%r8 // +=carry |
75 | alcgr %r6,zero | 79 | alcgr %r6,zero |
76 | alg %r7,0(%r2,%r1) // +=rp[i] | 80 | alg %r7,0(%r2,%r1) // +=rp[i] |
77 | alcgr %r6,zero | ||
78 | stg %r7,0(%r2,%r1) // rp[i]= | 81 | stg %r7,0(%r2,%r1) // rp[i]= |
79 | 82 | ||
80 | lgr %r8,%r6 | 83 | lgr %r8,%r6 |
81 | la %r2,8(%r2) // i++ | 84 | la %r2,8(%r2) // i++ |
82 | brct %r4,.Loop1_madd | 85 | brct %r10,.Loop1_madd |
83 | 86 | ||
84 | .Lend_madd: | 87 | j .Lend_madd |
85 | lgr %r2,%r8 | ||
86 | lmg %r6,%r10,48(%r15) | ||
87 | br %r14 | ||
88 | .size bn_mul_add_words,.-bn_mul_add_words | 88 | .size bn_mul_add_words,.-bn_mul_add_words |
89 | 89 | ||
90 | // BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); | 90 | // BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); |
@@ -99,57 +99,57 @@ bn_mul_words: | |||
99 | bler %r14 // if (len<=0) return 0; | 99 | bler %r14 // if (len<=0) return 0; |
100 | 100 | ||
101 | stmg %r6,%r10,48(%r15) | 101 | stmg %r6,%r10,48(%r15) |
102 | lghi %r10,3 | ||
102 | lghi %r8,0 // carry = 0 | 103 | lghi %r8,0 // carry = 0 |
103 | srag %r10,%r4,2 // cnt=len/4 | 104 | nr %r10,%r4 // len%4 |
104 | jz .Loop1_mul | 105 | sra %r4,2 // cnt=len/4 |
106 | jz .Loop1_mul // carry is incidentally cleared if branch taken | ||
107 | algr zero,zero // clear carry | ||
105 | 108 | ||
106 | .Loop4_mul: | 109 | .Loop4_mul: |
107 | lg %r7,0(%r2,%r3) // ap[i] | 110 | lg %r7,0(%r2,%r3) // ap[i] |
108 | mlgr %r6,%r5 // *=w | 111 | mlgr %r6,%r5 // *=w |
109 | algr %r7,%r8 // +=carry | 112 | alcgr %r7,%r8 // +=carry |
110 | alcgr %r6,zero | ||
111 | stg %r7,0(%r2,%r1) // rp[i]= | 113 | stg %r7,0(%r2,%r1) // rp[i]= |
112 | 114 | ||
113 | lg %r9,8(%r2,%r3) | 115 | lg %r9,8(%r2,%r3) |
114 | mlgr %r8,%r5 | 116 | mlgr %r8,%r5 |
115 | algr %r9,%r6 | 117 | alcgr %r9,%r6 |
116 | alcgr %r8,zero | ||
117 | stg %r9,8(%r2,%r1) | 118 | stg %r9,8(%r2,%r1) |
118 | 119 | ||
119 | lg %r7,16(%r2,%r3) | 120 | lg %r7,16(%r2,%r3) |
120 | mlgr %r6,%r5 | 121 | mlgr %r6,%r5 |
121 | algr %r7,%r8 | 122 | alcgr %r7,%r8 |
122 | alcgr %r6,zero | ||
123 | stg %r7,16(%r2,%r1) | 123 | stg %r7,16(%r2,%r1) |
124 | 124 | ||
125 | lg %r9,24(%r2,%r3) | 125 | lg %r9,24(%r2,%r3) |
126 | mlgr %r8,%r5 | 126 | mlgr %r8,%r5 |
127 | algr %r9,%r6 | 127 | alcgr %r9,%r6 |
128 | alcgr %r8,zero | ||
129 | stg %r9,24(%r2,%r1) | 128 | stg %r9,24(%r2,%r1) |
130 | 129 | ||
131 | la %r2,32(%r2) // i+=4 | 130 | la %r2,32(%r2) // i+=4 |
132 | brct %r10,.Loop4_mul | 131 | brct %r4,.Loop4_mul |
133 | 132 | ||
134 | lghi %r10,3 | 133 | la %r10,1(%r10) // see if len%4 is zero ... |
135 | nr %r4,%r10 // cnt=len%4 | 134 | brct %r10,.Loop1_mul // without touching condition code:-) |
136 | jz .Lend_mul | 135 | |
136 | .Lend_mul: | ||
137 | alcgr %r8,zero // collect carry bit | ||
138 | lgr %r2,%r8 | ||
139 | lmg %r6,%r10,48(%r15) | ||
140 | br %r14 | ||
137 | 141 | ||
138 | .Loop1_mul: | 142 | .Loop1_mul: |
139 | lg %r7,0(%r2,%r3) // ap[i] | 143 | lg %r7,0(%r2,%r3) // ap[i] |
140 | mlgr %r6,%r5 // *=w | 144 | mlgr %r6,%r5 // *=w |
141 | algr %r7,%r8 // +=carry | 145 | alcgr %r7,%r8 // +=carry |
142 | alcgr %r6,zero | ||
143 | stg %r7,0(%r2,%r1) // rp[i]= | 146 | stg %r7,0(%r2,%r1) // rp[i]= |
144 | 147 | ||
145 | lgr %r8,%r6 | 148 | lgr %r8,%r6 |
146 | la %r2,8(%r2) // i++ | 149 | la %r2,8(%r2) // i++ |
147 | brct %r4,.Loop1_mul | 150 | brct %r10,.Loop1_mul |
148 | 151 | ||
149 | .Lend_mul: | 152 | j .Lend_mul |
150 | lgr %r2,%r8 | ||
151 | lmg %r6,%r10,48(%r15) | ||
152 | br %r14 | ||
153 | .size bn_mul_words,.-bn_mul_words | 153 | .size bn_mul_words,.-bn_mul_words |
154 | 154 | ||
155 | // void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4) | 155 | // void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4) |