summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/bn/asm/alpha-mont.pl317
-rw-r--r--src/lib/libcrypto/bn/asm/armv4-mont.pl200
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl675
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl286
-rw-r--r--src/lib/libcrypto/bn/asm/ia64.S1555
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2.s1618
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2W.s1605
-rw-r--r--src/lib/libcrypto/bn/asm/ppc-mont.pl323
-rw-r--r--src/lib/libcrypto/bn/asm/ppc.pl2078
-rw-r--r--src/lib/libcrypto/bn/asm/ppc64-mont.pl918
-rw-r--r--src/lib/libcrypto/bn/asm/s390x-mont.pl225
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/s390x.S678
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8.S1458
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8plus.S1547
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv9-mont.pl606
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/sparcv9a-mont.pl882
-rw-r--r--src/lib/libcrypto/bn/asm/via-mont.pl242
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86-mont.pl591
-rw-r--r--src/lib/libcrypto/bn/asm/x86.pl28
-rw-r--r--src/lib/libcrypto/bn/asm/x86/add.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86/comba.pl277
-rw-r--r--src/lib/libcrypto/bn/asm/x86/div.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul.pl77
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul_add.pl87
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sqr.pl60
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sub.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86_64-gcc.c597
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86_64-mont.pl214
-rw-r--r--src/lib/libcrypto/bn/bn.h855
-rw-r--r--src/lib/libcrypto/bn/bn_add.c313
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c860
-rw-r--r--src/lib/libcrypto/bn/bn_blind.c365
-rw-r--r--src/lib/libcrypto/bn/bn_const.c402
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c454
-rw-r--r--src/lib/libcrypto/bn/bn_depr.c112
-rw-r--r--src/lib/libcrypto/bn/bn_div.c643
-rw-r--r--src/lib/libcrypto/bn/bn_err.c150
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c990
-rw-r--r--src/lib/libcrypto/bn/bn_exp2.c311
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c654
-rw-r--r--src/lib/libcrypto/bn/bn_gf2m.c1095
-rw-r--r--src/lib/libcrypto/bn/bn_kron.c184
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h490
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c826
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c301
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c732
-rw-r--r--src/lib/libcrypto/bn/bn_mpi.c130
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c1169
-rw-r--r--src/lib/libcrypto/bn/bn_nist.c836
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c494
-rw-r--r--src/lib/libcrypto/bn/bn_prime.h327
-rw-r--r--src/lib/libcrypto/bn/bn_prime.pl119
-rw-r--r--src/lib/libcrypto/bn/bn_print.c338
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c305
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c234
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c220
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c294
-rw-r--r--src/lib/libcrypto/bn/bn_sqrt.c393
-rw-r--r--src/lib/libcrypto/bn/bn_word.c247
-rw-r--r--src/lib/libcrypto/bn/bn_x931p.c272
60 files changed, 0 insertions, 32426 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl
deleted file mode 100644
index 7a2cc3173b..0000000000
--- a/src/lib/libcrypto/bn/asm/alpha-mont.pl
+++ /dev/null
@@ -1,317 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# On 21264 RSA sign performance improves by 70/35/20/15 percent for
11# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
12# instructed to '-tune host' code with in-line assembler. Other
13# benchmarks improve by 15-20%. To anchor it to something else, the
14# code provides approximately the same performance per GHz as AMD64.
15# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
16# difference.
17
18# int bn_mul_mont(
19$rp="a0"; # BN_ULONG *rp,
20$ap="a1"; # const BN_ULONG *ap,
21$bp="a2"; # const BN_ULONG *bp,
22$np="a3"; # const BN_ULONG *np,
23$n0="a4"; # const BN_ULONG *n0,
24$num="a5"; # int num);
25
# Scratch values map onto t0-t12, which the prologue in the template
# below does not save.
26$lo0="t0";
27$hi0="t1";
28$lo1="t2";
29$hi1="t3";
30$aj="t4";
31$bi="t5";
32$nj="t6";
33$tp="t7";
34$alo="t8";
35$ahi="t9";
36$nlo="t10";
37$nhi="t11";
38$tj="t12";
# The loop counters and $m1 live in s3-s5; the template's prologue
# stores those registers on the stack and its epilogue reloads them.
39$i="s3";
40$j="s4";
41$m1="s5";
42
# Assembly template: the register names assigned above are interpolated
# into this here-document, which ends at the "___" line.
43$code=<<___;
44#include <asm.h>
45#include <regdef.h>
46
47.text
48
49.set noat
50.set noreorder
51
52.globl bn_mul_mont
53.align 5
54.ent bn_mul_mont
55bn_mul_mont:
56 lda sp,-40(sp)
57 stq ra,0(sp)
58 stq s3,8(sp)
59 stq s4,16(sp)
60 stq s5,24(sp)
61 stq fp,32(sp)
62 mov sp,fp
63 .mask 0x0400f000,-40
64 .frame fp,40,ra
65 .prologue 0
66
67 .align 4
68 .set reorder
69 sextl $num,$num
70 mov 0,v0
71 cmplt $num,4,AT
72 bne AT,.Lexit
73
74 ldq $hi0,0($ap) # ap[0]
75 s8addq $num,16,AT
76 ldq $aj,8($ap)
77 subq sp,AT,sp
78 ldq $bi,0($bp) # bp[0]
79 mov -4096,AT
80 ldq $n0,0($n0)
81 and sp,AT,sp
82
83 mulq $hi0,$bi,$lo0
84 ldq $hi1,0($np) # np[0]
85 umulh $hi0,$bi,$hi0
86 ldq $nj,8($np)
87
88 mulq $lo0,$n0,$m1
89
90 mulq $hi1,$m1,$lo1
91 umulh $hi1,$m1,$hi1
92
93 addq $lo1,$lo0,$lo1
94 cmpult $lo1,$lo0,AT
95 addq $hi1,AT,$hi1
96
97 mulq $aj,$bi,$alo
98 mov 2,$j
99 umulh $aj,$bi,$ahi
100 mov sp,$tp
101
102 mulq $nj,$m1,$nlo
103 s8addq $j,$ap,$aj
104 umulh $nj,$m1,$nhi
105 s8addq $j,$np,$nj
106.align 4
107.L1st:
108 .set noreorder
109 ldq $aj,($aj)
110 addl $j,1,$j
111 ldq $nj,($nj)
112 lda $tp,8($tp)
113
114 addq $alo,$hi0,$lo0
115 mulq $aj,$bi,$alo
116 cmpult $lo0,$hi0,AT
117 addq $nlo,$hi1,$lo1
118
119 mulq $nj,$m1,$nlo
120 addq $ahi,AT,$hi0
121 cmpult $lo1,$hi1,v0
122 cmplt $j,$num,$tj
123
124 umulh $aj,$bi,$ahi
125 addq $nhi,v0,$hi1
126 addq $lo1,$lo0,$lo1
127 s8addq $j,$ap,$aj
128
129 umulh $nj,$m1,$nhi
130 cmpult $lo1,$lo0,v0
131 addq $hi1,v0,$hi1
132 s8addq $j,$np,$nj
133
134 stq $lo1,-8($tp)
135 nop
136 unop
137 bne $tj,.L1st
138 .set reorder
139
140 addq $alo,$hi0,$lo0
141 addq $nlo,$hi1,$lo1
142 cmpult $lo0,$hi0,AT
143 cmpult $lo1,$hi1,v0
144 addq $ahi,AT,$hi0
145 addq $nhi,v0,$hi1
146
147 addq $lo1,$lo0,$lo1
148 cmpult $lo1,$lo0,v0
149 addq $hi1,v0,$hi1
150
151 stq $lo1,0($tp)
152
153 addq $hi1,$hi0,$hi1
154 cmpult $hi1,$hi0,AT
155 stq $hi1,8($tp)
156 stq AT,16($tp)
157
158 mov 1,$i
159.align 4
160.Louter:
161 s8addq $i,$bp,$bi
162 ldq $hi0,($ap)
163 ldq $aj,8($ap)
164 ldq $bi,($bi)
165 ldq $hi1,($np)
166 ldq $nj,8($np)
167 ldq $tj,(sp)
168
169 mulq $hi0,$bi,$lo0
170 umulh $hi0,$bi,$hi0
171
172 addq $lo0,$tj,$lo0
173 cmpult $lo0,$tj,AT
174 addq $hi0,AT,$hi0
175
176 mulq $lo0,$n0,$m1
177
178 mulq $hi1,$m1,$lo1
179 umulh $hi1,$m1,$hi1
180
181 addq $lo1,$lo0,$lo1
182 cmpult $lo1,$lo0,AT
183 mov 2,$j
184 addq $hi1,AT,$hi1
185
186 mulq $aj,$bi,$alo
187 mov sp,$tp
188 umulh $aj,$bi,$ahi
189
190 mulq $nj,$m1,$nlo
191 s8addq $j,$ap,$aj
192 umulh $nj,$m1,$nhi
193.align 4
194.Linner:
195 .set noreorder
196 ldq $tj,8($tp) #L0
197 nop #U1
198 ldq $aj,($aj) #L1
199 s8addq $j,$np,$nj #U0
200
201 ldq $nj,($nj) #L0
202 nop #U1
203 addq $alo,$hi0,$lo0 #L1
204 lda $tp,8($tp)
205
206 mulq $aj,$bi,$alo #U1
207 cmpult $lo0,$hi0,AT #L0
208 addq $nlo,$hi1,$lo1 #L1
209 addl $j,1,$j
210
211 mulq $nj,$m1,$nlo #U1
212 addq $ahi,AT,$hi0 #L0
213 addq $lo0,$tj,$lo0 #L1
214 cmpult $lo1,$hi1,v0 #U0
215
216 umulh $aj,$bi,$ahi #U1
217 cmpult $lo0,$tj,AT #L0
218 addq $lo1,$lo0,$lo1 #L1
219 addq $nhi,v0,$hi1 #U0
220
221 umulh $nj,$m1,$nhi #U1
222 s8addq $j,$ap,$aj #L0
223 cmpult $lo1,$lo0,v0 #L1
224 cmplt $j,$num,$tj #U0 # borrow $tj
225
226 addq $hi0,AT,$hi0 #L0
227 addq $hi1,v0,$hi1 #U1
228 stq $lo1,-8($tp) #L1
229 bne $tj,.Linner #U0
230 .set reorder
231
232 ldq $tj,8($tp)
233 addq $alo,$hi0,$lo0
234 addq $nlo,$hi1,$lo1
235 cmpult $lo0,$hi0,AT
236 cmpult $lo1,$hi1,v0
237 addq $ahi,AT,$hi0
238 addq $nhi,v0,$hi1
239
240 addq $lo0,$tj,$lo0
241 cmpult $lo0,$tj,AT
242 addq $hi0,AT,$hi0
243
244 ldq $tj,16($tp)
245 addq $lo1,$lo0,$j
246 cmpult $j,$lo0,v0
247 addq $hi1,v0,$hi1
248
249 addq $hi1,$hi0,$lo1
250 stq $j,($tp)
251 cmpult $lo1,$hi0,$hi1
252 addq $lo1,$tj,$lo1
253 cmpult $lo1,$tj,AT
254 addl $i,1,$i
255 addq $hi1,AT,$hi1
256 stq $lo1,8($tp)
257 cmplt $i,$num,$tj # borrow $tj
258 stq $hi1,16($tp)
259 bne $tj,.Louter
260
261 s8addq $num,sp,$tj # &tp[num]
262 mov $rp,$bp # put rp aside
263 mov sp,$tp
264 mov sp,$ap
265 mov 0,$hi0 # clear borrow bit
266
267.align 4
268.Lsub: ldq $lo0,($tp)
269 ldq $lo1,($np)
270 lda $tp,8($tp)
271 lda $np,8($np)
272 subq $lo0,$lo1,$lo1 # tp[i]-np[i]
273 cmpult $lo0,$lo1,AT
274 subq $lo1,$hi0,$lo0
275 cmpult $lo1,$lo0,$hi0
276 or $hi0,AT,$hi0
277 stq $lo0,($rp)
278 cmpult $tp,$tj,v0
279 lda $rp,8($rp)
280 bne v0,.Lsub
281
282 subq $hi1,$hi0,$hi0 # handle upmost overflow bit
283 mov sp,$tp
284 mov $bp,$rp # restore rp
285
286 and sp,$hi0,$ap
287 bic $bp,$hi0,$bp
288 bis $bp,$ap,$ap # ap=borrow?tp:rp
289
290.align 4
291.Lcopy: ldq $aj,($ap) # copy or in-place refresh
292 lda $tp,8($tp)
293 lda $rp,8($rp)
294 lda $ap,8($ap)
295 stq zero,-8($tp) # zap tp
296 cmpult $tp,$tj,AT
297 stq $aj,-8($rp)
298 bne AT,.Lcopy
299 mov 1,v0
300
301.Lexit:
302 .set noreorder
303 mov fp,sp
304 /*ldq ra,0(sp)*/
305 ldq s3,8(sp)
306 ldq s4,16(sp)
307 ldq s5,24(sp)
308 ldq fp,32(sp)
309 lda sp,40(sp)
310 ret (ra)
311.end bn_mul_mont
312.rdata
313.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
314___
315
# Write the generated assembly to STDOUT.
316print $code;
317close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl
deleted file mode 100644
index 05d5dc1a48..0000000000
--- a/src/lib/libcrypto/bn/asm/armv4-mont.pl
+++ /dev/null
@@ -1,200 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# January 2007.
11
12# Montgomery multiplication for ARMv4.
13#
14# Performance improvement naturally varies among CPU implementations
15# and compilers. The code was observed to provide +65-35% improvement
16# [depending on key length, less for longer keys] on ARM920T, and
17# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
18# base and compiler generated code with in-lined umull and even umlal
19# instructions. The latter means that this code didn't really have an
20# "advantage" of utilizing some "secret" instruction.
21#
22# The code is interoperable with Thumb ISA and is rather compact, less
23# than 1/2KB. Windows CE port would be trivial, as it's exclusively
24# about decorations, ABI and instruction syntax are identical.
25
# Register assignment for the assembly template below.
26$num="r0"; # starts as num argument, but holds &tp[num-1]
27$ap="r1";
# bp, bi and rp all share r2: the template loads bp[0] over the bp
# pointer, walks bp through $tp using the copy kept in the argument
# block, and pulls rp from the argument block before the final
# subtraction.
28$bp="r2"; $bi="r2"; $rp="r2";
29$np="r3";
30$tp="r4";
31$aj="r5";
32$nj="r6";
33$tj="r7";
34$n0="r8";
35########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer
36$alo="r10"; # sl, gcc uses it to keep @GOT
37$ahi="r11"; # fp
38$nlo="r12"; # ip
39########### # r13 is stack pointer
40$nhi="r14"; # lr
41########### # r15 is program counter
42
43#### argument block layout relative to &tp[num-1], a.k.a. $num
44$_rp="$num,#12*4";
45# ap permanently resides in r1
46$_bp="$num,#13*4";
47# np permanently resides in r3
48$_n0="$num,#14*4";
49$_num="$num,#15*4"; $_bpend=$_num;
50
# Assembly template: the names assigned above are interpolated into
# this here-document, which ends at the "___" line.
51$code=<<___;
52.text
53
54.global bn_mul_mont
55.type bn_mul_mont,%function
56
57.align 2
58bn_mul_mont:
59 stmdb sp!,{r0,r2} @ sp points at argument block
60 ldr $num,[sp,#3*4] @ load num
61 cmp $num,#2
62 movlt r0,#0
63 addlt sp,sp,#2*4
64 blt .Labrt
65
66 stmdb sp!,{r4-r12,lr} @ save 10 registers
67
68 mov $num,$num,lsl#2 @ rescale $num for byte count
69 sub sp,sp,$num @ alloca(4*num)
70 sub sp,sp,#4 @ +extra dword
71 sub $num,$num,#4 @ "num=num-1"
72 add $tp,$bp,$num @ &bp[num-1]
73
74 add $num,sp,$num @ $num to point at &tp[num-1]
75 ldr $n0,[$_n0] @ &n0
76 ldr $bi,[$bp] @ bp[0]
77 ldr $aj,[$ap],#4 @ ap[0],ap++
78 ldr $nj,[$np],#4 @ np[0],np++
79 ldr $n0,[$n0] @ *n0
80 str $tp,[$_bpend] @ save &bp[num]
81
82 umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0]
83 str $n0,[$_n0] @ save n0 value
84 mul $n0,$alo,$n0 @ "tp[0]"*n0
85 mov $nlo,#0
86 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]"
87 mov $tp,sp
88
89.L1st:
90 ldr $aj,[$ap],#4 @ ap[j],ap++
91 mov $alo,$ahi
92 mov $ahi,#0
93 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
94 ldr $nj,[$np],#4 @ np[j],np++
95 mov $nhi,#0
96 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
97 adds $nlo,$nlo,$alo
98 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
99 adc $nlo,$nhi,#0
100 cmp $tp,$num
101 bne .L1st
102
103 adds $nlo,$nlo,$ahi
104 mov $nhi,#0
105 adc $nhi,$nhi,#0
106 ldr $tp,[$_bp] @ restore bp
107 str $nlo,[$num] @ tp[num-1]=
108 ldr $n0,[$_n0] @ restore n0
109 str $nhi,[$num,#4] @ tp[num]=
110
111.Louter:
112 sub $tj,$num,sp @ "original" $num-1 value
113 sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
114 sub $np,$np,$tj @ "rewind" np to &np[1]
115 ldr $bi,[$tp,#4]! @ *(++bp)
116 ldr $aj,[$ap,#-4] @ ap[0]
117 ldr $nj,[$np,#-4] @ np[0]
118 ldr $alo,[sp] @ tp[0]
119 ldr $tj,[sp,#4] @ tp[1]
120
121 mov $ahi,#0
122 umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0]
123 str $tp,[$_bp] @ save bp
124 mul $n0,$alo,$n0
125 mov $nlo,#0
126 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]"
127 mov $tp,sp
128
129.Linner:
130 ldr $aj,[$ap],#4 @ ap[j],ap++
131 adds $alo,$ahi,$tj @ +=tp[j]
132 mov $ahi,#0
133 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
134 ldr $nj,[$np],#4 @ np[j],np++
135 mov $nhi,#0
136 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
137 ldr $tj,[$tp,#8] @ tp[j+1]
138 adc $ahi,$ahi,#0
139 adds $nlo,$nlo,$alo
140 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
141 adc $nlo,$nhi,#0
142 cmp $tp,$num
143 bne .Linner
144
145 adds $nlo,$nlo,$ahi
146 mov $nhi,#0
147 adc $nhi,$nhi,#0
148 adds $nlo,$nlo,$tj
149 adc $nhi,$nhi,#0
150 ldr $tp,[$_bp] @ restore bp
151 ldr $tj,[$_bpend] @ restore &bp[num]
152 str $nlo,[$num] @ tp[num-1]=
153 ldr $n0,[$_n0] @ restore n0
154 str $nhi,[$num,#4] @ tp[num]=
155
156 cmp $tp,$tj
157 bne .Louter
158
159 ldr $rp,[$_rp] @ pull rp
160 add $num,$num,#4 @ $num to point at &tp[num]
161 sub $aj,$num,sp @ "original" num value
162 mov $tp,sp @ "rewind" $tp
163 mov $ap,$tp @ "borrow" $ap
164 sub $np,$np,$aj @ "rewind" $np to &np[0]
165
166 subs $tj,$tj,$tj @ "clear" carry flag
167.Lsub: ldr $tj,[$tp],#4
168 ldr $nj,[$np],#4
169 sbcs $tj,$tj,$nj @ tp[j]-np[j]
170 str $tj,[$rp],#4 @ rp[j]=
171 teq $tp,$num @ preserve carry
172 bne .Lsub
173 sbcs $nhi,$nhi,#0 @ upmost carry
174 mov $tp,sp @ "rewind" $tp
175 sub $rp,$rp,$aj @ "rewind" $rp
176
177 and $ap,$tp,$nhi
178 bic $np,$rp,$nhi
179 orr $ap,$ap,$np @ ap=borrow?tp:rp
180
181.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh
182 str sp,[$tp],#4 @ zap tp
183 str $tj,[$rp],#4
184 cmp $tp,$num
185 bne .Lcopy
186
187 add sp,$num,#4 @ skip over tp[num+1]
188 ldmia sp!,{r4-r12,lr} @ restore registers
189 add sp,sp,#2*4 @ skip over {r0,r2}
190 mov r0,#1
191.Labrt: tst lr,#1
192 moveq pc,lr @ be binary compatible with V4, yet
193 bx lr @ interoperable with Thumb ISA:-)
194.size bn_mul_mont,.-bn_mul_mont
195.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
196___
197
198$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
199print $code;
200close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
deleted file mode 100644
index 26c2685a72..0000000000
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ /dev/null
@@ -1,675 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
# Initialise the perlasm back end with the first command-line argument
# and this script's name.
asm_init($ARGV[0],$0);

# The SSE2 code path is generated only when -DOPENSSL_IA32_SSE2 appears
# somewhere on the command line.
$sse2 = (grep { /-DOPENSSL_IA32_SSE2/ } @ARGV) ? 1 : 0;

if ($sse2)
	{
	external_label("OPENSSL_ia32cap_P");
	}

# Emit every routine under its public symbol name, in this order.
bn_mul_add_words("bn_mul_add_words");
bn_mul_words("bn_mul_words");
bn_sqr_words("bn_sqr_words");
bn_div_words("bn_div_words");
bn_add_words("bn_add_words");
bn_sub_words("bn_sub_words");
bn_sub_part_words("bn_sub_part_words");

asm_finish();
22
# Emit bn_mul_add_words(r, a, num, w): for each of the num words the
# generated code adds a[i]*w plus the running carry into r[i]; the final
# carry word is returned in eax.  Groups of eight words go through an
# unrolled loop (an MMX-register variant when $sse2 is set and bit 26 of
# OPENSSL_ia32cap_P tests as set at run time), the remaining num&7 words
# through an unrolled tail.
23sub bn_mul_add_words
24 {
25 local($name)=@_;
26
27 &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
28
29 &comment("");
30 $Low="eax";
31 $High="edx";
32 $a="ebx";
33 $w="ebp";
34 $r="edi";
35 $c="esi";
36
37 &xor($c,$c); # clear carry
38 &mov($r,&wparam(0)); #
39
40 &mov("ecx",&wparam(2)); #
41 &mov($a,&wparam(1)); #
42
43 &and("ecx",0xfffffff8); # num rounded down to a multiple of 8
44 &mov($w,&wparam(3)); #
45
46 &push("ecx"); # reserve a stack slot for a tmp variable
47
 # The jz below still tests the result of the `and` above: x86 mov and
 # push leave the flags untouched.
48 &jz(&label("maw_finish"));
49
50 if ($sse2) {
51 &picmeup("eax","OPENSSL_ia32cap_P");
52 &bt(&DWP(0,"eax"),26);
53 &jnc(&label("maw_loop"));
54
55 &movd("mm0",$w); # mm0 = w
56 &pxor("mm1","mm1"); # mm1 = carry_in
57
58 &set_label("maw_sse2_loop",0);
59 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
60 &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
61 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
62 &pmuludq("mm2","mm0"); # mm2 = w*a[0]
63 &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
64 &pmuludq("mm4","mm0"); # mm4 = w*a[1]
65 &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
66 &pmuludq("mm6","mm0"); # mm6 = w*a[2]
67 &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
68 &pmuludq("mm7","mm0"); # mm7 = w*a[3]
69 &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
70 &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
71 &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
72 &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
73 &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
74 &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
75 &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
76 &movd(&DWP(0,$r,"",0),"mm1");
77 &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
78 &pmuludq("mm2","mm0"); # mm2 = w*a[4]
79 &psrlq("mm1",32); # mm1 = carry0
80 &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
81 &pmuludq("mm4","mm0"); # mm4 = w*a[5]
82 &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
83 &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
84 &pmuludq("mm6","mm0"); # mm6 = w*a[6]
85 &movd(&DWP(4,$r,"",0),"mm1");
86 &psrlq("mm1",32); # mm1 = carry1
87 &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
88 &add($a,32);
89 &pmuludq("mm3","mm0"); # mm3 = w*a[7]
90 &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
91 &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
92 &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
93 &movd(&DWP(8,$r,"",0),"mm1");
94 &psrlq("mm1",32); # mm1 = carry2
95 &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
96 &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
97 &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
98 &movd(&DWP(12,$r,"",0),"mm1");
99 &psrlq("mm1",32); # mm1 = carry3
100 &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
101 &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
102 &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
103 &movd(&DWP(16,$r,"",0),"mm1");
104 &psrlq("mm1",32); # mm1 = carry4
105 &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
106 &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
107 &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
108 &movd(&DWP(20,$r,"",0),"mm1");
109 &psrlq("mm1",32); # mm1 = carry5
110 &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
111 &movd(&DWP(24,$r,"",0),"mm1");
112 &psrlq("mm1",32); # mm1 = carry6
113 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
114 &movd(&DWP(28,$r,"",0),"mm1");
115 &add($r,32);
116 &psrlq("mm1",32); # mm1 = carry_out
117
118 &sub("ecx",8);
119 &jnz(&label("maw_sse2_loop"));
120
121 &movd($c,"mm1"); # c = carry_out
122 &emms();
123
124 &jmp(&label("maw_finish"));
125 }
126
127 &set_label("maw_loop",0);
128
129 &mov(&swtmp(0),"ecx"); # spill the word count to the tmp slot
130
131 for ($i=0; $i<32; $i+=4)
132 {
133 &comment("Round $i");
134
135 &mov("eax",&DWP($i,$a,"",0)); # *a
136 &mul($w); # *a * w
137 &add("eax",$c); # L(t)+=c
138 &mov($c,&DWP($i,$r,"",0)); # c= *r ($c is free once added above)
139 &adc("edx",0); # H(t)+=carry
140 &add("eax",$c); # L(t)+= *r
141 &adc("edx",0); # H(t)+=carry
142 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
143 &mov($c,"edx"); # c= H(t);
144 }
145
146 &comment("");
147 &mov("ecx",&swtmp(0)); # reload the word count
148 &add($a,32);
149 &add($r,32);
150 &sub("ecx",8);
151 &jnz(&label("maw_loop"));
152
153 &set_label("maw_finish",0);
154 &mov("ecx",&wparam(2)); # get num
155 &and("ecx",7);
156 &jnz(&label("maw_finish2")); # helps branch prediction
157 &jmp(&label("maw_end"));
158
159 &set_label("maw_finish2",1);
160 for ($i=0; $i<7; $i++)
161 {
162 &comment("Tail Round $i");
163 &mov("eax",&DWP($i*4,$a,"",0));# *a
164 &mul($w); # *a * w
165 &add("eax",$c); # L(t)+=c
166 &mov($c,&DWP($i*4,$r,"",0)); # c= *r
167 &adc("edx",0); # H(t)+=carry
168 &add("eax",$c); # L(t)+= *r
169 &adc("edx",0); # H(t)+=carry
170 &dec("ecx") if ($i != 7-1);
171 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
172 &mov($c,"edx"); # c= H(t);
173 &jz(&label("maw_end")) if ($i != 7-1);
174 }
175 &set_label("maw_end",0);
176 &mov("eax",$c);
177
178 &pop("ecx"); # release the tmp stack slot
179
180 &function_end($name);
181 }
182
# Emit bn_mul_words(r, a, num, w).
#
# The generated routine stores the low word of a[i]*w + carry into r[i]
# for each of the num words and returns the final carry word in eax.
# Whole groups of eight words run through an unrolled loop; the
# remaining (num & 7) words go through an unrolled tail that leaves as
# soon as the count reaches zero.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$Low="eax";
	$High="edx";
	$a="ebx";
	$w="ecx";
	$r="edi";
	$c="esi";
	$num="ebp";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num
	&mov($w,&wparam(3));		# w

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	# Loop while words remain.  When the count reaches zero control
	# falls straight through to mw_finish, so one conditional branch
	# replaces the former jz/jmp pair and matches the sibling routines.
	&jnz(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# words left for the tail
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		# The last tail round needs no count check: at most seven
		# words can remain.
		&dec($num) if ($i != 7-1);
		&jz(&label("mw_end")) if ($i != 7-1);
		}
	&set_label("mw_end",0);
	&mov("eax",$c);			# return the carry

	&function_end($name);
	}
255
# Emit bn_sqr_words(r, a, num): for each of the num input words the
# generated code squares a[i] and stores the low and high halves of the
# product in r[2*i] and r[2*i+1].  Eight words per unrolled loop pass,
# then an unrolled tail for the remaining num&7 words.
256sub bn_sqr_words
257 {
258 local($name)=@_;
259
260 &function_begin($name,"");
261
262 &comment("");
263 $r="esi";
264 $a="edi";
265 $num="ebx";
266
267 &mov($r,&wparam(0)); #
268 &mov($a,&wparam(1)); #
269 &mov($num,&wparam(2)); #
270
271 &and($num,0xfffffff8); # num rounded down to a multiple of 8
272 &jz(&label("sw_finish"));
273
274 &set_label("sw_loop",0);
275 for ($i=0; $i<32; $i+=4)
276 {
277 &comment("Round $i");
278 &mov("eax",&DWP($i,$a,"",0)); # *a
279 # XXX
280 &mul("eax"); # *a * *a
281 &mov(&DWP($i*2,$r,"",0),"eax"); # low half of the square
282 &mov(&DWP($i*2+4,$r,"",0),"edx");# high half of the square
283 }
284
285 &comment("");
286 &add($a,32);
287 &add($r,64);
288 &sub($num,8);
289 &jnz(&label("sw_loop"));
290
291 &set_label("sw_finish",0);
292 &mov($num,&wparam(2)); # get num
293 &and($num,7);
294 &jz(&label("sw_end"));
295
296 for ($i=0; $i<7; $i++)
297 {
298 &comment("Tail Round $i");
299 &mov("eax",&DWP($i*4,$a,"",0)); # *a
300 # XXX
301 &mul("eax"); # *a * *a
302 &mov(&DWP($i*8,$r,"",0),"eax"); # low half of the square
303 &dec($num) if ($i != 7-1);
304 &mov(&DWP($i*8+4,$r,"",0),"edx");
305 &jz(&label("sw_end")) if ($i != 7-1);
306 }
307 &set_label("sw_end",0);
308
309 &function_end($name);
310 }
311
# Emit bn_div_words: the generated routine loads its first two arguments
# into edx:eax, its third into ebx, and executes a single `div ebx`.
sub bn_div_words
	{
	local($name)=@_;

	# destination register => argument index, in emission order
	my @loads = (["edx",0], ["eax",1], ["ebx",2]);

	&function_begin($name,"");
	foreach my $load (@loads)
		{
		my ($reg,$argno) = @$load;
		&mov($reg,&wparam($argno));
		}
	&div("ebx");
	&function_end($name);
	}
323
# Emit bn_add_words(r, a, b, num): the generated code stores
# a[i] + b[i] + carry into r[i] for each of the num words.  The running
# carry is kept in eax ($c), so it is also the value left in eax at the
# end.  Eight words per unrolled loop pass, then an unrolled tail for
# the remaining num&7 words.
324sub bn_add_words
325 {
326 local($name)=@_;
327
328 &function_begin($name,"");
329
330 &comment("");
331 $a="esi";
332 $b="edi";
333 $c="eax";
334 $r="ebx";
335 $tmp1="ecx";
336 $tmp2="edx";
337 $num="ebp";
338
339 &mov($r,&wparam(0)); # get r
340 &mov($a,&wparam(1)); # get a
341 &mov($b,&wparam(2)); # get b
342 &mov($num,&wparam(3)); # get num
343 &xor($c,$c); # clear carry
344 &and($num,0xfffffff8); # num rounded down to a multiple of 8
345
346 &jz(&label("aw_finish"));
347
348 &set_label("aw_loop",0);
349 for ($i=0; $i<8; $i++)
350 {
351 &comment("Round $i");
352
353 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
354 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
355 &add($tmp1,$c); # *a + carry-in
356 &mov($c,0); # mov leaves the carry flag intact
357 &adc($c,$c); # c = carry out of the first add
358 &add($tmp1,$tmp2); # + *b
359 &adc($c,0); # c += carry out of the second add
360 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
361 }
362
363 &comment("");
364 &add($a,32);
365 &add($b,32);
366 &add($r,32);
367 &sub($num,8);
368 &jnz(&label("aw_loop"));
369
370 &set_label("aw_finish",0);
371 &mov($num,&wparam(3)); # get num
372 &and($num,7);
373 &jz(&label("aw_end"));
374
375 for ($i=0; $i<7; $i++)
376 {
377 &comment("Tail Round $i");
378 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
379 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
380 &add($tmp1,$c);
381 &mov($c,0);
382 &adc($c,$c);
383 &add($tmp1,$tmp2);
384 &adc($c,0);
385 &dec($num) if ($i != 6);
386 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
387 &jz(&label("aw_end")) if ($i != 6);
388 }
389 &set_label("aw_end",0);
390
391# &mov("eax",$c); # $c is "eax"
392
393 &function_end($name);
394 }
395
# Emit bn_sub_words(r, a, b, num): the generated code stores
# a[i] - b[i] - borrow into r[i] for each of the num words.  The running
# borrow is kept in eax ($c), so it is also the value left in eax at the
# end.  Eight words per unrolled loop pass, then an unrolled tail for
# the remaining num&7 words.
# NOTE(review): the label names reuse the "aw_" prefix of bn_add_words;
# presumably x86asm.pl keeps labels separate per function - confirm.
396sub bn_sub_words
397 {
398 local($name)=@_;
399
400 &function_begin($name,"");
401
402 &comment("");
403 $a="esi";
404 $b="edi";
405 $c="eax";
406 $r="ebx";
407 $tmp1="ecx";
408 $tmp2="edx";
409 $num="ebp";
410
411 &mov($r,&wparam(0)); # get r
412 &mov($a,&wparam(1)); # get a
413 &mov($b,&wparam(2)); # get b
414 &mov($num,&wparam(3)); # get num
415 &xor($c,$c); # clear borrow
416 &and($num,0xfffffff8); # num rounded down to a multiple of 8
417
418 &jz(&label("aw_finish"));
419
420 &set_label("aw_loop",0);
421 for ($i=0; $i<8; $i++)
422 {
423 &comment("Round $i");
424
425 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
426 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
427 &sub($tmp1,$c); # *a - borrow-in
428 &mov($c,0); # mov leaves the carry flag intact
429 &adc($c,$c); # c = borrow out of the first sub
430 &sub($tmp1,$tmp2); # - *b
431 &adc($c,0); # c += borrow out of the second sub
432 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
433 }
434
435 &comment("");
436 &add($a,32);
437 &add($b,32);
438 &add($r,32);
439 &sub($num,8);
440 &jnz(&label("aw_loop"));
441
442 &set_label("aw_finish",0);
443 &mov($num,&wparam(3)); # get num
444 &and($num,7);
445 &jz(&label("aw_end"));
446
447 for ($i=0; $i<7; $i++)
448 {
449 &comment("Tail Round $i");
450 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
451 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
452 &sub($tmp1,$c);
453 &mov($c,0);
454 &adc($c,$c);
455 &sub($tmp1,$tmp2);
456 &adc($c,0);
457 &dec($num) if ($i != 6);
458 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
459 &jz(&label("aw_end")) if ($i != 6);
460 }
461 &set_label("aw_end",0);
462
463# &mov("eax",$c); # $c is "eax"
464
465 &function_end($name);
466 }
467
# Emit bn_sub_part_words(r, a, b, num, dl).
#
# Phase 1 (aw_* labels) stores a[i] - b[i] - borrow into r[i] for the
# first num words, exactly like bn_sub_words, but its tail advances the
# a/b/r pointers word by word so that phase 2 continues where phase 1
# stopped.
# Phase 2 depends on the fifth argument dl:
#   dl == 0  nothing more to do;
#   dl <  0  -dl further words of r receive 0 - b[i] - borrow;
#   dl >  0  dl further words of r receive a[i] - borrow; once a round
#            produces no borrow the code jumps into the pw_nc_* section,
#            which copies the remaining words of a and leaves eax at 0.
# The borrow is kept in eax ($c) throughout.
468sub bn_sub_part_words
469 {
470 local($name)=@_;
471
472 &function_begin($name,"");
473
474 &comment("");
475 $a="esi";
476 $b="edi";
477 $c="eax";
478 $r="ebx";
479 $tmp1="ecx";
480 $tmp2="edx";
481 $num="ebp";
482
483 &mov($r,&wparam(0)); # get r
484 &mov($a,&wparam(1)); # get a
485 &mov($b,&wparam(2)); # get b
486 &mov($num,&wparam(3)); # get num
487 &xor($c,$c); # clear borrow
488 &and($num,0xfffffff8); # num rounded down to a multiple of 8
489
490 &jz(&label("aw_finish"));
491
492 &set_label("aw_loop",0);
493 for ($i=0; $i<8; $i++)
494 {
495 &comment("Round $i");
496
497 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
498 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
499 &sub($tmp1,$c);
500 &mov($c,0);
501 &adc($c,$c);
502 &sub($tmp1,$tmp2);
503 &adc($c,0);
504 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
505 }
506
507 &comment("");
508 &add($a,32);
509 &add($b,32);
510 &add($r,32);
511 &sub($num,8);
512 &jnz(&label("aw_loop"));
513
514 &set_label("aw_finish",0);
515 &mov($num,&wparam(3)); # get num
516 &and($num,7);
517 &jz(&label("aw_end"));
518
519 for ($i=0; $i<7; $i++)
520 {
521 &comment("Tail Round $i");
522 &mov($tmp1,&DWP(0,$a,"",0)); # *a
523 &mov($tmp2,&DWP(0,$b,"",0));# *b
524 &sub($tmp1,$c);
525 &mov($c,0);
526 &adc($c,$c);
527 &sub($tmp1,$tmp2);
528 &adc($c,0);
529 &mov(&DWP(0,$r,"",0),$tmp1); # *r
530 &add($a, 4); # step the pointers so phase 2 starts at the next word
531 &add($b, 4);
532 &add($r, 4);
533 &dec($num) if ($i != 6);
534 &jz(&label("aw_end")) if ($i != 6);
535 }
536 &set_label("aw_end",0);
537
538 &cmp(&wparam(4),0); # dl == 0: nothing beyond the common part
539 &je(&label("pw_end"));
540
541 &mov($num,&wparam(4)); # get dl
542 &cmp($num,0); # re-tests the condition already checked above
543 &je(&label("pw_end"));
544 &jge(&label("pw_pos"));
545
546 &comment("pw_neg");
547 &mov($tmp2,0);
548 &sub($tmp2,$num);
549 &mov($num,$tmp2); # num = -dl
550 &and($num,0xfffffff8); # -dl rounded down to a multiple of 8
551 &jz(&label("pw_neg_finish"));
552
553 &set_label("pw_neg_loop",0);
554 for ($i=0; $i<8; $i++)
555 {
556 &comment("dl<0 Round $i");
557
558 &mov($tmp1,0);
559 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
560 &sub($tmp1,$c);
561 &mov($c,0);
562 &adc($c,$c);
563 &sub($tmp1,$tmp2);
564 &adc($c,0);
565 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
566 }
567
568 &comment("");
569 &add($b,32);
570 &add($r,32);
571 &sub($num,8);
572 &jnz(&label("pw_neg_loop"));
573
574 &set_label("pw_neg_finish",0);
575 &mov($tmp2,&wparam(4)); # get dl
576 &mov($num,0);
577 &sub($num,$tmp2); # num = -dl
578 &and($num,7);
579 &jz(&label("pw_end"));
580
581 for ($i=0; $i<7; $i++)
582 {
583 &comment("dl<0 Tail Round $i");
584 &mov($tmp1,0);
585 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
586 &sub($tmp1,$c);
587 &mov($c,0);
588 &adc($c,$c);
589 &sub($tmp1,$tmp2);
590 &adc($c,0);
591 &dec($num) if ($i != 6);
592 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
593 &jz(&label("pw_end")) if ($i != 6);
594 }
595
596 &jmp(&label("pw_end"));
597
598 &set_label("pw_pos",0);
599
600 &and($num,0xfffffff8); # dl rounded down to a multiple of 8
601 &jz(&label("pw_pos_finish"));
602
603 &set_label("pw_pos_loop",0);
604
605 for ($i=0; $i<8; $i++)
606 {
607 &comment("dl>0 Round $i");
608
609 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
610 &sub($tmp1,$c);
611 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
612 &jnc(&label("pw_nc".$i)); # no borrow: continue in the plain-copy section
613 }
614
615 &comment("");
616 &add($a,32);
617 &add($r,32);
618 &sub($num,8);
619 &jnz(&label("pw_pos_loop"));
620
621 &set_label("pw_pos_finish",0);
622 &mov($num,&wparam(4)); # get dl
623 &and($num,7);
624 &jz(&label("pw_end"));
625
626 for ($i=0; $i<7; $i++)
627 {
628 &comment("dl>0 Tail Round $i");
629 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
630 &sub($tmp1,$c);
631 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
632 &jnc(&label("pw_tail_nc".$i));
633 &dec($num) if ($i != 6);
634 &jz(&label("pw_end")) if ($i != 6);
635 }
636 &mov($c,1);
637 &jmp(&label("pw_end"));
638
639 &set_label("pw_nc_loop",0);
640 for ($i=0; $i<8; $i++)
641 {
642 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
643 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
644 &set_label("pw_nc".$i,0);
645 }
646
647 &comment("");
648 &add($a,32);
649 &add($r,32);
650 &sub($num,8);
651 &jnz(&label("pw_nc_loop"));
652
653 &mov($num,&wparam(4)); # get dl
654 &and($num,7);
655 &jz(&label("pw_nc_end"));
656
657 for ($i=0; $i<7; $i++)
658 {
659 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
660 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
661 &set_label("pw_tail_nc".$i,0);
662 &dec($num) if ($i != 6);
663 &jz(&label("pw_nc_end")) if ($i != 6);
664 }
665
666 &set_label("pw_nc_end",0);
667 &mov($c,0);
668
669 &set_label("pw_end",0);
670
671# &mov("eax",$c); # $c is "eax"
672
673 &function_end($name);
674 }
675
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
deleted file mode 100644
index 5d962cb957..0000000000
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ /dev/null
@@ -1,286 +0,0 @@
1#!/usr/local/bin/perl
2# Generator script: emits the bn_mul_comba{8,4} and bn_sqr_comba{8,4} routines through the x86asm.pl helpers.
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6&asm_init($ARGV[0],$0);
7# One unrolled routine is generated per (name, word count) pair below.
8&bn_mul_comba("bn_mul_comba8",8);
9&bn_mul_comba("bn_mul_comba4",4);
10&bn_sqr_comba("bn_sqr_comba8",8);
11&bn_sqr_comba("bn_sqr_comba4",4);
12
13&asm_finish();
14
15sub mul_add_c
16 {
17 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
18 # Emits one multiply-accumulate step: ($c2:$c1:$c0) += eax*edx, labelled as a[$ai]*b[$bi].
19 # pos == 0: more products follow for this result word; only the next
20 # a[$na]/b[$nb] are loaded. pos == 1: r[$i] is stored, then a[$na]/b[$nb] are loaded. pos > 1: r[$i] is stored only.
21
22 &comment("mul a[$ai]*b[$bi]");
23
24 # "eax" and "edx" will always be pre-loaded.
25 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
26 # &mov("edx",&DWP($bi*4,$b,"",0));
27
28 &mul("edx");
29 &add($c0,"eax");
30 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
31 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
32 ###
33 &adc($c1,"edx");
34 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
36 ###
37 &adc($c2,0);
38 # if pos > 1, this is the last result word: nothing further is loaded
39 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
40 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
41 }
42
43sub sqr_add_c
44 {
45 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
46 # Emits one accumulate step without doubling: ($c2:$c1:$c0) += product, labelled as a[$ai]*a[$bi].
47 # pos == 0: only the next a[$na] is loaded into eax. pos == 1: r[$i] is
48 # stored and the next operands are loaded. pos > 1: r[$i] is stored only.
49
50 &comment("sqr a[$ai]*a[$bi]");
51
52 # "eax" and "edx" will always be pre-loaded.
53 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
54 # &mov("edx",&DWP($bi*4,$b,"",0));
55
56 if ($ai == $bi)
57 { &mul("eax");}
58 else
59 { &mul("edx");}
60 &add($c0,"eax");
61 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
62 ###
63 &adc($c1,"edx");
64 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
65 ###
66 &adc($c2,0);
67 # if pos > 1, this is the last result word: nothing further is loaded
68 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
69 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a[$na] (both operands come from a)
70 }
71
72sub sqr_add_c2
73 {
74 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
75 # Emits one doubled accumulate step: ($c2:$c1:$c0) += 2*product, labelled as a[$ai]*a[$bi].
76 # pos == 0: only the next operands are loaded. pos == 1: r[$i] is stored
77 # and the next operands are loaded. pos > 1: r[$i] is stored only.
78
79 &comment("sqr a[$ai]*a[$bi]");
80
81 # "eax" and "edx" will always be pre-loaded.
82 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
83 # &mov("edx",&DWP($bi*4,$a,"",0));
84
85 if ($ai == $bi)
86 { &mul("eax");}
87 else
88 { &mul("edx");}
89 &add("eax","eax"); # double the low half of the product
90 ###
91 &adc("edx","edx"); # double the high half, taking in the carry from the low half
92 ###
93 &adc($c2,0); # carry out of the doubling goes to the top accumulator word
94 &add($c0,"eax");
95 &adc($c1,"edx");
96 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a[$na] (both operands come from a)
98 &adc($c2,0);
99 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
100 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); # skipped when $na == $nb, presumably because the next step squares eax
101 ###
102 }
103
104sub bn_mul_comba
105 {
106 local($name,$num)=@_;
107 local($a,$b,$c0,$c1,$c2);
108 local($i,$as,$ae,$bs,$be,$ai,$bi);
109 local($tot,$end);
110 # Emits function $name: a fully unrolled $num x $num word multiply that stores 2*$num result words, one per column.
111 &function_begin_B($name,"");
112 # $c0/$c1/$c2 form the three-word column accumulator; their roles rotate after every stored word.
113 $c0="ebx";
114 $c1="ecx";
115 $c2="ebp";
116 $a="esi";
117 $b="edi";
118
119 $as=0;
120 $ae=0;
121 $bs=0;
122 $be=0;
123 $tot=$num+$num-1;
124
125 &push("esi");
126 &mov($a,&wparam(1));
127 &push("edi");
128 &mov($b,&wparam(2));
129 &push("ebp");
130 &push("ebx");
131
132 &xor($c0,$c0);
133 &mov("eax",&DWP(0,$a,"",0)); # load the first word of a
134 &xor($c1,$c1);
135 &mov("edx",&DWP(0,$b,"",0)); # load the first word of b
136
137 for ($i=0; $i<$tot; $i++)
138 {
139 $ai=$as;
140 $bi=$bs;
141 $end=$be+1;
142
143 &comment("################## Calculate word $i");
144
145 for ($j=$bs; $j<$end; $j++)
146 {
147 &xor($c2,$c2) if ($j == $bs);
148 if (($j+1) == $end)
149 {
150 $v=1;
151 $v=2 if (($i+1) == $tot);
152 }
153 else
154 { $v=0; }
155 if (($j+1) != $end)
156 {
157 $na=($ai-1);
158 $nb=($bi+1);
159 }
160 else
161 {
162 $na=$as+($i < ($num-1));
163 $nb=$bs+($i >= ($num-1));
164 }
165#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
166 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
167 if ($v)
168 {
169 &comment("saved r[$i]");
170 # &mov("eax",&wparam(0));
171 # &mov(&DWP($i*4,"eax","",0),$c0);
172 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulator registers for the next result word
173 }
174 $ai--;
175 $bi++;
176 }
177 $as++ if ($i < ($num-1));
178 $ae++ if ($i >= ($num-1));
179
180 $bs++ if ($i >= ($num-1));
181 $be++ if ($i < ($num-1));
182 }
183 &comment("save r[$i]");
184 # no reload needed: eax still holds r, loaded by the final mul_add_c call (pos > 1)
185 &mov(&DWP($i*4,"eax","",0),$c0);
186
187 &pop("ebx");
188 &pop("ebp");
189 &pop("edi");
190 &pop("esi");
191 &ret();
192 &function_end_B($name);
193 }
194
195sub bn_sqr_comba
196 {
197 local($name,$num)=@_;
198 local($r,$a,$c0,$c1,$c2); # register names, assigned below (not taken from @_)
199 local($i,$as,$ae,$bs,$be,$ai,$bi);
200 local($b,$tot,$end,$half);
201 # Emits function $name: a fully unrolled $num-word squaring that stores 2*$num result words, one per column.
202 &function_begin_B($name,"");
203 # $c0/$c1/$c2 form the three-word column accumulator; their roles rotate after every stored word.
204 $c0="ebx";
205 $c1="ecx";
206 $c2="ebp";
207 $a="esi";
208 $r="edi";
209
210 &push("esi");
211 &push("edi");
212 &push("ebp");
213 &push("ebx");
214 &mov($r,&wparam(0));
215 &mov($a,&wparam(1));
216 &xor($c0,$c0);
217 &xor($c1,$c1);
218 &mov("eax",&DWP(0,$a,"",0)); # load the first word
219
220 $as=0;
221 $ae=0;
222 $bs=0;
223 $be=0;
224 $tot=$num+$num-1;
225
226 for ($i=0; $i<$tot; $i++)
227 {
228 $ai=$as;
229 $bi=$bs;
230 $end=$be+1;
231
232 &comment("############### Calculate word $i");
233 for ($j=$bs; $j<$end; $j++)
234 {
235 &xor($c2,$c2) if ($j == $bs);
236 if (($ai-1) < ($bi+1)) # the indices meet or cross next step: this is the last product of the column
237 {
238 $v=1;
239 $v=2 if ($i+1) == $tot;
240 }
241 else
242 { $v=0; }
243 if (!$v)
244 {
245 $na=$ai-1;
246 $nb=$bi+1;
247 }
248 else
249 {
250 $na=$as+($i < ($num-1));
251 $nb=$bs+($i >= ($num-1));
252 }
253 if ($ai == $bi)
254 {
255 &sqr_add_c($r,$a,$ai,$bi,
256 $c0,$c1,$c2,$v,$i,$na,$nb);
257 }
258 else
259 {
260 &sqr_add_c2($r,$a,$ai,$bi,
261 $c0,$c1,$c2,$v,$i,$na,$nb);
262 }
263 if ($v)
264 {
265 &comment("saved r[$i]");
266 #&mov(&DWP($i*4,$r,"",0),$c0);
267 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulator registers for the next result word
268 last;
269 }
270 $ai--;
271 $bi++;
272 }
273 $as++ if ($i < ($num-1));
274 $ae++ if ($i >= ($num-1));
275
276 $bs++ if ($i >= ($num-1));
277 $be++ if ($i < ($num-1));
278 }
279 &mov(&DWP($i*4,$r,"",0),$c0);
280 &pop("ebx");
281 &pop("ebp");
282 &pop("edi");
283 &pop("esi");
284 &ret();
285 &function_end_B($name);
286 }
diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S
deleted file mode 100644
index 951abc53ea..0000000000
--- a/src/lib/libcrypto/bn/asm/ia64.S
+++ /dev/null
@@ -1,1555 +0,0 @@
1.explicit
2.text
3.ident "ia64.S, Version 2.1"
4.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6//
7// ====================================================================
8// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9// project.
10//
11// Rights for redistribution and usage in source and binary forms are
12// granted according to the OpenSSL license. Warranty of any kind is
13// disclaimed.
14// ====================================================================
15//
16// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is
17// different from Itanium to this module viewpoint. Most notably, is it
18// "wider" than Itanium? Can you experience loop scalability as
19// discussed in commentary sections? Not really:-( Itanium2 has 6
20// integer ALU ports, i.e. it's 2 ports wider, but it's not enough to
21// spin twice as fast, as I need 8 IALU ports. Amount of floating point
22// ports is the same, i.e. 2, while I need 4. In other words, to this
23// module Itanium2 remains effectively as "wide" as Itanium. Yet it's
24// essentially different in respect to this module, and a re-tune was
25// required. Well, because some instruction latencies have changed. Most
26// noticeably those intensively used:
27//
28// Itanium Itanium2
29// ldf8 9 6 L2 hit
30// ld8 2 1 L1 hit
31// getf 2 5
32// xma[->getf] 7[+1] 4[+0]
33// add[->st8] 1[+1] 1[+0]
34//
35// What does it mean? You might ratiocinate that the original code
36// should run just faster... Because sum of latencies is smaller...
37// Wrong! Note that getf latency increased. This means that if a loop is
38// scheduled for lower latency (as they were), then it will suffer from
39// stall condition and the code will therefore turn anti-scalable, e.g.
40// original bn_mul_words spun at 5*n or 2.5 times slower than expected
41// on Itanium2! What to do? Reschedule loops for Itanium2? But then
42// Itanium would exhibit anti-scalability. So I've chosen to reschedule
43// for worst latency for every instruction aiming for best *all-round*
44// performance.
45
46// Q. How much faster does it get?
47// A. Here is the output from 'openssl speed rsa dsa' for vanilla
48// 0.9.6a compiled with gcc version 2.96 20000731 (Red Hat
49// Linux 7.1 2.96-81):
50//
51// sign verify sign/s verify/s
52// rsa 512 bits 0.0036s 0.0003s 275.3 2999.2
53// rsa 1024 bits 0.0203s 0.0011s 49.3 894.1
54// rsa 2048 bits 0.1331s 0.0040s 7.5 250.9
55// rsa 4096 bits 0.9270s 0.0147s 1.1 68.1
56// sign verify sign/s verify/s
57// dsa 512 bits 0.0035s 0.0043s 288.3 234.8
58// dsa 1024 bits 0.0111s 0.0135s 90.0 74.2
59//
60// And here is similar output but for this assembler
61// implementation:-)
62//
63// sign verify sign/s verify/s
64// rsa 512 bits 0.0021s 0.0001s 549.4 9638.5
65// rsa 1024 bits 0.0055s 0.0002s 183.8 4481.1
66// rsa 2048 bits 0.0244s 0.0006s 41.4 1726.3
67// rsa 4096 bits 0.1295s 0.0018s 7.7 561.5
68// sign verify sign/s verify/s
69// dsa 512 bits 0.0012s 0.0013s 891.9 756.6
70// dsa 1024 bits 0.0023s 0.0028s 440.4 376.2
71//
72// Yes, you may argue that it's not fair comparison as it's
73// possible to craft the C implementation with BN_UMULT_HIGH
74// inline assembler macro. But of course! Here is the output
75// with the macro:
76//
77// sign verify sign/s verify/s
78// rsa 512 bits 0.0020s 0.0002s 495.0 6561.0
79// rsa 1024 bits 0.0086s 0.0004s 116.2 2235.7
80// rsa 2048 bits 0.0519s 0.0015s 19.3 667.3
81// rsa 4096 bits 0.3464s 0.0053s 2.9 187.7
82// sign verify sign/s verify/s
83// dsa 512 bits 0.0016s 0.0020s 613.1 510.5
84// dsa 1024 bits 0.0045s 0.0054s 221.0 183.9
85//
86// My code is still way faster, huh:-) And I believe that even
87// higher performance can be achieved. Note that as keys get
88// longer, performance gain is larger. Why? According to the
89// profiler there is another player in the field, namely
90// BN_from_montgomery consuming larger and larger portion of CPU
91// time as keysize decreases. I therefore consider putting effort
92// to assembler implementation of the following routine:
93//
94// void bn_mul_add_mont (BN_ULONG *rp,BN_ULONG *np,int nl,BN_ULONG n0)
95// {
96// int i,j;
97// BN_ULONG v;
98//
99// for (i=0; i<nl; i++)
100// {
101// v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
102// nrp++;
103// rp++;
104// if (((nrp[-1]+=v)&BN_MASK2) < v)
105// for (j=0; ((++nrp[j])&BN_MASK2) == 0; j++) ;
106// }
107// }
108//
109// It might as well be beneficial to implement even combaX
110// variants, as it appears as it can literally unleash the
111// performance (see comment section to bn_mul_comba8 below).
112//
113// And finally for your reference the output for 0.9.6a compiled
114// with SGIcc version 0.01.0-12 (keep in mind that for the moment
115// of this writing it's not possible to convince SGIcc to use
116// BN_UMULT_HIGH inline assembler macro, yet the code is fast,
117// i.e. for a compiler generated one:-):
118//
119// sign verify sign/s verify/s
120// rsa 512 bits 0.0022s 0.0002s 452.7 5894.3
121// rsa 1024 bits 0.0097s 0.0005s 102.7 2002.9
122// rsa 2048 bits 0.0578s 0.0017s 17.3 600.2
123// rsa 4096 bits 0.3838s 0.0061s 2.6 164.5
124// sign verify sign/s verify/s
125// dsa 512 bits 0.0018s 0.0022s 547.3 459.6
126// dsa 1024 bits 0.0051s 0.0062s 196.6 161.3
127//
128// Oh! Benchmarks were performed on 733MHz Lion-class Itanium
129// system running Redhat Linux 7.1 (very special thanks to Ray
130// McCaffity of Williams Communications for providing an account).
131//
132// Q. What's the heck with 'rum 1<<5' at the end of every function?
133// A. Well, by clearing the "upper FP registers written" bit of the
134// User Mask I want to excuse the kernel from preserving upper
135// (f32-f127) FP register bank over process context switch, thus
136// minimizing bus bandwidth consumption during the switch (i.e.
137// after PKI operation completes and the program is off doing
138// something else like bulk symmetric encryption). Having said
139// this, I also want to point out that it might be good idea
140// to compile the whole toolkit (as well as majority of the
141// programs for that matter) with -mfixed-range=f32-f127 command
142// line option. No, it doesn't prevent the compiler from writing
143// to upper bank, but at least discourages to do so. If you don't
144// like the idea you have the option to compile the module with
145// -Drum=nop.m in command line.
146//
147
148#if defined(_HPUX_SOURCE) && !defined(_LP64)
149#define ADDP addp4
150#else
151#define ADDP add
152#endif
153
154#if 1
155//
156// bn_[add|sub]_words routines.
157//
158// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the
159// data reside in L1 cache, i.e. 2 ticks away). It's possible to
160// compress the epilogue and get down to 2*n+6, but at the cost of
161// scalability (the neat feature of this implementation is that it
162// shall automagically spin in n+5 on "wider" IA-64 implementations:-)
163// I consider that the epilogue is short enough as it is to trade tiny
164// performance loss on Itanium for scalability.
165//
166// BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
167//
168.global bn_add_words#
169.proc bn_add_words#
170.align 64
171.skip 32 // makes the loop body aligned at 64-byte boundary
172bn_add_words:
173 .prologue
174 .save ar.pfs,r2
175{ .mii; alloc r2=ar.pfs,4,12,0,16
176 cmp4.le p6,p0=r35,r0 };; // p6 = (num <= 0)
177{ .mfb; mov r8=r0 // return value
178(p6) br.ret.spnt.many b0 };; // nothing to add: return 0
179
180{ .mib; sub r10=r35,r0,1 // r10 = num-1, used as loop count below
181 .save ar.lc,r3
182 mov r3=ar.lc // keep caller's ar.lc, restored before return
183 brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16
184 }
185{ .mib; ADDP r14=0,r32 // rp
186 .save pr,r9
187 mov r9=pr };; // keep caller's predicates, restored before return
188 .body
189{ .mii; ADDP r15=0,r33 // ap
190 mov ar.lc=r10
191 mov ar.ec=6 }
192{ .mib; ADDP r16=0,r34 // bp
193 mov pr.rot=1<<16 };;
194
195.L_bn_add_words_ctop:
196{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
197 (p18) add r39=r37,r34 // r=a+b
198 (p19) cmp.ltu.unc p56,p0=r40,r38 } // carry=(r<a)
199{ .mfb; (p0) nop.m 0x0
200 (p0) nop.f 0x0
201 (p0) nop.b 0x0 }
202{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
203 (p58) cmp.eq.or p57,p0=-1,r41 // (p20)
204 (p58) add r41=1,r41 } // (p20)
205{ .mfb; (p21) st8 [r14]=r42,8 // *(rp++)=r
206 (p0) nop.f 0x0
207 br.ctop.sptk .L_bn_add_words_ctop };;
208.L_bn_add_words_cend:
209
210{ .mii;
211(p59) add r8=1,r8 // return value
212 mov pr=r9,0x1ffff // restore predicates
213 mov ar.lc=r3 } // restore loop counter
214{ .mbb; nop.b 0x0
215 br.ret.sptk.many b0 };;
216.endp bn_add_words#
217
218//
219// BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
220//
221.global bn_sub_words#
222.proc bn_sub_words#
223.align 64
224.skip 32 // makes the loop body aligned at 64-byte boundary
225bn_sub_words:
226 .prologue
227 .save ar.pfs,r2
228{ .mii; alloc r2=ar.pfs,4,12,0,16
229 cmp4.le p6,p0=r35,r0 };; // p6 = (num <= 0)
230{ .mfb; mov r8=r0 // return value
231(p6) br.ret.spnt.many b0 };; // nothing to subtract: return 0
232
233{ .mib; sub r10=r35,r0,1 // r10 = num-1, used as loop count below
234 .save ar.lc,r3
235 mov r3=ar.lc // keep caller's ar.lc, restored before return
236 brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16
237 }
238{ .mib; ADDP r14=0,r32 // rp
239 .save pr,r9
240 mov r9=pr };; // keep caller's predicates, restored before return
241 .body
242{ .mii; ADDP r15=0,r33 // ap
243 mov ar.lc=r10
244 mov ar.ec=6 }
245{ .mib; ADDP r16=0,r34 // bp
246 mov pr.rot=1<<16 };;
247
248.L_bn_sub_words_ctop:
249{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
250 (p18) sub r39=r37,r34 // r=a-b
251 (p19) cmp.gtu.unc p56,p0=r40,r38 } // borrow=(r>a)
252{ .mfb; (p0) nop.m 0x0
253 (p0) nop.f 0x0
254 (p0) nop.b 0x0 }
255{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
256 (p58) cmp.eq.or p57,p0=0,r41 // (p20)
257 (p58) add r41=-1,r41 } // (p20)
258{ .mbb; (p21) st8 [r14]=r42,8 // *(rp++)=r
259 (p0) nop.b 0x0
260 br.ctop.sptk .L_bn_sub_words_ctop };;
261.L_bn_sub_words_cend:
262
263{ .mii;
264(p59) add r8=1,r8 // return value
265 mov pr=r9,0x1ffff // restore predicates
266 mov ar.lc=r3 } // restore loop counter
267{ .mbb; nop.b 0x0
268 br.ret.sptk.many b0 };;
269.endp bn_sub_words#
270#endif
271
272#if 0
273#define XMA_TEMPTATION
274#endif
275
276#if 1
277//
278// BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
279//
280.global bn_mul_words#
281.proc bn_mul_words#
282.align 64
283.skip 32 // makes the loop body aligned at 64-byte boundary
284bn_mul_words:
285 .prologue
286 .save ar.pfs,r2
287#ifdef XMA_TEMPTATION
288{ .mfi; alloc r2=ar.pfs,4,0,0,0 };;
289#else
290{ .mfi; alloc r2=ar.pfs,4,12,0,16 };;
291#endif
292{ .mib; mov r8=r0 // return value
293 cmp4.le p6,p0=r34,r0 // p6 = (num <= 0)
294(p6) br.ret.spnt.many b0 };;
295
296{ .mii; sub r10=r34,r0,1 // r10 = num-1, used as loop count below
297 .save ar.lc,r3
298 mov r3=ar.lc // keep caller's ar.lc, restored before return
299 .save pr,r9
300 mov r9=pr };; // keep caller's predicates, restored before return
301
302 .body
303{ .mib; setf.sig f8=r35 // w
304 mov pr.rot=0x800001<<16
305 // ------^----- serves as (p50) at first (p27)
306 brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16
307 }
308
309#ifndef XMA_TEMPTATION
310
311{ .mmi; ADDP r14=0,r32 // rp
312 ADDP r15=0,r33 // ap
313 mov ar.lc=r10 }
314{ .mmi; mov r40=0 // serves as r35 at first (p27)
315 mov ar.ec=13 };;
316
317// This loop spins in 2*(n+12) ticks. It's scheduled for data in Itanium
318// L2 cache (i.e. 9 ticks away) as floating point load/store instructions
319// bypass L1 cache and L2 latency is actually best-case scenario for
320// ldf8. The loop is not scalable and shall run in 2*(n+12) even on
321// "wider" IA-64 implementations. It's a trade-off here. n+24 loop
322// would give us ~5% in *overall* performance improvement on "wider"
323// IA-64, but would hurt Itanium for about same because of longer
324// epilogue. As it's a matter of few percents in either case I've
325// chosen to trade the scalability for development time (you can see
326// this very instruction sequence in bn_mul_add_words loop which in
327// turn is scalable).
328.L_bn_mul_words_ctop:
329{ .mfi; (p25) getf.sig r36=f52 // low
330 (p21) xmpy.lu f48=f37,f8
331 (p28) cmp.ltu p54,p50=r41,r39 }
332{ .mfi; (p16) ldf8 f32=[r15],8
333 (p21) xmpy.hu f40=f37,f8
334 (p0) nop.i 0x0 };;
335{ .mii; (p25) getf.sig r32=f44 // high
336 .pred.rel "mutex",p50,p54
337 (p50) add r40=r38,r35 // (p27)
338 (p54) add r40=r38,r35,1 } // (p27)
339{ .mfb; (p28) st8 [r14]=r41,8
340 (p0) nop.f 0x0
341 br.ctop.sptk .L_bn_mul_words_ctop };;
342.L_bn_mul_words_cend:
343
344{ .mii; nop.m 0x0
345.pred.rel "mutex",p51,p55
346(p51) add r8=r36,r0
347(p55) add r8=r36,r0,1 }
348{ .mfb; nop.m 0x0
349 nop.f 0x0
350 nop.b 0x0 }
351
352#else // XMA_TEMPTATION
353
354 setf.sig f37=r0 // serves as carry at (p18) tick
355 mov ar.lc=r10
356 mov ar.ec=5;;
357
358// Most of you examining this code very likely wonder why in the name
359// of Intel the following loop is commented out? Indeed, it looks so
360// neat that you find it hard to believe that it's something wrong
361// with it, right? The catch is that every iteration depends on the
362// result from previous one and the latter isn't available instantly.
363// The loop therefore spins at the latency of xma minus 1, or in other
364// words at 6*(n+4) ticks:-( Compare to the "production" loop above
365// that runs in 2*(n+12) where the low latency problem is worked around
366// by moving the dependency to one-tick latent integer ALU. Note that
367// "distance" between ldf8 and xma is not latency of ldf8, but the
368// *difference* between xma and ldf8 latencies.
369.L_bn_mul_words_ctop:
370{ .mfi; (p16) ldf8 f32=[r33],8
371 (p18) xma.hu f38=f34,f8,f39 }
372{ .mfb; (p20) stf8 [r32]=f37,8
373 (p18) xma.lu f35=f34,f8,f39
374 br.ctop.sptk .L_bn_mul_words_ctop };;
375.L_bn_mul_words_cend:
376
377 getf.sig r8=f41 // the return value
378
379#endif // XMA_TEMPTATION
380
381{ .mii; nop.m 0x0
382 mov pr=r9,0x1ffff // restore predicates
383 mov ar.lc=r3 } // restore loop counter
384{ .mfb; rum 1<<5 // clear um.mfh
385 nop.f 0x0
386 br.ret.sptk.many b0 };;
387.endp bn_mul_words#
388#endif
389
390#if 1
391//
392// BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
393//
394.global bn_mul_add_words#
395.proc bn_mul_add_words#
396.align 64
397.skip 48 // makes the loop body aligned at 64-byte boundary
398bn_mul_add_words:
399 .prologue
400 .save ar.pfs,r2
401{ .mmi; alloc r2=ar.pfs,4,4,0,8
402 cmp4.le p6,p0=r34,r0 // p6 = (num <= 0)
403 .save ar.lc,r3
404 mov r3=ar.lc };; // keep caller's ar.lc, restored before return
405{ .mib; mov r8=r0 // return value
406 sub r10=r34,r0,1 // r10 = num-1, used as loop count below
407(p6) br.ret.spnt.many b0 };;
408
409{ .mib; setf.sig f8=r35 // w
410 .save pr,r9
411 mov r9=pr // keep caller's predicates, restored before return
412 brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16
413 }
414 .body
415{ .mmi; ADDP r14=0,r32 // rp
416 ADDP r15=0,r33 // ap
417 mov ar.lc=r10 }
418{ .mii; ADDP r16=0,r32 // rp copy
419 mov pr.rot=0x2001<<16
420 // ------^----- serves as (p40) at first (p27)
421 mov ar.ec=11 };;
422
423// This loop spins in 3*(n+10) ticks on Itanium and in 2*(n+10) on
424// Itanium 2. Yes, unlike previous versions it scales:-) Previous
425// version was performing *all* additions in IALU and was starving
426// for those even on Itanium 2. In this version one addition is
427// moved to FPU and is folded with multiplication. This is at cost
428// of propagating the result from previous call to this subroutine
429// to L2 cache... In other words negligible even for shorter keys.
430// *Overall* performance improvement [over previous version] varies
431// from 11 to 22 percent depending on key length.
432.L_bn_mul_add_words_ctop:
433.pred.rel "mutex",p40,p42
434{ .mfi; (p23) getf.sig r36=f45 // low
435 (p20) xma.lu f42=f36,f8,f50 // low
436 (p40) add r39=r39,r35 } // (p27)
437{ .mfi; (p16) ldf8 f32=[r15],8 // *(ap++)
438 (p20) xma.hu f36=f36,f8,f50 // high
439 (p42) add r39=r39,r35,1 };; // (p27)
440{ .mmi; (p24) getf.sig r32=f40 // high
441 (p16) ldf8 f46=[r16],8 // *(rp1++)
442 (p40) cmp.ltu p41,p39=r39,r35 } // (p27)
443{ .mib; (p26) st8 [r14]=r39,8 // *(rp2++)
444 (p42) cmp.leu p41,p39=r39,r35 // (p27)
445 br.ctop.sptk .L_bn_mul_add_words_ctop};;
446.L_bn_mul_add_words_cend:
447
448{ .mmi; .pred.rel "mutex",p40,p42
449(p40) add r8=r35,r0
450(p42) add r8=r35,r0,1
451 mov pr=r9,0x1ffff } // restore predicates
452{ .mib; rum 1<<5 // clear um.mfh
453 mov ar.lc=r3 // restore loop counter
454 br.ret.sptk.many b0 };;
455.endp bn_mul_add_words#
456#endif
457
458#if 1
459//
460// void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
461//
462.global bn_sqr_words#
463.proc bn_sqr_words#
464.align 64
465.skip 32 // makes the loop body aligned at 64-byte boundary
466bn_sqr_words:
467 .prologue
468 .save ar.pfs,r2
469{ .mii; alloc r2=ar.pfs,3,0,0,0
470 sxt4 r34=r34 };; // sign-extend the 32-bit num
471{ .mii; cmp.le p6,p0=r34,r0 // p6 = (num <= 0)
472 mov r8=r0 } // return value
473{ .mfb; ADDP r32=0,r32
474 nop.f 0x0
475(p6) br.ret.spnt.many b0 };;
476
477{ .mii; sub r10=r34,r0,1 // r10 = num-1, used as loop count below
478 .save ar.lc,r3
479 mov r3=ar.lc // keep caller's ar.lc, restored before return
480 .save pr,r9
481 mov r9=pr };; // keep caller's predicates, restored before return
482
483 .body
484{ .mib; ADDP r33=0,r33
485 mov pr.rot=1<<16
486 brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16
487 }
488{ .mii; add r34=8,r32 // second store pointer, one word past rp
489 mov ar.lc=r10
490 mov ar.ec=18 };;
491
492// 2*(n+17) on Itanium, (n+17) on "wider" IA-64 implementations. It's
493// possible to compress the epilogue (I'm getting tired to write this
494// comment over and over) and get down to 2*n+16 at the cost of
495// scalability. The decision will very likely be reconsidered after the
496// benchmark program is profiled. I.e. if performance gain on Itanium
497// will appear larger than loss on "wider" IA-64, then the loop should
498// be explicitly split and the epilogue compressed.
499.L_bn_sqr_words_ctop:
500{ .mfi; (p16) ldf8 f32=[r33],8
501 (p25) xmpy.lu f42=f41,f41
502 (p0) nop.i 0x0 }
503{ .mib; (p33) stf8 [r32]=f50,16 // low half, stride of two words
504 (p0) nop.i 0x0
505 (p0) nop.b 0x0 }
506{ .mfi; (p0) nop.m 0x0
507 (p25) xmpy.hu f52=f41,f41
508 (p0) nop.i 0x0 }
509{ .mib; (p33) stf8 [r34]=f60,16 // high half, stride of two words
510 (p0) nop.i 0x0
511 br.ctop.sptk .L_bn_sqr_words_ctop };;
512.L_bn_sqr_words_cend:
513
514{ .mii; nop.m 0x0
515 mov pr=r9,0x1ffff // restore predicates
516 mov ar.lc=r3 } // restore loop counter
517{ .mfb; rum 1<<5 // clear um.mfh
518 nop.f 0x0
519 br.ret.sptk.many b0 };;
520.endp bn_sqr_words#
521#endif
522
523#if 1
524// Apparently we win nothing by implementing special bn_sqr_comba8.
525// Yes, it is possible to reduce the number of multiplications by
526// almost factor of two, but then the amount of additions would
527// increase by factor of two (as we would have to perform those
528// otherwise performed by xma ourselves). Normally we would trade
529// anyway as multiplications are way more expensive, but not this
530// time... Multiplication kernel is fully pipelined and as we drain
531// one 128-bit multiplication result per clock cycle multiplications
532// are effectively as inexpensive as additions. Special implementation
533// might become of interest for "wider" IA-64 implementation as you'll
534// be able to get through the multiplication phase faster (there won't
535// be any stall issues as discussed in the commentary section below and
536// you therefore will be able to employ all 4 FP units)... But these
537// Itanium days it's simply too hard to justify the effort so I just
538// drop down to bn_mul_comba8 code:-)
539//
540// void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
541//
542.global bn_sqr_comba8#
543.proc bn_sqr_comba8#
544.align 64
545bn_sqr_comba8:
546 .prologue
547 .save ar.pfs,r2
548#if defined(_HPUX_SOURCE) && !defined(_LP64)
549{ .mii; alloc r2=ar.pfs,2,1,0,0
550 addp4 r33=0,r33
551 addp4 r32=0,r32 };;
552{ .mii;
553#else
554{ .mii; alloc r2=ar.pfs,2,1,0,0
555#endif
556 mov r34=r33 // second operand pointer = a, so the multiply code computes a*a
557 add r14=8,r33 };;
558 .body
559{ .mii; add r17=8,r34
560 add r15=16,r33
561 add r18=16,r34 }
562{ .mfb; add r16=24,r33
563 br .L_cheat_entry_point8 };; // continue at the shared entry label inside bn_mul_comba8
564.endp bn_sqr_comba8#
565#endif
566
567#if 1
568// I've estimated this routine to run in ~120 ticks, but in reality
569// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
570// cycles consumed for instructions fetch? Or did I misinterpret some
571// clause in Itanium micro-architecture manual? Comments are welcomed and
572// highly appreciated.
573//
574// On Itanium 2 it takes ~190 ticks. This is because of stalls on
575// result from getf.sig. I do nothing about it at this point for
576// reasons depicted below.
577//
578// However! It should be noted that even 160 ticks is darn good result
579// as it's over 10 (yes, ten, spelled as t-e-n) times faster than the
580// C version (compiled with gcc with inline assembler). I really
581// kicked compiler's butt here, didn't I? Yeah! This brings us to the
582// following statement. It's damn shame that this routine isn't called
583// very often nowadays! According to the profiler most CPU time is
584// consumed by bn_mul_add_words called from BN_from_montgomery. In
585// order to estimate what we're missing, I've compared the performance
586// of this routine against "traditional" implementation, i.e. against
587// following routine:
588//
589// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
590// { r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
591// r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
592// r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
593// r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
594// r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
595// r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
596// r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
597// r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
598// }
599//
600// The one below is over 8 times faster than the one above:-( Even
601// more reasons to "combafy" bn_mul_add_mont...
602//
603// And yes, this routine really made me wish there were an optimizing
604// assembler! It also feels like it deserves a dedication.
605//
606// To my wife for being there and to my kids...
607//
608// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
609//
610#define carry1 r14
611#define carry2 r15
612#define carry3 r34
613.global bn_mul_comba8#
614.proc bn_mul_comba8#
615.align 64
616bn_mul_comba8:
617 .prologue
618 .save ar.pfs,r2
619#if defined(_HPUX_SOURCE) && !defined(_LP64)
620{ .mii; alloc r2=ar.pfs,3,0,0,0
621 addp4 r33=0,r33
622 addp4 r34=0,r34 };;
623{ .mii; addp4 r32=0,r32
624#else
625{ .mii; alloc r2=ar.pfs,3,0,0,0
626#endif
627 add r14=8,r33
628 add r17=8,r34 }
629 .body
630{ .mii; add r15=16,r33
631 add r18=16,r34
632 add r16=24,r33 }
633.L_cheat_entry_point8:
634{ .mmi; add r19=24,r34
635
636 ldf8 f32=[r33],32 };;
637
638{ .mmi; ldf8 f120=[r34],32
639 ldf8 f121=[r17],32 }
640{ .mmi; ldf8 f122=[r18],32
641 ldf8 f123=[r19],32 };;
642{ .mmi; ldf8 f124=[r34]
643 ldf8 f125=[r17] }
644{ .mmi; ldf8 f126=[r18]
645 ldf8 f127=[r19] }
646
647{ .mmi; ldf8 f33=[r14],32
648 ldf8 f34=[r15],32 }
649{ .mmi; ldf8 f35=[r16],32;;
650 ldf8 f36=[r33] }
651{ .mmi; ldf8 f37=[r14]
652 ldf8 f38=[r15] }
653{ .mfi; ldf8 f39=[r16]
654// -------\ Entering multiplier's heaven /-------
655// ------------\ /------------
656// -----------------\ /-----------------
657// ----------------------\/----------------------
658 xma.hu f41=f32,f120,f0 }
659{ .mfi; xma.lu f40=f32,f120,f0 };; // (*)
660{ .mfi; xma.hu f51=f32,f121,f0 }
661{ .mfi; xma.lu f50=f32,f121,f0 };;
662{ .mfi; xma.hu f61=f32,f122,f0 }
663{ .mfi; xma.lu f60=f32,f122,f0 };;
664{ .mfi; xma.hu f71=f32,f123,f0 }
665{ .mfi; xma.lu f70=f32,f123,f0 };;
666{ .mfi; xma.hu f81=f32,f124,f0 }
667{ .mfi; xma.lu f80=f32,f124,f0 };;
668{ .mfi; xma.hu f91=f32,f125,f0 }
669{ .mfi; xma.lu f90=f32,f125,f0 };;
670{ .mfi; xma.hu f101=f32,f126,f0 }
671{ .mfi; xma.lu f100=f32,f126,f0 };;
672{ .mfi; xma.hu f111=f32,f127,f0 }
673{ .mfi; xma.lu f110=f32,f127,f0 };;//
674// (*) You can argue that splitting at every second bundle would
675// prevent "wider" IA-64 implementations from achieving the peak
676// performance. Well, not really... The catch is that if you
677// intend to keep 4 FP units busy by splitting at every fourth
678// bundle and thus perform these 16 multiplications in 4 ticks,
679// the first bundle *below* would stall because the result from
680// the first xma bundle *above* won't be available for another 3
681// ticks (if not more, being an optimist, I assume that "wider"
682// implementation will have same latency:-). This stall will hold
683// you back and the performance would be as if every second bundle
684// were split *anyway*...
685{ .mfi; getf.sig r16=f40
686 xma.hu f42=f33,f120,f41
687 add r33=8,r32 }
688{ .mfi; xma.lu f41=f33,f120,f41 };;
689{ .mfi; getf.sig r24=f50
690 xma.hu f52=f33,f121,f51 }
691{ .mfi; xma.lu f51=f33,f121,f51 };;
692{ .mfi; st8 [r32]=r16,16
693 xma.hu f62=f33,f122,f61 }
694{ .mfi; xma.lu f61=f33,f122,f61 };;
695{ .mfi; xma.hu f72=f33,f123,f71 }
696{ .mfi; xma.lu f71=f33,f123,f71 };;
697{ .mfi; xma.hu f82=f33,f124,f81 }
698{ .mfi; xma.lu f81=f33,f124,f81 };;
699{ .mfi; xma.hu f92=f33,f125,f91 }
700{ .mfi; xma.lu f91=f33,f125,f91 };;
701{ .mfi; xma.hu f102=f33,f126,f101 }
702{ .mfi; xma.lu f101=f33,f126,f101 };;
703{ .mfi; xma.hu f112=f33,f127,f111 }
704{ .mfi; xma.lu f111=f33,f127,f111 };;//
705//-------------------------------------------------//
706{ .mfi; getf.sig r25=f41
707 xma.hu f43=f34,f120,f42 }
708{ .mfi; xma.lu f42=f34,f120,f42 };;
709{ .mfi; getf.sig r16=f60
710 xma.hu f53=f34,f121,f52 }
711{ .mfi; xma.lu f52=f34,f121,f52 };;
712{ .mfi; getf.sig r17=f51
713 xma.hu f63=f34,f122,f62
714 add r25=r25,r24 }
715{ .mfi; xma.lu f62=f34,f122,f62
716 mov carry1=0 };;
717{ .mfi; cmp.ltu p6,p0=r25,r24
718 xma.hu f73=f34,f123,f72 }
719{ .mfi; xma.lu f72=f34,f123,f72 };;
720{ .mfi; st8 [r33]=r25,16
721 xma.hu f83=f34,f124,f82
722(p6) add carry1=1,carry1 }
723{ .mfi; xma.lu f82=f34,f124,f82 };;
724{ .mfi; xma.hu f93=f34,f125,f92 }
725{ .mfi; xma.lu f92=f34,f125,f92 };;
726{ .mfi; xma.hu f103=f34,f126,f102 }
727{ .mfi; xma.lu f102=f34,f126,f102 };;
728{ .mfi; xma.hu f113=f34,f127,f112 }
729{ .mfi; xma.lu f112=f34,f127,f112 };;//
730//-------------------------------------------------//
731{ .mfi; getf.sig r18=f42
732 xma.hu f44=f35,f120,f43
733 add r17=r17,r16 }
734{ .mfi; xma.lu f43=f35,f120,f43 };;
735{ .mfi; getf.sig r24=f70
736 xma.hu f54=f35,f121,f53 }
737{ .mfi; mov carry2=0
738 xma.lu f53=f35,f121,f53 };;
739{ .mfi; getf.sig r25=f61
740 xma.hu f64=f35,f122,f63
741 cmp.ltu p7,p0=r17,r16 }
742{ .mfi; add r18=r18,r17
743 xma.lu f63=f35,f122,f63 };;
744{ .mfi; getf.sig r26=f52
745 xma.hu f74=f35,f123,f73
746(p7) add carry2=1,carry2 }
747{ .mfi; cmp.ltu p7,p0=r18,r17
748 xma.lu f73=f35,f123,f73
749 add r18=r18,carry1 };;
750{ .mfi;
751 xma.hu f84=f35,f124,f83
752(p7) add carry2=1,carry2 }
753{ .mfi; cmp.ltu p7,p0=r18,carry1
754 xma.lu f83=f35,f124,f83 };;
755{ .mfi; st8 [r32]=r18,16
756 xma.hu f94=f35,f125,f93
757(p7) add carry2=1,carry2 }
758{ .mfi; xma.lu f93=f35,f125,f93 };;
759{ .mfi; xma.hu f104=f35,f126,f103 }
760{ .mfi; xma.lu f103=f35,f126,f103 };;
761{ .mfi; xma.hu f114=f35,f127,f113 }
762{ .mfi; mov carry1=0
763 xma.lu f113=f35,f127,f113
764 add r25=r25,r24 };;//
765//-------------------------------------------------//
766{ .mfi; getf.sig r27=f43
767 xma.hu f45=f36,f120,f44
768 cmp.ltu p6,p0=r25,r24 }
769{ .mfi; xma.lu f44=f36,f120,f44
770 add r26=r26,r25 };;
771{ .mfi; getf.sig r16=f80
772 xma.hu f55=f36,f121,f54
773(p6) add carry1=1,carry1 }
774{ .mfi; xma.lu f54=f36,f121,f54 };;
775{ .mfi; getf.sig r17=f71
776 xma.hu f65=f36,f122,f64
777 cmp.ltu p6,p0=r26,r25 }
778{ .mfi; xma.lu f64=f36,f122,f64
779 add r27=r27,r26 };;
780{ .mfi; getf.sig r18=f62
781 xma.hu f75=f36,f123,f74
782(p6) add carry1=1,carry1 }
783{ .mfi; cmp.ltu p6,p0=r27,r26
784 xma.lu f74=f36,f123,f74
785 add r27=r27,carry2 };;
786{ .mfi; getf.sig r19=f53
787 xma.hu f85=f36,f124,f84
788(p6) add carry1=1,carry1 }
789{ .mfi; xma.lu f84=f36,f124,f84
790 cmp.ltu p6,p0=r27,carry2 };;
791{ .mfi; st8 [r33]=r27,16
792 xma.hu f95=f36,f125,f94
793(p6) add carry1=1,carry1 }
794{ .mfi; xma.lu f94=f36,f125,f94 };;
795{ .mfi; xma.hu f105=f36,f126,f104 }
796{ .mfi; mov carry2=0
797 xma.lu f104=f36,f126,f104
798 add r17=r17,r16 };;
799{ .mfi; xma.hu f115=f36,f127,f114
800 cmp.ltu p7,p0=r17,r16 }
801{ .mfi; xma.lu f114=f36,f127,f114
802 add r18=r18,r17 };;//
803//-------------------------------------------------//
804{ .mfi; getf.sig r20=f44
805 xma.hu f46=f37,f120,f45
806(p7) add carry2=1,carry2 }
807{ .mfi; cmp.ltu p7,p0=r18,r17
808 xma.lu f45=f37,f120,f45
809 add r19=r19,r18 };;
810{ .mfi; getf.sig r24=f90
811 xma.hu f56=f37,f121,f55 }
812{ .mfi; xma.lu f55=f37,f121,f55 };;
813{ .mfi; getf.sig r25=f81
814 xma.hu f66=f37,f122,f65
815(p7) add carry2=1,carry2 }
816{ .mfi; cmp.ltu p7,p0=r19,r18
817 xma.lu f65=f37,f122,f65
818 add r20=r20,r19 };;
819{ .mfi; getf.sig r26=f72
820 xma.hu f76=f37,f123,f75
821(p7) add carry2=1,carry2 }
822{ .mfi; cmp.ltu p7,p0=r20,r19
823 xma.lu f75=f37,f123,f75
824 add r20=r20,carry1 };;
825{ .mfi; getf.sig r27=f63
826 xma.hu f86=f37,f124,f85
827(p7) add carry2=1,carry2 }
828{ .mfi; xma.lu f85=f37,f124,f85
829 cmp.ltu p7,p0=r20,carry1 };;
830{ .mfi; getf.sig r28=f54
831 xma.hu f96=f37,f125,f95
832(p7) add carry2=1,carry2 }
833{ .mfi; st8 [r32]=r20,16
834 xma.lu f95=f37,f125,f95 };;
835{ .mfi; xma.hu f106=f37,f126,f105 }
836{ .mfi; mov carry1=0
837 xma.lu f105=f37,f126,f105
838 add r25=r25,r24 };;
839{ .mfi; xma.hu f116=f37,f127,f115
840 cmp.ltu p6,p0=r25,r24 }
841{ .mfi; xma.lu f115=f37,f127,f115
842 add r26=r26,r25 };;//
843//-------------------------------------------------//
844{ .mfi; getf.sig r29=f45
845 xma.hu f47=f38,f120,f46
846(p6) add carry1=1,carry1 }
847{ .mfi; cmp.ltu p6,p0=r26,r25
848 xma.lu f46=f38,f120,f46
849 add r27=r27,r26 };;
850{ .mfi; getf.sig r16=f100
851 xma.hu f57=f38,f121,f56
852(p6) add carry1=1,carry1 }
853{ .mfi; cmp.ltu p6,p0=r27,r26
854 xma.lu f56=f38,f121,f56
855 add r28=r28,r27 };;
856{ .mfi; getf.sig r17=f91
857 xma.hu f67=f38,f122,f66
858(p6) add carry1=1,carry1 }
859{ .mfi; cmp.ltu p6,p0=r28,r27
860 xma.lu f66=f38,f122,f66
861 add r29=r29,r28 };;
862{ .mfi; getf.sig r18=f82
863 xma.hu f77=f38,f123,f76
864(p6) add carry1=1,carry1 }
865{ .mfi; cmp.ltu p6,p0=r29,r28
866 xma.lu f76=f38,f123,f76
867 add r29=r29,carry2 };;
868{ .mfi; getf.sig r19=f73
869 xma.hu f87=f38,f124,f86
870(p6) add carry1=1,carry1 }
871{ .mfi; xma.lu f86=f38,f124,f86
872 cmp.ltu p6,p0=r29,carry2 };;
873{ .mfi; getf.sig r20=f64
874 xma.hu f97=f38,f125,f96
875(p6) add carry1=1,carry1 }
876{ .mfi; st8 [r33]=r29,16
877 xma.lu f96=f38,f125,f96 };;
878{ .mfi; getf.sig r21=f55
879 xma.hu f107=f38,f126,f106 }
880{ .mfi; mov carry2=0
881 xma.lu f106=f38,f126,f106
882 add r17=r17,r16 };;
883{ .mfi; xma.hu f117=f38,f127,f116
884 cmp.ltu p7,p0=r17,r16 }
885{ .mfi; xma.lu f116=f38,f127,f116
886 add r18=r18,r17 };;//
887//-------------------------------------------------//
888{ .mfi; getf.sig r22=f46
889 xma.hu f48=f39,f120,f47
890(p7) add carry2=1,carry2 }
891{ .mfi; cmp.ltu p7,p0=r18,r17
892 xma.lu f47=f39,f120,f47
893 add r19=r19,r18 };;
894{ .mfi; getf.sig r24=f110
895 xma.hu f58=f39,f121,f57
896(p7) add carry2=1,carry2 }
897{ .mfi; cmp.ltu p7,p0=r19,r18
898 xma.lu f57=f39,f121,f57
899 add r20=r20,r19 };;
900{ .mfi; getf.sig r25=f101
901 xma.hu f68=f39,f122,f67
902(p7) add carry2=1,carry2 }
903{ .mfi; cmp.ltu p7,p0=r20,r19
904 xma.lu f67=f39,f122,f67
905 add r21=r21,r20 };;
906{ .mfi; getf.sig r26=f92
907 xma.hu f78=f39,f123,f77
908(p7) add carry2=1,carry2 }
909{ .mfi; cmp.ltu p7,p0=r21,r20
910 xma.lu f77=f39,f123,f77
911 add r22=r22,r21 };;
912{ .mfi; getf.sig r27=f83
913 xma.hu f88=f39,f124,f87
914(p7) add carry2=1,carry2 }
915{ .mfi; cmp.ltu p7,p0=r22,r21
916 xma.lu f87=f39,f124,f87
917 add r22=r22,carry1 };;
918{ .mfi; getf.sig r28=f74
919 xma.hu f98=f39,f125,f97
920(p7) add carry2=1,carry2 }
921{ .mfi; xma.lu f97=f39,f125,f97
922 cmp.ltu p7,p0=r22,carry1 };;
923{ .mfi; getf.sig r29=f65
924 xma.hu f108=f39,f126,f107
925(p7) add carry2=1,carry2 }
926{ .mfi; st8 [r32]=r22,16
927 xma.lu f107=f39,f126,f107 };;
928{ .mfi; getf.sig r30=f56
929 xma.hu f118=f39,f127,f117 }
930{ .mfi; xma.lu f117=f39,f127,f117 };;//
931//-------------------------------------------------//
932// Leaving multiplier's heaven... Quite a ride, huh?
933
934{ .mii; getf.sig r31=f47
935 add r25=r25,r24
936 mov carry1=0 };;
937{ .mii; getf.sig r16=f111
938 cmp.ltu p6,p0=r25,r24
939 add r26=r26,r25 };;
940{ .mfb; getf.sig r17=f102 }
941{ .mii;
942(p6) add carry1=1,carry1
943 cmp.ltu p6,p0=r26,r25
944 add r27=r27,r26 };;
945{ .mfb; nop.m 0x0 }
946{ .mii;
947(p6) add carry1=1,carry1
948 cmp.ltu p6,p0=r27,r26
949 add r28=r28,r27 };;
950{ .mii; getf.sig r18=f93
951 add r17=r17,r16
952 mov carry3=0 }
953{ .mii;
954(p6) add carry1=1,carry1
955 cmp.ltu p6,p0=r28,r27
956 add r29=r29,r28 };;
957{ .mii; getf.sig r19=f84
958 cmp.ltu p7,p0=r17,r16 }
959{ .mii;
960(p6) add carry1=1,carry1
961 cmp.ltu p6,p0=r29,r28
962 add r30=r30,r29 };;
963{ .mii; getf.sig r20=f75
964 add r18=r18,r17 }
965{ .mii;
966(p6) add carry1=1,carry1
967 cmp.ltu p6,p0=r30,r29
968 add r31=r31,r30 };;
969{ .mfb; getf.sig r21=f66 }
970{ .mii; (p7) add carry3=1,carry3
971 cmp.ltu p7,p0=r18,r17
972 add r19=r19,r18 }
973{ .mfb; nop.m 0x0 }
974{ .mii;
975(p6) add carry1=1,carry1
976 cmp.ltu p6,p0=r31,r30
977 add r31=r31,carry2 };;
978{ .mfb; getf.sig r22=f57 }
979{ .mii; (p7) add carry3=1,carry3
980 cmp.ltu p7,p0=r19,r18
981 add r20=r20,r19 }
982{ .mfb; nop.m 0x0 }
983{ .mii;
984(p6) add carry1=1,carry1
985 cmp.ltu p6,p0=r31,carry2 };;
986{ .mfb; getf.sig r23=f48 }
987{ .mii; (p7) add carry3=1,carry3
988 cmp.ltu p7,p0=r20,r19
989 add r21=r21,r20 }
990{ .mii;
991(p6) add carry1=1,carry1 }
992{ .mfb; st8 [r33]=r31,16 };;
993
994{ .mfb; getf.sig r24=f112 }
995{ .mii; (p7) add carry3=1,carry3
996 cmp.ltu p7,p0=r21,r20
997 add r22=r22,r21 };;
998{ .mfb; getf.sig r25=f103 }
999{ .mii; (p7) add carry3=1,carry3
1000 cmp.ltu p7,p0=r22,r21
1001 add r23=r23,r22 };;
1002{ .mfb; getf.sig r26=f94 }
1003{ .mii; (p7) add carry3=1,carry3
1004 cmp.ltu p7,p0=r23,r22
1005 add r23=r23,carry1 };;
1006{ .mfb; getf.sig r27=f85 }
1007{ .mii; (p7) add carry3=1,carry3
1008 cmp.ltu p7,p8=r23,carry1};;
1009{ .mii; getf.sig r28=f76
1010 add r25=r25,r24
1011 mov carry1=0 }
1012{ .mii; st8 [r32]=r23,16
1013 (p7) add carry2=1,carry3
1014 (p8) add carry2=0,carry3 };;
1015
1016{ .mfb; nop.m 0x0 }
1017{ .mii; getf.sig r29=f67
1018 cmp.ltu p6,p0=r25,r24
1019 add r26=r26,r25 };;
1020{ .mfb; getf.sig r30=f58 }
1021{ .mii;
1022(p6) add carry1=1,carry1
1023 cmp.ltu p6,p0=r26,r25
1024 add r27=r27,r26 };;
1025{ .mfb; getf.sig r16=f113 }
1026{ .mii;
1027(p6) add carry1=1,carry1
1028 cmp.ltu p6,p0=r27,r26
1029 add r28=r28,r27 };;
1030{ .mfb; getf.sig r17=f104 }
1031{ .mii;
1032(p6) add carry1=1,carry1
1033 cmp.ltu p6,p0=r28,r27
1034 add r29=r29,r28 };;
1035{ .mfb; getf.sig r18=f95 }
1036{ .mii;
1037(p6) add carry1=1,carry1
1038 cmp.ltu p6,p0=r29,r28
1039 add r30=r30,r29 };;
1040{ .mii; getf.sig r19=f86
1041 add r17=r17,r16
1042 mov carry3=0 }
1043{ .mii;
1044(p6) add carry1=1,carry1
1045 cmp.ltu p6,p0=r30,r29
1046 add r30=r30,carry2 };;
1047{ .mii; getf.sig r20=f77
1048 cmp.ltu p7,p0=r17,r16
1049 add r18=r18,r17 }
1050{ .mii;
1051(p6) add carry1=1,carry1
1052 cmp.ltu p6,p0=r30,carry2 };;
1053{ .mfb; getf.sig r21=f68 }
1054{ .mii; st8 [r33]=r30,16
1055(p6) add carry1=1,carry1 };;
1056
1057{ .mfb; getf.sig r24=f114 }
1058{ .mii; (p7) add carry3=1,carry3
1059 cmp.ltu p7,p0=r18,r17
1060 add r19=r19,r18 };;
1061{ .mfb; getf.sig r25=f105 }
1062{ .mii; (p7) add carry3=1,carry3
1063 cmp.ltu p7,p0=r19,r18
1064 add r20=r20,r19 };;
1065{ .mfb; getf.sig r26=f96 }
1066{ .mii; (p7) add carry3=1,carry3
1067 cmp.ltu p7,p0=r20,r19
1068 add r21=r21,r20 };;
1069{ .mfb; getf.sig r27=f87 }
1070{ .mii; (p7) add carry3=1,carry3
1071 cmp.ltu p7,p0=r21,r20
1072 add r21=r21,carry1 };;
1073{ .mib; getf.sig r28=f78
1074 add r25=r25,r24 }
1075{ .mib; (p7) add carry3=1,carry3
1076 cmp.ltu p7,p8=r21,carry1};;
1077{ .mii; st8 [r32]=r21,16
1078 (p7) add carry2=1,carry3
1079 (p8) add carry2=0,carry3 }
1080
1081{ .mii; mov carry1=0
1082 cmp.ltu p6,p0=r25,r24
1083 add r26=r26,r25 };;
1084{ .mfb; getf.sig r16=f115 }
1085{ .mii;
1086(p6) add carry1=1,carry1
1087 cmp.ltu p6,p0=r26,r25
1088 add r27=r27,r26 };;
1089{ .mfb; getf.sig r17=f106 }
1090{ .mii;
1091(p6) add carry1=1,carry1
1092 cmp.ltu p6,p0=r27,r26
1093 add r28=r28,r27 };;
1094{ .mfb; getf.sig r18=f97 }
1095{ .mii;
1096(p6) add carry1=1,carry1
1097 cmp.ltu p6,p0=r28,r27
1098 add r28=r28,carry2 };;
1099{ .mib; getf.sig r19=f88
1100 add r17=r17,r16 }
1101{ .mib;
1102(p6) add carry1=1,carry1
1103 cmp.ltu p6,p0=r28,carry2 };;
1104{ .mii; st8 [r33]=r28,16
1105(p6) add carry1=1,carry1 }
1106
1107{ .mii; mov carry2=0
1108 cmp.ltu p7,p0=r17,r16
1109 add r18=r18,r17 };;
1110{ .mfb; getf.sig r24=f116 }
1111{ .mii; (p7) add carry2=1,carry2
1112 cmp.ltu p7,p0=r18,r17
1113 add r19=r19,r18 };;
1114{ .mfb; getf.sig r25=f107 }
1115{ .mii; (p7) add carry2=1,carry2
1116 cmp.ltu p7,p0=r19,r18
1117 add r19=r19,carry1 };;
1118{ .mfb; getf.sig r26=f98 }
1119{ .mii; (p7) add carry2=1,carry2
1120 cmp.ltu p7,p0=r19,carry1};;
1121{ .mii; st8 [r32]=r19,16
1122 (p7) add carry2=1,carry2 }
1123
1124{ .mfb; add r25=r25,r24 };;
1125
1126{ .mfb; getf.sig r16=f117 }
1127{ .mii; mov carry1=0
1128 cmp.ltu p6,p0=r25,r24
1129 add r26=r26,r25 };;
1130{ .mfb; getf.sig r17=f108 }
1131{ .mii;
1132(p6) add carry1=1,carry1
1133 cmp.ltu p6,p0=r26,r25
1134 add r26=r26,carry2 };;
1135{ .mfb; nop.m 0x0 }
1136{ .mii;
1137(p6) add carry1=1,carry1
1138 cmp.ltu p6,p0=r26,carry2 };;
1139{ .mii; st8 [r33]=r26,16
1140(p6) add carry1=1,carry1 }
1141
1142{ .mfb; add r17=r17,r16 };;
1143{ .mfb; getf.sig r24=f118 }
1144{ .mii; mov carry2=0
1145 cmp.ltu p7,p0=r17,r16
1146 add r17=r17,carry1 };;
1147{ .mii; (p7) add carry2=1,carry2
1148 cmp.ltu p7,p0=r17,carry1};;
1149{ .mii; st8 [r32]=r17
1150 (p7) add carry2=1,carry2 };;
1151{ .mfb; add r24=r24,carry2 };;
1152{ .mib; st8 [r33]=r24 }
1153
1154{ .mib; rum 1<<5 // clear um.mfh
1155 br.ret.sptk.many b0 };;
1156.endp bn_mul_comba8#
1157#undef carry3
1158#undef carry2
1159#undef carry1
1160#endif
1161
1162#if 1
1163// It's possible to make it faster (see comment to bn_sqr_comba8), but
1164// I reckon it isn't worth the effort, basically because the routine
1165// (actually both of them) is practically never called... So I just play
1166// the same trick as with bn_sqr_comba8.
1167//
1168// void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1169//
// Squaring is done by aliasing the second multiplicand pointer to the
// first (r34 = r33) and branching into the body of bn_mul_comba4 at
// .L_cheat_entry_point4, which then writes the 8 result words to r.
// The address registers prepared here (r14,r15,r16 = a+8,a+16,a+24 and
// r17,r18 = b+8,b+16) are the same ones bn_mul_comba4 sets up before
// that label, so the two prologues must be kept in sync.
1170.global bn_sqr_comba4#
1171.proc bn_sqr_comba4#
1172.align 64
1173bn_sqr_comba4:
1174 .prologue
1175 .save ar.pfs,r2
1176#if defined(_HPUX_SOURCE) && !defined(_LP64)
// NOTE(review): addp4 presumably widens the 32-bit ILP32 pointers to
// 64-bit addresses -- confirm against the IA-64 instruction reference.
1177{ .mii; alloc r2=ar.pfs,2,1,0,0
1178 addp4 r32=0,r32
1179 addp4 r33=0,r33 };;
1180{ .mii;
1181#else
1182{ .mii; alloc r2=ar.pfs,2,1,0,0
1183#endif
1184 mov r34=r33 // b = a; r34 is the one extra register requested by alloc
1185 add r14=8,r33 };;
1186 .body
1187{ .mii; add r17=8,r34
1188 add r15=16,r33
1189 add r18=16,r34 }
1190{ .mfb; add r16=24,r33
1191 br .L_cheat_entry_point4 };; // continue inside bn_mul_comba4
1192.endp bn_sqr_comba4#
1193#endif
1194
1195#if 1
1196// Runs in ~115 cycles and ~4.5 times faster than C. Well, whatever...
1197//
1198// void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1199//
// Inputs:  r32 = r, r33 = a, r34 = b.
// Output:  eight 64-bit words stored to r[0..7].
//
// a[0..3] are loaded into f32..f35 and b[0..3] into f120..f123. The
// partial products are formed with xma.lu/xma.hu (low/high half of a
// multiply-add) in the floating-point unit, moved to integer registers
// with getf.sig, and summed there; cmp.ltu plus a predicated add is used
// to count the carries out of each addition.
//
// Register reuse worth knowing about:
//  - r14/r15 first hold a+8/a+16 for the loads and are then recycled as
//    carry1/carry2 (see the #defines below).
//  - r33 is overwritten with r+8 once a[0] has been loaded; even result
//    words are stored through r32 and odd ones through r33, each
//    post-incremented by 16.
//
// bn_sqr_comba4 branches to .L_cheat_entry_point4 with r34 = r33 and with
// r14..r18 already set up the same way as the prologue below does.
1200#define carry1 r14
1201#define carry2 r15
1202.global bn_mul_comba4#
1203.proc bn_mul_comba4#
1204.align 64
1205bn_mul_comba4:
1206 .prologue
1207 .save ar.pfs,r2
1208#if defined(_HPUX_SOURCE) && !defined(_LP64)
1209{ .mii; alloc r2=ar.pfs,3,0,0,0
1210 addp4 r33=0,r33
1211 addp4 r34=0,r34 };;
1212{ .mii; addp4 r32=0,r32
1213#else
1214{ .mii; alloc r2=ar.pfs,3,0,0,0
1215#endif
1216 add r14=8,r33
1217 add r17=8,r34 }
1218 .body
1219{ .mii; add r15=16,r33
1220 add r18=16,r34
1221 add r16=24,r33 };;
1222.L_cheat_entry_point4:
1223{ .mmi; add r19=24,r34
1224
1225 ldf8 f32=[r33] }
1226
1227{ .mmi; ldf8 f120=[r34]
1228 ldf8 f121=[r17] };;
1229{ .mmi; ldf8 f122=[r18]
1230 ldf8 f123=[r19] }
1231
1232{ .mmi; ldf8 f33=[r14]
1233 ldf8 f34=[r15] }
1234{ .mfi; ldf8 f35=[r16]
1235
1236 xma.hu f41=f32,f120,f0 }
1237{ .mfi; xma.lu f40=f32,f120,f0 };;
1238{ .mfi; xma.hu f51=f32,f121,f0 }
1239{ .mfi; xma.lu f50=f32,f121,f0 };;
1240{ .mfi; xma.hu f61=f32,f122,f0 }
1241{ .mfi; xma.lu f60=f32,f122,f0 };;
1242{ .mfi; xma.hu f71=f32,f123,f0 }
1243{ .mfi; xma.lu f70=f32,f123,f0 };;//
1244// Major stall takes place here, and 3 more places below. Result from
1245// first xma is not available for another 3 ticks.
// From here on r33 no longer points at a: it becomes the store pointer
// for the odd-numbered result words (r+8).
1246{ .mfi; getf.sig r16=f40
1247 xma.hu f42=f33,f120,f41
1248 add r33=8,r32 }
1249{ .mfi; xma.lu f41=f33,f120,f41 };;
1250{ .mfi; getf.sig r24=f50
1251 xma.hu f52=f33,f121,f51 }
1252{ .mfi; xma.lu f51=f33,f121,f51 };;
1253{ .mfi; st8 [r32]=r16,16
1254 xma.hu f62=f33,f122,f61 }
1255{ .mfi; xma.lu f61=f33,f122,f61 };;
1256{ .mfi; xma.hu f72=f33,f123,f71 }
1257{ .mfi; xma.lu f71=f33,f123,f71 };;//
1258//-------------------------------------------------//
1259{ .mfi; getf.sig r25=f41
1260 xma.hu f43=f34,f120,f42 }
1261{ .mfi; xma.lu f42=f34,f120,f42 };;
1262{ .mfi; getf.sig r16=f60
1263 xma.hu f53=f34,f121,f52 }
1264{ .mfi; xma.lu f52=f34,f121,f52 };;
1265{ .mfi; getf.sig r17=f51
1266 xma.hu f63=f34,f122,f62
1267 add r25=r25,r24 }
1268{ .mfi; mov carry1=0
1269 xma.lu f62=f34,f122,f62 };;
1270{ .mfi; st8 [r33]=r25,16
1271 xma.hu f73=f34,f123,f72
1272 cmp.ltu p6,p0=r25,r24 }
1273{ .mfi; xma.lu f72=f34,f123,f72 };;//
1274//-------------------------------------------------//
1275{ .mfi; getf.sig r18=f42
1276 xma.hu f44=f35,f120,f43
1277(p6) add carry1=1,carry1 }
1278{ .mfi; add r17=r17,r16
1279 xma.lu f43=f35,f120,f43
1280 mov carry2=0 };;
1281{ .mfi; getf.sig r24=f70
1282 xma.hu f54=f35,f121,f53
1283 cmp.ltu p7,p0=r17,r16 }
1284{ .mfi; xma.lu f53=f35,f121,f53 };;
1285{ .mfi; getf.sig r25=f61
1286 xma.hu f64=f35,f122,f63
1287 add r18=r18,r17 }
1288{ .mfi; xma.lu f63=f35,f122,f63
1289(p7) add carry2=1,carry2 };;
1290{ .mfi; getf.sig r26=f52
1291 xma.hu f74=f35,f123,f73
1292 cmp.ltu p7,p0=r18,r17 }
1293{ .mfi; xma.lu f73=f35,f123,f73
1294 add r18=r18,carry1 };;
1295//-------------------------------------------------//
// All multiplies have been issued; what follows only sums the remaining
// column terms and propagates carry1/carry2 between columns.
1296{ .mii; st8 [r32]=r18,16
1297(p7) add carry2=1,carry2
1298 cmp.ltu p7,p0=r18,carry1 };;
1299
1300{ .mfi; getf.sig r27=f43 // last major stall
1301(p7) add carry2=1,carry2 };;
1302{ .mii; getf.sig r16=f71
1303 add r25=r25,r24
1304 mov carry1=0 };;
1305{ .mii; getf.sig r17=f62
1306 cmp.ltu p6,p0=r25,r24
1307 add r26=r26,r25 };;
1308{ .mii;
1309(p6) add carry1=1,carry1
1310 cmp.ltu p6,p0=r26,r25
1311 add r27=r27,r26 };;
1312{ .mii;
1313(p6) add carry1=1,carry1
1314 cmp.ltu p6,p0=r27,r26
1315 add r27=r27,carry2 };;
1316{ .mii; getf.sig r18=f53
1317(p6) add carry1=1,carry1
1318 cmp.ltu p6,p0=r27,carry2 };;
1319{ .mfi; st8 [r33]=r27,16
1320(p6) add carry1=1,carry1 }
1321
1322{ .mii; getf.sig r19=f44
1323 add r17=r17,r16
1324 mov carry2=0 };;
1325{ .mii; getf.sig r24=f72
1326 cmp.ltu p7,p0=r17,r16
1327 add r18=r18,r17 };;
1328{ .mii; (p7) add carry2=1,carry2
1329 cmp.ltu p7,p0=r18,r17
1330 add r19=r19,r18 };;
1331{ .mii; (p7) add carry2=1,carry2
1332 cmp.ltu p7,p0=r19,r18
1333 add r19=r19,carry1 };;
1334{ .mii; getf.sig r25=f63
1335 (p7) add carry2=1,carry2
1336 cmp.ltu p7,p0=r19,carry1};;
1337{ .mii; st8 [r32]=r19,16
1338 (p7) add carry2=1,carry2 }
1339
1340{ .mii; getf.sig r26=f54
1341 add r25=r25,r24
1342 mov carry1=0 };;
1343{ .mii; getf.sig r16=f73
1344 cmp.ltu p6,p0=r25,r24
1345 add r26=r26,r25 };;
1346{ .mii;
1347(p6) add carry1=1,carry1
1348 cmp.ltu p6,p0=r26,r25
1349 add r26=r26,carry2 };;
1350{ .mii; getf.sig r17=f64
1351(p6) add carry1=1,carry1
1352 cmp.ltu p6,p0=r26,carry2 };;
1353{ .mii; st8 [r33]=r26,16
1354(p6) add carry1=1,carry1 }
1355
1356{ .mii; getf.sig r24=f74
1357 add r17=r17,r16
1358 mov carry2=0 };;
1359{ .mii; cmp.ltu p7,p0=r17,r16
1360 add r17=r17,carry1 };;
1361
1362{ .mii; (p7) add carry2=1,carry2
1363 cmp.ltu p7,p0=r17,carry1};;
1364{ .mii; st8 [r32]=r17,16
1365 (p7) add carry2=1,carry2 };;
1366
// Top word: only the last high half plus the final carry remain, so no
// further carry check is performed.
1367{ .mii; add r24=r24,carry2 };;
1368{ .mii; st8 [r33]=r24 }
1369
1370{ .mib; rum 1<<5 // clear um.mfh
1371 br.ret.sptk.many b0 };;
1372.endp bn_mul_comba4#
1373#undef carry2
1374#undef carry1
1375#endif
1376
1377#if 1
1378//
1379// BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
1380//
1381// In a nutshell it's a port of my MIPS III/IV implementation.
1382//
// Inputs:  r32 = h, r33 = l, r34 = d.
// Outputs: r8 = quotient, r9 = remainder (see the epilogue).
//
// Outline, as implemented below:
//  1. d == 0 returns -1 immediately.
//  2. d is shifted left until its top bit is set; the shift count ends up
//     in I and h:l is shifted by the same amount. If non-zero bits of h
//     would be shifted out, abort() is called.
//  3. Two 32-bit quotient digits are produced in turn. Each estimate
//     comes from the floating-point helper .L_udiv64_32_b6 (or is forced
//     to 0xffffffff when the top halves of H and D are equal) and is then
//     corrected in a short loop. The digits are combined as
//     r8 = (q1 << 32) | q2.
//
// Symbolic register names used by this routine:
1383#define AT r14
1384#define H r16
1385#define HH r20
1386#define L r17
1387#define D r18
1388#define DH r22
1389#define I r21
1390
1391#if 0
1392// Some preprocessors (most notably HP-UX) appear to be allergic to
1393// macros enclosed in parentheses [as these three were].
1394#define cont p16
1395#define break p0 // p20
1396#define equ p24
1397#else
1398cont=p16
1399break=p0
1400equ=p24
1401#endif
1402
1403.global abort#
1404.global bn_div_words#
1405.proc bn_div_words#
1406.align 64
1407bn_div_words:
1408 .prologue
1409 .save ar.pfs,r2
1410{ .mii; alloc r2=ar.pfs,3,5,0,8
1411 .save b0,r3
1412 mov r3=b0
1413 .save pr,r10
1414 mov r10=pr };;
// Division by zero: hand back all ones instead of faulting.
1415{ .mmb; cmp.eq p6,p0=r34,r0
1416 mov r8=-1
1417(p6) br.ret.spnt.many b0 };;
1418
1419 .body
1420{ .mii; mov H=r32 // save h
1421 mov ar.ec=0 // don't rotate at exit
1422 mov pr.rot=0 }
1423{ .mii; mov L=r33 // save l
1424 mov r36=r0 };;
1425
// Normalisation loop: keeps doubling d while it is positive as a signed
// number, i.e. until its most significant bit is set.
// NOTE(review): this appears to rely on br.wtop register rotation to feed
// r33 -> r34 (shifted d) and r35 -> r36 (shift count) into the next
// iteration -- confirm against the IA-64 architecture manual.
1426.L_divw_shift: // -vv- note signed comparison
1427{ .mfi; (p0) cmp.lt p16,p0=r0,r34 // d
1428 (p0) shladd r33=r34,1,r0 }
1429{ .mfb; (p0) add r35=1,r36
1430 (p0) nop.f 0x0
1431(p16) br.wtop.dpnt .L_divw_shift };;
1432
1433{ .mii; mov D=r34
1434 shr.u DH=r34,32
1435 sub r35=64,r36 };;
1436{ .mii; setf.sig f7=DH
1437 shr.u AT=H,r35
1438 mov I=r36 };;
1439{ .mib; cmp.ne p6,p0=r0,AT
1440 shl H=H,r36
1441(p6) br.call.spnt.clr b0=abort };; // overflow, die...
1442
1443{ .mfi; fcvt.xuf.s1 f7=f7
1444 shr.u AT=L,r35 };;
1445{ .mii; shl L=L,r36
1446 or H=H,AT };;
1447
1448{ .mii; nop.m 0x0
1449 cmp.leu p6,p0=D,H;;
1450(p6) sub H=H,D }
1451
1452{ .mlx; setf.sig f14=D
1453 movl AT=0xffffffff };;
1454///////////////////////////////////////////////////////////
// First (upper) quotient digit.
1455{ .mii; setf.sig f6=H
1456 shr.u HH=H,32;;
1457 cmp.eq p6,p7=HH,DH };;
1458{ .mfb;
1459(p6) setf.sig f8=AT
1460(p7) fcvt.xuf.s1 f6=f6
1461(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1462
1463{ .mfi; getf.sig r33=f8 // q
1464 xmpy.lu f9=f8,f14 }
1465{ .mfi; xmpy.hu f10=f8,f14
1466 shrp H=H,L,32 };;
1467
1468{ .mmi; getf.sig r35=f9 // tl
1469 getf.sig r31=f10 };; // th
1470
1471.L_divw_1st_iter:
1472{ .mii; (p0) add r32=-1,r33
1473 (p0) cmp.eq equ,cont=HH,r31 };;
1474{ .mii; (p0) cmp.ltu p8,p0=r35,D
1475 (p0) sub r34=r35,D
1476 (equ) cmp.leu break,cont=r35,H };;
1477{ .mib; (cont) cmp.leu cont,break=HH,r31
1478 (p8) add r31=-1,r31
1479(cont) br.wtop.spnt .L_divw_1st_iter };;
1480///////////////////////////////////////////////////////////
1481{ .mii; sub H=H,r35
1482 shl r8=r33,32
1483 shl L=L,32 };;
1484///////////////////////////////////////////////////////////
// Second (lower) quotient digit: same sequence as above.
1485{ .mii; setf.sig f6=H
1486 shr.u HH=H,32;;
1487 cmp.eq p6,p7=HH,DH };;
1488{ .mfb;
1489(p6) setf.sig f8=AT
1490(p7) fcvt.xuf.s1 f6=f6
1491(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1492
1493{ .mfi; getf.sig r33=f8 // q
1494 xmpy.lu f9=f8,f14 }
1495{ .mfi; xmpy.hu f10=f8,f14
1496 shrp H=H,L,32 };;
1497
1498{ .mmi; getf.sig r35=f9 // tl
1499 getf.sig r31=f10 };; // th
1500
1501.L_divw_2nd_iter:
1502{ .mii; (p0) add r32=-1,r33
1503 (p0) cmp.eq equ,cont=HH,r31 };;
1504{ .mii; (p0) cmp.ltu p8,p0=r35,D
1505 (p0) sub r34=r35,D
1506 (equ) cmp.leu break,cont=r35,H };;
1507{ .mib; (cont) cmp.leu cont,break=HH,r31
1508 (p8) add r31=-1,r31
1509(cont) br.wtop.spnt .L_divw_2nd_iter };;
1510///////////////////////////////////////////////////////////
// Epilogue: merge the low digit into r8 and restore the ar.pfs and
// predicate registers saved in the prologue.
1511{ .mii; sub H=H,r35
1512 or r8=r8,r33
1513 mov ar.pfs=r2 };;
1514{ .mii; shr.u r9=H,I // remainder if anybody wants it
1515 mov pr=r10,0x1ffff }
1516{ .mfb; br.ret.sptk.many b0 };;
1517
1518// Unsigned 64 by 32 (well, by 64 for the moment) bit integer division
1519// procedure.
1520//
1521// inputs: f6 = (double)a, f7 = (double)b
1522// output: f8 = (int)(a/b)
1523// clobbered: f8,f9,f10,f11,pred
1524pred=p15
1525// One can argue that this snippet is copyrighted to Intel
1526// Corporation, as it's essentially identical to one of those
1527// found in "Divide, Square Root and Remainder" section at
1528// http://www.intel.com/software/products/opensource/libraries/num.htm.
1529// Yes, I admit that the referred code was used as template,
1530// but after I realized that there hardly is any other instruction
1531// sequence which would perform this operation. I mean I figure that
1532// any independent attempt to implement high-performance division
1533// will result in code virtually identical to the Intel code. It
1534// should be noted though that below division kernel is 1 cycle
1535// faster than Intel one (note commented splits:-), not to mention
1536// original prologue (rather lack of one) and epilogue.
// Called with br.call through b6 and returns through b6, so b0 is left
// untouched by the helper.
1537.align 32
1538.skip 16
1539.L_udiv64_32_b6:
1540 frcpa.s1 f8,pred=f6,f7;; // [0] y0 = 1 / b
1541
1542(pred) fnma.s1 f9=f7,f8,f1 // [5] e0 = 1 - b * y0
1543(pred) fmpy.s1 f10=f6,f8;; // [5] q0 = a * y0
1544(pred) fmpy.s1 f11=f9,f9 // [10] e1 = e0 * e0
1545(pred) fma.s1 f10=f9,f10,f10;; // [10] q1 = q0 + e0 * q0
1546(pred) fma.s1 f8=f9,f8,f8 //;; // [15] y1 = y0 + e0 * y0
1547(pred) fma.s1 f9=f11,f10,f10;; // [15] q2 = q1 + e1 * q1
1548(pred) fma.s1 f8=f11,f8,f8 //;; // [20] y2 = y1 + e1 * y1
1549(pred) fnma.s1 f10=f7,f9,f6;; // [20] r2 = a - b * q2
1550(pred) fma.s1 f8=f10,f8,f9;; // [25] q3 = q2 + r2 * y2
1551
1552 fcvt.fxu.trunc.s1 f8=f8 // [30] q = trunc(q3)
1553 br.ret.sptk.many b6;;
1554.endp bn_div_words#
1555#endif
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s
deleted file mode 100644
index f3b16290eb..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2.s
+++ /dev/null
@@ -1,1618 +0,0 @@
1;
2; PA-RISC 2.0 implementation of bn_asm code, based on the
3; 64-bit version of the code. This code is effectively the
4; same as the 64-bit version except the register model is
5; slightly different given all values must be 32-bit between
6; function calls. Thus the 64-bit return values are returned
7; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
8;
9;
10; This code is approximately 2x faster than the C version
11; for RSA/DSA.
12;
13; See http://devresource.hp.com/ for more details on the PA-RISC
14; architecture. Also see the book "PA-RISC 2.0 Architecture"
15; by Gerry Kane for information on the instruction set architecture.
16;
17; Code written by Chris Ruemmler (with some help from the HP C
18; compiler).
19;
20; The code compiles with HP's assembler
21;
22
23 .level 2.0N
24 .space $TEXT$
25 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
26
27;
28; Global Register definitions used for the routines.
29;
30; Some information about HP's runtime architecture for 32-bits.
31;
32; "Caller save" means the calling function must save the register
33; if it wants the register to be preserved.
34; "Callee save" means if a function uses the register, it must save
35; the value before using it.
36;
37; For the floating point registers
38;
39; "caller save" registers: fr4-fr11, fr22-fr31
40; "callee save" registers: fr12-fr21
41; "special" registers: fr0-fr3 (status and exception registers)
42;
43; For the integer registers
44; value zero : r0
45; "caller save" registers: r1,r19-r26
46; "callee save" registers: r3-r18
47; return register : r2 (rp)
48; return values ; r28,r29 (ret0,ret1)
49; Stack pointer ; r30 (sp)
50; millicode return ptr ; r31 (also a caller save register)
51
52
53;
54; Arguments to the routines
55;
56r_ptr .reg %r26
57a_ptr .reg %r25
58b_ptr .reg %r24
59num .reg %r24
60n .reg %r23
61
62;
63; Note that the "w" argument for bn_mul_add_words and bn_mul_words
64; is passed on the stack at a delta of -56 from the top of stack
65; as the routine is entered.
66;
67
68;
69; Globals used in some routines
70;
71
72top_overflow .reg %r23
73high_mask .reg %r22 ; value 0xffffffff80000000L
74
75
76;------------------------------------------------------------------------------
77;
78; bn_mul_add_words
79;
80;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
81; int num, BN_ULONG w)
82;
83; arg0 = r_ptr
84; arg1 = a_ptr
85; arg2 = num
86; -56(sp) = w
87;
; For each of the num 64-bit words: r_ptr[i] = r_ptr[i] + a_ptr[i]*w + carry.
; The running carry word lives in %ret1; on exit its upper 32 bits are
; copied to %ret0 so the 64-bit result is returned in the ret0/ret1 pair.
;
; Each 64x64-bit product is assembled from four 32x32-bit XMPYU partial
; products (ht*w_l, lt*w_h, ht*w_h, lt*w_l) which are spilled to the stack
; frame to move them from the floating-point to the integer registers.
;
; The main loop handles two words per iteration; a single-word tail
; handles an odd num (or num == 1).
;
88; Local register definitions
89;
90
91fm1 .reg %fr22
92fm .reg %fr23
93ht_temp .reg %fr24
94ht_temp_1 .reg %fr25
95lt_temp .reg %fr26
96lt_temp_1 .reg %fr27
97fm1_1 .reg %fr28
98fm_1 .reg %fr29
99
100fw_h .reg %fr7L
101fw_l .reg %fr7R
102fw .reg %fr7
103
104fht_0 .reg %fr8L
105flt_0 .reg %fr8R
106t_float_0 .reg %fr8
107
108fht_1 .reg %fr9L
109flt_1 .reg %fr9R
110t_float_1 .reg %fr9
111
112tmp_0 .reg %r31
113tmp_1 .reg %r21
114m_0 .reg %r20
115m_1 .reg %r19
116ht_0 .reg %r1
117ht_1 .reg %r3
118lt_0 .reg %r4
119lt_1 .reg %r5
120m1_0 .reg %r6
121m1_1 .reg %r7
122rp_val .reg %r8
123rp_val_1 .reg %r9
124
125bn_mul_add_words
126 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
127 .proc
128 .callinfo frame=128
129 .entry
130 .align 64
131
; r3-r9 are used as locals below, so they are saved at the bottom of the
; 128-byte frame before %sp is advanced past it.
132 STD %r3,0(%sp) ; save r3
133 STD %r4,8(%sp) ; save r4
134 NOP ; Needed to make the loop 16-byte aligned
135 NOP ; needed to make the loop 16-byte aligned
136
137 STD %r5,16(%sp) ; save r5
138 NOP
139 STD %r6,24(%sp) ; save r6
140 STD %r7,32(%sp) ; save r7
141
142 STD %r8,40(%sp) ; save r8
143 STD %r9,48(%sp) ; save r9
144 COPY %r0,%ret1 ; return 0 by default
145 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
146
147 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
148 LDO 128(%sp),%sp ; bump stack
149
150 ;
151 ; The loop is unrolled twice, so if there is only 1 number
152 ; then go straight to the cleanup code.
153 ;
154 CMPIB,= 1,num,bn_mul_add_words_single_top
155 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
156
157 ;
158 ; This loop is unrolled 2 times (64-byte aligned as well)
159 ;
160 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
161 ; two 32-bit multiplies can be issued per cycle.
162 ;
163bn_mul_add_words_unroll2
164
165 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
166 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
167 LDD 0(r_ptr),rp_val ; rp[0]
168 LDD 8(r_ptr),rp_val_1 ; rp[1]
169
170 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
171 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
172 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
173 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
174
175 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
176 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
177 FSTD fm,-8(%sp) ; -8(sp) = m[0]
178 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
179
180 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
181 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
182 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
183 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
184
185 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
186 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
187 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
188 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
189
190 LDD -8(%sp),m_0 ; m[0]
191 LDD -40(%sp),m_1 ; m[1]
192 LDD -16(%sp),m1_0 ; m1[0]
193 LDD -48(%sp),m1_1 ; m1[1]
194
195 LDD -24(%sp),ht_0 ; ht[0]
196 LDD -56(%sp),ht_1 ; ht[1]
197 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
198 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
199
200 LDD -32(%sp),lt_0
201 LDD -64(%sp),lt_1
202 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
203 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
204
205 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
206 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
207 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
208 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
209
210 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
211 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
212 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
213 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
214
215 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
216 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
217 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
218 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
219
220 ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
221 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
222 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
223 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
224
225 LDO -2(num),num ; num = num - 2;
226 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
227 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
228 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
229
230 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
231 ADD,DC ht_1,%r0,%ret1 ; ht[1]++
232 LDO 16(a_ptr),a_ptr ; a_ptr += 2
233
234 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
235 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
236 LDO 16(r_ptr),r_ptr ; r_ptr += 2
237
238 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
239
240 ;
241 ; Top of loop aligned on 64-byte boundary
242 ;
243bn_mul_add_words_single_top
244 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
245 LDD 0(r_ptr),rp_val ; rp[0]
246 LDO 8(a_ptr),a_ptr ; a_ptr++
247 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
248 FSTD fm1,-16(%sp) ; -16(sp) = m1
249 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
250 FSTD fm,-8(%sp) ; -8(sp) = m
251 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
252 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
253 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
254 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
255
256 LDD -8(%sp),m_0
257 LDD -16(%sp),m1_0 ; m1 = temp1
258 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
259 LDD -24(%sp),ht_0
260 LDD -32(%sp),lt_0
261
262 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
263 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
264
265 EXTRD,U tmp_0,31,32,m_0 ; m>>32
266 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
267
268 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
269 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
270 ADD,DC ht_0,%r0,ht_0 ; ht++
271 ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
272 ADD,DC ht_0,%r0,ht_0 ; ht++
273 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
274 ADD,DC ht_0,%r0,%ret1 ; ht++
275 STD lt_0,0(r_ptr) ; rp[0] = lt
276
277bn_mul_add_words_exit
278 .EXIT
279
; The restore offsets are the save offsets minus the 128-byte frame
; (e.g. r9: 48-128 = -80); the final LDD,MB also steps %sp back by 128.
280 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
281 LDD -80(%sp),%r9 ; restore r9
282 LDD -88(%sp),%r8 ; restore r8
283 LDD -96(%sp),%r7 ; restore r7
284 LDD -104(%sp),%r6 ; restore r6
285 LDD -112(%sp),%r5 ; restore r5
286 LDD -120(%sp),%r4 ; restore r4
287 BVE (%rp)
288 LDD,MB -128(%sp),%r3 ; restore r3
289 .PROCEND ;in=23,24,25,26,29;out=28;
290
291;----------------------------------------------------------------------------
292;
293;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
294;
295; arg0 = rp
296; arg1 = ap
297; arg2 = num
298; w on stack at -56(sp)
299
300bn_mul_words
301 .proc
302 .callinfo frame=128
303 .entry
304 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
305 .align 64
306
307 STD %r3,0(%sp) ; save r3
308 STD %r4,8(%sp) ; save r4
309 NOP
310 STD %r5,16(%sp) ; save r5
311
312 STD %r6,24(%sp) ; save r6
313 STD %r7,32(%sp) ; save r7
314 COPY %r0,%ret1 ; return 0 by default
315 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
316
317 CMPIB,>= 0,num,bn_mul_words_exit
318 LDO 128(%sp),%sp ; bump stack
319
320 ;
321 ; See if only 1 word to do, thus just do cleanup
322 ;
323 CMPIB,= 1,num,bn_mul_words_single_top
324 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
325
326 ;
327 ; This loop is unrolled 2 times (64-byte aligned as well)
328 ;
329 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
330 ; two 32-bit mutiplies can be issued per cycle.
331 ;
332bn_mul_words_unroll2
333
334 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
335 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
336 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
337 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
338
339 FSTD fm1,-16(%sp) ; -16(sp) = m1
340 FSTD fm1_1,-48(%sp) ; -48(sp) = m1
341 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
342 XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
343
344 FSTD fm,-8(%sp) ; -8(sp) = m
345 FSTD fm_1,-40(%sp) ; -40(sp) = m
346 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
347 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
348
349 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
350 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
351 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
352 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
353
354 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
355 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
356 LDD -8(%sp),m_0
357 LDD -40(%sp),m_1
358
359 LDD -16(%sp),m1_0
360 LDD -48(%sp),m1_1
361 LDD -24(%sp),ht_0
362 LDD -56(%sp),ht_1
363
364 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
365 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
366 LDD -32(%sp),lt_0
367 LDD -64(%sp),lt_1
368
369 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
370 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
371 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
372 ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
373
374 EXTRD,U tmp_0,31,32,m_0 ; m>>32
375 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
376 EXTRD,U tmp_1,31,32,m_1 ; m>>32
377 DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
378
379 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
380 ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
381 ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
382 ADD,DC ht_0,%r0,ht_0 ; ht++
383
384 ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
385 ADD,DC ht_1,%r0,ht_1 ; ht++
386 ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1);
387 ADD,DC ht_0,%r0,ht_0 ; ht++
388
389 ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
390 ADD,DC ht_1,%r0,ht_1 ; ht++
391 STD lt_0,0(r_ptr) ; rp[0] = lt
392 STD lt_1,8(r_ptr) ; rp[1] = lt
393
394 COPY ht_1,%ret1 ; carry = ht
395 LDO -2(num),num ; num = num - 2;
396 LDO 16(a_ptr),a_ptr ; ap += 2
397 CMPIB,<= 2,num,bn_mul_words_unroll2
398 LDO 16(r_ptr),r_ptr ; rp++
399
400 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
401
402 ;
403 ; Top of loop aligned on 64-byte boundary
404 ;
405bn_mul_words_single_top
406 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
407
408 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
409 FSTD fm1,-16(%sp) ; -16(sp) = m1
410 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
411 FSTD fm,-8(%sp) ; -8(sp) = m
412 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
413 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
414 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
415 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
416
417 LDD -8(%sp),m_0
418 LDD -16(%sp),m1_0
419 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
420 LDD -24(%sp),ht_0
421 LDD -32(%sp),lt_0
422
423 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
424 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
425
426 EXTRD,U tmp_0,31,32,m_0 ; m>>32
427 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
428
429 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
430 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
431 ADD,DC ht_0,%r0,ht_0 ; ht++
432
433 ADD %ret1,lt_0,lt_0 ; lt = lt + c;
434 ADD,DC ht_0,%r0,ht_0 ; ht++
435
436 COPY ht_0,%ret1 ; copy carry
437 STD lt_0,0(r_ptr) ; rp[0] = lt
438
439bn_mul_words_exit
440 .EXIT
441 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
442 LDD -96(%sp),%r7 ; restore r7
443 LDD -104(%sp),%r6 ; restore r6
444 LDD -112(%sp),%r5 ; restore r5
445 LDD -120(%sp),%r4 ; restore r4
446 BVE (%rp)
447 LDD,MB -128(%sp),%r3 ; restore r3
448 .PROCEND
449
450;----------------------------------------------------------------------------
451; bn_sqr_words: for each of the num input words, store its 128-bit square as rp[2i] (low) and rp[2i+1] (high)
452;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
453;
454; arg0 = rp
455; arg1 = ap
456; arg2 = num
457;
458
459bn_sqr_words
460 .proc
461 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
462 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
463 .entry
464 .align 64
465
466 STD %r3,0(%sp) ; save r3
467 STD %r4,8(%sp) ; save r4
468 NOP
469 STD %r5,16(%sp) ; save r5
470
471 CMPIB,>= 0,num,bn_sqr_words_exit ; nothing to do when num <= 0
472 LDO 128(%sp),%sp ; bump stack (delay slot: runs on both paths)
473
474 ;
475 ; If only 1, then go straight to cleanup
476 ;
477 CMPIB,= 1,num,bn_sqr_words_single_top
478 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L (delay slot: set on both paths)
479
480 ;
481 ; This loop is unrolled 2 times (64-byte aligned as well)
482 ; With a = ht:lt (32-bit halves): a*a = (ht*ht << 64) + (2*ht*lt << 32) + lt*lt, where m = ht*lt
483
484bn_sqr_words_unroll2
485 FLDD 0(a_ptr),t_float_0 ; a[0]
486 FLDD 8(a_ptr),t_float_1 ; a[1]
487 XMPYU fht_0,flt_0,fm ; m[0]
488 XMPYU fht_1,flt_1,fm_1 ; m[1]
489
490 FSTD fm,-24(%sp) ; store m[0]
491 FSTD fm_1,-56(%sp) ; store m[1]
492 XMPYU flt_0,flt_0,lt_temp ; lt[0]
493 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
494
495 FSTD lt_temp,-16(%sp) ; store lt[0]
496 FSTD lt_temp_1,-48(%sp) ; store lt[1]
497 XMPYU fht_0,fht_0,ht_temp ; ht[0]
498 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
499
500 FSTD ht_temp,-8(%sp) ; store ht[0]
501 FSTD ht_temp_1,-40(%sp) ; store ht[1]
502 LDD -24(%sp),m_0
503 LDD -56(%sp),m_1
504
505 AND m_0,high_mask,tmp_0 ; m[0] & Mask
506 AND m_1,high_mask,tmp_1 ; m[1] & Mask
507 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
508 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
509
510 LDD -16(%sp),lt_0
511 LDD -48(%sp),lt_1
512 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
513 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
514
515 LDD -8(%sp),ht_0
516 LDD -40(%sp),ht_1
517 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
518 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
519
520 ADD lt_0,m_0,lt_0 ; lt = lt+m
521 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
522 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
523 STD ht_0,8(r_ptr) ; rp[1] = ht[0]
524
525 ADD lt_1,m_1,lt_1 ; lt = lt+m
526 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
527 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
528 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
529
530 LDO -2(num),num ; num = num - 2;
531 LDO 16(a_ptr),a_ptr ; ap += 2
532 CMPIB,<= 2,num,bn_sqr_words_unroll2 ; loop while at least 2 words remain
533 LDO 32(r_ptr),r_ptr ; rp += 4 (delay slot)
534
535 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
536
537 ; Handles the single remaining word (odd num, or num == 1 on entry).
538 ; Top of loop aligned on 64-byte boundary
539 ; NOTE(review): no .align directive precedes this label, so the alignment claim above is not enforced here.
540bn_sqr_words_single_top
541 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
542
543 XMPYU fht_0,flt_0,fm ; m
544 FSTD fm,-24(%sp) ; store m
545
546 XMPYU flt_0,flt_0,lt_temp ; lt
547 FSTD lt_temp,-16(%sp) ; store lt
548
549 XMPYU fht_0,fht_0,ht_temp ; ht
550 FSTD ht_temp,-8(%sp) ; store ht
551
552 LDD -24(%sp),m_0 ; load m
553 AND m_0,high_mask,tmp_0 ; m & Mask
554 DEPD,Z m_0,30,31,m_0 ; m << 32+1
555 LDD -16(%sp),lt_0 ; lt
556
557 LDD -8(%sp),ht_0 ; ht
558 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
559 ADD m_0,lt_0,lt_0 ; lt = lt+m
560 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 (ADD,L, so the carry from the line above survives)
561 ADD,DC ht_0,%r0,ht_0 ; ht++
562
563 STD lt_0,0(r_ptr) ; rp[0] = lt
564 STD ht_0,8(r_ptr) ; rp[1] = ht
565
566bn_sqr_words_exit
567 .EXIT
568 LDD -112(%sp),%r5 ; restore r5
569 LDD -120(%sp),%r4 ; restore r4
570 BVE (%rp)
571 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot of BVE)
572 .PROCEND ;in=23,24,25,26,29;out=28;
573
574
575;----------------------------------------------------------------------------
576; bn_add_words: r[i] = a[i] + b[i] + carry for n words; returns the final carry (0 or 1)
577;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
578;
579; arg0 = rp
580; arg1 = ap
581; arg2 = bp
582; arg3 = n
583
584t .reg %r22
585b .reg %r21
586l .reg %r20
587
588bn_add_words
589 .proc
590 .entry
591 .callinfo
592 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
593 .align 64
594
595 CMPIB,>= 0,n,bn_add_words_exit ; nothing to do when n <= 0
596 COPY %r0,%ret1 ; return 0 by default (delay slot; ret1 is the running carry c)
597
598 ;
599 ; If 2 or more numbers do the loop
600 ;
601 CMPIB,= 1,n,bn_add_words_single_top
602 NOP
603
604 ;
605 ; This loop is unrolled 2 times (64-byte aligned as well)
606 ;
607bn_add_words_unroll2
608 LDD 0(a_ptr),t
609 LDD 0(b_ptr),b
610 ADD t,%ret1,t ; t = t+c;
611 ADD,DC %r0,%r0,%ret1 ; set c to carry
612 ADD t,b,l ; l = t + b[0]
613 ADD,DC %ret1,%r0,%ret1 ; c+= carry
614 STD l,0(r_ptr) ; r[0] = l
615
616 LDD 8(a_ptr),t
617 LDD 8(b_ptr),b
618 ADD t,%ret1,t ; t = t+c;
619 ADD,DC %r0,%r0,%ret1 ; set c to carry
620 ADD t,b,l ; l = t + b[1]
621 ADD,DC %ret1,%r0,%ret1 ; c+= carry
622 STD l,8(r_ptr) ; r[1] = l
623
624 LDO -2(n),n
625 LDO 16(a_ptr),a_ptr
626 LDO 16(b_ptr),b_ptr
627
628 CMPIB,<= 2,n,bn_add_words_unroll2 ; loop while at least 2 words remain
629 LDO 16(r_ptr),r_ptr ; r += 2 (delay slot)
630
631 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
632
633bn_add_words_single_top
634 LDD 0(a_ptr),t
635 LDD 0(b_ptr),b
636
637 ADD t,%ret1,t ; t = t+c;
638 ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??)
639 ADD t,b,l ; l = t + b[0]
640 ADD,DC %ret1,%r0,%ret1 ; c+= carry
641 STD l,0(r_ptr)
642
643bn_add_words_exit
644 .EXIT
645 BVE (%rp)
646 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
647 .PROCEND ;in=23,24,25,26,29;out=28;
648
649;----------------------------------------------------------------------------
650; bn_sub_words: r[i] = a[i] - b[i] - borrow for n words; returns the final borrow (0 or 1)
651;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
652;
653; arg0 = rp
654; arg1 = ap
655; arg2 = bp
656; arg3 = n
657
658t1 .reg %r22
659t2 .reg %r21
660sub_tmp1 .reg %r20
661sub_tmp2 .reg %r19
662
663
664bn_sub_words
665 .proc
666 .callinfo
667 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
668 .entry
669 .align 64
670
671 CMPIB,>= 0,n,bn_sub_words_exit ; nothing to do when n <= 0
672 COPY %r0,%ret1 ; return 0 by default (delay slot; ret1 is the running borrow c)
673
674 ;
675 ; If 2 or more numbers do the loop
676 ;
677 CMPIB,= 1,n,bn_sub_words_single_top
678 NOP
679
680 ;
681 ; This loop is unrolled 2 times (64-byte aligned as well)
682 ; Borrow rule per word: if (t1 != t2) c = (t1 < t2); if t1 == t2 the incoming borrow is kept.
683bn_sub_words_unroll2
684 LDD 0(a_ptr),t1
685 LDD 0(b_ptr),t2
686 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
687 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
688
689 CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip the LDO if t1 > t2
690 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
691
692 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 skip the COPY (borrow unchanged)
693 COPY sub_tmp2,%ret1 ; c = (t1 < t2)
694 STD sub_tmp1,0(r_ptr)
695
696 LDD 8(a_ptr),t1
697 LDD 8(b_ptr),t2
698 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
699 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
700 CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip the LDO if t1 > t2
701 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
702
703 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 skip the COPY (borrow unchanged)
704 COPY sub_tmp2,%ret1 ; c = (t1 < t2)
705 STD sub_tmp1,8(r_ptr)
706
707 LDO -2(n),n
708 LDO 16(a_ptr),a_ptr
709 LDO 16(b_ptr),b_ptr
710
711 CMPIB,<= 2,n,bn_sub_words_unroll2 ; loop while at least 2 words remain
712 LDO 16(r_ptr),r_ptr ; r += 2 (delay slot)
713
714 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
715
716bn_sub_words_single_top
717 LDD 0(a_ptr),t1
718 LDD 0(b_ptr),t2
719 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
720 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
721 CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip the LDO if t1 > t2
722 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
723
724 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 skip the COPY (borrow unchanged)
725 COPY sub_tmp2,%ret1 ; c = (t1 < t2)
726
727 STD sub_tmp1,0(r_ptr)
728
729bn_sub_words_exit
730 .EXIT
731 BVE (%rp)
732 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
733 .PROCEND ;in=23,24,25,26,29;out=28;
734
735;------------------------------------------------------------------------------
736;
737; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
738;
739; arg0 = h
740; arg1 = l
741; arg2 = d
742; NOTE(review): "argN" above is the C-level argument, not the argN register: the code builds h from %r25:%r26, l from %r23:%r24 and loads d from the stack.
743; This is mainly just output from the HP C compiler.
744; The $000600xx / $Dn labels and the ";offset" comments are compiler-generated; the statement order (delay slots, ,N nullification) must not be changed.
745;------------------------------------------------------------------------------
746bn_div_words
747 .PROC
748 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
749 .IMPORT BN_num_bits_word,CODE
750 ;--- not PIC .IMPORT __iob,DATA
751 ;--- not PIC .IMPORT fprintf,CODE
752 .IMPORT abort,CODE
753 .IMPORT $$div2U,MILLICODE
754 .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
755 .ENTRY
756 STW %r2,-20(%r30) ;offset 0x8ec ; save return pointer
757 STW,MA %r3,192(%r30) ;offset 0x8f0 ; save r3 and bump %sp by 192
758 STW %r4,-188(%r30) ;offset 0x8f4
759 DEPD %r5,31,32,%r6 ;offset 0x8f8
760 STD %r6,-184(%r30) ;offset 0x8fc
761 DEPD %r7,31,32,%r8 ;offset 0x900
762 STD %r8,-176(%r30) ;offset 0x904
763 STW %r9,-168(%r30) ;offset 0x908
764 LDD -248(%r30),%r3 ;offset 0x90c ; r3 = 64-bit value at entry %sp-56 (the divisor d, tested for zero below)
765 COPY %r26,%r4 ;offset 0x910
766 COPY %r24,%r5 ;offset 0x914
767 DEPD %r25,31,32,%r4 ;offset 0x918 ; r4 = %r25:%r26 (h)
768 CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c ; d != 0 -> main path
769 DEPD %r23,31,32,%r5 ;offset 0x920 ; r5 = %r23:%r24 (l); delay slot
770 MOVIB,TR -1,%r29,$00060002 ;offset 0x924 ; d == 0: result = all ones
771 EXTRD,U %r29,31,32,%r28 ;offset 0x928
772$0006002A
773 LDO -1(%r29),%r29 ;offset 0x92c
774 SUB %r23,%r7,%r23 ;offset 0x930
775$00060024
776 SUB %r4,%r31,%r25 ;offset 0x934
777 AND %r25,%r19,%r26 ;offset 0x938
778 CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c
779 DEPD,Z %r25,31,32,%r20 ;offset 0x940
780 OR %r20,%r24,%r21 ;offset 0x944
781 CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948
782 SUB %r31,%r2,%r31 ;offset 0x94c
783$00060046
784$0006002E
785 DEPD,Z %r23,31,32,%r25 ;offset 0x950
786 EXTRD,U %r23,31,32,%r26 ;offset 0x954
787 AND %r25,%r19,%r24 ;offset 0x958
788 ADD,L %r31,%r26,%r31 ;offset 0x95c
789 CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960
790 LDO 1(%r31),%r31 ;offset 0x964
791$00060032
792 CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968
793 LDO -1(%r29),%r29 ;offset 0x96c
794 ADD,L %r4,%r3,%r4 ;offset 0x970
795$00060036
796 ADDIB,=,N -1,%r8,$D0 ;offset 0x974 ; r8 is a down-counter initialised to 2 at offset 0xa18
797 SUB %r5,%r24,%r28 ;offset 0x978
798$0006003A
799 SUB %r4,%r31,%r24 ;offset 0x97c
800 SHRPD %r24,%r28,32,%r4 ;offset 0x980
801 DEPD,Z %r29,31,32,%r9 ;offset 0x984
802 DEPD,Z %r28,31,32,%r5 ;offset 0x988
803$0006001C
804 EXTRD,U %r4,31,32,%r31 ;offset 0x98c
805 CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990
806 MOVB,TR %r6,%r29,$D1 ;offset 0x994
807 STD %r29,-152(%r30) ;offset 0x998
808$0006000C
809 EXTRD,U %r3,31,32,%r25 ;offset 0x99c
810 COPY %r3,%r26 ;offset 0x9a0
811 EXTRD,U %r3,31,32,%r9 ;offset 0x9a4
812 EXTRD,U %r4,31,32,%r8 ;offset 0x9a8
813 .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28;
814 B,L BN_num_bits_word,%r2 ;offset 0x9ac ; r28 = BN_num_bits_word(d)
815 EXTRD,U %r5,31,32,%r7 ;offset 0x9b0
816 LDI 64,%r20 ;offset 0x9b4
817 DEPD %r7,31,32,%r5 ;offset 0x9b8
818 DEPD %r8,31,32,%r4 ;offset 0x9bc
819 DEPD %r9,31,32,%r3 ;offset 0x9c0
820 CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 ; bit count == 64: skip the range check below
821 COPY %r28,%r24 ;offset 0x9c8
822 MTSARCM %r24 ;offset 0x9cc
823 DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0
824 CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 ; h too large -> abort path at $D2
825$00060012
826 SUBI 64,%r24,%r31 ;offset 0x9d8 ; r31 = 64 - bit count
827 CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc ; if (h >= d)
828 SUB %r4,%r3,%r4 ;offset 0x9e0 ; h -= d
829$00060016
830 CMPB,= %r31,%r0,$0006001A ;offset 0x9e4
831 COPY %r0,%r9 ;offset 0x9e8
832 MTSARCM %r31 ;offset 0x9ec
833 DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0
834 SUBI 64,%r31,%r26 ;offset 0x9f4
835 MTSAR %r26 ;offset 0x9f8
836 SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc
837 MTSARCM %r31 ;offset 0xa00
838 DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04
839$0006001A
840 DEPDI,Z -1,31,32,%r19 ;offset 0xa08
841 AND %r3,%r19,%r29 ;offset 0xa0c
842 EXTRD,U %r29,31,32,%r2 ;offset 0xa10
843 DEPDI,Z -1,63,32,%r6 ;offset 0xa14
844 MOVIB,TR 2,%r8,$0006001C ;offset 0xa18
845 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c
846$D2
847 ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20
848 ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24
849 ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28
850 ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28;
851 ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c
852 ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30
853 .CALL ;
854 B,L abort,%r2 ;offset 0xa34 ; the fprintf of the diagnostic above is commented out; only abort() is called
855 NOP ;offset 0xa38
856 B $D3 ;offset 0xa3c
857 LDW -212(%r30),%r2 ;offset 0xa40
858$00060020
859 COPY %r4,%r26 ;offset 0xa44
860 EXTRD,U %r4,31,32,%r25 ;offset 0xa48
861 COPY %r2,%r24 ;offset 0xa4c
862 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
863 B,L $$div2U,%r31 ;offset 0xa50
864 EXTRD,U %r2,31,32,%r23 ;offset 0xa54
865 DEPD %r28,31,32,%r29 ;offset 0xa58
866$00060022
867 STD %r29,-152(%r30) ;offset 0xa5c
868$D1
869 AND %r5,%r19,%r24 ;offset 0xa60
870 EXTRD,U %r24,31,32,%r24 ;offset 0xa64
871 STW %r2,-160(%r30) ;offset 0xa68
872 STW %r7,-128(%r30) ;offset 0xa6c
873 FLDD -152(%r30),%fr4 ;offset 0xa70
874 FLDD -152(%r30),%fr7 ;offset 0xa74
875 FLDW -160(%r30),%fr8L ;offset 0xa78
876 FLDW -128(%r30),%fr5L ;offset 0xa7c
877 XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80
878 FSTD %fr10,-136(%r30) ;offset 0xa84
879 XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88
880 FSTD %fr22,-144(%r30) ;offset 0xa8c
881 XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90
882 XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94
883 FSTD %fr11,-112(%r30) ;offset 0xa98
884 FSTD %fr23,-120(%r30) ;offset 0xa9c
885 LDD -136(%r30),%r28 ;offset 0xaa0
886 DEPD,Z %r28,31,32,%r31 ;offset 0xaa4
887 LDD -144(%r30),%r20 ;offset 0xaa8
888 ADD,L %r20,%r31,%r31 ;offset 0xaac
889 LDD -112(%r30),%r22 ;offset 0xab0
890 DEPD,Z %r22,31,32,%r22 ;offset 0xab4
891 LDD -120(%r30),%r21 ;offset 0xab8
892 B $00060024 ;offset 0xabc
893 ADD,L %r21,%r22,%r23 ;offset 0xac0
894$D0
895 OR %r9,%r29,%r29 ;offset 0xac4
896$00060040
897 EXTRD,U %r29,31,32,%r28 ;offset 0xac8 ; r28 = high 32 bits of the result in r29
898$00060002
899$L2
900 LDW -212(%r30),%r2 ;offset 0xacc ; reload return pointer (saved at entry %sp-20)
901$D3
902 LDW -168(%r30),%r9 ;offset 0xad0
903 LDD -176(%r30),%r8 ;offset 0xad4
904 EXTRD,U %r8,31,32,%r7 ;offset 0xad8
905 LDD -184(%r30),%r6 ;offset 0xadc
906 EXTRD,U %r6,31,32,%r5 ;offset 0xae0
907 LDW -188(%r30),%r4 ;offset 0xae4
908 BVE (%r2) ;offset 0xae8
909 .EXIT
910 LDW,MB -192(%r30),%r3 ;offset 0xaec ; restore r3 and pop the 192-byte frame (delay slot)
911 .PROCEND ;in=23,25;out=28,29;fpin=105,107;
912
913
914
915
916;----------------------------------------------------------------------------
917;
918; Registers to hold 64-bit values to manipulate. The "L" part
919; of the register corresponds to the upper 32-bits, while the "R"
920; part corresponds to the lower 32-bits
921;
922; Note, that when using b6 and b7, the code must save these before
923; using them because they are callee save registers
924;
925;
926; Floating point registers to use to save values that
927; are manipulated. These don't collide with ftemp1-4 and
928; are all caller save registers
929;
930a0 .reg %fr22
931a0L .reg %fr22L
932a0R .reg %fr22R
933
934a1 .reg %fr23
935a1L .reg %fr23L
936a1R .reg %fr23R
937
938a2 .reg %fr24
939a2L .reg %fr24L
940a2R .reg %fr24R
941
942a3 .reg %fr25
943a3L .reg %fr25L
944a3R .reg %fr25R
945
946a4 .reg %fr26
947a4L .reg %fr26L
948a4R .reg %fr26R
949
950a5 .reg %fr27
951a5L .reg %fr27L
952a5R .reg %fr27R
953
954a6 .reg %fr28
955a6L .reg %fr28L
956a6R .reg %fr28R
957
958a7 .reg %fr29
959a7L .reg %fr29L
960a7R .reg %fr29R
961
962b0 .reg %fr30
963b0L .reg %fr30L
964b0R .reg %fr30R
965
966b1 .reg %fr31
967b1L .reg %fr31L
968b1R .reg %fr31R
969
970;
971; Temporary floating point variables, these are all caller save
972; registers
973;
974ftemp1 .reg %fr4
975ftemp2 .reg %fr5
976ftemp3 .reg %fr6
977ftemp4 .reg %fr7
978
979;
980; The B set of registers when used.
981;
982
983b2 .reg %fr8
984b2L .reg %fr8L
985b2R .reg %fr8R
986
987b3 .reg %fr9
988b3L .reg %fr9L
989b3R .reg %fr9R
990
991b4 .reg %fr10
992b4L .reg %fr10L
993b4R .reg %fr10R
994
995b5 .reg %fr11
996b5L .reg %fr11L
997b5R .reg %fr11R
998
999b6 .reg %fr12
1000b6L .reg %fr12L
1001b6R .reg %fr12R
1002
1003b7 .reg %fr13
1004b7L .reg %fr13L
1005b7R .reg %fr13R
1006
1007c1 .reg %r21 ; only reg
1008temp1 .reg %r20 ; only reg
1009temp2 .reg %r19 ; only reg
1010temp3 .reg %r31 ; only reg
1011 ; Integer scratch used inside the SQR_ADD_C / SQR_ADD_C2 / MUL_ADD_C macros:
1012m1 .reg %r28 ; second cross product
1013c2 .reg %r23 ; accumulator word
1014high_one .reg %r1 ; holds 1 << 32 (set with DEPDI,Z 1,31,1,high_one by the comba routines)
1015ht .reg %r6 ; high partial product; %r6 is saved/restored by the comba routines
1016lt .reg %r5 ; low partial product; %r5 is saved/restored by the comba routines
1017m .reg %r4 ; cross-product sum; %r4 is saved/restored by the comba routines
1018c3 .reg %r3 ; accumulator word; %r3 is saved/restored by the comba routines
1019 ; SQR_ADD_C: add the 128-bit square of the 64-bit value A0L:A0R into the accumulator C3:C2:C1
1020SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1021 XMPYU A0L,A0R,ftemp1 ; m = hi*lo
1022 FSTD ftemp1,-24(%sp) ; store m
1023
1024 XMPYU A0R,A0R,ftemp2 ; lt = lo*lo
1025 FSTD ftemp2,-16(%sp) ; store lt
1026
1027 XMPYU A0L,A0L,ftemp3 ; ht = hi*hi
1028 FSTD ftemp3,-8(%sp) ; store ht
1029
1030 LDD -24(%sp),m ; load m
1031 AND m,high_mask,temp2 ; m & Mask (high_mask must already hold 0xffffffff80000000)
1032 DEPD,Z m,30,31,temp3 ; m << 32+1
1033 LDD -16(%sp),lt ; lt
1034
1035 LDD -8(%sp),ht ; ht
1036 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1037 ADD temp3,lt,lt ; lt = lt+m
1038 ADD,L ht,temp1,ht ; ht += temp1
1039 ADD,DC ht,%r0,ht ; ht++
1040
1041 ADD C1,lt,C1 ; c1=c1+lt
1042 ADD,DC ht,%r0,ht ; ht++
1043
1044 ADD C2,ht,C2 ; c2=c2+ht
1045 ADD,DC C3,%r0,C3 ; c3++
1046.endm
1047 ; SQR_ADD_C2: add 2 * (A0L:A0R) * (A1L:A1R) into the accumulator C3:C2:C1
1048SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1049 XMPYU A0L,A1R,ftemp1 ; m1 = A0.hi * A1.lo
1050 FSTD ftemp1,-16(%sp) ;
1051 XMPYU A0R,A1L,ftemp2 ; m = A0.lo * A1.hi
1052 FSTD ftemp2,-8(%sp) ;
1053 XMPYU A0R,A1R,ftemp3 ; lt = A0.lo * A1.lo
1054 FSTD ftemp3,-32(%sp)
1055 XMPYU A0L,A1L,ftemp4 ; ht = A0.hi * A1.hi
1056 FSTD ftemp4,-24(%sp) ;
1057
1058 LDD -8(%sp),m ; m
1059 LDD -16(%sp),m1 ; m1
1060 ADD,L m,m1,m ; m+m1
1061
1062 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1063 LDD -24(%sp),ht ; ht
1064
1065 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m + m1 wrapped; otherwise the next ADD is nullified
1066 ADD,L ht,high_one,ht ; ht+=high_one
1067
1068 EXTRD,U m,31,32,temp1 ; m >> 32
1069 LDD -32(%sp),lt ; lt
1070 ADD,L ht,temp1,ht ; ht+= m>>32
1071 ADD lt,temp3,lt ; lt = lt+m1
1072 ADD,DC ht,%r0,ht ; ht++
1073
1074 ADD ht,ht,ht ; ht=ht+ht;
1075 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1076
1077 ADD lt,lt,lt ; lt=lt+lt;
1078 ADD,DC ht,%r0,ht ; add in carry (ht++)
1079
1080 ADD C1,lt,C1 ; c1=c1+lt
1081 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1082 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1083
1084 ADD C2,ht,C2 ; c2 = c2 + ht
1085 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1086.endm
1087
1088; bn_sqr_comba8: r[0..15] = a[0..7] squared, computed column by column with a rotating three-word accumulator (c1,c2,c3)
1089;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1090; arg0 = r_ptr
1091; arg1 = a_ptr
1092;
1093
1094bn_sqr_comba8
1095 .PROC
1096 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1097 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1098 .ENTRY
1099 .align 64
1100
1101 STD %r3,0(%sp) ; save r3
1102 STD %r4,8(%sp) ; save r4
1103 STD %r5,16(%sp) ; save r5
1104 STD %r6,24(%sp) ; save r6
1105
1106 ;
1107 ; Zero out carries
1108 ;
1109 COPY %r0,c1
1110 COPY %r0,c2
1111 COPY %r0,c3
1112
1113 LDO 128(%sp),%sp ; bump stack
1114 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1115 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1116
1117 ;
1118 ; Load up all of the values we are going to use
1119 ;
1120 FLDD 0(a_ptr),a0
1121 FLDD 8(a_ptr),a1
1122 FLDD 16(a_ptr),a2
1123 FLDD 24(a_ptr),a3
1124 FLDD 32(a_ptr),a4
1125 FLDD 40(a_ptr),a5
1126 FLDD 48(a_ptr),a6
1127 FLDD 56(a_ptr),a7
1128 ; Column k sums a[i]*a[j] over i+j == k: SQR_ADD_C for i == j, SQR_ADD_C2 (doubled) for i != j
1129 SQR_ADD_C a0L,a0R,c1,c2,c3
1130 STD c1,0(r_ptr) ; r[0] = c1;
1131 COPY %r0,c1
1132
1133 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1134 STD c2,8(r_ptr) ; r[1] = c2;
1135 COPY %r0,c2
1136
1137 SQR_ADD_C a1L,a1R,c3,c1,c2
1138 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1139 STD c3,16(r_ptr) ; r[2] = c3;
1140 COPY %r0,c3
1141
1142 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1143 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1144 STD c1,24(r_ptr) ; r[3] = c1;
1145 COPY %r0,c1
1146
1147 SQR_ADD_C a2L,a2R,c2,c3,c1
1148 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1149 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1150 STD c2,32(r_ptr) ; r[4] = c2;
1151 COPY %r0,c2
1152
1153 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1154 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1155 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1156 STD c3,40(r_ptr) ; r[5] = c3;
1157 COPY %r0,c3
1158
1159 SQR_ADD_C a3L,a3R,c1,c2,c3
1160 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1161 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1162 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1163 STD c1,48(r_ptr) ; r[6] = c1;
1164 COPY %r0,c1
1165
1166 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1167 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1168 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1169 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1170 STD c2,56(r_ptr) ; r[7] = c2;
1171 COPY %r0,c2
1172
1173 SQR_ADD_C a4L,a4R,c3,c1,c2
1174 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1175 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1176 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1177 STD c3,64(r_ptr) ; r[8] = c3;
1178 COPY %r0,c3
1179
1180 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1181 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1182 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1183 STD c1,72(r_ptr) ; r[9] = c1;
1184 COPY %r0,c1
1185
1186 SQR_ADD_C a5L,a5R,c2,c3,c1
1187 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1188 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1189 STD c2,80(r_ptr) ; r[10] = c2;
1190 COPY %r0,c2
1191
1192 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1193 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1194 STD c3,88(r_ptr) ; r[11] = c3;
1195 COPY %r0,c3
1196
1197 SQR_ADD_C a6L,a6R,c1,c2,c3
1198 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1199 STD c1,96(r_ptr) ; r[12] = c1;
1200 COPY %r0,c1
1201
1202 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1203 STD c2,104(r_ptr) ; r[13] = c2;
1204 COPY %r0,c2
1205
1206 SQR_ADD_C a7L,a7R,c3,c1,c2
1207 STD c3, 112(r_ptr) ; r[14] = c3
1208 STD c1, 120(r_ptr) ; r[15] = c1
1209
1210 .EXIT
1211 LDD -104(%sp),%r6 ; restore r6
1212 LDD -112(%sp),%r5 ; restore r5
1213 LDD -120(%sp),%r4 ; restore r4
1214 BVE (%rp)
1215 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot of BVE)
1216
1217 .PROCEND
1218
1219;-----------------------------------------------------------------------------
1220; bn_sqr_comba4: r[0..7] = a[0..3] squared, computed column by column with a rotating three-word accumulator (c1,c2,c3)
1221;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1222; arg0 = r_ptr
1223; arg1 = a_ptr
1224;
1225
1226bn_sqr_comba4
1227 .proc
1228 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1229 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1230 .entry
1231 .align 64
1232 STD %r3,0(%sp) ; save r3
1233 STD %r4,8(%sp) ; save r4
1234 STD %r5,16(%sp) ; save r5
1235 STD %r6,24(%sp) ; save r6
1236
1237 ;
1238 ; Zero out carries
1239 ;
1240 COPY %r0,c1
1241 COPY %r0,c2
1242 COPY %r0,c3
1243
1244 LDO 128(%sp),%sp ; bump stack
1245 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1246 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1247
1248 ;
1249 ; Load up all of the values we are going to use
1250 ;
1251 FLDD 0(a_ptr),a0
1252 FLDD 8(a_ptr),a1
1253 FLDD 16(a_ptr),a2
1254 FLDD 24(a_ptr),a3
1255 ; a4..a7 are intentionally NOT loaded: the input is only 4 words long
1256 ; (offsets 0..24), so FLDD from 32..56(a_ptr) would read past the end
1257 ; of the caller's array, and none of the macro invocations below use
1258 ; a4..a7 anyway.
1259
1260 SQR_ADD_C a0L,a0R,c1,c2,c3
1261
1262 STD c1,0(r_ptr) ; r[0] = c1;
1263 COPY %r0,c1
1264
1265 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1266
1267 STD c2,8(r_ptr) ; r[1] = c2;
1268 COPY %r0,c2
1269
1270 SQR_ADD_C a1L,a1R,c3,c1,c2
1271 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1272
1273 STD c3,16(r_ptr) ; r[2] = c3;
1274 COPY %r0,c3
1275
1276 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1277 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1278
1279 STD c1,24(r_ptr) ; r[3] = c1;
1280 COPY %r0,c1
1281
1282 SQR_ADD_C a2L,a2R,c2,c3,c1
1283 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1284
1285 STD c2,32(r_ptr) ; r[4] = c2;
1286 COPY %r0,c2
1287
1288 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1289 STD c3,40(r_ptr) ; r[5] = c3;
1290 COPY %r0,c3
1291
1292 SQR_ADD_C a3L,a3R,c1,c2,c3
1293 STD c1,48(r_ptr) ; r[6] = c1;
1294 STD c2,56(r_ptr) ; r[7] = c2;
1295
1296 .EXIT
1297 LDD -104(%sp),%r6 ; restore r6
1298 LDD -112(%sp),%r5 ; restore r5
1299 LDD -120(%sp),%r4 ; restore r4
1300 BVE (%rp)
1301 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot of BVE)
1302
1303 .PROCEND
1304
1305
1306;---------------------------------------------------------------------------
1307 ; MUL_ADD_C: add the 128-bit product (A0L:A0R) * (B0L:B0R) into the accumulator C3:C2:C1
1308MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1309 XMPYU A0L,B0R,ftemp1 ; m1 = A0.hi * B0.lo
1310 FSTD ftemp1,-16(%sp) ;
1311 XMPYU A0R,B0L,ftemp2 ; m = A0.lo * B0.hi
1312 FSTD ftemp2,-8(%sp) ;
1313 XMPYU A0R,B0R,ftemp3 ; lt = A0.lo * B0.lo
1314 FSTD ftemp3,-32(%sp)
1315 XMPYU A0L,B0L,ftemp4 ; ht = A0.hi * B0.hi
1316 FSTD ftemp4,-24(%sp) ;
1317
1318 LDD -8(%sp),m ; m
1319 LDD -16(%sp),m1 ; m1
1320 ADD,L m,m1,m ; m+m1
1321
1322 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1323 LDD -24(%sp),ht ; ht
1324
1325 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m + m1 wrapped; otherwise the next ADD is nullified
1326 ADD,L ht,high_one,ht ; ht+=high_one
1327
1328 EXTRD,U m,31,32,temp1 ; m >> 32
1329 LDD -32(%sp),lt ; lt
1330 ADD,L ht,temp1,ht ; ht+= m>>32
1331 ADD lt,temp3,lt ; lt = lt+m1
1332 ADD,DC ht,%r0,ht ; ht++
1333
1334 ADD C1,lt,C1 ; c1=c1+lt
1335 ADD,DC ht,%r0,ht ; add in carry (ht++)
1336
1337 ADD C2,ht,C2 ; c2 = c2 + ht
1338 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1339.endm
1340
1341
1342; bn_mul_comba8: r[0..15] = a[0..7] * b[0..7], computed column by column with a rotating three-word accumulator (c1,c2,c3)
1343;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1344; arg0 = r_ptr
1345; arg1 = a_ptr
1346; arg2 = b_ptr
1347;
1348
1349bn_mul_comba8
1350 .proc
1351 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1352 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1353 .entry
1354 .align 64
1355
1356 STD %r3,0(%sp) ; save r3
1357 STD %r4,8(%sp) ; save r4
1358 STD %r5,16(%sp) ; save r5
1359 STD %r6,24(%sp) ; save r6
1360 FSTD %fr12,32(%sp) ; save fr12 (b6)
1361 FSTD %fr13,40(%sp) ; save fr13 (b7)
1362
1363 ;
1364 ; Zero out carries
1365 ;
1366 COPY %r0,c1
1367 COPY %r0,c2
1368 COPY %r0,c3
1369
1370 LDO 128(%sp),%sp ; bump stack
1371 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1372
1373 ;
1374 ; Load up all of the values we are going to use
1375 ;
1376 FLDD 0(a_ptr),a0
1377 FLDD 8(a_ptr),a1
1378 FLDD 16(a_ptr),a2
1379 FLDD 24(a_ptr),a3
1380 FLDD 32(a_ptr),a4
1381 FLDD 40(a_ptr),a5
1382 FLDD 48(a_ptr),a6
1383 FLDD 56(a_ptr),a7
1384
1385 FLDD 0(b_ptr),b0
1386 FLDD 8(b_ptr),b1
1387 FLDD 16(b_ptr),b2
1388 FLDD 24(b_ptr),b3
1389 FLDD 32(b_ptr),b4
1390 FLDD 40(b_ptr),b5
1391 FLDD 48(b_ptr),b6
1392 FLDD 56(b_ptr),b7
1393 ; Column k sums a[i]*b[j] over i+j == k; the accumulator roles rotate after each store
1394 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1395 STD c1,0(r_ptr) ; r[0] = c1
1396 COPY %r0,c1
1397
1398 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1399 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1400 STD c2,8(r_ptr) ; r[1] = c2
1401 COPY %r0,c2
1402
1403 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1404 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1405 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1406 STD c3,16(r_ptr) ; r[2] = c3
1407 COPY %r0,c3
1408
1409 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1410 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1411 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1412 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1413 STD c1,24(r_ptr) ; r[3] = c1
1414 COPY %r0,c1
1415
1416 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1417 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1418 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1419 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1420 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1421 STD c2,32(r_ptr) ; r[4] = c2
1422 COPY %r0,c2
1423
1424 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1425 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1426 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1427 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1428 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1429 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1430 STD c3,40(r_ptr) ; r[5] = c3
1431 COPY %r0,c3
1432
1433 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1434 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1435 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1436 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1437 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1438 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1439 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1440 STD c1,48(r_ptr) ; r[6] = c1
1441 COPY %r0,c1
1442
1443 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1444 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1445 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1446 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1447 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1448 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1449 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1450 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1451 STD c2,56(r_ptr) ; r[7] = c2
1452 COPY %r0,c2
1453
1454 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1455 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1456 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1457 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1458 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1459 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1460 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1461 STD c3,64(r_ptr) ; r[8] = c3
1462 COPY %r0,c3
1463
1464 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1465 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1466 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1467 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1468 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1469 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1470 STD c1,72(r_ptr) ; r[9] = c1
1471 COPY %r0,c1
1472
1473 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1474 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1475 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1476 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1477 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1478 STD c2,80(r_ptr) ; r[10] = c2
1479 COPY %r0,c2
1480
1481 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1482 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1483 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1484 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1485 STD c3,88(r_ptr) ; r[11] = c3
1486 COPY %r0,c3
1487
1488 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1489 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1490 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1491 STD c1,96(r_ptr) ; r[12] = c1
1492 COPY %r0,c1
1493
1494 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1495 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1496 STD c2,104(r_ptr) ; r[13] = c2
1497 COPY %r0,c2
1498
1499 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1500 STD c3,112(r_ptr) ; r[14] = c3
1501 STD c1,120(r_ptr) ; r[15] = c1
1502
1503 .EXIT
1504 FLDD -88(%sp),%fr13 ; restore fr13
1505 FLDD -96(%sp),%fr12 ; restore fr12
1506 LDD -104(%sp),%r6 ; restore r6
1507 LDD -112(%sp),%r5 ; restore r5
1508 LDD -120(%sp),%r4 ; restore r4
1509 BVE (%rp)
1510 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot of BVE)
1511
1512 .PROCEND
1513
1514;-----------------------------------------------------------------------------
1515; bn_mul_comba4: r[0..7] = a[0..3] * b[0..3], computed column by column with a rotating three-word accumulator (c1,c2,c3)
1516;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1517; arg0 = r_ptr
1518; arg1 = a_ptr
1519; arg2 = b_ptr
1520;
1521
1522bn_mul_comba4
1523 .proc
1524 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1525 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1526 .entry
1527 .align 64
1528
1529 STD %r3,0(%sp) ; save r3
1530 STD %r4,8(%sp) ; save r4
1531 STD %r5,16(%sp) ; save r5
1532 STD %r6,24(%sp) ; save r6
1533 FSTD %fr12,32(%sp) ; save fr12
1534 FSTD %fr13,40(%sp) ; save fr13 (NOTE(review): only b0-b3 are loaded below, so fr12/fr13 are never written here; the save/restore looks unnecessary but is harmless)
1535
1536 ;
1537 ; Zero out carries
1538 ;
1539 COPY %r0,c1
1540 COPY %r0,c2
1541 COPY %r0,c3
1542
1543 LDO 128(%sp),%sp ; bump stack
1544 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1545
1546 ;
1547 ; Load up all of the values we are going to use
1548 ;
1549 FLDD 0(a_ptr),a0
1550 FLDD 8(a_ptr),a1
1551 FLDD 16(a_ptr),a2
1552 FLDD 24(a_ptr),a3
1553
1554 FLDD 0(b_ptr),b0
1555 FLDD 8(b_ptr),b1
1556 FLDD 16(b_ptr),b2
1557 FLDD 24(b_ptr),b3
1558 ; Column k sums a[i]*b[j] over i+j == k; the accumulator roles rotate after each store
1559 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1560 STD c1,0(r_ptr) ; r[0] = c1
1561 COPY %r0,c1
1562
1563 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1564 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1565 STD c2,8(r_ptr) ; r[1] = c2
1566 COPY %r0,c2
1567
1568 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1569 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1570 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1571 STD c3,16(r_ptr) ; r[2] = c3
1572 COPY %r0,c3
1573
1574 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1575 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1576 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1577 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1578 STD c1,24(r_ptr) ; r[3] = c1
1579 COPY %r0,c1
1580
1581 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1582 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1583 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1584 STD c2,32(r_ptr) ; r[4] = c2
1585 COPY %r0,c2
1586
1587 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1588 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1589 STD c3,40(r_ptr) ; r[5] = c3
1590 COPY %r0,c3
1591
1592 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1593 STD c1,48(r_ptr) ; r[6] = c1
1594 STD c2,56(r_ptr) ; r[7] = c2
1595
1596 .EXIT
1597 FLDD -88(%sp),%fr13 ; restore fr13
1598 FLDD -96(%sp),%fr12 ; restore fr12
1599 LDD -104(%sp),%r6 ; restore r6
1600 LDD -112(%sp),%r5 ; restore r5
1601 LDD -120(%sp),%r4 ; restore r4
1602 BVE (%rp)
1603 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot of BVE)
1604
1605 .PROCEND
1606
1607
1608;--- not PIC .SPACE $TEXT$
1609;--- not PIC .SUBSPA $CODE$
1610;--- not PIC .SPACE $PRIVATE$,SORT=16
1611;--- not PIC .IMPORT $global$,DATA
1612;--- not PIC .SPACE $TEXT$
1613;--- not PIC .SUBSPA $CODE$
1614;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c
1615;--- not PIC C$7
1616;--- not PIC .ALIGN 8
1617;--- not PIC .STRINGZ "Division would overflow (%d)\n"
1618 .END
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
deleted file mode 100644
index a99545754d..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2W.s
+++ /dev/null
@@ -1,1605 +0,0 @@
1;
2; PA-RISC 64-bit implementation of bn_asm code
3;
4; This code is approximately 2x faster than the C version
5; for RSA/DSA.
6;
7; See http://devresource.hp.com/ for more details on the PA-RISC
8; architecture. Also see the book "PA-RISC 2.0 Architecture"
9; by Gerry Kane for information on the instruction set architecture.
10;
11; Code written by Chris Ruemmler (with some help from the HP C
12; compiler).
13;
14; The code compiles with HP's assembler
15;
16
17 .level 2.0W
18 .space $TEXT$
19 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
20
21;
22; Global Register definitions used for the routines.
23;
24; Some information about HP's runtime architecture for 64-bits.
25;
26; "Caller save" means the calling function must save the register
27; if it wants the register to be preserved.
28; "Callee save" means if a function uses the register, it must save
29; the value before using it.
30;
31; For the floating point registers
32;
33; "caller save" registers: fr4-fr11, fr22-fr31
34; "callee save" registers: fr12-fr21
35; "special" registers: fr0-fr3 (status and exception registers)
36;
37; For the integer registers
38; value zero : r0
39; "caller save" registers: r1,r19-r26
40; "callee save" registers: r3-r18
41; return register : r2 (rp)
42; return values ; r28 (ret0,ret1)
43; Stack pointer ; r30 (sp)
44; global data pointer ; r27 (dp)
45; argument pointer ; r29 (ap)
46; millicode return ptr ; r31 (also a caller save register)
47
48
49;
50; Arguments to the routines
51;
52r_ptr .reg %r26 ; arg0: result pointer
53a_ptr .reg %r25 ; arg1: first operand pointer
54b_ptr .reg %r24 ; arg2 when it is a second operand pointer (same register as num)
55num .reg %r24 ; arg2 when it is a word count (same register as b_ptr)
56w .reg %r23 ; arg3 when it is the multiplier word (same register as n)
57n .reg %r23 ; arg3 when it is a word count (same register as w)
58
59
60;
61; Globals used in some routines
62;
63
64top_overflow .reg %r29 ; the multiply routines set this to 1 << 32
65high_mask .reg %r22 ; value 0xffffffff80000000L
66
67
68;------------------------------------------------------------------------------
69;
70; bn_mul_add_words
71;
72;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
73; int num, BN_ULONG w)
74;
75; arg0 = r_ptr
76; arg1 = a_ptr
77; arg2 = num
78; arg3 = w
79;
80; Local register definitions
81;
82
; FP registers receiving the 32x32 partial products (suffix _1 = second word
; of the unrolled loop), followed by the integer registers the products are
; reloaded into. %r3-%r9 are callee save, so routines using them save them.
83fm1 .reg %fr22 ; ht*fw_l for word 0 (m1[0])
84fm .reg %fr23 ; lt*fw_h for word 0 (m[0]); ht*lt in bn_sqr_words
85ht_temp .reg %fr24 ; high-half product, word 0
86ht_temp_1 .reg %fr25 ; high-half product, word 1
87lt_temp .reg %fr26 ; low-half product, word 0
88lt_temp_1 .reg %fr27 ; low-half product, word 1
89fm1_1 .reg %fr28 ; ht*fw_l for word 1 (m1[1])
90fm_1 .reg %fr29 ; lt*fw_h for word 1 (m[1])
91
92fw_h .reg %fr7L ; left (upper 32-bit) half of w
93fw_l .reg %fr7R ; right (lower 32-bit) half of w
94fw .reg %fr7 ; w, loaded from its stack slot
95
96fht_0 .reg %fr8L ; left (upper 32-bit) half of the first input word
97flt_0 .reg %fr8R ; right (lower 32-bit) half of the first input word
98t_float_0 .reg %fr8 ; first input word
99
100fht_1 .reg %fr9L ; left (upper 32-bit) half of the second input word
101flt_1 .reg %fr9R ; right (lower 32-bit) half of the second input word
102t_float_1 .reg %fr9 ; second input word
103
104tmp_0 .reg %r31 ; scratch, word 0
105tmp_1 .reg %r21 ; scratch, word 1
106m_0 .reg %r20 ; integer copy of m[0]
107m_1 .reg %r19 ; integer copy of m[1]
108ht_0 .reg %r1 ; high result word, word 0
109ht_1 .reg %r3 ; high result word, word 1 (callee save)
110lt_0 .reg %r4 ; low result word, word 0 (callee save)
111lt_1 .reg %r5 ; low result word, word 1 (callee save)
112m1_0 .reg %r6 ; integer copy of m1[0] (callee save)
113m1_1 .reg %r7 ; integer copy of m1[1] (callee save)
114rp_val .reg %r8 ; rp[0] as loaded from memory (callee save)
115rp_val_1 .reg %r9 ; rp[1] as loaded from memory (callee save)
116
; bn_mul_add_words: for each of the num words, rp[i] = rp[i] + ap[i]*w + carry.
; The final carry word is returned in %ret0 (0 when num <= 0).
; Each 64x64 multiply is built from four 32x32 XMPYU products, which are
; moved from the FP registers to the integer registers through the stack.
; Frame layout after the 128-byte bump: -128..-80(%sp) saved r3..r9,
; -72(%sp) w, -64..-8(%sp) partial-product scratch slots.
117bn_mul_add_words
118 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
119 .proc
120 .callinfo frame=128
121 .entry
122 .align 64
123
124 STD %r3,0(%sp) ; save r3
125 STD %r4,8(%sp) ; save r4
126 NOP ; Needed to make the loop 16-byte aligned
127 NOP ; Needed to make the loop 16-byte aligned
128
129 STD %r5,16(%sp) ; save r5
130 STD %r6,24(%sp) ; save r6
131 STD %r7,32(%sp) ; save r7
132 STD %r8,40(%sp) ; save r8
133
134 STD %r9,48(%sp) ; save r9
135 COPY %r0,%ret0 ; return 0 by default
136 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
137 STD w,56(%sp) ; store w on stack
138
139 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
140 LDO 128(%sp),%sp ; bump stack (delay slot; the exit code expects the bumped sp)
141
142 ;
143 ; The loop is unrolled twice, so if there is only 1 number
144 ; then go straight to the cleanup code.
145 ;
146 CMPIB,= 1,num,bn_mul_add_words_single_top
147 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
148
149 ;
150 ; This loop is unrolled 2 times (64-byte aligned as well)
151 ;
152 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
153 ; two 32-bit multiplies can be issued per cycle.
154 ;
155bn_mul_add_words_unroll2
156
157 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
158 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
159 LDD 0(r_ptr),rp_val ; rp[0]
160 LDD 8(r_ptr),rp_val_1 ; rp[1]
161
162 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
163 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
164 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
165 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
166
167 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
168 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
169 FSTD fm,-8(%sp) ; -8(sp) = m[0]
170 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
171
172 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
173 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
174 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
175 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
176
177 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
178 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
179 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
180 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
181
182 LDD -8(%sp),m_0 ; m[0]
183 LDD -40(%sp),m_1 ; m[1]
184 LDD -16(%sp),m1_0 ; m1[0]
185 LDD -48(%sp),m1_1 ; m1[1]
186
187 LDD -24(%sp),ht_0 ; ht[0]
188 LDD -56(%sp),ht_1 ; ht[1]
189 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
190 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
191
192 LDD -32(%sp),lt_0 ; lt[0]
193 LDD -64(%sp),lt_1 ; lt[1]
194 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0]+m1[0] < m1[0]), i.e. the sum wrapped
195 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
196
197 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1]+m1[1] < m1[1]), i.e. the sum wrapped
198 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
199 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
200 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
201
202 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
203 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
204 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
205 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
206
207 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
208 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
209 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
210 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
211
212 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c;
213 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
214 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
215 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
216
217 LDO -2(num),num ; num = num - 2;
218 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
219 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
220 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
221
222 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
223 ADD,DC ht_1,%r0,%ret0 ; carry out (ret0) = ht[1] + carry
224 LDO 16(a_ptr),a_ptr ; a_ptr += 2
225
226 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
227 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
228 LDO 16(r_ptr),r_ptr ; r_ptr += 2
229
230 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
231
232 ;
233 ; Top of loop aligned on 64-byte boundary
234 ;
235bn_mul_add_words_single_top
236 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
237 LDD 0(r_ptr),rp_val ; rp[0]
238 LDO 8(a_ptr),a_ptr ; a_ptr++
239 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
240 FSTD fm1,-16(%sp) ; -16(sp) = m1
241 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
242 FSTD fm,-8(%sp) ; -8(sp) = m
243 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
244 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
245 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
246 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
247
248 LDD -8(%sp),m_0 ; m
249 LDD -16(%sp),m1_0 ; m1 = temp1
250 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
251 LDD -24(%sp),ht_0 ; ht
252 LDD -32(%sp),lt_0 ; lt
253
254 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m+m1 < m1), i.e. the sum wrapped
255 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
256
257 EXTRD,U tmp_0,31,32,m_0 ; m>>32
258 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
259
260 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
261 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
262 ADD,DC ht_0,%r0,ht_0 ; ht += carry
263 ADD %ret0,tmp_0,lt_0 ; lt = lt + c;
264 ADD,DC ht_0,%r0,ht_0 ; ht += carry
265 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
266 ADD,DC ht_0,%r0,%ret0 ; carry out (ret0) = ht + carry
267 STD lt_0,0(r_ptr) ; rp[0] = lt
268
269bn_mul_add_words_exit
270 .EXIT
271 LDD -80(%sp),%r9 ; restore r9
272 LDD -88(%sp),%r8 ; restore r8
273 LDD -96(%sp),%r7 ; restore r7
274 LDD -104(%sp),%r6 ; restore r6
275 LDD -112(%sp),%r5 ; restore r5
276 LDD -120(%sp),%r4 ; restore r4
277 BVE (%rp)
278 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
279 .PROCEND ;in=23,24,25,26,29;out=28;
280
281;----------------------------------------------------------------------------
282;
283;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
284;
285; arg0 = rp
286; arg1 = ap
287; arg2 = num
288; arg3 = w
289
; bn_mul_words: for each of the num words, rp[i] = low word of (ap[i]*w + carry).
; The final carry word is returned in %ret0 (0 when num <= 0).
; Same partial-product scheme and stack scratch slots as bn_mul_add_words,
; but rp[] is only written, never read.
290bn_mul_words
291 .proc
292 .callinfo frame=128
293 .entry
294 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
295 .align 64
296
297 STD %r3,0(%sp) ; save r3
298 STD %r4,8(%sp) ; save r4
299 STD %r5,16(%sp) ; save r5
300 STD %r6,24(%sp) ; save r6
301
302 STD %r7,32(%sp) ; save r7
303 COPY %r0,%ret0 ; return 0 by default
304 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
305 STD w,56(%sp) ; w on stack
306
307 CMPIB,>= 0,num,bn_mul_words_exit ; if (num <= 0) then exit
308 LDO 128(%sp),%sp ; bump stack (delay slot; the exit code expects the bumped sp)
309
310 ;
311 ; See if only 1 word to do, thus just do cleanup
312 ;
313 CMPIB,= 1,num,bn_mul_words_single_top
314 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
315
316 ;
317 ; This loop is unrolled 2 times (64-byte aligned as well)
318 ;
319 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
320 ; two 32-bit multiplies can be issued per cycle.
321 ;
322bn_mul_words_unroll2
323
324 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
325 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
326 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
327 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
328
329 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
330 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
331 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
332 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
333
334 FSTD fm,-8(%sp) ; -8(sp) = m[0]
335 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
336 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
337 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
338
339 FSTD ht_temp,-24(%sp) ; -24(sp) = ht[0]
340 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht[1]
341 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
342 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
343
344 FSTD lt_temp,-32(%sp) ; -32(sp) = lt[0]
345 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt[1]
346 LDD -8(%sp),m_0 ; m[0]
347 LDD -40(%sp),m_1 ; m[1]
348
349 LDD -16(%sp),m1_0 ; m1[0]
350 LDD -48(%sp),m1_1 ; m1[1]
351 LDD -24(%sp),ht_0 ; ht[0]
352 LDD -56(%sp),ht_1 ; ht[1]
353
354 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
355 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
356 LDD -32(%sp),lt_0 ; lt[0]
357 LDD -64(%sp),lt_1 ; lt[1]
358
359 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0]+m1[0] < m1[0]), i.e. the sum wrapped
360 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
361 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1]+m1[1] < m1[1]), i.e. the sum wrapped
362 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
363
364 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
365 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
366 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
367 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
368
369 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
370 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
371 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
372 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
373
374 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
375 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
376 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c (ret0);
377 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
378
379 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + c (ht_0)
380 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
381 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
382 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
383
384 COPY ht_1,%ret0 ; carry = ht[1]
385 LDO -2(num),num ; num = num - 2;
386 LDO 16(a_ptr),a_ptr ; ap += 2
387 CMPIB,<= 2,num,bn_mul_words_unroll2 ; go again if at least 2 words remain
388 LDO 16(r_ptr),r_ptr ; rp += 2
389
390 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
391
392 ;
393 ; Top of loop aligned on 64-byte boundary
394 ;
395bn_mul_words_single_top
396 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
397
398 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
399 FSTD fm1,-16(%sp) ; -16(sp) = m1
400 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
401 FSTD fm,-8(%sp) ; -8(sp) = m
402 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
403 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
404 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
405 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
406
407 LDD -8(%sp),m_0 ; m
408 LDD -16(%sp),m1_0 ; m1
409 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
410 LDD -24(%sp),ht_0 ; ht
411 LDD -32(%sp),lt_0 ; lt
412
413 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m+m1 < m1), i.e. the sum wrapped
414 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
415
416 EXTRD,U tmp_0,31,32,m_0 ; m>>32
417 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
418
419 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
420 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
421 ADD,DC ht_0,%r0,ht_0 ; ht += carry
422
423 ADD %ret0,lt_0,lt_0 ; lt = lt + c;
424 ADD,DC ht_0,%r0,ht_0 ; ht += carry
425
426 COPY ht_0,%ret0 ; copy carry
427 STD lt_0,0(r_ptr) ; rp[0] = lt
428
429bn_mul_words_exit
430 .EXIT
431 LDD -96(%sp),%r7 ; restore r7
432 LDD -104(%sp),%r6 ; restore r6
433 LDD -112(%sp),%r5 ; restore r5
434 LDD -120(%sp),%r4 ; restore r4
435 BVE (%rp)
436 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
437 .PROCEND ;in=23,24,25,26,29;out=28;
438
439;----------------------------------------------------------------------------
440;
441;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
442;
443; arg0 = rp
444; arg1 = ap
445; arg2 = num
446;
447
; bn_sqr_words: for each of the num input words, store the 128-bit square of
; ap[i] as two words: rp[2i] = low word, rp[2i+1] = high word.
; With a = ht:lt (32-bit halves), a*a = ht*ht<<64 + 2*(ht*lt)<<32 + lt*lt;
; the doubled middle product m is split as (m << 33) into the low word and
; (m >> 31) into the high word. Nothing is returned; num <= 0 stores nothing.
448bn_sqr_words
449 .proc
450 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
451 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
452 .entry
453 .align 64
454
455 STD %r3,0(%sp) ; save r3
456 STD %r4,8(%sp) ; save r4
457 NOP
458 STD %r5,16(%sp) ; save r5
459
460 CMPIB,>= 0,num,bn_sqr_words_exit ; if (num <= 0) then exit
461 LDO 128(%sp),%sp ; bump stack (delay slot; the exit code expects the bumped sp)
462
463 ;
464 ; If only 1, then go straight to cleanup
465 ;
466 CMPIB,= 1,num,bn_sqr_words_single_top
467 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
468
469 ;
470 ; This loop is unrolled 2 times (64-byte aligned as well)
471 ;
472
473bn_sqr_words_unroll2
474 FLDD 0(a_ptr),t_float_0 ; a[0]
475 FLDD 8(a_ptr),t_float_1 ; a[1]
476 XMPYU fht_0,flt_0,fm ; m[0]
477 XMPYU fht_1,flt_1,fm_1 ; m[1]
478
479 FSTD fm,-24(%sp) ; store m[0]
480 FSTD fm_1,-56(%sp) ; store m[1]
481 XMPYU flt_0,flt_0,lt_temp ; lt[0]
482 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
483
484 FSTD lt_temp,-16(%sp) ; store lt[0]
485 FSTD lt_temp_1,-48(%sp) ; store lt[1]
486 XMPYU fht_0,fht_0,ht_temp ; ht[0]
487 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
488
489 FSTD ht_temp,-8(%sp) ; store ht[0]
490 FSTD ht_temp_1,-40(%sp) ; store ht[1]
491 LDD -24(%sp),m_0 ; load m[0]
492 LDD -56(%sp),m_1 ; load m[1]
493
494 AND m_0,high_mask,tmp_0 ; m[0] & Mask
495 AND m_1,high_mask,tmp_1 ; m[1] & Mask
496 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
497 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
498
499 LDD -16(%sp),lt_0 ; load lt[0]
500 LDD -48(%sp),lt_1 ; load lt[1]
501 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
502 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
503
504 LDD -8(%sp),ht_0 ; load ht[0]
505 LDD -40(%sp),ht_1 ; load ht[1]
506 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
507 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
508
509 ADD lt_0,m_0,lt_0 ; lt[0] = lt[0]+m[0]
510 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
511 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
512 STD ht_0,8(r_ptr) ; rp[1] = ht[0]
513
514 ADD lt_1,m_1,lt_1 ; lt[1] = lt[1]+m[1]
515 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
516 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
517 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
518
519 LDO -2(num),num ; num = num - 2;
520 LDO 16(a_ptr),a_ptr ; ap += 2
521 CMPIB,<= 2,num,bn_sqr_words_unroll2 ; go again if at least 2 words remain
522 LDO 32(r_ptr),r_ptr ; rp += 4
523
524 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
525
526 ;
527 ; Top of loop aligned on 64-byte boundary
528 ;
529bn_sqr_words_single_top
530 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
531
532 XMPYU fht_0,flt_0,fm ; m
533 FSTD fm,-24(%sp) ; store m
534
535 XMPYU flt_0,flt_0,lt_temp ; lt
536 FSTD lt_temp,-16(%sp) ; store lt
537
538 XMPYU fht_0,fht_0,ht_temp ; ht
539 FSTD ht_temp,-8(%sp) ; store ht
540
541 LDD -24(%sp),m_0 ; load m
542 AND m_0,high_mask,tmp_0 ; m & Mask
543 DEPD,Z m_0,30,31,m_0 ; m << 32+1
544 LDD -16(%sp),lt_0 ; lt
545
546 LDD -8(%sp),ht_0 ; ht
547 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
548 ADD m_0,lt_0,lt_0 ; lt = lt+m
549 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 (,L form: leaves the carry from lt+m intact)
550 ADD,DC ht_0,%r0,ht_0 ; ht += carry
551
552 STD lt_0,0(r_ptr) ; rp[0] = lt
553 STD ht_0,8(r_ptr) ; rp[1] = ht
554
555bn_sqr_words_exit
556 .EXIT
557 LDD -112(%sp),%r5 ; restore r5
558 LDD -120(%sp),%r4 ; restore r4
559 BVE (%rp)
560 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
561 .PROCEND ;in=23,24,25,26,29;out=28;
562
563
564;----------------------------------------------------------------------------
565;
566;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
567;
568; arg0 = rp
569; arg1 = ap
570; arg2 = bp
571; arg3 = n
572
; bn_add_words: r[i] = a[i] + b[i] + carry for i in 0..n-1; the final carry
; (0 or 1) is returned in %ret0. n <= 0 stores nothing and returns 0.
; Leaf routine: no stack frame and only caller-save registers are used.
573t .reg %r22 ; a[i], then a[i] + carry-in
574b .reg %r21 ; b[i]
575l .reg %r20 ; sum word to store
576
577bn_add_words
578 .proc
579 .entry
580 .callinfo
581 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
582 .align 64
583
584 CMPIB,>= 0,n,bn_add_words_exit ; if (n <= 0) then exit
585 COPY %r0,%ret0 ; return 0 by default
586
587 ;
588 ; If 2 or more numbers do the loop
589 ;
590 CMPIB,= 1,n,bn_add_words_single_top
591 NOP
592
593 ;
594 ; This loop is unrolled 2 times (64-byte aligned as well)
595 ;
596bn_add_words_unroll2
597 LDD 0(a_ptr),t ; t = a[0]
598 LDD 0(b_ptr),b ; b = b[0]
599 ADD t,%ret0,t ; t = t+c;
600 ADD,DC %r0,%r0,%ret0 ; set c to carry
601 ADD t,b,l ; l = t + b[0]
602 ADD,DC %ret0,%r0,%ret0 ; c+= carry
603 STD l,0(r_ptr) ; r[0] = l
604
605 LDD 8(a_ptr),t ; t = a[1]
606 LDD 8(b_ptr),b ; b = b[1]
607 ADD t,%ret0,t ; t = t+c;
608 ADD,DC %r0,%r0,%ret0 ; set c to carry
609 ADD t,b,l ; l = t + b[1]
610 ADD,DC %ret0,%r0,%ret0 ; c+= carry
611 STD l,8(r_ptr) ; r[1] = l
612
613 LDO -2(n),n ; n = n - 2
614 LDO 16(a_ptr),a_ptr ; a += 2
615 LDO 16(b_ptr),b_ptr ; b += 2
616
617 CMPIB,<= 2,n,bn_add_words_unroll2 ; go again if at least 2 words remain
618 LDO 16(r_ptr),r_ptr ; r += 2
619
620 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
621
622bn_add_words_single_top
623 LDD 0(a_ptr),t ; t = a[0]
624 LDD 0(b_ptr),b ; b = b[0]
625
626 ADD t,%ret0,t ; t = t+c;
627 ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??)
628 ADD t,b,l ; l = t + b[0]
629 ADD,DC %ret0,%r0,%ret0 ; c+= carry
630 STD l,0(r_ptr) ; r[0] = l
631
632bn_add_words_exit
633 .EXIT
634 BVE (%rp)
635 NOP
636 .PROCEND ;in=23,24,25,26,29;out=28;
637
638;----------------------------------------------------------------------------
639;
640;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
641;
642; arg0 = rp
643; arg1 = ap
644; arg2 = bp
645; arg3 = n
646
; bn_sub_words: r[i] = a[i] - b[i] - borrow for i in 0..n-1; the final borrow
; (0 or 1) is returned in %ret0. n <= 0 stores nothing and returns 0.
; The borrow is derived from comparing the inputs, not from the subtract:
; if a[i] != b[i] the new borrow is (a[i] < b[i]); if they are equal the
; incoming borrow is kept unchanged.
; Leaf routine: no stack frame and only caller-save registers are used.
647t1 .reg %r22 ; a[i]
648t2 .reg %r21 ; b[i]
649sub_tmp1 .reg %r20 ; difference word to store
650sub_tmp2 .reg %r19 ; candidate borrow: 0 if t1 > t2, else 1
651
652
653bn_sub_words
654 .proc
655 .callinfo
656 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
657 .entry
658 .align 64
659
660 CMPIB,>= 0,n,bn_sub_words_exit ; if (n <= 0) then exit
661 COPY %r0,%ret0 ; return 0 by default
662
663 ;
664 ; If 2 or more numbers do the loop
665 ;
666 CMPIB,= 1,n,bn_sub_words_single_top
667 NOP
668
669 ;
670 ; This loop is unrolled 2 times (64-byte aligned as well)
671 ;
672bn_sub_words_unroll2
673 LDD 0(a_ptr),t1 ; t1 = a[0]
674 LDD 0(b_ptr),t2 ; t2 = b[0]
675 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
676 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
677
678 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
679 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
680
681 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the old borrow
682 COPY sub_tmp2,%ret0 ; else c = sub_tmp2
683 STD sub_tmp1,0(r_ptr) ; r[0] = t3
684
685 LDD 8(a_ptr),t1 ; t1 = a[1]
686 LDD 8(b_ptr),t2 ; t2 = b[1]
687 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
688 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
691
692 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the old borrow
693 COPY sub_tmp2,%ret0 ; else c = sub_tmp2
694 STD sub_tmp1,8(r_ptr) ; r[1] = t3
695
696 LDO -2(n),n ; n = n - 2
697 LDO 16(a_ptr),a_ptr ; a += 2
698 LDO 16(b_ptr),b_ptr ; b += 2
699
700 CMPIB,<= 2,n,bn_sub_words_unroll2 ; go again if at least 2 words remain
701 LDO 16(r_ptr),r_ptr ; r += 2
702
703 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
704
705bn_sub_words_single_top
706 LDD 0(a_ptr),t1 ; t1 = a[0]
707 LDD 0(b_ptr),t2 ; t2 = b[0]
708 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
709 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
710 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
711 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
712
713 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the old borrow
714 COPY sub_tmp2,%ret0 ; else c = sub_tmp2
715
716 STD sub_tmp1,0(r_ptr) ; r[0] = t3
717
718bn_sub_words_exit
719 .EXIT
720 BVE (%rp)
721 NOP
722 .PROCEND ;in=23,24,25,26,29;out=28;
723
724;------------------------------------------------------------------------------
725;
726; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
727;
728; arg0 = h
729; arg1 = l
730; arg2 = d
731;
732; This is mainly just modified assembly from the compiler, thus the
733; lack of variable names.
734;
735;------------------------------------------------------------------------------
; bn_div_words: divide the two-word value h:l by d and return the quotient
; word in %ret0 (%r28). Returns -1 when d == 0.
; Outline as visible in the code below:
;   - BN_num_bits_word(d) gives i; unless i == 64, h is checked against a
;     limit derived from i, and bn_div_err_case (fprintf + abort) is taken
;     when the check fails;
;   - d, h and l are shifted left by (64 - i);
;   - two passes (count in %r9) each estimate a 32-bit quotient digit q
;     using the $$div2U millicode, correct it downwards in the
;     $00000024/$0000002A loop, and fold it into the result in %r10.
; Register use: %r3 = d, %r4 = h, %r5 = l, %r6 = dh, %r8 = dl, %r29 = q.
; NOTE(review): the frame bump is 352 bytes while .callinfo declares
; FRAME=272 - confirm this is intended for the unwind information.
736bn_div_words
737 .proc
738 .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
739 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
740 .IMPORT BN_num_bits_word,CODE,NO_RELOCATION
741 .IMPORT __iob,DATA
742 .IMPORT fprintf,CODE,NO_RELOCATION
743 .IMPORT abort,CODE,NO_RELOCATION
744 .IMPORT $$div2U,MILLICODE
745 .entry
746 STD %r2,-16(%r30) ; save return pointer
747 STD,MA %r3,352(%r30) ; save r3 and bump stack by 352
748 STD %r4,-344(%r30) ; save r4
749 STD %r5,-336(%r30) ; save r5
750 STD %r6,-328(%r30) ; save r6
751 STD %r7,-320(%r30) ; save r7
752 STD %r8,-312(%r30) ; save r8
753 STD %r9,-304(%r30) ; save r9
754 STD %r10,-296(%r30) ; save r10
755
756 STD %r27,-288(%r30) ; save gp
757
758 COPY %r24,%r3 ; save d
759 COPY %r26,%r4 ; save h (high 64-bits)
760 LDO -1(%r0),%ret0 ; return -1 by default
761
762 CMPB,*= %r0,%arg2,$D3 ; if (d == 0)
763 COPY %r25,%r5 ; save l (low 64-bits)
764
765 LDO -48(%r30),%r29 ; create ap
766 .CALL ;in=26,29;out=28;
767 B,L BN_num_bits_word,%r2
768 COPY %r3,%r26 ; argument: d
769 LDD -288(%r30),%r27 ; restore gp
770 LDI 64,%r21
771
772 CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward)
773 COPY %ret0,%r24 ; i
774 MTSARCM %r24
775 DEPDI,Z -1,%sar,1,%r29
776 CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)
777
778$00000012
779 SUBI 64,%r24,%r31 ; i = 64 - i;
780 CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d)
781 SUB %r4,%r3,%r4 ; h -= d
782 CMPB,= %r31,%r0,$0000001A ; if (i)
783 COPY %r0,%r10 ; ret = 0
784 MTSARCM %r31 ; i to shift
785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786 SUBI 64,%r31,%r19 ; 64 - i; redundant
787 MTSAR %r19 ; (64 -i) to shift
788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789 MTSARCM %r31 ; i to shift
790 DEPD,Z %r5,%sar,64,%r5 ; l <<= i;
791
792$0000001A
793 DEPDI,Z -1,31,32,%r19 ; r19 = mask of the upper 32 bits
794 EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32
795 EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff
796 LDO 2(%r0),%r9 ; count = 2 passes
797 STD %r3,-280(%r30) ; "d" to stack
798
799$0000001C
800 DEPDI,Z -1,63,32,%r29 ; q = low 32 bits all ones
801 EXTRD,U %r4,31,32,%r31 ; h >> 32
802 CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div
803 COPY %r4,%r26
804 EXTRD,U %r4,31,32,%r25
805 COPY %r6,%r24
806 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
807 B,L $$div2U,%r2
808 EXTRD,U %r6,31,32,%r23
809 DEPD %r28,31,32,%r29
810$D2
811 STD %r29,-272(%r30) ; q
812 AND %r5,%r19,%r24 ; t & 0xffffffff00000000;
813 EXTRD,U %r24,31,32,%r24 ; ???
814 FLDD -272(%r30),%fr7 ; q
815 FLDD -280(%r30),%fr8 ; d
816 XMPYU %fr8L,%fr7L,%fr10
817 FSTD %fr10,-256(%r30)
818 XMPYU %fr8L,%fr7R,%fr22
819 FSTD %fr22,-264(%r30)
820 XMPYU %fr8R,%fr7L,%fr11
821 XMPYU %fr8R,%fr7R,%fr23
822 FSTD %fr11,-232(%r30)
823 FSTD %fr23,-240(%r30)
824 LDD -256(%r30),%r28
825 DEPD,Z %r28,31,32,%r2
826 LDD -264(%r30),%r20
827 ADD,L %r20,%r2,%r31
828 LDD -232(%r30),%r22
829 DEPD,Z %r22,31,32,%r22
830 LDD -240(%r30),%r21
831 B $00000024 ; enter loop
832 ADD,L %r21,%r22,%r23
833
834$0000002A
835 LDO -1(%r29),%r29 ; q--
836 SUB %r23,%r8,%r23
837$00000024
838 SUB %r4,%r31,%r25
839 AND %r25,%r19,%r26
840 CMPB,*<>,N %r0,%r26,$00000046 ; (forward)
841 DEPD,Z %r25,31,32,%r20
842 OR %r20,%r24,%r21
843 CMPB,*<<,N %r21,%r23,$0000002A ;(backward)
844 SUB %r31,%r6,%r31
845;-------------Break path---------------------
846
847$00000046
848 DEPD,Z %r23,31,32,%r25 ;tl
849 EXTRD,U %r23,31,32,%r26 ;t
850 AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L
851 ADD,L %r31,%r26,%r31 ;th += t;
852 CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl)
853 LDO 1(%r31),%r31 ; th++;
854 CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward)
855 LDO -1(%r29),%r29 ;q--;
856 ADD,L %r4,%r3,%r4 ;h += d;
857$00000036
858 ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward)
859 SUB %r5,%r24,%r28 ; l -= tl;
860 SUB %r4,%r31,%r24 ; h -= th;
861 SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32));
862 DEPD,Z %r29,31,32,%r10 ; ret = q<<32
863 b $0000001C
864 DEPD,Z %r28,31,32,%r5 ; l = l << 32
865
866$D1
867 OR %r10,%r29,%r28 ; ret |= q
868$D3
869 LDD -368(%r30),%r2 ; reload return pointer (saved at -16 before the 352 bump)
870$D0
871 LDD -296(%r30),%r10 ; restore r10
872 LDD -304(%r30),%r9 ; restore r9
873 LDD -312(%r30),%r8 ; restore r8
874 LDD -320(%r30),%r7 ; restore r7
875 LDD -328(%r30),%r6 ; restore r6
876 LDD -336(%r30),%r5 ; restore r5
877 LDD -344(%r30),%r4 ; restore r4
878 BVE (%r2)
879 .EXIT
880 LDD,MB -352(%r30),%r3 ; restore r3 and pop the frame
881
882bn_div_err_case
883 MFIA %r6
884 ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1
885 LDO R'bn_div_words-bn_div_err_case(%r1),%r6
886 ADDIL LT'__iob,%r27,%r1
887 LDD RT'__iob(%r1),%r26
888 ADDIL L'C$4-bn_div_words,%r6,%r1
889 LDO R'C$4-bn_div_words(%r1),%r25
890 LDO 64(%r26),%r26
891 .CALL ;in=24,25,26,29;out=28;
892 B,L fprintf,%r2
893 LDO -48(%r30),%r29
894 LDD -288(%r30),%r27 ; restore gp
895 .CALL ;in=29;
896 B,L abort,%r2
897 LDO -48(%r30),%r29
898 LDD -288(%r30),%r27 ; restore gp
899 B $D0
900 LDD -368(%r30),%r2 ; reload return pointer
901 .PROCEND ;in=24,25,26,29;out=28;
902
903;----------------------------------------------------------------------------
904;
905; Registers to hold 64-bit values to manipulate. The "L" part
906; of the register corresponds to the upper 32-bits, while the "R"
907; part corresponds to the lower 32-bits
908;
909; Note, that when using b6 and b7, the code must save these before
910; using them because they are callee save registers
911;
912;
913; Floating point registers to use to save values that
914; are manipulated. These don't collide with ftemp1-4 and
915; are all caller save registers
916;
; Operand registers for the comba routines. Each 64-bit FP register is also
; named by its halves: the L name is the upper 32 bits, the R name the lower.
917a0 .reg %fr22 ; a[0]
918a0L .reg %fr22L
919a0R .reg %fr22R
920
921a1 .reg %fr23 ; a[1]
922a1L .reg %fr23L
923a1R .reg %fr23R
924
925a2 .reg %fr24 ; a[2]
926a2L .reg %fr24L
927a2R .reg %fr24R
928
929a3 .reg %fr25 ; a[3]
930a3L .reg %fr25L
931a3R .reg %fr25R
932
933a4 .reg %fr26 ; a[4]
934a4L .reg %fr26L
935a4R .reg %fr26R
936
937a5 .reg %fr27 ; a[5]
938a5L .reg %fr27L
939a5R .reg %fr27R
940
941a6 .reg %fr28 ; a[6]
942a6L .reg %fr28L
943a6R .reg %fr28R
944
945a7 .reg %fr29 ; a[7]
946a7L .reg %fr29L
947a7R .reg %fr29R
948
949b0 .reg %fr30 ; b[0]
950b0L .reg %fr30L
951b0R .reg %fr30R
952
953b1 .reg %fr31 ; b[1]
954b1L .reg %fr31L
955b1R .reg %fr31R
956
957;
958; Temporary floating point variables, these are all caller save
959; registers
960;
961ftemp1 .reg %fr4
962ftemp2 .reg %fr5
963ftemp3 .reg %fr6
964ftemp4 .reg %fr7
965
966;
967; The B set of registers when used.
968;
969
970b2 .reg %fr8 ; b[2]
971b2L .reg %fr8L
972b2R .reg %fr8R
973
974b3 .reg %fr9 ; b[3]
975b3L .reg %fr9L
976b3R .reg %fr9R
977
978b4 .reg %fr10 ; b[4]
979b4L .reg %fr10L
980b4R .reg %fr10R
981
982b5 .reg %fr11 ; b[5]
983b5L .reg %fr11L
984b5R .reg %fr11R
985
986b6 .reg %fr12 ; b[6] (callee save FP register)
987b6L .reg %fr12L
988b6R .reg %fr12R
989
990b7 .reg %fr13 ; b[7] (callee save FP register)
991b7L .reg %fr13L
992b7R .reg %fr13R
993
994c1 .reg %r21 ; only reg
995temp1 .reg %r20 ; only reg
996temp2 .reg %r19 ; only reg
997temp3 .reg %r31 ; only reg
998
999m1 .reg %r28 ; middle product (same register as %ret0)
1000c2 .reg %r23 ; second accumulator word
1001high_one .reg %r1 ; the comba routines set this to 1 << 32
1002ht .reg %r6 ; high product word (callee save)
1003lt .reg %r5 ; low product word (callee save)
1004m .reg %r4 ; middle product (callee save)
1005c3 .reg %r3 ; third accumulator word (callee save)
1006
; SQR_ADD_C: add the 128-bit square of the word A0L:A0R into the three-word
; accumulator (C1 = lowest, C2, C3 = highest).
; Expects high_mask to hold 0xffffffff80000000 and uses the stack slots
; -24/-16/-8(%sp) to move the partial products to integer registers.
; Clobbers ftemp1-3, m, lt, ht and temp1-3.
1007SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1008 XMPYU A0L,A0R,ftemp1 ; m
1009 FSTD ftemp1,-24(%sp) ; store m
1010
1011 XMPYU A0R,A0R,ftemp2 ; lt
1012 FSTD ftemp2,-16(%sp) ; store lt
1013
1014 XMPYU A0L,A0L,ftemp3 ; ht
1015 FSTD ftemp3,-8(%sp) ; store ht
1016
1017 LDD -24(%sp),m ; load m
1018 AND m,high_mask,temp2 ; m & Mask
1019 DEPD,Z m,30,31,temp3 ; m << 32+1
1020 LDD -16(%sp),lt ; lt
1021
1022 LDD -8(%sp),ht ; ht
1023 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1024 ADD temp3,lt,lt ; lt = lt+m
1025 ADD,L ht,temp1,ht ; ht += temp1 (,L form: leaves the carry from lt+m intact)
1026 ADD,DC ht,%r0,ht ; ht += carry
1027
1028 ADD C1,lt,C1 ; c1=c1+lt
1029 ADD,DC ht,%r0,ht ; ht += carry
1030
1031 ADD C2,ht,C2 ; c2=c2+ht
1032 ADD,DC C3,%r0,C3 ; c3 += carry
1033.endm
1034
; SQR_ADD_C2: add twice the 128-bit product (A0L:A0R) * (A1L:A1R) into the
; three-word accumulator (C1 = lowest, C2, C3 = highest).
; Expects high_one to hold 1 << 32 and uses the stack slots
; -32/-24/-16/-8(%sp) to move the partial products to integer registers.
; Clobbers ftemp1-4, m, m1, lt, ht, temp1 and temp3.
1035SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1036 XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
1037 FSTD ftemp1,-16(%sp) ;
1038 XMPYU A0R,A1L,ftemp2 ; m = bh*lt
1039 FSTD ftemp2,-8(%sp) ;
1040 XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
1041 FSTD ftemp3,-32(%sp)
1042 XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
1043 FSTD ftemp4,-24(%sp) ;
1044
1045 LDD -8(%sp),m ; m (%r4)
1046 LDD -16(%sp),m1 ; m1 (%r28)
1047 ADD,L m,m1,m ; m+m1
1048
1049 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1050 LDD -24(%sp),ht ; ht (%r6)
1051
1052 CMPCLR,*>>= m,m1,%r0 ; if (m+m1 < m1), i.e. the sum wrapped
1053 ADD,L ht,high_one,ht ; ht+=high_one
1054
1055 EXTRD,U m,31,32,temp1 ; m >> 32
1056 LDD -32(%sp),lt ; lt
1057 ADD,L ht,temp1,ht ; ht+= m>>32
1058 ADD lt,temp3,lt ; lt = lt+m1
1059 ADD,DC ht,%r0,ht ; ht += carry
1060
1061 ADD ht,ht,ht ; ht=ht+ht;
1062 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1063
1064 ADD lt,lt,lt ; lt=lt+lt;
1065 ADD,DC ht,%r0,ht ; add in carry (ht++)
1066
1067 ADD C1,lt,C1 ; c1=c1+lt
1068 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1069 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1070
1071 ADD C2,ht,C2 ; c2 = c2 + ht
1072 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1073.endm
1074
1075;
1076;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1077; arg0 = r_ptr
1078; arg1 = a_ptr
1079;
1080
; bn_sqr_comba8: r[0..15] = a[0..7] squared, computed one result column at a
; time. Each column k sums a[i]*a[j] with i+j == k: SQR_ADD_C adds a diagonal
; term a[i]^2, SQR_ADD_C2 adds an off-diagonal term twice. The three-word
; accumulator rotates through (c1,c2,c3): after each column the lowest word
; is stored to r[k] and then cleared so it can serve as the new top word.
1081bn_sqr_comba8
1082 .PROC
1083 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1084 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1085 .ENTRY
1086 .align 64
1087
1088 STD %r3,0(%sp) ; save r3
1089 STD %r4,8(%sp) ; save r4
1090 STD %r5,16(%sp) ; save r5
1091 STD %r6,24(%sp) ; save r6
1092
1093 ;
1094 ; Zero out carries
1095 ;
1096 COPY %r0,c1
1097 COPY %r0,c2
1098 COPY %r0,c3
1099
1100 LDO 128(%sp),%sp ; bump stack
1101 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1102 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1103
1104 ;
1105 ; Load up all of the values we are going to use
1106 ;
1107 FLDD 0(a_ptr),a0
1108 FLDD 8(a_ptr),a1
1109 FLDD 16(a_ptr),a2
1110 FLDD 24(a_ptr),a3
1111 FLDD 32(a_ptr),a4
1112 FLDD 40(a_ptr),a5
1113 FLDD 48(a_ptr),a6
1114 FLDD 56(a_ptr),a7
1115
1116 SQR_ADD_C a0L,a0R,c1,c2,c3
1117 STD c1,0(r_ptr) ; r[0] = c1;
1118 COPY %r0,c1
1119
1120 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1121 STD c2,8(r_ptr) ; r[1] = c2;
1122 COPY %r0,c2
1123
1124 SQR_ADD_C a1L,a1R,c3,c1,c2
1125 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1126 STD c3,16(r_ptr) ; r[2] = c3;
1127 COPY %r0,c3
1128
1129 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1130 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1131 STD c1,24(r_ptr) ; r[3] = c1;
1132 COPY %r0,c1
1133
1134 SQR_ADD_C a2L,a2R,c2,c3,c1
1135 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1136 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1137 STD c2,32(r_ptr) ; r[4] = c2;
1138 COPY %r0,c2
1139
1140 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1141 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1142 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1143 STD c3,40(r_ptr) ; r[5] = c3;
1144 COPY %r0,c3
1145
1146 SQR_ADD_C a3L,a3R,c1,c2,c3
1147 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1148 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1149 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1150 STD c1,48(r_ptr) ; r[6] = c1;
1151 COPY %r0,c1
1152
1153 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1154 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1155 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1156 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1157 STD c2,56(r_ptr) ; r[7] = c2;
1158 COPY %r0,c2
1159
1160 SQR_ADD_C a4L,a4R,c3,c1,c2
1161 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1162 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1163 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1164 STD c3,64(r_ptr) ; r[8] = c3;
1165 COPY %r0,c3
1166
1167 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1168 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1169 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1170 STD c1,72(r_ptr) ; r[9] = c1;
1171 COPY %r0,c1
1172
1173 SQR_ADD_C a5L,a5R,c2,c3,c1
1174 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1175 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1176 STD c2,80(r_ptr) ; r[10] = c2;
1177 COPY %r0,c2
1178
1179 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1180 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1181 STD c3,88(r_ptr) ; r[11] = c3;
1182 COPY %r0,c3
1183
1184 SQR_ADD_C a6L,a6R,c1,c2,c3
1185 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1186 STD c1,96(r_ptr) ; r[12] = c1;
1187 COPY %r0,c1
1188
1189 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1190 STD c2,104(r_ptr) ; r[13] = c2;
1191 COPY %r0,c2
1192
1193 SQR_ADD_C a7L,a7R,c3,c1,c2
1194 STD c3, 112(r_ptr) ; r[14] = c3
1195 STD c1, 120(r_ptr) ; r[15] = c1
1196
1197 .EXIT
1198 LDD -104(%sp),%r6 ; restore r6
1199 LDD -112(%sp),%r5 ; restore r5
1200 LDD -120(%sp),%r4 ; restore r4
1201 BVE (%rp)
1202 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
1203
1204 .PROCEND
1205
1206;-----------------------------------------------------------------------------
1207;
1208;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1209; arg0 = r_ptr
1210; arg1 = a_ptr
1211;
1212
; bn_sqr_comba4: r[0..7] = a[0..3] squared, computed one result column at a
; time. Each column k sums a[i]*a[j] with i+j == k: SQR_ADD_C adds a diagonal
; term a[i]^2, SQR_ADD_C2 adds an off-diagonal term twice. The three-word
; accumulator rotates through (c1,c2,c3): after each column the lowest word
; is stored to r[k] and then cleared so it can serve as the new top word.
1213bn_sqr_comba4
1214 .proc
1215 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1216 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1217 .entry
1218 .align 64
1219 STD %r3,0(%sp) ; save r3
1220 STD %r4,8(%sp) ; save r4
1221 STD %r5,16(%sp) ; save r5
1222 STD %r6,24(%sp) ; save r6
1223
1224 ;
1225 ; Zero out carries
1226 ;
1227 COPY %r0,c1
1228 COPY %r0,c2
1229 COPY %r0,c3
1230
1231 LDO 128(%sp),%sp ; bump stack
1232 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1233 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1234
1235 ;
1236 ; Load up all of the values we are going to use
1237 ;
1238 FLDD 0(a_ptr),a0
1239 FLDD 8(a_ptr),a1
1240 FLDD 16(a_ptr),a2
1241 FLDD 24(a_ptr),a3
1242 ; a4..a7 are deliberately not loaded: the input is only 4 words
1243 ; long, so reading 32(a_ptr)..56(a_ptr) would run past the end of
1244 ; the array, and nothing below uses a4..a7.
1245 ;
1246
1247 SQR_ADD_C a0L,a0R,c1,c2,c3
1248
1249 STD c1,0(r_ptr) ; r[0] = c1;
1250 COPY %r0,c1
1251
1252 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1253
1254 STD c2,8(r_ptr) ; r[1] = c2;
1255 COPY %r0,c2
1256
1257 SQR_ADD_C a1L,a1R,c3,c1,c2
1258 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1259
1260 STD c3,16(r_ptr) ; r[2] = c3;
1261 COPY %r0,c3
1262
1263 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1264 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1265
1266 STD c1,24(r_ptr) ; r[3] = c1;
1267 COPY %r0,c1
1268
1269 SQR_ADD_C a2L,a2R,c2,c3,c1
1270 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1271
1272 STD c2,32(r_ptr) ; r[4] = c2;
1273 COPY %r0,c2
1274
1275 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1276 STD c3,40(r_ptr) ; r[5] = c3;
1277 COPY %r0,c3
1278
1279 SQR_ADD_C a3L,a3R,c1,c2,c3
1280 STD c1,48(r_ptr) ; r[6] = c1;
1281 STD c2,56(r_ptr) ; r[7] = c2;
1282
1283 .EXIT
1284 LDD -104(%sp),%r6 ; restore r6
1285 LDD -112(%sp),%r5 ; restore r5
1286 LDD -120(%sp),%r4 ; restore r4
1287 BVE (%rp)
1288 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
1289
1290 .PROCEND
1291
1292
1293;---------------------------------------------------------------------------
1294; MUL_ADD_C: add the product of A0 (halves A0L,A0R) and B0 (halves B0L,B0R) into the carry chain C1,C2,C3.
1295MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1296 XMPYU A0L,B0R,ftemp1 ; m1 = A0L*B0R (cross product)
1297 FSTD ftemp1,-16(%sp) ; spill m1 so it can be reloaded below
1298 XMPYU A0R,B0L,ftemp2 ; m = A0R*B0L (cross product)
1299 FSTD ftemp2,-8(%sp) ; spill m
1300 XMPYU A0R,B0R,ftemp3 ; lt = A0R*B0R
1301 FSTD ftemp3,-32(%sp) ; spill lt
1302 XMPYU A0L,B0L,ftemp4 ; ht = A0L*B0L
1303 FSTD ftemp4,-24(%sp) ; spill ht
1304
1305 LDD -8(%sp),m ; r21 = m
1306 LDD -16(%sp),m1 ; r19 = m1
1307 ADD,L m,m1,m ; m = m+m1 (wrap is detected by the compare below)
1308
1309 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1310 LDD -24(%sp),ht ; r24 = ht
1311
1312 CMPCLR,*>>= m,m1,%r0 ; next insn runs only if (m < m1), i.e. m+m1 wrapped
1313 ADD,L ht,high_one,ht ; ht+=high_one
1314
1315 EXTRD,U m,31,32,temp1 ; temp1 = m >> 32
1316 LDD -32(%sp),lt ; lt
1317 ADD,L ht,temp1,ht ; ht+= m>>32
1318 ADD lt,temp3,lt ; lt = lt + ((m+m1) << 32)
1319 ADD,DC ht,%r0,ht ; ht += carry out of the lt addition
1320
1321 ADD C1,lt,C1 ; c1=c1+lt
1322 ADD,DC ht,%r0,ht ; ht += carry out of c1+lt
1323
1324 ADD C2,ht,C2 ; c2 = c2 + ht
1325 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1326.endm
1327
1328
1329; Comba multiplication of two 8-word numbers; 16 result words are stored at r_ptr.
1330;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1331; arg0 = r_ptr
1332; arg1 = a_ptr
1333; arg2 = b_ptr
1334;
1335
1336bn_mul_comba8
1337 .proc
1338 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1339 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1340 .entry
1341 .align 64
1342
1343 STD %r3,0(%sp) ; save r3
1344 STD %r4,8(%sp) ; save r4
1345 STD %r5,16(%sp) ; save r5
1346 STD %r6,24(%sp) ; save r6
1347 FSTD %fr12,32(%sp) ; save fr12
1348 FSTD %fr13,40(%sp) ; save fr13
1349
1350 ;
1351 ; Zero out carries
1352 ;
1353 COPY %r0,c1
1354 COPY %r0,c2
1355 COPY %r0,c3
1356
1357 LDO 128(%sp),%sp ; bump stack
1358 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1359
1360 ;
1361 ; Load up all of the values we are going to use
1362 ;
1363 FLDD 0(a_ptr),a0
1364 FLDD 8(a_ptr),a1
1365 FLDD 16(a_ptr),a2
1366 FLDD 24(a_ptr),a3
1367 FLDD 32(a_ptr),a4
1368 FLDD 40(a_ptr),a5
1369 FLDD 48(a_ptr),a6
1370 FLDD 56(a_ptr),a7
1371
1372 FLDD 0(b_ptr),b0
1373 FLDD 8(b_ptr),b1
1374 FLDD 16(b_ptr),b2
1375 FLDD 24(b_ptr),b3
1376 FLDD 32(b_ptr),b4
1377 FLDD 40(b_ptr),b5
1378 FLDD 48(b_ptr),b6
1379 FLDD 56(b_ptr),b7
1380
1381 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1382 STD c1,0(r_ptr) ; r[0] = c1;
1383 COPY %r0,c1
1384
1385 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1386 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1387 STD c2,8(r_ptr) ; r[1] = c2;
1388 COPY %r0,c2
1389
1390 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1391 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1392 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1393 STD c3,16(r_ptr) ; r[2] = c3;
1394 COPY %r0,c3
1395
1396 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1397 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1398 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1399 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1400 STD c1,24(r_ptr) ; r[3] = c1;
1401 COPY %r0,c1
1402
1403 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1404 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1405 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1406 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1407 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1408 STD c2,32(r_ptr) ; r[4] = c2;
1409 COPY %r0,c2
1410
1411 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1412 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1413 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1414 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1415 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1416 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1417 STD c3,40(r_ptr) ; r[5] = c3;
1418 COPY %r0,c3
1419
1420 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1421 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1422 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1423 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1424 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1425 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1426 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1427 STD c1,48(r_ptr) ; r[6] = c1;
1428 COPY %r0,c1
1429
1430 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1431 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1432 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1433 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1434 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1435 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1436 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1437 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1438 STD c2,56(r_ptr) ; r[7] = c2;
1439 COPY %r0,c2
1440
1441 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1442 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1443 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1444 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1445 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1446 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1447 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1448 STD c3,64(r_ptr) ; r[8] = c3;
1449 COPY %r0,c3
1450
1451 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1452 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1453 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1454 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1455 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1456 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1457 STD c1,72(r_ptr) ; r[9] = c1;
1458 COPY %r0,c1
1459
1460 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1461 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1462 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1463 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1464 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1465 STD c2,80(r_ptr) ; r[10] = c2;
1466 COPY %r0,c2
1467
1468 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1469 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1470 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1471 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1472 STD c3,88(r_ptr) ; r[11] = c3;
1473 COPY %r0,c3
1474
1475 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1476 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1477 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1478 STD c1,96(r_ptr) ; r[12] = c1;
1479 COPY %r0,c1
1480
1481 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1482 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1483 STD c2,104(r_ptr) ; r[13] = c2;
1484 COPY %r0,c2
1485
1486 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1487 STD c3,112(r_ptr) ; r[14] = c3
1488 STD c1,120(r_ptr) ; r[15] = c1
1489
1490 .EXIT
1491 FLDD -88(%sp),%fr13 ; restore fr13
1492 FLDD -96(%sp),%fr12 ; restore fr12
1493 LDD -104(%sp),%r6 ; restore r6
1494 LDD -112(%sp),%r5 ; restore r5
1495 LDD -120(%sp),%r4 ; restore r4
1496 BVE (%rp)
1497 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
1498
1499 .PROCEND
1500
1501;-----------------------------------------------------------------------------
1502; Comba multiplication of two 4-word numbers; 8 result words are stored at r_ptr.
1503;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1504; arg0 = r_ptr
1505; arg1 = a_ptr
1506; arg2 = b_ptr
1507;
1508
1509bn_mul_comba4
1510 .proc
1511 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1512 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1513 .entry
1514 .align 64
1515
1516 STD %r3,0(%sp) ; save r3
1517 STD %r4,8(%sp) ; save r4
1518 STD %r5,16(%sp) ; save r5
1519 STD %r6,24(%sp) ; save r6
1520 FSTD %fr12,32(%sp) ; save fr12
1521 FSTD %fr13,40(%sp) ; save fr13
1522
1523 ;
1524 ; Zero out carries
1525 ;
1526 COPY %r0,c1
1527 COPY %r0,c2
1528 COPY %r0,c3
1529
1530 LDO 128(%sp),%sp ; bump stack
1531 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1532
1533 ;
1534 ; Load up all of the values we are going to use
1535 ;
1536 FLDD 0(a_ptr),a0
1537 FLDD 8(a_ptr),a1
1538 FLDD 16(a_ptr),a2
1539 FLDD 24(a_ptr),a3
1540
1541 FLDD 0(b_ptr),b0
1542 FLDD 8(b_ptr),b1
1543 FLDD 16(b_ptr),b2
1544 FLDD 24(b_ptr),b3
1545
1546 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1547 STD c1,0(r_ptr) ; r[0] = c1;
1548 COPY %r0,c1
1549
1550 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1551 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1552 STD c2,8(r_ptr) ; r[1] = c2;
1553 COPY %r0,c2
1554
1555 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1556 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1557 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1558 STD c3,16(r_ptr) ; r[2] = c3;
1559 COPY %r0,c3
1560
1561 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1562 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1563 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1564 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1565 STD c1,24(r_ptr) ; r[3] = c1;
1566 COPY %r0,c1
1567
1568 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1569 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1570 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1571 STD c2,32(r_ptr) ; r[4] = c2;
1572 COPY %r0,c2
1573
1574 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1575 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1576 STD c3,40(r_ptr) ; r[5] = c3;
1577 COPY %r0,c3
1578
1579 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1580 STD c1,48(r_ptr) ; r[6] = c1;
1581 STD c2,56(r_ptr) ; r[7] = c2;
1582
1583 .EXIT
1584 FLDD -88(%sp),%fr13 ; restore fr13
1585 FLDD -96(%sp),%fr12 ; restore fr12
1586 LDD -104(%sp),%r6 ; restore r6
1587 LDD -112(%sp),%r5 ; restore r5
1588 LDD -120(%sp),%r4 ; restore r4
1589 BVE (%rp)
1590 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame
1591
1592 .PROCEND
1593
1594
1595 .SPACE $TEXT$
1596 .SUBSPA $CODE$
1597 .SPACE $PRIVATE$,SORT=16
1598 .IMPORT $global$,DATA
1599 .SPACE $TEXT$
1600 .SUBSPA $CODE$
1601 .SUBSPA $LIT$,ACCESS=0x2c
1602C$4
1603 .ALIGN 8
1604 .STRINGZ "Division would overflow (%d)\n"
1605 .END
diff --git a/src/lib/libcrypto/bn/asm/ppc-mont.pl b/src/lib/libcrypto/bn/asm/ppc-mont.pl
deleted file mode 100644
index 7849eae959..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc-mont.pl
+++ /dev/null
@@ -1,323 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2006
11
12# "Teaser" Montgomery multiplication module for PowerPC. It's possible
13# to gain a bit more by modulo-scheduling outer loop, then dedicated
14# squaring procedure should give further 20% and code can be adapted
15# for 32-bit application running on 64-bit CPU. As for the latter.
16# It won't be able to achieve "native" 64-bit performance, because in
17# 32-bit application context every addc instruction will have to be
18# expanded as addc, twice right shift by 32 and finally adde, etc.
19# So far RSA *sign* performance improvement over pre-bn_mul_mont asm
20# for 64-bit application running on PPC970/G5 is:
21#
22# 512-bit +65%
23# 1024-bit +35%
24# 2048-bit +18%
25# 4096-bit +4%
26
27$flavour = shift; # first argument: target flavour, must contain "32" or "64"
28# Select word size, frame constants and instruction mnemonics for the flavour.
29if ($flavour =~ /32/) {
30 $BITS= 32;
31 $BNSZ= $BITS/8; # bytes per bignum word
32 $SIZE_T=4; # size of each register save slot in the frame
33 $RZONE= 224; # extra bytes added to the frame allocation (presumably the ABI red zone - confirm)
34 $FRAME= $SIZE_T*16; # fixed part of the frame; tp[] starts at $sp+$FRAME
35
36 $LD= "lwz"; # load
37 $LDU= "lwzu"; # load and update
38 $LDX= "lwzx"; # load indexed
39 $ST= "stw"; # store
40 $STU= "stwu"; # store and update
41 $STX= "stwx"; # store indexed
42 $STUX= "stwux"; # store indexed and update
43 $UMULL= "mullw"; # unsigned multiply low
44 $UMULH= "mulhwu"; # unsigned multiply high
45 $UCMP= "cmplw"; # unsigned compare
46 $SHRI= "srwi"; # unsigned shift right by immediate
47 $PUSH= $ST; # register save uses the plain store
48 $POP= $LD; # register restore uses the plain load
49} elsif ($flavour =~ /64/) {
50 $BITS= 64;
51 $BNSZ= $BITS/8; # bytes per bignum word
52 $SIZE_T=8; # size of each register save slot in the frame
53 $RZONE= 288; # extra bytes added to the frame allocation (presumably the ABI red zone - confirm)
54 $FRAME= $SIZE_T*16; # fixed part of the frame; tp[] starts at $sp+$FRAME
55
56 # same as above, but 64-bit mnemonics...
57 $LD= "ld"; # load
58 $LDU= "ldu"; # load and update
59 $LDX= "ldx"; # load indexed
60 $ST= "std"; # store
61 $STU= "stdu"; # store and update
62 $STX= "stdx"; # store indexed
63 $STUX= "stdux"; # store indexed and update
64 $UMULL= "mulld"; # unsigned multiply low
65 $UMULH= "mulhdu"; # unsigned multiply high
66 $UCMP= "cmpld"; # unsigned compare
67 $SHRI= "srdi"; # unsigned shift right by immediate
68 $PUSH= $ST; # register save uses the plain store
69 $POP= $LD; # register restore uses the plain load
70} else { die "nonsense $flavour"; }
71
72$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
73( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
74( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
75die "can't locate ppc-xlate.pl";
76# Pipe all output through ppc-xlate.pl. Use 'or', not '||': '||' would bind to the (always true) command string and never die.
77open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
78# Register assignments used by the bn_mul_mont template below.
79$sp="r1"; # stack pointer
80$toc="r2";
81$rp="r3"; $ovf="r3"; # r3 holds rp on entry and is reused for the overflow bit
82$ap="r4";
83$bp="r5";
84$np="r6";
85$n0="r7";
86$num="r8";
87$rp="r9"; # $rp is reassigned
88$aj="r10";
89$nj="r11";
90$tj="r12";
91# non-volatile registers
92$i="r14";
93$j="r15";
94$tp="r16";
95$m0="r17";
96$m1="r18";
97$lo0="r19";
98$hi0="r20";
99$lo1="r21";
100$hi1="r22";
101$alo="r23";
102$ahi="r24";
103$nlo="r25";
104# r0 is used as the last temporary
105$nhi="r0";
106
107$code=<<___;
108.machine "any"
109.text
110
111.globl .bn_mul_mont
112.align 4
113.bn_mul_mont:
114 cmpwi $num,4
115 mr $rp,r3 ; $rp is reassigned
116 li r3,0
117 bltlr
118
119 slwi $num,$num,`log($BNSZ)/log(2)`
120 li $tj,-4096
121 addi $ovf,$num,`$FRAME+$RZONE`
122 subf $ovf,$ovf,$sp ; $sp-$ovf
123 and $ovf,$ovf,$tj ; minimize TLB usage
124 subf $ovf,$sp,$ovf ; $ovf-$sp
125 srwi $num,$num,`log($BNSZ)/log(2)`
126 $STUX $sp,$sp,$ovf
127
128 $PUSH r14,`4*$SIZE_T`($sp)
129 $PUSH r15,`5*$SIZE_T`($sp)
130 $PUSH r16,`6*$SIZE_T`($sp)
131 $PUSH r17,`7*$SIZE_T`($sp)
132 $PUSH r18,`8*$SIZE_T`($sp)
133 $PUSH r19,`9*$SIZE_T`($sp)
134 $PUSH r20,`10*$SIZE_T`($sp)
135 $PUSH r21,`11*$SIZE_T`($sp)
136 $PUSH r22,`12*$SIZE_T`($sp)
137 $PUSH r23,`13*$SIZE_T`($sp)
138 $PUSH r24,`14*$SIZE_T`($sp)
139 $PUSH r25,`15*$SIZE_T`($sp)
140
141 $LD $n0,0($n0) ; pull n0[0] value
142 addi $num,$num,-2 ; adjust $num for counter register
143
144 $LD $m0,0($bp) ; m0=bp[0]
145 $LD $aj,0($ap) ; ap[0]
146 addi $tp,$sp,$FRAME
147 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
148 $UMULH $hi0,$aj,$m0
149
150 $LD $aj,$BNSZ($ap) ; ap[1]
151 $LD $nj,0($np) ; np[0]
152
153 $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0
154
155 $UMULL $alo,$aj,$m0 ; ap[1]*bp[0]
156 $UMULH $ahi,$aj,$m0
157
158 $UMULL $lo1,$nj,$m1 ; np[0]*m1
159 $UMULH $hi1,$nj,$m1
160 $LD $nj,$BNSZ($np) ; np[1]
161 addc $lo1,$lo1,$lo0
162 addze $hi1,$hi1
163
164 $UMULL $nlo,$nj,$m1 ; np[1]*m1
165 $UMULH $nhi,$nj,$m1
166
167 mtctr $num
168 li $j,`2*$BNSZ`
169.align 4
170L1st:
171 $LDX $aj,$ap,$j ; ap[j]
172 addc $lo0,$alo,$hi0
173 $LDX $nj,$np,$j ; np[j]
174 addze $hi0,$ahi
175 $UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
176 addc $lo1,$nlo,$hi1
177 $UMULH $ahi,$aj,$m0
178 addze $hi1,$nhi
179 $UMULL $nlo,$nj,$m1 ; np[j]*m1
180 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
181 $UMULH $nhi,$nj,$m1
182 addze $hi1,$hi1
183 $ST $lo1,0($tp) ; tp[j-1]
184
185 addi $j,$j,$BNSZ ; j++
186 addi $tp,$tp,$BNSZ ; tp++
187 bdnz- L1st
188;L1st
189 addc $lo0,$alo,$hi0
190 addze $hi0,$ahi
191
192 addc $lo1,$nlo,$hi1
193 addze $hi1,$nhi
194 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
195 addze $hi1,$hi1
196 $ST $lo1,0($tp) ; tp[j-1]
197
198 li $ovf,0
199 addc $hi1,$hi1,$hi0
200 addze $ovf,$ovf ; upmost overflow bit
201 $ST $hi1,$BNSZ($tp)
202
203 li $i,$BNSZ
204.align 4
205Louter:
206 $LDX $m0,$bp,$i ; m0=bp[i]
207 $LD $aj,0($ap) ; ap[0]
208 addi $tp,$sp,$FRAME
209 $LD $tj,$FRAME($sp) ; tp[0]
210 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
211 $UMULH $hi0,$aj,$m0
212 $LD $aj,$BNSZ($ap) ; ap[1]
213 $LD $nj,0($np) ; np[0]
214 addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
215 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
216 addze $hi0,$hi0
217 $UMULL $m1,$lo0,$n0 ; tp[0]*n0
218 $UMULH $ahi,$aj,$m0
219 $UMULL $lo1,$nj,$m1 ; np[0]*m1
220 $UMULH $hi1,$nj,$m1
221 $LD $nj,$BNSZ($np) ; np[1]
222 addc $lo1,$lo1,$lo0
223 $UMULL $nlo,$nj,$m1 ; np[1]*m1
224 addze $hi1,$hi1
225 $UMULH $nhi,$nj,$m1
226
227 mtctr $num
228 li $j,`2*$BNSZ`
229.align 4
230Linner:
231 $LDX $aj,$ap,$j ; ap[j]
232 addc $lo0,$alo,$hi0
233 $LD $tj,$BNSZ($tp) ; tp[j]
234 addze $hi0,$ahi
235 $LDX $nj,$np,$j ; np[j]
236 addc $lo1,$nlo,$hi1
237 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
238 addze $hi1,$nhi
239 $UMULH $ahi,$aj,$m0
240 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
241 $UMULL $nlo,$nj,$m1 ; np[j]*m1
242 addze $hi0,$hi0
243 $UMULH $nhi,$nj,$m1
244 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
245 addi $j,$j,$BNSZ ; j++
246 addze $hi1,$hi1
247 $ST $lo1,0($tp) ; tp[j-1]
248 addi $tp,$tp,$BNSZ ; tp++
249 bdnz- Linner
250;Linner
251 $LD $tj,$BNSZ($tp) ; tp[j]
252 addc $lo0,$alo,$hi0
253 addze $hi0,$ahi
254 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
255 addze $hi0,$hi0
256
257 addc $lo1,$nlo,$hi1
258 addze $hi1,$nhi
259 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
260 addze $hi1,$hi1
261 $ST $lo1,0($tp) ; tp[j-1]
262
263 addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA]
264 li $ovf,0
265 adde $hi1,$hi1,$hi0
266 addze $ovf,$ovf
267 $ST $hi1,$BNSZ($tp)
268;
269 slwi $tj,$num,`log($BNSZ)/log(2)`
270 $UCMP $i,$tj
271 addi $i,$i,$BNSZ
272 ble- Louter
273
274 addi $num,$num,2 ; restore $num
275 subfc $j,$j,$j ; j=0 and "clear" XER[CA]
276 addi $tp,$sp,$FRAME
277 mtctr $num
278
279.align 4
280Lsub: $LDX $tj,$tp,$j
281 $LDX $nj,$np,$j
282 subfe $aj,$nj,$tj ; tp[j]-np[j]
283 $STX $aj,$rp,$j
284 addi $j,$j,$BNSZ
285 bdnz- Lsub
286
287 li $j,0
288 mtctr $num
289 subfe $ovf,$j,$ovf ; handle upmost overflow bit
290 and $ap,$tp,$ovf
291 andc $np,$rp,$ovf
292 or $ap,$ap,$np ; ap=borrow?tp:rp
293
294.align 4
295Lcopy: ; copy or in-place refresh
296 $LDX $tj,$ap,$j
297 $STX $tj,$rp,$j
298 $STX $j,$tp,$j ; zap at once
299 addi $j,$j,$BNSZ
300 bdnz- Lcopy
301
302 $POP r14,`4*$SIZE_T`($sp)
303 $POP r15,`5*$SIZE_T`($sp)
304 $POP r16,`6*$SIZE_T`($sp)
305 $POP r17,`7*$SIZE_T`($sp)
306 $POP r18,`8*$SIZE_T`($sp)
307 $POP r19,`9*$SIZE_T`($sp)
308 $POP r20,`10*$SIZE_T`($sp)
309 $POP r21,`11*$SIZE_T`($sp)
310 $POP r22,`12*$SIZE_T`($sp)
311 $POP r23,`13*$SIZE_T`($sp)
312 $POP r24,`14*$SIZE_T`($sp)
313 $POP r25,`15*$SIZE_T`($sp)
314 $POP $sp,0($sp)
315 li r3,1
316 blr
317 .long 0
318.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
319___
320# Expand the backtick-quoted Perl expressions in the template, then emit it.
321$code =~ s/\`([^\`]*)\`/eval $1/gem;
322print $code;
323close STDOUT or die "error closing STDOUT: $!"; # STDOUT is the translator pipe; a failed close means the translator failed
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
deleted file mode 100644
index 08e0053473..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ /dev/null
@@ -1,2078 +0,0 @@
1#!/usr/bin/env perl
2#
3# Implemented as a Perl wrapper as we want to support several different
4# architectures with single file. We pick up the target based on the
5# file name we are asked to generate.
6#
7# It should be noted though that this perl code is nothing like
8# <openssl>/crypto/perlasm/x86*. In this case perl is used pretty much
9# as pre-processor to cover for platform differences in name decoration,
10# linker tables, 32-/64-bit instruction sets...
11#
12# As you might know there're several PowerPC ABI in use. Most notably
13# Linux and AIX use different 32-bit ABIs. Good news are that these ABIs
14# are similar enough to implement leaf(!) functions, which would be ABI
15# neutral. And that's what you find here: ABI neutral leaf functions.
16# In case you wonder what that is...
17#
18# AIX performance
19#
20# MEASUREMENTS WITH cc ON a 200 MHz PowerPC 604e.
21#
22# The following is the performance of 32-bit compiler
23# generated code:
24#
25# OpenSSL 0.9.6c 21 dec 2001
26# built on: Tue Jun 11 11:06:51 EDT 2002
27# options:bn(64,32) ...
28#compiler: cc -DTHREADS -DAIX -DB_ENDIAN -DBN_LLONG -O3
29# sign verify sign/s verify/s
30#rsa 512 bits 0.0098s 0.0009s 102.0 1170.6
31#rsa 1024 bits 0.0507s 0.0026s 19.7 387.5
32#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1
33#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4
34#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
35#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
36#
37# Same benchmark with this assembler code:
38#
39#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
40#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1
41#rsa 2048 bits 0.1744s 0.0050s 5.7 201.2
42#rsa 4096 bits 1.1644s 0.0179s 0.9 55.7
43#dsa 512 bits 0.0052s 0.0062s 191.6 162.0
44#dsa 1024 bits 0.0149s 0.0180s 67.0 55.5
45#
46# Number of operations increases by almost 75%
47#
48# Here are performance numbers for 64-bit compiler
49# generated code:
50#
51# OpenSSL 0.9.6g [engine] 9 Aug 2002
52# built on: Fri Apr 18 16:59:20 EDT 2003
53# options:bn(64,64) ...
54# compiler: cc -DTHREADS -D_REENTRANT -q64 -DB_ENDIAN -O3
55# sign verify sign/s verify/s
56#rsa 512 bits 0.0028s 0.0003s 357.1 3844.4
57#rsa 1024 bits 0.0148s 0.0008s 67.5 1239.7
58#rsa 2048 bits 0.0963s 0.0028s 10.4 353.0
59#rsa 4096 bits 0.6538s 0.0102s 1.5 98.1
60#dsa 512 bits 0.0026s 0.0032s 382.5 313.7
61#dsa 1024 bits 0.0081s 0.0099s 122.8 100.6
62#
63# Same benchmark with this assembler code:
64#
65#rsa 512 bits 0.0020s 0.0002s 510.4 6273.7
66#rsa 1024 bits 0.0088s 0.0005s 114.1 2128.3
67#rsa 2048 bits 0.0540s 0.0016s 18.5 622.5
68#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0
69#dsa 512 bits 0.0016s 0.0020s 610.7 507.1
70#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2
71#
72# Again, performance increases by about 75%
73#
74# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code)
75# OpenSSL 0.9.7c 30 Sep 2003
76#
77# Original code.
78#
79#rsa 512 bits 0.0011s 0.0001s 906.1 11012.5
80#rsa 1024 bits 0.0060s 0.0003s 166.6 3363.1
81#rsa 2048 bits 0.0370s 0.0010s 27.1 982.4
82#rsa 4096 bits 0.2426s 0.0036s 4.1 280.4
83#dsa 512 bits 0.0010s 0.0012s 1038.1 841.5
84#dsa 1024 bits 0.0030s 0.0037s 329.6 269.7
85#dsa 2048 bits 0.0101s 0.0127s 98.9 78.6
86#
87# Same benchmark with this assembler code:
88#
89#rsa 512 bits 0.0007s 0.0001s 1416.2 16645.9
90#rsa 1024 bits 0.0036s 0.0002s 274.4 5380.6
91#rsa 2048 bits 0.0222s 0.0006s 45.1 1589.5
92#rsa 4096 bits 0.1469s 0.0022s 6.8 449.6
93#dsa 512 bits 0.0006s 0.0007s 1664.2 1376.2
94#dsa 1024 bits 0.0018s 0.0023s 545.0 442.2
95#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8
96#
97# Performance increase of ~60%
98#
99# If you have comments or suggestions to improve code send
100# me a note at schari@us.ibm.com
101#
102
103$opf = shift; # first argument: output file name; it selects word size and platform
104# Pick the 32- or 64-bit mnemonic set from the output file name.
105if ($opf =~ /32\.s/) {
106 $BITS= 32;
107 $BNSZ= $BITS/8; # bytes per BN_ULONG
108 $ISA= "\"ppc\""; # operand of the .machine directive in the template
109
110 $LD= "lwz"; # load
111 $LDU= "lwzu"; # load and update
112 $ST= "stw"; # store
113 $STU= "stwu"; # store and update
114 $UMULL= "mullw"; # unsigned multiply low
115 $UMULH= "mulhwu"; # unsigned multiply high
116 $UDIV= "divwu"; # unsigned divide
117 $UCMPI= "cmplwi"; # unsigned compare with immediate
118 $UCMP= "cmplw"; # unsigned compare
119 $CNTLZ= "cntlzw"; # count leading zeros
120 $SHL= "slw"; # shift left
121 $SHR= "srw"; # unsigned shift right
122 $SHRI= "srwi"; # unsigned shift right by immediate
123 $SHLI= "slwi"; # shift left by immediate
124 $CLRU= "clrlwi"; # clear upper bits
125 $INSR= "insrwi"; # insert right
126 $ROTL= "rotlwi"; # rotate left by immediate
127 $TR= "tw"; # conditional trap
128} elsif ($opf =~ /64\.s/) {
129 $BITS= 64;
130 $BNSZ= $BITS/8; # bytes per BN_ULONG
131 $ISA= "\"ppc64\""; # operand of the .machine directive in the template
132
133 # same as above, but 64-bit mnemonics...
134 $LD= "ld"; # load
135 $LDU= "ldu"; # load and update
136 $ST= "std"; # store
137 $STU= "stdu"; # store and update
138 $UMULL= "mulld"; # unsigned multiply low
139 $UMULH= "mulhdu"; # unsigned multiply high
140 $UDIV= "divdu"; # unsigned divide
141 $UCMPI= "cmpldi"; # unsigned compare with immediate
142 $UCMP= "cmpld"; # unsigned compare
143 $CNTLZ= "cntlzd"; # count leading zeros
144 $SHL= "sld"; # shift left
145 $SHR= "srd"; # unsigned shift right
146 $SHRI= "srdi"; # unsigned shift right by immediate
147 $SHLI= "sldi"; # shift left by immediate
148 $CLRU= "clrldi"; # clear upper bits
149 $INSR= "insrdi"; # insert right
150 $ROTL= "rotldi"; # rotate left by immediate
151 $TR= "td"; # conditional trap
152} else { die "nonsense $opf"; }
153# If a second argument is present, keep the current STDOUT; otherwise write to $opf.
154( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!";
155
156# function entry points from the AIX code
157#
158# There are other, more elegant, ways to handle this. We (IBM) chose
159# this approach as it plays well with scripts we run to 'namespace'
160# OpenSSL, i.e. we add a prefix to all the public symbols so we can
161# co-exist in the same process with other implementations of OpenSSL.
162# 'cleverer' ways of doing these substitutions tend to hide data we
163# need to be obvious.
164# Each name appears in the template as ".name"; the do_* routines rewrite it per platform.
165my @items = ("bn_sqr_comba4",
166 "bn_sqr_comba8",
167 "bn_mul_comba4",
168 "bn_mul_comba8",
169 "bn_sub_words",
170 "bn_add_words",
171 "bn_div_words",
172 "bn_sqr_words",
173 "bn_mul_words",
174 "bn_mul_add_words");
175# Choose the platform-specific symbol decoration from the output file name.
176if ($opf =~ /linux/) { do_linux(); }
177elsif ($opf =~ /aix/) { do_aix(); }
178elsif ($opf =~ /osx/) { do_osx(); }
179else { do_bsd(); } # anything else falls back to the BSD naming
180# Linux: print the template with the entry-point labels rewritten for 32- or 64-bit output.
181sub do_linux {
182 $d=&data();
183 # 64-bit: expand each ".name:" label into an .opd entry "name" followed by the ".name" code label.
184 if ($BITS==64) {
185 foreach $t (@items) {
186 $d =~ s/\.$t:/\
187\t.section\t".opd","aw"\
188\t.align\t3\
189\t.globl\t$t\
190$t:\
191\t.quad\t.$t,.TOC.\@tocbase,0\
192\t.size\t$t,24\
193\t.previous\n\
194\t.type\t.$t,\@function\
195\t.globl\t.$t\
196.$t:/g;
197 }
198 }
199 else { # 32-bit: just drop the leading dot from each entry-point name
200 foreach $t (@items) {
201 $d=~s/\.$t/$t/g;
202 }
203 }
204 # hide internal labels to avoid pollution of name table...
205 $d=~s/Lppcasm_/.Lppcasm_/gm;
206 print $d;
207}
208# AIX: print the template exactly as data() returns it.
209sub do_aix {
210 # AIX assembler is smart enough to please the linker without
211 # making us do something special...
212 print &data();
213}
214
215# MacOSX 32 bit: print the template with '_'-prefixed symbols, no .machine directive and ';' comments
216sub do_osx {
217 $d=&data();
218 # Change the bn symbol prefix from '.' to '_'
219 foreach $t (@items) {
220 $d=~s/\.$t/_$t/g;
221 }
222 # Change .machine to something OS X asm will accept
223 $d=~s/\.machine.*/.text/g; # the whole directive line becomes ".text"
224 $d=~s/\#/;/g; # change comment from '#' to ';'
225 print $d;
226}
227
228# BSD (Untested): also the fallback when the file name names no other platform
229sub do_bsd {
230 $d=&data();
231 foreach $t (@items) {
232 $d=~s/\.$t/_$t/g; # change the bn symbol prefix from '.' to '_'
233 }
234 print $d;
235}
235}
236
237sub data {
238 local($data)=<<EOF;
239#--------------------------------------------------------------------
240#
241#
242#
243#
244# File: ppc32.s
245#
246# Created by: Suresh Chari
247# IBM Thomas J. Watson Research Library
248# Hawthorne, NY
249#
250#
251# Description: Optimized assembly routines for OpenSSL crypto
252# on the 32 bitPowerPC platform.
253#
254#
255# Version History
256#
257# 2. Fixed bn_add,bn_sub and bn_div_words, added comments,
258# cleaned up code. Also made a single version which can
259# be used for both the AIX and Linux compilers. See NOTE
260# below.
261# 12/05/03 Suresh Chari
262# (with lots of help from) Andy Polyakov
263##
264# 1. Initial version 10/20/02 Suresh Chari
265#
266#
267# The following file works for the xlc,cc
268# and gcc compilers.
269#
270# NOTE: To get the file to link correctly with the gcc compiler
271# you have to change the names of the routines and remove
272# the first .(dot) character. This should automatically
273# be done in the build process.
274#
275# Hand optimized assembly code for the following routines
276#
277# bn_sqr_comba4
278# bn_sqr_comba8
279# bn_mul_comba4
280# bn_mul_comba8
281# bn_sub_words
282# bn_add_words
283# bn_div_words
284# bn_sqr_words
285# bn_mul_words
286# bn_mul_add_words
287#
288# NOTE: It is possible to optimize this code more for
289# specific PowerPC or Power architectures. On the Northstar
290# architecture the optimizations in this file do
291# NOT provide much improvement.
292#
293# If you have comments or suggestions to improve code send
294# me a note at schari\@us.ibm.com
295#
296#--------------------------------------------------------------------------
297#
298# Defines to be used in the assembly code.
299#
300.set r0,0 # we use it as storage for value of 0
301.set SP,1 # preserved
302.set RTOC,2 # preserved
303.set r3,3 # 1st argument/return value
304.set r4,4 # 2nd argument/volatile register
305.set r5,5 # 3rd argument/volatile register
306.set r6,6 # ...
307.set r7,7
308.set r8,8
309.set r9,9
310.set r10,10
311.set r11,11
312.set r12,12
313.set r13,13 # not used, nor any other "below" it...
314
315.set BO_IF_NOT,4
316.set BO_IF,12
317.set BO_dCTR_NZERO,16
318.set BO_dCTR_ZERO,18
319.set BO_ALWAYS,20
320.set CR0_LT,0;
321.set CR0_GT,1;
322.set CR0_EQ,2
323.set CR1_FX,4;
324.set CR1_FEX,5;
325.set CR1_VX,6
326.set LR,8
327
328# Declare function names to be global
329# NOTE: For gcc these names MUST be changed to remove
330# the first . i.e. for example change ".bn_sqr_comba4"
331# to "bn_sqr_comba4". This should be automatically done
332# in the build.
333
334 .globl .bn_sqr_comba4
335 .globl .bn_sqr_comba8
336 .globl .bn_mul_comba4
337 .globl .bn_mul_comba8
338 .globl .bn_sub_words
339 .globl .bn_add_words
340 .globl .bn_div_words
341 .globl .bn_sqr_words
342 .globl .bn_mul_words
343 .globl .bn_mul_add_words
344
345# .text section
346
347 .machine $ISA
348
349#
350# NOTE: The following label name should be changed to
351# "bn_sqr_comba4" i.e. remove the first dot
352# for the gcc compiler. This should be automatically
353# done in the build
354#
355
356.align 4
357.bn_sqr_comba4:
358#
359# Optimized version of bn_sqr_comba4.
360#
361# void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
362# r3 contains r
363# r4 contains a
364#
365# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
366#
367# r5,r6 are the two BN_ULONGs being multiplied.
368# r7,r8 are the results of the 32x32 giving 64 bit multiply.
369# r9,r10, r11 are the equivalents of c1,c2, c3.
370# Here's the assembly
371#
372#
373 xor r0,r0,r0 # set r0 = 0. Used in the addze
374 # instructions below
375
376 #sqr_add_c(a,0,c1,c2,c3)
377 $LD r5,`0*$BNSZ`(r4)
378 $UMULL r9,r5,r5
379 $UMULH r10,r5,r5 #in first iteration. No need
380 #to add since c1=c2=c3=0.
381 # Note c3(r11) is NOT set to 0
382 # but will be.
383
384 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
385 # sqr_add_c2(a,1,0,c2,c3,c1);
386 $LD r6,`1*$BNSZ`(r4)
387 $UMULL r7,r5,r6
388 $UMULH r8,r5,r6
389
390 addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8)
391 adde r8,r8,r8
392 addze r9,r0 # catch carry if any.
393 # r9= r0(=0) and carry
394
395 addc r10,r7,r10 # now add to temp result.
396 addze r11,r8 # r8 added to r11 which is 0
397 addze r9,r9
398
399 $ST r10,`1*$BNSZ`(r3) #r[1]=c2;
400 #sqr_add_c(a,1,c3,c1,c2)
401 $UMULL r7,r6,r6
402 $UMULH r8,r6,r6
403 addc r11,r7,r11
404 adde r9,r8,r9
405 addze r10,r0
406 #sqr_add_c2(a,2,0,c3,c1,c2)
407 $LD r6,`2*$BNSZ`(r4)
408 $UMULL r7,r5,r6
409 $UMULH r8,r5,r6
410
411 addc r7,r7,r7
412 adde r8,r8,r8
413 addze r10,r10
414
415 addc r11,r7,r11
416 adde r9,r8,r9
417 addze r10,r10
418 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
419 #sqr_add_c2(a,3,0,c1,c2,c3);
420 $LD r6,`3*$BNSZ`(r4)
421 $UMULL r7,r5,r6
422 $UMULH r8,r5,r6
423 addc r7,r7,r7
424 adde r8,r8,r8
425 addze r11,r0
426
427 addc r9,r7,r9
428 adde r10,r8,r10
429 addze r11,r11
430 #sqr_add_c2(a,2,1,c1,c2,c3);
431 $LD r5,`1*$BNSZ`(r4)
432 $LD r6,`2*$BNSZ`(r4)
433 $UMULL r7,r5,r6
434 $UMULH r8,r5,r6
435
436 addc r7,r7,r7
437 adde r8,r8,r8
438 addze r11,r11
439 addc r9,r7,r9
440 adde r10,r8,r10
441 addze r11,r11
442 $ST r9,`3*$BNSZ`(r3) #r[3]=c1
443 #sqr_add_c(a,2,c2,c3,c1);
444 $UMULL r7,r6,r6
445 $UMULH r8,r6,r6
446 addc r10,r7,r10
447 adde r11,r8,r11
448 addze r9,r0
449 #sqr_add_c2(a,3,1,c2,c3,c1);
450 $LD r6,`3*$BNSZ`(r4)
451 $UMULL r7,r5,r6
452 $UMULH r8,r5,r6
453 addc r7,r7,r7
454 adde r8,r8,r8
455 addze r9,r9
456
457 addc r10,r7,r10
458 adde r11,r8,r11
459 addze r9,r9
460 $ST r10,`4*$BNSZ`(r3) #r[4]=c2
461 #sqr_add_c2(a,3,2,c3,c1,c2);
462 $LD r5,`2*$BNSZ`(r4)
463 $UMULL r7,r5,r6
464 $UMULH r8,r5,r6
465 addc r7,r7,r7
466 adde r8,r8,r8
467 addze r10,r0
468
469 addc r11,r7,r11
470 adde r9,r8,r9
471 addze r10,r10
472 $ST r11,`5*$BNSZ`(r3) #r[5] = c3
473 #sqr_add_c(a,3,c1,c2,c3);
474 $UMULL r7,r6,r6
475 $UMULH r8,r6,r6
476 addc r9,r7,r9
477 adde r10,r8,r10
478
479 $ST r9,`6*$BNSZ`(r3) #r[6]=c1
480 $ST r10,`7*$BNSZ`(r3) #r[7]=c2
481 bclr BO_ALWAYS,CR0_LT
482 .long 0x00000000
483
484#
485# NOTE: The following label name should be changed to
486# "bn_sqr_comba8" i.e. remove the first dot
487# for the gcc compiler. This should be automatically
488# done in the build
489#
490
491.align 4
492.bn_sqr_comba8:
493#
494# Fully unrolled comba (column by column) squaring of an 8-word number.
495# Each cross product a[i]*a[j] (i != j) is added into the running
496# three-word sum twice; each square a[i]*a[i] is added once.
497#
498# void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
499# r3 contains r (words r[0]..r[15] are written)
500# r4 contains a (words a[0]..a[7] are read)
501#
502# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
503#
504# r5,r6 are the two BN_ULONGs being multiplied.
505# r7,r8 are the low and high words of the double-width product.
506# r9,r10,r11 are the equivalents of c1,c2,c3; roles rotate per column.
507#
508# Possible optimization of loading all 8 longs of a into registers
509# doesn't provide any speedup
510#
511
512 xor r0,r0,r0 #set r0 = 0.Used in addze
513 #instructions below.
514
515 #sqr_add_c(a,0,c1,c2,c3);
516 $LD r5,`0*$BNSZ`(r4)
517 $UMULL r9,r5,r5 #1st iteration: no carries.
518 $UMULH r10,r5,r5
519 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
520 #sqr_add_c2(a,1,0,c2,c3,c1);
521 $LD r6,`1*$BNSZ`(r4)
522 $UMULL r7,r5,r6
523 $UMULH r8,r5,r6
524
525 addc r10,r7,r10 #add the two register number
526 adde r11,r8,r0 # (r8,r7) to the three register
527 addze r9,r0 # number (r9,r11,r10).NOTE:r0=0
528
529 addc r10,r7,r10 #add the same product a second time:
530 adde r11,r8,r11 # (r8,r7) to the three register
531 addze r9,r9 # number (r9,r11,r10).
532
533 $ST r10,`1*$BNSZ`(r3) # r[1]=c2
534
535 #sqr_add_c(a,1,c3,c1,c2);
536 $UMULL r7,r6,r6
537 $UMULH r8,r6,r6
538 addc r11,r7,r11
539 adde r9,r8,r9
540 addze r10,r0 #c2 = carry out only (r0 is 0)
541 #sqr_add_c2(a,2,0,c3,c1,c2);
542 $LD r6,`2*$BNSZ`(r4)
543 $UMULL r7,r5,r6
544 $UMULH r8,r5,r6
545
546 addc r11,r7,r11
547 adde r9,r8,r9
548 addze r10,r10
549
550 addc r11,r7,r11
551 adde r9,r8,r9
552 addze r10,r10
553
554 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
555 #sqr_add_c2(a,3,0,c1,c2,c3);
556 $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0].
557 $UMULL r7,r5,r6
558 $UMULH r8,r5,r6
559
560 addc r9,r7,r9
561 adde r10,r8,r10
562 addze r11,r0 #c3 = carry out only (r0 is 0)
563
564 addc r9,r7,r9
565 adde r10,r8,r10
566 addze r11,r11
567 #sqr_add_c2(a,2,1,c1,c2,c3);
568 $LD r5,`1*$BNSZ`(r4)
569 $LD r6,`2*$BNSZ`(r4)
570 $UMULL r7,r5,r6
571 $UMULH r8,r5,r6
572
573 addc r9,r7,r9
574 adde r10,r8,r10
575 addze r11,r11
576
577 addc r9,r7,r9
578 adde r10,r8,r10
579 addze r11,r11
580
581 $ST r9,`3*$BNSZ`(r3) #r[3]=c1;
582 #sqr_add_c(a,2,c2,c3,c1);
583 $UMULL r7,r6,r6
584 $UMULH r8,r6,r6
585
586 addc r10,r7,r10
587 adde r11,r8,r11
588 addze r9,r0 #c1 = carry out only (r0 is 0)
589 #sqr_add_c2(a,3,1,c2,c3,c1);
590 $LD r6,`3*$BNSZ`(r4)
591 $UMULL r7,r5,r6
592 $UMULH r8,r5,r6
593
594 addc r10,r7,r10
595 adde r11,r8,r11
596 addze r9,r9
597
598 addc r10,r7,r10
599 adde r11,r8,r11
600 addze r9,r9
601 #sqr_add_c2(a,4,0,c2,c3,c1);
602 $LD r5,`0*$BNSZ`(r4)
603 $LD r6,`4*$BNSZ`(r4)
604 $UMULL r7,r5,r6
605 $UMULH r8,r5,r6
606
607 addc r10,r7,r10
608 adde r11,r8,r11
609 addze r9,r9
610
611 addc r10,r7,r10
612 adde r11,r8,r11
613 addze r9,r9
614 $ST r10,`4*$BNSZ`(r3) #r[4]=c2;
615 #sqr_add_c2(a,5,0,c3,c1,c2);
616 $LD r6,`5*$BNSZ`(r4)
617 $UMULL r7,r5,r6
618 $UMULH r8,r5,r6
619
620 addc r11,r7,r11
621 adde r9,r8,r9
622 addze r10,r0 #c2 = carry out only (r0 is 0)
623
624 addc r11,r7,r11
625 adde r9,r8,r9
626 addze r10,r10
627 #sqr_add_c2(a,4,1,c3,c1,c2);
628 $LD r5,`1*$BNSZ`(r4)
629 $LD r6,`4*$BNSZ`(r4)
630 $UMULL r7,r5,r6
631 $UMULH r8,r5,r6
632
633 addc r11,r7,r11
634 adde r9,r8,r9
635 addze r10,r10
636
637 addc r11,r7,r11
638 adde r9,r8,r9
639 addze r10,r10
640 #sqr_add_c2(a,3,2,c3,c1,c2);
641 $LD r5,`2*$BNSZ`(r4)
642 $LD r6,`3*$BNSZ`(r4)
643 $UMULL r7,r5,r6
644 $UMULH r8,r5,r6
645
646 addc r11,r7,r11
647 adde r9,r8,r9
648 addze r10,r10
649
650 addc r11,r7,r11
651 adde r9,r8,r9
652 addze r10,r10
653 $ST r11,`5*$BNSZ`(r3) #r[5]=c3;
654 #sqr_add_c(a,3,c1,c2,c3);
655 $UMULL r7,r6,r6
656 $UMULH r8,r6,r6
657 addc r9,r7,r9
658 adde r10,r8,r10
659 addze r11,r0 #c3 = carry out only (r0 is 0)
660 #sqr_add_c2(a,4,2,c1,c2,c3);
661 $LD r6,`4*$BNSZ`(r4)
662 $UMULL r7,r5,r6
663 $UMULH r8,r5,r6
664
665 addc r9,r7,r9
666 adde r10,r8,r10
667 addze r11,r11
668
669 addc r9,r7,r9
670 adde r10,r8,r10
671 addze r11,r11
672 #sqr_add_c2(a,5,1,c1,c2,c3);
673 $LD r5,`1*$BNSZ`(r4)
674 $LD r6,`5*$BNSZ`(r4)
675 $UMULL r7,r5,r6
676 $UMULH r8,r5,r6
677
678 addc r9,r7,r9
679 adde r10,r8,r10
680 addze r11,r11
681
682 addc r9,r7,r9
683 adde r10,r8,r10
684 addze r11,r11
685 #sqr_add_c2(a,6,0,c1,c2,c3);
686 $LD r5,`0*$BNSZ`(r4)
687 $LD r6,`6*$BNSZ`(r4)
688 $UMULL r7,r5,r6
689 $UMULH r8,r5,r6
690 addc r9,r7,r9
691 adde r10,r8,r10
692 addze r11,r11
693 addc r9,r7,r9
694 adde r10,r8,r10
695 addze r11,r11
696 $ST r9,`6*$BNSZ`(r3) #r[6]=c1;
697 #sqr_add_c2(a,7,0,c2,c3,c1);
698 $LD r6,`7*$BNSZ`(r4)
699 $UMULL r7,r5,r6
700 $UMULH r8,r5,r6
701
702 addc r10,r7,r10
703 adde r11,r8,r11
704 addze r9,r0 #c1 = carry out only (r0 is 0)
705 addc r10,r7,r10
706 adde r11,r8,r11
707 addze r9,r9
708 #sqr_add_c2(a,6,1,c2,c3,c1);
709 $LD r5,`1*$BNSZ`(r4)
710 $LD r6,`6*$BNSZ`(r4)
711 $UMULL r7,r5,r6
712 $UMULH r8,r5,r6
713
714 addc r10,r7,r10
715 adde r11,r8,r11
716 addze r9,r9
717 addc r10,r7,r10
718 adde r11,r8,r11
719 addze r9,r9
720 #sqr_add_c2(a,5,2,c2,c3,c1);
721 $LD r5,`2*$BNSZ`(r4)
722 $LD r6,`5*$BNSZ`(r4)
723 $UMULL r7,r5,r6
724 $UMULH r8,r5,r6
725 addc r10,r7,r10
726 adde r11,r8,r11
727 addze r9,r9
728 addc r10,r7,r10
729 adde r11,r8,r11
730 addze r9,r9
731 #sqr_add_c2(a,4,3,c2,c3,c1);
732 $LD r5,`3*$BNSZ`(r4)
733 $LD r6,`4*$BNSZ`(r4)
734 $UMULL r7,r5,r6
735 $UMULH r8,r5,r6
736
737 addc r10,r7,r10
738 adde r11,r8,r11
739 addze r9,r9
740 addc r10,r7,r10
741 adde r11,r8,r11
742 addze r9,r9
743 $ST r10,`7*$BNSZ`(r3) #r[7]=c2;
744 #sqr_add_c(a,4,c3,c1,c2);
745 $UMULL r7,r6,r6
746 $UMULH r8,r6,r6
747 addc r11,r7,r11
748 adde r9,r8,r9
749 addze r10,r0 #c2 = carry out only (r0 is 0)
750 #sqr_add_c2(a,5,3,c3,c1,c2);
751 $LD r6,`5*$BNSZ`(r4)
752 $UMULL r7,r5,r6
753 $UMULH r8,r5,r6
754 addc r11,r7,r11
755 adde r9,r8,r9
756 addze r10,r10
757 addc r11,r7,r11
758 adde r9,r8,r9
759 addze r10,r10
760 #sqr_add_c2(a,6,2,c3,c1,c2);
761 $LD r5,`2*$BNSZ`(r4)
762 $LD r6,`6*$BNSZ`(r4)
763 $UMULL r7,r5,r6
764 $UMULH r8,r5,r6
765 addc r11,r7,r11
766 adde r9,r8,r9
767 addze r10,r10
768
769 addc r11,r7,r11
770 adde r9,r8,r9
771 addze r10,r10
772 #sqr_add_c2(a,7,1,c3,c1,c2);
773 $LD r5,`1*$BNSZ`(r4)
774 $LD r6,`7*$BNSZ`(r4)
775 $UMULL r7,r5,r6
776 $UMULH r8,r5,r6
777 addc r11,r7,r11
778 adde r9,r8,r9
779 addze r10,r10
780 addc r11,r7,r11
781 adde r9,r8,r9
782 addze r10,r10
783 $ST r11,`8*$BNSZ`(r3) #r[8]=c3;
784 #sqr_add_c2(a,7,2,c1,c2,c3);
785 $LD r5,`2*$BNSZ`(r4)
786 $UMULL r7,r5,r6
787 $UMULH r8,r5,r6
788
789 addc r9,r7,r9
790 adde r10,r8,r10
791 addze r11,r0 #c3 = carry out only (r0 is 0)
792 addc r9,r7,r9
793 adde r10,r8,r10
794 addze r11,r11
795 #sqr_add_c2(a,6,3,c1,c2,c3);
796 $LD r5,`3*$BNSZ`(r4)
797 $LD r6,`6*$BNSZ`(r4)
798 $UMULL r7,r5,r6
799 $UMULH r8,r5,r6
800 addc r9,r7,r9
801 adde r10,r8,r10
802 addze r11,r11
803 addc r9,r7,r9
804 adde r10,r8,r10
805 addze r11,r11
806 #sqr_add_c2(a,5,4,c1,c2,c3);
807 $LD r5,`4*$BNSZ`(r4)
808 $LD r6,`5*$BNSZ`(r4)
809 $UMULL r7,r5,r6
810 $UMULH r8,r5,r6
811 addc r9,r7,r9
812 adde r10,r8,r10
813 addze r11,r11
814 addc r9,r7,r9
815 adde r10,r8,r10
816 addze r11,r11
817 $ST r9,`9*$BNSZ`(r3) #r[9]=c1;
818 #sqr_add_c(a,5,c2,c3,c1);
819 $UMULL r7,r6,r6
820 $UMULH r8,r6,r6
821 addc r10,r7,r10
822 adde r11,r8,r11
823 addze r9,r0 #c1 = carry out only (r0 is 0)
824 #sqr_add_c2(a,6,4,c2,c3,c1);
825 $LD r6,`6*$BNSZ`(r4)
826 $UMULL r7,r5,r6
827 $UMULH r8,r5,r6
828 addc r10,r7,r10
829 adde r11,r8,r11
830 addze r9,r9
831 addc r10,r7,r10
832 adde r11,r8,r11
833 addze r9,r9
834 #sqr_add_c2(a,7,3,c2,c3,c1);
835 $LD r5,`3*$BNSZ`(r4)
836 $LD r6,`7*$BNSZ`(r4)
837 $UMULL r7,r5,r6
838 $UMULH r8,r5,r6
839 addc r10,r7,r10
840 adde r11,r8,r11
841 addze r9,r9
842 addc r10,r7,r10
843 adde r11,r8,r11
844 addze r9,r9
845 $ST r10,`10*$BNSZ`(r3) #r[10]=c2;
846 #sqr_add_c2(a,7,4,c3,c1,c2);
847 $LD r5,`4*$BNSZ`(r4)
848 $UMULL r7,r5,r6
849 $UMULH r8,r5,r6
850 addc r11,r7,r11
851 adde r9,r8,r9
852 addze r10,r0 #c2 = carry out only (r0 is 0)
853 addc r11,r7,r11
854 adde r9,r8,r9
855 addze r10,r10
856 #sqr_add_c2(a,6,5,c3,c1,c2);
857 $LD r5,`5*$BNSZ`(r4)
858 $LD r6,`6*$BNSZ`(r4)
859 $UMULL r7,r5,r6
860 $UMULH r8,r5,r6
861 addc r11,r7,r11
862 adde r9,r8,r9
863 addze r10,r10
864 addc r11,r7,r11
865 adde r9,r8,r9
866 addze r10,r10
867 $ST r11,`11*$BNSZ`(r3) #r[11]=c3;
868 #sqr_add_c(a,6,c1,c2,c3);
869 $UMULL r7,r6,r6
870 $UMULH r8,r6,r6
871 addc r9,r7,r9
872 adde r10,r8,r10
873 addze r11,r0 #c3 = carry out only (r0 is 0)
874 #sqr_add_c2(a,7,5,c1,c2,c3)
875 $LD r6,`7*$BNSZ`(r4)
876 $UMULL r7,r5,r6
877 $UMULH r8,r5,r6
878 addc r9,r7,r9
879 adde r10,r8,r10
880 addze r11,r11
881 addc r9,r7,r9
882 adde r10,r8,r10
883 addze r11,r11
884 $ST r9,`12*$BNSZ`(r3) #r[12]=c1;
885
886 #sqr_add_c2(a,7,6,c2,c3,c1)
887 $LD r5,`6*$BNSZ`(r4)
888 $UMULL r7,r5,r6
889 $UMULH r8,r5,r6
890 addc r10,r7,r10
891 adde r11,r8,r11
892 addze r9,r0 #c1 = carry out only (r0 is 0)
893 addc r10,r7,r10
894 adde r11,r8,r11
895 addze r9,r9
896 $ST r10,`13*$BNSZ`(r3) #r[13]=c2;
897 #sqr_add_c(a,7,c3,c1,c2);
898 $UMULL r7,r6,r6
899 $UMULH r8,r6,r6
900 addc r11,r7,r11
901 adde r9,r8,r9 #last column: no third word is kept
902 $ST r11,`14*$BNSZ`(r3) #r[14]=c3;
903 $ST r9, `15*$BNSZ`(r3) #r[15]=c1;
904
905
906 bclr BO_ALWAYS,CR0_LT
907
908 .long 0x00000000
909
910#
911# NOTE: The following label name should be changed to
912# "bn_mul_comba4" i.e. remove the first dot
913# for the gcc compiler. This should be automatically
914# done in the build
915#
916
917.align 4
918.bn_mul_comba4:
919#
920# Fully unrolled comba (column by column) 4x4 word multiplication.
921#
922# void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
923# r3 contains r (words r[0]..r[7] are written)
924# r4 contains a (a[0]..a[3]); every a[i] load must use base r4
925# r5 contains b (b[0]..b[3]); every b[j] load must use base r5
926# r6, r7 are the 2 BN_ULONGs being multiplied (r6 = a[i], r7 = b[j]).
927# r8, r9 are the low and high words of the double-width product.
928# r10, r11, r12 are the equivalents of c1, c2, and c3.
929#
930 xor r0,r0,r0 #r0=0. Used in addze below.
931 #mul_add_c(a[0],b[0],c1,c2,c3);
932 $LD r6,`0*$BNSZ`(r4)
933 $LD r7,`0*$BNSZ`(r5)
934 $UMULL r10,r6,r7
935 $UMULH r11,r6,r7
936 $ST r10,`0*$BNSZ`(r3) #r[0]=c1
937 #mul_add_c(a[0],b[1],c2,c3,c1);
938 $LD r7,`1*$BNSZ`(r5)
939 $UMULL r8,r6,r7
940 $UMULH r9,r6,r7
941 addc r11,r8,r11
942 adde r12,r9,r0 #c3 was never initialised: add to r0 (0)
943 addze r10,r0 #c1 = carry out only (r0 is 0)
944 #mul_add_c(a[1],b[0],c2,c3,c1);
945 $LD r6, `1*$BNSZ`(r4)
946 $LD r7, `0*$BNSZ`(r5)
947 $UMULL r8,r6,r7
948 $UMULH r9,r6,r7
949 addc r11,r8,r11
950 adde r12,r9,r12
951 addze r10,r10
952 $ST r11,`1*$BNSZ`(r3) #r[1]=c2
953 #mul_add_c(a[2],b[0],c3,c1,c2);
954 $LD r6,`2*$BNSZ`(r4) #r7 is still b[0]
955 $UMULL r8,r6,r7
956 $UMULH r9,r6,r7
957 addc r12,r8,r12
958 adde r10,r9,r10
959 addze r11,r0 #c2 = carry out only (r0 is 0)
960 #mul_add_c(a[1],b[1],c3,c1,c2);
961 $LD r6,`1*$BNSZ`(r4)
962 $LD r7,`1*$BNSZ`(r5)
963 $UMULL r8,r6,r7
964 $UMULH r9,r6,r7
965 addc r12,r8,r12
966 adde r10,r9,r10
967 addze r11,r11
968 #mul_add_c(a[0],b[2],c3,c1,c2);
969 $LD r6,`0*$BNSZ`(r4)
970 $LD r7,`2*$BNSZ`(r5)
971 $UMULL r8,r6,r7
972 $UMULH r9,r6,r7
973 addc r12,r8,r12
974 adde r10,r9,r10
975 addze r11,r11
976 $ST r12,`2*$BNSZ`(r3) #r[2]=c3
977 #mul_add_c(a[0],b[3],c1,c2,c3);
978 $LD r7,`3*$BNSZ`(r5) #r6 is still a[0]
979 $UMULL r8,r6,r7
980 $UMULH r9,r6,r7
981 addc r10,r8,r10
982 adde r11,r9,r11
983 addze r12,r0 #c3 = carry out only (r0 is 0)
984 #mul_add_c(a[1],b[2],c1,c2,c3);
985 $LD r6,`1*$BNSZ`(r4)
986 $LD r7,`2*$BNSZ`(r5)
987 $UMULL r8,r6,r7
988 $UMULH r9,r6,r7
989 addc r10,r8,r10
990 adde r11,r9,r11
991 addze r12,r12
992 #mul_add_c(a[2],b[1],c1,c2,c3);
993 $LD r6,`2*$BNSZ`(r4)
994 $LD r7,`1*$BNSZ`(r5)
995 $UMULL r8,r6,r7
996 $UMULH r9,r6,r7
997 addc r10,r8,r10
998 adde r11,r9,r11
999 addze r12,r12
1000 #mul_add_c(a[3],b[0],c1,c2,c3);
1001 $LD r6,`3*$BNSZ`(r4)
1002 $LD r7,`0*$BNSZ`(r5)
1003 $UMULL r8,r6,r7
1004 $UMULH r9,r6,r7
1005 addc r10,r8,r10
1006 adde r11,r9,r11
1007 addze r12,r12
1008 $ST r10,`3*$BNSZ`(r3) #r[3]=c1
1009 #mul_add_c(a[3],b[1],c2,c3,c1);
1010 $LD r7,`1*$BNSZ`(r5) #r6 is still a[3]
1011 $UMULL r8,r6,r7
1012 $UMULH r9,r6,r7
1013 addc r11,r8,r11
1014 adde r12,r9,r12
1015 addze r10,r0 #c1 = carry out only (r0 is 0)
1016 #mul_add_c(a[2],b[2],c2,c3,c1);
1017 $LD r6,`2*$BNSZ`(r4)
1018 $LD r7,`2*$BNSZ`(r5)
1019 $UMULL r8,r6,r7
1020 $UMULH r9,r6,r7
1021 addc r11,r8,r11
1022 adde r12,r9,r12
1023 addze r10,r10
1024 #mul_add_c(a[1],b[3],c2,c3,c1);
1025 $LD r6,`1*$BNSZ`(r4)
1026 $LD r7,`3*$BNSZ`(r5)
1027 $UMULL r8,r6,r7
1028 $UMULH r9,r6,r7
1029 addc r11,r8,r11
1030 adde r12,r9,r12
1031 addze r10,r10
1032 $ST r11,`4*$BNSZ`(r3) #r[4]=c2
1033 #mul_add_c(a[2],b[3],c3,c1,c2);
1034 $LD r6,`2*$BNSZ`(r4) #r7 is still b[3]
1035 $UMULL r8,r6,r7
1036 $UMULH r9,r6,r7
1037 addc r12,r8,r12
1038 adde r10,r9,r10
1039 addze r11,r0 #c2 = carry out only (r0 is 0)
1040 #mul_add_c(a[3],b[2],c3,c1,c2);
1041 $LD r6,`3*$BNSZ`(r4)
1042 $LD r7,`2*$BNSZ`(r5) #b[2]: base must be r5 (b), not r4 (a)
1043 $UMULL r8,r6,r7
1044 $UMULH r9,r6,r7
1045 addc r12,r8,r12
1046 adde r10,r9,r10
1047 addze r11,r11
1048 $ST r12,`5*$BNSZ`(r3) #r[5]=c3
1049 #mul_add_c(a[3],b[3],c1,c2,c3);
1050 $LD r7,`3*$BNSZ`(r5) #r6 is still a[3]
1051 $UMULL r8,r6,r7
1052 $UMULH r9,r6,r7
1053 addc r10,r8,r10
1054 adde r11,r9,r11 #last column: no third word is kept
1055
1056 $ST r10,`6*$BNSZ`(r3) #r[6]=c1
1057 $ST r11,`7*$BNSZ`(r3) #r[7]=c2
1058 bclr BO_ALWAYS,CR0_LT
1059 .long 0x00000000
1060
1061#
1062# NOTE: The following label name should be changed to
1063# "bn_mul_comba8" i.e. remove the first dot
1064# for the gcc compiler. This should be automatically
1065# done in the build
1066#
1067
1068.align 4
1069.bn_mul_comba8:
1070#
1071# Fully unrolled comba (column by column) 8x8 word multiplication.
1072#
1073# void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1074# r3 contains r (words r[0]..r[15] are written)
1075# r4 contains a (words a[0]..a[7] are read)
1076# r5 contains b (words b[0]..b[7] are read)
1077# r6, r7 are the 2 BN_ULONGs being multiplied (r6 = a[i], r7 = b[j]).
1078# r8, r9 are the low and high words of the double-width product.
1079# r10, r11, r12 are the equivalents of c1, c2, and c3.
1080#
1081 xor r0,r0,r0 #r0=0. Used in addze below.
1082
1083 #mul_add_c(a[0],b[0],c1,c2,c3);
1084 $LD r6,`0*$BNSZ`(r4) #a[0]
1085 $LD r7,`0*$BNSZ`(r5) #b[0]
1086 $UMULL r10,r6,r7
1087 $UMULH r11,r6,r7
1088 $ST r10,`0*$BNSZ`(r3) #r[0]=c1;
1089 #mul_add_c(a[0],b[1],c2,c3,c1);
1090 $LD r7,`1*$BNSZ`(r5)
1091 $UMULL r8,r6,r7
1092 $UMULH r9,r6,r7
1093 addc r11,r11,r8
1094 addze r12,r9 # since we didn't set r12 to zero before.
1095 addze r10,r0 #c1 = carry out only (r0 is 0)
1096 #mul_add_c(a[1],b[0],c2,c3,c1);
1097 $LD r6,`1*$BNSZ`(r4)
1098 $LD r7,`0*$BNSZ`(r5)
1099 $UMULL r8,r6,r7
1100 $UMULH r9,r6,r7
1101 addc r11,r11,r8
1102 adde r12,r12,r9
1103 addze r10,r10
1104 $ST r11,`1*$BNSZ`(r3) #r[1]=c2;
1105 #mul_add_c(a[2],b[0],c3,c1,c2);
1106 $LD r6,`2*$BNSZ`(r4) #r7 is still b[0]
1107 $UMULL r8,r6,r7
1108 $UMULH r9,r6,r7
1109 addc r12,r12,r8
1110 adde r10,r10,r9
1111 addze r11,r0 #c2 = carry out only (r0 is 0)
1112 #mul_add_c(a[1],b[1],c3,c1,c2);
1113 $LD r6,`1*$BNSZ`(r4)
1114 $LD r7,`1*$BNSZ`(r5)
1115 $UMULL r8,r6,r7
1116 $UMULH r9,r6,r7
1117 addc r12,r12,r8
1118 adde r10,r10,r9
1119 addze r11,r11
1120 #mul_add_c(a[0],b[2],c3,c1,c2);
1121 $LD r6,`0*$BNSZ`(r4)
1122 $LD r7,`2*$BNSZ`(r5)
1123 $UMULL r8,r6,r7
1124 $UMULH r9,r6,r7
1125 addc r12,r12,r8
1126 adde r10,r10,r9
1127 addze r11,r11
1128 $ST r12,`2*$BNSZ`(r3) #r[2]=c3;
1129 #mul_add_c(a[0],b[3],c1,c2,c3);
1130 $LD r7,`3*$BNSZ`(r5) #r6 is still a[0]
1131 $UMULL r8,r6,r7
1132 $UMULH r9,r6,r7
1133 addc r10,r10,r8
1134 adde r11,r11,r9
1135 addze r12,r0 #c3 = carry out only (r0 is 0)
1136 #mul_add_c(a[1],b[2],c1,c2,c3);
1137 $LD r6,`1*$BNSZ`(r4)
1138 $LD r7,`2*$BNSZ`(r5)
1139 $UMULL r8,r6,r7
1140 $UMULH r9,r6,r7
1141 addc r10,r10,r8
1142 adde r11,r11,r9
1143 addze r12,r12
1144
1145 #mul_add_c(a[2],b[1],c1,c2,c3);
1146 $LD r6,`2*$BNSZ`(r4)
1147 $LD r7,`1*$BNSZ`(r5)
1148 $UMULL r8,r6,r7
1149 $UMULH r9,r6,r7
1150 addc r10,r10,r8
1151 adde r11,r11,r9
1152 addze r12,r12
1153 #mul_add_c(a[3],b[0],c1,c2,c3);
1154 $LD r6,`3*$BNSZ`(r4)
1155 $LD r7,`0*$BNSZ`(r5)
1156 $UMULL r8,r6,r7
1157 $UMULH r9,r6,r7
1158 addc r10,r10,r8
1159 adde r11,r11,r9
1160 addze r12,r12
1161 $ST r10,`3*$BNSZ`(r3) #r[3]=c1;
1162 #mul_add_c(a[4],b[0],c2,c3,c1);
1163 $LD r6,`4*$BNSZ`(r4) #r7 is still b[0]
1164 $UMULL r8,r6,r7
1165 $UMULH r9,r6,r7
1166 addc r11,r11,r8
1167 adde r12,r12,r9
1168 addze r10,r0 #c1 = carry out only (r0 is 0)
1169 #mul_add_c(a[3],b[1],c2,c3,c1);
1170 $LD r6,`3*$BNSZ`(r4)
1171 $LD r7,`1*$BNSZ`(r5)
1172 $UMULL r8,r6,r7
1173 $UMULH r9,r6,r7
1174 addc r11,r11,r8
1175 adde r12,r12,r9
1176 addze r10,r10
1177 #mul_add_c(a[2],b[2],c2,c3,c1);
1178 $LD r6,`2*$BNSZ`(r4)
1179 $LD r7,`2*$BNSZ`(r5)
1180 $UMULL r8,r6,r7
1181 $UMULH r9,r6,r7
1182 addc r11,r11,r8
1183 adde r12,r12,r9
1184 addze r10,r10
1185 #mul_add_c(a[1],b[3],c2,c3,c1);
1186 $LD r6,`1*$BNSZ`(r4)
1187 $LD r7,`3*$BNSZ`(r5)
1188 $UMULL r8,r6,r7
1189 $UMULH r9,r6,r7
1190 addc r11,r11,r8
1191 adde r12,r12,r9
1192 addze r10,r10
1193 #mul_add_c(a[0],b[4],c2,c3,c1);
1194 $LD r6,`0*$BNSZ`(r4)
1195 $LD r7,`4*$BNSZ`(r5)
1196 $UMULL r8,r6,r7
1197 $UMULH r9,r6,r7
1198 addc r11,r11,r8
1199 adde r12,r12,r9
1200 addze r10,r10
1201 $ST r11,`4*$BNSZ`(r3) #r[4]=c2;
1202 #mul_add_c(a[0],b[5],c3,c1,c2);
1203 $LD r7,`5*$BNSZ`(r5) #r6 is still a[0]
1204 $UMULL r8,r6,r7
1205 $UMULH r9,r6,r7
1206 addc r12,r12,r8
1207 adde r10,r10,r9
1208 addze r11,r0 #c2 = carry out only (r0 is 0)
1209 #mul_add_c(a[1],b[4],c3,c1,c2);
1210 $LD r6,`1*$BNSZ`(r4)
1211 $LD r7,`4*$BNSZ`(r5)
1212 $UMULL r8,r6,r7
1213 $UMULH r9,r6,r7
1214 addc r12,r12,r8
1215 adde r10,r10,r9
1216 addze r11,r11
1217 #mul_add_c(a[2],b[3],c3,c1,c2);
1218 $LD r6,`2*$BNSZ`(r4)
1219 $LD r7,`3*$BNSZ`(r5)
1220 $UMULL r8,r6,r7
1221 $UMULH r9,r6,r7
1222 addc r12,r12,r8
1223 adde r10,r10,r9
1224 addze r11,r11
1225 #mul_add_c(a[3],b[2],c3,c1,c2);
1226 $LD r6,`3*$BNSZ`(r4)
1227 $LD r7,`2*$BNSZ`(r5)
1228 $UMULL r8,r6,r7
1229 $UMULH r9,r6,r7
1230 addc r12,r12,r8
1231 adde r10,r10,r9
1232 addze r11,r11
1233 #mul_add_c(a[4],b[1],c3,c1,c2);
1234 $LD r6,`4*$BNSZ`(r4)
1235 $LD r7,`1*$BNSZ`(r5)
1236 $UMULL r8,r6,r7
1237 $UMULH r9,r6,r7
1238 addc r12,r12,r8
1239 adde r10,r10,r9
1240 addze r11,r11
1241 #mul_add_c(a[5],b[0],c3,c1,c2);
1242 $LD r6,`5*$BNSZ`(r4)
1243 $LD r7,`0*$BNSZ`(r5)
1244 $UMULL r8,r6,r7
1245 $UMULH r9,r6,r7
1246 addc r12,r12,r8
1247 adde r10,r10,r9
1248 addze r11,r11
1249 $ST r12,`5*$BNSZ`(r3) #r[5]=c3;
1250 #mul_add_c(a[6],b[0],c1,c2,c3);
1251 $LD r6,`6*$BNSZ`(r4) #r7 is still b[0]
1252 $UMULL r8,r6,r7
1253 $UMULH r9,r6,r7
1254 addc r10,r10,r8
1255 adde r11,r11,r9
1256 addze r12,r0 #c3 = carry out only (r0 is 0)
1257 #mul_add_c(a[5],b[1],c1,c2,c3);
1258 $LD r6,`5*$BNSZ`(r4)
1259 $LD r7,`1*$BNSZ`(r5)
1260 $UMULL r8,r6,r7
1261 $UMULH r9,r6,r7
1262 addc r10,r10,r8
1263 adde r11,r11,r9
1264 addze r12,r12
1265 #mul_add_c(a[4],b[2],c1,c2,c3);
1266 $LD r6,`4*$BNSZ`(r4)
1267 $LD r7,`2*$BNSZ`(r5)
1268 $UMULL r8,r6,r7
1269 $UMULH r9,r6,r7
1270 addc r10,r10,r8
1271 adde r11,r11,r9
1272 addze r12,r12
1273 #mul_add_c(a[3],b[3],c1,c2,c3);
1274 $LD r6,`3*$BNSZ`(r4)
1275 $LD r7,`3*$BNSZ`(r5)
1276 $UMULL r8,r6,r7
1277 $UMULH r9,r6,r7
1278 addc r10,r10,r8
1279 adde r11,r11,r9
1280 addze r12,r12
1281 #mul_add_c(a[2],b[4],c1,c2,c3);
1282 $LD r6,`2*$BNSZ`(r4)
1283 $LD r7,`4*$BNSZ`(r5)
1284 $UMULL r8,r6,r7
1285 $UMULH r9,r6,r7
1286 addc r10,r10,r8
1287 adde r11,r11,r9
1288 addze r12,r12
1289 #mul_add_c(a[1],b[5],c1,c2,c3);
1290 $LD r6,`1*$BNSZ`(r4)
1291 $LD r7,`5*$BNSZ`(r5)
1292 $UMULL r8,r6,r7
1293 $UMULH r9,r6,r7
1294 addc r10,r10,r8
1295 adde r11,r11,r9
1296 addze r12,r12
1297 #mul_add_c(a[0],b[6],c1,c2,c3);
1298 $LD r6,`0*$BNSZ`(r4)
1299 $LD r7,`6*$BNSZ`(r5)
1300 $UMULL r8,r6,r7
1301 $UMULH r9,r6,r7
1302 addc r10,r10,r8
1303 adde r11,r11,r9
1304 addze r12,r12
1305 $ST r10,`6*$BNSZ`(r3) #r[6]=c1;
1306 #mul_add_c(a[0],b[7],c2,c3,c1);
1307 $LD r7,`7*$BNSZ`(r5) #r6 is still a[0]
1308 $UMULL r8,r6,r7
1309 $UMULH r9,r6,r7
1310 addc r11,r11,r8
1311 adde r12,r12,r9
1312 addze r10,r0 #c1 = carry out only (r0 is 0)
1313 #mul_add_c(a[1],b[6],c2,c3,c1);
1314 $LD r6,`1*$BNSZ`(r4)
1315 $LD r7,`6*$BNSZ`(r5)
1316 $UMULL r8,r6,r7
1317 $UMULH r9,r6,r7
1318 addc r11,r11,r8
1319 adde r12,r12,r9
1320 addze r10,r10
1321 #mul_add_c(a[2],b[5],c2,c3,c1);
1322 $LD r6,`2*$BNSZ`(r4)
1323 $LD r7,`5*$BNSZ`(r5)
1324 $UMULL r8,r6,r7
1325 $UMULH r9,r6,r7
1326 addc r11,r11,r8
1327 adde r12,r12,r9
1328 addze r10,r10
1329 #mul_add_c(a[3],b[4],c2,c3,c1);
1330 $LD r6,`3*$BNSZ`(r4)
1331 $LD r7,`4*$BNSZ`(r5)
1332 $UMULL r8,r6,r7
1333 $UMULH r9,r6,r7
1334 addc r11,r11,r8
1335 adde r12,r12,r9
1336 addze r10,r10
1337 #mul_add_c(a[4],b[3],c2,c3,c1);
1338 $LD r6,`4*$BNSZ`(r4)
1339 $LD r7,`3*$BNSZ`(r5)
1340 $UMULL r8,r6,r7
1341 $UMULH r9,r6,r7
1342 addc r11,r11,r8
1343 adde r12,r12,r9
1344 addze r10,r10
1345 #mul_add_c(a[5],b[2],c2,c3,c1);
1346 $LD r6,`5*$BNSZ`(r4)
1347 $LD r7,`2*$BNSZ`(r5)
1348 $UMULL r8,r6,r7
1349 $UMULH r9,r6,r7
1350 addc r11,r11,r8
1351 adde r12,r12,r9
1352 addze r10,r10
1353 #mul_add_c(a[6],b[1],c2,c3,c1);
1354 $LD r6,`6*$BNSZ`(r4)
1355 $LD r7,`1*$BNSZ`(r5)
1356 $UMULL r8,r6,r7
1357 $UMULH r9,r6,r7
1358 addc r11,r11,r8
1359 adde r12,r12,r9
1360 addze r10,r10
1361 #mul_add_c(a[7],b[0],c2,c3,c1);
1362 $LD r6,`7*$BNSZ`(r4)
1363 $LD r7,`0*$BNSZ`(r5)
1364 $UMULL r8,r6,r7
1365 $UMULH r9,r6,r7
1366 addc r11,r11,r8
1367 adde r12,r12,r9
1368 addze r10,r10
1369 $ST r11,`7*$BNSZ`(r3) #r[7]=c2;
1370 #mul_add_c(a[7],b[1],c3,c1,c2);
1371 $LD r7,`1*$BNSZ`(r5) #r6 is still a[7]
1372 $UMULL r8,r6,r7
1373 $UMULH r9,r6,r7
1374 addc r12,r12,r8
1375 adde r10,r10,r9
1376 addze r11,r0 #c2 = carry out only (r0 is 0)
1377 #mul_add_c(a[6],b[2],c3,c1,c2);
1378 $LD r6,`6*$BNSZ`(r4)
1379 $LD r7,`2*$BNSZ`(r5)
1380 $UMULL r8,r6,r7
1381 $UMULH r9,r6,r7
1382 addc r12,r12,r8
1383 adde r10,r10,r9
1384 addze r11,r11
1385 #mul_add_c(a[5],b[3],c3,c1,c2);
1386 $LD r6,`5*$BNSZ`(r4)
1387 $LD r7,`3*$BNSZ`(r5)
1388 $UMULL r8,r6,r7
1389 $UMULH r9,r6,r7
1390 addc r12,r12,r8
1391 adde r10,r10,r9
1392 addze r11,r11
1393 #mul_add_c(a[4],b[4],c3,c1,c2);
1394 $LD r6,`4*$BNSZ`(r4)
1395 $LD r7,`4*$BNSZ`(r5)
1396 $UMULL r8,r6,r7
1397 $UMULH r9,r6,r7
1398 addc r12,r12,r8
1399 adde r10,r10,r9
1400 addze r11,r11
1401 #mul_add_c(a[3],b[5],c3,c1,c2);
1402 $LD r6,`3*$BNSZ`(r4)
1403 $LD r7,`5*$BNSZ`(r5)
1404 $UMULL r8,r6,r7
1405 $UMULH r9,r6,r7
1406 addc r12,r12,r8
1407 adde r10,r10,r9
1408 addze r11,r11
1409 #mul_add_c(a[2],b[6],c3,c1,c2);
1410 $LD r6,`2*$BNSZ`(r4)
1411 $LD r7,`6*$BNSZ`(r5)
1412 $UMULL r8,r6,r7
1413 $UMULH r9,r6,r7
1414 addc r12,r12,r8
1415 adde r10,r10,r9
1416 addze r11,r11
1417 #mul_add_c(a[1],b[7],c3,c1,c2);
1418 $LD r6,`1*$BNSZ`(r4)
1419 $LD r7,`7*$BNSZ`(r5)
1420 $UMULL r8,r6,r7
1421 $UMULH r9,r6,r7
1422 addc r12,r12,r8
1423 adde r10,r10,r9
1424 addze r11,r11
1425 $ST r12,`8*$BNSZ`(r3) #r[8]=c3;
1426 #mul_add_c(a[2],b[7],c1,c2,c3);
1427 $LD r6,`2*$BNSZ`(r4) #r7 is still b[7]
1428 $UMULL r8,r6,r7
1429 $UMULH r9,r6,r7
1430 addc r10,r10,r8
1431 adde r11,r11,r9
1432 addze r12,r0 #c3 = carry out only (r0 is 0)
1433 #mul_add_c(a[3],b[6],c1,c2,c3);
1434 $LD r6,`3*$BNSZ`(r4)
1435 $LD r7,`6*$BNSZ`(r5)
1436 $UMULL r8,r6,r7
1437 $UMULH r9,r6,r7
1438 addc r10,r10,r8
1439 adde r11,r11,r9
1440 addze r12,r12
1441 #mul_add_c(a[4],b[5],c1,c2,c3);
1442 $LD r6,`4*$BNSZ`(r4)
1443 $LD r7,`5*$BNSZ`(r5)
1444 $UMULL r8,r6,r7
1445 $UMULH r9,r6,r7
1446 addc r10,r10,r8
1447 adde r11,r11,r9
1448 addze r12,r12
1449 #mul_add_c(a[5],b[4],c1,c2,c3);
1450 $LD r6,`5*$BNSZ`(r4)
1451 $LD r7,`4*$BNSZ`(r5)
1452 $UMULL r8,r6,r7
1453 $UMULH r9,r6,r7
1454 addc r10,r10,r8
1455 adde r11,r11,r9
1456 addze r12,r12
1457 #mul_add_c(a[6],b[3],c1,c2,c3);
1458 $LD r6,`6*$BNSZ`(r4)
1459 $LD r7,`3*$BNSZ`(r5)
1460 $UMULL r8,r6,r7
1461 $UMULH r9,r6,r7
1462 addc r10,r10,r8
1463 adde r11,r11,r9
1464 addze r12,r12
1465 #mul_add_c(a[7],b[2],c1,c2,c3);
1466 $LD r6,`7*$BNSZ`(r4)
1467 $LD r7,`2*$BNSZ`(r5)
1468 $UMULL r8,r6,r7
1469 $UMULH r9,r6,r7
1470 addc r10,r10,r8
1471 adde r11,r11,r9
1472 addze r12,r12
1473 $ST r10,`9*$BNSZ`(r3) #r[9]=c1;
1474 #mul_add_c(a[7],b[3],c2,c3,c1);
1475 $LD r7,`3*$BNSZ`(r5) #r6 is still a[7]
1476 $UMULL r8,r6,r7
1477 $UMULH r9,r6,r7
1478 addc r11,r11,r8
1479 adde r12,r12,r9
1480 addze r10,r0 #c1 = carry out only (r0 is 0)
1481 #mul_add_c(a[6],b[4],c2,c3,c1);
1482 $LD r6,`6*$BNSZ`(r4)
1483 $LD r7,`4*$BNSZ`(r5)
1484 $UMULL r8,r6,r7
1485 $UMULH r9,r6,r7
1486 addc r11,r11,r8
1487 adde r12,r12,r9
1488 addze r10,r10
1489 #mul_add_c(a[5],b[5],c2,c3,c1);
1490 $LD r6,`5*$BNSZ`(r4)
1491 $LD r7,`5*$BNSZ`(r5)
1492 $UMULL r8,r6,r7
1493 $UMULH r9,r6,r7
1494 addc r11,r11,r8
1495 adde r12,r12,r9
1496 addze r10,r10
1497 #mul_add_c(a[4],b[6],c2,c3,c1);
1498 $LD r6,`4*$BNSZ`(r4)
1499 $LD r7,`6*$BNSZ`(r5)
1500 $UMULL r8,r6,r7
1501 $UMULH r9,r6,r7
1502 addc r11,r11,r8
1503 adde r12,r12,r9
1504 addze r10,r10
1505 #mul_add_c(a[3],b[7],c2,c3,c1);
1506 $LD r6,`3*$BNSZ`(r4)
1507 $LD r7,`7*$BNSZ`(r5)
1508 $UMULL r8,r6,r7
1509 $UMULH r9,r6,r7
1510 addc r11,r11,r8
1511 adde r12,r12,r9
1512 addze r10,r10
1513 $ST r11,`10*$BNSZ`(r3) #r[10]=c2;
1514 #mul_add_c(a[4],b[7],c3,c1,c2);
1515 $LD r6,`4*$BNSZ`(r4) #r7 is still b[7]
1516 $UMULL r8,r6,r7
1517 $UMULH r9,r6,r7
1518 addc r12,r12,r8
1519 adde r10,r10,r9
1520 addze r11,r0 #c2 = carry out only (r0 is 0)
1521 #mul_add_c(a[5],b[6],c3,c1,c2);
1522 $LD r6,`5*$BNSZ`(r4)
1523 $LD r7,`6*$BNSZ`(r5)
1524 $UMULL r8,r6,r7
1525 $UMULH r9,r6,r7
1526 addc r12,r12,r8
1527 adde r10,r10,r9
1528 addze r11,r11
1529 #mul_add_c(a[6],b[5],c3,c1,c2);
1530 $LD r6,`6*$BNSZ`(r4)
1531 $LD r7,`5*$BNSZ`(r5)
1532 $UMULL r8,r6,r7
1533 $UMULH r9,r6,r7
1534 addc r12,r12,r8
1535 adde r10,r10,r9
1536 addze r11,r11
1537 #mul_add_c(a[7],b[4],c3,c1,c2);
1538 $LD r6,`7*$BNSZ`(r4)
1539 $LD r7,`4*$BNSZ`(r5)
1540 $UMULL r8,r6,r7
1541 $UMULH r9,r6,r7
1542 addc r12,r12,r8
1543 adde r10,r10,r9
1544 addze r11,r11
1545 $ST r12,`11*$BNSZ`(r3) #r[11]=c3;
1546 #mul_add_c(a[7],b[5],c1,c2,c3);
1547 $LD r7,`5*$BNSZ`(r5) #r6 is still a[7]
1548 $UMULL r8,r6,r7
1549 $UMULH r9,r6,r7
1550 addc r10,r10,r8
1551 adde r11,r11,r9
1552 addze r12,r0 #c3 = carry out only (r0 is 0)
1553 #mul_add_c(a[6],b[6],c1,c2,c3);
1554 $LD r6,`6*$BNSZ`(r4)
1555 $LD r7,`6*$BNSZ`(r5)
1556 $UMULL r8,r6,r7
1557 $UMULH r9,r6,r7
1558 addc r10,r10,r8
1559 adde r11,r11,r9
1560 addze r12,r12
1561 #mul_add_c(a[5],b[7],c1,c2,c3);
1562 $LD r6,`5*$BNSZ`(r4)
1563 $LD r7,`7*$BNSZ`(r5)
1564 $UMULL r8,r6,r7
1565 $UMULH r9,r6,r7
1566 addc r10,r10,r8
1567 adde r11,r11,r9
1568 addze r12,r12
1569 $ST r10,`12*$BNSZ`(r3) #r[12]=c1;
1570 #mul_add_c(a[6],b[7],c2,c3,c1);
1571 $LD r6,`6*$BNSZ`(r4) #r7 is still b[7]
1572 $UMULL r8,r6,r7
1573 $UMULH r9,r6,r7
1574 addc r11,r11,r8
1575 adde r12,r12,r9
1576 addze r10,r0 #c1 = carry out only (r0 is 0)
1577 #mul_add_c(a[7],b[6],c2,c3,c1);
1578 $LD r6,`7*$BNSZ`(r4)
1579 $LD r7,`6*$BNSZ`(r5)
1580 $UMULL r8,r6,r7
1581 $UMULH r9,r6,r7
1582 addc r11,r11,r8
1583 adde r12,r12,r9
1584 addze r10,r10
1585 $ST r11,`13*$BNSZ`(r3) #r[13]=c2;
1586 #mul_add_c(a[7],b[7],c3,c1,c2);
1587 $LD r7,`7*$BNSZ`(r5) #r6 is still a[7]
1588 $UMULL r8,r6,r7
1589 $UMULH r9,r6,r7
1590 addc r12,r12,r8
1591 adde r10,r10,r9 #last column: no third word is kept
1592 $ST r12,`14*$BNSZ`(r3) #r[14]=c3;
1593 $ST r10,`15*$BNSZ`(r3) #r[15]=c1;
1594 bclr BO_ALWAYS,CR0_LT
1595 .long 0x00000000
1596
1597#
1598# NOTE: The following label name should be changed to
1599# "bn_sub_words" i.e. remove the first dot
1600# for the gcc compiler. This should be automatically
1601# done in the build
1602#
1603#
1604.align 4
1605.bn_sub_words:
1606#
1607# Handcoded version of bn_sub_words: r[i] = a[i] - b[i] - borrow for
1608# i = 0 .. n-1. Returns the final borrow (0 or 1) in r3.
1609#BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1610#
1611# r3 = r
1612# r4 = a
1613# r5 = b
1614# r6 = n
1615#
1616# Note: No loop unrolling done since this is not a performance
1617# critical loop.
1618
1619 xor r0,r0,r0 #set r0 = 0
1620#
1621# check for r6 = 0 AND set carry bit (carry set means no borrow).
1622#
1623 subfc. r7,r0,r6 # If r6 is 0 then result is 0.
1624 # if r6 > 0 then result !=0
1625 # In either case carry bit is set.
1626 bc BO_IF,CR0_EQ,Lppcasm_sub_adios
1627 addi r4,r4,-$BNSZ # pre-decrement the three pointers: the
1628 addi r3,r3,-$BNSZ # update-form load/store below advance
1629 addi r5,r5,-$BNSZ # them before each access.
1630 mtctr r6 # loop count = n
1631Lppcasm_sub_mainloop:
1632 $LDU r7,$BNSZ(r4)
1633 $LDU r8,$BNSZ(r5)
1634 subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8)
1635 # if carry = 1 this is r7-r8. Else it
1636 # is r7-r8 -1 as we need.
1637 $STU r6,$BNSZ(r3)
1638 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
1639Lppcasm_sub_adios:
1640 subfze r3,r0 # if carry bit is set then r3 = 0 else -1
1641 andi. r3,r3,1 # keep only last bit: return value is 0 or 1.
1642 bclr BO_ALWAYS,CR0_LT
1643 .long 0x00000000
1644
1645
1646#
1647# NOTE: The following label name should be changed to
1648# "bn_add_words" i.e. remove the first dot
1649# for the gcc compiler. This should be automatically
1650# done in the build
1651#
1652
1653.align 4
1654.bn_add_words:
1655#
1656# Handcoded version of bn_add_words: r[i] = a[i] + b[i] + carry for
1657# i = 0 .. n-1. Returns the final carry (0 or 1) in r3.
1658#BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1659#
1660# r3 = r
1661# r4 = a
1662# r5 = b
1663# r6 = n
1664#
1665# Note: No loop unrolling done since this is not a performance
1666# critical loop.
1667
1668 xor r0,r0,r0 #r0 = 0. Used by addze for the return value.
1669#
1670# check for r6 = 0. Is this needed?
1671#
1672 addic. r6,r6,0 #test r6 and clear carry bit.
1673 bc BO_IF,CR0_EQ,Lppcasm_add_adios
1674 addi r4,r4,-$BNSZ # pre-decrement the three pointers: the
1675 addi r3,r3,-$BNSZ # update-form load/store below advance
1676 addi r5,r5,-$BNSZ # them before each access.
1677 mtctr r6 # loop count = n
1678Lppcasm_add_mainloop:
1679 $LDU r7,$BNSZ(r4)
1680 $LDU r8,$BNSZ(r5)
1681 adde r8,r7,r8 # r8 = a[i] + b[i] + carry in; sets carry out
1682 $STU r8,$BNSZ(r3)
1683 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
1684Lppcasm_add_adios:
1685 addze r3,r0 #return carry bit.
1686 bclr BO_ALWAYS,CR0_LT
1687 .long 0x00000000
1688
1689#
1690# NOTE: The following label name should be changed to
1691# "bn_div_words" i.e. remove the first dot
1692# for the gcc compiler. This should be automatically
1693# done in the build
1694#
1695
1696.align 4
1697.bn_div_words:
1698#
1699# This is a cleaned up version of code generated by
1700# the AIX compiler. The only optimization is to use
1701# the PPC instruction to count leading zeros instead
1702# of call to num_bits_word. Since this was compiled
1703# only at level -O2 we can possibly squeeze it more?
1704# Returns -1 in r3 if d is 0, else the two half-word quotient digits combined.
1705# r3 = h
1706# r4 = l
1707# r5 = d
1708
1709 $UCMPI 0,r5,0 # compare r5 and 0
1710 bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0
1711 li r3,-1 # d=0 return -1
1712 bclr BO_ALWAYS,CR0_LT
1713Lppcasm_div1:
1714 xor r0,r0,r0 #r0=0
1715 li r8,$BITS
1716 $CNTLZ. r7,r5 #r7 = num leading 0s in d.
1717 bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros
1718 subf r8,r7,r8 #r8 = BN_num_bits_word(d)
1719 $SHR. r9,r3,r8 #are there any bits above r8'th?
1720 $TR 16,r9,r0 #if there are, signal to dump core...
1721Lppcasm_div2:
1722 $UCMP 0,r3,r5 #h>=d?
1723 bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not
1724 subf r3,r5,r3 #h-=d ;
1725Lppcasm_div3: #r7 = leading zero count of d, i.e. the shift i
1726 cmpi 0,0,r7,0 # is (i == 0)?
1727 bc BO_IF,CR0_EQ,Lppcasm_div4
1728 $SHL r3,r3,r7 # h = (h<< i)
1729 $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
1730 $SHL r5,r5,r7 # d<<=i
1731 or r3,r3,r8 # h = (h<<i)|(l>>(BN_BITS2-i))
1732 $SHL r4,r4,r7 # l <<=i
1733Lppcasm_div4:
1734 $SHRI r9,r5,`$BITS/2` # r9 = dh
1735 # dl will be computed when needed
1736 # as it saves registers.
1737 li r6,2 #r6=2
1738 mtctr r6 #outer loop runs twice: one half-word digit each.
1739Lppcasm_divouterloop:
1740 $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4)
1741 $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
1742 # compute here for innerloop.
1743 $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
1744 bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not
1745
1746 li r8,-1
1747 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
1748 b Lppcasm_div6
1749Lppcasm_div5:
1750 $UDIV r8,r3,r9 #q = h/dh
1751Lppcasm_div6:
1752 $UMULL r12,r9,r8 #th = q*dh
1753 $CLRU r10,r5,`$BITS/2` #r10=dl
1754 $UMULL r6,r8,r10 #tl = q*dl
1755
1756Lppcasm_divinnerloop:
1757 subf r10,r12,r3 #t = h -th
1758 $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of...
1759 addic. r7,r7,0 #test if r7 == 0. used below.
1760 # now want to compute
1761 # r7 = (t<<BN_BITS4)|((l&BN_MASK2h)>>BN_BITS4)
1762 # the following 2 instructions do that
1763 $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
1764 or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
1765 $UCMP 1,r6,r7 # compare (tl <= r7)
1766 bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
1767 bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
1768 addi r8,r8,-1 #q--
1769 subf r12,r9,r12 #th -=dh
1770 $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
1771 subf r6,r10,r6 #tl -=dl
1772 b Lppcasm_divinnerloop
1773Lppcasm_divinnerexit:
1774 $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
1775 $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
1776 $UCMP 1,r4,r11 # compare l and tl
1777 add r12,r12,r10 # th+=t
1778 bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
1779 addi r12,r12,1 # th++
1780Lppcasm_div7:
1781 subf r11,r11,r4 #r11=l-tl
1782 $UCMP 1,r3,r12 #compare h and th
1783 bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
1784 addi r8,r8,-1 # q--
1785 add r3,r5,r3 # h+=d
1786Lppcasm_div8:
1787 subf r12,r12,r3 #r12 = h-th
1788 $SHLI r4,r11,`$BITS/2` #l=(l&BN_MASK2l)<<BN_BITS4
1789 # want to compute
1790 # h = ((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2
1791 # the following 2 instructions will do this.
1792 $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
1793 $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
1794 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
1795 $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
1796 b Lppcasm_divouterloop
1797Lppcasm_div9:
1798 or r3,r8,r0 #return (first q << BN_BITS4) | second q
1799 bclr BO_ALWAYS,CR0_LT
1800 .long 0x00000000
1801
1802#
1803# NOTE: The following label name should be changed to
1804# "bn_sqr_words" i.e. remove the first dot
1805# for the gcc compiler. This should be automatically
1806# done in the build
1807#
1808.align 4
1809.bn_sqr_words:
1810#
1811# Optimized version of bn_sqr_words
1812#
1813# void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
1814#
1815# r3 = r
1816# r4 = a
1817# r5 = n
1818#
1819# r6 = a[i].
1820# r7,r8 = product of a[i] with itself; both words are stored to consecutive slots of r.
1821#
1822# No unrolling done here. Not performance critical.
1823
1824 addic. r5,r5,0 #test r5.
1825 bc BO_IF,CR0_EQ,Lppcasm_sqr_adios #n == 0: nothing to do
1826 addi r4,r4,-$BNSZ #bias a back one word; the LDU below steps by one word per access (presumably an update-form load)
1827 addi r3,r3,-$BNSZ #same bias for r, used with the two STU stores below
1828 mtctr r5 #CTR = n, one pass per input word
1829Lppcasm_sqr_mainloop:
1830 #sqr(r[0],r[1],a[0]);
1831 $LDU r6,$BNSZ(r4)
1832 $UMULL r7,r6,r6
1833 $UMULH r8,r6,r6
1834 $STU r7,$BNSZ(r3)
1835 $STU r8,$BNSZ(r3)
1836 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop #count down CTR and loop (BO_dCTR_NZERO presumably means decrement, branch if non-zero)
1837Lppcasm_sqr_adios:
1838 bclr BO_ALWAYS,CR0_LT #return
1839 .long 0x00000000
1840
1841
1842#
1843# NOTE: The following label name should be changed to
1844# "bn_mul_words" i.e. remove the first dot
1845# for the gcc compiler. This should be automatically
1846# done in the build
1847#
1848
1849.align 4
1850.bn_mul_words:
1851#
1852# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1853#
1854# r3 = rp
1855# r4 = ap
1856# r5 = num
1857# r6 = w; on return r3 holds the final carry word (copied from r12)
1858 xor r0,r0,r0 #r0 = 0
1859 xor r12,r12,r12 # used for carry
1860 rlwinm. r7,r5,30,2,31 # num >> 2
1861 bc BO_IF,CR0_EQ,Lppcasm_mw_REM #fewer than 4 words: only the remainder path runs
1862 mtctr r7 #CTR = number of 4-word groups
1863Lppcasm_mw_LOOP:
1864 #mul(rp[0],ap[0],w,c1);
1865 $LD r8,`0*$BNSZ`(r4)
1866 $UMULL r9,r6,r8
1867 $UMULH r10,r6,r8
1868 addc r9,r9,r12
1869 #addze r10,r10 #carry is NOT ignored.
1870 #will be taken care of
1871 #in second spin below
1872 #using adde.
1873 $ST r9,`0*$BNSZ`(r3)
1874 #mul(rp[1],ap[1],w,c1);
1875 $LD r8,`1*$BNSZ`(r4)
1876 $UMULL r11,r6,r8
1877 $UMULH r12,r6,r8
1878 adde r11,r11,r10
1879 #addze r12,r12
1880 $ST r11,`1*$BNSZ`(r3)
1881 #mul(rp[2],ap[2],w,c1);
1882 $LD r8,`2*$BNSZ`(r4)
1883 $UMULL r9,r6,r8
1884 $UMULH r10,r6,r8
1885 adde r9,r9,r12
1886 #addze r10,r10
1887 $ST r9,`2*$BNSZ`(r3)
1888 #mul(rp[3],ap[3],w,c1);
1889 $LD r8,`3*$BNSZ`(r4)
1890 $UMULL r11,r6,r8
1891 $UMULH r12,r6,r8
1892 adde r11,r11,r10
1893 addze r12,r12 #this spin we collect carry into
1894 #r12
1895 $ST r11,`3*$BNSZ`(r3)
1896
1897 addi r3,r3,`4*$BNSZ`
1898 addi r4,r4,`4*$BNSZ`
1899 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
1900
1901Lppcasm_mw_REM:
1902 andi. r5,r5,0x3 #leftover count = num & 3
1903 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
1904 #mul(rp[0],ap[0],w,c1);
1905 $LD r8,`0*$BNSZ`(r4)
1906 $UMULL r9,r6,r8
1907 $UMULH r10,r6,r8
1908 addc r9,r9,r12
1909 addze r10,r10
1910 $ST r9,`0*$BNSZ`(r3)
1911 addi r12,r10,0 #carry = r10
1912
1913 addi r5,r5,-1
1914 cmpli 0,0,r5,0
1915 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
1916
1917
1918 #mul(rp[1],ap[1],w,c1);
1919 $LD r8,`1*$BNSZ`(r4)
1920 $UMULL r9,r6,r8
1921 $UMULH r10,r6,r8
1922 addc r9,r9,r12
1923 addze r10,r10
1924 $ST r9,`1*$BNSZ`(r3)
1925 addi r12,r10,0 #carry = r10
1926
1927 addi r5,r5,-1
1928 cmpli 0,0,r5,0
1929 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
1930
1931 #mul(rp[2],ap[2],w,c1);
1932 $LD r8,`2*$BNSZ`(r4)
1933 $UMULL r9,r6,r8
1934 $UMULH r10,r6,r8
1935 addc r9,r9,r12
1936 addze r10,r10
1937 $ST r9,`2*$BNSZ`(r3)
1938 addi r12,r10,0 #carry = r10
1939
1940Lppcasm_mw_OVER:
1941 addi r3,r12,0 #return value = final carry
1942 bclr BO_ALWAYS,CR0_LT
1943 .long 0x00000000
1944
1945#
1946# NOTE: The following label name should be changed to
1947# "bn_mul_add_words" i.e. remove the first dot
1948# for the gcc compiler. This should be automatically
1949# done in the build
1950#
1951
1952.align 4
1953.bn_mul_add_words:
1954#
1955# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1956#
1957# r3 = rp
1958# r4 = ap
1959# r5 = num
1960# r6 = w; on return r3 holds the final carry word (copied from r12)
1961#
1962# empirical evidence suggests that unrolled version performs best!!
1963#
1964 xor r0,r0,r0 #r0 = 0
1965 xor r12,r12,r12 #r12 = 0 . used for carry
1966 rlwinm. r7,r5,30,2,31 # num >> 2
1967 bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
1968 mtctr r7 #CTR = number of 4-word groups
1969Lppcasm_maw_mainloop:
1970 #mul_add(rp[0],ap[0],w,c1);
1971 $LD r8,`0*$BNSZ`(r4)
1972 $LD r11,`0*$BNSZ`(r3)
1973 $UMULL r9,r6,r8
1974 $UMULH r10,r6,r8
1975 addc r9,r9,r12 #r12 is carry.
1976 addze r10,r10
1977 addc r9,r9,r11
1978 #addze r10,r10
1979 #the above instruction addze
1980 #is NOT needed. Carry will NOT
1981 #be ignored. It's not affected
1982 #by multiply and will be collected
1983 #in the next spin
1984 $ST r9,`0*$BNSZ`(r3)
1985
1986 #mul_add(rp[1],ap[1],w,c1);
1987 $LD r8,`1*$BNSZ`(r4)
1988 $LD r9,`1*$BNSZ`(r3)
1989 $UMULL r11,r6,r8
1990 $UMULH r12,r6,r8
1991 adde r11,r11,r10 #r10 is carry.
1992 addze r12,r12
1993 addc r11,r11,r9
1994 #addze r12,r12
1995 $ST r11,`1*$BNSZ`(r3)
1996
1997 #mul_add(rp[2],ap[2],w,c1);
1998 $LD r8,`2*$BNSZ`(r4)
1999 $UMULL r9,r6,r8
2000 $LD r11,`2*$BNSZ`(r3)
2001 $UMULH r10,r6,r8
2002 adde r9,r9,r12
2003 addze r10,r10
2004 addc r9,r9,r11
2005 #addze r10,r10
2006 $ST r9,`2*$BNSZ`(r3)
2007
2008 #mul_add(rp[3],ap[3],w,c1);
2009 $LD r8,`3*$BNSZ`(r4)
2010 $UMULL r11,r6,r8
2011 $LD r9,`3*$BNSZ`(r3)
2012 $UMULH r12,r6,r8
2013 adde r11,r11,r10
2014 addze r12,r12
2015 addc r11,r11,r9
2016 addze r12,r12 #last word of the group: fold the pending carry bit into r12 before looping
2017 $ST r11,`3*$BNSZ`(r3)
2018 addi r3,r3,`4*$BNSZ`
2019 addi r4,r4,`4*$BNSZ`
2020 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
2021
2022Lppcasm_maw_leftover:
2023 andi. r5,r5,0x3 #leftover count = num & 3
2024 bc BO_IF,CR0_EQ,Lppcasm_maw_adios
2025 addi r3,r3,-$BNSZ #bias rp back one word for the LDU accesses below
2026 addi r4,r4,-$BNSZ #bias ap back one word likewise
2027 #mul_add(rp[0],ap[0],w,c1);
2028 mtctr r5 #CTR = leftover count (1..3)
2029 $LDU r8,$BNSZ(r4)
2030 $UMULL r9,r6,r8
2031 $UMULH r10,r6,r8
2032 $LDU r11,$BNSZ(r3)
2033 addc r9,r9,r11
2034 addze r10,r10
2035 addc r9,r9,r12
2036 addze r12,r10 #new carry word = r10 + carry bit
2037 $ST r9,0(r3)
2038
2039 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios #count down CTR; done when it reaches zero
2040 #mul_add(rp[1],ap[1],w,c1);
2041 $LDU r8,$BNSZ(r4)
2042 $UMULL r9,r6,r8
2043 $UMULH r10,r6,r8
2044 $LDU r11,$BNSZ(r3)
2045 addc r9,r9,r11
2046 addze r10,r10
2047 addc r9,r9,r12
2048 addze r12,r10 #new carry word = r10 + carry bit
2049 $ST r9,0(r3)
2050
2051 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios #count down CTR; done when it reaches zero
2052 #mul_add(rp[2],ap[2],w,c1);
2053 $LDU r8,$BNSZ(r4)
2054 $UMULL r9,r6,r8
2055 $UMULH r10,r6,r8
2056 $LDU r11,$BNSZ(r3)
2057 addc r9,r9,r11
2058 addze r10,r10
2059 addc r9,r9,r12
2060 addze r12,r10 #new carry word = r10 + carry bit
2061 $ST r9,0(r3)
2062
2063Lppcasm_maw_adios:
2064 addi r3,r12,0 #return value = final carry
2065 bclr BO_ALWAYS,CR0_LT
2066 .long 0x00000000
2067 .align 4
2068EOF
2069 $data =~ s/\`([^\`]*)\`/eval $1/gem;
2070
2071 # if some assembler chokes on some simplified mnemonic,
2072 # this is the spot to fix it up, e.g.:
2073 # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
2074 $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
2075 # assembler X doesn't accept li, load immediate value
2076 #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
2077 return($data);
2078}
diff --git a/src/lib/libcrypto/bn/asm/ppc64-mont.pl b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
deleted file mode 100644
index 3449b35855..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc64-mont.pl
+++ /dev/null
@@ -1,918 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# December 2007
11
12# The reason for the undertaken effort is basically the following. Even though
13# Power 6 CPU operates at incredible 4.7GHz clock frequency, its PKI
14# performance was observed to be less than impressive, essentially as
15# fast as 1.8GHz PPC970, or 2.6 times(!) slower than one would hope.
16# Well, it's not surprising that IBM had to make some sacrifices to
17# boost the clock frequency that much, but no overall improvement?
18# Having observed how much difference did switching to FPU make on
19# UltraSPARC, playing same stunt on Power 6 appeared appropriate...
20# Unfortunately the resulting performance improvement is not as
21# impressive, ~30%, and in absolute terms is still very far from what
22# one would expect from 4.7GHz CPU. There is a chance that I'm doing
23# something wrong, but in the lack of assembler level micro-profiling
24# data or at least decent platform guide I can't tell... Or better
25# results might be achieved with VMX... Anyway, this module provides
26# *worse* performance on other PowerPC implementations, ~40-15% slower
27# on PPC970 depending on key length and ~40% slower on Power 5 for all
28# key lengths. As it's obviously inappropriate as "best all-round"
29# alternative, it has to be complemented with run-time CPU family
30# detection. Oh! It should also be noted that unlike on other PowerPC
31# implementations, IALU ppc-mont.pl module performs *suboptimally* on
32# >=1024-bit key lengths on Power 6. It should also be noted that
33# *everything* said so far applies to 64-bit builds! As far as 32-bit
34# application executed on 64-bit CPU goes, this module is likely to
35# become preferred choice, because it's easy to adapt it for such
36# case and *is* faster than 32-bit ppc-mont.pl on *all* processors.
37
38# February 2008
39
40# Micro-profiling assisted optimization results in ~15% improvement
41# over original ppc64-mont.pl version, or overall ~50% improvement
42# over ppc.pl module on Power 6. If compared to ppc-mont.pl on same
43# Power 6 CPU, this module is 5-150% faster depending on key length,
44# [hereafter] more for longer keys. But if compared to ppc-mont.pl
45# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive
46# in absolute terms, but it's apparently the way Power 6 is...
47
48$flavour = shift; # first command-line argument selects the target flavour
49
50if ($flavour =~ /32/) {
51 $SIZE_T=4;
52 $RZONE= 224; # added to the alloca size in the prologue
53 $FRAME= $SIZE_T*12+8*12; # 12 SIZE_T-sized slots plus 12 8-byte FPR slots (see frame layout below)
54 $fname= "bn_mul_mont_ppc64";
55
56 $STUX= "stwux"; # store indexed and update
57 $PUSH= "stw";
58 $POP= "lwz";
59 die "not implemented yet"; # the 32-bit flavour is recognized but rejected; the settings above are never used
60} elsif ($flavour =~ /64/) {
61 $SIZE_T=8;
62 $RZONE= 288; # added to the alloca size in the prologue
63 $FRAME= $SIZE_T*12+8*12; # 12 SIZE_T-sized slots plus 12 8-byte FPR slots (see frame layout below)
64 $fname= "bn_mul_mont";
65
66 # same as above, but 64-bit mnemonics...
67 $STUX= "stdux"; # store indexed and update
68 $PUSH= "std";
69 $POP= "ld";
70} else { die "nonsense $flavour"; } # flavour names neither 32 nor 64
71
72$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
73( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
74( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
75die "can't locate ppc-xlate.pl";
76
77open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!"; # pipe all output through the translator; parenthesized open + "or" so die tests open's result, not the always-true command string
78
79$FRAME=($FRAME+63)&~63; # round the fixed frame size up to a multiple of 64 bytes
80$TRANSFER=16*8; # 16 8-byte slots of gpr<->fpr transfer zone (see frame layout below)
81
82$carry="r0";
83$sp="r1";
84$toc="r2";
85$rp="r3"; $ovf="r3"; # r3 holds rp on entry; after the prologue copies it to r9 the register is reused as the overflow word
86$ap="r4";
87$bp="r5";
88$np="r6";
89$n0="r7";
90$num="r8";
91$rp="r9"; # $rp is reassigned
92$tp="r10";
93$j="r11";
94$i="r12";
95# non-volatile registers
96$nap_d="r14"; # interleaved ap and np in double format
97$a0="r15"; # ap[0]
98$t0="r16"; # temporary registers
99$t1="r17";
100$t2="r18";
101$t3="r19";
102$t4="r20";
103$t5="r21";
104$t6="r22";
105$t7="r23";
106
107# PPC offers enough register bank capacity to unroll inner loops twice
108#
109# ..A3A2A1A0
110# dcba
111# -----------
112# A0a
113# A0b
114# A0c
115# A0d
116# A1a
117# A1b
118# A1c
119# A1d
120# A2a
121# A2b
122# A2c
123# A2d
124# A3a
125# A3b
126# A3c
127# A3d
128# ..a
129# ..b
130#
131$ba="f0"; $bb="f1"; $bc="f2"; $bd="f3"; # bp[i] split into four 16-bit pieces, converted to double
132$na="f4"; $nb="f5"; $nc="f6"; $nd="f7"; # the n0-scaled word (tp[0]*n0) split the same way
133$dota="f8"; $dotb="f9"; # partial products carried over into the next loop iteration (the "..a"/"..b" rows above)
134$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13"; # a[j], a[j+1] as 32-bit halves in double format
135$N0="f14"; $N1="f15"; $N2="f16"; $N3="f17"; # n[j], n[j+1] as 32-bit halves in double format
136$T0a="f18"; $T0b="f19"; # T0..T3 a/b: accumulators for the partial-product columns
137$T1a="f20"; $T1b="f21";
138$T2a="f22"; $T2b="f23";
139$T3a="f24"; $T3b="f25";
140
141# sp----------->+-------------------------------+
142# | saved sp |
143# +-------------------------------+
144# | |
145# +-------------------------------+
146# | 10 saved gpr, r14-r23 |
147# . .
148# . .
149# +12*size_t +-------------------------------+
150# | 12 saved fpr, f14-f25 |
151# . .
152# . .
153# +12*8 +-------------------------------+
154# | padding to 64 byte boundary |
155# . .
156# +X +-------------------------------+
157# | 16 gpr<->fpr transfer zone |
158# . .
159# . .
160# +16*8 +-------------------------------+
161# | __int64 tmp[-1] |
162# +-------------------------------+
163# | __int64 tmp[num] |
164# . .
165# . .
166# . .
167# +(num+1)*8 +-------------------------------+
168# | padding to 64 byte boundary |
169# . .
170# +X +-------------------------------+
171# | double nap_d[4*num] |
172# . .
173# . .
174# . .
175# +-------------------------------+
176
177$code=<<___;
178.machine "any"
179.text
180
181.globl .$fname
182.align 5
183.$fname:
184 cmpwi $num,4
185 mr $rp,r3 ; $rp is reassigned
186 li r3,0 ; possible "not handled" return code
187 bltlr-
188 andi. r0,$num,1 ; $num has to be even
189 bnelr-
190
191 slwi $num,$num,3 ; num*=8
192 li $i,-4096
193 slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num
194 add $tp,$tp,$num ; place for tp[num+1]
195 addi $tp,$tp,`$FRAME+$TRANSFER+8+64+$RZONE`
196 subf $tp,$tp,$sp ; $sp-$tp
197 and $tp,$tp,$i ; minimize TLB usage
198 subf $tp,$sp,$tp ; $tp-$sp
199 $STUX $sp,$sp,$tp ; alloca
200
201 $PUSH r14,`2*$SIZE_T`($sp)
202 $PUSH r15,`3*$SIZE_T`($sp)
203 $PUSH r16,`4*$SIZE_T`($sp)
204 $PUSH r17,`5*$SIZE_T`($sp)
205 $PUSH r18,`6*$SIZE_T`($sp)
206 $PUSH r19,`7*$SIZE_T`($sp)
207 $PUSH r20,`8*$SIZE_T`($sp)
208 $PUSH r21,`9*$SIZE_T`($sp)
209 $PUSH r22,`10*$SIZE_T`($sp)
210 $PUSH r23,`11*$SIZE_T`($sp)
211 stfd f14,`12*$SIZE_T+0`($sp)
212 stfd f15,`12*$SIZE_T+8`($sp)
213 stfd f16,`12*$SIZE_T+16`($sp)
214 stfd f17,`12*$SIZE_T+24`($sp)
215 stfd f18,`12*$SIZE_T+32`($sp)
216 stfd f19,`12*$SIZE_T+40`($sp)
217 stfd f20,`12*$SIZE_T+48`($sp)
218 stfd f21,`12*$SIZE_T+56`($sp)
219 stfd f22,`12*$SIZE_T+64`($sp)
220 stfd f23,`12*$SIZE_T+72`($sp)
221 stfd f24,`12*$SIZE_T+80`($sp)
222 stfd f25,`12*$SIZE_T+88`($sp)
223
224 ld $a0,0($ap) ; pull ap[0] value
225 ld $n0,0($n0) ; pull n0[0] value
226 ld $t3,0($bp) ; bp[0]
227
228 addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
229 li $i,-64
230 add $nap_d,$tp,$num
231 and $nap_d,$nap_d,$i ; align to 64 bytes
232
233 mulld $t7,$a0,$t3 ; ap[0]*bp[0]
234 ; nap_d is off by 1, because it's used with stfdu/lfdu
235 addi $nap_d,$nap_d,-8
236 srwi $j,$num,`3+1` ; counter register, num/2
237 mulld $t7,$t7,$n0 ; tp[0]*n0
238 addi $j,$j,-1
239 addi $tp,$sp,`$FRAME+$TRANSFER-8`
240 li $carry,0
241 mtctr $j
242
243 ; transfer bp[0] to FPU as 4x16-bit values
244 extrdi $t0,$t3,16,48
245 extrdi $t1,$t3,16,32
246 extrdi $t2,$t3,16,16
247 extrdi $t3,$t3,16,0
248 std $t0,`$FRAME+0`($sp)
249 std $t1,`$FRAME+8`($sp)
250 std $t2,`$FRAME+16`($sp)
251 std $t3,`$FRAME+24`($sp)
252 ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
253 extrdi $t4,$t7,16,48
254 extrdi $t5,$t7,16,32
255 extrdi $t6,$t7,16,16
256 extrdi $t7,$t7,16,0
257 std $t4,`$FRAME+32`($sp)
258 std $t5,`$FRAME+40`($sp)
259 std $t6,`$FRAME+48`($sp)
260 std $t7,`$FRAME+56`($sp)
261 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
262 lwz $t1,0($ap)
263 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
264 lwz $t3,8($ap)
265 lwz $t4,4($np) ; load n[j] as 32-bit word pair
266 lwz $t5,0($np)
267 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
268 lwz $t7,8($np)
269 lfd $ba,`$FRAME+0`($sp)
270 lfd $bb,`$FRAME+8`($sp)
271 lfd $bc,`$FRAME+16`($sp)
272 lfd $bd,`$FRAME+24`($sp)
273 lfd $na,`$FRAME+32`($sp)
274 lfd $nb,`$FRAME+40`($sp)
275 lfd $nc,`$FRAME+48`($sp)
276 lfd $nd,`$FRAME+56`($sp)
277 std $t0,`$FRAME+64`($sp)
278 std $t1,`$FRAME+72`($sp)
279 std $t2,`$FRAME+80`($sp)
280 std $t3,`$FRAME+88`($sp)
281 std $t4,`$FRAME+96`($sp)
282 std $t5,`$FRAME+104`($sp)
283 std $t6,`$FRAME+112`($sp)
284 std $t7,`$FRAME+120`($sp)
285 fcfid $ba,$ba
286 fcfid $bb,$bb
287 fcfid $bc,$bc
288 fcfid $bd,$bd
289 fcfid $na,$na
290 fcfid $nb,$nb
291 fcfid $nc,$nc
292 fcfid $nd,$nd
293
294 lfd $A0,`$FRAME+64`($sp)
295 lfd $A1,`$FRAME+72`($sp)
296 lfd $A2,`$FRAME+80`($sp)
297 lfd $A3,`$FRAME+88`($sp)
298 lfd $N0,`$FRAME+96`($sp)
299 lfd $N1,`$FRAME+104`($sp)
300 lfd $N2,`$FRAME+112`($sp)
301 lfd $N3,`$FRAME+120`($sp)
302 fcfid $A0,$A0
303 fcfid $A1,$A1
304 fcfid $A2,$A2
305 fcfid $A3,$A3
306 fcfid $N0,$N0
307 fcfid $N1,$N1
308 fcfid $N2,$N2
309 fcfid $N3,$N3
310 addi $ap,$ap,16
311 addi $np,$np,16
312
313 fmul $T1a,$A1,$ba
314 fmul $T1b,$A1,$bb
315 stfd $A0,8($nap_d) ; save a[j] in double format
316 stfd $A1,16($nap_d)
317 fmul $T2a,$A2,$ba
318 fmul $T2b,$A2,$bb
319 stfd $A2,24($nap_d) ; save a[j+1] in double format
320 stfd $A3,32($nap_d)
321 fmul $T3a,$A3,$ba
322 fmul $T3b,$A3,$bb
323 stfd $N0,40($nap_d) ; save n[j] in double format
324 stfd $N1,48($nap_d)
325 fmul $T0a,$A0,$ba
326 fmul $T0b,$A0,$bb
327 stfd $N2,56($nap_d) ; save n[j+1] in double format
328 stfdu $N3,64($nap_d)
329
330 fmadd $T1a,$A0,$bc,$T1a
331 fmadd $T1b,$A0,$bd,$T1b
332 fmadd $T2a,$A1,$bc,$T2a
333 fmadd $T2b,$A1,$bd,$T2b
334 fmadd $T3a,$A2,$bc,$T3a
335 fmadd $T3b,$A2,$bd,$T3b
336 fmul $dota,$A3,$bc
337 fmul $dotb,$A3,$bd
338
339 fmadd $T1a,$N1,$na,$T1a
340 fmadd $T1b,$N1,$nb,$T1b
341 fmadd $T2a,$N2,$na,$T2a
342 fmadd $T2b,$N2,$nb,$T2b
343 fmadd $T3a,$N3,$na,$T3a
344 fmadd $T3b,$N3,$nb,$T3b
345 fmadd $T0a,$N0,$na,$T0a
346 fmadd $T0b,$N0,$nb,$T0b
347
348 fmadd $T1a,$N0,$nc,$T1a
349 fmadd $T1b,$N0,$nd,$T1b
350 fmadd $T2a,$N1,$nc,$T2a
351 fmadd $T2b,$N1,$nd,$T2b
352 fmadd $T3a,$N2,$nc,$T3a
353 fmadd $T3b,$N2,$nd,$T3b
354 fmadd $dota,$N3,$nc,$dota
355 fmadd $dotb,$N3,$nd,$dotb
356
357 fctid $T0a,$T0a
358 fctid $T0b,$T0b
359 fctid $T1a,$T1a
360 fctid $T1b,$T1b
361 fctid $T2a,$T2a
362 fctid $T2b,$T2b
363 fctid $T3a,$T3a
364 fctid $T3b,$T3b
365
366 stfd $T0a,`$FRAME+0`($sp)
367 stfd $T0b,`$FRAME+8`($sp)
368 stfd $T1a,`$FRAME+16`($sp)
369 stfd $T1b,`$FRAME+24`($sp)
370 stfd $T2a,`$FRAME+32`($sp)
371 stfd $T2b,`$FRAME+40`($sp)
372 stfd $T3a,`$FRAME+48`($sp)
373 stfd $T3b,`$FRAME+56`($sp)
374
375.align 5
376L1st:
377 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
378 lwz $t1,0($ap)
379 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
380 lwz $t3,8($ap)
381 lwz $t4,4($np) ; load n[j] as 32-bit word pair
382 lwz $t5,0($np)
383 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
384 lwz $t7,8($np)
385 std $t0,`$FRAME+64`($sp)
386 std $t1,`$FRAME+72`($sp)
387 std $t2,`$FRAME+80`($sp)
388 std $t3,`$FRAME+88`($sp)
389 std $t4,`$FRAME+96`($sp)
390 std $t5,`$FRAME+104`($sp)
391 std $t6,`$FRAME+112`($sp)
392 std $t7,`$FRAME+120`($sp)
393 ld $t0,`$FRAME+0`($sp)
394 ld $t1,`$FRAME+8`($sp)
395 ld $t2,`$FRAME+16`($sp)
396 ld $t3,`$FRAME+24`($sp)
397 ld $t4,`$FRAME+32`($sp)
398 ld $t5,`$FRAME+40`($sp)
399 ld $t6,`$FRAME+48`($sp)
400 ld $t7,`$FRAME+56`($sp)
401 lfd $A0,`$FRAME+64`($sp)
402 lfd $A1,`$FRAME+72`($sp)
403 lfd $A2,`$FRAME+80`($sp)
404 lfd $A3,`$FRAME+88`($sp)
405 lfd $N0,`$FRAME+96`($sp)
406 lfd $N1,`$FRAME+104`($sp)
407 lfd $N2,`$FRAME+112`($sp)
408 lfd $N3,`$FRAME+120`($sp)
409 fcfid $A0,$A0
410 fcfid $A1,$A1
411 fcfid $A2,$A2
412 fcfid $A3,$A3
413 fcfid $N0,$N0
414 fcfid $N1,$N1
415 fcfid $N2,$N2
416 fcfid $N3,$N3
417 addi $ap,$ap,16
418 addi $np,$np,16
419
420 fmul $T1a,$A1,$ba
421 fmul $T1b,$A1,$bb
422 fmul $T2a,$A2,$ba
423 fmul $T2b,$A2,$bb
424 stfd $A0,8($nap_d) ; save a[j] in double format
425 stfd $A1,16($nap_d)
426 fmul $T3a,$A3,$ba
427 fmul $T3b,$A3,$bb
428 fmadd $T0a,$A0,$ba,$dota
429 fmadd $T0b,$A0,$bb,$dotb
430 stfd $A2,24($nap_d) ; save a[j+1] in double format
431 stfd $A3,32($nap_d)
432
433 fmadd $T1a,$A0,$bc,$T1a
434 fmadd $T1b,$A0,$bd,$T1b
435 fmadd $T2a,$A1,$bc,$T2a
436 fmadd $T2b,$A1,$bd,$T2b
437 stfd $N0,40($nap_d) ; save n[j] in double format
438 stfd $N1,48($nap_d)
439 fmadd $T3a,$A2,$bc,$T3a
440 fmadd $T3b,$A2,$bd,$T3b
441 add $t0,$t0,$carry ; can not overflow
442 fmul $dota,$A3,$bc
443 fmul $dotb,$A3,$bd
444 stfd $N2,56($nap_d) ; save n[j+1] in double format
445 stfdu $N3,64($nap_d)
446 srdi $carry,$t0,16
447 add $t1,$t1,$carry
448 srdi $carry,$t1,16
449
450 fmadd $T1a,$N1,$na,$T1a
451 fmadd $T1b,$N1,$nb,$T1b
452 insrdi $t0,$t1,16,32
453 fmadd $T2a,$N2,$na,$T2a
454 fmadd $T2b,$N2,$nb,$T2b
455 add $t2,$t2,$carry
456 fmadd $T3a,$N3,$na,$T3a
457 fmadd $T3b,$N3,$nb,$T3b
458 srdi $carry,$t2,16
459 fmadd $T0a,$N0,$na,$T0a
460 fmadd $T0b,$N0,$nb,$T0b
461 insrdi $t0,$t2,16,16
462 add $t3,$t3,$carry
463 srdi $carry,$t3,16
464
465 fmadd $T1a,$N0,$nc,$T1a
466 fmadd $T1b,$N0,$nd,$T1b
467 insrdi $t0,$t3,16,0 ; 0..63 bits
468 fmadd $T2a,$N1,$nc,$T2a
469 fmadd $T2b,$N1,$nd,$T2b
470 add $t4,$t4,$carry
471 fmadd $T3a,$N2,$nc,$T3a
472 fmadd $T3b,$N2,$nd,$T3b
473 srdi $carry,$t4,16
474 fmadd $dota,$N3,$nc,$dota
475 fmadd $dotb,$N3,$nd,$dotb
476 add $t5,$t5,$carry
477 srdi $carry,$t5,16
478 insrdi $t4,$t5,16,32
479
480 fctid $T0a,$T0a
481 fctid $T0b,$T0b
482 add $t6,$t6,$carry
483 fctid $T1a,$T1a
484 fctid $T1b,$T1b
485 srdi $carry,$t6,16
486 fctid $T2a,$T2a
487 fctid $T2b,$T2b
488 insrdi $t4,$t6,16,16
489 fctid $T3a,$T3a
490 fctid $T3b,$T3b
491 add $t7,$t7,$carry
492 insrdi $t4,$t7,16,0 ; 64..127 bits
493 srdi $carry,$t7,16 ; upper 33 bits
494
495 stfd $T0a,`$FRAME+0`($sp)
496 stfd $T0b,`$FRAME+8`($sp)
497 stfd $T1a,`$FRAME+16`($sp)
498 stfd $T1b,`$FRAME+24`($sp)
499 stfd $T2a,`$FRAME+32`($sp)
500 stfd $T2b,`$FRAME+40`($sp)
501 stfd $T3a,`$FRAME+48`($sp)
502 stfd $T3b,`$FRAME+56`($sp)
503 std $t0,8($tp) ; tp[j-1]
504 stdu $t4,16($tp) ; tp[j]
505 bdnz- L1st
506
507 fctid $dota,$dota
508 fctid $dotb,$dotb
509
510 ld $t0,`$FRAME+0`($sp)
511 ld $t1,`$FRAME+8`($sp)
512 ld $t2,`$FRAME+16`($sp)
513 ld $t3,`$FRAME+24`($sp)
514 ld $t4,`$FRAME+32`($sp)
515 ld $t5,`$FRAME+40`($sp)
516 ld $t6,`$FRAME+48`($sp)
517 ld $t7,`$FRAME+56`($sp)
518 stfd $dota,`$FRAME+64`($sp)
519 stfd $dotb,`$FRAME+72`($sp)
520
521 add $t0,$t0,$carry ; can not overflow
522 srdi $carry,$t0,16
523 add $t1,$t1,$carry
524 srdi $carry,$t1,16
525 insrdi $t0,$t1,16,32
526 add $t2,$t2,$carry
527 srdi $carry,$t2,16
528 insrdi $t0,$t2,16,16
529 add $t3,$t3,$carry
530 srdi $carry,$t3,16
531 insrdi $t0,$t3,16,0 ; 0..63 bits
532 add $t4,$t4,$carry
533 srdi $carry,$t4,16
534 add $t5,$t5,$carry
535 srdi $carry,$t5,16
536 insrdi $t4,$t5,16,32
537 add $t6,$t6,$carry
538 srdi $carry,$t6,16
539 insrdi $t4,$t6,16,16
540 add $t7,$t7,$carry
541 insrdi $t4,$t7,16,0 ; 64..127 bits
542 srdi $carry,$t7,16 ; upper 33 bits
543 ld $t6,`$FRAME+64`($sp)
544 ld $t7,`$FRAME+72`($sp)
545
546 std $t0,8($tp) ; tp[j-1]
547 stdu $t4,16($tp) ; tp[j]
548
549 add $t6,$t6,$carry ; can not overflow
550 srdi $carry,$t6,16
551 add $t7,$t7,$carry
552 insrdi $t6,$t7,48,0
553 srdi $ovf,$t7,48
554 std $t6,8($tp) ; tp[num-1]
555
556 slwi $t7,$num,2
557 subf $nap_d,$t7,$nap_d ; rewind pointer
558
559 li $i,8 ; i=1
560.align 5
561Louter:
562 ldx $t3,$bp,$i ; bp[i]
563 ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
564 mulld $t7,$a0,$t3 ; ap[0]*bp[i]
565
566 addi $tp,$sp,`$FRAME+$TRANSFER`
567 add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
568 li $carry,0
569 mulld $t7,$t7,$n0 ; tp[0]*n0
570 mtctr $j
571
572 ; transfer bp[i] to FPU as 4x16-bit values
573 extrdi $t0,$t3,16,48
574 extrdi $t1,$t3,16,32
575 extrdi $t2,$t3,16,16
576 extrdi $t3,$t3,16,0
577 std $t0,`$FRAME+0`($sp)
578 std $t1,`$FRAME+8`($sp)
579 std $t2,`$FRAME+16`($sp)
580 std $t3,`$FRAME+24`($sp)
581 ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
582 extrdi $t4,$t7,16,48
583 extrdi $t5,$t7,16,32
584 extrdi $t6,$t7,16,16
585 extrdi $t7,$t7,16,0
586 std $t4,`$FRAME+32`($sp)
587 std $t5,`$FRAME+40`($sp)
588 std $t6,`$FRAME+48`($sp)
589 std $t7,`$FRAME+56`($sp)
590
591 lfd $A0,8($nap_d) ; load a[j] in double format
592 lfd $A1,16($nap_d)
593 lfd $A2,24($nap_d) ; load a[j+1] in double format
594 lfd $A3,32($nap_d)
595 lfd $N0,40($nap_d) ; load n[j] in double format
596 lfd $N1,48($nap_d)
597 lfd $N2,56($nap_d) ; load n[j+1] in double format
598 lfdu $N3,64($nap_d)
599
600 lfd $ba,`$FRAME+0`($sp)
601 lfd $bb,`$FRAME+8`($sp)
602 lfd $bc,`$FRAME+16`($sp)
603 lfd $bd,`$FRAME+24`($sp)
604 lfd $na,`$FRAME+32`($sp)
605 lfd $nb,`$FRAME+40`($sp)
606 lfd $nc,`$FRAME+48`($sp)
607 lfd $nd,`$FRAME+56`($sp)
608
609 fcfid $ba,$ba
610 fcfid $bb,$bb
611 fcfid $bc,$bc
612 fcfid $bd,$bd
613 fcfid $na,$na
614 fcfid $nb,$nb
615 fcfid $nc,$nc
616 fcfid $nd,$nd
617
618 fmul $T1a,$A1,$ba
619 fmul $T1b,$A1,$bb
620 fmul $T2a,$A2,$ba
621 fmul $T2b,$A2,$bb
622 fmul $T3a,$A3,$ba
623 fmul $T3b,$A3,$bb
624 fmul $T0a,$A0,$ba
625 fmul $T0b,$A0,$bb
626
627 fmadd $T1a,$A0,$bc,$T1a
628 fmadd $T1b,$A0,$bd,$T1b
629 fmadd $T2a,$A1,$bc,$T2a
630 fmadd $T2b,$A1,$bd,$T2b
631 fmadd $T3a,$A2,$bc,$T3a
632 fmadd $T3b,$A2,$bd,$T3b
633 fmul $dota,$A3,$bc
634 fmul $dotb,$A3,$bd
635
636 fmadd $T1a,$N1,$na,$T1a
637 fmadd $T1b,$N1,$nb,$T1b
638 lfd $A0,8($nap_d) ; load a[j] in double format
639 lfd $A1,16($nap_d)
640 fmadd $T2a,$N2,$na,$T2a
641 fmadd $T2b,$N2,$nb,$T2b
642 lfd $A2,24($nap_d) ; load a[j+1] in double format
643 lfd $A3,32($nap_d)
644 fmadd $T3a,$N3,$na,$T3a
645 fmadd $T3b,$N3,$nb,$T3b
646 fmadd $T0a,$N0,$na,$T0a
647 fmadd $T0b,$N0,$nb,$T0b
648
649 fmadd $T1a,$N0,$nc,$T1a
650 fmadd $T1b,$N0,$nd,$T1b
651 fmadd $T2a,$N1,$nc,$T2a
652 fmadd $T2b,$N1,$nd,$T2b
653 fmadd $T3a,$N2,$nc,$T3a
654 fmadd $T3b,$N2,$nd,$T3b
655 fmadd $dota,$N3,$nc,$dota
656 fmadd $dotb,$N3,$nd,$dotb
657
658 fctid $T0a,$T0a
659 fctid $T0b,$T0b
660 fctid $T1a,$T1a
661 fctid $T1b,$T1b
662 fctid $T2a,$T2a
663 fctid $T2b,$T2b
664 fctid $T3a,$T3a
665 fctid $T3b,$T3b
666
667 stfd $T0a,`$FRAME+0`($sp)
668 stfd $T0b,`$FRAME+8`($sp)
669 stfd $T1a,`$FRAME+16`($sp)
670 stfd $T1b,`$FRAME+24`($sp)
671 stfd $T2a,`$FRAME+32`($sp)
672 stfd $T2b,`$FRAME+40`($sp)
673 stfd $T3a,`$FRAME+48`($sp)
674 stfd $T3b,`$FRAME+56`($sp)
675
676.align 5
677Linner:
678 fmul $T1a,$A1,$ba
679 fmul $T1b,$A1,$bb
680 fmul $T2a,$A2,$ba
681 fmul $T2b,$A2,$bb
682 lfd $N0,40($nap_d) ; load n[j] in double format
683 lfd $N1,48($nap_d)
684 fmul $T3a,$A3,$ba
685 fmul $T3b,$A3,$bb
686 fmadd $T0a,$A0,$ba,$dota
687 fmadd $T0b,$A0,$bb,$dotb
688 lfd $N2,56($nap_d) ; load n[j+1] in double format
689 lfdu $N3,64($nap_d)
690
691 fmadd $T1a,$A0,$bc,$T1a
692 fmadd $T1b,$A0,$bd,$T1b
693 fmadd $T2a,$A1,$bc,$T2a
694 fmadd $T2b,$A1,$bd,$T2b
695 lfd $A0,8($nap_d) ; load a[j] in double format
696 lfd $A1,16($nap_d)
697 fmadd $T3a,$A2,$bc,$T3a
698 fmadd $T3b,$A2,$bd,$T3b
699 fmul $dota,$A3,$bc
700 fmul $dotb,$A3,$bd
701 lfd $A2,24($nap_d) ; load a[j+1] in double format
702 lfd $A3,32($nap_d)
703
704 fmadd $T1a,$N1,$na,$T1a
705 fmadd $T1b,$N1,$nb,$T1b
706 ld $t0,`$FRAME+0`($sp)
707 ld $t1,`$FRAME+8`($sp)
708 fmadd $T2a,$N2,$na,$T2a
709 fmadd $T2b,$N2,$nb,$T2b
710 ld $t2,`$FRAME+16`($sp)
711 ld $t3,`$FRAME+24`($sp)
712 fmadd $T3a,$N3,$na,$T3a
713 fmadd $T3b,$N3,$nb,$T3b
714 add $t0,$t0,$carry ; can not overflow
715 ld $t4,`$FRAME+32`($sp)
716 ld $t5,`$FRAME+40`($sp)
717 fmadd $T0a,$N0,$na,$T0a
718 fmadd $T0b,$N0,$nb,$T0b
719 srdi $carry,$t0,16
720 add $t1,$t1,$carry
721 srdi $carry,$t1,16
722 ld $t6,`$FRAME+48`($sp)
723 ld $t7,`$FRAME+56`($sp)
724
725 fmadd $T1a,$N0,$nc,$T1a
726 fmadd $T1b,$N0,$nd,$T1b
727 insrdi $t0,$t1,16,32
728 ld $t1,8($tp) ; tp[j]
729 fmadd $T2a,$N1,$nc,$T2a
730 fmadd $T2b,$N1,$nd,$T2b
731 add $t2,$t2,$carry
732 fmadd $T3a,$N2,$nc,$T3a
733 fmadd $T3b,$N2,$nd,$T3b
734 srdi $carry,$t2,16
735 insrdi $t0,$t2,16,16
736 fmadd $dota,$N3,$nc,$dota
737 fmadd $dotb,$N3,$nd,$dotb
738 add $t3,$t3,$carry
739 ldu $t2,16($tp) ; tp[j+1]
740 srdi $carry,$t3,16
741 insrdi $t0,$t3,16,0 ; 0..63 bits
742 add $t4,$t4,$carry
743
744 fctid $T0a,$T0a
745 fctid $T0b,$T0b
746 srdi $carry,$t4,16
747 fctid $T1a,$T1a
748 fctid $T1b,$T1b
749 add $t5,$t5,$carry
750 fctid $T2a,$T2a
751 fctid $T2b,$T2b
752 srdi $carry,$t5,16
753 insrdi $t4,$t5,16,32
754 fctid $T3a,$T3a
755 fctid $T3b,$T3b
756 add $t6,$t6,$carry
757 srdi $carry,$t6,16
758 insrdi $t4,$t6,16,16
759
760 stfd $T0a,`$FRAME+0`($sp)
761 stfd $T0b,`$FRAME+8`($sp)
762 add $t7,$t7,$carry
763 addc $t3,$t0,$t1
764 stfd $T1a,`$FRAME+16`($sp)
765 stfd $T1b,`$FRAME+24`($sp)
766 insrdi $t4,$t7,16,0 ; 64..127 bits
767 srdi $carry,$t7,16 ; upper 33 bits
768 stfd $T2a,`$FRAME+32`($sp)
769 stfd $T2b,`$FRAME+40`($sp)
770 adde $t5,$t4,$t2
771 stfd $T3a,`$FRAME+48`($sp)
772 stfd $T3b,`$FRAME+56`($sp)
773 addze $carry,$carry
774 std $t3,-16($tp) ; tp[j-1]
775 std $t5,-8($tp) ; tp[j]
776 bdnz- Linner
777
778 fctid $dota,$dota
779 fctid $dotb,$dotb
780 ld $t0,`$FRAME+0`($sp)
781 ld $t1,`$FRAME+8`($sp)
782 ld $t2,`$FRAME+16`($sp)
783 ld $t3,`$FRAME+24`($sp)
784 ld $t4,`$FRAME+32`($sp)
785 ld $t5,`$FRAME+40`($sp)
786 ld $t6,`$FRAME+48`($sp)
787 ld $t7,`$FRAME+56`($sp)
788 stfd $dota,`$FRAME+64`($sp)
789 stfd $dotb,`$FRAME+72`($sp)
790
791 add $t0,$t0,$carry ; can not overflow
792 srdi $carry,$t0,16
793 add $t1,$t1,$carry
794 srdi $carry,$t1,16
795 insrdi $t0,$t1,16,32
796 add $t2,$t2,$carry
797 ld $t1,8($tp) ; tp[j]
798 srdi $carry,$t2,16
799 insrdi $t0,$t2,16,16
800 add $t3,$t3,$carry
801 ldu $t2,16($tp) ; tp[j+1]
802 srdi $carry,$t3,16
803 insrdi $t0,$t3,16,0 ; 0..63 bits
804 add $t4,$t4,$carry
805 srdi $carry,$t4,16
806 add $t5,$t5,$carry
807 srdi $carry,$t5,16
808 insrdi $t4,$t5,16,32
809 add $t6,$t6,$carry
810 srdi $carry,$t6,16
811 insrdi $t4,$t6,16,16
812 add $t7,$t7,$carry
813 insrdi $t4,$t7,16,0 ; 64..127 bits
814 srdi $carry,$t7,16 ; upper 33 bits
815 ld $t6,`$FRAME+64`($sp)
816 ld $t7,`$FRAME+72`($sp)
817
818 addc $t3,$t0,$t1
819 adde $t5,$t4,$t2
820 addze $carry,$carry
821
822 std $t3,-16($tp) ; tp[j-1]
823 std $t5,-8($tp) ; tp[j]
824
825 add $carry,$carry,$ovf ; consume upmost overflow
826 add $t6,$t6,$carry ; can not overflow
827 srdi $carry,$t6,16
828 add $t7,$t7,$carry
829 insrdi $t6,$t7,48,0
830 srdi $ovf,$t7,48 ; what is left above the inserted 48 bits is the overflow for the next pass
831 std $t6,0($tp) ; tp[num-1]
832
833 slwi $t7,$num,2 ; num is already in bytes (scaled by 8 in the prologue), so this is the distance nap_d advanced
834 addi $i,$i,8 ; advance i by one 8-byte word of bp
835 subf $nap_d,$t7,$nap_d ; rewind pointer
836 cmpw $i,$num ; both are byte offsets
837 blt- Louter
838
839 subf $np,$num,$np ; rewind np
840 addi $j,$j,1 ; restore counter
841 subfc $i,$i,$i ; j=0 and "clear" XER[CA]
842 addi $tp,$sp,`$FRAME+$TRANSFER+8`
843 addi $t4,$sp,`$FRAME+$TRANSFER+16`
844 addi $t5,$np,8
845 addi $t6,$rp,8
846 mtctr $j
847
848.align 4
849Lsub: ldx $t0,$tp,$i
850 ldx $t1,$np,$i
851 ldx $t2,$t4,$i
852 ldx $t3,$t5,$i
853 subfe $t0,$t1,$t0 ; tp[j]-np[j]
854 subfe $t2,$t3,$t2 ; tp[j+1]-np[j+1]
855 stdx $t0,$rp,$i
856 stdx $t2,$t6,$i
857 addi $i,$i,16
858 bdnz- Lsub
859
860 li $i,0
861 subfe $ovf,$i,$ovf ; handle upmost overflow bit
862 and $ap,$tp,$ovf
863 andc $np,$rp,$ovf
864 or $ap,$ap,$np ; ap=borrow?tp:rp
865 addi $t7,$ap,8
866 mtctr $j
867
868.align 4
869Lcopy: ; copy or in-place refresh
870 ldx $t0,$ap,$i
871 ldx $t1,$t7,$i
872 std $i,8($nap_d) ; zap nap_d
873 std $i,16($nap_d)
874 std $i,24($nap_d)
875 std $i,32($nap_d)
876 std $i,40($nap_d)
877 std $i,48($nap_d)
878 std $i,56($nap_d)
879 stdu $i,64($nap_d)
880 stdx $t0,$rp,$i
881 stdx $t1,$t6,$i
882 stdx $i,$tp,$i ; zap tp at once
883 stdx $i,$t4,$i
884 addi $i,$i,16
885 bdnz- Lcopy
886
887 $POP r14,`2*$SIZE_T`($sp)
888 $POP r15,`3*$SIZE_T`($sp)
889 $POP r16,`4*$SIZE_T`($sp)
890 $POP r17,`5*$SIZE_T`($sp)
891 $POP r18,`6*$SIZE_T`($sp)
892 $POP r19,`7*$SIZE_T`($sp)
893 $POP r20,`8*$SIZE_T`($sp)
894 $POP r21,`9*$SIZE_T`($sp)
895 $POP r22,`10*$SIZE_T`($sp)
896 $POP r23,`11*$SIZE_T`($sp)
897 lfd f14,`12*$SIZE_T+0`($sp)
898 lfd f15,`12*$SIZE_T+8`($sp)
899 lfd f16,`12*$SIZE_T+16`($sp)
900 lfd f17,`12*$SIZE_T+24`($sp)
901 lfd f18,`12*$SIZE_T+32`($sp)
902 lfd f19,`12*$SIZE_T+40`($sp)
903 lfd f20,`12*$SIZE_T+48`($sp)
904 lfd f21,`12*$SIZE_T+56`($sp)
905 lfd f22,`12*$SIZE_T+64`($sp)
906 lfd f23,`12*$SIZE_T+72`($sp)
907 lfd f24,`12*$SIZE_T+80`($sp)
908 lfd f25,`12*$SIZE_T+88`($sp)
909 $POP $sp,0($sp)
910 li r3,1 ; signal "handled"
911 blr
912 .long 0
913.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>"
914___
915
916$code =~ s/\`([^\`]*)\`/eval $1/gem; # evaluate every backtick-quoted arithmetic expression in the template
917print $code;
918close STDOUT or die "error closing STDOUT: $! (status $?)"; # STDOUT is a pipe to the translator: close reports its failure, so do not ignore it
diff --git a/src/lib/libcrypto/bn/asm/s390x-mont.pl b/src/lib/libcrypto/bn/asm/s390x-mont.pl
deleted file mode 100644
index d23251033b..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x-mont.pl
+++ /dev/null
@@ -1,225 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2007.
11#
12# Performance improvement over vanilla C code varies from 85% to 45%
13# depending on key length and benchmark. Unfortunately in this context
14# these are not very impressive results [for code that utilizes "wide"
15# 64x64=128-bit multiplication, which is not commonly available to C
16# programmers], at least hand-coded bn_asm.c replacement is known to
17# provide 30-40% better results for longest keys. Well, on a second
18# thought it's not very surprising, because z-CPUs are single-issue
19# and _strictly_ in-order execution, while bn_mul_mont is more or less
20# dependent on CPU ability to pipe-line instructions and have several
21# of them "in-flight" at the same time. I mean while other methods,
22# for example Karatsuba, aim to minimize amount of multiplications at
23# the cost of increasing other operations, bn_mul_mont aims to neatly
24# "overlap" multiplications and the other operations [and on most
25# platforms even minimize the amount of the other operations, in
26# particular references to memory]. But it's possible to improve this
27# module performance by implementing dedicated squaring code-path and
28# possibly by unrolling loops...
29
30# January 2009.
31#
32# Reschedule to minimize/avoid Address Generation Interlock hazard,
33# make inner loops counter-based.
34
# Register assignments interpolated into the assembly text that follows.
35$mn0="%r0"; # holds tp[0]*n0, the per-iteration multiplier applied to np[]
36$num="%r1"; # limb count, loaded from the stack by the routine itself
37
38# int bn_mul_mont(
39$rp="%r2"; # BN_ULONG *rp,
40$ap="%r3"; # const BN_ULONG *ap,
41$bp="%r4"; # const BN_ULONG *bp,
42$np="%r5"; # const BN_ULONG *np,
43$n0="%r6"; # const BN_ULONG *n0,
44#$num="160(%r15)" # int num);
45
46$bi="%r2"; # zaps rp
47$j="%r7"; # byte offset used to index ap/np/tp inside the loops
48
49$ahi="%r8"; # with $alo: register pair receiving the ap[j]*bp[i] product
50$alo="%r9";
51$nhi="%r10"; # with $nlo: register pair receiving the np[j]*m1 product
52$nlo="%r11";
53$AHI="%r12"; # carry word propagated along the ap*bp chain
54$NHI="%r13"; # carry word propagated along the np*m1 chain
55$count="%r14"; # loop counter consumed by brct
56$sp="%r15"; # stack pointer
57
# Assembly text of bn_mul_mont, appended to $code.
# As written, the routine returns 0 in %r2 without touching rp when the
# operand is shorter than 16 bytes or longer than 128 bytes (num < 2 or
# num > 16 limbs), and returns 1 after storing num limbs through rp
# otherwise. A scratch vector tp (num limbs plus one word for the top
# carry) is carved out of the stack at 160($sp); the final .Lsub/.Lcopy
# passes subtract np from tp, pick tp or the difference by mask rather
# than by branch, and overwrite tp while copying.
58$code.=<<___;
59.text
60.globl bn_mul_mont
61.type bn_mul_mont,\@function
62bn_mul_mont:
63 lgf $num,164($sp) # pull $num
64 sla $num,3 # $num to enumerate bytes
65 la $bp,0($num,$bp)
66
67 stg %r2,16($sp)
68
69 cghi $num,16 #
70 lghi %r2,0 #
71 blr %r14 # if($num<16) return 0;
72 cghi $num,128 #
73 bhr %r14 # if($num>128) return 0;
74
75 stmg %r3,%r15,24($sp)
76
77 lghi $rp,-160-8 # leave room for carry bit
78 lcgr $j,$num # -$num
79 lgr %r0,$sp
80 la $rp,0($rp,$sp)
81 la $sp,0($j,$rp) # alloca
82 stg %r0,0($sp) # back chain
83
84 sra $num,3 # restore $num
85 la $bp,0($j,$bp) # restore $bp
86 ahi $num,-1 # adjust $num for inner loop
87 lg $n0,0($n0) # pull n0
88
89 lg $bi,0($bp)
90 lg $alo,0($ap)
91 mlgr $ahi,$bi # ap[0]*bp[0]
92 lgr $AHI,$ahi
93
94 lgr $mn0,$alo # "tp[0]"*n0
95 msgr $mn0,$n0
96
97 lg $nlo,0($np) #
98 mlgr $nhi,$mn0 # np[0]*m1
99 algr $nlo,$alo # +="tp[0]"
100 lghi $NHI,0
101 alcgr $NHI,$nhi
102
103 la $j,8(%r0) # j=1
104 lr $count,$num
105
106.align 16
107.L1st:
108 lg $alo,0($j,$ap)
109 mlgr $ahi,$bi # ap[j]*bp[0]
110 algr $alo,$AHI
111 lghi $AHI,0
112 alcgr $AHI,$ahi
113
114 lg $nlo,0($j,$np)
115 mlgr $nhi,$mn0 # np[j]*m1
116 algr $nlo,$NHI
117 lghi $NHI,0
118 alcgr $nhi,$NHI # +="tp[j]"
119 algr $nlo,$alo
120 alcgr $NHI,$nhi
121
122 stg $nlo,160-8($j,$sp) # tp[j-1]=
123 la $j,8($j) # j++
124 brct $count,.L1st
125
126 algr $NHI,$AHI
127 lghi $AHI,0
128 alcgr $AHI,$AHI # upmost overflow bit
129 stg $NHI,160-8($j,$sp)
130 stg $AHI,160($j,$sp)
131 la $bp,8($bp) # bp++
132
133.Louter:
134 lg $bi,0($bp) # bp[i]
135 lg $alo,0($ap)
136 mlgr $ahi,$bi # ap[0]*bp[i]
137 alg $alo,160($sp) # +=tp[0]
138 lghi $AHI,0
139 alcgr $AHI,$ahi
140
141 lgr $mn0,$alo
142 msgr $mn0,$n0 # tp[0]*n0
143
144 lg $nlo,0($np) # np[0]
145 mlgr $nhi,$mn0 # np[0]*m1
146 algr $nlo,$alo # +="tp[0]"
147 lghi $NHI,0
148 alcgr $NHI,$nhi
149
150 la $j,8(%r0) # j=1
151 lr $count,$num
152
153.align 16
154.Linner:
155 lg $alo,0($j,$ap)
156 mlgr $ahi,$bi # ap[j]*bp[i]
157 algr $alo,$AHI
158 lghi $AHI,0
159 alcgr $ahi,$AHI
160 alg $alo,160($j,$sp)# +=tp[j]
161 alcgr $AHI,$ahi
162
163 lg $nlo,0($j,$np)
164 mlgr $nhi,$mn0 # np[j]*m1
165 algr $nlo,$NHI
166 lghi $NHI,0
167 alcgr $nhi,$NHI
168 algr $nlo,$alo # +="tp[j]"
169 alcgr $NHI,$nhi
170
171 stg $nlo,160-8($j,$sp) # tp[j-1]=
172 la $j,8($j) # j++
173 brct $count,.Linner
174
175 algr $NHI,$AHI
176 lghi $AHI,0
177 alcgr $AHI,$AHI
178 alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit
179 lghi $ahi,0
180 alcgr $AHI,$ahi # new upmost overflow bit
181 stg $NHI,160-8($j,$sp)
182 stg $AHI,160($j,$sp)
183
184 la $bp,8($bp) # bp++
185 clg $bp,160+8+32($j,$sp) # compare to &bp[num]
186 jne .Louter
187
188 lg $rp,160+8+16($j,$sp) # reincarnate rp
189 la $ap,160($sp)
190 ahi $num,1 # restore $num, incidentally clears "borrow"
191
192 la $j,0(%r0)
193 lr $count,$num
194.Lsub: lg $alo,0($j,$ap)
195 slbg $alo,0($j,$np)
196 stg $alo,0($j,$rp)
197 la $j,8($j)
198 brct $count,.Lsub
199 lghi $ahi,0
200 slbgr $AHI,$ahi # handle upmost carry
201
202 ngr $ap,$AHI
203 lghi $np,-1
204 xgr $np,$AHI
205 ngr $np,$rp
206 ogr $ap,$np # ap=borrow?tp:rp
207
208 la $j,0(%r0)
209 lgr $count,$num
210.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
211 stg $j,160($j,$sp) # zap tp
212 stg $alo,0($j,$rp)
213 la $j,8($j)
214 brct $count,.Lcopy
215
216 la %r1,160+8+48($j,$sp)
217 lmg %r6,%r15,0(%r1)
218 lghi %r2,1 # signal "processed"
219 br %r14
220.size bn_mul_mont,.-bn_mul_mont
221.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
222___
223
224print $code; # write the accumulated assembly text to STDOUT
225close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/s390x.S b/src/lib/libcrypto/bn/asm/s390x.S
deleted file mode 100755
index 8f45f5d513..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x.S
+++ /dev/null
@@ -1,678 +0,0 @@
1.ident "s390x.S, version 1.0"
2// ====================================================================
3// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4// project.
5//
6// Rights for redistribution and usage in source and binary forms are
7// granted according to the OpenSSL license. Warranty of any kind is
8// disclaimed.
9// ====================================================================
10
11.text
12
// Register that the routines in this file load with 0 (lghi zero,0) and
// then use as the addend of alcgr, so that only the carry is added.
13#define zero %r0
14
15// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
// rp[i] += ap[i]*w for i in [0,len). The carry word is threaded through
// every limb and the final carry is returned in %r2. The main loop handles
// four limbs per pass, alternating the %r6:%r7 and %r8:%r9 product pairs so
// the high word of one product is the carry into the next.
16.globl bn_mul_add_words
17.type bn_mul_add_words,@function
18.align 4
19bn_mul_add_words:
20 lghi zero,0 // zero = 0
21 la %r1,0(%r2) // put rp aside
22 lghi %r2,0 // i=0;
23 ltgfr %r4,%r4 // sign-extend len and set the condition code
24 bler %r14 // if (len<=0) return 0;
25
26 stmg %r6,%r10,48(%r15) // save %r6-%r10, used as scratch below
27 lghi %r8,0 // carry = 0
28 srag %r10,%r4,2 // cnt=len/4
29 jz .Loop1_madd // fewer than 4 limbs: tail loop only
30
31.Loop4_madd:
32 lg %r7,0(%r2,%r3) // ap[i]
33 mlgr %r6,%r5 // *=w
34 algr %r7,%r8 // +=carry
35 alcgr %r6,zero
36 alg %r7,0(%r2,%r1) // +=rp[i]
37 alcgr %r6,zero
38 stg %r7,0(%r2,%r1) // rp[i]=
39
40 lg %r9,8(%r2,%r3) // ap[i+1]
41 mlgr %r8,%r5
42 algr %r9,%r6 // carry in comes from the previous high word
43 alcgr %r8,zero
44 alg %r9,8(%r2,%r1)
45 alcgr %r8,zero
46 stg %r9,8(%r2,%r1) // rp[i+1]=
47
48 lg %r7,16(%r2,%r3) // ap[i+2]
49 mlgr %r6,%r5
50 algr %r7,%r8
51 alcgr %r6,zero
52 alg %r7,16(%r2,%r1)
53 alcgr %r6,zero
54 stg %r7,16(%r2,%r1) // rp[i+2]=
55
56 lg %r9,24(%r2,%r3) // ap[i+3]
57 mlgr %r8,%r5
58 algr %r9,%r6
59 alcgr %r8,zero
60 alg %r9,24(%r2,%r1)
61 alcgr %r8,zero
62 stg %r9,24(%r2,%r1) // rp[i+3]=
63
64 la %r2,32(%r2) // i+=4
65 brct %r10,.Loop4_madd
66
67 lghi %r10,3
68 nr %r4,%r10 // cnt=len%4
69 jz .Lend_madd
70
71.Loop1_madd:
72 lg %r7,0(%r2,%r3) // ap[i]
73 mlgr %r6,%r5 // *=w
74 algr %r7,%r8 // +=carry
75 alcgr %r6,zero
76 alg %r7,0(%r2,%r1) // +=rp[i]
77 alcgr %r6,zero
78 stg %r7,0(%r2,%r1) // rp[i]=
79
80 lgr %r8,%r6 // high word becomes the carry for the next limb
81 la %r2,8(%r2) // i++
82 brct %r4,.Loop1_madd
83
84.Lend_madd:
85 lgr %r2,%r8 // return the final carry
86 lmg %r6,%r10,48(%r15) // restore %r6-%r10
87 br %r14
88.size bn_mul_add_words,.-bn_mul_add_words
89
90// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
// rp[i] = ap[i]*w + carry for i in [0,len); the final carry word is
// returned in %r2. Same structure as bn_mul_add_words, minus the
// accumulation of the old rp[i].
91.globl bn_mul_words
92.type bn_mul_words,@function
93.align 4
94bn_mul_words:
95 lghi zero,0 // zero = 0
96 la %r1,0(%r2) // put rp aside
97 lghi %r2,0 // i=0;
98 ltgfr %r4,%r4 // sign-extend len and set the condition code
99 bler %r14 // if (len<=0) return 0;
100
101 stmg %r6,%r10,48(%r15) // save %r6-%r10, used as scratch below
102 lghi %r8,0 // carry = 0
103 srag %r10,%r4,2 // cnt=len/4
104 jz .Loop1_mul // fewer than 4 limbs: tail loop only
105
106.Loop4_mul:
107 lg %r7,0(%r2,%r3) // ap[i]
108 mlgr %r6,%r5 // *=w
109 algr %r7,%r8 // +=carry
110 alcgr %r6,zero
111 stg %r7,0(%r2,%r1) // rp[i]=
112
113 lg %r9,8(%r2,%r3) // ap[i+1]
114 mlgr %r8,%r5
115 algr %r9,%r6 // carry in comes from the previous high word
116 alcgr %r8,zero
117 stg %r9,8(%r2,%r1) // rp[i+1]=
118
119 lg %r7,16(%r2,%r3) // ap[i+2]
120 mlgr %r6,%r5
121 algr %r7,%r8
122 alcgr %r6,zero
123 stg %r7,16(%r2,%r1) // rp[i+2]=
124
125 lg %r9,24(%r2,%r3) // ap[i+3]
126 mlgr %r8,%r5
127 algr %r9,%r6
128 alcgr %r8,zero
129 stg %r9,24(%r2,%r1) // rp[i+3]=
130
131 la %r2,32(%r2) // i+=4
132 brct %r10,.Loop4_mul
133
134 lghi %r10,3
135 nr %r4,%r10 // cnt=len%4
136 jz .Lend_mul
137
138.Loop1_mul:
139 lg %r7,0(%r2,%r3) // ap[i]
140 mlgr %r6,%r5 // *=w
141 algr %r7,%r8 // +=carry
142 alcgr %r6,zero
143 stg %r7,0(%r2,%r1) // rp[i]=
144
145 lgr %r8,%r6 // high word becomes the carry for the next limb
146 la %r2,8(%r2) // i++
147 brct %r4,.Loop1_mul
148
149.Lend_mul:
150 lgr %r2,%r8 // return the final carry
151 lmg %r6,%r10,48(%r15) // restore %r6-%r10
152 br %r14
153.size bn_mul_words,.-bn_mul_words
154
155// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
// For each of the len input limbs read through %r3, stores the 128-bit
// square through %r2 as two limbs: low word first, then high word.
// Returns immediately when len<=0.
156.globl bn_sqr_words
157.type bn_sqr_words,@function
158.align 4
159bn_sqr_words:
160 ltgfr %r4,%r4 // sign-extend len and set the condition code
161 bler %r14 // if (len<=0) return;
162
163 stmg %r6,%r7,48(%r15) // save %r6-%r7, used for the product
164 srag %r1,%r4,2 // cnt=len/4
165 jz .Loop1_sqr // fewer than 4 limbs: tail loop only
166
167.Loop4_sqr:
168 lg %r7,0(%r3) // a[0]
169 mlgr %r6,%r7 // %r6:%r7 = a[0]*a[0]
170 stg %r7,0(%r2) // low word
171 stg %r6,8(%r2) // high word
172
173 lg %r7,8(%r3)
174 mlgr %r6,%r7
175 stg %r7,16(%r2)
176 stg %r6,24(%r2)
177
178 lg %r7,16(%r3)
179 mlgr %r6,%r7
180 stg %r7,32(%r2)
181 stg %r6,40(%r2)
182
183 lg %r7,24(%r3)
184 mlgr %r6,%r7
185 stg %r7,48(%r2)
186 stg %r6,56(%r2)
187
188 la %r3,32(%r3) // advance source by 4 limbs
189 la %r2,64(%r2) // advance destination by 8 limbs
190 brct %r1,.Loop4_sqr
191
192 lghi %r1,3
193 nr %r4,%r1 // cnt=len%4
194 jz .Lend_sqr
195
196.Loop1_sqr:
197 lg %r7,0(%r3)
198 mlgr %r6,%r7
199 stg %r7,0(%r2)
200 stg %r6,8(%r2)
201
202 la %r3,8(%r3)
203 la %r2,16(%r2)
204 brct %r4,.Loop1_sqr
205
206.Lend_sqr:
207 lmg %r6,%r7,48(%r15) // restore %r6-%r7
208 br %r14
209.size bn_sqr_words,.-bn_sqr_words
210
211// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
// Divides the 128-bit value h:l (held in the %r2:%r3 pair) by d and
// returns the quotient. No check on d or on quotient overflow is made here.
212.globl bn_div_words
213.type bn_div_words,@function
214.align 4
215bn_div_words:
216 dlgr %r2,%r4 // %r2:%r3 / %r4 -> remainder in %r2, quotient in %r3
217 lgr %r2,%r3 // return the quotient
218 br %r14
219.size bn_div_words,.-bn_div_words
220
221// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
// rp[i] = ap[i] + bp[i] + carry for i in [0,len); returns the final carry
// (0 or 1) in %r2. The carry lives in the condition code across the whole
// loop, so only instructions that leave the condition code alone (la, lg,
// stg, brct) are used between the alcg steps.
222.globl bn_add_words
223.type bn_add_words,@function
224.align 4
225bn_add_words:
226 la %r1,0(%r2) // put rp aside
227 lghi %r2,0 // i=0
228 ltgfr %r5,%r5 // sign-extend len and set the condition code
229 bler %r14 // if (len<=0) return 0;
230
231 stg %r6,48(%r15) // save %r6, used for len%4
232 lghi %r6,3
233 nr %r6,%r5 // len%4
234 sra %r5,2 // len/4, use sra because it sets condition code
235 jz .Loop1_add // carry is incidentally cleared if branch taken
236 algr %r2,%r2 // clear carry
237
238.Loop4_add:
239 lg %r0,0(%r2,%r3)
240 alcg %r0,0(%r2,%r4)
241 stg %r0,0(%r2,%r1)
242 lg %r0,8(%r2,%r3)
243 alcg %r0,8(%r2,%r4)
244 stg %r0,8(%r2,%r1)
245 lg %r0,16(%r2,%r3)
246 alcg %r0,16(%r2,%r4)
247 stg %r0,16(%r2,%r1)
248 lg %r0,24(%r2,%r3)
249 alcg %r0,24(%r2,%r4)
250 stg %r0,24(%r2,%r1)
251
252 la %r2,32(%r2) // i+=4
253 brct %r5,.Loop4_add
254
255 la %r6,1(%r6) // see if len%4 is zero ...
256 brct %r6,.Loop1_add // without touching condition code:-)
257
258.Lexit_add:
259 lghi %r2,0
260 alcgr %r2,%r2 // %r2 = 0+0+carry
261 lg %r6,48(%r15) // restore %r6
262 br %r14
263
264.Loop1_add:
265 lg %r0,0(%r2,%r3)
266 alcg %r0,0(%r2,%r4)
267 stg %r0,0(%r2,%r1)
268
269 la %r2,8(%r2) // i++
270 brct %r6,.Loop1_add
271
272 j .Lexit_add
273.size bn_add_words,.-bn_add_words
274
275// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
// rp[i] = ap[i] - bp[i] - borrow for i in [0,len); returns the final
// borrow (0 or 1) in %r2. The borrow lives in the condition code across
// the whole loop, so only instructions that leave the condition code alone
// are used between the slbg steps.
276.globl bn_sub_words
277.type bn_sub_words,@function
278.align 4
279bn_sub_words:
280 la %r1,0(%r2) // put rp aside
281 lghi %r2,0 // i=0
282 ltgfr %r5,%r5 // sign-extend len and set the condition code
283 bler %r14 // if (len<=0) return 0;
284
285 stg %r6,48(%r15) // save %r6, used for len%4
286 lghi %r6,3
287 nr %r6,%r5 // len%4
288 sra %r5,2 // len/4, use sra because it sets condition code
289 jnz .Loop4_sub // borrow is incidentally cleared if branch taken
290 slgr %r2,%r2 // clear borrow
291
292.Loop1_sub:
293 lg %r0,0(%r2,%r3)
294 slbg %r0,0(%r2,%r4)
295 stg %r0,0(%r2,%r1)
296
297 la %r2,8(%r2) // i++
298 brct %r6,.Loop1_sub
299 j .Lexit_sub
300
301.Loop4_sub:
302 lg %r0,0(%r2,%r3)
303 slbg %r0,0(%r2,%r4)
304 stg %r0,0(%r2,%r1)
305 lg %r0,8(%r2,%r3)
306 slbg %r0,8(%r2,%r4)
307 stg %r0,8(%r2,%r1)
308 lg %r0,16(%r2,%r3)
309 slbg %r0,16(%r2,%r4)
310 stg %r0,16(%r2,%r1)
311 lg %r0,24(%r2,%r3)
312 slbg %r0,24(%r2,%r4)
313 stg %r0,24(%r2,%r1)
314
315 la %r2,32(%r2) // i+=4
316 brct %r5,.Loop4_sub
317
318 la %r6,1(%r6) // see if len%4 is zero ...
319 brct %r6,.Loop1_sub // without touching condition code:-)
320
321.Lexit_sub:
322 lghi %r2,0
323 slbgr %r2,%r2 // %r2 = 0-0-borrow, i.e. 0 or -1
324 lcgr %r2,%r2 // negate so the returned borrow is 0 or 1
325 lg %r6,48(%r15) // restore %r6
326 br %r14
327.size bn_sub_words,.-bn_sub_words
328
// Registers forming the three-word column accumulator of the comba
// routines; the routines rotate the roles of the three between columns.
329#define c1 %r1
330#define c2 %r5
331#define c3 %r8
332
// (c3:c2:c1) += a[ai]*b[bi]. a is addressed through %r3 and b through %r4;
// %r6:%r7 receive the 128-bit product (high:low). Expects the zero
// register to hold 0 so the last alcgr adds only the carry.
333#define mul_add_c(ai,bi,c1,c2,c3) \
334 lg %r7,ai*8(%r3); \
335 mlg %r6,bi*8(%r4); \
336 algr c1,%r7; \
337 alcgr c2,%r6; \
338 alcgr c3,zero
339
340// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
// r[0..15] = a[0..7] * b[0..7], computed one result column at a time.
// For column k every product a[i]*b[j] with i+j==k is accumulated into a
// three-word accumulator; the low word is stored as r[k], that register is
// cleared, and the accumulator roles rotate for the next column.
341.globl bn_mul_comba8
342.type bn_mul_comba8,@function
343.align 4
344bn_mul_comba8:
345 stmg %r6,%r8,48(%r15) // save %r6-%r8 (product pair and c3)
346
347 lghi c1,0
348 lghi c2,0
349 lghi c3,0
350 lghi zero,0
351
352 mul_add_c(0,0,c1,c2,c3);
353 stg c1,0*8(%r2) // r[0]=
354 lghi c1,0
355
356 mul_add_c(0,1,c2,c3,c1);
357 mul_add_c(1,0,c2,c3,c1);
358 stg c2,1*8(%r2) // r[1]=
359 lghi c2,0
360
361 mul_add_c(2,0,c3,c1,c2);
362 mul_add_c(1,1,c3,c1,c2);
363 mul_add_c(0,2,c3,c1,c2);
364 stg c3,2*8(%r2) // r[2]=
365 lghi c3,0
366
367 mul_add_c(0,3,c1,c2,c3);
368 mul_add_c(1,2,c1,c2,c3);
369 mul_add_c(2,1,c1,c2,c3);
370 mul_add_c(3,0,c1,c2,c3);
371 stg c1,3*8(%r2) // r[3]=
372 lghi c1,0
373
374 mul_add_c(4,0,c2,c3,c1);
375 mul_add_c(3,1,c2,c3,c1);
376 mul_add_c(2,2,c2,c3,c1);
377 mul_add_c(1,3,c2,c3,c1);
378 mul_add_c(0,4,c2,c3,c1);
379 stg c2,4*8(%r2) // r[4]=
380 lghi c2,0
381
382 mul_add_c(0,5,c3,c1,c2);
383 mul_add_c(1,4,c3,c1,c2);
384 mul_add_c(2,3,c3,c1,c2);
385 mul_add_c(3,2,c3,c1,c2);
386 mul_add_c(4,1,c3,c1,c2);
387 mul_add_c(5,0,c3,c1,c2);
388 stg c3,5*8(%r2) // r[5]=
389 lghi c3,0
390
391 mul_add_c(6,0,c1,c2,c3);
392 mul_add_c(5,1,c1,c2,c3);
393 mul_add_c(4,2,c1,c2,c3);
394 mul_add_c(3,3,c1,c2,c3);
395 mul_add_c(2,4,c1,c2,c3);
396 mul_add_c(1,5,c1,c2,c3);
397 mul_add_c(0,6,c1,c2,c3);
398 stg c1,6*8(%r2) // r[6]=
399 lghi c1,0
400
401 mul_add_c(0,7,c2,c3,c1);
402 mul_add_c(1,6,c2,c3,c1);
403 mul_add_c(2,5,c2,c3,c1);
404 mul_add_c(3,4,c2,c3,c1);
405 mul_add_c(4,3,c2,c3,c1);
406 mul_add_c(5,2,c2,c3,c1);
407 mul_add_c(6,1,c2,c3,c1);
408 mul_add_c(7,0,c2,c3,c1);
409 stg c2,7*8(%r2) // r[7]=
410 lghi c2,0
411
412 mul_add_c(7,1,c3,c1,c2);
413 mul_add_c(6,2,c3,c1,c2);
414 mul_add_c(5,3,c3,c1,c2);
415 mul_add_c(4,4,c3,c1,c2);
416 mul_add_c(3,5,c3,c1,c2);
417 mul_add_c(2,6,c3,c1,c2);
418 mul_add_c(1,7,c3,c1,c2);
419 stg c3,8*8(%r2) // r[8]=
420 lghi c3,0
421
422 mul_add_c(2,7,c1,c2,c3);
423 mul_add_c(3,6,c1,c2,c3);
424 mul_add_c(4,5,c1,c2,c3);
425 mul_add_c(5,4,c1,c2,c3);
426 mul_add_c(6,3,c1,c2,c3);
427 mul_add_c(7,2,c1,c2,c3);
428 stg c1,9*8(%r2) // r[9]=
429 lghi c1,0
430
431 mul_add_c(7,3,c2,c3,c1);
432 mul_add_c(6,4,c2,c3,c1);
433 mul_add_c(5,5,c2,c3,c1);
434 mul_add_c(4,6,c2,c3,c1);
435 mul_add_c(3,7,c2,c3,c1);
436 stg c2,10*8(%r2) // r[10]=
437 lghi c2,0
438
439 mul_add_c(4,7,c3,c1,c2);
440 mul_add_c(5,6,c3,c1,c2);
441 mul_add_c(6,5,c3,c1,c2);
442 mul_add_c(7,4,c3,c1,c2);
443 stg c3,11*8(%r2) // r[11]=
444 lghi c3,0
445
446 mul_add_c(7,5,c1,c2,c3);
447 mul_add_c(6,6,c1,c2,c3);
448 mul_add_c(5,7,c1,c2,c3);
449 stg c1,12*8(%r2) // r[12]=
450 lghi c1,0
451
452
453 mul_add_c(6,7,c2,c3,c1);
454 mul_add_c(7,6,c2,c3,c1);
455 stg c2,13*8(%r2) // r[13]=
456 lghi c2,0
457
458 mul_add_c(7,7,c3,c1,c2);
459 stg c3,14*8(%r2) // r[14]=
460 stg c1,15*8(%r2) // r[15]= remaining high word
461
462 lmg %r6,%r8,48(%r15) // restore %r6-%r8
463 br %r14
464.size bn_mul_comba8,.-bn_mul_comba8
465
466// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
// r[0..7] = a[0..3] * b[0..3], computed one result column at a time.
// For column k every product a[i]*b[j] with i+j==k is accumulated into a
// three-word accumulator; the low word is stored as r[k], that register is
// cleared, and the accumulator roles rotate for the next column.
// %r6-%r8 are saved on entry and must be reloaded (lmg) before returning.
467.globl bn_mul_comba4
468.type bn_mul_comba4,@function
469.align 4
470bn_mul_comba4:
471 stmg %r6,%r8,48(%r15) // save %r6-%r8 (product pair and c3)
472
473 lghi c1,0
474 lghi c2,0
475 lghi c3,0
476 lghi zero,0
477
478 mul_add_c(0,0,c1,c2,c3);
479 stg c1,0*8(%r2) // r[0]= ; results go through %r2, %r3 is the input a[]
480 lghi c1,0
481
482 mul_add_c(0,1,c2,c3,c1);
483 mul_add_c(1,0,c2,c3,c1);
484 stg c2,1*8(%r2) // r[1]=
485 lghi c2,0
486
487 mul_add_c(2,0,c3,c1,c2);
488 mul_add_c(1,1,c3,c1,c2);
489 mul_add_c(0,2,c3,c1,c2);
490 stg c3,2*8(%r2) // r[2]=
491 lghi c3,0
492
493 mul_add_c(0,3,c1,c2,c3);
494 mul_add_c(1,2,c1,c2,c3);
495 mul_add_c(2,1,c1,c2,c3);
496 mul_add_c(3,0,c1,c2,c3);
497 stg c1,3*8(%r2) // r[3]=
498 lghi c1,0
499
500 mul_add_c(3,1,c2,c3,c1);
501 mul_add_c(2,2,c2,c3,c1);
502 mul_add_c(1,3,c2,c3,c1);
503 stg c2,4*8(%r2) // r[4]=
504 lghi c2,0
505
506 mul_add_c(2,3,c3,c1,c2);
507 mul_add_c(3,2,c3,c1,c2);
508 stg c3,5*8(%r2) // r[5]=
509 lghi c3,0
510
511 mul_add_c(3,3,c1,c2,c3);
512 stg c1,6*8(%r2) // r[6]=
513 stg c2,7*8(%r2) // r[7]= remaining high word
514
515 lmg %r6,%r8,48(%r15) // restore %r6-%r8 saved on entry
516 br %r14
517.size bn_mul_comba4,.-bn_mul_comba4
518
// (c3:c2:c1) += a[ai]*a[ai]. a is addressed through %r3; %r6:%r7 receive
// the 128-bit square (high:low). Expects the zero register to hold 0.
519#define sqr_add_c(ai,c1,c2,c3) \
520 lg %r7,ai*8(%r3); \
521 mlgr %r6,%r7; \
522 algr c1,%r7; \
523 alcgr c2,%r6; \
524 alcgr c3,zero
525
// (c3:c2:c1) += 2*a[ai]*a[aj]. The product is formed once in %r6:%r7 and
// added to the accumulator twice.
526#define sqr_add_c2(ai,aj,c1,c2,c3) \
527 lg %r7,ai*8(%r3); \
528 mlg %r6,aj*8(%r3); \
529 algr c1,%r7; \
530 alcgr c2,%r6; \
531 alcgr c3,zero; \
532 algr c1,%r7; \
533 alcgr c2,%r6; \
534 alcgr c3,zero
535
536// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
// r[0..15] = a[0..7] squared, computed one result column at a time.
// Diagonal terms a[i]*a[i] go through sqr_add_c; each off-diagonal pair is
// counted twice through sqr_add_c2. After every column the low accumulator
// word is stored as r[k], cleared, and the accumulator roles rotate.
537.globl bn_sqr_comba8
538.type bn_sqr_comba8,@function
539.align 4
540bn_sqr_comba8:
541 stmg %r6,%r8,48(%r15) // save %r6-%r8 (product pair and c3)
542
543 lghi c1,0
544 lghi c2,0
545 lghi c3,0
546 lghi zero,0
547
548 sqr_add_c(0,c1,c2,c3);
549 stg c1,0*8(%r2) // r[0]=
550 lghi c1,0
551
552 sqr_add_c2(1,0,c2,c3,c1);
553 stg c2,1*8(%r2) // r[1]=
554 lghi c2,0
555
556 sqr_add_c(1,c3,c1,c2);
557 sqr_add_c2(2,0,c3,c1,c2);
558 stg c3,2*8(%r2) // r[2]=
559 lghi c3,0
560
561 sqr_add_c2(3,0,c1,c2,c3);
562 sqr_add_c2(2,1,c1,c2,c3);
563 stg c1,3*8(%r2) // r[3]=
564 lghi c1,0
565
566 sqr_add_c(2,c2,c3,c1);
567 sqr_add_c2(3,1,c2,c3,c1);
568 sqr_add_c2(4,0,c2,c3,c1);
569 stg c2,4*8(%r2) // r[4]=
570 lghi c2,0
571
572 sqr_add_c2(5,0,c3,c1,c2);
573 sqr_add_c2(4,1,c3,c1,c2);
574 sqr_add_c2(3,2,c3,c1,c2);
575 stg c3,5*8(%r2) // r[5]=
576 lghi c3,0
577
578 sqr_add_c(3,c1,c2,c3);
579 sqr_add_c2(4,2,c1,c2,c3);
580 sqr_add_c2(5,1,c1,c2,c3);
581 sqr_add_c2(6,0,c1,c2,c3);
582 stg c1,6*8(%r2) // r[6]=
583 lghi c1,0
584
585 sqr_add_c2(7,0,c2,c3,c1);
586 sqr_add_c2(6,1,c2,c3,c1);
587 sqr_add_c2(5,2,c2,c3,c1);
588 sqr_add_c2(4,3,c2,c3,c1);
589 stg c2,7*8(%r2) // r[7]=
590 lghi c2,0
591
592 sqr_add_c(4,c3,c1,c2);
593 sqr_add_c2(5,3,c3,c1,c2);
594 sqr_add_c2(6,2,c3,c1,c2);
595 sqr_add_c2(7,1,c3,c1,c2);
596 stg c3,8*8(%r2) // r[8]=
597 lghi c3,0
598
599 sqr_add_c2(7,2,c1,c2,c3);
600 sqr_add_c2(6,3,c1,c2,c3);
601 sqr_add_c2(5,4,c1,c2,c3);
602 stg c1,9*8(%r2) // r[9]=
603 lghi c1,0
604
605 sqr_add_c(5,c2,c3,c1);
606 sqr_add_c2(6,4,c2,c3,c1);
607 sqr_add_c2(7,3,c2,c3,c1);
608 stg c2,10*8(%r2) // r[10]=
609 lghi c2,0
610
611 sqr_add_c2(7,4,c3,c1,c2);
612 sqr_add_c2(6,5,c3,c1,c2);
613 stg c3,11*8(%r2) // r[11]=
614 lghi c3,0
615
616 sqr_add_c(6,c1,c2,c3);
617 sqr_add_c2(7,5,c1,c2,c3);
618 stg c1,12*8(%r2) // r[12]=
619 lghi c1,0
620
621 sqr_add_c2(7,6,c2,c3,c1);
622 stg c2,13*8(%r2) // r[13]=
623 lghi c2,0
624
625 sqr_add_c(7,c3,c1,c2);
626 stg c3,14*8(%r2) // r[14]=
627 stg c1,15*8(%r2) // r[15]= remaining high word
628
629 lmg %r6,%r8,48(%r15) // restore %r6-%r8
630 br %r14
631.size bn_sqr_comba8,.-bn_sqr_comba8
632
633// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
// r[0..7] = a[0..3] squared, computed one result column at a time.
// Diagonal terms go through sqr_add_c; each off-diagonal pair is counted
// twice through sqr_add_c2. After every column the low accumulator word is
// stored as r[k], cleared, and the accumulator roles rotate.
634.globl bn_sqr_comba4
635.type bn_sqr_comba4,@function
636.align 4
637bn_sqr_comba4:
638 stmg %r6,%r8,48(%r15) // save %r6-%r8 (product pair and c3)
639
640 lghi c1,0
641 lghi c2,0
642 lghi c3,0
643 lghi zero,0
644
645 sqr_add_c(0,c1,c2,c3);
646 stg c1,0*8(%r2) // r[0]=
647 lghi c1,0
648
649 sqr_add_c2(1,0,c2,c3,c1);
650 stg c2,1*8(%r2) // r[1]=
651 lghi c2,0
652
653 sqr_add_c(1,c3,c1,c2);
654 sqr_add_c2(2,0,c3,c1,c2);
655 stg c3,2*8(%r2) // r[2]=
656 lghi c3,0
657
658 sqr_add_c2(3,0,c1,c2,c3);
659 sqr_add_c2(2,1,c1,c2,c3);
660 stg c1,3*8(%r2) // r[3]=
661 lghi c1,0
662
663 sqr_add_c(2,c2,c3,c1);
664 sqr_add_c2(3,1,c2,c3,c1);
665 stg c2,4*8(%r2) // r[4]=
666 lghi c2,0
667
668 sqr_add_c2(3,2,c3,c1,c2);
669 stg c3,5*8(%r2) // r[5]=
670 lghi c3,0
671
672 sqr_add_c(3,c1,c2,c3);
673 stg c1,6*8(%r2) // r[6]=
674 stg c2,7*8(%r2) // r[7]= remaining high word
675
676 lmg %r6,%r8,48(%r15) // restore %r6-%r8
677 br %r14
678.size bn_sqr_comba4,.-bn_sqr_comba4
diff --git a/src/lib/libcrypto/bn/asm/sparcv8.S b/src/lib/libcrypto/bn/asm/sparcv8.S
deleted file mode 100644
index 88c5dc480a..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8.S
+++ /dev/null
@@ -1,1458 +0,0 @@
1.ident "sparcv8.s, Version 1.4"
2.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * See bn_asm.sparc.v8plus.S for more details.
22 */
23
24/*
25 * Revision history.
26 *
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
30 * 1.4 - some retunes;
31 *
32 * (*) see bn_asm.sparc.v8plus.S for details
33 */
34
35.section ".text",#alloc,#execinstr
36.file "bn_asm.sparc.v8.S"
37
38.align 32
39
40.global bn_mul_add_words
41/*
42 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
43 * BN_ULONG *rp,*ap;
44 * int num;
45 * BN_ULONG w;
 *
 * rp[i] += ap[i]*w for num 32-bit words; returns the final carry word.
 * Registers as used below: %o0=rp, %o1=ap, %o2=num, %o3=w, %o5=carry.
 * The main loop handles four words per pass, the tail up to three more.
46 */
47bn_mul_add_words:
48 cmp %o2,0
49 bg,a .L_bn_mul_add_words_proceed
50 ld [%o1],%g2 /* delay slot, annulled unless the branch is taken: preload ap[0] */
51 retl
52 clr %o0 /* num<=0: return 0 */
53
54.L_bn_mul_add_words_proceed:
55 andcc %o2,-4,%g0 /* any group of four words? */
56 bz .L_bn_mul_add_words_tail
57 clr %o5 /* delay slot: carry=0 */
58
59.L_bn_mul_add_words_loop:
60 ld [%o0],%o4
61 ld [%o1+4],%g3
62 umul %o3,%g2,%g2 /* low word in %g2, high word in %y */
63 rd %y,%g1
64 addcc %o4,%o5,%o4 /* rp[i]+carry */
65 addx %g1,0,%g1
66 addcc %o4,%g2,%o4 /* +low(ap[i]*w) */
67 st %o4,[%o0]
68 addx %g1,0,%o5 /* new carry */
69
70 ld [%o0+4],%o4
71 ld [%o1+8],%g2
72 umul %o3,%g3,%g3
73 dec 4,%o2
74 rd %y,%g1
75 addcc %o4,%o5,%o4
76 addx %g1,0,%g1
77 addcc %o4,%g3,%o4
78 st %o4,[%o0+4]
79 addx %g1,0,%o5
80
81 ld [%o0+8],%o4
82 ld [%o1+12],%g3
83 umul %o3,%g2,%g2
84 inc 16,%o1
85 rd %y,%g1
86 addcc %o4,%o5,%o4
87 addx %g1,0,%g1
88 addcc %o4,%g2,%o4
89 st %o4,[%o0+8]
90 addx %g1,0,%o5
91
92 ld [%o0+12],%o4
93 umul %o3,%g3,%g3
94 inc 16,%o0
95 rd %y,%g1
96 addcc %o4,%o5,%o4
97 addx %g1,0,%g1
98 addcc %o4,%g3,%o4
99 st %o4,[%o0-4]
100 addx %g1,0,%o5
101 andcc %o2,-4,%g0
102 bnz,a .L_bn_mul_add_words_loop
103 ld [%o1],%g2
104
105 tst %o2
106 bnz,a .L_bn_mul_add_words_tail
107 ld [%o1],%g2
108.L_bn_mul_add_words_return:
109 retl
110 mov %o5,%o0 /* delay slot: return the carry */
111 nop
112
113.L_bn_mul_add_words_tail:
114 ld [%o0],%o4
115 umul %o3,%g2,%g2
116 addcc %o4,%o5,%o4
117 rd %y,%g1
118 addx %g1,0,%g1
119 addcc %o4,%g2,%o4
120 addx %g1,0,%o5
121 deccc %o2
122 bz .L_bn_mul_add_words_return
123 st %o4,[%o0] /* delay slot: the store happens on both paths */
124
125 ld [%o1+4],%g2
126 ld [%o0+4],%o4
127 umul %o3,%g2,%g2
128 rd %y,%g1
129 addcc %o4,%o5,%o4
130 addx %g1,0,%g1
131 addcc %o4,%g2,%o4
132 addx %g1,0,%o5
133 deccc %o2
134 bz .L_bn_mul_add_words_return
135 st %o4,[%o0+4]
136
137 ld [%o1+8],%g2
138 ld [%o0+8],%o4
139 umul %o3,%g2,%g2
140 rd %y,%g1
141 addcc %o4,%o5,%o4
142 addx %g1,0,%g1
143 addcc %o4,%g2,%o4
144 st %o4,[%o0+8]
145 retl
146 addx %g1,0,%o0 /* delay slot: return the carry */
147
148.type bn_mul_add_words,#function
149.size bn_mul_add_words,(.-bn_mul_add_words)
150
151.align 32
152
153.global bn_mul_words
154/*
155 * BN_ULONG bn_mul_words(rp,ap,num,w)
156 * BN_ULONG *rp,*ap;
157 * int num;
158 * BN_ULONG w;
 *
 * rp[i] = ap[i]*w + carry for num 32-bit words; returns the final carry
 * word. Registers as used below: %o0=rp, %o1=ap, %o2=num, %o3=w,
 * %o5=carry. The main loop handles four words per pass, the tail up to
 * three more.
159 */
160bn_mul_words:
161 cmp %o2,0
162 bg,a .L_bn_mul_words_proceeed
163 ld [%o1],%g2 /* delay slot, annulled unless the branch is taken: preload ap[0] */
164 retl
165 clr %o0 /* num<=0: return 0 */
166
167.L_bn_mul_words_proceeed:
168 andcc %o2,-4,%g0 /* any group of four words? */
169 bz .L_bn_mul_words_tail
170 clr %o5 /* delay slot: carry=0 */
171
172.L_bn_mul_words_loop:
173 ld [%o1+4],%g3
174 umul %o3,%g2,%g2 /* low word in %g2, high word in %y */
175 addcc %g2,%o5,%g2 /* +carry */
176 rd %y,%g1
177 addx %g1,0,%o5 /* new carry */
178 st %g2,[%o0]
179
180 ld [%o1+8],%g2
181 umul %o3,%g3,%g3
182 addcc %g3,%o5,%g3
183 rd %y,%g1
184 dec 4,%o2
185 addx %g1,0,%o5
186 st %g3,[%o0+4]
187
188 ld [%o1+12],%g3
189 umul %o3,%g2,%g2
190 addcc %g2,%o5,%g2
191 rd %y,%g1
192 inc 16,%o1
193 st %g2,[%o0+8]
194 addx %g1,0,%o5
195
196 umul %o3,%g3,%g3
197 addcc %g3,%o5,%g3
198 rd %y,%g1
199 inc 16,%o0
200 addx %g1,0,%o5
201 st %g3,[%o0-4]
202 andcc %o2,-4,%g0
203 nop
204 bnz,a .L_bn_mul_words_loop
205 ld [%o1],%g2
206
207 tst %o2
208 bnz,a .L_bn_mul_words_tail
209 ld [%o1],%g2
210.L_bn_mul_words_return:
211 retl
212 mov %o5,%o0 /* delay slot: return the carry */
213 nop
214
215.L_bn_mul_words_tail:
216 umul %o3,%g2,%g2
217 addcc %g2,%o5,%g2
218 rd %y,%g1
219 addx %g1,0,%o5
220 deccc %o2
221 bz .L_bn_mul_words_return
222 st %g2,[%o0] /* delay slot: the store happens on both paths */
223 nop
224
225 ld [%o1+4],%g2
226 umul %o3,%g2,%g2
227 addcc %g2,%o5,%g2
228 rd %y,%g1
229 addx %g1,0,%o5
230 deccc %o2
231 bz .L_bn_mul_words_return
232 st %g2,[%o0+4]
233
234 ld [%o1+8],%g2
235 umul %o3,%g2,%g2
236 addcc %g2,%o5,%g2
237 rd %y,%g1
238 st %g2,[%o0+8]
239 retl
240 addx %g1,0,%o0 /* delay slot: return the carry */
241
242.type bn_mul_words,#function
243.size bn_mul_words,(.-bn_mul_words)
244
245.align 32
246.global bn_sqr_words
247/*
248 * void bn_sqr_words(r,a,n)
249 * BN_ULONG *r,*a;
250 * int n;
 *
 * For each of the n input words read through %o1, stores the 64-bit
 * square through %o0 as two words: low word first, then high word.
 * Although declared void, every exit path below also clears %o0.
251 */
252bn_sqr_words:
253 cmp %o2,0
254 bg,a .L_bn_sqr_words_proceeed
255 ld [%o1],%g2 /* delay slot, annulled unless the branch is taken: preload a[0] */
256 retl
257 clr %o0
258
259.L_bn_sqr_words_proceeed:
260 andcc %o2,-4,%g0 /* any group of four words? */
261 bz .L_bn_sqr_words_tail
262 clr %o5
263
264.L_bn_sqr_words_loop:
265 ld [%o1+4],%g3
266 umul %g2,%g2,%o4 /* low word in %o4, high word in %y */
267 st %o4,[%o0]
268 rd %y,%o5
269 st %o5,[%o0+4]
270
271 ld [%o1+8],%g2
272 umul %g3,%g3,%o4
273 dec 4,%o2
274 st %o4,[%o0+8]
275 rd %y,%o5
276 st %o5,[%o0+12]
277 nop
278
279 ld [%o1+12],%g3
280 umul %g2,%g2,%o4
281 st %o4,[%o0+16]
282 rd %y,%o5
283 inc 16,%o1
284 st %o5,[%o0+20]
285
286 umul %g3,%g3,%o4
287 inc 32,%o0
288 st %o4,[%o0-8]
289 rd %y,%o5
290 st %o5,[%o0-4]
291 andcc %o2,-4,%g2 /* result lands in %g2, not %g0; %g2 is reloaded from [%o1] before its next use */
292 bnz,a .L_bn_sqr_words_loop
293 ld [%o1],%g2
294
295 tst %o2
296 nop
297 bnz,a .L_bn_sqr_words_tail
298 ld [%o1],%g2
299.L_bn_sqr_words_return:
300 retl
301 clr %o0
302
303.L_bn_sqr_words_tail:
304 umul %g2,%g2,%o4
305 st %o4,[%o0]
306 deccc %o2
307 rd %y,%o5
308 bz .L_bn_sqr_words_return
309 st %o5,[%o0+4] /* delay slot: the store happens on both paths */
310
311 ld [%o1+4],%g2
312 umul %g2,%g2,%o4
313 st %o4,[%o0+8]
314 deccc %o2
315 rd %y,%o5
316 nop
317 bz .L_bn_sqr_words_return
318 st %o5,[%o0+12]
319
320 ld [%o1+8],%g2
321 umul %g2,%g2,%o4
322 st %o4,[%o0+16]
323 rd %y,%o5
324 st %o5,[%o0+20]
325 retl
326 clr %o0
327
328.type bn_sqr_words,#function
329.size bn_sqr_words,(.-bn_sqr_words)
330
331.align 32
332
333.global bn_div_words
334/*
335 * BN_ULONG bn_div_words(h,l,d)
336 * BN_ULONG h,l,d;
 *
 * Loads h into %y and lets udiv divide the 64-bit value %y:l by d; the
 * quotient is left in %o0 as the return value.
 *
 * NOTE(review): the SPARC V8 manual describes writes to %y as delayed
 * writes; confirm that issuing udiv directly after wr is safe on every
 * CPU this file targets.
337 */
338bn_div_words:
339 wr %o0,%y
340 udiv %o1,%o2,%o0
341 retl
342 nop
343
344.type bn_div_words,#function
345.size bn_div_words,(.-bn_div_words)
346
347.align 32
348
349.global bn_add_words
350/*
351 * BN_ULONG bn_add_words(rp,ap,bp,n)
352 * BN_ULONG *rp,*ap,*bp;
353 * int n;
 *
 * rp[i] = ap[i] + bp[i] + carry for n 32-bit words; returns the final
 * carry (0 or 1). Registers as used below: %o0=rp, %o1=ap, %o2=bp, %o3=n.
 * The carry flag is destroyed by the loop-control andcc/deccc, so it is
 * parked in %g1 (addx %g0,0,%g1) and re-created with addcc %g1,-1,%g0.
354 */
355bn_add_words:
356 cmp %o3,0
357 bg,a .L_bn_add_words_proceed
358 ld [%o1],%o4 /* delay slot, annulled unless the branch is taken: preload ap[0] */
359 retl
360 clr %o0 /* n<=0: return 0 */
361
362.L_bn_add_words_proceed:
363 andcc %o3,-4,%g0 /* any group of four words? */
364 bz .L_bn_add_words_tail
365 clr %g1 /* delay slot: saved carry=0 */
366 ba .L_bn_add_words_warn_loop
367 addcc %g0,0,%g0 ! clear carry flag
368
369.L_bn_add_words_loop:
370 ld [%o1],%o4
371.L_bn_add_words_warn_loop:
372 ld [%o2],%o5
373 ld [%o1+4],%g3
374 ld [%o2+4],%g4
375 dec 4,%o3
376 addxcc %o5,%o4,%o5
377 st %o5,[%o0]
378
379 ld [%o1+8],%o4
380 ld [%o2+8],%o5
381 inc 16,%o1
382 addxcc %g3,%g4,%g3
383 st %g3,[%o0+4]
384
385 ld [%o1-4],%g3
386 ld [%o2+12],%g4
387 inc 16,%o2
388 addxcc %o5,%o4,%o5
389 st %o5,[%o0+8]
390
391 inc 16,%o0
392 addxcc %g3,%g4,%g3
393 st %g3,[%o0-4]
394 addx %g0,0,%g1 /* park the carry in %g1 */
395 andcc %o3,-4,%g0
396 bnz,a .L_bn_add_words_loop
397 addcc %g1,-1,%g0 /* delay slot: re-create the carry flag from %g1 */
398
399 tst %o3
400 bnz,a .L_bn_add_words_tail
401 ld [%o1],%o4
402.L_bn_add_words_return:
403 retl
404 mov %g1,%o0 /* delay slot: return the carry */
405
406.L_bn_add_words_tail:
407 addcc %g1,-1,%g0 /* re-create the carry flag from %g1 */
408 ld [%o2],%o5
409 addxcc %o5,%o4,%o5
410 addx %g0,0,%g1
411 deccc %o3
412 bz .L_bn_add_words_return
413 st %o5,[%o0] /* delay slot: the store happens on both paths */
414
415 ld [%o1+4],%o4
416 addcc %g1,-1,%g0
417 ld [%o2+4],%o5
418 addxcc %o5,%o4,%o5
419 addx %g0,0,%g1
420 deccc %o3
421 bz .L_bn_add_words_return
422 st %o5,[%o0+4]
423
424 ld [%o1+8],%o4
425 addcc %g1,-1,%g0
426 ld [%o2+8],%o5
427 addxcc %o5,%o4,%o5
428 st %o5,[%o0+8]
429 retl
430 addx %g0,0,%o0 /* delay slot: return the carry */
431
432.type bn_add_words,#function
433.size bn_add_words,(.-bn_add_words)
434
435.align 32
436
437.global bn_sub_words
438/*
439 * BN_ULONG bn_sub_words(rp,ap,bp,n)
440 * BN_ULONG *rp,*ap,*bp;
441 * int n;
 *
 * rp[i] = ap[i] - bp[i] - borrow for n 32-bit words; returns the final
 * borrow (0 or 1). Registers as used below: %o0=rp, %o1=ap, %o2=bp, %o3=n.
 * The carry flag doubles as the borrow; it is destroyed by the
 * loop-control andcc/deccc, so it is parked in %g1 (addx %g0,0,%g1) and
 * re-created with addcc %g1,-1,%g0.
442 */
443bn_sub_words:
444 cmp %o3,0
445 bg,a .L_bn_sub_words_proceed
446 ld [%o1],%o4 /* delay slot, annulled unless the branch is taken: preload ap[0] */
447 retl
448 clr %o0 /* n<=0: return 0 */
449
450.L_bn_sub_words_proceed:
451 andcc %o3,-4,%g0 /* any group of four words? */
452 bz .L_bn_sub_words_tail
453 clr %g1 /* delay slot: saved borrow=0 */
454 ba .L_bn_sub_words_warm_loop
455 addcc %g0,0,%g0 ! clear carry flag
456
457.L_bn_sub_words_loop:
458 ld [%o1],%o4
459.L_bn_sub_words_warm_loop:
460 ld [%o2],%o5
461 ld [%o1+4],%g3
462 ld [%o2+4],%g4
463 dec 4,%o3
464 subxcc %o4,%o5,%o5
465 st %o5,[%o0]
466
467 ld [%o1+8],%o4
468 ld [%o2+8],%o5
469 inc 16,%o1
470 subxcc %g3,%g4,%g4
471 st %g4,[%o0+4]
472
473 ld [%o1-4],%g3
474 ld [%o2+12],%g4
475 inc 16,%o2
476 subxcc %o4,%o5,%o5
477 st %o5,[%o0+8]
478
479 inc 16,%o0
480 subxcc %g3,%g4,%g4
481 st %g4,[%o0-4]
482 addx %g0,0,%g1 /* park the borrow in %g1 */
483 andcc %o3,-4,%g0
484 bnz,a .L_bn_sub_words_loop
485 addcc %g1,-1,%g0 /* delay slot: re-create the carry flag from %g1 */
486
487 tst %o3
488 nop
489 bnz,a .L_bn_sub_words_tail
490 ld [%o1],%o4
491.L_bn_sub_words_return:
492 retl
493 mov %g1,%o0 /* delay slot: return the borrow */
494
495.L_bn_sub_words_tail:
496 addcc %g1,-1,%g0 /* re-create the carry flag from %g1 */
497 ld [%o2],%o5
498 subxcc %o4,%o5,%o5
499 addx %g0,0,%g1
500 deccc %o3
501 bz .L_bn_sub_words_return
502 st %o5,[%o0] /* delay slot: the store happens on both paths */
503 nop
504
505 ld [%o1+4],%o4
506 addcc %g1,-1,%g0
507 ld [%o2+4],%o5
508 subxcc %o4,%o5,%o5
509 addx %g0,0,%g1
510 deccc %o3
511 bz .L_bn_sub_words_return
512 st %o5,[%o0+4]
513
514 ld [%o1+8],%o4
515 addcc %g1,-1,%g0
516 ld [%o2+8],%o5
517 subxcc %o4,%o5,%o5
518 st %o5,[%o0+8]
519 retl
520 addx %g0,0,%o0 /* delay slot: return the borrow */
521
522.type bn_sub_words,#function
523.size bn_sub_words,(.-bn_sub_words)
524
/* Stack adjustment handed to the save instruction by the routines below. */
525#define FRAME_SIZE -96
526
527/*
528 * Here is register usage map for *all* routines below.
 *
 * t_1/t_2 receive the low/high word of each product, c_1..c_3 form the
 * three-word column accumulator, ap/bp/rp index the a, b and r arrays
 * through %i1, %i2 and %i0, and a_0..a_7 / b_0..b_7 cache the input words.
529 */
530#define t_1 %o0
531#define t_2 %o1
532#define c_1 %o2
533#define c_2 %o3
534#define c_3 %o4
535
536#define ap(I) [%i1+4*I]
537#define bp(I) [%i2+4*I]
538#define rp(I) [%i0+4*I]
539
540#define a_0 %l0
541#define a_1 %l1
542#define a_2 %l2
543#define a_3 %l3
544#define a_4 %l4
545#define a_5 %l5
546#define a_6 %l6
547#define a_7 %l7
548
549#define b_0 %i3
550#define b_1 %i4
551#define b_2 %i5
552#define b_3 %o5
553#define b_4 %g1
554#define b_5 %g2
555#define b_6 %g3
556#define b_7 %g4
557
558.align 32
559.global bn_mul_comba8
560/*
561 * void bn_mul_comba8(r,a,b)
562 * BN_ULONG *r,*a,*b;
563 */
/*
 * Fully unrolled 8x8-word multiplication.
 *
 * After the save, r, a and b are reached through %i0, %i1 and %i2 (see the
 * rp/ap/bp macros). Each of a[0..7] and b[0..7] is loaded exactly once into
 * its a_N/b_N register, and the sixteen result words r[0..15] are stored one
 * at a time as each column of partial products is completed.
 *
 * Every partial product follows the same pattern: umul puts the low word in
 * t_1, rd %y fetches the high word into t_2, and addcc/addxcc/addx ripple the
 * sum and its carries through the three accumulators named in the adjacent
 * mul_add_c annotation. The first product of a column clears the third
 * accumulator (addx %g0,%g0,...); later products add the carry into it.
 *
 * Loads and stores are interleaved with the arithmetic between flag-setting
 * and flag-consuming instructions, so the instruction order is significant.
 *
 * The restore in the delay slot of ret writes zero into the caller's %o0.
 */
564bn_mul_comba8:
565 save %sp,FRAME_SIZE,%sp
566 ld ap(0),a_0
567 ld bp(0),b_0
568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
569 ld bp(1),b_1
570 rd %y,c_2
571 st c_1,rp(0) !r[0]=c1;
572
573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
574 ld ap(1),a_1
575 addcc c_2,t_1,c_2
576 rd %y,t_2
577 addxcc %g0,t_2,c_3 !=
578 addx %g0,%g0,c_1
579 ld ap(2),a_2
580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
581 addcc c_2,t_1,c_2 !=
582 rd %y,t_2
583 addxcc c_3,t_2,c_3
584 st c_2,rp(1) !r[1]=c2;
585 addx c_1,%g0,c_1 !=
586
587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
588 addcc c_3,t_1,c_3
589 rd %y,t_2
590 addxcc c_1,t_2,c_1 !=
591 addx %g0,%g0,c_2
592 ld bp(2),b_2
593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
594 addcc c_3,t_1,c_3 !=
595 rd %y,t_2
596 addxcc c_1,t_2,c_1
597 ld bp(3),b_3
598 addx c_2,%g0,c_2 !=
599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
600 addcc c_3,t_1,c_3
601 rd %y,t_2
602 addxcc c_1,t_2,c_1 !=
603 addx c_2,%g0,c_2
604 st c_3,rp(2) !r[2]=c3;
605
606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
607 addcc c_1,t_1,c_1 !=
608 rd %y,t_2
609 addxcc c_2,t_2,c_2
610 addx %g0,%g0,c_3
611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
612 addcc c_1,t_1,c_1
613 rd %y,t_2
614 addxcc c_2,t_2,c_2
615 addx c_3,%g0,c_3 !=
616 ld ap(3),a_3
617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
618 addcc c_1,t_1,c_1
619 rd %y,t_2 !=
620 addxcc c_2,t_2,c_2
621 addx c_3,%g0,c_3
622 ld ap(4),a_4
623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
624 addcc c_1,t_1,c_1
625 rd %y,t_2
626 addxcc c_2,t_2,c_2
627 addx c_3,%g0,c_3 !=
628 st c_1,rp(3) !r[3]=c1;
629
630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
631 addcc c_2,t_1,c_2
632 rd %y,t_2 !=
633 addxcc c_3,t_2,c_3
634 addx %g0,%g0,c_1
635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
636 addcc c_2,t_1,c_2 !=
637 rd %y,t_2
638 addxcc c_3,t_2,c_3
639 addx c_1,%g0,c_1
640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
641 addcc c_2,t_1,c_2
642 rd %y,t_2
643 addxcc c_3,t_2,c_3
644 addx c_1,%g0,c_1 !=
645 ld bp(4),b_4
646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
647 addcc c_2,t_1,c_2
648 rd %y,t_2 !=
649 addxcc c_3,t_2,c_3
650 addx c_1,%g0,c_1
651 ld bp(5),b_5
652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
653 addcc c_2,t_1,c_2
654 rd %y,t_2
655 addxcc c_3,t_2,c_3
656 addx c_1,%g0,c_1 !=
657 st c_2,rp(4) !r[4]=c2;
658
659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
660 addcc c_3,t_1,c_3
661 rd %y,t_2 !=
662 addxcc c_1,t_2,c_1
663 addx %g0,%g0,c_2
664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
665 addcc c_3,t_1,c_3 !=
666 rd %y,t_2
667 addxcc c_1,t_2,c_1
668 addx c_2,%g0,c_2
669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
670 addcc c_3,t_1,c_3
671 rd %y,t_2
672 addxcc c_1,t_2,c_1
673 addx c_2,%g0,c_2 !=
674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
675 addcc c_3,t_1,c_3
676 rd %y,t_2
677 addxcc c_1,t_2,c_1 !=
678 addx c_2,%g0,c_2
679 ld ap(5),a_5
680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
681 addcc c_3,t_1,c_3 !=
682 rd %y,t_2
683 addxcc c_1,t_2,c_1
684 ld ap(6),a_6
685 addx c_2,%g0,c_2 !=
686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
687 addcc c_3,t_1,c_3
688 rd %y,t_2
689 addxcc c_1,t_2,c_1 !=
690 addx c_2,%g0,c_2
691 st c_3,rp(5) !r[5]=c3;
692
693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
694 addcc c_1,t_1,c_1 !=
695 rd %y,t_2
696 addxcc c_2,t_2,c_2
697 addx %g0,%g0,c_3
698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
699 addcc c_1,t_1,c_1
700 rd %y,t_2
701 addxcc c_2,t_2,c_2
702 addx c_3,%g0,c_3 !=
703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
704 addcc c_1,t_1,c_1
705 rd %y,t_2
706 addxcc c_2,t_2,c_2 !=
707 addx c_3,%g0,c_3
708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
709 addcc c_1,t_1,c_1
710 rd %y,t_2 !=
711 addxcc c_2,t_2,c_2
712 addx c_3,%g0,c_3
713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
714 addcc c_1,t_1,c_1 !=
715 rd %y,t_2
716 addxcc c_2,t_2,c_2
717 ld bp(6),b_6
718 addx c_3,%g0,c_3 !=
719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
720 addcc c_1,t_1,c_1
721 rd %y,t_2
722 addxcc c_2,t_2,c_2 !=
723 addx c_3,%g0,c_3
724 ld bp(7),b_7
725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
726 addcc c_1,t_1,c_1 !=
727 rd %y,t_2
728 addxcc c_2,t_2,c_2
729 st c_1,rp(6) !r[6]=c1;
730 addx c_3,%g0,c_3 !=
731
732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
733 addcc c_2,t_1,c_2
734 rd %y,t_2
735 addxcc c_3,t_2,c_3 !=
736 addx %g0,%g0,c_1
737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
738 addcc c_2,t_1,c_2
739 rd %y,t_2 !=
740 addxcc c_3,t_2,c_3
741 addx c_1,%g0,c_1
742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
743 addcc c_2,t_1,c_2 !=
744 rd %y,t_2
745 addxcc c_3,t_2,c_3
746 addx c_1,%g0,c_1
747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
748 addcc c_2,t_1,c_2
749 rd %y,t_2
750 addxcc c_3,t_2,c_3
751 addx c_1,%g0,c_1 !=
752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
753 addcc c_2,t_1,c_2
754 rd %y,t_2
755 addxcc c_3,t_2,c_3 !=
756 addx c_1,%g0,c_1
757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
758 addcc c_2,t_1,c_2
759 rd %y,t_2 !=
760 addxcc c_3,t_2,c_3
761 addx c_1,%g0,c_1
762 ld ap(7),a_7
763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
764 addcc c_2,t_1,c_2
765 rd %y,t_2
766 addxcc c_3,t_2,c_3
767 addx c_1,%g0,c_1 !=
768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
769 addcc c_2,t_1,c_2
770 rd %y,t_2
771 addxcc c_3,t_2,c_3 !=
772 addx c_1,%g0,c_1
773 st c_2,rp(7) !r[7]=c2;
774
775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
776 addcc c_3,t_1,c_3 !=
777 rd %y,t_2
778 addxcc c_1,t_2,c_1
779 addx %g0,%g0,c_2
780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
781 addcc c_3,t_1,c_3
782 rd %y,t_2
783 addxcc c_1,t_2,c_1
784 addx c_2,%g0,c_2 !=
785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
786 addcc c_3,t_1,c_3
787 rd %y,t_2
788 addxcc c_1,t_2,c_1 !=
789 addx c_2,%g0,c_2
790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
791 addcc c_3,t_1,c_3
792 rd %y,t_2 !=
793 addxcc c_1,t_2,c_1
794 addx c_2,%g0,c_2
795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
796 addcc c_3,t_1,c_3 !=
797 rd %y,t_2
798 addxcc c_1,t_2,c_1
799 addx c_2,%g0,c_2
800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
801 addcc c_3,t_1,c_3
802 rd %y,t_2
803 addxcc c_1,t_2,c_1
804 addx c_2,%g0,c_2 !=
805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
806 addcc c_3,t_1,c_3
807 rd %y,t_2
808 addxcc c_1,t_2,c_1 !
809 addx c_2,%g0,c_2
810 st c_3,rp(8) !r[8]=c3;
811
812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
813 addcc c_1,t_1,c_1 !=
814 rd %y,t_2
815 addxcc c_2,t_2,c_2
816 addx %g0,%g0,c_3
817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
818 addcc c_1,t_1,c_1
819 rd %y,t_2
820 addxcc c_2,t_2,c_2
821 addx c_3,%g0,c_3 !=
822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
823 addcc c_1,t_1,c_1
824 rd %y,t_2
825 addxcc c_2,t_2,c_2 !=
826 addx c_3,%g0,c_3
827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
828 addcc c_1,t_1,c_1
829 rd %y,t_2 !=
830 addxcc c_2,t_2,c_2
831 addx c_3,%g0,c_3
832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
833 addcc c_1,t_1,c_1 !=
834 rd %y,t_2
835 addxcc c_2,t_2,c_2
836 addx c_3,%g0,c_3
837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
838 addcc c_1,t_1,c_1
839 rd %y,t_2
840 addxcc c_2,t_2,c_2
841 addx c_3,%g0,c_3 !=
842 st c_1,rp(9) !r[9]=c1;
843
844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
845 addcc c_2,t_1,c_2
846 rd %y,t_2 !=
847 addxcc c_3,t_2,c_3
848 addx %g0,%g0,c_1
849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
850 addcc c_2,t_1,c_2 !=
851 rd %y,t_2
852 addxcc c_3,t_2,c_3
853 addx c_1,%g0,c_1
854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
855 addcc c_2,t_1,c_2
856 rd %y,t_2
857 addxcc c_3,t_2,c_3
858 addx c_1,%g0,c_1 !=
859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
860 addcc c_2,t_1,c_2
861 rd %y,t_2
862 addxcc c_3,t_2,c_3 !=
863 addx c_1,%g0,c_1
864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
865 addcc c_2,t_1,c_2
866 rd %y,t_2 !=
867 addxcc c_3,t_2,c_3
868 addx c_1,%g0,c_1
869 st c_2,rp(10) !r[10]=c2;
870
871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
872 addcc c_3,t_1,c_3
873 rd %y,t_2
874 addxcc c_1,t_2,c_1
875 addx %g0,%g0,c_2 !=
876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
877 addcc c_3,t_1,c_3
878 rd %y,t_2
879 addxcc c_1,t_2,c_1 !=
880 addx c_2,%g0,c_2
881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
882 addcc c_3,t_1,c_3
883 rd %y,t_2 !=
884 addxcc c_1,t_2,c_1
885 addx c_2,%g0,c_2
886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
887 addcc c_3,t_1,c_3 !=
888 rd %y,t_2
889 addxcc c_1,t_2,c_1
890 st c_3,rp(11) !r[11]=c3;
891 addx c_2,%g0,c_2 !=
892
893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
894 addcc c_1,t_1,c_1
895 rd %y,t_2
896 addxcc c_2,t_2,c_2 !=
897 addx %g0,%g0,c_3
898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
899 addcc c_1,t_1,c_1
900 rd %y,t_2 !=
901 addxcc c_2,t_2,c_2
902 addx c_3,%g0,c_3
903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
904 addcc c_1,t_1,c_1 !=
905 rd %y,t_2
906 addxcc c_2,t_2,c_2
907 st c_1,rp(12) !r[12]=c1;
908 addx c_3,%g0,c_3 !=
909
910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
911 addcc c_2,t_1,c_2
912 rd %y,t_2
913 addxcc c_3,t_2,c_3 !=
914 addx %g0,%g0,c_1
915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
916 addcc c_2,t_1,c_2
917 rd %y,t_2 !=
918 addxcc c_3,t_2,c_3
919 addx c_1,%g0,c_1
920 st c_2,rp(13) !r[13]=c2;
921
922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
923 addcc c_3,t_1,c_3
924 rd %y,t_2
925 addxcc c_1,t_2,c_1
926 nop !=
927 st c_3,rp(14) !r[14]=c3;
928 st c_1,rp(15) !r[15]=c1;
929
930 ret
931 restore %g0,%g0,%o0
932
933.type bn_mul_comba8,#function
934.size bn_mul_comba8,(.-bn_mul_comba8)
935
936.align 32
937
938.global bn_mul_comba4
939/*
940 * void bn_mul_comba4(r,a,b)
941 * BN_ULONG *r,*a,*b;
942 */
/*
 * Fully unrolled 4x4-word multiplication.
 *
 * After the save, r, a and b are reached through %i0, %i1 and %i2 (see the
 * rp/ap/bp macros). a[0..3] and b[0..3] are each loaded once into a_0..a_3
 * and b_0..b_3, and the eight result words r[0..7] are stored as each column
 * of partial products is completed.
 *
 * Each partial product is formed with umul (low word into t_1) followed by
 * rd %y (high word into t_2); addcc/addxcc/addx then carry the sum through
 * the three accumulators named in the adjacent mul_add_c annotation. The
 * final product a[3]*b[3] needs only two accumulators, so no addx follows it.
 *
 * The restore in the delay slot of ret writes zero into the caller's %o0.
 */
943bn_mul_comba4:
944 save %sp,FRAME_SIZE,%sp
945 ld ap(0),a_0
946 ld bp(0),b_0
947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
948 ld bp(1),b_1
949 rd %y,c_2
950 st c_1,rp(0) !r[0]=c1;
951
952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
953 ld ap(1),a_1
954 addcc c_2,t_1,c_2
955 rd %y,t_2 !=
956 addxcc %g0,t_2,c_3
957 addx %g0,%g0,c_1
958 ld ap(2),a_2
959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
960 addcc c_2,t_1,c_2
961 rd %y,t_2
962 addxcc c_3,t_2,c_3
963 addx c_1,%g0,c_1 !=
964 st c_2,rp(1) !r[1]=c2;
965
966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
967 addcc c_3,t_1,c_3
968 rd %y,t_2 !=
969 addxcc c_1,t_2,c_1
970 addx %g0,%g0,c_2
971 ld bp(2),b_2
972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
973 addcc c_3,t_1,c_3
974 rd %y,t_2
975 addxcc c_1,t_2,c_1
976 addx c_2,%g0,c_2 !=
977 ld bp(3),b_3
978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
979 addcc c_3,t_1,c_3
980 rd %y,t_2 !=
981 addxcc c_1,t_2,c_1
982 addx c_2,%g0,c_2
983 st c_3,rp(2) !r[2]=c3;
984
985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
986 addcc c_1,t_1,c_1
987 rd %y,t_2
988 addxcc c_2,t_2,c_2
989 addx %g0,%g0,c_3 !=
990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
991 addcc c_1,t_1,c_1
992 rd %y,t_2
993 addxcc c_2,t_2,c_2 !=
994 addx c_3,%g0,c_3
995 ld ap(3),a_3
996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
997 addcc c_1,t_1,c_1 !=
998 rd %y,t_2
999 addxcc c_2,t_2,c_2
1000 addx c_3,%g0,c_3
1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1002 addcc c_1,t_1,c_1
1003 rd %y,t_2
1004 addxcc c_2,t_2,c_2
1005 addx c_3,%g0,c_3 !=
1006 st c_1,rp(3) !r[3]=c1;
1007
1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1009 addcc c_2,t_1,c_2
1010 rd %y,t_2 !=
1011 addxcc c_3,t_2,c_3
1012 addx %g0,%g0,c_1
1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1014 addcc c_2,t_1,c_2 !=
1015 rd %y,t_2
1016 addxcc c_3,t_2,c_3
1017 addx c_1,%g0,c_1
1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1019 addcc c_2,t_1,c_2
1020 rd %y,t_2
1021 addxcc c_3,t_2,c_3
1022 addx c_1,%g0,c_1 !=
1023 st c_2,rp(4) !r[4]=c2;
1024
1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1026 addcc c_3,t_1,c_3
1027 rd %y,t_2 !=
1028 addxcc c_1,t_2,c_1
1029 addx %g0,%g0,c_2
1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1031 addcc c_3,t_1,c_3 !=
1032 rd %y,t_2
1033 addxcc c_1,t_2,c_1
1034 st c_3,rp(5) !r[5]=c3;
1035 addx c_2,%g0,c_2 !=
1036
1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1038 addcc c_1,t_1,c_1
1039 rd %y,t_2
1040 addxcc c_2,t_2,c_2 !=
1041 st c_1,rp(6) !r[6]=c1;
1042 st c_2,rp(7) !r[7]=c2;
1043
1044 ret
1045 restore %g0,%g0,%o0
1046
1047.type bn_mul_comba4,#function
1048.size bn_mul_comba4,(.-bn_mul_comba4)
1049
1050.align 32
1051
1052.global bn_sqr_comba8
/*
 * Fully unrolled squaring of an 8-word operand into 16 result words.
 *
 * After the save, r is reached through %i0 and a through %i1 (rp/ap macros);
 * a[0..7] are each loaded once into a_0..a_7 and r[0..15] are stored as each
 * column is completed. The b_N registers are not used here.
 *
 * Two product patterns appear, matching the annotations on the umul lines:
 *  - sqr_add_c  (a[i]*a[i]): umul, rd %y, then one addcc/addxcc/addx pass.
 *  - sqr_add_c2 (a[i]*a[j], i != j): umul, rd %y, then the addcc/addxcc/addx
 *    pass is executed twice with the same t_1/t_2, so the cross product is
 *    accumulated two times without a second multiply.
 *
 * Loads and stores sit between flag-setting and flag-consuming instructions,
 * so the instruction order is significant.
 *
 * The restore in the delay slot of ret writes zero into the caller's %o0.
 */
1053bn_sqr_comba8:
1054 save %sp,FRAME_SIZE,%sp
1055 ld ap(0),a_0
1056 ld ap(1),a_1
1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1058 rd %y,c_2
1059 st c_1,rp(0) !r[0]=c1;
1060
1061 ld ap(2),a_2
1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1063 addcc c_2,t_1,c_2
1064 rd %y,t_2
1065 addxcc %g0,t_2,c_3
1066 addx %g0,%g0,c_1 !=
1067 addcc c_2,t_1,c_2
1068 addxcc c_3,t_2,c_3
1069 st c_2,rp(1) !r[1]=c2;
1070 addx c_1,%g0,c_1 !=
1071
1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1073 addcc c_3,t_1,c_3
1074 rd %y,t_2
1075 addxcc c_1,t_2,c_1 !=
1076 addx %g0,%g0,c_2
1077 addcc c_3,t_1,c_3
1078 addxcc c_1,t_2,c_1
1079 addx c_2,%g0,c_2 !=
1080 ld ap(3),a_3
1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1082 addcc c_3,t_1,c_3
1083 rd %y,t_2 !=
1084 addxcc c_1,t_2,c_1
1085 addx c_2,%g0,c_2
1086 st c_3,rp(2) !r[2]=c3;
1087
1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1089 addcc c_1,t_1,c_1
1090 rd %y,t_2
1091 addxcc c_2,t_2,c_2
1092 addx %g0,%g0,c_3 !=
1093 addcc c_1,t_1,c_1
1094 addxcc c_2,t_2,c_2
1095 ld ap(4),a_4
1096 addx c_3,%g0,c_3 !=
1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1098 addcc c_1,t_1,c_1
1099 rd %y,t_2
1100 addxcc c_2,t_2,c_2 !=
1101 addx c_3,%g0,c_3
1102 addcc c_1,t_1,c_1
1103 addxcc c_2,t_2,c_2
1104 addx c_3,%g0,c_3 !=
1105 st c_1,rp(3) !r[3]=c1;
1106
1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1108 addcc c_2,t_1,c_2
1109 rd %y,t_2 !=
1110 addxcc c_3,t_2,c_3
1111 addx %g0,%g0,c_1
1112 addcc c_2,t_1,c_2
1113 addxcc c_3,t_2,c_3 !=
1114 addx c_1,%g0,c_1
1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1116 addcc c_2,t_1,c_2
1117 rd %y,t_2 !=
1118 addxcc c_3,t_2,c_3
1119 addx c_1,%g0,c_1
1120 addcc c_2,t_1,c_2
1121 addxcc c_3,t_2,c_3 !=
1122 addx c_1,%g0,c_1
1123 ld ap(5),a_5
1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1125 addcc c_2,t_1,c_2 !=
1126 rd %y,t_2
1127 addxcc c_3,t_2,c_3
1128 st c_2,rp(4) !r[4]=c2;
1129 addx c_1,%g0,c_1 !=
1130
1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1132 addcc c_3,t_1,c_3
1133 rd %y,t_2
1134 addxcc c_1,t_2,c_1 !=
1135 addx %g0,%g0,c_2
1136 addcc c_3,t_1,c_3
1137 addxcc c_1,t_2,c_1
1138 addx c_2,%g0,c_2 !=
1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1140 addcc c_3,t_1,c_3
1141 rd %y,t_2
1142 addxcc c_1,t_2,c_1 !=
1143 addx c_2,%g0,c_2
1144 addcc c_3,t_1,c_3
1145 addxcc c_1,t_2,c_1
1146 addx c_2,%g0,c_2 !=
1147 ld ap(6),a_6
1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1149 addcc c_3,t_1,c_3
1150 rd %y,t_2 !=
1151 addxcc c_1,t_2,c_1
1152 addx c_2,%g0,c_2
1153 addcc c_3,t_1,c_3
1154 addxcc c_1,t_2,c_1 !=
1155 addx c_2,%g0,c_2
1156 st c_3,rp(5) !r[5]=c3;
1157
1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1159 addcc c_1,t_1,c_1 !=
1160 rd %y,t_2
1161 addxcc c_2,t_2,c_2
1162 addx %g0,%g0,c_3
1163 addcc c_1,t_1,c_1 !=
1164 addxcc c_2,t_2,c_2
1165 addx c_3,%g0,c_3
1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1167 addcc c_1,t_1,c_1 !=
1168 rd %y,t_2
1169 addxcc c_2,t_2,c_2
1170 addx c_3,%g0,c_3
1171 addcc c_1,t_1,c_1 !=
1172 addxcc c_2,t_2,c_2
1173 addx c_3,%g0,c_3
1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1175 addcc c_1,t_1,c_1 !=
1176 rd %y,t_2
1177 addxcc c_2,t_2,c_2
1178 addx c_3,%g0,c_3
1179 addcc c_1,t_1,c_1 !=
1180 addxcc c_2,t_2,c_2
1181 addx c_3,%g0,c_3
1182 ld ap(7),a_7
1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1184 addcc c_1,t_1,c_1
1185 rd %y,t_2
1186 addxcc c_2,t_2,c_2
1187 addx c_3,%g0,c_3 !=
1188 st c_1,rp(6) !r[6]=c1;
1189
1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1191 addcc c_2,t_1,c_2
1192 rd %y,t_2 !=
1193 addxcc c_3,t_2,c_3
1194 addx %g0,%g0,c_1
1195 addcc c_2,t_1,c_2
1196 addxcc c_3,t_2,c_3 !=
1197 addx c_1,%g0,c_1
1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1199 addcc c_2,t_1,c_2
1200 rd %y,t_2 !=
1201 addxcc c_3,t_2,c_3
1202 addx c_1,%g0,c_1
1203 addcc c_2,t_1,c_2
1204 addxcc c_3,t_2,c_3 !=
1205 addx c_1,%g0,c_1
1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1207 addcc c_2,t_1,c_2
1208 rd %y,t_2 !=
1209 addxcc c_3,t_2,c_3
1210 addx c_1,%g0,c_1
1211 addcc c_2,t_1,c_2
1212 addxcc c_3,t_2,c_3 !=
1213 addx c_1,%g0,c_1
1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1215 addcc c_2,t_1,c_2
1216 rd %y,t_2 !=
1217 addxcc c_3,t_2,c_3
1218 addx c_1,%g0,c_1
1219 addcc c_2,t_1,c_2
1220 addxcc c_3,t_2,c_3 !=
1221 addx c_1,%g0,c_1
1222 st c_2,rp(7) !r[7]=c2;
1223
1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1225 addcc c_3,t_1,c_3 !=
1226 rd %y,t_2
1227 addxcc c_1,t_2,c_1
1228 addx %g0,%g0,c_2
1229 addcc c_3,t_1,c_3 !=
1230 addxcc c_1,t_2,c_1
1231 addx c_2,%g0,c_2
1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1233 addcc c_3,t_1,c_3 !=
1234 rd %y,t_2
1235 addxcc c_1,t_2,c_1
1236 addx c_2,%g0,c_2
1237 addcc c_3,t_1,c_3 !=
1238 addxcc c_1,t_2,c_1
1239 addx c_2,%g0,c_2
1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1241 addcc c_3,t_1,c_3 !=
1242 rd %y,t_2
1243 addxcc c_1,t_2,c_1
1244 addx c_2,%g0,c_2
1245 addcc c_3,t_1,c_3 !=
1246 addxcc c_1,t_2,c_1
1247 addx c_2,%g0,c_2
1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1249 addcc c_3,t_1,c_3 !=
1250 rd %y,t_2
1251 addxcc c_1,t_2,c_1
1252 st c_3,rp(8) !r[8]=c3;
1253 addx c_2,%g0,c_2 !=
1254
1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1256 addcc c_1,t_1,c_1
1257 rd %y,t_2
1258 addxcc c_2,t_2,c_2 !=
1259 addx %g0,%g0,c_3
1260 addcc c_1,t_1,c_1
1261 addxcc c_2,t_2,c_2
1262 addx c_3,%g0,c_3 !=
1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1264 addcc c_1,t_1,c_1
1265 rd %y,t_2
1266 addxcc c_2,t_2,c_2 !=
1267 addx c_3,%g0,c_3
1268 addcc c_1,t_1,c_1
1269 addxcc c_2,t_2,c_2
1270 addx c_3,%g0,c_3 !=
1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1272 addcc c_1,t_1,c_1
1273 rd %y,t_2
1274 addxcc c_2,t_2,c_2 !=
1275 addx c_3,%g0,c_3
1276 addcc c_1,t_1,c_1
1277 addxcc c_2,t_2,c_2
1278 addx c_3,%g0,c_3 !=
1279 st c_1,rp(9) !r[9]=c1;
1280
1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1282 addcc c_2,t_1,c_2
1283 rd %y,t_2 !=
1284 addxcc c_3,t_2,c_3
1285 addx %g0,%g0,c_1
1286 addcc c_2,t_1,c_2
1287 addxcc c_3,t_2,c_3 !=
1288 addx c_1,%g0,c_1
1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1290 addcc c_2,t_1,c_2
1291 rd %y,t_2 !=
1292 addxcc c_3,t_2,c_3
1293 addx c_1,%g0,c_1
1294 addcc c_2,t_1,c_2
1295 addxcc c_3,t_2,c_3 !=
1296 addx c_1,%g0,c_1
1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1298 addcc c_2,t_1,c_2
1299 rd %y,t_2 !=
1300 addxcc c_3,t_2,c_3
1301 addx c_1,%g0,c_1
1302 st c_2,rp(10) !r[10]=c2;
1303
1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1305 addcc c_3,t_1,c_3
1306 rd %y,t_2
1307 addxcc c_1,t_2,c_1
1308 addx %g0,%g0,c_2 !=
1309 addcc c_3,t_1,c_3
1310 addxcc c_1,t_2,c_1
1311 addx c_2,%g0,c_2
1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1313 addcc c_3,t_1,c_3
1314 rd %y,t_2
1315 addxcc c_1,t_2,c_1
1316 addx c_2,%g0,c_2 !=
1317 addcc c_3,t_1,c_3
1318 addxcc c_1,t_2,c_1
1319 st c_3,rp(11) !r[11]=c3;
1320 addx c_2,%g0,c_2 !=
1321
1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1323 addcc c_1,t_1,c_1
1324 rd %y,t_2
1325 addxcc c_2,t_2,c_2 !=
1326 addx %g0,%g0,c_3
1327 addcc c_1,t_1,c_1
1328 addxcc c_2,t_2,c_2
1329 addx c_3,%g0,c_3 !=
1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1331 addcc c_1,t_1,c_1
1332 rd %y,t_2
1333 addxcc c_2,t_2,c_2 !=
1334 addx c_3,%g0,c_3
1335 st c_1,rp(12) !r[12]=c1;
1336
1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1338 addcc c_2,t_1,c_2 !=
1339 rd %y,t_2
1340 addxcc c_3,t_2,c_3
1341 addx %g0,%g0,c_1
1342 addcc c_2,t_1,c_2 !=
1343 addxcc c_3,t_2,c_3
1344 st c_2,rp(13) !r[13]=c2;
1345 addx c_1,%g0,c_1 !=
1346
1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1348 addcc c_3,t_1,c_3
1349 rd %y,t_2
1350 addxcc c_1,t_2,c_1 !=
1351 st c_3,rp(14) !r[14]=c3;
1352 st c_1,rp(15) !r[15]=c1;
1353
1354 ret
1355 restore %g0,%g0,%o0
1356
1357.type bn_sqr_comba8,#function
1358.size bn_sqr_comba8,(.-bn_sqr_comba8)
1359
1360.align 32
1361
1362.global bn_sqr_comba4
1363/*
1364 * void bn_sqr_comba4(r,a)
1365 * BN_ULONG *r,*a;
1366 */
/*
 * Fully unrolled squaring of a 4-word operand into 8 result words.
 *
 * After the save, r is reached through %i0 and a through %i1 (rp/ap macros);
 * a[0..3] are each loaded once into a_0..a_3 and r[0..7] are stored as each
 * column is completed.
 *
 * Diagonal products (sqr_add_c annotations) are accumulated with a single
 * addcc/addxcc/addx pass after umul and rd %y. Cross products (sqr_add_c2
 * annotations) run that pass twice with the same t_1/t_2, so each a[i]*a[j]
 * is multiplied once but added two times.
 *
 * The restore in the delay slot of ret writes zero into the caller's %o0.
 */
1367bn_sqr_comba4:
1368 save %sp,FRAME_SIZE,%sp
1369 ld ap(0),a_0
1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1371 ld ap(1),a_1 !=
1372 rd %y,c_2
1373 st c_1,rp(0) !r[0]=c1;
1374
1375 ld ap(2),a_2
1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1377 addcc c_2,t_1,c_2
1378 rd %y,t_2
1379 addxcc %g0,t_2,c_3
1380 addx %g0,%g0,c_1 !=
1381 addcc c_2,t_1,c_2
1382 addxcc c_3,t_2,c_3
1383 addx c_1,%g0,c_1 !=
1384 st c_2,rp(1) !r[1]=c2;
1385
1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1387 addcc c_3,t_1,c_3
1388 rd %y,t_2 !=
1389 addxcc c_1,t_2,c_1
1390 addx %g0,%g0,c_2
1391 addcc c_3,t_1,c_3
1392 addxcc c_1,t_2,c_1 !=
1393 addx c_2,%g0,c_2
1394 ld ap(3),a_3
1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1396 addcc c_3,t_1,c_3 !=
1397 rd %y,t_2
1398 addxcc c_1,t_2,c_1
1399 st c_3,rp(2) !r[2]=c3;
1400 addx c_2,%g0,c_2 !=
1401
1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1403 addcc c_1,t_1,c_1
1404 rd %y,t_2
1405 addxcc c_2,t_2,c_2 !=
1406 addx %g0,%g0,c_3
1407 addcc c_1,t_1,c_1
1408 addxcc c_2,t_2,c_2
1409 addx c_3,%g0,c_3 !=
1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1411 addcc c_1,t_1,c_1
1412 rd %y,t_2
1413 addxcc c_2,t_2,c_2 !=
1414 addx c_3,%g0,c_3
1415 addcc c_1,t_1,c_1
1416 addxcc c_2,t_2,c_2
1417 addx c_3,%g0,c_3 !=
1418 st c_1,rp(3) !r[3]=c1;
1419
1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1421 addcc c_2,t_1,c_2
1422 rd %y,t_2 !=
1423 addxcc c_3,t_2,c_3
1424 addx %g0,%g0,c_1
1425 addcc c_2,t_1,c_2
1426 addxcc c_3,t_2,c_3 !=
1427 addx c_1,%g0,c_1
1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1429 addcc c_2,t_1,c_2
1430 rd %y,t_2 !=
1431 addxcc c_3,t_2,c_3
1432 addx c_1,%g0,c_1
1433 st c_2,rp(4) !r[4]=c2;
1434
1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1436 addcc c_3,t_1,c_3
1437 rd %y,t_2
1438 addxcc c_1,t_2,c_1
1439 addx %g0,%g0,c_2 !=
1440 addcc c_3,t_1,c_3
1441 addxcc c_1,t_2,c_1
1442 st c_3,rp(5) !r[5]=c3;
1443 addx c_2,%g0,c_2 !=
1444
1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1446 addcc c_1,t_1,c_1
1447 rd %y,t_2
1448 addxcc c_2,t_2,c_2 !=
1449 st c_1,rp(6) !r[6]=c1;
1450 st c_2,rp(7) !r[7]=c2;
1451
1452 ret
1453 restore %g0,%g0,%o0
1454
1455.type bn_sqr_comba4,#function
1456.size bn_sqr_comba4,(.-bn_sqr_comba4)
1457
1458.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
deleted file mode 100644
index 8c56e2e7e7..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8plus.S
+++ /dev/null
@@ -1,1547 +0,0 @@
1.ident "sparcv8plus.s, Version 1.4"
2.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * Questions-n-answers.
22 *
23 * Q. How to compile?
24 * A. With SC4.x/SC5.x:
25 *
26 * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
27 *
28 * and with gcc:
29 *
30 * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
31 *
32 * or if above fails (it does if you have gas installed):
33 *
34 * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
35 *
36 * Quick-n-dirty way to fuse the module into the library.
37 * Provided that the library is already configured and built
38 * (in 0.9.2 case with no-asm option):
39 *
40 * # cd crypto/bn
41 * # cp /some/place/bn_asm.sparc.v8plus.S .
42 * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
43 * # make
44 * # cd ../..
45 * # make; make test
46 *
47 * Quick-n-dirty way to get rid of it:
48 *
49 * # cd crypto/bn
50 * # touch bn_asm.c
51 * # make
52 * # cd ../..
53 * # make; make test
54 *
55 * Q. V8plus architecture? What kind of beast is that?
56 * A. Well, it's rather a programming model than an architecture...
57 * It's actually v9-compliant, i.e. *any* UltraSPARC CPU, under
58 * special conditions, namely when kernel doesn't preserve upper
59 * 32 bits of otherwise 64-bit registers during a context switch.
60 *
61 * Q. Why just UltraSPARC? What about SuperSPARC?
62 * A. Original release did target UltraSPARC only. Now SuperSPARC
63 * version is provided along. Both versions share bn_*comba[48]
64 * implementations (see comment later in code for explanation).
65 * But what's so special about this UltraSPARC implementation?
66 * Why didn't I let compiler do the job? Trouble is that most of
67 * available compilers (well, SC5.0 is the only exception) don't
68 * attempt to take advantage of UltraSPARC's 64-bitness under
69 * 32-bit kernels even though it's perfectly possible (see next
70 * question).
71 *
72 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
73 * doesn't work?
74 * A. You can't address *all* registers as 64-bit wide:-( The catch is
75 * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
76 * preserved if you're in a leaf function, i.e. such never calling
77 * any other functions. All functions in this module are leaf and
78 * 10 registers is a handful. And as a matter of fact non-"comba"
79 * routines don't require even that much and I could even afford to
80 * not allocate own stack frame for 'em:-)
81 *
82 * Q. What about 64-bit kernels?
83 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
84 * under evaluation and development...
85 *
86 * Q. What about shared libraries?
87 * A. What about 'em? Kidding again:-) Code does *not* contain any
88 * code position dependencies and it's safe to include it into
89 * shared library as is.
90 *
91 * Q. How much faster does it go?
92 * A. Do you have a good benchmark? In either case below is what I
93 * experience with crypto/bn/expspeed.c test program:
94 *
95 * v8plus module on U10/300MHz against bn_asm.c compiled with:
96 *
97 * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12%
98 * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35%
99 * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45%
100 *
101 * v8 module on SS10/60MHz against bn_asm.c compiled with:
102 *
103 * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10%
104 * cc-4.2 -xarch=v8 -xO5 -xdepend +10%
105 * egcs-1.1.2 -mv8 -O3 +35-45%
106 *
107 * As you can see it's damn hard to beat the new Sun C compiler
108 * and it's in first place GNU C users who will appreciate this
109 * assembler implementation:-)
110 */
111
112/*
113 * Revision history.
114 *
115 * 1.0 - initial release;
116 * 1.1 - new loop unrolling model(*);
117 * - some more fine tuning;
118 * 1.2 - made gas friendly;
119 * - updates to documentation concerning v9;
120 * - new performance comparison matrix;
121 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
122 * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient)
123 * resulting in slight overall performance kick;
124 * - some retunes;
125 * - support for GNU as added;
126 *
127 * (*) Originally unrolled loop looked like this:
128 * for (;;) {
129 * op(p+0); if (--n==0) break;
130 * op(p+1); if (--n==0) break;
131 * op(p+2); if (--n==0) break;
132 * op(p+3); if (--n==0) break;
133 * p+=4;
134 * }
135 * I unroll according to following:
136 * while (n&~3) {
137 * op(p+0); op(p+1); op(p+2); op(p+3);
138 * p+=4; n-=4;
139 * }
140 * if (n) {
141 * op(p+0); if (--n==0) return;
142 * op(p+1); if (--n==0) return;
143 * op(p+2); return;
144 * }
145 */
146
147/*
148 * GNU assembler can't stand stuw:-(
149 */
/*
 * The preprocessor therefore rewrites every stuw used by the routines below
 * into the plain st mnemonic.
 */
150#define stuw st
151
152.section ".text",#alloc,#execinstr
153.file "bn_asm.sparc.v8plus.S"
154
155.align 32
156
157.global bn_mul_add_words
158/*
159 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
160 * BN_ULONG *rp,*ap;
161 * int num;
162 * BN_ULONG w;
163 */
/*
 * Leaf routine (no save): rp in %o0, ap in %o1, num in %o2, w in %o3.
 *
 * For each of the num words computes rp[i] + carry + ap[i]*w as one 64-bit
 * value in %o4 (mulx plus two adds), stores its low 32 bits back to rp[i]
 * and keeps the upper 32 bits (srlx by 32) in %o5 as the carry for the next
 * word. The final carry is returned in %o0.
 *
 * num is sign-extended first; when it is not greater than zero the routine
 * returns 0 without touching memory. w is zero-extended before use.
 *
 * The main loop handles four words per pass while (num & -4) is non-zero;
 * the tail then handles the remaining one to three words. The first ap word
 * of each pass is fetched in the annulled delay slot of the branch that
 * enters it, so it is only loaded when the branch is taken.
 */
164bn_mul_add_words:
165 sra %o2,%g0,%o2 ! signx %o2
166 brgz,a %o2,.L_bn_mul_add_words_proceed
167 lduw [%o1],%g2
168 retl
169 clr %o0
170 nop
171 nop
172 nop
173
174.L_bn_mul_add_words_proceed:
175 srl %o3,%g0,%o3 ! clruw %o3
176 andcc %o2,-4,%g0
177 bz,pn %icc,.L_bn_mul_add_words_tail
178 clr %o5
179
180.L_bn_mul_add_words_loop: ! wow! 32 aligned!
181 lduw [%o0],%g1
182 lduw [%o1+4],%g3
183 mulx %o3,%g2,%g2
184 add %g1,%o5,%o4
185 nop
186 add %o4,%g2,%o4
187 stuw %o4,[%o0]
188 srlx %o4,32,%o5
189
190 lduw [%o0+4],%g1
191 lduw [%o1+8],%g2
192 mulx %o3,%g3,%g3
193 add %g1,%o5,%o4
194 dec 4,%o2
195 add %o4,%g3,%o4
196 stuw %o4,[%o0+4]
197 srlx %o4,32,%o5
198
199 lduw [%o0+8],%g1
200 lduw [%o1+12],%g3
201 mulx %o3,%g2,%g2
202 add %g1,%o5,%o4
203 inc 16,%o1
204 add %o4,%g2,%o4
205 stuw %o4,[%o0+8]
206 srlx %o4,32,%o5
207
208 lduw [%o0+12],%g1
209 mulx %o3,%g3,%g3
210 add %g1,%o5,%o4
211 inc 16,%o0
212 add %o4,%g3,%o4
213 andcc %o2,-4,%g0
214 stuw %o4,[%o0-4]
215 srlx %o4,32,%o5
216 bnz,a,pt %icc,.L_bn_mul_add_words_loop
217 lduw [%o1],%g2
218
219 brnz,a,pn %o2,.L_bn_mul_add_words_tail
220 lduw [%o1],%g2
221.L_bn_mul_add_words_return:
222 retl
223 mov %o5,%o0
224
225.L_bn_mul_add_words_tail:
226 lduw [%o0],%g1
227 mulx %o3,%g2,%g2
228 add %g1,%o5,%o4
229 dec %o2
230 add %o4,%g2,%o4
231 srlx %o4,32,%o5
232 brz,pt %o2,.L_bn_mul_add_words_return
233 stuw %o4,[%o0]
234
235 lduw [%o1+4],%g2
236 lduw [%o0+4],%g1
237 mulx %o3,%g2,%g2
238 add %g1,%o5,%o4
239 dec %o2
240 add %o4,%g2,%o4
241 srlx %o4,32,%o5
242 brz,pt %o2,.L_bn_mul_add_words_return
243 stuw %o4,[%o0+4]
244
245 lduw [%o1+8],%g2
246 lduw [%o0+8],%g1
247 mulx %o3,%g2,%g2
248 add %g1,%o5,%o4
249 add %o4,%g2,%o4
250 stuw %o4,[%o0+8]
251 retl
252 srlx %o4,32,%o0
253
254.type bn_mul_add_words,#function
255.size bn_mul_add_words,(.-bn_mul_add_words)
256
257.align 32
258
259.global bn_mul_words
260/*
261 * BN_ULONG bn_mul_words(rp,ap,num,w)
262 * BN_ULONG *rp,*ap;
263 * int num;
264 * BN_ULONG w;
265 */
/*
 * Leaf routine (no save): rp in %o0, ap in %o1, num in %o2, w in %o3.
 *
 * For each of the num words computes ap[i]*w + carry as one 64-bit value in
 * %o4, stores its low 32 bits to rp[i] and keeps the upper 32 bits (srlx by
 * 32) in %o5 as the carry for the next word. The existing contents of rp are
 * never read. The final carry is returned in %o0.
 *
 * num is sign-extended first; when it is not greater than zero the routine
 * returns 0 without touching memory. w is zero-extended before use.
 *
 * The main loop handles four words per pass while (num & -4) is non-zero;
 * the tail then handles the remaining one to three words. The first ap word
 * of each pass is fetched in the annulled delay slot of the branch that
 * enters it.
 */
266bn_mul_words:
267 sra %o2,%g0,%o2 ! signx %o2
268 brgz,a %o2,.L_bn_mul_words_proceeed
269 lduw [%o1],%g2
270 retl
271 clr %o0
272 nop
273 nop
274 nop
275
276.L_bn_mul_words_proceeed:
277 srl %o3,%g0,%o3 ! clruw %o3
278 andcc %o2,-4,%g0
279 bz,pn %icc,.L_bn_mul_words_tail
280 clr %o5
281
282.L_bn_mul_words_loop: ! wow! 32 aligned!
283 lduw [%o1+4],%g3
284 mulx %o3,%g2,%g2
285 add %g2,%o5,%o4
286 nop
287 stuw %o4,[%o0]
288 srlx %o4,32,%o5
289
290 lduw [%o1+8],%g2
291 mulx %o3,%g3,%g3
292 add %g3,%o5,%o4
293 dec 4,%o2
294 stuw %o4,[%o0+4]
295 srlx %o4,32,%o5
296
297 lduw [%o1+12],%g3
298 mulx %o3,%g2,%g2
299 add %g2,%o5,%o4
300 inc 16,%o1
301 stuw %o4,[%o0+8]
302 srlx %o4,32,%o5
303
304 mulx %o3,%g3,%g3
305 add %g3,%o5,%o4
306 inc 16,%o0
307 stuw %o4,[%o0-4]
308 srlx %o4,32,%o5
309 andcc %o2,-4,%g0
310 bnz,a,pt %icc,.L_bn_mul_words_loop
311 lduw [%o1],%g2
312 nop
313 nop
314
315 brnz,a,pn %o2,.L_bn_mul_words_tail
316 lduw [%o1],%g2
317.L_bn_mul_words_return:
318 retl
319 mov %o5,%o0
320
321.L_bn_mul_words_tail:
322 mulx %o3,%g2,%g2
323 add %g2,%o5,%o4
324 dec %o2
325 srlx %o4,32,%o5
326 brz,pt %o2,.L_bn_mul_words_return
327 stuw %o4,[%o0]
328
329 lduw [%o1+4],%g2
330 mulx %o3,%g2,%g2
331 add %g2,%o5,%o4
332 dec %o2
333 srlx %o4,32,%o5
334 brz,pt %o2,.L_bn_mul_words_return
335 stuw %o4,[%o0+4]
336
337 lduw [%o1+8],%g2
338 mulx %o3,%g2,%g2
339 add %g2,%o5,%o4
340 stuw %o4,[%o0+8]
341 retl
342 srlx %o4,32,%o0
343
344.type bn_mul_words,#function
345.size bn_mul_words,(.-bn_mul_words)
346
347.align 32
348.global bn_sqr_words
349/*
350 * void bn_sqr_words(r,a,n)
351 * BN_ULONG *r,*a;
352 * int n;
353 */
/*
 * Leaf routine (no save): r in %o0, a in %o1, n in %o2.
 *
 * Squares each input word independently with a 64-bit mulx and stores the
 * product as two 32-bit words: the low half at r[2*i] and the high half
 * (srlx by 32) at r[2*i+1]. No carry is propagated between words.
 *
 * n is sign-extended first; when it is not greater than zero the routine
 * returns without touching memory. Every return path clears %o0.
 *
 * The main loop handles four input words (eight output words) per pass while
 * (n & -4) is non-zero; the tail handles the remaining one to three words.
 *
 * Note: the andcc at the bottom of the loop writes its result into %g2
 * rather than %g0. %g2 is reloaded from [%o1] in the delay slot of whichever
 * branch continues the work, so the value written there is never consumed.
 */
354bn_sqr_words:
355 sra %o2,%g0,%o2 ! signx %o2
356 brgz,a %o2,.L_bn_sqr_words_proceeed
357 lduw [%o1],%g2
358 retl
359 clr %o0
360 nop
361 nop
362 nop
363
364.L_bn_sqr_words_proceeed:
365 andcc %o2,-4,%g0
366 nop
367 bz,pn %icc,.L_bn_sqr_words_tail
368 nop
369
370.L_bn_sqr_words_loop: ! wow! 32 aligned!
371 lduw [%o1+4],%g3
372 mulx %g2,%g2,%o4
373 stuw %o4,[%o0]
374 srlx %o4,32,%o5
375 stuw %o5,[%o0+4]
376 nop
377
378 lduw [%o1+8],%g2
379 mulx %g3,%g3,%o4
380 dec 4,%o2
381 stuw %o4,[%o0+8]
382 srlx %o4,32,%o5
383 stuw %o5,[%o0+12]
384
385 lduw [%o1+12],%g3
386 mulx %g2,%g2,%o4
387 srlx %o4,32,%o5
388 stuw %o4,[%o0+16]
389 inc 16,%o1
390 stuw %o5,[%o0+20]
391
392 mulx %g3,%g3,%o4
393 inc 32,%o0
394 stuw %o4,[%o0-8]
395 srlx %o4,32,%o5
396 andcc %o2,-4,%g2
397 stuw %o5,[%o0-4]
398 bnz,a,pt %icc,.L_bn_sqr_words_loop
399 lduw [%o1],%g2
400 nop
401
402 brnz,a,pn %o2,.L_bn_sqr_words_tail
403 lduw [%o1],%g2
404.L_bn_sqr_words_return:
405 retl
406 clr %o0
407
408.L_bn_sqr_words_tail:
409 mulx %g2,%g2,%o4
410 dec %o2
411 stuw %o4,[%o0]
412 srlx %o4,32,%o5
413 brz,pt %o2,.L_bn_sqr_words_return
414 stuw %o5,[%o0+4]
415
416 lduw [%o1+4],%g2
417 mulx %g2,%g2,%o4
418 dec %o2
419 stuw %o4,[%o0+8]
420 srlx %o4,32,%o5
421 brz,pt %o2,.L_bn_sqr_words_return
422 stuw %o5,[%o0+12]
423
424 lduw [%o1+8],%g2
425 mulx %g2,%g2,%o4
426 srlx %o4,32,%o5
427 stuw %o4,[%o0+16]
428 stuw %o5,[%o0+20]
429 retl
430 clr %o0
431
432.type bn_sqr_words,#function
433.size bn_sqr_words,(.-bn_sqr_words)
434
435.align 32
436.global bn_div_words
437/*
438 * BN_ULONG bn_div_words(h,l,d)
439 * BN_ULONG h,l,d;
 *
 * Builds the 64-bit dividend (h << 32) | l, divides it by d with the
 * unsigned 64-bit udivx, and returns the low 32 bits of the quotient
 * (the final srl by %g0 clears the upper half of %o0).
 *
 * NOTE(review): l is or-ed in and d is used as-is, without masking, so
 * this presumably relies on the caller passing both with clean upper
 * 32 bits - confirm against the calling convention in use. No check
 * for d == 0 is made here.
440 */
441bn_div_words:
442 sllx %o0,32,%o0
443 or %o0,%o1,%o0
444 udivx %o0,%o2,%o0
445 retl
446 srl %o0,%g0,%o0 ! clruw %o0
447
448.type bn_div_words,#function
449.size bn_div_words,(.-bn_div_words)
450
451.align 32
452
453.global bn_add_words
454/*
455 * BN_ULONG bn_add_words(rp,ap,bp,n)
456 * BN_ULONG *rp,*ap,*bp;
457 * int n;
 *
 * Adds the n 32-bit words of bp[] to those of ap[], word by word from
 * index 0 upwards with the carry chained through addccc, and stores the
 * sums in rp[]. Returns the final carry (0 or 1); returns 0 when n <= 0.
 *
 * Registers: %o0 = rp, %o1 = ap, %o2 = bp, %o3 = n (sign-extended on
 * entry). The carry lives in %icc for the whole run, so between the
 * addccc instructions only loads, stores, inc/dec, `and` and
 * register-tested branches (brnz/brz) are used.
458 */
459bn_add_words:
460 sra %o3,%g0,%o3 ! signx %o3
461 brgz,a %o3,.L_bn_add_words_proceed
462 lduw [%o1],%o4
463 retl
464 clr %o0
465
466.L_bn_add_words_proceed:
/* Fewer than four words goes straight to the tail; the delay slot
 * (always executed) zeroes the carry for either path. */
467 andcc %o3,-4,%g0
468 bz,pn %icc,.L_bn_add_words_tail
469 addcc %g0,0,%g0 ! clear carry flag
470
/* Main loop: four words per pass; %o4 holds ap[0] on entry. */
471.L_bn_add_words_loop: ! wow! 32 aligned!
472 dec 4,%o3
473 lduw [%o2],%o5
474 lduw [%o1+4],%g1
475 lduw [%o2+4],%g2
476 lduw [%o1+8],%g3
477 lduw [%o2+8],%g4
478 addccc %o5,%o4,%o5
479 stuw %o5,[%o0]
480
481 lduw [%o1+12],%o4
482 lduw [%o2+12],%o5
483 inc 16,%o1
484 addccc %g1,%g2,%g1
485 stuw %g1,[%o0+4]
486
487 inc 16,%o2
488 addccc %g3,%g4,%g3
489 stuw %g3,[%o0+8]
490
491 inc 16,%o0
492 addccc %o5,%o4,%o5
493 stuw %o5,[%o0-4]
/* Loop test is done in a register (%g1) so the carry in %icc is kept. */
494 and %o3,-4,%g1
495 brnz,a,pt %g1,.L_bn_add_words_loop
496 lduw [%o1],%o4
497
498 brnz,a,pn %o3,.L_bn_add_words_tail
499 lduw [%o1],%o4
/* Return value: 0, overwritten with 1 in the delay slot if carry is set. */
500.L_bn_add_words_return:
501 clr %o0
502 retl
503 movcs %icc,1,%o0
504 nop
505
/* Tail: one to three remaining words, unrolled. */
506.L_bn_add_words_tail:
507 lduw [%o2],%o5
508 dec %o3
509 addccc %o5,%o4,%o5
510 brz,pt %o3,.L_bn_add_words_return
511 stuw %o5,[%o0]
512
513 lduw [%o1+4],%o4
514 lduw [%o2+4],%o5
515 dec %o3
516 addccc %o5,%o4,%o5
517 brz,pt %o3,.L_bn_add_words_return
518 stuw %o5,[%o0+4]
519
520 lduw [%o1+8],%o4
521 lduw [%o2+8],%o5
522 addccc %o5,%o4,%o5
523 stuw %o5,[%o0+8]
524 clr %o0
525 retl
526 movcs %icc,1,%o0
527
528.type bn_add_words,#function
529.size bn_add_words,(.-bn_add_words)
530
531.global bn_sub_words
532/*
533 * BN_ULONG bn_sub_words(rp,ap,bp,n)
534 * BN_ULONG *rp,*ap,*bp;
535 * int n;
 *
 * Subtracts the n 32-bit words of bp[] from those of ap[], word by word
 * from index 0 upwards with the borrow chained through subccc, and
 * stores the differences in rp[]. Returns the final borrow (0 or 1);
 * returns 0 when n <= 0.
 *
 * Registers: %o0 = rp, %o1 = ap, %o2 = bp, %o3 = n (sign-extended on
 * entry). The borrow lives in the %icc carry bit for the whole run, so
 * between the subccc instructions only loads, stores, inc/dec, `and`
 * and register-tested branches (brnz/brz) are used.
536 */
537bn_sub_words:
538 sra %o3,%g0,%o3 ! signx %o3
539 brgz,a %o3,.L_bn_sub_words_proceed
540 lduw [%o1],%o4
541 retl
542 clr %o0
543
544.L_bn_sub_words_proceed:
/* Fewer than four words goes straight to the tail; the delay slot
 * (always executed) zeroes the carry/borrow for either path. */
545 andcc %o3,-4,%g0
546 bz,pn %icc,.L_bn_sub_words_tail
547 addcc %g0,0,%g0 ! clear carry flag
548
/* Main loop: four words per pass; %o4 holds ap[0] on entry. */
549.L_bn_sub_words_loop: ! wow! 32 aligned!
550 dec 4,%o3
551 lduw [%o2],%o5
552 lduw [%o1+4],%g1
553 lduw [%o2+4],%g2
554 lduw [%o1+8],%g3
555 lduw [%o2+8],%g4
556 subccc %o4,%o5,%o5
557 stuw %o5,[%o0]
558
559 lduw [%o1+12],%o4
560 lduw [%o2+12],%o5
561 inc 16,%o1
562 subccc %g1,%g2,%g2
563 stuw %g2,[%o0+4]
564
565 inc 16,%o2
566 subccc %g3,%g4,%g4
567 stuw %g4,[%o0+8]
568
569 inc 16,%o0
570 subccc %o4,%o5,%o5
571 stuw %o5,[%o0-4]
/* Loop test is done in a register (%g1) so the borrow in %icc is kept. */
572 and %o3,-4,%g1
573 brnz,a,pt %g1,.L_bn_sub_words_loop
574 lduw [%o1],%o4
575
576 brnz,a,pn %o3,.L_bn_sub_words_tail
577 lduw [%o1],%o4
/* Return value: 0, overwritten with 1 in the delay slot if borrow is set. */
578.L_bn_sub_words_return:
579 clr %o0
580 retl
581 movcs %icc,1,%o0
582 nop
583
/* Tail: one to three remaining words, unrolled. */
584.L_bn_sub_words_tail: ! wow! 32 aligned!
585 lduw [%o2],%o5
586 dec %o3
587 subccc %o4,%o5,%o5
588 brz,pt %o3,.L_bn_sub_words_return
589 stuw %o5,[%o0]
590
591 lduw [%o1+4],%o4
592 lduw [%o2+4],%o5
593 dec %o3
594 subccc %o4,%o5,%o5
595 brz,pt %o3,.L_bn_sub_words_return
596 stuw %o5,[%o0+4]
597
598 lduw [%o1+8],%o4
599 lduw [%o2+8],%o5
600 subccc %o4,%o5,%o5
601 stuw %o5,[%o0+8]
602 clr %o0
603 retl
604 movcs %icc,1,%o0
605
606.type bn_sub_words,#function
607.size bn_sub_words,(.-bn_sub_words)
608
609/*
610 * Code below depends on the fact that upper parts of the %l0-%l7
611 * and %i0-%i7 are zeroed by kernel after context switch. In
612 * previous versions this comment stated that "the trouble is that
613 * it's not feasible to implement the mumbo-jumbo in less V9
614 * instructions:-(" which apparently isn't true thanks to
615 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement
616 * results not from the shorter code, but from elimination of
617 * multicycle non-pairable 'rd %y,%rd' instructions.
618 *
619 * Andy.
620 */
621
/*
 * Value added to %sp by the `save` that opens each comba routine below
 * (negative, so the new frame sits 96 bytes lower).
 */
622#define FRAME_SIZE -96
623
624/*
625 * Here is register usage map for *all* routines below.
 *
 * t_1          scratch: the current 64-bit product, and the column sum
 *              at the point where a result word is stored
 * t_2          the constant 1<<32 (built with mov 1 / sllx 32 on entry)
 * c_12         64-bit running accumulator for the current column
 * c_3          carry collector: t_2 is added to it each time an addcc
 *              into c_12/t_1 carries out of 64 bits; it is or-ed into
 *              c_12 after the column's word has been stored
 * ap/bp/rp(I)  memory operand for 32-bit word I of a[], b[], r[]
 *              (based on %i1, %i2, %i0 respectively)
 * a_N, b_N     registers caching operand words a[N], b[N]
626 */
627#define t_1 %o0
628#define t_2 %o1
629#define c_12 %o2
630#define c_3 %o3
631
632#define ap(I) [%i1+4*I]
633#define bp(I) [%i2+4*I]
634#define rp(I) [%i0+4*I]
635
636#define a_0 %l0
637#define a_1 %l1
638#define a_2 %l2
639#define a_3 %l3
640#define a_4 %l4
641#define a_5 %l5
642#define a_6 %l6
643#define a_7 %l7
644
645#define b_0 %i3
646#define b_1 %i4
647#define b_2 %i5
648#define b_3 %o4
649#define b_4 %o5
650#define b_5 %o7
651#define b_6 %g1
652#define b_7 %g4
653
654.align 32
655.global bn_mul_comba8
656/*
657 * void bn_mul_comba8(r,a,b)
658 * BN_ULONG *r,*a,*b;
 *
 * Multiplies the eight 32-bit words at a[] by the eight at b[] and
 * writes the sixteen-word product to r[0..15], one result column at a
 * time: every a[i]*b[j] with i+j == k is formed by a 64-bit mulx and
 * summed into c_12 (the last addition of a column lands in t_1).
 *
 * Carry handling: after each addcc, `bcs,a %xcc,.+8` with
 * `add c_3,t_2,c_3` in its annulled delay slot performs the add only
 * when the addition carried out of 64 bits, so c_3 collects carries in
 * units of t_2 = 1<<32. At the end of a column the low word of the sum
 * is stored to r[k] and c_12 becomes (sum >> 32) | c_3 for the next
 * column.
 *
 * Operand words are loaded lazily, just before their first use.
 * The trailing `restore %g0,%g0,%o0` leaves 0 in the caller's %o0.
659 */
660bn_mul_comba8:
661 save %sp,FRAME_SIZE,%sp
662 mov 1,t_2
663 lduw ap(0),a_0
664 sllx t_2,32,t_2
665 lduw bp(0),b_0 !=
666 lduw bp(1),b_1
667 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
668 srlx t_1,32,c_12
669 stuw t_1,rp(0) !=!r[0]=c1;
670
671 lduw ap(1),a_1
672 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
673 addcc c_12,t_1,c_12
674 clr c_3 !=
675 bcs,a %xcc,.+8
676 add c_3,t_2,c_3
677 lduw ap(2),a_2
678 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
679 addcc c_12,t_1,t_1
680 bcs,a %xcc,.+8
681 add c_3,t_2,c_3
682 srlx t_1,32,c_12 !=
683 stuw t_1,rp(1) !r[1]=c2;
684 or c_12,c_3,c_12
685
686 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
687 addcc c_12,t_1,c_12 !=
688 clr c_3
689 bcs,a %xcc,.+8
690 add c_3,t_2,c_3
691 lduw bp(2),b_2 !=
692 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
693 addcc c_12,t_1,c_12
694 bcs,a %xcc,.+8
695 add c_3,t_2,c_3 !=
696 lduw bp(3),b_3
697 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
698 addcc c_12,t_1,t_1
699 bcs,a %xcc,.+8 !=
700 add c_3,t_2,c_3
701 srlx t_1,32,c_12
702 stuw t_1,rp(2) !r[2]=c3;
703 or c_12,c_3,c_12 !=
704
705 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
706 addcc c_12,t_1,c_12
707 clr c_3
708 bcs,a %xcc,.+8 !=
709 add c_3,t_2,c_3
710 mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
711 addcc c_12,t_1,c_12
712 bcs,a %xcc,.+8 !=
713 add c_3,t_2,c_3
714 lduw ap(3),a_3
715 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
716 addcc c_12,t_1,c_12 !=
717 bcs,a %xcc,.+8
718 add c_3,t_2,c_3
719 lduw ap(4),a_4
720 mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!=
721 addcc c_12,t_1,t_1
722 bcs,a %xcc,.+8
723 add c_3,t_2,c_3
724 srlx t_1,32,c_12 !=
725 stuw t_1,rp(3) !r[3]=c1;
726 or c_12,c_3,c_12
727
728 mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
729 addcc c_12,t_1,c_12 !=
730 clr c_3
731 bcs,a %xcc,.+8
732 add c_3,t_2,c_3
733 mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1);
734 addcc c_12,t_1,c_12
735 bcs,a %xcc,.+8
736 add c_3,t_2,c_3
737 mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
738 addcc c_12,t_1,c_12
739 bcs,a %xcc,.+8
740 add c_3,t_2,c_3
741 lduw bp(4),b_4 !=
742 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
743 addcc c_12,t_1,c_12
744 bcs,a %xcc,.+8
745 add c_3,t_2,c_3 !=
746 lduw bp(5),b_5
747 mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1);
748 addcc c_12,t_1,t_1
749 bcs,a %xcc,.+8 !=
750 add c_3,t_2,c_3
751 srlx t_1,32,c_12
752 stuw t_1,rp(4) !r[4]=c2;
753 or c_12,c_3,c_12 !=
754
755 mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
756 addcc c_12,t_1,c_12
757 clr c_3
758 bcs,a %xcc,.+8 !=
759 add c_3,t_2,c_3
760 mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
761 addcc c_12,t_1,c_12
762 bcs,a %xcc,.+8 !=
763 add c_3,t_2,c_3
764 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
765 addcc c_12,t_1,c_12
766 bcs,a %xcc,.+8 !=
767 add c_3,t_2,c_3
768 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
769 addcc c_12,t_1,c_12
770 bcs,a %xcc,.+8 !=
771 add c_3,t_2,c_3
772 lduw ap(5),a_5
773 mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
774 addcc c_12,t_1,c_12 !=
775 bcs,a %xcc,.+8
776 add c_3,t_2,c_3
777 lduw ap(6),a_6
778 mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2);
779 addcc c_12,t_1,t_1
780 bcs,a %xcc,.+8
781 add c_3,t_2,c_3
782 srlx t_1,32,c_12 !=
783 stuw t_1,rp(5) !r[5]=c3;
784 or c_12,c_3,c_12
785
786 mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
787 addcc c_12,t_1,c_12 !=
788 clr c_3
789 bcs,a %xcc,.+8
790 add c_3,t_2,c_3
791 mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
792 addcc c_12,t_1,c_12
793 bcs,a %xcc,.+8
794 add c_3,t_2,c_3
795 mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3);
796 addcc c_12,t_1,c_12
797 bcs,a %xcc,.+8
798 add c_3,t_2,c_3
799 mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3);
800 addcc c_12,t_1,c_12
801 bcs,a %xcc,.+8
802 add c_3,t_2,c_3
803 mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3);
804 addcc c_12,t_1,c_12
805 bcs,a %xcc,.+8
806 add c_3,t_2,c_3
807 lduw bp(6),b_6 !=
808 mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
809 addcc c_12,t_1,c_12
810 bcs,a %xcc,.+8
811 add c_3,t_2,c_3 !=
812 lduw bp(7),b_7
813 mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
814 addcc c_12,t_1,t_1
815 bcs,a %xcc,.+8 !=
816 add c_3,t_2,c_3
817 srlx t_1,32,c_12
818 stuw t_1,rp(6) !r[6]=c1;
819 or c_12,c_3,c_12 !=
820
821 mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
822 addcc c_12,t_1,c_12
823 clr c_3
824 bcs,a %xcc,.+8 !=
825 add c_3,t_2,c_3
826 mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
827 addcc c_12,t_1,c_12
828 bcs,a %xcc,.+8 !=
829 add c_3,t_2,c_3
830 mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
831 addcc c_12,t_1,c_12
832 bcs,a %xcc,.+8 !=
833 add c_3,t_2,c_3
834 mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1);
835 addcc c_12,t_1,c_12
836 bcs,a %xcc,.+8 !=
837 add c_3,t_2,c_3
838 mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
839 addcc c_12,t_1,c_12
840 bcs,a %xcc,.+8 !=
841 add c_3,t_2,c_3
842 mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
843 addcc c_12,t_1,c_12
844 bcs,a %xcc,.+8 !=
845 add c_3,t_2,c_3
846 lduw ap(7),a_7
847 mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
848 addcc c_12,t_1,c_12
849 bcs,a %xcc,.+8
850 add c_3,t_2,c_3
851 mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1);
852 addcc c_12,t_1,t_1
853 bcs,a %xcc,.+8
854 add c_3,t_2,c_3
855 srlx t_1,32,c_12 !=
856 stuw t_1,rp(7) !r[7]=c2;
857 or c_12,c_3,c_12
858
859 mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2);
860 addcc c_12,t_1,c_12
861 clr c_3
862 bcs,a %xcc,.+8
863 add c_3,t_2,c_3 !=
864 mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2);
865 addcc c_12,t_1,c_12
866 bcs,a %xcc,.+8
867 add c_3,t_2,c_3 !=
868 mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
869 addcc c_12,t_1,c_12
870 bcs,a %xcc,.+8
871 add c_3,t_2,c_3 !=
872 mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
873 addcc c_12,t_1,c_12
874 bcs,a %xcc,.+8
875 add c_3,t_2,c_3 !=
876 mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
877 addcc c_12,t_1,c_12
878 bcs,a %xcc,.+8
879 add c_3,t_2,c_3 !=
880 mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2);
881 addcc c_12,t_1,c_12
882 bcs,a %xcc,.+8
883 add c_3,t_2,c_3 !=
884 mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
885 addcc c_12,t_1,t_1
886 bcs,a %xcc,.+8
887 add c_3,t_2,c_3 !=
888 srlx t_1,32,c_12
889 stuw t_1,rp(8) !r[8]=c3;
890 or c_12,c_3,c_12
891
892 mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3);
893 addcc c_12,t_1,c_12
894 clr c_3
895 bcs,a %xcc,.+8
896 add c_3,t_2,c_3 !=
897 mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3);
898 addcc c_12,t_1,c_12
899 bcs,a %xcc,.+8 !=
900 add c_3,t_2,c_3
901 mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
902 addcc c_12,t_1,c_12
903 bcs,a %xcc,.+8 !=
904 add c_3,t_2,c_3
905 mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
906 addcc c_12,t_1,c_12
907 bcs,a %xcc,.+8 !=
908 add c_3,t_2,c_3
909 mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
910 addcc c_12,t_1,c_12
911 bcs,a %xcc,.+8 !=
912 add c_3,t_2,c_3
913 mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3);
914 addcc c_12,t_1,t_1
915 bcs,a %xcc,.+8 !=
916 add c_3,t_2,c_3
917 srlx t_1,32,c_12
918 stuw t_1,rp(9) !r[9]=c1;
919 or c_12,c_3,c_12 !=
920
921 mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
922 addcc c_12,t_1,c_12
923 clr c_3
924 bcs,a %xcc,.+8 !=
925 add c_3,t_2,c_3
926 mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
927 addcc c_12,t_1,c_12
928 bcs,a %xcc,.+8 !=
929 add c_3,t_2,c_3
930 mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1);
931 addcc c_12,t_1,c_12
932 bcs,a %xcc,.+8 !=
933 add c_3,t_2,c_3
934 mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
935 addcc c_12,t_1,c_12
936 bcs,a %xcc,.+8 !=
937 add c_3,t_2,c_3
938 mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
939 addcc c_12,t_1,t_1
940 bcs,a %xcc,.+8 !=
941 add c_3,t_2,c_3
942 srlx t_1,32,c_12
943 stuw t_1,rp(10) !r[10]=c2;
944 or c_12,c_3,c_12 !=
945
946 mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2);
947 addcc c_12,t_1,c_12
948 clr c_3
949 bcs,a %xcc,.+8 !=
950 add c_3,t_2,c_3
951 mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
952 addcc c_12,t_1,c_12
953 bcs,a %xcc,.+8 !=
954 add c_3,t_2,c_3
955 mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
956 addcc c_12,t_1,c_12
957 bcs,a %xcc,.+8 !=
958 add c_3,t_2,c_3
959 mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
960 addcc c_12,t_1,t_1
961 bcs,a %xcc,.+8 !=
962 add c_3,t_2,c_3
963 srlx t_1,32,c_12
964 stuw t_1,rp(11) !r[11]=c3;
965 or c_12,c_3,c_12 !=
966
967 mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
968 addcc c_12,t_1,c_12
969 clr c_3
970 bcs,a %xcc,.+8 !=
971 add c_3,t_2,c_3
972 mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
973 addcc c_12,t_1,c_12
974 bcs,a %xcc,.+8 !=
975 add c_3,t_2,c_3
976 mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
977 addcc c_12,t_1,t_1
978 bcs,a %xcc,.+8 !=
979 add c_3,t_2,c_3
980 srlx t_1,32,c_12
981 stuw t_1,rp(12) !r[12]=c1;
982 or c_12,c_3,c_12 !=
983
984 mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
985 addcc c_12,t_1,c_12
986 clr c_3
987 bcs,a %xcc,.+8 !=
988 add c_3,t_2,c_3
989 mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
990 addcc c_12,t_1,t_1
991 bcs,a %xcc,.+8 !=
992 add c_3,t_2,c_3
993 srlx t_1,32,c_12
994 st t_1,rp(13) !r[13]=c2;
995 or c_12,c_3,c_12 !=
996
/* Last column: a single product, no carry collection; its high half is r[15]. */
997 mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2);
998 addcc c_12,t_1,t_1
999 srlx t_1,32,c_12 !=
1000 stuw t_1,rp(14) !r[14]=c3;
1001 stuw c_12,rp(15) !r[15]=c1;
1002
1003 ret
1004 restore %g0,%g0,%o0 !=
1005
1006.type bn_mul_comba8,#function
1007.size bn_mul_comba8,(.-bn_mul_comba8)
1008
1009.align 32
1010
1011.global bn_mul_comba4
1012/*
1013 * void bn_mul_comba4(r,a,b)
1014 * BN_ULONG *r,*a,*b;
 *
 * Multiplies the four 32-bit words at a[] by the four at b[] and writes
 * the eight-word product to r[0..7], one result column at a time: every
 * a[i]*b[j] with i+j == k is formed by a 64-bit mulx and summed into
 * c_12 (the last addition of a column lands in t_1).
 *
 * Carry handling: after each addcc, `bcs,a %xcc,.+8` with
 * `add c_3,t_2,c_3` in its annulled delay slot performs the add only
 * when the addition carried out of 64 bits, so c_3 collects carries in
 * units of t_2 = 1<<32. At the end of a column the low word of the sum
 * is stored to r[k] and c_12 becomes (sum >> 32) | c_3 for the next
 * column.
 *
 * The trailing `restore %g0,%g0,%o0` leaves 0 in the caller's %o0.
1015 */
1016bn_mul_comba4:
1017 save %sp,FRAME_SIZE,%sp
1018 lduw ap(0),a_0
1019 mov 1,t_2
1020 lduw bp(0),b_0
1021 sllx t_2,32,t_2 !=
1022 lduw bp(1),b_1
1023 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
1024 srlx t_1,32,c_12
1025 stuw t_1,rp(0) !=!r[0]=c1;
1026
1027 lduw ap(1),a_1
1028 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
1029 addcc c_12,t_1,c_12
1030 clr c_3 !=
1031 bcs,a %xcc,.+8
1032 add c_3,t_2,c_3
1033 lduw ap(2),a_2
1034 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
1035 addcc c_12,t_1,t_1
1036 bcs,a %xcc,.+8
1037 add c_3,t_2,c_3
1038 srlx t_1,32,c_12 !=
1039 stuw t_1,rp(1) !r[1]=c2;
1040 or c_12,c_3,c_12
1041
1042 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1043 addcc c_12,t_1,c_12 !=
1044 clr c_3
1045 bcs,a %xcc,.+8
1046 add c_3,t_2,c_3
1047 lduw bp(2),b_2 !=
1048 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
1049 addcc c_12,t_1,c_12
1050 bcs,a %xcc,.+8
1051 add c_3,t_2,c_3 !=
1052 lduw bp(3),b_3
1053 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1054 addcc c_12,t_1,t_1
1055 bcs,a %xcc,.+8 !=
1056 add c_3,t_2,c_3
1057 srlx t_1,32,c_12
1058 stuw t_1,rp(2) !r[2]=c3;
1059 or c_12,c_3,c_12 !=
1060
1061 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
1062 addcc c_12,t_1,c_12
1063 clr c_3
1064 bcs,a %xcc,.+8 !=
1065 add c_3,t_2,c_3
1066 mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1067 addcc c_12,t_1,c_12
1068 bcs,a %xcc,.+8 !=
1069 add c_3,t_2,c_3
1070 lduw ap(3),a_3
1071 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1072 addcc c_12,t_1,c_12 !=
1073 bcs,a %xcc,.+8
1074 add c_3,t_2,c_3
1075 mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
1076 addcc c_12,t_1,t_1 !=
1077 bcs,a %xcc,.+8
1078 add c_3,t_2,c_3
1079 srlx t_1,32,c_12
1080 stuw t_1,rp(3) !=!r[3]=c1;
1081 or c_12,c_3,c_12
1082
1083 mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1084 addcc c_12,t_1,c_12
1085 clr c_3 !=
1086 bcs,a %xcc,.+8
1087 add c_3,t_2,c_3
1088 mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1089 addcc c_12,t_1,c_12 !=
1090 bcs,a %xcc,.+8
1091 add c_3,t_2,c_3
1092 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
1093 addcc c_12,t_1,t_1 !=
1094 bcs,a %xcc,.+8
1095 add c_3,t_2,c_3
1096 srlx t_1,32,c_12
1097 stuw t_1,rp(4) !=!r[4]=c2;
1098 or c_12,c_3,c_12
1099
1100 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1101 addcc c_12,t_1,c_12
1102 clr c_3 !=
1103 bcs,a %xcc,.+8
1104 add c_3,t_2,c_3
1105 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1106 addcc c_12,t_1,t_1 !=
1107 bcs,a %xcc,.+8
1108 add c_3,t_2,c_3
1109 srlx t_1,32,c_12
1110 stuw t_1,rp(5) !=!r[5]=c3;
1111 or c_12,c_3,c_12
1112
/* Last column: a single product, no carry collection; its high half is r[7]. */
1113 mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1114 addcc c_12,t_1,t_1
1115 srlx t_1,32,c_12 !=
1116 stuw t_1,rp(6) !r[6]=c1;
1117 stuw c_12,rp(7) !r[7]=c2;
1118
1119 ret
1120 restore %g0,%g0,%o0
1121
1122.type bn_mul_comba4,#function
1123.size bn_mul_comba4,(.-bn_mul_comba4)
1124
1125.align 32
1126
1127.global bn_sqr_comba8
/*
 * bn_sqr_comba8: entered with the result pointer in %i0 (rp) and the
 * operand pointer in %i1 (ap) after the `save`.
 *
 * Squares the eight 32-bit words at a[] and writes the sixteen-word
 * result to r[0..15], one result column at a time. Diagonal terms
 * a[i]*a[i] (sqr_add_c) are added once; cross terms a[i]*a[j], i != j
 * (sqr_add_c2) are computed once and added twice - two addcc
 * instructions using the same t_1 - so each addition's carry is
 * caught separately.
 *
 * Carry handling: after each addcc, `bcs,a %xcc,.+8` with
 * `add c_3,t_2,c_3` in its annulled delay slot performs the add only
 * when the addition carried out of 64 bits, so c_3 collects carries in
 * units of t_2 = 1<<32. At the end of a column the low word of the sum
 * is stored to r[k] and c_12 becomes (sum >> 32) | c_3 for the next
 * column.
 *
 * The trailing `restore %g0,%g0,%o0` leaves 0 in the caller's %o0.
 */
1128bn_sqr_comba8:
1129 save %sp,FRAME_SIZE,%sp
1130 mov 1,t_2
1131 lduw ap(0),a_0
1132 sllx t_2,32,t_2
1133 lduw ap(1),a_1
1134 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1135 srlx t_1,32,c_12
1136 stuw t_1,rp(0) !r[0]=c1;
1137
1138 lduw ap(2),a_2
1139 mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1140 addcc c_12,t_1,c_12
1141 clr c_3
1142 bcs,a %xcc,.+8
1143 add c_3,t_2,c_3
1144 addcc c_12,t_1,t_1
1145 bcs,a %xcc,.+8
1146 add c_3,t_2,c_3
1147 srlx t_1,32,c_12
1148 stuw t_1,rp(1) !r[1]=c2;
1149 or c_12,c_3,c_12
1150
1151 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1152 addcc c_12,t_1,c_12
1153 clr c_3
1154 bcs,a %xcc,.+8
1155 add c_3,t_2,c_3
1156 addcc c_12,t_1,c_12
1157 bcs,a %xcc,.+8
1158 add c_3,t_2,c_3
1159 lduw ap(3),a_3
1160 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1161 addcc c_12,t_1,t_1
1162 bcs,a %xcc,.+8
1163 add c_3,t_2,c_3
1164 srlx t_1,32,c_12
1165 stuw t_1,rp(2) !r[2]=c3;
1166 or c_12,c_3,c_12
1167
1168 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1169 addcc c_12,t_1,c_12
1170 clr c_3
1171 bcs,a %xcc,.+8
1172 add c_3,t_2,c_3
1173 addcc c_12,t_1,c_12
1174 bcs,a %xcc,.+8
1175 add c_3,t_2,c_3
1176 lduw ap(4),a_4
1177 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1178 addcc c_12,t_1,c_12
1179 bcs,a %xcc,.+8
1180 add c_3,t_2,c_3
1181 addcc c_12,t_1,t_1
1182 bcs,a %xcc,.+8
1183 add c_3,t_2,c_3
1184 srlx t_1,32,c_12
1185 st t_1,rp(3) !r[3]=c1;
1186 or c_12,c_3,c_12
1187
1188 mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1189 addcc c_12,t_1,c_12
1190 clr c_3
1191 bcs,a %xcc,.+8
1192 add c_3,t_2,c_3
1193 addcc c_12,t_1,c_12
1194 bcs,a %xcc,.+8
1195 add c_3,t_2,c_3
1196 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1197 addcc c_12,t_1,c_12
1198 bcs,a %xcc,.+8
1199 add c_3,t_2,c_3
1200 addcc c_12,t_1,c_12
1201 bcs,a %xcc,.+8
1202 add c_3,t_2,c_3
1203 lduw ap(5),a_5
1204 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1205 addcc c_12,t_1,t_1
1206 bcs,a %xcc,.+8
1207 add c_3,t_2,c_3
1208 srlx t_1,32,c_12
1209 stuw t_1,rp(4) !r[4]=c2;
1210 or c_12,c_3,c_12
1211
1212 mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1213 addcc c_12,t_1,c_12
1214 clr c_3
1215 bcs,a %xcc,.+8
1216 add c_3,t_2,c_3
1217 addcc c_12,t_1,c_12
1218 bcs,a %xcc,.+8
1219 add c_3,t_2,c_3
1220 mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1221 addcc c_12,t_1,c_12
1222 bcs,a %xcc,.+8
1223 add c_3,t_2,c_3
1224 addcc c_12,t_1,c_12
1225 bcs,a %xcc,.+8
1226 add c_3,t_2,c_3
1227 lduw ap(6),a_6
1228 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1229 addcc c_12,t_1,c_12
1230 bcs,a %xcc,.+8
1231 add c_3,t_2,c_3
1232 addcc c_12,t_1,t_1
1233 bcs,a %xcc,.+8
1234 add c_3,t_2,c_3
1235 srlx t_1,32,c_12
1236 stuw t_1,rp(5) !r[5]=c3;
1237 or c_12,c_3,c_12
1238
1239 mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1240 addcc c_12,t_1,c_12
1241 clr c_3
1242 bcs,a %xcc,.+8
1243 add c_3,t_2,c_3
1244 addcc c_12,t_1,c_12
1245 bcs,a %xcc,.+8
1246 add c_3,t_2,c_3
1247 mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1248 addcc c_12,t_1,c_12
1249 bcs,a %xcc,.+8
1250 add c_3,t_2,c_3
1251 addcc c_12,t_1,c_12
1252 bcs,a %xcc,.+8
1253 add c_3,t_2,c_3
1254 mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1255 addcc c_12,t_1,c_12
1256 bcs,a %xcc,.+8
1257 add c_3,t_2,c_3
1258 addcc c_12,t_1,c_12
1259 bcs,a %xcc,.+8
1260 add c_3,t_2,c_3
1261 lduw ap(7),a_7
1262 mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1263 addcc c_12,t_1,t_1
1264 bcs,a %xcc,.+8
1265 add c_3,t_2,c_3
1266 srlx t_1,32,c_12
1267 stuw t_1,rp(6) !r[6]=c1;
1268 or c_12,c_3,c_12
1269
1270 mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1271 addcc c_12,t_1,c_12
1272 clr c_3
1273 bcs,a %xcc,.+8
1274 add c_3,t_2,c_3
1275 addcc c_12,t_1,c_12
1276 bcs,a %xcc,.+8
1277 add c_3,t_2,c_3
1278 mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1279 addcc c_12,t_1,c_12
1280 bcs,a %xcc,.+8
1281 add c_3,t_2,c_3
1282 addcc c_12,t_1,c_12
1283 bcs,a %xcc,.+8
1284 add c_3,t_2,c_3
1285 mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1286 addcc c_12,t_1,c_12
1287 bcs,a %xcc,.+8
1288 add c_3,t_2,c_3
1289 addcc c_12,t_1,c_12
1290 bcs,a %xcc,.+8
1291 add c_3,t_2,c_3
1292 mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1293 addcc c_12,t_1,c_12
1294 bcs,a %xcc,.+8
1295 add c_3,t_2,c_3
1296 addcc c_12,t_1,t_1
1297 bcs,a %xcc,.+8
1298 add c_3,t_2,c_3
1299 srlx t_1,32,c_12
1300 stuw t_1,rp(7) !r[7]=c2;
1301 or c_12,c_3,c_12
1302
1303 mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1304 addcc c_12,t_1,c_12
1305 clr c_3
1306 bcs,a %xcc,.+8
1307 add c_3,t_2,c_3
1308 addcc c_12,t_1,c_12
1309 bcs,a %xcc,.+8
1310 add c_3,t_2,c_3
1311 mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1312 addcc c_12,t_1,c_12
1313 bcs,a %xcc,.+8
1314 add c_3,t_2,c_3
1315 addcc c_12,t_1,c_12
1316 bcs,a %xcc,.+8
1317 add c_3,t_2,c_3
1318 mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1319 addcc c_12,t_1,c_12
1320 bcs,a %xcc,.+8
1321 add c_3,t_2,c_3
1322 addcc c_12,t_1,c_12
1323 bcs,a %xcc,.+8
1324 add c_3,t_2,c_3
1325 mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1326 addcc c_12,t_1,t_1
1327 bcs,a %xcc,.+8
1328 add c_3,t_2,c_3
1329 srlx t_1,32,c_12
1330 stuw t_1,rp(8) !r[8]=c3;
1331 or c_12,c_3,c_12
1332
1333 mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1334 addcc c_12,t_1,c_12
1335 clr c_3
1336 bcs,a %xcc,.+8
1337 add c_3,t_2,c_3
1338 addcc c_12,t_1,c_12
1339 bcs,a %xcc,.+8
1340 add c_3,t_2,c_3
1341 mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1342 addcc c_12,t_1,c_12
1343 bcs,a %xcc,.+8
1344 add c_3,t_2,c_3
1345 addcc c_12,t_1,c_12
1346 bcs,a %xcc,.+8
1347 add c_3,t_2,c_3
1348 mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1349 addcc c_12,t_1,c_12
1350 bcs,a %xcc,.+8
1351 add c_3,t_2,c_3
1352 addcc c_12,t_1,t_1
1353 bcs,a %xcc,.+8
1354 add c_3,t_2,c_3
1355 srlx t_1,32,c_12
1356 stuw t_1,rp(9) !r[9]=c1;
1357 or c_12,c_3,c_12
1358
1359 mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1360 addcc c_12,t_1,c_12
1361 clr c_3
1362 bcs,a %xcc,.+8
1363 add c_3,t_2,c_3
1364 addcc c_12,t_1,c_12
1365 bcs,a %xcc,.+8
1366 add c_3,t_2,c_3
1367 mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1368 addcc c_12,t_1,c_12
1369 bcs,a %xcc,.+8
1370 add c_3,t_2,c_3
1371 addcc c_12,t_1,c_12
1372 bcs,a %xcc,.+8
1373 add c_3,t_2,c_3
1374 mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1375 addcc c_12,t_1,t_1
1376 bcs,a %xcc,.+8
1377 add c_3,t_2,c_3
1378 srlx t_1,32,c_12
1379 stuw t_1,rp(10) !r[10]=c2;
1380 or c_12,c_3,c_12
1381
1382 mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2);
1383 addcc c_12,t_1,c_12
1384 clr c_3
1385 bcs,a %xcc,.+8
1386 add c_3,t_2,c_3
1387 addcc c_12,t_1,c_12
1388 bcs,a %xcc,.+8
1389 add c_3,t_2,c_3
1390 mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2);
1391 addcc c_12,t_1,c_12
1392 bcs,a %xcc,.+8
1393 add c_3,t_2,c_3
1394 addcc c_12,t_1,t_1
1395 bcs,a %xcc,.+8
1396 add c_3,t_2,c_3
1397 srlx t_1,32,c_12
1398 stuw t_1,rp(11) !r[11]=c3;
1399 or c_12,c_3,c_12
1400
1401 mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1402 addcc c_12,t_1,c_12
1403 clr c_3
1404 bcs,a %xcc,.+8
1405 add c_3,t_2,c_3
1406 addcc c_12,t_1,c_12
1407 bcs,a %xcc,.+8
1408 add c_3,t_2,c_3
1409 mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1410 addcc c_12,t_1,t_1
1411 bcs,a %xcc,.+8
1412 add c_3,t_2,c_3
1413 srlx t_1,32,c_12
1414 stuw t_1,rp(12) !r[12]=c1;
1415 or c_12,c_3,c_12
1416
1417 mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1418 addcc c_12,t_1,c_12
1419 clr c_3
1420 bcs,a %xcc,.+8
1421 add c_3,t_2,c_3
1422 addcc c_12,t_1,t_1
1423 bcs,a %xcc,.+8
1424 add c_3,t_2,c_3
1425 srlx t_1,32,c_12
1426 stuw t_1,rp(13) !r[13]=c2;
1427 or c_12,c_3,c_12
1428
/* Last column: a single diagonal term, no carry collection; its high half is r[15]. */
1429 mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1430 addcc c_12,t_1,t_1
1431 srlx t_1,32,c_12
1432 stuw t_1,rp(14) !r[14]=c3;
1433 stuw c_12,rp(15) !r[15]=c1;
1434
1435 ret
1436 restore %g0,%g0,%o0
1437
1438.type bn_sqr_comba8,#function
1439.size bn_sqr_comba8,(.-bn_sqr_comba8)
1440
1441.align 32
1442
1443.global bn_sqr_comba4
1444/*
1445 * void bn_sqr_comba4(r,a)
1446 * BN_ULONG *r,*a;
 *
 * Squares the four 32-bit words at a[] and writes the eight-word result
 * to r[0..7], one result column at a time. Diagonal terms a[i]*a[i]
 * (sqr_add_c) are added once; cross terms a[i]*a[j], i != j
 * (sqr_add_c2) are computed once and added twice - two addcc
 * instructions using the same t_1 - so each addition's carry is
 * caught separately.
 *
 * Carry handling: after each addcc, `bcs,a %xcc,.+8` with
 * `add c_3,t_2,c_3` in its annulled delay slot performs the add only
 * when the addition carried out of 64 bits, so c_3 collects carries in
 * units of t_2 = 1<<32. At the end of a column the low word of the sum
 * is stored to r[k] and c_12 becomes (sum >> 32) | c_3 for the next
 * column.
 *
 * The trailing `restore %g0,%g0,%o0` leaves 0 in the caller's %o0.
1447 */
1448bn_sqr_comba4:
1449 save %sp,FRAME_SIZE,%sp
1450 mov 1,t_2
1451 lduw ap(0),a_0
1452 sllx t_2,32,t_2
1453 lduw ap(1),a_1
1454 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1455 srlx t_1,32,c_12
1456 stuw t_1,rp(0) !r[0]=c1;
1457
1458 lduw ap(2),a_2
1459 mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1);
1460 addcc c_12,t_1,c_12
1461 clr c_3
1462 bcs,a %xcc,.+8
1463 add c_3,t_2,c_3
1464 addcc c_12,t_1,t_1
1465 bcs,a %xcc,.+8
1466 add c_3,t_2,c_3
1467 srlx t_1,32,c_12
1468 stuw t_1,rp(1) !r[1]=c2;
1469 or c_12,c_3,c_12
1470
1471 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1472 addcc c_12,t_1,c_12
1473 clr c_3
1474 bcs,a %xcc,.+8
1475 add c_3,t_2,c_3
1476 addcc c_12,t_1,c_12
1477 bcs,a %xcc,.+8
1478 add c_3,t_2,c_3
1479 lduw ap(3),a_3
1480 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1481 addcc c_12,t_1,t_1
1482 bcs,a %xcc,.+8
1483 add c_3,t_2,c_3
1484 srlx t_1,32,c_12
1485 stuw t_1,rp(2) !r[2]=c3;
1486 or c_12,c_3,c_12
1487
1488 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1489 addcc c_12,t_1,c_12
1490 clr c_3
1491 bcs,a %xcc,.+8
1492 add c_3,t_2,c_3
1493 addcc c_12,t_1,c_12
1494 bcs,a %xcc,.+8
1495 add c_3,t_2,c_3
1496 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1497 addcc c_12,t_1,c_12
1498 bcs,a %xcc,.+8
1499 add c_3,t_2,c_3
1500 addcc c_12,t_1,t_1
1501 bcs,a %xcc,.+8
1502 add c_3,t_2,c_3
1503 srlx t_1,32,c_12
1504 stuw t_1,rp(3) !r[3]=c1;
1505 or c_12,c_3,c_12
1506
1507 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1508 addcc c_12,t_1,c_12
1509 clr c_3
1510 bcs,a %xcc,.+8
1511 add c_3,t_2,c_3
1512 addcc c_12,t_1,c_12
1513 bcs,a %xcc,.+8
1514 add c_3,t_2,c_3
1515 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1516 addcc c_12,t_1,t_1
1517 bcs,a %xcc,.+8
1518 add c_3,t_2,c_3
1519 srlx t_1,32,c_12
1520 stuw t_1,rp(4) !r[4]=c2;
1521 or c_12,c_3,c_12
1522
1523 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1524 addcc c_12,t_1,c_12
1525 clr c_3
1526 bcs,a %xcc,.+8
1527 add c_3,t_2,c_3
1528 addcc c_12,t_1,t_1
1529 bcs,a %xcc,.+8
1530 add c_3,t_2,c_3
1531 srlx t_1,32,c_12
1532 stuw t_1,rp(5) !r[5]=c3;
1533 or c_12,c_3,c_12
1534
/* Last column: a single diagonal term, no carry collection; its high half is r[7]. */
1535 mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1536 addcc c_12,t_1,t_1
1537 srlx t_1,32,c_12
1538 stuw t_1,rp(6) !r[6]=c1;
1539 stuw c_12,rp(7) !r[7]=c2;
1540
1541 ret
1542 restore %g0,%g0,%o0
1543
1544.type bn_sqr_comba4,#function
1545.size bn_sqr_comba4,(.-bn_sqr_comba4)
1546
1547.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
deleted file mode 100644
index b8fb1e8a25..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
+++ /dev/null
@@ -1,606 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# December 2005
11#
12# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
13# for undertaken effort are multiple. First of all, UltraSPARC is not
14# the whole SPARCv9 universe and other VIS-free implementations deserve
15# optimized code as much. Secondly, newly introduced UltraSPARC T1,
16# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
17# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
18# several integrated RSA/DSA accelerator circuits accessible through
19# kernel driver [only(*)], but having decent user-land software
20# implementation is important too. Finally, reasons like desire to
21# experiment with dedicated squaring procedure. Yes, this module
22# implements one, because it was easiest to draft it in SPARCv9
23# instructions...
24
25# (*) Engine accessing the driver in question is on my TODO list.
26# For reference, accelerator is estimated to give 6 to 10 times
27# improvement on single-threaded RSA sign. It should be noted
28# that 6-10x improvement coefficient does not actually mean
29# something extraordinary in terms of absolute [single-threaded]
30# performance, as SPARCv9 instruction set is by all means least
31# suitable for high performance crypto among other 64 bit
32# platforms. 6-10x factor simply places T1 in same performance
33# domain as say AMD64 and IA-64. Improvement of RSA verify doesn't
34# appear impressive at all, but it's the sign operation which is
35# far more critical/interesting.
36
37# You might notice that inner loops are modulo-scheduled:-) This has
38# essentially negligible impact on UltraSPARC performance, it's
39# Fujitsu SPARC64 V users who should notice and hopefully appreciate
40# the advantage... Currently this module surpasses sparcv9a-mont.pl
41# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
42# module still has hidden potential [see TODO list there], which is
43# estimated to be larger than 20%...
44
# Register names for the six arguments (%i0-%i5, i.e. as addressed once the
# generated code has executed `save`).
45# int bn_mul_mont(
46$rp="%i0"; # BN_ULONG *rp,
47$ap="%i1"; # const BN_ULONG *ap,
48$bp="%i2"; # const BN_ULONG *bp,
49$np="%i3"; # const BN_ULONG *np,
50$n0="%i4"; # const BN_ULONG *n0,
51$num="%i5"; # int num);
52
# Build flavour: 32-bit unless -m64 or -xarch=v9 is present on the command
# line.  $bias is added to %sp wherever the template addresses the stack and
# $frame is the size passed to `save`.
53$bits=32;
54for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
55if ($bits==64) { $bias=2047; $frame=192; }
56else { $bias=0; $frame=128; }
57
# Working registers from the %o/%g sets: carries, accumulators, the 32-bit
# mask and two temporaries.
58$car0="%o0";
59$car1="%o1";
60$car2="%o2"; # 1 bit
61$acc0="%o3";
62$acc1="%o4";
63$mask="%g1"; # 32 bits, what a waste...
64$tmp0="%g4";
65$tmp1="%g5";
66
# Local registers: byte-offset loop counters ($i, $j), the two current
# multipliers, the tp pointer and the words most recently loaded from
# ap/np/tp.
67$i="%l0";
68$j="%l1";
69$mul0="%l2";
70$mul1="%l3";
71$tp="%l4";
72$apj="%l5";
73$npj="%l6";
74$tpj="%l7";
75
# Symbol name used for the generated function (.global/.type/.size).
76$fname="bn_mul_mont_int";
77
# Assembly template, part 1: entry point and the general (ap != bp) path.
#  - $fname: returns 0 immediately when num (still in %o5, before the
#    register window is saved) is below 4.
#  - .Lenter: opens the frame, scales num to a byte count (num*=4), loads
#    the n0 word, and lowers %sp to carve out the tp[] scratch vector (the
#    address is rounded down with a -1024 mask).  When ap == bp it branches
#    to .Lbn_sqr_mont.
#  - .L1st: first pass over j using bp[0]; stores the running sums to tp[].
#  - .Louter/.Linner: one pass per remaining bp[i], accumulating into tp[].
#  - .Ltail/.Lsub/.Lcopy: subtracts np[] from tp[] into rp[] with borrow,
#    folds the final borrow into $car2 to form an all-ones/zero mask that
#    selects tp[] or rp[] as the copy source, zeroes tp[], and returns 1.
# Instructions following a branch sit in its delay slot; the inner loops are
# modulo-scheduled, so statement order must not be changed.
78$code=<<___;
79.section ".text",#alloc,#execinstr
80
81.global $fname
82.align 32
83$fname:
84 cmp %o5,4 ! 128 bits minimum
85 bge,pt %icc,.Lenter
86 sethi %hi(0xffffffff),$mask
87 retl
88 clr %o0
89.align 32
90.Lenter:
91 save %sp,-$frame,%sp
92 sll $num,2,$num ! num*=4
93 or $mask,%lo(0xffffffff),$mask
94 ld [$n0],$n0
95 cmp $ap,$bp
96 and $num,$mask,$num
97 ld [$bp],$mul0 ! bp[0]
98 nop
99
100 add %sp,$bias,%o7 ! real top of stack
101 ld [$ap],$car0 ! ap[0] ! redundant in squaring context
102 sub %o7,$num,%o7
103 ld [$ap+4],$apj ! ap[1]
104 and %o7,-1024,%o7
105 ld [$np],$car1 ! np[0]
106 sub %o7,$bias,%sp ! alloca
107 ld [$np+4],$npj ! np[1]
108 be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
109 mov 12,$j
110
111 mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
112 mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0]
113 and $car0,$mask,$acc0
114 add %sp,$bias+$frame,$tp
115 ld [$ap+8],$apj !prologue!
116
117 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
118 and $mul1,$mask,$mul1
119
120 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
121 mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0
122 srlx $car0,32,$car0
123 add $acc0,$car1,$car1
124 ld [$np+8],$npj !prologue!
125 srlx $car1,32,$car1
126 mov $tmp0,$acc0 !prologue!
127
128.L1st:
129 mulx $apj,$mul0,$tmp0
130 mulx $npj,$mul1,$tmp1
131 add $acc0,$car0,$car0
132 ld [$ap+$j],$apj ! ap[j]
133 and $car0,$mask,$acc0
134 add $acc1,$car1,$car1
135 ld [$np+$j],$npj ! np[j]
136 srlx $car0,32,$car0
137 add $acc0,$car1,$car1
138 add $j,4,$j ! j++
139 mov $tmp0,$acc0
140 st $car1,[$tp]
141 cmp $j,$num
142 mov $tmp1,$acc1
143 srlx $car1,32,$car1
144 bl %icc,.L1st
145 add $tp,4,$tp ! tp++
146!.L1st
147
148 mulx $apj,$mul0,$tmp0 !epilogue!
149 mulx $npj,$mul1,$tmp1
150 add $acc0,$car0,$car0
151 and $car0,$mask,$acc0
152 add $acc1,$car1,$car1
153 srlx $car0,32,$car0
154 add $acc0,$car1,$car1
155 st $car1,[$tp]
156 srlx $car1,32,$car1
157
158 add $tmp0,$car0,$car0
159 and $car0,$mask,$acc0
160 add $tmp1,$car1,$car1
161 srlx $car0,32,$car0
162 add $acc0,$car1,$car1
163 st $car1,[$tp+4]
164 srlx $car1,32,$car1
165
166 add $car0,$car1,$car1
167 st $car1,[$tp+8]
168 srlx $car1,32,$car2
169
170 mov 4,$i ! i++
171 ld [$bp+4],$mul0 ! bp[1]
172.Louter:
173 add %sp,$bias+$frame,$tp
174 ld [$ap],$car0 ! ap[0]
175 ld [$ap+4],$apj ! ap[1]
176 ld [$np],$car1 ! np[0]
177 ld [$np+4],$npj ! np[1]
178 ld [$tp],$tmp1 ! tp[0]
179 ld [$tp+4],$tpj ! tp[1]
180 mov 12,$j
181
182 mulx $car0,$mul0,$car0
183 mulx $apj,$mul0,$tmp0 !prologue!
184 add $tmp1,$car0,$car0
185 ld [$ap+8],$apj !prologue!
186 and $car0,$mask,$acc0
187
188 mulx $n0,$acc0,$mul1
189 and $mul1,$mask,$mul1
190
191 mulx $car1,$mul1,$car1
192 mulx $npj,$mul1,$acc1 !prologue!
193 srlx $car0,32,$car0
194 add $acc0,$car1,$car1
195 ld [$np+8],$npj !prologue!
196 srlx $car1,32,$car1
197 mov $tmp0,$acc0 !prologue!
198
199.Linner:
200 mulx $apj,$mul0,$tmp0
201 mulx $npj,$mul1,$tmp1
202 add $tpj,$car0,$car0
203 ld [$ap+$j],$apj ! ap[j]
204 add $acc0,$car0,$car0
205 add $acc1,$car1,$car1
206 ld [$np+$j],$npj ! np[j]
207 and $car0,$mask,$acc0
208 ld [$tp+8],$tpj ! tp[j]
209 srlx $car0,32,$car0
210 add $acc0,$car1,$car1
211 add $j,4,$j ! j++
212 mov $tmp0,$acc0
213 st $car1,[$tp] ! tp[j-1]
214 srlx $car1,32,$car1
215 mov $tmp1,$acc1
216 cmp $j,$num
217 bl %icc,.Linner
218 add $tp,4,$tp ! tp++
219!.Linner
220
221 mulx $apj,$mul0,$tmp0 !epilogue!
222 mulx $npj,$mul1,$tmp1
223 add $tpj,$car0,$car0
224 add $acc0,$car0,$car0
225 ld [$tp+8],$tpj ! tp[j]
226 and $car0,$mask,$acc0
227 add $acc1,$car1,$car1
228 srlx $car0,32,$car0
229 add $acc0,$car1,$car1
230 st $car1,[$tp] ! tp[j-1]
231 srlx $car1,32,$car1
232
233 add $tpj,$car0,$car0
234 add $tmp0,$car0,$car0
235 and $car0,$mask,$acc0
236 add $tmp1,$car1,$car1
237 add $acc0,$car1,$car1
238 st $car1,[$tp+4] ! tp[j-1]
239 srlx $car0,32,$car0
240 add $i,4,$i ! i++
241 srlx $car1,32,$car1
242
243 add $car0,$car1,$car1
244 cmp $i,$num
245 add $car2,$car1,$car1
246 st $car1,[$tp+8]
247
248 srlx $car1,32,$car2
249 bl,a %icc,.Louter
250 ld [$bp+$i],$mul0 ! bp[i]
251!.Louter
252
253 add $tp,12,$tp
254
255.Ltail:
256 add $np,$num,$np
257 add $rp,$num,$rp
258 mov $tp,$ap
259 sub %g0,$num,%o7 ! k=-num
260 ba .Lsub
261 subcc %g0,%g0,%g0 ! clear %icc.c
262.align 16
263.Lsub:
264 ld [$tp+%o7],%o0
265 ld [$np+%o7],%o1
266 subccc %o0,%o1,%o1 ! tp[j]-np[j]
267 add $rp,%o7,$i
268 add %o7,4,%o7
269 brnz %o7,.Lsub
270 st %o1,[$i]
271 subc $car2,0,$car2 ! handle upmost overflow bit
272 and $tp,$car2,$ap
273 andn $rp,$car2,$np
274 or $ap,$np,$ap
275 sub %g0,$num,%o7
276
277.Lcopy:
278 ld [$ap+%o7],%o0 ! copy or in-place refresh
279 st %g0,[$tp+%o7] ! zap tp
280 st %o0,[$rp+%o7]
281 add %o7,4,%o7
282 brnz %o7,.Lcopy
283 nop
284 mov 1,%i0
285 ret
286 restore
287___
288
289########
290######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
291######## code without following dedicated squaring procedure.
292########
# Assembly template, part 2: the dedicated squaring path, reached from the
# ap == bp test in the prologue.  $sbit aliases the $bp register (%i2); the
# squaring code never dereferences $bp, so the register is free to hold the
# bit shifted out of bit 32 each time a partial product is doubled
# (add x,x,x / or $sbit / srlx 32).
#  - .Lsqr_1st: first pass, using ap[0] as the multiplier.
#  - .Lsqr_2nd: second pass, using ap[1].
#  - .Lsqr_outer (.Lsqr_inner1/.Lsqr_inner2): remaining passes; inner1 only
#    adds the np[j]*$mul1 terms, inner2 also adds the doubled ap[j]*$mul0.
#  - .Lsqr_last: final pass, then `ba .Ltail` to reuse the shared
#    subtraction and copy-out code.
# The template closes with the .type/.size directives and an identifying
# .asciz string.
293$sbit="%i2"; # re-use $bp!
294
295$code.=<<___;
296.align 32
297.Lbn_sqr_mont:
298 mulx $mul0,$mul0,$car0 ! ap[0]*ap[0]
299 mulx $apj,$mul0,$tmp0 !prologue!
300 and $car0,$mask,$acc0
301 add %sp,$bias+$frame,$tp
302 ld [$ap+8],$apj !prologue!
303
304 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
305 srlx $car0,32,$car0
306 and $mul1,$mask,$mul1
307
308 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
309 mulx $npj,$mul1,$acc1 !prologue!
310 and $car0,1,$sbit
311 ld [$np+8],$npj !prologue!
312 srlx $car0,1,$car0
313 add $acc0,$car1,$car1
314 srlx $car1,32,$car1
315 mov $tmp0,$acc0 !prologue!
316
317.Lsqr_1st:
318 mulx $apj,$mul0,$tmp0
319 mulx $npj,$mul1,$tmp1
320 add $acc0,$car0,$car0 ! ap[j]*a0+c0
321 add $acc1,$car1,$car1
322 ld [$ap+$j],$apj ! ap[j]
323 and $car0,$mask,$acc0
324 ld [$np+$j],$npj ! np[j]
325 srlx $car0,32,$car0
326 add $acc0,$acc0,$acc0
327 or $sbit,$acc0,$acc0
328 mov $tmp1,$acc1
329 srlx $acc0,32,$sbit
330 add $j,4,$j ! j++
331 and $acc0,$mask,$acc0
332 cmp $j,$num
333 add $acc0,$car1,$car1
334 st $car1,[$tp]
335 mov $tmp0,$acc0
336 srlx $car1,32,$car1
337 bl %icc,.Lsqr_1st
338 add $tp,4,$tp ! tp++
339!.Lsqr_1st
340
341 mulx $apj,$mul0,$tmp0 ! epilogue
342 mulx $npj,$mul1,$tmp1
343 add $acc0,$car0,$car0 ! ap[j]*a0+c0
344 add $acc1,$car1,$car1
345 and $car0,$mask,$acc0
346 srlx $car0,32,$car0
347 add $acc0,$acc0,$acc0
348 or $sbit,$acc0,$acc0
349 srlx $acc0,32,$sbit
350 and $acc0,$mask,$acc0
351 add $acc0,$car1,$car1
352 st $car1,[$tp]
353 srlx $car1,32,$car1
354
355 add $tmp0,$car0,$car0 ! ap[j]*a0+c0
356 add $tmp1,$car1,$car1
357 and $car0,$mask,$acc0
358 srlx $car0,32,$car0
359 add $acc0,$acc0,$acc0
360 or $sbit,$acc0,$acc0
361 srlx $acc0,32,$sbit
362 and $acc0,$mask,$acc0
363 add $acc0,$car1,$car1
364 st $car1,[$tp+4]
365 srlx $car1,32,$car1
366
367 add $car0,$car0,$car0
368 or $sbit,$car0,$car0
369 add $car0,$car1,$car1
370 st $car1,[$tp+8]
371 srlx $car1,32,$car2
372
373 ld [%sp+$bias+$frame],$tmp0 ! tp[0]
374 ld [%sp+$bias+$frame+4],$tmp1 ! tp[1]
375 ld [%sp+$bias+$frame+8],$tpj ! tp[2]
376 ld [$ap+4],$mul0 ! ap[1]
377 ld [$ap+8],$apj ! ap[2]
378 ld [$np],$car1 ! np[0]
379 ld [$np+4],$npj ! np[1]
380 mulx $n0,$tmp0,$mul1
381
382 mulx $mul0,$mul0,$car0
383 and $mul1,$mask,$mul1
384
385 mulx $car1,$mul1,$car1
386 mulx $npj,$mul1,$acc1
387 add $tmp0,$car1,$car1
388 and $car0,$mask,$acc0
389 ld [$np+8],$npj ! np[2]
390 srlx $car1,32,$car1
391 add $tmp1,$car1,$car1
392 srlx $car0,32,$car0
393 add $acc0,$car1,$car1
394 and $car0,1,$sbit
395 add $acc1,$car1,$car1
396 srlx $car0,1,$car0
397 mov 12,$j
398 st $car1,[%sp+$bias+$frame] ! tp[0]=
399 srlx $car1,32,$car1
400 add %sp,$bias+$frame+4,$tp
401
402.Lsqr_2nd:
403 mulx $apj,$mul0,$acc0
404 mulx $npj,$mul1,$acc1
405 add $acc0,$car0,$car0
406 add $tpj,$car1,$car1
407 ld [$ap+$j],$apj ! ap[j]
408 and $car0,$mask,$acc0
409 ld [$np+$j],$npj ! np[j]
410 srlx $car0,32,$car0
411 add $acc1,$car1,$car1
412 ld [$tp+8],$tpj ! tp[j]
413 add $acc0,$acc0,$acc0
414 add $j,4,$j ! j++
415 or $sbit,$acc0,$acc0
416 srlx $acc0,32,$sbit
417 and $acc0,$mask,$acc0
418 cmp $j,$num
419 add $acc0,$car1,$car1
420 st $car1,[$tp] ! tp[j-1]
421 srlx $car1,32,$car1
422 bl %icc,.Lsqr_2nd
423 add $tp,4,$tp ! tp++
424!.Lsqr_2nd
425
426 mulx $apj,$mul0,$acc0
427 mulx $npj,$mul1,$acc1
428 add $acc0,$car0,$car0
429 add $tpj,$car1,$car1
430 and $car0,$mask,$acc0
431 srlx $car0,32,$car0
432 add $acc1,$car1,$car1
433 add $acc0,$acc0,$acc0
434 or $sbit,$acc0,$acc0
435 srlx $acc0,32,$sbit
436 and $acc0,$mask,$acc0
437 add $acc0,$car1,$car1
438 st $car1,[$tp] ! tp[j-1]
439 srlx $car1,32,$car1
440
441 add $car0,$car0,$car0
442 or $sbit,$car0,$car0
443 add $car0,$car1,$car1
444 add $car2,$car1,$car1
445 st $car1,[$tp+4]
446 srlx $car1,32,$car2
447
448 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
449 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
450 ld [$ap+8],$mul0 ! ap[2]
451 ld [$np],$car1 ! np[0]
452 ld [$np+4],$npj ! np[1]
453 mulx $n0,$tmp1,$mul1
454 and $mul1,$mask,$mul1
455 mov 8,$i
456
457 mulx $mul0,$mul0,$car0
458 mulx $car1,$mul1,$car1
459 and $car0,$mask,$acc0
460 add $tmp1,$car1,$car1
461 srlx $car0,32,$car0
462 add %sp,$bias+$frame,$tp
463 srlx $car1,32,$car1
464 and $car0,1,$sbit
465 srlx $car0,1,$car0
466 mov 4,$j
467
468.Lsqr_outer:
469.Lsqr_inner1:
470 mulx $npj,$mul1,$acc1
471 add $tpj,$car1,$car1
472 add $j,4,$j
473 ld [$tp+8],$tpj
474 cmp $j,$i
475 add $acc1,$car1,$car1
476 ld [$np+$j],$npj
477 st $car1,[$tp]
478 srlx $car1,32,$car1
479 bl %icc,.Lsqr_inner1
480 add $tp,4,$tp
481!.Lsqr_inner1
482
483 add $j,4,$j
484 ld [$ap+$j],$apj ! ap[j]
485 mulx $npj,$mul1,$acc1
486 add $tpj,$car1,$car1
487 ld [$np+$j],$npj ! np[j]
488 add $acc0,$car1,$car1
489 ld [$tp+8],$tpj ! tp[j]
490 add $acc1,$car1,$car1
491 st $car1,[$tp]
492 srlx $car1,32,$car1
493
494 add $j,4,$j
495 cmp $j,$num
496 be,pn %icc,.Lsqr_no_inner2
497 add $tp,4,$tp
498
499.Lsqr_inner2:
500 mulx $apj,$mul0,$acc0
501 mulx $npj,$mul1,$acc1
502 add $tpj,$car1,$car1
503 add $acc0,$car0,$car0
504 ld [$ap+$j],$apj ! ap[j]
505 and $car0,$mask,$acc0
506 ld [$np+$j],$npj ! np[j]
507 srlx $car0,32,$car0
508 add $acc0,$acc0,$acc0
509 ld [$tp+8],$tpj ! tp[j]
510 or $sbit,$acc0,$acc0
511 add $j,4,$j ! j++
512 srlx $acc0,32,$sbit
513 and $acc0,$mask,$acc0
514 cmp $j,$num
515 add $acc0,$car1,$car1
516 add $acc1,$car1,$car1
517 st $car1,[$tp] ! tp[j-1]
518 srlx $car1,32,$car1
519 bl %icc,.Lsqr_inner2
520 add $tp,4,$tp ! tp++
521
522.Lsqr_no_inner2:
523 mulx $apj,$mul0,$acc0
524 mulx $npj,$mul1,$acc1
525 add $tpj,$car1,$car1
526 add $acc0,$car0,$car0
527 and $car0,$mask,$acc0
528 srlx $car0,32,$car0
529 add $acc0,$acc0,$acc0
530 or $sbit,$acc0,$acc0
531 srlx $acc0,32,$sbit
532 and $acc0,$mask,$acc0
533 add $acc0,$car1,$car1
534 add $acc1,$car1,$car1
535 st $car1,[$tp] ! tp[j-1]
536 srlx $car1,32,$car1
537
538 add $car0,$car0,$car0
539 or $sbit,$car0,$car0
540 add $car0,$car1,$car1
541 add $car2,$car1,$car1
542 st $car1,[$tp+4]
543 srlx $car1,32,$car2
544
545 add $i,4,$i ! i++
546 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
547 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
548 ld [$ap+$i],$mul0 ! ap[j]
549 ld [$np],$car1 ! np[0]
550 ld [$np+4],$npj ! np[1]
551 mulx $n0,$tmp1,$mul1
552 and $mul1,$mask,$mul1
553 add $i,4,$tmp0
554
555 mulx $mul0,$mul0,$car0
556 mulx $car1,$mul1,$car1
557 and $car0,$mask,$acc0
558 add $tmp1,$car1,$car1
559 srlx $car0,32,$car0
560 add %sp,$bias+$frame,$tp
561 srlx $car1,32,$car1
562 and $car0,1,$sbit
563 srlx $car0,1,$car0
564
565 cmp $tmp0,$num ! i<num-1
566 bl %icc,.Lsqr_outer
567 mov 4,$j
568
569.Lsqr_last:
570 mulx $npj,$mul1,$acc1
571 add $tpj,$car1,$car1
572 add $j,4,$j
573 ld [$tp+8],$tpj
574 cmp $j,$i
575 add $acc1,$car1,$car1
576 ld [$np+$j],$npj
577 st $car1,[$tp]
578 srlx $car1,32,$car1
579 bl %icc,.Lsqr_last
580 add $tp,4,$tp
581!.Lsqr_last
582
583 mulx $npj,$mul1,$acc1
584 add $tpj,$car1,$car1
585 add $acc0,$car1,$car1
586 add $acc1,$car1,$car1
587 st $car1,[$tp]
588 srlx $car1,32,$car1
589
590 add $car0,$car0,$car0 ! recover $car0
591 or $sbit,$car0,$car0
592 add $car0,$car1,$car1
593 add $car2,$car1,$car1
594 st $car1,[$tp+4]
595 srlx $car1,32,$car2
596
597 ba .Ltail
598 add $tp,8,$tp
599.type $fname,#function
600.size $fname,(.-$fname)
601.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
602.align 32
603___
# Replace every backtick-quoted span in the template with the result of
# evaluating it as Perl (this is how the %icc/%xcc choice in the prologue is
# resolved), then write the finished assembly to STDOUT.
604$code =~ s/\`([^\`]*)\`/eval($1)/gem;
605print $code;
606close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
deleted file mode 100755
index a14205f2f0..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
+++ /dev/null
@@ -1,882 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU?
13# Because unlike integer multiplier, which simply stalls whole CPU,
14# FPU is fully pipelined and can effectively emit 48 bit partial
15# product every cycle. Why not blended SPARC v9? One can argue that
16# making this module dependent on UltraSPARC VIS extension limits its
17# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!)
18# implementations from compatibility matrix. But the rest, whole Sun
19# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support
20# VIS extension instructions used in this module. This is considered
21# good enough to not care about HAL SPARC64 users [if any] who have
22# integer-only pure SPARCv9 module to "fall down" to.
23
24# USI&II cores currently exhibit uniform 2x improvement [over pre-
25# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII
26# performance improves a few percent for shorter keys and worsens a few
27# percent for longer keys. This is because USIII integer multiplier
28# is >3x faster than USI&II one, which is harder to match [but see
29# TODO list below]. It should also be noted that SPARC64 V features
30# out-of-order execution, which *might* mean that integer multiplier
31# is pipelined, which in turn *might* be impossible to match... On
32# additional note, SPARC64 V implements FP Multiply-Add instruction,
33# which is perfectly usable in this context... In other words, as far
34# as Fujitsu SPARC64 V goes, talk to the author:-)
35
36# The implementation implies following "non-natural" limitations on
37# input arguments:
38# - num may not be less than 4;
39# - num has to be even;
40# Failure to meet either condition has no fatal effects, simply
41# doesn't give any performance gain.
42
43# TODO:
44# - modulo-schedule inner loop for better performance (on in-order
45# execution core such as UltraSPARC this shall result in further
46# noticeable(!) improvement);
47# - dedicated squaring procedure[?];
48
49######################################################################
50# November 2006
51#
52# Modulo-scheduled inner loops allow to interleave floating point and
53# integer instructions and minimize Read-After-Write penalties. This
54# results in *further* 20-50% performance improvement [depending on
55# key length, more for longer keys] on USI&II cores and 30-80% - on
56# USIII&IV.
57
58$fname="bn_mul_mont_fpu";
59$bits=32;
60for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
61
62if ($bits==64) {
63 $bias=2047;
64 $frame=192;
65} else {
66 $bias=0;
67 $frame=128; # 96 rounded up to largest known cache-line
68}
69$locals=64;
70
71# In order to provide for 32-/64-bit ABI duality, I keep integers wider
72# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used
73# exclusively for pointers, indexes and other small values...
74# int bn_mul_mont(
75$rp="%i0"; # BN_ULONG *rp,
76$ap="%i1"; # const BN_ULONG *ap,
77$bp="%i2"; # const BN_ULONG *bp,
78$np="%i3"; # const BN_ULONG *np,
79$n0="%i4"; # const BN_ULONG *n0,
80$num="%i5"; # int num);
81
82$tp="%l0"; # t[num]
83$ap_l="%l1"; # a[num],n[num] are smashed to 32-bit words and saved
84$ap_h="%l2"; # to these four vectors as double-precision FP values.
85$np_l="%l3"; # This way a bunch of fxtods are eliminated in second
86$np_h="%l4"; # loop and L1-cache aliasing is minimized...
87$i="%l5";
88$j="%l6";
89$mask="%l7"; # 16-bit mask, 0xffff
90
91$n0="%g4"; # reassigned(!) to "64-bit" register
92$carry="%i4"; # %i4 reused(!) for a carry bit
93
94# FP register naming chart
95#
96# ..HILO
97# dcba
98# --------
99# LOa
100# LOb
101# LOc
102# LOd
103# HIa
104# HIb
105# HIc
106# HId
107# ..a
108# ..b
109$ba="%f0"; $bb="%f2"; $bc="%f4"; $bd="%f6";
110$na="%f8"; $nb="%f10"; $nc="%f12"; $nd="%f14";
111$alo="%f16"; $alo_="%f17"; $ahi="%f18"; $ahi_="%f19";
112$nlo="%f20"; $nlo_="%f21"; $nhi="%f22"; $nhi_="%f23";
113
114$dota="%f24"; $dotb="%f26";
115
116$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38";
117$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46";
118$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54";
119$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62";
120
121$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load
122
123$code=<<___;
124.section ".text",#alloc,#execinstr
125
126.global $fname
127.align 32
128$fname:
129 save %sp,-$frame-$locals,%sp
130
131 cmp $num,4
132 bl,a,pn %icc,.Lret
133 clr %i0
134 andcc $num,1,%g0 ! $num has to be even...
135 bnz,a,pn %icc,.Lret
136 clr %i0 ! signal "unsupported input value"
137
138 srl $num,1,$num
139 sethi %hi(0xffff),$mask
140 ld [%i4+0],$n0 ! $n0 reassigned, remember?
141 or $mask,%lo(0xffff),$mask
142 ld [%i4+4],%o0
143 sllx %o0,32,%o0
144 or %o0,$n0,$n0 ! $n0=n0[1].n0[0]
145
146 sll $num,3,$num ! num*=8
147
148 add %sp,$bias,%o0 ! real top of stack
149 sll $num,2,%o1
150 add %o1,$num,%o1 ! %o1=num*5
151 sub %o0,%o1,%o0
152 and %o0,-2048,%o0 ! optimize TLB utilization
153 sub %o0,$bias,%sp ! alloca(5*num*8)
154
155 rd %asi,%o7 ! save %asi
156 add %sp,$bias+$frame+$locals,$tp
157 add $tp,$num,$ap_l
158 add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends !
159 add $ap_l,$num,$ap_h
160 add $ap_h,$num,$np_l
161 add $np_l,$num,$np_h
162
163 wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads
164
165 add $rp,$num,$rp ! readjust input pointers to point
166 add $ap,$num,$ap ! at the ends too...
167 add $bp,$num,$bp
168 add $np,$num,$np
169
170 stx %o7,[%sp+$bias+$frame+48] ! save %asi
171
172 sub %g0,$num,$i ! i=-num
173 sub %g0,$num,$j ! j=-num
174
175 add $ap,$j,%o3
176 add $bp,$i,%o4
177
178 ld [%o3+4],%g1 ! bp[0]
179 ld [%o3+0],%o0
180 ld [%o4+4],%g5 ! ap[0]
181 sllx %g1,32,%g1
182 ld [%o4+0],%o1
183 sllx %g5,32,%g5
184 or %g1,%o0,%o0
185 or %g5,%o1,%o1
186
187 add $np,$j,%o5
188
189 mulx %o1,%o0,%o0 ! ap[0]*bp[0]
190 mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0
191 stx %o0,[%sp+$bias+$frame+0]
192
193 ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
194 fzeros $alo
195 ld [%o3+4],$ahi_
196 fzeros $ahi
197 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
198 fzeros $nlo
199 ld [%o5+4],$nhi_
200 fzeros $nhi
201
202 ! transfer b[i] to FPU as 4x16-bit values
203 ldda [%o4+2]%asi,$ba
204 fxtod $alo,$alo
205 ldda [%o4+0]%asi,$bb
206 fxtod $ahi,$ahi
207 ldda [%o4+6]%asi,$bc
208 fxtod $nlo,$nlo
209 ldda [%o4+4]%asi,$bd
210 fxtod $nhi,$nhi
211
212 ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
213 ldda [%sp+$bias+$frame+6]%asi,$na
214 fxtod $ba,$ba
215 ldda [%sp+$bias+$frame+4]%asi,$nb
216 fxtod $bb,$bb
217 ldda [%sp+$bias+$frame+2]%asi,$nc
218 fxtod $bc,$bc
219 ldda [%sp+$bias+$frame+0]%asi,$nd
220 fxtod $bd,$bd
221
222 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
223 fxtod $na,$na
224 std $ahi,[$ap_h+$j]
225 fxtod $nb,$nb
226 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
227 fxtod $nc,$nc
228 std $nhi,[$np_h+$j]
229 fxtod $nd,$nd
230
231 fmuld $alo,$ba,$aloa
232 fmuld $nlo,$na,$nloa
233 fmuld $alo,$bb,$alob
234 fmuld $nlo,$nb,$nlob
235 fmuld $alo,$bc,$aloc
236 faddd $aloa,$nloa,$nloa
237 fmuld $nlo,$nc,$nloc
238 fmuld $alo,$bd,$alod
239 faddd $alob,$nlob,$nlob
240 fmuld $nlo,$nd,$nlod
241 fmuld $ahi,$ba,$ahia
242 faddd $aloc,$nloc,$nloc
243 fmuld $nhi,$na,$nhia
244 fmuld $ahi,$bb,$ahib
245 faddd $alod,$nlod,$nlod
246 fmuld $nhi,$nb,$nhib
247 fmuld $ahi,$bc,$ahic
248 faddd $ahia,$nhia,$nhia
249 fmuld $nhi,$nc,$nhic
250 fmuld $ahi,$bd,$ahid
251 faddd $ahib,$nhib,$nhib
252 fmuld $nhi,$nd,$nhid
253
254 faddd $ahic,$nhic,$dota ! $nhic
255 faddd $ahid,$nhid,$dotb ! $nhid
256
257 faddd $nloc,$nhia,$nloc
258 faddd $nlod,$nhib,$nlod
259
260 fdtox $nloa,$nloa
261 fdtox $nlob,$nlob
262 fdtox $nloc,$nloc
263 fdtox $nlod,$nlod
264
265 std $nloa,[%sp+$bias+$frame+0]
266 add $j,8,$j
267 std $nlob,[%sp+$bias+$frame+8]
268 add $ap,$j,%o4
269 std $nloc,[%sp+$bias+$frame+16]
270 add $np,$j,%o5
271 std $nlod,[%sp+$bias+$frame+24]
272
273 ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
274 fzeros $alo
275 ld [%o4+4],$ahi_
276 fzeros $ahi
277 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
278 fzeros $nlo
279 ld [%o5+4],$nhi_
280 fzeros $nhi
281
282 fxtod $alo,$alo
283 fxtod $ahi,$ahi
284 fxtod $nlo,$nlo
285 fxtod $nhi,$nhi
286
287 ldx [%sp+$bias+$frame+0],%o0
288 fmuld $alo,$ba,$aloa
289 ldx [%sp+$bias+$frame+8],%o1
290 fmuld $nlo,$na,$nloa
291 ldx [%sp+$bias+$frame+16],%o2
292 fmuld $alo,$bb,$alob
293 ldx [%sp+$bias+$frame+24],%o3
294 fmuld $nlo,$nb,$nlob
295
296 srlx %o0,16,%o7
297 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
298 fmuld $alo,$bc,$aloc
299 add %o7,%o1,%o1
300 std $ahi,[$ap_h+$j]
301 faddd $aloa,$nloa,$nloa
302 fmuld $nlo,$nc,$nloc
303 srlx %o1,16,%o7
304 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
305 fmuld $alo,$bd,$alod
306 add %o7,%o2,%o2
307 std $nhi,[$np_h+$j]
308 faddd $alob,$nlob,$nlob
309 fmuld $nlo,$nd,$nlod
310 srlx %o2,16,%o7
311 fmuld $ahi,$ba,$ahia
312 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
313 faddd $aloc,$nloc,$nloc
314 fmuld $nhi,$na,$nhia
315 !and %o0,$mask,%o0
316 !and %o1,$mask,%o1
317 !and %o2,$mask,%o2
318 !sllx %o1,16,%o1
319 !sllx %o2,32,%o2
320 !sllx %o3,48,%o7
321 !or %o1,%o0,%o0
322 !or %o2,%o0,%o0
323 !or %o7,%o0,%o0 ! 64-bit result
324 srlx %o3,16,%g1 ! 34-bit carry
325 fmuld $ahi,$bb,$ahib
326
327 faddd $alod,$nlod,$nlod
328 fmuld $nhi,$nb,$nhib
329 fmuld $ahi,$bc,$ahic
330 faddd $ahia,$nhia,$nhia
331 fmuld $nhi,$nc,$nhic
332 fmuld $ahi,$bd,$ahid
333 faddd $ahib,$nhib,$nhib
334 fmuld $nhi,$nd,$nhid
335
336 faddd $dota,$nloa,$nloa
337 faddd $dotb,$nlob,$nlob
338 faddd $ahic,$nhic,$dota ! $nhic
339 faddd $ahid,$nhid,$dotb ! $nhid
340
341 faddd $nloc,$nhia,$nloc
342 faddd $nlod,$nhib,$nlod
343
344 fdtox $nloa,$nloa
345 fdtox $nlob,$nlob
346 fdtox $nloc,$nloc
347 fdtox $nlod,$nlod
348
349 std $nloa,[%sp+$bias+$frame+0]
350 std $nlob,[%sp+$bias+$frame+8]
351 addcc $j,8,$j
352 std $nloc,[%sp+$bias+$frame+16]
353 bz,pn %icc,.L1stskip
354 std $nlod,[%sp+$bias+$frame+24]
355
356.align 32 ! incidentally already aligned !
357.L1st:
358 add $ap,$j,%o4
359 add $np,$j,%o5
360 ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
361 fzeros $alo
362 ld [%o4+4],$ahi_
363 fzeros $ahi
364 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
365 fzeros $nlo
366 ld [%o5+4],$nhi_
367 fzeros $nhi
368
369 fxtod $alo,$alo
370 fxtod $ahi,$ahi
371 fxtod $nlo,$nlo
372 fxtod $nhi,$nhi
373
374 ldx [%sp+$bias+$frame+0],%o0
375 fmuld $alo,$ba,$aloa
376 ldx [%sp+$bias+$frame+8],%o1
377 fmuld $nlo,$na,$nloa
378 ldx [%sp+$bias+$frame+16],%o2
379 fmuld $alo,$bb,$alob
380 ldx [%sp+$bias+$frame+24],%o3
381 fmuld $nlo,$nb,$nlob
382
383 srlx %o0,16,%o7
384 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
385 fmuld $alo,$bc,$aloc
386 add %o7,%o1,%o1
387 std $ahi,[$ap_h+$j]
388 faddd $aloa,$nloa,$nloa
389 fmuld $nlo,$nc,$nloc
390 srlx %o1,16,%o7
391 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
392 fmuld $alo,$bd,$alod
393 add %o7,%o2,%o2
394 std $nhi,[$np_h+$j]
395 faddd $alob,$nlob,$nlob
396 fmuld $nlo,$nd,$nlod
397 srlx %o2,16,%o7
398 fmuld $ahi,$ba,$ahia
399 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
400 and %o0,$mask,%o0
401 faddd $aloc,$nloc,$nloc
402 fmuld $nhi,$na,$nhia
403 and %o1,$mask,%o1
404 and %o2,$mask,%o2
405 fmuld $ahi,$bb,$ahib
406 sllx %o1,16,%o1
407 faddd $alod,$nlod,$nlod
408 fmuld $nhi,$nb,$nhib
409 sllx %o2,32,%o2
410 fmuld $ahi,$bc,$ahic
411 sllx %o3,48,%o7
412 or %o1,%o0,%o0
413 faddd $ahia,$nhia,$nhia
414 fmuld $nhi,$nc,$nhic
415 or %o2,%o0,%o0
416 fmuld $ahi,$bd,$ahid
417 or %o7,%o0,%o0 ! 64-bit result
418 faddd $ahib,$nhib,$nhib
419 fmuld $nhi,$nd,$nhid
420 addcc %g1,%o0,%o0
421 faddd $dota,$nloa,$nloa
422 srlx %o3,16,%g1 ! 34-bit carry
423 faddd $dotb,$nlob,$nlob
424 bcs,a %xcc,.+8
425 add %g1,1,%g1
426
427 stx %o0,[$tp] ! tp[j-1]=
428
429 faddd $ahic,$nhic,$dota ! $nhic
430 faddd $ahid,$nhid,$dotb ! $nhid
431
432 faddd $nloc,$nhia,$nloc
433 faddd $nlod,$nhib,$nlod
434
435 fdtox $nloa,$nloa
436 fdtox $nlob,$nlob
437 fdtox $nloc,$nloc
438 fdtox $nlod,$nlod
439
440 std $nloa,[%sp+$bias+$frame+0]
441 std $nlob,[%sp+$bias+$frame+8]
442 std $nloc,[%sp+$bias+$frame+16]
443 std $nlod,[%sp+$bias+$frame+24]
444
445 addcc $j,8,$j
446 bnz,pt %icc,.L1st
447 add $tp,8,$tp
448
449.L1stskip:
450 fdtox $dota,$dota
451 fdtox $dotb,$dotb
452
453 ldx [%sp+$bias+$frame+0],%o0
454 ldx [%sp+$bias+$frame+8],%o1
455 ldx [%sp+$bias+$frame+16],%o2
456 ldx [%sp+$bias+$frame+24],%o3
457
458 srlx %o0,16,%o7
459 std $dota,[%sp+$bias+$frame+32]
460 add %o7,%o1,%o1
461 std $dotb,[%sp+$bias+$frame+40]
462 srlx %o1,16,%o7
463 add %o7,%o2,%o2
464 srlx %o2,16,%o7
465 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
466 and %o0,$mask,%o0
467 and %o1,$mask,%o1
468 and %o2,$mask,%o2
469 sllx %o1,16,%o1
470 sllx %o2,32,%o2
471 sllx %o3,48,%o7
472 or %o1,%o0,%o0
473 or %o2,%o0,%o0
474 or %o7,%o0,%o0 ! 64-bit result
475 ldx [%sp+$bias+$frame+32],%o4
476 addcc %g1,%o0,%o0
477 ldx [%sp+$bias+$frame+40],%o5
478 srlx %o3,16,%g1 ! 34-bit carry
479 bcs,a %xcc,.+8
480 add %g1,1,%g1
481
482 stx %o0,[$tp] ! tp[j-1]=
483 add $tp,8,$tp
484
485 srlx %o4,16,%o7
486 add %o7,%o5,%o5
487 and %o4,$mask,%o4
488 sllx %o5,16,%o7
489 or %o7,%o4,%o4
490 addcc %g1,%o4,%o4
491 srlx %o5,48,%g1
492 bcs,a %xcc,.+8
493 add %g1,1,%g1
494
495 mov %g1,$carry
496 stx %o4,[$tp] ! tp[num-1]=
497
498 ba .Louter
499 add $i,8,$i
500.align 32
501.Louter:
502 sub %g0,$num,$j ! j=-num
503 add %sp,$bias+$frame+$locals,$tp
504
505 add $ap,$j,%o3
506 add $bp,$i,%o4
507
508 ld [%o3+4],%g1 ! bp[i]
509 ld [%o3+0],%o0
510 ld [%o4+4],%g5 ! ap[0]
511 sllx %g1,32,%g1
512 ld [%o4+0],%o1
513 sllx %g5,32,%g5
514 or %g1,%o0,%o0
515 or %g5,%o1,%o1
516
517 ldx [$tp],%o2 ! tp[0]
518 mulx %o1,%o0,%o0
519 addcc %o2,%o0,%o0
520 mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
521 stx %o0,[%sp+$bias+$frame+0]
522
523 ! transfer b[i] to FPU as 4x16-bit values
524 ldda [%o4+2]%asi,$ba
525 ldda [%o4+0]%asi,$bb
526 ldda [%o4+6]%asi,$bc
527 ldda [%o4+4]%asi,$bd
528
529 ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
530 ldda [%sp+$bias+$frame+6]%asi,$na
531 fxtod $ba,$ba
532 ldda [%sp+$bias+$frame+4]%asi,$nb
533 fxtod $bb,$bb
534 ldda [%sp+$bias+$frame+2]%asi,$nc
535 fxtod $bc,$bc
536 ldda [%sp+$bias+$frame+0]%asi,$nd
537 fxtod $bd,$bd
538 ldd [$ap_l+$j],$alo ! load a[j] in double format
539 fxtod $na,$na
540 ldd [$ap_h+$j],$ahi
541 fxtod $nb,$nb
542 ldd [$np_l+$j],$nlo ! load n[j] in double format
543 fxtod $nc,$nc
544 ldd [$np_h+$j],$nhi
545 fxtod $nd,$nd
546
547 fmuld $alo,$ba,$aloa
548 fmuld $nlo,$na,$nloa
549 fmuld $alo,$bb,$alob
550 fmuld $nlo,$nb,$nlob
551 fmuld $alo,$bc,$aloc
552 faddd $aloa,$nloa,$nloa
553 fmuld $nlo,$nc,$nloc
554 fmuld $alo,$bd,$alod
555 faddd $alob,$nlob,$nlob
556 fmuld $nlo,$nd,$nlod
557 fmuld $ahi,$ba,$ahia
558 faddd $aloc,$nloc,$nloc
559 fmuld $nhi,$na,$nhia
560 fmuld $ahi,$bb,$ahib
561 faddd $alod,$nlod,$nlod
562 fmuld $nhi,$nb,$nhib
563 fmuld $ahi,$bc,$ahic
564 faddd $ahia,$nhia,$nhia
565 fmuld $nhi,$nc,$nhic
566 fmuld $ahi,$bd,$ahid
567 faddd $ahib,$nhib,$nhib
568 fmuld $nhi,$nd,$nhid
569
570 faddd $ahic,$nhic,$dota ! $nhic
571 faddd $ahid,$nhid,$dotb ! $nhid
572
573 faddd $nloc,$nhia,$nloc
574 faddd $nlod,$nhib,$nlod
575
576 fdtox $nloa,$nloa
577 fdtox $nlob,$nlob
578 fdtox $nloc,$nloc
579 fdtox $nlod,$nlod
580
581 std $nloa,[%sp+$bias+$frame+0]
582 std $nlob,[%sp+$bias+$frame+8]
583 std $nloc,[%sp+$bias+$frame+16]
584 add $j,8,$j
585 std $nlod,[%sp+$bias+$frame+24]
586
587 ldd [$ap_l+$j],$alo ! load a[j] in double format
588 ldd [$ap_h+$j],$ahi
589 ldd [$np_l+$j],$nlo ! load n[j] in double format
590 ldd [$np_h+$j],$nhi
591
592 fmuld $alo,$ba,$aloa
593 fmuld $nlo,$na,$nloa
594 fmuld $alo,$bb,$alob
595 fmuld $nlo,$nb,$nlob
596 fmuld $alo,$bc,$aloc
597 ldx [%sp+$bias+$frame+0],%o0
598 faddd $aloa,$nloa,$nloa
599 fmuld $nlo,$nc,$nloc
600 ldx [%sp+$bias+$frame+8],%o1
601 fmuld $alo,$bd,$alod
602 ldx [%sp+$bias+$frame+16],%o2
603 faddd $alob,$nlob,$nlob
604 fmuld $nlo,$nd,$nlod
605 ldx [%sp+$bias+$frame+24],%o3
606 fmuld $ahi,$ba,$ahia
607
608 srlx %o0,16,%o7
609 faddd $aloc,$nloc,$nloc
610 fmuld $nhi,$na,$nhia
611 add %o7,%o1,%o1
612 fmuld $ahi,$bb,$ahib
613 srlx %o1,16,%o7
614 faddd $alod,$nlod,$nlod
615 fmuld $nhi,$nb,$nhib
616 add %o7,%o2,%o2
617 fmuld $ahi,$bc,$ahic
618 srlx %o2,16,%o7
619 faddd $ahia,$nhia,$nhia
620 fmuld $nhi,$nc,$nhic
621 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
622 ! why?
623 and %o0,$mask,%o0
624 fmuld $ahi,$bd,$ahid
625 and %o1,$mask,%o1
626 and %o2,$mask,%o2
627 faddd $ahib,$nhib,$nhib
628 fmuld $nhi,$nd,$nhid
629 sllx %o1,16,%o1
630 faddd $dota,$nloa,$nloa
631 sllx %o2,32,%o2
632 faddd $dotb,$nlob,$nlob
633 sllx %o3,48,%o7
634 or %o1,%o0,%o0
635 faddd $ahic,$nhic,$dota ! $nhic
636 or %o2,%o0,%o0
637 faddd $ahid,$nhid,$dotb ! $nhid
638 or %o7,%o0,%o0 ! 64-bit result
639 ldx [$tp],%o7
640 faddd $nloc,$nhia,$nloc
641 addcc %o7,%o0,%o0
642 ! end-of-why?
643 faddd $nlod,$nhib,$nlod
644 srlx %o3,16,%g1 ! 34-bit carry
645 fdtox $nloa,$nloa
646 bcs,a %xcc,.+8
647 add %g1,1,%g1
648
649 fdtox $nlob,$nlob
650 fdtox $nloc,$nloc
651 fdtox $nlod,$nlod
652
653 std $nloa,[%sp+$bias+$frame+0]
654 std $nlob,[%sp+$bias+$frame+8]
655 addcc $j,8,$j
656 std $nloc,[%sp+$bias+$frame+16]
657 bz,pn %icc,.Linnerskip
658 std $nlod,[%sp+$bias+$frame+24]
659
660 ba .Linner
661 nop
662.align 32
663.Linner:
664 ldd [$ap_l+$j],$alo ! load a[j] in double format
665 ldd [$ap_h+$j],$ahi
666 ldd [$np_l+$j],$nlo ! load n[j] in double format
667 ldd [$np_h+$j],$nhi
668
669 fmuld $alo,$ba,$aloa
670 fmuld $nlo,$na,$nloa
671 fmuld $alo,$bb,$alob
672 fmuld $nlo,$nb,$nlob
673 fmuld $alo,$bc,$aloc
674 ldx [%sp+$bias+$frame+0],%o0
675 faddd $aloa,$nloa,$nloa
676 fmuld $nlo,$nc,$nloc
677 ldx [%sp+$bias+$frame+8],%o1
678 fmuld $alo,$bd,$alod
679 ldx [%sp+$bias+$frame+16],%o2
680 faddd $alob,$nlob,$nlob
681 fmuld $nlo,$nd,$nlod
682 ldx [%sp+$bias+$frame+24],%o3
683 fmuld $ahi,$ba,$ahia
684
685 srlx %o0,16,%o7
686 faddd $aloc,$nloc,$nloc
687 fmuld $nhi,$na,$nhia
688 add %o7,%o1,%o1
689 fmuld $ahi,$bb,$ahib
690 srlx %o1,16,%o7
691 faddd $alod,$nlod,$nlod
692 fmuld $nhi,$nb,$nhib
693 add %o7,%o2,%o2
694 fmuld $ahi,$bc,$ahic
695 srlx %o2,16,%o7
696 faddd $ahia,$nhia,$nhia
697 fmuld $nhi,$nc,$nhic
698 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
699 and %o0,$mask,%o0
700 fmuld $ahi,$bd,$ahid
701 and %o1,$mask,%o1
702 and %o2,$mask,%o2
703 faddd $ahib,$nhib,$nhib
704 fmuld $nhi,$nd,$nhid
705 sllx %o1,16,%o1
706 faddd $dota,$nloa,$nloa
707 sllx %o2,32,%o2
708 faddd $dotb,$nlob,$nlob
709 sllx %o3,48,%o7
710 or %o1,%o0,%o0
711 faddd $ahic,$nhic,$dota ! $nhic
712 or %o2,%o0,%o0
713 faddd $ahid,$nhid,$dotb ! $nhid
714 or %o7,%o0,%o0 ! 64-bit result
715 faddd $nloc,$nhia,$nloc
716 addcc %g1,%o0,%o0
717 ldx [$tp+8],%o7 ! tp[j]
718 faddd $nlod,$nhib,$nlod
719 srlx %o3,16,%g1 ! 34-bit carry
720 fdtox $nloa,$nloa
721 bcs,a %xcc,.+8
722 add %g1,1,%g1
723 fdtox $nlob,$nlob
724 addcc %o7,%o0,%o0
725 fdtox $nloc,$nloc
726 bcs,a %xcc,.+8
727 add %g1,1,%g1
728
729 stx %o0,[$tp] ! tp[j-1]
730 fdtox $nlod,$nlod
731
732 std $nloa,[%sp+$bias+$frame+0]
733 std $nlob,[%sp+$bias+$frame+8]
734 std $nloc,[%sp+$bias+$frame+16]
735 addcc $j,8,$j
736 std $nlod,[%sp+$bias+$frame+24]
737 bnz,pt %icc,.Linner
738 add $tp,8,$tp
739
740.Linnerskip:
741 fdtox $dota,$dota
742 fdtox $dotb,$dotb
743
744 ldx [%sp+$bias+$frame+0],%o0
745 ldx [%sp+$bias+$frame+8],%o1
746 ldx [%sp+$bias+$frame+16],%o2
747 ldx [%sp+$bias+$frame+24],%o3
748
749 srlx %o0,16,%o7
750 std $dota,[%sp+$bias+$frame+32]
751 add %o7,%o1,%o1
752 std $dotb,[%sp+$bias+$frame+40]
753 srlx %o1,16,%o7
754 add %o7,%o2,%o2
755 srlx %o2,16,%o7
756 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
757 and %o0,$mask,%o0
758 and %o1,$mask,%o1
759 and %o2,$mask,%o2
760 sllx %o1,16,%o1
761 sllx %o2,32,%o2
762 sllx %o3,48,%o7
763 or %o1,%o0,%o0
764 or %o2,%o0,%o0
765 ldx [%sp+$bias+$frame+32],%o4
766 or %o7,%o0,%o0 ! 64-bit result
767 ldx [%sp+$bias+$frame+40],%o5
768 addcc %g1,%o0,%o0
769 ldx [$tp+8],%o7 ! tp[j]
770 srlx %o3,16,%g1 ! 34-bit carry
771 bcs,a %xcc,.+8
772 add %g1,1,%g1
773
774 addcc %o7,%o0,%o0
775 bcs,a %xcc,.+8
776 add %g1,1,%g1
777
778 stx %o0,[$tp] ! tp[j-1]
779 add $tp,8,$tp
780
781 srlx %o4,16,%o7
782 add %o7,%o5,%o5
783 and %o4,$mask,%o4
784 sllx %o5,16,%o7
785 or %o7,%o4,%o4
786 addcc %g1,%o4,%o4
787 srlx %o5,48,%g1
788 bcs,a %xcc,.+8
789 add %g1,1,%g1
790
791 addcc $carry,%o4,%o4
792 stx %o4,[$tp] ! tp[num-1]
793 mov %g1,$carry
794 bcs,a %xcc,.+8
795 add $carry,1,$carry
796
797 addcc $i,8,$i
798 bnz %icc,.Louter
799 nop
800
801 add $tp,8,$tp ! adjust tp to point at the end
802 orn %g0,%g0,%g4
803 sub %g0,$num,%o7 ! n=-num
804 ba .Lsub
805 subcc %g0,%g0,%g0 ! clear %icc.c
806
807.align 32
808.Lsub:
809 ldx [$tp+%o7],%o0
810 add $np,%o7,%g1
811 ld [%g1+0],%o2
812 ld [%g1+4],%o3
813 srlx %o0,32,%o1
814 subccc %o0,%o2,%o2
815 add $rp,%o7,%g1
816 subccc %o1,%o3,%o3
817 st %o2,[%g1+0]
818 add %o7,8,%o7
819 brnz,pt %o7,.Lsub
820 st %o3,[%g1+4]
821 subc $carry,0,%g4
822 sub %g0,$num,%o7 ! n=-num
823 ba .Lcopy
824 nop
825
826.align 32
827.Lcopy:
828 ldx [$tp+%o7],%o0
829 add $rp,%o7,%g1
830 ld [%g1+0],%o2
831 ld [%g1+4],%o3
832 stx %g0,[$tp+%o7]
833 and %o0,%g4,%o0
834 srlx %o0,32,%o1
835 andn %o2,%g4,%o2
836 andn %o3,%g4,%o3
837 or %o2,%o0,%o0
838 or %o3,%o1,%o1
839 st %o0,[%g1+0]
840 add %o7,8,%o7
841 brnz,pt %o7,.Lcopy
842 st %o1,[%g1+4]
843 sub %g0,$num,%o7 ! n=-num
844
845.Lzap:
846 stx %g0,[$ap_l+%o7]
847 stx %g0,[$ap_h+%o7]
848 stx %g0,[$np_l+%o7]
849 stx %g0,[$np_h+%o7]
850 add %o7,8,%o7
851 brnz,pt %o7,.Lzap
852 nop
853
854 ldx [%sp+$bias+$frame+48],%o7
855 wr %g0,%o7,%asi ! restore %asi
856
857 mov 1,%i0
858.Lret:
859 ret
860 restore
861.type $fname,#function
862.size $fname,(.-$fname)
863.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
864.align 32
865___
866
# Post-process the generated assembly text held in $code.
# First, replace every back-quoted expression with the result of eval()-ing
# the text found between the back-quotes.
867$code =~ s/\`([^\`]*)\`/eval($1)/gem;
868
869# The substitution below makes it possible to compile without demanding
870# VIS extensions on the command line, e.g. -xarch=v9 vs. -xarch=v9a. I
871# dare to do this, because VIS capability is detected at run-time now
872# and this routine is not called on a CPU not capable of executing it. Do
873# note that fzeros is not the only VIS dependency! Another dependency
874# is implicit and is just _a_ numerical value loaded to %asi register,
875# which the assembler can't recognize as VIS specific...
# Each "fzeros %fN" becomes a raw ".word" holding 0x81b00c20|(N<<25), with
# the original mnemonic preserved as a trailing assembler comment.
876$code =~ s/fzeros\s+%f([0-9]+)/
877 sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1)
878 /gem;
879
880print $code;
881# closing STDOUT flushes the generated output
882close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/via-mont.pl b/src/lib/libcrypto/bn/asm/via-mont.pl
deleted file mode 100644
index c046a514c8..0000000000
--- a/src/lib/libcrypto/bn/asm/via-mont.pl
+++ /dev/null
@@ -1,242 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# Wrapper around 'rep montmul', VIA-specific instruction accessing
11# PadLock Montgomery Multiplier. The wrapper is designed as drop-in
12# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9].
13#
14# Below are interleaved outputs from 'openssl speed rsa dsa' for 4
15# different software configurations on 1.5GHz VIA Esther processor.
16# Lines marked with "software integer" denote performance of hand-
17# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2"
18# refers to hand-coded SSE2 Montgomery multiplication procedure found in
19# OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from
20# Padlock SDK 2.0.1 available for download from VIA, which naturally
21# utilizes the magic 'repz montmul' instruction. And finally "hardware
22# this" refers to *this* implementation which also uses 'repz montmul'.
23#
24# sign verify sign/s verify/s
25# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer
26# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2
27# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK
28# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this
29#
30# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer
31# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2
32# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK
33# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this
34#
35# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer
36# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2
37# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK
38# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this
39#
40# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer
41# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2
42# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK
43# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this
44#
45# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer
46# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2
47# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK
48# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this
49#
50# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer
51# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2
52# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK
53# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this
54#
55# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer
56# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2
57# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK
58# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this
59#
60# To give you some other reference point here is output for 2.4GHz P4
61# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software
62# SSE2" in above terms.
63#
64# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0
65# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0
66# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9
67# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3
68# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1
69# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0
70# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1
71#
72# Conclusions:
73# - VIA SDK leaves a *lot* of room for improvement (which this
74# implementation successfully fills:-);
75# - 'rep montmul' gives up to >3x performance improvement depending on
76# key length;
77# - in terms of absolute performance it delivers approximately as much
78# as modern out-of-order 32-bit cores [again, for longer keys].
79
# Split the script path in $0 at its last slash or backslash; $dir receives
# the leading directory part (separator included) so that x86asm.pl can be
# found next to this script or in ../../perlasm relative to it.
80$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
81push(@INC,"${dir}","${dir}../../perlasm");
82require "x86asm.pl";
83
84&asm_init($ARGV[0],"via-mont.pl");
85
86# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
# The generated symbol uses the same argument list under a PadLock-specific name.
87$func="bn_mul_mont_padlock";
88
89$pad=16*1; # amount of reserved bytes on top of every vector
90
91# stack layout (offsets from the re-aligned esp set up in the function below)
92$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA; receives *n0
93$A=&DWP(4,"esp"); # pointer to the padded copy of ap
94$B=&DWP(8,"esp"); # pointer to the padded copy of bp
95$T=&DWP(12,"esp"); # pointer to tp
96$M=&DWP(16,"esp"); # pointer to the padded copy of np
97$scratch=&DWP(20,"esp"); # pointer to the 32 byte scratch area
98$rp=&DWP(24,"esp"); # these are mine; saved rp argument
99$sp=&DWP(28,"esp"); # caller's esp, restored before returning
100# &DWP(32,"esp") # 32 byte scratch area
101# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num]
102# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num]
103# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num]
104# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num]
105# Note that the SDK suggests unconditionally allocating 2K per vector. This
106# has quite an impact on performance. It naturally depends on key length,
107# but to give an example, 1024 bit private RSA key operations suffer >30%
108# penalty. I allocate only as much as actually required...
109
# Emit bn_mul_mont_padlock. The routine validates num, builds a 64-byte
# aligned frame on the stack holding zeroed tp plus padded copies of ap, bp
# and np, executes 'rep montmul', performs the final conditional subtraction
# into rp, wipes the frame and returns. eax is the return value: 0 when num
# is rejected (nothing was done), 1 once the result has been written to rp.
110&function_begin($func);
111 &xor ("eax","eax"); # return value stays 0 on the early exits below
112 &mov ("ecx",&wparam(5)); # num
113 # meet VIA's limitations for num [note that the specification
114 # expresses them in bits, while we work with amount of 32-bit words]
115 &test ("ecx",3);
116 &jnz (&label("leave")); # num % 4 != 0
117 &cmp ("ecx",8);
118 &jb (&label("leave")); # num < 8
119 &cmp ("ecx",1024);
120 &ja (&label("leave")); # num > 1024
121
122 &pushf (); # flags are restored by popf just before "leave"
123 &cld (); # string instructions below must run forward
124
125 &mov ("edi",&wparam(0)); # rp
126 &mov ("eax",&wparam(1)); # ap
127 &mov ("ebx",&wparam(2)); # bp
128 &mov ("edx",&wparam(3)); # np
129 &mov ("esi",&wparam(4)); # n0
130 &mov ("esi",&DWP(0,"esi")); # *n0
131
132 &lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes (4*num+$pad)
133 &lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes
134 &neg ("ebp");
135 &add ("ebp","esp");
136 &and ("ebp",-64); # align to cache-line
137 &xchg ("ebp","esp"); # alloca; ebp now holds the caller's esp
138
139 &mov ($rp,"edi"); # save rp
140 &mov ($sp,"ebp"); # save esp
141
142 &mov ($mZeroPrime,"esi");
143 &lea ("esi",&DWP(64,"esp")); # tp
144 &mov ($T,"esi");
145 &lea ("edi",&DWP(32,"esp")); # scratch area
146 &mov ($scratch,"edi");
147 &mov ("esi","eax"); # esi=ap, source of the first copy below
148
149 &lea ("ebp",&DWP(-$pad,"ecx"));
150 &shr ("ebp",2); # restore original num value in ebp
151
152 &xor ("eax","eax"); # eax=0 is the fill value for every rep stosl
153
154 &mov ("ecx","ebp");
155 &lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch, counted in dwords
156 &data_byte(0xf3,0xab); # rep stosl, bzero (starts at the scratch area in edi)
157
158 &mov ("ecx","ebp");
159 &lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy
160 &mov ($A,"edi");
161 &data_byte(0xf3,0xa5); # rep movsl, memcpy
162 &mov ("ecx",$pad/4);
163 &data_byte(0xf3,0xab); # rep stosl, bzero pad
164 # edi points at the end of padded ap copy...
165
166 &mov ("ecx","ebp");
167 &mov ("esi","ebx"); # source = bp
168 &mov ($B,"edi");
169 &data_byte(0xf3,0xa5); # rep movsl, memcpy
170 &mov ("ecx",$pad/4);
171 &data_byte(0xf3,0xab); # rep stosl, bzero pad
172 # edi points at the end of padded bp copy...
173
174 &mov ("ecx","ebp");
175 &mov ("esi","edx"); # source = np
176 &mov ($M,"edi");
177 &data_byte(0xf3,0xa5); # rep movsl, memcpy
178 &mov ("ecx",$pad/4);
179 &data_byte(0xf3,0xab); # rep stosl, bzero pad
180 # edi points at the end of padded np copy...
181
182 # let magic happen...
183 &mov ("ecx","ebp");
184 &mov ("esi","esp"); # esi = base of the frame described by the stack layout
185 &shl ("ecx",5); # convert word counter to bit counter
186 &align (4);
187 &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
188
189 &mov ("ecx","ebp"); # loop counter for "sub" = num
190 &lea ("esi",&DWP(64,"esp")); # tp
191 # edi still points at the end of padded np copy...
192 &neg ("ebp");
193 &lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind": ebp = edi-$pad-4*num
194 &mov ("edi",$rp); # restore rp
195 &xor ("edx","edx"); # i=0 and clear CF
196
# rp = tp - np, propagating the borrow through CF across iterations.
197&set_label("sub",8);
198 &mov ("eax",&DWP(0,"esi","edx",4));
199 &sbb ("eax",&DWP(0,"ebp","edx",4));
200 &mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i]
201 &lea ("edx",&DWP(1,"edx")); # i++
202 &loop (&label("sub")); # doesn't affect CF!
203
# Select the source for the final copy: eax = tp[num] minus the borrow is
# used as a mask on the tp pointer and, inverted, on the rp pointer.
204 &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit
205 &sbb ("eax",0);
206 &and ("esi","eax");
207 &not ("eax");
208 &mov ("ebp","edi");
209 &and ("ebp","eax");
210 &or ("esi","ebp"); # tp=carry?tp:rp
211
212 &mov ("ecx","edx"); # num
213 &xor ("edx","edx"); # i=0
214
# Copy the selected vector to rp (an in-place refresh when esi==edi) while
# overwriting tp. Note that the value stored into tp is the loop counter
# in ecx, not zero.
215&set_label("copy",8);
216 &mov ("eax",&DWP(0,"esi","edx",4));
217 &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp
218 &mov (&DWP(0,"edi","edx",4),"eax");
219 &lea ("edx",&DWP(1,"edx")); # i++
220 &loop (&label("copy"));
221
222 &mov ("ebp",$sp); # fetch the caller's esp before the frame is wiped
223 &xor ("eax","eax");
224
225 &mov ("ecx",64/4);
226 &mov ("edi","esp"); # zap frame including scratch area
227 &data_byte(0xf3,0xab); # rep stosl, bzero
228
229 # zap copies of ap, bp and np
230 &lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap (edx==num here)
231 &lea ("ecx",&DWP(3*$pad/4,"edx","edx",2)); # 3*(num+$pad/4) dwords
232 &data_byte(0xf3,0xab); # rep stosl, bzero
233
234 &mov ("esp","ebp");
235 &inc ("eax"); # signal "done"
236 &popf ();
237&set_label("leave");
238&function_end($func);
239
240&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
241
242&asm_finish();
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
deleted file mode 100755
index 5cd3cd2ed5..0000000000
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ /dev/null
@@ -1,591 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# This is a "teaser" code, as it can be improved in several ways...
13# First of all non-SSE2 path should be implemented (yes, for now it
14# performs Montgomery multiplication/convolution only on SSE2-capable
15# CPUs such as P4, others fall down to original code). Then inner loop
16# can be unrolled and modulo-scheduled to improve ILP and possibly
17# moved to 128-bit XMM register bank (though it would require input
18# rearrangement and/or increase bus bandwidth utilization). Dedicated
19# squaring procedure should give further performance improvement...
20# Yet, for being draft, the code improves rsa512 *sign* benchmark by
21# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
22
23# December 2006
24#
25# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
26# Integer-only code [being equipped with dedicated squaring procedure]
27# gives ~40% on rsa512 sign benchmark...
28
# Locate and load x86asm.pl: $dir is the directory part of $0 (separator
# included); the module is looked up there and in ../../perlasm below it.
29$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30push(@INC,"${dir}","${dir}../../perlasm");
31require "x86asm.pl";
32
33&asm_init($ARGV[0],$0);
34
# The SSE2 code path is generated only when -DOPENSSL_IA32_SSE2 appears
# among the command-line arguments.
35$sse2=0;
36for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
37
38&external_label("OPENSSL_ia32cap_P") if ($sse2);
39
40&function_begin("bn_mul_mont");
41
# Register assignment. Note that two pairs of names share a register.
42$i="edx";
43$j="ecx";
44$ap="esi"; $tp="esi"; # overlapping variables!!!
45$rp="edi"; $bp="edi"; # overlapping variables!!!
46$np="ebp";
47$num="ebx";
48
49$_num=&DWP(4*0,"esp"); # stack top layout
50$_rp=&DWP(4*1,"esp");
51$_ap=&DWP(4*2,"esp");
52$_bp=&DWP(4*3,"esp");
53$_np=&DWP(4*4,"esp");
54$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp"); # $_n0q: quad-word view of the same slot, used as pmuludq operand
55$_sp=&DWP(4*6,"esp");
56$_bpend=&DWP(4*7,"esp");
57$frame=32; # size of above frame rounded up to 16n
58
# eax=0 is what the caller sees if we bail out for num<4; the normal exit
# path loads 1 into eax instead.
59 &xor ("eax","eax");
60 &mov ("edi",&wparam(5)); # int num
61 &cmp ("edi",4);
62 &jl (&label("just_leave"));
63
64 &lea ("esi",&wparam(0)); # put aside pointer to argument block
# NOTE(review): lea yields the address of the ap argument slot rather than
# the ap pointer value itself (compare the mov-based loads of the argument
# block further down) -- confirm this is what the windowing below intends.
65 &lea ("edx",&wparam(1)); # load ap
66 &mov ("ebp","esp"); # saved stack pointer!
67 &add ("edi",2); # extra two words on top of tp
68 &neg ("edi");
69 &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70 &neg ("edi"); # edi is num+2 again
71
72 # minimize cache contention by arranging 2K window between stack
73 # pointer and ap argument [np is also position sensitive vector,
74 # but it's assumed to be near ap, as it's allocated at ~same
75 # time].
76 &mov ("eax","esp");
77 &sub ("eax","edx");
78 &and ("eax",2047);
79 &sub ("esp","eax"); # this aligns sp and ap modulo 2048
80
81 &xor ("edx","esp");
82 &and ("edx",2048);
83 &xor ("edx",2048);
84 &sub ("esp","edx"); # this splits them apart modulo 4096
85
86 &and ("esp",-64); # align to cache line
87
88 ################################# load argument block...
89 &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
90 &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
91 &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
92 &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
93 &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
94 #&mov ("edi",&DWP(5*4,"esi"));# int num
95
96 &mov ("esi",&DWP(0,"esi")); # pull n0[0]
97 &mov ($_rp,"eax"); # ... save a copy of argument block
98 &mov ($_ap,"ebx");
99 &mov ($_bp,"ecx");
100 &mov ($_np,"edx");
101 &mov ($_n0,"esi");
102 &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling (edi holds num+2)
103 #&mov ($_num,$num); # redundant as $num is not reused
104 &mov ($_sp,"ebp"); # saved stack pointer!
105
# SSE2 path, emitted only when the script was invoked with
# -DOPENSSL_IA32_SSE2. At run time it is taken when bit 26 of the first
# word of OPENSSL_ia32cap_P is set; otherwise control falls to "non_sse2".
# tp lives on the stack at $frame(%esp).
106if($sse2) {
107$acc0="mm0"; # mmx register bank layout
108$acc1="mm1";
109$car0="mm2";
110$car1="mm3";
111$mul0="mm4";
112$mul1="mm5";
113$temp="mm6";
114$mask="mm7";
115
116 &picmeup("eax","OPENSSL_ia32cap_P");
117 &bt (&DWP(0,"eax"),26);
118 &jnc (&label("non_sse2"));
119
120 &mov ("eax",-1);
121 &movd ($mask,"eax"); # mask 32 lower bits
122
123 &mov ($ap,$_ap); # load input pointers
124 &mov ($bp,$_bp);
125 &mov ($np,$_np);
126
127 &xor ($i,$i); # i=0
128 &xor ($j,$j); # j=0
129
# First pass (i=0): tp = (ap*bp[0] + np*m1) with the lowest word dropped;
# there is no previous tp to accumulate yet.
130 &movd ($mul0,&DWP(0,$bp)); # bp[0]
131 &movd ($mul1,&DWP(0,$ap)); # ap[0]
132 &movd ($car1,&DWP(0,$np)); # np[0]
133
134 &pmuludq($mul1,$mul0); # ap[0]*bp[0]
135 &movq ($car0,$mul1);
136 &movq ($acc0,$mul1); # I wish movd worked for
137 &pand ($acc0,$mask); # inter-register transfers
138
139 &pmuludq($mul1,$_n0q); # *=n0
140
141 &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
142 &paddq ($car1,$acc0);
143
144 &movd ($acc1,&DWP(4,$np)); # np[1]
145 &movd ($acc0,&DWP(4,$ap)); # ap[1]
146
147 &psrlq ($car0,32);
148 &psrlq ($car1,32);
149
150 &inc ($j); # j++
# Modulo-scheduled loop: operands for j+1 are loaded while word j is summed.
151&set_label("1st",16);
152 &pmuludq($acc0,$mul0); # ap[j]*bp[0]
153 &pmuludq($acc1,$mul1); # np[j]*m1
154 &paddq ($car0,$acc0); # +=c0
155 &paddq ($car1,$acc1); # +=c1
156
157 &movq ($acc0,$car0);
158 &pand ($acc0,$mask);
159 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
160 &paddq ($car1,$acc0); # +=ap[j]*bp[0];
161 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
162 &psrlq ($car0,32);
163 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
164 &psrlq ($car1,32);
165
166 &lea ($j,&DWP(1,$j));
167 &cmp ($j,$num);
168 &jl (&label("1st"));
169
# Loop epilogue: last word of the first pass, then store both carries.
170 &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
171 &pmuludq($acc1,$mul1); # np[num-1]*m1
172 &paddq ($car0,$acc0); # +=c0
173 &paddq ($car1,$acc1); # +=c1
174
175 &movq ($acc0,$car0);
176 &pand ($acc0,$mask);
177 &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
178 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
179
180 &psrlq ($car0,32);
181 &psrlq ($car1,32);
182
183 &paddq ($car1,$car0);
184 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
185
186 &inc ($i); # i++
# Remaining passes: same as above but accumulating the previous tp.
187&set_label("outer");
188 &xor ($j,$j); # j=0
189
190 &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
191 &movd ($mul1,&DWP(0,$ap)); # ap[0]
192 &movd ($temp,&DWP($frame,"esp")); # tp[0]
193 &movd ($car1,&DWP(0,$np)); # np[0]
194 &pmuludq($mul1,$mul0); # ap[0]*bp[i]
195
196 &paddq ($mul1,$temp); # +=tp[0]
197 &movq ($acc0,$mul1);
198 &movq ($car0,$mul1);
199 &pand ($acc0,$mask);
200
201 &pmuludq($mul1,$_n0q); # *=n0
202
203 &pmuludq($car1,$mul1);
204 &paddq ($car1,$acc0);
205
206 &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
207 &movd ($acc1,&DWP(4,$np)); # np[1]
208 &movd ($acc0,&DWP(4,$ap)); # ap[1]
209
210 &psrlq ($car0,32);
211 &psrlq ($car1,32);
212 &paddq ($car0,$temp); # +=tp[1]
213
214 &inc ($j); # j++
215 &dec ($num); # $num doubles as the inner-loop down-counter
216&set_label("inner");
217 &pmuludq($acc0,$mul0); # ap[j]*bp[i]
218 &pmuludq($acc1,$mul1); # np[j]*m1
219 &paddq ($car0,$acc0); # +=c0
220 &paddq ($car1,$acc1); # +=c1
221
222 &movq ($acc0,$car0);
223 &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
224 &pand ($acc0,$mask);
225 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
226 &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
227 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
228 &psrlq ($car0,32);
229 &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
230 &psrlq ($car1,32);
231 &paddq ($car0,$temp); # +=tp[j+1]
232
233 &dec ($num);
234 &lea ($j,&DWP(1,$j)); # j++
235 &jnz (&label("inner"));
236
237 &mov ($num,$j); # $num was counted down to zero; reload it from $j
238 &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
239 &pmuludq($acc1,$mul1); # np[num-1]*m1
240 &paddq ($car0,$acc0); # +=c0
241 &paddq ($car1,$acc1); # +=c1
242
243 &movq ($acc0,$car0);
244 &pand ($acc0,$mask);
245 &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
246 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
247 &psrlq ($car0,32);
248 &psrlq ($car1,32);
249
250 &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
251 &paddq ($car1,$car0);
252 &paddq ($car1,$temp);
253 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
254
255 &lea ($i,&DWP(1,$i)); # i++
256 &cmp ($i,$num);
257 &jle (&label("outer"));
258
259 &emms (); # done with mmx bank
260 &jmp (&label("common_tail"));
261
262&set_label("non_sse2",16);
263}
264
# Integer-only path. The "if (0)" branch is never generated; it is kept as
# a record of the alternative of declining the job (returning 0).
265if (0) {
266 &mov ("esp",$_sp);
267 &xor ("eax","eax"); # signal "not fast enough [yet]"
268 &jmp (&label("just_leave"));
269 # While the below code provides competitive performance for
270 # all key lengths on modern Intel cores, it's still more
271 # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
272 # means compared to the original integer-only assembler.
273 # 512-bit RSA sign is better by ~40%, but that's about all
274 # one can say about all CPUs...
275} else {
276$inp="esi"; # integer path uses these registers differently
277$word="edi";
278$carry="ebp";
279
# Choose between generic multiplication and the dedicated squaring code:
# $carry ends up zero only when ($num+1)&1 is zero and ap==bp, in which
# case we branch to bn_sqr_mont. (The mov between "or" and "jz" leaves the
# flags alone.) eax is pre-loaded with &bp[num] for the generic path.
280 &mov ($inp,$_ap);
281 &lea ($carry,&DWP(1,$num));
282 &mov ($word,$_bp);
283 &xor ($j,$j); # j=0
284 &mov ("edx",$inp);
285 &and ($carry,1); # see if num is even
286 &sub ("edx",$word); # see if ap==bp
287 &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
288 &or ($carry,"edx");
289 &mov ($word,&DWP(0,$word)); # bp[0]
290 &jz (&label("bn_sqr_mont"));
291 &mov ($_bpend,"eax");
292 &mov ("eax",&DWP(0,$inp));
293 &xor ("edx","edx");
294
# tp = ap*bp[0] (first pass, nothing to accumulate yet)
295&set_label("mull",16);
296 &mov ($carry,"edx");
297 &mul ($word); # ap[j]*bp[0]
298 &add ($carry,"eax");
299 &lea ($j,&DWP(1,$j));
300 &adc ("edx",0);
301 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
302 &cmp ($j,$num);
303 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
304 &jl (&label("mull"));
305
306 &mov ($carry,"edx");
307 &mul ($word); # ap[num-1]*bp[0]
308 &mov ($word,$_n0);
309 &add ("eax",$carry);
310 &mov ($inp,$_np);
311 &adc ("edx",0);
312 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
313
314 &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
315 &xor ($j,$j);
316 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
317 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
318
319 &mov ("eax",&DWP(0,$inp)); # np[0]
320 &mul ($word); # np[0]*m
321 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
322 &mov ("eax",&DWP(4,$inp)); # np[1]
323 &adc ("edx",0);
324 &inc ($j);
325
326 &jmp (&label("2ndmadd"));
327
# tp += ap*bp[i] (entered from the bottom of 2ndmadd for every further i)
328&set_label("1stmadd",16);
329 &mov ($carry,"edx");
330 &mul ($word); # ap[j]*bp[i]
331 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
332 &lea ($j,&DWP(1,$j));
333 &adc ("edx",0);
334 &add ($carry,"eax");
335 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
336 &adc ("edx",0);
337 &cmp ($j,$num);
338 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
339 &jl (&label("1stmadd"));
340
341 &mov ($carry,"edx");
342 &mul ($word); # ap[num-1]*bp[i]
343 &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
344 &mov ($word,$_n0);
345 &adc ("edx",0);
346 &mov ($inp,$_np);
347 &add ($carry,"eax");
348 &adc ("edx",0);
349 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
350
351 &xor ($j,$j);
352 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
353 &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
354 &adc ($j,0);
355 &mov ("eax",&DWP(0,$inp)); # np[0]
356 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
357 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
358
359 &mul ($word); # np[0]*m
360 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
361 &mov ("eax",&DWP(4,$inp)); # np[1]
362 &adc ("edx",0);
363 &mov ($j,1);
364
# tp = (tp + np*m) shifted down by one word (results are stored at j-1)
365&set_label("2ndmadd",16);
366 &mov ($carry,"edx");
367 &mul ($word); # np[j]*m
368 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
369 &lea ($j,&DWP(1,$j));
370 &adc ("edx",0);
371 &add ($carry,"eax");
372 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
373 &adc ("edx",0);
374 &cmp ($j,$num);
375 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
376 &jl (&label("2ndmadd"));
377
378 &mov ($carry,"edx");
379 &mul ($word); # np[j]*m
380 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
381 &adc ("edx",0);
382 &add ($carry,"eax");
383 &adc ("edx",0);
384 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
385
# Fold in the top words, then either finish (bp pointer reached $_bpend)
# or advance to the next bp word and go round again.
386 &xor ("eax","eax");
387 &mov ($j,$_bp); # &bp[i]
388 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
389 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
390 &lea ($j,&DWP(4,$j));
391 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
392 &cmp ($j,$_bpend);
393 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
394 &je (&label("common_tail"));
395
396 &mov ($word,&DWP(0,$j)); # bp[i+1]
397 &mov ($inp,$_ap);
398 &mov ($_bp,$j); # &bp[++i]
399 &xor ($j,$j);
400 &xor ("edx","edx");
401 &mov ("eax",&DWP(0,$inp));
402 &jmp (&label("1stmadd"));
403
# Dedicated squaring procedure. $sbit shares its register with $num, so
# $num is parked in $_num first; the $_bp slot is reused to hold the index i.
404&set_label("bn_sqr_mont",16);
405$sbit=$num;
406 &mov ($_num,$num);
407 &mov ($_bp,$j); # i=0
408
409 &mov ("eax",$word); # ap[0]
410 &mul ($word); # ap[0]*ap[0]
411 &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
412 &mov ($sbit,"edx");
413 &shr ("edx",1);
414 &and ($sbit,1);
415 &inc ($j);
# Each product ap[j]*ap[0] is doubled on the fly: the stored word is
# 2*low+$sbit and the bit shifted out of the top becomes the next $sbit.
416&set_label("sqr",16);
417 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
418 &mov ($carry,"edx");
419 &mul ($word); # ap[j]*ap[0]
420 &add ("eax",$carry);
421 &lea ($j,&DWP(1,$j));
422 &adc ("edx",0);
423 &lea ($carry,&DWP(0,$sbit,"eax",2));
424 &shr ("eax",31);
425 &cmp ($j,$_num);
426 &mov ($sbit,"eax");
427 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
428 &jl (&label("sqr"));
429
430 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
431 &mov ($carry,"edx");
432 &mul ($word); # ap[num-1]*ap[0]
433 &add ("eax",$carry);
434 &mov ($word,$_n0);
435 &adc ("edx",0);
436 &mov ($inp,$_np);
437 &lea ($carry,&DWP(0,$sbit,"eax",2));
438 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
439 &shr ("eax",31);
440 &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
441
442 &lea ($carry,&DWP(0,"eax","edx",2));
443 &mov ("eax",&DWP(0,$inp)); # np[0]
444 &shr ("edx",31);
445 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
446 &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
447
448 &mul ($word); # np[0]*m
449 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
450 &mov ($num,$j);
451 &adc ("edx",0);
452 &mov ("eax",&DWP(4,$inp)); # np[1]
453 &mov ($j,1);
454
# Reduction step for the squaring path: same job as 2ndmadd, but handling
# two words of np per iteration.
455&set_label("3rdmadd",16);
456 &mov ($carry,"edx");
457 &mul ($word); # np[j]*m
458 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
459 &adc ("edx",0);
460 &add ($carry,"eax");
461 &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
462 &adc ("edx",0);
463 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
464
465 &mov ($carry,"edx");
466 &mul ($word); # np[j+1]*m
467 &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
468 &lea ($j,&DWP(2,$j));
469 &adc ("edx",0);
470 &add ($carry,"eax");
471 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
472 &adc ("edx",0);
473 &cmp ($j,$num);
474 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
475 &jl (&label("3rdmadd"));
476
477 &mov ($carry,"edx");
478 &mul ($word); # np[j]*m
479 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
480 &adc ("edx",0);
481 &add ($carry,"eax");
482 &adc ("edx",0);
483 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
484
485 &mov ($j,$_bp); # i
486 &xor ("eax","eax");
487 &mov ($inp,$_ap);
488 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
489 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
490 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
491 &cmp ($j,$num);
492 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
493 &je (&label("common_tail"));
494
# Next squaring round: add ap[i]^2 at position i, then the doubled cross
# products ap[j]*ap[i] for j>i.
495 &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
496 &lea ($j,&DWP(1,$j));
497 &mov ("eax",$word);
498 &mov ($_bp,$j); # ++i
499 &mul ($word); # ap[i]*ap[i]
500 &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
501 &adc ("edx",0);
502 &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
503 &xor ($carry,$carry);
504 &cmp ($j,$num);
505 &lea ($j,&DWP(1,$j));
506 &je (&label("sqrlast"));
507
508 &mov ($sbit,"edx"); # zaps $num
509 &shr ("edx",1);
510 &and ($sbit,1);
511&set_label("sqradd",16);
512 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
513 &mov ($carry,"edx");
514 &mul ($word); # ap[j]*ap[i]
515 &add ("eax",$carry);
516 &lea ($carry,&DWP(0,"eax","eax"));
517 &adc ("edx",0);
518 &shr ("eax",31);
519 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
520 &lea ($j,&DWP(1,$j));
521 &adc ("eax",0);
522 &add ($carry,$sbit);
523 &adc ("eax",0);
524 &cmp ($j,$_num);
525 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
526 &mov ($sbit,"eax");
527 &jle (&label("sqradd"));
528
529 &mov ($carry,"edx");
530 &lea ("edx",&DWP(0,$sbit,"edx",2));
531 &shr ($carry,31);
# Store the top two words, start the reduction for this round and rebuild
# $num from $j (it was clobbered through $sbit above).
532&set_label("sqrlast");
533 &mov ($word,$_n0);
534 &mov ($inp,$_np);
535 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
536
537 &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
538 &mov ("eax",&DWP(0,$inp)); # np[0]
539 &adc ($carry,0);
540 &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
541 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
542
543 &mul ($word); # np[0]*m
544 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
545 &lea ($num,&DWP(-1,$j));
546 &adc ("edx",0);
547 &mov ($j,1);
548 &mov ("eax",&DWP(4,$inp)); # np[1]
549
550 &jmp (&label("3rdmadd"));
551}
552
# Shared epilogue for the SSE2 and integer paths: compute rp = tp - np,
# then choose between tp and that difference, copy the choice to rp,
# overwrite tp, restore the caller's stack pointer and return 1 in eax.
553&set_label("common_tail",16);
554 &mov ($np,$_np); # load modulus pointer
555 &mov ($rp,$_rp); # load result pointer
556 &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
557
558 &mov ("eax",&DWP(0,$tp)); # tp[0]
559 &mov ($j,$num); # j=num-1
560 &xor ($i,$i); # i=0 and clear CF!
561
# The borrow travels in CF between iterations; the loop branch tests the
# flags left by "dec", which the mov and lea in between do not alter.
562&set_label("sub",16);
563 &sbb ("eax",&DWP(0,$np,$i,4));
564 &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
565 &dec ($j); # doesn't affect CF!
566 &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
567 &lea ($i,&DWP(1,$i)); # i++
568 &jge (&label("sub"));
569
# eax (top word of tp minus the final borrow) masks the tp pointer and,
# inverted, the rp pointer, so the choice is made without a branch.
570 &sbb ("eax",0); # handle upmost overflow bit
571 &and ($tp,"eax");
572 &not ("eax");
573 &mov ($np,$rp);
574 &and ($np,"eax");
575 &or ($tp,$np); # tp=carry?tp:rp
576
# Walks from index $num down to 0. The value written over tp is whatever
# the "sub" loop left in $j, i.e. a loop counter rather than operand data.
577&set_label("copy",16); # copy or in-place refresh
578 &mov ("eax",&DWP(0,$tp,$num,4));
579 &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
580 &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
581 &dec ($num);
582 &jge (&label("copy"));
583
584 &mov ("esp",$_sp); # pull saved stack pointer
585 &mov ("eax",1); # return 1: the multiplication was performed here
586&set_label("just_leave");
587&function_end("bn_mul_mont");
588
589&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
590
591&asm_finish();
diff --git a/src/lib/libcrypto/bn/asm/x86.pl b/src/lib/libcrypto/bn/asm/x86.pl
deleted file mode 100644
index 1bc4f1bb27..0000000000
--- a/src/lib/libcrypto/bn/asm/x86.pl
+++ /dev/null
@@ -1,28 +0,0 @@
#!/usr/local/bin/perl
#
# Driver script: loads the perlasm front end and the per-routine generator
# files under x86/, then invokes each generator once so that the whole set
# of bignum word routines is emitted between asm_init() and asm_finish().

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

# Generator files, loaded in the same order as before.
foreach $part ("mul_add","mul","sqr","div","add","sub","comba")
	{
	require "x86/$part.pl";
	}

&asm_init($ARGV[0],$0);

# Word routines: the generator sub and the symbol it is asked to emit
# share one name, so the sub is called through its name.
foreach $fn ("bn_mul_add_words","bn_mul_words","bn_sqr_words",
	     "bn_div_words","bn_add_words","bn_sub_words")
	{
	&$fn($fn);
	}

# Comba routines, 8-word variant before the 4-word variant.
foreach $n (8,4)
	{
	&bn_mul_comba("bn_mul_comba$n",$n);
	}
foreach $n (8,4)
	{
	&bn_sqr_comba("bn_sqr_comba$n",$n);
	}

&asm_finish();
28
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl
deleted file mode 100644
index 0b5cf583e3..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/add.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub bn_add_words
	{
	# Emits the routine $name: r[i] = a[i] + b[i] for num words, with
	# the carry threaded through eax ($c), which is left holding the
	# final carry.  Words are handled eight at a time by an unrolled
	# loop, then up to seven leftover words by an unrolled tail.
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	($a,$b,$c,$r)=("esi","edi","eax","ebx");
	($tmp1,$tmp2,$num)=("ecx","edx","ebp");

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));	# fewer than 8 words: tail only

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);			# a word + incoming carry
		&mov($c,0);
		&adc($c,$c);			# c = carry out of that add
		&add($tmp1,$tmp2);		# + b word
		&adc($c,0);			# c += carry out of that add
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# leftover word count
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&add($tmp1,$tmp2);
		&adc($c,0);
		# The seventh tail word is always the last one, so it needs
		# neither the countdown nor the early exit.
		if ($i != 6)
			{ &dec($num); }
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r (mov leaves the flags from dec intact)
		if ($i != 6)
			{ &jz(&label("aw_end")); }
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
deleted file mode 100644
index 2291253629..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/comba.pl
+++ /dev/null
@@ -1,277 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub mul_add_c
	{
	# Emits one comba multiply step: (c2,c1,c0) += eax*edx.
	# eax and edx are expected to be pre-loaded with a[$ai] and b[$bi].
	#
	# $pos selects what is interleaved with the accumulation:
	#   0     load the next a word (index $na) and b word (index $nb)
	#   1     load the r pointer, store $c0 to r[$i], then load the
	#         next a and b words
	#   > 1   load the r pointer and store $c0 to r[$i] only
	#         (callers use this for the final product)
	local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("mul a[$ai]*b[$bi]");

	&mul("edx");
	&add($c0,"eax");
	if ($pos == 0)
		{ &mov("eax",&DWP($na*4,$a,"",0)); }	# load next a
	elsif ($pos > 0)
		{ &mov("eax",&wparam(0)); }		# load r[]
	###
	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{ &mov("edx",&DWP($nb*4,$b,"",0)); }	# load next b
	###
	&adc($c2,0);
	if ($pos > 0)
		{
		&mov(&DWP($i*4,"eax","",0),$c0);	# save r[]
		if ($pos == 1)
			{ &mov("eax",&DWP($na*4,$a,"",0)); }	# load next a
		}
	}
31
sub sqr_add_c
	{
	# Emits one comba squaring step for a diagonal term:
	# (c2,c1,c0) += eax*eax when $ai == $bi, eax*edx otherwise.
	# eax (and edx, when used) are expected to be pre-loaded.
	#
	# $pos == 0: load the next a word (index $na) into eax.
	# $pos == 1: store $c0 to r[$i], load a[$nb] into edx when it
	#            differs from a[$na], and load a[$na] into eax.
	# $pos  > 1: store $c0 to r[$i] only.
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul($ai == $bi ? "eax" : "edx");
	&add($c0,"eax");
	if ($pos == 0)
		{ &mov("eax",&DWP($na*4,$a,"",0)); }	# load next a
	###
	&adc($c1,"edx");
	if ($pos == 1 && $na != $nb)
		{ &mov("edx",&DWP($nb*4,$a,"",0)); }
	###
	&adc($c2,0);
	if ($pos > 0)
		{
		&mov(&DWP($i*4,$r,"",0),$c0);		# save r[]
		if ($pos == 1)
			{ &mov("eax",&DWP($na*4,$a,"",0)); }	# load next a
		}
	}
60
sub sqr_add_c2
	{
	# Emits one comba squaring step for an off-diagonal term:
	# (c2,c1,c0) += 2 * (product of the pre-loaded eax and edx).
	# The 64-bit product in edx:eax is doubled first (the bit shifted
	# out goes to $c2) and then added into $c0/$c1 with the carry
	# again going to $c2.
	#
	# $pos == 0 or 1: load the next a word (index $na) into eax, and
	#                 a[$nb] into edx when $na != $nb.
	# $pos  > 0:      store $c0 to r[$i].
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul($ai == $bi ? "eax" : "edx");
	&add("eax","eax");		# double the low word
	###
	&adc("edx","edx");		# double the high word, pulling in the carry
	###
	&adc($c2,0);			# bit shifted out of the high word
	&add($c0,"eax");
	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{ &mov("eax",&DWP($na*4,$a,"",0)); }	# load next a (mov keeps the flags)
	&adc($c2,0);
	if ($pos > 0)
		{ &mov(&DWP($i*4,$r,"",0),$c0); }	# save r[]
	if ($pos <= 1 && $na != $nb)
		{ &mov("edx",&DWP($nb*4,$a,"",0)); }
	###
	}
92
sub bn_mul_comba
	{
	# Emits the fully unrolled routine $name that multiplies two
	# $num-word numbers column by column (comba method).  Result word
	# $i is the sum of a[$ai]*b[$bi] over $ai+$bi == $i; the three
	# accumulator registers rotate roles after each column is stored.
	local($name,$num)=@_;
	local($a,$b,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($tot,$end);

	&function_begin_B($name,"");

	($c0,$c1,$c2)=("ebx","ecx","ebp");
	($a,$b)=("esi","edi");

	$as=$ae=$bs=$be=0;
	$tot=$num+$num-1;		# number of columns

	&push("esi");
	&mov($a,&wparam(1));
	&push("edi");
	&mov($b,&wparam(2));
	&push("ebp");
	&push("ebx");

	&xor($c0,$c0);
	&mov("eax",&DWP(0,$a,"",0));	# first word of a
	&xor($c1,$c1);
	&mov("edx",&DWP(0,$b,"",0));	# first word of b

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("################## Calculate word $i");

		for ($j=$bs; $j<$end; $j++)
			{
			&xor($c2,$c2) if ($j == $bs);
			if (($j+1) == $end)
				{
				# Last product of this column: store the
				# word ($v=1), or store without reloading
				# when it is the very last column ($v=2).
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			else
				{
				# More products follow in this column.
				$v=0;
				$na=($ai-1);
				$nb=($bi+1);
				}
#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
			&mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
			if ($v)
				{
				&comment("saved r[$i]");
				# &mov("eax",&wparam(0));
				# &mov(&DWP($i*4,"eax","",0),$c0);
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}
		if ($i < ($num-1))
			{
			# Columns still growing: extend the a/b window.
			$as++;
			$be++;
			}
		else
			{
			# Columns shrinking: slide the window start.
			$ae++;
			$bs++;
			}
		}
	&comment("save r[$i]");
	# eax still holds the r pointer loaded by the final mul_add_c call.
	# &mov("eax",&wparam(0));
	&mov(&DWP($i*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
183
sub bn_sqr_comba
	{
	# Emits the fully unrolled routine $name that squares a $num-word
	# number column by column (comba method).  Within a column the
	# symmetric products a[i]*a[j] (i != j) are added once, doubled,
	# via sqr_add_c2; the diagonal product a[i]*a[i] is added via
	# sqr_add_c.  The three accumulator registers rotate roles after
	# each column is stored.
	local($name,$num)=@_;
	# Register names are assigned below.  They used to be seeded from
	# @_ by a copy-paste slip, which put the routine name and word
	# count into $r and $a until they were overwritten.
	local($r,$a,$c0,$c1,$c2);
	local($i,$j,$v,$na,$nb,$as,$ae,$bs,$be,$ai,$bi);
	local($tot,$end);

	&function_begin_B($name,"");

	$c0="ebx";
	$c1="ecx";
	$c2="ebp";
	$a="esi";
	$r="edi";

	&push("esi");
	&push("edi");
	&push("ebp");
	&push("ebx");
	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&xor($c0,$c0);
	&xor($c1,$c1);
	&mov("eax",&DWP(0,$a,"",0));	# load the first word

	$as=0;
	$ae=0;
	$bs=0;
	$be=0;
	$tot=$num+$num-1;		# number of columns

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("############### Calculate word $i");
		for ($j=$bs; $j<$end; $j++)
			{
			&xor($c2,$c2) if ($j == $bs);
			if (($ai-1) < ($bi+1))
				{
				# The next index pair would cross the
				# diagonal, so this is the column's last
				# term: store ($v=1), or store without
				# reloading on the final column ($v=2).
				$v=1;
				$v=2 if ($i+1) == $tot;
				}
			else
				{ $v=0; }
			if (!$v)
				{
				$na=$ai-1;
				$nb=$bi+1;
				}
			else
				{
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			if ($ai == $bi)
				{
				&sqr_add_c($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			if ($v)
				{
				&comment("saved r[$i]");
				#&mov(&DWP($i*4,$r,"",0),$c0);
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}
		$as++ if ($i < ($num-1));
		$ae++ if ($i >= ($num-1));

		$bs++ if ($i >= ($num-1));
		$be++ if ($i < ($num-1));
		}
	&mov(&DWP($i*4,$r,"",0),$c0);	# top word of the result
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
276
2771;
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl
deleted file mode 100644
index 0e90152caa..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/div.pl
+++ /dev/null
@@ -1,15 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub bn_div_words
	{
	# Emits the routine $name: loads its three word arguments into
	# edx, eax and ebx and issues a single "div ebx", i.e. divides
	# edx:eax by ebx.  The quotient is left in eax by the instruction.
	local($name)=@_;
	local($arg);

	&function_begin($name,"");
	foreach $arg (["edx",0],["eax",1],["ebx",2])
		{
		# register <- argument number
		&mov($arg->[0],&wparam($arg->[1]));
		}
	&div("ebx");
	&function_end($name);
	}
151;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl
deleted file mode 100644
index 674cb9b055..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul.pl
+++ /dev/null
@@ -1,77 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub bn_mul_words
	{
	# Emits the routine $name: r[i] = low word of (a[i]*w + carry),
	# with the high word becoming the next carry.  Words are handled
	# eight at a time by an unrolled loop, then up to seven leftover
	# words by an unrolled tail.  The final carry is returned in eax.
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	($Low,$High)=("eax","edx");
	($a,$w,$r,$c,$num)=("ebx","ecx","edi","esi","ebp");

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&mov($num,&wparam(2));
	&mov($w,&wparam(3));

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	# $i is the byte offset of the word being processed.
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		# XXX

		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);

		&mov($c,"edx");			# c= H(t);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# leftover word count
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		# XXX
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		# The seventh tail word is always the last one.
		if ($i != 7-1)
			{
			&dec($num);
			&jz(&label("mw_end"));
			}
		}
	&set_label("mw_end",0);
	&mov("eax",$c);

	&function_end($name);
	}
76
771;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
deleted file mode 100644
index 61830d3a90..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul_add.pl
+++ /dev/null
@@ -1,87 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub bn_mul_add_words
	{
	# Emits the routine $name: r[i] = low word of (a[i]*w + r[i] +
	# carry), with the high word becoming the next carry.  Words are
	# handled eight at a time by an unrolled loop, then up to seven
	# leftover words by an unrolled tail.  The final carry is
	# returned in eax.  ecx is both a scratch counter and the value
	# parked in the one-word stack temporary pushed below.
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	($Low,$High)=("eax","edx");
	($a,$w,$r,$c)=("ebx","ebp","edi","esi");

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));

	&mov("ecx",&wparam(2));
	&mov($a,&wparam(1));

	&and("ecx",0xfffffff8);		# num rounded down to a multiple of 8
	&mov($w,&wparam(3));

	&push("ecx");			# Up the stack for a tmp variable

	&jz(&label("maw_finish"));	# flags are still those of the and

	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");		# park the remaining count

	# $i is the byte offset of the word being processed.
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&mov($c,&DWP($i,$r,"",0));	# fetch *r
		&adc("edx",0);			# H(t)+=carry
		&add("eax",$c);			# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		}

	&comment("");
	&mov("ecx",&swtmp(0));		# reload the remaining count
	&add($a,32);
	&add($r,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# get num
	&and("ecx",7);			# leftover word count
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&mov($c,&DWP($i*4,$r,"",0));	# fetch *r
		&adc("edx",0);			# H(t)+=carry
		&add("eax",$c);			# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		# The seventh tail word is always the last one, so it needs
		# neither the countdown nor the early exit.
		if ($i != 7-1)
			{ &dec("ecx"); }
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c= H(t);
		if ($i != 7-1)
			{ &jz(&label("maw_end")); }
		}
	&set_label("maw_end",0);
	&mov("eax",$c);

	&pop("ecx");			# drop the stack temporary

	&function_end($name);
	}
86
871;
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl
deleted file mode 100644
index 1f90993cf6..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sqr.pl
+++ /dev/null
@@ -1,60 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub bn_sqr_words
	{
	# Emits the routine $name: for each input word a[i], stores the
	# double-word square with the low half in r[2i] and the high half
	# in r[2i+1].  Words are handled eight at a time by an unrolled
	# loop, then up to seven leftover words by an unrolled tail.
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	($r,$a,$num)=("esi","edi","ebx");

	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&mov($num,&wparam(2));

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	# $i is the byte offset into a; the matching offset into r is 2*$i.
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# *a
		# XXX
		&mul("eax");				# *a * *a
		&mov(&DWP($i*2,$r,"",0),"eax");		# low half
		&mov(&DWP($i*2+4,$r,"",0),"edx");	# high half
		}

	&comment("");
	&add($a,32);
	&add($r,64);
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# leftover word count
	&jz(&label("sw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# *a
		# XXX
		&mul("eax");				# *a * *a
		&mov(&DWP($i*8,$r,"",0),"eax");		# low half
		# The seventh tail word is always the last one.
		if ($i != 7-1)
			{ &dec($num); }
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high half
		if ($i != 7-1)
			{ &jz(&label("sw_end")); }
		}
	&set_label("sw_end",0);

	&function_end($name);
	}
59
601;
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl
deleted file mode 100644
index 837b0e1b07..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sub.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
sub bn_sub_words
	{
	# Emits the routine $name: r[i] = a[i] - b[i] for num words, with
	# the borrow threaded through eax ($c), which is left holding the
	# final borrow.  Words are handled eight at a time by an unrolled
	# loop, then up to seven leftover words by an unrolled tail.
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	($a,$b,$c,$r)=("esi","edi","eax","ebx");
	($tmp1,$tmp2,$num)=("ecx","edx","ebp");

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear borrow
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));	# fewer than 8 words: tail only

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# a word - incoming borrow
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of that sub
		&sub($tmp1,$tmp2);		# - b word
		&adc($c,0);			# c += borrow out of that sub
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# leftover word count
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		# The seventh tail word is always the last one, so it needs
		# neither the countdown nor the early exit.
		if ($i != 6)
			{ &dec($num); }
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r (mov leaves the flags from dec intact)
		if ($i != 6)
			{ &jz(&label("aw_end")); }
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
deleted file mode 100644
index f13f52dd85..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ /dev/null
@@ -1,597 +0,0 @@
1#ifdef __SUNPRO_C
2# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
3#else
4/*
5 * x86_64 BIGNUM accelerator version 0.1, December 2002.
6 *
7 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
8 * project.
9 *
10 * Rights for redistribution and usage in source and binary forms are
11 * granted according to the OpenSSL license. Warranty of any kind is
12 * disclaimed.
13 *
14 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
15 * versions, like 1.0...
16 * A. Well, that's because this code is basically a quick-n-dirty
17 * proof-of-concept hack. As you can see it's implemented with
18 * inline assembler, which means that you're bound to GCC and that
19 * there might be enough room for further improvement.
20 *
21 * Q. Why inline assembler?
22 * A. x86_64 features own ABI which I'm not familiar with. This is
23 * why I decided to let the compiler take care of subroutine
24 * prologue/epilogue as well as register allocation. For reference.
25 * Win64 implements different ABI for AMD64, different from Linux.
26 *
27 * Q. How much faster does it get?
28 * A. 'apps/openssl speed rsa dsa' output with no-asm:
29 *
30 * sign verify sign/s verify/s
31 * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
32 * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
33 * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
34 * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
35 * sign verify sign/s verify/s
36 * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
37 * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
38 * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
39 *
40 * 'apps/openssl speed rsa dsa' output with this module:
41 *
42 * sign verify sign/s verify/s
43 * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
44 * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
45 * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
46 * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
47 * sign verify sign/s verify/s
48 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
49 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
50 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
51 *
52 * For the reference. IA-32 assembler implementation performs
53 * very much like 64-bit code compiled with no-asm on the same
54 * machine.
55 */
56
#define BN_ULONG unsigned long

/*
 * Word-level multiply helpers built on the x86_64 "mulq" instruction.
 *
 * "m"(a), "+m"(r)	is the way to favor DirectPath micro-code;
 * "g"(0)		lets the compiler decide where it wants to keep
 *			the value of zero;
 *
 * __asm__ is used instead of asm so the macros also compile in strict
 * ISO modes (-ansi, -std=c99, ...), where GCC does not recognise asm.
 */

/*
 * mul_add(r,a,word,carry): (carry,r) = a*word + r + carry.
 * r must be an lvalue in memory and a a memory operand; carry is
 * read and then replaced with the high word of the result.
 */
#define mul_add(r,a,word,carry) do {	\
	register BN_ULONG high,low;	\
	__asm__ ("mulq %3"		\
		: "=a"(low),"=d"(high)	\
		: "a"(word),"m"(a)	\
		: "cc");		\
	__asm__ ("addq %2,%0; adcq %3,%1"	\
		: "+r"(carry),"+d"(high)\
		: "a"(low),"g"(0)	\
		: "cc");		\
	__asm__ ("addq %2,%0; adcq %3,%1"	\
		: "+m"(r),"+d"(high)	\
		: "r"(carry),"g"(0)	\
		: "cc");		\
	carry=high;			\
	} while (0)

/*
 * mul(r,a,word,carry): (carry,r) = a*word + carry.
 * The multiplicand uses "rm" rather than "g": mulq has no immediate
 * form, so the compiler must never be offered one.
 */
#define mul(r,a,word,carry) do {	\
	register BN_ULONG high,low;	\
	__asm__ ("mulq %3"		\
		: "=a"(low),"=d"(high)	\
		: "a"(word),"rm"(a)	\
		: "cc");		\
	__asm__ ("addq %2,%0; adcq %3,%1"	\
		: "+r"(carry),"+d"(high)\
		: "a"(low),"g"(0)	\
		: "cc");		\
	(r)=carry, carry=high;		\
	} while (0)

/*
 * sqr(r0,r1,a): (r1,r0) = a*a.
 * Wrapped in do/while(0) with no trailing semicolon so that
 * "sqr(...);" is exactly one statement and is safe in if/else.
 */
#define sqr(r0,r1,a) do {		\
	__asm__ ("mulq %2"		\
		: "=a"(r0),"=d"(r1)	\
		: "a"(a)		\
		: "cc");		\
	} while (0)
99
100BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
101 {
102 BN_ULONG c1=0;
103
104 if (num <= 0) return(c1);
105
106 while (num&~3)
107 {
108 mul_add(rp[0],ap[0],w,c1);
109 mul_add(rp[1],ap[1],w,c1);
110 mul_add(rp[2],ap[2],w,c1);
111 mul_add(rp[3],ap[3],w,c1);
112 ap+=4; rp+=4; num-=4;
113 }
114 if (num)
115 {
116 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
117 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
118 mul_add(rp[2],ap[2],w,c1); return c1;
119 }
120
121 return(c1);
122 }
123
124BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
125 {
126 BN_ULONG c1=0;
127
128 if (num <= 0) return(c1);
129
130 while (num&~3)
131 {
132 mul(rp[0],ap[0],w,c1);
133 mul(rp[1],ap[1],w,c1);
134 mul(rp[2],ap[2],w,c1);
135 mul(rp[3],ap[3],w,c1);
136 ap+=4; rp+=4; num-=4;
137 }
138 if (num)
139 {
140 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
141 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
142 mul(rp[2],ap[2],w,c1);
143 }
144 return(c1);
145 }
146
147void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
148 {
149 if (n <= 0) return;
150
151 while (n&~3)
152 {
153 sqr(r[0],r[1],a[0]);
154 sqr(r[2],r[3],a[1]);
155 sqr(r[4],r[5],a[2]);
156 sqr(r[6],r[7],a[3]);
157 a+=4; r+=8; n-=4;
158 }
159 if (n)
160 {
161 sqr(r[0],r[1],a[0]); if (--n == 0) return;
162 sqr(r[2],r[3],a[1]); if (--n == 0) return;
163 sqr(r[4],r[5],a[2]);
164 }
165 }
166
167BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
168{ BN_ULONG ret,waste;
169
170 asm ("divq %4"
171 : "=a"(ret),"=d"(waste)
172 : "a"(l),"d"(h),"g"(d)
173 : "cc");
174
175 return ret;
176}
177
178BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
179{ BN_ULONG ret=0,i=0;
180
181 if (n <= 0) return 0;
182
183 asm (
184 " subq %2,%2 \n"
185 ".align 16 \n"
186 "1: movq (%4,%2,8),%0 \n"
187 " adcq (%5,%2,8),%0 \n"
188 " movq %0,(%3,%2,8) \n"
189 " leaq 1(%2),%2 \n"
190 " loop 1b \n"
191 " sbbq %0,%0 \n"
192 : "=&a"(ret),"+c"(n),"=&r"(i)
193 : "r"(rp),"r"(ap),"r"(bp)
194 : "cc"
195 );
196
197 return ret&1;
198}
199
200#ifndef SIMICS
201BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
202{ BN_ULONG ret=0,i=0;
203
204 if (n <= 0) return 0;
205
206 asm (
207 " subq %2,%2 \n"
208 ".align 16 \n"
209 "1: movq (%4,%2,8),%0 \n"
210 " sbbq (%5,%2,8),%0 \n"
211 " movq %0,(%3,%2,8) \n"
212 " leaq 1(%2),%2 \n"
213 " loop 1b \n"
214 " sbbq %0,%0 \n"
215 : "=&a"(ret),"+c"(n),"=&r"(i)
216 : "r"(rp),"r"(ap),"r"(bp)
217 : "cc"
218 );
219
220 return ret&1;
221}
222#else
223/* Simics 1.4<7 has buggy sbbq:-( */
224#define BN_MASK2 0xffffffffffffffffL
225BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
226 {
227 BN_ULONG t1,t2;
228 int c=0;
229
230 if (n <= 0) return((BN_ULONG)0);
231
232 for (;;)
233 {
234 t1=a[0]; t2=b[0];
235 r[0]=(t1-t2-c)&BN_MASK2;
236 if (t1 != t2) c=(t1 < t2);
237 if (--n <= 0) break;
238
239 t1=a[1]; t2=b[1];
240 r[1]=(t1-t2-c)&BN_MASK2;
241 if (t1 != t2) c=(t1 < t2);
242 if (--n <= 0) break;
243
244 t1=a[2]; t2=b[2];
245 r[2]=(t1-t2-c)&BN_MASK2;
246 if (t1 != t2) c=(t1 < t2);
247 if (--n <= 0) break;
248
249 t1=a[3]; t2=b[3];
250 r[3]=(t1-t2-c)&BN_MASK2;
251 if (t1 != t2) c=(t1 < t2);
252 if (--n <= 0) break;
253
254 a+=4;
255 b+=4;
256 r+=4;
257 }
258 return(c);
259 }
260#endif
261
/*
 * Comba accumulator primitives.  c=(c2,c1,c0) is a three-word
 * accumulator, c0 being the least significant word.
 *
 * mul_add_c(a,b,c0,c1,c2)	-- c+=a*b
 * mul_add_c2(a,b,c0,c1,c2)	-- c+=2*a*b
 * sqr_add_c(a,i,c0,c1,c2)	-- c+=a[i]^2
 * sqr_add_c2(a,i,j,c0,c1,c2)	-- c+=2*a[i]*a[j]
 *
 * All of them use the variables t1 and t2 of the enclosing scope as
 * scratch for the low and high word of the product.  b in mul_add_c
 * and mul_add_c2 must be a memory operand.
 *
 * Every addition is a single addq/adcq/adcq chain over all three
 * accumulator words, so no carry is ever parked in a scratch word.
 * The previous mul_add_c2 doubled the product inside t1/t2 and then
 * added the carry of "c0 += t1" into t2; when the doubled high word
 * was all ones that addition wrapped to zero and the carry into c2 was
 * lost.  Adding the undoubled product twice avoids the problem.
 *
 * __asm__ is used instead of asm so the macros also compile in strict
 * ISO modes.
 */

#define mul_add_c(a,b,c0,c1,c2) do {	\
	__asm__ ("mulq %3"		\
		: "=a"(t1),"=d"(t2)	\
		: "a"(a),"m"(b)		\
		: "cc");		\
	__asm__ ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
		: "+r"(c0),"+r"(c1),"+r"(c2)		\
		: "r"(t1),"r"(t2),"g"(0)		\
		: "cc");				\
	} while (0)

#define sqr_add_c(a,i,c0,c1,c2) do {	\
	__asm__ ("mulq %2"		\
		: "=a"(t1),"=d"(t2)	\
		: "a"(a[i])		\
		: "cc");		\
	__asm__ ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
		: "+r"(c0),"+r"(c1),"+r"(c2)		\
		: "r"(t1),"r"(t2),"g"(0)		\
		: "cc");				\
	} while (0)

#define mul_add_c2(a,b,c0,c1,c2) do {	\
	__asm__ ("mulq %3"		\
		: "=a"(t1),"=d"(t2)	\
		: "a"(a),"m"(b)		\
		: "cc");		\
	__asm__ ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
		: "+r"(c0),"+r"(c1),"+r"(c2)		\
		: "r"(t1),"r"(t2),"g"(0)		\
		: "cc");				\
	__asm__ ("addq %3,%0; adcq %4,%1; adcq %5,%2"	\
		: "+r"(c0),"+r"(c1),"+r"(c2)		\
		: "r"(t1),"r"(t2),"g"(0)		\
		: "cc");				\
	} while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
343
344void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
345 {
346 BN_ULONG t1,t2;
347 BN_ULONG c1,c2,c3;
348
349 c1=0;
350 c2=0;
351 c3=0;
352 mul_add_c(a[0],b[0],c1,c2,c3);
353 r[0]=c1;
354 c1=0;
355 mul_add_c(a[0],b[1],c2,c3,c1);
356 mul_add_c(a[1],b[0],c2,c3,c1);
357 r[1]=c2;
358 c2=0;
359 mul_add_c(a[2],b[0],c3,c1,c2);
360 mul_add_c(a[1],b[1],c3,c1,c2);
361 mul_add_c(a[0],b[2],c3,c1,c2);
362 r[2]=c3;
363 c3=0;
364 mul_add_c(a[0],b[3],c1,c2,c3);
365 mul_add_c(a[1],b[2],c1,c2,c3);
366 mul_add_c(a[2],b[1],c1,c2,c3);
367 mul_add_c(a[3],b[0],c1,c2,c3);
368 r[3]=c1;
369 c1=0;
370 mul_add_c(a[4],b[0],c2,c3,c1);
371 mul_add_c(a[3],b[1],c2,c3,c1);
372 mul_add_c(a[2],b[2],c2,c3,c1);
373 mul_add_c(a[1],b[3],c2,c3,c1);
374 mul_add_c(a[0],b[4],c2,c3,c1);
375 r[4]=c2;
376 c2=0;
377 mul_add_c(a[0],b[5],c3,c1,c2);
378 mul_add_c(a[1],b[4],c3,c1,c2);
379 mul_add_c(a[2],b[3],c3,c1,c2);
380 mul_add_c(a[3],b[2],c3,c1,c2);
381 mul_add_c(a[4],b[1],c3,c1,c2);
382 mul_add_c(a[5],b[0],c3,c1,c2);
383 r[5]=c3;
384 c3=0;
385 mul_add_c(a[6],b[0],c1,c2,c3);
386 mul_add_c(a[5],b[1],c1,c2,c3);
387 mul_add_c(a[4],b[2],c1,c2,c3);
388 mul_add_c(a[3],b[3],c1,c2,c3);
389 mul_add_c(a[2],b[4],c1,c2,c3);
390 mul_add_c(a[1],b[5],c1,c2,c3);
391 mul_add_c(a[0],b[6],c1,c2,c3);
392 r[6]=c1;
393 c1=0;
394 mul_add_c(a[0],b[7],c2,c3,c1);
395 mul_add_c(a[1],b[6],c2,c3,c1);
396 mul_add_c(a[2],b[5],c2,c3,c1);
397 mul_add_c(a[3],b[4],c2,c3,c1);
398 mul_add_c(a[4],b[3],c2,c3,c1);
399 mul_add_c(a[5],b[2],c2,c3,c1);
400 mul_add_c(a[6],b[1],c2,c3,c1);
401 mul_add_c(a[7],b[0],c2,c3,c1);
402 r[7]=c2;
403 c2=0;
404 mul_add_c(a[7],b[1],c3,c1,c2);
405 mul_add_c(a[6],b[2],c3,c1,c2);
406 mul_add_c(a[5],b[3],c3,c1,c2);
407 mul_add_c(a[4],b[4],c3,c1,c2);
408 mul_add_c(a[3],b[5],c3,c1,c2);
409 mul_add_c(a[2],b[6],c3,c1,c2);
410 mul_add_c(a[1],b[7],c3,c1,c2);
411 r[8]=c3;
412 c3=0;
413 mul_add_c(a[2],b[7],c1,c2,c3);
414 mul_add_c(a[3],b[6],c1,c2,c3);
415 mul_add_c(a[4],b[5],c1,c2,c3);
416 mul_add_c(a[5],b[4],c1,c2,c3);
417 mul_add_c(a[6],b[3],c1,c2,c3);
418 mul_add_c(a[7],b[2],c1,c2,c3);
419 r[9]=c1;
420 c1=0;
421 mul_add_c(a[7],b[3],c2,c3,c1);
422 mul_add_c(a[6],b[4],c2,c3,c1);
423 mul_add_c(a[5],b[5],c2,c3,c1);
424 mul_add_c(a[4],b[6],c2,c3,c1);
425 mul_add_c(a[3],b[7],c2,c3,c1);
426 r[10]=c2;
427 c2=0;
428 mul_add_c(a[4],b[7],c3,c1,c2);
429 mul_add_c(a[5],b[6],c3,c1,c2);
430 mul_add_c(a[6],b[5],c3,c1,c2);
431 mul_add_c(a[7],b[4],c3,c1,c2);
432 r[11]=c3;
433 c3=0;
434 mul_add_c(a[7],b[5],c1,c2,c3);
435 mul_add_c(a[6],b[6],c1,c2,c3);
436 mul_add_c(a[5],b[7],c1,c2,c3);
437 r[12]=c1;
438 c1=0;
439 mul_add_c(a[6],b[7],c2,c3,c1);
440 mul_add_c(a[7],b[6],c2,c3,c1);
441 r[13]=c2;
442 c2=0;
443 mul_add_c(a[7],b[7],c3,c1,c2);
444 r[14]=c3;
445 r[15]=c1;
446 }
447
448void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
449 {
450 BN_ULONG t1,t2;
451 BN_ULONG c1,c2,c3;
452
453 c1=0;
454 c2=0;
455 c3=0;
456 mul_add_c(a[0],b[0],c1,c2,c3);
457 r[0]=c1;
458 c1=0;
459 mul_add_c(a[0],b[1],c2,c3,c1);
460 mul_add_c(a[1],b[0],c2,c3,c1);
461 r[1]=c2;
462 c2=0;
463 mul_add_c(a[2],b[0],c3,c1,c2);
464 mul_add_c(a[1],b[1],c3,c1,c2);
465 mul_add_c(a[0],b[2],c3,c1,c2);
466 r[2]=c3;
467 c3=0;
468 mul_add_c(a[0],b[3],c1,c2,c3);
469 mul_add_c(a[1],b[2],c1,c2,c3);
470 mul_add_c(a[2],b[1],c1,c2,c3);
471 mul_add_c(a[3],b[0],c1,c2,c3);
472 r[3]=c1;
473 c1=0;
474 mul_add_c(a[3],b[1],c2,c3,c1);
475 mul_add_c(a[2],b[2],c2,c3,c1);
476 mul_add_c(a[1],b[3],c2,c3,c1);
477 r[4]=c2;
478 c2=0;
479 mul_add_c(a[2],b[3],c3,c1,c2);
480 mul_add_c(a[3],b[2],c3,c1,c2);
481 r[5]=c3;
482 c3=0;
483 mul_add_c(a[3],b[3],c1,c2,c3);
484 r[6]=c1;
485 r[7]=c2;
486 }
487
488void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
489 {
490 BN_ULONG t1,t2;
491 BN_ULONG c1,c2,c3;
492
493 c1=0;
494 c2=0;
495 c3=0;
496 sqr_add_c(a,0,c1,c2,c3);
497 r[0]=c1;
498 c1=0;
499 sqr_add_c2(a,1,0,c2,c3,c1);
500 r[1]=c2;
501 c2=0;
502 sqr_add_c(a,1,c3,c1,c2);
503 sqr_add_c2(a,2,0,c3,c1,c2);
504 r[2]=c3;
505 c3=0;
506 sqr_add_c2(a,3,0,c1,c2,c3);
507 sqr_add_c2(a,2,1,c1,c2,c3);
508 r[3]=c1;
509 c1=0;
510 sqr_add_c(a,2,c2,c3,c1);
511 sqr_add_c2(a,3,1,c2,c3,c1);
512 sqr_add_c2(a,4,0,c2,c3,c1);
513 r[4]=c2;
514 c2=0;
515 sqr_add_c2(a,5,0,c3,c1,c2);
516 sqr_add_c2(a,4,1,c3,c1,c2);
517 sqr_add_c2(a,3,2,c3,c1,c2);
518 r[5]=c3;
519 c3=0;
520 sqr_add_c(a,3,c1,c2,c3);
521 sqr_add_c2(a,4,2,c1,c2,c3);
522 sqr_add_c2(a,5,1,c1,c2,c3);
523 sqr_add_c2(a,6,0,c1,c2,c3);
524 r[6]=c1;
525 c1=0;
526 sqr_add_c2(a,7,0,c2,c3,c1);
527 sqr_add_c2(a,6,1,c2,c3,c1);
528 sqr_add_c2(a,5,2,c2,c3,c1);
529 sqr_add_c2(a,4,3,c2,c3,c1);
530 r[7]=c2;
531 c2=0;
532 sqr_add_c(a,4,c3,c1,c2);
533 sqr_add_c2(a,5,3,c3,c1,c2);
534 sqr_add_c2(a,6,2,c3,c1,c2);
535 sqr_add_c2(a,7,1,c3,c1,c2);
536 r[8]=c3;
537 c3=0;
538 sqr_add_c2(a,7,2,c1,c2,c3);
539 sqr_add_c2(a,6,3,c1,c2,c3);
540 sqr_add_c2(a,5,4,c1,c2,c3);
541 r[9]=c1;
542 c1=0;
543 sqr_add_c(a,5,c2,c3,c1);
544 sqr_add_c2(a,6,4,c2,c3,c1);
545 sqr_add_c2(a,7,3,c2,c3,c1);
546 r[10]=c2;
547 c2=0;
548 sqr_add_c2(a,7,4,c3,c1,c2);
549 sqr_add_c2(a,6,5,c3,c1,c2);
550 r[11]=c3;
551 c3=0;
552 sqr_add_c(a,6,c1,c2,c3);
553 sqr_add_c2(a,7,5,c1,c2,c3);
554 r[12]=c1;
555 c1=0;
556 sqr_add_c2(a,7,6,c2,c3,c1);
557 r[13]=c2;
558 c2=0;
559 sqr_add_c(a,7,c3,c1,c2);
560 r[14]=c3;
561 r[15]=c1;
562 }
563
564void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
565 {
566 BN_ULONG t1,t2;
567 BN_ULONG c1,c2,c3;
568
569 c1=0;
570 c2=0;
571 c3=0;
572 sqr_add_c(a,0,c1,c2,c3);
573 r[0]=c1;
574 c1=0;
575 sqr_add_c2(a,1,0,c2,c3,c1);
576 r[1]=c2;
577 c2=0;
578 sqr_add_c(a,1,c3,c1,c2);
579 sqr_add_c2(a,2,0,c3,c1,c2);
580 r[2]=c3;
581 c3=0;
582 sqr_add_c2(a,3,0,c1,c2,c3);
583 sqr_add_c2(a,2,1,c1,c2,c3);
584 r[3]=c1;
585 c1=0;
586 sqr_add_c(a,2,c2,c3,c1);
587 sqr_add_c2(a,3,1,c2,c3,c1);
588 r[4]=c2;
589 c2=0;
590 sqr_add_c2(a,3,2,c3,c1,c2);
591 r[5]=c3;
592 c3=0;
593 sqr_add_c(a,3,c1,c2,c3);
594 r[6]=c1;
595 r[7]=c2;
596 }
597#endif
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
deleted file mode 100755
index c43b69592a..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl
+++ /dev/null
@@ -1,214 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005.
11#
12# Montgomery multiplication routine for x86_64. While it gives modest
13# 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more
14# than twice, >2x, as fast. Most common rsa1024 sign is improved by
15# respectful 50%. It remains to be seen if loop unrolling and
16# dedicated squaring routine can provide further improvement...
17
# First command-line argument; passed through unchanged on the translator's
# command line below.
$output=shift;

# Locate x86_64-xlate.pl relative to this script's own path ($0): first in
# the same directory, then in ../../perlasm/.  $dir keeps its trailing path
# separator, so it can be prepended directly.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# From here on everything printed to STDOUT is piped through the translator
# (run with the same perl binary, $^X).  Stop immediately if the pipe cannot
# be set up, rather than generating nothing and still exiting successfully.
open STDOUT,"| $^X $xlate $output" or die "can't call $xlate: $!";
26
# Symbolic register names interpolated into the assembly template below.
# The first six are the incoming arguments, annotated with the C prototype
# they correspond to; the remainder are working registers.
27# int bn_mul_mont(
28$rp="%rdi"; # BN_ULONG *rp,
29$ap="%rsi"; # const BN_ULONG *ap,
30$bp="%rdx"; # const BN_ULONG *bp,
31$np="%rcx"; # const BN_ULONG *np,
32$n0="%r8"; # const BN_ULONG *n0,
33$num="%r9"; # int num);
34$lo0="%r10"; # low half of the running ap[j]*bp[i] term
35$hi0="%r11"; # high half (carry) of the ap[j]*bp[i] term
36$bp="%r12"; # reassign $bp: the template copies %rdx here because mulq leaves the high product half in %rdx
37$hi1="%r13"; # high half (carry) of the np[j]*m1 term
38$i="%r14"; # outer loop index
39$j="%r15"; # inner loop index
40$m0="%rbx"; # current multiplier word bp[i]
41$m1="%rbp"; # tp[0]*n0, the per-iteration multiplier applied to np[]
42
43$code=<<___;
44.text
45
46.globl bn_mul_mont
47.type bn_mul_mont,\@function,6
48.align 16
49bn_mul_mont:
50 push %rbx
51 push %rbp
52 push %r12
53 push %r13
54 push %r14
55 push %r15
56
57 mov ${num}d,${num}d
58 lea 2($num),%rax
59 mov %rsp,%rbp
60 neg %rax
61 lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2))
62 and \$-1024,%rsp # minimize TLB usage
63
64 mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
65 mov %rdx,$bp # $bp reassigned, remember?
66
67 mov ($n0),$n0 # pull n0[0] value
68
69 xor $i,$i # i=0
70 xor $j,$j # j=0
71
72 mov ($bp),$m0 # m0=bp[0]
73 mov ($ap),%rax
74 mulq $m0 # ap[0]*bp[0]
75 mov %rax,$lo0
76 mov %rdx,$hi0
77
78 imulq $n0,%rax # "tp[0]"*n0
79 mov %rax,$m1
80
81 mulq ($np) # np[0]*m1
82 add $lo0,%rax # discarded
83 adc \$0,%rdx
84 mov %rdx,$hi1
85
86 lea 1($j),$j # j++
87.L1st:
88 mov ($ap,$j,8),%rax
89 mulq $m0 # ap[j]*bp[0]
90 add $hi0,%rax
91 adc \$0,%rdx
92 mov %rax,$lo0
93 mov ($np,$j,8),%rax
94 mov %rdx,$hi0
95
96 mulq $m1 # np[j]*m1
97 add $hi1,%rax
98 lea 1($j),$j # j++
99 adc \$0,%rdx
100 add $lo0,%rax # np[j]*m1+ap[j]*bp[0]
101 adc \$0,%rdx
102 mov %rax,-16(%rsp,$j,8) # tp[j-1]
103 cmp $num,$j
104 mov %rdx,$hi1
105 jl .L1st
106
107 xor %rdx,%rdx
108 add $hi0,$hi1
109 adc \$0,%rdx
110 mov $hi1,-8(%rsp,$num,8)
111 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
112
113 lea 1($i),$i # i++
114.align 4
115.Louter:
116 xor $j,$j # j=0
117
118 mov ($bp,$i,8),$m0 # m0=bp[i]
119 mov ($ap),%rax # ap[0]
120 mulq $m0 # ap[0]*bp[i]
121 add (%rsp),%rax # ap[0]*bp[i]+tp[0]
122 adc \$0,%rdx
123 mov %rax,$lo0
124 mov %rdx,$hi0
125
126 imulq $n0,%rax # tp[0]*n0
127 mov %rax,$m1
128
129 mulq ($np,$j,8) # np[0]*m1
130 add $lo0,%rax # discarded
131 mov 8(%rsp),$lo0 # tp[1]
132 adc \$0,%rdx
133 mov %rdx,$hi1
134
135 lea 1($j),$j # j++
136.align 4
137.Linner:
138 mov ($ap,$j,8),%rax
139 mulq $m0 # ap[j]*bp[i]
140 add $hi0,%rax
141 adc \$0,%rdx
142 add %rax,$lo0 # ap[j]*bp[i]+tp[j]
143 mov ($np,$j,8),%rax
144 adc \$0,%rdx
145 mov %rdx,$hi0
146
147 mulq $m1 # np[j]*m1
148 add $hi1,%rax
149 lea 1($j),$j # j++
150 adc \$0,%rdx
151 add $lo0,%rax # np[j]*m1+ap[j]*bp[i]+tp[j]
152 adc \$0,%rdx
153 mov (%rsp,$j,8),$lo0
154 cmp $num,$j
155 mov %rax,-16(%rsp,$j,8) # tp[j-1]
156 mov %rdx,$hi1
157 jl .Linner
158
159 xor %rdx,%rdx
160 add $hi0,$hi1
161 adc \$0,%rdx
162 add $lo0,$hi1 # pull upmost overflow bit
163 adc \$0,%rdx
164 mov $hi1,-8(%rsp,$num,8)
165 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
166
167 lea 1($i),$i # i++
168 cmp $num,$i
169 jl .Louter
170
171 lea (%rsp),$ap # borrow ap for tp
172 lea -1($num),$j # j=num-1
173
174 mov ($ap),%rax # tp[0]
175 xor $i,$i # i=0 and clear CF!
176 jmp .Lsub
177.align 16
178.Lsub: sbb ($np,$i,8),%rax
179 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
180 dec $j # doesn't affect CF!
181 mov 8($ap,$i,8),%rax # tp[i+1]
182 lea 1($i),$i # i++
183 jge .Lsub
184
185 sbb \$0,%rax # handle upmost overflow bit
186 and %rax,$ap
187 not %rax
188 mov $rp,$np
189 and %rax,$np
190 lea -1($num),$j
191 or $np,$ap # ap=borrow?tp:rp
192.align 16
193.Lcopy: # copy or in-place refresh
194 mov ($ap,$j,8),%rax
195 mov %rax,($rp,$j,8) # rp[i]=tp[i]
196 mov $i,(%rsp,$j,8) # zap temporary vector
197 dec $j
198 jge .Lcopy
199
200 mov 8(%rsp,$num,8),%rsp # restore %rsp
201 mov \$1,%rax
202 pop %r15
203 pop %r14
204 pop %r13
205 pop %r12
206 pop %rbp
207 pop %rbx
208 ret
209.size bn_mul_mont,.-bn_mul_mont
210.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
211___
212
# Send the generated assembly through the translator pipe.
print $code;
# STDOUT is a pipe to the translator: close() waits for that process and
# returns false if it failed or exited non-zero, so check it -- otherwise a
# broken translation would still look like a successful run to the build.
close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
deleted file mode 100644
index f1719a5877..0000000000
--- a/src/lib/libcrypto/bn/bn.h
+++ /dev/null
@@ -1,855 +0,0 @@
1/* crypto/bn/bn.h */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
60 *
61 * Portions of the attached software ("Contribution") are developed by
62 * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
63 *
64 * The Contribution is licensed pursuant to the Eric Young open source
65 * license provided above.
66 *
67 * The binary polynomial arithmetic software is originally written by
68 * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories.
69 *
70 */
71
72#ifndef HEADER_BN_H
73#define HEADER_BN_H
74
75#include <openssl/e_os2.h>
76#ifndef OPENSSL_NO_FP_API
77#include <stdio.h> /* FILE */
78#endif
79#include <openssl/ossl_typ.h>
80
81#ifdef __cplusplus
82extern "C" {
83#endif
84
85/* These preprocessor symbols control various aspects of the bignum headers and
86 * library code. They're not defined by any "normal" configuration, as they are
87 * intended for development and testing purposes. NB: defining all three can be
88 * useful for debugging application code as well as openssl itself.
89 *
90 * BN_DEBUG - turn on various debugging alterations to the bignum code
91 * BN_DEBUG_RAND - uses random poisoning of unused words to trip up
92 * mismanagement of bignum internals. You must also define BN_DEBUG.
93 */
94/* #define BN_DEBUG */
95/* #define BN_DEBUG_RAND */
96
97#define BN_MUL_COMBA
98#define BN_SQR_COMBA
99#define BN_RECURSION
100
101/* This next option uses the C libraries (2 word)/(1 word) function.
102 * If it is not defined, I use my C version (which is slower).
103 * The reason for this flag is that when the particular C compiler
104 * library routine is used, and the library is linked with a different
105 * compiler, the library is missing. This mostly happens when the
106 * library is built with gcc and then linked using normal cc. This would
107 * be a common occurrence because gcc normally produces code that is
108 * 2 times faster than system compilers for the big number stuff.
109 * For machines with only one compiler (or shared libraries), this should
110 * be on. Again, this is only really a problem on machines that
111 * use "long long", are 32-bit, and are not using my assembler code. */
112#if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
113 defined(OPENSSL_SYS_WIN32) || defined(linux)
114# ifndef BN_DIV2W
115# define BN_DIV2W
116# endif
117#endif
118
119/* assuming long is 64bit - this is the DEC Alpha
120 * unsigned long long is only 64 bits :-(, don't define
121 * BN_LLONG for the DEC Alpha */
/* Word-size parameters for builds where 'long' is the 64-bit bignum word. */
122#ifdef SIXTY_FOUR_BIT_LONG
123#define BN_ULLONG unsigned long long
124#define BN_ULONG unsigned long
125#define BN_LONG long
126#define BN_BITS 128 /* twice BN_BITS2 */
127#define BN_BYTES 8 /* bytes per word */
128#define BN_BITS2 64 /* bits per word */
129#define BN_BITS4 32 /* bits per half word */
/* NOTE(review): the BN_MASK literal below is a 128-bit pattern (matching
 * BN_BITS); no standard integer type is guaranteed to hold it, so any use
 * would likely be diagnosed or truncated -- confirm it is unused. */
130#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
131#define BN_MASK2 (0xffffffffffffffffL) /* all bits of one word */
132#define BN_MASK2l (0xffffffffL) /* low half of a word */
133#define BN_MASK2h (0xffffffff00000000L) /* high half of a word */
134#define BN_MASK2h1 (0xffffffff80000000L) /* high half plus the top bit of the low half */
135#define BN_TBIT (0x8000000000000000L) /* top bit of a word */
136#define BN_DEC_CONV (10000000000000000000UL) /* 10^19, i.e. 10^BN_DEC_NUM */
137#define BN_DEC_FMT1 "%lu"
138#define BN_DEC_FMT2 "%019lu" /* zero-padded to BN_DEC_NUM digits */
139#define BN_DEC_NUM 19
140#endif
141
142/* This is where the long long data type is 64 bits, but long is 32.
143 * For machines where there are 64bit registers, this is the mode to use.
144 * IRIX, on R4000 and above should use this mode, along with the relevant
145 * assembler code :-). Do NOT define BN_LLONG.
146 */
147#ifdef SIXTY_FOUR_BIT
148#undef BN_LLONG
149#undef BN_ULLONG
150#define BN_ULONG unsigned long long
151#define BN_LONG long long
152#define BN_BITS 128
153#define BN_BYTES 8
154#define BN_BITS2 64
155#define BN_BITS4 32
156#define BN_MASK2 (0xffffffffffffffffLL)
157#define BN_MASK2l (0xffffffffL)
158#define BN_MASK2h (0xffffffff00000000LL)
159#define BN_MASK2h1 (0xffffffff80000000LL)
160#define BN_TBIT (0x8000000000000000LL)
161#define BN_DEC_CONV (10000000000000000000ULL)
162#define BN_DEC_FMT1 "%llu"
163#define BN_DEC_FMT2 "%019llu"
164#define BN_DEC_NUM 19
165#endif
166
167#ifdef THIRTY_TWO_BIT
168#ifdef BN_LLONG
169# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__)
170# define BN_ULLONG unsigned __int64
171# else
172# define BN_ULLONG unsigned long long
173# endif
174#endif
175#define BN_ULONG unsigned long
176#define BN_LONG long
177#define BN_BITS 64
178#define BN_BYTES 4
179#define BN_BITS2 32
180#define BN_BITS4 16
181#ifdef OPENSSL_SYS_WIN32
182/* VC++ doesn't like the LL suffix */
183#define BN_MASK (0xffffffffffffffffL)
184#else
185#define BN_MASK (0xffffffffffffffffLL)
186#endif
187#define BN_MASK2 (0xffffffffL)
188#define BN_MASK2l (0xffff)
189#define BN_MASK2h1 (0xffff8000L)
190#define BN_MASK2h (0xffff0000L)
191#define BN_TBIT (0x80000000L)
192#define BN_DEC_CONV (1000000000L)
193#define BN_DEC_FMT1 "%lu"
194#define BN_DEC_FMT2 "%09lu"
195#define BN_DEC_NUM 9
196#endif
197
/* Word-size parameters for builds with a 16-bit bignum word
 * (BN_ULONG is unsigned short, the double word is unsigned long). */
198#ifdef SIXTEEN_BIT
199#ifndef BN_DIV2W
200#define BN_DIV2W
201#endif
202#define BN_ULLONG unsigned long
203#define BN_ULONG unsigned short
204#define BN_LONG short
205#define BN_BITS 32 /* twice BN_BITS2 */
206#define BN_BYTES 2 /* bytes per word */
207#define BN_BITS2 16 /* bits per word */
208#define BN_BITS4 8 /* bits per half word */
209#define BN_MASK (0xffffffff)
210#define BN_MASK2 (0xffff) /* all bits of one word */
211#define BN_MASK2l (0xff) /* low half of a word */
212#define BN_MASK2h1 (0xff80) /* high half plus the top bit of the low half */
213#define BN_MASK2h (0xff00) /* high half of a word */
214#define BN_TBIT (0x8000) /* top bit of a word */
/* NOTE(review): 100000 (10^5) is larger than BN_MASK2 (0xffff == 65535), so
 * it does not fit in one 16-bit word.  The other word sizes in this header
 * pair BN_DEC_CONV == 10^BN_DEC_NUM with a value that fits in a word
 * (10^19 / 10^9 / 10^2); by that pattern this would be 10000 with 4 digits.
 * Confirm against the users of BN_DEC_CONV before changing. */
215#define BN_DEC_CONV (100000)
216#define BN_DEC_FMT1 "%u"
217#define BN_DEC_FMT2 "%05u" /* zero-padded to BN_DEC_NUM digits */
218#define BN_DEC_NUM 5
219#endif
220
221#ifdef EIGHT_BIT
222#ifndef BN_DIV2W
223#define BN_DIV2W
224#endif
225#define BN_ULLONG unsigned short
226#define BN_ULONG unsigned char
227#define BN_LONG char
228#define BN_BITS 16
229#define BN_BYTES 1
230#define BN_BITS2 8
231#define BN_BITS4 4
232#define BN_MASK (0xffff)
233#define BN_MASK2 (0xff)
234#define BN_MASK2l (0xf)
235#define BN_MASK2h1 (0xf8)
236#define BN_MASK2h (0xf0)
237#define BN_TBIT (0x80)
238#define BN_DEC_CONV (100)
239#define BN_DEC_FMT1 "%u"
240#define BN_DEC_FMT2 "%02u"
241#define BN_DEC_NUM 2
242#endif
243
244#define BN_DEFAULT_BITS 1280
245
246#define BN_FLG_MALLOCED 0x01
247#define BN_FLG_STATIC_DATA 0x02
248#define BN_FLG_CONSTTIME 0x04 /* avoid leaking exponent information through timing,
249 * BN_mod_exp_mont() will call BN_mod_exp_mont_consttime,
250 * BN_div() will call BN_div_no_branch,
251 * BN_mod_inverse() will call BN_mod_inverse_no_branch.
252 */
253
254#ifndef OPENSSL_NO_DEPRECATED
255#define BN_FLG_EXP_CONSTTIME BN_FLG_CONSTTIME /* deprecated name for the flag */
256 /* avoid leaking exponent information through timings
257 * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
258#endif
259
260#ifndef OPENSSL_NO_DEPRECATED
261#define BN_FLG_FREE 0x8000 /* used for debugging */
262#endif
263#define BN_set_flags(b,n) ((b)->flags|=(n))
264#define BN_get_flags(b,n) ((b)->flags&(n))
265
266/* Get a clone of a BIGNUM with changed flags, for *temporary* use only
267 * (the two BIGNUMs must not be used in parallel!).
 * 'dest' receives b's d pointer itself (the word array is shared, not
 * copied) together with b's top, dmax and neg.  Its flags become: dest's own
 * BN_FLG_MALLOCED bit, every flag of 'b' except BN_FLG_MALLOCED,
 * BN_FLG_STATIC_DATA, and the extra flags 'n'.
 * 'dest' and 'b' are evaluated several times, so pass side-effect-free
 * expressions. */
268#define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \
269 (dest)->top=(b)->top, \
270 (dest)->dmax=(b)->dmax, \
271 (dest)->neg=(b)->neg, \
272 (dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \
273 | ((b)->flags & ~BN_FLG_MALLOCED) \
274 | BN_FLG_STATIC_DATA \
275 | (n)))
276
277/* Already declared in ossl_typ.h */
278#if 0
279typedef struct bignum_st BIGNUM;
280/* Used for temp variables (declaration hidden in bn_lcl.h) */
281typedef struct bignum_ctx BN_CTX;
282typedef struct bn_blinding_st BN_BLINDING;
283typedef struct bn_mont_ctx_st BN_MONT_CTX;
284typedef struct bn_recp_ctx_st BN_RECP_CTX;
285typedef struct bn_gencb_st BN_GENCB;
286#endif
287
/* The bignum itself: a sign flag plus a magnitude held as an array of
 * BN_ULONG words.  A value with top == 0 is zero (see BN_is_zero). */
288struct bignum_st
289 {
290 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
291 int top; /* Index of last used d +1. */
292 /* The next are internal book keeping for bn_expand. */
293 int dmax; /* Size of the d array. */
294 int neg; /* one if the number is negative */
295 int flags; /* BN_FLG_* bits; accessed via BN_set_flags()/BN_get_flags() */
296 };
297
298/* Used for montgomery multiplication */
299struct bn_mont_ctx_st
300 {
301 int ri; /* number of bits in R */
302 BIGNUM RR; /* used to convert to montgomery form */
303 BIGNUM N; /* The modulus */
304 BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
305 * (Ni is only stored for bignum algorithm) */
306#if 0
307 /* OpenSSL 0.9.9 preview: */
308 BN_ULONG n0[2];/* least significant word(s) of Ni */
309#else
310 BN_ULONG n0; /* least significant word of Ni */
311#endif
312 int flags;
313 };
314
315/* Used for reciprocal division/mod functions
316 * It cannot be shared between threads
317 */
318struct bn_recp_ctx_st
319 {
320 BIGNUM N; /* the divisor */
321 BIGNUM Nr; /* the reciprocal */
322 int num_bits;
323 int shift;
324 int flags;
325 };
326
327/* Used for slow "generation" functions. */
328struct bn_gencb_st
329 {
330 unsigned int ver; /* To handle binary (in)compatibility */
331 void *arg; /* callback-specific data */
332 union
333 {
334 /* if(ver==1) - handles old style callbacks */
335 void (*cb_1)(int, int, void *);
336 /* if(ver==2) - new callback style */
337 int (*cb_2)(int, int, BN_GENCB *);
338 } cb;
339 };
340/* Wrapper function to make using BN_GENCB easier, */
341int BN_GENCB_call(BN_GENCB *cb, int a, int b);
342/* Macro to populate a BN_GENCB structure with an "old"-style callback */
/* Sets ver to 1 and stores 'callback' in cb.cb_1 and 'cb_arg' in arg.
 * Expands to a brace-enclosed block, not an expression: it cannot be used
 * where a value is expected, and "if (x) BN_GENCB_set_old(...); else ..."
 * does not parse because of the ';' after the closing brace.  The block
 * declares a local named tmp_gencb, so the arguments must not refer to a
 * variable of that name. */
343#define BN_GENCB_set_old(gencb, callback, cb_arg) { \
344 BN_GENCB *tmp_gencb = (gencb); \
345 tmp_gencb->ver = 1; \
346 tmp_gencb->arg = (cb_arg); \
347 tmp_gencb->cb.cb_1 = (callback); }
348/* Macro to populate a BN_GENCB structure with a "new"-style callback */
/* Same shape as BN_GENCB_set_old, but sets ver to 2 and stores 'callback'
 * in cb.cb_2; the same statement-context caveats apply. */
349#define BN_GENCB_set(gencb, callback, cb_arg) { \
350 BN_GENCB *tmp_gencb = (gencb); \
351 tmp_gencb->ver = 2; \
352 tmp_gencb->arg = (cb_arg); \
353 tmp_gencb->cb.cb_2 = (callback); }
354
355#define BN_prime_checks 0 /* default: select number of iterations
356 based on the size of the number */
357
358/* number of Miller-Rabin iterations for an error rate of less than 2^-80
359 * for random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook
360 * of Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996];
361 * original paper: Damgaard, Landrock, Pomerance: Average case error estimates
362 * for the strong probable prime test. -- Math. Comp. 61 (1993) 177-194) */
363#define BN_prime_checks_for_size(b) ((b) >= 1300 ? 2 : \
364 (b) >= 850 ? 3 : \
365 (b) >= 650 ? 4 : \
366 (b) >= 550 ? 5 : \
367 (b) >= 450 ? 6 : \
368 (b) >= 400 ? 7 : \
369 (b) >= 350 ? 8 : \
370 (b) >= 300 ? 9 : \
371 (b) >= 250 ? 12 : \
372 (b) >= 200 ? 15 : \
373 (b) >= 150 ? 18 : \
374 /* b >= 100 */ 27)
375
376#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
377
/* Value predicates on a BIGNUM pointer.  They read the structure fields
 * directly and evaluate their arguments more than once, so the arguments
 * should be free of side effects. */
378/* Note that BN_abs_is_word didn't work reliably for w == 0 until 0.9.8 */
/* True if the magnitude of 'a' equals the single word 'w' (sign ignored):
 * either exactly one word is in use and it equals w, or w is 0 and no words
 * are in use. */
379#define BN_abs_is_word(a,w) ((((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) || \
380 (((w) == 0) && ((a)->top == 0)))
/* True if no words are in use. */
381#define BN_is_zero(a) ((a)->top == 0)
/* True if the magnitude is 1 and the sign flag is clear. */
382#define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg)
/* True if 'a' equals the word 'w'; the sign flag is only required to be
 * clear when w is non-zero. */
383#define BN_is_word(a,w) (BN_abs_is_word((a),(w)) && (!(w) || !(a)->neg))
/* True if at least one word is in use and bit 0 of the lowest word is set. */
384#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
385
386#define BN_one(a) (BN_set_word((a),1))
387#define BN_zero_ex(a) \
388 do { \
389 BIGNUM *_tmp_bn = (a); \
390 _tmp_bn->top = 0; \
391 _tmp_bn->neg = 0; \
392 } while(0)
393#ifdef OPENSSL_NO_DEPRECATED
394#define BN_zero(a) BN_zero_ex(a)
395#else
396#define BN_zero(a) (BN_set_word((a),0))
397#endif
398
399const BIGNUM *BN_value_one(void);
400char * BN_options(void);
401BN_CTX *BN_CTX_new(void);
402#ifndef OPENSSL_NO_DEPRECATED
403void BN_CTX_init(BN_CTX *c);
404#endif
405void BN_CTX_free(BN_CTX *c);
406void BN_CTX_start(BN_CTX *ctx);
407BIGNUM *BN_CTX_get(BN_CTX *ctx);
408void BN_CTX_end(BN_CTX *ctx);
409int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
410int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom);
411int BN_rand_range(BIGNUM *rnd, const BIGNUM *range);
412int BN_pseudo_rand_range(BIGNUM *rnd, const BIGNUM *range);
413int BN_num_bits(const BIGNUM *a);
414int BN_num_bits_word(BN_ULONG);
415BIGNUM *BN_new(void);
416void BN_init(BIGNUM *);
417void BN_clear_free(BIGNUM *a);
418BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
419void BN_swap(BIGNUM *a, BIGNUM *b);
420BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret);
421int BN_bn2bin(const BIGNUM *a, unsigned char *to);
422BIGNUM *BN_mpi2bn(const unsigned char *s,int len,BIGNUM *ret);
423int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
424int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
425int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
426int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
427int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
428int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
429int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx);
430/** BN_set_negative sets sign of a BIGNUM
431 * \param b pointer to the BIGNUM object
432 * \param n 0 if the BIGNUM b should be positive and a value != 0 otherwise
433 */
434void BN_set_negative(BIGNUM *b, int n);
435/** BN_is_negative returns 1 if the BIGNUM is negative
436 * \param a pointer to the BIGNUM object
437 * \return 1 if a < 0 and 0 otherwise
438 */
439#define BN_is_negative(a) ((a)->neg != 0)
440
441int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
442 BN_CTX *ctx);
443#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
444int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
445int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
446int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
447int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
448int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
449int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
450 const BIGNUM *m, BN_CTX *ctx);
451int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
452int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
453int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m);
454int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx);
455int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m);
456
457BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w);
458BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
459int BN_mul_word(BIGNUM *a, BN_ULONG w);
460int BN_add_word(BIGNUM *a, BN_ULONG w);
461int BN_sub_word(BIGNUM *a, BN_ULONG w);
462int BN_set_word(BIGNUM *a, BN_ULONG w);
463BN_ULONG BN_get_word(const BIGNUM *a);
464
465int BN_cmp(const BIGNUM *a, const BIGNUM *b);
466void BN_free(BIGNUM *a);
467int BN_is_bit_set(const BIGNUM *a, int n);
468int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
469int BN_lshift1(BIGNUM *r, const BIGNUM *a);
470int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,BN_CTX *ctx);
471
472int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
473 const BIGNUM *m,BN_CTX *ctx);
474int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
475 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
476int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
477 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont);
478int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
479 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
480int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1,
481 const BIGNUM *a2, const BIGNUM *p2,const BIGNUM *m,
482 BN_CTX *ctx,BN_MONT_CTX *m_ctx);
483int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
484 const BIGNUM *m,BN_CTX *ctx);
485
486int BN_mask_bits(BIGNUM *a,int n);
487#ifndef OPENSSL_NO_FP_API
488int BN_print_fp(FILE *fp, const BIGNUM *a);
489#endif
490#ifdef HEADER_BIO_H
491int BN_print(BIO *fp, const BIGNUM *a);
492#else
493int BN_print(void *fp, const BIGNUM *a);
494#endif
495int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx);
496int BN_rshift(BIGNUM *r, const BIGNUM *a, int n);
497int BN_rshift1(BIGNUM *r, const BIGNUM *a);
498void BN_clear(BIGNUM *a);
499BIGNUM *BN_dup(const BIGNUM *a);
500int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
501int BN_set_bit(BIGNUM *a, int n);
502int BN_clear_bit(BIGNUM *a, int n);
503char * BN_bn2hex(const BIGNUM *a);
504char * BN_bn2dec(const BIGNUM *a);
505int BN_hex2bn(BIGNUM **a, const char *str);
506int BN_dec2bn(BIGNUM **a, const char *str);
507int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
508int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
509BIGNUM *BN_mod_inverse(BIGNUM *ret,
510 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
511BIGNUM *BN_mod_sqrt(BIGNUM *ret,
512 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
513
514/* Deprecated versions */
515#ifndef OPENSSL_NO_DEPRECATED
516BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
517 const BIGNUM *add, const BIGNUM *rem,
518 void (*callback)(int,int,void *),void *cb_arg);
519int BN_is_prime(const BIGNUM *p,int nchecks,
520 void (*callback)(int,int,void *),
521 BN_CTX *ctx,void *cb_arg);
522int BN_is_prime_fasttest(const BIGNUM *p,int nchecks,
523 void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg,
524 int do_trial_division);
525#endif /* !defined(OPENSSL_NO_DEPRECATED) */
526
527/* Newer versions */
528int BN_generate_prime_ex(BIGNUM *ret,int bits,int safe, const BIGNUM *add,
529 const BIGNUM *rem, BN_GENCB *cb);
530int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
531int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
532 int do_trial_division, BN_GENCB *cb);
533
534int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx);
535
536int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
537 const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
538 const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb);
539int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
540 BIGNUM *Xp1, BIGNUM *Xp2,
541 const BIGNUM *Xp,
542 const BIGNUM *e, BN_CTX *ctx,
543 BN_GENCB *cb);
544
545BN_MONT_CTX *BN_MONT_CTX_new(void );
546void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
547int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
548 BN_MONT_CTX *mont, BN_CTX *ctx);
549#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
550 (r),(a),&((mont)->RR),(mont),(ctx))
551int BN_from_montgomery(BIGNUM *r,const BIGNUM *a,
552 BN_MONT_CTX *mont, BN_CTX *ctx);
553void BN_MONT_CTX_free(BN_MONT_CTX *mont);
554int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *mod,BN_CTX *ctx);
555BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
556BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
557 const BIGNUM *mod, BN_CTX *ctx);
558
559/* BN_BLINDING flags */
560#define BN_BLINDING_NO_UPDATE 0x00000001
561#define BN_BLINDING_NO_RECREATE 0x00000002
562
563BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod);
564void BN_BLINDING_free(BN_BLINDING *b);
565int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
566int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
567int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
568int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *);
569int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *);
570unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *);
571void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long);
572unsigned long BN_BLINDING_get_flags(const BN_BLINDING *);
573void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long);
574BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
575 const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
576 int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
577 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
578 BN_MONT_CTX *m_ctx);
579
580#ifndef OPENSSL_NO_DEPRECATED
581void BN_set_params(int mul,int high,int low,int mont);
582int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
583#endif
584
585void BN_RECP_CTX_init(BN_RECP_CTX *recp);
586BN_RECP_CTX *BN_RECP_CTX_new(void);
587void BN_RECP_CTX_free(BN_RECP_CTX *recp);
588int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx);
589int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
590 BN_RECP_CTX *recp,BN_CTX *ctx);
591int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
592 const BIGNUM *m, BN_CTX *ctx);
593int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
594 BN_RECP_CTX *recp, BN_CTX *ctx);
595
596/* Functions for arithmetic over binary polynomials represented by BIGNUMs.
597 *
598 * The BIGNUM::neg property of BIGNUMs representing binary polynomials is
599 * ignored.
600 *
601 * Note that input arguments are not const so that their bit arrays can
602 * be expanded to the appropriate size if needed.
603 */
604
605int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); /*r = a + b*/
606#define BN_GF2m_sub(r, a, b) BN_GF2m_add(r, a, b)
607int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p); /*r=a mod p*/
608int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
609 const BIGNUM *p, BN_CTX *ctx); /* r = (a * b) mod p */
610int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
611 BN_CTX *ctx); /* r = (a * a) mod p */
612int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *b, const BIGNUM *p,
613 BN_CTX *ctx); /* r = (1 / b) mod p */
614int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
615 const BIGNUM *p, BN_CTX *ctx); /* r = (a / b) mod p */
616int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
617 const BIGNUM *p, BN_CTX *ctx); /* r = (a ^ b) mod p */
618int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
619 BN_CTX *ctx); /* r = sqrt(a) mod p */
620int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
621 BN_CTX *ctx); /* r^2 + r = a mod p */
622#define BN_GF2m_cmp(a, b) BN_ucmp((a), (b))
623/* Some functions allow for representation of the irreducible polynomials
624 * as an unsigned int[], say p. The irreducible f(t) is then of the form:
625 * t^p[0] + t^p[1] + ... + t^p[k]
626 * where m = p[0] > p[1] > ... > p[k] = 0.
627 */
628int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]);
629 /* r = a mod p */
630int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
631 const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */
632int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[],
633 BN_CTX *ctx); /* r = (a * a) mod p */
634int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[],
635 BN_CTX *ctx); /* r = (1 / b) mod p */
636int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
637 const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */
638int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
639 const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */
640int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a,
641 const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */
642int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a,
643 const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */
644int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max);
645int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a);
646
647/* faster mod functions for the 'NIST primes'
648 * 0 <= a < p^2 */
649int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
650int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
651int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
652int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
653int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
654
655const BIGNUM *BN_get0_nist_prime_192(void);
656const BIGNUM *BN_get0_nist_prime_224(void);
657const BIGNUM *BN_get0_nist_prime_256(void);
658const BIGNUM *BN_get0_nist_prime_384(void);
659const BIGNUM *BN_get0_nist_prime_521(void);
660
661/* library internal functions */
662
/* Make sure a can hold at least `bits` bits (bn_expand) or `words` words
 * (bn_wexpand).  Both evaluate to a itself when a->dmax is already large
 * enough, otherwise to whatever bn_expand2() returns.
 * The size argument is fully parenthesised so that expressions such as
 * `n|1` or `x ? y : z` are rounded up as a whole; note that it is still
 * evaluated more than once, so it must not have side effects. */
#define bn_expand(a,bits) (((((bits)+BN_BITS2-1)/BN_BITS2) <= (a)->dmax)?\
	(a):bn_expand2((a),((bits)+BN_BITS2-1)/BN_BITS2))
#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
666BIGNUM *bn_expand2(BIGNUM *a, int words);
667#ifndef OPENSSL_NO_DEPRECATED
668BIGNUM *bn_dup_expand(const BIGNUM *a, int words); /* unused */
669#endif
670
671/* Bignum consistency macros
672 * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from
673 * bignum data after direct manipulations on the data. There is also an
674 * "internal" macro, bn_check_top(), for verifying that there are no leading
675 * zeroes. Unfortunately, some auditing is required due to the fact that
676 * bn_fix_top() has become an overabused duct-tape because bignum data is
677 * occasionally passed around in an inconsistent state. So the following
678 * changes have been made to sort this out;
679 * - bn_fix_top()s implementation has been moved to bn_correct_top()
680 * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and
681 * bn_check_top() is as before.
682 * - if BN_DEBUG *is* defined;
683 * - bn_check_top() tries to pollute unused words even if the bignum 'top' is
684 * consistent. (ed: only if BN_DEBUG_RAND is defined)
685 * - bn_fix_top() maps to bn_check_top() rather than "fixing" anything.
686 * The idea is to have debug builds flag up inconsistent bignums when they
687 * occur. If that occurs in a bn_fix_top(), we examine the code in question; if
688 * the use of bn_fix_top() was appropriate (ie. it follows directly after code
689 * that manipulates the bignum) it is converted to bn_correct_top(), and if it
690 * was not appropriate, we convert it permanently to bn_check_top() and track
691 * down the cause of the bug. Eventually, no internal code should be using the
692 * bn_fix_top() macro. External applications and libraries should try this with
693 * their own code too, both in terms of building against the openssl headers
694 * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it
695 * defined. This not only improves external code, it provides more test
696 * coverage for openssl's own code.
697 */
698
699#ifdef BN_DEBUG
700
701/* We only need assert() when debugging */
702#include <assert.h>
703
704#ifdef BN_DEBUG_RAND
705/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */
706#ifndef RAND_pseudo_bytes
707int RAND_pseudo_bytes(unsigned char *buf,int num);
708#define BN_DEBUG_TRIX
709#endif
/* Debug aid, only defined this way when BN_DEBUG_RAND is set: if a has
 * unused words (top < dmax), fill every word from d[top] up to d[dmax-1]
 * with a single byte value obtained from RAND_pseudo_bytes().
 * Nothing is touched when top >= dmax.
 * NOTE(review): the return value of RAND_pseudo_bytes() is ignored; if it
 * can fail without writing the byte, _tmp_char is used uninitialised -
 * confirm against the RAND implementation. */
#define bn_pollute(a) \
	do { \
		const BIGNUM *_bnum1 = (a); \
		if(_bnum1->top < _bnum1->dmax) { \
			unsigned char _tmp_char; \
			/* We cast away const without the compiler knowing, any \
			 * *genuinely* constant variables that aren't mutable \
			 * wouldn't be constructed with top!=dmax. */ \
			BN_ULONG *_not_const; \
			memcpy(&_not_const, &_bnum1->d, sizeof(BN_ULONG*)); \
			RAND_pseudo_bytes(&_tmp_char, 1); \
			memset((unsigned char *)(_not_const + _bnum1->top), _tmp_char, \
				(_bnum1->dmax - _bnum1->top) * sizeof(BN_ULONG)); \
		} \
	} while(0)
725#ifdef BN_DEBUG_TRIX
726#undef RAND_pseudo_bytes
727#endif
728#else
729#define bn_pollute(a)
730#endif
/* BN_DEBUG consistency check.  A NULL argument is accepted and ignored.
 * For a non-NULL bignum it asserts that either top == 0 or the most
 * significant used word d[top-1] is non-zero (i.e. no leading zero words),
 * and then hands the bignum to bn_pollute(). */
#define bn_check_top(a) \
	do { \
		const BIGNUM *_bnum2 = (a); \
		if (_bnum2 != NULL) { \
			assert((_bnum2->top == 0) || \
				(_bnum2->d[_bnum2->top - 1] != 0)); \
			bn_pollute(_bnum2); \
		} \
	} while(0)
740
741#define bn_fix_top(a) bn_check_top(a)
742
743#else /* !BN_DEBUG */
744
745#define bn_pollute(a)
746#define bn_check_top(a)
747#define bn_fix_top(a) bn_correct_top(a)
748
749#endif
750
/* Strip leading zero words: lower a->top until d[top-1] is non-zero or top
 * reaches 0, then call bn_pollute(a).  The argument is evaluated several
 * times and the expansion is a bare brace block, so pass a side-effect-free
 * expression and do not use it as the body of an if that has an else. */
#define bn_correct_top(a) \
	{ \
	BN_ULONG *ftl; \
	if ((a)->top > 0) \
		{ \
		ftl= &((a)->d[(a)->top-1]); \
		while ((a)->top > 0 && !*(ftl--)) \
			(a)->top--; \
		} \
	bn_pollute(a); \
	}
761
762BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
763BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
764void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
765BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
766BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
767BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
768
769/* Primes from RFC 2409 */
770BIGNUM *get_rfc2409_prime_768(BIGNUM *bn);
771BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn);
772
773/* Primes from RFC 3526 */
774BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn);
775BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn);
776BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn);
777BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn);
778BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn);
779BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn);
780
781int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
782
783/* BEGIN ERROR CODES */
784/* The following lines are auto generated by the script mkerr.pl. Any changes
785 * made after this point may be overwritten when the script is next run.
786 */
787void ERR_load_BN_strings(void);
788
789/* Error codes for the BN functions. */
790
791/* Function codes. */
792#define BN_F_BNRAND 127
793#define BN_F_BN_BLINDING_CONVERT_EX 100
794#define BN_F_BN_BLINDING_CREATE_PARAM 128
795#define BN_F_BN_BLINDING_INVERT_EX 101
796#define BN_F_BN_BLINDING_NEW 102
797#define BN_F_BN_BLINDING_UPDATE 103
798#define BN_F_BN_BN2DEC 104
799#define BN_F_BN_BN2HEX 105
800#define BN_F_BN_CTX_GET 116
801#define BN_F_BN_CTX_NEW 106
802#define BN_F_BN_CTX_START 129
803#define BN_F_BN_DIV 107
804#define BN_F_BN_DIV_NO_BRANCH 138
805#define BN_F_BN_DIV_RECP 130
806#define BN_F_BN_EXP 123
807#define BN_F_BN_EXPAND2 108
808#define BN_F_BN_EXPAND_INTERNAL 120
809#define BN_F_BN_GF2M_MOD 131
810#define BN_F_BN_GF2M_MOD_EXP 132
811#define BN_F_BN_GF2M_MOD_MUL 133
812#define BN_F_BN_GF2M_MOD_SOLVE_QUAD 134
813#define BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR 135
814#define BN_F_BN_GF2M_MOD_SQR 136
815#define BN_F_BN_GF2M_MOD_SQRT 137
816#define BN_F_BN_MOD_EXP2_MONT 118
817#define BN_F_BN_MOD_EXP_MONT 109
818#define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124
819#define BN_F_BN_MOD_EXP_MONT_WORD 117
820#define BN_F_BN_MOD_EXP_RECP 125
821#define BN_F_BN_MOD_EXP_SIMPLE 126
822#define BN_F_BN_MOD_INVERSE 110
823#define BN_F_BN_MOD_INVERSE_NO_BRANCH 139
824#define BN_F_BN_MOD_LSHIFT_QUICK 119
825#define BN_F_BN_MOD_MUL_RECIPROCAL 111
826#define BN_F_BN_MOD_SQRT 121
827#define BN_F_BN_MPI2BN 112
828#define BN_F_BN_NEW 113
829#define BN_F_BN_RAND 114
830#define BN_F_BN_RAND_RANGE 122
831#define BN_F_BN_USUB 115
832
833/* Reason codes. */
834#define BN_R_ARG2_LT_ARG3 100
835#define BN_R_BAD_RECIPROCAL 101
836#define BN_R_BIGNUM_TOO_LONG 114
837#define BN_R_CALLED_WITH_EVEN_MODULUS 102
838#define BN_R_DIV_BY_ZERO 103
839#define BN_R_ENCODING_ERROR 104
840#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
841#define BN_R_INPUT_NOT_REDUCED 110
842#define BN_R_INVALID_LENGTH 106
843#define BN_R_INVALID_RANGE 115
844#define BN_R_NOT_A_SQUARE 111
845#define BN_R_NOT_INITIALIZED 107
846#define BN_R_NO_INVERSE 108
847#define BN_R_NO_SOLUTION 116
848#define BN_R_P_IS_NOT_PRIME 112
849#define BN_R_TOO_MANY_ITERATIONS 113
850#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
851
852#ifdef __cplusplus
853}
854#endif
855#endif
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
deleted file mode 100644
index 9405163706..0000000000
--- a/src/lib/libcrypto/bn/bn_add.c
+++ /dev/null
@@ -1,313 +0,0 @@
1/* crypto/bn/bn_add.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r can == a or b */
64int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
65 {
66 const BIGNUM *tmp;
67 int a_neg = a->neg, ret;
68
69 bn_check_top(a);
70 bn_check_top(b);
71
72 /* a + b a+b
73 * a + -b a-b
74 * -a + b b-a
75 * -a + -b -(a+b)
76 */
77 if (a_neg ^ b->neg)
78 {
79 /* only one is negative */
80 if (a_neg)
81 { tmp=a; a=b; b=tmp; }
82
83 /* we are now a - b */
84
85 if (BN_ucmp(a,b) < 0)
86 {
87 if (!BN_usub(r,b,a)) return(0);
88 r->neg=1;
89 }
90 else
91 {
92 if (!BN_usub(r,a,b)) return(0);
93 r->neg=0;
94 }
95 return(1);
96 }
97
98 ret = BN_uadd(r,a,b);
99 r->neg = a_neg;
100 bn_check_top(r);
101 return ret;
102 }
103
104/* unsigned add of b to a */
105int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
106 {
107 int max,min,dif;
108 BN_ULONG *ap,*bp,*rp,carry,t1,t2;
109 const BIGNUM *tmp;
110
111 bn_check_top(a);
112 bn_check_top(b);
113
114 if (a->top < b->top)
115 { tmp=a; a=b; b=tmp; }
116 max = a->top;
117 min = b->top;
118 dif = max - min;
119
120 if (bn_wexpand(r,max+1) == NULL)
121 return 0;
122
123 r->top=max;
124
125
126 ap=a->d;
127 bp=b->d;
128 rp=r->d;
129
130 carry=bn_add_words(rp,ap,bp,min);
131 rp+=min;
132 ap+=min;
133 bp+=min;
134
135 if (carry)
136 {
137 while (dif)
138 {
139 dif--;
140 t1 = *(ap++);
141 t2 = (t1+1) & BN_MASK2;
142 *(rp++) = t2;
143 if (t2)
144 {
145 carry=0;
146 break;
147 }
148 }
149 if (carry)
150 {
151 /* carry != 0 => dif == 0 */
152 *rp = 1;
153 r->top++;
154 }
155 }
156 if (dif && rp != ap)
157 while (dif--)
158 /* copy remaining words if ap != rp */
159 *(rp++) = *(ap++);
160 r->neg = 0;
161 bn_check_top(r);
162 return 1;
163 }
164
165/* unsigned subtraction of b from a, a must be larger than b. */
166int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
167 {
168 int max,min,dif;
169 register BN_ULONG t1,t2,*ap,*bp,*rp;
170 int i,carry;
171#if defined(IRIX_CC_BUG) && !defined(LINT)
172 int dummy;
173#endif
174
175 bn_check_top(a);
176 bn_check_top(b);
177
178 max = a->top;
179 min = b->top;
180 dif = max - min;
181
182 if (dif < 0) /* hmm... should not be happening */
183 {
184 BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
185 return(0);
186 }
187
188 if (bn_wexpand(r,max) == NULL) return(0);
189
190 ap=a->d;
191 bp=b->d;
192 rp=r->d;
193
194#if 1
195 carry=0;
196 for (i = min; i != 0; i--)
197 {
198 t1= *(ap++);
199 t2= *(bp++);
200 if (carry)
201 {
202 carry=(t1 <= t2);
203 t1=(t1-t2-1)&BN_MASK2;
204 }
205 else
206 {
207 carry=(t1 < t2);
208 t1=(t1-t2)&BN_MASK2;
209 }
210#if defined(IRIX_CC_BUG) && !defined(LINT)
211 dummy=t1;
212#endif
213 *(rp++)=t1&BN_MASK2;
214 }
215#else
216 carry=bn_sub_words(rp,ap,bp,min);
217 ap+=min;
218 bp+=min;
219 rp+=min;
220#endif
221 if (carry) /* subtracted */
222 {
223 if (!dif)
224 /* error: a < b */
225 return 0;
226 while (dif)
227 {
228 dif--;
229 t1 = *(ap++);
230 t2 = (t1-1)&BN_MASK2;
231 *(rp++) = t2;
232 if (t1)
233 break;
234 }
235 }
236#if 0
237 memcpy(rp,ap,sizeof(*rp)*(max-i));
238#else
239 if (rp != ap)
240 {
241 for (;;)
242 {
243 if (!dif--) break;
244 rp[0]=ap[0];
245 if (!dif--) break;
246 rp[1]=ap[1];
247 if (!dif--) break;
248 rp[2]=ap[2];
249 if (!dif--) break;
250 rp[3]=ap[3];
251 rp+=4;
252 ap+=4;
253 }
254 }
255#endif
256
257 r->top=max;
258 r->neg=0;
259 bn_correct_top(r);
260 return(1);
261 }
262
263int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
264 {
265 int max;
266 int add=0,neg=0;
267 const BIGNUM *tmp;
268
269 bn_check_top(a);
270 bn_check_top(b);
271
272 /* a - b a-b
273 * a - -b a+b
274 * -a - b -(a+b)
275 * -a - -b b-a
276 */
277 if (a->neg)
278 {
279 if (b->neg)
280 { tmp=a; a=b; b=tmp; }
281 else
282 { add=1; neg=1; }
283 }
284 else
285 {
286 if (b->neg) { add=1; neg=0; }
287 }
288
289 if (add)
290 {
291 if (!BN_uadd(r,a,b)) return(0);
292 r->neg=neg;
293 return(1);
294 }
295
296 /* We are actually doing a - b :-) */
297
298 max=(a->top > b->top)?a->top:b->top;
299 if (bn_wexpand(r,max) == NULL) return(0);
300 if (BN_ucmp(a,b) < 0)
301 {
302 if (!BN_usub(r,b,a)) return(0);
303 r->neg=1;
304 }
305 else
306 {
307 if (!BN_usub(r,a,b)) return(0);
308 r->neg=0;
309 }
310 bn_check_top(r);
311 return(1);
312 }
313
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
deleted file mode 100644
index 99bc2de491..0000000000
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ /dev/null
@@ -1,860 +0,0 @@
1/* crypto/bn/bn_asm.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70
71BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
72 {
73 BN_ULONG c1=0;
74
75 assert(num >= 0);
76 if (num <= 0) return(c1);
77
78 while (num&~3)
79 {
80 mul_add(rp[0],ap[0],w,c1);
81 mul_add(rp[1],ap[1],w,c1);
82 mul_add(rp[2],ap[2],w,c1);
83 mul_add(rp[3],ap[3],w,c1);
84 ap+=4; rp+=4; num-=4;
85 }
86 if (num)
87 {
88 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90 mul_add(rp[2],ap[2],w,c1); return c1;
91 }
92
93 return(c1);
94 }
95
96BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
97 {
98 BN_ULONG c1=0;
99
100 assert(num >= 0);
101 if (num <= 0) return(c1);
102
103 while (num&~3)
104 {
105 mul(rp[0],ap[0],w,c1);
106 mul(rp[1],ap[1],w,c1);
107 mul(rp[2],ap[2],w,c1);
108 mul(rp[3],ap[3],w,c1);
109 ap+=4; rp+=4; num-=4;
110 }
111 if (num)
112 {
113 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115 mul(rp[2],ap[2],w,c1);
116 }
117 return(c1);
118 }
119
120void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
121 {
122 assert(n >= 0);
123 if (n <= 0) return;
124 while (n&~3)
125 {
126 sqr(r[0],r[1],a[0]);
127 sqr(r[2],r[3],a[1]);
128 sqr(r[4],r[5],a[2]);
129 sqr(r[6],r[7],a[3]);
130 a+=4; r+=8; n-=4;
131 }
132 if (n)
133 {
134 sqr(r[0],r[1],a[0]); if (--n == 0) return;
135 sqr(r[2],r[3],a[1]); if (--n == 0) return;
136 sqr(r[4],r[5],a[2]);
137 }
138 }
139
140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
141
142BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
143 {
144 BN_ULONG c=0;
145 BN_ULONG bl,bh;
146
147 assert(num >= 0);
148 if (num <= 0) return((BN_ULONG)0);
149
150 bl=LBITS(w);
151 bh=HBITS(w);
152
153 for (;;)
154 {
155 mul_add(rp[0],ap[0],bl,bh,c);
156 if (--num == 0) break;
157 mul_add(rp[1],ap[1],bl,bh,c);
158 if (--num == 0) break;
159 mul_add(rp[2],ap[2],bl,bh,c);
160 if (--num == 0) break;
161 mul_add(rp[3],ap[3],bl,bh,c);
162 if (--num == 0) break;
163 ap+=4;
164 rp+=4;
165 }
166 return(c);
167 }
168
169BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
170 {
171 BN_ULONG carry=0;
172 BN_ULONG bl,bh;
173
174 assert(num >= 0);
175 if (num <= 0) return((BN_ULONG)0);
176
177 bl=LBITS(w);
178 bh=HBITS(w);
179
180 for (;;)
181 {
182 mul(rp[0],ap[0],bl,bh,carry);
183 if (--num == 0) break;
184 mul(rp[1],ap[1],bl,bh,carry);
185 if (--num == 0) break;
186 mul(rp[2],ap[2],bl,bh,carry);
187 if (--num == 0) break;
188 mul(rp[3],ap[3],bl,bh,carry);
189 if (--num == 0) break;
190 ap+=4;
191 rp+=4;
192 }
193 return(carry);
194 }
195
196void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
197 {
198 assert(n >= 0);
199 if (n <= 0) return;
200 for (;;)
201 {
202 sqr64(r[0],r[1],a[0]);
203 if (--n == 0) break;
204
205 sqr64(r[2],r[3],a[1]);
206 if (--n == 0) break;
207
208 sqr64(r[4],r[5],a[2]);
209 if (--n == 0) break;
210
211 sqr64(r[6],r[7],a[3]);
212 if (--n == 0) break;
213
214 a+=4;
215 r+=8;
216 }
217 }
218
219#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
220
221#if defined(BN_LLONG) && defined(BN_DIV2W)
222
223BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
224 {
225 return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
226 }
227
228#else
229
/* Divide h,l by d and return the result. */
/* I need to test this some more :-( */
/* Portable variant that uses single-word arithmetic only.
 * Returns BN_MASK2 when d == 0.  Otherwise d is shifted left until it
 * occupies all BN_BITS2 bits (h,l are shifted along with it), and the main
 * loop then runs exactly twice (count=2), each pass producing one quotient
 * digit q: the first is placed in the upper part of ret (shifted by
 * BN_BITS4), the second is OR-ed into the lower part.
 * NOTE(review): BN_BITS4 / BN_MASK2l / BN_MASK2h are presumably the half-word
 * width and the low / high half-word masks - confirm in bn.h. */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
	{
	BN_ULONG dh,dl,q,ret=0,th,tl,t;
	int i,count=2;

	if (d == 0) return(BN_MASK2);

	/* debug builds only: sanity check of h against the width of d */
	i=BN_num_bits_word(d);
	assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i));

	/* i becomes the normalisation shift */
	i=BN_BITS2-i;
	if (h >= d) h-=d;

	if (i)
		{
		/* guarded by if (i) so that l is never shifted by BN_BITS2 */
		d<<=i;
		h=(h<<i)|(l>>(BN_BITS2-i));
		l<<=i;
		}
	/* split the normalised divisor into its two halves */
	dh=(d&BN_MASK2h)>>BN_BITS4;
	dl=(d&BN_MASK2l);
	for (;;)
		{
		/* first estimate of the digit, from the top half of d only */
		if ((h>>BN_BITS4) == dh)
			q=BN_MASK2l;
		else
			q=h/dh;

		th=q*dh;
		tl=dl*q;
		/* lower the estimate until q*dl fits under the remainder */
		for (;;)
			{
			t=h-th;
			if ((t&BN_MASK2h) ||
				((tl) <= (
					(t<<BN_BITS4)|
					((l&BN_MASK2h)>>BN_BITS4))))
				break;
			q--;
			th-=dh;
			tl-=dl;
			}
		/* realign q*dl with (h,l) and subtract q*d from (h,l) */
		t=(tl>>BN_BITS4);
		tl=(tl<<BN_BITS4)&BN_MASK2h;
		th+=t;

		if (l < tl) th++;
		l-=tl;
		if (h < th)
			{
			/* estimate was still one too large: add d back */
			h+=d;
			q--;
			}
		h-=th;

		if (--count == 0) break;

		/* store the first digit and shift the remainder up for the second */
		ret=q<<BN_BITS4;
		h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
		l=(l&BN_MASK2l)<<BN_BITS4;
		}
	ret|=q;
	return(ret);
	}
296#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
297
298#ifdef BN_LLONG
299BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
300 {
301 BN_ULLONG ll=0;
302
303 assert(n >= 0);
304 if (n <= 0) return((BN_ULONG)0);
305
306 for (;;)
307 {
308 ll+=(BN_ULLONG)a[0]+b[0];
309 r[0]=(BN_ULONG)ll&BN_MASK2;
310 ll>>=BN_BITS2;
311 if (--n <= 0) break;
312
313 ll+=(BN_ULLONG)a[1]+b[1];
314 r[1]=(BN_ULONG)ll&BN_MASK2;
315 ll>>=BN_BITS2;
316 if (--n <= 0) break;
317
318 ll+=(BN_ULLONG)a[2]+b[2];
319 r[2]=(BN_ULONG)ll&BN_MASK2;
320 ll>>=BN_BITS2;
321 if (--n <= 0) break;
322
323 ll+=(BN_ULLONG)a[3]+b[3];
324 r[3]=(BN_ULONG)ll&BN_MASK2;
325 ll>>=BN_BITS2;
326 if (--n <= 0) break;
327
328 a+=4;
329 b+=4;
330 r+=4;
331 }
332 return((BN_ULONG)ll);
333 }
334#else /* !BN_LLONG */
335BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
336 {
337 BN_ULONG c,l,t;
338
339 assert(n >= 0);
340 if (n <= 0) return((BN_ULONG)0);
341
342 c=0;
343 for (;;)
344 {
345 t=a[0];
346 t=(t+c)&BN_MASK2;
347 c=(t < c);
348 l=(t+b[0])&BN_MASK2;
349 c+=(l < t);
350 r[0]=l;
351 if (--n <= 0) break;
352
353 t=a[1];
354 t=(t+c)&BN_MASK2;
355 c=(t < c);
356 l=(t+b[1])&BN_MASK2;
357 c+=(l < t);
358 r[1]=l;
359 if (--n <= 0) break;
360
361 t=a[2];
362 t=(t+c)&BN_MASK2;
363 c=(t < c);
364 l=(t+b[2])&BN_MASK2;
365 c+=(l < t);
366 r[2]=l;
367 if (--n <= 0) break;
368
369 t=a[3];
370 t=(t+c)&BN_MASK2;
371 c=(t < c);
372 l=(t+b[3])&BN_MASK2;
373 c+=(l < t);
374 r[3]=l;
375 if (--n <= 0) break;
376
377 a+=4;
378 b+=4;
379 r+=4;
380 }
381 return((BN_ULONG)c);
382 }
383#endif /* !BN_LLONG */
384
385BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
386 {
387 BN_ULONG t1,t2;
388 int c=0;
389
390 assert(n >= 0);
391 if (n <= 0) return((BN_ULONG)0);
392
393 for (;;)
394 {
395 t1=a[0]; t2=b[0];
396 r[0]=(t1-t2-c)&BN_MASK2;
397 if (t1 != t2) c=(t1 < t2);
398 if (--n <= 0) break;
399
400 t1=a[1]; t2=b[1];
401 r[1]=(t1-t2-c)&BN_MASK2;
402 if (t1 != t2) c=(t1 < t2);
403 if (--n <= 0) break;
404
405 t1=a[2]; t2=b[2];
406 r[2]=(t1-t2-c)&BN_MASK2;
407 if (t1 != t2) c=(t1 < t2);
408 if (--n <= 0) break;
409
410 t1=a[3]; t2=b[3];
411 r[3]=(t1-t2-c)&BN_MASK2;
412 if (t1 != t2) c=(t1 < t2);
413 if (--n <= 0) break;
414
415 a+=4;
416 b+=4;
417 r+=4;
418 }
419 return(c);
420 }
421
422#ifdef BN_MUL_COMBA
423
424#undef bn_mul_comba8
425#undef bn_mul_comba4
426#undef bn_sqr_comba8
427#undef bn_sqr_comba4
428
429/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
430/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
431/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
432/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
433
/* Four interchangeable sets of the comba accumulator macros, selected by
 * BN_LLONG, BN_UMULT_LOHI, BN_UMULT_HIGH, or none of them.  In every set
 * sqr_add_c2() simply forwards to mul_add_c2() with (a)[i] and (a)[j]. */
#ifdef BN_LLONG
/* Double-width variant.  These expand to bare statement sequences (no
 * enclosing braces) and use the caller's locals: t (BN_ULLONG), tt for
 * mul_add_c2, and t1/t2. */
#define mul_add_c(a,b,c0,c1,c2) \
	t=(BN_ULLONG)a*b; \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

/* The product is doubled in tt; a wrap of that doubling goes straight
 * into c2. */
#define mul_add_c2(a,b,c0,c1,c2) \
	t=(BN_ULLONG)a*b; \
	tt=(t+t)&BN_MASK; \
	if (tt < t) c2++; \
	t1=(BN_ULONG)Lw(tt); \
	t2=(BN_ULONG)Hw(tt); \
	c0=(c0+t1)&BN_MASK2; \
	if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c(a,i,c0,c1,c2) \
	t=(BN_ULLONG)a[i]*a[i]; \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#elif defined(BN_UMULT_LOHI)

/* BN_UMULT_LOHI variant: brace-enclosed blocks that copy their operands
 * into ta/tb and still use the caller's t1/t2. */
#define mul_add_c(a,b,c0,c1,c2)	{	\
	BN_ULONG ta=(a),tb=(b);		\
	BN_UMULT_LOHI(t1,t2,ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define mul_add_c2(a,b,c0,c1,c2) {	\
	BN_ULONG ta=(a),tb=(b),t0;	\
	BN_UMULT_LOHI(t0,t1,ta,tb);	\
	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define sqr_add_c(a,i,c0,c1,c2)	{	\
	BN_ULONG ta=(a)[i];		\
	BN_UMULT_LOHI(t1,t2,ta,ta);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#elif defined(BN_UMULT_HIGH)

/* BN_UMULT_HIGH variant: the low word comes from a plain multiplication,
 * the high word from BN_UMULT_HIGH(); otherwise shaped like the
 * BN_UMULT_LOHI set. */
#define mul_add_c(a,b,c0,c1,c2)	{	\
	BN_ULONG ta=(a),tb=(b);		\
	t1 = ta * tb;			\
	t2 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define mul_add_c2(a,b,c0,c1,c2) {	\
	BN_ULONG ta=(a),tb=(b),t0;	\
	t1 = BN_UMULT_HIGH(ta,tb);	\
	t0 = ta * tb;			\
	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define sqr_add_c(a,i,c0,c1,c2)	{	\
	BN_ULONG ta=(a)[i];		\
	t1 = ta * ta;			\
	t2 = BN_UMULT_HIGH(ta,ta);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#else /* !BN_LLONG */
/* Fallback variant built on mul64()/sqr64().  Bare statement sequences
 * again; they use the caller's locals t1, t2, bl and bh. */
#define mul_add_c(a,b,c0,c1,c2) \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

/* The doubling is done one word at a time; BN_TBIT is tested before each
 * word is doubled and a set bit is carried into the next word up. */
#define mul_add_c2(a,b,c0,c1,c2) \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	if (t2 & BN_TBIT) c2++; \
	t2=(t2+t2)&BN_MASK2; \
	if (t1 & BN_TBIT) t2++; \
	t1=(t1+t1)&BN_MASK2; \
	c0=(c0+t1)&BN_MASK2;  \
	if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c(a,i,c0,c1,c2) \
	sqr64(t1,t2,(a)[i]); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
#endif /* !BN_LLONG */
549
/* Fully unrolled 8x8-word multiplication: reads a[0..7] and b[0..7] and
 * writes the 16 words r[0..15].
 * The work is organised by output column: for column k every product
 * a[i]*b[j] with i+j == k is added into a three-word accumulator through
 * mul_add_c(), the accumulator's lowest word is stored to r[k], that word
 * is cleared, and the roles of c1,c2,c3 rotate by one for the next column.
 * t/bl/bh/t1/t2 are scratch locals used by the mul_add_c() expansion. */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
#ifdef BN_LLONG
	BN_ULLONG t;
#else
	BN_ULONG bl,bh;
#endif
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0;
	c2=0;
	c3=0;
	/* column 0 */
	mul_add_c(a[0],b[0],c1,c2,c3);
	r[0]=c1;
	c1=0;
	/* column 1 */
	mul_add_c(a[0],b[1],c2,c3,c1);
	mul_add_c(a[1],b[0],c2,c3,c1);
	r[1]=c2;
	c2=0;
	/* column 2 */
	mul_add_c(a[2],b[0],c3,c1,c2);
	mul_add_c(a[1],b[1],c3,c1,c2);
	mul_add_c(a[0],b[2],c3,c1,c2);
	r[2]=c3;
	c3=0;
	/* column 3 */
	mul_add_c(a[0],b[3],c1,c2,c3);
	mul_add_c(a[1],b[2],c1,c2,c3);
	mul_add_c(a[2],b[1],c1,c2,c3);
	mul_add_c(a[3],b[0],c1,c2,c3);
	r[3]=c1;
	c1=0;
	/* column 4 */
	mul_add_c(a[4],b[0],c2,c3,c1);
	mul_add_c(a[3],b[1],c2,c3,c1);
	mul_add_c(a[2],b[2],c2,c3,c1);
	mul_add_c(a[1],b[3],c2,c3,c1);
	mul_add_c(a[0],b[4],c2,c3,c1);
	r[4]=c2;
	c2=0;
	/* column 5 */
	mul_add_c(a[0],b[5],c3,c1,c2);
	mul_add_c(a[1],b[4],c3,c1,c2);
	mul_add_c(a[2],b[3],c3,c1,c2);
	mul_add_c(a[3],b[2],c3,c1,c2);
	mul_add_c(a[4],b[1],c3,c1,c2);
	mul_add_c(a[5],b[0],c3,c1,c2);
	r[5]=c3;
	c3=0;
	/* column 6 */
	mul_add_c(a[6],b[0],c1,c2,c3);
	mul_add_c(a[5],b[1],c1,c2,c3);
	mul_add_c(a[4],b[2],c1,c2,c3);
	mul_add_c(a[3],b[3],c1,c2,c3);
	mul_add_c(a[2],b[4],c1,c2,c3);
	mul_add_c(a[1],b[5],c1,c2,c3);
	mul_add_c(a[0],b[6],c1,c2,c3);
	r[6]=c1;
	c1=0;
	/* column 7: the widest one, eight products */
	mul_add_c(a[0],b[7],c2,c3,c1);
	mul_add_c(a[1],b[6],c2,c3,c1);
	mul_add_c(a[2],b[5],c2,c3,c1);
	mul_add_c(a[3],b[4],c2,c3,c1);
	mul_add_c(a[4],b[3],c2,c3,c1);
	mul_add_c(a[5],b[2],c2,c3,c1);
	mul_add_c(a[6],b[1],c2,c3,c1);
	mul_add_c(a[7],b[0],c2,c3,c1);
	r[7]=c2;
	c2=0;
	/* column 8 */
	mul_add_c(a[7],b[1],c3,c1,c2);
	mul_add_c(a[6],b[2],c3,c1,c2);
	mul_add_c(a[5],b[3],c3,c1,c2);
	mul_add_c(a[4],b[4],c3,c1,c2);
	mul_add_c(a[3],b[5],c3,c1,c2);
	mul_add_c(a[2],b[6],c3,c1,c2);
	mul_add_c(a[1],b[7],c3,c1,c2);
	r[8]=c3;
	c3=0;
	/* column 9 */
	mul_add_c(a[2],b[7],c1,c2,c3);
	mul_add_c(a[3],b[6],c1,c2,c3);
	mul_add_c(a[4],b[5],c1,c2,c3);
	mul_add_c(a[5],b[4],c1,c2,c3);
	mul_add_c(a[6],b[3],c1,c2,c3);
	mul_add_c(a[7],b[2],c1,c2,c3);
	r[9]=c1;
	c1=0;
	/* column 10 */
	mul_add_c(a[7],b[3],c2,c3,c1);
	mul_add_c(a[6],b[4],c2,c3,c1);
	mul_add_c(a[5],b[5],c2,c3,c1);
	mul_add_c(a[4],b[6],c2,c3,c1);
	mul_add_c(a[3],b[7],c2,c3,c1);
	r[10]=c2;
	c2=0;
	/* column 11 */
	mul_add_c(a[4],b[7],c3,c1,c2);
	mul_add_c(a[5],b[6],c3,c1,c2);
	mul_add_c(a[6],b[5],c3,c1,c2);
	mul_add_c(a[7],b[4],c3,c1,c2);
	r[11]=c3;
	c3=0;
	/* column 12 */
	mul_add_c(a[7],b[5],c1,c2,c3);
	mul_add_c(a[6],b[6],c1,c2,c3);
	mul_add_c(a[5],b[7],c1,c2,c3);
	r[12]=c1;
	c1=0;
	/* column 13 */
	mul_add_c(a[6],b[7],c2,c3,c1);
	mul_add_c(a[7],b[6],c2,c3,c1);
	r[13]=c2;
	c2=0;
	/* column 14; the word above it becomes r[15] */
	mul_add_c(a[7],b[7],c3,c1,c2);
	r[14]=c3;
	r[15]=c1;
	}
658
/* Unrolled 4x4-word multiply: writes r[0]..r[7] from a[0..3] and b[0..3].
 *
 * The work is organised by output column k: every mul_add_c() call in a
 * group uses an index pair (i,j) with i+j == k, after which the first
 * accumulator argument is stored to r[k] and cleared.  The three
 * accumulators c1,c2,c3 rotate roles from one column to the next.
 *
 * NOTE(review): mul_add_c is defined outside this view; presumably it adds
 * the double-word product of its first two arguments into the three-word
 * accumulator formed by its last three arguments -- confirm against the
 * macro definition.  t/bl/bh/t1/t2 are not referenced directly here and are
 * presumably scratch variables used by that macro's expansion.
 */
659void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
660 {
661#ifdef BN_LLONG
662 BN_ULLONG t;
663#else
664 BN_ULONG bl,bh;
665#endif
666 BN_ULONG t1,t2;
667 BN_ULONG c1,c2,c3;
668
669 c1=0;
670 c2=0;
671 c3=0;
/* column 0 (i+j == 0) */
672 mul_add_c(a[0],b[0],c1,c2,c3);
673 r[0]=c1;
674 c1=0;
/* column 1 */
675 mul_add_c(a[0],b[1],c2,c3,c1);
676 mul_add_c(a[1],b[0],c2,c3,c1);
677 r[1]=c2;
678 c2=0;
/* column 2 */
679 mul_add_c(a[2],b[0],c3,c1,c2);
680 mul_add_c(a[1],b[1],c3,c1,c2);
681 mul_add_c(a[0],b[2],c3,c1,c2);
682 r[2]=c3;
683 c3=0;
/* column 3 */
684 mul_add_c(a[0],b[3],c1,c2,c3);
685 mul_add_c(a[1],b[2],c1,c2,c3);
686 mul_add_c(a[2],b[1],c1,c2,c3);
687 mul_add_c(a[3],b[0],c1,c2,c3);
688 r[3]=c1;
689 c1=0;
/* column 4 */
690 mul_add_c(a[3],b[1],c2,c3,c1);
691 mul_add_c(a[2],b[2],c2,c3,c1);
692 mul_add_c(a[1],b[3],c2,c3,c1);
693 r[4]=c2;
694 c2=0;
/* column 5 */
695 mul_add_c(a[2],b[3],c3,c1,c2);
696 mul_add_c(a[3],b[2],c3,c1,c2);
697 r[5]=c3;
698 c3=0;
/* column 6; the next accumulator word is stored as the top word r[7] */
699 mul_add_c(a[3],b[3],c1,c2,c3);
700 r[6]=c1;
701 r[7]=c2;
702 }
703
/* Unrolled 8-word squaring: writes r[0]..r[15] from a[0..7].
 *
 * Organised by output column k.  Even columns contain one
 * sqr_add_c(a,k/2,...) call; every sqr_add_c2(a,i,j,...) call in a column
 * has i > j and i+j == k.  After each column the first accumulator argument
 * is stored to r[k] and cleared, and c1,c2,c3 rotate roles.
 *
 * NOTE(review): sqr_add_c and sqr_add_c2 are defined outside this view;
 * presumably they accumulate a[i]*a[i] and 2*a[i]*a[j] respectively --
 * confirm against the macro definitions.  t/tt/bl/bh/t1/t2 are not
 * referenced directly here and are presumably scratch for those macros.
 */
704void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
705 {
706#ifdef BN_LLONG
707 BN_ULLONG t,tt;
708#else
709 BN_ULONG bl,bh;
710#endif
711 BN_ULONG t1,t2;
712 BN_ULONG c1,c2,c3;
713
714 c1=0;
715 c2=0;
716 c3=0;
/* column 0 */
717 sqr_add_c(a,0,c1,c2,c3);
718 r[0]=c1;
719 c1=0;
/* column 1 */
720 sqr_add_c2(a,1,0,c2,c3,c1);
721 r[1]=c2;
722 c2=0;
/* column 2 */
723 sqr_add_c(a,1,c3,c1,c2);
724 sqr_add_c2(a,2,0,c3,c1,c2);
725 r[2]=c3;
726 c3=0;
/* column 3 */
727 sqr_add_c2(a,3,0,c1,c2,c3);
728 sqr_add_c2(a,2,1,c1,c2,c3);
729 r[3]=c1;
730 c1=0;
/* column 4 */
731 sqr_add_c(a,2,c2,c3,c1);
732 sqr_add_c2(a,3,1,c2,c3,c1);
733 sqr_add_c2(a,4,0,c2,c3,c1);
734 r[4]=c2;
735 c2=0;
/* column 5 */
736 sqr_add_c2(a,5,0,c3,c1,c2);
737 sqr_add_c2(a,4,1,c3,c1,c2);
738 sqr_add_c2(a,3,2,c3,c1,c2);
739 r[5]=c3;
740 c3=0;
/* column 6 */
741 sqr_add_c(a,3,c1,c2,c3);
742 sqr_add_c2(a,4,2,c1,c2,c3);
743 sqr_add_c2(a,5,1,c1,c2,c3);
744 sqr_add_c2(a,6,0,c1,c2,c3);
745 r[6]=c1;
746 c1=0;
/* column 7 */
747 sqr_add_c2(a,7,0,c2,c3,c1);
748 sqr_add_c2(a,6,1,c2,c3,c1);
749 sqr_add_c2(a,5,2,c2,c3,c1);
750 sqr_add_c2(a,4,3,c2,c3,c1);
751 r[7]=c2;
752 c2=0;
/* column 8 */
753 sqr_add_c(a,4,c3,c1,c2);
754 sqr_add_c2(a,5,3,c3,c1,c2);
755 sqr_add_c2(a,6,2,c3,c1,c2);
756 sqr_add_c2(a,7,1,c3,c1,c2);
757 r[8]=c3;
758 c3=0;
/* column 9 */
759 sqr_add_c2(a,7,2,c1,c2,c3);
760 sqr_add_c2(a,6,3,c1,c2,c3);
761 sqr_add_c2(a,5,4,c1,c2,c3);
762 r[9]=c1;
763 c1=0;
/* column 10 */
764 sqr_add_c(a,5,c2,c3,c1);
765 sqr_add_c2(a,6,4,c2,c3,c1);
766 sqr_add_c2(a,7,3,c2,c3,c1);
767 r[10]=c2;
768 c2=0;
/* column 11 */
769 sqr_add_c2(a,7,4,c3,c1,c2);
770 sqr_add_c2(a,6,5,c3,c1,c2);
771 r[11]=c3;
772 c3=0;
/* column 12 */
773 sqr_add_c(a,6,c1,c2,c3);
774 sqr_add_c2(a,7,5,c1,c2,c3);
775 r[12]=c1;
776 c1=0;
/* column 13 */
777 sqr_add_c2(a,7,6,c2,c3,c1);
778 r[13]=c2;
779 c2=0;
/* column 14; the next accumulator word is stored as the top word r[15] */
780 sqr_add_c(a,7,c3,c1,c2);
781 r[14]=c3;
782 r[15]=c1;
783 }
784
/* Unrolled 4-word squaring: writes r[0]..r[7] from a[0..3].
 *
 * Organised by output column k.  Even columns contain one
 * sqr_add_c(a,k/2,...) call; every sqr_add_c2(a,i,j,...) call in a column
 * has i > j and i+j == k.  After each column the first accumulator argument
 * is stored to r[k] and cleared, and c1,c2,c3 rotate roles.
 *
 * NOTE(review): sqr_add_c and sqr_add_c2 are defined outside this view;
 * presumably they accumulate a[i]*a[i] and 2*a[i]*a[j] respectively --
 * confirm against the macro definitions.  t/tt/bl/bh/t1/t2 are not
 * referenced directly here and are presumably scratch for those macros.
 */
785void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
786 {
787#ifdef BN_LLONG
788 BN_ULLONG t,tt;
789#else
790 BN_ULONG bl,bh;
791#endif
792 BN_ULONG t1,t2;
793 BN_ULONG c1,c2,c3;
794
795 c1=0;
796 c2=0;
797 c3=0;
/* column 0 */
798 sqr_add_c(a,0,c1,c2,c3);
799 r[0]=c1;
800 c1=0;
/* column 1 */
801 sqr_add_c2(a,1,0,c2,c3,c1);
802 r[1]=c2;
803 c2=0;
/* column 2 */
804 sqr_add_c(a,1,c3,c1,c2);
805 sqr_add_c2(a,2,0,c3,c1,c2);
806 r[2]=c3;
807 c3=0;
/* column 3 */
808 sqr_add_c2(a,3,0,c1,c2,c3);
809 sqr_add_c2(a,2,1,c1,c2,c3);
810 r[3]=c1;
811 c1=0;
/* column 4 */
812 sqr_add_c(a,2,c2,c3,c1);
813 sqr_add_c2(a,3,1,c2,c3,c1);
814 r[4]=c2;
815 c2=0;
/* column 5 */
816 sqr_add_c2(a,3,2,c3,c1,c2);
817 r[5]=c3;
818 c3=0;
/* column 6; the next accumulator word is stored as the top word r[7] */
819 sqr_add_c(a,3,c1,c2,c3);
820 r[6]=c1;
821 r[7]=c2;
822 }
823#else /* !BN_MUL_COMBA */
824
825/* hmm... is it faster just to do a multiply? */
826#undef bn_sqr_comba4
827void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
828 {
829 BN_ULONG t[8];
830 bn_sqr_normal(r,a,4,t);
831 }
832
833#undef bn_sqr_comba8
834void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
835 {
836 BN_ULONG t[16];
837 bn_sqr_normal(r,a,8,t);
838 }
839
840void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
841 {
842 r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
843 r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
844 r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
845 r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
846 }
847
848void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
849 {
850 r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
851 r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
852 r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
853 r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
854 r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
855 r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
856 r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
857 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
858 }
859
860#endif /* !BN_MUL_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
deleted file mode 100644
index c11fb4ccc2..0000000000
--- a/src/lib/libcrypto/bn/bn_blind.c
+++ /dev/null
@@ -1,365 +0,0 @@
1/* crypto/bn/bn_blind.c */
2/* ====================================================================
3 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
56 * All rights reserved.
57 *
58 * This package is an SSL implementation written
59 * by Eric Young (eay@cryptsoft.com).
60 * The implementation was written so as to conform with Netscapes SSL.
61 *
62 * This library is free for commercial and non-commercial use as long as
63 * the following conditions are aheared to. The following conditions
64 * apply to all code found in this distribution, be it the RC4, RSA,
65 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
66 * included with this distribution is covered by the same copyright terms
67 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
68 *
69 * Copyright remains Eric Young's, and as such any Copyright notices in
70 * the code are not to be removed.
71 * If this package is used in a product, Eric Young should be given attribution
72 * as the author of the parts of the library used.
73 * This can be in the form of a textual message at program startup or
74 * in documentation (online or textual) provided with the package.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * "This product includes cryptographic software written by
87 * Eric Young (eay@cryptsoft.com)"
88 * The word 'cryptographic' can be left out if the rouines from the library
89 * being used are not cryptographic related :-).
90 * 4. If you include any Windows specific code (or a derivative thereof) from
91 * the apps directory (application code) you must include an acknowledgement:
92 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
93 *
94 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
95 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
96 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
97 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
98 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
99 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
100 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
101 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
102 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
103 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
104 * SUCH DAMAGE.
105 *
106 * The licence and distribution terms for any publically available version or
107 * derivative of this code cannot be changed. i.e. this code cannot simply be
108 * copied and put under another distribution licence
109 * [including the GNU Public Licence.]
110 */
111
112#include <stdio.h>
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define BN_BLINDING_COUNTER 32
117
/* Blinding context shared by the BN_BLINDING_* functions in this file. */
118struct bn_blinding_st
119 {
120 BIGNUM *A; /* factor multiplied into n by BN_BLINDING_convert_ex() */
121 BIGNUM *Ai; /* factor multiplied into n by BN_BLINDING_invert_ex() when no r is passed */
122 BIGNUM *e; /* exponent applied to A in BN_BLINDING_create_param(); NULL disables re-creation in BN_BLINDING_update() */
123 BIGNUM *mod; /* modulus; BN_BLINDING_new() stores a BN_dup() copy, BN_BLINDING_free() frees it */
124 unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
125 * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
126 unsigned int counter; /* counts down from BN_BLINDING_COUNTER in BN_BLINDING_update() */
127 unsigned long flags; /* BN_BLINDING_NO_UPDATE / BN_BLINDING_NO_RECREATE are tested in BN_BLINDING_update() */
128 BN_MONT_CTX *m_ctx; /* passed to bn_mod_exp; only used when both are non-NULL */
/* optional exponentiation callback; BN_BLINDING_create_param() falls back to BN_mod_exp() otherwise */
129 int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
130 const BIGNUM *m, BN_CTX *ctx,
131 BN_MONT_CTX *m_ctx);
132 };
133
134BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod)
135 {
136 BN_BLINDING *ret=NULL;
137
138 bn_check_top(mod);
139
140 if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL)
141 {
142 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
143 return(NULL);
144 }
145 memset(ret,0,sizeof(BN_BLINDING));
146 if (A != NULL)
147 {
148 if ((ret->A = BN_dup(A)) == NULL) goto err;
149 }
150 if (Ai != NULL)
151 {
152 if ((ret->Ai = BN_dup(Ai)) == NULL) goto err;
153 }
154
155 /* save a copy of mod in the BN_BLINDING structure */
156 if ((ret->mod = BN_dup(mod)) == NULL) goto err;
157 if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
158 BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
159
160 ret->counter = BN_BLINDING_COUNTER;
161 return(ret);
162err:
163 if (ret != NULL) BN_BLINDING_free(ret);
164 return(NULL);
165 }
166
167void BN_BLINDING_free(BN_BLINDING *r)
168 {
169 if(r == NULL)
170 return;
171
172 if (r->A != NULL) BN_free(r->A );
173 if (r->Ai != NULL) BN_free(r->Ai);
174 if (r->e != NULL) BN_free(r->e );
175 if (r->mod != NULL) BN_free(r->mod);
176 OPENSSL_free(r);
177 }
178
179int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
180 {
181 int ret=0;
182
183 if ((b->A == NULL) || (b->Ai == NULL))
184 {
185 BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED);
186 goto err;
187 }
188
189 if (--(b->counter) == 0 && b->e != NULL &&
190 !(b->flags & BN_BLINDING_NO_RECREATE))
191 {
192 /* re-create blinding parameters */
193 if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
194 goto err;
195 }
196 else if (!(b->flags & BN_BLINDING_NO_UPDATE))
197 {
198 if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err;
199 if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err;
200 }
201
202 ret=1;
203err:
204 if (b->counter == 0)
205 b->counter = BN_BLINDING_COUNTER;
206 return(ret);
207 }
208
209int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
210 {
211 return BN_BLINDING_convert_ex(n, NULL, b, ctx);
212 }
213
214int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
215 {
216 int ret = 1;
217
218 bn_check_top(n);
219
220 if ((b->A == NULL) || (b->Ai == NULL))
221 {
222 BNerr(BN_F_BN_BLINDING_CONVERT_EX,BN_R_NOT_INITIALIZED);
223 return(0);
224 }
225
226 if (r != NULL)
227 {
228 if (!BN_copy(r, b->Ai)) ret=0;
229 }
230
231 if (!BN_mod_mul(n,n,b->A,b->mod,ctx)) ret=0;
232
233 return ret;
234 }
235
236int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
237 {
238 return BN_BLINDING_invert_ex(n, NULL, b, ctx);
239 }
240
241int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
242 {
243 int ret;
244
245 bn_check_top(n);
246 if ((b->A == NULL) || (b->Ai == NULL))
247 {
248 BNerr(BN_F_BN_BLINDING_INVERT_EX,BN_R_NOT_INITIALIZED);
249 return(0);
250 }
251
252 if (r != NULL)
253 ret = BN_mod_mul(n, n, r, b->mod, ctx);
254 else
255 ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx);
256
257 if (ret >= 0)
258 {
259 if (!BN_BLINDING_update(b,ctx))
260 return(0);
261 }
262 bn_check_top(n);
263 return(ret);
264 }
265
266unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b)
267 {
268 return b->thread_id;
269 }
270
271void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n)
272 {
273 b->thread_id = n;
274 }
275
276unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
277 {
278 return b->flags;
279 }
280
281void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
282 {
283 b->flags = flags;
284 }
285
286BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
287 const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
288 int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
289 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
290 BN_MONT_CTX *m_ctx)
291{
292 int retry_counter = 32;
293 BN_BLINDING *ret = NULL;
294
295 if (b == NULL)
296 ret = BN_BLINDING_new(NULL, NULL, m);
297 else
298 ret = b;
299
300 if (ret == NULL)
301 goto err;
302
303 if (ret->A == NULL && (ret->A = BN_new()) == NULL)
304 goto err;
305 if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL)
306 goto err;
307
308 if (e != NULL)
309 {
310 if (ret->e != NULL)
311 BN_free(ret->e);
312 ret->e = BN_dup(e);
313 }
314 if (ret->e == NULL)
315 goto err;
316
317 if (bn_mod_exp != NULL)
318 ret->bn_mod_exp = bn_mod_exp;
319 if (m_ctx != NULL)
320 ret->m_ctx = m_ctx;
321
322 do {
323 if (!BN_rand_range(ret->A, ret->mod)) goto err;
324 if (BN_mod_inverse(ret->Ai, ret->A, ret->mod, ctx) == NULL)
325 {
326 /* this should almost never happen for good RSA keys */
327 unsigned long error = ERR_peek_last_error();
328 if (ERR_GET_REASON(error) == BN_R_NO_INVERSE)
329 {
330 if (retry_counter-- == 0)
331 {
332 BNerr(BN_F_BN_BLINDING_CREATE_PARAM,
333 BN_R_TOO_MANY_ITERATIONS);
334 goto err;
335 }
336 ERR_clear_error();
337 }
338 else
339 goto err;
340 }
341 else
342 break;
343 } while (1);
344
345 if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL)
346 {
347 if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
348 goto err;
349 }
350 else
351 {
352 if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
353 goto err;
354 }
355
356 return ret;
357err:
358 if (b == NULL && ret != NULL)
359 {
360 BN_BLINDING_free(ret);
361 ret = NULL;
362 }
363
364 return ret;
365}
diff --git a/src/lib/libcrypto/bn/bn_const.c b/src/lib/libcrypto/bn/bn_const.c
deleted file mode 100644
index eb60a25b3c..0000000000
--- a/src/lib/libcrypto/bn/bn_const.c
+++ /dev/null
@@ -1,402 +0,0 @@
1/* crypto/bn/knownprimes.c */
2/* Insert boilerplate */
3
4#include "bn.h"
5
6/* "First Oakley Default Group" from RFC2409, section 6.1.
7 *
8 * The prime is: 2^768 - 2^704 - 1 + 2^64 * { [2^638 pi] + 149686 }
9 *
10 * RFC2409 specifies a generator of 2.
11 * RFC2412 specifies a generator of 22.
12 */
13
/* Hands the 96-byte (768-bit) constant table below to BN_bin2bn(), passing
 * the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
14BIGNUM *get_rfc2409_prime_768(BIGNUM *bn)
15 {
16 static const unsigned char RFC2409_PRIME_768[]={
17 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
18 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
19 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
20 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
21 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
22 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
23 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
24 0xA6,0x3A,0x36,0x20,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
25 };
26 return BN_bin2bn(RFC2409_PRIME_768,sizeof(RFC2409_PRIME_768),bn);
27 }
28
29/* "Second Oakley Default Group" from RFC2409, section 6.2.
30 *
31 * The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }.
32 *
33 * RFC2409 specifies a generator of 2.
34 * RFC2412 specifies a generator of 22.
35 */
36
/* Hands the 128-byte (1024-bit) constant table below to BN_bin2bn(),
 * passing the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
37BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn)
38 {
39 static const unsigned char RFC2409_PRIME_1024[]={
40 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
41 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
42 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
43 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
44 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
45 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
46 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
47 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
48 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
49 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE6,0x53,0x81,
50 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
51 };
52 return BN_bin2bn(RFC2409_PRIME_1024,sizeof(RFC2409_PRIME_1024),bn);
53 }
54
55/* "1536-bit MODP Group" from RFC3526, Section 2.
56 *
57 * The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 }
58 *
59 * RFC3526 specifies a generator of 2.
60 * RFC2412 specifies a generator of 22.
61 */
62
/* Hands the 192-byte (1536-bit) constant table below to BN_bin2bn(),
 * passing the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
63BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn)
64 {
65 static const unsigned char RFC3526_PRIME_1536[]={
66 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
67 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
68 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
69 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
70 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
71 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
72 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
73 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
74 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
75 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
76 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
77 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
78 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
79 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
80 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
81 0xCA,0x23,0x73,0x27,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
82 };
83 return BN_bin2bn(RFC3526_PRIME_1536,sizeof(RFC3526_PRIME_1536),bn);
84 }
85
86/* "2048-bit MODP Group" from RFC3526, Section 3.
87 *
88 * The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 }
89 *
90 * RFC3526 specifies a generator of 2.
91 */
92
/* Hands the 256-byte (2048-bit) constant table below to BN_bin2bn(),
 * passing the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
93BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn)
94 {
95 static const unsigned char RFC3526_PRIME_2048[]={
96 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
97 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
98 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
99 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
100 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
101 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
102 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
103 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
104 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
105 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
106 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
107 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
108 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
109 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
110 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
111 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
112 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
113 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
114 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
115 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
116 0x15,0x72,0x8E,0x5A,0x8A,0xAC,0xAA,0x68,0xFF,0xFF,0xFF,0xFF,
117 0xFF,0xFF,0xFF,0xFF,
118 };
119 return BN_bin2bn(RFC3526_PRIME_2048,sizeof(RFC3526_PRIME_2048),bn);
120 }
121
122/* "3072-bit MODP Group" from RFC3526, Section 4.
123 *
124 * The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 }
125 *
126 * RFC3526 specifies a generator of 2.
127 */
128
/* Hands the 384-byte (3072-bit) constant table below to BN_bin2bn(),
 * passing the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
129BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn)
130 {
131 static const unsigned char RFC3526_PRIME_3072[]={
132 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
133 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
134 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
135 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
136 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
137 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
138 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
139 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
140 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
141 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
142 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
143 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
144 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
145 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
146 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
147 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
148 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
149 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
150 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
151 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
152 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
153 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
154 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
155 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
156 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
157 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
158 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
159 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
160 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
161 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
162 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
163 0xA9,0x3A,0xD2,0xCA,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
164 };
165 return BN_bin2bn(RFC3526_PRIME_3072,sizeof(RFC3526_PRIME_3072),bn);
166 }
167
168/* "4096-bit MODP Group" from RFC3526, Section 5.
169 *
170 * The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 }
171 *
172 * RFC3526 specifies a generator of 2.
173 */
174
/* Hands the 512-byte (4096-bit) constant table below to BN_bin2bn(),
 * passing the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
175BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn)
176 {
177 static const unsigned char RFC3526_PRIME_4096[]={
178 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
179 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
180 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
181 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
182 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
183 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
184 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
185 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
186 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
187 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
188 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
189 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
190 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
191 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
192 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
193 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
194 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
195 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
196 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
197 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
198 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
199 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
200 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
201 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
202 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
203 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
204 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
205 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
206 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
207 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
208 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
209 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
210 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
211 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
212 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
213 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
214 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
215 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
216 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
217 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
218 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
219 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x06,0x31,0x99,
220 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
221 };
222 return BN_bin2bn(RFC3526_PRIME_4096,sizeof(RFC3526_PRIME_4096),bn);
223 }
224
225/* "6144-bit MODP Group" from RFC3526, Section 6.
226 *
227 * The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 }
228 *
229 * RFC3526 specifies a generator of 2.
230 */
231
/* Hands the 768-byte (6144-bit) constant table below to BN_bin2bn(),
 * passing the caller's bn through as the destination argument, and returns
 * BN_bin2bn()'s result unchanged.
 * NOTE(review): per the BN_bin2bn documentation a NULL bn presumably makes
 * it allocate a new BIGNUM and NULL is returned on error -- confirm.
 */
232BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn)
233 {
234 static const unsigned char RFC3526_PRIME_6144[]={
235 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
236 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
237 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
238 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
239 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
240 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
241 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
242 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
243 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
244 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
245 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
246 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
247 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
248 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
249 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
250 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
251 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
252 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
253 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
254 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
255 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
256 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
257 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
258 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
259 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
260 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
261 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
262 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
263 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
264 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
265 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
266 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
267 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
268 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
269 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
270 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
271 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
272 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
273 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
274 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
275 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
276 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92,
277 0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2,
278 0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD,
279 0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F,
280 0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31,
281 0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB,
282 0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B,
283 0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51,
284 0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF,
285 0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15,
286 0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6,
287 0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31,
288 0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3,
289 0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7,
290 0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA,
291 0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2,
292 0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28,
293 0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D,
294 0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C,
295 0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7,
296 0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE,
297 0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E,
298 0x6D,0xCC,0x40,0x24,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
299 };
300 return BN_bin2bn(RFC3526_PRIME_6144,sizeof(RFC3526_PRIME_6144),bn);
301 }
302
303/* "8192-bit MODP Group" from RFC3526, Section 7.
304 *
305 * The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 }
306 *
307 * RFC3526 specifies a generator of 2.
 *
 * get_rfc3526_prime_8192() passes the 1024-byte (8192-bit) constant table
 * below, together with the caller-supplied 'bn', to BN_bin2bn() and returns
 * whatever BN_bin2bn() returns.  The table is written most significant byte
 * first: it opens and closes with eight 0xFF bytes, matching the all-ones
 * top and bottom 64 bits implied by the formula above.
308 */
309
310BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn)
311 {
/* The table must stay byte-for-byte identical to the RFC value. */
312 static const unsigned char RFC3526_PRIME_8192[]={
313 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
314 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
315 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
316 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
317 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
318 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
319 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
320 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
321 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
322 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
323 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
324 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
325 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
326 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
327 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
328 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
329 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
330 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
331 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
332 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
333 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
334 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
335 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
336 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
337 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
338 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
339 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
340 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
341 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
342 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
343 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
344 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
345 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
346 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
347 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
348 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
349 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
350 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
351 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
352 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
353 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
354 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92,
355 0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2,
356 0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD,
357 0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F,
358 0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31,
359 0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB,
360 0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B,
361 0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51,
362 0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF,
363 0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15,
364 0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6,
365 0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31,
366 0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3,
367 0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7,
368 0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA,
369 0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2,
370 0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28,
371 0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D,
372 0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C,
373 0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7,
374 0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE,
375 0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E,
376 0x6D,0xBE,0x11,0x59,0x74,0xA3,0x92,0x6F,0x12,0xFE,0xE5,0xE4,
377 0x38,0x77,0x7C,0xB6,0xA9,0x32,0xDF,0x8C,0xD8,0xBE,0xC4,0xD0,
378 0x73,0xB9,0x31,0xBA,0x3B,0xC8,0x32,0xB6,0x8D,0x9D,0xD3,0x00,
379 0x74,0x1F,0xA7,0xBF,0x8A,0xFC,0x47,0xED,0x25,0x76,0xF6,0x93,
380 0x6B,0xA4,0x24,0x66,0x3A,0xAB,0x63,0x9C,0x5A,0xE4,0xF5,0x68,
381 0x34,0x23,0xB4,0x74,0x2B,0xF1,0xC9,0x78,0x23,0x8F,0x16,0xCB,
382 0xE3,0x9D,0x65,0x2D,0xE3,0xFD,0xB8,0xBE,0xFC,0x84,0x8A,0xD9,
383 0x22,0x22,0x2E,0x04,0xA4,0x03,0x7C,0x07,0x13,0xEB,0x57,0xA8,
384 0x1A,0x23,0xF0,0xC7,0x34,0x73,0xFC,0x64,0x6C,0xEA,0x30,0x6B,
385 0x4B,0xCB,0xC8,0x86,0x2F,0x83,0x85,0xDD,0xFA,0x9D,0x4B,0x7F,
386 0xA2,0xC0,0x87,0xE8,0x79,0x68,0x33,0x03,0xED,0x5B,0xDD,0x3A,
387 0x06,0x2B,0x3C,0xF5,0xB3,0xA2,0x78,0xA6,0x6D,0x2A,0x13,0xF8,
388 0x3F,0x44,0xF8,0x2D,0xDF,0x31,0x0E,0xE0,0x74,0xAB,0x6A,0x36,
389 0x45,0x97,0xE8,0x99,0xA0,0x25,0x5D,0xC1,0x64,0xF3,0x1C,0xC5,
390 0x08,0x46,0x85,0x1D,0xF9,0xAB,0x48,0x19,0x5D,0xED,0x7E,0xA1,
391 0xB1,0xD5,0x10,0xBD,0x7E,0xE7,0x4D,0x73,0xFA,0xF3,0x6B,0xC3,
392 0x1E,0xCF,0xA2,0x68,0x35,0x90,0x46,0xF4,0xEB,0x87,0x9F,0x92,
393 0x40,0x09,0x43,0x8B,0x48,0x1C,0x6C,0xD7,0x88,0x9A,0x00,0x2E,
394 0xD5,0xEE,0x38,0x2B,0xC9,0x19,0x0D,0xA6,0xFC,0x02,0x6E,0x47,
395 0x95,0x58,0xE4,0x47,0x56,0x77,0xE9,0xAA,0x9E,0x30,0x50,0xE2,
396 0x76,0x56,0x94,0xDF,0xC8,0x1F,0x56,0xE8,0x80,0xB9,0x6E,0x71,
397 0x60,0xC9,0x80,0xDD,0x98,0xED,0xD3,0xDF,0xFF,0xFF,0xFF,0xFF,
398 0xFF,0xFF,0xFF,0xFF,
399 };
/* sizeof on the array (not a pointer) yields the full table length. */
400 return BN_bin2bn(RFC3526_PRIME_8192,sizeof(RFC3526_PRIME_8192),bn);
401 }
402
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
deleted file mode 100644
index b3452f1a91..0000000000
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ /dev/null
@@ -1,454 +0,0 @@
1/* crypto/bn/bn_ctx.c */
2/* Written by Ulf Moeller for the OpenSSL project. */
3/* ====================================================================
4 * Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * 3. All advertising materials mentioning features or use of this
19 * software must display the following acknowledgment:
20 * "This product includes software developed by the OpenSSL Project
21 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
22 *
23 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
24 * endorse or promote products derived from this software without
25 * prior written permission. For written permission, please contact
26 * openssl-core@openssl.org.
27 *
28 * 5. Products derived from this software may not be called "OpenSSL"
29 * nor may "OpenSSL" appear in their names without prior written
30 * permission of the OpenSSL Project.
31 *
32 * 6. Redistributions of any form whatsoever must retain the following
33 * acknowledgment:
34 * "This product includes software developed by the OpenSSL Project
35 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
38 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
40 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
43 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
44 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
46 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
47 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
48 * OF THE POSSIBILITY OF SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This product includes cryptographic software written by Eric Young
52 * (eay@cryptsoft.com). This product includes software written by Tim
53 * Hudson (tjh@cryptsoft.com).
54 *
55 */
56
57#if !defined(BN_CTX_DEBUG) && !defined(BN_DEBUG)
58#ifndef NDEBUG
59#define NDEBUG
60#endif
61#endif
62
63#include <stdio.h>
64#include <assert.h>
65
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69/* TODO list
70 *
71 * 1. Check a bunch of "(words+1)" type hacks in various bignum functions and
72 * check they can be safely removed.
73 * - Check +1 and other ugliness in BN_from_montgomery()
74 *
75 * 2. Consider allowing a BN_new_ex() that, at least, lets you specify an
76 * appropriate 'block' size that will be honoured by bn_expand_internal() to
77 * prevent piddly little reallocations. OTOH, profiling bignum expansions in
78 * BN_CTX doesn't show this to be a big issue.
79 */
80
81/* How many bignums are in each "pool item"; */
82#define BN_CTX_POOL_SIZE 16
83/* The stack-frame index array grows on demand; this is its initial size; */
84#define BN_CTX_START_FRAMES 32
85
86/***********/
87/* BN_POOL */
88/***********/
89
90/* A bundle of bignums that can be linked with other bundles */
91typedef struct bignum_pool_item
92 {
93 /* The bignum values */
94 BIGNUM vals[BN_CTX_POOL_SIZE];
95 /* Linked-list admin */
96 struct bignum_pool_item *prev, *next;
97 } BN_POOL_ITEM;
98/* A linked-list of bignums grouped in bundles */
99typedef struct bignum_pool
100 {
101 /* Linked-list admin */
102 BN_POOL_ITEM *head, *current, *tail;
103 /* Stack depth and allocation size */
104 unsigned used, size;
105 } BN_POOL;
106static void BN_POOL_init(BN_POOL *);
107static void BN_POOL_finish(BN_POOL *);
108#ifndef OPENSSL_NO_DEPRECATED
109static void BN_POOL_reset(BN_POOL *);
110#endif
111static BIGNUM * BN_POOL_get(BN_POOL *);
112static void BN_POOL_release(BN_POOL *, unsigned int);
113
114/************/
115/* BN_STACK */
116/************/
117
/* Bookkeeping for the "stack frames" opened by BN_CTX_start() */
typedef struct bignum_ctx_stack {
	/* One saved bignum-stack index per open frame */
	unsigned int *indexes;
	/* How many frames are currently open */
	unsigned int depth;
	/* How many entries 'indexes' has room for */
	unsigned int size;
} BN_STACK;
126static void BN_STACK_init(BN_STACK *);
127static void BN_STACK_finish(BN_STACK *);
128#ifndef OPENSSL_NO_DEPRECATED
129static void BN_STACK_reset(BN_STACK *);
130#endif
131static int BN_STACK_push(BN_STACK *, unsigned int);
132static unsigned int BN_STACK_pop(BN_STACK *);
133
134/**********/
135/* BN_CTX */
136/**********/
137
138/* The opaque BN_CTX type */
139struct bignum_ctx
140 {
141 /* The bignum bundles */
142 BN_POOL pool;
143 /* The "stack frames", if you will */
144 BN_STACK stack;
145 /* The number of bignums currently assigned */
146 unsigned int used;
147 /* Depth of stack overflow */
148 int err_stack;
149 /* Block "gets" until an "end" (compatibility behaviour) */
150 int too_many;
151 };
152
/* Define BN_CTX_DEBUG to trace BN_CTX usage on stderr and find BN_CTX bugs */
#ifdef BN_CTX_DEBUG
/* Name of the BN_CTX function currently being traced */
static const char *ctxdbg_cur = NULL;

/*
 * Dump the state of 'ctx' to stderr.
 *
 * The first output line shows the context address followed by the 'dmax'
 * of every bignum currently in use.  The second line prints a "^^" marker
 * for each open stack frame, padded according to the frame's saved index.
 */
static void ctxdbg(BN_CTX *ctx)
	{
	unsigned int bnidx = 0, fpidx = 0;
	BN_POOL_ITEM *item = ctx->pool.head;
	BN_STACK *stack = &ctx->stack;

	/* Print the pointer with %p: casting it to unsigned int truncates the
	 * address wherever pointers are wider than int. */
	fprintf(stderr,"(%p): ", (void *)ctx);
	while(bnidx < ctx->used)
		{
		fprintf(stderr,"%02x ",
			(unsigned int)item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
		/* Crossed a bundle boundary: continue in the next bundle */
		if(!(bnidx % BN_CTX_POOL_SIZE))
			item = item->next;
		}
	fprintf(stderr,"\n");
	bnidx = 0;
	fprintf(stderr," : ");
	while(fpidx < stack->depth)
		{
		/* Pad until we reach the index saved for this frame */
		while(bnidx++ < stack->indexes[fpidx])
			fprintf(stderr," ");
		fprintf(stderr,"^^ ");
		bnidx++;
		fpidx++;
		}
	fprintf(stderr,"\n");
	}
/* Announce entry into a traced function and dump the context */
#define CTXDBG_ENTRY(str, ctx)	do { \
				ctxdbg_cur = (str); \
				fprintf(stderr,"Starting %s\n", ctxdbg_cur); \
				ctxdbg(ctx); \
				} while(0)
/* Announce exit from the traced function and dump the context again */
#define CTXDBG_EXIT(ctx)	do { \
				fprintf(stderr,"Ending %s\n", ctxdbg_cur); \
				ctxdbg(ctx); \
				} while(0)
#define CTXDBG_RET(ctx,ret)
#else
/* Tracing disabled: the hooks expand to nothing */
#define CTXDBG_ENTRY(str, ctx)
#define CTXDBG_EXIT(ctx)
#define CTXDBG_RET(ctx,ret)
#endif
196
197/* This function is an evil legacy and should not be used. This implementation
198 * is WYSIWYG, though I've done my best. */
199#ifndef OPENSSL_NO_DEPRECATED
200void BN_CTX_init(BN_CTX *ctx)
201 {
202 /* Assume the caller obtained the context via BN_CTX_new() and so is
203 * trying to reset it for use. Nothing else makes sense, least of all
204 * binary compatibility from a time when they could declare a static
205 * variable. */
206 BN_POOL_reset(&ctx->pool);
207 BN_STACK_reset(&ctx->stack);
208 ctx->used = 0;
209 ctx->err_stack = 0;
210 ctx->too_many = 0;
211 }
212#endif
213
214BN_CTX *BN_CTX_new(void)
215 {
216 BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX));
217 if(!ret)
218 {
219 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
220 return NULL;
221 }
222 /* Initialise the structure */
223 BN_POOL_init(&ret->pool);
224 BN_STACK_init(&ret->stack);
225 ret->used = 0;
226 ret->err_stack = 0;
227 ret->too_many = 0;
228 return ret;
229 }
230
231void BN_CTX_free(BN_CTX *ctx)
232 {
233 if (ctx == NULL)
234 return;
235#ifdef BN_CTX_DEBUG
236 {
237 BN_POOL_ITEM *pool = ctx->pool.head;
238 fprintf(stderr,"BN_CTX_free, stack-size=%d, pool-bignums=%d\n",
239 ctx->stack.size, ctx->pool.size);
240 fprintf(stderr,"dmaxs: ");
241 while(pool) {
242 unsigned loop = 0;
243 while(loop < BN_CTX_POOL_SIZE)
244 fprintf(stderr,"%02x ", pool->vals[loop++].dmax);
245 pool = pool->next;
246 }
247 fprintf(stderr,"\n");
248 }
249#endif
250 BN_STACK_finish(&ctx->stack);
251 BN_POOL_finish(&ctx->pool);
252 OPENSSL_free(ctx);
253 }
254
255void BN_CTX_start(BN_CTX *ctx)
256 {
257 CTXDBG_ENTRY("BN_CTX_start", ctx);
258 /* If we're already overflowing ... */
259 if(ctx->err_stack || ctx->too_many)
260 ctx->err_stack++;
261 /* (Try to) get a new frame pointer */
262 else if(!BN_STACK_push(&ctx->stack, ctx->used))
263 {
264 BNerr(BN_F_BN_CTX_START,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
265 ctx->err_stack++;
266 }
267 CTXDBG_EXIT(ctx);
268 }
269
270void BN_CTX_end(BN_CTX *ctx)
271 {
272 CTXDBG_ENTRY("BN_CTX_end", ctx);
273 if(ctx->err_stack)
274 ctx->err_stack--;
275 else
276 {
277 unsigned int fp = BN_STACK_pop(&ctx->stack);
278 /* Does this stack frame have anything to release? */
279 if(fp < ctx->used)
280 BN_POOL_release(&ctx->pool, ctx->used - fp);
281 ctx->used = fp;
282 /* Unjam "too_many" in case "get" had failed */
283 ctx->too_many = 0;
284 }
285 CTXDBG_EXIT(ctx);
286 }
287
288BIGNUM *BN_CTX_get(BN_CTX *ctx)
289 {
290 BIGNUM *ret;
291 CTXDBG_ENTRY("BN_CTX_get", ctx);
292 if(ctx->err_stack || ctx->too_many) return NULL;
293 if((ret = BN_POOL_get(&ctx->pool)) == NULL)
294 {
295 /* Setting too_many prevents repeated "get" attempts from
296 * cluttering the error stack. */
297 ctx->too_many = 1;
298 BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
299 return NULL;
300 }
301 /* OK, make sure the returned bignum is "zero" */
302 BN_zero(ret);
303 ctx->used++;
304 CTXDBG_RET(ctx, ret);
305 return ret;
306 }
307
308/************/
309/* BN_STACK */
310/************/
311
312static void BN_STACK_init(BN_STACK *st)
313 {
314 st->indexes = NULL;
315 st->depth = st->size = 0;
316 }
317
318static void BN_STACK_finish(BN_STACK *st)
319 {
320 if(st->size) OPENSSL_free(st->indexes);
321 }
322
323#ifndef OPENSSL_NO_DEPRECATED
324static void BN_STACK_reset(BN_STACK *st)
325 {
326 st->depth = 0;
327 }
328#endif
329
330static int BN_STACK_push(BN_STACK *st, unsigned int idx)
331 {
332 if(st->depth == st->size)
333 /* Need to expand */
334 {
335 unsigned int newsize = (st->size ?
336 (st->size * 3 / 2) : BN_CTX_START_FRAMES);
337 unsigned int *newitems = OPENSSL_malloc(newsize *
338 sizeof(unsigned int));
339 if(!newitems) return 0;
340 if(st->depth)
341 memcpy(newitems, st->indexes, st->depth *
342 sizeof(unsigned int));
343 if(st->size) OPENSSL_free(st->indexes);
344 st->indexes = newitems;
345 st->size = newsize;
346 }
347 st->indexes[(st->depth)++] = idx;
348 return 1;
349 }
350
351static unsigned int BN_STACK_pop(BN_STACK *st)
352 {
353 return st->indexes[--(st->depth)];
354 }
355
356/***********/
357/* BN_POOL */
358/***********/
359
360static void BN_POOL_init(BN_POOL *p)
361 {
362 p->head = p->current = p->tail = NULL;
363 p->used = p->size = 0;
364 }
365
366static void BN_POOL_finish(BN_POOL *p)
367 {
368 while(p->head)
369 {
370 unsigned int loop = 0;
371 BIGNUM *bn = p->head->vals;
372 while(loop++ < BN_CTX_POOL_SIZE)
373 {
374 if(bn->d) BN_clear_free(bn);
375 bn++;
376 }
377 p->current = p->head->next;
378 OPENSSL_free(p->head);
379 p->head = p->current;
380 }
381 }
382
383#ifndef OPENSSL_NO_DEPRECATED
384static void BN_POOL_reset(BN_POOL *p)
385 {
386 BN_POOL_ITEM *item = p->head;
387 while(item)
388 {
389 unsigned int loop = 0;
390 BIGNUM *bn = item->vals;
391 while(loop++ < BN_CTX_POOL_SIZE)
392 {
393 if(bn->d) BN_clear(bn);
394 bn++;
395 }
396 item = item->next;
397 }
398 p->current = p->head;
399 p->used = 0;
400 }
401#endif
402
403static BIGNUM *BN_POOL_get(BN_POOL *p)
404 {
405 if(p->used == p->size)
406 {
407 BIGNUM *bn;
408 unsigned int loop = 0;
409 BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM));
410 if(!item) return NULL;
411 /* Initialise the structure */
412 bn = item->vals;
413 while(loop++ < BN_CTX_POOL_SIZE)
414 BN_init(bn++);
415 item->prev = p->tail;
416 item->next = NULL;
417 /* Link it in */
418 if(!p->head)
419 p->head = p->current = p->tail = item;
420 else
421 {
422 p->tail->next = item;
423 p->tail = item;
424 p->current = item;
425 }
426 p->size += BN_CTX_POOL_SIZE;
427 p->used++;
428 /* Return the first bignum from the new pool */
429 return item->vals;
430 }
431 if(!p->used)
432 p->current = p->head;
433 else if((p->used % BN_CTX_POOL_SIZE) == 0)
434 p->current = p->current->next;
435 return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE);
436 }
437
438static void BN_POOL_release(BN_POOL *p, unsigned int num)
439 {
440 unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
441 p->used -= num;
442 while(num--)
443 {
444 bn_check_top(p->current->vals + offset);
445 if(!offset)
446 {
447 offset = BN_CTX_POOL_SIZE - 1;
448 p->current = p->current->prev;
449 }
450 else
451 offset--;
452 }
453 }
454
diff --git a/src/lib/libcrypto/bn/bn_depr.c b/src/lib/libcrypto/bn/bn_depr.c
deleted file mode 100644
index 27535e4fca..0000000000
--- a/src/lib/libcrypto/bn/bn_depr.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/* crypto/bn/bn_depr.c */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56/* Support for deprecated functions goes here - static linkage will only slurp
57 * this code if applications are using them directly. */
58
59#include <stdio.h>
60#include <time.h>
61#include "cryptlib.h"
62#include "bn_lcl.h"
63#include <openssl/rand.h>
64
65static void *dummy=&dummy;
66
67#ifndef OPENSSL_NO_DEPRECATED
68BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
69 const BIGNUM *add, const BIGNUM *rem,
70 void (*callback)(int,int,void *), void *cb_arg)
71 {
72 BN_GENCB cb;
73 BIGNUM *rnd=NULL;
74 int found = 0;
75
76 BN_GENCB_set_old(&cb, callback, cb_arg);
77
78 if (ret == NULL)
79 {
80 if ((rnd=BN_new()) == NULL) goto err;
81 }
82 else
83 rnd=ret;
84 if(!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb))
85 goto err;
86
87 /* we have a prime :-) */
88 found = 1;
89err:
90 if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd);
91 return(found ? rnd : NULL);
92 }
93
94int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *),
95 BN_CTX *ctx_passed, void *cb_arg)
96 {
97 BN_GENCB cb;
98 BN_GENCB_set_old(&cb, callback, cb_arg);
99 return BN_is_prime_ex(a, checks, ctx_passed, &cb);
100 }
101
102int BN_is_prime_fasttest(const BIGNUM *a, int checks,
103 void (*callback)(int,int,void *),
104 BN_CTX *ctx_passed, void *cb_arg,
105 int do_trial_division)
106 {
107 BN_GENCB cb;
108 BN_GENCB_set_old(&cb, callback, cb_arg);
109 return BN_is_prime_fasttest_ex(a, checks, ctx_passed,
110 do_trial_division, &cb);
111 }
112#endif
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
deleted file mode 100644
index 1e8e57626b..0000000000
--- a/src/lib/libcrypto/bn/bn_div.c
+++ /dev/null
@@ -1,643 +0,0 @@
1/* crypto/bn/bn_div.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <openssl/bn.h>
61#include "cryptlib.h"
62#include "bn_lcl.h"
63
64
65/* The old slow way */
66#if 0
67int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
68 BN_CTX *ctx)
69 {
70 int i,nm,nd;
71 int ret = 0;
72 BIGNUM *D;
73
74 bn_check_top(m);
75 bn_check_top(d);
76 if (BN_is_zero(d))
77 {
78 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
79 return(0);
80 }
81
82 if (BN_ucmp(m,d) < 0)
83 {
84 if (rem != NULL)
85 { if (BN_copy(rem,m) == NULL) return(0); }
86 if (dv != NULL) BN_zero(dv);
87 return(1);
88 }
89
90 BN_CTX_start(ctx);
91 D = BN_CTX_get(ctx);
92 if (dv == NULL) dv = BN_CTX_get(ctx);
93 if (rem == NULL) rem = BN_CTX_get(ctx);
94 if (D == NULL || dv == NULL || rem == NULL)
95 goto end;
96
97 nd=BN_num_bits(d);
98 nm=BN_num_bits(m);
99 if (BN_copy(D,d) == NULL) goto end;
100 if (BN_copy(rem,m) == NULL) goto end;
101
102 /* The next 2 are needed so we can do a dv->d[0]|=1 later
103 * since BN_lshift1 will only work once there is a value :-) */
104 BN_zero(dv);
105 bn_wexpand(dv,1);
106 dv->top=1;
107
108 if (!BN_lshift(D,D,nm-nd)) goto end;
109 for (i=nm-nd; i>=0; i--)
110 {
111 if (!BN_lshift1(dv,dv)) goto end;
112 if (BN_ucmp(rem,D) >= 0)
113 {
114 dv->d[0]|=1;
115 if (!BN_usub(rem,rem,D)) goto end;
116 }
117/* CAN IMPROVE (and have now :=) */
118 if (!BN_rshift1(D,D)) goto end;
119 }
120 rem->neg=BN_is_zero(rem)?0:m->neg;
121 dv->neg=m->neg^d->neg;
122 ret = 1;
123 end:
124 BN_CTX_end(ctx);
125 return(ret);
126 }
127
128#else
129
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
    && !defined(PEDANTIC) && !defined(BN_DIV3W)
# if defined(__GNUC__) && __GNUC__>=2
#  if defined(__i386) || defined (__i386__)
   /*
    * There were two reasons for implementing this template:
    * - GNU C generates a call to a function (__udivdi3 to be exact)
    *   in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
    *   understand why...);
    * - divl doesn't only calculate quotient, but also leaves
    *   remainder in %edx which we can definitely use here:-)
    *
    *					<appro@fy.chalmers.se>
    *
    * The macro stores into variables named 'q' and 'rem' that must exist
    * in the scope where it is expanded, and evaluates to 'q'.
    * The divisor uses the "r" constraint: "g" would also permit an
    * immediate operand, which the div instruction cannot encode.
    */
#  define bn_div_words(n0,n1,d0)		\
	({  asm volatile (			\
		"divl %4"			\
		: "=a"(q), "=d"(rem)		\
		: "a"(n1), "d"(n0), "r"(d0)	\
		: "cc");			\
	    q;					\
	})
#  define REMAINDER_IS_ALREADY_CALCULATED
#  elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
   /*
    * Same story here, but it's 128-bit by 64-bit division. Wow!
    *					<appro@fy.chalmers.se>
    *
    * As above, 'q' and 'rem' come from the expanding scope and the
    * divisor is constrained to a register.
    */
#  define bn_div_words(n0,n1,d0)		\
	({  asm volatile (			\
		"divq %4"			\
		: "=a"(q), "=d"(rem)		\
		: "a"(n1), "d"(n0), "r"(d0)	\
		: "cc");			\
	    q;					\
	})
#  define REMAINDER_IS_ALREADY_CALCULATED
#  endif /* __<cpu> */
# endif /* __GNUC__ */
#endif /* OPENSSL_NO_ASM */
170
171
172/* BN_div[_no_branch] computes dv := num / divisor, rounding towards
173 * zero, and sets up rm such that dv*divisor + rm = num holds.
174 * Thus:
175 * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
176 * rm->neg == num->neg (unless the remainder is zero)
177 * If 'dv' or 'rm' is NULL, the respective value is not returned.
178 */
179static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
180 const BIGNUM *divisor, BN_CTX *ctx);
181int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
182 BN_CTX *ctx)
183 {
184 int norm_shift,i,loop;
185 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
186 BN_ULONG *resp,*wnump;
187 BN_ULONG d0,d1;
188 int num_n,div_n;
189
190 /* Invalid zero-padding would have particularly bad consequences
191 * in the case of 'num', so don't just rely on bn_check_top() for this one
192 * (bn_check_top() works only for BN_DEBUG builds) */
193 if (num->top > 0 && num->d[num->top - 1] == 0)
194 {
195 BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED);
196 return 0;
197 }
198
199 bn_check_top(num);
200
201 if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
202 {
203 return BN_div_no_branch(dv, rm, num, divisor, ctx);
204 }
205
206 bn_check_top(dv);
207 bn_check_top(rm);
208 /* bn_check_top(num); */ /* 'num' has been checked already */
209 bn_check_top(divisor);
210
211 if (BN_is_zero(divisor))
212 {
213 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
214 return(0);
215 }
216
217 if (BN_ucmp(num,divisor) < 0)
218 {
219 if (rm != NULL)
220 { if (BN_copy(rm,num) == NULL) return(0); }
221 if (dv != NULL) BN_zero(dv);
222 return(1);
223 }
224
225 BN_CTX_start(ctx);
226 tmp=BN_CTX_get(ctx);
227 snum=BN_CTX_get(ctx);
228 sdiv=BN_CTX_get(ctx);
229 if (dv == NULL)
230 res=BN_CTX_get(ctx);
231 else res=dv;
232 if (sdiv == NULL || res == NULL) goto err;
233
234 /* First we normalise the numbers */
235 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
236 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
237 sdiv->neg=0;
238 norm_shift+=BN_BITS2;
239 if (!(BN_lshift(snum,num,norm_shift))) goto err;
240 snum->neg=0;
241 div_n=sdiv->top;
242 num_n=snum->top;
243 loop=num_n-div_n;
244 /* Lets setup a 'window' into snum
245 * This is the part that corresponds to the current
246 * 'area' being divided */
247 wnum.neg = 0;
248 wnum.d = &(snum->d[loop]);
249 wnum.top = div_n;
250 /* only needed when BN_ucmp messes up the values between top and max */
251 wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
252
253 /* Get the top 2 words of sdiv */
254 /* div_n=sdiv->top; */
255 d0=sdiv->d[div_n-1];
256 d1=(div_n == 1)?0:sdiv->d[div_n-2];
257
258 /* pointer to the 'top' of snum */
259 wnump= &(snum->d[num_n-1]);
260
261 /* Setup to 'res' */
262 res->neg= (num->neg^divisor->neg);
263 if (!bn_wexpand(res,(loop+1))) goto err;
264 res->top=loop;
265 resp= &(res->d[loop-1]);
266
267 /* space for temp */
268 if (!bn_wexpand(tmp,(div_n+1))) goto err;
269
270 if (BN_ucmp(&wnum,sdiv) >= 0)
271 {
272 /* If BN_DEBUG_RAND is defined BN_ucmp changes (via
273 * bn_pollute) the const bignum arguments =>
274 * clean the values between top and max again */
275 bn_clear_top2max(&wnum);
276 bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
277 *resp=1;
278 }
279 else
280 res->top--;
281 /* if res->top == 0 then clear the neg value otherwise decrease
282 * the resp pointer */
283 if (res->top == 0)
284 res->neg = 0;
285 else
286 resp--;
287
288 for (i=0; i<loop-1; i++, wnump--, resp--)
289 {
290 BN_ULONG q,l0;
291 /* the first part of the loop uses the top two words of
292 * snum and sdiv to calculate a BN_ULONG q such that
293 * | wnum - sdiv * q | < sdiv */
294#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
295 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
296 q=bn_div_3_words(wnump,d1,d0);
297#else
298 BN_ULONG n0,n1,rem=0;
299
300 n0=wnump[0];
301 n1=wnump[-1];
302 if (n0 == d0)
303 q=BN_MASK2;
304 else /* n0 < d0 */
305 {
306#ifdef BN_LLONG
307 BN_ULLONG t2;
308
309#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
310 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
311#else
312 q=bn_div_words(n0,n1,d0);
313#ifdef BN_DEBUG_LEVITTE
314 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
315X) -> 0x%08X\n",
316 n0, n1, d0, q);
317#endif
318#endif
319
320#ifndef REMAINDER_IS_ALREADY_CALCULATED
321 /*
322 * rem doesn't have to be BN_ULLONG. The least we
323 * know it's less that d0, isn't it?
324 */
325 rem=(n1-q*d0)&BN_MASK2;
326#endif
327 t2=(BN_ULLONG)d1*q;
328
329 for (;;)
330 {
331 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
332 break;
333 q--;
334 rem += d0;
335 if (rem < d0) break; /* don't let rem overflow */
336 t2 -= d1;
337 }
338#else /* !BN_LLONG */
339 BN_ULONG t2l,t2h,ql,qh;
340
341 q=bn_div_words(n0,n1,d0);
342#ifdef BN_DEBUG_LEVITTE
343 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
344X) -> 0x%08X\n",
345 n0, n1, d0, q);
346#endif
347#ifndef REMAINDER_IS_ALREADY_CALCULATED
348 rem=(n1-q*d0)&BN_MASK2;
349#endif
350
351#if defined(BN_UMULT_LOHI)
352 BN_UMULT_LOHI(t2l,t2h,d1,q);
353#elif defined(BN_UMULT_HIGH)
354 t2l = d1 * q;
355 t2h = BN_UMULT_HIGH(d1,q);
356#else
357 t2l=LBITS(d1); t2h=HBITS(d1);
358 ql =LBITS(q); qh =HBITS(q);
359 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
360#endif
361
362 for (;;)
363 {
364 if ((t2h < rem) ||
365 ((t2h == rem) && (t2l <= wnump[-2])))
366 break;
367 q--;
368 rem += d0;
369 if (rem < d0) break; /* don't let rem overflow */
370 if (t2l < d1) t2h--; t2l -= d1;
371 }
372#endif /* !BN_LLONG */
373 }
374#endif /* !BN_DIV3W */
375
376 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
377 tmp->d[div_n]=l0;
378 wnum.d--;
379 /* ingore top values of the bignums just sub the two
380 * BN_ULONG arrays with bn_sub_words */
381 if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
382 {
383 /* Note: As we have considered only the leading
384 * two BN_ULONGs in the calculation of q, sdiv * q
385 * might be greater than wnum (but then (q-1) * sdiv
386 * is less or equal than wnum)
387 */
388 q--;
389 if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
390 /* we can't have an overflow here (assuming
391 * that q != 0, but if q == 0 then tmp is
392 * zero anyway) */
393 (*wnump)++;
394 }
395 /* store part of the result */
396 *resp = q;
397 }
398 bn_correct_top(snum);
399 if (rm != NULL)
400 {
401 /* Keep a copy of the neg flag in num because if rm==num
402 * BN_rshift() will overwrite it.
403 */
404 int neg = num->neg;
405 BN_rshift(rm,snum,norm_shift);
406 if (!BN_is_zero(rm))
407 rm->neg = neg;
408 bn_check_top(rm);
409 }
410 BN_CTX_end(ctx);
411 return(1);
412err:
413 bn_check_top(rm);
414 BN_CTX_end(ctx);
415 return(0);
416 }
417
418
419/* BN_div_no_branch is a special version of BN_div. It does not contain
420 * branches that may leak sensitive information.
421 */
422static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
423 const BIGNUM *divisor, BN_CTX *ctx)
424 {
425 int norm_shift,i,loop;
426 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
427 BN_ULONG *resp,*wnump;
428 BN_ULONG d0,d1;
429 int num_n,div_n;
430
431 bn_check_top(dv);
432 bn_check_top(rm);
433 /* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
434 bn_check_top(divisor);
435
436 if (BN_is_zero(divisor))
437 {
438 BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
439 return(0);
440 }
441
442 BN_CTX_start(ctx);
443 tmp=BN_CTX_get(ctx);
444 snum=BN_CTX_get(ctx);
445 sdiv=BN_CTX_get(ctx);
446 if (dv == NULL)
447 res=BN_CTX_get(ctx);
448 else res=dv;
449 if (sdiv == NULL || res == NULL) goto err;
450
451 /* First we normalise the numbers */
452 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
453 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
454 sdiv->neg=0;
455 norm_shift+=BN_BITS2;
456 if (!(BN_lshift(snum,num,norm_shift))) goto err;
457 snum->neg=0;
458
459 /* Since we don't know whether snum is larger than sdiv,
460 * we pad snum with enough zeroes without changing its
461 * value.
462 */
463 if (snum->top <= sdiv->top+1)
464 {
465 if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
466 for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
467 snum->top = sdiv->top + 2;
468 }
469 else
470 {
471 if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
472 snum->d[snum->top] = 0;
473 snum->top ++;
474 }
475
476 div_n=sdiv->top;
477 num_n=snum->top;
478 loop=num_n-div_n;
479 /* Lets setup a 'window' into snum
480 * This is the part that corresponds to the current
481 * 'area' being divided */
482 wnum.neg = 0;
483 wnum.d = &(snum->d[loop]);
484 wnum.top = div_n;
485 /* only needed when BN_ucmp messes up the values between top and max */
486 wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
487
488 /* Get the top 2 words of sdiv */
489 /* div_n=sdiv->top; */
490 d0=sdiv->d[div_n-1];
491 d1=(div_n == 1)?0:sdiv->d[div_n-2];
492
493 /* pointer to the 'top' of snum */
494 wnump= &(snum->d[num_n-1]);
495
496 /* Setup to 'res' */
497 res->neg= (num->neg^divisor->neg);
498 if (!bn_wexpand(res,(loop+1))) goto err;
499 res->top=loop-1;
500 resp= &(res->d[loop-1]);
501
502 /* space for temp */
503 if (!bn_wexpand(tmp,(div_n+1))) goto err;
504
505 /* if res->top == 0 then clear the neg value otherwise decrease
506 * the resp pointer */
507 if (res->top == 0)
508 res->neg = 0;
509 else
510 resp--;
511
512 for (i=0; i<loop-1; i++, wnump--, resp--)
513 {
514 BN_ULONG q,l0;
515 /* the first part of the loop uses the top two words of
516 * snum and sdiv to calculate a BN_ULONG q such that
517 * | wnum - sdiv * q | < sdiv */
518#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
519 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
520 q=bn_div_3_words(wnump,d1,d0);
521#else
522 BN_ULONG n0,n1,rem=0;
523
524 n0=wnump[0];
525 n1=wnump[-1];
526 if (n0 == d0)
527 q=BN_MASK2;
528 else /* n0 < d0 */
529 {
530#ifdef BN_LLONG
531 BN_ULLONG t2;
532
533#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
534 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
535#else
536 q=bn_div_words(n0,n1,d0);
537#ifdef BN_DEBUG_LEVITTE
538 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
539X) -> 0x%08X\n",
540 n0, n1, d0, q);
541#endif
542#endif
543
544#ifndef REMAINDER_IS_ALREADY_CALCULATED
545 /*
546 * rem doesn't have to be BN_ULLONG. The least we
547 * know it's less that d0, isn't it?
548 */
549 rem=(n1-q*d0)&BN_MASK2;
550#endif
551 t2=(BN_ULLONG)d1*q;
552
553 for (;;)
554 {
555 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
556 break;
557 q--;
558 rem += d0;
559 if (rem < d0) break; /* don't let rem overflow */
560 t2 -= d1;
561 }
562#else /* !BN_LLONG */
563 BN_ULONG t2l,t2h,ql,qh;
564
565 q=bn_div_words(n0,n1,d0);
566#ifdef BN_DEBUG_LEVITTE
567 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
568X) -> 0x%08X\n",
569 n0, n1, d0, q);
570#endif
571#ifndef REMAINDER_IS_ALREADY_CALCULATED
572 rem=(n1-q*d0)&BN_MASK2;
573#endif
574
575#if defined(BN_UMULT_LOHI)
576 BN_UMULT_LOHI(t2l,t2h,d1,q);
577#elif defined(BN_UMULT_HIGH)
578 t2l = d1 * q;
579 t2h = BN_UMULT_HIGH(d1,q);
580#else
581 t2l=LBITS(d1); t2h=HBITS(d1);
582 ql =LBITS(q); qh =HBITS(q);
583 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
584#endif
585
586 for (;;)
587 {
588 if ((t2h < rem) ||
589 ((t2h == rem) && (t2l <= wnump[-2])))
590 break;
591 q--;
592 rem += d0;
593 if (rem < d0) break; /* don't let rem overflow */
594 if (t2l < d1) t2h--; t2l -= d1;
595 }
596#endif /* !BN_LLONG */
597 }
598#endif /* !BN_DIV3W */
599
600 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
601 tmp->d[div_n]=l0;
602 wnum.d--;
603 /* ingore top values of the bignums just sub the two
604 * BN_ULONG arrays with bn_sub_words */
605 if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
606 {
607 /* Note: As we have considered only the leading
608 * two BN_ULONGs in the calculation of q, sdiv * q
609 * might be greater than wnum (but then (q-1) * sdiv
610 * is less or equal than wnum)
611 */
612 q--;
613 if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
614 /* we can't have an overflow here (assuming
615 * that q != 0, but if q == 0 then tmp is
616 * zero anyway) */
617 (*wnump)++;
618 }
619 /* store part of the result */
620 *resp = q;
621 }
622 bn_correct_top(snum);
623 if (rm != NULL)
624 {
625 /* Keep a copy of the neg flag in num because if rm==num
626 * BN_rshift() will overwrite it.
627 */
628 int neg = num->neg;
629 BN_rshift(rm,snum,norm_shift);
630 if (!BN_is_zero(rm))
631 rm->neg = neg;
632 bn_check_top(rm);
633 }
634 bn_correct_top(res);
635 BN_CTX_end(ctx);
636 return(1);
637err:
638 bn_check_top(rm);
639 BN_CTX_end(ctx);
640 return(0);
641 }
642
643#endif
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
deleted file mode 100644
index cfe2eb94a0..0000000000
--- a/src/lib/libcrypto/bn/bn_err.c
+++ /dev/null
@@ -1,150 +0,0 @@
1/* crypto/bn/bn_err.c */
2/* ====================================================================
3 * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56/* NOTE: this file was auto generated by the mkerr.pl script: any changes
57 * made to it will be overwritten when the script next updates this file,
58 * only reason strings will be preserved.
59 */
60
61#include <stdio.h>
62#include <openssl/err.h>
63#include <openssl/bn.h>
64
65/* BEGIN ERROR CODES */
66#ifndef OPENSSL_NO_ERR
67
68#define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
69#define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
70
/* Table mapping packed BN function codes (built with ERR_FUNC, i.e.
 * ERR_PACK(ERR_LIB_BN,func,0)) to the function name strings; registered
 * by ERR_load_BN_strings().  The list ends with a {0,NULL} sentinel entry.
 * The file header states this file is generated by mkerr.pl, so the
 * entries are left untouched. */
71static ERR_STRING_DATA BN_str_functs[]=
72 {
73{ERR_FUNC(BN_F_BNRAND), "BNRAND"},
74{ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"},
75{ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"},
76{ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"},
77{ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
78{ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
79{ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
80{ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
81{ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
82{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
83{ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
84{ERR_FUNC(BN_F_BN_DIV), "BN_div"},
85{ERR_FUNC(BN_F_BN_DIV_NO_BRANCH), "BN_div_no_branch"},
86{ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
87{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
88{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
89{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
90{ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
91{ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
92{ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
93{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"},
94{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"},
95{ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"},
96{ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"},
97{ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
98{ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
99{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
100{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
101{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
102{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
103{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
104{ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
105{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
106{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"},
107{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
108{ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
109{ERR_FUNC(BN_F_BN_NEW), "BN_new"},
110{ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
111{ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
112{ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
113{0,NULL}
114 };
115
/* Table mapping packed BN reason codes (built with ERR_REASON, i.e.
 * ERR_PACK(ERR_LIB_BN,0,reason)) to human-readable reason strings;
 * registered by ERR_load_BN_strings().  The list ends with a {0,NULL}
 * sentinel entry.  Per the file header, mkerr.pl preserves these reason
 * strings when it regenerates the file. */
116static ERR_STRING_DATA BN_str_reasons[]=
117 {
118{ERR_REASON(BN_R_ARG2_LT_ARG3) ,"arg2 lt arg3"},
119{ERR_REASON(BN_R_BAD_RECIPROCAL) ,"bad reciprocal"},
120{ERR_REASON(BN_R_BIGNUM_TOO_LONG) ,"bignum too long"},
121{ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS),"called with even modulus"},
122{ERR_REASON(BN_R_DIV_BY_ZERO) ,"div by zero"},
123{ERR_REASON(BN_R_ENCODING_ERROR) ,"encoding error"},
124{ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),"expand on static bignum data"},
125{ERR_REASON(BN_R_INPUT_NOT_REDUCED) ,"input not reduced"},
126{ERR_REASON(BN_R_INVALID_LENGTH) ,"invalid length"},
127{ERR_REASON(BN_R_INVALID_RANGE) ,"invalid range"},
128{ERR_REASON(BN_R_NOT_A_SQUARE) ,"not a square"},
129{ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"},
130{ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"},
131{ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"},
132{ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"},
133{ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"},
134{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"},
135{0,NULL}
136 };
137
138#endif
139
140void ERR_load_BN_strings(void)
141 {
142#ifndef OPENSSL_NO_ERR
143
144 if (ERR_func_error_string(BN_str_functs[0].error) == NULL)
145 {
146 ERR_load_strings(0,BN_str_functs);
147 ERR_load_strings(0,BN_str_reasons);
148 }
149#endif
150 }
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
deleted file mode 100644
index 70a33f0d93..0000000000
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ /dev/null
@@ -1,990 +0,0 @@
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116/* maximum precomputation table size for *variable* sliding windows */
117#define TABLE_SIZE 32
118
119/* this one works - simple but works */
120int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121 {
122 int i,bits,ret=0;
123 BIGNUM *v,*rr;
124
125 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
126 {
127 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
128 BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
129 return -1;
130 }
131
132 BN_CTX_start(ctx);
133 if ((r == a) || (r == p))
134 rr = BN_CTX_get(ctx);
135 else
136 rr = r;
137 if ((v = BN_CTX_get(ctx)) == NULL) goto err;
138
139 if (BN_copy(v,a) == NULL) goto err;
140 bits=BN_num_bits(p);
141
142 if (BN_is_odd(p))
143 { if (BN_copy(rr,a) == NULL) goto err; }
144 else { if (!BN_one(rr)) goto err; }
145
146 for (i=1; i<bits; i++)
147 {
148 if (!BN_sqr(v,v,ctx)) goto err;
149 if (BN_is_bit_set(p,i))
150 {
151 if (!BN_mul(rr,rr,v,ctx)) goto err;
152 }
153 }
154 ret=1;
155err:
156 if (r != rr) BN_copy(r,rr);
157 BN_CTX_end(ctx);
158 bn_check_top(r);
159 return(ret);
160 }
161
162
163int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
164 BN_CTX *ctx)
165 {
166 int ret;
167
168 bn_check_top(a);
169 bn_check_top(p);
170 bn_check_top(m);
171
172 /* For even modulus m = 2^k*m_odd, it might make sense to compute
173 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
174 * exponentiation for the odd part), using appropriate exponent
175 * reductions, and combine the results using the CRT.
176 *
177 * For now, we use Montgomery only if the modulus is odd; otherwise,
178 * exponentiation using the reciprocal-based quick remaindering
179 * algorithm is used.
180 *
181 * (Timing obtained with expspeed.c [computations a^p mod m
182 * where a, p, m are of the same length: 256, 512, 1024, 2048,
183 * 4096, 8192 bits], compared to the running time of the
184 * standard algorithm:
185 *
186 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
187 * 55 .. 77 % [UltraSparc processor, but
188 * debug-solaris-sparcv8-gcc conf.]
189 *
190 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
191 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
192 *
193 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
194 * at 2048 and more bits, but at 512 and 1024 bits, it was
195 * slower even than the standard algorithm!
196 *
197 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
198 * should be obtained when the new Montgomery reduction code
199 * has been integrated into OpenSSL.)
200 */
201
202#define MONT_MUL_MOD
203#define MONT_EXP_WORD
204#define RECP_MUL_MOD
205
206#ifdef MONT_MUL_MOD
207 /* I have finally been able to take out this pre-condition of
208 * the top bit being set. It was caused by an error in BN_div
209 * with negatives. There was also another problem when for a^b%m
210 * a >= m. eay 07-May-97 */
211/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
212
213 if (BN_is_odd(m))
214 {
215# ifdef MONT_EXP_WORD
216 if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
217 {
218 BN_ULONG A = a->d[0];
219 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
220 }
221 else
222# endif
223 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
224 }
225 else
226#endif
227#ifdef RECP_MUL_MOD
228 { ret=BN_mod_exp_recp(r,a,p,m,ctx); }
229#else
230 { ret=BN_mod_exp_simple(r,a,p,m,ctx); }
231#endif
232
233 bn_check_top(r);
234 return(ret);
235 }
236
237
238int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
239 const BIGNUM *m, BN_CTX *ctx)
240 {
241 int i,j,bits,ret=0,wstart,wend,window,wvalue;
242 int start=1;
243 BIGNUM *aa;
244 /* Table of variables obtained from 'ctx' */
245 BIGNUM *val[TABLE_SIZE];
246 BN_RECP_CTX recp;
247
248 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
249 {
250 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
251 BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
252 return -1;
253 }
254
255 bits=BN_num_bits(p);
256
257 if (bits == 0)
258 {
259 ret = BN_one(r);
260 return ret;
261 }
262
263 BN_CTX_start(ctx);
264 aa = BN_CTX_get(ctx);
265 val[0] = BN_CTX_get(ctx);
266 if(!aa || !val[0]) goto err;
267
268 BN_RECP_CTX_init(&recp);
269 if (m->neg)
270 {
271 /* ignore sign of 'm' */
272 if (!BN_copy(aa, m)) goto err;
273 aa->neg = 0;
274 if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
275 }
276 else
277 {
278 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
279 }
280
281 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
282 if (BN_is_zero(val[0]))
283 {
284 BN_zero(r);
285 ret = 1;
286 goto err;
287 }
288
289 window = BN_window_bits_for_exponent_size(bits);
290 if (window > 1)
291 {
292 if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
293 goto err; /* 2 */
294 j=1<<(window-1);
295 for (i=1; i<j; i++)
296 {
297 if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
298 !BN_mod_mul_reciprocal(val[i],val[i-1],
299 aa,&recp,ctx))
300 goto err;
301 }
302 }
303
304 start=1; /* This is used to avoid multiplication etc
305 * when there is only the value '1' in the
306 * buffer. */
307 wvalue=0; /* The 'value' of the window */
308 wstart=bits-1; /* The top bit of the window */
309 wend=0; /* The bottom bit of the window */
310
311 if (!BN_one(r)) goto err;
312
313 for (;;)
314 {
315 if (BN_is_bit_set(p,wstart) == 0)
316 {
317 if (!start)
318 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
319 goto err;
320 if (wstart == 0) break;
321 wstart--;
322 continue;
323 }
324 /* We now have wstart on a 'set' bit, we now need to work out
325 * how bit a window to do. To do this we need to scan
326 * forward until the last set bit before the end of the
327 * window */
328 j=wstart;
329 wvalue=1;
330 wend=0;
331 for (i=1; i<window; i++)
332 {
333 if (wstart-i < 0) break;
334 if (BN_is_bit_set(p,wstart-i))
335 {
336 wvalue<<=(i-wend);
337 wvalue|=1;
338 wend=i;
339 }
340 }
341
342 /* wend is the size of the current window */
343 j=wend+1;
344 /* add the 'bytes above' */
345 if (!start)
346 for (i=0; i<j; i++)
347 {
348 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
349 goto err;
350 }
351
352 /* wvalue will be an odd number < 2^window */
353 if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
354 goto err;
355
356 /* move the 'window' down further */
357 wstart-=wend+1;
358 wvalue=0;
359 start=0;
360 if (wstart < 0) break;
361 }
362 ret=1;
363err:
364 BN_CTX_end(ctx);
365 BN_RECP_CTX_free(&recp);
366 bn_check_top(r);
367 return(ret);
368 }
369
370
371int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
372 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
373 {
374 int i,j,bits,ret=0,wstart,wend,window,wvalue;
375 int start=1;
376 BIGNUM *d,*r;
377 const BIGNUM *aa;
378 /* Table of variables obtained from 'ctx' */
379 BIGNUM *val[TABLE_SIZE];
380 BN_MONT_CTX *mont=NULL;
381
382 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
383 {
384 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
385 }
386
387 bn_check_top(a);
388 bn_check_top(p);
389 bn_check_top(m);
390
391 if (!BN_is_odd(m))
392 {
393 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
394 return(0);
395 }
396 bits=BN_num_bits(p);
397 if (bits == 0)
398 {
399 ret = BN_one(rr);
400 return ret;
401 }
402
403 BN_CTX_start(ctx);
404 d = BN_CTX_get(ctx);
405 r = BN_CTX_get(ctx);
406 val[0] = BN_CTX_get(ctx);
407 if (!d || !r || !val[0]) goto err;
408
409 /* If this is not done, things will break in the montgomery
410 * part */
411
412 if (in_mont != NULL)
413 mont=in_mont;
414 else
415 {
416 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
417 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
418 }
419
420 if (a->neg || BN_ucmp(a,m) >= 0)
421 {
422 if (!BN_nnmod(val[0],a,m,ctx))
423 goto err;
424 aa= val[0];
425 }
426 else
427 aa=a;
428 if (BN_is_zero(aa))
429 {
430 BN_zero(rr);
431 ret = 1;
432 goto err;
433 }
434 if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
435
436 window = BN_window_bits_for_exponent_size(bits);
437 if (window > 1)
438 {
439 if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
440 j=1<<(window-1);
441 for (i=1; i<j; i++)
442 {
443 if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
444 !BN_mod_mul_montgomery(val[i],val[i-1],
445 d,mont,ctx))
446 goto err;
447 }
448 }
449
450 start=1; /* This is used to avoid multiplication etc
451 * when there is only the value '1' in the
452 * buffer. */
453 wvalue=0; /* The 'value' of the window */
454 wstart=bits-1; /* The top bit of the window */
455 wend=0; /* The bottom bit of the window */
456
457 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
458 for (;;)
459 {
460 if (BN_is_bit_set(p,wstart) == 0)
461 {
462 if (!start)
463 {
464 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
465 goto err;
466 }
467 if (wstart == 0) break;
468 wstart--;
469 continue;
470 }
471 /* We now have wstart on a 'set' bit, we now need to work out
472 * how bit a window to do. To do this we need to scan
473 * forward until the last set bit before the end of the
474 * window */
475 j=wstart;
476 wvalue=1;
477 wend=0;
478 for (i=1; i<window; i++)
479 {
480 if (wstart-i < 0) break;
481 if (BN_is_bit_set(p,wstart-i))
482 {
483 wvalue<<=(i-wend);
484 wvalue|=1;
485 wend=i;
486 }
487 }
488
489 /* wend is the size of the current window */
490 j=wend+1;
491 /* add the 'bytes above' */
492 if (!start)
493 for (i=0; i<j; i++)
494 {
495 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
496 goto err;
497 }
498
499 /* wvalue will be an odd number < 2^window */
500 if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
501 goto err;
502
503 /* move the 'window' down further */
504 wstart-=wend+1;
505 wvalue=0;
506 start=0;
507 if (wstart < 0) break;
508 }
509 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
510 ret=1;
511err:
512 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
513 BN_CTX_end(ctx);
514 bn_check_top(rr);
515 return(ret);
516 }
517
518
519/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
520 * so that accessing any of these table values shows the same access pattern as far
521 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
522 * from/to that table. */
523
524static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
525 {
526 size_t i, j;
527
528 if (bn_wexpand(b, top) == NULL)
529 return 0;
530 while (b->top < top)
531 {
532 b->d[b->top++] = 0;
533 }
534
535 for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
536 {
537 buf[j] = ((unsigned char*)b->d)[i];
538 }
539
540 bn_correct_top(b);
541 return 1;
542 }
543
544static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
545 {
546 size_t i, j;
547
548 if (bn_wexpand(b, top) == NULL)
549 return 0;
550
551 for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
552 {
553 ((unsigned char*)b->d)[i] = buf[j];
554 }
555
556 b->top = top;
557 bn_correct_top(b);
558 return 1;
559 }
560
/*
 * Given a pointer value, compute the next address that is a cache line
 * multiple (assuming MOD_EXP_CTIME_MIN_CACHE_LINE_MASK is
 * MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1).
 *
 * The result is always strictly greater than x_: a pointer that is already
 * aligned is advanced by one full cache line, so a buffer passed through this
 * macro needs MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH spare bytes.
 *
 * The pointer is converted through size_t rather than BN_ULONG because
 * BN_ULONG is not guaranteed to be as wide as a pointer.
 */
#define MOD_EXP_CTIME_ALIGN(x_) \
	((unsigned char *)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
	    (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
564
565/* This variant of BN_mod_exp_mont() uses fixed windows and the special
566 * precomputation memory layout to limit data-dependency to a minimum
567 * to protect secret exponents (cf. the hyper-threading timing attacks
568 * pointed out by Colin Percival,
569 * http://www.daemonology.net/hyperthreading-considered-harmful/)
570 */
571int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
572 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
573 {
574 int i,bits,ret=0,idx,window,wvalue;
575 int top;
576 BIGNUM *r;
577 const BIGNUM *aa;
578 BN_MONT_CTX *mont=NULL;
579
580 int numPowers;
581 unsigned char *powerbufFree=NULL;
582 int powerbufLen = 0;
583 unsigned char *powerbuf=NULL;
584 BIGNUM *computeTemp=NULL, *am=NULL;
585
586 bn_check_top(a);
587 bn_check_top(p);
588 bn_check_top(m);
589
590 top = m->top;
591
592 if (!(m->d[0] & 1))
593 {
594 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
595 return(0);
596 }
597 bits=BN_num_bits(p);
598 if (bits == 0)
599 {
600 ret = BN_one(rr);
601 return ret;
602 }
603
604 /* Initialize BIGNUM context and allocate intermediate result */
605 BN_CTX_start(ctx);
606 r = BN_CTX_get(ctx);
607 if (r == NULL) goto err;
608
609 /* Allocate a montgomery context if it was not supplied by the caller.
610 * If this is not done, things will break in the montgomery part.
611 */
612 if (in_mont != NULL)
613 mont=in_mont;
614 else
615 {
616 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
617 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
618 }
619
620 /* Get the window size to use with size of p. */
621 window = BN_window_bits_for_ctime_exponent_size(bits);
622
623 /* Allocate a buffer large enough to hold all of the pre-computed
624 * powers of a.
625 */
626 numPowers = 1 << window;
627 powerbufLen = sizeof(m->d[0])*top*numPowers;
628 if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
629 goto err;
630
631 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
632 memset(powerbuf, 0, powerbufLen);
633
634 /* Initialize the intermediate result. Do this early to save double conversion,
635 * once each for a^0 and intermediate result.
636 */
637 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
638 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
639
640 /* Initialize computeTemp as a^1 with montgomery precalcs */
641 computeTemp = BN_CTX_get(ctx);
642 am = BN_CTX_get(ctx);
643 if (computeTemp==NULL || am==NULL) goto err;
644
645 if (a->neg || BN_ucmp(a,m) >= 0)
646 {
647 if (!BN_mod(am,a,m,ctx))
648 goto err;
649 aa= am;
650 }
651 else
652 aa=a;
653 if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
654 if (!BN_copy(computeTemp, am)) goto err;
655 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
656
657 /* If the window size is greater than 1, then calculate
658 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
659 * (even powers could instead be computed as (a^(i/2))^2
660 * to use the slight performance advantage of sqr over mul).
661 */
662 if (window > 1)
663 {
664 for (i=2; i<numPowers; i++)
665 {
666 /* Calculate a^i = a^(i-1) * a */
667 if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
668 goto err;
669 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
670 }
671 }
672
673 /* Adjust the number of bits up to a multiple of the window size.
674 * If the exponent length is not a multiple of the window size, then
675 * this pads the most significant bits with zeros to normalize the
676 * scanning loop to there's no special cases.
677 *
678 * * NOTE: Making the window size a power of two less than the native
679 * * word size ensures that the padded bits won't go past the last
680 * * word in the internal BIGNUM structure. Going past the end will
681 * * still produce the correct result, but causes a different branch
682 * * to be taken in the BN_is_bit_set function.
683 */
684 bits = ((bits+window-1)/window)*window;
685 idx=bits-1; /* The top bit of the window */
686
687 /* Scan the exponent one window at a time starting from the most
688 * significant bits.
689 */
690 while (idx >= 0)
691 {
692 wvalue=0; /* The 'value' of the window */
693
694 /* Scan the window, squaring the result as we go */
695 for (i=0; i<window; i++,idx--)
696 {
697 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
698 wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
699 }
700
701 /* Fetch the appropriate pre-computed value from the pre-buf */
702 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
703
704 /* Multiply the result into the intermediate result */
705 if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
706 }
707
708 /* Convert the final result from montgomery to standard format */
709 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
710 ret=1;
711err:
712 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
713 if (powerbuf!=NULL)
714 {
715 OPENSSL_cleanse(powerbuf,powerbufLen);
716 OPENSSL_free(powerbufFree);
717 }
718 if (am!=NULL) BN_clear(am);
719 if (computeTemp!=NULL) BN_clear(computeTemp);
720 BN_CTX_end(ctx);
721 return(ret);
722 }
723
724int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
725 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
726 {
727 BN_MONT_CTX *mont = NULL;
728 int b, bits, ret=0;
729 int r_is_one;
730 BN_ULONG w, next_w;
731 BIGNUM *d, *r, *t;
732 BIGNUM *swap_tmp;
733#define BN_MOD_MUL_WORD(r, w, m) \
734 (BN_mul_word(r, (w)) && \
735 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
736 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
737 /* BN_MOD_MUL_WORD is only used with 'w' large,
738 * so the BN_ucmp test is probably more overhead
739 * than always using BN_mod (which uses BN_copy if
740 * a similar test returns true). */
741 /* We can use BN_mod and do not need BN_nnmod because our
742 * accumulator is never negative (the result of BN_mod does
743 * not depend on the sign of the modulus).
744 */
745#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
746 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
747
748 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
749 {
750 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
751 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
752 return -1;
753 }
754
755 bn_check_top(p);
756 bn_check_top(m);
757
758 if (!BN_is_odd(m))
759 {
760 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
761 return(0);
762 }
763 if (m->top == 1)
764 a %= m->d[0]; /* make sure that 'a' is reduced */
765
766 bits = BN_num_bits(p);
767 if (bits == 0)
768 {
769 ret = BN_one(rr);
770 return ret;
771 }
772 if (a == 0)
773 {
774 BN_zero(rr);
775 ret = 1;
776 return ret;
777 }
778
779 BN_CTX_start(ctx);
780 d = BN_CTX_get(ctx);
781 r = BN_CTX_get(ctx);
782 t = BN_CTX_get(ctx);
783 if (d == NULL || r == NULL || t == NULL) goto err;
784
785 if (in_mont != NULL)
786 mont=in_mont;
787 else
788 {
789 if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
790 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
791 }
792
793 r_is_one = 1; /* except for Montgomery factor */
794
795 /* bits-1 >= 0 */
796
797 /* The result is accumulated in the product r*w. */
798 w = a; /* bit 'bits-1' of 'p' is always set */
799 for (b = bits-2; b >= 0; b--)
800 {
801 /* First, square r*w. */
802 next_w = w*w;
803 if ((next_w/w) != w) /* overflow */
804 {
805 if (r_is_one)
806 {
807 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
808 r_is_one = 0;
809 }
810 else
811 {
812 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
813 }
814 next_w = 1;
815 }
816 w = next_w;
817 if (!r_is_one)
818 {
819 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
820 }
821
822 /* Second, multiply r*w by 'a' if exponent bit is set. */
823 if (BN_is_bit_set(p, b))
824 {
825 next_w = w*a;
826 if ((next_w/a) != w) /* overflow */
827 {
828 if (r_is_one)
829 {
830 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
831 r_is_one = 0;
832 }
833 else
834 {
835 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
836 }
837 next_w = a;
838 }
839 w = next_w;
840 }
841 }
842
843 /* Finally, set r:=r*w. */
844 if (w != 1)
845 {
846 if (r_is_one)
847 {
848 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
849 r_is_one = 0;
850 }
851 else
852 {
853 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
854 }
855 }
856
857 if (r_is_one) /* can happen only if a == 1*/
858 {
859 if (!BN_one(rr)) goto err;
860 }
861 else
862 {
863 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
864 }
865 ret = 1;
866err:
867 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
868 BN_CTX_end(ctx);
869 bn_check_top(rr);
870 return(ret);
871 }
872
873
874/* The old fallback, simple version :-) */
875int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
876 const BIGNUM *m, BN_CTX *ctx)
877 {
878 int i,j,bits,ret=0,wstart,wend,window,wvalue;
879 int start=1;
880 BIGNUM *d;
881 /* Table of variables obtained from 'ctx' */
882 BIGNUM *val[TABLE_SIZE];
883
884 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
885 {
886 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
887 BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
888 return -1;
889 }
890
891 bits=BN_num_bits(p);
892
893 if (bits == 0)
894 {
895 ret = BN_one(r);
896 return ret;
897 }
898
899 BN_CTX_start(ctx);
900 d = BN_CTX_get(ctx);
901 val[0] = BN_CTX_get(ctx);
902 if(!d || !val[0]) goto err;
903
904 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
905 if (BN_is_zero(val[0]))
906 {
907 BN_zero(r);
908 ret = 1;
909 goto err;
910 }
911
912 window = BN_window_bits_for_exponent_size(bits);
913 if (window > 1)
914 {
915 if (!BN_mod_mul(d,val[0],val[0],m,ctx))
916 goto err; /* 2 */
917 j=1<<(window-1);
918 for (i=1; i<j; i++)
919 {
920 if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
921 !BN_mod_mul(val[i],val[i-1],d,m,ctx))
922 goto err;
923 }
924 }
925
926 start=1; /* This is used to avoid multiplication etc
927 * when there is only the value '1' in the
928 * buffer. */
929 wvalue=0; /* The 'value' of the window */
930 wstart=bits-1; /* The top bit of the window */
931 wend=0; /* The bottom bit of the window */
932
933 if (!BN_one(r)) goto err;
934
935 for (;;)
936 {
937 if (BN_is_bit_set(p,wstart) == 0)
938 {
939 if (!start)
940 if (!BN_mod_mul(r,r,r,m,ctx))
941 goto err;
942 if (wstart == 0) break;
943 wstart--;
944 continue;
945 }
946 /* We now have wstart on a 'set' bit, we now need to work out
947 * how bit a window to do. To do this we need to scan
948 * forward until the last set bit before the end of the
949 * window */
950 j=wstart;
951 wvalue=1;
952 wend=0;
953 for (i=1; i<window; i++)
954 {
955 if (wstart-i < 0) break;
956 if (BN_is_bit_set(p,wstart-i))
957 {
958 wvalue<<=(i-wend);
959 wvalue|=1;
960 wend=i;
961 }
962 }
963
964 /* wend is the size of the current window */
965 j=wend+1;
966 /* add the 'bytes above' */
967 if (!start)
968 for (i=0; i<j; i++)
969 {
970 if (!BN_mod_mul(r,r,r,m,ctx))
971 goto err;
972 }
973
974 /* wvalue will be an odd number < 2^window */
975 if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
976 goto err;
977
978 /* move the 'window' down further */
979 wstart-=wend+1;
980 wvalue=0;
981 start=0;
982 if (wstart < 0) break;
983 }
984 ret=1;
985err:
986 BN_CTX_end(ctx);
987 bn_check_top(r);
988 return(ret);
989 }
990
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
deleted file mode 100644
index b3f43cec8c..0000000000
--- a/src/lib/libcrypto/bn/bn_exp2.c
+++ /dev/null
@@ -1,311 +0,0 @@
1/* crypto/bn/bn_exp2.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define TABLE_SIZE 32
117
118int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
119 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
120 BN_CTX *ctx, BN_MONT_CTX *in_mont)
121 {
122 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
123 int r_is_one=1;
124 BIGNUM *d,*r;
125 const BIGNUM *a_mod_m;
126 /* Tables of variables obtained from 'ctx' */
127 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
128 BN_MONT_CTX *mont=NULL;
129
130 bn_check_top(a1);
131 bn_check_top(p1);
132 bn_check_top(a2);
133 bn_check_top(p2);
134 bn_check_top(m);
135
136 if (!(m->d[0] & 1))
137 {
138 BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
139 return(0);
140 }
141 bits1=BN_num_bits(p1);
142 bits2=BN_num_bits(p2);
143 if ((bits1 == 0) && (bits2 == 0))
144 {
145 ret = BN_one(rr);
146 return ret;
147 }
148
149 bits=(bits1 > bits2)?bits1:bits2;
150
151 BN_CTX_start(ctx);
152 d = BN_CTX_get(ctx);
153 r = BN_CTX_get(ctx);
154 val1[0] = BN_CTX_get(ctx);
155 val2[0] = BN_CTX_get(ctx);
156 if(!d || !r || !val1[0] || !val2[0]) goto err;
157
158 if (in_mont != NULL)
159 mont=in_mont;
160 else
161 {
162 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
163 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
164 }
165
166 window1 = BN_window_bits_for_exponent_size(bits1);
167 window2 = BN_window_bits_for_exponent_size(bits2);
168
169 /*
170 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
171 */
172 if (a1->neg || BN_ucmp(a1,m) >= 0)
173 {
174 if (!BN_mod(val1[0],a1,m,ctx))
175 goto err;
176 a_mod_m = val1[0];
177 }
178 else
179 a_mod_m = a1;
180 if (BN_is_zero(a_mod_m))
181 {
182 BN_zero(rr);
183 ret = 1;
184 goto err;
185 }
186
187 if (!BN_to_montgomery(val1[0],a_mod_m,mont,ctx)) goto err;
188 if (window1 > 1)
189 {
190 if (!BN_mod_mul_montgomery(d,val1[0],val1[0],mont,ctx)) goto err;
191
192 j=1<<(window1-1);
193 for (i=1; i<j; i++)
194 {
195 if(((val1[i] = BN_CTX_get(ctx)) == NULL) ||
196 !BN_mod_mul_montgomery(val1[i],val1[i-1],
197 d,mont,ctx))
198 goto err;
199 }
200 }
201
202
203 /*
204 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
205 */
206 if (a2->neg || BN_ucmp(a2,m) >= 0)
207 {
208 if (!BN_mod(val2[0],a2,m,ctx))
209 goto err;
210 a_mod_m = val2[0];
211 }
212 else
213 a_mod_m = a2;
214 if (BN_is_zero(a_mod_m))
215 {
216 BN_zero(rr);
217 ret = 1;
218 goto err;
219 }
220 if (!BN_to_montgomery(val2[0],a_mod_m,mont,ctx)) goto err;
221 if (window2 > 1)
222 {
223 if (!BN_mod_mul_montgomery(d,val2[0],val2[0],mont,ctx)) goto err;
224
225 j=1<<(window2-1);
226 for (i=1; i<j; i++)
227 {
228 if(((val2[i] = BN_CTX_get(ctx)) == NULL) ||
229 !BN_mod_mul_montgomery(val2[i],val2[i-1],
230 d,mont,ctx))
231 goto err;
232 }
233 }
234
235
236 /* Now compute the power product, using independent windows. */
237 r_is_one=1;
238 wvalue1=0; /* The 'value' of the first window */
239 wvalue2=0; /* The 'value' of the second window */
240 wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */
241 wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */
242
243 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
244 for (b=bits-1; b>=0; b--)
245 {
246 if (!r_is_one)
247 {
248 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
249 goto err;
250 }
251
252 if (!wvalue1)
253 if (BN_is_bit_set(p1, b))
254 {
255 /* consider bits b-window1+1 .. b for this window */
256 i = b-window1+1;
257 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
258 i++;
259 wpos1 = i;
260 wvalue1 = 1;
261 for (i = b-1; i >= wpos1; i--)
262 {
263 wvalue1 <<= 1;
264 if (BN_is_bit_set(p1, i))
265 wvalue1++;
266 }
267 }
268
269 if (!wvalue2)
270 if (BN_is_bit_set(p2, b))
271 {
272 /* consider bits b-window2+1 .. b for this window */
273 i = b-window2+1;
274 while (!BN_is_bit_set(p2, i))
275 i++;
276 wpos2 = i;
277 wvalue2 = 1;
278 for (i = b-1; i >= wpos2; i--)
279 {
280 wvalue2 <<= 1;
281 if (BN_is_bit_set(p2, i))
282 wvalue2++;
283 }
284 }
285
286 if (wvalue1 && b == wpos1)
287 {
288 /* wvalue1 is odd and < 2^window1 */
289 if (!BN_mod_mul_montgomery(r,r,val1[wvalue1>>1],mont,ctx))
290 goto err;
291 wvalue1 = 0;
292 r_is_one = 0;
293 }
294
295 if (wvalue2 && b == wpos2)
296 {
297 /* wvalue2 is odd and < 2^window2 */
298 if (!BN_mod_mul_montgomery(r,r,val2[wvalue2>>1],mont,ctx))
299 goto err;
300 wvalue2 = 0;
301 r_is_one = 0;
302 }
303 }
304 BN_from_montgomery(rr,r,mont,ctx);
305 ret=1;
306err:
307 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
308 BN_CTX_end(ctx);
309 bn_check_top(rr);
310 return(ret);
311 }
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
deleted file mode 100644
index 4a352119ba..0000000000
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ /dev/null
@@ -1,654 +0,0 @@
1/* crypto/bn/bn_gcd.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "bn_lcl.h"
114
115static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
116
117int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
118 {
119 BIGNUM *a,*b,*t;
120 int ret=0;
121
122 bn_check_top(in_a);
123 bn_check_top(in_b);
124
125 BN_CTX_start(ctx);
126 a = BN_CTX_get(ctx);
127 b = BN_CTX_get(ctx);
128 if (a == NULL || b == NULL) goto err;
129
130 if (BN_copy(a,in_a) == NULL) goto err;
131 if (BN_copy(b,in_b) == NULL) goto err;
132 a->neg = 0;
133 b->neg = 0;
134
135 if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; }
136 t=euclid(a,b);
137 if (t == NULL) goto err;
138
139 if (BN_copy(r,t) == NULL) goto err;
140 ret=1;
141err:
142 BN_CTX_end(ctx);
143 bn_check_top(r);
144 return(ret);
145 }
146
147static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
148 {
149 BIGNUM *t;
150 int shifts=0;
151
152 bn_check_top(a);
153 bn_check_top(b);
154
155 /* 0 <= b <= a */
156 while (!BN_is_zero(b))
157 {
158 /* 0 < b <= a */
159
160 if (BN_is_odd(a))
161 {
162 if (BN_is_odd(b))
163 {
164 if (!BN_sub(a,a,b)) goto err;
165 if (!BN_rshift1(a,a)) goto err;
166 if (BN_cmp(a,b) < 0)
167 { t=a; a=b; b=t; }
168 }
169 else /* a odd - b even */
170 {
171 if (!BN_rshift1(b,b)) goto err;
172 if (BN_cmp(a,b) < 0)
173 { t=a; a=b; b=t; }
174 }
175 }
176 else /* a is even */
177 {
178 if (BN_is_odd(b))
179 {
180 if (!BN_rshift1(a,a)) goto err;
181 if (BN_cmp(a,b) < 0)
182 { t=a; a=b; b=t; }
183 }
184 else /* a even - b even */
185 {
186 if (!BN_rshift1(a,a)) goto err;
187 if (!BN_rshift1(b,b)) goto err;
188 shifts++;
189 }
190 }
191 /* 0 <= b <= a */
192 }
193
194 if (shifts)
195 {
196 if (!BN_lshift(a,a,shifts)) goto err;
197 }
198 bn_check_top(a);
199 return(a);
200err:
201 return(NULL);
202 }
203
204
205/* solves ax == 1 (mod n) */
206static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
207 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
/* BN_mod_inverse: compute the inverse of a modulo |n|.
 *
 * If either a or n carries BN_FLG_CONSTTIME the whole job is handed to
 * BN_mod_inverse_no_branch().  Otherwise one of two algorithms is used:
 * a binary inversion when n is odd and at most 450 bits (BN_BITS <= 32)
 * or 2048 bits (otherwise), and the general Euclidean algorithm in all
 * other cases.
 *
 * The result is written to `in` when it is non-NULL; otherwise a new
 * BIGNUM is obtained with BN_new() and is freed again on failure.
 * Returns the result BIGNUM, or NULL on error.  When the final A is not
 * one (i.e. gcd(a,n) != 1) BN_R_NO_INVERSE is raised and NULL returned.
 */
208BIGNUM *BN_mod_inverse(BIGNUM *in,
209 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
210 {
211 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
212 BIGNUM *ret=NULL;
213 int sign; /* sign appearing in the X/Y congruences documented below */
214
215 if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0))
216 {
217 return BN_mod_inverse_no_branch(in, a, n, ctx);
218 }
219
220 bn_check_top(a);
221 bn_check_top(n);
222
223 BN_CTX_start(ctx);
224 A = BN_CTX_get(ctx);
225 B = BN_CTX_get(ctx);
226 X = BN_CTX_get(ctx);
227 D = BN_CTX_get(ctx);
228 M = BN_CTX_get(ctx);
229 Y = BN_CTX_get(ctx);
230 T = BN_CTX_get(ctx);
231 if (T == NULL) goto err; /* only the last temporary obtained is tested */
232
233 if (in == NULL)
234 R=BN_new();
235 else
236 R=in;
237 if (R == NULL) goto err;
238
239 BN_one(X);
240 BN_zero(Y);
241 if (BN_copy(B,a) == NULL) goto err;
242 if (BN_copy(A,n) == NULL) goto err;
243 A->neg = 0;
244 if (B->neg || (BN_ucmp(B, A) >= 0))
245 {
246 if (!BN_nnmod(B, B, A, ctx)) goto err;
247 }
248 sign = -1;
249 /* From B = a mod |n|, A = |n| it follows that
250 *
251 * 0 <= B < A,
252 * -sign*X*a == B (mod |n|),
253 * sign*Y*a == A (mod |n|).
254 */
255
256 if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
257 {
258 /* Binary inversion algorithm; requires odd modulus.
259 * This is faster than the general algorithm if the modulus
260 * is sufficiently small (about 400 .. 500 bits on 32-bit
261 * systems, but much more on 64-bit systems) */
262 int shift;
263
264 while (!BN_is_zero(B))
265 {
266 /*
267 * 0 < B < |n|,
268 * 0 < A <= |n|,
269 * (1) -sign*X*a == B (mod |n|),
270 * (2) sign*Y*a == A (mod |n|)
271 */
272
273 /* Now divide B by the maximum possible power of two in the integers,
274 * and divide X by the same value mod |n|.
275 * When we're done, (1) still holds. */
276 shift = 0;
277 while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
278 {
279 shift++;
280
281 if (BN_is_odd(X))
282 {
283 if (!BN_uadd(X, X, n)) goto err;
284 }
285 /* now X is even, so we can easily divide it by two */
286 if (!BN_rshift1(X, X)) goto err;
287 }
288 if (shift > 0)
289 {
290 if (!BN_rshift(B, B, shift)) goto err;
291 }
292
293
294 /* Same for A and Y. Afterwards, (2) still holds. */
295 shift = 0;
296 while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
297 {
298 shift++;
299
300 if (BN_is_odd(Y))
301 {
302 if (!BN_uadd(Y, Y, n)) goto err;
303 }
304 /* now Y is even */
305 if (!BN_rshift1(Y, Y)) goto err;
306 }
307 if (shift > 0)
308 {
309 if (!BN_rshift(A, A, shift)) goto err;
310 }
311
312
313 /* We still have (1) and (2).
314 * Both A and B are odd.
315 * The following computations ensure that
316 *
317 * 0 <= B < |n|,
318 * 0 < A < |n|,
319 * (1) -sign*X*a == B (mod |n|),
320 * (2) sign*Y*a == A (mod |n|),
321 *
322 * and that either A or B is even in the next iteration.
323 */
324 if (BN_ucmp(B, A) >= 0)
325 {
326 /* -sign*(X + Y)*a == B - A (mod |n|) */
327 if (!BN_uadd(X, X, Y)) goto err;
328 /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
329 * actually makes the algorithm slower */
330 if (!BN_usub(B, B, A)) goto err;
331 }
332 else
333 {
334 /* sign*(X + Y)*a == A - B (mod |n|) */
335 if (!BN_uadd(Y, Y, X)) goto err;
336 /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
337 if (!BN_usub(A, A, B)) goto err;
338 }
339 }
340 }
341 else
342 {
343 /* general inversion algorithm */
344
345 while (!BN_is_zero(B))
346 {
347 BIGNUM *tmp;
348
349 /*
350 * 0 < B < A,
351 * (*) -sign*X*a == B (mod |n|),
352 * sign*Y*a == A (mod |n|)
353 */
354
355 /* (D, M) := (A/B, A%B) ... */
356 if (BN_num_bits(A) == BN_num_bits(B))
357 {
358 if (!BN_one(D)) goto err;
359 if (!BN_sub(M,A,B)) goto err;
360 }
361 else if (BN_num_bits(A) == BN_num_bits(B) + 1)
362 {
363 /* A/B is 1, 2, or 3 */
364 if (!BN_lshift1(T,B)) goto err;
365 if (BN_ucmp(A,T) < 0)
366 {
367 /* A < 2*B, so D=1 */
368 if (!BN_one(D)) goto err;
369 if (!BN_sub(M,A,B)) goto err;
370 }
371 else
372 {
373 /* A >= 2*B, so D=2 or D=3 */
374 if (!BN_sub(M,A,T)) goto err;
375 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */
376 if (BN_ucmp(A,D) < 0)
377 {
378 /* A < 3*B, so D=2 */
379 if (!BN_set_word(D,2)) goto err;
380 /* M (= A - 2*B) already has the correct value */
381 }
382 else
383 {
384 /* only D=3 remains */
385 if (!BN_set_word(D,3)) goto err;
386 /* currently M = A - 2*B, but we need M = A - 3*B */
387 if (!BN_sub(M,M,B)) goto err;
388 }
389 }
390 }
391 else
392 {
393 if (!BN_div(D,M,A,B,ctx)) goto err;
394 }
395
396 /* Now
397 * A = D*B + M;
398 * thus we have
399 * (**) sign*Y*a == D*B + M (mod |n|).
400 */
401
402 tmp=A; /* keep the BIGNUM object, the value does not matter */
403
404 /* (A, B) := (B, A mod B) ... */
405 A=B;
406 B=M;
407 /* ... so we have 0 <= B < A again */
408
409 /* Since the former M is now B and the former B is now A,
410 * (**) translates into
411 * sign*Y*a == D*A + B (mod |n|),
412 * i.e.
413 * sign*Y*a - D*A == B (mod |n|).
414 * Similarly, (*) translates into
415 * -sign*X*a == A (mod |n|).
416 *
417 * Thus,
418 * sign*Y*a + D*sign*X*a == B (mod |n|),
419 * i.e.
420 * sign*(Y + D*X)*a == B (mod |n|).
421 *
422 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
423 * -sign*X*a == B (mod |n|),
424 * sign*Y*a == A (mod |n|).
425 * Note that X and Y stay non-negative all the time.
426 */
427
428 /* most of the time D is very small, so we can optimize tmp := D*X+Y */
429 if (BN_is_one(D))
430 {
431 if (!BN_add(tmp,X,Y)) goto err;
432 }
433 else
434 {
435 if (BN_is_word(D,2))
436 {
437 if (!BN_lshift1(tmp,X)) goto err;
438 }
439 else if (BN_is_word(D,4))
440 {
441 if (!BN_lshift(tmp,X,2)) goto err;
442 }
443 else if (D->top == 1)
444 {
445 if (!BN_copy(tmp,X)) goto err;
446 if (!BN_mul_word(tmp,D->d[0])) goto err;
447 }
448 else
449 {
450 if (!BN_mul(tmp,D,X,ctx)) goto err;
451 }
452 if (!BN_add(tmp,tmp,Y)) goto err;
453 }
454
455 M=Y; /* keep the BIGNUM object, the value does not matter */
456 Y=X;
457 X=tmp;
458 sign = -sign;
459 }
460 }
461
462 /*
463 * The while loop (Euclid's algorithm) ends when
464 * A == gcd(a,n);
465 * we have
466 * sign*Y*a == A (mod |n|),
467 * where Y is non-negative.
468 */
469
470 if (sign < 0)
471 {
472 if (!BN_sub(Y,n,Y)) goto err;
473 }
474 /* Now Y*a == A (mod |n|). */
475
476
477 if (BN_is_one(A))
478 {
479 /* Y*a == 1 (mod |n|) */
480 if (!Y->neg && BN_ucmp(Y,n) < 0)
481 {
482 if (!BN_copy(R,Y)) goto err;
483 }
484 else
485 {
486 if (!BN_nnmod(R,Y,n,ctx)) goto err;
487 }
488 }
489 else
490 {
491 BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE);
492 goto err;
493 }
494 ret=R;
495err:
/* R is released only when it was allocated here (in == NULL) and we failed */
496 if ((ret == NULL) && (in == NULL)) BN_free(R);
497 BN_CTX_end(ctx);
498 bn_check_top(ret);
499 return(ret);
500 }
501
502
503/* BN_mod_inverse_no_branch is a special version of BN_mod_inverse.
504 * It does not contain branches that may leak sensitive information.
505 */
506static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
507 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
508 {
509 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
510 BIGNUM local_A, local_B;
511 BIGNUM *pA, *pB;
512 BIGNUM *ret=NULL;
513 int sign;
514
515 bn_check_top(a);
516 bn_check_top(n);
517
518 BN_CTX_start(ctx);
519 A = BN_CTX_get(ctx);
520 B = BN_CTX_get(ctx);
521 X = BN_CTX_get(ctx);
522 D = BN_CTX_get(ctx);
523 M = BN_CTX_get(ctx);
524 Y = BN_CTX_get(ctx);
525 T = BN_CTX_get(ctx);
526 if (T == NULL) goto err;
527
528 if (in == NULL)
529 R=BN_new();
530 else
531 R=in;
532 if (R == NULL) goto err;
533
534 BN_one(X);
535 BN_zero(Y);
536 if (BN_copy(B,a) == NULL) goto err;
537 if (BN_copy(A,n) == NULL) goto err;
538 A->neg = 0;
539
540 if (B->neg || (BN_ucmp(B, A) >= 0))
541 {
542 /* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
543 * BN_div_no_branch will be called eventually.
544 */
545 pB = &local_B;
546 BN_with_flags(pB, B, BN_FLG_CONSTTIME);
547 if (!BN_nnmod(B, pB, A, ctx)) goto err;
548 }
549 sign = -1;
550 /* From B = a mod |n|, A = |n| it follows that
551 *
552 * 0 <= B < A,
553 * -sign*X*a == B (mod |n|),
554 * sign*Y*a == A (mod |n|).
555 */
556
557 while (!BN_is_zero(B))
558 {
559 BIGNUM *tmp;
560
561 /*
562 * 0 < B < A,
563 * (*) -sign*X*a == B (mod |n|),
564 * sign*Y*a == A (mod |n|)
565 */
566
567 /* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
568 * BN_div_no_branch will be called eventually.
569 */
570 pA = &local_A;
571 BN_with_flags(pA, A, BN_FLG_CONSTTIME);
572
573 /* (D, M) := (A/B, A%B) ... */
574 if (!BN_div(D,M,pA,B,ctx)) goto err;
575
576 /* Now
577 * A = D*B + M;
578 * thus we have
579 * (**) sign*Y*a == D*B + M (mod |n|).
580 */
581
582 tmp=A; /* keep the BIGNUM object, the value does not matter */
583
584 /* (A, B) := (B, A mod B) ... */
585 A=B;
586 B=M;
587 /* ... so we have 0 <= B < A again */
588
589 /* Since the former M is now B and the former B is now A,
590 * (**) translates into
591 * sign*Y*a == D*A + B (mod |n|),
592 * i.e.
593 * sign*Y*a - D*A == B (mod |n|).
594 * Similarly, (*) translates into
595 * -sign*X*a == A (mod |n|).
596 *
597 * Thus,
598 * sign*Y*a + D*sign*X*a == B (mod |n|),
599 * i.e.
600 * sign*(Y + D*X)*a == B (mod |n|).
601 *
602 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
603 * -sign*X*a == B (mod |n|),
604 * sign*Y*a == A (mod |n|).
605 * Note that X and Y stay non-negative all the time.
606 */
607
608 if (!BN_mul(tmp,D,X,ctx)) goto err;
609 if (!BN_add(tmp,tmp,Y)) goto err;
610
611 M=Y; /* keep the BIGNUM object, the value does not matter */
612 Y=X;
613 X=tmp;
614 sign = -sign;
615 }
616
617 /*
618 * The while loop (Euclid's algorithm) ends when
619 * A == gcd(a,n);
620 * we have
621 * sign*Y*a == A (mod |n|),
622 * where Y is non-negative.
623 */
624
625 if (sign < 0)
626 {
627 if (!BN_sub(Y,n,Y)) goto err;
628 }
629 /* Now Y*a == A (mod |n|). */
630
631 if (BN_is_one(A))
632 {
633 /* Y*a == 1 (mod |n|) */
634 if (!Y->neg && BN_ucmp(Y,n) < 0)
635 {
636 if (!BN_copy(R,Y)) goto err;
637 }
638 else
639 {
640 if (!BN_nnmod(R,Y,n,ctx)) goto err;
641 }
642 }
643 else
644 {
645 BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH,BN_R_NO_INVERSE);
646 goto err;
647 }
648 ret=R;
649err:
650 if ((ret == NULL) && (in == NULL)) BN_free(R);
651 BN_CTX_end(ctx);
652 bn_check_top(ret);
653 return(ret);
654 }
diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c
deleted file mode 100644
index 306f029f27..0000000000
--- a/src/lib/libcrypto/bn/bn_gf2m.c
+++ /dev/null
@@ -1,1095 +0,0 @@
1/* crypto/bn/bn_gf2m.c */
2/* ====================================================================
3 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
4 *
5 * The Elliptic Curve Public-Key Crypto Library (ECC Code) included
6 * herein is developed by SUN MICROSYSTEMS, INC., and is contributed
7 * to the OpenSSL project.
8 *
9 * The ECC Code is licensed pursuant to the OpenSSL open source
10 * license provided below.
11 *
12 * In addition, Sun covenants to all licensees who provide a reciprocal
13 * covenant with respect to their own patents if any, not to sue under
14 * current and future patent claims necessarily infringed by the making,
15 * using, practicing, selling, offering for sale and/or otherwise
16 * disposing of the ECC Code as delivered hereunder (or portions thereof),
17 * provided that such covenant shall not apply:
18 * 1) for code that a licensee deletes from the ECC Code;
19 * 2) separates from the ECC Code; or
20 * 3) for infringements caused by:
21 * i) the modification of the ECC Code or
22 * ii) the combination of the ECC Code with other software or
23 * devices where such combination causes the infringement.
24 *
25 * The software is originally written by Sheueling Chang Shantz and
26 * Douglas Stebila of Sun Microsystems Laboratories.
27 *
28 */
29
30/* NOTE: This file is licensed pursuant to the OpenSSL license below
31 * and may be modified; but after modifications, the above covenant
32 * may no longer apply! In such cases, the corresponding paragraph
33 * ["In addition, Sun covenants ... causes the infringement."] and
34 * this note can be edited out; but please keep the Sun copyright
35 * notice and attribution. */
36
37/* ====================================================================
38 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 *
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 *
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in
49 * the documentation and/or other materials provided with the
50 * distribution.
51 *
52 * 3. All advertising materials mentioning features or use of this
53 * software must display the following acknowledgment:
54 * "This product includes software developed by the OpenSSL Project
55 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
56 *
57 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
58 * endorse or promote products derived from this software without
59 * prior written permission. For written permission, please contact
60 * openssl-core@openssl.org.
61 *
62 * 5. Products derived from this software may not be called "OpenSSL"
63 * nor may "OpenSSL" appear in their names without prior written
64 * permission of the OpenSSL Project.
65 *
66 * 6. Redistributions of any form whatsoever must retain the following
67 * acknowledgment:
68 * "This product includes software developed by the OpenSSL Project
69 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
70 *
71 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
72 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
73 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
74 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
75 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
76 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
77 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
78 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
79 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
80 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
81 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
82 * OF THE POSSIBILITY OF SUCH DAMAGE.
83 * ====================================================================
84 *
85 * This product includes cryptographic software written by Eric Young
86 * (eay@cryptsoft.com). This product includes software written by Tim
87 * Hudson (tjh@cryptsoft.com).
88 *
89 */
90
91#include <assert.h>
92#include <limits.h>
93#include <stdio.h>
94#include "cryptlib.h"
95#include "bn_lcl.h"
96
97/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */
98#define MAX_ITERATIONS 50
99
100static const BN_ULONG SQR_tb[16] =
101 { 0, 1, 4, 5, 16, 17, 20, 21,
102 64, 65, 68, 69, 80, 81, 84, 85 };
103/* Platform-specific macros to accelerate squaring. */
104#if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
105#define SQR1(w) \
106 SQR_tb[(w) >> 60 & 0xF] << 56 | SQR_tb[(w) >> 56 & 0xF] << 48 | \
107 SQR_tb[(w) >> 52 & 0xF] << 40 | SQR_tb[(w) >> 48 & 0xF] << 32 | \
108 SQR_tb[(w) >> 44 & 0xF] << 24 | SQR_tb[(w) >> 40 & 0xF] << 16 | \
109 SQR_tb[(w) >> 36 & 0xF] << 8 | SQR_tb[(w) >> 32 & 0xF]
110#define SQR0(w) \
111 SQR_tb[(w) >> 28 & 0xF] << 56 | SQR_tb[(w) >> 24 & 0xF] << 48 | \
112 SQR_tb[(w) >> 20 & 0xF] << 40 | SQR_tb[(w) >> 16 & 0xF] << 32 | \
113 SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \
114 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
115#endif
116#ifdef THIRTY_TWO_BIT
117#define SQR1(w) \
118 SQR_tb[(w) >> 28 & 0xF] << 24 | SQR_tb[(w) >> 24 & 0xF] << 16 | \
119 SQR_tb[(w) >> 20 & 0xF] << 8 | SQR_tb[(w) >> 16 & 0xF]
120#define SQR0(w) \
121 SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \
122 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
123#endif
124#ifdef SIXTEEN_BIT
125#define SQR1(w) \
126 SQR_tb[(w) >> 12 & 0xF] << 8 | SQR_tb[(w) >> 8 & 0xF]
127#define SQR0(w) \
128 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
129#endif
130#ifdef EIGHT_BIT
131#define SQR1(w) \
132 SQR_tb[(w) >> 4 & 0xF]
133#define SQR0(w) \
134 SQR_tb[(w) & 15]
135#endif
136
137/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
138 * result is a polynomial r with degree < 2 * BN_BITS - 1
139 * The caller MUST ensure that the variables have the right amount
140 * of space allocated.
 *
 * a and b are single words; the two-word carry-less (XOR) product is
 * returned with its high word in *r1 and its low word in *r0.
141 */
142#ifdef EIGHT_BIT
/* 8-bit word variant: b is consumed two bits at a time through a 4-entry
 * table built from the low 7 bits of a; the top bit of a is folded in
 * separately at the end. */
143static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
144 {
145 register BN_ULONG h, l, s;
146 BN_ULONG tab[4], top1b = a >> 7;
147 register BN_ULONG a1, a2;
148
149 a1 = a & (0x7F); a2 = a1 << 1;
150
/* tab[i] = a1 times the 2-bit polynomial i */
151 tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
152
/* accumulate one table entry per 2-bit window of b: l gets the low word, h the bits shifted out */
153 s = tab[b & 0x3]; l = s;
154 s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 6;
155 s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4;
156 s = tab[b >> 6 ]; l ^= s << 6; h ^= s >> 2;
157
158 /* compensate for the top bit of a */
159
160 if (top1b & 01) { l ^= b << 7; h ^= b >> 1; }
161
162 *r1 = h; *r0 = l;
163 }
164#endif
165#ifdef SIXTEEN_BIT
/* 16-bit word variant of the single-word carry-less (XOR) multiply.
 * The high word of the product is stored in *r1 and the low word in *r0.
 * b is consumed two bits at a time through a 4-entry table built from the
 * low 15 bits of a; the top bit of a is folded in separately at the end. */
166static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
167 {
168 register BN_ULONG h, l, s;
169 BN_ULONG tab[4], top1b = a >> 15;
170 register BN_ULONG a1, a2;
171
172 a1 = a & (0x7FFF); a2 = a1 << 1;
173
/* tab[i] = a1 times the 2-bit polynomial i */
174 tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
175
/* accumulate one table entry per 2-bit window of b: l gets the low word, h the bits shifted out */
176 s = tab[b & 0x3]; l = s;
177 s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 14;
178 s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 12;
179 s = tab[b >> 6 & 0x3]; l ^= s << 6; h ^= s >> 10;
180 s = tab[b >> 8 & 0x3]; l ^= s << 8; h ^= s >> 8;
181 s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >> 6;
182 s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >> 4;
183 s = tab[b >>14 ]; l ^= s << 14; h ^= s >> 2;
184
185 /* compensate for the top bit of a */
186
187 if (top1b & 01) { l ^= b << 15; h ^= b >> 1; }
188
189 *r1 = h; *r0 = l;
190 }
191#endif
192#ifdef THIRTY_TWO_BIT
/* 32-bit word variant of the single-word carry-less (XOR) multiply.
 * The high word of the product is stored in *r1 and the low word in *r0.
 * b is consumed three bits at a time through an 8-entry table built from
 * the low 30 bits of a; the top two bits of a are folded in separately at
 * the end. */
193static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
194 {
195 register BN_ULONG h, l, s;
196 BN_ULONG tab[8], top2b = a >> 30;
197 register BN_ULONG a1, a2, a4;
198
199 a1 = a & (0x3FFFFFFF); a2 = a1 << 1; a4 = a2 << 1;
200
/* tab[i] = a1 times the 3-bit polynomial i */
201 tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
202 tab[4] = a4; tab[5] = a1^a4; tab[6] = a2^a4; tab[7] = a1^a2^a4;
203
/* accumulate one table entry per 3-bit window of b: l gets the low word, h the bits shifted out */
204 s = tab[b & 0x7]; l = s;
205 s = tab[b >> 3 & 0x7]; l ^= s << 3; h = s >> 29;
206 s = tab[b >> 6 & 0x7]; l ^= s << 6; h ^= s >> 26;
207 s = tab[b >> 9 & 0x7]; l ^= s << 9; h ^= s >> 23;
208 s = tab[b >> 12 & 0x7]; l ^= s << 12; h ^= s >> 20;
209 s = tab[b >> 15 & 0x7]; l ^= s << 15; h ^= s >> 17;
210 s = tab[b >> 18 & 0x7]; l ^= s << 18; h ^= s >> 14;
211 s = tab[b >> 21 & 0x7]; l ^= s << 21; h ^= s >> 11;
212 s = tab[b >> 24 & 0x7]; l ^= s << 24; h ^= s >> 8;
213 s = tab[b >> 27 & 0x7]; l ^= s << 27; h ^= s >> 5;
214 s = tab[b >> 30 ]; l ^= s << 30; h ^= s >> 2;
215
216 /* compensate for the top two bits of a */
217
218 if (top2b & 01) { l ^= b << 30; h ^= b >> 2; }
219 if (top2b & 02) { l ^= b << 31; h ^= b >> 1; }
220
221 *r1 = h; *r0 = l;
222 }
223#endif
224#if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
/* 64-bit word variant of the single-word carry-less (XOR) multiply.
 * The high word of the product is stored in *r1 and the low word in *r0.
 * b is consumed four bits at a time through a 16-entry table built from
 * the low 61 bits of a; the top three bits of a are folded in separately
 * at the end. */
225static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
226 {
227 register BN_ULONG h, l, s;
228 BN_ULONG tab[16], top3b = a >> 61;
229 register BN_ULONG a1, a2, a4, a8;
230
231 a1 = a & (0x1FFFFFFFFFFFFFFFULL); a2 = a1 << 1; a4 = a2 << 1; a8 = a4 << 1;
232
/* tab[i] = a1 times the 4-bit polynomial i */
233 tab[ 0] = 0; tab[ 1] = a1; tab[ 2] = a2; tab[ 3] = a1^a2;
234 tab[ 4] = a4; tab[ 5] = a1^a4; tab[ 6] = a2^a4; tab[ 7] = a1^a2^a4;
235 tab[ 8] = a8; tab[ 9] = a1^a8; tab[10] = a2^a8; tab[11] = a1^a2^a8;
236 tab[12] = a4^a8; tab[13] = a1^a4^a8; tab[14] = a2^a4^a8; tab[15] = a1^a2^a4^a8;
237
/* accumulate one table entry per 4-bit window of b: l gets the low word, h the bits shifted out */
238 s = tab[b & 0xF]; l = s;
239 s = tab[b >> 4 & 0xF]; l ^= s << 4; h = s >> 60;
240 s = tab[b >> 8 & 0xF]; l ^= s << 8; h ^= s >> 56;
241 s = tab[b >> 12 & 0xF]; l ^= s << 12; h ^= s >> 52;
242 s = tab[b >> 16 & 0xF]; l ^= s << 16; h ^= s >> 48;
243 s = tab[b >> 20 & 0xF]; l ^= s << 20; h ^= s >> 44;
244 s = tab[b >> 24 & 0xF]; l ^= s << 24; h ^= s >> 40;
245 s = tab[b >> 28 & 0xF]; l ^= s << 28; h ^= s >> 36;
246 s = tab[b >> 32 & 0xF]; l ^= s << 32; h ^= s >> 32;
247 s = tab[b >> 36 & 0xF]; l ^= s << 36; h ^= s >> 28;
248 s = tab[b >> 40 & 0xF]; l ^= s << 40; h ^= s >> 24;
249 s = tab[b >> 44 & 0xF]; l ^= s << 44; h ^= s >> 20;
250 s = tab[b >> 48 & 0xF]; l ^= s << 48; h ^= s >> 16;
251 s = tab[b >> 52 & 0xF]; l ^= s << 52; h ^= s >> 12;
252 s = tab[b >> 56 & 0xF]; l ^= s << 56; h ^= s >> 8;
253 s = tab[b >> 60 ]; l ^= s << 60; h ^= s >> 4;
254
255 /* compensate for the top three bits of a */
256
257 if (top3b & 01) { l ^= b << 61; h ^= b >> 3; }
258 if (top3b & 02) { l ^= b << 62; h ^= b >> 2; }
259 if (top3b & 04) { l ^= b << 63; h ^= b >> 1; }
260
261 *r1 = h; *r0 = l;
262 }
263#endif
264
265/* Product of two polynomials a, b each with degree < 2 * BN_BITS2 - 1,
266 * result is a polynomial r with degree < 4 * BN_BITS2 - 1
267 * The caller MUST ensure that the variables have the right amount
268 * of space allocated.
269 */
270static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, const BN_ULONG b1, const BN_ULONG b0)
271 {
272 BN_ULONG m1, m0;
273 /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
274 bn_GF2m_mul_1x1(r+3, r+2, a1, b1);
275 bn_GF2m_mul_1x1(r+1, r, a0, b0);
276 bn_GF2m_mul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
277 /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
278 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
279 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
280 }
281
282
283/* Add polynomials a and b and store result in r; r could be a or b, a and b
284 * could be equal; r is the bitwise XOR of a and b.
285 */
286int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
287 {
288 int i;
289 const BIGNUM *at, *bt;
290
291 bn_check_top(a);
292 bn_check_top(b);
293
294 if (a->top < b->top) { at = b; bt = a; }
295 else { at = a; bt = b; }
296
297 bn_wexpand(r, at->top);
298
299 for (i = 0; i < bt->top; i++)
300 {
301 r->d[i] = at->d[i] ^ bt->d[i];
302 }
303 for (; i < at->top; i++)
304 {
305 r->d[i] = at->d[i];
306 }
307
308 r->top = at->top;
309 bn_correct_top(r);
310
311 return 1;
312 }
313
314
315/* Some functions allow for representation of the irreducible polynomials
316 * as an int[], say p. The irreducible f(t) is then of the form:
317 * t^p[0] + t^p[1] + ... + t^p[k]
318 * where m = p[0] > p[1] > ... > p[k] = 0.
319 */
320
321
322/* Performs modular reduction of a and store result in r. r could be a.
 *
 * p lists the exponents of the reduction polynomial's terms: p[0] is the
 * highest one and the list is scanned until a 0 entry is met.  When p[0]
 * is itself 0 the result is set to zero.
 *
 * Returns 1 on success, 0 if r could not be expanded to hold a copy of a.
 */
323int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
324 {
325 int j, k;
326 int n, dN, d0, d1;
327 BN_ULONG zz, *z;
328
329 bn_check_top(a);
330
331 if (!p[0])
332 {
333 /* reduction mod 1 => return 0 */
334 BN_zero(r);
335 return 1;
336 }
337
338 /* Since the algorithm does reduction in the r value, if a != r, copy
339 * the contents of a into r so we can do reduction in r.
340 */
341 if (a != r)
342 {
343 if (!bn_wexpand(r, a->top)) return 0;
344 for (j = 0; j < a->top; j++)
345 {
346 r->d[j] = a->d[j];
347 }
348 r->top = a->top;
349 }
350 z = r->d;
351
352 /* start reduction */
/* dN is the index of the word that holds bit p[0]; every word above it is folded down */
353 dN = p[0] / BN_BITS2;
354 for (j = r->top - 1; j > dN;)
355 {
356 zz = z[j];
/* j only moves down once word j is zero; folding may set bits in word j again */
357 if (z[j] == 0) { j--; continue; }
358 z[j] = 0;
359
360 for (k = 1; p[k] != 0; k++)
361 {
362 /* reducing component t^p[k] */
363 n = p[0] - p[k];
364 d0 = n % BN_BITS2; d1 = BN_BITS2 - d0;
365 n /= BN_BITS2;
366 z[j-n] ^= (zz>>d0);
/* the d0 guard also avoids a shift by the full word width (d1 == BN_BITS2) */
367 if (d0) z[j-n-1] ^= (zz<<d1);
368 }
369
370 /* reducing component t^0 */
371 n = dN;
372 d0 = p[0] % BN_BITS2;
373 d1 = BN_BITS2 - d0;
374 z[j-n] ^= (zz >> d0);
375 if (d0) z[j-n-1] ^= (zz << d1);
376 }
377
378 /* final round of reduction */
/* handles the bits of word dN at or above position p[0] % BN_BITS2; skipped when r has fewer than dN+1 words */
379 while (j == dN)
380 {
381
382 d0 = p[0] % BN_BITS2;
383 zz = z[dN] >> d0;
384 if (zz == 0) break;
385 d1 = BN_BITS2 - d0;
386
387 /* clear up the top d1 bits */
388 if (d0)
389 z[dN] = (z[dN] << d1) >> d1;
390 else
391 z[dN] = 0;
392 z[0] ^= zz; /* reduction t^0 component */
393
394 for (k = 1; p[k] != 0; k++)
395 {
396 BN_ULONG tmp_ulong;
397
398 /* reducing component t^p[k]*/
399 n = p[k] / BN_BITS2;
400 d0 = p[k] % BN_BITS2;
401 d1 = BN_BITS2 - d0;
402 z[n] ^= (zz << d0);
403 tmp_ulong = zz >> d1;
/* the spill-over word is only touched when there is something to add */
404 if (d0 && tmp_ulong)
405 z[n+1] ^= tmp_ulong;
406 }
407
408
409 }
410
411 bn_correct_top(r);
412 return 1;
413 }
414
415/* Performs modular reduction of a by p and store result in r. r could be a.
416 *
417 * This function calls down to the BN_GF2m_mod_arr implementation; this wrapper
418 * function is only provided for convenience; for best performance, use the
419 * BN_GF2m_mod_arr function.
420 */
421int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
422 {
423 int ret = 0;
424 const int max = BN_num_bits(p);
425 unsigned int *arr=NULL;
426 bn_check_top(a);
427 bn_check_top(p);
428 if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
429 ret = BN_GF2m_poly2arr(p, arr, max);
430 if (!ret || ret > max)
431 {
432 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
433 goto err;
434 }
435 ret = BN_GF2m_mod_arr(r, a, arr);
436 bn_check_top(r);
437err:
438 if (arr) OPENSSL_free(arr);
439 return ret;
440 }
441
442
443/* Compute the product of two polynomials a and b, reduce modulo p, and store
444 * the result in r. r could be a or b; a could be b.
445 */
446int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
447 {
448 int zlen, i, j, k, ret = 0;
449 BIGNUM *s;
450 BN_ULONG x1, x0, y1, y0, zz[4];
451
452 bn_check_top(a);
453 bn_check_top(b);
454
455 if (a == b)
456 {
457 return BN_GF2m_mod_sqr_arr(r, a, p, ctx);
458 }
459
460 BN_CTX_start(ctx);
461 if ((s = BN_CTX_get(ctx)) == NULL) goto err;
462
463 zlen = a->top + b->top + 4;
464 if (!bn_wexpand(s, zlen)) goto err;
465 s->top = zlen;
466
467 for (i = 0; i < zlen; i++) s->d[i] = 0;
468
469 for (j = 0; j < b->top; j += 2)
470 {
471 y0 = b->d[j];
472 y1 = ((j+1) == b->top) ? 0 : b->d[j+1];
473 for (i = 0; i < a->top; i += 2)
474 {
475 x0 = a->d[i];
476 x1 = ((i+1) == a->top) ? 0 : a->d[i+1];
477 bn_GF2m_mul_2x2(zz, x1, x0, y1, y0);
478 for (k = 0; k < 4; k++) s->d[i+j+k] ^= zz[k];
479 }
480 }
481
482 bn_correct_top(s);
483 if (BN_GF2m_mod_arr(r, s, p))
484 ret = 1;
485 bn_check_top(r);
486
487err:
488 BN_CTX_end(ctx);
489 return ret;
490 }
491
492/* Compute the product of two polynomials a and b, reduce modulo p, and store
493 * the result in r. r could be a or b; a could equal b.
494 *
495 * This function calls down to the BN_GF2m_mod_mul_arr implementation; this wrapper
496 * function is only provided for convenience; for best performance, use the
497 * BN_GF2m_mod_mul_arr function.
498 */
499int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
500 {
501 int ret = 0;
502 const int max = BN_num_bits(p);
503 unsigned int *arr=NULL;
504 bn_check_top(a);
505 bn_check_top(b);
506 bn_check_top(p);
507 if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
508 ret = BN_GF2m_poly2arr(p, arr, max);
509 if (!ret || ret > max)
510 {
511 BNerr(BN_F_BN_GF2M_MOD_MUL,BN_R_INVALID_LENGTH);
512 goto err;
513 }
514 ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx);
515 bn_check_top(r);
516err:
517 if (arr) OPENSSL_free(arr);
518 return ret;
519 }
520
521
522/* Square a, reduce the result mod p, and store it in a. r could be a. */
523int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
524 {
525 int i, ret = 0;
526 BIGNUM *s;
527
528 bn_check_top(a);
529 BN_CTX_start(ctx);
530 if ((s = BN_CTX_get(ctx)) == NULL) return 0;
531 if (!bn_wexpand(s, 2 * a->top)) goto err;
532
533 for (i = a->top - 1; i >= 0; i--)
534 {
535 s->d[2*i+1] = SQR1(a->d[i]);
536 s->d[2*i ] = SQR0(a->d[i]);
537 }
538
539 s->top = 2 * a->top;
540 bn_correct_top(s);
541 if (!BN_GF2m_mod_arr(r, s, p)) goto err;
542 bn_check_top(r);
543 ret = 1;
544err:
545 BN_CTX_end(ctx);
546 return ret;
547 }
548
549/* Square a, reduce the result mod p, and store it in a. r could be a.
550 *
551 * This function calls down to the BN_GF2m_mod_sqr_arr implementation; this wrapper
552 * function is only provided for convenience; for best performance, use the
553 * BN_GF2m_mod_sqr_arr function.
554 */
555int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
556 {
557 int ret = 0;
558 const int max = BN_num_bits(p);
559 unsigned int *arr=NULL;
560
561 bn_check_top(a);
562 bn_check_top(p);
563 if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
564 ret = BN_GF2m_poly2arr(p, arr, max);
565 if (!ret || ret > max)
566 {
567 BNerr(BN_F_BN_GF2M_MOD_SQR,BN_R_INVALID_LENGTH);
568 goto err;
569 }
570 ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx);
571 bn_check_top(r);
572err:
573 if (arr) OPENSSL_free(arr);
574 return ret;
575 }
576
577
578/* Invert a, reduce modulo p, and store the result in r. r could be a.
579 * Uses Modified Almost Inverse Algorithm (Algorithm 10) from
580 * Hankerson, D., Hernandez, J.L., and Menezes, A. "Software Implementation
581 * of Elliptic Curve Cryptography Over Binary Fields".
582 */
583int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
584 {
585 BIGNUM *b, *c, *u, *v, *tmp;
586 int ret = 0;
587
588 bn_check_top(a);
589 bn_check_top(p);
590
591 BN_CTX_start(ctx);
592
593 b = BN_CTX_get(ctx);
594 c = BN_CTX_get(ctx);
595 u = BN_CTX_get(ctx);
596 v = BN_CTX_get(ctx);
597 if (v == NULL) goto err;
598
599 if (!BN_one(b)) goto err;
600 if (!BN_GF2m_mod(u, a, p)) goto err;
601 if (!BN_copy(v, p)) goto err;
602
603 if (BN_is_zero(u)) goto err;
604
605 while (1)
606 {
607 while (!BN_is_odd(u))
608 {
609 if (!BN_rshift1(u, u)) goto err;
610 if (BN_is_odd(b))
611 {
612 if (!BN_GF2m_add(b, b, p)) goto err;
613 }
614 if (!BN_rshift1(b, b)) goto err;
615 }
616
617 if (BN_abs_is_word(u, 1)) break;
618
619 if (BN_num_bits(u) < BN_num_bits(v))
620 {
621 tmp = u; u = v; v = tmp;
622 tmp = b; b = c; c = tmp;
623 }
624
625 if (!BN_GF2m_add(u, u, v)) goto err;
626 if (!BN_GF2m_add(b, b, c)) goto err;
627 }
628
629
630 if (!BN_copy(r, b)) goto err;
631 bn_check_top(r);
632 ret = 1;
633
634err:
635 BN_CTX_end(ctx);
636 return ret;
637 }
638
639/* Invert xx, reduce modulo p, and store the result in r. r could be xx.
640 *
641 * This function calls down to the BN_GF2m_mod_inv implementation; this wrapper
642 * function is only provided for convenience; for best performance, use the
643 * BN_GF2m_mod_inv function.
644 */
645int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
646 {
647 BIGNUM *field;
648 int ret = 0;
649
650 bn_check_top(xx);
651 BN_CTX_start(ctx);
652 if ((field = BN_CTX_get(ctx)) == NULL) goto err;
653 if (!BN_GF2m_arr2poly(p, field)) goto err;
654
655 ret = BN_GF2m_mod_inv(r, xx, field, ctx);
656 bn_check_top(r);
657
658err:
659 BN_CTX_end(ctx);
660 return ret;
661 }
662
663
664#ifndef OPENSSL_SUN_GF2M_DIV
665/* Divide y by x, reduce modulo p, and store the result in r. r could be x
666 * or y, x could equal y.
667 */
668int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, BN_CTX *ctx)
669 {
670 BIGNUM *xinv = NULL;
671 int ret = 0;
672
673 bn_check_top(y);
674 bn_check_top(x);
675 bn_check_top(p);
676
677 BN_CTX_start(ctx);
678 xinv = BN_CTX_get(ctx);
679 if (xinv == NULL) goto err;
680
681 if (!BN_GF2m_mod_inv(xinv, x, p, ctx)) goto err;
682 if (!BN_GF2m_mod_mul(r, y, xinv, p, ctx)) goto err;
683 bn_check_top(r);
684 ret = 1;
685
686err:
687 BN_CTX_end(ctx);
688 return ret;
689 }
690#else
691/* Divide y by x, reduce modulo p, and store the result in r. r could be x
692 * or y, x could equal y.
693 * Uses algorithm Modular_Division_GF(2^m) from
694 * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
695 * the Great Divide".
696 */
697int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, BN_CTX *ctx)
698 {
699 BIGNUM *a, *b, *u, *v;
700 int ret = 0;
701
702 bn_check_top(y);
703 bn_check_top(x);
704 bn_check_top(p);
705
706 BN_CTX_start(ctx);
707
708 a = BN_CTX_get(ctx);
709 b = BN_CTX_get(ctx);
710 u = BN_CTX_get(ctx);
711 v = BN_CTX_get(ctx);
712 if (v == NULL) goto err;
713
714 /* reduce x and y mod p */
715 if (!BN_GF2m_mod(u, y, p)) goto err;
716 if (!BN_GF2m_mod(a, x, p)) goto err;
717 if (!BN_copy(b, p)) goto err;
718
719 while (!BN_is_odd(a))
720 {
721 if (!BN_rshift1(a, a)) goto err;
722 if (BN_is_odd(u)) if (!BN_GF2m_add(u, u, p)) goto err;
723 if (!BN_rshift1(u, u)) goto err;
724 }
725
726 do
727 {
728 if (BN_GF2m_cmp(b, a) > 0)
729 {
730 if (!BN_GF2m_add(b, b, a)) goto err;
731 if (!BN_GF2m_add(v, v, u)) goto err;
732 do
733 {
734 if (!BN_rshift1(b, b)) goto err;
735 if (BN_is_odd(v)) if (!BN_GF2m_add(v, v, p)) goto err;
736 if (!BN_rshift1(v, v)) goto err;
737 } while (!BN_is_odd(b));
738 }
739 else if (BN_abs_is_word(a, 1))
740 break;
741 else
742 {
743 if (!BN_GF2m_add(a, a, b)) goto err;
744 if (!BN_GF2m_add(u, u, v)) goto err;
745 do
746 {
747 if (!BN_rshift1(a, a)) goto err;
748 if (BN_is_odd(u)) if (!BN_GF2m_add(u, u, p)) goto err;
749 if (!BN_rshift1(u, u)) goto err;
750 } while (!BN_is_odd(a));
751 }
752 } while (1);
753
754 if (!BN_copy(r, u)) goto err;
755 bn_check_top(r);
756 ret = 1;
757
758err:
759 BN_CTX_end(ctx);
760 return ret;
761 }
762#endif
763
764/* Divide yy by xx, reduce modulo p, and store the result in r. r could be xx
765 * or yy, xx could equal yy.
766 *
767 * This function calls down to the BN_GF2m_mod_div implementation; this wrapper
768 * function is only provided for convenience; for best performance, use the
769 * BN_GF2m_mod_div function.
770 */
771int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
772 {
773 BIGNUM *field;
774 int ret = 0;
775
776 bn_check_top(yy);
777 bn_check_top(xx);
778
779 BN_CTX_start(ctx);
780 if ((field = BN_CTX_get(ctx)) == NULL) goto err;
781 if (!BN_GF2m_arr2poly(p, field)) goto err;
782
783 ret = BN_GF2m_mod_div(r, yy, xx, field, ctx);
784 bn_check_top(r);
785
786err:
787 BN_CTX_end(ctx);
788 return ret;
789 }
790
791
792/* Compute the bth power of a, reduce modulo p, and store
793 * the result in r. r could be a.
794 * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.
795 */
796int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
797 {
798 int ret = 0, i, n;
799 BIGNUM *u;
800
801 bn_check_top(a);
802 bn_check_top(b);
803
804 if (BN_is_zero(b))
805 return(BN_one(r));
806
807 if (BN_abs_is_word(b, 1))
808 return (BN_copy(r, a) != NULL);
809
810 BN_CTX_start(ctx);
811 if ((u = BN_CTX_get(ctx)) == NULL) goto err;
812
813 if (!BN_GF2m_mod_arr(u, a, p)) goto err;
814
815 n = BN_num_bits(b) - 1;
816 for (i = n - 1; i >= 0; i--)
817 {
818 if (!BN_GF2m_mod_sqr_arr(u, u, p, ctx)) goto err;
819 if (BN_is_bit_set(b, i))
820 {
821 if (!BN_GF2m_mod_mul_arr(u, u, a, p, ctx)) goto err;
822 }
823 }
824 if (!BN_copy(r, u)) goto err;
825 bn_check_top(r);
826 ret = 1;
827err:
828 BN_CTX_end(ctx);
829 return ret;
830 }
831
832/* Compute the bth power of a, reduce modulo p, and store
833 * the result in r. r could be a.
834 *
835 * This function calls down to the BN_GF2m_mod_exp_arr implementation; this wrapper
836 * function is only provided for convenience; for best performance, use the
837 * BN_GF2m_mod_exp_arr function.
838 */
839int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
840 {
841 int ret = 0;
842 const int max = BN_num_bits(p);
843 unsigned int *arr=NULL;
844 bn_check_top(a);
845 bn_check_top(b);
846 bn_check_top(p);
847 if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
848 ret = BN_GF2m_poly2arr(p, arr, max);
849 if (!ret || ret > max)
850 {
851 BNerr(BN_F_BN_GF2M_MOD_EXP,BN_R_INVALID_LENGTH);
852 goto err;
853 }
854 ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx);
855 bn_check_top(r);
856err:
857 if (arr) OPENSSL_free(arr);
858 return ret;
859 }
860
861/* Compute the square root of a, reduce modulo p, and store
862 * the result in r. r could be a.
863 * Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
864 */
865int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
866 {
867 int ret = 0;
868 BIGNUM *u;
869
870 bn_check_top(a);
871
872 if (!p[0])
873 {
874 /* reduction mod 1 => return 0 */
875 BN_zero(r);
876 return 1;
877 }
878
879 BN_CTX_start(ctx);
880 if ((u = BN_CTX_get(ctx)) == NULL) goto err;
881
882 if (!BN_set_bit(u, p[0] - 1)) goto err;
883 ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx);
884 bn_check_top(r);
885
886err:
887 BN_CTX_end(ctx);
888 return ret;
889 }
890
891/* Compute the square root of a, reduce modulo p, and store
892 * the result in r. r could be a.
893 *
894 * This function calls down to the BN_GF2m_mod_sqrt_arr implementation; this wrapper
895 * function is only provided for convenience; for best performance, use the
896 * BN_GF2m_mod_sqrt_arr function.
897 */
898int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
899 {
900 int ret = 0;
901 const int max = BN_num_bits(p);
902 unsigned int *arr=NULL;
903 bn_check_top(a);
904 bn_check_top(p);
905 if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
906 ret = BN_GF2m_poly2arr(p, arr, max);
907 if (!ret || ret > max)
908 {
909 BNerr(BN_F_BN_GF2M_MOD_SQRT,BN_R_INVALID_LENGTH);
910 goto err;
911 }
912 ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx);
913 bn_check_top(r);
914err:
915 if (arr) OPENSSL_free(arr);
916 return ret;
917 }
918
919/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0.
920 * Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
 *
 * p[] is the exponent-array form of the reduction polynomial; p[0] is its
 * degree m. When p[0] is 0, or when a_ reduces to zero, r is set to zero
 * and 1 is returned. Returns 0 also when a temporary cannot be obtained,
 * a helper call fails, or (even m only) MAX_ITERATIONS random attempts
 * all end with w == 0.
921 */
922int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx)
923 {
924 int ret = 0, count = 0;
925 unsigned int j;
926 BIGNUM *a, *z, *rho, *w, *w2, *tmp;
927
928 bn_check_top(a_);
929
930 if (!p[0])
931 {
932 /* reduction mod 1 => return 0 */
933 BN_zero(r);
934 return 1;
935 }
936
937 BN_CTX_start(ctx);
938 a = BN_CTX_get(ctx);
939 z = BN_CTX_get(ctx);
940 w = BN_CTX_get(ctx);
 /* only the last BN_CTX_get() result is tested; presumably an earlier
  * failure makes the later calls fail too -- see BN_CTX_get docs */
941 if (w == NULL) goto err;
942
 /* work on a reduced private copy of the input */
943 if (!BN_GF2m_mod_arr(a, a_, p)) goto err;
944
945 if (BN_is_zero(a))
946 {
947 BN_zero(r);
948 ret = 1;
949 goto err;
950 }
951
952 if (p[0] & 0x1) /* m is odd */
953 {
954 /* compute half-trace of a */
955 if (!BN_copy(z, a)) goto err;
956 for (j = 1; j <= (p[0] - 1) / 2; j++)
957 {
 /* z = z^4 + a */
958 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err;
959 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err;
960 if (!BN_GF2m_add(z, z, a)) goto err;
961 }
962
963 }
964 else /* m is even */
965 {
 /* the extra temporaries are only needed on this path */
966 rho = BN_CTX_get(ctx);
967 w2 = BN_CTX_get(ctx);
968 tmp = BN_CTX_get(ctx);
969 if (tmp == NULL) goto err;
 /* retry with a fresh random rho while w ends up zero, at most
  * MAX_ITERATIONS times */
970 do
971 {
972 if (!BN_rand(rho, p[0], 0, 0)) goto err;
973 if (!BN_GF2m_mod_arr(rho, rho, p)) goto err;
974 BN_zero(z);
975 if (!BN_copy(w, rho)) goto err;
976 for (j = 1; j <= p[0] - 1; j++)
977 {
 /* z = z^2 + w^2 * a;  w = w^2 + rho */
978 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err;
979 if (!BN_GF2m_mod_sqr_arr(w2, w, p, ctx)) goto err;
980 if (!BN_GF2m_mod_mul_arr(tmp, w2, a, p, ctx)) goto err;
981 if (!BN_GF2m_add(z, z, tmp)) goto err;
982 if (!BN_GF2m_add(w, w2, rho)) goto err;
983 }
984 count++;
985 } while (BN_is_zero(w) && (count < MAX_ITERATIONS));
986 if (BN_is_zero(w))
987 {
988 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR,BN_R_TOO_MANY_ITERATIONS);
989 goto err;
990 }
991 }
992
 /* verify the candidate: w = z^2 + z must equal a, otherwise report
  * that no solution exists */
993 if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx)) goto err;
994 if (!BN_GF2m_add(w, z, w)) goto err;
995 if (BN_GF2m_cmp(w, a))
996 {
997 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_NO_SOLUTION);
998 goto err;
999 }
1000
1001 if (!BN_copy(r, z)) goto err;
1002 bn_check_top(r);
1003
1004 ret = 1;
1005
1006err:
1007 BN_CTX_end(ctx);
1008 return ret;
1009 }
1010
1011/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0.
1012 *
1013 * This function calls down to the BN_GF2m_mod_solve_quad_arr implementation; this wrapper
1014 * function is only provided for convenience; for best performance, use the
1015 * BN_GF2m_mod_solve_quad_arr function.
1016 */
1017int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
1018 {
1019 int ret = 0;
1020 const int max = BN_num_bits(p);
1021 unsigned int *arr=NULL;
1022 bn_check_top(a);
1023 bn_check_top(p);
1024 if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) *
1025 max)) == NULL) goto err;
1026 ret = BN_GF2m_poly2arr(p, arr, max);
1027 if (!ret || ret > max)
1028 {
1029 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD,BN_R_INVALID_LENGTH);
1030 goto err;
1031 }
1032 ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx);
1033 bn_check_top(r);
1034err:
1035 if (arr) OPENSSL_free(arr);
1036 return ret;
1037 }
1038
1039/* Convert the bit-string representation of a polynomial
1040 * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array
1041 * of integers corresponding to the bits with non-zero coefficient.
1042 * Up to max elements of the array will be filled. Return value is total
1043 * number of coefficients that would be extracted if array was large enough.
1044 */
1045int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
1046 {
1047 int i, j, k = 0;
1048 BN_ULONG mask;
1049
1050 if (BN_is_zero(a) || !BN_is_bit_set(a, 0))
1051 /* a_0 == 0 => return error (the unsigned int array
1052 * must be terminated by 0)
1053 */
1054 return 0;
1055
1056 for (i = a->top - 1; i >= 0; i--)
1057 {
1058 if (!a->d[i])
1059 /* skip word if a->d[i] == 0 */
1060 continue;
1061 mask = BN_TBIT;
1062 for (j = BN_BITS2 - 1; j >= 0; j--)
1063 {
1064 if (a->d[i] & mask)
1065 {
1066 if (k < max) p[k] = BN_BITS2 * i + j;
1067 k++;
1068 }
1069 mask >>= 1;
1070 }
1071 }
1072
1073 return k;
1074 }
1075
1076/* Convert the coefficient array representation of a polynomial to a
1077 * bit-string. The array must be terminated by 0.
1078 */
1079int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a)
1080 {
1081 int i;
1082
1083 bn_check_top(a);
1084 BN_zero(a);
1085 for (i = 0; p[i] != 0; i++)
1086 {
1087 if (BN_set_bit(a, p[i]) == 0)
1088 return 0;
1089 }
1090 BN_set_bit(a, 0);
1091 bn_check_top(a);
1092
1093 return 1;
1094 }
1095
diff --git a/src/lib/libcrypto/bn/bn_kron.c b/src/lib/libcrypto/bn/bn_kron.c
deleted file mode 100644
index 740359b752..0000000000
--- a/src/lib/libcrypto/bn/bn_kron.c
+++ /dev/null
@@ -1,184 +0,0 @@
1/* crypto/bn/bn_kron.c */
2/* ====================================================================
3 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include "cryptlib.h"
57#include "bn_lcl.h"
58
59/* least significant word */
 /* yields 0 for a BIGNUM with no words in use, otherwise d[0] */
60#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
61
62/* Returns -2 for errors because both -1 and 0 are valid results.
 *
 * Computes the Kronecker symbol (a/b). The inputs are copied into
 * temporaries taken from ctx, so a and b themselves are not modified.
 * 'ret' starts out as -2, which is also what is returned when the
 * temporaries cannot be obtained.
 */
63int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
64 {
65 int i;
66 int ret = -2; /* avoid 'uninitialized' warning */
67 int err = 0;
68 BIGNUM *A, *B, *tmp;
69 /* In 'tab', only odd-indexed entries are relevant:
70 * For any odd BIGNUM n,
71 * tab[BN_lsw(n) & 7]
72 * is $(-1)^{(n^2-1)/8}$ (using TeX notation).
73 * Note that the sign of n does not matter.
74 */
75 static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
76
77 bn_check_top(a);
78 bn_check_top(b);
79
80 BN_CTX_start(ctx);
81 A = BN_CTX_get(ctx);
82 B = BN_CTX_get(ctx);
83 if (B == NULL) goto end;
84
85 err = !BN_copy(A, a);
86 if (err) goto end;
87 err = !BN_copy(B, b);
88 if (err) goto end;
89
90 /*
91 * Kronecker symbol, implemented according to Henri Cohen,
92 * "A Course in Computational Algebraic Number Theory"
93 * (algorithm 1.4.10).
94 */
95
96 /* Cohen's step 1: */
97
98 if (BN_is_zero(B))
99 {
100 ret = BN_abs_is_word(A, 1);
101 goto end;
102 }
103
104 /* Cohen's step 2: */
105
106 if (!BN_is_odd(A) && !BN_is_odd(B))
107 {
108 ret = 0;
109 goto end;
110 }
111
112 /* now B is non-zero */
 /* i counts the trailing zero bits of B, which are then shifted out */
113 i = 0;
114 while (!BN_is_bit_set(B, i))
115 i++;
116 err = !BN_rshift(B, B, i);
117 if (err) goto end;
118 if (i & 1)
119 {
120 /* i is odd */
121 /* (thus B was even, thus A must be odd!) */
122
123 /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
124 ret = tab[BN_lsw(A) & 7];
125 }
126 else
127 {
128 /* i is even */
129 ret = 1;
130 }
131
 /* strip the sign of B; it only flips the result when A is negative too */
132 if (B->neg)
133 {
134 B->neg = 0;
135 if (A->neg)
136 ret = -ret;
137 }
138
139 /* now B is positive and odd, so what remains to be done is
140 * to compute the Jacobi symbol (A/B) and multiply it by 'ret' */
141
142 while (1)
143 {
144 /* Cohen's step 3: */
145
146 /* B is positive and odd */
147
148 if (BN_is_zero(A))
149 {
150 ret = BN_is_one(B) ? ret : 0;
151 goto end;
152 }
153
154 /* now A is non-zero */
155 i = 0;
156 while (!BN_is_bit_set(A, i))
157 i++;
158 err = !BN_rshift(A, A, i);
159 if (err) goto end;
160 if (i & 1)
161 {
162 /* i is odd */
163 /* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
164 ret = ret * tab[BN_lsw(B) & 7];
165 }
166
167 /* Cohen's step 4: */
168 /* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
 /* only bit 1 of each low word is examined; for negative A the low
  * word of |A| is complemented first */
169 if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
170 ret = -ret;
171
172 /* (A, B) := (B mod |A|, |A|) */
173 err = !BN_nnmod(B, B, A, ctx);
174 if (err) goto end;
175 tmp = A; A = B; B = tmp;
 /* tmp is the new B (the old A); clearing its sign makes it |A| */
176 tmp->neg = 0;
177 }
178end:
179 BN_CTX_end(ctx);
180 if (err)
181 return -2;
182 else
183 return ret;
184 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
deleted file mode 100644
index 27ac4397a1..0000000000
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ /dev/null
@@ -1,490 +0,0 @@
1/* crypto/bn/bn_lcl.h */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#ifndef HEADER_BN_LCL_H
113#define HEADER_BN_LCL_H
114
115#include <openssl/bn.h>
116
117#ifdef __cplusplus
118extern "C" {
119#endif
120
121
122/*
123 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
124 *
125 *
126 * For window size 'w' (w >= 2) and a random 'b' bits exponent,
127 * the number of multiplications is a constant plus on average
128 *
129 * 2^(w-1) + (b-w)/(w+1);
130 *
131 * here 2^(w-1) is for precomputing the table (we actually need
132 * entries only for windows that have the lowest bit set), and
133 * (b-w)/(w+1) is an approximation for the expected number of
134 * w-bit windows, not counting the first one.
135 *
136 * Thus we should use
137 *
138 * w >= 6 if b > 671
139 * w = 5 if 671 > b > 239
140 * w = 4 if 239 > b > 79
141 * w = 3 if 79 > b > 23
142 * w <= 2 if 23 > b
143 *
144 * (with draws in between). Very small exponents are often selected
145 * with low Hamming weight, so we use w = 1 for b <= 23.
146 */
#if 1
/* Current table: exponents of up to 23 bits use a 1-bit window; above
 * that the window grows in steps (3, 4, 5) up to a maximum of 6 bits
 * for exponents longer than 671 bits.
 */
#define BN_window_bits_for_exponent_size(b) \
		((b) <=  23 ? 1 : \
		 (b) <=  79 ? 3 : \
		 (b) <= 239 ? 4 : \
		 (b) <= 671 ? 5 : 6)
#else
/* Old SSLeay/OpenSSL table (disabled).
 * Its largest window was 5 bits, so it gives a different answer for
 * b==1024, but the same answer for b==160 and b==512.
 */
#define BN_window_bits_for_exponent_size(b) \
		((b) <=  17 ? 1 : \
		 (b) <= 127 ? 3 : \
		 (b) <= 255 ? 4 : 5)
#endif
163
164
165
/* BN_mod_exp_mont_consttime is based on the assumption that the
 * L1 data cache line width of the target processor is at least
 * the following value (in bytes).
 */
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH	( 64 )
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK	(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)

/* Window sizes for the fixed-window modular exponentiation
 * algorithm (BN_mod_exp_mont_consttime).
 *
 * To achieve the security goals of BN_mod_exp_mont_consttime, the
 * window size must never exceed
 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
 *
 * Thresholds are provided for cache line widths of 32 and 64 bytes,
 * i.e. maximum windows of log_2(32)=5 and log_2(64)=6 bits.  A window
 * of 7 bits should only be used on processors whose cache lines are
 * 128 bytes or wider.  No table is defined for any other width.
 */
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

#  define BN_window_bits_for_ctime_exponent_size(b) \
		((b) <=  22 ? 1 : \
		 (b) <=  89 ? 3 : \
		 (b) <= 306 ? 4 : \
		 (b) <= 937 ? 5 : 6)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(6)

#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

#  define BN_window_bits_for_ctime_exponent_size(b) \
		((b) <=  22 ? 1 : \
		 (b) <=  89 ? 3 : \
		 (b) <= 306 ? 4 : 5)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(5)

#endif
203
204
/* Size thresholds, in the order they are defined below.
 *   Pentium pro  16,16,16,32,64
 *   Alpha        16,16,16,16,64
 * The trailing comment on each line is the original author's note
 * (presumably an alternative value that was tried -- confirm before
 * relying on it).
 */
#define BN_MULL_SIZE_NORMAL			(16)	/* 32 */
#define BN_MUL_RECURSIVE_SIZE_NORMAL		(16)	/* 32 less than */
#define BN_SQR_RECURSIVE_SIZE_NORMAL		(16)	/* 32 */
#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL	(32)	/* 32 */
#define BN_MONT_CTX_SET_SIZE_WORD		(64)	/* 32 */
212
213#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
214/*
215 * BN_UMULT_HIGH section.
216 *
217 * No, I'm not trying to overwhelm you when stating that the
218 * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
219 * you to be impressed when I say that if the compiler doesn't
220 * support 2*N integer type, then you have to replace every N*N
221 * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
222 * and additions which unavoidably results in severe performance
223 * penalties. Of course provided that the hardware is capable of
224 * producing 2*N result... That's when you normally start
225 * considering assembler implementation. However! It should be
226 * pointed out that some CPUs (most notably Alpha, PowerPC and
227 * upcoming IA-64 family:-) provide *separate* instruction
228 * calculating the upper half of the product placing the result
229 * into a general purpose register. Now *if* the compiler supports
230 * inline assembler, then it's not impossible to implement the
231 * "bignum" routines (and have the compiler optimize 'em)
232 * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
233 * macro is about:-)
234 *
235 * <appro@fy.chalmers.se>
236 */
/*
 * What the branches below provide:
 *   - BN_UMULT_HIGH(a,b): evaluates to the upper word of a*b.  Defined
 *     for Alpha (DEC C and GCC), 64-bit PowerPC (GCC) and x86_64
 *     (GCC, and MSVC with _MSC_VER >= 1400).
 *   - BN_UMULT_LOHI(low,high,a,b): stores both words of a*b into the
 *     lvalues 'low' and 'high'.  Defined only in the two x86_64 branches.
 * If no branch matches, neither macro is defined and the mul_add/mul/sqr
 * macros later in this header select another implementation.
 *
 * NOTE: the GCC x86_64 definition of BN_UMULT_LOHI already ends in ';',
 * so it is a statement, not an expression; the MSVC one is an expression.
 */
237# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
238# if defined(__DECC)
239# include <c_asm.h>
240# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
241# elif defined(__GNUC__)
242# define BN_UMULT_HIGH(a,b) ({ \
243 register BN_ULONG ret; \
244 asm ("umulh %1,%2,%0" \
245 : "=r"(ret) \
246 : "r"(a), "r"(b)); \
247 ret; })
248# endif /* compiler */
249# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
250# if defined(__GNUC__)
251# define BN_UMULT_HIGH(a,b) ({ \
252 register BN_ULONG ret; \
253 asm ("mulhdu %0,%1,%2" \
254 : "=r"(ret) \
255 : "r"(a), "r"(b)); \
256 ret; })
257# endif /* compiler */
258# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
259# if defined(__GNUC__)
260# define BN_UMULT_HIGH(a,b) ({ \
261 register BN_ULONG ret,discard; \
262 asm ("mulq %3" \
263 : "=a"(discard),"=d"(ret) \
264 : "a"(a), "g"(b) \
265 : "cc"); \
266 ret; })
267# define BN_UMULT_LOHI(low,high,a,b) \
268 asm ("mulq %3" \
269 : "=a"(low),"=d"(high) \
270 : "a"(a),"g"(b) \
271 : "cc");
272# endif
273# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
274# if defined(_MSC_VER) && _MSC_VER>=1400
275 unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b);
276 unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b,
277 unsigned __int64 *h);
278# pragma intrinsic(__umulh,_umul128)
279# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
280# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
281# endif
282# endif /* cpu */
283#endif /* OPENSSL_NO_ASM */
284
285/*************************************************************
286 * Using the long long type
 *
 * Lw(t) is t converted to BN_ULONG and masked with BN_MASK2 (the low
 * word of a double-width value); Hw(t) is t shifted right by BN_BITS2
 * bits, then converted and masked the same way (the high word).
287 */
288#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
289#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
290
291#ifdef BN_DEBUG_RAND
/* Debug helper (BN_DEBUG_RAND builds only): zero every word of (a)->d
 * from index (a)->top up to, but not including, index (a)->dmax.
 *
 * The write pointer is only formed when there is something to clear and
 * starts at d[top]; the previous version always computed &d[top-1],
 * which is an out-of-bounds pointer when top == 0.
 *
 * The expansion is a plain braced block (not do/while) so that it stays
 * interchangeable with the empty non-debug definition.
 */
#define bn_clear_top2max(a) \
	{ \
	int ind = (a)->dmax - (a)->top; \
	if (ind != 0) \
		{ \
		BN_ULONG *ftl = &(a)->d[(a)->top]; \
		for (; ind != 0; ind--) \
			*(ftl++) = 0x0; \
		} \
	}
299#else
300#define bn_clear_top2max(a)
301#endif
302
303#ifdef BN_LLONG
/* Word multiply primitives built on the double-width type BN_ULLONG.
 * 'r', 'c', 'r0' and 'r1' must be lvalues; every macro argument is
 * parenthesised so that expression arguments (e.g. x+y) are multiplied
 * as a whole rather than having the cast bind to their first operand.
 */

/* (c:r) := w*a + r + c   -- r gets the low word, c the high word */
#define mul_add(r,a,w,c) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(w) * (a) + (r) + (c); \
	(r)= Lw(t); \
	(c)= Hw(t); \
	}

/* (c:r) := w*a + c       -- r gets the low word, c the high word */
#define mul(r,a,w,c) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(w) * (a) + (c); \
	(r)= Lw(t); \
	(c)= Hw(t); \
	}

/* (r1:r0) := a*a         -- r0 gets the low word, r1 the high word */
#define sqr(r0,r1,a) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(a)*(a); \
	(r0)=Lw(t); \
	(r1)=Hw(t); \
	}
324
325#elif defined(BN_UMULT_LOHI)
/* Variant used when BN_UMULT_LOHI is available.
 * mul_add: computes w*a with BN_UMULT_LOHI, adds the old values of r and c,
 * stores the low word of the sum in r and the high word (product high word
 * plus the carries out of the two word additions) in c.
 * r and c must be lvalues; c is read and written several times.
 */
326#define mul_add(r,a,w,c) { \
327 BN_ULONG high,low,ret,tmp=(a); \
328 ret = (r); \
329 BN_UMULT_LOHI(low,high,w,tmp); \
330 ret += (c); \
331 (c) = (ret<(c))?1:0; \
332 (c) += high; \
333 ret += low; \
334 (c) += (ret<low)?1:0; \
335 (r) = ret; \
336 }
337
/* mul: as mul_add but without adding the old r -- r gets the low word of
 * w*a + c and c gets the high word. */
338#define mul(r,a,w,c) { \
339 BN_ULONG high,low,ret,ta=(a); \
340 BN_UMULT_LOHI(low,high,w,ta); \
341 ret = low + (c); \
342 (c) = high; \
343 (c) += (ret<low)?1:0; \
344 (r) = ret; \
345 }
346
/* sqr: r0 gets the low word and r1 the high word of a*a; 'a' is copied
 * into a local first, so it is evaluated once. */
347#define sqr(r0,r1,a) { \
348 BN_ULONG tmp=(a); \
349 BN_UMULT_LOHI(r0,r1,tmp,tmp); \
350 }
351
352#elif defined(BN_UMULT_HIGH)
/* Variant used when only BN_UMULT_HIGH is available: the high word of
 * each product comes from BN_UMULT_HIGH and the low word from an ordinary
 * (truncating) BN_ULONG multiplication.
 * mul_add: r gets the low word and c the high word of w*a + r + c.
 * r and c must be lvalues; w is expanded twice.
 */
353#define mul_add(r,a,w,c) { \
354 BN_ULONG high,low,ret,tmp=(a); \
355 ret = (r); \
356 high= BN_UMULT_HIGH(w,tmp); \
357 ret += (c); \
358 low = (w) * tmp; \
359 (c) = (ret<(c))?1:0; \
360 (c) += high; \
361 ret += low; \
362 (c) += (ret<low)?1:0; \
363 (r) = ret; \
364 }
365
/* mul: r gets the low word and c the high word of w*a + c. */
366#define mul(r,a,w,c) { \
367 BN_ULONG high,low,ret,ta=(a); \
368 low = (w) * ta; \
369 high= BN_UMULT_HIGH(w,ta); \
370 ret = low + (c); \
371 (c) = high; \
372 (c) += (ret<low)?1:0; \
373 (r) = ret; \
374 }
375
/* sqr: r0 gets the low word and r1 the high word of a*a. */
376#define sqr(r0,r1,a) { \
377 BN_ULONG tmp=(a); \
378 (r0) = tmp * tmp; \
379 (r1) = BN_UMULT_HIGH(tmp,tmp); \
380 }
381
382#else
383/*************************************************************
384 * No long long type
 *
 * Fallback used when none of BN_LLONG, BN_UMULT_LOHI or BN_UMULT_HIGH is
 * defined: products are assembled from partial products of word parts.
 * NOTE: mul_add() and mul() take FIVE arguments in this branch -- the
 * multiplier is passed already split as (bl,bh) -- unlike the
 * four-argument forms in the other branches.
385 */
386
/* Part extraction for a BN_ULONG: LBITS masks with BN_MASK2l, HBITS shifts
 * right by BN_BITS4 first, L2HBITS shifts left by BN_BITS4.
 * (Presumably BN_BITS4 is half the word width -- confirm in bn.h.) */
387#define LBITS(a) ((a)&BN_MASK2l)
388#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
389#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
390
/* The same three operations using BN_BITS2/BN_MASKl, with LL2HBITS
 * widening to BN_ULLONG before shifting. */
391#define LLBITS(a) ((a)&BN_MASKl)
392#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
393#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
394
/* mul64: on entry l and h hold the LBITS/HBITS parts of one operand (as
 * set up by mul_add()/mul() below) and (bl,bh) the parts of the other.
 * The four partial products are combined with manual carry handling;
 * on exit l holds the low word and h the high word of the product.
 * l and h must be lvalues. */
395#define mul64(l,h,bl,bh) \
396 { \
397 BN_ULONG m,m1,lt,ht; \
398 \
399 lt=l; \
400 ht=h; \
401 m =(bh)*(lt); \
402 lt=(bl)*(lt); \
403 m1=(bl)*(ht); \
404 ht =(bh)*(ht); \
405 m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
406 ht+=HBITS(m); \
407 m1=L2HBITS(m); \
408 lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
409 (l)=lt; \
410 (h)=ht; \
411 }
412
/* sqr64: lo gets the low word and ho the high word of in*in, computed
 * from the LBITS/HBITS parts of 'in'; the cross product l*h is counted
 * twice via the extra shift by one. */
413#define sqr64(lo,ho,in) \
414 { \
415 BN_ULONG l,h,m; \
416 \
417 h=(in); \
418 l=LBITS(h); \
419 h=HBITS(h); \
420 m =(l)*(h); \
421 l*=l; \
422 h*=h; \
423 h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
424 m =(m&BN_MASK2l)<<(BN_BITS4+1); \
425 l=(l+m)&BN_MASK2; if (l < m) h++; \
426 (lo)=l; \
427 (ho)=h; \
428 }
429
/* mul_add: r gets the low word and c the high word of a*(bh:bl) + r + c.
 * r and c must be lvalues. */
430#define mul_add(r,a,bl,bh,c) { \
431 BN_ULONG l,h; \
432 \
433 h= (a); \
434 l=LBITS(h); \
435 h=HBITS(h); \
436 mul64(l,h,(bl),(bh)); \
437 \
438 /* non-multiply part */ \
439 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
440 (c)=(r); \
441 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
442 (c)=h&BN_MASK2; \
443 (r)=l; \
444 }
445
/* mul: r gets the low word and c the high word of a*(bh:bl) + c. */
446#define mul(r,a,bl,bh,c) { \
447 BN_ULONG l,h; \
448 \
449 h= (a); \
450 l=LBITS(h); \
451 h=HBITS(h); \
452 mul64(l,h,(bl),(bh)); \
453 \
454 /* non-multiply part */ \
455 l+=(c); if ((l&BN_MASK2) < (c)) h++; \
456 (c)=h&BN_MASK2; \
457 (r)=l&BN_MASK2; \
458 }
459#endif /* !BN_LLONG */
460
461void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
462void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
463void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
464void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
465void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
466void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
467int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
468int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
469 int cl, int dl);
470void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
471 int dna,int dnb,BN_ULONG *t);
472void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
473 int n,int tna,int tnb,BN_ULONG *t);
474void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
475void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
476void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
477 BN_ULONG *t);
478void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
479 BN_ULONG *t);
480BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
481 int cl, int dl);
482BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
483 int cl, int dl);
484int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
485
486#ifdef __cplusplus
487}
488#endif
489
490#endif
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
deleted file mode 100644
index 32a8fbaf51..0000000000
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ /dev/null
@@ -1,826 +0,0 @@
1/* crypto/bn/bn_lib.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
65#include <limits.h>
66#include <stdio.h>
67#include "cryptlib.h"
68#include "bn_lcl.h"
69
70const char BN_version[]="Big Number" OPENSSL_VERSION_PTEXT;
71
72/* This stuff appears to be completely unused, so is deprecated */
73#ifndef OPENSSL_NO_DEPRECATED
74/* For a 32 bit machine
75 * 2 - 4 == 128
76 * 3 - 8 == 256
77 * 4 - 16 == 512
78 * 5 - 32 == 1024
79 * 6 - 64 == 2048
80 * 7 - 128 == 4096
81 * 8 - 256 == 8192
82 */
static int bn_limit_bits=0;
static int bn_limit_num=8;        /* (1<<bn_limit_bits) */
static int bn_limit_bits_low=0;
static int bn_limit_num_low=8;    /* (1<<bn_limit_bits_low) */
static int bn_limit_bits_high=0;
static int bn_limit_num_high=8;   /* (1<<bn_limit_bits_high) */
static int bn_limit_bits_mont=0;
static int bn_limit_num_mont=8;   /* (1<<bn_limit_bits_mont) */

/* Store one tuning parameter.  A negative 'bits' leaves both targets
 * untouched; values above (number of bits in an int)-1 are clamped to
 * that maximum.  The power of two is computed on an unsigned value
 * because 1<<bits on a signed int is undefined when bits reaches the
 * sign bit, which the clamp permits.
 */
static void bn_set_limit(int bits, int *limit_bits, int *limit_num)
	{
	const int max_bits = (int)(sizeof(int)*8)-1;

	if (bits < 0)
		return;
	if (bits > max_bits)
		bits = max_bits;
	*limit_bits = bits;
	*limit_num = (int)(1U << bits);
	}

/* Set the four tuning parameters (multiplication, high, low, Montgomery).
 * Each argument is independent: pass a negative value to keep the current
 * setting for that parameter.
 */
void BN_set_params(int mult, int high, int low, int mont)
	{
	bn_set_limit(mult, &bn_limit_bits,      &bn_limit_num);
	bn_set_limit(high, &bn_limit_bits_high, &bn_limit_num_high);
	bn_set_limit(low,  &bn_limit_bits_low,  &bn_limit_num_low);
	bn_set_limit(mont, &bn_limit_bits_mont, &bn_limit_num_mont);
	}

/* Return the bit count last stored for a parameter:
 * which == 0 mult, 1 high, 2 low, 3 mont; any other value yields 0.
 */
int BN_get_params(int which)
	{
	switch (which)
		{
	case 0:  return bn_limit_bits;
	case 1:  return bn_limit_bits_high;
	case 2:  return bn_limit_bits_low;
	case 3:  return bn_limit_bits_mont;
	default: return 0;
		}
	}
132#endif
133
134const BIGNUM *BN_value_one(void)
135 {
136 static BN_ULONG data_one=1L;
137 static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA};
138
139 return(&const_one);
140 }
141
142int BN_num_bits_word(BN_ULONG l)
143 {
144 static const char bits[256]={
145 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
146 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
147 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
148 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
149 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
150 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
151 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
152 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
153 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
154 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
155 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
156 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
157 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
158 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
159 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
160 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
161 };
162
163#if defined(SIXTY_FOUR_BIT_LONG)
164 if (l & 0xffffffff00000000L)
165 {
166 if (l & 0xffff000000000000L)
167 {
168 if (l & 0xff00000000000000L)
169 {
170 return(bits[(int)(l>>56)]+56);
171 }
172 else return(bits[(int)(l>>48)]+48);
173 }
174 else
175 {
176 if (l & 0x0000ff0000000000L)
177 {
178 return(bits[(int)(l>>40)]+40);
179 }
180 else return(bits[(int)(l>>32)]+32);
181 }
182 }
183 else
184#else
185#ifdef SIXTY_FOUR_BIT
186 if (l & 0xffffffff00000000LL)
187 {
188 if (l & 0xffff000000000000LL)
189 {
190 if (l & 0xff00000000000000LL)
191 {
192 return(bits[(int)(l>>56)]+56);
193 }
194 else return(bits[(int)(l>>48)]+48);
195 }
196 else
197 {
198 if (l & 0x0000ff0000000000LL)
199 {
200 return(bits[(int)(l>>40)]+40);
201 }
202 else return(bits[(int)(l>>32)]+32);
203 }
204 }
205 else
206#endif
207#endif
208 {
209#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
210 if (l & 0xffff0000L)
211 {
212 if (l & 0xff000000L)
213 return(bits[(int)(l>>24L)]+24);
214 else return(bits[(int)(l>>16L)]+16);
215 }
216 else
217#endif
218 {
219#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
220 if (l & 0xff00L)
221 return(bits[(int)(l>>8)]+8);
222 else
223#endif
224 return(bits[(int)(l )] );
225 }
226 }
227 }
228
229int BN_num_bits(const BIGNUM *a)
230 {
231 int i = a->top - 1;
232 bn_check_top(a);
233
234 if (BN_is_zero(a)) return 0;
235 return ((i*BN_BITS2) + BN_num_bits_word(a->d[i]));
236 }
237
238void BN_clear_free(BIGNUM *a)
239 {
240 int i;
241
242 if (a == NULL) return;
243 bn_check_top(a);
244 if (a->d != NULL)
245 {
246 OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0]));
247 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
248 OPENSSL_free(a->d);
249 }
250 i=BN_get_flags(a,BN_FLG_MALLOCED);
251 OPENSSL_cleanse(a,sizeof(BIGNUM));
252 if (i)
253 OPENSSL_free(a);
254 }
255
256void BN_free(BIGNUM *a)
257 {
258 if (a == NULL) return;
259 bn_check_top(a);
260 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
261 OPENSSL_free(a->d);
262 if (a->flags & BN_FLG_MALLOCED)
263 OPENSSL_free(a);
264 else
265 {
266#ifndef OPENSSL_NO_DEPRECATED
267 a->flags|=BN_FLG_FREE;
268#endif
269 a->d = NULL;
270 }
271 }
272
273void BN_init(BIGNUM *a)
274 {
275 memset(a,0,sizeof(BIGNUM));
276 bn_check_top(a);
277 }
278
279BIGNUM *BN_new(void)
280 {
281 BIGNUM *ret;
282
283 if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL)
284 {
285 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
286 return(NULL);
287 }
288 ret->flags=BN_FLG_MALLOCED;
289 ret->top=0;
290 ret->neg=0;
291 ret->dmax=0;
292 ret->d=NULL;
293 bn_check_top(ret);
294 return(ret);
295 }
296
297/* This is used both by bn_expand2() and bn_dup_expand() */
298/* The caller MUST check that words > b->dmax before calling this */
299static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
300 {
301 BN_ULONG *A,*a = NULL;
302 const BN_ULONG *B;
303 int i;
304
305 bn_check_top(b);
306
307 if (words > (INT_MAX/(4*BN_BITS2)))
308 {
309 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG);
310 return NULL;
311 }
312 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
313 {
314 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
315 return(NULL);
316 }
317 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*words);
318 if (A == NULL)
319 {
320 BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
321 return(NULL);
322 }
323#if 1
324 B=b->d;
325 /* Check if the previous number needs to be copied */
326 if (B != NULL)
327 {
328 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
329 {
330 /*
331 * The fact that the loop is unrolled
332 * 4-wise is a tribute to Intel. It's
333 * the one that doesn't have enough
334 * registers to accomodate more data.
335 * I'd unroll it 8-wise otherwise:-)
336 *
337 * <appro@fy.chalmers.se>
338 */
339 BN_ULONG a0,a1,a2,a3;
340 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
341 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
342 }
343 switch (b->top&3)
344 {
345 case 3: A[2]=B[2];
346 case 2: A[1]=B[1];
347 case 1: A[0]=B[0];
348 case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
349 * the switch table by doing a=top&3; a--; goto jump_table[a];
350 * which fails for top== 0 */
351 ;
352 }
353 }
354
355#else
356 memset(A,0,sizeof(BN_ULONG)*words);
357 memcpy(A,b->d,sizeof(b->d[0])*b->top);
358#endif
359
360 return(a);
361 }
362
363/* This is an internal function that can be used instead of bn_expand2()
364 * when there is a need to copy BIGNUMs instead of only expanding the
365 * data part, while still expanding them.
366 * Especially useful when needing to expand BIGNUMs that are declared
367 * 'const' and should therefore not be changed.
368 * The reason to use this instead of a BN_dup() followed by a bn_expand2()
369 * is memory allocation overhead. A BN_dup() followed by a bn_expand2()
370 * will allocate new memory for the BIGNUM data twice, and free it once,
371 * while bn_dup_expand() makes sure allocation is made only once.
372 */
373
374#ifndef OPENSSL_NO_DEPRECATED
375BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
376 {
377 BIGNUM *r = NULL;
378
379 bn_check_top(b);
380
381 /* This function does not work if
382 * words <= b->dmax && top < words
383 * because BN_dup() does not preserve 'dmax'!
384 * (But bn_dup_expand() is not used anywhere yet.)
385 */
386
387 if (words > b->dmax)
388 {
389 BN_ULONG *a = bn_expand_internal(b, words);
390
391 if (a)
392 {
393 r = BN_new();
394 if (r)
395 {
396 r->top = b->top;
397 r->dmax = words;
398 r->neg = b->neg;
399 r->d = a;
400 }
401 else
402 {
403 /* r == NULL, BN_new failure */
404 OPENSSL_free(a);
405 }
406 }
407 /* If a == NULL, there was an error in allocation in
408 bn_expand_internal(), and NULL should be returned */
409 }
410 else
411 {
412 r = BN_dup(b);
413 }
414
415 bn_check_top(r);
416 return r;
417 }
418#endif
419
420/* This is an internal function that should not be used in applications.
421 * It ensures that 'b' has enough room for a 'words' word number
422 * and initialises any unused part of b->d with leading zeros.
423 * It is mostly used by the various BIGNUM routines. If there is an error,
424 * NULL is returned. If not, 'b' is returned. */
425
426BIGNUM *bn_expand2(BIGNUM *b, int words)
427 {
428 bn_check_top(b);
429
430 if (words > b->dmax)
431 {
432 BN_ULONG *a = bn_expand_internal(b, words);
433 if(!a) return NULL;
434 if(b->d) OPENSSL_free(b->d);
435 b->d=a;
436 b->dmax=words;
437 }
438
439/* None of this should be necessary because of what b->top means! */
440#if 0
441 /* NB: bn_wexpand() calls this only if the BIGNUM really has to grow */
442 if (b->top < b->dmax)
443 {
444 int i;
445 BN_ULONG *A = &(b->d[b->top]);
446 for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8)
447 {
448 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
449 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
450 }
451 for (i=(b->dmax - b->top)&7; i>0; i--,A++)
452 A[0]=0;
453 assert(A == &(b->d[b->dmax]));
454 }
455#endif
456 bn_check_top(b);
457 return b;
458 }
459
460BIGNUM *BN_dup(const BIGNUM *a)
461 {
462 BIGNUM *t;
463
464 if (a == NULL) return NULL;
465 bn_check_top(a);
466
467 t = BN_new();
468 if (t == NULL) return NULL;
469 if(!BN_copy(t, a))
470 {
471 BN_free(t);
472 return NULL;
473 }
474 bn_check_top(t);
475 return t;
476 }
477
478BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
479 {
480 int i;
481 BN_ULONG *A;
482 const BN_ULONG *B;
483
484 bn_check_top(b);
485
486 if (a == b) return(a);
487 if (bn_wexpand(a,b->top) == NULL) return(NULL);
488
489#if 1
490 A=a->d;
491 B=b->d;
492 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
493 {
494 BN_ULONG a0,a1,a2,a3;
495 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
496 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
497 }
498 switch (b->top&3)
499 {
500 case 3: A[2]=B[2];
501 case 2: A[1]=B[1];
502 case 1: A[0]=B[0];
503 case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
504 }
505#else
506 memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
507#endif
508
509 a->top=b->top;
510 a->neg=b->neg;
511 bn_check_top(a);
512 return(a);
513 }
514
515void BN_swap(BIGNUM *a, BIGNUM *b)
516 {
517 int flags_old_a, flags_old_b;
518 BN_ULONG *tmp_d;
519 int tmp_top, tmp_dmax, tmp_neg;
520
521 bn_check_top(a);
522 bn_check_top(b);
523
524 flags_old_a = a->flags;
525 flags_old_b = b->flags;
526
527 tmp_d = a->d;
528 tmp_top = a->top;
529 tmp_dmax = a->dmax;
530 tmp_neg = a->neg;
531
532 a->d = b->d;
533 a->top = b->top;
534 a->dmax = b->dmax;
535 a->neg = b->neg;
536
537 b->d = tmp_d;
538 b->top = tmp_top;
539 b->dmax = tmp_dmax;
540 b->neg = tmp_neg;
541
542 a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA);
543 b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA);
544 bn_check_top(a);
545 bn_check_top(b);
546 }
547
548void BN_clear(BIGNUM *a)
549 {
550 bn_check_top(a);
551 if (a->d != NULL)
552 memset(a->d,0,a->dmax*sizeof(a->d[0]));
553 a->top=0;
554 a->neg=0;
555 }
556
557BN_ULONG BN_get_word(const BIGNUM *a)
558 {
559 if (a->top > 1)
560 return BN_MASK2;
561 else if (a->top == 1)
562 return a->d[0];
563 /* a->top == 0 */
564 return 0;
565 }
566
567int BN_set_word(BIGNUM *a, BN_ULONG w)
568 {
569 bn_check_top(a);
570 if (bn_expand(a,(int)sizeof(BN_ULONG)*8) == NULL) return(0);
571 a->neg = 0;
572 a->d[0] = w;
573 a->top = (w ? 1 : 0);
574 bn_check_top(a);
575 return(1);
576 }
577
578BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
579 {
580 unsigned int i,m;
581 unsigned int n;
582 BN_ULONG l;
583 BIGNUM *bn = NULL;
584
585 if (ret == NULL)
586 ret = bn = BN_new();
587 if (ret == NULL) return(NULL);
588 bn_check_top(ret);
589 l=0;
590 n=len;
591 if (n == 0)
592 {
593 ret->top=0;
594 return(ret);
595 }
596 i=((n-1)/BN_BYTES)+1;
597 m=((n-1)%(BN_BYTES));
598 if (bn_wexpand(ret, (int)i) == NULL)
599 {
600 if (bn) BN_free(bn);
601 return NULL;
602 }
603 ret->top=i;
604 ret->neg=0;
605 while (n--)
606 {
607 l=(l<<8L)| *(s++);
608 if (m-- == 0)
609 {
610 ret->d[--i]=l;
611 l=0;
612 m=BN_BYTES-1;
613 }
614 }
615 /* need to call this due to clear byte at top if avoiding
616 * having the top bit set (-ve number) */
617 bn_correct_top(ret);
618 return(ret);
619 }
620
621/* ignore negative */
622int BN_bn2bin(const BIGNUM *a, unsigned char *to)
623 {
624 int n,i;
625 BN_ULONG l;
626
627 bn_check_top(a);
628 n=i=BN_num_bytes(a);
629 while (i--)
630 {
631 l=a->d[i/BN_BYTES];
632 *(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff;
633 }
634 return(n);
635 }
636
637int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
638 {
639 int i;
640 BN_ULONG t1,t2,*ap,*bp;
641
642 bn_check_top(a);
643 bn_check_top(b);
644
645 i=a->top-b->top;
646 if (i != 0) return(i);
647 ap=a->d;
648 bp=b->d;
649 for (i=a->top-1; i>=0; i--)
650 {
651 t1= ap[i];
652 t2= bp[i];
653 if (t1 != t2)
654 return((t1 > t2) ? 1 : -1);
655 }
656 return(0);
657 }
658
659int BN_cmp(const BIGNUM *a, const BIGNUM *b)
660 {
661 int i;
662 int gt,lt;
663 BN_ULONG t1,t2;
664
665 if ((a == NULL) || (b == NULL))
666 {
667 if (a != NULL)
668 return(-1);
669 else if (b != NULL)
670 return(1);
671 else
672 return(0);
673 }
674
675 bn_check_top(a);
676 bn_check_top(b);
677
678 if (a->neg != b->neg)
679 {
680 if (a->neg)
681 return(-1);
682 else return(1);
683 }
684 if (a->neg == 0)
685 { gt=1; lt= -1; }
686 else { gt= -1; lt=1; }
687
688 if (a->top > b->top) return(gt);
689 if (a->top < b->top) return(lt);
690 for (i=a->top-1; i>=0; i--)
691 {
692 t1=a->d[i];
693 t2=b->d[i];
694 if (t1 > t2) return(gt);
695 if (t1 < t2) return(lt);
696 }
697 return(0);
698 }
699
700int BN_set_bit(BIGNUM *a, int n)
701 {
702 int i,j,k;
703
704 if (n < 0)
705 return 0;
706
707 i=n/BN_BITS2;
708 j=n%BN_BITS2;
709 if (a->top <= i)
710 {
711 if (bn_wexpand(a,i+1) == NULL) return(0);
712 for(k=a->top; k<i+1; k++)
713 a->d[k]=0;
714 a->top=i+1;
715 }
716
717 a->d[i]|=(((BN_ULONG)1)<<j);
718 bn_check_top(a);
719 return(1);
720 }
721
722int BN_clear_bit(BIGNUM *a, int n)
723 {
724 int i,j;
725
726 bn_check_top(a);
727 if (n < 0) return 0;
728
729 i=n/BN_BITS2;
730 j=n%BN_BITS2;
731 if (a->top <= i) return(0);
732
733 a->d[i]&=(~(((BN_ULONG)1)<<j));
734 bn_correct_top(a);
735 return(1);
736 }
737
738int BN_is_bit_set(const BIGNUM *a, int n)
739 {
740 int i,j;
741
742 bn_check_top(a);
743 if (n < 0) return 0;
744 i=n/BN_BITS2;
745 j=n%BN_BITS2;
746 if (a->top <= i) return 0;
747 return(((a->d[i])>>j)&((BN_ULONG)1));
748 }
749
750int BN_mask_bits(BIGNUM *a, int n)
751 {
752 int b,w;
753
754 bn_check_top(a);
755 if (n < 0) return 0;
756
757 w=n/BN_BITS2;
758 b=n%BN_BITS2;
759 if (w >= a->top) return 0;
760 if (b == 0)
761 a->top=w;
762 else
763 {
764 a->top=w+1;
765 a->d[w]&= ~(BN_MASK2<<b);
766 }
767 bn_correct_top(a);
768 return(1);
769 }
770
771void BN_set_negative(BIGNUM *a, int b)
772 {
773 if (b && !BN_is_zero(a))
774 a->neg = 1;
775 else
776 a->neg = 0;
777 }
778
779int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
780 {
781 int i;
782 BN_ULONG aa,bb;
783
784 aa=a[n-1];
785 bb=b[n-1];
786 if (aa != bb) return((aa > bb)?1:-1);
787 for (i=n-2; i>=0; i--)
788 {
789 aa=a[i];
790 bb=b[i];
791 if (aa != bb) return((aa > bb)?1:-1);
792 }
793 return(0);
794 }
795
796/* Here follows a specialised variants of bn_cmp_words(). It has the
797 property of performing the operation on arrays of different sizes.
798 The sizes of those arrays is expressed through cl, which is the
799 common length ( basicall, min(len(a),len(b)) ), and dl, which is the
800 delta between the two lengths, calculated as len(a)-len(b).
801 All lengths are the number of BN_ULONGs... */
802
803int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
804 int cl, int dl)
805 {
806 int n,i;
807 n = cl-1;
808
809 if (dl < 0)
810 {
811 for (i=dl; i<0; i++)
812 {
813 if (b[n-i] != 0)
814 return -1; /* a < b */
815 }
816 }
817 if (dl > 0)
818 {
819 for (i=dl; i>0; i--)
820 {
821 if (a[n+i] != 0)
822 return 1; /* a > b */
823 }
824 }
825 return bn_cmp_words(a,b,cl);
826 }
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
deleted file mode 100644
index 77d6ddb91a..0000000000
--- a/src/lib/libcrypto/bn/bn_mod.c
+++ /dev/null
@@ -1,301 +0,0 @@
1/* crypto/bn/bn_mod.c */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
58 * All rights reserved.
59 *
60 * This package is an SSL implementation written
61 * by Eric Young (eay@cryptsoft.com).
62 * The implementation was written so as to conform with Netscapes SSL.
63 *
64 * This library is free for commercial and non-commercial use as long as
65 * the following conditions are aheared to. The following conditions
66 * apply to all code found in this distribution, be it the RC4, RSA,
67 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
68 * included with this distribution is covered by the same copyright terms
69 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
70 *
71 * Copyright remains Eric Young's, and as such any Copyright notices in
72 * the code are not to be removed.
73 * If this package is used in a product, Eric Young should be given attribution
74 * as the author of the parts of the library used.
75 * This can be in the form of a textual message at program startup or
76 * in documentation (online or textual) provided with the package.
77 *
78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions
80 * are met:
81 * 1. Redistributions of source code must retain the copyright
82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement:
88 * "This product includes cryptographic software written by
89 * Eric Young (eay@cryptsoft.com)"
90 * The word 'cryptographic' can be left out if the rouines from the library
91 * being used are not cryptographic related :-).
92 * 4. If you include any Windows specific code (or a derivative thereof) from
93 * the apps directory (application code) you must include an acknowledgement:
94 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
95 *
96 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
97 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
98 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
99 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
100 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
101 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
102 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
103 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
104 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
105 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
106 * SUCH DAMAGE.
107 *
108 * The licence and distribution terms for any publically available version or
109 * derivative of this code cannot be changed. i.e. this code cannot simply be
110 * copied and put under another distribution licence
111 * [including the GNU Public Licence.]
112 */
113
114#include "cryptlib.h"
115#include "bn_lcl.h"
116
117
#if 0 /* now just a #define */
/*
 * Disabled function form of BN_mod: rem = m mod d, computed by BN_div()
 * with the quotient discarded.
 */
int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
{
	/* note that rem->neg == m->neg (unless the remainder is zero) */
	return BN_div(NULL, rem, m, d, ctx);
}
#endif
125
126
127int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
128 {
129 /* like BN_mod, but returns non-negative remainder
130 * (i.e., 0 <= r < |d| always holds) */
131
132 if (!(BN_mod(r,m,d,ctx)))
133 return 0;
134 if (!r->neg)
135 return 1;
136 /* now -|d| < r < 0, so we have to set r := r + |d| */
137 return (d->neg ? BN_sub : BN_add)(r, r, d);
138}
139
140
141int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
142 {
143 if (!BN_add(r, a, b)) return 0;
144 return BN_nnmod(r, r, m, ctx);
145 }
146
147
148/* BN_mod_add variant that may be used if both a and b are non-negative
149 * and less than m */
150int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
151 {
152 if (!BN_uadd(r, a, b)) return 0;
153 if (BN_ucmp(r, m) >= 0)
154 return BN_usub(r, r, m);
155 return 1;
156 }
157
158
159int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
160 {
161 if (!BN_sub(r, a, b)) return 0;
162 return BN_nnmod(r, r, m, ctx);
163 }
164
165
166/* BN_mod_sub variant that may be used if both a and b are non-negative
167 * and less than m */
168int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
169 {
170 if (!BN_sub(r, a, b)) return 0;
171 if (r->neg)
172 return BN_add(r, r, m);
173 return 1;
174 }
175
176
177/* slow but works */
178int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
179 BN_CTX *ctx)
180 {
181 BIGNUM *t;
182 int ret=0;
183
184 bn_check_top(a);
185 bn_check_top(b);
186 bn_check_top(m);
187
188 BN_CTX_start(ctx);
189 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
190 if (a == b)
191 { if (!BN_sqr(t,a,ctx)) goto err; }
192 else
193 { if (!BN_mul(t,a,b,ctx)) goto err; }
194 if (!BN_nnmod(r,t,m,ctx)) goto err;
195 bn_check_top(r);
196 ret=1;
197err:
198 BN_CTX_end(ctx);
199 return(ret);
200 }
201
202
203int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
204 {
205 if (!BN_sqr(r, a, ctx)) return 0;
206 /* r->neg == 0, thus we don't need BN_nnmod */
207 return BN_mod(r, r, m, ctx);
208 }
209
210
211int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
212 {
213 if (!BN_lshift1(r, a)) return 0;
214 bn_check_top(r);
215 return BN_nnmod(r, r, m, ctx);
216 }
217
218
219/* BN_mod_lshift1 variant that may be used if a is non-negative
220 * and less than m */
221int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
222 {
223 if (!BN_lshift1(r, a)) return 0;
224 bn_check_top(r);
225 if (BN_cmp(r, m) >= 0)
226 return BN_sub(r, r, m);
227 return 1;
228 }
229
230
231int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
232 {
233 BIGNUM *abs_m = NULL;
234 int ret;
235
236 if (!BN_nnmod(r, a, m, ctx)) return 0;
237
238 if (m->neg)
239 {
240 abs_m = BN_dup(m);
241 if (abs_m == NULL) return 0;
242 abs_m->neg = 0;
243 }
244
245 ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
246 bn_check_top(r);
247
248 if (abs_m)
249 BN_free(abs_m);
250 return ret;
251 }
252
253
254/* BN_mod_lshift variant that may be used if a is non-negative
255 * and less than m */
256int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
257 {
258 if (r != a)
259 {
260 if (BN_copy(r, a) == NULL) return 0;
261 }
262
263 while (n > 0)
264 {
265 int max_shift;
266
267 /* 0 < r < m */
268 max_shift = BN_num_bits(m) - BN_num_bits(r);
269 /* max_shift >= 0 */
270
271 if (max_shift < 0)
272 {
273 BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
274 return 0;
275 }
276
277 if (max_shift > n)
278 max_shift = n;
279
280 if (max_shift)
281 {
282 if (!BN_lshift(r, r, max_shift)) return 0;
283 n -= max_shift;
284 }
285 else
286 {
287 if (!BN_lshift1(r, r)) return 0;
288 --n;
289 }
290
291 /* BN_num_bits(r) <= BN_num_bits(m) */
292
293 if (BN_cmp(r, m) >= 0)
294 {
295 if (!BN_sub(r, r, m)) return 0;
296 }
297 }
298 bn_check_top(r);
299
300 return 1;
301 }
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
deleted file mode 100644
index 4799b152dd..0000000000
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ /dev/null
@@ -1,732 +0,0 @@
1/* crypto/bn/bn_mont.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112/*
113 * Details about Montgomery multiplication algorithms can be found at
114 * http://security.ece.orst.edu/publications.html, e.g.
115 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
116 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
117 */
118
119#include <stdio.h>
120#include "cryptlib.h"
121#include "bn_lcl.h"
122
123#define MONT_WORD /* use the faster word-based algorithm */
124
125#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
126/* This condition means we have a specific non-default build:
127 * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any
128 * BN_BITS2<=32 platform; an explicit "enable-montasm" is required.
129 * I.e., if we are here, the user intentionally deviates from the
130 * normal stable build to get better Montgomery performance from
131 * the 0.9.9-dev backport.
132 *
133 * In this case only, we also enable BN_from_montgomery_word()
134 * (another non-stable feature from 0.9.9-dev).
135 */
136#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
137#endif
138
139#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
140static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
141#endif
142
143
144
145int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
146 BN_MONT_CTX *mont, BN_CTX *ctx)
147 {
148 BIGNUM *tmp;
149 int ret=0;
150#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
151 int num = mont->N.top;
152
153 if (num>1 && a->top==num && b->top==num)
154 {
155 if (bn_wexpand(r,num) == NULL) return(0);
156#if 0 /* for OpenSSL 0.9.9 mont->n0 */
157 if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
158#else
159 if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num))
160#endif
161 {
162 r->neg = a->neg^b->neg;
163 r->top = num;
164 bn_correct_top(r);
165 return(1);
166 }
167 }
168#endif
169
170 BN_CTX_start(ctx);
171 tmp = BN_CTX_get(ctx);
172 if (tmp == NULL) goto err;
173
174 bn_check_top(tmp);
175 if (a == b)
176 {
177 if (!BN_sqr(tmp,a,ctx)) goto err;
178 }
179 else
180 {
181 if (!BN_mul(tmp,a,b,ctx)) goto err;
182 }
183 /* reduce from aRR to aR */
184#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
185 if (!BN_from_montgomery_word(r,tmp,mont)) goto err;
186#else
187 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
188#endif
189 bn_check_top(r);
190 ret=1;
191err:
192 BN_CTX_end(ctx);
193 return(ret);
194 }
195
196#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
197static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
198 {
199 BIGNUM *n;
200 BN_ULONG *ap,*np,*rp,n0,v,*nrp;
201 int al,nl,max,i,x,ri;
202
203 n= &(mont->N);
204 /* mont->ri is the size of mont->N in bits (rounded up
205 to the word size) */
206 al=ri=mont->ri/BN_BITS2;
207
208 nl=n->top;
209 if ((al == 0) || (nl == 0)) { ret->top=0; return(1); }
210
211 max=(nl+al+1); /* allow for overflow (no?) XXX */
212 if (bn_wexpand(r,max) == NULL) return(0);
213
214 r->neg^=n->neg;
215 np=n->d;
216 rp=r->d;
217 nrp= &(r->d[nl]);
218
219 /* clear the top words of T */
220 for (i=r->top; i<max; i++) /* memset? XXX */
221 r->d[i]=0;
222
223 r->top=max;
224#if 0 /* for OpenSSL 0.9.9 mont->n0 */
225 n0=mont->n0[0];
226#else
227 n0=mont->n0;
228#endif
229
230#ifdef BN_COUNT
231 fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
232#endif
233 for (i=0; i<nl; i++)
234 {
235#ifdef __TANDEM
236 {
237 long long t1;
238 long long t2;
239 long long t3;
240 t1 = rp[0] * (n0 & 0177777);
241 t2 = 037777600000l;
242 t2 = n0 & t2;
243 t3 = rp[0] & 0177777;
244 t2 = (t3 * t2) & BN_MASK2;
245 t1 = t1 + t2;
246 v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
247 }
248#else
249 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
250#endif
251 nrp++;
252 rp++;
253 if (((nrp[-1]+=v)&BN_MASK2) >= v)
254 continue;
255 else
256 {
257 if (((++nrp[0])&BN_MASK2) != 0) continue;
258 if (((++nrp[1])&BN_MASK2) != 0) continue;
259 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
260 }
261 }
262 bn_correct_top(r);
263
264 /* mont->ri will be a multiple of the word size and below code
265 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
266 if (r->top <= ri)
267 {
268 ret->top=0;
269 return(1);
270 }
271 al=r->top-ri;
272
273 if (bn_wexpand(ret,ri) == NULL) return(0);
274 x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
275 ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
276 ret->neg=r->neg;
277
278 rp=ret->d;
279 ap=&(r->d[ri]);
280
281 {
282 size_t m1,m2;
283
284 v=bn_sub_words(rp,ap,np,ri);
285 /* this ----------------^^ works even in al<ri case
286 * thanks to zealous zeroing of top of the vector in the
287 * beginning. */
288
289 /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
290 /* in other words if subtraction result is real, then
291 * trick unconditional memcpy below to perform in-place
292 * "refresh" instead of actual copy. */
293 m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
294 m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
295 m1|=m2; /* (al!=ri) */
296 m1|=(0-(size_t)v); /* (al!=ri || v) */
297 m1&=~m2; /* (al!=ri || v) && !al>ri */
298 nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
299 }
300
301 /* 'i<ri' is chosen to eliminate dependency on input data, even
302 * though it results in redundant copy in al<ri case. */
303 for (i=0,ri-=4; i<ri; i+=4)
304 {
305 BN_ULONG t1,t2,t3,t4;
306
307 t1=nrp[i+0];
308 t2=nrp[i+1];
309 t3=nrp[i+2]; ap[i+0]=0;
310 t4=nrp[i+3]; ap[i+1]=0;
311 rp[i+0]=t1; ap[i+2]=0;
312 rp[i+1]=t2; ap[i+3]=0;
313 rp[i+2]=t3;
314 rp[i+3]=t4;
315 }
316 for (ri+=4; i<ri; i++)
317 rp[i]=nrp[i], ap[i]=0;
318 bn_correct_top(r);
319 bn_correct_top(ret);
320 bn_check_top(ret);
321
322 return(1);
323 }
324
325int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
326 BN_CTX *ctx)
327 {
328 int retn=0;
329 BIGNUM *t;
330
331 BN_CTX_start(ctx);
332 if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
333 retn = BN_from_montgomery_word(ret,t,mont);
334 BN_CTX_end(ctx);
335 return retn;
336 }
337
338#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
339
340int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
341 BN_CTX *ctx)
342 {
343 int retn=0;
344
345#ifdef MONT_WORD
346 BIGNUM *n,*r;
347 BN_ULONG *ap,*np,*rp,n0,v,*nrp;
348 int al,nl,max,i,x,ri;
349
350 BN_CTX_start(ctx);
351 if ((r = BN_CTX_get(ctx)) == NULL) goto err;
352
353 if (!BN_copy(r,a)) goto err;
354 n= &(mont->N);
355
356 ap=a->d;
357 /* mont->ri is the size of mont->N in bits (rounded up
358 to the word size) */
359 al=ri=mont->ri/BN_BITS2;
360
361 nl=n->top;
362 if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
363
364 max=(nl+al+1); /* allow for overflow (no?) XXX */
365 if (bn_wexpand(r,max) == NULL) goto err;
366
367 r->neg=a->neg^n->neg;
368 np=n->d;
369 rp=r->d;
370 nrp= &(r->d[nl]);
371
372 /* clear the top words of T */
373#if 1
374 for (i=r->top; i<max; i++) /* memset? XXX */
375 r->d[i]=0;
376#else
377 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
378#endif
379
380 r->top=max;
381 n0=mont->n0;
382
383#ifdef BN_COUNT
384 fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl);
385#endif
386 for (i=0; i<nl; i++)
387 {
388#ifdef __TANDEM
389 {
390 long long t1;
391 long long t2;
392 long long t3;
393 t1 = rp[0] * (n0 & 0177777);
394 t2 = 037777600000l;
395 t2 = n0 & t2;
396 t3 = rp[0] & 0177777;
397 t2 = (t3 * t2) & BN_MASK2;
398 t1 = t1 + t2;
399 v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
400 }
401#else
402 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
403#endif
404 nrp++;
405 rp++;
406 if (((nrp[-1]+=v)&BN_MASK2) >= v)
407 continue;
408 else
409 {
410 if (((++nrp[0])&BN_MASK2) != 0) continue;
411 if (((++nrp[1])&BN_MASK2) != 0) continue;
412 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
413 }
414 }
415 bn_correct_top(r);
416
417 /* mont->ri will be a multiple of the word size and below code
418 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
419 if (r->top <= ri)
420 {
421 ret->top=0;
422 retn=1;
423 goto err;
424 }
425 al=r->top-ri;
426
427# define BRANCH_FREE 1
428# if BRANCH_FREE
429 if (bn_wexpand(ret,ri) == NULL) goto err;
430 x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
431 ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
432 ret->neg=r->neg;
433
434 rp=ret->d;
435 ap=&(r->d[ri]);
436
437 {
438 size_t m1,m2;
439
440 v=bn_sub_words(rp,ap,np,ri);
441 /* this ----------------^^ works even in al<ri case
442 * thanks to zealous zeroing of top of the vector in the
443 * beginning. */
444
445 /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
446 /* in other words if subtraction result is real, then
447 * trick unconditional memcpy below to perform in-place
448 * "refresh" instead of actual copy. */
449 m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
450 m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
451 m1|=m2; /* (al!=ri) */
452 m1|=(0-(size_t)v); /* (al!=ri || v) */
453 m1&=~m2; /* (al!=ri || v) && !al>ri */
454 nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
455 }
456
457 /* 'i<ri' is chosen to eliminate dependency on input data, even
458 * though it results in redundant copy in al<ri case. */
459 for (i=0,ri-=4; i<ri; i+=4)
460 {
461 BN_ULONG t1,t2,t3,t4;
462
463 t1=nrp[i+0];
464 t2=nrp[i+1];
465 t3=nrp[i+2]; ap[i+0]=0;
466 t4=nrp[i+3]; ap[i+1]=0;
467 rp[i+0]=t1; ap[i+2]=0;
468 rp[i+1]=t2; ap[i+3]=0;
469 rp[i+2]=t3;
470 rp[i+3]=t4;
471 }
472 for (ri+=4; i<ri; i++)
473 rp[i]=nrp[i], ap[i]=0;
474 bn_correct_top(r);
475 bn_correct_top(ret);
476# else
477 if (bn_wexpand(ret,al) == NULL) goto err;
478 ret->top=al;
479 ret->neg=r->neg;
480
481 rp=ret->d;
482 ap=&(r->d[ri]);
483 al-=4;
484 for (i=0; i<al; i+=4)
485 {
486 BN_ULONG t1,t2,t3,t4;
487
488 t1=ap[i+0];
489 t2=ap[i+1];
490 t3=ap[i+2];
491 t4=ap[i+3];
492 rp[i+0]=t1;
493 rp[i+1]=t2;
494 rp[i+2]=t3;
495 rp[i+3]=t4;
496 }
497 al+=4;
498 for (; i<al; i++)
499 rp[i]=ap[i];
500# endif
501#else /* !MONT_WORD */
502 BIGNUM *t1,*t2;
503
504 BN_CTX_start(ctx);
505 t1 = BN_CTX_get(ctx);
506 t2 = BN_CTX_get(ctx);
507 if (t1 == NULL || t2 == NULL) goto err;
508
509 if (!BN_copy(t1,a)) goto err;
510 BN_mask_bits(t1,mont->ri);
511
512 if (!BN_mul(t2,t1,&mont->Ni,ctx)) goto err;
513 BN_mask_bits(t2,mont->ri);
514
515 if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
516 if (!BN_add(t2,a,t1)) goto err;
517 if (!BN_rshift(ret,t2,mont->ri)) goto err;
518#endif /* MONT_WORD */
519
520#if !defined(BRANCH_FREE) || BRANCH_FREE==0
521 if (BN_ucmp(ret, &(mont->N)) >= 0)
522 {
523 if (!BN_usub(ret,ret,&(mont->N))) goto err;
524 }
525#endif
526 retn=1;
527 bn_check_top(ret);
528 err:
529 BN_CTX_end(ctx);
530 return(retn);
531 }
532#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
533
534BN_MONT_CTX *BN_MONT_CTX_new(void)
535 {
536 BN_MONT_CTX *ret;
537
538 if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
539 return(NULL);
540
541 BN_MONT_CTX_init(ret);
542 ret->flags=BN_FLG_MALLOCED;
543 return(ret);
544 }
545
546void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
547 {
548 ctx->ri=0;
549 BN_init(&(ctx->RR));
550 BN_init(&(ctx->N));
551 BN_init(&(ctx->Ni));
552#if 0 /* for OpenSSL 0.9.9 mont->n0 */
553 ctx->n0[0] = ctx->n0[1] = 0;
554#else
555 ctx->n0 = 0;
556#endif
557 ctx->flags=0;
558 }
559
560void BN_MONT_CTX_free(BN_MONT_CTX *mont)
561 {
562 if(mont == NULL)
563 return;
564
565 BN_free(&(mont->RR));
566 BN_free(&(mont->N));
567 BN_free(&(mont->Ni));
568 if (mont->flags & BN_FLG_MALLOCED)
569 OPENSSL_free(mont);
570 }
571
572int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
573 {
574 int ret = 0;
575 BIGNUM *Ri,*R;
576
577 BN_CTX_start(ctx);
578 if((Ri = BN_CTX_get(ctx)) == NULL) goto err;
579 R= &(mont->RR); /* grab RR as a temp */
580 if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */
581 mont->N.neg = 0;
582
583#ifdef MONT_WORD
584 {
585 BIGNUM tmod;
586 BN_ULONG buf[2];
587
588 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
589 BN_zero(R);
590#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)",
591 only certain BN_BITS2<=32 platforms actually need this */
592 if (!(BN_set_bit(R,2*BN_BITS2))) goto err; /* R */
593#else
594 if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
595#endif
596
597 buf[0]=mod->d[0]; /* tmod = N mod word size */
598 buf[1]=0;
599
600 BN_init(&tmod);
601 tmod.d=buf;
602 tmod.top = buf[0] != 0 ? 1 : 0;
603 tmod.dmax=2;
604 tmod.neg=0;
605
606#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)";
607 only certain BN_BITS2<=32 platforms actually need this */
608 tmod.top=0;
609 if ((buf[0] = mod->d[0])) tmod.top=1;
610 if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2;
611
612 if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
613 goto err;
614 if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
615 if (!BN_is_zero(Ri))
616 {
617 if (!BN_sub_word(Ri,1)) goto err;
618 }
619 else /* if N mod word size == 1 */
620 {
621 if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
622 goto err;
623 /* Ri-- (mod double word size) */
624 Ri->neg=0;
625 Ri->d[0]=BN_MASK2;
626 Ri->d[1]=BN_MASK2;
627 Ri->top=2;
628 }
629 if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
630 /* Ni = (R*Ri-1)/N,
631 * keep only couple of least significant words: */
632 mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
633 mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
634#else
635 /* Ri = R^-1 mod N*/
636 if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
637 goto err;
638 if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */
639 if (!BN_is_zero(Ri))
640 {
641 if (!BN_sub_word(Ri,1)) goto err;
642 }
643 else /* if N mod word size == 1 */
644 {
645 if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
646 }
647 if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
648 /* Ni = (R*Ri-1)/N,
649 * keep only least significant word: */
650# if 0 /* for OpenSSL 0.9.9 mont->n0 */
651 mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
652 mont->n0[1] = 0;
653# else
654 mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
655# endif
656#endif
657 }
658#else /* !MONT_WORD */
659 { /* bignum version */
660 mont->ri=BN_num_bits(&mont->N);
661 BN_zero(R);
662 if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */
663 /* Ri = R^-1 mod N*/
664 if ((BN_mod_inverse(Ri,R,&mont->N,ctx)) == NULL)
665 goto err;
666 if (!BN_lshift(Ri,Ri,mont->ri)) goto err; /* R*Ri */
667 if (!BN_sub_word(Ri,1)) goto err;
668 /* Ni = (R*Ri-1) / N */
669 if (!BN_div(&(mont->Ni),NULL,Ri,&mont->N,ctx)) goto err;
670 }
671#endif
672
673 /* setup RR for conversions */
674 BN_zero(&(mont->RR));
675 if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
676 if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
677
678 ret = 1;
679err:
680 BN_CTX_end(ctx);
681 return ret;
682 }
683
684BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
685 {
686 if (to == from) return(to);
687
688 if (!BN_copy(&(to->RR),&(from->RR))) return NULL;
689 if (!BN_copy(&(to->N),&(from->N))) return NULL;
690 if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
691 to->ri=from->ri;
692#if 0 /* for OpenSSL 0.9.9 mont->n0 */
693 to->n0[0]=from->n0[0];
694 to->n0[1]=from->n0[1];
695#else
696 to->n0=from->n0;
697#endif
698 return(to);
699 }
700
701BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
702 const BIGNUM *mod, BN_CTX *ctx)
703 {
704 int got_write_lock = 0;
705 BN_MONT_CTX *ret;
706
707 CRYPTO_r_lock(lock);
708 if (!*pmont)
709 {
710 CRYPTO_r_unlock(lock);
711 CRYPTO_w_lock(lock);
712 got_write_lock = 1;
713
714 if (!*pmont)
715 {
716 ret = BN_MONT_CTX_new();
717 if (ret && !BN_MONT_CTX_set(ret, mod, ctx))
718 BN_MONT_CTX_free(ret);
719 else
720 *pmont = ret;
721 }
722 }
723
724 ret = *pmont;
725
726 if (got_write_lock)
727 CRYPTO_w_unlock(lock);
728 else
729 CRYPTO_r_unlock(lock);
730
731 return ret;
732 }
diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c
deleted file mode 100644
index a054d21aed..0000000000
--- a/src/lib/libcrypto/bn/bn_mpi.c
+++ /dev/null
@@ -1,130 +0,0 @@
1/* crypto/bn/bn_mpi.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
64 {
65 int bits;
66 int num=0;
67 int ext=0;
68 long l;
69
70 bits=BN_num_bits(a);
71 num=(bits+7)/8;
72 if (bits > 0)
73 {
74 ext=((bits & 0x07) == 0);
75 }
76 if (d == NULL)
77 return(num+4+ext);
78
79 l=num+ext;
80 d[0]=(unsigned char)(l>>24)&0xff;
81 d[1]=(unsigned char)(l>>16)&0xff;
82 d[2]=(unsigned char)(l>> 8)&0xff;
83 d[3]=(unsigned char)(l )&0xff;
84 if (ext) d[4]=0;
85 num=BN_bn2bin(a,&(d[4+ext]));
86 if (a->neg)
87 d[4]|=0x80;
88 return(num+4+ext);
89 }
90
91BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a)
92 {
93 long len;
94 int neg=0;
95
96 if (n < 4)
97 {
98 BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
99 return(NULL);
100 }
101 len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
102 if ((len+4) != n)
103 {
104 BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
105 return(NULL);
106 }
107
108 if (a == NULL) a=BN_new();
109 if (a == NULL) return(NULL);
110
111 if (len == 0)
112 {
113 a->neg=0;
114 a->top=0;
115 return(a);
116 }
117 d+=4;
118 if ((*d) & 0x80)
119 neg=1;
120 if (BN_bin2bn(d,(int)len,a) == NULL)
121 return(NULL);
122 a->neg=neg;
123 if (neg)
124 {
125 BN_clear_bit(a,BN_num_bits(a)-1);
126 }
127 bn_check_top(a);
128 return(a);
129 }
130
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
deleted file mode 100644
index b848c8cc60..0000000000
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ /dev/null
@@ -1,1169 +0,0 @@
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
/* Here follow specialised variants of bn_add_words() and
   bn_sub_words().  They operate on arrays of different sizes.
   The sizes of those arrays are expressed through cl, which is the
   common length ( basically, min(len(a),len(b)) ), and dl, which is
   the delta between the two lengths, calculated as len(a)-len(b).
   All lengths are the number of BN_ULONGs...  For the operations that
   require a result array as parameter, it must have the length
   cl+abs(dl).  These functions should probably end up in bn_asm.c as
   soon as there are assembler counterparts for the systems that use
   assembler files. */
79
80BN_ULONG bn_sub_part_words(BN_ULONG *r,
81 const BN_ULONG *a, const BN_ULONG *b,
82 int cl, int dl)
83 {
84 BN_ULONG c, t;
85
86 assert(cl >= 0);
87 c = bn_sub_words(r, a, b, cl);
88
89 if (dl == 0)
90 return c;
91
92 r += cl;
93 a += cl;
94 b += cl;
95
96 if (dl < 0)
97 {
98#ifdef BN_COUNT
99 fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100#endif
101 for (;;)
102 {
103 t = b[0];
104 r[0] = (0-t-c)&BN_MASK2;
105 if (t != 0) c=1;
106 if (++dl >= 0) break;
107
108 t = b[1];
109 r[1] = (0-t-c)&BN_MASK2;
110 if (t != 0) c=1;
111 if (++dl >= 0) break;
112
113 t = b[2];
114 r[2] = (0-t-c)&BN_MASK2;
115 if (t != 0) c=1;
116 if (++dl >= 0) break;
117
118 t = b[3];
119 r[3] = (0-t-c)&BN_MASK2;
120 if (t != 0) c=1;
121 if (++dl >= 0) break;
122
123 b += 4;
124 r += 4;
125 }
126 }
127 else
128 {
129 int save_dl = dl;
130#ifdef BN_COUNT
131 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132#endif
133 while(c)
134 {
135 t = a[0];
136 r[0] = (t-c)&BN_MASK2;
137 if (t != 0) c=0;
138 if (--dl <= 0) break;
139
140 t = a[1];
141 r[1] = (t-c)&BN_MASK2;
142 if (t != 0) c=0;
143 if (--dl <= 0) break;
144
145 t = a[2];
146 r[2] = (t-c)&BN_MASK2;
147 if (t != 0) c=0;
148 if (--dl <= 0) break;
149
150 t = a[3];
151 r[3] = (t-c)&BN_MASK2;
152 if (t != 0) c=0;
153 if (--dl <= 0) break;
154
155 save_dl = dl;
156 a += 4;
157 r += 4;
158 }
159 if (dl > 0)
160 {
161#ifdef BN_COUNT
162 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163#endif
164 if (save_dl > dl)
165 {
166 switch (save_dl - dl)
167 {
168 case 1:
169 r[1] = a[1];
170 if (--dl <= 0) break;
171 case 2:
172 r[2] = a[2];
173 if (--dl <= 0) break;
174 case 3:
175 r[3] = a[3];
176 if (--dl <= 0) break;
177 }
178 a += 4;
179 r += 4;
180 }
181 }
182 if (dl > 0)
183 {
184#ifdef BN_COUNT
185 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186#endif
187 for(;;)
188 {
189 r[0] = a[0];
190 if (--dl <= 0) break;
191 r[1] = a[1];
192 if (--dl <= 0) break;
193 r[2] = a[2];
194 if (--dl <= 0) break;
195 r[3] = a[3];
196 if (--dl <= 0) break;
197
198 a += 4;
199 r += 4;
200 }
201 }
202 }
203 return c;
204 }
205#endif
206
207BN_ULONG bn_add_part_words(BN_ULONG *r,
208 const BN_ULONG *a, const BN_ULONG *b,
209 int cl, int dl)
210 {
211 BN_ULONG c, l, t;
212
213 assert(cl >= 0);
214 c = bn_add_words(r, a, b, cl);
215
216 if (dl == 0)
217 return c;
218
219 r += cl;
220 a += cl;
221 b += cl;
222
223 if (dl < 0)
224 {
225 int save_dl = dl;
226#ifdef BN_COUNT
227 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228#endif
229 while (c)
230 {
231 l=(c+b[0])&BN_MASK2;
232 c=(l < c);
233 r[0]=l;
234 if (++dl >= 0) break;
235
236 l=(c+b[1])&BN_MASK2;
237 c=(l < c);
238 r[1]=l;
239 if (++dl >= 0) break;
240
241 l=(c+b[2])&BN_MASK2;
242 c=(l < c);
243 r[2]=l;
244 if (++dl >= 0) break;
245
246 l=(c+b[3])&BN_MASK2;
247 c=(l < c);
248 r[3]=l;
249 if (++dl >= 0) break;
250
251 save_dl = dl;
252 b+=4;
253 r+=4;
254 }
255 if (dl < 0)
256 {
257#ifdef BN_COUNT
258 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259#endif
260 if (save_dl < dl)
261 {
262 switch (dl - save_dl)
263 {
264 case 1:
265 r[1] = b[1];
266 if (++dl >= 0) break;
267 case 2:
268 r[2] = b[2];
269 if (++dl >= 0) break;
270 case 3:
271 r[3] = b[3];
272 if (++dl >= 0) break;
273 }
274 b += 4;
275 r += 4;
276 }
277 }
278 if (dl < 0)
279 {
280#ifdef BN_COUNT
281 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282#endif
283 for(;;)
284 {
285 r[0] = b[0];
286 if (++dl >= 0) break;
287 r[1] = b[1];
288 if (++dl >= 0) break;
289 r[2] = b[2];
290 if (++dl >= 0) break;
291 r[3] = b[3];
292 if (++dl >= 0) break;
293
294 b += 4;
295 r += 4;
296 }
297 }
298 }
299 else
300 {
301 int save_dl = dl;
302#ifdef BN_COUNT
303 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304#endif
305 while (c)
306 {
307 t=(a[0]+c)&BN_MASK2;
308 c=(t < c);
309 r[0]=t;
310 if (--dl <= 0) break;
311
312 t=(a[1]+c)&BN_MASK2;
313 c=(t < c);
314 r[1]=t;
315 if (--dl <= 0) break;
316
317 t=(a[2]+c)&BN_MASK2;
318 c=(t < c);
319 r[2]=t;
320 if (--dl <= 0) break;
321
322 t=(a[3]+c)&BN_MASK2;
323 c=(t < c);
324 r[3]=t;
325 if (--dl <= 0) break;
326
327 save_dl = dl;
328 a+=4;
329 r+=4;
330 }
331#ifdef BN_COUNT
332 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333#endif
334 if (dl > 0)
335 {
336 if (save_dl > dl)
337 {
338 switch (save_dl - dl)
339 {
340 case 1:
341 r[1] = a[1];
342 if (--dl <= 0) break;
343 case 2:
344 r[2] = a[2];
345 if (--dl <= 0) break;
346 case 3:
347 r[3] = a[3];
348 if (--dl <= 0) break;
349 }
350 a += 4;
351 r += 4;
352 }
353 }
354 if (dl > 0)
355 {
356#ifdef BN_COUNT
357 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358#endif
359 for(;;)
360 {
361 r[0] = a[0];
362 if (--dl <= 0) break;
363 r[1] = a[1];
364 if (--dl <= 0) break;
365 r[2] = a[2];
366 if (--dl <= 0) break;
367 r[3] = a[3];
368 if (--dl <= 0) break;
369
370 a += 4;
371 r += 4;
372 }
373 }
374 }
375 return c;
376 }
377
378#ifdef BN_RECURSION
379/* Karatsuba recursive multiplication algorithm
380 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
381
382/* r is 2*n2 words in size,
383 * a and b are both n2 words in size.
384 * n2 must be a power of 2.
385 * We multiply and return the result.
386 * t must be 2*n2 words in size
387 * We calculate
388 * a[0]*b[0]
389 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
390 * a[1]*b[1]
391 */
392/* dnX may not be positive, but n2/2+dnX has to be */
393void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
394 int dna, int dnb, BN_ULONG *t)
395 {
396 int n=n2/2,c1,c2;
397 int tna=n+dna, tnb=n+dnb;
398 unsigned int neg,zero;
399 BN_ULONG ln,lo,*p;
400
401# ifdef BN_COUNT
402 fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
403# endif
404# ifdef BN_MUL_COMBA
405# if 0
406 if (n2 == 4)
407 {
408 bn_mul_comba4(r,a,b);
409 return;
410 }
411# endif
412 /* Only call bn_mul_comba 8 if n2 == 8 and the
413 * two arrays are complete [steve]
414 */
415 if (n2 == 8 && dna == 0 && dnb == 0)
416 {
417 bn_mul_comba8(r,a,b);
418 return;
419 }
420# endif /* BN_MUL_COMBA */
421 /* Else do normal multiply */
422 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
423 {
424 bn_mul_normal(r,a,n2+dna,b,n2+dnb);
425 if ((dna + dnb) < 0)
426 memset(&r[2*n2 + dna + dnb], 0,
427 sizeof(BN_ULONG) * -(dna + dnb));
428 return;
429 }
430 /* r=(a[0]-a[1])*(b[1]-b[0]) */
431 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
432 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
433 zero=neg=0;
434 switch (c1*3+c2)
435 {
436 case -4:
437 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
438 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
439 break;
440 case -3:
441 zero=1;
442 break;
443 case -2:
444 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
445 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
446 neg=1;
447 break;
448 case -1:
449 case 0:
450 case 1:
451 zero=1;
452 break;
453 case 2:
454 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
455 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
456 neg=1;
457 break;
458 case 3:
459 zero=1;
460 break;
461 case 4:
462 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
463 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
464 break;
465 }
466
467# ifdef BN_MUL_COMBA
468 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
469 extra args to do this well */
470 {
471 if (!zero)
472 bn_mul_comba4(&(t[n2]),t,&(t[n]));
473 else
474 memset(&(t[n2]),0,8*sizeof(BN_ULONG));
475
476 bn_mul_comba4(r,a,b);
477 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
478 }
479 else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
480 take extra args to do this
481 well */
482 {
483 if (!zero)
484 bn_mul_comba8(&(t[n2]),t,&(t[n]));
485 else
486 memset(&(t[n2]),0,16*sizeof(BN_ULONG));
487
488 bn_mul_comba8(r,a,b);
489 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
490 }
491 else
492# endif /* BN_MUL_COMBA */
493 {
494 p= &(t[n2*2]);
495 if (!zero)
496 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
497 else
498 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
499 bn_mul_recursive(r,a,b,n,0,0,p);
500 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
501 }
502
503 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
504 * r[10] holds (a[0]*b[0])
505 * r[32] holds (b[1]*b[1])
506 */
507
508 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
509
510 if (neg) /* if t[32] is negative */
511 {
512 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
513 }
514 else
515 {
516 /* Might have a carry */
517 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
518 }
519
520 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
521 * r[10] holds (a[0]*b[0])
522 * r[32] holds (b[1]*b[1])
523 * c1 holds the carry bits
524 */
525 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
526 if (c1)
527 {
528 p= &(r[n+n2]);
529 lo= *p;
530 ln=(lo+c1)&BN_MASK2;
531 *p=ln;
532
533 /* The overflow will stop before we over write
534 * words we should not overwrite */
535 if (ln < (BN_ULONG)c1)
536 {
537 do {
538 p++;
539 lo= *p;
540 ln=(lo+1)&BN_MASK2;
541 *p=ln;
542 } while (ln == 0);
543 }
544 }
545 }
546
547/* n+tn is the word length
548 * t needs to be n*4 is size, as does r */
549/* tnX may not be negative but less than n */
550void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
551 int tna, int tnb, BN_ULONG *t)
552 {
553 int i,j,n2=n*2;
554 int c1,c2,neg,zero;
555 BN_ULONG ln,lo,*p;
556
557# ifdef BN_COUNT
558 fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
559 n, tna, n, tnb);
560# endif
561 if (n < 8)
562 {
563 bn_mul_normal(r,a,n+tna,b,n+tnb);
564 return;
565 }
566
567 /* r=(a[0]-a[1])*(b[1]-b[0]) */
568 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
569 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
570 zero=neg=0;
571 switch (c1*3+c2)
572 {
573 case -4:
574 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
575 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
576 break;
577 case -3:
578 zero=1;
579 /* break; */
580 case -2:
581 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
582 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
583 neg=1;
584 break;
585 case -1:
586 case 0:
587 case 1:
588 zero=1;
589 /* break; */
590 case 2:
591 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
592 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
593 neg=1;
594 break;
595 case 3:
596 zero=1;
597 /* break; */
598 case 4:
599 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
600 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
601 break;
602 }
603 /* The zero case isn't yet implemented here. The speedup
604 would probably be negligible. */
605# if 0
606 if (n == 4)
607 {
608 bn_mul_comba4(&(t[n2]),t,&(t[n]));
609 bn_mul_comba4(r,a,b);
610 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
611 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
612 }
613 else
614# endif
615 if (n == 8)
616 {
617 bn_mul_comba8(&(t[n2]),t,&(t[n]));
618 bn_mul_comba8(r,a,b);
619 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
620 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
621 }
622 else
623 {
624 p= &(t[n2*2]);
625 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
626 bn_mul_recursive(r,a,b,n,0,0,p);
627 i=n/2;
628 /* If there is only a bottom half to the number,
629 * just do it */
630 if (tna > tnb)
631 j = tna - i;
632 else
633 j = tnb - i;
634 if (j == 0)
635 {
636 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
637 i,tna-i,tnb-i,p);
638 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
639 }
640 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
641 {
642 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
643 i,tna-i,tnb-i,p);
644 memset(&(r[n2+tna+tnb]),0,
645 sizeof(BN_ULONG)*(n2-tna-tnb));
646 }
647 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
648 {
649 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
650 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
651 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
652 {
653 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
654 }
655 else
656 {
657 for (;;)
658 {
659 i/=2;
660 /* these simplified conditions work
661 * exclusively because difference
662 * between tna and tnb is 1 or 0 */
663 if (i < tna || i < tnb)
664 {
665 bn_mul_part_recursive(&(r[n2]),
666 &(a[n]),&(b[n]),
667 i,tna-i,tnb-i,p);
668 break;
669 }
670 else if (i == tna || i == tnb)
671 {
672 bn_mul_recursive(&(r[n2]),
673 &(a[n]),&(b[n]),
674 i,tna-i,tnb-i,p);
675 break;
676 }
677 }
678 }
679 }
680 }
681
682 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
683 * r[10] holds (a[0]*b[0])
684 * r[32] holds (b[1]*b[1])
685 */
686
687 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
688
689 if (neg) /* if t[32] is negative */
690 {
691 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
692 }
693 else
694 {
695 /* Might have a carry */
696 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
697 }
698
699 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
700 * r[10] holds (a[0]*b[0])
701 * r[32] holds (b[1]*b[1])
702 * c1 holds the carry bits
703 */
704 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
705 if (c1)
706 {
707 p= &(r[n+n2]);
708 lo= *p;
709 ln=(lo+c1)&BN_MASK2;
710 *p=ln;
711
712 /* The overflow will stop before we over write
713 * words we should not overwrite */
714 if (ln < (BN_ULONG)c1)
715 {
716 do {
717 p++;
718 lo= *p;
719 ln=(lo+1)&BN_MASK2;
720 *p=ln;
721 } while (ln == 0);
722 }
723 }
724 }
725
726/* a and b must be the same size, which is n2.
727 * r needs to be n2 words and t needs to be n2*2
728 */
729void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
730 BN_ULONG *t)
731 {
732 int n=n2/2;
733
734# ifdef BN_COUNT
735 fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
736# endif
737
738 bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
739 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
740 {
741 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
742 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
743 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
744 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
745 }
746 else
747 {
748 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
749 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
750 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
751 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
752 }
753 }
754
755/* a and b must be the same size, which is n2.
756 * r needs to be n2 words and t needs to be n2*2
757 * l is the low words of the output.
758 * t needs to be n2*3
759 */
760void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
761 BN_ULONG *t)
762 {
763 int i,n;
764 int c1,c2;
765 int neg,oneg,zero;
766 BN_ULONG ll,lc,*lp,*mp;
767
768# ifdef BN_COUNT
769 fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
770# endif
771 n=n2/2;
772
773 /* Calculate (al-ah)*(bh-bl) */
774 neg=zero=0;
775 c1=bn_cmp_words(&(a[0]),&(a[n]),n);
776 c2=bn_cmp_words(&(b[n]),&(b[0]),n);
777 switch (c1*3+c2)
778 {
779 case -4:
780 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
781 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
782 break;
783 case -3:
784 zero=1;
785 break;
786 case -2:
787 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
788 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
789 neg=1;
790 break;
791 case -1:
792 case 0:
793 case 1:
794 zero=1;
795 break;
796 case 2:
797 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
798 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
799 neg=1;
800 break;
801 case 3:
802 zero=1;
803 break;
804 case 4:
805 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
806 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
807 break;
808 }
809
810 oneg=neg;
811 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
812 /* r[10] = (a[1]*b[1]) */
813# ifdef BN_MUL_COMBA
814 if (n == 8)
815 {
816 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
817 bn_mul_comba8(r,&(a[n]),&(b[n]));
818 }
819 else
820# endif
821 {
822 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
823 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
824 }
825
826 /* s0 == low(al*bl)
827 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
828 * We know s0 and s1 so the only unknown is high(al*bl)
829 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
830 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
831 */
832 if (l != NULL)
833 {
834 lp= &(t[n2+n]);
835 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
836 }
837 else
838 {
839 c1=0;
840 lp= &(r[0]);
841 }
842
843 if (neg)
844 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
845 else
846 {
847 bn_add_words(&(t[n2]),lp,&(t[0]),n);
848 neg=0;
849 }
850
851 if (l != NULL)
852 {
853 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
854 }
855 else
856 {
857 lp= &(t[n2+n]);
858 mp= &(t[n2]);
859 for (i=0; i<n; i++)
860 lp[i]=((~mp[i])+1)&BN_MASK2;
861 }
862
863 /* s[0] = low(al*bl)
864 * t[3] = high(al*bl)
865 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
866 * r[10] = (a[1]*b[1])
867 */
868 /* R[10] = al*bl
869 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
870 * R[32] = ah*bh
871 */
872 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
873 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
874 * R[3]=r[1]+(carry/borrow)
875 */
876 if (l != NULL)
877 {
878 lp= &(t[n2]);
879 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
880 }
881 else
882 {
883 lp= &(t[n2+n]);
884 c1=0;
885 }
886 c1+=(int)(bn_add_words(&(t[n2]),lp, &(r[0]),n));
887 if (oneg)
888 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
889 else
890 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
891
892 c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
893 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
894 if (oneg)
895 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
896 else
897 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
898
899 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
900 {
901 i=0;
902 if (c1 > 0)
903 {
904 lc=c1;
905 do {
906 ll=(r[i]+lc)&BN_MASK2;
907 r[i++]=ll;
908 lc=(lc > ll);
909 } while (lc);
910 }
911 else
912 {
913 lc= -c1;
914 do {
915 ll=r[i];
916 r[i++]=(ll-lc)&BN_MASK2;
917 lc=(lc > ll);
918 } while (lc);
919 }
920 }
921 if (c2 != 0) /* Add starting at r[1] */
922 {
923 i=n;
924 if (c2 > 0)
925 {
926 lc=c2;
927 do {
928 ll=(r[i]+lc)&BN_MASK2;
929 r[i++]=ll;
930 lc=(lc > ll);
931 } while (lc);
932 }
933 else
934 {
935 lc= -c2;
936 do {
937 ll=r[i];
938 r[i++]=(ll-lc)&BN_MASK2;
939 lc=(lc > ll);
940 } while (lc);
941 }
942 }
943 }
944#endif /* BN_RECURSION */
945
946int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
947 {
948 int ret=0;
949 int top,al,bl;
950 BIGNUM *rr;
951#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
952 int i;
953#endif
954#ifdef BN_RECURSION
955 BIGNUM *t=NULL;
956 int j=0,k;
957#endif
958
959#ifdef BN_COUNT
960 fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
961#endif
962
963 bn_check_top(a);
964 bn_check_top(b);
965 bn_check_top(r);
966
967 al=a->top;
968 bl=b->top;
969
970 if ((al == 0) || (bl == 0))
971 {
972 BN_zero(r);
973 return(1);
974 }
975 top=al+bl;
976
977 BN_CTX_start(ctx);
978 if ((r == a) || (r == b))
979 {
980 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
981 }
982 else
983 rr = r;
984 rr->neg=a->neg^b->neg;
985
986#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
987 i = al-bl;
988#endif
989#ifdef BN_MUL_COMBA
990 if (i == 0)
991 {
992# if 0
993 if (al == 4)
994 {
995 if (bn_wexpand(rr,8) == NULL) goto err;
996 rr->top=8;
997 bn_mul_comba4(rr->d,a->d,b->d);
998 goto end;
999 }
1000# endif
1001 if (al == 8)
1002 {
1003 if (bn_wexpand(rr,16) == NULL) goto err;
1004 rr->top=16;
1005 bn_mul_comba8(rr->d,a->d,b->d);
1006 goto end;
1007 }
1008 }
1009#endif /* BN_MUL_COMBA */
1010#ifdef BN_RECURSION
1011 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
1012 {
1013 if (i >= -1 && i <= 1)
1014 {
1015 int sav_j =0;
1016 /* Find out the power of two lower or equal
1017 to the longest of the two numbers */
1018 if (i >= 0)
1019 {
1020 j = BN_num_bits_word((BN_ULONG)al);
1021 }
1022 if (i == -1)
1023 {
1024 j = BN_num_bits_word((BN_ULONG)bl);
1025 }
1026 sav_j = j;
1027 j = 1<<(j-1);
1028 assert(j <= al || j <= bl);
1029 k = j+j;
1030 t = BN_CTX_get(ctx);
1031 if (al > j || bl > j)
1032 {
1033 bn_wexpand(t,k*4);
1034 bn_wexpand(rr,k*4);
1035 bn_mul_part_recursive(rr->d,a->d,b->d,
1036 j,al-j,bl-j,t->d);
1037 }
1038 else /* al <= j || bl <= j */
1039 {
1040 bn_wexpand(t,k*2);
1041 bn_wexpand(rr,k*2);
1042 bn_mul_recursive(rr->d,a->d,b->d,
1043 j,al-j,bl-j,t->d);
1044 }
1045 rr->top=top;
1046 goto end;
1047 }
1048#if 0
1049 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1050 {
1051 BIGNUM *tmp_bn = (BIGNUM *)b;
1052 if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1053 tmp_bn->d[bl]=0;
1054 bl++;
1055 i--;
1056 }
1057 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1058 {
1059 BIGNUM *tmp_bn = (BIGNUM *)a;
1060 if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1061 tmp_bn->d[al]=0;
1062 al++;
1063 i++;
1064 }
1065 if (i == 0)
1066 {
1067 /* symmetric and > 4 */
1068 /* 16 or larger */
1069 j=BN_num_bits_word((BN_ULONG)al);
1070 j=1<<(j-1);
1071 k=j+j;
1072 t = BN_CTX_get(ctx);
1073 if (al == j) /* exact multiple */
1074 {
1075 if (bn_wexpand(t,k*2) == NULL) goto err;
1076 if (bn_wexpand(rr,k*2) == NULL) goto err;
1077 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1078 }
1079 else
1080 {
1081 if (bn_wexpand(t,k*4) == NULL) goto err;
1082 if (bn_wexpand(rr,k*4) == NULL) goto err;
1083 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1084 }
1085 rr->top=top;
1086 goto end;
1087 }
1088#endif
1089 }
1090#endif /* BN_RECURSION */
1091 if (bn_wexpand(rr,top) == NULL) goto err;
1092 rr->top=top;
1093 bn_mul_normal(rr->d,a->d,al,b->d,bl);
1094
1095#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1096end:
1097#endif
1098 bn_correct_top(rr);
1099 if (r != rr) BN_copy(r,rr);
1100 ret=1;
1101err:
1102 bn_check_top(r);
1103 BN_CTX_end(ctx);
1104 return(ret);
1105 }
1106
1107void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1108 {
1109 BN_ULONG *rr;
1110
1111#ifdef BN_COUNT
1112 fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1113#endif
1114
1115 if (na < nb)
1116 {
1117 int itmp;
1118 BN_ULONG *ltmp;
1119
1120 itmp=na; na=nb; nb=itmp;
1121 ltmp=a; a=b; b=ltmp;
1122
1123 }
1124 rr= &(r[na]);
1125 if (nb <= 0)
1126 {
1127 (void)bn_mul_words(r,a,na,0);
1128 return;
1129 }
1130 else
1131 rr[0]=bn_mul_words(r,a,na,b[0]);
1132
1133 for (;;)
1134 {
1135 if (--nb <= 0) return;
1136 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1137 if (--nb <= 0) return;
1138 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1139 if (--nb <= 0) return;
1140 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1141 if (--nb <= 0) return;
1142 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1143 rr+=4;
1144 r+=4;
1145 b+=4;
1146 }
1147 }
1148
1149void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1150 {
1151#ifdef BN_COUNT
1152 fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1153#endif
1154 bn_mul_words(r,a,n,b[0]);
1155
1156 for (;;)
1157 {
1158 if (--n <= 0) return;
1159 bn_mul_add_words(&(r[1]),a,n,b[1]);
1160 if (--n <= 0) return;
1161 bn_mul_add_words(&(r[2]),a,n,b[2]);
1162 if (--n <= 0) return;
1163 bn_mul_add_words(&(r[3]),a,n,b[3]);
1164 if (--n <= 0) return;
1165 bn_mul_add_words(&(r[4]),a,n,b[4]);
1166 r+=4;
1167 b+=4;
1168 }
1169 }
diff --git a/src/lib/libcrypto/bn/bn_nist.c b/src/lib/libcrypto/bn/bn_nist.c
deleted file mode 100644
index 2ca5b01391..0000000000
--- a/src/lib/libcrypto/bn/bn_nist.c
+++ /dev/null
@@ -1,836 +0,0 @@
1/* crypto/bn/bn_nist.c */
2/*
3 * Written by Nils Larsch for the OpenSSL project
4 */
5/* ====================================================================
6 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * openssl-core@openssl.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include "bn_lcl.h"
60#include "cryptlib.h"
61
62
/*
 * Number of BN_BITS2-bit words needed to hold each NIST prime (bit length
 * rounded up to a whole word).  The expansions are fully parenthesised so
 * they can be used safely inside larger expressions such as
 * 2*BN_NIST_192_TOP or x%BN_NIST_256_TOP.
 */
#define BN_NIST_192_TOP	((192+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_224_TOP	((224+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_256_TOP	((256+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_384_TOP	((384+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_521_TOP	((521+BN_BITS2-1)/BN_BITS2)
68
69/* pre-computed tables are "carry-less" values of modulus*(i+1) */
70#if BN_BITS2 == 64
71static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
72 {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL},
73 {0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL},
74 {0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFCULL,0xFFFFFFFFFFFFFFFFULL}
75 };
76static const BN_ULONG _nist_p_192_sqr[] = {
77 0x0000000000000001ULL,0x0000000000000002ULL,0x0000000000000001ULL,
78 0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL
79 };
80static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
81 {0x0000000000000001ULL,0xFFFFFFFF00000000ULL,
82 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL},
83 {0x0000000000000002ULL,0xFFFFFFFE00000000ULL,
84 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFFULL} /* this one is "carry-full" */
85 };
86static const BN_ULONG _nist_p_224_sqr[] = {
87 0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
88 0xFFFFFFFFFFFFFFFFULL,0x0000000200000000ULL,
89 0x0000000000000000ULL,0xFFFFFFFFFFFFFFFEULL,
90 0xFFFFFFFFFFFFFFFFULL
91 };
92static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
93 {0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL,
94 0x0000000000000000ULL,0xFFFFFFFF00000001ULL},
95 {0xFFFFFFFFFFFFFFFEULL,0x00000001FFFFFFFFULL,
96 0x0000000000000000ULL,0xFFFFFFFE00000002ULL},
97 {0xFFFFFFFFFFFFFFFDULL,0x00000002FFFFFFFFULL,
98 0x0000000000000000ULL,0xFFFFFFFD00000003ULL},
99 {0xFFFFFFFFFFFFFFFCULL,0x00000003FFFFFFFFULL,
100 0x0000000000000000ULL,0xFFFFFFFC00000004ULL},
101 {0xFFFFFFFFFFFFFFFBULL,0x00000004FFFFFFFFULL,
102 0x0000000000000000ULL,0xFFFFFFFB00000005ULL},
103 };
104static const BN_ULONG _nist_p_256_sqr[] = {
105 0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
106 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFEULL,
107 0x00000001FFFFFFFEULL,0x00000001FFFFFFFEULL,
108 0xFFFFFFFE00000001ULL,0xFFFFFFFE00000002ULL
109 };
110static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
111 {0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL,0xFFFFFFFFFFFFFFFEULL,
112 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
113 {0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
114 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
115 {0x00000002FFFFFFFDULL,0xFFFFFFFD00000000ULL,0xFFFFFFFFFFFFFFFCULL,
116 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
117 {0x00000003FFFFFFFCULL,0xFFFFFFFC00000000ULL,0xFFFFFFFFFFFFFFFBULL,
118 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
119 {0x00000004FFFFFFFBULL,0xFFFFFFFB00000000ULL,0xFFFFFFFFFFFFFFFAULL,
120 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
121 };
122static const BN_ULONG _nist_p_384_sqr[] = {
123 0xFFFFFFFE00000001ULL,0x0000000200000000ULL,0xFFFFFFFE00000000ULL,
124 0x0000000200000000ULL,0x0000000000000001ULL,0x0000000000000000ULL,
125 0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
126 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL
127 };
128static const BN_ULONG _nist_p_521[] =
129 {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
130 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
131 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
132 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
133 0x00000000000001FFULL};
134static const BN_ULONG _nist_p_521_sqr[] = {
135 0x0000000000000001ULL,0x0000000000000000ULL,0x0000000000000000ULL,
136 0x0000000000000000ULL,0x0000000000000000ULL,0x0000000000000000ULL,
137 0x0000000000000000ULL,0x0000000000000000ULL,0xFFFFFFFFFFFFFC00ULL,
138 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
139 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
140 0xFFFFFFFFFFFFFFFFULL,0x000000000003FFFFULL
141 };
142#elif BN_BITS2 == 32
143static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
144 {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
145 {0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
146 {0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
147 };
148static const BN_ULONG _nist_p_192_sqr[] = {
149 0x00000001,0x00000000,0x00000002,0x00000000,0x00000001,0x00000000,
150 0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
151 };
152static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
153 {0x00000001,0x00000000,0x00000000,0xFFFFFFFF,
154 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
155 {0x00000002,0x00000000,0x00000000,0xFFFFFFFE,
156 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
157 };
158static const BN_ULONG _nist_p_224_sqr[] = {
159 0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
160 0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000002,
161 0x00000000,0x00000000,0xFFFFFFFE,0xFFFFFFFF,
162 0xFFFFFFFF,0xFFFFFFFF
163 };
164static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
165 {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000,
166 0x00000000,0x00000000,0x00000001,0xFFFFFFFF},
167 {0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0x00000001,
168 0x00000000,0x00000000,0x00000002,0xFFFFFFFE},
169 {0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0x00000002,
170 0x00000000,0x00000000,0x00000003,0xFFFFFFFD},
171 {0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0x00000003,
172 0x00000000,0x00000000,0x00000004,0xFFFFFFFC},
173 {0xFFFFFFFB,0xFFFFFFFF,0xFFFFFFFF,0x00000004,
174 0x00000000,0x00000000,0x00000005,0xFFFFFFFB},
175 };
176static const BN_ULONG _nist_p_256_sqr[] = {
177 0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
178 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0x00000001,
179 0xFFFFFFFE,0x00000001,0xFFFFFFFE,0x00000001,
180 0x00000001,0xFFFFFFFE,0x00000002,0xFFFFFFFE
181 };
182static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
183 {0xFFFFFFFF,0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,
184 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
185 {0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
186 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
187 {0xFFFFFFFD,0x00000002,0x00000000,0xFFFFFFFD,0xFFFFFFFC,0xFFFFFFFF,
188 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
189 {0xFFFFFFFC,0x00000003,0x00000000,0xFFFFFFFC,0xFFFFFFFB,0xFFFFFFFF,
190 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
191 {0xFFFFFFFB,0x00000004,0x00000000,0xFFFFFFFB,0xFFFFFFFA,0xFFFFFFFF,
192 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
193 };
194static const BN_ULONG _nist_p_384_sqr[] = {
195 0x00000001,0xFFFFFFFE,0x00000000,0x00000002,0x00000000,0xFFFFFFFE,
196 0x00000000,0x00000002,0x00000001,0x00000000,0x00000000,0x00000000,
197 0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
198 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
199 };
200static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
201 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
202 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
203 0xFFFFFFFF,0x000001FF};
204static const BN_ULONG _nist_p_521_sqr[] = {
205 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
206 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
207 0x00000000,0x00000000,0x00000000,0x00000000,0xFFFFFC00,0xFFFFFFFF,
208 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
209 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
210 0xFFFFFFFF,0xFFFFFFFF,0x0003FFFF
211 };
212#else
213#error "unsupported BN_BITS2"
214#endif
215
216
/*
 * Read-only BIGNUM wrappers around the modulus tables defined earlier in
 * this file.  For the two-dimensional tables the data pointer is row 0
 * (1 * modulus, i.e. the prime itself); _nist_p_521 is a flat array.
 * Every wrapper is flagged BN_FLG_STATIC_DATA.
 * NOTE(review): the positional initialisers presumably map to the BIGNUM
 * members d, top, dmax, neg, flags -- confirm against the struct in bn.h.
 */
217static const BIGNUM _bignum_nist_p_192 =
218 {
219 (BN_ULONG *)_nist_p_192[0],
220 BN_NIST_192_TOP,
221 BN_NIST_192_TOP,
222 0,
223 BN_FLG_STATIC_DATA
224 };
225
226static const BIGNUM _bignum_nist_p_224 =
227 {
228 (BN_ULONG *)_nist_p_224[0],
229 BN_NIST_224_TOP,
230 BN_NIST_224_TOP,
231 0,
232 BN_FLG_STATIC_DATA
233 };
234
235static const BIGNUM _bignum_nist_p_256 =
236 {
237 (BN_ULONG *)_nist_p_256[0],
238 BN_NIST_256_TOP,
239 BN_NIST_256_TOP,
240 0,
241 BN_FLG_STATIC_DATA
242 };
243
244static const BIGNUM _bignum_nist_p_384 =
245 {
246 (BN_ULONG *)_nist_p_384[0],
247 BN_NIST_384_TOP,
248 BN_NIST_384_TOP,
249 0,
250 BN_FLG_STATIC_DATA
251 };
252
253static const BIGNUM _bignum_nist_p_521 =
254 {
255 (BN_ULONG *)_nist_p_521,
256 BN_NIST_521_TOP,
257 BN_NIST_521_TOP,
258 0,
259 BN_FLG_STATIC_DATA
260 };
261
262
/*
 * Accessors for the built-in NIST prime moduli.  Each returns the address
 * of a file-scope static const BIGNUM, so the result has static storage
 * duration and is const-qualified; nothing is allocated.
 */
263const BIGNUM *BN_get0_nist_prime_192(void)
264 {
265 return &_bignum_nist_p_192;
266 }
267
268const BIGNUM *BN_get0_nist_prime_224(void)
269 {
270 return &_bignum_nist_p_224;
271 }
272
273const BIGNUM *BN_get0_nist_prime_256(void)
274 {
275 return &_bignum_nist_p_256;
276 }
277
278const BIGNUM *BN_get0_nist_prime_384(void)
279 {
280 return &_bignum_nist_p_384;
281 }
282
283const BIGNUM *BN_get0_nist_prime_521(void)
284 {
285 return &_bignum_nist_p_521;
286 }
287
288
289static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max)
290 {
291 int i;
292 BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
293
294#ifdef BN_DEBUG
295 OPENSSL_assert(top <= max);
296#endif
297 for (i = (top); i != 0; i--)
298 *_tmp1++ = *_tmp2++;
299 for (i = (max) - (top); i != 0; i--)
300 *_tmp1++ = (BN_ULONG) 0;
301 }
302
303static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
304 {
305 int i;
306 BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
307 for (i = (top); i != 0; i--)
308 *_tmp1++ = *_tmp2++;
309 }
310
/*
 * Word-shuffling helpers used by the nist_set_* macros.  `n` indexes the
 * destination and `m` the source; a negative `m` stores zero instead of
 * copying.  bn_cp_64 / bn_64_set_0 work in 64-bit units, bn_cp_32 /
 * bn_32_set_0 in 32-bit units.
 */
311#if BN_BITS2 == 64
/* 64-bit words: a 64-bit unit is exactly one array element. */
312#define bn_cp_64(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
313#define bn_64_set_0(to, n) (to)[n] = (BN_ULONG)0;
314/*
315 * two following macros are implemented under assumption that they
316 * are called in a sequence with *ascending* n, i.e. as they are...
317 */
/*
 * 32-bit unit n lives in word n/2.  An even n assigns the whole word (so
 * the other half is overwritten), an odd n ORs into / masks the word that
 * the preceding even n just assigned -- hence the ascending-n requirement.
 * NOTE(review): BN_MASK2l / BN_MASK2h are presumably the low / high 32-bit
 * masks of a 64-bit word -- confirm in bn.h.
 */
318#define bn_cp_32_naked(to, n, from, m) (((n)&1)?(to[(n)/2]|=((m)&1)?(from[(m)/2]&BN_MASK2h):(from[(m)/2]<<32))\
319 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
320#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
321#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
322#else
/* Otherwise a 64-bit unit is handled as two consecutive 32-bit units. */
323#define bn_cp_64(to, n, from, m) \
324 { \
325 bn_cp_32(to, (n)*2, from, (m)*2); \
326 bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
327 }
328#define bn_64_set_0(to, n) \
329 { \
330 bn_32_set_0(to, (n)*2); \
331 bn_32_set_0(to, (n)*2+1); \
332 }
333#if BN_BITS2 == 32
/* 32-bit words: a 32-bit unit is exactly one array element. */
334#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
335#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
336#endif
337#endif /* BN_BITS2 != 64 */
338
339
/*
 * Fill the three 64-bit units of `to` from `from`.  a1 selects the source
 * for the most significant unit (to[2]) and a3 for the least (to[0]).
 * Each selector has 3 subtracted before indexing `from`; a selector below
 * 3 therefore gives a negative index, which bn_cp_64 turns into zero.
 */
340#define nist_set_192(to, from, a1, a2, a3) \
341 { \
342 bn_cp_64(to, 0, from, (a3) - 3) \
343 bn_cp_64(to, 1, from, (a2) - 3) \
344 bn_cp_64(to, 2, from, (a1) - 3) \
345 }
346
/*
 * Reduce a modulo the built-in NIST P-192 prime and store the result in r
 * (r may be the same BIGNUM as a).
 *
 * The `field` argument is overwritten with the built-in modulus before it
 * is used.  If a is negative or not smaller than the square of the modulus
 * the call is forwarded to BN_nnmod().  If a equals the modulus r is set
 * to zero; if a is smaller it is copied to r unchanged.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the BN_nnmod and
 * BN_copy paths the result of those calls decides the return value.
 */
347int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
348 BN_CTX *ctx)
349 {
350 int top = a->top, i;
351 int carry;
352 register BN_ULONG *r_d, *a_d = a->d;
353 BN_ULONG t_d[BN_NIST_192_TOP],
354 buf[BN_NIST_192_TOP],
355 c_d[BN_NIST_192_TOP],
356 *res;
357 size_t mask;
358 static const BIGNUM _bignum_nist_p_192_sqr = {
359 (BN_ULONG *)_nist_p_192_sqr,
360 sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
361 sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
362 0,BN_FLG_STATIC_DATA };
363
364 field = &_bignum_nist_p_192; /* just to make sure */
365
366 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_192_sqr)>=0)
367 return BN_nnmod(r, a, field, ctx);
368
369 i = BN_ucmp(field, a);
370 if (i == 0)
371 {
372 BN_zero(r);
373 return 1;
374 }
375 else if (i > 0)
376 return (r == a) ? 1 : (BN_copy(r ,a) != NULL);
377
 /* r_d starts out as the low BN_NIST_192_TOP words of a. */
378 if (r != a)
379 {
380 if (!bn_wexpand(r, BN_NIST_192_TOP))
381 return 0;
382 r_d = r->d;
383 nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
384 }
385 else
386 r_d = a_d;
387
 /* buf = the words of a above the low part, zero-padded. */
388 nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
389
 /* Add three rearrangements of buf to r_d, counting the carries out. */
390 nist_set_192(t_d, buf, 0, 3, 3);
391 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
392 nist_set_192(t_d, buf, 4, 4, 0);
393 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
394 nist_set_192(t_d, buf, 5, 5, 5)
395 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
396
 /* Row carry-1 of the table holds carry * modulus (truncated). */
397 if (carry > 0)
398 carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
399 else
400 carry = 1;
401
402 /*
403 * we need 'if (carry==0 || result>=modulus) result-=modulus;'
404 * as comparison implies subtraction, we can write
405 * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
406 * this is what happens below, but without explicit if:-) a.
407 */
 /* mask is all-ones only when both the borrow and carry are set; the
  * pointer select then keeps r_d, otherwise it picks c_d. */
408 mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP);
409 mask &= 0-(size_t)carry;
410 res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
411 nist_cp_bn(r_d, res, BN_NIST_192_TOP);
412 r->top = BN_NIST_192_TOP;
413 bn_correct_top(r);
414
415 return 1;
416 }
417
/* Common signature of the word add/subtract primitives, used below to pick
 * between bn_add_words and bn_sub_words at run time. */
418typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *,const BN_ULONG *,const BN_ULONG *,int);
419
/*
 * Fill the seven 32-bit units of `to` from `from`.  a1 selects the source
 * for the most significant unit (to[6]) and a7 for the least (to[0]).
 * Each selector has 7 subtracted before indexing; a selector below 7 gives
 * a negative index, which bn_cp_32 turns into zero.
 */
420#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
421 { \
422 bn_cp_32(to, 0, from, (a7) - 7) \
423 bn_cp_32(to, 1, from, (a6) - 7) \
424 bn_cp_32(to, 2, from, (a5) - 7) \
425 bn_cp_32(to, 3, from, (a4) - 7) \
426 bn_cp_32(to, 4, from, (a3) - 7) \
427 bn_cp_32(to, 5, from, (a2) - 7) \
428 bn_cp_32(to, 6, from, (a1) - 7) \
429 }
430
/*
 * Reduce a modulo the built-in NIST P-224 prime and store the result in r
 * (r may be the same BIGNUM as a).
 *
 * The `field` argument is overwritten with the built-in modulus before it
 * is used.  If a is negative or not smaller than the square of the modulus
 * the call is forwarded to BN_nnmod().  If a equals the modulus r is set
 * to zero; if a is smaller it is copied to r unchanged.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the BN_nnmod and
 * BN_copy paths the result of those calls decides the return value.
 */
431int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
432 BN_CTX *ctx)
433 {
434 int top = a->top, i;
435 int carry;
436 BN_ULONG *r_d, *a_d = a->d;
437 BN_ULONG t_d[BN_NIST_224_TOP],
438 buf[BN_NIST_224_TOP],
439 c_d[BN_NIST_224_TOP],
440 *res;
441 size_t mask;
442 union { bn_addsub_f f; size_t p; } u;
443 static const BIGNUM _bignum_nist_p_224_sqr = {
444 (BN_ULONG *)_nist_p_224_sqr,
445 sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
446 sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
447 0,BN_FLG_STATIC_DATA };
448
449
450 field = &_bignum_nist_p_224; /* just to make sure */
451
452 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_224_sqr)>=0)
453 return BN_nnmod(r, a, field, ctx);
454
455 i = BN_ucmp(field, a);
456 if (i == 0)
457 {
458 BN_zero(r);
459 return 1;
460 }
461 else if (i > 0)
462 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
463
 /* r_d starts out as the low BN_NIST_224_TOP words of a. */
464 if (r != a)
465 {
466 if (!bn_wexpand(r, BN_NIST_224_TOP))
467 return 0;
468 r_d = r->d;
469 nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
470 }
471 else
472 r_d = a_d;
473
474#if BN_BITS2==64
475 /* copy upper 256 bits of 448 bit number ... */
476 nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
477 /* ... and right shift by 32 to obtain upper 224 bits */
478 nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8);
479 /* truncate lower part to 224 bits too */
480 r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
481#else
482 nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
483#endif
 /* Two additions and two subtractions of rearranged buf words; carry
  * accumulates carries out minus borrows and may end up negative. */
484 nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
485 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
486 nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
487 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
488 nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7);
489 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
490 nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11);
491 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
492
493#if BN_BITS2==64
 /* With 64-bit words the running count is replaced by the upper 32
  * bits of the top word. */
494 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
495#endif
 /* u.f is the primitive used for the final correction; it stays
  * bn_sub_words unless the carry < 0 branch below rewrites it. */
496 u.f = bn_sub_words;
497 if (carry > 0)
498 {
499 carry = (int)bn_sub_words(r_d,r_d,_nist_p_224[carry-1],BN_NIST_224_TOP);
500#if BN_BITS2==64
501 carry=(int)(~(r_d[BN_NIST_224_TOP-1]>>32))&1;
502#endif
503 }
504 else if (carry < 0)
505 {
506 /* it's a bit more complicated logic in this case.
507 * if bn_add_words yields no carry, then result
508 * has to be adjusted by unconditionally *adding*
509 * the modulus. but if it does, then result has
510 * to be compared to the modulus and conditionally
511 * adjusted by *subtracting* the latter. */
512 carry = (int)bn_add_words(r_d,r_d,_nist_p_224[-carry-1],BN_NIST_224_TOP);
513 mask = 0-(size_t)carry;
514 u.p = ((size_t)bn_sub_words&mask) | ((size_t)bn_add_words&~mask);
515 }
516 else
517 carry = 1;
518
519 /* otherwise it's effectively same as in BN_nist_mod_192... */
 /* mask is all-ones only when both the value returned by u.f and carry
  * are set; the pointer select then keeps r_d, otherwise it picks c_d. */
520 mask = 0-(size_t)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP);
521 mask &= 0-(size_t)carry;
522 res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
523 nist_cp_bn(r_d, res, BN_NIST_224_TOP);
524 r->top = BN_NIST_224_TOP;
525 bn_correct_top(r);
526
527 return 1;
528 }
529
/*
 * Fill the eight 32-bit units of `to` from `from`.  a1 selects the source
 * for the most significant unit (to[7]) and a8 for the least (to[0]).
 * Each selector has 8 subtracted before indexing; a selector below 8 gives
 * a negative index, which bn_cp_32 turns into zero.
 */
530#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
531 { \
532 bn_cp_32(to, 0, from, (a8) - 8) \
533 bn_cp_32(to, 1, from, (a7) - 8) \
534 bn_cp_32(to, 2, from, (a6) - 8) \
535 bn_cp_32(to, 3, from, (a5) - 8) \
536 bn_cp_32(to, 4, from, (a4) - 8) \
537 bn_cp_32(to, 5, from, (a3) - 8) \
538 bn_cp_32(to, 6, from, (a2) - 8) \
539 bn_cp_32(to, 7, from, (a1) - 8) \
540 }
541
/*
 * Reduce a modulo the built-in NIST P-256 prime and store the result in r
 * (r may be the same BIGNUM as a).
 *
 * The `field` argument is overwritten with the built-in modulus before it
 * is used.  If a is negative or not smaller than the square of the modulus
 * the call is forwarded to BN_nnmod().  If a equals the modulus r is set
 * to zero; if a is smaller it is copied to r unchanged.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the BN_nnmod and
 * BN_copy paths the result of those calls decides the return value.
 */
542int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
543 BN_CTX *ctx)
544 {
545 int i, top = a->top;
546 int carry = 0;
547 register BN_ULONG *a_d = a->d, *r_d;
548 BN_ULONG t_d[BN_NIST_256_TOP],
549 buf[BN_NIST_256_TOP],
550 c_d[BN_NIST_256_TOP],
551 *res;
552 size_t mask;
553 union { bn_addsub_f f; size_t p; } u;
554 static const BIGNUM _bignum_nist_p_256_sqr = {
555 (BN_ULONG *)_nist_p_256_sqr,
556 sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
557 sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
558 0,BN_FLG_STATIC_DATA };
559
560 field = &_bignum_nist_p_256; /* just to make sure */
561
562 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_256_sqr)>=0)
563 return BN_nnmod(r, a, field, ctx);
564
565 i = BN_ucmp(field, a);
566 if (i == 0)
567 {
568 BN_zero(r);
569 return 1;
570 }
571 else if (i > 0)
572 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
573
 /* r_d starts out as the low BN_NIST_256_TOP words of a. */
574 if (r != a)
575 {
576 if (!bn_wexpand(r, BN_NIST_256_TOP))
577 return 0;
578 r_d = r->d;
579 nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
580 }
581 else
582 r_d = a_d;
583
 /* buf = the words of a above the low part, zero-padded. */
584 nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);
585
 /* S1 and S2 are summed first and the sum is then doubled by the
  * one-bit left shift below, so each is added twice in total. */
586 /*S1*/
587 nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0);
588 /*S2*/
589 nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0);
590 carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
591 /* left shift */
592 {
593 register BN_ULONG *ap,t,c;
594 ap = t_d;
595 c=0;
596 for (i = BN_NIST_256_TOP; i != 0; --i)
597 {
598 t= *ap;
599 *(ap++)=((t<<1)|c)&BN_MASK2;
600 c=(t & BN_TBIT)?1:0;
601 }
 /* the carry of the addition is doubled too and the bit shifted out of
  * the top word is merged in */
602 carry <<= 1;
603 carry |= c;
604 }
605 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
606 /*S3*/
607 nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8);
608 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
609 /*S4*/
610 nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9);
611 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
 /* The D terms are subtracted; their borrows are taken off carry,
  * which may therefore end up negative. */
612 /*D1*/
613 nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11);
614 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
615 /*D2*/
616 nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12);
617 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
618 /*D3*/
619 nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13);
620 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
621 /*D4*/
622 nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14);
623 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
624
625 /* see BN_nist_mod_224 for explanation */
626 u.f = bn_sub_words;
627 if (carry > 0)
628 carry = (int)bn_sub_words(r_d,r_d,_nist_p_256[carry-1],BN_NIST_256_TOP);
629 else if (carry < 0)
630 {
631 carry = (int)bn_add_words(r_d,r_d,_nist_p_256[-carry-1],BN_NIST_256_TOP);
632 mask = 0-(size_t)carry;
633 u.p = ((size_t)bn_sub_words&mask) | ((size_t)bn_add_words&~mask);
634 }
635 else
636 carry = 1;
637
 /* mask is all-ones only when both the value returned by u.f and carry
  * are set; the pointer select then keeps r_d, otherwise it picks c_d. */
638 mask = 0-(size_t)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP);
639 mask &= 0-(size_t)carry;
640 res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
641 nist_cp_bn(r_d, res, BN_NIST_256_TOP);
642 r->top = BN_NIST_256_TOP;
643 bn_correct_top(r);
644
645 return 1;
646 }
647
/*
 * Fill the twelve 32-bit units of `to` from `from`.  a1 selects the source
 * for the most significant unit (to[11]) and a12 for the least (to[0]).
 * Each selector has 12 subtracted before indexing; a selector below 12
 * gives a negative index, which bn_cp_32 turns into zero.
 */
648#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
649 { \
650 bn_cp_32(to, 0, from, (a12) - 12) \
651 bn_cp_32(to, 1, from, (a11) - 12) \
652 bn_cp_32(to, 2, from, (a10) - 12) \
653 bn_cp_32(to, 3, from, (a9) - 12) \
654 bn_cp_32(to, 4, from, (a8) - 12) \
655 bn_cp_32(to, 5, from, (a7) - 12) \
656 bn_cp_32(to, 6, from, (a6) - 12) \
657 bn_cp_32(to, 7, from, (a5) - 12) \
658 bn_cp_32(to, 8, from, (a4) - 12) \
659 bn_cp_32(to, 9, from, (a3) - 12) \
660 bn_cp_32(to, 10, from, (a2) - 12) \
661 bn_cp_32(to, 11, from, (a1) - 12) \
662 }
663
/*
 * Reduce a modulo the built-in NIST P-384 prime and store the result in r
 * (r may be the same BIGNUM as a).
 *
 * The `field` argument is overwritten with the built-in modulus before it
 * is used.  If a is negative or not smaller than the square of the modulus
 * the call is forwarded to BN_nnmod().  If a equals the modulus r is set
 * to zero; if a is smaller it is copied to r unchanged.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the BN_nnmod and
 * BN_copy paths the result of those calls decides the return value.
 */
664int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
665 BN_CTX *ctx)
666 {
667 int i, top = a->top;
668 int carry = 0;
669 register BN_ULONG *r_d, *a_d = a->d;
670 BN_ULONG t_d[BN_NIST_384_TOP],
671 buf[BN_NIST_384_TOP],
672 c_d[BN_NIST_384_TOP],
673 *res;
674 size_t mask;
675 union { bn_addsub_f f; size_t p; } u;
676 static const BIGNUM _bignum_nist_p_384_sqr = {
677 (BN_ULONG *)_nist_p_384_sqr,
678 sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
679 sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
680 0,BN_FLG_STATIC_DATA };
681
682
683 field = &_bignum_nist_p_384; /* just to make sure */
684
685 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_384_sqr)>=0)
686 return BN_nnmod(r, a, field, ctx);
687
688 i = BN_ucmp(field, a);
689 if (i == 0)
690 {
691 BN_zero(r);
692 return 1;
693 }
694 else if (i > 0)
695 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
696
 /* r_d starts out as the low BN_NIST_384_TOP words of a. */
697 if (r != a)
698 {
699 if (!bn_wexpand(r, BN_NIST_384_TOP))
700 return 0;
701 r_d = r->d;
702 nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
703 }
704 else
705 r_d = a_d;
706
 /* buf = the words of a above the low part, zero-padded. */
707 nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
708
 /* S1 is built with the 256-bit setter (selectors are offset by 8
  * there, hence the "-4"), doubled by a one-bit left shift and then
  * added to r_d starting 128 bits up. */
709 /*S1*/
710 nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
711 /* left shift */
712 {
713 register BN_ULONG *ap,t,c;
714 ap = t_d;
715 c=0;
716 for (i = 3; i != 0; --i)
717 {
718 t= *ap;
719 *(ap++)=((t<<1)|c)&BN_MASK2;
720 c=(t & BN_TBIT)?1:0;
721 }
 /* the bit shifted out of the third word goes into the fourth */
722 *ap=c;
723 }
724 carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
725 t_d, BN_NIST_256_TOP);
726 /*S2 */
727 carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP);
728 /*S3*/
729 nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21);
730 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
731 /*S4*/
732 nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0);
733 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
734 /*S5*/
735 nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0);
736 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
737 /*S6*/
738 nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20);
739 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
 /* The D terms are subtracted; their borrows are taken off carry,
  * which may therefore end up negative. */
740 /*D1*/
741 nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23);
742 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
743 /*D2*/
744 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0);
745 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
746 /*D3*/
747 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0);
748 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
749
750 /* see BN_nist_mod_224 for explanation */
751 u.f = bn_sub_words;
752 if (carry > 0)
753 carry = (int)bn_sub_words(r_d,r_d,_nist_p_384[carry-1],BN_NIST_384_TOP);
754 else if (carry < 0)
755 {
756 carry = (int)bn_add_words(r_d,r_d,_nist_p_384[-carry-1],BN_NIST_384_TOP);
757 mask = 0-(size_t)carry;
758 u.p = ((size_t)bn_sub_words&mask) | ((size_t)bn_add_words&~mask);
759 }
760 else
761 carry = 1;
762
 /* mask is all-ones only when both the value returned by u.f and carry
  * are set; the pointer select then keeps r_d, otherwise it picks c_d. */
763 mask = 0-(size_t)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP);
764 mask &= 0-(size_t)carry;
765 res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
766 nist_cp_bn(r_d, res, BN_NIST_384_TOP);
767 r->top = BN_NIST_384_TOP;
768 bn_correct_top(r);
769
770 return 1;
771 }
772
/*
 * 521 is not a multiple of the word size.  RSHIFT is the number of bits
 * used in the top word, LSHIFT the number of unused bits above them, and
 * TOP_MASK keeps exactly the RSHIFT used bits of the top word.
 */
773#define BN_NIST_521_RSHIFT (521%BN_BITS2)
774#define BN_NIST_521_LSHIFT (BN_BITS2-BN_NIST_521_RSHIFT)
775#define BN_NIST_521_TOP_MASK ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
776
/*
 * Reduce a modulo the built-in NIST P-521 prime and store the result in r
 * (r may be the same BIGNUM as a).
 *
 * The `field` argument is overwritten with the built-in modulus before it
 * is used.  If a is negative or not smaller than the square of the modulus
 * the call is forwarded to BN_nnmod().  If a equals the modulus r is set
 * to zero; if a is smaller it is copied to r unchanged.  Otherwise the
 * bits of a above bit 520 are shifted down and added to the low 521 bits,
 * followed by one conditional subtraction of the modulus.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the BN_nnmod and
 * BN_copy paths the result of those calls decides the return value.
 */
777int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
778 BN_CTX *ctx)
779 {
780 int top = a->top, i;
781 BN_ULONG *r_d, *a_d = a->d,
782 t_d[BN_NIST_521_TOP],
783 val,tmp,*res;
784 size_t mask;
785 static const BIGNUM _bignum_nist_p_521_sqr = {
786 (BN_ULONG *)_nist_p_521_sqr,
787 sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
788 sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
789 0,BN_FLG_STATIC_DATA };
790
791 field = &_bignum_nist_p_521; /* just to make sure */
792
793 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_521_sqr)>=0)
794 return BN_nnmod(r, a, field, ctx);
795
796 i = BN_ucmp(field, a);
797 if (i == 0)
798 {
799 BN_zero(r);
800 return 1;
801 }
802 else if (i > 0)
803 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
804
 /* r_d starts out as the low BN_NIST_521_TOP words of a. */
805 if (r != a)
806 {
807 if (!bn_wexpand(r,BN_NIST_521_TOP))
808 return 0;
809 r_d = r->d;
810 nist_cp_bn(r_d,a_d, BN_NIST_521_TOP);
811 }
812 else
813 r_d = a_d;
814
815 /* upper 521 bits, copy ... */
816 nist_cp_bn_0(t_d,a_d + (BN_NIST_521_TOP-1), top - (BN_NIST_521_TOP-1),BN_NIST_521_TOP);
817 /* ... and right shift */
818 for (val=t_d[0],i=0; i<BN_NIST_521_TOP-1; i++)
819 {
820 tmp = val>>BN_NIST_521_RSHIFT;
821 val = t_d[i+1];
822 t_d[i] = (tmp | val<<BN_NIST_521_LSHIFT) & BN_MASK2;
823 }
 /* i is now BN_NIST_521_TOP-1, the index of the top word */
824 t_d[i] = val>>BN_NIST_521_RSHIFT;
825 /* lower 521 bits */
826 r_d[i] &= BN_NIST_521_TOP_MASK;
827
 /* Sum the two halves, then choose between the sum (r_d) and the sum
  * minus the modulus (t_d): a borrow makes mask all-ones and keeps r_d. */
828 bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP);
829 mask = 0-(size_t)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP);
830 res = (BN_ULONG *)(((size_t)t_d&~mask) | ((size_t)r_d&mask));
831 nist_cp_bn(r_d,res,BN_NIST_521_TOP);
832 r->top = BN_NIST_521_TOP;
833 bn_correct_top(r);
834
835 return 1;
836 }
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
deleted file mode 100644
index 7b25979dd1..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ /dev/null
@@ -1,494 +0,0 @@
1/* crypto/bn/bn_prime.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118/* NB: these functions have been "upgraded", the deprecated versions (which are
119 * compatibility wrappers using these functions) are in bn_depr.c.
120 * - Geoff
121 */
122
123/* The quick sieve algorithm approach to weeding out primes is
124 * Philip Zimmermann's, as implemented in PGP. I have had a read of
125 * his comments and implemented my own version.
126 */
127#include "bn_prime.h"
128
129static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
130 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
131static int probable_prime(BIGNUM *rnd, int bits);
132static int probable_prime_dh(BIGNUM *rnd, int bits,
133 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
134static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
135 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
136
137int BN_GENCB_call(BN_GENCB *cb, int a, int b)
138 {
139 /* No callback means continue */
140 if(!cb) return 1;
141 switch(cb->ver)
142 {
143 case 1:
144 /* Deprecated-style callbacks */
145 if(!cb->cb.cb_1)
146 return 1;
147 cb->cb.cb_1(a, b, cb->arg);
148 return 1;
149 case 2:
150 /* New-style callbacks */
151 return cb->cb.cb_2(a, b, cb);
152 default:
153 break;
154 }
155 /* Unrecognised callback type */
156 return 0;
157 }
158
159int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
160 const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
161 {
162 BIGNUM *t;
163 int found=0;
164 int i,j,c1=0;
165 BN_CTX *ctx;
166 int checks = BN_prime_checks_for_size(bits);
167
168 ctx=BN_CTX_new();
169 if (ctx == NULL) goto err;
170 BN_CTX_start(ctx);
171 t = BN_CTX_get(ctx);
172 if(!t) goto err;
173loop:
174 /* make a random number and set the top and bottom bits */
175 if (add == NULL)
176 {
177 if (!probable_prime(ret,bits)) goto err;
178 }
179 else
180 {
181 if (safe)
182 {
183 if (!probable_prime_dh_safe(ret,bits,add,rem,ctx))
184 goto err;
185 }
186 else
187 {
188 if (!probable_prime_dh(ret,bits,add,rem,ctx))
189 goto err;
190 }
191 }
192 /* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */
193 if(!BN_GENCB_call(cb, 0, c1++))
194 /* aborted */
195 goto err;
196
197 if (!safe)
198 {
199 i=BN_is_prime_fasttest_ex(ret,checks,ctx,0,cb);
200 if (i == -1) goto err;
201 if (i == 0) goto loop;
202 }
203 else
204 {
205 /* for "safe prime" generation,
206 * check that (p-1)/2 is prime.
207 * Since a prime is odd, We just
208 * need to divide by 2 */
209 if (!BN_rshift1(t,ret)) goto err;
210
211 for (i=0; i<checks; i++)
212 {
213 j=BN_is_prime_fasttest_ex(ret,1,ctx,0,cb);
214 if (j == -1) goto err;
215 if (j == 0) goto loop;
216
217 j=BN_is_prime_fasttest_ex(t,1,ctx,0,cb);
218 if (j == -1) goto err;
219 if (j == 0) goto loop;
220
221 if(!BN_GENCB_call(cb, 2, c1-1))
222 goto err;
223 /* We have a safe prime test pass */
224 }
225 }
226 /* we have a prime :-) */
227 found = 1;
228err:
229 if (ctx != NULL)
230 {
231 BN_CTX_end(ctx);
232 BN_CTX_free(ctx);
233 }
234 bn_check_top(ret);
235 return found;
236 }
237
238int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_GENCB *cb)
239 {
240 return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
241 }
242
243int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
244 int do_trial_division, BN_GENCB *cb)
245 {
246 int i, j, ret = -1;
247 int k;
248 BN_CTX *ctx = NULL;
249 BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
250 BN_MONT_CTX *mont = NULL;
251 const BIGNUM *A = NULL;
252
253 if (BN_cmp(a, BN_value_one()) <= 0)
254 return 0;
255
256 if (checks == BN_prime_checks)
257 checks = BN_prime_checks_for_size(BN_num_bits(a));
258
259 /* first look for small factors */
260 if (!BN_is_odd(a))
261 /* a is even => a is prime if and only if a == 2 */
262 return BN_is_word(a, 2);
263 if (do_trial_division)
264 {
265 for (i = 1; i < NUMPRIMES; i++)
266 if (BN_mod_word(a, primes[i]) == 0)
267 return 0;
268 if(!BN_GENCB_call(cb, 1, -1))
269 goto err;
270 }
271
272 if (ctx_passed != NULL)
273 ctx = ctx_passed;
274 else
275 if ((ctx=BN_CTX_new()) == NULL)
276 goto err;
277 BN_CTX_start(ctx);
278
279 /* A := abs(a) */
280 if (a->neg)
281 {
282 BIGNUM *t;
283 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
284 BN_copy(t, a);
285 t->neg = 0;
286 A = t;
287 }
288 else
289 A = a;
290 A1 = BN_CTX_get(ctx);
291 A1_odd = BN_CTX_get(ctx);
292 check = BN_CTX_get(ctx);
293 if (check == NULL) goto err;
294
295 /* compute A1 := A - 1 */
296 if (!BN_copy(A1, A))
297 goto err;
298 if (!BN_sub_word(A1, 1))
299 goto err;
300 if (BN_is_zero(A1))
301 {
302 ret = 0;
303 goto err;
304 }
305
306 /* write A1 as A1_odd * 2^k */
307 k = 1;
308 while (!BN_is_bit_set(A1, k))
309 k++;
310 if (!BN_rshift(A1_odd, A1, k))
311 goto err;
312
313 /* Montgomery setup for computations mod A */
314 mont = BN_MONT_CTX_new();
315 if (mont == NULL)
316 goto err;
317 if (!BN_MONT_CTX_set(mont, A, ctx))
318 goto err;
319
320 for (i = 0; i < checks; i++)
321 {
322 if (!BN_pseudo_rand_range(check, A1))
323 goto err;
324 if (!BN_add_word(check, 1))
325 goto err;
326 /* now 1 <= check < A */
327
328 j = witness(check, A, A1, A1_odd, k, ctx, mont);
329 if (j == -1) goto err;
330 if (j)
331 {
332 ret=0;
333 goto err;
334 }
335 if(!BN_GENCB_call(cb, 1, i))
336 goto err;
337 }
338 ret=1;
339err:
340 if (ctx != NULL)
341 {
342 BN_CTX_end(ctx);
343 if (ctx_passed == NULL)
344 BN_CTX_free(ctx);
345 }
346 if (mont != NULL)
347 BN_MONT_CTX_free(mont);
348
349 return(ret);
350 }
351
352static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
353 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont)
354 {
355 if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
356 return -1;
357 if (BN_is_one(w))
358 return 0; /* probably prime */
359 if (BN_cmp(w, a1) == 0)
360 return 0; /* w == -1 (mod a), 'a' is probably prime */
361 while (--k)
362 {
363 if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
364 return -1;
365 if (BN_is_one(w))
366 return 1; /* 'a' is composite, otherwise a previous 'w' would
367 * have been == -1 (mod 'a') */
368 if (BN_cmp(w, a1) == 0)
369 return 0; /* w == -1 (mod a), 'a' is probably prime */
370 }
371 /* If we get here, 'w' is the (a-1)/2-th power of the original 'w',
372 * and it is neither -1 nor +1 -- so 'a' cannot be prime */
373 bn_check_top(w);
374 return 1;
375 }
376
377static int probable_prime(BIGNUM *rnd, int bits)
378 {
379 int i;
380 prime_t mods[NUMPRIMES];
381 BN_ULONG delta,maxdelta;
382
383again:
384 if (!BN_rand(rnd,bits,1,1)) return(0);
385 /* we now have a random number 'rand' to test. */
386 for (i=1; i<NUMPRIMES; i++)
387 mods[i]=(prime_t)BN_mod_word(rnd,(BN_ULONG)primes[i]);
388 maxdelta=BN_MASK2 - primes[NUMPRIMES-1];
389 delta=0;
390 loop: for (i=1; i<NUMPRIMES; i++)
391 {
392 /* check that rnd is not a prime and also
393 * that gcd(rnd-1,primes) == 1 (except for 2) */
394 if (((mods[i]+delta)%primes[i]) <= 1)
395 {
396 delta+=2;
397 if (delta > maxdelta) goto again;
398 goto loop;
399 }
400 }
401 if (!BN_add_word(rnd,delta)) return(0);
402 bn_check_top(rnd);
403 return(1);
404 }
405
406static int probable_prime_dh(BIGNUM *rnd, int bits,
407 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
408 {
409 int i,ret=0;
410 BIGNUM *t1;
411
412 BN_CTX_start(ctx);
413 if ((t1 = BN_CTX_get(ctx)) == NULL) goto err;
414
415 if (!BN_rand(rnd,bits,0,1)) goto err;
416
417 /* we need ((rnd-rem) % add) == 0 */
418
419 if (!BN_mod(t1,rnd,add,ctx)) goto err;
420 if (!BN_sub(rnd,rnd,t1)) goto err;
421 if (rem == NULL)
422 { if (!BN_add_word(rnd,1)) goto err; }
423 else
424 { if (!BN_add(rnd,rnd,rem)) goto err; }
425
426 /* we now have a random number 'rand' to test. */
427
428 loop: for (i=1; i<NUMPRIMES; i++)
429 {
430 /* check that rnd is a prime */
431 if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
432 {
433 if (!BN_add(rnd,rnd,add)) goto err;
434 goto loop;
435 }
436 }
437 ret=1;
438err:
439 BN_CTX_end(ctx);
440 bn_check_top(rnd);
441 return(ret);
442 }
443
444static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
445 const BIGNUM *rem, BN_CTX *ctx)
446 {
447 int i,ret=0;
448 BIGNUM *t1,*qadd,*q;
449
450 bits--;
451 BN_CTX_start(ctx);
452 t1 = BN_CTX_get(ctx);
453 q = BN_CTX_get(ctx);
454 qadd = BN_CTX_get(ctx);
455 if (qadd == NULL) goto err;
456
457 if (!BN_rshift1(qadd,padd)) goto err;
458
459 if (!BN_rand(q,bits,0,1)) goto err;
460
461 /* we need ((rnd-rem) % add) == 0 */
462 if (!BN_mod(t1,q,qadd,ctx)) goto err;
463 if (!BN_sub(q,q,t1)) goto err;
464 if (rem == NULL)
465 { if (!BN_add_word(q,1)) goto err; }
466 else
467 {
468 if (!BN_rshift1(t1,rem)) goto err;
469 if (!BN_add(q,q,t1)) goto err;
470 }
471
472 /* we now have a random number 'rand' to test. */
473 if (!BN_lshift1(p,q)) goto err;
474 if (!BN_add_word(p,1)) goto err;
475
476 loop: for (i=1; i<NUMPRIMES; i++)
477 {
478 /* check that p and q are prime */
479 /* check that for p and q
480 * gcd(p-1,primes) == 1 (except for 2) */
481 if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
482 (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
483 {
484 if (!BN_add(p,p,padd)) goto err;
485 if (!BN_add(q,q,qadd)) goto err;
486 goto loop;
487 }
488 }
489 ret=1;
490err:
491 BN_CTX_end(ctx);
492 bn_check_top(p);
493 return(ret);
494 }
diff --git a/src/lib/libcrypto/bn/bn_prime.h b/src/lib/libcrypto/bn/bn_prime.h
deleted file mode 100644
index 51d2194feb..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.h
+++ /dev/null
@@ -1,327 +0,0 @@
1/* Auto generated by bn_prime.pl */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
/*
 * Size of the small-prime table and the integer type of its entries.
 * An EIGHT_BIT build keeps only the 54 primes that fit in an unsigned
 * char; every other build uses 2048 primes stored as unsigned short.
 */
#ifdef EIGHT_BIT
#define NUMPRIMES 54
typedef unsigned char prime_t;
#else
#define NUMPRIMES 2048
typedef unsigned short prime_t;
#endif
66static const prime_t primes[NUMPRIMES]=
67 {
68 2, 3, 5, 7, 11, 13, 17, 19,
69 23, 29, 31, 37, 41, 43, 47, 53,
70 59, 61, 67, 71, 73, 79, 83, 89,
71 97, 101, 103, 107, 109, 113, 127, 131,
72 137, 139, 149, 151, 157, 163, 167, 173,
73 179, 181, 191, 193, 197, 199, 211, 223,
74 227, 229, 233, 239, 241, 251,
75#ifndef EIGHT_BIT
76 257, 263,
77 269, 271, 277, 281, 283, 293, 307, 311,
78 313, 317, 331, 337, 347, 349, 353, 359,
79 367, 373, 379, 383, 389, 397, 401, 409,
80 419, 421, 431, 433, 439, 443, 449, 457,
81 461, 463, 467, 479, 487, 491, 499, 503,
82 509, 521, 523, 541, 547, 557, 563, 569,
83 571, 577, 587, 593, 599, 601, 607, 613,
84 617, 619, 631, 641, 643, 647, 653, 659,
85 661, 673, 677, 683, 691, 701, 709, 719,
86 727, 733, 739, 743, 751, 757, 761, 769,
87 773, 787, 797, 809, 811, 821, 823, 827,
88 829, 839, 853, 857, 859, 863, 877, 881,
89 883, 887, 907, 911, 919, 929, 937, 941,
90 947, 953, 967, 971, 977, 983, 991, 997,
91 1009,1013,1019,1021,1031,1033,1039,1049,
92 1051,1061,1063,1069,1087,1091,1093,1097,
93 1103,1109,1117,1123,1129,1151,1153,1163,
94 1171,1181,1187,1193,1201,1213,1217,1223,
95 1229,1231,1237,1249,1259,1277,1279,1283,
96 1289,1291,1297,1301,1303,1307,1319,1321,
97 1327,1361,1367,1373,1381,1399,1409,1423,
98 1427,1429,1433,1439,1447,1451,1453,1459,
99 1471,1481,1483,1487,1489,1493,1499,1511,
100 1523,1531,1543,1549,1553,1559,1567,1571,
101 1579,1583,1597,1601,1607,1609,1613,1619,
102 1621,1627,1637,1657,1663,1667,1669,1693,
103 1697,1699,1709,1721,1723,1733,1741,1747,
104 1753,1759,1777,1783,1787,1789,1801,1811,
105 1823,1831,1847,1861,1867,1871,1873,1877,
106 1879,1889,1901,1907,1913,1931,1933,1949,
107 1951,1973,1979,1987,1993,1997,1999,2003,
108 2011,2017,2027,2029,2039,2053,2063,2069,
109 2081,2083,2087,2089,2099,2111,2113,2129,
110 2131,2137,2141,2143,2153,2161,2179,2203,
111 2207,2213,2221,2237,2239,2243,2251,2267,
112 2269,2273,2281,2287,2293,2297,2309,2311,
113 2333,2339,2341,2347,2351,2357,2371,2377,
114 2381,2383,2389,2393,2399,2411,2417,2423,
115 2437,2441,2447,2459,2467,2473,2477,2503,
116 2521,2531,2539,2543,2549,2551,2557,2579,
117 2591,2593,2609,2617,2621,2633,2647,2657,
118 2659,2663,2671,2677,2683,2687,2689,2693,
119 2699,2707,2711,2713,2719,2729,2731,2741,
120 2749,2753,2767,2777,2789,2791,2797,2801,
121 2803,2819,2833,2837,2843,2851,2857,2861,
122 2879,2887,2897,2903,2909,2917,2927,2939,
123 2953,2957,2963,2969,2971,2999,3001,3011,
124 3019,3023,3037,3041,3049,3061,3067,3079,
125 3083,3089,3109,3119,3121,3137,3163,3167,
126 3169,3181,3187,3191,3203,3209,3217,3221,
127 3229,3251,3253,3257,3259,3271,3299,3301,
128 3307,3313,3319,3323,3329,3331,3343,3347,
129 3359,3361,3371,3373,3389,3391,3407,3413,
130 3433,3449,3457,3461,3463,3467,3469,3491,
131 3499,3511,3517,3527,3529,3533,3539,3541,
132 3547,3557,3559,3571,3581,3583,3593,3607,
133 3613,3617,3623,3631,3637,3643,3659,3671,
134 3673,3677,3691,3697,3701,3709,3719,3727,
135 3733,3739,3761,3767,3769,3779,3793,3797,
136 3803,3821,3823,3833,3847,3851,3853,3863,
137 3877,3881,3889,3907,3911,3917,3919,3923,
138 3929,3931,3943,3947,3967,3989,4001,4003,
139 4007,4013,4019,4021,4027,4049,4051,4057,
140 4073,4079,4091,4093,4099,4111,4127,4129,
141 4133,4139,4153,4157,4159,4177,4201,4211,
142 4217,4219,4229,4231,4241,4243,4253,4259,
143 4261,4271,4273,4283,4289,4297,4327,4337,
144 4339,4349,4357,4363,4373,4391,4397,4409,
145 4421,4423,4441,4447,4451,4457,4463,4481,
146 4483,4493,4507,4513,4517,4519,4523,4547,
147 4549,4561,4567,4583,4591,4597,4603,4621,
148 4637,4639,4643,4649,4651,4657,4663,4673,
149 4679,4691,4703,4721,4723,4729,4733,4751,
150 4759,4783,4787,4789,4793,4799,4801,4813,
151 4817,4831,4861,4871,4877,4889,4903,4909,
152 4919,4931,4933,4937,4943,4951,4957,4967,
153 4969,4973,4987,4993,4999,5003,5009,5011,
154 5021,5023,5039,5051,5059,5077,5081,5087,
155 5099,5101,5107,5113,5119,5147,5153,5167,
156 5171,5179,5189,5197,5209,5227,5231,5233,
157 5237,5261,5273,5279,5281,5297,5303,5309,
158 5323,5333,5347,5351,5381,5387,5393,5399,
159 5407,5413,5417,5419,5431,5437,5441,5443,
160 5449,5471,5477,5479,5483,5501,5503,5507,
161 5519,5521,5527,5531,5557,5563,5569,5573,
162 5581,5591,5623,5639,5641,5647,5651,5653,
163 5657,5659,5669,5683,5689,5693,5701,5711,
164 5717,5737,5741,5743,5749,5779,5783,5791,
165 5801,5807,5813,5821,5827,5839,5843,5849,
166 5851,5857,5861,5867,5869,5879,5881,5897,
167 5903,5923,5927,5939,5953,5981,5987,6007,
168 6011,6029,6037,6043,6047,6053,6067,6073,
169 6079,6089,6091,6101,6113,6121,6131,6133,
170 6143,6151,6163,6173,6197,6199,6203,6211,
171 6217,6221,6229,6247,6257,6263,6269,6271,
172 6277,6287,6299,6301,6311,6317,6323,6329,
173 6337,6343,6353,6359,6361,6367,6373,6379,
174 6389,6397,6421,6427,6449,6451,6469,6473,
175 6481,6491,6521,6529,6547,6551,6553,6563,
176 6569,6571,6577,6581,6599,6607,6619,6637,
177 6653,6659,6661,6673,6679,6689,6691,6701,
178 6703,6709,6719,6733,6737,6761,6763,6779,
179 6781,6791,6793,6803,6823,6827,6829,6833,
180 6841,6857,6863,6869,6871,6883,6899,6907,
181 6911,6917,6947,6949,6959,6961,6967,6971,
182 6977,6983,6991,6997,7001,7013,7019,7027,
183 7039,7043,7057,7069,7079,7103,7109,7121,
184 7127,7129,7151,7159,7177,7187,7193,7207,
185 7211,7213,7219,7229,7237,7243,7247,7253,
186 7283,7297,7307,7309,7321,7331,7333,7349,
187 7351,7369,7393,7411,7417,7433,7451,7457,
188 7459,7477,7481,7487,7489,7499,7507,7517,
189 7523,7529,7537,7541,7547,7549,7559,7561,
190 7573,7577,7583,7589,7591,7603,7607,7621,
191 7639,7643,7649,7669,7673,7681,7687,7691,
192 7699,7703,7717,7723,7727,7741,7753,7757,
193 7759,7789,7793,7817,7823,7829,7841,7853,
194 7867,7873,7877,7879,7883,7901,7907,7919,
195 7927,7933,7937,7949,7951,7963,7993,8009,
196 8011,8017,8039,8053,8059,8069,8081,8087,
197 8089,8093,8101,8111,8117,8123,8147,8161,
198 8167,8171,8179,8191,8209,8219,8221,8231,
199 8233,8237,8243,8263,8269,8273,8287,8291,
200 8293,8297,8311,8317,8329,8353,8363,8369,
201 8377,8387,8389,8419,8423,8429,8431,8443,
202 8447,8461,8467,8501,8513,8521,8527,8537,
203 8539,8543,8563,8573,8581,8597,8599,8609,
204 8623,8627,8629,8641,8647,8663,8669,8677,
205 8681,8689,8693,8699,8707,8713,8719,8731,
206 8737,8741,8747,8753,8761,8779,8783,8803,
207 8807,8819,8821,8831,8837,8839,8849,8861,
208 8863,8867,8887,8893,8923,8929,8933,8941,
209 8951,8963,8969,8971,8999,9001,9007,9011,
210 9013,9029,9041,9043,9049,9059,9067,9091,
211 9103,9109,9127,9133,9137,9151,9157,9161,
212 9173,9181,9187,9199,9203,9209,9221,9227,
213 9239,9241,9257,9277,9281,9283,9293,9311,
214 9319,9323,9337,9341,9343,9349,9371,9377,
215 9391,9397,9403,9413,9419,9421,9431,9433,
216 9437,9439,9461,9463,9467,9473,9479,9491,
217 9497,9511,9521,9533,9539,9547,9551,9587,
218 9601,9613,9619,9623,9629,9631,9643,9649,
219 9661,9677,9679,9689,9697,9719,9721,9733,
220 9739,9743,9749,9767,9769,9781,9787,9791,
221 9803,9811,9817,9829,9833,9839,9851,9857,
222 9859,9871,9883,9887,9901,9907,9923,9929,
223 9931,9941,9949,9967,9973,10007,10009,10037,
224 10039,10061,10067,10069,10079,10091,10093,10099,
225 10103,10111,10133,10139,10141,10151,10159,10163,
226 10169,10177,10181,10193,10211,10223,10243,10247,
227 10253,10259,10267,10271,10273,10289,10301,10303,
228 10313,10321,10331,10333,10337,10343,10357,10369,
229 10391,10399,10427,10429,10433,10453,10457,10459,
230 10463,10477,10487,10499,10501,10513,10529,10531,
231 10559,10567,10589,10597,10601,10607,10613,10627,
232 10631,10639,10651,10657,10663,10667,10687,10691,
233 10709,10711,10723,10729,10733,10739,10753,10771,
234 10781,10789,10799,10831,10837,10847,10853,10859,
235 10861,10867,10883,10889,10891,10903,10909,10937,
236 10939,10949,10957,10973,10979,10987,10993,11003,
237 11027,11047,11057,11059,11069,11071,11083,11087,
238 11093,11113,11117,11119,11131,11149,11159,11161,
239 11171,11173,11177,11197,11213,11239,11243,11251,
240 11257,11261,11273,11279,11287,11299,11311,11317,
241 11321,11329,11351,11353,11369,11383,11393,11399,
242 11411,11423,11437,11443,11447,11467,11471,11483,
243 11489,11491,11497,11503,11519,11527,11549,11551,
244 11579,11587,11593,11597,11617,11621,11633,11657,
245 11677,11681,11689,11699,11701,11717,11719,11731,
246 11743,11777,11779,11783,11789,11801,11807,11813,
247 11821,11827,11831,11833,11839,11863,11867,11887,
248 11897,11903,11909,11923,11927,11933,11939,11941,
249 11953,11959,11969,11971,11981,11987,12007,12011,
250 12037,12041,12043,12049,12071,12073,12097,12101,
251 12107,12109,12113,12119,12143,12149,12157,12161,
252 12163,12197,12203,12211,12227,12239,12241,12251,
253 12253,12263,12269,12277,12281,12289,12301,12323,
254 12329,12343,12347,12373,12377,12379,12391,12401,
255 12409,12413,12421,12433,12437,12451,12457,12473,
256 12479,12487,12491,12497,12503,12511,12517,12527,
257 12539,12541,12547,12553,12569,12577,12583,12589,
258 12601,12611,12613,12619,12637,12641,12647,12653,
259 12659,12671,12689,12697,12703,12713,12721,12739,
260 12743,12757,12763,12781,12791,12799,12809,12821,
261 12823,12829,12841,12853,12889,12893,12899,12907,
262 12911,12917,12919,12923,12941,12953,12959,12967,
263 12973,12979,12983,13001,13003,13007,13009,13033,
264 13037,13043,13049,13063,13093,13099,13103,13109,
265 13121,13127,13147,13151,13159,13163,13171,13177,
266 13183,13187,13217,13219,13229,13241,13249,13259,
267 13267,13291,13297,13309,13313,13327,13331,13337,
268 13339,13367,13381,13397,13399,13411,13417,13421,
269 13441,13451,13457,13463,13469,13477,13487,13499,
270 13513,13523,13537,13553,13567,13577,13591,13597,
271 13613,13619,13627,13633,13649,13669,13679,13681,
272 13687,13691,13693,13697,13709,13711,13721,13723,
273 13729,13751,13757,13759,13763,13781,13789,13799,
274 13807,13829,13831,13841,13859,13873,13877,13879,
275 13883,13901,13903,13907,13913,13921,13931,13933,
276 13963,13967,13997,13999,14009,14011,14029,14033,
277 14051,14057,14071,14081,14083,14087,14107,14143,
278 14149,14153,14159,14173,14177,14197,14207,14221,
279 14243,14249,14251,14281,14293,14303,14321,14323,
280 14327,14341,14347,14369,14387,14389,14401,14407,
281 14411,14419,14423,14431,14437,14447,14449,14461,
282 14479,14489,14503,14519,14533,14537,14543,14549,
283 14551,14557,14561,14563,14591,14593,14621,14627,
284 14629,14633,14639,14653,14657,14669,14683,14699,
285 14713,14717,14723,14731,14737,14741,14747,14753,
286 14759,14767,14771,14779,14783,14797,14813,14821,
287 14827,14831,14843,14851,14867,14869,14879,14887,
288 14891,14897,14923,14929,14939,14947,14951,14957,
289 14969,14983,15013,15017,15031,15053,15061,15073,
290 15077,15083,15091,15101,15107,15121,15131,15137,
291 15139,15149,15161,15173,15187,15193,15199,15217,
292 15227,15233,15241,15259,15263,15269,15271,15277,
293 15287,15289,15299,15307,15313,15319,15329,15331,
294 15349,15359,15361,15373,15377,15383,15391,15401,
295 15413,15427,15439,15443,15451,15461,15467,15473,
296 15493,15497,15511,15527,15541,15551,15559,15569,
297 15581,15583,15601,15607,15619,15629,15641,15643,
298 15647,15649,15661,15667,15671,15679,15683,15727,
299 15731,15733,15737,15739,15749,15761,15767,15773,
300 15787,15791,15797,15803,15809,15817,15823,15859,
301 15877,15881,15887,15889,15901,15907,15913,15919,
302 15923,15937,15959,15971,15973,15991,16001,16007,
303 16033,16057,16061,16063,16067,16069,16073,16087,
304 16091,16097,16103,16111,16127,16139,16141,16183,
305 16187,16189,16193,16217,16223,16229,16231,16249,
306 16253,16267,16273,16301,16319,16333,16339,16349,
307 16361,16363,16369,16381,16411,16417,16421,16427,
308 16433,16447,16451,16453,16477,16481,16487,16493,
309 16519,16529,16547,16553,16561,16567,16573,16603,
310 16607,16619,16631,16633,16649,16651,16657,16661,
311 16673,16691,16693,16699,16703,16729,16741,16747,
312 16759,16763,16787,16811,16823,16829,16831,16843,
313 16871,16879,16883,16889,16901,16903,16921,16927,
314 16931,16937,16943,16963,16979,16981,16987,16993,
315 17011,17021,17027,17029,17033,17041,17047,17053,
316 17077,17093,17099,17107,17117,17123,17137,17159,
317 17167,17183,17189,17191,17203,17207,17209,17231,
318 17239,17257,17291,17293,17299,17317,17321,17327,
319 17333,17341,17351,17359,17377,17383,17387,17389,
320 17393,17401,17417,17419,17431,17443,17449,17467,
321 17471,17477,17483,17489,17491,17497,17509,17519,
322 17539,17551,17569,17573,17579,17581,17597,17599,
323 17609,17623,17627,17657,17659,17669,17681,17683,
324 17707,17713,17729,17737,17747,17749,17761,17783,
325 17789,17791,17807,17827,17837,17839,17851,17863,
326#endif
327 };
diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl
deleted file mode 100644
index 3fafb6f3e9..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.pl
+++ /dev/null
@@ -1,119 +0,0 @@
1#!/usr/local/bin/perl
2# bn_prime.pl
3
# Number of primes to generate: 2048 unless a count is given as the
# first command-line argument.
$num=2048;
$num=$ARGV[0] if ($#ARGV >= 0);

# Seed the table with 2, then test odd candidates by trial division
# against the primes found so far, up to int(sqrt(candidate)).
push(@primes,2);
$p=1;
candidate: while (scalar(@primes) < $num)
	{
	$p+=2;
	$s=int(sqrt($p));

	foreach my $known (@primes)
		{
		last if ($known > $s);
		# A divisor was found: move on to the next odd candidate.
		next candidate if (($p % $known) == 0);
		}
	push(@primes,$p);
	}
20
21# print <<"EOF";
22# /* Auto generated by bn_prime.pl */
23# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au).
24# * All rights reserved.
25# * Copyright remains Eric Young's, and as such any Copyright notices in
26# * the code are not to be removed.
27# * See the COPYRIGHT file in the SSLeay distribution for more details.
28# */
29#
30# EOF
31
32print <<\EOF;
33/* Auto generated by bn_prime.pl */
34/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35 * All rights reserved.
36 *
37 * This package is an SSL implementation written
38 * by Eric Young (eay@cryptsoft.com).
39 * The implementation was written so as to conform with Netscapes SSL.
40 *
41 * This library is free for commercial and non-commercial use as long as
42 * the following conditions are aheared to. The following conditions
43 * apply to all code found in this distribution, be it the RC4, RSA,
44 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
45 * included with this distribution is covered by the same copyright terms
46 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
47 *
48 * Copyright remains Eric Young's, and as such any Copyright notices in
49 * the code are not to be removed.
50 * If this package is used in a product, Eric Young should be given attribution
51 * as the author of the parts of the library used.
52 * This can be in the form of a textual message at program startup or
53 * in documentation (online or textual) provided with the package.
54 *
55 * Redistribution and use in source and binary forms, with or without
56 * modification, are permitted provided that the following conditions
57 * are met:
58 * 1. Redistributions of source code must retain the copyright
59 * notice, this list of conditions and the following disclaimer.
60 * 2. Redistributions in binary form must reproduce the above copyright
61 * notice, this list of conditions and the following disclaimer in the
62 * documentation and/or other materials provided with the distribution.
63 * 3. All advertising materials mentioning features or use of this software
64 * must display the following acknowledgement:
65 * "This product includes cryptographic software written by
66 * Eric Young (eay@cryptsoft.com)"
67 * The word 'cryptographic' can be left out if the rouines from the library
68 * being used are not cryptographic related :-).
69 * 4. If you include any Windows specific code (or a derivative thereof) from
70 * the apps directory (application code) you must include an acknowledgement:
71 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
72 *
73 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * The licence and distribution terms for any publically available version or
86 * derivative of this code cannot be changed. i.e. this code cannot simply be
87 * copied and put under another distribution licence
88 * [including the GNU Public Licence.]
89 */
90
91EOF
92
# $eight is the number of leading primes that do not exceed 256; it is
# emitted as NUMPRIMES for EIGHT_BIT builds.  Default to the whole table
# so that a run requesting only small primes still yields a correct count
# (previously $eight stayed undefined and NUMPRIMES was printed as 0).
$eight=$#primes+1;
for ($i=0; $i <= $#primes; $i++)
	{
	if ($primes[$i] > 256)
		{
		$eight=$i;
		last;
		}
	}

printf "#ifndef EIGHT_BIT\n";
printf "#define NUMPRIMES %d\n",$num;
printf "typedef unsigned short prime_t;\n";
printf "#else\n";
printf "#define NUMPRIMES %d\n",$eight;
printf "typedef unsigned char prime_t;\n";
printf "#endif\n";
print "static const prime_t primes[NUMPRIMES]=\n\t{\n\t";
# $init becomes non-zero once the "#ifndef EIGHT_BIT" guard around the
# primes above 256 has been opened inside the initialiser.
$init=0;
for ($i=0; $i <= $#primes; $i++)
	{
	printf "\n#ifndef EIGHT_BIT\n\t" if ($primes[$i] > 256) && !($init++);
	printf("\n\t") if (($i%8) == 0) && ($i != 0);
	printf("%4d,",$primes[$i]);
	}
# Close the guard only if it was opened; an unconditional "#endif" would
# be unmatched when every generated prime is below 256.
print "\n#endif" if ($init);
print "\n\t};\n";
118
119
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
deleted file mode 100644
index 810dde34e1..0000000000
--- a/src/lib/libcrypto/bn/bn_print.c
+++ /dev/null
@@ -1,338 +0,0 @@
1/* crypto/bn/bn_print.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include "cryptlib.h"
#include <openssl/buffer.h>
#include "bn_lcl.h"
64
65static const char Hex[]="0123456789ABCDEF";
66
67/* Must 'OPENSSL_free' the returned data */
68char *BN_bn2hex(const BIGNUM *a)
69 {
70 int i,j,v,z=0;
71 char *buf;
72 char *p;
73
74 buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2);
75 if (buf == NULL)
76 {
77 BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE);
78 goto err;
79 }
80 p=buf;
81 if (a->neg) *(p++)='-';
82 if (BN_is_zero(a)) *(p++)='0';
83 for (i=a->top-1; i >=0; i--)
84 {
85 for (j=BN_BITS2-8; j >= 0; j-=8)
86 {
87 /* strip leading zeros */
88 v=((int)(a->d[i]>>(long)j))&0xff;
89 if (z || (v != 0))
90 {
91 *(p++)=Hex[v>>4];
92 *(p++)=Hex[v&0x0f];
93 z=1;
94 }
95 }
96 }
97 *p='\0';
98err:
99 return(buf);
100 }
101
102/* Must 'OPENSSL_free' the returned data */
103char *BN_bn2dec(const BIGNUM *a)
104 {
105 int i=0,num, ok = 0;
106 char *buf=NULL;
107 char *p;
108 BIGNUM *t=NULL;
109 BN_ULONG *bn_data=NULL,*lp;
110
111 /* get an upper bound for the length of the decimal integer
112 * num <= (BN_num_bits(a) + 1) * log(2)
113 * <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1 (rounding error)
114 * <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1
115 */
116 i=BN_num_bits(a)*3;
117 num=(i/10+i/1000+1)+1;
118 bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
119 buf=(char *)OPENSSL_malloc(num+3);
120 if ((buf == NULL) || (bn_data == NULL))
121 {
122 BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE);
123 goto err;
124 }
125 if ((t=BN_dup(a)) == NULL) goto err;
126
127#define BUF_REMAIN (num+3 - (size_t)(p - buf))
128 p=buf;
129 lp=bn_data;
130 if (BN_is_zero(t))
131 {
132 *(p++)='0';
133 *(p++)='\0';
134 }
135 else
136 {
137 if (BN_is_negative(t))
138 *p++ = '-';
139
140 i=0;
141 while (!BN_is_zero(t))
142 {
143 *lp=BN_div_word(t,BN_DEC_CONV);
144 lp++;
145 }
146 lp--;
147 /* We now have a series of blocks, BN_DEC_NUM chars
148 * in length, where the last one needs truncation.
149 * The blocks need to be reversed in order. */
150 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT1,*lp);
151 while (*p) p++;
152 while (lp != bn_data)
153 {
154 lp--;
155 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT2,*lp);
156 while (*p) p++;
157 }
158 }
159 ok = 1;
160err:
161 if (bn_data != NULL) OPENSSL_free(bn_data);
162 if (t != NULL) BN_free(t);
163 if (!ok && buf)
164 {
165 OPENSSL_free(buf);
166 buf = NULL;
167 }
168
169 return(buf);
170 }
171
172int BN_hex2bn(BIGNUM **bn, const char *a)
173 {
174 BIGNUM *ret=NULL;
175 BN_ULONG l=0;
176 int neg=0,h,m,i,j,k,c;
177 int num;
178
179 if ((a == NULL) || (*a == '\0')) return(0);
180
181 if (*a == '-') { neg=1; a++; }
182
183 for (i=0; isxdigit((unsigned char) a[i]); i++)
184 ;
185
186 num=i+neg;
187 if (bn == NULL) return(num);
188
189 /* a is the start of the hex digits, and it is 'i' long */
190 if (*bn == NULL)
191 {
192 if ((ret=BN_new()) == NULL) return(0);
193 }
194 else
195 {
196 ret= *bn;
197 BN_zero(ret);
198 }
199
200 /* i is the number of hex digests; */
201 if (bn_expand(ret,i*4) == NULL) goto err;
202
203 j=i; /* least significant 'hex' */
204 m=0;
205 h=0;
206 while (j > 0)
207 {
208 m=((BN_BYTES*2) <= j)?(BN_BYTES*2):j;
209 l=0;
210 for (;;)
211 {
212 c=a[j-m];
213 if ((c >= '0') && (c <= '9')) k=c-'0';
214 else if ((c >= 'a') && (c <= 'f')) k=c-'a'+10;
215 else if ((c >= 'A') && (c <= 'F')) k=c-'A'+10;
216 else k=0; /* paranoia */
217 l=(l<<4)|k;
218
219 if (--m <= 0)
220 {
221 ret->d[h++]=l;
222 break;
223 }
224 }
225 j-=(BN_BYTES*2);
226 }
227 ret->top=h;
228 bn_correct_top(ret);
229 ret->neg=neg;
230
231 *bn=ret;
232 bn_check_top(ret);
233 return(num);
234err:
235 if (*bn == NULL) BN_free(ret);
236 return(0);
237 }
238
239int BN_dec2bn(BIGNUM **bn, const char *a)
240 {
241 BIGNUM *ret=NULL;
242 BN_ULONG l=0;
243 int neg=0,i,j;
244 int num;
245
246 if ((a == NULL) || (*a == '\0')) return(0);
247 if (*a == '-') { neg=1; a++; }
248
249 for (i=0; isdigit((unsigned char) a[i]); i++)
250 ;
251
252 num=i+neg;
253 if (bn == NULL) return(num);
254
255 /* a is the start of the digits, and it is 'i' long.
256 * We chop it into BN_DEC_NUM digits at a time */
257 if (*bn == NULL)
258 {
259 if ((ret=BN_new()) == NULL) return(0);
260 }
261 else
262 {
263 ret= *bn;
264 BN_zero(ret);
265 }
266
267 /* i is the number of digests, a bit of an over expand; */
268 if (bn_expand(ret,i*4) == NULL) goto err;
269
270 j=BN_DEC_NUM-(i%BN_DEC_NUM);
271 if (j == BN_DEC_NUM) j=0;
272 l=0;
273 while (*a)
274 {
275 l*=10;
276 l+= *a-'0';
277 a++;
278 if (++j == BN_DEC_NUM)
279 {
280 BN_mul_word(ret,BN_DEC_CONV);
281 BN_add_word(ret,l);
282 l=0;
283 j=0;
284 }
285 }
286 ret->neg=neg;
287
288 bn_correct_top(ret);
289 *bn=ret;
290 bn_check_top(ret);
291 return(num);
292err:
293 if (*bn == NULL) BN_free(ret);
294 return(0);
295 }
296
297#ifndef OPENSSL_NO_BIO
298#ifndef OPENSSL_NO_FP_API
299int BN_print_fp(FILE *fp, const BIGNUM *a)
300 {
301 BIO *b;
302 int ret;
303
304 if ((b=BIO_new(BIO_s_file())) == NULL)
305 return(0);
306 BIO_set_fp(b,fp,BIO_NOCLOSE);
307 ret=BN_print(b,a);
308 BIO_free(b);
309 return(ret);
310 }
311#endif
312
313int BN_print(BIO *bp, const BIGNUM *a)
314 {
315 int i,j,v,z=0;
316 int ret=0;
317
318 if ((a->neg) && (BIO_write(bp,"-",1) != 1)) goto end;
319 if (BN_is_zero(a) && (BIO_write(bp,"0",1) != 1)) goto end;
320 for (i=a->top-1; i >=0; i--)
321 {
322 for (j=BN_BITS2-4; j >= 0; j-=4)
323 {
324 /* strip leading zeros */
325 v=((int)(a->d[i]>>(long)j))&0x0f;
326 if (z || (v != 0))
327 {
328 if (BIO_write(bp,&(Hex[v]),1) != 1)
329 goto end;
330 z=1;
331 }
332 }
333 }
334 ret=1;
335end:
336 return(ret);
337 }
338#endif
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
deleted file mode 100644
index b376c28ff3..0000000000
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ /dev/null
@@ -1,305 +0,0 @@
1/* crypto/bn/bn_rand.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
119 {
120 unsigned char *buf=NULL;
121 int ret=0,bit,bytes,mask;
122 time_t tim;
123
124 if (bits == 0)
125 {
126 BN_zero(rnd);
127 return 1;
128 }
129
130 bytes=(bits+7)/8;
131 bit=(bits-1)%8;
132 mask=0xff<<(bit+1);
133
134 buf=(unsigned char *)OPENSSL_malloc(bytes);
135 if (buf == NULL)
136 {
137 BNerr(BN_F_BNRAND,ERR_R_MALLOC_FAILURE);
138 goto err;
139 }
140
141 /* make a random number and set the top and bottom bits */
142 time(&tim);
143 RAND_add(&tim,sizeof(tim),0.0);
144
145 if (pseudorand)
146 {
147 if (RAND_pseudo_bytes(buf, bytes) == -1)
148 goto err;
149 }
150 else
151 {
152 if (RAND_bytes(buf, bytes) <= 0)
153 goto err;
154 }
155
156#if 1
157 if (pseudorand == 2)
158 {
159 /* generate patterns that are more likely to trigger BN
160 library bugs */
161 int i;
162 unsigned char c;
163
164 for (i = 0; i < bytes; i++)
165 {
166 RAND_pseudo_bytes(&c, 1);
167 if (c >= 128 && i > 0)
168 buf[i] = buf[i-1];
169 else if (c < 42)
170 buf[i] = 0;
171 else if (c < 84)
172 buf[i] = 255;
173 }
174 }
175#endif
176
177 if (top != -1)
178 {
179 if (top)
180 {
181 if (bit == 0)
182 {
183 buf[0]=1;
184 buf[1]|=0x80;
185 }
186 else
187 {
188 buf[0]|=(3<<(bit-1));
189 }
190 }
191 else
192 {
193 buf[0]|=(1<<bit);
194 }
195 }
196 buf[0] &= ~mask;
197 if (bottom) /* set bottom bit if requested */
198 buf[bytes-1]|=1;
199 if (!BN_bin2bn(buf,bytes,rnd)) goto err;
200 ret=1;
201err:
202 if (buf != NULL)
203 {
204 OPENSSL_cleanse(buf,bytes);
205 OPENSSL_free(buf);
206 }
207 bn_check_top(rnd);
208 return(ret);
209 }
210
211int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
212 {
213 return bnrand(0, rnd, bits, top, bottom);
214 }
215
216int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
217 {
218 return bnrand(1, rnd, bits, top, bottom);
219 }
220
221#if 1
222int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
223 {
224 return bnrand(2, rnd, bits, top, bottom);
225 }
226#endif
227
228
229/* random number r: 0 <= r < range */
230static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
231 {
232 int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
233 int n;
234 int count = 100;
235
236 if (range->neg || BN_is_zero(range))
237 {
238 BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
239 return 0;
240 }
241
242 n = BN_num_bits(range); /* n > 0 */
243
244 /* BN_is_bit_set(range, n - 1) always holds */
245
246 if (n == 1)
247 BN_zero(r);
248 else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3))
249 {
250 /* range = 100..._2,
251 * so 3*range (= 11..._2) is exactly one bit longer than range */
252 do
253 {
254 if (!bn_rand(r, n + 1, -1, 0)) return 0;
255 /* If r < 3*range, use r := r MOD range
256 * (which is either r, r - range, or r - 2*range).
257 * Otherwise, iterate once more.
258 * Since 3*range = 11..._2, each iteration succeeds with
259 * probability >= .75. */
260 if (BN_cmp(r ,range) >= 0)
261 {
262 if (!BN_sub(r, r, range)) return 0;
263 if (BN_cmp(r, range) >= 0)
264 if (!BN_sub(r, r, range)) return 0;
265 }
266
267 if (!--count)
268 {
269 BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
270 return 0;
271 }
272
273 }
274 while (BN_cmp(r, range) >= 0);
275 }
276 else
277 {
278 do
279 {
280 /* range = 11..._2 or range = 101..._2 */
281 if (!bn_rand(r, n, -1, 0)) return 0;
282
283 if (!--count)
284 {
285 BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
286 return 0;
287 }
288 }
289 while (BN_cmp(r, range) >= 0);
290 }
291
292 bn_check_top(r);
293 return 1;
294 }
295
296
297int BN_rand_range(BIGNUM *r, const BIGNUM *range)
298 {
299 return bn_rand_range(0, r, range);
300 }
301
302int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
303 {
304 return bn_rand_range(1, r, range);
305 }
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
deleted file mode 100644
index 2e8efb8dae..0000000000
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ /dev/null
@@ -1,234 +0,0 @@
1/* crypto/bn/bn_recp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63void BN_RECP_CTX_init(BN_RECP_CTX *recp)
64 {
65 BN_init(&(recp->N));
66 BN_init(&(recp->Nr));
67 recp->num_bits=0;
68 recp->flags=0;
69 }
70
71BN_RECP_CTX *BN_RECP_CTX_new(void)
72 {
73 BN_RECP_CTX *ret;
74
75 if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
76 return(NULL);
77
78 BN_RECP_CTX_init(ret);
79 ret->flags=BN_FLG_MALLOCED;
80 return(ret);
81 }
82
83void BN_RECP_CTX_free(BN_RECP_CTX *recp)
84 {
85 if(recp == NULL)
86 return;
87
88 BN_free(&(recp->N));
89 BN_free(&(recp->Nr));
90 if (recp->flags & BN_FLG_MALLOCED)
91 OPENSSL_free(recp);
92 }
93
94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
95 {
96 if (!BN_copy(&(recp->N),d)) return 0;
97 BN_zero(&(recp->Nr));
98 recp->num_bits=BN_num_bits(d);
99 recp->shift=0;
100 return(1);
101 }
102
103int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
104 BN_RECP_CTX *recp, BN_CTX *ctx)
105 {
106 int ret=0;
107 BIGNUM *a;
108 const BIGNUM *ca;
109
110 BN_CTX_start(ctx);
111 if ((a = BN_CTX_get(ctx)) == NULL) goto err;
112 if (y != NULL)
113 {
114 if (x == y)
115 { if (!BN_sqr(a,x,ctx)) goto err; }
116 else
117 { if (!BN_mul(a,x,y,ctx)) goto err; }
118 ca = a;
119 }
120 else
121 ca=x; /* Just do the mod */
122
123 ret = BN_div_recp(NULL,r,ca,recp,ctx);
124err:
125 BN_CTX_end(ctx);
126 bn_check_top(r);
127 return(ret);
128 }
129
130int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
131 BN_RECP_CTX *recp, BN_CTX *ctx)
132 {
133 int i,j,ret=0;
134 BIGNUM *a,*b,*d,*r;
135
136 BN_CTX_start(ctx);
137 a=BN_CTX_get(ctx);
138 b=BN_CTX_get(ctx);
139 if (dv != NULL)
140 d=dv;
141 else
142 d=BN_CTX_get(ctx);
143 if (rem != NULL)
144 r=rem;
145 else
146 r=BN_CTX_get(ctx);
147 if (a == NULL || b == NULL || d == NULL || r == NULL) goto err;
148
149 if (BN_ucmp(m,&(recp->N)) < 0)
150 {
151 BN_zero(d);
152 if (!BN_copy(r,m)) return 0;
153 BN_CTX_end(ctx);
154 return(1);
155 }
156
157 /* We want the remainder
158 * Given input of ABCDEF / ab
159 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
160 *
161 */
162
163 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
164 i=BN_num_bits(m);
165 j=recp->num_bits<<1;
166 if (j>i) i=j;
167
168 /* Nr := round(2^i / N) */
169 if (i != recp->shift)
170 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
171 i,ctx); /* BN_reciprocal returns i, or -1 for an error */
172 if (recp->shift == -1) goto err;
173
174 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
175 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
176 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
177 * = |m/N|
178 */
179 if (!BN_rshift(a,m,recp->num_bits)) goto err;
180 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
181 if (!BN_rshift(d,b,i-recp->num_bits)) goto err;
182 d->neg=0;
183
184 if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
185 if (!BN_usub(r,m,b)) goto err;
186 r->neg=0;
187
188#if 1
189 j=0;
190 while (BN_ucmp(r,&(recp->N)) >= 0)
191 {
192 if (j++ > 2)
193 {
194 BNerr(BN_F_BN_DIV_RECP,BN_R_BAD_RECIPROCAL);
195 goto err;
196 }
197 if (!BN_usub(r,r,&(recp->N))) goto err;
198 if (!BN_add_word(d,1)) goto err;
199 }
200#endif
201
202 r->neg=BN_is_zero(r)?0:m->neg;
203 d->neg=m->neg^recp->N.neg;
204 ret=1;
205err:
206 BN_CTX_end(ctx);
207 bn_check_top(dv);
208 bn_check_top(rem);
209 return(ret);
210 }
211
212/* len is the expected size of the result
213 * We actually calculate with an extra word of precision, so
214 * we can do faster division if the remainder is not required.
215 */
216/* r := 2^len / m */
217int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
218 {
219 int ret= -1;
220 BIGNUM *t;
221
222 BN_CTX_start(ctx);
223 if((t = BN_CTX_get(ctx)) == NULL) goto err;
224
225 if (!BN_set_bit(t,len)) goto err;
226
227 if (!BN_div(r,NULL,t,m,ctx)) goto err;
228
229 ret=len;
230err:
231 bn_check_top(r);
232 BN_CTX_end(ctx);
233 return(ret);
234 }
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
deleted file mode 100644
index c4d301afc4..0000000000
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ /dev/null
@@ -1,220 +0,0 @@
1/* crypto/bn/bn_shift.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_lshift1(BIGNUM *r, const BIGNUM *a)
64 {
65 register BN_ULONG *ap,*rp,t,c;
66 int i;
67
68 bn_check_top(r);
69 bn_check_top(a);
70
71 if (r != a)
72 {
73 r->neg=a->neg;
74 if (bn_wexpand(r,a->top+1) == NULL) return(0);
75 r->top=a->top;
76 }
77 else
78 {
79 if (bn_wexpand(r,a->top+1) == NULL) return(0);
80 }
81 ap=a->d;
82 rp=r->d;
83 c=0;
84 for (i=0; i<a->top; i++)
85 {
86 t= *(ap++);
87 *(rp++)=((t<<1)|c)&BN_MASK2;
88 c=(t & BN_TBIT)?1:0;
89 }
90 if (c)
91 {
92 *rp=1;
93 r->top++;
94 }
95 bn_check_top(r);
96 return(1);
97 }
98
99int BN_rshift1(BIGNUM *r, const BIGNUM *a)
100 {
101 BN_ULONG *ap,*rp,t,c;
102 int i;
103
104 bn_check_top(r);
105 bn_check_top(a);
106
107 if (BN_is_zero(a))
108 {
109 BN_zero(r);
110 return(1);
111 }
112 if (a != r)
113 {
114 if (bn_wexpand(r,a->top) == NULL) return(0);
115 r->top=a->top;
116 r->neg=a->neg;
117 }
118 ap=a->d;
119 rp=r->d;
120 c=0;
121 for (i=a->top-1; i>=0; i--)
122 {
123 t=ap[i];
124 rp[i]=((t>>1)&BN_MASK2)|c;
125 c=(t&1)?BN_TBIT:0;
126 }
127 bn_correct_top(r);
128 bn_check_top(r);
129 return(1);
130 }
131
132int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
133 {
134 int i,nw,lb,rb;
135 BN_ULONG *t,*f;
136 BN_ULONG l;
137
138 bn_check_top(r);
139 bn_check_top(a);
140
141 r->neg=a->neg;
142 nw=n/BN_BITS2;
143 if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
144 lb=n%BN_BITS2;
145 rb=BN_BITS2-lb;
146 f=a->d;
147 t=r->d;
148 t[a->top+nw]=0;
149 if (lb == 0)
150 for (i=a->top-1; i>=0; i--)
151 t[nw+i]=f[i];
152 else
153 for (i=a->top-1; i>=0; i--)
154 {
155 l=f[i];
156 t[nw+i+1]|=(l>>rb)&BN_MASK2;
157 t[nw+i]=(l<<lb)&BN_MASK2;
158 }
159 memset(t,0,nw*sizeof(t[0]));
160/* for (i=0; i<nw; i++)
161 t[i]=0;*/
162 r->top=a->top+nw+1;
163 bn_correct_top(r);
164 bn_check_top(r);
165 return(1);
166 }
167
168int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
169 {
170 int i,j,nw,lb,rb;
171 BN_ULONG *t,*f;
172 BN_ULONG l,tmp;
173
174 bn_check_top(r);
175 bn_check_top(a);
176
177 nw=n/BN_BITS2;
178 rb=n%BN_BITS2;
179 lb=BN_BITS2-rb;
180 if (nw >= a->top || a->top == 0)
181 {
182 BN_zero(r);
183 return(1);
184 }
185 if (r != a)
186 {
187 r->neg=a->neg;
188 if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
189 }
190 else
191 {
192 if (n == 0)
193 return 1; /* or the copying loop will go berserk */
194 }
195
196 f= &(a->d[nw]);
197 t=r->d;
198 j=a->top-nw;
199 r->top=j;
200
201 if (rb == 0)
202 {
203 for (i=j; i != 0; i--)
204 *(t++)= *(f++);
205 }
206 else
207 {
208 l= *(f++);
209 for (i=j-1; i != 0; i--)
210 {
211 tmp =(l>>rb)&BN_MASK2;
212 l= *(f++);
213 *(t++) =(tmp|(l<<lb))&BN_MASK2;
214 }
215 *(t++) =(l>>rb)&BN_MASK2;
216 }
217 bn_correct_top(r);
218 bn_check_top(r);
219 return(1);
220 }
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
deleted file mode 100644
index 270d0cd348..0000000000
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ /dev/null
@@ -1,294 +0,0 @@
1/* crypto/bn/bn_sqr.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r must not be a */
64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
65int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
66 {
67 int max,al;
68 int ret = 0;
69 BIGNUM *tmp,*rr;
70
71#ifdef BN_COUNT
72 fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top);
73#endif
74 bn_check_top(a);
75
76 al=a->top;
77 if (al <= 0)
78 {
79 r->top=0;
80 return 1;
81 }
82
83 BN_CTX_start(ctx);
84 rr=(a != r) ? r : BN_CTX_get(ctx);
85 tmp=BN_CTX_get(ctx);
86 if (!rr || !tmp) goto err;
87
88 max = 2 * al; /* Non-zero (from above) */
89 if (bn_wexpand(rr,max) == NULL) goto err;
90
91 if (al == 4)
92 {
93#ifndef BN_SQR_COMBA
94 BN_ULONG t[8];
95 bn_sqr_normal(rr->d,a->d,4,t);
96#else
97 bn_sqr_comba4(rr->d,a->d);
98#endif
99 }
100 else if (al == 8)
101 {
102#ifndef BN_SQR_COMBA
103 BN_ULONG t[16];
104 bn_sqr_normal(rr->d,a->d,8,t);
105#else
106 bn_sqr_comba8(rr->d,a->d);
107#endif
108 }
109 else
110 {
111#if defined(BN_RECURSION)
112 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
113 {
114 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
115 bn_sqr_normal(rr->d,a->d,al,t);
116 }
117 else
118 {
119 int j,k;
120
121 j=BN_num_bits_word((BN_ULONG)al);
122 j=1<<(j-1);
123 k=j+j;
124 if (al == j)
125 {
126 if (bn_wexpand(tmp,k*2) == NULL) goto err;
127 bn_sqr_recursive(rr->d,a->d,al,tmp->d);
128 }
129 else
130 {
131 if (bn_wexpand(tmp,max) == NULL) goto err;
132 bn_sqr_normal(rr->d,a->d,al,tmp->d);
133 }
134 }
135#else
136 if (bn_wexpand(tmp,max) == NULL) goto err;
137 bn_sqr_normal(rr->d,a->d,al,tmp->d);
138#endif
139 }
140
141 rr->neg=0;
142 /* If the most-significant half of the top word of 'a' is zero, then
143 * the square of 'a' will max-1 words. */
144 if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l))
145 rr->top = max - 1;
146 else
147 rr->top = max;
148 if (rr != r) BN_copy(r,rr);
149 ret = 1;
150 err:
151 bn_check_top(rr);
152 bn_check_top(tmp);
153 BN_CTX_end(ctx);
154 return(ret);
155 }
156
157/* tmp must have 2*n words */
158void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
159 {
160 int i,j,max;
161 const BN_ULONG *ap;
162 BN_ULONG *rp;
163
164 max=n*2;
165 ap=a;
166 rp=r;
167 rp[0]=rp[max-1]=0;
168 rp++;
169 j=n;
170
171 if (--j > 0)
172 {
173 ap++;
174 rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
175 rp+=2;
176 }
177
178 for (i=n-2; i>0; i--)
179 {
180 j--;
181 ap++;
182 rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
183 rp+=2;
184 }
185
186 bn_add_words(r,r,r,max);
187
188 /* There will not be a carry */
189
190 bn_sqr_words(tmp,a,n);
191
192 bn_add_words(r,r,tmp,max);
193 }
194
195#ifdef BN_RECURSION
196/* r is 2*n words in size,
197 * a and b are both n words in size. (There's not actually a 'b' here ...)
198 * n must be a power of 2.
199 * We multiply and return the result.
200 * t must be 2*n words in size
201 * We calculate
202 * a[0]*b[0]
203 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
204 * a[1]*b[1]
205 */
206void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
207 {
208 int n=n2/2;
209 int zero,c1;
210 BN_ULONG ln,lo,*p;
211
212#ifdef BN_COUNT
213 fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2);
214#endif
215 if (n2 == 4)
216 {
217#ifndef BN_SQR_COMBA
218 bn_sqr_normal(r,a,4,t);
219#else
220 bn_sqr_comba4(r,a);
221#endif
222 return;
223 }
224 else if (n2 == 8)
225 {
226#ifndef BN_SQR_COMBA
227 bn_sqr_normal(r,a,8,t);
228#else
229 bn_sqr_comba8(r,a);
230#endif
231 return;
232 }
233 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
234 {
235 bn_sqr_normal(r,a,n2,t);
236 return;
237 }
238 /* r=(a[0]-a[1])*(a[1]-a[0]) */
239 c1=bn_cmp_words(a,&(a[n]),n);
240 zero=0;
241 if (c1 > 0)
242 bn_sub_words(t,a,&(a[n]),n);
243 else if (c1 < 0)
244 bn_sub_words(t,&(a[n]),a,n);
245 else
246 zero=1;
247
248 /* The result will always be negative unless it is zero */
249 p= &(t[n2*2]);
250
251 if (!zero)
252 bn_sqr_recursive(&(t[n2]),t,n,p);
253 else
254 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
255 bn_sqr_recursive(r,a,n,p);
256 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
257
258 /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
259 * r[10] holds (a[0]*b[0])
260 * r[32] holds (b[1]*b[1])
261 */
262
263 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
264
265 /* t[32] is negative */
266 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
267
268 /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
269 * r[10] holds (a[0]*a[0])
270 * r[32] holds (a[1]*a[1])
271 * c1 holds the carry bits
272 */
273 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
274 if (c1)
275 {
276 p= &(r[n+n2]);
277 lo= *p;
278 ln=(lo+c1)&BN_MASK2;
279 *p=ln;
280
281 /* The overflow will stop before we over write
282 * words we should not overwrite */
283 if (ln < (BN_ULONG)c1)
284 {
285 do {
286 p++;
287 lo= *p;
288 ln=(lo+1)&BN_MASK2;
289 *p=ln;
290 } while (ln == 0);
291 }
292 }
293 }
294#endif
diff --git a/src/lib/libcrypto/bn/bn_sqrt.c b/src/lib/libcrypto/bn/bn_sqrt.c
deleted file mode 100644
index 6beaf9e5e5..0000000000
--- a/src/lib/libcrypto/bn/bn_sqrt.c
+++ /dev/null
@@ -1,393 +0,0 @@
1/* crypto/bn/bn_sqrt.c */
2/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * and Bodo Moeller for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#include "bn_lcl.h"
60
61
62BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
63/* Returns 'ret' such that
64 * ret^2 == a (mod p),
65 * using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course
66 * in Algebraic Computational Number Theory", algorithm 1.5.1).
67 * 'p' must be prime!
68 */
69 {
70 BIGNUM *ret = in;
71 int err = 1;
72 int r;
73 BIGNUM *A, *b, *q, *t, *x, *y;
74 int e, i, j;
75
76 if (!BN_is_odd(p) || BN_abs_is_word(p, 1))
77 {
78 if (BN_abs_is_word(p, 2))
79 {
80 if (ret == NULL)
81 ret = BN_new();
82 if (ret == NULL)
83 goto end;
84 if (!BN_set_word(ret, BN_is_bit_set(a, 0)))
85 {
86 if (ret != in)
87 BN_free(ret);
88 return NULL;
89 }
90 bn_check_top(ret);
91 return ret;
92 }
93
94 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
95 return(NULL);
96 }
97
98 if (BN_is_zero(a) || BN_is_one(a))
99 {
100 if (ret == NULL)
101 ret = BN_new();
102 if (ret == NULL)
103 goto end;
104 if (!BN_set_word(ret, BN_is_one(a)))
105 {
106 if (ret != in)
107 BN_free(ret);
108 return NULL;
109 }
110 bn_check_top(ret);
111 return ret;
112 }
113
114 BN_CTX_start(ctx);
115 A = BN_CTX_get(ctx);
116 b = BN_CTX_get(ctx);
117 q = BN_CTX_get(ctx);
118 t = BN_CTX_get(ctx);
119 x = BN_CTX_get(ctx);
120 y = BN_CTX_get(ctx);
121 if (y == NULL) goto end;
122
123 if (ret == NULL)
124 ret = BN_new();
125 if (ret == NULL) goto end;
126
127 /* A = a mod p */
128 if (!BN_nnmod(A, a, p, ctx)) goto end;
129
130 /* now write |p| - 1 as 2^e*q where q is odd */
131 e = 1;
132 while (!BN_is_bit_set(p, e))
133 e++;
134 /* we'll set q later (if needed) */
135
136 if (e == 1)
137 {
138 /* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
139 * modulo (|p|-1)/2, and square roots can be computed
140 * directly by modular exponentiation.
141 * We have
142 * 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
143 * so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
144 */
145 if (!BN_rshift(q, p, 2)) goto end;
146 q->neg = 0;
147 if (!BN_add_word(q, 1)) goto end;
148 if (!BN_mod_exp(ret, A, q, p, ctx)) goto end;
149 err = 0;
150 goto vrfy;
151 }
152
153 if (e == 2)
154 {
155 /* |p| == 5 (mod 8)
156 *
157 * In this case 2 is always a non-square since
158 * Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
159 * So if a really is a square, then 2*a is a non-square.
160 * Thus for
161 * b := (2*a)^((|p|-5)/8),
162 * i := (2*a)*b^2
163 * we have
164 * i^2 = (2*a)^((1 + (|p|-5)/4)*2)
165 * = (2*a)^((p-1)/2)
166 * = -1;
167 * so if we set
168 * x := a*b*(i-1),
169 * then
170 * x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
171 * = a^2 * b^2 * (-2*i)
172 * = a*(-i)*(2*a*b^2)
173 * = a*(-i)*i
174 * = a.
175 *
176 * (This is due to A.O.L. Atkin,
177 * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
178 * November 1992.)
179 */
180
181 /* t := 2*a */
182 if (!BN_mod_lshift1_quick(t, A, p)) goto end;
183
184 /* b := (2*a)^((|p|-5)/8) */
185 if (!BN_rshift(q, p, 3)) goto end;
186 q->neg = 0;
187 if (!BN_mod_exp(b, t, q, p, ctx)) goto end;
188
189 /* y := b^2 */
190 if (!BN_mod_sqr(y, b, p, ctx)) goto end;
191
192 /* t := (2*a)*b^2 - 1*/
193 if (!BN_mod_mul(t, t, y, p, ctx)) goto end;
194 if (!BN_sub_word(t, 1)) goto end;
195
196 /* x = a*b*t */
197 if (!BN_mod_mul(x, A, b, p, ctx)) goto end;
198 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
199
200 if (!BN_copy(ret, x)) goto end;
201 err = 0;
202 goto vrfy;
203 }
204
205 /* e > 2, so we really have to use the Tonelli/Shanks algorithm.
206 * First, find some y that is not a square. */
207 if (!BN_copy(q, p)) goto end; /* use 'q' as temp */
208 q->neg = 0;
209 i = 2;
210 do
211 {
212 /* For efficiency, try small numbers first;
213 * if this fails, try random numbers.
214 */
215 if (i < 22)
216 {
217 if (!BN_set_word(y, i)) goto end;
218 }
219 else
220 {
221 if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) goto end;
222 if (BN_ucmp(y, p) >= 0)
223 {
224 if (!(p->neg ? BN_add : BN_sub)(y, y, p)) goto end;
225 }
226 /* now 0 <= y < |p| */
227 if (BN_is_zero(y))
228 if (!BN_set_word(y, i)) goto end;
229 }
230
231 r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
232 if (r < -1) goto end;
233 if (r == 0)
234 {
235 /* m divides p */
236 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
237 goto end;
238 }
239 }
240 while (r == 1 && ++i < 82);
241
242 if (r != -1)
243 {
244 /* Many rounds and still no non-square -- this is more likely
245 * a bug than just bad luck.
246 * Even if p is not prime, we should have found some y
247 * such that r == -1.
248 */
249 BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
250 goto end;
251 }
252
253 /* Here's our actual 'q': */
254 if (!BN_rshift(q, q, e)) goto end;
255
256 /* Now that we have some non-square, we can find an element
257 * of order 2^e by computing its q'th power. */
258 if (!BN_mod_exp(y, y, q, p, ctx)) goto end;
259 if (BN_is_one(y))
260 {
261 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
262 goto end;
263 }
264
265 /* Now we know that (if p is indeed prime) there is an integer
266 * k, 0 <= k < 2^e, such that
267 *
268 * a^q * y^k == 1 (mod p).
269 *
270 * As a^q is a square and y is not, k must be even.
271 * q+1 is even, too, so there is an element
272 *
273 * X := a^((q+1)/2) * y^(k/2),
274 *
275 * and it satisfies
276 *
277 * X^2 = a^q * a * y^k
278 * = a,
279 *
280 * so it is the square root that we are looking for.
281 */
282
283 /* t := (q-1)/2 (note that q is odd) */
284 if (!BN_rshift1(t, q)) goto end;
285
286 /* x := a^((q-1)/2) */
287 if (BN_is_zero(t)) /* special case: p = 2^e + 1 */
288 {
289 if (!BN_nnmod(t, A, p, ctx)) goto end;
290 if (BN_is_zero(t))
291 {
292 /* special case: a == 0 (mod p) */
293 BN_zero(ret);
294 err = 0;
295 goto end;
296 }
297 else
298 if (!BN_one(x)) goto end;
299 }
300 else
301 {
302 if (!BN_mod_exp(x, A, t, p, ctx)) goto end;
303 if (BN_is_zero(x))
304 {
305 /* special case: a == 0 (mod p) */
306 BN_zero(ret);
307 err = 0;
308 goto end;
309 }
310 }
311
312 /* b := a*x^2 (= a^q) */
313 if (!BN_mod_sqr(b, x, p, ctx)) goto end;
314 if (!BN_mod_mul(b, b, A, p, ctx)) goto end;
315
316 /* x := a*x (= a^((q+1)/2)) */
317 if (!BN_mod_mul(x, x, A, p, ctx)) goto end;
318
319 while (1)
320 {
321 /* Now b is a^q * y^k for some even k (0 <= k < 2^E
322 * where E refers to the original value of e, which we
323 * don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
324 *
325 * We have a*b = x^2,
326 * y^2^(e-1) = -1,
327 * b^2^(e-1) = 1.
328 */
329
330 if (BN_is_one(b))
331 {
332 if (!BN_copy(ret, x)) goto end;
333 err = 0;
334 goto vrfy;
335 }
336
337
338 /* find smallest i such that b^(2^i) = 1 */
339 i = 1;
340 if (!BN_mod_sqr(t, b, p, ctx)) goto end;
341 while (!BN_is_one(t))
342 {
343 i++;
344 if (i == e)
345 {
346 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
347 goto end;
348 }
349 if (!BN_mod_mul(t, t, t, p, ctx)) goto end;
350 }
351
352
353 /* t := y^2^(e - i - 1) */
354 if (!BN_copy(t, y)) goto end;
355 for (j = e - i - 1; j > 0; j--)
356 {
357 if (!BN_mod_sqr(t, t, p, ctx)) goto end;
358 }
359 if (!BN_mod_mul(y, t, t, p, ctx)) goto end;
360 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
361 if (!BN_mod_mul(b, b, y, p, ctx)) goto end;
362 e = i;
363 }
364
365 vrfy:
366 if (!err)
367 {
368 /* verify the result -- the input might have been not a square
369 * (test added in 0.9.8) */
370
371 if (!BN_mod_sqr(x, ret, p, ctx))
372 err = 1;
373
374 if (!err && 0 != BN_cmp(x, A))
375 {
376 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
377 err = 1;
378 }
379 }
380
381 end:
382 if (err)
383 {
384 if (ret != NULL && ret != in)
385 {
386 BN_clear_free(ret);
387 }
388 ret = NULL;
389 }
390 BN_CTX_end(ctx);
391 bn_check_top(ret);
392 return ret;
393 }
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
deleted file mode 100644
index ee7b87c45c..0000000000
--- a/src/lib/libcrypto/bn/bn_word.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/* crypto/bn/bn_word.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
64 {
65#ifndef BN_LLONG
66 BN_ULONG ret=0;
67#else
68 BN_ULLONG ret=0;
69#endif
70 int i;
71
72 if (w == 0)
73 return (BN_ULONG)-1;
74
75 bn_check_top(a);
76 w&=BN_MASK2;
77 for (i=a->top-1; i>=0; i--)
78 {
79#ifndef BN_LLONG
80 ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
81 ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
82#else
83 ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
84 (BN_ULLONG)w);
85#endif
86 }
87 return((BN_ULONG)ret);
88 }
89
90BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
91 {
92 BN_ULONG ret = 0;
93 int i, j;
94
95 bn_check_top(a);
96 w &= BN_MASK2;
97
98 if (!w)
99 /* actually this an error (division by zero) */
100 return (BN_ULONG)-1;
101 if (a->top == 0)
102 return 0;
103
104 /* normalize input (so bn_div_words doesn't complain) */
105 j = BN_BITS2 - BN_num_bits_word(w);
106 w <<= j;
107 if (!BN_lshift(a, a, j))
108 return (BN_ULONG)-1;
109
110 for (i=a->top-1; i>=0; i--)
111 {
112 BN_ULONG l,d;
113
114 l=a->d[i];
115 d=bn_div_words(ret,l,w);
116 ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
117 a->d[i]=d;
118 }
119 if ((a->top > 0) && (a->d[a->top-1] == 0))
120 a->top--;
121 ret >>= j;
122 bn_check_top(a);
123 return(ret);
124 }
125
126int BN_add_word(BIGNUM *a, BN_ULONG w)
127 {
128 BN_ULONG l;
129 int i;
130
131 bn_check_top(a);
132 w &= BN_MASK2;
133
134 /* degenerate case: w is zero */
135 if (!w) return 1;
136 /* degenerate case: a is zero */
137 if(BN_is_zero(a)) return BN_set_word(a, w);
138 /* handle 'a' when negative */
139 if (a->neg)
140 {
141 a->neg=0;
142 i=BN_sub_word(a,w);
143 if (!BN_is_zero(a))
144 a->neg=!(a->neg);
145 return(i);
146 }
147 /* Only expand (and risk failing) if it's possibly necessary */
148 if (((BN_ULONG)(a->d[a->top - 1] + 1) == 0) &&
149 (bn_wexpand(a,a->top+1) == NULL))
150 return(0);
151 i=0;
152 for (;;)
153 {
154 if (i >= a->top)
155 l=w;
156 else
157 l=(a->d[i]+w)&BN_MASK2;
158 a->d[i]=l;
159 if (w > l)
160 w=1;
161 else
162 break;
163 i++;
164 }
165 if (i >= a->top)
166 a->top++;
167 bn_check_top(a);
168 return(1);
169 }
170
171int BN_sub_word(BIGNUM *a, BN_ULONG w)
172 {
173 int i;
174
175 bn_check_top(a);
176 w &= BN_MASK2;
177
178 /* degenerate case: w is zero */
179 if (!w) return 1;
180 /* degenerate case: a is zero */
181 if(BN_is_zero(a))
182 {
183 i = BN_set_word(a,w);
184 if (i != 0)
185 BN_set_negative(a, 1);
186 return i;
187 }
188 /* handle 'a' when negative */
189 if (a->neg)
190 {
191 a->neg=0;
192 i=BN_add_word(a,w);
193 a->neg=1;
194 return(i);
195 }
196
197 if ((a->top == 1) && (a->d[0] < w))
198 {
199 a->d[0]=w-a->d[0];
200 a->neg=1;
201 return(1);
202 }
203 i=0;
204 for (;;)
205 {
206 if (a->d[i] >= w)
207 {
208 a->d[i]-=w;
209 break;
210 }
211 else
212 {
213 a->d[i]=(a->d[i]-w)&BN_MASK2;
214 i++;
215 w=1;
216 }
217 }
218 if ((a->d[i] == 0) && (i == (a->top-1)))
219 a->top--;
220 bn_check_top(a);
221 return(1);
222 }
223
224int BN_mul_word(BIGNUM *a, BN_ULONG w)
225 {
226 BN_ULONG ll;
227
228 bn_check_top(a);
229 w&=BN_MASK2;
230 if (a->top)
231 {
232 if (w == 0)
233 BN_zero(a);
234 else
235 {
236 ll=bn_mul_words(a->d,a->d,a->top,w);
237 if (ll)
238 {
239 if (bn_wexpand(a,a->top+1) == NULL) return(0);
240 a->d[a->top++]=ll;
241 }
242 }
243 }
244 bn_check_top(a);
245 return(1);
246 }
247
diff --git a/src/lib/libcrypto/bn/bn_x931p.c b/src/lib/libcrypto/bn/bn_x931p.c
deleted file mode 100644
index 04c5c874ec..0000000000
--- a/src/lib/libcrypto/bn/bn_x931p.c
+++ /dev/null
@@ -1,272 +0,0 @@
1/* bn_x931p.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project 2005.
4 */
5/* ====================================================================
6 * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include <stdio.h>
60#include <openssl/bn.h>
61
62/* X9.31 routines for prime derivation */
63
64/* X9.31 prime derivation. This is used to generate the primes pi
65 * (p1, p2, q1, q2) from a parameter Xpi by checking successive odd
66 * integers.
67 */
68
69static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
70 BN_GENCB *cb)
71 {
72 int i = 0;
73 if (!BN_copy(pi, Xpi))
74 return 0;
75 if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
76 return 0;
77 for(;;)
78 {
79 i++;
80 BN_GENCB_call(cb, 0, i);
81 /* NB 27 MR is specificed in X9.31 */
82 if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb))
83 break;
84 if (!BN_add_word(pi, 2))
85 return 0;
86 }
87 BN_GENCB_call(cb, 2, i);
88 return 1;
89 }
90
91/* This is the main X9.31 prime derivation function. From parameters
92 * Xp1, Xp2 and Xp derive the prime p. If the parameters p1 or p2 are
93 * not NULL they will be returned too: this is needed for testing.
94 */
95
96int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
97 const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
98 const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb)
99 {
100 int ret = 0;
101
102 BIGNUM *t, *p1p2, *pm1;
103
104 /* Only even e supported */
105 if (!BN_is_odd(e))
106 return 0;
107
108 BN_CTX_start(ctx);
109 if (!p1)
110 p1 = BN_CTX_get(ctx);
111
112 if (!p2)
113 p2 = BN_CTX_get(ctx);
114
115 t = BN_CTX_get(ctx);
116
117 p1p2 = BN_CTX_get(ctx);
118
119 pm1 = BN_CTX_get(ctx);
120
121 if (!bn_x931_derive_pi(p1, Xp1, ctx, cb))
122 goto err;
123
124 if (!bn_x931_derive_pi(p2, Xp2, ctx, cb))
125 goto err;
126
127 if (!BN_mul(p1p2, p1, p2, ctx))
128 goto err;
129
130 /* First set p to value of Rp */
131
132 if (!BN_mod_inverse(p, p2, p1, ctx))
133 goto err;
134
135 if (!BN_mul(p, p, p2, ctx))
136 goto err;
137
138 if (!BN_mod_inverse(t, p1, p2, ctx))
139 goto err;
140
141 if (!BN_mul(t, t, p1, ctx))
142 goto err;
143
144 if (!BN_sub(p, p, t))
145 goto err;
146
147 if (p->neg && !BN_add(p, p, p1p2))
148 goto err;
149
150 /* p now equals Rp */
151
152 if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
153 goto err;
154
155 if (!BN_add(p, p, Xp))
156 goto err;
157
158 /* p now equals Yp0 */
159
160 for (;;)
161 {
162 int i = 1;
163 BN_GENCB_call(cb, 0, i++);
164 if (!BN_copy(pm1, p))
165 goto err;
166 if (!BN_sub_word(pm1, 1))
167 goto err;
168 if (!BN_gcd(t, pm1, e, ctx))
169 goto err;
170 if (BN_is_one(t)
171 /* X9.31 specifies 8 MR and 1 Lucas test or any prime test
172 * offering similar or better guarantees 50 MR is considerably
173 * better.
174 */
175 && BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb))
176 break;
177 if (!BN_add(p, p, p1p2))
178 goto err;
179 }
180
181 BN_GENCB_call(cb, 3, 0);
182
183 ret = 1;
184
185 err:
186
187 BN_CTX_end(ctx);
188
189 return ret;
190 }
191
192/* Generate pair of paramters Xp, Xq for X9.31 prime generation.
193 * Note: nbits paramter is sum of number of bits in both.
194 */
195
196int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
197 {
198 BIGNUM *t;
199 int i;
200 /* Number of bits for each prime is of the form
201 * 512+128s for s = 0, 1, ...
202 */
203 if ((nbits < 1024) || (nbits & 0xff))
204 return 0;
205 nbits >>= 1;
206 /* The random value Xp must be between sqrt(2) * 2^(nbits-1) and
207 * 2^nbits - 1. By setting the top two bits we ensure that the lower
208 * bound is exceeded.
209 */
210 if (!BN_rand(Xp, nbits, 1, 0))
211 return 0;
212
213 BN_CTX_start(ctx);
214 t = BN_CTX_get(ctx);
215
216 for (i = 0; i < 1000; i++)
217 {
218 if (!BN_rand(Xq, nbits, 1, 0))
219 return 0;
220 /* Check that |Xp - Xq| > 2^(nbits - 100) */
221 BN_sub(t, Xp, Xq);
222 if (BN_num_bits(t) > (nbits - 100))
223 break;
224 }
225
226 BN_CTX_end(ctx);
227
228 if (i < 1000)
229 return 1;
230
231 return 0;
232
233 }
234
235/* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1
236 * and Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL
237 * the relevant parameter will be stored in it.
238 *
239 * Due to the fact that |Xp - Xq| > 2^(nbits - 100) must be satisfied Xp and Xq
240 * are generated using the previous function and supplied as input.
241 */
242
243int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
244 BIGNUM *Xp1, BIGNUM *Xp2,
245 const BIGNUM *Xp,
246 const BIGNUM *e, BN_CTX *ctx,
247 BN_GENCB *cb)
248 {
249 int ret = 0;
250
251 BN_CTX_start(ctx);
252 if (!Xp1)
253 Xp1 = BN_CTX_get(ctx);
254 if (!Xp2)
255 Xp2 = BN_CTX_get(ctx);
256
257 if (!BN_rand(Xp1, 101, 0, 0))
258 goto error;
259 if (!BN_rand(Xp2, 101, 0, 0))
260 goto error;
261 if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
262 goto error;
263
264 ret = 1;
265
266 error:
267 BN_CTX_end(ctx);
268
269 return ret;
270
271 }
272