diff options
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r-- | src/lib/libcrypto/bn/Makefile.ssl | 33 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/ia64.S | 235 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/pa-risc2.s | 36 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/vms.mar | 254 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn.h | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_div.c | 28 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_lcl.h | 26 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_lib.c | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 529 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_prime.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_rand.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_word.c | 5 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bntest.c | 23 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/divtest.c | 6 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/exptest.c | 22 |
15 files changed, 500 insertions, 707 deletions
diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl index 6a479726c4..fa17d3c7d8 100644 --- a/src/lib/libcrypto/bn/Makefile.ssl +++ b/src/lib/libcrypto/bn/Makefile.ssl | |||
@@ -23,14 +23,6 @@ BN_ASM= bn_asm.o | |||
23 | 23 | ||
24 | CFLAGS= $(INCLUDES) $(CFLAG) | 24 | CFLAGS= $(INCLUDES) $(CFLAG) |
25 | 25 | ||
26 | # We let the C compiler driver to take care of .s files. This is done in | ||
27 | # order to be excused from maintaining a separate set of architecture | ||
28 | # dependent assembler flags. E.g. if you throw -mcpu=ultrasparc at SPARC | ||
29 | # gcc, then the driver will automatically translate it to -xarch=v8plus | ||
30 | # and pass it down to assembler. | ||
31 | AS=$(CC) -c | ||
32 | ASFLAGS=$(CFLAGS) | ||
33 | |||
34 | GENERAL=Makefile | 26 | GENERAL=Makefile |
35 | TEST=bntest.c exptest.c | 27 | TEST=bntest.c exptest.c |
36 | APPS= | 28 | APPS= |
@@ -73,22 +65,11 @@ lib: $(LIBOBJ) | |||
73 | @touch lib | 65 | @touch lib |
74 | 66 | ||
75 | # elf | 67 | # elf |
76 | asm/bn86-elf.o: asm/bn86unix.cpp | 68 | asm/bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl |
77 | $(CPP) -DELF -x c asm/bn86unix.cpp | as -o asm/bn86-elf.o | 69 | (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > bn86-elf.s) |
78 | |||
79 | asm/co86-elf.o: asm/co86unix.cpp | ||
80 | $(CPP) -DELF -x c asm/co86unix.cpp | as -o asm/co86-elf.o | ||
81 | 70 | ||
82 | # solaris | 71 | asm/co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl |
83 | asm/bn86-sol.o: asm/bn86unix.cpp | 72 | (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > co86-elf.s) |
84 | $(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s | ||
85 | as -o asm/bn86-sol.o asm/bn86-sol.s | ||
86 | rm -f asm/bn86-sol.s | ||
87 | |||
88 | asm/co86-sol.o: asm/co86unix.cpp | ||
89 | $(CC) -E -DSOL asm/co86unix.cpp | sed 's/^#.*//' > asm/co86-sol.s | ||
90 | as -o asm/co86-sol.o asm/co86-sol.s | ||
91 | rm -f asm/co86-sol.s | ||
92 | 73 | ||
93 | # a.out | 74 | # a.out |
94 | asm/bn86-out.o: asm/bn86unix.cpp | 75 | asm/bn86-out.o: asm/bn86unix.cpp |
@@ -136,6 +117,8 @@ asm/ia64-cpp.o: asm/ia64.S | |||
136 | $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \ | 117 | $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \ |
137 | rm -f /tmp/ia64.$$$$.s | 118 | rm -f /tmp/ia64.$$$$.s |
138 | 119 | ||
120 | asm/x86_64-gcc.o: asm/x86_64-gcc.c | ||
121 | |||
139 | files: | 122 | files: |
140 | $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO | 123 | $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO |
141 | 124 | ||
@@ -169,14 +152,14 @@ lint: | |||
169 | lint -DLINT $(INCLUDES) $(SRC)>fluff | 152 | lint -DLINT $(INCLUDES) $(SRC)>fluff |
170 | 153 | ||
171 | depend: | 154 | depend: |
172 | $(MAKEDEPEND) $(CFLAG) $(INCLUDES) $(DEPFLAG) $(PROGS) $(LIBSRC) | 155 | $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) |
173 | 156 | ||
174 | dclean: | 157 | dclean: |
175 | $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new | 158 | $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new |
176 | mv -f Makefile.new $(MAKEFILE) | 159 | mv -f Makefile.new $(MAKEFILE) |
177 | 160 | ||
178 | clean: | 161 | clean: |
179 | rm -f asm/co86unix.cpp asm/bn86unix.cpp *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s | 162 | rm -f asm/co86unix.cpp asm/bn86unix.cpp asm/*-elf.* *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s |
180 | 163 | ||
181 | # DO NOT DELETE THIS LINE -- make depend depends on it. | 164 | # DO NOT DELETE THIS LINE -- make depend depends on it. |
182 | 165 | ||
diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S index ae56066310..7dfda85566 100644 --- a/src/lib/libcrypto/bn/asm/ia64.S +++ b/src/lib/libcrypto/bn/asm/ia64.S | |||
@@ -1,6 +1,6 @@ | |||
1 | .explicit | 1 | .explicit |
2 | .text | 2 | .text |
3 | .ident "ia64.S, Version 1.1" | 3 | .ident "ia64.S, Version 2.0" |
4 | .ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | 4 | .ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" |
5 | 5 | ||
6 | // | 6 | // |
@@ -13,6 +13,35 @@ | |||
13 | // disclaimed. | 13 | // disclaimed. |
14 | // ==================================================================== | 14 | // ==================================================================== |
15 | // | 15 | // |
16 | // Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is | ||
17 | // different from Itanium to this module viewpoint. Most notably, is it | ||
18 | // "wider" than Itanium? Can you experience loop scalability as | ||
19 | // discussed in commentary sections? Not really:-( Itanium2 has 6 | ||
20 | // integer ALU ports, i.e. it's 2 ports wider, but it's not enough to | ||
21 | // spin twice as fast, as I need 8 IALU ports. Amount of floating point | ||
22 | // ports is the same, i.e. 2, while I need 4. In other words, to this | ||
23 | // module Itanium2 remains effectively as "wide" as Itanium. Yet it's | ||
24 | // essentially different in respect to this module, and a re-tune was | ||
25 | // required. Well, because some intruction latencies has changed. Most | ||
26 | // noticeably those intensively used: | ||
27 | // | ||
28 | // Itanium Itanium2 | ||
29 | // ldf8 9 6 L2 hit | ||
30 | // ld8 2 1 L1 hit | ||
31 | // getf 2 5 | ||
32 | // xma[->getf] 7[+1] 4[+0] | ||
33 | // add[->st8] 1[+1] 1[+0] | ||
34 | // | ||
35 | // What does it mean? You might ratiocinate that the original code | ||
36 | // should run just faster... Because sum of latencies is smaller... | ||
37 | // Wrong! Note that getf latency increased. This means that if a loop is | ||
38 | // scheduled for lower latency (and they are), then it will suffer from | ||
39 | // stall condition and the code will therefore turn anti-scalable, e.g. | ||
40 | // original bn_mul_words spun at 5*n or 2.5 times slower than expected | ||
41 | // on Itanium2! What to do? Reschedule loops for Itanium2? But then | ||
42 | // Itanium would exhibit anti-scalability. So I've chosen to reschedule | ||
43 | // for worst latency for every instruction aiming for best *all-round* | ||
44 | // performance. | ||
16 | 45 | ||
17 | // Q. How much faster does it get? | 46 | // Q. How much faster does it get? |
18 | // A. Here is the output from 'openssl speed rsa dsa' for vanilla | 47 | // A. Here is the output from 'openssl speed rsa dsa' for vanilla |
@@ -149,12 +178,27 @@ bn_add_words: | |||
149 | brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16 | 178 | brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16 |
150 | } | 179 | } |
151 | .body | 180 | .body |
152 | { .mib; mov r14=r32 // rp | 181 | { .mib; |
182 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
183 | addp4 r14=0,r32 // rp | ||
184 | #else | ||
185 | mov r14=r32 // rp | ||
186 | #endif | ||
153 | mov r9=pr };; | 187 | mov r9=pr };; |
154 | { .mii; mov r15=r33 // ap | 188 | { .mii; |
189 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
190 | addp4 r15=0,r33 // ap | ||
191 | #else | ||
192 | mov r15=r33 // ap | ||
193 | #endif | ||
155 | mov ar.lc=r10 | 194 | mov ar.lc=r10 |
156 | mov ar.ec=6 } | 195 | mov ar.ec=6 } |
157 | { .mib; mov r16=r34 // bp | 196 | { .mib; |
197 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
198 | addp4 r16=0,r34 // bp | ||
199 | #else | ||
200 | mov r16=r34 // bp | ||
201 | #endif | ||
158 | mov pr.rot=1<<16 };; | 202 | mov pr.rot=1<<16 };; |
159 | 203 | ||
160 | .L_bn_add_words_ctop: | 204 | .L_bn_add_words_ctop: |
@@ -174,7 +218,7 @@ bn_add_words: | |||
174 | 218 | ||
175 | { .mii; | 219 | { .mii; |
176 | (p59) add r8=1,r8 // return value | 220 | (p59) add r8=1,r8 // return value |
177 | mov pr=r9,-1 | 221 | mov pr=r9,0x1ffff |
178 | mov ar.lc=r3 } | 222 | mov ar.lc=r3 } |
179 | { .mbb; nop.b 0x0 | 223 | { .mbb; nop.b 0x0 |
180 | br.ret.sptk.many b0 };; | 224 | br.ret.sptk.many b0 };; |
@@ -202,12 +246,27 @@ bn_sub_words: | |||
202 | brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16 | 246 | brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16 |
203 | } | 247 | } |
204 | .body | 248 | .body |
205 | { .mib; mov r14=r32 // rp | 249 | { .mib; |
250 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
251 | addp4 r14=0,r32 // rp | ||
252 | #else | ||
253 | mov r14=r32 // rp | ||
254 | #endif | ||
206 | mov r9=pr };; | 255 | mov r9=pr };; |
207 | { .mii; mov r15=r33 // ap | 256 | { .mii; |
257 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
258 | addp4 r15=0,r33 // ap | ||
259 | #else | ||
260 | mov r15=r33 // ap | ||
261 | #endif | ||
208 | mov ar.lc=r10 | 262 | mov ar.lc=r10 |
209 | mov ar.ec=6 } | 263 | mov ar.ec=6 } |
210 | { .mib; mov r16=r34 // bp | 264 | { .mib; |
265 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
266 | addp4 r16=0,r34 // bp | ||
267 | #else | ||
268 | mov r16=r34 // bp | ||
269 | #endif | ||
211 | mov pr.rot=1<<16 };; | 270 | mov pr.rot=1<<16 };; |
212 | 271 | ||
213 | .L_bn_sub_words_ctop: | 272 | .L_bn_sub_words_ctop: |
@@ -227,7 +286,7 @@ bn_sub_words: | |||
227 | 286 | ||
228 | { .mii; | 287 | { .mii; |
229 | (p59) add r8=1,r8 // return value | 288 | (p59) add r8=1,r8 // return value |
230 | mov pr=r9,-1 | 289 | mov pr=r9,0x1ffff |
231 | mov ar.lc=r3 } | 290 | mov ar.lc=r3 } |
232 | { .mbb; nop.b 0x0 | 291 | { .mbb; nop.b 0x0 |
233 | br.ret.sptk.many b0 };; | 292 | br.ret.sptk.many b0 };; |
@@ -253,7 +312,7 @@ bn_mul_words: | |||
253 | #ifdef XMA_TEMPTATION | 312 | #ifdef XMA_TEMPTATION |
254 | { .mfi; alloc r2=ar.pfs,4,0,0,0 };; | 313 | { .mfi; alloc r2=ar.pfs,4,0,0,0 };; |
255 | #else | 314 | #else |
256 | { .mfi; alloc r2=ar.pfs,4,4,0,8 };; | 315 | { .mfi; alloc r2=ar.pfs,4,12,0,16 };; |
257 | #endif | 316 | #endif |
258 | { .mib; mov r8=r0 // return value | 317 | { .mib; mov r8=r0 // return value |
259 | cmp4.le p6,p0=r34,r0 | 318 | cmp4.le p6,p0=r34,r0 |
@@ -266,24 +325,30 @@ bn_mul_words: | |||
266 | 325 | ||
267 | .body | 326 | .body |
268 | { .mib; setf.sig f8=r35 // w | 327 | { .mib; setf.sig f8=r35 // w |
269 | mov pr.rot=0x400001<<16 | 328 | mov pr.rot=0x800001<<16 |
270 | // ------^----- serves as (p48) at first (p26) | 329 | // ------^----- serves as (p50) at first (p27) |
271 | brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16 | 330 | brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16 |
272 | } | 331 | } |
273 | 332 | ||
274 | #ifndef XMA_TEMPTATION | 333 | #ifndef XMA_TEMPTATION |
275 | 334 | ||
276 | { .mii; mov r14=r32 // rp | 335 | { .mii; |
277 | mov r15=r33 // ap | 336 | #if defined(_HPUX_SOURCE) && defined(_ILP32) |
337 | addp4 r14=0,r32 // rp | ||
338 | addp4 r15=0,r33 // ap | ||
339 | #else | ||
340 | mov r14=r32 // rp | ||
341 | mov r15=r33 // ap | ||
342 | #endif | ||
278 | mov ar.lc=r10 } | 343 | mov ar.lc=r10 } |
279 | { .mii; mov r39=0 // serves as r33 at first (p26) | 344 | { .mii; mov r40=0 // serves as r35 at first (p27) |
280 | mov ar.ec=12 };; | 345 | mov ar.ec=13 };; |
281 | 346 | ||
282 | // This loop spins in 2*(n+11) ticks. It's scheduled for data in L2 | 347 | // This loop spins in 2*(n+12) ticks. It's scheduled for data in Itanium |
283 | // cache (i.e. 9 ticks away) as floating point load/store instructions | 348 | // L2 cache (i.e. 9 ticks away) as floating point load/store instructions |
284 | // bypass L1 cache and L2 latency is actually best-case scenario for | 349 | // bypass L1 cache and L2 latency is actually best-case scenario for |
285 | // ldf8. The loop is not scalable and shall run in 2*(n+11) even on | 350 | // ldf8. The loop is not scalable and shall run in 2*(n+12) even on |
286 | // "wider" IA-64 implementations. It's a trade-off here. n+22 loop | 351 | // "wider" IA-64 implementations. It's a trade-off here. n+24 loop |
287 | // would give us ~5% in *overall* performance improvement on "wider" | 352 | // would give us ~5% in *overall* performance improvement on "wider" |
288 | // IA-64, but would hurt Itanium for about same because of longer | 353 | // IA-64, but would hurt Itanium for about same because of longer |
289 | // epilogue. As it's a matter of few percents in either case I've | 354 | // epilogue. As it's a matter of few percents in either case I've |
@@ -291,25 +356,25 @@ bn_mul_words: | |||
291 | // this very instruction sequence in bn_mul_add_words loop which in | 356 | // this very instruction sequence in bn_mul_add_words loop which in |
292 | // turn is scalable). | 357 | // turn is scalable). |
293 | .L_bn_mul_words_ctop: | 358 | .L_bn_mul_words_ctop: |
294 | { .mfi; (p25) getf.sig r36=f49 // low | 359 | { .mfi; (p25) getf.sig r36=f52 // low |
295 | (p21) xmpy.lu f45=f37,f8 | 360 | (p21) xmpy.lu f48=f37,f8 |
296 | (p27) cmp.ltu p52,p48=r39,r38 } | 361 | (p28) cmp.ltu p54,p50=r41,r39 } |
297 | { .mfi; (p16) ldf8 f32=[r15],8 | 362 | { .mfi; (p16) ldf8 f32=[r15],8 |
298 | (p21) xmpy.hu f38=f37,f8 | 363 | (p21) xmpy.hu f40=f37,f8 |
299 | (p0) nop.i 0x0 };; | 364 | (p0) nop.i 0x0 };; |
300 | { .mii; (p26) getf.sig r32=f43 // high | 365 | { .mii; (p25) getf.sig r32=f44 // high |
301 | .pred.rel "mutex",p48,p52 | 366 | .pred.rel "mutex",p50,p54 |
302 | (p48) add r38=r37,r33 // (p26) | 367 | (p50) add r40=r38,r35 // (p27) |
303 | (p52) add r38=r37,r33,1 } // (p26) | 368 | (p54) add r40=r38,r35,1 } // (p27) |
304 | { .mfb; (p27) st8 [r14]=r39,8 | 369 | { .mfb; (p28) st8 [r14]=r41,8 |
305 | (p0) nop.f 0x0 | 370 | (p0) nop.f 0x0 |
306 | br.ctop.sptk .L_bn_mul_words_ctop };; | 371 | br.ctop.sptk .L_bn_mul_words_ctop };; |
307 | .L_bn_mul_words_cend: | 372 | .L_bn_mul_words_cend: |
308 | 373 | ||
309 | { .mii; nop.m 0x0 | 374 | { .mii; nop.m 0x0 |
310 | .pred.rel "mutex",p49,p53 | 375 | .pred.rel "mutex",p51,p55 |
311 | (p49) add r8=r34,r0 | 376 | (p51) add r8=r36,r0 |
312 | (p53) add r8=r34,r0,1 } | 377 | (p55) add r8=r36,r0,1 } |
313 | { .mfb; nop.m 0x0 | 378 | { .mfb; nop.m 0x0 |
314 | nop.f 0x0 | 379 | nop.f 0x0 |
315 | nop.b 0x0 } | 380 | nop.b 0x0 } |
@@ -344,7 +409,7 @@ bn_mul_words: | |||
344 | #endif // XMA_TEMPTATION | 409 | #endif // XMA_TEMPTATION |
345 | 410 | ||
346 | { .mii; nop.m 0x0 | 411 | { .mii; nop.m 0x0 |
347 | mov pr=r9,-1 | 412 | mov pr=r9,0x1ffff |
348 | mov ar.lc=r3 } | 413 | mov ar.lc=r3 } |
349 | { .mfb; rum 1<<5 // clear um.mfh | 414 | { .mfb; rum 1<<5 // clear um.mfh |
350 | nop.f 0x0 | 415 | nop.f 0x0 |
@@ -376,59 +441,69 @@ bn_mul_add_words: | |||
376 | 441 | ||
377 | .body | 442 | .body |
378 | { .mib; setf.sig f8=r35 // w | 443 | { .mib; setf.sig f8=r35 // w |
379 | mov pr.rot=0x400001<<16 | 444 | mov pr.rot=0x800001<<16 |
380 | // ------^----- serves as (p48) at first (p26) | 445 | // ------^----- serves as (p50) at first (p27) |
381 | brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16 | 446 | brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16 |
382 | } | 447 | } |
383 | { .mii; mov r14=r32 // rp | 448 | { .mii; |
384 | mov r15=r33 // ap | 449 | #if defined(_HPUX_SOURCE) && defined(_ILP32) |
450 | addp4 r14=0,r32 // rp | ||
451 | addp4 r15=0,r33 // ap | ||
452 | #else | ||
453 | mov r14=r32 // rp | ||
454 | mov r15=r33 // ap | ||
455 | #endif | ||
385 | mov ar.lc=r10 } | 456 | mov ar.lc=r10 } |
386 | { .mii; mov r39=0 // serves as r33 at first (p26) | 457 | { .mii; mov r40=0 // serves as r35 at first (p27) |
387 | mov r18=r32 // rp copy | 458 | #if defined(_HPUX_SOURCE) && defined(_ILP32) |
388 | mov ar.ec=14 };; | 459 | addp4 r18=0,r32 // rp copy |
460 | #else | ||
461 | mov r18=r32 // rp copy | ||
462 | #endif | ||
463 | mov ar.ec=15 };; | ||
389 | 464 | ||
390 | // This loop spins in 3*(n+13) ticks on Itanium and should spin in | 465 | // This loop spins in 3*(n+14) ticks on Itanium and should spin in |
391 | // 2*(n+13) on "wider" IA-64 implementations (to be verified with new | 466 | // 2*(n+14) on "wider" IA-64 implementations (to be verified with new |
392 | // µ-architecture manuals as they become available). As usual it's | 467 | // µ-architecture manuals as they become available). As usual it's |
393 | // possible to compress the epilogue, down to 10 in this case, at the | 468 | // possible to compress the epilogue, down to 10 in this case, at the |
394 | // cost of scalability. Compressed (and therefore non-scalable) loop | 469 | // cost of scalability. Compressed (and therefore non-scalable) loop |
395 | // running at 3*(n+10) would buy you ~10% on Itanium but take ~35% | 470 | // running at 3*(n+11) would buy you ~10% on Itanium but take ~35% |
396 | // from "wider" IA-64 so let it be scalable! Special attention was | 471 | // from "wider" IA-64 so let it be scalable! Special attention was |
397 | // paid for having the loop body split at 64-byte boundary. ld8 is | 472 | // paid for having the loop body split at 64-byte boundary. ld8 is |
398 | // scheduled for L1 cache as the data is more than likely there. | 473 | // scheduled for L1 cache as the data is more than likely there. |
399 | // Indeed, bn_mul_words has put it there a moment ago:-) | 474 | // Indeed, bn_mul_words has put it there a moment ago:-) |
400 | .L_bn_mul_add_words_ctop: | 475 | .L_bn_mul_add_words_ctop: |
401 | { .mfi; (p25) getf.sig r36=f49 // low | 476 | { .mfi; (p25) getf.sig r36=f52 // low |
402 | (p21) xmpy.lu f45=f37,f8 | 477 | (p21) xmpy.lu f48=f37,f8 |
403 | (p27) cmp.ltu p52,p48=r39,r38 } | 478 | (p28) cmp.ltu p54,p50=r41,r39 } |
404 | { .mfi; (p16) ldf8 f32=[r15],8 | 479 | { .mfi; (p16) ldf8 f32=[r15],8 |
405 | (p21) xmpy.hu f38=f37,f8 | 480 | (p21) xmpy.hu f40=f37,f8 |
406 | (p27) add r43=r43,r39 };; | 481 | (p28) add r45=r45,r41 };; |
407 | { .mii; (p26) getf.sig r32=f43 // high | 482 | { .mii; (p25) getf.sig r32=f44 // high |
408 | .pred.rel "mutex",p48,p52 | 483 | .pred.rel "mutex",p50,p54 |
409 | (p48) add r38=r37,r33 // (p26) | 484 | (p50) add r40=r38,r35 // (p27) |
410 | (p52) add r38=r37,r33,1 } // (p26) | 485 | (p54) add r40=r38,r35,1 } // (p27) |
411 | { .mfb; (p27) cmp.ltu.unc p56,p0=r43,r39 | 486 | { .mfb; (p28) cmp.ltu.unc p60,p0=r45,r41 |
412 | (p0) nop.f 0x0 | 487 | (p0) nop.f 0x0 |
413 | (p0) nop.b 0x0 } | 488 | (p0) nop.b 0x0 } |
414 | { .mii; (p26) ld8 r42=[r18],8 | 489 | { .mii; (p27) ld8 r44=[r18],8 |
415 | (p58) cmp.eq.or p57,p0=-1,r44 | 490 | (p62) cmp.eq.or p61,p0=-1,r46 |
416 | (p58) add r44=1,r44 } | 491 | (p62) add r46=1,r46 } |
417 | { .mfb; (p29) st8 [r14]=r45,8 | 492 | { .mfb; (p30) st8 [r14]=r47,8 |
418 | (p0) nop.f 0x0 | 493 | (p0) nop.f 0x0 |
419 | br.ctop.sptk .L_bn_mul_add_words_ctop};; | 494 | br.ctop.sptk .L_bn_mul_add_words_ctop};; |
420 | .L_bn_mul_add_words_cend: | 495 | .L_bn_mul_add_words_cend: |
421 | 496 | ||
422 | { .mii; nop.m 0x0 | 497 | { .mii; nop.m 0x0 |
423 | .pred.rel "mutex",p51,p55 | 498 | .pred.rel "mutex",p53,p57 |
424 | (p51) add r8=r36,r0 | 499 | (p53) add r8=r38,r0 |
425 | (p55) add r8=r36,r0,1 } | 500 | (p57) add r8=r38,r0,1 } |
426 | { .mfb; nop.m 0x0 | 501 | { .mfb; nop.m 0x0 |
427 | nop.f 0x0 | 502 | nop.f 0x0 |
428 | nop.b 0x0 };; | 503 | nop.b 0x0 };; |
429 | { .mii; | 504 | { .mii; |
430 | (p59) add r8=1,r8 | 505 | (p63) add r8=1,r8 |
431 | mov pr=r9,-1 | 506 | mov pr=r9,0x1ffff |
432 | mov ar.lc=r3 } | 507 | mov ar.lc=r3 } |
433 | { .mfb; rum 1<<5 // clear um.mfh | 508 | { .mfb; rum 1<<5 // clear um.mfh |
434 | nop.f 0x0 | 509 | nop.f 0x0 |
@@ -461,6 +536,10 @@ bn_sqr_words: | |||
461 | mov r9=pr };; | 536 | mov r9=pr };; |
462 | 537 | ||
463 | .body | 538 | .body |
539 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
540 | { .mii; addp4 r32=0,r32 | ||
541 | addp4 r33=0,r33 };; | ||
542 | #endif | ||
464 | { .mib; | 543 | { .mib; |
465 | mov pr.rot=1<<16 | 544 | mov pr.rot=1<<16 |
466 | brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16 | 545 | brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16 |
@@ -492,7 +571,7 @@ bn_sqr_words: | |||
492 | .L_bn_sqr_words_cend: | 571 | .L_bn_sqr_words_cend: |
493 | 572 | ||
494 | { .mii; nop.m 0x0 | 573 | { .mii; nop.m 0x0 |
495 | mov pr=r9,-1 | 574 | mov pr=r9,0x1ffff |
496 | mov ar.lc=r3 } | 575 | mov ar.lc=r3 } |
497 | { .mfb; rum 1<<5 // clear um.mfh | 576 | { .mfb; rum 1<<5 // clear um.mfh |
498 | nop.f 0x0 | 577 | nop.f 0x0 |
@@ -526,7 +605,14 @@ bn_sqr_comba8: | |||
526 | .prologue | 605 | .prologue |
527 | .fframe 0 | 606 | .fframe 0 |
528 | .save ar.pfs,r2 | 607 | .save ar.pfs,r2 |
608 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
529 | { .mii; alloc r2=ar.pfs,2,1,0,0 | 609 | { .mii; alloc r2=ar.pfs,2,1,0,0 |
610 | addp4 r33=0,r33 | ||
611 | addp4 r32=0,r32 };; | ||
612 | { .mii; | ||
613 | #else | ||
614 | { .mii; alloc r2=ar.pfs,2,1,0,0 | ||
615 | #endif | ||
530 | mov r34=r33 | 616 | mov r34=r33 |
531 | add r14=8,r33 };; | 617 | add r14=8,r33 };; |
532 | .body | 618 | .body |
@@ -587,7 +673,14 @@ bn_mul_comba8: | |||
587 | .prologue | 673 | .prologue |
588 | .fframe 0 | 674 | .fframe 0 |
589 | .save ar.pfs,r2 | 675 | .save ar.pfs,r2 |
676 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
590 | { .mii; alloc r2=ar.pfs,3,0,0,0 | 677 | { .mii; alloc r2=ar.pfs,3,0,0,0 |
678 | addp4 r33=0,r33 | ||
679 | addp4 r34=0,r34 };; | ||
680 | { .mii; addp4 r32=0,r32 | ||
681 | #else | ||
682 | { .mii; alloc r2=ar.pfs,3,0,0,0 | ||
683 | #endif | ||
591 | add r14=8,r33 | 684 | add r14=8,r33 |
592 | add r17=8,r34 } | 685 | add r17=8,r34 } |
593 | .body | 686 | .body |
@@ -1138,7 +1231,14 @@ bn_sqr_comba4: | |||
1138 | .prologue | 1231 | .prologue |
1139 | .fframe 0 | 1232 | .fframe 0 |
1140 | .save ar.pfs,r2 | 1233 | .save ar.pfs,r2 |
1234 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
1235 | { .mii; alloc r2=ar.pfs,2,1,0,0 | ||
1236 | addp4 r32=0,r32 | ||
1237 | addp4 r33=0,r33 };; | ||
1238 | { .mii; | ||
1239 | #else | ||
1141 | { .mii; alloc r2=ar.pfs,2,1,0,0 | 1240 | { .mii; alloc r2=ar.pfs,2,1,0,0 |
1241 | #endif | ||
1142 | mov r34=r33 | 1242 | mov r34=r33 |
1143 | add r14=8,r33 };; | 1243 | add r14=8,r33 };; |
1144 | .body | 1244 | .body |
@@ -1164,7 +1264,14 @@ bn_mul_comba4: | |||
1164 | .prologue | 1264 | .prologue |
1165 | .fframe 0 | 1265 | .fframe 0 |
1166 | .save ar.pfs,r2 | 1266 | .save ar.pfs,r2 |
1267 | #if defined(_HPUX_SOURCE) && defined(_ILP32) | ||
1268 | { .mii; alloc r2=ar.pfs,3,0,0,0 | ||
1269 | addp4 r33=0,r33 | ||
1270 | addp4 r34=0,r34 };; | ||
1271 | { .mii; addp4 r32=0,r32 | ||
1272 | #else | ||
1167 | { .mii; alloc r2=ar.pfs,3,0,0,0 | 1273 | { .mii; alloc r2=ar.pfs,3,0,0,0 |
1274 | #endif | ||
1168 | add r14=8,r33 | 1275 | add r14=8,r33 |
1169 | add r17=8,r34 } | 1276 | add r17=8,r34 } |
1170 | .body | 1277 | .body |
@@ -1464,7 +1571,7 @@ bn_div_words: | |||
1464 | or r8=r8,r33 | 1571 | or r8=r8,r33 |
1465 | mov ar.pfs=r2 };; | 1572 | mov ar.pfs=r2 };; |
1466 | { .mii; shr.u r9=H,I // remainder if anybody wants it | 1573 | { .mii; shr.u r9=H,I // remainder if anybody wants it |
1467 | mov pr=r10,-1 } | 1574 | mov pr=r10,0x1ffff } |
1468 | { .mfb; br.ret.sptk.many b0 };; | 1575 | { .mfb; br.ret.sptk.many b0 };; |
1469 | 1576 | ||
1470 | // Unsigned 64 by 32 (well, by 64 for the moment) bit integer division | 1577 | // Unsigned 64 by 32 (well, by 64 for the moment) bit integer division |
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s index af9730d062..f3b16290eb 100644 --- a/src/lib/libcrypto/bn/asm/pa-risc2.s +++ b/src/lib/libcrypto/bn/asm/pa-risc2.s | |||
@@ -747,8 +747,8 @@ bn_div_words | |||
747 | .PROC | 747 | .PROC |
748 | .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN | 748 | .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN |
749 | .IMPORT BN_num_bits_word,CODE | 749 | .IMPORT BN_num_bits_word,CODE |
750 | .IMPORT __iob,DATA | 750 | ;--- not PIC .IMPORT __iob,DATA |
751 | .IMPORT fprintf,CODE | 751 | ;--- not PIC .IMPORT fprintf,CODE |
752 | .IMPORT abort,CODE | 752 | .IMPORT abort,CODE |
753 | .IMPORT $$div2U,MILLICODE | 753 | .IMPORT $$div2U,MILLICODE |
754 | .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE | 754 | .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE |
@@ -844,12 +844,12 @@ $0006001A | |||
844 | MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 | 844 | MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 |
845 | EXTRD,U %r3,63,32,%r7 ;offset 0xa1c | 845 | EXTRD,U %r3,63,32,%r7 ;offset 0xa1c |
846 | $D2 | 846 | $D2 |
847 | ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 | 847 | ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 |
848 | LDIL LR'C$7,%r21 ;offset 0xa24 | 848 | ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 |
849 | LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 | 849 | ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 |
850 | .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; | 850 | ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; |
851 | B,L fprintf,%r2 ;offset 0xa2c | 851 | ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c |
852 | LDO RR'C$7(%r21),%r25 ;offset 0xa30 | 852 | ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 |
853 | .CALL ; | 853 | .CALL ; |
854 | B,L abort,%r2 ;offset 0xa34 | 854 | B,L abort,%r2 ;offset 0xa34 |
855 | NOP ;offset 0xa38 | 855 | NOP ;offset 0xa38 |
@@ -1605,14 +1605,14 @@ bn_mul_comba4 | |||
1605 | .PROCEND | 1605 | .PROCEND |
1606 | 1606 | ||
1607 | 1607 | ||
1608 | .SPACE $TEXT$ | 1608 | ;--- not PIC .SPACE $TEXT$ |
1609 | .SUBSPA $CODE$ | 1609 | ;--- not PIC .SUBSPA $CODE$ |
1610 | .SPACE $PRIVATE$,SORT=16 | 1610 | ;--- not PIC .SPACE $PRIVATE$,SORT=16 |
1611 | .IMPORT $global$,DATA | 1611 | ;--- not PIC .IMPORT $global$,DATA |
1612 | .SPACE $TEXT$ | 1612 | ;--- not PIC .SPACE $TEXT$ |
1613 | .SUBSPA $CODE$ | 1613 | ;--- not PIC .SUBSPA $CODE$ |
1614 | .SUBSPA $LIT$,ACCESS=0x2c | 1614 | ;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c |
1615 | C$7 | 1615 | ;--- not PIC C$7 |
1616 | .ALIGN 8 | 1616 | ;--- not PIC .ALIGN 8 |
1617 | .STRINGZ "Division would overflow (%d)\n" | 1617 | ;--- not PIC .STRINGZ "Division would overflow (%d)\n" |
1618 | .END | 1618 | .END |
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar index 465f2774b6..aefab15cdb 100644 --- a/src/lib/libcrypto/bn/asm/vms.mar +++ b/src/lib/libcrypto/bn/asm/vms.mar | |||
@@ -1,4 +1,4 @@ | |||
1 | .title vax_bn_mul_add_word unsigned multiply & add, 32*32+32+32=>64 | 1 | .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64 |
2 | ; | 2 | ; |
3 | ; w.j.m. 15-jan-1999 | 3 | ; w.j.m. 15-jan-1999 |
4 | ; | 4 | ; |
@@ -59,7 +59,7 @@ w=16 ;(AP) w by value (input) | |||
59 | movl r6,r0 ; return c | 59 | movl r6,r0 ; return c |
60 | ret | 60 | ret |
61 | 61 | ||
62 | .title vax_bn_mul_word unsigned multiply & add, 32*32+32=>64 | 62 | .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64 |
63 | ; | 63 | ; |
64 | ; w.j.m. 15-jan-1999 | 64 | ; w.j.m. 15-jan-1999 |
65 | ; | 65 | ; |
@@ -172,147 +172,175 @@ n=12 ;(AP) n by value (input) | |||
172 | ; } | 172 | ; } |
173 | ; | 173 | ; |
174 | ; Using EDIV would be very easy, if it didn't do signed calculations. | 174 | ; Using EDIV would be very easy, if it didn't do signed calculations. |
175 | ; Therefore, som extra things have to happen around it. The way to | 175 | ; Any time any of the input numbers are signed, there are problems, |
176 | ; handle that is to shift all operands right one step (basically dividing | 176 | ; usually with integer overflow, at which point it returns useless |
177 | ; them by 2) and handle the different cases depending on what the lowest | 177 | ; data (the quotient gets the value of l, and the remainder becomes 0). |
178 | ; bit of each operand was. | ||
179 | ; | 178 | ; |
180 | ; To start with, let's define the following: | 179 | ; If it was just for the dividend, it would be very easy, just divide |
180 | ; it by 2 (unsigned), do the division, multiply the resulting quotient | ||
181 | ; and remainder by 2, add the bit that was dropped when dividing by 2 | ||
182 | ; to the remainder, and do some adjustment so the remainder doesn't | ||
183 | ; end up larger than the divisor. For some cases when the divisor is | ||
184 | ; negative (from EDIV's point of view, i.e. when the highest bit is set), | ||
185 | ; dividing the dividend by 2 isn't enough, and since some operations | ||
186 | ; might generate integer overflows even when the dividend is divided by | ||
187 | ; 4 (when the high part of the shifted down dividend ends up being exactly | ||
188 | ; half of the divisor, the result is the quotient 0x80000000, which is | ||
189 | ; negative...) it needs to be divided by 8. Furthermore, the divisor needs | ||
190 | ; to be divided by 2 (unsigned) as well, to avoid more problems with the sign. | ||
191 | ; In this case, a little extra fiddling with the remainder is required. | ||
181 | ; | 192 | ; |
182 | ; a' = l & 1 | 193 | ; So, the simplest way to handle this is always to divide the dividend |
183 | ; a2 = <h,l> >> 1 # UNSIGNED shift! | 194 | ; by 8, and to divide the divisor by 2 if it's highest bit is set. |
184 | ; b' = d & 1 | 195 | ; After EDIV has been used, the quotient gets multiplied by 8 if the |
185 | ; b2 = d >> 1 # UNSIGNED shift! | 196 | ; original divisor was positive, otherwise 4. The remainder, oddly |
197 | ; enough, is *always* multiplied by 8. | ||
198 | ; NOTE: in the case mentioned above, where the high part of the shifted | ||
199 | ; down dividend ends up being exactly half the shifted down divisor, we | ||
200 | ; end up with a 33 bit quotient. That's no problem however, it usually | ||
201 | ; means we have ended up with a too large remainder as well, and the | ||
202 | ; problem is fixed by the last part of the algorithm (next paragraph). | ||
186 | ; | 203 | ; |
187 | ; Now, use EDIV to calculate a quotient and a remainder: | 204 | ; The routine ends with comparing the resulting remainder with the |
205 | ; original divisor and if the remainder is larger, subtract the | ||
206 | ; original divisor from it, and increase the quotient by 1. This is | ||
207 | ; done until the remainder is smaller than the divisor. | ||
188 | ; | 208 | ; |
189 | ; q'' = a2/b2 | 209 | ; The complete algorithm looks like this: |
190 | ; r'' = a2 - q''*b2 | ||
191 | ; | 210 | ; |
192 | ; If b' is 0, the quotient is already correct, we just need to adjust the | 211 | ; d' = d |
193 | ; remainder: | 212 | ; l' = l & 7 |
213 | ; [h,l] = [h,l] >> 3 | ||
214 | ; [q,r] = floor([h,l] / d) # This is the EDIV operation | ||
215 | ; if (q < 0) q = -q # I doubt this is necessary any more | ||
194 | ; | 216 | ; |
195 | ; if (b' == 0) | 217 | ; r' = r >> 29 |
196 | ; { | 218 | ; if (d' >= 0) |
197 | ; r = 2*r'' + a' | 219 | ; q' = q >> 29 |
198 | ; q = q'' | 220 | ; q = q << 3 |
199 | ; } | 221 | ; else |
200 | ; | 222 | ; q' = q >> 30 |
201 | ; If b' is 1, we need to do other adjustements. The first thought is the | 223 | ; q = q << 2 |
202 | ; following (note that r' will not always have the right value, but an | 224 | ; r = (r << 3) + l' |
203 | ; adjustement follows further down): | ||
204 | ; | ||
205 | ; if (b' == 1) | ||
206 | ; { | ||
207 | ; q' = q'' | ||
208 | ; r' = a - q'*b | ||
209 | ; | ||
210 | ; However, one can note the folowing relationship: | ||
211 | ; | ||
212 | ; r'' = a2 - q''*b2 | ||
213 | ; => 2*r'' = 2*a2 - 2*q''*b2 | ||
214 | ; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1, | ||
215 | ; q' = q'' } | ||
216 | ; = a - a' - q'*(b - 1) | ||
217 | ; = a - q'*b - a' + q' | ||
218 | ; = r' - a' + q' | ||
219 | ; => r' = 2*r'' - q' + a' | ||
220 | ; | 225 | ; |
221 | ; This enables us to use r'' instead of discarding and calculating another | 226 | ; if (d' < 0) |
222 | ; modulo: | ||
223 | ; | ||
224 | ; if (b' == 1) | ||
225 | ; { | 227 | ; { |
226 | ; q' = q'' | 228 | ; [r',r] = [r',r] - q |
227 | ; r' = (r'' << 1) - q' + a' | 229 | ; while ([r',r] < 0) |
228 | ; | ||
229 | ; Now, all we have to do is adjust r', because it might be < 0: | ||
230 | ; | ||
231 | ; while (r' < 0) | ||
232 | ; { | 230 | ; { |
233 | ; r' = r' + b | 231 | ; [r',r] = [r',r] + d |
234 | ; q' = q' - 1 | 232 | ; [q',q] = [q',q] - 1 |
235 | ; } | 233 | ; } |
236 | ; } | 234 | ; } |
237 | ; | 235 | ; |
238 | ; return q' | 236 | ; while ([r',r] >= d') |
237 | ; { | ||
238 | ; [r',r] = [r',r] - d' | ||
239 | ; [q',q] = [q',q] + 1 | ||
240 | ; } | ||
241 | ; | ||
242 | ; return q | ||
239 | 243 | ||
240 | h=4 ;(AP) h by value (input) | 244 | h=4 ;(AP) h by value (input) |
241 | l=8 ;(AP) l by value (input) | 245 | l=8 ;(AP) l by value (input) |
242 | d=12 ;(AP) d by value (input) | 246 | d=12 ;(AP) d by value (input) |
243 | 247 | ||
244 | ;aprim=r5 | 248 | ;r2 = l, q |
245 | ;a2=r6 | 249 | ;r3 = h, r |
246 | ;a20=r6 | 250 | ;r4 = d |
247 | ;a21=r7 | 251 | ;r5 = l' |
248 | ;bprim=r8 | 252 | ;r6 = r' |
249 | ;b2=r9 | 253 | ;r7 = d' |
250 | ;qprim=r10 ; initially used as q'' | 254 | ;r8 = q' |
251 | ;rprim=r11 ; initially used as r'' | ||
252 | |||
253 | 255 | ||
254 | .psect code,nowrt | 256 | .psect code,nowrt |
255 | 257 | ||
256 | .entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11> | 258 | .entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8> |
257 | movl l(ap),r2 | 259 | movl l(ap),r2 |
258 | movl h(ap),r3 | 260 | movl h(ap),r3 |
259 | movl d(ap),r4 | 261 | movl d(ap),r4 |
260 | 262 | ||
261 | movl #0,r5 | 263 | bicl3 #^XFFFFFFF8,r2,r5 ; l' = l & 7 |
262 | movl #0,r8 | 264 | bicl3 #^X00000007,r2,r2 |
263 | movl #0,r0 | ||
264 | ; movl #0,r1 | ||
265 | 265 | ||
266 | rotl #-1,r2,r6 ; a20 = l >> 1 (almost) | 266 | bicl3 #^XFFFFFFF8,r3,r6 |
267 | rotl #-1,r3,r7 ; a21 = h >> 1 (almost) | 267 | bicl3 #^X00000007,r3,r3 |
268 | rotl #-1,r4,r9 ; b2 = d >> 1 (almost) | 268 | |
269 | addl r6,r2 | ||
269 | 270 | ||
270 | tstl r6 | 271 | rotl #-3,r2,r2 ; l = l >> 3 |
271 | bgeq 1$ | 272 | rotl #-3,r3,r3 ; h = h >> 3 |
272 | xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0 | 273 | |
273 | incl r5 ; a' = 1 | 274 | movl r4,r7 ; d' = d |
274 | 1$: | 275 | |
275 | tstl r7 | 276 | movl #0,r6 ; r' = 0 |
276 | bgeq 2$ | 277 | movl #0,r8 ; q' = 0 |
277 | xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1, | 278 | |
278 | ; since that's what was lowest in a21 | 279 | tstl r4 |
279 | xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 1 | ||
280 | 2$: | ||
281 | tstl r9 | ||
282 | beql 666$ ; Uh-oh, the divisor is 0... | 280 | beql 666$ ; Uh-oh, the divisor is 0... |
283 | bgtr 3$ | 281 | bgtr 1$ |
284 | xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 0 | 282 | rotl #-1,r4,r4 ; If d is negative, shift it right. |
285 | incl r8 ; b' = 1 | 283 | bicl2 #^X80000000,r4 ; Since d is then a large number, the |
286 | 3$: | 284 | ; lowest bit is insignificant |
287 | tstl r9 | 285 | ; (contradict that, and I'll fix the problem!) |
288 | bneq 4$ ; if b2 is 0, we know that b' is 1 | 286 | 1$: |
289 | tstl r3 | 287 | ediv r4,r2,r2,r3 ; Do the actual division |
290 | bneq 666$ ; if higher half isn't 0, we overflow | 288 | |
291 | movl r2,r10 ; otherwise, we have our result | 289 | tstl r2 |
292 | brb 42$ ; This is a success, really. | 290 | bgeq 3$ |
293 | 4$: | 291 | mnegl r2,r2 ; if q < 0, negate it |
294 | ediv r9,r6,r10,r11 | 292 | 3$: |
295 | 293 | tstl r7 | |
296 | tstl r8 | 294 | blss 4$ |
297 | bneq 5$ ; If b' != 0, go to the other part | 295 | rotl #3,r2,r2 ; q = q << 3 |
298 | ; addl3 r11,r11,r1 | 296 | bicl3 #^XFFFFFFF8,r2,r8 ; q' gets the high bits from q |
299 | ; addl2 r5,r1 | 297 | bicl3 #^X00000007,r2,r2 |
300 | brb 42$ | 298 | bsb 41$ |
301 | 5$: | 299 | 4$: ; else |
302 | ashl #1,r11,r11 | 300 | rotl #2,r2,r2 ; q = q << 2 |
303 | subl2 r10,r11 | 301 | bicl3 #^XFFFFFFFC,r2,r8 ; q' gets the high bits from q |
304 | addl2 r5,r11 | 302 | bicl3 #^X00000003,r2,r2 |
305 | bgeq 7$ | 303 | 41$: |
306 | 6$: | 304 | rotl #3,r3,r3 ; r = r << 3 |
307 | decl r10 | 305 | bicl3 #^XFFFFFFF8,r3,r6 ; r' gets the high bits from r |
308 | addl2 r4,r11 | 306 | bicl3 #^X00000007,r3,r3 |
309 | blss 6$ | 307 | addl r5,r3 ; r = r + l' |
310 | 7$: | 308 | |
311 | ; movl r11,r1 | 309 | tstl r7 |
310 | bgeq 5$ | ||
311 | bitl #1,r7 | ||
312 | beql 5$ ; if d' < 0 && d' & 1 | ||
313 | subl r2,r3 ; [r',r] = [r',r] - [q',q] | ||
314 | sbwc r8,r6 | ||
315 | 45$: | ||
316 | bgeq 5$ ; while r < 0 | ||
317 | decl r2 ; [q',q] = [q',q] - 1 | ||
318 | sbwc #0,r8 | ||
319 | addl r7,r3 ; [r',r] = [r',r] + d' | ||
320 | adwc #0,r6 | ||
321 | brb 45$ | ||
322 | |||
323 | ; The return points are placed in the middle to keep a short distance from | ||
324 | ; all the branch points | ||
312 | 42$: | 325 | 42$: |
313 | movl r10,r0 | 326 | ; movl r3,r1 |
327 | movl r2,r0 | ||
328 | ret | ||
314 | 666$: | 329 | 666$: |
330 | movl #^XFFFFFFFF,r0 | ||
315 | ret | 331 | ret |
332 | |||
333 | 5$: | ||
334 | tstl r6 | ||
335 | bneq 6$ | ||
336 | cmpl r3,r7 | ||
337 | blssu 42$ ; while [r',r] >= d' | ||
338 | 6$: | ||
339 | subl r7,r3 ; [r',r] = [r',r] - d' | ||
340 | sbwc #0,r6 | ||
341 | incl r2 ; [q',q] = [q',q] + 1 | ||
342 | adwc #0,r8 | ||
343 | brb 5$ | ||
316 | 344 | ||
317 | .title vax_bn_add_words unsigned add of two arrays | 345 | .title vax_bn_add_words unsigned add of two arrays |
318 | ; | 346 | ; |
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index b40682f831..3da6d8ced9 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h | |||
@@ -248,6 +248,8 @@ typedef struct bn_blinding_st | |||
248 | BIGNUM *A; | 248 | BIGNUM *A; |
249 | BIGNUM *Ai; | 249 | BIGNUM *Ai; |
250 | BIGNUM *mod; /* just a reference */ | 250 | BIGNUM *mod; /* just a reference */ |
251 | unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; | ||
252 | * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ | ||
251 | } BN_BLINDING; | 253 | } BN_BLINDING; |
252 | 254 | ||
253 | /* Used for montgomery multiplication */ | 255 | /* Used for montgomery multiplication */ |
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index f9a095e3b3..580d1201bc 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c | |||
@@ -150,6 +150,20 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, | |||
150 | q; \ | 150 | q; \ |
151 | }) | 151 | }) |
152 | # define REMAINDER_IS_ALREADY_CALCULATED | 152 | # define REMAINDER_IS_ALREADY_CALCULATED |
153 | # elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) | ||
154 | /* | ||
155 | * Same story here, but it's 128-bit by 64-bit division. Wow! | ||
156 | * <appro@fy.chalmers.se> | ||
157 | */ | ||
158 | # define bn_div_words(n0,n1,d0) \ | ||
159 | ({ asm volatile ( \ | ||
160 | "divq %4" \ | ||
161 | : "=a"(q), "=d"(rem) \ | ||
162 | : "a"(n1), "d"(n0), "g"(d0) \ | ||
163 | : "cc"); \ | ||
164 | q; \ | ||
165 | }) | ||
166 | # define REMAINDER_IS_ALREADY_CALCULATED | ||
153 | # endif /* __<cpu> */ | 167 | # endif /* __<cpu> */ |
154 | # endif /* __GNUC__ */ | 168 | # endif /* __GNUC__ */ |
155 | #endif /* OPENSSL_NO_ASM */ | 169 | #endif /* OPENSSL_NO_ASM */ |
@@ -268,6 +282,11 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, | |||
268 | q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0); | 282 | q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0); |
269 | #else | 283 | #else |
270 | q=bn_div_words(n0,n1,d0); | 284 | q=bn_div_words(n0,n1,d0); |
285 | #ifdef BN_DEBUG_LEVITTE | ||
286 | fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ | ||
287 | X) -> 0x%08X\n", | ||
288 | n0, n1, d0, q); | ||
289 | #endif | ||
271 | #endif | 290 | #endif |
272 | 291 | ||
273 | #ifndef REMAINDER_IS_ALREADY_CALCULATED | 292 | #ifndef REMAINDER_IS_ALREADY_CALCULATED |
@@ -292,11 +311,18 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, | |||
292 | BN_ULONG t2l,t2h,ql,qh; | 311 | BN_ULONG t2l,t2h,ql,qh; |
293 | 312 | ||
294 | q=bn_div_words(n0,n1,d0); | 313 | q=bn_div_words(n0,n1,d0); |
314 | #ifdef BN_DEBUG_LEVITTE | ||
315 | fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ | ||
316 | X) -> 0x%08X\n", | ||
317 | n0, n1, d0, q); | ||
318 | #endif | ||
295 | #ifndef REMAINDER_IS_ALREADY_CALCULATED | 319 | #ifndef REMAINDER_IS_ALREADY_CALCULATED |
296 | rem=(n1-q*d0)&BN_MASK2; | 320 | rem=(n1-q*d0)&BN_MASK2; |
297 | #endif | 321 | #endif |
298 | 322 | ||
299 | #ifdef BN_UMULT_HIGH | 323 | #if defined(BN_UMULT_LOHI) |
324 | BN_UMULT_LOHI(t2l,t2h,d1,q); | ||
325 | #elif defined(BN_UMULT_HIGH) | ||
300 | t2l = d1 * q; | 326 | t2l = d1 * q; |
301 | t2h = BN_UMULT_HIGH(d1,q); | 327 | t2h = BN_UMULT_HIGH(d1,q); |
302 | #else | 328 | #else |
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index 8a4dba375a..5614bc6164 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h | |||
@@ -230,6 +230,21 @@ struct bignum_ctx | |||
230 | : "r"(a), "r"(b)); \ | 230 | : "r"(a), "r"(b)); \ |
231 | ret; }) | 231 | ret; }) |
232 | # endif /* compiler */ | 232 | # endif /* compiler */ |
233 | # elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) | ||
234 | # if defined(__GNUC__) | ||
235 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
236 | register BN_ULONG ret,discard; \ | ||
237 | asm ("mulq %3" \ | ||
238 | : "=a"(discard),"=d"(ret) \ | ||
239 | : "a"(a), "g"(b) \ | ||
240 | : "cc"); \ | ||
241 | ret; }) | ||
242 | # define BN_UMULT_LOHI(low,high,a,b) \ | ||
243 | asm ("mulq %3" \ | ||
244 | : "=a"(low),"=d"(high) \ | ||
245 | : "a"(a),"g"(b) \ | ||
246 | : "cc"); | ||
247 | # endif | ||
233 | # endif /* cpu */ | 248 | # endif /* cpu */ |
234 | #endif /* OPENSSL_NO_ASM */ | 249 | #endif /* OPENSSL_NO_ASM */ |
235 | 250 | ||
@@ -337,7 +352,7 @@ struct bignum_ctx | |||
337 | 352 | ||
338 | #define LBITS(a) ((a)&BN_MASK2l) | 353 | #define LBITS(a) ((a)&BN_MASK2l) |
339 | #define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) | 354 | #define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) |
340 | #define L2HBITS(a) ((BN_ULONG)((a)&BN_MASK2l)<<BN_BITS4) | 355 | #define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) |
341 | 356 | ||
342 | #define LLBITS(a) ((a)&BN_MASKl) | 357 | #define LLBITS(a) ((a)&BN_MASKl) |
343 | #define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl) | 358 | #define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl) |
@@ -353,7 +368,7 @@ struct bignum_ctx | |||
353 | lt=(bl)*(lt); \ | 368 | lt=(bl)*(lt); \ |
354 | m1=(bl)*(ht); \ | 369 | m1=(bl)*(ht); \ |
355 | ht =(bh)*(ht); \ | 370 | ht =(bh)*(ht); \ |
356 | m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS(1L); \ | 371 | m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \ |
357 | ht+=HBITS(m); \ | 372 | ht+=HBITS(m); \ |
358 | m1=L2HBITS(m); \ | 373 | m1=L2HBITS(m); \ |
359 | lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \ | 374 | lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \ |
@@ -418,20 +433,19 @@ void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a); | |||
418 | int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n); | 433 | int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n); |
419 | int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, | 434 | int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, |
420 | int cl, int dl); | 435 | int cl, int dl); |
436 | #if 0 | ||
437 | /* bn_mul.c rollback <appro> */ | ||
421 | void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, | 438 | void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, |
422 | int dna,int dnb,BN_ULONG *t); | 439 | int dna,int dnb,BN_ULONG *t); |
423 | void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, | 440 | void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, |
424 | int n,int tna,int tnb,BN_ULONG *t); | 441 | int n,int tna,int tnb,BN_ULONG *t); |
442 | #endif | ||
425 | void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t); | 443 | void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t); |
426 | void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n); | 444 | void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n); |
427 | void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, | 445 | void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, |
428 | BN_ULONG *t); | 446 | BN_ULONG *t); |
429 | void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2, | 447 | void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2, |
430 | BN_ULONG *t); | 448 | BN_ULONG *t); |
431 | BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
432 | int cl, int dl); | ||
433 | BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | ||
434 | int cl, int dl); | ||
435 | 449 | ||
436 | #ifdef __cplusplus | 450 | #ifdef __cplusplus |
437 | } | 451 | } |
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index ce2ae78419..463463cfcb 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c | |||
@@ -263,12 +263,12 @@ void BN_clear_free(BIGNUM *a) | |||
263 | if (a == NULL) return; | 263 | if (a == NULL) return; |
264 | if (a->d != NULL) | 264 | if (a->d != NULL) |
265 | { | 265 | { |
266 | memset(a->d,0,a->dmax*sizeof(a->d[0])); | 266 | OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0])); |
267 | if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) | 267 | if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) |
268 | OPENSSL_free(a->d); | 268 | OPENSSL_free(a->d); |
269 | } | 269 | } |
270 | i=BN_get_flags(a,BN_FLG_MALLOCED); | 270 | i=BN_get_flags(a,BN_FLG_MALLOCED); |
271 | memset(a,0,sizeof(BIGNUM)); | 271 | OPENSSL_cleanse(a,sizeof(BIGNUM)); |
272 | if (i) | 272 | if (i) |
273 | OPENSSL_free(a); | 273 | OPENSSL_free(a); |
274 | } | 274 | } |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b03458d002..cb93ac3356 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
@@ -56,325 +56,10 @@ | |||
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | 58 | ||
59 | #ifndef BN_DEBUG | ||
60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
61 | # define NDEBUG | ||
62 | #endif | ||
63 | |||
64 | #include <stdio.h> | 59 | #include <stdio.h> |
65 | #include <assert.h> | ||
66 | #include "cryptlib.h" | 60 | #include "cryptlib.h" |
67 | #include "bn_lcl.h" | 61 | #include "bn_lcl.h" |
68 | 62 | ||
69 | #if defined(OPENSSL_NO_ASM) || !(defined(__i386) || defined(__i386__)) || defined(__DJGPP__) /* Assembler implementation exists only for x86 */ | ||
70 | /* Here follows specialised variants of bn_add_words() and | ||
71 | bn_sub_words(). They have the property performing operations on | ||
72 | arrays of different sizes. The sizes of those arrays is expressed through | ||
73 | cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl, | ||
74 | which is the delta between the two lengths, calculated as len(a)-len(b). | ||
75 | All lengths are the number of BN_ULONGs... For the operations that require | ||
76 | a result array as parameter, it must have the length cl+abs(dl). | ||
77 | These functions should probably end up in bn_asm.c as soon as there are | ||
78 | assembler counterparts for the systems that use assembler files. */ | ||
79 | |||
80 | BN_ULONG bn_sub_part_words(BN_ULONG *r, | ||
81 | const BN_ULONG *a, const BN_ULONG *b, | ||
82 | int cl, int dl) | ||
83 | { | ||
84 | BN_ULONG c, t; | ||
85 | |||
86 | assert(cl >= 0); | ||
87 | c = bn_sub_words(r, a, b, cl); | ||
88 | |||
89 | if (dl == 0) | ||
90 | return c; | ||
91 | |||
92 | r += cl; | ||
93 | a += cl; | ||
94 | b += cl; | ||
95 | |||
96 | if (dl < 0) | ||
97 | { | ||
98 | #ifdef BN_COUNT | ||
99 | fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); | ||
100 | #endif | ||
101 | for (;;) | ||
102 | { | ||
103 | t = b[0]; | ||
104 | r[0] = (0-t-c)&BN_MASK2; | ||
105 | if (t != 0) c=1; | ||
106 | if (++dl >= 0) break; | ||
107 | |||
108 | t = b[1]; | ||
109 | r[1] = (0-t-c)&BN_MASK2; | ||
110 | if (t != 0) c=1; | ||
111 | if (++dl >= 0) break; | ||
112 | |||
113 | t = b[2]; | ||
114 | r[2] = (0-t-c)&BN_MASK2; | ||
115 | if (t != 0) c=1; | ||
116 | if (++dl >= 0) break; | ||
117 | |||
118 | t = b[3]; | ||
119 | r[3] = (0-t-c)&BN_MASK2; | ||
120 | if (t != 0) c=1; | ||
121 | if (++dl >= 0) break; | ||
122 | |||
123 | b += 4; | ||
124 | r += 4; | ||
125 | } | ||
126 | } | ||
127 | else | ||
128 | { | ||
129 | int save_dl = dl; | ||
130 | #ifdef BN_COUNT | ||
131 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c); | ||
132 | #endif | ||
133 | while(c) | ||
134 | { | ||
135 | t = a[0]; | ||
136 | r[0] = (t-c)&BN_MASK2; | ||
137 | if (t != 0) c=0; | ||
138 | if (--dl <= 0) break; | ||
139 | |||
140 | t = a[1]; | ||
141 | r[1] = (t-c)&BN_MASK2; | ||
142 | if (t != 0) c=0; | ||
143 | if (--dl <= 0) break; | ||
144 | |||
145 | t = a[2]; | ||
146 | r[2] = (t-c)&BN_MASK2; | ||
147 | if (t != 0) c=0; | ||
148 | if (--dl <= 0) break; | ||
149 | |||
150 | t = a[3]; | ||
151 | r[3] = (t-c)&BN_MASK2; | ||
152 | if (t != 0) c=0; | ||
153 | if (--dl <= 0) break; | ||
154 | |||
155 | save_dl = dl; | ||
156 | a += 4; | ||
157 | r += 4; | ||
158 | } | ||
159 | if (dl > 0) | ||
160 | { | ||
161 | #ifdef BN_COUNT | ||
162 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); | ||
163 | #endif | ||
164 | if (save_dl > dl) | ||
165 | { | ||
166 | switch (save_dl - dl) | ||
167 | { | ||
168 | case 1: | ||
169 | r[1] = a[1]; | ||
170 | if (--dl <= 0) break; | ||
171 | case 2: | ||
172 | r[2] = a[2]; | ||
173 | if (--dl <= 0) break; | ||
174 | case 3: | ||
175 | r[3] = a[3]; | ||
176 | if (--dl <= 0) break; | ||
177 | } | ||
178 | a += 4; | ||
179 | r += 4; | ||
180 | } | ||
181 | } | ||
182 | if (dl > 0) | ||
183 | { | ||
184 | #ifdef BN_COUNT | ||
185 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl); | ||
186 | #endif | ||
187 | for(;;) | ||
188 | { | ||
189 | r[0] = a[0]; | ||
190 | if (--dl <= 0) break; | ||
191 | r[1] = a[1]; | ||
192 | if (--dl <= 0) break; | ||
193 | r[2] = a[2]; | ||
194 | if (--dl <= 0) break; | ||
195 | r[3] = a[3]; | ||
196 | if (--dl <= 0) break; | ||
197 | |||
198 | a += 4; | ||
199 | r += 4; | ||
200 | } | ||
201 | } | ||
202 | } | ||
203 | return c; | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | BN_ULONG bn_add_part_words(BN_ULONG *r, | ||
208 | const BN_ULONG *a, const BN_ULONG *b, | ||
209 | int cl, int dl) | ||
210 | { | ||
211 | BN_ULONG c, l, t; | ||
212 | |||
213 | assert(cl >= 0); | ||
214 | c = bn_add_words(r, a, b, cl); | ||
215 | |||
216 | if (dl == 0) | ||
217 | return c; | ||
218 | |||
219 | r += cl; | ||
220 | a += cl; | ||
221 | b += cl; | ||
222 | |||
223 | if (dl < 0) | ||
224 | { | ||
225 | int save_dl = dl; | ||
226 | #ifdef BN_COUNT | ||
227 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); | ||
228 | #endif | ||
229 | while (c) | ||
230 | { | ||
231 | l=(c+b[0])&BN_MASK2; | ||
232 | c=(l < c); | ||
233 | r[0]=l; | ||
234 | if (++dl >= 0) break; | ||
235 | |||
236 | l=(c+b[1])&BN_MASK2; | ||
237 | c=(l < c); | ||
238 | r[1]=l; | ||
239 | if (++dl >= 0) break; | ||
240 | |||
241 | l=(c+b[2])&BN_MASK2; | ||
242 | c=(l < c); | ||
243 | r[2]=l; | ||
244 | if (++dl >= 0) break; | ||
245 | |||
246 | l=(c+b[3])&BN_MASK2; | ||
247 | c=(l < c); | ||
248 | r[3]=l; | ||
249 | if (++dl >= 0) break; | ||
250 | |||
251 | save_dl = dl; | ||
252 | b+=4; | ||
253 | r+=4; | ||
254 | } | ||
255 | if (dl < 0) | ||
256 | { | ||
257 | #ifdef BN_COUNT | ||
258 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl); | ||
259 | #endif | ||
260 | if (save_dl < dl) | ||
261 | { | ||
262 | switch (dl - save_dl) | ||
263 | { | ||
264 | case 1: | ||
265 | r[1] = b[1]; | ||
266 | if (++dl >= 0) break; | ||
267 | case 2: | ||
268 | r[2] = b[2]; | ||
269 | if (++dl >= 0) break; | ||
270 | case 3: | ||
271 | r[3] = b[3]; | ||
272 | if (++dl >= 0) break; | ||
273 | } | ||
274 | b += 4; | ||
275 | r += 4; | ||
276 | } | ||
277 | } | ||
278 | if (dl < 0) | ||
279 | { | ||
280 | #ifdef BN_COUNT | ||
281 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl); | ||
282 | #endif | ||
283 | for(;;) | ||
284 | { | ||
285 | r[0] = b[0]; | ||
286 | if (++dl >= 0) break; | ||
287 | r[1] = b[1]; | ||
288 | if (++dl >= 0) break; | ||
289 | r[2] = b[2]; | ||
290 | if (++dl >= 0) break; | ||
291 | r[3] = b[3]; | ||
292 | if (++dl >= 0) break; | ||
293 | |||
294 | b += 4; | ||
295 | r += 4; | ||
296 | } | ||
297 | } | ||
298 | } | ||
299 | else | ||
300 | { | ||
301 | int save_dl = dl; | ||
302 | #ifdef BN_COUNT | ||
303 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl); | ||
304 | #endif | ||
305 | while (c) | ||
306 | { | ||
307 | t=(a[0]+c)&BN_MASK2; | ||
308 | c=(t < c); | ||
309 | r[0]=t; | ||
310 | if (--dl <= 0) break; | ||
311 | |||
312 | t=(a[1]+c)&BN_MASK2; | ||
313 | c=(t < c); | ||
314 | r[1]=t; | ||
315 | if (--dl <= 0) break; | ||
316 | |||
317 | t=(a[2]+c)&BN_MASK2; | ||
318 | c=(t < c); | ||
319 | r[2]=t; | ||
320 | if (--dl <= 0) break; | ||
321 | |||
322 | t=(a[3]+c)&BN_MASK2; | ||
323 | c=(t < c); | ||
324 | r[3]=t; | ||
325 | if (--dl <= 0) break; | ||
326 | |||
327 | save_dl = dl; | ||
328 | a+=4; | ||
329 | r+=4; | ||
330 | } | ||
331 | #ifdef BN_COUNT | ||
332 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); | ||
333 | #endif | ||
334 | if (dl > 0) | ||
335 | { | ||
336 | if (save_dl > dl) | ||
337 | { | ||
338 | switch (save_dl - dl) | ||
339 | { | ||
340 | case 1: | ||
341 | r[1] = a[1]; | ||
342 | if (--dl <= 0) break; | ||
343 | case 2: | ||
344 | r[2] = a[2]; | ||
345 | if (--dl <= 0) break; | ||
346 | case 3: | ||
347 | r[3] = a[3]; | ||
348 | if (--dl <= 0) break; | ||
349 | } | ||
350 | a += 4; | ||
351 | r += 4; | ||
352 | } | ||
353 | } | ||
354 | if (dl > 0) | ||
355 | { | ||
356 | #ifdef BN_COUNT | ||
357 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl); | ||
358 | #endif | ||
359 | for(;;) | ||
360 | { | ||
361 | r[0] = a[0]; | ||
362 | if (--dl <= 0) break; | ||
363 | r[1] = a[1]; | ||
364 | if (--dl <= 0) break; | ||
365 | r[2] = a[2]; | ||
366 | if (--dl <= 0) break; | ||
367 | r[3] = a[3]; | ||
368 | if (--dl <= 0) break; | ||
369 | |||
370 | a += 4; | ||
371 | r += 4; | ||
372 | } | ||
373 | } | ||
374 | } | ||
375 | return c; | ||
376 | } | ||
377 | |||
378 | #ifdef BN_RECURSION | 63 | #ifdef BN_RECURSION |
379 | /* Karatsuba recursive multiplication algorithm | 64 | /* Karatsuba recursive multiplication algorithm |
380 | * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ | 65 | * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ |
@@ -390,15 +75,14 @@ BN_ULONG bn_add_part_words(BN_ULONG *r, | |||
390 | * a[1]*b[1] | 75 | * a[1]*b[1] |
391 | */ | 76 | */ |
392 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | 77 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, |
393 | int dna, int dnb, BN_ULONG *t) | 78 | BN_ULONG *t) |
394 | { | 79 | { |
395 | int n=n2/2,c1,c2; | 80 | int n=n2/2,c1,c2; |
396 | int tna=n+dna, tnb=n+dnb; | ||
397 | unsigned int neg,zero; | 81 | unsigned int neg,zero; |
398 | BN_ULONG ln,lo,*p; | 82 | BN_ULONG ln,lo,*p; |
399 | 83 | ||
400 | # ifdef BN_COUNT | 84 | # ifdef BN_COUNT |
401 | fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2); | 85 | printf(" bn_mul_recursive %d * %d\n",n2,n2); |
402 | # endif | 86 | # endif |
403 | # ifdef BN_MUL_COMBA | 87 | # ifdef BN_MUL_COMBA |
404 | # if 0 | 88 | # if 0 |
@@ -408,40 +92,34 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
408 | return; | 92 | return; |
409 | } | 93 | } |
410 | # endif | 94 | # endif |
411 | /* Only call bn_mul_comba 8 if n2 == 8 and the | 95 | if (n2 == 8) |
412 | * two arrays are complete [steve] | ||
413 | */ | ||
414 | if (n2 == 8 && dna == 0 && dnb == 0) | ||
415 | { | 96 | { |
416 | bn_mul_comba8(r,a,b); | 97 | bn_mul_comba8(r,a,b); |
417 | return; | 98 | return; |
418 | } | 99 | } |
419 | # endif /* BN_MUL_COMBA */ | 100 | # endif /* BN_MUL_COMBA */ |
420 | /* Else do normal multiply */ | ||
421 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) | 101 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) |
422 | { | 102 | { |
423 | bn_mul_normal(r,a,n2+dna,b,n2+dnb); | 103 | /* This should not happen */ |
424 | if ((dna + dnb) < 0) | 104 | bn_mul_normal(r,a,n2,b,n2); |
425 | memset(&r[2*n2 + dna + dnb], 0, | ||
426 | sizeof(BN_ULONG) * -(dna + dnb)); | ||
427 | return; | 105 | return; |
428 | } | 106 | } |
429 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ | 107 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ |
430 | c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); | 108 | c1=bn_cmp_words(a,&(a[n]),n); |
431 | c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); | 109 | c2=bn_cmp_words(&(b[n]),b,n); |
432 | zero=neg=0; | 110 | zero=neg=0; |
433 | switch (c1*3+c2) | 111 | switch (c1*3+c2) |
434 | { | 112 | { |
435 | case -4: | 113 | case -4: |
436 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ | 114 | bn_sub_words(t, &(a[n]),a, n); /* - */ |
437 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ | 115 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ |
438 | break; | 116 | break; |
439 | case -3: | 117 | case -3: |
440 | zero=1; | 118 | zero=1; |
441 | break; | 119 | break; |
442 | case -2: | 120 | case -2: |
443 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ | 121 | bn_sub_words(t, &(a[n]),a, n); /* - */ |
444 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ | 122 | bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ |
445 | neg=1; | 123 | neg=1; |
446 | break; | 124 | break; |
447 | case -1: | 125 | case -1: |
@@ -450,22 +128,21 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
450 | zero=1; | 128 | zero=1; |
451 | break; | 129 | break; |
452 | case 2: | 130 | case 2: |
453 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ | 131 | bn_sub_words(t, a, &(a[n]),n); /* + */ |
454 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ | 132 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ |
455 | neg=1; | 133 | neg=1; |
456 | break; | 134 | break; |
457 | case 3: | 135 | case 3: |
458 | zero=1; | 136 | zero=1; |
459 | break; | 137 | break; |
460 | case 4: | 138 | case 4: |
461 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); | 139 | bn_sub_words(t, a, &(a[n]),n); |
462 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); | 140 | bn_sub_words(&(t[n]),&(b[n]),b, n); |
463 | break; | 141 | break; |
464 | } | 142 | } |
465 | 143 | ||
466 | # ifdef BN_MUL_COMBA | 144 | # ifdef BN_MUL_COMBA |
467 | if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take | 145 | if (n == 4) |
468 | extra args to do this well */ | ||
469 | { | 146 | { |
470 | if (!zero) | 147 | if (!zero) |
471 | bn_mul_comba4(&(t[n2]),t,&(t[n])); | 148 | bn_mul_comba4(&(t[n2]),t,&(t[n])); |
@@ -475,9 +152,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
475 | bn_mul_comba4(r,a,b); | 152 | bn_mul_comba4(r,a,b); |
476 | bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); | 153 | bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); |
477 | } | 154 | } |
478 | else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could | 155 | else if (n == 8) |
479 | take extra args to do this | ||
480 | well */ | ||
481 | { | 156 | { |
482 | if (!zero) | 157 | if (!zero) |
483 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | 158 | bn_mul_comba8(&(t[n2]),t,&(t[n])); |
@@ -492,11 +167,11 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
492 | { | 167 | { |
493 | p= &(t[n2*2]); | 168 | p= &(t[n2*2]); |
494 | if (!zero) | 169 | if (!zero) |
495 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); | 170 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); |
496 | else | 171 | else |
497 | memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); | 172 | memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); |
498 | bn_mul_recursive(r,a,b,n,0,0,p); | 173 | bn_mul_recursive(r,a,b,n,p); |
499 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p); | 174 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); |
500 | } | 175 | } |
501 | 176 | ||
502 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign | 177 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign |
@@ -545,39 +220,39 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
545 | 220 | ||
546 | /* n+tn is the word length | 221 | /* n+tn is the word length |
547 | * t needs to be n*4 is size, as does r */ | 222 | * t needs to be n*4 is size, as does r */ |
548 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, | 223 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, |
549 | int tna, int tnb, BN_ULONG *t) | 224 | int n, BN_ULONG *t) |
550 | { | 225 | { |
551 | int i,j,n2=n*2; | 226 | int i,j,n2=n*2; |
552 | unsigned int c1,c2,neg,zero; | 227 | unsigned int c1,c2,neg,zero; |
553 | BN_ULONG ln,lo,*p; | 228 | BN_ULONG ln,lo,*p; |
554 | 229 | ||
555 | # ifdef BN_COUNT | 230 | # ifdef BN_COUNT |
556 | fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n", | 231 | printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); |
557 | tna, n, tnb, n); | ||
558 | # endif | 232 | # endif |
559 | if (n < 8) | 233 | if (n < 8) |
560 | { | 234 | { |
561 | bn_mul_normal(r,a,n+tna,b,n+tnb); | 235 | i=tn+n; |
236 | bn_mul_normal(r,a,i,b,i); | ||
562 | return; | 237 | return; |
563 | } | 238 | } |
564 | 239 | ||
565 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ | 240 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ |
566 | c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); | 241 | c1=bn_cmp_words(a,&(a[n]),n); |
567 | c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); | 242 | c2=bn_cmp_words(&(b[n]),b,n); |
568 | zero=neg=0; | 243 | zero=neg=0; |
569 | switch (c1*3+c2) | 244 | switch (c1*3+c2) |
570 | { | 245 | { |
571 | case -4: | 246 | case -4: |
572 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ | 247 | bn_sub_words(t, &(a[n]),a, n); /* - */ |
573 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ | 248 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ |
574 | break; | 249 | break; |
575 | case -3: | 250 | case -3: |
576 | zero=1; | 251 | zero=1; |
577 | /* break; */ | 252 | /* break; */ |
578 | case -2: | 253 | case -2: |
579 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ | 254 | bn_sub_words(t, &(a[n]),a, n); /* - */ |
580 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ | 255 | bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ |
581 | neg=1; | 256 | neg=1; |
582 | break; | 257 | break; |
583 | case -1: | 258 | case -1: |
@@ -586,16 +261,16 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, | |||
586 | zero=1; | 261 | zero=1; |
587 | /* break; */ | 262 | /* break; */ |
588 | case 2: | 263 | case 2: |
589 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ | 264 | bn_sub_words(t, a, &(a[n]),n); /* + */ |
590 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ | 265 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ |
591 | neg=1; | 266 | neg=1; |
592 | break; | 267 | break; |
593 | case 3: | 268 | case 3: |
594 | zero=1; | 269 | zero=1; |
595 | /* break; */ | 270 | /* break; */ |
596 | case 4: | 271 | case 4: |
597 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); | 272 | bn_sub_words(t, a, &(a[n]),n); |
598 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); | 273 | bn_sub_words(&(t[n]),&(b[n]),b, n); |
599 | break; | 274 | break; |
600 | } | 275 | } |
601 | /* The zero case isn't yet implemented here. The speedup | 276 | /* The zero case isn't yet implemented here. The speedup |
@@ -614,59 +289,54 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, | |||
614 | { | 289 | { |
615 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | 290 | bn_mul_comba8(&(t[n2]),t,&(t[n])); |
616 | bn_mul_comba8(r,a,b); | 291 | bn_mul_comba8(r,a,b); |
617 | bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); | 292 | bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); |
618 | memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb)); | 293 | memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); |
619 | } | 294 | } |
620 | else | 295 | else |
621 | { | 296 | { |
622 | p= &(t[n2*2]); | 297 | p= &(t[n2*2]); |
623 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); | 298 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); |
624 | bn_mul_recursive(r,a,b,n,0,0,p); | 299 | bn_mul_recursive(r,a,b,n,p); |
625 | i=n/2; | 300 | i=n/2; |
626 | /* If there is only a bottom half to the number, | 301 | /* If there is only a bottom half to the number, |
627 | * just do it */ | 302 | * just do it */ |
628 | if (tna > tnb) | 303 | j=tn-i; |
629 | j = tna - i; | ||
630 | else | ||
631 | j = tnb - i; | ||
632 | if (j == 0) | 304 | if (j == 0) |
633 | { | 305 | { |
634 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]), | 306 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); |
635 | i,tna-i,tnb-i,p); | ||
636 | memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); | 307 | memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); |
637 | } | 308 | } |
638 | else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ | 309 | else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ |
639 | { | 310 | { |
640 | bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), | 311 | bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), |
641 | i,tna-i,tnb-i,p); | 312 | j,i,p); |
642 | memset(&(r[n2+tna+tnb]),0, | 313 | memset(&(r[n2+tn*2]),0, |
643 | sizeof(BN_ULONG)*(n2-tna-tnb)); | 314 | sizeof(BN_ULONG)*(n2-tn*2)); |
644 | } | 315 | } |
645 | else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ | 316 | else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ |
646 | { | 317 | { |
647 | memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); | 318 | memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); |
648 | if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL | 319 | if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL) |
649 | && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) | ||
650 | { | 320 | { |
651 | bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); | 321 | bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); |
652 | } | 322 | } |
653 | else | 323 | else |
654 | { | 324 | { |
655 | for (;;) | 325 | for (;;) |
656 | { | 326 | { |
657 | i/=2; | 327 | i/=2; |
658 | if (i < tna && i < tnb) | 328 | if (i < tn) |
659 | { | 329 | { |
660 | bn_mul_part_recursive(&(r[n2]), | 330 | bn_mul_part_recursive(&(r[n2]), |
661 | &(a[n]),&(b[n]), | 331 | &(a[n]),&(b[n]), |
662 | i,tna-i,tnb-i,p); | 332 | tn-i,i,p); |
663 | break; | 333 | break; |
664 | } | 334 | } |
665 | else if (i <= tna && i <= tnb) | 335 | else if (i == tn) |
666 | { | 336 | { |
667 | bn_mul_recursive(&(r[n2]), | 337 | bn_mul_recursive(&(r[n2]), |
668 | &(a[n]),&(b[n]), | 338 | &(a[n]),&(b[n]), |
669 | i,tna-i,tnb-i,p); | 339 | i,p); |
670 | break; | 340 | break; |
671 | } | 341 | } |
672 | } | 342 | } |
@@ -727,10 +397,10 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
727 | int n=n2/2; | 397 | int n=n2/2; |
728 | 398 | ||
729 | # ifdef BN_COUNT | 399 | # ifdef BN_COUNT |
730 | fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2); | 400 | printf(" bn_mul_low_recursive %d * %d\n",n2,n2); |
731 | # endif | 401 | # endif |
732 | 402 | ||
733 | bn_mul_recursive(r,a,b,n,0,0,&(t[0])); | 403 | bn_mul_recursive(r,a,b,n,&(t[0])); |
734 | if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) | 404 | if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) |
735 | { | 405 | { |
736 | bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); | 406 | bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); |
@@ -761,7 +431,7 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | |||
761 | BN_ULONG ll,lc,*lp,*mp; | 431 | BN_ULONG ll,lc,*lp,*mp; |
762 | 432 | ||
763 | # ifdef BN_COUNT | 433 | # ifdef BN_COUNT |
764 | fprintf(stderr," bn_mul_high %d * %d\n",n2,n2); | 434 | printf(" bn_mul_high %d * %d\n",n2,n2); |
765 | # endif | 435 | # endif |
766 | n=n2/2; | 436 | n=n2/2; |
767 | 437 | ||
@@ -814,8 +484,8 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | |||
814 | else | 484 | else |
815 | # endif | 485 | # endif |
816 | { | 486 | { |
817 | bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2])); | 487 | bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); |
818 | bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2])); | 488 | bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); |
819 | } | 489 | } |
820 | 490 | ||
821 | /* s0 == low(al*bl) | 491 | /* s0 == low(al*bl) |
@@ -940,19 +610,19 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | |||
940 | 610 | ||
941 | int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | 611 | int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) |
942 | { | 612 | { |
943 | int ret=0; | ||
944 | int top,al,bl; | 613 | int top,al,bl; |
945 | BIGNUM *rr; | 614 | BIGNUM *rr; |
615 | int ret = 0; | ||
946 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 616 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
947 | int i; | 617 | int i; |
948 | #endif | 618 | #endif |
949 | #ifdef BN_RECURSION | 619 | #ifdef BN_RECURSION |
950 | BIGNUM *t=NULL; | 620 | BIGNUM *t; |
951 | int j=0,k; | 621 | int j,k; |
952 | #endif | 622 | #endif |
953 | 623 | ||
954 | #ifdef BN_COUNT | 624 | #ifdef BN_COUNT |
955 | fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top); | 625 | printf("BN_mul %d * %d\n",a->top,b->top); |
956 | #endif | 626 | #endif |
957 | 627 | ||
958 | bn_check_top(a); | 628 | bn_check_top(a); |
@@ -1005,55 +675,21 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
1005 | #ifdef BN_RECURSION | 675 | #ifdef BN_RECURSION |
1006 | if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) | 676 | if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) |
1007 | { | 677 | { |
1008 | if (i >= -1 && i <= 1) | 678 | if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA) && bl<b->dmax) |
1009 | { | 679 | { |
1010 | int sav_j =0; | 680 | #if 0 /* tribute to const-ification, bl<b->dmax above covers for this */ |
1011 | /* Find out the power of two lower or equal | 681 | if (bn_wexpand(b,al) == NULL) goto err; |
1012 | to the longest of the two numbers */ | 682 | #endif |
1013 | if (i >= 0) | 683 | b->d[bl]=0; |
1014 | { | ||
1015 | j = BN_num_bits_word((BN_ULONG)al); | ||
1016 | } | ||
1017 | if (i == -1) | ||
1018 | { | ||
1019 | j = BN_num_bits_word((BN_ULONG)bl); | ||
1020 | } | ||
1021 | sav_j = j; | ||
1022 | j = 1<<(j-1); | ||
1023 | assert(j <= al || j <= bl); | ||
1024 | k = j+j; | ||
1025 | t = BN_CTX_get(ctx); | ||
1026 | if (al > j || bl > j) | ||
1027 | { | ||
1028 | bn_wexpand(t,k*4); | ||
1029 | bn_wexpand(rr,k*4); | ||
1030 | bn_mul_part_recursive(rr->d,a->d,b->d, | ||
1031 | j,al-j,bl-j,t->d); | ||
1032 | } | ||
1033 | else /* al <= j || bl <= j */ | ||
1034 | { | ||
1035 | bn_wexpand(t,k*2); | ||
1036 | bn_wexpand(rr,k*2); | ||
1037 | bn_mul_recursive(rr->d,a->d,b->d, | ||
1038 | j,al-j,bl-j,t->d); | ||
1039 | } | ||
1040 | rr->top=top; | ||
1041 | goto end; | ||
1042 | } | ||
1043 | #if 0 | ||
1044 | if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA)) | ||
1045 | { | ||
1046 | BIGNUM *tmp_bn = (BIGNUM *)b; | ||
1047 | if (bn_wexpand(tmp_bn,al) == NULL) goto err; | ||
1048 | tmp_bn->d[bl]=0; | ||
1049 | bl++; | 684 | bl++; |
1050 | i--; | 685 | i--; |
1051 | } | 686 | } |
1052 | else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA)) | 687 | else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA) && al<a->dmax) |
1053 | { | 688 | { |
1054 | BIGNUM *tmp_bn = (BIGNUM *)a; | 689 | #if 0 /* tribute to const-ification, al<a->dmax above covers for this */ |
1055 | if (bn_wexpand(tmp_bn,bl) == NULL) goto err; | 690 | if (bn_wexpand(a,bl) == NULL) goto err; |
1056 | tmp_bn->d[al]=0; | 691 | #endif |
692 | a->d[al]=0; | ||
1057 | al++; | 693 | al++; |
1058 | i++; | 694 | i++; |
1059 | } | 695 | } |
@@ -1070,17 +706,26 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
1070 | if (bn_wexpand(t,k*2) == NULL) goto err; | 706 | if (bn_wexpand(t,k*2) == NULL) goto err; |
1071 | if (bn_wexpand(rr,k*2) == NULL) goto err; | 707 | if (bn_wexpand(rr,k*2) == NULL) goto err; |
1072 | bn_mul_recursive(rr->d,a->d,b->d,al,t->d); | 708 | bn_mul_recursive(rr->d,a->d,b->d,al,t->d); |
709 | rr->top=top; | ||
710 | goto end; | ||
1073 | } | 711 | } |
712 | #if 0 /* tribute to const-ification, rsa/dsa performance is not affected */ | ||
1074 | else | 713 | else |
1075 | { | 714 | { |
1076 | if (bn_wexpand(t,k*4) == NULL) goto err; | 715 | if (bn_wexpand(a,k) == NULL ) goto err; |
1077 | if (bn_wexpand(rr,k*4) == NULL) goto err; | 716 | if (bn_wexpand(b,k) == NULL ) goto err; |
717 | if (bn_wexpand(t,k*4) == NULL ) goto err; | ||
718 | if (bn_wexpand(rr,k*4) == NULL ) goto err; | ||
719 | for (i=a->top; i<k; i++) | ||
720 | a->d[i]=0; | ||
721 | for (i=b->top; i<k; i++) | ||
722 | b->d[i]=0; | ||
1078 | bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); | 723 | bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); |
1079 | } | 724 | } |
1080 | rr->top=top; | 725 | rr->top=top; |
1081 | goto end; | 726 | goto end; |
1082 | } | ||
1083 | #endif | 727 | #endif |
728 | } | ||
1084 | } | 729 | } |
1085 | #endif /* BN_RECURSION */ | 730 | #endif /* BN_RECURSION */ |
1086 | if (bn_wexpand(rr,top) == NULL) goto err; | 731 | if (bn_wexpand(rr,top) == NULL) goto err; |
@@ -1103,7 +748,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | |||
1103 | BN_ULONG *rr; | 748 | BN_ULONG *rr; |
1104 | 749 | ||
1105 | #ifdef BN_COUNT | 750 | #ifdef BN_COUNT |
1106 | fprintf(stderr," bn_mul_normal %d * %d\n",na,nb); | 751 | printf(" bn_mul_normal %d * %d\n",na,nb); |
1107 | #endif | 752 | #endif |
1108 | 753 | ||
1109 | if (na < nb) | 754 | if (na < nb) |
@@ -1116,13 +761,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | |||
1116 | 761 | ||
1117 | } | 762 | } |
1118 | rr= &(r[na]); | 763 | rr= &(r[na]); |
1119 | if (nb <= 0) | 764 | rr[0]=bn_mul_words(r,a,na,b[0]); |
1120 | { | ||
1121 | (void)bn_mul_words(r,a,na,0); | ||
1122 | return; | ||
1123 | } | ||
1124 | else | ||
1125 | rr[0]=bn_mul_words(r,a,na,b[0]); | ||
1126 | 765 | ||
1127 | for (;;) | 766 | for (;;) |
1128 | { | 767 | { |
@@ -1143,7 +782,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | |||
1143 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 782 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) |
1144 | { | 783 | { |
1145 | #ifdef BN_COUNT | 784 | #ifdef BN_COUNT |
1146 | fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n); | 785 | printf(" bn_mul_low_normal %d * %d\n",n,n); |
1147 | #endif | 786 | #endif |
1148 | bn_mul_words(r,a,n,b[0]); | 787 | bn_mul_words(r,a,n,b[0]); |
1149 | 788 | ||
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c index 918b9237c6..e072d9255c 100644 --- a/src/lib/libcrypto/bn/bn_prime.c +++ b/src/lib/libcrypto/bn/bn_prime.c | |||
@@ -140,6 +140,7 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, | |||
140 | BN_CTX *ctx; | 140 | BN_CTX *ctx; |
141 | int checks = BN_prime_checks_for_size(bits); | 141 | int checks = BN_prime_checks_for_size(bits); |
142 | 142 | ||
143 | BN_init(&t); | ||
143 | ctx=BN_CTX_new(); | 144 | ctx=BN_CTX_new(); |
144 | if (ctx == NULL) goto err; | 145 | if (ctx == NULL) goto err; |
145 | if (ret == NULL) | 146 | if (ret == NULL) |
@@ -148,7 +149,6 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, | |||
148 | } | 149 | } |
149 | else | 150 | else |
150 | rnd=ret; | 151 | rnd=ret; |
151 | BN_init(&t); | ||
152 | loop: | 152 | loop: |
153 | /* make a random number and set the top and bottom bits */ | 153 | /* make a random number and set the top and bottom bits */ |
154 | if (add == NULL) | 154 | if (add == NULL) |
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c index 9e08ccd22e..893c9d2af9 100644 --- a/src/lib/libcrypto/bn/bn_rand.c +++ b/src/lib/libcrypto/bn/bn_rand.c | |||
@@ -201,7 +201,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) | |||
201 | err: | 201 | err: |
202 | if (buf != NULL) | 202 | if (buf != NULL) |
203 | { | 203 | { |
204 | memset(buf,0,bytes); | 204 | OPENSSL_cleanse(buf,bytes); |
205 | OPENSSL_free(buf); | 205 | OPENSSL_free(buf); |
206 | } | 206 | } |
207 | return(ret); | 207 | return(ret); |
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c index cd59baa2c4..988e0ca7b3 100644 --- a/src/lib/libcrypto/bn/bn_word.c +++ b/src/lib/libcrypto/bn/bn_word.c | |||
@@ -123,7 +123,10 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) | |||
123 | i=0; | 123 | i=0; |
124 | for (;;) | 124 | for (;;) |
125 | { | 125 | { |
126 | l=(a->d[i]+(BN_ULONG)w)&BN_MASK2; | 126 | if (i >= a->top) |
127 | l=w; | ||
128 | else | ||
129 | l=(a->d[i]+(BN_ULONG)w)&BN_MASK2; | ||
127 | a->d[i]=l; | 130 | a->d[i]=l; |
128 | if (w > l) | 131 | if (w > l) |
129 | w=1; | 132 | w=1; |
diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c index 8158a67374..3c8c540387 100644 --- a/src/lib/libcrypto/bn/bntest.c +++ b/src/lib/libcrypto/bn/bntest.c | |||
@@ -68,10 +68,6 @@ | |||
68 | #include <openssl/x509.h> | 68 | #include <openssl/x509.h> |
69 | #include <openssl/err.h> | 69 | #include <openssl/err.h> |
70 | 70 | ||
71 | #ifdef OPENSSL_SYS_WINDOWS | ||
72 | #include "../bio/bss_file.c" | ||
73 | #endif | ||
74 | |||
75 | const int num0 = 100; /* number of tests */ | 71 | const int num0 = 100; /* number of tests */ |
76 | const int num1 = 50; /* additional tests for some functions */ | 72 | const int num1 = 50; /* additional tests for some functions */ |
77 | const int num2 = 5; /* number of tests for slow functions */ | 73 | const int num2 = 5; /* number of tests for slow functions */ |
@@ -96,11 +92,6 @@ int test_sqrt(BIO *bp,BN_CTX *ctx); | |||
96 | int rand_neg(void); | 92 | int rand_neg(void); |
97 | static int results=0; | 93 | static int results=0; |
98 | 94 | ||
99 | #ifdef OPENSSL_NO_STDIO | ||
100 | #define APPS_WIN16 | ||
101 | #include "bss_file.c" | ||
102 | #endif | ||
103 | |||
104 | static unsigned char lst[]="\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9" | 95 | static unsigned char lst[]="\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9" |
105 | "\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0"; | 96 | "\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0"; |
106 | 97 | ||
@@ -141,10 +132,10 @@ int main(int argc, char *argv[]) | |||
141 | 132 | ||
142 | 133 | ||
143 | ctx=BN_CTX_new(); | 134 | ctx=BN_CTX_new(); |
144 | if (ctx == NULL) exit(1); | 135 | if (ctx == NULL) EXIT(1); |
145 | 136 | ||
146 | out=BIO_new(BIO_s_file()); | 137 | out=BIO_new(BIO_s_file()); |
147 | if (out == NULL) exit(1); | 138 | if (out == NULL) EXIT(1); |
148 | if (outfile == NULL) | 139 | if (outfile == NULL) |
149 | { | 140 | { |
150 | BIO_set_fp(out,stdout,BIO_NOCLOSE); | 141 | BIO_set_fp(out,stdout,BIO_NOCLOSE); |
@@ -154,7 +145,7 @@ int main(int argc, char *argv[]) | |||
154 | if (!BIO_write_filename(out,outfile)) | 145 | if (!BIO_write_filename(out,outfile)) |
155 | { | 146 | { |
156 | perror(outfile); | 147 | perror(outfile); |
157 | exit(1); | 148 | EXIT(1); |
158 | } | 149 | } |
159 | } | 150 | } |
160 | 151 | ||
@@ -238,14 +229,14 @@ int main(int argc, char *argv[]) | |||
238 | BIO_free(out); | 229 | BIO_free(out); |
239 | 230 | ||
240 | /**/ | 231 | /**/ |
241 | exit(0); | 232 | EXIT(0); |
242 | err: | 233 | err: |
243 | BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices | 234 | BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices |
244 | * the failure, see test_bn in test/Makefile.ssl*/ | 235 | * the failure, see test_bn in test/Makefile.ssl*/ |
245 | BIO_flush(out); | 236 | BIO_flush(out); |
246 | ERR_load_crypto_strings(); | 237 | ERR_load_crypto_strings(); |
247 | ERR_print_errors_fp(stderr); | 238 | ERR_print_errors_fp(stderr); |
248 | exit(1); | 239 | EXIT(1); |
249 | return(1); | 240 | return(1); |
250 | } | 241 | } |
251 | 242 | ||
@@ -488,7 +479,7 @@ int test_mul(BIO *bp) | |||
488 | BN_CTX *ctx; | 479 | BN_CTX *ctx; |
489 | 480 | ||
490 | ctx = BN_CTX_new(); | 481 | ctx = BN_CTX_new(); |
491 | if (ctx == NULL) exit(1); | 482 | if (ctx == NULL) EXIT(1); |
492 | 483 | ||
493 | BN_init(&a); | 484 | BN_init(&a); |
494 | BN_init(&b); | 485 | BN_init(&b); |
@@ -726,7 +717,7 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx) | |||
726 | while ((l=ERR_get_error())) | 717 | while ((l=ERR_get_error())) |
727 | fprintf(stderr,"ERROR:%s\n", | 718 | fprintf(stderr,"ERROR:%s\n", |
728 | ERR_error_string(l,NULL)); | 719 | ERR_error_string(l,NULL)); |
729 | exit(1); | 720 | EXIT(1); |
730 | } | 721 | } |
731 | if (bp != NULL) | 722 | if (bp != NULL) |
732 | { | 723 | { |
diff --git a/src/lib/libcrypto/bn/divtest.c b/src/lib/libcrypto/bn/divtest.c index 13ba86e3c4..d3fc688f33 100644 --- a/src/lib/libcrypto/bn/divtest.c +++ b/src/lib/libcrypto/bn/divtest.c | |||
@@ -1,7 +1,7 @@ | |||
1 | #include <openssl/bn.h> | 1 | #include <openssl/bn.h> |
2 | #include <openssl/rand.h> | 2 | #include <openssl/rand.h> |
3 | 3 | ||
4 | static int rand(n) | 4 | static int Rand(n) |
5 | { | 5 | { |
6 | unsigned char x[2]; | 6 | unsigned char x[2]; |
7 | RAND_pseudo_bytes(x,2); | 7 | RAND_pseudo_bytes(x,2); |
@@ -26,8 +26,8 @@ main() | |||
26 | BN_CTX *ctx=BN_CTX_new(); | 26 | BN_CTX *ctx=BN_CTX_new(); |
27 | 27 | ||
28 | for(;;) { | 28 | for(;;) { |
29 | BN_pseudo_rand(a,rand(),0,0); | 29 | BN_pseudo_rand(a,Rand(),0,0); |
30 | BN_pseudo_rand(b,rand(),0,0); | 30 | BN_pseudo_rand(b,Rand(),0,0); |
31 | if (BN_is_zero(b)) continue; | 31 | if (BN_is_zero(b)) continue; |
32 | 32 | ||
33 | BN_RECP_CTX_set(recp,b,ctx); | 33 | BN_RECP_CTX_set(recp,b,ctx); |
diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c index 5ca570d1a8..b09cf88705 100644 --- a/src/lib/libcrypto/bn/exptest.c +++ b/src/lib/libcrypto/bn/exptest.c | |||
@@ -59,13 +59,13 @@ | |||
59 | #include <stdio.h> | 59 | #include <stdio.h> |
60 | #include <stdlib.h> | 60 | #include <stdlib.h> |
61 | #include <string.h> | 61 | #include <string.h> |
62 | |||
63 | #include "../e_os.h" | ||
64 | |||
62 | #include <openssl/bio.h> | 65 | #include <openssl/bio.h> |
63 | #include <openssl/bn.h> | 66 | #include <openssl/bn.h> |
64 | #include <openssl/rand.h> | 67 | #include <openssl/rand.h> |
65 | #include <openssl/err.h> | 68 | #include <openssl/err.h> |
66 | #ifdef OPENSSL_SYS_WINDOWS | ||
67 | #include "../bio/bss_file.c" | ||
68 | #endif | ||
69 | 69 | ||
70 | #define NUM_BITS (BN_BITS*2) | 70 | #define NUM_BITS (BN_BITS*2) |
71 | 71 | ||
@@ -86,7 +86,7 @@ int main(int argc, char *argv[]) | |||
86 | ERR_load_BN_strings(); | 86 | ERR_load_BN_strings(); |
87 | 87 | ||
88 | ctx=BN_CTX_new(); | 88 | ctx=BN_CTX_new(); |
89 | if (ctx == NULL) exit(1); | 89 | if (ctx == NULL) EXIT(1); |
90 | r_mont=BN_new(); | 90 | r_mont=BN_new(); |
91 | r_recp=BN_new(); | 91 | r_recp=BN_new(); |
92 | r_simple=BN_new(); | 92 | r_simple=BN_new(); |
@@ -99,7 +99,7 @@ int main(int argc, char *argv[]) | |||
99 | 99 | ||
100 | out=BIO_new(BIO_s_file()); | 100 | out=BIO_new(BIO_s_file()); |
101 | 101 | ||
102 | if (out == NULL) exit(1); | 102 | if (out == NULL) EXIT(1); |
103 | BIO_set_fp(out,stdout,BIO_NOCLOSE); | 103 | BIO_set_fp(out,stdout,BIO_NOCLOSE); |
104 | 104 | ||
105 | for (i=0; i<200; i++) | 105 | for (i=0; i<200; i++) |
@@ -124,7 +124,7 @@ int main(int argc, char *argv[]) | |||
124 | { | 124 | { |
125 | printf("BN_mod_exp_mont() problems\n"); | 125 | printf("BN_mod_exp_mont() problems\n"); |
126 | ERR_print_errors(out); | 126 | ERR_print_errors(out); |
127 | exit(1); | 127 | EXIT(1); |
128 | } | 128 | } |
129 | 129 | ||
130 | ret=BN_mod_exp_recp(r_recp,a,b,m,ctx); | 130 | ret=BN_mod_exp_recp(r_recp,a,b,m,ctx); |
@@ -132,7 +132,7 @@ int main(int argc, char *argv[]) | |||
132 | { | 132 | { |
133 | printf("BN_mod_exp_recp() problems\n"); | 133 | printf("BN_mod_exp_recp() problems\n"); |
134 | ERR_print_errors(out); | 134 | ERR_print_errors(out); |
135 | exit(1); | 135 | EXIT(1); |
136 | } | 136 | } |
137 | 137 | ||
138 | ret=BN_mod_exp_simple(r_simple,a,b,m,ctx); | 138 | ret=BN_mod_exp_simple(r_simple,a,b,m,ctx); |
@@ -140,7 +140,7 @@ int main(int argc, char *argv[]) | |||
140 | { | 140 | { |
141 | printf("BN_mod_exp_simple() problems\n"); | 141 | printf("BN_mod_exp_simple() problems\n"); |
142 | ERR_print_errors(out); | 142 | ERR_print_errors(out); |
143 | exit(1); | 143 | EXIT(1); |
144 | } | 144 | } |
145 | 145 | ||
146 | if (BN_cmp(r_simple, r_mont) == 0 | 146 | if (BN_cmp(r_simple, r_mont) == 0 |
@@ -163,7 +163,7 @@ int main(int argc, char *argv[]) | |||
163 | printf("\nrecp ="); BN_print(out,r_recp); | 163 | printf("\nrecp ="); BN_print(out,r_recp); |
164 | printf("\nmont ="); BN_print(out,r_mont); | 164 | printf("\nmont ="); BN_print(out,r_mont); |
165 | printf("\n"); | 165 | printf("\n"); |
166 | exit(1); | 166 | EXIT(1); |
167 | } | 167 | } |
168 | } | 168 | } |
169 | BN_free(r_mont); | 169 | BN_free(r_mont); |
@@ -177,11 +177,11 @@ int main(int argc, char *argv[]) | |||
177 | CRYPTO_mem_leaks(out); | 177 | CRYPTO_mem_leaks(out); |
178 | BIO_free(out); | 178 | BIO_free(out); |
179 | printf(" done\n"); | 179 | printf(" done\n"); |
180 | exit(0); | 180 | EXIT(0); |
181 | err: | 181 | err: |
182 | ERR_load_crypto_strings(); | 182 | ERR_load_crypto_strings(); |
183 | ERR_print_errors(out); | 183 | ERR_print_errors(out); |
184 | exit(1); | 184 | EXIT(1); |
185 | return(1); | 185 | return(1); |
186 | } | 186 | } |
187 | 187 | ||