From d4fcd82bb7f6d603bd61e19a81ba97337b89dfca Mon Sep 17 00:00:00 2001 From: markus <> Date: Mon, 12 May 2003 02:18:40 +0000 Subject: merge 0.9.7b with local changes; crank majors for libssl/libcrypto --- src/lib/libcrypto/bn/Makefile.ssl | 33 +-- src/lib/libcrypto/bn/asm/ia64.S | 235 +++++++++++----- src/lib/libcrypto/bn/asm/pa-risc2.s | 36 +-- src/lib/libcrypto/bn/asm/vms.mar | 254 +++++++++-------- src/lib/libcrypto/bn/bn.h | 2 + src/lib/libcrypto/bn/bn_div.c | 28 +- src/lib/libcrypto/bn/bn_lcl.h | 26 +- src/lib/libcrypto/bn/bn_lib.c | 4 +- src/lib/libcrypto/bn/bn_mul.c | 529 ++++++------------------------------ src/lib/libcrypto/bn/bn_prime.c | 2 +- src/lib/libcrypto/bn/bn_rand.c | 2 +- src/lib/libcrypto/bn/bn_word.c | 5 +- src/lib/libcrypto/bn/bntest.c | 23 +- src/lib/libcrypto/bn/divtest.c | 6 +- src/lib/libcrypto/bn/exptest.c | 22 +- 15 files changed, 500 insertions(+), 707 deletions(-) (limited to 'src/lib/libcrypto/bn') diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl index 6a479726c4..fa17d3c7d8 100644 --- a/src/lib/libcrypto/bn/Makefile.ssl +++ b/src/lib/libcrypto/bn/Makefile.ssl @@ -23,14 +23,6 @@ BN_ASM= bn_asm.o CFLAGS= $(INCLUDES) $(CFLAG) -# We let the C compiler driver to take care of .s files. This is done in -# order to be excused from maintaining a separate set of architecture -# dependent assembler flags. E.g. if you throw -mcpu=ultrasparc at SPARC -# gcc, then the driver will automatically translate it to -xarch=v8plus -# and pass it down to assembler. -AS=$(CC) -c -ASFLAGS=$(CFLAGS) - GENERAL=Makefile TEST=bntest.c exptest.c APPS= @@ -73,22 +65,11 @@ lib: $(LIBOBJ) @touch lib # elf -asm/bn86-elf.o: asm/bn86unix.cpp - $(CPP) -DELF -x c asm/bn86unix.cpp | as -o asm/bn86-elf.o - -asm/co86-elf.o: asm/co86unix.cpp - $(CPP) -DELF -x c asm/co86unix.cpp | as -o asm/co86-elf.o +asm/bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > bn86-elf.s) -# solaris -asm/bn86-sol.o: asm/bn86unix.cpp - $(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s - as -o asm/bn86-sol.o asm/bn86-sol.s - rm -f asm/bn86-sol.s - -asm/co86-sol.o: asm/co86unix.cpp - $(CC) -E -DSOL asm/co86unix.cpp | sed 's/^#.*//' > asm/co86-sol.s - as -o asm/co86-sol.o asm/co86-sol.s - rm -f asm/co86-sol.s +asm/co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl + (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > co86-elf.s) # a.out asm/bn86-out.o: asm/bn86unix.cpp @@ -136,6 +117,8 @@ asm/ia64-cpp.o: asm/ia64.S $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \ rm -f /tmp/ia64.$$$$.s +asm/x86_64-gcc.o: asm/x86_64-gcc.c + files: $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO @@ -169,14 +152,14 @@ lint: lint -DLINT $(INCLUDES) $(SRC)>fluff depend: - $(MAKEDEPEND) $(CFLAG) $(INCLUDES) $(DEPFLAG) $(PROGS) $(LIBSRC) + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) dclean: $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new mv -f Makefile.new $(MAKEFILE) clean: - rm -f asm/co86unix.cpp asm/bn86unix.cpp *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s + rm -f asm/co86unix.cpp asm/bn86unix.cpp asm/*-elf.* *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s # DO NOT DELETE THIS LINE -- make depend depends on it. diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S index ae56066310..7dfda85566 100644 --- a/src/lib/libcrypto/bn/asm/ia64.S +++ b/src/lib/libcrypto/bn/asm/ia64.S @@ -1,6 +1,6 @@ .explicit .text -.ident "ia64.S, Version 1.1" +.ident "ia64.S, Version 2.0" .ident "IA-64 ISA artwork by Andy Polyakov " // @@ -13,6 +13,35 @@ // disclaimed. // ==================================================================== // +// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is +// different from Itanium to this module viewpoint. Most notably, is it +// "wider" than Itanium? Can you experience loop scalability as +// discussed in commentary sections? Not really:-( Itanium2 has 6 +// integer ALU ports, i.e. it's 2 ports wider, but it's not enough to +// spin twice as fast, as I need 8 IALU ports. Amount of floating point +// ports is the same, i.e. 2, while I need 4. In other words, to this +// module Itanium2 remains effectively as "wide" as Itanium. Yet it's +// essentially different in respect to this module, and a re-tune was +// required. Well, because some intruction latencies has changed. Most +// noticeably those intensively used: +// +// Itanium Itanium2 +// ldf8 9 6 L2 hit +// ld8 2 1 L1 hit +// getf 2 5 +// xma[->getf] 7[+1] 4[+0] +// add[->st8] 1[+1] 1[+0] +// +// What does it mean? You might ratiocinate that the original code +// should run just faster... Because sum of latencies is smaller... +// Wrong! Note that getf latency increased. This means that if a loop is +// scheduled for lower latency (and they are), then it will suffer from +// stall condition and the code will therefore turn anti-scalable, e.g. +// original bn_mul_words spun at 5*n or 2.5 times slower than expected +// on Itanium2! What to do? Reschedule loops for Itanium2? But then +// Itanium would exhibit anti-scalability. So I've chosen to reschedule +// for worst latency for every instruction aiming for best *all-round* +// performance. // Q. How much faster does it get? // A. Here is the output from 'openssl speed rsa dsa' for vanilla @@ -149,12 +178,27 @@ bn_add_words: brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16 } .body -{ .mib; mov r14=r32 // rp +{ .mib; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r14=0,r32 // rp +#else + mov r14=r32 // rp +#endif mov r9=pr };; -{ .mii; mov r15=r33 // ap +{ .mii; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r15=0,r33 // ap +#else + mov r15=r33 // ap +#endif mov ar.lc=r10 mov ar.ec=6 } -{ .mib; mov r16=r34 // bp +{ .mib; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r16=0,r34 // bp +#else + mov r16=r34 // bp +#endif mov pr.rot=1<<16 };; .L_bn_add_words_ctop: @@ -174,7 +218,7 @@ bn_add_words: { .mii; (p59) add r8=1,r8 // return value - mov pr=r9,-1 + mov pr=r9,0x1ffff mov ar.lc=r3 } { .mbb; nop.b 0x0 br.ret.sptk.many b0 };; @@ -202,12 +246,27 @@ bn_sub_words: brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16 } .body -{ .mib; mov r14=r32 // rp +{ .mib; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r14=0,r32 // rp +#else + mov r14=r32 // rp +#endif mov r9=pr };; -{ .mii; mov r15=r33 // ap +{ .mii; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r15=0,r33 // ap +#else + mov r15=r33 // ap +#endif mov ar.lc=r10 mov ar.ec=6 } -{ .mib; mov r16=r34 // bp +{ .mib; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r16=0,r34 // bp +#else + mov r16=r34 // bp +#endif mov pr.rot=1<<16 };; .L_bn_sub_words_ctop: @@ -227,7 +286,7 @@ bn_sub_words: { .mii; (p59) add r8=1,r8 // return value - mov pr=r9,-1 + mov pr=r9,0x1ffff mov ar.lc=r3 } { .mbb; nop.b 0x0 br.ret.sptk.many b0 };; @@ -253,7 +312,7 @@ bn_mul_words: #ifdef XMA_TEMPTATION { .mfi; alloc r2=ar.pfs,4,0,0,0 };; #else -{ .mfi; alloc r2=ar.pfs,4,4,0,8 };; +{ .mfi; alloc r2=ar.pfs,4,12,0,16 };; #endif { .mib; mov r8=r0 // return value cmp4.le p6,p0=r34,r0 @@ -266,24 +325,30 @@ bn_mul_words: .body { .mib; setf.sig f8=r35 // w - mov pr.rot=0x400001<<16 - // ------^----- serves as (p48) at first (p26) + mov pr.rot=0x800001<<16 + // ------^----- serves as (p50) at first (p27) brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16 } #ifndef XMA_TEMPTATION -{ .mii; mov r14=r32 // rp - mov r15=r33 // ap +{ .mii; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r14=0,r32 // rp + addp4 r15=0,r33 // ap +#else + mov r14=r32 // rp + mov r15=r33 // ap +#endif mov ar.lc=r10 } -{ .mii; mov r39=0 // serves as r33 at first (p26) - mov ar.ec=12 };; +{ .mii; mov r40=0 // serves as r35 at first (p27) + mov ar.ec=13 };; -// This loop spins in 2*(n+11) ticks. It's scheduled for data in L2 -// cache (i.e. 9 ticks away) as floating point load/store instructions +// This loop spins in 2*(n+12) ticks. It's scheduled for data in Itanium +// L2 cache (i.e. 9 ticks away) as floating point load/store instructions // bypass L1 cache and L2 latency is actually best-case scenario for -// ldf8. The loop is not scalable and shall run in 2*(n+11) even on -// "wider" IA-64 implementations. It's a trade-off here. n+22 loop +// ldf8. The loop is not scalable and shall run in 2*(n+12) even on +// "wider" IA-64 implementations. It's a trade-off here. n+24 loop // would give us ~5% in *overall* performance improvement on "wider" // IA-64, but would hurt Itanium for about same because of longer // epilogue. As it's a matter of few percents in either case I've @@ -291,25 +356,25 @@ bn_mul_words: // this very instruction sequence in bn_mul_add_words loop which in // turn is scalable). .L_bn_mul_words_ctop: -{ .mfi; (p25) getf.sig r36=f49 // low - (p21) xmpy.lu f45=f37,f8 - (p27) cmp.ltu p52,p48=r39,r38 } +{ .mfi; (p25) getf.sig r36=f52 // low + (p21) xmpy.lu f48=f37,f8 + (p28) cmp.ltu p54,p50=r41,r39 } { .mfi; (p16) ldf8 f32=[r15],8 - (p21) xmpy.hu f38=f37,f8 + (p21) xmpy.hu f40=f37,f8 (p0) nop.i 0x0 };; -{ .mii; (p26) getf.sig r32=f43 // high - .pred.rel "mutex",p48,p52 - (p48) add r38=r37,r33 // (p26) - (p52) add r38=r37,r33,1 } // (p26) -{ .mfb; (p27) st8 [r14]=r39,8 +{ .mii; (p25) getf.sig r32=f44 // high + .pred.rel "mutex",p50,p54 + (p50) add r40=r38,r35 // (p27) + (p54) add r40=r38,r35,1 } // (p27) +{ .mfb; (p28) st8 [r14]=r41,8 (p0) nop.f 0x0 br.ctop.sptk .L_bn_mul_words_ctop };; .L_bn_mul_words_cend: { .mii; nop.m 0x0 -.pred.rel "mutex",p49,p53 -(p49) add r8=r34,r0 -(p53) add r8=r34,r0,1 } +.pred.rel "mutex",p51,p55 +(p51) add r8=r36,r0 +(p55) add r8=r36,r0,1 } { .mfb; nop.m 0x0 nop.f 0x0 nop.b 0x0 } @@ -344,7 +409,7 @@ bn_mul_words: #endif // XMA_TEMPTATION { .mii; nop.m 0x0 - mov pr=r9,-1 + mov pr=r9,0x1ffff mov ar.lc=r3 } { .mfb; rum 1<<5 // clear um.mfh nop.f 0x0 @@ -376,59 +441,69 @@ bn_mul_add_words: .body { .mib; setf.sig f8=r35 // w - mov pr.rot=0x400001<<16 - // ------^----- serves as (p48) at first (p26) + mov pr.rot=0x800001<<16 + // ------^----- serves as (p50) at first (p27) brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16 } -{ .mii; mov r14=r32 // rp - mov r15=r33 // ap +{ .mii; +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r14=0,r32 // rp + addp4 r15=0,r33 // ap +#else + mov r14=r32 // rp + mov r15=r33 // ap +#endif mov ar.lc=r10 } -{ .mii; mov r39=0 // serves as r33 at first (p26) - mov r18=r32 // rp copy - mov ar.ec=14 };; +{ .mii; mov r40=0 // serves as r35 at first (p27) +#if defined(_HPUX_SOURCE) && defined(_ILP32) + addp4 r18=0,r32 // rp copy +#else + mov r18=r32 // rp copy +#endif + mov ar.ec=15 };; -// This loop spins in 3*(n+13) ticks on Itanium and should spin in -// 2*(n+13) on "wider" IA-64 implementations (to be verified with new +// This loop spins in 3*(n+14) ticks on Itanium and should spin in +// 2*(n+14) on "wider" IA-64 implementations (to be verified with new // µ-architecture manuals as they become available). As usual it's // possible to compress the epilogue, down to 10 in this case, at the // cost of scalability. Compressed (and therefore non-scalable) loop -// running at 3*(n+10) would buy you ~10% on Itanium but take ~35% +// running at 3*(n+11) would buy you ~10% on Itanium but take ~35% // from "wider" IA-64 so let it be scalable! Special attention was // paid for having the loop body split at 64-byte boundary. ld8 is // scheduled for L1 cache as the data is more than likely there. // Indeed, bn_mul_words has put it there a moment ago:-) .L_bn_mul_add_words_ctop: -{ .mfi; (p25) getf.sig r36=f49 // low - (p21) xmpy.lu f45=f37,f8 - (p27) cmp.ltu p52,p48=r39,r38 } +{ .mfi; (p25) getf.sig r36=f52 // low + (p21) xmpy.lu f48=f37,f8 + (p28) cmp.ltu p54,p50=r41,r39 } { .mfi; (p16) ldf8 f32=[r15],8 - (p21) xmpy.hu f38=f37,f8 - (p27) add r43=r43,r39 };; -{ .mii; (p26) getf.sig r32=f43 // high - .pred.rel "mutex",p48,p52 - (p48) add r38=r37,r33 // (p26) - (p52) add r38=r37,r33,1 } // (p26) -{ .mfb; (p27) cmp.ltu.unc p56,p0=r43,r39 + (p21) xmpy.hu f40=f37,f8 + (p28) add r45=r45,r41 };; +{ .mii; (p25) getf.sig r32=f44 // high + .pred.rel "mutex",p50,p54 + (p50) add r40=r38,r35 // (p27) + (p54) add r40=r38,r35,1 } // (p27) +{ .mfb; (p28) cmp.ltu.unc p60,p0=r45,r41 (p0) nop.f 0x0 (p0) nop.b 0x0 } -{ .mii; (p26) ld8 r42=[r18],8 - (p58) cmp.eq.or p57,p0=-1,r44 - (p58) add r44=1,r44 } -{ .mfb; (p29) st8 [r14]=r45,8 +{ .mii; (p27) ld8 r44=[r18],8 + (p62) cmp.eq.or p61,p0=-1,r46 + (p62) add r46=1,r46 } +{ .mfb; (p30) st8 [r14]=r47,8 (p0) nop.f 0x0 br.ctop.sptk .L_bn_mul_add_words_ctop};; .L_bn_mul_add_words_cend: { .mii; nop.m 0x0 -.pred.rel "mutex",p51,p55 -(p51) add r8=r36,r0 -(p55) add r8=r36,r0,1 } +.pred.rel "mutex",p53,p57 +(p53) add r8=r38,r0 +(p57) add r8=r38,r0,1 } { .mfb; nop.m 0x0 nop.f 0x0 nop.b 0x0 };; { .mii; -(p59) add r8=1,r8 - mov pr=r9,-1 +(p63) add r8=1,r8 + mov pr=r9,0x1ffff mov ar.lc=r3 } { .mfb; rum 1<<5 // clear um.mfh nop.f 0x0 @@ -461,6 +536,10 @@ bn_sqr_words: mov r9=pr };; .body +#if defined(_HPUX_SOURCE) && defined(_ILP32) +{ .mii; addp4 r32=0,r32 + addp4 r33=0,r33 };; +#endif { .mib; mov pr.rot=1<<16 brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16 @@ -492,7 +571,7 @@ bn_sqr_words: .L_bn_sqr_words_cend: { .mii; nop.m 0x0 - mov pr=r9,-1 + mov pr=r9,0x1ffff mov ar.lc=r3 } { .mfb; rum 1<<5 // clear um.mfh nop.f 0x0 @@ -526,7 +605,14 @@ bn_sqr_comba8: .prologue .fframe 0 .save ar.pfs,r2 +#if defined(_HPUX_SOURCE) && defined(_ILP32) { .mii; alloc r2=ar.pfs,2,1,0,0 + addp4 r33=0,r33 + addp4 r32=0,r32 };; +{ .mii; +#else +{ .mii; alloc r2=ar.pfs,2,1,0,0 +#endif mov r34=r33 add r14=8,r33 };; .body @@ -587,7 +673,14 @@ bn_mul_comba8: .prologue .fframe 0 .save ar.pfs,r2 +#if defined(_HPUX_SOURCE) && defined(_ILP32) { .mii; alloc r2=ar.pfs,3,0,0,0 + addp4 r33=0,r33 + addp4 r34=0,r34 };; +{ .mii; addp4 r32=0,r32 +#else +{ .mii; alloc r2=ar.pfs,3,0,0,0 +#endif add r14=8,r33 add r17=8,r34 } .body @@ -1138,7 +1231,14 @@ bn_sqr_comba4: .prologue .fframe 0 .save ar.pfs,r2 +#if defined(_HPUX_SOURCE) && defined(_ILP32) +{ .mii; alloc r2=ar.pfs,2,1,0,0 + addp4 r32=0,r32 + addp4 r33=0,r33 };; +{ .mii; +#else { .mii; alloc r2=ar.pfs,2,1,0,0 +#endif mov r34=r33 add r14=8,r33 };; .body @@ -1164,7 +1264,14 @@ bn_mul_comba4: .prologue .fframe 0 .save ar.pfs,r2 +#if defined(_HPUX_SOURCE) && defined(_ILP32) +{ .mii; alloc r2=ar.pfs,3,0,0,0 + addp4 r33=0,r33 + addp4 r34=0,r34 };; +{ .mii; addp4 r32=0,r32 +#else { .mii; alloc r2=ar.pfs,3,0,0,0 +#endif add r14=8,r33 add r17=8,r34 } .body @@ -1464,7 +1571,7 @@ bn_div_words: or r8=r8,r33 mov ar.pfs=r2 };; { .mii; shr.u r9=H,I // remainder if anybody wants it - mov pr=r10,-1 } + mov pr=r10,0x1ffff } { .mfb; br.ret.sptk.many b0 };; // Unsigned 64 by 32 (well, by 64 for the moment) bit integer division diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s index af9730d062..f3b16290eb 100644 --- a/src/lib/libcrypto/bn/asm/pa-risc2.s +++ b/src/lib/libcrypto/bn/asm/pa-risc2.s @@ -747,8 +747,8 @@ bn_div_words .PROC .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN .IMPORT BN_num_bits_word,CODE - .IMPORT __iob,DATA - .IMPORT fprintf,CODE + ;--- not PIC .IMPORT __iob,DATA + ;--- not PIC .IMPORT fprintf,CODE .IMPORT abort,CODE .IMPORT $$div2U,MILLICODE .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE @@ -844,12 +844,12 @@ $0006001A MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c $D2 - ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 - LDIL LR'C$7,%r21 ;offset 0xa24 - LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 - .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; - B,L fprintf,%r2 ;offset 0xa2c - LDO RR'C$7(%r21),%r25 ;offset 0xa30 + ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 + ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 + ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 + ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; + ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c + ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 .CALL ; B,L abort,%r2 ;offset 0xa34 NOP ;offset 0xa38 @@ -1605,14 +1605,14 @@ bn_mul_comba4 .PROCEND - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SPACE $PRIVATE$,SORT=16 - .IMPORT $global$,DATA - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SUBSPA $LIT$,ACCESS=0x2c -C$7 - .ALIGN 8 - .STRINGZ "Division would overflow (%d)\n" +;--- not PIC .SPACE $TEXT$ +;--- not PIC .SUBSPA $CODE$ +;--- not PIC .SPACE $PRIVATE$,SORT=16 +;--- not PIC .IMPORT $global$,DATA +;--- not PIC .SPACE $TEXT$ +;--- not PIC .SUBSPA $CODE$ +;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c +;--- not PIC C$7 +;--- not PIC .ALIGN 8 +;--- not PIC .STRINGZ "Division would overflow (%d)\n" .END diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar index 465f2774b6..aefab15cdb 100644 --- a/src/lib/libcrypto/bn/asm/vms.mar +++ b/src/lib/libcrypto/bn/asm/vms.mar @@ -1,4 +1,4 @@ - .title vax_bn_mul_add_word unsigned multiply & add, 32*32+32+32=>64 + .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64 ; ; w.j.m. 15-jan-1999 ; @@ -59,7 +59,7 @@ w=16 ;(AP) w by value (input) movl r6,r0 ; return c ret - .title vax_bn_mul_word unsigned multiply & add, 32*32+32=>64 + .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64 ; ; w.j.m. 15-jan-1999 ; @@ -172,147 +172,175 @@ n=12 ;(AP) n by value (input) ; } ; ; Using EDIV would be very easy, if it didn't do signed calculations. -; Therefore, som extra things have to happen around it. The way to -; handle that is to shift all operands right one step (basically dividing -; them by 2) and handle the different cases depending on what the lowest -; bit of each operand was. +; Any time any of the input numbers are signed, there are problems, +; usually with integer overflow, at which point it returns useless +; data (the quotient gets the value of l, and the remainder becomes 0). ; -; To start with, let's define the following: +; If it was just for the dividend, it would be very easy, just divide +; it by 2 (unsigned), do the division, multiply the resulting quotient +; and remainder by 2, add the bit that was dropped when dividing by 2 +; to the remainder, and do some adjustment so the remainder doesn't +; end up larger than the divisor. For some cases when the divisor is +; negative (from EDIV's point of view, i.e. when the highest bit is set), +; dividing the dividend by 2 isn't enough, and since some operations +; might generate integer overflows even when the dividend is divided by +; 4 (when the high part of the shifted down dividend ends up being exactly +; half of the divisor, the result is the quotient 0x80000000, which is +; negative...) it needs to be divided by 8. Furthermore, the divisor needs +; to be divided by 2 (unsigned) as well, to avoid more problems with the sign. +; In this case, a little extra fiddling with the remainder is required. ; -; a' = l & 1 -; a2 = >> 1 # UNSIGNED shift! -; b' = d & 1 -; b2 = d >> 1 # UNSIGNED shift! +; So, the simplest way to handle this is always to divide the dividend +; by 8, and to divide the divisor by 2 if it's highest bit is set. +; After EDIV has been used, the quotient gets multiplied by 8 if the +; original divisor was positive, otherwise 4. The remainder, oddly +; enough, is *always* multiplied by 8. +; NOTE: in the case mentioned above, where the high part of the shifted +; down dividend ends up being exactly half the shifted down divisor, we +; end up with a 33 bit quotient. That's no problem however, it usually +; means we have ended up with a too large remainder as well, and the +; problem is fixed by the last part of the algorithm (next paragraph). ; -; Now, use EDIV to calculate a quotient and a remainder: +; The routine ends with comparing the resulting remainder with the +; original divisor and if the remainder is larger, subtract the +; original divisor from it, and increase the quotient by 1. This is +; done until the remainder is smaller than the divisor. ; -; q'' = a2/b2 -; r'' = a2 - q''*b2 +; The complete algorithm looks like this: ; -; If b' is 0, the quotient is already correct, we just need to adjust the -; remainder: +; d' = d +; l' = l & 7 +; [h,l] = [h,l] >> 3 +; [q,r] = floor([h,l] / d) # This is the EDIV operation +; if (q < 0) q = -q # I doubt this is necessary any more ; -; if (b' == 0) -; { -; r = 2*r'' + a' -; q = q'' -; } -; -; If b' is 1, we need to do other adjustements. The first thought is the -; following (note that r' will not always have the right value, but an -; adjustement follows further down): -; -; if (b' == 1) -; { -; q' = q'' -; r' = a - q'*b -; -; However, one can note the folowing relationship: -; -; r'' = a2 - q''*b2 -; => 2*r'' = 2*a2 - 2*q''*b2 -; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1, -; q' = q'' } -; = a - a' - q'*(b - 1) -; = a - q'*b - a' + q' -; = r' - a' + q' -; => r' = 2*r'' - q' + a' +; r' = r >> 29 +; if (d' >= 0) +; q' = q >> 29 +; q = q << 3 +; else +; q' = q >> 30 +; q = q << 2 +; r = (r << 3) + l' ; -; This enables us to use r'' instead of discarding and calculating another -; modulo: -; -; if (b' == 1) +; if (d' < 0) ; { -; q' = q'' -; r' = (r'' << 1) - q' + a' -; -; Now, all we have to do is adjust r', because it might be < 0: -; -; while (r' < 0) +; [r',r] = [r',r] - q +; while ([r',r] < 0) ; { -; r' = r' + b -; q' = q' - 1 +; [r',r] = [r',r] + d +; [q',q] = [q',q] - 1 ; } ; } ; -; return q' +; while ([r',r] >= d') +; { +; [r',r] = [r',r] - d' +; [q',q] = [q',q] + 1 +; } +; +; return q h=4 ;(AP) h by value (input) l=8 ;(AP) l by value (input) d=12 ;(AP) d by value (input) -;aprim=r5 -;a2=r6 -;a20=r6 -;a21=r7 -;bprim=r8 -;b2=r9 -;qprim=r10 ; initially used as q'' -;rprim=r11 ; initially used as r'' - +;r2 = l, q +;r3 = h, r +;r4 = d +;r5 = l' +;r6 = r' +;r7 = d' +;r8 = q' .psect code,nowrt -.entry bn_div_words,^m +.entry bn_div_words,^m movl l(ap),r2 movl h(ap),r3 movl d(ap),r4 - movl #0,r5 - movl #0,r8 - movl #0,r0 -; movl #0,r1 + bicl3 #^XFFFFFFF8,r2,r5 ; l' = l & 7 + bicl3 #^X00000007,r2,r2 - rotl #-1,r2,r6 ; a20 = l >> 1 (almost) - rotl #-1,r3,r7 ; a21 = h >> 1 (almost) - rotl #-1,r4,r9 ; b2 = d >> 1 (almost) + bicl3 #^XFFFFFFF8,r3,r6 + bicl3 #^X00000007,r3,r3 + + addl r6,r2 - tstl r6 - bgeq 1$ - xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0 - incl r5 ; a' = 1 -1$: - tstl r7 - bgeq 2$ - xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1, - ; since that's what was lowest in a21 - xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 1 -2$: - tstl r9 + rotl #-3,r2,r2 ; l = l >> 3 + rotl #-3,r3,r3 ; h = h >> 3 + + movl r4,r7 ; d' = d + + movl #0,r6 ; r' = 0 + movl #0,r8 ; q' = 0 + + tstl r4 beql 666$ ; Uh-oh, the divisor is 0... - bgtr 3$ - xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 0 - incl r8 ; b' = 1 -3$: - tstl r9 - bneq 4$ ; if b2 is 0, we know that b' is 1 - tstl r3 - bneq 666$ ; if higher half isn't 0, we overflow - movl r2,r10 ; otherwise, we have our result - brb 42$ ; This is a success, really. -4$: - ediv r9,r6,r10,r11 - - tstl r8 - bneq 5$ ; If b' != 0, go to the other part -; addl3 r11,r11,r1 -; addl2 r5,r1 - brb 42$ -5$: - ashl #1,r11,r11 - subl2 r10,r11 - addl2 r5,r11 - bgeq 7$ -6$: - decl r10 - addl2 r4,r11 - blss 6$ -7$: -; movl r11,r1 + bgtr 1$ + rotl #-1,r4,r4 ; If d is negative, shift it right. + bicl2 #^X80000000,r4 ; Since d is then a large number, the + ; lowest bit is insignificant + ; (contradict that, and I'll fix the problem!) +1$: + ediv r4,r2,r2,r3 ; Do the actual division + + tstl r2 + bgeq 3$ + mnegl r2,r2 ; if q < 0, negate it +3$: + tstl r7 + blss 4$ + rotl #3,r2,r2 ; q = q << 3 + bicl3 #^XFFFFFFF8,r2,r8 ; q' gets the high bits from q + bicl3 #^X00000007,r2,r2 + bsb 41$ +4$: ; else + rotl #2,r2,r2 ; q = q << 2 + bicl3 #^XFFFFFFFC,r2,r8 ; q' gets the high bits from q + bicl3 #^X00000003,r2,r2 +41$: + rotl #3,r3,r3 ; r = r << 3 + bicl3 #^XFFFFFFF8,r3,r6 ; r' gets the high bits from r + bicl3 #^X00000007,r3,r3 + addl r5,r3 ; r = r + l' + + tstl r7 + bgeq 5$ + bitl #1,r7 + beql 5$ ; if d' < 0 && d' & 1 + subl r2,r3 ; [r',r] = [r',r] - [q',q] + sbwc r8,r6 +45$: + bgeq 5$ ; while r < 0 + decl r2 ; [q',q] = [q',q] - 1 + sbwc #0,r8 + addl r7,r3 ; [r',r] = [r',r] + d' + adwc #0,r6 + brb 45$ + +; The return points are placed in the middle to keep a short distance from +; all the branch points 42$: - movl r10,r0 +; movl r3,r1 + movl r2,r0 + ret 666$: + movl #^XFFFFFFFF,r0 ret + +5$: + tstl r6 + bneq 6$ + cmpl r3,r7 + blssu 42$ ; while [r',r] >= d' +6$: + subl r7,r3 ; [r',r] = [r',r] - d' + sbwc #0,r6 + incl r2 ; [q',q] = [q',q] + 1 + adwc #0,r8 + brb 5$ .title vax_bn_add_words unsigned add of two arrays ; diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index b40682f831..3da6d8ced9 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h @@ -248,6 +248,8 @@ typedef struct bn_blinding_st BIGNUM *A; BIGNUM *Ai; BIGNUM *mod; /* just a reference */ + unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; + * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ } BN_BLINDING; /* Used for montgomery multiplication */ diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index f9a095e3b3..580d1201bc 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c @@ -150,6 +150,20 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, q; \ }) # define REMAINDER_IS_ALREADY_CALCULATED +# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) + /* + * Same story here, but it's 128-bit by 64-bit division. Wow! + * + */ +# define bn_div_words(n0,n1,d0) \ + ({ asm volatile ( \ + "divq %4" \ + : "=a"(q), "=d"(rem) \ + : "a"(n1), "d"(n0), "g"(d0) \ + : "cc"); \ + q; \ + }) +# define REMAINDER_IS_ALREADY_CALCULATED # endif /* __ */ # endif /* __GNUC__ */ #endif /* OPENSSL_NO_ASM */ @@ -268,6 +282,11 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, q=(BN_ULONG)(((((BN_ULLONG)n0)< 0x%08X\n", + n0, n1, d0, q); +#endif #endif #ifndef REMAINDER_IS_ALREADY_CALCULATED @@ -292,11 +311,18 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_ULONG t2l,t2h,ql,qh; q=bn_div_words(n0,n1,d0); +#ifdef BN_DEBUG_LEVITTE + fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ +X) -> 0x%08X\n", + n0, n1, d0, q); +#endif #ifndef REMAINDER_IS_ALREADY_CALCULATED rem=(n1-q*d0)&BN_MASK2; #endif -#ifdef BN_UMULT_HIGH +#if defined(BN_UMULT_LOHI) + BN_UMULT_LOHI(t2l,t2h,d1,q); +#elif defined(BN_UMULT_HIGH) t2l = d1 * q; t2h = BN_UMULT_HIGH(d1,q); #else diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index 8a4dba375a..5614bc6164 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h @@ -230,6 +230,21 @@ struct bignum_ctx : "r"(a), "r"(b)); \ ret; }) # endif /* compiler */ +# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) +# if defined(__GNUC__) +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret,discard; \ + asm ("mulq %3" \ + : "=a"(discard),"=d"(ret) \ + : "a"(a), "g"(b) \ + : "cc"); \ + ret; }) +# define BN_UMULT_LOHI(low,high,a,b) \ + asm ("mulq %3" \ + : "=a"(low),"=d"(high) \ + : "a"(a),"g"(b) \ + : "cc"); +# endif # endif /* cpu */ #endif /* OPENSSL_NO_ASM */ @@ -337,7 +352,7 @@ struct bignum_ctx #define LBITS(a) ((a)&BN_MASK2l) #define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) -#define L2HBITS(a) ((BN_ULONG)((a)&BN_MASK2l)<>BN_BITS2)&BN_MASKl) @@ -353,7 +368,7 @@ struct bignum_ctx lt=(bl)*(lt); \ m1=(bl)*(ht); \ ht =(bh)*(ht); \ - m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS(1L); \ + m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \ ht+=HBITS(m); \ m1=L2HBITS(m); \ lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \ @@ -418,20 +433,19 @@ void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a); int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n); int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); +#if 0 +/* bn_mul.c rollback */ void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, int dna,int dnb,BN_ULONG *t); void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n,int tna,int tnb,BN_ULONG *t); +#endif void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t); void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n); void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, BN_ULONG *t); void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2, BN_ULONG *t); -BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl); -BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl); #ifdef __cplusplus } diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index ce2ae78419..463463cfcb 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c @@ -263,12 +263,12 @@ void BN_clear_free(BIGNUM *a) if (a == NULL) return; if (a->d != NULL) { - memset(a->d,0,a->dmax*sizeof(a->d[0])); + OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0])); if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) OPENSSL_free(a->d); } i=BN_get_flags(a,BN_FLG_MALLOCED); - memset(a,0,sizeof(BIGNUM)); + OPENSSL_cleanse(a,sizeof(BIGNUM)); if (i) OPENSSL_free(a); } diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b03458d002..cb93ac3356 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c @@ -56,325 +56,10 @@ * [including the GNU Public Licence.] */ -#ifndef BN_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG -#endif - #include -#include #include "cryptlib.h" #include "bn_lcl.h" -#if defined(OPENSSL_NO_ASM) || !(defined(__i386) || defined(__i386__)) || defined(__DJGPP__) /* Assembler implementation exists only for x86 */ -/* Here follows specialised variants of bn_add_words() and - bn_sub_words(). They have the property performing operations on - arrays of different sizes. The sizes of those arrays is expressed through - cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl, - which is the delta between the two lengths, calculated as len(a)-len(b). - All lengths are the number of BN_ULONGs... For the operations that require - a result array as parameter, it must have the length cl+abs(dl). - These functions should probably end up in bn_asm.c as soon as there are - assembler counterparts for the systems that use assembler files. */ - -BN_ULONG bn_sub_part_words(BN_ULONG *r, - const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl) - { - BN_ULONG c, t; - - assert(cl >= 0); - c = bn_sub_words(r, a, b, cl); - - if (dl == 0) - return c; - - r += cl; - a += cl; - b += cl; - - if (dl < 0) - { -#ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); -#endif - for (;;) - { - t = b[0]; - r[0] = (0-t-c)&BN_MASK2; - if (t != 0) c=1; - if (++dl >= 0) break; - - t = b[1]; - r[1] = (0-t-c)&BN_MASK2; - if (t != 0) c=1; - if (++dl >= 0) break; - - t = b[2]; - r[2] = (0-t-c)&BN_MASK2; - if (t != 0) c=1; - if (++dl >= 0) break; - - t = b[3]; - r[3] = (0-t-c)&BN_MASK2; - if (t != 0) c=1; - if (++dl >= 0) break; - - b += 4; - r += 4; - } - } - else - { - int save_dl = dl; -#ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c); -#endif - while(c) - { - t = a[0]; - r[0] = (t-c)&BN_MASK2; - if (t != 0) c=0; - if (--dl <= 0) break; - - t = a[1]; - r[1] = (t-c)&BN_MASK2; - if (t != 0) c=0; - if (--dl <= 0) break; - - t = a[2]; - r[2] = (t-c)&BN_MASK2; - if (t != 0) c=0; - if (--dl <= 0) break; - - t = a[3]; - r[3] = (t-c)&BN_MASK2; - if (t != 0) c=0; - if (--dl <= 0) break; - - save_dl = dl; - a += 4; - r += 4; - } - if (dl > 0) - { -#ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); -#endif - if (save_dl > dl) - { - switch (save_dl - dl) - { - case 1: - r[1] = a[1]; - if (--dl <= 0) break; - case 2: - r[2] = a[2]; - if (--dl <= 0) break; - case 3: - r[3] = a[3]; - if (--dl <= 0) break; - } - a += 4; - r += 4; - } - } - if (dl > 0) - { -#ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl); -#endif - for(;;) - { - r[0] = a[0]; - if (--dl <= 0) break; - r[1] = a[1]; - if (--dl <= 0) break; - r[2] = a[2]; - if (--dl <= 0) break; - r[3] = a[3]; - if (--dl <= 0) break; - - a += 4; - r += 4; - } - } - } - return c; - } -#endif - -BN_ULONG bn_add_part_words(BN_ULONG *r, - const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl) - { - BN_ULONG c, l, t; - - assert(cl >= 0); - c = bn_add_words(r, a, b, cl); - - if (dl == 0) - return c; - - r += cl; - a += cl; - b += cl; - - if (dl < 0) - { - int save_dl = dl; -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); -#endif - while (c) - { - l=(c+b[0])&BN_MASK2; - c=(l < c); - r[0]=l; - if (++dl >= 0) break; - - l=(c+b[1])&BN_MASK2; - c=(l < c); - r[1]=l; - if (++dl >= 0) break; - - l=(c+b[2])&BN_MASK2; - c=(l < c); - r[2]=l; - if (++dl >= 0) break; - - l=(c+b[3])&BN_MASK2; - c=(l < c); - r[3]=l; - if (++dl >= 0) break; - - save_dl = dl; - b+=4; - r+=4; - } - if (dl < 0) - { -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl); -#endif - if (save_dl < dl) - { - switch (dl - save_dl) - { - case 1: - r[1] = b[1]; - if (++dl >= 0) break; - case 2: - r[2] = b[2]; - if (++dl >= 0) break; - case 3: - r[3] = b[3]; - if (++dl >= 0) break; - } - b += 4; - r += 4; - } - } - if (dl < 0) - { -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl); -#endif - for(;;) - { - r[0] = b[0]; - if (++dl >= 0) break; - r[1] = b[1]; - if (++dl >= 0) break; - r[2] = b[2]; - if (++dl >= 0) break; - r[3] = b[3]; - if (++dl >= 0) break; - - b += 4; - r += 4; - } - } - } - else - { - int save_dl = dl; -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl); -#endif - while (c) - { - t=(a[0]+c)&BN_MASK2; - c=(t < c); - r[0]=t; - if (--dl <= 0) break; - - t=(a[1]+c)&BN_MASK2; - c=(t < c); - r[1]=t; - if (--dl <= 0) break; - - t=(a[2]+c)&BN_MASK2; - c=(t < c); - r[2]=t; - if (--dl <= 0) break; - - t=(a[3]+c)&BN_MASK2; - c=(t < c); - r[3]=t; - if (--dl <= 0) break; - - save_dl = dl; - a+=4; - r+=4; - } -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); -#endif - if (dl > 0) - { - if (save_dl > dl) - { - switch (save_dl - dl) - { - case 1: - r[1] = a[1]; - if (--dl <= 0) break; - case 2: - r[2] = a[2]; - if (--dl <= 0) break; - case 3: - r[3] = a[3]; - if (--dl <= 0) break; - } - a += 4; - r += 4; - } - } - if (dl > 0) - { -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl); -#endif - for(;;) - { - r[0] = a[0]; - if (--dl <= 0) break; - r[1] = a[1]; - if (--dl <= 0) break; - r[2] = a[2]; - if (--dl <= 0) break; - r[3] = a[3]; - if (--dl <= 0) break; - - a += 4; - r += 4; - } - } - } - return c; - } - #ifdef BN_RECURSION /* Karatsuba recursive multiplication algorithm * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ @@ -390,15 +75,14 @@ BN_ULONG bn_add_part_words(BN_ULONG *r, * a[1]*b[1] */ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, - int dna, int dnb, BN_ULONG *t) + BN_ULONG *t) { int n=n2/2,c1,c2; - int tna=n+dna, tnb=n+dnb; unsigned int neg,zero; BN_ULONG ln,lo,*p; # ifdef BN_COUNT - fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2); + printf(" bn_mul_recursive %d * %d\n",n2,n2); # endif # ifdef BN_MUL_COMBA # if 0 @@ -408,40 +92,34 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, return; } # endif - /* Only call bn_mul_comba 8 if n2 == 8 and the - * two arrays are complete [steve] - */ - if (n2 == 8 && dna == 0 && dnb == 0) + if (n2 == 8) { bn_mul_comba8(r,a,b); return; } # endif /* BN_MUL_COMBA */ - /* Else do normal multiply */ if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { - bn_mul_normal(r,a,n2+dna,b,n2+dnb); - if ((dna + dnb) < 0) - memset(&r[2*n2 + dna + dnb], 0, - sizeof(BN_ULONG) * -(dna + dnb)); + /* This should not happen */ + bn_mul_normal(r,a,n2,b,n2); return; } /* r=(a[0]-a[1])*(b[1]-b[0]) */ - c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); - c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); + c1=bn_cmp_words(a,&(a[n]),n); + c2=bn_cmp_words(&(b[n]),b,n); zero=neg=0; switch (c1*3+c2) { case -4: - bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ - bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + bn_sub_words(t, &(a[n]),a, n); /* - */ + bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ break; case -3: zero=1; break; case -2: - bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ - bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ + bn_sub_words(t, &(a[n]),a, n); /* - */ + bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ neg=1; break; case -1: @@ -450,22 +128,21 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, zero=1; break; case 2: - bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ - bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + bn_sub_words(t, a, &(a[n]),n); /* + */ + bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ neg=1; break; case 3: zero=1; break; case 4: - bn_sub_part_words(t, a, &(a[n]),tna,n-tna); - bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); + bn_sub_words(t, a, &(a[n]),n); + bn_sub_words(&(t[n]),&(b[n]),b, n); break; } # ifdef BN_MUL_COMBA - if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take - extra args to do this well */ + if (n == 4) { if (!zero) bn_mul_comba4(&(t[n2]),t,&(t[n])); @@ -475,9 +152,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, bn_mul_comba4(r,a,b); bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); } - else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could - take extra args to do this - well */ + else if (n == 8) { if (!zero) bn_mul_comba8(&(t[n2]),t,&(t[n])); @@ -492,11 +167,11 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, { p= &(t[n2*2]); if (!zero) - bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); + bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); else memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); - bn_mul_recursive(r,a,b,n,0,0,p); - bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p); + bn_mul_recursive(r,a,b,n,p); + bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); } /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign @@ -545,39 +220,39 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, /* n+tn is the word length * t needs to be n*4 is size, as does r */ -void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, - int tna, int tnb, BN_ULONG *t) +void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, + int n, BN_ULONG *t) { int i,j,n2=n*2; unsigned int c1,c2,neg,zero; BN_ULONG ln,lo,*p; # ifdef BN_COUNT - fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n", - tna, n, tnb, n); + printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); # endif if (n < 8) { - bn_mul_normal(r,a,n+tna,b,n+tnb); + i=tn+n; + bn_mul_normal(r,a,i,b,i); return; } /* r=(a[0]-a[1])*(b[1]-b[0]) */ - c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); - c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); + c1=bn_cmp_words(a,&(a[n]),n); + c2=bn_cmp_words(&(b[n]),b,n); zero=neg=0; switch (c1*3+c2) { case -4: - bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ - bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + bn_sub_words(t, &(a[n]),a, n); /* - */ + bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ break; case -3: zero=1; /* break; */ case -2: - bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ - bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ + bn_sub_words(t, &(a[n]),a, n); /* - */ + bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ neg=1; break; case -1: @@ -586,16 +261,16 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, zero=1; /* break; */ case 2: - bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ - bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + bn_sub_words(t, a, &(a[n]),n); /* + */ + bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ neg=1; break; case 3: zero=1; /* break; */ case 4: - bn_sub_part_words(t, a, &(a[n]),tna,n-tna); - bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); + bn_sub_words(t, a, &(a[n]),n); + bn_sub_words(&(t[n]),&(b[n]),b, n); break; } /* The zero case isn't yet implemented here. The speedup @@ -614,59 +289,54 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, { bn_mul_comba8(&(t[n2]),t,&(t[n])); bn_mul_comba8(r,a,b); - bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); - memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb)); + bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); + memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); } else { p= &(t[n2*2]); - bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); - bn_mul_recursive(r,a,b,n,0,0,p); + bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); + bn_mul_recursive(r,a,b,n,p); i=n/2; /* If there is only a bottom half to the number, * just do it */ - if (tna > tnb) - j = tna - i; - else - j = tnb - i; + j=tn-i; if (j == 0) { - bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]), - i,tna-i,tnb-i,p); + bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); } else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ { bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), - i,tna-i,tnb-i,p); - memset(&(r[n2+tna+tnb]),0, - sizeof(BN_ULONG)*(n2-tna-tnb)); + j,i,p); + memset(&(r[n2+tn*2]),0, + sizeof(BN_ULONG)*(n2-tn*2)); } else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ { memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); - if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL - && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) + if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL) { - bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); + bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); } else { for (;;) { i/=2; - if (i < tna && i < tnb) + if (i < tn) { bn_mul_part_recursive(&(r[n2]), &(a[n]),&(b[n]), - i,tna-i,tnb-i,p); + tn-i,i,p); break; } - else if (i <= tna && i <= tnb) + else if (i == tn) { bn_mul_recursive(&(r[n2]), &(a[n]),&(b[n]), - i,tna-i,tnb-i,p); + i,p); break; } } @@ -727,10 +397,10 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int n=n2/2; # ifdef BN_COUNT - fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2); + printf(" bn_mul_low_recursive %d * %d\n",n2,n2); # endif - bn_mul_recursive(r,a,b,n,0,0,&(t[0])); + bn_mul_recursive(r,a,b,n,&(t[0])); if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); @@ -761,7 +431,7 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, BN_ULONG ll,lc,*lp,*mp; # ifdef BN_COUNT - fprintf(stderr," bn_mul_high %d * %d\n",n2,n2); + printf(" bn_mul_high %d * %d\n",n2,n2); # endif n=n2/2; @@ -814,8 +484,8 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, else # endif { - bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2])); - bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2])); + bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); + bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); } /* s0 == low(al*bl) @@ -940,19 +610,19 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { - int ret=0; int top,al,bl; BIGNUM *rr; + int ret = 0; #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) int i; #endif #ifdef BN_RECURSION - BIGNUM *t=NULL; - int j=0,k; + BIGNUM *t; + int j,k; #endif #ifdef BN_COUNT - fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top); + printf("BN_mul %d * %d\n",a->top,b->top); #endif bn_check_top(a); @@ -1005,55 +675,21 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) #ifdef BN_RECURSION if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { - if (i >= -1 && i <= 1) + if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA) && bldmax) { - int sav_j =0; - /* Find out the power of two lower or equal - to the longest of the two numbers */ - if (i >= 0) - { - j = BN_num_bits_word((BN_ULONG)al); - } - if (i == -1) - { - j = BN_num_bits_word((BN_ULONG)bl); - } - sav_j = j; - j = 1<<(j-1); - assert(j <= al || j <= bl); - k = j+j; - t = BN_CTX_get(ctx); - if (al > j || bl > j) - { - bn_wexpand(t,k*4); - bn_wexpand(rr,k*4); - bn_mul_part_recursive(rr->d,a->d,b->d, - j,al-j,bl-j,t->d); - } - else /* al <= j || bl <= j */ - { - bn_wexpand(t,k*2); - bn_wexpand(rr,k*2); - bn_mul_recursive(rr->d,a->d,b->d, - j,al-j,bl-j,t->d); - } - rr->top=top; - goto end; - } -#if 0 - if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA)) - { - BIGNUM *tmp_bn = (BIGNUM *)b; - if (bn_wexpand(tmp_bn,al) == NULL) goto err; - tmp_bn->d[bl]=0; +#if 0 /* tribute to const-ification, bldmax above covers for this */ + if (bn_wexpand(b,al) == NULL) goto err; +#endif + b->d[bl]=0; bl++; i--; } - else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA)) + else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA) && aldmax) { - BIGNUM *tmp_bn = (BIGNUM *)a; - if (bn_wexpand(tmp_bn,bl) == NULL) goto err; - tmp_bn->d[al]=0; +#if 0 /* tribute to const-ification, aldmax above covers for this */ + if (bn_wexpand(a,bl) == NULL) goto err; +#endif + a->d[al]=0; al++; i++; } @@ -1070,17 +706,26 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) if (bn_wexpand(t,k*2) == NULL) goto err; if (bn_wexpand(rr,k*2) == NULL) goto err; bn_mul_recursive(rr->d,a->d,b->d,al,t->d); + rr->top=top; + goto end; } +#if 0 /* tribute to const-ification, rsa/dsa performance is not affected */ else { - if (bn_wexpand(t,k*4) == NULL) goto err; - if (bn_wexpand(rr,k*4) == NULL) goto err; + if (bn_wexpand(a,k) == NULL ) goto err; + if (bn_wexpand(b,k) == NULL ) goto err; + if (bn_wexpand(t,k*4) == NULL ) goto err; + if (bn_wexpand(rr,k*4) == NULL ) goto err; + for (i=a->top; id[i]=0; + for (i=b->top; id[i]=0; bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); } rr->top=top; goto end; - } #endif + } } #endif /* BN_RECURSION */ if (bn_wexpand(rr,top) == NULL) goto err; @@ -1103,7 +748,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) BN_ULONG *rr; #ifdef BN_COUNT - fprintf(stderr," bn_mul_normal %d * %d\n",na,nb); + printf(" bn_mul_normal %d * %d\n",na,nb); #endif if (na < nb) @@ -1116,13 +761,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) } rr= &(r[na]); - if (nb <= 0) - { - (void)bn_mul_words(r,a,na,0); - return; - } - else - rr[0]=bn_mul_words(r,a,na,b[0]); + rr[0]=bn_mul_words(r,a,na,b[0]); for (;;) { @@ -1143,7 +782,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) { #ifdef BN_COUNT - fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n); + printf(" bn_mul_low_normal %d * %d\n",n,n); #endif bn_mul_words(r,a,n,b[0]); diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c index 918b9237c6..e072d9255c 100644 --- a/src/lib/libcrypto/bn/bn_prime.c +++ b/src/lib/libcrypto/bn/bn_prime.c @@ -140,6 +140,7 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, BN_CTX *ctx; int checks = BN_prime_checks_for_size(bits); + BN_init(&t); ctx=BN_CTX_new(); if (ctx == NULL) goto err; if (ret == NULL) @@ -148,7 +149,6 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, } else rnd=ret; - BN_init(&t); loop: /* make a random number and set the top and bottom bits */ if (add == NULL) diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c index 9e08ccd22e..893c9d2af9 100644 --- a/src/lib/libcrypto/bn/bn_rand.c +++ b/src/lib/libcrypto/bn/bn_rand.c @@ -201,7 +201,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) err: if (buf != NULL) { - memset(buf,0,bytes); + OPENSSL_cleanse(buf,bytes); OPENSSL_free(buf); } return(ret); diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c index cd59baa2c4..988e0ca7b3 100644 --- a/src/lib/libcrypto/bn/bn_word.c +++ b/src/lib/libcrypto/bn/bn_word.c @@ -123,7 +123,10 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) i=0; for (;;) { - l=(a->d[i]+(BN_ULONG)w)&BN_MASK2; + if (i >= a->top) + l=w; + else + l=(a->d[i]+(BN_ULONG)w)&BN_MASK2; a->d[i]=l; if (w > l) w=1; diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c index 8158a67374..3c8c540387 100644 --- a/src/lib/libcrypto/bn/bntest.c +++ b/src/lib/libcrypto/bn/bntest.c @@ -68,10 +68,6 @@ #include #include -#ifdef OPENSSL_SYS_WINDOWS -#include "../bio/bss_file.c" -#endif - const int num0 = 100; /* number of tests */ const int num1 = 50; /* additional tests for some functions */ const int num2 = 5; /* number of tests for slow functions */ @@ -96,11 +92,6 @@ int test_sqrt(BIO *bp,BN_CTX *ctx); int rand_neg(void); static int results=0; -#ifdef OPENSSL_NO_STDIO -#define APPS_WIN16 -#include "bss_file.c" -#endif - static unsigned char lst[]="\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9" "\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0"; @@ -141,10 +132,10 @@ int main(int argc, char *argv[]) ctx=BN_CTX_new(); - if (ctx == NULL) exit(1); + if (ctx == NULL) EXIT(1); out=BIO_new(BIO_s_file()); - if (out == NULL) exit(1); + if (out == NULL) EXIT(1); if (outfile == NULL) { BIO_set_fp(out,stdout,BIO_NOCLOSE); @@ -154,7 +145,7 @@ int main(int argc, char *argv[]) if (!BIO_write_filename(out,outfile)) { perror(outfile); - exit(1); + EXIT(1); } } @@ -238,14 +229,14 @@ int main(int argc, char *argv[]) BIO_free(out); /**/ - exit(0); + EXIT(0); err: BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices * the failure, see test_bn in test/Makefile.ssl*/ BIO_flush(out); ERR_load_crypto_strings(); ERR_print_errors_fp(stderr); - exit(1); + EXIT(1); return(1); } @@ -488,7 +479,7 @@ int test_mul(BIO *bp) BN_CTX *ctx; ctx = BN_CTX_new(); - if (ctx == NULL) exit(1); + if (ctx == NULL) EXIT(1); BN_init(&a); BN_init(&b); @@ -726,7 +717,7 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx) while ((l=ERR_get_error())) fprintf(stderr,"ERROR:%s\n", ERR_error_string(l,NULL)); - exit(1); + EXIT(1); } if (bp != NULL) { diff --git a/src/lib/libcrypto/bn/divtest.c b/src/lib/libcrypto/bn/divtest.c index 13ba86e3c4..d3fc688f33 100644 --- a/src/lib/libcrypto/bn/divtest.c +++ b/src/lib/libcrypto/bn/divtest.c @@ -1,7 +1,7 @@ #include #include -static int rand(n) +static int Rand(n) { unsigned char x[2]; RAND_pseudo_bytes(x,2); @@ -26,8 +26,8 @@ main() BN_CTX *ctx=BN_CTX_new(); for(;;) { - BN_pseudo_rand(a,rand(),0,0); - BN_pseudo_rand(b,rand(),0,0); + BN_pseudo_rand(a,Rand(),0,0); + BN_pseudo_rand(b,Rand(),0,0); if (BN_is_zero(b)) continue; BN_RECP_CTX_set(recp,b,ctx); diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c index 5ca570d1a8..b09cf88705 100644 --- a/src/lib/libcrypto/bn/exptest.c +++ b/src/lib/libcrypto/bn/exptest.c @@ -59,13 +59,13 @@ #include #include #include + +#include "../e_os.h" + #include #include #include #include -#ifdef OPENSSL_SYS_WINDOWS -#include "../bio/bss_file.c" -#endif #define NUM_BITS (BN_BITS*2) @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) ERR_load_BN_strings(); ctx=BN_CTX_new(); - if (ctx == NULL) exit(1); + if (ctx == NULL) EXIT(1); r_mont=BN_new(); r_recp=BN_new(); r_simple=BN_new(); @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) out=BIO_new(BIO_s_file()); - if (out == NULL) exit(1); + if (out == NULL) EXIT(1); BIO_set_fp(out,stdout,BIO_NOCLOSE); for (i=0; i<200; i++) @@ -124,7 +124,7 @@ int main(int argc, char *argv[]) { printf("BN_mod_exp_mont() problems\n"); ERR_print_errors(out); - exit(1); + EXIT(1); } ret=BN_mod_exp_recp(r_recp,a,b,m,ctx); @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) { printf("BN_mod_exp_recp() problems\n"); ERR_print_errors(out); - exit(1); + EXIT(1); } ret=BN_mod_exp_simple(r_simple,a,b,m,ctx); @@ -140,7 +140,7 @@ int main(int argc, char *argv[]) { printf("BN_mod_exp_simple() problems\n"); ERR_print_errors(out); - exit(1); + EXIT(1); } if (BN_cmp(r_simple, r_mont) == 0 @@ -163,7 +163,7 @@ int main(int argc, char *argv[]) printf("\nrecp ="); BN_print(out,r_recp); printf("\nmont ="); BN_print(out,r_mont); printf("\n"); - exit(1); + EXIT(1); } } BN_free(r_mont); @@ -177,11 +177,11 @@ int main(int argc, char *argv[]) CRYPTO_mem_leaks(out); BIO_free(out); printf(" done\n"); - exit(0); + EXIT(0); err: ERR_load_crypto_strings(); ERR_print_errors(out); - exit(1); + EXIT(1); return(1); } -- cgit v1.2.3-55-g6feb