From 8922d4bc4a8b8893d72a48deb2cdf58215f98505 Mon Sep 17 00:00:00 2001 From: djm <> Date: Fri, 1 Oct 2010 22:59:01 +0000 Subject: resolve conflicts, fix local changes --- src/lib/libcrypto/bn/Makefile | 79 +- src/lib/libcrypto/bn/Makefile.ssl | 326 -- src/lib/libcrypto/bn/asm/alpha.s | 3199 -------------------- src/lib/libcrypto/bn/asm/alpha.s.works | 533 ---- src/lib/libcrypto/bn/asm/alpha.works/add.pl | 119 - src/lib/libcrypto/bn/asm/alpha.works/div.pl | 144 - src/lib/libcrypto/bn/asm/alpha.works/mul.pl | 116 - src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | 120 - src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | 213 -- .../libcrypto/bn/asm/alpha.works/mul_c4.works.pl | 98 - src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | 177 -- src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | 113 - src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | 109 - src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | 132 - src/lib/libcrypto/bn/asm/alpha.works/sub.pl | 108 - src/lib/libcrypto/bn/asm/alpha/add.pl | 118 - src/lib/libcrypto/bn/asm/alpha/div.pl | 144 - src/lib/libcrypto/bn/asm/alpha/mul.pl | 104 - src/lib/libcrypto/bn/asm/alpha/mul_add.pl | 123 - src/lib/libcrypto/bn/asm/alpha/mul_c4.pl | 215 -- src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl | 98 - src/lib/libcrypto/bn/asm/alpha/mul_c8.pl | 177 -- src/lib/libcrypto/bn/asm/alpha/sqr.pl | 113 - src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl | 109 - src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl | 132 - src/lib/libcrypto/bn/asm/alpha/sub.pl | 108 - src/lib/libcrypto/bn/asm/bn-586.pl | 203 +- src/lib/libcrypto/bn/asm/bn-alpha.pl | 571 ---- src/lib/libcrypto/bn/asm/ca.pl | 33 - src/lib/libcrypto/bn/asm/co-586.pl | 3 +- src/lib/libcrypto/bn/asm/co-alpha.pl | 116 - src/lib/libcrypto/bn/asm/mips1.s | 539 ---- src/lib/libcrypto/bn/asm/mo-586.pl | 603 ---- src/lib/libcrypto/bn/asm/pa-risc.s | 710 ----- src/lib/libcrypto/bn/asm/r3000.s | 646 ---- src/lib/libcrypto/bn/asm/sparcv8plus.S | 15 +- src/lib/libcrypto/bn/bn.h | 181 +- src/lib/libcrypto/bn/bn_asm.c | 322 +- src/lib/libcrypto/bn/bn_blind.c | 17 +- src/lib/libcrypto/bn/bn_ctx.c | 6 +- src/lib/libcrypto/bn/bn_div.c | 13 +- src/lib/libcrypto/bn/bn_exp.c | 3 +- src/lib/libcrypto/bn/bn_gf2m.c | 142 +- src/lib/libcrypto/bn/bn_lcl.h | 3 +- src/lib/libcrypto/bn/bn_lib.c | 29 +- src/lib/libcrypto/bn/bn_mont.c | 269 +- src/lib/libcrypto/bn/bn_mul.c | 10 +- src/lib/libcrypto/bn/bn_opt.c | 87 - src/lib/libcrypto/bn/bn_print.c | 21 + src/lib/libcrypto/bn/bn_x931p.c | 272 -- src/lib/libcrypto/bn/bntest.c | 38 +- src/lib/libcrypto/bn/exptest.c | 4 +- 52 files changed, 732 insertions(+), 11151 deletions(-) delete mode 100644 src/lib/libcrypto/bn/Makefile.ssl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.s delete mode 100644 src/lib/libcrypto/bn/asm/alpha.s.works delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/add.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/div.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/mul.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/sqr.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha.works/sub.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/add.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/div.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/mul.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/mul_add.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/mul_c4.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/mul_c8.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/sqr.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl delete mode 100644 src/lib/libcrypto/bn/asm/alpha/sub.pl delete mode 100644 src/lib/libcrypto/bn/asm/bn-alpha.pl delete mode 100644 src/lib/libcrypto/bn/asm/ca.pl delete mode 100644 src/lib/libcrypto/bn/asm/co-alpha.pl delete mode 100644 src/lib/libcrypto/bn/asm/mips1.s delete mode 100644 src/lib/libcrypto/bn/asm/mo-586.pl delete mode 100644 src/lib/libcrypto/bn/asm/pa-risc.s delete mode 100644 src/lib/libcrypto/bn/asm/r3000.s delete mode 100644 src/lib/libcrypto/bn/bn_opt.c delete mode 100644 src/lib/libcrypto/bn/bn_x931p.c (limited to 'src/lib/libcrypto/bn') diff --git a/src/lib/libcrypto/bn/Makefile b/src/lib/libcrypto/bn/Makefile index f5e8f65a46..aabc4f56b8 100644 --- a/src/lib/libcrypto/bn/Makefile +++ b/src/lib/libcrypto/bn/Makefile @@ -12,8 +12,6 @@ MAKEFILE= Makefile AR= ar r BN_ASM= bn_asm.o -# or use -#BN_ASM= bn86-elf.o CFLAGS= $(INCLUDES) $(CFLAG) ASFLAGS= $(INCLUDES) $(ASFLAG) @@ -28,13 +26,13 @@ LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \ bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \ bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ - bn_depr.c bn_x931p.c bn_const.c bn_opt.c + bn_depr.c bn_const.c LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \ bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \ bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \ - bn_depr.o bn_x931p.o bn_const.o bn_opt.o + bn_depr.o bn_const.o SRC= $(LIBSRC) @@ -58,36 +56,25 @@ bnbug: bnbug.c ../../libcrypto.a top cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a lib: $(LIBOBJ) - $(ARX) $(LIB) $(LIBOBJ) + $(AR) $(LIB) $(LIBOBJ) $(RANLIB) $(LIB) || echo Never mind. @touch lib -# ELF -bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > ../$@) -co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > ../$@) -mo86-elf.s: asm/mo-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) mo-586.pl elf $(CFLAGS) > ../$@) -# COFF -bn86-cof.s: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl coff $(CFLAGS) > ../$@) -co86-cof.s: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl coff $(CFLAGS) > ../$@) -mo86-cof.s: asm/mo-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) mo-586.pl coff $(CFLAGS) > ../$@) -# a.out -bn86-out.s: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl a.out $(CFLAGS) > ../$@) -co86-out.s: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl a.out $(CFLAGS) > ../$@) -mo86-out.s: asm/mo-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) mo-586.pl a.out $(CFLAGS) > ../$@) +bn-586.s: asm/bn-586.pl ../perlasm/x86asm.pl + $(PERL) asm/bn-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ +co-586.s: asm/co-586.pl ../perlasm/x86asm.pl + $(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ +x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl + $(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ sparcv8.o: asm/sparcv8.S $(CC) $(CFLAGS) -c asm/sparcv8.S -sparcv8plus.o: asm/sparcv8plus.S - $(CC) $(CFLAGS) -c asm/sparcv8plus.S +bn-sparcv9.o: asm/sparcv8plus.S + $(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S +sparcv9a-mont.s: asm/sparcv9a-mont.pl + $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ +sparcv9-mont.s: asm/sparcv9-mont.pl + $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ bn-mips3.o: asm/mips3.s @if [ "$(CC)" = "gcc" ]; then \ @@ -95,10 +82,13 @@ bn-mips3.o: asm/mips3.s as -$$ABI -O -o $@ asm/mips3.s; \ else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi +bn-s390x.o: asm/s390x.S + $(CC) $(CFLAGS) -c -o $@ asm/s390x.S + x86_64-gcc.o: asm/x86_64-gcc.c $(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c x86_64-mont.s: asm/x86_64-mont.pl - $(PERL) asm/x86_64-mont.pl $@ + $(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@ bn-ia64.s: asm/ia64.S $(CC) $(CFLAGS) -E asm/ia64.S > $@ @@ -111,12 +101,14 @@ pa-risc2.o: asm/pa-risc2.s /usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s # ppc - AIX, Linux, MacOS X... -linux_ppc32.s: asm/ppc.pl; $(PERL) $< $@ -linux_ppc64.s: asm/ppc.pl; $(PERL) $< $@ -aix_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@ -aix_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@ -osx_ppc32.s: asm/ppc.pl; $(PERL) $< $@ -osx_ppc64.s: asm/ppc.pl; $(PERL) $< $@ +bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@ +ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@ + +alpha-mont.s: asm/alpha-mont.pl + $(PERL) $< | $(CC) -E - | tee $@ > /dev/null + +# GNU make "catch all" +%-mont.s: asm/%-mont.pl; $(PERL) $< $(CFLAGS) > $@ files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -184,8 +176,11 @@ bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h bn_blind.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h bn_blind.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_blind.c bn_lcl.h -bn_const.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h -bn_const.o: ../../include/openssl/ossl_typ.h bn.h bn_const.c +bn_const.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +bn_const.o: ../../include/openssl/opensslconf.h +bn_const.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_const.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_const.o: ../../include/openssl/symhacks.h bn.h bn_const.c bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -292,13 +287,6 @@ bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c -bn_opt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_opt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_opt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_opt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_opt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_opt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_opt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_opt.c bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -357,6 +345,3 @@ bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c -bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h -bn_x931p.o: ../../include/openssl/opensslconf.h -bn_x931p.o: ../../include/openssl/ossl_typ.h bn_x931p.c diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl deleted file mode 100644 index 50892ef44c..0000000000 --- a/src/lib/libcrypto/bn/Makefile.ssl +++ /dev/null @@ -1,326 +0,0 @@ -# -# SSLeay/crypto/bn/Makefile -# - -DIR= bn -TOP= ../.. -CC= cc -CPP= $(CC) -E -INCLUDES= -I.. -I$(TOP) -I../../include -CFLAG=-g -INSTALL_PREFIX= -OPENSSLDIR= /usr/local/ssl -INSTALLTOP=/usr/local/ssl -MAKE= make -f Makefile.ssl -MAKEDEPPROG= makedepend -MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG) -MAKEFILE= Makefile.ssl -AR= ar r - -BN_ASM= bn_asm.o -# or use -#BN_ASM= bn86-elf.o - -CFLAGS= $(INCLUDES) $(CFLAG) -ASFLAGS= $(INCLUDES) $(ASFLAG) - -GENERAL=Makefile -TEST=bntest.c exptest.c -APPS= - -LIB=$(TOP)/libcrypto.a -LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \ - bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ - bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \ - bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c - -LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \ - bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ - bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \ - bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o - -SRC= $(LIBSRC) - -EXHEADER= bn.h -HEADER= bn_lcl.h bn_prime.h $(EXHEADER) - -ALL= $(GENERAL) $(SRC) $(HEADER) - -top: - (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) - -all: lib - -bn_prime.h: bn_prime.pl - $(PERL) bn_prime.pl >bn_prime.h - -divtest: divtest.c ../../libcrypto.a - cc -I../../include divtest.c -o divtest ../../libcrypto.a - -bnbug: bnbug.c ../../libcrypto.a top - cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a - -lib: $(LIBOBJ) - $(AR) $(LIB) $(LIBOBJ) - $(RANLIB) $(LIB) || echo Never mind. - @touch lib - -# elf -asm/bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > bn86-elf.s) - -asm/co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > co86-elf.s) - -# a.out -asm/bn86-out.o: asm/bn86unix.cpp - $(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o - -asm/co86-out.o: asm/co86unix.cpp - $(CPP) -DOUT asm/co86unix.cpp | as -o asm/co86-out.o - -# bsdi -asm/bn86bsdi.o: asm/bn86unix.cpp - $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o asm/bn86bsdi.o - -asm/co86bsdi.o: asm/co86unix.cpp - $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o asm/co86bsdi.o - -asm/bn86unix.cpp: asm/bn-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) bn-586.pl cpp >bn86unix.cpp ) - -asm/co86unix.cpp: asm/co-586.pl ../perlasm/x86asm.pl - (cd asm; $(PERL) co-586.pl cpp >co86unix.cpp ) - -asm/sparcv8.o: asm/sparcv8.S - -asm/sparcv8plus.o: asm/sparcv8plus.S - -# Old GNU assembler doesn't understand V9 instructions, so we -# hire /usr/ccs/bin/as to do the job. Note that option is called -# *-gcc27, but even gcc 2>=8 users may experience similar problem -# if they didn't bother to upgrade GNU assembler. Such users should -# not choose this option, but be adviced to *remove* GNU assembler -# or upgrade it. -asm/sparcv8plus-gcc27.o: asm/sparcv8plus.S - $(CC) $(ASFLAGS) -E asm/sparcv8plus.S | \ - /usr/ccs/bin/as -xarch=v8plus - -o asm/sparcv8plus-gcc27.o - - -asm/ia64.o: asm/ia64.S - -# Some compiler drivers (most notably HP-UX and Intel C++) don't -# understand .S extension:-( I wish I could pipe output from cc -E, -# but it's too compiler driver/ABI dependent to cover with a single -# rule... -asm/ia64-cpp.o: asm/ia64.S - $(CC) $(ASFLAGS) -E asm/ia64.S > /tmp/ia64.$$$$.s && \ - $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \ - rm -f /tmp/ia64.$$$$.s - -asm/x86_64-gcc.o: asm/x86_64-gcc.c - -asm/pa-risc2W.o: asm/pa-risc2W.s - /usr/ccs/bin/as -o asm/pa-rics2W.o asm/pa-risc2W.s - -files: - $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO - -links: - @sh $(TOP)/util/point.sh Makefile.ssl Makefile - @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) - @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) - @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) - -install: - @for i in $(EXHEADER) ; \ - do \ - (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ - chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ - done; - -exptest: - rm -f exptest - gcc -I../../include -g2 -ggdb -o exptest exptest.c ../../libcrypto.a - -div: - rm -f a.out - gcc -I.. -g div.c ../../libcrypto.a - -tags: - ctags $(SRC) - -tests: - -lint: - lint -DLINT $(INCLUDES) $(SRC)>fluff - -depend: - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) - -dclean: - $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new - mv -f Makefile.new $(MAKEFILE) - -clean: - rm -f asm/co86unix.cpp asm/bn86unix.cpp asm/*-elf.* *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s - -# DO NOT DELETE THIS LINE -- make depend depends on it. - -bn_add.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_add.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_add.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_add.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_add.o: ../cryptlib.h bn_add.c bn_lcl.h -bn_asm.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_asm.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_asm.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_asm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_asm.o: ../cryptlib.h bn_asm.c bn_lcl.h -bn_blind.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_blind.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_blind.o: ../cryptlib.h bn_blind.c bn_lcl.h -bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_ctx.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_ctx.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_ctx.o: ../cryptlib.h bn_ctx.c bn_lcl.h -bn_div.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_div.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_div.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_div.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_div.o: ../cryptlib.h bn_div.c bn_lcl.h -bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -bn_err.o: ../../include/openssl/err.h ../../include/openssl/lhash.h -bn_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h -bn_err.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_err.o: ../../include/openssl/symhacks.h bn_err.c -bn_exp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_exp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h -bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_exp2.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_exp2.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_exp2.o: ../cryptlib.h bn_exp2.c bn_lcl.h -bn_gcd.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_gcd.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_gcd.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_gcd.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_gcd.o: ../cryptlib.h bn_gcd.c bn_lcl.h -bn_kron.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h -bn_kron.o: ../../include/openssl/opensslconf.h bn_kron.c bn_lcl.h -bn_lib.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_lib.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_lib.o: ../cryptlib.h bn_lcl.h bn_lib.c -bn_mod.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mod.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mod.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mod.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mod.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mod.o: ../cryptlib.h bn_lcl.h bn_mod.c -bn_mont.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mont.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mont.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mont.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mont.o: ../cryptlib.h bn_lcl.h bn_mont.c -bn_mpi.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mpi.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mpi.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mpi.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mpi.o: ../cryptlib.h bn_lcl.h bn_mpi.c -bn_mul.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mul.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mul.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mul.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_mul.o: ../cryptlib.h bn_lcl.h bn_mul.c -bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_prime.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_prime.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h -bn_prime.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.c bn_prime.h -bn_print.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_print.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_print.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_print.o: ../cryptlib.h bn_lcl.h bn_print.c -bn_rand.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_rand.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_rand.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h -bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_rand.o: ../cryptlib.h bn_lcl.h bn_rand.c -bn_recp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_recp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_recp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_recp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_recp.o: ../cryptlib.h bn_lcl.h bn_recp.c -bn_shift.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_shift.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_shift.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_shift.o: ../cryptlib.h bn_lcl.h bn_shift.c -bn_sqr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_sqr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_sqr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_sqr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_sqr.o: ../cryptlib.h bn_lcl.h bn_sqr.c -bn_sqrt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_sqrt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_sqrt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_sqrt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_sqrt.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_sqrt.o: ../cryptlib.h bn_lcl.h bn_sqrt.c -bn_word.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_word.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_word.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_word.o: ../cryptlib.h bn_lcl.h bn_word.c diff --git a/src/lib/libcrypto/bn/asm/alpha.s b/src/lib/libcrypto/bn/asm/alpha.s deleted file mode 100644 index 555ff0b92d..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.s +++ /dev/null @@ -1,3199 +0,0 @@ - # DEC Alpha assember - # The bn_div_words is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div_words. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - .align 3 -$42: - mulq $20,$19,$5 # 1 2 1 ###### - ldq $21,8($17) # 2 1 - ldq $2,8($16) # 2 1 - umulh $20,$19,$20 # 1 2 ###### - ldq $27,16($17) # 3 1 - ldq $3,16($16) # 3 1 - mulq $21,$19,$6 # 2 2 1 ###### - ldq $28,24($17) # 4 1 - addq $1,$5,$1 # 1 2 2 - ldq $4,24($16) # 4 1 - umulh $21,$19,$21 # 2 2 ###### - cmpult $1,$5,$22 # 1 2 3 1 - addq $20,$22,$20 # 1 3 1 - addq $1,$0,$1 # 1 2 3 1 - mulq $27,$19,$7 # 3 2 1 ###### - cmpult $1,$0,$0 # 1 2 3 2 - addq $2,$6,$2 # 2 2 2 - addq $20,$0,$0 # 1 3 2 - cmpult $2,$6,$23 # 2 2 3 1 - addq $21,$23,$21 # 2 3 1 - umulh $27,$19,$27 # 3 2 ###### - addq $2,$0,$2 # 2 2 3 1 - cmpult $2,$0,$0 # 2 2 3 2 - subq $18,4,$18 - mulq $28,$19,$8 # 4 2 1 ###### - addq $21,$0,$0 # 2 3 2 - addq $3,$7,$3 # 3 2 2 - addq $16,32,$16 - cmpult $3,$7,$24 # 3 2 3 1 - stq $1,-32($16) # 1 2 4 - umulh $28,$19,$28 # 4 2 ###### - addq $27,$24,$27 # 3 3 1 - addq $3,$0,$3 # 3 2 3 1 - stq $2,-24($16) # 2 2 4 - cmpult $3,$0,$0 # 3 2 3 2 - stq $3,-16($16) # 3 2 4 - addq $4,$8,$4 # 4 2 2 - addq $27,$0,$0 # 3 3 2 - cmpult $4,$8,$25 # 4 2 3 1 - addq $17,32,$17 - addq $28,$25,$28 # 4 3 1 - addq $4,$0,$4 # 4 2 3 1 - cmpult $4,$0,$0 # 4 2 3 2 - stq $4,-8($16) # 4 2 4 - addq $28,$0,$0 # 4 3 2 - blt $18,$43 - - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $20,0($17) # 4 1 - ldq $1,0($16) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $20,$19,$20 # 4 2 - addq $1,$5,$1 # 4 2 2 - cmpult $1,$5,$22 # 4 2 3 1 - addq $20,$22,$20 # 4 3 1 - addq $1,$0,$1 # 4 2 3 1 - cmpult $1,$0,$0 # 4 2 3 2 - addq $20,$0,$0 # 4 3 2 - stq $1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$142: - - mulq $20,$19,$5 # 1 2 1 ##### - ldq $21,8($17) # 2 1 - ldq $27,16($17) # 3 1 - umulh $20,$19,$20 # 1 2 ##### - ldq $28,24($17) # 4 1 - mulq $21,$19,$6 # 2 2 1 ##### - addq $5,$0,$5 # 1 2 3 1 - subq $18,4,$18 - cmpult $5,$0,$0 # 1 2 3 2 - umulh $21,$19,$21 # 2 2 ##### - addq $20,$0,$0 # 1 3 2 - addq $17,32,$17 - addq $6,$0,$6 # 2 2 3 1 - mulq $27,$19,$7 # 3 2 1 ##### - cmpult $6,$0,$0 # 2 2 3 2 - addq $21,$0,$0 # 2 3 2 - addq $16,32,$16 - umulh $27,$19,$27 # 3 2 ##### - stq $5,-32($16) # 1 2 4 - mulq $28,$19,$8 # 4 2 1 ##### - addq $7,$0,$7 # 3 2 3 1 - stq $6,-24($16) # 2 2 4 - cmpult $7,$0,$0 # 3 2 3 2 - umulh $28,$19,$28 # 4 2 ##### - addq $27,$0,$0 # 3 3 2 - stq $7,-16($16) # 3 2 4 - addq $8,$0,$8 # 4 2 3 1 - cmpult $8,$0,$0 # 4 2 3 2 - - addq $28,$0,$0 # 4 3 2 - - stq $8,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $20,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $20,0($17) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - umulh $20,$19,$20 # 4 2 - addq $5,$0,$5 # 4 2 3 1 - addq $16,8,$16 - cmpult $5,$0,$0 # 4 2 3 2 - addq $17,8,$17 - addq $20,$0,$0 # 4 3 2 - stq $5,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$542: - mulq $20,$20,$5 ###### - ldq $21,8($17) # 1 1 - subq $18,4 - umulh $20,$20,$1 ###### - ldq $27,16($17) # 1 1 - mulq $21,$21,$6 ###### - ldq $28,24($17) # 1 1 - stq $5,0($16) # r[0] - umulh $21,$21,$2 ###### - stq $1,8($16) # r[1] - mulq $27,$27,$7 ###### - stq $6,16($16) # r[0] - umulh $27,$27,$3 ###### - stq $2,24($16) # r[1] - mulq $28,$28,$8 ###### - stq $7,32($16) # r[0] - umulh $28,$28,$4 ###### - stq $3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $8,-16($16) # r[0] - stq $4,-8($16) # r[1] - - blt $18,$543 - ldq $20,0($17) # 1 1 - br $542 - -$442: - ldq $20,0($17) # a[0] - mulq $20,$20,$5 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $20,$20,$1 # a[0]*w high part r3 - stq $5,-16($16) # r[0] - stq $1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$0 # carry = 0 - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - .align 3 -$901: - addq $1,$5,$1 # r=a+b; - ldq $6,8($17) # a[1] - cmpult $1,$5,$22 # did we overflow? - ldq $2,8($18) # b[1] - addq $1,$0,$1 # c+= overflow - ldq $7,16($17) # a[2] - cmpult $1,$0,$0 # overflow? - ldq $3,16($18) # b[2] - addq $0,$22,$0 - ldq $8,24($17) # a[3] - addq $2,$6,$2 # r=a+b; - ldq $4,24($18) # b[3] - cmpult $2,$6,$23 # did we overflow? - addq $3,$7,$3 # r=a+b; - addq $2,$0,$2 # c+= overflow - cmpult $3,$7,$24 # did we overflow? - cmpult $2,$0,$0 # overflow? - addq $4,$8,$4 # r=a+b; - addq $0,$23,$0 - cmpult $4,$8,$25 # did we overflow? - addq $3,$0,$3 # c+= overflow - stq $1,0($16) # r[0]=c - cmpult $3,$0,$0 # overflow? - stq $2,8($16) # r[1]=c - addq $0,$24,$0 - stq $3,16($16) # r[2]=c - addq $4,$0,$4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $4,$0,$0 # overflow? - addq $17,32,$17 # a++ - addq $0,$25,$0 - stq $4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - addq $1,$5,$1 # r=a+b; - subq $19,1,$19 # loop-- - addq $1,$0,$1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $1,$5,$22 # did we overflow? - cmpult $1,$0,$0 # overflow? - addq $18,8,$18 # b++ - stq $1,0($16) # r[0]=c - addq $0,$22,$0 - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words: - ldgp $29,0($27) -bn_div_words..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words - - .set noat - .text - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19, 4, $19 - bis $31, $31, $0 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) -$101: - ldq $3, 8($17) - cmpult $1, $2, $4 - ldq $5, 8($18) - subq $1, $2, $1 - ldq $6, 16($17) - cmpult $1, $0, $2 - ldq $7, 16($18) - subq $1, $0, $23 - ldq $8, 24($17) - addq $2, $4, $0 - cmpult $3, $5, $24 - subq $3, $5, $3 - ldq $22, 24($18) - cmpult $3, $0, $5 - subq $3, $0, $25 - addq $5, $24, $0 - cmpult $6, $7, $27 - subq $6, $7, $6 - stq $23, 0($16) - cmpult $6, $0, $7 - subq $6, $0, $28 - addq $7, $27, $0 - cmpult $8, $22, $21 - subq $8, $22, $8 - stq $25, 8($16) - cmpult $8, $0, $22 - subq $8, $0, $20 - addq $22, $21, $0 - stq $28, 16($16) - subq $19, 4, $19 - stq $20, 24($16) - addq $17, 32, $17 - addq $18, 32, $18 - addq $16, 32, $16 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) - br $101 -$102: - ldq $1, 0($17) - ldq $2, 0($18) - cmpult $1, $2, $27 - subq $1, $2, $1 - cmpult $1, $0, $2 - subq $1, $0, $1 - stq $1, 0($16) - addq $2, $27, $0 - addq $17, 8, $17 - addq $18, 8, $18 - addq $16, 8, $16 - subq $19, 1, $19 - bgt $19, $102 - ret $31,($26),1 -$100: - addq $19, 4, $19 - bgt $19, $102 -$103: - ret $31,($26),1 - .end bn_sub_words - .text - .align 3 - .globl bn_mul_comba4 - .ent bn_mul_comba4 -bn_mul_comba4: -bn_mul_comba4..ng: - .frame $30,0,$26,0 - .prologue 0 - - ldq $0, 0($17) - ldq $1, 0($18) - ldq $2, 8($17) - ldq $3, 8($18) - ldq $4, 16($17) - ldq $5, 16($18) - ldq $6, 24($17) - ldq $7, 24($18) - bis $31, $31, $23 - mulq $0, $1, $8 - umulh $0, $1, $22 - stq $8, 0($16) - bis $31, $31, $8 - mulq $0, $3, $24 - umulh $0, $3, $25 - addq $22, $24, $22 - cmpult $22, $24, $27 - addq $27, $25, $25 - addq $23, $25, $23 - cmpult $23, $25, $28 - addq $8, $28, $8 - mulq $2, $1, $21 - umulh $2, $1, $20 - addq $22, $21, $22 - cmpult $22, $21, $19 - addq $19, $20, $20 - addq $23, $20, $23 - cmpult $23, $20, $17 - addq $8, $17, $8 - stq $22, 8($16) - bis $31, $31, $22 - mulq $2, $3, $18 - umulh $2, $3, $24 - addq $23, $18, $23 - cmpult $23, $18, $27 - addq $27, $24, $24 - addq $8, $24, $8 - cmpult $8, $24, $25 - addq $22, $25, $22 - mulq $0, $5, $28 - umulh $0, $5, $21 - addq $23, $28, $23 - cmpult $23, $28, $19 - addq $19, $21, $21 - addq $8, $21, $8 - cmpult $8, $21, $20 - addq $22, $20, $22 - mulq $4, $1, $17 - umulh $4, $1, $18 - addq $23, $17, $23 - cmpult $23, $17, $27 - addq $27, $18, $18 - addq $8, $18, $8 - cmpult $8, $18, $24 - addq $22, $24, $22 - stq $23, 16($16) - bis $31, $31, $23 - mulq $0, $7, $25 - umulh $0, $7, $28 - addq $8, $25, $8 - cmpult $8, $25, $19 - addq $19, $28, $28 - addq $22, $28, $22 - cmpult $22, $28, $21 - addq $23, $21, $23 - mulq $2, $5, $20 - umulh $2, $5, $17 - addq $8, $20, $8 - cmpult $8, $20, $27 - addq $27, $17, $17 - addq $22, $17, $22 - cmpult $22, $17, $18 - addq $23, $18, $23 - mulq $4, $3, $24 - umulh $4, $3, $25 - addq $8, $24, $8 - cmpult $8, $24, $19 - addq $19, $25, $25 - addq $22, $25, $22 - cmpult $22, $25, $28 - addq $23, $28, $23 - mulq $6, $1, $21 - umulh $6, $1, $0 - addq $8, $21, $8 - cmpult $8, $21, $20 - addq $20, $0, $0 - addq $22, $0, $22 - cmpult $22, $0, $27 - addq $23, $27, $23 - stq $8, 24($16) - bis $31, $31, $8 - mulq $2, $7, $17 - umulh $2, $7, $18 - addq $22, $17, $22 - cmpult $22, $17, $24 - addq $24, $18, $18 - addq $23, $18, $23 - cmpult $23, $18, $19 - addq $8, $19, $8 - mulq $4, $5, $25 - umulh $4, $5, $28 - addq $22, $25, $22 - cmpult $22, $25, $21 - addq $21, $28, $28 - addq $23, $28, $23 - cmpult $23, $28, $20 - addq $8, $20, $8 - mulq $6, $3, $0 - umulh $6, $3, $27 - addq $22, $0, $22 - cmpult $22, $0, $1 - addq $1, $27, $27 - addq $23, $27, $23 - cmpult $23, $27, $17 - addq $8, $17, $8 - stq $22, 32($16) - bis $31, $31, $22 - mulq $4, $7, $24 - umulh $4, $7, $18 - addq $23, $24, $23 - cmpult $23, $24, $19 - addq $19, $18, $18 - addq $8, $18, $8 - cmpult $8, $18, $2 - addq $22, $2, $22 - mulq $6, $5, $25 - umulh $6, $5, $21 - addq $23, $25, $23 - cmpult $23, $25, $28 - addq $28, $21, $21 - addq $8, $21, $8 - cmpult $8, $21, $20 - addq $22, $20, $22 - stq $23, 40($16) - bis $31, $31, $23 - mulq $6, $7, $0 - umulh $6, $7, $1 - addq $8, $0, $8 - cmpult $8, $0, $27 - addq $27, $1, $1 - addq $22, $1, $22 - cmpult $22, $1, $17 - addq $23, $17, $23 - stq $8, 48($16) - stq $22, 56($16) - ret $31,($26),1 - .end bn_mul_comba4 - .text - .align 3 - .globl bn_mul_comba8 - .ent bn_mul_comba8 -bn_mul_comba8: -bn_mul_comba8..ng: - .frame $30,0,$26,0 - .prologue 0 - ldq $1, 0($17) - ldq $2, 0($18) - zapnot $1, 15, $7 - srl $2, 32, $8 - mulq $8, $7, $22 - srl $1, 32, $6 - zapnot $2, 15, $5 - mulq $5, $6, $4 - mulq $7, $5, $24 - addq $22, $4, $22 - cmpult $22, $4, $1 - mulq $6, $8, $3 - beq $1, $173 - bis $31, 1, $1 - sll $1, 32, $1 - addq $3, $1, $3 -$173: - sll $22, 32, $4 - addq $24, $4, $24 - stq $24, 0($16) - ldq $2, 0($17) - ldq $1, 8($18) - zapnot $2, 15, $7 - srl $1, 32, $8 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $2, 32, $6 - mulq $5, $6, $23 - mulq $6, $8, $6 - srl $22, 32, $1 - cmpult $24, $4, $2 - addq $3, $1, $3 - addq $2, $3, $22 - addq $25, $23, $25 - cmpult $25, $23, $1 - bis $31, 1, $2 - beq $1, $177 - sll $2, 32, $1 - addq $6, $1, $6 -$177: - sll $25, 32, $23 - ldq $1, 0($18) - addq $0, $23, $0 - bis $0, $0, $7 - ldq $3, 8($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $4 - zapnot $3, 15, $7 - mulq $8, $7, $28 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $25, 32, $1 - cmpult $0, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $4, $6, $24 - srl $3, 32, $6 - mulq $5, $6, $2 - mulq $6, $8, $6 - addq $28, $2, $28 - cmpult $28, $2, $1 - bis $31, 1, $2 - beq $1, $181 - sll $2, 32, $1 - addq $6, $1, $6 -$181: - sll $28, 32, $2 - addq $21, $2, $21 - bis $21, $21, $7 - addq $22, $7, $22 - stq $22, 8($16) - ldq $3, 16($17) - ldq $1, 0($18) - cmpult $22, $7, $4 - zapnot $3, 15, $7 - srl $1, 32, $8 - mulq $8, $7, $22 - zapnot $1, 15, $5 - mulq $7, $5, $20 - srl $28, 32, $1 - cmpult $21, $2, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $4, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $3, 32, $6 - mulq $5, $6, $2 - mulq $6, $8, $6 - addq $22, $2, $22 - cmpult $22, $2, $1 - bis $31, 1, $2 - beq $1, $185 - sll $2, 32, $1 - addq $6, $1, $6 -$185: - sll $22, 32, $2 - ldq $1, 8($18) - addq $20, $2, $20 - bis $20, $20, $7 - ldq $4, 8($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $22, 32, $1 - cmpult $20, $2, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $189 - sll $21, 32, $1 - addq $6, $1, $6 -$189: - sll $25, 32, $5 - ldq $2, 16($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 0($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $193 - sll $21, 32, $1 - addq $6, $1, $6 -$193: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 16($16) - ldq $4, 0($17) - ldq $5, 24($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $0, $24, $0 - cmpult $0, $24, $1 - mulq $6, $8, $6 - beq $1, $197 - sll $21, 32, $1 - addq $6, $1, $6 -$197: - sll $0, 32, $24 - ldq $1, 16($18) - addq $2, $24, $2 - bis $2, $2, $7 - ldq $4, 8($17) - addq $23, $7, $23 - srl $1, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $24, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $201 - sll $20, 32, $1 - addq $6, $1, $6 -$201: - sll $25, 32, $5 - ldq $2, 8($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 16($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $205 - sll $20, 32, $1 - addq $6, $1, $6 -$205: - sll $28, 32, $25 - ldq $2, 0($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $209 - sll $20, 32, $1 - addq $6, $1, $6 -$209: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 24($16) - ldq $4, 32($17) - ldq $5, 0($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $28, $23, $28 - cmpult $28, $23, $1 - mulq $6, $8, $6 - beq $1, $213 - sll $20, 32, $1 - addq $6, $1, $6 -$213: - sll $28, 32, $23 - ldq $1, 8($18) - addq $2, $23, $2 - bis $2, $2, $7 - ldq $4, 24($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $217 - sll $21, 32, $1 - addq $6, $1, $6 -$217: - sll $25, 32, $5 - ldq $2, 16($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 16($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $221 - sll $21, 32, $1 - addq $6, $1, $6 -$221: - sll $28, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 8($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $225 - sll $21, 32, $1 - addq $6, $1, $6 -$225: - sll $0, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 0($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $229 - sll $21, 32, $1 - addq $6, $1, $6 -$229: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 32($16) - ldq $4, 0($17) - ldq $5, 40($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $0, $22, $0 - cmpult $0, $22, $1 - mulq $6, $8, $6 - beq $1, $233 - sll $21, 32, $1 - addq $6, $1, $6 -$233: - sll $0, 32, $22 - ldq $1, 32($18) - addq $2, $22, $2 - bis $2, $2, $7 - ldq $4, 8($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $22, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $237 - sll $20, 32, $1 - addq $6, $1, $6 -$237: - sll $25, 32, $5 - ldq $2, 24($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 16($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $241 - sll $20, 32, $1 - addq $6, $1, $6 -$241: - sll $28, 32, $25 - ldq $2, 16($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $245 - sll $20, 32, $1 - addq $6, $1, $6 -$245: - sll $0, 32, $25 - ldq $2, 8($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $249 - sll $20, 32, $1 - addq $6, $1, $6 -$249: - sll $28, 32, $25 - ldq $2, 0($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $253 - sll $20, 32, $1 - addq $6, $1, $6 -$253: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 40($16) - ldq $4, 48($17) - ldq $5, 0($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $28, $24, $28 - cmpult $28, $24, $1 - mulq $6, $8, $6 - beq $1, $257 - sll $20, 32, $1 - addq $6, $1, $6 -$257: - sll $28, 32, $24 - ldq $1, 8($18) - addq $2, $24, $2 - bis $2, $2, $7 - ldq $4, 40($17) - addq $23, $7, $23 - srl $1, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $24, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $261 - sll $21, 32, $1 - addq $6, $1, $6 -$261: - sll $25, 32, $5 - ldq $2, 16($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 32($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $265 - sll $21, 32, $1 - addq $6, $1, $6 -$265: - sll $28, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $269 - sll $21, 32, $1 - addq $6, $1, $6 -$269: - sll $0, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 16($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $273 - sll $21, 32, $1 - addq $6, $1, $6 -$273: - sll $28, 32, $25 - ldq $2, 40($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 8($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $277 - sll $21, 32, $1 - addq $6, $1, $6 -$277: - sll $0, 32, $25 - ldq $2, 48($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 0($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $281 - sll $21, 32, $1 - addq $6, $1, $6 -$281: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 48($16) - ldq $4, 0($17) - ldq $5, 56($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $23, $0 - cmpult $0, $23, $1 - mulq $6, $8, $6 - beq $1, $285 - sll $21, 32, $1 - addq $6, $1, $6 -$285: - sll $0, 32, $23 - ldq $1, 48($18) - addq $2, $23, $2 - bis $2, $2, $7 - ldq $4, 8($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $289 - sll $20, 32, $1 - addq $6, $1, $6 -$289: - sll $25, 32, $5 - ldq $2, 40($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 16($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $293 - sll $20, 32, $1 - addq $6, $1, $6 -$293: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $297 - sll $20, 32, $1 - addq $6, $1, $6 -$297: - sll $0, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $301 - sll $20, 32, $1 - addq $6, $1, $6 -$301: - sll $28, 32, $25 - ldq $2, 16($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $305 - sll $20, 32, $1 - addq $6, $1, $6 -$305: - sll $0, 32, $25 - ldq $2, 8($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 48($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $309 - sll $20, 32, $1 - addq $6, $1, $6 -$309: - sll $28, 32, $25 - ldq $2, 0($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $313 - sll $20, 32, $1 - addq $6, $1, $6 -$313: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 56($16) - ldq $4, 56($17) - ldq $5, 8($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $22, $28 - cmpult $28, $22, $1 - mulq $6, $8, $6 - beq $1, $317 - sll $20, 32, $1 - addq $6, $1, $6 -$317: - sll $28, 32, $22 - ldq $1, 16($18) - addq $2, $22, $2 - bis $2, $2, $7 - ldq $4, 48($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $22, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $321 - sll $21, 32, $1 - addq $6, $1, $6 -$321: - sll $25, 32, $5 - ldq $2, 24($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 40($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $325 - sll $21, 32, $1 - addq $6, $1, $6 -$325: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $329 - sll $21, 32, $1 - addq $6, $1, $6 -$329: - sll $0, 32, $25 - ldq $2, 40($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $333 - sll $21, 32, $1 - addq $6, $1, $6 -$333: - sll $28, 32, $25 - ldq $2, 48($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 16($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $337 - sll $21, 32, $1 - addq $6, $1, $6 -$337: - sll $0, 32, $25 - ldq $2, 56($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 8($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $341 - sll $21, 32, $1 - addq $6, $1, $6 -$341: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 64($16) - ldq $4, 16($17) - ldq $5, 56($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $0, $24, $0 - cmpult $0, $24, $1 - mulq $6, $8, $6 - beq $1, $345 - sll $21, 32, $1 - addq $6, $1, $6 -$345: - sll $0, 32, $24 - ldq $1, 48($18) - addq $2, $24, $2 - bis $2, $2, $7 - ldq $4, 24($17) - addq $23, $7, $23 - srl $1, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $24, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $349 - sll $20, 32, $1 - addq $6, $1, $6 -$349: - sll $25, 32, $5 - ldq $2, 40($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 32($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $353 - sll $20, 32, $1 - addq $6, $1, $6 -$353: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $357 - sll $20, 32, $1 - addq $6, $1, $6 -$357: - sll $0, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 48($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $361 - sll $20, 32, $1 - addq $6, $1, $6 -$361: - sll $28, 32, $25 - ldq $2, 16($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $365 - sll $20, 32, $1 - addq $6, $1, $6 -$365: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 72($16) - ldq $4, 56($17) - ldq $5, 24($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $28, $23, $28 - cmpult $28, $23, $1 - mulq $6, $8, $6 - beq $1, $369 - sll $20, 32, $1 - addq $6, $1, $6 -$369: - sll $28, 32, $23 - ldq $1, 32($18) - addq $2, $23, $2 - bis $2, $2, $7 - ldq $4, 48($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $373 - sll $21, 32, $1 - addq $6, $1, $6 -$373: - sll $25, 32, $5 - ldq $2, 40($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 40($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $377 - sll $21, 32, $1 - addq $6, $1, $6 -$377: - sll $28, 32, $25 - ldq $2, 48($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $381 - sll $21, 32, $1 - addq $6, $1, $6 -$381: - sll $0, 32, $25 - ldq $2, 56($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $385 - sll $21, 32, $1 - addq $6, $1, $6 -$385: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 80($16) - ldq $4, 32($17) - ldq $5, 56($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $0, $22, $0 - cmpult $0, $22, $1 - mulq $6, $8, $6 - beq $1, $389 - sll $21, 32, $1 - addq $6, $1, $6 -$389: - sll $0, 32, $22 - ldq $1, 48($18) - addq $2, $22, $2 - bis $2, $2, $7 - ldq $4, 40($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $22, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $393 - sll $20, 32, $1 - addq $6, $1, $6 -$393: - sll $25, 32, $5 - ldq $2, 40($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 48($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $397 - sll $20, 32, $1 - addq $6, $1, $6 -$397: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $21 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $21, $25, $21 - cmpult $21, $25, $1 - mulq $6, $8, $6 - beq $1, $401 - sll $20, 32, $1 - addq $6, $1, $6 -$401: - sll $21, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 88($16) - ldq $4, 56($17) - ldq $5, 40($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $21, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $24, $0 - cmpult $0, $24, $1 - mulq $6, $8, $6 - beq $1, $405 - sll $20, 32, $1 - addq $6, $1, $6 -$405: - sll $0, 32, $24 - ldq $2, 48($18) - addq $5, $24, $5 - bis $5, $5, $7 - ldq $4, 48($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $24, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $409 - sll $20, 32, $1 - addq $6, $1, $6 -$409: - sll $28, 32, $25 - ldq $2, 56($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $413 - sll $20, 32, $1 - addq $6, $1, $6 -$413: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 96($16) - ldq $4, 48($17) - ldq $5, 56($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $23, $28 - cmpult $28, $23, $1 - mulq $6, $8, $6 - beq $1, $417 - sll $20, 32, $1 - addq $6, $1, $6 -$417: - sll $28, 32, $23 - ldq $2, 48($18) - addq $5, $23, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $23, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $421 - sll $20, 32, $1 - addq $6, $1, $6 -$421: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 104($16) - ldq $4, 56($17) - ldq $5, 56($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $22, $28 - cmpult $28, $22, $1 - mulq $6, $8, $3 - beq $1, $425 - sll $20, 32, $1 - addq $3, $1, $3 -$425: - sll $28, 32, $22 - srl $28, 32, $1 - addq $2, $22, $2 - addq $3, $1, $3 - bis $2, $2, $7 - addq $24, $7, $24 - cmpult $7, $22, $1 - cmpult $24, $7, $2 - addq $1, $3, $6 - addq $2, $6, $6 - stq $24, 112($16) - addq $23, $6, $23 - stq $23, 120($16) - ret $31, ($26), 1 - .end bn_mul_comba8 - .text - .align 3 - .globl bn_sqr_comba4 - .ent bn_sqr_comba4 -bn_sqr_comba4: -bn_sqr_comba4..ng: - .frame $30,0,$26,0 - .prologue 0 - - ldq $0, 0($17) - ldq $1, 8($17) - ldq $2, 16($17) - ldq $3, 24($17) - bis $31, $31, $6 - mulq $0, $0, $4 - umulh $0, $0, $5 - stq $4, 0($16) - bis $31, $31, $4 - mulq $0, $1, $7 - umulh $0, $1, $8 - cmplt $7, $31, $22 - cmplt $8, $31, $23 - addq $7, $7, $7 - addq $8, $8, $8 - addq $8, $22, $8 - addq $4, $23, $4 - addq $5, $7, $5 - addq $6, $8, $6 - cmpult $5, $7, $24 - cmpult $6, $8, $25 - addq $6, $24, $6 - addq $4, $25, $4 - stq $5, 8($16) - bis $31, $31, $5 - mulq $1, $1, $27 - umulh $1, $1, $28 - addq $6, $27, $6 - addq $4, $28, $4 - cmpult $6, $27, $21 - cmpult $4, $28, $20 - addq $4, $21, $4 - addq $5, $20, $5 - mulq $2, $0, $19 - umulh $2, $0, $18 - cmplt $19, $31, $17 - cmplt $18, $31, $22 - addq $19, $19, $19 - addq $18, $18, $18 - addq $18, $17, $18 - addq $5, $22, $5 - addq $6, $19, $6 - addq $4, $18, $4 - cmpult $6, $19, $23 - cmpult $4, $18, $7 - addq $4, $23, $4 - addq $5, $7, $5 - stq $6, 16($16) - bis $31, $31, $6 - mulq $3, $0, $8 - umulh $3, $0, $24 - cmplt $8, $31, $25 - cmplt $24, $31, $27 - addq $8, $8, $8 - addq $24, $24, $24 - addq $24, $25, $24 - addq $6, $27, $6 - addq $4, $8, $4 - addq $5, $24, $5 - cmpult $4, $8, $28 - cmpult $5, $24, $21 - addq $5, $28, $5 - addq $6, $21, $6 - mulq $2, $1, $20 - umulh $2, $1, $17 - cmplt $20, $31, $22 - cmplt $17, $31, $19 - addq $20, $20, $20 - addq $17, $17, $17 - addq $17, $22, $17 - addq $6, $19, $6 - addq $4, $20, $4 - addq $5, $17, $5 - cmpult $4, $20, $18 - cmpult $5, $17, $23 - addq $5, $18, $5 - addq $6, $23, $6 - stq $4, 24($16) - bis $31, $31, $4 - mulq $2, $2, $7 - umulh $2, $2, $25 - addq $5, $7, $5 - addq $6, $25, $6 - cmpult $5, $7, $27 - cmpult $6, $25, $8 - addq $6, $27, $6 - addq $4, $8, $4 - mulq $3, $1, $24 - umulh $3, $1, $28 - cmplt $24, $31, $21 - cmplt $28, $31, $22 - addq $24, $24, $24 - addq $28, $28, $28 - addq $28, $21, $28 - addq $4, $22, $4 - addq $5, $24, $5 - addq $6, $28, $6 - cmpult $5, $24, $19 - cmpult $6, $28, $20 - addq $6, $19, $6 - addq $4, $20, $4 - stq $5, 32($16) - bis $31, $31, $5 - mulq $3, $2, $17 - umulh $3, $2, $18 - cmplt $17, $31, $23 - cmplt $18, $31, $7 - addq $17, $17, $17 - addq $18, $18, $18 - addq $18, $23, $18 - addq $5, $7, $5 - addq $6, $17, $6 - addq $4, $18, $4 - cmpult $6, $17, $25 - cmpult $4, $18, $27 - addq $4, $25, $4 - addq $5, $27, $5 - stq $6, 40($16) - bis $31, $31, $6 - mulq $3, $3, $8 - umulh $3, $3, $21 - addq $4, $8, $4 - addq $5, $21, $5 - cmpult $4, $8, $22 - cmpult $5, $21, $24 - addq $5, $22, $5 - addq $6, $24, $6 - stq $4, 48($16) - stq $5, 56($16) - ret $31,($26),1 - .end bn_sqr_comba4 - .text - .align 3 - .globl bn_sqr_comba8 - .ent bn_sqr_comba8 -bn_sqr_comba8: -bn_sqr_comba8..ng: - .frame $30,0,$26,0 - .prologue 0 - - ldq $0, 0($17) - ldq $1, 8($17) - ldq $2, 16($17) - ldq $3, 24($17) - ldq $4, 32($17) - ldq $5, 40($17) - ldq $6, 48($17) - ldq $7, 56($17) - bis $31, $31, $23 - mulq $0, $0, $8 - umulh $0, $0, $22 - stq $8, 0($16) - bis $31, $31, $8 - mulq $1, $0, $24 - umulh $1, $0, $25 - cmplt $24, $31, $27 - cmplt $25, $31, $28 - addq $24, $24, $24 - addq $25, $25, $25 - addq $25, $27, $25 - addq $8, $28, $8 - addq $22, $24, $22 - addq $23, $25, $23 - cmpult $22, $24, $21 - cmpult $23, $25, $20 - addq $23, $21, $23 - addq $8, $20, $8 - stq $22, 8($16) - bis $31, $31, $22 - mulq $1, $1, $19 - umulh $1, $1, $18 - addq $23, $19, $23 - addq $8, $18, $8 - cmpult $23, $19, $17 - cmpult $8, $18, $27 - addq $8, $17, $8 - addq $22, $27, $22 - mulq $2, $0, $28 - umulh $2, $0, $24 - cmplt $28, $31, $25 - cmplt $24, $31, $21 - addq $28, $28, $28 - addq $24, $24, $24 - addq $24, $25, $24 - addq $22, $21, $22 - addq $23, $28, $23 - addq $8, $24, $8 - cmpult $23, $28, $20 - cmpult $8, $24, $19 - addq $8, $20, $8 - addq $22, $19, $22 - stq $23, 16($16) - bis $31, $31, $23 - mulq $2, $1, $18 - umulh $2, $1, $17 - cmplt $18, $31, $27 - cmplt $17, $31, $25 - addq $18, $18, $18 - addq $17, $17, $17 - addq $17, $27, $17 - addq $23, $25, $23 - addq $8, $18, $8 - addq $22, $17, $22 - cmpult $8, $18, $21 - cmpult $22, $17, $28 - addq $22, $21, $22 - addq $23, $28, $23 - mulq $3, $0, $24 - umulh $3, $0, $20 - cmplt $24, $31, $19 - cmplt $20, $31, $27 - addq $24, $24, $24 - addq $20, $20, $20 - addq $20, $19, $20 - addq $23, $27, $23 - addq $8, $24, $8 - addq $22, $20, $22 - cmpult $8, $24, $25 - cmpult $22, $20, $18 - addq $22, $25, $22 - addq $23, $18, $23 - stq $8, 24($16) - bis $31, $31, $8 - mulq $2, $2, $17 - umulh $2, $2, $21 - addq $22, $17, $22 - addq $23, $21, $23 - cmpult $22, $17, $28 - cmpult $23, $21, $19 - addq $23, $28, $23 - addq $8, $19, $8 - mulq $3, $1, $27 - umulh $3, $1, $24 - cmplt $27, $31, $20 - cmplt $24, $31, $25 - addq $27, $27, $27 - addq $24, $24, $24 - addq $24, $20, $24 - addq $8, $25, $8 - addq $22, $27, $22 - addq $23, $24, $23 - cmpult $22, $27, $18 - cmpult $23, $24, $17 - addq $23, $18, $23 - addq $8, $17, $8 - mulq $4, $0, $21 - umulh $4, $0, $28 - cmplt $21, $31, $19 - cmplt $28, $31, $20 - addq $21, $21, $21 - addq $28, $28, $28 - addq $28, $19, $28 - addq $8, $20, $8 - addq $22, $21, $22 - addq $23, $28, $23 - cmpult $22, $21, $25 - cmpult $23, $28, $27 - addq $23, $25, $23 - addq $8, $27, $8 - stq $22, 32($16) - bis $31, $31, $22 - mulq $3, $2, $24 - umulh $3, $2, $18 - cmplt $24, $31, $17 - cmplt $18, $31, $19 - addq $24, $24, $24 - addq $18, $18, $18 - addq $18, $17, $18 - addq $22, $19, $22 - addq $23, $24, $23 - addq $8, $18, $8 - cmpult $23, $24, $20 - cmpult $8, $18, $21 - addq $8, $20, $8 - addq $22, $21, $22 - mulq $4, $1, $28 - umulh $4, $1, $25 - cmplt $28, $31, $27 - cmplt $25, $31, $17 - addq $28, $28, $28 - addq $25, $25, $25 - addq $25, $27, $25 - addq $22, $17, $22 - addq $23, $28, $23 - addq $8, $25, $8 - cmpult $23, $28, $19 - cmpult $8, $25, $24 - addq $8, $19, $8 - addq $22, $24, $22 - mulq $5, $0, $18 - umulh $5, $0, $20 - cmplt $18, $31, $21 - cmplt $20, $31, $27 - addq $18, $18, $18 - addq $20, $20, $20 - addq $20, $21, $20 - addq $22, $27, $22 - addq $23, $18, $23 - addq $8, $20, $8 - cmpult $23, $18, $17 - cmpult $8, $20, $28 - addq $8, $17, $8 - addq $22, $28, $22 - stq $23, 40($16) - bis $31, $31, $23 - mulq $3, $3, $25 - umulh $3, $3, $19 - addq $8, $25, $8 - addq $22, $19, $22 - cmpult $8, $25, $24 - cmpult $22, $19, $21 - addq $22, $24, $22 - addq $23, $21, $23 - mulq $4, $2, $27 - umulh $4, $2, $18 - cmplt $27, $31, $20 - cmplt $18, $31, $17 - addq $27, $27, $27 - addq $18, $18, $18 - addq $18, $20, $18 - addq $23, $17, $23 - addq $8, $27, $8 - addq $22, $18, $22 - cmpult $8, $27, $28 - cmpult $22, $18, $25 - addq $22, $28, $22 - addq $23, $25, $23 - mulq $5, $1, $19 - umulh $5, $1, $24 - cmplt $19, $31, $21 - cmplt $24, $31, $20 - addq $19, $19, $19 - addq $24, $24, $24 - addq $24, $21, $24 - addq $23, $20, $23 - addq $8, $19, $8 - addq $22, $24, $22 - cmpult $8, $19, $17 - cmpult $22, $24, $27 - addq $22, $17, $22 - addq $23, $27, $23 - mulq $6, $0, $18 - umulh $6, $0, $28 - cmplt $18, $31, $25 - cmplt $28, $31, $21 - addq $18, $18, $18 - addq $28, $28, $28 - addq $28, $25, $28 - addq $23, $21, $23 - addq $8, $18, $8 - addq $22, $28, $22 - cmpult $8, $18, $20 - cmpult $22, $28, $19 - addq $22, $20, $22 - addq $23, $19, $23 - stq $8, 48($16) - bis $31, $31, $8 - mulq $4, $3, $24 - umulh $4, $3, $17 - cmplt $24, $31, $27 - cmplt $17, $31, $25 - addq $24, $24, $24 - addq $17, $17, $17 - addq $17, $27, $17 - addq $8, $25, $8 - addq $22, $24, $22 - addq $23, $17, $23 - cmpult $22, $24, $21 - cmpult $23, $17, $18 - addq $23, $21, $23 - addq $8, $18, $8 - mulq $5, $2, $28 - umulh $5, $2, $20 - cmplt $28, $31, $19 - cmplt $20, $31, $27 - addq $28, $28, $28 - addq $20, $20, $20 - addq $20, $19, $20 - addq $8, $27, $8 - addq $22, $28, $22 - addq $23, $20, $23 - cmpult $22, $28, $25 - cmpult $23, $20, $24 - addq $23, $25, $23 - addq $8, $24, $8 - mulq $6, $1, $17 - umulh $6, $1, $21 - cmplt $17, $31, $18 - cmplt $21, $31, $19 - addq $17, $17, $17 - addq $21, $21, $21 - addq $21, $18, $21 - addq $8, $19, $8 - addq $22, $17, $22 - addq $23, $21, $23 - cmpult $22, $17, $27 - cmpult $23, $21, $28 - addq $23, $27, $23 - addq $8, $28, $8 - mulq $7, $0, $20 - umulh $7, $0, $25 - cmplt $20, $31, $24 - cmplt $25, $31, $18 - addq $20, $20, $20 - addq $25, $25, $25 - addq $25, $24, $25 - addq $8, $18, $8 - addq $22, $20, $22 - addq $23, $25, $23 - cmpult $22, $20, $19 - cmpult $23, $25, $17 - addq $23, $19, $23 - addq $8, $17, $8 - stq $22, 56($16) - bis $31, $31, $22 - mulq $4, $4, $21 - umulh $4, $4, $27 - addq $23, $21, $23 - addq $8, $27, $8 - cmpult $23, $21, $28 - cmpult $8, $27, $24 - addq $8, $28, $8 - addq $22, $24, $22 - mulq $5, $3, $18 - umulh $5, $3, $20 - cmplt $18, $31, $25 - cmplt $20, $31, $19 - addq $18, $18, $18 - addq $20, $20, $20 - addq $20, $25, $20 - addq $22, $19, $22 - addq $23, $18, $23 - addq $8, $20, $8 - cmpult $23, $18, $17 - cmpult $8, $20, $21 - addq $8, $17, $8 - addq $22, $21, $22 - mulq $6, $2, $27 - umulh $6, $2, $28 - cmplt $27, $31, $24 - cmplt $28, $31, $25 - addq $27, $27, $27 - addq $28, $28, $28 - addq $28, $24, $28 - addq $22, $25, $22 - addq $23, $27, $23 - addq $8, $28, $8 - cmpult $23, $27, $19 - cmpult $8, $28, $18 - addq $8, $19, $8 - addq $22, $18, $22 - mulq $7, $1, $20 - umulh $7, $1, $17 - cmplt $20, $31, $21 - cmplt $17, $31, $24 - addq $20, $20, $20 - addq $17, $17, $17 - addq $17, $21, $17 - addq $22, $24, $22 - addq $23, $20, $23 - addq $8, $17, $8 - cmpult $23, $20, $25 - cmpult $8, $17, $27 - addq $8, $25, $8 - addq $22, $27, $22 - stq $23, 64($16) - bis $31, $31, $23 - mulq $5, $4, $28 - umulh $5, $4, $19 - cmplt $28, $31, $18 - cmplt $19, $31, $21 - addq $28, $28, $28 - addq $19, $19, $19 - addq $19, $18, $19 - addq $23, $21, $23 - addq $8, $28, $8 - addq $22, $19, $22 - cmpult $8, $28, $24 - cmpult $22, $19, $20 - addq $22, $24, $22 - addq $23, $20, $23 - mulq $6, $3, $17 - umulh $6, $3, $25 - cmplt $17, $31, $27 - cmplt $25, $31, $18 - addq $17, $17, $17 - addq $25, $25, $25 - addq $25, $27, $25 - addq $23, $18, $23 - addq $8, $17, $8 - addq $22, $25, $22 - cmpult $8, $17, $21 - cmpult $22, $25, $28 - addq $22, $21, $22 - addq $23, $28, $23 - mulq $7, $2, $19 - umulh $7, $2, $24 - cmplt $19, $31, $20 - cmplt $24, $31, $27 - addq $19, $19, $19 - addq $24, $24, $24 - addq $24, $20, $24 - addq $23, $27, $23 - addq $8, $19, $8 - addq $22, $24, $22 - cmpult $8, $19, $18 - cmpult $22, $24, $17 - addq $22, $18, $22 - addq $23, $17, $23 - stq $8, 72($16) - bis $31, $31, $8 - mulq $5, $5, $25 - umulh $5, $5, $21 - addq $22, $25, $22 - addq $23, $21, $23 - cmpult $22, $25, $28 - cmpult $23, $21, $20 - addq $23, $28, $23 - addq $8, $20, $8 - mulq $6, $4, $27 - umulh $6, $4, $19 - cmplt $27, $31, $24 - cmplt $19, $31, $18 - addq $27, $27, $27 - addq $19, $19, $19 - addq $19, $24, $19 - addq $8, $18, $8 - addq $22, $27, $22 - addq $23, $19, $23 - cmpult $22, $27, $17 - cmpult $23, $19, $25 - addq $23, $17, $23 - addq $8, $25, $8 - mulq $7, $3, $21 - umulh $7, $3, $28 - cmplt $21, $31, $20 - cmplt $28, $31, $24 - addq $21, $21, $21 - addq $28, $28, $28 - addq $28, $20, $28 - addq $8, $24, $8 - addq $22, $21, $22 - addq $23, $28, $23 - cmpult $22, $21, $18 - cmpult $23, $28, $27 - addq $23, $18, $23 - addq $8, $27, $8 - stq $22, 80($16) - bis $31, $31, $22 - mulq $6, $5, $19 - umulh $6, $5, $17 - cmplt $19, $31, $25 - cmplt $17, $31, $20 - addq $19, $19, $19 - addq $17, $17, $17 - addq $17, $25, $17 - addq $22, $20, $22 - addq $23, $19, $23 - addq $8, $17, $8 - cmpult $23, $19, $24 - cmpult $8, $17, $21 - addq $8, $24, $8 - addq $22, $21, $22 - mulq $7, $4, $28 - umulh $7, $4, $18 - cmplt $28, $31, $27 - cmplt $18, $31, $25 - addq $28, $28, $28 - addq $18, $18, $18 - addq $18, $27, $18 - addq $22, $25, $22 - addq $23, $28, $23 - addq $8, $18, $8 - cmpult $23, $28, $20 - cmpult $8, $18, $19 - addq $8, $20, $8 - addq $22, $19, $22 - stq $23, 88($16) - bis $31, $31, $23 - mulq $6, $6, $17 - umulh $6, $6, $24 - addq $8, $17, $8 - addq $22, $24, $22 - cmpult $8, $17, $21 - cmpult $22, $24, $27 - addq $22, $21, $22 - addq $23, $27, $23 - mulq $7, $5, $25 - umulh $7, $5, $28 - cmplt $25, $31, $18 - cmplt $28, $31, $20 - addq $25, $25, $25 - addq $28, $28, $28 - addq $28, $18, $28 - addq $23, $20, $23 - addq $8, $25, $8 - addq $22, $28, $22 - cmpult $8, $25, $19 - cmpult $22, $28, $17 - addq $22, $19, $22 - addq $23, $17, $23 - stq $8, 96($16) - bis $31, $31, $8 - mulq $7, $6, $24 - umulh $7, $6, $21 - cmplt $24, $31, $27 - cmplt $21, $31, $18 - addq $24, $24, $24 - addq $21, $21, $21 - addq $21, $27, $21 - addq $8, $18, $8 - addq $22, $24, $22 - addq $23, $21, $23 - cmpult $22, $24, $20 - cmpult $23, $21, $25 - addq $23, $20, $23 - addq $8, $25, $8 - stq $22, 104($16) - bis $31, $31, $22 - mulq $7, $7, $28 - umulh $7, $7, $19 - addq $23, $28, $23 - addq $8, $19, $8 - cmpult $23, $28, $17 - cmpult $8, $19, $27 - addq $8, $17, $8 - addq $22, $27, $22 - stq $23, 112($16) - stq $8, 120($16) - ret $31,($26),1 - .end bn_sqr_comba8 diff --git a/src/lib/libcrypto/bn/asm/alpha.s.works b/src/lib/libcrypto/bn/asm/alpha.s.works deleted file mode 100644 index ee6c587809..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.s.works +++ /dev/null @@ -1,533 +0,0 @@ - - # DEC Alpha assember - # The bn_div64 is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div64. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - .align 3 -$42: - mulq $20,$19,$5 # 1 2 1 ###### - ldq $21,8($17) # 2 1 - ldq $2,8($16) # 2 1 - umulh $20,$19,$20 # 1 2 ###### - ldq $27,16($17) # 3 1 - ldq $3,16($16) # 3 1 - mulq $21,$19,$6 # 2 2 1 ###### - ldq $28,24($17) # 4 1 - addq $1,$5,$1 # 1 2 2 - ldq $4,24($16) # 4 1 - umulh $21,$19,$21 # 2 2 ###### - cmpult $1,$5,$22 # 1 2 3 1 - addq $20,$22,$20 # 1 3 1 - addq $1,$0,$1 # 1 2 3 1 - mulq $27,$19,$7 # 3 2 1 ###### - cmpult $1,$0,$0 # 1 2 3 2 - addq $2,$6,$2 # 2 2 2 - addq $20,$0,$0 # 1 3 2 - cmpult $2,$6,$23 # 2 2 3 1 - addq $21,$23,$21 # 2 3 1 - umulh $27,$19,$27 # 3 2 ###### - addq $2,$0,$2 # 2 2 3 1 - cmpult $2,$0,$0 # 2 2 3 2 - subq $18,4,$18 - mulq $28,$19,$8 # 4 2 1 ###### - addq $21,$0,$0 # 2 3 2 - addq $3,$7,$3 # 3 2 2 - addq $16,32,$16 - cmpult $3,$7,$24 # 3 2 3 1 - stq $1,-32($16) # 1 2 4 - umulh $28,$19,$28 # 4 2 ###### - addq $27,$24,$27 # 3 3 1 - addq $3,$0,$3 # 3 2 3 1 - stq $2,-24($16) # 2 2 4 - cmpult $3,$0,$0 # 3 2 3 2 - stq $3,-16($16) # 3 2 4 - addq $4,$8,$4 # 4 2 2 - addq $27,$0,$0 # 3 3 2 - cmpult $4,$8,$25 # 4 2 3 1 - addq $17,32,$17 - addq $28,$25,$28 # 4 3 1 - addq $4,$0,$4 # 4 2 3 1 - cmpult $4,$0,$0 # 4 2 3 2 - stq $4,-8($16) # 4 2 4 - addq $28,$0,$0 # 4 3 2 - blt $18,$43 - - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $20,0($17) # 4 1 - ldq $1,0($16) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $20,$19,$20 # 4 2 - addq $1,$5,$1 # 4 2 2 - cmpult $1,$5,$22 # 4 2 3 1 - addq $20,$22,$20 # 4 3 1 - addq $1,$0,$1 # 4 2 3 1 - cmpult $1,$0,$0 # 4 2 3 2 - addq $20,$0,$0 # 4 3 2 - stq $1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$142: - - mulq $20,$19,$5 # 1 2 1 ##### - ldq $21,8($17) # 2 1 - ldq $27,16($17) # 3 1 - umulh $20,$19,$20 # 1 2 ##### - ldq $28,24($17) # 4 1 - mulq $21,$19,$6 # 2 2 1 ##### - addq $5,$0,$5 # 1 2 3 1 - subq $18,4,$18 - cmpult $5,$0,$0 # 1 2 3 2 - umulh $21,$19,$21 # 2 2 ##### - addq $20,$0,$0 # 1 3 2 - addq $17,32,$17 - addq $6,$0,$6 # 2 2 3 1 - mulq $27,$19,$7 # 3 2 1 ##### - cmpult $6,$0,$0 # 2 2 3 2 - addq $21,$0,$0 # 2 3 2 - addq $16,32,$16 - umulh $27,$19,$27 # 3 2 ##### - stq $5,-32($16) # 1 2 4 - mulq $28,$19,$8 # 4 2 1 ##### - addq $7,$0,$7 # 3 2 3 1 - stq $6,-24($16) # 2 2 4 - cmpult $7,$0,$0 # 3 2 3 2 - umulh $28,$19,$28 # 4 2 ##### - addq $27,$0,$0 # 3 3 2 - stq $7,-16($16) # 3 2 4 - addq $8,$0,$8 # 4 2 3 1 - cmpult $8,$0,$0 # 4 2 3 2 - - addq $28,$0,$0 # 4 3 2 - - stq $8,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $20,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $20,0($17) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - umulh $20,$19,$20 # 4 2 - addq $5,$0,$5 # 4 2 3 1 - addq $16,8,$16 - cmpult $5,$0,$0 # 4 2 3 2 - addq $17,8,$17 - addq $20,$0,$0 # 4 3 2 - stq $5,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$542: - mulq $20,$20,$5 ###### - ldq $21,8($17) # 1 1 - subq $18,4 - umulh $20,$20,$1 ###### - ldq $27,16($17) # 1 1 - mulq $21,$21,$6 ###### - ldq $28,24($17) # 1 1 - stq $5,0($16) # r[0] - umulh $21,$21,$2 ###### - stq $1,8($16) # r[1] - mulq $27,$27,$7 ###### - stq $6,16($16) # r[0] - umulh $27,$27,$3 ###### - stq $2,24($16) # r[1] - mulq $28,$28,$8 ###### - stq $7,32($16) # r[0] - umulh $28,$28,$4 ###### - stq $3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $8,-16($16) # r[0] - stq $4,-8($16) # r[1] - - blt $18,$543 - ldq $20,0($17) # 1 1 - br $542 - -$442: - ldq $20,0($17) # a[0] - mulq $20,$20,$5 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $20,$20,$1 # a[0]*w high part r3 - stq $5,-16($16) # r[0] - stq $1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$0 # carry = 0 - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - .align 3 -$901: - addq $1,$5,$1 # r=a+b; - ldq $6,8($17) # a[1] - cmpult $1,$5,$22 # did we overflow? - ldq $2,8($18) # b[1] - addq $1,$0,$1 # c+= overflow - ldq $7,16($17) # a[2] - cmpult $1,$0,$0 # overflow? - ldq $3,16($18) # b[2] - addq $0,$22,$0 - ldq $8,24($17) # a[3] - addq $2,$6,$2 # r=a+b; - ldq $4,24($18) # b[3] - cmpult $2,$6,$23 # did we overflow? - addq $3,$7,$3 # r=a+b; - addq $2,$0,$2 # c+= overflow - cmpult $3,$7,$24 # did we overflow? - cmpult $2,$0,$0 # overflow? - addq $4,$8,$4 # r=a+b; - addq $0,$23,$0 - cmpult $4,$8,$25 # did we overflow? - addq $3,$0,$3 # c+= overflow - stq $1,0($16) # r[0]=c - cmpult $3,$0,$0 # overflow? - stq $2,8($16) # r[1]=c - addq $0,$24,$0 - stq $3,16($16) # r[2]=c - addq $4,$0,$4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $4,$0,$0 # overflow? - addq $17,32,$17 # a++ - addq $0,$25,$0 - stq $4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - addq $1,$5,$1 # r=a+b; - subq $19,1,$19 # loop-- - addq $1,$0,$1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $1,$5,$22 # did we overflow? - cmpult $1,$0,$0 # overflow? - addq $18,8,$18 # b++ - stq $1,0($16) # r[0]=c - addq $0,$22,$0 - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div64 - .ent bn_div64 -bn_div64: - ldgp $29,0($27) -bn_div64..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div64 - - .set noat - .text - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19, 4, $19 - bis $31, $31, $0 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) -$101: - ldq $3, 8($17) - cmpult $1, $2, $4 - ldq $5, 8($18) - subq $1, $2, $1 - ldq $6, 16($17) - cmpult $1, $0, $2 - ldq $7, 16($18) - subq $1, $0, $23 - ldq $8, 24($17) - addq $2, $4, $0 - cmpult $3, $5, $24 - subq $3, $5, $3 - ldq $22, 24($18) - cmpult $3, $0, $5 - subq $3, $0, $25 - addq $5, $24, $0 - cmpult $6, $7, $27 - subq $6, $7, $6 - stq $23, 0($16) - cmpult $6, $0, $7 - subq $6, $0, $28 - addq $7, $27, $0 - cmpult $8, $22, $21 - subq $8, $22, $8 - stq $25, 8($16) - cmpult $8, $0, $22 - subq $8, $0, $20 - addq $22, $21, $0 - stq $28, 16($16) - subq $19, 4, $19 - stq $20, 24($16) - addq $17, 32, $17 - addq $18, 32, $18 - addq $16, 32, $16 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) - br $101 -$102: - ldq $1, 0($17) - ldq $2, 0($18) - cmpult $1, $2, $27 - subq $1, $2, $1 - cmpult $1, $0, $2 - subq $1, $0, $1 - stq $1, 0($16) - addq $2, $27, $0 - addq $17, 8, $17 - addq $18, 8, $18 - addq $16, 8, $16 - subq $19, 1, $19 - bgt $19, $102 - ret $31,($26),1 -$100: - addq $19, 4, $19 - bgt $19, $102 -$103: - ret $31,($26),1 - .end bn_sub_words diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl deleted file mode 100644 index 4dc76e6b69..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/add.pl +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_add_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - ($t0,$o0)=&NR(2); - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); # will we borrow? - &add($o0,$cc,$o0); # will we borrow? - &cmpult($o0,$cc,$cc); # will we borrow? - &add($cc,$t0,$cc); # add the borrows - &st($o0,&QWPw(0,$rp)); # save - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($o0,$t0,$a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl deleted file mode 100644 index 7ec144377f..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/div.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/local/bin/perl - -sub bn_div64 - { - local($data)=<<'EOF'; - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .set noreorder - .set volatile - .align 3 - .globl bn_div64 - .ent bn_div64 -bn_div64: - ldgp $29,0($27) -bn_div64..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$9119 - lda $0,-1 - br $31,$9136 - .align 4 -$9119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$9120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$9120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$9120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$9122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$9122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$9123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$9126 - zapnot $7,15,$27 - br $31,$9127 - .align 4 -$9126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$9127: - srl $10,32,$4 - .align 5 -$9128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$9129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$9129 - subq $27,1,$27 - br $31,$9128 - .align 4 -$9129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$9134 - addq $9,$11,$9 - subq $27,1,$27 -$9134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$9124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$9123 - .align 4 -$9124: - bis $13,$27,$0 -$9136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div64 -EOF - &asm_add($data); - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl deleted file mode 100644 index b182bae452..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$word,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - &add($l0,$cc,$l0); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &cmpult($l0,$cc,$cc); - &st($l0,&QWPw(-1,$rp)); &FR($l0); - &add($h0,$cc,$cc); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl deleted file mode 100644 index e37f6315fb..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_add_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b - &mul($a0,$word,($l0)=&NR(1)); - &sub($count,1,$count); - &add($ap,$QWS,$ap); - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - &add($r0,$l0,$r0); - &add($rp,$QWS,$rp); - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &st($r0,&QWPw(-1,$rp)); &FR($r0); - &add($h0,$cc,$cc); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl deleted file mode 100644 index 5efd201281..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &mul($a[0],$b[0],($r00)=&NR(1)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &muh($a[0],$b[0],($r01)=&NR(1)); - &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &mul($a[0],$b[1],($r02)=&NR(1)); - - ($R,$H1,$H2)=&NR(3); - - &st($r00,&QWPw(0,$rp)); &FR($r00); - - &mov("zero",$R); - &mul($a[1],$b[0],($r03)=&NR(1)); - - &mov("zero",$H1); - &mov("zero",$H0); - &add($R,$r01,$R); - &muh($a[0],$b[1],($r04)=&NR(1)); - &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); - &add($R,$r02,$R); - &add($H1,$t01,$H1) &FR($t01); - &muh($a[1],$b[0],($r05)=&NR(1)); - &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); - &add($R,$r03,$R); - &add($H2,$t02,$H2) &FR($t02); - &mul($a[0],$b[2],($r06)=&NR(1)); - &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); - &add($H1,$t03,$H1) &FR($t03); - &st($R,&QWPw(1,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r04,$R); - &mov("zero",$H2); - &mul($a[1],$b[1],($r07)=&NR(1)); - &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); - &add($R,$r05,$R); - &add($H1,$t04,$H1) &FR($t04); - &mul($a[2],$b[0],($r08)=&NR(1)); - &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); - &add($R,$r01,$R); - &add($H2,$t05,$H2) &FR($t05); - &muh($a[0],$b[2],($r09)=&NR(1)); - &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); - &add($R,$r07,$R); - &add($H1,$t06,$H1) &FR($t06); - &muh($a[1],$b[1],($r10)=&NR(1)); - &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); - &add($R,$r08,$R); - &add($H2,$t07,$H2) &FR($t07); - &muh($a[2],$b[0],($r11)=&NR(1)); - &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); - &add($H1,$t08,$H1) &FR($t08); - &st($R,&QWPw(2,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r09,$R); - &mov("zero",$H2); - &mul($a[0],$b[3],($r12)=&NR(1)); - &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); - &add($R,$r10,$R); - &add($H1,$t09,$H1) &FR($t09); - &mul($a[1],$b[2],($r13)=&NR(1)); - &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); - &add($R,$r11,$R); - &add($H1,$t10,$H1) &FR($t10); - &mul($a[2],$b[1],($r14)=&NR(1)); - &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); - &add($R,$r12,$R); - &add($H1,$t11,$H1) &FR($t11); - &mul($a[3],$b[0],($r15)=&NR(1)); - &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); - &add($R,$r13,$R); - &add($H1,$t12,$H1) &FR($t12); - &muh($a[0],$b[3],($r16)=&NR(1)); - &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); - &add($R,$r14,$R); - &add($H1,$t13,$H1) &FR($t13); - &muh($a[1],$b[2],($r17)=&NR(1)); - &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); - &add($R,$r15,$R); - &add($H1,$t14,$H1) &FR($t14); - &muh($a[2],$b[1],($r18)=&NR(1)); - &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); - &add($H1,$t15,$H1) &FR($t15); - &st($R,&QWPw(3,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r16,$R); - &mov("zero",$H2); - &muh($a[3],$b[0],($r19)=&NR(1)); - &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); - &add($R,$r17,$R); - &add($H1,$t16,$H1) &FR($t16); - &mul($a[1],$b[3],($r20)=&NR(1)); - &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); - &add($R,$r18,$R); - &add($H1,$t17,$H1) &FR($t17); - &mul($a[2],$b[2],($r21)=&NR(1)); - &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); - &add($R,$r19,$R); - &add($H1,$t18,$H1) &FR($t18); - &mul($a[3],$b[1],($r22)=&NR(1)); - &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); - &add($R,$r20,$R); - &add($H1,$t19,$H1) &FR($t19); - &muh($a[1],$b[3],($r23)=&NR(1)); - &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); - &add($R,$r21,$R); - &add($H1,$t20,$H1) &FR($t20); - &muh($a[2],$b[2],($r24)=&NR(1)); - &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); - &add($R,$r22,$R); - &add($H1,$t21,$H1) &FR($t21); - &muh($a[3],$b[1],($r25)=&NR(1)); - &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); - &add($H1,$t22,$H1) &FR($t22); - &st($R,&QWPw(4,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r23,$R); - &mov("zero",$H2); - &mul($a[2],$b[3],($r26)=&NR(1)); - &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); - &add($R,$r24,$R); - &add($H1,$t23,$H1) &FR($t23); - &mul($a[3],$b[2],($r27)=&NR(1)); - &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); - &add($R,$r25,$R); - &add($H1,$t24,$H1) &FR($t24); - &muh($a[2],$b[3],($r28)=&NR(1)); - &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); - &add($R,$r26,$R); - &add($H1,$t25,$H1) &FR($t25); - &muh($a[3],$b[2],($r29)=&NR(1)); - &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); - &add($R,$r27,$R); - &add($H1,$t26,$H1) &FR($t26); - &mul($a[3],$b[3],($r30)=&NR(1)); - &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); - &add($H1,$t27,$H1) &FR($t27); - &st($R,&QWPw(5,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r28,$R); - &mov("zero",$H2); - &muh($a[3],$b[3],($r31)=&NR(1)); - &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); - &add($R,$r29,$R); - &add($H1,$t28,$H1) &FR($t28); - ############ - &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); - &add($R,$r30,$R); - &add($H1,$t29,$H1) &FR($t29); - ############ - &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); - &add($H1,$t30,$H1) &FR($t30); - &st($R,&QWPw(6,$rp)); - &add($H1,$H2,$R); - - &add($R,$r31,$R); &FR($r31); - &st($R,&QWPw(7,$rp)); - - &FR($R,$H1,$H2); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl deleted file mode 100644 index 79d86dd25c..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - -print STDERR "count=$cnt\n"; $cnt++; - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &FR($c0,$c1,$c2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl deleted file mode 100644 index 525ca7494b..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &stack_push(2); - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &st($reg_s0,&swtmp(0)); &FR($reg_s0); - &st($reg_s1,&swtmp(1)); &FR($reg_s1); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &ld(($a[4])=&NR(1),&QWPw(1,$ap)); - &ld(($b[4])=&NR(1),&QWPw(1,$bp)); - &ld(($a[5])=&NR(1),&QWPw(1,$ap)); - &ld(($b[5])=&NR(1),&QWPw(1,$bp)); - &ld(($a[6])=&NR(1),&QWPw(1,$ap)); - &ld(($b[6])=&NR(1),&QWPw(1,$bp)); - &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); - &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[4],$c0,$c1,$c2); - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); - &mul_add_c($a[4],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[5],$c0,$c1,$c2); - &mul_add_c($a[1],$b[4],$c0,$c1,$c2); - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); - &mul_add_c($a[4],$b[1],$c0,$c1,$c2); - &mul_add_c($a[5],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[6],$c0,$c1,$c2); - &mul_add_c($a[1],$b[5],$c0,$c1,$c2); - &mul_add_c($a[2],$b[4],$c0,$c1,$c2); - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); - &mul_add_c($a[4],$b[2],$c0,$c1,$c2); - &mul_add_c($a[5],$b[1],$c0,$c1,$c2); - &mul_add_c($a[6],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[6],$c0,$c1,$c2); - &mul_add_c($a[2],$b[5],$c0,$c1,$c2); - &mul_add_c($a[3],$b[4],$c0,$c1,$c2); - &mul_add_c($a[4],$b[3],$c0,$c1,$c2); - &mul_add_c($a[5],$b[2],$c0,$c1,$c2); - &mul_add_c($a[6],$b[1],$c0,$c1,$c2); - &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[6],$c0,$c1,$c2); - &mul_add_c($a[3],$b[5],$c0,$c1,$c2); - &mul_add_c($a[4],$b[4],$c0,$c1,$c2); - &mul_add_c($a[5],$b[3],$c0,$c1,$c2); - &mul_add_c($a[6],$b[2],$c0,$c1,$c2); - &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[6],$c0,$c1,$c2); - &mul_add_c($a[4],$b[5],$c0,$c1,$c2); - &mul_add_c($a[5],$b[4],$c0,$c1,$c2); - &mul_add_c($a[6],$b[3],$c0,$c1,$c2); - &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); - &mul_add_c($a[4],$b[6],$c0,$c1,$c2); - &mul_add_c($a[5],$b[5],$c0,$c1,$c2); - &mul_add_c($a[6],$b[4],$c0,$c1,$c2); - &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); - &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); - &mul_add_c($a[5],$b[6],$c0,$c1,$c2); - &mul_add_c($a[6],$b[5],$c0,$c1,$c2); - &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); - &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); - &mul_add_c($a[6],$b[6],$c0,$c1,$c2); - &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); - &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); - &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); - &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &FR($c0,$c1,$c2); - - &ld($reg_s0,&swtmp(0)); - &ld($reg_s1,&swtmp(1)); - &stack_pop(2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl deleted file mode 100644 index a55b696906..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(3); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$a0,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &add($rp,2*$QWS,$rp); - &sub($count,1,$count); - &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); - &st($l0,&QWPw(-2,$rp)); &FR($l0); - &st($h0,&QWPw(-1,$rp)); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl deleted file mode 100644 index bf33f5b503..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub sqr_add_c - { - local($a,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$a,($l1)=&NR(1)); - &muh($a,$a,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c1,$t1,$c1); &FR($t1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub sqr_add_c2 - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &cmplt($l1,"zero",($lc1)=&NR(1)); - &cmplt($h1,"zero",($hc1)=&NR(1)); - &add($l1,$l1,$l1); - &add($h1,$h1,$h1); - &add($h1,$lc1,$h1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); - - &add($c1,$lc1,$c1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - } - - -sub bn_sqr_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl deleted file mode 100644 index b4afe085f1..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($a[4])=&NR(1),&QWPw(4,$ap)); - &ld(($a[5])=&NR(1),&QWPw(5,$ap)); - &ld(($a[6])=&NR(1),&QWPw(6,$ap)); - &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(7,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[4],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(8,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(9,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[5],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); - &st($c0,&QWPw(10,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); - &st($c0,&QWPw(11,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[6],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); - &st($c0,&QWPw(12,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); - &st($c0,&QWPw(13,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[7],$c0,$c1,$c2); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl deleted file mode 100644 index d998da5c21..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$o0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$o1); # will we borrow? - &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($o0,&QWPw(0,$rp)); &FR($o0); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$o2); # will we borrow? - &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($o1,&QWPw(1,$rp)); &FR($o1); - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$o3); # will we borrow? - &add($b3,$t3,$cc); &FR($t3,$a3,$b3); - - &st($o2,&QWPw(2,$rp)); &FR($o2); - &sub($count,4,$count); # count-=4 - &st($o3,&QWPw(3,$rp)); &FR($o3); - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl deleted file mode 100644 index 13bf516428..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/add.pl +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_add_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - -########################################################## - &set_label("loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - &ld(($b0)=&NR(1),&QWPw(0,$bp)); - &ld(($a1)=&NR(1),&QWPw(1,$ap)); - &ld(($b1)=&NR(1),&QWPw(1,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &ld(($a2)=&NR(1),&QWPw(2,$ap)); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &ld(($b2)=&NR(1),&QWPw(2,$bp)); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &ld(($a3)=&NR(1),&QWPw(3,$ap)); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &ld(($b3)=&NR(1),&QWPw(3,$bp)); - &st($o0,&QWPw(0,$rp)); &FR($o0); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &st($o1,&QWPw(0,$rp)); &FR($o1); - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &cmpult($o3,$cc,$cc); - &st($o3,&QWPw(0,$rp)); &FR($o3); - &add($cc,$t3,$cc); &FR($t3); - - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - ### - &bge($count,&label("loop")); - ### - &br(&label("finish")); -################################################## - # Do the last 0..3 words - - ($t0,$o0)=&NR(2); - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($a0,$b0,$o0); - &sub($count,1,$count); - &cmpult($o0,$b0,$t0); # will we borrow? - &add($o0,$cc,$o0); # will we borrow? - &cmpult($o0,$cc,$cc); # will we borrow? - &add($rp,$QWS,$rp); - &st($o0,&QWPw(-1,$rp)); # save - &add($cc,$t0,$cc); # add the borrows - - ### - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($o0,$t0,$a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl deleted file mode 100644 index e9e680897a..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/div.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/local/bin/perl - -sub bn_div_words - { - local($data)=<<'EOF'; - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .set noreorder - .set volatile - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words - ldgp $29,0($27) -bn_div_words.ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$9119 - lda $0,-1 - br $31,$9136 - .align 4 -$9119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$9120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$9120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$9120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$9122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$9122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$9123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$9126 - zapnot $7,15,$27 - br $31,$9127 - .align 4 -$9126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$9127: - srl $10,32,$4 - .align 5 -$9128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$9129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$9129 - subq $27,1,$27 - br $31,$9128 - .align 4 -$9129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$9134 - addq $9,$11,$9 - subq $27,1,$27 -$9134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$9124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$9123 - .align 4 -$9124: - bis $13,$27,$0 -$9136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words -EOF - &asm_add($data); - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl deleted file mode 100644 index 76c926566c..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/mul.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - ### - &blt($count,&label("finish")); - - ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); - - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ### wait 8 - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - ### wait 8 - &muh($a1,$word,($h1)=&NR(1)); &FR($a1); - &add($l0,$cc,$l0); ### wait 8 - &mul($a1,$word,($l1)=&NR(1)); &FR($a1); - &cmpult($l0,$cc,$cc); ### wait 8 - &muh($a2,$word,($h2)=&NR(1)); &FR($a2); - &add($h0,$cc,$cc); &FR($h0); ### wait 8 - &mul($a2,$word,($l2)=&NR(1)); &FR($a2); - &add($l1,$cc,$l1); ### wait 8 - &st($l0,&QWPw(0,$rp)); &FR($l0); - &cmpult($l1,$cc,$cc); ### wait 8 - &muh($a3,$word,($h3)=&NR(1)); &FR($a3); - &add($h1,$cc,$cc); &FR($h1); - &mul($a3,$word,($l3)=&NR(1)); &FR($a3); - &add($l2,$cc,$l2); - &st($l1,&QWPw(1,$rp)); &FR($l1); - &cmpult($l2,$cc,$cc); - &add($h2,$cc,$cc); &FR($h2); - &sub($count,4,$count); # count-=4 - &st($l2,&QWPw(2,$rp)); &FR($l2); - &add($l3,$cc,$l3); - &cmpult($l3,$cc,$cc); - &add($bp,4*$QWS,$bp); # count+=4 - &add($h3,$cc,$cc); &FR($h3); - &add($ap,4*$QWS,$ap); # count+=4 - &st($l3,&QWPw(3,$rp)); &FR($l3); - &add($rp,4*$QWS,$rp); # count+=4 - ### - &blt($count,&label("finish")); - ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); - &br(&label("finish")); -################################################## - -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - ### - ### - ### - &muh($a0,$word,($h0)=&NR(1)); - ### Wait 8 for next mul issue - &mul($a0,$word,($l0)=&NR(1)); &FR($a0) - &add($ap,$QWS,$ap); - ### Loose 12 until result is available - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &add($l0,$cc,$l0); - ### - &st($l0,&QWPw(-1,$rp)); &FR($l0); - &cmpult($l0,$cc,$cc); - &add($h0,$cc,$cc); &FR($h0); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl deleted file mode 100644 index 0d6df69bc4..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_add_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - ### - &blt($count,&label("finish")); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - &ld(($r0)=&NR(1),&QWPw(0,$rp)); - &ld(($a1)=&NR(1),&QWPw(1,$ap)); - &muh($a0,$word,($h0)=&NR(1)); - &ld(($r1)=&NR(1),&QWPw(1,$rp)); - &ld(($a2)=&NR(1),&QWPw(2,$ap)); - ### - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - &ld(($r2)=&NR(1),&QWPw(2,$rp)); - &muh($a1,$word,($h1)=&NR(1)); - &ld(($a3)=&NR(1),&QWPw(3,$ap)); - &mul($a1,$word,($l1)=&NR(1)); &FR($a1); - &ld(($r3)=&NR(1),&QWPw(3,$rp)); - &add($r0,$l0,$r0); - &add($r1,$l1,$r1); - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); - &muh($a2,$word,($h2)=&NR(1)); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &add($h1,$t1,$h1); &FR($t1); - &add($h0,$cc,$cc); &FR($h0); - &mul($a2,$word,($l2)=&NR(1)); &FR($a2); - &add($r1,$cc,$r1); - &cmpult($r1,$cc,$cc); - &add($r2,$l2,$r2); - &add($h1,$cc,$cc); &FR($h1); - &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); - &muh($a3,$word,($h3)=&NR(1)); - &add($r2,$cc,$r2); - &st($r0,&QWPw(0,$rp)); &FR($r0); - &add($h2,$t2,$h2); &FR($t2); - &st($r1,&QWPw(1,$rp)); &FR($r1); - &cmpult($r2,$cc,$cc); - &mul($a3,$word,($l3)=&NR(1)); &FR($a3); - &add($h2,$cc,$cc); &FR($h2); - &st($r2,&QWPw(2,$rp)); &FR($r2); - &sub($count,4,$count); # count-=4 - &add($rp,4*$QWS,$rp); # count+=4 - &add($r3,$l3,$r3); - &add($ap,4*$QWS,$ap); # count+=4 - &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); - &add($r3,$cc,$r3); - &add($h3,$t3,$h3); &FR($t3); - &cmpult($r3,$cc,$cc); - &st($r3,&QWPw(-1,$rp)); &FR($r3); - &add($h3,$cc,$cc); &FR($h3); - - ### - &blt($count,&label("finish")); - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b - ### - ### - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - ### wait 8 - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - &add($rp,$QWS,$rp); - &add($ap,$QWS,$ap); - &sub($count,1,$count); - ### wait 3 until l0 is available - &add($r0,$l0,$r0); - ### - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &add($h0,$cc,$cc); &FR($h0); - - &st($r0,&QWPw(-1,$rp)); &FR($r0); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl deleted file mode 100644 index 9cc876ded4..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -# upto - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &mul($a[0],$b[0],($r00)=&NR(1)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &muh($a[0],$b[0],($r01)=&NR(1)); - &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &mul($a[0],$b[1],($r02)=&NR(1)); - - ($R,$H1,$H2)=&NR(3); - - &st($r00,&QWPw(0,$rp)); &FR($r00); - - &mov("zero",$R); - &mul($a[1],$b[0],($r03)=&NR(1)); - - &mov("zero",$H1); - &mov("zero",$H0); - &add($R,$r01,$R); - &muh($a[0],$b[1],($r04)=&NR(1)); - &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); - &add($R,$r02,$R); - &add($H1,$t01,$H1) &FR($t01); - &muh($a[1],$b[0],($r05)=&NR(1)); - &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); - &add($R,$r03,$R); - &add($H2,$t02,$H2) &FR($t02); - &mul($a[0],$b[2],($r06)=&NR(1)); - &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); - &add($H1,$t03,$H1) &FR($t03); - &st($R,&QWPw(1,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r04,$R); - &mov("zero",$H2); - &mul($a[1],$b[1],($r07)=&NR(1)); - &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); - &add($R,$r05,$R); - &add($H1,$t04,$H1) &FR($t04); - &mul($a[2],$b[0],($r08)=&NR(1)); - &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); - &add($R,$r01,$R); - &add($H2,$t05,$H2) &FR($t05); - &muh($a[0],$b[2],($r09)=&NR(1)); - &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); - &add($R,$r07,$R); - &add($H1,$t06,$H1) &FR($t06); - &muh($a[1],$b[1],($r10)=&NR(1)); - &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); - &add($R,$r08,$R); - &add($H2,$t07,$H2) &FR($t07); - &muh($a[2],$b[0],($r11)=&NR(1)); - &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); - &add($H1,$t08,$H1) &FR($t08); - &st($R,&QWPw(2,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r09,$R); - &mov("zero",$H2); - &mul($a[0],$b[3],($r12)=&NR(1)); - &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); - &add($R,$r10,$R); - &add($H1,$t09,$H1) &FR($t09); - &mul($a[1],$b[2],($r13)=&NR(1)); - &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); - &add($R,$r11,$R); - &add($H1,$t10,$H1) &FR($t10); - &mul($a[2],$b[1],($r14)=&NR(1)); - &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); - &add($R,$r12,$R); - &add($H1,$t11,$H1) &FR($t11); - &mul($a[3],$b[0],($r15)=&NR(1)); - &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); - &add($R,$r13,$R); - &add($H1,$t12,$H1) &FR($t12); - &muh($a[0],$b[3],($r16)=&NR(1)); - &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); - &add($R,$r14,$R); - &add($H1,$t13,$H1) &FR($t13); - &muh($a[1],$b[2],($r17)=&NR(1)); - &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); - &add($R,$r15,$R); - &add($H1,$t14,$H1) &FR($t14); - &muh($a[2],$b[1],($r18)=&NR(1)); - &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); - &add($H1,$t15,$H1) &FR($t15); - &st($R,&QWPw(3,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r16,$R); - &mov("zero",$H2); - &muh($a[3],$b[0],($r19)=&NR(1)); - &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); - &add($R,$r17,$R); - &add($H1,$t16,$H1) &FR($t16); - &mul($a[1],$b[3],($r20)=&NR(1)); - &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); - &add($R,$r18,$R); - &add($H1,$t17,$H1) &FR($t17); - &mul($a[2],$b[2],($r21)=&NR(1)); - &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); - &add($R,$r19,$R); - &add($H1,$t18,$H1) &FR($t18); - &mul($a[3],$b[1],($r22)=&NR(1)); - &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); - &add($R,$r20,$R); - &add($H1,$t19,$H1) &FR($t19); - &muh($a[1],$b[3],($r23)=&NR(1)); - &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); - &add($R,$r21,$R); - &add($H1,$t20,$H1) &FR($t20); - &muh($a[2],$b[2],($r24)=&NR(1)); - &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); - &add($R,$r22,$R); - &add($H1,$t21,$H1) &FR($t21); - &muh($a[3],$b[1],($r25)=&NR(1)); - &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); - &add($H1,$t22,$H1) &FR($t22); - &st($R,&QWPw(4,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r23,$R); - &mov("zero",$H2); - &mul($a[2],$b[3],($r26)=&NR(1)); - &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); - &add($R,$r24,$R); - &add($H1,$t23,$H1) &FR($t23); - &mul($a[3],$b[2],($r27)=&NR(1)); - &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); - &add($R,$r25,$R); - &add($H1,$t24,$H1) &FR($t24); - &muh($a[2],$b[3],($r28)=&NR(1)); - &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); - &add($R,$r26,$R); - &add($H1,$t25,$H1) &FR($t25); - &muh($a[3],$b[2],($r29)=&NR(1)); - &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); - &add($R,$r27,$R); - &add($H1,$t26,$H1) &FR($t26); - &mul($a[3],$b[3],($r30)=&NR(1)); - &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); - &add($H1,$t27,$H1) &FR($t27); - &st($R,&QWPw(5,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r28,$R); - &mov("zero",$H2); - &muh($a[3],$b[3],($r31)=&NR(1)); - &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); - &add($R,$r29,$R); - &add($H1,$t28,$H1) &FR($t28); - ############ - &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); - &add($R,$r30,$R); - &add($H1,$t29,$H1) &FR($t29); - ############ - &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); - &add($H1,$t30,$H1) &FR($t30); - &st($R,&QWPw(6,$rp)); - &add($H1,$H2,$R); - - &add($R,$r31,$R); &FR($r31); - &st($R,&QWPw(7,$rp)); - - &FR($R,$H1,$H2); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl deleted file mode 100644 index 79d86dd25c..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - -print STDERR "count=$cnt\n"; $cnt++; - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &FR($c0,$c1,$c2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl deleted file mode 100644 index 525ca7494b..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &stack_push(2); - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &st($reg_s0,&swtmp(0)); &FR($reg_s0); - &st($reg_s1,&swtmp(1)); &FR($reg_s1); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &ld(($a[4])=&NR(1),&QWPw(1,$ap)); - &ld(($b[4])=&NR(1),&QWPw(1,$bp)); - &ld(($a[5])=&NR(1),&QWPw(1,$ap)); - &ld(($b[5])=&NR(1),&QWPw(1,$bp)); - &ld(($a[6])=&NR(1),&QWPw(1,$ap)); - &ld(($b[6])=&NR(1),&QWPw(1,$bp)); - &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); - &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[4],$c0,$c1,$c2); - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); - &mul_add_c($a[4],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[5],$c0,$c1,$c2); - &mul_add_c($a[1],$b[4],$c0,$c1,$c2); - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); - &mul_add_c($a[4],$b[1],$c0,$c1,$c2); - &mul_add_c($a[5],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[6],$c0,$c1,$c2); - &mul_add_c($a[1],$b[5],$c0,$c1,$c2); - &mul_add_c($a[2],$b[4],$c0,$c1,$c2); - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); - &mul_add_c($a[4],$b[2],$c0,$c1,$c2); - &mul_add_c($a[5],$b[1],$c0,$c1,$c2); - &mul_add_c($a[6],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[6],$c0,$c1,$c2); - &mul_add_c($a[2],$b[5],$c0,$c1,$c2); - &mul_add_c($a[3],$b[4],$c0,$c1,$c2); - &mul_add_c($a[4],$b[3],$c0,$c1,$c2); - &mul_add_c($a[5],$b[2],$c0,$c1,$c2); - &mul_add_c($a[6],$b[1],$c0,$c1,$c2); - &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[6],$c0,$c1,$c2); - &mul_add_c($a[3],$b[5],$c0,$c1,$c2); - &mul_add_c($a[4],$b[4],$c0,$c1,$c2); - &mul_add_c($a[5],$b[3],$c0,$c1,$c2); - &mul_add_c($a[6],$b[2],$c0,$c1,$c2); - &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[6],$c0,$c1,$c2); - &mul_add_c($a[4],$b[5],$c0,$c1,$c2); - &mul_add_c($a[5],$b[4],$c0,$c1,$c2); - &mul_add_c($a[6],$b[3],$c0,$c1,$c2); - &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); - &mul_add_c($a[4],$b[6],$c0,$c1,$c2); - &mul_add_c($a[5],$b[5],$c0,$c1,$c2); - &mul_add_c($a[6],$b[4],$c0,$c1,$c2); - &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); - &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); - &mul_add_c($a[5],$b[6],$c0,$c1,$c2); - &mul_add_c($a[6],$b[5],$c0,$c1,$c2); - &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); - &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); - &mul_add_c($a[6],$b[6],$c0,$c1,$c2); - &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); - &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); - &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); - &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &FR($c0,$c1,$c2); - - &ld($reg_s0,&swtmp(0)); - &ld($reg_s1,&swtmp(1)); - &stack_pop(2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl deleted file mode 100644 index a55b696906..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/sqr.pl +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(3); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$a0,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &add($rp,2*$QWS,$rp); - &sub($count,1,$count); - &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); - &st($l0,&QWPw(-2,$rp)); &FR($l0); - &st($h0,&QWPw(-1,$rp)); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl deleted file mode 100644 index bf33f5b503..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub sqr_add_c - { - local($a,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$a,($l1)=&NR(1)); - &muh($a,$a,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c1,$t1,$c1); &FR($t1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub sqr_add_c2 - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &cmplt($l1,"zero",($lc1)=&NR(1)); - &cmplt($h1,"zero",($hc1)=&NR(1)); - &add($l1,$l1,$l1); - &add($h1,$h1,$h1); - &add($h1,$lc1,$h1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); - - &add($c1,$lc1,$c1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - } - - -sub bn_sqr_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl deleted file mode 100644 index b4afe085f1..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($a[4])=&NR(1),&QWPw(4,$ap)); - &ld(($a[5])=&NR(1),&QWPw(5,$ap)); - &ld(($a[6])=&NR(1),&QWPw(6,$ap)); - &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(7,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[4],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(8,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(9,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[5],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); - &st($c0,&QWPw(10,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); - &st($c0,&QWPw(11,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[6],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); - &st($c0,&QWPw(12,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); - &st($c0,&QWPw(13,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[7],$c0,$c1,$c2); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl deleted file mode 100644 index d998da5c21..0000000000 --- a/src/lib/libcrypto/bn/asm/alpha/sub.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$o0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$o1); # will we borrow? - &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($o0,&QWPw(0,$rp)); &FR($o0); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$o2); # will we borrow? - &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($o1,&QWPw(1,$rp)); &FR($o1); - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$o3); # will we borrow? - &add($b3,$t3,$cc); &FR($t3,$a3,$b3); - - &st($o2,&QWPw(2,$rp)); &FR($o2); - &sub($count,4,$count); # count-=4 - &st($o3,&QWPw(3,$rp)); &FR($o3); - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 26c2685a72..332ef3e91d 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl @@ -1,6 +1,7 @@ #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],$0); @@ -24,38 +25,25 @@ sub bn_mul_add_words { local($name)=@_; - &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ebp"; - $r="edi"; - $c="esi"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - - &mov("ecx",&wparam(2)); # - &mov($a,&wparam(1)); # - - &and("ecx",0xfffffff8); # num / 8 - &mov($w,&wparam(3)); # - - &push("ecx"); # Up the stack for a tmp variable - - &jz(&label("maw_finish")); + $r="eax"; + $a="edx"; + $c="ecx"; if ($sse2) { &picmeup("eax","OPENSSL_ia32cap_P"); &bt(&DWP(0,"eax"),26); - &jnc(&label("maw_loop")); + &jnc(&label("maw_non_sse2")); - &movd("mm0",$w); # mm0 = w + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + &movd("mm0",&wparam(3)); # mm0 = w &pxor("mm1","mm1"); # mm1 = carry_in - - &set_label("maw_sse2_loop",0); + &jmp(&label("maw_sse2_entry")); + + &set_label("maw_sse2_unrolled",16); &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] &paddq("mm1","mm3"); # mm1 = carry_in + r[0] &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] @@ -112,42 +100,82 @@ sub bn_mul_add_words &psrlq("mm1",32); # mm1 = carry6 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] &movd(&DWP(28,$r,"",0),"mm1"); - &add($r,32); + &lea($r,&DWP(32,$r)); &psrlq("mm1",32); # mm1 = carry_out - &sub("ecx",8); + &sub($c,8); + &jz(&label("maw_sse2_exit")); + &set_label("maw_sse2_entry"); + &test($c,0xfffffff8); + &jnz(&label("maw_sse2_unrolled")); + + &set_label("maw_sse2_loop",4); + &movd("mm2",&DWP(0,$a)); # mm2 = a[i] + &movd("mm3",&DWP(0,$r)); # mm3 = r[i] + &pmuludq("mm2","mm0"); # a[i] *= w + &lea($a,&DWP(4,$a)); + &paddq("mm1","mm3"); # carry += r[i] + &paddq("mm1","mm2"); # carry += a[i]*w + &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low + &sub($c,1); + &psrlq("mm1",32); # carry = carry_high + &lea($r,&DWP(4,$r)); &jnz(&label("maw_sse2_loop")); - - &movd($c,"mm1"); # c = carry_out + &set_label("maw_sse2_exit"); + &movd("eax","mm1"); # c = carry_out &emms(); + &ret(); - &jmp(&label("maw_finish")); + &set_label("maw_non_sse2",16); } - &set_label("maw_loop",0); + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + &comment(""); + $Low="eax"; + $High="edx"; + $a="ebx"; + $w="ebp"; + $r="edi"; + $c="esi"; + + &xor($c,$c); # clear carry + &mov($r,&wparam(0)); # + + &mov("ecx",&wparam(2)); # + &mov($a,&wparam(1)); # + + &and("ecx",0xfffffff8); # num / 8 + &mov($w,&wparam(3)); # - &mov(&swtmp(0),"ecx"); # + &push("ecx"); # Up the stack for a tmp variable + + &jz(&label("maw_finish")); + + &set_label("maw_loop",16); for ($i=0; $i<32; $i+=4) { &comment("Round $i"); - &mov("eax",&DWP($i,$a,"",0)); # *a + &mov("eax",&DWP($i,$a)); # *a &mul($w); # *a * w - &add("eax",$c); # L(t)+= *r - &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r + &add("eax",$c); # L(t)+= c &adc("edx",0); # H(t)+=carry - &add("eax",$c); # L(t)+=c + &add("eax",&DWP($i,$r)); # L(t)+= *r &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); + &mov(&DWP($i,$r),"eax"); # *r= L(t); &mov($c,"edx"); # c= H(t); } &comment(""); - &mov("ecx",&swtmp(0)); # - &add($a,32); - &add($r,32); &sub("ecx",8); + &lea($a,&DWP(32,$a)); + &lea($r,&DWP(32,$r)); &jnz(&label("maw_loop")); &set_label("maw_finish",0); @@ -160,16 +188,15 @@ sub bn_mul_add_words for ($i=0; $i<7; $i++) { &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a + &mov("eax",&DWP($i*4,$a)); # *a &mul($w); # *a * w &add("eax",$c); # L(t)+=c - &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r &adc("edx",0); # H(t)+=carry - &add("eax",$c); + &add("eax",&DWP($i*4,$r)); # L(t)+= *r &adc("edx",0); # H(t)+=carry &dec("ecx") if ($i != 7-1); - &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); + &mov(&DWP($i*4,$r),"eax"); # *r= L(t); + &mov($c,"edx"); # c= H(t); &jz(&label("maw_end")) if ($i != 7-1); } &set_label("maw_end",0); @@ -184,7 +211,45 @@ sub bn_mul_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + + $r="eax"; + $a="edx"; + $c="ecx"; + + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("mw_non_sse2")); + + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + &movd("mm0",&wparam(3)); # mm0 = w + &pxor("mm1","mm1"); # mm1 = carry = 0 + + &set_label("mw_sse2_loop",16); + &movd("mm2",&DWP(0,$a)); # mm2 = a[i] + &pmuludq("mm2","mm0"); # a[i] *= w + &lea($a,&DWP(4,$a)); + &paddq("mm1","mm2"); # carry += a[i]*w + &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low + &sub($c,1); + &psrlq("mm1",32); # carry = carry_high + &lea($r,&DWP(4,$r)); + &jnz(&label("mw_sse2_loop")); + + &movd("eax","mm1"); # return carry + &emms(); + &ret(); + &set_label("mw_non_sse2",16); + } + + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); &comment(""); $Low="eax"; @@ -257,7 +322,40 @@ sub bn_sqr_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + + $r="eax"; + $a="edx"; + $c="ecx"; + + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("sqr_non_sse2")); + + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + + &set_label("sqr_sse2_loop",16); + &movd("mm0",&DWP(0,$a)); # mm0 = a[i] + &pmuludq("mm0","mm0"); # a[i] *= a[i] + &lea($a,&DWP(4,$a)); # a++ + &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i] + &sub($c,1); + &lea($r,&DWP(8,$r)); # r += 2 + &jnz(&label("sqr_sse2_loop")); + + &emms(); + &ret(); + &set_label("sqr_non_sse2",16); + } + + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); &comment(""); $r="esi"; @@ -313,12 +411,13 @@ sub bn_div_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,""); &mov("edx",&wparam(0)); # &mov("eax",&wparam(1)); # - &mov("ebx",&wparam(2)); # - &div("ebx"); - &function_end($name); + &mov("ecx",&wparam(2)); # + &div("ecx"); + &ret(); + &function_end_B($name); } sub bn_add_words diff --git a/src/lib/libcrypto/bn/asm/bn-alpha.pl b/src/lib/libcrypto/bn/asm/bn-alpha.pl deleted file mode 100644 index 302edf2376..0000000000 --- a/src/lib/libcrypto/bn/asm/bn-alpha.pl +++ /dev/null @@ -1,571 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -$d=&data(); -$d =~ s/CC/0/g; -$d =~ s/R1/1/g; -$d =~ s/R2/2/g; -$d =~ s/R3/3/g; -$d =~ s/R4/4/g; -$d =~ s/L1/5/g; -$d =~ s/L2/6/g; -$d =~ s/L3/7/g; -$d =~ s/L4/8/g; -$d =~ s/O1/22/g; -$d =~ s/O2/23/g; -$d =~ s/O3/24/g; -$d =~ s/O4/25/g; -$d =~ s/A1/20/g; -$d =~ s/A2/21/g; -$d =~ s/A3/27/g; -$d =~ s/A4/28/g; -if (0){ -} - -print $d; - -sub data - { - local($data)=<<'EOF'; - - # DEC Alpha assember - # The bn_div_words is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div_words. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$CC - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - ldq $R1,0($16) # 1 1 - .align 3 -$42: - mulq $A1,$19,$L1 # 1 2 1 ###### - ldq $A2,8($17) # 2 1 - ldq $R2,8($16) # 2 1 - umulh $A1,$19,$A1 # 1 2 ###### - ldq $A3,16($17) # 3 1 - ldq $R3,16($16) # 3 1 - mulq $A2,$19,$L2 # 2 2 1 ###### - ldq $A4,24($17) # 4 1 - addq $R1,$L1,$R1 # 1 2 2 - ldq $R4,24($16) # 4 1 - umulh $A2,$19,$A2 # 2 2 ###### - cmpult $R1,$L1,$O1 # 1 2 3 1 - addq $A1,$O1,$A1 # 1 3 1 - addq $R1,$CC,$R1 # 1 2 3 1 - mulq $A3,$19,$L3 # 3 2 1 ###### - cmpult $R1,$CC,$CC # 1 2 3 2 - addq $R2,$L2,$R2 # 2 2 2 - addq $A1,$CC,$CC # 1 3 2 - cmpult $R2,$L2,$O2 # 2 2 3 1 - addq $A2,$O2,$A2 # 2 3 1 - umulh $A3,$19,$A3 # 3 2 ###### - addq $R2,$CC,$R2 # 2 2 3 1 - cmpult $R2,$CC,$CC # 2 2 3 2 - subq $18,4,$18 - mulq $A4,$19,$L4 # 4 2 1 ###### - addq $A2,$CC,$CC # 2 3 2 - addq $R3,$L3,$R3 # 3 2 2 - addq $16,32,$16 - cmpult $R3,$L3,$O3 # 3 2 3 1 - stq $R1,-32($16) # 1 2 4 - umulh $A4,$19,$A4 # 4 2 ###### - addq $A3,$O3,$A3 # 3 3 1 - addq $R3,$CC,$R3 # 3 2 3 1 - stq $R2,-24($16) # 2 2 4 - cmpult $R3,$CC,$CC # 3 2 3 2 - stq $R3,-16($16) # 3 2 4 - addq $R4,$L4,$R4 # 4 2 2 - addq $A3,$CC,$CC # 3 3 2 - cmpult $R4,$L4,$O4 # 4 2 3 1 - addq $17,32,$17 - addq $A4,$O4,$A4 # 4 3 1 - addq $R4,$CC,$R4 # 4 2 3 1 - cmpult $R4,$CC,$CC # 4 2 3 2 - stq $R4,-8($16) # 4 2 4 - addq $A4,$CC,$CC # 4 3 2 - blt $18,$43 - - ldq $A1,0($17) # 1 1 - ldq $R1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $A1,0($17) # 4 1 - ldq $R1,0($16) # 4 1 - mulq $A1,$19,$L1 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $A1,$19,$A1 # 4 2 - addq $R1,$L1,$R1 # 4 2 2 - cmpult $R1,$L1,$O1 # 4 2 3 1 - addq $A1,$O1,$A1 # 4 3 1 - addq $R1,$CC,$R1 # 4 2 3 1 - cmpult $R1,$CC,$CC # 4 2 3 2 - addq $A1,$CC,$CC # 4 3 2 - stq $R1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$CC - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - .align 3 -$142: - - mulq $A1,$19,$L1 # 1 2 1 ##### - ldq $A2,8($17) # 2 1 - ldq $A3,16($17) # 3 1 - umulh $A1,$19,$A1 # 1 2 ##### - ldq $A4,24($17) # 4 1 - mulq $A2,$19,$L2 # 2 2 1 ##### - addq $L1,$CC,$L1 # 1 2 3 1 - subq $18,4,$18 - cmpult $L1,$CC,$CC # 1 2 3 2 - umulh $A2,$19,$A2 # 2 2 ##### - addq $A1,$CC,$CC # 1 3 2 - addq $17,32,$17 - addq $L2,$CC,$L2 # 2 2 3 1 - mulq $A3,$19,$L3 # 3 2 1 ##### - cmpult $L2,$CC,$CC # 2 2 3 2 - addq $A2,$CC,$CC # 2 3 2 - addq $16,32,$16 - umulh $A3,$19,$A3 # 3 2 ##### - stq $L1,-32($16) # 1 2 4 - mulq $A4,$19,$L4 # 4 2 1 ##### - addq $L3,$CC,$L3 # 3 2 3 1 - stq $L2,-24($16) # 2 2 4 - cmpult $L3,$CC,$CC # 3 2 3 2 - umulh $A4,$19,$A4 # 4 2 ##### - addq $A3,$CC,$CC # 3 3 2 - stq $L3,-16($16) # 3 2 4 - addq $L4,$CC,$L4 # 4 2 3 1 - cmpult $L4,$CC,$CC # 4 2 3 2 - - addq $A4,$CC,$CC # 4 3 2 - - stq $L4,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $A1,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $A1,0($17) # 4 1 - mulq $A1,$19,$L1 # 4 2 1 - subq $18,1,$18 - umulh $A1,$19,$A1 # 4 2 - addq $L1,$CC,$L1 # 4 2 3 1 - addq $16,8,$16 - cmpult $L1,$CC,$CC # 4 2 3 2 - addq $17,8,$17 - addq $A1,$CC,$CC # 4 3 2 - stq $L1,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - .align 3 -$542: - mulq $A1,$A1,$L1 ###### - ldq $A2,8($17) # 1 1 - subq $18,4 - umulh $A1,$A1,$R1 ###### - ldq $A3,16($17) # 1 1 - mulq $A2,$A2,$L2 ###### - ldq $A4,24($17) # 1 1 - stq $L1,0($16) # r[0] - umulh $A2,$A2,$R2 ###### - stq $R1,8($16) # r[1] - mulq $A3,$A3,$L3 ###### - stq $L2,16($16) # r[0] - umulh $A3,$A3,$R3 ###### - stq $R2,24($16) # r[1] - mulq $A4,$A4,$L4 ###### - stq $L3,32($16) # r[0] - umulh $A4,$A4,$R4 ###### - stq $R3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $L4,-16($16) # r[0] - stq $R4,-8($16) # r[1] - - blt $18,$543 - ldq $A1,0($17) # 1 1 - br $542 - -$442: - ldq $A1,0($17) # a[0] - mulq $A1,$A1,$L1 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $A1,$A1,$R1 # a[0]*w high part r3 - stq $L1,-16($16) # r[0] - stq $R1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$CC # carry = 0 - blt $19,$900 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - .align 3 -$901: - addq $R1,$L1,$R1 # r=a+b; - ldq $L2,8($17) # a[1] - cmpult $R1,$L1,$O1 # did we overflow? - ldq $R2,8($18) # b[1] - addq $R1,$CC,$R1 # c+= overflow - ldq $L3,16($17) # a[2] - cmpult $R1,$CC,$CC # overflow? - ldq $R3,16($18) # b[2] - addq $CC,$O1,$CC - ldq $L4,24($17) # a[3] - addq $R2,$L2,$R2 # r=a+b; - ldq $R4,24($18) # b[3] - cmpult $R2,$L2,$O2 # did we overflow? - addq $R3,$L3,$R3 # r=a+b; - addq $R2,$CC,$R2 # c+= overflow - cmpult $R3,$L3,$O3 # did we overflow? - cmpult $R2,$CC,$CC # overflow? - addq $R4,$L4,$R4 # r=a+b; - addq $CC,$O2,$CC - cmpult $R4,$L4,$O4 # did we overflow? - addq $R3,$CC,$R3 # c+= overflow - stq $R1,0($16) # r[0]=c - cmpult $R3,$CC,$CC # overflow? - stq $R2,8($16) # r[1]=c - addq $CC,$O3,$CC - stq $R3,16($16) # r[2]=c - addq $R4,$CC,$R4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $R4,$CC,$CC # overflow? - addq $17,32,$17 # a++ - addq $CC,$O4,$CC - stq $R4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - addq $R1,$L1,$R1 # r=a+b; - subq $19,1,$19 # loop-- - addq $R1,$CC,$R1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $R1,$L1,$O1 # did we overflow? - cmpult $R1,$CC,$CC # overflow? - addq $18,8,$18 # b++ - stq $R1,0($16) # r[0]=c - addq $CC,$O1,$CC - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$CC # carry = 0 - br $800 - blt $19,$800 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - .align 3 -$801: - addq $R1,$L1,$R1 # r=a+b; - ldq $L2,8($17) # a[1] - cmpult $R1,$L1,$O1 # did we overflow? - ldq $R2,8($18) # b[1] - addq $R1,$CC,$R1 # c+= overflow - ldq $L3,16($17) # a[2] - cmpult $R1,$CC,$CC # overflow? - ldq $R3,16($18) # b[2] - addq $CC,$O1,$CC - ldq $L4,24($17) # a[3] - addq $R2,$L2,$R2 # r=a+b; - ldq $R4,24($18) # b[3] - cmpult $R2,$L2,$O2 # did we overflow? - addq $R3,$L3,$R3 # r=a+b; - addq $R2,$CC,$R2 # c+= overflow - cmpult $R3,$L3,$O3 # did we overflow? - cmpult $R2,$CC,$CC # overflow? - addq $R4,$L4,$R4 # r=a+b; - addq $CC,$O2,$CC - cmpult $R4,$L4,$O4 # did we overflow? - addq $R3,$CC,$R3 # c+= overflow - stq $R1,0($16) # r[0]=c - cmpult $R3,$CC,$CC # overflow? - stq $R2,8($16) # r[1]=c - addq $CC,$O3,$CC - stq $R3,16($16) # r[2]=c - addq $R4,$CC,$R4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $R4,$CC,$CC # overflow? - addq $17,32,$17 # a++ - addq $CC,$O4,$CC - stq $R4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$800 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - br $801 - .align 4 -$845: - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - cmpult $L1,$R1,$O1 # will we borrow? - subq $L1,$R1,$R1 # r=a-b; - subq $19,1,$19 # loop-- - cmpult $R1,$CC,$O2 # will we borrow? - subq $R1,$CC,$R1 # c+= overflow - addq $17,8,$17 # a++ - addq $18,8,$18 # b++ - stq $R1,0($16) # r[0]=c - addq $O2,$O1,$CC - addq $16,8,$16 # r++ - - bgt $19,$845 - ret $31,($26),1 # else exit - -$800: - addq $19,4,$19 - bgt $19,$845 # goto tail code - ret $31,($26),1 # else exit - .end bn_sub_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words: - ldgp $29,0($27) -bn_div_words..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words -EOF - return($data); - } - diff --git a/src/lib/libcrypto/bn/asm/ca.pl b/src/lib/libcrypto/bn/asm/ca.pl deleted file mode 100644 index c1ce67a6b4..0000000000 --- a/src/lib/libcrypto/bn/asm/ca.pl +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -push(@INC,"perlasm","../../perlasm"); -require "alpha.pl"; -require "alpha/mul_add.pl"; -require "alpha/mul.pl"; -require "alpha/sqr.pl"; -require "alpha/add.pl"; -require "alpha/sub.pl"; -require "alpha/mul_c8.pl"; -require "alpha/mul_c4.pl"; -require "alpha/sqr_c4.pl"; -require "alpha/sqr_c8.pl"; -require "alpha/div.pl"; - -&asm_init($ARGV[0],$0); - -&bn_mul_words("bn_mul_words"); -&bn_sqr_words("bn_sqr_words"); -&bn_mul_add_words("bn_mul_add_words"); -&bn_add_words("bn_add_words"); -&bn_sub_words("bn_sub_words"); -&bn_div_words("bn_div_words"); -&bn_mul_comba8("bn_mul_comba8"); -&bn_mul_comba4("bn_mul_comba4"); -&bn_sqr_comba4("bn_sqr_comba4"); -&bn_sqr_comba8("bn_sqr_comba8"); - -&asm_finish(); - diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl index 5d962cb957..57101a6bd7 100644 --- a/src/lib/libcrypto/bn/asm/co-586.pl +++ b/src/lib/libcrypto/bn/asm/co-586.pl @@ -1,6 +1,7 @@ #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],$0); diff --git a/src/lib/libcrypto/bn/asm/co-alpha.pl b/src/lib/libcrypto/bn/asm/co-alpha.pl deleted file mode 100644 index 67dad3e3d5..0000000000 --- a/src/lib/libcrypto/bn/asm/co-alpha.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -push(@INC,"perlasm","../../perlasm"); -require "alpha.pl"; - -&asm_init($ARGV[0],$0); - -print &bn_sub_words("bn_sub_words"); - -&asm_finish(); - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - $cc="r0"; - $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13"; - $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14"; - $a2="r3"; $b2="r7"; $r2="r11"; - $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15"; - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$a0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); # add the borrows - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$a1); # will we borrow? - &add($b1,$t1,$cc); # add the borrows - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($a0,&QWPw(0,$rp)); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$a2); # will we borrow? - &add($b2,$tmp,$cc); # add the borrows - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($a1,&QWPw(1,$rp)); # save - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$a3); # will we borrow? - &add($b3,$t3,$cc); # add the borrows - - &st($a2,&QWPw(2,$rp)); # save - &sub($count,4,$count); # count-=4 - &st($a3,&QWPw(3,$rp)); # save - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - } - diff --git a/src/lib/libcrypto/bn/asm/mips1.s b/src/lib/libcrypto/bn/asm/mips1.s deleted file mode 100644 index 44fa1254c7..0000000000 --- a/src/lib/libcrypto/bn/asm/mips1.s +++ /dev/null @@ -1,539 +0,0 @@ -/* This assember is for R2000/R3000 machines, or higher ones that do - * no want to do any 64 bit arithmatic. - * Make sure that the SSLeay bignum library is compiled with - * THIRTY_TWO_BIT set. - * This must either be compiled with the system CC, or, if you use GNU gas, - * cc -E mips1.s|gas -o mips1.o - */ - .set reorder - .set noat - -#define R1 $1 -#define CC $2 -#define R2 $3 -#define R3 $8 -#define R4 $9 -#define L1 $10 -#define L2 $11 -#define L3 $12 -#define L4 $13 -#define H1 $14 -#define H2 $15 -#define H3 $24 -#define H4 $25 - -#define P1 $4 -#define P2 $5 -#define P3 $6 -#define P4 $7 - - .align 2 - .ent bn_mul_add_words - .globl bn_mul_add_words -.text -bn_mul_add_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - #blt P3,4,$lab34 - - subu R1,P3,4 - move CC,$0 - bltz R1,$lab34 -$lab2: - lw R1,0(P1) - lw L1,0(P2) - lw R2,4(P1) - lw L2,4(P2) - lw R3,8(P1) - lw L3,8(P2) - lw R4,12(P1) - lw L4,12(P2) - multu L1,P4 - addu R1,R1,CC - mflo L1 - sltu CC,R1,CC - addu R1,R1,L1 - mfhi H1 - sltu L1,R1,L1 - sw R1,0(P1) - addu CC,CC,L1 - multu L2,P4 - addu CC,H1,CC - mflo L2 - addu R2,R2,CC - sltu CC,R2,CC - mfhi H2 - addu R2,R2,L2 - addu P2,P2,16 - sltu L2,R2,L2 - sw R2,4(P1) - addu CC,CC,L2 - multu L3,P4 - addu CC,H2,CC - mflo L3 - addu R3,R3,CC - sltu CC,R3,CC - mfhi H3 - addu R3,R3,L3 - addu P1,P1,16 - sltu L3,R3,L3 - sw R3,-8(P1) - addu CC,CC,L3 - multu L4,P4 - addu CC,H3,CC - mflo L4 - addu R4,R4,CC - sltu CC,R4,CC - mfhi H4 - addu R4,R4,L4 - subu P3,P3,4 - sltu L4,R4,L4 - addu CC,CC,L4 - addu CC,H4,CC - - subu R1,P3,4 - sw R4,-4(P1) # delay slot - bgez R1,$lab2 - - bleu P3,0,$lab3 - .align 2 -$lab33: - lw L1,0(P2) - lw R1,0(P1) - multu L1,P4 - addu R1,R1,CC - sltu CC,R1,CC - addu P1,P1,4 - mflo L1 - mfhi H1 - addu R1,R1,L1 - addu P2,P2,4 - sltu L1,R1,L1 - subu P3,P3,1 - addu CC,CC,L1 - sw R1,-4(P1) - addu CC,H1,CC - bgtz P3,$lab33 - j $31 - .align 2 -$lab3: - j $31 - .align 2 -$lab34: - bgt P3,0,$lab33 - j $31 - .end bn_mul_add_words - - .align 2 - # Program Unit: bn_mul_words - .ent bn_mul_words - .globl bn_mul_words -.text -bn_mul_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P3,P3,4 - move CC,$0 - bltz P3,$lab45 -$lab44: - lw L1,0(P2) - lw L2,4(P2) - lw L3,8(P2) - lw L4,12(P2) - multu L1,P4 - subu P3,P3,4 - mflo L1 - mfhi H1 - addu L1,L1,CC - multu L2,P4 - sltu CC,L1,CC - sw L1,0(P1) - addu CC,H1,CC - mflo L2 - mfhi H2 - addu L2,L2,CC - multu L3,P4 - sltu CC,L2,CC - sw L2,4(P1) - addu CC,H2,CC - mflo L3 - mfhi H3 - addu L3,L3,CC - multu L4,P4 - sltu CC,L3,CC - sw L3,8(P1) - addu CC,H3,CC - mflo L4 - mfhi H4 - addu L4,L4,CC - addu P1,P1,16 - sltu CC,L4,CC - addu P2,P2,16 - addu CC,H4,CC - sw L4,-4(P1) - - bgez P3,$lab44 - b $lab45 -$lab46: - lw L1,0(P2) - addu P1,P1,4 - multu L1,P4 - addu P2,P2,4 - mflo L1 - mfhi H1 - addu L1,L1,CC - subu P3,P3,1 - sltu CC,L1,CC - sw L1,-4(P1) - addu CC,H1,CC - bgtz P3,$lab46 - j $31 -$lab45: - addu P3,P3,4 - bgtz P3,$lab46 - j $31 - .align 2 - .end bn_mul_words - - # Program Unit: bn_sqr_words - .ent bn_sqr_words - .globl bn_sqr_words -.text -bn_sqr_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P3,P3,4 - bltz P3,$lab55 -$lab54: - lw L1,0(P2) - lw L2,4(P2) - lw L3,8(P2) - lw L4,12(P2) - - multu L1,L1 - subu P3,P3,4 - mflo L1 - mfhi H1 - sw L1,0(P1) - sw H1,4(P1) - - multu L2,L2 - addu P1,P1,32 - mflo L2 - mfhi H2 - sw L2,-24(P1) - sw H2,-20(P1) - - multu L3,L3 - addu P2,P2,16 - mflo L3 - mfhi H3 - sw L3,-16(P1) - sw H3,-12(P1) - - multu L4,L4 - - mflo L4 - mfhi H4 - sw L4,-8(P1) - sw H4,-4(P1) - - bgtz P3,$lab54 - b $lab55 -$lab56: - lw L1,0(P2) - addu P1,P1,8 - multu L1,L1 - addu P2,P2,4 - subu P3,P3,1 - mflo L1 - mfhi H1 - sw L1,-8(P1) - sw H1,-4(P1) - - bgtz P3,$lab56 - j $31 -$lab55: - addu P3,P3,4 - bgtz P3,$lab56 - j $31 - .align 2 - .end bn_sqr_words - - # Program Unit: bn_add_words - .ent bn_add_words - .globl bn_add_words -.text -bn_add_words: # 0x590 - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P4,P4,4 - move CC,$0 - bltz P4,$lab65 -$lab64: - lw L1,0(P2) - lw R1,0(P3) - lw L2,4(P2) - lw R2,4(P3) - - addu L1,L1,CC - lw L3,8(P2) - sltu CC,L1,CC - addu L1,L1,R1 - sltu R1,L1,R1 - lw R3,8(P3) - addu CC,CC,R1 - lw L4,12(P2) - - addu L2,L2,CC - lw R4,12(P3) - sltu CC,L2,CC - addu L2,L2,R2 - sltu R2,L2,R2 - sw L1,0(P1) - addu CC,CC,R2 - addu P1,P1,16 - addu L3,L3,CC - sw L2,-12(P1) - - sltu CC,L3,CC - addu L3,L3,R3 - sltu R3,L3,R3 - addu P2,P2,16 - addu CC,CC,R3 - - addu L4,L4,CC - addu P3,P3,16 - sltu CC,L4,CC - addu L4,L4,R4 - subu P4,P4,4 - sltu R4,L4,R4 - sw L3,-8(P1) - addu CC,CC,R4 - sw L4,-4(P1) - - bgtz P4,$lab64 - b $lab65 -$lab66: - lw L1,0(P2) - lw R1,0(P3) - addu L1,L1,CC - addu P1,P1,4 - sltu CC,L1,CC - addu P2,P2,4 - addu P3,P3,4 - addu L1,L1,R1 - subu P4,P4,1 - sltu R1,L1,R1 - sw L1,-4(P1) - addu CC,CC,R1 - - bgtz P4,$lab66 - j $31 -$lab65: - addu P4,P4,4 - bgtz P4,$lab66 - j $31 - .end bn_add_words - - # Program Unit: bn_div64 - .set at - .set reorder - .text - .align 2 - .globl bn_div64 - # 321 { - .ent bn_div64 2 -bn_div64: - subu $sp, 64 - sw $31, 56($sp) - sw $16, 48($sp) - .mask 0x80010000, -56 - .frame $sp, 64, $31 - move $9, $4 - move $12, $5 - move $16, $6 - # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t; - move $31, $0 - # 323 int i,count=2; - li $13, 2 - # 324 - # 325 if (d == 0) return(BN_MASK2); - bne $16, 0, $80 - li $2, -1 - b $93 -$80: - # 326 - # 327 i=BN_num_bits_word(d); - move $4, $16 - sw $31, 16($sp) - sw $9, 24($sp) - sw $12, 32($sp) - sw $13, 40($sp) - .livereg 0x800ff0e,0xfff - jal BN_num_bits_word - li $4, 32 - lw $31, 16($sp) - lw $9, 24($sp) - lw $12, 32($sp) - lw $13, 40($sp) - move $3, $2 - # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<= d) h-=d; - bltu $9, $16, $82 - subu $9, $9, $16 -$82: - # 337 - # 338 if (i) - beq $3, 0, $83 - # 339 { - # 340 d<<=i; - sll $16, $16, $3 - # 341 h=(h<>(BN_BITS2-i)); - sll $24, $9, $3 - subu $25, $4, $3 - srl $14, $12, $25 - or $9, $24, $14 - # 342 l<<=i; - sll $12, $12, $3 - # 343 } -$83: - # 344 dh=(d&BN_MASK2h)>>BN_BITS4; - # 345 dl=(d&BN_MASK2l); - and $8, $16, -65536 - srl $8, $8, 16 - and $10, $16, 65535 - li $6, -65536 -$84: - # 346 for (;;) - # 347 { - # 348 if ((h>>BN_BITS4) == dh) - srl $15, $9, 16 - bne $8, $15, $85 - # 349 q=BN_MASK2l; - li $5, 65535 - b $86 -$85: - # 350 else - # 351 q=h/dh; - divu $5, $9, $8 -$86: - # 352 - # 353 for (;;) - # 354 { - # 355 t=(h-q*dh); - mul $4, $5, $8 - subu $2, $9, $4 - move $3, $2 - # 356 if ((t&BN_MASK2h) || - # 357 ((dl*q) <= ( - # 358 (t<>BN_BITS4)))) - and $25, $2, $6 - bne $25, $0, $87 - mul $24, $10, $5 - sll $14, $3, 16 - and $15, $12, $6 - srl $25, $15, 16 - addu $15, $14, $25 - bgtu $24, $15, $88 -$87: - # 360 break; - mul $3, $10, $5 - b $89 -$88: - # 361 q--; - addu $5, $5, -1 - # 362 } - b $86 -$89: - # 363 th=q*dh; - # 364 tl=q*dl; - # 365 t=(tl>>BN_BITS4); - # 366 tl=(tl<>BN_BITS4))&BN_MASK2; - sll $24, $9, 16 - srl $15, $12, 16 - or $9, $24, $15 - # 382 l=(l&BN_MASK2l)<15) { - &data_byte(@str[0..15]); - foreach (0..15) { shift @str; } - } - &data_byte(@str) if (@str); -} - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# This is a "teaser" code, as it can be improved in several ways... -# First of all non-SSE2 path should be implemented (yes, for now it -# performs Montgomery multiplication/convolution only on SSE2-capable -# CPUs such as P4, others fall down to original code). Then inner loop -# can be unrolled and modulo-scheduled to improve ILP and possibly -# moved to 128-bit XMM register bank (though it would require input -# rearrangement and/or increase bus bandwidth utilization). Dedicated -# squaring procedure should give further performance improvement... -# Yet, for being draft, the code improves rsa512 *sign* benchmark by -# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-) - -# December 2006 -# -# Modulo-scheduling SSE2 loops results in further 15-20% improvement. -# Integer-only code [being equipped with dedicated squaring procedure] -# gives ~40% on rsa512 sign benchmark... - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -&function_begin("bn_mul_mont"); - -$i="edx"; -$j="ecx"; -$ap="esi"; $tp="esi"; # overlapping variables!!! -$rp="edi"; $bp="edi"; # overlapping variables!!! -$np="ebp"; -$num="ebx"; - -$_num=&DWP(4*0,"esp"); # stack top layout -$_rp=&DWP(4*1,"esp"); -$_ap=&DWP(4*2,"esp"); -$_bp=&DWP(4*3,"esp"); -$_np=&DWP(4*4,"esp"); -$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp"); -$_sp=&DWP(4*6,"esp"); -$_bpend=&DWP(4*7,"esp"); -$frame=32; # size of above frame rounded up to 16n - - &xor ("eax","eax"); - &mov ("edi",&wparam(5)); # int num - &cmp ("edi",4); - &jl (&label("just_leave")); - - &lea ("esi",&wparam(0)); # put aside pointer to argument block - &lea ("edx",&wparam(1)); # load ap - &mov ("ebp","esp"); # saved stack pointer! - &add ("edi",2); # extra two words on top of tp - &neg ("edi"); - &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2)) - &neg ("edi"); - - # minimize cache contention by arraning 2K window between stack - # pointer and ap argument [np is also position sensitive vector, - # but it's assumed to be near ap, as it's allocated at ~same - # time]. - &mov ("eax","esp"); - &sub ("eax","edx"); - &and ("eax",2047); - &sub ("esp","eax"); # this aligns sp and ap modulo 2048 - - &xor ("edx","esp"); - &and ("edx",2048); - &xor ("edx",2048); - &sub ("esp","edx"); # this splits them apart modulo 4096 - - &and ("esp",-64); # align to cache line - - ################################# load argument block... - &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp - &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap - &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp - &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np - &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0 - #&mov ("edi",&DWP(5*4,"esi"));# int num - - &mov ("esi",&DWP(0,"esi")); # pull n0[0] - &mov ($_rp,"eax"); # ... save a copy of argument block - &mov ($_ap,"ebx"); - &mov ($_bp,"ecx"); - &mov ($_np,"edx"); - &mov ($_n0,"esi"); - &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling - #&mov ($_num,$num); # redundant as $num is not reused - &mov ($_sp,"ebp"); # saved stack pointer! - -if($sse2) { -$acc0="mm0"; # mmx register bank layout -$acc1="mm1"; -$car0="mm2"; -$car1="mm3"; -$mul0="mm4"; -$mul1="mm5"; -$temp="mm6"; -$mask="mm7"; - - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"eax"),26); - &jnc (&label("non_sse2")); - - &mov ("eax",-1); - &movd ($mask,"eax"); # mask 32 lower bits - - &mov ($ap,$_ap); # load input pointers - &mov ($bp,$_bp); - &mov ($np,$_np); - - &xor ($i,$i); # i=0 - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp)); # bp[0] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($car1,&DWP(0,$np)); # np[0] - - &pmuludq($mul1,$mul0); # ap[0]*bp[0] - &movq ($car0,$mul1); - &movq ($acc0,$mul1); # I wish movd worked for - &pand ($acc0,$mask); # inter-register transfers - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0 - &paddq ($car1,$acc0); - - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &inc ($j); # j++ -&set_label("1st",16); - &pmuludq($acc0,$mul0); # ap[j]*bp[0] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[0]; - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]= - &psrlq ($car1,32); - - &lea ($j,&DWP(1,$j)); - &cmp ($j,$num); - &jl (&label("1st")); - - &pmuludq($acc0,$mul0); # ap[num-1]*bp[0] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[0]; - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &paddq ($car1,$car0); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &inc ($i); # i++ -&set_label("outer"); - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($temp,&DWP($frame,"esp")); # tp[0] - &movd ($car1,&DWP(0,$np)); # np[0] - &pmuludq($mul1,$mul0); # ap[0]*bp[i] - - &paddq ($mul1,$temp); # +=tp[0] - &movq ($acc0,$mul1); - &movq ($car0,$mul1); - &pand ($acc0,$mask); - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); - &paddq ($car1,$acc0); - - &movd ($temp,&DWP($frame+4,"esp")); # tp[1] - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[1] - - &inc ($j); # j++ - &dec ($num); -&set_label("inner"); - &pmuludq($acc0,$mul0); # ap[j]*bp[i] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1] - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j] - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]= - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[j+1] - - &dec ($num); - &lea ($j,&DWP(1,$j)); # j++ - &jnz (&label("inner")); - - &mov ($num,$j); - &pmuludq($acc0,$mul0); # ap[num-1]*bp[i] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1] - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - &psrlq ($car0,32); - &psrlq ($car1,32); - - &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num] - &paddq ($car1,$car0); - &paddq ($car1,$temp); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &lea ($i,&DWP(1,$i)); # i++ - &cmp ($i,$num); - &jle (&label("outer")); - - &emms (); # done with mmx bank - &jmp (&label("common_tail")); - -&set_label("non_sse2",16); -} - -if (0) { - &mov ("esp",$_sp); - &xor ("eax","eax"); # signal "not fast enough [yet]" - &jmp (&label("just_leave")); - # While the below code provides competitive performance for - # all key lengthes on modern Intel cores, it's still more - # than 10% slower for 4096-bit key elsewhere:-( "Competitive" - # means compared to the original integer-only assembler. - # 512-bit RSA sign is better by ~40%, but that's about all - # one can say about all CPUs... -} else { -$inp="esi"; # integer path uses these registers differently -$word="edi"; -$carry="ebp"; - - &mov ($inp,$_ap); - &lea ($carry,&DWP(1,$num)); - &mov ($word,$_bp); - &xor ($j,$j); # j=0 - &mov ("edx",$inp); - &and ($carry,1); # see if num is even - &sub ("edx",$word); # see if ap==bp - &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num] - &or ($carry,"edx"); - &mov ($word,&DWP(0,$word)); # bp[0] - &jz (&label("bn_sqr_mont")); - &mov ($_bpend,"eax"); - &mov ("eax",&DWP(0,$inp)); - &xor ("edx","edx"); - -&set_label("mull",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[0] - &add ($carry,"eax"); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("mull")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[0] - &mov ($word,$_n0); - &add ("eax",$carry); - &mov ($inp,$_np); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]= - &xor ($j,$j); - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mov ("eax",&DWP(0,$inp)); # np[0] - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &inc ($j); - - &jmp (&label("2ndmadd")); - -&set_label("1stmadd",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[i] - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("1stmadd")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[i] - &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &add ($carry,"eax"); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &xor ($j,$j); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]= - &adc ($j,0); - &mov ("eax",&DWP(0,$inp)); # np[0] - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &mov ($j,1); - -&set_label("2ndmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]= - &jl (&label("2ndmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &xor ("eax","eax"); - &mov ($j,$_bp); # &bp[i] - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &lea ($j,&DWP(4,$j)); - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$_bpend); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(0,$j)); # bp[i+1] - &mov ($inp,$_ap); - &mov ($_bp,$j); # &bp[++i] - &xor ($j,$j); - &xor ("edx","edx"); - &mov ("eax",&DWP(0,$inp)); - &jmp (&label("1stmadd")); - -&set_label("bn_sqr_mont",16); -$sbit=$num; - &mov ($_num,$num); - &mov ($_bp,$j); # i=0 - - &mov ("eax",$word); # ap[0] - &mul ($word); # ap[0]*ap[0] - &mov (&DWP($frame,"esp"),"eax"); # tp[0]= - &mov ($sbit,"edx"); - &shr ("edx",1); - &and ($sbit,1); - &inc ($j); -&set_label("sqr",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[0] - &add ("eax",$carry); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &shr ("eax",31); - &cmp ($j,$_num); - &mov ($sbit,"eax"); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("sqr")); - - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1] - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*ap[0] - &add ("eax",$carry); - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - &shr ("eax",31); - &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]= - - &lea ($carry,&DWP(0,"eax","edx",2)); - &mov ("eax",&DWP(0,$inp)); # np[0] - &shr ("edx",31); - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]= - &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ($num,$j); - &adc ("edx",0); - &mov ("eax",&DWP(4,$inp)); # np[1] - &mov ($j,1); - -&set_label("3rdmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]= - - &mov ($carry,"edx"); - &mul ($word); # np[j+1]*m - &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1] - &lea ($j,&DWP(2,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]= - &jl (&label("3rdmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &mov ($j,$_bp); # i - &xor ("eax","eax"); - &mov ($inp,$_ap); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$num); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(4,$inp,$j,4)); # ap[i] - &lea ($j,&DWP(1,$j)); - &mov ("eax",$word); - &mov ($_bp,$j); # ++i - &mul ($word); # ap[i]*ap[i] - &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i] - &adc ("edx",0); - &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]= - &xor ($carry,$carry); - &cmp ($j,$num); - &lea ($j,&DWP(1,$j)); - &je (&label("sqrlast")); - - &mov ($sbit,"edx"); # zaps $num - &shr ("edx",1); - &and ($sbit,1); -&set_label("sqradd",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[i] - &add ("eax",$carry); - &lea ($carry,&DWP(0,"eax","eax")); - &adc ("edx",0); - &shr ("eax",31); - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("eax",0); - &add ($carry,$sbit); - &adc ("eax",0); - &cmp ($j,$_num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &mov ($sbit,"eax"); - &jle (&label("sqradd")); - - &mov ($carry,"edx"); - &lea ("edx",&DWP(0,$sbit,"edx",2)); - &shr ($carry,31); -&set_label("sqrlast"); - &mov ($word,$_n0); - &mov ($inp,$_np); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num] - &mov ("eax",&DWP(0,$inp)); # np[0] - &adc ($carry,0); - &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]= - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &lea ($num,&DWP(-1,$j)); - &adc ("edx",0); - &mov ($j,1); - &mov ("eax",&DWP(4,$inp)); # np[1] - - &jmp (&label("3rdmadd")); -} - -&set_label("common_tail",16); - &mov ($np,$_np); # load modulus pointer - &mov ($rp,$_rp); # load result pointer - &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped] - - &mov ("eax",&DWP(0,$tp)); # tp[0] - &mov ($j,$num); # j=num-1 - &xor ($i,$i); # i=0 and clear CF! - -&set_label("sub",16); - &sbb ("eax",&DWP(0,$np,$i,4)); - &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i] - &dec ($j); # doesn't affect CF! - &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1] - &lea ($i,&DWP(1,$i)); # i++ - &jge (&label("sub")); - - &sbb ("eax",0); # handle upmost overflow bit - &and ($tp,"eax"); - ¬ ("eax"); - &mov ($np,$rp); - &and ($np,"eax"); - &or ($tp,$np); # tp=carry?tp:rp - -&set_label("copy",16); # copy or in-place refresh - &mov ("eax",&DWP(0,$tp,$num,4)); - &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i] - &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector - &dec ($num); - &jge (&label("copy")); - - &mov ("esp",$_sp); # pull saved stack pointer - &mov ("eax",1); -&set_label("just_leave"); -&function_end("bn_mul_mont"); - -&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/src/lib/libcrypto/bn/asm/pa-risc.s b/src/lib/libcrypto/bn/asm/pa-risc.s deleted file mode 100644 index 775130a191..0000000000 --- a/src/lib/libcrypto/bn/asm/pa-risc.s +++ /dev/null @@ -1,710 +0,0 @@ - .SPACE $PRIVATE$ - .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 - .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 - .SPACE $TEXT$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 - .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY - .IMPORT $global$,DATA - .IMPORT $$dyncall,MILLICODE -; gcc_compiled.: - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_add_words - .PROC - .CALLINFO FRAME=0,CALLS,SAVE_RP - .ENTRY - stw %r2,-20(0,%r30) - ldi 0,%r28 - extru %r23,31,16,%r2 - stw %r2,-16(0,%r30) - extru %r23,15,16,%r23 - ldil L'65536,%r31 - fldws -16(0,%r30),%fr11R - stw %r23,-16(0,%r30) - ldo 12(%r25),%r29 - ldo 12(%r26),%r23 - fldws -16(0,%r30),%fr11L -L$0002 - ldw 0(0,%r25),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0005 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw 0(0,%r26),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,0(0,%r26) - ldw -8(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0010 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw -8(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,-8(0,%r23) - ldw -4(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0015 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw -4(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,-4(0,%r23) - ldw 0(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0020 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw 0(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,0(0,%r23) - ldo 16(%r29),%r29 - ldo 16(%r25),%r25 - ldo 16(%r23),%r23 - bl L$0002,0 - ldo 16(%r26),%r26 -L$0003 - ldw -20(0,%r30),%r2 - bv,n 0(%r2) - .EXIT - .PROCEND - .align 4 - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_words - .PROC - .CALLINFO FRAME=0,CALLS,SAVE_RP - .ENTRY - stw %r2,-20(0,%r30) - ldi 0,%r28 - extru %r23,31,16,%r2 - stw %r2,-16(0,%r30) - extru %r23,15,16,%r23 - ldil L'65536,%r31 - fldws -16(0,%r30),%fr11R - stw %r23,-16(0,%r30) - ldo 12(%r26),%r29 - ldo 12(%r25),%r23 - fldws -16(0,%r30),%fr11L -L$0026 - ldw 0(0,%r25),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0029 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,0(0,%r26) - ldw -8(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0033 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,-8(0,%r29) - ldw -4(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0037 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,-4(0,%r29) - ldw 0(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0041 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,0(0,%r29) - ldo 16(%r23),%r23 - ldo 16(%r25),%r25 - ldo 16(%r29),%r29 - bl L$0026,0 - ldo 16(%r26),%r26 -L$0027 - ldw -20(0,%r30),%r2 - bv,n 0(%r2) - .EXIT - .PROCEND - .align 4 - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR -bn_sqr_words - .PROC - .CALLINFO FRAME=0,NO_CALLS - .ENTRY - ldo 28(%r26),%r23 - ldo 12(%r25),%r28 -L$0046 - ldw 0(0,%r25),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,0(0,%r26) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-24(0,%r23) - ldw -8(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-20(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-16(0,%r23) - ldw -4(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-12(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-8(0,%r23) - ldw 0(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-4(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,0(0,%r23) - ldo 16(%r28),%r28 - ldo 16(%r25),%r25 - ldo 32(%r23),%r23 - bl L$0046,0 - ldo 32(%r26),%r26 -L$0057 - bv,n 0(%r2) - .EXIT - .PROCEND - .IMPORT BN_num_bits_word,CODE - .IMPORT fprintf,CODE - .IMPORT __iob,DATA - .SPACE $TEXT$ - .SUBSPA $LIT$ - - .align 4 -L$C0000 - .STRING "Division would overflow\x0a\x00" - .IMPORT abort,CODE - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR -bn_div64 - .PROC - .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r8,128(0,%r30) - stw %r7,-124(0,%r30) - stw %r4,-112(0,%r30) - stw %r3,-108(0,%r30) - copy %r26,%r3 - copy %r25,%r4 - stw %r6,-120(0,%r30) - ldi 0,%r7 - stw %r5,-116(0,%r30) - movb,<> %r24,%r5,L$0059 - ldi 2,%r6 - bl L$0076,0 - ldi -1,%r28 -L$0059 - .CALL ARGW0=GR - bl BN_num_bits_word,%r2 - copy %r5,%r26 - ldi 32,%r19 - comb,= %r19,%r28,L$0060 - subi 31,%r28,%r19 - mtsar %r19 - zvdepi 1,32,%r19 - comb,>>= %r19,%r3,L$0060 - addil LR'__iob-$global$+32,%r27 - ldo RR'__iob-$global$+32(%r1),%r26 - ldil LR'L$C0000,%r25 - .CALL ARGW0=GR,ARGW1=GR - bl fprintf,%r2 - ldo RR'L$C0000(%r25),%r25 - .CALL - bl abort,%r2 - nop -L$0060 - comb,>> %r5,%r3,L$0061 - subi 32,%r28,%r28 - sub %r3,%r5,%r3 -L$0061 - comib,= 0,%r28,L$0062 - subi 31,%r28,%r19 - mtsar %r19 - zvdep %r5,32,%r5 - zvdep %r3,32,%r21 - subi 32,%r28,%r20 - mtsar %r20 - vshd 0,%r4,%r20 - or %r21,%r20,%r3 - mtsar %r19 - zvdep %r4,32,%r4 -L$0062 - extru %r5,15,16,%r23 - extru %r5,31,16,%r28 -L$0063 - extru %r3,15,16,%r19 - comb,<> %r23,%r19,L$0066 - copy %r3,%r26 - bl L$0067,0 - zdepi -1,31,16,%r29 -L$0066 - .IMPORT $$divU,MILLICODE - bl $$divU,%r31 - copy %r23,%r25 -L$0067 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r23,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr9 - ldw -16(0,%r30),%r8 - fstws %fr9R,-16(0,%r30) - copy %r8,%r22 - ldw -16(0,%r30),%r8 - extru %r4,15,16,%r24 - copy %r8,%r21 -L$0068 - sub %r3,%r21,%r20 - copy %r20,%r19 - depi 0,31,16,%r19 - comib,<> 0,%r19,L$0069 - zdep %r20,15,16,%r19 - addl %r19,%r24,%r19 - comb,>>= %r19,%r22,L$0069 - sub %r22,%r28,%r22 - sub %r21,%r23,%r21 - bl L$0068,0 - ldo -1(%r29),%r29 -L$0069 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r8 - stw %r23,-16(0,%r30) - fldws -16(0,%r30),%fr10R - copy %r8,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - extru %r19,15,16,%r20 - ldw -16(0,%r30),%r8 - zdep %r19,15,16,%r19 - addl %r8,%r20,%r20 - comclr,<<= %r19,%r4,0 - addi 1,%r20,%r20 - comb,<<= %r20,%r3,L$0074 - sub %r4,%r19,%r4 - addl %r3,%r5,%r3 - ldo -1(%r29),%r29 -L$0074 - addib,= -1,%r6,L$0064 - sub %r3,%r20,%r3 - zdep %r29,15,16,%r7 - shd %r3,%r4,16,%r3 - bl L$0063,0 - zdep %r4,15,16,%r4 -L$0064 - or %r7,%r29,%r28 -L$0076 - ldw -148(0,%r30),%r2 - ldw -124(0,%r30),%r7 - ldw -120(0,%r30),%r6 - ldw -116(0,%r30),%r5 - ldw -112(0,%r30),%r4 - ldw -108(0,%r30),%r3 - bv 0(%r2) - ldwm -128(0,%r30),%r8 - .EXIT - .PROCEND diff --git a/src/lib/libcrypto/bn/asm/r3000.s b/src/lib/libcrypto/bn/asm/r3000.s deleted file mode 100644 index e95269afa3..0000000000 --- a/src/lib/libcrypto/bn/asm/r3000.s +++ /dev/null @@ -1,646 +0,0 @@ - .file 1 "../bn_mulw.c" - .set nobopt - .option pic2 - - # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C - - # Cc1 defaults: - # -mabicalls - - # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1): - # -quiet -dumpbase -O2 -o - -gcc2_compiled.: -__gnu_compiled_c: - .rdata - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24 - .byte 0x0 - .text - .align 2 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $12,$4 - move $14,$5 - move $9,$6 - move $13,$7 - move $8,$0 - addu $10,$12,12 - addu $11,$14,12 -$L2: - lw $6,0($14) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,0($12) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,0($12) - .set macro - .set reorder - - lw $6,-8($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,-8($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,-8($10) - .set macro - .set reorder - - lw $6,-4($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,-4($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,-4($10) - .set macro - .set reorder - - lw $6,0($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,0($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,0($10) - .set macro - .set reorder - - addu $11,$11,16 - addu $14,$14,16 - addu $10,$10,16 - .set noreorder - .set nomacro - j $L2 - addu $12,$12,16 - .set macro - .set reorder - -$L3: - .set noreorder - .set nomacro - j $31 - move $2,$8 - .set macro - .set reorder - - .end bn_mul_add_words - .align 2 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $11,$4 - move $12,$5 - move $8,$6 - move $6,$0 - addu $10,$11,12 - addu $9,$12,12 -$L10: - lw $4,0($12) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,0($11) - .set macro - .set reorder - - lw $4,-8($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,-8($10) - .set macro - .set reorder - - lw $4,-4($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,-4($10) - .set macro - .set reorder - - lw $4,0($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,0($10) - .set macro - .set reorder - - addu $9,$9,16 - addu $12,$12,16 - addu $10,$10,16 - .set noreorder - .set nomacro - j $L10 - addu $11,$11,16 - .set macro - .set reorder - -$L11: - .set noreorder - .set nomacro - j $31 - move $2,$6 - .set macro - .set reorder - - .end bn_mul_words - .align 2 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $9,$4 - addu $7,$9,28 - addu $8,$5,12 -$L18: - lw $2,0($5) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,0($9) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-24($7) - .set macro - .set reorder - - lw $2,-8($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-20($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-16($7) - .set macro - .set reorder - - lw $2,-4($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-12($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-8($7) - .set macro - .set reorder - - lw $2,0($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-4($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,0($7) - .set macro - .set reorder - - addu $8,$8,16 - addu $5,$5,16 - addu $7,$7,32 - .set noreorder - .set nomacro - j $L18 - addu $9,$9,32 - .set macro - .set reorder - -$L19: - j $31 - .end bn_sqr_words - .rdata - .align 2 -$LC0: - - .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e - .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f - .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa - .byte 0x0 - .text - .align 2 - .globl bn_div64 - .ent bn_div64 -bn_div64: - .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8 - .mask 0x901f0000,-8 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - subu $sp,$sp,56 - .cprestore 16 - sw $16,24($sp) - move $16,$4 - sw $17,28($sp) - move $17,$5 - sw $18,32($sp) - move $18,$6 - sw $20,40($sp) - move $20,$0 - sw $19,36($sp) - li $19,0x00000002 # 2 - sw $31,48($sp) - .set noreorder - .set nomacro - bne $18,$0,$L26 - sw $28,44($sp) - .set macro - .set reorder - - .set noreorder - .set nomacro - j $L43 - li $2,-1 # 0xffffffff - .set macro - .set reorder - -$L26: - move $4,$18 - jal BN_num_bits_word - move $4,$2 - li $2,0x00000020 # 32 - .set noreorder - .set nomacro - beq $4,$2,$L27 - li $2,0x00000001 # 1 - .set macro - .set reorder - - sll $2,$2,$4 - sltu $2,$2,$16 - .set noreorder - .set nomacro - beq $2,$0,$L44 - li $5,0x00000020 # 32 - .set macro - .set reorder - - la $4,__iob+32 - la $5,$LC0 - jal fprintf - jal abort -$L27: - li $5,0x00000020 # 32 -$L44: - sltu $2,$16,$18 - .set noreorder - .set nomacro - bne $2,$0,$L28 - subu $4,$5,$4 - .set macro - .set reorder - - subu $16,$16,$18 -$L28: - .set noreorder - .set nomacro - beq $4,$0,$L29 - li $10,-65536 # 0xffff0000 - .set macro - .set reorder - - sll $18,$18,$4 - sll $3,$16,$4 - subu $2,$5,$4 - srl $2,$17,$2 - or $16,$3,$2 - sll $17,$17,$4 -$L29: - srl $7,$18,16 - andi $9,$18,0xffff -$L30: - srl $2,$16,16 - .set noreorder - .set nomacro - beq $2,$7,$L34 - li $6,0x0000ffff # 65535 - .set macro - .set reorder - - divu $6,$16,$7 -$L34: - mult $6,$9 - mflo $5 - #nop - #nop - mult $6,$7 - and $2,$17,$10 - srl $8,$2,16 - mflo $4 -$L35: - subu $3,$16,$4 - and $2,$3,$10 - .set noreorder - .set nomacro - bne $2,$0,$L36 - sll $2,$3,16 - .set macro - .set reorder - - addu $2,$2,$8 - sltu $2,$2,$5 - .set noreorder - .set nomacro - beq $2,$0,$L36 - subu $5,$5,$9 - .set macro - .set reorder - - subu $4,$4,$7 - .set noreorder - .set nomacro - j $L35 - addu $6,$6,-1 - .set macro - .set reorder - -$L36: - mult $6,$7 - mflo $5 - #nop - #nop - mult $6,$9 - mflo $4 - #nop - #nop - srl $3,$4,16 - sll $2,$4,16 - and $4,$2,$10 - sltu $2,$17,$4 - .set noreorder - .set nomacro - beq $2,$0,$L40 - addu $5,$5,$3 - .set macro - .set reorder - - addu $5,$5,1 -$L40: - sltu $2,$16,$5 - .set noreorder - .set nomacro - beq $2,$0,$L41 - subu $17,$17,$4 - .set macro - .set reorder - - addu $16,$16,$18 - addu $6,$6,-1 -$L41: - addu $19,$19,-1 - .set noreorder - .set nomacro - beq $19,$0,$L31 - subu $16,$16,$5 - .set macro - .set reorder - - sll $20,$6,16 - sll $3,$16,16 - srl $2,$17,16 - or $16,$3,$2 - .set noreorder - .set nomacro - j $L30 - sll $17,$17,16 - .set macro - .set reorder - -$L31: - or $2,$20,$6 -$L43: - lw $31,48($sp) - lw $20,40($sp) - lw $19,36($sp) - lw $18,32($sp) - lw $17,28($sp) - lw $16,24($sp) - addu $sp,$sp,56 - j $31 - .end bn_div64 - - .globl abort .text - .globl fprintf .text - .globl BN_num_bits_word .text diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S index 8c56e2e7e7..63de1860f2 100644 --- a/src/lib/libcrypto/bn/asm/sparcv8plus.S +++ b/src/lib/libcrypto/bn/asm/sparcv8plus.S @@ -144,6 +144,19 @@ * } */ +#if defined(__SUNPRO_C) && defined(__sparcv9) + /* They've said -xarch=v9 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#elif defined(__GNUC__) && defined(__arch64__) + /* They've said -m64 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#else +# define FRAME_SIZE -96 +#endif /* * GNU assembler can't stand stuw:-( */ @@ -619,8 +632,6 @@ bn_sub_words: * Andy. */ -#define FRAME_SIZE -96 - /* * Here is register usage map for *all* routines below. */ diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index f1719a5877..e484b7fc11 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h @@ -55,6 +55,59 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ /* ==================================================================== * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. * @@ -77,6 +130,7 @@ #include /* FILE */ #endif #include +#include #ifdef __cplusplus extern "C" { @@ -94,9 +148,11 @@ extern "C" { /* #define BN_DEBUG */ /* #define BN_DEBUG_RAND */ +#ifndef OPENSSL_SMALL_FOOTPRINT #define BN_MUL_COMBA #define BN_SQR_COMBA #define BN_RECURSION +#endif /* This next option uses the C libraries (2 word)/(1 word) function. * If it is not defined, I use my C version (which is slower). @@ -137,6 +193,8 @@ extern "C" { #define BN_DEC_FMT1 "%lu" #define BN_DEC_FMT2 "%019lu" #define BN_DEC_NUM 19 +#define BN_HEX_FMT1 "%lX" +#define BN_HEX_FMT2 "%016lX" #endif /* This is where the long long data type is 64 bits, but long is 32. @@ -162,83 +220,37 @@ extern "C" { #define BN_DEC_FMT1 "%llu" #define BN_DEC_FMT2 "%019llu" #define BN_DEC_NUM 19 +#define BN_HEX_FMT1 "%llX" +#define BN_HEX_FMT2 "%016llX" #endif #ifdef THIRTY_TWO_BIT #ifdef BN_LLONG -# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) +# if defined(_WIN32) && !defined(__GNUC__) # define BN_ULLONG unsigned __int64 +# define BN_MASK (0xffffffffffffffffI64) # else # define BN_ULLONG unsigned long long +# define BN_MASK (0xffffffffffffffffLL) # endif #endif -#define BN_ULONG unsigned long -#define BN_LONG long +#define BN_ULONG unsigned int +#define BN_LONG int #define BN_BITS 64 #define BN_BYTES 4 #define BN_BITS2 32 #define BN_BITS4 16 -#ifdef OPENSSL_SYS_WIN32 -/* VC++ doesn't like the LL suffix */ -#define BN_MASK (0xffffffffffffffffL) -#else -#define BN_MASK (0xffffffffffffffffLL) -#endif #define BN_MASK2 (0xffffffffL) #define BN_MASK2l (0xffff) #define BN_MASK2h1 (0xffff8000L) #define BN_MASK2h (0xffff0000L) #define BN_TBIT (0x80000000L) #define BN_DEC_CONV (1000000000L) -#define BN_DEC_FMT1 "%lu" -#define BN_DEC_FMT2 "%09lu" -#define BN_DEC_NUM 9 -#endif - -#ifdef SIXTEEN_BIT -#ifndef BN_DIV2W -#define BN_DIV2W -#endif -#define BN_ULLONG unsigned long -#define BN_ULONG unsigned short -#define BN_LONG short -#define BN_BITS 32 -#define BN_BYTES 2 -#define BN_BITS2 16 -#define BN_BITS4 8 -#define BN_MASK (0xffffffff) -#define BN_MASK2 (0xffff) -#define BN_MASK2l (0xff) -#define BN_MASK2h1 (0xff80) -#define BN_MASK2h (0xff00) -#define BN_TBIT (0x8000) -#define BN_DEC_CONV (100000) #define BN_DEC_FMT1 "%u" -#define BN_DEC_FMT2 "%05u" -#define BN_DEC_NUM 5 -#endif - -#ifdef EIGHT_BIT -#ifndef BN_DIV2W -#define BN_DIV2W -#endif -#define BN_ULLONG unsigned short -#define BN_ULONG unsigned char -#define BN_LONG char -#define BN_BITS 16 -#define BN_BYTES 1 -#define BN_BITS2 8 -#define BN_BITS4 4 -#define BN_MASK (0xffff) -#define BN_MASK2 (0xff) -#define BN_MASK2l (0xf) -#define BN_MASK2h1 (0xf8) -#define BN_MASK2h (0xf0) -#define BN_TBIT (0x80) -#define BN_DEC_CONV (100) -#define BN_DEC_FMT1 "%u" -#define BN_DEC_FMT2 "%02u" -#define BN_DEC_NUM 2 +#define BN_DEC_FMT2 "%09u" +#define BN_DEC_NUM 9 +#define BN_HEX_FMT1 "%X" +#define BN_HEX_FMT2 "%08X" #endif #define BN_DEFAULT_BITS 1280 @@ -303,12 +315,8 @@ struct bn_mont_ctx_st BIGNUM N; /* The modulus */ BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 * (Ni is only stored for bignum algorithm) */ -#if 0 - /* OpenSSL 0.9.9 preview: */ - BN_ULONG n0[2];/* least significant word(s) of Ni */ -#else - BN_ULONG n0; /* least significant word of Ni */ -#endif + BN_ULONG n0[2];/* least significant word(s) of Ni; + (type changed with 0.9.9, was "BN_ULONG n0;" before) */ int flags; }; @@ -504,6 +512,7 @@ char * BN_bn2hex(const BIGNUM *a); char * BN_bn2dec(const BIGNUM *a); int BN_hex2bn(BIGNUM **a, const char *str); int BN_dec2bn(BIGNUM **a, const char *str); +int BN_asc2bn(BIGNUM **a, const char *str); int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */ BIGNUM *BN_mod_inverse(BIGNUM *ret, @@ -531,17 +540,6 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb); int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, int do_trial_division, BN_GENCB *cb); -int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); - -int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, - const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb); -int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - BIGNUM *Xp1, BIGNUM *Xp2, - const BIGNUM *Xp, - const BIGNUM *e, BN_CTX *ctx, - BN_GENCB *cb); - BN_MONT_CTX *BN_MONT_CTX_new(void ); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, @@ -560,19 +558,22 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, #define BN_BLINDING_NO_UPDATE 0x00000001 #define BN_BLINDING_NO_RECREATE 0x00000002 -BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod); +BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod); void BN_BLINDING_free(BN_BLINDING *b); int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx); int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *); int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *); +#ifndef OPENSSL_NO_DEPRECATED unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *); void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); +#endif +CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *); unsigned long BN_BLINDING_get_flags(const BN_BLINDING *); void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long); BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, - const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, + const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), BN_MONT_CTX *m_ctx); @@ -625,24 +626,24 @@ int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, * t^p[0] + t^p[1] + ... + t^p[k] * where m = p[0] > p[1] > ... > p[k] = 0. */ -int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]); +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]); /* r = a mod p */ int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */ -int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], + const int p[], BN_CTX *ctx); /* r = (a * b) mod p */ +int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx); /* r = (a * a) mod p */ -int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[], +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[], BN_CTX *ctx); /* r = (1 / b) mod p */ int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */ + const int p[], BN_CTX *ctx); /* r = (a / b) mod p */ int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ + const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, - const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ + const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, - const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ -int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max); -int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a); + const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ +int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); +int BN_GF2m_arr2poly(const int p[], BIGNUM *a); /* faster mod functions for the 'NIST primes' * 0 <= a < p^2 */ @@ -751,10 +752,12 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); #define bn_correct_top(a) \ { \ BN_ULONG *ftl; \ - if ((a)->top > 0) \ + int tmp_top = (a)->top; \ + if (tmp_top > 0) \ { \ - for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ - if (*(ftl--)) break; \ + for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \ + if (*(ftl--)) break; \ + (a)->top = tmp_top; \ } \ bn_pollute(a); \ } diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 99bc2de491..c43c91cc09 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c @@ -75,6 +75,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT while (num&~3) { mul_add(rp[0],ap[0],w,c1); @@ -83,11 +84,11 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) mul_add(rp[3],ap[3],w,c1); ap+=4; rp+=4; num-=4; } - if (num) +#endif + while (num) { - mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; - mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; - mul_add(rp[2],ap[2],w,c1); return c1; + mul_add(rp[0],ap[0],w,c1); + ap++; rp++; num--; } return(c1); @@ -100,6 +101,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT while (num&~3) { mul(rp[0],ap[0],w,c1); @@ -108,11 +110,11 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) mul(rp[3],ap[3],w,c1); ap+=4; rp+=4; num-=4; } - if (num) +#endif + while (num) { - mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; - mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; - mul(rp[2],ap[2],w,c1); + mul(rp[0],ap[0],w,c1); + ap++; rp++; num--; } return(c1); } @@ -121,6 +123,8 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { assert(n >= 0); if (n <= 0) return; + +#ifndef OPENSSL_SMALL_FOOTPRINT while (n&~3) { sqr(r[0],r[1],a[0]); @@ -129,11 +133,11 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) sqr(r[6],r[7],a[3]); a+=4; r+=8; n-=4; } - if (n) +#endif + while (n) { - sqr(r[0],r[1],a[0]); if (--n == 0) return; - sqr(r[2],r[3],a[1]); if (--n == 0) return; - sqr(r[4],r[5],a[2]); + sqr(r[0],r[1],a[0]); + a++; r+=2; n--; } } @@ -150,18 +154,20 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) bl=LBITS(w); bh=HBITS(w); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (num&~3) { mul_add(rp[0],ap[0],bl,bh,c); - if (--num == 0) break; mul_add(rp[1],ap[1],bl,bh,c); - if (--num == 0) break; mul_add(rp[2],ap[2],bl,bh,c); - if (--num == 0) break; mul_add(rp[3],ap[3],bl,bh,c); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } +#endif + while (num) + { + mul_add(rp[0],ap[0],bl,bh,c); + ap++; rp++; num--; } return(c); } @@ -177,18 +183,20 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) bl=LBITS(w); bh=HBITS(w); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (num&~3) { mul(rp[0],ap[0],bl,bh,carry); - if (--num == 0) break; mul(rp[1],ap[1],bl,bh,carry); - if (--num == 0) break; mul(rp[2],ap[2],bl,bh,carry); - if (--num == 0) break; mul(rp[3],ap[3],bl,bh,carry); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } +#endif + while (num) + { + mul(rp[0],ap[0],bl,bh,carry); + ap++; rp++; num--; } return(carry); } @@ -197,22 +205,21 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { assert(n >= 0); if (n <= 0) return; - for (;;) + +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { sqr64(r[0],r[1],a[0]); - if (--n == 0) break; - sqr64(r[2],r[3],a[1]); - if (--n == 0) break; - sqr64(r[4],r[5],a[2]); - if (--n == 0) break; - sqr64(r[6],r[7],a[3]); - if (--n == 0) break; - - a+=4; - r+=8; + a+=4; r+=8; n-=4; + } +#endif + while (n) + { + sqr64(r[0],r[1],a[0]); + a++; r+=2; n--; } } @@ -303,31 +310,30 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) assert(n >= 0); if (n <= 0) return((BN_ULONG)0); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { ll+=(BN_ULLONG)a[0]+b[0]; r[0]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[1]+b[1]; r[1]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[2]+b[2]; r[2]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[3]+b[3]; r[3]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while (n) + { + ll+=(BN_ULLONG)a[0]+b[0]; + r[0]=(BN_ULONG)ll&BN_MASK2; + ll>>=BN_BITS2; + a++; b++; r++; n--; } return((BN_ULONG)ll); } @@ -340,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) if (n <= 0) return((BN_ULONG)0); c=0; - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { t=a[0]; t=(t+c)&BN_MASK2; @@ -348,35 +355,36 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) l=(t+b[0])&BN_MASK2; c+=(l < t); r[0]=l; - if (--n <= 0) break; - t=a[1]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[1])&BN_MASK2; c+=(l < t); r[1]=l; - if (--n <= 0) break; - t=a[2]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[2])&BN_MASK2; c+=(l < t); r[2]=l; - if (--n <= 0) break; - t=a[3]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[3])&BN_MASK2; c+=(l < t); r[3]=l; - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while(n) + { + t=a[0]; + t=(t+c)&BN_MASK2; + c=(t < c); + l=(t+b[0])&BN_MASK2; + c+=(l < t); + r[0]=l; + a++; b++; r++; n--; } return((BN_ULONG)c); } @@ -390,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) assert(n >= 0); if (n <= 0) return((BN_ULONG)0); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { t1=a[0]; t2=b[0]; r[0]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[1]; t2=b[1]; r[1]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[2]; t2=b[2]; r[2]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[3]; t2=b[3]; r[3]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while (n) + { + t1=a[0]; t2=b[0]; + r[0]=(t1-t2-c)&BN_MASK2; + if (t1 != t2) c=(t1 < t2); + a++; b++; r++; n--; } return(c); } -#ifdef BN_MUL_COMBA +#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) #undef bn_mul_comba8 #undef bn_mul_comba4 @@ -820,18 +827,134 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) r[6]=c1; r[7]=c2; } + +#ifdef OPENSSL_NO_ASM +#ifdef OPENSSL_BN_ASM_MONT +#include +/* + * This is essentially reference implementation, which may or may not + * result in performance improvement. E.g. on IA-32 this routine was + * observed to give 40% faster rsa1024 private key operations and 10% + * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only + * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a + * reference implementation, one to be used as starting point for + * platform-specific assembler. Mentioned numbers apply to compiler + * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and + * can vary not only from platform to platform, but even for compiler + * versions. Assembler vs. assembler improvement coefficients can + * [and are known to] differ and are to be documented elsewhere. + */ +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) + { + BN_ULONG c0,c1,ml,*tp,n0; +#ifdef mul64 + BN_ULONG mh; +#endif + volatile BN_ULONG *vp; + int i=0,j; + +#if 0 /* template for platform-specific implementation */ + if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num); +#endif + vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + + n0 = *n0p; + + c0 = 0; + ml = bp[0]; +#ifdef mul64 + mh = HBITS(ml); + ml = LBITS(ml); + for (j=0;j=np[num-1]) + { + c0 = bn_sub_words(rp,tp,np,num); + if (tp[num]!=0 || c0==0) + { + for(i=0;i +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) + { + BN_ULONG c0,c1,*tp,n0=*n0p; + volatile BN_ULONG *vp; + int i=0,j; + + vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + + for(i=0;i<=num;i++) tp[i]=0; + + for(i=0;i=np[num-1]) + { + c0 = bn_sub_words(rp,tp,np,num); + if (tp[num]!=0 || c0==0) + { + for(i=0;imod, BN_FLG_CONSTTIME); ret->counter = BN_BLINDING_COUNTER; + CRYPTO_THREADID_current(&ret->tid); return(ret); err: if (ret != NULL) BN_BLINDING_free(ret); @@ -263,6 +267,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct return(ret); } +#ifndef OPENSSL_NO_DEPRECATED unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b) { return b->thread_id; @@ -272,6 +277,12 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n) { b->thread_id = n; } +#endif + +CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b) + { + return &b->tid; + } unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b) { @@ -284,7 +295,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags) } BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, - const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, + const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), BN_MONT_CTX *m_ctx) diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c index b3452f1a91..3f2256f675 100644 --- a/src/lib/libcrypto/bn/bn_ctx.c +++ b/src/lib/libcrypto/bn/bn_ctx.c @@ -161,7 +161,7 @@ static void ctxdbg(BN_CTX *ctx) fprintf(stderr,"(%08x): ", (unsigned int)ctx); while(bnidx < ctx->used) { - fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); + fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); if(!(bnidx % BN_CTX_POOL_SIZE)) item = item->next; } @@ -171,8 +171,8 @@ static void ctxdbg(BN_CTX *ctx) while(fpidx < stack->depth) { while(bnidx++ < stack->indexes[fpidx]) - fprintf(stderr," "); - fprintf(stderr,"^^ "); + fprintf(stderr," "); + fprintf(stderr,"^^^ "); bnidx++; fpidx++; } diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index d6ddc9cbe3..802a43d642 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c @@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (dv == NULL) res=BN_CTX_get(ctx); else res=dv; - if (sdiv == NULL || res == NULL) goto err; + if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) + goto err; /* First we normalise the numbers */ norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); @@ -336,7 +337,7 @@ X) -> 0x%08X\n", t2 -= d1; } #else /* !BN_LLONG */ - BN_ULONG t2l,t2h,ql,qh; + BN_ULONG t2l,t2h; q=bn_div_words(n0,n1,d0); #ifdef BN_DEBUG_LEVITTE @@ -354,9 +355,12 @@ X) -> 0x%08X\n", t2l = d1 * q; t2h = BN_UMULT_HIGH(d1,q); #else + { + BN_ULONG ql, qh; t2l=LBITS(d1); t2h=HBITS(d1); ql =LBITS(q); qh =HBITS(q); mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ + } #endif for (;;) @@ -560,7 +564,7 @@ X) -> 0x%08X\n", t2 -= d1; } #else /* !BN_LLONG */ - BN_ULONG t2l,t2h,ql,qh; + BN_ULONG t2l,t2h; q=bn_div_words(n0,n1,d0); #ifdef BN_DEBUG_LEVITTE @@ -578,9 +582,12 @@ X) -> 0x%08X\n", t2l = d1 * q; t2h = BN_UMULT_HIGH(d1,q); #else + { + BN_ULONG ql, qh; t2l=LBITS(d1); t2h=HBITS(d1); ql =LBITS(q); qh =HBITS(q); mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ + } #endif for (;;) diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index 70a33f0d93..d9b6c737fc 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c @@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) rr = BN_CTX_get(ctx); else rr = r; - if ((v = BN_CTX_get(ctx)) == NULL) goto err; + v = BN_CTX_get(ctx); + if (rr == NULL || v == NULL) goto err; if (BN_copy(v,a) == NULL) goto err; bits=BN_num_bits(p); diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c index ae642ccb39..527b0fa15b 100644 --- a/src/lib/libcrypto/bn/bn_gf2m.c +++ b/src/lib/libcrypto/bn/bn_gf2m.c @@ -121,74 +121,12 @@ static const BN_ULONG SQR_tb[16] = SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] #endif -#ifdef SIXTEEN_BIT -#define SQR1(w) \ - SQR_tb[(w) >> 12 & 0xF] << 8 | SQR_tb[(w) >> 8 & 0xF] -#define SQR0(w) \ - SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] -#endif -#ifdef EIGHT_BIT -#define SQR1(w) \ - SQR_tb[(w) >> 4 & 0xF] -#define SQR0(w) \ - SQR_tb[(w) & 15] -#endif /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, * result is a polynomial r with degree < 2 * BN_BITS - 1 * The caller MUST ensure that the variables have the right amount * of space allocated. */ -#ifdef EIGHT_BIT -static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) - { - register BN_ULONG h, l, s; - BN_ULONG tab[4], top1b = a >> 7; - register BN_ULONG a1, a2; - - a1 = a & (0x7F); a2 = a1 << 1; - - tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; - - s = tab[b & 0x3]; l = s; - s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 6; - s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4; - s = tab[b >> 6 ]; l ^= s << 6; h ^= s >> 2; - - /* compensate for the top bit of a */ - - if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } - - *r1 = h; *r0 = l; - } -#endif -#ifdef SIXTEEN_BIT -static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) - { - register BN_ULONG h, l, s; - BN_ULONG tab[4], top1b = a >> 15; - register BN_ULONG a1, a2; - - a1 = a & (0x7FFF); a2 = a1 << 1; - - tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; - - s = tab[b & 0x3]; l = s; - s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 14; - s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 12; - s = tab[b >> 6 & 0x3]; l ^= s << 6; h ^= s >> 10; - s = tab[b >> 8 & 0x3]; l ^= s << 8; h ^= s >> 8; - s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >> 6; - s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >> 4; - s = tab[b >>14 ]; l ^= s << 14; h ^= s >> 2; - - /* compensate for the top bit of a */ - - if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } - - *r1 = h; *r0 = l; - } -#endif #ifdef THIRTY_TWO_BIT static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) { @@ -321,7 +259,7 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) /* Performs modular reduction of a and store result in r. r could be a. */ -int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) { int j, k; int n, dN, d0, d1; @@ -422,11 +360,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -444,7 +382,7 @@ err: /* Compute the product of two polynomials a and b, reduce modulo p, and store * the result in r. r could be a or b; a could be b. */ -int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx) { int zlen, i, j, k, ret = 0; BIGNUM *s; @@ -500,12 +438,12 @@ err: int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(b); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -521,7 +459,7 @@ err: /* Square a, reduce the result mod p, and store it in a. r could be a. */ -int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) { int i, ret = 0; BIGNUM *s; @@ -556,12 +494,12 @@ err: int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -643,7 +581,7 @@ err: * function is only provided for convenience; for best performance, use the * BN_GF2m_mod_inv function. */ -int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx) { BIGNUM *field; int ret = 0; @@ -769,7 +707,7 @@ err: * function is only provided for convenience; for best performance, use the * BN_GF2m_mod_div function. */ -int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx) { BIGNUM *field; int ret = 0; @@ -794,7 +732,7 @@ err: * the result in r. r could be a. * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363. */ -int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx) { int ret = 0, i, n; BIGNUM *u; @@ -840,12 +778,12 @@ err: int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(b); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -863,7 +801,7 @@ err: * the result in r. r could be a. * Uses exponentiation as in algorithm A.4.1 from IEEE P1363. */ -int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) { int ret = 0; BIGNUM *u; @@ -899,11 +837,11 @@ err: int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -920,10 +858,9 @@ err: /* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. * Uses algorithms A.4.7 and A.4.6 from IEEE P1363. */ -int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx) { - int ret = 0, count = 0; - unsigned int j; + int ret = 0, count = 0, j; BIGNUM *a, *z, *rho, *w, *w2, *tmp; bn_check_top(a_); @@ -1018,11 +955,11 @@ err: int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) @@ -1038,20 +975,17 @@ err: } /* Convert the bit-string representation of a polynomial - * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array - * of integers corresponding to the bits with non-zero coefficient. + * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding + * to the bits with non-zero coefficient. Array is terminated with -1. * Up to max elements of the array will be filled. Return value is total - * number of coefficients that would be extracted if array was large enough. + * number of array elements that would be filled if array was large enough. */ -int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) +int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max) { int i, j, k = 0; BN_ULONG mask; - if (BN_is_zero(a) || !BN_is_bit_set(a, 0)) - /* a_0 == 0 => return error (the unsigned int array - * must be terminated by 0) - */ + if (BN_is_zero(a)) return 0; for (i = a->top - 1; i >= 0; i--) @@ -1071,24 +1005,28 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) } } + if (k < max) { + p[k] = -1; + k++; + } + return k; } /* Convert the coefficient array representation of a polynomial to a - * bit-string. The array must be terminated by 0. + * bit-string. The array must be terminated by -1. */ -int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a) +int BN_GF2m_arr2poly(const int p[], BIGNUM *a) { int i; bn_check_top(a); BN_zero(a); - for (i = 0; p[i] != 0; i++) + for (i = 0; p[i] != -1; i++) { if (BN_set_bit(a, p[i]) == 0) return 0; } - BN_set_bit(a, 0); bn_check_top(a); return 1; diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index 27ac4397a1..8e5e98e3f2 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h @@ -255,7 +255,8 @@ extern "C" { : "r"(a), "r"(b)); \ ret; }) # endif /* compiler */ -# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) +# elif (defined(__x86_64) || defined(__x86_64__)) && \ + (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) # if defined(__GNUC__) # define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret,discard; \ diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index 32a8fbaf51..5470fbe6ef 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c @@ -133,15 +133,34 @@ int BN_get_params(int which) const BIGNUM *BN_value_one(void) { - static BN_ULONG data_one=1L; - static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA}; + static const BN_ULONG data_one=1L; + static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA}; return(&const_one); } +char *BN_options(void) + { + static int init=0; + static char data[16]; + + if (!init) + { + init++; +#ifdef BN_LLONG + BIO_snprintf(data,sizeof data,"bn(%d,%d)", + (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); +#else + BIO_snprintf(data,sizeof data,"bn(%d,%d)", + (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); +#endif + } + return(data); + } + int BN_num_bits_word(BN_ULONG l) { - static const char bits[256]={ + static const unsigned char bits[256]={ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, @@ -216,7 +235,7 @@ int BN_num_bits_word(BN_ULONG l) else #endif { -#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) +#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) if (l & 0xff00L) return(bits[(int)(l>>8)]+8); else @@ -744,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n) i=n/BN_BITS2; j=n%BN_BITS2; if (a->top <= i) return 0; - return(((a->d[i])>>j)&((BN_ULONG)1)); + return (int)(((a->d[i])>>j)&((BN_ULONG)1)); } int BN_mask_bits(BIGNUM *a, int n) diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index 4799b152dd..7224637ab3 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c @@ -122,26 +122,10 @@ #define MONT_WORD /* use the faster word-based algorithm */ -#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) -/* This condition means we have a specific non-default build: - * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any - * BN_BITS2<=32 platform; an explicit "enable-montasm" is required. - * I.e., if we are here, the user intentionally deviates from the - * normal stable build to get better Montgomery performance from - * the 0.9.9-dev backport. - * - * In this case only, we also enable BN_from_montgomery_word() - * (another non-stable feature from 0.9.9-dev). - */ -#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD -#endif - -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); #endif - - int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_CTX *ctx) { @@ -153,11 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (num>1 && a->top==num && b->top==num) { if (bn_wexpand(r,num) == NULL) return(0); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) -#else - if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num)) -#endif { r->neg = a->neg^b->neg; r->top = num; @@ -181,7 +161,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (!BN_mul(tmp,a,b,ctx)) goto err; } /* reduce from aRR to aR */ -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD if (!BN_from_montgomery_word(r,tmp,mont)) goto err; #else if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; @@ -193,7 +173,7 @@ err: return(ret); } -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) { BIGNUM *n; @@ -217,15 +197,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) nrp= &(r->d[nl]); /* clear the top words of T */ +#if 1 for (i=r->top; id[i]=0; +#else + memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); +#endif r->top=max; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ n0=mont->n0[0]; -#else - n0=mont->n0; -#endif #ifdef BN_COUNT fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); @@ -270,6 +250,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) } al=r->top-ri; +#define BRANCH_FREE 1 +#if BRANCH_FREE if (bn_wexpand(ret,ri) == NULL) return(0); x=0-(((al-ri)>>(sizeof(al)*8-1))&1); ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ @@ -317,164 +299,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) rp[i]=nrp[i], ap[i]=0; bn_correct_top(r); bn_correct_top(ret); - bn_check_top(ret); - - return(1); - } - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, - BN_CTX *ctx) - { - int retn=0; - BIGNUM *t; - - BN_CTX_start(ctx); - if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) - retn = BN_from_montgomery_word(ret,t,mont); - BN_CTX_end(ctx); - return retn; - } - -#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, - BN_CTX *ctx) - { - int retn=0; - -#ifdef MONT_WORD - BIGNUM *n,*r; - BN_ULONG *ap,*np,*rp,n0,v,*nrp; - int al,nl,max,i,x,ri; - - BN_CTX_start(ctx); - if ((r = BN_CTX_get(ctx)) == NULL) goto err; - - if (!BN_copy(r,a)) goto err; - n= &(mont->N); - - ap=a->d; - /* mont->ri is the size of mont->N in bits (rounded up - to the word size) */ - al=ri=mont->ri/BN_BITS2; - - nl=n->top; - if ((al == 0) || (nl == 0)) { r->top=0; return(1); } - - max=(nl+al+1); /* allow for overflow (no?) XXX */ - if (bn_wexpand(r,max) == NULL) goto err; - - r->neg=a->neg^n->neg; - np=n->d; - rp=r->d; - nrp= &(r->d[nl]); - - /* clear the top words of T */ -#if 1 - for (i=r->top; id[i]=0; #else - memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); -#endif - - r->top=max; - n0=mont->n0; - -#ifdef BN_COUNT - fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl); -#endif - for (i=0; i= v) - continue; - else - { - if (((++nrp[0])&BN_MASK2) != 0) continue; - if (((++nrp[1])&BN_MASK2) != 0) continue; - for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; - } - } - bn_correct_top(r); - - /* mont->ri will be a multiple of the word size and below code - * is kind of BN_rshift(ret,r,mont->ri) equivalent */ - if (r->top <= ri) - { - ret->top=0; - retn=1; - goto err; - } - al=r->top-ri; - -# define BRANCH_FREE 1 -# if BRANCH_FREE - if (bn_wexpand(ret,ri) == NULL) goto err; - x=0-(((al-ri)>>(sizeof(al)*8-1))&1); - ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ - ret->neg=r->neg; - - rp=ret->d; - ap=&(r->d[ri]); - - { - size_t m1,m2; - - v=bn_sub_words(rp,ap,np,ri); - /* this ----------------^^ works even in alri) nrp=rp; else nrp=ap; */ - /* in other words if subtraction result is real, then - * trick unconditional memcpy below to perform in-place - * "refresh" instead of actual copy. */ - m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al>(sizeof(al)*8-1))&1); /* al>ri */ - m1|=m2; /* (al!=ri) */ - m1|=(0-(size_t)v); /* (al!=ri || v) */ - m1&=~m2; /* (al!=ri || v) && !al>ri */ - nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1)); - } - - /* 'itop=al; ret->neg=r->neg; @@ -497,8 +323,30 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, al+=4; for (; iN)) >= 0) + { + if (!BN_usub(ret,ret,&(mont->N))) return(0); + } +#endif + bn_check_top(ret); + + return(1); + } +#endif /* MONT_WORD */ + +int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) + { + int retn=0; +#ifdef MONT_WORD + BIGNUM *t; + + BN_CTX_start(ctx); + if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) + retn = BN_from_montgomery_word(ret,t,mont); + BN_CTX_end(ctx); +#else /* !MONT_WORD */ BIGNUM *t1,*t2; BN_CTX_start(ctx); @@ -515,21 +363,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, if (!BN_mul(t1,t2,&mont->N,ctx)) goto err; if (!BN_add(t2,a,t1)) goto err; if (!BN_rshift(ret,t2,mont->ri)) goto err; -#endif /* MONT_WORD */ -#if !defined(BRANCH_FREE) || BRANCH_FREE==0 if (BN_ucmp(ret, &(mont->N)) >= 0) { if (!BN_usub(ret,ret,&(mont->N))) goto err; } -#endif retn=1; bn_check_top(ret); err: BN_CTX_end(ctx); +#endif /* MONT_WORD */ return(retn); } -#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ BN_MONT_CTX *BN_MONT_CTX_new(void) { @@ -549,11 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx) BN_init(&(ctx->RR)); BN_init(&(ctx->N)); BN_init(&(ctx->Ni)); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ ctx->n0[0] = ctx->n0[1] = 0; -#else - ctx->n0 = 0; -#endif ctx->flags=0; } @@ -585,26 +426,22 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BIGNUM tmod; BN_ULONG buf[2]; - mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; - BN_zero(R); -#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)", - only certain BN_BITS2<=32 platforms actually need this */ - if (!(BN_set_bit(R,2*BN_BITS2))) goto err; /* R */ -#else - if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ -#endif - - buf[0]=mod->d[0]; /* tmod = N mod word size */ - buf[1]=0; - BN_init(&tmod); tmod.d=buf; - tmod.top = buf[0] != 0 ? 1 : 0; tmod.dmax=2; tmod.neg=0; -#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)"; - only certain BN_BITS2<=32 platforms actually need this */ + mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; + +#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) + /* Only certain BN_BITS2<=32 platforms actually make use of + * n0[1], and we could use the #else case (with a shorter R + * value) for the others. However, currently only the assembler + * files do know which is which. */ + + BN_zero(R); + if (!(BN_set_bit(R,2*BN_BITS2))) goto err; + tmod.top=0; if ((buf[0] = mod->d[0])) tmod.top=1; if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2; @@ -632,6 +469,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; #else + BN_zero(R); + if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ + + buf[0]=mod->d[0]; /* tmod = N mod word size */ + buf[1]=0; + tmod.top = buf[0] != 0 ? 1 : 0; /* Ri = R^-1 mod N*/ if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) goto err; @@ -647,12 +490,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; /* Ni = (R*Ri-1)/N, * keep only least significant word: */ -# if 0 /* for OpenSSL 0.9.9 mont->n0 */ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[1] = 0; -# else - mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; -# endif #endif } #else /* !MONT_WORD */ @@ -689,12 +528,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) if (!BN_copy(&(to->N),&(from->N))) return NULL; if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; to->ri=from->ri; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ to->n0[0]=from->n0[0]; to->n0[1]=from->n0[1]; -#else - to->n0=from->n0; -#endif return(to); } diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b848c8cc60..a0e9ec3b46 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c @@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) assert(j <= al || j <= bl); k = j+j; t = BN_CTX_get(ctx); + if (t == NULL) + goto err; if (al > j || bl > j) { - bn_wexpand(t,k*4); - bn_wexpand(rr,k*4); + if (bn_wexpand(t,k*4) == NULL) goto err; + if (bn_wexpand(rr,k*4) == NULL) goto err; bn_mul_part_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } else /* al <= j || bl <= j */ { - bn_wexpand(t,k*2); - bn_wexpand(rr,k*2); + if (bn_wexpand(t,k*2) == NULL) goto err; + if (bn_wexpand(rr,k*2) == NULL) goto err; bn_mul_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } diff --git a/src/lib/libcrypto/bn/bn_opt.c b/src/lib/libcrypto/bn/bn_opt.c deleted file mode 100644 index 21cbb38f62..0000000000 --- a/src/lib/libcrypto/bn/bn_opt.c +++ /dev/null @@ -1,87 +0,0 @@ -/* crypto/bn/bn_opt.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#ifndef BN_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG -#endif - -#include -#include -#include -#include "cryptlib.h" -#include "bn_lcl.h" - -char *BN_options(void) - { - static int init=0; - static char data[16]; - - if (!init) - { - init++; -#ifdef BN_LLONG - BIO_snprintf(data,sizeof data,"bn(%d,%d)", - (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); -#else - BIO_snprintf(data,sizeof data,"bn(%d,%d)", - (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); -#endif - } - return(data); - } diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c index 810dde34e1..bebb466d08 100644 --- a/src/lib/libcrypto/bn/bn_print.c +++ b/src/lib/libcrypto/bn/bn_print.c @@ -294,6 +294,27 @@ err: return(0); } +int BN_asc2bn(BIGNUM **bn, const char *a) + { + const char *p = a; + if (*p == '-') + p++; + + if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) + { + if (!BN_hex2bn(bn, p + 2)) + return 0; + } + else + { + if (!BN_dec2bn(bn, p)) + return 0; + } + if (*a == '-') + (*bn)->neg = 1; + return 1; + } + #ifndef OPENSSL_NO_BIO #ifndef OPENSSL_NO_FP_API int BN_print_fp(FILE *fp, const BIGNUM *a) diff --git a/src/lib/libcrypto/bn/bn_x931p.c b/src/lib/libcrypto/bn/bn_x931p.c deleted file mode 100644 index 04c5c874ec..0000000000 --- a/src/lib/libcrypto/bn/bn_x931p.c +++ /dev/null @@ -1,272 +0,0 @@ -/* bn_x931p.c */ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project 2005. - */ -/* ==================================================================== - * Copyright (c) 2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include -#include - -/* X9.31 routines for prime derivation */ - -/* X9.31 prime derivation. This is used to generate the primes pi - * (p1, p2, q1, q2) from a parameter Xpi by checking successive odd - * integers. - */ - -static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx, - BN_GENCB *cb) - { - int i = 0; - if (!BN_copy(pi, Xpi)) - return 0; - if (!BN_is_odd(pi) && !BN_add_word(pi, 1)) - return 0; - for(;;) - { - i++; - BN_GENCB_call(cb, 0, i); - /* NB 27 MR is specificed in X9.31 */ - if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb)) - break; - if (!BN_add_word(pi, 2)) - return 0; - } - BN_GENCB_call(cb, 2, i); - return 1; - } - -/* This is the main X9.31 prime derivation function. From parameters - * Xp1, Xp2 and Xp derive the prime p. If the parameters p1 or p2 are - * not NULL they will be returned too: this is needed for testing. - */ - -int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, - const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb) - { - int ret = 0; - - BIGNUM *t, *p1p2, *pm1; - - /* Only even e supported */ - if (!BN_is_odd(e)) - return 0; - - BN_CTX_start(ctx); - if (!p1) - p1 = BN_CTX_get(ctx); - - if (!p2) - p2 = BN_CTX_get(ctx); - - t = BN_CTX_get(ctx); - - p1p2 = BN_CTX_get(ctx); - - pm1 = BN_CTX_get(ctx); - - if (!bn_x931_derive_pi(p1, Xp1, ctx, cb)) - goto err; - - if (!bn_x931_derive_pi(p2, Xp2, ctx, cb)) - goto err; - - if (!BN_mul(p1p2, p1, p2, ctx)) - goto err; - - /* First set p to value of Rp */ - - if (!BN_mod_inverse(p, p2, p1, ctx)) - goto err; - - if (!BN_mul(p, p, p2, ctx)) - goto err; - - if (!BN_mod_inverse(t, p1, p2, ctx)) - goto err; - - if (!BN_mul(t, t, p1, ctx)) - goto err; - - if (!BN_sub(p, p, t)) - goto err; - - if (p->neg && !BN_add(p, p, p1p2)) - goto err; - - /* p now equals Rp */ - - if (!BN_mod_sub(p, p, Xp, p1p2, ctx)) - goto err; - - if (!BN_add(p, p, Xp)) - goto err; - - /* p now equals Yp0 */ - - for (;;) - { - int i = 1; - BN_GENCB_call(cb, 0, i++); - if (!BN_copy(pm1, p)) - goto err; - if (!BN_sub_word(pm1, 1)) - goto err; - if (!BN_gcd(t, pm1, e, ctx)) - goto err; - if (BN_is_one(t) - /* X9.31 specifies 8 MR and 1 Lucas test or any prime test - * offering similar or better guarantees 50 MR is considerably - * better. - */ - && BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb)) - break; - if (!BN_add(p, p, p1p2)) - goto err; - } - - BN_GENCB_call(cb, 3, 0); - - ret = 1; - - err: - - BN_CTX_end(ctx); - - return ret; - } - -/* Generate pair of paramters Xp, Xq for X9.31 prime generation. - * Note: nbits paramter is sum of number of bits in both. - */ - -int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) - { - BIGNUM *t; - int i; - /* Number of bits for each prime is of the form - * 512+128s for s = 0, 1, ... - */ - if ((nbits < 1024) || (nbits & 0xff)) - return 0; - nbits >>= 1; - /* The random value Xp must be between sqrt(2) * 2^(nbits-1) and - * 2^nbits - 1. By setting the top two bits we ensure that the lower - * bound is exceeded. - */ - if (!BN_rand(Xp, nbits, 1, 0)) - return 0; - - BN_CTX_start(ctx); - t = BN_CTX_get(ctx); - - for (i = 0; i < 1000; i++) - { - if (!BN_rand(Xq, nbits, 1, 0)) - return 0; - /* Check that |Xp - Xq| > 2^(nbits - 100) */ - BN_sub(t, Xp, Xq); - if (BN_num_bits(t) > (nbits - 100)) - break; - } - - BN_CTX_end(ctx); - - if (i < 1000) - return 1; - - return 0; - - } - -/* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1 - * and Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL - * the relevant parameter will be stored in it. - * - * Due to the fact that |Xp - Xq| > 2^(nbits - 100) must be satisfied Xp and Xq - * are generated using the previous function and supplied as input. - */ - -int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - BIGNUM *Xp1, BIGNUM *Xp2, - const BIGNUM *Xp, - const BIGNUM *e, BN_CTX *ctx, - BN_GENCB *cb) - { - int ret = 0; - - BN_CTX_start(ctx); - if (!Xp1) - Xp1 = BN_CTX_get(ctx); - if (!Xp2) - Xp2 = BN_CTX_get(ctx); - - if (!BN_rand(Xp1, 101, 0, 0)) - goto error; - if (!BN_rand(Xp2, 101, 0, 0)) - goto error; - if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb)) - goto error; - - ret = 1; - - error: - BN_CTX_end(ctx); - - return ret; - - } - diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c index cf190380f5..0cd99c5b4b 100644 --- a/src/lib/libcrypto/bn/bntest.c +++ b/src/lib/libcrypto/bn/bntest.c @@ -486,7 +486,7 @@ static void print_word(BIO *bp,BN_ULONG w) return; } #endif - BIO_printf(bp,"%lX",w); + BIO_printf(bp,BN_HEX_FMT1,w); } int test_div_word(BIO *bp) @@ -732,6 +732,8 @@ int test_mont(BIO *bp, BN_CTX *ctx) BN_init(&n); mont=BN_MONT_CTX_new(); + if (mont == NULL) + return 0; BN_bntest_rand(&a,100,0,0); /**/ BN_bntest_rand(&b,100,0,0); /**/ @@ -1027,7 +1029,7 @@ int test_exp(BIO *bp, BN_CTX *ctx) BN_bntest_rand(a,20+i*5,0,0); /**/ BN_bntest_rand(b,2+i,0,0); /**/ - if (!BN_exp(d,a,b,ctx)) + if (BN_exp(d,a,b,ctx) <= 0) return(0); if (bp != NULL) @@ -1116,8 +1118,8 @@ int test_gf2m_mod(BIO *bp) { BIGNUM *a,*b[2],*c,*d,*e; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1174,8 +1176,8 @@ int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d,*e,*f,*g,*h; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1245,8 +1247,8 @@ int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1304,8 +1306,8 @@ int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1359,8 +1361,8 @@ int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d,*e,*f; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1422,8 +1424,8 @@ int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d,*e,*f; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1493,8 +1495,8 @@ int test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d,*e,*f; int i, j, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); @@ -1552,8 +1554,8 @@ int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx) { BIGNUM *a,*b[2],*c,*d,*e; int i, j, s = 0, t, ret = 0; - unsigned int p0[] = {163,7,6,3,0}; - unsigned int p1[] = {193,15,0}; + int p0[] = {163,7,6,3,0,-1}; + int p1[] = {193,15,0,-1}; a=BN_new(); b[0]=BN_new(); diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c index f598a07cf5..074a8e882a 100644 --- a/src/lib/libcrypto/bn/exptest.c +++ b/src/lib/libcrypto/bn/exptest.c @@ -163,7 +163,7 @@ int main(int argc, char *argv[]) { if (BN_cmp(r_simple,r_mont) != 0) printf("\nsimple and mont results differ\n"); - if (BN_cmp(r_simple,r_mont) != 0) + if (BN_cmp(r_simple,r_mont_const) != 0) printf("\nsimple and mont const time results differ\n"); if (BN_cmp(r_simple,r_recp) != 0) printf("\nsimple and recp results differ\n"); @@ -187,7 +187,7 @@ int main(int argc, char *argv[]) BN_free(b); BN_free(m); BN_CTX_free(ctx); - ERR_remove_state(0); + ERR_remove_thread_state(NULL); CRYPTO_mem_leaks(out); BIO_free(out); printf(" done\n"); -- cgit v1.2.3-55-g6feb